[autoscaler] hotfix calculate_node_resources (#10874)

This commit is contained in:
Alex Wu
2020-09-18 13:39:00 -07:00
committed by GitHub
parent 102c498653
commit e56f2b8586
2 changed files with 22 additions and 1 deletions
@@ -56,6 +56,7 @@ class ResourceDemandScheduler:
nodes: List of existing nodes in the cluster.
pending_nodes: Summary of node types currently being launched.
resource_demands: Vector of resource demands from the scheduler.
usage_by_ip: Mapping from ip to available resources.
"""
if resource_demands is None:
@@ -113,7 +114,6 @@ class ResourceDemandScheduler:
tags = self.provider.node_tags(node_id)
if TAG_RAY_USER_NODE_TYPE in tags:
node_type = tags[TAG_RAY_USER_NODE_TYPE]
node_type_counts[node_type] += 1
ip = self.provider.internal_ip(node_id)
available_resources = usage_by_ip.get(ip)
add_node(node_type, available_resources)
@@ -180,6 +180,27 @@ def test_get_nodes_to_launch_limits():
assert to_launch == []
def test_calculate_node_resources():
provider = MockProvider()
scheduler = ResourceDemandScheduler(provider, TYPES_A, 10)
provider.create_node({}, {TAG_RAY_USER_NODE_TYPE: "p2.8xlarge"}, 2)
nodes = provider.non_terminated_nodes({})
ips = provider.non_terminated_node_ips({})
# 2 free p2.8xls
utilizations = {ip: {"GPU": 8} for ip in ips}
# 1 more on the way
pending_nodes = {"p2.8xlarge": 1}
# requires 4 p2.8xls (only 3 are in cluster/pending)
demands = [{"GPU": 8}] * (len(utilizations) + 2)
to_launch = scheduler.get_nodes_to_launch(nodes, pending_nodes, demands,
utilizations)
assert to_launch == [("p2.8xlarge", 1)]
class LoadMetricsTest(unittest.TestCase):
def testResourceDemandVector(self):
lm = LoadMetrics()