diff --git a/python/ray/autoscaler/resource_demand_scheduler.py b/python/ray/autoscaler/resource_demand_scheduler.py index 00acd2f01..3b3bd20ba 100644 --- a/python/ray/autoscaler/resource_demand_scheduler.py +++ b/python/ray/autoscaler/resource_demand_scheduler.py @@ -56,6 +56,7 @@ class ResourceDemandScheduler: nodes: List of existing nodes in the cluster. pending_nodes: Summary of node types currently being launched. resource_demands: Vector of resource demands from the scheduler. + usage_by_ip: Mapping from ip to available resources. """ if resource_demands is None: @@ -113,7 +114,6 @@ class ResourceDemandScheduler: tags = self.provider.node_tags(node_id) if TAG_RAY_USER_NODE_TYPE in tags: node_type = tags[TAG_RAY_USER_NODE_TYPE] - node_type_counts[node_type] += 1 ip = self.provider.internal_ip(node_id) available_resources = usage_by_ip.get(ip) add_node(node_type, available_resources) diff --git a/python/ray/tests/test_resource_demand_scheduler.py b/python/ray/tests/test_resource_demand_scheduler.py index dea49e821..f6200b0e5 100644 --- a/python/ray/tests/test_resource_demand_scheduler.py +++ b/python/ray/tests/test_resource_demand_scheduler.py @@ -180,6 +180,27 @@ def test_get_nodes_to_launch_limits(): assert to_launch == [] +def test_calculate_node_resources(): + provider = MockProvider() + scheduler = ResourceDemandScheduler(provider, TYPES_A, 10) + + provider.create_node({}, {TAG_RAY_USER_NODE_TYPE: "p2.8xlarge"}, 2) + + nodes = provider.non_terminated_nodes({}) + + ips = provider.non_terminated_node_ips({}) + # 2 free p2.8xls + utilizations = {ip: {"GPU": 8} for ip in ips} + # 1 more on the way + pending_nodes = {"p2.8xlarge": 1} + # requires 4 p2.8xls (only 3 are in cluster/pending) + demands = [{"GPU": 8}] * (len(utilizations) + 2) + to_launch = scheduler.get_nodes_to_launch(nodes, pending_nodes, demands, + utilizations) + + assert to_launch == [("p2.8xlarge", 1)] + + class LoadMetricsTest(unittest.TestCase): def testResourceDemandVector(self): lm = LoadMetrics()