From d826452e0bb7c98e77f2416a3b9eb123b96d1cb0 Mon Sep 17 00:00:00 2001 From: Ameer Haj Ali Date: Thu, 19 Nov 2020 01:24:38 +0200 Subject: [PATCH] [autoscaler] fix max_workers bug in resource_demand_scheduler by counting the head node (#12123) --- python/ray/autoscaler/_private/resource_demand_scheduler.py | 5 +++-- python/ray/tests/test_autoscaler.py | 3 +-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/python/ray/autoscaler/_private/resource_demand_scheduler.py b/python/ray/autoscaler/_private/resource_demand_scheduler.py index 62a71ef3a..545bb7128 100644 --- a/python/ray/autoscaler/_private/resource_demand_scheduler.py +++ b/python/ray/autoscaler/_private/resource_demand_scheduler.py @@ -192,7 +192,8 @@ class ResourceDemandScheduler: resource_demands) logger.info("Resource demands: {}".format(resource_demands)) logger.info("Unfulfilled demands: {}".format(unfulfilled)) - max_to_add = self.max_workers - sum(node_type_counts.values()) + # Add 1 to account for the head node. + max_to_add = self.max_workers + 1 - sum(node_type_counts.values()) if resource_requests: nodes_to_add_based_on_requests = get_nodes_for( self.node_types, node_type_counts, max_to_add, @@ -452,7 +453,7 @@ class ResourceDemandScheduler: # nodes. The remaining will be allocated on new nodes. unfulfilled, node_resources = get_bin_pack_residual( node_resources, bundles, strict_spread=True) - max_to_add = self.max_workers - sum(node_type_counts.values()) + max_to_add = self.max_workers + 1 - sum(node_type_counts.values()) # Allocate new nodes for the remaining bundles that don't fit. to_launch = get_nodes_for( self.node_types, diff --git a/python/ray/tests/test_autoscaler.py b/python/ray/tests/test_autoscaler.py index 7a582cbd2..f99b3791c 100644 --- a/python/ray/tests/test_autoscaler.py +++ b/python/ray/tests/test_autoscaler.py @@ -616,8 +616,7 @@ class AutoscalingTest(unittest.TestCase): def testAggressiveAutoscaling(self): config = SMALL_CLUSTER.copy() config["min_workers"] = 0 - config["max_workers"] = 20 - config["initial_workers"] = 10 + config["max_workers"] = 10 config["idle_timeout_minutes"] = 0 config["upscaling_speed"] = config["max_workers"] config_path = self.write_config(config)