mirror of
https://github.com/wassname/ray.git
synced 2026-06-27 18:44:07 +08:00
Fix autoscaler wrong parameter names (#13966)
* prepare for head node * move command runner interface outside _private * remove space * Eric * flake * min_workers in multi node type * fixing edge cases * eric not idle * fix target_workers to consider min_workers of node types * idle timeout * minor * minor fix * test * lint * eric v2 * eric 3 * min_workers constraint before bin packing * Update resource_demand_scheduler.py * Revert "Update resource_demand_scheduler.py" This reverts commit 818a63a2c86d8437b3ef21c5035d701c1d1127b5. * reducing diff * make get_nodes_to_launch return a dict * merge * weird merge fix * auto fill instance types for AWS * Alex/Eric * Update doc/source/cluster/autoscaling.rst * merge autofill and input from user * logger.exception * make the yaml use the default autofill * docs Eric * remove test_autoscaler_yaml from windows tests * lets try changing the test a bit * return test * lets see * edward * Limit max launch concurrency * commenting frac TODO * move to resource demand scheduler * use STATUS UP TO DATE * Eric * make logger of gc freed refs debug instead of info * add cluster name to docker mount prefix directory * grrR * fix tests * moving docker directory to sdk * move the import to prevent circular dependency * smallf fix * ian * fix max launch concurrency bug to assume failing nodes as pending and consider only load_metric's connected nodes as running * small fix * improve code readability * lint Co-authored-by: Ameer Haj Ali <ameerhajali@ameers-mbp.lan> Co-authored-by: Alex Wu <alex@anyscale.io> Co-authored-by: Alex Wu <itswu.alex@gmail.com> Co-authored-by: Eric Liang <ekhliang@gmail.com> Co-authored-by: Ameer Haj Ali <ameerhajali@Ameers-MacBook-Pro.local>
This commit is contained in:
@@ -43,7 +43,7 @@ logger = logging.getLogger(__name__)
|
||||
# that will be passed into a NodeUpdaterThread.
|
||||
UpdateInstructions = namedtuple(
|
||||
"UpdateInstructions",
|
||||
["node_id", "init_commands", "start_ray_commands", "docker_config"])
|
||||
["node_id", "setup_commands", "ray_start_commands", "docker_config"])
|
||||
|
||||
AutoscalerSummary = namedtuple(
|
||||
"AutoscalerSummary",
|
||||
@@ -283,7 +283,7 @@ class StandardAutoscaler:
|
||||
# problems. They should at a minimum be spawned as daemon threads.
|
||||
# See https://github.com/ray-project/ray/pull/5903 for more info.
|
||||
T = []
|
||||
for node_id, commands, ray_start, docker_config in (
|
||||
for node_id, setup_commands, ray_start_commands, docker_config in (
|
||||
self.should_update(node_id) for node_id in nodes):
|
||||
if node_id is not None:
|
||||
resources = self._node_resources(node_id)
|
||||
@@ -291,8 +291,8 @@ class StandardAutoscaler:
|
||||
T.append(
|
||||
threading.Thread(
|
||||
target=self.spawn_updater,
|
||||
args=(node_id, commands, ray_start, resources,
|
||||
docker_config)))
|
||||
args=(node_id, setup_commands, ray_start_commands,
|
||||
resources, docker_config)))
|
||||
for t in T:
|
||||
t.start()
|
||||
for t in T:
|
||||
@@ -633,25 +633,25 @@ class StandardAutoscaler:
|
||||
|
||||
successful_updated = self.num_successful_updates.get(node_id, 0) > 0
|
||||
if successful_updated and self.config.get("restart_only", False):
|
||||
init_commands = []
|
||||
ray_commands = self.config["worker_start_ray_commands"]
|
||||
setup_commands = []
|
||||
ray_start_commands = self.config["worker_start_ray_commands"]
|
||||
elif successful_updated and self.config.get("no_restart", False):
|
||||
init_commands = self._get_node_type_specific_fields(
|
||||
setup_commands = self._get_node_type_specific_fields(
|
||||
node_id, "worker_setup_commands")
|
||||
ray_commands = []
|
||||
ray_start_commands = []
|
||||
else:
|
||||
init_commands = self._get_node_type_specific_fields(
|
||||
setup_commands = self._get_node_type_specific_fields(
|
||||
node_id, "worker_setup_commands")
|
||||
ray_commands = self.config["worker_start_ray_commands"]
|
||||
ray_start_commands = self.config["worker_start_ray_commands"]
|
||||
|
||||
docker_config = self._get_node_specific_docker_config(node_id)
|
||||
return UpdateInstructions(
|
||||
node_id=node_id,
|
||||
init_commands=init_commands,
|
||||
start_ray_commands=ray_commands,
|
||||
setup_commands=setup_commands,
|
||||
ray_start_commands=ray_start_commands,
|
||||
docker_config=docker_config)
|
||||
|
||||
def spawn_updater(self, node_id, init_commands, ray_start_commands,
|
||||
def spawn_updater(self, node_id, setup_commands, ray_start_commands,
|
||||
node_resources, docker_config):
|
||||
logger.info(f"Creating new (spawn_updater) updater thread for node"
|
||||
f" {node_id}.")
|
||||
@@ -665,7 +665,8 @@ class StandardAutoscaler:
|
||||
initialization_commands=with_head_node_ip(
|
||||
self._get_node_type_specific_fields(
|
||||
node_id, "initialization_commands"), self.head_node_ip),
|
||||
setup_commands=with_head_node_ip(init_commands, self.head_node_ip),
|
||||
setup_commands=with_head_node_ip(setup_commands,
|
||||
self.head_node_ip),
|
||||
ray_start_commands=with_head_node_ip(ray_start_commands,
|
||||
self.head_node_ip),
|
||||
runtime_hash=self.runtime_hash,
|
||||
|
||||
Reference in New Issue
Block a user