mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 15:22:56 +08:00
[autoscaler] Should use internal IP for ssh (#2209)
This commit is contained in:
@@ -489,7 +489,8 @@ class StandardAutoscaler(object):
                 with_head_node_ip(self.config["worker_start_ray_commands"]),
                 self.runtime_hash,
                 redirect_output=not self.verbose_updates,
-                process_runner=self.process_runner)
+                process_runner=self.process_runner,
+                use_internal_ip=True)
             updater.start()
             self.updaters[node_id] = updater
 
@@ -515,7 +516,8 @@ class StandardAutoscaler(object):
                 with_head_node_ip(init_commands),
                 self.runtime_hash,
                 redirect_output=not self.verbose_updates,
-                process_runner=self.process_runner)
+                process_runner=self.process_runner,
+                use_internal_ip=True)
             updater.start()
             self.updaters[node_id] = updater
 
@@ -39,14 +39,16 @@ class NodeUpdater(object):
                  setup_cmds,
                  runtime_hash,
                  redirect_output=True,
-                 process_runner=subprocess):
+                 process_runner=subprocess,
+                 use_internal_ip=False):
         self.daemon = True
         self.process_runner = process_runner
+        self.node_id = node_id
+        self.use_internal_ip = use_internal_ip
         self.provider = get_node_provider(provider_config, cluster_name)
         self.ssh_private_key = auth_config["ssh_private_key"]
         self.ssh_user = auth_config["ssh_user"]
-        self.ssh_ip = self.provider.external_ip(node_id)
-        self.node_id = node_id
+        self.ssh_ip = self.get_node_ip()
         self.file_mounts = {
             remote: os.path.expanduser(local)
             for remote, local in file_mounts.items()
@@ -65,6 +67,12 @@ class NodeUpdater(object):
             self.stdout = sys.stdout
             self.stderr = sys.stderr
 
+    def get_node_ip(self):
+        if self.use_internal_ip:
+            return self.provider.internal_ip(self.node_id)
+        else:
+            return self.provider.external_ip(self.node_id)
+
     def run(self):
         print("NodeUpdater: Updating {} to {}, logging to {}".format(
             self.node_id, self.runtime_hash, self.output_name))
@@ -107,7 +115,7 @@ class NodeUpdater(object):
             print(
                 "NodeUpdater: Waiting for IP of {}...".format(self.node_id),
                 file=self.stdout)
-            self.ssh_ip = self.provider.external_ip(self.node_id)
+            self.ssh_ip = self.get_node_ip()
             if self.ssh_ip is not None:
                 break
             time.sleep(10)
Reference in New Issue
Block a user