[autoscaler] Should use internal IP for ssh (#2209)

This commit is contained in:
Eric Liang
2018-06-08 01:08:59 -07:00
committed by Richard Liaw
parent 31046f7e06
commit 8da558f5b7
2 changed files with 16 additions and 6 deletions
+4 -2
View File
@@ -489,7 +489,8 @@ class StandardAutoscaler(object):
with_head_node_ip(self.config["worker_start_ray_commands"]),
self.runtime_hash,
redirect_output=not self.verbose_updates,
process_runner=self.process_runner)
process_runner=self.process_runner,
use_internal_ip=True)
updater.start()
self.updaters[node_id] = updater
@@ -515,7 +516,8 @@ class StandardAutoscaler(object):
with_head_node_ip(init_commands),
self.runtime_hash,
redirect_output=not self.verbose_updates,
process_runner=self.process_runner)
process_runner=self.process_runner,
use_internal_ip=True)
updater.start()
self.updaters[node_id] = updater
+12 -4
View File
@@ -39,14 +39,16 @@ class NodeUpdater(object):
setup_cmds,
runtime_hash,
redirect_output=True,
process_runner=subprocess):
process_runner=subprocess,
use_internal_ip=False):
self.daemon = True
self.process_runner = process_runner
self.node_id = node_id
self.use_internal_ip = use_internal_ip
self.provider = get_node_provider(provider_config, cluster_name)
self.ssh_private_key = auth_config["ssh_private_key"]
self.ssh_user = auth_config["ssh_user"]
self.ssh_ip = self.provider.external_ip(node_id)
self.node_id = node_id
self.ssh_ip = self.get_node_ip()
self.file_mounts = {
remote: os.path.expanduser(local)
for remote, local in file_mounts.items()
@@ -65,6 +67,12 @@ class NodeUpdater(object):
self.stdout = sys.stdout
self.stderr = sys.stderr
def get_node_ip(self):
if self.use_internal_ip:
return self.provider.internal_ip(self.node_id)
else:
return self.provider.external_ip(self.node_id)
def run(self):
print("NodeUpdater: Updating {} to {}, logging to {}".format(
self.node_id, self.runtime_hash, self.output_name))
@@ -107,7 +115,7 @@ class NodeUpdater(object):
print(
"NodeUpdater: Waiting for IP of {}...".format(self.node_id),
file=self.stdout)
self.ssh_ip = self.provider.external_ip(self.node_id)
self.ssh_ip = self.get_node_ip()
if self.ssh_ip is not None:
break
time.sleep(10)