From 8da558f5b76904fa5f8ec0c36b608bfd30549f53 Mon Sep 17 00:00:00 2001 From: Eric Liang Date: Fri, 8 Jun 2018 01:08:59 -0700 Subject: [PATCH] [autoscaler] Should use internal IP for ssh (#2209) --- python/ray/autoscaler/autoscaler.py | 6 ++++-- python/ray/autoscaler/updater.py | 16 ++++++++++++---- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/python/ray/autoscaler/autoscaler.py b/python/ray/autoscaler/autoscaler.py index b7170caf0..c8b852c4e 100644 --- a/python/ray/autoscaler/autoscaler.py +++ b/python/ray/autoscaler/autoscaler.py @@ -489,7 +489,8 @@ class StandardAutoscaler(object): with_head_node_ip(self.config["worker_start_ray_commands"]), self.runtime_hash, redirect_output=not self.verbose_updates, - process_runner=self.process_runner) + process_runner=self.process_runner, + use_internal_ip=True) updater.start() self.updaters[node_id] = updater @@ -515,7 +516,8 @@ class StandardAutoscaler(object): with_head_node_ip(init_commands), self.runtime_hash, redirect_output=not self.verbose_updates, - process_runner=self.process_runner) + process_runner=self.process_runner, + use_internal_ip=True) updater.start() self.updaters[node_id] = updater diff --git a/python/ray/autoscaler/updater.py b/python/ray/autoscaler/updater.py index 84a7e8494..9972d2424 100644 --- a/python/ray/autoscaler/updater.py +++ b/python/ray/autoscaler/updater.py @@ -39,14 +39,16 @@ class NodeUpdater(object): setup_cmds, runtime_hash, redirect_output=True, - process_runner=subprocess): + process_runner=subprocess, + use_internal_ip=False): self.daemon = True self.process_runner = process_runner + self.node_id = node_id + self.use_internal_ip = use_internal_ip self.provider = get_node_provider(provider_config, cluster_name) self.ssh_private_key = auth_config["ssh_private_key"] self.ssh_user = auth_config["ssh_user"] - self.ssh_ip = self.provider.external_ip(node_id) - self.node_id = node_id + self.ssh_ip = self.get_node_ip() self.file_mounts = { remote: os.path.expanduser(local) for remote, local in file_mounts.items() @@ -65,6 +67,12 @@ class NodeUpdater(object): self.stdout = sys.stdout self.stderr = sys.stderr + def get_node_ip(self): + if self.use_internal_ip: + return self.provider.internal_ip(self.node_id) + else: + return self.provider.external_ip(self.node_id) + def run(self): print("NodeUpdater: Updating {} to {}, logging to {}".format( self.node_id, self.runtime_hash, self.output_name)) @@ -107,7 +115,7 @@ class NodeUpdater(object): print( "NodeUpdater: Waiting for IP of {}...".format(self.node_id), file=self.stdout) - self.ssh_ip = self.provider.external_ip(self.node_id) + self.ssh_ip = self.get_node_ip() if self.ssh_ip is not None: break time.sleep(10)