diff --git a/python/ray/autoscaler/_private/autoscaler.py b/python/ray/autoscaler/_private/autoscaler.py index b918df7f3..ba1fb4fe4 100644 --- a/python/ray/autoscaler/_private/autoscaler.py +++ b/python/ray/autoscaler/_private/autoscaler.py @@ -47,7 +47,7 @@ class StandardAutoscaler: There are two ways to start an autoscaling cluster: manually by running `ray start --head --autoscaling-config=/path/to/config.yaml` on a instance that has permission to launch other instances, or you can also use - `ray create_or_update /path/to/config.yaml` from your laptop, which will + `ray up /path/to/config.yaml` from your laptop, which will configure the right AWS/Cloud roles automatically. StandardAutoscaler's `update` method is periodically called by `monitor.py` diff --git a/python/ray/autoscaler/_private/command_runner.py b/python/ray/autoscaler/_private/command_runner.py index 52ead65cd..075efa377 100644 --- a/python/ray/autoscaler/_private/command_runner.py +++ b/python/ray/autoscaler/_private/command_runner.py @@ -29,8 +29,6 @@ from ray.autoscaler._private.subprocess_output_util import ( from ray.autoscaler._private.cli_logger import cli_logger, cf from ray.util.debug import log_once -from ray.autoscaler._private.constants import RAY_HOME - logger = logging.getLogger(__name__) # How long to wait for a node to start, in seconds @@ -114,6 +112,7 @@ class KubernetesCommandRunner(CommandRunnerInterface): self.node_id = str(node_id) self.namespace = namespace self.kubectl = ["kubectl", "-n", self.namespace] + self._home_cached = None def run( self, @@ -195,7 +194,7 @@ class KubernetesCommandRunner(CommandRunnerInterface): logger.warning("'rsync_filter' detected but is currently " "unsupported for k8s.") if target.startswith("~"): - target = RAY_HOME + target[1:] + target = self._home + target[1:] try: flags = "-aqz" if is_rsync_silent() else "-avz" @@ -211,7 +210,7 @@ class KubernetesCommandRunner(CommandRunnerInterface): "rsync failed: '{}'. Falling back to 'kubectl cp'".format(e), UserWarning) if target.startswith("~"): - target = RAY_HOME + target[1:] + target = self._home + target[1:] self.process_runner.check_call(self.kubectl + [ "cp", source, "{}/{}:{}".format(self.namespace, self.node_id, @@ -219,8 +218,8 @@ class KubernetesCommandRunner(CommandRunnerInterface): ]) def run_rsync_down(self, source, target, options=None): - if target.startswith("~"): - target = RAY_HOME + target[1:] + if source.startswith("~"): + source = self._home + source[1:] try: flags = "-aqz" if is_rsync_silent() else "-avz" @@ -236,7 +235,7 @@ class KubernetesCommandRunner(CommandRunnerInterface): "rsync failed: '{}'. Falling back to 'kubectl cp'".format(e), UserWarning) if target.startswith("~"): - target = RAY_HOME + target[1:] + target = self._home + target[1:] self.process_runner.check_call(self.kubectl + [ "cp", "{}/{}:{}".format(self.namespace, self.node_id, source), @@ -244,8 +243,21 @@ class KubernetesCommandRunner(CommandRunnerInterface): ]) def remote_shell_command_str(self): - return "{} exec -it {} bash".format(" ".join(self.kubectl), - self.node_id) + return "{} exec -it {} -- bash".format(" ".join(self.kubectl), + self.node_id) + + @property + def _home(self): + # TODO (Dmitri): Think about how to use the node's HOME variable + # without making an extra kubectl exec call. + if self._home_cached is None: + cmd = self.kubectl + [ + "exec", "-it", self.node_id, "--", "printenv", "HOME" + ] + joined_cmd = " ".join(cmd) + raw_out = self.process_runner.check_output(joined_cmd, shell=True) + self._home_cached = raw_out.decode().strip("\n\r") + return self._home_cached class SSHOptions: