Dmitri/k8s command runner home try again (#12609)

This commit is contained in:
Gekho457
2020-12-08 09:44:22 -08:00
committed by GitHub
parent 2a9079aef9
commit f61bc79a87
2 changed files with 22 additions and 10 deletions
+1 -1
View File
@@ -47,7 +47,7 @@ class StandardAutoscaler:
There are two ways to start an autoscaling cluster: manually by running
`ray start --head --autoscaling-config=/path/to/config.yaml` on a
instance that has permission to launch other instances, or you can also use
`ray create_or_update /path/to/config.yaml` from your laptop, which will
`ray up /path/to/config.yaml` from your laptop, which will
configure the right AWS/Cloud roles automatically.
StandardAutoscaler's `update` method is periodically called by `monitor.py`
@@ -29,8 +29,6 @@ from ray.autoscaler._private.subprocess_output_util import (
from ray.autoscaler._private.cli_logger import cli_logger, cf
from ray.util.debug import log_once
from ray.autoscaler._private.constants import RAY_HOME
logger = logging.getLogger(__name__)
# How long to wait for a node to start, in seconds
@@ -114,6 +112,7 @@ class KubernetesCommandRunner(CommandRunnerInterface):
self.node_id = str(node_id)
self.namespace = namespace
self.kubectl = ["kubectl", "-n", self.namespace]
self._home_cached = None
def run(
self,
@@ -195,7 +194,7 @@ class KubernetesCommandRunner(CommandRunnerInterface):
logger.warning("'rsync_filter' detected but is currently "
"unsupported for k8s.")
if target.startswith("~"):
target = RAY_HOME + target[1:]
target = self._home + target[1:]
try:
flags = "-aqz" if is_rsync_silent() else "-avz"
@@ -211,7 +210,7 @@ class KubernetesCommandRunner(CommandRunnerInterface):
"rsync failed: '{}'. Falling back to 'kubectl cp'".format(e),
UserWarning)
if target.startswith("~"):
target = RAY_HOME + target[1:]
target = self._home + target[1:]
self.process_runner.check_call(self.kubectl + [
"cp", source, "{}/{}:{}".format(self.namespace, self.node_id,
@@ -219,8 +218,8 @@ class KubernetesCommandRunner(CommandRunnerInterface):
])
def run_rsync_down(self, source, target, options=None):
if target.startswith("~"):
target = RAY_HOME + target[1:]
if source.startswith("~"):
source = self._home + source[1:]
try:
flags = "-aqz" if is_rsync_silent() else "-avz"
@@ -236,7 +235,7 @@ class KubernetesCommandRunner(CommandRunnerInterface):
"rsync failed: '{}'. Falling back to 'kubectl cp'".format(e),
UserWarning)
if target.startswith("~"):
target = RAY_HOME + target[1:]
target = self._home + target[1:]
self.process_runner.check_call(self.kubectl + [
"cp", "{}/{}:{}".format(self.namespace, self.node_id, source),
@@ -244,8 +243,21 @@ class KubernetesCommandRunner(CommandRunnerInterface):
])
def remote_shell_command_str(self):
return "{} exec -it {} bash".format(" ".join(self.kubectl),
self.node_id)
return "{} exec -it {} -- bash".format(" ".join(self.kubectl),
self.node_id)
@property
def _home(self):
# TODO (Dmitri): Think about how to use the node's HOME variable
# without making an extra kubectl exec call.
if self._home_cached is None:
cmd = self.kubectl + [
"exec", "-it", self.node_id, "--", "printenv", "HOME"
]
joined_cmd = " ".join(cmd)
raw_out = self.process_runner.check_output(joined_cmd, shell=True)
self._home_cached = raw_out.decode().strip("\n\r")
return self._home_cached
class SSHOptions: