mirror of
https://github.com/wassname/ray.git
synced 2026-06-27 23:08:32 +08:00
[docker] Revert to rsync & cp instead of file mount for bootstrap config/key (#10734)
This commit is contained in:
committed by
Barak Michener
parent
0592dcacba
commit
049b7b2017
@@ -709,6 +709,10 @@ class DockerCommandRunner(CommandRunnerInterface):
|
||||
return string
|
||||
|
||||
def run_init(self, *, as_head, file_mounts):
|
||||
BOOTSTRAP_MOUNTS = [
|
||||
"~/ray_bootstrap_config.yaml", "~/ray_bootstrap_key.pem"
|
||||
]
|
||||
|
||||
image = self.docker_config.get("image")
|
||||
image = self.docker_config.get(
|
||||
f"{'head' if as_head else 'worker'}_image", image)
|
||||
@@ -720,8 +724,14 @@ class DockerCommandRunner(CommandRunnerInterface):
|
||||
|
||||
self.run("docker pull {}".format(image), run_env="host")
|
||||
|
||||
# Bootstrap files cannot be bind mounted because docker opens the
|
||||
# underlying inode. When the file is switched, docker becomes outdated.
|
||||
cleaned_bind_mounts = file_mounts.copy()
|
||||
for mnt in BOOTSTRAP_MOUNTS:
|
||||
cleaned_bind_mounts.pop(mnt, None)
|
||||
|
||||
start_command = docker_start_cmds(
|
||||
self.ssh_command_runner.ssh_user, image, file_mounts,
|
||||
self.ssh_command_runner.ssh_user, image, cleaned_bind_mounts,
|
||||
self.container_name,
|
||||
self.docker_config.get("run_options", []) + self.docker_config.get(
|
||||
f"{'head' if as_head else 'worker'}_run_options", []))
|
||||
@@ -746,7 +756,8 @@ class DockerCommandRunner(CommandRunnerInterface):
|
||||
active_remote_mounts = [
|
||||
mnt["Destination"] for mnt in active_mounts
|
||||
]
|
||||
for remote, local in file_mounts.items():
|
||||
# Ignore ray bootstrap files.
|
||||
for remote, local in cleaned_bind_mounts.items():
|
||||
remote = self._docker_expand_user(remote)
|
||||
if remote not in active_remote_mounts:
|
||||
cli_logger.error(
|
||||
@@ -756,4 +767,13 @@ class DockerCommandRunner(CommandRunnerInterface):
|
||||
cli_logger.verbose(
|
||||
"Unable to check if file_mounts specified in the YAML "
|
||||
"differ from those on the running container.")
|
||||
|
||||
# Explicitly copy in ray bootstrap files.
|
||||
for mount in BOOTSTRAP_MOUNTS:
|
||||
if mount in file_mounts:
|
||||
self.ssh_command_runner.run(
|
||||
"docker cp {src} {container}:{dst}".format(
|
||||
src=os.path.join(DOCKER_MOUNT_PREFIX, mount),
|
||||
container=self.container_name,
|
||||
dst=self._docker_expand_user(mount)))
|
||||
self.initialized = True
|
||||
|
||||
@@ -54,15 +54,17 @@ class MockProcessRunner:
|
||||
def check_output(self, cmd):
|
||||
self.check_call(cmd)
|
||||
return_string = "command-output"
|
||||
key_to_delete = None
|
||||
for pattern, pair in self.call_response.items():
|
||||
key_to_shrink = None
|
||||
for pattern, response_list in self.call_response.items():
|
||||
if pattern in str(cmd):
|
||||
return_string = pair[0]
|
||||
if pair[1] - 1 == 0:
|
||||
key_to_delete = pattern
|
||||
return_string = response_list[0]
|
||||
key_to_shrink = pattern
|
||||
break
|
||||
if key_to_delete:
|
||||
del self.call_response[key_to_delete]
|
||||
if key_to_shrink:
|
||||
self.call_response[key_to_shrink] = self.call_response[
|
||||
key_to_shrink][1:]
|
||||
if len(self.call_response[key_to_shrink]) == 0:
|
||||
del self.call_response[key_to_shrink]
|
||||
|
||||
return return_string.encode()
|
||||
|
||||
@@ -108,8 +110,8 @@ class MockProcessRunner:
|
||||
def clear_history(self):
|
||||
self.calls = []
|
||||
|
||||
def respond_to_call(self, pattern, response, num_times=1):
|
||||
self.call_response[pattern] = (response, num_times)
|
||||
def respond_to_call(self, pattern, response_list):
|
||||
self.call_response[pattern] = response_list
|
||||
|
||||
|
||||
class MockProvider(NodeProvider):
|
||||
@@ -400,6 +402,10 @@ class AutoscalingTest(unittest.TestCase):
|
||||
config_path = self.write_config(SMALL_CLUSTER)
|
||||
self.provider = MockProvider()
|
||||
runner = MockProcessRunner()
|
||||
runner.respond_to_call("json .Mounts", ["[]"])
|
||||
# Two initial calls to docker cp, one before run, two final calls to cp
|
||||
runner.respond_to_call(".State.Running",
|
||||
["false", "false", "false", "true", "true"])
|
||||
get_or_create_head_node(
|
||||
SMALL_CLUSTER,
|
||||
config_path,
|
||||
@@ -414,6 +420,15 @@ class AutoscalingTest(unittest.TestCase):
|
||||
runner.assert_has_call("1.2.3.4", "head_setup_cmd")
|
||||
runner.assert_has_call("1.2.3.4", "start_ray_head")
|
||||
self.assertEqual(self.provider.mock_nodes[0].node_type, None)
|
||||
runner.assert_has_call("1.2.3.4", pattern="docker run")
|
||||
runner.assert_not_has_call(
|
||||
"1.2.3.4", pattern="-v /tmp/ray_tmp_mount/~/ray_bootstrap_config")
|
||||
runner.assert_has_call(
|
||||
"1.2.3.4",
|
||||
pattern="docker cp /tmp/ray_tmp_mount/~/ray_bootstrap_key.pem")
|
||||
runner.assert_has_call(
|
||||
"1.2.3.4",
|
||||
pattern="docker cp /tmp/ray_tmp_mount/~/ray_bootstrap_config.yaml")
|
||||
|
||||
def testScaleUp(self):
|
||||
config_path = self.write_config(SMALL_CLUSTER)
|
||||
|
||||
@@ -202,7 +202,7 @@ def test_docker_rsync():
|
||||
remote_file = "/root/protected-file"
|
||||
remote_host_file = f"{DOCKER_MOUNT_PREFIX}{remote_file}"
|
||||
|
||||
process_runner.respond_to_call("docker inspect -f", "true")
|
||||
process_runner.respond_to_call("docker inspect -f", ["true"])
|
||||
cmd_runner.run_rsync_up(
|
||||
local_mount, remote_mount, options={"file_mount": True})
|
||||
|
||||
@@ -219,7 +219,7 @@ def test_docker_rsync():
|
||||
process_runner.clear_history()
|
||||
##############################
|
||||
|
||||
process_runner.respond_to_call("docker inspect -f", "true")
|
||||
process_runner.respond_to_call("docker inspect -f", ["true"])
|
||||
cmd_runner.run_rsync_up(
|
||||
local_file, remote_file, options={"file_mount": False})
|
||||
|
||||
|
||||
Reference in New Issue
Block a user