From acf4d53b55779822873a438bf696aaa59537a1c0 Mon Sep 17 00:00:00 2001 From: Richard Liaw Date: Wed, 20 Feb 2019 21:35:33 -0800 Subject: [PATCH] [autoscaler] Fix redirects, fix submit (#4085) --- python/ray/autoscaler/updater.py | 26 ++++++++++++++++++-------- python/ray/rllib/setup-rllib-dev.py | 1 + python/ray/scripts/scripts.py | 11 ++++++++--- 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/python/ray/autoscaler/updater.py b/python/ray/autoscaler/updater.py index e3697adba..ef8f34394 100644 --- a/python/ray/autoscaler/updater.py +++ b/python/ray/autoscaler/updater.py @@ -206,12 +206,9 @@ class NodeUpdater(object): m = "{}: Synced {} to {}".format(self.node_id, local_path, remote_path) with LogTimer("NodeUpdater {}".format(m)): - self.ssh_cmd( - "mkdir -p {}".format(os.path.dirname(remote_path)), - redirect=open("/dev/null", "w"), - ) - self.rsync_up( - local_path, remote_path, redirect=open("/dev/null", "w")) + self.ssh_cmd("mkdir -p {}".format( + os.path.dirname(remote_path))) + self.rsync_up(local_path, remote_path) # Run init commands self.provider.set_node_tags(self.node_id, @@ -220,15 +217,19 @@ class NodeUpdater(object): m = "{}: Initialization commands completed".format(self.node_id) with LogTimer("NodeUpdater: {}".format(m)): for cmd in self.initialization_commands: - self.ssh_cmd(cmd, redirect=open("/dev/null", "w")) + self.ssh_cmd(cmd) m = "{}: Setup commands completed".format(self.node_id) with LogTimer("NodeUpdater: {}".format(m)): for cmd in self.setup_commands: - self.ssh_cmd(cmd, redirect=open("/dev/null", "w")) + self.ssh_cmd(cmd) def rsync_up(self, source, target, redirect=None, check_error=True): self.set_ssh_ip_if_required() + if redirect is None: + if logger.getEffectiveLevel() > logging.DEBUG: + redirect = open("/dev/null", "w") + self.get_caller(check_error)( [ "rsync", "-e", @@ -242,6 +243,11 @@ class NodeUpdater(object): def rsync_down(self, source, target, redirect=None, check_error=True): self.set_ssh_ip_if_required() + + if redirect is None: + if logger.getEffectiveLevel() > logging.DEBUG: + redirect = open("/dev/null", "w") + self.get_caller(check_error)( [ "rsync", "-e", @@ -264,6 +270,10 @@ class NodeUpdater(object): self.set_ssh_ip_if_required() + if redirect is None: + if logger.getEffectiveLevel() > logging.DEBUG: + redirect = open("/dev/null", "w") + logger.info("NodeUpdater: Running {} on {}...".format( cmd, self.ssh_ip)) ssh = ["ssh"] diff --git a/python/ray/rllib/setup-rllib-dev.py b/python/ray/rllib/setup-rllib-dev.py index d85f048d5..f50bdffbe 100755 --- a/python/ray/rllib/setup-rllib-dev.py +++ b/python/ray/rllib/setup-rllib-dev.py @@ -38,6 +38,7 @@ if __name__ == "__main__": do_link("rllib") do_link("tune") do_link("autoscaler") + do_link("scripts") print("Created links.\n\nIf you run into issues initializing Ray, please " "ensure that your local repo and the installed Ray are in sync " "(pip install -U the latest wheels at " diff --git a/python/ray/scripts/scripts.py b/python/ray/scripts/scripts.py index 0fa2bf829..731445868 100644 --- a/python/ray/scripts/scripts.py +++ b/python/ray/scripts/scripts.py @@ -573,6 +573,11 @@ def rsync_up(cluster_config_file, source, target, cluster_name): @cli.command() @click.argument("cluster_config_file", required=True, type=str) +@click.option( + "--docker", + is_flag=True, + default=False, + help="Runs command in the docker container specified in cluster_config.") @click.option( "--stop", is_flag=True, @@ -600,8 +605,8 @@ def rsync_up(cluster_config_file, source, target, cluster_name): "--port-forward", required=False, type=int, help="Port to forward.") @click.argument("script", required=True, type=str) @click.argument("script_args", required=False, type=str, nargs=-1) -def submit(cluster_config_file, screen, tmux, stop, start, cluster_name, - port_forward, script, script_args): +def submit(cluster_config_file, docker, screen, tmux, stop, start, + cluster_name, port_forward, script, script_args): """Uploads and runs a script on the specified cluster. The script is automatically synced to the following location: @@ -618,7 +623,7 @@ def submit(cluster_config_file, screen, tmux, stop, start, cluster_name, rsync(cluster_config_file, script, target, cluster_name, down=False) cmd = " ".join(["python", target] + list(script_args)) - exec_cluster(cluster_config_file, cmd, screen, tmux, stop, False, + exec_cluster(cluster_config_file, cmd, docker, screen, tmux, stop, False, cluster_name, port_forward)