diff --git a/python/ray/autoscaler/commands.py b/python/ray/autoscaler/commands.py index be1b5a144..25544c3bd 100644 --- a/python/ray/autoscaler/commands.py +++ b/python/ray/autoscaler/commands.py @@ -379,7 +379,7 @@ def exec_cluster(config_file, cmd, docker, screen, tmux, stop, start, cmd, screen, tmux, - expect_error=stop, + expect_error=True, port_forward=port_forward) if tmux or screen: diff --git a/python/ray/autoscaler/updater.py b/python/ray/autoscaler/updater.py index 9ddf9b56f..f34d4d600 100644 --- a/python/ray/autoscaler/updater.py +++ b/python/ray/autoscaler/updater.py @@ -291,12 +291,20 @@ class NodeUpdater(object): "-L", "{}:localhost:{}".format(port_forward, port_forward) ] - self.get_caller(expect_error)( - ssh + ssh_opt + get_default_ssh_options( - self.ssh_private_key, connect_timeout, self.ssh_control_path) + - ["{}@{}".format(self.ssh_user, self.ssh_ip), cmd], - stdout=redirect or sys.stdout, - stderr=redirect or sys.stderr) + final_cmd = ssh + ssh_opt + get_default_ssh_options( + self.ssh_private_key, connect_timeout, self.ssh_control_path) + [ + "{}@{}".format(self.ssh_user, self.ssh_ip), cmd + ] + + try: + self.get_caller(expect_error)( + final_cmd, + stdout=redirect or sys.stdout, + stderr=redirect or sys.stderr) + except subprocess.CalledProcessError: + logger.error("Command failed: \n\n {}\n".format( + " ".join(final_cmd))) + sys.exit(1) class NodeUpdaterThread(NodeUpdater, Thread):