diff --git a/python/ray/scripts/scripts.py b/python/ray/scripts/scripts.py index 43e30f590..255ee12dc 100644 --- a/python/ray/scripts/scripts.py +++ b/python/ray/scripts/scripts.py @@ -63,11 +63,13 @@ def cli(): help="the amount of a user-defined custom resource on this node") @click.option("--head", is_flag=True, default=False, help="provide this argument for the head node") +@click.option("--no-ui", is_flag=True, default=False, + help="provide this argument if the UI should not be started") @click.option("--block", is_flag=True, default=False, help="provide this argument to block forever in this command") def start(node_ip_address, redis_address, redis_port, num_redis_shards, object_manager_port, num_workers, num_cpus, num_gpus, - num_custom_resource, head, block): + num_custom_resource, head, no_ui, block): # Note that we redirect stdout and stderr to /dev/null because otherwise # attempts to print may cause exceptions if a process is started inside of # an SSH connection and the SSH connection dies. TODO(rkn): This is a @@ -103,7 +105,8 @@ def start(node_ip_address, redis_address, redis_port, num_redis_shards, num_cpus=num_cpus, num_gpus=num_gpus, num_custom_resource=num_custom_resource, - num_redis_shards=num_redis_shards) + num_redis_shards=num_redis_shards, + include_webui=(not no_ui)) print(address_info) print("\nStarted Ray on this node. You can add additional nodes to " "the cluster by calling\n\n" @@ -128,6 +131,9 @@ def start(node_ip_address, redis_address, redis_port, num_redis_shards, if num_redis_shards is not None: raise Exception("If --head is not passed in, --num-redis-shards " "must not be provided.") + if no_ui: + raise Exception("If --head is not passed in, the --no-ui flag is " + "not relevant.") redis_ip_address, redis_port = redis_address.split(":") # Wait for the Redis server to be started. And throw an exception if we # can't connect to it. diff --git a/python/ray/services.py b/python/ray/services.py index 82ffe4f12..acaa6d62b 100644 --- a/python/ray/services.py +++ b/python/ray/services.py @@ -1084,7 +1084,8 @@ def start_ray_head(address_info=None, num_cpus=None, num_gpus=None, num_custom_resource=None, - num_redis_shards=None): + num_redis_shards=None, + include_webui=True): """Start Ray in local mode. Args: @@ -1119,6 +1120,7 @@ def start_ray_head(address_info=None, num_gpus (int): number of gpus to configure the local scheduler with. num_redis_shards: The number of Redis shards to start in addition to the primary Redis shard. + include_webui: True if the UI should be started and false otherwise. Returns: A dictionary of the address information for the processes that were @@ -1137,7 +1139,7 @@ def start_ray_head(address_info=None, redirect_output=redirect_output, include_global_scheduler=True, include_log_monitor=True, - include_webui=True, + include_webui=include_webui, start_workers_from_local_scheduler=start_workers_from_local_scheduler, num_cpus=num_cpus, num_gpus=num_gpus, diff --git a/test/jenkins_tests/multi_node_docker_test.py b/test/jenkins_tests/multi_node_docker_test.py index 9764e41f0..bf3ba5553 100644 --- a/test/jenkins_tests/multi_node_docker_test.py +++ b/test/jenkins_tests/multi_node_docker_test.py @@ -120,7 +120,8 @@ class DockerRunner(object): "--redis-port=6379", "--num-redis-shards={}".format(num_redis_shards), "--num-cpus={}".format(num_cpus), - "--num-gpus={}".format(num_gpus)]) + "--num-gpus={}".format(num_gpus), + "--no-ui"]) print("Starting head node with command:{}".format(command)) proc = subprocess.Popen(command, @@ -219,9 +220,20 @@ class DockerRunner(object): def stop_ray(self): """Stop the Ray cluster.""" - self._stop_node(self.head_container_id) + success = True + + try: + self._stop_node(self.head_container_id) + except: + success = False + for container_id in self.worker_container_ids: - self._stop_node(container_id) + try: + self._stop_node(container_id) + except: + success = False + + return success def run_test(self, test_script, num_drivers, driver_locations=None): """Run a test script. @@ -320,7 +332,7 @@ if __name__ == "__main__": run_results = d.run_test(args.test_script, args.num_drivers, driver_locations=driver_locations) finally: - d.stop_ray() + successfully_stopped = d.stop_ray() any_failed = False for run_result in run_results: @@ -332,5 +344,8 @@ if __name__ == "__main__": if any_failed: sys.exit(1) + elif not successfully_stopped: + print("There was a failure when attempting to stop the containers.") + sys.exit(1) else: sys.exit(0)