diff --git a/python/ray/scripts/scripts.py b/python/ray/scripts/scripts.py index f13f4d8ee..5dbc79ddd 100644 --- a/python/ray/scripts/scripts.py +++ b/python/ray/scripts/scripts.py @@ -153,6 +153,7 @@ def start(node_ip_address, redis_address, redis_port, num_redis_shards, object_store_memory=object_store_memory, num_workers=num_workers, cleanup=False, + redirect_worker_output=True, redirect_output=True, resources=resources, num_redis_shards=num_redis_shards, @@ -222,6 +223,7 @@ def start(node_ip_address, redis_address, redis_port, num_redis_shards, num_workers=num_workers, object_store_memory=object_store_memory, cleanup=False, + redirect_worker_output=True, redirect_output=True, resources=resources, plasma_directory=plasma_directory, diff --git a/python/ray/services.py b/python/ray/services.py index 79f4b22e2..1209047d3 100644 --- a/python/ray/services.py +++ b/python/ray/services.py @@ -1050,6 +1050,7 @@ def start_ray_processes(address_info=None, redis_max_clients=None, worker_path=None, cleanup=True, + redirect_worker_output=False, redirect_output=False, include_global_scheduler=False, include_log_monitor=False, @@ -1090,8 +1091,10 @@ def start_ray_processes(address_info=None, cleanup (bool): If cleanup is true, then the processes started here will be killed by services.cleanup() when the Python process that called this method exits. - redirect_output (bool): True if stdout and stderr should be redirected - to a file. + redirect_worker_output: True if the stdout and stderr of worker + processes should be redirected to files. + redirect_output (bool): True if stdout and stderr for non-worker + processes should be redirected to files and false otherwise. include_global_scheduler (bool): If include_global_scheduler is True, then start a global scheduler process. include_log_monitor (bool): If True, then start a log monitor to @@ -1114,6 +1117,8 @@ def start_ray_processes(address_info=None, A dictionary of the address information for the processes that were started. """ + print("Process STDOUT and STDERR is being redirected to /tmp/raylogs/.") + if resources is None: resources = {} if not isinstance(resources, list): @@ -1149,7 +1154,8 @@ def start_ray_processes(address_info=None, num_redis_shards=num_redis_shards, redis_max_clients=redis_max_clients, redirect_output=True, - redirect_worker_output=redirect_output, cleanup=cleanup) + redirect_worker_output=redirect_worker_output, + cleanup=cleanup) address_info["redis_address"] = redis_address if "RAY_USE_NEW_GCS" in os.environ: credis_address = start_credis( @@ -1247,9 +1253,12 @@ def start_ray_processes(address_info=None, # If we're starting the workers from Python, the local scheduler # should not start any workers. num_local_scheduler_workers = 0 - # Start the local scheduler. + # Start the local scheduler. Note that if we do not wish to redirect + # the worker output, then we cannot redirect the local scheduler + # output. local_scheduler_stdout_file, local_scheduler_stderr_file = ( - new_log_files("local_scheduler_{}".format(i), redirect_output)) + new_log_files("local_scheduler_{}".format(i), + redirect_output=redirect_worker_output)) local_scheduler_name = start_local_scheduler( redis_address, node_ip_address, @@ -1314,6 +1323,7 @@ def start_ray_node(node_ip_address, object_store_memory=None, worker_path=None, cleanup=True, + redirect_worker_output=False, redirect_output=False, resources=None, plasma_directory=None, @@ -1340,8 +1350,10 @@ def start_ray_node(node_ip_address, cleanup (bool): If cleanup is true, then the processes started here will be killed by services.cleanup() when the Python process that called this method exits. - redirect_output (bool): True if stdout and stderr should be redirected - to a file. + redirect_worker_output: True if the stdout and stderr of worker + processes should be redirected to files. + redirect_output (bool): True if stdout and stderr for non-worker + processes should be redirected to files and false otherwise. resources: A dictionary mapping resource name to the available quantity of that resource. plasma_directory: A directory where the Plasma memory mapped files will @@ -1363,6 +1375,7 @@ def start_ray_node(node_ip_address, worker_path=worker_path, include_log_monitor=True, cleanup=cleanup, + redirect_worker_output=redirect_worker_output, redirect_output=redirect_output, resources=resources, plasma_directory=plasma_directory, @@ -1378,6 +1391,7 @@ def start_ray_head(address_info=None, object_store_memory=None, worker_path=None, cleanup=True, + redirect_worker_output=False, redirect_output=False, start_workers_from_local_scheduler=True, resources=None, @@ -1414,8 +1428,10 @@ def start_ray_head(address_info=None, cleanup (bool): If cleanup is true, then the processes started here will be killed by services.cleanup() when the Python process that called this method exits. - redirect_output (bool): True if stdout and stderr should be redirected - to a file. + redirect_worker_output: True if the stdout and stderr of worker + processes should be redirected to files. + redirect_output (bool): True if stdout and stderr for non-worker + processes should be redirected to files and false otherwise. start_workers_from_local_scheduler (bool): If this flag is True, then start the initial workers from the local scheduler. Else, start them from Python. @@ -1447,6 +1463,7 @@ def start_ray_head(address_info=None, object_store_memory=object_store_memory, worker_path=worker_path, cleanup=cleanup, + redirect_worker_output=redirect_worker_output, redirect_output=redirect_output, include_global_scheduler=True, include_log_monitor=True, diff --git a/python/ray/worker.py b/python/ray/worker.py index e7adf3218..a89ad5972 100644 --- a/python/ray/worker.py +++ b/python/ray/worker.py @@ -1267,7 +1267,8 @@ def _init(address_info=None, num_local_schedulers=None, object_store_memory=None, driver_mode=SCRIPT_MODE, - redirect_output=False, + redirect_worker_output=False, + redirect_output=True, start_workers_from_local_scheduler=True, num_cpus=None, num_gpus=None, @@ -1306,8 +1307,10 @@ def _init(address_info=None, object store with. driver_mode (bool): The mode in which to start the driver. This should be one of ray.SCRIPT_MODE, ray.PYTHON_MODE, and ray.SILENT_MODE. - redirect_output (bool): True if stdout and stderr for all the processes - should be redirected to files and false otherwise. + redirect_worker_output: True if the stdout and stderr of worker + processes should be redirected to files. + redirect_output (bool): True if stdout and stderr for non-worker + processes should be redirected to files and false otherwise. start_workers_from_local_scheduler (bool): If this flag is True, then start the initial workers from the local scheduler. Else, start them from Python. The latter case is for debugging purposes only. @@ -1387,6 +1390,7 @@ def _init(address_info=None, num_workers=num_workers, num_local_schedulers=num_local_schedulers, object_store_memory=object_store_memory, + redirect_worker_output=redirect_worker_output, redirect_output=redirect_output, start_workers_from_local_scheduler=( start_workers_from_local_scheduler), @@ -1457,7 +1461,8 @@ def _init(address_info=None, def init(redis_address=None, node_ip_address=None, object_id_seed=None, - num_workers=None, driver_mode=SCRIPT_MODE, redirect_output=False, + num_workers=None, driver_mode=SCRIPT_MODE, + redirect_worker_output=False, redirect_output=True, num_cpus=None, num_gpus=None, resources=None, num_custom_resource=None, num_redis_shards=None, redis_max_clients=None, plasma_directory=None, @@ -1483,8 +1488,10 @@ def init(redis_address=None, node_ip_address=None, object_id_seed=None, provided if redis_address is not provided. driver_mode (bool): The mode in which to start the driver. This should be one of ray.SCRIPT_MODE, ray.PYTHON_MODE, and ray.SILENT_MODE. - redirect_output (bool): True if stdout and stderr for all the processes - should be redirected to files and false otherwise. + redirect_worker_output: True if the stdout and stderr of worker + processes should be redirected to files. + redirect_output (bool): True if stdout and stderr for non-worker + processes should be redirected to files and false otherwise. num_cpus (int): Number of cpus the user wishes all local schedulers to be configured with. num_gpus (int): Number of gpus the user wishes all local schedulers to @@ -1521,6 +1528,7 @@ def init(redis_address=None, node_ip_address=None, object_id_seed=None, "redis_address": redis_address} return _init(address_info=info, start_ray_local=(redis_address is None), num_workers=num_workers, driver_mode=driver_mode, + redirect_worker_output=redirect_worker_output, redirect_output=redirect_output, num_cpus=num_cpus, num_gpus=num_gpus, resources=resources, num_redis_shards=num_redis_shards, diff --git a/src/local_scheduler/local_scheduler_client.cc b/src/local_scheduler/local_scheduler_client.cc index cff4c1e23..eda4f2ff7 100644 --- a/src/local_scheduler/local_scheduler_client.cc +++ b/src/local_scheduler/local_scheduler_client.cc @@ -84,7 +84,7 @@ TaskSpec *local_scheduler_get_task(LocalSchedulerConnection *conn, * scheduler gives this client a task. */ read_message(conn->conn, &type, &reply_size, &reply); if (type == DISCONNECT_CLIENT) { - RAY_LOG(WARNING) << "Exiting because local scheduler closed connection."; + RAY_LOG(DEBUG) << "Exiting because local scheduler closed connection."; exit(1); } RAY_CHECK(type == MessageType_ExecuteTask); diff --git a/test/runtest.py b/test/runtest.py index 6eef49bec..8689b3706 100644 --- a/test/runtest.py +++ b/test/runtest.py @@ -1952,7 +1952,7 @@ class GlobalStateAPI(unittest.TestCase): ray.global_state.object_table(result_id)) def testLogFileAPI(self): - ray.init(redirect_output=True) + ray.init(redirect_worker_output=True) message = "unique message" @@ -2016,7 +2016,7 @@ class GlobalStateAPI(unittest.TestCase): def testWorkers(self): num_workers = 3 ray.init( - redirect_output=True, + redirect_worker_output=True, num_cpus=num_workers, num_workers=num_workers)