Stream logs to driver by default. (#3892)

* Stream logs to driver by default.

* Fix from rebase

* Redirect raylet output independently of worker output.

* Fix.

* Create redis client with services.create_redis_client.

* Suppress Redis connection error at exit.

* Remove thread_safe_client from redis.

* Shut down driver threads in ray.shutdown().

* Add warning for too many log messages.

* Only stop threads if worker is connected.

* Only stop threads if they exist.

* Remove unnecessary try/excepts.

* Fix.

* Only add new logging handler once.

* Increase timeout.

* Fix tempfile test.

* Fix logging in cluster_utils.

* Revert "Increase timeout."

This reverts commit b3846b89040bcd8e583b2e18cb513cb040e71d95.

* Retry longer when connecting to plasma store from node manager and object manager.

* Close pubsub channels to avoid leaking file descriptors.

* Limit log monitor open files to 200.

* Increase plasma connect retries.

* Add comment.
This commit is contained in:
Robert Nishihara
2019-02-07 19:53:50 -08:00
committed by Philipp Moritz
parent 0aa74fb1fd
commit ef527f84ab
17 changed files with 511 additions and 344 deletions
+2 -4
View File
@@ -149,7 +149,6 @@ class Node(object):
stdout_file, stderr_file = new_log_monitor_log_file()
process_info = ray.services.start_log_monitor(
self.redis_address,
self._node_ip_address,
stdout_file=stdout_file,
stderr_file=stderr_file,
redis_password=self._ray_params.redis_password)
@@ -189,8 +188,7 @@ class Node(object):
object_store_memory=self._ray_params.object_store_memory,
plasma_directory=self._ray_params.plasma_directory,
huge_pages=self._ray_params.huge_pages,
plasma_store_socket_name=self._plasma_store_socket_name,
redis_password=self._ray_params.redis_password)
plasma_store_socket_name=self._plasma_store_socket_name)
assert (
ray_constants.PROCESS_TYPE_PLASMA_STORE not in self.all_processes)
self.all_processes[ray_constants.PROCESS_TYPE_PLASMA_STORE] = [
@@ -212,7 +210,7 @@ class Node(object):
or get_raylet_socket_name())
self.prepare_socket_file(self._raylet_socket_name)
stdout_file, stderr_file = new_raylet_log_file(
redirect_output=self._ray_params.redirect_worker_output)
redirect_output=self._ray_params.redirect_output)
process_info = ray.services.start_raylet(
self._redis_address,
self._node_ip_address,