From 0a3ff489fa7d5d2c4feeaeb1257631bb7f8d1ec8 Mon Sep 17 00:00:00 2001 From: Eric Liang Date: Mon, 5 Aug 2019 23:35:09 -0700 Subject: [PATCH] Send raylet error logs through the log monitor (#5351) --- python/ray/log_monitor.py | 8 ++++++-- python/ray/worker.py | 11 +++++++++-- src/ray/util/logging.cc | 24 +++++++++++++++++++++++- 3 files changed, 38 insertions(+), 5 deletions(-) diff --git a/python/ray/log_monitor.py b/python/ray/log_monitor.py index 421231686..8b6b3547e 100644 --- a/python/ray/log_monitor.py +++ b/python/ray/log_monitor.py @@ -90,10 +90,12 @@ class LogMonitor(object): def update_log_filenames(self): """Update the list of log files to monitor.""" - # we only monior worker log files + # output of user code is written here log_file_paths = glob.glob("{}/worker*[.out|.err]".format( self.logs_dir)) - for file_path in log_file_paths: + # segfaults and other serious errors are logged here + raylet_err_paths = glob.glob("{}/raylet*.err".format(self.logs_dir)) + for file_path in log_file_paths + raylet_err_paths: if os.path.isfile( file_path) and file_path not in self.log_filenames: self.log_filenames.add(file_path) @@ -195,6 +197,8 @@ class LogMonitor(object): file_info.worker_pid = int( lines_to_publish[0].split(" ")[-1]) lines_to_publish = lines_to_publish[1:] + elif "/raylet" in file_info.filename: + file_info.worker_pid = "raylet" # Record the current position in the file. file_info.file_position = file_info.file_handle.tell() diff --git a/python/ray/worker.py b/python/ray/worker.py index 91117aca1..00fad481b 100644 --- a/python/ray/worker.py +++ b/python/ray/worker.py @@ -1594,15 +1594,22 @@ def print_logs(redis_client, threads_stopped): num_consecutive_messages_received += 1 data = json.loads(ray.utils.decode(msg["data"])) + + def color_for(data): + if data["pid"] == "raylet": + return colorama.Fore.YELLOW + else: + return colorama.Fore.CYAN + if data["ip"] == localhost: for line in data["lines"]: print("{}{}(pid={}){} {}".format( - colorama.Style.DIM, colorama.Fore.CYAN, data["pid"], + colorama.Style.DIM, color_for(data), data["pid"], colorama.Style.RESET_ALL, line)) else: for line in data["lines"]: print("{}{}(pid={}, ip={}){} {}".format( - colorama.Style.DIM, colorama.Fore.CYAN, data["pid"], + colorama.Style.DIM, color_for(data), data["pid"], data["ip"], colorama.Style.RESET_ALL, line)) if (num_consecutive_messages_received % 100 == 0 diff --git a/src/ray/util/logging.cc b/src/ray/util/logging.cc index 7311181c1..e57712d84 100644 --- a/src/ray/util/logging.cc +++ b/src/ray/util/logging.cc @@ -11,11 +11,28 @@ #include #ifdef RAY_USE_GLOG +#include #include "glog/logging.h" #endif namespace ray { +#ifdef RAY_USE_GLOG +struct StdoutLogger : public google::base::Logger { + virtual void Write(bool /* should flush */, time_t /* timestamp */, const char *message, + int length) { + // note: always flush otherwise it never shows up in raylet.out + std::cout << std::string(message, length) << std::flush; + } + + virtual void Flush() { std::cout.flush(); } + + virtual google::uint32 LogSize() { return 0; } +}; + +static StdoutLogger stdout_logger_singleton; +#endif + // This is the default implementation of ray log, // which is independent of any libs. class CerrLog { @@ -122,7 +139,12 @@ void RayLog::StartRayLog(const std::string &app_name, RayLogLevel severity_thres #ifdef RAY_USE_GLOG google::InitGoogleLogging(app_name_.c_str()); int mapped_severity_threshold = GetMappedSeverity(severity_threshold_); - google::SetStderrLogging(mapped_severity_threshold); + google::SetStderrLogging(GetMappedSeverity(RayLogLevel::ERROR)); + for (int i = static_cast(severity_threshold_); + i <= static_cast(RayLogLevel::FATAL); ++i) { + int level = GetMappedSeverity(static_cast(i)); + google::base::SetLogger(level, &stdout_logger_singleton); + } // Enable log file if log_dir_ is not empty. if (!log_dir_.empty()) { auto dir_ends_with_slash = log_dir_;