From 27e1f513e3cd8cdd43c83a7c1e44f5aaad685109 Mon Sep 17 00:00:00 2001 From: Lingxuan Zuo Date: Mon, 28 Sep 2020 22:15:15 +0800 Subject: [PATCH] [Log] make glog flush and RAY_LOG thread-safe (#11002) * make glog flush and RAY_LOG thread-safe * dump error log to console * mapping all levels to destination * hack glog for exporting message to stdout if no base name given * patch lint * use stdout logger by default * add raylet std/err pytest checker * add worker logs file check * fix asan check * loop in glog enums * fix python lint * lint for autoindent * fix indent lint * make raylet.err is not empty --- bazel/ray_deps_setup.bzl | 1 + python/ray/tests/test_raylet_output.py | 58 ++++++++++++++++++++++++ src/ray/util/logging.cc | 42 ++++++++--------- thirdparty/patches/glog-suffix-log.patch | 13 ++++++ 4 files changed, 92 insertions(+), 22 deletions(-) create mode 100644 python/ray/tests/test_raylet_output.py create mode 100644 thirdparty/patches/glog-suffix-log.patch diff --git a/bazel/ray_deps_setup.bzl b/bazel/ray_deps_setup.bzl index 84198d416..aa05680df 100644 --- a/bazel/ray_deps_setup.bzl +++ b/bazel/ray_deps_setup.bzl @@ -184,6 +184,7 @@ def ray_deps_setup(): patches = [ "//thirdparty/patches:glog-log-pid-tid.patch", "//thirdparty/patches:glog-stack-trace.patch", + "//thirdparty/patches:glog-suffix-log.patch", ], ) diff --git a/python/ray/tests/test_raylet_output.py b/python/ray/tests/test_raylet_output.py new file mode 100644 index 000000000..4a6261225 --- /dev/null +++ b/python/ray/tests/test_raylet_output.py @@ -0,0 +1,58 @@ +import os +import sys +import glob + +import pytest +import ray +from ray.test_utils import ( + wait_for_condition, ) + + +def enable_export_loglevel(func): + # For running in both python and pytest, this decorator makes sure + # log level env parameter will be changed. + # Make raylet emit a log to raylet.err. + os.environ["RAY_BACKEND_LOG_LEVEL"] = "info" + return func + + +@enable_export_loglevel +def test_ray_log_redirected(ray_start_regular): + session_dir = ray.worker._global_node.get_session_dir_path() + assert os.path.exists(session_dir), "Session dir not found." + raylet_out_path = "{}/logs/raylet.out".format(session_dir) + raylet_err_path = "{}/logs/raylet.err".format(session_dir) + + @ray.remote + class Actor: + def __init__(self): + pass + + def get_pid(self): + return os.getpid() + + def file_exists_and_not_empty(filename): + return os.path.exists(filename) and os.path.getsize(filename) > 0 + + actor = Actor.remote() + remote_pid = ray.get(actor.get_pid.remote()) + local_pid = os.getpid() + + wait_for_condition(lambda: all( + map(file_exists_and_not_empty, [raylet_out_path, raylet_err_path]))) + + core_worker_logs = glob.glob("{}/logs/python-core-worker*{}.log".format( + session_dir, remote_pid)) + driver_log = glob.glob("{}/logs/python-core-driver*{}.log".format( + session_dir, local_pid)) + assert len(core_worker_logs) > 0 and len(driver_log) > 0 + all_worker_logs = core_worker_logs + driver_log + wait_for_condition( + lambda: all(map(file_exists_and_not_empty, all_worker_logs))) + + +if __name__ == "__main__": + # Make subprocess happy in bazel. + os.environ["LC_ALL"] = "en_US.UTF-8" + os.environ["LANG"] = "en_US.UTF-8" + sys.exit(pytest.main(["-v", __file__])) diff --git a/src/ray/util/logging.cc b/src/ray/util/logging.cc index 63e82ec65..62dee6bfc 100644 --- a/src/ray/util/logging.cc +++ b/src/ray/util/logging.cc @@ -48,10 +48,8 @@ namespace ray { #ifdef RAY_USE_GLOG -struct StreamLogger : public google::base::Logger { - std::ofstream out_; - - std::ostream &out() { return out_.is_open() ? out_ : std::cout; } +struct StdoutLogger : public google::base::Logger { + std::ostream &out() { return std::cout; } virtual void Write(bool /* should flush */, time_t /* timestamp */, const char *message, int length) { @@ -64,7 +62,7 @@ struct StreamLogger : public google::base::Logger { virtual google::uint32 LogSize() { return 0; } }; -static StreamLogger stream_logger_singleton; +static StdoutLogger stdout_logger_singleton; #endif // This is the default implementation of ray log, @@ -173,6 +171,7 @@ void RayLog::StartRayLog(const std::string &app_name, RayLogLevel severity_thres log_dir_ = log_dir; #ifdef RAY_USE_GLOG google::InitGoogleLogging(app_name_.c_str()); + int level = GetMappedSeverity(static_cast(severity_threshold_)); if (!log_dir_.empty()) { // Enable log file if log_dir_ is not empty. std::string dir_ends_with_slash = log_dir_; @@ -189,23 +188,23 @@ void RayLog::StartRayLog(const std::string &app_name, RayLogLevel severity_thres app_name_without_path = app_file_name; } } - char buffer[80]; - time_t rawtime; - time(&rawtime); -#ifdef _WIN32 - int pid = _getpid(); -#else - pid_t pid = getpid(); -#endif - strftime(buffer, sizeof(buffer), "%Y%m%d-%H%M%S", localtime(&rawtime)); - std::string path = dir_ends_with_slash + app_name_without_path + "." + buffer + "." + - std::to_string(pid) + ".log"; - stream_logger_singleton.out_.rdbuf()->pubsetbuf(0, 0); - stream_logger_singleton.out_.open(path.c_str(), - std::ios_base::app | std::ios_base::binary); + app_name_without_path += "."; + google::SetLogFilenameExtension(app_name_without_path.c_str()); + google::SetLogDestination(level, dir_ends_with_slash.c_str()); + FLAGS_stop_logging_if_full_disk = true; + } else { + // NOTE(lingxuan.zlx): If no specific log dir or empty directory string, + // we use stdout by default. + google::base::SetLogger(level, &stdout_logger_singleton); } - for (int lvl = 0; lvl < NUM_SEVERITIES; ++lvl) { - google::base::SetLogger(lvl, &stream_logger_singleton); + for (int i = GLOG_INFO; i <= GLOG_FATAL; ++i) { + if (i != level) { + // NOTE(lingxuan.zlx): It means nothing can be printed or sinked to pass + // an empty destination. + // Reference from glog: + // https://github.com/google/glog/blob/0a2e5931bd5ff22fd3bf8999eb8ce776f159cda6/src/logging.cc#L1110 + google::SetLogDestination(i, ""); + } } google::SetStderrLogging(GetMappedSeverity(RayLogLevel::ERROR)); #endif @@ -238,7 +237,6 @@ void RayLog::UninstallSignalAction() { } void RayLog::ShutDownRayLog() { - stream_logger_singleton.out_.close(); #ifdef RAY_USE_GLOG UninstallSignalAction(); google::ShutdownGoogleLogging(); diff --git a/thirdparty/patches/glog-suffix-log.patch b/thirdparty/patches/glog-suffix-log.patch new file mode 100644 index 000000000..d3f220f31 --- /dev/null +++ b/thirdparty/patches/glog-suffix-log.patch @@ -0,0 +1,13 @@ +diff --git src/logging.cc src/logging.cc +--- src/logging.cc ++++ src/logging.cc +@@ -1135,7 +1135,8 @@ void LogFileObject::Write(bool force_flush, + << setw(2) << tm_time.tm_min + << setw(2) << tm_time.tm_sec + << '.' +- << GetMainThreadPid(); ++ << GetMainThreadPid() ++ << ".log"; + const string& time_pid_string = time_pid_stream.str(); + + if (base_filename_selected_) {