Change logfile names and also allow plasma store socket to be passed in. (#2862)

This commit is contained in:
Si-Yuan
2018-10-03 10:03:53 -07:00
committed by Robert Nishihara
parent 9c606ea06c
commit cc7e2ecdd5
13 changed files with 696 additions and 140 deletions
+292
View File
@@ -0,0 +1,292 @@
import binascii
import collections
import datetime
import errno
import logging
import os
import shutil
import tempfile
import ray.utils
# Module-level logger used for warnings emitted by the helpers below.
logger = logging.getLogger(__name__)

# Next index to try for each (suffix, prefix, directory_name) key used by
# `make_inc_temp`. Keys that were never seen start at 0.
_incremental_dict = collections.defaultdict(int)

# Path of the temporary root directory. It stays None until it is assigned
# by `set_temp_root` or generated by `get_temp_root`.
_temp_root = None
def make_inc_temp(suffix="", prefix="", directory_name="/tmp/ray"):
    """Return an incremental temporary file name. The file is not created.

    The first candidate is "{directory_name}/{prefix}{suffix}". If that is
    not usable, the candidates look like
    "{directory_name}/{prefix}.{unique_index}{suffix}".

    The next index to try is remembered per (suffix, prefix, directory_name)
    for the lifetime of this process, so calling this function again with
    the same arguments never hands out the same name twice, even when the
    caller did not create the previous file.

    Args:
        suffix (str): The suffix of the temp file.
        prefix (str): The prefix of the temp file.
        directory_name (str): The base directory of the temp file.

    Returns:
        A string file name that did not exist at the moment it was checked.
        Nothing is reserved on disk, so another process may still create a
        file with that name before the caller does.

    Raises:
        OSError: With ``errno.EEXIST`` when every index below
            ``tempfile.TMP_MAX`` has been tried. On Python 3 this is
            raised as its subclass ``FileExistsError``.
    """
    key = (suffix, prefix, directory_name)
    index = _incremental_dict[key]
    # `tempfile.TMP_MAX` could be extremely large,
    # so using `range` in Python2.x should be avoided.
    while index < tempfile.TMP_MAX:
        if index == 0:
            filename = os.path.join(directory_name, prefix + suffix)
        else:
            filename = os.path.join(directory_name,
                                    prefix + "." + str(index) + suffix)
        index += 1
        if not os.path.exists(filename):
            _incremental_dict[key] = index  # Save the index.
            return filename

    # `FileExistsError` is not defined on Python 2, so referencing it there
    # would raise NameError instead of the intended error. Constructing
    # OSError with EEXIST works on both major versions, and Python 3 maps
    # it to FileExistsError automatically.
    raise OSError(errno.EEXIST, "No usable temporary filename found")
def try_to_create_directory(directory_path):
    """Attempt to create a directory that is globally readable/writable.

    Missing parent directories are created as well. If the directory
    already exists it is left in place and only its mode is updated.

    Args:
        directory_path: The path of the directory to create.

    Raises:
        OSError: If the directory cannot be created for a reason other
            than already existing, or if changing its mode fails for a
            reason other than missing permission.
    """
    if not os.path.exists(directory_path):
        try:
            os.makedirs(directory_path)
        except OSError as e:
            # Compare against the `errno` module. `os.errno` was never a
            # documented attribute and no longer exists on Python 3.7+,
            # where it would turn every failure into an AttributeError.
            if e.errno != errno.EEXIST:
                raise
            # Someone else created the directory between the existence
            # check and `makedirs`; that is fine.
            logger.warning(
                "Attempted to create '{}', but the directory already "
                "exists.".format(directory_path))
    # Change the log directory permissions so others can use it. This is
    # important when multiple people are using the same machine.
    try:
        os.chmod(directory_path, 0o0777)
    except OSError as e:
        # A directory created by another user cannot be chmod'ed by us.
        # That is exactly the shared-machine case, so do not fail on it.
        if e.errno not in (errno.EPERM, errno.EACCES):
            raise
def get_temp_root():
    """Return the temporary root directory, creating it when missing.

    When no root has been chosen yet, a fresh name of the form
    "/tmp/ray/session_{date}_{pid}" is generated with `make_inc_temp` and
    remembered for later calls.

    Returns:
        The path of the temporary root directory.
    """
    global _temp_root

    # Lazy creation. Avoid creating directories never used.
    if _temp_root is None:
        timestamp = datetime.datetime.today().strftime("%Y-%m-%d_%H-%M-%S")
        session_name = "session_{date_str}_{pid}".format(
            pid=os.getpid(), date_str=timestamp)
        _temp_root = make_inc_temp(
            prefix=session_name, directory_name="/tmp/ray")
    try_to_create_directory(_temp_root)
    return _temp_root
def set_temp_root(path):
    """Record `path` as the temporary root to use from now on.

    This only stores the path in the module-level `_temp_root`; nothing is
    created on disk by this call.

    Args:
        path: The path to use as the temporary root.
    """
    global _temp_root
    _temp_root = path
def get_logs_dir_path():
    """Return the "logs" directory under the temp root, creating it if needed.

    Returns:
        The path of the logs directory.
    """
    path = os.path.join(get_temp_root(), "logs")
    try_to_create_directory(path)
    return path
def get_sockets_dir_path():
    """Return the "sockets" directory under the temp root, creating it if needed.

    Returns:
        The path of the sockets directory.
    """
    path = os.path.join(get_temp_root(), "sockets")
    try_to_create_directory(path)
    return path
def get_raylet_socket_name(suffix=""):
    """Return an unused socket file name for a raylet.

    Args:
        suffix (str): Text appended to the generated socket name.

    Returns:
        A path inside the sockets directory whose name starts with
        "raylet". The socket file itself is not created.
    """
    return make_inc_temp(
        prefix="raylet",
        directory_name=get_sockets_dir_path(),
        suffix=suffix)
def get_object_store_socket_name():
    """Return an unused socket file name for the plasma object store.

    Returns:
        A path inside the sockets directory whose name starts with
        "plasma_store". The socket file itself is not created.
    """
    directory = get_sockets_dir_path()
    socket_name = make_inc_temp(
        prefix="plasma_store", directory_name=directory)
    return socket_name
def get_plasma_manager_socket_name():
    """Return an unused socket file name for the plasma manager.

    Returns:
        A path inside the sockets directory whose name starts with
        "plasma_manager". The socket file itself is not created.
    """
    directory = get_sockets_dir_path()
    socket_name = make_inc_temp(
        prefix="plasma_manager", directory_name=directory)
    return socket_name
def get_local_scheduler_socket_name(suffix=""):
    """Return an unused socket file name for a local scheduler.

    This function could be unsafe: the returned name only refers to a file
    that did not exist when it was checked. By the time the caller creates
    the socket, someone else may already have created a file there.

    Args:
        suffix (str): Text appended to the generated socket name.

    Returns:
        A path inside the sockets directory whose name starts with
        "scheduler". The socket file itself is not created.
    """
    return make_inc_temp(
        prefix="scheduler",
        directory_name=get_sockets_dir_path(),
        suffix=suffix)
def get_ipython_notebook_path(port):
    """Copy the bundled WebUI notebook to a fresh path and build its URL.

    Args:
        port: The port used in the returned "http://localhost:{port}" URL.

    Returns:
        A tuple `(directory, webui_url, token)`: the directory holding the
        copied notebook, the URL of the notebook including the token, and
        the token itself.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    template_path = os.path.join(module_dir, "WebUI.ipynb")
    # We copy the notebook file so that the original doesn't get modified by
    # the user.
    notebook_name = make_inc_temp(
        suffix=".ipynb", prefix="ray_ui", directory_name=get_temp_root())
    # NOTE(review): `make_inc_temp` already returns a path joined with the
    # temp root. When that root is absolute, `os.path.join` drops the logs
    # directory and the copy lands directly in the temp root. Confirm
    # whether the logs directory was the intended destination.
    copied_path = os.path.join(get_logs_dir_path(), notebook_name)
    shutil.copy(template_path, copied_path)
    # 24 random bytes, hex-encoded, then passed through `ray.utils.decode`
    # (presumably bytes -> str; verify against ray.utils).
    token = ray.utils.decode(binascii.hexlify(os.urandom(24)))
    webui_url = "http://localhost:{}/notebooks/{}?token={}".format(
        port, os.path.basename(notebook_name), token)
    return os.path.dirname(copied_path), webui_url, token
def get_temp_redis_config_path():
    """Return an unused file name for the redis config file.

    Returns:
        A path inside the temp root whose name starts with "redis_conf".
        The file itself is not created.
    """
    return make_inc_temp(
        prefix="redis_conf", directory_name=get_temp_root())
def new_log_files(name, redirect_output):
    """Open a fresh pair of stdout/stderr log files in the logs directory.

    The file names are "{name}.out" and "{name}.err". `make_inc_temp`
    inserts an incremental index when such a name is already taken.

    Args:
        name (str): descriptive string for this log file.
        redirect_output (bool): True if files should be generated for logging
            stdout and stderr and false if stdout and stderr should not be
            redirected.

    Returns:
        If redirect_output is true, this will return a tuple of two
            filehandles. The first is for redirecting stdout and the second is
            for redirecting stderr. If redirect_output is false, this will
            return a tuple of two None objects.
    """
    if not redirect_output:
        return None, None

    # Directory that holds the process log files.
    logs_dir = get_logs_dir_path()
    # Create another directory that will be used by some of the RL algorithms.
    # TODO(suquark): This is done by the old code.
    # We should be able to control its path later.
    try_to_create_directory("/tmp/ray")

    # Pick both names first, then open them, in stdout/stderr order.
    stdout_path, stderr_path = [
        make_inc_temp(suffix=extension, prefix=name, directory_name=logs_dir)
        for extension in (".out", ".err")
    ]
    # Line-buffer the output (mode 1)
    stdout_handle = open(stdout_path, "a", buffering=1)
    stderr_handle = open(stderr_path, "a", buffering=1)
    return stdout_handle, stderr_handle
def new_redis_log_file(redirect_output, shard_number=None):
    """Create new logging files for redis.

    Args:
        redirect_output (bool): Forwarded to `new_log_files`.
        shard_number: When not None, the files are named
            "redis-shard_{shard_number}" instead of "redis".

    Returns:
        The (stdout, stderr) pair produced by `new_log_files`.
    """
    if shard_number is None:
        log_name = "redis"
    else:
        log_name = "redis-shard_{}".format(shard_number)
    return new_log_files(log_name, redirect_output)
def new_raylet_log_file(local_scheduler_index, redirect_output):
    """Create new logging files for raylet.

    Args:
        local_scheduler_index: Index used in the "raylet_{index}" log name.
        redirect_output (bool): Forwarded to `new_log_files`.

    Returns:
        The (stdout, stderr) pair produced by `new_log_files`.
    """
    log_name = "raylet_{}".format(local_scheduler_index)
    return new_log_files(log_name, redirect_output=redirect_output)
def new_local_scheduler_log_file(local_scheduler_index, redirect_output):
    """Create new logging files for local scheduler.

    It is only used in non-raylet versions.

    Args:
        local_scheduler_index: Index used in the
            "local_scheduler_{index}" log name.
        redirect_output (bool): Forwarded to `new_log_files`.

    Returns:
        The (stdout, stderr) pair produced by `new_log_files`.
    """
    log_name = "local_scheduler_{}".format(local_scheduler_index)
    return new_log_files(log_name, redirect_output=redirect_output)
def new_webui_log_file():
    """Create new logging files for web ui.

    Output is always redirected for the web ui.

    Returns:
        The (stdout, stderr) pair produced by `new_log_files`.
    """
    return new_log_files("webui", redirect_output=True)
def new_worker_log_file(local_scheduler_index, worker_index, redirect_output):
    """Create new logging files for workers with local scheduler index.

    It is only used in non-raylet versions.

    Args:
        local_scheduler_index: First index in the
            "worker_{local_scheduler_index}_{worker_index}" log name.
        worker_index: Second index in that log name.
        redirect_output (bool): Forwarded to `new_log_files`.

    Returns:
        The (stdout, stderr) pair produced by `new_log_files`.
    """
    log_name = "worker_{}_{}".format(local_scheduler_index, worker_index)
    return new_log_files(log_name, redirect_output)
def new_worker_redirected_log_file(worker_id):
    """Create new logging files for workers to redirect its output.

    Output is always redirected here.

    Args:
        worker_id: Worker identifier; it is converted with
            `ray.utils.binary_to_hex` and appended to "worker-" to form
            the log name.

    Returns:
        The (stdout, stderr) pair produced by `new_log_files`.
    """
    log_name = "worker-" + ray.utils.binary_to_hex(worker_id)
    return new_log_files(log_name, True)
def new_log_monitor_log_file():
    """Create new logging files for the log monitor.

    Output is always redirected for the log monitor.

    Returns:
        The (stdout, stderr) pair produced by `new_log_files`.
    """
    return new_log_files("log_monitor", redirect_output=True)
def new_global_scheduler_log_file(redirect_output):
    """Create new logging files for the new global scheduler.

    It is only used in non-raylet versions.

    Args:
        redirect_output (bool): Forwarded to `new_log_files`.

    Returns:
        The (stdout, stderr) pair produced by `new_log_files`.
    """
    return new_log_files("global_scheduler", redirect_output)
def new_plasma_store_log_file(local_scheduler_index, redirect_output):
    """Create new logging files for the plasma store.

    Args:
        local_scheduler_index: Index used in the
            "plasma_store_{index}" log name.
        redirect_output (bool): Forwarded to `new_log_files`.

    Returns:
        The (stdout, stderr) pair produced by `new_log_files`.
    """
    log_name = "plasma_store_{}".format(local_scheduler_index)
    return new_log_files(log_name, redirect_output)
def new_plasma_manager_log_file(local_scheduler_index, redirect_output):
    """Create new logging files for the plasma manager.

    Args:
        local_scheduler_index: Index used in the
            "plasma_manager_{index}" log name.
        redirect_output (bool): Forwarded to `new_log_files`.

    Returns:
        The (stdout, stderr) pair produced by `new_log_files`.
    """
    log_name = "plasma_manager_{}".format(local_scheduler_index)
    return new_log_files(log_name, redirect_output)
def new_monitor_log_file(redirect_output):
    """Create new logging files for the monitor.

    Args:
        redirect_output (bool): Forwarded to `new_log_files`.

    Returns:
        The (stdout, stderr) pair produced by `new_log_files`.
    """
    return new_log_files("monitor", redirect_output)