diff --git a/.travis.yml b/.travis.yml index d9dd9f228..1c0e2eef5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -202,6 +202,9 @@ script: # ray temp file tests - python -m pytest -v --durations=10 test/tempfile_test.py + # ray debug tools tests + - python -m pytest -v --durations=10 test/debug_tools_test.py + # modin test files - python python/ray/test/test_modin.py diff --git a/.travis/install-dependencies.sh b/.travis/install-dependencies.sh index a81944fc3..a983a6c5e 100755 --- a/.travis/install-dependencies.sh +++ b/.travis/install-dependencies.sh @@ -19,7 +19,7 @@ fi if [[ "$PYTHON" == "2.7" ]] && [[ "$platform" == "linux" ]]; then sudo apt-get update - sudo apt-get install -y cmake pkg-config build-essential autoconf curl libtool python-dev python-numpy python-pip unzip + sudo apt-get install -y cmake pkg-config build-essential autoconf curl libtool python-dev python-numpy python-pip unzip tmux gdb # Install miniconda. wget https://repo.continuum.io/miniconda/Miniconda2-4.5.4-Linux-x86_64.sh -O miniconda.sh -nv bash miniconda.sh -b -p $HOME/miniconda @@ -28,7 +28,7 @@ if [[ "$PYTHON" == "2.7" ]] && [[ "$platform" == "linux" ]]; then feather-format lxml openpyxl xlrd py-spy setproctitle faulthandler pytest-timeout mock flaky elif [[ "$PYTHON" == "3.5" ]] && [[ "$platform" == "linux" ]]; then sudo apt-get update - sudo apt-get install -y cmake pkg-config python-dev python-numpy build-essential autoconf curl libtool unzip + sudo apt-get install -y cmake pkg-config python-dev python-numpy build-essential autoconf curl libtool unzip tmux gdb # Install miniconda. 
wget https://repo.continuum.io/miniconda/Miniconda3-4.5.4-Linux-x86_64.sh -O miniconda.sh -nv bash miniconda.sh -b -p $HOME/miniconda diff --git a/python/ray/services.py b/python/ray/services.py index c9eb23058..ca1cc8c72 100644 --- a/python/ray/services.py +++ b/python/ray/services.py @@ -22,6 +22,7 @@ import ray import ray.ray_constants as ray_constants from ray.tempfile_services import ( + get_gdb_init_path, get_ipython_notebook_path, get_logs_dir_path, get_temp_root, @@ -294,15 +295,18 @@ def start_ray_process(command, if os.environ.get(tmux_env_var) == "1": logger.info("Detected environment variable '%s'.", tmux_env_var) use_tmux = True + gdb_env_var = "RAY_{}_GDB".format(process_type.upper()) + if os.environ.get(gdb_env_var) == "1": + logger.info("Detected environment variable '%s'.", gdb_env_var) + use_gdb = True - if use_gdb: - raise NotImplementedError - if use_tmux: - raise NotImplementedError - if sum([use_valgrind, use_valgrind_profiler, use_perftools_profiler]) > 1: + if sum( + [use_gdb, use_valgrind, use_valgrind_profiler, use_perftools_profiler + ]) > 1: raise ValueError( - "At most one of the 'use_valgrind', 'use_valgrind_profiler', and " - "'use_perftools_profiler' flags can be used at a time.") + "At most one of the 'use_gdb', 'use_valgrind', " + "'use_valgrind_profiler', and 'use_perftools_profiler' flags can " + "be used at a time.") if env_updates is None: env_updates = {} if not isinstance(env_updates, dict): @@ -311,6 +315,18 @@ def start_ray_process(command, modified_env = os.environ.copy() modified_env.update(env_updates) + if use_gdb: + if not use_tmux: + raise ValueError( + "If 'use_gdb' is true, then 'use_tmux' must be true as well.") + gdb_init_path = get_gdb_init_path(process_type) + ray_process_path = command[0] + ray_process_args = command[1:] + run_args = " ".join(["'{}'".format(arg) for arg in ray_process_args]) + with open(gdb_init_path, "w") as gdb_init_file: + gdb_init_file.write("run {}".format(run_args)) + command = ["gdb", 
ray_process_path, "-x", gdb_init_path] + if use_valgrind: command = [ "valgrind", "--track-origins=yes", "--leak-check=full", @@ -325,6 +341,12 @@ def start_ray_process(command, modified_env["LD_PRELOAD"] = os.environ["PERFTOOLS_PATH"] modified_env["CPUPROFILE"] = os.environ["PERFTOOLS_LOGFILE"] + if use_tmux: + # The command has to be created exactly as below to ensure that it + # works on all versions of tmux. (Tested with tmux 1.8-5, travis' + # version, and tmux 2.1) + command = ["tmux", "new-session", "-d", "{}".format(" ".join(command))] + process = subprocess.Popen( command, env=modified_env, @@ -1205,8 +1227,8 @@ def determine_plasma_store_config(object_store_memory=None, "up space by deleting files in /dev/shm or terminating " "any running plasma_store_server processes. If you are " "inside a Docker container, you may need to pass an " - "argument with the flag '--shm-size' to 'docker run'." - .format(shm_avail)) + "argument with the flag '--shm-size' to 'docker run'.". + format(shm_avail)) else: plasma_directory = "/tmp" @@ -1221,8 +1243,9 @@ def determine_plasma_store_config(object_store_memory=None, "plasma_directory is set.") if not os.path.isdir(plasma_directory): - raise Exception("The file {} does not exist or is not a directory." - .format(plasma_directory)) + raise Exception( + "The file {} does not exist or is not a directory.".format( + plasma_directory)) return object_store_memory, plasma_directory diff --git a/python/ray/tempfile_services.py b/python/ray/tempfile_services.py index 156843b79..66016a4f1 100644 --- a/python/ray/tempfile_services.py +++ b/python/ray/tempfile_services.py @@ -142,6 +142,12 @@ def get_ipython_notebook_path(): return notebook_name +def get_gdb_init_path(process_type): + return make_inc_temp( + prefix="gdb_init_{}".format(process_type), + directory_name=get_temp_root()) + + def new_log_files(name, redirect_output): """Generate partially randomized filenames for log files. 
diff --git a/test/debug_tools_test.py b/test/debug_tools_test.py
new file mode 100644
index 000000000..642b82cb5
--- /dev/null
+++ b/test/debug_tools_test.py
@@ -0,0 +1,71 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import subprocess
+import sys
+
+import pytest
+
+import ray
+
+# Path fragments of the executables that are expected to run under gdb.
+GDB_WRAPPED_EXECUTABLES = ["raylet/raylet", "plasma/plasma_store_server"]
+
+
+def _gdb_pattern(executable):
+    """Return the `pgrep -f`/`pkill -f` pattern for gdb running executable."""
+    return "gdb.*{}".format(executable)
+
+
+@pytest.fixture
+def ray_gdb_start():
+    """Ask ray to start the raylet and plasma store under gdb inside tmux.
+
+    Only the RAY_<PROCESS>_GDB and RAY_<PROCESS>_TMUX environment variables
+    are set here; the test is responsible for calling ray.init(). On
+    teardown the gdb processes are killed, the environment is restored and
+    ray is shut down, whether or not the test passed.
+    """
+    # Snapshot the environment so that teardown can restore it exactly.
+    _environ = os.environ.copy()
+    for process_name in ["RAYLET", "PLASMA_STORE"]:
+        os.environ["RAY_{}_GDB".format(process_name)] = "1"
+        os.environ["RAY_{}_TMUX".format(process_name)] = "1"
+
+    yield None
+
+    try:
+        # Kill gdb here rather than in the test body, so that no gdb process
+        # is left behind when an assertion in the test fails.
+        for executable in GDB_WRAPPED_EXECUTABLES:
+            subprocess.call(["pkill", "-f", _gdb_pattern(executable)])
+    finally:
+        # Restore original environment and stop ray
+        os.environ.clear()
+        os.environ.update(_environ)
+        ray.shutdown()
+
+
+@pytest.mark.skipif(
+    not sys.platform.startswith("linux"), reason="This test requires Linux.")
+def test_raylet_gdb(ray_gdb_start):
+    """Check that the raylet and the plasma store are run under gdb."""
+    ray.init(num_cpus=1)
+
+    @ray.remote
+    def f():
+        return 42
+
+    # The cluster must still execute tasks while its processes are debugged.
+    assert ray.get(f.remote()) == 42
+
+    # `pgrep -f` prints the pids whose command line matches the pattern, so
+    # empty output means that no gdb process wraps the executable.
+    for executable in GDB_WRAPPED_EXECUTABLES:
+        pgrep_command = subprocess.Popen(
+            ["pgrep", "-f", _gdb_pattern(executable)],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE)
+        assert pgrep_command.communicate()[0]