diff --git a/doc/source/api.rst b/doc/source/api.rst index 3c16cc8a8..65e31e5a4 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -57,3 +57,11 @@ The Ray Command Line API .. click:: ray.scripts.scripts:get_head_ip :prog: ray get_head_ip :show-nested: + +.. click:: ray.scripts.scripts:stack + :prog: ray stack + :show-nested: + +.. click:: ray.scripts.scripts:timeline + :prog: ray timeline + :show-nested: diff --git a/doc/source/rllib.rst b/doc/source/rllib.rst index a0fe8a0d4..3ab98ce8d 100644 --- a/doc/source/rllib.rst +++ b/doc/source/rllib.rst @@ -136,4 +136,5 @@ try setting ``OMP_NUM_THREADS=1``. Similarly, check configured system limits wit If you encounter out-of-memory errors, consider setting ``redis_max_memory`` and ``object_store_memory`` in ``ray.init()`` to reduce memory usage. For debugging unexpected hangs or performance problems, you can run ``ray stack`` to dump -the stack traces of all Ray workers on the current node. This requires py-spy to be installed. +the stack traces of all Ray workers on the current node, and ``ray timeline`` to dump +a timeline visualization of tasks to a file. 
def _find_local_redis_addresses():
    """Collect the redis addresses of Ray processes visible on this machine.

    Inspects the command line of every process psutil can list and gathers
    the value of each ``--redis-address=<addr>`` argument found there.

    Returns:
        A set of address strings; empty when no such argument was found.
    """
    # Imported lazily so psutil is only required when auto-detection runs,
    # i.e. when the caller did not pass --redis-address explicitly.
    import psutil

    redis_addresses = set()
    for pid in psutil.pids():
        try:
            cmdline = psutil.Process(pid).cmdline()
        except (psutil.AccessDenied, psutil.NoSuchProcess):
            # Best effort: skip processes we may not inspect or that exited
            # between listing the pids and reading their command line.
            continue
        for arglist in cmdline:
            # A single argv entry may itself hold several space-separated
            # options, so split it before matching.
            for arg in arglist.split(" "):
                if arg.startswith("--redis-address="):
                    # maxsplit=1 keeps the whole value even if it has "=".
                    redis_addresses.add(arg.split("=", 1)[1])
    return redis_addresses


@cli.command()
@click.option(
    "--redis-address",
    required=False,
    type=str,
    help="Override the redis address to connect to.")
def timeline(redis_address):
    """Dump a Chrome tracing timeline of Ray tasks to a JSON file in /tmp.

    When --redis-address is not given, the address is auto-detected from the
    command lines of the processes running on this machine. The command exits
    with status 1 if no address, or more than one distinct address, is found.
    """
    if not redis_address:
        redis_addresses = _find_local_redis_addresses()
        if len(redis_addresses) > 1:
            # Ambiguous: refuse to guess which instance the user meant.
            logger.error(
                "Found multiple active Ray instances: {}. ".format(
                    redis_addresses) +
                "Please specify the one to connect to with --redis-address.")
            sys.exit(1)
        elif not redis_addresses:
            logger.error(
                "Could not find any running Ray instance. "
                "Please specify the one to connect to with --redis-address.")
            sys.exit(1)
        redis_address = redis_addresses.pop()
    logger.info("Connecting to Ray instance at {}.".format(redis_address))
    ray.init(redis_address=redis_address)
    # The timestamp in the file name keeps successive dumps from overwriting
    # each other (to one-second resolution).
    timestamp = datetime.today().strftime("%Y-%m-%d_%H-%M-%S")
    filename = "/tmp/ray-timeline-{}.json".format(timestamp)
    ray.global_state.chrome_tracing_dump(filename=filename)
    size = os.path.getsize(filename)
    logger.info("Trace file written to {} ({} bytes).".format(filename, size))
    logger.info(
        "You can open this with chrome://tracing in the Chrome browser.")