diff --git a/doc/source/index.rst b/doc/source/index.rst index 03bef2cee..402b0ccef 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -43,6 +43,7 @@ Example Program actors.rst using-ray-with-gpus.rst rllib.rst + tune.rst webui.rst .. toctree:: diff --git a/doc/source/ray-tune-tensorboard.png b/doc/source/ray-tune-tensorboard.png new file mode 100644 index 000000000..850172529 Binary files /dev/null and b/doc/source/ray-tune-tensorboard.png differ diff --git a/doc/source/ray-tune-viskit.png b/doc/source/ray-tune-viskit.png new file mode 100644 index 000000000..9c146b5b1 Binary files /dev/null and b/doc/source/ray-tune-viskit.png differ diff --git a/doc/source/tune.rst b/doc/source/tune.rst new file mode 100644 index 000000000..78a4e1e17 --- /dev/null +++ b/doc/source/tune.rst @@ -0,0 +1,160 @@ +Ray.tune: Efficient distributed hyperparameter search +===================================================== + +This document describes Ray.tune, a hyperparameter tuning tool for long-running tasks such as RL and deep learning training. It has the following features: + +- Early stopping algorithms such as `Median Stopping Rule `__ and `HyperBand `__. + +- Integration with visualization tools such as `TensorBoard `__, `rllab's VisKit `__, and a `parallel coordinates visualization `__. + +- Flexible trial variant generation, including grid search, random search, and conditional parameter distributions. + +- Resource-aware scheduling, including support for concurrent runs of algorithms that may themselves be parallel and distributed. + + +Getting Started +--------------- + +:: + + from ray.tune import register_trainable, grid_search, run_experiments + + def my_func(config, reporter): + import time, numpy as np + i = 0 + while True: + reporter(timesteps_total=i, mean_accuracy=i ** config["alpha"]) + i += config["beta"] + time.sleep(.01) + + register_trainable("my_func", my_func) + + run_experiments({ + "my_experiment": { + "run": "my_func", + "resources": { "cpu": 1, "gpu": 0 }, + "stop": { "mean_accuracy": 100 }, + "config": { + "alpha": grid_search([0.2, 0.4, 0.6]), + "beta": grid_search([1, 2]), + }, + } + }) + + +This script runs a small grid search over the ``my_func`` function using ray.tune, reporting status on the command line until the stopping condition of ``mean_accuracy >= 100`` is reached (for metrics like _loss_ that decrease over time, specify `neg_mean_loss `__ as a condition instead): + +:: + + == Status == + Using FIFO scheduling algorithm. + Resources used: 4/8 CPUs, 0/0 GPUs + Tensorboard logdir: /tmp/ray/my_experiment + - my_func_0_alpha=0.2,beta=1: RUNNING [pid=6778], 209 s, 20604 ts, 7.29 acc + - my_func_1_alpha=0.4,beta=1: RUNNING [pid=6780], 208 s, 20522 ts, 53.1 acc + - my_func_2_alpha=0.6,beta=1: TERMINATED [pid=6789], 21 s, 2190 ts, 101 acc + - my_func_3_alpha=0.2,beta=2: RUNNING [pid=6791], 208 s, 41004 ts, 8.37 acc + - my_func_4_alpha=0.4,beta=2: RUNNING [pid=6800], 209 s, 41204 ts, 70.1 acc + - my_func_5_alpha=0.6,beta=2: TERMINATED [pid=6809], 10 s, 2164 ts, 100 acc + +In order to report incremental progress, ``my_func`` periodically calls the ``reporter`` function passed in by Ray.tune to return the current timestep and other metrics as defined in `ray.tune.result.TrainingResult `__. + +Visualizing Results +------------------- + +Ray.tune logs trial results to a unique directory per experiment, e.g. ``/tmp/ray/my_experiment`` in the above example. The log records are compatible with a number of visualization tools: + +To visualize learning in tensorboard, run: + +:: + + $ pip install tensorboard + $ tensorboard --logdir=/tmp/ray/my_experiment + +.. image:: ray-tune-tensorboard.png + +To use rllab's VisKit (you may have to install some dependencies), run: + +:: + + $ git clone https://github.com/rll/rllab.git + $ python rllab/rllab/viskit/frontend.py /tmp/ray/my_experiment + +.. image:: ray-tune-viskit.png + +Finally, to view the results with a `parallel coordinates visualization `__, open `ParalleCoordinatesVisualization.ipynb `__ as follows and run its cells: + +:: + + $ cd $RAY_HOME/python/ray/tune + $ jupyter-notebook ParallelCoordinatesVisualization.ipynb + +Trial Variant Generation +------------------------ + +In the above example, we specified a grid search over two parameters using the ``grid_search`` helper function. Ray.tune also supports sampling parameters from user-specified lambda functions, which can be used in combination with grid search. + +The following shows grid search over two nested parameters combined with random sampling from two lambda functions. Note that the value of ``beta`` depends on the value of ``alpha``, which is represented by referencing ``spec.config.alpha`` in the lambda function. This lets you specify conditional parameter distributions. + +:: + + "config": { + "alpha": lambda spec: np.random.uniform(100), + "beta": lambda spec: spec.config.alpha * np.random.normal(), + "nn_layers": [ + grid_search([16, 64, 256]), + grid_search([16, 64, 256]), + ], + }, + "repeat": 10, + +By default, each random variable and grid search point is sampled once. To take multiple random samples or repeat grid search runs, add ``repeat: N`` to the experiment config. E.g. in the above, ``"repeat": 10`` repeats the 3x3 grid search 10 times, for a total of 90 trials, each with randomly sampled values of ``alpha`` and ``beta``. + +For more information on variant generation, see `variant_generator.py `__. + +Early Stopping +-------------- + +To reduce costs, long-running trials can often be early stopped if their initial performance is not promising. Ray.tune allows early stopping algorithms to be plugged in on top of existing grid or random searches. This can be enabled by setting the ``scheduler`` parameter of ``run_experiments``, e.g. + +:: + + run_experiments({...}, scheduler=MedianStoppingRule()) + +Currently we support the following early stopping algorithms, or you can write your own that implements the `TrialScheduler `__ interface: + +.. autoclass:: ray.tune.median_stopping_rule.MedianStoppingRule +.. autoclass:: ray.tune.hyperband.HyperBandScheduler + +Checkpointing support +--------------------- + +To enable checkpoint / resume, the full ``Trainable`` API must be implemented (though as shown in the examples above, you can get away with just supplying a ``train(config, reporter)`` func if you don't need checkpointing). Implementing this interface is required to support resource multiplexing in schedulers such as HyperBand. For example, all `RLlib agents `__ implement the ``Trainable`` API. + +.. autoclass:: ray.tune.trainable.Trainable + :members: + +Resource Allocation +------------------- + +Ray.tune runs each trial as a Ray actor, allocating the specified GPU and CPU ``resources`` to each actor (defaulting to 1 CPU per trial). A trial will not be scheduled unless at least that amount of resources is available in the cluster, preventing the cluster from being overloaded. + +If your trainable function / class creates further Ray actors or tasks that also consume CPU / GPU resources, you will also want to set ``driver_cpu_limit`` or ``driver_gpu_limit`` to tell Ray not to assign the entire resource reservation to your top-level trainable function, as described in `trial.py `__. + +Command-line JSON/YAML API +-------------------------- + +The JSON config passed to ``run_experiments`` can also be put in a JSON or YAML file, and the experiments run using the ``tune.py`` script. This supports the same functionality as the Python API, e.g.: + +:: + + cd ray/python/tune + ./tune.py -f examples/tune_mnist_ray.yaml --scheduler=MedianStoppingRule + + +For more examples of experiments described by YAML files, see `RLlib tuned examples `__. + +Running in a large cluster +-------------------------- + +The ``run_experiments`` also takes any arguments that ``ray.init()`` does. This can be used to pass in the redis address of a multi-node Ray cluster. For more details, check out the `tune.py script `__. diff --git a/python/ray/tune/README.rst b/python/ray/tune/README.rst index 58477bddb..9c3970264 100644 --- a/python/ray/tune/README.rst +++ b/python/ray/tune/README.rst @@ -1,177 +1,6 @@ -Parallel hyperparameter search with Ray -======================================= +Ray.tune: Efficient distributed hyperparameter search +===================================================== -Using ray.tune with existing training scripts ------------------------------------------------ +Ray.tune is a hyperparameter tuning tool for long-running tasks such as RL and deep learning training. -With only a couple changes, you can adapt any existing script for parallel -hyperparameter search with Ray.tune. - -First, you must define a ``train(config, status_reporter)`` function in your -script. This will be the entry point which Ray will call into. - -.. code:: python - - def train(config, status_reporter): - pass - -Second, you should periodically report training status by passing a -``TrainingResult`` tuple to ``status_reporter.report()``. - -.. code:: python - - from ray.tune.result import TrainingResult - - def train(config, status_reporter): - for step in range(1000): - ... # do an optimization step, etc. - status_reporter.report(TrainingResult( - timesteps_total=step, # required - mean_loss=train_loss, # optional - mean_accuracy=train_accuracy # optional - )) - -You can then launch a hyperparameter tuning run by running ``tune.py``. -For example: - -.. code:: bash - - cd python/ray/tune - ./tune.py -f examples/tune_mnist_ray.yaml - -The YAML or JSON file passed to ``tune.py`` specifies the configuration of the -trials to launch. You can also use ray.tune programmatically, e.g. the above -example also defines a main() using tune APIs that can be run directly: - -.. code:: bash - - python examples/tune_mnist_ray.py - -When run, ``./tune.py`` will schedule the trials on Ray, creating a new local -Ray cluster if an existing cluster address is not specified. Incremental -status will be reported on the command line, and you can also view the reported -metrics using Tensorboard: - -.. code:: text - - == Status == - Resources used: 4/4 CPUs, 0/0 GPUs - Tensorboard logdir: /tmp/ray/tune_mnist - - script_custom_0_activation=relu: RUNNING [pid=27708], 16 s, 20 ts, 0.46 acc - - script_custom_1_activation=elu: RUNNING [pid=27709], 16 s, 20 ts, 0.54 acc - - script_custom_2_activation=tanh: RUNNING [pid=27711], 18 s, 20 ts, 0.74 acc - - script_custom_3_activation=relu: RUNNING [pid=27713], 12 s, 10 ts, 0.22 acc - - script_custom_4_activation=elu: PENDING - - script_custom_5_activation=tanh: PENDING - - script_custom_6_activation=relu: PENDING - - script_custom_7_activation=elu: PENDING - - script_custom_8_activation=tanh: PENDING - - script_custom_9_activation=relu: PENDING - -Note that if your script requires GPUs, you should specify the number of gpus -required per trial in the ``resources`` section. Additionally, Ray should be -initialized with the ``--num-gpus`` argument (you can also pass this argument -to ``tune.py``). - -Specifying search parameters ----------------------------- - -To specify search parameters, variables in the ``config`` section may be set to -different values for each trial. You can either specify ``grid_search: `` -in place of a concrete value to specify a grid search across the list of -values, or ``eval: `` for values to be sampled from the given Python -expression. - -.. code:: yaml - - cartpole-ppo: - env: CartPole-v0 - run: PPO - repeat: 2 - stop: - episode_reward_mean: 200 - time_total_s: 180 - resources: - cpu: 5 - driver_cpu_limit: 1 # of the 5 CPUs, only 1 is used by the driver - config: - num_workers: 4 - timesteps_per_batch: - grid_search: [4000, 40000] - sgd_batchsize: - grid_search: [128, 256, 512] - num_sgd_iter: - eval: spec.config.sgd_batchsize * 2 - lr: - eval: random.uniform(1e-4, 1e-3) - -When using the Python API, the above is equivalent to the following program: - -.. code:: python - - import random - import ray - from ray.tune.result import TrainingResult - from ray.tune.trial_runner import TrialRunner - from ray.tune.variant_generator import grid_search, generate_trials - - runner = TrialRunner() - - spec = { - "env": "CartPole-v0", - "run": "PPO", - "repeat": 2, - "stop": { - "episode_reward_mean": 200, - "time_total_s": 180, - }, - "resources": { - "cpu": 4, - }, - "config": { - "num_workers": 4, - "timesteps_per_batch": grid_search([4000, 40000]), - "sgd_batchsize": grid_search([128, 256, 512]), - "num_sgd_iter": lambda spec: spec.config.sgd_batchsize * 2, - "lr": lambda spec: random.uniform(1e-4, 1e-3), - }, - } - - for trial in generate_trials(spec): - runner.add_trial(trial) - - ray.init() - - while not runner.is_finished(): - runner.step() - print(runner.debug_string()) - -Note that conditional dependencies between variables can be expressed by -variable references, e.g. ``spec.config.sgd_batchsize`` in the above example. -It is also possible to combine grid search and lambda functions by having -a lambda function return a grid search object or vice versa. - -Using ray.tune as a library ---------------------------- - -Ray.tune's Python API allows for finer-grained control over trial setup and -scheduling. Some more examples of calling ray.tune programmatically include: - -- ``python/ray/tune/examples/tune_mnist_ray.py`` (see the main function) -- ``python/ray/rllib/train.py`` -- ``python/ray/rllib/tune.py`` - -Using ray.tune with Ray RLlib ------------------------------ - -Another way to use ray.tune is through RLlib's ``python/ray/rllib/train.py`` -script. This script allows you to select between different RL algorithms with -the ``--run`` option. For example, to train pong with the A3C algorithm, run: - -- ``./train.py --env=PongDeterministic-v4 --run=A3C --stop '{"time_total_s": 3200}' --resources '{"cpu": 8}' --config '{"num_workers": 8}'`` - -or - -- ``./train.py -f tuned_examples/pong-a3c.yaml`` - -You can find more RLlib examples in ``python/ray/rllib/tuned_examples``. +Documentation can be `found here `__. diff --git a/python/ray/tune/hyperband.py b/python/ray/tune/hyperband.py index 9dda79877..e56923890 100644 --- a/python/ray/tune/hyperband.py +++ b/python/ray/tune/hyperband.py @@ -8,32 +8,47 @@ from ray.tune.trial_scheduler import FIFOScheduler, TrialScheduler from ray.tune.trial import Trial +# Implementation notes: +# This implementation contains 3 logical levels. +# Each HyperBand iteration is a "band". There can be multiple +# bands running at once, and there can be 1 band that is incomplete. +# +# In each band, there are at most `s` + 1 brackets. +# `s` is a value determined by given parameters, and assigned on +# a cyclic basis. +# +# In each bracket, there are at most `n(s)` trials, indicating that +# `n` is a function of `s`. These trials go through a series of +# halving procedures, dropping lowest performers. Multiple +# brackets are running at once. +# +# Trials added will be inserted into the most recent bracket +# and band and will spill over to new brackets/bands accordingly. +# +# This maintains the bracket size and max trial count per band +# to 5 and 117 respectively, which correspond to that of +# `max_attr=81, eta=3` from the blog post. Trials will fill up +# from smallest bracket to largest, with largest +# having the most rounds of successive halving. class HyperBandScheduler(FIFOScheduler): - """Implements HyperBand. + """Implements the HyperBand early stopping algorithm. - Blog post: https://people.eecs.berkeley.edu/~kjamieson/hyperband.html + HyperBandScheduler early stops trials using the HyperBand optimization + algorithm. It divides trials into brackets of varying sizes, and + periodically early stops low-performing trials within each bracket. - This implementation contains 3 logical levels. - Each HyperBand iteration is a "band". There can be multiple - bands running at once, and there can be 1 band that is incomplete. + To use this implementation of HyperBand with Ray.tune, all you need + to do is specify the max length of time a trial can run `max_t`, the time + units `time_attr`, and the name of the reported objective value + `reward_attr`. We automatically determine reasonable values for the other + HyperBand parameters based on the given values. - In each band, there are at most `s` + 1 brackets. - `s` is a value determined by given parameters, and assigned on - a cyclic basis. + For example, to limit trials to 10 minutes and early stop based on the + `episode_mean_reward` attr, construct: - In each bracket, there are at most `n(s)` trials, indicating that - `n` is a function of `s`. These trials go through a series of - halving procedures, dropping lowest performers. Multiple - brackets are running at once. + ``HyperBand('time_total_s', 'episode_reward_mean', 600)`` - Trials added will be inserted into the most recent bracket - and band and will spill over to new brackets/bands accordingly. - - This maintains the bracket size and max trial count per band - to 5 and 117 respectively, which correspond to that of - `max_attr=81, eta=3` from the blog post. Trials will fill up - from smallest bracket to largest, with largest - having the most rounds of successive halving. + See also: https://people.eecs.berkeley.edu/~kjamieson/hyperband.html Args: time_attr (str): The TrainingResult attr to use for comparing time. @@ -46,7 +61,7 @@ class HyperBandScheduler(FIFOScheduler): max_t (int): max time units per trial. Trials will be stopped after max_t time units (determined by time_attr) have passed. The HyperBand scheduler automatically tries to determine a - reasonable number of brackets based on this and eta. + reasonable number of brackets based on this. """ def __init__( diff --git a/python/ray/tune/median_stopping_rule.py b/python/ray/tune/median_stopping_rule.py index 8dfc5ddb2..1f3ab6aa0 100644 --- a/python/ray/tune/median_stopping_rule.py +++ b/python/ray/tune/median_stopping_rule.py @@ -11,7 +11,7 @@ from ray.tune.trial_scheduler import FIFOScheduler, TrialScheduler class MedianStoppingRule(FIFOScheduler): """Implements the median stopping rule as described in the Vizier paper: - https://research.google.com/pubs/pub46180.html + https://research.google.com/pubs/pub46180.html Args: time_attr (str): The TrainingResult attr to use for comparing time. @@ -24,7 +24,7 @@ class MedianStoppingRule(FIFOScheduler): grace_period (float): Only stop trials at least this old in time. The units are the same as the attribute named by `time_attr`. min_samples_required (int): Min samples to compute median over. - hard_stop (bool): If false, pauses trials instead of stopping + hard_stop (bool): If False, pauses trials instead of stopping them. When all other trials are complete, paused trials will be resumed and allowed to run FIFO. """ diff --git a/python/ray/tune/result.py b/python/ray/tune/result.py index 08ef62552..0fff3a41c 100644 --- a/python/ray/tune/result.py +++ b/python/ray/tune/result.py @@ -6,7 +6,7 @@ from collections import namedtuple """ When using ray.tune with custom training scripts, you must periodically report -training status back to Ray by calling status_reporter.report(result). +training status back to Ray by calling reporter(result). Most of the fields are optional, the only required one is timesteps_total. diff --git a/python/ray/tune/trainable.py b/python/ray/tune/trainable.py index 66c659449..6274e4898 100644 --- a/python/ray/tune/trainable.py +++ b/python/ray/tune/trainable.py @@ -6,11 +6,11 @@ from __future__ import print_function class Trainable(object): """Interface for trainable models, functions, etc. - Implementing this interface is required to use ray.tune's full + Implementing this interface is required to use Ray.tune's full functionality, though you can also get away with supplying just a `my_train(config, reporter)` function and calling: - register_trainable("my_func", train) + ``register_trainable("my_func", train)`` to register it for use with tune. The function will be automatically converted to this interface (sans checkpoint functionality).""" diff --git a/python/ray/tune/trial_runner.py b/python/ray/tune/trial_runner.py index 557ce8a64..2fc77c9b3 100644 --- a/python/ray/tune/trial_runner.py +++ b/python/ray/tune/trial_runner.py @@ -162,7 +162,11 @@ class TrialRunner(object): try: result = ray.get(result_id) trial.result_logger.on_result(result) - print("result", result) + print("TrainingResult for {}:".format(trial)) + for k, v in result._asdict().items(): + if v is not None: + print(" {}={}".format(k, v)) + print() trial.last_result = result self._total_time += result.time_this_iter_s diff --git a/python/ray/tune/visual_utils.py b/python/ray/tune/visual_utils.py index 561097519..fa3278d5a 100644 --- a/python/ray/tune/visual_utils.py +++ b/python/ray/tune/visual_utils.py @@ -49,18 +49,22 @@ def _parse_configs(cfg_path): def _resolve(directory, result_fname): - resultp = osp.join(directory, result_fname) - res_dict = _parse_results(resultp) - cfgp = osp.join(directory, "config.json") - cfg_dict = _parse_configs(cfgp) - cfg_dict.update(res_dict) - return cfg_dict + try: + resultp = osp.join(directory, result_fname) + res_dict = _parse_results(resultp) + cfgp = osp.join(directory, "params.json") + cfg_dict = _parse_configs(cfgp) + cfg_dict.update(res_dict) + return cfg_dict + except Exception: + return None def load_results_to_df(directory, result_name="result.json"): exp_directories = [dirpath for dirpath, dirs, files in os.walk(directory) for f in files if f == result_name] data = [_resolve(d, result_name) for d in exp_directories] + data = [d for d in data if d] return pd.DataFrame(data)