diff --git a/doc/source/index.rst b/doc/source/index.rst index 056cf7a8a..ae43afd39 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -70,6 +70,7 @@ Ray comes with libraries that accelerate deep learning and reinforcement learnin :caption: Ray Tune tune.rst + tune-config.rst hyperband.rst pbt.rst diff --git a/doc/source/tune-config.rst b/doc/source/tune-config.rst new file mode 100644 index 000000000..0ed21d9e1 --- /dev/null +++ b/doc/source/tune-config.rst @@ -0,0 +1,81 @@ +Experiment Configuration +======================== + + +Experiment Setup +---------------- + +There are two ways to set up an experiment - one via Python and one via JSON. + +The first is to create an Experiment object. You can then pass in either +a single experiment or a list of experiments to ``run_experiments``, as follows: + +.. code-block:: python + + # Single experiment + run_experiments(Experiment(...)) + + # Multiple experiments + run_experiments([Experiment(...), Experiment(...), ...]) + +.. autoclass:: ray.tune.Experiment + +An example of this can be found in `hyperband_example.py `__. + +Alternatively, you can pass in a JSON object. This uses the same fields as +the ``ray.tune.Experiment``, except the experiment name is the key of the top-level +dictionary. + +.. code-block:: python + + run_experiments({ + "my_experiment_name": { + "run": "my_func", + "resources": { "cpu": 1, "gpu": 0 }, + "stop": { "mean_accuracy": 100 }, + "config": { + "alpha": grid_search([0.2, 0.4, 0.6]), + "beta": grid_search([1, 2]), + }, + "upload_dir": "s3://your_bucket/path", + "local_dir": "~/ray_results", + "max_failures": 2 + } + }) + +An example of this can be found in `async_hyperband_example.py `__. + + +Trial Variant Generation +------------------------ + +In the above example, we specified a grid search over two parameters using the ``grid_search`` helper function. 
Ray Tune also supports sampling parameters from user-specified lambda functions, which can be used in combination with grid search. + +The following shows grid search over two nested parameters combined with random sampling from two lambda functions. Note that the value of ``beta`` depends on the value of ``alpha``, which is represented by referencing ``spec.config.alpha`` in the lambda function. This lets you specify conditional parameter distributions. + +.. code-block:: python + + "config": { + "alpha": lambda spec: np.random.uniform(100), + "beta": lambda spec: spec.config.alpha * np.random.normal(), + "nn_layers": [ + grid_search([16, 64, 256]), + grid_search([16, 64, 256]), + ], + }, + "repeat": 10, + +By default, each random variable and grid search point is sampled once. To take multiple random samples or repeat grid search runs, add ``repeat: N`` to the experiment config. E.g. in the above, ``"repeat": 10`` repeats the 3x3 grid search 10 times, for a total of 90 trials, each with randomly sampled values of ``alpha`` and ``beta``. + +For more information on variant generation, see `variant_generator.py `__. + + +Resource Allocation +------------------- + +Ray Tune runs each trial as a Ray actor, allocating the specified GPU and CPU ``resources`` to each actor (defaulting to 1 CPU per trial). A trial will not be scheduled unless at least that amount of resources is available in the cluster, preventing the cluster from being overloaded. + +If GPU resources are not requested, the ``CUDA_VISIBLE_DEVICES`` environment variable will be set as empty, disallowing GPU access. +Otherwise, it will be set to a GPU in the list (this is managed by Ray). + +If your trainable function / class creates further Ray actors or tasks that also consume CPU / GPU resources, you will also want to set ``driver_cpu_limit`` or ``driver_gpu_limit`` to tell Ray not to assign the entire resource reservation to your top-level trainable function, as described in `trial.py `__. 
For example, if a trainable class requires 1 GPU itself, but will launch 4 actors each using another GPU, then it should set ``"gpu": 5, "driver_gpu_limit": 1``. diff --git a/doc/source/tune.rst b/doc/source/tune.rst index 537c0aaf6..b0cdceb86 100644 --- a/doc/source/tune.rst +++ b/doc/source/tune.rst @@ -1,41 +1,17 @@ Ray Tune: Hyperparameter Optimization Framework =============================================== -This document describes Ray Tune, a hyperparameter tuning framework for long-running tasks such as RL and deep learning training. Ray Tune makes it easy to go from running one or more experiments on a single machine to running on a large cluster with efficient search algorithms. - -It has the following features: - -- Scalable implementations of search algorithms such as `Population Based Training (PBT) `__, `Median Stopping Rule `__, and `HyperBand `__. - -- Integration with visualization tools such as `TensorBoard `__, `rllab's VisKit `__, and a `parallel coordinates visualization `__. - -- Flexible trial variant generation, including grid search, random search, and conditional parameter distributions. - -- Resource-aware scheduling, including support for concurrent runs of algorithms that may themselves be parallel and distributed. - - -You can find the code for Ray Tune `here on GitHub `__. - -Concepts --------- - -.. image:: tune-api.svg - -Ray Tune schedules a number of *trials* in a cluster. Each trial runs a user-defined Python function or class and is parameterized by a json *config* variation passed to the user code. - -Ray Tune provides a ``run_experiments(spec)`` function that generates and runs the trials described by the experiment specification. The trials are scheduled and managed by a *trial scheduler* that implements the search algorithm (default is FIFO). - -Ray Tune can be used anywhere Ray can, e.g. on your laptop with ``ray.init()`` embedded in a Python script, or in an `auto-scaling cluster `__ for massive parallelism. 
+Ray Tune is a hyperparameter optimization framework for long-running tasks such as RL and deep learning training. Ray Tune makes it easy to go from running one or more experiments on a single machine to running on a large cluster with efficient search algorithms. Getting Started --------------- +To use Ray Tune, add a two-line modification to a function: + .. code-block:: python + :emphasize-lines: 1,5 - import ray - from ray.tune import register_trainable, grid_search, run_experiments - - def my_func(config, reporter): + def my_func(config, reporter): # add the reporter parameter import time, numpy as np i = 0 while True: @@ -43,19 +19,21 @@ Getting Started i += config["beta"] time.sleep(.01) - register_trainable("my_func", my_func) +Then, kick off your experiment: +.. code-block:: python + + tune.register_trainable("my_func", my_func) ray.init() - run_experiments({ + + tune.run_experiments({ "my_experiment": { "run": "my_func", - "resources": { "cpu": 1, "gpu": 0 }, "stop": { "mean_accuracy": 100 }, "config": { - "alpha": grid_search([0.2, 0.4, 0.6]), - "beta": grid_search([1, 2]), - }, - "upload_dir": "s3://your_bucket/path", + "alpha": tune.grid_search([0.2, 0.4, 0.6]), + "beta": tune.grid_search([1, 2]), + } } }) @@ -68,20 +46,60 @@ This script runs a small grid search over the ``my_func`` function using Ray Tun Using FIFO scheduling algorithm. 
Resources used: 4/8 CPUs, 0/0 GPUs Result logdir: ~/ray_results/my_experiment - - my_func_0_alpha=0.2,beta=1: RUNNING [pid=6778], 209 s, 20604 ts, 7.29 acc - - my_func_1_alpha=0.4,beta=1: RUNNING [pid=6780], 208 s, 20522 ts, 53.1 acc - - my_func_2_alpha=0.6,beta=1: TERMINATED [pid=6789], 21 s, 2190 ts, 101 acc - - my_func_3_alpha=0.2,beta=2: RUNNING [pid=6791], 208 s, 41004 ts, 8.37 acc - - my_func_4_alpha=0.4,beta=2: RUNNING [pid=6800], 209 s, 41204 ts, 70.1 acc - - my_func_5_alpha=0.6,beta=2: TERMINATED [pid=6809], 10 s, 2164 ts, 100 acc + - my_func_0_alpha=0.2,beta=1: RUNNING [pid=6778], 209 s, 20604 ts, 7.29 acc + - my_func_1_alpha=0.4,beta=1: RUNNING [pid=6780], 208 s, 20522 ts, 53.1 acc + - my_func_2_alpha=0.6,beta=1: TERMINATED [pid=6789], 21 s, 2190 ts, 101 acc + - my_func_3_alpha=0.2,beta=2: RUNNING [pid=6791], 208 s, 41004 ts, 8.37 acc + - my_func_4_alpha=0.4,beta=2: RUNNING [pid=6800], 209 s, 41204 ts, 70.1 acc + - my_func_5_alpha=0.6,beta=2: TERMINATED [pid=6809], 10 s, 2164 ts, 100 acc + +In order to report incremental progress, ``my_func`` periodically calls the ``reporter`` function passed in by Ray Tune to return the current timestep and other metrics as defined in `ray.tune.result.TrainingResult `__. Incremental results will be synced to local disk on the head node of the cluster. + +Learn more `about specifying experiments `__ . + +Features +-------- + +Ray Tune has the following features: + +- Scalable implementations of search algorithms such as `Population Based Training (PBT) `__, `Median Stopping Rule `__, and `HyperBand `__. + +- Integration with visualization tools such as `TensorBoard `__, `rllab's VisKit `__, and a `parallel coordinates visualization `__. + +- Flexible trial variant generation, including grid search, random search, and conditional parameter distributions. + +- Resource-aware scheduling, including support for concurrent runs of algorithms that may themselves be parallel and distributed. + + +Concepts +-------- + +.. 
image:: tune-api.svg + +Ray Tune schedules a number of *trials* in a cluster. Each trial runs a user-defined Python function or class and is parameterized by a *config* variation passed to the user code. + +In order to run any given function, you need to register it under a name with ``register_trainable``. This makes all Ray workers aware of the function. + +.. autofunction:: ray.tune.register_trainable + +Ray Tune provides a ``run_experiments`` function that generates and runs the trials described by the experiment specification. The trials are scheduled and managed by a *trial scheduler* that implements the search algorithm (default is FIFO). + +.. autofunction:: ray.tune.run_experiments + +Ray Tune can be used anywhere Ray can, e.g. on your laptop with ``ray.init()`` embedded in a Python script, or in an `auto-scaling cluster `__ for massive parallelism. + +You can find the code for Ray Tune `here on GitHub `__. -In order to report incremental progress, ``my_func`` periodically calls the ``reporter`` function passed in by Ray Tune to return the current timestep and other metrics as defined in `ray.tune.result.TrainingResult `__. Incremental results will be synced to local disk on the head node of the cluster and optionally uploaded to the specified ``upload_dir`` (e.g. S3 path). Trial Schedulers ---------------- By default, Ray Tune schedules trials in serial order with the ``FIFOScheduler`` class. However, you can also specify a custom scheduling algorithm that can early stop trials, perturb parameters, or incorporate suggestions from an external service. Currently implemented trial schedulers include `Population Based Training (PBT) `__, `Median Stopping Rule `__, and `HyperBand `__. +.. code-block:: python + + run_experiments({...}, scheduler=AsyncHyperBandScheduler()) + Visualizing Results ------------------- @@ -119,28 +137,6 @@ Finally, to view the results with a `parallel coordinates visualization `__. 
Trial Checkpointing ------------------- @@ -186,12 +182,6 @@ The class interface that must be implemented to enable checkpointing is as follo .. autoclass:: ray.tune.trainable.Trainable -Resource Allocation -------------------- - -Ray Tune runs each trial as a Ray actor, allocating the specified GPU and CPU ``resources`` to each actor (defaulting to 1 CPU per trial). A trial will not be scheduled unless at least that amount of resources is available in the cluster, preventing the cluster from being overloaded. - -If your trainable function / class creates further Ray actors or tasks that also consume CPU / GPU resources, you will also want to set ``driver_cpu_limit`` or ``driver_gpu_limit`` to tell Ray not to assign the entire resource reservation to your top-level trainable function, as described in `trial.py `__. For example, if a trainable class requires 1 GPU itself, but will launch 4 actors each using another GPU, then it should set ``"gpu": 5, "driver_gpu_limit": 1``. Client API ---------- diff --git a/doc/source/using-ray-with-tensorflow.rst b/doc/source/using-ray-with-tensorflow.rst index ae8d51ac0..8ea9a8b59 100644 --- a/doc/source/using-ray-with-tensorflow.rst +++ b/doc/source/using-ray-with-tensorflow.rst @@ -3,10 +3,9 @@ Using Ray with TensorFlow This document describes best practices for using Ray with TensorFlow. -To see more involved examples using TensorFlow, take a look at `hyperparameter optimization`_, +To see more involved examples using TensorFlow, take a look at `A3C`_, `ResNet`_, `Policy Gradients`_, and `LBFGS`_. -.. _`hyperparameter optimization`: http://ray.readthedocs.io/en/latest/example-hyperopt.html .. _`A3C`: http://ray.readthedocs.io/en/latest/example-a3c.html .. _`ResNet`: http://ray.readthedocs.io/en/latest/example-resnet.html .. 
_`Policy Gradients`: http://ray.readthedocs.io/en/latest/example-policy-gradient.html diff --git a/python/ray/tune/__init__.py b/python/ray/tune/__init__.py index 0e3a150ca..ec52d9591 100644 --- a/python/ray/tune/__init__.py +++ b/python/ray/tune/__init__.py @@ -3,7 +3,7 @@ from __future__ import division from __future__ import print_function from ray.tune.error import TuneError -from ray.tune.tune import run_experiments +from ray.tune.tune import run_experiments, Experiment from ray.tune.registry import register_env, register_trainable from ray.tune.result import TrainingResult from ray.tune.trainable import Trainable @@ -18,4 +18,5 @@ __all__ = [ "register_env", "register_trainable", "run_experiments", + "Experiment" ] diff --git a/python/ray/tune/examples/hyperband_example.py b/python/ray/tune/examples/hyperband_example.py index 734add55c..896b65b83 100755 --- a/python/ray/tune/examples/hyperband_example.py +++ b/python/ray/tune/examples/hyperband_example.py @@ -13,7 +13,7 @@ import numpy as np import ray from ray.tune import Trainable, TrainingResult, register_trainable, \ - run_experiments + run_experiments, Experiment from ray.tune.hyperband import HyperBandScheduler @@ -62,15 +62,14 @@ if __name__ == "__main__": time_attr="timesteps_total", reward_attr="episode_reward_mean", max_t=100) - run_experiments({ - "hyperband_test": { - "run": "my_class", - "stop": {"training_iteration": 1 if args.smoke_test else 99999}, - "repeat": 20, - "resources": {"cpu": 1, "gpu": 0}, - "config": { - "width": lambda spec: 10 + int(90 * random.random()), - "height": lambda spec: int(100 * random.random()), - }, - } - }, scheduler=hyperband) + exp = Experiment( + name="hyperband_test", + run="my_class", + repeat=20, + stop={"training_iteration": 1 if args.smoke_test else 99999}, + config={ + "width": lambda spec: 10 + int(90 * random.random()), + "height": lambda spec: int(100 * random.random()) + }) + + run_experiments(exp, scheduler=hyperband) diff --git 
a/python/ray/tune/experiment.py b/python/ray/tune/experiment.py new file mode 100644 index 000000000..0ebfa0aab --- /dev/null +++ b/python/ray/tune/experiment.py @@ -0,0 +1,56 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from ray.tune.variant_generator import generate_trials +from ray.tune.result import DEFAULT_RESULTS_DIR + + +class Experiment(object): + """Tracks experiment specifications. + + Parameters: + name (str): Name of experiment. + run (str): The algorithm or model to train. This may refer to the + name of a built-in algorithm (e.g. RLlib's DQN or PPO), or a + user-defined trainable function or class + registered in the tune registry. + stop (dict): The stopping criteria. The keys may be any field in + TrainingResult, whichever is reached first. Defaults to + empty dict. + config (dict): Algorithm-specific configuration + (e.g. env, hyperparams). Defaults to empty dict. + resources (dict): Machine resources to allocate per trial, + e.g. ``{"cpu": 64, "gpu": 8}``. Note that GPUs will not be + assigned unless you specify them here. Defaults to 1 CPU and 0 + GPUs. + repeat (int): Number of times to repeat each trial. Defaults to 1. + local_dir (str): Local dir to save training results to. + Defaults to ``~/ray_results``. + upload_dir (str): Optional URI to sync training results + to (e.g. ``s3://bucket``). + checkpoint_freq (int): How many training iterations between + checkpoints. A value of 0 (default) disables checkpointing. + max_failures (int): Try to recover a trial from its last + checkpoint at least this many times. Only applies if + checkpointing is enabled. Defaults to 3. 
+ """ + def __init__(self, name, run, stop=None, config=None, + resources=None, repeat=1, local_dir=None, + upload_dir="", checkpoint_freq=0, max_failures=3): + spec = { + "run": run, + "stop": stop or {}, + "config": config or {}, + "resources": resources or {"cpu": 1, "gpu": 0}, + "repeat": repeat, + "local_dir": local_dir or DEFAULT_RESULTS_DIR, + "upload_dir": upload_dir, + "checkpoint_freq": checkpoint_freq, + "max_failures": max_failures + } + self._trials = generate_trials(spec, name) + + def trials(self): + for trial in self._trials: + yield trial diff --git a/python/ray/tune/test/trial_runner_test.py b/python/ray/tune/test/trial_runner_test.py index 5b97b8409..cb858403f 100644 --- a/python/ray/tune/test/trial_runner_test.py +++ b/python/ray/tune/test/trial_runner_test.py @@ -13,6 +13,7 @@ from ray.tune import Trainable, TuneError from ray.tune import register_env, register_trainable, run_experiments from ray.tune.registry import _default_registry, TRAINABLE_CLASS from ray.tune.result import DEFAULT_RESULTS_DIR +from ray.tune.experiment import Experiment from ray.tune.trial import Trial, Resources from ray.tune.trial_runner import TrialRunner from ray.tune.variant_generator import generate_trials, grid_search, \ @@ -203,6 +204,79 @@ class TrainableFunctionApiTest(unittest.TestCase): self.assertEqual(trial.last_result.timesteps_total, 99) +class RunExperimentTest(unittest.TestCase): + + def setUp(self): + ray.init() + + def tearDown(self): + ray.worker.cleanup() + _register_all() # re-register the evicted objects + + def testDict(self): + def train(config, reporter): + for i in range(100): + reporter(timesteps_total=i) + register_trainable("f1", train) + trials = run_experiments({ + "foo": { + "run": "f1", + "config": { + "script_min_iter_time_s": 0 + } + }, + "bar": { + "run": "f1", + "config": { + "script_min_iter_time_s": 0 + } + } + }) + for trial in trials: + self.assertEqual(trial.status, Trial.TERMINATED) + 
self.assertEqual(trial.last_result.timesteps_total, 99) + + def testExperiment(self): + def train(config, reporter): + for i in range(100): + reporter(timesteps_total=i) + register_trainable("f1", train) + exp1 = Experiment(**{ + "name": "foo", + "run": "f1", + "config": { + "script_min_iter_time_s": 0 + } + }) + [trial] = run_experiments(exp1) + self.assertEqual(trial.status, Trial.TERMINATED) + self.assertEqual(trial.last_result.timesteps_total, 99) + + def testExperimentList(self): + def train(config, reporter): + for i in range(100): + reporter(timesteps_total=i) + register_trainable("f1", train) + exp1 = Experiment(**{ + "name": "foo", + "run": "f1", + "config": { + "script_min_iter_time_s": 0 + } + }) + exp2 = Experiment(**{ + "name": "bar", + "run": "f1", + "config": { + "script_min_iter_time_s": 0 + } + }) + trials = run_experiments([exp1, exp2]) + for trial in trials: + self.assertEqual(trial.status, Trial.TERMINATED) + self.assertEqual(trial.last_result.timesteps_total, 99) + + class VariantGeneratorTest(unittest.TestCase): def testParseToTrials(self): trials = generate_trials({ diff --git a/python/ray/tune/tune.py b/python/ray/tune/tune.py index 0683974b1..28e6fba9b 100644 --- a/python/ray/tune/tune.py +++ b/python/ray/tune/tune.py @@ -14,6 +14,7 @@ from ray.tune.trial_runner import TrialRunner from ray.tune.trial_scheduler import FIFOScheduler from ray.tune.web_server import TuneServer from ray.tune.variant_generator import generate_trials +from ray.tune.experiment import Experiment _SCHEDULERS = { @@ -35,6 +36,18 @@ def _make_scheduler(args): def run_experiments(experiments, scheduler=None, with_server=False, server_port=TuneServer.DEFAULT_PORT, verbose=True): + """Tunes experiments. + + Args: + experiments (Experiment | list | dict): Experiments to run. + scheduler (TrialScheduler): Scheduler for executing + the experiment. Choose among FIFO (default), MedianStopping, + AsyncHyperBand, or HyperBand. 
+ with_server (bool): Starts a background Tune server. Needed for + using the Client API. + server_port (int): Port number for launching TuneServer. + verbose (bool): How much output should be printed for each trial. + """ # Make sure rllib agents are registered from ray import rllib # noqa # pylint: disable=unused-import @@ -45,10 +58,22 @@ def run_experiments(experiments, scheduler=None, with_server=False, runner = TrialRunner( scheduler, launch_web_server=with_server, server_port=server_port) - for name, spec in experiments.items(): - for trial in generate_trials(spec, name): + if type(experiments) is dict: + for name, spec in experiments.items(): + for trial in generate_trials(spec, name): + trial.set_verbose(verbose) + runner.add_trial(trial) + elif (type(experiments) is list and + all(isinstance(exp, Experiment) for exp in experiments)): + for experiment in experiments: + for trial in experiment.trials(): + trial.set_verbose(verbose) + runner.add_trial(trial) + elif isinstance(experiments, Experiment): + for trial in experiments.trials(): trial.set_verbose(verbose) runner.add_trial(trial) + print(runner.debug_string(max_debug=99999)) last_debug = 0