mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 15:22:56 +08:00
[tune] Documentation for Ray.tune (#1243)
This commit is contained in:
+4
-175
@@ -1,177 +1,6 @@
|
||||
Parallel hyperparameter search with Ray
|
||||
=======================================
|
||||
Ray.tune: Efficient distributed hyperparameter search
|
||||
=====================================================
|
||||
|
||||
Using ray.tune with existing training scripts
|
||||
-----------------------------------------------
|
||||
Ray.tune is a hyperparameter tuning tool for long-running tasks such as RL and deep learning training.
|
||||
|
||||
With only a couple of changes, you can adapt any existing script for parallel
|
||||
hyperparameter search with Ray.tune.
|
||||
|
||||
First, you must define a ``train(config, status_reporter)`` function in your
|
||||
script. This will be the entry point which Ray will call into.
|
||||
|
||||
.. code:: python
|
||||
|
||||
def train(config, status_reporter):
|
||||
pass
|
||||
|
||||
Second, you should periodically report training status by passing a
|
||||
``TrainingResult`` tuple to ``status_reporter.report()``.
|
||||
|
||||
.. code:: python
|
||||
|
||||
from ray.tune.result import TrainingResult
|
||||
|
||||
def train(config, status_reporter):
|
||||
for step in range(1000):
|
||||
... # do an optimization step, etc.
|
||||
status_reporter.report(TrainingResult(
|
||||
timesteps_total=step, # required
|
||||
mean_loss=train_loss, # optional
|
||||
mean_accuracy=train_accuracy # optional
|
||||
))
|
||||
|
||||
You can then launch a hyperparameter tuning run by running ``tune.py``.
|
||||
For example:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
cd python/ray/tune
|
||||
./tune.py -f examples/tune_mnist_ray.yaml
|
||||
|
||||
The YAML or JSON file passed to ``tune.py`` specifies the configuration of the
|
||||
trials to launch. You can also use ray.tune programmatically, e.g. the above
|
||||
example also defines a main() using tune APIs that can be run directly:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
python examples/tune_mnist_ray.py
|
||||
|
||||
When run, ``./tune.py`` will schedule the trials on Ray, creating a new local
|
||||
Ray cluster if an existing cluster address is not specified. Incremental
|
||||
status will be reported on the command line, and you can also view the reported
|
||||
metrics using Tensorboard:
|
||||
|
||||
.. code:: text
|
||||
|
||||
== Status ==
|
||||
Resources used: 4/4 CPUs, 0/0 GPUs
|
||||
Tensorboard logdir: /tmp/ray/tune_mnist
|
||||
- script_custom_0_activation=relu: RUNNING [pid=27708], 16 s, 20 ts, 0.46 acc
|
||||
- script_custom_1_activation=elu: RUNNING [pid=27709], 16 s, 20 ts, 0.54 acc
|
||||
- script_custom_2_activation=tanh: RUNNING [pid=27711], 18 s, 20 ts, 0.74 acc
|
||||
- script_custom_3_activation=relu: RUNNING [pid=27713], 12 s, 10 ts, 0.22 acc
|
||||
- script_custom_4_activation=elu: PENDING
|
||||
- script_custom_5_activation=tanh: PENDING
|
||||
- script_custom_6_activation=relu: PENDING
|
||||
- script_custom_7_activation=elu: PENDING
|
||||
- script_custom_8_activation=tanh: PENDING
|
||||
- script_custom_9_activation=relu: PENDING
|
||||
|
||||
Note that if your script requires GPUs, you should specify the number of GPUs
|
||||
required per trial in the ``resources`` section. Additionally, Ray should be
|
||||
initialized with the ``--num-gpus`` argument (you can also pass this argument
|
||||
to ``tune.py``).
|
||||
|
||||
Specifying search parameters
|
||||
----------------------------
|
||||
|
||||
To specify search parameters, variables in the ``config`` section may be set to
|
||||
different values for each trial. You can either specify ``grid_search: <list>``
|
||||
in place of a concrete value to specify a grid search across the list of
|
||||
values, or ``eval: <str>`` for values to be sampled from the given Python
|
||||
expression.
|
||||
|
||||
.. code:: yaml
|
||||
|
||||
cartpole-ppo:
|
||||
env: CartPole-v0
|
||||
run: PPO
|
||||
repeat: 2
|
||||
stop:
|
||||
episode_reward_mean: 200
|
||||
time_total_s: 180
|
||||
resources:
|
||||
cpu: 5
|
||||
driver_cpu_limit: 1 # of the 5 CPUs, only 1 is used by the driver
|
||||
config:
|
||||
num_workers: 4
|
||||
timesteps_per_batch:
|
||||
grid_search: [4000, 40000]
|
||||
sgd_batchsize:
|
||||
grid_search: [128, 256, 512]
|
||||
num_sgd_iter:
|
||||
eval: spec.config.sgd_batchsize * 2
|
||||
lr:
|
||||
eval: random.uniform(1e-4, 1e-3)
|
||||
|
||||
When using the Python API, the above is equivalent to the following program:
|
||||
|
||||
.. code:: python
|
||||
|
||||
import random
|
||||
import ray
|
||||
from ray.tune.result import TrainingResult
|
||||
from ray.tune.trial_runner import TrialRunner
|
||||
from ray.tune.variant_generator import grid_search, generate_trials
|
||||
|
||||
runner = TrialRunner()
|
||||
|
||||
spec = {
|
||||
"env": "CartPole-v0",
|
||||
"run": "PPO",
|
||||
"repeat": 2,
|
||||
"stop": {
|
||||
"episode_reward_mean": 200,
|
||||
"time_total_s": 180,
|
||||
},
|
||||
"resources": {
|
||||
"cpu": 4,
|
||||
},
|
||||
"config": {
|
||||
"num_workers": 4,
|
||||
"timesteps_per_batch": grid_search([4000, 40000]),
|
||||
"sgd_batchsize": grid_search([128, 256, 512]),
|
||||
"num_sgd_iter": lambda spec: spec.config.sgd_batchsize * 2,
|
||||
"lr": lambda spec: random.uniform(1e-4, 1e-3),
|
||||
},
|
||||
}
|
||||
|
||||
for trial in generate_trials(spec):
|
||||
runner.add_trial(trial)
|
||||
|
||||
ray.init()
|
||||
|
||||
while not runner.is_finished():
|
||||
runner.step()
|
||||
print(runner.debug_string())
|
||||
|
||||
Note that conditional dependencies between variables can be expressed by
|
||||
variable references, e.g. ``spec.config.sgd_batchsize`` in the above example.
|
||||
It is also possible to combine grid search and lambda functions by having
|
||||
a lambda function return a grid search object or vice versa.
|
||||
|
||||
Using ray.tune as a library
|
||||
---------------------------
|
||||
|
||||
Ray.tune's Python API allows for finer-grained control over trial setup and
|
||||
scheduling. Some more examples of calling ray.tune programmatically include:
|
||||
|
||||
- ``python/ray/tune/examples/tune_mnist_ray.py`` (see the main function)
|
||||
- ``python/ray/rllib/train.py``
|
||||
- ``python/ray/rllib/tune.py``
|
||||
|
||||
Using ray.tune with Ray RLlib
|
||||
-----------------------------
|
||||
|
||||
Another way to use ray.tune is through RLlib's ``python/ray/rllib/train.py``
|
||||
script. This script allows you to select between different RL algorithms with
|
||||
the ``--run`` option. For example, to train pong with the A3C algorithm, run:
|
||||
|
||||
- ``./train.py --env=PongDeterministic-v4 --run=A3C --stop '{"time_total_s": 3200}' --resources '{"cpu": 8}' --config '{"num_workers": 8}'``
|
||||
|
||||
or
|
||||
|
||||
- ``./train.py -f tuned_examples/pong-a3c.yaml``
|
||||
|
||||
You can find more RLlib examples in ``python/ray/rllib/tuned_examples``.
|
||||
Documentation can be `found here <https://github.com/ray-project/ray/blob/master/doc/source/tune.rst>`__.
|
||||
|
||||
@@ -8,32 +8,47 @@ from ray.tune.trial_scheduler import FIFOScheduler, TrialScheduler
|
||||
from ray.tune.trial import Trial
|
||||
|
||||
|
||||
# Implementation notes:
|
||||
# This implementation contains 3 logical levels.
|
||||
# Each HyperBand iteration is a "band". There can be multiple
|
||||
# bands running at once, and there can be 1 band that is incomplete.
|
||||
#
|
||||
# In each band, there are at most `s` + 1 brackets.
|
||||
# `s` is a value determined by given parameters, and assigned on
|
||||
# a cyclic basis.
|
||||
#
|
||||
# In each bracket, there are at most `n(s)` trials, indicating that
|
||||
# `n` is a function of `s`. These trials go through a series of
|
||||
# halving procedures, dropping lowest performers. Multiple
|
||||
# brackets are running at once.
|
||||
#
|
||||
# Trials added will be inserted into the most recent bracket
|
||||
# and band and will spill over to new brackets/bands accordingly.
|
||||
#
|
||||
# This maintains the bracket size and max trial count per band
|
||||
# to 5 and 117 respectively, which correspond to that of
|
||||
# `max_attr=81, eta=3` from the blog post. Trials will fill up
|
||||
# from smallest bracket to largest, with largest
|
||||
# having the most rounds of successive halving.
|
||||
class HyperBandScheduler(FIFOScheduler):
|
||||
"""Implements HyperBand.
|
||||
"""Implements the HyperBand early stopping algorithm.
|
||||
|
||||
Blog post: https://people.eecs.berkeley.edu/~kjamieson/hyperband.html
|
||||
HyperBandScheduler early stops trials using the HyperBand optimization
|
||||
algorithm. It divides trials into brackets of varying sizes, and
|
||||
periodically early stops low-performing trials within each bracket.
|
||||
|
||||
This implementation contains 3 logical levels.
|
||||
Each HyperBand iteration is a "band". There can be multiple
|
||||
bands running at once, and there can be 1 band that is incomplete.
|
||||
To use this implementation of HyperBand with Ray.tune, all you need
|
||||
to do is specify the max length of time a trial can run `max_t`, the time
|
||||
units `time_attr`, and the name of the reported objective value
|
||||
`reward_attr`. We automatically determine reasonable values for the other
|
||||
HyperBand parameters based on the given values.
|
||||
|
||||
In each band, there are at most `s` + 1 brackets.
|
||||
`s` is a value determined by given parameters, and assigned on
|
||||
a cyclic basis.
|
||||
For example, to limit trials to 10 minutes and early stop based on the
|
||||
`episode_reward_mean` attr, construct:
|
||||
|
||||
In each bracket, there are at most `n(s)` trials, indicating that
|
||||
`n` is a function of `s`. These trials go through a series of
|
||||
halving procedures, dropping lowest performers. Multiple
|
||||
brackets are running at once.
|
||||
``HyperBandScheduler('time_total_s', 'episode_reward_mean', 600)``
|
||||
|
||||
Trials added will be inserted into the most recent bracket
|
||||
and band and will spill over to new brackets/bands accordingly.
|
||||
|
||||
This maintains the bracket size and max trial count per band
|
||||
to 5 and 117 respectively, which correspond to that of
|
||||
`max_attr=81, eta=3` from the blog post. Trials will fill up
|
||||
from smallest bracket to largest, with largest
|
||||
having the most rounds of successive halving.
|
||||
See also: https://people.eecs.berkeley.edu/~kjamieson/hyperband.html
|
||||
|
||||
Args:
|
||||
time_attr (str): The TrainingResult attr to use for comparing time.
|
||||
@@ -46,7 +61,7 @@ class HyperBandScheduler(FIFOScheduler):
|
||||
max_t (int): max time units per trial. Trials will be stopped after
|
||||
max_t time units (determined by time_attr) have passed.
|
||||
The HyperBand scheduler automatically tries to determine a
|
||||
reasonable number of brackets based on this and eta.
|
||||
reasonable number of brackets based on this.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
|
||||
@@ -11,7 +11,7 @@ from ray.tune.trial_scheduler import FIFOScheduler, TrialScheduler
|
||||
class MedianStoppingRule(FIFOScheduler):
|
||||
"""Implements the median stopping rule as described in the Vizier paper:
|
||||
|
||||
https://research.google.com/pubs/pub46180.html
|
||||
https://research.google.com/pubs/pub46180.html
|
||||
|
||||
Args:
|
||||
time_attr (str): The TrainingResult attr to use for comparing time.
|
||||
@@ -24,7 +24,7 @@ class MedianStoppingRule(FIFOScheduler):
|
||||
grace_period (float): Only stop trials at least this old in time.
|
||||
The units are the same as the attribute named by `time_attr`.
|
||||
min_samples_required (int): Min samples to compute median over.
|
||||
hard_stop (bool): If false, pauses trials instead of stopping
|
||||
hard_stop (bool): If False, pauses trials instead of stopping
|
||||
them. When all other trials are complete, paused trials will be
|
||||
resumed and allowed to run FIFO.
|
||||
"""
|
||||
|
||||
@@ -6,7 +6,7 @@ from collections import namedtuple
|
||||
|
||||
"""
|
||||
When using ray.tune with custom training scripts, you must periodically report
|
||||
training status back to Ray by calling status_reporter.report(result).
|
||||
training status back to Ray by calling reporter(result).
|
||||
|
||||
Most of the fields are optional; the only required one is timesteps_total.
|
||||
|
||||
|
||||
@@ -6,11 +6,11 @@ from __future__ import print_function
|
||||
class Trainable(object):
|
||||
"""Interface for trainable models, functions, etc.
|
||||
|
||||
Implementing this interface is required to use ray.tune's full
|
||||
Implementing this interface is required to use Ray.tune's full
|
||||
functionality, though you can also get away with supplying just a
|
||||
`my_train(config, reporter)` function and calling:
|
||||
|
||||
register_trainable("my_func", train)
|
||||
``register_trainable("my_func", my_train)``
|
||||
|
||||
to register it for use with tune. The function will be automatically
|
||||
converted to this interface (sans checkpoint functionality)."""
|
||||
|
||||
@@ -162,7 +162,11 @@ class TrialRunner(object):
|
||||
try:
|
||||
result = ray.get(result_id)
|
||||
trial.result_logger.on_result(result)
|
||||
print("result", result)
|
||||
print("TrainingResult for {}:".format(trial))
|
||||
for k, v in result._asdict().items():
|
||||
if v is not None:
|
||||
print(" {}={}".format(k, v))
|
||||
print()
|
||||
trial.last_result = result
|
||||
self._total_time += result.time_this_iter_s
|
||||
|
||||
|
||||
@@ -49,18 +49,22 @@ def _parse_configs(cfg_path):
|
||||
|
||||
|
||||
def _resolve(directory, result_fname):
|
||||
resultp = osp.join(directory, result_fname)
|
||||
res_dict = _parse_results(resultp)
|
||||
cfgp = osp.join(directory, "config.json")
|
||||
cfg_dict = _parse_configs(cfgp)
|
||||
cfg_dict.update(res_dict)
|
||||
return cfg_dict
|
||||
try:
|
||||
resultp = osp.join(directory, result_fname)
|
||||
res_dict = _parse_results(resultp)
|
||||
cfgp = osp.join(directory, "params.json")
|
||||
cfg_dict = _parse_configs(cfgp)
|
||||
cfg_dict.update(res_dict)
|
||||
return cfg_dict
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def load_results_to_df(directory, result_name="result.json"):
|
||||
exp_directories = [dirpath for dirpath, dirs, files in os.walk(directory)
|
||||
for f in files if f == result_name]
|
||||
data = [_resolve(d, result_name) for d in exp_directories]
|
||||
data = [d for d in data if d]
|
||||
return pd.DataFrame(data)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user