From 87cbf2aedd94677105b6f82242f703189b28629b Mon Sep 17 00:00:00 2001 From: Richard Liaw Date: Sun, 17 May 2020 12:19:44 -0700 Subject: [PATCH] [docs][tune] Make search algorithm, scheduler docs better! (#8179) --- doc/source/index.rst | 2 - doc/source/tune-schedulers.rst | 172 -------- doc/source/tune-searchalg.rst | 368 ------------------ .../tune/_tutorials/tune-60-seconds.rst | 4 +- doc/source/tune/_tutorials/tune-usage.rst | 7 +- doc/source/tune/api_docs/analysis.rst | 11 +- doc/source/tune/api_docs/logging.rst | 161 ++++++-- doc/source/tune/api_docs/schedulers.rst | 162 +++++++- doc/source/tune/api_docs/suggestion.rst | 188 +++++++-- python/ray/tune/examples/hyperopt_example.py | 2 +- python/ray/tune/suggest/ax.py | 15 +- python/ray/tune/suggest/bayesopt.py | 15 +- python/ray/tune/suggest/dragonfly.py | 56 ++- python/ray/tune/suggest/hyperopt.py | 58 +-- python/ray/tune/suggest/nevergrad.py | 37 +- python/ray/tune/suggest/repeater.py | 12 + python/ray/tune/suggest/sigopt.py | 12 +- python/ray/tune/suggest/skopt.py | 34 +- python/ray/tune/suggest/zoopt.py | 34 +- 19 files changed, 630 insertions(+), 720 deletions(-) delete mode 100644 doc/source/tune-schedulers.rst delete mode 100644 doc/source/tune-searchalg.rst diff --git a/doc/source/index.rst b/doc/source/index.rst index e84f60f5a..5c4e04fa9 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -250,8 +250,6 @@ Getting Involved tune.rst Tutorials, Guides, Examples - tune-schedulers.rst - tune-searchalg.rst tune/api_docs/overview.rst tune-contrib.rst diff --git a/doc/source/tune-schedulers.rst b/doc/source/tune-schedulers.rst deleted file mode 100644 index fd91a632a..000000000 --- a/doc/source/tune-schedulers.rst +++ /dev/null @@ -1,172 +0,0 @@ -.. _tune-schedulers: - -Tune Trial Schedulers -===================== - -By default, Tune schedules trials in serial order with the ``FIFOScheduler`` class. 
However, you can also specify a custom scheduling algorithm that can early stop trials or perturb parameters. - -.. code-block:: python - - tune.run( ... , scheduler=AsyncHyperBandScheduler()) - -Tune includes distributed implementations of early stopping algorithms such as `Median Stopping Rule `__, `HyperBand `__, and an `asynchronous version of HyperBand `__. These algorithms are very resource efficient and can outperform Bayesian Optimization methods in `many cases `__. All schedulers take in a ``metric``, which is a value returned in the result dict of your Trainable and is maximized or minimized according to ``mode``. - -Current Available Trial Schedulers: - -.. contents:: - :local: - :backlinks: none - -.. _tune-scheduler-pbt: - -Population Based Training (PBT) -------------------------------- - -Tune includes a distributed implementation of `Population Based Training (PBT) `__. This can be enabled by setting the ``scheduler`` parameter of ``tune.run``, e.g. - -.. code-block:: python - - pbt_scheduler = PopulationBasedTraining( - time_attr='time_total_s', - metric='mean_accuracy', - mode='max', - perturbation_interval=600.0, - hyperparam_mutations={ - "lr": [1e-3, 5e-4, 1e-4, 5e-5, 1e-5], - "alpha": lambda: random.uniform(0.0, 1.0), - ... - }) - tune.run( ... , scheduler=pbt_scheduler) - -When the PBT scheduler is enabled, each trial variant is treated as a member of the population. Periodically, top-performing trials are checkpointed (this requires your Trainable to support :ref:`save and restore `). Low-performing trials clone the checkpoints of top performers and perturb the configurations in the hope of discovering an even better variation. - -You can run this `toy PBT example `__ to get an idea of how how PBT operates. When training in PBT mode, a single trial may see many different hyperparameters over its lifetime, which is recorded in its ``result.json`` file. 
The following figure generated by the example shows PBT with optimizing a LR schedule over the course of a single experiment: - -.. image:: pbt.png - -.. autoclass:: ray.tune.schedulers.PopulationBasedTraining - :noindex: - -.. _tune-scheduler-hyperband: - -Asynchronous HyperBand ----------------------- - -The `asynchronous version of HyperBand `__ scheduler can be used by setting the ``scheduler`` parameter of ``tune.run``, e.g. - -.. code-block:: python - - async_hb_scheduler = AsyncHyperBandScheduler( - time_attr='training_iteration', - metric='episode_reward_mean', - mode='max', - max_t=100, - grace_period=10, - reduction_factor=3, - brackets=3) - tune.run( ... , scheduler=async_hb_scheduler) - -Compared to the original version of HyperBand, this implementation provides better parallelism and avoids straggler issues during eliminations. An example of this can be found in `async_hyperband_example.py `__. **We recommend using this over the standard HyperBand scheduler.** - -.. autoclass:: ray.tune.schedulers.AsyncHyperBandScheduler - :noindex: - -HyperBand ---------- - -.. note:: Note that the HyperBand scheduler requires your trainable to support :ref:`saving and restoring `. Checkpointing enables the scheduler to multiplex many concurrent trials onto a limited size cluster. - -Tune also implements the `standard version of HyperBand `__. You can use it as such: - -.. code-block:: python - - tune.run( ... , scheduler=HyperBandScheduler()) - -An example of this can be found in `hyperband_example.py `__. The progress of one such HyperBand run is shown below. 
- - -:: - - == Status == - Using HyperBand: num_stopped=0 total_brackets=5 - Round #0: - Bracket(n=5, r=100, completed=80%): {'PAUSED': 4, 'PENDING': 1} - Bracket(n=8, r=33, completed=23%): {'PAUSED': 4, 'PENDING': 4} - Bracket(n=15, r=11, completed=4%): {'RUNNING': 2, 'PAUSED': 2, 'PENDING': 11} - Bracket(n=34, r=3, completed=0%): {'RUNNING': 2, 'PENDING': 32} - Bracket(n=81, r=1, completed=0%): {'PENDING': 38} - Resources used: 4/4 CPUs, 0/0 GPUs - Result logdir: ~/ray_results/hyperband_test - PAUSED trials: - - my_class_0_height=99,width=43: PAUSED [pid=11664], 0 s, 100 ts, 97.1 rew - - my_class_11_height=85,width=81: PAUSED [pid=11771], 0 s, 33 ts, 32.8 rew - - my_class_12_height=0,width=52: PAUSED [pid=11785], 0 s, 33 ts, 0 rew - - my_class_19_height=44,width=88: PAUSED [pid=11811], 0 s, 11 ts, 5.47 rew - - my_class_27_height=96,width=84: PAUSED [pid=11840], 0 s, 11 ts, 12.5 rew - ... 5 more not shown - PENDING trials: - - my_class_10_height=12,width=25: PENDING - - my_class_13_height=90,width=45: PENDING - - my_class_14_height=69,width=45: PENDING - - my_class_15_height=41,width=11: PENDING - - my_class_16_height=57,width=69: PENDING - ... 81 more not shown - RUNNING trials: - - my_class_23_height=75,width=51: RUNNING [pid=11843], 0 s, 1 ts, 1.47 rew - - my_class_26_height=16,width=48: RUNNING - - my_class_31_height=40,width=10: RUNNING - - my_class_53_height=28,width=96: RUNNING - -.. autoclass:: ray.tune.schedulers.HyperBandScheduler - :noindex: - - -HyperBand Implementation Details -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Implementation details may deviate slightly from theory but are focused on increasing usability. Note: ``R``, ``s_max``, and ``eta`` are parameters of HyperBand given by the paper. See `this post `_ for context. - -1. Both ``s_max`` (representing the ``number of brackets - 1``) and ``eta``, representing the downsampling rate, are fixed. 
In many practical settings, ``R``, which represents some resource unit and often the number of training iterations, can be set reasonably large, like ``R >= 200``. For simplicity, assume ``eta = 3``. Varying ``R`` between ``R = 200`` and ``R = 1000`` creates a huge range of the number of trials needed to fill up all brackets. - -.. image:: images/hyperband_bracket.png - -On the other hand, holding ``R`` constant at ``R = 300`` and varying ``eta`` also leads to HyperBand configurations that are not very intuitive: - -.. image:: images/hyperband_eta.png - -The implementation takes the same configuration as the example given in the paper and exposes ``max_t``, which is not a parameter in the paper. - -2. The example in the `post `_ to calculate ``n_0`` is actually a little different than the algorithm given in the paper. In this implementation, we implement ``n_0`` according to the paper (which is `n` in the below example): - -.. image:: images/hyperband_allocation.png - - -3. There are also implementation specific details like how trials are placed into brackets which are not covered in the paper. This implementation places trials within brackets according to smaller bracket first - meaning that with low number of trials, there will be less early stopping. - -HyperBand (BOHB) ----------------- - -.. tip:: This implementation is still experimental. Please report issues on https://github.com/ray-project/ray/issues/. Thanks! - -This class is a variant of HyperBand that enables the BOHB Algorithm. This implementation is true to the original HyperBand implementation and does not implement pipelining nor straggler mitigation. - -This is to be used in conjunction with the Tune BOHB search algorithm. See `TuneBOHB `_ for package requirements, examples, and details. - -An example of this in use can be found in `bohb_example.py `_. - -.. 
autoclass:: ray.tune.schedulers.HyperBandForBOHB - :noindex: - - -Median Stopping Rule --------------------- - -The Median Stopping Rule implements the simple strategy of stopping a trial if its performance falls below the median of other trials at similar points in time. You can set the ``scheduler`` parameter as such: - -.. code-block:: python - - tune.run( ... , scheduler=MedianStoppingRule()) - -.. autoclass:: ray.tune.schedulers.MedianStoppingRule - :noindex: - diff --git a/doc/source/tune-searchalg.rst b/doc/source/tune-searchalg.rst deleted file mode 100644 index b9d344639..000000000 --- a/doc/source/tune-searchalg.rst +++ /dev/null @@ -1,368 +0,0 @@ -.. _tune-search-alg: - -Tune Search Algorithms -====================== - -Tune provides various hyperparameter search algorithms to efficiently optimize your model. Tune allows you to use different search algorithms in combination with different trial schedulers. Tune will by default implicitly use the Variant Generation algorithm to create trials. - -You can utilize these search algorithms as follows: - -.. code-block:: python - - tune.run(my_function, search_alg=SearchAlgorithm(...)) - -Currently, Tune offers the following search algorithms (and library integrations): - -- `Grid Search and Random Search `__ -- `BayesOpt `__ -- `HyperOpt `__ -- `SigOpt `__ -- `Nevergrad `__ -- `Scikit-Optimize `__ -- `Ax `__ -- `BOHB `__ - - -Variant Generation (Grid Search/Random Search) ----------------------------------------------- - -By default, Tune uses a BasicVariantGenerator to sample trials. This supports random search and grid search as specified by the ``config`` parameter of ``tune.run``. - -.. autoclass:: ray.tune.suggest.BasicVariantGenerator - :show-inheritance: - :noindex: - -Read about this in the :ref:`Grid/Random Search API `. - -Note that other search algorithms will require a different search space declaration than the default Tune format. 
- - -Repeated Evaluations --------------------- - -Use ``ray.tune.suggest.Repeater`` to average over multiple evaluations of the same -hyperparameter configurations. This is useful in cases where the evaluated -training procedure has high variance (i.e., in reinforcement learning). - -By default, ``Repeater`` will take in a ``repeat`` parameter and a ``search_alg``. -The ``search_alg`` will suggest new configurations to try, and the ``Repeater`` -will run ``repeat`` trials of the configuration. It will then average the -``search_alg.metric`` from the final results of each repeated trial. - -See the API documentation (:ref:`repeater-doc`) for more details. - -.. code-block:: python - - from ray.tune.suggest import Repeater - - search_alg = BayesOpt(...) - re_search_alg = Repeater(search_alg, repeat=10) - - # Repeat 2 samples 10 times each. - tune.run(trainable, num_samples=20, search_alg=re_search_alg) - -.. note:: This does not apply for grid search and random search. -.. warning:: It is recommended to not use ``Repeater`` with a TrialScheduler. - Early termination can negatively affect the average reported metric. - - -BayesOpt Search ---------------- - -The ``BayesOptSearch`` is a SearchAlgorithm that is backed by the `bayesian-optimization `__ package to perform sequential model-based hyperparameter optimization. Note that this class does not extend ``ray.tune.suggest.BasicVariantGenerator``, so you will not be able to use Tune's default variant generation/search space declaration when using BayesOptSearch. - -In order to use this search algorithm, you will need to install Bayesian Optimization via the following command: - -.. code-block:: bash - - $ pip install bayesian-optimization - -This algorithm requires `setting a search space and defining a utility function `__. You can use BayesOptSearch like follows: - -.. code-block:: python - - tune.run(... , search_alg=BayesOptSearch(bayesopt_space, utility_kwargs=utility_params, ... 
)) - -An example of this can be found in `bayesopt_example.py `__. - -.. autoclass:: ray.tune.suggest.bayesopt.BayesOptSearch - :show-inheritance: - :noindex: - -.. _tune-hyperopt: - -HyperOpt Search (Tree-structured Parzen Estimators) ---------------------------------------------------- - -The ``HyperOptSearch`` is a SearchAlgorithm that is backed by `HyperOpt `__ to perform sequential model-based hyperparameter optimization. Note that this class does not extend ``ray.tune.suggest.BasicVariantGenerator``, so you will not be able to use Tune's default variant generation/search space declaration when using HyperOptSearch. - -In order to use this search algorithm, you will need to install HyperOpt via the following command: - -.. code-block:: bash - - $ pip install --upgrade git+git://github.com/hyperopt/hyperopt.git - -This algorithm requires using the `HyperOpt search space specification `__. You can use HyperOptSearch like follows: - -.. code-block:: python - - tune.run(... , search_alg=HyperOptSearch(hyperopt_space, ... )) - -An example of this can be found in `hyperopt_example.py `__. - -.. autoclass:: ray.tune.suggest.hyperopt.HyperOptSearch - :show-inheritance: - :noindex: - - -SigOpt Search -------------- - -The ``SigOptSearch`` is a SearchAlgorithm that is backed by `SigOpt `__ to perform sequential model-based hyperparameter optimization. Note that this class does not extend ``ray.tune.suggest.BasicVariantGenerator``, so you will not be able to use Tune's default variant generation/search space declaration when using SigOptSearch. - -In order to use this search algorithm, you will need to install SigOpt via the following command: - -.. code-block:: bash - - $ pip install sigopt - -This algorithm requires the user to have a `SigOpt API key `__ to make requests to the API. Store the API token as an environment variable named ``SIGOPT_KEY`` like follows: - -.. code-block:: bash - - $ export SIGOPT_KEY= ... 
- -This algorithm requires using the `SigOpt experiment and space specification `__. You can use SigOptSearch like follows: - -.. code-block:: python - - tune.run(... , search_alg=SigOptSearch(sigopt_space, ... )) - -An example of this can be found in `sigopt_example.py `__. - -.. autoclass:: ray.tune.suggest.sigopt.SigOptSearch - :show-inheritance: - :noindex: - -.. _tune-nevergrad: - -Nevergrad Search ----------------- - -The ``NevergradSearch`` is a SearchAlgorithm that is backed by `Nevergrad `__ to perform sequential model-based hyperparameter optimization. Note that this class does not extend ``ray.tune.suggest.BasicVariantGenerator``, so you will not be able to use Tune's default variant generation/search space declaration when using NevergradSearch. - -In order to use this search algorithm, you will need to install Nevergrad via the following command.: - -.. code-block:: bash - - $ pip install nevergrad - -Keep in mind that ``nevergrad`` is a Python 3.6+ library. - -This algorithm requires using an optimizer provided by ``nevergrad``, of which there are many options. A good rundown can be found on their README's `Optimization `__ section. You can use ``NevergradSearch`` like follows: - -.. code-block:: python - - tune.run(... , search_alg=NevergradSearch(optimizer, parameter_names, ... )) - -An example of this can be found in `nevergrad_example.py `__. - -.. autoclass:: ray.tune.suggest.nevergrad.NevergradSearch - :show-inheritance: - :noindex: - -Scikit-Optimize Search ----------------------- - -The ``SkOptSearch`` is a SearchAlgorithm that is backed by `Scikit-Optimize `__ to perform sequential model-based hyperparameter optimization. Note that this class does not extend ``ray.tune.suggest.BasicVariantGenerator``, so you will not be able to use Tune's default variant generation/search space declaration when using SkOptSearch. - -In order to use this search algorithm, you will need to install Scikit-Optimize via the following command: - -.. 
code-block:: bash - - $ pip install scikit-optimize - -This algorithm requires using the `Scikit-Optimize ask and tell interface `__. This interface requires using the `Optimizer `__ provided by Scikit-Optimize. You can use SkOptSearch like follows: - -.. code-block:: python - - optimizer = Optimizer(dimension, ...) - tune.run(... , search_alg=SkOptSearch(optimizer, parameter_names, ... )) - -An example of this can be found in `skopt_example.py `__. - -.. autoclass:: ray.tune.suggest.skopt.SkOptSearch - :show-inheritance: - :noindex: - -Dragonfly Search ----------------- - -The ``DragonflySearch`` is a SearchAlgorithm that is backed by `Dragonfly `__ to perform sequential Bayesian optimization. Note that this class does not extend ``ray.tune.suggest.BasicVariantGenerator``, so you will not be able to use Tune's default variant generation/search space declaration when using DragonflySearch. - -.. code-block:: bash - - $ pip install dragonfly-opt - -This algorithm requires using the `Dragonfly ask and tell interface `__. This interface requires using FunctionCallers and optimizers provided by Dragonfly. You can use `DragonflySearch` like follows: - -.. code-block:: python - - from dragonfly.opt.gp_bandit import EuclideanGPBandit - from dragonfly.exd.experiment_caller import EuclideanFunctionCaller - from dragonfly import load_config - domain_config = load_config({'domain': ...}) - func_caller = EuclideanFunctionCaller(None, domain_config.domain.list_of_domains[0]) - optimizer = EuclideanGPBandit(func_caller, ask_tell_mode=True) - algo = DragonflySearch(optimizer, ...) - -An example of this can be found in `dragonfly_example.py `__. - -.. autoclass:: ray.tune.suggest.dragonfly.DragonflySearch - :show-inheritance: - :noindex: - -.. _tune-ax: - -Ax Search ---------- - -The ``AxSearch`` is a SearchAlgorithm that is backed by `Ax `__ to perform sequential model-based hyperparameter optimization. 
Ax is a platform for understanding, managing, deploying, and automating adaptive experiments. Ax provides an easy to use interface with BoTorch, a flexible, modern library for Bayesian optimization in PyTorch. Note that this class does not extend ``ray.tune.suggest.BasicVariantGenerator``, so you will not be able to use Tune's default variant generation/search space declaration when using AxSearch. - -In order to use this search algorithm, you will need to install PyTorch, Ax, and sqlalchemy. Instructions to install PyTorch locally can be found `here `__. You can install Ax and sqlalchemy via the following command: - -.. code-block:: bash - - $ pip install ax-platform sqlalchemy - -This algorithm requires specifying a search space and objective. You can use `AxSearch` like follows: - -.. code-block:: python - - client = AxClient(enforce_sequential_optimization=False) - client.create_experiment( ... ) - tune.run(... , search_alg=AxSearch(client)) - -An example of this can be found in `ax_example.py `__. - -.. autoclass:: ray.tune.suggest.ax.AxSearch - :show-inheritance: - :noindex: - -BOHB ----- - -.. tip:: This implementation is still experimental. Please report issues on https://github.com/ray-project/ray/issues/. Thanks! - -``BOHB`` (Bayesian Optimization HyperBand) is a SearchAlgorithm that is backed by `HpBandSter `__ to perform sequential model-based hyperparameter optimization in conjunction with HyperBand. Note that this class does not extend ``ray.tune.suggest.BasicVariantGenerator``, so you will not be able to use Tune's default variant generation/search space declaration when using BOHB. - -Importantly, BOHB is intended to be paired with a specific scheduler class: `HyperBandForBOHB `__. - -This algorithm requires using the `ConfigSpace search space specification `_. In order to use this search algorithm, you will need to install ``HpBandSter`` and ``ConfigSpace``: - -.. 
code-block:: bash - - $ pip install hpbandster ConfigSpace - - -You can use ``TuneBOHB`` in conjunction with ``HyperBandForBOHB`` as follows: - -.. code-block:: python - - # BOHB uses ConfigSpace for their hyperparameter search space - import ConfigSpace as CS - - config_space = CS.ConfigurationSpace() - config_space.add_hyperparameter( - CS.UniformFloatHyperparameter("height", lower=10, upper=100)) - config_space.add_hyperparameter( - CS.UniformFloatHyperparameter("width", lower=0, upper=100)) - - experiment_metrics = dict(metric="episode_reward_mean", mode="min") - bohb_hyperband = HyperBandForBOHB( - time_attr="training_iteration", max_t=100, **experiment_metrics) - bohb_search = TuneBOHB( - config_space, max_concurrent=4, **experiment_metrics) - - tune.run(MyTrainableClass, - name="bohb_test", - scheduler=bohb_hyperband, - search_alg=bohb_search, - num_samples=5) - -Take a look at `an example here `_. See the `BOHB paper `_ for more details. - -.. autoclass:: ray.tune.suggest.bohb.TuneBOHB - :show-inheritance: - :noindex: - -ZOOpt Search ------------- - -The ``ZOOptSearch`` is a SearchAlgorithm for derivative-free optimization. It is backed by the `ZOOpt `__ package. Currently, Asynchronous Sequential RAndomized COordinate Shrinking (ASRacos) algorithm is implemented in Tune. Note that this class does not extend ``ray.tune.suggest.BasicVariantGenerator``, so you will not be able to use Tune’s default variant generation/search space declaration when using ZOOptSearch. - -In order to use this search algorithm, you will need to install the ZOOpt package **(>=0.4.0)** via the following command: - -.. code-block:: bash - - $ pip install -U zoopt - -Keep in mind that zoopt only supports Python 3. - -This algorithm allows users to mix continuous dimensions and discrete dimensions, for example: - -.. 
code-block:: python - - dim_dict = { - # for continuous dimensions: (continuous, search_range, precision) - "height": (ValueType.CONTINUOUS, [-10, 10], 1e-2), - # for discrete dimensions: (discrete, search_range, has_order) - "width": (ValueType.DISCRETE, [-10, 10], False) - } - - config = { - "num_samples": 200 if args.smoke_test else 1000, - "config": { - "iterations": 10, # evaluation times - }, - "stop": { - "timesteps_total": 10 # cumstom stop rules - } - } - - zoopt_search = ZOOptSearch( - algo="Asracos", # only support ASRacos currently - budget=config["num_samples"], - dim_dict=dim_dict, - max_concurrent=4, - metric="mean_loss", - mode="min") - - run(my_objective, - search_alg=zoopt_search, - name="zoopt_search", - **config) - -An example of this can be found in `zoopt_example.py `__. - -.. autoclass:: ray.tune.suggest.zoopt.ZOOptSearch - :show-inheritance: - :noindex: - -Contributing a New Algorithm ----------------------------- - -If you are interested in implementing or contributing a new Search Algorithm, the API is straightforward: - -.. autoclass:: ray.tune.suggest.SearchAlgorithm - :members: - :noindex: - -Model-Based Suggestion Algorithms -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Often times, hyperparameter search algorithms are model-based and may be quite simple to implement. For this, one can extend the following abstract class and implement ``on_trial_complete``, and ``suggest``. - -.. 
autoclass:: ray.tune.suggest.Searcher - :show-inheritance: - :noindex: diff --git a/doc/source/tune/_tutorials/tune-60-seconds.rst b/doc/source/tune/_tutorials/tune-60-seconds.rst index e45f9b4db..08b1887a7 100644 --- a/doc/source/tune/_tutorials/tune-60-seconds.rst +++ b/doc/source/tune/_tutorials/tune-60-seconds.rst @@ -132,9 +132,9 @@ To optimize the hyperparameters of your training process, you will want to use a stop={"training_iteration": 20} ) -Tune has SearchAlgorithms that integrate with many popular **optimization** libraries, such as :ref:`Nevergrad ` and :ref:`Hyperopt `. +Tune has SearchAlgorithms that integrate with many popular **optimization** libraries, such as :ref:`Nevergrad ` and :ref:`Hyperopt `. -See the documentation: :ref:`searchalg-ref`. +See the documentation: :ref:`tune-search-alg`. Trial Schedulers ---------------- diff --git a/doc/source/tune/_tutorials/tune-usage.rst b/doc/source/tune/_tutorials/tune-usage.rst index 14497b80e..9be3ed1b3 100644 --- a/doc/source/tune/_tutorials/tune-usage.rst +++ b/doc/source/tune/_tutorials/tune-usage.rst @@ -282,8 +282,11 @@ Note that in the above example the currently running trials will not stop immedi Logging/Tensorboard ------------------- +Tune by default will log results for Tensorboard, CSV, and JSON formats. If you need to log something lower level like model weights or gradients, see :ref:`Trainable Logging `. + +**Learn more about logging and customizations here**: :ref:`loggers-docstring`. + Tune will log the results of each trial to a subfolder under a specified local dir, which defaults to ``~/ray_results``. -Tune by default will log results for Tensorboard, CSV, and JSON formats. .. code-block:: bash @@ -292,8 +295,6 @@ Tune by default will log results for Tensorboard, CSV, and JSON formats. # trainable_name and trial_name are autogenerated. tune.run(trainable, num_samples=2) -Learn about how to customize logging paths and outputs: :ref:`loggers-docstring`. 
- Tune automatically outputs Tensorboard files during ``tune.run``. To visualize learning in tensorboard, install tensorboardX: .. code-block:: bash diff --git a/doc/source/tune/api_docs/analysis.rst b/doc/source/tune/api_docs/analysis.rst index d0c42f1bb..2ec32f686 100644 --- a/doc/source/tune/api_docs/analysis.rst +++ b/doc/source/tune/api_docs/analysis.rst @@ -3,9 +3,6 @@ Analysis (tune.analysis) ======================== -Analyzing Results ------------------ - You can use the ``ExperimentAnalysis`` object for analyzing results. It is returned automatically when calling ``tune.run``. .. code-block:: python @@ -41,15 +38,15 @@ You may want to get a summary of multiple experiments that point to the same ``l .. _exp-analysis-docstring: -ExperimentAnalysis -~~~~~~~~~~~~~~~~~~ +ExperimentAnalysis (tune.ExperimentAnalysis) +-------------------------------------------- .. autoclass:: ray.tune.ExperimentAnalysis :show-inheritance: :members: -Analysis -~~~~~~~~ +Analysis (tune.Analysis) +------------------------ .. autoclass:: ray.tune.Analysis :members: diff --git a/doc/source/tune/api_docs/logging.rst b/doc/source/tune/api_docs/logging.rst index 9b29a4fcc..4e985c1ce 100644 --- a/doc/source/tune/api_docs/logging.rst +++ b/doc/source/tune/api_docs/logging.rst @@ -3,10 +3,131 @@ Loggers (tune.logger) ===================== -Tune has default loggers for Tensorboard, CSV, and JSON formats. +Tune has default loggers for Tensorboard, CSV, and JSON formats. By default, Tune only logs the returned result dictionaries from the training function. -Logging Path ------------- +If you need to log something lower level like model weights or gradients, see :ref:`Trainable Logging `. + +Custom Loggers +-------------- + +You can create a custom logger by inheriting the Logger interface (:ref:`logger-interface`): + +.. code-block:: python + + from ray.tune.logger import Logger + + class MLFLowLogger(Logger): + """MLFlow logger. 
+ + Requires the experiment configuration to have a MLFlow Experiment ID + or manually set the proper environment variables. + """ + + def _init(self): + from mlflow.tracking import MlflowClient + client = MlflowClient() + + # self.config is the same config that your Trainable will see. + run = client.create_run(self.config.get("mlflow_experiment_id")) + self._run_id = run.info.run_id + for key, value in self.config.items(): + client.log_param(self._run_id, key, value) + self.client = client + + def on_result(self, result): + for key, value in result.items(): + if not isinstance(value, float): + continue + self.client.log_metric( + self._run_id, key, value, step=result.get(TRAINING_ITERATION)) + + def close(self): + self.client.set_terminated(self._run_id) + +You can then pass in your own logger as follows: + +.. code-block:: python + + from ray.tune.logger import DEFAULT_LOGGERS + + tune.run( + MyTrainableClass, + name="experiment_name", + loggers=DEFAULT_LOGGERS + (CustomLogger1, CustomLogger2) + ) + +These loggers will be called along with the default Tune loggers. You can also check out `logger.py `__ for implementation details. + +An example of creating a custom logger can be found in `logging_example.py `__. + +.. _trainable-logging: + +Trainable Logging +----------------- + +By default, Tune only logs the *training result dictionaries* from your Trainable. However, you may want to visualize the model weights, model graph, or use a custom logging library that requires multi-process logging. For example, you may want to do this if: + + * you're using `Weights and Biases `_ + * you're using `MLFlow `__ + * you're trying to log images to Tensorboard. + +You can do this in the trainable, as shown below: + +.. tip:: Make sure that any logging calls or objects stay within scope of the Trainable. You may see Pickling/serialization errors or inconsistent logs otherwise. + +**Function API**: + +.. 
code-block:: python + + def trainable(config): + library.init( + name=trial_id, + id=trial_id, + resume=trial_id, + reinit=True, + allow_val_change=True) + library.set_log_path(tune.track.logdir) + + for step in range(100): + library.log_model(...) + library.log(results, step=step) + tune.track.log(results) + + +**Class API**: + +.. code-block:: python + + class CustomLogging(tune.Trainable): + def _setup(self, config): + trial_id = self.trial_id + library.init( + name=trial_id, + id=trial_id, + resume=trial_id, + reinit=True, + allow_val_change=True) + library.set_log_path(self.logdir) + + def _train(self): + library.log_model(...) + + def _log_result(self, result): + res_dict = { + str(k): v + for k, v in result.items() + if (v and "config" not in k and not isinstance(v, str)) + } + step = result["training_iteration"] + library.log(res_dict, step=step) + +Use ``self.logdir`` (only for Class API) or ``tune.track.logdir`` (only for Function API) for the trial log directory. + +In the distributed case, these logs will be sync'ed back to the driver under your logger path. This will allow you to visualize and analyze logs of all distributed training workers on a single machine. + + +Log Directory +------------- Tune will log the results of each trial to a subfolder under a specified local dir, which defaults to ``~/ray_results``. @@ -51,25 +172,6 @@ to `tune.run`. This takes a function with the following signature: See the documentation on Trials: :ref:`trial-docstring`. -Custom Loggers --------------- - -You can pass in your own logging mechanisms to output logs in custom formats as follows: - -.. code-block:: python - - from ray.tune.logger import DEFAULT_LOGGERS - - tune.run( - MyTrainableClass, - name="experiment_name", - loggers=DEFAULT_LOGGERS + (CustomLogger1, CustomLogger2) - ) - -These loggers will be called along with the default Tune loggers. All loggers must inherit the Logger interface (:ref:`logger-interface`).
You can also check out `logger.py `__ for implementation details. - -An example can be found in `logging_example.py `__. - Viskit ------ @@ -85,13 +187,6 @@ The nonrelevant metrics (like timing stats) can be disabled on the left to show .. image:: /ray-tune-viskit.png -.. _logger-interface: - -Logger ------- - -.. autoclass:: ray.tune.logger.Logger - UnifiedLogger ------------- @@ -118,3 +213,11 @@ MLFLowLogger Tune also provides a default logger for `MLFlow `_. You can install MLFlow via ``pip install mlflow``. An example can be found `mlflow_example.py `__. Note that this currently does not include artifact logging support. For this, you can use the native MLFlow APIs inside your Trainable definition. .. autoclass:: ray.tune.logger.MLFLowLogger + + +.. _logger-interface: + +Logger +------ + +.. autoclass:: ray.tune.logger.Logger diff --git a/doc/source/tune/api_docs/schedulers.rst b/doc/source/tune/api_docs/schedulers.rst index a67bbceed..4d03e276e 100644 --- a/doc/source/tune/api_docs/schedulers.rst +++ b/doc/source/tune/api_docs/schedulers.rst @@ -1,38 +1,172 @@ -.. _schedulers-ref: +.. _tune-schedulers: Trial Schedulers (tune.schedulers) ================================== -FIFOScheduler -~~~~~~~~~~~~~ +In Tune, some hyperparameter optimization algorithms are written as "scheduling algorithms". These Trial Schedulers can early terminate bad trials, pause trials, clone trials, and alter hyperparameters of a running trial. -.. autoclass:: ray.tune.schedulers.FIFOScheduler +All Trial Schedulers take in a ``metric``, which is a value returned in the result dict of your Trainable and is maximized or minimized according to ``mode``. -HyperBandScheduler -~~~~~~~~~~~~~~~~~~ +.. code-block:: python -.. autoclass:: ray.tune.schedulers.HyperBandScheduler + tune.run( ... , scheduler=Scheduler(metric="accuracy", mode="max")) -ASHAScheduler/AsyncHyperBandScheduler -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. 
_schedulers-ref: + +Summary +------- + +Tune includes distributed implementations of early stopping algorithms such as `Median Stopping Rule `__, `HyperBand `__, and `ASHA `__. Tune also includes a distributed implementation of `Population Based Training (PBT) `__. + +.. tip:: The easiest scheduler to start with is the ``ASHAScheduler`` which will aggressively terminate low-performing trials. + +When using schedulers, you may face compatibility issues, as shown in the below compatibility matrix. Certain schedulers cannot be used with Search Algorithms, and certain schedulers are only compatible with the :ref:`tune-class-api`. + +.. list-table:: TrialScheduler Feature Compatibility Matrix + :header-rows: 1 + + * - Scheduler + - Class API Required? + - SearchAlg Compatible? + - Example + * - :ref:`ASHA ` + - No + - Yes + - `Link `__ + * - :ref:`Median Stopping Rule ` + - No + - Yes + - :ref:`Link ` + * - :ref:`HyperBand ` + - Yes + - Yes + - `Link `__ + * - :ref:`BOHB ` + - Yes + - Only TuneBOHB + - `Link `__ + * - :ref:`Population Based Training ` + - Yes + - Not Compatible + - `Link `__ + +.. _tune-scheduler-hyperband: + +ASHA (tune.schedulers.ASHAScheduler) +------------------------------------ + +The `ASHA `__ scheduler can be used by setting the ``scheduler`` parameter of ``tune.run``, e.g. + +.. code-block:: python + + asha_scheduler = ASHAScheduler( + time_attr='training_iteration', + metric='episode_reward_mean', + mode='max', + max_t=100, + grace_period=10, + reduction_factor=3, + brackets=3) + tune.run( ... , scheduler=asha_scheduler) + +Compared to the original version of HyperBand, this implementation provides better parallelism and avoids straggler issues during eliminations. **We recommend using this over the standard HyperBand scheduler.** An example of this can be `found here `_. .. autoclass:: ray.tune.schedulers.AsyncHyperBandScheduler .. autoclass:: ray.tune.schedulers.ASHAScheduler -MedianStoppingRule -~~~~~~~~~~~~~~~~~~ +.. 
_tune-original-hyperband: + +HyperBand (tune.schedulers.HyperBandScheduler) +---------------------------------------------- + +Tune implements the `standard version of HyperBand `__. **We recommend using the ASHA Scheduler over the standard HyperBand scheduler.** + +.. autoclass:: ray.tune.schedulers.HyperBandScheduler + + +HyperBand Implementation Details +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Implementation details may deviate slightly from theory but are focused on increasing usability. Note: ``R``, ``s_max``, and ``eta`` are parameters of HyperBand given by the paper. See `this post `_ for context. + +1. Both ``s_max`` (representing the ``number of brackets - 1``) and ``eta``, representing the downsampling rate, are fixed. In many practical settings, ``R``, which represents some resource unit and often the number of training iterations, can be set reasonably large, like ``R >= 200``. For simplicity, assume ``eta = 3``. Varying ``R`` between ``R = 200`` and ``R = 1000`` creates a huge range of the number of trials needed to fill up all brackets. + +.. image:: /images/hyperband_bracket.png + +On the other hand, holding ``R`` constant at ``R = 300`` and varying ``eta`` also leads to HyperBand configurations that are not very intuitive: + +.. image:: /images/hyperband_eta.png + +The implementation takes the same configuration as the example given in the paper and exposes ``max_t``, which is not a parameter in the paper. + +2. The example in the `post `_ to calculate ``n_0`` is actually a little different than the algorithm given in the paper. In this implementation, we implement ``n_0`` according to the paper (which is `n` in the below example): + +.. image:: /images/hyperband_allocation.png + + +3. There are also implementation specific details like how trials are placed into brackets which are not covered in the paper. 
This implementation places trials within brackets according to smaller bracket first - meaning that with a low number of trials, there will be less early stopping. + +.. _tune-scheduler-msr: + +Median Stopping Rule (tune.schedulers.MedianStoppingRule) +--------------------------------------------------------- + +The Median Stopping Rule implements the simple strategy of stopping a trial if its performance falls below the median of other trials at similar points in time. .. autoclass:: ray.tune.schedulers.MedianStoppingRule -PopulationBasedTraining -~~~~~~~~~~~~~~~~~~~~~~~ +.. _tune-scheduler-pbt: + +Population Based Training (tune.schedulers.PopulationBasedTraining) +------------------------------------------------------------------- + +Tune includes a distributed implementation of `Population Based Training (PBT) `__. This can be enabled by setting the ``scheduler`` parameter of ``tune.run``, e.g. + +.. code-block:: python + + pbt_scheduler = PopulationBasedTraining( + time_attr='time_total_s', + metric='mean_accuracy', + mode='max', + perturbation_interval=600.0, + hyperparam_mutations={ + "lr": [1e-3, 5e-4, 1e-4, 5e-5, 1e-5], + "alpha": lambda: random.uniform(0.0, 1.0), + ... + }) + tune.run( ... , scheduler=pbt_scheduler) + +When the PBT scheduler is enabled, each trial variant is treated as a member of the population. Periodically, top-performing trials are checkpointed (this requires your Trainable to support :ref:`save and restore `). Low-performing trials clone the checkpoints of top performers and perturb the configurations in the hope of discovering an even better variation. + +You can run this `toy PBT example `__ to get an idea of how PBT operates. When training in PBT mode, a single trial may see many different hyperparameters over its lifetime, which is recorded in its ``result.json`` file. The following figure generated by the example shows PBT optimizing a LR schedule over the course of a single experiment: + +.. image:: /pbt.png .. 
autoclass:: ray.tune.schedulers.PopulationBasedTraining +.. _tune-scheduler-bohb: + +BOHB (tune.schedulers.HyperBandForBOHB) +--------------------------------------- + +This class is a variant of HyperBand that enables the `BOHB Algorithm `_. This implementation is true to the original HyperBand implementation and does not implement pipelining nor straggler mitigation. + +This is to be used in conjunction with the Tune BOHB search algorithm. See :ref:`TuneBOHB ` for package requirements, examples, and details. + +An example of this in use can be found in `bohb_example.py `_. + +.. autoclass:: ray.tune.schedulers.HyperBandForBOHB + + +FIFOScheduler +------------- + +.. autoclass:: ray.tune.schedulers.FIFOScheduler + TrialScheduler -~~~~~~~~~~~~~~ +-------------- .. autoclass:: ray.tune.schedulers.TrialScheduler :members: diff --git a/doc/source/tune/api_docs/suggestion.rst b/doc/source/tune/api_docs/suggestion.rst index c9d3facef..dd3853a2f 100644 --- a/doc/source/tune/api_docs/suggestion.rst +++ b/doc/source/tune/api_docs/suggestion.rst @@ -1,73 +1,197 @@ -.. _searchalg-ref: +.. _tune-search-alg: Search Algorithms (tune.suggest) ================================ -.. _repeater-doc: +Tune's Search Algorithms are wrappers around open-source optimization libraries for efficient hyperparameter selection. Each library has a specific way of defining the search space - please refer to their documentation for more details. -Repeater --------- +You can utilize these search algorithms as follows: -.. autoclass:: ray.tune.suggest.Repeater +.. code-block:: python -ConcurrencyLimiter ------------------- + from ray.tune.suggest.hyperopt import HyperOptSearch + tune.run(my_function, search_alg=HyperOptSearch(...)) -.. autoclass:: ray.tune.suggest.ConcurrencyLimiter +Summary +------- -AxSearch --------- +.. 
list-table:: + :header-rows: 1 + + * - SearchAlgorithm + - Summary + - Website + - Code Example + * - :ref:`AxSearch ` + - Bayesian/Bandit Optimization + - [`Ax `__] + - `Link `__ + * - :ref:`DragonflySearch ` + - Scalable Bayesian Optimization + - [`Dragonfly `__] + - `Link `__ + * - :ref:`SkOptSearch ` + - Bayesian Optimization + - [`Scikit-Optimize `__] + - `Link `__ + * - :ref:`HyperOptSearch ` + - Tree-Parzen Estimators + - [`HyperOpt `__] + - `Link `__ + * - :ref:`BayesOptSearch ` + - Bayesian Optimization + - [`BayesianOptimization `__] + - `Link `__ + * - :ref:`TuneBOHB ` + - Bayesian Opt/HyperBand + - [`BOHB `__] + - `Link `__ + * - :ref:`NevergradSearch ` + - Gradient-free Optimization + - [`Nevergrad `__] + - `Link `__ + * - :ref:`ZOOptSearch ` + - Zeroth-order Optimization + - [`ZOOpt `__] + - `Link `__ + * - :ref:`SigOptSearch ` + - Closed source + - [`SigOpt `__] + - `Link `__ + + +.. note:: Search algorithms will require a different search space declaration than the default Tune format - meaning that you will not be able to combine ``tune.grid_search`` with the below integrations. + +.. note:: Unlike :ref:`Tune's Trial Schedulers `, Tune SearchAlgorithms cannot affect or stop training processes. However, you can use them together to **early stop the evaluation of bad trials**. + +**Want to use your own algorithm?** The interface is easy to implement. :ref:`Read instructions here `. + + +Tune also provides helpful utilities to use with Search Algorithms: + + * :ref:`repeater`: Support for running each *sampled hyperparameter* with multiple random seeds. + * :ref:`limiter`: Limits the number of concurrent trials when running optimization. + + +.. _tune-ax: + +Ax (tune.suggest.ax.AxSearch) +----------------------------- .. autoclass:: ray.tune.suggest.ax.AxSearch -BayesOptSearch --------------- +.. _bayesopt: + +Bayesian Optimization (tune.suggest.bayesopt.BayesOptSearch) +------------------------------------------------------------ + .. 
autoclass:: ray.tune.suggest.bayesopt.BayesOptSearch -TuneBOHB --------- +.. _`BayesianOptimization search space specification`: https://github.com/fmfn/BayesianOptimization/blob/master/examples/advanced-tour.ipynb + +.. _suggest-TuneBOHB: + +BOHB (tune.suggest.bohb.TuneBOHB) +--------------------------------- + +BOHB (Bayesian Optimization HyperBand) is an algorithm that both terminates bad trials and also uses Bayesian Optimization to improve the hyperparameter search. It is backed by the `HpBandSter library `_. + +Importantly, BOHB is intended to be paired with a specific scheduler class: `HyperBandForBOHB `__. + +This algorithm requires using the `ConfigSpace search space specification `_. In order to use this search algorithm, you will need to install ``HpBandSter`` and ``ConfigSpace``: + +.. code-block:: bash + + $ pip install hpbandster ConfigSpace + +See the `BOHB paper `_ for more details. .. autoclass:: ray.tune.suggest.bohb.TuneBOHB -DragonflySearch ---------------- +.. _Dragonfly: + +Dragonfly (tune.suggest.dragonfly.DragonflySearch) +-------------------------------------------------- .. autoclass:: ray.tune.suggest.dragonfly.DragonflySearch -HyperOptSearch --------------- +.. _tune-hyperopt: + +HyperOpt (tune.suggest.hyperopt.HyperOptSearch) +----------------------------------------------- .. autoclass:: ray.tune.suggest.hyperopt.HyperOptSearch -NevergradSearch ---------------- +.. _nevergrad: + +Nevergrad (tune.suggest.nevergrad.NevergradSearch) +-------------------------------------------------- .. autoclass:: ray.tune.suggest.nevergrad.NevergradSearch -SigOptSearch ------------- +.. _`Nevergrad README's Optimization section`: https://github.com/facebookresearch/nevergrad/blob/master/docs/optimization.rst#choosing-an-optimizer + +.. _sigopt: + +SigOpt (tune.suggest.sigopt.SigOptSearch) +----------------------------------------- + +You will need to use the `SigOpt experiment and space specification `__ to specify your search space. .. 
autoclass:: ray.tune.suggest.sigopt.SigOptSearch -SkOptSearch ------------ +.. _skopt: + +Scikit-Optimize (tune.suggest.skopt.SkOptSearch) +------------------------------------------------ .. autoclass:: ray.tune.suggest.skopt.SkOptSearch -ZOOptSearch ------------ +.. _`skopt Optimizer object`: https://scikit-optimize.github.io/#skopt.Optimizer + +.. _zoopt: + +ZOOpt (tune.suggest.zoopt.ZOOptSearch) +-------------------------------------- .. autoclass:: ray.tune.suggest.zoopt.ZOOptSearch -SearchAlgorithm ---------------- +.. _repeater: -.. autoclass:: ray.tune.suggest.SearchAlgorithm - :members: +Repeated Evaluations (tune.suggest.Repeater) +-------------------------------------------- -Searcher --------- +Use ``ray.tune.suggest.Repeater`` to average over multiple evaluations of the same +hyperparameter configurations. This is useful in cases where the evaluated +training procedure has high variance (i.e., in reinforcement learning). + +By default, ``Repeater`` will take in a ``repeat`` parameter and a ``search_alg``. +The ``search_alg`` will suggest new configurations to try, and the ``Repeater`` +will run ``repeat`` trials of the configuration. It will then average the +``search_alg.metric`` from the final results of each repeated trial. + + +.. warning:: It is recommended to not use ``Repeater`` with a TrialScheduler. + Early termination can negatively affect the average reported metric. + +.. autoclass:: ray.tune.suggest.Repeater + +.. _limiter: + +ConcurrencyLimiter (tune.suggest.ConcurrencyLimiter) +---------------------------------------------------- + +Use ``ray.tune.suggest.ConcurrencyLimiter`` to limit the amount of concurrency when using a search algorithm. This is useful when a given optimization algorithm does not parallelize very well (like a naive Bayesian Optimization). + +.. autoclass:: ray.tune.suggest.ConcurrencyLimiter + +.. 
_byo-algo: + +Implementing your own Search Algorithm +-------------------------------------- + +If you are interested in implementing or contributing a new Search Algorithm, provide the following interface: .. autoclass:: ray.tune.suggest.Searcher :members: diff --git a/python/ray/tune/examples/hyperopt_example.py b/python/ray/tune/examples/hyperopt_example.py index 3d8bfab81..b81680b4c 100644 --- a/python/ray/tune/examples/hyperopt_example.py +++ b/python/ray/tune/examples/hyperopt_example.py @@ -28,7 +28,7 @@ if __name__ == "__main__": parser.add_argument( "--smoke-test", action="store_true", help="Finish quickly for testing") args, _ = parser.parse_known_args() - ray.init() + ray.init(configure_logging=False) space = { "width": hp.uniform("width", 0, 20), diff --git a/python/ray/tune/suggest/ax.py b/python/ray/tune/suggest/ax.py index db02b6bea..6fa26703a 100644 --- a/python/ray/tune/suggest/ax.py +++ b/python/ray/tune/suggest/ax.py @@ -10,11 +10,18 @@ logger = logging.getLogger(__name__) class AxSearch(Searcher): - """A wrapper around Ax to provide trial suggestions. + """Uses `Ax `_ to optimize hyperparameters. - Requires Ax to be installed. Ax is an open source tool from - Facebook for configuring and optimizing experiments. More information - can be found in https://ax.dev/. + Ax is a platform for understanding, managing, deploying, and + automating adaptive experiments. Ax provides an easy to use + interface with BoTorch, a flexible, modern library for Bayesian + optimization in PyTorch. More information can be found in https://ax.dev/. + + To use this search algorithm, you must install Ax and sqlalchemy: + + .. code-block:: bash + + $ pip install ax-platform sqlalchemy Parameters: parameters (list[dict]): Parameters in the experiment search space. 
diff --git a/python/ray/tune/suggest/bayesopt.py b/python/ray/tune/suggest/bayesopt.py index 1de179cfa..2d3936ac0 100644 --- a/python/ray/tune/suggest/bayesopt.py +++ b/python/ray/tune/suggest/bayesopt.py @@ -12,10 +12,19 @@ logger = logging.getLogger(__name__) class BayesOptSearch(Searcher): - """A wrapper around BayesOpt to provide trial suggestions. + """Uses fmfn/BayesianOptimization to optimize hyperparameters. - Requires BayesOpt to be installed. You can install BayesOpt with the - command: ``pip install bayesian-optimization``. + fmfn/BayesianOptimization is a library for Bayesian Optimization. More + info can be found here: https://github.com/fmfn/BayesianOptimization. + + You will need to install fmfn/BayesianOptimization via the following: + + .. code-block:: bash + + pip install bayesian-optimization + + This algorithm requires setting a search space using the + `BayesianOptimization search space specification`_. Parameters: space (dict): Continuous search space. Parameters will be sampled from diff --git a/python/ray/tune/suggest/dragonfly.py b/python/ray/tune/suggest/dragonfly.py index a4e60415d..d859eabd4 100644 --- a/python/ray/tune/suggest/dragonfly.py +++ b/python/ray/tune/suggest/dragonfly.py @@ -16,27 +16,26 @@ logger = logging.getLogger(__name__) class DragonflySearch(Searcher): - """A wrapper around Dragonfly to provide trial suggestions. + """Uses Dragonfly to optimize hyperparameters. - Requires Dragonfly to be installed via ``pip install dragonfly-opt``. + Dragonfly provides an array of tools to scale up Bayesian optimisation to + expensive large scale problems, including high dimensional optimisation, + parallel evaluations in synchronous or asynchronous settings, + multi-fidelity optimisation (using cheap approximations to speed up the + optimisation process), and multi-objective optimisation. For more info: - Parameters: - optimizer (dragonfly.opt.BlackboxOptimiser): Optimizer provided - from dragonfly. 
Choose an optimiser that extends BlackboxOptimiser. - metric (str): The training result objective value attribute. - mode (str): One of {min, max}. Determines whether objective is - minimizing or maximizing the metric attribute. - points_to_evaluate (list of lists): A list of points you'd like to run - first before sampling from the optimiser, e.g. these could be - parameter configurations you already know work well to help - the optimiser select good values. Each point is a list of the - parameters using the order definition given by parameter_names. - evaluated_rewards (list): If you have previously evaluated the - parameters passed in as points_to_evaluate you can avoid - re-running those trials by passing in the reward attributes - as a list so the optimiser can be told the results without - needing to re-compute the trial. Must be the same length as - points_to_evaluate. + * Dragonfly Website: https://github.com/dragonfly/dragonfly + * Dragonfly Documentation: https://dragonfly-opt.readthedocs.io/ + + To use this search algorithm, install Dragonfly: + + .. code-block:: bash + + $ pip install dragonfly-opt + + + This interface requires using FunctionCallers and optimizers provided by + Dragonfly. .. code-block:: python @@ -70,6 +69,25 @@ class DragonflySearch(Searcher): algo = DragonflySearch(optimizer, metric="objective", mode="max") tune.run(my_func, algo=algo) + + Parameters: + optimizer (dragonfly.opt.BlackboxOptimiser): Optimizer provided + from dragonfly. Choose an optimiser that extends BlackboxOptimiser. + metric (str): The training result objective value attribute. + mode (str): One of {min, max}. Determines whether objective is + minimizing or maximizing the metric attribute. + points_to_evaluate (list of lists): A list of points you'd like to run + first before sampling from the optimiser, e.g. these could be + parameter configurations you already know work well to help + the optimiser select good values. 
Each point is a list of the + parameters using the order definition given by parameter_names. + evaluated_rewards (list): If you have previously evaluated the + parameters passed in as points_to_evaluate you can avoid + re-running those trials by passing in the reward attributes + as a list so the optimiser can be told the results without + needing to re-compute the trial. Must be the same length as + points_to_evaluate. + """ def __init__(self, diff --git a/python/ray/tune/suggest/hyperopt.py b/python/ray/tune/suggest/hyperopt.py index f328630ea..c70fccf37 100644 --- a/python/ray/tune/suggest/hyperopt.py +++ b/python/ray/tune/suggest/hyperopt.py @@ -19,12 +19,42 @@ logger = logging.getLogger(__name__) class HyperOptSearch(Searcher): """A wrapper around HyperOpt to provide trial suggestions. - Requires HyperOpt to be installed from source. - Uses the Tree-structured Parzen Estimators algorithm, although can be - trivially extended to support any algorithm HyperOpt uses. Externally - added trials will not be tracked by HyperOpt. Trials of the current run - can be saved using save method, trials of a previous run can be loaded - using restore method, thus enabling a warm start feature. + HyperOpt is a Python library for serial and parallel optimization + over awkward search spaces, which may include real-valued, discrete, + and conditional dimensions. More info can be found at + http://hyperopt.github.io/hyperopt. + + HyperOptSearch uses the Tree-structured Parzen Estimators algorithm, + though it can be trivially extended to support any algorithm HyperOpt + supports. + + To use this search algorithm, you will need to install HyperOpt: + + .. code-block:: bash + + pip install -U hyperopt + + You will not be able to leverage Tune's default ``grid_search`` + and random search primitives when using HyperOptSearch. You need to + use the `HyperOpt search space specification + `_. + + .. 
code-block:: python + + space = { + 'width': hp.uniform('width', 0, 20), + 'height': hp.uniform('height', -100, 100), + 'activation': hp.choice("activation", ["relu", "tanh"]) + } + current_best_params = [{ + 'width': 10, + 'height': 0, + 'activation': 0, # The index of "relu" + }] + algo = HyperOptSearch( + space, metric="mean_loss", mode="min", + points_to_evaluate=current_best_params) + Parameters: space (dict): HyperOpt configuration. Parameters will be sampled @@ -50,22 +80,6 @@ class HyperOptSearch(Searcher): max_concurrent: Deprecated. use_early_stopped_trials: Deprecated. - .. code-block:: python - - space = { - 'width': hp.uniform('width', 0, 20), - 'height': hp.uniform('height', -100, 100), - 'activation': hp.choice("activation", ["relu", "tanh"]) - } - current_best_params = [{ - 'width': 10, - 'height': 0, - 'activation': 0, # The index of "relu" - }] - algo = HyperOptSearch( - space, metric="mean_loss", mode="min", - points_to_evaluate=current_best_params) - """ def __init__( diff --git a/python/ray/tune/suggest/nevergrad.py b/python/ray/tune/suggest/nevergrad.py index 55732fae1..2e62de5a4 100644 --- a/python/ray/tune/suggest/nevergrad.py +++ b/python/ray/tune/suggest/nevergrad.py @@ -11,14 +11,30 @@ logger = logging.getLogger(__name__) class NevergradSearch(Searcher): - """A wrapper around Nevergrad to provide trial suggestions. - - Requires Nevergrad to be installed. + """Uses Nevergrad to optimize hyperparameters. Nevergrad is an open source tool from Facebook for derivative free - optimization of parameters and/or hyperparameters. It features a wide - range of optimizers in a standard ask and tell interface. More information - can be found at https://github.com/facebookresearch/nevergrad. + optimization. More info can be found at: + https://github.com/facebookresearch/nevergrad. + + You will need to install Nevergrad via the following command: + + .. 
code-block:: bash + + $ pip install nevergrad + + This algorithm requires using an optimizer provided by Nevergrad, of + which there are many options. A good rundown can be found on + the `Nevergrad README's Optimization section`_. + + .. code-block:: python + + from nevergrad.optimization import optimizerlib + + instrumentation = 1 + optimizer = optimizerlib.OnePlusOne(instrumentation, budget=100) + algo = NevergradSearch( + optimizer, ["lr"], metric="mean_loss", mode="min") Parameters: optimizer (nevergrad.optimization.Optimizer): Optimizer provided @@ -33,15 +49,6 @@ class NevergradSearch(Searcher): use_early_stopped_trials: Deprecated. max_concurrent: Deprecated. - .. code-block:: python - - from nevergrad.optimization import optimizerlib - - instrumentation = 1 - optimizer = optimizerlib.OnePlusOne(instrumentation, budget=100) - algo = NevergradSearch( - optimizer, ["lr"], metric="mean_loss", mode="min") - Note: In nevergrad v0.2.0+, optimizers can be instrumented. For instance, the following will specifies searching diff --git a/python/ray/tune/suggest/repeater.py b/python/ray/tune/suggest/repeater.py index e2caf108a..215119ff0 100644 --- a/python/ray/tune/suggest/repeater.py +++ b/python/ray/tune/suggest/repeater.py @@ -88,6 +88,18 @@ class Repeater(Searcher): Trainable/Function config which corresponds to the index of the repeated trial. This can be used for seeds. Defaults to True. + Example: + + .. code-block:: python + + from ray.tune.suggest import Repeater + + search_alg = BayesOptSearch(...) + re_search_alg = Repeater(search_alg, repeat=10) + + # Repeat 2 samples 10 times each. 
+ tune.run(trainable, num_samples=20, search_alg=re_search_alg) + """ def __init__(self, searcher, repeat=1, set_index=True): diff --git a/python/ray/tune/suggest/sigopt.py b/python/ray/tune/suggest/sigopt.py index 84524c3bb..8e9ee92db 100644 --- a/python/ray/tune/suggest/sigopt.py +++ b/python/ray/tune/suggest/sigopt.py @@ -15,8 +15,16 @@ logger = logging.getLogger(__name__) class SigOptSearch(Searcher): """A wrapper around SigOpt to provide trial suggestions. - Requires SigOpt to be installed. Requires user to store their SigOpt - API key locally as an environment variable at `SIGOPT_KEY`. + You must install SigOpt and have a SigOpt API key to use this module. + Store the API token as an environment variable ``SIGOPT_KEY`` as follows: + + .. code-block:: bash + + pip install -U sigopt + export SIGOPT_KEY= ... + + You will need to use the `SigOpt experiment and space specification + `_. This module manages its own concurrency. diff --git a/python/ray/tune/suggest/skopt.py b/python/ray/tune/suggest/skopt.py index 46d21db32..b4adee351 100644 --- a/python/ray/tune/suggest/skopt.py +++ b/python/ray/tune/suggest/skopt.py @@ -41,9 +41,19 @@ def _validate_warmstart(parameter_names, points_to_evaluate, class SkOptSearch(Searcher): - """A wrapper around skopt to provide trial suggestions. + """Uses Scikit Optimize (skopt) to optimize hyperparameters. - Requires skopt to be installed. + Scikit-optimize is a black-box optimization library. + Read more here: https://scikit-optimize.github.io. + + You will need to install Scikit-Optimize to use this module. + + .. code-block:: bash + + pip install scikit-optimize + + + This Search Algorithm requires you to pass in a `skopt Optimizer object`_. Parameters: optimizer (skopt.optimizer.Optimizer): Optimizer provided @@ -68,14 +78,18 @@ class SkOptSearch(Searcher): use_early_stopped_trials: Deprecated. 
Example: - >>> from skopt import Optimizer - >>> optimizer = Optimizer([(0,20),(-100,100)]) - >>> current_best_params = [[10, 0], [15, -20]] - >>> algo = SkOptSearch(optimizer, - >>> ["width", "height"], - >>> metric="mean_loss", - >>> mode="min", - >>> points_to_evaluate=current_best_params) + + .. code-block:: python + + from skopt import Optimizer + optimizer = Optimizer([(0,20),(-100,100)]) + current_best_params = [[10, 0], [15, -20]] + + algo = SkOptSearch(optimizer, + ["width", "height"], + metric="mean_loss", + mode="min", + points_to_evaluate=current_best_params) """ def __init__(self, diff --git a/python/ray/tune/suggest/zoopt.py b/python/ray/tune/suggest/zoopt.py index 369c34b41..874853b4e 100644 --- a/python/ray/tune/suggest/zoopt.py +++ b/python/ray/tune/suggest/zoopt.py @@ -15,22 +15,12 @@ logger = logging.getLogger(__name__) class ZOOptSearch(Searcher): """A wrapper around ZOOpt to provide trial suggestions. - Requires zoopt package (>=0.4.0) to be installed. You can install it - with the command: ``pip install -U zoopt``. + ZOOptSearch is a library for derivative-free optimization. It is backed by + the `ZOOpt `__ package. Currently, + Asynchronous Sequential RAndomized COordinate Shrinking (ASRacos) + is implemented in Tune. - Parameters: - algo (str): To specify an algorithm in zoopt you want to use. - Only support ASRacos currently. - budget (int): Number of samples. - dim_dict (dict): Dimension dictionary. - For continuous dimensions: (continuous, search_range, precision); - For discrete dimensions: (discrete, search_range, has_order). - More details can be found in zoopt package. - metric (str): The training result objective value attribute. - Defaults to "episode_reward_mean". - mode (str): One of {min, max}. Determines whether objective is - minimizing or maximizing the metric attribute. - Defaults to "min". + To use ZOOptSearch, install zoopt (>=0.4.0): ``pip install -U zoopt``. .. 
code-block:: python @@ -65,6 +55,20 @@ class ZOOptSearch(Searcher): name="zoopt_search", **config) + Parameters: + algo (str): To specify an algorithm in zoopt you want to use. + Only support ASRacos currently. + budget (int): Number of samples. + dim_dict (dict): Dimension dictionary. + For continuous dimensions: (continuous, search_range, precision); + For discrete dimensions: (discrete, search_range, has_order). + More details can be found in zoopt package. + metric (str): The training result objective value attribute. + Defaults to "episode_reward_mean". + mode (str): One of {min, max}. Determines whether objective is + minimizing or maximizing the metric attribute. + Defaults to "min". + """ optimizer = None