From 6e6674a8242124242ff1f4802bd3768a864ed50c Mon Sep 17 00:00:00 2001 From: Eric Liang Date: Mon, 1 Jan 2018 11:10:44 -0800 Subject: [PATCH] [rllib] Split docs into user and development guide (#1377) * docs * Update README.rst * Sat Dec 30 15:23:49 PST 2017 * comments * Sun Dec 31 23:33:30 PST 2017 * Sun Dec 31 23:33:38 PST 2017 * Sun Dec 31 23:37:46 PST 2017 * Sun Dec 31 23:39:28 PST 2017 * Sun Dec 31 23:43:05 PST 2017 * Sun Dec 31 23:51:55 PST 2017 * Sun Dec 31 23:52:51 PST 2017 --- README.rst | 4 +- doc/source/index.rst | 5 +- doc/source/rllib-dev.rst | 129 +++++++++++++++++++++++++ doc/source/rllib.rst | 110 ++++----------------- doc/source/tune.rst | 6 +- python/ray/rllib/README.rst | 23 +++-- python/ray/rllib/models/action_dist.py | 2 +- python/ray/rllib/models/catalog.py | 12 ++- python/ray/tune/README.rst | 6 +- 9 files changed, 181 insertions(+), 116 deletions(-) create mode 100644 doc/source/rllib-dev.rst diff --git a/README.rst b/README.rst index 83e8221f5..ce3c158ea 100644 --- a/README.rst +++ b/README.rst @@ -13,8 +13,8 @@ Ray is a flexible, high-performance distributed execution framework. Ray comes with libraries that accelerate deep learning and reinforcement learning development: -- `Ray.tune`_: Efficient Distributed Hyperparameter Search -- `Ray RLlib`_: A Composable and Scalable Reinforcement Learning Library +- `Ray.tune`_: Hyperparameter Optimization Framework +- `Ray RLlib`_: A Scalable Reinforcement Learning Library .. _`Ray.tune`: http://ray.readthedocs.io/en/latest/tune.html .. 
_`Ray RLlib`: http://ray.readthedocs.io/en/latest/rllib.html diff --git a/doc/source/index.rst b/doc/source/index.rst index a4de6cf3b..9459f9ea5 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -5,8 +5,8 @@ Ray Ray comes with libraries that accelerate deep learning and reinforcement learning development: -- `Ray.tune`_: Efficient Distributed Hyperparameter Search -- `Ray RLlib`_: A Composable and Scalable Reinforcement Learning Library +- `Ray.tune`_: Hyperparameter Optimization Framework +- `Ray RLlib`_: A Scalable Reinforcement Learning Library .. _`Ray.tune`: tune.html .. _`Ray RLlib`: rllib.html @@ -52,6 +52,7 @@ Example Program using-ray-with-gpus.rst tune.rst rllib.rst + rllib-dev.rst webui.rst .. toctree:: diff --git a/doc/source/rllib-dev.rst b/doc/source/rllib-dev.rst new file mode 100644 index 000000000..bc0d7881c --- /dev/null +++ b/doc/source/rllib-dev.rst @@ -0,0 +1,129 @@ +RLlib Developer Guide +===================== + +.. note:: + + This guide will take you through steps for implementing a new algorithm in RLlib. To apply existing algorithms already implemented in RLlib, please see the `user docs `__. + +Recipe for an RLlib algorithm +----------------------------- + +Here are the steps for implementing a new algorithm in RLlib: + +1. Define an algorithm-specific `Evaluator class <#evaluators-and-optimizers>`__ (the core of the algorithm). Evaluators encapsulate framework-specific components such as the policy and loss functions. For an example, see the `A3C Evaluator implementation `__. + + +2. Pick an appropriate `RLlib optimizer class <#evaluators-and-optimizers>`__. Optimizers manage the parallel execution of the algorithm. RLlib provides several built-in optimizers for gradient-based algorithms. Advanced algorithms may find it beneficial to implement their own optimizers. + + +3. Wrap the two up in an `Agent class <#agents>`__. Agents are the user-facing API of RLlib. 
They provide the necessary "glue" and implement accessory functionality such as statistics reporting and checkpointing. + +To help with implementation, RLlib provides common action distributions, preprocessors, and neural network models, found in `catalog.py `__, which are shared by all algorithms. Note that most of these utilities are currently TensorFlow-specific. + +Defining a custom model +----------------------- + +Often you will want to plug your own neural network into an existing RLlib algorithm. +This can be easily done by defining your own `Model class <#models-and-preprocessors>`__ and registering it in the RLlib catalog, after which it will be available for use by all RLlib algorithms. + +An example usage of a custom model looks like this: + +:: + + from ray.rllib.models import ModelCatalog, Model + + class MyModelClass(Model): + def _init(self, inputs, num_outputs, options): + layer1 = slim.fully_connected(inputs, 64, ...) + layer2 = slim.fully_connected(layer1, 64, ...) + ... + return layerN, layerN_minus_1 + + ModelCatalog.register_custom_model("my_model", MyModelClass) + + alg = ppo.PPOAgent(env="CartPole-v0", config={ + "custom_model": "my_model", + }) + + +Note that if you need to reference large data objects as part of the computation, e.g. weights, you can put them into the Ray object store with ``ray.put`` and then retrieve them from inside your model class. + + +The Developer API +----------------- + +The following APIs are the building blocks of RLlib algorithms. Note that they are not yet considered stable. + +Agents +~~~~~~ + +Agents implement a particular algorithm and can be used to run +some number of iterations of the algorithm, save and load the state +of training and evaluate the current policy. All agents inherit from +a common base class: + +.. autoclass:: ray.rllib.agent.Agent + :members: + +Evaluators and Optimizers +~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. autoclass:: ray.rllib.optimizers.evaluator.Evaluator + :members: + +.. 
autoclass:: ray.rllib.optimizers.optimizer.Optimizer + :members: + +Sample Batches +~~~~~~~~~~~~~~ + +In order for Optimizers to manipulate sample data, it should be returned from Evaluators +in the SampleBatch format (a wrapper around a dict). + +.. autoclass:: ray.rllib.optimizers.SampleBatch + :members: + +Models and Preprocessors +~~~~~~~~~~~~~~~~~~~~~~~~ + +Algorithms share neural network models which inherit from the following class: + +.. autoclass:: ray.rllib.models.Model + :members: + +Currently we support fully connected and convolutional TensorFlow policies on all algorithms: + +.. autoclass:: ray.rllib.models.FullyConnectedNetwork +.. autoclass:: ray.rllib.models.ConvolutionalNetwork + +A3C also supports a TensorFlow LSTM policy. + +.. autoclass:: ray.rllib.models.LSTM + +Observations are transformed by Preprocessors before being used in the model: + +.. autoclass:: ray.rllib.models.preprocessors.Preprocessor + :members: + +Action Distributions +~~~~~~~~~~~~~~~~~~~~ + +Actions can be sampled from different distributions which have a common base +class: + +.. autoclass:: ray.rllib.models.ActionDistribution + :members: + +Currently we support the following action distributions: + +.. autoclass:: ray.rllib.models.Categorical +.. autoclass:: ray.rllib.models.DiagGaussian +.. autoclass:: ray.rllib.models.Deterministic + +The Model Catalog +~~~~~~~~~~~~~~~~~ + +The Model Catalog is the mechanism for algorithms to get preprocessors, models, and action distributions for varying gym environments. It enables sharing of these components across different algorithms. + +.. 
autoclass:: ray.rllib.models.ModelCatalog + :members: diff --git a/doc/source/rllib.rst b/doc/source/rllib.rst index 7c170a3b6..bef9d5248 100644 --- a/doc/source/rllib.rst +++ b/doc/source/rllib.rst @@ -1,5 +1,5 @@ -Ray RLlib: A Composable and Scalable Reinforcement Learning Library -=================================================================== +Ray RLlib: A Scalable Reinforcement Learning Library +==================================================== Ray RLlib is a reinforcement learning library that aims to provide both performance and composability: @@ -8,28 +8,27 @@ Ray RLlib is a reinforcement learning library that aims to provide both performa - Pluggable distributed RL execution strategies - Composability - - Integration with the `Ray.tune `__ hyperparam tuning tool + - Integration with the `Ray.tune `__ hyperparam tuning tool - Support for multiple frameworks (TensorFlow, PyTorch) - Scalable primitives for developing new algorithms - Shared models between algorithms -You can find the code for RLlib `here on GitHub `__, and the NIPS symposium paper `here `__. +You can find the code for RLlib `here on GitHub `__, and the NIPS symposium paper `here `__. RLlib currently provides the following algorithms: -- `Proximal Policy Optimization `__ which +- `Proximal Policy Optimization (PPO) `__ which is a proximal variant of `TRPO `__. -- Evolution Strategies which is decribed in `this +- `The Asynchronous Advantage Actor-Critic (A3C) `__. + +- `Deep Q Networks (DQN) `__. + +- Evolution Strategies, as described in `this paper `__. Our implementation is adapted from `here `__. -- `The Asynchronous Advantage Actor-Critic `__ - based on `the OpenAI starter agent `__. - -- `Deep Q Network (DQN) `__. - These algorithms can be run on any `OpenAI Gym MDP `__, including custom ones written and registered by the user. 
@@ -170,15 +169,15 @@ Custom Models and Preprocessors ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RLlib includes default neural network models and preprocessors for common gym -environments, but you can also specify your own. For example: +environments, but you can also specify your own as follows. The interfaces for +custom model and preprocessor classes are documented in the +`RLlib Developer Guide `__. :: import ray from ray.rllib.models import ModelCatalog - # The interfaces for custom models and preprocessors classes are documented - # below in the Developer API section. ModelCatalog.register_custom_preprocessor("my_prep", MyPreprocessorClass) ModelCatalog.register_custom_model("my_model", MyModelClass) @@ -193,7 +192,7 @@ Using RLlib with Ray.tune ------------------------- All Agents implemented in RLlib support the -`tune Trainable `__ interface. +`tune Trainable `__ interface. Here is an example of using the command-line interface with RLlib: @@ -232,82 +231,9 @@ in the ``config`` section of the experiments. run_experiments(experiment) -.. _`managing a cluster with parallel ssh`: http://ray.readthedocs.io/en/latest/using-ray-on-a-large-cluster.html +.. _`managing a cluster with parallel ssh`: using-ray-on-a-large-cluster.html +Contributing to RLlib +--------------------- -The Developer API ------------------ - -This part of the API will be useful if you need to change existing RL algorithms -or implement new ones. Note that the API is not considered to be stable yet. - -Agents -~~~~~~ - -Agents implement a particular algorithm and can be used to run -some number of iterations of the algorithm, save and load the state -of training and evaluate the current policy. All agents inherit from -a common base class: - -.. autoclass:: ray.rllib.agent.Agent - :members: - -Optimizers and Evaluators -~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. autoclass:: ray.rllib.optimizers.optimizer.Optimizer - :members: - -.. 
autoclass:: ray.rllib.optimizers.evaluator.Evaluator - :members: - -Models and Preprocessors -~~~~~~~~~~~~~~~~~~~~~~~~ - -Algorithms share neural network models which inherit from the following class: - -.. autoclass:: ray.rllib.models.Model - -Currently we support fully connected and convolutional TensorFlow policies on all algorithms: - -.. autofunction:: ray.rllib.models.FullyConnectedNetwork -.. autofunction:: ray.rllib.models.ConvolutionalNetwork - -A3C also supports a TensorFlow LSTM policy. - -.. autofunction:: ray.rllib.models.LSTM - -Observations are transformed by Preprocessors before used in the model: - -.. autoclass:: ray.rllib.models.preprocessors.Preprocessor - -Action Distributions -~~~~~~~~~~~~~~~~~~~~ - -Actions can be sampled from different distributions which have a common base -class: - -.. autoclass:: ray.rllib.models.ActionDistribution - :members: - -Currently we support the following action distributions: - -.. autofunction:: ray.rllib.models.Categorical -.. autofunction:: ray.rllib.models.DiagGaussian -.. autofunction:: ray.rllib.models.Deterministic - -The Model Catalog -~~~~~~~~~~~~~~~~~ - -The Model Catalog is a mechanism for picking good default values for -various gym environments. Here is an example usage: -:: - - dist_class, dist_dim = ModelCatalog.get_action_dist(env.action_space) - model = ModelCatalog.get_model(registry, inputs, dist_dim) - dist = dist_class(model.outputs) - action_op = dist.sample() - - -.. autoclass:: ray.rllib.models.ModelCatalog - :members: +See the `RLlib Developer Guide `__. 
diff --git a/doc/source/tune.rst b/doc/source/tune.rst index 6752b7f10..e8deee0ce 100644 --- a/doc/source/tune.rst +++ b/doc/source/tune.rst @@ -1,7 +1,7 @@ -Ray.tune: Efficient Distributed Hyperparameter Search -===================================================== +Ray.tune: Hyperparameter Optimization Framework +=============================================== -This document describes Ray.tune, a hyperparameter tuning tool for long-running tasks such as RL and deep learning training. It has the following features: +This document describes Ray.tune, a hyperparameter tuning framework for long-running tasks such as RL and deep learning training. It has the following features: - Early stopping algorithms such as `Median Stopping Rule `__ and `HyperBand `__. diff --git a/python/ray/rllib/README.rst b/python/ray/rllib/README.rst index 54af05dbb..3b6d1bab1 100644 --- a/python/ray/rllib/README.rst +++ b/python/ray/rllib/README.rst @@ -1,23 +1,22 @@ -Ray RLlib: A Composable and Scalable Reinforcement Learning Library -=================================================================== +Ray RLlib: A Scalable Reinforcement Learning Library +==================================================== -This README provides a brief technical overview of RLlib. See also the `user documentation `__ and `NIPS symposium paper `__. +This README provides a brief technical overview of RLlib. See also the `user documentation `__ and `NIPS symposium paper `__. RLlib currently provides the following algorithms: -- `Proximal Policy Optimization `__ which +- `Proximal Policy Optimization (PPO) `__ which is a proximal variant of `TRPO `__. -- Evolution Strategies which is decribed in `this +- `The Asynchronous Advantage Actor-Critic (A3C) `__. + +- `Deep Q Networks (DQN) `__. + +- Evolution Strategies, as described in `this paper `__. Our implementation - borrows code from + is adapted from `here `__. -- `The Asynchronous Advantage Actor-Critic `__ - based on `the OpenAI starter agent `__. 
- -- `Deep Q Network (DQN) `__. - These algorithms can be run on any OpenAI Gym MDP, including custom ones written and registered by the user. @@ -51,4 +50,4 @@ These are the currently available optimizers: Common utilities ---------------- -RLlib defines common action distributions, preprocessors, and neural network models, found in ``models/catalog.py``, which are shared by all algorithms. More information on these classes can be found in the `developer API docs `__. +RLlib defines common action distributions, preprocessors, and neural network models, found in ``models/catalog.py``, which are shared by all algorithms. More information on these classes can be found in the `RLlib Developer Guide `__. diff --git a/python/ray/rllib/models/action_dist.py b/python/ray/rllib/models/action_dist.py index 844761728..ac6fc671e 100644 --- a/python/ray/rllib/models/action_dist.py +++ b/python/ray/rllib/models/action_dist.py @@ -21,7 +21,7 @@ class ActionDistribution(object): raise NotImplementedError def kl(self, other): - """The KL-divergene between two action distributions.""" + """The KL-divergence between two action distributions.""" raise NotImplementedError def entropy(self): diff --git a/python/ray/rllib/models/catalog.py b/python/ray/rllib/models/catalog.py index 0615619c2..cc87fd2a4 100644 --- a/python/ray/rllib/models/catalog.py +++ b/python/ray/rllib/models/catalog.py @@ -36,7 +36,17 @@ MODEL_CONFIGS = [ class ModelCatalog(object): - """Registry of default models and action distributions for envs.""" + """Registry of models, preprocessors, and action distributions for envs. 
+ + Examples: + >>> prep = ModelCatalog.get_preprocessor(env) + >>> observation = prep.transform(raw_observation) + + >>> dist_cls, dist_dim = ModelCatalog.get_action_dist(env.action_space) + >>> model = ModelCatalog.get_model(registry, inputs, dist_dim) + >>> dist = dist_cls(model.outputs) + >>> action = dist.sample() + """ ATARI_OBS_SHAPE = (210, 160, 3) ATARI_RAM_OBS_SHAPE = (128,) diff --git a/python/ray/tune/README.rst b/python/ray/tune/README.rst index 697ad6973..7c2d5eff6 100644 --- a/python/ray/tune/README.rst +++ b/python/ray/tune/README.rst @@ -1,7 +1,7 @@ -Ray.tune: Efficient distributed hyperparameter search -===================================================== +Ray.tune: Hyperparameter Optimization Framework +=============================================== -Ray.tune is a hyperparameter tuning tool for long-running tasks such as RL and deep learning training. +Ray.tune is a hyperparameter tuning framework for long-running tasks such as RL and deep learning training. User documentation can be `found here `__.