From 6e6674a8242124242ff1f4802bd3768a864ed50c Mon Sep 17 00:00:00 2001
From: Eric Liang <ekhliang@gmail.com>
Date: Mon, 1 Jan 2018 11:10:44 -0800
Subject: [PATCH] [rllib] Split docs into user and development guide (#1377)

* docs

* Update README.rst

* Sat Dec 30 15:23:49 PST 2017

* comments

* Sun Dec 31 23:33:30 PST 2017

* Sun Dec 31 23:33:38 PST 2017

* Sun Dec 31 23:37:46 PST 2017

* Sun Dec 31 23:39:28 PST 2017

* Sun Dec 31 23:43:05 PST 2017

* Sun Dec 31 23:51:55 PST 2017

* Sun Dec 31 23:52:51 PST 2017
---
 README.rst                             |   4 +-
 doc/source/index.rst                   |   5 +-
 doc/source/rllib-dev.rst               | 129 +++++++++++++++++++++++++
 doc/source/rllib.rst                   | 110 ++++-----------------
 doc/source/tune.rst                    |   6 +-
 python/ray/rllib/README.rst            |  23 +++--
 python/ray/rllib/models/action_dist.py |   2 +-
 python/ray/rllib/models/catalog.py     |  12 ++-
 python/ray/tune/README.rst             |   6 +-
 9 files changed, 181 insertions(+), 116 deletions(-)
 create mode 100644 doc/source/rllib-dev.rst

diff --git a/README.rst b/README.rst
index 83e8221f5..ce3c158ea 100644
--- a/README.rst
+++ b/README.rst
@@ -13,8 +13,8 @@ Ray is a flexible, high-performance distributed execution framework.
 
 Ray comes with libraries that accelerate deep learning and reinforcement learning development:
 
-- `Ray.tune`_: Efficient Distributed Hyperparameter Search
-- `Ray RLlib`_: A Composable and Scalable Reinforcement Learning Library
+- `Ray.tune`_: Hyperparameter Optimization Framework
+- `Ray RLlib`_: A Scalable Reinforcement Learning Library
 
 .. _`Ray.tune`: http://ray.readthedocs.io/en/latest/tune.html
 .. _`Ray RLlib`: http://ray.readthedocs.io/en/latest/rllib.html
diff --git a/doc/source/index.rst b/doc/source/index.rst
index a4de6cf3b..9459f9ea5 100644
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -5,8 +5,8 @@ Ray
 
 Ray comes with libraries that accelerate deep learning and reinforcement learning development:
 
-- `Ray.tune`_: Efficient Distributed Hyperparameter Search
-- `Ray RLlib`_: A Composable and Scalable Reinforcement Learning Library
+- `Ray.tune`_: Hyperparameter Optimization Framework
+- `Ray RLlib`_: A Scalable Reinforcement Learning Library
 
 .. _`Ray.tune`: tune.html
 .. _`Ray RLlib`: rllib.html
@@ -52,6 +52,7 @@ Example Program
    using-ray-with-gpus.rst
    tune.rst
    rllib.rst
+   rllib-dev.rst
    webui.rst
 
 .. toctree::
diff --git a/doc/source/rllib-dev.rst b/doc/source/rllib-dev.rst
new file mode 100644
index 000000000..bc0d7881c
--- /dev/null
+++ b/doc/source/rllib-dev.rst
@@ -0,0 +1,129 @@
+RLlib Developer Guide
+=====================
+
+.. note::
+
+    This guide will take you through steps for implementing a new algorithm in RLlib. To apply existing algorithms already implemented in RLlib, please see the `user docs <rllib.html>`__.
+
+Recipe for an RLlib algorithm
+-----------------------------
+
+Here are the steps for implementing a new algorithm in RLlib:
+
+1. Define an algorithm-specific `Evaluator class <#evaluators-and-optimizers>`__ (the core of the algorithm). Evaluators encapsulate framework-specific components such as the policy and loss functions. For an example, see the `A3C Evaluator implementation <https://github.com/ray-project/ray/blob/master/python/ray/rllib/a3c/a3c_evaluator.py>`__.
+
+
+2. Pick an appropriate `RLlib optimizer class <#evaluators-and-optimizers>`__. Optimizers manage the parallel execution of the algorithm. RLlib provides several built-in optimizers for gradient-based algorithms. Advanced algorithms may find it beneficial to implement their own optimizers.
+
+
+3. Wrap the two up in an `Agent class <#agents>`__. Agents are the user-facing API of RLlib. They provide the necessary "glue" and implement accessory functionality such as statistics reporting and checkpointing.
+
+To help with implementation, RLlib provides common action distributions, preprocessors, and neural network models, found in `catalog.py <https://github.com/ray-project/ray/blob/master/python/ray/rllib/models/catalog.py>`__, which are shared by all algorithms. Note that most of these utilities are currently TensorFlow-specific.
+
+Defining a custom model
+-----------------------
+
+Often you will want to plug in your own neural network into an existing RLlib algorithm.
+This can be easily done by defining your own `Model class <#models-and-preprocessors>`__ and registering it in the RLlib catalog, after which it will be available for use by all RLlib algorithms.
+
+An example usage of a custom model looks like this:
+
+::
+
+    from ray.rllib.models import ModelCatalog, Model
+
+    class MyModelClass(Model):
+        def _init(self, inputs, num_outputs, options):
+            layer1 = slim.fully_connected(inputs, 64, ...)
+            layer2 = slim.fully_connected(layer1, 64, ...)
+            ...
+            return layerN, layerN_minus_1
+
+    ModelCatalog.register_custom_model("my_model", MyModelClass)
+
+    alg = ppo.PPOAgent(env="CartPole-v0", config={
+        "custom_model": "my_model",
+    })
+
+
+Note that if you need to reference large data objects as part of the computation, e.g. weights, you can put them into the Ray object store with ``ray.put`` and then retrieve them from inside your model class.
+
+
+The Developer API
+-----------------
+
+The following APIs are the building blocks of RLlib algorithms. Note that they are not yet considered stable.
+
+Agents
+~~~~~~
+
+Agents implement a particular algorithm and can be used to run
+some number of iterations of the algorithm, save and load the state
+of training and evaluate the current policy. All agents inherit from
+a common base class:
+
+.. autoclass:: ray.rllib.agent.Agent
+    :members:
+
+Evaluators and Optimizers
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: ray.rllib.optimizers.evaluator.Evaluator
+    :members:
+
+.. autoclass:: ray.rllib.optimizers.optimizer.Optimizer
+    :members:
+
+Sample Batches
+~~~~~~~~~~~~~~
+
+For Optimizers to manipulate sample data, Evaluators should return it
+in the SampleBatch format (a wrapper around a dict).
+
+.. autoclass:: ray.rllib.optimizers.SampleBatch
+    :members:
+
+Models and Preprocessors
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+Algorithms share neural network models which inherit from the following class:
+
+.. autoclass:: ray.rllib.models.Model
+    :members:
+
+Currently we support fully connected and convolutional TensorFlow policies on all algorithms:
+
+.. autoclass:: ray.rllib.models.FullyConnectedNetwork
+.. autoclass:: ray.rllib.models.ConvolutionalNetwork
+
+A3C also supports a TensorFlow LSTM policy.
+
+.. autoclass:: ray.rllib.models.LSTM
+
+Observations are transformed by Preprocessors before being used in the model:
+
+.. autoclass:: ray.rllib.models.preprocessors.Preprocessor
+    :members:
+
+Action Distributions
+~~~~~~~~~~~~~~~~~~~~
+
+Actions can be sampled from different distributions which have a common base
+class:
+
+.. autoclass:: ray.rllib.models.ActionDistribution
+    :members:
+
+Currently we support the following action distributions:
+
+.. autoclass:: ray.rllib.models.Categorical
+.. autoclass:: ray.rllib.models.DiagGaussian
+.. autoclass:: ray.rllib.models.Deterministic
+
+The Model Catalog
+~~~~~~~~~~~~~~~~~
+
+The Model Catalog is the mechanism for algorithms to get preprocessors, models, and action distributions for varying gym environments. It enables sharing of these components across different algorithms.
+
+.. autoclass:: ray.rllib.models.ModelCatalog
+    :members:
diff --git a/doc/source/rllib.rst b/doc/source/rllib.rst
index 7c170a3b6..bef9d5248 100644
--- a/doc/source/rllib.rst
+++ b/doc/source/rllib.rst
@@ -1,5 +1,5 @@
-Ray RLlib: A Composable and Scalable Reinforcement Learning Library
-===================================================================
+Ray RLlib: A Scalable Reinforcement Learning Library
+====================================================
 
 Ray RLlib is a reinforcement learning library that aims to provide both performance and composability:
 
@@ -8,28 +8,27 @@ Ray RLlib is a reinforcement learning library that aims to provide both performa
     - Pluggable distributed RL execution strategies
 
 - Composability
-    - Integration with the `Ray.tune <http://ray.readthedocs.io/en/latest/tune.html>`__ hyperparam tuning tool
+    - Integration with the `Ray.tune <tune.html>`__ hyperparam tuning tool
     - Support for multiple frameworks (TensorFlow, PyTorch)
     - Scalable primitives for developing new algorithms
     - Shared models between algorithms
 
-You can find the code for RLlib `here on GitHub <https://github.com/ray-project/ray/tree/master/python/ray/rllib>`__, and the NIPS symposium paper `here <https://drive.google.com/open?id=1lDMOFLMUQXn8qGtuahOBUwjmFb2iASxu>`__.
+You can find the code for RLlib `here on GitHub <https://github.com/ray-project/ray/tree/master/python/ray/rllib>`__, and the NIPS symposium paper `here <https://arxiv.org/abs/1712.09381>`__.
 
 RLlib currently provides the following algorithms:
 
--  `Proximal Policy Optimization <https://arxiv.org/abs/1707.06347>`__ which
+-  `Proximal Policy Optimization (PPO) <https://arxiv.org/abs/1707.06347>`__ which
    is a proximal variant of `TRPO <https://arxiv.org/abs/1502.05477>`__.
 
--  Evolution Strategies which is decribed in `this
+-  `The Asynchronous Advantage Actor-Critic (A3C) <https://arxiv.org/abs/1602.01783>`__.
+
+- `Deep Q Networks (DQN) <https://arxiv.org/abs/1312.5602>`__.
+
+-  Evolution Strategies, as described in `this
    paper <https://arxiv.org/abs/1703.03864>`__. Our implementation
    is adapted from
    `here <https://github.com/openai/evolution-strategies-starter>`__.
 
--  `The Asynchronous Advantage Actor-Critic <https://arxiv.org/abs/1602.01783>`__
-   based on `the OpenAI starter agent <https://github.com/openai/universe-starter-agent>`__.
-
-- `Deep Q Network (DQN) <https://arxiv.org/abs/1312.5602>`__.
-
 These algorithms can be run on any `OpenAI Gym MDP <https://github.com/openai/gym>`__,
 including custom ones written and registered by the user.
 
@@ -170,15 +169,15 @@ Custom Models and Preprocessors
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 RLlib includes default neural network models and preprocessors for common gym
-environments, but you can also specify your own. For example:
+environments, but you can also specify your own as follows. The interfaces for
+custom model and preprocessor classes are documented in the
+`RLlib Developer Guide <rllib-dev.html>`__.
 
 ::
 
     import ray
     from ray.rllib.models import ModelCatalog
 
-    # The interfaces for custom models and preprocessors classes are documented
-    # below in the Developer API section.
     ModelCatalog.register_custom_preprocessor("my_prep", MyPreprocessorClass)
     ModelCatalog.register_custom_model("my_model", MyModelClass)
 
@@ -193,7 +192,7 @@ Using RLlib with Ray.tune
 -------------------------
 
 All Agents implemented in RLlib support the
-`tune Trainable <http://ray.readthedocs.io/en/latest/tune.html#ray.tune.trainable.Trainable>`__ interface.
+`tune Trainable <tune.html#ray.tune.trainable.Trainable>`__ interface.
 
 Here is an example of using the command-line interface with RLlib:
 
@@ -232,82 +231,9 @@ in the ``config`` section of the experiments.
 
     run_experiments(experiment)
 
-.. _`managing a cluster with parallel ssh`: http://ray.readthedocs.io/en/latest/using-ray-on-a-large-cluster.html
+.. _`managing a cluster with parallel ssh`: using-ray-on-a-large-cluster.html
 
+Contributing to RLlib
+---------------------
 
-The Developer API
------------------
-
-This part of the API will be useful if you need to change existing RL algorithms
-or implement new ones. Note that the API is not considered to be stable yet.
-
-Agents
-~~~~~~
-
-Agents implement a particular algorithm and can be used to run
-some number of iterations of the algorithm, save and load the state
-of training and evaluate the current policy. All agents inherit from
-a common base class:
-
-.. autoclass:: ray.rllib.agent.Agent
-    :members:
-
-Optimizers and Evaluators
-~~~~~~~~~~~~~~~~~~~~~~~~~
-
-.. autoclass:: ray.rllib.optimizers.optimizer.Optimizer
-    :members:
-
-.. autoclass:: ray.rllib.optimizers.evaluator.Evaluator
-    :members:
-
-Models and Preprocessors
-~~~~~~~~~~~~~~~~~~~~~~~~
-
-Algorithms share neural network models which inherit from the following class:
-
-.. autoclass:: ray.rllib.models.Model
-
-Currently we support fully connected and convolutional TensorFlow policies on all algorithms:
-
-.. autofunction:: ray.rllib.models.FullyConnectedNetwork
-.. autofunction:: ray.rllib.models.ConvolutionalNetwork
-
-A3C also supports a TensorFlow LSTM policy.
-
-.. autofunction:: ray.rllib.models.LSTM
-
-Observations are transformed by Preprocessors before used in the model:
-
-.. autoclass:: ray.rllib.models.preprocessors.Preprocessor
-
-Action Distributions
-~~~~~~~~~~~~~~~~~~~~
-
-Actions can be sampled from different distributions which have a common base
-class:
-
-.. autoclass:: ray.rllib.models.ActionDistribution
-    :members:
-
-Currently we support the following action distributions:
-
-.. autofunction:: ray.rllib.models.Categorical
-.. autofunction:: ray.rllib.models.DiagGaussian
-.. autofunction:: ray.rllib.models.Deterministic
-
-The Model Catalog
-~~~~~~~~~~~~~~~~~
-
-The Model Catalog is a mechanism for picking good default values for
-various gym environments. Here is an example usage:
-::
-
-    dist_class, dist_dim = ModelCatalog.get_action_dist(env.action_space)
-    model = ModelCatalog.get_model(registry, inputs, dist_dim)
-    dist = dist_class(model.outputs)
-    action_op = dist.sample()
-
-
-.. autoclass:: ray.rllib.models.ModelCatalog
-    :members:
+See the `RLlib Developer Guide <rllib-dev.html>`__.
diff --git a/doc/source/tune.rst b/doc/source/tune.rst
index 6752b7f10..e8deee0ce 100644
--- a/doc/source/tune.rst
+++ b/doc/source/tune.rst
@@ -1,7 +1,7 @@
-Ray.tune: Efficient Distributed Hyperparameter Search
-=====================================================
+Ray.tune: Hyperparameter Optimization Framework
+===============================================
 
-This document describes Ray.tune, a hyperparameter tuning tool for long-running tasks such as RL and deep learning training. It has the following features:
+This document describes Ray.tune, a hyperparameter tuning framework for long-running tasks such as RL and deep learning training. It has the following features:
 
 -  Early stopping algorithms such as `Median Stopping Rule <https://research.google.com/pubs/pub46180.html>`__ and `HyperBand <https://arxiv.org/abs/1603.06560>`__.
 
diff --git a/python/ray/rllib/README.rst b/python/ray/rllib/README.rst
index 54af05dbb..3b6d1bab1 100644
--- a/python/ray/rllib/README.rst
+++ b/python/ray/rllib/README.rst
@@ -1,23 +1,22 @@
-Ray RLlib: A Composable and Scalable Reinforcement Learning Library
-===================================================================
+Ray RLlib: A Scalable Reinforcement Learning Library
+====================================================
 
-This README provides a brief technical overview of RLlib. See also the `user documentation <http://ray.readthedocs.io/en/latest/rllib.html>`__ and `NIPS symposium paper <https://drive.google.com/open?id=1lDMOFLMUQXn8qGtuahOBUwjmFb2iASxu>`__.
+This README provides a brief technical overview of RLlib. See also the `user documentation <http://ray.readthedocs.io/en/latest/rllib.html>`__ and `NIPS symposium paper <https://arxiv.org/abs/1712.09381>`__.
 
 RLlib currently provides the following algorithms:
 
--  `Proximal Policy Optimization <https://arxiv.org/abs/1707.06347>`__ which
+-  `Proximal Policy Optimization (PPO) <https://arxiv.org/abs/1707.06347>`__ which
    is a proximal variant of `TRPO <https://arxiv.org/abs/1502.05477>`__.
 
--  Evolution Strategies which is decribed in `this
+-  `The Asynchronous Advantage Actor-Critic (A3C) <https://arxiv.org/abs/1602.01783>`__.
+
+- `Deep Q Networks (DQN) <https://arxiv.org/abs/1312.5602>`__.
+
+-  Evolution Strategies, as described in `this
    paper <https://arxiv.org/abs/1703.03864>`__. Our implementation
-   borrows code from
+   is adapted from
    `here <https://github.com/openai/evolution-strategies-starter>`__.
 
--  `The Asynchronous Advantage Actor-Critic <https://arxiv.org/abs/1602.01783>`__
-   based on `the OpenAI starter agent <https://github.com/openai/universe-starter-agent>`__.
-
-- `Deep Q Network (DQN) <https://arxiv.org/abs/1312.5602>`__.
-
 These algorithms can be run on any OpenAI Gym MDP, including custom ones written and registered by the user.
 
 
@@ -51,4 +50,4 @@ These are the currently available optimizers:
 Common utilities
 ----------------
 
-RLlib defines common action distributions, preprocessors, and neural network models, found in ``models/catalog.py``, which are shared by all algorithms. More information on these classes can be found in the `developer API docs <http://ray.readthedocs.io/en/latest/rllib.html#the-developer-api>`__.
+RLlib defines common action distributions, preprocessors, and neural network models, found in ``models/catalog.py``, which are shared by all algorithms. More information on these classes can be found in the `RLlib Developer Guide <http://ray.readthedocs.io/en/latest/rllib-dev.html>`__.
diff --git a/python/ray/rllib/models/action_dist.py b/python/ray/rllib/models/action_dist.py
index 844761728..ac6fc671e 100644
--- a/python/ray/rllib/models/action_dist.py
+++ b/python/ray/rllib/models/action_dist.py
@@ -21,7 +21,7 @@ class ActionDistribution(object):
         raise NotImplementedError
 
     def kl(self, other):
-        """The KL-divergene between two action distributions."""
+        """The KL-divergence between two action distributions."""
         raise NotImplementedError
 
     def entropy(self):
diff --git a/python/ray/rllib/models/catalog.py b/python/ray/rllib/models/catalog.py
index 0615619c2..cc87fd2a4 100644
--- a/python/ray/rllib/models/catalog.py
+++ b/python/ray/rllib/models/catalog.py
@@ -36,7 +36,17 @@ MODEL_CONFIGS = [
 
 
 class ModelCatalog(object):
-    """Registry of default models and action distributions for envs."""
+    """Registry of models, preprocessors, and action distributions for envs.
+
+    Examples:
+        >>> prep = ModelCatalog.get_preprocessor(env)
+        >>> observation = prep.transform(raw_observation)
+
+        >>> dist_cls, dist_dim = ModelCatalog.get_action_dist(env.action_space)
+        >>> model = ModelCatalog.get_model(registry, inputs, dist_dim)
+        >>> dist = dist_cls(model.outputs)
+        >>> action = dist.sample()
+    """
 
     ATARI_OBS_SHAPE = (210, 160, 3)
     ATARI_RAM_OBS_SHAPE = (128,)
diff --git a/python/ray/tune/README.rst b/python/ray/tune/README.rst
index 697ad6973..7c2d5eff6 100644
--- a/python/ray/tune/README.rst
+++ b/python/ray/tune/README.rst
@@ -1,7 +1,7 @@
-Ray.tune: Efficient distributed hyperparameter search
-=====================================================
+Ray.tune: Hyperparameter Optimization Framework
+===============================================
 
-Ray.tune is a hyperparameter tuning tool for long-running tasks such as RL and deep learning training.
+Ray.tune is a hyperparameter tuning framework for long-running tasks such as RL and deep learning training.
 
 User documentation can be `found here <http://ray.readthedocs.io/en/latest/tune.html>`__.