From 35f739866692c8f2a14102f5be0c21b69f8ac214 Mon Sep 17 00:00:00 2001 From: Eric Liang Date: Wed, 6 Dec 2017 18:17:51 -0800 Subject: [PATCH] [rllib] Update RLlib docs and README (#1288) Updates the rllib docs and README. --- README.rst | 8 ++ doc/source/index.rst | 10 ++- doc/source/rllib.rst | 153 +++++++++++++++++--------------- doc/source/tune.rst | 3 +- python/ray/rllib/README.rst | 66 +++++++++----- python/ray/rllib/agent.py | 3 +- python/ray/tune/trial_runner.py | 2 +- 7 files changed, 151 insertions(+), 94 deletions(-) diff --git a/README.rst b/README.rst index 53cf726a3..83e8221f5 100644 --- a/README.rst +++ b/README.rst @@ -11,6 +11,14 @@ Ray Ray is a flexible, high-performance distributed execution framework. +Ray comes with libraries that accelerate deep learning and reinforcement learning development: + +- `Ray.tune`_: Efficient Distributed Hyperparameter Search +- `Ray RLlib`_: A Composable and Scalable Reinforcement Learning Library + +.. _`Ray.tune`: http://ray.readthedocs.io/en/latest/tune.html +.. _`Ray RLlib`: http://ray.readthedocs.io/en/latest/rllib.html + Installation ------------ diff --git a/doc/source/index.rst b/doc/source/index.rst index 45819a957..1bc329cc9 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -3,6 +3,14 @@ Ray *Ray is a flexible, high-performance distributed execution framework.* +Ray comes with libraries that accelerate deep learning and reinforcement learning development: + +- `Ray.tune`_: Efficient Distributed Hyperparameter Search +- `Ray RLlib`_: A Composable and Scalable Reinforcement Learning Library + +.. _`Ray.tune`: tune.html +.. _`Ray RLlib`: rllib.html + Example Program --------------- @@ -42,8 +50,8 @@ Example Program api.rst actors.rst using-ray-with-gpus.rst - rllib.rst tune.rst + rllib.rst webui.rst .. 
toctree:: diff --git a/doc/source/rllib.rst b/doc/source/rllib.rst index 0db6e0c6c..a9692bba4 100644 --- a/doc/source/rllib.rst +++ b/doc/source/rllib.rst @@ -1,15 +1,28 @@ -RLLib: A Scalable Reinforcement Learning Library -================================================ +Ray RLlib: A Composable and Scalable Reinforcement Learning Library +=================================================================== -This document describes Ray's reinforcement learning library. -It currently supports the following algorithms: +Ray RLlib is a reinforcement learning library that aims to provide both performance and composability: + +- Performance + - High performance algorithm implementations + - Pluggable distributed RL execution strategies + +- Composability + - Integration with the `Ray.tune `__ hyperparam tuning tool + - Support for multiple frameworks (TensorFlow, PyTorch) + - Scalable primitives for developing new algorithms + - Shared models between algorithms + +You can find the code for RLlib `here on GitHub `__. + +RLlib currently provides the following algorithms: - `Proximal Policy Optimization `__ which is a proximal variant of `TRPO `__. - Evolution Strategies which is decribed in `this paper `__. Our implementation - borrows code from + is adapted from `here `__. - `The Asynchronous Advantage Actor-Critic `__ @@ -28,7 +41,7 @@ including custom ones written and registered by the user. Getting Started --------------- -You can train an example DQN agent with the following command +You can train a simple DQN agent with the following command :: @@ -79,12 +92,11 @@ Some good hyperparameters and settings are available in (some of them are tuned to run on GPUs). If you find better settings or tune an algorithm on a different domain, consider submitting a Pull Request! -The User API ------------- +Python User API +--------------- You will be using this part of the API if you run the existing algorithms -on a new problem. 
Note that the API is not considered to be stable yet. -Here is an example how to use it: +on a new problem. Here is an example of how to use it: :: @@ -108,7 +120,7 @@ Custom Environments ~~~~~~~~~~~~~~~~~~~ To train against a custom environment, i.e. one not in the gym catalog, you -can pass a function that returns an env instead of an env id. For example: +can register a function that creates the env to refer to it by name. For example: :: @@ -117,17 +129,11 @@ can pass a function that returns an env instead of an env id. For example: from ray.rllib import ppo env_creator = lambda: create_my_env() - env_creator_key = "custom_env" - register_env(env_creator_key, env_creator) + env_creator_name = "custom_env" + register_env(env_creator_name, env_creator) ray.init() - alg = ppo.PPOAgent(env=env_creator_key, registry=get_registry()) - -The Developer API ------------------ - -This part of the API will be useful if you need to change existing RL algorithms -or implement new ones. Note that the API is not considered to be stable yet. + alg = ppo.PPOAgent(env=env_creator_name, registry=get_registry()) Agents ~~~~~~ @@ -140,6 +146,63 @@ a common base class: .. autoclass:: ray.rllib.agent.Agent :members: +Using RLlib on a cluster +------------------------ + +First create a cluster as described in `managing a cluster with parallel ssh`_. +You can then run RLlib on this cluster by passing the address of the main redis +shard into ``train.py`` with ``--redis-address``. + +Using RLlib with Ray.tune +------------------------- + +All Agents implemented in RLlib support the +`tune Trainable `__ interface. + +Here is an example of using Ray.tune with RLlib: + +:: + + python ray/python/ray/rllib/train.py -f tuned_examples/cartpole-grid-search-example.yaml + +Here is an example using the Python API. 
+ +:: + + from ray.tune.tune import run_experiments + from ray.tune.variant_generator import grid_search + + + experiment = { + 'cartpole-ppo': { + 'run': 'PPO', + 'env': 'CartPole-v0', + 'resources': { + 'cpu': 2, + 'driver_cpu_limit': 1}, + 'stop': { + 'episode_reward_mean': 200, + 'time_total_s': 180 + }, + 'config': { + 'num_sgd_iter': grid_search([1, 4]), + 'num_workers': 2, + 'sgd_batchsize': grid_search([128, 256, 512]) + } + } + } + + run_experiments(experiment) + +.. _`managing a cluster with parallel ssh`: http://ray.readthedocs.io/en/latest/using-ray-on-a-large-cluster.html + + +The Developer API +----------------- + +This part of the API will be useful if you need to change existing RL algorithms +or implement new ones. Note that the API is not considered to be stable yet. + Models ~~~~~~ @@ -186,53 +249,3 @@ various gym environments. Here is an example usage: .. autoclass:: ray.rllib.models.ModelCatalog :members: - -Using RLLib on a cluster ------------------------- - -First create a cluster as described in `managing a cluster with parallel ssh`_. -You can then run RLLib on this cluster by passing the address of the main redis -shard into ``train.py`` with ``--redis-address``. - -Using RLLib with Ray.tune -------------------------- - -All Agents implemented in RLLib support the -`Trainable `__ interface. - -Here is an example of using Ray.tune with RLLib: - -:: - - python ray/python/ray/rllib/train.py -f tuned_examples/cartpole-grid-search-example.yaml - -Here is an example using the Python API. 
- -:: - - from ray.tune.tune import run_experiments - from ray.tune.variant_generator import grid_search - - - experiment = { - 'cartpole-ppo': { - 'run': 'PPO', - 'env': 'CartPole-v0', - 'resources': { - 'cpu': 2, - 'driver_cpu_limit': 1}, - 'stop': { - 'episode_reward_mean': 200, - 'time_total_s': 180 - }, - 'config': { - 'num_sgd_iter': grid_search([1, 4]), - 'num_workers': 2, - 'sgd_batchsize': grid_search([128, 256, 512]) - } - } - } - - run_experiments(experiment) - -.. _`managing a cluster with parallel ssh`: http://ray.readthedocs.io/en/latest/using-ray-on-a-large-cluster.html diff --git a/doc/source/tune.rst b/doc/source/tune.rst index 593416d0c..1d4253316 100644 --- a/doc/source/tune.rst +++ b/doc/source/tune.rst @@ -11,6 +11,7 @@ This document describes Ray.tune, a hyperparameter tuning tool for long-running - Resource-aware scheduling, including support for concurrent runs of algorithms that may themselves be parallel and distributed. +You can find the code for Ray.tune `here on GitHub `__. Getting Started --------------- @@ -49,7 +50,7 @@ This script runs a small grid search over the ``my_func`` function using ray.tun == Status == Using FIFO scheduling algorithm. Resources used: 4/8 CPUs, 0/0 GPUs - Tensorboard logdir: /tmp/ray/my_experiment + Result logdir: /tmp/ray/my_experiment - my_func_0_alpha=0.2,beta=1: RUNNING [pid=6778], 209 s, 20604 ts, 7.29 acc - my_func_1_alpha=0.4,beta=1: RUNNING [pid=6780], 208 s, 20522 ts, 53.1 acc - my_func_2_alpha=0.6,beta=1: TERMINATED [pid=6789], 21 s, 2190 ts, 101 acc diff --git a/python/ray/rllib/README.rst b/python/ray/rllib/README.rst index 7eb108287..240438d04 100644 --- a/python/ray/rllib/README.rst +++ b/python/ray/rllib/README.rst @@ -1,31 +1,57 @@ -RLLib: A Scalable Reinforcement Learning Library +RLlib: A Scalable Reinforcement Learning Library ================================================ -Getting Started ---------------- +This README provides a brief technical overview of RLlib. 
See also the `user documentation `__. -You can run training with +RLlib currently provides the following algorithms: -:: +- `Proximal Policy Optimization `__ which + is a proximal variant of `TRPO `__. - python train.py --env CartPole-v0 --run PPO - -The available algorithms are: - -- ``PPO`` is a proximal variant of - `TRPO `__. - -- ``ES`` is decribed in `this +- Evolution Strategies which is described in `this paper `__. Our implementation borrows code from `here `__. -- ``DQN`` is an implementation of `Deep Q - Networks `__ based on - `OpenAI baselines `__. +- `The Asynchronous Advantage Actor-Critic `__ + based on `the OpenAI starter agent `__. -- ``A3C`` is an implementation of - `A3C `__ based on `the OpenAI - starter agent `__. +- `Deep Q Network (DQN) `__. -User documentation can be `found here `__. +Proximal Policy Optimization scales to hundreds of cores and several GPUs, Evolution Strategies to clusters with thousands of cores and the Asynchronous Advantage Actor-Critic scales to dozens of cores on a single node. + +These algorithms can be run on any OpenAI Gym MDP, including custom ones written and registered by the user. + +For more detailed usage information, see the `user documentation `__. + +Training API +------------ + +All RLlib algorithms implement a common training API (agent.py), which enables multiple algorithms to be easily evaluated: + +:: + + # Train a model on a single environment + python train.py --env CartPole-v0 --run PPO + + # Integration with ray.tune for hyperparam evaluation + python train.py -f tuned_examples/cartpole-grid-search-example.yaml + +Evaluator and Optimizer abstractions +------------------------------------ + +RLlib's gradient-based algorithms are composed using two abstractions: Evaluators (evaluator.py) and Optimizers (optimizers/optimizer.py). Optimizers encapsulate a particular distributed optimization strategy for RL. 
Evaluators encapsulate the model graph, and once implemented, any Optimizer may be "plugged in" to any algorithm that implements the Evaluator interface. + +This pluggability enables optimization strategies to be re-used and improved across different algorithms and deep learning frameworks (RLlib's optimizers work with both TensorFlow and PyTorch, though currently only A3C has a PyTorch graph implementation). + +These are the currently available optimizers: + +- ``AsyncOptimizer`` is an asynchronous RL optimizer, i.e. like A3C. It asynchronously pulls and applies gradients from evaluators, sending updated weights back as needed. +- ``LocalSyncOptimizer`` is a simple synchronous RL optimizer. It pulls samples from remote evaluators, concatenates them, and then updates a local model. The updated model weights are then broadcast to all remote evaluators. +- ``LocalMultiGPUOptimizer`` (currently available for PPO) This optimizer performs SGD over a number of local GPUs, and pins experience data in GPU memory to amortize the copy overhead for multiple SGD passes. +- ``AllReduceOptimizer`` (planned) This optimizer would use the Allreduce primitive to scalably synchronize weights among a number of remote GPU workers. + +Common utilities +---------------- + +RLlib defines common action distributions, preprocessors, and neural network models, found in ``models/catalog.py``, which are shared by all algorithms. More information on these classes can be found in the `developer API docs `__. diff --git a/python/ray/rllib/agent.py b/python/ray/rllib/agent.py index 8ca499b2d..1770a1a29 100644 --- a/python/ray/rllib/agent.py +++ b/python/ray/rllib/agent.py @@ -35,7 +35,8 @@ class Agent(Trainable): env_creator (func): Function that creates a new training env. config (obj): Algorithm-specific configuration data. logdir (str): Directory in which training outputs should be placed. - registry (obj): Object registry. 
+ registry (obj): Tune object registry, for registering user-defined + classes and objects by name. """ _allow_unknown_configs = False diff --git a/python/ray/tune/trial_runner.py b/python/ray/tune/trial_runner.py index e0d6eddba..63aeba821 100644 --- a/python/ray/tune/trial_runner.py +++ b/python/ray/tune/trial_runner.py @@ -118,7 +118,7 @@ class TrialRunner(object): self._committed_resources.gpu, self._avail_resources.gpu)) for local_dir in sorted(set([t.local_dir for t in self._trials])): - messages.append("Tensorboard logdir: {}".format(local_dir)) + messages.append("Result logdir: {}".format(local_dir)) for t in self._trials: if t.local_dir == local_dir: messages.append(