diff --git a/doc/source/rllib.rst b/doc/source/rllib.rst
index 86662d48d..26f1af0b5 100644
--- a/doc/source/rllib.rst
+++ b/doc/source/rllib.rst
@@ -240,6 +240,38 @@ these custom classes can be found in the
 
 For a full example of a custom model in code, see the `Carla RLlib model <https://github.com/ray-project/ray/blob/master/examples/carla/models.py>`__ and associated `training scripts <https://github.com/ray-project/ray/tree/master/examples/carla>`__. The ``CarlaModel`` class defined there operates over a composite (Tuple) observation space including both images and scalar measurements.
 
+Multi-Agent Models
+~~~~~~~~~~~~~~~~~~
+RLlib supports multi-agent training with PPO. Currently it supports both
+shared (i.e. all agents have the same model) and non-shared multi-agent models. However, it only supports shared
+rewards and does not yet support individual rewards for each agent.
+
+
+While Generalized Advantage Estimation is supported in multi-agent scenarios,
+it is assumed that it is possible for the estimator to access the observations of
+all of the agents.
+
+
+Important config parameters are described below:
+
+.. code-block:: python
+
+    config["model"].update({"fcnet_hiddens": [256, 256]}) # hidden layer sizes of the value function network
+    options = {"multiagent_obs_shapes": [3, 3], # length of each observation space
+               "multiagent_act_shapes": [1, 1], # length of each action space
+               "multiagent_shared_model": True, # whether the model should be shared
+               # list of dimensions of multiagent feedforward nets
+               "multiagent_fcnet_hiddens": [[32, 32]] * 2} 
+    config["model"].update({"custom_options": options})
+
+For a full example of a multi-agent model in code, see the
+`MultiAgent Pendulum <https://github.com/ray-project/ray/blob/master/python/ray/rllib/examples/multiagent_pendulum_env.py>`__.
+The ``MultiAgentPendulumEnv`` defined there operates
+over a composite (Tuple) observation space enclosing a list of Boxes; each Box represents the
+observation of an agent. The action space is a list of Box actions, each
+element corresponding to half of the total torque. The environment will return a list of actions
+that can be iterated over and applied to each agent.
+
 External Data API
 ~~~~~~~~~~~~~~~~~
 *coming soon!*
diff --git a/python/ray/rllib/examples/multiagent_mountaincar_env.py b/python/ray/rllib/examples/multiagent_mountaincar_env.py
index 3018aa7f3..d50302eea 100644
--- a/python/ray/rllib/examples/multiagent_mountaincar_env.py
+++ b/python/ray/rllib/examples/multiagent_mountaincar_env.py
@@ -22,8 +22,8 @@ class MultiAgentMountainCarEnv(MountainCarEnv):
         self.viewer = None
 
         self.action_space = [Discrete(3) for _ in range(2)]
-        self.observation_space = Tuple(tuple(Box(self.low, self.high)
-                                             for _ in range(2)))
+        self.observation_space = Tuple([
+            Box(self.low, self.high) for _ in range(2)])
 
         self._seed()
         self.reset()
diff --git a/python/ray/rllib/examples/multiagent_pendulum_env.py b/python/ray/rllib/examples/multiagent_pendulum_env.py
index da727bcaa..b2095e625 100644
--- a/python/ray/rllib/examples/multiagent_pendulum_env.py
+++ b/python/ray/rllib/examples/multiagent_pendulum_env.py
@@ -24,8 +24,8 @@ class MultiAgentPendulumEnv(PendulumEnv):
         self.action_space = [Box(low=-self.max_torque / 2,
                                  high=self.max_torque / 2, shape=(1,))
                              for _ in range(2)]
-        self.observation_space = Tuple(tuple(Box(low=-high, high=high)
-                                             for _ in range(2)))
+        self.observation_space = Tuple([
+            Box(low=-high, high=high) for _ in range(2)])
 
         self._seed()