diff --git a/doc/source/rllib.rst b/doc/source/rllib.rst index 86662d48d..26f1af0b5 100644 --- a/doc/source/rllib.rst +++ b/doc/source/rllib.rst @@ -240,6 +240,38 @@ these custom classes can be found in the For a full example of a custom model in code, see the `Carla RLlib model `__ and associated `training scripts `__. The ``CarlaModel`` class defined there operates over a composite (Tuple) observation space including both images and scalar measurements. +Multi-Agent Models +~~~~~~~~~~~~~~~~~~ +RLlib supports multi-agent training with PPO. Currently it supports both +shared, i.e. all agents have the same model, and non-shared multi-agent models. However, it only supports shared +rewards and does not yet support individual rewards for each agent. + + +While Generalized Advantage Estimation is supported in multi-agent scenarios, +it is assumed that it is possible for the estimator to access the observations of +all of the agents. + + +Important config parameters are described below: + +.. code-block:: python + + config["model"].update({"fcnet_hiddens": [256, 256]}) # dimension of value function + options = {"multiagent_obs_shapes": [3, 3], # length of each observation space + "multiagent_act_shapes": [1, 1], # length of each action space + "multiagent_shared_model": True, # whether the model should be shared + # list of dimensions of multiagent feedforward nets + "multiagent_fcnet_hiddens": [[32, 32]] * 2} + config["model"].update({"custom_options": options}) + +For a full example of a multi-agent model in code, see the +`MultiAgent Pendulum `__. +The ``MultiAgentPendulumEnv`` defined there operates +over a composite (Tuple) observation space enclosing a list of Boxes; each Box represents the +observation of an agent. The action space is a list of Box actions, each +element corresponding to half of the total torque. The environment will return a list of actions +that can be iterated over and applied to each agent. 
+ External Data API ~~~~~~~~~~~~~~~~~ *coming soon!* diff --git a/python/ray/rllib/examples/multiagent_mountaincar_env.py b/python/ray/rllib/examples/multiagent_mountaincar_env.py index 3018aa7f3..d50302eea 100644 --- a/python/ray/rllib/examples/multiagent_mountaincar_env.py +++ b/python/ray/rllib/examples/multiagent_mountaincar_env.py @@ -22,8 +22,8 @@ class MultiAgentMountainCarEnv(MountainCarEnv): self.viewer = None self.action_space = [Discrete(3) for _ in range(2)] - self.observation_space = Tuple(tuple(Box(self.low, self.high) - for _ in range(2))) + self.observation_space = Tuple([ + Box(self.low, self.high) for _ in range(2)]) self._seed() self.reset() diff --git a/python/ray/rllib/examples/multiagent_pendulum_env.py b/python/ray/rllib/examples/multiagent_pendulum_env.py index da727bcaa..b2095e625 100644 --- a/python/ray/rllib/examples/multiagent_pendulum_env.py +++ b/python/ray/rllib/examples/multiagent_pendulum_env.py @@ -24,8 +24,8 @@ class MultiAgentPendulumEnv(PendulumEnv): self.action_space = [Box(low=-self.max_torque / 2, high=self.max_torque / 2, shape=(1,)) for _ in range(2)] - self.observation_space = Tuple(tuple(Box(low=-high, high=high) - for _ in range(2))) + self.observation_space = Tuple([ + Box(low=-high, high=high) for _ in range(2)]) self._seed()