diff --git a/python/requirements_rllib.txt b/python/requirements_rllib.txt
index 0b7543587..94ae9cdbb 100644
--- a/python/requirements_rllib.txt
+++ b/python/requirements_rllib.txt
@@ -10,3 +10,6 @@ smart_open
 pybullet
 # For tests on PettingZoo's multi-agent envs.
 pettingzoo>=1.4.0
+# For tests on RecSim and Kaggle envs.
+recsim
+kaggle_environments
diff --git a/rllib/BUILD b/rllib/BUILD
index 6b6717389..daa623dff 100644
--- a/rllib/BUILD
+++ b/rllib/BUILD
@@ -1067,6 +1067,20 @@ py_test(
     srcs = ["env/wrappers/tests/test_recsim_wrapper.py"]
 )
 
+py_test(
+    name = "env/wrappers/tests/test_exception_wrapper",
+    tags = ["env"],
+    size = "small",
+    srcs = ["env/wrappers/tests/test_exception_wrapper.py"]
+)
+
+py_test(
+    name = "env/wrappers/tests/test_group_agents_wrapper",
+    tags = ["env"],
+    size = "small",
+    srcs = ["env/wrappers/tests/test_group_agents_wrapper.py"]
+)
+
 # --------------------------------------------------------------------
 # Models and Distributions
 # rllib/models/
diff --git a/rllib/agents/mbmpo/mbmpo.py b/rllib/agents/mbmpo/mbmpo.py
index 9cbf94c67..a3731ea9d 100644
--- a/rllib/agents/mbmpo/mbmpo.py
+++ b/rllib/agents/mbmpo/mbmpo.py
@@ -21,7 +21,7 @@ from ray.rllib.agents.mbmpo.utils import calculate_gae_advantages, \
     MBMPOExploration
 from ray.rllib.agents.trainer_template import build_trainer
 from ray.rllib.env.env_context import EnvContext
-from ray.rllib.env.model_vector_env import model_vector_env
+from ray.rllib.env.wrappers.model_vector_env import model_vector_env
 from ray.rllib.evaluation.metrics import collect_episodes, collect_metrics, \
     get_learner_stats
 from ray.rllib.evaluation.worker_set import WorkerSet
diff --git a/rllib/agents/qmix/qmix_policy.py b/rllib/agents/qmix/qmix_policy.py
index 539efa2c5..20966aaec 100644
--- a/rllib/agents/qmix/qmix_policy.py
+++ b/rllib/agents/qmix/qmix_policy.py
@@ -7,6 +7,7 @@ import ray
 from ray.rllib.agents.qmix.mixers import VDNMixer, QMixer
 from ray.rllib.agents.qmix.model import RNNModel, _get_size
 from ray.rllib.env.multi_agent_env import ENV_STATE
+from ray.rllib.env.wrappers.group_agents_wrapper import GROUP_REWARDS
 from ray.rllib.evaluation.metrics import LEARNER_STATS_KEY
 from ray.rllib.models.torch.torch_action_dist import TorchCategorical
 from ray.rllib.policy.policy import Policy
@@ -14,7 +15,6 @@ from ray.rllib.policy.rnn_sequencing import chop_into_sequences
 from ray.rllib.policy.sample_batch import SampleBatch
 from ray.rllib.models.catalog import ModelCatalog
 from ray.rllib.models.modelv2 import _unpack_obs
-from ray.rllib.env.constants import GROUP_REWARDS
 from ray.rllib.utils.framework import try_import_torch
 from ray.rllib.utils.annotations import override
 
diff --git a/rllib/env/__init__.py b/rllib/env/__init__.py
index 48936c40f..61e889e80 100644
--- a/rllib/env/__init__.py
+++ b/rllib/env/__init__.py
@@ -1,27 +1,34 @@
 from ray.rllib.env.base_env import BaseEnv
-from ray.rllib.env.dm_env_wrapper import DMEnv
-from ray.rllib.env.dm_control_wrapper import DMCEnv
-from ray.rllib.env.unity3d_env import Unity3DEnv
-from ray.rllib.env.pettingzoo_env import PettingZooEnv
-from ray.rllib.env.multi_agent_env import MultiAgentEnv
+from ray.rllib.env.env_context import EnvContext
 from ray.rllib.env.external_env import ExternalEnv
 from ray.rllib.env.external_multi_agent_env import ExternalMultiAgentEnv
-from ray.rllib.env.vector_env import VectorEnv
-from ray.rllib.env.env_context import EnvContext
+from ray.rllib.env.multi_agent_env import MultiAgentEnv
 from ray.rllib.env.policy_client import PolicyClient
 from ray.rllib.env.policy_server_input import PolicyServerInput
+from ray.rllib.env.remote_vector_env import RemoteVectorEnv
+from ray.rllib.env.vector_env import VectorEnv
+
+from ray.rllib.env.wrappers.dm_env_wrapper import DMEnv
+from ray.rllib.env.wrappers.dm_control_wrapper import DMCEnv
+from ray.rllib.env.wrappers.group_agents_wrapper import GroupAgentsWrapper
+from ray.rllib.env.wrappers.kaggle_wrapper import KaggleFootballMultiAgentEnv
+from ray.rllib.env.wrappers.pettingzoo_env import PettingZooEnv
+from ray.rllib.env.wrappers.unity3d_env import Unity3DEnv
 
 __all__ = [
     "BaseEnv",
-    "MultiAgentEnv",
-    "ExternalEnv",
-    "ExternalMultiAgentEnv",
-    "VectorEnv",
-    "EnvContext",
     "DMEnv",
     "DMCEnv",
-    "Unity3DEnv",
+    "EnvContext",
+    "ExternalEnv",
+    "ExternalMultiAgentEnv",
+    "GroupAgentsWrapper",
+    "KaggleFootballMultiAgentEnv",
+    "MultiAgentEnv",
     "PettingZooEnv",
     "PolicyClient",
     "PolicyServerInput",
+    "RemoteVectorEnv",
+    "Unity3DEnv",
+    "VectorEnv",
 ]
diff --git a/rllib/env/atari_wrappers.py b/rllib/env/atari_wrappers.py
index 1c2b14c77..31877d82d 100644
--- a/rllib/env/atari_wrappers.py
+++ b/rllib/env/atari_wrappers.py
@@ -1,319 +1,25 @@
-import numpy as np
-from collections import deque
-import gym
-from gym import spaces
-import cv2
-cv2.ocl.setUseOpenCL(False)
+from ray.rllib.env.wrappers.atari_wrappers import is_atari, \
+    get_wrapper_by_cls, MonitorEnv, NoopResetEnv, ClipRewardEnv, \
+    FireResetEnv, EpisodicLifeEnv, MaxAndSkipEnv, WarpFrame, FrameStack, \
+    FrameStackTrajectoryView, ScaledFloatFrame, wrap_deepmind
+from ray.rllib.utils.deprecation import deprecation_warning
 
+deprecation_warning(
+    old="ray.rllib.env.atari_wrappers....",
+    new="ray.rllib.env.wrappers.atari_wrappers....",
+    error=False,
+)
 
-def is_atari(env):
-    if (hasattr(env.observation_space, "shape")
-            and env.observation_space.shape is not None
-            and len(env.observation_space.shape) <= 2):
-        return False
-    return hasattr(env, "unwrapped") and hasattr(env.unwrapped, "ale")
-
-
-def get_wrapper_by_cls(env, cls):
-    """Returns the gym env wrapper of the given class, or None."""
-    currentenv = env
-    while True:
-        if isinstance(currentenv, cls):
-            return currentenv
-        elif isinstance(currentenv, gym.Wrapper):
-            currentenv = currentenv.env
-        else:
-            return None
-
-
-class MonitorEnv(gym.Wrapper):
-    def __init__(self, env=None):
-        """Record episodes stats prior to EpisodicLifeEnv, etc."""
-        gym.Wrapper.__init__(self, env)
-        self._current_reward = None
-        self._num_steps = None
-        self._total_steps = None
-        self._episode_rewards = []
-        self._episode_lengths = []
-        self._num_episodes = 0
-        self._num_returned = 0
-
-    def reset(self, **kwargs):
-        obs = self.env.reset(**kwargs)
-
-        if self._total_steps is None:
-            self._total_steps = sum(self._episode_lengths)
-
-        if self._current_reward is not None:
-            self._episode_rewards.append(self._current_reward)
-            self._episode_lengths.append(self._num_steps)
-            self._num_episodes += 1
-
-        self._current_reward = 0
-        self._num_steps = 0
-
-        return obs
-
-    def step(self, action):
-        obs, rew, done, info = self.env.step(action)
-        self._current_reward += rew
-        self._num_steps += 1
-        self._total_steps += 1
-        return (obs, rew, done, info)
-
-    def get_episode_rewards(self):
-        return self._episode_rewards
-
-    def get_episode_lengths(self):
-        return self._episode_lengths
-
-    def get_total_steps(self):
-        return self._total_steps
-
-    def next_episode_results(self):
-        for i in range(self._num_returned, len(self._episode_rewards)):
-            yield (self._episode_rewards[i], self._episode_lengths[i])
-        self._num_returned = len(self._episode_rewards)
-
-
-class NoopResetEnv(gym.Wrapper):
-    def __init__(self, env, noop_max=30):
-        """Sample initial states by taking random number of no-ops on reset.
-        No-op is assumed to be action 0.
-        """
-        gym.Wrapper.__init__(self, env)
-        self.noop_max = noop_max
-        self.override_num_noops = None
-        self.noop_action = 0
-        assert env.unwrapped.get_action_meanings()[0] == "NOOP"
-
-    def reset(self, **kwargs):
-        """ Do no-op action for a number of steps in [1, noop_max]."""
-        self.env.reset(**kwargs)
-        if self.override_num_noops is not None:
-            noops = self.override_num_noops
-        else:
-            noops = self.unwrapped.np_random.randint(1, self.noop_max + 1)
-        assert noops > 0
-        obs = None
-        for _ in range(noops):
-            obs, _, done, _ = self.env.step(self.noop_action)
-            if done:
-                obs = self.env.reset(**kwargs)
-        return obs
-
-    def step(self, ac):
-        return self.env.step(ac)
-
-
-class ClipRewardEnv(gym.RewardWrapper):
-    def __init__(self, env):
-        gym.RewardWrapper.__init__(self, env)
-
-    def reward(self, reward):
-        """Bin reward to {+1, 0, -1} by its sign."""
-        return np.sign(reward)
-
-
-class FireResetEnv(gym.Wrapper):
-    def __init__(self, env):
-        """Take action on reset.
-
-        For environments that are fixed until firing."""
-        gym.Wrapper.__init__(self, env)
-        assert env.unwrapped.get_action_meanings()[1] == "FIRE"
-        assert len(env.unwrapped.get_action_meanings()) >= 3
-
-    def reset(self, **kwargs):
-        self.env.reset(**kwargs)
-        obs, _, done, _ = self.env.step(1)
-        if done:
-            self.env.reset(**kwargs)
-        obs, _, done, _ = self.env.step(2)
-        if done:
-            self.env.reset(**kwargs)
-        return obs
-
-    def step(self, ac):
-        return self.env.step(ac)
-
-
-class EpisodicLifeEnv(gym.Wrapper):
-    def __init__(self, env):
-        """Make end-of-life == end-of-episode, but only reset on true game over.
-        Done by DeepMind for the DQN and co. since it helps value estimation.
-        """
-        gym.Wrapper.__init__(self, env)
-        self.lives = 0
-        self.was_real_done = True
-
-    def step(self, action):
-        obs, reward, done, info = self.env.step(action)
-        self.was_real_done = done
-        # check current lives, make loss of life terminal,
-        # then update lives to handle bonus lives
-        lives = self.env.unwrapped.ale.lives()
-        if lives < self.lives and lives > 0:
-            # for Qbert sometimes we stay in lives == 0 condtion for a few fr
-            # so its important to keep lives > 0, so that we only reset once
-            # the environment advertises done.
-            done = True
-        self.lives = lives
-        return obs, reward, done, info
-
-    def reset(self, **kwargs):
-        """Reset only when lives are exhausted.
-        This way all states are still reachable even though lives are episodic,
-        and the learner need not know about any of this behind-the-scenes.
-        """
-        if self.was_real_done:
-            obs = self.env.reset(**kwargs)
-        else:
-            # no-op step to advance from terminal/lost life state
-            obs, _, _, _ = self.env.step(0)
-        self.lives = self.env.unwrapped.ale.lives()
-        return obs
-
-
-class MaxAndSkipEnv(gym.Wrapper):
-    def __init__(self, env, skip=4):
-        """Return only every `skip`-th frame"""
-        gym.Wrapper.__init__(self, env)
-        # most recent raw observations (for max pooling across time steps)
-        self._obs_buffer = np.zeros(
-            (2, ) + env.observation_space.shape, dtype=np.uint8)
-        self._skip = skip
-
-    def step(self, action):
-        """Repeat action, sum reward, and max over last observations."""
-        total_reward = 0.0
-        done = None
-        for i in range(self._skip):
-            obs, reward, done, info = self.env.step(action)
-            if i == self._skip - 2:
-                self._obs_buffer[0] = obs
-            if i == self._skip - 1:
-                self._obs_buffer[1] = obs
-            total_reward += reward
-            if done:
-                break
-        # Note that the observation on the done=True frame
-        # doesn't matter
-        max_frame = self._obs_buffer.max(axis=0)
-
-        return max_frame, total_reward, done, info
-
-    def reset(self, **kwargs):
-        return self.env.reset(**kwargs)
-
-
-class WarpFrame(gym.ObservationWrapper):
-    def __init__(self, env, dim):
-        """Warp frames to the specified size (dim x dim)."""
-        gym.ObservationWrapper.__init__(self, env)
-        self.width = dim
-        self.height = dim
-        self.observation_space = spaces.Box(
-            low=0,
-            high=255,
-            shape=(self.height, self.width, 1),
-            dtype=np.uint8)
-
-    def observation(self, frame):
-        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
-        frame = cv2.resize(
-            frame, (self.width, self.height), interpolation=cv2.INTER_AREA)
-        return frame[:, :, None]
-
-
-# TODO: (sven) Deprecated class. Remove once traj. view is the norm.
-class FrameStack(gym.Wrapper):
-    def __init__(self, env, k):
-        """Stack k last frames."""
-        gym.Wrapper.__init__(self, env)
-        self.k = k
-        self.frames = deque([], maxlen=k)
-        shp = env.observation_space.shape
-        self.observation_space = spaces.Box(
-            low=0,
-            high=255,
-            shape=(shp[0], shp[1], shp[2] * k),
-            dtype=env.observation_space.dtype)
-
-    def reset(self):
-        ob = self.env.reset()
-        for _ in range(self.k):
-            self.frames.append(ob)
-        return self._get_ob()
-
-    def step(self, action):
-        ob, reward, done, info = self.env.step(action)
-        self.frames.append(ob)
-        return self._get_ob(), reward, done, info
-
-    def _get_ob(self):
-        assert len(self.frames) == self.k
-        return np.concatenate(self.frames, axis=2)
-
-
-class FrameStackTrajectoryView(gym.ObservationWrapper):
-    def __init__(self, env):
-        """No stacking. Trajectory View API takes care of this."""
-        gym.Wrapper.__init__(self, env)
-        shp = env.observation_space.shape
-        assert shp[2] == 1
-        self.observation_space = spaces.Box(
-            low=0,
-            high=255,
-            shape=(shp[0], shp[1]),
-            dtype=env.observation_space.dtype)
-
-    def observation(self, observation):
-        return np.squeeze(observation, axis=-1)
-
-
-class ScaledFloatFrame(gym.ObservationWrapper):
-    def __init__(self, env):
-        gym.ObservationWrapper.__init__(self, env)
-        self.observation_space = gym.spaces.Box(
-            low=0, high=1, shape=env.observation_space.shape, dtype=np.float32)
-
-    def observation(self, observation):
-        # careful! This undoes the memory optimization, use
-        # with smaller replay buffers only.
-        return np.array(observation).astype(np.float32) / 255.0
-
-
-def wrap_deepmind(
-        env,
-        dim=84,
-        # TODO: (sven) Remove once traj. view is norm.
-        framestack=True,
-        framestack_via_traj_view_api=False):
-    """Configure environment for DeepMind-style Atari.
-
-    Note that we assume reward clipping is done outside the wrapper.
-
-    Args:
-        dim (int): Dimension to resize observations to (dim x dim).
-        framestack (bool): Whether to framestack observations.
-    """
-    env = MonitorEnv(env)
-    env = NoopResetEnv(env, noop_max=30)
-    if env.spec is not None and "NoFrameskip" in env.spec.id:
-        env = MaxAndSkipEnv(env, skip=4)
-    env = EpisodicLifeEnv(env)
-    if "FIRE" in env.unwrapped.get_action_meanings():
-        env = FireResetEnv(env)
-    env = WarpFrame(env, dim)
-    # env = ScaledFloatFrame(env)  # TODO: use for dqn?
-    # env = ClipRewardEnv(env)  # reward clipping is handled by policy eval
-    # New way of frame stacking via the trajectory view API (model config key:
-    # `num_framestacks=[int]`.
-    if framestack_via_traj_view_api:
-        env = FrameStackTrajectoryView(env)
-    # Old way (w/o traj. view API) via model config key: `framestack=True`.
-    # TODO: (sven) Remove once traj. view is norm.
-    elif framestack is True:
-        env = FrameStack(env, 4)
-    return env
+is_atari = is_atari
+get_wrapper_by_cls = get_wrapper_by_cls
+MonitorEnv = MonitorEnv
+NoopResetEnv = NoopResetEnv
+ClipRewardEnv = ClipRewardEnv
+FireResetEnv = FireResetEnv
+EpisodicLifeEnv = EpisodicLifeEnv
+MaxAndSkipEnv = MaxAndSkipEnv
+WarpFrame = WarpFrame
+FrameStack = FrameStack
+FrameStackTrajectoryView = FrameStackTrajectoryView
+ScaledFloatFrame = ScaledFloatFrame
+wrap_deepmind = wrap_deepmind
diff --git a/rllib/env/constants.py b/rllib/env/constants.py
index 45c9b7119..953e92509 100644
--- a/rllib/env/constants.py
+++ b/rllib/env/constants.py
@@ -1,15 +1,12 @@
-# info key for the individual rewards of an agent, for example:
-# info: {
-#   group_1: {
-#      _group_rewards: [5, -1, 1],  # 3 agents in this group
-#   }
-# }
-GROUP_REWARDS = "_group_rewards"
+from ray.rllib.env.wrappers.group_agents_wrapper import GROUP_REWARDS as GR, \
+    GROUP_INFO as GI
+from ray.rllib.utils.deprecation import deprecation_warning
 
-# info key for the individual infos of an agent, for example:
-# info: {
-#   group_1: {
-#      _group_infos: [{"foo": ...}, {}],  # 2 agents in this group
-#   }
-# }
-GROUP_INFO = "_group_info"
+deprecation_warning(
+    old="ray.rllib.env.constants.GROUP_[REWARDS|INFO]",
+    new="ray.rllib.env.wrappers.group_agents_wrapper.GROUP_[REWARDS|INFO]",
+    error=False,
+)
+
+GROUP_REWARDS = GR
+GROUP_INFO = GI
diff --git a/rllib/env/dm_control_wrapper.py b/rllib/env/dm_control_wrapper.py
index 6734e2a3a..d9fa233e2 100644
--- a/rllib/env/dm_control_wrapper.py
+++ b/rllib/env/dm_control_wrapper.py
@@ -1,203 +1,10 @@
-"""
-DeepMind Control Suite Wrapper directly sourced from:
-https://github.com/denisyarats/dmc2gym
+from ray.rllib.env.wrappers.dm_control_wrapper import DMCEnv as DCE
+from ray.rllib.utils.deprecation import deprecation_warning
 
-MIT License
+deprecation_warning(
+    old="ray.rllib.env.dm_control_wrapper.DMCEnv",
+    new="ray.rllib.env.wrappers.dm_control_wrapper.DMCEnv",
+    error=False,
+)
 
-Copyright (c) 2020 Denis Yarats
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-"""
-from gym import core, spaces
-try:
-    from dm_env import specs
-except ImportError:
-    specs = None
-try:
-    from dm_control import suite
-except ImportError:
-    suite = None
-import numpy as np
-
-
-def _spec_to_box(spec):
-    def extract_min_max(s):
-        assert s.dtype == np.float64 or s.dtype == np.float32
-        dim = np.int(np.prod(s.shape))
-        if type(s) == specs.Array:
-            bound = np.inf * np.ones(dim, dtype=np.float32)
-            return -bound, bound
-        elif type(s) == specs.BoundedArray:
-            zeros = np.zeros(dim, dtype=np.float32)
-            return s.minimum + zeros, s.maximum + zeros
-
-    mins, maxs = [], []
-    for s in spec:
-        mn, mx = extract_min_max(s)
-        mins.append(mn)
-        maxs.append(mx)
-    low = np.concatenate(mins, axis=0)
-    high = np.concatenate(maxs, axis=0)
-    assert low.shape == high.shape
-    return spaces.Box(low, high, dtype=np.float32)
-
-
-def _flatten_obs(obs):
-    obs_pieces = []
-    for v in obs.values():
-        flat = np.array([v]) if np.isscalar(v) else v.ravel()
-        obs_pieces.append(flat)
-    return np.concatenate(obs_pieces, axis=0)
-
-
-class DMCEnv(core.Env):
-    def __init__(self,
-                 domain_name,
-                 task_name,
-                 task_kwargs=None,
-                 visualize_reward=False,
-                 from_pixels=False,
-                 height=64,
-                 width=64,
-                 camera_id=0,
-                 frame_skip=2,
-                 environment_kwargs=None,
-                 channels_first=True,
-                 preprocess=True):
-        self._from_pixels = from_pixels
-        self._height = height
-        self._width = width
-        self._camera_id = camera_id
-        self._frame_skip = frame_skip
-        self._channels_first = channels_first
-        self.preprocess = preprocess
-
-        if specs is None:
-            raise RuntimeError((
-                "The `specs` module from `dm_env` was not imported. Make sure "
-                "`dm_env` is installed and visible in the current python "
-                "environment."))
-        if suite is None:
-            raise RuntimeError(
-                ("The `suite` module from `dm_control` was not imported. Make "
-                 "sure `dm_control` is installed and visible in the current "
-                 "python enviornment."))
-
-        # create task
-        self._env = suite.load(
-            domain_name=domain_name,
-            task_name=task_name,
-            task_kwargs=task_kwargs,
-            visualize_reward=visualize_reward,
-            environment_kwargs=environment_kwargs)
-
-        # true and normalized action spaces
-        self._true_action_space = _spec_to_box([self._env.action_spec()])
-        self._norm_action_space = spaces.Box(
-            low=-1.0,
-            high=1.0,
-            shape=self._true_action_space.shape,
-            dtype=np.float32)
-
-        # create observation space
-        if from_pixels:
-            shape = [3, height,
-                     width] if channels_first else [height, width, 3]
-            self._observation_space = spaces.Box(
-                low=0, high=255, shape=shape, dtype=np.uint8)
-            if preprocess:
-                self._observation_space = spaces.Box(
-                    low=-0.5, high=0.5, shape=shape, dtype=np.float32)
-        else:
-            self._observation_space = _spec_to_box(
-                self._env.observation_spec().values())
-
-        self._state_space = _spec_to_box(self._env.observation_spec().values())
-
-        self.current_state = None
-
-    def __getattr__(self, name):
-        return getattr(self._env, name)
-
-    def _get_obs(self, time_step):
-        if self._from_pixels:
-            obs = self.render(
-                height=self._height,
-                width=self._width,
-                camera_id=self._camera_id)
-            if self._channels_first:
-                obs = obs.transpose(2, 0, 1).copy()
-            if self.preprocess:
-                obs = obs / 255.0 - 0.5
-        else:
-            obs = _flatten_obs(time_step.observation)
-        return obs
-
-    def _convert_action(self, action):
-        action = action.astype(np.float64)
-        true_delta = self._true_action_space.high - self._true_action_space.low
-        norm_delta = self._norm_action_space.high - self._norm_action_space.low
-        action = (action - self._norm_action_space.low) / norm_delta
-        action = action * true_delta + self._true_action_space.low
-        action = action.astype(np.float32)
-        return action
-
-    @property
-    def observation_space(self):
-        return self._observation_space
-
-    @property
-    def state_space(self):
-        return self._state_space
-
-    @property
-    def action_space(self):
-        return self._norm_action_space
-
-    def step(self, action):
-        assert self._norm_action_space.contains(action)
-        action = self._convert_action(action)
-        assert self._true_action_space.contains(action)
-        reward = 0
-        extra = {"internal_state": self._env.physics.get_state().copy()}
-
-        for _ in range(self._frame_skip):
-            time_step = self._env.step(action)
-            reward += time_step.reward or 0
-            done = time_step.last()
-            if done:
-                break
-        obs = self._get_obs(time_step)
-        self.current_state = _flatten_obs(time_step.observation)
-        extra["discount"] = time_step.discount
-        return obs, reward, done, extra
-
-    def reset(self):
-        time_step = self._env.reset()
-        self.current_state = _flatten_obs(time_step.observation)
-        obs = self._get_obs(time_step)
-        return obs
-
-    def render(self, mode="rgb_array", height=None, width=None, camera_id=0):
-        assert mode == "rgb_array", "only support for rgb_array mode"
-        height = height or self._height
-        width = width or self._width
-        camera_id = camera_id or self._camera_id
-        return self._env.physics.render(
-            height=height, width=width, camera_id=camera_id)
+DMCEnv = DCE
diff --git a/rllib/env/dm_env_wrapper.py b/rllib/env/dm_env_wrapper.py
index e402c4cc6..354de1d53 100644
--- a/rllib/env/dm_env_wrapper.py
+++ b/rllib/env/dm_env_wrapper.py
@@ -1,94 +1,10 @@
-import gym
-from gym import spaces
+from ray.rllib.env.wrappers.dm_env_wrapper import DMEnv as DE
+from ray.rllib.utils.deprecation import deprecation_warning
 
-import numpy as np
+deprecation_warning(
+    old="ray.rllib.env.dm_env_wrapper.DMEnv",
+    new="ray.rllib.env.wrappers.dm_env_wrapper.DMEnv",
+    error=False,
+)
 
-try:
-    from dm_env import specs
-except ImportError:
-    specs = None
-
-
-def _convert_spec_to_space(spec):
-    if isinstance(spec, dict):
-        return spaces.Dict(
-            {k: _convert_spec_to_space(v)
-             for k, v in spec.items()})
-    if isinstance(spec, specs.DiscreteArray):
-        return spaces.Discrete(spec.num_values)
-    elif isinstance(spec, specs.BoundedArray):
-        return spaces.Box(
-            low=np.asscalar(spec.minimum),
-            high=np.asscalar(spec.maximum),
-            shape=spec.shape,
-            dtype=spec.dtype)
-    elif isinstance(spec, specs.Array):
-        return spaces.Box(
-            low=-float("inf"),
-            high=float("inf"),
-            shape=spec.shape,
-            dtype=spec.dtype)
-
-    raise NotImplementedError(
-        ("Could not convert `Array` spec of type {} to Gym space. "
-         "Attempted to convert: {}").format(type(spec), spec))
-
-
-class DMEnv(gym.Env):
-    """A `gym.Env` wrapper for the `dm_env` API.
-    """
-
-    metadata = {"render.modes": ["rgb_array"]}
-
-    def __init__(self, dm_env):
-        super(DMEnv, self).__init__()
-        self._env = dm_env
-        self._prev_obs = None
-
-        if specs is None:
-            raise RuntimeError((
-                "The `specs` module from `dm_env` was not imported. Make sure "
-                "`dm_env` is installed and visible in the current python "
-                "environment."))
-
-    def step(self, action):
-        ts = self._env.step(action)
-
-        reward = ts.reward
-        if reward is None:
-            reward = 0.
-
-        return ts.observation, reward, ts.last(), {"discount": ts.discount}
-
-    def reset(self):
-        ts = self._env.reset()
-        return ts.observation
-
-    def render(self, mode="rgb_array"):
-        if self._prev_obs is None:
-            raise ValueError(
-                "Environment not started. Make sure to reset before rendering."
-            )
-
-        if mode == "rgb_array":
-            return self._prev_obs
-        else:
-            raise NotImplementedError(
-                "Render mode '{}' is not supported.".format(mode))
-
-    @property
-    def action_space(self):
-        spec = self._env.action_spec()
-        return _convert_spec_to_space(spec)
-
-    @property
-    def observation_space(self):
-        spec = self._env.observation_spec()
-        return _convert_spec_to_space(spec)
-
-    @property
-    def reward_range(self):
-        spec = self._env.reward_spec()
-        if isinstance(spec, specs.BoundedArray):
-            return spec.minimum, spec.maximum
-        return -float("inf"), float("inf")
+DMEnv = DE
diff --git a/rllib/env/env_context.py b/rllib/env/env_context.py
index c5f9dd62d..637b68317 100644
--- a/rllib/env/env_context.py
+++ b/rllib/env/env_context.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
 from ray.rllib.utils.annotations import PublicAPI
 from ray.rllib.utils.typing import EnvConfigDict
 
@@ -24,9 +26,11 @@ class EnvContext(dict):
                  env_config: EnvConfigDict,
                  worker_index: int,
                  vector_index: int = 0,
-                 remote: bool = False):
+                 remote: bool = False,
+                 num_workers: Optional[int] = None):
         dict.__init__(self, env_config)
         self.worker_index = worker_index
+        self.num_workers = num_workers
         self.vector_index = vector_index
         self.remote = remote
 
@@ -34,10 +38,12 @@ class EnvContext(dict):
                             env_config: EnvConfigDict = None,
                             worker_index: int = None,
                             vector_index: int = None,
-                            remote: bool = None):
+                            remote: bool = None,
+                            num_workers: Optional[int] = None):
         return EnvContext(
             env_config if env_config is not None else self,
             worker_index if worker_index is not None else self.worker_index,
             vector_index if vector_index is not None else self.vector_index,
             remote if remote is not None else self.remote,
+            num_workers if num_workers is not None else self.num_workers,
         )
diff --git a/rllib/env/group_agents_wrapper.py b/rllib/env/group_agents_wrapper.py
index eab11b8ee..c53d7b938 100644
--- a/rllib/env/group_agents_wrapper.py
+++ b/rllib/env/group_agents_wrapper.py
@@ -1,103 +1,11 @@
-from collections import OrderedDict
+from ray.rllib.env.wrappers.group_agents_wrapper import GroupAgentsWrapper as \
+    GAW
+from ray.rllib.utils.deprecation import deprecation_warning
 
-from ray.rllib.env.constants import GROUP_REWARDS, GROUP_INFO
-from ray.rllib.env.multi_agent_env import MultiAgentEnv
+deprecation_warning(
+    old="ray.rllib.env.group_agents_wrapper._GroupAgentsWrapper",
+    new="ray.rllib.env.wrappers.group_agents_wrapper.GroupAgentsWrapper",
+    error=False,
+)
 
-
-# TODO(ekl) we should add some unit tests for this
-class _GroupAgentsWrapper(MultiAgentEnv):
-    """Wraps a MultiAgentEnv environment with agents grouped as specified.
-
-    See multi_agent_env.py for the specification of groups.
-
-    This API is experimental.
-    """
-
-    def __init__(self, env, groups, obs_space=None, act_space=None):
-        """Wrap an existing multi-agent env to group agents together.
-
-        See MultiAgentEnv.with_agent_groups() for usage info.
-
-        Args:
-            env (MultiAgentEnv): env to wrap
-            groups (dict): Grouping spec as documented in MultiAgentEnv
-            obs_space (Space): Optional observation space for the grouped
-                env. Must be a tuple space.
-            act_space (Space): Optional action space for the grouped env.
-                Must be a tuple space.
-        """
-
-        self.env = env
-        self.groups = groups
-        self.agent_id_to_group = {}
-        for group_id, agent_ids in groups.items():
-            for agent_id in agent_ids:
-                if agent_id in self.agent_id_to_group:
-                    raise ValueError(
-                        "Agent id {} is in multiple groups".format(
-                            agent_id, groups))
-                self.agent_id_to_group[agent_id] = group_id
-        if obs_space is not None:
-            self.observation_space = obs_space
-        if act_space is not None:
-            self.action_space = act_space
-
-    def reset(self):
-        obs = self.env.reset()
-        return self._group_items(obs)
-
-    def step(self, action_dict):
-        # Ungroup and send actions
-        action_dict = self._ungroup_items(action_dict)
-        obs, rewards, dones, infos = self.env.step(action_dict)
-
-        # Apply grouping transforms to the env outputs
-        obs = self._group_items(obs)
-        rewards = self._group_items(
-            rewards, agg_fn=lambda gvals: list(gvals.values()))
-        dones = self._group_items(
-            dones, agg_fn=lambda gvals: all(gvals.values()))
-        infos = self._group_items(
-            infos, agg_fn=lambda gvals: {GROUP_INFO: list(gvals.values())})
-
-        # Aggregate rewards, but preserve the original values in infos
-        for agent_id, rew in rewards.items():
-            if isinstance(rew, list):
-                rewards[agent_id] = sum(rew)
-                if agent_id not in infos:
-                    infos[agent_id] = {}
-                infos[agent_id][GROUP_REWARDS] = rew
-
-        return obs, rewards, dones, infos
-
-    def _ungroup_items(self, items):
-        out = {}
-        for agent_id, value in items.items():
-            if agent_id in self.groups:
-                assert len(value) == len(self.groups[agent_id]), \
-                    (agent_id, value, self.groups)
-                for a, v in zip(self.groups[agent_id], value):
-                    out[a] = v
-            else:
-                out[agent_id] = value
-        return out
-
-    def _group_items(self, items, agg_fn=lambda gvals: list(gvals.values())):
-        grouped_items = {}
-        for agent_id, item in items.items():
-            if agent_id in self.agent_id_to_group:
-                group_id = self.agent_id_to_group[agent_id]
-                if group_id in grouped_items:
-                    continue  # already added
-                group_out = OrderedDict()
-                for a in self.groups[group_id]:
-                    if a in items:
-                        group_out[a] = items[a]
-                    else:
-                        raise ValueError(
-                            "Missing member of group {}: {}: {}".format(
-                                group_id, a, items))
-                grouped_items[group_id] = agg_fn(group_out)
-            else:
-                grouped_items[agent_id] = item
-        return grouped_items
+_GroupAgentsWrapper = GAW
diff --git a/rllib/env/model_vector_env.py b/rllib/env/model_vector_env.py
index b5b9700d6..4a7378753 100644
--- a/rllib/env/model_vector_env.py
+++ b/rllib/env/model_vector_env.py
@@ -1,134 +1,10 @@
-import logging
-import numpy as np
-from gym.spaces import Discrete
-from ray.rllib.utils.annotations import override
-from ray.rllib.env.vector_env import VectorEnv
-from ray.rllib.evaluation.rollout_worker import get_global_worker
-from ray.rllib.env.base_env import BaseEnv
-from ray.rllib.utils.typing import EnvType
+from ray.rllib.env.wrappers.model_vector_env import model_vector_env as mve
+from ray.rllib.utils.deprecation import deprecation_warning
 
-logger = logging.getLogger(__name__)
+deprecation_warning(
+    old="ray.rllib.env.model_vector_env.model_vector_env",
+    new="ray.rllib.env.wrappers.model_vector_env.model_vector_env",
+    error=False,
+)
 
-
-def model_vector_env(env: EnvType) -> BaseEnv:
-    """Returns a VectorizedEnv wrapper around the given environment.
-
-    To obtain worker configs, one can call get_global_worker().
-
-    Args:
-        env (EnvType): The input environment (of any supported environment
-            type) to be convert to a _VectorizedModelGymEnv (wrapped as
-            an RLlib BaseEnv).
-
-    Returns:
-        BaseEnv: The BaseEnv converted input `env`.
-    """
-    worker = get_global_worker()
-    worker_index = worker.worker_index
-    if worker_index:
-        env = _VectorizedModelGymEnv(
-            make_env=worker.make_env_fn,
-            existing_envs=[env],
-            num_envs=worker.num_envs,
-            observation_space=env.observation_space,
-            action_space=env.action_space,
-        )
-    return BaseEnv.to_base_env(
-        env,
-        make_env=worker.make_env_fn,
-        num_envs=worker.num_envs,
-        remote_envs=False,
-        remote_env_batch_wait_ms=0)
-
-
-class _VectorizedModelGymEnv(VectorEnv):
-    """Vectorized Environment Wrapper for MB-MPO.
-
-    Primary change is in the `vector_step` method, which calls the dynamics
-    models for next_obs "calculation" (instead of the actual env). Also, the
-    actual envs need to have two extra methods implemented: `reward(obs)` and
-    (optionally) `done(obs)`. If `done` is not implemented, we will assume
-    that episodes in the env do not terminate, ever.
-    """
-
-    def __init__(self,
-                 make_env=None,
-                 existing_envs=None,
-                 num_envs=1,
-                 *,
-                 observation_space=None,
-                 action_space=None,
-                 env_config=None):
-        self.make_env = make_env
-        self.envs = existing_envs
-        self.num_envs = num_envs
-        while len(self.envs) < num_envs:
-            self.envs.append(self.make_env(len(self.envs)))
-
-        super().__init__(
-            observation_space=observation_space
-            or self.envs[0].observation_space,
-            action_space=action_space or self.envs[0].action_space,
-            num_envs=num_envs)
-        worker = get_global_worker()
-        self.model, self.device = worker.foreach_policy(
-            lambda x, y: (x.dynamics_model, x.device))[0]
-
-    @override(VectorEnv)
-    def vector_reset(self):
-        """Override parent to store actual env obs for upcoming predictions.
-        """
-        self.cur_obs = [e.reset() for e in self.envs]
-        return self.cur_obs
-
-    @override(VectorEnv)
-    def reset_at(self, index):
-        """Override parent to store actual env obs for upcoming predictions.
-        """
-        obs = self.envs[index].reset()
-        self.cur_obs[index] = obs
-        return obs
-
-    @override(VectorEnv)
-    def vector_step(self, actions):
-        if self.cur_obs is None:
-            raise ValueError("Need to reset env first")
-
-        # If discrete, need to one-hot actions
-        if isinstance(self.action_space, Discrete):
-            act = np.array(actions)
-            new_act = np.zeros((act.size, act.max() + 1))
-            new_act[np.arange(act.size), act] = 1
-            actions = new_act.astype("float32")
-
-        # Batch the TD-model prediction.
-        obs_batch = np.stack(self.cur_obs, axis=0)
-        action_batch = np.stack(actions, axis=0)
-        # Predict the next observation, given previous a) real obs
-        # (after a reset), b) predicted obs (any other time).
-        next_obs_batch = self.model.predict_model_batches(
-            obs_batch, action_batch, device=self.device)
-        next_obs_batch = np.clip(next_obs_batch, -1000, 1000)
-
-        # Call env's reward function.
-        # Note: Each actual env must implement one to output exact rewards.
-        rew_batch = self.envs[0].reward(obs_batch, action_batch,
-                                        next_obs_batch)
-
-        # If env has a `done` method, use it.
-        if hasattr(self.envs[0], "done"):
-            dones_batch = self.envs[0].done(next_obs_batch)
-        # Otherwise, assume the episode does not end.
-        else:
-            dones_batch = np.asarray([False for _ in range(self.num_envs)])
-
-        info_batch = [{} for _ in range(self.num_envs)]
-
-        self.cur_obs = next_obs_batch
-
-        return list(next_obs_batch), list(rew_batch), list(
-            dones_batch), info_batch
-
-    @override(VectorEnv)
-    def get_unwrapped(self):
-        return self.envs
+model_vector_env = mve
diff --git a/rllib/env/multi_agent_env.py b/rllib/env/multi_agent_env.py
index 1000a6a0d..610aa2e5e 100644
--- a/rllib/env/multi_agent_env.py
+++ b/rllib/env/multi_agent_env.py
@@ -117,7 +117,69 @@ class MultiAgentEnv:
             ... })
         """
 
-        from ray.rllib.env.group_agents_wrapper import _GroupAgentsWrapper
-        return _GroupAgentsWrapper(self, groups, obs_space, act_space)
+        from ray.rllib.env.wrappers.group_agents_wrapper import \
+            GroupAgentsWrapper
+        return GroupAgentsWrapper(self, groups, obs_space, act_space)
 # __grouping_doc_end__
 # yapf: enable
+
+
+def make_multi_agent(env_name_or_creator):
+    """Convenience wrapper for any sigle-agent env to be converted into MA.
+
+    Agent IDs are int numbers starting from 0 (first agent).
+
+    Args:
+        env_name_or_creator (Union[str, Callable[]]: String specifier or
+            env_maker function.
+
+    Returns:
+        Type[MultiAgentEnv]: New MultiAgentEnv class to be used as env.
+            The constructor takes a config dict with `num_agents` key
+            (default=1). The reset of the config dict will be passed on to the
+            underlying single-agent env's constructor.
+
+    Examples:
+         >>> # By gym string:
+         >>> ma_cartpole_cls = make_multi_agent("CartPole-v0")
+         >>> # Create a 2 agent multi-agent cartpole.
+         >>> ma_cartpole = ma_cartpole_cls({"num_agents": 2})
+         >>> obs = ma_cartpole.reset()
+         >>> print(obs)
+         ... {0: [...], 1: [...]}
+
+         >>> # By env-maker callable:
+         >>> ma_stateless_cartpole_cls = make_multi_agent(
+         ...    lambda config: StatelessCartPole(config))
+         >>> # Create a 2 agent multi-agent stateless cartpole.
+         >>> ma_stateless_cartpole = ma_stateless_cartpole_cls(
+         ...    {"num_agents": 2})
+    """
+
+    class MultiEnv(MultiAgentEnv):
+        def __init__(self, config):
+            num = config.pop("num_agents", 1)
+            if isinstance(env_name_or_creator, str):
+                self.agents = [
+                    gym.make(env_name_or_creator) for _ in range(num)
+                ]
+            else:
+                self.agents = [env_name_or_creator(config) for _ in range(num)]
+            self.dones = set()
+            self.observation_space = self.agents[0].observation_space
+            self.action_space = self.agents[0].action_space
+
+        def reset(self):
+            self.dones = set()
+            return {i: a.reset() for i, a in enumerate(self.agents)}
+
+        def step(self, action_dict):
+            obs, rew, done, info = {}, {}, {}, {}
+            for i, action in action_dict.items():
+                obs[i], rew[i], done[i], info[i] = self.agents[i].step(action)
+                if done[i]:
+                    self.dones.add(i)
+            done["__all__"] = len(self.dones) == len(self.agents)
+            return obs, rew, done, info
+
+    return MultiEnv
diff --git a/rllib/env/pettingzoo_env.py b/rllib/env/pettingzoo_env.py
index 9c45b6224..64ef1668a 100644
--- a/rllib/env/pettingzoo_env.py
+++ b/rllib/env/pettingzoo_env.py
@@ -1,185 +1,10 @@
-from ray.rllib.env.multi_agent_env import MultiAgentEnv
+from ray.rllib.env.wrappers.pettingzoo_env import PettingZooEnv as PE
+from ray.rllib.utils.deprecation import deprecation_warning
 
+deprecation_warning(
+    old="ray.rllib.env.pettingzoo_env.PettingZooEnv",
+    new="ray.rllib.env.wrappers.pettingzoo_env.PettingZooEnv",
+    error=False,
+)
 
-class PettingZooEnv(MultiAgentEnv):
-    """An interface to the PettingZoo MARL environment library.
-
-    See: https://github.com/PettingZoo-Team/PettingZoo
-
-    Inherits from MultiAgentEnv and exposes a given AEC
-    (actor-environment-cycle) game from the PettingZoo project via the
-    MultiAgentEnv public API.
-
-    Note that the wrapper has some important limitations:
-
-    1. All agents have the same action_spaces and observation_spaces.
-       Note: If, within your aec game, agents do not have homogeneous action /
-       observation spaces, apply SuperSuit wrappers
-       to apply padding functionality: https://github.com/PettingZoo-Team/
-       SuperSuit#built-in-multi-agent-only-functions
-    2. Environments are positive sum games (-> Agents are expected to cooperate
-       to maximize reward). This isn't a hard restriction, it just that
-       standard algorithms aren't expected to work well in highly competitive
-       games.
-
-    Examples:
-        >>> from pettingzoo.butterfly import prison_v2
-        >>> env = PettingZooEnv(prison_v2.env())
-        >>> obs = env.reset()
-        >>> print(obs)
-        # only returns the observation for the agent which should be stepping
-        {
-            'prisoner_0': array([[[0, 0, 0],
-                [0, 0, 0],
-                [0, 0, 0],
-                ...,
-                [0, 0, 0],
-                [0, 0, 0],
-                [0, 0, 0]]], dtype=uint8)
-        }
-        >>> obs, rewards, dones, infos = env.step({
-        ...                 "prisoner_0": 1
-        ...             })
-        # only returns the observation, reward, info, etc, for
-        # the agent who's turn is next.
-        >>> print(obs)
-        {
-            'prisoner_1': array([[[0, 0, 0],
-                [0, 0, 0],
-                [0, 0, 0],
-                ...,
-                [0, 0, 0],
-                [0, 0, 0],
-                [0, 0, 0]]], dtype=uint8)
-        }
-        >>> print(rewards)
-        {
-            'prisoner_1': 0
-        }
-        >>> print(dones)
-        {
-            'prisoner_1': False, '__all__': False
-        }
-        >>> print(infos)
-        {
-            'prisoner_1': {'map_tuple': (1, 0)}
-        }
-    """
-
-    def __init__(self, env):
-        self.env = env
-        # agent idx list
-        self.agents = self.env.possible_agents
-
-        # Get dictionaries of obs_spaces and act_spaces
-        self.observation_spaces = self.env.observation_spaces
-        self.action_spaces = self.env.action_spaces
-
-        # Get first observation space, assuming all agents have equal space
-        self.observation_space = self.observation_spaces[self.agents[0]]
-
-        # Get first action space, assuming all agents have equal space
-        self.action_space = self.action_spaces[self.agents[0]]
-
-        assert all(obs_space == self.observation_space
-                   for obs_space
-                   in self.env.observation_spaces.values()), \
-            "Observation spaces for all agents must be identical. Perhaps " \
-            "SuperSuit's pad_observations wrapper can help (useage: " \
-            "`supersuit.aec_wrappers.pad_observations(env)`"
-
-        assert all(act_space == self.action_space
-                   for act_space in self.env.action_spaces.values()), \
-            "Action spaces for all agents must be identical. Perhaps " \
-            "SuperSuit's pad_action_space wrapper can help (useage: " \
-            "`supersuit.aec_wrappers.pad_action_space(env)`"
-
-        self.reset()
-
-    def reset(self):
-        self.env.reset()
-        return {
-            self.env.agent_selection: self.env.observe(
-                self.env.agent_selection)
-        }
-
-    def step(self, action):
-        self.env.step(action[self.env.agent_selection])
-        obs_d = {}
-        rew_d = {}
-        done_d = {}
-        info_d = {}
-        while self.env.agents:
-            obs, rew, done, info = self.env.last()
-            a = self.env.agent_selection
-            obs_d[a] = obs
-            rew_d[a] = rew
-            done_d[a] = done
-            info_d[a] = info
-            if self.env.dones[self.env.agent_selection]:
-                self.env.step(None)
-            else:
-                break
-
-        all_done = not self.env.agents
-        done_d["__all__"] = all_done
-
-        return obs_d, rew_d, done_d, info_d
-
-    def close(self):
-        self.env.close()
-
-    def seed(self, seed=None):
-        self.env.seed(seed)
-
-    def render(self, mode="human"):
-        return self.env.render(mode)
-
-
-class ParallelPettingZooEnv(MultiAgentEnv):
-    def __init__(self, env):
-        self.par_env = env
-        # agent idx list
-        self.agents = self.par_env.possible_agents
-
-        # Get dictionaries of obs_spaces and act_spaces
-        self.observation_spaces = self.par_env.observation_spaces
-        self.action_spaces = self.par_env.action_spaces
-
-        # Get first observation space, assuming all agents have equal space
-        self.observation_space = self.observation_spaces[self.agents[0]]
-
-        # Get first action space, assuming all agents have equal space
-        self.action_space = self.action_spaces[self.agents[0]]
-
-        assert all(obs_space == self.observation_space
-                   for obs_space
-                   in self.par_env.observation_spaces.values()), \
-            "Observation spaces for all agents must be identical. Perhaps " \
-            "SuperSuit's pad_observations wrapper can help (useage: " \
-            "`supersuit.aec_wrappers.pad_observations(env)`"
-
-        assert all(act_space == self.action_space
-                   for act_space in self.par_env.action_spaces.values()), \
-            "Action spaces for all agents must be identical. Perhaps " \
-            "SuperSuit's pad_action_space wrapper can help (useage: " \
-            "`supersuit.aec_wrappers.pad_action_space(env)`"
-
-        self.reset()
-
-    def reset(self):
-        return self.par_env.reset()
-
-    def step(self, action_dict):
-        obss, rews, dones, infos = self.par_env.step(action_dict)
-        dones["__all__"] = all(dones.values())
-        return obss, rews, dones, infos
-
-    def close(self):
-        self.par_env.close()
-
-    def seed(self, seed=None):
-        self.par_env.seed(seed)
-
-    def render(self, mode="human"):
-        return self.par_env.render(mode)
+PettingZooEnv = PE
diff --git a/rllib/env/unity3d_env.py b/rllib/env/unity3d_env.py
index 753c23443..3326e6c78 100644
--- a/rllib/env/unity3d_env.py
+++ b/rllib/env/unity3d_env.py
@@ -1,275 +1,10 @@
-from gym.spaces import Box, MultiDiscrete, Tuple as TupleSpace
-import logging
-import numpy as np
-import random
-import time
-from typing import Callable, Optional, Tuple
+from ray.rllib.env.wrappers.unity3d_env import Unity3DEnv as UE
+from ray.rllib.utils.deprecation import deprecation_warning
 
-from ray.rllib.env.multi_agent_env import MultiAgentEnv
-from ray.rllib.utils.annotations import override
-from ray.rllib.utils.typing import MultiAgentDict, PolicyID, AgentID
+deprecation_warning(
+    old="ray.rllib.env.unity3d_env.Unity3DEnv",
+    new="ray.rllib.env.wrappers.unity3d_env.Unity3DEnv",
+    error=False,
+)
 
-logger = logging.getLogger(__name__)
-
-
-class Unity3DEnv(MultiAgentEnv):
-    """A MultiAgentEnv representing a single Unity3D game instance.
-
-    For an example on how to use this Env with a running Unity3D editor
-    or with a compiled game, see:
-    `rllib/examples/unity3d_env_local.py`
-    For an example on how to use it inside a Unity game client, which
-    connects to an RLlib Policy server, see:
-    `rllib/examples/serving/unity3d_[client|server].py`
-
-    Supports all Unity3D (MLAgents) examples, multi- or single-agent and
-    gets converted automatically into an ExternalMultiAgentEnv, when used
-    inside an RLlib PolicyClient for cloud/distributed training of Unity games.
-    """
-
-    _BASE_PORT = 5004
-
-    def __init__(self,
-                 file_name: str = None,
-                 port: Optional[int] = None,
-                 seed: int = 0,
-                 no_graphics: bool = False,
-                 timeout_wait: int = 300,
-                 episode_horizon: int = 1000):
-        """Initializes a Unity3DEnv object.
-
-        Args:
-            file_name (Optional[str]): Name of the Unity game binary.
-                If None, will assume a locally running Unity3D editor
-                to be used, instead.
-            port (Optional[int]): Port number to connect to Unity environment.
-            seed (int): A random seed value to use for the Unity3D game.
-            no_graphics (bool): Whether to run the Unity3D simulator in
-                no-graphics mode. Default: False.
-            timeout_wait (int): Time (in seconds) to wait for connection from
-                the Unity3D instance.
-            episode_horizon (int): A hard horizon to abide to. After at most
-                this many steps (per-agent episode `step()` calls), the
-                Unity3D game is reset and will start again (finishing the
-                multi-agent episode that the game represents).
-                Note: The game itself may contain its own episode length
-                limits, which are always obeyed (on top of this value here).
-        """
-
-        super().__init__()
-
-        if file_name is None:
-            print(
-                "No game binary provided, will use a running Unity editor "
-                "instead.\nMake sure you are pressing the Play (|>) button in "
-                "your editor to start.")
-
-        import mlagents_envs
-        from mlagents_envs.environment import UnityEnvironment
-
-        # Try connecting to the Unity3D game instance. If a port is blocked
-        while True:
-            # Sleep for random time to allow for concurrent startup of many
-            # environments (num_workers >> 1). Otherwise, would lead to port
-            # conflicts sometimes.
-            time.sleep(random.randint(1, 10))
-            port_ = port or self._BASE_PORT
-            self._BASE_PORT += 1
-            try:
-                self.unity_env = UnityEnvironment(
-                    file_name=file_name,
-                    worker_id=0,
-                    base_port=port_,
-                    seed=seed,
-                    no_graphics=no_graphics,
-                    timeout_wait=timeout_wait,
-                )
-                print("Created UnityEnvironment for port {}".format(port_))
-            except mlagents_envs.exception.UnityWorkerInUseException:
-                pass
-            else:
-                break
-
-        # Reset entire env every this number of step calls.
-        self.episode_horizon = episode_horizon
-        # Keep track of how many times we have called `step` so far.
-        self.episode_timesteps = 0
-
-    @override(MultiAgentEnv)
-    def step(
-            self, action_dict: MultiAgentDict
-    ) -> Tuple[MultiAgentDict, MultiAgentDict, MultiAgentDict, MultiAgentDict]:
-        """Performs one multi-agent step through the game.
-
-        Args:
-            action_dict (dict): Multi-agent action dict with:
-                keys=agent identifier consisting of
-                [MLagents behavior name, e.g. "Goalie?team=1"] + "_" +
-                [Agent index, a unique MLAgent-assigned index per single agent]
-
-        Returns:
-            tuple:
-                - obs: Multi-agent observation dict.
-                    Only those observations for which to get new actions are
-                    returned.
-                - rewards: Rewards dict matching `obs`.
-                - dones: Done dict with only an __all__ multi-agent entry in
-                    it. __all__=True, if episode is done for all agents.
-                - infos: An (empty) info dict.
-        """
-
-        # Set only the required actions (from the DecisionSteps) in Unity3D.
-        all_agents = []
-        for behavior_name in self.unity_env.behavior_specs:
-            for agent_id in self.unity_env.get_steps(behavior_name)[
-                    0].agent_id_to_index.keys():
-                key = behavior_name + "_{}".format(agent_id)
-                all_agents.append(key)
-                self.unity_env.set_action_for_agent(behavior_name, agent_id,
-                                                    action_dict[key])
-        # Do the step.
-        self.unity_env.step()
-
-        obs, rewards, dones, infos = self._get_step_results()
-
-        # Global horizon reached? -> Return __all__ done=True, so user
-        # can reset. Set all agents' individual `done` to True as well.
-        self.episode_timesteps += 1
-        if self.episode_timesteps > self.episode_horizon:
-            return obs, rewards, dict({
-                "__all__": True
-            }, **{agent_id: True
-                  for agent_id in all_agents}), infos
-
-        return obs, rewards, dones, infos
-
-    @override(MultiAgentEnv)
-    def reset(self) -> MultiAgentDict:
-        """Resets the entire Unity3D scene (a single multi-agent episode)."""
-        self.episode_timesteps = 0
-        self.unity_env.reset()
-        obs, _, _, _ = self._get_step_results()
-        return obs
-
-    def _get_step_results(self):
-        """Collects those agents' obs/rewards that have to act in next `step`.
-
-        Returns:
-            Tuple:
-                obs: Multi-agent observation dict.
-                    Only those observations for which to get new actions are
-                    returned.
-                rewards: Rewards dict matching `obs`.
-                dones: Done dict with only an __all__ multi-agent entry in it.
-                    __all__=True, if episode is done for all agents.
-                infos: An (empty) info dict.
-        """
-        obs = {}
-        rewards = {}
-        infos = {}
-        for behavior_name in self.unity_env.behavior_specs:
-            decision_steps, terminal_steps = self.unity_env.get_steps(
-                behavior_name)
-            # Important: Only update those sub-envs that are currently
-            # available within _env_state.
-            # Loop through all envs ("agents") and fill in, whatever
-            # information we have.
-            for agent_id, idx in decision_steps.agent_id_to_index.items():
-                key = behavior_name + "_{}".format(agent_id)
-                os = tuple(o[idx] for o in decision_steps.obs)
-                os = os[0] if len(os) == 1 else os
-                obs[key] = os
-                rewards[key] = decision_steps.reward[idx]  # rewards vector
-            for agent_id, idx in terminal_steps.agent_id_to_index.items():
-                key = behavior_name + "_{}".format(agent_id)
-                # Only overwrite rewards (last reward in episode), b/c obs
-                # here is the last obs (which doesn't matter anyways).
-                # Unless key does not exist in obs.
-                if key not in obs:
-                    os = tuple(o[idx] for o in terminal_steps.obs)
-                    obs[key] = os = os[0] if len(os) == 1 else os
-                rewards[key] = terminal_steps.reward[idx]  # rewards vector
-
-        # Only use dones if all agents are done, then we should do a reset.
-        return obs, rewards, {"__all__": False}, infos
-
-    @staticmethod
-    def get_policy_configs_for_game(
-            game_name: str) -> Tuple[dict, Callable[[AgentID], PolicyID]]:
-
-        # The RLlib server must know about the Spaces that the Client will be
-        # using inside Unity3D, up-front.
-        obs_spaces = {
-            # 3DBall.
-            "3DBall": Box(float("-inf"), float("inf"), (8, )),
-            # 3DBallHard.
-            "3DBallHard": Box(float("-inf"), float("inf"), (45, )),
-            # Pyramids.
-            "Pyramids": TupleSpace([
-                Box(float("-inf"), float("inf"), (56, )),
-                Box(float("-inf"), float("inf"), (56, )),
-                Box(float("-inf"), float("inf"), (56, )),
-                Box(float("-inf"), float("inf"), (4, )),
-            ]),
-            # SoccerStrikersVsGoalie.
-            "Goalie": Box(float("-inf"), float("inf"), (738, )),
-            "Striker": TupleSpace([
-                Box(float("-inf"), float("inf"), (231, )),
-                Box(float("-inf"), float("inf"), (63, )),
-            ]),
-            # Tennis.
-            "Tennis": Box(float("-inf"), float("inf"), (27, )),
-            # VisualHallway.
-            "VisualHallway": Box(float("-inf"), float("inf"), (84, 84, 3)),
-            # Walker.
-            "Walker": Box(float("-inf"), float("inf"), (212, )),
-            # FoodCollector.
-            "FoodCollector": TupleSpace([
-                Box(float("-inf"), float("inf"), (49, )),
-                Box(float("-inf"), float("inf"), (4, )),
-            ]),
-        }
-        action_spaces = {
-            # 3DBall.
-            "3DBall": Box(
-                float("-inf"), float("inf"), (2, ), dtype=np.float32),
-            # 3DBallHard.
-            "3DBallHard": Box(
-                float("-inf"), float("inf"), (2, ), dtype=np.float32),
-            # Pyramids.
-            "Pyramids": MultiDiscrete([5]),
-            # SoccerStrikersVsGoalie.
-            "Goalie": MultiDiscrete([3, 3, 3]),
-            "Striker": MultiDiscrete([3, 3, 3]),
-            # Tennis.
-            "Tennis": Box(float("-inf"), float("inf"), (3, )),
-            # VisualHallway.
-            "VisualHallway": MultiDiscrete([5]),
-            # Walker.
-            "Walker": Box(float("-inf"), float("inf"), (39, )),
-            # FoodCollector.
-            "FoodCollector": MultiDiscrete([3, 3, 3, 2]),
-        }
-
-        # Policies (Unity: "behaviors") and agent-to-policy mapping fns.
-        if game_name == "SoccerStrikersVsGoalie":
-            policies = {
-                "Goalie": (None, obs_spaces["Goalie"], action_spaces["Goalie"],
-                           {}),
-                "Striker": (None, obs_spaces["Striker"],
-                            action_spaces["Striker"], {}),
-            }
-
-            def policy_mapping_fn(agent_id):
-                return "Striker" if "Striker" in agent_id else "Goalie"
-
-        else:
-            policies = {
-                game_name: (None, obs_spaces[game_name],
-                            action_spaces[game_name], {}),
-            }
-
-            def policy_mapping_fn(agent_id):
-                return game_name
-
-        return policies, policy_mapping_fn
+Unity3DEnv = UE
diff --git a/rllib/env/wrappers/atari_wrappers.py b/rllib/env/wrappers/atari_wrappers.py
new file mode 100644
index 000000000..0353484a0
--- /dev/null
+++ b/rllib/env/wrappers/atari_wrappers.py
@@ -0,0 +1,320 @@
+from collections import deque
+import cv2
+import gym
+from gym import spaces
+import numpy as np
+
+cv2.ocl.setUseOpenCL(False)
+
+
+def is_atari(env):
+    if (hasattr(env.observation_space, "shape")
+            and env.observation_space.shape is not None
+            and len(env.observation_space.shape) <= 2):
+        return False
+    return hasattr(env, "unwrapped") and hasattr(env.unwrapped, "ale")
+
+
+def get_wrapper_by_cls(env, cls):
+    """Returns the gym env wrapper of the given class, or None."""
+    currentenv = env
+    while True:
+        if isinstance(currentenv, cls):
+            return currentenv
+        elif isinstance(currentenv, gym.Wrapper):
+            currentenv = currentenv.env
+        else:
+            return None
+
+
+class MonitorEnv(gym.Wrapper):
+    def __init__(self, env=None):
+        """Record episodes stats prior to EpisodicLifeEnv, etc."""
+        gym.Wrapper.__init__(self, env)
+        self._current_reward = None
+        self._num_steps = None
+        self._total_steps = None
+        self._episode_rewards = []
+        self._episode_lengths = []
+        self._num_episodes = 0
+        self._num_returned = 0
+
+    def reset(self, **kwargs):
+        obs = self.env.reset(**kwargs)
+
+        if self._total_steps is None:
+            self._total_steps = sum(self._episode_lengths)
+
+        if self._current_reward is not None:
+            self._episode_rewards.append(self._current_reward)
+            self._episode_lengths.append(self._num_steps)
+            self._num_episodes += 1
+
+        self._current_reward = 0
+        self._num_steps = 0
+
+        return obs
+
+    def step(self, action):
+        obs, rew, done, info = self.env.step(action)
+        self._current_reward += rew
+        self._num_steps += 1
+        self._total_steps += 1
+        return (obs, rew, done, info)
+
+    def get_episode_rewards(self):
+        return self._episode_rewards
+
+    def get_episode_lengths(self):
+        return self._episode_lengths
+
+    def get_total_steps(self):
+        return self._total_steps
+
+    def next_episode_results(self):
+        for i in range(self._num_returned, len(self._episode_rewards)):
+            yield (self._episode_rewards[i], self._episode_lengths[i])
+        self._num_returned = len(self._episode_rewards)
+
+
+class NoopResetEnv(gym.Wrapper):
+    def __init__(self, env, noop_max=30):
+        """Sample initial states by taking random number of no-ops on reset.
+        No-op is assumed to be action 0.
+        """
+        gym.Wrapper.__init__(self, env)
+        self.noop_max = noop_max
+        self.override_num_noops = None
+        self.noop_action = 0
+        assert env.unwrapped.get_action_meanings()[0] == "NOOP"
+
+    def reset(self, **kwargs):
+        """ Do no-op action for a number of steps in [1, noop_max]."""
+        self.env.reset(**kwargs)
+        if self.override_num_noops is not None:
+            noops = self.override_num_noops
+        else:
+            noops = self.unwrapped.np_random.randint(1, self.noop_max + 1)
+        assert noops > 0
+        obs = None
+        for _ in range(noops):
+            obs, _, done, _ = self.env.step(self.noop_action)
+            if done:
+                obs = self.env.reset(**kwargs)
+        return obs
+
+    def step(self, ac):
+        return self.env.step(ac)
+
+
+class ClipRewardEnv(gym.RewardWrapper):
+    def __init__(self, env):
+        gym.RewardWrapper.__init__(self, env)
+
+    def reward(self, reward):
+        """Bin reward to {+1, 0, -1} by its sign."""
+        return np.sign(reward)
+
+
+class FireResetEnv(gym.Wrapper):
+    def __init__(self, env):
+        """Take action on reset.
+
+        For environments that are fixed until firing."""
+        gym.Wrapper.__init__(self, env)
+        assert env.unwrapped.get_action_meanings()[1] == "FIRE"
+        assert len(env.unwrapped.get_action_meanings()) >= 3
+
+    def reset(self, **kwargs):
+        self.env.reset(**kwargs)
+        obs, _, done, _ = self.env.step(1)
+        if done:
+            self.env.reset(**kwargs)
+        obs, _, done, _ = self.env.step(2)
+        if done:
+            self.env.reset(**kwargs)
+        return obs
+
+    def step(self, ac):
+        return self.env.step(ac)
+
+
+class EpisodicLifeEnv(gym.Wrapper):
+    def __init__(self, env):
+        """Make end-of-life == end-of-episode, but only reset on true game over.
+        Done by DeepMind for the DQN and co. since it helps value estimation.
+        """
+        gym.Wrapper.__init__(self, env)
+        self.lives = 0
+        self.was_real_done = True
+
+    def step(self, action):
+        obs, reward, done, info = self.env.step(action)
+        self.was_real_done = done
+        # check current lives, make loss of life terminal,
+        # then update lives to handle bonus lives
+        lives = self.env.unwrapped.ale.lives()
+        if lives < self.lives and lives > 0:
+            # for Qbert sometimes we stay in lives == 0 condtion for a few fr
+            # so its important to keep lives > 0, so that we only reset once
+            # the environment advertises done.
+            done = True
+        self.lives = lives
+        return obs, reward, done, info
+
+    def reset(self, **kwargs):
+        """Reset only when lives are exhausted.
+        This way all states are still reachable even though lives are episodic,
+        and the learner need not know about any of this behind-the-scenes.
+        """
+        if self.was_real_done:
+            obs = self.env.reset(**kwargs)
+        else:
+            # no-op step to advance from terminal/lost life state
+            obs, _, _, _ = self.env.step(0)
+        self.lives = self.env.unwrapped.ale.lives()
+        return obs
+
+
+class MaxAndSkipEnv(gym.Wrapper):
+    def __init__(self, env, skip=4):
+        """Return only every `skip`-th frame"""
+        gym.Wrapper.__init__(self, env)
+        # most recent raw observations (for max pooling across time steps)
+        self._obs_buffer = np.zeros(
+            (2, ) + env.observation_space.shape, dtype=np.uint8)
+        self._skip = skip
+
+    def step(self, action):
+        """Repeat action, sum reward, and max over last observations."""
+        total_reward = 0.0
+        done = None
+        for i in range(self._skip):
+            obs, reward, done, info = self.env.step(action)
+            if i == self._skip - 2:
+                self._obs_buffer[0] = obs
+            if i == self._skip - 1:
+                self._obs_buffer[1] = obs
+            total_reward += reward
+            if done:
+                break
+        # Note that the observation on the done=True frame
+        # doesn't matter
+        max_frame = self._obs_buffer.max(axis=0)
+
+        return max_frame, total_reward, done, info
+
+    def reset(self, **kwargs):
+        return self.env.reset(**kwargs)
+
+
+class WarpFrame(gym.ObservationWrapper):
+    def __init__(self, env, dim):
+        """Warp frames to the specified size (dim x dim)."""
+        gym.ObservationWrapper.__init__(self, env)
+        self.width = dim
+        self.height = dim
+        self.observation_space = spaces.Box(
+            low=0,
+            high=255,
+            shape=(self.height, self.width, 1),
+            dtype=np.uint8)
+
+    def observation(self, frame):
+        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
+        frame = cv2.resize(
+            frame, (self.width, self.height), interpolation=cv2.INTER_AREA)
+        return frame[:, :, None]
+
+
+# TODO: (sven) Deprecated class. Remove once traj. view is the norm.
+class FrameStack(gym.Wrapper):
+    def __init__(self, env, k):
+        """Stack k last frames."""
+        gym.Wrapper.__init__(self, env)
+        self.k = k
+        self.frames = deque([], maxlen=k)
+        shp = env.observation_space.shape
+        self.observation_space = spaces.Box(
+            low=0,
+            high=255,
+            shape=(shp[0], shp[1], shp[2] * k),
+            dtype=env.observation_space.dtype)
+
+    def reset(self):
+        ob = self.env.reset()
+        for _ in range(self.k):
+            self.frames.append(ob)
+        return self._get_ob()
+
+    def step(self, action):
+        ob, reward, done, info = self.env.step(action)
+        self.frames.append(ob)
+        return self._get_ob(), reward, done, info
+
+    def _get_ob(self):
+        assert len(self.frames) == self.k
+        return np.concatenate(self.frames, axis=2)
+
+
+class FrameStackTrajectoryView(gym.ObservationWrapper):
+    def __init__(self, env):
+        """No stacking. Trajectory View API takes care of this."""
+        gym.Wrapper.__init__(self, env)
+        shp = env.observation_space.shape
+        assert shp[2] == 1
+        self.observation_space = spaces.Box(
+            low=0,
+            high=255,
+            shape=(shp[0], shp[1]),
+            dtype=env.observation_space.dtype)
+
+    def observation(self, observation):
+        return np.squeeze(observation, axis=-1)
+
+
+class ScaledFloatFrame(gym.ObservationWrapper):
+    def __init__(self, env):
+        gym.ObservationWrapper.__init__(self, env)
+        self.observation_space = gym.spaces.Box(
+            low=0, high=1, shape=env.observation_space.shape, dtype=np.float32)
+
+    def observation(self, observation):
+        # careful! This undoes the memory optimization, use
+        # with smaller replay buffers only.
+        return np.array(observation).astype(np.float32) / 255.0
+
+
+def wrap_deepmind(
+        env,
+        dim=84,
+        # TODO: (sven) Remove once traj. view is norm.
+        framestack=True,
+        framestack_via_traj_view_api=False):
+    """Configure environment for DeepMind-style Atari.
+
+    Note that we assume reward clipping is done outside the wrapper.
+
+    Args:
+        dim (int): Dimension to resize observations to (dim x dim).
+        framestack (bool): Whether to framestack observations.
+    """
+    env = MonitorEnv(env)
+    env = NoopResetEnv(env, noop_max=30)
+    if env.spec is not None and "NoFrameskip" in env.spec.id:
+        env = MaxAndSkipEnv(env, skip=4)
+    env = EpisodicLifeEnv(env)
+    if "FIRE" in env.unwrapped.get_action_meanings():
+        env = FireResetEnv(env)
+    env = WarpFrame(env, dim)
+    # env = ScaledFloatFrame(env)  # TODO: use for dqn?
+    # env = ClipRewardEnv(env)  # reward clipping is handled by policy eval
+    # New way of frame stacking via the trajectory view API (model config key:
+    # `num_framestacks=[int]`.
+    if framestack_via_traj_view_api:
+        env = FrameStackTrajectoryView(env)
+    # Old way (w/o traj. view API) via model config key: `framestack=True`.
+    # TODO: (sven) Remove once traj. view is norm.
+    elif framestack is True:
+        env = FrameStack(env, 4)
+    return env
diff --git a/rllib/env/wrappers/dm_control_wrapper.py b/rllib/env/wrappers/dm_control_wrapper.py
new file mode 100644
index 000000000..6734e2a3a
--- /dev/null
+++ b/rllib/env/wrappers/dm_control_wrapper.py
@@ -0,0 +1,203 @@
+"""
+DeepMind Control Suite Wrapper directly sourced from:
+https://github.com/denisyarats/dmc2gym
+
+MIT License
+
+Copyright (c) 2020 Denis Yarats
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+from gym import core, spaces
+try:
+    from dm_env import specs
+except ImportError:
+    specs = None
+try:
+    from dm_control import suite
+except ImportError:
+    suite = None
+import numpy as np
+
+
+def _spec_to_box(spec):
+    def extract_min_max(s):
+        assert s.dtype == np.float64 or s.dtype == np.float32
+        dim = np.int(np.prod(s.shape))
+        if type(s) == specs.Array:
+            bound = np.inf * np.ones(dim, dtype=np.float32)
+            return -bound, bound
+        elif type(s) == specs.BoundedArray:
+            zeros = np.zeros(dim, dtype=np.float32)
+            return s.minimum + zeros, s.maximum + zeros
+
+    mins, maxs = [], []
+    for s in spec:
+        mn, mx = extract_min_max(s)
+        mins.append(mn)
+        maxs.append(mx)
+    low = np.concatenate(mins, axis=0)
+    high = np.concatenate(maxs, axis=0)
+    assert low.shape == high.shape
+    return spaces.Box(low, high, dtype=np.float32)
+
+
+def _flatten_obs(obs):
+    obs_pieces = []
+    for v in obs.values():
+        flat = np.array([v]) if np.isscalar(v) else v.ravel()
+        obs_pieces.append(flat)
+    return np.concatenate(obs_pieces, axis=0)
+
+
+class DMCEnv(core.Env):
+    def __init__(self,
+                 domain_name,
+                 task_name,
+                 task_kwargs=None,
+                 visualize_reward=False,
+                 from_pixels=False,
+                 height=64,
+                 width=64,
+                 camera_id=0,
+                 frame_skip=2,
+                 environment_kwargs=None,
+                 channels_first=True,
+                 preprocess=True):
+        self._from_pixels = from_pixels
+        self._height = height
+        self._width = width
+        self._camera_id = camera_id
+        self._frame_skip = frame_skip
+        self._channels_first = channels_first
+        self.preprocess = preprocess
+
+        if specs is None:
+            raise RuntimeError((
+                "The `specs` module from `dm_env` was not imported. Make sure "
+                "`dm_env` is installed and visible in the current python "
+                "environment."))
+        if suite is None:
+            raise RuntimeError(
+                ("The `suite` module from `dm_control` was not imported. Make "
+                 "sure `dm_control` is installed and visible in the current "
+                 "python enviornment."))
+
+        # create task
+        self._env = suite.load(
+            domain_name=domain_name,
+            task_name=task_name,
+            task_kwargs=task_kwargs,
+            visualize_reward=visualize_reward,
+            environment_kwargs=environment_kwargs)
+
+        # true and normalized action spaces
+        self._true_action_space = _spec_to_box([self._env.action_spec()])
+        self._norm_action_space = spaces.Box(
+            low=-1.0,
+            high=1.0,
+            shape=self._true_action_space.shape,
+            dtype=np.float32)
+
+        # create observation space
+        if from_pixels:
+            shape = [3, height,
+                     width] if channels_first else [height, width, 3]
+            self._observation_space = spaces.Box(
+                low=0, high=255, shape=shape, dtype=np.uint8)
+            if preprocess:
+                self._observation_space = spaces.Box(
+                    low=-0.5, high=0.5, shape=shape, dtype=np.float32)
+        else:
+            self._observation_space = _spec_to_box(
+                self._env.observation_spec().values())
+
+        self._state_space = _spec_to_box(self._env.observation_spec().values())
+
+        self.current_state = None
+
+    def __getattr__(self, name):
+        return getattr(self._env, name)
+
+    def _get_obs(self, time_step):
+        if self._from_pixels:
+            obs = self.render(
+                height=self._height,
+                width=self._width,
+                camera_id=self._camera_id)
+            if self._channels_first:
+                obs = obs.transpose(2, 0, 1).copy()
+            if self.preprocess:
+                obs = obs / 255.0 - 0.5
+        else:
+            obs = _flatten_obs(time_step.observation)
+        return obs
+
+    def _convert_action(self, action):
+        action = action.astype(np.float64)
+        true_delta = self._true_action_space.high - self._true_action_space.low
+        norm_delta = self._norm_action_space.high - self._norm_action_space.low
+        action = (action - self._norm_action_space.low) / norm_delta
+        action = action * true_delta + self._true_action_space.low
+        action = action.astype(np.float32)
+        return action
+
+    @property
+    def observation_space(self):
+        return self._observation_space
+
+    @property
+    def state_space(self):
+        return self._state_space
+
+    @property
+    def action_space(self):
+        return self._norm_action_space
+
+    def step(self, action):
+        assert self._norm_action_space.contains(action)
+        action = self._convert_action(action)
+        assert self._true_action_space.contains(action)
+        reward = 0
+        extra = {"internal_state": self._env.physics.get_state().copy()}
+
+        for _ in range(self._frame_skip):
+            time_step = self._env.step(action)
+            reward += time_step.reward or 0
+            done = time_step.last()
+            if done:
+                break
+        obs = self._get_obs(time_step)
+        self.current_state = _flatten_obs(time_step.observation)
+        extra["discount"] = time_step.discount
+        return obs, reward, done, extra
+
+    def reset(self):
+        time_step = self._env.reset()
+        self.current_state = _flatten_obs(time_step.observation)
+        obs = self._get_obs(time_step)
+        return obs
+
+    def render(self, mode="rgb_array", height=None, width=None, camera_id=0):
+        assert mode == "rgb_array", "only support for rgb_array mode"
+        height = height or self._height
+        width = width or self._width
+        camera_id = camera_id or self._camera_id
+        return self._env.physics.render(
+            height=height, width=width, camera_id=camera_id)
diff --git a/rllib/env/wrappers/dm_env_wrapper.py b/rllib/env/wrappers/dm_env_wrapper.py
new file mode 100644
index 000000000..e402c4cc6
--- /dev/null
+++ b/rllib/env/wrappers/dm_env_wrapper.py
@@ -0,0 +1,94 @@
+import gym
+from gym import spaces
+
+import numpy as np
+
+try:
+    from dm_env import specs
+except ImportError:
+    specs = None
+
+
+def _convert_spec_to_space(spec):
+    if isinstance(spec, dict):
+        return spaces.Dict(
+            {k: _convert_spec_to_space(v)
+             for k, v in spec.items()})
+    if isinstance(spec, specs.DiscreteArray):
+        return spaces.Discrete(spec.num_values)
+    elif isinstance(spec, specs.BoundedArray):
+        return spaces.Box(
+            low=np.asscalar(spec.minimum),
+            high=np.asscalar(spec.maximum),
+            shape=spec.shape,
+            dtype=spec.dtype)
+    elif isinstance(spec, specs.Array):
+        return spaces.Box(
+            low=-float("inf"),
+            high=float("inf"),
+            shape=spec.shape,
+            dtype=spec.dtype)
+
+    raise NotImplementedError(
+        ("Could not convert `Array` spec of type {} to Gym space. "
+         "Attempted to convert: {}").format(type(spec), spec))
+
+
+class DMEnv(gym.Env):
+    """A `gym.Env` wrapper for the `dm_env` API.
+    """
+
+    metadata = {"render.modes": ["rgb_array"]}
+
+    def __init__(self, dm_env):
+        super(DMEnv, self).__init__()
+        self._env = dm_env
+        self._prev_obs = None
+
+        if specs is None:
+            raise RuntimeError((
+                "The `specs` module from `dm_env` was not imported. Make sure "
+                "`dm_env` is installed and visible in the current python "
+                "environment."))
+
+    def step(self, action):
+        ts = self._env.step(action)
+
+        reward = ts.reward
+        if reward is None:
+            reward = 0.
+
+        return ts.observation, reward, ts.last(), {"discount": ts.discount}
+
+    def reset(self):
+        ts = self._env.reset()
+        return ts.observation
+
+    def render(self, mode="rgb_array"):
+        if self._prev_obs is None:
+            raise ValueError(
+                "Environment not started. Make sure to reset before rendering."
+            )
+
+        if mode == "rgb_array":
+            return self._prev_obs
+        else:
+            raise NotImplementedError(
+                "Render mode '{}' is not supported.".format(mode))
+
+    @property
+    def action_space(self):
+        spec = self._env.action_spec()
+        return _convert_spec_to_space(spec)
+
+    @property
+    def observation_space(self):
+        spec = self._env.observation_spec()
+        return _convert_spec_to_space(spec)
+
+    @property
+    def reward_range(self):
+        spec = self._env.reward_spec()
+        if isinstance(spec, specs.BoundedArray):
+            return spec.minimum, spec.maximum
+        return -float("inf"), float("inf")
diff --git a/rllib/env/wrappers/group_agents_wrapper.py b/rllib/env/wrappers/group_agents_wrapper.py
new file mode 100644
index 000000000..322bdaf2a
--- /dev/null
+++ b/rllib/env/wrappers/group_agents_wrapper.py
@@ -0,0 +1,117 @@
+from collections import OrderedDict
+
+from ray.rllib.env.multi_agent_env import MultiAgentEnv
+
+# info key for the individual rewards of an agent, for example:
+# info: {
+#   group_1: {
+#      _group_rewards: [5, -1, 1],  # 3 agents in this group
+#   }
+# }
+GROUP_REWARDS = "_group_rewards"
+
+# info key for the individual infos of an agent, for example:
+# info: {
+#   group_1: {
+#      _group_infos: [{"foo": ...}, {}],  # 2 agents in this group
+#   }
+# }
+GROUP_INFO = "_group_info"
+
+
+class GroupAgentsWrapper(MultiAgentEnv):
+    """Wraps a MultiAgentEnv environment with agents grouped as specified.
+
+    See multi_agent_env.py for the specification of groups.
+
+    This API is experimental.
+    """
+
+    def __init__(self, env, groups, obs_space=None, act_space=None):
+        """Wrap an existing multi-agent env to group agents together.
+
+        See MultiAgentEnv.with_agent_groups() for usage info.
+
+        Args:
+            env (MultiAgentEnv): env to wrap
+            groups (dict): Grouping spec as documented in MultiAgentEnv.
+            obs_space (Space): Optional observation space for the grouped
+                env. Must be a tuple space.
+            act_space (Space): Optional action space for the grouped env.
+                Must be a tuple space.
+        """
+
+        self.env = env
+        self.groups = groups
+        self.agent_id_to_group = {}
+        for group_id, agent_ids in groups.items():
+            for agent_id in agent_ids:
+                if agent_id in self.agent_id_to_group:
+                    raise ValueError(
+                        "Agent id {} is in multiple groups".format(
+                            agent_id, groups))
+                self.agent_id_to_group[agent_id] = group_id
+        if obs_space is not None:
+            self.observation_space = obs_space
+        if act_space is not None:
+            self.action_space = act_space
+
+    def reset(self):
+        obs = self.env.reset()
+        return self._group_items(obs)
+
+    def step(self, action_dict):
+        # Ungroup and send actions
+        action_dict = self._ungroup_items(action_dict)
+        obs, rewards, dones, infos = self.env.step(action_dict)
+
+        # Apply grouping transforms to the env outputs
+        obs = self._group_items(obs)
+        rewards = self._group_items(
+            rewards, agg_fn=lambda gvals: list(gvals.values()))
+        dones = self._group_items(
+            dones, agg_fn=lambda gvals: all(gvals.values()))
+        infos = self._group_items(
+            infos, agg_fn=lambda gvals: {GROUP_INFO: list(gvals.values())})
+
+        # Aggregate rewards, but preserve the original values in infos
+        for agent_id, rew in rewards.items():
+            if isinstance(rew, list):
+                rewards[agent_id] = sum(rew)
+                if agent_id not in infos:
+                    infos[agent_id] = {}
+                infos[agent_id][GROUP_REWARDS] = rew
+
+        return obs, rewards, dones, infos
+
+    def _ungroup_items(self, items):
+        out = {}
+        for agent_id, value in items.items():
+            if agent_id in self.groups:
+                assert len(value) == len(self.groups[agent_id]), \
+                    (agent_id, value, self.groups)
+                for a, v in zip(self.groups[agent_id], value):
+                    out[a] = v
+            else:
+                out[agent_id] = value
+        return out
+
+    def _group_items(self, items, agg_fn=lambda gvals: list(gvals.values())):
+        grouped_items = {}
+        for agent_id, item in items.items():
+            if agent_id in self.agent_id_to_group:
+                group_id = self.agent_id_to_group[agent_id]
+                if group_id in grouped_items:
+                    continue  # already added
+                group_out = OrderedDict()
+                for a in self.groups[group_id]:
+                    if a in items:
+                        group_out[a] = items[a]
+                    else:
+                        raise ValueError(
+                            "Missing member of group {}: {}: {}".format(
+                                group_id, a, items))
+                grouped_items[group_id] = agg_fn(group_out)
+            else:
+                grouped_items[agent_id] = item
+        return grouped_items
diff --git a/rllib/env/wrappers/kaggle_wrapper.py b/rllib/env/wrappers/kaggle_wrapper.py
index 4586aa16a..cd3a2dee6 100644
--- a/rllib/env/wrappers/kaggle_wrapper.py
+++ b/rllib/env/wrappers/kaggle_wrapper.py
@@ -5,8 +5,10 @@ Source: https://github.com/Kaggle/kaggle-environments
 
 from copy import deepcopy
 from typing import Any, Dict, Optional, Tuple
-
-import kaggle_environments
+try:
+    import kaggle_environments
+except ImportError:
+    pass
 import numpy as np
 from gym.spaces import Box
 from gym.spaces import Dict as DictSpace
diff --git a/rllib/env/wrappers/model_vector_env.py b/rllib/env/wrappers/model_vector_env.py
new file mode 100644
index 000000000..b5b9700d6
--- /dev/null
+++ b/rllib/env/wrappers/model_vector_env.py
@@ -0,0 +1,134 @@
+import logging
+import numpy as np
+from gym.spaces import Discrete
+from ray.rllib.utils.annotations import override
+from ray.rllib.env.vector_env import VectorEnv
+from ray.rllib.evaluation.rollout_worker import get_global_worker
+from ray.rllib.env.base_env import BaseEnv
+from ray.rllib.utils.typing import EnvType
+
+logger = logging.getLogger(__name__)
+
+
+def model_vector_env(env: EnvType) -> BaseEnv:
+    """Returns a VectorizedEnv wrapper around the given environment.
+
+    To obtain worker configs, one can call get_global_worker().
+
+    Args:
+        env (EnvType): The input environment (of any supported environment
+            type) to be convert to a _VectorizedModelGymEnv (wrapped as
+            an RLlib BaseEnv).
+
+    Returns:
+        BaseEnv: The BaseEnv converted input `env`.
+    """
+    worker = get_global_worker()
+    worker_index = worker.worker_index
+    if worker_index:
+        env = _VectorizedModelGymEnv(
+            make_env=worker.make_env_fn,
+            existing_envs=[env],
+            num_envs=worker.num_envs,
+            observation_space=env.observation_space,
+            action_space=env.action_space,
+        )
+    return BaseEnv.to_base_env(
+        env,
+        make_env=worker.make_env_fn,
+        num_envs=worker.num_envs,
+        remote_envs=False,
+        remote_env_batch_wait_ms=0)
+
+
+class _VectorizedModelGymEnv(VectorEnv):
+    """Vectorized Environment Wrapper for MB-MPO.
+
+    Primary change is in the `vector_step` method, which calls the dynamics
+    models for next_obs "calculation" (instead of the actual env). Also, the
+    actual envs need to have two extra methods implemented: `reward(obs)` and
+    (optionally) `done(obs)`. If `done` is not implemented, we will assume
+    that episodes in the env do not terminate, ever.
+    """
+
+    def __init__(self,
+                 make_env=None,
+                 existing_envs=None,
+                 num_envs=1,
+                 *,
+                 observation_space=None,
+                 action_space=None,
+                 env_config=None):
+        self.make_env = make_env
+        self.envs = existing_envs
+        self.num_envs = num_envs
+        while len(self.envs) < num_envs:
+            self.envs.append(self.make_env(len(self.envs)))
+
+        super().__init__(
+            observation_space=observation_space
+            or self.envs[0].observation_space,
+            action_space=action_space or self.envs[0].action_space,
+            num_envs=num_envs)
+        worker = get_global_worker()
+        self.model, self.device = worker.foreach_policy(
+            lambda x, y: (x.dynamics_model, x.device))[0]
+
+    @override(VectorEnv)
+    def vector_reset(self):
+        """Override parent to store actual env obs for upcoming predictions.
+        """
+        self.cur_obs = [e.reset() for e in self.envs]
+        return self.cur_obs
+
+    @override(VectorEnv)
+    def reset_at(self, index):
+        """Override parent to store actual env obs for upcoming predictions.
+        """
+        obs = self.envs[index].reset()
+        self.cur_obs[index] = obs
+        return obs
+
+    @override(VectorEnv)
+    def vector_step(self, actions):
+        if self.cur_obs is None:
+            raise ValueError("Need to reset env first")
+
+        # If discrete, need to one-hot actions
+        if isinstance(self.action_space, Discrete):
+            act = np.array(actions)
+            new_act = np.zeros((act.size, act.max() + 1))
+            new_act[np.arange(act.size), act] = 1
+            actions = new_act.astype("float32")
+
+        # Batch the TD-model prediction.
+        obs_batch = np.stack(self.cur_obs, axis=0)
+        action_batch = np.stack(actions, axis=0)
+        # Predict the next observation, given previous a) real obs
+        # (after a reset), b) predicted obs (any other time).
+        next_obs_batch = self.model.predict_model_batches(
+            obs_batch, action_batch, device=self.device)
+        next_obs_batch = np.clip(next_obs_batch, -1000, 1000)
+
+        # Call env's reward function.
+        # Note: Each actual env must implement one to output exact rewards.
+        rew_batch = self.envs[0].reward(obs_batch, action_batch,
+                                        next_obs_batch)
+
+        # If env has a `done` method, use it.
+        if hasattr(self.envs[0], "done"):
+            dones_batch = self.envs[0].done(next_obs_batch)
+        # Otherwise, assume the episode does not end.
+        else:
+            dones_batch = np.asarray([False for _ in range(self.num_envs)])
+
+        info_batch = [{} for _ in range(self.num_envs)]
+
+        self.cur_obs = next_obs_batch
+
+        return list(next_obs_batch), list(rew_batch), list(
+            dones_batch), info_batch
+
+    @override(VectorEnv)
+    def get_unwrapped(self):
+        return self.envs
diff --git a/rllib/env/wrappers/pettingzoo_env.py b/rllib/env/wrappers/pettingzoo_env.py
new file mode 100644
index 000000000..9c45b6224
--- /dev/null
+++ b/rllib/env/wrappers/pettingzoo_env.py
@@ -0,0 +1,185 @@
+from ray.rllib.env.multi_agent_env import MultiAgentEnv
+
+
+class PettingZooEnv(MultiAgentEnv):
+    """An interface to the PettingZoo MARL environment library.
+
+    See: https://github.com/PettingZoo-Team/PettingZoo
+
+    Inherits from MultiAgentEnv and exposes a given AEC
+    (actor-environment-cycle) game from the PettingZoo project via the
+    MultiAgentEnv public API.
+
+    Note that the wrapper has some important limitations:
+
+    1. All agents have the same action_spaces and observation_spaces.
+       Note: If, within your aec game, agents do not have homogeneous action /
+       observation spaces, apply SuperSuit wrappers
+       to apply padding functionality: https://github.com/PettingZoo-Team/
+       SuperSuit#built-in-multi-agent-only-functions
+    2. Environments are positive sum games (-> Agents are expected to cooperate
+       to maximize reward). This isn't a hard restriction, it just that
+       standard algorithms aren't expected to work well in highly competitive
+       games.
+
+    Examples:
+        >>> from pettingzoo.butterfly import prison_v2
+        >>> env = PettingZooEnv(prison_v2.env())
+        >>> obs = env.reset()
+        >>> print(obs)
+        # only returns the observation for the agent which should be stepping
+        {
+            'prisoner_0': array([[[0, 0, 0],
+                [0, 0, 0],
+                [0, 0, 0],
+                ...,
+                [0, 0, 0],
+                [0, 0, 0],
+                [0, 0, 0]]], dtype=uint8)
+        }
+        >>> obs, rewards, dones, infos = env.step({
+        ...                 "prisoner_0": 1
+        ...             })
+        # only returns the observation, reward, info, etc, for
+        # the agent who's turn is next.
+        >>> print(obs)
+        {
+            'prisoner_1': array([[[0, 0, 0],
+                [0, 0, 0],
+                [0, 0, 0],
+                ...,
+                [0, 0, 0],
+                [0, 0, 0],
+                [0, 0, 0]]], dtype=uint8)
+        }
+        >>> print(rewards)
+        {
+            'prisoner_1': 0
+        }
+        >>> print(dones)
+        {
+            'prisoner_1': False, '__all__': False
+        }
+        >>> print(infos)
+        {
+            'prisoner_1': {'map_tuple': (1, 0)}
+        }
+    """
+
+    def __init__(self, env):
+        self.env = env
+        # agent idx list
+        self.agents = self.env.possible_agents
+
+        # Get dictionaries of obs_spaces and act_spaces
+        self.observation_spaces = self.env.observation_spaces
+        self.action_spaces = self.env.action_spaces
+
+        # Get first observation space, assuming all agents have equal space
+        self.observation_space = self.observation_spaces[self.agents[0]]
+
+        # Get first action space, assuming all agents have equal space
+        self.action_space = self.action_spaces[self.agents[0]]
+
+        assert all(obs_space == self.observation_space
+                   for obs_space
+                   in self.env.observation_spaces.values()), \
+            "Observation spaces for all agents must be identical. Perhaps " \
+            "SuperSuit's pad_observations wrapper can help (useage: " \
+            "`supersuit.aec_wrappers.pad_observations(env)`"
+
+        assert all(act_space == self.action_space
+                   for act_space in self.env.action_spaces.values()), \
+            "Action spaces for all agents must be identical. Perhaps " \
+            "SuperSuit's pad_action_space wrapper can help (useage: " \
+            "`supersuit.aec_wrappers.pad_action_space(env)`"
+
+        self.reset()
+
+    def reset(self):
+        self.env.reset()
+        return {
+            self.env.agent_selection: self.env.observe(
+                self.env.agent_selection)
+        }
+
+    def step(self, action):
+        self.env.step(action[self.env.agent_selection])
+        obs_d = {}
+        rew_d = {}
+        done_d = {}
+        info_d = {}
+        while self.env.agents:
+            obs, rew, done, info = self.env.last()
+            a = self.env.agent_selection
+            obs_d[a] = obs
+            rew_d[a] = rew
+            done_d[a] = done
+            info_d[a] = info
+            if self.env.dones[self.env.agent_selection]:
+                self.env.step(None)
+            else:
+                break
+
+        all_done = not self.env.agents
+        done_d["__all__"] = all_done
+
+        return obs_d, rew_d, done_d, info_d
+
+    def close(self):
+        self.env.close()
+
+    def seed(self, seed=None):
+        self.env.seed(seed)
+
+    def render(self, mode="human"):
+        return self.env.render(mode)
+
+
+class ParallelPettingZooEnv(MultiAgentEnv):
+    def __init__(self, env):
+        self.par_env = env
+        # agent idx list
+        self.agents = self.par_env.possible_agents
+
+        # Get dictionaries of obs_spaces and act_spaces
+        self.observation_spaces = self.par_env.observation_spaces
+        self.action_spaces = self.par_env.action_spaces
+
+        # Get first observation space, assuming all agents have equal space
+        self.observation_space = self.observation_spaces[self.agents[0]]
+
+        # Get first action space, assuming all agents have equal space
+        self.action_space = self.action_spaces[self.agents[0]]
+
+        assert all(obs_space == self.observation_space
+                   for obs_space
+                   in self.par_env.observation_spaces.values()), \
+            "Observation spaces for all agents must be identical. Perhaps " \
+            "SuperSuit's pad_observations wrapper can help (useage: " \
+            "`supersuit.aec_wrappers.pad_observations(env)`"
+
+        assert all(act_space == self.action_space
+                   for act_space in self.par_env.action_spaces.values()), \
+            "Action spaces for all agents must be identical. Perhaps " \
+            "SuperSuit's pad_action_space wrapper can help (useage: " \
+            "`supersuit.aec_wrappers.pad_action_space(env)`"
+
+        self.reset()
+
+    def reset(self):
+        return self.par_env.reset()
+
+    def step(self, action_dict):
+        obss, rews, dones, infos = self.par_env.step(action_dict)
+        dones["__all__"] = all(dones.values())
+        return obss, rews, dones, infos
+
+    def close(self):
+        self.par_env.close()
+
+    def seed(self, seed=None):
+        self.par_env.seed(seed)
+
+    def render(self, mode="human"):
+        return self.par_env.render(mode)
diff --git a/rllib/env/wrappers/recsim_wrapper.py b/rllib/env/wrappers/recsim_wrapper.py
index ad846262a..3344cbb6e 100644
--- a/rllib/env/wrappers/recsim_wrapper.py
+++ b/rllib/env/wrappers/recsim_wrapper.py
@@ -5,12 +5,11 @@ Source: https://github.com/google-research/recsim
 """
 
 from collections import OrderedDict
-from typing import List
-
 import gym
-import numpy as np
 from gym import spaces
+import numpy as np
 from recsim.environments import interest_evolution
+from typing import List
 
 from ray.rllib.utils.error import UnsupportedSpaceException
 from ray.tune.registry import register_env
diff --git a/rllib/env/wrappers/tests/test_group_agents_wrapper.py b/rllib/env/wrappers/tests/test_group_agents_wrapper.py
new file mode 100644
index 000000000..3d5fcba30
--- /dev/null
+++ b/rllib/env/wrappers/tests/test_group_agents_wrapper.py
@@ -0,0 +1,30 @@
+import unittest
+
+from ray.rllib.env.wrappers.group_agents_wrapper import GroupAgentsWrapper
+from ray.rllib.env.multi_agent_env import make_multi_agent
+
+
+class TestGroupAgentsWrapper(unittest.TestCase):
+    def test_group_agents_wrapper(self):
+        MultiAgentCartPole = make_multi_agent("CartPole-v0")
+        grouped_ma_cartpole = GroupAgentsWrapper(
+            env=MultiAgentCartPole({
+                "num_agents": 4
+            }),
+            groups={
+                "group1": [0, 1],
+                "group2": [2, 3]
+            })
+        obs = grouped_ma_cartpole.reset()
+        self.assertTrue(len(obs) == 2)
+        self.assertTrue("group1" in obs and "group2" in obs)
+        self.assertTrue(
+            isinstance(obs["group1"], list) and len(obs["group1"]) == 2)
+        self.assertTrue(
+            isinstance(obs["group2"], list) and len(obs["group2"]) == 2)
+
+
+if __name__ == "__main__":
+    import sys
+    import pytest
+    sys.exit(pytest.main(["-v", __file__]))
diff --git a/rllib/env/wrappers/tests/test_kaggle_wrapper.py b/rllib/env/wrappers/tests/test_kaggle_wrapper.py
index 56300cbc7..3584e29a7 100644
--- a/rllib/env/wrappers/tests/test_kaggle_wrapper.py
+++ b/rllib/env/wrappers/tests/test_kaggle_wrapper.py
@@ -1,9 +1,7 @@
+from kaggle_environments.utils import structify
 import unittest
 
-from kaggle_environments.utils import structify
-
-from ray.rllib.env.wrappers.kaggle_wrapper import \
-    KaggleFootballMultiAgentEnv
+from ray.rllib.env.wrappers.kaggle_wrapper import KaggleFootballMultiAgentEnv
 
 
 class TestKaggleFootballMultiAgentEnv(unittest.TestCase):
diff --git a/rllib/env/wrappers/tests/test_recsim_wrapper.py b/rllib/env/wrappers/tests/test_recsim_wrapper.py
index 3211dca71..d1b3e3b1b 100644
--- a/rllib/env/wrappers/tests/test_recsim_wrapper.py
+++ b/rllib/env/wrappers/tests/test_recsim_wrapper.py
@@ -1,6 +1,5 @@
-import unittest
-
 import gym
+import unittest
 
 from ray.rllib.env.wrappers.recsim_wrapper import (
     MultiDiscreteToDiscreteActionWrapper, make_recsim_env)
diff --git a/rllib/env/wrappers/unity3d_env.py b/rllib/env/wrappers/unity3d_env.py
new file mode 100644
index 000000000..753c23443
--- /dev/null
+++ b/rllib/env/wrappers/unity3d_env.py
@@ -0,0 +1,275 @@
+from gym.spaces import Box, MultiDiscrete, Tuple as TupleSpace
+import logging
+import numpy as np
+import random
+import time
+from typing import Callable, Optional, Tuple
+
+from ray.rllib.env.multi_agent_env import MultiAgentEnv
+from ray.rllib.utils.annotations import override
+from ray.rllib.utils.typing import MultiAgentDict, PolicyID, AgentID
+
+logger = logging.getLogger(__name__)
+
+
+class Unity3DEnv(MultiAgentEnv):
+    """A MultiAgentEnv representing a single Unity3D game instance.
+
+    For an example on how to use this Env with a running Unity3D editor
+    or with a compiled game, see:
+    `rllib/examples/unity3d_env_local.py`
+    For an example on how to use it inside a Unity game client, which
+    connects to an RLlib Policy server, see:
+    `rllib/examples/serving/unity3d_[client|server].py`
+
+    Supports all Unity3D (MLAgents) examples, multi- or single-agent and
+    gets converted automatically into an ExternalMultiAgentEnv, when used
+    inside an RLlib PolicyClient for cloud/distributed training of Unity games.
+    """
+
+    _BASE_PORT = 5004
+
+    def __init__(self,
+                 file_name: str = None,
+                 port: Optional[int] = None,
+                 seed: int = 0,
+                 no_graphics: bool = False,
+                 timeout_wait: int = 300,
+                 episode_horizon: int = 1000):
+        """Initializes a Unity3DEnv object.
+
+        Args:
+            file_name (Optional[str]): Name of the Unity game binary.
+                If None, will assume a locally running Unity3D editor
+                to be used, instead.
+            port (Optional[int]): Port number to connect to Unity environment.
+            seed (int): A random seed value to use for the Unity3D game.
+            no_graphics (bool): Whether to run the Unity3D simulator in
+                no-graphics mode. Default: False.
+            timeout_wait (int): Time (in seconds) to wait for connection from
+                the Unity3D instance.
+            episode_horizon (int): A hard horizon to abide to. After at most
+                this many steps (per-agent episode `step()` calls), the
+                Unity3D game is reset and will start again (finishing the
+                multi-agent episode that the game represents).
+                Note: The game itself may contain its own episode length
+                limits, which are always obeyed (on top of this value here).
+        """
+
+        super().__init__()
+
+        if file_name is None:
+            print(
+                "No game binary provided, will use a running Unity editor "
+                "instead.\nMake sure you are pressing the Play (|>) button in "
+                "your editor to start.")
+
+        import mlagents_envs
+        from mlagents_envs.environment import UnityEnvironment
+
+        # Try connecting to the Unity3D game instance. If a port is blocked
+        while True:
+            # Sleep for random time to allow for concurrent startup of many
+            # environments (num_workers >> 1). Otherwise, would lead to port
+            # conflicts sometimes.
+            time.sleep(random.randint(1, 10))
+            port_ = port or self._BASE_PORT
+            self._BASE_PORT += 1
+            try:
+                self.unity_env = UnityEnvironment(
+                    file_name=file_name,
+                    worker_id=0,
+                    base_port=port_,
+                    seed=seed,
+                    no_graphics=no_graphics,
+                    timeout_wait=timeout_wait,
+                )
+                print("Created UnityEnvironment for port {}".format(port_))
+            except mlagents_envs.exception.UnityWorkerInUseException:
+                pass
+            else:
+                break
+
+        # Reset entire env every this number of step calls.
+        self.episode_horizon = episode_horizon
+        # Keep track of how many times we have called `step` so far.
+        self.episode_timesteps = 0
+
+    @override(MultiAgentEnv)
+    def step(
+            self, action_dict: MultiAgentDict
+    ) -> Tuple[MultiAgentDict, MultiAgentDict, MultiAgentDict, MultiAgentDict]:
+        """Performs one multi-agent step through the game.
+
+        Args:
+            action_dict (dict): Multi-agent action dict with:
+                keys=agent identifier consisting of
+                [MLagents behavior name, e.g. "Goalie?team=1"] + "_" +
+                [Agent index, a unique MLAgent-assigned index per single agent]
+
+        Returns:
+            tuple:
+                - obs: Multi-agent observation dict.
+                    Only those observations for which to get new actions are
+                    returned.
+                - rewards: Rewards dict matching `obs`.
+                - dones: Done dict with only an __all__ multi-agent entry in
+                    it. __all__=True, if episode is done for all agents.
+                - infos: An (empty) info dict.
+        """
+
+        # Set only the required actions (from the DecisionSteps) in Unity3D.
+        all_agents = []
+        for behavior_name in self.unity_env.behavior_specs:
+            for agent_id in self.unity_env.get_steps(behavior_name)[
+                    0].agent_id_to_index.keys():
+                key = behavior_name + "_{}".format(agent_id)
+                all_agents.append(key)
+                self.unity_env.set_action_for_agent(behavior_name, agent_id,
+                                                    action_dict[key])
+        # Do the step.
+        self.unity_env.step()
+
+        obs, rewards, dones, infos = self._get_step_results()
+
+        # Global horizon reached? -> Return __all__ done=True, so user
+        # can reset. Set all agents' individual `done` to True as well.
+        self.episode_timesteps += 1
+        if self.episode_timesteps > self.episode_horizon:
+            return obs, rewards, dict({
+                "__all__": True
+            }, **{agent_id: True
+                  for agent_id in all_agents}), infos
+
+        return obs, rewards, dones, infos
+
+    @override(MultiAgentEnv)
+    def reset(self) -> MultiAgentDict:
+        """Resets the entire Unity3D scene (a single multi-agent episode)."""
+        self.episode_timesteps = 0
+        self.unity_env.reset()
+        obs, _, _, _ = self._get_step_results()
+        return obs
+
+    def _get_step_results(self):
+        """Collects those agents' obs/rewards that have to act in next `step`.
+
+        Returns:
+            Tuple:
+                obs: Multi-agent observation dict.
+                    Only those observations for which to get new actions are
+                    returned.
+                rewards: Rewards dict matching `obs`.
+                dones: Done dict with only an __all__ multi-agent entry in it.
+                    __all__=True, if episode is done for all agents.
+                infos: An (empty) info dict.
+        """
+        obs = {}
+        rewards = {}
+        infos = {}
+        for behavior_name in self.unity_env.behavior_specs:
+            decision_steps, terminal_steps = self.unity_env.get_steps(
+                behavior_name)
+            # Important: Only update those sub-envs that are currently
+            # available within _env_state.
+            # Loop through all envs ("agents") and fill in, whatever
+            # information we have.
+            for agent_id, idx in decision_steps.agent_id_to_index.items():
+                key = behavior_name + "_{}".format(agent_id)
+                os = tuple(o[idx] for o in decision_steps.obs)
+                os = os[0] if len(os) == 1 else os
+                obs[key] = os
+                rewards[key] = decision_steps.reward[idx]  # rewards vector
+            for agent_id, idx in terminal_steps.agent_id_to_index.items():
+                key = behavior_name + "_{}".format(agent_id)
+                # Only overwrite rewards (last reward in episode), b/c obs
+                # here is the last obs (which doesn't matter anyways).
+                # Unless key does not exist in obs.
+                if key not in obs:
+                    os = tuple(o[idx] for o in terminal_steps.obs)
+                    obs[key] = os = os[0] if len(os) == 1 else os
+                rewards[key] = terminal_steps.reward[idx]  # rewards vector
+
+        # Only use dones if all agents are done, then we should do a reset.
+        return obs, rewards, {"__all__": False}, infos
+
+    @staticmethod
+    def get_policy_configs_for_game(
+            game_name: str) -> Tuple[dict, Callable[[AgentID], PolicyID]]:
+
+        # The RLlib server must know about the Spaces that the Client will be
+        # using inside Unity3D, up-front.
+        obs_spaces = {
+            # 3DBall.
+            "3DBall": Box(float("-inf"), float("inf"), (8, )),
+            # 3DBallHard.
+            "3DBallHard": Box(float("-inf"), float("inf"), (45, )),
+            # Pyramids.
+            "Pyramids": TupleSpace([
+                Box(float("-inf"), float("inf"), (56, )),
+                Box(float("-inf"), float("inf"), (56, )),
+                Box(float("-inf"), float("inf"), (56, )),
+                Box(float("-inf"), float("inf"), (4, )),
+            ]),
+            # SoccerStrikersVsGoalie.
+            "Goalie": Box(float("-inf"), float("inf"), (738, )),
+            "Striker": TupleSpace([
+                Box(float("-inf"), float("inf"), (231, )),
+                Box(float("-inf"), float("inf"), (63, )),
+            ]),
+            # Tennis.
+            "Tennis": Box(float("-inf"), float("inf"), (27, )),
+            # VisualHallway.
+            "VisualHallway": Box(float("-inf"), float("inf"), (84, 84, 3)),
+            # Walker.
+            "Walker": Box(float("-inf"), float("inf"), (212, )),
+            # FoodCollector.
+            "FoodCollector": TupleSpace([
+                Box(float("-inf"), float("inf"), (49, )),
+                Box(float("-inf"), float("inf"), (4, )),
+            ]),
+        }
+        action_spaces = {
+            # 3DBall.
+            "3DBall": Box(
+                float("-inf"), float("inf"), (2, ), dtype=np.float32),
+            # 3DBallHard.
+            "3DBallHard": Box(
+                float("-inf"), float("inf"), (2, ), dtype=np.float32),
+            # Pyramids.
+            "Pyramids": MultiDiscrete([5]),
+            # SoccerStrikersVsGoalie.
+            "Goalie": MultiDiscrete([3, 3, 3]),
+            "Striker": MultiDiscrete([3, 3, 3]),
+            # Tennis.
+            "Tennis": Box(float("-inf"), float("inf"), (3, )),
+            # VisualHallway.
+            "VisualHallway": MultiDiscrete([5]),
+            # Walker.
+            "Walker": Box(float("-inf"), float("inf"), (39, )),
+            # FoodCollector.
+            "FoodCollector": MultiDiscrete([3, 3, 3, 2]),
+        }
+
+        # Policies (Unity: "behaviors") and agent-to-policy mapping fns.
+        if game_name == "SoccerStrikersVsGoalie":
+            policies = {
+                "Goalie": (None, obs_spaces["Goalie"], action_spaces["Goalie"],
+                           {}),
+                "Striker": (None, obs_spaces["Striker"],
+                            action_spaces["Striker"], {}),
+            }
+
+            def policy_mapping_fn(agent_id):
+                return "Striker" if "Striker" in agent_id else "Goalie"
+
+        else:
+            policies = {
+                game_name: (None, obs_spaces[game_name],
+                            action_spaces[game_name], {}),
+            }
+
+            def policy_mapping_fn(agent_id):
+                return game_name
+
+        return policies, policy_mapping_fn
diff --git a/rllib/evaluation/rollout_worker.py b/rllib/evaluation/rollout_worker.py
index 4370ee93a..d0770cdf7 100644
--- a/rllib/evaluation/rollout_worker.py
+++ b/rllib/evaluation/rollout_worker.py
@@ -9,13 +9,13 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Type, TypeVar, \
     TYPE_CHECKING, Union
 
 import ray
-from ray.rllib.env.atari_wrappers import wrap_deepmind, is_atari
 from ray.rllib.env.base_env import BaseEnv
 from ray.rllib.env.env_context import EnvContext
 from ray.rllib.env.external_env import ExternalEnv
 from ray.rllib.env.multi_agent_env import MultiAgentEnv
 from ray.rllib.env.external_multi_agent_env import ExternalMultiAgentEnv
 from ray.rllib.env.vector_env import VectorEnv
+from ray.rllib.env.wrappers.atari_wrappers import wrap_deepmind, is_atari
 from ray.rllib.evaluation.sampler import AsyncSampler, SyncSampler
 from ray.rllib.evaluation.rollout_metrics import RolloutMetrics
 from ray.rllib.models import ModelCatalog
@@ -343,7 +343,8 @@ class RolloutWorker(ParallelIteratorWorker):
         elif log_level == "DEBUG":
             enable_periodic_logging()
 
-        env_context = EnvContext(env_config or {}, worker_index)
+        env_context = EnvContext(
+            env_config or {}, worker_index, num_workers=num_workers)
         self.env_context = env_context
         self.policy_config: TrainerConfigDict = policy_config
         if callbacks:
diff --git a/rllib/evaluation/sampler.py b/rllib/evaluation/sampler.py
index 09819cc14..eb81b65de 100644
--- a/rllib/evaluation/sampler.py
+++ b/rllib/evaluation/sampler.py
@@ -18,7 +18,8 @@ from ray.rllib.evaluation.rollout_metrics import RolloutMetrics
 from ray.rllib.evaluation.sample_batch_builder import \
     MultiAgentSampleBatchBuilder
 from ray.rllib.env.base_env import BaseEnv, ASYNC_RESET_RETURN
-from ray.rllib.env.atari_wrappers import get_wrapper_by_cls, MonitorEnv
+from ray.rllib.env.wrappers.atari_wrappers import get_wrapper_by_cls, \
+    MonitorEnv
 from ray.rllib.models.preprocessors import Preprocessor
 from ray.rllib.offline import InputReader
 from ray.rllib.policy.policy import clip_action, Policy
diff --git a/rllib/examples/env/dm_control_suite.py b/rllib/examples/env/dm_control_suite.py
index 165344794..9ba05da3c 100644
--- a/rllib/examples/env/dm_control_suite.py
+++ b/rllib/examples/env/dm_control_suite.py
@@ -1,4 +1,4 @@
-from ray.rllib.env.dm_control_wrapper import DMCEnv
+from ray.rllib.env.wrappers.dm_control_wrapper import DMCEnv
 """
 8 Environments from Deepmind Control Suite
 """
diff --git a/rllib/examples/env/multi_agent.py b/rllib/examples/env/multi_agent.py
index 096dea205..ded842427 100644
--- a/rllib/examples/env/multi_agent.py
+++ b/rllib/examples/env/multi_agent.py
@@ -1,38 +1,18 @@
 import gym
 
-from ray.rllib.env.multi_agent_env import MultiAgentEnv
+from ray.rllib.env.multi_agent_env import MultiAgentEnv, make_multi_agent
 from ray.rllib.examples.env.mock_env import MockEnv, MockEnv2
 from ray.rllib.examples.env.stateless_cartpole import StatelessCartPole
+from ray.rllib.utils.deprecation import deprecation_warning
 
 
 def make_multiagent(env_name_or_creator):
-    class MultiEnv(MultiAgentEnv):
-        def __init__(self, config):
-            num = config.pop("num_agents", 1)
-            if isinstance(env_name_or_creator, str):
-                self.agents = [
-                    gym.make(env_name_or_creator) for _ in range(num)
-                ]
-            else:
-                self.agents = [env_name_or_creator(config) for _ in range(num)]
-            self.dones = set()
-            self.observation_space = self.agents[0].observation_space
-            self.action_space = self.agents[0].action_space
-
-        def reset(self):
-            self.dones = set()
-            return {i: a.reset() for i, a in enumerate(self.agents)}
-
-        def step(self, action_dict):
-            obs, rew, done, info = {}, {}, {}, {}
-            for i, action in action_dict.items():
-                obs[i], rew[i], done[i], info[i] = self.agents[i].step(action)
-                if done[i]:
-                    self.dones.add(i)
-            done["__all__"] = len(self.dones) == len(self.agents)
-            return obs, rew, done, info
-
-    return MultiEnv
+    deprecation_warning(
+        old="ray.rllib.examples.env.multi_agent.make_multiagent",
+        new="ray.rllib.env.multi_agent_env.make_multi_agent",
+        error=False,
+    )
+    return make_multi_agent(env_name_or_creator)
 
 
 class BasicMultiAgent(MultiAgentEnv):
@@ -162,8 +142,8 @@ class RoundRobinMultiAgent(MultiAgentEnv):
         return obs, rew, done, info
 
 
-MultiAgentCartPole = make_multiagent("CartPole-v0")
-MultiAgentMountainCar = make_multiagent("MountainCarContinuous-v0")
-MultiAgentPendulum = make_multiagent("Pendulum-v0")
-MultiAgentStatelessCartPole = make_multiagent(
+MultiAgentCartPole = make_multi_agent("CartPole-v0")
+MultiAgentMountainCar = make_multi_agent("MountainCarContinuous-v0")
+MultiAgentPendulum = make_multi_agent("Pendulum-v0")
+MultiAgentStatelessCartPole = make_multi_agent(
     lambda config: StatelessCartPole(config))
diff --git a/rllib/examples/multi_agent_independent_learning.py b/rllib/examples/multi_agent_independent_learning.py
index 62ce00f41..a929ea2b2 100644
--- a/rllib/examples/multi_agent_independent_learning.py
+++ b/rllib/examples/multi_agent_independent_learning.py
@@ -1,6 +1,6 @@
 from ray import tune
 from ray.tune.registry import register_env
-from ray.rllib.env.pettingzoo_env import PettingZooEnv
+from ray.rllib.env.wrappers.pettingzoo_env import PettingZooEnv
 from pettingzoo.sisl import waterworld_v2
 
 # Based on code from github.com/parametersharingmadrl/parametersharingmadrl
diff --git a/rllib/examples/multi_agent_parameter_sharing.py b/rllib/examples/multi_agent_parameter_sharing.py
index e53bc45ec..e49d84d11 100644
--- a/rllib/examples/multi_agent_parameter_sharing.py
+++ b/rllib/examples/multi_agent_parameter_sharing.py
@@ -1,6 +1,6 @@
 from ray import tune
 from ray.tune.registry import register_env
-from ray.rllib.env.pettingzoo_env import PettingZooEnv
+from ray.rllib.env.wrappers.pettingzoo_env import PettingZooEnv
 from pettingzoo.sisl import waterworld_v0
 
 # Based on code from github.com/parametersharingmadrl/parametersharingmadrl
diff --git a/rllib/examples/serving/unity3d_client.py b/rllib/examples/serving/unity3d_client.py
index 4f3784aa7..8c8784ebf 100644
--- a/rllib/examples/serving/unity3d_client.py
+++ b/rllib/examples/serving/unity3d_client.py
@@ -31,7 +31,7 @@ $ python unity3d_client.py --inference-mode=local --game [path to game binary]
 import argparse
 
 from ray.rllib.env.policy_client import PolicyClient
-from ray.rllib.env.unity3d_env import Unity3DEnv
+from ray.rllib.env.wrappers.unity3d_env import Unity3DEnv
 
 SERVER_ADDRESS = "localhost"
 SERVER_PORT = 9900
diff --git a/rllib/examples/serving/unity3d_server.py b/rllib/examples/serving/unity3d_server.py
index 0a39d3e6b..56c1a0089 100755
--- a/rllib/examples/serving/unity3d_server.py
+++ b/rllib/examples/serving/unity3d_server.py
@@ -34,7 +34,7 @@ import ray
 from ray.tune import register_env
 from ray.rllib.agents.ppo import PPOTrainer
 from ray.rllib.env.policy_server_input import PolicyServerInput
-from ray.rllib.env.unity3d_env import Unity3DEnv
+from ray.rllib.env.wrappers.unity3d_env import Unity3DEnv
 from ray.rllib.examples.env.random_env import RandomMultiAgentEnv
 
 SERVER_ADDRESS = "localhost"
diff --git a/rllib/examples/unity3d_env_local.py b/rllib/examples/unity3d_env_local.py
index 6ffcc8d2b..7dea67ad6 100644
--- a/rllib/examples/unity3d_env_local.py
+++ b/rllib/examples/unity3d_env_local.py
@@ -25,7 +25,7 @@ import os
 
 import ray
 from ray import tune
-from ray.rllib.env.unity3d_env import Unity3DEnv
+from ray.rllib.env.wrappers.unity3d_env import Unity3DEnv
 from ray.rllib.utils.test_utils import check_learning_achieved
 
 parser = argparse.ArgumentParser()
diff --git a/rllib/tuned_examples/ppo/unity3d-soccer-strikers-vs-goalie-ppo.yaml b/rllib/tuned_examples/ppo/unity3d-soccer-strikers-vs-goalie-ppo.yaml
index 5a089f5a6..195bf6340 100644
--- a/rllib/tuned_examples/ppo/unity3d-soccer-strikers-vs-goalie-ppo.yaml
+++ b/rllib/tuned_examples/ppo/unity3d-soccer-strikers-vs-goalie-ppo.yaml
@@ -8,7 +8,7 @@
 # goalie's reward (-1 if goal) across all within-scene parallelized playing
 # fields (8 fields with each 2 strikers + 1 goalie, for the soccer env).
 unity3d-soccer-strikers-vs-goalie-ppo:
-    env: ray.rllib.env.unity3d_env.Unity3DEnv
+    env: ray.rllib.env.wrappers.unity3d_env.Unity3DEnv
     run: PPO
     stop:
         timesteps_total: 1000000