diff --git a/rllib/examples/env/random_env.py b/rllib/examples/env/random_env.py
index af41ce183..6167ec882 100644
--- a/rllib/examples/env/random_env.py
+++ b/rllib/examples/env/random_env.py
@@ -25,10 +25,15 @@ class RandomEnv(gym.Env):
             gym.spaces.Box(low=-1.0, high=1.0, shape=(), dtype=np.float32))
         # Chance that an episode ends at any step.
         self.p_done = config.get("p_done", 0.1)
+        # A max episode length.
+        self.max_episode_len = config.get("max_episode_len", None)
         # Whether to check action bounds.
         self.check_action_bounds = config.get("check_action_bounds", False)
+        # Steps taken so far (after last reset).
+        self.steps = 0
 
     def reset(self):
+        self.steps = 0
         return self.observation_space.sample()
 
     def step(self, action):
@@ -40,11 +45,19 @@ class RandomEnv(gym.Env):
             raise ValueError("Illegal action for {}: {}".format(
                 self.action_space, action))
 
-        return self.observation_space.sample(), \
-            float(self.reward_space.sample()), \
-            bool(np.random.choice(
+        self.steps += 1
+        # We are done as per our max-episode-len.
+        if self.max_episode_len is not None and \
+                self.steps >= self.max_episode_len:
+            done = True
+        # Max not reached yet -> Sample done via p_done.
+        else:
+            done = bool(np.random.choice(
                 [True, False], p=[self.p_done, 1.0 - self.p_done]
-            )), {}
+            ))
+
+        return self.observation_space.sample(), \
+            float(self.reward_space.sample()), done, {}
 
 
 # Multi-agent version of the RandomEnv.
diff --git a/rllib/examples/policy/random_policy.py b/rllib/examples/policy/random_policy.py
index 4cd2d0206..6081768d4 100644
--- a/rllib/examples/policy/random_policy.py
+++ b/rllib/examples/policy/random_policy.py
@@ -1,3 +1,4 @@
+from gym.spaces import Box
 import numpy as np
 import random
 
@@ -11,6 +12,17 @@ class RandomPolicy(Policy):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
+        # Whether for compute_actions, the bounds given in action_space
+        # should be ignored (default: False). This is to test action-clipping
+        # and any Env's reaction to bounds breaches.
+        if self.config.get("ignore_action_bounds", False) and \
+                isinstance(self.action_space, Box):
+            self.action_space_for_sampling = Box(
+                -float("inf"), float("inf"),
+                shape=self.action_space.shape, dtype=self.action_space.dtype)
+        else:
+            self.action_space_for_sampling = self.action_space
+
     @override(Policy)
     def compute_actions(self,
                         obs_batch,
@@ -20,7 +32,8 @@ class RandomPolicy(Policy):
                         **kwargs):
         # Alternatively, a numpy array would work here as well.
         # e.g.: np.array([random.choice([0, 1])] * len(obs_batch))
-        return [self.action_space.sample() for _ in obs_batch], [], {}
+        return [self.action_space_for_sampling.sample() for _ in obs_batch], \
+               [], {}
 
     @override(Policy)
     def learn_on_batch(self, samples):
diff --git a/rllib/tests/test_rollout_worker.py b/rllib/tests/test_rollout_worker.py
index ccd2280f3..038d47e22 100644
--- a/rllib/tests/test_rollout_worker.py
+++ b/rllib/tests/test_rollout_worker.py
@@ -14,12 +14,15 @@ from ray.rllib.evaluation.rollout_worker import RolloutWorker
 from ray.rllib.evaluation.metrics import collect_metrics
 from ray.rllib.evaluation.postprocessing import compute_advantages
 from ray.rllib.examples.policy.random_policy import RandomPolicy
+from ray.rllib.policy.policy import Policy
 from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID, SampleBatch
+from ray.rllib.utils.annotations import override
 from ray.rllib.utils.test_utils import check, framework_iterator
 from ray.tune.registry import register_env
 
 
 class MockPolicy(RandomPolicy):
+    @override(RandomPolicy)
     def compute_actions(self,
                         obs_batch,
                         state_batches=None,
@@ -31,16 +34,19 @@ class MockPolicy(RandomPolicy):
                         **kwargs):
         return np.array([random.choice([0, 1])] * len(obs_batch)), [], {}
 
+    @override(Policy)
     def postprocess_trajectory(self,
                                batch,
                                other_agent_batches=None,
                                episode=None):
         assert episode is not None
+        super().postprocess_trajectory(batch, other_agent_batches, episode)
         return compute_advantages(
             batch, 100.0, 0.9, use_gae=False, use_critic=False)
 
 
 class BadPolicy(RandomPolicy):
+    @override(RandomPolicy)
     def compute_actions(self,
                         obs_batch,
                         state_batches=None,
@@ -106,12 +112,15 @@ class MockVectorEnv(VectorEnv):
             num_envs=num_envs)
         self.envs = [MockEnv(episode_length) for _ in range(num_envs)]
 
+    @override(VectorEnv)
     def vector_reset(self):
         return [e.reset() for e in self.envs]
 
+    @override(VectorEnv)
     def reset_at(self, index):
         return self.envs[index].reset()
 
+    @override(VectorEnv)
     def vector_step(self, actions):
         obs_batch, rew_batch, done_batch, info_batch = [], [], [], []
         for i in range(len(self.envs)):
@@ -122,6 +131,7 @@ class MockVectorEnv(VectorEnv):
             info_batch.append(info)
         return obs_batch, rew_batch, done_batch, info_batch
 
+    @override(VectorEnv)
     def get_unwrapped(self):
         return self.envs
 
@@ -266,8 +276,73 @@ class TestRolloutWorker(unittest.TestCase):
             self.assertEqual(results3, [[1, 1], [1, 1], [1, 1]])
             pg.stop()
 
+    def test_action_clipping(self):
+        from ray.rllib.examples.env.random_env import RandomEnv
+        action_space = gym.spaces.Box(-2.0, 1.0, (3,))
+
+        # Clipping: True (clip between Policy's action_space.low/high),
+        ev = RolloutWorker(
+            env_creator=lambda _: RandomEnv(config=dict(
+                action_space=action_space,
+                max_episode_len=10,
+                p_done=0.0,
+                check_action_bounds=True,
+            )),
+            policy=RandomPolicy,
+            policy_config=dict(
+                action_space=action_space,
+                ignore_action_bounds=True,
+            ),
+            clip_actions=True,
+            batch_mode="complete_episodes")
+        sample = ev.sample()
+        # Check, whether the action bounds have been breached (expected).
+        # We still arrived here b/c we clipped according to the Env's action
+        # space.
+        self.assertGreater(np.max(sample["actions"]), action_space.high[0])
+        self.assertLess(np.min(sample["actions"]), action_space.low[0])
+        ev.stop()
+
+        # Clipping: False and RandomPolicy produces invalid actions.
+        # Expect Env to complain.
+        ev2 = RolloutWorker(
+            env_creator=lambda _: RandomEnv(config=dict(
+                action_space=action_space,
+                max_episode_len=10,
+                p_done=0.0,
+                check_action_bounds=True,
+            )),
+            policy=RandomPolicy,
+            policy_config=dict(
+                action_space=action_space,
+                ignore_action_bounds=True,
+            ),
+            clip_actions=False,  # <- should lead to Env complaining
+            batch_mode="complete_episodes")
+        self.assertRaisesRegex(ValueError, r"Illegal action", ev2.sample)
+        ev2.stop()
+
+        # Clipping: False and RandomPolicy produces valid (bounded) actions.
+        # Expect "actions" in SampleBatch to be unclipped.
+        ev3 = RolloutWorker(
+            env_creator=lambda _: RandomEnv(config=dict(
+                action_space=action_space,
+                max_episode_len=10,
+                p_done=0.0,
+                check_action_bounds=True,
+            )),
+            policy=RandomPolicy,
+            policy_config=dict(action_space=action_space),
+            # Should not be a problem as RandomPolicy abides to bounds.
+            clip_actions=False,
+            batch_mode="complete_episodes")
+        sample = ev3.sample()
+        self.assertGreater(np.min(sample["actions"]), action_space.low[0])
+        self.assertLess(np.max(sample["actions"]), action_space.high[0])
+        ev3.stop()
+
     def test_reward_clipping(self):
-        # Clipping: on.
+        # Clipping: True (clip between -1.0 and 1.0).
         ev = RolloutWorker(
             env_creator=lambda _: MockEnv2(episode_length=10),
             policy=MockPolicy,
@@ -278,7 +353,27 @@ class TestRolloutWorker(unittest.TestCase):
         self.assertEqual(result["episode_reward_mean"], 1000)
         ev.stop()
 
-        # Clipping: off.
+        from ray.rllib.examples.env.random_env import RandomEnv
+
+        # Clipping in certain range (-2.0, 2.0).
+        ev2 = RolloutWorker(
+            env_creator=lambda _: RandomEnv(
+                dict(
+                    reward_space=gym.spaces.Box(low=-10, high=10, shape=()),
+                    p_done=0.0,
+                    max_episode_len=10,
+                )),
+            policy=MockPolicy,
+            clip_rewards=2.0,
+            batch_mode="complete_episodes")
+        sample = ev2.sample()
+        self.assertEqual(max(sample["rewards"]), 2.0)
+        self.assertEqual(min(sample["rewards"]), -2.0)
+        self.assertLess(np.mean(sample["rewards"]), 0.5)
+        self.assertGreater(np.mean(sample["rewards"]), -0.5)
+        ev2.stop()
+
+        # Clipping: Off.
         ev2 = RolloutWorker(
             env_creator=lambda _: MockEnv2(episode_length=10),
             policy=MockPolicy,