diff --git a/docker/examples/Dockerfile b/docker/examples/Dockerfile
index ae69f1d89..c9a964d8a 100644
--- a/docker/examples/Dockerfile
+++ b/docker/examples/Dockerfile
@@ -3,5 +3,5 @@
 FROM ray-project/deploy
 RUN conda install -y -c conda-forge tensorflow
 RUN apt-get install -y zlib1g-dev
-RUN pip install gym[atari]==0.9.5 opencv-python==3.2.0.8 smart_open
+RUN pip install gym[atari] opencv-python==3.2.0.8 smart_open
 # RUN conda install -y -q pytorch torchvision -c soumith
diff --git a/examples/carla/env.py b/examples/carla/env.py
index 94cacee75..c88a71b28 100644
--- a/examples/carla/env.py
+++ b/examples/carla/env.py
@@ -143,21 +143,21 @@ class CarlaEnv(gym.Env):
         if config["discrete_actions"]:
             self.action_space = Discrete(len(DISCRETE_ACTIONS))
         else:
-            self.action_space = Box(-1.0, 1.0, shape=(2,))
+            self.action_space = Box(-1.0, 1.0, shape=(2,), dtype=np.float32)
         if config["use_depth_camera"]:
             image_space = Box(
                 -1.0, 1.0, shape=(
                     config["y_res"], config["x_res"],
-                    1 * config["framestack"]))
+                    1 * config["framestack"]), dtype=np.float32)
         else:
             image_space = Box(
-                0.0, 255.0, shape=(
+                0, 255, shape=(
                     config["y_res"], config["x_res"],
-                    3 * config["framestack"]))
-        self.observation_space = Tuple(
+                    3 * config["framestack"]), dtype=np.uint8)
+        self.observation_space = Tuple(  # Box: forward_speed, dist to goal
             [image_space,
              Discrete(len(COMMANDS_ENUM)),  # next_command
-             Box(-128.0, 128.0, shape=(2,))])  # forward_speed, dist to goal
+             Box(-128.0, 128.0, shape=(2,), dtype=np.float32)])
 
         # TODO(ekl) this isn't really a proper gym spec
         self._spec = lambda: None
diff --git a/examples/custom_env/custom_env.py b/examples/custom_env/custom_env.py
index e20e88671..87aa757db 100644
--- a/examples/custom_env/custom_env.py
+++ b/examples/custom_env/custom_env.py
@@ -4,6 +4,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import numpy as np
 import gym
 from gym.spaces import Discrete, Box
 from gym.envs.registration import EnvSpec
@@ -22,7 +23,8 @@ class SimpleCorridor(gym.Env):
         self.end_pos = config["corridor_length"]
         self.cur_pos = 0
         self.action_space = Discrete(2)
-        self.observation_space = Box(0.0, self.end_pos, shape=(1,))
+        self.observation_space = Box(
+            0.0, self.end_pos, shape=(1,), dtype=np.float32)
         self._spec = EnvSpec("SimpleCorridor-{}-v0".format(self.end_pos))
 
     def _reset(self):
diff --git a/python/ray/rllib/dqn/common/wrappers.py b/python/ray/rllib/dqn/common/wrappers.py
index e5bed0241..9ac859952 100644
--- a/python/ray/rllib/dqn/common/wrappers.py
+++ b/python/ray/rllib/dqn/common/wrappers.py
@@ -22,7 +22,7 @@ class NoopResetEnv(gym.Wrapper):
         self.override_num_noops = None
         assert env.unwrapped.get_action_meanings()[0] == 'NOOP'
 
-    def _reset(self):
+    def reset(self):
         """ Do no-op action for a number of steps in [1, noop_max]."""
         self.env.reset()
         if self.override_num_noops is not None:
@@ -46,7 +46,7 @@ class FireResetEnv(gym.Wrapper):
         assert env.unwrapped.get_action_meanings()[1] == 'FIRE'
         assert len(env.unwrapped.get_action_meanings()) >= 3
 
-    def _reset(self):
+    def reset(self):
         self.env.reset()
         obs, _, done, _ = self.env.step(1)
         if done:
@@ -68,7 +68,7 @@ class EpisodicLifeEnv(gym.Wrapper):
         self.was_real_done = True
         self.was_real_reset = False
 
-    def _step(self, action):
+    def step(self, action):
         obs, reward, done, info = self.env.step(action)
         self.was_real_done = done
         # check current lives, make loss of life terminal,
@@ -82,7 +82,7 @@ class EpisodicLifeEnv(gym.Wrapper):
         self.lives = lives
         return obs, reward, done, info
 
-    def _reset(self):
+    def reset(self):
         """Reset only when lives are exhausted.
         This way all states are still reachable even though lives are episodic,
         and the learner need not know about any of this behind-the-scenes.
@@ -106,7 +106,7 @@ class MaxAndSkipEnv(gym.Wrapper):
         self._obs_buffer = deque(maxlen=2)
         self._skip = skip
 
-    def _step(self, action):
+    def step(self, action):
         total_reward = 0.0
         done = None
         for _ in range(self._skip):
@@ -120,7 +120,7 @@ class MaxAndSkipEnv(gym.Wrapper):
 
         return max_frame, total_reward, done, info
 
-    def _reset(self):
+    def reset(self):
         """Clear past frame buffer and init. to first obs. from inner env."""
         self._obs_buffer.clear()
         obs = self.env.reset()
@@ -132,9 +132,10 @@ class MaxAndSkipEnv(gym.Wrapper):
 class ProcessFrame80(gym.ObservationWrapper):
     def __init__(self, env=None):
         super(ProcessFrame80, self).__init__(env)
-        self.observation_space = spaces.Box(low=0, high=255, shape=(80, 80, 1))
+        self.observation_space = spaces.Box(
+            low=0, high=255, shape=(80, 80, 1), dtype=np.uint8)
 
-    def _observation(self, obs):
+    def observation(self, obs):
         return ProcessFrame80.process(obs)
 
     @staticmethod
@@ -155,7 +156,7 @@ class ProcessFrame80(gym.ObservationWrapper):
 
 
 class ClippedRewardsWrapper(gym.RewardWrapper):
-    def _reward(self, reward):
+    def reward(self, reward):
         """Change all the positive rewards to 1, negative to -1 and keep
         zero."""
         return np.sign(reward)
@@ -195,15 +196,16 @@ class FrameStack(gym.Wrapper):
         self.frames = deque([], maxlen=k)
         shp = env.observation_space.shape
         self.observation_space = spaces.Box(
-            low=0, high=255, shape=(shp[0], shp[1], shp[2] * k))
+            low=0, high=255, shape=(shp[0], shp[1], shp[2] * k),
+            dtype=np.uint8)
 
-    def _reset(self):
+    def reset(self):
         ob = self.env.reset()
         for _ in range(self.k):
             self.frames.append(ob)
         return self._get_ob()
 
-    def _step(self, action):
+    def step(self, action):
         ob, reward, done, info = self.env.step(action)
         self.frames.append(ob)
         return self._get_ob(), reward, done, info
diff --git a/python/ray/rllib/es/policies.py b/python/ray/rllib/es/policies.py
index 57c74befc..36a404c48 100644
--- a/python/ray/rllib/es/policies.py
+++ b/python/ray/rllib/es/policies.py
@@ -20,8 +20,7 @@ def rollout(policy, env, timestep_limit=None, add_noise=False):
     If add_noise is True, the rollout will take noisy actions with
     noise drawn from that stream. Otherwise, no action noise will be added.
     """
-    env_timestep_limit = env.spec.tags.get("wrapper_config.TimeLimit"
-                                           ".max_episode_steps")
+    env_timestep_limit = env.spec.max_episode_steps
     timestep_limit = (env_timestep_limit if timestep_limit is None
                       else min(timestep_limit, env_timestep_limit))
     rews = []
diff --git a/python/ray/rllib/examples/multiagent_mountaincar_env.py b/python/ray/rllib/examples/multiagent_mountaincar_env.py
index d50302eea..d454937ac 100644
--- a/python/ray/rllib/examples/multiagent_mountaincar_env.py
+++ b/python/ray/rllib/examples/multiagent_mountaincar_env.py
@@ -1,4 +1,4 @@
-import math
+from math import cos
 from gym.spaces import Box, Tuple, Discrete
 import numpy as np
 from gym.envs.classic_control.mountain_car import MountainCarEnv
@@ -23,17 +23,17 @@ class MultiAgentMountainCarEnv(MountainCarEnv):
 
         self.action_space = [Discrete(3) for _ in range(2)]
         self.observation_space = Tuple([
-            Box(self.low, self.high) for _ in range(2)])
+            Box(self.low, self.high, dtype=np.float32) for _ in range(2)])
 
-        self._seed()
+        self.seed()
         self.reset()
 
-    def _step(self, action):
+    def step(self, action):
         summed_act = 0.5 * np.sum(action)
 
         position, velocity = self.state
         velocity += (summed_act - 1) * 0.001
-        velocity += math.cos(3 * position) * (-0.0025)
+        velocity += cos(3 * position) * (-0.0025)
         velocity = np.clip(velocity, -self.max_speed, self.max_speed)
         position += velocity
         position = np.clip(position, self.min_position, self.max_position)
@@ -47,6 +47,6 @@ class MultiAgentMountainCarEnv(MountainCarEnv):
         self.state = (position, velocity)
         return [np.array(self.state) for _ in range(2)], reward, done, {}
 
-    def _reset(self):
+    def reset(self):
         self.state = np.array([self.np_random.uniform(low=-0.6, high=-0.4), 0])
         return [np.array(self.state) for _ in range(2)]
diff --git a/python/ray/rllib/examples/multiagent_pendulum_env.py b/python/ray/rllib/examples/multiagent_pendulum_env.py
index b2095e625..44c86f4e6 100644
--- a/python/ray/rllib/examples/multiagent_pendulum_env.py
+++ b/python/ray/rllib/examples/multiagent_pendulum_env.py
@@ -22,18 +22,20 @@ class MultiAgentPendulumEnv(PendulumEnv):
 
         high = np.array([1., 1., self.max_speed])
         self.action_space = [Box(low=-self.max_torque / 2,
-                                 high=self.max_torque / 2, shape=(1,))
+                                 high=self.max_torque / 2,
+                                 shape=(1,),
+                                 dtype=np.float32)
                              for _ in range(2)]
         self.observation_space = Tuple([
-            Box(low=-high, high=high) for _ in range(2)])
+            Box(low=-high, high=high, dtype=np.float32) for _ in range(2)])
 
-        self._seed()
+        self.seed()
 
-    def _seed(self, seed=None):
+    def seed(self, seed=None):
         self.np_random, seed = seeding.np_random(seed)
         return [seed]
 
-    def _step(self, u):
+    def step(self, u):
         th, thdot = self.state  # th := theta
 
         summed_u = np.sum(u)
@@ -55,7 +57,7 @@ class MultiAgentPendulumEnv(PendulumEnv):
         self.state = np.array([newth, newthdot])
         return self._get_obs(), -costs, False, {}
 
-    def _reset(self):
+    def reset(self):
         high = np.array([np.pi, 1])
         self.state = self.np_random.uniform(low=-high, high=high)
         self.last_u = None
diff --git a/python/ray/rllib/models/catalog.py b/python/ray/rllib/models/catalog.py
index 4844bbb98..8a423d309 100644
--- a/python/ray/rllib/models/catalog.py
+++ b/python/ray/rllib/models/catalog.py
@@ -266,7 +266,8 @@ class _RLlibPreprocessorWrapper(gym.ObservationWrapper):
         self.preprocessor = preprocessor
 
         from gym.spaces.box import Box
-        self.observation_space = Box(-1.0, 1.0, preprocessor.shape)
+        self.observation_space = Box(
+            -1.0, 1.0, preprocessor.shape, dtype=np.float32)
 
-    def _observation(self, observation):
+    def observation(self, observation):
         return self.preprocessor.transform(observation)
diff --git a/python/ray/rllib/test/test_catalog.py b/python/ray/rllib/test/test_catalog.py
index 5f3ac01f8..c5e503b71 100644
--- a/python/ray/rllib/test/test_catalog.py
+++ b/python/ray/rllib/test/test_catalog.py
@@ -47,7 +47,7 @@ class ModelCatalogTest(unittest.TestCase):
         class TupleEnv(object):
             def __init__(self):
                 self.observation_space = Tuple(
-                    [Discrete(5), Box(0, 1, shape=(3,))])
+                    [Discrete(5), Box(0, 1, shape=(3,), dtype=np.float32)])
         p1 = ModelCatalog.get_preprocessor(
             get_registry(), TupleEnv())
         self.assertEqual(p1.shape, (8,))
diff --git a/python/ray/rllib/test/test_supported_spaces.py b/python/ray/rllib/test/test_supported_spaces.py
index 109b585f8..bb8c5ecdc 100644
--- a/python/ray/rllib/test/test_supported_spaces.py
+++ b/python/ray/rllib/test/test_supported_spaces.py
@@ -4,6 +4,7 @@ import traceback
 import gym
 from gym.spaces import Box, Discrete, Tuple
 from gym.envs.registration import EnvSpec
+import numpy as np
 
 import ray
 from ray.rllib.agent import get_agent_class
@@ -12,19 +13,27 @@ from ray.tune.registry import register_env
 
 ACTION_SPACES_TO_TEST = {
     "discrete": Discrete(5),
-    "vector": Box(0.0, 1.0, (5,)),
-    "simple_tuple": Tuple([Box(0.0, 1.0, (5,)), Box(0.0, 1.0, (5,))]),
-    "implicit_tuple": [Box(0.0, 1.0, (5,)), Box(0.0, 1.0, (5,))],
+    "vector": Box(0.0, 1.0, (5,), dtype=np.float32),
+    "simple_tuple": Tuple([
+        Box(0.0, 1.0, (5,), dtype=np.float32),
+        Box(0.0, 1.0, (5,), dtype=np.float32)]),
+    "implicit_tuple": [
+        Box(0.0, 1.0, (5,), dtype=np.float32),
+        Box(0.0, 1.0, (5,), dtype=np.float32)],
 }
 
 OBSERVATION_SPACES_TO_TEST = {
     "discrete": Discrete(5),
-    "vector": Box(0.0, 1.0, (5,)),
-    "image": Box(0.0, 1.0, (80, 80, 1)),
-    "atari": Box(0.0, 1.0, (210, 160, 3)),
-    "atari_ram": Box(0.0, 1.0, (128,)),
-    "simple_tuple": Tuple([Box(0.0, 1.0, (5,)), Box(0.0, 1.0, (5,))]),
-    "mixed_tuple": Tuple([Discrete(10), Box(0.0, 1.0, (5,))]),
+    "vector": Box(0.0, 1.0, (5,), dtype=np.float32),
+    "image": Box(0.0, 1.0, (80, 80, 1), dtype=np.float32),
+    "atari": Box(0.0, 1.0, (210, 160, 3), dtype=np.float32),
+    "atari_ram": Box(0.0, 1.0, (128,), dtype=np.float32),
+    "simple_tuple": Tuple([
+        Box(0.0, 1.0, (5,), dtype=np.float32),
+        Box(0.0, 1.0, (5,), dtype=np.float32)]),
+    "mixed_tuple": Tuple([
+        Discrete(10),
+        Box(0.0, 1.0, (5,), dtype=np.float32)]),
 }
 
 # (alg, action_space, obs_space)
@@ -59,7 +68,7 @@ def make_stub_env(action_space, obs_space):
         def __init__(self):
             self.action_space = action_space
             self.observation_space = obs_space
-            self._spec = EnvSpec("StubEnv-v0")
+            self.spec = EnvSpec("StubEnv-v0")
 
         def reset(self):
             sample = self.observation_space.sample()
diff --git a/python/ray/rllib/utils/sampler.py b/python/ray/rllib/utils/sampler.py
index f62978a95..86be66106 100644
--- a/python/ray/rllib/utils/sampler.py
+++ b/python/ray/rllib/utils/sampler.py
@@ -196,8 +196,7 @@ def _env_runner(env, policy, num_local_steps, horizon, obs_filter):
     """
     last_observation = obs_filter(env.reset())
     try:
-        horizon = horizon if horizon else env.spec.tags.get(
-            "wrapper_config.TimeLimit.max_episode_steps")
+        horizon = horizon if horizon else env.spec.max_episode_steps
     except Exception:
         print("Warning, no horizon specified, assuming infinite")
     if not horizon: