diff --git a/doc/source/rllib.rst b/doc/source/rllib.rst
index bef9d5248..c10b5cc33 100644
--- a/doc/source/rllib.rst
+++ b/doc/source/rllib.rst
@@ -157,7 +157,7 @@ can register a function that creates the env to refer to it by name. For example
     from ray.tune.registry import register_env
     from ray.rllib import ppo
 
-    env_creator = lambda: create_my_env()
+    env_creator = lambda env_config: create_my_env()
     env_creator_name = "custom_env"
     register_env(env_creator_name, env_creator)
 
diff --git a/examples/carla/README b/examples/carla/README
index 98a821b97..a066b048a 100644
--- a/examples/carla/README
+++ b/examples/carla/README
@@ -1,12 +1,14 @@
-(Experimental) gym environment for https://github.com/carla-simulator/carla
+(Experimental) OpenAI gym environment for https://github.com/carla-simulator/carla
 
-To run, first download and unpack the Carla release from this URL: https://github.com/carla-simulator/carla/releases/tag/0.7.0
+To run, first download and unpack the Carla binaries from this URL: https://github.com/carla-simulator/carla/releases/tag/0.7.0
 
-Then, you can try running env.py to drive the car. Run train_ppo.py or train_dqn.py to attempt training.
+Note that currently you also need to clone the Python code from `carla/benchmark_branch` which includes the Carla planner.
+
+Then, you can try running env.py to drive the car. Run one of the train_* scripts to attempt training.
 
     $ pkill -9 Carla
-    $ export PYTHONPATH=/home/ubuntu/CARLA_0.7.0/PythonClient:$PYTHONPATH
-    $ export CARLA_SERVER=/home/ubuntu/CARLA_0.7.0/CarlaUE4.sh
+    $ export CARLA_SERVER=/PATH/TO/CARLA_0.7.0/CarlaUE4.sh
+    $ export CARLA_PY_PATH=/PATH/TO/CARLA_BENCHMARK_BRANCH_REPO/PythonClient
     $ python env.py
 
-Note that the reward function is currently hard-coded to drive straight down the street.
+Check out the scenarios.py file for different training and test scenarios that can be used.
diff --git a/examples/carla/a3c_lane_keep.py b/examples/carla/a3c_lane_keep.py
new file mode 100644
index 000000000..cbe453d32
--- /dev/null
+++ b/examples/carla/a3c_lane_keep.py
@@ -0,0 +1,50 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from ray.tune import register_env, run_experiments
+
+from env import CarlaEnv, ENV_CONFIG
+from models import register_carla_model
+from scenarios import LANE_KEEP
+
+env_name = "carla_env"
+env_config = ENV_CONFIG.copy()
+env_config.update({
+    "verbose": False,
+    "x_res": 80,
+    "y_res": 80,
+    "use_depth_camera": False,
+    "discrete_actions": False,
+    "server_map": "/Game/Maps/Town02",
+    "reward_function": "lane_keep",
+    "enable_planner": False,
+    "scenarios": [LANE_KEEP],
+})
+
+register_env(env_name, lambda env_config: CarlaEnv(env_config))
+register_carla_model()
+
+run_experiments({
+    "carla-a3c": {
+        "run": "A3C",
+        "env": "carla_env",
+        "resources": {"cpu": 4, "gpu": 1},
+        "config": {
+            "env_config": env_config,
+            "model": {
+                "custom_model": "carla",
+                "custom_options": {
+                    "image_shape": [80, 80, 6],
+                },
+                "conv_filters": [
+                    [16, [8, 8], 4],
+                    [32, [4, 4], 2],
+                    [512, [10, 10], 1],
+                ],
+            },
+            "gamma": 0.8,
+            "num_workers": 1,
+        },
+    },
+})
diff --git a/examples/carla/dqn_lane_keep.py b/examples/carla/dqn_lane_keep.py
new file mode 100644
index 000000000..0adb8c309
--- /dev/null
+++ b/examples/carla/dqn_lane_keep.py
@@ -0,0 +1,55 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from ray.tune import register_env, run_experiments
+
+from env import CarlaEnv, ENV_CONFIG
+from models import register_carla_model
+from scenarios import LANE_KEEP
+
+env_name = "carla_env"
+env_config = ENV_CONFIG.copy()
+env_config.update({
+    "verbose": False,
+    "x_res": 80,
+    "y_res": 80,
+    "use_depth_camera": False,
+    "discrete_actions": True,
+    "server_map": "/Game/Maps/Town02",
+    "reward_function": "lane_keep",
+    "enable_planner": False,
+    "scenarios": [LANE_KEEP],
+})
+
+register_env(env_name, lambda env_config: CarlaEnv(env_config))
+register_carla_model()
+
+run_experiments({
+    "carla-dqn": {
+        "run": "DQN",
+        "env": "carla_env",
+        "resources": {"cpu": 4, "gpu": 1},
+        "config": {
+            "env_config": env_config,
+            "model": {
+                "custom_model": "carla",
+                "custom_options": {
+                    "image_shape": [80, 80, 6],
+                },
+                "conv_filters": [
+                    [16, [8, 8], 4],
+                    [32, [4, 4], 2],
+                    [512, [10, 10], 1],
+                ],
+            },
+            "timesteps_per_iteration": 100,
+            "learning_starts": 1000,
+            "schedule_max_timesteps": 100000,
+            "gamma": 0.8,
+            "tf_session_args": {
+              "gpu_options": {"allow_growth": True},
+            },
+        },
+    },
+})
diff --git a/examples/carla/env.py b/examples/carla/env.py
index ee8f9fd4c..94cacee75 100644
--- a/examples/carla/env.py
+++ b/examples/carla/env.py
@@ -1,14 +1,18 @@
+"""OpenAI gym environment for Carla. Run this file for a demo."""
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
 from datetime import datetime
+import atexit
 import cv2
 import os
 import json
 import random
 import signal
 import subprocess
+import sys
 import time
 import traceback
 
@@ -18,13 +22,10 @@ try:
 except Exception:
     pass
 
-from carla.client import CarlaClient
-from carla.sensor import Camera
-from carla.settings import CarlaSettings
-
 import gym
-from gym.spaces import Box, Discrete
+from gym.spaces import Box, Discrete, Tuple
 
+from scenarios import DEFAULT_SCENARIO
 
 # Set this where you want to save image outputs (or empty string to disable)
 CARLA_OUT_PATH = os.environ.get("CARLA_OUT", os.path.expanduser("~/carla_out"))
@@ -33,48 +34,132 @@ if CARLA_OUT_PATH and not os.path.exists(CARLA_OUT_PATH):
 
 # Set this to the path of your Carla binary
 SERVER_BINARY = os.environ.get(
-    "CARLA_SERVER", "/home/ubuntu/carla-0.7/CarlaUE4.sh")
+    "CARLA_SERVER", os.path.expanduser("~/CARLA_0.7.0/CarlaUE4.sh"))
+
+assert os.path.exists(SERVER_BINARY)
+if "CARLA_PY_PATH" in os.environ:
+    sys.path.append(os.path.expanduser(os.environ["CARLA_PY_PATH"]))
+else:
+    # TODO(ekl) switch this to the binary path once the planner is in master
+    sys.path.append(os.path.expanduser("~/carla/PythonClient/"))
+
+try:
+    from carla.client import CarlaClient
+    from carla.sensor import Camera
+    from carla.settings import CarlaSettings
+    from carla.planner.planner import Planner, REACH_GOAL, GO_STRAIGHT, \
+        TURN_RIGHT, TURN_LEFT, LANE_FOLLOW
+except Exception as e:
+    print("Failed to import Carla python libs, try setting $CARLA_PY_PATH")
+    raise e
+
+# Carla planner commands
+COMMANDS_ENUM = {
+    REACH_GOAL: "REACH_GOAL",
+    GO_STRAIGHT: "GO_STRAIGHT",
+    TURN_RIGHT: "TURN_RIGHT",
+    TURN_LEFT: "TURN_LEFT",
+    LANE_FOLLOW: "LANE_FOLLOW",
+}
+
+# Mapping from string repr to one-hot encoding index to feed to the model
+COMMAND_ORDINAL = {
+    "REACH_GOAL": 0,
+    "GO_STRAIGHT": 1,
+    "TURN_RIGHT": 2,
+    "TURN_LEFT": 3,
+    "LANE_FOLLOW": 4,
+}
 
 # Number of retries if the server doesn't respond
 RETRIES_ON_ERROR = 5
 
+# Dummy Z coordinate to use when we only care about (x, y)
+GROUND_Z = 22
+
 # Default environment configuration
 ENV_CONFIG = {
+    "log_images": True,
+    "enable_planner": True,
+    "framestack": 2,  # note: only [1, 2] currently supported
+    "convert_images_to_video": True,
+    "early_terminate_on_collision": True,
     "verbose": True,
-    "render_x_res": 400,
-    "render_y_res": 300,
+    "reward_function": "custom",
+    "render_x_res": 800,
+    "render_y_res": 600,
     "x_res": 80,
     "y_res": 80,
-    "map": "/Game/Maps/Town02",
-    "random_starting_location": False,
+    "server_map": "/Game/Maps/Town02",
+    "scenarios": [DEFAULT_SCENARIO],
     "use_depth_camera": False,
-    "discrete_actions": False,
-    "max_steps": 50,
-    "num_vehicles": 20,
-    "num_pedestrians": 40,
-    "weather": [1],  # [1, 3, 7, 8, 14]
-
-    # Defaults to driving down the road /Game/Maps/Town02, start pos 0
-    "target_x": -7.5,
-    "target_y": 120,
+    "discrete_actions": True,
+    "squash_action_logits": False,
 }
 
 
+DISCRETE_ACTIONS = {
+    # coast
+    0: [0.0, 0.0],
+    # turn left
+    1: [0.0, -0.5],
+    # turn right
+    2: [0.0, 0.5],
+    # forward
+    3: [1.0, 0.0],
+    # brake
+    4: [-0.5, 0.0],
+    # forward left
+    5: [1.0, -0.5],
+    # forward right
+    6: [1.0, 0.5],
+    # brake left
+    7: [-0.5, -0.5],
+    # brake right
+    8: [-0.5, 0.5],
+}
+
+
+live_carla_processes = set()
+
+
+def cleanup():
+    print("Killing live carla processes", live_carla_processes)
+    for pgid in live_carla_processes:
+        os.killpg(pgid, signal.SIGKILL)
+
+
+atexit.register(cleanup)
+
+
 class CarlaEnv(gym.Env):
 
     def __init__(self, config=ENV_CONFIG):
         self.config = config
+        self.city = self.config["server_map"].split("/")[-1]
+        if self.config["enable_planner"]:
+            self.planner = Planner(self.city)
 
         if config["discrete_actions"]:
-            self.action_space = Discrete(10)
+            self.action_space = Discrete(len(DISCRETE_ACTIONS))
         else:
-            self.action_space = Box(-1.0, 1.0, shape=(3,))
+            self.action_space = Box(-1.0, 1.0, shape=(2,))
         if config["use_depth_camera"]:
-            self.observation_space = Box(
-                -1.0, 1.0, shape=(config["y_res"], config["x_res"], 1))
+            image_space = Box(
+                -1.0, 1.0, shape=(
+                    config["y_res"], config["x_res"],
+                    1 * config["framestack"]))
         else:
-            self.observation_space = Box(
-                0.0, 255.0, shape=(config["y_res"], config["x_res"], 3))
+            image_space = Box(
+                0.0, 255.0, shape=(
+                    config["y_res"], config["x_res"],
+                    3 * config["framestack"]))
+        self.observation_space = Tuple(
+            [image_space,
+             Discrete(len(COMMANDS_ENUM)),  # next_command
+             Box(-128.0, 128.0, shape=(2,))])  # forward_speed, dist to goal
+
+        # TODO(ekl) this isn't really a proper gym spec
         self._spec = lambda: None
         self._spec.id = "Carla-v0"
 
@@ -84,24 +169,36 @@ class CarlaEnv(gym.Env):
         self.num_steps = 0
         self.total_reward = 0
         self.prev_measurement = None
+        self.prev_image = None
         self.episode_id = None
         self.measurements_file = None
         self.weather = None
-        self.player_start = None
+        self.scenario = None
+        self.start_pos = None
+        self.end_pos = None
+        self.start_coord = None
+        self.end_coord = None
+        self.last_obs = None
 
     def init_server(self):
         print("Initializing new Carla server...")
         # Create a new server process and start the client.
         self.server_port = random.randint(10000, 60000)
         self.server_process = subprocess.Popen(
-            [SERVER_BINARY, self.config["map"],
+            [SERVER_BINARY, self.config["server_map"],
              "-windowed", "-ResX=400", "-ResY=300",
              "-carla-server",
              "-carla-world-port={}".format(self.server_port)],
             preexec_fn=os.setsid, stdout=open(os.devnull, "w"))
+        live_carla_processes.add(os.getpgid(self.server_process.pid))
 
-        self.client = CarlaClient("localhost", self.server_port)
-        self.client.connect()
+        for i in range(RETRIES_ON_ERROR):
+            try:
+                self.client = CarlaClient("localhost", self.server_port)
+                return self.client.connect()
+            except Exception as e:
+                print("Error connecting: {}, attempt {}".format(e, i))
+                time.sleep(2)
 
     def clear_server_state(self):
         print("Clearing Carla server state")
@@ -113,7 +210,9 @@ class CarlaEnv(gym.Env):
             print("Error disconnecting client: {}".format(e))
             pass
         if self.server_process:
-            os.killpg(os.getpgid(self.server_process.pid), signal.SIGKILL)
+            pgid = os.getpgid(self.server_process.pid)
+            os.killpg(pgid, signal.SIGKILL)
+            live_carla_processes.remove(pgid)
             self.server_port = None
             self.server_process = None
 
@@ -126,9 +225,6 @@ class CarlaEnv(gym.Env):
             try:
                 if not self.server_process:
                     self.init_server()
-                # reset twice since the first time a server is initialized,
-                # the starting location is different
-                self._reset()
                 return self._reset()
             except Exception as e:
                 print("Error during reset: {}".format(traceback.format_exc()))
@@ -138,7 +234,9 @@ class CarlaEnv(gym.Env):
 
     def _reset(self):
         self.num_steps = 0
+        self.total_reward = 0
         self.prev_measurement = None
+        self.prev_image = None
         self.episode_id = datetime.today().strftime("%Y-%m-%d_%H-%M-%S_%f")
         self.measurements_file = None
 
@@ -146,20 +244,23 @@ class CarlaEnv(gym.Env):
         # the CarlaSettings.ini file. Here we set the configuration we
         # want for the new episode.
         settings = CarlaSettings()
-        self.weather = random.choice(self.config["weather"])
+        self.scenario = random.choice(self.config["scenarios"])
+        assert self.scenario["city"] == self.city, (self.scenario, self.city)
+        self.weather = random.choice(self.scenario["weather_distribution"])
         settings.set(
             SynchronousMode=True,
             SendNonPlayerAgentsInfo=True,
-            NumberOfVehicles=self.config["num_vehicles"],
-            NumberOfPedestrians=self.config["num_pedestrians"],
+            NumberOfVehicles=self.scenario["num_vehicles"],
+            NumberOfPedestrians=self.scenario["num_pedestrians"],
             WeatherId=self.weather)
         settings.randomize_seeds()
 
-        camera1 = Camera("CameraDepth", PostProcessing="Depth")
-        camera1.set_image_size(
-            self.config["render_x_res"], self.config["render_y_res"])
-        camera1.set_position(30, 0, 130)
-        settings.add_sensor(camera1)
+        if self.config["use_depth_camera"]:
+            camera1 = Camera("CameraDepth", PostProcessing="Depth")
+            camera1.set_image_size(
+                self.config["render_x_res"], self.config["render_y_res"])
+            camera1.set_position(30, 0, 130)
+            settings.add_sensor(camera1)
 
         camera2 = Camera("CameraRGB")
         camera2.set_image_size(
@@ -167,25 +268,45 @@ class CarlaEnv(gym.Env):
         camera2.set_position(30, 0, 130)
         settings.add_sensor(camera2)
 
+        # Setup start and end positions
         scene = self.client.load_settings(settings)
-
-        # Choose one player start at random.
-        number_of_player_starts = len(scene.player_start_spots)
-        if self.config["random_starting_location"]:
-            self.player_start = random.randint(
-                0, max(0, number_of_player_starts - 1))
-        else:
-            self.player_start = 0
+        positions = scene.player_start_spots
+        self.start_pos = positions[self.scenario["start_pos_id"]]
+        self.end_pos = positions[self.scenario["end_pos_id"]]
+        self.start_coord = [
+            self.start_pos.location.x // 100, self.start_pos.location.y // 100]
+        self.end_coord = [
+            self.end_pos.location.x // 100, self.end_pos.location.y // 100]
+        print(
+            "Start pos {} ({}), end {} ({})".format(
+                self.scenario["start_pos_id"], self.start_coord,
+                self.scenario["end_pos_id"], self.end_coord))
 
         # Notify the server that we want to start the episode at the
         # player_start index. This function blocks until the server is ready
         # to start the episode.
         print("Starting new episode...")
-        self.client.start_episode(self.player_start)
+        self.client.start_episode(self.scenario["start_pos_id"])
 
         image, py_measurements = self._read_observation()
         self.prev_measurement = py_measurements
-        return self.preprocess_image(image)
+        return self.encode_obs(self.preprocess_image(image), py_measurements)
+
+    def encode_obs(self, image, py_measurements):
+        assert self.config["framestack"] in [1, 2]
+        prev_image = self.prev_image
+        self.prev_image = image
+        if prev_image is None:
+            prev_image = image
+        if self.config["framestack"] == 2:
+            image = np.concatenate([prev_image, image], axis=2)
+        obs = (
+            image,
+            COMMAND_ORDINAL[py_measurements["next_command"]],
+            [py_measurements["forward_speed"],
+             py_measurements["distance_to_goal"]])
+        self.last_obs = obs
+        return obs
 
     def step(self, action):
         try:
@@ -196,42 +317,22 @@ class CarlaEnv(gym.Env):
                 "Error during step, terminating episode early",
                 traceback.format_exc())
             self.clear_server_state()
-            return np.zeros(self.observation_space.shape), 0.0, True, {}
+            return (self.last_obs, 0.0, True, {})
 
     def _step(self, action):
         if self.config["discrete_actions"]:
-            action = int(action)
-            assert action in range(10)
-            if action == 9:
-                brake = 1.0
-                steer = 0.0
-                throttle = 0.0
-                reverse = False
-            else:
-                brake = 0.0
-                if action >= 6:
-                    steer = -1.0
-                elif action >= 3:
-                    steer = 1.0
-                else:
-                    steer = 0.0
-                action %= 3
-                if action == 0:
-                    throttle = 0.0
-                    reverse = False
-                elif action == 1:
-                    throttle = 1.0
-                    reverse = False
-                elif action == 2:
-                    throttle = 1.0
-                    reverse = True
+            action = DISCRETE_ACTIONS[int(action)]
+        assert len(action) == 2, "Invalid action {}".format(action)
+        if self.config["squash_action_logits"]:
+            forward = 2 * float(sigmoid(action[0]) - 0.5)
+            throttle = float(np.clip(forward, 0, 1))
+            brake = float(np.abs(np.clip(forward, -1, 0)))
+            steer = 2 * float(sigmoid(action[1]) - 0.5)
         else:
-            assert len(action) == 3, "Invalid action {}".format(action)
-            steer = action[0]
-            throttle = min(1.0, abs(action[1]))
-            brake = max(0.0, min(1.0, action[2]))
-            reverse = action[1] < 0.0
-
+            throttle = float(np.clip(action[0], 0, 1))
+            brake = float(np.abs(np.clip(action[0], -1, 0)))
+            steer = float(np.clip(action[1], -1, 1))
+        reverse = False
         hand_brake = False
 
         if self.config["verbose"]:
@@ -245,15 +346,12 @@ class CarlaEnv(gym.Env):
 
         # Process observations
         image, py_measurements = self._read_observation()
-        reward, done = compute_reward(
-            self.config, self.prev_measurement, py_measurements)
-        if self.num_steps > self.config["max_steps"]:
-            done = True
-        self.total_reward += reward
-        py_measurements["reward"] = reward
-        py_measurements["total_reward"] = self.total_reward
-        py_measurements["done"] = done
-        py_measurements["action"] = action
+        if self.config["verbose"]:
+            print("Next command", py_measurements["next_command"])
+        if type(action) is np.ndarray:
+            py_measurements["action"] = [float(a) for a in action]
+        else:
+            py_measurements["action"] = action
         py_measurements["control"] = {
             "steer": steer,
             "throttle": throttle,
@@ -261,6 +359,16 @@ class CarlaEnv(gym.Env):
             "reverse": reverse,
             "hand_brake": hand_brake,
         }
+        reward = compute_reward(
+            self, self.prev_measurement, py_measurements)
+        self.total_reward += reward
+        py_measurements["reward"] = reward
+        py_measurements["total_reward"] = self.total_reward
+        done = (self.num_steps > self.scenario["max_steps"] or
+                py_measurements["next_command"] == "REACH_GOAL" or
+                (self.config["early_terminate_on_collision"] and
+                 collided_done(py_measurements)))
+        py_measurements["done"] = done
         self.prev_measurement = py_measurements
 
         # Write out measurements to file
@@ -276,19 +384,41 @@ class CarlaEnv(gym.Env):
             if done:
                 self.measurements_file.close()
                 self.measurements_file = None
+                if self.config["convert_images_to_video"]:
+                    self.images_to_video()
 
         self.num_steps += 1
         image = self.preprocess_image(image)
-        return image, reward, done, py_measurements
+        return (
+            self.encode_obs(image, py_measurements), reward, done,
+            py_measurements)
+
+    def images_to_video(self):
+        videos_dir = os.path.join(CARLA_OUT_PATH, "Videos")
+        if not os.path.exists(videos_dir):
+            os.makedirs(videos_dir)
+        ffmpeg_cmd = (
+            "ffmpeg -loglevel -8 -r 60 -f image2 -s {x_res}x{y_res} "
+            "-start_number 0 -i "
+            "{img}_%04d.jpg -vcodec libx264 {vid}.mp4 && rm -f {img}_*.jpg "
+        ).format(
+            x_res=self.config["render_x_res"],
+            y_res=self.config["render_y_res"],
+            vid=os.path.join(videos_dir, self.episode_id),
+            img=os.path.join(CARLA_OUT_PATH, "CameraRGB", self.episode_id))
+        print("Executing ffmpeg command", ffmpeg_cmd)
+        subprocess.call(ffmpeg_cmd, shell=True)
 
     def preprocess_image(self, image):
         if self.config["use_depth_camera"]:
+            assert self.config["use_depth_camera"]
             data = (image.data - 0.5) * 2
             data = data.reshape(
                 self.config["render_y_res"], self.config["render_x_res"], 1)
             data = cv2.resize(
                 data, (self.config["x_res"], self.config["y_res"]),
                 interpolation=cv2.INTER_AREA)
+            data = np.expand_dims(data, 2)
         else:
             data = image.data.reshape(
                 self.config["render_y_res"], self.config["render_x_res"], 3)
@@ -316,29 +446,68 @@ class CarlaEnv(gym.Env):
                 observation = image
 
         cur = measurements.player_measurements
+
+        if self.config["enable_planner"]:
+            next_command = COMMANDS_ENUM[
+                self.planner.get_next_command(
+                    [cur.transform.location.x, cur.transform.location.y,
+                     GROUND_Z],
+                    [cur.transform.orientation.x, cur.transform.orientation.y,
+                     GROUND_Z],
+                    [self.end_pos.location.x, self.end_pos.location.y,
+                     GROUND_Z],
+                    [self.end_pos.orientation.x, self.end_pos.orientation.y,
+                     GROUND_Z])
+            ]
+        else:
+            next_command = "LANE_FOLLOW"
+
+        if next_command == "REACH_GOAL":
+            distance_to_goal = 0.0  # avoids crash in planner
+        elif self.config["enable_planner"]:
+            distance_to_goal = self.planner.get_shortest_path_distance(
+                [cur.transform.location.x, cur.transform.location.y, GROUND_Z],
+                [cur.transform.orientation.x, cur.transform.orientation.y,
+                 GROUND_Z],
+                [self.end_pos.location.x, self.end_pos.location.y, GROUND_Z],
+                [self.end_pos.orientation.x, self.end_pos.orientation.y,
+                 GROUND_Z]) / 100
+        else:
+            distance_to_goal = -1
+
+        distance_to_goal_euclidean = float(np.linalg.norm(
+            [cur.transform.location.x - self.end_pos.location.x,
+             cur.transform.location.y - self.end_pos.location.y]) / 100)
+
         py_measurements = {
             "episode_id": self.episode_id,
             "step": self.num_steps,
             "x": cur.transform.location.x,
             "y": cur.transform.location.y,
+            "x_orient": cur.transform.orientation.x,
+            "y_orient": cur.transform.orientation.y,
             "forward_speed": cur.forward_speed,
+            "distance_to_goal": distance_to_goal,
+            "distance_to_goal_euclidean": distance_to_goal_euclidean,
             "collision_vehicles": cur.collision_vehicles,
             "collision_pedestrians": cur.collision_pedestrians,
             "collision_other": cur.collision_other,
             "intersection_offroad": cur.intersection_offroad,
             "intersection_otherlane": cur.intersection_otherlane,
             "weather": self.weather,
-            "map": self.config["map"],
-            "target_x": self.config["target_x"],
-            "target_y": self.config["target_y"],
+            "map": self.config["server_map"],
+            "start_coord": self.start_coord,
+            "end_coord": self.end_coord,
+            "current_scenario": self.scenario,
             "x_res": self.config["x_res"],
             "y_res": self.config["y_res"],
-            "num_vehicles": self.config["num_vehicles"],
-            "num_pedestrians": self.config["num_pedestrians"],
-            "max_steps": self.config["max_steps"],
+            "num_vehicles": self.scenario["num_vehicles"],
+            "num_pedestrians": self.scenario["num_pedestrians"],
+            "max_steps": self.scenario["max_steps"],
+            "next_command": next_command,
         }
 
-        if CARLA_OUT_PATH:
+        if CARLA_OUT_PATH and self.config["log_images"]:
             for name, image in sensor_data.items():
                 out_dir = os.path.join(CARLA_OUT_PATH, name)
                 if not os.path.exists(out_dir):
@@ -352,23 +521,18 @@ class CarlaEnv(gym.Env):
         return observation, py_measurements
 
 
-def distance(x1, y1, x2, y2):
-    return ((x1 - x2)**2 + (y1 - y2)**2)**0.5
-
-
-def compute_reward(config, prev, current):
-    prev_x = prev["x"] / 100  # cm -> m
-    prev_y = prev["y"] / 100
-    cur_x = current["x"] / 100  # cm -> m
-    cur_y = current["y"] / 100
-
+def compute_reward_corl2017(env, prev, current):
     reward = 0.0
-    done = False
+
+    cur_dist = current["distance_to_goal"]
+
+    prev_dist = prev["distance_to_goal"]
+
+    if env.config["verbose"]:
+        print("Cur dist {}, prev dist {}".format(cur_dist, prev_dist))
 
     # Distance travelled toward the goal in m
-    reward += (
-        distance(prev_x, prev_y, config["target_x"], config["target_y"]) -
-        distance(cur_x, cur_y, config["target_x"], config["target_y"]))
+    reward += np.clip(prev_dist - cur_dist, -10.0, 10.0)
 
     # Change in speed (km/h)
     reward += 0.05 * (current["forward_speed"] - prev["forward_speed"])
@@ -387,21 +551,89 @@ def compute_reward(config, prev, current):
     reward -= 2 * (
         current["intersection_otherlane"] - prev["intersection_otherlane"])
 
-    if distance(cur_x, cur_y, config["target_x"], config["target_y"]) < 10:
-        done = True
+    return reward
 
-    return reward, done
+
+def compute_reward_custom(env, prev, current):
+    reward = 0.0
+
+    cur_dist = current["distance_to_goal"]
+    prev_dist = prev["distance_to_goal"]
+
+    if env.config["verbose"]:
+        print("Cur dist {}, prev dist {}".format(cur_dist, prev_dist))
+
+    # Distance travelled toward the goal in m
+    reward += np.clip(prev_dist - cur_dist, -10.0, 10.0)
+
+    # Speed reward, up 30.0 (km/h)
+    reward += np.clip(current["forward_speed"], 0.0, 30.0) / 10
+
+    # New collision damage
+    new_damage = (
+        current["collision_vehicles"] + current["collision_pedestrians"] +
+        current["collision_other"] - prev["collision_vehicles"] -
+        prev["collision_pedestrians"] - prev["collision_other"])
+    if new_damage:
+        reward -= 100.0
+
+    # Sidewalk intersection
+    reward -= current["intersection_offroad"]
+
+    # Opposite lane intersection
+    reward -= current["intersection_otherlane"]
+
+    # Reached goal
+    if current["next_command"] == "REACH_GOAL":
+        reward += 100.0
+
+    return reward
+
+
+def compute_reward_lane_keep(env, prev, current):
+    reward = 0.0
+
+    # Speed reward, up 30.0 (km/h)
+    reward += np.clip(current["forward_speed"], 0.0, 30.0) / 10
+
+    # New collision damage
+    new_damage = (
+        current["collision_vehicles"] + current["collision_pedestrians"] +
+        current["collision_other"] - prev["collision_vehicles"] -
+        prev["collision_pedestrians"] - prev["collision_other"])
+    if new_damage:
+        reward -= 100.0
+
+    # Sidewalk intersection
+    reward -= current["intersection_offroad"]
+
+    # Opposite lane intersection
+    reward -= current["intersection_otherlane"]
+
+    return reward
+
+
+REWARD_FUNCTIONS = {
+    "corl2017": compute_reward_corl2017,
+    "custom": compute_reward_custom,
+    "lane_keep": compute_reward_lane_keep,
+}
+
+
+def compute_reward(env, prev, current):
+    return REWARD_FUNCTIONS[env.config["reward_function"]](
+        env, prev, current)
 
 
 def print_measurements(measurements):
     number_of_agents = len(measurements.non_player_agents)
     player_measurements = measurements.player_measurements
-    message = 'Vehicle at ({pos_x:.1f}, {pos_y:.1f}), '
-    message += '{speed:.2f} km/h, '
-    message += 'Collision: {{vehicles={col_cars:.0f}, '
-    message += 'pedestrians={col_ped:.0f}, other={col_other:.0f}}}, '
-    message += '{other_lane:.0f}% other lane, {offroad:.0f}% off-road, '
-    message += '({agents_num:d} non-player agents in the scene)'
+    message = "Vehicle at ({pos_x:.1f}, {pos_y:.1f}), "
+    message += "{speed:.2f} km/h, "
+    message += "Collision: {{vehicles={col_cars:.0f}, "
+    message += "pedestrians={col_ped:.0f}, other={col_other:.0f}}}, "
+    message += "{other_lane:.0f}% other lane, {offroad:.0f}% off-road, "
+    message += "({agents_num:d} non-player agents in the scene)"
     message = message.format(
         pos_x=player_measurements.transform.location.x / 100,  # cm -> m
         pos_y=player_measurements.transform.location.y / 100,
@@ -415,22 +647,34 @@ def print_measurements(measurements):
     print(message)
 
 
-if __name__ == '__main__':
-    env = CarlaEnv()
-    obs = env.reset()
-    print("reset", obs)
-    start = time.time()
-    done = False
-    i = 0
-    total_reward = 0.0
-    while not done:
-        i += 1
-        if ENV_CONFIG["discrete_actions"]:
-            obs, reward, done, info = env.step(1)
-        else:
-            obs, reward, done, info = env.step([0, 1, 0])
-        total_reward += reward
-        print(
-            i, "obs", obs.shape, "rew", reward, "total", total_reward,
-            "done", done)
-    print("{} fps".format(100 / (time.time() - start)))
+def sigmoid(x):
+    x = float(x)
+    return np.exp(x) / (1 + np.exp(x))
+
+
+def collided_done(py_measurements):
+    m = py_measurements
+    collided = (
+        m["collision_vehicles"] > 0 or m["collision_pedestrians"] > 0 or
+        m["collision_other"] > 0)
+    return bool(collided or m["total_reward"] < -100)
+
+
+if __name__ == "__main__":
+    for _ in range(2):
+        env = CarlaEnv()
+        obs = env.reset()
+        print("reset", obs)
+        start = time.time()
+        done = False
+        i = 0
+        total_reward = 0.0
+        while not done:
+            i += 1
+            if ENV_CONFIG["discrete_actions"]:
+                obs, reward, done, info = env.step(1)
+            else:
+                obs, reward, done, info = env.step([0, 1, 0])
+            total_reward += reward
+            print(i, "rew", reward, "total", total_reward, "done", done)
+        print("{} fps".format(100 / (time.time() - start)))
diff --git a/examples/carla/models.py b/examples/carla/models.py
new file mode 100644
index 000000000..fdc194dcc
--- /dev/null
+++ b/examples/carla/models.py
@@ -0,0 +1,96 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import tensorflow as tf
+import tensorflow.contrib.slim as slim
+from tensorflow.contrib.layers import xavier_initializer
+
+from ray.rllib.models.catalog import ModelCatalog
+from ray.rllib.models.misc import normc_initializer
+from ray.rllib.models.model import Model
+
+
+class CarlaModel(Model):
+    """Carla model that can process the observation tuple.
+
+    The architecture processes the image using convolutional layers, the
+    metrics using fully connected layers, and then combines them with
+    further fully connected layers.
+    """
+
+    def _init(self, inputs, num_outputs, options):
+        # Parse options
+        image_shape = options["custom_options"]["image_shape"]
+        convs = options.get("conv_filters", [
+            [16, [8, 8], 4],
+            [32, [5, 5], 3],
+            [32, [5, 5], 2],
+            [512, [10, 10], 1],
+        ])
+        hiddens = options.get("fcnet_hiddens", [64])
+        fcnet_activation = options.get("fcnet_activation", "tanh")
+        if fcnet_activation == "tanh":
+            activation = tf.nn.tanh
+        elif fcnet_activation == "relu":
+            activation = tf.nn.relu
+
+        # Sanity checks
+        image_size = np.product(image_shape)
+        expected_shape = [image_size + 5 + 2]
+        assert inputs.shape.as_list()[1:] == expected_shape, \
+            (inputs.shape.as_list()[1:], expected_shape)
+
+        # Reshape the input vector back into its components
+        vision_in = tf.reshape(
+            inputs[:, :image_size], [tf.shape(inputs)[0]] + image_shape)
+        metrics_in = inputs[:, image_size:]
+        print("Vision in shape", vision_in)
+        print("Metrics in shape", metrics_in)
+
+        # Setup vision layers
+        with tf.name_scope("carla_vision"):
+            for i, (out_size, kernel, stride) in enumerate(convs[:-1], 1):
+                vision_in = slim.conv2d(
+                    vision_in, out_size, kernel, stride,
+                    scope="conv{}".format(i))
+            out_size, kernel, stride = convs[-1]
+            vision_in = slim.conv2d(
+                vision_in, out_size, kernel, stride,
+                padding="VALID", scope="conv_out")
+            vision_in = tf.squeeze(vision_in, [1, 2])
+
+        # Setup metrics layer
+        with tf.name_scope("carla_metrics"):
+            metrics_in = slim.fully_connected(
+                metrics_in, 64,
+                weights_initializer=xavier_initializer(),
+                activation_fn=activation,
+                scope="metrics_out")
+
+        print("Shape of vision out is", vision_in.shape)
+        print("Shape of metric out is", metrics_in.shape)
+
+        # Combine the metrics and vision inputs
+        with tf.name_scope("carla_out"):
+            i = 1
+            last_layer = tf.concat([vision_in, metrics_in], axis=1)
+            print("Shape of concatenated out is", last_layer.shape)
+            for size in hiddens:
+                last_layer = slim.fully_connected(
+                    last_layer, size,
+                    weights_initializer=xavier_initializer(),
+                    activation_fn=activation,
+                    scope="fc{}".format(i))
+                i += 1
+            output = slim.fully_connected(
+                last_layer, num_outputs,
+                weights_initializer=normc_initializer(0.01),
+                activation_fn=None, scope="fc_out")
+
+        return output, last_layer
+
+
+def register_carla_model():
+    ModelCatalog.register_custom_model("carla", CarlaModel)
diff --git a/examples/carla/ppo_lane_keep.py b/examples/carla/ppo_lane_keep.py
new file mode 100644
index 000000000..dbd015f7e
--- /dev/null
+++ b/examples/carla/ppo_lane_keep.py
@@ -0,0 +1,60 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from ray.tune import register_env, run_experiments
+
+from env import CarlaEnv, ENV_CONFIG
+from models import register_carla_model
+from scenarios import LANE_KEEP
+
+env_name = "carla_env"
+env_config = ENV_CONFIG.copy()
+env_config.update({
+    "verbose": False,
+    "x_res": 80,
+    "y_res": 80,
+    "use_depth_camera": False,
+    "discrete_actions": False,
+    "server_map": "/Game/Maps/Town02",
+    "reward_function": "lane_keep",
+    "enable_planner": False,
+    "scenarios": [LANE_KEEP],
+})
+
+register_env(env_name, lambda env_config: CarlaEnv(env_config))
+register_carla_model()
+
+run_experiments({
+    "carla-ppo": {
+        "run": "PPO",
+        "env": "carla_env",
+        "resources": {"cpu": 4, "gpu": 1},
+        "config": {
+            "env_config": env_config,
+            "model": {
+                "custom_model": "carla",
+                "custom_options": {
+                    "image_shape": [80, 80, 6],
+                },
+                "conv_filters": [
+                    [16, [8, 8], 4],
+                    [32, [4, 4], 2],
+                    [512, [10, 10], 1],
+                ],
+            },
+            "num_workers": 1,
+            "timesteps_per_batch": 2000,
+            "min_steps_per_task": 100,
+            "lambda": 0.95,
+            "clip_param": 0.2,
+            "num_sgd_iter": 20,
+            "sgd_stepsize": 0.0001,
+            "sgd_batchsize": 32,
+            "devices": ["/gpu:0"],
+            "tf_session_args": {
+              "gpu_options": {"allow_growth": True}
+            }
+        },
+    },
+})
diff --git a/examples/carla/scenarios.py b/examples/carla/scenarios.py
new file mode 100644
index 000000000..e6494af18
--- /dev/null
+++ b/examples/carla/scenarios.py
@@ -0,0 +1,119 @@
+"""Collection of Carla scenarios, including those from the CoRL 2017 paper."""
+
+
+TEST_WEATHERS = [0, 2, 5, 7, 9, 10, 11, 12, 13]
+TRAIN_WEATHERS = [1, 3, 4, 6, 8, 14]
+
+
+def build_scenario(
+        city, start, end, vehicles, pedestrians, max_steps, weathers):
+    return {
+        "city": city,
+        "num_vehicles": vehicles,
+        "num_pedestrians": pedestrians,
+        "weather_distribution": weathers,
+        "start_pos_id": start,
+        "end_pos_id": end,
+        "max_steps": max_steps,
+    }
+
+
+# Simple scenario for Town02 that involves driving down a road
+DEFAULT_SCENARIO = build_scenario(
+    city="Town02", start=36, end=40, vehicles=20, pedestrians=40,
+    max_steps=200, weathers=[0])
+
+# Simple scenario for Town02 that involves driving down a road
+LANE_KEEP = build_scenario(
+    city="Town02", start=36, end=40, vehicles=0, pedestrians=0,
+    max_steps=2000, weathers=[0])
+
+# Scenarios from the CoRL2017 paper
+POSES_TOWN1_STRAIGHT = [
+    [36, 40], [39, 35], [110, 114], [7, 3], [0, 4],
+    [68, 50], [61, 59], [47, 64], [147, 90], [33, 87],
+    [26, 19], [80, 76], [45, 49], [55, 44], [29, 107],
+    [95, 104], [84, 34], [53, 67], [22, 17], [91, 148],
+    [20, 107], [78, 70], [95, 102], [68, 44], [45, 69]]
+
+
+POSES_TOWN1_ONE_CURVE = [
+    [138, 17], [47, 16], [26, 9], [42, 49], [140, 124],
+    [85, 98], [65, 133], [137, 51], [76, 66], [46, 39],
+    [40, 60], [0, 29], [4, 129], [121, 140], [2, 129],
+    [78, 44], [68, 85], [41, 102], [95, 70], [68, 129],
+    [84, 69], [47, 79], [110, 15], [130, 17], [0, 17]]
+
+POSES_TOWN1_NAV = [
+    [105, 29], [27, 130], [102, 87], [132, 27], [24, 44],
+    [96, 26], [34, 67], [28, 1], [140, 134], [105, 9],
+    [148, 129], [65, 18], [21, 16], [147, 97], [42, 51],
+    [30, 41], [18, 107], [69, 45], [102, 95], [18, 145],
+    [111, 64], [79, 45], [84, 69], [73, 31], [37, 81]]
+
+
+POSES_TOWN2_STRAIGHT = [
+    [38,  34],  [4,  2],  [12,  10],  [62,  55],  [43,  47],
+    [64,  66],  [78,  76], [59, 57], [61, 18], [35, 39],
+    [12, 8], [0, 18], [75, 68], [54, 60], [45, 49],
+    [46, 42], [53, 46], [80, 29], [65, 63], [0, 81],
+    [54, 63], [51, 42], [16, 19], [17, 26], [77, 68]]
+
+POSES_TOWN2_ONE_CURVE = [
+    [37,  76],  [8,  24],  [60,  69],  [38,  10],  [21,  1],
+    [58, 71], [74, 32], [44, 0], [71, 16], [14, 24],
+    [34, 11], [43, 14], [75, 16], [80, 21], [3, 23],
+    [75, 59], [50, 47], [11, 19], [77, 34], [79, 25],
+    [40, 63], [58, 76], [79, 55], [16, 61], [27, 11]]
+
+POSES_TOWN2_NAV = [
+    [19, 66], [79, 14], [19, 57], [23, 1],
+    [53, 76], [42, 13], [31, 71], [33, 5],
+    [54, 30], [10, 61], [66, 3], [27, 12],
+    [79, 19], [2, 29], [16, 14], [5, 57],
+    [70, 73], [46, 67], [57, 50], [61, 49], [21, 12],
+    [51, 81], [77, 68], [56, 65], [43, 54]]
+
+TOWN1_STRAIGHT = [
+    build_scenario("Town01", start, end, 0, 0, 300, TEST_WEATHERS)
+    for (start, end) in POSES_TOWN1_STRAIGHT]
+
+TOWN1_ONE_CURVE = [
+    build_scenario("Town01", start, end, 0, 0, 600, TEST_WEATHERS)
+    for (start, end) in POSES_TOWN1_ONE_CURVE]
+
+TOWN1_NAVIGATION = [
+    build_scenario("Town01", start, end, 0, 0, 900, TEST_WEATHERS)
+    for (start, end) in POSES_TOWN1_NAV]
+
+TOWN1_NAVIGATION_DYNAMIC = [
+    build_scenario("Town01", start, end, 20, 50, 900, TEST_WEATHERS)
+    for (start, end) in POSES_TOWN1_NAV]
+
+TOWN2_STRAIGHT = [
+    build_scenario("Town02", start, end, 0, 0, 300, TRAIN_WEATHERS)
+    for (start, end) in POSES_TOWN2_STRAIGHT]
+
+TOWN2_STRAIGHT_DYNAMIC = [
+    build_scenario("Town02", start, end, 20, 50, 300, TRAIN_WEATHERS)
+    for (start, end) in POSES_TOWN2_STRAIGHT]
+
+TOWN2_ONE_CURVE = [
+    build_scenario("Town02", start, end, 0, 0, 600, TRAIN_WEATHERS)
+    for (start, end) in POSES_TOWN2_ONE_CURVE]
+
+TOWN2_NAVIGATION = [
+    build_scenario("Town02", start, end, 0, 0, 900, TRAIN_WEATHERS)
+    for (start, end) in POSES_TOWN2_NAV]
+
+TOWN2_NAVIGATION_DYNAMIC = [
+    build_scenario("Town02", start, end, 20, 50, 900, TRAIN_WEATHERS)
+    for (start, end) in POSES_TOWN2_NAV]
+
+TOWN1_ALL = (
+    TOWN1_STRAIGHT + TOWN1_ONE_CURVE + TOWN1_NAVIGATION +
+    TOWN1_NAVIGATION_DYNAMIC)
+
+TOWN2_ALL = (
+    TOWN2_STRAIGHT + TOWN2_ONE_CURVE + TOWN2_NAVIGATION +
+    TOWN2_NAVIGATION_DYNAMIC)
diff --git a/examples/carla/train_a3c.py b/examples/carla/train_a3c.py
new file mode 100644
index 000000000..0089e6cc3
--- /dev/null
+++ b/examples/carla/train_a3c.py
@@ -0,0 +1,53 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import ray
+from ray.tune import grid_search, register_env, run_experiments
+
+from env import CarlaEnv, ENV_CONFIG
+from models import register_carla_model
+from scenarios import TOWN2_STRAIGHT
+
+env_name = "carla_env"
+env_config = ENV_CONFIG.copy()
+env_config.update({
+    "verbose": False,
+    "x_res": 80,
+    "y_res": 80,
+    "squash_action_logits": grid_search([False, True]),
+    "use_depth_camera": False,
+    "discrete_actions": False,
+    "server_map": "/Game/Maps/Town02",
+    "reward_function": grid_search(["custom", "corl2017"]),
+    "scenarios": TOWN2_STRAIGHT,
+})
+
+register_env(env_name, lambda env_config: CarlaEnv(env_config))
+register_carla_model()
+redis_address = ray.services.get_node_ip_address() + ":6379"
+
+run_experiments({
+    "carla-a3c": {
+        "run": "A3C",
+        "env": "carla_env",
+        "resources": {"cpu": 5, "gpu": 2, "driver_gpu_limit": 0},
+        "config": {
+            "env_config": env_config,
+            "use_gpu_for_workers": True,
+            "model": {
+                "custom_model": "carla",
+                "custom_options": {
+                    "image_shape": [80, 80, 6],
+                },
+                "conv_filters": [
+                    [16, [8, 8], 4],
+                    [32, [4, 4], 2],
+                    [512, [10, 10], 1],
+                ],
+            },
+            "gamma": 0.95,
+            "num_workers": 2,
+        },
+    },
+}, redis_address=redis_address)
diff --git a/examples/carla/train_dqn.py b/examples/carla/train_dqn.py
index a0135202d..991efb356 100644
--- a/examples/carla/train_dqn.py
+++ b/examples/carla/train_dqn.py
@@ -5,38 +5,51 @@ from __future__ import print_function
 from ray.tune import register_env, run_experiments
 
 from env import CarlaEnv, ENV_CONFIG
+from models import register_carla_model
+from scenarios import TOWN2_ONE_CURVE
 
 env_name = "carla_env"
 env_config = ENV_CONFIG.copy()
 env_config.update({
     "verbose": False,
-    "x_res": 240,
-    "y_res": 240,
-    "use_depth_camera": False,
+    "x_res": 80,
+    "y_res": 80,
     "discrete_actions": True,
-    "max_steps": 200,
-    "weather": [1, 3, 7, 8, 14],
+    "server_map": "/Game/Maps/Town02",
+    "reward_function": "custom",
+    "scenarios": TOWN2_ONE_CURVE,
 })
-register_env(env_name, lambda: CarlaEnv(env_config))
+
+register_env(env_name, lambda env_config: CarlaEnv(env_config))
+register_carla_model()
 
 run_experiments({
-    "carla": {
+    "carla-dqn": {
         "run": "DQN",
         "env": "carla_env",
         "resources": {"cpu": 4, "gpu": 1},
         "config": {
-            "model":  {
+            "env_config": env_config,
+            "model": {
+                "custom_model": "carla",
+                "custom_options": {
+                    "image_shape": [
+                        80, 80,
+                        lambda spec: spec.config.env_config.framestack * (
+                            spec.config.env_config.use_depth_camera and 1 or 3
+                        ),
+                    ],
+                },
                 "conv_filters": [
                     [16, [8, 8], 4],
-                    [32, [5, 5], 3],
-                    [32, [5, 5], 2],
+                    [32, [4, 4], 2],
                     [512, [10, 10], 1],
                 ],
             },
             "timesteps_per_iteration": 100,
             "learning_starts": 1000,
             "schedule_max_timesteps": 100000,
-            "gamma": 0.95,
+            "gamma": 0.8,
             "tf_session_args": {
               "gpu_options": {"allow_growth": True},
             },
diff --git a/examples/carla/train_ppo.py b/examples/carla/train_ppo.py
index ab13df4e7..467b6fe7d 100644
--- a/examples/carla/train_ppo.py
+++ b/examples/carla/train_ppo.py
@@ -5,6 +5,8 @@ from __future__ import print_function
 from ray.tune import register_env, run_experiments
 
 from env import CarlaEnv, ENV_CONFIG
+from models import register_carla_model
+from scenarios import TOWN2_STRAIGHT
 
 env_name = "carla_env"
 env_config = ENV_CONFIG.copy()
@@ -12,11 +14,13 @@ env_config.update({
     "verbose": False,
     "x_res": 80,
     "y_res": 80,
-    "use_depth_camera": True,
+    "use_depth_camera": False,
     "discrete_actions": False,
-    "max_steps": 150,
+    "server_map": "/Game/Maps/Town02",
+    "scenarios": TOWN2_STRAIGHT,
 })
-register_env(env_name, lambda: CarlaEnv(env_config))
+register_env(env_name, lambda env_config: CarlaEnv(env_config))
+register_carla_model()
 
 run_experiments({
     "carla": {
@@ -24,6 +28,19 @@ run_experiments({
         "env": "carla_env",
         "resources": {"cpu": 4, "gpu": 1},
         "config": {
+            "env_config": env_config,
+            "model": {
+                "custom_model": "carla",
+                "custom_options": {
+                    "image_shape": [
+                        env_config["x_res"], env_config["y_res"], 6],
+                },
+                "conv_filters": [
+                    [16, [8, 8], 4],
+                    [32, [4, 4], 2],
+                    [512, [10, 10], 1],
+                ],
+            },
             "num_workers": 1,
             "timesteps_per_batch": 2000,
             "min_steps_per_task": 100,
diff --git a/python/ray/rllib/a3c/a3c.py b/python/ray/rllib/a3c/a3c.py
index 2116a8ab4..546938edb 100644
--- a/python/ray/rllib/a3c/a3c.py
+++ b/python/ray/rllib/a3c/a3c.py
@@ -10,7 +10,8 @@ import ray
 from ray.rllib.agent import Agent
 from ray.rllib.optimizers import AsyncOptimizer
 from ray.rllib.utils import FilterManager
-from ray.rllib.a3c.a3c_evaluator import A3CEvaluator, RemoteA3CEvaluator
+from ray.rllib.a3c.a3c_evaluator import A3CEvaluator, RemoteA3CEvaluator, \
+    GPURemoteA3CEvaluator
 from ray.tune.result import TrainingResult
 
 
@@ -39,6 +40,8 @@ DEFAULT_CONFIG = {
     "vf_loss_coeff": 0.5,
     # Entropy coefficient
     "entropy_coeff": -0.01,
+    # Whether to place workers on GPUs
+    "use_gpu_for_workers": False,
     # Model and preprocessor options
     "model": {
         # (Image statespace) - Converts image to Channels = 1
@@ -54,21 +57,27 @@ DEFAULT_CONFIG = {
     "optimizer": {
         # Number of gradients applied for each `train` step
         "grads_per_step": 100,
-    }
+    },
+    # Arguments to pass to the env creator
+    "env_config": {},
 }
 
 
 class A3CAgent(Agent):
     _agent_name = "A3C"
     _default_config = DEFAULT_CONFIG
-    _allow_unknown_subkeys = ["model", "optimizer"]
+    _allow_unknown_subkeys = ["model", "optimizer", "env_config"]
 
     def _init(self):
         self.local_evaluator = A3CEvaluator(
             self.registry, self.env_creator, self.config, self.logdir,
             start_sampler=False)
+        if self.config["use_gpu_for_workers"]:
+            remote_cls = GPURemoteA3CEvaluator
+        else:
+            remote_cls = RemoteA3CEvaluator
         self.remote_evaluators = [
-            RemoteA3CEvaluator.remote(
+            remote_cls.remote(
                 self.registry, self.env_creator, self.config, self.logdir)
             for i in range(self.config["num_workers"])]
         self.optimizer = AsyncOptimizer(
diff --git a/python/ray/rllib/a3c/a3c_evaluator.py b/python/ray/rllib/a3c/a3c_evaluator.py
index e4864b5d1..95360bf76 100644
--- a/python/ray/rllib/a3c/a3c_evaluator.py
+++ b/python/ray/rllib/a3c/a3c_evaluator.py
@@ -29,7 +29,7 @@ class A3CEvaluator(Evaluator):
     def __init__(
             self, registry, env_creator, config, logdir, start_sampler=True):
         env = ModelCatalog.get_preprocessor_as_wrapper(
-            registry, env_creator(), config["model"])
+            registry, env_creator(config["env_config"]), config["model"])
         self.env = env
         policy_cls = get_policy_cls(config)
         # TODO(rliaw): should change this to be just env.observation_space
@@ -116,3 +116,4 @@ class A3CEvaluator(Evaluator):
 
 
 RemoteA3CEvaluator = ray.remote(A3CEvaluator)
+GPURemoteA3CEvaluator = ray.remote(num_gpus=1)(A3CEvaluator)
diff --git a/python/ray/rllib/a3c/tfpolicy.py b/python/ray/rllib/a3c/tfpolicy.py
index a2f5377cf..f311d7461 100644
--- a/python/ray/rllib/a3c/tfpolicy.py
+++ b/python/ray/rllib/a3c/tfpolicy.py
@@ -78,7 +78,8 @@ class TFPolicy(Policy):
 
         # TODO(rliaw): Can consider exposing these parameters
         self.sess = tf.Session(graph=self.g, config=tf.ConfigProto(
-            intra_op_parallelism_threads=1, inter_op_parallelism_threads=2))
+            intra_op_parallelism_threads=1, inter_op_parallelism_threads=2,
+            gpu_options=tf.GPUOptions(allow_growth=True)))
         self.variables = ray.experimental.TensorFlowVariables(self.loss,
                                                               self.sess)
         self.sess.run(tf.global_variables_initializer())
diff --git a/python/ray/rllib/agent.py b/python/ray/rllib/agent.py
index 742c2cee1..92bcf4b48 100644
--- a/python/ray/rllib/agent.py
+++ b/python/ray/rllib/agent.py
@@ -46,8 +46,6 @@ def _deep_update(original, new_dict, new_keys_allowed, whitelist):
             if not new_keys_allowed:
                 raise Exception(
                     "Unknown config parameter `{}` ".format(k))
-            else:
-                logger.warn("`{}` not in default configuration...".format(k))
         if type(original.get(k)) is dict:
             if k in whitelist:
                 _deep_update(original[k], value, True, [])
@@ -98,9 +96,9 @@ class Agent(Trainable):
                 self.env_creator = registry.get(ENV_CREATOR, env)
             else:
                 import gym  # soft dependency
-                self.env_creator = lambda: gym.make(env)
+                self.env_creator = lambda env_config: gym.make(env)
         else:
-            self.env_creator = lambda: None
+            self.env_creator = lambda env_config: None
         self.config = self._default_config.copy()
         self.registry = registry
 
diff --git a/python/ray/rllib/dqn/dqn.py b/python/ray/rllib/dqn/dqn.py
index eba14f018..432e8a3a6 100644
--- a/python/ray/rllib/dqn/dqn.py
+++ b/python/ray/rllib/dqn/dqn.py
@@ -28,6 +28,8 @@ DEFAULT_CONFIG = dict(
     model={},
     # Discount factor for the MDP
     gamma=0.99,
+    # Arguments to pass to the env creator
+    env_config={},
 
     # === Exploration ===
     # Max num timesteps for annealing schedules. Exploration is annealed from
@@ -107,7 +109,8 @@ DEFAULT_CONFIG = dict(
 
 class DQNAgent(Agent):
     _agent_name = "DQN"
-    _allow_unknown_subkeys = ["model", "optimizer", "tf_session_args"]
+    _allow_unknown_subkeys = [
+        "model", "optimizer", "tf_session_args", "env_config"]
     _default_config = DEFAULT_CONFIG
 
     def _init(self):
diff --git a/python/ray/rllib/dqn/dqn_evaluator.py b/python/ray/rllib/dqn/dqn_evaluator.py
index 60388a68a..04dab1669 100644
--- a/python/ray/rllib/dqn/dqn_evaluator.py
+++ b/python/ray/rllib/dqn/dqn_evaluator.py
@@ -18,7 +18,7 @@ class DQNEvaluator(TFMultiGPUSupport):
     TODO(rliaw): Support observation/reward filters?"""
 
     def __init__(self, registry, env_creator, config, logdir):
-        env = env_creator()
+        env = env_creator(config["env_config"])
         env = wrap_dqn(registry, env, config["model"])
         self.env = env
         self.config = config
diff --git a/python/ray/rllib/es/es.py b/python/ray/rllib/es/es.py
index 28b1c8bb9..0e8d666d9 100644
--- a/python/ray/rllib/es/es.py
+++ b/python/ray/rllib/es/es.py
@@ -37,7 +37,8 @@ DEFAULT_CONFIG = dict(
     return_proc_mode="centered_rank",
     num_workers=10,
     stepsize=0.01,
-    observation_filter="MeanStdFilter")
+    observation_filter="MeanStdFilter",
+    env_config={})
 
 
 @ray.remote
@@ -70,7 +71,7 @@ class Worker(object):
         self.policy_params = policy_params
         self.noise = SharedNoiseTable(noise)
 
-        self.env = env_creator()
+        self.env = env_creator(config["env_config"])
         self.preprocessor = ModelCatalog.get_preprocessor(registry, self.env)
 
         self.sess = utils.make_session(single_threaded=True)
@@ -135,13 +136,14 @@ class Worker(object):
 class ESAgent(Agent):
     _agent_name = "ES"
     _default_config = DEFAULT_CONFIG
+    _allow_unknown_subkeys = ["env_config"]
 
     def _init(self):
         policy_params = {
             "action_noise_std": 0.01
         }
 
-        env = self.env_creator()
+        env = self.env_creator(self.config["env_config"])
         preprocessor = ModelCatalog.get_preprocessor(self.registry, env)
 
         self.sess = utils.make_session(single_threaded=False)
diff --git a/python/ray/rllib/models/catalog.py b/python/ray/rllib/models/catalog.py
index cc87fd2a4..bd490cc0b 100644
--- a/python/ray/rllib/models/catalog.py
+++ b/python/ray/rllib/models/catalog.py
@@ -9,9 +9,7 @@ from ray.tune.registry import RLLIB_MODEL, RLLIB_PREPROCESSOR, \
 
 from ray.rllib.models.action_dist import (
     Categorical, Deterministic, DiagGaussian)
-from ray.rllib.models.preprocessors import (
-    NoPreprocessor, AtariRamPreprocessor, AtariPixelPreprocessor,
-    OneHotPreprocessor)
+from ray.rllib.models.preprocessors import get_preprocessor
 from ray.rllib.models.fcnet import FullyConnectedNetwork
 from ray.rllib.models.visionnet import VisionNetwork
 
@@ -48,9 +46,6 @@ class ModelCatalog(object):
         >>> action = dist.sample()
     """
 
-    ATARI_OBS_SHAPE = (210, 160, 3)
-    ATARI_RAM_OBS_SHAPE = (128,)
-
     @staticmethod
     def get_action_dist(action_space, dist_type=None):
         """Returns action distribution class and size for the given action space.
@@ -147,40 +142,19 @@ class ModelCatalog(object):
             preprocessor (Preprocessor): Preprocessor for the env observations.
         """
 
-        # For older gym versions that don't set shape for Discrete
-        if not hasattr(env.observation_space, "shape") and \
-                isinstance(env.observation_space, gym.spaces.Discrete):
-            env.observation_space.shape = ()
-
-        obs_shape = env.observation_space.shape
-
         for k in options.keys():
             if k not in MODEL_CONFIGS:
                 raise Exception(
                     "Unknown config key `{}`, all keys: {}".format(
                         k, MODEL_CONFIGS))
 
-        print("Observation shape is {}".format(obs_shape))
-
         if "custom_preprocessor" in options:
             preprocessor = options["custom_preprocessor"]
             print("Using custom preprocessor {}".format(preprocessor))
             return registry.get(RLLIB_PREPROCESSOR, preprocessor)(
                 env.observation_space, options)
 
-        if obs_shape == ():
-            print("Using one-hot preprocessor for discrete envs.")
-            preprocessor = OneHotPreprocessor
-        elif obs_shape == ModelCatalog.ATARI_OBS_SHAPE:
-            print("Assuming Atari pixel env, using AtariPixelPreprocessor.")
-            preprocessor = AtariPixelPreprocessor
-        elif obs_shape == ModelCatalog.ATARI_RAM_OBS_SHAPE:
-            print("Assuming Atari ram env, using AtariRamPreprocessor.")
-            preprocessor = AtariRamPreprocessor
-        else:
-            print("Not using any observation preprocessor.")
-            preprocessor = NoPreprocessor
-
+        preprocessor = get_preprocessor(env.observation_space)
         return preprocessor(env.observation_space, options)
 
     @staticmethod
diff --git a/python/ray/rllib/models/preprocessors.py b/python/ray/rllib/models/preprocessors.py
index d03e03e7c..e90a0d6d8 100644
--- a/python/ray/rllib/models/preprocessors.py
+++ b/python/ray/rllib/models/preprocessors.py
@@ -3,6 +3,10 @@ from __future__ import division
 from __future__ import print_function
 import cv2
 import numpy as np
+import gym
+
+ATARI_OBS_SHAPE = (210, 160, 3)
+ATARI_RAM_OBS_SHAPE = (128,)
 
 
 class Preprocessor(object):
@@ -13,6 +17,7 @@ class Preprocessor(object):
     """
 
     def __init__(self, obs_space, options):
+        legacy_patch_shapes(obs_space)
         self._obs_space = obs_space
         self._options = options
         self._init()
@@ -40,7 +45,6 @@ class AtariPixelPreprocessor(Preprocessor):
         if self._channel_major:
             self.shape = self.shape[-1:] + self.shape[:-1]
 
-    # TODO(ekl) why does this need to return an extra size-1 dim (the [None])
     def transform(self, observation):
         """Downsamples images from (210, 160, 3) by the configured factor."""
         scaled = observation[25:-25, :, :]
@@ -64,7 +68,6 @@ class AtariPixelPreprocessor(Preprocessor):
         return scaled
 
 
-# TODO(rliaw): Also should include the deepmind preprocessor
 class AtariRamPreprocessor(Preprocessor):
     def _init(self):
         self.shape = (128,)
@@ -90,3 +93,75 @@ class NoPreprocessor(Preprocessor):
 
     def transform(self, observation):
         return observation
+
+
+class TupleFlatteningPreprocessor(Preprocessor):
+    """Preprocesses each tuple element, then flattens it all into a vector.
+
+    If desired, the vector output can be unpacked via tf.reshape() within a
+    custom model to handle each component separately.
+    """
+
+    def _init(self):
+        assert isinstance(self._obs_space, gym.spaces.Tuple)
+        size = 0
+        self.preprocessors = []
+        for i in range(len(self._obs_space.spaces)):
+            space = self._obs_space.spaces[i]
+            print("Creating sub-preprocessor for", space)
+            preprocessor = get_preprocessor(space)(space, self._options)
+            self.preprocessors.append(preprocessor)
+            size += np.product(preprocessor.shape)
+        self.shape = (size,)
+
+    def transform(self, observation):
+        assert len(observation) == len(self.preprocessors), observation
+        return np.concatenate([
+            np.reshape(p.transform(o), [np.product(p.shape)])
+            for (o, p) in zip(observation, self.preprocessors)])
+
+
+def get_preprocessor(space):
+    """Returns an appropriate preprocessor class for the given space."""
+
+    legacy_patch_shapes(space)
+    obs_shape = space.shape
+    print("Observation shape is {}".format(obs_shape))
+
+    if obs_shape == ():
+        print("Using one-hot preprocessor for discrete envs.")
+        preprocessor = OneHotPreprocessor
+    elif obs_shape == ATARI_OBS_SHAPE:
+        print("Assuming Atari pixel env, using AtariPixelPreprocessor.")
+        preprocessor = AtariPixelPreprocessor
+    elif obs_shape == ATARI_RAM_OBS_SHAPE:
+        print("Assuming Atari ram env, using AtariRamPreprocessor.")
+        preprocessor = AtariRamPreprocessor
+    elif isinstance(space, gym.spaces.Tuple):
+        print("Using a TupleFlatteningPreprocessor")
+        preprocessor = TupleFlatteningPreprocessor
+    else:
+        print("Not using any observation preprocessor.")
+        preprocessor = NoPreprocessor
+
+    return preprocessor
+
+
+def legacy_patch_shapes(space):
+    """Assigns shapes to spaces that don't have shapes.
+
+    This is only needed for older gym versions that don't set shapes properly
+    for Tuple and Discrete spaces.
+    """
+
+    if not hasattr(space, "shape"):
+        if isinstance(space, gym.spaces.Discrete):
+            space.shape = ()
+        elif isinstance(space, gym.spaces.Tuple):
+            shapes = []
+            for s in space.spaces:
+                shape = legacy_patch_shapes(s)
+                shapes.append(shape)
+            space.shape = tuple(shapes)
+
+    return space.shape
diff --git a/python/ray/rllib/ppo/ppo.py b/python/ray/rllib/ppo/ppo.py
index d33490235..184988f22 100644
--- a/python/ray/rllib/ppo/ppo.py
+++ b/python/ray/rllib/ppo/ppo.py
@@ -78,12 +78,14 @@ DEFAULT_CONFIG = {
     "tf_debug_inf_or_nan": False,
     # If True, we write tensorflow logs and checkpoints
     "write_logs": True,
+    # Arguments to pass to the env creator
+    "env_config": {},
 }
 
 
 class PPOAgent(Agent):
     _agent_name = "PPO"
-    _allow_unknown_subkeys = ["model", "tf_session_args"]
+    _allow_unknown_subkeys = ["model", "tf_session_args", "env_config"]
     _default_config = DEFAULT_CONFIG
 
     def _init(self):
diff --git a/python/ray/rllib/ppo/ppo_evaluator.py b/python/ray/rllib/ppo/ppo_evaluator.py
index 7fc37407d..e559b354d 100644
--- a/python/ray/rllib/ppo/ppo_evaluator.py
+++ b/python/ray/rllib/ppo/ppo_evaluator.py
@@ -43,7 +43,7 @@ class PPOEvaluator(Evaluator):
         self.config = config
         self.logdir = logdir
         self.env = ModelCatalog.get_preprocessor_as_wrapper(
-            registry, env_creator(), config["model"])
+            registry, env_creator(config["env_config"]), config["model"])
         if is_remote:
             config_proto = tf.ConfigProto()
         else:
diff --git a/python/ray/rllib/test/test_catalog.py b/python/ray/rllib/test/test_catalog.py
index 63cad29b4..5f3ac01f8 100644
--- a/python/ray/rllib/test/test_catalog.py
+++ b/python/ray/rllib/test/test_catalog.py
@@ -2,6 +2,7 @@ import gym
 import numpy as np
 import tensorflow as tf
 import unittest
+from gym.spaces import Box, Discrete, Tuple
 
 import ray
 from ray.tune.registry import get_registry
@@ -34,11 +35,25 @@ class ModelCatalogTest(unittest.TestCase):
     def testGymPreprocessors(self):
         p1 = ModelCatalog.get_preprocessor(
             get_registry(), gym.make("CartPole-v0"))
-        assert type(p1) == NoPreprocessor
+        self.assertEqual(type(p1), NoPreprocessor)
 
         p2 = ModelCatalog.get_preprocessor(
             get_registry(), gym.make("FrozenLake-v0"))
-        assert type(p2) == OneHotPreprocessor
+        self.assertEqual(type(p2), OneHotPreprocessor)
+
+    def testTuplePreprocessor(self):
+        ray.init()
+
+        class TupleEnv(object):
+            def __init__(self):
+                self.observation_space = Tuple(
+                    [Discrete(5), Box(0, 1, shape=(3,))])
+        p1 = ModelCatalog.get_preprocessor(
+            get_registry(), TupleEnv())
+        self.assertEqual(p1.shape, (8,))
+        self.assertEqual(
+            list(p1.transform((0, [1, 2, 3]))),
+            [float(x) for x in [1, 0, 0, 0, 0, 1, 2, 3]])
 
     def testCustomPreprocessor(self):
         ray.init()
@@ -47,12 +62,12 @@ class ModelCatalogTest(unittest.TestCase):
         env = gym.make("CartPole-v0")
         p1 = ModelCatalog.get_preprocessor(
             get_registry(), env, {"custom_preprocessor": "foo"})
-        assert type(p1) == CustomPreprocessor
+        self.assertEqual(str(type(p1)), str(CustomPreprocessor))
         p2 = ModelCatalog.get_preprocessor(
             get_registry(), env, {"custom_preprocessor": "bar"})
-        assert type(p2) == CustomPreprocessor2
+        self.assertEqual(str(type(p2)), str(CustomPreprocessor2))
         p3 = ModelCatalog.get_preprocessor(get_registry(), env)
-        assert type(p3) == NoPreprocessor
+        self.assertEqual(type(p3), NoPreprocessor)
 
     def testDefaultModels(self):
         ray.init()
@@ -60,19 +75,19 @@ class ModelCatalogTest(unittest.TestCase):
         with tf.variable_scope("test1"):
             p1 = ModelCatalog.get_model(
                 get_registry(), np.zeros((10, 3), dtype=np.float32), 5)
-            assert type(p1) == FullyConnectedNetwork
+            self.assertEqual(type(p1), FullyConnectedNetwork)
 
         with tf.variable_scope("test2"):
             p2 = ModelCatalog.get_model(
                 get_registry(), np.zeros((10, 80, 80, 3), dtype=np.float32), 5)
-            assert type(p2) == VisionNetwork
+            self.assertEqual(type(p2), VisionNetwork)
 
     def testCustomModel(self):
         ray.init()
         ModelCatalog.register_custom_model("foo", CustomModel)
         p1 = ModelCatalog.get_model(
             get_registry(), 1, 5, {"custom_model": "foo"})
-        assert type(p1) == CustomModel
+        self.assertEqual(str(type(p1)), str(CustomModel))
 
 
 if __name__ == "__main__":
diff --git a/python/ray/rllib/utils/sampler.py b/python/ray/rllib/utils/sampler.py
index 9ba65eb93..d846ca5ac 100644
--- a/python/ray/rllib/utils/sampler.py
+++ b/python/ray/rllib/utils/sampler.py
@@ -193,8 +193,12 @@ def _env_runner(env, policy, num_local_steps, horizon, obs_filter):
             terminal condition, and other fields as dictated by `policy`.
     """
     last_observation = obs_filter(env.reset())
-    horizon = horizon if horizon else env.spec.tags.get(
-        "wrapper_config.TimeLimit.max_episode_steps")
+    try:
+        horizon = horizon if horizon else env.spec.tags.get(
+            "wrapper_config.TimeLimit.max_episode_steps")
+    except Exception:
+        print("Warning, no horizon specified, assuming infinite")
+        horizon = 999999
     assert horizon > 0
     if hasattr(policy, "get_initial_features"):
         last_features = policy.get_initial_features()
diff --git a/python/ray/tune/trial.py b/python/ray/tune/trial.py
index 7958fda13..67b658a26 100644
--- a/python/ray/tune/trial.py
+++ b/python/ray/tune/trial.py
@@ -306,6 +306,8 @@ class Trial(object):
 
         def logger_creator(config):
             # Set the working dir in the remote process, for user file writes
+            if not os.path.exists(remote_logdir):
+                os.makedirs(remote_logdir)
             os.chdir(remote_logdir)
             return NoopLogger(config, remote_logdir)