[rllib] TensorFlow 2 compatibility (#4802)

2026-07-03 12:19:50 +08:00 · 2019-05-16 22:12:07 -07:00
parent 7d5ef6d99c
commit 3807fb505b
32 changed files with 140 additions and 1235 deletions
@@ -399,8 +399,6 @@ class DDPGPolicyGraph(DDPGPostprocessing, TFPolicyGraph):
        self.set_pure_exploration_phase(state[2])

    def _build_q_network(self, obs, obs_space, action_space, actions):
-        import tensorflow.contrib.layers as layers
-
        if self.config["use_state_preprocessor"]:
            q_model = ModelCatalog.get_model({
                "obs": obs,
@@ -413,16 +411,12 @@ class DDPGPolicyGraph(DDPGPostprocessing, TFPolicyGraph):

        activation = getattr(tf.nn, self.config["critic_hidden_activation"])
        for hidden in self.config["critic_hiddens"]:
-            q_out = layers.fully_connected(
-                q_out, num_outputs=hidden, activation_fn=activation)
-        q_values = layers.fully_connected(
-            q_out, num_outputs=1, activation_fn=None)
+            q_out = tf.layers.dense(q_out, units=hidden, activation=activation)
+        q_values = tf.layers.dense(q_out, units=1, activation=None)

        return q_values, q_model

    def _build_policy_network(self, obs, obs_space, action_space):
-        import tensorflow.contrib.layers as layers
-
        if self.config["use_state_preprocessor"]:
            model = ModelCatalog.get_model({
                "obs": obs,
@@ -434,16 +428,19 @@ class DDPGPolicyGraph(DDPGPostprocessing, TFPolicyGraph):
            action_out = obs

        activation = getattr(tf.nn, self.config["actor_hidden_activation"])
-        normalizer_fn = layers.layer_norm if self.config["parameter_noise"] \
-            else None
        for hidden in self.config["actor_hiddens"]:
-            action_out = layers.fully_connected(
-                action_out,
-                num_outputs=hidden,
-                activation_fn=activation,
-                normalizer_fn=normalizer_fn)
-        action_out = layers.fully_connected(
-            action_out, num_outputs=self.dim_actions, activation_fn=None)
+            if self.config["parameter_noise"]:
+                import tensorflow.contrib.layers as layers
+                action_out = layers.fully_connected(
+                    action_out,
+                    num_outputs=hidden,
+                    activation_fn=activation,
+                    normalizer_fn=layers.layer_norm)
+            else:
+                action_out = tf.layers.dense(
+                    action_out, units=hidden, activation=activation)
+        action_out = tf.layers.dense(
+            action_out, units=self.dim_actions, activation=None)

        # Use sigmoid to scale to [0,1], but also double magnitude of input to
        # emulate behaviour of tanh activation used in DDPG and TD3 papers.
@@ -507,7 +504,7 @@ class DDPGPolicyGraph(DDPGPostprocessing, TFPolicyGraph):

            def make_uniform_random_actions():
                # pure random exploration option
-                uniform_random_actions = tf.random.uniform(
+                uniform_random_actions = tf.random_uniform(
                    tf.shape(deterministic_actions))
                # rescale uniform random actions according to action range
                tf_range = tf.constant(action_range[None], dtype="float32")
@@ -154,8 +154,6 @@ class QNetwork(object):
                 v_max=10.0,
                 sigma0=0.5,
                 parameter_noise=False):
-        import tensorflow.contrib.layers as layers
-
        self.model = model
        with tf.variable_scope("action_value"):
            if hiddens:
@@ -164,13 +162,18 @@ class QNetwork(object):
                    if use_noisy:
                        action_out = self.noisy_layer(
                            "hidden_%d" % i, action_out, hiddens[i], sigma0)
-                    else:
+                    elif parameter_noise:
+                        import tensorflow.contrib.layers as layers
                        action_out = layers.fully_connected(
                            action_out,
                            num_outputs=hiddens[i],
                            activation_fn=tf.nn.relu,
-                            normalizer_fn=layers.layer_norm
-                            if parameter_noise else None)
+                            normalizer_fn=layers.layer_norm)
+                    else:
+                        action_out = tf.layers.dense(
+                            action_out,
+                            units=hiddens[i],
+                            activation=tf.nn.relu)
            else:
                # Avoid postprocessing the outputs. This enables custom models
                # to be used for parametric action DQN.
@@ -183,10 +186,8 @@ class QNetwork(object):
                    sigma0,
                    non_linear=False)
            elif hiddens:
-                action_scores = layers.fully_connected(
-                    action_out,
-                    num_outputs=num_actions * num_atoms,
-                    activation_fn=None)
+                action_scores = tf.layers.dense(
+                    action_out, units=num_actions * num_atoms, activation=None)
            else:
                action_scores = model.outputs
            if num_atoms > 1:
@@ -214,13 +215,15 @@ class QNetwork(object):
                        state_out = self.noisy_layer("dueling_hidden_%d" % i,
                                                     state_out, hiddens[i],
                                                     sigma0)
-                    else:
-                        state_out = layers.fully_connected(
+                    elif parameter_noise:
+                        state_out = tf.contrib.layers.fully_connected(
                            state_out,
                            num_outputs=hiddens[i],
                            activation_fn=tf.nn.relu,
-                            normalizer_fn=layers.layer_norm
-                            if parameter_noise else None)
+                            normalizer_fn=tf.contrib.layers.layer_norm)
+                    else:
+                        state_out = tf.layers.dense(
+                            state_out, units=hiddens[i], activation=tf.nn.relu)
                if use_noisy:
                    state_score = self.noisy_layer(
                        "dueling_output",
@@ -229,8 +232,8 @@ class QNetwork(object):
                        sigma0,
                        non_linear=False)
                else:
-                    state_score = layers.fully_connected(
-                        state_out, num_outputs=num_atoms, activation_fn=None)
+                    state_score = tf.layers.dense(
+                        state_out, units=num_atoms, activation=None)
            if num_atoms > 1:
                support_logits_per_action_mean = tf.reduce_mean(
                    support_logits_per_action, 1)
@@ -38,8 +38,6 @@ from ray.rllib.models.action_dist import Categorical
 from ray.rllib.utils import try_import_tf

 tf = try_import_tf()
-if tf:
-    nest = tf.contrib.framework.nest

 VTraceFromLogitsReturns = collections.namedtuple("VTraceFromLogitsReturns", [
    "vs", "pg_advantages", "log_rhos", "behaviour_action_log_probs",
@@ -278,14 +278,11 @@ class VTracePolicyGraph(LearningRateSchedule, VTracePostprocessing,
                self.KL_stats.update({
                    "mean_KL_{}".format(i): tf.reduce_mean(kl),
                    "max_KL_{}".format(i): tf.reduce_max(kl),
-                    "median_KL_{}".format(i): tf.contrib.distributions.
-                    percentile(kl, 50.0),
                })
        else:
            self.KL_stats = {
                "mean_KL": tf.reduce_mean(kls[0]),
                "max_KL": tf.reduce_max(kls[0]),
-                "median_KL": tf.contrib.distributions.percentile(kls[0], 50.0),
            }

        # Initialize TFPolicyGraph
@@ -26,8 +26,10 @@ from __future__ import print_function

 from absl.testing import parameterized
 import numpy as np
-import tensorflow as tf
 import vtrace
+from ray.rllib.utils import try_import_tf
+
+tf = try_import_tf()


 def _shaped_arange(*shape):
@@ -399,14 +399,11 @@ class AsyncPPOPolicyGraph(LearningRateSchedule, APPOPostprocessing,
                self.KL_stats.update({
                    "mean_KL_{}".format(i): tf.reduce_mean(kl),
                    "max_KL_{}".format(i): tf.reduce_max(kl),
-                    "median_KL_{}".format(i): tf.contrib.distributions.
-                    percentile(kl, 50.0),
                })
        else:
            self.KL_stats = {
                "mean_KL": tf.reduce_mean(kls[0]),
                "max_KL": tf.reduce_max(kls[0]),
-                "median_KL": tf.contrib.distributions.percentile(kls[0], 50.0),
            }

        # Initialize TFPolicyGraph
@@ -4,11 +4,13 @@ from __future__ import print_function

 import unittest
 import numpy as np
-import tensorflow as tf
 from numpy.testing import assert_allclose

 from ray.rllib.models.action_dist import Categorical
 from ray.rllib.agents.ppo.utils import flatten, concatenate
+from ray.rllib.utils import try_import_tf
+
+tf = try_import_tf()


 # TODO(ekl): move to rllib/models dir
@@ -5,13 +5,13 @@ from __future__ import print_function

 import argparse

-import tensorflow as tf
-import tensorflow.contrib.slim as slim
-
 import ray
 from ray import tune
 from ray.rllib.models import Model, ModelCatalog
 from ray.rllib.models.misc import normc_initializer
+from ray.rllib.utils import try_import_tf
+
+tf = try_import_tf()

 parser = argparse.ArgumentParser()
 parser.add_argument("--num-iters", type=int, default=200)
@@ -24,21 +24,21 @@ class BatchNormModel(Model):
        hiddens = [256, 256]
        for i, size in enumerate(hiddens):
            label = "fc{}".format(i)
-            last_layer = slim.fully_connected(
+            last_layer = tf.layers.dense(
                last_layer,
                size,
-                weights_initializer=normc_initializer(1.0),
-                activation_fn=tf.nn.tanh,
-                scope=label)
+                kernel_initializer=normc_initializer(1.0),
+                activation=tf.nn.tanh,
+                name=label)
            # Add a batch norm layer
            last_layer = tf.layers.batch_normalization(
                last_layer, training=input_dict["is_training"])
-        output = slim.fully_connected(
+        output = tf.layers.dense(
            last_layer,
            num_outputs,
-            weights_initializer=normc_initializer(0.01),
-            activation_fn=None,
-            scope="fc_out")
+            kernel_initializer=normc_initializer(0.01),
+            activation=None,
+            name="fc_out")
        return output, last_layer


@@ -1,14 +0,0 @@
-(Experimental) OpenAI gym environment for https://github.com/carla-simulator/carla
-
-To run, first download and unpack the Carla binaries from this URL: https://github.com/carla-simulator/carla/releases/tag/0.7.0
-
-Note that currently you also need to clone the Python code from `carla/benchmark_branch` which includes the Carla planner.
-
-Then, you can try running env.py to drive the car. Run one of the train_* scripts to attempt training.
-
-    $ pkill -9 Carla
-    $ export CARLA_SERVER=/PATH/TO/CARLA_0.7.0/CarlaUE4.sh
-    $ export CARLA_PY_PATH=/PATH/TO/CARLA_BENCHMARK_BRANCH_REPO/PythonClient
-    $ python env.py
-
-Check out the scenarios.py file for different training and test scenarios that can be used.
@@ -1,684 +0,0 @@
-"""OpenAI gym environment for Carla. Run this file for a demo."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from datetime import datetime
-import atexit
-import cv2
-import os
-import json
-import random
-import signal
-import subprocess
-import sys
-import time
-import traceback
-
-import numpy as np
-try:
-    import scipy.misc
-except Exception:
-    pass
-
-import gym
-from gym.spaces import Box, Discrete, Tuple
-
-from scenarios import DEFAULT_SCENARIO
-
-# Set this where you want to save image outputs (or empty string to disable)
-CARLA_OUT_PATH = os.environ.get("CARLA_OUT", os.path.expanduser("~/carla_out"))
-if CARLA_OUT_PATH and not os.path.exists(CARLA_OUT_PATH):
-    os.makedirs(CARLA_OUT_PATH)
-
-# Set this to the path of your Carla binary
-SERVER_BINARY = os.environ.get("CARLA_SERVER",
-                               os.path.expanduser("~/CARLA_0.7.0/CarlaUE4.sh"))
-
-assert os.path.exists(SERVER_BINARY)
-if "CARLA_PY_PATH" in os.environ:
-    sys.path.append(os.path.expanduser(os.environ["CARLA_PY_PATH"]))
-else:
-    # TODO(ekl) switch this to the binary path once the planner is in master
-    sys.path.append(os.path.expanduser("~/carla/PythonClient/"))
-
-try:
-    from carla.client import CarlaClient
-    from carla.sensor import Camera
-    from carla.settings import CarlaSettings
-    from carla.planner.planner import Planner, REACH_GOAL, GO_STRAIGHT, \
-        TURN_RIGHT, TURN_LEFT, LANE_FOLLOW
-except Exception as e:
-    print("Failed to import Carla python libs, try setting $CARLA_PY_PATH")
-    raise e
-
-# Carla planner commands
-COMMANDS_ENUM = {
-    REACH_GOAL: "REACH_GOAL",
-    GO_STRAIGHT: "GO_STRAIGHT",
-    TURN_RIGHT: "TURN_RIGHT",
-    TURN_LEFT: "TURN_LEFT",
-    LANE_FOLLOW: "LANE_FOLLOW",
-}
-
-# Mapping from string repr to one-hot encoding index to feed to the model
-COMMAND_ORDINAL = {
-    "REACH_GOAL": 0,
-    "GO_STRAIGHT": 1,
-    "TURN_RIGHT": 2,
-    "TURN_LEFT": 3,
-    "LANE_FOLLOW": 4,
-}
-
-# Number of retries if the server doesn't respond
-RETRIES_ON_ERROR = 5
-
-# Dummy Z coordinate to use when we only care about (x, y)
-GROUND_Z = 22
-
-# Default environment configuration
-ENV_CONFIG = {
-    "log_images": True,
-    "enable_planner": True,
-    "framestack": 2,  # note: only [1, 2] currently supported
-    "convert_images_to_video": True,
-    "early_terminate_on_collision": True,
-    "verbose": True,
-    "reward_function": "custom",
-    "render_x_res": 800,
-    "render_y_res": 600,
-    "x_res": 80,
-    "y_res": 80,
-    "server_map": "/Game/Maps/Town02",
-    "scenarios": [DEFAULT_SCENARIO],
-    "use_depth_camera": False,
-    "discrete_actions": True,
-    "squash_action_logits": False,
-}
-
-DISCRETE_ACTIONS = {
-    # coast
-    0: [0.0, 0.0],
-    # turn left
-    1: [0.0, -0.5],
-    # turn right
-    2: [0.0, 0.5],
-    # forward
-    3: [1.0, 0.0],
-    # brake
-    4: [-0.5, 0.0],
-    # forward left
-    5: [1.0, -0.5],
-    # forward right
-    6: [1.0, 0.5],
-    # brake left
-    7: [-0.5, -0.5],
-    # brake right
-    8: [-0.5, 0.5],
-}
-
-live_carla_processes = set()
-
-
-def cleanup():
-    print("Killing live carla processes", live_carla_processes)
-    for pgid in live_carla_processes:
-        os.killpg(pgid, signal.SIGKILL)
-
-
-atexit.register(cleanup)
-
-
-class CarlaEnv(gym.Env):
-    def __init__(self, config=ENV_CONFIG):
-        self.config = config
-        self.city = self.config["server_map"].split("/")[-1]
-        if self.config["enable_planner"]:
-            self.planner = Planner(self.city)
-
-        if config["discrete_actions"]:
-            self.action_space = Discrete(len(DISCRETE_ACTIONS))
-        else:
-            self.action_space = Box(-1.0, 1.0, shape=(2, ), dtype=np.float32)
-        if config["use_depth_camera"]:
-            image_space = Box(
-                -1.0,
-                1.0,
-                shape=(config["y_res"], config["x_res"],
-                       1 * config["framestack"]),
-                dtype=np.float32)
-        else:
-            image_space = Box(
-                0,
-                255,
-                shape=(config["y_res"], config["x_res"],
-                       3 * config["framestack"]),
-                dtype=np.uint8)
-        self.observation_space = Tuple(  # forward_speed, dist to goal
-            [
-                image_space,
-                Discrete(len(COMMANDS_ENUM)),  # next_command
-                Box(-128.0, 128.0, shape=(2, ), dtype=np.float32)
-            ])
-
-        # TODO(ekl) this isn't really a proper gym spec
-        self._spec = lambda: None
-        self._spec.id = "Carla-v0"
-
-        self.server_port = None
-        self.server_process = None
-        self.client = None
-        self.num_steps = 0
-        self.total_reward = 0
-        self.prev_measurement = None
-        self.prev_image = None
-        self.episode_id = None
-        self.measurements_file = None
-        self.weather = None
-        self.scenario = None
-        self.start_pos = None
-        self.end_pos = None
-        self.start_coord = None
-        self.end_coord = None
-        self.last_obs = None
-
-    def init_server(self):
-        print("Initializing new Carla server...")
-        # Create a new server process and start the client.
-        self.server_port = random.randint(10000, 60000)
-        self.server_process = subprocess.Popen(
-            [
-                SERVER_BINARY, self.config["server_map"], "-windowed",
-                "-ResX=400", "-ResY=300", "-carla-server",
-                "-carla-world-port={}".format(self.server_port)
-            ],
-            preexec_fn=os.setsid,
-            stdout=open(os.devnull, "w"))
-        live_carla_processes.add(os.getpgid(self.server_process.pid))
-
-        for i in range(RETRIES_ON_ERROR):
-            try:
-                self.client = CarlaClient("localhost", self.server_port)
-                return self.client.connect()
-            except Exception as e:
-                print("Error connecting: {}, attempt {}".format(e, i))
-                time.sleep(2)
-
-    def clear_server_state(self):
-        print("Clearing Carla server state")
-        try:
-            if self.client:
-                self.client.disconnect()
-                self.client = None
-        except Exception as e:
-            print("Error disconnecting client: {}".format(e))
-            pass
-        if self.server_process:
-            pgid = os.getpgid(self.server_process.pid)
-            os.killpg(pgid, signal.SIGKILL)
-            live_carla_processes.remove(pgid)
-            self.server_port = None
-            self.server_process = None
-
-    def __del__(self):
-        self.clear_server_state()
-
-    def reset(self):
-        error = None
-        for _ in range(RETRIES_ON_ERROR):
-            try:
-                if not self.server_process:
-                    self.init_server()
-                return self._reset()
-            except Exception as e:
-                print("Error during reset: {}".format(traceback.format_exc()))
-                self.clear_server_state()
-                error = e
-        raise error
-
-    def _reset(self):
-        self.num_steps = 0
-        self.total_reward = 0
-        self.prev_measurement = None
-        self.prev_image = None
-        self.episode_id = datetime.today().strftime("%Y-%m-%d_%H-%M-%S_%f")
-        self.measurements_file = None
-
-        # Create a CarlaSettings object. This object is a wrapper around
-        # the CarlaSettings.ini file. Here we set the configuration we
-        # want for the new episode.
-        settings = CarlaSettings()
-        self.scenario = random.choice(self.config["scenarios"])
-        assert self.scenario["city"] == self.city, (self.scenario, self.city)
-        self.weather = random.choice(self.scenario["weather_distribution"])
-        settings.set(
-            SynchronousMode=True,
-            SendNonPlayerAgentsInfo=True,
-            NumberOfVehicles=self.scenario["num_vehicles"],
-            NumberOfPedestrians=self.scenario["num_pedestrians"],
-            WeatherId=self.weather)
-        settings.randomize_seeds()
-
-        if self.config["use_depth_camera"]:
-            camera1 = Camera("CameraDepth", PostProcessing="Depth")
-            camera1.set_image_size(self.config["render_x_res"],
-                                   self.config["render_y_res"])
-            camera1.set_position(30, 0, 130)
-            settings.add_sensor(camera1)
-
-        camera2 = Camera("CameraRGB")
-        camera2.set_image_size(self.config["render_x_res"],
-                               self.config["render_y_res"])
-        camera2.set_position(30, 0, 130)
-        settings.add_sensor(camera2)
-
-        # Setup start and end positions
-        scene = self.client.load_settings(settings)
-        positions = scene.player_start_spots
-        self.start_pos = positions[self.scenario["start_pos_id"]]
-        self.end_pos = positions[self.scenario["end_pos_id"]]
-        self.start_coord = [
-            self.start_pos.location.x // 100, self.start_pos.location.y // 100
-        ]
-        self.end_coord = [
-            self.end_pos.location.x // 100, self.end_pos.location.y // 100
-        ]
-        print("Start pos {} ({}), end {} ({})".format(
-            self.scenario["start_pos_id"], self.start_coord,
-            self.scenario["end_pos_id"], self.end_coord))
-
-        # Notify the server that we want to start the episode at the
-        # player_start index. This function blocks until the server is ready
-        # to start the episode.
-        print("Starting new episode...")
-        self.client.start_episode(self.scenario["start_pos_id"])
-
-        image, py_measurements = self._read_observation()
-        self.prev_measurement = py_measurements
-        return self.encode_obs(self.preprocess_image(image), py_measurements)
-
-    def encode_obs(self, image, py_measurements):
-        assert self.config["framestack"] in [1, 2]
-        prev_image = self.prev_image
-        self.prev_image = image
-        if prev_image is None:
-            prev_image = image
-        if self.config["framestack"] == 2:
-            image = np.concatenate([prev_image, image], axis=2)
-        obs = (image, COMMAND_ORDINAL[py_measurements["next_command"]], [
-            py_measurements["forward_speed"],
-            py_measurements["distance_to_goal"]
-        ])
-        self.last_obs = obs
-        return obs
-
-    def step(self, action):
-        try:
-            obs = self._step(action)
-            return obs
-        except Exception:
-            print("Error during step, terminating episode early",
-                  traceback.format_exc())
-            self.clear_server_state()
-            return (self.last_obs, 0.0, True, {})
-
-    def _step(self, action):
-        if self.config["discrete_actions"]:
-            action = DISCRETE_ACTIONS[int(action)]
-        assert len(action) == 2, "Invalid action {}".format(action)
-        if self.config["squash_action_logits"]:
-            forward = 2 * float(sigmoid(action[0]) - 0.5)
-            throttle = float(np.clip(forward, 0, 1))
-            brake = float(np.abs(np.clip(forward, -1, 0)))
-            steer = 2 * float(sigmoid(action[1]) - 0.5)
-        else:
-            throttle = float(np.clip(action[0], 0, 1))
-            brake = float(np.abs(np.clip(action[0], -1, 0)))
-            steer = float(np.clip(action[1], -1, 1))
-        reverse = False
-        hand_brake = False
-
-        if self.config["verbose"]:
-            print("steer", steer, "throttle", throttle, "brake", brake,
-                  "reverse", reverse)
-
-        self.client.send_control(
-            steer=steer,
-            throttle=throttle,
-            brake=brake,
-            hand_brake=hand_brake,
-            reverse=reverse)
-
-        # Process observations
-        image, py_measurements = self._read_observation()
-        if self.config["verbose"]:
-            print("Next command", py_measurements["next_command"])
-        if type(action) is np.ndarray:
-            py_measurements["action"] = [float(a) for a in action]
-        else:
-            py_measurements["action"] = action
-        py_measurements["control"] = {
-            "steer": steer,
-            "throttle": throttle,
-            "brake": brake,
-            "reverse": reverse,
-            "hand_brake": hand_brake,
-        }
-        reward = compute_reward(self, self.prev_measurement, py_measurements)
-        self.total_reward += reward
-        py_measurements["reward"] = reward
-        py_measurements["total_reward"] = self.total_reward
-        done = (self.num_steps > self.scenario["max_steps"]
-                or py_measurements["next_command"] == "REACH_GOAL"
-                or (self.config["early_terminate_on_collision"]
-                    and collided_done(py_measurements)))
-        py_measurements["done"] = done
-        self.prev_measurement = py_measurements
-
-        # Write out measurements to file
-        if CARLA_OUT_PATH:
-            if not self.measurements_file:
-                self.measurements_file = open(
-                    os.path.join(
-                        CARLA_OUT_PATH,
-                        "measurements_{}.json".format(self.episode_id)), "w")
-            self.measurements_file.write(json.dumps(py_measurements))
-            self.measurements_file.write("\n")
-            if done:
-                self.measurements_file.close()
-                self.measurements_file = None
-                if self.config["convert_images_to_video"]:
-                    self.images_to_video()
-
-        self.num_steps += 1
-        image = self.preprocess_image(image)
-        return (self.encode_obs(image, py_measurements), reward, done,
-                py_measurements)
-
-    def images_to_video(self):
-        videos_dir = os.path.join(CARLA_OUT_PATH, "Videos")
-        if not os.path.exists(videos_dir):
-            os.makedirs(videos_dir)
-        ffmpeg_cmd = (
-            "ffmpeg -loglevel -8 -r 60 -f image2 -s {x_res}x{y_res} "
-            "-start_number 0 -i "
-            "{img}_%04d.jpg -vcodec libx264 {vid}.mp4 && rm -f {img}_*.jpg "
-        ).format(
-            x_res=self.config["render_x_res"],
-            y_res=self.config["render_y_res"],
-            vid=os.path.join(videos_dir, self.episode_id),
-            img=os.path.join(CARLA_OUT_PATH, "CameraRGB", self.episode_id))
-        print("Executing ffmpeg command", ffmpeg_cmd)
-        subprocess.call(ffmpeg_cmd, shell=True)
-
-    def preprocess_image(self, image):
-        if self.config["use_depth_camera"]:
-            assert self.config["use_depth_camera"]
-            data = (image.data - 0.5) * 2
-            data = data.reshape(self.config["render_y_res"],
-                                self.config["render_x_res"], 1)
-            data = cv2.resize(
-                data, (self.config["x_res"], self.config["y_res"]),
-                interpolation=cv2.INTER_AREA)
-            data = np.expand_dims(data, 2)
-        else:
-            data = image.data.reshape(self.config["render_y_res"],
-                                      self.config["render_x_res"], 3)
-            data = cv2.resize(
-                data, (self.config["x_res"], self.config["y_res"]),
-                interpolation=cv2.INTER_AREA)
-            data = (data.astype(np.float32) - 128) / 128
-        return data
-
-    def _read_observation(self):
-        # Read the data produced by the server this frame.
-        measurements, sensor_data = self.client.read_data()
-
-        # Print some of the measurements.
-        if self.config["verbose"]:
-            print_measurements(measurements)
-
-        observation = None
-        if self.config["use_depth_camera"]:
-            camera_name = "CameraDepth"
-        else:
-            camera_name = "CameraRGB"
-        for name, image in sensor_data.items():
-            if name == camera_name:
-                observation = image
-
-        cur = measurements.player_measurements
-
-        if self.config["enable_planner"]:
-            next_command = COMMANDS_ENUM[self.planner.get_next_command(
-                [cur.transform.location.x, cur.transform.location.y, GROUND_Z],
-                [
-                    cur.transform.orientation.x, cur.transform.orientation.y,
-                    GROUND_Z
-                ],
-                [self.end_pos.location.x, self.end_pos.location.y, GROUND_Z], [
-                    self.end_pos.orientation.x, self.end_pos.orientation.y,
-                    GROUND_Z
-                ])]
-        else:
-            next_command = "LANE_FOLLOW"
-
-        if next_command == "REACH_GOAL":
-            distance_to_goal = 0.0  # avoids crash in planner
-        elif self.config["enable_planner"]:
-            distance_to_goal = self.planner.get_shortest_path_distance([
-                cur.transform.location.x, cur.transform.location.y, GROUND_Z
-            ], [
-                cur.transform.orientation.x, cur.transform.orientation.y,
-                GROUND_Z
-            ], [self.end_pos.location.x, self.end_pos.location.y, GROUND_Z], [
-                self.end_pos.orientation.x, self.end_pos.orientation.y,
-                GROUND_Z
-            ]) / 100
-        else:
-            distance_to_goal = -1
-
-        distance_to_goal_euclidean = float(
-            np.linalg.norm([
-                cur.transform.location.x - self.end_pos.location.x,
-                cur.transform.location.y - self.end_pos.location.y
-            ]) / 100)
-
-        py_measurements = {
-            "episode_id": self.episode_id,
-            "step": self.num_steps,
-            "x": cur.transform.location.x,
-            "y": cur.transform.location.y,
-            "x_orient": cur.transform.orientation.x,
-            "y_orient": cur.transform.orientation.y,
-            "forward_speed": cur.forward_speed,
-            "distance_to_goal": distance_to_goal,
-            "distance_to_goal_euclidean": distance_to_goal_euclidean,
-            "collision_vehicles": cur.collision_vehicles,
-            "collision_pedestrians": cur.collision_pedestrians,
-            "collision_other": cur.collision_other,
-            "intersection_offroad": cur.intersection_offroad,
-            "intersection_otherlane": cur.intersection_otherlane,
-            "weather": self.weather,
-            "map": self.config["server_map"],
-            "start_coord": self.start_coord,
-            "end_coord": self.end_coord,
-            "current_scenario": self.scenario,
-            "x_res": self.config["x_res"],
-            "y_res": self.config["y_res"],
-            "num_vehicles": self.scenario["num_vehicles"],
-            "num_pedestrians": self.scenario["num_pedestrians"],
-            "max_steps": self.scenario["max_steps"],
-            "next_command": next_command,
-        }
-
-        if CARLA_OUT_PATH and self.config["log_images"]:
-            for name, image in sensor_data.items():
-                out_dir = os.path.join(CARLA_OUT_PATH, name)
-                if not os.path.exists(out_dir):
-                    os.makedirs(out_dir)
-                out_file = os.path.join(
-                    out_dir, "{}_{:>04}.jpg".format(self.episode_id,
-                                                    self.num_steps))
-                scipy.misc.imsave(out_file, image.data)
-
-        assert observation is not None, sensor_data
-        return observation, py_measurements
-
-
-def compute_reward_corl2017(env, prev, current):
-    reward = 0.0
-
-    cur_dist = current["distance_to_goal"]
-
-    prev_dist = prev["distance_to_goal"]
-
-    if env.config["verbose"]:
-        print("Cur dist {}, prev dist {}".format(cur_dist, prev_dist))
-
-    # Distance travelled toward the goal in m
-    reward += np.clip(prev_dist - cur_dist, -10.0, 10.0)
-
-    # Change in speed (km/h)
-    reward += 0.05 * (current["forward_speed"] - prev["forward_speed"])
-
-    # New collision damage
-    reward -= .00002 * (
-        current["collision_vehicles"] + current["collision_pedestrians"] +
-        current["collision_other"] - prev["collision_vehicles"] -
-        prev["collision_pedestrians"] - prev["collision_other"])
-
-    # New sidewalk intersection
-    reward -= 2 * (
-        current["intersection_offroad"] - prev["intersection_offroad"])
-
-    # New opposite lane intersection
-    reward -= 2 * (
-        current["intersection_otherlane"] - prev["intersection_otherlane"])
-
-    return reward
-
-
-def compute_reward_custom(env, prev, current):
-    reward = 0.0
-
-    cur_dist = current["distance_to_goal"]
-    prev_dist = prev["distance_to_goal"]
-
-    if env.config["verbose"]:
-        print("Cur dist {}, prev dist {}".format(cur_dist, prev_dist))
-
-    # Distance travelled toward the goal in m
-    reward += np.clip(prev_dist - cur_dist, -10.0, 10.0)
-
-    # Speed reward, up 30.0 (km/h)
-    reward += np.clip(current["forward_speed"], 0.0, 30.0) / 10
-
-    # New collision damage
-    new_damage = (
-        current["collision_vehicles"] + current["collision_pedestrians"] +
-        current["collision_other"] - prev["collision_vehicles"] -
-        prev["collision_pedestrians"] - prev["collision_other"])
-    if new_damage:
-        reward -= 100.0
-
-    # Sidewalk intersection
-    reward -= current["intersection_offroad"]
-
-    # Opposite lane intersection
-    reward -= current["intersection_otherlane"]
-
-    # Reached goal
-    if current["next_command"] == "REACH_GOAL":
-        reward += 100.0
-
-    return reward
-
-
-def compute_reward_lane_keep(env, prev, current):
-    reward = 0.0
-
-    # Speed reward, up 30.0 (km/h)
-    reward += np.clip(current["forward_speed"], 0.0, 30.0) / 10
-
-    # New collision damage
-    new_damage = (
-        current["collision_vehicles"] + current["collision_pedestrians"] +
-        current["collision_other"] - prev["collision_vehicles"] -
-        prev["collision_pedestrians"] - prev["collision_other"])
-    if new_damage:
-        reward -= 100.0
-
-    # Sidewalk intersection
-    reward -= current["intersection_offroad"]
-
-    # Opposite lane intersection
-    reward -= current["intersection_otherlane"]
-
-    return reward
-
-
-REWARD_FUNCTIONS = {
-    "corl2017": compute_reward_corl2017,
-    "custom": compute_reward_custom,
-    "lane_keep": compute_reward_lane_keep,
-}
-
-
-def compute_reward(env, prev, current):
-    return REWARD_FUNCTIONS[env.config["reward_function"]](env, prev, current)
-
-
-def print_measurements(measurements):
-    number_of_agents = len(measurements.non_player_agents)
-    player_measurements = measurements.player_measurements
-    message = "Vehicle at ({pos_x:.1f}, {pos_y:.1f}), "
-    message += "{speed:.2f} km/h, "
-    message += "Collision: {{vehicles={col_cars:.0f}, "
-    message += "pedestrians={col_ped:.0f}, other={col_other:.0f}}}, "
-    message += "{other_lane:.0f}% other lane, {offroad:.0f}% off-road, "
-    message += "({agents_num:d} non-player agents in the scene)"
-    message = message.format(
-        pos_x=player_measurements.transform.location.x / 100,  # cm -> m
-        pos_y=player_measurements.transform.location.y / 100,
-        speed=player_measurements.forward_speed,
-        col_cars=player_measurements.collision_vehicles,
-        col_ped=player_measurements.collision_pedestrians,
-        col_other=player_measurements.collision_other,
-        other_lane=100 * player_measurements.intersection_otherlane,
-        offroad=100 * player_measurements.intersection_offroad,
-        agents_num=number_of_agents)
-    print(message)
-
-
-def sigmoid(x):
-    x = float(x)
-    return np.exp(x) / (1 + np.exp(x))
-
-
-def collided_done(py_measurements):
-    m = py_measurements
-    collided = (m["collision_vehicles"] > 0 or m["collision_pedestrians"] > 0
-                or m["collision_other"] > 0)
-    return bool(collided or m["total_reward"] < -100)
-
-
-if __name__ == "__main__":
-    for _ in range(2):
-        env = CarlaEnv()
-        obs = env.reset()
-        print("reset", obs)
-        start = time.time()
-        done = False
-        i = 0
-        total_reward = 0.0
-        while not done:
-            i += 1
-            if ENV_CONFIG["discrete_actions"]:
-                obs, reward, done, info = env.step(1)
-            else:
-                obs, reward, done, info = env.step([0, 1, 0])
-            total_reward += reward
-            print(i, "rew", reward, "total", total_reward, "done", done)
-        print("{} fps".format(100 / (time.time() - start)))
@@ -1,108 +0,0 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-import tensorflow as tf
-import tensorflow.contrib.slim as slim
-from tensorflow.contrib.layers import xavier_initializer
-
-from ray.rllib.models.catalog import ModelCatalog
-from ray.rllib.models.misc import normc_initializer
-from ray.rllib.models.model import Model
-
-
-class CarlaModel(Model):
-    """Carla model that can process the observation tuple.
-
-    The architecture processes the image using convolutional layers, the
-    metrics using fully connected layers, and then combines them with
-    further fully connected layers.
-    """
-
-    # TODO(ekl): use build_layers_v2 for native dict space support
-    def _build_layers(self, inputs, num_outputs, options):
-        # Parse options
-        image_shape = options["custom_options"]["image_shape"]
-        convs = options.get("conv_filters", [
-            [16, [8, 8], 4],
-            [32, [5, 5], 3],
-            [32, [5, 5], 2],
-            [512, [10, 10], 1],
-        ])
-        hiddens = options.get("fcnet_hiddens", [64])
-        fcnet_activation = options.get("fcnet_activation", "tanh")
-        if fcnet_activation == "tanh":
-            activation = tf.nn.tanh
-        elif fcnet_activation == "relu":
-            activation = tf.nn.relu
-
-        # Sanity checks
-        image_size = np.product(image_shape)
-        expected_shape = [image_size + 5 + 2]
-        assert inputs.shape.as_list()[1:] == expected_shape, \
-            (inputs.shape.as_list()[1:], expected_shape)
-
-        # Reshape the input vector back into its components
-        vision_in = tf.reshape(inputs[:, :image_size],
-                               [tf.shape(inputs)[0]] + image_shape)
-        metrics_in = inputs[:, image_size:]
-        print("Vision in shape", vision_in)
-        print("Metrics in shape", metrics_in)
-
-        # Setup vision layers
-        with tf.name_scope("carla_vision"):
-            for i, (out_size, kernel, stride) in enumerate(convs[:-1], 1):
-                vision_in = slim.conv2d(
-                    vision_in,
-                    out_size,
-                    kernel,
-                    stride,
-                    scope="conv{}".format(i))
-            out_size, kernel, stride = convs[-1]
-            vision_in = slim.conv2d(
-                vision_in,
-                out_size,
-                kernel,
-                stride,
-                padding="VALID",
-                scope="conv_out")
-            vision_in = tf.squeeze(vision_in, [1, 2])
-
-        # Setup metrics layer
-        with tf.name_scope("carla_metrics"):
-            metrics_in = slim.fully_connected(
-                metrics_in,
-                64,
-                weights_initializer=xavier_initializer(),
-                activation_fn=activation,
-                scope="metrics_out")
-
-        print("Shape of vision out is", vision_in.shape)
-        print("Shape of metric out is", metrics_in.shape)
-
-        # Combine the metrics and vision inputs
-        with tf.name_scope("carla_out"):
-            i = 1
-            last_layer = tf.concat([vision_in, metrics_in], axis=1)
-            print("Shape of concatenated out is", last_layer.shape)
-            for size in hiddens:
-                last_layer = slim.fully_connected(
-                    last_layer,
-                    size,
-                    weights_initializer=xavier_initializer(),
-                    activation_fn=activation,
-                    scope="fc{}".format(i))
-                i += 1
-            output = slim.fully_connected(
-                last_layer,
-                num_outputs,
-                weights_initializer=normc_initializer(0.01),
-                activation_fn=None,
-                scope="fc_out")
-
-        return output, last_layer
-
-
-def register_carla_model():
-    ModelCatalog.register_custom_model("carla", CarlaModel)
@@ -1,131 +0,0 @@
-"""Collection of Carla scenarios, including those from the CoRL 2017 paper."""
-
-TEST_WEATHERS = [0, 2, 5, 7, 9, 10, 11, 12, 13]
-TRAIN_WEATHERS = [1, 3, 4, 6, 8, 14]
-
-
-def build_scenario(city, start, end, vehicles, pedestrians, max_steps,
-                   weathers):
-    return {
-        "city": city,
-        "num_vehicles": vehicles,
-        "num_pedestrians": pedestrians,
-        "weather_distribution": weathers,
-        "start_pos_id": start,
-        "end_pos_id": end,
-        "max_steps": max_steps,
-    }
-
-
-# Simple scenario for Town02 that involves driving down a road
-DEFAULT_SCENARIO = build_scenario(
-    city="Town02",
-    start=36,
-    end=40,
-    vehicles=20,
-    pedestrians=40,
-    max_steps=200,
-    weathers=[0])
-
-# Simple scenario for Town02 that involves driving down a road
-LANE_KEEP = build_scenario(
-    city="Town02",
-    start=36,
-    end=40,
-    vehicles=0,
-    pedestrians=0,
-    max_steps=2000,
-    weathers=[0])
-
-# Scenarios from the CoRL2017 paper
-POSES_TOWN1_STRAIGHT = [[36, 40], [39, 35], [110, 114], [7, 3], [0, 4], [
-    68, 50
-], [61, 59], [47, 64], [147, 90], [33, 87], [26, 19], [80, 76], [45, 49], [
-    55, 44
-], [29, 107], [95, 104], [84, 34], [53, 67], [22, 17], [91, 148], [20, 107],
-                        [78, 70], [95, 102], [68, 44], [45, 69]]
-
-POSES_TOWN1_ONE_CURVE = [[138, 17], [47, 16], [26, 9], [42, 49], [140, 124], [
-    85, 98
-], [65, 133], [137, 51], [76, 66], [46, 39], [40, 60], [0, 29], [4, 129], [
-    121, 140
-], [2, 129], [78, 44], [68, 85], [41, 102], [95, 70], [68, 129], [84, 69],
-                         [47, 79], [110, 15], [130, 17], [0, 17]]
-
-POSES_TOWN1_NAV = [[105, 29], [27, 130], [102, 87], [132, 27], [24, 44], [
-    96, 26
-], [34, 67], [28, 1], [140, 134], [105, 9], [148, 129], [65, 18], [21, 16], [
-    147, 97
-], [42, 51], [30, 41], [18, 107], [69, 45], [102, 95], [18, 145], [111, 64],
-                   [79, 45], [84, 69], [73, 31], [37, 81]]
-
-POSES_TOWN2_STRAIGHT = [[38, 34], [4, 2], [12, 10], [62, 55], [43, 47], [
-    64, 66
-], [78, 76], [59, 57], [61, 18], [35, 39], [12, 8], [0, 18], [75, 68], [
-    54, 60
-], [45, 49], [46, 42], [53, 46], [80, 29], [65, 63], [0, 81], [54, 63],
-                        [51, 42], [16, 19], [17, 26], [77, 68]]
-
-POSES_TOWN2_ONE_CURVE = [[37, 76], [8, 24], [60, 69], [38, 10], [21, 1], [
-    58, 71
-], [74, 32], [44, 0], [71, 16], [14, 24], [34, 11], [43, 14], [75, 16], [
-    80, 21
-], [3, 23], [75, 59], [50, 47], [11, 19], [77, 34], [79, 25], [40, 63],
-                         [58, 76], [79, 55], [16, 61], [27, 11]]
-
-POSES_TOWN2_NAV = [[19, 66], [79, 14], [19, 57], [23, 1], [53, 76], [42, 13], [
-    31, 71
-], [33, 5], [54, 30], [10, 61], [66, 3], [27, 12], [79, 19], [2, 29], [16, 14],
-                   [5, 57], [70, 73], [46, 67], [57, 50], [61, 49], [21, 12],
-                   [51, 81], [77, 68], [56, 65], [43, 54]]
-
-TOWN1_STRAIGHT = [
-    build_scenario("Town01", start, end, 0, 0, 300, TEST_WEATHERS)
-    for (start, end) in POSES_TOWN1_STRAIGHT
-]
-
-TOWN1_ONE_CURVE = [
-    build_scenario("Town01", start, end, 0, 0, 600, TEST_WEATHERS)
-    for (start, end) in POSES_TOWN1_ONE_CURVE
-]
-
-TOWN1_NAVIGATION = [
-    build_scenario("Town01", start, end, 0, 0, 900, TEST_WEATHERS)
-    for (start, end) in POSES_TOWN1_NAV
-]
-
-TOWN1_NAVIGATION_DYNAMIC = [
-    build_scenario("Town01", start, end, 20, 50, 900, TEST_WEATHERS)
-    for (start, end) in POSES_TOWN1_NAV
-]
-
-TOWN2_STRAIGHT = [
-    build_scenario("Town02", start, end, 0, 0, 300, TRAIN_WEATHERS)
-    for (start, end) in POSES_TOWN2_STRAIGHT
-]
-
-TOWN2_STRAIGHT_DYNAMIC = [
-    build_scenario("Town02", start, end, 20, 50, 300, TRAIN_WEATHERS)
-    for (start, end) in POSES_TOWN2_STRAIGHT
-]
-
-TOWN2_ONE_CURVE = [
-    build_scenario("Town02", start, end, 0, 0, 600, TRAIN_WEATHERS)
-    for (start, end) in POSES_TOWN2_ONE_CURVE
-]
-
-TOWN2_NAVIGATION = [
-    build_scenario("Town02", start, end, 0, 0, 900, TRAIN_WEATHERS)
-    for (start, end) in POSES_TOWN2_NAV
-]
-
-TOWN2_NAVIGATION_DYNAMIC = [
-    build_scenario("Town02", start, end, 20, 50, 900, TRAIN_WEATHERS)
-    for (start, end) in POSES_TOWN2_NAV
-]
-
-TOWN1_ALL = (TOWN1_STRAIGHT + TOWN1_ONE_CURVE + TOWN1_NAVIGATION +
-             TOWN1_NAVIGATION_DYNAMIC)
-
-TOWN2_ALL = (TOWN2_STRAIGHT + TOWN2_ONE_CURVE + TOWN2_NAVIGATION +
-             TOWN2_NAVIGATION_DYNAMIC)
@@ -1,51 +0,0 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import ray
-from ray.tune import grid_search, run_experiments
-
-from env import CarlaEnv, ENV_CONFIG
-from models import register_carla_model
-from scenarios import TOWN2_STRAIGHT
-
-env_config = ENV_CONFIG.copy()
-env_config.update({
-    "verbose": False,
-    "x_res": 80,
-    "y_res": 80,
-    "squash_action_logits": grid_search([False, True]),
-    "use_depth_camera": False,
-    "discrete_actions": False,
-    "server_map": "/Game/Maps/Town02",
-    "reward_function": grid_search(["custom", "corl2017"]),
-    "scenarios": TOWN2_STRAIGHT,
-})
-
-register_carla_model()
-redis_address = ray.services.get_node_ip_address() + ":6379"
-
-ray.init(redis_address=redis_address)
-run_experiments({
-    "carla-a3c": {
-        "run": "A3C",
-        "env": CarlaEnv,
-        "config": {
-            "env_config": env_config,
-            "use_gpu_for_workers": True,
-            "model": {
-                "custom_model": "carla",
-                "custom_options": {
-                    "image_shape": [80, 80, 6],
-                },
-                "conv_filters": [
-                    [16, [8, 8], 4],
-                    [32, [4, 4], 2],
-                    [512, [10, 10], 1],
-                ],
-            },
-            "gamma": 0.95,
-            "num_workers": 2,
-        },
-    },
-})
@@ -1,65 +0,0 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import ray
-from ray.tune import run_experiments
-
-from env import CarlaEnv, ENV_CONFIG
-from models import register_carla_model
-from scenarios import TOWN2_ONE_CURVE
-
-env_config = ENV_CONFIG.copy()
-env_config.update({
-    "verbose": False,
-    "x_res": 80,
-    "y_res": 80,
-    "discrete_actions": True,
-    "server_map": "/Game/Maps/Town02",
-    "reward_function": "custom",
-    "scenarios": TOWN2_ONE_CURVE,
-})
-
-register_carla_model()
-
-ray.init()
-
-
-def shape_out(spec):
-    return (spec.config.env_config.framestack *
-            (spec.config.env_config.use_depth_camera and 1 or 3))
-
-
-run_experiments({
-    "carla-dqn": {
-        "run": "DQN",
-        "env": CarlaEnv,
-        "config": {
-            "env_config": env_config,
-            "model": {
-                "custom_model": "carla",
-                "custom_options": {
-                    "image_shape": [
-                        80,
-                        80,
-                        shape_out,
-                    ],
-                },
-                "conv_filters": [
-                    [16, [8, 8], 4],
-                    [32, [4, 4], 2],
-                    [512, [10, 10], 1],
-                ],
-            },
-            "timesteps_per_iteration": 100,
-            "learning_starts": 1000,
-            "schedule_max_timesteps": 100000,
-            "gamma": 0.8,
-            "tf_session_args": {
-                "gpu_options": {
-                    "allow_growth": True
-                },
-            },
-        },
-    },
-})
@@ -1,55 +0,0 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import ray
-from ray.tune import run_experiments
-
-from env import CarlaEnv, ENV_CONFIG
-from models import register_carla_model
-from scenarios import TOWN2_STRAIGHT
-
-env_config = ENV_CONFIG.copy()
-env_config.update({
-    "verbose": False,
-    "x_res": 80,
-    "y_res": 80,
-    "use_depth_camera": False,
-    "discrete_actions": False,
-    "server_map": "/Game/Maps/Town02",
-    "scenarios": TOWN2_STRAIGHT,
-})
-register_carla_model()
-
-ray.init()
-run_experiments({
-    "carla": {
-        "run": "PPO",
-        "env": CarlaEnv,
-        "config": {
-            "env_config": env_config,
-            "model": {
-                "custom_model": "carla",
-                "custom_options": {
-                    "image_shape": [
-                        env_config["x_res"], env_config["y_res"], 6
-                    ],
-                },
-                "conv_filters": [
-                    [16, [8, 8], 4],
-                    [32, [4, 4], 2],
-                    [512, [10, 10], 1],
-                ],
-            },
-            "num_workers": 1,
-            "train_batch_size": 2000,
-            "sample_batch_size": 100,
-            "lambda": 0.95,
-            "clip_param": 0.2,
-            "num_sgd_iter": 20,
-            "lr": 0.0001,
-            "sgd_minibatch_size": 32,
-            "num_gpus": 1,
-        },
-    },
-})
@@ -11,11 +11,13 @@ from __future__ import print_function
 from gym.spaces import Discrete, Box
 import gym
 import numpy as np
-import tensorflow as tf

 import ray
 from ray.rllib.models import Model, ModelCatalog
 from ray.tune import run_experiments, sample_from
+from ray.rllib.utils import try_import_tf
+
+tf = try_import_tf()


 class FastModel(Model):
@@ -15,7 +15,6 @@ $ python custom_loss.py --input-files=/tmp/cartpole

 import argparse
 import os
-import tensorflow as tf

 import ray
 from ray import tune
@@ -23,6 +22,9 @@ from ray.rllib.models import (Categorical, FullyConnectedNetwork, Model,
                              ModelCatalog)
 from ray.rllib.models.model import restore_original_dimensions
 from ray.rllib.offline import JsonReader
+from ray.rllib.utils import try_import_tf
+
+tf = try_import_tf()

 parser = argparse.ArgumentParser()
 parser.add_argument("--iters", type=int, default=200)
@@ -6,9 +6,11 @@ from __future__ import print_function

 import os
 import ray
-import tensorflow as tf

 from ray.rllib.agents.registry import get_agent_class
+from ray.rllib.utils import try_import_tf
+
+tf = try_import_tf()

 ray.init(num_cpus=10)

@@ -16,14 +16,14 @@ import argparse
 import gym
 import random

-import tensorflow as tf
-import tensorflow.contrib.slim as slim
-
 import ray
 from ray import tune
 from ray.rllib.models import Model, ModelCatalog
 from ray.rllib.tests.test_multi_agent_env import MultiCartpole
 from ray.tune.registry import register_env
+from ray.rllib.utils import try_import_tf
+
+tf = try_import_tf()

 parser = argparse.ArgumentParser()

@@ -43,12 +43,12 @@ class CustomModel1(Model):
                tf.VariableScope(tf.AUTO_REUSE, "shared"),
                reuse=tf.AUTO_REUSE,
                auxiliary_name_scope=False):
-            last_layer = slim.fully_connected(
-                input_dict["obs"], 64, activation_fn=tf.nn.relu, scope="fc1")
-        last_layer = slim.fully_connected(
-            last_layer, 64, activation_fn=tf.nn.relu, scope="fc2")
-        output = slim.fully_connected(
-            last_layer, num_outputs, activation_fn=None, scope="fc_out")
+            last_layer = tf.layers.dense(
+                input_dict["obs"], 64, activation=tf.nn.relu, name="fc1")
+        last_layer = tf.layers.dense(
+            last_layer, 64, activation=tf.nn.relu, name="fc2")
+        output = tf.layers.dense(
+            last_layer, num_outputs, activation=None, name="fc_out")
        return output, last_layer


@@ -59,12 +59,12 @@ class CustomModel2(Model):
                tf.VariableScope(tf.AUTO_REUSE, "shared"),
                reuse=tf.AUTO_REUSE,
                auxiliary_name_scope=False):
-            last_layer = slim.fully_connected(
-                input_dict["obs"], 64, activation_fn=tf.nn.relu, scope="fc1")
-        last_layer = slim.fully_connected(
-            last_layer, 64, activation_fn=tf.nn.relu, scope="fc2")
-        output = slim.fully_connected(
-            last_layer, num_outputs, activation_fn=None, scope="fc_out")
+            last_layer = tf.layers.dense(
+                input_dict["obs"], 64, activation=tf.nn.relu, name="fc1")
+        last_layer = tf.layers.dense(
+            last_layer, 64, activation=tf.nn.relu, name="fc2")
+        output = tf.layers.dense(
+            last_layer, num_outputs, activation=None, name="fc_out")
        return output, last_layer


@@ -23,14 +23,15 @@ import random
 import numpy as np
 import gym
 from gym.spaces import Box, Discrete, Dict
-import tensorflow as tf
-import tensorflow.contrib.slim as slim

 import ray
 from ray import tune
 from ray.rllib.models import Model, ModelCatalog
 from ray.rllib.models.misc import normc_initializer
 from ray.tune.registry import register_env
+from ray.rllib.utils import try_import_tf
+
+tf = try_import_tf()

 parser = argparse.ArgumentParser()
 parser.add_argument("--stop", type=int, default=200)
@@ -134,18 +135,18 @@ class ParametricActionsModel(Model):
        hiddens = [256, 256]
        for i, size in enumerate(hiddens):
            label = "fc{}".format(i)
-            last_layer = slim.fully_connected(
+            last_layer = tf.layers.dense(
                last_layer,
                size,
-                weights_initializer=normc_initializer(1.0),
-                activation_fn=tf.nn.tanh,
-                scope=label)
-        output = slim.fully_connected(
+                kernel_initializer=normc_initializer(1.0),
+                activation=tf.nn.tanh,
+                name=label)
+        output = tf.layers.dense(
            last_layer,
            action_embed_size,
-            weights_initializer=normc_initializer(0.01),
-            activation_fn=None,
-            scope="fc_out")
+            kernel_initializer=normc_initializer(0.01),
+            activation=None,
+            name="fc_out")

        # Expand the model output to [BATCH, 1, EMBED_SIZE]. Note that the
        # avail actions tensor is of shape [BATCH, MAX_ACTIONS, EMBED_SIZE].
@@ -12,7 +12,11 @@ from ray.rllib.utils import try_import_tf
 tf = try_import_tf()

 if tf:
-    use_tf150_api = (distutils.version.LooseVersion(tf.VERSION) >=
+    if hasattr(tf, "__version__"):
+        version = tf.__version__
+    else:
+        version = tf.VERSION
+    use_tf150_api = (distutils.version.LooseVersion(version) >=
                     distutils.version.LooseVersion("1.5.0"))
 else:
    use_tf150_api = False
@@ -21,8 +21,6 @@ class FullyConnectedNetwork(Model):
        model that processes the components separately, use _build_layers_v2().
        """

-        import tensorflow.contrib.slim as slim
-
        hiddens = options.get("fcnet_hiddens")
        activation = get_activation_fn(options.get("fcnet_activation"))

@@ -31,18 +29,18 @@ class FullyConnectedNetwork(Model):
            last_layer = inputs
            for size in hiddens:
                label = "fc{}".format(i)
-                last_layer = slim.fully_connected(
+                last_layer = tf.layers.dense(
                    last_layer,
                    size,
-                    weights_initializer=normc_initializer(1.0),
-                    activation_fn=activation,
-                    scope=label)
+                    kernel_initializer=normc_initializer(1.0),
+                    activation=activation,
+                    name=label)
                i += 1
            label = "fc_out"
-            output = slim.fully_connected(
+            output = tf.layers.dense(
                last_layer,
                num_outputs,
-                weights_initializer=normc_initializer(0.01),
-                activation_fn=None,
-                scope=label)
+                kernel_initializer=normc_initializer(0.01),
+                activation=None,
+                name=label)
            return output, last_layer
@@ -38,8 +38,6 @@ class LSTM(Model):

    @override(Model)
    def _build_layers_v2(self, input_dict, num_outputs, options):
-        import tensorflow.contrib.rnn as rnn
-
        cell_size = options.get("lstm_cell_size")
        if options.get("lstm_use_prev_action_reward"):
            action_dim = int(
@@ -76,7 +74,7 @@ class LSTM(Model):
            self.state_in = [c_in, h_in]

        # Setup LSTM outputs
-        state_in = rnn.LSTMStateTuple(c_in, h_in)
+        state_in = tf.nn.rnn_cell.LSTMStateTuple(c_in, h_in)
        lstm_out, lstm_state = tf.nn.dynamic_rnn(
            lstm,
            last_layer,
@@ -15,8 +15,6 @@ class VisionNetwork(Model):

    @override(Model)
    def _build_layers_v2(self, input_dict, num_outputs, options):
-        import tensorflow.contrib.slim as slim
-
        inputs = input_dict["obs"]
        filters = options.get("conv_filters")
        if not filters:
@@ -26,28 +24,29 @@ class VisionNetwork(Model):

        with tf.name_scope("vision_net"):
            for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
-                inputs = slim.conv2d(
+                inputs = tf.layers.conv2d(
                    inputs,
                    out_size,
                    kernel,
                    stride,
-                    activation_fn=activation,
-                    scope="conv{}".format(i))
+                    activation=activation,
+                    padding="same",
+                    name="conv{}".format(i))
            out_size, kernel, stride = filters[-1]
-            fc1 = slim.conv2d(
+            fc1 = tf.layers.conv2d(
                inputs,
                out_size,
                kernel,
                stride,
-                activation_fn=activation,
-                padding="VALID",
-                scope="fc1")
-            fc2 = slim.conv2d(
+                activation=activation,
+                padding="valid",
+                name="fc1")
+            fc2 = tf.layers.conv2d(
                fc1,
                num_outputs, [1, 1],
-                activation_fn=None,
-                normalizer_fn=None,
-                scope="fc2")
+                activation=None,
+                padding="same",
+                name="fc2")
            return flatten(fc2), flatten(fc1)


@@ -17,6 +17,9 @@ from ray.rllib.optimizers.aso_minibatch_buffer import MinibatchBuffer
 from ray.rllib.optimizers.multi_gpu_impl import LocalSyncParallelOptimizer
 from ray.rllib.utils.annotations import override
 from ray.rllib.utils.timer import TimerStat
+from ray.rllib.utils import try_import_tf
+
+tf = try_import_tf()

 logger = logging.getLogger(__name__)

@@ -38,9 +41,6 @@ class TFMultiGPULearner(LearnerThread):
                 learner_queue_size=16,
                 num_data_load_threads=16,
                 _fake_gpus=False):
-        # Multi-GPU requires TensorFlow to function.
-        import tensorflow as tf
-
        LearnerThread.__init__(self, local_evaluator, minibatch_buffer_size,
                               num_sgd_iter, learner_queue_size)
        self.lr = lr
@@ -1,6 +1,5 @@
 import gym
 import numpy as np
-import tensorflow as tf
 import unittest
 from gym.spaces import Box, Discrete, Tuple

@@ -12,6 +11,9 @@ from ray.rllib.models.preprocessors import (NoPreprocessor, OneHotPreprocessor,
                                            Preprocessor)
 from ray.rllib.models.fcnet import FullyConnectedNetwork
 from ray.rllib.models.visionnet import VisionNetwork
+from ray.rllib.utils import try_import_tf
+
+tf = try_import_tf()


 class CustomPreprocessor(Preprocessor):
@@ -6,8 +6,6 @@ import gym
 import numpy as np
 import pickle
 import unittest
-import tensorflow as tf
-import tensorflow.contrib.rnn as rnn

 import ray
 from ray.rllib.agents.ppo import PPOTrainer
@@ -16,6 +14,9 @@ from ray.rllib.models.lstm import add_time_dimension, chop_into_sequences
 from ray.rllib.models.misc import linear, normc_initializer
 from ray.rllib.models.model import Model
 from ray.tune.registry import register_env
+from ray.rllib.utils import try_import_tf
+
+tf = try_import_tf()


 class LSTMUtilsTest(unittest.TestCase):
@@ -104,7 +105,7 @@ class RNNSpyModel(Model):
        last_layer = add_time_dimension(features, self.seq_lens)

        # Setup the LSTM cell
-        lstm = rnn.BasicLSTMCell(cell_size, state_is_tuple=True)
+        lstm = tf.nn.rnn_cell.BasicLSTMCell(cell_size, state_is_tuple=True)
        self.state_init = [
            np.zeros(lstm.state_size.c, np.float32),
            np.zeros(lstm.state_size.h, np.float32)
@@ -121,7 +122,7 @@ class RNNSpyModel(Model):
        self.state_in = [c_in, h_in]

        # Setup LSTM outputs
-        state_in = rnn.LSTMStateTuple(c_in, h_in)
+        state_in = tf.nn.rnn_cell.LSTMStateTuple(c_in, h_in)
        lstm_out, lstm_state = tf.nn.dynamic_rnn(
            lstm,
            last_layer,
@@ -7,8 +7,6 @@ import pickle
 from gym import spaces
 from gym.envs.registration import EnvSpec
 import gym
-import tensorflow.contrib.slim as slim
-import tensorflow as tf
 import unittest

 import ray
@@ -25,6 +23,9 @@ from ray.rllib.models.pytorch.model import TorchModel
 from ray.rllib.rollout import rollout
 from ray.rllib.tests.test_external_env import SimpleServing
 from ray.tune.registry import register_env
+from ray.rllib.utils import try_import_tf
+
+tf = try_import_tf()

 DICT_SPACE = spaces.Dict({
    "sensors": spaces.Dict({
@@ -179,8 +180,8 @@ class DictSpyModel(Model):
            stateful=True)

        with tf.control_dependencies([spy_fn]):
-            output = slim.fully_connected(
-                input_dict["obs"]["sensors"]["position"], num_outputs)
+            output = tf.layers.dense(input_dict["obs"]["sensors"]["position"],
+                                     num_outputs)
        return output, output


@@ -208,7 +209,7 @@ class TupleSpyModel(Model):
            stateful=True)

        with tf.control_dependencies([spy_fn]):
-            output = slim.fully_connected(input_dict["obs"][0], num_outputs)
+            output = tf.layers.dense(input_dict["obs"][0], num_outputs)
        return output, output


@@ -4,7 +4,6 @@ from __future__ import print_function

 import gym
 import numpy as np
-import tensorflow as tf
 import time
 import unittest

@@ -16,6 +15,9 @@ from ray.rllib.evaluation.policy_evaluator import PolicyEvaluator
 from ray.rllib.optimizers import AsyncGradientsOptimizer, AsyncSamplesOptimizer
 from ray.rllib.optimizers.aso_tree_aggregator import TreeAggregator
 from ray.rllib.tests.mock_evaluator import _MockEvaluator
+from ray.rllib.utils import try_import_tf
+
+tf = try_import_tf()


 class AsyncOptimizerTest(unittest.TestCase):
@@ -33,10 +33,15 @@ def try_import_tf():
        return None

    try:
-        import tensorflow as tf
+        import tensorflow.compat.v1 as tf
+        tf.disable_v2_behavior()
        return tf
    except ImportError:
-        return None
+        try:
+            import tensorflow as tf
+            return tf
+        except ImportError:
+            return None


 __all__ = [