[rllib] TensorFlow 2 compatibility (#4802)

This commit is contained in:
Eric Liang
2019-05-16 22:12:07 -07:00
committed by GitHub
parent 7d5ef6d99c
commit 3807fb505b
32 changed files with 140 additions and 1235 deletions
@@ -399,8 +399,6 @@ class DDPGPolicyGraph(DDPGPostprocessing, TFPolicyGraph):
self.set_pure_exploration_phase(state[2])
def _build_q_network(self, obs, obs_space, action_space, actions):
import tensorflow.contrib.layers as layers
if self.config["use_state_preprocessor"]:
q_model = ModelCatalog.get_model({
"obs": obs,
@@ -413,16 +411,12 @@ class DDPGPolicyGraph(DDPGPostprocessing, TFPolicyGraph):
activation = getattr(tf.nn, self.config["critic_hidden_activation"])
for hidden in self.config["critic_hiddens"]:
q_out = layers.fully_connected(
q_out, num_outputs=hidden, activation_fn=activation)
q_values = layers.fully_connected(
q_out, num_outputs=1, activation_fn=None)
q_out = tf.layers.dense(q_out, units=hidden, activation=activation)
q_values = tf.layers.dense(q_out, units=1, activation=None)
return q_values, q_model
def _build_policy_network(self, obs, obs_space, action_space):
import tensorflow.contrib.layers as layers
if self.config["use_state_preprocessor"]:
model = ModelCatalog.get_model({
"obs": obs,
@@ -434,16 +428,19 @@ class DDPGPolicyGraph(DDPGPostprocessing, TFPolicyGraph):
action_out = obs
activation = getattr(tf.nn, self.config["actor_hidden_activation"])
normalizer_fn = layers.layer_norm if self.config["parameter_noise"] \
else None
for hidden in self.config["actor_hiddens"]:
action_out = layers.fully_connected(
action_out,
num_outputs=hidden,
activation_fn=activation,
normalizer_fn=normalizer_fn)
action_out = layers.fully_connected(
action_out, num_outputs=self.dim_actions, activation_fn=None)
if self.config["parameter_noise"]:
import tensorflow.contrib.layers as layers
action_out = layers.fully_connected(
action_out,
num_outputs=hidden,
activation_fn=activation,
normalizer_fn=layers.layer_norm)
else:
action_out = tf.layers.dense(
action_out, units=hidden, activation=activation)
action_out = tf.layers.dense(
action_out, units=self.dim_actions, activation=None)
# Use sigmoid to scale to [0,1], but also double magnitude of input to
# emulate behaviour of tanh activation used in DDPG and TD3 papers.
@@ -507,7 +504,7 @@ class DDPGPolicyGraph(DDPGPostprocessing, TFPolicyGraph):
def make_uniform_random_actions():
# pure random exploration option
uniform_random_actions = tf.random.uniform(
uniform_random_actions = tf.random_uniform(
tf.shape(deterministic_actions))
# rescale uniform random actions according to action range
tf_range = tf.constant(action_range[None], dtype="float32")
+18 -15
View File
@@ -154,8 +154,6 @@ class QNetwork(object):
v_max=10.0,
sigma0=0.5,
parameter_noise=False):
import tensorflow.contrib.layers as layers
self.model = model
with tf.variable_scope("action_value"):
if hiddens:
@@ -164,13 +162,18 @@ class QNetwork(object):
if use_noisy:
action_out = self.noisy_layer(
"hidden_%d" % i, action_out, hiddens[i], sigma0)
else:
elif parameter_noise:
import tensorflow.contrib.layers as layers
action_out = layers.fully_connected(
action_out,
num_outputs=hiddens[i],
activation_fn=tf.nn.relu,
normalizer_fn=layers.layer_norm
if parameter_noise else None)
normalizer_fn=layers.layer_norm)
else:
action_out = tf.layers.dense(
action_out,
units=hiddens[i],
activation=tf.nn.relu)
else:
# Avoid postprocessing the outputs. This enables custom models
# to be used for parametric action DQN.
@@ -183,10 +186,8 @@ class QNetwork(object):
sigma0,
non_linear=False)
elif hiddens:
action_scores = layers.fully_connected(
action_out,
num_outputs=num_actions * num_atoms,
activation_fn=None)
action_scores = tf.layers.dense(
action_out, units=num_actions * num_atoms, activation=None)
else:
action_scores = model.outputs
if num_atoms > 1:
@@ -214,13 +215,15 @@ class QNetwork(object):
state_out = self.noisy_layer("dueling_hidden_%d" % i,
state_out, hiddens[i],
sigma0)
else:
state_out = layers.fully_connected(
elif parameter_noise:
state_out = tf.contrib.layers.fully_connected(
state_out,
num_outputs=hiddens[i],
activation_fn=tf.nn.relu,
normalizer_fn=layers.layer_norm
if parameter_noise else None)
normalizer_fn=tf.contrib.layers.layer_norm)
else:
state_out = tf.layers.dense(
state_out, units=hiddens[i], activation=tf.nn.relu)
if use_noisy:
state_score = self.noisy_layer(
"dueling_output",
@@ -229,8 +232,8 @@ class QNetwork(object):
sigma0,
non_linear=False)
else:
state_score = layers.fully_connected(
state_out, num_outputs=num_atoms, activation_fn=None)
state_score = tf.layers.dense(
state_out, units=num_atoms, activation=None)
if num_atoms > 1:
support_logits_per_action_mean = tf.reduce_mean(
support_logits_per_action, 1)
-2
View File
@@ -38,8 +38,6 @@ from ray.rllib.models.action_dist import Categorical
from ray.rllib.utils import try_import_tf
tf = try_import_tf()
if tf:
nest = tf.contrib.framework.nest
VTraceFromLogitsReturns = collections.namedtuple("VTraceFromLogitsReturns", [
"vs", "pg_advantages", "log_rhos", "behaviour_action_log_probs",
@@ -278,14 +278,11 @@ class VTracePolicyGraph(LearningRateSchedule, VTracePostprocessing,
self.KL_stats.update({
"mean_KL_{}".format(i): tf.reduce_mean(kl),
"max_KL_{}".format(i): tf.reduce_max(kl),
"median_KL_{}".format(i): tf.contrib.distributions.
percentile(kl, 50.0),
})
else:
self.KL_stats = {
"mean_KL": tf.reduce_mean(kls[0]),
"max_KL": tf.reduce_max(kls[0]),
"median_KL": tf.contrib.distributions.percentile(kls[0], 50.0),
}
# Initialize TFPolicyGraph
@@ -26,8 +26,10 @@ from __future__ import print_function
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
import vtrace
from ray.rllib.utils import try_import_tf
tf = try_import_tf()
def _shaped_arange(*shape):
@@ -399,14 +399,11 @@ class AsyncPPOPolicyGraph(LearningRateSchedule, APPOPostprocessing,
self.KL_stats.update({
"mean_KL_{}".format(i): tf.reduce_mean(kl),
"max_KL_{}".format(i): tf.reduce_max(kl),
"median_KL_{}".format(i): tf.contrib.distributions.
percentile(kl, 50.0),
})
else:
self.KL_stats = {
"mean_KL": tf.reduce_mean(kls[0]),
"max_KL": tf.reduce_max(kls[0]),
"median_KL": tf.contrib.distributions.percentile(kls[0], 50.0),
}
# Initialize TFPolicyGraph
+3 -1
View File
@@ -4,11 +4,13 @@ from __future__ import print_function
import unittest
import numpy as np
import tensorflow as tf
from numpy.testing import assert_allclose
from ray.rllib.models.action_dist import Categorical
from ray.rllib.agents.ppo.utils import flatten, concatenate
from ray.rllib.utils import try_import_tf
tf = try_import_tf()
# TODO(ekl): move to rllib/models dir
+11 -11
View File
@@ -5,13 +5,13 @@ from __future__ import print_function
import argparse
import tensorflow as tf
import tensorflow.contrib.slim as slim
import ray
from ray import tune
from ray.rllib.models import Model, ModelCatalog
from ray.rllib.models.misc import normc_initializer
from ray.rllib.utils import try_import_tf
tf = try_import_tf()
parser = argparse.ArgumentParser()
parser.add_argument("--num-iters", type=int, default=200)
@@ -24,21 +24,21 @@ class BatchNormModel(Model):
hiddens = [256, 256]
for i, size in enumerate(hiddens):
label = "fc{}".format(i)
last_layer = slim.fully_connected(
last_layer = tf.layers.dense(
last_layer,
size,
weights_initializer=normc_initializer(1.0),
activation_fn=tf.nn.tanh,
scope=label)
kernel_initializer=normc_initializer(1.0),
activation=tf.nn.tanh,
name=label)
# Add a batch norm layer
last_layer = tf.layers.batch_normalization(
last_layer, training=input_dict["is_training"])
output = slim.fully_connected(
output = tf.layers.dense(
last_layer,
num_outputs,
weights_initializer=normc_initializer(0.01),
activation_fn=None,
scope="fc_out")
kernel_initializer=normc_initializer(0.01),
activation=None,
name="fc_out")
return output, last_layer
-14
View File
@@ -1,14 +0,0 @@
(Experimental) OpenAI gym environment for https://github.com/carla-simulator/carla
To run, first download and unpack the Carla binaries from this URL: https://github.com/carla-simulator/carla/releases/tag/0.7.0
Note that currently you also need to clone the Python code from `carla/benchmark_branch` which includes the Carla planner.
Then, you can try running env.py to drive the car. Run one of the train_* scripts to attempt training.
$ pkill -9 Carla
$ export CARLA_SERVER=/PATH/TO/CARLA_0.7.0/CarlaUE4.sh
$ export CARLA_PY_PATH=/PATH/TO/CARLA_BENCHMARK_BRANCH_REPO/PythonClient
$ python env.py
Check out the scenarios.py file for different training and test scenarios that can be used.
-684
View File
@@ -1,684 +0,0 @@
"""OpenAI gym environment for Carla. Run this file for a demo."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from datetime import datetime
import atexit
import cv2
import os
import json
import random
import signal
import subprocess
import sys
import time
import traceback
import numpy as np
try:
import scipy.misc
except Exception:
pass
import gym
from gym.spaces import Box, Discrete, Tuple
from scenarios import DEFAULT_SCENARIO
# Set this where you want to save image outputs (or empty string to disable)
CARLA_OUT_PATH = os.environ.get("CARLA_OUT", os.path.expanduser("~/carla_out"))
if CARLA_OUT_PATH and not os.path.exists(CARLA_OUT_PATH):
os.makedirs(CARLA_OUT_PATH)
# Set this to the path of your Carla binary
SERVER_BINARY = os.environ.get("CARLA_SERVER",
os.path.expanduser("~/CARLA_0.7.0/CarlaUE4.sh"))
assert os.path.exists(SERVER_BINARY)
if "CARLA_PY_PATH" in os.environ:
sys.path.append(os.path.expanduser(os.environ["CARLA_PY_PATH"]))
else:
# TODO(ekl) switch this to the binary path once the planner is in master
sys.path.append(os.path.expanduser("~/carla/PythonClient/"))
try:
from carla.client import CarlaClient
from carla.sensor import Camera
from carla.settings import CarlaSettings
from carla.planner.planner import Planner, REACH_GOAL, GO_STRAIGHT, \
TURN_RIGHT, TURN_LEFT, LANE_FOLLOW
except Exception as e:
print("Failed to import Carla python libs, try setting $CARLA_PY_PATH")
raise e
# Carla planner commands
COMMANDS_ENUM = {
REACH_GOAL: "REACH_GOAL",
GO_STRAIGHT: "GO_STRAIGHT",
TURN_RIGHT: "TURN_RIGHT",
TURN_LEFT: "TURN_LEFT",
LANE_FOLLOW: "LANE_FOLLOW",
}
# Mapping from string repr to one-hot encoding index to feed to the model
COMMAND_ORDINAL = {
"REACH_GOAL": 0,
"GO_STRAIGHT": 1,
"TURN_RIGHT": 2,
"TURN_LEFT": 3,
"LANE_FOLLOW": 4,
}
# Number of retries if the server doesn't respond
RETRIES_ON_ERROR = 5
# Dummy Z coordinate to use when we only care about (x, y)
GROUND_Z = 22
# Default environment configuration
ENV_CONFIG = {
"log_images": True,
"enable_planner": True,
"framestack": 2, # note: only [1, 2] currently supported
"convert_images_to_video": True,
"early_terminate_on_collision": True,
"verbose": True,
"reward_function": "custom",
"render_x_res": 800,
"render_y_res": 600,
"x_res": 80,
"y_res": 80,
"server_map": "/Game/Maps/Town02",
"scenarios": [DEFAULT_SCENARIO],
"use_depth_camera": False,
"discrete_actions": True,
"squash_action_logits": False,
}
DISCRETE_ACTIONS = {
# coast
0: [0.0, 0.0],
# turn left
1: [0.0, -0.5],
# turn right
2: [0.0, 0.5],
# forward
3: [1.0, 0.0],
# brake
4: [-0.5, 0.0],
# forward left
5: [1.0, -0.5],
# forward right
6: [1.0, 0.5],
# brake left
7: [-0.5, -0.5],
# brake right
8: [-0.5, 0.5],
}
live_carla_processes = set()
def cleanup():
print("Killing live carla processes", live_carla_processes)
for pgid in live_carla_processes:
os.killpg(pgid, signal.SIGKILL)
atexit.register(cleanup)
class CarlaEnv(gym.Env):
def __init__(self, config=ENV_CONFIG):
self.config = config
self.city = self.config["server_map"].split("/")[-1]
if self.config["enable_planner"]:
self.planner = Planner(self.city)
if config["discrete_actions"]:
self.action_space = Discrete(len(DISCRETE_ACTIONS))
else:
self.action_space = Box(-1.0, 1.0, shape=(2, ), dtype=np.float32)
if config["use_depth_camera"]:
image_space = Box(
-1.0,
1.0,
shape=(config["y_res"], config["x_res"],
1 * config["framestack"]),
dtype=np.float32)
else:
image_space = Box(
0,
255,
shape=(config["y_res"], config["x_res"],
3 * config["framestack"]),
dtype=np.uint8)
self.observation_space = Tuple( # forward_speed, dist to goal
[
image_space,
Discrete(len(COMMANDS_ENUM)), # next_command
Box(-128.0, 128.0, shape=(2, ), dtype=np.float32)
])
# TODO(ekl) this isn't really a proper gym spec
self._spec = lambda: None
self._spec.id = "Carla-v0"
self.server_port = None
self.server_process = None
self.client = None
self.num_steps = 0
self.total_reward = 0
self.prev_measurement = None
self.prev_image = None
self.episode_id = None
self.measurements_file = None
self.weather = None
self.scenario = None
self.start_pos = None
self.end_pos = None
self.start_coord = None
self.end_coord = None
self.last_obs = None
def init_server(self):
print("Initializing new Carla server...")
# Create a new server process and start the client.
self.server_port = random.randint(10000, 60000)
self.server_process = subprocess.Popen(
[
SERVER_BINARY, self.config["server_map"], "-windowed",
"-ResX=400", "-ResY=300", "-carla-server",
"-carla-world-port={}".format(self.server_port)
],
preexec_fn=os.setsid,
stdout=open(os.devnull, "w"))
live_carla_processes.add(os.getpgid(self.server_process.pid))
for i in range(RETRIES_ON_ERROR):
try:
self.client = CarlaClient("localhost", self.server_port)
return self.client.connect()
except Exception as e:
print("Error connecting: {}, attempt {}".format(e, i))
time.sleep(2)
def clear_server_state(self):
print("Clearing Carla server state")
try:
if self.client:
self.client.disconnect()
self.client = None
except Exception as e:
print("Error disconnecting client: {}".format(e))
pass
if self.server_process:
pgid = os.getpgid(self.server_process.pid)
os.killpg(pgid, signal.SIGKILL)
live_carla_processes.remove(pgid)
self.server_port = None
self.server_process = None
def __del__(self):
self.clear_server_state()
def reset(self):
error = None
for _ in range(RETRIES_ON_ERROR):
try:
if not self.server_process:
self.init_server()
return self._reset()
except Exception as e:
print("Error during reset: {}".format(traceback.format_exc()))
self.clear_server_state()
error = e
raise error
def _reset(self):
self.num_steps = 0
self.total_reward = 0
self.prev_measurement = None
self.prev_image = None
self.episode_id = datetime.today().strftime("%Y-%m-%d_%H-%M-%S_%f")
self.measurements_file = None
# Create a CarlaSettings object. This object is a wrapper around
# the CarlaSettings.ini file. Here we set the configuration we
# want for the new episode.
settings = CarlaSettings()
self.scenario = random.choice(self.config["scenarios"])
assert self.scenario["city"] == self.city, (self.scenario, self.city)
self.weather = random.choice(self.scenario["weather_distribution"])
settings.set(
SynchronousMode=True,
SendNonPlayerAgentsInfo=True,
NumberOfVehicles=self.scenario["num_vehicles"],
NumberOfPedestrians=self.scenario["num_pedestrians"],
WeatherId=self.weather)
settings.randomize_seeds()
if self.config["use_depth_camera"]:
camera1 = Camera("CameraDepth", PostProcessing="Depth")
camera1.set_image_size(self.config["render_x_res"],
self.config["render_y_res"])
camera1.set_position(30, 0, 130)
settings.add_sensor(camera1)
camera2 = Camera("CameraRGB")
camera2.set_image_size(self.config["render_x_res"],
self.config["render_y_res"])
camera2.set_position(30, 0, 130)
settings.add_sensor(camera2)
# Setup start and end positions
scene = self.client.load_settings(settings)
positions = scene.player_start_spots
self.start_pos = positions[self.scenario["start_pos_id"]]
self.end_pos = positions[self.scenario["end_pos_id"]]
self.start_coord = [
self.start_pos.location.x // 100, self.start_pos.location.y // 100
]
self.end_coord = [
self.end_pos.location.x // 100, self.end_pos.location.y // 100
]
print("Start pos {} ({}), end {} ({})".format(
self.scenario["start_pos_id"], self.start_coord,
self.scenario["end_pos_id"], self.end_coord))
# Notify the server that we want to start the episode at the
# player_start index. This function blocks until the server is ready
# to start the episode.
print("Starting new episode...")
self.client.start_episode(self.scenario["start_pos_id"])
image, py_measurements = self._read_observation()
self.prev_measurement = py_measurements
return self.encode_obs(self.preprocess_image(image), py_measurements)
def encode_obs(self, image, py_measurements):
assert self.config["framestack"] in [1, 2]
prev_image = self.prev_image
self.prev_image = image
if prev_image is None:
prev_image = image
if self.config["framestack"] == 2:
image = np.concatenate([prev_image, image], axis=2)
obs = (image, COMMAND_ORDINAL[py_measurements["next_command"]], [
py_measurements["forward_speed"],
py_measurements["distance_to_goal"]
])
self.last_obs = obs
return obs
def step(self, action):
try:
obs = self._step(action)
return obs
except Exception:
print("Error during step, terminating episode early",
traceback.format_exc())
self.clear_server_state()
return (self.last_obs, 0.0, True, {})
def _step(self, action):
if self.config["discrete_actions"]:
action = DISCRETE_ACTIONS[int(action)]
assert len(action) == 2, "Invalid action {}".format(action)
if self.config["squash_action_logits"]:
forward = 2 * float(sigmoid(action[0]) - 0.5)
throttle = float(np.clip(forward, 0, 1))
brake = float(np.abs(np.clip(forward, -1, 0)))
steer = 2 * float(sigmoid(action[1]) - 0.5)
else:
throttle = float(np.clip(action[0], 0, 1))
brake = float(np.abs(np.clip(action[0], -1, 0)))
steer = float(np.clip(action[1], -1, 1))
reverse = False
hand_brake = False
if self.config["verbose"]:
print("steer", steer, "throttle", throttle, "brake", brake,
"reverse", reverse)
self.client.send_control(
steer=steer,
throttle=throttle,
brake=brake,
hand_brake=hand_brake,
reverse=reverse)
# Process observations
image, py_measurements = self._read_observation()
if self.config["verbose"]:
print("Next command", py_measurements["next_command"])
if type(action) is np.ndarray:
py_measurements["action"] = [float(a) for a in action]
else:
py_measurements["action"] = action
py_measurements["control"] = {
"steer": steer,
"throttle": throttle,
"brake": brake,
"reverse": reverse,
"hand_brake": hand_brake,
}
reward = compute_reward(self, self.prev_measurement, py_measurements)
self.total_reward += reward
py_measurements["reward"] = reward
py_measurements["total_reward"] = self.total_reward
done = (self.num_steps > self.scenario["max_steps"]
or py_measurements["next_command"] == "REACH_GOAL"
or (self.config["early_terminate_on_collision"]
and collided_done(py_measurements)))
py_measurements["done"] = done
self.prev_measurement = py_measurements
# Write out measurements to file
if CARLA_OUT_PATH:
if not self.measurements_file:
self.measurements_file = open(
os.path.join(
CARLA_OUT_PATH,
"measurements_{}.json".format(self.episode_id)), "w")
self.measurements_file.write(json.dumps(py_measurements))
self.measurements_file.write("\n")
if done:
self.measurements_file.close()
self.measurements_file = None
if self.config["convert_images_to_video"]:
self.images_to_video()
self.num_steps += 1
image = self.preprocess_image(image)
return (self.encode_obs(image, py_measurements), reward, done,
py_measurements)
def images_to_video(self):
videos_dir = os.path.join(CARLA_OUT_PATH, "Videos")
if not os.path.exists(videos_dir):
os.makedirs(videos_dir)
ffmpeg_cmd = (
"ffmpeg -loglevel -8 -r 60 -f image2 -s {x_res}x{y_res} "
"-start_number 0 -i "
"{img}_%04d.jpg -vcodec libx264 {vid}.mp4 && rm -f {img}_*.jpg "
).format(
x_res=self.config["render_x_res"],
y_res=self.config["render_y_res"],
vid=os.path.join(videos_dir, self.episode_id),
img=os.path.join(CARLA_OUT_PATH, "CameraRGB", self.episode_id))
print("Executing ffmpeg command", ffmpeg_cmd)
subprocess.call(ffmpeg_cmd, shell=True)
def preprocess_image(self, image):
if self.config["use_depth_camera"]:
assert self.config["use_depth_camera"]
data = (image.data - 0.5) * 2
data = data.reshape(self.config["render_y_res"],
self.config["render_x_res"], 1)
data = cv2.resize(
data, (self.config["x_res"], self.config["y_res"]),
interpolation=cv2.INTER_AREA)
data = np.expand_dims(data, 2)
else:
data = image.data.reshape(self.config["render_y_res"],
self.config["render_x_res"], 3)
data = cv2.resize(
data, (self.config["x_res"], self.config["y_res"]),
interpolation=cv2.INTER_AREA)
data = (data.astype(np.float32) - 128) / 128
return data
def _read_observation(self):
# Read the data produced by the server this frame.
measurements, sensor_data = self.client.read_data()
# Print some of the measurements.
if self.config["verbose"]:
print_measurements(measurements)
observation = None
if self.config["use_depth_camera"]:
camera_name = "CameraDepth"
else:
camera_name = "CameraRGB"
for name, image in sensor_data.items():
if name == camera_name:
observation = image
cur = measurements.player_measurements
if self.config["enable_planner"]:
next_command = COMMANDS_ENUM[self.planner.get_next_command(
[cur.transform.location.x, cur.transform.location.y, GROUND_Z],
[
cur.transform.orientation.x, cur.transform.orientation.y,
GROUND_Z
],
[self.end_pos.location.x, self.end_pos.location.y, GROUND_Z], [
self.end_pos.orientation.x, self.end_pos.orientation.y,
GROUND_Z
])]
else:
next_command = "LANE_FOLLOW"
if next_command == "REACH_GOAL":
distance_to_goal = 0.0 # avoids crash in planner
elif self.config["enable_planner"]:
distance_to_goal = self.planner.get_shortest_path_distance([
cur.transform.location.x, cur.transform.location.y, GROUND_Z
], [
cur.transform.orientation.x, cur.transform.orientation.y,
GROUND_Z
], [self.end_pos.location.x, self.end_pos.location.y, GROUND_Z], [
self.end_pos.orientation.x, self.end_pos.orientation.y,
GROUND_Z
]) / 100
else:
distance_to_goal = -1
distance_to_goal_euclidean = float(
np.linalg.norm([
cur.transform.location.x - self.end_pos.location.x,
cur.transform.location.y - self.end_pos.location.y
]) / 100)
py_measurements = {
"episode_id": self.episode_id,
"step": self.num_steps,
"x": cur.transform.location.x,
"y": cur.transform.location.y,
"x_orient": cur.transform.orientation.x,
"y_orient": cur.transform.orientation.y,
"forward_speed": cur.forward_speed,
"distance_to_goal": distance_to_goal,
"distance_to_goal_euclidean": distance_to_goal_euclidean,
"collision_vehicles": cur.collision_vehicles,
"collision_pedestrians": cur.collision_pedestrians,
"collision_other": cur.collision_other,
"intersection_offroad": cur.intersection_offroad,
"intersection_otherlane": cur.intersection_otherlane,
"weather": self.weather,
"map": self.config["server_map"],
"start_coord": self.start_coord,
"end_coord": self.end_coord,
"current_scenario": self.scenario,
"x_res": self.config["x_res"],
"y_res": self.config["y_res"],
"num_vehicles": self.scenario["num_vehicles"],
"num_pedestrians": self.scenario["num_pedestrians"],
"max_steps": self.scenario["max_steps"],
"next_command": next_command,
}
if CARLA_OUT_PATH and self.config["log_images"]:
for name, image in sensor_data.items():
out_dir = os.path.join(CARLA_OUT_PATH, name)
if not os.path.exists(out_dir):
os.makedirs(out_dir)
out_file = os.path.join(
out_dir, "{}_{:>04}.jpg".format(self.episode_id,
self.num_steps))
scipy.misc.imsave(out_file, image.data)
assert observation is not None, sensor_data
return observation, py_measurements
def compute_reward_corl2017(env, prev, current):
reward = 0.0
cur_dist = current["distance_to_goal"]
prev_dist = prev["distance_to_goal"]
if env.config["verbose"]:
print("Cur dist {}, prev dist {}".format(cur_dist, prev_dist))
# Distance travelled toward the goal in m
reward += np.clip(prev_dist - cur_dist, -10.0, 10.0)
# Change in speed (km/h)
reward += 0.05 * (current["forward_speed"] - prev["forward_speed"])
# New collision damage
reward -= .00002 * (
current["collision_vehicles"] + current["collision_pedestrians"] +
current["collision_other"] - prev["collision_vehicles"] -
prev["collision_pedestrians"] - prev["collision_other"])
# New sidewalk intersection
reward -= 2 * (
current["intersection_offroad"] - prev["intersection_offroad"])
# New opposite lane intersection
reward -= 2 * (
current["intersection_otherlane"] - prev["intersection_otherlane"])
return reward
def compute_reward_custom(env, prev, current):
reward = 0.0
cur_dist = current["distance_to_goal"]
prev_dist = prev["distance_to_goal"]
if env.config["verbose"]:
print("Cur dist {}, prev dist {}".format(cur_dist, prev_dist))
# Distance travelled toward the goal in m
reward += np.clip(prev_dist - cur_dist, -10.0, 10.0)
# Speed reward, up 30.0 (km/h)
reward += np.clip(current["forward_speed"], 0.0, 30.0) / 10
# New collision damage
new_damage = (
current["collision_vehicles"] + current["collision_pedestrians"] +
current["collision_other"] - prev["collision_vehicles"] -
prev["collision_pedestrians"] - prev["collision_other"])
if new_damage:
reward -= 100.0
# Sidewalk intersection
reward -= current["intersection_offroad"]
# Opposite lane intersection
reward -= current["intersection_otherlane"]
# Reached goal
if current["next_command"] == "REACH_GOAL":
reward += 100.0
return reward
def compute_reward_lane_keep(env, prev, current):
reward = 0.0
# Speed reward, up 30.0 (km/h)
reward += np.clip(current["forward_speed"], 0.0, 30.0) / 10
# New collision damage
new_damage = (
current["collision_vehicles"] + current["collision_pedestrians"] +
current["collision_other"] - prev["collision_vehicles"] -
prev["collision_pedestrians"] - prev["collision_other"])
if new_damage:
reward -= 100.0
# Sidewalk intersection
reward -= current["intersection_offroad"]
# Opposite lane intersection
reward -= current["intersection_otherlane"]
return reward
REWARD_FUNCTIONS = {
"corl2017": compute_reward_corl2017,
"custom": compute_reward_custom,
"lane_keep": compute_reward_lane_keep,
}
def compute_reward(env, prev, current):
return REWARD_FUNCTIONS[env.config["reward_function"]](env, prev, current)
def print_measurements(measurements):
number_of_agents = len(measurements.non_player_agents)
player_measurements = measurements.player_measurements
message = "Vehicle at ({pos_x:.1f}, {pos_y:.1f}), "
message += "{speed:.2f} km/h, "
message += "Collision: {{vehicles={col_cars:.0f}, "
message += "pedestrians={col_ped:.0f}, other={col_other:.0f}}}, "
message += "{other_lane:.0f}% other lane, {offroad:.0f}% off-road, "
message += "({agents_num:d} non-player agents in the scene)"
message = message.format(
pos_x=player_measurements.transform.location.x / 100, # cm -> m
pos_y=player_measurements.transform.location.y / 100,
speed=player_measurements.forward_speed,
col_cars=player_measurements.collision_vehicles,
col_ped=player_measurements.collision_pedestrians,
col_other=player_measurements.collision_other,
other_lane=100 * player_measurements.intersection_otherlane,
offroad=100 * player_measurements.intersection_offroad,
agents_num=number_of_agents)
print(message)
def sigmoid(x):
x = float(x)
return np.exp(x) / (1 + np.exp(x))
def collided_done(py_measurements):
m = py_measurements
collided = (m["collision_vehicles"] > 0 or m["collision_pedestrians"] > 0
or m["collision_other"] > 0)
return bool(collided or m["total_reward"] < -100)
if __name__ == "__main__":
for _ in range(2):
env = CarlaEnv()
obs = env.reset()
print("reset", obs)
start = time.time()
done = False
i = 0
total_reward = 0.0
while not done:
i += 1
if ENV_CONFIG["discrete_actions"]:
obs, reward, done, info = env.step(1)
else:
obs, reward, done, info = env.step([0, 1, 0])
total_reward += reward
print(i, "rew", reward, "total", total_reward, "done", done)
print("{} fps".format(100 / (time.time() - start)))
-108
View File
@@ -1,108 +0,0 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim
from tensorflow.contrib.layers import xavier_initializer
from ray.rllib.models.catalog import ModelCatalog
from ray.rllib.models.misc import normc_initializer
from ray.rllib.models.model import Model
class CarlaModel(Model):
"""Carla model that can process the observation tuple.
The architecture processes the image using convolutional layers, the
metrics using fully connected layers, and then combines them with
further fully connected layers.
"""
# TODO(ekl): use build_layers_v2 for native dict space support
def _build_layers(self, inputs, num_outputs, options):
# Parse options
image_shape = options["custom_options"]["image_shape"]
convs = options.get("conv_filters", [
[16, [8, 8], 4],
[32, [5, 5], 3],
[32, [5, 5], 2],
[512, [10, 10], 1],
])
hiddens = options.get("fcnet_hiddens", [64])
fcnet_activation = options.get("fcnet_activation", "tanh")
if fcnet_activation == "tanh":
activation = tf.nn.tanh
elif fcnet_activation == "relu":
activation = tf.nn.relu
# Sanity checks
image_size = np.product(image_shape)
expected_shape = [image_size + 5 + 2]
assert inputs.shape.as_list()[1:] == expected_shape, \
(inputs.shape.as_list()[1:], expected_shape)
# Reshape the input vector back into its components
vision_in = tf.reshape(inputs[:, :image_size],
[tf.shape(inputs)[0]] + image_shape)
metrics_in = inputs[:, image_size:]
print("Vision in shape", vision_in)
print("Metrics in shape", metrics_in)
# Setup vision layers
with tf.name_scope("carla_vision"):
for i, (out_size, kernel, stride) in enumerate(convs[:-1], 1):
vision_in = slim.conv2d(
vision_in,
out_size,
kernel,
stride,
scope="conv{}".format(i))
out_size, kernel, stride = convs[-1]
vision_in = slim.conv2d(
vision_in,
out_size,
kernel,
stride,
padding="VALID",
scope="conv_out")
vision_in = tf.squeeze(vision_in, [1, 2])
# Setup metrics layer
with tf.name_scope("carla_metrics"):
metrics_in = slim.fully_connected(
metrics_in,
64,
weights_initializer=xavier_initializer(),
activation_fn=activation,
scope="metrics_out")
print("Shape of vision out is", vision_in.shape)
print("Shape of metric out is", metrics_in.shape)
# Combine the metrics and vision inputs
with tf.name_scope("carla_out"):
i = 1
last_layer = tf.concat([vision_in, metrics_in], axis=1)
print("Shape of concatenated out is", last_layer.shape)
for size in hiddens:
last_layer = slim.fully_connected(
last_layer,
size,
weights_initializer=xavier_initializer(),
activation_fn=activation,
scope="fc{}".format(i))
i += 1
output = slim.fully_connected(
last_layer,
num_outputs,
weights_initializer=normc_initializer(0.01),
activation_fn=None,
scope="fc_out")
return output, last_layer
def register_carla_model():
ModelCatalog.register_custom_model("carla", CarlaModel)
@@ -1,131 +0,0 @@
"""Collection of Carla scenarios, including those from the CoRL 2017 paper."""
TEST_WEATHERS = [0, 2, 5, 7, 9, 10, 11, 12, 13]
TRAIN_WEATHERS = [1, 3, 4, 6, 8, 14]
def build_scenario(city, start, end, vehicles, pedestrians, max_steps,
weathers):
return {
"city": city,
"num_vehicles": vehicles,
"num_pedestrians": pedestrians,
"weather_distribution": weathers,
"start_pos_id": start,
"end_pos_id": end,
"max_steps": max_steps,
}
# Simple scenario for Town02 that involves driving down a road
DEFAULT_SCENARIO = build_scenario(
city="Town02",
start=36,
end=40,
vehicles=20,
pedestrians=40,
max_steps=200,
weathers=[0])
# Simple scenario for Town02 that involves driving down a road
LANE_KEEP = build_scenario(
city="Town02",
start=36,
end=40,
vehicles=0,
pedestrians=0,
max_steps=2000,
weathers=[0])
# Scenarios from the CoRL2017 paper
POSES_TOWN1_STRAIGHT = [[36, 40], [39, 35], [110, 114], [7, 3], [0, 4], [
68, 50
], [61, 59], [47, 64], [147, 90], [33, 87], [26, 19], [80, 76], [45, 49], [
55, 44
], [29, 107], [95, 104], [84, 34], [53, 67], [22, 17], [91, 148], [20, 107],
[78, 70], [95, 102], [68, 44], [45, 69]]
POSES_TOWN1_ONE_CURVE = [[138, 17], [47, 16], [26, 9], [42, 49], [140, 124], [
85, 98
], [65, 133], [137, 51], [76, 66], [46, 39], [40, 60], [0, 29], [4, 129], [
121, 140
], [2, 129], [78, 44], [68, 85], [41, 102], [95, 70], [68, 129], [84, 69],
[47, 79], [110, 15], [130, 17], [0, 17]]
POSES_TOWN1_NAV = [[105, 29], [27, 130], [102, 87], [132, 27], [24, 44], [
96, 26
], [34, 67], [28, 1], [140, 134], [105, 9], [148, 129], [65, 18], [21, 16], [
147, 97
], [42, 51], [30, 41], [18, 107], [69, 45], [102, 95], [18, 145], [111, 64],
[79, 45], [84, 69], [73, 31], [37, 81]]
POSES_TOWN2_STRAIGHT = [[38, 34], [4, 2], [12, 10], [62, 55], [43, 47], [
64, 66
], [78, 76], [59, 57], [61, 18], [35, 39], [12, 8], [0, 18], [75, 68], [
54, 60
], [45, 49], [46, 42], [53, 46], [80, 29], [65, 63], [0, 81], [54, 63],
[51, 42], [16, 19], [17, 26], [77, 68]]
POSES_TOWN2_ONE_CURVE = [[37, 76], [8, 24], [60, 69], [38, 10], [21, 1], [
58, 71
], [74, 32], [44, 0], [71, 16], [14, 24], [34, 11], [43, 14], [75, 16], [
80, 21
], [3, 23], [75, 59], [50, 47], [11, 19], [77, 34], [79, 25], [40, 63],
[58, 76], [79, 55], [16, 61], [27, 11]]
POSES_TOWN2_NAV = [[19, 66], [79, 14], [19, 57], [23, 1], [53, 76], [42, 13], [
31, 71
], [33, 5], [54, 30], [10, 61], [66, 3], [27, 12], [79, 19], [2, 29], [16, 14],
[5, 57], [70, 73], [46, 67], [57, 50], [61, 49], [21, 12],
[51, 81], [77, 68], [56, 65], [43, 54]]
TOWN1_STRAIGHT = [
build_scenario("Town01", start, end, 0, 0, 300, TEST_WEATHERS)
for (start, end) in POSES_TOWN1_STRAIGHT
]
TOWN1_ONE_CURVE = [
build_scenario("Town01", start, end, 0, 0, 600, TEST_WEATHERS)
for (start, end) in POSES_TOWN1_ONE_CURVE
]
TOWN1_NAVIGATION = [
build_scenario("Town01", start, end, 0, 0, 900, TEST_WEATHERS)
for (start, end) in POSES_TOWN1_NAV
]
TOWN1_NAVIGATION_DYNAMIC = [
build_scenario("Town01", start, end, 20, 50, 900, TEST_WEATHERS)
for (start, end) in POSES_TOWN1_NAV
]
TOWN2_STRAIGHT = [
build_scenario("Town02", start, end, 0, 0, 300, TRAIN_WEATHERS)
for (start, end) in POSES_TOWN2_STRAIGHT
]
TOWN2_STRAIGHT_DYNAMIC = [
build_scenario("Town02", start, end, 20, 50, 300, TRAIN_WEATHERS)
for (start, end) in POSES_TOWN2_STRAIGHT
]
TOWN2_ONE_CURVE = [
build_scenario("Town02", start, end, 0, 0, 600, TRAIN_WEATHERS)
for (start, end) in POSES_TOWN2_ONE_CURVE
]
TOWN2_NAVIGATION = [
build_scenario("Town02", start, end, 0, 0, 900, TRAIN_WEATHERS)
for (start, end) in POSES_TOWN2_NAV
]
TOWN2_NAVIGATION_DYNAMIC = [
build_scenario("Town02", start, end, 20, 50, 900, TRAIN_WEATHERS)
for (start, end) in POSES_TOWN2_NAV
]
TOWN1_ALL = (TOWN1_STRAIGHT + TOWN1_ONE_CURVE + TOWN1_NAVIGATION +
TOWN1_NAVIGATION_DYNAMIC)
TOWN2_ALL = (TOWN2_STRAIGHT + TOWN2_ONE_CURVE + TOWN2_NAVIGATION +
TOWN2_NAVIGATION_DYNAMIC)
@@ -1,51 +0,0 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import ray
from ray.tune import grid_search, run_experiments
from env import CarlaEnv, ENV_CONFIG
from models import register_carla_model
from scenarios import TOWN2_STRAIGHT
env_config = ENV_CONFIG.copy()
env_config.update({
"verbose": False,
"x_res": 80,
"y_res": 80,
"squash_action_logits": grid_search([False, True]),
"use_depth_camera": False,
"discrete_actions": False,
"server_map": "/Game/Maps/Town02",
"reward_function": grid_search(["custom", "corl2017"]),
"scenarios": TOWN2_STRAIGHT,
})
register_carla_model()
redis_address = ray.services.get_node_ip_address() + ":6379"
ray.init(redis_address=redis_address)
run_experiments({
"carla-a3c": {
"run": "A3C",
"env": CarlaEnv,
"config": {
"env_config": env_config,
"use_gpu_for_workers": True,
"model": {
"custom_model": "carla",
"custom_options": {
"image_shape": [80, 80, 6],
},
"conv_filters": [
[16, [8, 8], 4],
[32, [4, 4], 2],
[512, [10, 10], 1],
],
},
"gamma": 0.95,
"num_workers": 2,
},
},
})
@@ -1,65 +0,0 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import ray
from ray.tune import run_experiments
from env import CarlaEnv, ENV_CONFIG
from models import register_carla_model
from scenarios import TOWN2_ONE_CURVE
env_config = ENV_CONFIG.copy()
env_config.update({
"verbose": False,
"x_res": 80,
"y_res": 80,
"discrete_actions": True,
"server_map": "/Game/Maps/Town02",
"reward_function": "custom",
"scenarios": TOWN2_ONE_CURVE,
})
register_carla_model()
ray.init()
def shape_out(spec):
return (spec.config.env_config.framestack *
(spec.config.env_config.use_depth_camera and 1 or 3))
run_experiments({
"carla-dqn": {
"run": "DQN",
"env": CarlaEnv,
"config": {
"env_config": env_config,
"model": {
"custom_model": "carla",
"custom_options": {
"image_shape": [
80,
80,
shape_out,
],
},
"conv_filters": [
[16, [8, 8], 4],
[32, [4, 4], 2],
[512, [10, 10], 1],
],
},
"timesteps_per_iteration": 100,
"learning_starts": 1000,
"schedule_max_timesteps": 100000,
"gamma": 0.8,
"tf_session_args": {
"gpu_options": {
"allow_growth": True
},
},
},
},
})
@@ -1,55 +0,0 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import ray
from ray.tune import run_experiments
from env import CarlaEnv, ENV_CONFIG
from models import register_carla_model
from scenarios import TOWN2_STRAIGHT
env_config = ENV_CONFIG.copy()
env_config.update({
"verbose": False,
"x_res": 80,
"y_res": 80,
"use_depth_camera": False,
"discrete_actions": False,
"server_map": "/Game/Maps/Town02",
"scenarios": TOWN2_STRAIGHT,
})
register_carla_model()
ray.init()
run_experiments({
"carla": {
"run": "PPO",
"env": CarlaEnv,
"config": {
"env_config": env_config,
"model": {
"custom_model": "carla",
"custom_options": {
"image_shape": [
env_config["x_res"], env_config["y_res"], 6
],
},
"conv_filters": [
[16, [8, 8], 4],
[32, [4, 4], 2],
[512, [10, 10], 1],
],
},
"num_workers": 1,
"train_batch_size": 2000,
"sample_batch_size": 100,
"lambda": 0.95,
"clip_param": 0.2,
"num_sgd_iter": 20,
"lr": 0.0001,
"sgd_minibatch_size": 32,
"num_gpus": 1,
},
},
})
@@ -11,11 +11,13 @@ from __future__ import print_function
from gym.spaces import Discrete, Box
import gym
import numpy as np
import tensorflow as tf
import ray
from ray.rllib.models import Model, ModelCatalog
from ray.tune import run_experiments, sample_from
from ray.rllib.utils import try_import_tf
tf = try_import_tf()
class FastModel(Model):
+3 -1
View File
@@ -15,7 +15,6 @@ $ python custom_loss.py --input-files=/tmp/cartpole
import argparse
import os
import tensorflow as tf
import ray
from ray import tune
@@ -23,6 +22,9 @@ from ray.rllib.models import (Categorical, FullyConnectedNetwork, Model,
ModelCatalog)
from ray.rllib.models.model import restore_original_dimensions
from ray.rllib.offline import JsonReader
from ray.rllib.utils import try_import_tf
tf = try_import_tf()
parser = argparse.ArgumentParser()
parser.add_argument("--iters", type=int, default=200)
@@ -6,9 +6,11 @@ from __future__ import print_function
import os
import ray
import tensorflow as tf
from ray.rllib.agents.registry import get_agent_class
from ray.rllib.utils import try_import_tf
tf = try_import_tf()
ray.init(num_cpus=10)
@@ -16,14 +16,14 @@ import argparse
import gym
import random
import tensorflow as tf
import tensorflow.contrib.slim as slim
import ray
from ray import tune
from ray.rllib.models import Model, ModelCatalog
from ray.rllib.tests.test_multi_agent_env import MultiCartpole
from ray.tune.registry import register_env
from ray.rllib.utils import try_import_tf
tf = try_import_tf()
parser = argparse.ArgumentParser()
@@ -43,12 +43,12 @@ class CustomModel1(Model):
tf.VariableScope(tf.AUTO_REUSE, "shared"),
reuse=tf.AUTO_REUSE,
auxiliary_name_scope=False):
last_layer = slim.fully_connected(
input_dict["obs"], 64, activation_fn=tf.nn.relu, scope="fc1")
last_layer = slim.fully_connected(
last_layer, 64, activation_fn=tf.nn.relu, scope="fc2")
output = slim.fully_connected(
last_layer, num_outputs, activation_fn=None, scope="fc_out")
last_layer = tf.layers.dense(
input_dict["obs"], 64, activation=tf.nn.relu, name="fc1")
last_layer = tf.layers.dense(
last_layer, 64, activation=tf.nn.relu, name="fc2")
output = tf.layers.dense(
last_layer, num_outputs, activation=None, name="fc_out")
return output, last_layer
@@ -59,12 +59,12 @@ class CustomModel2(Model):
tf.VariableScope(tf.AUTO_REUSE, "shared"),
reuse=tf.AUTO_REUSE,
auxiliary_name_scope=False):
last_layer = slim.fully_connected(
input_dict["obs"], 64, activation_fn=tf.nn.relu, scope="fc1")
last_layer = slim.fully_connected(
last_layer, 64, activation_fn=tf.nn.relu, scope="fc2")
output = slim.fully_connected(
last_layer, num_outputs, activation_fn=None, scope="fc_out")
last_layer = tf.layers.dense(
input_dict["obs"], 64, activation=tf.nn.relu, name="fc1")
last_layer = tf.layers.dense(
last_layer, 64, activation=tf.nn.relu, name="fc2")
output = tf.layers.dense(
last_layer, num_outputs, activation=None, name="fc_out")
return output, last_layer
@@ -23,14 +23,15 @@ import random
import numpy as np
import gym
from gym.spaces import Box, Discrete, Dict
import tensorflow as tf
import tensorflow.contrib.slim as slim
import ray
from ray import tune
from ray.rllib.models import Model, ModelCatalog
from ray.rllib.models.misc import normc_initializer
from ray.tune.registry import register_env
from ray.rllib.utils import try_import_tf
tf = try_import_tf()
parser = argparse.ArgumentParser()
parser.add_argument("--stop", type=int, default=200)
@@ -134,18 +135,18 @@ class ParametricActionsModel(Model):
hiddens = [256, 256]
for i, size in enumerate(hiddens):
label = "fc{}".format(i)
last_layer = slim.fully_connected(
last_layer = tf.layers.dense(
last_layer,
size,
weights_initializer=normc_initializer(1.0),
activation_fn=tf.nn.tanh,
scope=label)
output = slim.fully_connected(
kernel_initializer=normc_initializer(1.0),
activation=tf.nn.tanh,
name=label)
output = tf.layers.dense(
last_layer,
action_embed_size,
weights_initializer=normc_initializer(0.01),
activation_fn=None,
scope="fc_out")
kernel_initializer=normc_initializer(0.01),
activation=None,
name="fc_out")
# Expand the model output to [BATCH, 1, EMBED_SIZE]. Note that the
# avail actions tensor is of shape [BATCH, MAX_ACTIONS, EMBED_SIZE].
+5 -1
View File
@@ -12,7 +12,11 @@ from ray.rllib.utils import try_import_tf
tf = try_import_tf()
if tf:
use_tf150_api = (distutils.version.LooseVersion(tf.VERSION) >=
if hasattr(tf, "__version__"):
version = tf.__version__
else:
version = tf.VERSION
use_tf150_api = (distutils.version.LooseVersion(version) >=
distutils.version.LooseVersion("1.5.0"))
else:
use_tf150_api = False
+8 -10
View File
@@ -21,8 +21,6 @@ class FullyConnectedNetwork(Model):
model that processes the components separately, use _build_layers_v2().
"""
import tensorflow.contrib.slim as slim
hiddens = options.get("fcnet_hiddens")
activation = get_activation_fn(options.get("fcnet_activation"))
@@ -31,18 +29,18 @@ class FullyConnectedNetwork(Model):
last_layer = inputs
for size in hiddens:
label = "fc{}".format(i)
last_layer = slim.fully_connected(
last_layer = tf.layers.dense(
last_layer,
size,
weights_initializer=normc_initializer(1.0),
activation_fn=activation,
scope=label)
kernel_initializer=normc_initializer(1.0),
activation=activation,
name=label)
i += 1
label = "fc_out"
output = slim.fully_connected(
output = tf.layers.dense(
last_layer,
num_outputs,
weights_initializer=normc_initializer(0.01),
activation_fn=None,
scope=label)
kernel_initializer=normc_initializer(0.01),
activation=None,
name=label)
return output, last_layer
+1 -3
View File
@@ -38,8 +38,6 @@ class LSTM(Model):
@override(Model)
def _build_layers_v2(self, input_dict, num_outputs, options):
import tensorflow.contrib.rnn as rnn
cell_size = options.get("lstm_cell_size")
if options.get("lstm_use_prev_action_reward"):
action_dim = int(
@@ -76,7 +74,7 @@ class LSTM(Model):
self.state_in = [c_in, h_in]
# Setup LSTM outputs
state_in = rnn.LSTMStateTuple(c_in, h_in)
state_in = tf.nn.rnn_cell.LSTMStateTuple(c_in, h_in)
lstm_out, lstm_state = tf.nn.dynamic_rnn(
lstm,
last_layer,
+12 -13
View File
@@ -15,8 +15,6 @@ class VisionNetwork(Model):
@override(Model)
def _build_layers_v2(self, input_dict, num_outputs, options):
import tensorflow.contrib.slim as slim
inputs = input_dict["obs"]
filters = options.get("conv_filters")
if not filters:
@@ -26,28 +24,29 @@ class VisionNetwork(Model):
with tf.name_scope("vision_net"):
for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
inputs = slim.conv2d(
inputs = tf.layers.conv2d(
inputs,
out_size,
kernel,
stride,
activation_fn=activation,
scope="conv{}".format(i))
activation=activation,
padding="same",
name="conv{}".format(i))
out_size, kernel, stride = filters[-1]
fc1 = slim.conv2d(
fc1 = tf.layers.conv2d(
inputs,
out_size,
kernel,
stride,
activation_fn=activation,
padding="VALID",
scope="fc1")
fc2 = slim.conv2d(
activation=activation,
padding="valid",
name="fc1")
fc2 = tf.layers.conv2d(
fc1,
num_outputs, [1, 1],
activation_fn=None,
normalizer_fn=None,
scope="fc2")
activation=None,
padding="same",
name="fc2")
return flatten(fc2), flatten(fc1)
@@ -17,6 +17,9 @@ from ray.rllib.optimizers.aso_minibatch_buffer import MinibatchBuffer
from ray.rllib.optimizers.multi_gpu_impl import LocalSyncParallelOptimizer
from ray.rllib.utils.annotations import override
from ray.rllib.utils.timer import TimerStat
from ray.rllib.utils import try_import_tf
tf = try_import_tf()
logger = logging.getLogger(__name__)
@@ -38,9 +41,6 @@ class TFMultiGPULearner(LearnerThread):
learner_queue_size=16,
num_data_load_threads=16,
_fake_gpus=False):
# Multi-GPU requires TensorFlow to function.
import tensorflow as tf
LearnerThread.__init__(self, local_evaluator, minibatch_buffer_size,
num_sgd_iter, learner_queue_size)
self.lr = lr
+3 -1
View File
@@ -1,6 +1,5 @@
import gym
import numpy as np
import tensorflow as tf
import unittest
from gym.spaces import Box, Discrete, Tuple
@@ -12,6 +11,9 @@ from ray.rllib.models.preprocessors import (NoPreprocessor, OneHotPreprocessor,
Preprocessor)
from ray.rllib.models.fcnet import FullyConnectedNetwork
from ray.rllib.models.visionnet import VisionNetwork
from ray.rllib.utils import try_import_tf
tf = try_import_tf()
class CustomPreprocessor(Preprocessor):
+5 -4
View File
@@ -6,8 +6,6 @@ import gym
import numpy as np
import pickle
import unittest
import tensorflow as tf
import tensorflow.contrib.rnn as rnn
import ray
from ray.rllib.agents.ppo import PPOTrainer
@@ -16,6 +14,9 @@ from ray.rllib.models.lstm import add_time_dimension, chop_into_sequences
from ray.rllib.models.misc import linear, normc_initializer
from ray.rllib.models.model import Model
from ray.tune.registry import register_env
from ray.rllib.utils import try_import_tf
tf = try_import_tf()
class LSTMUtilsTest(unittest.TestCase):
@@ -104,7 +105,7 @@ class RNNSpyModel(Model):
last_layer = add_time_dimension(features, self.seq_lens)
# Setup the LSTM cell
lstm = rnn.BasicLSTMCell(cell_size, state_is_tuple=True)
lstm = tf.nn.rnn_cell.BasicLSTMCell(cell_size, state_is_tuple=True)
self.state_init = [
np.zeros(lstm.state_size.c, np.float32),
np.zeros(lstm.state_size.h, np.float32)
@@ -121,7 +122,7 @@ class RNNSpyModel(Model):
self.state_in = [c_in, h_in]
# Setup LSTM outputs
state_in = rnn.LSTMStateTuple(c_in, h_in)
state_in = tf.nn.rnn_cell.LSTMStateTuple(c_in, h_in)
lstm_out, lstm_state = tf.nn.dynamic_rnn(
lstm,
last_layer,
+6 -5
View File
@@ -7,8 +7,6 @@ import pickle
from gym import spaces
from gym.envs.registration import EnvSpec
import gym
import tensorflow.contrib.slim as slim
import tensorflow as tf
import unittest
import ray
@@ -25,6 +23,9 @@ from ray.rllib.models.pytorch.model import TorchModel
from ray.rllib.rollout import rollout
from ray.rllib.tests.test_external_env import SimpleServing
from ray.tune.registry import register_env
from ray.rllib.utils import try_import_tf
tf = try_import_tf()
DICT_SPACE = spaces.Dict({
"sensors": spaces.Dict({
@@ -179,8 +180,8 @@ class DictSpyModel(Model):
stateful=True)
with tf.control_dependencies([spy_fn]):
output = slim.fully_connected(
input_dict["obs"]["sensors"]["position"], num_outputs)
output = tf.layers.dense(input_dict["obs"]["sensors"]["position"],
num_outputs)
return output, output
@@ -208,7 +209,7 @@ class TupleSpyModel(Model):
stateful=True)
with tf.control_dependencies([spy_fn]):
output = slim.fully_connected(input_dict["obs"][0], num_outputs)
output = tf.layers.dense(input_dict["obs"][0], num_outputs)
return output, output
+3 -1
View File
@@ -4,7 +4,6 @@ from __future__ import print_function
import gym
import numpy as np
import tensorflow as tf
import time
import unittest
@@ -16,6 +15,9 @@ from ray.rllib.evaluation.policy_evaluator import PolicyEvaluator
from ray.rllib.optimizers import AsyncGradientsOptimizer, AsyncSamplesOptimizer
from ray.rllib.optimizers.aso_tree_aggregator import TreeAggregator
from ray.rllib.tests.mock_evaluator import _MockEvaluator
from ray.rllib.utils import try_import_tf
tf = try_import_tf()
class AsyncOptimizerTest(unittest.TestCase):
+7 -2
View File
@@ -33,10 +33,15 @@ def try_import_tf():
return None
try:
import tensorflow as tf
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
return tf
except ImportError:
return None
try:
import tensorflow as tf
return tf
except ImportError:
return None
__all__ = [