mirror of
https://github.com/wassname/ray.git
synced 2026-07-03 12:19:50 +08:00
[rllib] TensorFlow 2 compatibility (#4802)
This commit is contained in:
@@ -399,8 +399,6 @@ class DDPGPolicyGraph(DDPGPostprocessing, TFPolicyGraph):
|
||||
self.set_pure_exploration_phase(state[2])
|
||||
|
||||
def _build_q_network(self, obs, obs_space, action_space, actions):
|
||||
import tensorflow.contrib.layers as layers
|
||||
|
||||
if self.config["use_state_preprocessor"]:
|
||||
q_model = ModelCatalog.get_model({
|
||||
"obs": obs,
|
||||
@@ -413,16 +411,12 @@ class DDPGPolicyGraph(DDPGPostprocessing, TFPolicyGraph):
|
||||
|
||||
activation = getattr(tf.nn, self.config["critic_hidden_activation"])
|
||||
for hidden in self.config["critic_hiddens"]:
|
||||
q_out = layers.fully_connected(
|
||||
q_out, num_outputs=hidden, activation_fn=activation)
|
||||
q_values = layers.fully_connected(
|
||||
q_out, num_outputs=1, activation_fn=None)
|
||||
q_out = tf.layers.dense(q_out, units=hidden, activation=activation)
|
||||
q_values = tf.layers.dense(q_out, units=1, activation=None)
|
||||
|
||||
return q_values, q_model
|
||||
|
||||
def _build_policy_network(self, obs, obs_space, action_space):
|
||||
import tensorflow.contrib.layers as layers
|
||||
|
||||
if self.config["use_state_preprocessor"]:
|
||||
model = ModelCatalog.get_model({
|
||||
"obs": obs,
|
||||
@@ -434,16 +428,19 @@ class DDPGPolicyGraph(DDPGPostprocessing, TFPolicyGraph):
|
||||
action_out = obs
|
||||
|
||||
activation = getattr(tf.nn, self.config["actor_hidden_activation"])
|
||||
normalizer_fn = layers.layer_norm if self.config["parameter_noise"] \
|
||||
else None
|
||||
for hidden in self.config["actor_hiddens"]:
|
||||
action_out = layers.fully_connected(
|
||||
action_out,
|
||||
num_outputs=hidden,
|
||||
activation_fn=activation,
|
||||
normalizer_fn=normalizer_fn)
|
||||
action_out = layers.fully_connected(
|
||||
action_out, num_outputs=self.dim_actions, activation_fn=None)
|
||||
if self.config["parameter_noise"]:
|
||||
import tensorflow.contrib.layers as layers
|
||||
action_out = layers.fully_connected(
|
||||
action_out,
|
||||
num_outputs=hidden,
|
||||
activation_fn=activation,
|
||||
normalizer_fn=layers.layer_norm)
|
||||
else:
|
||||
action_out = tf.layers.dense(
|
||||
action_out, units=hidden, activation=activation)
|
||||
action_out = tf.layers.dense(
|
||||
action_out, units=self.dim_actions, activation=None)
|
||||
|
||||
# Use sigmoid to scale to [0,1], but also double magnitude of input to
|
||||
# emulate behaviour of tanh activation used in DDPG and TD3 papers.
|
||||
@@ -507,7 +504,7 @@ class DDPGPolicyGraph(DDPGPostprocessing, TFPolicyGraph):
|
||||
|
||||
def make_uniform_random_actions():
|
||||
# pure random exploration option
|
||||
uniform_random_actions = tf.random.uniform(
|
||||
uniform_random_actions = tf.random_uniform(
|
||||
tf.shape(deterministic_actions))
|
||||
# rescale uniform random actions according to action range
|
||||
tf_range = tf.constant(action_range[None], dtype="float32")
|
||||
|
||||
@@ -154,8 +154,6 @@ class QNetwork(object):
|
||||
v_max=10.0,
|
||||
sigma0=0.5,
|
||||
parameter_noise=False):
|
||||
import tensorflow.contrib.layers as layers
|
||||
|
||||
self.model = model
|
||||
with tf.variable_scope("action_value"):
|
||||
if hiddens:
|
||||
@@ -164,13 +162,18 @@ class QNetwork(object):
|
||||
if use_noisy:
|
||||
action_out = self.noisy_layer(
|
||||
"hidden_%d" % i, action_out, hiddens[i], sigma0)
|
||||
else:
|
||||
elif parameter_noise:
|
||||
import tensorflow.contrib.layers as layers
|
||||
action_out = layers.fully_connected(
|
||||
action_out,
|
||||
num_outputs=hiddens[i],
|
||||
activation_fn=tf.nn.relu,
|
||||
normalizer_fn=layers.layer_norm
|
||||
if parameter_noise else None)
|
||||
normalizer_fn=layers.layer_norm)
|
||||
else:
|
||||
action_out = tf.layers.dense(
|
||||
action_out,
|
||||
units=hiddens[i],
|
||||
activation=tf.nn.relu)
|
||||
else:
|
||||
# Avoid postprocessing the outputs. This enables custom models
|
||||
# to be used for parametric action DQN.
|
||||
@@ -183,10 +186,8 @@ class QNetwork(object):
|
||||
sigma0,
|
||||
non_linear=False)
|
||||
elif hiddens:
|
||||
action_scores = layers.fully_connected(
|
||||
action_out,
|
||||
num_outputs=num_actions * num_atoms,
|
||||
activation_fn=None)
|
||||
action_scores = tf.layers.dense(
|
||||
action_out, units=num_actions * num_atoms, activation=None)
|
||||
else:
|
||||
action_scores = model.outputs
|
||||
if num_atoms > 1:
|
||||
@@ -214,13 +215,15 @@ class QNetwork(object):
|
||||
state_out = self.noisy_layer("dueling_hidden_%d" % i,
|
||||
state_out, hiddens[i],
|
||||
sigma0)
|
||||
else:
|
||||
state_out = layers.fully_connected(
|
||||
elif parameter_noise:
|
||||
state_out = tf.contrib.layers.fully_connected(
|
||||
state_out,
|
||||
num_outputs=hiddens[i],
|
||||
activation_fn=tf.nn.relu,
|
||||
normalizer_fn=layers.layer_norm
|
||||
if parameter_noise else None)
|
||||
normalizer_fn=tf.contrib.layers.layer_norm)
|
||||
else:
|
||||
state_out = tf.layers.dense(
|
||||
state_out, units=hiddens[i], activation=tf.nn.relu)
|
||||
if use_noisy:
|
||||
state_score = self.noisy_layer(
|
||||
"dueling_output",
|
||||
@@ -229,8 +232,8 @@ class QNetwork(object):
|
||||
sigma0,
|
||||
non_linear=False)
|
||||
else:
|
||||
state_score = layers.fully_connected(
|
||||
state_out, num_outputs=num_atoms, activation_fn=None)
|
||||
state_score = tf.layers.dense(
|
||||
state_out, units=num_atoms, activation=None)
|
||||
if num_atoms > 1:
|
||||
support_logits_per_action_mean = tf.reduce_mean(
|
||||
support_logits_per_action, 1)
|
||||
|
||||
@@ -38,8 +38,6 @@ from ray.rllib.models.action_dist import Categorical
|
||||
from ray.rllib.utils import try_import_tf
|
||||
|
||||
tf = try_import_tf()
|
||||
if tf:
|
||||
nest = tf.contrib.framework.nest
|
||||
|
||||
VTraceFromLogitsReturns = collections.namedtuple("VTraceFromLogitsReturns", [
|
||||
"vs", "pg_advantages", "log_rhos", "behaviour_action_log_probs",
|
||||
|
||||
@@ -278,14 +278,11 @@ class VTracePolicyGraph(LearningRateSchedule, VTracePostprocessing,
|
||||
self.KL_stats.update({
|
||||
"mean_KL_{}".format(i): tf.reduce_mean(kl),
|
||||
"max_KL_{}".format(i): tf.reduce_max(kl),
|
||||
"median_KL_{}".format(i): tf.contrib.distributions.
|
||||
percentile(kl, 50.0),
|
||||
})
|
||||
else:
|
||||
self.KL_stats = {
|
||||
"mean_KL": tf.reduce_mean(kls[0]),
|
||||
"max_KL": tf.reduce_max(kls[0]),
|
||||
"median_KL": tf.contrib.distributions.percentile(kls[0], 50.0),
|
||||
}
|
||||
|
||||
# Initialize TFPolicyGraph
|
||||
|
||||
@@ -26,8 +26,10 @@ from __future__ import print_function
|
||||
|
||||
from absl.testing import parameterized
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
import vtrace
|
||||
from ray.rllib.utils import try_import_tf
|
||||
|
||||
tf = try_import_tf()
|
||||
|
||||
|
||||
def _shaped_arange(*shape):
|
||||
|
||||
@@ -399,14 +399,11 @@ class AsyncPPOPolicyGraph(LearningRateSchedule, APPOPostprocessing,
|
||||
self.KL_stats.update({
|
||||
"mean_KL_{}".format(i): tf.reduce_mean(kl),
|
||||
"max_KL_{}".format(i): tf.reduce_max(kl),
|
||||
"median_KL_{}".format(i): tf.contrib.distributions.
|
||||
percentile(kl, 50.0),
|
||||
})
|
||||
else:
|
||||
self.KL_stats = {
|
||||
"mean_KL": tf.reduce_mean(kls[0]),
|
||||
"max_KL": tf.reduce_max(kls[0]),
|
||||
"median_KL": tf.contrib.distributions.percentile(kls[0], 50.0),
|
||||
}
|
||||
|
||||
# Initialize TFPolicyGraph
|
||||
|
||||
@@ -4,11 +4,13 @@ from __future__ import print_function
|
||||
|
||||
import unittest
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
from numpy.testing import assert_allclose
|
||||
|
||||
from ray.rllib.models.action_dist import Categorical
|
||||
from ray.rllib.agents.ppo.utils import flatten, concatenate
|
||||
from ray.rllib.utils import try_import_tf
|
||||
|
||||
tf = try_import_tf()
|
||||
|
||||
|
||||
# TODO(ekl): move to rllib/models dir
|
||||
|
||||
@@ -5,13 +5,13 @@ from __future__ import print_function
|
||||
|
||||
import argparse
|
||||
|
||||
import tensorflow as tf
|
||||
import tensorflow.contrib.slim as slim
|
||||
|
||||
import ray
|
||||
from ray import tune
|
||||
from ray.rllib.models import Model, ModelCatalog
|
||||
from ray.rllib.models.misc import normc_initializer
|
||||
from ray.rllib.utils import try_import_tf
|
||||
|
||||
tf = try_import_tf()
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--num-iters", type=int, default=200)
|
||||
@@ -24,21 +24,21 @@ class BatchNormModel(Model):
|
||||
hiddens = [256, 256]
|
||||
for i, size in enumerate(hiddens):
|
||||
label = "fc{}".format(i)
|
||||
last_layer = slim.fully_connected(
|
||||
last_layer = tf.layers.dense(
|
||||
last_layer,
|
||||
size,
|
||||
weights_initializer=normc_initializer(1.0),
|
||||
activation_fn=tf.nn.tanh,
|
||||
scope=label)
|
||||
kernel_initializer=normc_initializer(1.0),
|
||||
activation=tf.nn.tanh,
|
||||
name=label)
|
||||
# Add a batch norm layer
|
||||
last_layer = tf.layers.batch_normalization(
|
||||
last_layer, training=input_dict["is_training"])
|
||||
output = slim.fully_connected(
|
||||
output = tf.layers.dense(
|
||||
last_layer,
|
||||
num_outputs,
|
||||
weights_initializer=normc_initializer(0.01),
|
||||
activation_fn=None,
|
||||
scope="fc_out")
|
||||
kernel_initializer=normc_initializer(0.01),
|
||||
activation=None,
|
||||
name="fc_out")
|
||||
return output, last_layer
|
||||
|
||||
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
(Experimental) OpenAI gym environment for https://github.com/carla-simulator/carla
|
||||
|
||||
To run, first download and unpack the Carla binaries from this URL: https://github.com/carla-simulator/carla/releases/tag/0.7.0
|
||||
|
||||
Note that currently you also need to clone the Python code from `carla/benchmark_branch` which includes the Carla planner.
|
||||
|
||||
Then, you can try running env.py to drive the car. Run one of the train_* scripts to attempt training.
|
||||
|
||||
$ pkill -9 Carla
|
||||
$ export CARLA_SERVER=/PATH/TO/CARLA_0.7.0/CarlaUE4.sh
|
||||
$ export CARLA_PY_PATH=/PATH/TO/CARLA_BENCHMARK_BRANCH_REPO/PythonClient
|
||||
$ python env.py
|
||||
|
||||
Check out the scenarios.py file for different training and test scenarios that can be used.
|
||||
@@ -1,684 +0,0 @@
|
||||
"""OpenAI gym environment for Carla. Run this file for a demo."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from datetime import datetime
|
||||
import atexit
|
||||
import cv2
|
||||
import os
|
||||
import json
|
||||
import random
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
|
||||
import numpy as np
|
||||
try:
|
||||
import scipy.misc
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
import gym
|
||||
from gym.spaces import Box, Discrete, Tuple
|
||||
|
||||
from scenarios import DEFAULT_SCENARIO
|
||||
|
||||
# Set this where you want to save image outputs (or empty string to disable)
|
||||
CARLA_OUT_PATH = os.environ.get("CARLA_OUT", os.path.expanduser("~/carla_out"))
|
||||
if CARLA_OUT_PATH and not os.path.exists(CARLA_OUT_PATH):
|
||||
os.makedirs(CARLA_OUT_PATH)
|
||||
|
||||
# Set this to the path of your Carla binary
|
||||
SERVER_BINARY = os.environ.get("CARLA_SERVER",
|
||||
os.path.expanduser("~/CARLA_0.7.0/CarlaUE4.sh"))
|
||||
|
||||
assert os.path.exists(SERVER_BINARY)
|
||||
if "CARLA_PY_PATH" in os.environ:
|
||||
sys.path.append(os.path.expanduser(os.environ["CARLA_PY_PATH"]))
|
||||
else:
|
||||
# TODO(ekl) switch this to the binary path once the planner is in master
|
||||
sys.path.append(os.path.expanduser("~/carla/PythonClient/"))
|
||||
|
||||
try:
|
||||
from carla.client import CarlaClient
|
||||
from carla.sensor import Camera
|
||||
from carla.settings import CarlaSettings
|
||||
from carla.planner.planner import Planner, REACH_GOAL, GO_STRAIGHT, \
|
||||
TURN_RIGHT, TURN_LEFT, LANE_FOLLOW
|
||||
except Exception as e:
|
||||
print("Failed to import Carla python libs, try setting $CARLA_PY_PATH")
|
||||
raise e
|
||||
|
||||
# Carla planner commands
|
||||
COMMANDS_ENUM = {
|
||||
REACH_GOAL: "REACH_GOAL",
|
||||
GO_STRAIGHT: "GO_STRAIGHT",
|
||||
TURN_RIGHT: "TURN_RIGHT",
|
||||
TURN_LEFT: "TURN_LEFT",
|
||||
LANE_FOLLOW: "LANE_FOLLOW",
|
||||
}
|
||||
|
||||
# Mapping from string repr to one-hot encoding index to feed to the model
|
||||
COMMAND_ORDINAL = {
|
||||
"REACH_GOAL": 0,
|
||||
"GO_STRAIGHT": 1,
|
||||
"TURN_RIGHT": 2,
|
||||
"TURN_LEFT": 3,
|
||||
"LANE_FOLLOW": 4,
|
||||
}
|
||||
|
||||
# Number of retries if the server doesn't respond
|
||||
RETRIES_ON_ERROR = 5
|
||||
|
||||
# Dummy Z coordinate to use when we only care about (x, y)
|
||||
GROUND_Z = 22
|
||||
|
||||
# Default environment configuration
|
||||
ENV_CONFIG = {
|
||||
"log_images": True,
|
||||
"enable_planner": True,
|
||||
"framestack": 2, # note: only [1, 2] currently supported
|
||||
"convert_images_to_video": True,
|
||||
"early_terminate_on_collision": True,
|
||||
"verbose": True,
|
||||
"reward_function": "custom",
|
||||
"render_x_res": 800,
|
||||
"render_y_res": 600,
|
||||
"x_res": 80,
|
||||
"y_res": 80,
|
||||
"server_map": "/Game/Maps/Town02",
|
||||
"scenarios": [DEFAULT_SCENARIO],
|
||||
"use_depth_camera": False,
|
||||
"discrete_actions": True,
|
||||
"squash_action_logits": False,
|
||||
}
|
||||
|
||||
DISCRETE_ACTIONS = {
|
||||
# coast
|
||||
0: [0.0, 0.0],
|
||||
# turn left
|
||||
1: [0.0, -0.5],
|
||||
# turn right
|
||||
2: [0.0, 0.5],
|
||||
# forward
|
||||
3: [1.0, 0.0],
|
||||
# brake
|
||||
4: [-0.5, 0.0],
|
||||
# forward left
|
||||
5: [1.0, -0.5],
|
||||
# forward right
|
||||
6: [1.0, 0.5],
|
||||
# brake left
|
||||
7: [-0.5, -0.5],
|
||||
# brake right
|
||||
8: [-0.5, 0.5],
|
||||
}
|
||||
|
||||
live_carla_processes = set()
|
||||
|
||||
|
||||
def cleanup():
    """Kill every Carla server process group still recorded as live.

    Each group in ``live_carla_processes`` is sent SIGKILL. A failure to
    kill one group (for example because it has already exited) is reported
    and does not prevent the remaining groups from being killed.
    """
    print("Killing live carla processes", live_carla_processes)
    # Iterate over a snapshot so the set may change while we walk it.
    for pgid in list(live_carla_processes):
        try:
            os.killpg(pgid, signal.SIGKILL)
        except OSError as e:
            # Best effort: keep going so one dead group cannot shield
            # the others from being cleaned up.
            print("Error killing process group {}: {}".format(pgid, e))
|
||||
|
||||
|
||||
atexit.register(cleanup)
|
||||
|
||||
|
||||
class CarlaEnv(gym.Env):
|
||||
def __init__(self, config=ENV_CONFIG):
    """Build the action/observation spaces and blank out episode state.

    No server process is started and no client is created here.

    Args:
        config: Environment settings dict. The keys read here are
            "server_map", "enable_planner", "discrete_actions",
            "use_depth_camera", "x_res", "y_res" and "framestack".
            The dict object itself is stored (not copied).
    """
    self.config = config
    # The city name is the last path component of the server map.
    self.city = self.config["server_map"].split("/")[-1]
    if self.config["enable_planner"]:
        self.planner = Planner(self.city)

    if config["discrete_actions"]:
        action_space = Discrete(len(DISCRETE_ACTIONS))
    else:
        action_space = Box(-1.0, 1.0, shape=(2, ), dtype=np.float32)
    self.action_space = action_space

    # Depth frames are declared with one float channel in [-1, 1]; RGB
    # frames with three uint8 channels. Channels are multiplied by the
    # number of stacked frames.
    # NOTE(review): preprocess_image() returns float32 data for RGB
    # frames as well, which does not match the uint8 space declared
    # here -- confirm which side is intended.
    if config["use_depth_camera"]:
        low, high, channels, dtype = -1.0, 1.0, 1, np.float32
    else:
        low, high, channels, dtype = 0, 255, 3, np.uint8
    image_space = Box(
        low,
        high,
        shape=(config["y_res"], config["x_res"],
               channels * config["framestack"]),
        dtype=dtype)
    self.observation_space = Tuple([
        image_space,
        Discrete(len(COMMANDS_ENUM)),  # next_command
        # forward_speed, dist to goal
        Box(-128.0, 128.0, shape=(2, ), dtype=np.float32),
    ])

    # TODO(ekl) this isn't really a proper gym spec
    self._spec = lambda: None
    self._spec.id = "Carla-v0"

    # Counters start at zero; everything else is unknown until a server
    # is started and an episode is reset.
    self.num_steps = 0
    self.total_reward = 0
    for attr in ("server_port", "server_process", "client",
                 "prev_measurement", "prev_image", "episode_id",
                 "measurements_file", "weather", "scenario", "start_pos",
                 "end_pos", "start_coord", "end_coord", "last_obs"):
        setattr(self, attr, None)
|
||||
|
||||
def init_server(self):
    """Launch a Carla server process and try to connect a client to it.

    The server is started in its own session on a randomly chosen port in
    [10000, 60000], and its process group id is recorded in
    ``live_carla_processes``. Connecting is attempted up to
    ``RETRIES_ON_ERROR`` times, sleeping two seconds after each failure.

    Returns:
        Whatever ``CarlaClient.connect()`` returns on the first successful
        attempt, or ``None`` if every attempt failed.
    """
    print("Initializing new Carla server...")
    # Create a new server process and start the client.
    self.server_port = random.randint(10000, 60000)
    # The child gets its own copy of the descriptor, so the parent's
    # handle is closed as soon as Popen returns instead of being leaked.
    with open(os.devnull, "w") as devnull:
        self.server_process = subprocess.Popen(
            [
                SERVER_BINARY, self.config["server_map"], "-windowed",
                "-ResX=400", "-ResY=300", "-carla-server",
                "-carla-world-port={}".format(self.server_port)
            ],
            preexec_fn=os.setsid,
            stdout=devnull)
    live_carla_processes.add(os.getpgid(self.server_process.pid))

    for i in range(RETRIES_ON_ERROR):
        try:
            self.client = CarlaClient("localhost", self.server_port)
            return self.client.connect()
        except Exception as e:
            print("Error connecting: {}, attempt {}".format(e, i))
            time.sleep(2)
|
||||
|
||||
def clear_server_state(self):
    """Disconnect the client and kill the server, tolerating failures.

    After this call ``self.client``, ``self.server_port`` and
    ``self.server_process`` are always ``None``, even if disconnecting or
    killing the server raised, so a later ``reset()`` starts a new server.
    """
    print("Clearing Carla server state")
    try:
        if self.client:
            self.client.disconnect()
    except Exception as e:
        print("Error disconnecting client: {}".format(e))
    finally:
        # Never keep a client around after a disconnect attempt, even a
        # failed one.
        self.client = None

    if self.server_process:
        pgid = None
        try:
            pgid = os.getpgid(self.server_process.pid)
            os.killpg(pgid, signal.SIGKILL)
        except OSError as e:
            # The server may already have exited; that must not stop the
            # state below from being cleared.
            print("Error killing Carla server: {}".format(e))
        if pgid is not None:
            # discard(): the group may already have been removed.
            live_carla_processes.discard(pgid)
        self.server_port = None
        self.server_process = None
|
||||
|
||||
def __del__(self):
    """Release the client and server when the environment is collected.

    If ``__init__`` raised before ``client`` and ``server_process`` were
    assigned there is nothing to release, and calling
    ``clear_server_state()`` would raise AttributeError from inside the
    finalizer, so that case is skipped.
    """
    if hasattr(self, "client") and hasattr(self, "server_process"):
        self.clear_server_state()
|
||||
|
||||
def reset(self):
    """Start a new episode, restarting the server on failure.

    Makes up to ``RETRIES_ON_ERROR`` attempts. Each attempt starts a
    server if none is running and then delegates to ``_reset()``. When an
    attempt raises, the traceback is printed and the server state is
    cleared before the next attempt.

    Returns:
        The first observation, as returned by ``_reset()``.

    Raises:
        The exception from the last failed attempt once all attempts are
        used up.
    """
    last_error = None
    attempts_left = RETRIES_ON_ERROR
    while attempts_left > 0:
        attempts_left -= 1
        try:
            if not self.server_process:
                self.init_server()
            return self._reset()
        except Exception as exc:
            print("Error during reset: {}".format(traceback.format_exc()))
            self.clear_server_state()
            last_error = exc
    raise last_error
|
||||
|
||||
def _reset(self):
    """Configure and start a fresh episode on the connected server.

    Resets the per-episode counters, picks a random scenario and weather
    from the config, registers the cameras, loads the settings on the
    server, records start/end positions and starts the episode.

    Returns:
        The encoded first observation (see ``encode_obs``).
    """
    self.num_steps = 0
    self.total_reward = 0
    self.prev_measurement = None
    self.prev_image = None
    self.episode_id = datetime.today().strftime("%Y-%m-%d_%H-%M-%S_%f")
    # An episode that ended through the step() error path never reaches
    # the close in _step(); close the log here so the handle is not
    # leaked.
    if self.measurements_file:
        self.measurements_file.close()
    self.measurements_file = None

    # Create a CarlaSettings object. This object is a wrapper around
    # the CarlaSettings.ini file. Here we set the configuration we
    # want for the new episode.
    settings = CarlaSettings()
    self.scenario = random.choice(self.config["scenarios"])
    assert self.scenario["city"] == self.city, (self.scenario, self.city)
    self.weather = random.choice(self.scenario["weather_distribution"])
    settings.set(
        SynchronousMode=True,
        SendNonPlayerAgentsInfo=True,
        NumberOfVehicles=self.scenario["num_vehicles"],
        NumberOfPedestrians=self.scenario["num_pedestrians"],
        WeatherId=self.weather)
    settings.randomize_seeds()

    # The depth camera is optional; the RGB camera is always attached.
    if self.config["use_depth_camera"]:
        camera1 = Camera("CameraDepth", PostProcessing="Depth")
        camera1.set_image_size(self.config["render_x_res"],
                               self.config["render_y_res"])
        camera1.set_position(30, 0, 130)
        settings.add_sensor(camera1)

    camera2 = Camera("CameraRGB")
    camera2.set_image_size(self.config["render_x_res"],
                           self.config["render_y_res"])
    camera2.set_position(30, 0, 130)
    settings.add_sensor(camera2)

    # Setup start and end positions
    scene = self.client.load_settings(settings)
    positions = scene.player_start_spots
    self.start_pos = positions[self.scenario["start_pos_id"]]
    self.end_pos = positions[self.scenario["end_pos_id"]]
    # Coordinates are kept floor-divided by 100.
    self.start_coord = [
        self.start_pos.location.x // 100, self.start_pos.location.y // 100
    ]
    self.end_coord = [
        self.end_pos.location.x // 100, self.end_pos.location.y // 100
    ]
    print("Start pos {} ({}), end {} ({})".format(
        self.scenario["start_pos_id"], self.start_coord,
        self.scenario["end_pos_id"], self.end_coord))

    # Notify the server that we want to start the episode at the
    # player_start index. This function blocks until the server is ready
    # to start the episode.
    print("Starting new episode...")
    self.client.start_episode(self.scenario["start_pos_id"])

    image, py_measurements = self._read_observation()
    self.prev_measurement = py_measurements
    return self.encode_obs(self.preprocess_image(image), py_measurements)
|
||||
|
||||
def encode_obs(self, image, py_measurements):
    """Assemble the observation tuple and remember it as ``last_obs``.

    Args:
        image: Preprocessed frame for the current step.
        py_measurements: Measurement dict; "next_command",
            "forward_speed" and "distance_to_goal" are read.

    Returns:
        ``(image, command_index, [forward_speed, distance_to_goal])``.
        With a framestack of 2 the image is the previous frame
        concatenated with the current one along axis 2; on the first
        frame of an episode the current frame is used twice.
    """
    framestack = self.config["framestack"]
    assert framestack in [1, 2]

    # Swap in the new frame, falling back to it when there is no history.
    older_frame = image if self.prev_image is None else self.prev_image
    self.prev_image = image

    if framestack == 2:
        image = np.concatenate([older_frame, image], axis=2)

    command_index = COMMAND_ORDINAL[py_measurements["next_command"]]
    scalars = [
        py_measurements["forward_speed"],
        py_measurements["distance_to_goal"]
    ]
    self.last_obs = (image, command_index, scalars)
    return self.last_obs
|
||||
|
||||
def step(self, action):
    """Advance one step, ending the episode instead of raising on error.

    Args:
        action: Action forwarded unchanged to ``_step``.

    Returns:
        The tuple returned by ``_step``. If ``_step`` raises, the
        traceback is printed, the server state is cleared and
        ``(self.last_obs, 0.0, True, {})`` is returned.
    """
    try:
        return self._step(action)
    except Exception:
        print("Error during step, terminating episode early",
              traceback.format_exc())
        self.clear_server_state()
    # Only reached after a failure: report a terminal transition.
    return (self.last_obs, 0.0, True, {})
|
||||
|
||||
def _step(self, action):
    """Send one control command and build the resulting transition.

    Args:
        action: A key of ``DISCRETE_ACTIONS`` when "discrete_actions" is
            enabled, otherwise a length-2 sequence whose first element
            drives throttle/brake and whose second drives steering.

    Returns:
        ``(observation, reward, done, py_measurements)``.
    """
    cfg = self.config
    if cfg["discrete_actions"]:
        action = DISCRETE_ACTIONS[int(action)]
    assert len(action) == 2, "Invalid action {}".format(action)

    # A single signed "forward" value is split into throttle (positive
    # part) and brake (magnitude of the negative part).
    if cfg["squash_action_logits"]:
        forward = 2 * float(sigmoid(action[0]) - 0.5)
        steer = 2 * float(sigmoid(action[1]) - 0.5)
    else:
        forward = action[0]
        steer = float(np.clip(action[1], -1, 1))
    throttle = float(np.clip(forward, 0, 1))
    brake = float(np.abs(np.clip(forward, -1, 0)))
    reverse = False
    hand_brake = False

    if cfg["verbose"]:
        print("steer", steer, "throttle", throttle, "brake", brake,
              "reverse", reverse)

    self.client.send_control(
        steer=steer,
        throttle=throttle,
        brake=brake,
        hand_brake=hand_brake,
        reverse=reverse)

    # Process observations
    image, py_measurements = self._read_observation()
    if cfg["verbose"]:
        print("Next command", py_measurements["next_command"])

    # Arrays are stored as plain float lists; anything else as given.
    if type(action) is np.ndarray:
        recorded_action = [float(a) for a in action]
    else:
        recorded_action = action
    py_measurements["action"] = recorded_action
    py_measurements["control"] = {
        "steer": steer,
        "throttle": throttle,
        "brake": brake,
        "reverse": reverse,
        "hand_brake": hand_brake,
    }

    reward = compute_reward(self, self.prev_measurement, py_measurements)
    self.total_reward += reward
    py_measurements["reward"] = reward
    py_measurements["total_reward"] = self.total_reward

    out_of_steps = self.num_steps > self.scenario["max_steps"]
    goal_reached = py_measurements["next_command"] == "REACH_GOAL"
    done = (out_of_steps or goal_reached
            or (cfg["early_terminate_on_collision"]
                and collided_done(py_measurements)))
    py_measurements["done"] = done
    self.prev_measurement = py_measurements

    # Write out measurements to file
    if CARLA_OUT_PATH:
        if not self.measurements_file:
            log_path = os.path.join(
                CARLA_OUT_PATH,
                "measurements_{}.json".format(self.episode_id))
            self.measurements_file = open(log_path, "w")
        self.measurements_file.write(json.dumps(py_measurements))
        self.measurements_file.write("\n")
        if done:
            self.measurements_file.close()
            self.measurements_file = None
            if cfg["convert_images_to_video"]:
                self.images_to_video()

    self.num_steps += 1
    observation = self.encode_obs(
        self.preprocess_image(image), py_measurements)
    return (observation, reward, done, py_measurements)
|
||||
|
||||
def images_to_video(self):
    """Encode this episode's logged RGB frames into an mp4 with ffmpeg.

    The video is written to ``<CARLA_OUT_PATH>/Videos/<episode_id>.mp4``.
    The command is run through the shell; the frame files are removed
    only if ffmpeg exits successfully (the ``&&`` in the command).
    """
    videos_dir = os.path.join(CARLA_OUT_PATH, "Videos")
    if not os.path.exists(videos_dir):
        os.makedirs(videos_dir)

    frame_prefix = os.path.join(CARLA_OUT_PATH, "CameraRGB",
                                self.episode_id)
    video_prefix = os.path.join(videos_dir, self.episode_id)
    template = (
        "ffmpeg -loglevel -8 -r 60 -f image2 -s {x_res}x{y_res} "
        "-start_number 0 -i "
        "{img}_%04d.jpg -vcodec libx264 {vid}.mp4 && rm -f {img}_*.jpg ")
    ffmpeg_cmd = template.format(
        x_res=self.config["render_x_res"],
        y_res=self.config["render_y_res"],
        vid=video_prefix,
        img=frame_prefix)
    print("Executing ffmpeg command", ffmpeg_cmd)
    subprocess.call(ffmpeg_cmd, shell=True)
|
||||
|
||||
def preprocess_image(self, image):
    """Resize a raw camera frame to the model resolution and rescale it.

    Args:
        image: Sensor image whose ``data`` attribute holds the frame at
            the render resolution.

    Returns:
        For the depth camera: ``(data - 0.5) * 2`` resized to
        (x_res, y_res) with a trailing channel axis added back.
        For the RGB camera: the resized frame converted to float32 and
        mapped through ``(value - 128) / 128``.
    """
    cfg = self.config
    render_shape = (cfg["render_y_res"], cfg["render_x_res"])
    target_size = (cfg["x_res"], cfg["y_res"])

    if cfg["use_depth_camera"]:
        frame = ((image.data - 0.5) * 2).reshape(render_shape + (1, ))
        frame = cv2.resize(
            frame, target_size, interpolation=cv2.INTER_AREA)
        # The channel axis is re-added here after the resize.
        return np.expand_dims(frame, 2)

    frame = image.data.reshape(render_shape + (3, ))
    frame = cv2.resize(frame, target_size, interpolation=cv2.INTER_AREA)
    return (frame.astype(np.float32) - 128) / 128
|
||||
|
||||
def _read_observation(self):
    """Read one frame from the server and summarise it.

    Returns:
        ``(observation, py_measurements)`` where ``observation`` is the
        sensor image of the configured camera ("CameraDepth" or
        "CameraRGB") and ``py_measurements`` is a dict describing the
        player state, the episode and the planner's next command.

    Raises:
        AssertionError: If the configured camera is missing from the
            sensor data.
    """
    # Read the data produced by the server this frame.
    measurements, sensor_data = self.client.read_data()

    # Print some of the measurements.
    if self.config["verbose"]:
        print_measurements(measurements)

    wanted_camera = ("CameraDepth"
                     if self.config["use_depth_camera"] else "CameraRGB")
    observation = None
    for sensor_name, sensor_image in sensor_data.items():
        if sensor_name == wanted_camera:
            observation = sensor_image

    cur = measurements.player_measurements
    here = cur.transform.location
    heading = cur.transform.orientation
    goal = self.end_pos
    planner_enabled = self.config["enable_planner"]

    def planner_query():
        # Fresh argument lists for every planner call:
        # current position, current orientation, goal position, goal
        # orientation, each with the dummy ground Z.
        return (
            [here.x, here.y, GROUND_Z],
            [heading.x, heading.y, GROUND_Z],
            [goal.location.x, goal.location.y, GROUND_Z],
            [goal.orientation.x, goal.orientation.y, GROUND_Z],
        )

    if planner_enabled:
        next_command = COMMANDS_ENUM[self.planner.get_next_command(
            *planner_query())]
    else:
        next_command = "LANE_FOLLOW"

    if next_command == "REACH_GOAL":
        distance_to_goal = 0.0  # avoids crash in planner
    elif planner_enabled:
        distance_to_goal = self.planner.get_shortest_path_distance(
            *planner_query()) / 100
    else:
        distance_to_goal = -1

    distance_to_goal_euclidean = float(
        np.linalg.norm(
            [here.x - goal.location.x, here.y - goal.location.y]) / 100)

    py_measurements = {
        "episode_id": self.episode_id,
        "step": self.num_steps,
        "x": here.x,
        "y": here.y,
        "x_orient": heading.x,
        "y_orient": heading.y,
        "forward_speed": cur.forward_speed,
        "distance_to_goal": distance_to_goal,
        "distance_to_goal_euclidean": distance_to_goal_euclidean,
        "collision_vehicles": cur.collision_vehicles,
        "collision_pedestrians": cur.collision_pedestrians,
        "collision_other": cur.collision_other,
        "intersection_offroad": cur.intersection_offroad,
        "intersection_otherlane": cur.intersection_otherlane,
        "weather": self.weather,
        "map": self.config["server_map"],
        "start_coord": self.start_coord,
        "end_coord": self.end_coord,
        "current_scenario": self.scenario,
        "x_res": self.config["x_res"],
        "y_res": self.config["y_res"],
        "num_vehicles": self.scenario["num_vehicles"],
        "num_pedestrians": self.scenario["num_pedestrians"],
        "max_steps": self.scenario["max_steps"],
        "next_command": next_command,
    }

    # Every sensor's frame is saved under a directory named after it.
    if CARLA_OUT_PATH and self.config["log_images"]:
        for sensor_name, sensor_image in sensor_data.items():
            out_dir = os.path.join(CARLA_OUT_PATH, sensor_name)
            if not os.path.exists(out_dir):
                os.makedirs(out_dir)
            file_name = "{}_{:>04}.jpg".format(self.episode_id,
                                               self.num_steps)
            scipy.misc.imsave(
                os.path.join(out_dir, file_name), sensor_image.data)

    assert observation is not None, sensor_data
    return observation, py_measurements
|
||||
|
||||
|
||||
def compute_reward_corl2017(env, prev, current):
    """Reward built from step-to-step changes in the measurements.

    Args:
        env: Object whose ``config["verbose"]`` toggles debug printing.
        prev: Measurement dict of the previous step.
        current: Measurement dict of the current step.

    Returns:
        Progress toward the goal (clipped to [-10, 10]), plus 0.05 times
        the speed change, minus 0.00002 times new collision damage, minus
        twice the increase in off-road and in other-lane intersection.
    """
    cur_dist = current["distance_to_goal"]
    prev_dist = prev["distance_to_goal"]
    if env.config["verbose"]:
        print("Cur dist {}, prev dist {}".format(cur_dist, prev_dist))

    # Distance travelled toward the goal in m
    progress = np.clip(prev_dist - cur_dist, -10.0, 10.0)

    # Change in speed (km/h)
    speed_gain = 0.05 * (current["forward_speed"] - prev["forward_speed"])

    # New collision damage
    damage_penalty = .00002 * (
        current["collision_vehicles"] + current["collision_pedestrians"] +
        current["collision_other"] - prev["collision_vehicles"] -
        prev["collision_pedestrians"] - prev["collision_other"])

    # New sidewalk intersection
    offroad_penalty = 2 * (
        current["intersection_offroad"] - prev["intersection_offroad"])

    # New opposite lane intersection
    otherlane_penalty = 2 * (
        current["intersection_otherlane"] - prev["intersection_otherlane"])

    return (0.0 + progress + speed_gain - damage_penalty - offroad_penalty
            - otherlane_penalty)
|
||||
|
||||
|
||||
def compute_reward_custom(env, prev, current):
    """Reward combining progress, speed, penalties and a goal bonus.

    Args:
        env: Object whose ``config["verbose"]`` toggles debug printing.
        prev: Measurement dict of the previous step.
        current: Measurement dict of the current step.

    Returns:
        Progress toward the goal (clipped to [-10, 10]), plus the forward
        speed clipped to [0, 30] and divided by 10, minus 100 if any new
        collision damage occurred, minus the current off-road and
        other-lane intersection values, plus 100 when the next command
        is "REACH_GOAL".
    """
    cur_dist = current["distance_to_goal"]
    prev_dist = prev["distance_to_goal"]
    if env.config["verbose"]:
        print("Cur dist {}, prev dist {}".format(cur_dist, prev_dist))

    # Distance travelled toward the goal in m
    reward = 0.0 + np.clip(prev_dist - cur_dist, -10.0, 10.0)

    # Speed reward, up 30.0 (km/h)
    reward = reward + np.clip(current["forward_speed"], 0.0, 30.0) / 10

    # New collision damage: any non-zero change costs a flat penalty.
    new_damage = (
        current["collision_vehicles"] + current["collision_pedestrians"] +
        current["collision_other"] - prev["collision_vehicles"] -
        prev["collision_pedestrians"] - prev["collision_other"])
    if new_damage:
        reward = reward - 100.0

    # Sidewalk intersection, then opposite lane intersection
    reward = reward - current["intersection_offroad"]
    reward = reward - current["intersection_otherlane"]

    # Reached goal
    if current["next_command"] == "REACH_GOAL":
        reward = reward + 100.0

    return reward
|
||||
|
||||
|
||||
def compute_reward_lane_keep(env, prev, current):
    """Reward for driving fast without collisions or lane departures.

    Args:
        env: Unused; present so all reward functions share a signature.
        prev: Measurement dict of the previous step.
        current: Measurement dict of the current step.

    Returns:
        The forward speed clipped to [0, 30] and divided by 10, minus 100
        if any new collision damage occurred, minus the current off-road
        and other-lane intersection values.
    """
    # Speed reward, up 30.0 (km/h)
    reward = 0.0 + np.clip(current["forward_speed"], 0.0, 30.0) / 10

    # New collision damage: any non-zero change costs a flat penalty.
    new_damage = (
        current["collision_vehicles"] + current["collision_pedestrians"] +
        current["collision_other"] - prev["collision_vehicles"] -
        prev["collision_pedestrians"] - prev["collision_other"])
    if new_damage:
        reward = reward - 100.0

    # Sidewalk intersection, then opposite lane intersection
    reward = reward - current["intersection_offroad"]
    reward = reward - current["intersection_otherlane"]

    return reward
|
||||
|
||||
|
||||
REWARD_FUNCTIONS = {
|
||||
"corl2017": compute_reward_corl2017,
|
||||
"custom": compute_reward_custom,
|
||||
"lane_keep": compute_reward_lane_keep,
|
||||
}
|
||||
|
||||
|
||||
def compute_reward(env, prev, current):
    """Compute the step reward with the configured reward function.

    The function is looked up in ``REWARD_FUNCTIONS`` using
    ``env.config["reward_function"]``; an unknown name raises ``KeyError``.
    """
    reward_fn = REWARD_FUNCTIONS[env.config["reward_function"]]
    return reward_fn(env, prev, current)
|
||||
|
||||
|
||||
def print_measurements(measurements):
    """Print a one-line summary of the player's measurements.

    Args:
        measurements: Object exposing ``non_player_agents`` (a sized
            collection) and ``player_measurements`` (position, speed,
            collision and lane-intersection readings).
    """
    agent_count = len(measurements.non_player_agents)
    player = measurements.player_measurements
    template = (
        "Vehicle at ({pos_x:.1f}, {pos_y:.1f}), "
        "{speed:.2f} km/h, "
        "Collision: {{vehicles={col_cars:.0f}, "
        "pedestrians={col_ped:.0f}, other={col_other:.0f}}}, "
        "{other_lane:.0f}% other lane, {offroad:.0f}% off-road, "
        "({agents_num:d} non-player agents in the scene)")
    location = player.transform.location
    fields = {
        "pos_x": location.x / 100,  # cm -> m
        "pos_y": location.y / 100,
        "speed": player.forward_speed,
        "col_cars": player.collision_vehicles,
        "col_ped": player.collision_pedestrians,
        "col_other": player.collision_other,
        # Lane intersections are fractions; shown as percentages.
        "other_lane": 100 * player.intersection_otherlane,
        "offroad": 100 * player.intersection_offroad,
        "agents_num": agent_count,
    }
    print(template.format(**fields))
|
||||
|
||||
|
||||
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of a scalar.

    The two branches keep the argument of ``np.exp`` non-positive, so the
    exponential can never overflow: large positive inputs give 1.0 and
    large negative inputs give 0.0 instead of ``nan``.

    Args:
        x: Any value convertible with ``float()``.

    Returns:
        The sigmoid of ``x`` as a NumPy float in [0, 1].
    """
    x = float(x)
    if x >= 0:
        return 1.0 / (1.0 + np.exp(-x))
    exp_x = np.exp(x)
    return exp_x / (1.0 + exp_x)
|
||||
|
||||
|
||||
def collided_done(py_measurements):
    """Tell whether the episode should end because of a collision.

    Args:
        py_measurements: Measurement dict with the three collision counters
            and the accumulated ``total_reward``.

    Returns:
        True if any collision counter is positive or the total reward has
        dropped below -100, otherwise False.
    """
    collision_keys = (
        "collision_vehicles",
        "collision_pedestrians",
        "collision_other",
    )
    any_collision = any(py_measurements[key] > 0 for key in collision_keys)
    return bool(any_collision or py_measurements["total_reward"] < -100)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Smoke test: run two episodes with a constant action, printing the
    # per-step reward and the achieved step rate.
    for _ in range(2):
        env = CarlaEnv()
        obs = env.reset()
        print("reset", obs)
        start = time.time()
        done = False
        i = 0
        total_reward = 0.0
        while not done:
            i += 1
            if ENV_CONFIG["discrete_actions"]:
                obs, reward, done, info = env.step(1)
            else:
                obs, reward, done, info = env.step([0, 1, 0])
            total_reward += reward
            print(i, "rew", reward, "total", total_reward, "done", done)
        # Divide the number of steps actually taken (not a fixed 100) by
        # the elapsed time, since the episode length is not constant.
        print("{} fps".format(i / (time.time() - start)))
|
||||
@@ -1,108 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
import tensorflow.contrib.slim as slim
|
||||
from tensorflow.contrib.layers import xavier_initializer
|
||||
|
||||
from ray.rllib.models.catalog import ModelCatalog
|
||||
from ray.rllib.models.misc import normc_initializer
|
||||
from ray.rllib.models.model import Model
|
||||
|
||||
|
||||
class CarlaModel(Model):
    """Carla model that can process the observation tuple.

    The architecture processes the image using convolutional layers, the
    metrics using fully connected layers, and then combines them with
    further fully connected layers.
    """

    # TODO(ekl): use build_layers_v2 for native dict space support
    def _build_layers(self, inputs, num_outputs, options):
        """Build the vision + metrics network.

        Args:
            inputs: Batched flat observation tensor. Its trailing dimension
                must equal the flattened image size plus 5 + 2 extra
                values (checked below).
            num_outputs: Number of units of the final output layer.
            options: Model options. Reads ``custom_options["image_shape"]``
                and the optional ``conv_filters``, ``fcnet_hiddens`` and
                ``fcnet_activation`` entries.

        Returns:
            Tuple ``(output, last_layer)`` of the output layer and the last
            hidden layer feeding it.

        Raises:
            ValueError: If ``fcnet_activation`` is neither "tanh" nor
                "relu".
        """
        # Parse options
        image_shape = options["custom_options"]["image_shape"]
        convs = options.get("conv_filters", [
            [16, [8, 8], 4],
            [32, [5, 5], 3],
            [32, [5, 5], 2],
            [512, [10, 10], 1],
        ])
        hiddens = options.get("fcnet_hiddens", [64])
        fcnet_activation = options.get("fcnet_activation", "tanh")
        if fcnet_activation == "tanh":
            activation = tf.nn.tanh
        elif fcnet_activation == "relu":
            activation = tf.nn.relu
        else:
            # Fail early with a clear message instead of hitting an unbound
            # local variable when the first dense layer is built.
            raise ValueError(
                "Unsupported fcnet_activation: {!r} (expected 'tanh' or "
                "'relu')".format(fcnet_activation))

        # Sanity checks
        image_size = np.prod(image_shape)
        # NOTE(review): the 5 + 2 extra inputs are presumably the encoded
        # non-image observation components - confirm against the env.
        expected_shape = [image_size + 5 + 2]
        assert inputs.shape.as_list()[1:] == expected_shape, \
            (inputs.shape.as_list()[1:], expected_shape)

        # Reshape the input vector back into its components
        vision_in = tf.reshape(inputs[:, :image_size],
                               [tf.shape(inputs)[0]] + image_shape)
        metrics_in = inputs[:, image_size:]
        print("Vision in shape", vision_in)
        print("Metrics in shape", metrics_in)

        # Setup vision layers
        with tf.name_scope("carla_vision"):
            for i, (out_size, kernel, stride) in enumerate(convs[:-1], 1):
                vision_in = slim.conv2d(
                    vision_in,
                    out_size,
                    kernel,
                    stride,
                    scope="conv{}".format(i))
            # The last filter uses VALID padding; the squeeze below needs
            # it to produce a 1x1 spatial output.
            out_size, kernel, stride = convs[-1]
            vision_in = slim.conv2d(
                vision_in,
                out_size,
                kernel,
                stride,
                padding="VALID",
                scope="conv_out")
            vision_in = tf.squeeze(vision_in, [1, 2])

        # Setup metrics layer
        with tf.name_scope("carla_metrics"):
            metrics_in = slim.fully_connected(
                metrics_in,
                64,
                weights_initializer=xavier_initializer(),
                activation_fn=activation,
                scope="metrics_out")

        print("Shape of vision out is", vision_in.shape)
        print("Shape of metric out is", metrics_in.shape)

        # Combine the metrics and vision inputs
        with tf.name_scope("carla_out"):
            last_layer = tf.concat([vision_in, metrics_in], axis=1)
            print("Shape of concatenated out is", last_layer.shape)
            for i, size in enumerate(hiddens, 1):
                last_layer = slim.fully_connected(
                    last_layer,
                    size,
                    weights_initializer=xavier_initializer(),
                    activation_fn=activation,
                    scope="fc{}".format(i))
            output = slim.fully_connected(
                last_layer,
                num_outputs,
                weights_initializer=normc_initializer(0.01),
                activation_fn=None,
                scope="fc_out")

        return output, last_layer
|
||||
|
||||
|
||||
def register_carla_model():
    """Register ``CarlaModel`` in the model catalog under the name "carla"."""
    model_name = "carla"
    ModelCatalog.register_custom_model(model_name, CarlaModel)
|
||||
@@ -1,131 +0,0 @@
|
||||
"""Collection of Carla scenarios, including those from the CoRL 2017 paper."""
|
||||
|
||||
# Weather ids for the test split.
TEST_WEATHERS = [0, 2, 5, 7, 9, 10, 11, 12, 13]
# Weather ids for the train split; disjoint from TEST_WEATHERS.
TRAIN_WEATHERS = [1, 3, 4, 6, 8, 14]
|
||||
|
||||
|
||||
def build_scenario(city, start, end, vehicles, pedestrians, max_steps,
                   weathers):
    """Bundle the scenario parameters into a scenario dict.

    Args:
        city: Name of the town, e.g. "Town02".
        start: Id of the start position.
        end: Id of the end position.
        vehicles: Number of vehicles in the scenario.
        pedestrians: Number of pedestrians in the scenario.
        max_steps: Step budget of the scenario.
        weathers: List of weather ids (stored as the weather distribution).

    Returns:
        A dict with the keys "city", "num_vehicles", "num_pedestrians",
        "weather_distribution", "start_pos_id", "end_pos_id" and
        "max_steps".
    """
    return dict(
        city=city,
        num_vehicles=vehicles,
        num_pedestrians=pedestrians,
        weather_distribution=weathers,
        start_pos_id=start,
        end_pos_id=end,
        max_steps=max_steps,
    )
|
||||
|
||||
|
||||
# Simple scenario for Town02 that involves driving down a road, with other
# vehicles and pedestrians present.
DEFAULT_SCENARIO = build_scenario(
    "Town02", 36, 40,
    vehicles=20,
    pedestrians=40,
    max_steps=200,
    weathers=[0])

# Same Town02 route with no vehicles or pedestrians and a ten times larger
# step budget.
LANE_KEEP = build_scenario(
    "Town02", 36, 40,
    vehicles=0,
    pedestrians=0,
    max_steps=2000,
    weathers=[0])
|
||||
|
||||
# Scenarios from the CoRL2017 paper
|
||||
POSES_TOWN1_STRAIGHT = [[36, 40], [39, 35], [110, 114], [7, 3], [0, 4], [
|
||||
68, 50
|
||||
], [61, 59], [47, 64], [147, 90], [33, 87], [26, 19], [80, 76], [45, 49], [
|
||||
55, 44
|
||||
], [29, 107], [95, 104], [84, 34], [53, 67], [22, 17], [91, 148], [20, 107],
|
||||
[78, 70], [95, 102], [68, 44], [45, 69]]
|
||||
|
||||
POSES_TOWN1_ONE_CURVE = [[138, 17], [47, 16], [26, 9], [42, 49], [140, 124], [
|
||||
85, 98
|
||||
], [65, 133], [137, 51], [76, 66], [46, 39], [40, 60], [0, 29], [4, 129], [
|
||||
121, 140
|
||||
], [2, 129], [78, 44], [68, 85], [41, 102], [95, 70], [68, 129], [84, 69],
|
||||
[47, 79], [110, 15], [130, 17], [0, 17]]
|
||||
|
||||
POSES_TOWN1_NAV = [[105, 29], [27, 130], [102, 87], [132, 27], [24, 44], [
|
||||
96, 26
|
||||
], [34, 67], [28, 1], [140, 134], [105, 9], [148, 129], [65, 18], [21, 16], [
|
||||
147, 97
|
||||
], [42, 51], [30, 41], [18, 107], [69, 45], [102, 95], [18, 145], [111, 64],
|
||||
[79, 45], [84, 69], [73, 31], [37, 81]]
|
||||
|
||||
POSES_TOWN2_STRAIGHT = [[38, 34], [4, 2], [12, 10], [62, 55], [43, 47], [
|
||||
64, 66
|
||||
], [78, 76], [59, 57], [61, 18], [35, 39], [12, 8], [0, 18], [75, 68], [
|
||||
54, 60
|
||||
], [45, 49], [46, 42], [53, 46], [80, 29], [65, 63], [0, 81], [54, 63],
|
||||
[51, 42], [16, 19], [17, 26], [77, 68]]
|
||||
|
||||
POSES_TOWN2_ONE_CURVE = [[37, 76], [8, 24], [60, 69], [38, 10], [21, 1], [
|
||||
58, 71
|
||||
], [74, 32], [44, 0], [71, 16], [14, 24], [34, 11], [43, 14], [75, 16], [
|
||||
80, 21
|
||||
], [3, 23], [75, 59], [50, 47], [11, 19], [77, 34], [79, 25], [40, 63],
|
||||
[58, 76], [79, 55], [16, 61], [27, 11]]
|
||||
|
||||
POSES_TOWN2_NAV = [[19, 66], [79, 14], [19, 57], [23, 1], [53, 76], [42, 13], [
|
||||
31, 71
|
||||
], [33, 5], [54, 30], [10, 61], [66, 3], [27, 12], [79, 19], [2, 29], [16, 14],
|
||||
[5, 57], [70, 73], [46, 67], [57, 50], [61, 49], [21, 12],
|
||||
[51, 81], [77, 68], [56, 65], [43, 54]]
|
||||
|
||||
# Town01 scenario lists: one scenario per pose, all with TEST_WEATHERS.
TOWN1_STRAIGHT = [
    build_scenario(
        city="Town01", start=src, end=dst, vehicles=0, pedestrians=0,
        max_steps=300, weathers=TEST_WEATHERS)
    for src, dst in POSES_TOWN1_STRAIGHT
]

TOWN1_ONE_CURVE = [
    build_scenario(
        city="Town01", start=src, end=dst, vehicles=0, pedestrians=0,
        max_steps=600, weathers=TEST_WEATHERS)
    for src, dst in POSES_TOWN1_ONE_CURVE
]

TOWN1_NAVIGATION = [
    build_scenario(
        city="Town01", start=src, end=dst, vehicles=0, pedestrians=0,
        max_steps=900, weathers=TEST_WEATHERS)
    for src, dst in POSES_TOWN1_NAV
]

TOWN1_NAVIGATION_DYNAMIC = [
    build_scenario(
        city="Town01", start=src, end=dst, vehicles=20, pedestrians=50,
        max_steps=900, weathers=TEST_WEATHERS)
    for src, dst in POSES_TOWN1_NAV
]

# Town02 scenario lists: one scenario per pose, all with TRAIN_WEATHERS.
TOWN2_STRAIGHT = [
    build_scenario(
        city="Town02", start=src, end=dst, vehicles=0, pedestrians=0,
        max_steps=300, weathers=TRAIN_WEATHERS)
    for src, dst in POSES_TOWN2_STRAIGHT
]

TOWN2_STRAIGHT_DYNAMIC = [
    build_scenario(
        city="Town02", start=src, end=dst, vehicles=20, pedestrians=50,
        max_steps=300, weathers=TRAIN_WEATHERS)
    for src, dst in POSES_TOWN2_STRAIGHT
]

TOWN2_ONE_CURVE = [
    build_scenario(
        city="Town02", start=src, end=dst, vehicles=0, pedestrians=0,
        max_steps=600, weathers=TRAIN_WEATHERS)
    for src, dst in POSES_TOWN2_ONE_CURVE
]

TOWN2_NAVIGATION = [
    build_scenario(
        city="Town02", start=src, end=dst, vehicles=0, pedestrians=0,
        max_steps=900, weathers=TRAIN_WEATHERS)
    for src, dst in POSES_TOWN2_NAV
]

TOWN2_NAVIGATION_DYNAMIC = [
    build_scenario(
        city="Town02", start=src, end=dst, vehicles=20, pedestrians=50,
        max_steps=900, weathers=TRAIN_WEATHERS)
    for src, dst in POSES_TOWN2_NAV
]

# Combined lists per town. TOWN2_STRAIGHT_DYNAMIC is not part of TOWN2_ALL.
TOWN1_ALL = (
    TOWN1_STRAIGHT
    + TOWN1_ONE_CURVE
    + TOWN1_NAVIGATION
    + TOWN1_NAVIGATION_DYNAMIC
)

TOWN2_ALL = (
    TOWN2_STRAIGHT
    + TOWN2_ONE_CURVE
    + TOWN2_NAVIGATION
    + TOWN2_NAVIGATION_DYNAMIC
)
|
||||
@@ -1,51 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import ray
|
||||
from ray.tune import grid_search, run_experiments
|
||||
|
||||
from env import CarlaEnv, ENV_CONFIG
|
||||
from models import register_carla_model
|
||||
from scenarios import TOWN2_STRAIGHT
|
||||
|
||||
# Environment settings for this experiment, layered over the defaults.
# Both grid_search entries are expanded by tune into separate trials.
env_config = ENV_CONFIG.copy()
env_config.update(
    verbose=False,
    x_res=80,
    y_res=80,
    squash_action_logits=grid_search([False, True]),
    use_depth_camera=False,
    discrete_actions=False,
    server_map="/Game/Maps/Town02",
    reward_function=grid_search(["custom", "corl2017"]),
    scenarios=TOWN2_STRAIGHT,
)

register_carla_model()

# Attach to the Redis instance on port 6379 of this node.
redis_address = ray.services.get_node_ip_address() + ":6379"
ray.init(redis_address=redis_address)

model_config = {
    "custom_model": "carla",
    "custom_options": {
        "image_shape": [80, 80, 6],
    },
    "conv_filters": [
        [16, [8, 8], 4],
        [32, [4, 4], 2],
        [512, [10, 10], 1],
    ],
}

a3c_config = {
    "env_config": env_config,
    "use_gpu_for_workers": True,
    "model": model_config,
    "gamma": 0.95,
    "num_workers": 2,
}

run_experiments({
    "carla-a3c": {
        "run": "A3C",
        "env": CarlaEnv,
        "config": a3c_config,
    },
})
|
||||
@@ -1,65 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import ray
|
||||
from ray.tune import run_experiments
|
||||
|
||||
from env import CarlaEnv, ENV_CONFIG
|
||||
from models import register_carla_model
|
||||
from scenarios import TOWN2_ONE_CURVE
|
||||
|
||||
# Environment settings for this experiment, layered over the defaults.
env_config = ENV_CONFIG.copy()
env_config.update(
    verbose=False,
    x_res=80,
    y_res=80,
    discrete_actions=True,
    server_map="/Game/Maps/Town02",
    reward_function="custom",
    scenarios=TOWN2_ONE_CURVE,
)

# The custom model must be registered before the experiment refers to it.
register_carla_model()

ray.init()
|
||||
|
||||
|
||||
def shape_out(spec):
    """Return the image channel count for a trial spec.

    The result is ``framestack`` times 1 when ``use_depth_camera`` is set,
    otherwise ``framestack`` times 3; both values are read from
    ``spec.config.env_config``.
    """
    frames = spec.config.env_config.framestack
    channels_per_frame = 1 if spec.config.env_config.use_depth_camera else 3
    return frames * channels_per_frame
|
||||
|
||||
|
||||
# The last image dimension is the function object shape_out, not a number.
# NOTE(review): presumably tune resolves it per trial - confirm.
model_config = {
    "custom_model": "carla",
    "custom_options": {
        "image_shape": [80, 80, shape_out],
    },
    "conv_filters": [
        [16, [8, 8], 4],
        [32, [4, 4], 2],
        [512, [10, 10], 1],
    ],
}

dqn_config = {
    "env_config": env_config,
    "model": model_config,
    "timesteps_per_iteration": 100,
    "learning_starts": 1000,
    "schedule_max_timesteps": 100000,
    "gamma": 0.8,
    "tf_session_args": {
        "gpu_options": {
            "allow_growth": True
        },
    },
}

run_experiments({
    "carla-dqn": {
        "run": "DQN",
        "env": CarlaEnv,
        "config": dqn_config,
    },
})
|
||||
@@ -1,55 +0,0 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import ray
|
||||
from ray.tune import run_experiments
|
||||
|
||||
from env import CarlaEnv, ENV_CONFIG
|
||||
from models import register_carla_model
|
||||
from scenarios import TOWN2_STRAIGHT
|
||||
|
||||
# Environment settings for this experiment, layered over the defaults.
env_config = ENV_CONFIG.copy()
env_config.update(
    verbose=False,
    x_res=80,
    y_res=80,
    use_depth_camera=False,
    discrete_actions=False,
    server_map="/Game/Maps/Town02",
    scenarios=TOWN2_STRAIGHT,
)
register_carla_model()

ray.init()

# The image shape follows the configured camera resolution, with 6 channels.
model_config = {
    "custom_model": "carla",
    "custom_options": {
        "image_shape": [env_config["x_res"], env_config["y_res"], 6],
    },
    "conv_filters": [
        [16, [8, 8], 4],
        [32, [4, 4], 2],
        [512, [10, 10], 1],
    ],
}

ppo_config = {
    "env_config": env_config,
    "model": model_config,
    "num_workers": 1,
    "train_batch_size": 2000,
    "sample_batch_size": 100,
    "lambda": 0.95,
    "clip_param": 0.2,
    "num_sgd_iter": 20,
    "lr": 0.0001,
    "sgd_minibatch_size": 32,
    "num_gpus": 1,
}

run_experiments({
    "carla": {
        "run": "PPO",
        "env": CarlaEnv,
        "config": ppo_config,
    },
})
|
||||
@@ -11,11 +11,13 @@ from __future__ import print_function
|
||||
from gym.spaces import Discrete, Box
|
||||
import gym
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
|
||||
import ray
|
||||
from ray.rllib.models import Model, ModelCatalog
|
||||
from ray.tune import run_experiments, sample_from
|
||||
from ray.rllib.utils import try_import_tf
|
||||
|
||||
tf = try_import_tf()
|
||||
|
||||
|
||||
class FastModel(Model):
|
||||
|
||||
@@ -15,7 +15,6 @@ $ python custom_loss.py --input-files=/tmp/cartpole
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import tensorflow as tf
|
||||
|
||||
import ray
|
||||
from ray import tune
|
||||
@@ -23,6 +22,9 @@ from ray.rllib.models import (Categorical, FullyConnectedNetwork, Model,
|
||||
ModelCatalog)
|
||||
from ray.rllib.models.model import restore_original_dimensions
|
||||
from ray.rllib.offline import JsonReader
|
||||
from ray.rllib.utils import try_import_tf
|
||||
|
||||
tf = try_import_tf()
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--iters", type=int, default=200)
|
||||
|
||||
@@ -6,9 +6,11 @@ from __future__ import print_function
|
||||
|
||||
import os
|
||||
import ray
|
||||
import tensorflow as tf
|
||||
|
||||
from ray.rllib.agents.registry import get_agent_class
|
||||
from ray.rllib.utils import try_import_tf
|
||||
|
||||
tf = try_import_tf()
|
||||
|
||||
ray.init(num_cpus=10)
|
||||
|
||||
|
||||
@@ -16,14 +16,14 @@ import argparse
|
||||
import gym
|
||||
import random
|
||||
|
||||
import tensorflow as tf
|
||||
import tensorflow.contrib.slim as slim
|
||||
|
||||
import ray
|
||||
from ray import tune
|
||||
from ray.rllib.models import Model, ModelCatalog
|
||||
from ray.rllib.tests.test_multi_agent_env import MultiCartpole
|
||||
from ray.tune.registry import register_env
|
||||
from ray.rllib.utils import try_import_tf
|
||||
|
||||
tf = try_import_tf()
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
|
||||
@@ -43,12 +43,12 @@ class CustomModel1(Model):
|
||||
tf.VariableScope(tf.AUTO_REUSE, "shared"),
|
||||
reuse=tf.AUTO_REUSE,
|
||||
auxiliary_name_scope=False):
|
||||
last_layer = slim.fully_connected(
|
||||
input_dict["obs"], 64, activation_fn=tf.nn.relu, scope="fc1")
|
||||
last_layer = slim.fully_connected(
|
||||
last_layer, 64, activation_fn=tf.nn.relu, scope="fc2")
|
||||
output = slim.fully_connected(
|
||||
last_layer, num_outputs, activation_fn=None, scope="fc_out")
|
||||
last_layer = tf.layers.dense(
|
||||
input_dict["obs"], 64, activation=tf.nn.relu, name="fc1")
|
||||
last_layer = tf.layers.dense(
|
||||
last_layer, 64, activation=tf.nn.relu, name="fc2")
|
||||
output = tf.layers.dense(
|
||||
last_layer, num_outputs, activation=None, name="fc_out")
|
||||
return output, last_layer
|
||||
|
||||
|
||||
@@ -59,12 +59,12 @@ class CustomModel2(Model):
|
||||
tf.VariableScope(tf.AUTO_REUSE, "shared"),
|
||||
reuse=tf.AUTO_REUSE,
|
||||
auxiliary_name_scope=False):
|
||||
last_layer = slim.fully_connected(
|
||||
input_dict["obs"], 64, activation_fn=tf.nn.relu, scope="fc1")
|
||||
last_layer = slim.fully_connected(
|
||||
last_layer, 64, activation_fn=tf.nn.relu, scope="fc2")
|
||||
output = slim.fully_connected(
|
||||
last_layer, num_outputs, activation_fn=None, scope="fc_out")
|
||||
last_layer = tf.layers.dense(
|
||||
input_dict["obs"], 64, activation=tf.nn.relu, name="fc1")
|
||||
last_layer = tf.layers.dense(
|
||||
last_layer, 64, activation=tf.nn.relu, name="fc2")
|
||||
output = tf.layers.dense(
|
||||
last_layer, num_outputs, activation=None, name="fc_out")
|
||||
return output, last_layer
|
||||
|
||||
|
||||
|
||||
@@ -23,14 +23,15 @@ import random
|
||||
import numpy as np
|
||||
import gym
|
||||
from gym.spaces import Box, Discrete, Dict
|
||||
import tensorflow as tf
|
||||
import tensorflow.contrib.slim as slim
|
||||
|
||||
import ray
|
||||
from ray import tune
|
||||
from ray.rllib.models import Model, ModelCatalog
|
||||
from ray.rllib.models.misc import normc_initializer
|
||||
from ray.tune.registry import register_env
|
||||
from ray.rllib.utils import try_import_tf
|
||||
|
||||
tf = try_import_tf()
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--stop", type=int, default=200)
|
||||
@@ -134,18 +135,18 @@ class ParametricActionsModel(Model):
|
||||
hiddens = [256, 256]
|
||||
for i, size in enumerate(hiddens):
|
||||
label = "fc{}".format(i)
|
||||
last_layer = slim.fully_connected(
|
||||
last_layer = tf.layers.dense(
|
||||
last_layer,
|
||||
size,
|
||||
weights_initializer=normc_initializer(1.0),
|
||||
activation_fn=tf.nn.tanh,
|
||||
scope=label)
|
||||
output = slim.fully_connected(
|
||||
kernel_initializer=normc_initializer(1.0),
|
||||
activation=tf.nn.tanh,
|
||||
name=label)
|
||||
output = tf.layers.dense(
|
||||
last_layer,
|
||||
action_embed_size,
|
||||
weights_initializer=normc_initializer(0.01),
|
||||
activation_fn=None,
|
||||
scope="fc_out")
|
||||
kernel_initializer=normc_initializer(0.01),
|
||||
activation=None,
|
||||
name="fc_out")
|
||||
|
||||
# Expand the model output to [BATCH, 1, EMBED_SIZE]. Note that the
|
||||
# avail actions tensor is of shape [BATCH, MAX_ACTIONS, EMBED_SIZE].
|
||||
|
||||
@@ -12,7 +12,11 @@ from ray.rllib.utils import try_import_tf
|
||||
tf = try_import_tf()
|
||||
|
||||
if tf:
|
||||
use_tf150_api = (distutils.version.LooseVersion(tf.VERSION) >=
|
||||
if hasattr(tf, "__version__"):
|
||||
version = tf.__version__
|
||||
else:
|
||||
version = tf.VERSION
|
||||
use_tf150_api = (distutils.version.LooseVersion(version) >=
|
||||
distutils.version.LooseVersion("1.5.0"))
|
||||
else:
|
||||
use_tf150_api = False
|
||||
|
||||
@@ -21,8 +21,6 @@ class FullyConnectedNetwork(Model):
|
||||
model that processes the components separately, use _build_layers_v2().
|
||||
"""
|
||||
|
||||
import tensorflow.contrib.slim as slim
|
||||
|
||||
hiddens = options.get("fcnet_hiddens")
|
||||
activation = get_activation_fn(options.get("fcnet_activation"))
|
||||
|
||||
@@ -31,18 +29,18 @@ class FullyConnectedNetwork(Model):
|
||||
last_layer = inputs
|
||||
for size in hiddens:
|
||||
label = "fc{}".format(i)
|
||||
last_layer = slim.fully_connected(
|
||||
last_layer = tf.layers.dense(
|
||||
last_layer,
|
||||
size,
|
||||
weights_initializer=normc_initializer(1.0),
|
||||
activation_fn=activation,
|
||||
scope=label)
|
||||
kernel_initializer=normc_initializer(1.0),
|
||||
activation=activation,
|
||||
name=label)
|
||||
i += 1
|
||||
label = "fc_out"
|
||||
output = slim.fully_connected(
|
||||
output = tf.layers.dense(
|
||||
last_layer,
|
||||
num_outputs,
|
||||
weights_initializer=normc_initializer(0.01),
|
||||
activation_fn=None,
|
||||
scope=label)
|
||||
kernel_initializer=normc_initializer(0.01),
|
||||
activation=None,
|
||||
name=label)
|
||||
return output, last_layer
|
||||
|
||||
@@ -38,8 +38,6 @@ class LSTM(Model):
|
||||
|
||||
@override(Model)
|
||||
def _build_layers_v2(self, input_dict, num_outputs, options):
|
||||
import tensorflow.contrib.rnn as rnn
|
||||
|
||||
cell_size = options.get("lstm_cell_size")
|
||||
if options.get("lstm_use_prev_action_reward"):
|
||||
action_dim = int(
|
||||
@@ -76,7 +74,7 @@ class LSTM(Model):
|
||||
self.state_in = [c_in, h_in]
|
||||
|
||||
# Setup LSTM outputs
|
||||
state_in = rnn.LSTMStateTuple(c_in, h_in)
|
||||
state_in = tf.nn.rnn_cell.LSTMStateTuple(c_in, h_in)
|
||||
lstm_out, lstm_state = tf.nn.dynamic_rnn(
|
||||
lstm,
|
||||
last_layer,
|
||||
|
||||
@@ -15,8 +15,6 @@ class VisionNetwork(Model):
|
||||
|
||||
@override(Model)
|
||||
def _build_layers_v2(self, input_dict, num_outputs, options):
|
||||
import tensorflow.contrib.slim as slim
|
||||
|
||||
inputs = input_dict["obs"]
|
||||
filters = options.get("conv_filters")
|
||||
if not filters:
|
||||
@@ -26,28 +24,29 @@ class VisionNetwork(Model):
|
||||
|
||||
with tf.name_scope("vision_net"):
|
||||
for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
|
||||
inputs = slim.conv2d(
|
||||
inputs = tf.layers.conv2d(
|
||||
inputs,
|
||||
out_size,
|
||||
kernel,
|
||||
stride,
|
||||
activation_fn=activation,
|
||||
scope="conv{}".format(i))
|
||||
activation=activation,
|
||||
padding="same",
|
||||
name="conv{}".format(i))
|
||||
out_size, kernel, stride = filters[-1]
|
||||
fc1 = slim.conv2d(
|
||||
fc1 = tf.layers.conv2d(
|
||||
inputs,
|
||||
out_size,
|
||||
kernel,
|
||||
stride,
|
||||
activation_fn=activation,
|
||||
padding="VALID",
|
||||
scope="fc1")
|
||||
fc2 = slim.conv2d(
|
||||
activation=activation,
|
||||
padding="valid",
|
||||
name="fc1")
|
||||
fc2 = tf.layers.conv2d(
|
||||
fc1,
|
||||
num_outputs, [1, 1],
|
||||
activation_fn=None,
|
||||
normalizer_fn=None,
|
||||
scope="fc2")
|
||||
activation=None,
|
||||
padding="same",
|
||||
name="fc2")
|
||||
return flatten(fc2), flatten(fc1)
|
||||
|
||||
|
||||
|
||||
@@ -17,6 +17,9 @@ from ray.rllib.optimizers.aso_minibatch_buffer import MinibatchBuffer
|
||||
from ray.rllib.optimizers.multi_gpu_impl import LocalSyncParallelOptimizer
|
||||
from ray.rllib.utils.annotations import override
|
||||
from ray.rllib.utils.timer import TimerStat
|
||||
from ray.rllib.utils import try_import_tf
|
||||
|
||||
tf = try_import_tf()
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -38,9 +41,6 @@ class TFMultiGPULearner(LearnerThread):
|
||||
learner_queue_size=16,
|
||||
num_data_load_threads=16,
|
||||
_fake_gpus=False):
|
||||
# Multi-GPU requires TensorFlow to function.
|
||||
import tensorflow as tf
|
||||
|
||||
LearnerThread.__init__(self, local_evaluator, minibatch_buffer_size,
|
||||
num_sgd_iter, learner_queue_size)
|
||||
self.lr = lr
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import gym
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
import unittest
|
||||
from gym.spaces import Box, Discrete, Tuple
|
||||
|
||||
@@ -12,6 +11,9 @@ from ray.rllib.models.preprocessors import (NoPreprocessor, OneHotPreprocessor,
|
||||
Preprocessor)
|
||||
from ray.rllib.models.fcnet import FullyConnectedNetwork
|
||||
from ray.rllib.models.visionnet import VisionNetwork
|
||||
from ray.rllib.utils import try_import_tf
|
||||
|
||||
tf = try_import_tf()
|
||||
|
||||
|
||||
class CustomPreprocessor(Preprocessor):
|
||||
|
||||
@@ -6,8 +6,6 @@ import gym
|
||||
import numpy as np
|
||||
import pickle
|
||||
import unittest
|
||||
import tensorflow as tf
|
||||
import tensorflow.contrib.rnn as rnn
|
||||
|
||||
import ray
|
||||
from ray.rllib.agents.ppo import PPOTrainer
|
||||
@@ -16,6 +14,9 @@ from ray.rllib.models.lstm import add_time_dimension, chop_into_sequences
|
||||
from ray.rllib.models.misc import linear, normc_initializer
|
||||
from ray.rllib.models.model import Model
|
||||
from ray.tune.registry import register_env
|
||||
from ray.rllib.utils import try_import_tf
|
||||
|
||||
tf = try_import_tf()
|
||||
|
||||
|
||||
class LSTMUtilsTest(unittest.TestCase):
|
||||
@@ -104,7 +105,7 @@ class RNNSpyModel(Model):
|
||||
last_layer = add_time_dimension(features, self.seq_lens)
|
||||
|
||||
# Setup the LSTM cell
|
||||
lstm = rnn.BasicLSTMCell(cell_size, state_is_tuple=True)
|
||||
lstm = tf.nn.rnn_cell.BasicLSTMCell(cell_size, state_is_tuple=True)
|
||||
self.state_init = [
|
||||
np.zeros(lstm.state_size.c, np.float32),
|
||||
np.zeros(lstm.state_size.h, np.float32)
|
||||
@@ -121,7 +122,7 @@ class RNNSpyModel(Model):
|
||||
self.state_in = [c_in, h_in]
|
||||
|
||||
# Setup LSTM outputs
|
||||
state_in = rnn.LSTMStateTuple(c_in, h_in)
|
||||
state_in = tf.nn.rnn_cell.LSTMStateTuple(c_in, h_in)
|
||||
lstm_out, lstm_state = tf.nn.dynamic_rnn(
|
||||
lstm,
|
||||
last_layer,
|
||||
|
||||
@@ -7,8 +7,6 @@ import pickle
|
||||
from gym import spaces
|
||||
from gym.envs.registration import EnvSpec
|
||||
import gym
|
||||
import tensorflow.contrib.slim as slim
|
||||
import tensorflow as tf
|
||||
import unittest
|
||||
|
||||
import ray
|
||||
@@ -25,6 +23,9 @@ from ray.rllib.models.pytorch.model import TorchModel
|
||||
from ray.rllib.rollout import rollout
|
||||
from ray.rllib.tests.test_external_env import SimpleServing
|
||||
from ray.tune.registry import register_env
|
||||
from ray.rllib.utils import try_import_tf
|
||||
|
||||
tf = try_import_tf()
|
||||
|
||||
DICT_SPACE = spaces.Dict({
|
||||
"sensors": spaces.Dict({
|
||||
@@ -179,8 +180,8 @@ class DictSpyModel(Model):
|
||||
stateful=True)
|
||||
|
||||
with tf.control_dependencies([spy_fn]):
|
||||
output = slim.fully_connected(
|
||||
input_dict["obs"]["sensors"]["position"], num_outputs)
|
||||
output = tf.layers.dense(input_dict["obs"]["sensors"]["position"],
|
||||
num_outputs)
|
||||
return output, output
|
||||
|
||||
|
||||
@@ -208,7 +209,7 @@ class TupleSpyModel(Model):
|
||||
stateful=True)
|
||||
|
||||
with tf.control_dependencies([spy_fn]):
|
||||
output = slim.fully_connected(input_dict["obs"][0], num_outputs)
|
||||
output = tf.layers.dense(input_dict["obs"][0], num_outputs)
|
||||
return output, output
|
||||
|
||||
|
||||
|
||||
@@ -4,7 +4,6 @@ from __future__ import print_function
|
||||
|
||||
import gym
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
import time
|
||||
import unittest
|
||||
|
||||
@@ -16,6 +15,9 @@ from ray.rllib.evaluation.policy_evaluator import PolicyEvaluator
|
||||
from ray.rllib.optimizers import AsyncGradientsOptimizer, AsyncSamplesOptimizer
|
||||
from ray.rllib.optimizers.aso_tree_aggregator import TreeAggregator
|
||||
from ray.rllib.tests.mock_evaluator import _MockEvaluator
|
||||
from ray.rllib.utils import try_import_tf
|
||||
|
||||
tf = try_import_tf()
|
||||
|
||||
|
||||
class AsyncOptimizerTest(unittest.TestCase):
|
||||
|
||||
@@ -33,10 +33,15 @@ def try_import_tf():
|
||||
return None
|
||||
|
||||
try:
|
||||
import tensorflow as tf
|
||||
import tensorflow.compat.v1 as tf
|
||||
tf.disable_v2_behavior()
|
||||
return tf
|
||||
except ImportError:
|
||||
return None
|
||||
try:
|
||||
import tensorflow as tf
|
||||
return tf
|
||||
except ImportError:
|
||||
return None
|
||||
|
||||
|
||||
__all__ = [
|
||||
|
||||
Reference in New Issue
Block a user