[rllib] Propagate model options correctly in ARS / ES, to action dist of PPO (#2974)

* fix

* fix

* fix it

* propagate conf to action dist

* move carla example too

* rr

* Update policies.py

* wip

* lint
This commit is contained in:
Eric Liang
2018-10-01 12:49:39 -07:00
committed by GitHub
parent e4bea8d10e
commit b45bed4bce
29 changed files with 322 additions and 377 deletions
+20 -45
View File
@@ -25,19 +25,17 @@ Result = namedtuple("Result", [
])
DEFAULT_CONFIG = with_common_config({
'noise_stdev': 0.02, # std deviation of parameter noise
'num_rollouts': 32, # number of perturbs to try
'rollouts_used': 32, # number of perturbs to keep in gradient estimate
'num_workers': 2,
'sgd_stepsize': 0.01, # sgd step-size
'observation_filter': "MeanStdFilter",
'noise_size': 250000000,
'eval_prob': 0.03, # probability of evaluating the parameter rewards
'report_length': 10, # how many of the last rewards we average over
'env_config': {},
'offset': 0,
'policy_type': "LinearPolicy", # ["LinearPolicy", "MLPPolicy"]
"fcnet_hiddens": [32, 32], # fcnet structure of MLPPolicy
"noise_stdev": 0.02, # std deviation of parameter noise
"num_rollouts": 32, # number of perturbs to try
"rollouts_used": 32, # number of perturbs to keep in gradient estimate
"num_workers": 2,
"sgd_stepsize": 0.01, # sgd step-size
"observation_filter": "MeanStdFilter",
"noise_size": 250000000,
"eval_prob": 0.03, # probability of evaluating the parameter rewards
"report_length": 10, # how many of the last rewards we average over
"env_config": {},
"offset": 0,
})
@@ -67,15 +65,9 @@ class SharedNoiseTable(object):
@ray.remote
class Worker(object):
def __init__(self,
config,
policy_params,
env_creator,
noise,
min_task_runtime=0.2):
def __init__(self, config, env_creator, noise, min_task_runtime=0.2):
self.min_task_runtime = min_task_runtime
self.config = config
self.policy_params = policy_params
self.noise = SharedNoiseTable(noise)
self.env = env_creator(config["env_config"])
@@ -83,15 +75,9 @@ class Worker(object):
self.preprocessor = models.ModelCatalog.get_preprocessor(self.env)
self.sess = utils.make_session(single_threaded=True)
if config["policy_type"] == "LinearPolicy":
self.policy = policies.LinearPolicy(
self.sess, self.env.action_space, self.preprocessor,
config["observation_filter"], **policy_params)
else:
self.policy = policies.MLPPolicy(
self.sess, self.env.action_space, self.preprocessor,
config["observation_filter"], config["fcnet_hiddens"],
**policy_params)
self.policy = policies.GenericPolicy(
self.sess, self.env.action_space, self.preprocessor,
config["observation_filter"], config["model"])
def rollout(self, timestep_limit, add_noise=False):
rollout_rewards, rollout_length = policies.rollout(
@@ -160,25 +146,14 @@ class ARSAgent(Agent):
return Resources(cpu=1, gpu=0, extra_cpu=cf["num_workers"])
def _init(self):
policy_params = {"action_noise_std": 0.0}
# register the linear network
utils.register_linear_network()
env = self.env_creator(self.config["env_config"])
from ray.rllib import models
preprocessor = models.ModelCatalog.get_preprocessor(env)
self.sess = utils.make_session(single_threaded=False)
if self.config["policy_type"] == "LinearPolicy":
self.policy = policies.LinearPolicy(
self.sess, env.action_space, preprocessor,
self.config["observation_filter"], **policy_params)
else:
self.policy = policies.MLPPolicy(
self.sess, env.action_space, preprocessor,
self.config["observation_filter"],
self.config["fcnet_hiddens"], **policy_params)
self.policy = policies.GenericPolicy(
self.sess, env.action_space, preprocessor,
self.config["observation_filter"], self.config["model"])
self.optimizer = optimizers.SGD(self.policy,
self.config["sgd_stepsize"])
@@ -194,8 +169,8 @@ class ARSAgent(Agent):
# Create the actors.
print("Creating actors.")
self.workers = [
Worker.remote(self.config, policy_params, self.env_creator,
noise_id) for _ in range(self.config["num_workers"])
Worker.remote(self.config, self.env_creator, noise_id)
for _ in range(self.config["num_workers"])
]
self.episodes_so_far = 0
+4 -39
View File
@@ -11,7 +11,6 @@ import tensorflow as tf
import ray
from ray.rllib.utils.filter import get_filter
from ray.rllib.utils.error import UnsupportedSpaceException
from ray.rllib.models import ModelCatalog
@@ -59,14 +58,8 @@ class GenericPolicy(object):
action_space,
preprocessor,
observation_filter,
action_noise_std,
options={}):
if len(preprocessor.shape) > 1:
raise UnsupportedSpaceException(
"Observation space {} is not supported with ARS.".format(
preprocessor.shape))
model_config,
action_noise_std=0.0):
self.sess = sess
self.action_space = action_space
self.action_noise_std = action_noise_std
@@ -78,9 +71,9 @@ class GenericPolicy(object):
# Policy network.
dist_class, dist_dim = ModelCatalog.get_action_dist(
action_space, dist_type="deterministic")
action_space, model_config, dist_type="deterministic")
model = ModelCatalog.get_model(self.inputs, dist_dim, options=options)
model = ModelCatalog.get_model(self.inputs, dist_dim, model_config)
dist = dist_class(model.outputs)
self.sampler = dist.sample()
@@ -106,31 +99,3 @@ class GenericPolicy(object):
def get_weights(self):
return self.variables.get_flat()
class LinearPolicy(GenericPolicy):
def __init__(self, sess, action_space, preprocessor, observation_filter,
action_noise_std):
options = {"custom_model": "LinearNetwork"}
GenericPolicy.__init__(
self,
sess,
action_space,
preprocessor,
observation_filter,
action_noise_std,
options=options)
class MLPPolicy(GenericPolicy):
def __init__(self, sess, action_space, preprocessor, observation_filter,
fcnet_hiddens, action_noise_std):
options = {"fcnet_hiddens": fcnet_hiddens}
GenericPolicy.__init__(
self,
sess,
action_space,
preprocessor,
observation_filter,
action_noise_std,
options=options)
-21
View File
@@ -7,9 +7,6 @@ from __future__ import print_function
import numpy as np
import tensorflow as tf
from ray.rllib.models import ModelCatalog, Model
import tensorflow.contrib.slim as slim
from ray.rllib.models.misc import normc_initializer
def compute_ranks(x):
@@ -62,21 +59,3 @@ def batched_weighted_sum(weights, vecs, batch_size):
np.asarray(batch_vecs, dtype=np.float32))
num_items_summed += len(batch_weights)
return total, num_items_summed
class LinearNetwork(Model):
"""Generic linear network."""
def _build_layers(self, inputs, num_outputs, _):
with tf.name_scope("linear"):
output = slim.fully_connected(
inputs,
num_outputs,
weights_initializer=normc_initializer(0.01),
activation_fn=None,
)
return output, inputs
def register_linear_network():
ModelCatalog.register_custom_model("LinearNetwork", LinearNetwork)
+7 -5
View File
@@ -10,7 +10,7 @@ import numpy as np
import time
import ray
from ray.rllib.agents import Agent
from ray.rllib.agents import Agent, with_common_config
from ray.tune.trial import Resources
from ray.rllib.agents.es import optimizers
@@ -24,7 +24,7 @@ Result = namedtuple("Result", [
"eval_returns", "eval_lengths"
])
DEFAULT_CONFIG = {
DEFAULT_CONFIG = with_common_config({
"l2_coeff": 0.005,
"noise_stdev": 0.02,
"episodes_per_batch": 1000,
@@ -38,7 +38,8 @@ DEFAULT_CONFIG = {
"report_length": 10,
"env": None,
"env_config": {},
}
"model": {},
})
@ray.remote
@@ -81,7 +82,7 @@ class Worker(object):
self.sess = utils.make_session(single_threaded=True)
self.policy = policies.GenericPolicy(
self.sess, self.env.action_space, self.preprocessor,
config["observation_filter"], **policy_params)
config["observation_filter"], config["model"], **policy_params)
def rollout(self, timestep_limit, add_noise=True):
rollout_rewards, rollout_length = policies.rollout(
@@ -161,7 +162,8 @@ class ESAgent(Agent):
self.sess = utils.make_session(single_threaded=False)
self.policy = policies.GenericPolicy(
self.sess, env.action_space, preprocessor,
self.config["observation_filter"], **policy_params)
self.config["observation_filter"], self.config["model"],
**policy_params)
self.optimizer = optimizers.Adam(self.policy, self.config["stepsize"])
self.report_length = self.config["report_length"]
+3 -3
View File
@@ -39,7 +39,7 @@ def rollout(policy, env, timestep_limit=None, add_noise=False):
class GenericPolicy(object):
def __init__(self, sess, action_space, preprocessor, observation_filter,
action_noise_std):
model_options, action_noise_std):
self.sess = sess
self.action_space = action_space
self.action_noise_std = action_noise_std
@@ -51,8 +51,8 @@ class GenericPolicy(object):
# Policy network.
dist_class, dist_dim = ModelCatalog.get_action_dist(
self.action_space, dist_type="deterministic")
model = ModelCatalog.get_model(self.inputs, dist_dim)
self.action_space, model_options, dist_type="deterministic")
model = ModelCatalog.get_model(self.inputs, dist_dim, model_options)
dist = dist_class(model.outputs)
self.sampler = dist.sample()
@@ -24,8 +24,8 @@ class PGPolicyGraph(TFPolicyGraph):
obs = tf.placeholder(tf.float32, shape=[None] + list(obs_space.shape))
dist_class, self.logit_dim = ModelCatalog.get_action_dist(
action_space, self.config["model"])
self.model = ModelCatalog.get_model(
obs, self.logit_dim, options=self.config["model"])
self.model = ModelCatalog.get_model(obs, self.logit_dim,
self.config["model"])
action_dist = dist_class(self.model.outputs) # logit for each action
# Setup policy loss
@@ -54,7 +54,7 @@ class PPOLoss(object):
vf_loss_coeff (float): Coefficient of the value function loss
use_gae (bool): If true, use the Generalized Advantage Estimator.
"""
dist_cls, _ = ModelCatalog.get_action_dist(action_space)
dist_cls, _ = ModelCatalog.get_action_dist(action_space, {})
prev_dist = dist_cls(logits)
# Make loss functions.
logp_ratio = tf.exp(
@@ -108,7 +108,8 @@ class PPOPolicyGraph(LearningRateSchedule, TFPolicyGraph):
self.config = config
self.kl_coeff_val = self.config["kl_coeff"]
self.kl_target = self.config["kl_target"]
dist_cls, logit_dim = ModelCatalog.get_action_dist(action_space)
dist_cls, logit_dim = ModelCatalog.get_action_dist(
action_space, self.config["model"])
if existing_inputs:
obs_ph, value_targets_ph, adv_ph, act_ph, \
+14
View File
@@ -0,0 +1,14 @@
(Experimental) OpenAI gym environment for https://github.com/carla-simulator/carla
To run, first download and unpack the Carla binaries from this URL: https://github.com/carla-simulator/carla/releases/tag/0.7.0
Note that currently you also need to clone the Python code from `carla/benchmark_branch` which includes the Carla planner.
Then, you can try running env.py to drive the car. Run one of the train_* scripts to attempt training.
$ pkill -9 Carla
$ export CARLA_SERVER=/PATH/TO/CARLA_0.7.0/CarlaUE4.sh
$ export CARLA_PY_PATH=/PATH/TO/CARLA_BENCHMARK_BRANCH_REPO/PythonClient
$ python env.py
Check out the scenarios.py file for different training and test scenarios that can be used.
@@ -0,0 +1,51 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import ray
from ray.tune import register_env, run_experiments
from env import CarlaEnv, ENV_CONFIG
from models import register_carla_model
from scenarios import LANE_KEEP
env_name = "carla_env"
env_config = ENV_CONFIG.copy()
env_config.update({
"verbose": False,
"x_res": 80,
"y_res": 80,
"use_depth_camera": False,
"discrete_actions": False,
"server_map": "/Game/Maps/Town02",
"reward_function": "lane_keep",
"enable_planner": False,
"scenarios": [LANE_KEEP],
})
register_env(env_name, lambda env_config: CarlaEnv(env_config))
register_carla_model()
ray.init()
run_experiments({
"carla-a3c": {
"run": "A3C",
"env": "carla_env",
"config": {
"env_config": env_config,
"model": {
"custom_model": "carla",
"custom_options": {
"image_shape": [80, 80, 6],
},
"conv_filters": [
[16, [8, 8], 4],
[32, [4, 4], 2],
[512, [10, 10], 1],
],
},
"gamma": 0.8,
"num_workers": 1,
},
},
})
@@ -0,0 +1,53 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import ray
from ray.tune import register_env, run_experiments
from env import CarlaEnv, ENV_CONFIG
from models import register_carla_model
from scenarios import LANE_KEEP
env_name = "carla_env"
env_config = ENV_CONFIG.copy()
env_config.update({
"verbose": False,
"x_res": 80,
"y_res": 80,
"use_depth_camera": False,
"discrete_actions": True,
"server_map": "/Game/Maps/Town02",
"reward_function": "lane_keep",
"enable_planner": False,
"scenarios": [LANE_KEEP],
})
register_env(env_name, lambda env_config: CarlaEnv(env_config))
register_carla_model()
ray.init()
run_experiments({
"carla-dqn": {
"run": "DQN",
"env": "carla_env",
"config": {
"env_config": env_config,
"model": {
"custom_model": "carla",
"custom_options": {
"image_shape": [80, 80, 6],
},
"conv_filters": [
[16, [8, 8], 4],
[32, [4, 4], 2],
[512, [10, 10], 1],
],
},
"timesteps_per_iteration": 100,
"learning_starts": 1000,
"schedule_max_timesteps": 100000,
"gamma": 0.8,
},
},
})
+684
View File
@@ -0,0 +1,684 @@
"""OpenAI gym environment for Carla. Run this file for a demo."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from datetime import datetime
import atexit
import cv2
import os
import json
import random
import signal
import subprocess
import sys
import time
import traceback
import numpy as np
try:
import scipy.misc
except Exception:
pass
import gym
from gym.spaces import Box, Discrete, Tuple
from scenarios import DEFAULT_SCENARIO
# Set this where you want to save image outputs (or empty string to disable)
CARLA_OUT_PATH = os.environ.get("CARLA_OUT", os.path.expanduser("~/carla_out"))
if CARLA_OUT_PATH and not os.path.exists(CARLA_OUT_PATH):
os.makedirs(CARLA_OUT_PATH)
# Set this to the path of your Carla binary
SERVER_BINARY = os.environ.get("CARLA_SERVER",
os.path.expanduser("~/CARLA_0.7.0/CarlaUE4.sh"))
assert os.path.exists(SERVER_BINARY)
if "CARLA_PY_PATH" in os.environ:
sys.path.append(os.path.expanduser(os.environ["CARLA_PY_PATH"]))
else:
# TODO(ekl) switch this to the binary path once the planner is in master
sys.path.append(os.path.expanduser("~/carla/PythonClient/"))
try:
from carla.client import CarlaClient
from carla.sensor import Camera
from carla.settings import CarlaSettings
from carla.planner.planner import Planner, REACH_GOAL, GO_STRAIGHT, \
TURN_RIGHT, TURN_LEFT, LANE_FOLLOW
except Exception as e:
print("Failed to import Carla python libs, try setting $CARLA_PY_PATH")
raise e
# Carla planner commands
COMMANDS_ENUM = {
REACH_GOAL: "REACH_GOAL",
GO_STRAIGHT: "GO_STRAIGHT",
TURN_RIGHT: "TURN_RIGHT",
TURN_LEFT: "TURN_LEFT",
LANE_FOLLOW: "LANE_FOLLOW",
}
# Mapping from string repr to one-hot encoding index to feed to the model
COMMAND_ORDINAL = {
"REACH_GOAL": 0,
"GO_STRAIGHT": 1,
"TURN_RIGHT": 2,
"TURN_LEFT": 3,
"LANE_FOLLOW": 4,
}
# Number of retries if the server doesn't respond
RETRIES_ON_ERROR = 5
# Dummy Z coordinate to use when we only care about (x, y)
GROUND_Z = 22
# Default environment configuration
ENV_CONFIG = {
"log_images": True,
"enable_planner": True,
"framestack": 2, # note: only [1, 2] currently supported
"convert_images_to_video": True,
"early_terminate_on_collision": True,
"verbose": True,
"reward_function": "custom",
"render_x_res": 800,
"render_y_res": 600,
"x_res": 80,
"y_res": 80,
"server_map": "/Game/Maps/Town02",
"scenarios": [DEFAULT_SCENARIO],
"use_depth_camera": False,
"discrete_actions": True,
"squash_action_logits": False,
}
DISCRETE_ACTIONS = {
# coast
0: [0.0, 0.0],
# turn left
1: [0.0, -0.5],
# turn right
2: [0.0, 0.5],
# forward
3: [1.0, 0.0],
# brake
4: [-0.5, 0.0],
# forward left
5: [1.0, -0.5],
# forward right
6: [1.0, 0.5],
# brake left
7: [-0.5, -0.5],
# brake right
8: [-0.5, 0.5],
}
live_carla_processes = set()
def cleanup():
print("Killing live carla processes", live_carla_processes)
for pgid in live_carla_processes:
os.killpg(pgid, signal.SIGKILL)
atexit.register(cleanup)
class CarlaEnv(gym.Env):
def __init__(self, config=ENV_CONFIG):
self.config = config
self.city = self.config["server_map"].split("/")[-1]
if self.config["enable_planner"]:
self.planner = Planner(self.city)
if config["discrete_actions"]:
self.action_space = Discrete(len(DISCRETE_ACTIONS))
else:
self.action_space = Box(-1.0, 1.0, shape=(2, ), dtype=np.float32)
if config["use_depth_camera"]:
image_space = Box(
-1.0,
1.0,
shape=(config["y_res"], config["x_res"],
1 * config["framestack"]),
dtype=np.float32)
else:
image_space = Box(
0,
255,
shape=(config["y_res"], config["x_res"],
3 * config["framestack"]),
dtype=np.uint8)
self.observation_space = Tuple( # forward_speed, dist to goal
[
image_space,
Discrete(len(COMMANDS_ENUM)), # next_command
Box(-128.0, 128.0, shape=(2, ), dtype=np.float32)
])
# TODO(ekl) this isn't really a proper gym spec
self._spec = lambda: None
self._spec.id = "Carla-v0"
self.server_port = None
self.server_process = None
self.client = None
self.num_steps = 0
self.total_reward = 0
self.prev_measurement = None
self.prev_image = None
self.episode_id = None
self.measurements_file = None
self.weather = None
self.scenario = None
self.start_pos = None
self.end_pos = None
self.start_coord = None
self.end_coord = None
self.last_obs = None
def init_server(self):
print("Initializing new Carla server...")
# Create a new server process and start the client.
self.server_port = random.randint(10000, 60000)
self.server_process = subprocess.Popen(
[
SERVER_BINARY, self.config["server_map"], "-windowed",
"-ResX=400", "-ResY=300", "-carla-server",
"-carla-world-port={}".format(self.server_port)
],
preexec_fn=os.setsid,
stdout=open(os.devnull, "w"))
live_carla_processes.add(os.getpgid(self.server_process.pid))
for i in range(RETRIES_ON_ERROR):
try:
self.client = CarlaClient("localhost", self.server_port)
return self.client.connect()
except Exception as e:
print("Error connecting: {}, attempt {}".format(e, i))
time.sleep(2)
def clear_server_state(self):
print("Clearing Carla server state")
try:
if self.client:
self.client.disconnect()
self.client = None
except Exception as e:
print("Error disconnecting client: {}".format(e))
pass
if self.server_process:
pgid = os.getpgid(self.server_process.pid)
os.killpg(pgid, signal.SIGKILL)
live_carla_processes.remove(pgid)
self.server_port = None
self.server_process = None
def __del__(self):
self.clear_server_state()
def reset(self):
error = None
for _ in range(RETRIES_ON_ERROR):
try:
if not self.server_process:
self.init_server()
return self._reset()
except Exception as e:
print("Error during reset: {}".format(traceback.format_exc()))
self.clear_server_state()
error = e
raise error
def _reset(self):
self.num_steps = 0
self.total_reward = 0
self.prev_measurement = None
self.prev_image = None
self.episode_id = datetime.today().strftime("%Y-%m-%d_%H-%M-%S_%f")
self.measurements_file = None
# Create a CarlaSettings object. This object is a wrapper around
# the CarlaSettings.ini file. Here we set the configuration we
# want for the new episode.
settings = CarlaSettings()
self.scenario = random.choice(self.config["scenarios"])
assert self.scenario["city"] == self.city, (self.scenario, self.city)
self.weather = random.choice(self.scenario["weather_distribution"])
settings.set(
SynchronousMode=True,
SendNonPlayerAgentsInfo=True,
NumberOfVehicles=self.scenario["num_vehicles"],
NumberOfPedestrians=self.scenario["num_pedestrians"],
WeatherId=self.weather)
settings.randomize_seeds()
if self.config["use_depth_camera"]:
camera1 = Camera("CameraDepth", PostProcessing="Depth")
camera1.set_image_size(self.config["render_x_res"],
self.config["render_y_res"])
camera1.set_position(30, 0, 130)
settings.add_sensor(camera1)
camera2 = Camera("CameraRGB")
camera2.set_image_size(self.config["render_x_res"],
self.config["render_y_res"])
camera2.set_position(30, 0, 130)
settings.add_sensor(camera2)
# Setup start and end positions
scene = self.client.load_settings(settings)
positions = scene.player_start_spots
self.start_pos = positions[self.scenario["start_pos_id"]]
self.end_pos = positions[self.scenario["end_pos_id"]]
self.start_coord = [
self.start_pos.location.x // 100, self.start_pos.location.y // 100
]
self.end_coord = [
self.end_pos.location.x // 100, self.end_pos.location.y // 100
]
print("Start pos {} ({}), end {} ({})".format(
self.scenario["start_pos_id"], self.start_coord,
self.scenario["end_pos_id"], self.end_coord))
# Notify the server that we want to start the episode at the
# player_start index. This function blocks until the server is ready
# to start the episode.
print("Starting new episode...")
self.client.start_episode(self.scenario["start_pos_id"])
image, py_measurements = self._read_observation()
self.prev_measurement = py_measurements
return self.encode_obs(self.preprocess_image(image), py_measurements)
def encode_obs(self, image, py_measurements):
assert self.config["framestack"] in [1, 2]
prev_image = self.prev_image
self.prev_image = image
if prev_image is None:
prev_image = image
if self.config["framestack"] == 2:
image = np.concatenate([prev_image, image], axis=2)
obs = (image, COMMAND_ORDINAL[py_measurements["next_command"]], [
py_measurements["forward_speed"],
py_measurements["distance_to_goal"]
])
self.last_obs = obs
return obs
def step(self, action):
try:
obs = self._step(action)
return obs
except Exception:
print("Error during step, terminating episode early",
traceback.format_exc())
self.clear_server_state()
return (self.last_obs, 0.0, True, {})
def _step(self, action):
if self.config["discrete_actions"]:
action = DISCRETE_ACTIONS[int(action)]
assert len(action) == 2, "Invalid action {}".format(action)
if self.config["squash_action_logits"]:
forward = 2 * float(sigmoid(action[0]) - 0.5)
throttle = float(np.clip(forward, 0, 1))
brake = float(np.abs(np.clip(forward, -1, 0)))
steer = 2 * float(sigmoid(action[1]) - 0.5)
else:
throttle = float(np.clip(action[0], 0, 1))
brake = float(np.abs(np.clip(action[0], -1, 0)))
steer = float(np.clip(action[1], -1, 1))
reverse = False
hand_brake = False
if self.config["verbose"]:
print("steer", steer, "throttle", throttle, "brake", brake,
"reverse", reverse)
self.client.send_control(
steer=steer,
throttle=throttle,
brake=brake,
hand_brake=hand_brake,
reverse=reverse)
# Process observations
image, py_measurements = self._read_observation()
if self.config["verbose"]:
print("Next command", py_measurements["next_command"])
if type(action) is np.ndarray:
py_measurements["action"] = [float(a) for a in action]
else:
py_measurements["action"] = action
py_measurements["control"] = {
"steer": steer,
"throttle": throttle,
"brake": brake,
"reverse": reverse,
"hand_brake": hand_brake,
}
reward = compute_reward(self, self.prev_measurement, py_measurements)
self.total_reward += reward
py_measurements["reward"] = reward
py_measurements["total_reward"] = self.total_reward
done = (self.num_steps > self.scenario["max_steps"]
or py_measurements["next_command"] == "REACH_GOAL"
or (self.config["early_terminate_on_collision"]
and collided_done(py_measurements)))
py_measurements["done"] = done
self.prev_measurement = py_measurements
# Write out measurements to file
if CARLA_OUT_PATH:
if not self.measurements_file:
self.measurements_file = open(
os.path.join(
CARLA_OUT_PATH,
"measurements_{}.json".format(self.episode_id)), "w")
self.measurements_file.write(json.dumps(py_measurements))
self.measurements_file.write("\n")
if done:
self.measurements_file.close()
self.measurements_file = None
if self.config["convert_images_to_video"]:
self.images_to_video()
self.num_steps += 1
image = self.preprocess_image(image)
return (self.encode_obs(image, py_measurements), reward, done,
py_measurements)
def images_to_video(self):
videos_dir = os.path.join(CARLA_OUT_PATH, "Videos")
if not os.path.exists(videos_dir):
os.makedirs(videos_dir)
ffmpeg_cmd = (
"ffmpeg -loglevel -8 -r 60 -f image2 -s {x_res}x{y_res} "
"-start_number 0 -i "
"{img}_%04d.jpg -vcodec libx264 {vid}.mp4 && rm -f {img}_*.jpg "
).format(
x_res=self.config["render_x_res"],
y_res=self.config["render_y_res"],
vid=os.path.join(videos_dir, self.episode_id),
img=os.path.join(CARLA_OUT_PATH, "CameraRGB", self.episode_id))
print("Executing ffmpeg command", ffmpeg_cmd)
subprocess.call(ffmpeg_cmd, shell=True)
def preprocess_image(self, image):
if self.config["use_depth_camera"]:
assert self.config["use_depth_camera"]
data = (image.data - 0.5) * 2
data = data.reshape(self.config["render_y_res"],
self.config["render_x_res"], 1)
data = cv2.resize(
data, (self.config["x_res"], self.config["y_res"]),
interpolation=cv2.INTER_AREA)
data = np.expand_dims(data, 2)
else:
data = image.data.reshape(self.config["render_y_res"],
self.config["render_x_res"], 3)
data = cv2.resize(
data, (self.config["x_res"], self.config["y_res"]),
interpolation=cv2.INTER_AREA)
data = (data.astype(np.float32) - 128) / 128
return data
def _read_observation(self):
# Read the data produced by the server this frame.
measurements, sensor_data = self.client.read_data()
# Print some of the measurements.
if self.config["verbose"]:
print_measurements(measurements)
observation = None
if self.config["use_depth_camera"]:
camera_name = "CameraDepth"
else:
camera_name = "CameraRGB"
for name, image in sensor_data.items():
if name == camera_name:
observation = image
cur = measurements.player_measurements
if self.config["enable_planner"]:
next_command = COMMANDS_ENUM[self.planner.get_next_command(
[cur.transform.location.x, cur.transform.location.y, GROUND_Z],
[
cur.transform.orientation.x, cur.transform.orientation.y,
GROUND_Z
],
[self.end_pos.location.x, self.end_pos.location.y, GROUND_Z], [
self.end_pos.orientation.x, self.end_pos.orientation.y,
GROUND_Z
])]
else:
next_command = "LANE_FOLLOW"
if next_command == "REACH_GOAL":
distance_to_goal = 0.0 # avoids crash in planner
elif self.config["enable_planner"]:
distance_to_goal = self.planner.get_shortest_path_distance([
cur.transform.location.x, cur.transform.location.y, GROUND_Z
], [
cur.transform.orientation.x, cur.transform.orientation.y,
GROUND_Z
], [self.end_pos.location.x, self.end_pos.location.y, GROUND_Z], [
self.end_pos.orientation.x, self.end_pos.orientation.y,
GROUND_Z
]) / 100
else:
distance_to_goal = -1
distance_to_goal_euclidean = float(
np.linalg.norm([
cur.transform.location.x - self.end_pos.location.x,
cur.transform.location.y - self.end_pos.location.y
]) / 100)
py_measurements = {
"episode_id": self.episode_id,
"step": self.num_steps,
"x": cur.transform.location.x,
"y": cur.transform.location.y,
"x_orient": cur.transform.orientation.x,
"y_orient": cur.transform.orientation.y,
"forward_speed": cur.forward_speed,
"distance_to_goal": distance_to_goal,
"distance_to_goal_euclidean": distance_to_goal_euclidean,
"collision_vehicles": cur.collision_vehicles,
"collision_pedestrians": cur.collision_pedestrians,
"collision_other": cur.collision_other,
"intersection_offroad": cur.intersection_offroad,
"intersection_otherlane": cur.intersection_otherlane,
"weather": self.weather,
"map": self.config["server_map"],
"start_coord": self.start_coord,
"end_coord": self.end_coord,
"current_scenario": self.scenario,
"x_res": self.config["x_res"],
"y_res": self.config["y_res"],
"num_vehicles": self.scenario["num_vehicles"],
"num_pedestrians": self.scenario["num_pedestrians"],
"max_steps": self.scenario["max_steps"],
"next_command": next_command,
}
if CARLA_OUT_PATH and self.config["log_images"]:
for name, image in sensor_data.items():
out_dir = os.path.join(CARLA_OUT_PATH, name)
if not os.path.exists(out_dir):
os.makedirs(out_dir)
out_file = os.path.join(
out_dir, "{}_{:>04}.jpg".format(self.episode_id,
self.num_steps))
scipy.misc.imsave(out_file, image.data)
assert observation is not None, sensor_data
return observation, py_measurements
def compute_reward_corl2017(env, prev, current):
reward = 0.0
cur_dist = current["distance_to_goal"]
prev_dist = prev["distance_to_goal"]
if env.config["verbose"]:
print("Cur dist {}, prev dist {}".format(cur_dist, prev_dist))
# Distance travelled toward the goal in m
reward += np.clip(prev_dist - cur_dist, -10.0, 10.0)
# Change in speed (km/h)
reward += 0.05 * (current["forward_speed"] - prev["forward_speed"])
# New collision damage
reward -= .00002 * (
current["collision_vehicles"] + current["collision_pedestrians"] +
current["collision_other"] - prev["collision_vehicles"] -
prev["collision_pedestrians"] - prev["collision_other"])
# New sidewalk intersection
reward -= 2 * (
current["intersection_offroad"] - prev["intersection_offroad"])
# New opposite lane intersection
reward -= 2 * (
current["intersection_otherlane"] - prev["intersection_otherlane"])
return reward
def compute_reward_custom(env, prev, current):
reward = 0.0
cur_dist = current["distance_to_goal"]
prev_dist = prev["distance_to_goal"]
if env.config["verbose"]:
print("Cur dist {}, prev dist {}".format(cur_dist, prev_dist))
# Distance travelled toward the goal in m
reward += np.clip(prev_dist - cur_dist, -10.0, 10.0)
# Speed reward, up 30.0 (km/h)
reward += np.clip(current["forward_speed"], 0.0, 30.0) / 10
# New collision damage
new_damage = (
current["collision_vehicles"] + current["collision_pedestrians"] +
current["collision_other"] - prev["collision_vehicles"] -
prev["collision_pedestrians"] - prev["collision_other"])
if new_damage:
reward -= 100.0
# Sidewalk intersection
reward -= current["intersection_offroad"]
# Opposite lane intersection
reward -= current["intersection_otherlane"]
# Reached goal
if current["next_command"] == "REACH_GOAL":
reward += 100.0
return reward
def compute_reward_lane_keep(env, prev, current):
reward = 0.0
# Speed reward, up 30.0 (km/h)
reward += np.clip(current["forward_speed"], 0.0, 30.0) / 10
# New collision damage
new_damage = (
current["collision_vehicles"] + current["collision_pedestrians"] +
current["collision_other"] - prev["collision_vehicles"] -
prev["collision_pedestrians"] - prev["collision_other"])
if new_damage:
reward -= 100.0
# Sidewalk intersection
reward -= current["intersection_offroad"]
# Opposite lane intersection
reward -= current["intersection_otherlane"]
return reward
REWARD_FUNCTIONS = {
"corl2017": compute_reward_corl2017,
"custom": compute_reward_custom,
"lane_keep": compute_reward_lane_keep,
}
def compute_reward(env, prev, current):
return REWARD_FUNCTIONS[env.config["reward_function"]](env, prev, current)
def print_measurements(measurements):
number_of_agents = len(measurements.non_player_agents)
player_measurements = measurements.player_measurements
message = "Vehicle at ({pos_x:.1f}, {pos_y:.1f}), "
message += "{speed:.2f} km/h, "
message += "Collision: {{vehicles={col_cars:.0f}, "
message += "pedestrians={col_ped:.0f}, other={col_other:.0f}}}, "
message += "{other_lane:.0f}% other lane, {offroad:.0f}% off-road, "
message += "({agents_num:d} non-player agents in the scene)"
message = message.format(
pos_x=player_measurements.transform.location.x / 100, # cm -> m
pos_y=player_measurements.transform.location.y / 100,
speed=player_measurements.forward_speed,
col_cars=player_measurements.collision_vehicles,
col_ped=player_measurements.collision_pedestrians,
col_other=player_measurements.collision_other,
other_lane=100 * player_measurements.intersection_otherlane,
offroad=100 * player_measurements.intersection_offroad,
agents_num=number_of_agents)
print(message)
def sigmoid(x):
x = float(x)
return np.exp(x) / (1 + np.exp(x))
def collided_done(py_measurements):
m = py_measurements
collided = (m["collision_vehicles"] > 0 or m["collision_pedestrians"] > 0
or m["collision_other"] > 0)
return bool(collided or m["total_reward"] < -100)
if __name__ == "__main__":
for _ in range(2):
env = CarlaEnv()
obs = env.reset()
print("reset", obs)
start = time.time()
done = False
i = 0
total_reward = 0.0
while not done:
i += 1
if ENV_CONFIG["discrete_actions"]:
obs, reward, done, info = env.step(1)
else:
obs, reward, done, info = env.step([0, 1, 0])
total_reward += reward
print(i, "rew", reward, "total", total_reward, "done", done)
print("{} fps".format(100 / (time.time() - start)))
+107
View File
@@ -0,0 +1,107 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim
from tensorflow.contrib.layers import xavier_initializer
from ray.rllib.models.catalog import ModelCatalog
from ray.rllib.models.misc import normc_initializer
from ray.rllib.models.model import Model
class CarlaModel(Model):
"""Carla model that can process the observation tuple.
The architecture processes the image using convolutional layers, the
metrics using fully connected layers, and then combines them with
further fully connected layers.
"""
def _build_layers(self, inputs, num_outputs, options):
# Parse options
image_shape = options["custom_options"]["image_shape"]
convs = options.get("conv_filters", [
[16, [8, 8], 4],
[32, [5, 5], 3],
[32, [5, 5], 2],
[512, [10, 10], 1],
])
hiddens = options.get("fcnet_hiddens", [64])
fcnet_activation = options.get("fcnet_activation", "tanh")
if fcnet_activation == "tanh":
activation = tf.nn.tanh
elif fcnet_activation == "relu":
activation = tf.nn.relu
# Sanity checks
image_size = np.product(image_shape)
expected_shape = [image_size + 5 + 2]
assert inputs.shape.as_list()[1:] == expected_shape, \
(inputs.shape.as_list()[1:], expected_shape)
# Reshape the input vector back into its components
vision_in = tf.reshape(inputs[:, :image_size],
[tf.shape(inputs)[0]] + image_shape)
metrics_in = inputs[:, image_size:]
print("Vision in shape", vision_in)
print("Metrics in shape", metrics_in)
# Setup vision layers
with tf.name_scope("carla_vision"):
for i, (out_size, kernel, stride) in enumerate(convs[:-1], 1):
vision_in = slim.conv2d(
vision_in,
out_size,
kernel,
stride,
scope="conv{}".format(i))
out_size, kernel, stride = convs[-1]
vision_in = slim.conv2d(
vision_in,
out_size,
kernel,
stride,
padding="VALID",
scope="conv_out")
vision_in = tf.squeeze(vision_in, [1, 2])
# Setup metrics layer
with tf.name_scope("carla_metrics"):
metrics_in = slim.fully_connected(
metrics_in,
64,
weights_initializer=xavier_initializer(),
activation_fn=activation,
scope="metrics_out")
print("Shape of vision out is", vision_in.shape)
print("Shape of metric out is", metrics_in.shape)
# Combine the metrics and vision inputs
with tf.name_scope("carla_out"):
i = 1
last_layer = tf.concat([vision_in, metrics_in], axis=1)
print("Shape of concatenated out is", last_layer.shape)
for size in hiddens:
last_layer = slim.fully_connected(
last_layer,
size,
weights_initializer=xavier_initializer(),
activation_fn=activation,
scope="fc{}".format(i))
i += 1
output = slim.fully_connected(
last_layer,
num_outputs,
weights_initializer=normc_initializer(0.01),
activation_fn=None,
scope="fc_out")
return output, last_layer
def register_carla_model():
ModelCatalog.register_custom_model("carla", CarlaModel)
@@ -0,0 +1,63 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import ray
from ray.tune import register_env, run_experiments
from env import CarlaEnv, ENV_CONFIG
from models import register_carla_model
from scenarios import LANE_KEEP
env_name = "carla_env"
env_config = ENV_CONFIG.copy()
env_config.update({
"verbose": False,
"x_res": 80,
"y_res": 80,
"use_depth_camera": False,
"discrete_actions": False,
"server_map": "/Game/Maps/Town02",
"reward_function": "lane_keep",
"enable_planner": False,
"scenarios": [LANE_KEEP],
})
register_env(env_name, lambda env_config: CarlaEnv(env_config))
register_carla_model()
ray.init()
run_experiments({
"carla-ppo": {
"run": "PPO",
"env": "carla_env",
"config": {
"env_config": env_config,
"model": {
"custom_model": "carla",
"custom_options": {
"image_shape": [80, 80, 6],
},
"conv_filters": [
[16, [8, 8], 4],
[32, [4, 4], 2],
[512, [10, 10], 1],
],
},
"num_workers": 1,
"timesteps_per_batch": 2000,
"min_steps_per_task": 100,
"lambda": 0.95,
"clip_param": 0.2,
"num_sgd_iter": 20,
"sgd_stepsize": 0.0001,
"sgd_batchsize": 32,
"devices": ["/gpu:0"],
"tf_session_args": {
"gpu_options": {
"allow_growth": True
}
}
},
},
})
@@ -0,0 +1,131 @@
"""Collection of Carla scenarios, including those from the CoRL 2017 paper."""
TEST_WEATHERS = [0, 2, 5, 7, 9, 10, 11, 12, 13]
TRAIN_WEATHERS = [1, 3, 4, 6, 8, 14]
def build_scenario(city, start, end, vehicles, pedestrians, max_steps,
weathers):
return {
"city": city,
"num_vehicles": vehicles,
"num_pedestrians": pedestrians,
"weather_distribution": weathers,
"start_pos_id": start,
"end_pos_id": end,
"max_steps": max_steps,
}
# Simple scenario for Town02 that involves driving down a road
DEFAULT_SCENARIO = build_scenario(
city="Town02",
start=36,
end=40,
vehicles=20,
pedestrians=40,
max_steps=200,
weathers=[0])
# Simple scenario for Town02 that involves driving down a road
LANE_KEEP = build_scenario(
city="Town02",
start=36,
end=40,
vehicles=0,
pedestrians=0,
max_steps=2000,
weathers=[0])
# Scenarios from the CoRL2017 paper
POSES_TOWN1_STRAIGHT = [[36, 40], [39, 35], [110, 114], [7, 3], [0, 4], [
68, 50
], [61, 59], [47, 64], [147, 90], [33, 87], [26, 19], [80, 76], [45, 49], [
55, 44
], [29, 107], [95, 104], [84, 34], [53, 67], [22, 17], [91, 148], [20, 107],
[78, 70], [95, 102], [68, 44], [45, 69]]
POSES_TOWN1_ONE_CURVE = [[138, 17], [47, 16], [26, 9], [42, 49], [140, 124], [
85, 98
], [65, 133], [137, 51], [76, 66], [46, 39], [40, 60], [0, 29], [4, 129], [
121, 140
], [2, 129], [78, 44], [68, 85], [41, 102], [95, 70], [68, 129], [84, 69],
[47, 79], [110, 15], [130, 17], [0, 17]]
POSES_TOWN1_NAV = [[105, 29], [27, 130], [102, 87], [132, 27], [24, 44], [
96, 26
], [34, 67], [28, 1], [140, 134], [105, 9], [148, 129], [65, 18], [21, 16], [
147, 97
], [42, 51], [30, 41], [18, 107], [69, 45], [102, 95], [18, 145], [111, 64],
[79, 45], [84, 69], [73, 31], [37, 81]]
POSES_TOWN2_STRAIGHT = [[38, 34], [4, 2], [12, 10], [62, 55], [43, 47], [
64, 66
], [78, 76], [59, 57], [61, 18], [35, 39], [12, 8], [0, 18], [75, 68], [
54, 60
], [45, 49], [46, 42], [53, 46], [80, 29], [65, 63], [0, 81], [54, 63],
[51, 42], [16, 19], [17, 26], [77, 68]]
POSES_TOWN2_ONE_CURVE = [[37, 76], [8, 24], [60, 69], [38, 10], [21, 1], [
58, 71
], [74, 32], [44, 0], [71, 16], [14, 24], [34, 11], [43, 14], [75, 16], [
80, 21
], [3, 23], [75, 59], [50, 47], [11, 19], [77, 34], [79, 25], [40, 63],
[58, 76], [79, 55], [16, 61], [27, 11]]
POSES_TOWN2_NAV = [[19, 66], [79, 14], [19, 57], [23, 1], [53, 76], [42, 13], [
31, 71
], [33, 5], [54, 30], [10, 61], [66, 3], [27, 12], [79, 19], [2, 29], [16, 14],
[5, 57], [70, 73], [46, 67], [57, 50], [61, 49], [21, 12],
[51, 81], [77, 68], [56, 65], [43, 54]]
TOWN1_STRAIGHT = [
build_scenario("Town01", start, end, 0, 0, 300, TEST_WEATHERS)
for (start, end) in POSES_TOWN1_STRAIGHT
]
TOWN1_ONE_CURVE = [
build_scenario("Town01", start, end, 0, 0, 600, TEST_WEATHERS)
for (start, end) in POSES_TOWN1_ONE_CURVE
]
TOWN1_NAVIGATION = [
build_scenario("Town01", start, end, 0, 0, 900, TEST_WEATHERS)
for (start, end) in POSES_TOWN1_NAV
]
TOWN1_NAVIGATION_DYNAMIC = [
build_scenario("Town01", start, end, 20, 50, 900, TEST_WEATHERS)
for (start, end) in POSES_TOWN1_NAV
]
TOWN2_STRAIGHT = [
build_scenario("Town02", start, end, 0, 0, 300, TRAIN_WEATHERS)
for (start, end) in POSES_TOWN2_STRAIGHT
]
TOWN2_STRAIGHT_DYNAMIC = [
build_scenario("Town02", start, end, 20, 50, 300, TRAIN_WEATHERS)
for (start, end) in POSES_TOWN2_STRAIGHT
]
TOWN2_ONE_CURVE = [
build_scenario("Town02", start, end, 0, 0, 600, TRAIN_WEATHERS)
for (start, end) in POSES_TOWN2_ONE_CURVE
]
TOWN2_NAVIGATION = [
build_scenario("Town02", start, end, 0, 0, 900, TRAIN_WEATHERS)
for (start, end) in POSES_TOWN2_NAV
]
TOWN2_NAVIGATION_DYNAMIC = [
build_scenario("Town02", start, end, 20, 50, 900, TRAIN_WEATHERS)
for (start, end) in POSES_TOWN2_NAV
]
TOWN1_ALL = (TOWN1_STRAIGHT + TOWN1_ONE_CURVE + TOWN1_NAVIGATION +
TOWN1_NAVIGATION_DYNAMIC)
TOWN2_ALL = (TOWN2_STRAIGHT + TOWN2_ONE_CURVE + TOWN2_NAVIGATION +
TOWN2_NAVIGATION_DYNAMIC)
@@ -0,0 +1,53 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import ray
from ray.tune import grid_search, register_env, run_experiments
from env import CarlaEnv, ENV_CONFIG
from models import register_carla_model
from scenarios import TOWN2_STRAIGHT
env_name = "carla_env"
env_config = ENV_CONFIG.copy()
env_config.update({
"verbose": False,
"x_res": 80,
"y_res": 80,
"squash_action_logits": grid_search([False, True]),
"use_depth_camera": False,
"discrete_actions": False,
"server_map": "/Game/Maps/Town02",
"reward_function": grid_search(["custom", "corl2017"]),
"scenarios": TOWN2_STRAIGHT,
})
register_env(env_name, lambda env_config: CarlaEnv(env_config))
register_carla_model()
redis_address = ray.services.get_node_ip_address() + ":6379"
ray.init(redis_address=redis_address)
run_experiments({
"carla-a3c": {
"run": "A3C",
"env": "carla_env",
"config": {
"env_config": env_config,
"use_gpu_for_workers": True,
"model": {
"custom_model": "carla",
"custom_options": {
"image_shape": [80, 80, 6],
},
"conv_filters": [
[16, [8, 8], 4],
[32, [4, 4], 2],
[512, [10, 10], 1],
],
},
"gamma": 0.95,
"num_workers": 2,
},
},
})
@@ -0,0 +1,67 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import ray
from ray.tune import register_env, run_experiments
from env import CarlaEnv, ENV_CONFIG
from models import register_carla_model
from scenarios import TOWN2_ONE_CURVE
env_name = "carla_env"
env_config = ENV_CONFIG.copy()
env_config.update({
"verbose": False,
"x_res": 80,
"y_res": 80,
"discrete_actions": True,
"server_map": "/Game/Maps/Town02",
"reward_function": "custom",
"scenarios": TOWN2_ONE_CURVE,
})
register_env(env_name, lambda env_config: CarlaEnv(env_config))
register_carla_model()
ray.init()
def shape_out(spec):
return (spec.config.env_config.framestack *
(spec.config.env_config.use_depth_camera and 1 or 3))
run_experiments({
"carla-dqn": {
"run": "DQN",
"env": "carla_env",
"config": {
"env_config": env_config,
"model": {
"custom_model": "carla",
"custom_options": {
"image_shape": [
80,
80,
shape_out,
],
},
"conv_filters": [
[16, [8, 8], 4],
[32, [4, 4], 2],
[512, [10, 10], 1],
],
},
"timesteps_per_iteration": 100,
"learning_starts": 1000,
"schedule_max_timesteps": 100000,
"gamma": 0.8,
"tf_session_args": {
"gpu_options": {
"allow_growth": True
},
},
},
},
})
@@ -0,0 +1,57 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import ray
from ray.tune import register_env, run_experiments
from env import CarlaEnv, ENV_CONFIG
from models import register_carla_model
from scenarios import TOWN2_STRAIGHT
env_name = "carla_env"
env_config = ENV_CONFIG.copy()
env_config.update({
"verbose": False,
"x_res": 80,
"y_res": 80,
"use_depth_camera": False,
"discrete_actions": False,
"server_map": "/Game/Maps/Town02",
"scenarios": TOWN2_STRAIGHT,
})
register_env(env_name, lambda env_config: CarlaEnv(env_config))
register_carla_model()
ray.init(redirect_output=True)
run_experiments({
"carla": {
"run": "PPO",
"env": "carla_env",
"config": {
"env_config": env_config,
"model": {
"custom_model": "carla",
"custom_options": {
"image_shape": [
env_config["x_res"], env_config["y_res"], 6
],
},
"conv_filters": [
[16, [8, 8], 4],
[32, [4, 4], 2],
[512, [10, 10], 1],
],
},
"num_workers": 1,
"train_batch_size": 2000,
"sample_batch_size": 100,
"lambda": 0.95,
"clip_param": 0.2,
"num_sgd_iter": 20,
"lr": 0.0001,
"sgd_minibatch_size": 32,
"num_gpus": 1,
},
},
})
+58
View File
@@ -0,0 +1,58 @@
"""Example of a custom gym environment. Run this for a demo."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import gym
from gym.spaces import Discrete, Box
from gym.envs.registration import EnvSpec
import ray
from ray.tune import run_experiments
from ray.tune.registry import register_env
class SimpleCorridor(gym.Env):
"""Example of a custom env in which you have to walk down a corridor.
You can configure the length of the corridor via the env config."""
def __init__(self, config):
self.end_pos = config["corridor_length"]
self.cur_pos = 0
self.action_space = Discrete(2)
self.observation_space = Box(
0.0, self.end_pos, shape=(1, ), dtype=np.float32)
self._spec = EnvSpec("SimpleCorridor-{}-v0".format(self.end_pos))
def reset(self):
self.cur_pos = 0
return [self.cur_pos]
def step(self, action):
assert action in [0, 1], action
if action == 0 and self.cur_pos > 0:
self.cur_pos -= 1
elif action == 1:
self.cur_pos += 1
done = self.cur_pos >= self.end_pos
return [self.cur_pos], 1 if done else 0, done, {}
if __name__ == "__main__":
env_creator_name = "corridor"
register_env(env_creator_name, lambda config: SimpleCorridor(config))
ray.init()
run_experiments({
"demo": {
"run": "PPO",
"env": "corridor",
"config": {
"env_config": {
"corridor_length": 5,
},
},
},
})
+7 -10
View File
@@ -51,14 +51,15 @@ class ModelCatalog(object):
>>> prep = ModelCatalog.get_preprocessor(env)
>>> observation = prep.transform(raw_observation)
>>> dist_cls, dist_dim = ModelCatalog.get_action_dist(env.action_space)
>>> model = ModelCatalog.get_model(inputs, dist_dim)
>>> dist_cls, dist_dim = ModelCatalog.get_action_dist(
env.action_space, {})
>>> model = ModelCatalog.get_model(inputs, dist_dim, options)
>>> dist = dist_cls(model.outputs)
>>> action = dist.sample()
"""
@staticmethod
def get_action_dist(action_space, config=None, dist_type=None):
def get_action_dist(action_space, config, dist_type=None):
"""Returns action distribution class and size for the given action space.
Args:
@@ -90,7 +91,8 @@ class ModelCatalog(object):
child_dist = []
input_lens = []
for action in action_space.spaces:
dist, action_size = ModelCatalog.get_action_dist(action)
dist, action_size = ModelCatalog.get_action_dist(
action, config)
child_dist.append(dist)
input_lens.append(action_size)
return partial(
@@ -139,11 +141,7 @@ class ModelCatalog(object):
" not supported".format(action_space))
@staticmethod
def get_model(inputs,
num_outputs,
options=None,
state_in=None,
seq_lens=None):
def get_model(inputs, num_outputs, options, state_in=None, seq_lens=None):
"""Returns a suitable model conforming to given input and output specs.
Args:
@@ -157,7 +155,6 @@ class ModelCatalog(object):
model (Model): Neural network model.
"""
options = options or {}
model = ModelCatalog._get_model(inputs, num_outputs, options, state_in,
seq_lens)
+3 -2
View File
@@ -69,12 +69,13 @@ class ModelCatalogTest(unittest.TestCase):
ray.init()
with tf.variable_scope("test1"):
p1 = ModelCatalog.get_model(np.zeros((10, 3), dtype=np.float32), 5)
p1 = ModelCatalog.get_model(
np.zeros((10, 3), dtype=np.float32), 5, {})
self.assertEqual(type(p1), FullyConnectedNetwork)
with tf.variable_scope("test2"):
p2 = ModelCatalog.get_model(
np.zeros((10, 84, 84, 3), dtype=np.float32), 5)
np.zeros((10, 84, 84, 3), dtype=np.float32), 5, {})
self.assertEqual(type(p2), VisionNetwork)
def testCustomModel(self):
@@ -1,4 +1,3 @@
# can expect improvement to -140 reward in ~300-500k timesteps
swimmer-ars:
env: Swimmer-v2
run: ARS
@@ -9,8 +8,9 @@ swimmer-ars:
num_workers: 1
sgd_stepsize: 0.02
noise_size: 250000000
policy_type: LinearPolicy
eval_prob: 0.2
offset: 0
observation_filter: NoFilter
report_length: 3
model:
fcnet_hiddens: [] # a linear policy