diff --git a/doc/source/rllib.rst b/doc/source/rllib.rst index bef9d5248..c10b5cc33 100644 --- a/doc/source/rllib.rst +++ b/doc/source/rllib.rst @@ -157,7 +157,7 @@ can register a function that creates the env to refer to it by name. For example from ray.tune.registry import register_env from ray.rllib import ppo - env_creator = lambda: create_my_env() + env_creator = lambda env_config: create_my_env() env_creator_name = "custom_env" register_env(env_creator_name, env_creator) diff --git a/examples/carla/README b/examples/carla/README index 98a821b97..a066b048a 100644 --- a/examples/carla/README +++ b/examples/carla/README @@ -1,12 +1,14 @@ -(Experimental) gym environment for https://github.com/carla-simulator/carla +(Experimental) OpenAI gym environment for https://github.com/carla-simulator/carla -To run, first download and unpack the Carla release from this URL: https://github.com/carla-simulator/carla/releases/tag/0.7.0 +To run, first download and unpack the Carla binaries from this URL: https://github.com/carla-simulator/carla/releases/tag/0.7.0 -Then, you can try running env.py to drive the car. Run train_ppo.py or train_dqn.py to attempt training. +Note that currently you also need to clone the Python code from `carla/benchmark_branch` which includes the Carla planner. + +Then, you can try running env.py to drive the car. Run one of the train_* scripts to attempt training. $ pkill -9 Carla - $ export PYTHONPATH=/home/ubuntu/CARLA_0.7.0/PythonClient:$PYTHONPATH - $ export CARLA_SERVER=/home/ubuntu/CARLA_0.7.0/CarlaUE4.sh + $ export CARLA_SERVER=/PATH/TO/CARLA_0.7.0/CarlaUE4.sh + $ export CARLA_PY_PATH=/PATH/TO/CARLA_BENCHMARK_BRANCH_REPO/PythonClient $ python env.py -Note that the reward function is currently hard-coded to drive straight down the street. +Check out the scenarios.py file for different training and test scenarios that can be used. diff --git a/examples/carla/a3c_lane_keep.py b/examples/carla/a3c_lane_keep.py new file mode 100644 index 000000000..cbe453d32 --- /dev/null +++ b/examples/carla/a3c_lane_keep.py @@ -0,0 +1,50 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from ray.tune import register_env, run_experiments + +from env import CarlaEnv, ENV_CONFIG +from models import register_carla_model +from scenarios import LANE_KEEP + +env_name = "carla_env" +env_config = ENV_CONFIG.copy() +env_config.update({ + "verbose": False, + "x_res": 80, + "y_res": 80, + "use_depth_camera": False, + "discrete_actions": False, + "server_map": "/Game/Maps/Town02", + "reward_function": "lane_keep", + "enable_planner": False, + "scenarios": [LANE_KEEP], +}) + +register_env(env_name, lambda env_config: CarlaEnv(env_config)) +register_carla_model() + +run_experiments({ + "carla-a3c": { + "run": "A3C", + "env": "carla_env", + "resources": {"cpu": 4, "gpu": 1}, + "config": { + "env_config": env_config, + "model": { + "custom_model": "carla", + "custom_options": { + "image_shape": [80, 80, 6], + }, + "conv_filters": [ + [16, [8, 8], 4], + [32, [4, 4], 2], + [512, [10, 10], 1], + ], + }, + "gamma": 0.8, + "num_workers": 1, + }, + }, +}) diff --git a/examples/carla/dqn_lane_keep.py b/examples/carla/dqn_lane_keep.py new file mode 100644 index 000000000..0adb8c309 --- /dev/null +++ b/examples/carla/dqn_lane_keep.py @@ -0,0 +1,55 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from ray.tune import register_env, run_experiments + +from env import CarlaEnv, ENV_CONFIG +from models import register_carla_model +from scenarios import LANE_KEEP + +env_name = "carla_env" +env_config = ENV_CONFIG.copy() +env_config.update({ + "verbose": False, + "x_res": 80, + "y_res": 80, + "use_depth_camera": False, + "discrete_actions": True, + "server_map": "/Game/Maps/Town02", + "reward_function": "lane_keep", + "enable_planner": False, + "scenarios": [LANE_KEEP], +}) + +register_env(env_name, lambda env_config: CarlaEnv(env_config)) +register_carla_model() + +run_experiments({ + "carla-dqn": { + "run": "DQN", + "env": "carla_env", + "resources": {"cpu": 4, "gpu": 1}, + "config": { + "env_config": env_config, + "model": { + "custom_model": "carla", + "custom_options": { + "image_shape": [80, 80, 6], + }, + "conv_filters": [ + [16, [8, 8], 4], + [32, [4, 4], 2], + [512, [10, 10], 1], + ], + }, + "timesteps_per_iteration": 100, + "learning_starts": 1000, + "schedule_max_timesteps": 100000, + "gamma": 0.8, + "tf_session_args": { + "gpu_options": {"allow_growth": True}, + }, + }, + }, +}) diff --git a/examples/carla/env.py b/examples/carla/env.py index ee8f9fd4c..94cacee75 100644 --- a/examples/carla/env.py +++ b/examples/carla/env.py @@ -1,14 +1,18 @@ +"""OpenAI gym environment for Carla. Run this file for a demo.""" + from __future__ import absolute_import from __future__ import division from __future__ import print_function from datetime import datetime +import atexit import cv2 import os import json import random import signal import subprocess +import sys import time import traceback @@ -18,13 +22,10 @@ try: except Exception: pass -from carla.client import CarlaClient -from carla.sensor import Camera -from carla.settings import CarlaSettings - import gym -from gym.spaces import Box, Discrete +from gym.spaces import Box, Discrete, Tuple +from scenarios import DEFAULT_SCENARIO # Set this where you want to save image outputs (or empty string to disable) CARLA_OUT_PATH = os.environ.get("CARLA_OUT", os.path.expanduser("~/carla_out")) @@ -33,48 +34,132 @@ if CARLA_OUT_PATH and not os.path.exists(CARLA_OUT_PATH): # Set this to the path of your Carla binary SERVER_BINARY = os.environ.get( - "CARLA_SERVER", "/home/ubuntu/carla-0.7/CarlaUE4.sh") + "CARLA_SERVER", os.path.expanduser("~/CARLA_0.7.0/CarlaUE4.sh")) + +assert os.path.exists(SERVER_BINARY) +if "CARLA_PY_PATH" in os.environ: + sys.path.append(os.path.expanduser(os.environ["CARLA_PY_PATH"])) +else: + # TODO(ekl) switch this to the binary path once the planner is in master + sys.path.append(os.path.expanduser("~/carla/PythonClient/")) + +try: + from carla.client import CarlaClient + from carla.sensor import Camera + from carla.settings import CarlaSettings + from carla.planner.planner import Planner, REACH_GOAL, GO_STRAIGHT, \ + TURN_RIGHT, TURN_LEFT, LANE_FOLLOW +except Exception as e: + print("Failed to import Carla python libs, try setting $CARLA_PY_PATH") + raise e + +# Carla planner commands +COMMANDS_ENUM = { + REACH_GOAL: "REACH_GOAL", + GO_STRAIGHT: "GO_STRAIGHT", + TURN_RIGHT: "TURN_RIGHT", + TURN_LEFT: "TURN_LEFT", + LANE_FOLLOW: "LANE_FOLLOW", +} + +# Mapping from string repr to one-hot encoding index to feed to the model +COMMAND_ORDINAL = { + "REACH_GOAL": 0, + "GO_STRAIGHT": 1, + "TURN_RIGHT": 2, + "TURN_LEFT": 3, + "LANE_FOLLOW": 4, +} # Number of retries if the server doesn't respond RETRIES_ON_ERROR = 5 +# Dummy Z coordinate to use when we only care about (x, y) +GROUND_Z = 22 + # Default environment configuration ENV_CONFIG = { + "log_images": True, + "enable_planner": True, + "framestack": 2, # note: only [1, 2] currently supported + "convert_images_to_video": True, + "early_terminate_on_collision": True, "verbose": True, - "render_x_res": 400, - "render_y_res": 300, + "reward_function": "custom", + "render_x_res": 800, + "render_y_res": 600, "x_res": 80, "y_res": 80, - "map": "/Game/Maps/Town02", - "random_starting_location": False, + "server_map": "/Game/Maps/Town02", + "scenarios": [DEFAULT_SCENARIO], "use_depth_camera": False, - "discrete_actions": False, - "max_steps": 50, - "num_vehicles": 20, - "num_pedestrians": 40, - "weather": [1], # [1, 3, 7, 8, 14] - - # Defaults to driving down the road /Game/Maps/Town02, start pos 0 - "target_x": -7.5, - "target_y": 120, + "discrete_actions": True, + "squash_action_logits": False, } +DISCRETE_ACTIONS = { + # coast + 0: [0.0, 0.0], + # turn left + 1: [0.0, -0.5], + # turn right + 2: [0.0, 0.5], + # forward + 3: [1.0, 0.0], + # brake + 4: [-0.5, 0.0], + # forward left + 5: [1.0, -0.5], + # forward right + 6: [1.0, 0.5], + # brake left + 7: [-0.5, -0.5], + # brake right + 8: [-0.5, 0.5], +} + + +live_carla_processes = set() + + +def cleanup(): + print("Killing live carla processes", live_carla_processes) + for pgid in live_carla_processes: + os.killpg(pgid, signal.SIGKILL) + + +atexit.register(cleanup) + + class CarlaEnv(gym.Env): def __init__(self, config=ENV_CONFIG): self.config = config + self.city = self.config["server_map"].split("/")[-1] + if self.config["enable_planner"]: + self.planner = Planner(self.city) if config["discrete_actions"]: - self.action_space = Discrete(10) + self.action_space = Discrete(len(DISCRETE_ACTIONS)) else: - self.action_space = Box(-1.0, 1.0, shape=(3,)) + self.action_space = Box(-1.0, 1.0, shape=(2,)) if config["use_depth_camera"]: - self.observation_space = Box( - -1.0, 1.0, shape=(config["y_res"], config["x_res"], 1)) + image_space = Box( + -1.0, 1.0, shape=( + config["y_res"], config["x_res"], + 1 * config["framestack"])) else: - self.observation_space = Box( - 0.0, 255.0, shape=(config["y_res"], config["x_res"], 3)) + image_space = Box( + 0.0, 255.0, shape=( + config["y_res"], config["x_res"], + 3 * config["framestack"])) + self.observation_space = Tuple( + [image_space, + Discrete(len(COMMANDS_ENUM)), # next_command + Box(-128.0, 128.0, shape=(2,))]) # forward_speed, dist to goal + + # TODO(ekl) this isn't really a proper gym spec self._spec = lambda: None self._spec.id = "Carla-v0" @@ -84,24 +169,36 @@ class CarlaEnv(gym.Env): self.num_steps = 0 self.total_reward = 0 self.prev_measurement = None + self.prev_image = None self.episode_id = None self.measurements_file = None self.weather = None - self.player_start = None + self.scenario = None + self.start_pos = None + self.end_pos = None + self.start_coord = None + self.end_coord = None + self.last_obs = None def init_server(self): print("Initializing new Carla server...") # Create a new server process and start the client. self.server_port = random.randint(10000, 60000) self.server_process = subprocess.Popen( - [SERVER_BINARY, self.config["map"], + [SERVER_BINARY, self.config["server_map"], "-windowed", "-ResX=400", "-ResY=300", "-carla-server", "-carla-world-port={}".format(self.server_port)], preexec_fn=os.setsid, stdout=open(os.devnull, "w")) + live_carla_processes.add(os.getpgid(self.server_process.pid)) - self.client = CarlaClient("localhost", self.server_port) - self.client.connect() + for i in range(RETRIES_ON_ERROR): + try: + self.client = CarlaClient("localhost", self.server_port) + return self.client.connect() + except Exception as e: + print("Error connecting: {}, attempt {}".format(e, i)) + time.sleep(2) def clear_server_state(self): print("Clearing Carla server state") @@ -113,7 +210,9 @@ class CarlaEnv(gym.Env): print("Error disconnecting client: {}".format(e)) pass if self.server_process: - os.killpg(os.getpgid(self.server_process.pid), signal.SIGKILL) + pgid = os.getpgid(self.server_process.pid) + os.killpg(pgid, signal.SIGKILL) + live_carla_processes.remove(pgid) self.server_port = None self.server_process = None @@ -126,9 +225,6 @@ class CarlaEnv(gym.Env): try: if not self.server_process: self.init_server() - # reset twice since the first time a server is initialized, - # the starting location is different - self._reset() return self._reset() except Exception as e: print("Error during reset: {}".format(traceback.format_exc())) @@ -138,7 +234,9 @@ class CarlaEnv(gym.Env): def _reset(self): self.num_steps = 0 + self.total_reward = 0 self.prev_measurement = None + self.prev_image = None self.episode_id = datetime.today().strftime("%Y-%m-%d_%H-%M-%S_%f") self.measurements_file = None @@ -146,20 +244,23 @@ class CarlaEnv(gym.Env): # the CarlaSettings.ini file. Here we set the configuration we # want for the new episode. settings = CarlaSettings() - self.weather = random.choice(self.config["weather"]) + self.scenario = random.choice(self.config["scenarios"]) + assert self.scenario["city"] == self.city, (self.scenario, self.city) + self.weather = random.choice(self.scenario["weather_distribution"]) settings.set( SynchronousMode=True, SendNonPlayerAgentsInfo=True, - NumberOfVehicles=self.config["num_vehicles"], - NumberOfPedestrians=self.config["num_pedestrians"], + NumberOfVehicles=self.scenario["num_vehicles"], + NumberOfPedestrians=self.scenario["num_pedestrians"], WeatherId=self.weather) settings.randomize_seeds() - camera1 = Camera("CameraDepth", PostProcessing="Depth") - camera1.set_image_size( - self.config["render_x_res"], self.config["render_y_res"]) - camera1.set_position(30, 0, 130) - settings.add_sensor(camera1) + if self.config["use_depth_camera"]: + camera1 = Camera("CameraDepth", PostProcessing="Depth") + camera1.set_image_size( + self.config["render_x_res"], self.config["render_y_res"]) + camera1.set_position(30, 0, 130) + settings.add_sensor(camera1) camera2 = Camera("CameraRGB") camera2.set_image_size( @@ -167,25 +268,45 @@ class CarlaEnv(gym.Env): camera2.set_position(30, 0, 130) settings.add_sensor(camera2) + # Setup start and end positions scene = self.client.load_settings(settings) - - # Choose one player start at random. - number_of_player_starts = len(scene.player_start_spots) - if self.config["random_starting_location"]: - self.player_start = random.randint( - 0, max(0, number_of_player_starts - 1)) - else: - self.player_start = 0 + positions = scene.player_start_spots + self.start_pos = positions[self.scenario["start_pos_id"]] + self.end_pos = positions[self.scenario["end_pos_id"]] + self.start_coord = [ + self.start_pos.location.x // 100, self.start_pos.location.y // 100] + self.end_coord = [ + self.end_pos.location.x // 100, self.end_pos.location.y // 100] + print( + "Start pos {} ({}), end {} ({})".format( + self.scenario["start_pos_id"], self.start_coord, + self.scenario["end_pos_id"], self.end_coord)) # Notify the server that we want to start the episode at the # player_start index. This function blocks until the server is ready # to start the episode. print("Starting new episode...") - self.client.start_episode(self.player_start) + self.client.start_episode(self.scenario["start_pos_id"]) image, py_measurements = self._read_observation() self.prev_measurement = py_measurements - return self.preprocess_image(image) + return self.encode_obs(self.preprocess_image(image), py_measurements) + + def encode_obs(self, image, py_measurements): + assert self.config["framestack"] in [1, 2] + prev_image = self.prev_image + self.prev_image = image + if prev_image is None: + prev_image = image + if self.config["framestack"] == 2: + image = np.concatenate([prev_image, image], axis=2) + obs = ( + image, + COMMAND_ORDINAL[py_measurements["next_command"]], + [py_measurements["forward_speed"], + py_measurements["distance_to_goal"]]) + self.last_obs = obs + return obs def step(self, action): try: @@ -196,42 +317,22 @@ class CarlaEnv(gym.Env): "Error during step, terminating episode early", traceback.format_exc()) self.clear_server_state() - return np.zeros(self.observation_space.shape), 0.0, True, {} + return (self.last_obs, 0.0, True, {}) def _step(self, action): if self.config["discrete_actions"]: - action = int(action) - assert action in range(10) - if action == 9: - brake = 1.0 - steer = 0.0 - throttle = 0.0 - reverse = False - else: - brake = 0.0 - if action >= 6: - steer = -1.0 - elif action >= 3: - steer = 1.0 - else: - steer = 0.0 - action %= 3 - if action == 0: - throttle = 0.0 - reverse = False - elif action == 1: - throttle = 1.0 - reverse = False - elif action == 2: - throttle = 1.0 - reverse = True + action = DISCRETE_ACTIONS[int(action)] + assert len(action) == 2, "Invalid action {}".format(action) + if self.config["squash_action_logits"]: + forward = 2 * float(sigmoid(action[0]) - 0.5) + throttle = float(np.clip(forward, 0, 1)) + brake = float(np.abs(np.clip(forward, -1, 0))) + steer = 2 * float(sigmoid(action[1]) - 0.5) else: - assert len(action) == 3, "Invalid action {}".format(action) - steer = action[0] - throttle = min(1.0, abs(action[1])) - brake = max(0.0, min(1.0, action[2])) - reverse = action[1] < 0.0 - + throttle = float(np.clip(action[0], 0, 1)) + brake = float(np.abs(np.clip(action[0], -1, 0))) + steer = float(np.clip(action[1], -1, 1)) + reverse = False hand_brake = False if self.config["verbose"]: @@ -245,15 +346,12 @@ class CarlaEnv(gym.Env): # Process observations image, py_measurements = self._read_observation() - reward, done = compute_reward( - self.config, self.prev_measurement, py_measurements) - if self.num_steps > self.config["max_steps"]: - done = True - self.total_reward += reward - py_measurements["reward"] = reward - py_measurements["total_reward"] = self.total_reward - py_measurements["done"] = done - py_measurements["action"] = action + if self.config["verbose"]: + print("Next command", py_measurements["next_command"]) + if type(action) is np.ndarray: + py_measurements["action"] = [float(a) for a in action] + else: + py_measurements["action"] = action py_measurements["control"] = { "steer": steer, "throttle": throttle, @@ -261,6 +359,16 @@ class CarlaEnv(gym.Env): "reverse": reverse, "hand_brake": hand_brake, } + reward = compute_reward( + self, self.prev_measurement, py_measurements) + self.total_reward += reward + py_measurements["reward"] = reward + py_measurements["total_reward"] = self.total_reward + done = (self.num_steps > self.scenario["max_steps"] or + py_measurements["next_command"] == "REACH_GOAL" or + (self.config["early_terminate_on_collision"] and + collided_done(py_measurements))) + py_measurements["done"] = done self.prev_measurement = py_measurements # Write out measurements to file @@ -276,19 +384,41 @@ class CarlaEnv(gym.Env): if done: self.measurements_file.close() self.measurements_file = None + if self.config["convert_images_to_video"]: + self.images_to_video() self.num_steps += 1 image = self.preprocess_image(image) - return image, reward, done, py_measurements + return ( + self.encode_obs(image, py_measurements), reward, done, + py_measurements) + + def images_to_video(self): + videos_dir = os.path.join(CARLA_OUT_PATH, "Videos") + if not os.path.exists(videos_dir): + os.makedirs(videos_dir) + ffmpeg_cmd = ( + "ffmpeg -loglevel -8 -r 60 -f image2 -s {x_res}x{y_res} " + "-start_number 0 -i " + "{img}_%04d.jpg -vcodec libx264 {vid}.mp4 && rm -f {img}_*.jpg " + ).format( + x_res=self.config["render_x_res"], + y_res=self.config["render_y_res"], + vid=os.path.join(videos_dir, self.episode_id), + img=os.path.join(CARLA_OUT_PATH, "CameraRGB", self.episode_id)) + print("Executing ffmpeg command", ffmpeg_cmd) + subprocess.call(ffmpeg_cmd, shell=True) def preprocess_image(self, image): if self.config["use_depth_camera"]: + assert self.config["use_depth_camera"] data = (image.data - 0.5) * 2 data = data.reshape( self.config["render_y_res"], self.config["render_x_res"], 1) data = cv2.resize( data, (self.config["x_res"], self.config["y_res"]), interpolation=cv2.INTER_AREA) + data = np.expand_dims(data, 2) else: data = image.data.reshape( self.config["render_y_res"], self.config["render_x_res"], 3) @@ -316,29 +446,68 @@ class CarlaEnv(gym.Env): observation = image cur = measurements.player_measurements + + if self.config["enable_planner"]: + next_command = COMMANDS_ENUM[ + self.planner.get_next_command( + [cur.transform.location.x, cur.transform.location.y, + GROUND_Z], + [cur.transform.orientation.x, cur.transform.orientation.y, + GROUND_Z], + [self.end_pos.location.x, self.end_pos.location.y, + GROUND_Z], + [self.end_pos.orientation.x, self.end_pos.orientation.y, + GROUND_Z]) + ] + else: + next_command = "LANE_FOLLOW" + + if next_command == "REACH_GOAL": + distance_to_goal = 0.0 # avoids crash in planner + elif self.config["enable_planner"]: + distance_to_goal = self.planner.get_shortest_path_distance( + [cur.transform.location.x, cur.transform.location.y, GROUND_Z], + [cur.transform.orientation.x, cur.transform.orientation.y, + GROUND_Z], + [self.end_pos.location.x, self.end_pos.location.y, GROUND_Z], + [self.end_pos.orientation.x, self.end_pos.orientation.y, + GROUND_Z]) / 100 + else: + distance_to_goal = -1 + + distance_to_goal_euclidean = float(np.linalg.norm( + [cur.transform.location.x - self.end_pos.location.x, + cur.transform.location.y - self.end_pos.location.y]) / 100) + py_measurements = { "episode_id": self.episode_id, "step": self.num_steps, "x": cur.transform.location.x, "y": cur.transform.location.y, + "x_orient": cur.transform.orientation.x, + "y_orient": cur.transform.orientation.y, "forward_speed": cur.forward_speed, + "distance_to_goal": distance_to_goal, + "distance_to_goal_euclidean": distance_to_goal_euclidean, "collision_vehicles": cur.collision_vehicles, "collision_pedestrians": cur.collision_pedestrians, "collision_other": cur.collision_other, "intersection_offroad": cur.intersection_offroad, "intersection_otherlane": cur.intersection_otherlane, "weather": self.weather, - "map": self.config["map"], - "target_x": self.config["target_x"], - "target_y": self.config["target_y"], + "map": self.config["server_map"], + "start_coord": self.start_coord, + "end_coord": self.end_coord, + "current_scenario": self.scenario, "x_res": self.config["x_res"], "y_res": self.config["y_res"], - "num_vehicles": self.config["num_vehicles"], - "num_pedestrians": self.config["num_pedestrians"], - "max_steps": self.config["max_steps"], + "num_vehicles": self.scenario["num_vehicles"], + "num_pedestrians": self.scenario["num_pedestrians"], + "max_steps": self.scenario["max_steps"], + "next_command": next_command, } - if CARLA_OUT_PATH: + if CARLA_OUT_PATH and self.config["log_images"]: for name, image in sensor_data.items(): out_dir = os.path.join(CARLA_OUT_PATH, name) if not os.path.exists(out_dir): @@ -352,23 +521,18 @@ class CarlaEnv(gym.Env): return observation, py_measurements -def distance(x1, y1, x2, y2): - return ((x1 - x2)**2 + (y1 - y2)**2)**0.5 - - -def compute_reward(config, prev, current): - prev_x = prev["x"] / 100 # cm -> m - prev_y = prev["y"] / 100 - cur_x = current["x"] / 100 # cm -> m - cur_y = current["y"] / 100 - +def compute_reward_corl2017(env, prev, current): reward = 0.0 - done = False + + cur_dist = current["distance_to_goal"] + + prev_dist = prev["distance_to_goal"] + + if env.config["verbose"]: + print("Cur dist {}, prev dist {}".format(cur_dist, prev_dist)) # Distance travelled toward the goal in m - reward += ( - distance(prev_x, prev_y, config["target_x"], config["target_y"]) - - distance(cur_x, cur_y, config["target_x"], config["target_y"])) + reward += np.clip(prev_dist - cur_dist, -10.0, 10.0) # Change in speed (km/h) reward += 0.05 * (current["forward_speed"] - prev["forward_speed"]) @@ -387,21 +551,89 @@ def compute_reward(config, prev, current): reward -= 2 * ( current["intersection_otherlane"] - prev["intersection_otherlane"]) - if distance(cur_x, cur_y, config["target_x"], config["target_y"]) < 10: - done = True + return reward - return reward, done + +def compute_reward_custom(env, prev, current): + reward = 0.0 + + cur_dist = current["distance_to_goal"] + prev_dist = prev["distance_to_goal"] + + if env.config["verbose"]: + print("Cur dist {}, prev dist {}".format(cur_dist, prev_dist)) + + # Distance travelled toward the goal in m + reward += np.clip(prev_dist - cur_dist, -10.0, 10.0) + + # Speed reward, up 30.0 (km/h) + reward += np.clip(current["forward_speed"], 0.0, 30.0) / 10 + + # New collision damage + new_damage = ( + current["collision_vehicles"] + current["collision_pedestrians"] + + current["collision_other"] - prev["collision_vehicles"] - + prev["collision_pedestrians"] - prev["collision_other"]) + if new_damage: + reward -= 100.0 + + # Sidewalk intersection + reward -= current["intersection_offroad"] + + # Opposite lane intersection + reward -= current["intersection_otherlane"] + + # Reached goal + if current["next_command"] == "REACH_GOAL": + reward += 100.0 + + return reward + + +def compute_reward_lane_keep(env, prev, current): + reward = 0.0 + + # Speed reward, up 30.0 (km/h) + reward += np.clip(current["forward_speed"], 0.0, 30.0) / 10 + + # New collision damage + new_damage = ( + current["collision_vehicles"] + current["collision_pedestrians"] + + current["collision_other"] - prev["collision_vehicles"] - + prev["collision_pedestrians"] - prev["collision_other"]) + if new_damage: + reward -= 100.0 + + # Sidewalk intersection + reward -= current["intersection_offroad"] + + # Opposite lane intersection + reward -= current["intersection_otherlane"] + + return reward + + +REWARD_FUNCTIONS = { + "corl2017": compute_reward_corl2017, + "custom": compute_reward_custom, + "lane_keep": compute_reward_lane_keep, +} + + +def compute_reward(env, prev, current): + return REWARD_FUNCTIONS[env.config["reward_function"]]( + env, prev, current) def print_measurements(measurements): number_of_agents = len(measurements.non_player_agents) player_measurements = measurements.player_measurements - message = 'Vehicle at ({pos_x:.1f}, {pos_y:.1f}), ' - message += '{speed:.2f} km/h, ' - message += 'Collision: {{vehicles={col_cars:.0f}, ' - message += 'pedestrians={col_ped:.0f}, other={col_other:.0f}}}, ' - message += '{other_lane:.0f}% other lane, {offroad:.0f}% off-road, ' - message += '({agents_num:d} non-player agents in the scene)' + message = "Vehicle at ({pos_x:.1f}, {pos_y:.1f}), " + message += "{speed:.2f} km/h, " + message += "Collision: {{vehicles={col_cars:.0f}, " + message += "pedestrians={col_ped:.0f}, other={col_other:.0f}}}, " + message += "{other_lane:.0f}% other lane, {offroad:.0f}% off-road, " + message += "({agents_num:d} non-player agents in the scene)" message = message.format( pos_x=player_measurements.transform.location.x / 100, # cm -> m pos_y=player_measurements.transform.location.y / 100, @@ -415,22 +647,34 @@ def print_measurements(measurements): print(message) -if __name__ == '__main__': - env = CarlaEnv() - obs = env.reset() - print("reset", obs) - start = time.time() - done = False - i = 0 - total_reward = 0.0 - while not done: - i += 1 - if ENV_CONFIG["discrete_actions"]: - obs, reward, done, info = env.step(1) - else: - obs, reward, done, info = env.step([0, 1, 0]) - total_reward += reward - print( - i, "obs", obs.shape, "rew", reward, "total", total_reward, - "done", done) - print("{} fps".format(100 / (time.time() - start))) +def sigmoid(x): + x = float(x) + return np.exp(x) / (1 + np.exp(x)) + + +def collided_done(py_measurements): + m = py_measurements + collided = ( + m["collision_vehicles"] > 0 or m["collision_pedestrians"] > 0 or + m["collision_other"] > 0) + return bool(collided or m["total_reward"] < -100) + + +if __name__ == "__main__": + for _ in range(2): + env = CarlaEnv() + obs = env.reset() + print("reset", obs) + start = time.time() + done = False + i = 0 + total_reward = 0.0 + while not done: + i += 1 + if ENV_CONFIG["discrete_actions"]: + obs, reward, done, info = env.step(1) + else: + obs, reward, done, info = env.step([0, 1, 0]) + total_reward += reward + print(i, "rew", reward, "total", total_reward, "done", done) + print("{} fps".format(100 / (time.time() - start))) diff --git a/examples/carla/models.py b/examples/carla/models.py new file mode 100644 index 000000000..fdc194dcc --- /dev/null +++ b/examples/carla/models.py @@ -0,0 +1,96 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import tensorflow as tf +import tensorflow.contrib.slim as slim +from tensorflow.contrib.layers import xavier_initializer + +from ray.rllib.models.catalog import ModelCatalog +from ray.rllib.models.misc import normc_initializer +from ray.rllib.models.model import Model + + +class CarlaModel(Model): + """Carla model that can process the observation tuple. + + The architecture processes the image using convolutional layers, the + metrics using fully connected layers, and then combines them with + further fully connected layers. + """ + + def _init(self, inputs, num_outputs, options): + # Parse options + image_shape = options["custom_options"]["image_shape"] + convs = options.get("conv_filters", [ + [16, [8, 8], 4], + [32, [5, 5], 3], + [32, [5, 5], 2], + [512, [10, 10], 1], + ]) + hiddens = options.get("fcnet_hiddens", [64]) + fcnet_activation = options.get("fcnet_activation", "tanh") + if fcnet_activation == "tanh": + activation = tf.nn.tanh + elif fcnet_activation == "relu": + activation = tf.nn.relu + + # Sanity checks + image_size = np.product(image_shape) + expected_shape = [image_size + 5 + 2] + assert inputs.shape.as_list()[1:] == expected_shape, \ + (inputs.shape.as_list()[1:], expected_shape) + + # Reshape the input vector back into its components + vision_in = tf.reshape( + inputs[:, :image_size], [tf.shape(inputs)[0]] + image_shape) + metrics_in = inputs[:, image_size:] + print("Vision in shape", vision_in) + print("Metrics in shape", metrics_in) + + # Setup vision layers + with tf.name_scope("carla_vision"): + for i, (out_size, kernel, stride) in enumerate(convs[:-1], 1): + vision_in = slim.conv2d( + vision_in, out_size, kernel, stride, + scope="conv{}".format(i)) + out_size, kernel, stride = convs[-1] + vision_in = slim.conv2d( + vision_in, out_size, kernel, stride, + padding="VALID", scope="conv_out") + vision_in = tf.squeeze(vision_in, [1, 2]) + + # Setup metrics layer + with tf.name_scope("carla_metrics"): + metrics_in = slim.fully_connected( + metrics_in, 64, + weights_initializer=xavier_initializer(), + activation_fn=activation, + scope="metrics_out") + + print("Shape of vision out is", vision_in.shape) + print("Shape of metric out is", metrics_in.shape) + + # Combine the metrics and vision inputs + with tf.name_scope("carla_out"): + i = 1 + last_layer = tf.concat([vision_in, metrics_in], axis=1) + print("Shape of concatenated out is", last_layer.shape) + for size in hiddens: + last_layer = slim.fully_connected( + last_layer, size, + weights_initializer=xavier_initializer(), + activation_fn=activation, + scope="fc{}".format(i)) + i += 1 + output = slim.fully_connected( + last_layer, num_outputs, + weights_initializer=normc_initializer(0.01), + activation_fn=None, scope="fc_out") + + return output, last_layer + + +def register_carla_model(): + ModelCatalog.register_custom_model("carla", CarlaModel) diff --git a/examples/carla/ppo_lane_keep.py b/examples/carla/ppo_lane_keep.py new file mode 100644 index 000000000..dbd015f7e --- /dev/null +++ b/examples/carla/ppo_lane_keep.py @@ -0,0 +1,60 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from ray.tune import register_env, run_experiments + +from env import CarlaEnv, ENV_CONFIG +from models import register_carla_model +from scenarios import LANE_KEEP + +env_name = "carla_env" +env_config = ENV_CONFIG.copy() +env_config.update({ + "verbose": False, + "x_res": 80, + "y_res": 80, + "use_depth_camera": False, + "discrete_actions": False, + "server_map": "/Game/Maps/Town02", + "reward_function": "lane_keep", + "enable_planner": False, + "scenarios": [LANE_KEEP], +}) + +register_env(env_name, lambda env_config: CarlaEnv(env_config)) +register_carla_model() + +run_experiments({ + "carla-ppo": { + "run": "PPO", + "env": "carla_env", + "resources": {"cpu": 4, "gpu": 1}, + "config": { + "env_config": env_config, + "model": { + "custom_model": "carla", + "custom_options": { + "image_shape": [80, 80, 6], + }, + "conv_filters": [ + [16, [8, 8], 4], + [32, [4, 4], 2], + [512, [10, 10], 1], + ], + }, + "num_workers": 1, + "timesteps_per_batch": 2000, + "min_steps_per_task": 100, + "lambda": 0.95, + "clip_param": 0.2, + "num_sgd_iter": 20, + "sgd_stepsize": 0.0001, + "sgd_batchsize": 32, + "devices": ["/gpu:0"], + "tf_session_args": { + "gpu_options": {"allow_growth": True} + } + }, + }, +}) diff --git a/examples/carla/scenarios.py b/examples/carla/scenarios.py new file mode 100644 index 000000000..e6494af18 --- /dev/null +++ b/examples/carla/scenarios.py @@ -0,0 +1,119 @@ +"""Collection of Carla scenarios, including those from the CoRL 2017 paper.""" + + +TEST_WEATHERS = [0, 2, 5, 7, 9, 10, 11, 12, 13] +TRAIN_WEATHERS = [1, 3, 4, 6, 8, 14] + + +def build_scenario( + city, start, end, vehicles, pedestrians, max_steps, weathers): + return { + "city": city, + "num_vehicles": vehicles, + "num_pedestrians": pedestrians, + "weather_distribution": weathers, + "start_pos_id": start, + "end_pos_id": end, + "max_steps": max_steps, + } + + +# Simple scenario for Town02 that involves driving down a road +DEFAULT_SCENARIO = build_scenario( + city="Town02", start=36, end=40, vehicles=20, pedestrians=40, + max_steps=200, weathers=[0]) + +# Simple scenario for Town02 that involves driving down a road +LANE_KEEP = build_scenario( + city="Town02", start=36, end=40, vehicles=0, pedestrians=0, + max_steps=2000, weathers=[0]) + +# Scenarios from the CoRL2017 paper +POSES_TOWN1_STRAIGHT = [ + [36, 40], [39, 35], [110, 114], [7, 3], [0, 4], + [68, 50], [61, 59], [47, 64], [147, 90], [33, 87], + [26, 19], [80, 76], [45, 49], [55, 44], [29, 107], + [95, 104], [84, 34], [53, 67], [22, 17], [91, 148], + [20, 107], [78, 70], [95, 102], [68, 44], [45, 69]] + + +POSES_TOWN1_ONE_CURVE = [ + [138, 17], [47, 16], [26, 9], [42, 49], [140, 124], + [85, 98], [65, 133], [137, 51], [76, 66], [46, 39], + [40, 60], [0, 29], [4, 129], [121, 140], [2, 129], + [78, 44], [68, 85], [41, 102], [95, 70], [68, 129], + [84, 69], [47, 79], [110, 15], [130, 17], [0, 17]] + +POSES_TOWN1_NAV = [ + [105, 29], [27, 130], [102, 87], [132, 27], [24, 44], + [96, 26], [34, 67], [28, 1], [140, 134], [105, 9], + [148, 129], [65, 18], [21, 16], [147, 97], [42, 51], + [30, 41], [18, 107], [69, 45], [102, 95], [18, 145], + [111, 64], [79, 45], [84, 69], [73, 31], [37, 81]] + + +POSES_TOWN2_STRAIGHT = [ + [38, 34], [4, 2], [12, 10], [62, 55], [43, 47], + [64, 66], [78, 76], [59, 57], [61, 18], [35, 39], + [12, 8], [0, 18], [75, 68], [54, 60], [45, 49], + [46, 42], [53, 46], [80, 29], [65, 63], [0, 81], + [54, 63], [51, 42], [16, 19], [17, 26], [77, 68]] + +POSES_TOWN2_ONE_CURVE = [ + [37, 76], [8, 24], [60, 69], [38, 10], [21, 1], + [58, 71], [74, 32], [44, 0], [71, 16], [14, 24], + [34, 11], [43, 14], [75, 16], [80, 21], [3, 23], + [75, 59], [50, 47], [11, 19], [77, 34], [79, 25], + [40, 63], [58, 76], [79, 55], [16, 61], [27, 11]] + +POSES_TOWN2_NAV = [ + [19, 66], [79, 14], [19, 57], [23, 1], + [53, 76], [42, 13], [31, 71], [33, 5], + [54, 30], [10, 61], [66, 3], [27, 12], + [79, 19], [2, 29], [16, 14], [5, 57], + [70, 73], [46, 67], [57, 50], [61, 49], [21, 12], + [51, 81], [77, 68], [56, 65], [43, 54]] + +TOWN1_STRAIGHT = [ + build_scenario("Town01", start, end, 0, 0, 300, TEST_WEATHERS) + for (start, end) in POSES_TOWN1_STRAIGHT] + +TOWN1_ONE_CURVE = [ + build_scenario("Town01", start, end, 0, 0, 600, TEST_WEATHERS) + for (start, end) in POSES_TOWN1_ONE_CURVE] + +TOWN1_NAVIGATION = [ + build_scenario("Town01", start, end, 0, 0, 900, TEST_WEATHERS) + for (start, end) in POSES_TOWN1_NAV] + +TOWN1_NAVIGATION_DYNAMIC = [ + build_scenario("Town01", start, end, 20, 50, 900, TEST_WEATHERS) + for (start, end) in POSES_TOWN1_NAV] + +TOWN2_STRAIGHT = [ + build_scenario("Town02", start, end, 0, 0, 300, TRAIN_WEATHERS) + for (start, end) in POSES_TOWN2_STRAIGHT] + +TOWN2_STRAIGHT_DYNAMIC = [ + build_scenario("Town02", start, end, 20, 50, 300, TRAIN_WEATHERS) + for (start, end) in POSES_TOWN2_STRAIGHT] + +TOWN2_ONE_CURVE = [ + build_scenario("Town02", start, end, 0, 0, 600, TRAIN_WEATHERS) + for (start, end) in POSES_TOWN2_ONE_CURVE] + +TOWN2_NAVIGATION = [ + build_scenario("Town02", start, end, 0, 0, 900, TRAIN_WEATHERS) + for (start, end) in POSES_TOWN2_NAV] + +TOWN2_NAVIGATION_DYNAMIC = [ + build_scenario("Town02", start, end, 20, 50, 900, TRAIN_WEATHERS) + for (start, end) in POSES_TOWN2_NAV] + +TOWN1_ALL = ( + TOWN1_STRAIGHT + TOWN1_ONE_CURVE + TOWN1_NAVIGATION + + TOWN1_NAVIGATION_DYNAMIC) + +TOWN2_ALL = ( + TOWN2_STRAIGHT + TOWN2_ONE_CURVE + TOWN2_NAVIGATION + + TOWN2_NAVIGATION_DYNAMIC) diff --git a/examples/carla/train_a3c.py b/examples/carla/train_a3c.py new file mode 100644 index 000000000..0089e6cc3 --- /dev/null +++ b/examples/carla/train_a3c.py @@ -0,0 +1,53 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import ray +from ray.tune import grid_search, register_env, run_experiments + +from env import CarlaEnv, ENV_CONFIG +from models import register_carla_model +from scenarios import TOWN2_STRAIGHT + +env_name = "carla_env" +env_config = ENV_CONFIG.copy() +env_config.update({ + "verbose": False, + "x_res": 80, + "y_res": 80, + "squash_action_logits": grid_search([False, True]), + "use_depth_camera": False, + "discrete_actions": False, + "server_map": "/Game/Maps/Town02", + "reward_function": grid_search(["custom", "corl2017"]), + "scenarios": TOWN2_STRAIGHT, +}) + +register_env(env_name, lambda env_config: CarlaEnv(env_config)) +register_carla_model() +redis_address = ray.services.get_node_ip_address() + ":6379" + +run_experiments({ + "carla-a3c": { + "run": "A3C", + "env": "carla_env", + "resources": {"cpu": 5, "gpu": 2, "driver_gpu_limit": 0}, + "config": { + "env_config": env_config, + "use_gpu_for_workers": True, + "model": { + "custom_model": "carla", + "custom_options": { + "image_shape": [80, 80, 6], + }, + "conv_filters": [ + [16, [8, 8], 4], + [32, [4, 4], 2], + [512, [10, 10], 1], + ], + }, + "gamma": 0.95, + "num_workers": 2, + }, + }, +}, redis_address=redis_address) diff --git a/examples/carla/train_dqn.py b/examples/carla/train_dqn.py index a0135202d..991efb356 100644 --- a/examples/carla/train_dqn.py +++ b/examples/carla/train_dqn.py @@ -5,38 +5,51 @@ from __future__ import print_function from ray.tune import register_env, run_experiments from env import CarlaEnv, ENV_CONFIG +from models import register_carla_model +from scenarios import TOWN2_ONE_CURVE env_name = "carla_env" env_config = ENV_CONFIG.copy() env_config.update({ "verbose": False, - "x_res": 240, - "y_res": 240, - "use_depth_camera": False, + "x_res": 80, + "y_res": 80, "discrete_actions": True, - "max_steps": 200, - "weather": [1, 3, 7, 8, 14], + "server_map": "/Game/Maps/Town02", + "reward_function": "custom", + "scenarios": TOWN2_ONE_CURVE, }) -register_env(env_name, lambda: CarlaEnv(env_config)) + +register_env(env_name, lambda env_config: CarlaEnv(env_config)) +register_carla_model() run_experiments({ - "carla": { + "carla-dqn": { "run": "DQN", "env": "carla_env", "resources": {"cpu": 4, "gpu": 1}, "config": { - "model": { + "env_config": env_config, + "model": { + "custom_model": "carla", + "custom_options": { + "image_shape": [ + 80, 80, + lambda spec: spec.config.env_config.framestack * ( + spec.config.env_config.use_depth_camera and 1 or 3 + ), + ], + }, "conv_filters": [ [16, [8, 8], 4], - [32, [5, 5], 3], - [32, [5, 5], 2], + [32, [4, 4], 2], [512, [10, 10], 1], ], }, "timesteps_per_iteration": 100, "learning_starts": 1000, "schedule_max_timesteps": 100000, - "gamma": 0.95, + "gamma": 0.8, "tf_session_args": { "gpu_options": {"allow_growth": True}, }, diff --git a/examples/carla/train_ppo.py b/examples/carla/train_ppo.py index ab13df4e7..467b6fe7d 100644 --- a/examples/carla/train_ppo.py +++ b/examples/carla/train_ppo.py @@ -5,6 +5,8 @@ from __future__ import print_function from ray.tune import register_env, run_experiments from env import CarlaEnv, ENV_CONFIG +from models import register_carla_model +from scenarios import TOWN2_STRAIGHT env_name = "carla_env" env_config = ENV_CONFIG.copy() @@ -12,11 +14,13 @@ env_config.update({ "verbose": False, "x_res": 80, "y_res": 80, - "use_depth_camera": True, + "use_depth_camera": False, "discrete_actions": False, - "max_steps": 150, + "server_map": "/Game/Maps/Town02", + "scenarios": TOWN2_STRAIGHT, }) -register_env(env_name, lambda: CarlaEnv(env_config)) +register_env(env_name, lambda env_config: CarlaEnv(env_config)) +register_carla_model() run_experiments({ "carla": { @@ -24,6 +28,19 @@ run_experiments({ "env": "carla_env", "resources": {"cpu": 4, "gpu": 1}, "config": { + "env_config": env_config, + "model": { + "custom_model": "carla", + "custom_options": { + "image_shape": [ + env_config["x_res"], env_config["y_res"], 6], + }, + "conv_filters": [ + [16, [8, 8], 4], + [32, [4, 4], 2], + [512, [10, 10], 1], + ], + }, "num_workers": 1, "timesteps_per_batch": 2000, "min_steps_per_task": 100, diff --git a/python/ray/rllib/a3c/a3c.py b/python/ray/rllib/a3c/a3c.py index 2116a8ab4..546938edb 100644 --- a/python/ray/rllib/a3c/a3c.py +++ b/python/ray/rllib/a3c/a3c.py @@ -10,7 +10,8 @@ import ray from ray.rllib.agent import Agent from ray.rllib.optimizers import AsyncOptimizer from ray.rllib.utils import FilterManager -from ray.rllib.a3c.a3c_evaluator import A3CEvaluator, RemoteA3CEvaluator +from ray.rllib.a3c.a3c_evaluator import A3CEvaluator, RemoteA3CEvaluator, \ + GPURemoteA3CEvaluator from ray.tune.result import TrainingResult @@ -39,6 +40,8 @@ DEFAULT_CONFIG = { "vf_loss_coeff": 0.5, # Entropy coefficient "entropy_coeff": -0.01, + # Whether to place workers on GPUs + "use_gpu_for_workers": False, # Model and preprocessor options "model": { # (Image statespace) - Converts image to Channels = 1 @@ -54,21 +57,27 @@ DEFAULT_CONFIG = { "optimizer": { # Number of gradients applied for each `train` step "grads_per_step": 100, - } + }, + # Arguments to pass to the env creator + "env_config": {}, } class A3CAgent(Agent): _agent_name = "A3C" _default_config = DEFAULT_CONFIG - _allow_unknown_subkeys = ["model", "optimizer"] + _allow_unknown_subkeys = ["model", "optimizer", "env_config"] def _init(self): self.local_evaluator = A3CEvaluator( self.registry, self.env_creator, self.config, self.logdir, start_sampler=False) + if self.config["use_gpu_for_workers"]: + remote_cls = GPURemoteA3CEvaluator + else: + remote_cls = RemoteA3CEvaluator self.remote_evaluators = [ - RemoteA3CEvaluator.remote( + remote_cls.remote( self.registry, self.env_creator, self.config, self.logdir) for i in range(self.config["num_workers"])] self.optimizer = AsyncOptimizer( diff --git a/python/ray/rllib/a3c/a3c_evaluator.py b/python/ray/rllib/a3c/a3c_evaluator.py index e4864b5d1..95360bf76 100644 --- a/python/ray/rllib/a3c/a3c_evaluator.py +++ b/python/ray/rllib/a3c/a3c_evaluator.py @@ -29,7 +29,7 @@ class A3CEvaluator(Evaluator): def __init__( self, registry, env_creator, config, logdir, start_sampler=True): env = ModelCatalog.get_preprocessor_as_wrapper( - registry, env_creator(), config["model"]) + registry, env_creator(config["env_config"]), config["model"]) self.env = env policy_cls = get_policy_cls(config) # TODO(rliaw): should change this to be just env.observation_space @@ -116,3 +116,4 @@ class A3CEvaluator(Evaluator): RemoteA3CEvaluator = ray.remote(A3CEvaluator) +GPURemoteA3CEvaluator = ray.remote(num_gpus=1)(A3CEvaluator) diff --git a/python/ray/rllib/a3c/tfpolicy.py b/python/ray/rllib/a3c/tfpolicy.py index a2f5377cf..f311d7461 100644 --- a/python/ray/rllib/a3c/tfpolicy.py +++ b/python/ray/rllib/a3c/tfpolicy.py @@ -78,7 +78,8 @@ class TFPolicy(Policy): # TODO(rliaw): Can consider exposing these parameters self.sess = tf.Session(graph=self.g, config=tf.ConfigProto( - intra_op_parallelism_threads=1, inter_op_parallelism_threads=2)) + intra_op_parallelism_threads=1, inter_op_parallelism_threads=2, + gpu_options=tf.GPUOptions(allow_growth=True))) self.variables = ray.experimental.TensorFlowVariables(self.loss, self.sess) self.sess.run(tf.global_variables_initializer()) diff --git a/python/ray/rllib/agent.py b/python/ray/rllib/agent.py index 742c2cee1..92bcf4b48 100644 --- a/python/ray/rllib/agent.py +++ b/python/ray/rllib/agent.py @@ -46,8 +46,6 @@ def _deep_update(original, new_dict, new_keys_allowed, whitelist): if not new_keys_allowed: raise Exception( "Unknown config parameter `{}` ".format(k)) - else: - logger.warn("`{}` not in default configuration...".format(k)) if type(original.get(k)) is dict: if k in whitelist: _deep_update(original[k], value, True, []) @@ -98,9 +96,9 @@ class Agent(Trainable): self.env_creator = registry.get(ENV_CREATOR, env) else: import gym # soft dependency - self.env_creator = lambda: gym.make(env) + self.env_creator = lambda env_config: gym.make(env) else: - self.env_creator = lambda: None + self.env_creator = lambda env_config: None self.config = self._default_config.copy() self.registry = registry diff --git a/python/ray/rllib/dqn/dqn.py b/python/ray/rllib/dqn/dqn.py index eba14f018..432e8a3a6 100644 --- a/python/ray/rllib/dqn/dqn.py +++ b/python/ray/rllib/dqn/dqn.py @@ -28,6 +28,8 @@ DEFAULT_CONFIG = dict( model={}, # Discount factor for the MDP gamma=0.99, + # Arguments to pass to the env creator + env_config={}, # === Exploration === # Max num timesteps for annealing schedules. Exploration is annealed from @@ -107,7 +109,8 @@ DEFAULT_CONFIG = dict( class DQNAgent(Agent): _agent_name = "DQN" - _allow_unknown_subkeys = ["model", "optimizer", "tf_session_args"] + _allow_unknown_subkeys = [ + "model", "optimizer", "tf_session_args", "env_config"] _default_config = DEFAULT_CONFIG def _init(self): diff --git a/python/ray/rllib/dqn/dqn_evaluator.py b/python/ray/rllib/dqn/dqn_evaluator.py index 60388a68a..04dab1669 100644 --- a/python/ray/rllib/dqn/dqn_evaluator.py +++ b/python/ray/rllib/dqn/dqn_evaluator.py @@ -18,7 +18,7 @@ class DQNEvaluator(TFMultiGPUSupport): TODO(rliaw): Support observation/reward filters?""" def __init__(self, registry, env_creator, config, logdir): - env = env_creator() + env = env_creator(config["env_config"]) env = wrap_dqn(registry, env, config["model"]) self.env = env self.config = config diff --git a/python/ray/rllib/es/es.py b/python/ray/rllib/es/es.py index 28b1c8bb9..0e8d666d9 100644 --- a/python/ray/rllib/es/es.py +++ b/python/ray/rllib/es/es.py @@ -37,7 +37,8 @@ DEFAULT_CONFIG = dict( return_proc_mode="centered_rank", num_workers=10, stepsize=0.01, - observation_filter="MeanStdFilter") + observation_filter="MeanStdFilter", + env_config={}) @ray.remote @@ -70,7 +71,7 @@ class Worker(object): self.policy_params = policy_params self.noise = SharedNoiseTable(noise) - self.env = env_creator() + self.env = env_creator(config["env_config"]) self.preprocessor = ModelCatalog.get_preprocessor(registry, self.env) self.sess = utils.make_session(single_threaded=True) @@ -135,13 +136,14 @@ class Worker(object): class ESAgent(Agent): _agent_name = "ES" _default_config = DEFAULT_CONFIG + _allow_unknown_subkeys = ["env_config"] def _init(self): policy_params = { "action_noise_std": 0.01 } - env = self.env_creator() + env = self.env_creator(self.config["env_config"]) preprocessor = ModelCatalog.get_preprocessor(self.registry, env) self.sess = utils.make_session(single_threaded=False) diff --git a/python/ray/rllib/models/catalog.py b/python/ray/rllib/models/catalog.py index cc87fd2a4..bd490cc0b 100644 --- a/python/ray/rllib/models/catalog.py +++ b/python/ray/rllib/models/catalog.py @@ -9,9 +9,7 @@ from ray.tune.registry import RLLIB_MODEL, RLLIB_PREPROCESSOR, \ from ray.rllib.models.action_dist import ( Categorical, Deterministic, DiagGaussian) -from ray.rllib.models.preprocessors import ( - NoPreprocessor, AtariRamPreprocessor, AtariPixelPreprocessor, - OneHotPreprocessor) +from ray.rllib.models.preprocessors import get_preprocessor from ray.rllib.models.fcnet import FullyConnectedNetwork from ray.rllib.models.visionnet import VisionNetwork @@ -48,9 +46,6 @@ class ModelCatalog(object): >>> action = dist.sample() """ - ATARI_OBS_SHAPE = (210, 160, 3) - ATARI_RAM_OBS_SHAPE = (128,) - @staticmethod def get_action_dist(action_space, dist_type=None): """Returns action distribution class and size for the given action space. @@ -147,40 +142,19 @@ class ModelCatalog(object): preprocessor (Preprocessor): Preprocessor for the env observations. """ - # For older gym versions that don't set shape for Discrete - if not hasattr(env.observation_space, "shape") and \ - isinstance(env.observation_space, gym.spaces.Discrete): - env.observation_space.shape = () - - obs_shape = env.observation_space.shape - for k in options.keys(): if k not in MODEL_CONFIGS: raise Exception( "Unknown config key `{}`, all keys: {}".format( k, MODEL_CONFIGS)) - print("Observation shape is {}".format(obs_shape)) - if "custom_preprocessor" in options: preprocessor = options["custom_preprocessor"] print("Using custom preprocessor {}".format(preprocessor)) return registry.get(RLLIB_PREPROCESSOR, preprocessor)( env.observation_space, options) - if obs_shape == (): - print("Using one-hot preprocessor for discrete envs.") - preprocessor = OneHotPreprocessor - elif obs_shape == ModelCatalog.ATARI_OBS_SHAPE: - print("Assuming Atari pixel env, using AtariPixelPreprocessor.") - preprocessor = AtariPixelPreprocessor - elif obs_shape == ModelCatalog.ATARI_RAM_OBS_SHAPE: - print("Assuming Atari ram env, using AtariRamPreprocessor.") - preprocessor = AtariRamPreprocessor - else: - print("Not using any observation preprocessor.") - preprocessor = NoPreprocessor - + preprocessor = get_preprocessor(env.observation_space) return preprocessor(env.observation_space, options) @staticmethod diff --git a/python/ray/rllib/models/preprocessors.py b/python/ray/rllib/models/preprocessors.py index d03e03e7c..e90a0d6d8 100644 --- a/python/ray/rllib/models/preprocessors.py +++ b/python/ray/rllib/models/preprocessors.py @@ -3,6 +3,10 @@ from __future__ import division from __future__ import print_function import cv2 import numpy as np +import gym + +ATARI_OBS_SHAPE = (210, 160, 3) +ATARI_RAM_OBS_SHAPE = (128,) class Preprocessor(object): @@ -13,6 +17,7 @@ class Preprocessor(object): """ def __init__(self, obs_space, options): + legacy_patch_shapes(obs_space) self._obs_space = obs_space self._options = options self._init() @@ -40,7 +45,6 @@ class AtariPixelPreprocessor(Preprocessor): if self._channel_major: self.shape = self.shape[-1:] + self.shape[:-1] - # TODO(ekl) why does this need to return an extra size-1 dim (the [None]) def transform(self, observation): """Downsamples images from (210, 160, 3) by the configured factor.""" scaled = observation[25:-25, :, :] @@ -64,7 +68,6 @@ class AtariPixelPreprocessor(Preprocessor): return scaled -# TODO(rliaw): Also should include the deepmind preprocessor class AtariRamPreprocessor(Preprocessor): def _init(self): self.shape = (128,) @@ -90,3 +93,75 @@ class NoPreprocessor(Preprocessor): def transform(self, observation): return observation + + +class TupleFlatteningPreprocessor(Preprocessor): + """Preprocesses each tuple element, then flattens it all into a vector. + + If desired, the vector output can be unpacked via tf.reshape() within a + custom model to handle each component separately. + """ + + def _init(self): + assert isinstance(self._obs_space, gym.spaces.Tuple) + size = 0 + self.preprocessors = [] + for i in range(len(self._obs_space.spaces)): + space = self._obs_space.spaces[i] + print("Creating sub-preprocessor for", space) + preprocessor = get_preprocessor(space)(space, self._options) + self.preprocessors.append(preprocessor) + size += np.product(preprocessor.shape) + self.shape = (size,) + + def transform(self, observation): + assert len(observation) == len(self.preprocessors), observation + return np.concatenate([ + np.reshape(p.transform(o), [np.product(p.shape)]) + for (o, p) in zip(observation, self.preprocessors)]) + + +def get_preprocessor(space): + """Returns an appropriate preprocessor class for the given space.""" + + legacy_patch_shapes(space) + obs_shape = space.shape + print("Observation shape is {}".format(obs_shape)) + + if obs_shape == (): + print("Using one-hot preprocessor for discrete envs.") + preprocessor = OneHotPreprocessor + elif obs_shape == ATARI_OBS_SHAPE: + print("Assuming Atari pixel env, using AtariPixelPreprocessor.") + preprocessor = AtariPixelPreprocessor + elif obs_shape == ATARI_RAM_OBS_SHAPE: + print("Assuming Atari ram env, using AtariRamPreprocessor.") + preprocessor = AtariRamPreprocessor + elif isinstance(space, gym.spaces.Tuple): + print("Using a TupleFlatteningPreprocessor") + preprocessor = TupleFlatteningPreprocessor + else: + print("Not using any observation preprocessor.") + preprocessor = NoPreprocessor + + return preprocessor + + +def legacy_patch_shapes(space): + """Assigns shapes to spaces that don't have shapes. + + This is only needed for older gym versions that don't set shapes properly + for Tuple and Discrete spaces. + """ + + if not hasattr(space, "shape"): + if isinstance(space, gym.spaces.Discrete): + space.shape = () + elif isinstance(space, gym.spaces.Tuple): + shapes = [] + for s in space.spaces: + shape = legacy_patch_shapes(s) + shapes.append(shape) + space.shape = tuple(shapes) + + return space.shape diff --git a/python/ray/rllib/ppo/ppo.py b/python/ray/rllib/ppo/ppo.py index d33490235..184988f22 100644 --- a/python/ray/rllib/ppo/ppo.py +++ b/python/ray/rllib/ppo/ppo.py @@ -78,12 +78,14 @@ DEFAULT_CONFIG = { "tf_debug_inf_or_nan": False, # If True, we write tensorflow logs and checkpoints "write_logs": True, + # Arguments to pass to the env creator + "env_config": {}, } class PPOAgent(Agent): _agent_name = "PPO" - _allow_unknown_subkeys = ["model", "tf_session_args"] + _allow_unknown_subkeys = ["model", "tf_session_args", "env_config"] _default_config = DEFAULT_CONFIG def _init(self): diff --git a/python/ray/rllib/ppo/ppo_evaluator.py b/python/ray/rllib/ppo/ppo_evaluator.py index 7fc37407d..e559b354d 100644 --- a/python/ray/rllib/ppo/ppo_evaluator.py +++ b/python/ray/rllib/ppo/ppo_evaluator.py @@ -43,7 +43,7 @@ class PPOEvaluator(Evaluator): self.config = config self.logdir = logdir self.env = ModelCatalog.get_preprocessor_as_wrapper( - registry, env_creator(), config["model"]) + registry, env_creator(config["env_config"]), config["model"]) if is_remote: config_proto = tf.ConfigProto() else: diff --git a/python/ray/rllib/test/test_catalog.py b/python/ray/rllib/test/test_catalog.py index 63cad29b4..5f3ac01f8 100644 --- a/python/ray/rllib/test/test_catalog.py +++ b/python/ray/rllib/test/test_catalog.py @@ -2,6 +2,7 @@ import gym import numpy as np import tensorflow as tf import unittest +from gym.spaces import Box, Discrete, Tuple import ray from ray.tune.registry import get_registry @@ -34,11 +35,25 @@ class ModelCatalogTest(unittest.TestCase): def testGymPreprocessors(self): p1 = ModelCatalog.get_preprocessor( get_registry(), gym.make("CartPole-v0")) - assert type(p1) == NoPreprocessor + self.assertEqual(type(p1), NoPreprocessor) p2 = ModelCatalog.get_preprocessor( get_registry(), gym.make("FrozenLake-v0")) - assert type(p2) == OneHotPreprocessor + self.assertEqual(type(p2), OneHotPreprocessor) + + def testTuplePreprocessor(self): + ray.init() + + class TupleEnv(object): + def __init__(self): + self.observation_space = Tuple( + [Discrete(5), Box(0, 1, shape=(3,))]) + p1 = ModelCatalog.get_preprocessor( + get_registry(), TupleEnv()) + self.assertEqual(p1.shape, (8,)) + self.assertEqual( + list(p1.transform((0, [1, 2, 3]))), + [float(x) for x in [1, 0, 0, 0, 0, 1, 2, 3]]) def testCustomPreprocessor(self): ray.init() @@ -47,12 +62,12 @@ class ModelCatalogTest(unittest.TestCase): env = gym.make("CartPole-v0") p1 = ModelCatalog.get_preprocessor( get_registry(), env, {"custom_preprocessor": "foo"}) - assert type(p1) == CustomPreprocessor + self.assertEqual(str(type(p1)), str(CustomPreprocessor)) p2 = ModelCatalog.get_preprocessor( get_registry(), env, {"custom_preprocessor": "bar"}) - assert type(p2) == CustomPreprocessor2 + self.assertEqual(str(type(p2)), str(CustomPreprocessor2)) p3 = ModelCatalog.get_preprocessor(get_registry(), env) - assert type(p3) == NoPreprocessor + self.assertEqual(type(p3), NoPreprocessor) def testDefaultModels(self): ray.init() @@ -60,19 +75,19 @@ class ModelCatalogTest(unittest.TestCase): with tf.variable_scope("test1"): p1 = ModelCatalog.get_model( get_registry(), np.zeros((10, 3), dtype=np.float32), 5) - assert type(p1) == FullyConnectedNetwork + self.assertEqual(type(p1), FullyConnectedNetwork) with tf.variable_scope("test2"): p2 = ModelCatalog.get_model( get_registry(), np.zeros((10, 80, 80, 3), dtype=np.float32), 5) - assert type(p2) == VisionNetwork + self.assertEqual(type(p2), VisionNetwork) def testCustomModel(self): ray.init() ModelCatalog.register_custom_model("foo", CustomModel) p1 = ModelCatalog.get_model( get_registry(), 1, 5, {"custom_model": "foo"}) - assert type(p1) == CustomModel + self.assertEqual(str(type(p1)), str(CustomModel)) if __name__ == "__main__": diff --git a/python/ray/rllib/utils/sampler.py b/python/ray/rllib/utils/sampler.py index 9ba65eb93..d846ca5ac 100644 --- a/python/ray/rllib/utils/sampler.py +++ b/python/ray/rllib/utils/sampler.py @@ -193,8 +193,12 @@ def _env_runner(env, policy, num_local_steps, horizon, obs_filter): terminal condition, and other fields as dictated by `policy`. """ last_observation = obs_filter(env.reset()) - horizon = horizon if horizon else env.spec.tags.get( - "wrapper_config.TimeLimit.max_episode_steps") + try: + horizon = horizon if horizon else env.spec.tags.get( + "wrapper_config.TimeLimit.max_episode_steps") + except Exception: + print("Warning, no horizon specified, assuming infinite") + horizon = 999999 assert horizon > 0 if hasattr(policy, "get_initial_features"): last_features = policy.get_initial_features() diff --git a/python/ray/tune/trial.py b/python/ray/tune/trial.py index 7958fda13..67b658a26 100644 --- a/python/ray/tune/trial.py +++ b/python/ray/tune/trial.py @@ -306,6 +306,8 @@ class Trial(object): def logger_creator(config): # Set the working dir in the remote process, for user file writes + if not os.path.exists(remote_logdir): + os.makedirs(remote_logdir) os.chdir(remote_logdir) return NoopLogger(config, remote_logdir)