[rllib] Changes for preprocessors (#1033)

* Changes for preprocessors

* removed comments

* Changes + push for lint

* linted

* adding dependency for travis

* linting won't pass

* reordering

* needed for testing

* added comments

* pip it

* pip dependencies
This commit is contained in:
Richard Liaw
2017-09-30 13:11:20 -07:00
committed by Philipp Moritz
parent 2d0f439b7b
commit 16e82b43d1
7 changed files with 67 additions and 59 deletions
+16 -7
View File
@@ -12,13 +12,18 @@ import ray
from ray.rllib.a3c.runner import RunnerThread, process_rollout
from ray.rllib.a3c.envs import create_env
from ray.rllib.common import Agent, TrainingResult
from ray.rllib.a3c.shared_model import SharedModel
from ray.rllib.a3c.shared_model_lstm import SharedModelLSTM
# Default A3C settings. The nested "model" dict is the preprocessor
# configuration that the agent passes on to create_env().
DEFAULT_CONFIG = {
    "num_workers": 4,
    "num_batches_per_iteration": 100,
    "batch_size": 10,
    # When true the agent uses SharedModelLSTM, otherwise SharedModel.
    "use_lstm": True,
    "model": {
        "grayscale": True,
        "zero_mean": False,
        "dim": 42,
    },
}
@@ -28,8 +33,9 @@ class Runner(object):
The gradient computation is also executed from this object.
"""
def __init__(self, env_name, policy_cls, actor_id, batch_size, logdir):
env = create_env(env_name)
def __init__(self, env_name, policy_cls, actor_id, batch_size,
preprocess_config, logdir):
env = create_env(env_name, preprocess_config)
self.id = actor_id
# TODO(rliaw): should change this to be just env.observation_space
self.policy = policy_cls(env.observation_space.shape, env.action_space)
@@ -88,16 +94,19 @@ class Runner(object):
class A3CAgent(Agent):
def __init__(self, env_name, config,
policy_cls=SharedModelLSTM, upload_dir=None):
def __init__(self, env_name, config, upload_dir=None):
config.update({"alg": "A3C"})
Agent.__init__(self, env_name, config, upload_dir=upload_dir)
self.env = create_env(env_name)
self.env = create_env(env_name, config["model"])
if config.get("use_lstm", True):
policy_cls = SharedModelLSTM
else:
policy_cls = SharedModel
self.policy = policy_cls(
self.env.observation_space.shape, self.env.action_space)
self.agents = [
Runner.remote(env_name, policy_cls, i,
config["batch_size"], self.logdir)
config["batch_size"], config["model"], self.logdir)
for i in range(config["num_workers"])]
self.parameters = self.policy.get_weights()
+13 -35
View File
@@ -2,57 +2,35 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import gym
from gym.spaces.box import Box
import logging
import numpy as np
import time
from ray.rllib.models import ModelCatalog
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
def create_env(env_id, options=None):
    """Create the gym environment `env_id` with preprocessing and diagnostics.

    The raw environment is wrapped once in RLLibPreprocessing (configured by
    `options`) and then in Diagnostic. Frames are preprocessed exactly once;
    no separate Atari-specific wrapper is stacked underneath.

    Args:
        env_id: Environment id handed to gym.make.
        options: Preprocessor options dict. None is treated as an empty
            dict so that one-argument calls keep working.

    Returns:
        The wrapped environment.
    """
    if options is None:
        options = {}
    env = gym.make(env_id)
    env = RLLibPreprocessing(env_id, env, options)
    env = Diagnostic(env)
    return env
def _process_frame42(frame):
    """Crop, shrink and rescale a frame to a float32 array of shape (42, 42, 1).

    Rows 34..193 and the first 160 columns are kept, the crop is resized to
    80x80 and then 42x42, axis 2 is averaged away, and values are multiplied
    by 1/255.
    """
    cropped = frame[34:(34 + 160), :160]
    # Go through 80x80 first and only then down to 42x42 (essentially
    # mipmapping). A direct resize loses pixels that, when mapped to 42x42,
    # aren't close enough to the pixel boundary.
    half_size = cv2.resize(cropped, (80, 80))
    small = cv2.resize(half_size, (42, 42))
    averaged = small.mean(2).astype(np.float32)
    averaged *= (1.0 / 255.0)
    return np.reshape(averaged, [42, 42, 1])
def _process_frame80(frame):
    """Crop, shrink and rescale a frame to a float32 array of shape (80, 80, 1).

    Rows 34..193 and the first 160 columns are kept, the crop is resized to
    80x80, axis 2 is averaged away, and values are multiplied by 1/255.
    """
    cropped = frame[34:(34 + 160), :160]
    # Resize by half, down to 80x80.
    resized = cv2.resize(cropped, (80, 80))
    averaged = resized.mean(2).astype(np.float32)
    averaged *= (1.0 / 255.0)
    return np.reshape(averaged, [80, 80, 1])
class AtariProcessing(gym.ObservationWrapper):
    """Observation wrapper advertising a [42, 42, 1] Box space over [0.0, 1.0]."""

    def __init__(self, env=None):
        """Wrap `env` and replace its observation space."""
        super(AtariProcessing, self).__init__(env)
        frame_shape = [42, 42, 1]
        self.observation_space = Box(0.0, 1.0, frame_shape)
class RLLibPreprocessing(gym.ObservationWrapper):
    """Observation wrapper that runs a ModelCatalog preprocessor on each frame.

    The preprocessor is obtained from ModelCatalog.get_preprocessor using the
    environment id, the wrapped environment's observation shape and the
    options dict. The advertised observation space becomes a Box over
    [-1.0, 1.0] with the preprocessor's transformed shape.
    """

    def __init__(self, env_id, env=None, options=None):
        """Wrap `env`.

        Args:
            env_id: Environment id used to look up the preprocessor.
            env: The gym environment to wrap.
            options: Preprocessor options dict; None means no options.
        """
        super(RLLibPreprocessing, self).__init__(env)
        # Build a fresh dict per instance: a `dict()` default argument is
        # created once and would be shared by every wrapper constructed
        # without explicit options.
        if options is None:
            options = {}
        self.preprocessor = ModelCatalog.get_preprocessor(
            env_id, env.observation_space.shape, options)
        self._process_shape = self.preprocessor.transform_shape(
            env.observation_space.shape)
        self.observation_space = Box(-1.0, 1.0, self._process_shape)

    def _observation(self, observation):
        """Return the preprocessed observation with its leading axis removed."""
        # transform() output carries an extra axis 0, dropped via squeeze(0).
        return self.preprocessor.transform(observation).squeeze(0)
class Diagnostic(gym.Wrapper):
+8 -6
View File
@@ -13,12 +13,14 @@ from ray.rllib.models.visionnet import VisionNetwork
# Model config keys, each listed exactly once. Every entry ends with a comma
# so adjacent string literals can never be silently concatenated.
MODEL_CONFIGS = [
    "conv_filters",  # Number of filters
    "dim",  # Dimension for ATARI
    "grayscale",  # Converts ATARI frame to 1 Channel Grayscale image
    "zero_mean",  # Changes frame to range from [-1, 1] if true
    "extra_frameskip",  # (int) for number of frames to skip
    "fcnet_activation",  # Nonlinearity for fully connected net (tanh, relu)
    "fcnet_hiddens",  # Number of hidden layers for fully connected net
    "free_log_std",  # Documented in ray.rllib.models.Model
]
+24 -6
View File
@@ -1,6 +1,8 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import numpy as np
class Preprocessor(object):
@@ -24,20 +26,36 @@ class Preprocessor(object):
class AtariPixelPreprocessor(Preprocessor):
    """Crops, resizes and rescales Atari pixel frames.

    Options read from self.options:
        grayscale: average the channel axis into a single channel
            (default False).
        zero_mean: rescale as (x - 128) / 128 when true, otherwise as
            x / 255 (default True).
        dim: side length of the square output frame (default 80).
    """

    def _init(self):
        self.grayscale = self.options.get("grayscale", False)
        self.zero_mean = self.options.get("zero_mean", True)
        self.dim = self.options.get("dim", 80)

    def transform_shape(self, obs_shape):
        """Return the per-frame output shape, (dim, dim, channels)."""
        channels = 1 if self.grayscale else 3
        return (self.dim, self.dim, channels)

    # TODO(ekl) why does this need to return an extra size-1 dim (the [None])
    def transform(self, observation):
        """Crops and resizes images from (210, 160, 3) to (dim, dim), then rescales.

        Returns a float32 array with an extra leading size-1 axis.
        """
        # Drop 25 rows from the top and from the bottom of the frame.
        scaled = observation[25:-25, :, :]
        # For small targets, resize to 80x80 first and then to the final size.
        if self.dim < 80:
            scaled = cv2.resize(scaled, (80, 80))
        scaled = cv2.resize(scaled, (self.dim, self.dim))
        if self.grayscale:
            scaled = scaled.mean(2)
            scaled = np.reshape(scaled, [self.dim, self.dim, 1])
        # Always convert to float before rescaling. On uint8 frames
        # `scaled - 128` wraps around and the in-place `*=` by a float
        # raises, which previously affected every non-grayscale frame.
        scaled = scaled.astype(np.float32)
        scaled = scaled[None]
        if self.zero_mean:
            scaled = (scaled - 128) / 128
        else:
            scaled *= 1.0 / 255.0
        return scaled
# TODO(rliaw): Also include the DeepMind preprocessor.
class AtariRamPreprocessor(Preprocessor):
def transform_shape(self, obs_shape):
return (128,)