mirror of
https://github.com/wassname/ray.git
synced 2026-06-27 21:38:18 +08:00
[rllib] Changes for preprocessors (#1033)
* Changes for preprocessors
* removed comments
* Changes + push for lint
* linted
* adding dependency for travis
* linting won't pass
* reordering
* needed for testing
* added comments
* pip it
* pip dependencies
This commit is contained in:
committed by
Philipp Moritz
parent
2d0f439b7b
commit
16e82b43d1
@@ -12,13 +12,18 @@ import ray
|
||||
from ray.rllib.a3c.runner import RunnerThread, process_rollout
|
||||
from ray.rllib.a3c.envs import create_env
|
||||
from ray.rllib.common import Agent, TrainingResult
|
||||
from ray.rllib.a3c.shared_model import SharedModel
|
||||
from ray.rllib.a3c.shared_model_lstm import SharedModelLSTM
|
||||
|
||||
|
||||
DEFAULT_CONFIG = {
|
||||
"num_workers": 4,
|
||||
"num_batches_per_iteration": 100,
|
||||
"batch_size": 10
|
||||
"batch_size": 10,
|
||||
"use_lstm": True,
|
||||
"model": {"grayscale": True,
|
||||
"zero_mean": False,
|
||||
"dim": 42}
|
||||
}
|
||||
|
||||
|
||||
@@ -28,8 +33,9 @@ class Runner(object):
|
||||
|
||||
The gradient computation is also executed from this object.
|
||||
"""
|
||||
def __init__(self, env_name, policy_cls, actor_id, batch_size, logdir):
|
||||
env = create_env(env_name)
|
||||
def __init__(self, env_name, policy_cls, actor_id, batch_size,
|
||||
preprocess_config, logdir):
|
||||
env = create_env(env_name, preprocess_config)
|
||||
self.id = actor_id
|
||||
# TODO(rliaw): should change this to be just env.observation_space
|
||||
self.policy = policy_cls(env.observation_space.shape, env.action_space)
|
||||
@@ -88,16 +94,19 @@ class Runner(object):
|
||||
|
||||
|
||||
class A3CAgent(Agent):
|
||||
def __init__(self, env_name, config,
|
||||
policy_cls=SharedModelLSTM, upload_dir=None):
|
||||
def __init__(self, env_name, config, upload_dir=None):
|
||||
config.update({"alg": "A3C"})
|
||||
Agent.__init__(self, env_name, config, upload_dir=upload_dir)
|
||||
self.env = create_env(env_name)
|
||||
self.env = create_env(env_name, config["model"])
|
||||
if config.get("use_lstm", True):
|
||||
policy_cls = SharedModelLSTM
|
||||
else:
|
||||
policy_cls = SharedModel
|
||||
self.policy = policy_cls(
|
||||
self.env.observation_space.shape, self.env.action_space)
|
||||
self.agents = [
|
||||
Runner.remote(env_name, policy_cls, i,
|
||||
config["batch_size"], self.logdir)
|
||||
config["batch_size"], config["model"], self.logdir)
|
||||
for i in range(config["num_workers"])]
|
||||
self.parameters = self.policy.get_weights()
|
||||
|
||||
|
||||
@@ -2,57 +2,35 @@ from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import cv2
|
||||
import gym
|
||||
from gym.spaces.box import Box
|
||||
import logging
|
||||
import numpy as np
|
||||
import time
|
||||
|
||||
from ray.rllib.models import ModelCatalog
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
|
||||
def create_env(env_id):
|
||||
def create_env(env_id, options):
|
||||
env = gym.make(env_id)
|
||||
if hasattr(env.env, "ale"):
|
||||
env = AtariProcessing(env)
|
||||
env = RLLibPreprocessing(env_id, env, options)
|
||||
env = Diagnostic(env)
|
||||
return env
|
||||
|
||||
|
||||
def _process_frame42(frame):
|
||||
frame = frame[34:(34 + 160), :160]
|
||||
# Resize by half, then down to 42x42 (essentially mipmapping). If we resize
|
||||
# directly we lose pixels that, when mapped to 42x42, aren't close enough
|
||||
# to the pixel boundary.
|
||||
frame = cv2.resize(frame, (80, 80))
|
||||
frame = cv2.resize(frame, (42, 42))
|
||||
frame = frame.mean(2)
|
||||
frame = frame.astype(np.float32)
|
||||
frame *= (1.0 / 255.0)
|
||||
frame = np.reshape(frame, [42, 42, 1])
|
||||
return frame
|
||||
|
||||
|
||||
def _process_frame80(frame):
|
||||
frame = frame[34:(34 + 160), :160]
|
||||
# Resize by half, then down to 80x80.
|
||||
frame = cv2.resize(frame, (80, 80))
|
||||
frame = frame.mean(2)
|
||||
frame = frame.astype(np.float32)
|
||||
frame *= (1.0 / 255.0)
|
||||
frame = np.reshape(frame, [80, 80, 1])
|
||||
return frame
|
||||
|
||||
|
||||
class AtariProcessing(gym.ObservationWrapper):
|
||||
def __init__(self, env=None):
|
||||
super(AtariProcessing, self).__init__(env)
|
||||
self.observation_space = Box(0.0, 1.0, [42, 42, 1])
|
||||
class RLLibPreprocessing(gym.ObservationWrapper):
|
||||
def __init__(self, env_id, env=None, options=dict()):
|
||||
super(RLLibPreprocessing, self).__init__(env)
|
||||
self.preprocessor = ModelCatalog.get_preprocessor(
|
||||
env_id, env.observation_space.shape, options)
|
||||
self._process_shape = self.preprocessor.transform_shape(
|
||||
env.observation_space.shape)
|
||||
self.observation_space = Box(-1.0, 1.0, self._process_shape)
|
||||
|
||||
def _observation(self, observation):
|
||||
return _process_frame42(observation)
|
||||
return self.preprocessor.transform(observation).squeeze(0)
|
||||
|
||||
|
||||
class Diagnostic(gym.Wrapper):
|
||||
|
||||
@@ -13,12 +13,14 @@ from ray.rllib.models.visionnet import VisionNetwork
|
||||
|
||||
|
||||
MODEL_CONFIGS = [
|
||||
"conv_filters",
|
||||
"downscale_factor",
|
||||
"extra_frameskip",
|
||||
"fcnet_activation",
|
||||
"fcnet_hiddens",
|
||||
"free_log_std"
|
||||
"conv_filters", # Number of filters
|
||||
"dim", # Dimension for ATARI
|
||||
"grayscale", # Converts ATARI frame to 1 Channel Grayscale image
|
||||
"zero_mean", # Changes frame to range from [-1, 1] if true
|
||||
"extra_frameskip", # (int) for number of frames to skip
|
||||
"fcnet_activation", # Nonlinearity for fully connected net (tanh, relu)
|
||||
"fcnet_hiddens", # Number of hidden layers for fully connected net
|
||||
"free_log_std" # Documented in ray.rllib.models.Model
|
||||
]
|
||||
|
||||
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
|
||||
class Preprocessor(object):
|
||||
@@ -24,20 +26,36 @@ class Preprocessor(object):
|
||||
|
||||
class AtariPixelPreprocessor(Preprocessor):
|
||||
def _init(self):
|
||||
self.downscale_factor = self.options.get("downscale_factor", 2)
|
||||
self.dim = int(160 / self.downscale_factor)
|
||||
self.grayscale = self.options.get("grayscale", False)
|
||||
self.zero_mean = self.options.get("zero_mean", True)
|
||||
self.dim = self.options.get("dim", 80)
|
||||
|
||||
def transform_shape(self, obs_shape):
|
||||
return (self.dim, self.dim, 3)
|
||||
if self.grayscale:
|
||||
return (self.dim, self.dim, 1)
|
||||
else:
|
||||
return (self.dim, self.dim, 3)
|
||||
|
||||
# TODO(ekl): why does this need to return an extra size-1 dim (the [None])?
|
||||
def transform(self, observation):
|
||||
"""Downsamples images from (210, 160, 3) by the configured factor."""
|
||||
scaled = observation[
|
||||
25:-25:self.downscale_factor, ::self.downscale_factor, :][None]
|
||||
return (scaled - 128) / 128
|
||||
scaled = observation[25:-25, :, :]
|
||||
if self.dim < 80:
|
||||
scaled = cv2.resize(scaled, (80, 80))
|
||||
scaled = cv2.resize(scaled, (self.dim, self.dim))
|
||||
if self.grayscale:
|
||||
scaled = scaled.mean(2)
|
||||
scaled = scaled.astype(np.float32)
|
||||
scaled = np.reshape(scaled, [self.dim, self.dim, 1])
|
||||
scaled = scaled[None]
|
||||
if self.zero_mean:
|
||||
scaled = (scaled - 128) / 128
|
||||
else:
|
||||
scaled *= 1.0 / 255.0
|
||||
return scaled
|
||||
|
||||
|
||||
# TODO(rliaw): Also should include the deepmind preprocessor
|
||||
class AtariRamPreprocessor(Preprocessor):
|
||||
def transform_shape(self, obs_shape):
|
||||
return (128,)
|
||||
|
||||
Reference in New Issue
Block a user