From 16e82b43d1b4bfc7a8a7402a18c8e11a8943e4ea Mon Sep 17 00:00:00 2001 From: Richard Liaw Date: Sat, 30 Sep 2017 13:11:20 -0700 Subject: [PATCH] [rllib] Changes for preprocessors (#1033) * Changes for preprocessors * removed comments * Changes + push for lint * linted * adding dependency for travis * linting won't pass * reordering * needed for testing * added comments * pip it * pip dependencies --- .travis/install-dependencies.sh | 8 ++-- doc/requirements-doc.txt | 1 + python/ray/rllib/a3c/a3c.py | 23 +++++++---- python/ray/rllib/a3c/envs.py | 48 ++++++---------------- python/ray/rllib/models/catalog.py | 14 ++++--- python/ray/rllib/models/preprocessors.py | 30 +++++++++++--- test/jenkins_tests/run_multi_node_tests.sh | 2 +- 7 files changed, 67 insertions(+), 59 deletions(-) diff --git a/.travis/install-dependencies.sh b/.travis/install-dependencies.sh index cba8a1a47..8f44b138b 100755 --- a/.travis/install-dependencies.sh +++ b/.travis/install-dependencies.sh @@ -24,7 +24,7 @@ if [[ "$PYTHON" == "2.7" ]] && [[ "$platform" == "linux" ]]; then wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -O miniconda.sh bash miniconda.sh -b -p $HOME/miniconda export PATH="$HOME/miniconda/bin:$PATH" - pip install numpy cloudpickle==0.3.0 cython cmake funcsigs click colorama psutil redis tensorflow gym flatbuffers + pip install numpy cloudpickle==0.3.0 cython cmake funcsigs click colorama psutil redis tensorflow gym flatbuffers opencv-python elif [[ "$PYTHON" == "3.5" ]] && [[ "$platform" == "linux" ]]; then sudo apt-get update sudo apt-get install -y cmake pkg-config python-dev python-numpy build-essential autoconf curl libtool libboost-dev libboost-filesystem-dev libboost-system-dev unzip @@ -32,7 +32,7 @@ elif [[ "$PYTHON" == "3.5" ]] && [[ "$platform" == "linux" ]]; then wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh bash miniconda.sh -b -p $HOME/miniconda export PATH="$HOME/miniconda/bin:$PATH" - pip install numpy cloudpickle==0.3.0 cython cmake funcsigs click colorama psutil redis tensorflow gym flatbuffers + pip install numpy cloudpickle==0.3.0 cython cmake funcsigs click colorama psutil redis tensorflow gym flatbuffers opencv-python elif [[ "$PYTHON" == "2.7" ]] && [[ "$platform" == "macosx" ]]; then # check that brew is installed which -s brew @@ -48,7 +48,7 @@ elif [[ "$PYTHON" == "2.7" ]] && [[ "$platform" == "macosx" ]]; then wget https://repo.continuum.io/miniconda/Miniconda2-latest-MacOSX-x86_64.sh -O miniconda.sh bash miniconda.sh -b -p $HOME/miniconda export PATH="$HOME/miniconda/bin:$PATH" - pip install numpy cloudpickle==0.3.0 cython cmake funcsigs click colorama psutil redis tensorflow gym flatbuffers + pip install numpy cloudpickle==0.3.0 cython cmake funcsigs click colorama psutil redis tensorflow gym flatbuffers opencv-python elif [[ "$PYTHON" == "3.5" ]] && [[ "$platform" == "macosx" ]]; then # check that brew is installed which -s brew @@ -64,7 +64,7 @@ elif [[ "$PYTHON" == "3.5" ]] && [[ "$platform" == "macosx" ]]; then wget https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh bash miniconda.sh -b -p $HOME/miniconda export PATH="$HOME/miniconda/bin:$PATH" - pip install numpy cloudpickle==0.3.0 cython cmake funcsigs click colorama psutil redis tensorflow gym flatbuffers + pip install numpy cloudpickle==0.3.0 cython cmake funcsigs click colorama psutil redis tensorflow gym flatbuffers opencv-python elif [[ "$LINT" == "1" ]]; then sudo apt-get update sudo apt-get install -y cmake build-essential autoconf curl libtool libboost-dev libboost-filesystem-dev libboost-system-dev unzip diff --git a/doc/requirements-doc.txt b/doc/requirements-doc.txt index 72d1b96d9..51c9caae5 100644 --- a/doc/requirements-doc.txt +++ b/doc/requirements-doc.txt @@ -4,6 +4,7 @@ cloudpickle funcsigs mock numpy +opencv-python psutil recommonmark redis diff --git a/python/ray/rllib/a3c/a3c.py b/python/ray/rllib/a3c/a3c.py index b10d78c3d..79f2bdcef 100644 --- a/python/ray/rllib/a3c/a3c.py +++ b/python/ray/rllib/a3c/a3c.py @@ -12,13 +12,18 @@ import ray from ray.rllib.a3c.runner import RunnerThread, process_rollout from ray.rllib.a3c.envs import create_env from ray.rllib.common import Agent, TrainingResult +from ray.rllib.a3c.shared_model import SharedModel from ray.rllib.a3c.shared_model_lstm import SharedModelLSTM DEFAULT_CONFIG = { "num_workers": 4, "num_batches_per_iteration": 100, - "batch_size": 10 + "batch_size": 10, + "use_lstm": True, + "model": {"grayscale": True, + "zero_mean": False, + "dim": 42} } @@ -28,8 +33,9 @@ class Runner(object): The gradient computation is also executed from this object. """ - def __init__(self, env_name, policy_cls, actor_id, batch_size, logdir): - env = create_env(env_name) + def __init__(self, env_name, policy_cls, actor_id, batch_size, + preprocess_config, logdir): + env = create_env(env_name, preprocess_config) self.id = actor_id # TODO(rliaw): should change this to be just env.observation_space self.policy = policy_cls(env.observation_space.shape, env.action_space) @@ -88,16 +94,19 @@ class Runner(object): class A3CAgent(Agent): - def __init__(self, env_name, config, - policy_cls=SharedModelLSTM, upload_dir=None): + def __init__(self, env_name, config, upload_dir=None): config.update({"alg": "A3C"}) Agent.__init__(self, env_name, config, upload_dir=upload_dir) - self.env = create_env(env_name) + self.env = create_env(env_name, config["model"]) + if config.get("use_lstm", True): + policy_cls = SharedModelLSTM + else: + policy_cls = SharedModel self.policy = policy_cls( self.env.observation_space.shape, self.env.action_space) self.agents = [ Runner.remote(env_name, policy_cls, i, - config["batch_size"], self.logdir) + config["batch_size"], config["model"], self.logdir) for i in range(config["num_workers"])] self.parameters = self.policy.get_weights() diff --git a/python/ray/rllib/a3c/envs.py b/python/ray/rllib/a3c/envs.py index d8f82d8ef..f25f8e0a1 100644 --- a/python/ray/rllib/a3c/envs.py +++ b/python/ray/rllib/a3c/envs.py @@ -2,57 +2,35 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import cv2 import gym from gym.spaces.box import Box import logging -import numpy as np import time +from ray.rllib.models import ModelCatalog + logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -def create_env(env_id): +def create_env(env_id, options): env = gym.make(env_id) - if hasattr(env.env, "ale"): - env = AtariProcessing(env) + env = RLLibPreprocessing(env_id, env, options) env = Diagnostic(env) return env -def _process_frame42(frame): - frame = frame[34:(34 + 160), :160] - # Resize by half, then down to 42x42 (essentially mipmapping). If we resize - # directly we lose pixels that, when mapped to 42x42, aren't close enough - # to the pixel boundary. - frame = cv2.resize(frame, (80, 80)) - frame = cv2.resize(frame, (42, 42)) - frame = frame.mean(2) - frame = frame.astype(np.float32) - frame *= (1.0 / 255.0) - frame = np.reshape(frame, [42, 42, 1]) - return frame - - -def _process_frame80(frame): - frame = frame[34:(34 + 160), :160] - # Resize by half, then down to 80x80. - frame = cv2.resize(frame, (80, 80)) - frame = frame.mean(2) - frame = frame.astype(np.float32) - frame *= (1.0 / 255.0) - frame = np.reshape(frame, [80, 80, 1]) - return frame - - -class AtariProcessing(gym.ObservationWrapper): - def __init__(self, env=None): - super(AtariProcessing, self).__init__(env) - self.observation_space = Box(0.0, 1.0, [42, 42, 1]) +class RLLibPreprocessing(gym.ObservationWrapper): + def __init__(self, env_id, env=None, options=dict()): + super(RLLibPreprocessing, self).__init__(env) + self.preprocessor = ModelCatalog.get_preprocessor( + env_id, env.observation_space.shape, options) + self._process_shape = self.preprocessor.transform_shape( + env.observation_space.shape) + self.observation_space = Box(-1.0, 1.0, self._process_shape) def _observation(self, observation): - return _process_frame42(observation) + return self.preprocessor.transform(observation).squeeze(0) class Diagnostic(gym.Wrapper): diff --git a/python/ray/rllib/models/catalog.py b/python/ray/rllib/models/catalog.py index f5df8100b..99e48d67d 100644 --- a/python/ray/rllib/models/catalog.py +++ b/python/ray/rllib/models/catalog.py @@ -13,12 +13,14 @@ from ray.rllib.models.visionnet import VisionNetwork MODEL_CONFIGS = [ - "conv_filters", - "downscale_factor", - "extra_frameskip", - "fcnet_activation", - "fcnet_hiddens", - "free_log_std" + "conv_filters", # Number of filters + "dim", # Dimension for ATARI + "grayscale", # Converts ATARI frame to 1 Channel Grayscale image + "zero_mean", # Changes frame to range from [-1, 1] if true + "extra_frameskip", # (int) for number of frames to skip + "fcnet_activation", # Nonlinearity for fully connected net (tanh, relu) + "fcnet_hiddens", # Number of hidden layers for fully connected net + "free_log_std" # Documented in ray.rllib.models.Model ] diff --git a/python/ray/rllib/models/preprocessors.py b/python/ray/rllib/models/preprocessors.py index f42be3269..e0ee469f1 100644 --- a/python/ray/rllib/models/preprocessors.py +++ b/python/ray/rllib/models/preprocessors.py @@ -1,6 +1,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import cv2 +import numpy as np class Preprocessor(object): @@ -24,20 +26,36 @@ class Preprocessor(object): class AtariPixelPreprocessor(Preprocessor): def _init(self): - self.downscale_factor = self.options.get("downscale_factor", 2) - self.dim = int(160 / self.downscale_factor) + self.grayscale = self.options.get("grayscale", False) + self.zero_mean = self.options.get("zero_mean", True) + self.dim = self.options.get("dim", 80) def transform_shape(self, obs_shape): - return (self.dim, self.dim, 3) + if self.grayscale: + return (self.dim, self.dim, 1) + else: + return (self.dim, self.dim, 3) # TODO(ekl) why does this need to return an extra size-1 dim (the [None]) def transform(self, observation): """Downsamples images from (210, 160, 3) by the configured factor.""" - scaled = observation[ - 25:-25:self.downscale_factor, ::self.downscale_factor, :][None] - return (scaled - 128) / 128 + scaled = observation[25:-25, :, :] + if self.dim < 80: + scaled = cv2.resize(scaled, (80, 80)) + scaled = cv2.resize(scaled, (self.dim, self.dim)) + if self.grayscale: + scaled = scaled.mean(2) + scaled = scaled.astype(np.float32) + scaled = np.reshape(scaled, [self.dim, self.dim, 1]) + scaled = scaled[None] + if self.zero_mean: + scaled = (scaled - 128) / 128 + else: + scaled *= 1.0 / 255.0 + return scaled +# TODO(rliaw): Also should include the deepmind preprocessor class AtariRamPreprocessor(Preprocessor): def transform_shape(self, obs_shape): return (128,) diff --git a/test/jenkins_tests/run_multi_node_tests.sh b/test/jenkins_tests/run_multi_node_tests.sh index 9bf440c7a..0dea1ee84 100755 --- a/test/jenkins_tests/run_multi_node_tests.sh +++ b/test/jenkins_tests/run_multi_node_tests.sh @@ -103,4 +103,4 @@ docker run --shm-size=10G --memory=10G $DOCKER_SHA \ --env MontezumaRevenge-v0 \ --alg PPO \ --num-iterations 2 \ - --config '{"kl_coeff": 1.0, "num_sgd_iter": 10, "sgd_stepsize": 1e-4, "sgd_batchsize": 64, "timesteps_per_batch": 2000, "num_workers": 1, "model": {"downscale_factor": 4, "conv_filters": [[16, [8, 8], 4], [32, [4, 4], 2], [512, [5, 5], 1]]}, "extra_frameskip": 4}' + --config '{"kl_coeff": 1.0, "num_sgd_iter": 10, "sgd_stepsize": 1e-4, "sgd_batchsize": 64, "timesteps_per_batch": 2000, "num_workers": 1, "model": {"dim": 40, "conv_filters": [[16, [8, 8], 4], [32, [4, 4], 2], [512, [5, 5], 1]]}, "extra_frameskip": 4}'