From 16e82b43d1b4bfc7a8a7402a18c8e11a8943e4ea Mon Sep 17 00:00:00 2001
From: Richard Liaw <rliaw@berkeley.edu>
Date: Sat, 30 Sep 2017 13:11:20 -0700
Subject: [PATCH] [rllib] Changes for preprocessors (#1033)

* Changes for preprocessors

* removed comments

* Changes + push for lint

* linted

* adding dependency for travis

* linting won't pass

* reordering

* needed for testing

* added comments

* pip it

* pip dependencies
---
 .travis/install-dependencies.sh            |  8 ++--
 doc/requirements-doc.txt                   |  1 +
 python/ray/rllib/a3c/a3c.py                | 23 +++++++----
 python/ray/rllib/a3c/envs.py               | 48 ++++++----------------
 python/ray/rllib/models/catalog.py         | 14 ++++---
 python/ray/rllib/models/preprocessors.py   | 30 +++++++++++---
 test/jenkins_tests/run_multi_node_tests.sh |  2 +-
 7 files changed, 67 insertions(+), 59 deletions(-)

diff --git a/.travis/install-dependencies.sh b/.travis/install-dependencies.sh
index cba8a1a47..8f44b138b 100755
--- a/.travis/install-dependencies.sh
+++ b/.travis/install-dependencies.sh
@@ -24,7 +24,7 @@ if [[ "$PYTHON" == "2.7" ]] && [[ "$platform" == "linux" ]]; then
   wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -O miniconda.sh
   bash miniconda.sh -b -p $HOME/miniconda
   export PATH="$HOME/miniconda/bin:$PATH"
-  pip install numpy cloudpickle==0.3.0 cython cmake funcsigs click colorama psutil redis tensorflow gym flatbuffers
+  pip install numpy cloudpickle==0.3.0 cython cmake funcsigs click colorama psutil redis tensorflow gym flatbuffers opencv-python
 elif [[ "$PYTHON" == "3.5" ]] && [[ "$platform" == "linux" ]]; then
   sudo apt-get update
   sudo apt-get install -y cmake pkg-config python-dev python-numpy build-essential autoconf curl libtool libboost-dev libboost-filesystem-dev libboost-system-dev unzip
@@ -32,7 +32,7 @@ elif [[ "$PYTHON" == "3.5" ]] && [[ "$platform" == "linux" ]]; then
   wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
   bash miniconda.sh -b -p $HOME/miniconda
   export PATH="$HOME/miniconda/bin:$PATH"
-  pip install numpy cloudpickle==0.3.0 cython cmake funcsigs click colorama psutil redis tensorflow gym flatbuffers
+  pip install numpy cloudpickle==0.3.0 cython cmake funcsigs click colorama psutil redis tensorflow gym flatbuffers opencv-python
 elif [[ "$PYTHON" == "2.7" ]] && [[ "$platform" == "macosx" ]]; then
   # check that brew is installed
   which -s brew
@@ -48,7 +48,7 @@ elif [[ "$PYTHON" == "2.7" ]] && [[ "$platform" == "macosx" ]]; then
   wget https://repo.continuum.io/miniconda/Miniconda2-latest-MacOSX-x86_64.sh -O miniconda.sh
   bash miniconda.sh -b -p $HOME/miniconda
   export PATH="$HOME/miniconda/bin:$PATH"
-  pip install numpy cloudpickle==0.3.0 cython cmake funcsigs click colorama psutil redis tensorflow gym flatbuffers
+  pip install numpy cloudpickle==0.3.0 cython cmake funcsigs click colorama psutil redis tensorflow gym flatbuffers opencv-python
 elif [[ "$PYTHON" == "3.5" ]] && [[ "$platform" == "macosx" ]]; then
   # check that brew is installed
   which -s brew
@@ -64,7 +64,7 @@ elif [[ "$PYTHON" == "3.5" ]] && [[ "$platform" == "macosx" ]]; then
   wget https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh
   bash miniconda.sh -b -p $HOME/miniconda
   export PATH="$HOME/miniconda/bin:$PATH"
-  pip install numpy cloudpickle==0.3.0 cython cmake funcsigs click colorama psutil redis tensorflow gym flatbuffers
+  pip install numpy cloudpickle==0.3.0 cython cmake funcsigs click colorama psutil redis tensorflow gym flatbuffers opencv-python
 elif [[ "$LINT" == "1" ]]; then
   sudo apt-get update
   sudo apt-get install -y cmake build-essential autoconf curl libtool libboost-dev libboost-filesystem-dev libboost-system-dev unzip
diff --git a/doc/requirements-doc.txt b/doc/requirements-doc.txt
index 72d1b96d9..51c9caae5 100644
--- a/doc/requirements-doc.txt
+++ b/doc/requirements-doc.txt
@@ -4,6 +4,7 @@ cloudpickle
 funcsigs
 mock
 numpy
+opencv-python
 psutil
 recommonmark
 redis
diff --git a/python/ray/rllib/a3c/a3c.py b/python/ray/rllib/a3c/a3c.py
index b10d78c3d..79f2bdcef 100644
--- a/python/ray/rllib/a3c/a3c.py
+++ b/python/ray/rllib/a3c/a3c.py
@@ -12,13 +12,18 @@ import ray
 from ray.rllib.a3c.runner import RunnerThread, process_rollout
 from ray.rllib.a3c.envs import create_env
 from ray.rllib.common import Agent, TrainingResult
+from ray.rllib.a3c.shared_model import SharedModel
 from ray.rllib.a3c.shared_model_lstm import SharedModelLSTM
 
 
 DEFAULT_CONFIG = {
     "num_workers": 4,
     "num_batches_per_iteration": 100,
-    "batch_size": 10
+    "batch_size": 10,
+    "use_lstm": True,
+    "model": {"grayscale": True,
+              "zero_mean": False,
+              "dim": 42}
 }
 
 
@@ -28,8 +33,9 @@ class Runner(object):
 
     The gradient computation is also executed from this object.
     """
-    def __init__(self, env_name, policy_cls, actor_id, batch_size, logdir):
-        env = create_env(env_name)
+    def __init__(self, env_name, policy_cls, actor_id, batch_size,
+                 preprocess_config, logdir):
+        env = create_env(env_name, preprocess_config)
         self.id = actor_id
         # TODO(rliaw): should change this to be just env.observation_space
         self.policy = policy_cls(env.observation_space.shape, env.action_space)
@@ -88,16 +94,19 @@ class Runner(object):
 
 
 class A3CAgent(Agent):
-    def __init__(self, env_name, config,
-                 policy_cls=SharedModelLSTM, upload_dir=None):
+    def __init__(self, env_name, config, upload_dir=None):
         config.update({"alg": "A3C"})
         Agent.__init__(self, env_name, config, upload_dir=upload_dir)
-        self.env = create_env(env_name)
+        self.env = create_env(env_name, config["model"])
+        if config.get("use_lstm", True):
+            policy_cls = SharedModelLSTM
+        else:
+            policy_cls = SharedModel
         self.policy = policy_cls(
             self.env.observation_space.shape, self.env.action_space)
         self.agents = [
             Runner.remote(env_name, policy_cls, i,
-                          config["batch_size"], self.logdir)
+                          config["batch_size"], config["model"], self.logdir)
             for i in range(config["num_workers"])]
         self.parameters = self.policy.get_weights()
 
diff --git a/python/ray/rllib/a3c/envs.py b/python/ray/rllib/a3c/envs.py
index d8f82d8ef..f25f8e0a1 100644
--- a/python/ray/rllib/a3c/envs.py
+++ b/python/ray/rllib/a3c/envs.py
@@ -2,57 +2,35 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import cv2
 import gym
 from gym.spaces.box import Box
 import logging
-import numpy as np
 import time
 
+from ray.rllib.models import ModelCatalog
+
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
 
 
-def create_env(env_id):
+def create_env(env_id, options):
     env = gym.make(env_id)
-    if hasattr(env.env, "ale"):
-        env = AtariProcessing(env)
+    env = RLLibPreprocessing(env_id, env, options)
     env = Diagnostic(env)
     return env
 
 
-def _process_frame42(frame):
-    frame = frame[34:(34 + 160), :160]
-    # Resize by half, then down to 42x42 (essentially mipmapping). If we resize
-    # directly we lose pixels that, when mapped to 42x42, aren't close enough
-    # to the pixel boundary.
-    frame = cv2.resize(frame, (80, 80))
-    frame = cv2.resize(frame, (42, 42))
-    frame = frame.mean(2)
-    frame = frame.astype(np.float32)
-    frame *= (1.0 / 255.0)
-    frame = np.reshape(frame, [42, 42, 1])
-    return frame
-
-
-def _process_frame80(frame):
-    frame = frame[34:(34 + 160), :160]
-    # Resize by half, then down to 80x80.
-    frame = cv2.resize(frame, (80, 80))
-    frame = frame.mean(2)
-    frame = frame.astype(np.float32)
-    frame *= (1.0 / 255.0)
-    frame = np.reshape(frame, [80, 80, 1])
-    return frame
-
-
-class AtariProcessing(gym.ObservationWrapper):
-    def __init__(self, env=None):
-        super(AtariProcessing, self).__init__(env)
-        self.observation_space = Box(0.0, 1.0, [42, 42, 1])
+class RLLibPreprocessing(gym.ObservationWrapper):
+    def __init__(self, env_id, env=None, options=dict()):
+        super(RLLibPreprocessing, self).__init__(env)
+        self.preprocessor = ModelCatalog.get_preprocessor(
+            env_id, env.observation_space.shape, options)
+        self._process_shape = self.preprocessor.transform_shape(
+            env.observation_space.shape)
+        self.observation_space = Box(-1.0, 1.0, self._process_shape)
 
     def _observation(self, observation):
-        return _process_frame42(observation)
+        return self.preprocessor.transform(observation).squeeze(0)
 
 
 class Diagnostic(gym.Wrapper):
diff --git a/python/ray/rllib/models/catalog.py b/python/ray/rllib/models/catalog.py
index f5df8100b..99e48d67d 100644
--- a/python/ray/rllib/models/catalog.py
+++ b/python/ray/rllib/models/catalog.py
@@ -13,12 +13,14 @@ from ray.rllib.models.visionnet import VisionNetwork
 
 
 MODEL_CONFIGS = [
-    "conv_filters",
-    "downscale_factor",
-    "extra_frameskip",
-    "fcnet_activation",
-    "fcnet_hiddens",
-    "free_log_std"
+    "conv_filters",  # Per-layer conv specs, e.g. [16, [8, 8], 4]
+    "dim",  # Side length of the square resized ATARI frame (dim x dim)
+    "grayscale",  # Converts ATARI frame to 1 Channel Grayscale image
+    "zero_mean",  # Scales frame to [-1, 1] if true, else to [0, 1]
+    "extra_frameskip",  # (int) for number of frames to skip
+    "fcnet_activation",  # Nonlinearity for fully connected net (tanh, relu)
+    "fcnet_hiddens",  # Number of hidden layers for fully connected net
+    "free_log_std"  # Documented in ray.rllib.models.Model
 ]
 
 
diff --git a/python/ray/rllib/models/preprocessors.py b/python/ray/rllib/models/preprocessors.py
index f42be3269..e0ee469f1 100644
--- a/python/ray/rllib/models/preprocessors.py
+++ b/python/ray/rllib/models/preprocessors.py
@@ -1,6 +1,8 @@
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
+import cv2
+import numpy as np
 
 
 class Preprocessor(object):
@@ -24,20 +26,36 @@ class Preprocessor(object):
 
 class AtariPixelPreprocessor(Preprocessor):
     def _init(self):
-        self.downscale_factor = self.options.get("downscale_factor", 2)
-        self.dim = int(160 / self.downscale_factor)
+        self.grayscale = self.options.get("grayscale", False)
+        self.zero_mean = self.options.get("zero_mean", True)
+        self.dim = self.options.get("dim", 80)
 
     def transform_shape(self, obs_shape):
-        return (self.dim, self.dim, 3)
+        if self.grayscale:
+            return (self.dim, self.dim, 1)
+        else:
+            return (self.dim, self.dim, 3)
 
     # TODO(ekl) why does this need to return an extra size-1 dim (the [None])
     def transform(self, observation):
         """Downsamples images from (210, 160, 3) by the configured factor."""
-        scaled = observation[
-            25:-25:self.downscale_factor, ::self.downscale_factor, :][None]
-        return (scaled - 128) / 128
+        scaled = observation[25:-25, :, :]
+        if self.dim < 80:
+            scaled = cv2.resize(scaled, (80, 80))
+        scaled = cv2.resize(scaled, (self.dim, self.dim))
+        if self.grayscale:
+            scaled = scaled.mean(2)
+            scaled = scaled.astype(np.float32)
+            scaled = np.reshape(scaled, [self.dim, self.dim, 1])
+        scaled = scaled[None]
+        if self.zero_mean:
+            scaled = (scaled - 128) / 128
+        else:
+            scaled *= 1.0 / 255.0
+        return scaled
 
 
+# TODO(rliaw): Also should include the deepmind preprocessor
 class AtariRamPreprocessor(Preprocessor):
     def transform_shape(self, obs_shape):
         return (128,)
diff --git a/test/jenkins_tests/run_multi_node_tests.sh b/test/jenkins_tests/run_multi_node_tests.sh
index 9bf440c7a..0dea1ee84 100755
--- a/test/jenkins_tests/run_multi_node_tests.sh
+++ b/test/jenkins_tests/run_multi_node_tests.sh
@@ -103,4 +103,4 @@ docker run --shm-size=10G --memory=10G $DOCKER_SHA \
     --env MontezumaRevenge-v0 \
     --alg PPO \
     --num-iterations 2 \
-    --config '{"kl_coeff": 1.0, "num_sgd_iter": 10, "sgd_stepsize": 1e-4, "sgd_batchsize": 64, "timesteps_per_batch": 2000, "num_workers": 1, "model": {"downscale_factor": 4, "conv_filters": [[16, [8, 8], 4], [32, [4, 4], 2], [512, [5, 5], 1]]}, "extra_frameskip": 4}'
+    --config '{"kl_coeff": 1.0, "num_sgd_iter": 10, "sgd_stepsize": 1e-4, "sgd_batchsize": 64, "timesteps_per_batch": 2000, "num_workers": 1, "model": {"dim": 40, "conv_filters": [[16, [8, 8], 4], [32, [4, 4], 2], [512, [5, 5], 1]]}, "extra_frameskip": 4}'