[rllib] Changes for preprocessors (#1033)

* Changes for preprocessors
* removed comments
* Changes + push for lint
* linted
* adding dependency for travis
* linting won't pass
* reordering
* needed for testing
* added comments
* pip it
* pip dependencies
2026-06-27 21:38:18 +08:00 · 2017-09-30 13:11:20 -07:00
parent 2d0f439b7b
commit 16e82b43d1
7 changed files with 67 additions and 59 deletions
@@ -12,13 +12,18 @@ import ray
 from ray.rllib.a3c.runner import RunnerThread, process_rollout
 from ray.rllib.a3c.envs import create_env
 from ray.rllib.common import Agent, TrainingResult
+from ray.rllib.a3c.shared_model import SharedModel
 from ray.rllib.a3c.shared_model_lstm import SharedModelLSTM


 DEFAULT_CONFIG = {
    "num_workers": 4,
    "num_batches_per_iteration": 100,
-    "batch_size": 10
+    "batch_size": 10,
+    "use_lstm": True,
+    "model": {"grayscale": True,
+              "zero_mean": False,
+              "dim": 42}
 }


@@ -28,8 +33,9 @@ class Runner(object):

    The gradient computation is also executed from this object.
    """
-    def __init__(self, env_name, policy_cls, actor_id, batch_size, logdir):
-        env = create_env(env_name)
+    def __init__(self, env_name, policy_cls, actor_id, batch_size,
+                 preprocess_config, logdir):
+        env = create_env(env_name, preprocess_config)
        self.id = actor_id
        # TODO(rliaw): should change this to be just env.observation_space
        self.policy = policy_cls(env.observation_space.shape, env.action_space)
@@ -88,16 +94,19 @@ class Runner(object):


 class A3CAgent(Agent):
-    def __init__(self, env_name, config,
-                 policy_cls=SharedModelLSTM, upload_dir=None):
+    def __init__(self, env_name, config, upload_dir=None):
        config.update({"alg": "A3C"})
        Agent.__init__(self, env_name, config, upload_dir=upload_dir)
-        self.env = create_env(env_name)
+        self.env = create_env(env_name, config["model"])
+        if config.get("use_lstm", True):
+            policy_cls = SharedModelLSTM
+        else:
+            policy_cls = SharedModel
        self.policy = policy_cls(
            self.env.observation_space.shape, self.env.action_space)
        self.agents = [
            Runner.remote(env_name, policy_cls, i,
-                          config["batch_size"], self.logdir)
+                          config["batch_size"], config["model"], self.logdir)
            for i in range(config["num_workers"])]
        self.parameters = self.policy.get_weights()

@@ -2,57 +2,35 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

-import cv2
 import gym
 from gym.spaces.box import Box
 import logging
-import numpy as np
 import time

+from ray.rllib.models import ModelCatalog
+
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)


-def create_env(env_id):
+def create_env(env_id, options):
    env = gym.make(env_id)
-    if hasattr(env.env, "ale"):
-        env = AtariProcessing(env)
+    env = RLLibPreprocessing(env_id, env, options)
    env = Diagnostic(env)
    return env


-def _process_frame42(frame):
-    frame = frame[34:(34 + 160), :160]
-    # Resize by half, then down to 42x42 (essentially mipmapping). If we resize
-    # directly we lose pixels that, when mapped to 42x42, aren't close enough
-    # to the pixel boundary.
-    frame = cv2.resize(frame, (80, 80))
-    frame = cv2.resize(frame, (42, 42))
-    frame = frame.mean(2)
-    frame = frame.astype(np.float32)
-    frame *= (1.0 / 255.0)
-    frame = np.reshape(frame, [42, 42, 1])
-    return frame
-
-
-def _process_frame80(frame):
-    frame = frame[34:(34 + 160), :160]
-    # Resize by half, then down to 80x80.
-    frame = cv2.resize(frame, (80, 80))
-    frame = frame.mean(2)
-    frame = frame.astype(np.float32)
-    frame *= (1.0 / 255.0)
-    frame = np.reshape(frame, [80, 80, 1])
-    return frame
-
-
-class AtariProcessing(gym.ObservationWrapper):
-    def __init__(self, env=None):
-        super(AtariProcessing, self).__init__(env)
-        self.observation_space = Box(0.0, 1.0, [42, 42, 1])
+class RLLibPreprocessing(gym.ObservationWrapper):
+    def __init__(self, env_id, env=None, options=dict()):
+        super(RLLibPreprocessing, self).__init__(env)
+        self.preprocessor = ModelCatalog.get_preprocessor(
+            env_id, env.observation_space.shape, options)
+        self._process_shape = self.preprocessor.transform_shape(
+            env.observation_space.shape)
+        self.observation_space = Box(-1.0, 1.0, self._process_shape)

    def _observation(self, observation):
-        return _process_frame42(observation)
+        return self.preprocessor.transform(observation).squeeze(0)


 class Diagnostic(gym.Wrapper):
@@ -13,12 +13,14 @@ from ray.rllib.models.visionnet import VisionNetwork


 MODEL_CONFIGS = [
-    "conv_filters",
-    "downscale_factor",
-    "extra_frameskip",
-    "fcnet_activation",
-    "fcnet_hiddens",
-    "free_log_std"
+    "conv_filters",  # Number of filters
+    "dim",  # Side length of the square output frame for ATARI
+    "grayscale",  # Converts ATARI frame to 1 Channel Grayscale image
+    "zero_mean",  # Scales frame to [-1, 1] if true, else to [0, 1]
+    "extra_frameskip",  # (int) for number of frames to skip
+    "fcnet_activation",  # Nonlinearity for fully connected net (tanh, relu)
+    "fcnet_hiddens",  # Number of hidden layers for fully connected net
+    "free_log_std"  # Documented in ray.rllib.models.Model
 ]


@@ -1,6 +1,8 @@
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
+import cv2
+import numpy as np


 class Preprocessor(object):
@@ -24,20 +26,36 @@ class Preprocessor(object):

 class AtariPixelPreprocessor(Preprocessor):
    def _init(self):
-        self.downscale_factor = self.options.get("downscale_factor", 2)
-        self.dim = int(160 / self.downscale_factor)
+        self.grayscale = self.options.get("grayscale", False)
+        self.zero_mean = self.options.get("zero_mean", True)
+        self.dim = self.options.get("dim", 80)

    def transform_shape(self, obs_shape):
-        return (self.dim, self.dim, 3)
+        if self.grayscale:
+            return (self.dim, self.dim, 1)
+        else:
+            return (self.dim, self.dim, 3)

    # TODO(ekl) why does this need to return an extra size-1 dim (the [None])
     def transform(self, observation):
         """Crops (210, 160, 3) images to rows 25:-25, resizes to (dim, dim)."""
-        scaled = observation[
-            25:-25:self.downscale_factor, ::self.downscale_factor, :][None]
-        return (scaled - 128) / 128
+        scaled = observation[25:-25, :, :]
+        if self.dim < 80:
+            scaled = cv2.resize(scaled, (80, 80))
+        scaled = cv2.resize(scaled, (self.dim, self.dim))
+        if self.grayscale:
+            scaled = scaled.mean(2)
+            scaled = scaled.astype(np.float32)
+            scaled = np.reshape(scaled, [self.dim, self.dim, 1])
+        scaled = scaled[None]
+        if self.zero_mean:
+            scaled = (scaled - 128) / 128
+        else:
+            scaled *= 1.0 / 255.0
+        return scaled


+# TODO(rliaw): Also should include the deepmind preprocessor
 class AtariRamPreprocessor(Preprocessor):
    def transform_shape(self, obs_shape):
        return (128,)