From 71b2dec3b4b20f85ef9719a96ae8298e6eac6b06 Mon Sep 17 00:00:00 2001 From: Eric Liang Date: Sun, 5 May 2019 18:25:38 -0700 Subject: [PATCH] [rllib] Fix bounds of space returned by preprocessor.observation_space (#4736) --- doc/source/rllib-concepts.rst | 2 +- python/ray/rllib/models/preprocessors.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/doc/source/rllib-concepts.rst b/doc/source/rllib-concepts.rst index 6a885c80f..d91a29f28 100644 --- a/doc/source/rllib-concepts.rst +++ b/doc/source/rllib-concepts.rst @@ -99,7 +99,7 @@ This is how the example in the previous section looks when written using a polic # this optimizer implements the IMPALA architecture optimizer = AsyncSamplesOptimizer( - local_evaluator, remote_evaluator, train_batch_size=500) + local_evaluator, remote_evaluators, train_batch_size=500) while True: optimizer.step() diff --git a/python/ray/rllib/models/preprocessors.py b/python/ray/rllib/models/preprocessors.py index 1375d8edf..e4fa435a0 100644 --- a/python/ray/rllib/models/preprocessors.py +++ b/python/ray/rllib/models/preprocessors.py @@ -73,7 +73,11 @@ class Preprocessor(object): @property @PublicAPI def observation_space(self): - obs_space = gym.spaces.Box(-1.0, 1.0, self.shape, dtype=np.float32) + obs_space = gym.spaces.Box( + np.finfo(np.float32).min, + np.finfo(np.float32).max, + self.shape, + dtype=np.float32) # Stash the unwrapped space so that we can unwrap dict and tuple spaces # automatically in model.py if (isinstance(self, TupleFlatteningPreprocessor)