diff --git a/doc/source/rllib-concepts.rst b/doc/source/rllib-concepts.rst index 6a885c80f..d91a29f28 100644 --- a/doc/source/rllib-concepts.rst +++ b/doc/source/rllib-concepts.rst @@ -99,7 +99,7 @@ This is how the example in the previous section looks when written using a polic # this optimizer implements the IMPALA architecture optimizer = AsyncSamplesOptimizer( - local_evaluator, remote_evaluator, train_batch_size=500) + local_evaluator, remote_evaluators, train_batch_size=500) while True: optimizer.step() diff --git a/python/ray/rllib/models/preprocessors.py b/python/ray/rllib/models/preprocessors.py index 1375d8edf..e4fa435a0 100644 --- a/python/ray/rllib/models/preprocessors.py +++ b/python/ray/rllib/models/preprocessors.py @@ -73,7 +73,11 @@ class Preprocessor(object): @property @PublicAPI def observation_space(self): - obs_space = gym.spaces.Box(-1.0, 1.0, self.shape, dtype=np.float32) + obs_space = gym.spaces.Box( + np.finfo(np.float32).min, + np.finfo(np.float32).max, + self.shape, + dtype=np.float32) # Stash the unwrapped space so that we can unwrap dict and tuple spaces # automatically in model.py if (isinstance(self, TupleFlatteningPreprocessor)