diff --git a/python/ray/rllib/ppo/ppo.py b/python/ray/rllib/ppo/ppo.py index f0518dc02..0dc4cc5b3 100644 --- a/python/ray/rllib/ppo/ppo.py +++ b/python/ray/rllib/ppo/ppo.py @@ -57,7 +57,7 @@ DEFAULT_CONFIG = { # Config params to pass to the model "model": {"free_log_std": False}, # Which observation filter to apply to the observation - "observation_filter": "NoFilter", + "observation_filter": "MeanStdFilter", # If >1, adds frameskip "extra_frameskip": 1, # Number of timesteps collected in each outer loop diff --git a/python/ray/rllib/ppo/runner.py b/python/ray/rllib/ppo/runner.py index c63d3ba91..5c4728986 100644 --- a/python/ray/rllib/ppo/runner.py +++ b/python/ray/rllib/ppo/runner.py @@ -145,7 +145,7 @@ class Runner(object): else: raise Exception("Unknown observation_filter: " + str(config["observation_filter"])) - self.reward_filter = NoFilter() + self.reward_filter = MeanStdFilter((), clip=5.0) self.sess.run(tf.global_variables_initializer()) def load_data(self, trajectories, full_trace):