diff --git a/python/ray/rllib/ppo/ppo.py b/python/ray/rllib/ppo/ppo.py index cb0a2edb5..ec6384a6a 100644 --- a/python/ray/rllib/ppo/ppo.py +++ b/python/ray/rllib/ppo/ppo.py @@ -56,7 +56,7 @@ DEFAULT_CONFIG = { # Config params to pass to the model "model": {"free_log_std": False}, # Which observation filter to apply to the observation - "observation_filter": "MeanStdFilter", + "observation_filter": "NoFilter", # If >1, adds frameskip "extra_frameskip": 1, # Number of timesteps collected in each outer loop diff --git a/python/ray/rllib/ppo/runner.py b/python/ray/rllib/ppo/runner.py index e1d7dfdc4..eb046c136 100644 --- a/python/ray/rllib/ppo/runner.py +++ b/python/ray/rllib/ppo/runner.py @@ -148,7 +148,7 @@ class Runner(object): else: raise Exception("Unknown observation_filter: " + str(config["observation_filter"])) - self.reward_filter = MeanStdFilter((), clip=5.0) + self.reward_filter = NoFilter() self.sess.run(tf.global_variables_initializer()) def load_data(self, trajectories, full_trace):