From 7fc2ddbaf7cca05197db6b68278f426ca794d2f9 Mon Sep 17 00:00:00 2001
From: Eric Liang
Date: Sun, 26 Nov 2017 16:00:46 -0800
Subject: [PATCH] Revert "[rllib] Use NoFilter instead of MeanStdFilter for PPO. (#1082)" (#1255)

This reverts commit 971becc9058544b58e635bb81b5f9115daca8f2e.
---
 python/ray/rllib/ppo/ppo.py    | 2 +-
 python/ray/rllib/ppo/runner.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/ray/rllib/ppo/ppo.py b/python/ray/rllib/ppo/ppo.py
index f0518dc02..0dc4cc5b3 100644
--- a/python/ray/rllib/ppo/ppo.py
+++ b/python/ray/rllib/ppo/ppo.py
@@ -57,7 +57,7 @@ DEFAULT_CONFIG = {
     # Config params to pass to the model
     "model": {"free_log_std": False},
     # Which observation filter to apply to the observation
-    "observation_filter": "NoFilter",
+    "observation_filter": "MeanStdFilter",
     # If >1, adds frameskip
     "extra_frameskip": 1,
     # Number of timesteps collected in each outer loop
diff --git a/python/ray/rllib/ppo/runner.py b/python/ray/rllib/ppo/runner.py
index c63d3ba91..5c4728986 100644
--- a/python/ray/rllib/ppo/runner.py
+++ b/python/ray/rllib/ppo/runner.py
@@ -145,7 +145,7 @@ class Runner(object):
         else:
             raise Exception("Unknown observation_filter: " +
                             str(config["observation_filter"]))
-        self.reward_filter = NoFilter()
+        self.reward_filter = MeanStdFilter((), clip=5.0)
         self.sess.run(tf.global_variables_initializer())
 
     def load_data(self, trajectories, full_trace):