From b5799b528654daa6afe582295b0c006e793d68fe Mon Sep 17 00:00:00 2001 From: Eric Liang Date: Fri, 1 Mar 2019 13:19:33 -0800 Subject: [PATCH] [rllib] Set PPO observation filter to NoFilter by default (#4191) --- python/ray/rllib/agents/agent.py | 1 + python/ray/rllib/agents/ppo/ppo.py | 21 ++++++++++--------- .../rllib/tuned_examples/halfcheetah-ppo.yaml | 1 + .../ray/rllib/tuned_examples/hopper-ppo.yaml | 1 + .../tuned_examples/humanoid-ppo-gae.yaml | 1 + .../rllib/tuned_examples/humanoid-ppo.yaml | 1 + .../tuned_examples/hyperband-cartpole.yaml | 1 + .../rllib/tuned_examples/pendulum-ppo.yaml | 1 + .../regression_tests/cartpole-ppo.yaml | 1 + .../regression_tests/pendulum-ppo.yaml | 1 + .../rllib/tuned_examples/walker2d-ppo.yaml | 1 + 11 files changed, 21 insertions(+), 10 deletions(-) diff --git a/python/ray/rllib/agents/agent.py b/python/ray/rllib/agents/agent.py index 25ecd4ae8..829c020f5 100644 --- a/python/ray/rllib/agents/agent.py +++ b/python/ray/rllib/agents/agent.py @@ -332,6 +332,7 @@ class Agent(Trainable): merged_config = deep_update(merged_config, config, self._allow_unknown_configs, self._allow_unknown_subkeys) + self.raw_user_config = config self.config = merged_config Agent._validate_config(self.config) if self.config.get("log_level"): diff --git a/python/ray/rllib/agents/ppo/ppo.py b/python/ray/rllib/agents/ppo/ppo.py index 3798f63ff..790aa4b65 100644 --- a/python/ray/rllib/agents/ppo/ppo.py +++ b/python/ray/rllib/agents/ppo/ppo.py @@ -51,7 +51,7 @@ DEFAULT_CONFIG = with_common_config({ # Whether to rollout "complete_episodes" or "truncate_episodes" "batch_mode": "truncate_episodes", # Which observation filter to apply to the observation - "observation_filter": "MeanStdFilter", + "observation_filter": "NoFilter", # Uses the sync samples optimizer instead of the multi-gpu one. This does # not support minibatches. 
"simple_optimizer": False, @@ -99,6 +99,14 @@ class PPOAgent(Agent): @override(Agent) def _train(self): + if "observation_filter" not in self.raw_user_config: + # TODO(ekl) remove this message after a few releases + logger.info( + "Important! Since 0.7.0, observation normalization is no " + "longer enabled by default. To enable running-mean " + "normalization, set 'observation_filter': 'MeanStdFilter'. " + "You can ignore this message if your environment doesn't " + "require observation normalization.") prev_steps = self.optimizer.num_steps_sampled fetches = self.optimizer.step() if "kl" in fetches: @@ -139,7 +147,6 @@ class PPOAgent(Agent): "{} iterations for your value ".format(rew_scale) + "function to converge. If this is not intended, consider " "increasing `vf_clip_param`.") - return res def _validate_config(self): @@ -159,13 +166,7 @@ class PPOAgent(Agent): "In multi-agent mode, policies will be optimized sequentially " "by the multi-GPU optimizer. Consider setting " "simple_optimizer=True if this doesn't work for you.") - if self.config["observation_filter"] != "NoFilter": - logger.warning( - "By default, observations will be normalized with {}. 
".format( - self.config["observation_filter"]) + - "If you are using image or discrete type observations, " - "consider disabling this with observation_filter=NoFilter.") if not self.config["vf_share_layers"]: logger.warning( - "By default, the value function will NOT share layers with " - "the policy model (vf_share_layers=False).") + "FYI: By default, the value function will not share layers " + "with the policy model ('vf_share_layers': False).") diff --git a/python/ray/rllib/tuned_examples/halfcheetah-ppo.yaml b/python/ray/rllib/tuned_examples/halfcheetah-ppo.yaml index d154e7c29..60cbd03dc 100644 --- a/python/ray/rllib/tuned_examples/halfcheetah-ppo.yaml +++ b/python/ray/rllib/tuned_examples/halfcheetah-ppo.yaml @@ -20,3 +20,4 @@ halfcheetah-ppo: num_envs_per_worker: grid_search: [16, 32] batch_mode: truncate_episodes + observation_filter: MeanStdFilter diff --git a/python/ray/rllib/tuned_examples/hopper-ppo.yaml b/python/ray/rllib/tuned_examples/hopper-ppo.yaml index 5082dc792..c73d4480e 100644 --- a/python/ray/rllib/tuned_examples/hopper-ppo.yaml +++ b/python/ray/rllib/tuned_examples/hopper-ppo.yaml @@ -11,3 +11,4 @@ hopper-ppo: num_workers: 64 num_gpus: 4 batch_mode: complete_episodes + observation_filter: MeanStdFilter diff --git a/python/ray/rllib/tuned_examples/humanoid-ppo-gae.yaml b/python/ray/rllib/tuned_examples/humanoid-ppo-gae.yaml index 9473b5df7..c9ddbd017 100644 --- a/python/ray/rllib/tuned_examples/humanoid-ppo-gae.yaml +++ b/python/ray/rllib/tuned_examples/humanoid-ppo-gae.yaml @@ -18,3 +18,4 @@ humanoid-ppo-gae: num_workers: 64 num_gpus: 4 batch_mode: complete_episodes + observation_filter: MeanStdFilter diff --git a/python/ray/rllib/tuned_examples/humanoid-ppo.yaml b/python/ray/rllib/tuned_examples/humanoid-ppo.yaml index 07371d16f..b531531e9 100644 --- a/python/ray/rllib/tuned_examples/humanoid-ppo.yaml +++ b/python/ray/rllib/tuned_examples/humanoid-ppo.yaml @@ -16,3 +16,4 @@ humanoid-ppo: num_workers: 64 num_gpus: 4 batch_mode: 
complete_episodes + observation_filter: MeanStdFilter diff --git a/python/ray/rllib/tuned_examples/hyperband-cartpole.yaml b/python/ray/rllib/tuned_examples/hyperband-cartpole.yaml index 64d5571db..8c0a510bc 100644 --- a/python/ray/rllib/tuned_examples/hyperband-cartpole.yaml +++ b/python/ray/rllib/tuned_examples/hyperband-cartpole.yaml @@ -11,3 +11,4 @@ cartpole-ppo: grid_search: [1, 4] sgd_minibatch_size: grid_search: [128, 256, 512] + observation_filter: MeanStdFilter diff --git a/python/ray/rllib/tuned_examples/pendulum-ppo.yaml b/python/ray/rllib/tuned_examples/pendulum-ppo.yaml index 3e9d45179..2dafa9300 100644 --- a/python/ray/rllib/tuned_examples/pendulum-ppo.yaml +++ b/python/ray/rllib/tuned_examples/pendulum-ppo.yaml @@ -15,3 +15,4 @@ pendulum-ppo: model: fcnet_hiddens: [64, 64] batch_mode: complete_episodes + observation_filter: MeanStdFilter diff --git a/python/ray/rllib/tuned_examples/regression_tests/cartpole-ppo.yaml b/python/ray/rllib/tuned_examples/regression_tests/cartpole-ppo.yaml index 3f326cf83..d34b35280 100644 --- a/python/ray/rllib/tuned_examples/regression_tests/cartpole-ppo.yaml +++ b/python/ray/rllib/tuned_examples/regression_tests/cartpole-ppo.yaml @@ -7,3 +7,4 @@ cartpole-ppo: config: num_workers: 1 batch_mode: complete_episodes + observation_filter: MeanStdFilter diff --git a/python/ray/rllib/tuned_examples/regression_tests/pendulum-ppo.yaml b/python/ray/rllib/tuned_examples/regression_tests/pendulum-ppo.yaml index 015429110..b6bfbaf79 100644 --- a/python/ray/rllib/tuned_examples/regression_tests/pendulum-ppo.yaml +++ b/python/ray/rllib/tuned_examples/regression_tests/pendulum-ppo.yaml @@ -17,3 +17,4 @@ pendulum-ppo: model: fcnet_hiddens: [64, 64] batch_mode: complete_episodes + observation_filter: MeanStdFilter diff --git a/python/ray/rllib/tuned_examples/walker2d-ppo.yaml b/python/ray/rllib/tuned_examples/walker2d-ppo.yaml index 9d64720a2..a88589ebf 100644 --- a/python/ray/rllib/tuned_examples/walker2d-ppo.yaml +++ 
b/python/ray/rllib/tuned_examples/walker2d-ppo.yaml @@ -10,3 +10,4 @@ walker2d-v1-ppo: num_workers: 64 num_gpus: 4 batch_mode: complete_episodes + observation_filter: MeanStdFilter