[rllib] Auto-clip atari rewards

2026-06-27 23:08:32 +08:00 · 2018-09-24 12:55:11 -07:00
parent 8331d1ebe0
commit 75ef70afca
2 changed files with 9 additions and 4 deletions
@@ -41,8 +41,9 @@ COMMON_CONFIG = {
    "observation_filter": "NoFilter",
    # Whether to synchronize the statistics of remote filters.
    "synchronize_filters": True,
-    # Whether to clip rewards prior to experience postprocessing
-    "clip_rewards": False,
+    # Whether to clip rewards prior to experience postprocessing. If set to
+    # None, rewards are clipped for Atari environments only.
+    "clip_rewards": None,
    # Whether to use rllib or deepmind preprocessors
    "preprocessor_pref": "deepmind",
    # Arguments to pass to the env creator
@@ -96,7 +96,7 @@ class PolicyEvaluator(EvaluatorInterface):
                 compress_observations=False,
                 num_envs=1,
                 observation_filter="NoFilter",
-                 clip_rewards=False,
+                 clip_rewards=None,
                 env_config=None,
                 model_config=None,
                 policy_config=None,
@@ -150,7 +150,8 @@ class PolicyEvaluator(EvaluatorInterface):
                if the env already implements VectorEnv.
            observation_filter (str): Name of observation filter to use.
            clip_rewards (bool): Whether to clip rewards to [-1, 1] prior to
-                experience postprocessing.
+                experience postprocessing. If set to None, rewards are
+                clipped for Atari environments only.
            env_config (dict): Config to pass to the env creator.
            model_config (dict): Config to use when creating the policy model.
            policy_config (dict): Config to pass to the policy. In the
@@ -186,6 +187,9 @@ class PolicyEvaluator(EvaluatorInterface):
                "custom_preprocessor" not in model_config and \
                preprocessor_pref == "deepmind":

+            if clip_rewards is None:
+                clip_rewards = True
+
            def wrap(env):
                env = wrap_deepmind(
                    env,