[rllib] Reward clipping should default to off

2026-07-04 10:29:45 +08:00 · 2018-09-18 15:08:01 -07:00
parent 715ec1bca5
commit ab8348b1f5
4 changed files with 4 additions and 1 deletion
@@ -38,7 +38,7 @@ COMMON_CONFIG = {
    # Whether to synchronize the statistics of remote filters.
    "synchronize_filters": True,
    # Whether to clip rewards prior to experience postprocessing
-    "clip_rewards": True,
+    "clip_rewards": False,
    # Whether to use rllib or deepmind preprocessors
    "preprocessor_pref": "deepmind",
    # Arguments to pass to the env creator
@@ -10,6 +10,7 @@ atari-a2c:
    run: A2C
    config:
        sample_batch_size: 100
+        clip_rewards: True
        num_workers: 5
        num_envs_per_worker: 5
        gpu: true
@@ -13,6 +13,7 @@ atari-impala:
        train_batch_size: 500
        num_workers: 32
        num_envs_per_worker: 5
+        clip_rewards: True
        lr_schedule: [
            [0, 0.0005],
            [20000000, 0.000000000001],
@@ -12,6 +12,7 @@ atari-ppo:
        lambda: 0.95
        kl_coeff: 0.5
        clip_param: 0.1
+        clip_rewards: True
        entropy_coeff: 0.01
        train_batch_size: 5000
        sample_batch_size: 500