diff --git a/python/ray/rllib/agents/agent.py b/python/ray/rllib/agents/agent.py
index df4dd9f59..12975d78f 100644
--- a/python/ray/rllib/agents/agent.py
+++ b/python/ray/rllib/agents/agent.py
@@ -41,8 +41,9 @@ COMMON_CONFIG = {
     "observation_filter": "NoFilter",
     # Whether to synchronize the statistics of remote filters.
     "synchronize_filters": True,
-    # Whether to clip rewards prior to experience postprocessing
-    "clip_rewards": False,
+    # Whether to clip rewards prior to experience postprocessing. Setting to
+    # None means clip for Atari only.
+    "clip_rewards": None,
     # Whether to use rllib or deepmind preprocessors
     "preprocessor_pref": "deepmind",
     # Arguments to pass to the env creator
diff --git a/python/ray/rllib/evaluation/policy_evaluator.py b/python/ray/rllib/evaluation/policy_evaluator.py
index f0ce96d9d..24eb74610 100644
--- a/python/ray/rllib/evaluation/policy_evaluator.py
+++ b/python/ray/rllib/evaluation/policy_evaluator.py
@@ -96,7 +96,7 @@ class PolicyEvaluator(EvaluatorInterface):
                  compress_observations=False,
                  num_envs=1,
                  observation_filter="NoFilter",
-                 clip_rewards=False,
+                 clip_rewards=None,
                  env_config=None,
                  model_config=None,
                  policy_config=None,
@@ -150,7 +150,8 @@ class PolicyEvaluator(EvaluatorInterface):
                 if the env already implements VectorEnv.
             observation_filter (str): Name of observation filter to use.
             clip_rewards (bool): Whether to clip rewards to [-1, 1] prior to
-                experience postprocessing.
+                experience postprocessing. Setting to None means clip for Atari
+                only.
             env_config (dict): Config to pass to the env creator.
             model_config (dict): Config to use when creating the policy model.
             policy_config (dict): Config to pass to the policy. In the
@@ -186,6 +187,9 @@ class PolicyEvaluator(EvaluatorInterface):
                 "custom_preprocessor" not in model_config and \
                 preprocessor_pref == "deepmind":
 
+            if clip_rewards is None:
+                clip_rewards = True
+
             def wrap(env):
                 env = wrap_deepmind(
                     env,