[rllib] Auto-clip Atari rewards

This commit is contained in:
Eric Liang
2018-09-24 12:55:11 -07:00
committed by GitHub
parent 8331d1ebe0
commit 75ef70afca
2 changed files with 9 additions and 4 deletions
+3 -2
View File
@@ -41,8 +41,9 @@ COMMON_CONFIG = {
"observation_filter": "NoFilter",
# Whether to synchronize the statistics of remote filters.
"synchronize_filters": True,
# Whether to clip rewards prior to experience postprocessing
"clip_rewards": False,
# Whether to clip rewards prior to experience postprocessing. Setting to
# None means rewards are clipped for Atari environments only.
"clip_rewards": None,
# Whether to use rllib or deepmind preprocessors
"preprocessor_pref": "deepmind",
# Arguments to pass to the env creator
@@ -96,7 +96,7 @@ class PolicyEvaluator(EvaluatorInterface):
compress_observations=False,
num_envs=1,
observation_filter="NoFilter",
clip_rewards=False,
clip_rewards=None,
env_config=None,
model_config=None,
policy_config=None,
@@ -150,7 +150,8 @@ class PolicyEvaluator(EvaluatorInterface):
if the env already implements VectorEnv.
observation_filter (str): Name of observation filter to use.
clip_rewards (bool): Whether to clip rewards to [-1, 1] prior to
experience postprocessing.
experience postprocessing. Setting to None means rewards are
clipped for Atari environments only.
env_config (dict): Config to pass to the env creator.
model_config (dict): Config to use when creating the policy model.
policy_config (dict): Config to pass to the policy. In the
@@ -186,6 +187,9 @@ class PolicyEvaluator(EvaluatorInterface):
"custom_preprocessor" not in model_config and \
preprocessor_pref == "deepmind":
if clip_rewards is None:
clip_rewards = True
def wrap(env):
env = wrap_deepmind(
env,