mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 10:17:19 +08:00
[rllib] Set PPO observation filter to NoFilter by default (#4191)
This commit is contained in:
@@ -332,6 +332,7 @@ class Agent(Trainable):
|
||||
merged_config = deep_update(merged_config, config,
|
||||
self._allow_unknown_configs,
|
||||
self._allow_unknown_subkeys)
|
||||
self.raw_user_config = config
|
||||
self.config = merged_config
|
||||
Agent._validate_config(self.config)
|
||||
if self.config.get("log_level"):
|
||||
|
||||
@@ -51,7 +51,7 @@ DEFAULT_CONFIG = with_common_config({
|
||||
# Whether to rollout "complete_episodes" or "truncate_episodes"
|
||||
"batch_mode": "truncate_episodes",
|
||||
# Which observation filter to apply to the observation
|
||||
"observation_filter": "MeanStdFilter",
|
||||
"observation_filter": "NoFilter",
|
||||
# Uses the sync samples optimizer instead of the multi-gpu one. This does
|
||||
# not support minibatches.
|
||||
"simple_optimizer": False,
|
||||
@@ -99,6 +99,14 @@ class PPOAgent(Agent):
|
||||
|
||||
@override(Agent)
|
||||
def _train(self):
|
||||
if "observation_filter" not in self.raw_user_config:
|
||||
# TODO(ekl) remove this message after a few releases
|
||||
logger.info(
|
||||
"Important! Since 0.7.0, observation normalization is no "
|
||||
"longer enabled by default. To enable running-mean "
|
||||
"normalization, set 'observation_filter': 'MeanStdFilter'. "
|
||||
"You can ignore this message if your environment doesn't "
|
||||
"require observation normalization.")
|
||||
prev_steps = self.optimizer.num_steps_sampled
|
||||
fetches = self.optimizer.step()
|
||||
if "kl" in fetches:
|
||||
@@ -139,7 +147,6 @@ class PPOAgent(Agent):
|
||||
"{} iterations for your value ".format(rew_scale) +
|
||||
"function to converge. If this is not intended, consider "
|
||||
"increasing `vf_clip_param`.")
|
||||
|
||||
return res
|
||||
|
||||
def _validate_config(self):
|
||||
@@ -159,13 +166,7 @@ class PPOAgent(Agent):
|
||||
"In multi-agent mode, policies will be optimized sequentially "
|
||||
"by the multi-GPU optimizer. Consider setting "
|
||||
"simple_optimizer=True if this doesn't work for you.")
|
||||
if self.config["observation_filter"] != "NoFilter":
|
||||
logger.warning(
|
||||
"By default, observations will be normalized with {}. ".format(
|
||||
self.config["observation_filter"]) +
|
||||
"If you are using image or discrete type observations, "
|
||||
"consider disabling this with observation_filter=NoFilter.")
|
||||
if not self.config["vf_share_layers"]:
|
||||
logger.warning(
|
||||
"By default, the value function will NOT share layers with "
|
||||
"the policy model (vf_share_layers=False).")
|
||||
"FYI: By default, the value function will not share layers "
|
||||
"with the policy model ('vf_share_layers': False).")
|
||||
|
||||
@@ -20,3 +20,4 @@ halfcheetah-ppo:
|
||||
num_envs_per_worker:
|
||||
grid_search: [16, 32]
|
||||
batch_mode: truncate_episodes
|
||||
observation_filter: MeanStdFilter
|
||||
|
||||
@@ -11,3 +11,4 @@ hopper-ppo:
|
||||
num_workers: 64
|
||||
num_gpus: 4
|
||||
batch_mode: complete_episodes
|
||||
observation_filter: MeanStdFilter
|
||||
|
||||
@@ -18,3 +18,4 @@ humanoid-ppo-gae:
|
||||
num_workers: 64
|
||||
num_gpus: 4
|
||||
batch_mode: complete_episodes
|
||||
observation_filter: MeanStdFilter
|
||||
|
||||
@@ -16,3 +16,4 @@ humanoid-ppo:
|
||||
num_workers: 64
|
||||
num_gpus: 4
|
||||
batch_mode: complete_episodes
|
||||
observation_filter: MeanStdFilter
|
||||
|
||||
@@ -11,3 +11,4 @@ cartpole-ppo:
|
||||
grid_search: [1, 4]
|
||||
sgd_minibatch_size:
|
||||
grid_search: [128, 256, 512]
|
||||
observation_filter: MeanStdFilter
|
||||
|
||||
@@ -15,3 +15,4 @@ pendulum-ppo:
|
||||
model:
|
||||
fcnet_hiddens: [64, 64]
|
||||
batch_mode: complete_episodes
|
||||
observation_filter: MeanStdFilter
|
||||
|
||||
@@ -7,3 +7,4 @@ cartpole-ppo:
|
||||
config:
|
||||
num_workers: 1
|
||||
batch_mode: complete_episodes
|
||||
observation_filter: MeanStdFilter
|
||||
|
||||
@@ -17,3 +17,4 @@ pendulum-ppo:
|
||||
model:
|
||||
fcnet_hiddens: [64, 64]
|
||||
batch_mode: complete_episodes
|
||||
observation_filter: MeanStdFilter
|
||||
|
||||
@@ -10,3 +10,4 @@ walker2d-v1-ppo:
|
||||
num_workers: 64
|
||||
num_gpus: 4
|
||||
batch_mode: complete_episodes
|
||||
observation_filter: MeanStdFilter
|
||||
|
||||
Reference in New Issue
Block a user