From e4bea8d10effa45d1fc5b5cb897a1305950880b2 Mon Sep 17 00:00:00 2001 From: Eric Liang Date: Sun, 30 Sep 2018 18:37:55 -0700 Subject: [PATCH] [rllib] Default to truncate_episodes and add some more config validators (#2967) * update * link it * warn about truncation * fix * Update rllib-training.rst * deprecate tests failing --- python/ray/rllib/agents/ppo/ppo.py | 36 ++++++++++++------- .../ray/rllib/tuned_examples/hopper-ppo.yaml | 1 + .../tuned_examples/humanoid-ppo-gae.yaml | 1 + .../rllib/tuned_examples/humanoid-ppo.yaml | 1 + .../rllib/tuned_examples/pendulum-ppo.yaml | 2 +- .../regression_tests/cartpole-ppo.yaml | 1 + .../regression_tests/pendulum-ppo.yaml | 1 + .../rllib/tuned_examples/walker2d-ppo.yaml | 1 + test/jenkins_tests/run_multi_node_tests.sh | 8 +---- 9 files changed, 32 insertions(+), 20 deletions(-) diff --git a/python/ray/rllib/agents/ppo/ppo.py b/python/ray/rllib/agents/ppo/ppo.py index f452f7893..d2a991929 100644 --- a/python/ray/rllib/agents/ppo/ppo.py +++ b/python/ray/rllib/agents/ppo/ppo.py @@ -48,7 +48,7 @@ DEFAULT_CONFIG = with_common_config({ # Whether to allocate CPUs for workers (if > 0). "num_cpus_per_worker": 1, # Whether to rollout "complete_episodes" or "truncate_episodes" - "batch_mode": "complete_episodes", + "batch_mode": "truncate_episodes", # Which observation filter to apply to the observation "observation_filter": "MeanStdFilter", # Use the sync samples optimizer instead of the multi-gpu one @@ -80,17 +80,7 @@ class PPOAgent(Agent): extra_gpu=cf["num_gpus_per_worker"] * cf["num_workers"]) def _init(self): - waste_ratio = ( - self.config["sample_batch_size"] * self.config["num_workers"] / - self.config["train_batch_size"]) - if waste_ratio > 1: - msg = ("sample_batch_size * num_workers >> train_batch_size. " - "This means that many steps will be discarded. Consider " - "reducing sample_batch_size, or increase train_batch_size.") - if waste_ratio > 1.5: - raise ValueError(msg) - else: - print("Warning: " + msg) + self._validate_config() self.local_evaluator = self.make_local_evaluator( self.env_creator, self._policy_graph) self.remote_evaluators = self.make_remote_evaluators( @@ -114,6 +104,28 @@ class PPOAgent(Agent): "standardize_fields": ["advantages"], }) + def _validate_config(self): + waste_ratio = ( + self.config["sample_batch_size"] * self.config["num_workers"] / + self.config["train_batch_size"]) + if waste_ratio > 1: + msg = ("sample_batch_size * num_workers >> train_batch_size. " + "This means that many steps will be discarded. Consider " + "reducing sample_batch_size, or increase train_batch_size.") + if waste_ratio > 1.5: + raise ValueError(msg) + else: + print("Warning: " + msg) + if self.config["sgd_minibatch_size"] > self.config["train_batch_size"]: + raise ValueError( + "Minibatch size {} must be <= train batch size {}.".format( + self.config["sgd_minibatch_size"], + self.config["train_batch_size"])) + if (self.config["batch_mode"] == "truncate_episodes" + and not self.config["use_gae"]): + raise ValueError( + "Episode truncation is not supported without a value function") + def _train(self): prev_steps = self.optimizer.num_steps_sampled fetches = self.optimizer.step() diff --git a/python/ray/rllib/tuned_examples/hopper-ppo.yaml b/python/ray/rllib/tuned_examples/hopper-ppo.yaml index c1c75b166..5082dc792 100644 --- a/python/ray/rllib/tuned_examples/hopper-ppo.yaml +++ b/python/ray/rllib/tuned_examples/hopper-ppo.yaml @@ -10,3 +10,4 @@ hopper-ppo: train_batch_size: 160000 num_workers: 64 num_gpus: 4 + batch_mode: complete_episodes diff --git a/python/ray/rllib/tuned_examples/humanoid-ppo-gae.yaml b/python/ray/rllib/tuned_examples/humanoid-ppo-gae.yaml index e176dcae2..9473b5df7 100644 --- a/python/ray/rllib/tuned_examples/humanoid-ppo-gae.yaml +++ b/python/ray/rllib/tuned_examples/humanoid-ppo-gae.yaml @@ -17,3 +17,4 @@ humanoid-ppo-gae: free_log_std: true num_workers: 64 num_gpus: 4 + batch_mode: complete_episodes diff --git a/python/ray/rllib/tuned_examples/humanoid-ppo.yaml b/python/ray/rllib/tuned_examples/humanoid-ppo.yaml index 0608f8b60..07371d16f 100644 --- a/python/ray/rllib/tuned_examples/humanoid-ppo.yaml +++ b/python/ray/rllib/tuned_examples/humanoid-ppo.yaml @@ -15,3 +15,4 @@ humanoid-ppo: use_gae: false num_workers: 64 num_gpus: 4 + batch_mode: complete_episodes diff --git a/python/ray/rllib/tuned_examples/pendulum-ppo.yaml b/python/ray/rllib/tuned_examples/pendulum-ppo.yaml index 60df6825b..b8c0293a3 100644 --- a/python/ray/rllib/tuned_examples/pendulum-ppo.yaml +++ b/python/ray/rllib/tuned_examples/pendulum-ppo.yaml @@ -13,4 +13,4 @@ pendulum-ppo: num_sgd_iter: 10 model: fcnet_hiddens: [64, 64] - squash_to_range: True + batch_mode: complete_episodes diff --git a/python/ray/rllib/tuned_examples/regression_tests/cartpole-ppo.yaml b/python/ray/rllib/tuned_examples/regression_tests/cartpole-ppo.yaml index 425958e5c..82ea5846e 100644 --- a/python/ray/rllib/tuned_examples/regression_tests/cartpole-ppo.yaml +++ b/python/ray/rllib/tuned_examples/regression_tests/cartpole-ppo.yaml @@ -6,3 +6,4 @@ cartpole-ppo: time_total_s: 300 config: num_workers: 1 + batch_mode: complete_episodes diff --git a/python/ray/rllib/tuned_examples/regression_tests/pendulum-ppo.yaml b/python/ray/rllib/tuned_examples/regression_tests/pendulum-ppo.yaml index 8b9d69fce..63536d3be 100644 --- a/python/ray/rllib/tuned_examples/regression_tests/pendulum-ppo.yaml +++ b/python/ray/rllib/tuned_examples/regression_tests/pendulum-ppo.yaml @@ -15,3 +15,4 @@ pendulum-ppo: num_sgd_iter: 10 model: fcnet_hiddens: [64, 64] + batch_mode: complete_episodes diff --git a/python/ray/rllib/tuned_examples/walker2d-ppo.yaml b/python/ray/rllib/tuned_examples/walker2d-ppo.yaml index deb5a0038..9d64720a2 100644 --- a/python/ray/rllib/tuned_examples/walker2d-ppo.yaml +++ b/python/ray/rllib/tuned_examples/walker2d-ppo.yaml @@ -9,3 +9,4 @@ walker2d-v1-ppo: train_batch_size: 320000 num_workers: 64 num_gpus: 4 + batch_mode: complete_episodes diff --git a/test/jenkins_tests/run_multi_node_tests.sh b/test/jenkins_tests/run_multi_node_tests.sh index e12eca455..43815f470 100755 --- a/test/jenkins_tests/run_multi_node_tests.sh +++ b/test/jenkins_tests/run_multi_node_tests.sh @@ -58,7 +58,7 @@ docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ --env CartPole-v1 \ --run PPO \ --stop '{"training_iteration": 2}' \ - --config '{"kl_coeff": 1.0, "num_sgd_iter": 10, "lr": 1e-4, "sgd_minibatch_size": 64, "train_batch_size": 2000, "num_workers": 1, "use_gae": false}' + --config '{"kl_coeff": 1.0, "num_sgd_iter": 10, "lr": 1e-4, "sgd_minibatch_size": 64, "train_batch_size": 2000, "num_workers": 1, "use_gae": false, "batch_mode": "complete_episodes"}' docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/train.py \ @@ -288,12 +288,6 @@ docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/tune/examples/genetic_example.py \ --smoke-test -docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ - python /ray/python/ray/rllib/examples/legacy_multiagent/multiagent_mountaincar.py - -docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ - python /ray/python/ray/rllib/examples/legacy_multiagent/multiagent_pendulum.py - docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/examples/multiagent_cartpole.py --num-iters=2