From 6670880f03bc6856342b6304cbf72fe4a8d78e4c Mon Sep 17 00:00:00 2001 From: Eric Liang Date: Thu, 16 Aug 2018 18:03:50 -0700 Subject: [PATCH] [rllib] Workaround actor creation hang edge case for ape-X (#2661) * apex hang * fix * move pyt to end --- python/ray/rllib/agents/dqn/dqn.py | 23 ++++++++++---- .../optimizers/async_replay_optimizer.py | 10 +++++-- test/jenkins_tests/run_multi_node_tests.sh | 30 +++++++++---------- 3 files changed, 41 insertions(+), 22 deletions(-) diff --git a/python/ray/rllib/agents/dqn/dqn.py b/python/ray/rllib/agents/dqn/dqn.py index 2678f9559..d88509b57 100644 --- a/python/ray/rllib/agents/dqn/dqn.py +++ b/python/ray/rllib/agents/dqn/dqn.py @@ -137,14 +137,27 @@ class DQNAgent(Agent): self.local_evaluator = self.make_local_evaluator( self.env_creator, self._policy_graph) - self.remote_evaluators = self.make_remote_evaluators( - self.env_creator, self._policy_graph, self.config["num_workers"], { - "num_cpus": self.config["num_cpus_per_worker"], - "num_gpus": self.config["num_gpus_per_worker"] - }) + + def create_remote_evaluators(): + return self.make_remote_evaluators( + self.env_creator, self._policy_graph, + self.config["num_workers"], { + "num_cpus": self.config["num_cpus_per_worker"], + "num_gpus": self.config["num_gpus_per_worker"] + }) + + if self.config["optimizer_class"] != "AsyncReplayOptimizer": + self.remote_evaluators = create_remote_evaluators() + else: + # Hack to workaround https://github.com/ray-project/ray/issues/2541 + self.remote_evaluators = None self.optimizer = getattr(optimizers, self.config["optimizer_class"])( self.local_evaluator, self.remote_evaluators, self.config["optimizer"]) + # Create the remote evaluators *after* the replay actors + if self.remote_evaluators is None: + self.remote_evaluators = create_remote_evaluators() + self.optimizer.set_evaluators(self.remote_evaluators) self.last_target_update_ts = 0 self.num_target_updates = 0 diff --git a/python/ray/rllib/optimizers/async_replay_optimizer.py b/python/ray/rllib/optimizers/async_replay_optimizer.py index 0037ea7a0..99dfc735f 100644 --- a/python/ray/rllib/optimizers/async_replay_optimizer.py +++ b/python/ray/rllib/optimizers/async_replay_optimizer.py @@ -27,7 +27,7 @@ REPLAY_QUEUE_DEPTH = 4 LEARNER_QUEUE_MAX_SIZE = 16 -@ray.remote +@ray.remote(num_cpus=0) class ReplayActor(object): """A replay buffer shard. @@ -175,7 +175,6 @@ class AsyncReplayOptimizer(PolicyOptimizer): train_batch_size, prioritized_replay_alpha, prioritized_replay_beta, prioritized_replay_eps, clip_rewards ], num_replay_buffer_shards) - assert len(self.remote_evaluators) > 0 # Stats self.timers = { @@ -199,6 +198,12 @@ class AsyncReplayOptimizer(PolicyOptimizer): # Kick off async background sampling self.sample_tasks = TaskPool() + if self.remote_evaluators: + self.set_evaluators(self.remote_evaluators) + + # For https://github.com/ray-project/ray/issues/2541 only + def set_evaluators(self, remote_evaluators): + self.remote_evaluators = remote_evaluators weights = self.local_evaluator.get_weights() for ev in self.remote_evaluators: ev.set_weights.remote(weights) @@ -207,6 +212,7 @@ class AsyncReplayOptimizer(PolicyOptimizer): self.sample_tasks.add(ev, ev.sample_with_count.remote()) def step(self): + assert len(self.remote_evaluators) > 0 start = time.time() sample_timesteps, train_timesteps = self._step() time_delta = time.time() - start diff --git a/test/jenkins_tests/run_multi_node_tests.sh b/test/jenkins_tests/run_multi_node_tests.sh index 8965d33b1..a1a77177a 100755 --- a/test/jenkins_tests/run_multi_node_tests.sh +++ b/test/jenkins_tests/run_multi_node_tests.sh @@ -114,20 +114,6 @@ docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ --stop '{"training_iteration": 2}' \ --config '{"kl_coeff": 1.0, "num_sgd_iter": 10, "sgd_stepsize": 1e-4, "sgd_batchsize": 64, "timesteps_per_batch": 2000, "num_workers": 1, "model": {"dim": 40, "conv_filters": [[16, [8, 8], 4], [32, [4, 4], 2], [512, [5, 5], 1]]}}' -docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ - python /ray/python/ray/rllib/train.py \ - --env PongDeterministic-v4 \ - --run A3C \ - --stop '{"training_iteration": 2}' \ - --config '{"num_workers": 2, "use_pytorch": true, "model": {"use_lstm": false, "grayscale": true, "zero_mean": false, "dim": 80, "channel_major": true}}' - -docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ - python /ray/python/ray/rllib/train.py \ - --env CartPole-v1 \ - --run A3C \ - --stop '{"training_iteration": 2}' \ - --config '{"num_workers": 2, "use_pytorch": true}' - docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/train.py \ --env CartPole-v1 \ @@ -285,6 +271,20 @@ docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ python /ray/python/ray/rllib/examples/multiagent_two_trainers.py --num-iters=2 +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ + python /ray/python/ray/rllib/train.py \ + --env PongDeterministic-v4 \ + --run A3C \ + --stop '{"training_iteration": 2}' \ + --config '{"num_workers": 2, "use_pytorch": true, "model": {"use_lstm": false, "grayscale": true, "zero_mean": false, "dim": 80, "channel_major": true}}' + +docker run -e "RAY_USE_XRAY=1" --rm --shm-size=10G --memory=10G $DOCKER_SHA \ + python /ray/python/ray/rllib/train.py \ + --env CartPole-v1 \ + --run A3C \ + --stop '{"training_iteration": 2}' \ + --config '{"num_workers": 2, "use_pytorch": true}' + python3 $ROOT_DIR/multi_node_docker_test.py \ --docker-image=$DOCKER_SHA \ --num-nodes=5 \ @@ -316,4 +316,4 @@ python3 $ROOT_DIR/multi_node_docker_test.py \ --mem-size=60G \ --shm-size=60G \ --use-raylet \ - --test-script=/ray/test/jenkins_tests/multi_node_tests/large_memory_test.py \ No newline at end of file + --test-script=/ray/test/jenkins_tests/multi_node_tests/large_memory_test.py