diff --git a/python/ray/rllib/agents/impala/impala.py b/python/ray/rllib/agents/impala/impala.py index 9221e3764..94294a1fc 100644 --- a/python/ray/rllib/agents/impala/impala.py +++ b/python/ray/rllib/agents/impala/impala.py @@ -64,7 +64,9 @@ DEFAULT_CONFIG = with_common_config({ "replay_proportion": 0.0, # number of sample batches to store for replay. The number of transitions # saved total will be (replay_buffer_num_slots * sample_batch_size). - "replay_buffer_num_slots": 100, + "replay_buffer_num_slots": 0, + # max queue size for train batches feeding into the learner + "learner_queue_size": 16, # level of queuing for sampling. "max_sample_requests_in_flight_per_worker": 2, # max number of workers to broadcast one set of weights to diff --git a/python/ray/rllib/agents/ppo/appo.py b/python/ray/rllib/agents/ppo/appo.py index d64621d82..d5f9f4fa4 100644 --- a/python/ray/rllib/agents/ppo/appo.py +++ b/python/ray/rllib/agents/ppo/appo.py @@ -35,6 +35,7 @@ DEFAULT_CONFIG = with_base_config(impala.DEFAULT_CONFIG, { "num_sgd_iter": 1, "replay_proportion": 0.0, "replay_buffer_num_slots": 100, + "learner_queue_size": 16, "max_sample_requests_in_flight_per_worker": 2, "broadcast_interval": 1, "grad_clip": 40.0, diff --git a/python/ray/rllib/optimizers/async_samples_optimizer.py b/python/ray/rllib/optimizers/async_samples_optimizer.py index 60b4eb691..1af548af5 100644 --- a/python/ray/rllib/optimizers/async_samples_optimizer.py +++ b/python/ray/rllib/optimizers/async_samples_optimizer.py @@ -24,7 +24,6 @@ from ray.rllib.utils.window_stat import WindowStat logger = logging.getLogger(__name__) -LEARNER_QUEUE_MAX_SIZE = 16 NUM_DATA_LOAD_THREADS = 16 @@ -49,6 +48,7 @@ class AsyncSamplesOptimizer(PolicyOptimizer): broadcast_interval=1, num_sgd_iter=1, minibatch_buffer_size=1, + learner_queue_size=16, _fake_gpus=False): self.learning_started = False self.train_batch_size = train_batch_size @@ -73,10 +73,12 @@ class AsyncSamplesOptimizer(PolicyOptimizer): num_data_loader_buffers=num_data_loader_buffers, minibatch_buffer_size=minibatch_buffer_size, num_sgd_iter=num_sgd_iter, + learner_queue_size=learner_queue_size, _fake_gpus=_fake_gpus) else: self.learner = LearnerThread(self.local_evaluator, - minibatch_buffer_size, num_sgd_iter) + minibatch_buffer_size, num_sgd_iter, + learner_queue_size) self.learner.start() assert len(self.remote_evaluators) > 0 @@ -230,11 +232,12 @@ class LearnerThread(threading.Thread): improves overall throughput. """ - def __init__(self, local_evaluator, minibatch_buffer_size, num_sgd_iter): + def __init__(self, local_evaluator, minibatch_buffer_size, num_sgd_iter, + learner_queue_size): threading.Thread.__init__(self) self.learner_queue_size = WindowStat("size", 50) self.local_evaluator = local_evaluator - self.inqueue = queue.Queue(maxsize=LEARNER_QUEUE_MAX_SIZE) + self.inqueue = queue.Queue(maxsize=learner_queue_size) self.outqueue = queue.Queue() self.minibatch_buffer = MinibatchBuffer( self.inqueue, minibatch_buffer_size, num_sgd_iter) @@ -275,12 +278,13 @@ class TFMultiGPULearner(LearnerThread): num_data_loader_buffers=1, minibatch_buffer_size=1, num_sgd_iter=1, + learner_queue_size=16, _fake_gpus=False): # Multi-GPU requires TensorFlow to function. import tensorflow as tf LearnerThread.__init__(self, local_evaluator, minibatch_buffer_size, - num_sgd_iter) + num_sgd_iter, learner_queue_size) self.lr = lr self.train_batch_size = train_batch_size if not num_gpus: diff --git a/test/jenkins_tests/run_multi_node_tests.sh b/test/jenkins_tests/run_multi_node_tests.sh index ce96887bb..573052fa1 100755 --- a/test/jenkins_tests/run_multi_node_tests.sh +++ b/test/jenkins_tests/run_multi_node_tests.sh @@ -214,14 +214,14 @@ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ --env CartPole-v0 \ --run IMPALA \ --stop '{"training_iteration": 2}' \ - --config '{"num_gpus": 0, "num_workers": 2, "min_iter_time_s": 1, "num_data_loader_buffers": 2, "replay_proportion": 1.0}' + --config '{"num_gpus": 0, "num_workers": 2, "min_iter_time_s": 1, "num_data_loader_buffers": 2, "replay_buffer_num_slots": 100, "replay_proportion": 1.0}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ python /ray/python/ray/rllib/train.py \ --env CartPole-v0 \ --run IMPALA \ --stop '{"training_iteration": 2}' \ - --config '{"num_gpus": 0, "num_workers": 2, "min_iter_time_s": 1, "num_data_loader_buffers": 2, "replay_proportion": 1.0, "model": {"use_lstm": true}}' + --config '{"num_gpus": 0, "num_workers": 2, "min_iter_time_s": 1, "num_data_loader_buffers": 2, "replay_buffer_num_slots": 100, "replay_proportion": 1.0, "model": {"use_lstm": true}}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ python /ray/python/ray/rllib/train.py \ @@ -448,6 +448,14 @@ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ --stop '{"training_iteration": 2}' \ --config '{"num_workers": 2, "use_pytorch": true, "sample_async": false}' +docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ + python /ray/python/ray/rllib/train.py \ + --env PongDeterministic-v4 \ + --run IMPALA \ + --stop='{"timesteps_total": 40000}' \ + --ray-object-store-memory=500000000 \ + --config '{"num_workers": 1, "num_gpus": 0, "num_envs_per_worker": 64, "sample_batch_size": 50, "train_batch_size": 50, "learner_queue_size": 1}' + python3 $ROOT_DIR/multi_node_docker_test.py \ --docker-image=$DOCKER_SHA \ --num-nodes=5 \