[rllib] Replay buffer for IMPALA should default to 0 slots. (#3971)

* disable replay

* make learner queue size (lq) configurable

* leak test

* Update run_multi_node_tests.sh
This commit is contained in:
Eric Liang
2019-02-08 10:03:11 -08:00
committed by GitHub
parent 6a32b410bb
commit 29322c7389
4 changed files with 23 additions and 8 deletions
+3 -1
View File
@@ -64,7 +64,9 @@ DEFAULT_CONFIG = with_common_config({
"replay_proportion": 0.0,
# number of sample batches to store for replay. The total number of
# transitions saved will be (replay_buffer_num_slots * sample_batch_size).
"replay_buffer_num_slots": 100,
"replay_buffer_num_slots": 0,
# max queue size for train batches feeding into the learner
"learner_queue_size": 16,
# level of queuing for sampling.
"max_sample_requests_in_flight_per_worker": 2,
# max number of workers to broadcast one set of weights to
+1
View File
@@ -35,6 +35,7 @@ DEFAULT_CONFIG = with_base_config(impala.DEFAULT_CONFIG, {
"num_sgd_iter": 1,
"replay_proportion": 0.0,
"replay_buffer_num_slots": 100,
"learner_queue_size": 16,
"max_sample_requests_in_flight_per_worker": 2,
"broadcast_interval": 1,
"grad_clip": 40.0,
@@ -24,7 +24,6 @@ from ray.rllib.utils.window_stat import WindowStat
logger = logging.getLogger(__name__)
LEARNER_QUEUE_MAX_SIZE = 16
NUM_DATA_LOAD_THREADS = 16
@@ -49,6 +48,7 @@ class AsyncSamplesOptimizer(PolicyOptimizer):
broadcast_interval=1,
num_sgd_iter=1,
minibatch_buffer_size=1,
learner_queue_size=16,
_fake_gpus=False):
self.learning_started = False
self.train_batch_size = train_batch_size
@@ -73,10 +73,12 @@ class AsyncSamplesOptimizer(PolicyOptimizer):
num_data_loader_buffers=num_data_loader_buffers,
minibatch_buffer_size=minibatch_buffer_size,
num_sgd_iter=num_sgd_iter,
learner_queue_size=learner_queue_size,
_fake_gpus=_fake_gpus)
else:
self.learner = LearnerThread(self.local_evaluator,
minibatch_buffer_size, num_sgd_iter)
minibatch_buffer_size, num_sgd_iter,
learner_queue_size)
self.learner.start()
assert len(self.remote_evaluators) > 0
@@ -230,11 +232,12 @@ class LearnerThread(threading.Thread):
improves overall throughput.
"""
def __init__(self, local_evaluator, minibatch_buffer_size, num_sgd_iter):
def __init__(self, local_evaluator, minibatch_buffer_size, num_sgd_iter,
learner_queue_size):
threading.Thread.__init__(self)
self.learner_queue_size = WindowStat("size", 50)
self.local_evaluator = local_evaluator
self.inqueue = queue.Queue(maxsize=LEARNER_QUEUE_MAX_SIZE)
self.inqueue = queue.Queue(maxsize=learner_queue_size)
self.outqueue = queue.Queue()
self.minibatch_buffer = MinibatchBuffer(
self.inqueue, minibatch_buffer_size, num_sgd_iter)
@@ -275,12 +278,13 @@ class TFMultiGPULearner(LearnerThread):
num_data_loader_buffers=1,
minibatch_buffer_size=1,
num_sgd_iter=1,
learner_queue_size=16,
_fake_gpus=False):
# Multi-GPU requires TensorFlow to function.
import tensorflow as tf
LearnerThread.__init__(self, local_evaluator, minibatch_buffer_size,
num_sgd_iter)
num_sgd_iter, learner_queue_size)
self.lr = lr
self.train_batch_size = train_batch_size
if not num_gpus: