mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 13:37:39 +08:00
[rllib] Replay buffer for IMPALA should default to 0 slots. (#3971)
* disable replay
* make lq (learner queue size) configurable
* leak test
* Update run_multi_node_tests.sh
This commit is contained in:
@@ -64,7 +64,9 @@ DEFAULT_CONFIG = with_common_config({
|
||||
"replay_proportion": 0.0,
|
||||
# number of sample batches to store for replay. The number of transitions
|
||||
# saved total will be (replay_buffer_num_slots * sample_batch_size).
|
||||
"replay_buffer_num_slots": 100,
|
||||
"replay_buffer_num_slots": 0,
|
||||
# max queue size for train batches feeding into the learner
|
||||
"learner_queue_size": 16,
|
||||
# level of queuing for sampling.
|
||||
"max_sample_requests_in_flight_per_worker": 2,
|
||||
# max number of workers to broadcast one set of weights to
|
||||
|
||||
@@ -35,6 +35,7 @@ DEFAULT_CONFIG = with_base_config(impala.DEFAULT_CONFIG, {
|
||||
"num_sgd_iter": 1,
|
||||
"replay_proportion": 0.0,
|
||||
"replay_buffer_num_slots": 100,
|
||||
"learner_queue_size": 16,
|
||||
"max_sample_requests_in_flight_per_worker": 2,
|
||||
"broadcast_interval": 1,
|
||||
"grad_clip": 40.0,
|
||||
|
||||
@@ -24,7 +24,6 @@ from ray.rllib.utils.window_stat import WindowStat
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
LEARNER_QUEUE_MAX_SIZE = 16
|
||||
NUM_DATA_LOAD_THREADS = 16
|
||||
|
||||
|
||||
@@ -49,6 +48,7 @@ class AsyncSamplesOptimizer(PolicyOptimizer):
|
||||
broadcast_interval=1,
|
||||
num_sgd_iter=1,
|
||||
minibatch_buffer_size=1,
|
||||
learner_queue_size=16,
|
||||
_fake_gpus=False):
|
||||
self.learning_started = False
|
||||
self.train_batch_size = train_batch_size
|
||||
@@ -73,10 +73,12 @@ class AsyncSamplesOptimizer(PolicyOptimizer):
|
||||
num_data_loader_buffers=num_data_loader_buffers,
|
||||
minibatch_buffer_size=minibatch_buffer_size,
|
||||
num_sgd_iter=num_sgd_iter,
|
||||
learner_queue_size=learner_queue_size,
|
||||
_fake_gpus=_fake_gpus)
|
||||
else:
|
||||
self.learner = LearnerThread(self.local_evaluator,
|
||||
minibatch_buffer_size, num_sgd_iter)
|
||||
minibatch_buffer_size, num_sgd_iter,
|
||||
learner_queue_size)
|
||||
self.learner.start()
|
||||
|
||||
assert len(self.remote_evaluators) > 0
|
||||
@@ -230,11 +232,12 @@ class LearnerThread(threading.Thread):
|
||||
improves overall throughput.
|
||||
"""
|
||||
|
||||
def __init__(self, local_evaluator, minibatch_buffer_size, num_sgd_iter):
|
||||
def __init__(self, local_evaluator, minibatch_buffer_size, num_sgd_iter,
|
||||
learner_queue_size):
|
||||
threading.Thread.__init__(self)
|
||||
self.learner_queue_size = WindowStat("size", 50)
|
||||
self.local_evaluator = local_evaluator
|
||||
self.inqueue = queue.Queue(maxsize=LEARNER_QUEUE_MAX_SIZE)
|
||||
self.inqueue = queue.Queue(maxsize=learner_queue_size)
|
||||
self.outqueue = queue.Queue()
|
||||
self.minibatch_buffer = MinibatchBuffer(
|
||||
self.inqueue, minibatch_buffer_size, num_sgd_iter)
|
||||
@@ -275,12 +278,13 @@ class TFMultiGPULearner(LearnerThread):
|
||||
num_data_loader_buffers=1,
|
||||
minibatch_buffer_size=1,
|
||||
num_sgd_iter=1,
|
||||
learner_queue_size=16,
|
||||
_fake_gpus=False):
|
||||
# Multi-GPU requires TensorFlow to function.
|
||||
import tensorflow as tf
|
||||
|
||||
LearnerThread.__init__(self, local_evaluator, minibatch_buffer_size,
|
||||
num_sgd_iter)
|
||||
num_sgd_iter, learner_queue_size)
|
||||
self.lr = lr
|
||||
self.train_batch_size = train_batch_size
|
||||
if not num_gpus:
|
||||
|
||||
Reference in New Issue
Block a user