[rllib] Add copy() in async samples optimizer to fix memory leak (#3938)

Fixes #3884.
This commit is contained in:
Eric Liang
2019-02-03 18:34:37 -08:00
committed by Richard Liaw
parent 8323419a6d
commit 7ef830bef1
@@ -167,7 +167,8 @@ class AsyncSamplesOptimizer(PolicyOptimizer):
for b in self.batch_buffer) >= self.train_batch_size:
train_batch = self.batch_buffer[0].concat_samples(
self.batch_buffer)
self.learner.inqueue.put(train_batch)
# defensive copy against plasma ref count bugs, see #3884
self.learner.inqueue.put(train_batch.copy())
self.batch_buffer = []
# If the batch was replayed, skip the update below.