diff --git a/python/ray/rllib/optimizers/async_samples_optimizer.py b/python/ray/rllib/optimizers/async_samples_optimizer.py index 60b4eb691..541bcc1fa 100644 --- a/python/ray/rllib/optimizers/async_samples_optimizer.py +++ b/python/ray/rllib/optimizers/async_samples_optimizer.py @@ -167,7 +167,8 @@ class AsyncSamplesOptimizer(PolicyOptimizer): for b in self.batch_buffer) >= self.train_batch_size: train_batch = self.batch_buffer[0].concat_samples( self.batch_buffer) - self.learner.inqueue.put(train_batch) + # defensive copy against plasma ref count bugs, see #3884 + self.learner.inqueue.put(train_batch.copy()) self.batch_buffer = [] # If the batch was replayed, skip the update below.