From 6b93ec30349d85b996c1ff9996b76ebec9bdaca5 Mon Sep 17 00:00:00 2001 From: Leon Sievers Date: Fri, 15 Mar 2019 03:46:02 +0100 Subject: [PATCH] Fixed calculation of num_steps_trained for multi_gpu_optimizer (#4364) --- python/ray/rllib/optimizers/multi_gpu_impl.py | 2 +- python/ray/rllib/optimizers/multi_gpu_optimizer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/ray/rllib/optimizers/multi_gpu_impl.py b/python/ray/rllib/optimizers/multi_gpu_impl.py index 07bb9b886..b78c8c9e0 100644 --- a/python/ray/rllib/optimizers/multi_gpu_impl.py +++ b/python/ray/rllib/optimizers/multi_gpu_impl.py @@ -188,7 +188,7 @@ class LocalSyncParallelOptimizer(object): sess.run([t.init_op for t in self._towers], feed_dict=feed_dict) - tuples_per_device = truncated_len / len(self.devices) + tuples_per_device = truncated_len // len(self.devices) assert tuples_per_device > 0, "No data loaded?" assert tuples_per_device % self._loaded_per_device_batch_size == 0 return tuples_per_device diff --git a/python/ray/rllib/optimizers/multi_gpu_optimizer.py b/python/ray/rllib/optimizers/multi_gpu_optimizer.py index f9f662ef6..0defc8fe8 100644 --- a/python/ray/rllib/optimizers/multi_gpu_optimizer.py +++ b/python/ray/rllib/optimizers/multi_gpu_optimizer.py @@ -196,7 +196,7 @@ class LocalMultiGPUOptimizer(PolicyOptimizer): fetches[policy_id] = _averaged(iter_extra_fetches) self.num_steps_sampled += samples.count - self.num_steps_trained += samples.count + self.num_steps_trained += tuples_per_device * len(self.devices) return fetches @override(PolicyOptimizer)