diff --git a/python/ray/rllib/a3c/a3c.py b/python/ray/rllib/a3c/a3c.py
index c3813ac2d..8af5eb82c 100644
--- a/python/ray/rllib/a3c/a3c.py
+++ b/python/ray/rllib/a3c/a3c.py
@@ -139,7 +139,10 @@ class A3CAgent(Agent):
         self.optimizer.step()
         FilterManager.synchronize(
             self.local_evaluator.filters, self.remote_evaluators)
-        return collect_metrics(self.local_evaluator, self.remote_evaluators)
+        result = collect_metrics(self.local_evaluator, self.remote_evaluators)
+        result = result._replace(
+            info=self.optimizer.stats())
+        return result
 
     def _stop(self):
         # workaround for https://github.com/ray-project/ray/issues/1516
diff --git a/python/ray/rllib/optimizers/async_gradients_optimizer.py b/python/ray/rllib/optimizers/async_gradients_optimizer.py
index 7ec11e2a6..e207162c2 100644
--- a/python/ray/rllib/optimizers/async_gradients_optimizer.py
+++ b/python/ray/rllib/optimizers/async_gradients_optimizer.py
@@ -57,7 +57,7 @@ class AsyncGradientsOptimizer(PolicyOptimizer):
         self.num_steps_trained += self.grads_per_step * self.batch_size
 
     def stats(self):
-        return dict(PolicyOptimizer.stats(), **{
+        return dict(PolicyOptimizer.stats(self), **{
             "wait_time_ms": round(1000 * self.wait_timer.mean, 3),
             "apply_time_ms": round(1000 * self.apply_timer.mean, 3),
             "dispatch_time_ms": round(1000 * self.dispatch_timer.mean, 3),
diff --git a/python/ray/rllib/optimizers/multi_gpu_optimizer.py b/python/ray/rllib/optimizers/multi_gpu_optimizer.py
index fedfb6dbb..aa3a82072 100644
--- a/python/ray/rllib/optimizers/multi_gpu_optimizer.py
+++ b/python/ray/rllib/optimizers/multi_gpu_optimizer.py
@@ -123,7 +123,7 @@ class LocalMultiGPUOptimizer(PolicyOptimizer):
         return all_extra_fetches
 
     def stats(self):
-        return dict(PolicyOptimizer.stats(), **{
+        return dict(PolicyOptimizer.stats(self), **{
             "sample_time_ms": round(1000 * self.sample_timer.mean, 3),
             "load_time_ms": round(1000 * self.load_timer.mean, 3),
             "grad_time_ms": round(1000 * self.grad_timer.mean, 3),