From d3f81d5aadceba0003d5e24fec170a252f470953 Mon Sep 17 00:00:00 2001 From: Richard Liaw Date: Wed, 27 Jun 2018 22:41:34 -0700 Subject: [PATCH] [rllib] Add stats for A3C (#2315) * add stats for a3c again * fix multigpu too --- python/ray/rllib/a3c/a3c.py | 5 ++++- python/ray/rllib/optimizers/async_gradients_optimizer.py | 2 +- python/ray/rllib/optimizers/multi_gpu_optimizer.py | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/python/ray/rllib/a3c/a3c.py b/python/ray/rllib/a3c/a3c.py index c3813ac2d..8af5eb82c 100644 --- a/python/ray/rllib/a3c/a3c.py +++ b/python/ray/rllib/a3c/a3c.py @@ -139,7 +139,10 @@ class A3CAgent(Agent): self.optimizer.step() FilterManager.synchronize( self.local_evaluator.filters, self.remote_evaluators) - return collect_metrics(self.local_evaluator, self.remote_evaluators) + result = collect_metrics(self.local_evaluator, self.remote_evaluators) + result = result._replace( + info=self.optimizer.stats()) + return result def _stop(self): # workaround for https://github.com/ray-project/ray/issues/1516 diff --git a/python/ray/rllib/optimizers/async_gradients_optimizer.py b/python/ray/rllib/optimizers/async_gradients_optimizer.py index 7ec11e2a6..e207162c2 100644 --- a/python/ray/rllib/optimizers/async_gradients_optimizer.py +++ b/python/ray/rllib/optimizers/async_gradients_optimizer.py @@ -57,7 +57,7 @@ class AsyncGradientsOptimizer(PolicyOptimizer): self.num_steps_trained += self.grads_per_step * self.batch_size def stats(self): - return dict(PolicyOptimizer.stats(), **{ + return dict(PolicyOptimizer.stats(self), **{ "wait_time_ms": round(1000 * self.wait_timer.mean, 3), "apply_time_ms": round(1000 * self.apply_timer.mean, 3), "dispatch_time_ms": round(1000 * self.dispatch_timer.mean, 3), diff --git a/python/ray/rllib/optimizers/multi_gpu_optimizer.py b/python/ray/rllib/optimizers/multi_gpu_optimizer.py index fedfb6dbb..aa3a82072 100644 --- a/python/ray/rllib/optimizers/multi_gpu_optimizer.py +++ b/python/ray/rllib/optimizers/multi_gpu_optimizer.py @@ -123,7 +123,7 @@ class LocalMultiGPUOptimizer(PolicyOptimizer): return all_extra_fetches def stats(self): - return dict(PolicyOptimizer.stats(), **{ + return dict(PolicyOptimizer.stats(self), **{ "sample_time_ms": round(1000 * self.sample_timer.mean, 3), "load_time_ms": round(1000 * self.load_timer.mean, 3), "grad_time_ms": round(1000 * self.grad_timer.mean, 3),