mirror of
https://github.com/wassname/ray.git
synced 2026-07-03 21:48:43 +08:00
[rllib] Misc fixes, A2C (#2679)
A bunch of minor rllib fixes: pull in latest baselines atari wrapper changes (and use deepmind wrapper by default); move reward clipping to policy evaluator; add a2c variant of a3c; reduce vision network fc layer size to 256 units; switch to 84x84 images; doc tweaks; print timesteps in tune status.
This commit is contained in:
@@ -43,6 +43,7 @@ class SyncSamplesOptimizer(PolicyOptimizer):
|
||||
else:
|
||||
samples.append(self.local_evaluator.sample())
|
||||
samples = SampleBatch.concat_samples(samples)
|
||||
self.sample_timer.push_units_processed(samples.count)
|
||||
|
||||
with self.grad_timer:
|
||||
for i in range(self.num_sgd_iter):
|
||||
@@ -64,5 +65,7 @@ class SyncSamplesOptimizer(PolicyOptimizer):
|
||||
3),
|
||||
"opt_peak_throughput": round(self.grad_timer.mean_throughput,
|
||||
3),
|
||||
"sample_peak_throughput": round(
|
||||
self.sample_timer.mean_throughput, 3),
|
||||
"opt_samples": round(self.grad_timer.mean_units_processed, 3),
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user