mirror of
https://github.com/wassname/ray.git
synced 2026-07-01 22:40:06 +08:00
[rllib] more user-friendly Optimizer signature + compute_apply (#2335)
* Move signature of optimizers
* fix
* expose compute_apply for policy_graphs
* dictionaries and such
* test for multiagent
This commit is contained in:
@@ -40,8 +40,7 @@ class SyncSamplesOptimizer(PolicyOptimizer):
                 samples = self.local_evaluator.sample()
 
         with self.grad_timer:
-            grad, _ = self.local_evaluator.compute_gradients(samples)
-            self.local_evaluator.apply_gradients(grad)
+            self.local_evaluator.compute_apply(samples)
             self.grad_timer.push_units_processed(samples.count)
 
         self.num_steps_sampled += samples.count
Reference in New Issue
Block a user