[rllib] Clean up concepts documentation and policy optimizer creation (#4592)

2026-07-04 09:36:46 +08:00 · 2019-04-12 21:03:26 -07:00
parent 0f42f87ebc
commit 6e7680bf21
29 changed files with 303 additions and 270 deletions
@@ -22,8 +22,13 @@ class SyncSamplesOptimizer(PolicyOptimizer):
    model weights are then broadcast to all remote evaluators.
    """

-    @override(PolicyOptimizer)
-    def _init(self, num_sgd_iter=1, train_batch_size=1):
+    def __init__(self,
+                 local_evaluator,
+                 remote_evaluators,
+                 num_sgd_iter=1,
+                 train_batch_size=1):
+        PolicyOptimizer.__init__(self, local_evaluator, remote_evaluators)
+
        self.update_weights_timer = TimerStat()
        self.sample_timer = TimerStat()
        self.grad_timer = TimerStat()