[rllib] Clean up concepts documentation and policy optimizer creation (#4592)

This commit is contained in:
Eric Liang
2019-04-12 21:03:26 -07:00
committed by GitHub
parent 0f42f87ebc
commit 6e7680bf21
29 changed files with 303 additions and 270 deletions
@@ -22,8 +22,13 @@ class SyncSamplesOptimizer(PolicyOptimizer):
model weights are then broadcast to all remote evaluators.
"""
@override(PolicyOptimizer)
def _init(self, num_sgd_iter=1, train_batch_size=1):
def __init__(self,
local_evaluator,
remote_evaluators,
num_sgd_iter=1,
train_batch_size=1):
PolicyOptimizer.__init__(self, local_evaluator, remote_evaluators)
self.update_weights_timer = TimerStat()
self.sample_timer = TimerStat()
self.grad_timer = TimerStat()