diff --git a/python/ray/rllib/agents/pg/pg.py b/python/ray/rllib/agents/pg/pg.py
index 59428f76a..bf437725e 100644
--- a/python/ray/rllib/agents/pg/pg.py
+++ b/python/ray/rllib/agents/pg/pg.py
@@ -57,6 +57,6 @@ class PGAgent(Agent):
         self.optimizer.step()
         result = self.optimizer.collect_metrics(
             self.config["collect_metrics_timeout"])
-        result.update(
-            timesteps_this_iter=self.optimizer.num_steps_sampled - prev_steps)
+        result.update(timesteps_this_iter=self.optimizer.num_steps_sampled -
+                      prev_steps)
         return result
diff --git a/python/ray/rllib/agents/pg/torch_pg_policy_graph.py b/python/ray/rllib/agents/pg/torch_pg_policy_graph.py
index a818013d4..3708afc0f 100644
--- a/python/ray/rllib/agents/pg/torch_pg_policy_graph.py
+++ b/python/ray/rllib/agents/pg/torch_pg_policy_graph.py
@@ -22,7 +22,6 @@ class PGLoss(nn.Module):
     def forward(self, observations, actions, advantages):
         logits, _, values, _ = self.policy_model({"obs": observations}, [])
         log_probs = F.log_softmax(logits, dim=1)
-        probs = F.softmax(logits, dim=1)
         action_log_probs = log_probs.gather(1, actions.view(-1, 1))
         pi_err = -advantages.dot(action_log_probs.reshape(-1))
         return pi_err