Recompute old probabilities for PPO, to make continuous actions work with obs filter

2026-06-27 16:20:05 +08:00 · 2017-09-24 23:00:14 -04:00
parent 6ee53d245d
commit 54a0f98180
3 changed files with 14 additions and 12 deletions
@@ -82,8 +82,7 @@ class ActorCritic(torch.nn.Module):
        value, logits = self(inputs)
        probs = F.softmax(logits)
        action = probs.multinomial()
-        action_log_probs = F.log_softmax(logits).gather(1, action)
-        return value, action, action_log_probs
+        return value, action

    def evaluate_actions(self, inputs, actions):
        assert inputs.dim() == 4, "Expect to have inputs in num_processes * num_steps x ... format"