Recompute old action log-probabilities for PPO, so that continuous actions work with the observation filter

This commit is contained in:
Ilya Kostrikov
2017-09-24 23:00:14 -04:00
parent 6ee53d245d
commit 54a0f98180
3 changed files with 14 additions and 12 deletions
+1 -2
View File
@@ -82,8 +82,7 @@ class ActorCritic(torch.nn.Module):
value, logits = self(inputs)
probs = F.softmax(logits)
action = probs.multinomial()
action_log_probs = F.log_softmax(logits).gather(1, action)
return value, action, action_log_probs
return value, action
def evaluate_actions(self, inputs, actions):
assert inputs.dim() == 4, "Expect to have inputs in num_processes * num_steps x ... format"