mirror of
https://github.com/wassname/pytorch-a2c-ppo-acktr.git
synced 2026-06-27 16:20:05 +08:00
Recompute old probabilities for PPO, to make continuous actions work with obs filter
This commit is contained in:
@@ -82,8 +82,7 @@ class ActorCritic(torch.nn.Module):
|
||||
value, logits = self(inputs)
|
||||
probs = F.softmax(logits)
|
||||
action = probs.multinomial()
|
||||
action_log_probs = F.log_softmax(logits).gather(1, action)
|
||||
return value, action, action_log_probs
|
||||
return value, action
|
||||
|
||||
def evaluate_actions(self, inputs, actions):
|
||||
assert inputs.dim() == 4, "Expect to have inputs in num_processes * num_steps x ... format"
|
||||
|
||||
Reference in New Issue
Block a user