mirror of
https://github.com/wassname/pytorch-soft-actor-critic.git
synced 2026-06-27 18:43:57 +08:00
Update sac.py
This commit is contained in:
@@ -64,7 +64,6 @@ class SAC(object):
|
||||
qf1_next_target, qf2_next_target = self.critic_target(next_state_batch, next_state_action)
|
||||
min_qf_next_target = torch.min(qf1_next_target, qf2_next_target) - self.alpha * next_state_log_pi
|
||||
next_q_value = reward_batch + mask_batch * self.gamma * (min_qf_next_target)
|
||||
|
||||
qf1, qf2 = self.critic(state_batch, action_batch) # Two Q-functions to mitigate positive bias in the policy improvement step
|
||||
qf1_loss = F.mse_loss(qf1, next_q_value) # JQ = 𝔼(st,at)~D[0.5(Q1(st,at) - r(st,at) - γ(𝔼st+1~p[V(st+1)]))^2]
|
||||
qf2_loss = F.mse_loss(qf2, next_q_value) # JQ = 𝔼(st,at)~D[0.5(Q2(st,at) - r(st,at) - γ(𝔼st+1~p[V(st+1)]))^2]
|
||||
|
||||
Reference in New Issue
Block a user