mirror of
https://github.com/wassname/pytorch-a2c-ppo-acktr.git
synced 2026-06-27 16:20:05 +08:00
Add KFAC
This commit is contained in:
@@ -1,16 +1,27 @@
|
|||||||
# pytorch-a2c-ppo
|
# pytorch-a2c-ppo-acktr
|
||||||
|
|
||||||
This is a PyTorch implementation of Advantage Actor Critic (A2C), a synchronous deterministic version of A3C ["Asynchronous Methods for Deep Reinforcement Learning"](https://arxiv.org/pdf/1602.01783v1.pdf) and [Proximal Policy Optimization (PPO)](https://arxiv.org/pdf/1707.06347.pdf). Also see the OpenAI posts: [A2C/A3C](https://blog.openai.com/baselines-acktr-a2c/) and [PPO](https://blog.openai.com/openai-baselines-ppo/) for more information.
|
This is a PyTorch implementation of
|
||||||
|
* Advantage Actor Critic (A2C), a synchronous deterministic version of [A3C](https://arxiv.org/pdf/1602.01783v1.pdf)
|
||||||
|
* Proximal Policy Optimization [PPO](https://arxiv.org/pdf/1707.06347.pdf)
|
||||||
|
* Scalable trust-region method for deep reinforcement learning using Kronecker-factored approximation [ACKTR](https://arxiv.org/abs/1708.05144)
|
||||||
|
|
||||||
This implementation is inspired by the OpenAI baselines for [A2C](https://github.com/openai/baselines/tree/master/baselines/a2c) and [PPO](https://github.com/openai/baselines/tree/master/baselines/ppo1). It uses the same hyper parameters and the model since they were well tuned for Atari games.
|
Also see the OpenAI posts: [A2C/ACKTR](https://blog.openai.com/baselines-acktr-a2c/) and [PPO](https://blog.openai.com/openai-baselines-ppo/) for more information.
|
||||||
|
|
||||||
|
This implementation is inspired by the OpenAI baselines for [A2C](https://github.com/openai/baselines/tree/master/baselines/a2c), [ACKTR](https://github.com/openai/baselines/tree/master/baselines/acktr) and [PPO](https://github.com/openai/baselines/tree/master/baselines/ppo1). It uses the same hyperparameters and model, since they were well tuned for Atari games.
|
||||||
|
|
||||||
## Contributions
|
## Contributions
|
||||||
|
|
||||||
Contributions are very welcome. If you know how to make this code better, don't hesitate to send a pull request.
|
Contributions are very welcome. If you know how to make this code better, don't hesitate to send a pull request. See also the TODO list below.
|
||||||
|
|
||||||
|
### TODO
|
||||||
|
* Add MuJoCo and continuous actions
|
||||||
|
* Improve performance of KFAC, see kfac.py for more information
|
||||||
|
* Run evaluation for all games and algorithms
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
### A2C
|
### A2C
|
||||||
|
|
||||||
```
|
```
|
||||||
python main.py --env-name "PongNoFrameskip-v4"
|
python main.py --env-name "PongNoFrameskip-v4"
|
||||||
```
|
```
|
||||||
@@ -21,6 +32,12 @@ python main.py --env-name "PongNoFrameskip-v4"
|
|||||||
python main.py --env-name "PongNoFrameskip-v4" --algo ppo --use-gae --num-processes 8 --num-steps 256 --vis-interval 1 --log-interval 1
|
python main.py --env-name "PongNoFrameskip-v4" --algo ppo --use-gae --num-processes 8 --num-steps 256 --vis-interval 1 --log-interval 1
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### ACKTR
|
||||||
|
|
||||||
|
```
|
||||||
|
python main.py --env-name "PongNoFrameskip-v4" --algo acktr --num-processes 32 --num-steps 20
|
||||||
|
```
|
||||||
|
|
||||||
## Results
|
## Results
|
||||||
|
|
||||||
### A2C
|
### A2C
|
||||||
@@ -36,3 +53,7 @@ python main.py --env-name "PongNoFrameskip-v4" --algo ppo --use-gae --num-proces
|
|||||||
### PPO
|
### PPO
|
||||||
|
|
||||||
Coming soon.
|
Coming soon.
|
||||||
|
|
||||||
|
### ACKTR
|
||||||
|
|
||||||
|
Coming soon.
|
||||||
|
|||||||
@@ -0,0 +1,223 @@
|
|||||||
|
import math
|
||||||
|
|
||||||
|
import torch
|
||||||
|
import torch.optim as optim
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: In order to make this code faster:
|
||||||
|
# 1) Implement _extract_patches as a single CUDA kernel
|
||||||
|
# 2) Compute QR decomposition in a separate process
|
||||||
|
# 3) Actually make a general KFAC optimizer so it fits PyTorch
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_patches(x, kernel_size, stride, padding):
|
||||||
|
#result = P.im2col(Variable(x), kernel_size, stride, padding).data
|
||||||
|
#return result.view(result.size(0), -1, result.size(-2), result.size(-1))
|
||||||
|
if padding[0] + padding[1] > 0:
|
||||||
|
x = F.pad(x, (padding[1], padding[1], padding[0],
|
||||||
|
padding[0])).data # Actually check dims
|
||||||
|
x = x.unfold(2, kernel_size[0], stride[0])
|
||||||
|
x = x.unfold(3, kernel_size[1], stride[1])
|
||||||
|
x = x.transpose_(1, 2).transpose_(2, 3).contiguous()
|
||||||
|
x = x.view(
|
||||||
|
x.size(0), x.size(1), x.size(2), x.size(3) * x.size(4) * x.size(5))
|
||||||
|
return x
|
||||||
|
|
||||||
|
|
||||||
|
def compute_cov_a(a, classname, layer_info, fast_cnn):
    """Return the batch-averaged second moment ``a^T a / batch`` of a layer input.

    Args:
        a: input tensor of the layer; its first dimension is used as the
            batch size.
        classname: class name of the layer ('Conv2d', 'AddBias', or anything
            else, in which case ``a`` is used as given).
        layer_info: ``(kernel_size, stride, padding)`` forwarded to
            ``_extract_patches``; only read for 'Conv2d'.
        fast_cnn: for 'Conv2d', average the patches of each sample instead of
            treating every patch as its own row.

    Returns:
        A square matrix whose side equals the last dimension of the
        (possibly transformed) input.
    """
    n_samples = a.size(0)

    if classname == 'AddBias':
        # A bias sees a constant input of one; keep it on the input's device.
        ones = torch.ones(a.size(0), 1)
        a = ones.cuda() if a.is_cuda else ones
    elif classname == 'Conv2d':
        patches = _extract_patches(a, *layer_info)
        if fast_cnn:
            # Average over all window positions of each sample.
            a = patches.view(patches.size(0), -1, patches.size(-1)).mean(1)
        else:
            # One row per window position, scaled down by both position counts.
            rows, cols = patches.size(1), patches.size(2)
            a = patches.view(-1, patches.size(-1)).div_(rows).div_(cols)

    return a.t() @ (a / n_samples)
|
||||||
|
|
||||||
|
|
||||||
|
def compute_cov_g(g, classname, layer_info, fast_cnn):
    """Return the second-moment matrix of a layer's output gradient.

    Args:
        g: gradient with respect to the layer output; its first dimension is
            used as the batch size.
        classname: class name of the layer ('Conv2d', 'AddBias', or anything
            else, in which case ``g`` is used as given).
        layer_info: accepted for symmetry with ``compute_cov_a``; not read.
        fast_cnn: for 'Conv2d', sum the gradient over all trailing dimensions
            instead of keeping one row per position.

    Returns:
        A square matrix whose side equals the last dimension of the
        (possibly transformed) gradient.
    """
    n_samples = g.size(0)

    collapse_trailing = classname == 'AddBias' or (
        classname == 'Conv2d' and fast_cnn)
    if collapse_trailing:
        # Sum everything past the first two dimensions.
        g = g.view(g.size(0), g.size(1), -1).sum(-1)
    elif classname == 'Conv2d':
        # Move dimension 1 last, then one row per position, scaled up by both
        # position counts.
        moved = g.permute(0, 2, 3, 1).contiguous()
        g = moved.view(-1, moved.size(-1)).mul_(moved.size(1)).mul_(
            moved.size(2))

    scaled = g * n_samples
    return scaled.t() @ (scaled / g.size(0))
|
||||||
|
|
||||||
|
|
||||||
|
def update_running_stat(aa, m_aa, momentum):
    """Update ``m_aa`` in place to ``momentum * m_aa + (1 - momentum) * aa``.

    The update is factored as ``(m_aa * momentum / (1 - momentum) + aa) *
    (1 - momentum)`` so that ``aa`` is left untouched and no temporary tensor
    is allocated.

    Args:
        aa: fresh statistic; not modified.
        m_aa: running statistic; modified in place.
        momentum: weight kept from the previous running value.
    """
    m_aa.mul_(momentum / (1 - momentum)).add_(aa).mul_(1 - momentum)
|
||||||
|
|
||||||
|
|
||||||
|
class KFACOptimizer(optim.Optimizer):
    """Kronecker-factored preconditioner driving an inner momentum SGD.

    Forward-pre and backward hooks on the model's direct children accumulate
    running input statistics (``m_aa``) and output-gradient statistics
    (``m_gg``). ``step`` rescales every gradient using the eigendecompositions
    of those statistics, applies a global clipping factor derived from
    ``kl_clip``, and hands the result to ``optim.SGD``.

    Args:
        model: module whose direct children are 'Linear', 'Conv2d' or
            'AddBias' layers, or layers without parameters.
        lr: learning rate; the inner SGD runs at ``lr * (1 - momentum)``.
        momentum: momentum of the inner SGD.
        stat_decay: decay of the running statistics.
        kl_clip: bound used to compute the global scaling factor in ``step``.
        damping: added to the eigenvalue products before dividing.
        weight_decay: added to the gradients and to the damping term.
        fast_cnn: forwarded to ``compute_cov_a`` / ``compute_cov_g``.
        Ts: input statistics are refreshed every ``Ts`` steps.
        Tf: eigendecompositions are refreshed every ``Tf`` steps.

    Raises:
        AssertionError: if a 'Linear' or 'Conv2d' child has its own bias.
        NotImplementedError: if a child of any other type has parameters.
    """

    def __init__(self,
                 model,
                 lr=0.25,
                 momentum=0.9,
                 stat_decay=0.99,
                 kl_clip=0.001,
                 damping=1e-2,
                 weight_decay=0,
                 fast_cnn=False,
                 Ts=1,
                 Tf=10):
        defaults = dict()
        super(KFACOptimizer, self).__init__(model.parameters(), defaults)

        self.known_modules = {'Linear', 'Conv2d', 'AddBias'}

        self.modules = []
        self.grad_outputs = {}

        # The backward hook reads this flag, so it has to exist before any
        # backward pass runs; callers switch it on around the backward pass
        # whose gradients should feed the statistics.
        self.acc_stats = False

        self.model = model
        self._prepare_model()

        self.steps = 0

        self.m_aa, self.m_gg = {}, {}
        self.Q_a, self.Q_g = {}, {}
        self.d_a, self.d_g = {}, {}

        self.momentum = momentum
        self.stat_decay = stat_decay

        self.lr = lr
        self.kl_clip = kl_clip
        self.damping = damping
        self.weight_decay = weight_decay

        self.fast_cnn = fast_cnn

        self.Ts = Ts
        self.Tf = Tf

        self.optim = optim.SGD(
            model.parameters(),
            lr=self.lr * (1 - self.momentum),
            momentum=self.momentum)

    @staticmethod
    def _layer_info(module, classname):
        """Return (kernel_size, stride, padding) for 'Conv2d', else None."""
        if classname == 'Conv2d':
            return (module.kernel_size, module.stride, module.padding)
        return None

    @staticmethod
    def _eigen_decompose(stat):
        """Eigendecompose a symmetric statistic on the CPU in double precision.

        Returns ``(eigenvalues, eigenvectors)`` as float tensors placed back
        on the device of ``stat``; eigenvalues not above 1e-6 are zeroed.
        """
        d, Q = torch.symeig(stat.cpu().double(), eigenvectors=True)
        d, Q = d.float(), Q.float()
        # Follow the statistic's device instead of assuming CUDA is available.
        if stat.is_cuda:
            d, Q = d.cuda(), Q.cuda()
        d.mul_((d > 1e-6).float())
        return d, Q

    def _save_input(self, module, input):
        """Forward-pre hook: fold the layer input into ``m_aa[module]``."""
        if not input[0].volatile and self.steps % self.Ts == 0:
            classname = module.__class__.__name__
            layer_info = self._layer_info(module, classname)

            aa = compute_cov_a(input[0].data, classname, layer_info,
                               self.fast_cnn)

            # Initialize buffers
            if self.steps == 0:
                self.m_aa[module] = aa.clone()

            update_running_stat(aa, self.m_aa[module], self.stat_decay)

    def _save_grad_output(self, module, grad_input, grad_output):
        """Backward hook: fold the output gradient into ``m_gg[module]``.

        Does nothing unless ``acc_stats`` is set.
        """
        if self.acc_stats:
            classname = module.__class__.__name__
            layer_info = self._layer_info(module, classname)

            gg = compute_cov_g(grad_output[0].data, classname, layer_info,
                               self.fast_cnn)

            # Initialize buffers
            if self.steps == 0:
                self.m_gg[module] = gg.clone()

            update_running_stat(gg, self.m_gg[module], self.stat_decay)

    def _prepare_model(self):
        """Register the statistics hooks on every supported direct child."""
        for module in self.model.children():
            classname = module.__class__.__name__
            if classname in self.known_modules:
                assert not ((classname in ['Linear', 'Conv2d']) and module.bias is not None), \
                    "You must have a bias as a separate layer"

                self.modules.append(module)
                module.register_forward_pre_hook(self._save_input)
                module.register_backward_hook(self._save_grad_output)
            elif len(list(module.parameters())) > 0:
                raise NotImplementedError(
                    'Layer {} is not supported'.format(classname))

    def step(self):
        """Precondition the current gradients and take one inner SGD step."""
        # Add weight decay
        if self.weight_decay > 0:
            for p in self.model.parameters():
                p.grad.data.add_(self.weight_decay, p.data)

        la = self.damping + self.weight_decay
        refresh_eigen = self.steps % self.Tf == 0

        updates = {}
        for m in self.modules:
            assert len(list(m.parameters())
                       ) == 1, "Can handle only one parameter at the moment"
            classname = m.__class__.__name__
            p = next(m.parameters())

            if refresh_eigen:
                self.d_a[m], self.Q_a[m] = self._eigen_decompose(self.m_aa[m])
                self.d_g[m], self.Q_g[m] = self._eigen_decompose(self.m_gg[m])

            # Convolution weights are flattened to (dim 0, everything else).
            if classname == 'Conv2d':
                p_grad_mat = p.grad.data.view(p.grad.data.size(0), -1)
            else:
                p_grad_mat = p.grad.data

            # Rotate into the eigenbasis, divide by the damped eigenvalue
            # products, and rotate back.
            v1 = self.Q_g[m].t() @ p_grad_mat @ self.Q_a[m]
            v2 = v1 / (
                self.d_g[m].unsqueeze(1) * self.d_a[m].unsqueeze(0) + la)
            v = self.Q_g[m] @ v2 @ self.Q_a[m].t()

            v = v.view(p.grad.data.size())
            updates[p] = v

        vg_sum = 0
        for p in self.model.parameters():
            v = updates[p]
            vg_sum += (v * p.grad.data * self.lr * self.lr).sum()

        # NOTE(review): a zero vg_sum (all gradients zero) divides by zero
        # here -- confirm whether callers can hit that case.
        nu = min(1, math.sqrt(self.kl_clip / vg_sum))

        for p in self.model.parameters():
            v = updates[p]
            p.grad.data.copy_(v)
            p.grad.data.mul_(nu)

        self.optim.step()
        self.steps += 1
|
||||||
@@ -13,12 +13,13 @@ from torch.utils.data.sampler import BatchSampler, SubsetRandomSampler
|
|||||||
|
|
||||||
from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv
|
from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv
|
||||||
from envs import make_env
|
from envs import make_env
|
||||||
|
from kfac import KFACOptimizer
|
||||||
from model import ActorCritic
|
from model import ActorCritic
|
||||||
from vizualize_atari import visdom_plot
|
from vizualize_atari import visdom_plot
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(description='RL')
|
parser = argparse.ArgumentParser(description='RL')
|
||||||
parser.add_argument('--algo', default='a2c',
|
parser.add_argument('--algo', default='a2c',
|
||||||
help='algorithm to use: a2c | ppo')
|
help='algorithm to use: a2c | ppo | acktr')
|
||||||
parser.add_argument('--lr', type=float, default=7e-4,
|
parser.add_argument('--lr', type=float, default=7e-4,
|
||||||
help='learning rate (default: 7e-4)')
|
help='learning rate (default: 7e-4)')
|
||||||
parser.add_argument('--eps', type=float, default=1e-5,
|
parser.add_argument('--eps', type=float, default=1e-5,
|
||||||
@@ -69,7 +70,7 @@ parser.add_argument('--no-vis', action='store_true', default=False,
|
|||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
assert args.algo in ['a2c', 'ppo']
|
assert args.algo in ['a2c', 'ppo', 'acktr']
|
||||||
if args.algo == 'ppo':
|
if args.algo == 'ppo':
|
||||||
assert args.num_processes * args.num_steps % args.batch_size == 0
|
assert args.num_processes * args.num_steps % args.batch_size == 0
|
||||||
args.cuda = not args.no_cuda and torch.cuda.is_available()
|
args.cuda = not args.no_cuda and torch.cuda.is_available()
|
||||||
@@ -117,6 +118,8 @@ def main():
|
|||||||
optimizer = optim.RMSprop(actor_critic.parameters(), args.lr, eps=args.eps, alpha=args.alpha)
|
optimizer = optim.RMSprop(actor_critic.parameters(), args.lr, eps=args.eps, alpha=args.alpha)
|
||||||
elif args.algo == 'ppo':
|
elif args.algo == 'ppo':
|
||||||
optimizer = optim.Adam(actor_critic.parameters(), eps=args.eps)
|
optimizer = optim.Adam(actor_critic.parameters(), eps=args.eps)
|
||||||
|
elif args.algo == 'acktr':
|
||||||
|
optimizer = KFACOptimizer(actor_critic)
|
||||||
|
|
||||||
obs_shape = envs.observation_space.shape
|
obs_shape = envs.observation_space.shape
|
||||||
obs_shape = (obs_shape[0] * args.num_stack, obs_shape[1], obs_shape[2])
|
obs_shape = (obs_shape[0] * args.num_stack, obs_shape[1], obs_shape[2])
|
||||||
@@ -205,7 +208,7 @@ def main():
|
|||||||
returns[step] = returns[step + 1] * \
|
returns[step] = returns[step + 1] * \
|
||||||
args.gamma * masks[step] + rewards[step]
|
args.gamma * masks[step] + rewards[step]
|
||||||
|
|
||||||
if args.algo == 'a2c':
|
if args.algo in ['a2c', 'acktr']:
|
||||||
# Reshape to do in a single forward pass for all steps
|
# Reshape to do in a single forward pass for all steps
|
||||||
values, logits = actor_critic(Variable(states[:-1].view(-1, *states.size()[-3:])))
|
values, logits = actor_critic(Variable(states[:-1].view(-1, *states.size()[-3:])))
|
||||||
log_probs = F.log_softmax(logits)
|
log_probs = F.log_softmax(logits)
|
||||||
@@ -228,10 +231,29 @@ def main():
|
|||||||
|
|
||||||
action_loss = -(Variable(advantages.data) * action_log_probs).mean()
|
action_loss = -(Variable(advantages.data) * action_log_probs).mean()
|
||||||
|
|
||||||
|
if args.algo == 'acktr' and optimizer.steps % optimizer.Ts == 0:
|
||||||
|
# Sampled fisher, see Martens 2014
|
||||||
|
actor_critic.zero_grad()
|
||||||
|
pg_fisher_loss = -action_log_probs.mean()
|
||||||
|
|
||||||
|
value_noise = Variable(torch.randn(values[:-1].size()))
|
||||||
|
if args.cuda:
|
||||||
|
value_noise = value_noise.cuda()
|
||||||
|
|
||||||
|
sample_values = values[:-1] + value_noise
|
||||||
|
vf_fisher_loss = - (values[:-1] - Variable(sample_values.data)).pow(2).mean()
|
||||||
|
|
||||||
|
fisher_loss = pg_fisher_loss + vf_fisher_loss
|
||||||
|
optimizer.acc_stats = True
|
||||||
|
fisher_loss.backward(retain_graph=True)
|
||||||
|
optimizer.acc_stats = False
|
||||||
|
|
||||||
optimizer.zero_grad()
|
optimizer.zero_grad()
|
||||||
(value_loss * args.value_loss_coef + action_loss - dist_entropy * args.entropy_coef).backward()
|
(value_loss * args.value_loss_coef + action_loss - dist_entropy * args.entropy_coef).backward()
|
||||||
|
|
||||||
nn.utils.clip_grad_norm(actor_critic.parameters(), args.max_grad_norm)
|
if args.algo == 'a2c':
|
||||||
|
nn.utils.clip_grad_norm(actor_critic.parameters(), args.max_grad_norm)
|
||||||
|
|
||||||
optimizer.step()
|
optimizer.step()
|
||||||
elif args.algo == 'ppo':
|
elif args.algo == 'ppo':
|
||||||
advantages = returns[:-1] - value_preds[:-1]
|
advantages = returns[:-1] - value_preds[:-1]
|
||||||
|
|||||||
@@ -13,18 +13,40 @@ def weights_init(m):
|
|||||||
m.bias.data.fill_(0)
|
m.bias.data.fill_(0)
|
||||||
|
|
||||||
|
|
||||||
|
# Necessary for my KFAC implementation.
|
||||||
|
class AddBias(nn.Module):
    """Standalone bias layer, so the bias lives in its own module.

    The bias is stored as an ``(out_features, 1)`` parameter and broadcast
    over dimension 1 of the input.
    """

    def __init__(self, out_features):
        super(AddBias, self).__init__()
        self.bias = nn.Parameter(torch.zeros(out_features, 1))

    def forward(self, x):
        """Return ``x`` plus the bias, broadcast along dimension 1.

        A 2-D input gets a ``(1, out_features)`` bias; any other input gets a
        ``(1, out_features, 1, 1)`` bias.
        """
        target_shape = (1, -1) if x.dim() == 2 else (1, -1, 1, 1)
        return x + self.bias.t().view(*target_shape)
|
||||||
|
|
||||||
|
|
||||||
class ActorCritic(torch.nn.Module):
|
class ActorCritic(torch.nn.Module):
|
||||||
def __init__(self, num_inputs, action_space):
|
def __init__(self, num_inputs, action_space):
|
||||||
super(ActorCritic, self).__init__()
|
super(ActorCritic, self).__init__()
|
||||||
self.conv1 = nn.Conv2d(num_inputs, 32, 8, stride=4)
|
self.conv1 = nn.Conv2d(num_inputs, 32, 8, stride=4, bias=False)
|
||||||
self.conv2 = nn.Conv2d(32, 64, 4, stride=2)
|
self.ab1 = AddBias(32)
|
||||||
self.conv3 = nn.Conv2d(64, 64, 3, stride=1)
|
self.conv2 = nn.Conv2d(32, 64, 4, stride=2, bias=False)
|
||||||
|
self.ab2 = AddBias(64)
|
||||||
|
self.conv3 = nn.Conv2d(64, 32, 3, stride=1, bias=False)
|
||||||
|
self.ab3 = AddBias(32)
|
||||||
|
|
||||||
self.linear1 = nn.Linear(64 * 7 * 7, 512)
|
self.linear1 = nn.Linear(32 * 7 * 7, 512, bias=False)
|
||||||
|
self.ab_fc1 = AddBias(512)
|
||||||
|
|
||||||
num_outputs = action_space.n
|
num_outputs = action_space.n
|
||||||
self.critic_linear = nn.Linear(512, 1)
|
self.critic_linear = nn.Linear(512, 1, bias=False)
|
||||||
self.actor_linear = nn.Linear(512, num_outputs)
|
self.ab_fc2 = AddBias(1)
|
||||||
|
|
||||||
|
self.actor_linear = nn.Linear(512, num_outputs, bias=False)
|
||||||
|
self.ab_fc3 = AddBias(num_outputs)
|
||||||
|
|
||||||
self.apply(weights_init)
|
self.apply(weights_init)
|
||||||
|
|
||||||
@@ -37,16 +59,21 @@ class ActorCritic(torch.nn.Module):
|
|||||||
|
|
||||||
def forward(self, inputs):
|
def forward(self, inputs):
|
||||||
x = self.conv1(inputs / 255.0)
|
x = self.conv1(inputs / 255.0)
|
||||||
|
x = self.ab1(x)
|
||||||
x = F.relu(x)
|
x = F.relu(x)
|
||||||
|
|
||||||
x = self.conv2(x)
|
x = self.conv2(x)
|
||||||
|
x = self.ab2(x)
|
||||||
x = F.relu(x)
|
x = F.relu(x)
|
||||||
|
|
||||||
x = self.conv3(x)
|
x = self.conv3(x)
|
||||||
|
x = self.ab3(x)
|
||||||
x = F.relu(x)
|
x = F.relu(x)
|
||||||
|
|
||||||
x = x.view(-1, 64 * 7 * 7)
|
x = x.view(-1, 32 * 7 * 7)
|
||||||
x = self.linear1(x)
|
x = self.linear1(x)
|
||||||
|
x = self.ab_fc1(x)
|
||||||
x = F.relu(x)
|
x = F.relu(x)
|
||||||
|
|
||||||
return self.critic_linear(x), self.actor_linear(x)
|
return self.ab_fc2(self.critic_linear(x)), self.ab_fc3(
|
||||||
|
self.actor_linear(x))
|
||||||
|
|||||||
Reference in New Issue
Block a user