mirror of
https://github.com/wassname/NALU-pytorch.git
synced 2026-06-27 16:00:06 +08:00
71 lines
1.9 KiB
Python
71 lines
1.9 KiB
Python
import math
|
|
import torch
|
|
import torch.nn as nn
|
|
import torch.nn.init as init
|
|
import torch.nn.functional as F
|
|
|
|
from torch.nn.parameter import Parameter
|
|
|
|
|
|
class NeuralAccumulatorCell(nn.Module):
    """A Neural Accumulator (NAC) cell [1].

    The cell applies a bias-free linear map whose effective weight matrix is
    ``W = tanh(W_hat) * sigmoid(M_hat)``, so every entry of ``W`` lies in
    ``[-1, 1]``.  ``W`` is recomputed from the two learnable parameters on
    every access so that gradients reach ``W_hat`` and ``M_hat``.

    Attributes:
        in_dim: size of the input sample.
        out_dim: size of the output sample.
        W_hat: learnable parameter of shape ``(out_dim, in_dim)`` fed to tanh.
        M_hat: learnable parameter of shape ``(out_dim, in_dim)`` fed to
            sigmoid.
        W: derived effective weight (read-only property, not a parameter and
            therefore not part of ``state_dict``).

    Sources:
        [1]: https://arxiv.org/abs/1808.00508
    """

    def __init__(self, in_dim, out_dim):
        super().__init__()
        self.in_dim = in_dim
        self.out_dim = out_dim

        self.W_hat = Parameter(torch.empty(out_dim, in_dim))
        self.M_hat = Parameter(torch.empty(out_dim, in_dim))
        # The cell has no bias term; registering None keeps `self.bias`
        # available for F.linear.
        self.register_parameter('bias', None)

        init.kaiming_uniform_(self.W_hat, a=math.sqrt(5))
        init.kaiming_uniform_(self.M_hat, a=math.sqrt(5))

    @property
    def W(self):
        """Return the effective weight ``tanh(W_hat) * sigmoid(M_hat)``.

        Computed on demand rather than stored: a stored copy would be a
        separate leaf tensor, detached from ``W_hat`` and ``M_hat``.
        """
        return torch.tanh(self.W_hat) * torch.sigmoid(self.M_hat)

    def forward(self, input):
        """Apply the bias-free linear map with weight ``W`` to ``input``."""
        return F.linear(input, self.W, self.bias)

    def extra_repr(self):
        """Return the dimension summary shown in the module's repr."""
        return 'in_dim={}, out_dim={}'.format(
            self.in_dim, self.out_dim
        )
|
|
|
|
|
|
class NAC(nn.Module):
    """Sequential stack of Neural Accumulator cells.

    The first cell takes ``in_dim`` inputs, the last cell produces
    ``out_dim`` outputs, and every connection in between has width
    ``hidden_dim``.

    Attributes:
        num_layers: the number of NAC layers.
        in_dim: the size of the input sample.
        hidden_dim: the size of the hidden layers.
        out_dim: the size of the output.
    """

    def __init__(self, num_layers, in_dim, hidden_dim, out_dim):
        super().__init__()
        self.num_layers = num_layers
        self.in_dim = in_dim
        self.hidden_dim = hidden_dim
        self.out_dim = out_dim

        final_idx = num_layers - 1
        # Only the first cell sees the raw input width and only the last
        # cell emits the output width; everything else uses hidden_dim.
        cells = [
            NeuralAccumulatorCell(
                in_dim if idx == 0 else hidden_dim,
                out_dim if idx == final_idx else hidden_dim,
            )
            for idx in range(num_layers)
        ]
        self.model = nn.Sequential(*cells)

    def forward(self, x):
        """Pass ``x`` through every cell in order and return the result."""
        return self.model(x)
|