# Mirror of https://github.com/wassname/NALU-pytorch.git
# Synced 2026-06-27 16:00:06 +08:00
# 78 lines, 2.1 KiB, Python
import math
|
|
import torch
|
|
import torch.nn as nn
|
|
import torch.nn.init as init
|
|
import torch.nn.functional as F
|
|
|
|
from .nac import NeuralAccumulatorCell
|
|
from torch.nn.parameter import Parameter
|
|
|
|
|
|
class NeuralArithmeticLogicUnitCell(nn.Module):
    """A Neural Arithmetic Logic Unit (NALU) cell [1].

    The cell mixes an additive path and a multiplicative path through a
    learned sigmoid gate::

        g = sigmoid(linear(x, G))
        a = nac(x)
        m = exp(nac(log(|x| + eps)))
        y = g * a + (1 - g) * m

    The same ``nac`` sub-module (and therefore the same weights) is applied
    on both paths.

    Attributes:
        in_dim: size of the input sample.
        out_dim: size of the output sample.
        eps: small constant added to ``|x|`` before taking the log, so a
            zero input yields a finite value instead of ``-inf``.
        G: gate weight matrix of shape ``(out_dim, in_dim)``.
        nac: the ``NeuralAccumulatorCell`` shared by both paths.
        bias: registered as ``None``; the gate has no bias term.

    Sources:
        [1]: https://arxiv.org/abs/1808.00508
    """

    def __init__(self, in_dim, out_dim):
        """Create the gate matrix and the shared accumulator cell.

        Args:
            in_dim: size of the input sample.
            out_dim: size of the output sample.
        """
        super().__init__()
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.eps = 1e-10

        # torch.empty replaces the legacy torch.Tensor(sizes) constructor;
        # the contents are overwritten by the Kaiming init below either way.
        self.G = Parameter(torch.empty(out_dim, in_dim))
        self.nac = NeuralAccumulatorCell(in_dim, out_dim)
        self.register_parameter('bias', None)

        init.kaiming_uniform_(self.G, a=math.sqrt(5))

    def forward(self, input):
        """Compute the gated mix of the additive and multiplicative paths.

        Args:
            input: tensor whose last dimension must equal ``in_dim`` (it is
                passed to ``F.linear`` together with ``G``).

        Returns:
            ``g * a + (1 - g) * m`` as described in the class docstring.
            NOTE(review): the output shape follows whatever ``self.nac``
            returns -- presumably ``(..., out_dim)``; confirm against the
            ``NeuralAccumulatorCell`` implementation.
        """
        nac_out = self.nac(input)

        # torch.sigmoid is used instead of F.sigmoid, which PyTorch has
        # flagged as deprecated (it emits a warning on some releases).
        gate = torch.sigmoid(F.linear(input, self.G, self.bias))
        add_sub = gate * nac_out

        # Multiplication/division is done as addition/subtraction in log
        # space. Only the magnitude of the input is used (torch.abs), so
        # the sign of the input is discarded on this path.
        log_input = torch.log(torch.abs(input) + self.eps)
        mul_div = (1 - gate) * torch.exp(self.nac(log_input))

        return add_sub + mul_div

    def extra_repr(self):
        """Return the dimension summary shown in the module's repr."""
        return 'in_dim={}, out_dim={}'.format(self.in_dim, self.out_dim)
|
|
|
|
|
|
class NALU(nn.Module):
    """A stack of NALU cells applied one after another.

    The first cell maps ``in_dim`` to ``hidden_dim``, intermediate cells map
    ``hidden_dim`` to ``hidden_dim`` and the last cell maps to ``out_dim``.
    With a single layer the only cell maps ``in_dim`` directly to
    ``out_dim`` and ``hidden_dim`` is not used.

    Attributes:
        num_layers: the number of NALU cells in the stack.
        in_dim: the size of the input sample.
        hidden_dim: the size of the hidden layers.
        out_dim: the size of the output.
        model: ``nn.Sequential`` holding the cells in application order.
    """

    def __init__(self, num_layers, in_dim, hidden_dim, out_dim):
        super().__init__()
        self.num_layers = num_layers
        self.in_dim = in_dim
        self.hidden_dim = hidden_dim
        self.out_dim = out_dim

        # Index of the final cell; it is the one that emits ``out_dim``.
        final_idx = num_layers - 1
        cells = [
            NeuralArithmeticLogicUnitCell(
                in_dim if idx == 0 else hidden_dim,
                out_dim if idx >= final_idx else hidden_dim,
            )
            for idx in range(num_layers)
        ]
        self.model = nn.Sequential(*cells)

    def forward(self, x):
        """Run ``x`` through every cell in order and return the result."""
        return self.model(x)
|