mirror of
https://github.com/wassname/NALU-pytorch.git
synced 2026-06-27 16:00:06 +08:00
implemented nac and nalu.
This commit is contained in:
+7
-4
@@ -11,10 +11,13 @@ TEST_RANGE = [-20, 20]
|
||||
LEARNING_RATE = 1e-2
|
||||
NUM_ITERS = int(1e4)
|
||||
NON_LINEARITIES = [
|
||||
'hardtanh', 'sigmoid', 'relu6', 'tanh',
|
||||
'tanhshrink', 'hardshrink', 'leakyrelu',
|
||||
'softshrink', 'softsign', 'relu',
|
||||
'prelu', 'softplus', 'elu', 'selu',
|
||||
'hardtanh', 'sigmoid',
|
||||
'relu6', 'tanh',
|
||||
'tanhshrink', 'hardshrink',
|
||||
'leakyrelu', 'softshrink',
|
||||
'softsign', 'relu',
|
||||
'prelu', 'softplus',
|
||||
'elu', 'selu',
|
||||
]
|
||||
|
||||
|
||||
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 392 KiB After Width: | Height: | Size: 453 KiB |
@@ -1 +1,3 @@
|
||||
from .mlp import MLP
|
||||
from .nac import NAC
|
||||
from .nalu import NALU
|
||||
|
||||
@@ -0,0 +1,40 @@
|
||||
import math
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.init as init
|
||||
import torch.nn.functional as F
|
||||
|
||||
from torch.nn.parameter import Parameter
|
||||
|
||||
|
||||
class NAC(nn.Module):
|
||||
"""A Neural Accumulator [1].
|
||||
|
||||
NAC supports the ability to accumulate quantities
|
||||
additively which is a desirable inductive bias for
|
||||
linear extrapolation.
|
||||
|
||||
Attributes:
|
||||
in_features: size of the input sample.
|
||||
out_features: size of the output sample.
|
||||
|
||||
Sources:
|
||||
[1]: https://arxiv.org/abs/1808.00508
|
||||
"""
|
||||
def __init__(self, in_features, out_features):
|
||||
super().__init__()
|
||||
self.in_features = in_features
|
||||
self.out_features = out_features
|
||||
|
||||
self.W_hat = Parameter(torch.Tensor(out_features, in_features))
|
||||
self.M_hat = Parameter(torch.Tensor(out_features, in_features))
|
||||
self.W = F.tanh(self.W_hat) * F.sigmoid(self.M_hat)
|
||||
|
||||
init.kaiming_uniform_(self.W_hat, a=math.sqrt(5))
|
||||
init.kaiming_uniform_(self.M_hat, a=math.sqrt(5))
|
||||
|
||||
def forward(self, input):
|
||||
return F.linear(input, self.W, None)
|
||||
|
||||
def extra_repr(self):
|
||||
return 'in_features={}, out_features={}'.format(self.in_features, self.out_features)
|
||||
@@ -0,0 +1,48 @@
|
||||
import math
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.init as init
|
||||
import torch.nn.functional as F
|
||||
|
||||
from .nac import NAC
|
||||
from torch.nn.parameter import Parameter
|
||||
|
||||
|
||||
class NALU(nn.Module):
|
||||
"""A Neural Arithmetic Logic Unit [1].
|
||||
|
||||
NALU uses 2 NACs with tied weights to support
|
||||
multiplicative extrapolation.
|
||||
|
||||
Attributes:
|
||||
in_features: size of the input sample.
|
||||
out_features: size of the output sample.
|
||||
|
||||
Sources:
|
||||
[1]: https://arxiv.org/abs/1808.00508
|
||||
"""
|
||||
def __init__(self, in_features, out_features):
|
||||
super().__init__()
|
||||
self.in_features = in_features
|
||||
self.out_features = out_features
|
||||
self.eps = 1e-10
|
||||
|
||||
self.G = Parameter(torch.Tensor(out_features, in_features))
|
||||
self.W = Parameter(torch.Tensor(out_features, in_features))
|
||||
self.nac = NAC(in_features, out_features)
|
||||
|
||||
init.kaiming_uniform_(self.G, a=math.sqrt(5))
|
||||
init.kaiming_uniform_(self.W, a=math.sqrt(5))
|
||||
|
||||
def forward(self, input):
|
||||
a = self.nac(input)
|
||||
g = F.sigmoid(F.linear(input, self.G, None))
|
||||
add_sub = a * g
|
||||
log_input = torch.log(torch.abs(input) + self.eps)
|
||||
m = torch.exp(F.linear(log_input, self.W, None))
|
||||
mul_div = (1 - g) * m
|
||||
y = add_sub + mul_div
|
||||
return y
|
||||
|
||||
def extra_repr(self):
|
||||
return 'in_features={}, out_features={}'.format(self.in_features, self.out_features)
|
||||
Reference in New Issue
Block a user