mirror of
https://github.com/wassname/NALU-pytorch.git
synced 2026-06-26 15:50:07 +08:00
trying asinh
This commit is contained in:
@@ -1,3 +1,18 @@
|
||||
Comparison of Neural Arithmetic Logic Units (NALU) with the `NAC_exact` and asinh-based `NALU_sinh` variants.
|
||||
|
||||
|
||||
| | NAC_exact | NALU_sinh | Relu6 | None | NAC | NALU |
|
||||
| :------ | :-------- | :-------- | :----- | :---- | :---- | :----- |
|
||||
| a + b | 0.133 | 0.530 | 3.846 | 0.140 | 0.155 | 0.139 |
|
||||
| a - b | 3.642 | 5.513 | 87.524 | 1.774 | 0.986 | 10.864 |
|
||||
| a * b | 1.525 | 0.444 | 4.082 | 0.319 | 2.889 | 2.139 |
|
||||
| a / b | 0.266 | 0.796 | 4.337 | 0.341 | 2.002 | 1.547 |
|
||||
| a ^ 2 | 1.127 | 1.100 | 92.235 | 0.763 | 4.867 | 0.852 |
|
||||
| sqrt(a) | 0.951 | 0.798 | 85.603 | 0.549 | 4.589 | 0.511 |
|
||||
|
||||
|
||||
|
||||
|
||||
# Neural Arithmetic Logic Units
|
||||
|
||||
[WIP]
|
||||
@@ -43,6 +58,8 @@ python function_learning.py
|
||||
```
|
||||
This should generate a text file called `interpolation.txt` containing the following results. (Currently only interpolation is supported; I'm working on the rest.)
|
||||
|
||||
|
||||
|
||||
| | Relu6 | None | NAC | NALU |
|
||||
|---------|----------|----------|----------|--------|
|
||||
| a + b | 4.472 | 0.132 | 0.154 | 0.157 |
|
||||
@@ -51,3 +68,4 @@ This should generate a text file called `interpolation.txt` with the following r
|
||||
| a / b | 97.070 | 60.594 | 5.730 | 3.042 |
|
||||
| a ^ 2 | 89.987 | 2.977 | 4.718 | 1.117 |
|
||||
| sqrt(a) | 5.939 | 40.243 | 7.263 | 1.119 |
|
||||
|
||||
|
||||
+20
-5
@@ -1,13 +1,14 @@
|
||||
import math
|
||||
import random
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
from models import MLP, NAC, NALU
|
||||
from models import MLP, NAC, NALU, NALU_sinh, NAC_exact
|
||||
|
||||
NORMALIZE = True
|
||||
NUM_LAYERS = 2
|
||||
@@ -68,6 +69,18 @@ def main():
|
||||
save_dir = './results/'
|
||||
|
||||
models = [
|
||||
NAC_exact(
|
||||
num_layers=NUM_LAYERS,
|
||||
in_dim=2,
|
||||
hidden_dim=HIDDEN_DIM,
|
||||
out_dim=1,
|
||||
),
|
||||
NALU_sinh(
|
||||
num_layers=NUM_LAYERS,
|
||||
in_dim=2,
|
||||
hidden_dim=HIDDEN_DIM,
|
||||
out_dim=1
|
||||
),
|
||||
MLP(
|
||||
num_layers=NUM_LAYERS,
|
||||
in_dim=2,
|
||||
@@ -99,6 +112,7 @@ def main():
|
||||
results = {}
|
||||
for fn_str, fn in ARITHMETIC_FUNCTIONS.items():
|
||||
results[fn_str] = []
|
||||
print("running", fn_str)
|
||||
|
||||
# dataset
|
||||
X_train, y_train, X_test, y_test = generate_data(
|
||||
@@ -125,16 +139,17 @@ def main():
|
||||
train(net, optim, X_train, y_train, NUM_ITERS)
|
||||
mse = test(net, X_test, y_test).mean().item()
|
||||
results[fn_str].append(mse)
|
||||
print("mse", mse)
|
||||
|
||||
with open(save_dir + "interpolation.txt", "w") as f:
|
||||
f.write("Relu6\tNone\tNAC\tNALU\n")
|
||||
f.write("NAC_exact\tNALU_sinh\tRelu6\tNone\tNAC\tNALU\n")
|
||||
for k, v in results.items():
|
||||
rand = results[k][0]
|
||||
mses = [100.0*x/rand for x in results[k][1:]]
|
||||
mses = [100.0 * x / rand for x in results[k][1:]]
|
||||
if NORMALIZE:
|
||||
f.write("{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}\n".format(*mses))
|
||||
f.write(("\t".join(["{:.3f}"]*len(mses))+"\n").format(*mses))
|
||||
else:
|
||||
f.write("{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}\n".format(*results[k][1:]))
|
||||
f.write(("\t".join(["{:.3f}"]*len(mses))+"\n").format(*results[k][1:]))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
from .mlp import MLP
|
||||
from .nac import NeuralAccumulatorCell, NAC
|
||||
from .nalu import NeuralArithmeticLogicUnitCell, NALU
|
||||
from .nalu_sinh import NeuralArithmeticLogicUnitCellSinh, NALU_sinh
|
||||
from .nac_exact import NeuralAccumulatorCell_exact, NAC_exact
|
||||
|
||||
@@ -0,0 +1,69 @@
|
||||
import math
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.init as init
|
||||
import torch.nn.functional as F
|
||||
|
||||
from torch.nn.parameter import Parameter
|
||||
|
||||
|
||||
class NeuralAccumulatorCell_exact(nn.Module):
    """Gated linear cell used as the "exact" variant of the NAC cell [1].

    Two ``nn.Linear`` maps are applied to the same input: ``transform``
    yields a candidate value ``x`` and ``gate`` yields a sigmoid gate ``g``.
    The cell returns ``(1 - g) * x - g * x``, which is ``x`` scaled
    elementwise by ``1 - 2 * g``.

    Attributes:
        in_dim: size of the input sample.
        out_dim: size of the output sample.
        gate: linear layer producing the gate pre-activation.
        transform: linear layer producing the candidate value.

    Sources:
        [1]: https://arxiv.org/abs/1808.00508
    """

    def __init__(self, in_dim, out_dim):
        super().__init__()
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.gate = nn.Linear(in_dim, out_dim)
        self.transform = nn.Linear(in_dim, out_dim)

        # Explicitly (re-)initialise both weight matrices; the biases keep
        # whatever nn.Linear assigned them at construction.
        init.kaiming_uniform_(self.gate.weight, a=math.sqrt(5))
        init.kaiming_uniform_(self.transform.weight, a=math.sqrt(5))

    def forward(self, input):
        """Return ``(1 - g) * x - g * x`` for the given input tensor.

        Args:
            input: tensor whose last dimension is ``in_dim`` (required by the
                two ``nn.Linear`` layers).

        Returns:
            Tensor with last dimension ``out_dim``.
        """
        x = self.transform(input)
        # torch.sigmoid replaces the deprecated F.sigmoid alias; the values
        # are the same.
        g = torch.sigmoid(self.gate(input))
        return (1 - g) * x - g * x

    def extra_repr(self):
        """Return the dimension summary shown in the module's repr."""
        return 'in_dim={}, out_dim={}'.format(
            self.in_dim, self.out_dim
        )
|
||||
|
||||
|
||||
class NAC_exact(nn.Module):
    """Sequential stack of ``NeuralAccumulatorCell_exact`` cells.

    The first cell maps ``in_dim`` features, the last cell emits ``out_dim``
    features, and every interface between cells has ``hidden_dim`` features.

    Attributes:
        num_layers: the number of stacked cells.
        in_dim: the size of the input sample.
        hidden_dim: the size of the hidden layers.
        out_dim: the size of the output.
        model: the ``nn.Sequential`` container holding the cells in order.
    """

    def __init__(self, num_layers, in_dim, hidden_dim, out_dim):
        super().__init__()
        self.num_layers = num_layers
        self.in_dim = in_dim
        self.hidden_dim = hidden_dim
        self.out_dim = out_dim

        final_idx = num_layers - 1
        # Cells are created in index order, first to last.
        cells = [
            NeuralAccumulatorCell_exact(
                in_dim if idx == 0 else hidden_dim,
                out_dim if idx == final_idx else hidden_dim,
            )
            for idx in range(num_layers)
        ]
        self.model = nn.Sequential(*cells)

    def forward(self, x):
        """Pass ``x`` through every cell in order and return the result."""
        return self.model(x)
|
||||
@@ -0,0 +1,89 @@
|
||||
import math
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.init as init
|
||||
import torch.nn.functional as F
|
||||
|
||||
from .nac import NeuralAccumulatorCell
|
||||
from torch.nn.parameter import Parameter
|
||||
|
||||
|
||||
def asinh(x):
    """Return the elementwise inverse hyperbolic sine of tensor ``x``.

    The textbook form ``log(x + sqrt(x**2 + 1))`` is not evaluated directly:
    for large negative ``x`` the two terms cancel, so the argument of the log
    rounds to zero (or to a wrong value) and the result is -inf, NaN or
    inaccurate. ``torch.asinh`` is used when the installed PyTorch provides
    it; otherwise the formula is evaluated on ``|x|`` and the sign restored,
    which is valid because asinh is an odd function.

    https://en.wikipedia.org/wiki/Inverse_hyperbolic_functions#Inverse%20hyperbolic%20sine

    Args:
        x: input tensor.

    Returns:
        Tensor of the same shape as ``x``.
    """
    native_asinh = getattr(torch, "asinh", None)
    if native_asinh is not None:
        return native_asinh(x)

    # Fallback for PyTorch builds without torch.asinh: on |x| both terms are
    # non-negative, so no cancellation can occur.
    magnitude = torch.abs(x)
    return torch.sign(x) * torch.log(
        magnitude + torch.sqrt(torch.pow(magnitude, 2) + 1)
    )
|
||||
|
||||
def sinh(x):
    """Return the elementwise hyperbolic sine of tensor ``x``.

    Delegates to ``torch.sinh`` rather than evaluating
    ``(exp(x) - exp(-x)) / 2`` by hand: for small ``|x|`` the two
    exponentials are nearly equal and their difference loses all significant
    digits (the hand-written form returns 0 for ``x = 1e-8`` in float32).

    Args:
        x: input tensor.

    Returns:
        Tensor of the same shape as ``x``.
    """
    return torch.sinh(x)
|
||||
|
||||
|
||||
class NeuralArithmeticLogicUnitCellSinh(nn.Module):
    """NALU-style cell [1] whose second path goes through asinh/sinh.

    Forward computation, with ``nac`` a single shared
    ``NeuralAccumulatorCell``:

        a = nac(input)                     # first path
        m = sinh(nac(asinh(input)))        # second path
        g = sigmoid(input @ G.T)           # gate, no bias
        y = g * a + (1 - g) * m

    Attributes:
        in_dim: size of the input sample.
        out_dim: size of the output sample.
        eps: small constant stored on the module; it is not read anywhere in
            this class.
        G: gate weight matrix of shape ``(out_dim, in_dim)``.
        nac: the accumulator cell shared by both paths.
        bias: registered as ``None`` so the gate is computed without a bias.

    Sources:
        [1]: https://arxiv.org/abs/1808.00508
    """

    def __init__(self, in_dim, out_dim):
        super().__init__()
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.eps = 1e-10

        # G is allocated uninitialised here and filled by kaiming_uniform_
        # below, after the accumulator cell has been constructed.
        self.G = Parameter(torch.Tensor(out_dim, in_dim))
        self.nac = NeuralAccumulatorCell(in_dim, out_dim)
        self.register_parameter('bias', None)

        init.kaiming_uniform_(self.G, a=math.sqrt(5))

    def forward(self, input):
        """Blend the two paths with a learned sigmoid gate.

        Args:
            input: tensor whose last dimension is ``in_dim`` (required by
                ``F.linear`` with ``G`` of shape ``(out_dim, in_dim)``).

        Returns:
            ``g * a + (1 - g) * m`` as described in the class docstring.
        """
        a = self.nac(input)
        # torch.sigmoid replaces the deprecated F.sigmoid alias; the values
        # are the same. self.bias is None, so the gate has no bias term.
        g = torch.sigmoid(F.linear(input, self.G, self.bias))
        add_sub = g * a

        # Same accumulator cell, applied in asinh space and mapped back.
        log_input = asinh(input)
        m = sinh(self.nac(log_input))
        mul_div = (1 - g) * m

        y = add_sub + mul_div
        return y

    def extra_repr(self):
        """Return the dimension summary shown in the module's repr."""
        return 'in_dim={}, out_dim={}'.format(
            self.in_dim, self.out_dim
        )
|
||||
|
||||
|
||||
class NALU_sinh(nn.Module):
    """Sequential stack of ``NeuralArithmeticLogicUnitCellSinh`` cells.

    The first cell consumes ``in_dim`` features, the last cell produces
    ``out_dim`` features, and all intermediate widths are ``hidden_dim``.

    Attributes:
        num_layers: the number of stacked cells.
        in_dim: the size of the input sample.
        hidden_dim: the size of the hidden layers.
        out_dim: the size of the output.
        model: the ``nn.Sequential`` container holding the cells in order.
    """

    def __init__(self, num_layers, in_dim, hidden_dim, out_dim):
        super().__init__()
        self.num_layers = num_layers
        self.in_dim = in_dim
        self.hidden_dim = hidden_dim
        self.out_dim = out_dim

        def _width_in(idx):
            # Only the first cell sees the raw input width.
            return in_dim if idx == 0 else hidden_dim

        def _width_out(idx):
            # Only the last cell emits the output width.
            return out_dim if idx == num_layers - 1 else hidden_dim

        # Cells are created in index order, first to last.
        stack = [
            NeuralArithmeticLogicUnitCellSinh(_width_in(idx), _width_out(idx))
            for idx in range(num_layers)
        ]
        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Pass ``x`` through every cell in order and return the result."""
        return self.model(x)
|
||||
@@ -1,7 +1,7 @@
|
||||
Relu6 None NAC NALU
|
||||
4.408 0.148 100.085 0.614
|
||||
93.842 1.937 93.068 58.833
|
||||
51.517 0.550 100.011 1.448
|
||||
113.406 3.299 49.287 0.644
|
||||
48.252 0.735 100.001 1.401
|
||||
16.141 2.248 98.166 8.952
|
||||
NAC_exact NALU_sinh Relu6 None NAC NALU
|
||||
0.133 0.530 3.846 0.140 0.155 0.139
|
||||
3.642 5.513 87.524 1.774 0.986 10.864
|
||||
1.525 0.444 4.082 0.319 2.889 2.139
|
||||
0.266 0.796 4.337 0.341 2.002 1.547
|
||||
1.127 1.100 92.235 0.763 4.867 0.852
|
||||
0.951 0.798 85.603 0.549 4.589 0.511
|
||||
|
||||
Reference in New Issue
Block a user