From e69afab7d69826f0f2936547d54cb2569ac871c4 Mon Sep 17 00:00:00 2001 From: wassname Date: Fri, 10 Aug 2018 19:11:21 +0800 Subject: [PATCH] trying asinh --- README.md | 18 ++++++++ function_learning.py | 25 ++++++++--- models/__init__.py | 2 + models/nac_exact.py | 69 ++++++++++++++++++++++++++++++ models/nalu_sinh.py | 89 +++++++++++++++++++++++++++++++++++++++ results/interpolation.txt | 14 +++--- 6 files changed, 205 insertions(+), 12 deletions(-) create mode 100644 models/nac_exact.py create mode 100644 models/nalu_sinh.py diff --git a/README.md b/README.md index b760d56..9127bfe 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,18 @@ +Comparing Neural Arithmetic Logic Units with exact and asinh versions. + + +| | NAC_exact | NALU_sinh | Relu6 | None | NAC | NALU | +| :------ | :-------- | :-------- | :----- | :---- | :---- | :----- | +| a + b | 0.133 | 0.530 | 3.846 | 0.140 | 0.155 | 0.139 | +| a - b | 3.642 | 5.513 | 87.524 | 1.774 | 0.986 | 10.864 | +| a * b | 1.525 | 0.444 | 4.082 | 0.319 | 2.889 | 2.139 | +| a / b | 0.266 | 0.796 | 4.337 | 0.341 | 2.002 | 1.547 | +| a ^ 2 | 1.127 | 1.100 | 92.235 | 0.763 | 4.867 | 0.852 | +| sqrt(a) | 0.951 | 0.798 | 85.603 | 0.549 | 4.589 | 0.511 | + + + + # Neural Arithmetic Logic Units [WIP] @@ -43,6 +58,8 @@ python function_learning.py ``` This should generate a text file called `interpolation.txt` with the following results. (Currently only supports interpolation, I'm working on the rest) + + | | Relu6 | None | NAC | NALU | |---------|----------|----------|----------|--------| | a + b | 4.472 | 0.132 | 0.154 | 0.157 | @@ -51,3 +68,4 @@ This should generate a text file called `interpolation.txt` with the following r | a / b | 97.070 | 60.594 | 5.730 | 3.042 | | a ^ 2 | 89.987 | 2.977 | 4.718 | 1.117 | | sqrt(a) | 5.939 | 40.243 | 7.263 | 1.119 | + diff --git a/function_learning.py b/function_learning.py index 3e0e2ba..4cdf364 100644 --- a/function_learning.py +++ b/function_learning.py @@ -1,13 +1,14 @@ import math import random import numpy as np +import pandas as pd import matplotlib.pyplot as plt import torch import torch.nn as nn import torch.nn.functional as F -from models import MLP, NAC, NALU +from models import MLP, NAC, NALU, NALU_sinh, NAC_exact NORMALIZE = True NUM_LAYERS = 2 @@ -68,6 +69,18 @@ def main(): save_dir = './results/' models = [ + NAC_exact( + num_layers=NUM_LAYERS, + in_dim=2, + hidden_dim=HIDDEN_DIM, + out_dim=1, + ), + NALU_sinh( + num_layers=NUM_LAYERS, + in_dim=2, + hidden_dim=HIDDEN_DIM, + out_dim=1 + ), MLP( num_layers=NUM_LAYERS, in_dim=2, @@ -99,6 +112,7 @@ def main(): results = {} for fn_str, fn in ARITHMETIC_FUNCTIONS.items(): results[fn_str] = [] + print("running", fn_str) # dataset X_train, y_train, X_test, y_test = generate_data( @@ -125,16 +139,17 @@ def main(): train(net, optim, X_train, y_train, NUM_ITERS) mse = test(net, X_test, y_test).mean().item() results[fn_str].append(mse) + print("mse", mse) with open(save_dir + "interpolation.txt", "w") as f: - f.write("Relu6\tNone\tNAC\tNALU\n") + f.write("NAC_exact\tNALU_sinh\tRelu6\tNone\tNAC\tNALU\n") for k, v in results.items(): rand = results[k][0] - mses = [100.0*x/rand for x in results[k][1:]] + mses = [100.0 * x / rand for x in results[k][1:]] if NORMALIZE: - f.write("{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}\n".format(*mses)) + f.write(("\t".join(["{:.3f}"]*len(mses))+"\n").format(*mses)) else: - f.write("{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}\n".format(*results[k][1:])) + f.write(("\t".join(["{:.3f}"]*len(mses))+"\n").format(*results[k][1:])) if __name__ == '__main__': diff --git a/models/__init__.py b/models/__init__.py index f136986..269bdba 100644 --- a/models/__init__.py +++ b/models/__init__.py @@ -1,3 +1,5 @@ from .mlp import MLP from .nac import NeuralAccumulatorCell, NAC from .nalu import NeuralArithmeticLogicUnitCell, NALU +from .nalu_sinh import NeuralArithmeticLogicUnitCellSinh, NALU_sinh +from .nac_exact import NeuralAccumulatorCell_exact, NAC_exact diff --git a/models/nac_exact.py b/models/nac_exact.py new file mode 100644 index 0000000..b740360 --- /dev/null +++ b/models/nac_exact.py @@ -0,0 +1,69 @@ +import math +import torch +import torch.nn as nn +import torch.nn.init as init +import torch.nn.functional as F + +from torch.nn.parameter import Parameter + + +class NeuralAccumulatorCell_exact(nn.Module): + """A Neural Accumulator (NAC) cell [1]. + + Attributes: + in_dim: size of the input sample. + out_dim: size of the output sample. + + Sources: + [1]: https://arxiv.org/abs/1808.00508 + """ + def __init__(self, in_dim, out_dim): + super().__init__() + self.in_dim = in_dim + self.out_dim = out_dim + self.gate = nn.Linear(in_dim, out_dim) + self.transform = nn.Linear(in_dim, out_dim) + + init.kaiming_uniform_(self.gate.weight, a=math.sqrt(5)) + init.kaiming_uniform_(self.transform.weight, a=math.sqrt(5)) + + def forward(self, input): + x = self.transform(input) + g = F.sigmoid(self.gate(input)) + return (1 - g) * x - g * x + + def extra_repr(self): + return 'in_dim={}, out_dim={}'.format( + self.in_dim, self.out_dim + ) + + +class NAC_exact(nn.Module): + """A stack of NAC layers. + + Attributes: + num_layers: the number of NAC layers. + in_dim: the size of the input sample. + hidden_dim: the size of the hidden layers. + out_dim: the size of the output. + """ + def __init__(self, num_layers, in_dim, hidden_dim, out_dim): + super().__init__() + self.num_layers = num_layers + self.in_dim = in_dim + self.hidden_dim = hidden_dim + self.out_dim = out_dim + + layers = [] + for i in range(num_layers): + layers.append( + NeuralAccumulatorCell_exact( + hidden_dim if i > 0 else in_dim, + hidden_dim if i < num_layers - 1 else out_dim, + ) + ) + self.model = nn.Sequential(*layers) + + def forward(self, x): + out = self.model(x) + return out diff --git a/models/nalu_sinh.py b/models/nalu_sinh.py new file mode 100644 index 0000000..0bca728 --- /dev/null +++ b/models/nalu_sinh.py @@ -0,0 +1,89 @@ +import math +import torch +import torch.nn as nn +import torch.nn.init as init +import torch.nn.functional as F + +from .nac import NeuralAccumulatorCell +from torch.nn.parameter import Parameter + + +def asinh(x): + """asinh + + https://en.wikipedia.org/wiki/Inverse_hyperbolic_functions#Inverse%20hyperbolic%20sine + """ + + return torch.log(x + torch.sqrt(torch.pow(x, 2) + 1)) + +def sinh(x): + return (torch.exp(x) - torch.exp(-x))/2 + + +class NeuralArithmeticLogicUnitCellSinh(nn.Module): + """A Neural Arithmetic Logic Unit (NALU) cell [1]. + + Attributes: + in_dim: size of the input sample. + out_dim: size of the output sample. + + Sources: + [1]: https://arxiv.org/abs/1808.00508 + """ + def __init__(self, in_dim, out_dim): + super().__init__() + self.in_dim = in_dim + self.out_dim = out_dim + self.eps = 1e-10 + + self.G = Parameter(torch.Tensor(out_dim, in_dim)) + self.nac = NeuralAccumulatorCell(in_dim, out_dim) + self.register_parameter('bias', None) + + init.kaiming_uniform_(self.G, a=math.sqrt(5)) + + def forward(self, input): + a = self.nac(input) + g = F.sigmoid(F.linear(input, self.G, self.bias)) + add_sub = g * a + log_input = asinh(input) + m = sinh(self.nac(log_input)) + mul_div = (1 - g) * m + y = add_sub + mul_div + return y + + def extra_repr(self): + return 'in_dim={}, out_dim={}'.format( + self.in_dim, self.out_dim + ) + + +class NALU_sinh(nn.Module): + """A stack of NAC layers. + + Attributes: + num_layers: the number of NAC layers. + in_dim: the size of the input sample. + hidden_dim: the size of the hidden layers. + out_dim: the size of the output. + """ + def __init__(self, num_layers, in_dim, hidden_dim, out_dim): + super().__init__() + self.num_layers = num_layers + self.in_dim = in_dim + self.hidden_dim = hidden_dim + self.out_dim = out_dim + + layers = [] + for i in range(num_layers): + layers.append( + NeuralArithmeticLogicUnitCellSinh( + hidden_dim if i > 0 else in_dim, + hidden_dim if i < num_layers - 1 else out_dim, + ) + ) + self.model = nn.Sequential(*layers) + + def forward(self, x): + out = self.model(x) + return out diff --git a/results/interpolation.txt b/results/interpolation.txt index 1f6317b..ed58072 100644 --- a/results/interpolation.txt +++ b/results/interpolation.txt @@ -1,7 +1,7 @@ -Relu6 None NAC NALU -4.408 0.148 100.085 0.614 -93.842 1.937 93.068 58.833 -51.517 0.550 100.011 1.448 -113.406 3.299 49.287 0.644 -48.252 0.735 100.001 1.401 -16.141 2.248 98.166 8.952 +NAC_exact NALU_sinh Relu6 None NAC NALU +0.133 0.530 3.846 0.140 0.155 0.139 +3.642 5.513 87.524 1.774 0.986 10.864 +1.525 0.444 4.082 0.319 2.889 2.139 +0.266 0.796 4.337 0.341 2.002 1.547 +1.127 1.100 92.235 0.763 4.867 0.852 +0.951 0.798 85.603 0.549 4.589 0.511