From e69afab7d69826f0f2936547d54cb2569ac871c4 Mon Sep 17 00:00:00 2001
From: wassname <wassname@users.noreply.github.com>
Date: Fri, 10 Aug 2018 19:11:21 +0800
Subject: [PATCH] trying asinh

---
 README.md                 | 18 ++++++++
 function_learning.py      | 25 ++++++++---
 models/__init__.py        |  2 +
 models/nac_exact.py       | 69 ++++++++++++++++++++++++++++++
 models/nalu_sinh.py       | 89 +++++++++++++++++++++++++++++++++++++++
 results/interpolation.txt | 14 +++---
 6 files changed, 205 insertions(+), 12 deletions(-)
 create mode 100644 models/nac_exact.py
 create mode 100644 models/nalu_sinh.py

diff --git a/README.md b/README.md
index b760d56..9127bfe 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,18 @@
+Comparison of Neural Arithmetic Logic Units (NALU) with the exact (`NAC_exact`) and asinh-based (`NALU_sinh`) variants.
+
+
+|         | NAC_exact | NALU_sinh | Relu6  | None  | NAC   | NALU   |
+| :------ | :-------- | :-------- | :----- | :---- | :---- | :----- |
+| a + b   | 0.133     | 0.530     | 3.846  | 0.140 | 0.155 | 0.139  |
+| a - b   | 3.642     | 5.513     | 87.524 | 1.774 | 0.986 | 10.864 |
+| a * b   | 1.525     | 0.444     | 4.082  | 0.319 | 2.889 | 2.139  |
+| a / b   | 0.266     | 0.796     | 4.337  | 0.341 | 2.002 | 1.547  |
+| a ^ 2   | 1.127     | 1.100     | 92.235 | 0.763 | 4.867 | 0.852  |
+| sqrt(a) | 0.951     | 0.798     | 85.603 | 0.549 | 4.589 | 0.511  |
+
+
+
+
 # Neural Arithmetic Logic Units
 
 [WIP]
@@ -43,6 +58,8 @@ python function_learning.py
 ```
 This should generate a text file called `interpolation.txt` with the following results. (Currently only supports interpolation, I'm working on the rest)
 
+
+
 |         | Relu6    | None     | NAC      | NALU   |
 |---------|----------|----------|----------|--------|
 | a + b   | 4.472    | 0.132    | 0.154    | 0.157  |
@@ -51,3 +68,4 @@ This should generate a text file called `interpolation.txt` with the following r
 | a / b   | 97.070   | 60.594   | 5.730    | 3.042  |
 | a ^ 2   | 89.987   | 2.977    | 4.718    | 1.117  |
 | sqrt(a) | 5.939    | 40.243   | 7.263    | 1.119  |
+
diff --git a/function_learning.py b/function_learning.py
index 3e0e2ba..4cdf364 100644
--- a/function_learning.py
+++ b/function_learning.py
@@ -1,13 +1,14 @@
 import math
 import random
 import numpy as np
+import pandas as pd
 import matplotlib.pyplot as plt
 
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 
-from models import MLP, NAC, NALU
+from models import MLP, NAC, NALU, NALU_sinh, NAC_exact
 
 NORMALIZE = True
 NUM_LAYERS = 2
@@ -68,6 +69,18 @@ def main():
     save_dir = './results/'
 
     models = [
+        NAC_exact(
+            num_layers=NUM_LAYERS,
+            in_dim=2,
+            hidden_dim=HIDDEN_DIM,
+            out_dim=1,
+        ),
+        NALU_sinh(
+            num_layers=NUM_LAYERS,
+            in_dim=2,
+            hidden_dim=HIDDEN_DIM,
+            out_dim=1
+        ),
         MLP(
             num_layers=NUM_LAYERS,
             in_dim=2,
@@ -99,6 +112,7 @@ def main():
     results = {}
     for fn_str, fn in ARITHMETIC_FUNCTIONS.items():
         results[fn_str] = []
+        print("running", fn_str)
 
         # dataset
         X_train, y_train, X_test, y_test = generate_data(
@@ -125,16 +139,17 @@ def main():
             train(net, optim, X_train, y_train, NUM_ITERS)
             mse = test(net, X_test, y_test).mean().item()
             results[fn_str].append(mse)
+        print("mse", mse)
 
     with open(save_dir + "interpolation.txt", "w") as f:
-        f.write("Relu6\tNone\tNAC\tNALU\n")
+        f.write("NAC_exact\tNALU_sinh\tRelu6\tNone\tNAC\tNALU\n")
         for k, v in results.items():
             rand = results[k][0]
-            mses = [100.0*x/rand for x in results[k][1:]]
+            mses = [100.0 * x / rand for x in results[k][1:]]
             if NORMALIZE:
-                f.write("{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}\n".format(*mses))
+                f.write(("\t".join(["{:.3f}"]*len(mses))+"\n").format(*mses))
             else:
-                f.write("{:.3f}\t{:.3f}\t{:.3f}\t{:.3f}\n".format(*results[k][1:]))
+                f.write(("\t".join(["{:.3f}"]*len(mses))+"\n").format(*results[k][1:]))
 
 
 if __name__ == '__main__':
diff --git a/models/__init__.py b/models/__init__.py
index f136986..269bdba 100644
--- a/models/__init__.py
+++ b/models/__init__.py
@@ -1,3 +1,5 @@
 from .mlp import MLP
 from .nac import NeuralAccumulatorCell, NAC
 from .nalu import NeuralArithmeticLogicUnitCell, NALU
+from .nalu_sinh import NeuralArithmeticLogicUnitCellSinh, NALU_sinh
+from .nac_exact import NeuralAccumulatorCell_exact, NAC_exact
diff --git a/models/nac_exact.py b/models/nac_exact.py
new file mode 100644
index 0000000..b740360
--- /dev/null
+++ b/models/nac_exact.py
@@ -0,0 +1,69 @@
+import math
+import torch
+import torch.nn as nn
+import torch.nn.init as init
+import torch.nn.functional as F
+
+from torch.nn.parameter import Parameter
+
+
+class NeuralAccumulatorCell_exact(nn.Module):
+    """Gated linear variant of the Neural Accumulator (NAC) cell [1].
+
+    Attributes:
+        in_dim: size of the input sample.
+        out_dim: size of the output sample.
+
+    Sources:
+        [1]: https://arxiv.org/abs/1808.00508
+    """
+    def __init__(self, in_dim, out_dim):
+        super().__init__()
+        self.in_dim = in_dim
+        self.out_dim = out_dim
+        self.gate = nn.Linear(in_dim, out_dim)
+        self.transform = nn.Linear(in_dim, out_dim)
+        # Re-draw both weight matrices; the nn.Linear biases keep their defaults.
+        init.kaiming_uniform_(self.gate.weight, a=math.sqrt(5))
+        init.kaiming_uniform_(self.transform.weight, a=math.sqrt(5))
+
+    def forward(self, input):
+        x = self.transform(input)
+        g = torch.sigmoid(self.gate(input))  # torch.sigmoid replaces the deprecated F.sigmoid
+        return (1 - g) * x - g * x  # == (1 - 2g) * x, a per-unit scale in (-1, 1)
+
+    def extra_repr(self):
+        return 'in_dim={}, out_dim={}'.format(
+            self.in_dim, self.out_dim
+        )
+
+
+class NAC_exact(nn.Module):
+    """Sequential stack of `NeuralAccumulatorCell_exact` layers.
+
+    Attributes:
+        num_layers: how many cells are chained together.
+        in_dim: feature size fed to the first cell.
+        hidden_dim: feature size passed between consecutive cells.
+        out_dim: feature size produced by the last cell.
+    """
+    def __init__(self, num_layers, in_dim, hidden_dim, out_dim):
+        super().__init__()
+        self.num_layers = num_layers
+        self.in_dim = in_dim
+        self.hidden_dim = hidden_dim
+        self.out_dim = out_dim
+
+        # First cell reads in_dim, last cell emits out_dim, all others use hidden_dim.
+        cells = [
+            NeuralAccumulatorCell_exact(
+                in_dim if idx == 0 else hidden_dim,
+                out_dim if idx == num_layers - 1 else hidden_dim,
+            )
+            for idx in range(num_layers)
+        ]
+        self.model = nn.Sequential(*cells)
+
+    def forward(self, x):
+        # Run the input through every cell in order.
+        return self.model(x)
diff --git a/models/nalu_sinh.py b/models/nalu_sinh.py
new file mode 100644
index 0000000..0bca728
--- /dev/null
+++ b/models/nalu_sinh.py
@@ -0,0 +1,89 @@
+import math
+import torch
+import torch.nn as nn
+import torch.nn.init as init
+import torch.nn.functional as F
+
+from .nac import NeuralAccumulatorCell
+from torch.nn.parameter import Parameter
+
+
+def asinh(x):
+    """Return the inverse hyperbolic sine of tensor `x`, elementwise.
+
+    https://en.wikipedia.org/wiki/Inverse_hyperbolic_functions#Inverse%20hyperbolic%20sine
+    """
+    sgn = 1 - 2 * (x < 0).type_as(x)  # +/-1 constant; plain x + sqrt(x^2 + 1) cancels to 0 (log -> -inf) for very negative x
+    return sgn * torch.log(sgn * x + torch.sqrt(torch.pow(x, 2) + 1))
+
+def sinh(x):  # elementwise hyperbolic sine; inverse of asinh
+    return torch.sinh(x)  # library kernel avoids the exp(x) - exp(-x) cancellation near 0
+
+
+class NeuralArithmeticLogicUnitCellSinh(nn.Module):
+    """A NALU cell [1] whose multiplicative path uses asinh/sinh in place of log/exp.
+
+    Attributes:
+        in_dim: size of the input sample.
+        out_dim: size of the output sample.
+
+    Sources:
+        [1]: https://arxiv.org/abs/1808.00508
+    """
+    def __init__(self, in_dim, out_dim):
+        super().__init__()
+        self.in_dim = in_dim
+        self.out_dim = out_dim
+        self.eps = 1e-10  # kept as an attribute; not read by forward()
+
+        self.G = Parameter(torch.Tensor(out_dim, in_dim))
+        self.nac = NeuralAccumulatorCell(in_dim, out_dim)
+        self.register_parameter('bias', None)
+
+        init.kaiming_uniform_(self.G, a=math.sqrt(5))
+
+    def forward(self, input):
+        a = self.nac(input)
+        g = torch.sigmoid(F.linear(input, self.G, self.bias))  # torch.sigmoid replaces the deprecated F.sigmoid
+        add_sub = g * a
+        log_input = asinh(input)
+        m = sinh(self.nac(log_input))  # same NAC module (shared weights) as the additive path
+        mul_div = (1 - g) * m
+        y = add_sub + mul_div
+        return y
+
+    def extra_repr(self):
+        return 'in_dim={}, out_dim={}'.format(
+            self.in_dim, self.out_dim
+        )
+
+
+class NALU_sinh(nn.Module):
+    """Sequential stack of `NeuralArithmeticLogicUnitCellSinh` layers.
+
+    Attributes:
+        num_layers: how many cells are chained together.
+        in_dim: feature size fed to the first cell.
+        hidden_dim: feature size passed between consecutive cells.
+        out_dim: feature size produced by the last cell.
+    """
+    def __init__(self, num_layers, in_dim, hidden_dim, out_dim):
+        super().__init__()
+        self.num_layers = num_layers
+        self.in_dim = in_dim
+        self.hidden_dim = hidden_dim
+        self.out_dim = out_dim
+
+        # First cell reads in_dim, last cell emits out_dim, all others use hidden_dim.
+        cells = [
+            NeuralArithmeticLogicUnitCellSinh(
+                in_dim if idx == 0 else hidden_dim,
+                out_dim if idx == num_layers - 1 else hidden_dim,
+            )
+            for idx in range(num_layers)
+        ]
+        self.model = nn.Sequential(*cells)
+
+    def forward(self, x):
+        # Run the input through every cell in order.
+        return self.model(x)
diff --git a/results/interpolation.txt b/results/interpolation.txt
index 1f6317b..ed58072 100644
--- a/results/interpolation.txt
+++ b/results/interpolation.txt
@@ -1,7 +1,7 @@
-Relu6	None	NAC	NALU
-4.408	0.148	100.085	0.614
-93.842	1.937	93.068	58.833
-51.517	0.550	100.011	1.448
-113.406	3.299	49.287	0.644
-48.252	0.735	100.001	1.401
-16.141	2.248	98.166	8.952
+NAC_exact	NALU_sinh	Relu6	None	NAC	NALU
+0.133	0.530	3.846	0.140	0.155	0.139
+3.642	5.513	87.524	1.774	0.986	10.864
+1.525	0.444	4.082	0.319	2.889	2.139
+0.266	0.796	4.337	0.341	2.002	1.547
+1.127	1.100	92.235	0.763	4.867	0.852
+0.951	0.798	85.603	0.549	4.589	0.511