From b10e9d795f1f75c720c076ecbc89df838b0b5a55 Mon Sep 17 00:00:00 2001
From: Michael Oliver <michael.d.oliver@gmail.com>
Date: Wed, 22 Feb 2017 11:05:37 -0800
Subject: [PATCH 1/5] Add cosine normalized dense

---
 keras_contrib/layers/core.py            | 153 ++++++++++++++++++++++++
 tests/keras_contrib/layers/test_core.py |  55 +++++++++
 2 files changed, 208 insertions(+)

diff --git a/keras_contrib/layers/core.py b/keras_contrib/layers/core.py
index 7e2b19c..2a66edf 100644
--- a/keras_contrib/layers/core.py
+++ b/keras_contrib/layers/core.py
@@ -21,3 +21,156 @@ from keras.engine import Merge
 from keras.utils.generic_utils import func_dump
 from keras.utils.generic_utils import func_load
 from keras.utils.generic_utils import get_from_module
+
+
+class CosineDense(Layer):
+    """A cosine normalized densely-connected NN layer
+    Cosine Normalization: Using Cosine Similarity Instead of Dot Product in Neural Networks
+    https://arxiv.org/pdf/1702.05870.pdf
+
+    # Example
+
+    ```python
+        # as first layer in a sequential model:
+        model = Sequential()
+        model.add(CosineDense(32, input_dim=16))
+        # now the model will take as input arrays of shape (*, 16)
+        # and output arrays of shape (*, 32)
+
+        # this is equivalent to the above:
+        model = Sequential()
+        model.add(CosineDense(32, input_shape=(16,)))
+
+        # after the first layer, you don't need to specify
+        # the size of the input anymore:
+        model.add(CosineDense(32))
+    ```
+
+    # Arguments
+        output_dim: int > 0.
+        init: name of initialization function for the weights of the layer
+            (see [initializations](../initializations.md)),
+            or alternatively, Theano function to use for weights
+            initialization. This parameter is only relevant
+            if you don't pass a `weights` argument.
+        activation: name of activation function to use
+            (see [activations](../activations.md)),
+            or alternatively, elementwise Theano function.
+            If you don't specify anything, no activation is applied
+            (i.e. "linear" activation: a(x) = x).
+        weights: list of Numpy arrays to set as initial weights.
+            The list should have 2 elements, of shape `(input_dim, output_dim)`
+            and `(output_dim,)` for weights and biases respectively.
+        W_regularizer: instance of [WeightRegularizer](../regularizers.md)
+            (e.g. L1 or L2 regularization), applied to the main weights matrix.
+        b_regularizer: instance of [WeightRegularizer](../regularizers.md),
+            applied to the bias.
+        activity_regularizer: instance of [ActivityRegularizer](../regularizers.md),
+            applied to the network output.
+        W_constraint: instance of the [constraints](../constraints.md) module
+            (e.g. maxnorm, nonneg), applied to the main weights matrix.
+        b_constraint: instance of the [constraints](../constraints.md) module,
+            applied to the bias.
+        bias: whether to include a bias (handled as a weight on a constant
+            input of 1, so it is included in both normalization terms).
+        input_dim: dimensionality of the input (integer). This argument
+            (or alternatively, the keyword argument `input_shape`)
+            is required when using this layer as the first layer in a model.
+
+    # Input shape
+        nD tensor with shape: `(nb_samples, ..., input_dim)`.
+        The most common situation would be
+        a 2D input with shape `(nb_samples, input_dim)`.
+
+    # Output shape
+        nD tensor with shape: `(nb_samples, ..., output_dim)`.
+        For instance, for a 2D input with shape `(nb_samples, input_dim)`,
+        the output would have shape `(nb_samples, output_dim)`.
+    """
+
+    def __init__(self, output_dim, init='glorot_uniform',
+                 activation=None, weights=None,
+                 W_regularizer=None, b_regularizer=None, activity_regularizer=None,
+                 W_constraint=None, b_constraint=None,
+                 bias=True, input_dim=None, **kwargs):
+        self.init = initializations.get(init)
+        self.activation = activations.get(activation)
+        self.output_dim = output_dim
+        self.input_dim = input_dim
+
+        self.W_regularizer = regularizers.get(W_regularizer)
+        self.b_regularizer = regularizers.get(b_regularizer)
+        self.activity_regularizer = regularizers.get(activity_regularizer)
+
+        self.W_constraint = constraints.get(W_constraint)
+        self.b_constraint = constraints.get(b_constraint)
+
+        self.bias = bias
+        self.initial_weights = weights
+        self.input_spec = [InputSpec(ndim='2+')]
+
+        if self.input_dim:
+            kwargs['input_shape'] = (self.input_dim,)
+        super(CosineDense, self).__init__(**kwargs)
+
+    def build(self, input_shape):
+        assert len(input_shape) >= 2
+        input_dim = input_shape[-1]
+        self.input_dim = input_dim
+        self.input_spec = [InputSpec(dtype=K.floatx(),
+                                     ndim='2+')]
+
+        self.W = self.add_weight((input_dim, self.output_dim),
+                                 initializer=self.init,
+                                 name='{}_W'.format(self.name),
+                                 regularizer=self.W_regularizer,
+                                 constraint=self.W_constraint)
+        if self.bias:
+            self.b = self.add_weight((self.output_dim,),
+                                     initializer='zero',
+                                     name='{}_b'.format(self.name),
+                                     regularizer=self.b_regularizer,
+                                     constraint=self.b_constraint)
+        else:
+            self.b = None
+
+        if self.initial_weights is not None:
+            self.set_weights(self.initial_weights)
+            del self.initial_weights
+        self.built = True
+
+    def call(self, x, mask=None):
+        if self.bias:
+            xnorm = K.sqrt(K.sum(K.square(x), axis=1, keepdims=True) + 1 + K.epsilon())
+            x /= xnorm
+            Wnorm = K.sqrt(K.sum(K.square(self.W), axis=0) + K.square(self.b) + K.epsilon())
+        else:
+            x /= K.sqrt(K.sum(K.square(x), axis=1, keepdims=True) + K.epsilon())
+            Wnorm = K.sqrt(K.sum(K.square(self.W), axis=0) + K.epsilon())
+
+        W = self.W / Wnorm
+        output = K.dot(x, W)
+        if self.bias:
+            output += (self.b / (xnorm*Wnorm))
+        return self.activation(output)
+
+    def get_output_shape_for(self, input_shape):
+        assert input_shape and len(input_shape) >= 2
+        assert input_shape[-1] and input_shape[-1] == self.input_dim
+        output_shape = list(input_shape)
+        output_shape[-1] = self.output_dim
+        return tuple(output_shape)
+
+    def get_config(self):
+        config = {'output_dim': self.output_dim,
+                  'init': self.init.__name__,
+                  'activation': self.activation.__name__,
+                  'W_regularizer': self.W_regularizer.get_config() if self.W_regularizer else None,
+                  'b_regularizer': self.b_regularizer.get_config() if self.b_regularizer else None,
+                  'activity_regularizer': self.activity_regularizer.get_config() if self.activity_regularizer else None,
+                  'W_constraint': self.W_constraint.get_config() if self.W_constraint else None,
+                  'b_constraint': self.b_constraint.get_config() if self.b_constraint else None,
+                  'bias': self.bias,
+                  'input_dim': self.input_dim}
+        base_config = super(CosineDense, self).get_config()
+        return dict(list(base_config.items()) + list(config.items()))
diff --git a/tests/keras_contrib/layers/test_core.py b/tests/keras_contrib/layers/test_core.py
index fa517a4..a8e703d 100644
--- a/tests/keras_contrib/layers/test_core.py
+++ b/tests/keras_contrib/layers/test_core.py
@@ -5,6 +5,61 @@ from keras import backend as K
 from keras_contrib import backend as KC
 from keras_contrib.layers import core
 from keras.utils.test_utils import layer_test, keras_test
+from numpy.testing import assert_allclose
+
+
+@keras_test
+def test_cosinedense():
+    from keras import regularizers
+    from keras import constraints
+    from keras.models import Sequential
+
+    layer_test(core.CosineDense,
+               kwargs={'output_dim': 3},
+               input_shape=(3, 2))
+
+    layer_test(core.CosineDense,
+               kwargs={'output_dim': 3},
+               input_shape=(3, 4, 2))
+
+    layer_test(core.CosineDense,
+               kwargs={'output_dim': 3},
+               input_shape=(None, None, 2))
+
+    layer_test(core.CosineDense,
+               kwargs={'output_dim': 3},
+               input_shape=(3, 4, 5, 2))
+
+    layer_test(core.CosineDense,
+               kwargs={'output_dim': 3,
+                       'W_regularizer': regularizers.l2(0.01),
+                       'b_regularizer': regularizers.l1(0.01),
+                       'activity_regularizer': regularizers.activity_l2(0.01),
+                       'W_constraint': constraints.MaxNorm(1),
+                       'b_constraint': constraints.MaxNorm(1)},
+               input_shape=(3, 2))
+
+    X = np.random.randn(1, 20)
+    model = Sequential()
+    model.add(core.CosineDense(1, bias=True, input_shape=(20,)))
+    model.compile(loss='mse', optimizer='rmsprop')
+    W = model.get_weights()
+    W[0] = X.T
+    W[1] = np.asarray([1.])
+    model.set_weights(W)
+    out = model.predict(X)
+    assert_allclose(out, np.ones((1, 1), dtype=K.floatx()))
+
+    X = np.random.randn(1, 20)
+    model = Sequential()
+    model.add(core.CosineDense(1, bias=False, input_shape=(20,)))
+    model.compile(loss='mse', optimizer='rmsprop')
+    W = model.get_weights()
+    W[0] = -X.T
+    model.set_weights(W)
+    out = model.predict(X)
+    assert_allclose(out, -np.ones((1, 1), dtype=K.floatx()))
+
 
 
 if __name__ == '__main__':

From 82b92142cf1975e7f0d653223a1b4bed395d0f65 Mon Sep 17 00:00:00 2001
From: Michael Oliver <michael.d.oliver@gmail.com>
Date: Wed, 22 Feb 2017 11:26:36 -0800
Subject: [PATCH 2/5] fixes and doc updates

---
 keras_contrib/layers/core.py            | 4 +++-
 tests/keras_contrib/layers/test_core.py | 1 -
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/keras_contrib/layers/core.py b/keras_contrib/layers/core.py
index 2a66edf..4048a90 100644
--- a/keras_contrib/layers/core.py
+++ b/keras_contrib/layers/core.py
@@ -44,6 +44,8 @@ class CosineDense(Layer):
         # after the first layer, you don't need to specify
         # the size of the input anymore:
         model.add(CosineDense(32))
+
+        # Note that a regular Dense layer may work better as the final layer.
     ```
 
     # Arguments
@@ -151,7 +153,7 @@ class CosineDense(Layer):
         W = self.W / Wnorm
         output = K.dot(x, W)
         if self.bias:
-            output += (self.b / (xnorm*Wnorm))
+            output += (self.b / (xnorm * Wnorm))
         return self.activation(output)
 
     def get_output_shape_for(self, input_shape):
diff --git a/tests/keras_contrib/layers/test_core.py b/tests/keras_contrib/layers/test_core.py
index a8e703d..63c6734 100644
--- a/tests/keras_contrib/layers/test_core.py
+++ b/tests/keras_contrib/layers/test_core.py
@@ -61,6 +61,5 @@ def test_cosinedense():
     assert_allclose(out, -np.ones((1, 1), dtype=K.floatx()))
 
 
-
 if __name__ == '__main__':
     pytest.main([__file__])

From ac44e1a19cb3fccb769dbc22b5cf93a0f29228e3 Mon Sep 17 00:00:00 2001
From: Michael Oliver <michael.d.oliver@gmail.com>
Date: Wed, 22 Feb 2017 11:33:43 -0800
Subject: [PATCH 3/5] fix serialization

---
 keras_contrib/layers/core.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/keras_contrib/layers/core.py b/keras_contrib/layers/core.py
index 4048a90..47de531 100644
--- a/keras_contrib/layers/core.py
+++ b/keras_contrib/layers/core.py
@@ -21,6 +21,7 @@ from keras.engine import Merge
 from keras.utils.generic_utils import func_dump
 from keras.utils.generic_utils import func_load
 from keras.utils.generic_utils import get_from_module
+from keras.utils.generic_utils import get_custom_objects
 
 
 class CosineDense(Layer):
@@ -176,3 +177,6 @@ class CosineDense(Layer):
                   'input_dim': self.input_dim}
         base_config = super(CosineDense, self).get_config()
         return dict(list(base_config.items()) + list(config.items()))
+
+
+get_custom_objects().update({"CosineDense": CosineDense})

From 000927f7c95776d90ff281a7edab7034a8a46251 Mon Sep 17 00:00:00 2001
From: Michael Oliver <michael.d.oliver@gmail.com>
Date: Wed, 22 Feb 2017 11:45:42 -0800
Subject: [PATCH 4/5] fix >2D case

---
 keras_contrib/layers/core.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/keras_contrib/layers/core.py b/keras_contrib/layers/core.py
index 47de531..bc142a5 100644
--- a/keras_contrib/layers/core.py
+++ b/keras_contrib/layers/core.py
@@ -144,11 +144,11 @@ class CosineDense(Layer):
 
     def call(self, x, mask=None):
         if self.bias:
-            xnorm = K.sqrt(K.sum(K.square(x), axis=1, keepdims=True) + 1 + K.epsilon())
+            xnorm = K.sqrt(K.sum(K.square(x), axis=-1, keepdims=True) + 1 + K.epsilon())
             x /= xnorm
             Wnorm = K.sqrt(K.sum(K.square(self.W), axis=0) + K.square(self.b) + K.epsilon())
         else:
-            x /= K.sqrt(K.sum(K.square(x), axis=1, keepdims=True) + K.epsilon())
+            x /= K.sqrt(K.sum(K.square(x), axis=-1, keepdims=True) + K.epsilon())
             Wnorm = K.sqrt(K.sum(K.square(self.W), axis=0) + K.epsilon())
 
         W = self.W / Wnorm

From 538b9cd45ac33700a00b87e703255a573e9503d8 Mon Sep 17 00:00:00 2001
From: Michael Oliver <michael.d.oliver@gmail.com>
Date: Wed, 22 Feb 2017 11:59:38 -0800
Subject: [PATCH 5/5] change atol on allclose

---
 tests/keras_contrib/layers/test_core.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/keras_contrib/layers/test_core.py b/tests/keras_contrib/layers/test_core.py
index 63c6734..64f4b4d 100644
--- a/tests/keras_contrib/layers/test_core.py
+++ b/tests/keras_contrib/layers/test_core.py
@@ -48,7 +48,7 @@ def test_cosinedense():
     W[1] = np.asarray([1.])
     model.set_weights(W)
     out = model.predict(X)
-    assert_allclose(out, np.ones((1, 1), dtype=K.floatx()))
+    assert_allclose(out, np.ones((1, 1), dtype=K.floatx()), atol=1e-5)
 
     X = np.random.randn(1, 20)
     model = Sequential()
@@ -58,7 +58,7 @@ def test_cosinedense():
     W[0] = -X.T
     model.set_weights(W)
     out = model.predict(X)
-    assert_allclose(out, -np.ones((1, 1), dtype=K.floatx()))
+    assert_allclose(out, -np.ones((1, 1), dtype=K.floatx()), atol=1e-5)
 
 
 if __name__ == '__main__':