From b10e9d795f1f75c720c076ecbc89df838b0b5a55 Mon Sep 17 00:00:00 2001 From: Michael Oliver Date: Wed, 22 Feb 2017 11:05:37 -0800 Subject: [PATCH 1/5] Add cosine normalized dense --- keras_contrib/layers/core.py | 153 ++++++++++++++++++++++++ tests/keras_contrib/layers/test_core.py | 55 +++++++++ 2 files changed, 208 insertions(+) diff --git a/keras_contrib/layers/core.py b/keras_contrib/layers/core.py index 7e2b19c..2a66edf 100644 --- a/keras_contrib/layers/core.py +++ b/keras_contrib/layers/core.py @@ -21,3 +21,156 @@ from keras.engine import Merge from keras.utils.generic_utils import func_dump from keras.utils.generic_utils import func_load from keras.utils.generic_utils import get_from_module + + +class CosineDense(Layer): + """A cosine normalized densely-connected NN layer + Cosine Normalization: Using Cosine Similarity Instead of Dot Product in Neural Networks + https://arxiv.org/pdf/1702.05870.pdf + + # Example + + ```python + # as first layer in a sequential model: + model = Sequential() + model.add(CosineDense(32, input_dim=16)) + # now the model will take as input arrays of shape (*, 16) + # and output arrays of shape (*, 32) + + # this is equivalent to the above: + model = Sequential() + model.add(CosineDense(32, input_shape=(16,))) + + # after the first layer, you don't need to specify + # the size of the input anymore: + model.add(CosineDense(32)) + ``` + + # Arguments + output_dim: int > 0. + init: name of initialization function for the weights of the layer + (see [initializations](../initializations.md)), + or alternatively, Theano function to use for weights + initialization. This parameter is only relevant + if you don't pass a `weights` argument. + activation: name of activation function to use + (see [activations](../activations.md)), + or alternatively, elementwise Theano function. + If you don't specify anything, no activation is applied + (ie. "linear" activation: a(x) = x). + weights: list of Numpy arrays to set as initial weights. 
+ The list should have 2 elements, of shape `(input_dim, output_dim)` + and `(output_dim,)` for weights and biases respectively. + W_regularizer: instance of [WeightRegularizer](../regularizers.md) + (e.g. L1 or L2 regularization), applied to the main weights matrix. + b_regularizer: instance of [WeightRegularizer](../regularizers.md), + applied to the bias. + activity_regularizer: instance of [ActivityRegularizer](../regularizers.md), + applied to the network output. + W_constraint: instance of the [constraints](../constraints.md) module + (e.g. maxnorm, nonneg), applied to the main weights matrix. + b_constraint: instance of the [constraints](../constraints.md) module, + applied to the bias. + bias: whether to include a bias + (i.e. make the layer affine rather than linear). + input_dim: dimensionality of the input (integer). This argument + (or alternatively, the keyword argument `input_shape`) + is required when using this layer as the first layer in a model. + + # Input shape + nD tensor with shape: `(nb_samples, ..., input_dim)`. + The most common situation would be + a 2D input with shape `(nb_samples, input_dim)`. + + # Output shape + nD tensor with shape: `(nb_samples, ..., output_dim)`. + For instance, for a 2D input with shape `(nb_samples, input_dim)`, + the output would have shape `(nb_samples, output_dim)`. 
+ """ + + def __init__(self, output_dim, init='glorot_uniform', + activation=None, weights=None, + W_regularizer=None, b_regularizer=None, activity_regularizer=None, + W_constraint=None, b_constraint=None, + bias=True, input_dim=None, **kwargs): + self.init = initializations.get(init) + self.activation = activations.get(activation) + self.output_dim = output_dim + self.input_dim = input_dim + + self.W_regularizer = regularizers.get(W_regularizer) + self.b_regularizer = regularizers.get(b_regularizer) + self.activity_regularizer = regularizers.get(activity_regularizer) + + self.W_constraint = constraints.get(W_constraint) + self.b_constraint = constraints.get(b_constraint) + + self.bias = bias + self.initial_weights = weights + self.input_spec = [InputSpec(ndim='2+')] + + if self.input_dim: + kwargs['input_shape'] = (self.input_dim,) + super(CosineDense, self).__init__(**kwargs) + + def build(self, input_shape): + assert len(input_shape) >= 2 + input_dim = input_shape[-1] + self.input_dim = input_dim + self.input_spec = [InputSpec(dtype=K.floatx(), + ndim='2+')] + + self.W = self.add_weight((input_dim, self.output_dim), + initializer=self.init, + name='{}_W'.format(self.name), + regularizer=self.W_regularizer, + constraint=self.W_constraint) + if self.bias: + self.b = self.add_weight((self.output_dim,), + initializer='zero', + name='{}_b'.format(self.name), + regularizer=self.b_regularizer, + constraint=self.b_constraint) + else: + self.b = None + + if self.initial_weights is not None: + self.set_weights(self.initial_weights) + del self.initial_weights + self.built = True + + def call(self, x, mask=None): + if self.bias: + xnorm = K.sqrt(K.sum(K.square(x), axis=1, keepdims=True) + 1 + K.epsilon()) + x /= xnorm + Wnorm = K.sqrt(K.sum(K.square(self.W), axis=0) + K.square(self.b) + K.epsilon()) + else: + x /= K.sqrt(K.sum(K.square(x), axis=1, keepdims=True) + K.epsilon()) + Wnorm = K.sqrt(K.sum(K.square(self.W), axis=0) + K.epsilon()) + + W = self.W / Wnorm + output = 
K.dot(x, W) + if self.bias: + output += (self.b / (xnorm*Wnorm)) + return self.activation(output) + + def get_output_shape_for(self, input_shape): + assert input_shape and len(input_shape) >= 2 + assert input_shape[-1] and input_shape[-1] == self.input_dim + output_shape = list(input_shape) + output_shape[-1] = self.output_dim + return tuple(output_shape) + + def get_config(self): + config = {'output_dim': self.output_dim, + 'init': self.init.__name__, + 'activation': self.activation.__name__, + 'W_regularizer': self.W_regularizer.get_config() if self.W_regularizer else None, + 'b_regularizer': self.b_regularizer.get_config() if self.b_regularizer else None, + 'activity_regularizer': self.activity_regularizer.get_config() if self.activity_regularizer else None, + 'W_constraint': self.W_constraint.get_config() if self.W_constraint else None, + 'b_constraint': self.b_constraint.get_config() if self.b_constraint else None, + 'bias': self.bias, + 'input_dim': self.input_dim} + base_config = super(CosineDense, self).get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/tests/keras_contrib/layers/test_core.py b/tests/keras_contrib/layers/test_core.py index fa517a4..a8e703d 100644 --- a/tests/keras_contrib/layers/test_core.py +++ b/tests/keras_contrib/layers/test_core.py @@ -5,6 +5,61 @@ from keras import backend as K from keras_contrib import backend as KC from keras_contrib.layers import core from keras.utils.test_utils import layer_test, keras_test +from numpy.testing import assert_allclose + + +@keras_test +def test_cosinedense(): + from keras import regularizers + from keras import constraints + from keras.models import Sequential + + layer_test(core.CosineDense, + kwargs={'output_dim': 3}, + input_shape=(3, 2)) + + layer_test(core.CosineDense, + kwargs={'output_dim': 3}, + input_shape=(3, 4, 2)) + + layer_test(core.CosineDense, + kwargs={'output_dim': 3}, + input_shape=(None, None, 2)) + + layer_test(core.CosineDense, + 
kwargs={'output_dim': 3}, + input_shape=(3, 4, 5, 2)) + + layer_test(core.CosineDense, + kwargs={'output_dim': 3, + 'W_regularizer': regularizers.l2(0.01), + 'b_regularizer': regularizers.l1(0.01), + 'activity_regularizer': regularizers.activity_l2(0.01), + 'W_constraint': constraints.MaxNorm(1), + 'b_constraint': constraints.MaxNorm(1)}, + input_shape=(3, 2)) + + X = np.random.randn(1, 20) + model = Sequential() + model.add(core.CosineDense(1, bias=True, input_shape=(20,))) + model.compile(loss='mse', optimizer='rmsprop') + W = model.get_weights() + W[0] = X.T + W[1] = np.asarray([1.]) + model.set_weights(W) + out = model.predict(X) + assert_allclose(out, np.ones((1, 1), dtype=K.floatx())) + + X = np.random.randn(1, 20) + model = Sequential() + model.add(core.CosineDense(1, bias=False, input_shape=(20,))) + model.compile(loss='mse', optimizer='rmsprop') + W = model.get_weights() + W[0] = -X.T + model.set_weights(W) + out = model.predict(X) + assert_allclose(out, -np.ones((1, 1), dtype=K.floatx())) + + if __name__ == '__main__': From 82b92142cf1975e7f0d653223a1b4bed395d0f65 Mon Sep 17 00:00:00 2001 From: Michael Oliver Date: Wed, 22 Feb 2017 11:26:36 -0800 Subject: [PATCH 2/5] fixes and doc updates --- keras_contrib/layers/core.py | 4 +++- tests/keras_contrib/layers/test_core.py | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/keras_contrib/layers/core.py b/keras_contrib/layers/core.py index 2a66edf..4048a90 100644 --- a/keras_contrib/layers/core.py +++ b/keras_contrib/layers/core.py @@ -44,6 +44,8 @@ class CosineDense(Layer): # after the first layer, you don't need to specify # the size of the input anymore: model.add(CosineDense(32)) + + # Note that a regular Dense layer may work better as the final layer ``` # Arguments @@ -151,7 +153,7 @@ class CosineDense(Layer): W = self.W / Wnorm output = K.dot(x, W) if self.bias: - output += (self.b / (xnorm*Wnorm)) + output += (self.b / (xnorm * Wnorm)) return self.activation(output) def 
get_output_shape_for(self, input_shape): diff --git a/tests/keras_contrib/layers/test_core.py b/tests/keras_contrib/layers/test_core.py index a8e703d..63c6734 100644 --- a/tests/keras_contrib/layers/test_core.py +++ b/tests/keras_contrib/layers/test_core.py @@ -61,6 +61,5 @@ def test_cosinedense(): assert_allclose(out, -np.ones((1, 1), dtype=K.floatx())) - if __name__ == '__main__': pytest.main([__file__]) From ac44e1a19cb3fccb769dbc22b5cf93a0f29228e3 Mon Sep 17 00:00:00 2001 From: Michael Oliver Date: Wed, 22 Feb 2017 11:33:43 -0800 Subject: [PATCH 3/5] fix serialization --- keras_contrib/layers/core.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/keras_contrib/layers/core.py b/keras_contrib/layers/core.py index 4048a90..47de531 100644 --- a/keras_contrib/layers/core.py +++ b/keras_contrib/layers/core.py @@ -21,6 +21,7 @@ from keras.engine import Merge from keras.utils.generic_utils import func_dump from keras.utils.generic_utils import func_load from keras.utils.generic_utils import get_from_module +from keras.utils.generic_utils import get_custom_objects class CosineDense(Layer): @@ -176,3 +177,6 @@ class CosineDense(Layer): 'input_dim': self.input_dim} base_config = super(CosineDense, self).get_config() return dict(list(base_config.items()) + list(config.items())) + + +get_custom_objects().update({"CosineDense": CosineDense}) From 000927f7c95776d90ff281a7edab7034a8a46251 Mon Sep 17 00:00:00 2001 From: Michael Oliver Date: Wed, 22 Feb 2017 11:45:42 -0800 Subject: [PATCH 4/5] fix >2D case --- keras_contrib/layers/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/keras_contrib/layers/core.py b/keras_contrib/layers/core.py index 47de531..bc142a5 100644 --- a/keras_contrib/layers/core.py +++ b/keras_contrib/layers/core.py @@ -144,11 +144,11 @@ class CosineDense(Layer): def call(self, x, mask=None): if self.bias: - xnorm = K.sqrt(K.sum(K.square(x), axis=1, keepdims=True) + 1 + K.epsilon()) + xnorm = K.sqrt(K.sum(K.square(x), 
axis=-1, keepdims=True) + 1 + K.epsilon()) x /= xnorm Wnorm = K.sqrt(K.sum(K.square(self.W), axis=0) + K.square(self.b) + K.epsilon()) else: - x /= K.sqrt(K.sum(K.square(x), axis=1, keepdims=True) + K.epsilon()) + x /= K.sqrt(K.sum(K.square(x), axis=-1, keepdims=True) + K.epsilon()) Wnorm = K.sqrt(K.sum(K.square(self.W), axis=0) + K.epsilon()) W = self.W / Wnorm From 538b9cd45ac33700a00b87e703255a573e9503d8 Mon Sep 17 00:00:00 2001 From: Michael Oliver Date: Wed, 22 Feb 2017 11:59:38 -0800 Subject: [PATCH 5/5] change atol on allclose --- tests/keras_contrib/layers/test_core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/keras_contrib/layers/test_core.py b/tests/keras_contrib/layers/test_core.py index 63c6734..64f4b4d 100644 --- a/tests/keras_contrib/layers/test_core.py +++ b/tests/keras_contrib/layers/test_core.py @@ -48,7 +48,7 @@ def test_cosinedense(): W[1] = np.asarray([1.]) model.set_weights(W) out = model.predict(X) - assert_allclose(out, np.ones((1, 1), dtype=K.floatx())) + assert_allclose(out, np.ones((1, 1), dtype=K.floatx()), atol=1e-5) X = np.random.randn(1, 20) model = Sequential() @@ -58,7 +58,7 @@ def test_cosinedense(): W[0] = -X.T model.set_weights(W) out = model.predict(X) - assert_allclose(out, -np.ones((1, 1), dtype=K.floatx())) + assert_allclose(out, -np.ones((1, 1), dtype=K.floatx()), atol=1e-5) if __name__ == '__main__':