Merge branch 'cosine' into cosineconv

This commit is contained in:
Michael Oliver
2017-02-22 13:07:12 -08:00
2 changed files with 213 additions and 0 deletions
+159
View File
@@ -21,3 +21,162 @@ from keras.engine import Merge
from keras.utils.generic_utils import func_dump
from keras.utils.generic_utils import func_load
from keras.utils.generic_utils import get_from_module
from keras.utils.generic_utils import get_custom_objects
class CosineDense(Layer):
    """A cosine normalized densely-connected NN layer.

    Instead of the plain affine map `dot(x, W) + b`, the layer outputs the
    cosine similarity between the input and each weight column. When `bias`
    is enabled the input is treated as if a constant `1` were appended to it
    and the bias as the matching extra weight, i.e. the output is
    `(dot(x, W) + b) / (sqrt(|x|^2 + 1) * sqrt(|W|^2 + b^2))`
    (with `K.epsilon()` added under each square root for stability).

    Cosine Normalization: Using Cosine Similarity Instead of Dot Product
    in Neural Networks
    https://arxiv.org/pdf/1702.05870.pdf

    # Example

    ```python
        # as first layer in a sequential model:
        model = Sequential()
        model.add(CosineDense(32, input_dim=16))
        # now the model will take as input arrays of shape (*, 16)
        # and output arrays of shape (*, 32)

        # this is equivalent to the above:
        model = Sequential()
        model.add(CosineDense(32, input_shape=(16,)))

        # after the first layer, you don't need to specify
        # the size of the input anymore:
        model.add(CosineDense(32))
    ```

    **Note** that a regular Dense layer may work better as the final layer.

    # Arguments
        output_dim: int > 0.
        init: name of initialization function for the weights of the layer
            (see [initializations](../initializations.md)),
            or alternatively, Theano function to use for weights
            initialization. This parameter is only relevant
            if you don't pass a `weights` argument.
        activation: name of activation function to use
            (see [activations](../activations.md)),
            or alternatively, elementwise Theano function.
            If you don't specify anything, no activation is applied
            (ie. "linear" activation: a(x) = x).
        weights: list of Numpy arrays to set as initial weights.
            The list should have 2 elements, of shape
            `(input_dim, output_dim)` and `(output_dim,)` for weights
            and biases respectively.
        W_regularizer: instance of [WeightRegularizer](../regularizers.md)
            (eg. L1 or L2 regularization), applied to the main weights matrix.
        b_regularizer: instance of [WeightRegularizer](../regularizers.md),
            applied to the bias.
        activity_regularizer: instance of
            [ActivityRegularizer](../regularizers.md),
            applied to the network output.
        W_constraint: instance of the [constraints](../constraints.md) module
            (eg. maxnorm, nonneg), applied to the main weights matrix.
        b_constraint: instance of the [constraints](../constraints.md) module,
            applied to the bias.
        bias: whether to include a bias
            (i.e. make the layer affine rather than linear).
        input_dim: dimensionality of the input (integer). This argument
            (or alternatively, the keyword argument `input_shape`)
            is required when using this layer as the first layer in a model.

    # Input shape
        nD tensor with shape: `(nb_samples, ..., input_dim)`.
        The most common situation would be
        a 2D input with shape `(nb_samples, input_dim)`.

    # Output shape
        nD tensor with shape: `(nb_samples, ..., output_dim)`.
        For instance, for a 2D input with shape `(nb_samples, input_dim)`,
        the output would have shape `(nb_samples, output_dim)`.
    """

    def __init__(self, output_dim, init='glorot_uniform',
                 activation=None, weights=None,
                 W_regularizer=None, b_regularizer=None,
                 activity_regularizer=None,
                 W_constraint=None, b_constraint=None,
                 bias=True, input_dim=None, **kwargs):
        self.init = initializations.get(init)
        self.activation = activations.get(activation)
        self.output_dim = output_dim
        self.input_dim = input_dim

        self.W_regularizer = regularizers.get(W_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)
        self.activity_regularizer = regularizers.get(activity_regularizer)

        self.W_constraint = constraints.get(W_constraint)
        self.b_constraint = constraints.get(b_constraint)

        self.bias = bias
        # Kept until `build`, where the weights are actually created.
        self.initial_weights = weights
        self.input_spec = [InputSpec(ndim='2+')]

        # `input_dim` is shorthand for `input_shape=(input_dim,)`.
        if self.input_dim:
            kwargs['input_shape'] = (self.input_dim,)
        super(CosineDense, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the weight matrix (and bias, if enabled) for `input_shape`.

        # Arguments
            input_shape: shape tuple of the input; must have at least
                2 entries, the last one being the input dimensionality.
        """
        assert len(input_shape) >= 2
        input_dim = input_shape[-1]
        self.input_dim = input_dim
        self.input_spec = [InputSpec(dtype=K.floatx(),
                                     ndim='2+')]

        self.W = self.add_weight((input_dim, self.output_dim),
                                 initializer=self.init,
                                 name='{}_W'.format(self.name),
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)
        if self.bias:
            self.b = self.add_weight((self.output_dim,),
                                     initializer='zero',
                                     name='{}_b'.format(self.name),
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)
        else:
            self.b = None

        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            # The user-supplied arrays are no longer needed once loaded.
            del self.initial_weights
        self.built = True

    def call(self, x, mask=None):
        """Return the activated cosine similarity between `x` and the weights.

        # Arguments
            x: input tensor; the norm is taken over its last axis.
            mask: unused by this layer.

        # Returns
            `activation(cos)` where `cos` is computed per output unit.
        """
        eps = K.epsilon()
        x_sq = K.sum(K.square(x), axis=-1, keepdims=True)
        W_sq = K.sum(K.square(self.W), axis=0)

        if self.bias:
            # The bias acts as one extra weight paired with a constant
            # input of 1, so both norms get one additional squared term.
            xnorm = K.sqrt(x_sq + 1 + eps)
            Wnorm = K.sqrt(W_sq + K.square(self.b) + eps)
        else:
            xnorm = K.sqrt(x_sq + eps)
            Wnorm = K.sqrt(W_sq + eps)

        # Build new tensors rather than using `x /= ...`, so the caller's
        # input object is never modified in place.
        output = K.dot(x / xnorm, self.W / Wnorm)
        if self.bias:
            output = output + self.b / (xnorm * Wnorm)
        return self.activation(output)

    def get_output_shape_for(self, input_shape):
        """Return `input_shape` with its last entry replaced by `output_dim`."""
        assert input_shape and len(input_shape) >= 2
        assert input_shape[-1] and input_shape[-1] == self.input_dim
        output_shape = list(input_shape)
        output_shape[-1] = self.output_dim
        return tuple(output_shape)

    def get_config(self):
        """Return the layer configuration merged with the base class config."""
        config = {'output_dim': self.output_dim,
                  'init': self.init.__name__,
                  'activation': self.activation.__name__,
                  'W_regularizer': self.W_regularizer.get_config() if self.W_regularizer else None,
                  'b_regularizer': self.b_regularizer.get_config() if self.b_regularizer else None,
                  'activity_regularizer': self.activity_regularizer.get_config() if self.activity_regularizer else None,
                  'W_constraint': self.W_constraint.get_config() if self.W_constraint else None,
                  'b_constraint': self.b_constraint.get_config() if self.b_constraint else None,
                  'bias': self.bias,
                  'input_dim': self.input_dim}
        base_config = super(CosineDense, self).get_config()
        # Entries defined here take precedence over same-named base entries.
        return dict(list(base_config.items()) + list(config.items()))
# Make the layer available under the key "CosineDense" in the mapping
# returned by `get_custom_objects()`.
get_custom_objects()["CosineDense"] = CosineDense
+54
View File
@@ -5,6 +5,60 @@ from keras import backend as K
from keras_contrib import backend as KC
from keras_contrib.layers import core
from keras.utils.test_utils import layer_test, keras_test
from numpy.testing import assert_allclose
@keras_test
def test_cosinedense():
    """Run generic layer checks on CosineDense and verify its cosine output."""
    from keras import regularizers
    from keras import constraints
    from keras.models import Sequential

    # Generic layer checks for 2D, 3D, partially-unknown 3D and 4D inputs.
    plain_shapes = [(3, 2),
                    (3, 4, 2),
                    (None, None, 2),
                    (3, 4, 5, 2)]
    for shape in plain_shapes:
        layer_test(core.CosineDense,
                   kwargs={'output_dim': 3},
                   input_shape=shape)

    # Same generic checks with every regularizer and constraint switched on.
    constrained_kwargs = {
        'output_dim': 3,
        'W_regularizer': regularizers.l2(0.01),
        'b_regularizer': regularizers.l1(0.01),
        'activity_regularizer': regularizers.activity_l2(0.01),
        'W_constraint': constraints.MaxNorm(1),
        'b_constraint': constraints.MaxNorm(1),
    }
    layer_test(core.CosineDense,
               kwargs=constrained_kwargs,
               input_shape=(3, 2))

    # Numeric checks: weights equal to the input (bias 1) must give a
    # cosine of +1; weights equal to the negated input (no bias) must give -1.
    for use_bias, sign in ((True, 1.), (False, -1.)):
        X = np.random.randn(1, 20)
        model = Sequential()
        model.add(core.CosineDense(1, bias=use_bias, input_shape=(20,)))
        model.compile(loss='mse', optimizer='rmsprop')

        weights = model.get_weights()
        weights[0] = sign * X.T
        if use_bias:
            weights[1] = np.asarray([1.])
        model.set_weights(weights)

        out = model.predict(X)
        expected = sign * np.ones((1, 1), dtype=K.floatx())
        assert_allclose(out, expected, atol=1e-5)
if __name__ == '__main__':