From c0ceaf02fe516ab24aa72fdb475f788174fdfde1 Mon Sep 17 00:00:00 2001
From: Armen
Date: Thu, 6 Apr 2017 08:20:15 -0700
Subject: [PATCH] Implemented convolution aware initialization (#60)

* Implemented convolution aware initialization

* added support for conv3d in the CAI scheme

* added tests for convolution aware initialization
---
 keras_contrib/initializers.py            | 133 +++++++++++++++++++++++
 tests/keras_contrib/initializers_test.py |  35 +++++-
 2 files changed, 165 insertions(+), 3 deletions(-)

diff --git a/keras_contrib/initializers.py b/keras_contrib/initializers.py
index 07b630c..ba7d489 100644
--- a/keras_contrib/initializers.py
+++ b/keras_contrib/initializers.py
@@ -2,3 +2,136 @@ from __future__ import absolute_import
 import numpy as np
 from . import backend as K
 from keras.initializers import *
+from keras.initializers import _compute_fans
+
+
+class ConvolutionAware(Initializer):
+    """Generates orthogonal convolution filters in Fourier space.
+
+    Kernels of rank 3, 4 or 5 are built from an orthogonal basis that is
+    mapped back through the inverse real FFT. If this initializer is passed
+    a shape of any other rank, orthogonal initialization will be used.
+
+    # Arguments
+        eps_std: Standard deviation for the random normal noise used to break
+            symmetry in the inverse fourier transform.
+        seed: A Python integer. Used to seed the random generator.
+
+    # References
+        Armen Aghajanyan, https://arxiv.org/abs/1702.06295
+    """
+
+    def __init__(self, eps_std=0.05, seed=None):
+        self.eps_std = eps_std
+        self.seed = seed
+        # Used by __call__ for shapes that are not rank 3, 4 or 5.
+        self.orthogonal = Orthogonal()
+
+    def __call__(self, shape):
+        """Builds the initial value for a weight of shape `shape`.
+
+        # Arguments
+            shape: Tuple of integers. Ranks 3, 4 and 5 are unpacked as the
+                kernel dimensions followed by `(stack_size, filters_size)`.
+
+        # Returns
+            A Numpy array of shape `shape` for ranks 3, 4 and 5; for any
+            other rank, a backend variable created from `Orthogonal`.
+        """
+        rank = len(shape)
+
+        if self.seed is not None:
+            np.random.seed(self.seed)
+
+        fan_in, _ = _compute_fans(shape, K.image_data_format())
+        # Float literal so that the division is not truncated on Python 2,
+        # where `2 / fan_in` would be 0 and zero out every filter.
+        variance = 2.0 / fan_in
+
+        # The filters are assembled below as an array laid out as
+        # (filters, stack) + kernel_shape. `transpose_dimensions` reorders it
+        # to kernel_shape + (stack, filters), i.e. the requested `shape`.
+        if rank == 3:
+            row, stack_size, filters_size = shape
+
+            transpose_dimensions = (2, 1, 0)
+            kernel_shape = (row,)
+            # irfft expects a scalar length, not a shape tuple.
+            correct_ifft = lambda a, s: np.fft.irfft(a, s[0])
+            correct_fft = np.fft.rfft
+
+        elif rank == 4:
+            row, column, stack_size, filters_size = shape
+
+            transpose_dimensions = (2, 3, 1, 0)
+            kernel_shape = (row, column)
+            correct_ifft = np.fft.irfft2
+            correct_fft = np.fft.rfft2
+
+        elif rank == 5:
+            x, y, z, stack_size, filters_size = shape
+
+            transpose_dimensions = (2, 3, 4, 1, 0)
+            kernel_shape = (x, y, z)
+            correct_fft = np.fft.rfftn
+            correct_ifft = np.fft.irfftn
+        else:
+            return K.variable(self.orthogonal(shape), dtype=K.floatx())
+
+        # Shape of the spectrum the real FFT produces for a single kernel.
+        kernel_fourier_shape = correct_fft(np.zeros(kernel_shape)).shape
+
+        init = []
+        for _ in range(filters_size):
+            basis = self._create_basis(
+                stack_size, np.prod(kernel_fourier_shape))
+            basis = basis.reshape((stack_size,) + kernel_fourier_shape)
+
+            # Add noise to break symmetry. The loop variable is not named `x`
+            # because on Python 2 it would overwrite the rank-5 `x` above.
+            filters = [correct_ifft(b, kernel_shape) +
+                       np.random.normal(0, self.eps_std, kernel_shape)
+                       for b in basis]
+
+            init.append(filters)
+
+        # Format of array is now: filters, stack, kernel dimensions
+        init = np.array(init)
+        init = self._scale_filters(init, variance)
+        return init.transpose(transpose_dimensions)
+
+    def _create_basis(self, filters, size):
+        """Returns `filters` row vectors of length `size`.
+
+        The rows are left singular vectors of random symmetric matrices;
+        `filters // size + 1` matrices are drawn so that enough rows exist.
+        For `size == 1` plain normal noise with std `eps_std` is returned.
+        """
+        if size == 1:
+            return np.random.normal(0.0, self.eps_std, (filters, size))
+
+        nbb = filters // size + 1
+        li = []
+        for _ in range(nbb):
+            a = np.random.normal(0.0, 1.0, (size, size))
+            a = self._symmetrize(a)
+            u = np.linalg.svd(a)[0]
+            li.extend(u.T.tolist())
+        return np.array(li[:filters], dtype=K.floatx())
+
+    def _symmetrize(self, a):
+        """Returns `a + a.T` with the diagonal of `a` counted only once."""
+        return a + a.T - np.diag(a.diagonal())
+
+    def _scale_filters(self, filters, variance):
+        """Rescales `filters` so that their overall variance is `variance`."""
+        c_var = np.var(filters)
+        p = np.sqrt(variance / c_var)
+        return filters * p
+
+    def get_config(self):
+        """Returns the constructor arguments as a dict."""
+        return {
+            'eps_std': self.eps_std,
+            'seed': self.seed
+        }
diff --git a/tests/keras_contrib/initializers_test.py b/tests/keras_contrib/initializers_test.py
index 14ee202..8472f69 100644
--- a/tests/keras_contrib/initializers_test.py
+++ b/tests/keras_contrib/initializers_test.py
@@ -8,7 +8,8 @@ import numpy as np
 # 2D tensor test fixture
 FC_SHAPE = (100, 100)
 
-# 4D convolution in th order. This shape has the same effective shape as FC_SHAPE
+# 4D convolution in th order. This shape has the same effective shape as
+# FC_SHAPE
 CONV_SHAPE = (25, 25, 2, 2)
 
 # The equivalent shape of both test fixtures
@@ -16,9 +17,17 @@ SHAPE = (100, 100)
 
 
 def _runner(init, shape, target_mean=None, target_std=None,
-            target_max=None, target_min=None):
+            target_max=None, target_min=None,
+            upper_bound=None, lower_bound=None):
     variable = init(shape)
-    output = K.get_value(variable)
+    if not isinstance(variable, np.ndarray):
+        output = K.get_value(variable)
+    else:
+        output = variable
+
+    # Whatever its type, the result must have the requested shape.
+    assert output.shape == tuple(shape)
+
     lim = 1e-2
     if target_std is not None:
         assert abs(output.std() - target_std) < lim
@@ -28,6 +37,10 @@ def _runner(init, shape, target_mean=None, target_std=None,
         assert abs(output.max() - target_max) < lim
     if target_min is not None:
         assert abs(output.min() - target_min) < lim
+    if upper_bound is not None:
+        assert output.max() < upper_bound
+    if lower_bound is not None:
+        assert output.min() > lower_bound
 
 
 '''
@@ -41,5 +54,21 @@ def test_uniform(tensor_shape):
 
 
 '''
+@pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE],
+                         ids=['FC', "CONV"])
+def test_cai(tensor_shape):
+    # upper and lower bounds are proved in original paper
+    _runner(initializers.ConvolutionAware(), tensor_shape,
+            upper_bound=1, lower_bound=-1)
+
+
+@pytest.mark.parametrize('tensor_shape',
+                         [(3, 2, 4), (3, 3, 2, 4), (3, 3, 3, 2, 4)],
+                         ids=['CONV1D', 'CONV2D', 'CONV3D'])
+def test_cai_shape(tensor_shape):
+    # stack size != filter count, so a wrong axis order changes the shape
+    _runner(initializers.ConvolutionAware(), tensor_shape)
+
+
 if __name__ == '__main__':
     pytest.main([__file__])