Implemented convolution aware initialization (#60)

* Implemented convolution aware initialization

* added support for conv3d in the CAI scheme

* added tests for convolution aware initialization
This commit is contained in:
Armen
2017-04-06 08:20:15 -07:00
committed by Michael Oliver
parent 52ebe4e7d5
commit c0ceaf02fe
2 changed files with 120 additions and 3 deletions
+102
View File
@@ -2,3 +2,105 @@ from __future__ import absolute_import
import numpy as np
from . import backend as K
from keras.initializers import *
from keras.initializers import _compute_fans
class ConvolutionAware(Initializer):
    """Initializer that generates orthogonal convolution filters in Fourier space.

    Kernel shapes of rank 3, 4 or 5 are interpreted as
    `spatial dims + (stack_size, filters_size)`. For any other rank,
    orthogonal initialization is used instead.

    # Arguments
        eps_std: Standard deviation for the random normal noise used to break
            symmetry in the inverse fourier transform.
        seed: A Python integer. Used to seed the random generator.

    # References
        Armen Aghajanyan, https://arxiv.org/abs/1702.06295
    """

    def __init__(self, eps_std=0.05, seed=None):
        self.eps_std = eps_std
        self.seed = seed
        # Fallback initializer for shapes that are not convolution kernels.
        self.orthogonal = Orthogonal()

    def __call__(self, shape):
        """Build the initial value for a weight of the given shape.

        # Arguments
            shape: Tuple of ints. For rank 3/4/5 it is unpacked as
                `kernel dims + (stack_size, filters_size)`.

        # Returns
            For rank 3/4/5: a NumPy array with exactly the requested shape.
            Otherwise: `K.variable` wrapping the orthogonal initialization.
        """
        rank = len(shape)

        if self.seed is not None:
            # Reseeds NumPy's global generator so repeated calls reproduce.
            np.random.seed(self.seed)

        fan_in, _ = _compute_fans(shape, K.image_data_format())
        # Float literal: with only `absolute_import` enabled, `2 / fan_in`
        # would be integer division (i.e. 0) on Python 2.
        variance = 2.0 / fan_in

        if rank == 3:
            correct_fft = np.fft.rfft

            # `irfft` expects a scalar output length, not a shape tuple.
            def correct_ifft(spectrum, s):
                return np.fft.irfft(spectrum, s[0])
        elif rank == 4:
            correct_fft = np.fft.rfft2
            correct_ifft = np.fft.irfft2
        elif rank == 5:
            correct_fft = np.fft.rfftn
            correct_ifft = np.fft.irfftn
        else:
            return K.variable(self.orthogonal(shape), dtype=K.floatx())

        kernel_shape = tuple(shape[:-2])
        stack_size, filters_size = shape[-2:]

        # Shape of the half-spectrum produced by the real FFT of one kernel.
        kernel_fourier_shape = correct_fft(np.zeros(kernel_shape)).shape
        fourier_size = np.prod(kernel_fourier_shape)

        init = []
        for _ in range(filters_size):
            basis = self._create_basis(stack_size, fourier_size)
            basis = basis.reshape((stack_size,) + kernel_fourier_shape)
            # Back to the spatial domain, plus noise to break symmetry.
            filters = [correct_ifft(spectrum, kernel_shape) +
                       np.random.normal(0, self.eps_std, kernel_shape)
                       for spectrum in basis]
            init.append(filters)

        # Format of array is now: filters, stack, *kernel dims
        init = np.array(init)
        init = self._scale_filters(init, variance)

        # Move the kernel dims to the front and swap (filters, stack) into
        # (stack, filters) so the result matches `shape`:
        # rank 3 -> (2, 1, 0), rank 4 -> (2, 3, 1, 0),
        # rank 5 -> (2, 3, 4, 1, 0).
        transpose_dimensions = tuple(range(2, rank)) + (1, 0)
        return init.transpose(transpose_dimensions)

    def _create_basis(self, filters, size):
        """Return `filters` row vectors of length `size`.

        Rows are taken from the left singular vectors of random symmetric
        matrices, drawing as many matrices as needed to get enough rows.
        When `size` is 1, plain normal noise is returned instead.
        """
        if size == 1:
            return np.random.normal(0.0, self.eps_std, (filters, size))

        # Each SVD yields `size` vectors; draw enough batches to cover all.
        nbb = filters // size + 1
        li = []
        for _ in range(nbb):
            a = np.random.normal(0.0, 1.0, (size, size))
            a = self._symmetrize(a)
            u = np.linalg.svd(a)[0]
            li.extend(u.T.tolist())
        return np.array(li[:filters], dtype=K.floatx())

    def _symmetrize(self, a):
        """Return `a + a.T` with the diagonal of `a` counted only once."""
        return a + a.T - np.diag(a.diagonal())

    def _scale_filters(self, filters, variance):
        """Rescale `filters` so that their overall variance is `variance`."""
        c_var = np.var(filters)
        p = np.sqrt(variance / c_var)
        return filters * p

    def get_config(self):
        """Return the constructor arguments as a dict."""
        return {
            'eps_std': self.eps_std,
            'seed': self.seed
        }
+18 -3
View File
@@ -8,7 +8,8 @@ import numpy as np
# 2D tensor test fixture
FC_SHAPE = (100, 100)
# 4D convolution in th order. This shape has the same effective shape as FC_SHAPE
# 4D convolution in th order. This shape has the same effective shape as
# FC_SHAPE
CONV_SHAPE = (25, 25, 2, 2)
# The equivalent shape of both test fixtures
@@ -16,9 +17,13 @@ SHAPE = (100, 100)
def _runner(init, shape, target_mean=None, target_std=None,
target_max=None, target_min=None):
target_max=None, target_min=None, upper_bound=None, lower_bound=None):
variable = init(shape)
output = K.get_value(variable)
if not isinstance(variable, np.ndarray):
output = K.get_value(variable)
else:
output = variable
lim = 1e-2
if target_std is not None:
assert abs(output.std() - target_std) < lim
@@ -28,6 +33,10 @@ def _runner(init, shape, target_mean=None, target_std=None,
assert abs(output.max() - target_max) < lim
if target_min is not None:
assert abs(output.min() - target_min) < lim
if upper_bound is not None:
assert output.max() < upper_bound
if lower_bound is not None:
assert output.min() > lower_bound
'''
@@ -41,5 +50,11 @@ def test_uniform(tensor_shape):
'''
@pytest.mark.parametrize(
    'tensor_shape',
    [FC_SHAPE, CONV_SHAPE],
    ids=['FC', "CONV"],
)
def test_cai(tensor_shape):
    """Check that ConvolutionAware output stays strictly inside (-1, 1)."""
    # The original paper is cited as the source of these bounds.
    cai_init = initializers.ConvolutionAware()
    _runner(cai_init, tensor_shape, upper_bound=1, lower_bound=-1)
# Allow running this test module directly as a script: hand the file to
# pytest instead of requiring the `pytest` command line.
if __name__ == '__main__':
    pytest.main([__file__])