mirror of
https://github.com/wassname/keras-contrib.git
synced 2026-06-27 16:10:11 +08:00
Implemented convolution aware initialization (#60)
* Implemented convolution aware initialization * added support for conv3d in the CAI scheme * added tests for convolution aware initialization
This commit is contained in:
@@ -2,3 +2,105 @@ from __future__ import absolute_import
|
||||
import numpy as np
|
||||
from . import backend as K
|
||||
from keras.initializers import *
|
||||
from keras.initializers import _compute_fans
|
||||
|
||||
|
||||
class ConvolutionAware(Initializer):
    """
    Initializer that generates orthogonal convolution filters in the fourier
    space. If this initializer is passed a shape that is not 3D, 4D or 5D,
    orthogonal initialization will be used.

    # Arguments
        eps_std: Standard deviation for the random normal noise used to break
            symmetry in the inverse fourier transform.
        seed: A Python integer. Used to seed the random generator.

    # References
        Armen Aghajanyan, https://arxiv.org/abs/1702.06295
    """

    # Axis permutation, keyed by kernel rank, that maps the internal
    # (filters, stack, *kernel) layout built in `__call__` onto the requested
    # (*kernel, stack, filters) layout.
    _TRANSPOSE_DIMENSIONS = {
        3: (2, 1, 0),
        4: (2, 3, 1, 0),
        5: (2, 3, 4, 1, 0),
    }

    def __init__(self, eps_std=0.05, seed=None):
        self.eps_std = eps_std
        self.seed = seed
        # Fallback used for shapes that are not convolution kernels.
        self.orthogonal = Orthogonal()

    def __call__(self, shape):
        """Build the initial weights for a kernel of the given shape.

        # Arguments
            shape: Kernel shape. For rank 3, 4 and 5 it is read as
                `(*kernel_dims, stack_size, filters_size)`.

        # Returns
            For rank 3, 4 and 5 shapes, a numpy array with exactly the
            requested shape, rescaled to a variance of `2 / fan_in`.
            For any other rank, the orthogonal initializer's output wrapped
            in `K.variable`.
        """
        rank = len(shape)

        # NOTE: this reseeds numpy's *global* generator, so other users of
        # `np.random` in the same process are affected.
        if self.seed is not None:
            np.random.seed(self.seed)

        fan_in, _ = _compute_fans(shape, K.image_data_format())
        # Float literal: this module may run without true division, where
        # `2 / fan_in` would truncate to 0 and zero out every weight.
        variance = 2.0 / fan_in

        if rank not in self._TRANSPOSE_DIMENSIONS:
            return K.variable(self.orthogonal(shape), dtype=K.floatx())

        kernel_shape = tuple(shape[:-2])
        stack_size, filters_size = shape[-2], shape[-1]
        transpose_dimensions = self._TRANSPOSE_DIMENSIONS[rank]

        # Shape of the real-input FFT of one kernel. The n-dimensional
        # transforms cover the 1D, 2D and 3D kernels alike.
        kernel_fourier_shape = np.fft.rfftn(np.zeros(kernel_shape)).shape
        fourier_size = int(np.prod(kernel_fourier_shape))

        init = []
        for _ in range(filters_size):
            basis = self._create_basis(stack_size, fourier_size)
            basis = basis.reshape((stack_size,) + kernel_fourier_shape)

            # Bring each basis vector back to the spatial domain and add a
            # little noise to break symmetry.
            init.append([
                np.fft.irfftn(coefficients, kernel_shape) +
                np.random.normal(0, self.eps_std, kernel_shape)
                for coefficients in basis])

        # Format of array is now: filters, stack, *kernel dimensions
        init = np.array(init)
        init = self._scale_filters(init, variance)
        return init.transpose(transpose_dimensions)

    def _create_basis(self, filters, size):
        """Return a `(filters, size)` array of basis vectors.

        Rows are taken from the transposed left singular vectors of random
        symmetric `size x size` matrices; as many matrices are decomposed as
        needed to collect `filters` rows. When `size` is 1 the rows are
        plain normal noise with standard deviation `eps_std`.
        """
        if size == 1:
            return np.random.normal(0.0, self.eps_std, (filters, size))

        # Each SVD contributes `size` rows; this count always yields at
        # least `filters` of them.
        nbb = filters // size + 1
        rows = []
        for _ in range(nbb):
            a = np.random.normal(0.0, 1.0, (size, size))
            a = self._symmetrize(a)
            u, _, _ = np.linalg.svd(a)
            rows.extend(u.T.tolist())

        return np.array(rows[:filters], dtype=K.floatx())

    def _symmetrize(self, a):
        """Return the symmetric matrix `a + a.T` with `a`'s own diagonal."""
        return a + a.T - np.diag(a.diagonal())

    def _scale_filters(self, filters, variance):
        """Rescale `filters` so that its overall variance equals `variance`."""
        c_var = np.var(filters)
        p = np.sqrt(variance / c_var)
        return filters * p

    def get_config(self):
        """Return the constructor arguments as a dict."""
        return {
            'eps_std': self.eps_std,
            'seed': self.seed
        }
|
||||
|
||||
@@ -8,7 +8,8 @@ import numpy as np
|
||||
# 2D tensor test fixture
|
||||
FC_SHAPE = (100, 100)
|
||||
|
||||
# 4D convolution in th order. This shape has the same effective shape as FC_SHAPE
|
||||
# 4D convolution in th order. This shape has the same effective shape as
|
||||
# FC_SHAPE
|
||||
CONV_SHAPE = (25, 25, 2, 2)
|
||||
|
||||
# The equivalent shape of both test fixtures
|
||||
@@ -16,9 +17,13 @@ SHAPE = (100, 100)
|
||||
|
||||
|
||||
def _runner(init, shape, target_mean=None, target_std=None,
|
||||
target_max=None, target_min=None):
|
||||
target_max=None, target_min=None, upper_bound=None, lower_bound=None):
|
||||
variable = init(shape)
|
||||
output = K.get_value(variable)
|
||||
if not isinstance(variable, np.ndarray):
|
||||
output = K.get_value(variable)
|
||||
else:
|
||||
output = variable
|
||||
|
||||
lim = 1e-2
|
||||
if target_std is not None:
|
||||
assert abs(output.std() - target_std) < lim
|
||||
@@ -28,6 +33,10 @@ def _runner(init, shape, target_mean=None, target_std=None,
|
||||
assert abs(output.max() - target_max) < lim
|
||||
if target_min is not None:
|
||||
assert abs(output.min() - target_min) < lim
|
||||
if upper_bound is not None:
|
||||
assert output.max() < upper_bound
|
||||
if lower_bound is not None:
|
||||
assert output.min() > lower_bound
|
||||
|
||||
|
||||
'''
|
||||
@@ -41,5 +50,11 @@ def test_uniform(tensor_shape):
|
||||
'''
|
||||
|
||||
|
||||
@pytest.mark.parametrize('tensor_shape',
                         [FC_SHAPE, CONV_SHAPE],
                         ids=['FC', 'CONV'])
def test_cai(tensor_shape):
    """Check that ConvolutionAware weights lie strictly inside (-1, 1)."""
    initializer = initializers.ConvolutionAware()
    # The original paper proves these bounds on the weights.
    bounds = {'upper_bound': 1, 'lower_bound': -1}
    _runner(initializer, tensor_shape, **bounds)
|
||||
|
||||
if __name__ == '__main__':
    # Allow running this test module directly as a script.
    pytest.main(args=[__file__])
|
||||
|
||||
Reference in New Issue
Block a user