Implemented convolution aware initialization (#60)

* Implemented convolution aware initialization * added support for conv3d in the CAI scheme * added tests for convolution aware initialization
2026-06-27 16:10:11 +08:00 · 2017-04-06 08:20:15 -07:00
parent 52ebe4e7d5
commit c0ceaf02fe
2 changed files with 120 additions and 3 deletions
@@ -2,3 +2,105 @@ from __future__ import absolute_import
 import numpy as np
 from . import backend as K
 from keras.initializers import *
+from keras.initializers import _compute_fans
+
+
+class ConvolutionAware(Initializer):
+    """
+    Initializer that generates orthogonal convolution filters in the fourier
+    space. If this initializer is passed a shape that is not 3D, 4D or 5D,
+    orthogonal initialization will be used.
+    # Arguments
+        eps_std: Standard deviation for the random normal noise used to break
+        symmetry in the inverse fourier transform.
+        seed: A Python integer. Used to seed the random generator.
+    # References
+        Armen Aghajanyan, https://arxiv.org/abs/1702.06295
+    """
+
+    def __init__(self, eps_std=0.05, seed=None):
+        self.eps_std = eps_std
+        self.seed = seed
+        self.orthogonal = Orthogonal()
+
+    def __call__(self, shape):
+        """
+        Builds the initial kernel values for `shape`.
+        # Arguments
+            shape: Shape of the kernel. For rank 3, 4 and 5 the last two
+            entries are read as (stack_size, filters_size) and the leading
+            entries as the spatial kernel dimensions.
+        # Returns
+            A numpy array of shape `shape` for rank 3, 4 or 5. For any
+            other rank, the result of the orthogonal initializer wrapped
+            in `K.variable`.
+        """
+        rank = len(shape)
+
+        # NOTE: this reseeds numpy's global random generator.
+        if self.seed is not None:
+            np.random.seed(self.seed)
+
+        fan_in, fan_out = _compute_fans(shape, K.image_data_format())
+        # Float literal on purpose: without true division (Python 2),
+        # `2 / fan_in` truncates to 0 and every filter is scaled to zero.
+        variance = 2.0 / fan_in
+
+        # The filters are assembled below as an array laid out as
+        # (filters, stack) + kernel_shape. `transpose_dimensions` maps that
+        # layout back to kernel_shape + (stack, filters), i.e. to `shape`.
+        if rank == 3:
+            row, stack_size, filters_size = shape
+
+            transpose_dimensions = (2, 1, 0)
+            kernel_shape = (row,)
+            # irfft expects a scalar length rather than a shape tuple.
+            correct_ifft = lambda freq, s: np.fft.irfft(freq, s[0])
+            correct_fft = np.fft.rfft
+
+        elif rank == 4:
+            row, column, stack_size, filters_size = shape
+
+            transpose_dimensions = (2, 3, 1, 0)
+            kernel_shape = (row, column)
+            correct_ifft = np.fft.irfft2
+            correct_fft = np.fft.rfft2
+
+        elif rank == 5:
+            x, y, z, stack_size, filters_size = shape
+
+            transpose_dimensions = (2, 3, 4, 1, 0)
+            kernel_shape = (x, y, z)
+            correct_fft = np.fft.rfftn
+            correct_ifft = np.fft.irfftn
+        else:
+            return K.variable(self.orthogonal(shape), dtype=K.floatx())
+
+        # Shape of the real FFT of a kernel; the basis lives in that space.
+        kernel_fourier_shape = correct_fft(np.zeros(kernel_shape)).shape
+
+        init = []
+        for _ in range(filters_size):
+            basis = self._create_basis(
+                stack_size, np.prod(kernel_fourier_shape))
+            basis = basis.reshape((stack_size,) + kernel_fourier_shape)
+
+            # Map every basis element back to the spatial domain and add
+            # a small amount of noise to break symmetry.
+            filters = [correct_ifft(freq, kernel_shape) +
+                       np.random.normal(0, self.eps_std, kernel_shape) for
+                       freq in basis]
+
+            init.append(filters)
+
+        # Format of array is now: filters, stack, then the kernel dimensions
+        init = np.array(init)
+        init = self._scale_filters(init, variance)
+        return init.transpose(transpose_dimensions)
+
+    def _create_basis(self, filters, size):
+        """
+        Returns an array of shape (filters, size) whose rows are taken from
+        the left singular vectors of random symmetric matrices. When
+        `size` is 1, random normal noise with std `eps_std` is returned
+        instead.
+        """
+        if size == 1:
+            return np.random.normal(0.0, self.eps_std, (filters, size))
+
+        # Every block yields `size` vectors; draw enough blocks to cover
+        # `filters` rows and truncate below.
+        nbb = filters // size + 1
+        li = []
+        for _ in range(nbb):
+            a = np.random.normal(0.0, 1.0, (size, size))
+            a = self._symmetrize(a)
+            u = np.linalg.svd(a)[0]
+            li.extend(u.T.tolist())
+        p = np.array(li[:filters], dtype=K.floatx())
+        return p
+
+    def _symmetrize(self, a):
+        """Returns a symmetric matrix built from `a`, keeping its diagonal."""
+        return a + a.T - np.diag(a.diagonal())
+
+    def _scale_filters(self, filters, variance):
+        """Rescales `filters` so that their overall variance is `variance`."""
+        c_var = np.var(filters)
+        p = np.sqrt(variance / c_var)
+        return filters * p
+
+    def get_config(self):
+        """Returns the constructor arguments as a dict."""
+        return {
+            'eps_std': self.eps_std,
+            'seed': self.seed
+        }
@@ -8,7 +8,8 @@ import numpy as np
 # 2D tensor test fixture
 FC_SHAPE = (100, 100)

-# 4D convolution in th order. This shape has the same effective shape as FC_SHAPE
+# 4D convolution in th order. This shape has the same effective shape as
+# FC_SHAPE
 CONV_SHAPE = (25, 25, 2, 2)

 # The equivalent shape of both test fixtures
@@ -16,9 +17,13 @@ SHAPE = (100, 100)


 def _runner(init, shape, target_mean=None, target_std=None,
-            target_max=None, target_min=None):
+            target_max=None, target_min=None, upper_bound=None, lower_bound=None):
    variable = init(shape)
-    output = K.get_value(variable)
+    if not isinstance(variable, np.ndarray):
+        output = K.get_value(variable)
+    else:
+        output = variable
+
    lim = 1e-2
    if target_std is not None:
        assert abs(output.std() - target_std) < lim
@@ -28,6 +33,10 @@ def _runner(init, shape, target_mean=None, target_std=None,
        assert abs(output.max() - target_max) < lim
    if target_min is not None:
        assert abs(output.min() - target_min) < lim
+    if upper_bound is not None:
+        assert output.max() < upper_bound
+    if lower_bound is not None:
+        assert output.min() > lower_bound


 '''
@@ -41,5 +50,11 @@ def test_uniform(tensor_shape):
 '''


+@pytest.mark.parametrize(
+    'tensor_shape',
+    [FC_SHAPE, CONV_SHAPE],
+    ids=['FC', "CONV"]
+)
+def test_cai(tensor_shape):
+    """Checks that ConvolutionAware output stays strictly inside (-1, 1)."""
+    # upper and lower bounds are proved in original paper
+    cai_init = initializers.ConvolutionAware()
+    _runner(cai_init, tensor_shape, upper_bound=1, lower_bound=-1)
+
 if __name__ == '__main__':
    pytest.main([__file__])