From 7577b8780a33f2ff198afe42fad541301c095dae Mon Sep 17 00:00:00 2001 From: David Stygstra Date: Tue, 29 Aug 2017 22:44:05 -0700 Subject: [PATCH] Fix BatchRenormalization clipping (fixes #127) Previouly, a constant value was used for the clipping parameters. This meant that it stayed at r=1 and d=0 forever, making it essentially equivalent to regular batch normalization. --- keras_contrib/backend/cntk_backend.py | 24 ++++++++++++ keras_contrib/backend/tensorflow_backend.py | 27 ++++++++++++++ keras_contrib/backend/theano_backend.py | 24 ++++++++++++ keras_contrib/layers/normalization.py | 14 +++---- tests/keras_contrib/backend/backend_test.py | 37 +++++++++++++++++++ .../layers/test_normalization.py | 32 ++++++++++++++++ 6 files changed, 150 insertions(+), 8 deletions(-) diff --git a/keras_contrib/backend/cntk_backend.py b/keras_contrib/backend/cntk_backend.py index 363ad08..624aeee 100644 --- a/keras_contrib/backend/cntk_backend.py +++ b/keras_contrib/backend/cntk_backend.py @@ -1,2 +1,26 @@ from keras.backend import cntk_backend as KCN import cntk as C +import numpy as np + + +def clip(x, min_value, max_value): + """Element-wise value clipping. + + If min_value > max_value, clipping range is [min_value,min_value]. + + # Arguments + x: Tensor or variable. + min_value: Tensor, float, int, or None. + If min_value is None, defaults to -infinity. + max_value: Tensor, float, int, or None. + If max_value is None, defaults to infinity. + + # Returns + A tensor. + """ + if max_value is None: + max_value = np.inf + if min_value is None: + min_value = -np.inf + max_value = C.maximum(min_value, max_value) + return C.clip(x, min_value, max_value) diff --git a/keras_contrib/backend/tensorflow_backend.py b/keras_contrib/backend/tensorflow_backend.py index 7b69687..82b3c7f 100644 --- a/keras_contrib/backend/tensorflow_backend.py +++ b/keras_contrib/backend/tensorflow_backend.py @@ -1,4 +1,5 @@ import tensorflow as tf +import numpy as np try: from tensorflow.python.ops import ctc_ops as ctc @@ -11,6 +12,7 @@ from keras.backend.tensorflow_backend import _postprocess_conv3d_output from keras.backend.tensorflow_backend import _preprocess_padding from keras.backend.tensorflow_backend import _preprocess_conv2d_input from keras.backend.tensorflow_backend import _postprocess_conv2d_output +from keras.backend.tensorflow_backend import _to_tensor py_all = all @@ -158,3 +160,28 @@ def moments(x, axes, shift=None, keep_dims=False): ''' Wrapper over tensorflow backend call ''' return tf.nn.moments(x, axes, shift=shift, keep_dims=keep_dims) + + +def clip(x, min_value, max_value): + """Element-wise value clipping. + + If min_value > max_value, clipping range is [min_value,min_value]. + + # Arguments + x: Tensor or variable. + min_value: Tensor, float, int, or None. + If min_value is None, defaults to -infinity. + max_value: Tensor, float, int, or None. + If max_value is None, defaults to infinity. + + # Returns + A tensor. + """ + if max_value is None: + max_value = np.inf + if min_value is None: + min_value = -np.inf + min_value = _to_tensor(min_value, x.dtype.base_dtype) + max_value = _to_tensor(max_value, x.dtype.base_dtype) + max_value = tf.maximum(min_value, max_value) + return tf.clip_by_value(x, min_value, max_value) diff --git a/keras_contrib/backend/theano_backend.py b/keras_contrib/backend/theano_backend.py index 2b5adaf..9e97084 100644 --- a/keras_contrib/backend/theano_backend.py +++ b/keras_contrib/backend/theano_backend.py @@ -1,5 +1,6 @@ from theano import tensor as T from theano.sandbox.neighbours import images2neibs +import numpy as np try: import theano.sparse as th_sparse_module @@ -197,3 +198,26 @@ def moments(x, axes, shift=None, keep_dims=False): var_batch = KTH.var(x, axis=axes, keepdims=keep_dims) return mean_batch, var_batch + + +def clip(x, min_value, max_value): + """Element-wise value clipping. + + If min_value > max_value, clipping range is [min_value,min_value]. + + # Arguments + x: Tensor or variable. + min_value: Tensor, float, int, or None. + If min_value is None, defaults to -infinity. + max_value: Tensor, float, int, or None. + If max_value is None, defaults to infinity. + + # Returns + A tensor. + """ + if max_value is None: + max_value = np.inf + if min_value is None: + min_value = -np.inf + max_value = T.maximum(min_value, max_value) + return T.clip(x, min_value, max_value) diff --git a/keras_contrib/layers/normalization.py b/keras_contrib/layers/normalization.py index 10565b2..40254cd 100644 --- a/keras_contrib/layers/normalization.py +++ b/keras_contrib/layers/normalization.py @@ -266,13 +266,13 @@ class BatchRenormalization(Layer): name='{}_running_std'.format(self.name), trainable=False) - self.r_max = K.variable(np.ones((1,)), name='{}_r_max'.format(self.name)) + self.r_max = K.variable(1, name='{}_r_max'.format(self.name)) - self.d_max = K.variable(np.zeros((1,)), name='{}_d_max'.format(self.name)) + self.d_max = K.variable(0, name='{}_d_max'.format(self.name)) - self.t = K.variable(np.zeros((1,)), name='{}_t'.format(self.name)) + self.t = K.variable(0, name='{}_t'.format(self.name)) - self.t_delta_tensor = K.variable(np.array([self.t_delta])) + self.t_delta_tensor = K.constant(self.t_delta) if self.initial_weights is not None: self.set_weights(self.initial_weights) @@ -292,13 +292,11 @@ class BatchRenormalization(Layer): mean_batch, var_batch = K.moments(inputs, reduction_axes, shift=None, keep_dims=False) std_batch = (K.sqrt(var_batch + self.epsilon)) - r_max_value = K.get_value(self.r_max) r = std_batch / (K.sqrt(self.running_variance + self.epsilon)) - r = K.stop_gradient(K.clip(r, 1 / r_max_value, r_max_value)) + r = K.stop_gradient(K.clip(r, 1 / self.r_max, self.r_max)) - d_max_value = K.get_value(self.d_max) d = (mean_batch - self.running_mean) / K.sqrt(self.running_variance + self.epsilon) - d = K.stop_gradient(K.clip(d, -d_max_value, d_max_value)) + d = K.stop_gradient(K.clip(d, -self.d_max, self.d_max)) if sorted(reduction_axes) == range(K.ndim(inputs))[:-1]: x_normed_batch = (inputs - mean_batch) / std_batch diff --git a/tests/keras_contrib/backend/backend_test.py b/tests/keras_contrib/backend/backend_test.py index 64135b0..998bc6b 100644 --- a/tests/keras_contrib/backend/backend_test.py +++ b/tests/keras_contrib/backend/backend_test.py @@ -160,6 +160,43 @@ class TestBackend(object): assert_allclose(th_mean_val, tf_mean_val, rtol=1e-4) assert_allclose(th_var_val, tf_var_val, rtol=1e-4) + def test_clip(self): + check_single_tensor_operation('clip', (4, 2), min_value=0.4, max_value=0.6) + check_single_tensor_operation('clip', (4, 2), min_value=0.4, max_value=None) + + cases = [ + # (x, min_value, max_value, expected) + (1, 0, 2, 1), + (1, 2, 0, 2), + (-1, 0, 2, 0), + (-1, 2, 0, 2), + (3, 0, 2, 2), + (3, 2, 0, 2), + (1, 0, np.inf, 1), + (1, np.inf, 0, np.inf), + (1, 0, -np.inf, 0), + (1, -np.inf, 0, 0), + (-1, 0, -np.inf, 0), + (-1, -np.inf, 0, -1), + (1, 0, None, 1), + (-1, 0, None, 0), + + # NOTE: In the following two cases, Keras 2.0.8 raises an + # error on all backends, but this is a sensible extension. + (1, None, 0, 0), + (-1, None, 0, -1), + + # NOTE: In the following case, Keras 2.0.8 rasies an error + # for TensorFlow and Theano, but returns 0 for CNTK. This + # extends the TensorFlow and Theano backends to match the + # CNTK behavior instead of raising an error. + (0, None, None, 0), + ] + for K_, KC_ in [(KTF, KCTF), (KTH, KCTH)]: + for x, min_value, max_value, expected in cases: + actual = K_.eval(KC_.clip(K_.constant(x), min_value, max_value)) + assert_allclose(expected, actual, atol=1e-5) + if __name__ == '__main__': pytest.main([__file__]) diff --git a/tests/keras_contrib/layers/test_normalization.py b/tests/keras_contrib/layers/test_normalization.py index fe2a172..b63f2c7 100644 --- a/tests/keras_contrib/layers/test_normalization.py +++ b/tests/keras_contrib/layers/test_normalization.py @@ -305,5 +305,37 @@ def test_shared_batchrenorm(): new_model.train_on_batch(x, x) +@keras_test +def test_batchrenorm_clipping_schedule(): + '''Test that the clipping schedule isn't fixed at r_max=1, d_max=0''' + inp = Input(shape=(10,)) + bn = normalization.BatchRenormalization(t_delta=1.) + out = bn(inp) + model = Model(inp, out) + model.compile('sgd', 'mse') + + x = np.random.normal(5, 10, size=(2, 10)) + y = np.random.normal(5, 10, size=(2, 10)) + + r_max, d_max = K.get_value(bn.r_max), K.get_value(bn.d_max) + assert r_max == 1 + assert d_max == 0 + + for i in range(10): + model.train_on_batch(x, y) + + r_max, d_max = K.get_value(bn.r_max), K.get_value(bn.d_max) + assert_allclose([r_max, d_max], [3, 5], atol=1e-1) + + +@keras_test +def test_batchrenorm_get_config(): + '''Test that get_config works on a model with a batchrenorm layer.''' + x = Input(shape=(10,)) + y = normalization.BatchRenormalization()(x) + model = Model(x, y) + model.get_config() + + if __name__ == '__main__': pytest.main([__file__])