Merge pull request #169 from ahundt/remove_deconv3d

remove Deconvolution3D, add missing backend functions
2026-06-27 16:10:11 +08:00 · 2017-10-18 02:10:47 -04:00
parent b0a2fbabfd ad120f95e9
commit fd73547011
4 changed files with 53 additions and 376 deletions
@@ -6,25 +6,66 @@ try:
 except ImportError:
    import tensorflow.contrib.ctc as ctc
 from keras.backend import tensorflow_backend as KTF
-from keras.backend.common import floatx, image_data_format
-from keras.backend.tensorflow_backend import _preprocess_conv3d_input
-from keras.backend.tensorflow_backend import _postprocess_conv3d_output
-from keras.backend.tensorflow_backend import _preprocess_padding
-from keras.backend.tensorflow_backend import _preprocess_conv2d_input
-from keras.backend.tensorflow_backend import _postprocess_conv2d_output
+from keras.backend import dtype
+from keras.backend.common import floatx
+from keras.backend.common import image_data_format
 from keras.backend.tensorflow_backend import _to_tensor

 py_all = all


-def _preprocess_deconv_output_shape(x, shape, data_format):
+def _preprocess_conv2d_input(x, data_format):
+    """Transpose and cast the input before the conv2d.
+    # Arguments
+        x: input tensor.
+        data_format: string, `"channels_last"` or `"channels_first"`.
+    # Returns
+        A tensor.
+    """
+    if dtype(x) == 'float64':
+        x = tf.cast(x, 'float32')
    if data_format == 'channels_first':
-        shape = (shape[0],) + tuple(shape[2:]) + (shape[1],)
+        # TF uses the last dimension as channel dimension,
+        # instead of the 2nd one.
+        # TH input shape: (samples, input_depth, rows, cols)
+        # TF input shape: (samples, rows, cols, input_depth)
+        x = tf.transpose(x, (0, 2, 3, 1))
+    return x

-    if shape[0] is None:
-        shape = (tf.shape(x)[0],) + tuple(shape[1:])
-        shape = tf.stack(list(shape))
-    return shape
+
+def _postprocess_conv2d_output(x, data_format):
+    """Transpose and cast the output from conv2d if needed.
+    # Arguments
+        x: A tensor.
+        data_format: string, `"channels_last"` or `"channels_first"`.
+    # Returns
+        A tensor.
+    """
+
+    if data_format == 'channels_first':
+        x = tf.transpose(x, (0, 3, 1, 2))
+
+    if floatx() == 'float64':
+        x = tf.cast(x, 'float64')
+    return x
+
+
+def _preprocess_padding(padding):
+    """Convert keras' padding to tensorflow's padding.
+    # Arguments
+        padding: string, `"same"` or `"valid"`.
+    # Returns
+        a string, `"SAME"` or `"VALID"`.
+    # Raises
+        ValueError: if `padding` is invalid.
+    """
+    if padding == 'same':
+        padding = 'SAME'
+    elif padding == 'valid':
+        padding = 'VALID'
+    else:
+        raise ValueError('Invalid padding:', padding)
+    return padding


 def conv2d(x, kernel, strides=(1, 1), padding='valid', data_format='channels_first',
@@ -72,45 +113,6 @@ def conv2d(x, kernel, strides=(1, 1), padding='valid', data_format='channels_fir
    return x


-def deconv3d(x, kernel, output_shape, strides=(1, 1, 1),
-             padding='valid',
-             data_format='default',
-             image_shape=None, filter_shape=None):
-    '''3D deconvolution (i.e. transposed convolution).
-
-    # Arguments
-        x: input tensor.
-        kernel: kernel tensor.
-        output_shape: 1D int tensor for the output shape.
-        strides: strides tuple.
-        padding: string, "same" or "valid".
-        data_format: "tf" or "th".
-            Whether to use Theano or TensorFlow dimension ordering
-            for inputs/kernels/ouputs.
-
-    # Returns
-        A tensor, result of transposed 3D convolution.
-
-    # Raises
-        ValueError: if `data_format` is neither `tf` or `th`.
-    '''
-    if data_format == 'default':
-        data_format = image_data_format()
-    if data_format not in {'channels_first', 'channels_last'}:
-        raise ValueError('Unknown data_format ' + str(data_format))
-
-    x = _preprocess_conv3d_input(x, data_format)
-    output_shape = _preprocess_deconv_output_shape(x, output_shape,
-                                                   data_format)
-    kernel = tf.transpose(kernel, (0, 1, 2, 4, 3))
-    padding = _preprocess_padding(padding)
-    strides = (1,) + strides + (1,)
-
-    x = tf.nn.conv3d_transpose(x, kernel, output_shape, strides,
-                               padding=padding)
-    return _postprocess_conv3d_output(x, data_format)
-
-
 def extract_image_patches(x, ksizes, ssizes, padding='same',
                          data_format='channels_last'):
    '''
@@ -86,56 +86,6 @@ def conv2d(x, kernel, strides=(1, 1), padding='valid', data_format='channels_fir
    return conv_out


-def deconv3d(x, kernel, output_shape, strides=(1, 1, 1),
-             padding='valid',
-             data_format=None, filter_shape=None):
-    '''3D deconvolution (transposed convolution).
-
-    # Arguments
-        kernel: kernel tensor.
-        output_shape: desired dimensions of output.
-        strides: strides tuple.
-        padding: string, "same" or "valid".
-        data_format: "channels_last" or "channels_first".
-            Whether to use Theano or TensorFlow dimension ordering
-        in inputs/kernels/ouputs.
-    '''
-    flip_filters = False
-    if data_format is None:
-        data_format = image_data_format()
-    if data_format not in {'channels_first', 'channels_last'}:
-        raise ValueError('Unknown data_format: ' + str(data_format))
-
-    if data_format == 'channels_last':
-        output_shape = (output_shape[0], output_shape[4], output_shape[1],
-                        output_shape[2], output_shape[3])
-
-    x = _preprocess_conv3d_input(x, data_format)
-    kernel = _preprocess_conv3d_kernel(kernel, data_format)
-    kernel = kernel.dimshuffle((1, 0, 2, 3, 4))
-    th_padding = _preprocess_padding(padding)
-
-    if hasattr(kernel, '_keras_shape'):
-        kernel_shape = kernel._keras_shape
-    else:
-        # Will only work if `kernel` is a shared variable.
-        kernel_shape = kernel.eval().shape
-
-    filter_shape = _preprocess_conv3d_filter_shape(filter_shape, data_format)
-    filter_shape = tuple(filter_shape[i] for i in (1, 0, 2, 3, 4))
-
-    conv_out = T.nnet.abstract_conv.conv3d_grad_wrt_inputs(
-        x, kernel, output_shape,
-        filter_shape=filter_shape,
-        border_mode=th_padding,
-        subsample=strides,
-        filter_flip=not flip_filters)
-
-    conv_out = _postprocess_conv3d_output(conv_out, x, padding,
-                                          kernel_shape, strides, data_format)
-    return conv_out
-
-
 def extract_image_patches(X, ksizes, strides, padding='valid', data_format='channels_first'):
    '''
    Extract the patches from an image
@@ -16,220 +16,6 @@ from keras.utils.conv_utils import normalize_data_format
 import numpy as np


-class Deconvolution3D(Convolution3D):
-    """Transposed convolution operator for filtering windows of 3-D inputs.
-
-    The need for transposed convolutions generally arises from the desire to
-    use a transformation going in the opposite direction
-    of a normal convolution, i.e., from something that has the shape
-    of the output of some convolution to something that has the shape
-    of its input while maintaining a connectivity pattern
-    that is compatible with said convolution.
-
-    When using this layer as the first layer in a model,
-    provide the keyword argument `input_shape`
-    (tuple of integers, does not include the sample axis),
-    e.g. `input_shape=(3, 128, 128, 128)` for a 128x128x128 volume with
-    three channels.
-
-    To pass the correct `output_shape` to this layer,
-    one could use a test model to predict and observe the actual output shape.
-
-    # Examples
-
-    ```python
-        # TH dim ordering.
-        # apply a 3x3x3 transposed convolution
-        # with stride 1x1x1 and 3 output filters on a 12x12x12 image:
-        model = Sequential()
-        model.add(Deconvolution3D(3, 3, 3, 3, output_shape=(None, 3, 14, 14, 14),
-                                  padding='valid',
-                                  input_shape=(3, 12, 12, 12)))
-
-        # we can predict with the model and print the shape of the array.
-        dummy_input = np.ones((32, 3, 12, 12, 12))
-        preds = model.predict(dummy_input)
-        print(preds.shape)  # (None, 3, 14, 14, 14)
-
-        # apply a 3x3x3 transposed convolution
-        # with stride 2x2x2 and 3 output filters on a 12x12x12 image:
-        model = Sequential()
-        model.add(Deconvolution3D(3, 3, 3, 3, output_shape=(None, 3, 25, 25, 25),
-                                  strides=(2, 2, 2),
-                                  padding='valid',
-                                  input_shape=(3, 12, 12, 12)))
-        model.summary()
-
-        # we can predict with the model and print the shape of the array.
-        dummy_input = np.ones((32, 3, 12, 12, 12))
-        preds = model.predict(dummy_input)
-        print(preds.shape)  # (None, 3, 25, 25, 25)
-    ```
-
-    ```python
-        # TF dim ordering.
-        # apply a 3x3x3 transposed convolution
-        # with stride 1x1x1 and 3 output filters on a 12x12x12 image:
-        model = Sequential()
-        model.add(Deconvolution3D(3, 3, 3, 3, output_shape=(None, 14, 14, 14, 3),
-                                  padding='valid',
-                                  input_shape=(12, 12, 12, 3)))
-
-        # we can predict with the model and print the shape of the array.
-        dummy_input = np.ones((32, 12, 12, 12, 3))
-        preds = model.predict(dummy_input)
-        print(preds.shape)  # (None, 14, 14, 14, 3)
-
-        # apply a 3x3x3 transposed convolution
-        # with stride 2x2x2 and 3 output filters on a 12x12x12 image:
-        model = Sequential()
-        model.add(Deconvolution3D(3, 3, 3, 3, output_shape=(None, 25, 25, 25, 3),
-                                  strides=(2, 2, 2),
-                                  padding='valid',
-                                  input_shape=(12, 12, 12, 3)))
-        model.summary()
-
-        # we can predict with the model and print the shape of the array.
-        dummy_input = np.ones((32, 12, 12, 12, 3))
-        preds = model.predict(dummy_input)
-        print(preds.shape)  # (None, 25, 25, 25, 3)
-    ```
-
-    # Arguments
-        filters: Number of transposed convolution filters to use.
-        kernel_size: kernel_size: An integer or tuple/list of 3 integers, specifying the
-            dimensions of the convolution window.
-        output_shape: Output shape of the transposed convolution operation.
-            tuple of integers
-            `(nb_samples, filters, conv_dim1, conv_dim2, conv_dim3)`.
-             It is better to use
-             a dummy input and observe the actual output shape of
-             a layer, as specified in the examples.
-        init: name of initialization function for the weights of the layer
-            (see [initializers](../initializers.md)), or alternatively,
-            Theano function to use for weights initialization.
-            This parameter is only relevant if you don't pass
-            a `weights` argument.
-        activation: name of activation function to use
-            (see [activations](../activations.md)),
-            or alternatively, elementwise Theano/TensorFlow function.
-            If you don't specify anything, no activation is applied
-            (ie. "linear" activation: a(x) = x).
-        weights: list of numpy arrays to set as initial weights.
-        padding: 'valid', 'same' or 'full'
-            ('full' requires the Theano backend).
-        strides: tuple of length 3. Factor by which to oversample output.
-            Also called strides elsewhere.
-        kernel_regularizer: instance of [WeightRegularizer](../regularizers.md)
-            (eg. L1 or L2 regularization), applied to the main weights matrix.
-        bias_regularizer: instance of [WeightRegularizer](../regularizers.md),
-            applied to the use_bias.
-        activity_regularizer: instance of [ActivityRegularizer](../regularizers.md),
-            applied to the network output.
-        kernel_constraint: instance of the [constraints](../constraints.md) module
-            (eg. maxnorm, nonneg), applied to the main weights matrix.
-        bias_constraint: instance of the [constraints](../constraints.md) module,
-            applied to the use_bias.
-        data_format: 'channels_first' or 'channels_last'. In 'channels_first' mode, the channels dimension
-            (the depth) is at index 1, in 'channels_last' mode is it at index 4.
-            It defaults to the `image_data_format` value found in your
-            Keras config file at `~/.keras/keras.json`.
-            If you never set it, then it will be "tf".
-        use_bias: whether to include a use_bias
-            (i.e. make the layer affine rather than linear).
-
-    # Input shape
-        5D tensor with shape:
-        `(samples, channels, conv_dim1, conv_dim2, conv_dim3)` if data_format='channels_first'
-        or 5D tensor with shape:
-        `(samples, conv_dim1, conv_dim2, conv_dim3, channels)` if data_format='channels_last'.
-
-    # Output shape
-        5D tensor with shape:
-        `(samples, filters, nekernel_conv_dim1, nekernel_conv_dim2, nekernel_conv_dim3)` if data_format='channels_first'
-        or 5D tensor with shape:
-        `(samples, nekernel_conv_dim1, nekernel_conv_dim2, nekernel_conv_dim3, filters)` if data_format='channels_last'.
-        `nekernel_conv_dim1`, `nekernel_conv_dim2` and `nekernel_conv_dim3` values might have changed due to padding.
-
-    # References
-        - [A guide to convolution arithmetic for deep learning](https://arxiv.org/abs/1603.07285v1)
-        - [Transposed convolution arithmetic](http://deeplearning.net/software/theano_versions/dev/tutorial/conv_arithmetic.html#transposed-convolution-arithmetic)
-        - [Deconvolutional Networks](http://www.matthewzeiler.com/pubs/cvpr2010/cvpr2010.pdf)
-    """
-
-    def __init__(self, filters, kernel_size,
-                 output_shape, activation=None, weights=None,
-                 padding='valid', strides=(1, 1, 1), data_format=None,
-                 kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None,
-                 kernel_constraint=None, bias_constraint=None,
-                 use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', **kwargs):
-        if padding not in {'valid', 'same', 'full'}:
-            raise ValueError('Invalid border mode for Deconvolution3D:', padding)
-        if len(output_shape) == 4:
-            # missing the batch size
-            output_shape = (None,) + tuple(output_shape)
-
-        self.output_shape_ = output_shape
-
-        super(Deconvolution3D, self).__init__(kernel_size=kernel_size,
-                                              filters=filters,
-                                              activation=activation,
-                                              weights=weights,
-                                              padding=padding,
-                                              strides=strides,
-                                              data_format=data_format,
-                                              kernel_regularizer=kernel_regularizer,
-                                              bias_regularizer=bias_regularizer,
-                                              activity_regularizer=activity_regularizer,
-                                              kernel_constraint=kernel_constraint,
-                                              bias_constraint=bias_constraint,
-                                              use_bias=use_bias,
-                                              kernel_initializer=kernel_initializer,
-                                              bias_initializer=bias_initializer,
-                                              **kwargs)
-
-    def compute_output_shape(self, input_shape):
-        if self.data_format == 'channels_first':
-            conv_dim1 = self.output_shape_[2]
-            conv_dim2 = self.output_shape_[3]
-            conv_dim3 = self.output_shape_[4]
-            return (input_shape[0], self.filters, conv_dim1, conv_dim2, conv_dim3)
-        elif self.data_format == 'channels_last':
-            conv_dim1 = self.output_shape_[1]
-            conv_dim2 = self.output_shape_[2]
-            conv_dim3 = self.output_shape_[3]
-            return (input_shape[0], conv_dim1, conv_dim2, conv_dim3, self.filters)
-        else:
-            raise ValueError('Invalid data format: ', self.data_format)
-
-    def call(self, x, mask=None):
-        kernel_shape = K.get_value(self.kernel).shape
-        output = K.deconv3d(x, self.kernel, self.output_shape_,
-                            strides=self.strides,
-                            padding=self.padding,
-                            data_format=self.data_format,
-                            filter_shape=kernel_shape)
-        if self.use_bias:
-            if self.data_format == 'channels_first':
-                output += K.reshape(self.bias, (1, self.filters, 1, 1, 1))
-            elif self.data_format == 'channels_last':
-                output += K.reshape(self.bias, (1, 1, 1, 1, self.filters))
-            else:
-                raise ValueError('Invalid data_format: ', self.data_format)
-        output = self.activation(output)
-        return output
-
-    def get_config(self):
-        config = {'output_shape': self.output_shape_}
-        base_config = super(Deconvolution3D, self).get_config()
-        return dict(list(base_config.items()) + list(config.items()))
-
-
-Deconv3D = Deconvolution3D
-get_custom_objects().update({'Deconvolution3D': Deconvolution3D})
-get_custom_objects().update({'Deconv3D': Deconv3D})
-
-
 class CosineConvolution2D(Layer):
    """Cosine Normalized Convolution operator for filtering windows of two-dimensional inputs.
    Cosine Normalization: Using Cosine Similarity Instead of Dot Product in Neural Networks
@@ -17,67 +17,6 @@ else:
    _convolution_border_modes = ['valid', 'same']


-@keras_test
-def test_deconvolution_3d():
-    num_samples = 6
-    num_filter = 4
-    stack_size = 2
-    kernel_dim1 = 12
-    kernel_dim2 = 10
-    kernel_dim3 = 8
-
-    for batch_size in [None, num_samples]:
-        for border_mode in _convolution_border_modes:
-            for subsample in [(1, 1, 1), (2, 2, 2)]:
-                if border_mode == 'same' and subsample != (1, 1, 1):
-                    continue
-
-                dim1 = conv_input_length(kernel_dim1, 7,
-                                         border_mode,
-                                         subsample[0])
-                dim2 = conv_input_length(kernel_dim2, 5,
-                                         border_mode,
-                                         subsample[1])
-                dim3 = conv_input_length(kernel_dim3, 3,
-                                         border_mode,
-                                         subsample[2])
-                layer_test(convolutional.Deconvolution3D,
-                           kwargs={'filters': num_filter,
-                                   'kernel_size': (7, 5, 3),
-                                   'output_shape': (batch_size, num_filter, dim1, dim2, dim3),
-                                   'padding': border_mode,
-                                   'strides': subsample,
-                                   'data_format': 'channels_first'},
-                           input_shape=(num_samples, stack_size, kernel_dim1, kernel_dim2, kernel_dim3),
-
-                           fixed_batch_size=True, tolerance=None)
-
-                layer_test(convolutional.Deconvolution3D,
-                           kwargs={'filters': num_filter,
-                                   'kernel_size': (7, 5, 3),
-                                   'output_shape': (batch_size, num_filter, dim1, dim2, dim3),
-                                   'padding': border_mode,
-                                   'strides': subsample,
-                                   'data_format': 'channels_first',
-                                   'kernel_regularizer': 'l2',
-                                   'bias_regularizer': 'l2',
-                                   'activity_regularizer': 'l2'},
-                           input_shape=(num_samples, stack_size, kernel_dim1, kernel_dim2, kernel_dim3),
-                           fixed_batch_size=True, tolerance=None)
-
-                layer_test(convolutional.Deconvolution3D,
-                           kwargs={'filters': num_filter,
-                                   'kernel_size': (7, 5, 3),
-                                   'output_shape': (num_filter, dim1, dim2, dim3),
-                                   'padding': border_mode,
-                                   'strides': subsample,
-                                   'data_format': 'channels_first',
-                                   'kernel_regularizer': 'l2',
-                                   'bias_regularizer': 'l2',
-                                   'activity_regularizer': 'l2'},
-                           input_shape=(num_samples, stack_size, kernel_dim1, kernel_dim2, kernel_dim3), tolerance=None)
-
-
@keras_test
 def test_cosineconvolution_2d():
    num_samples = 2