resnet.py initial integration (#148)

Sorry for the delay. Looks much better now.
2026-06-27 16:10:11 +08:00 · 2017-10-15 21:21:50 -04:00
parent 7dc329a5a3
commit b0a2fbabfd
2 changed files with 550 additions and 0 deletions
@@ -0,0 +1,96 @@
+"""
+Adapted from keras example cifar10_cnn.py and github.com/raghakot/keras-resnet
+Train ResNet-18 on the CIFAR10 small images dataset.
+
+GPU run command with Theano backend (with TensorFlow, the GPU is automatically used):
+    THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python cifar10.py
+"""
+from __future__ import print_function
+from keras.datasets import cifar10
+from keras.preprocessing.image import ImageDataGenerator
+from keras.utils import np_utils
+from keras.callbacks import ModelCheckpoint
+from keras.callbacks import ReduceLROnPlateau
+from keras.callbacks import CSVLogger
+from keras.callbacks import EarlyStopping
+from keras_contrib.applications.resnet import ResNet18
+
+import numpy as np
+
+
+weights_file = 'ResNet18v2-CIFAR-10.h5'
+lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1), cooldown=0, patience=5, min_lr=0.5e-6)
+early_stopper = EarlyStopping(min_delta=0.001, patience=10)
+csv_logger = CSVLogger('ResNet18v2-CIFAR-10.csv')
+model_checkpoint = ModelCheckpoint(weights_file, monitor='val_acc', save_best_only=True,
+                                   save_weights_only=True, mode='auto')
+
+batch_size = 32
+nb_classes = 10
+nb_epoch = 200
+data_augmentation = True
+
+# input image dimensions
+img_rows, img_cols = 32, 32
+# The CIFAR10 images are RGB.
+img_channels = 3
+
+# The data, shuffled and split between train and test sets:
+(X_train, y_train), (X_test, y_test) = cifar10.load_data()
+
+# Convert class vectors to binary class matrices.
+Y_train = np_utils.to_categorical(y_train, nb_classes)
+Y_test = np_utils.to_categorical(y_test, nb_classes)
+
+X_train = X_train.astype('float32')
+X_test = X_test.astype('float32')
+
+# subtract mean and normalize
+mean_image = np.mean(X_train, axis=0)
+X_train -= mean_image
+X_test -= mean_image
+X_train /= 128.
+X_test /= 128.
+
+model = ResNet18((img_rows, img_cols, img_channels), nb_classes)
+model.compile(loss='categorical_crossentropy',
+              optimizer='adam',
+              metrics=['accuracy'])
+
+if not data_augmentation:
+    print('Not using data augmentation.')
+    model.fit(X_train, Y_train,
+              batch_size=batch_size,
+              nb_epoch=nb_epoch,
+              validation_data=(X_test, Y_test),
+              shuffle=True,
+              callbacks=[lr_reducer, early_stopper, csv_logger, model_checkpoint])
+else:
+    print('Using real-time data augmentation.')
+    # This will do preprocessing and realtime data augmentation:
+    datagen = ImageDataGenerator(
+        featurewise_center=False,  # set input mean to 0 over the dataset
+        samplewise_center=False,  # set each sample mean to 0
+        featurewise_std_normalization=False,  # divide inputs by std of the dataset
+        samplewise_std_normalization=False,  # divide each input by its std
+        zca_whitening=False,  # apply ZCA whitening
+        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
+        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
+        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
+        horizontal_flip=True,  # randomly flip images
+        vertical_flip=False)  # randomly flip images
+
+    # Compute quantities required for featurewise normalization
+    # (std, mean, and principal components if ZCA whitening is applied).
+    datagen.fit(X_train)
+
+    # Fit the model on the batches generated by datagen.flow().
+    model.fit_generator(datagen.flow(X_train, Y_train, batch_size=batch_size),
+                        steps_per_epoch=X_train.shape[0] // batch_size,
+                        validation_data=(X_test, Y_test),
+                        epochs=nb_epoch, verbose=2,
+                        callbacks=[lr_reducer, early_stopper, csv_logger, model_checkpoint])
+
+scores = model.evaluate(X_test, Y_test, batch_size=batch_size)
+print('Test loss : ', scores[0])
+print('Test accuracy : ', scores[1])
@@ -0,0 +1,454 @@
+"""ResNet v1, v2, and segmentation models for Keras.
+
+# Reference
+
+- [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385)
+- [Identity Mappings in Deep Residual Networks](https://arxiv.org/abs/1603.05027)
+
+Reference material for extended functionality:
+
+- [ResNeXt](https://arxiv.org/abs/1611.05431) for Tiny ImageNet support.
+- [Dilated Residual Networks](https://arxiv.org/pdf/1705.09914) for segmentation support.
+- [Deep Residual Learning for Instrument Segmentation in Robotic Surgery](https://arxiv.org/abs/1703.08580)
+  for segmentation support.
+
+Implementation Adapted from: github.com/raghakot/keras-resnet
+"""
+from __future__ import division
+
+import six
+from keras.models import Model
+from keras.layers import Input
+from keras.layers import Activation
+from keras.layers import Reshape
+from keras.layers import Dense
+from keras.layers import Flatten
+from keras.layers import Conv2D
+from keras.layers import MaxPooling2D
+from keras.layers import AveragePooling2D
+from keras.layers.pooling import GlobalAveragePooling2D
+from keras.layers import GlobalMaxPooling2D
+from keras.layers import GlobalAveragePooling2D
+from keras.layers import Dropout
+from keras.layers.merge import add
+from keras.layers.normalization import BatchNormalization
+from keras.regularizers import l2
+from keras import backend as K
+from keras.applications.imagenet_utils import _obtain_input_shape
+
+
+def _bn_relu(x, bn_name=None, relu_name=None):
+    """Helper to build a BN -> relu block
+    """
+    norm = BatchNormalization(axis=CHANNEL_AXIS, name=bn_name)(x)
+    return Activation("relu", name=relu_name)(norm)
+
+
+def _conv_bn_relu(**conv_params):
+    """Helper to build a conv -> BN -> relu residual unit activation function.
+       This is the original ResNet v1 scheme in https://arxiv.org/abs/1512.03385
+    """
+    filters = conv_params["filters"]
+    kernel_size = conv_params["kernel_size"]
+    strides = conv_params.setdefault("strides", (1, 1))
+    dilation_rate = conv_params.setdefault("dilation_rate", (1, 1))
+    conv_name = conv_params.setdefault("conv_name", None)
+    bn_name = conv_params.setdefault("bn_name", None)
+    relu_name = conv_params.setdefault("relu_name", None)
+    kernel_initializer = conv_params.setdefault("kernel_initializer", "he_normal")
+    padding = conv_params.setdefault("padding", "same")
+    kernel_regularizer = conv_params.setdefault("kernel_regularizer", l2(1.e-4))
+
+    def f(x):
+        x = Conv2D(filters=filters, kernel_size=kernel_size,
+                   strides=strides, padding=padding,
+                   dilation_rate=dilation_rate,
+                   kernel_initializer=kernel_initializer,
+                   kernel_regularizer=kernel_regularizer,
+                   name=conv_name)(x)
+        return _bn_relu(x, bn_name=bn_name, relu_name=relu_name)
+
+    return f
+
+
+def _bn_relu_conv(**conv_params):
+    """Helper to build a BN -> relu -> conv residual unit with full pre-activation function.
+    This is the ResNet v2 scheme proposed in http://arxiv.org/pdf/1603.05027v2.pdf
+    """
+    filters = conv_params["filters"]
+    kernel_size = conv_params["kernel_size"]
+    strides = conv_params.setdefault("strides", (1, 1))
+    dilation_rate = conv_params.setdefault("dilation_rate", (1, 1))
+    conv_name = conv_params.setdefault("conv_name", None)
+    bn_name = conv_params.setdefault("bn_name", None)
+    relu_name = conv_params.setdefault("relu_name", None)
+    kernel_initializer = conv_params.setdefault("kernel_initializer", "he_normal")
+    padding = conv_params.setdefault("padding", "same")
+    kernel_regularizer = conv_params.setdefault("kernel_regularizer", l2(1.e-4))
+
+    def f(x):
+        activation = _bn_relu(x, bn_name=bn_name, relu_name=relu_name)
+        return Conv2D(filters=filters, kernel_size=kernel_size,
+                      strides=strides, padding=padding,
+                      dilation_rate=dilation_rate,
+                      kernel_initializer=kernel_initializer,
+                      kernel_regularizer=kernel_regularizer,
+                      name=conv_name)(activation)
+
+    return f
+
+
+def _shortcut(input_feature, residual, conv_name_base=None, bn_name_base=None):
+    """Adds a shortcut between input and residual block and merges them with "sum"
+    """
+    # Expand channels of shortcut to match residual.
+    # Stride appropriately to match residual (width, height)
+    # Should be int if network architecture is correctly configured.
+    input_shape = K.int_shape(input_feature)
+    residual_shape = K.int_shape(residual)
+    stride_width = int(round(input_shape[ROW_AXIS] / residual_shape[ROW_AXIS]))
+    stride_height = int(round(input_shape[COL_AXIS] / residual_shape[COL_AXIS]))
+    equal_channels = input_shape[CHANNEL_AXIS] == residual_shape[CHANNEL_AXIS]
+
+    shortcut = input_feature
+    # 1 X 1 conv if shape is different. Else identity.
+    if stride_width > 1 or stride_height > 1 or not equal_channels:
+        print('reshaping via a convolution...')
+        if conv_name_base is not None:
+            conv_name_base = conv_name_base + '1'
+        shortcut = Conv2D(filters=residual_shape[CHANNEL_AXIS],
+                          kernel_size=(1, 1),
+                          strides=(stride_width, stride_height),
+                          padding="valid",
+                          kernel_initializer="he_normal",
+                          kernel_regularizer=l2(0.0001),
+                          name=conv_name_base)(input_feature)
+        if bn_name_base is not None:
+            bn_name_base = bn_name_base + '1'
+        shortcut = BatchNormalization(axis=CHANNEL_AXIS, name=bn_name_base)(shortcut)
+
+    return add([shortcut, residual])
+
+
+def _residual_block(block_function, filters, blocks, stage,
+                    transition_strides=None, transition_dilation_rates=None,
+                    dilation_rates=(1, 1), is_first_layer=False, dropout=None,
+                    residual_unit=_bn_relu_conv):
+    """Builds a residual block with repeating bottleneck blocks.
+
+       stage: integer, current stage label, used for generating layer names
+       blocks: number of blocks 'a','b'..., current block label, used for generating layer names
+       transition_strides: a list of tuples for the strides of each transition
+       transition_dilation_rates: a list of tuples for the dilation rate of each transition
+    """
+    if transition_dilation_rates is None:
+        transition_dilation_rates = [(1, 1)] * blocks
+    if transition_strides is None:
+        transition_strides = [(1, 1)] * blocks
+
+    def f(x):
+        for i in range(blocks):
+            x = block_function(filters=filters, stage=stage, block=i,
+                               transition_strides=transition_strides[i],
+                               dilation_rate=dilation_rates[i],
+                               is_first_block_of_first_layer=(is_first_layer and i == 0),
+                               dropout=dropout,
+                               residual_unit=residual_unit)(x)
+        return x
+
+    return f
+
+
+def _block_name_base(stage, block):
+    """Get the convolution name base and batch normalization name base defined by stage and block.
+
+    If there are less than 26 blocks they will be labeled 'a', 'b', 'c' to match the paper and keras
+    and beyond 26 blocks they will simply be numbered.
+    """
+    if block < 27:
+        block = '%c' % (block + 97)  # 97 is the ascii number for lowercase 'a'
+    conv_name_base = 'res' + str(stage) + block + '_branch'
+    bn_name_base = 'bn' + str(stage) + block + '_branch'
+    return conv_name_base, bn_name_base
+
+
+def basic_block(filters, stage, block, transition_strides=(1, 1),
+                dilation_rate=(1, 1), is_first_block_of_first_layer=False, dropout=None,
+                residual_unit=_bn_relu_conv):
+    """Basic 3 X 3 convolution blocks for use on resnets with layers <= 34.
+    Follows improved proposed scheme in http://arxiv.org/pdf/1603.05027v2.pdf
+    """
+    def f(input_features):
+        conv_name_base, bn_name_base = _block_name_base(stage, block)
+        if is_first_block_of_first_layer:
+            # don't repeat bn->relu since we just did bn->relu->maxpool
+            x = Conv2D(filters=filters, kernel_size=(3, 3),
+                       strides=transition_strides,
+                       dilation_rate=dilation_rate,
+                       padding="same",
+                       kernel_initializer="he_normal",
+                       kernel_regularizer=l2(1e-4),
+                       name=conv_name_base + '2a')(input_features)
+        else:
+            x = residual_unit(filters=filters, kernel_size=(3, 3),
+                              strides=transition_strides,
+                              dilation_rate=dilation_rate,
+                              conv_name_base=conv_name_base + '2a',
+                              bn_name_base=bn_name_base + '2a')(input_features)
+
+        if dropout is not None:
+            x = Dropout(dropout)(x)
+
+        x = residual_unit(filters=filters, kernel_size=(3, 3),
+                          conv_name_base=conv_name_base + '2b',
+                          bn_name_base=bn_name_base + '2b')(x)
+
+        return _shortcut(input_features, x)
+
+    return f
+
+
+def bottleneck(filters, stage, block, transition_strides=(1, 1),
+               dilation_rate=(1, 1), is_first_block_of_first_layer=False, dropout=None,
+               residual_unit=_bn_relu_conv):
+    """Bottleneck architecture for > 34 layer resnet.
+    Follows improved proposed scheme in http://arxiv.org/pdf/1603.05027v2.pdf
+
+    Returns:
+        A final conv layer of filters * 4
+    """
+    def f(input_feature):
+        conv_name_base, bn_name_base = _block_name_base(stage, block)
+        if is_first_block_of_first_layer:
+            # don't repeat bn->relu since we just did bn->relu->maxpool
+            x = Conv2D(filters=filters, kernel_size=(1, 1),
+                       strides=transition_strides,
+                       dilation_rate=dilation_rate,
+                       padding="same",
+                       kernel_initializer="he_normal",
+                       kernel_regularizer=l2(1e-4),
+                       name=conv_name_base + '2a')(input_feature)
+        else:
+            x = residual_unit(filters=filters, kernel_size=(1, 1),
+                              strides=transition_strides,
+                              dilation_rate=dilation_rate,
+                              conv_name_base=conv_name_base + '2a',
+                              bn_name_base=bn_name_base + '2a')(input_feature)
+
+        if dropout is not None:
+            x = Dropout(dropout)(x)
+
+        x = residual_unit(filters=filters, kernel_size=(3, 3),
+                          conv_name_base=conv_name_base + '2b',
+                          bn_name_base=bn_name_base + '2b')(x)
+
+        if dropout is not None:
+            x = Dropout(dropout)(x)
+
+        x = residual_unit(filters=filters * 4, kernel_size=(1, 1),
+                          conv_name_base=conv_name_base + '2c',
+                          bn_name_base=bn_name_base + '2c')(x)
+
+        return _shortcut(input_feature, x)
+
+    return f
+
+
+def _handle_dim_ordering():
+    global ROW_AXIS
+    global COL_AXIS
+    global CHANNEL_AXIS
+    if K.image_data_format() == 'channels_last':
+        ROW_AXIS = 1
+        COL_AXIS = 2
+        CHANNEL_AXIS = 3
+    else:
+        CHANNEL_AXIS = 1
+        ROW_AXIS = 2
+        COL_AXIS = 3
+
+
+def _string_to_function(identifier):
+    if isinstance(identifier, six.string_types):
+        res = globals().get(identifier)
+        if not res:
+            raise ValueError('Invalid {}'.format(identifier))
+        return res
+    return identifier
+
+
+def ResNet(input_shape=None, classes=10, block='bottleneck', residual_unit='v2', repetitions=None,
+           initial_filters=64, activation='softmax', include_top=True, input_tensor=None, dropout=None,
+           transition_dilation_rate=(1, 1), initial_strides=(2, 2), initial_kernel_size=(7, 7),
+           initial_pooling='max', final_pooling=None, top='classification'):
+    """Builds a custom ResNet like architecture. Defaults to ResNet50 v2.
+
+    Args:
+        input_shape: optional shape tuple, only to be specified
+            if `include_top` is False (otherwise the input shape
+            has to be `(224, 224, 3)` (with `channels_last` dim ordering)
+            or `(3, 224, 224)` (with `channels_first` dim ordering).
+            It should have exactly 3 inputs channels,
+            and width and height should be no smaller than 8.
+            E.g. `(224, 224, 3)` would be one valid value.
+        classes: The number of outputs at final softmax layer
+        block: The block function to use. This is either `'basic'` or `'bottleneck'`.
+            The original paper used `basic` for layers < 50.
+        repetitions: Number of repetitions of various block units.
+            At each block unit, the number of filters are doubled and the input size is halved.
+            Default of None implies the ResNet50v2 values of [3, 4, 6, 3].
+        transition_dilation_rate: Used for pixel-wise prediction tasks such as image segmentation.
+        residual_unit: the basic residual unit, 'v1' for conv bn relu, 'v2' for bn relu conv.
+            See [Identity Mappings in Deep Residual Networks](https://arxiv.org/abs/1603.05027)
+            for details.
+        dropout: None for no dropout, otherwise rate of dropout from 0 to 1.
+            Based on [Wide Residual Networks.(https://arxiv.org/pdf/1605.07146) paper.
+        transition_dilation_rate: Dilation rate for transition layers. For semantic
+            segmentation of images use a dilation rate of (2, 2).
+        initial_strides: Stride of the very first residual unit and MaxPooling2D call,
+            with default (2, 2), set to (1, 1) for small images like cifar.
+        initial_kernel_size: kernel size of the very first convolution, (7, 7) for imagenet
+            and (3, 3) for small image datasets like tiny imagenet and cifar.
+            See [ResNeXt](https://arxiv.org/abs/1611.05431) paper for details.
+        initial_pooling: Determine if there will be an initial pooling layer,
+            'max' for imagenet and None for small image datasets.
+            See [ResNeXt](https://arxiv.org/abs/1611.05431) paper for details.
+        final_pooling: Optional pooling mode for feature extraction at the final model layer
+            when `include_top` is `False`.
+            - `None` means that the output of the model
+                will be the 4D tensor output of the
+                last convolutional layer.
+            - `avg` means that global average pooling
+                will be applied to the output of the
+                last convolutional layer, and thus
+                the output of the model will be a
+                2D tensor.
+            - `max` means that global max pooling will
+                be applied.
+        top: Defines final layers to evaluate based on a specific problem type. Options are
+            'classification' for ImageNet style problems, 'segmentation' for problems like
+            the Pascal VOC dataset, and None to exclude these layers entirely.
+
+    Returns:
+        The keras `Model`.
+    """
+    if activation not in ['softmax', 'sigmoid', None]:
+        raise ValueError('activation must be one of "softmax", "sigmoid", or None')
+    if activation == 'sigmoid' and classes != 1:
+        raise ValueError('sigmoid activation can only be used when classes = 1')
+    if repetitions is None:
+        repetitions = [3, 4, 6, 3]
+    # Determine proper input shape
+    input_shape = _obtain_input_shape(input_shape,
+                                      default_size=32,
+                                      min_size=8,
+                                      data_format=K.image_data_format(),
+                                      require_flatten=include_top)
+    _handle_dim_ordering()
+    if len(input_shape) != 3:
+        raise Exception("Input shape should be a tuple (nb_channels, nb_rows, nb_cols)")
+
+    if block == 'basic':
+        block_fn = basic_block
+    elif block == 'bottleneck':
+        block_fn = bottleneck
+    elif isinstance(block, six.string_types):
+        block_fn = _string_to_function(block)
+    else:
+        block_fn = block
+
+    if residual_unit == 'v2':
+        residual_unit = _bn_relu_conv
+    elif residual_unit == 'v1':
+        residual_unit = _conv_bn_relu
+    elif isinstance(residual_unit, six.string_types):
+        residual_unit = _string_to_function(residual_unit)
+    else:
+        residual_unit = residual_unit
+
+    # Permute dimension order if necessary
+    if K.image_data_format() == 'channels_first':
+        input_shape = (input_shape[1], input_shape[2], input_shape[0])
+    # Determine proper input shape
+    input_shape = _obtain_input_shape(input_shape,
+                                      default_size=32,
+                                      min_size=8,
+                                      data_format=K.image_data_format(),
+                                      require_flatten=include_top)
+
+    img_input = Input(shape=input_shape, tensor=input_tensor)
+    x = _conv_bn_relu(filters=initial_filters, kernel_size=initial_kernel_size, strides=initial_strides)(img_input)
+    if initial_pooling == 'max':
+        x = MaxPooling2D(pool_size=(3, 3), strides=initial_strides, padding="same")(x)
+
+    block = x
+    filters = initial_filters
+    for i, r in enumerate(repetitions):
+        transition_dilation_rates = [transition_dilation_rate] * r
+        transition_strides = [(1, 1)] * r
+        if transition_dilation_rate == (1, 1):
+            transition_strides[0] = (2, 2)
+        block = _residual_block(block_fn, filters=filters,
+                                stage=i, blocks=r,
+                                is_first_layer=(i == 0),
+                                dropout=dropout,
+                                transition_dilation_rates=transition_dilation_rates,
+                                transition_strides=transition_strides,
+                                residual_unit=residual_unit)(block)
+        filters *= 2
+
+    # Last activation
+    x = _bn_relu(block)
+
+    # Classifier block
+    if include_top and top is 'classification':
+        x = GlobalAveragePooling2D()(x)
+        x = Dense(units=classes, activation=activation, kernel_initializer="he_normal")(x)
+    elif include_top and top is 'segmentation':
+        x = Conv2D(classes, (1, 1), activation='linear', padding='same')(x)
+
+        if K.image_data_format() == 'channels_first':
+            channel, row, col = input_shape
+        else:
+            row, col, channel = input_shape
+
+        x = Reshape((row * col, classes))(x)
+        x = Activation(activation)(x)
+        x = Reshape((row, col, classes))(x)
+    elif final_pooling == 'avg':
+        x = GlobalAveragePooling2D()(x)
+    elif final_pooling == 'max':
+        x = GlobalMaxPooling2D()(x)
+
+    model = Model(inputs=img_input, outputs=x)
+    return model
+
+
+def ResNet18(input_shape, classes):
+    """ResNet with 18 layers and v2 residual units
+    """
+    return ResNet(input_shape, classes, basic_block, repetitions=[2, 2, 2, 2])
+
+
+def ResNet34(input_shape, classes):
+    """ResNet with 34 layers and v2 residual units
+    """
+    return ResNet(input_shape, classes, basic_block, repetitions=[3, 4, 6, 3])
+
+
+def ResNet50(input_shape, classes):
+    """ResNet with 50 layers and v2 residual units
+    """
+    return ResNet(input_shape, classes, bottleneck, repetitions=[3, 4, 6, 3])
+
+
+def ResNet101(input_shape, classes):
+    """ResNet with 101 layers and v2 residual units
+    """
+    return ResNet(input_shape, classes, bottleneck, repetitions=[3, 4, 23, 3])
+
+
+def ResNet152(input_shape, classes):
+    """ResNet with 152 layers and v2 residual units
+    """
+    return ResNet(input_shape, classes, bottleneck, repetitions=[3, 8, 36, 3])