From 7bae1db1f25d3b6c3066abe33f9433e18c58f5f7 Mon Sep 17 00:00:00 2001 From: Somshubra Majumdar Date: Wed, 6 Sep 2017 13:01:50 -0500 Subject: [PATCH] Update DenseNet with ImageNet weights and major code cleanup --- keras_contrib/applications/densenet.py | 777 ++++++++++++++++++------- 1 file changed, 552 insertions(+), 225 deletions(-) diff --git a/keras_contrib/applications/densenet.py b/keras_contrib/applications/densenet.py index a7c4711..2cf4020 100644 --- a/keras_contrib/applications/densenet.py +++ b/keras_contrib/applications/densenet.py @@ -1,5 +1,38 @@ # -*- coding: utf-8 -*- -'''DenseNet models for Keras. +'''DenseNet and DenseNet-FCN models for Keras. + +DenseNet is a network architecture where each layer is directly connected +to every other layer in a feed-forward fashion (within each dense block). +For each layer, the feature maps of all preceding layers are treated as +separate inputs whereas its own feature maps are passed on as inputs to +all subsequent layers. This connectivity pattern yields state-of-the-art +accuracies on CIFAR10/100 (with or without data augmentation) and SVHN. +On the large scale ILSVRC 2012 (ImageNet) dataset, DenseNet achieves a +similar accuracy as ResNet, but using less than half the amount of +parameters and roughly half the number of FLOPs. + +DenseNets support any input image size of 32x32 or greater, and are thus +suited for CIFAR-10 or CIFAR-100 datasets. There are two types of DenseNets, +one suited for smaller images (DenseNet) and one suited for ImageNet, +called DenseNetImageNet. They are differentiated by the strided convolution +and pooling operations prior to the initial dense block. + +The following table describes the size and accuracy of DenseNetImageNet models +on the ImageNet dataset (single crop), for which weights are provided: +------------------------------------------------------------------------------------ + Model type | ImageNet Acc (Top 1) | ImageNet Acc (Top 5) | Params (M) | +------------------------------------------------------------------------------------ +| DenseNet-121 | 25.02 % | 7.71 % | 8.0 | +| DenseNet-169 | 23.80 % | 6.85 % | 14.3 | +| DenseNet-201 | 22.58 % | 6.34 % | 20.2 | +| DenseNet-161 | 22.20 % | - % | 28.9 | +------------------------------------------------------------------------------------ + +DenseNets can be extended to image segmentation tasks as described in the +paper "The One Hundred Layers Tiramisu: Fully Convolutional DenseNets for +Semantic Segmentation". Here, the dense blocks are arranged and concatenated +with long skip connections for state of the art performance on CamVid dataset. + # Reference - [Densely Connected Convolutional Networks](https://arxiv.org/pdf/1608.06993.pdf) - [The One Hundred Layers Tiramisu: Fully Convolutional DenseNets for Semantic Segmentation](https://arxiv.org/pdf/1611.09326.pdf) @@ -11,89 +44,169 @@ from __future__ import division import warnings from keras.models import Model -from keras.layers.core import Dense, Dropout, Activation, Reshape -from keras.layers.convolutional import Conv2D, Conv2DTranspose, UpSampling2D -from keras.layers.pooling import AveragePooling2D -from keras.layers.pooling import GlobalAveragePooling2D +from keras.layers import Dense +from keras.layers import Dropout +from keras.layers import Activation +from keras.layers import Reshape +from keras.layers import Conv2D +from keras.layers import Conv2DTranspose +from keras.layers import UpSampling2D +from keras.layers import MaxPooling2D +from keras.layers import AveragePooling2D +from keras.layers import GlobalMaxPooling2D +from keras.layers import GlobalAveragePooling2D from keras.layers import Input -from keras.layers.merge import concatenate -from keras.layers.normalization import BatchNormalization +from keras.layers import concatenate +from keras.layers import BatchNormalization from keras.regularizers import l2 from keras.utils.layer_utils import convert_all_kernels_in_model from keras.utils.data_utils import get_file from keras.engine.topology import get_source_inputs from keras.applications.imagenet_utils import _obtain_input_shape +from keras.applications.imagenet_utils import decode_predictions import keras.backend as K from keras_contrib.layers.convolutional import SubPixelUpscaling -TH_WEIGHTS_PATH = 'https://github.com/titu1994/DenseNet/releases/download/v2.0/DenseNet-40-12-Theano-Backend-TH-dim-ordering.h5' -TF_WEIGHTS_PATH = 'https://github.com/titu1994/DenseNet/releases/download/v2.0/DenseNet-40-12-Tensorflow-Backend-TF-dim-ordering.h5' -TH_WEIGHTS_PATH_NO_TOP = 'https://github.com/titu1994/DenseNet/releases/download/v2.0/DenseNet-40-12-Theano-Backend-TH-dim-ordering-no-top.h5' -TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/titu1994/DenseNet/releases/download/v2.0/DenseNet-40-12-Tensorflow-Backend-TF-dim-ordering-no-top.h5' +DENSENET_121_WEIGHTS_PATH = r'https://github.com/titu1994/DenseNet/releases/download/v3.0/DenseNet-BC-121-32.h5' +DENSENET_161_WEIGHTS_PATH = r'https://github.com/titu1994/DenseNet/releases/download/v3.0/DenseNet-BC-161-48.h5' +DENSENET_169_WEIGHTS_PATH = r'https://github.com/titu1994/DenseNet/releases/download/v3.0/DenseNet-BC-169-32.h5' +DENSENET_121_WEIGHTS_PATH_NO_TOP = r'https://github.com/titu1994/DenseNet/releases/download/v3.0/DenseNet-BC-121-32-no-top.h5' +DENSENET_161_WEIGHTS_PATH_NO_TOP = r'https://github.com/titu1994/DenseNet/releases/download/v3.0/DenseNet-BC-161-48-no-top.h5' +DENSENET_169_WEIGHTS_PATH_NO_TOP = r'https://github.com/titu1994/DenseNet/releases/download/v3.0/DenseNet-BC-169-32-no-top.h5' -def DenseNet(input_shape=None, depth=40, nb_dense_block=3, growth_rate=12, nb_filter=16, nb_layers_per_block=-1, - bottleneck=False, reduction=0.0, dropout_rate=0.0, weight_decay=1E-4, - include_top=True, weights='cifar10', input_tensor=None, - classes=10, activation='softmax'): - '''Instantiate the DenseNet architecture, - optionally loading weights pre-trained - on CIFAR-10. Note that when using TensorFlow, - for best performance you should set - `image_data_format='channels_last'` in your Keras config - at ~/.keras/keras.json. - The model and the weights are compatible with both - TensorFlow and Theano. The dimension ordering - convention used by the model is the one - specified in your Keras config file. - # Arguments - input_shape: optional shape tuple, only to be specified - if `include_top` is False (otherwise the input shape - has to be `(32, 32, 3)` (with `channels_last` dim ordering) - or `(3, 32, 32)` (with `channels_first` dim ordering). - It should have exactly 3 inputs channels, - and width and height should be no smaller than 8. - E.g. `(200, 200, 3)` would be one valid value. - depth: number or layers in the DenseNet - nb_dense_block: number of dense blocks to add to end (generally = 3) - growth_rate: number of filters to add per dense block - nb_filter: initial number of filters. -1 indicates initial - number of filters is 2 * growth_rate - nb_layers_per_block: number of layers in each dense block. - Can be a -1, positive integer or a list. - If -1, calculates nb_layer_per_block from the network depth. - If positive integer, a set number of layers per dense block. - If list, nb_layer is used as provided. Note that list size must - be (nb_dense_block + 1) - bottleneck: flag to add bottleneck blocks in between dense blocks - reduction: reduction factor of transition blocks. - Note : reduction value is inverted to compute compression. - dropout_rate: dropout rate - weight_decay: weight decay factor - include_top: whether to include the fully-connected - layer at the top of the network. - weights: one of `None` (random initialization) or - 'cifar10' (pre-training on CIFAR-10).. - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) - to use as image input for the model. - classes: optional number of classes to classify images - into, only to be specified if `include_top` is True, and - if no `weights` argument is specified. - activation: Type of activation at the top layer. Can be one of 'softmax' or 'sigmoid'. - Note that if sigmoid is used, classes must be 1. - # Returns - A Keras model instance. - ''' +def preprocess_input(x, data_format=None): + """Preprocesses a tensor encoding a batch of images. - if weights not in {'cifar10', None}: + # Arguments + x: input Numpy tensor, 4D. + data_format: data format of the image tensor. + + # Returns + Preprocessed tensor. + """ + if data_format is None: + data_format = K.image_data_format() + assert data_format in {'channels_last', 'channels_first'} + + if data_format == 'channels_first': + if x.ndim == 3: + # 'RGB'->'BGR' + x = x[::-1, ...] + # Zero-center by mean pixel + x[0, :, :] -= 103.939 + x[1, :, :] -= 116.779 + x[2, :, :] -= 123.68 + else: + x = x[:, ::-1, ...] + x[:, 0, :, :] -= 103.939 + x[:, 1, :, :] -= 116.779 + x[:, 2, :, :] -= 123.68 + else: + # 'RGB'->'BGR' + x = x[..., ::-1] + # Zero-center by mean pixel + x[..., 0] -= 103.939 + x[..., 1] -= 116.779 + x[..., 2] -= 123.68 + + x *= 0.017 # scale values + + return x + + +def DenseNet(input_shape=None, + depth=40, + nb_dense_block=3, + growth_rate=12, + nb_filter=-1, + nb_layers_per_block=-1, + bottleneck=False, + reduction=0.0, + dropout_rate=0.0, + weight_decay=1e-4, + subsample_initial_block=False, + include_top=True, + weights=None, + input_tensor=None, + pooling=None, + classes=10, + activation='softmax'): + '''Instantiate the DenseNet architecture. + + The model and the weights are compatible with both + TensorFlow and Theano. The dimension ordering + convention used by the model is the one + specified in your Keras config file. + + # Arguments + input_shape: optional shape tuple, only to be specified + if `include_top` is False (otherwise the input shape + has to be `(224, 224, 3)` (with `channels_last` dim ordering) + or `(3, 224, 224)` (with `channels_first` dim ordering). + It should have exactly 3 inputs channels, + and width and height should be no smaller than 8. + E.g. `(224, 224, 3)` would be one valid value. + depth: number or layers in the DenseNet + nb_dense_block: number of dense blocks to add to end + growth_rate: number of filters to add per dense block + nb_filter: initial number of filters. -1 indicates initial + number of filters will default to 2 * growth_rate + nb_layers_per_block: number of layers in each dense block. + Can be a -1, positive integer or a list. + If -1, calculates nb_layer_per_block from the network depth. + If positive integer, a set number of layers per dense block. + If list, nb_layer is used as provided. Note that list size must + be nb_dense_block + bottleneck: flag to add bottleneck blocks in between dense blocks + reduction: reduction factor of transition blocks. + Note : reduction value is inverted to compute compression. + dropout_rate: dropout rate + weight_decay: weight decay rate + subsample_initial_block: Set to True to subsample the initial + convolution and add a MaxPool2D before the dense blocks are added. + include_top: whether to include the fully-connected + layer at the top of the network. + weights: one of `None` (random initialization) or + 'imagenet' (pre-training on ImageNet).. + input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) + to use as image input for the model. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. + - `None` means that the output of the model + will be the 4D tensor output of the + last convolutional layer. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a + 2D tensor. + - `max` means that global max pooling will + be applied. + classes: optional number of classes to classify images + into, only to be specified if `include_top` is True, and + if no `weights` argument is specified. + activation: Type of activation at the top layer. Can be one of + 'softmax' or 'sigmoid'. Note that if sigmoid is used, + classes must be 1. + + # Returns + A Keras model instance. + + # Raises + ValueError: in case of invalid argument for `weights`, + or invalid input shape. + ''' + + if weights not in {'imagenet', None}: raise ValueError('The `weights` argument should be either ' - '`None` (random initialization) or `cifar10` ' - '(pre-training on CIFAR-10).') + '`None` (random initialization) or `imagenet` ' + '(pre-training on ImageNet).') - if weights == 'cifar10' and include_top and classes != 10: - raise ValueError('If using `weights` as CIFAR 10 with `include_top`' - ' as true, `classes` should be 10') + if weights == 'imagenet' and include_top and classes != 1000: + raise ValueError('If using `weights` as ImageNet with `include_top` ' + 'as true, `classes` should be 1000') if activation not in ['softmax', 'sigmoid']: raise ValueError('activation must be one of "softmax" or "sigmoid"') @@ -106,7 +219,7 @@ def DenseNet(input_shape=None, depth=40, nb_dense_block=3, growth_rate=12, nb_fi default_size=32, min_size=8, data_format=K.image_data_format(), - include_top=include_top) + require_flatten=include_top) if input_tensor is None: img_input = Input(shape=input_shape) @@ -117,8 +230,9 @@ def DenseNet(input_shape=None, depth=40, nb_dense_block=3, growth_rate=12, nb_fi img_input = input_tensor x = __create_dense_net(classes, img_input, include_top, depth, nb_dense_block, - growth_rate, nb_filter, nb_layers_per_block, bottleneck, reduction, - dropout_rate, weight_decay, activation) + growth_rate, nb_filter, nb_layers_per_block, bottleneck, + reduction, dropout_rate, weight_decay, subsample_initial_block, + pooling, activation) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. @@ -130,47 +244,69 @@ def DenseNet(input_shape=None, depth=40, nb_dense_block=3, growth_rate=12, nb_fi model = Model(inputs, x, name='densenet') # load weights - if weights == 'cifar10': - if (depth == 40) and (nb_dense_block == 3) and (growth_rate == 12) and (nb_filter == 16) and \ - (bottleneck is False) and (reduction == 0.0) and (dropout_rate == 0.0) and (weight_decay == 1E-4): - # Default parameters match. Weights for this model exist: + if weights == 'imagenet': + weights_loaded = False - if K.image_data_format() == 'channels_first': - if include_top: - weights_path = get_file('densenet_40_12_th_dim_ordering_th_kernels.h5', - TH_WEIGHTS_PATH, - cache_subdir='models') - else: - weights_path = get_file('densenet_40_12_th_dim_ordering_th_kernels_no_top.h5', - TH_WEIGHTS_PATH_NO_TOP, - cache_subdir='models') - - model.load_weights(weights_path) - - if K.backend() == 'tensorflow': - warnings.warn('You are using the TensorFlow backend, yet you ' - 'are using the Theano ' - 'image dimension ordering convention ' - '(`image_data_format="channels_first"`). ' - 'For best performance, set ' - '`image_data_format="channels_last"` in ' - 'your Keras config ' - 'at ~/.keras/keras.json.') - convert_all_kernels_in_model(model) + if (depth == 121) and (nb_dense_block == 4) and (growth_rate == 32) and (nb_filter == 64) and \ + (bottleneck is True) and (reduction == 0.5) and (dropout_rate == 0.0) and (subsample_initial_block): + if include_top: + weights_path = get_file('DenseNet-BC-121-32.h5', + DENSENET_121_WEIGHTS_PATH, + cache_subdir='models', + md5_hash='a439dd41aa672aef6daba4ee1fd54abd') else: - if include_top: - weights_path = get_file('densenet_40_12_tf_dim_ordering_tf_kernels.h5', - TF_WEIGHTS_PATH, - cache_subdir='models') - else: - weights_path = get_file('densenet_40_12_tf_dim_ordering_tf_kernels_no_top.h5', - TF_WEIGHTS_PATH_NO_TOP, - cache_subdir='models') + weights_path = get_file('DenseNet-BC-121-32-no-top.h5', + DENSENET_121_WEIGHTS_PATH_NO_TOP, + cache_subdir='models', + md5_hash='8804bcb37da5be4a52dc4e45d4425ba7') + model.load_weights(weights_path) + weights_loaded = True - model.load_weights(weights_path) + if (depth == 161) and (nb_dense_block == 4) and (growth_rate == 48) and (nb_filter == 96) and \ + (bottleneck is True) and (reduction == 0.5) and (dropout_rate == 0.0) and (subsample_initial_block): + if include_top: + weights_path = get_file('DenseNet-BC-161-48.h5', + DENSENET_161_WEIGHTS_PATH, + cache_subdir='models', + md5_hash='6c326cf4fbdb57d31eff04333a23fcca') + else: + weights_path = get_file('DenseNet-BC-161-48-no-top.h5', + DENSENET_161_WEIGHTS_PATH_NO_TOP, + cache_subdir='models', + md5_hash='d38903b8732fe238c91dac7859271f26') + model.load_weights(weights_path) + weights_loaded = True - if K.backend() == 'theano': - convert_all_kernels_in_model(model) + if (depth == 169) and (nb_dense_block == 4) and (growth_rate == 32) and (nb_filter == 64) and \ + (bottleneck is True) and (reduction == 0.5) and (dropout_rate == 0.0) and (subsample_initial_block): + if include_top: + weights_path = get_file('DenseNet-BC-169-32.h5', + DENSENET_169_WEIGHTS_PATH, + cache_subdir='models', + md5_hash='914869c361303d2e39dec640b4e606a6') + else: + weights_path = get_file('DenseNet-BC-169-32-no-top.h5', + DENSENET_169_WEIGHTS_PATH_NO_TOP, + cache_subdir='models', + md5_hash='a664d78a30ddd217dd38c0bb8d258461') + model.load_weights(weights_path) + weights_loaded = True + + if weights_loaded: + if K.backend() == 'theano': + convert_all_kernels_in_model(model) + + if K.image_data_format() == 'channels_first' and K.backend() == 'tensorflow': + warnings.warn('You are using the TensorFlow backend, yet you ' + 'are using the Theano ' + 'image data format convention ' + '(`image_data_format="channels_first"`). ' + 'For best performance, set ' + '`image_data_format="channels_last"` in ' + 'your Keras config ' + 'at ~/.keras/keras.json.') + + print("Weights for the model were loaded successfully") return model @@ -297,95 +433,182 @@ def DenseNetFCN(input_shape, nb_dense_block=5, growth_rate=16, nb_layers_per_blo return model -def __conv_block(ip, nb_filter, bottleneck=False, dropout_rate=None, weight_decay=1E-4): - ''' Apply BatchNorm, Relu, 3x3 Conv2D, optional bottleneck block and dropout - Args: - ip: Input keras tensor - nb_filter: number of filters - bottleneck: add bottleneck block +def DenseNetImageNet121(input_shape=None, + bottleneck=True, + reduction=0.5, + dropout_rate=0.0, + weight_decay=1e-4, + include_top=True, + weights='imagenet', + input_tensor=None, + pooling=None, + classes=1000, + activation='softmax'): + return DenseNet(input_shape, depth=121, nb_dense_block=4, growth_rate=32, nb_filter=64, + nb_layers_per_block=[6, 12, 24, 16], bottleneck=bottleneck, reduction=reduction, + dropout_rate=dropout_rate, weight_decay=weight_decay, subsample_initial_block=True, + include_top=include_top, weights=weights, input_tensor=input_tensor, + pooling=pooling, classes=classes, activation=activation) + + +def DenseNetImageNet169(input_shape=None, + bottleneck=True, + reduction=0.5, + dropout_rate=0.0, + weight_decay=1e-4, + include_top=True, + weights='imagenet', + input_tensor=None, + pooling=None, + classes=1000, + activation='softmax'): + return DenseNet(input_shape, depth=169, nb_dense_block=4, growth_rate=32, nb_filter=64, + nb_layers_per_block=[6, 12, 32, 32], bottleneck=bottleneck, reduction=reduction, + dropout_rate=dropout_rate, weight_decay=weight_decay, subsample_initial_block=True, + include_top=include_top, weights=weights, input_tensor=input_tensor, + pooling=pooling, classes=classes, activation=activation) + + +def DenseNetImageNet201(input_shape=None, + bottleneck=True, + reduction=0.5, + dropout_rate=0.0, + weight_decay=1e-4, + include_top=True, + weights=None, + input_tensor=None, + pooling=None, + classes=1000, + activation='softmax'): + return DenseNet(input_shape, depth=201, nb_dense_block=4, growth_rate=32, nb_filter=64, + nb_layers_per_block=[6, 12, 48, 32], bottleneck=bottleneck, reduction=reduction, + dropout_rate=dropout_rate, weight_decay=weight_decay, subsample_initial_block=True, + include_top=include_top, weights=weights, input_tensor=input_tensor, + pooling=pooling, classes=classes, activation=activation) + + +def DenseNetImageNet264(input_shape=None, + bottleneck=True, + reduction=0.5, + dropout_rate=0.0, + weight_decay=1e-4, + include_top=True, + weights=None, + input_tensor=None, + pooling=None, + classes=1000, + activation='softmax'): + return DenseNet(input_shape, depth=201, nb_dense_block=4, growth_rate=32, nb_filter=64, + nb_layers_per_block=[6, 12, 64, 48], bottleneck=bottleneck, reduction=reduction, + dropout_rate=dropout_rate, weight_decay=weight_decay, subsample_initial_block=True, + include_top=include_top, weights=weights, input_tensor=input_tensor, + pooling=pooling, classes=classes, activation=activation) + + +def DenseNetImageNet161(input_shape=None, + bottleneck=True, + reduction=0.5, + dropout_rate=0.0, + weight_decay=1e-4, + include_top=True, + weights='imagenet', + input_tensor=None, + pooling=None, + classes=1000, + activation='softmax'): + return DenseNet(input_shape, depth=161, nb_dense_block=4, growth_rate=48, nb_filter=96, + nb_layers_per_block=[6, 12, 36, 24], bottleneck=bottleneck, reduction=reduction, + dropout_rate=dropout_rate, weight_decay=weight_decay, subsample_initial_block=True, + include_top=include_top, weights=weights, input_tensor=input_tensor, + pooling=pooling, classes=classes, activation=activation) + + +def __conv_block(ip, nb_filter, bottleneck=False, dropout_rate=None, weight_decay=1e-4): + ''' + Adds a convolution layer (with batch normalization and relu), + and optionally a bottleneck layer. + + # Arguments + ip: Input tensor + nb_filter: integer, the dimensionality of the output space + (i.e. the number output of filters in the convolution) + bottleneck: if True, adds a bottleneck convolution block dropout_rate: dropout rate weight_decay: weight decay factor - Returns: keras tensor with batch_norm, relu and convolution2d added (optional bottleneck) - ''' + # Input shape + 4D tensor with shape: + `(samples, channels, rows, cols)` if data_format='channels_first' + or 4D tensor with shape: + `(samples, rows, cols, channels)` if data_format='channels_last'. + + # Output shape + 4D tensor with shape: + `(samples, filters, new_rows, new_cols)` if data_format='channels_first' + or 4D tensor with shape: + `(samples, new_rows, new_cols, filters)` if data_format='channels_last'. + `rows` and `cols` values might have changed due to stride. + + # Returns + output tensor of block + ''' concat_axis = 1 if K.image_data_format() == 'channels_first' else -1 - x = BatchNormalization(axis=concat_axis, gamma_regularizer=l2(weight_decay), - beta_regularizer=l2(weight_decay))(ip) + x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(ip) x = Activation('relu')(x) if bottleneck: - inter_channel = nb_filter * 4 # Obtained from https://github.com/liuzhuang13/DenseNet/blob/master/densenet.lua + inter_channel = nb_filter * 4 - x = Conv2D(inter_channel, (1, 1), kernel_initializer='he_uniform', padding='same', use_bias=False, + x = Conv2D(inter_channel, (1, 1), kernel_initializer='he_normal', padding='same', use_bias=False, kernel_regularizer=l2(weight_decay))(x) - - if dropout_rate: - x = Dropout(dropout_rate)(x) - - x = BatchNormalization(axis=concat_axis, gamma_regularizer=l2(weight_decay), - beta_regularizer=l2(weight_decay))(x) + x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(x) x = Activation('relu')(x) - x = Conv2D(nb_filter, (3, 3), kernel_initializer='he_uniform', padding='same', use_bias=False, - kernel_regularizer=l2(weight_decay))(x) + x = Conv2D(nb_filter, (3, 3), kernel_initializer='he_normal', padding='same', use_bias=False)(x) if dropout_rate: x = Dropout(dropout_rate)(x) return x -def __transition_block(ip, nb_filter, compression=1.0, dropout_rate=None, weight_decay=1E-4): - ''' Apply BatchNorm, Relu 1x1, Conv2D, optional compression, dropout and Maxpooling2D - Args: - ip: keras tensor - nb_filter: number of filters - compression: calculated as 1 - reduction. Reduces the number of feature maps - in the transition block. +def __dense_block(x, nb_layers, nb_filter, growth_rate, bottleneck=False, dropout_rate=None, + weight_decay=1e-4, grow_nb_filters=True, return_concat_list=False): + ''' + Build a dense_block where the output of each conv_block is fed + to subsequent ones + + # Arguments + x: input keras tensor + nb_layers: the number of conv_blocks to append to the model + nb_filter: integer, the dimensionality of the output space + (i.e. the number output of filters in the convolution) + growth_rate: growth rate of the dense block + bottleneck: if True, adds a bottleneck convolution block to + each conv_block dropout_rate: dropout rate weight_decay: weight decay factor - Returns: keras tensor, after applying batch_norm, relu-conv, dropout, maxpool + grow_nb_filters: if True, allows number of filters to grow + return_concat_list: set to True to return the list of + feature maps along with the actual output + + # Return + If return_concat_list is True, returns a list of the output + keras tensor, the number of filters and a list of all the + dense blocks added to the keras tensor + + If return_concat_list is False, returns a list of the output + keras tensor and the number of filters ''' - - concat_axis = 1 if K.image_data_format() == 'channels_first' else -1 - - x = BatchNormalization(axis=concat_axis, gamma_regularizer=l2(weight_decay), - beta_regularizer=l2(weight_decay))(ip) - x = Activation('relu')(x) - x = Conv2D(int(nb_filter * compression), (1, 1), kernel_initializer='he_uniform', padding='same', use_bias=False, - kernel_regularizer=l2(weight_decay))(x) - if dropout_rate: - x = Dropout(dropout_rate)(x) - x = AveragePooling2D((2, 2), strides=(2, 2))(x) - - return x - - -def __dense_block(x, nb_layers, nb_filter, growth_rate, bottleneck=False, dropout_rate=None, weight_decay=1E-4, - grow_nb_filters=True, return_concat_list=False): - ''' Build a dense_block where the output of each conv_block is fed to subsequent ones - Args: - x: keras tensor - nb_layers: the number of layers of conv_block to append to the model. - nb_filter: number of filters - growth_rate: growth rate - bottleneck: bottleneck block - dropout_rate: dropout rate - weight_decay: weight decay factor - grow_nb_filters: flag to decide to allow number of filters to grow - return_concat_list: return the list of feature maps along with the actual output - Returns: keras tensor with nb_layers of conv_block appended - ''' - concat_axis = 1 if K.image_data_format() == 'channels_first' else -1 x_list = [x] for i in range(nb_layers): - conv_block = __conv_block(x, growth_rate, bottleneck, dropout_rate, weight_decay) - x_list.append(conv_block) + cb = __conv_block(x, growth_rate, bottleneck, dropout_rate, weight_decay) + x_list.append(cb) - x = concatenate(x_list, axis=concat_axis) + x = concatenate([x, cb], axis=concat_axis) if grow_nb_filters: nb_filter += growth_rate @@ -396,36 +619,96 @@ def __dense_block(x, nb_layers, nb_filter, growth_rate, bottleneck=False, dropou return x, nb_filter -def __transition_up_block(ip, nb_filters, type='upsampling', weight_decay=1E-4): - ''' SubpixelConvolutional Upscaling (factor = 2) - Args: - ip: keras tensor - nb_filters: number of layers - type: can be 'upsampling', 'subpixel', 'deconv'. Determines type of upsampling performed +def __transition_block(ip, nb_filter, compression=1.0, weight_decay=1e-4): + ''' + Adds a pointwise convolution layer (with batch normalization and relu), + and an average pooling layer. The number of output convolution filters + can be reduced by appropriately reducing the compression parameter. + + # Arguments + ip: input keras tensor + nb_filter: integer, the dimensionality of the output space + (i.e. the number output of filters in the convolution) + compression: calculated as 1 - reduction. Reduces the number + of feature maps in the transition block. weight_decay: weight decay factor - Returns: keras tensor, after applying upsampling operation. + + # Input shape + 4D tensor with shape: + `(samples, channels, rows, cols)` if data_format='channels_first' + or 4D tensor with shape: + `(samples, rows, cols, channels)` if data_format='channels_last'. + + # Output shape + 4D tensor with shape: + `(samples, nb_filter * compression, rows / 2, cols / 2)` + if data_format='channels_first' + or 4D tensor with shape: + `(samples, rows / 2, cols / 2, nb_filter * compression)` + if data_format='channels_last'. + + # Returns + a keras tensor + ''' + concat_axis = 1 if K.image_data_format() == 'channels_first' else -1 + + x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(ip) + x = Activation('relu')(x) + x = Conv2D(int(nb_filter * compression), (1, 1), kernel_initializer='he_normal', padding='same', + use_bias=False, kernel_regularizer=l2(weight_decay))(x) + x = AveragePooling2D((2, 2), strides=(2, 2))(x) + + return x + + +def __transition_up_block(ip, nb_filters, type='deconv', weight_decay=1E-4): + '''Adds an upsampling block. Upsampling operation relies on the the type parameter. + + # Arguments + ip: input keras tensor + nb_filters: integer, the dimensionality of the output space + (i.e. the number output of filters in the convolution) + type: can be 'upsampling', 'subpixel', 'deconv'. Determines + type of upsampling performed + weight_decay: weight decay factor + + # Input shape + 4D tensor with shape: + `(samples, channels, rows, cols)` if data_format='channels_first' + or 4D tensor with shape: + `(samples, rows, cols, channels)` if data_format='channels_last'. + + # Output shape + 4D tensor with shape: + `(samples, nb_filter, rows * 2, cols * 2)` if data_format='channels_first' + or 4D tensor with shape: + `(samples, rows * 2, cols * 2, nb_filter)` if data_format='channels_last'. + + # Returns + a keras tensor ''' if type == 'upsampling': x = UpSampling2D()(ip) elif type == 'subpixel': - x = Conv2D(nb_filters, (3, 3), activation='relu', padding='same', W_regularizer=l2(weight_decay), - use_bias=False, kernel_initializer='he_uniform')(ip) + x = Conv2D(nb_filters, (3, 3), activation='relu', padding='same', kernel_regularizer=l2(weight_decay), + use_bias=False, kernel_initializer='he_normal')(ip) x = SubPixelUpscaling(scale_factor=2)(x) - x = Conv2D(nb_filters, (3, 3), activation='relu', padding='same', W_regularizer=l2(weight_decay), - use_bias=False, kernel_initializer='he_uniform')(x) + x = Conv2D(nb_filters, (3, 3), activation='relu', padding='same', kernel_regularizer=l2(weight_decay), + use_bias=False, kernel_initializer='he_normal')(x) else: x = Conv2DTranspose(nb_filters, (3, 3), activation='relu', padding='same', strides=(2, 2), - kernel_initializer='he_uniform')(ip) + kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay))(ip) return x def __create_dense_net(nb_classes, img_input, include_top, depth=40, nb_dense_block=3, growth_rate=12, nb_filter=-1, - nb_layers_per_block=-1, bottleneck=False, reduction=0.0, dropout_rate=None, weight_decay=1E-4, - activation='softmax'): + nb_layers_per_block=-1, bottleneck=False, reduction=0.0, dropout_rate=None, weight_decay=1e-4, + subsample_initial_block=False, pooling=None, activation='softmax'): ''' Build the DenseNet model - Args: + + # Arguments nb_classes: number of classes img_input: tuple of shape (channels, rows, columns) or (rows, columns, channels) include_top: flag to include the final Dense layer @@ -442,28 +725,51 @@ def __create_dense_net(nb_classes, img_input, include_top, depth=40, nb_dense_bl bottleneck: add bottleneck blocks reduction: reduction factor of transition blocks. Note : reduction value is inverted to compute compression dropout_rate: dropout rate - weight_decay: weight decay + weight_decay: weight decay rate + subsample_initial_block: Set to True to subsample the initial convolution and + add a MaxPool2D before the dense blocks are added. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. + - `None` means that the output of the model + will be the 4D tensor output of the + last convolutional layer. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a + 2D tensor. + - `max` means that global max pooling will + be applied. activation: Type of activation at the top layer. Can be one of 'softmax' or 'sigmoid'. Note that if sigmoid is used, classes must be 1. - Returns: keras tensor with nb_layers of conv_block appended + + # Returns + a keras tensor + + # Raises + ValueError: in case of invalid argument for `reduction` + or `nb_dense_block` ''' concat_axis = 1 if K.image_data_format() == 'channels_first' else -1 - assert (depth - 4) % 3 == 0, 'Depth must be 3 N + 4' if reduction != 0.0: - assert reduction <= 1.0 and reduction > 0.0, 'reduction value must lie between 0.0 and 1.0' + if not (reduction <= 1.0 and reduction > 0.0): + raise ValueError('`reduction` value must lie between 0.0 and 1.0') # layers in each dense block if type(nb_layers_per_block) is list or type(nb_layers_per_block) is tuple: nb_layers = list(nb_layers_per_block) # Convert tuple to list - assert len(nb_layers) == (nb_dense_block + 1), 'If list, nb_layer is used as provided. ' \ - 'Note that list size must be (nb_dense_block + 1)' + if len(nb_layers) != (nb_dense_block): + raise ValueError('If `nb_dense_block` is a list, its length must match ' + 'the number of layers provided by `nb_layers`.') + final_nb_layer = nb_layers[-1] nb_layers = nb_layers[:-1] else: if nb_layers_per_block == -1: + assert (depth - 4) % 3 == 0, 'Depth must be 3 N + 4 if nb_layers_per_block == -1' count = int((depth - 4) / 3) nb_layers = [count for _ in range(nb_dense_block)] final_nb_layer = count @@ -471,9 +777,6 @@ def __create_dense_net(nb_classes, img_input, include_top, depth=40, nb_dense_bl final_nb_layer = nb_layers_per_block nb_layers = [nb_layers_per_block] * nb_dense_block - if bottleneck: - nb_layers = [int(layer // 2) for layer in nb_layers] - # compute initial nb_filter if -1, else accept users initial nb_filter if nb_filter <= 0: nb_filter = 2 * growth_rate @@ -482,39 +785,55 @@ def __create_dense_net(nb_classes, img_input, include_top, depth=40, nb_dense_bl compression = 1.0 - reduction # Initial convolution - x = Conv2D(nb_filter, (3, 3), kernel_initializer='he_uniform', padding='same', name='initial_conv2D', - use_bias=False, kernel_regularizer=l2(weight_decay))(img_input) + if subsample_initial_block: + initial_kernel = (7, 7) + initial_strides = (2, 2) + else: + initial_kernel = (3, 3) + initial_strides = (1, 1) + + x = Conv2D(nb_filter, initial_kernel, kernel_initializer='he_normal', padding='same', + strides=initial_strides, use_bias=False, kernel_regularizer=l2(weight_decay))(img_input) + + if subsample_initial_block: + x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(x) + x = Activation('relu')(x) + x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x) # Add dense blocks for block_idx in range(nb_dense_block - 1): x, nb_filter = __dense_block(x, nb_layers[block_idx], nb_filter, growth_rate, bottleneck=bottleneck, dropout_rate=dropout_rate, weight_decay=weight_decay) # add transition_block - x = __transition_block(x, nb_filter, compression=compression, dropout_rate=dropout_rate, - weight_decay=weight_decay) + x = __transition_block(x, nb_filter, compression=compression, weight_decay=weight_decay) nb_filter = int(nb_filter * compression) # The last dense_block does not have a transition_block x, nb_filter = __dense_block(x, final_nb_layer, nb_filter, growth_rate, bottleneck=bottleneck, dropout_rate=dropout_rate, weight_decay=weight_decay) - x = BatchNormalization(axis=concat_axis, gamma_regularizer=l2(weight_decay), - beta_regularizer=l2(weight_decay))(x) + x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(x) x = Activation('relu')(x) - x = GlobalAveragePooling2D()(x) if include_top: - x = Dense(nb_classes, activation=activation, W_regularizer=l2(weight_decay), b_regularizer=l2(weight_decay))(x) + x = GlobalAveragePooling2D()(x) + x = Dense(nb_classes, activation=activation)(x) + else: + if pooling == 'avg': + x = GlobalAveragePooling2D()(x) + if pooling == 'max': + x = GlobalMaxPooling2D()(x) return x def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, growth_rate=12, - reduction=0.0, dropout_rate=None, weight_decay=1E-4, - nb_layers_per_block=4, nb_upsampling_conv=128, upsampling_type='deconv', - init_conv_filters=48, input_shape=None, activation='softmax'): - ''' Build the DenseNet model - Args: + reduction=0.0, dropout_rate=None, weight_decay=1e-4, + nb_layers_per_block=4, nb_upsampling_conv=128, upsampling_type='upsampling', + init_conv_filters=48, input_shape=None, activation='deconv'): + ''' Build the DenseNet-FCN model + + # Arguments nb_classes: number of classes img_input: tuple of shape (channels, rows, columns) or (rows, columns, channels) include_top: flag to include the final Dense layer @@ -534,7 +853,13 @@ def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, input_shape: Only used for shape inference in fully convolutional networks. activation: Type of activation at the top layer. Can be one of 'softmax' or 'sigmoid'. Note that if sigmoid is used, classes must be 1. - Returns: keras tensor with nb_layers of conv_block appended + + # Returns + a keras tensor + + # Raises + ValueError: in case of invalid argument for `reduction`, + `nb_dense_block` or `nb_upsampling_conv`. ''' concat_axis = 1 if K.image_data_format() == 'channels_first' else -1 @@ -545,20 +870,22 @@ def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, rows, cols, _ = input_shape if reduction != 0.0: - assert reduction <= 1.0 and reduction > 0.0, 'reduction value must lie between 0.0 and 1.0' + if not (reduction <= 1.0 and reduction > 0.0): + raise ValueError('`reduction` value must lie between 0.0 and 1.0') # check if upsampling_conv has minimum number of filters # minimum is set to 12, as at least 3 color channels are needed for correct upsampling - assert nb_upsampling_conv > 12 and nb_upsampling_conv % 4 == 0, 'Parameter `upsampling_conv` number of channels must ' \ - 'be a positive number divisible by 4 and greater ' \ - 'than 12' + if not (nb_upsampling_conv > 12 and nb_upsampling_conv % 4 == 0): + raise ValueError('Parameter `nb_upsampling_conv` number of channels must ' + 'be a positive number divisible by 4 and greater than 12') # layers in each dense block if type(nb_layers_per_block) is list or type(nb_layers_per_block) is tuple: nb_layers = list(nb_layers_per_block) # Convert tuple to list - assert len(nb_layers) == (nb_dense_block + 1), 'If list, nb_layer is used as provided. ' \ - 'Note that list size must be (nb_dense_block + 1)' + if len(nb_layers) != (nb_dense_block + 1): + raise ValueError('If `nb_dense_block` is a list, its length must be ' + '(`nb_dense_block` + 1)') bottleneck_nb_layers = nb_layers[-1] rev_layers = nb_layers[::-1] @@ -571,8 +898,10 @@ def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, compression = 1.0 - reduction # Initial convolution - x = Conv2D(init_conv_filters, (3, 3), kernel_initializer='he_uniform', padding='same', name='initial_conv2D', + x = Conv2D(init_conv_filters, (7, 7), kernel_initializer='he_normal', padding='same', name='initial_conv2D', use_bias=False, kernel_regularizer=l2(weight_decay))(img_input) + x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(x) + x = Activation('relu')(x) nb_filter = init_conv_filters @@ -580,15 +909,14 @@ def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, # Add dense blocks and transition down block for block_idx in range(nb_dense_block): - x, nb_filter = __dense_block(x, nb_layers[block_idx], nb_filter, growth_rate, - dropout_rate=dropout_rate, weight_decay=weight_decay) + x, nb_filter = __dense_block(x, nb_layers[block_idx], nb_filter, growth_rate, dropout_rate=dropout_rate, + weight_decay=weight_decay) # Skip connection skip_list.append(x) # add transition_block - x = __transition_block(x, nb_filter, compression=compression, dropout_rate=dropout_rate, - weight_decay=weight_decay) + x = __transition_block(x, nb_filter, compression=compression, weight_decay=weight_decay) nb_filter = int(nb_filter * compression) # this is calculated inside transition_down_block @@ -608,7 +936,7 @@ def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, # not the concatenation of the input with the feature maps (concat_list[0]. l = concatenate(concat_list[1:], axis=concat_axis) - t = __transition_up_block(l, nb_filters=n_filters_keep, type=upsampling_type) + t = __transition_up_block(l, nb_filters=n_filters_keep, type=upsampling_type, weight_decay=weight_decay) # concatenate the skip connection with the transition block x = concatenate([t, skip_list[block_idx]], axis=concat_axis) @@ -616,12 +944,11 @@ def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, # Dont allow the feature map size to grow in upsampling dense blocks x_up, nb_filter, concat_list = __dense_block(x, nb_layers[nb_dense_block + block_idx + 1], nb_filter=growth_rate, growth_rate=growth_rate, dropout_rate=dropout_rate, - weight_decay=weight_decay, - return_concat_list=True, grow_nb_filters=False) + weight_decay=weight_decay, return_concat_list=True, + grow_nb_filters=False) if include_top: - x = Conv2D(nb_classes, (1, 1), activation='linear', padding='same', kernel_regularizer=l2(weight_decay), - use_bias=False)(x_up) + x = Conv2D(nb_classes, (1, 1), activation='linear', padding='same', use_bias=False)(x_up) if K.image_data_format() == 'channels_first': channel, row, col = input_shape