diff --git a/examples/cifar10_densenet.py b/examples/cifar10_densenet.py index 79c6c3d..84e2a33 100644 --- a/examples/cifar10_densenet.py +++ b/examples/cifar10_densenet.py @@ -33,8 +33,11 @@ nb_filter = 16 dropout_rate = 0.0 # 0.0 for data augmentation # Create the model (without loading weights) -model = DenseNet(depth, nb_dense_block, growth_rate, nb_filter, dropout_rate=dropout_rate, - input_shape=img_dim, weights=None) +model = DenseNet(depth=depth, nb_dense_block=nb_dense_block, + growth_rate=growth_rate, nb_filter=nb_filter, + dropout_rate=dropout_rate, + input_shape=img_dim, + weights=None) print('Model created') model.summary() diff --git a/keras_contrib/applications/densenet.py b/keras_contrib/applications/densenet.py index ff72ceb..290d9cc 100644 --- a/keras_contrib/applications/densenet.py +++ b/keras_contrib/applications/densenet.py @@ -1,8 +1,46 @@ # -*- coding: utf-8 -*- -'''DenseNet models for Keras. +'''DenseNet and DenseNet-FCN models for Keras. + +DenseNet is a network architecture where each layer is directly connected +to every other layer in a feed-forward fashion (within each dense block). +For each layer, the feature maps of all preceding layers are treated as +separate inputs whereas its own feature maps are passed on as inputs to +all subsequent layers. This connectivity pattern yields state-of-the-art +accuracies on CIFAR10/100 (with or without data augmentation) and SVHN. +On the large scale ILSVRC 2012 (ImageNet) dataset, DenseNet achieves a +similar accuracy as ResNet, but using less than half the amount of +parameters and roughly half the number of FLOPs. + +DenseNets support any input image size of 32x32 or greater, and are thus +suited for CIFAR-10 or CIFAR-100 datasets. There are two types of DenseNets, +one suited for smaller images (DenseNet) and one suited for ImageNet, +called DenseNetImageNet. They are differentiated by the strided convolution +and pooling operations prior to the initial dense block. 
+ +The following table describes the size and error rates (lower is better) of DenseNetImageNet models +on the ImageNet dataset (single crop), for which weights are provided: +------------------------------------------------------------------------------------ +| Model type | ImageNet Err (Top 1) | ImageNet Err (Top 5) | Params (M) | +------------------------------------------------------------------------------------ +| DenseNet-121 | 25.02 % | 7.71 % | 8.0 | +| DenseNet-169 | 23.80 % | 6.85 % | 14.3 | +| DenseNet-201 | 22.58 % | 6.34 % | 20.2 | +| DenseNet-161 | 22.20 % | - % | 28.9 | +------------------------------------------------------------------------------------ + +DenseNets can be extended to image segmentation tasks as described in the +paper "The One Hundred Layers Tiramisu: Fully Convolutional DenseNets for +Semantic Segmentation". Here, the dense blocks are arranged and concatenated +with long skip connections for state-of-the-art performance on the CamVid dataset. + # Reference - [Densely Connected Convolutional Networks](https://arxiv.org/pdf/1608.06993.pdf) - [The One Hundred Layers Tiramisu: Fully Convolutional DenseNets for Semantic Segmentation](https://arxiv.org/pdf/1611.09326.pdf) + +This implementation is based on the following reference code: + - https://github.com/gpleiss/efficient_densenet_pytorch + - https://github.com/liuzhuang13/DenseNet + ''' from __future__ import print_function from __future__ import absolute_import @@ -11,89 +49,147 @@ from __future__ import division import warnings from keras.models import Model -from keras.layers.core import Dense, Dropout, Activation, Reshape -from keras.layers.convolutional import Conv2D, Conv2DTranspose, UpSampling2D -from keras.layers.pooling import AveragePooling2D -from keras.layers.pooling import GlobalAveragePooling2D +from keras.layers import Dense +from keras.layers import Dropout +from keras.layers import Activation +from keras.layers import Reshape +from keras.layers import Conv2D +from keras.layers import
Conv2DTranspose +from keras.layers import UpSampling2D +from keras.layers import MaxPooling2D +from keras.layers import AveragePooling2D +from keras.layers import GlobalMaxPooling2D +from keras.layers import GlobalAveragePooling2D from keras.layers import Input -from keras.layers.merge import concatenate -from keras.layers.normalization import BatchNormalization +from keras.layers import concatenate +from keras.layers import BatchNormalization from keras.regularizers import l2 from keras.utils.layer_utils import convert_all_kernels_in_model from keras.utils.data_utils import get_file from keras.engine.topology import get_source_inputs from keras.applications.imagenet_utils import _obtain_input_shape +from keras.applications.imagenet_utils import decode_predictions +from keras.applications.imagenet_utils import preprocess_input as _preprocess_input import keras.backend as K from keras_contrib.layers.convolutional import SubPixelUpscaling -TH_WEIGHTS_PATH = 'https://github.com/titu1994/DenseNet/releases/download/v2.0/DenseNet-40-12-Theano-Backend-TH-dim-ordering.h5' -TF_WEIGHTS_PATH = 'https://github.com/titu1994/DenseNet/releases/download/v2.0/DenseNet-40-12-Tensorflow-Backend-TF-dim-ordering.h5' -TH_WEIGHTS_PATH_NO_TOP = 'https://github.com/titu1994/DenseNet/releases/download/v2.0/DenseNet-40-12-Theano-Backend-TH-dim-ordering-no-top.h5' -TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/titu1994/DenseNet/releases/download/v2.0/DenseNet-40-12-Tensorflow-Backend-TF-dim-ordering-no-top.h5' +DENSENET_121_WEIGHTS_PATH = r'https://github.com/titu1994/DenseNet/releases/download/v3.0/DenseNet-BC-121-32.h5' +DENSENET_161_WEIGHTS_PATH = r'https://github.com/titu1994/DenseNet/releases/download/v3.0/DenseNet-BC-161-48.h5' +DENSENET_169_WEIGHTS_PATH = r'https://github.com/titu1994/DenseNet/releases/download/v3.0/DenseNet-BC-169-32.h5' +DENSENET_121_WEIGHTS_PATH_NO_TOP = r'https://github.com/titu1994/DenseNet/releases/download/v3.0/DenseNet-BC-121-32-no-top.h5' 
+DENSENET_161_WEIGHTS_PATH_NO_TOP = r'https://github.com/titu1994/DenseNet/releases/download/v3.0/DenseNet-BC-161-48-no-top.h5' +DENSENET_169_WEIGHTS_PATH_NO_TOP = r'https://github.com/titu1994/DenseNet/releases/download/v3.0/DenseNet-BC-169-32-no-top.h5' -def DenseNet(input_shape=None, depth=40, nb_dense_block=3, growth_rate=12, nb_filter=16, nb_layers_per_block=-1, - bottleneck=False, reduction=0.0, dropout_rate=0.0, weight_decay=1E-4, - include_top=True, weights='cifar10', input_tensor=None, - classes=10, activation='softmax'): - '''Instantiate the DenseNet architecture, - optionally loading weights pre-trained - on CIFAR-10. Note that when using TensorFlow, - for best performance you should set - `image_data_format='channels_last'` in your Keras config - at ~/.keras/keras.json. - The model and the weights are compatible with both - TensorFlow and Theano. The dimension ordering - convention used by the model is the one - specified in your Keras config file. - # Arguments - input_shape: optional shape tuple, only to be specified - if `include_top` is False (otherwise the input shape - has to be `(32, 32, 3)` (with `channels_last` dim ordering) - or `(3, 32, 32)` (with `channels_first` dim ordering). - It should have exactly 3 inputs channels, - and width and height should be no smaller than 8. - E.g. `(200, 200, 3)` would be one valid value. - depth: number or layers in the DenseNet - nb_dense_block: number of dense blocks to add to end (generally = 3) - growth_rate: number of filters to add per dense block - nb_filter: initial number of filters. -1 indicates initial - number of filters is 2 * growth_rate - nb_layers_per_block: number of layers in each dense block. - Can be a -1, positive integer or a list. - If -1, calculates nb_layer_per_block from the network depth. - If positive integer, a set number of layers per dense block. - If list, nb_layer is used as provided. 
Note that list size must - be (nb_dense_block + 1) - bottleneck: flag to add bottleneck blocks in between dense blocks - reduction: reduction factor of transition blocks. - Note : reduction value is inverted to compute compression. - dropout_rate: dropout rate - weight_decay: weight decay factor - include_top: whether to include the fully-connected - layer at the top of the network. - weights: one of `None` (random initialization) or - 'cifar10' (pre-training on CIFAR-10).. - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) - to use as image input for the model. - classes: optional number of classes to classify images - into, only to be specified if `include_top` is True, and - if no `weights` argument is specified. - activation: Type of activation at the top layer. Can be one of 'softmax' or 'sigmoid'. - Note that if sigmoid is used, classes must be 1. - # Returns - A Keras model instance. - ''' +def preprocess_input(x, data_format=None): + """Preprocesses a tensor encoding a batch of images. - if weights not in {'cifar10', None}: + # Arguments + x: input Numpy tensor, 4D. + data_format: data format of the image tensor. + + # Returns + Preprocessed tensor. + """ + x = _preprocess_input(x, data_format=data_format) + x *= 0.017 # scale values + return x + + +def DenseNet(input_shape=None, + depth=40, + nb_dense_block=3, + growth_rate=12, + nb_filter=-1, + nb_layers_per_block=-1, + bottleneck=False, + reduction=0.0, + dropout_rate=0.0, + weight_decay=1e-4, + subsample_initial_block=False, + include_top=True, + weights=None, + input_tensor=None, + pooling=None, + classes=10, + activation='softmax'): + '''Instantiate the DenseNet architecture. + + The model and the weights are compatible with both + TensorFlow and Theano. The dimension ordering + convention used by the model is the one + specified in your Keras config file. 
+ + # Arguments + input_shape: optional shape tuple, only to be specified + if `include_top` is False (otherwise the input shape + has to be `(224, 224, 3)` (with `channels_last` dim ordering) + or `(3, 224, 224)` (with `channels_first` dim ordering)). + It should have exactly 3 input channels, + and width and height should be no smaller than 8. + E.g. `(224, 224, 3)` would be one valid value. + depth: number of layers in the DenseNet + nb_dense_block: number of dense blocks to add to end + growth_rate: number of filters to add per dense block + nb_filter: initial number of filters. -1 indicates initial + number of filters will default to 2 * growth_rate + nb_layers_per_block: number of layers in each dense block. + Can be -1, a positive integer or a list. + If -1, calculates nb_layers_per_block from the network depth. + If positive integer, a set number of layers per dense block. + If list, the list is used as provided. Note that list size must + be nb_dense_block + bottleneck: flag to add bottleneck blocks in between dense blocks + reduction: reduction factor of transition blocks. + Note : reduction value is inverted to compute compression. + dropout_rate: dropout rate + weight_decay: weight decay rate + subsample_initial_block: Changes model type to suit different datasets. + Should be set to True for ImageNet, and False for CIFAR datasets. + When set to True, the initial convolution will be strided and + adds a MaxPooling2D before the initial dense block. + include_top: whether to include the fully-connected + layer at the top of the network. + weights: one of `None` (random initialization) or + 'imagenet' (pre-training on ImageNet). + input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) + to use as image input for the model. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. + - `None` means that the output of the model + will be the 4D tensor output of the + last convolutional layer.
+ - `avg` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a + 2D tensor. + - `max` means that global max pooling will + be applied. + classes: optional number of classes to classify images + into, only to be specified if `include_top` is True, and + if no `weights` argument is specified. + activation: Type of activation at the top layer. Can be one of + 'softmax' or 'sigmoid'. Note that if sigmoid is used, + classes must be 1. + + # Returns + A Keras model instance. + + # Raises + ValueError: in case of invalid argument for `weights`, + or invalid input shape. + ''' + + if weights not in {'imagenet', None}: raise ValueError('The `weights` argument should be either ' - '`None` (random initialization) or `cifar10` ' - '(pre-training on CIFAR-10).') + '`None` (random initialization) or `imagenet` ' + '(pre-training on ImageNet).') - if weights == 'cifar10' and include_top and classes != 10: - raise ValueError('If using `weights` as CIFAR 10 with `include_top`' - ' as true, `classes` should be 10') + if weights == 'imagenet' and include_top and classes != 1000: + raise ValueError('If using `weights` as ImageNet with `include_top` ' + 'as true, `classes` should be 1000') if activation not in ['softmax', 'sigmoid']: raise ValueError('activation must be one of "softmax" or "sigmoid"') @@ -106,7 +202,7 @@ def DenseNet(input_shape=None, depth=40, nb_dense_block=3, growth_rate=12, nb_fi default_size=32, min_size=8, data_format=K.image_data_format(), - include_top=include_top) + require_flatten=include_top) if input_tensor is None: img_input = Input(shape=input_shape) @@ -117,8 +213,9 @@ def DenseNet(input_shape=None, depth=40, nb_dense_block=3, growth_rate=12, nb_fi img_input = input_tensor x = __create_dense_net(classes, img_input, include_top, depth, nb_dense_block, - growth_rate, nb_filter, nb_layers_per_block, bottleneck, reduction, - dropout_rate, weight_decay, activation) + 
growth_rate, nb_filter, nb_layers_per_block, bottleneck, + reduction, dropout_rate, weight_decay, subsample_initial_block, + pooling, activation) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. @@ -130,47 +227,69 @@ def DenseNet(input_shape=None, depth=40, nb_dense_block=3, growth_rate=12, nb_fi model = Model(inputs, x, name='densenet') # load weights - if weights == 'cifar10': - if (depth == 40) and (nb_dense_block == 3) and (growth_rate == 12) and (nb_filter == 16) and \ - (bottleneck is False) and (reduction == 0.0) and (dropout_rate == 0.0) and (weight_decay == 1E-4): - # Default parameters match. Weights for this model exist: + if weights == 'imagenet': + weights_loaded = False - if K.image_data_format() == 'channels_first': - if include_top: - weights_path = get_file('densenet_40_12_th_dim_ordering_th_kernels.h5', - TH_WEIGHTS_PATH, - cache_subdir='models') - else: - weights_path = get_file('densenet_40_12_th_dim_ordering_th_kernels_no_top.h5', - TH_WEIGHTS_PATH_NO_TOP, - cache_subdir='models') - - model.load_weights(weights_path) - - if K.backend() == 'tensorflow': - warnings.warn('You are using the TensorFlow backend, yet you ' - 'are using the Theano ' - 'image dimension ordering convention ' - '(`image_data_format="channels_first"`). 
' - 'For best performance, set ' - '`image_data_format="channels_last"` in ' - 'your Keras config ' - 'at ~/.keras/keras.json.') - convert_all_kernels_in_model(model) + if (depth == 121) and (nb_dense_block == 4) and (growth_rate == 32) and (nb_filter == 64) and \ + (bottleneck is True) and (reduction == 0.5) and (subsample_initial_block): + if include_top: + weights_path = get_file('DenseNet-BC-121-32.h5', + DENSENET_121_WEIGHTS_PATH, + cache_subdir='models', + md5_hash='a439dd41aa672aef6daba4ee1fd54abd') else: - if include_top: - weights_path = get_file('densenet_40_12_tf_dim_ordering_tf_kernels.h5', - TF_WEIGHTS_PATH, - cache_subdir='models') - else: - weights_path = get_file('densenet_40_12_tf_dim_ordering_tf_kernels_no_top.h5', - TF_WEIGHTS_PATH_NO_TOP, - cache_subdir='models') + weights_path = get_file('DenseNet-BC-121-32-no-top.h5', + DENSENET_121_WEIGHTS_PATH_NO_TOP, + cache_subdir='models', + md5_hash='55e62a6358af8a0af0eedf399b5aea99') + model.load_weights(weights_path, by_name=True) + weights_loaded = True - model.load_weights(weights_path) + if (depth == 161) and (nb_dense_block == 4) and (growth_rate == 48) and (nb_filter == 96) and \ + (bottleneck is True) and (reduction == 0.5) and (subsample_initial_block): + if include_top: + weights_path = get_file('DenseNet-BC-161-48.h5', + DENSENET_161_WEIGHTS_PATH, + cache_subdir='models', + md5_hash='6c326cf4fbdb57d31eff04333a23fcca') + else: + weights_path = get_file('DenseNet-BC-161-48-no-top.h5', + DENSENET_161_WEIGHTS_PATH_NO_TOP, + cache_subdir='models', + md5_hash='1a9476b79f6b7673acaa2769e6427b92') + model.load_weights(weights_path, by_name=True) + weights_loaded = True - if K.backend() == 'theano': - convert_all_kernels_in_model(model) + if (depth == 169) and (nb_dense_block == 4) and (growth_rate == 32) and (nb_filter == 64) and \ + (bottleneck is True) and (reduction == 0.5) and (subsample_initial_block): + if include_top: + weights_path = get_file('DenseNet-BC-169-32.h5', + 
DENSENET_169_WEIGHTS_PATH, + cache_subdir='models', + md5_hash='914869c361303d2e39dec640b4e606a6') + else: + weights_path = get_file('DenseNet-BC-169-32-no-top.h5', + DENSENET_169_WEIGHTS_PATH_NO_TOP, + cache_subdir='models', + md5_hash='89c19e8276cfd10585d5fadc1df6859e') + model.load_weights(weights_path, by_name=True) + weights_loaded = True + + if weights_loaded: + if K.backend() == 'theano': + convert_all_kernels_in_model(model) + + if K.image_data_format() == 'channels_first' and K.backend() == 'tensorflow': + warnings.warn('You are using the TensorFlow backend, yet you ' + 'are using the Theano ' + 'image data format convention ' + '(`image_data_format="channels_first"`). ' + 'For best performance, set ' + '`image_data_format="channels_last"` in ' + 'your Keras config ' + 'at ~/.keras/keras.json.') + + print("Weights for the model were loaded successfully") return model @@ -297,135 +416,286 @@ def DenseNetFCN(input_shape, nb_dense_block=5, growth_rate=16, nb_layers_per_blo return model -def __conv_block(ip, nb_filter, bottleneck=False, dropout_rate=None, weight_decay=1E-4): - ''' Apply BatchNorm, Relu, 3x3 Conv2D, optional bottleneck block and dropout - Args: - ip: Input keras tensor - nb_filter: number of filters - bottleneck: add bottleneck block +def DenseNetImageNet121(input_shape=None, + bottleneck=True, + reduction=0.5, + dropout_rate=0.0, + weight_decay=1e-4, + include_top=True, + weights='imagenet', + input_tensor=None, + pooling=None, + classes=1000, + activation='softmax'): + return DenseNet(input_shape, depth=121, nb_dense_block=4, growth_rate=32, nb_filter=64, + nb_layers_per_block=[6, 12, 24, 16], bottleneck=bottleneck, reduction=reduction, + dropout_rate=dropout_rate, weight_decay=weight_decay, subsample_initial_block=True, + include_top=include_top, weights=weights, input_tensor=input_tensor, + pooling=pooling, classes=classes, activation=activation) + + +def DenseNetImageNet169(input_shape=None, + bottleneck=True, + reduction=0.5, + 
dropout_rate=0.0, + weight_decay=1e-4, + include_top=True, + weights='imagenet', + input_tensor=None, + pooling=None, + classes=1000, + activation='softmax'): + return DenseNet(input_shape, depth=169, nb_dense_block=4, growth_rate=32, nb_filter=64, + nb_layers_per_block=[6, 12, 32, 32], bottleneck=bottleneck, reduction=reduction, + dropout_rate=dropout_rate, weight_decay=weight_decay, subsample_initial_block=True, + include_top=include_top, weights=weights, input_tensor=input_tensor, + pooling=pooling, classes=classes, activation=activation) + + +def DenseNetImageNet201(input_shape=None, + bottleneck=True, + reduction=0.5, + dropout_rate=0.0, + weight_decay=1e-4, + include_top=True, + weights=None, + input_tensor=None, + pooling=None, + classes=1000, + activation='softmax'): + return DenseNet(input_shape, depth=201, nb_dense_block=4, growth_rate=32, nb_filter=64, + nb_layers_per_block=[6, 12, 48, 32], bottleneck=bottleneck, reduction=reduction, + dropout_rate=dropout_rate, weight_decay=weight_decay, subsample_initial_block=True, + include_top=include_top, weights=weights, input_tensor=input_tensor, + pooling=pooling, classes=classes, activation=activation) + + +def DenseNetImageNet264(input_shape=None, + bottleneck=True, + reduction=0.5, + dropout_rate=0.0, + weight_decay=1e-4, + include_top=True, + weights=None, + input_tensor=None, + pooling=None, + classes=1000, + activation='softmax'): + return DenseNet(input_shape, depth=201, nb_dense_block=4, growth_rate=32, nb_filter=64, + nb_layers_per_block=[6, 12, 64, 48], bottleneck=bottleneck, reduction=reduction, + dropout_rate=dropout_rate, weight_decay=weight_decay, subsample_initial_block=True, + include_top=include_top, weights=weights, input_tensor=input_tensor, + pooling=pooling, classes=classes, activation=activation) + + +def DenseNetImageNet161(input_shape=None, + bottleneck=True, + reduction=0.5, + dropout_rate=0.0, + weight_decay=1e-4, + include_top=True, + weights='imagenet', + input_tensor=None, + 
pooling=None, + classes=1000, + activation='softmax'): + return DenseNet(input_shape, depth=161, nb_dense_block=4, growth_rate=48, nb_filter=96, + nb_layers_per_block=[6, 12, 36, 24], bottleneck=bottleneck, reduction=reduction, + dropout_rate=dropout_rate, weight_decay=weight_decay, subsample_initial_block=True, + include_top=include_top, weights=weights, input_tensor=input_tensor, + pooling=pooling, classes=classes, activation=activation) + + +def __conv_block(ip, nb_filter, bottleneck=False, dropout_rate=None, weight_decay=1e-4): + ''' + Adds a convolution layer (with batch normalization and relu), + and optionally a bottleneck layer. + + # Arguments + ip: Input tensor + nb_filter: integer, the dimensionality of the output space + (i.e. the number output of filters in the convolution) + bottleneck: if True, adds a bottleneck convolution block dropout_rate: dropout rate weight_decay: weight decay factor - Returns: keras tensor with batch_norm, relu and convolution2d added (optional bottleneck) + + # Input shape + 4D tensor with shape: + `(samples, channels, rows, cols)` if data_format='channels_first' + or 4D tensor with shape: + `(samples, rows, cols, channels)` if data_format='channels_last'. + + # Output shape + 4D tensor with shape: + `(samples, filters, new_rows, new_cols)` if data_format='channels_first' + or 4D tensor with shape: + `(samples, new_rows, new_cols, filters)` if data_format='channels_last'. + `rows` and `cols` values might have changed due to stride. 
+ + # Returns + output tensor of block ''' + with K.name_scope('ConvBlock'): + concat_axis = 1 if K.image_data_format() == 'channels_first' else -1 - concat_axis = 1 if K.image_data_format() == 'channels_first' else -1 + x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(ip) + x = Activation('relu')(x) - x = BatchNormalization(axis=concat_axis, gamma_regularizer=l2(weight_decay), - beta_regularizer=l2(weight_decay))(ip) - x = Activation('relu')(x) + if bottleneck: + inter_channel = nb_filter * 4 - if bottleneck: - inter_channel = nb_filter * 4 # Obtained from https://github.com/liuzhuang13/DenseNet/blob/master/densenet.lua - - x = Conv2D(inter_channel, (1, 1), kernel_initializer='he_uniform', padding='same', use_bias=False, - kernel_regularizer=l2(weight_decay))(x) + x = Conv2D(inter_channel, (1, 1), kernel_initializer='he_normal', padding='same', use_bias=False, + kernel_regularizer=l2(weight_decay))(x) + x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(x) + x = Activation('relu')(x) + x = Conv2D(nb_filter, (3, 3), kernel_initializer='he_normal', padding='same', use_bias=False)(x) if dropout_rate: x = Dropout(dropout_rate)(x) - x = BatchNormalization(axis=concat_axis, gamma_regularizer=l2(weight_decay), - beta_regularizer=l2(weight_decay))(x) + return x + + +def __dense_block(x, nb_layers, nb_filter, growth_rate, bottleneck=False, dropout_rate=None, + weight_decay=1e-4, grow_nb_filters=True, return_concat_list=False): + ''' + Build a dense_block where the output of each conv_block is fed + to subsequent ones + + # Arguments + x: input keras tensor + nb_layers: the number of conv_blocks to append to the model + nb_filter: integer, the dimensionality of the output space + (i.e. 
the number output of filters in the convolution) + growth_rate: growth rate of the dense block + bottleneck: if True, adds a bottleneck convolution block to + each conv_block + dropout_rate: dropout rate + weight_decay: weight decay factor + grow_nb_filters: if True, allows number of filters to grow + return_concat_list: set to True to return the list of + feature maps along with the actual output + + # Return + If return_concat_list is True, returns a list of the output + keras tensor, the number of filters and a list of all the + dense blocks added to the keras tensor + + If return_concat_list is False, returns a list of the output + keras tensor and the number of filters + ''' + with K.name_scope('DenseBlock'): + concat_axis = 1 if K.image_data_format() == 'channels_first' else -1 + + x_list = [x] + + for i in range(nb_layers): + cb = __conv_block(x, growth_rate, bottleneck, dropout_rate, weight_decay) + x_list.append(cb) + + x = concatenate([x, cb], axis=concat_axis) + + if grow_nb_filters: + nb_filter += growth_rate + + if return_concat_list: + return x, nb_filter, x_list + else: + return x, nb_filter + + +def __transition_block(ip, nb_filter, compression=1.0, weight_decay=1e-4): + ''' + Adds a pointwise convolution layer (with batch normalization and relu), + and an average pooling layer. The number of output convolution filters + can be reduced by appropriately reducing the compression parameter. + + # Arguments + ip: input keras tensor + nb_filter: integer, the dimensionality of the output space + (i.e. the number output of filters in the convolution) + compression: calculated as 1 - reduction. Reduces the number + of feature maps in the transition block. + weight_decay: weight decay factor + + # Input shape + 4D tensor with shape: + `(samples, channels, rows, cols)` if data_format='channels_first' + or 4D tensor with shape: + `(samples, rows, cols, channels)` if data_format='channels_last'. 
+ + # Output shape + 4D tensor with shape: + `(samples, nb_filter * compression, rows / 2, cols / 2)` + if data_format='channels_first' + or 4D tensor with shape: + `(samples, rows / 2, cols / 2, nb_filter * compression)` + if data_format='channels_last'. + + # Returns + a keras tensor + ''' + with K.name_scope('Transition'): + concat_axis = 1 if K.image_data_format() == 'channels_first' else -1 + + x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(ip) x = Activation('relu')(x) + x = Conv2D(int(nb_filter * compression), (1, 1), kernel_initializer='he_normal', padding='same', + use_bias=False, kernel_regularizer=l2(weight_decay))(x) + x = AveragePooling2D((2, 2), strides=(2, 2))(x) - x = Conv2D(nb_filter, (3, 3), kernel_initializer='he_uniform', padding='same', use_bias=False, - kernel_regularizer=l2(weight_decay))(x) - if dropout_rate: - x = Dropout(dropout_rate)(x) - - return x + return x -def __transition_block(ip, nb_filter, compression=1.0, dropout_rate=None, weight_decay=1E-4): - ''' Apply BatchNorm, Relu 1x1, Conv2D, optional compression, dropout and Maxpooling2D - Args: - ip: keras tensor - nb_filter: number of filters - compression: calculated as 1 - reduction. Reduces the number of feature maps - in the transition block. - dropout_rate: dropout rate +def __transition_up_block(ip, nb_filters, type='deconv', weight_decay=1E-4): + '''Adds an upsampling block. Upsampling operation relies on the the type parameter. + + # Arguments + ip: input keras tensor + nb_filters: integer, the dimensionality of the output space + (i.e. the number output of filters in the convolution) + type: can be 'upsampling', 'subpixel', 'deconv'. 
Determines + type of upsampling performed weight_decay: weight decay factor - Returns: keras tensor, after applying batch_norm, relu-conv, dropout, maxpool + + # Input shape + 4D tensor with shape: + `(samples, channels, rows, cols)` if data_format='channels_first' + or 4D tensor with shape: + `(samples, rows, cols, channels)` if data_format='channels_last'. + + # Output shape + 4D tensor with shape: + `(samples, nb_filter, rows * 2, cols * 2)` if data_format='channels_first' + or 4D tensor with shape: + `(samples, rows * 2, cols * 2, nb_filter)` if data_format='channels_last'. + + # Returns + a keras tensor ''' + with K.name_scope('TransitionUp'): - concat_axis = 1 if K.image_data_format() == 'channels_first' else -1 + if type == 'upsampling': + x = UpSampling2D()(ip) + elif type == 'subpixel': + x = Conv2D(nb_filters, (3, 3), activation='relu', padding='same', kernel_regularizer=l2(weight_decay), + use_bias=False, kernel_initializer='he_normal')(ip) + x = SubPixelUpscaling(scale_factor=2)(x) + x = Conv2D(nb_filters, (3, 3), activation='relu', padding='same', kernel_regularizer=l2(weight_decay), + use_bias=False, kernel_initializer='he_normal')(x) + else: + x = Conv2DTranspose(nb_filters, (3, 3), activation='relu', padding='same', strides=(2, 2), + kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay))(ip) - x = BatchNormalization(axis=concat_axis, gamma_regularizer=l2(weight_decay), - beta_regularizer=l2(weight_decay))(ip) - x = Activation('relu')(x) - x = Conv2D(int(nb_filter * compression), (1, 1), kernel_initializer='he_uniform', padding='same', use_bias=False, - kernel_regularizer=l2(weight_decay))(x) - if dropout_rate: - x = Dropout(dropout_rate)(x) - x = AveragePooling2D((2, 2), strides=(2, 2))(x) - - return x - - -def __dense_block(x, nb_layers, nb_filter, growth_rate, bottleneck=False, dropout_rate=None, weight_decay=1E-4, - grow_nb_filters=True, return_concat_list=False): - ''' Build a dense_block where the output of each conv_block is fed 
to subsequent ones - Args: - x: keras tensor - nb_layers: the number of layers of conv_block to append to the model. - nb_filter: number of filters - growth_rate: growth rate - bottleneck: bottleneck block - dropout_rate: dropout rate - weight_decay: weight decay factor - grow_nb_filters: flag to decide to allow number of filters to grow - return_concat_list: return the list of feature maps along with the actual output - Returns: keras tensor with nb_layers of conv_block appended - ''' - - concat_axis = 1 if K.image_data_format() == 'channels_first' else -1 - - x_list = [x] - - for i in range(nb_layers): - conv_block = __conv_block(x, growth_rate, bottleneck, dropout_rate, weight_decay) - x_list.append(conv_block) - - x = concatenate([x, conv_block], axis=concat_axis) - - if grow_nb_filters: - nb_filter += growth_rate - - if return_concat_list: - return x, nb_filter, x_list - else: - return x, nb_filter - - -def __transition_up_block(ip, nb_filters, type='upsampling', weight_decay=1E-4): - ''' SubpixelConvolutional Upscaling (factor = 2) - Args: - ip: keras tensor - nb_filters: number of layers - type: can be 'upsampling', 'subpixel', 'deconv'. Determines type of upsampling performed - weight_decay: weight decay factor - Returns: keras tensor, after applying upsampling operation. 
- ''' - - if type == 'upsampling': - x = UpSampling2D()(ip) - elif type == 'subpixel': - x = Conv2D(nb_filters, (3, 3), activation='relu', padding='same', W_regularizer=l2(weight_decay), - use_bias=False, kernel_initializer='he_uniform')(ip) - x = SubPixelUpscaling(scale_factor=2)(x) - x = Conv2D(nb_filters, (3, 3), activation='relu', padding='same', W_regularizer=l2(weight_decay), - use_bias=False, kernel_initializer='he_uniform')(x) - else: - x = Conv2DTranspose(nb_filters, (3, 3), activation='relu', padding='same', strides=(2, 2), - kernel_initializer='he_uniform')(ip) - - return x + return x def __create_dense_net(nb_classes, img_input, include_top, depth=40, nb_dense_block=3, growth_rate=12, nb_filter=-1, - nb_layers_per_block=-1, bottleneck=False, reduction=0.0, dropout_rate=None, weight_decay=1E-4, - activation='softmax'): + nb_layers_per_block=-1, bottleneck=False, reduction=0.0, dropout_rate=None, weight_decay=1e-4, + subsample_initial_block=False, pooling=None, activation='softmax'): ''' Build the DenseNet model - Args: + + # Arguments nb_classes: number of classes img_input: tuple of shape (channels, rows, columns) or (rows, columns, channels) include_top: flag to include the final Dense layer @@ -442,79 +712,117 @@ def __create_dense_net(nb_classes, img_input, include_top, depth=40, nb_dense_bl bottleneck: add bottleneck blocks reduction: reduction factor of transition blocks. Note : reduction value is inverted to compute compression dropout_rate: dropout rate - weight_decay: weight decay + weight_decay: weight decay rate + subsample_initial_block: Changes model type to suit different datasets. + Should be set to True for ImageNet, and False for CIFAR datasets. + When set to True, the initial convolution will be strided and + adds a MaxPooling2D before the initial dense block. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. 
+ - `None` means that the output of the model + will be the 4D tensor output of the + last convolutional layer. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a + 2D tensor. + - `max` means that global max pooling will + be applied. activation: Type of activation at the top layer. Can be one of 'softmax' or 'sigmoid'. Note that if sigmoid is used, classes must be 1. - Returns: keras tensor with nb_layers of conv_block appended + + # Returns + a keras tensor + + # Raises + ValueError: in case of invalid argument for `reduction` + or `nb_dense_block` ''' + with K.name_scope('DenseNet'): + concat_axis = 1 if K.image_data_format() == 'channels_first' else -1 - concat_axis = 1 if K.image_data_format() == 'channels_first' else -1 + if reduction != 0.0: + if not (reduction <= 1.0 and reduction > 0.0): + raise ValueError('`reduction` value must lie between 0.0 and 1.0') - assert (depth - 4) % 3 == 0, 'Depth must be 3 N + 4' - if reduction != 0.0: - assert reduction <= 1.0 and reduction > 0.0, 'reduction value must lie between 0.0 and 1.0' + # layers in each dense block + if type(nb_layers_per_block) is list or type(nb_layers_per_block) is tuple: + nb_layers = list(nb_layers_per_block) # Convert tuple to list - # layers in each dense block - if type(nb_layers_per_block) is list or type(nb_layers_per_block) is tuple: - nb_layers = list(nb_layers_per_block) # Convert tuple to list + if len(nb_layers) != (nb_dense_block): + raise ValueError('If `nb_dense_block` is a list, its length must match ' + 'the number of layers provided by `nb_layers`.') - assert len(nb_layers) == (nb_dense_block + 1), 'If list, nb_layer is used as provided. 
' \ - 'Note that list size must be (nb_dense_block + 1)' - final_nb_layer = nb_layers[-1] - nb_layers = nb_layers[:-1] - else: - if nb_layers_per_block == -1: - count = int((depth - 4) / 3) - nb_layers = [count for _ in range(nb_dense_block)] - final_nb_layer = count + final_nb_layer = nb_layers[-1] + nb_layers = nb_layers[:-1] else: - final_nb_layer = nb_layers_per_block - nb_layers = [nb_layers_per_block] * nb_dense_block + if nb_layers_per_block == -1: + assert (depth - 4) % 3 == 0, 'Depth must be 3 N + 4 if nb_layers_per_block == -1' + count = int((depth - 4) / 3) + nb_layers = [count for _ in range(nb_dense_block)] + final_nb_layer = count + else: + final_nb_layer = nb_layers_per_block + nb_layers = [nb_layers_per_block] * nb_dense_block - if bottleneck: - nb_layers = [int(layer // 2) for layer in nb_layers] + # compute initial nb_filter if -1, else accept users initial nb_filter + if nb_filter <= 0: + nb_filter = 2 * growth_rate - # compute initial nb_filter if -1, else accept users initial nb_filter - if nb_filter <= 0: - nb_filter = 2 * growth_rate + # compute compression factor + compression = 1.0 - reduction - # compute compression factor - compression = 1.0 - reduction + # Initial convolution + if subsample_initial_block: + initial_kernel = (7, 7) + initial_strides = (2, 2) + else: + initial_kernel = (3, 3) + initial_strides = (1, 1) - # Initial convolution - x = Conv2D(nb_filter, (3, 3), kernel_initializer='he_uniform', padding='same', name='initial_conv2D', - use_bias=False, kernel_regularizer=l2(weight_decay))(img_input) + x = Conv2D(nb_filter, initial_kernel, kernel_initializer='he_normal', padding='same', + strides=initial_strides, use_bias=False, kernel_regularizer=l2(weight_decay))(img_input) - # Add dense blocks - for block_idx in range(nb_dense_block - 1): - x, nb_filter = __dense_block(x, nb_layers[block_idx], nb_filter, growth_rate, bottleneck=bottleneck, + if subsample_initial_block: + x = BatchNormalization(axis=concat_axis, 
epsilon=1.1e-5)(x) + x = Activation('relu')(x) + x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x) + + # Add dense blocks + for block_idx in range(nb_dense_block - 1): + x, nb_filter = __dense_block(x, nb_layers[block_idx], nb_filter, growth_rate, bottleneck=bottleneck, + dropout_rate=dropout_rate, weight_decay=weight_decay) + # add transition_block + x = __transition_block(x, nb_filter, compression=compression, weight_decay=weight_decay) + nb_filter = int(nb_filter * compression) + + # The last dense_block does not have a transition_block + x, nb_filter = __dense_block(x, final_nb_layer, nb_filter, growth_rate, bottleneck=bottleneck, dropout_rate=dropout_rate, weight_decay=weight_decay) - # add transition_block - x = __transition_block(x, nb_filter, compression=compression, dropout_rate=dropout_rate, - weight_decay=weight_decay) - nb_filter = int(nb_filter * compression) - # The last dense_block does not have a transition_block - x, nb_filter = __dense_block(x, final_nb_layer, nb_filter, growth_rate, bottleneck=bottleneck, - dropout_rate=dropout_rate, weight_decay=weight_decay) + x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(x) + x = Activation('relu')(x) - x = BatchNormalization(axis=concat_axis, gamma_regularizer=l2(weight_decay), - beta_regularizer=l2(weight_decay))(x) - x = Activation('relu')(x) - x = GlobalAveragePooling2D()(x) + if include_top: + x = GlobalAveragePooling2D()(x) + x = Dense(nb_classes, activation=activation)(x) + else: + if pooling == 'avg': + x = GlobalAveragePooling2D()(x) + if pooling == 'max': + x = GlobalMaxPooling2D()(x) - if include_top: - x = Dense(nb_classes, activation=activation, W_regularizer=l2(weight_decay), b_regularizer=l2(weight_decay))(x) - - return x + return x def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, growth_rate=12, - reduction=0.0, dropout_rate=None, weight_decay=1E-4, - nb_layers_per_block=4, nb_upsampling_conv=128, upsampling_type='deconv', - 
init_conv_filters=48, input_shape=None, activation='softmax'): - ''' Build the DenseNet model - Args: + reduction=0.0, dropout_rate=None, weight_decay=1e-4, + nb_layers_per_block=4, nb_upsampling_conv=128, upsampling_type='upsampling', + init_conv_filters=48, input_shape=None, activation='deconv'): + ''' Build the DenseNet-FCN model + + # Arguments nb_classes: number of classes img_input: tuple of shape (channels, rows, columns) or (rows, columns, channels) include_top: flag to include the final Dense layer @@ -534,104 +842,112 @@ def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, input_shape: Only used for shape inference in fully convolutional networks. activation: Type of activation at the top layer. Can be one of 'softmax' or 'sigmoid'. Note that if sigmoid is used, classes must be 1. - Returns: keras tensor with nb_layers of conv_block appended + + # Returns + a keras tensor + + # Raises + ValueError: in case of invalid argument for `reduction`, + `nb_dense_block` or `nb_upsampling_conv`. 
''' + with K.name_scope('DenseNetFCN'): + concat_axis = 1 if K.image_data_format() == 'channels_first' else -1 - concat_axis = 1 if K.image_data_format() == 'channels_first' else -1 - - if concat_axis == 1: # channels_first dim ordering - _, rows, cols = input_shape - else: - rows, cols, _ = input_shape - - if reduction != 0.0: - assert reduction <= 1.0 and reduction > 0.0, 'reduction value must lie between 0.0 and 1.0' - - # check if upsampling_conv has minimum number of filters - # minimum is set to 12, as at least 3 color channels are needed for correct upsampling - assert nb_upsampling_conv > 12 and nb_upsampling_conv % 4 == 0, 'Parameter `upsampling_conv` number of channels must ' \ - 'be a positive number divisible by 4 and greater ' \ - 'than 12' - - # layers in each dense block - if type(nb_layers_per_block) is list or type(nb_layers_per_block) is tuple: - nb_layers = list(nb_layers_per_block) # Convert tuple to list - - assert len(nb_layers) == (nb_dense_block + 1), 'If list, nb_layer is used as provided. 
' \ - 'Note that list size must be (nb_dense_block + 1)' - - bottleneck_nb_layers = nb_layers[-1] - rev_layers = nb_layers[::-1] - nb_layers.extend(rev_layers[1:]) - else: - bottleneck_nb_layers = nb_layers_per_block - nb_layers = [nb_layers_per_block] * (2 * nb_dense_block + 1) - - # compute compression factor - compression = 1.0 - reduction - - # Initial convolution - x = Conv2D(init_conv_filters, (3, 3), kernel_initializer='he_uniform', padding='same', name='initial_conv2D', - use_bias=False, kernel_regularizer=l2(weight_decay))(img_input) - - nb_filter = init_conv_filters - - skip_list = [] - - # Add dense blocks and transition down block - for block_idx in range(nb_dense_block): - x, nb_filter = __dense_block(x, nb_layers[block_idx], nb_filter, growth_rate, - dropout_rate=dropout_rate, weight_decay=weight_decay) - - # Skip connection - skip_list.append(x) - - # add transition_block - x = __transition_block(x, nb_filter, compression=compression, dropout_rate=dropout_rate, - weight_decay=weight_decay) - - nb_filter = int(nb_filter * compression) # this is calculated inside transition_down_block - - # The last dense_block does not have a transition_down_block - # return the concatenated feature maps without the concatenation of the input - _, nb_filter, concat_list = __dense_block(x, bottleneck_nb_layers, nb_filter, growth_rate, - dropout_rate=dropout_rate, weight_decay=weight_decay, - return_concat_list=True) - - skip_list = skip_list[::-1] # reverse the skip list - - # Add dense blocks and transition up block - for block_idx in range(nb_dense_block): - n_filters_keep = growth_rate * nb_layers[nb_dense_block + block_idx] - - # upsampling block must upsample only the feature maps (concat_list[1:]), - # not the concatenation of the input with the feature maps (concat_list[0]. 
- l = concatenate(concat_list[1:], axis=concat_axis) - - t = __transition_up_block(l, nb_filters=n_filters_keep, type=upsampling_type) - - # concatenate the skip connection with the transition block - x = concatenate([t, skip_list[block_idx]], axis=concat_axis) - - # Dont allow the feature map size to grow in upsampling dense blocks - x_up, nb_filter, concat_list = __dense_block(x, nb_layers[nb_dense_block + block_idx + 1], nb_filter=growth_rate, - growth_rate=growth_rate, dropout_rate=dropout_rate, - weight_decay=weight_decay, - return_concat_list=True, grow_nb_filters=False) - - if include_top: - x = Conv2D(nb_classes, (1, 1), activation='linear', padding='same', kernel_regularizer=l2(weight_decay), - use_bias=False)(x_up) - - if K.image_data_format() == 'channels_first': - channel, row, col = input_shape + if concat_axis == 1: # channels_first dim ordering + _, rows, cols = input_shape else: - row, col, channel = input_shape + rows, cols, _ = input_shape - x = Reshape((row * col, nb_classes))(x) - x = Activation(activation)(x) - x = Reshape((row, col, nb_classes))(x) - else: - x = x_up + if reduction != 0.0: + if not (reduction <= 1.0 and reduction > 0.0): + raise ValueError('`reduction` value must lie between 0.0 and 1.0') - return x + # check if upsampling_conv has minimum number of filters + # minimum is set to 12, as at least 3 color channels are needed for correct upsampling + if not (nb_upsampling_conv > 12 and nb_upsampling_conv % 4 == 0): + raise ValueError('Parameter `nb_upsampling_conv` number of channels must ' + 'be a positive number divisible by 4 and greater than 12') + + # layers in each dense block + if type(nb_layers_per_block) is list or type(nb_layers_per_block) is tuple: + nb_layers = list(nb_layers_per_block) # Convert tuple to list + + if len(nb_layers) != (nb_dense_block + 1): + raise ValueError('If `nb_dense_block` is a list, its length must be ' + '(`nb_dense_block` + 1)') + + bottleneck_nb_layers = nb_layers[-1] + rev_layers = 
nb_layers[::-1] + nb_layers.extend(rev_layers[1:]) + else: + bottleneck_nb_layers = nb_layers_per_block + nb_layers = [nb_layers_per_block] * (2 * nb_dense_block + 1) + + # compute compression factor + compression = 1.0 - reduction + + # Initial convolution + x = Conv2D(init_conv_filters, (7, 7), kernel_initializer='he_normal', padding='same', name='initial_conv2D', + use_bias=False, kernel_regularizer=l2(weight_decay))(img_input) + x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(x) + x = Activation('relu')(x) + + nb_filter = init_conv_filters + + skip_list = [] + + # Add dense blocks and transition down block + for block_idx in range(nb_dense_block): + x, nb_filter = __dense_block(x, nb_layers[block_idx], nb_filter, growth_rate, dropout_rate=dropout_rate, + weight_decay=weight_decay) + + # Skip connection + skip_list.append(x) + + # add transition_block + x = __transition_block(x, nb_filter, compression=compression, weight_decay=weight_decay) + + nb_filter = int(nb_filter * compression) # this is calculated inside transition_down_block + + # The last dense_block does not have a transition_down_block + # return the concatenated feature maps without the concatenation of the input + _, nb_filter, concat_list = __dense_block(x, bottleneck_nb_layers, nb_filter, growth_rate, + dropout_rate=dropout_rate, weight_decay=weight_decay, + return_concat_list=True) + + skip_list = skip_list[::-1] # reverse the skip list + + # Add dense blocks and transition up block + for block_idx in range(nb_dense_block): + n_filters_keep = growth_rate * nb_layers[nb_dense_block + block_idx] + + # upsampling block must upsample only the feature maps (concat_list[1:]), + # not the concatenation of the input with the feature maps (concat_list[0]. 
+ l = concatenate(concat_list[1:], axis=concat_axis) + + t = __transition_up_block(l, nb_filters=n_filters_keep, type=upsampling_type, weight_decay=weight_decay) + + # concatenate the skip connection with the transition block + x = concatenate([t, skip_list[block_idx]], axis=concat_axis) + + # Dont allow the feature map size to grow in upsampling dense blocks + x_up, nb_filter, concat_list = __dense_block(x, nb_layers[nb_dense_block + block_idx + 1], nb_filter=growth_rate, + growth_rate=growth_rate, dropout_rate=dropout_rate, + weight_decay=weight_decay, return_concat_list=True, + grow_nb_filters=False) + + if include_top: + x = Conv2D(nb_classes, (1, 1), activation='linear', padding='same', use_bias=False)(x_up) + + if K.image_data_format() == 'channels_first': + channel, row, col = input_shape + else: + row, col, channel = input_shape + + x = Reshape((row * col, nb_classes))(x) + x = Activation(activation)(x) + x = Reshape((row, col, nb_classes))(x) + else: + x = x_up + + return x