diff --git a/examples/cifar10_densenet.py b/examples/cifar10_densenet.py
new file mode 100644
index 0000000..669105a
--- /dev/null
+++ b/examples/cifar10_densenet.py
@@ -0,0 +1,82 @@
+'''
+Trains a DenseNet-40-12 model on the CIFAR-10 Dataset.
+
+Gets a 99.84% accuracy score after 300 epochs.
+'''
+from __future__ import absolute_import
+from __future__ import print_function
+from __future__ import division
+
+import numpy as np
+import sklearn.metrics as metrics
+
+from keras import backend as K
+from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
+from keras.datasets import cifar10
+from keras.optimizers import Adam
+from keras.preprocessing.image import ImageDataGenerator
+from keras.utils import np_utils
+from keras_contrib.applications.densenet import DenseNet
+
+batch_size = 64
+nb_classes = 10
+nb_epoch = 300
+
+img_rows, img_cols = 32, 32
+img_channels = 3
+
+# Parameters for the DenseNet model builder
+img_dim = (img_channels, img_rows, img_cols) if K.image_dim_ordering() == "th" else (img_rows, img_cols, img_channels)
+depth = 40
+nb_dense_block = 3
+growth_rate = 12
+nb_filter = 16
+dropout_rate = 0.0  # 0.0 for data augmentation
+
+# Create the model (without loading weights)
+model = DenseNet(depth, nb_dense_block, growth_rate, nb_filter, dropout_rate=dropout_rate,
+                 input_shape=img_dim, weights=None)
+print("Model created")
+
+model.summary()
+
+optimizer = Adam(lr=1e-3)  # Using Adam instead of SGD to speed up training
+model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=["accuracy"])
+print("Finished compiling")
+
+(trainX, trainY), (testX, testY) = cifar10.load_data()
+
+trainX = trainX.astype('float32')
+testX = testX.astype('float32')
+
+trainX /= 255.
+testX /= 255.
+
+Y_train = np_utils.to_categorical(trainY, nb_classes)
+Y_test = np_utils.to_categorical(testY, nb_classes)
+
+generator = ImageDataGenerator(rotation_range=15,
+                               width_shift_range=5. / 32,
+                               height_shift_range=5. / 32)
+
+generator.fit(trainX, seed=0)
+
+weights_file = "DenseNet-40-12-CIFAR-10.h5"
+
+lr_reducer = ReduceLROnPlateau(monitor='val_loss', factor=np.sqrt(0.1),
+                               cooldown=0, patience=10, min_lr=0.5e-6)
+early_stopper = EarlyStopping(monitor='val_acc', min_delta=1e-4, patience=20)
+model_checkpoint = ModelCheckpoint(weights_file, monitor="val_acc", save_best_only=True,
+                                   save_weights_only=True, mode='auto')
+
+callbacks = [lr_reducer, early_stopper, model_checkpoint]
+
+model.fit_generator(generator.flow(trainX, Y_train, batch_size=batch_size), samples_per_epoch=len(trainX),
+                    nb_epoch=nb_epoch,
+                    callbacks=callbacks,
+                    validation_data=(testX, Y_test),
+                    nb_val_samples=testX.shape[0], verbose=2)
+
+scores = model.evaluate(testX, Y_test, batch_size=batch_size)
+print("Test loss : ", scores[0])
+print("Test accuracy : ", scores[1])
diff --git a/keras_contrib/applications/densenet.py b/keras_contrib/applications/densenet.py
new file mode 100644
index 0000000..51acfa7
--- /dev/null
+++ b/keras_contrib/applications/densenet.py
@@ -0,0 +1,325 @@
+# -*- coding: utf-8 -*-
+"""DenseNet models for Keras.
+
+# Reference
+
+- [Densely Connected Convolutional Networks](https://arxiv.org/pdf/1608.06993.pdf)
+
+"""
+from __future__ import print_function
+from __future__ import absolute_import
+from __future__ import division
+
+import warnings
+
+from keras.models import Model
+from keras.layers.core import Dense, Dropout, Activation
+from keras.layers.convolutional import Convolution2D
+from keras.layers.pooling import AveragePooling2D
+from keras.layers.pooling import GlobalAveragePooling2D
+from keras.layers import Input, merge
+from keras.layers.normalization import BatchNormalization
+from keras.regularizers import l2
+from keras.utils.layer_utils import convert_all_kernels_in_model
+from keras.utils.data_utils import get_file
+from keras.engine.topology import get_source_inputs
+from keras.applications.imagenet_utils import _obtain_input_shape
+import keras.backend as K
+
+TH_WEIGHTS_PATH = 'https://github.com/titu1994/DenseNet/releases/download/v2.0/DenseNet-40-12-Theano-Backend-TH-dim-ordering.h5'
+TF_WEIGHTS_PATH = 'https://github.com/titu1994/DenseNet/releases/download/v2.0/DenseNet-40-12-Tensorflow-Backend-TF-dim-ordering.h5'
+TH_WEIGHTS_PATH_NO_TOP = 'https://github.com/titu1994/DenseNet/releases/download/v2.0/DenseNet-40-12-Theano-Backend-TH-dim-ordering-no-top.h5'
+TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/titu1994/DenseNet/releases/download/v2.0/DenseNet-40-12-Tensorflow-Backend-TF-dim-ordering-no-top.h5'
+
+
+def DenseNet(depth=40, nb_dense_block=3, growth_rate=12, nb_filter=16,
+             bottleneck=False, reduction=0.0, dropout_rate=0.0, weight_decay=1E-4,
+             include_top=True, weights='cifar10',
+             input_tensor=None, input_shape=None,
+             classes=10):
+    """Instantiate the DenseNet architecture,
+        optionally loading weights pre-trained
+        on CIFAR-10. Note that when using TensorFlow,
+        for best performance you should set
+        `image_dim_ordering="tf"` in your Keras config
+        at ~/.keras/keras.json.
+
+        The model and the weights are compatible with both
+        TensorFlow and Theano. The dimension ordering
+        convention used by the model is the one
+        specified in your Keras config file.
+
+        # Arguments
+            depth: number or layers in the DenseNet
+            nb_dense_block: number of dense blocks to add to end (generally = 3)
+            growth_rate: number of filters to add per dense block
+            nb_filter: initial number of filters. -1 indicates initial
+                number of filters is 2 * growth_rate
+            bottleneck: flag to add bottleneck blocks in between dense blocks
+            reduction: reduction factor of transition blocks.
+                Note : reduction value is inverted to compute compression.
+            dropout_rate: dropout rate
+            weight_decay: weight decay factor
+            include_top: whether to include the fully-connected
+                layer at the top of the network.
+            weights: one of `None` (random initialization) or
+                "cifar10" (pre-training on CIFAR-10)..
+            input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
+                to use as image input for the model.
+            input_shape: optional shape tuple, only to be specified
+                if `include_top` is False (otherwise the input shape
+                has to be `(32, 32, 3)` (with `tf` dim ordering)
+                or `(3, 32, 32)` (with `th` dim ordering).
+                It should have exactly 3 inputs channels,
+                and width and height should be no smaller than 8.
+                E.g. `(200, 200, 3)` would be one valid value.
+            classes: optional number of classes to classify images
+                into, only to be specified if `include_top` is True, and
+                if no `weights` argument is specified.
+
+        # Returns
+            A Keras model instance.
+        """
+
+    if weights not in {'cifar10', None}:
+        raise ValueError('The `weights` argument should be either '
+                         '`None` (random initialization) or `cifar10` '
+                         '(pre-training on CIFAR-10).')
+
+    if weights == 'cifar10' and include_top and classes != 10:
+        raise ValueError('If using `weights` as CIFAR 10 with `include_top`'
+                         ' as true, `classes` should be 10')
+
+    # Determine proper input shape
+    input_shape = _obtain_input_shape(input_shape,
+                                      default_size=32,
+                                      min_size=8,
+                                      dim_ordering=K.image_dim_ordering(),
+                                      include_top=include_top)
+
+    if input_tensor is None:
+        img_input = Input(shape=input_shape)
+    else:
+        if not K.is_keras_tensor(input_tensor):
+            img_input = Input(tensor=input_tensor, shape=input_shape)
+        else:
+            img_input = input_tensor
+
+    x = __create_dense_net(classes, img_input, include_top, depth, nb_dense_block,
+                           growth_rate, nb_filter, bottleneck, reduction,
+                           dropout_rate, weight_decay)
+
+    # Ensure that the model takes into account
+    # any potential predecessors of `input_tensor`.
+    if input_tensor is not None:
+        inputs = get_source_inputs(input_tensor)
+    else:
+        inputs = img_input
+    # Create model.
+    model = Model(inputs, x, name='densenet')
+
+    # load weights
+    if weights == 'cifar10':
+        if (depth == 40) and (nb_dense_block == 3) and (growth_rate == 12) and (nb_filter == 16) and \
+                (bottleneck is False) and (reduction == 0.0) and (dropout_rate == 0.0) and (weight_decay == 1E-4):
+            # Default parameters match. Weights for this model exist:
+
+            if K.image_dim_ordering() == 'th':
+                if include_top:
+                    weights_path = get_file('densenet_40_12_th_dim_ordering_th_kernels.h5',
+                                            TH_WEIGHTS_PATH,
+                                            cache_subdir='models')
+                else:
+                    weights_path = get_file('densenet_40_12_th_dim_ordering_th_kernels_no_top.h5',
+                                            TH_WEIGHTS_PATH_NO_TOP,
+                                            cache_subdir='models')
+
+                model.load_weights(weights_path)
+
+                if K.backend() == 'tensorflow':
+                    warnings.warn('You are using the TensorFlow backend, yet you '
+                                  'are using the Theano '
+                                  'image dimension ordering convention '
+                                  '(`image_dim_ordering="th"`). '
+                                  'For best performance, set '
+                                  '`image_dim_ordering="tf"` in '
+                                  'your Keras config '
+                                  'at ~/.keras/keras.json.')
+                    convert_all_kernels_in_model(model)
+            else:
+                if include_top:
+                    weights_path = get_file('densenet_40_12_tf_dim_ordering_tf_kernels.h5',
+                                            TF_WEIGHTS_PATH,
+                                            cache_subdir='models')
+                else:
+                    weights_path = get_file('densenet_40_12_tf_dim_ordering_tf_kernels_no_top.h5',
+                                            TF_WEIGHTS_PATH_NO_TOP,
+                                            cache_subdir='models')
+
+                model.load_weights(weights_path)
+
+                if K.backend() == 'theano':
+                    convert_all_kernels_in_model(model)
+
+    return model
+
+
+def __conv_block(ip, nb_filter, bottleneck=False, dropout_rate=None, weight_decay=1E-4):
+    ''' Apply BatchNorm, Relu, 3x3 Conv2D, optional bottleneck block and dropout
+
+    Args:
+        ip: Input keras tensor
+        nb_filter: number of filters
+        bottleneck: add bottleneck block
+        dropout_rate: dropout rate
+        weight_decay: weight decay factor
+
+    Returns: keras tensor with batch_norm, relu and convolution2d added (optional bottleneck)
+    '''
+
+    concat_axis = 1 if K.image_dim_ordering() == "th" else -1
+
+    x = BatchNormalization(mode=0, axis=concat_axis, gamma_regularizer=l2(weight_decay),
+                           beta_regularizer=l2(weight_decay))(ip)
+    x = Activation('relu')(x)
+
+    if bottleneck:
+        inter_channel = nb_filter * 4  # Obtained from https://github.com/liuzhuang13/DenseNet/blob/master/densenet.lua
+
+        x = Convolution2D(inter_channel, 1, 1, init='he_uniform', border_mode='same', bias=False,
+                          W_regularizer=l2(weight_decay))(x)
+
+        if dropout_rate:
+            x = Dropout(dropout_rate)(x)
+
+        x = BatchNormalization(mode=0, axis=concat_axis, gamma_regularizer=l2(weight_decay),
+                               beta_regularizer=l2(weight_decay))(x)
+        x = Activation('relu')(x)
+
+    x = Convolution2D(nb_filter, 3, 3, init="he_uniform", border_mode="same", bias=False,
+                      W_regularizer=l2(weight_decay))(x)
+    if dropout_rate:
+        x = Dropout(dropout_rate)(x)
+
+    return x
+
+
+def __transition_block(ip, nb_filter, compression=1.0, dropout_rate=None, weight_decay=1E-4):
+    ''' Apply BatchNorm, Relu 1x1, Conv2D, optional compression, dropout and Maxpooling2D
+
+    Args:
+        ip: keras tensor
+        nb_filter: number of filters
+        dropout_rate: dropout rate
+        weight_decay: weight decay factor
+
+    Returns: keras tensor, after applying batch_norm, relu-conv, dropout, maxpool
+    '''
+
+    concat_axis = 1 if K.image_dim_ordering() == "th" else -1
+
+    x = BatchNormalization(mode=0, axis=concat_axis, gamma_regularizer=l2(weight_decay),
+                           beta_regularizer=l2(weight_decay))(ip)
+    x = Activation('relu')(x)
+    x = Convolution2D(int(nb_filter * compression), 1, 1, init="he_uniform", border_mode="same", bias=False,
+                      W_regularizer=l2(weight_decay))(x)
+    if dropout_rate:
+        x = Dropout(dropout_rate)(x)
+    x = AveragePooling2D((2, 2), strides=(2, 2))(x)
+
+    return x
+
+
+def __dense_block(x, nb_layers, nb_filter, growth_rate, bottleneck=False, dropout_rate=None, weight_decay=1E-4):
+    ''' Build a dense_block where the output of each conv_block is fed to subsequent ones
+
+    Args:
+        x: keras tensor
+        nb_layers: the number of layers of __conv_block to append to the model.
+        nb_filter: number of filters
+        growth_rate: growth rate
+        bottleneck: bottleneck block
+        dropout_rate: dropout rate
+        weight_decay: weight decay factor
+
+    Returns: keras tensor with nb_layers of __conv_block appended
+    '''
+
+    concat_axis = 1 if K.image_dim_ordering() == "th" else -1
+
+    x_list = [x]
+
+    for i in range(nb_layers):
+        x = __conv_block(x, growth_rate, bottleneck, dropout_rate, weight_decay)
+        x_list.append(x)
+        x = merge(x_list, mode='concat', concat_axis=concat_axis)
+        nb_filter += growth_rate
+
+    return x, nb_filter
+
+
+def __create_dense_net(nb_classes, img_input, include_top, depth=40, nb_dense_block=3, growth_rate=12, nb_filter=-1,
+                       bottleneck=False, reduction=0.0, dropout_rate=None, weight_decay=1E-4):
+    ''' Build the DenseNet model
+
+    Args:
+        nb_classes: number of classes
+        img_input: tuple of shape (channels, rows, columns) or (rows, columns, channels)
+        include_top: flag to include the final Dense layer
+        depth: number or layers
+        nb_dense_block: number of dense blocks to add to end (generally = 3)
+        growth_rate: number of filters to add per dense block
+        nb_filter: initial number of filters. Default -1 indicates initial number of filters is 2 * growth_rate
+        bottleneck: add bottleneck blocks
+        reduction: reduction factor of transition blocks. Note : reduction value is inverted to compute compression
+        dropout_rate: dropout rate
+        weight_decay: weight decay
+
+    Returns: keras tensor with nb_layers of __conv_block appended
+    '''
+
+    concat_axis = 1 if K.image_dim_ordering() == "th" else -1
+
+    assert (depth - 4) % 3 == 0, "Depth must be 3 N + 4"
+    if reduction != 0.0:
+        assert reduction <= 1.0 and reduction > 0.0, "reduction value must lie between 0.0 and 1.0"
+
+    # layers in each dense block
+    nb_layers = int((depth - 4) / 3)
+
+    if bottleneck:
+        nb_layers = int(nb_layers // 2)
+
+    # compute initial nb_filter if -1, else accept users initial nb_filter
+    if nb_filter <= 0:
+        nb_filter = 2 * growth_rate
+
+    # compute compression factor
+    compression = 1.0 - reduction
+
+    # Initial convolution
+    x = Convolution2D(nb_filter, 3, 3, init="he_uniform", border_mode="same", name="initial_conv2D", bias=False,
+                      W_regularizer=l2(weight_decay))(img_input)
+
+    # Add dense blocks
+    for block_idx in range(nb_dense_block - 1):
+        x, nb_filter = __dense_block(x, nb_layers, nb_filter, growth_rate, bottleneck=bottleneck,
+                                     dropout_rate=dropout_rate, weight_decay=weight_decay)
+        # add __transition_block
+        x = __transition_block(x, nb_filter, compression=compression, dropout_rate=dropout_rate,
+                               weight_decay=weight_decay)
+        nb_filter = int(nb_filter * compression)
+
+    # The last __dense_block does not have a __transition_block
+    x, nb_filter = __dense_block(x, nb_layers, nb_filter, growth_rate, bottleneck=bottleneck,
+                                 dropout_rate=dropout_rate, weight_decay=weight_decay)
+
+    x = BatchNormalization(mode=0, axis=concat_axis, gamma_regularizer=l2(weight_decay),
+                           beta_regularizer=l2(weight_decay))(x)
+    x = Activation('relu')(x)
+    x = GlobalAveragePooling2D()(x)
+
+    if include_top:
+        x = Dense(nb_classes, activation='softmax', W_regularizer=l2(weight_decay), b_regularizer=l2(weight_decay))(x)
+
+    return x