diff --git a/examples/cifar10_densenet.py b/examples/cifar10_densenet.py
index c495919..971956f 100644
--- a/examples/cifar10_densenet.py
+++ b/examples/cifar10_densenet.py
@@ -26,7 +26,7 @@ img_rows, img_cols = 32, 32
 img_channels = 3
 
 # Parameters for the DenseNet model builder
-img_dim = (img_channels, img_rows, img_cols) if K.imgae_data_format() == "channels_first" else (img_rows, img_cols, img_channels)
+img_dim = (img_channels, img_rows, img_cols) if K.image_data_format() == "channels_first" else (img_rows, img_cols, img_channels)
 depth = 40
 nb_dense_block = 3
 growth_rate = 12
@@ -71,11 +71,11 @@ model_checkpoint = ModelCheckpoint(weights_file, monitor="val_acc", save_best_on
 
 callbacks = [lr_reducer, early_stopper, model_checkpoint]
 
-model.fit_generator(generator.flow(trainX, Y_train, batch_size=batch_size), samples_per_epoch=len(trainX),
-                    nb_epoch=nb_epoch,
+model.fit_generator(generator.flow(trainX, Y_train, batch_size=batch_size), steps_per_epoch=len(trainX) // batch_size,
+                    epochs=nb_epoch,
                     callbacks=callbacks,
                     validation_data=(testX, Y_test),
-                    nb_val_samples=testX.shape[0], verbose=2)
+                    verbose=2)
 
 scores = model.evaluate(testX, Y_test, batch_size=batch_size)
 print("Test loss : ", scores[0])
diff --git a/keras_contrib/applications/densenet.py b/keras_contrib/applications/densenet.py
index 71963cc..9027fef 100644
--- a/keras_contrib/applications/densenet.py
+++ b/keras_contrib/applications/densenet.py
@@ -14,10 +14,11 @@ import warnings
 
 from keras.models import Model
 from keras.layers.core import Dense, Dropout, Activation, Reshape
-from keras.layers.convolutional import Convolution2D, Deconvolution2D, AtrousConvolution2D, UpSampling2D
+from keras.layers import Convolution2D, Deconvolution2D, AtrousConvolution2D, UpSampling2D
+from keras.layers.merge import concatenate
 from keras.layers.pooling import AveragePooling2D
 from keras.layers.pooling import GlobalAveragePooling2D
-from keras.layers import Input, merge
+from keras.layers import Input, merge, Conv2D
 from keras.layers.normalization import BatchNormalization
 from keras.regularizers import l2
 from keras.utils.layer_utils import convert_all_kernels_in_model
@@ -101,7 +102,7 @@ def DenseNet(depth=40, nb_dense_block=3, growth_rate=12, nb_filter=16, nb_layers
     input_shape = _obtain_input_shape(input_shape,
                                       default_size=32,
                                       min_size=8,
-                                      dim_ordering=K.image_dim_ordering(),
+                                      data_format=K.image_data_format(),
                                       include_top=include_top)
 
     if input_tensor is None:
@@ -253,7 +254,7 @@ def DenseNetFCN(input_shape, nb_dense_block=5, growth_rate=16, nb_layers_per_blo
     input_shape = _obtain_input_shape(input_shape,
                                       default_size=32,
                                       min_size=16,
-                                      dim_ordering=K.image_dim_ordering(),
+                                      data_format=K.image_data_format(),
                                       include_top=include_top)
 
     if input_tensor is None:
@@ -296,15 +297,15 @@ def __conv_block(ip, nb_filter, bottleneck=False, dropout_rate=None, weight_deca
 
     concat_axis = 1 if K.image_dim_ordering() == "th" else -1
 
-    x = BatchNormalization(mode=0, axis=concat_axis, gamma_regularizer=l2(weight_decay),
+    x = BatchNormalization(axis=concat_axis, gamma_regularizer=l2(weight_decay),
                            beta_regularizer=l2(weight_decay))(ip)
     x = Activation('relu')(x)
 
     if bottleneck:
         inter_channel = nb_filter * 4  # Obtained from https://github.com/liuzhuang13/DenseNet/blob/master/densenet.lua
 
-        x = Convolution2D(inter_channel, 1, 1, init='he_uniform', border_mode='same', bias=False,
-                          W_regularizer=l2(weight_decay))(x)
+        x = Conv2D(inter_channel, (1, 1), kernel_initializer='he_uniform', padding='same', use_bias=False,
+                          kernel_regularizer=l2(weight_decay))(x)
 
         if dropout_rate:
             x = Dropout(dropout_rate)(x)
@@ -313,8 +314,8 @@ def __conv_block(ip, nb_filter, bottleneck=False, dropout_rate=None, weight_deca
                                beta_regularizer=l2(weight_decay))(x)
         x = Activation('relu')(x)
 
-    x = Convolution2D(nb_filter, 3, 3, init="he_uniform", border_mode="same", bias=False,
-                      W_regularizer=l2(weight_decay))(x)
+    x = Conv2D(nb_filter, (3, 3), kernel_initializer="he_uniform", padding="same", use_bias=False,
+                      kernel_regularizer=l2(weight_decay))(x)
     if dropout_rate:
         x = Dropout(dropout_rate)(x)
 
@@ -337,11 +338,11 @@ def __transition_block(ip, nb_filter, compression=1.0, dropout_rate=None, weight
 
     concat_axis = 1 if K.image_dim_ordering() == "th" else -1
 
-    x = BatchNormalization(mode=0, axis=concat_axis, gamma_regularizer=l2(weight_decay),
+    x = BatchNormalization(axis=concat_axis, gamma_regularizer=l2(weight_decay),
                            beta_regularizer=l2(weight_decay))(ip)
     x = Activation('relu')(x)
-    x = Convolution2D(int(nb_filter * compression), 1, 1, init="he_uniform", border_mode="same", bias=False,
-                      W_regularizer=l2(weight_decay))(x)
+    x = Conv2D(int(nb_filter * compression), (1, 1), kernel_initializer="he_uniform", padding="same", use_bias=False,
+               kernel_regularizer=l2(weight_decay))(x)
     if dropout_rate:
         x = Dropout(dropout_rate)(x)
     x = AveragePooling2D((2, 2), strides=(2, 2))(x)
@@ -375,15 +376,16 @@ def __dense_block(x, nb_layers, nb_filter, growth_rate, bottleneck=False, dropou
         x = __conv_block(x, growth_rate, bottleneck, dropout_rate, weight_decay)
         x_list.append(x)
 
-        x = merge(x_list, mode='concat', concat_axis=concat_axis)
+        # Rebind x: the next __conv_block call must receive the concatenation of all of x_list.
+        x = concatenate(x_list, axis=concat_axis)
 
         if grow_nb_filters:
             nb_filter += growth_rate
 
     if return_concat_list:
         return x, nb_filter, x_list
     else:
         return x, nb_filter
 
 
 def __transition_up_block(ip, nb_filters, type='upsampling', output_shape=None, weight_decay=1E-4):
@@ -402,11 +404,13 @@ def __transition_up_block(ip, nb_filters, type='upsampling', output_shape=None,
     if type == 'upsampling':
         x = UpSampling2D()(ip)
     elif type == 'subpixel':
-        x = Convolution2D(nb_filters, 3, 3, activation="relu", border_mode='same', W_regularizer=l2(weight_decay),
-                          bias=False, init='he_uniform')(ip)
+        x = Conv2D(nb_filters, (3, 3), padding="same", kernel_regularizer=l2(weight_decay), activation='relu',
+                   use_bias=False, kernel_initializer='he_uniform')(ip)
+        #x = Convolution2D(nb_filters, 3, 3, activation="relu", border_mode='same', W_regularizer=l2(weight_decay),
+        #                  bias=False, init='he_uniform')(ip)
         x = SubPixelUpscaling(scale_factor=2)(x)
-        x = Convolution2D(nb_filters, 3, 3, activation="relu", border_mode='same', W_regularizer=l2(weight_decay),
-                          bias=False, init='he_uniform')(x)
+        x = Conv2D(nb_filters, (3, 3), activation="relu", padding='same', kernel_regularizer=l2(weight_decay),
+                          use_bias=False, kernel_initializer='he_uniform')(x)
     elif type == 'atrous':
         # waiting on https://github.com/fchollet/keras/issues/4018
         x = AtrousConvolution2D(nb_filters, 3, 3, activation="relu", W_regularizer=l2(weight_decay),
@@ -478,8 +482,8 @@ def __create_dense_net(nb_classes, img_input, include_top, depth=40, nb_dense_bl
     compression = 1.0 - reduction
 
     # Initial convolution
-    x = Convolution2D(nb_filter, 3, 3, init="he_uniform", border_mode="same", name="initial_conv2D", bias=False,
-                      W_regularizer=l2(weight_decay))(img_input)
+    x = Conv2D(nb_filter, (3, 3), kernel_initializer="he_uniform", padding="same", name="initial_conv2D", use_bias=False,
+               kernel_regularizer=l2(weight_decay))(img_input)
 
     # Add dense blocks
     for block_idx in range(nb_dense_block - 1):
@@ -494,13 +498,13 @@ def __create_dense_net(nb_classes, img_input, include_top, depth=40, nb_dense_bl
     x, nb_filter = __dense_block(x, final_nb_layer, nb_filter, growth_rate, bottleneck=bottleneck,
                                  dropout_rate=dropout_rate, weight_decay=weight_decay)
 
-    x = BatchNormalization(mode=0, axis=concat_axis, gamma_regularizer=l2(weight_decay),
+    x = BatchNormalization(axis=concat_axis, gamma_regularizer=l2(weight_decay),
                            beta_regularizer=l2(weight_decay))(x)
     x = Activation('relu')(x)
     x = GlobalAveragePooling2D()(x)
 
     if include_top:
-        x = Dense(nb_classes, activation='softmax', W_regularizer=l2(weight_decay), b_regularizer=l2(weight_decay))(x)
+        x = Dense(nb_classes, activation='softmax', kernel_regularizer=l2(weight_decay), bias_regularizer=l2(weight_decay))(x)
 
     return x
 
@@ -571,8 +575,8 @@ def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5,
     compression = 1.0 - reduction
 
     # Initial convolution
-    x = Convolution2D(init_conv_filters, 3, 3, init="he_uniform", border_mode="same", name="initial_conv2D", bias=False,
-                      W_regularizer=l2(weight_decay))(img_input)
+    x = Conv2D(init_conv_filters, (3, 3), kernel_initializer="he_uniform", padding="same", name="initial_conv2D",
+               use_bias=False, kernel_regularizer=l2(weight_decay))(img_input)
 
     nb_filter = init_conv_filters
 
@@ -616,12 +620,12 @@ def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5,
 
         # upsampling block must upsample only the feature maps (concat_list[1:]),
         # not the concatenation of the input with the feature maps (concat_list[0].
-        l = merge(concat_list[1:], mode='concat', concat_axis=concat_axis)
+        l = concatenate(concat_list[1:], axis=concat_axis)
 
         t = __transition_up_block(l, nb_filters=n_filters_keep, type=upsampling_type, output_shape=out_shape)
 
         # concatenate the skip connection with the transition block
-        x = merge([t, skip_list[block_idx]], mode='concat', concat_axis=concat_axis)
+        x = concatenate([t, skip_list[block_idx]], axis=concat_axis)
 
         if K.image_dim_ordering() == 'th':
             out_shape[2] *= 2
@@ -637,8 +641,8 @@ def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5,
                                                   return_concat_list=True, grow_nb_filters=False)
 
     if include_top:
-        x = Convolution2D(nb_classes, 1, 1, activation='linear', border_mode='same', W_regularizer=l2(weight_decay),
-                          bias=False)(x)
+        x = Conv2D(nb_classes, (1, 1), activation='linear', padding='same', kernel_regularizer=l2(weight_decay),
+                          use_bias=False)(x)
 
         if K.image_dim_ordering() == 'th':
             channel, row, col = input_shape