mirror of
https://github.com/wassname/keras-contrib.git
synced 2026-06-27 16:10:11 +08:00
Merge branch 'master' into segmentation-data-generator
This commit is contained in:
+23
-11
@@ -32,25 +32,39 @@ install:
|
||||
|
||||
- conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION numpy scipy matplotlib pandas pytest h5py
|
||||
- source activate test-environment
|
||||
- pip install pytest-cov python-coveralls pytest-xdist coverage==3.7.1 #we need this version of coverage for coveralls.io to work
|
||||
- pip install pytest-cov pytest-xdist
|
||||
- pip install pep8 pytest-pep8
|
||||
- conda install mkl mkl-service
|
||||
- pip install theano
|
||||
- pip install git+git://github.com/fchollet/keras.git
|
||||
|
||||
# install PIL for preprocessing tests
|
||||
#- if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then
|
||||
# conda install pil;
|
||||
# elif [[ "$TRAVIS_PYTHON_VERSION" == "3.5" ]]; then
|
||||
# conda install Pillow;
|
||||
# fi
|
||||
- if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then
|
||||
conda install pil;
|
||||
elif [[ "$TRAVIS_PYTHON_VERSION" == "3.5" ]]; then
|
||||
conda install Pillow;
|
||||
fi
|
||||
|
||||
- python setup.py install
|
||||
- pip install -e .[tests]
|
||||
|
||||
# install TensorFlow (CPU)
|
||||
# install TensorFlow (CPU version).
|
||||
- pip install tensorflow
|
||||
|
||||
# install cntk
|
||||
- if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then
|
||||
pip install https://cntk.ai/PythonWheel/CPU-Only/cntk-2.2-cp27-cp27mu-linux_x86_64.whl;
|
||||
elif [[ "$TRAVIS_PYTHON_VERSION" == "3.5" ]]; then
|
||||
pip install https://cntk.ai/PythonWheel/CPU-Only/cntk-2.2-cp35-cp35m-linux_x86_64.whl;
|
||||
fi
|
||||
|
||||
# install pydot for visualization tests
|
||||
- if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then
|
||||
conda install pydot graphviz;
|
||||
fi
|
||||
|
||||
# command to run tests
|
||||
script:
|
||||
- export MKL_THREADING_LAYER="GNU"
|
||||
# run keras backend init to initialize backend config
|
||||
- python -c "import keras.backend"
|
||||
# create dataset directory to avoid concurrent directory creation at runtime
|
||||
@@ -61,7 +75,5 @@ script:
|
||||
- if [[ "$TEST_MODE" == "PEP8" ]]; then
|
||||
PYTHONPATH=$PWD:$PYTHONPATH py.test --pep8 -m pep8 -n0;
|
||||
else
|
||||
PYTHONPATH=$PWD:$PYTHONPATH py.test tests/;
|
||||
PYTHONPATH=$PWD:$PYTHONPATH py.test tests/ --ignore=tests/integration_tests --ignore=tests/test_documentation.py --cov=keras tests/ --cov-report term-missing;
|
||||
fi
|
||||
after_success:
|
||||
- coveralls
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
## Maintainers:
|
||||
Following are the users with write-access to this repository (maintainers) :
|
||||
* [athundt](https://www.github.com/athundt)
|
||||
* [bstriner](https://www.github.com/bstriner)
|
||||
* [farizrahman4u](https://www.github.com/farizrahman4u)
|
||||
* [fchollet](https://www.github.com/fchollet)
|
||||
|
||||
@@ -33,8 +33,11 @@ nb_filter = 16
|
||||
dropout_rate = 0.0 # 0.0 for data augmentation
|
||||
|
||||
# Create the model (without loading weights)
|
||||
model = DenseNet(depth, nb_dense_block, growth_rate, nb_filter, dropout_rate=dropout_rate,
|
||||
input_shape=img_dim, weights=None)
|
||||
model = DenseNet(depth=depth, nb_dense_block=nb_dense_block,
|
||||
growth_rate=growth_rate, nb_filter=nb_filter,
|
||||
dropout_rate=dropout_rate,
|
||||
input_shape=img_dim,
|
||||
weights=None)
|
||||
print('Model created')
|
||||
|
||||
model.summary()
|
||||
|
||||
@@ -0,0 +1,106 @@
|
||||
"""
|
||||
Adapted from keras example cifar10_cnn.py
|
||||
Train NASNet-CIFAR on the CIFAR10 small images dataset.
|
||||
"""
|
||||
from __future__ import print_function
|
||||
from keras.datasets import cifar10
|
||||
from keras.preprocessing.image import ImageDataGenerator
|
||||
from keras.utils import np_utils
|
||||
from keras.callbacks import ModelCheckpoint
|
||||
from keras.callbacks import ReduceLROnPlateau
|
||||
from keras.callbacks import CSVLogger
|
||||
from keras.optimizers import Adam
|
||||
from keras_contrib.applications.nasnet import NASNetCIFAR, preprocess_input
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# Training configuration
# ---------------------------------------------------------------------------
weights_file = 'NASNet-CIFAR-10.h5'
lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.5), cooldown=0, patience=5,
                               min_lr=0.5e-5)
csv_logger = CSVLogger('NASNet-CIFAR-10.csv')
# The model has two outputs when the auxiliary branch is enabled; the
# checkpoint tracks the validation accuracy of the output named `predictions`.
model_checkpoint = ModelCheckpoint(weights_file,
                                   monitor='val_predictions_acc',
                                   save_best_only=True,
                                   save_weights_only=True,
                                   mode='max')

batch_size = 128
nb_classes = 10
nb_epoch = 600
data_augmentation = True

# input image dimensions
img_rows, img_cols = 32, 32
# The CIFAR10 images are RGB.
img_channels = 3

# The data, shuffled and split between train and test sets:
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

# Convert class vectors to binary class matrices.
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)

X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

# preprocess input
X_train = preprocess_input(X_train)
X_test = preprocess_input(X_test)

# For training, the auxiliary branch must be used to correctly train NASNet.
# NOTE: the keyword is spelled `use_auxiliary_branch` in NASNetCIFAR's
# signature; the previous spelling (`use_auxilary_branch`) raised a TypeError.
model = NASNetCIFAR((img_rows, img_cols, img_channels),
                    use_auxiliary_branch=True)
model.summary()

optimizer = Adam(lr=1e-3, clipnorm=5)
# One loss per model output; the second (auxiliary) loss is down-weighted.
model.compile(loss=['categorical_crossentropy', 'categorical_crossentropy'],
              optimizer=optimizer, metrics=['accuracy'],
              loss_weights=[1.0, 0.4])

# model.load_weights('NASNet-CIFAR-10.h5', by_name=True)

if not data_augmentation:
    print('Not using data augmentation.')
    # The labels are passed twice: once per model output.
    model.fit(X_train, [Y_train, Y_train],
              batch_size=batch_size,
              epochs=nb_epoch,
              validation_data=(X_test, [Y_test, Y_test]),
              shuffle=True,
              verbose=2,
              callbacks=[lr_reducer, csv_logger, model_checkpoint])
else:
    print('Using real-time data augmentation.')
    # This will do preprocessing and realtime data augmentation:
    datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images
        vertical_flip=False)  # randomly flip images

    # Compute quantities required for featurewise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen.fit(X_train)

    # Wrap the ImageDataGenerator to yield two label batches [y, y] for each
    # input batch X. When training a NASNet model we have to use its auxiliary
    # training head, so the model is technically a 1 input - 2 output model
    # and requires the labels to be duplicated for the auxiliary head.
    def image_data_generator_wrapper(image_datagenerator, batch_size):
        """Yield `(X, [y, y])` batches drawn from `image_datagenerator`.

        # Arguments
            image_datagenerator: generator object whose `flow()` method is
                called on the module-level `X_train` / `Y_train` arrays.
            batch_size: number of samples per yielded batch.

        # Yields
            Tuples `(X, [y, y])`: one input batch and its label batch
            repeated once per model output.
        """
        # Use the generator that was passed in rather than the global one.
        iterator = image_datagenerator.flow(X_train, Y_train,
                                            batch_size=batch_size)

        while True:
            X, y = next(iterator)  # get the next batch
            yield X, [y, y]  # duplicate the labels for each batch

    # Fit the model on the batches generated by datagen.flow().
    model.fit_generator(image_data_generator_wrapper(datagen, batch_size),
                        steps_per_epoch=X_train.shape[0] // batch_size,
                        validation_data=(X_test, [Y_test, Y_test]),
                        epochs=nb_epoch, verbose=2,
                        callbacks=[lr_reducer, csv_logger, model_checkpoint])

# Report every metric Keras computed on the test set.
scores = model.evaluate(X_test, [Y_test, Y_test], batch_size=batch_size)
for score, metric_name in zip(scores, model.metrics_names):
    print("%s : %0.4f" % (metric_name, score))
|
||||
@@ -0,0 +1,96 @@
|
||||
"""
|
||||
Adapted from keras example cifar10_cnn.py and github.com/raghakot/keras-resnet
|
||||
Train ResNet-18 on the CIFAR10 small images dataset.
|
||||
|
||||
GPU run command with Theano backend (with TensorFlow, the GPU is automatically used):
|
||||
THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python cifar10.py
|
||||
"""
|
||||
from __future__ import print_function
|
||||
from keras.datasets import cifar10
|
||||
from keras.preprocessing.image import ImageDataGenerator
|
||||
from keras.utils import np_utils
|
||||
from keras.callbacks import ModelCheckpoint
|
||||
from keras.callbacks import ReduceLROnPlateau
|
||||
from keras.callbacks import CSVLogger
|
||||
from keras.callbacks import EarlyStopping
|
||||
from keras_contrib.applications.resnet import ResNet18
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# Training configuration
# ---------------------------------------------------------------------------
weights_file = 'ResNet18v2-CIFAR-10.h5'
lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1), cooldown=0, patience=5,
                               min_lr=0.5e-6)
early_stopper = EarlyStopping(min_delta=0.001, patience=10)
csv_logger = CSVLogger('ResNet18v2-CIFAR-10.csv')
model_checkpoint = ModelCheckpoint(weights_file, monitor='val_acc',
                                   save_best_only=True,
                                   save_weights_only=True, mode='auto')

batch_size = 32
nb_classes = 10
nb_epoch = 200
data_augmentation = True

# input image dimensions
img_rows, img_cols = 32, 32
# The CIFAR10 images are RGB.
img_channels = 3

# The data, shuffled and split between train and test sets:
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

# Convert class vectors to binary class matrices.
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)

X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

# Subtract the per-pixel mean of the training set and rescale.
# The training-set mean is reused for the test set on purpose.
mean_image = np.mean(X_train, axis=0)
X_train -= mean_image
X_test -= mean_image
X_train /= 128.
X_test /= 128.

model = ResNet18((img_rows, img_cols, img_channels), nb_classes)
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

callbacks = [lr_reducer, early_stopper, csv_logger, model_checkpoint]

if not data_augmentation:
    print('Not using data augmentation.')
    # `epochs` is the Keras 2 keyword (the Keras 1 name was `nb_epoch`);
    # this matches the `fit_generator` call in the other branch.
    model.fit(X_train, Y_train,
              batch_size=batch_size,
              epochs=nb_epoch,
              validation_data=(X_test, Y_test),
              shuffle=True,
              callbacks=callbacks)
else:
    print('Using real-time data augmentation.')
    # This will do preprocessing and realtime data augmentation:
    datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images
        vertical_flip=False)  # randomly flip images

    # Compute quantities required for featurewise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen.fit(X_train)

    # Fit the model on the batches generated by datagen.flow().
    model.fit_generator(datagen.flow(X_train, Y_train, batch_size=batch_size),
                        steps_per_epoch=X_train.shape[0] // batch_size,
                        validation_data=(X_test, Y_test),
                        epochs=nb_epoch, verbose=2,
                        callbacks=callbacks)

# scores[0] is the loss, scores[1] the single compiled metric (accuracy).
scores = model.evaluate(X_test, Y_test, batch_size=batch_size)
print('Test loss : ', scores[0])
print('Test accuracy : ', scores[1])
|
||||
@@ -1,2 +1,5 @@
|
||||
from .densenet import DenseNet
|
||||
from .ror import ResidualOfResidual
|
||||
from .resnet import ResNet, ResNet18, ResNet34, ResNet50, ResNet101, ResNet152
|
||||
from .wide_resnet import WideResidualNetwork
|
||||
from .nasnet import NASNet, NASNetLarge, NASNetMobile
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,773 @@
|
||||
"""Collection of NASNet models
|
||||
|
||||
The reference paper:
|
||||
- [Learning Transferable Architectures for Scalable Image Recognition]
|
||||
(https://arxiv.org/abs/1707.07012)
|
||||
|
||||
The reference implementation:
|
||||
1. TF Slim
|
||||
- https://github.com/tensorflow/models/blob/master/research/slim/nets/
|
||||
nasnet/nasnet.py
|
||||
2. TensorNets
|
||||
- https://github.com/taehoonlee/tensornets/blob/master/tensornets/nasnets.py
|
||||
3. Weights
|
||||
- https://github.com/tensorflow/models/tree/master/research/slim/nets/nasnet
|
||||
"""
|
||||
from __future__ import print_function
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
|
||||
import warnings
|
||||
|
||||
from keras.models import Model
|
||||
from keras.layers import Input
|
||||
from keras.layers import Activation
|
||||
from keras.layers import Dense
|
||||
from keras.layers import Dropout
|
||||
from keras.layers import BatchNormalization
|
||||
from keras.layers import MaxPooling2D
|
||||
from keras.layers import AveragePooling2D
|
||||
from keras.layers import GlobalAveragePooling2D
|
||||
from keras.layers import GlobalMaxPooling2D
|
||||
from keras.layers import Conv2D
|
||||
from keras.layers import SeparableConv2D
|
||||
from keras.layers import ZeroPadding2D
|
||||
from keras.layers import Cropping2D
|
||||
from keras.layers import concatenate
|
||||
from keras.layers import add
|
||||
from keras.regularizers import l2
|
||||
from keras.utils.data_utils import get_file
|
||||
from keras.engine.topology import get_source_inputs
|
||||
from keras.applications.imagenet_utils import _obtain_input_shape
|
||||
from keras.applications.inception_v3 import preprocess_input
|
||||
from keras.applications.imagenet_utils import decode_predictions
|
||||
from keras import backend as K
|
||||
|
||||
_BN_DECAY = 0.9997
|
||||
_BN_EPSILON = 1e-3
|
||||
|
||||
NASNET_MOBILE_WEIGHT_PATH = "https://github.com/titu1994/Keras-NASNet/releases/download/v1.0/NASNet-mobile.h5"
|
||||
NASNET_MOBILE_WEIGHT_PATH_NO_TOP = "https://github.com/titu1994/Keras-NASNet/releases/download/v1.0/NASNet-mobile-no-top.h5"
|
||||
NASNET_MOBILE_WEIGHT_PATH_WITH_AUXULARY = "https://github.com/titu1994/Keras-NASNet/releases/download/v1.0/NASNet-auxiliary-mobile.h5"
|
||||
NASNET_MOBILE_WEIGHT_PATH_WITH_AUXULARY_NO_TOP = "https://github.com/titu1994/Keras-NASNet/releases/download/v1.0/NASNet-auxiliary-mobile-no-top.h5"
|
||||
NASNET_LARGE_WEIGHT_PATH = "https://github.com/titu1994/Keras-NASNet/releases/download/v1.1/NASNet-large.h5"
|
||||
NASNET_LARGE_WEIGHT_PATH_NO_TOP = "https://github.com/titu1994/Keras-NASNet/releases/download/v1.1/NASNet-large-no-top.h5"
|
||||
NASNET_LARGE_WEIGHT_PATH_WITH_auxiliary = "https://github.com/titu1994/Keras-NASNet/releases/download/v1.1/NASNet-auxiliary-large.h5"
|
||||
NASNET_LARGE_WEIGHT_PATH_WITH_auxiliary_NO_TOP = "https://github.com/titu1994/Keras-NASNet/releases/download/v1.1/NASNet-auxiliary-large-no-top.h5"
|
||||
|
||||
|
||||
def NASNet(input_shape=None,
           penultimate_filters=4032,
           nb_blocks=6,
           stem_filters=96,
           skip_reduction=True,
           use_auxiliary_branch=False,
           filters_multiplier=2,
           dropout=0.5,
           weight_decay=5e-5,
           include_top=True,
           weights=None,
           input_tensor=None,
           pooling=None,
           classes=1000,
           default_size=None):
    """Instantiates a NASNet architecture.

    Note that only TensorFlow is supported for now,
    therefore it only works with the data format
    `image_data_format='channels_last'` in your Keras config
    at `~/.keras/keras.json`. If another data format is configured it is
    switched to `channels_last` while the model is built and restored
    afterwards, including when building fails.

    # Arguments
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False (otherwise the input shape
            has to be `(331, 331, 3)` for NASNetLarge or
            `(224, 224, 3)` for NASNetMobile).
            It should have exactly 3 inputs channels,
            and width and height should be no smaller than 32.
            E.g. `(224, 224, 3)` would be one valid value.
        penultimate_filters: number of filters in the penultimate layer.
            Must be divisible by 24; the base filter count used by the
            cells is `penultimate_filters // 24`.
            NASNet models use the notation `NASNet (N @ P)`, where:
                -   N is the number of blocks
                -   P is the number of penultimate filters
        nb_blocks: number of repeated blocks of the NASNet model.
            NASNet models use the notation `NASNet (N @ P)`, where:
                -   N is the number of blocks
                -   P is the number of penultimate filters
        stem_filters: number of filters in the initial stem block
        skip_reduction: Whether to skip the strided stem convolution and
            the two stem reduction cells at the start of the network.
            `True` selects CIFAR mode, `False` selects
            ImageNet / mobile mode.
        use_auxiliary_branch: Whether to use the auxiliary branch during
            training or evaluation. When enabled the model has two
            outputs: `[main_output, auxiliary_output]`.
        filters_multiplier: factor applied to the number of filters
            between stages: the first group of normal cells uses the base
            filter count, the second `base * filters_multiplier` and the
            third `base * filters_multiplier ** 2`.
        dropout: dropout rate
        weight_decay: l2 regularization weight
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: `None` (random initialization) or
            `imagenet` (ImageNet weights). ImageNet weights are only
            available when `default_size` is 224 or 331.
        input_tensor: optional Keras tensor (i.e. output of
            `layers.Input()`)
            to use as image input for the model.
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model
                will be the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a
                2D tensor.
            - `max` means that global max pooling will
                be applied.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.
        default_size: specifies the default image size of the model.
            `None` is treated as 331.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape.
        RuntimeError: If attempting to run this model with a
            backend that does not support separable convolutions.
        AssertionError: If `penultimate_filters` is not divisible by 24.
    """
    if K.backend() != 'tensorflow':
        raise RuntimeError('Only Tensorflow backend is currently supported, '
                           'as other backends do not support '
                           'separable convolution.')

    if weights not in {'imagenet', None}:
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or `imagenet` '
                         '(pre-training on ImageNet).')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError('If using `weights` as ImageNet with `include_top` '
                         'as true, `classes` should be 1000')

    if default_size is None:
        default_size = 331

    # Determine proper input shape and default size.
    # NOTE(review): the shape is validated against the data format that is
    # configured *before* the switch to `channels_last` below - confirm this
    # is intended for `channels_first` configurations.
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=default_size,
                                      min_size=32,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top or weights)

    if K.image_data_format() != 'channels_last':
        warnings.warn('The NASNet family of models is only available '
                      'for the input data format "channels_last" '
                      '(width, height, channels). '
                      'However your settings specify the default '
                      'data format "channels_first" (channels, width, height).'
                      ' You should set `image_data_format="channels_last"` '
                      'in your Keras config located at ~/.keras/keras.json. '
                      'The model being returned right now will expect inputs '
                      'to follow the "channels_last" data format.')
        K.set_image_data_format('channels_last')
        old_data_format = 'channels_first'
    else:
        old_data_format = None

    # Everything below runs with the (possibly temporarily switched) data
    # format; the `finally` clause guarantees the user's setting is restored
    # even if building the graph or loading the weights raises.
    try:
        if input_tensor is None:
            img_input = Input(shape=input_shape)
        else:
            if not K.is_keras_tensor(input_tensor):
                img_input = Input(tensor=input_tensor, shape=input_shape)
            else:
                img_input = input_tensor

        assert penultimate_filters % 24 == 0, \
            "`penultimate_filters` needs to be divisible by 24."

        channel_dim = 1 if K.image_data_format() == 'channels_first' else -1
        filters = penultimate_filters // 24

        # Stem convolution: strided / 'valid' in ImageNet mode,
        # unstrided / 'same' in CIFAR mode.
        if not skip_reduction:
            x = Conv2D(stem_filters, (3, 3), strides=(2, 2), padding='valid',
                       use_bias=False, name='stem_conv1',
                       kernel_initializer='he_normal',
                       kernel_regularizer=l2(weight_decay))(img_input)
        else:
            x = Conv2D(stem_filters, (3, 3), strides=(1, 1), padding='same',
                       use_bias=False, name='stem_conv1',
                       kernel_initializer='he_normal',
                       kernel_regularizer=l2(weight_decay))(img_input)

        x = BatchNormalization(axis=channel_dim, momentum=_BN_DECAY,
                               epsilon=_BN_EPSILON, name='stem_bn1')(x)

        p = None
        if not skip_reduction:  # imagenet / mobile mode
            x, p = _reduction_A(x, p, filters // (filters_multiplier ** 2),
                                weight_decay, id='stem_1')
            x, p = _reduction_A(x, p, filters // filters_multiplier,
                                weight_decay, id='stem_2')

        # First group of normal cells.
        for i in range(nb_blocks):
            x, p = _normal_A(x, p, filters, weight_decay, id='%d' % (i))

        x, p0 = _reduction_A(x, p, filters * filters_multiplier,
                             weight_decay, id='reduce_%d' % (nb_blocks))

        p = p0 if not skip_reduction else p

        # Second group of normal cells.
        for i in range(nb_blocks):
            x, p = _normal_A(x, p, filters * filters_multiplier, weight_decay,
                             id='%d' % (nb_blocks + i + 1))

        # The auxiliary head is attached before the last reduction cell in
        # ImageNet mode and after it in CIFAR mode.
        auxiliary_x = None
        if not skip_reduction:  # imagenet / mobile mode
            if use_auxiliary_branch:
                auxiliary_x = _add_auxiliary_head(x, classes, weight_decay)

        x, p0 = _reduction_A(x, p, filters * filters_multiplier ** 2,
                             weight_decay, id='reduce_%d' % (2 * nb_blocks))

        if skip_reduction:  # CIFAR mode
            if use_auxiliary_branch:
                auxiliary_x = _add_auxiliary_head(x, classes, weight_decay)

        p = p0 if not skip_reduction else p

        # Third group of normal cells.
        for i in range(nb_blocks):
            x, p = _normal_A(x, p, filters * filters_multiplier ** 2,
                             weight_decay,
                             id='%d' % (2 * nb_blocks + i + 1))

        x = Activation('relu')(x)

        if include_top:
            x = GlobalAveragePooling2D()(x)
            x = Dropout(dropout)(x)
            x = Dense(classes, activation='softmax',
                      kernel_regularizer=l2(weight_decay),
                      name='predictions')(x)
        else:
            if pooling == 'avg':
                x = GlobalAveragePooling2D()(x)
            elif pooling == 'max':
                x = GlobalMaxPooling2D()(x)

        # Ensure that the model takes into account
        # any potential predecessors of `input_tensor`.
        if input_tensor is not None:
            inputs = get_source_inputs(input_tensor)
        else:
            inputs = img_input

        # Create model.
        if use_auxiliary_branch:
            model = Model(inputs, [x, auxiliary_x],
                          name='NASNet_with_auxiliary')
        else:
            model = Model(inputs, x, name='NASNet')

        # load weights
        if weights == 'imagenet':
            if default_size == 224:  # mobile version
                if include_top:
                    if use_auxiliary_branch:
                        weight_path = NASNET_MOBILE_WEIGHT_PATH_WITH_AUXULARY
                        model_name = 'nasnet_mobile_with_aux.h5'
                    else:
                        weight_path = NASNET_MOBILE_WEIGHT_PATH
                        model_name = 'nasnet_mobile.h5'
                else:
                    if use_auxiliary_branch:
                        weight_path = NASNET_MOBILE_WEIGHT_PATH_WITH_AUXULARY_NO_TOP
                        model_name = 'nasnet_mobile_with_aux_no_top.h5'
                    else:
                        weight_path = NASNET_MOBILE_WEIGHT_PATH_NO_TOP
                        model_name = 'nasnet_mobile_no_top.h5'

            elif default_size == 331:  # large version
                if include_top:
                    if use_auxiliary_branch:
                        weight_path = NASNET_LARGE_WEIGHT_PATH_WITH_auxiliary
                        model_name = 'nasnet_large_with_aux.h5'
                    else:
                        weight_path = NASNET_LARGE_WEIGHT_PATH
                        model_name = 'nasnet_large.h5'
                else:
                    if use_auxiliary_branch:
                        weight_path = NASNET_LARGE_WEIGHT_PATH_WITH_auxiliary_NO_TOP
                        model_name = 'nasnet_large_with_aux_no_top.h5'
                    else:
                        weight_path = NASNET_LARGE_WEIGHT_PATH_NO_TOP
                        model_name = 'nasnet_large_no_top.h5'

            else:
                raise ValueError('ImageNet weights can only be loaded on NASNetLarge or NASNetMobile')

            weights_file = get_file(model_name, weight_path,
                                    cache_subdir='models')
            model.load_weights(weights_file, by_name=True)
    finally:
        # Put the user's data format back, on success and on failure alike.
        if old_data_format:
            K.set_image_data_format(old_data_format)

    return model
|
||||
|
||||
|
||||
def NASNetLarge(input_shape=(331, 331, 3),
                dropout=0.5,
                weight_decay=5e-5,
                use_auxiliary_branch=False,
                include_top=True,
                weights='imagenet',
                input_tensor=None,
                pooling=None,
                classes=1000):
    """Builds the large ImageNet NASNet, i.e. NASNet (6 @ 4032).

    This is a thin wrapper that resets the module-level batch
    normalization constants and forwards to `NASNet` with the fixed
    architecture settings of the large model (6 blocks, 4032 penultimate
    filters, 96 stem filters, default image size 331).

    Note that only TensorFlow is supported for now,
    therefore it only works with the data format
    `image_data_format='channels_last'` in your Keras config
    at `~/.keras/keras.json`.

    # Arguments
        input_shape: optional shape tuple, forwarded to `NASNet`.
            Defaults to `(331, 331, 3)`.
        dropout: dropout rate
        weight_decay: l2 regularization weight
        use_auxiliary_branch: Whether to use the auxiliary branch during
            training or evaluation.
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: `None` (random initialization) or
            `imagenet` (ImageNet weights)
        input_tensor: optional Keras tensor (i.e. output of
            `layers.Input()`)
            to use as image input for the model.
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False` (`None`, `avg` or `max`).
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape.
        RuntimeError: If attempting to run this model with a
            backend that does not support separable convolutions.
    """
    global _BN_DECAY, _BN_EPSILON
    _BN_DECAY, _BN_EPSILON = 0.9997, 1e-3

    # Fixed architecture of the large model.
    settings = dict(penultimate_filters=4032,
                    nb_blocks=6,
                    stem_filters=96,
                    skip_reduction=False,
                    filters_multiplier=2,
                    default_size=331)
    # Caller-controlled options, passed through unchanged.
    settings.update(use_auxiliary_branch=use_auxiliary_branch,
                    dropout=dropout,
                    weight_decay=weight_decay,
                    include_top=include_top,
                    weights=weights,
                    input_tensor=input_tensor,
                    pooling=pooling,
                    classes=classes)

    return NASNet(input_shape, **settings)
|
||||
|
||||
|
||||
def NASNetMobile(input_shape=(224, 224, 3),
                 dropout=0.5,
                 weight_decay=4e-5,
                 use_auxiliary_branch=False,
                 include_top=True,
                 weights='imagenet',
                 input_tensor=None,
                 pooling=None,
                 classes=1000):
    """Builds the mobile ImageNet NASNet, i.e. NASNet (4 @ 1056).

    This is a thin wrapper that resets the module-level batch
    normalization constants and forwards to `NASNet` with the fixed
    architecture settings of the mobile model (4 blocks, 1056 penultimate
    filters, 32 stem filters, default image size 224).

    Note that only TensorFlow is supported for now,
    therefore it only works with the data format
    `image_data_format='channels_last'` in your Keras config
    at `~/.keras/keras.json`.

    # Arguments
        input_shape: optional shape tuple, forwarded to `NASNet`.
            Defaults to `(224, 224, 3)`.
        dropout: dropout rate
        weight_decay: l2 regularization weight
        use_auxiliary_branch: Whether to use the auxiliary branch during
            training or evaluation.
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: `None` (random initialization) or
            `imagenet` (ImageNet weights)
        input_tensor: optional Keras tensor (i.e. output of
            `layers.Input()`)
            to use as image input for the model.
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False` (`None`, `avg` or `max`).
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape.
        RuntimeError: If attempting to run this model with a
            backend that does not support separable convolutions.
    """
    global _BN_DECAY, _BN_EPSILON
    _BN_DECAY, _BN_EPSILON = 0.9997, 1e-3

    # Fixed architecture of the mobile model.
    settings = dict(penultimate_filters=1056,
                    nb_blocks=4,
                    stem_filters=32,
                    skip_reduction=False,
                    filters_multiplier=2,
                    default_size=224)
    # Caller-controlled options, passed through unchanged.
    settings.update(use_auxiliary_branch=use_auxiliary_branch,
                    dropout=dropout,
                    weight_decay=weight_decay,
                    include_top=include_top,
                    weights=weights,
                    input_tensor=input_tensor,
                    pooling=pooling,
                    classes=classes)

    return NASNet(input_shape, **settings)
|
||||
|
||||
|
||||
def NASNetCIFAR(input_shape=(32, 32, 3),
                dropout=0.0,
                weight_decay=5e-4,
                use_auxiliary_branch=False,
                include_top=True,
                weights=None,
                input_tensor=None,
                pooling=None,
                classes=10):
    """Instantiates a NASNet architecture in CIFAR mode.

    Sets the module-level batch-normalization settings to the CIFAR
    values (`_BN_DECAY = 0.9`, `_BN_EPSILON = 1e-5`) and then delegates
    to `NASNet` with the fixed CIFAR configuration
    (`penultimate_filters=768`, `nb_blocks=6`, `stem_filters=32`,
    `skip_reduction=True`, `filters_multiplier=2`, `default_size=224`).

    Note that only TensorFlow is supported for now,
    therefore it only works with the data format
    `image_data_format='channels_last'` in your Keras config
    at `~/.keras/keras.json`.

    # Arguments
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False (otherwise the input shape
            has to be `(32, 32, 3)` for NASNetCIFAR).
            It should have exactly 3 inputs channels,
            and width and height should be no smaller than 32.
            E.g. `(32, 32, 3)` would be one valid value.
        dropout: dropout rate
        weight_decay: l2 regularization weight
        use_auxiliary_branch: Whether to use the auxiliary branch during
            training or evaluation.
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: `None` (random initialization) or
            `imagenet` (ImageNet weights)
        input_tensor: optional Keras tensor (i.e. output of
            `layers.Input()`)
            to use as image input for the model.
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model
                will be the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a
                2D tensor.
            - `max` means that global max pooling will
                be applied.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape.
        RuntimeError: If attempting to run this model with a
            backend that does not support separable convolutions.
    """
    # The cell builders read these module globals when they create their
    # BatchNormalization layers, so they must be set before NASNet() runs.
    global _BN_DECAY, _BN_EPSILON
    _BN_DECAY, _BN_EPSILON = 0.9, 1e-5

    # Architecture constants that define the CIFAR variant.
    cifar_architecture = dict(penultimate_filters=768,
                              nb_blocks=6,
                              stem_filters=32,
                              skip_reduction=True,
                              filters_multiplier=2,
                              default_size=224)

    return NASNet(input_shape,
                  use_auxiliary_branch=use_auxiliary_branch,
                  dropout=dropout,
                  weight_decay=weight_decay,
                  include_top=include_top,
                  weights=weights,
                  input_tensor=input_tensor,
                  pooling=pooling,
                  classes=classes,
                  **cifar_architecture)
|
||||
|
||||
|
||||
def _separable_conv_block(ip, filters, kernel_size=(3, 3), strides=(1, 1), weight_decay=5e-5, id=None):
    '''Stacks two [relu -> separable conv -> batchnorm] stages.

    Only the first separable convolution receives `strides`; the second
    one is created without a `strides` argument.

    # Arguments:
        ip: input tensor
        filters: number of output filters per layer
        kernel_size: kernel size of separable convolutions
        strides: strided convolution for downsampling
        weight_decay: l2 regularization weight
        id: string id, interpolated into the layer names

    # Returns:
        a Keras tensor
    '''
    bn_axis = -1
    if K.image_data_format() == 'channels_first':
        bn_axis = 1

    with K.name_scope('separable_conv_block_%s' % id):
        # Stage 1: the (optionally strided) separable convolution.
        out = Activation('relu')(ip)
        first_conv = SeparableConv2D(filters, kernel_size,
                                     strides=strides,
                                     name='separable_conv_1_%s' % id,
                                     padding='same',
                                     use_bias=False,
                                     kernel_initializer='he_normal',
                                     kernel_regularizer=l2(weight_decay))
        out = first_conv(out)
        first_bn = BatchNormalization(axis=bn_axis,
                                      momentum=_BN_DECAY,
                                      epsilon=_BN_EPSILON,
                                      name="separable_conv_1_bn_%s" % (id))
        out = first_bn(out)

        # Stage 2: same kernel size, no explicit strides.
        out = Activation('relu')(out)
        second_conv = SeparableConv2D(filters, kernel_size,
                                      name='separable_conv_2_%s' % id,
                                      padding='same',
                                      use_bias=False,
                                      kernel_initializer='he_normal',
                                      kernel_regularizer=l2(weight_decay))
        out = second_conv(out)
        second_bn = BatchNormalization(axis=bn_axis,
                                       momentum=_BN_DECAY,
                                       epsilon=_BN_EPSILON,
                                       name="separable_conv_2_bn_%s" % (id))
        out = second_bn(out)
    return out
|
||||
|
||||
|
||||
def _adjust_block(p, ip, filters, weight_decay=5e-5, id=None):
    '''Adjusts the input `p` to match the shape of `ip`, or projects it
    when its number of channels differs from `filters`.

    Three cases are handled, in this order:
      * `p` is None: `ip` is returned as the adjusted tensor.
      * the spatial size of `p` differs from `ip`: `p` is downsampled by
        two stride-2 paths (the second one shifted by one pixel), each
        followed by a 1x1 convolution with `filters // 2` filters, then
        concatenated and batch-normalized.
      * the channel count of `p` differs from `filters`: `p` is projected
        with relu -> 1x1 convolution -> batchnorm.
    Otherwise `p` is returned unchanged.

    # Arguments:
        p: input tensor which needs to be modified
        ip: input tensor whose shape needs to be matched
        filters: number of output filters to be matched
        weight_decay: l2 regularization weight
        id: string id, interpolated into the layer names

    # Returns:
        an adjusted Keras tensor
    '''
    channel_dim = 1 if K.image_data_format() == 'channels_first' else -1
    img_dim = 2 if K.image_data_format() == 'channels_first' else -2

    with K.name_scope('adjust_block'):
        if p is None:
            return ip

        # Use the public backend accessor instead of the private
        # `_keras_shape` attribute to read the static shapes.
        p_shape = K.int_shape(p)
        ip_shape = K.int_shape(ip)

        if p_shape[img_dim] != ip_shape[img_dim]:
            with K.name_scope('adjust_reduction_block_%s' % id):
                p = Activation('relu', name='adjust_relu_1_%s' % id)(p)

                # Path 1: subsample every second pixel starting at (0, 0).
                p1 = AveragePooling2D((1, 1), strides=(2, 2), padding='valid',
                                      name='adjust_avg_pool_1_%s' % id)(p)
                p1 = Conv2D(filters // 2, (1, 1), padding='same', use_bias=False,
                            kernel_regularizer=l2(weight_decay),
                            name='adjust_conv_1_%s' % id,
                            kernel_initializer='he_normal')(p1)

                # Path 2: pad bottom/right, crop top/left (a one-pixel
                # shift), then subsample the shifted grid.
                p2 = ZeroPadding2D(padding=((0, 1), (0, 1)))(p)
                p2 = Cropping2D(cropping=((1, 0), (1, 0)))(p2)
                p2 = AveragePooling2D((1, 1), strides=(2, 2), padding='valid',
                                      name='adjust_avg_pool_2_%s' % id)(p2)
                p2 = Conv2D(filters // 2, (1, 1), padding='same', use_bias=False,
                            kernel_regularizer=l2(weight_decay),
                            name='adjust_conv_2_%s' % id,
                            kernel_initializer='he_normal')(p2)

                p = concatenate([p1, p2], axis=channel_dim)
                p = BatchNormalization(axis=channel_dim, momentum=_BN_DECAY,
                                       epsilon=_BN_EPSILON,
                                       name='adjust_bn_%s' % id)(p)

        elif p_shape[channel_dim] != filters:
            with K.name_scope('adjust_projection_block_%s' % id):
                p = Activation('relu')(p)
                p = Conv2D(filters, (1, 1), strides=(1, 1), padding='same',
                           name='adjust_conv_projection_%s' % id,
                           use_bias=False,
                           kernel_regularizer=l2(weight_decay),
                           kernel_initializer='he_normal')(p)
                p = BatchNormalization(axis=channel_dim, momentum=_BN_DECAY,
                                       epsilon=_BN_EPSILON,
                                       name='adjust_bn_%s' % id)(p)
    return p
|
||||
|
||||
|
||||
def _normal_A(ip, p, filters, weight_decay=5e-5, id=None):
    '''Adds a Normal cell for NASNet-A (Fig. 4 in the paper)

    The cell first adjusts `p` against `ip`, squeezes `ip` with
    relu -> 1x1 conv -> batchnorm, builds five two-input blocks and
    concatenates the adjusted `p` with the five block outputs.

    # Arguments:
        ip: input tensor `x`
        p: input tensor `p`
        filters: number of output filters
        weight_decay: l2 regularization weight
        id: string id, interpolated into the layer names

    # Returns:
        a tuple `(x, ip)`: the concatenated cell output and the
        unmodified `ip` that was passed in
    '''
    if K.image_data_format() == 'channels_first':
        bn_axis = 1
    else:
        bn_axis = -1

    def _avg_pool_same(layer_name):
        # 3x3 stride-1 average pooling that keeps the spatial size.
        return AveragePooling2D((3, 3), strides=(1, 1), padding='same', name=layer_name)

    with K.name_scope('normal_A_block_%s' % id):
        p = _adjust_block(p, ip, filters, weight_decay, id)

        squeezed = Activation('relu')(ip)
        squeezed = Conv2D(filters, (1, 1), strides=(1, 1), padding='same',
                          name='normal_conv_1_%s' % id, use_bias=False,
                          kernel_initializer='he_normal',
                          kernel_regularizer=l2(weight_decay))(squeezed)
        squeezed = BatchNormalization(axis=bn_axis, momentum=_BN_DECAY,
                                      epsilon=_BN_EPSILON,
                                      name='normal_bn_1_%s' % id)(squeezed)

        with K.name_scope('block_1'):
            left = _separable_conv_block(squeezed, filters, kernel_size=(5, 5),
                                         weight_decay=weight_decay,
                                         id='normal_left1_%s' % id)
            right = _separable_conv_block(p, filters, weight_decay=weight_decay,
                                          id='normal_right1_%s' % id)
            out_1 = add([left, right], name='normal_add_1_%s' % id)

        with K.name_scope('block_2'):
            left = _separable_conv_block(p, filters, (5, 5), weight_decay=weight_decay,
                                         id='normal_left2_%s' % id)
            right = _separable_conv_block(p, filters, (3, 3), weight_decay=weight_decay,
                                          id='normal_right2_%s' % id)
            out_2 = add([left, right], name='normal_add_2_%s' % id)

        with K.name_scope('block_3'):
            pooled = _avg_pool_same('normal_left3_%s' % (id))(squeezed)
            out_3 = add([pooled, p], name='normal_add_3_%s' % id)

        with K.name_scope('block_4'):
            left = _avg_pool_same('normal_left4_%s' % (id))(p)
            right = _avg_pool_same('normal_right4_%s' % (id))(p)
            out_4 = add([left, right], name='normal_add_4_%s' % id)

        with K.name_scope('block_5'):
            separable = _separable_conv_block(squeezed, filters, weight_decay=weight_decay,
                                              id='normal_left5_%s' % id)
            out_5 = add([separable, squeezed], name='normal_add_5_%s' % id)

        cell_output = concatenate([p, out_1, out_2, out_3, out_4, out_5],
                                  axis=bn_axis, name='normal_concat_%s' % id)
    return cell_output, ip
|
||||
|
||||
|
||||
def _reduction_A(ip, p, filters, weight_decay=5e-5, id=None):
    '''Adds a Reduction cell for NASNet-A (Fig. 4 in the paper)

    The cell adjusts `p` against `ip`, squeezes `ip` with
    relu -> 1x1 conv -> batchnorm, builds five blocks (the first three
    and the max-pool branch of the fifth use stride 2) and concatenates
    the outputs of blocks 2 to 5.

    # Arguments:
        ip: input tensor `x`
        p: input tensor `p`
        filters: number of output filters
        weight_decay: l2 regularization weight
        id: string id, interpolated into the layer names

    # Returns:
        a tuple `(x, ip)`: the concatenated cell output and the
        unmodified `ip` that was passed in
    '''
    if K.image_data_format() == 'channels_first':
        bn_axis = 1
    else:
        bn_axis = -1

    downsample = (2, 2)

    with K.name_scope('reduction_A_block_%s' % id):
        p = _adjust_block(p, ip, filters, weight_decay, id)

        squeezed = Activation('relu')(ip)
        squeezed = Conv2D(filters, (1, 1), strides=(1, 1), padding='same',
                          name='reduction_conv_1_%s' % id, use_bias=False,
                          kernel_initializer='he_normal',
                          kernel_regularizer=l2(weight_decay))(squeezed)
        squeezed = BatchNormalization(axis=bn_axis, momentum=_BN_DECAY,
                                      epsilon=_BN_EPSILON,
                                      name='reduction_bn_1_%s' % id)(squeezed)

        with K.name_scope('block_1'):
            left = _separable_conv_block(squeezed, filters, (5, 5), strides=downsample,
                                         weight_decay=weight_decay,
                                         id='reduction_left1_%s' % id)
            right = _separable_conv_block(p, filters, (7, 7), strides=downsample,
                                          weight_decay=weight_decay,
                                          id='reduction_1_%s' % id)
            out_1 = add([left, right], name='reduction_add_1_%s' % id)

        with K.name_scope('block_2'):
            left = MaxPooling2D((3, 3), strides=downsample, padding='same',
                                name='reduction_left2_%s' % id)(squeezed)
            right = _separable_conv_block(p, filters, (7, 7), strides=downsample,
                                          weight_decay=weight_decay,
                                          id='reduction_right2_%s' % id)
            out_2 = add([left, right], name='reduction_add_2_%s' % id)

        with K.name_scope('block_3'):
            left = AveragePooling2D((3, 3), strides=downsample, padding='same',
                                    name='reduction_left3_%s' % id)(squeezed)
            right = _separable_conv_block(p, filters, (5, 5), strides=downsample,
                                          weight_decay=weight_decay,
                                          id='reduction_right3_%s' % id)
            out_3 = add([left, right], name='reduction_add3_%s' % id)

        with K.name_scope('block_4'):
            # Operates on the already-reduced block_1 output, hence stride 1.
            pooled = AveragePooling2D((3, 3), strides=(1, 1), padding='same',
                                      name='reduction_left4_%s' % id)(out_1)
            out_4 = add([out_2, pooled])

        with K.name_scope('block_5'):
            left = _separable_conv_block(out_1, filters, (3, 3), weight_decay=weight_decay,
                                         id='reduction_left4_%s' % id)
            right = MaxPooling2D((3, 3), strides=downsample, padding='same',
                                 name='reduction_right5_%s' % id)(squeezed)
            out_5 = add([left, right], name='reduction_add4_%s' % id)

        cell_output = concatenate([out_2, out_3, out_4, out_5], axis=bn_axis,
                                  name='reduction_concat_%s' % id)
    return cell_output, ip
|
||||
|
||||
|
||||
def _add_auxiliary_head(x, classes, weight_decay):
    '''Adds an auxiliary head for training the model

    From section A.7 "Training of ImageNet models" of the paper, all NASNet models are
    trained using an auxiliary classifier around 2/3 of the depth of the network, with
    a loss weight of 0.4

    The head is: relu -> 5x5 average pooling (stride 3) -> 1x1 conv (128)
    -> batchnorm -> relu -> conv (768) whose kernel covers the whole
    remaining feature map -> batchnorm -> relu -> global average pooling
    -> softmax dense layer.

    # Arguments
        x: input tensor
        classes: number of output classes
        weight_decay: l2 regularization weight

    # Returns
        a keras Tensor
    '''
    img_height = 1 if K.image_data_format() == 'channels_last' else 2
    img_width = 2 if K.image_data_format() == 'channels_last' else 3
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    with K.name_scope('auxiliary_branch'):
        auxiliary_x = Activation('relu')(x)
        auxiliary_x = AveragePooling2D((5, 5), strides=(3, 3), padding='valid',
                                       name='aux_pool')(auxiliary_x)
        auxiliary_x = Conv2D(128, (1, 1), padding='same', use_bias=False,
                             name='aux_conv_projection',
                             kernel_initializer='he_normal',
                             kernel_regularizer=l2(weight_decay))(auxiliary_x)
        auxiliary_x = BatchNormalization(axis=channel_axis, momentum=_BN_DECAY,
                                         epsilon=_BN_EPSILON,
                                         name='aux_bn_projection')(auxiliary_x)
        auxiliary_x = Activation('relu')(auxiliary_x)

        # Read the static feature-map size through the public backend
        # accessor rather than the private `_keras_shape` attribute; the
        # next convolution uses it as its kernel size.
        pooled_shape = K.int_shape(auxiliary_x)
        auxiliary_x = Conv2D(768, (pooled_shape[img_height], pooled_shape[img_width]),
                             padding='valid', use_bias=False,
                             kernel_initializer='he_normal',
                             kernel_regularizer=l2(weight_decay),
                             name='aux_conv_reduction')(auxiliary_x)
        auxiliary_x = BatchNormalization(axis=channel_axis, momentum=_BN_DECAY,
                                         epsilon=_BN_EPSILON,
                                         name='aux_bn_reduction')(auxiliary_x)
        auxiliary_x = Activation('relu')(auxiliary_x)

        auxiliary_x = GlobalAveragePooling2D()(auxiliary_x)
        auxiliary_x = Dense(classes, activation='softmax',
                            kernel_regularizer=l2(weight_decay),
                            name='aux_predictions')(auxiliary_x)
    return auxiliary_x
|
||||
@@ -0,0 +1,454 @@
|
||||
"""ResNet v1, v2, and segmentation models for Keras.
|
||||
|
||||
# Reference
|
||||
|
||||
- [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385)
|
||||
- [Identity Mappings in Deep Residual Networks](https://arxiv.org/abs/1603.05027)
|
||||
|
||||
Reference material for extended functionality:
|
||||
|
||||
- [ResNeXt](https://arxiv.org/abs/1611.05431) for Tiny ImageNet support.
|
||||
- [Dilated Residual Networks](https://arxiv.org/pdf/1705.09914) for segmentation support.
|
||||
- [Deep Residual Learning for Instrument Segmentation in Robotic Surgery](https://arxiv.org/abs/1703.08580)
|
||||
for segmentation support.
|
||||
|
||||
Implementation Adapted from: github.com/raghakot/keras-resnet
|
||||
"""
|
||||
from __future__ import division
|
||||
|
||||
import six
|
||||
from keras.models import Model
|
||||
from keras.layers import Input
|
||||
from keras.layers import Activation
|
||||
from keras.layers import Reshape
|
||||
from keras.layers import Dense
|
||||
from keras.layers import Flatten
|
||||
from keras.layers import Conv2D
|
||||
from keras.layers import MaxPooling2D
|
||||
from keras.layers import AveragePooling2D
|
||||
from keras.layers.pooling import GlobalAveragePooling2D
|
||||
from keras.layers import GlobalMaxPooling2D
|
||||
from keras.layers import GlobalAveragePooling2D
|
||||
from keras.layers import Dropout
|
||||
from keras.layers.merge import add
|
||||
from keras.layers.normalization import BatchNormalization
|
||||
from keras.regularizers import l2
|
||||
from keras import backend as K
|
||||
from keras.applications.imagenet_utils import _obtain_input_shape
|
||||
|
||||
|
||||
def _bn_relu(x, bn_name=None, relu_name=None):
    """Apply batch normalization followed by a relu activation to `x`.

    The normalization axis is the module-level `CHANNEL_AXIS`.

    Args:
        x: input tensor.
        bn_name: optional name for the BatchNormalization layer.
        relu_name: optional name for the Activation layer.

    Returns:
        The activated tensor.
    """
    normalized = BatchNormalization(axis=CHANNEL_AXIS, name=bn_name)(x)
    relu = Activation("relu", name=relu_name)
    return relu(normalized)
|
||||
|
||||
|
||||
def _conv_bn_relu(**conv_params):
    """Helper to build a conv -> BN -> relu residual unit activation function.

    This is the original ResNet v1 scheme in https://arxiv.org/abs/1512.03385

    Keyword Args:
        filters: number of convolution filters (required).
        kernel_size: convolution kernel size (required).
        strides: convolution strides, default (1, 1).
        dilation_rate: convolution dilation rate, default (1, 1).
        conv_name / conv_name_base: name of the convolution layer.
        bn_name / bn_name_base: name of the batch normalization layer.
        relu_name: name of the activation layer.
        kernel_initializer: default "he_normal".
        padding: default "same".
        kernel_regularizer: default l2(1.e-4).

    Returns:
        A function mapping an input tensor to the unit's output tensor.
    """
    filters = conv_params["filters"]
    kernel_size = conv_params["kernel_size"]
    strides = conv_params.setdefault("strides", (1, 1))
    dilation_rate = conv_params.setdefault("dilation_rate", (1, 1))
    # The block builders in this file pass `conv_name_base` / `bn_name_base`;
    # honour those spellings too so the layer names are not silently dropped.
    conv_name = conv_params.get("conv_name")
    if conv_name is None:
        conv_name = conv_params.get("conv_name_base")
    bn_name = conv_params.get("bn_name")
    if bn_name is None:
        bn_name = conv_params.get("bn_name_base")
    relu_name = conv_params.setdefault("relu_name", None)
    kernel_initializer = conv_params.setdefault("kernel_initializer", "he_normal")
    padding = conv_params.setdefault("padding", "same")
    kernel_regularizer = conv_params.setdefault("kernel_regularizer", l2(1.e-4))

    def f(x):
        x = Conv2D(filters=filters, kernel_size=kernel_size,
                   strides=strides, padding=padding,
                   dilation_rate=dilation_rate,
                   kernel_initializer=kernel_initializer,
                   kernel_regularizer=kernel_regularizer,
                   name=conv_name)(x)
        return _bn_relu(x, bn_name=bn_name, relu_name=relu_name)

    return f
|
||||
|
||||
|
||||
def _bn_relu_conv(**conv_params):
    """Helper to build a BN -> relu -> conv residual unit with full pre-activation function.

    This is the ResNet v2 scheme proposed in http://arxiv.org/pdf/1603.05027v2.pdf

    Keyword Args:
        filters: number of convolution filters (required).
        kernel_size: convolution kernel size (required).
        strides: convolution strides, default (1, 1).
        dilation_rate: convolution dilation rate, default (1, 1).
        conv_name / conv_name_base: name of the convolution layer.
        bn_name / bn_name_base: name of the batch normalization layer.
        relu_name: name of the activation layer.
        kernel_initializer: default "he_normal".
        padding: default "same".
        kernel_regularizer: default l2(1.e-4).

    Returns:
        A function mapping an input tensor to the unit's output tensor.
    """
    filters = conv_params["filters"]
    kernel_size = conv_params["kernel_size"]
    strides = conv_params.setdefault("strides", (1, 1))
    dilation_rate = conv_params.setdefault("dilation_rate", (1, 1))
    # The block builders in this file pass `conv_name_base` / `bn_name_base`;
    # honour those spellings too so the layer names are not silently dropped.
    conv_name = conv_params.get("conv_name")
    if conv_name is None:
        conv_name = conv_params.get("conv_name_base")
    bn_name = conv_params.get("bn_name")
    if bn_name is None:
        bn_name = conv_params.get("bn_name_base")
    relu_name = conv_params.setdefault("relu_name", None)
    kernel_initializer = conv_params.setdefault("kernel_initializer", "he_normal")
    padding = conv_params.setdefault("padding", "same")
    kernel_regularizer = conv_params.setdefault("kernel_regularizer", l2(1.e-4))

    def f(x):
        activation = _bn_relu(x, bn_name=bn_name, relu_name=relu_name)
        return Conv2D(filters=filters, kernel_size=kernel_size,
                      strides=strides, padding=padding,
                      dilation_rate=dilation_rate,
                      kernel_initializer=kernel_initializer,
                      kernel_regularizer=kernel_regularizer,
                      name=conv_name)(activation)

    return f
|
||||
|
||||
|
||||
def _shortcut(input_feature, residual, conv_name_base=None, bn_name_base=None):
    """Adds a shortcut between input and residual block and merges them with "sum"

    When the residual has a different spatial size or channel count than
    the input, the input is first projected with a strided 1x1
    convolution followed by batch normalization; otherwise the input is
    added unchanged.

    Args:
        input_feature: tensor entering the residual block.
        residual: tensor produced by the residual branch.
        conv_name_base: optional name base; '1' is appended to name the
            projection convolution.
        bn_name_base: optional name base; '1' is appended to name the
            projection batch normalization.

    Returns:
        The element-wise sum of the (possibly projected) shortcut and
        the residual.
    """
    # Expand channels of shortcut to match residual.
    # Stride appropriately to match residual (rows, cols).
    # The ratios should be ints if the network architecture is correctly
    # configured.
    input_shape = K.int_shape(input_feature)
    residual_shape = K.int_shape(residual)
    row_stride = int(round(input_shape[ROW_AXIS] / residual_shape[ROW_AXIS]))
    col_stride = int(round(input_shape[COL_AXIS] / residual_shape[COL_AXIS]))
    equal_channels = input_shape[CHANNEL_AXIS] == residual_shape[CHANNEL_AXIS]

    shortcut = input_feature
    # 1 X 1 conv if shape is different. Else identity.
    if row_stride > 1 or col_stride > 1 or not equal_channels:
        if conv_name_base is not None:
            conv_name_base = conv_name_base + '1'
        shortcut = Conv2D(filters=residual_shape[CHANNEL_AXIS],
                          kernel_size=(1, 1),
                          strides=(row_stride, col_stride),
                          padding="valid",
                          kernel_initializer="he_normal",
                          kernel_regularizer=l2(0.0001),
                          name=conv_name_base)(input_feature)
        if bn_name_base is not None:
            bn_name_base = bn_name_base + '1'
        shortcut = BatchNormalization(axis=CHANNEL_AXIS, name=bn_name_base)(shortcut)

    return add([shortcut, residual])
|
||||
|
||||
|
||||
def _residual_block(block_function, filters, blocks, stage,
                    transition_strides=None, transition_dilation_rates=None,
                    dilation_rates=(1, 1), is_first_layer=False, dropout=None,
                    residual_unit=_bn_relu_conv):
    """Builds a residual block with repeating bottleneck blocks.

    Args:
        block_function: callable building one block, e.g. `basic_block`
            or `bottleneck`.
        filters: number of filters passed to every block.
        blocks: number of blocks in this stage; block `i` is labelled by
            its index when generating layer names.
        stage: integer, current stage label, used for generating layer names
        transition_strides: a list of tuples for the strides of each transition
        transition_dilation_rates: a list of tuples for the dilation rate of
            each transition. Accepted for API compatibility; it is not
            forwarded to `block_function`.
        dilation_rates: either a sequence with at least `blocks` entries
            (one dilation rate per block) or a single dilation rate
            (an int or a short tuple of ints such as `(1, 1)`) that is
            applied to every block.
        is_first_layer: whether this is the first stage of the network.
        dropout: dropout rate forwarded to every block, or None.
        residual_unit: residual unit builder forwarded to every block.

    Returns:
        A function that applies the `blocks` blocks in sequence to a tensor.

    Raises:
        ValueError: if `dilation_rates` is a sequence shorter than
            `blocks` that is not a single dilation rate.
    """
    if transition_strides is None:
        transition_strides = [(1, 1)] * blocks

    # Indexing the default `(1, 1)` per block fails as soon as a stage
    # has more than two blocks, so normalise to one rate per block.
    # Sequences that were already long enough are kept as-is.
    if isinstance(dilation_rates, int):
        per_block_rates = [dilation_rates] * blocks
    elif len(dilation_rates) >= blocks:
        per_block_rates = list(dilation_rates)
    elif all(isinstance(rate, int) for rate in dilation_rates):
        per_block_rates = [tuple(dilation_rates)] * blocks
    else:
        raise ValueError('dilation_rates must be a single dilation rate or '
                         'provide one rate per block')

    def f(x):
        for i in range(blocks):
            x = block_function(filters=filters, stage=stage, block=i,
                               transition_strides=transition_strides[i],
                               dilation_rate=per_block_rates[i],
                               is_first_block_of_first_layer=(is_first_layer and i == 0),
                               dropout=dropout,
                               residual_unit=residual_unit)(x)
        return x

    return f
|
||||
|
||||
|
||||
def _block_name_base(stage, block):
|
||||
"""Get the convolution name base and batch normalization name base defined by stage and block.
|
||||
|
||||
If there are less than 26 blocks they will be labeled 'a', 'b', 'c' to match the paper and keras
|
||||
and beyond 26 blocks they will simply be numbered.
|
||||
"""
|
||||
if block < 27:
|
||||
block = '%c' % (block + 97) # 97 is the ascii number for lowercase 'a'
|
||||
conv_name_base = 'res' + str(stage) + block + '_branch'
|
||||
bn_name_base = 'bn' + str(stage) + block + '_branch'
|
||||
return conv_name_base, bn_name_base
|
||||
|
||||
|
||||
def basic_block(filters, stage, block, transition_strides=(1, 1),
                dilation_rate=(1, 1), is_first_block_of_first_layer=False, dropout=None,
                residual_unit=_bn_relu_conv):
    """Basic 3 X 3 convolution blocks for use on resnets with layers <= 34.

    Follows improved proposed scheme in http://arxiv.org/pdf/1603.05027v2.pdf

    Args:
        filters: number of filters of both 3x3 convolutions.
        stage: stage label used for layer names.
        block: block index used for layer names.
        transition_strides: strides of the first convolution.
        dilation_rate: dilation rate of the first convolution.
        is_first_block_of_first_layer: if True the first convolution is a
            plain Conv2D instead of a `residual_unit`.
        dropout: dropout rate applied after the first convolution, or None.
        residual_unit: builder for the residual units.

    Returns:
        A function mapping the block input tensor to the block output
        (residual branch merged with the shortcut).
    """
    def apply_block(input_features):
        conv_base, bn_base = _block_name_base(stage, block)

        if not is_first_block_of_first_layer:
            first_layer = residual_unit(filters=filters, kernel_size=(3, 3),
                                        strides=transition_strides,
                                        dilation_rate=dilation_rate,
                                        conv_name_base=conv_base + '2a',
                                        bn_name_base=bn_base + '2a')
        else:
            # don't repeat bn->relu since we just did bn->relu->maxpool
            first_layer = Conv2D(filters=filters, kernel_size=(3, 3),
                                 strides=transition_strides,
                                 dilation_rate=dilation_rate,
                                 padding="same",
                                 kernel_initializer="he_normal",
                                 kernel_regularizer=l2(1e-4),
                                 name=conv_base + '2a')
        branch = first_layer(input_features)

        if dropout is not None:
            branch = Dropout(dropout)(branch)

        second_unit = residual_unit(filters=filters, kernel_size=(3, 3),
                                    conv_name_base=conv_base + '2b',
                                    bn_name_base=bn_base + '2b')
        branch = second_unit(branch)

        return _shortcut(input_features, branch)

    return apply_block
|
||||
|
||||
|
||||
def bottleneck(filters, stage, block, transition_strides=(1, 1),
               dilation_rate=(1, 1), is_first_block_of_first_layer=False, dropout=None,
               residual_unit=_bn_relu_conv):
    """Bottleneck architecture for > 34 layer resnet.

    Follows improved proposed scheme in http://arxiv.org/pdf/1603.05027v2.pdf

    The residual branch is 1x1 (`filters`) -> 3x3 (`filters`) ->
    1x1 (`filters * 4`), with optional dropout after the first two
    convolutions.

    Args:
        filters: number of filters of the first two convolutions.
        stage: stage label used for layer names.
        block: block index used for layer names.
        transition_strides: strides of the first convolution.
        dilation_rate: dilation rate of the first convolution.
        is_first_block_of_first_layer: if True the first convolution is a
            plain Conv2D instead of a `residual_unit`.
        dropout: dropout rate, or None for no dropout.
        residual_unit: builder for the residual units.

    Returns:
        A function mapping the block input tensor to the block output,
        whose final conv layer has `filters * 4` filters.
    """
    def apply_block(input_feature):
        conv_base, bn_base = _block_name_base(stage, block)

        if not is_first_block_of_first_layer:
            first_layer = residual_unit(filters=filters, kernel_size=(1, 1),
                                        strides=transition_strides,
                                        dilation_rate=dilation_rate,
                                        conv_name_base=conv_base + '2a',
                                        bn_name_base=bn_base + '2a')
        else:
            # don't repeat bn->relu since we just did bn->relu->maxpool
            first_layer = Conv2D(filters=filters, kernel_size=(1, 1),
                                 strides=transition_strides,
                                 dilation_rate=dilation_rate,
                                 padding="same",
                                 kernel_initializer="he_normal",
                                 kernel_regularizer=l2(1e-4),
                                 name=conv_base + '2a')
        branch = first_layer(input_feature)

        if dropout is not None:
            branch = Dropout(dropout)(branch)

        middle_unit = residual_unit(filters=filters, kernel_size=(3, 3),
                                    conv_name_base=conv_base + '2b',
                                    bn_name_base=bn_base + '2b')
        branch = middle_unit(branch)

        if dropout is not None:
            branch = Dropout(dropout)(branch)

        expand_unit = residual_unit(filters=filters * 4, kernel_size=(1, 1),
                                    conv_name_base=conv_base + '2c',
                                    bn_name_base=bn_base + '2c')
        branch = expand_unit(branch)

        return _shortcut(input_feature, branch)

    return apply_block
|
||||
|
||||
|
||||
def _handle_dim_ordering():
    """Set the module-level axis indices for the active image data format.

    Assigns the globals `ROW_AXIS`, `COL_AXIS` and `CHANNEL_AXIS` to
    (1, 2, 3) for 'channels_last' and to (2, 3, 1) otherwise.
    """
    global ROW_AXIS, COL_AXIS, CHANNEL_AXIS
    if K.image_data_format() == 'channels_last':
        ROW_AXIS, COL_AXIS, CHANNEL_AXIS = 1, 2, 3
    else:
        CHANNEL_AXIS, ROW_AXIS, COL_AXIS = 1, 2, 3
|
||||
|
||||
|
||||
def _string_to_function(identifier):
    """Resolve a string identifier to the module-level object of that name.

    Non-string identifiers are returned unchanged.

    Args:
        identifier: a string naming a global of this module, or any
            other object.

    Returns:
        The looked-up global, or `identifier` itself when it is not a string.

    Raises:
        ValueError: if the string does not name a truthy module global.
    """
    if not isinstance(identifier, six.string_types):
        return identifier
    resolved = globals().get(identifier)
    if not resolved:
        raise ValueError('Invalid {}'.format(identifier))
    return resolved
|
||||
|
||||
|
||||
def ResNet(input_shape=None, classes=10, block='bottleneck', residual_unit='v2', repetitions=None,
           initial_filters=64, activation='softmax', include_top=True, input_tensor=None, dropout=None,
           transition_dilation_rate=(1, 1), initial_strides=(2, 2), initial_kernel_size=(7, 7),
           initial_pooling='max', final_pooling=None, top='classification'):
    """Builds a custom ResNet like architecture. Defaults to ResNet50 v2.

    Args:
        input_shape: optional shape tuple in the active image data format,
            e.g. `(224, 224, 3)` with `channels_last` or `(3, 224, 224)`
            with `channels_first`. It is validated by `_obtain_input_shape`
            with a default size of 32 and a minimum size of 8.
        classes: The number of outputs at final softmax layer
        block: The block function to use. This is either `'basic'` or `'bottleneck'`,
            the name of another module-level block function, or a callable.
            The original paper used `basic` for layers < 50.
        residual_unit: the basic residual unit, 'v1' for conv bn relu, 'v2' for bn relu conv.
            See [Identity Mappings in Deep Residual Networks](https://arxiv.org/abs/1603.05027)
            for details.
        repetitions: Number of repetitions of various block units.
            At each block unit, the number of filters are doubled and the input size is halved.
            Default of None implies the ResNet50v2 values of [3, 4, 6, 3].
        initial_filters: number of filters of the first convolution; the
            filter count is doubled after every stage.
        activation: activation of the top layers; one of 'softmax',
            'sigmoid' (only with `classes == 1`) or None.
        include_top: whether to add the layers selected by `top`.
        input_tensor: optional tensor passed to `Input(tensor=...)`.
        dropout: None for no dropout, otherwise rate of dropout from 0 to 1.
            Based on the [Wide Residual Networks](https://arxiv.org/pdf/1605.07146) paper.
        transition_dilation_rate: Dilation rate for transition layers. Used for
            pixel-wise prediction tasks such as image segmentation. For semantic
            segmentation of images use a dilation rate of (2, 2).
        initial_strides: Stride of the very first residual unit and MaxPooling2D call,
            with default (2, 2), set to (1, 1) for small images like cifar.
        initial_kernel_size: kernel size of the very first convolution, (7, 7) for imagenet
            and (3, 3) for small image datasets like tiny imagenet and cifar.
            See [ResNeXt](https://arxiv.org/abs/1611.05431) paper for details.
        initial_pooling: Determine if there will be an initial pooling layer,
            'max' for imagenet and None for small image datasets.
            See [ResNeXt](https://arxiv.org/abs/1611.05431) paper for details.
        final_pooling: Optional pooling mode for feature extraction at the final model layer
            when `include_top` is `False`.
            - `None` means that the output of the model
                will be the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a
                2D tensor.
            - `max` means that global max pooling will
                be applied.
        top: Defines final layers to evaluate based on a specific problem type. Options are
            'classification' for ImageNet style problems, 'segmentation' for problems like
            the Pascal VOC dataset, and None to exclude these layers entirely.

    Returns:
        The keras `Model`.

    Raises:
        ValueError: if `activation` is not supported, if 'sigmoid' is
            requested with `classes != 1`, or if the input shape does
            not have three dimensions.
    """
    if activation not in ['softmax', 'sigmoid', None]:
        raise ValueError('activation must be one of "softmax", "sigmoid", or None')
    if activation == 'sigmoid' and classes != 1:
        raise ValueError('sigmoid activation can only be used when classes = 1')
    if repetitions is None:
        repetitions = [3, 4, 6, 3]

    # Determine proper input shape. The shape is validated once, in the
    # active data format, and is NOT permuted afterwards: the layers
    # below already consume it in that format.
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=32,
                                      min_size=8,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top)
    _handle_dim_ordering()
    if len(input_shape) != 3:
        raise ValueError("Input shape should be a tuple (nb_channels, nb_rows, nb_cols)")

    # Resolve the block builder.
    if block == 'basic':
        block_fn = basic_block
    elif block == 'bottleneck':
        block_fn = bottleneck
    elif isinstance(block, six.string_types):
        block_fn = _string_to_function(block)
    else:
        block_fn = block

    # Resolve the residual unit builder; callables are used as given.
    if residual_unit == 'v2':
        residual_unit = _bn_relu_conv
    elif residual_unit == 'v1':
        residual_unit = _conv_bn_relu
    elif isinstance(residual_unit, six.string_types):
        residual_unit = _string_to_function(residual_unit)

    img_input = Input(shape=input_shape, tensor=input_tensor)
    x = _conv_bn_relu(filters=initial_filters, kernel_size=initial_kernel_size,
                      strides=initial_strides)(img_input)
    if initial_pooling == 'max':
        x = MaxPooling2D(pool_size=(3, 3), strides=initial_strides, padding="same")(x)

    filters = initial_filters
    for stage, reps in enumerate(repetitions):
        transition_dilation_rates = [transition_dilation_rate] * reps
        transition_strides = [(1, 1)] * reps
        # Without dilation, the first block of a stage downsamples.
        if reps > 0 and transition_dilation_rate == (1, 1):
            transition_strides[0] = (2, 2)
        x = _residual_block(block_fn, filters=filters,
                            stage=stage, blocks=reps,
                            is_first_layer=(stage == 0),
                            dropout=dropout,
                            transition_dilation_rates=transition_dilation_rates,
                            transition_strides=transition_strides,
                            # One rate per block, so stages with more than
                            # two blocks can be indexed safely.
                            dilation_rates=[(1, 1)] * reps,
                            residual_unit=residual_unit)(x)
        filters *= 2

    # Last activation
    x = _bn_relu(x)

    # Classifier block. Compare strings by value, not identity.
    if include_top and top == 'classification':
        x = GlobalAveragePooling2D()(x)
        x = Dense(units=classes, activation=activation, kernel_initializer="he_normal")(x)
    elif include_top and top == 'segmentation':
        x = Conv2D(classes, (1, 1), activation='linear', padding='same')(x)

        if K.image_data_format() == 'channels_first':
            channel, row, col = input_shape
        else:
            row, col, channel = input_shape

        # NOTE(review): the reshapes use the *input* rows/cols, which
        # presumes the feature map still has the input's spatial size
        # at this point -- confirm for strided/pooled configurations.
        x = Reshape((row * col, classes))(x)
        x = Activation(activation)(x)
        x = Reshape((row, col, classes))(x)
    elif final_pooling == 'avg':
        x = GlobalAveragePooling2D()(x)
    elif final_pooling == 'max':
        x = GlobalMaxPooling2D()(x)

    model = Model(inputs=img_input, outputs=x)
    return model
|
||||
|
||||
|
||||
def ResNet18(input_shape, classes):
    """Build the 18-layer ResNet variant (v2 residual units).

    Thin wrapper around `ResNet` that selects `basic_block` as the block
    function and uses two blocks in each of the four stages.
    """
    stage_repetitions = [2, 2, 2, 2]
    return ResNet(input_shape, classes, basic_block,
                  repetitions=stage_repetitions)
|
||||
|
||||
|
||||
def ResNet34(input_shape, classes):
    """Build the 34-layer ResNet variant (v2 residual units).

    Thin wrapper around `ResNet` that selects `basic_block` as the block
    function with 3, 4, 6 and 3 blocks in the four stages.
    """
    stage_repetitions = [3, 4, 6, 3]
    return ResNet(input_shape, classes, basic_block,
                  repetitions=stage_repetitions)
|
||||
|
||||
|
||||
def ResNet50(input_shape, classes):
    """Build the 50-layer ResNet variant (v2 residual units).

    Thin wrapper around `ResNet` that selects `bottleneck` as the block
    function with 3, 4, 6 and 3 blocks in the four stages.
    """
    stage_repetitions = [3, 4, 6, 3]
    return ResNet(input_shape, classes, bottleneck,
                  repetitions=stage_repetitions)
|
||||
|
||||
|
||||
def ResNet101(input_shape, classes):
    """Build the 101-layer ResNet variant (v2 residual units).

    Thin wrapper around `ResNet` that selects `bottleneck` as the block
    function with 3, 4, 23 and 3 blocks in the four stages.
    """
    stage_repetitions = [3, 4, 23, 3]
    return ResNet(input_shape, classes, bottleneck,
                  repetitions=stage_repetitions)
|
||||
|
||||
|
||||
def ResNet152(input_shape, classes):
    """Build the 152-layer ResNet variant (v2 residual units).

    Thin wrapper around `ResNet` that selects `bottleneck` as the block
    function with 3, 8, 36 and 3 blocks in the four stages.
    """
    stage_repetitions = [3, 8, 36, 3]
    return ResNet(input_shape, classes, bottleneck,
                  repetitions=stage_repetitions)
|
||||
@@ -89,7 +89,7 @@ def WideResidualNetwork(depth=28, width=8, dropout_rate=0.0,
|
||||
default_size=32,
|
||||
min_size=8,
|
||||
data_format=K.image_dim_ordering(),
|
||||
include_top=include_top)
|
||||
require_flatten=include_top)
|
||||
|
||||
if input_tensor is None:
|
||||
img_input = Input(shape=input_shape)
|
||||
|
||||
@@ -1,2 +1,26 @@
|
||||
from keras.backend import cntk_backend as KCN
|
||||
import cntk as C
|
||||
import numpy as np
|
||||
|
||||
|
||||
def clip(x, min_value, max_value):
    """Clip `x` element-wise to the range `[min_value, max_value]`.

    The upper bound is raised to the lower bound when it is smaller, so
    for `min_value > max_value` the effective range is
    `[min_value, min_value]`.

    # Arguments
        x: Tensor or variable.
        min_value: Tensor, float, int, or None.
            None is treated as -infinity.
        max_value: Tensor, float, int, or None.
            None is treated as infinity.

    # Returns
        A tensor.
    """
    lower = -np.inf if min_value is None else min_value
    upper = np.inf if max_value is None else max_value
    # Never let the upper bound drop below the lower bound.
    upper = C.maximum(lower, upper)
    return C.clip(x, lower, upper)
|
||||
|
||||
@@ -1,28 +1,71 @@
|
||||
import tensorflow as tf
|
||||
import numpy as np
|
||||
|
||||
try:
|
||||
from tensorflow.python.ops import ctc_ops as ctc
|
||||
except ImportError:
|
||||
import tensorflow.contrib.ctc as ctc
|
||||
from keras.backend import tensorflow_backend as KTF
|
||||
from keras.backend.common import floatx, image_data_format
|
||||
from keras.backend.tensorflow_backend import _preprocess_conv3d_input
|
||||
from keras.backend.tensorflow_backend import _postprocess_conv3d_output
|
||||
from keras.backend.tensorflow_backend import _preprocess_padding
|
||||
from keras.backend.tensorflow_backend import _preprocess_conv2d_input
|
||||
from keras.backend.tensorflow_backend import _postprocess_conv2d_output
|
||||
from keras.backend import dtype
|
||||
from keras.backend.common import floatx
|
||||
from keras.backend.common import image_data_format
|
||||
from keras.backend.tensorflow_backend import _to_tensor
|
||||
|
||||
py_all = all
|
||||
|
||||
|
||||
def _preprocess_deconv_output_shape(x, shape, data_format):
|
||||
def _preprocess_conv2d_input(x, data_format):
|
||||
"""Transpose and cast the input before the conv2d.
|
||||
# Arguments
|
||||
x: input tensor.
|
||||
data_format: string, `"channels_last"` or `"channels_first"`.
|
||||
# Returns
|
||||
A tensor.
|
||||
"""
|
||||
if dtype(x) == 'float64':
|
||||
x = tf.cast(x, 'float32')
|
||||
if data_format == 'channels_first':
|
||||
shape = (shape[0],) + tuple(shape[2:]) + (shape[1],)
|
||||
# TF uses the last dimension as channel dimension,
|
||||
# instead of the 2nd one.
|
||||
# TH input shape: (samples, input_depth, rows, cols)
|
||||
# TF input shape: (samples, rows, cols, input_depth)
|
||||
x = tf.transpose(x, (0, 2, 3, 1))
|
||||
return x
|
||||
|
||||
if shape[0] is None:
|
||||
shape = (tf.shape(x)[0],) + tuple(shape[1:])
|
||||
shape = tf.stack(list(shape))
|
||||
return shape
|
||||
|
||||
def _postprocess_conv2d_output(x, data_format):
    """Restore the layout and dtype of a conv2d result.

    # Arguments
        x: A tensor.
        data_format: string, `"channels_last"` or `"channels_first"`.

    # Returns
        A tensor: `x` with the last axis moved to position 1 when
        `data_format` is `"channels_first"`, cast to float64 when the
        configured `floatx()` is `'float64'`.
    """
    channels_first = data_format == 'channels_first'
    if channels_first:
        # Move the last axis back in front of the two spatial axes.
        x = tf.transpose(x, (0, 3, 1, 2))

    wants_float64 = floatx() == 'float64'
    return tf.cast(x, 'float64') if wants_float64 else x
|
||||
|
||||
|
||||
def _preprocess_padding(padding):
|
||||
"""Convert keras' padding to tensorflow's padding.
|
||||
# Arguments
|
||||
padding: string, `"same"` or `"valid"`.
|
||||
# Returns
|
||||
a string, `"SAME"` or `"VALID"`.
|
||||
# Raises
|
||||
ValueError: if `padding` is invalid.
|
||||
"""
|
||||
if padding == 'same':
|
||||
padding = 'SAME'
|
||||
elif padding == 'valid':
|
||||
padding = 'VALID'
|
||||
else:
|
||||
raise ValueError('Invalid padding:', padding)
|
||||
return padding
|
||||
|
||||
|
||||
def conv2d(x, kernel, strides=(1, 1), padding='valid', data_format='channels_first',
|
||||
@@ -70,45 +113,6 @@ def conv2d(x, kernel, strides=(1, 1), padding='valid', data_format='channels_fir
|
||||
return x
|
||||
|
||||
|
||||
def deconv3d(x, kernel, output_shape, strides=(1, 1, 1),
             padding='valid',
             data_format='default',
             image_shape=None, filter_shape=None):
    """3D deconvolution (i.e. transposed convolution).

    # Arguments
        x: input tensor.
        kernel: kernel tensor.
        output_shape: 1D int tensor for the output shape.
        strides: strides tuple (a list is accepted as well).
        padding: string, "same" or "valid".
        data_format: `"channels_first"`, `"channels_last"` or `"default"`.
            `"default"` resolves to the value of `image_data_format()`.
        image_shape: not used by this implementation.
        filter_shape: not used by this implementation.

    # Returns
        A tensor, result of transposed 3D convolution.

    # Raises
        ValueError: if `data_format` is neither `"channels_first"`
            nor `"channels_last"` (after resolving `"default"`).
    """
    if data_format == 'default':
        data_format = image_data_format()
    if data_format not in {'channels_first', 'channels_last'}:
        raise ValueError('Unknown data_format ' + str(data_format))

    x = _preprocess_conv3d_input(x, data_format)
    output_shape = _preprocess_deconv_output_shape(x, output_shape,
                                                   data_format)
    # Swap the last two kernel axes before handing it to
    # `tf.nn.conv3d_transpose`.
    # NOTE(review): presumably converts (..., in, out) to the
    # (..., out, in) filter layout that op expects - confirm.
    kernel = tf.transpose(kernel, (0, 1, 2, 4, 3))
    padding = _preprocess_padding(padding)
    # `tuple(...)` lets callers pass the strides as a list too; the
    # original tuple concatenation raised TypeError for lists.
    strides = (1,) + tuple(strides) + (1,)

    x = tf.nn.conv3d_transpose(x, kernel, output_shape, strides,
                               padding=padding)
    return _postprocess_conv3d_output(x, data_format)
|
||||
|
||||
|
||||
def extract_image_patches(x, ksizes, ssizes, padding='same',
|
||||
data_format='channels_last'):
|
||||
'''
|
||||
@@ -158,3 +162,28 @@ def moments(x, axes, shift=None, keep_dims=False):
|
||||
''' Wrapper over tensorflow backend call '''
|
||||
|
||||
return tf.nn.moments(x, axes, shift=shift, keep_dims=keep_dims)
|
||||
|
||||
|
||||
def clip(x, min_value, max_value):
    """Clip `x` element-wise to the range `[min_value, max_value]`.

    The upper bound is raised to the lower bound when it is smaller, so
    for `min_value > max_value` the effective range is
    `[min_value, min_value]`.

    # Arguments
        x: Tensor or variable.
        min_value: Tensor, float, int, or None.
            None is treated as -infinity.
        max_value: Tensor, float, int, or None.
            None is treated as infinity.

    # Returns
        A tensor.
    """
    lower = -np.inf if min_value is None else min_value
    upper = np.inf if max_value is None else max_value

    # Bring both bounds to the base dtype of `x` before comparing them.
    lower = _to_tensor(lower, x.dtype.base_dtype)
    upper = _to_tensor(upper, x.dtype.base_dtype)

    # Never let the upper bound drop below the lower bound.
    upper = tf.maximum(lower, upper)
    return tf.clip_by_value(x, lower, upper)
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from theano import tensor as T
|
||||
from theano.sandbox.neighbours import images2neibs
|
||||
import numpy as np
|
||||
|
||||
try:
|
||||
import theano.sparse as th_sparse_module
|
||||
@@ -85,56 +86,6 @@ def conv2d(x, kernel, strides=(1, 1), padding='valid', data_format='channels_fir
|
||||
return conv_out
|
||||
|
||||
|
||||
def deconv3d(x, kernel, output_shape, strides=(1, 1, 1),
             padding='valid',
             data_format=None, filter_shape=None):
    """3D deconvolution (transposed convolution).

    # Arguments
        x: input tensor.
        kernel: kernel tensor.
        output_shape: desired dimensions of output, given in the layout
            named by `data_format`.
        strides: strides tuple.
        padding: string, "same" or "valid".
        data_format: "channels_last" or "channels_first".
            None resolves to the value of `image_data_format()`.
        filter_shape: shape of the kernel, passed through
            `_preprocess_conv3d_filter_shape`.

    # Returns
        The output of `_postprocess_conv3d_output` applied to the
        transposed-convolution result.

    # Raises
        ValueError: if `data_format` is neither "channels_first"
            nor "channels_last".
    """
    if data_format is None:
        data_format = image_data_format()
    if data_format not in {'channels_first', 'channels_last'}:
        raise ValueError('Unknown data_format: ' + str(data_format))

    if data_format == 'channels_last':
        # Move the channel entry (index 4) to position 1.
        batch, dim1, dim2, dim3, channels = (output_shape[0],
                                             output_shape[1],
                                             output_shape[2],
                                             output_shape[3],
                                             output_shape[4])
        output_shape = (batch, channels, dim1, dim2, dim3)

    x = _preprocess_conv3d_input(x, data_format)
    kernel = _preprocess_conv3d_kernel(kernel, data_format)
    # Swap the first two kernel axes.
    kernel = kernel.dimshuffle((1, 0, 2, 3, 4))
    border_mode = _preprocess_padding(padding)

    if hasattr(kernel, '_keras_shape'):
        kernel_shape = kernel._keras_shape
    else:
        # Will only work if `kernel` is a shared variable.
        kernel_shape = kernel.eval().shape

    filter_shape = _preprocess_conv3d_filter_shape(filter_shape, data_format)
    # Apply the same first-two-axes swap to the declared filter shape.
    filter_shape = tuple(filter_shape[axis] for axis in (1, 0, 2, 3, 4))

    result = T.nnet.abstract_conv.conv3d_grad_wrt_inputs(
        x, kernel, output_shape,
        filter_shape=filter_shape,
        border_mode=border_mode,
        subsample=strides,
        filter_flip=True)

    return _postprocess_conv3d_output(result, x, padding,
                                      kernel_shape, strides, data_format)
|
||||
|
||||
|
||||
def extract_image_patches(X, ksizes, strides, padding='valid', data_format='channels_first'):
|
||||
'''
|
||||
Extract the patches from an image
|
||||
@@ -197,3 +148,26 @@ def moments(x, axes, shift=None, keep_dims=False):
|
||||
var_batch = KTH.var(x, axis=axes, keepdims=keep_dims)
|
||||
|
||||
return mean_batch, var_batch
|
||||
|
||||
|
||||
def clip(x, min_value, max_value):
    """Clip `x` element-wise to the range `[min_value, max_value]`.

    The upper bound is raised to the lower bound when it is smaller, so
    for `min_value > max_value` the effective range is
    `[min_value, min_value]`.

    # Arguments
        x: Tensor or variable.
        min_value: Tensor, float, int, or None.
            None is treated as -infinity.
        max_value: Tensor, float, int, or None.
            None is treated as infinity.

    # Returns
        A tensor.
    """
    lower = -np.inf if min_value is None else min_value
    upper = np.inf if max_value is None else max_value
    # Never let the upper bound drop below the lower bound.
    upper = T.maximum(lower, upper)
    return T.clip(x, lower, upper)
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
import numpy as np
|
||||
import warnings
|
||||
|
||||
from keras.callbacks import Callback
|
||||
from keras.layers import Dense
|
||||
from keras import backend as K
|
||||
|
||||
|
||||
@@ -13,10 +11,11 @@ class DeadReluDetector(Callback):
|
||||
# Arguments
|
||||
x_train: Training dataset to check whether or not neurons fire
|
||||
verbose: verbosity mode
|
||||
True means that even a single dead neuron triggers warning
|
||||
True means that even a single dead neuron triggers a warning message
|
||||
False means that only significant number of dead neurons (10% or more)
|
||||
triggers warning
|
||||
triggers a warning message
|
||||
"""
|
||||
|
||||
def __init__(self, x_train, verbose=False):
|
||||
super(DeadReluDetector, self).__init__()
|
||||
self.x_train = x_train
|
||||
@@ -25,7 +24,8 @@ class DeadReluDetector(Callback):
|
||||
|
||||
@staticmethod
|
||||
def is_relu_layer(layer):
|
||||
return isinstance(layer, Dense) and layer.get_config()['activation'] == 'relu'
|
||||
# Should work for all layers with relu activation. Tested for Dense and Conv2D
|
||||
return 'activation' in layer.get_config() and layer.get_config()['activation'] == 'relu'
|
||||
|
||||
def get_relu_activations(self):
|
||||
model_input = self.model.input
|
||||
@@ -44,17 +44,43 @@ class DeadReluDetector(Callback):
|
||||
layer_outputs = [func(list_inputs)[0] for func in funcs]
|
||||
for layer_index, layer_activations in enumerate(layer_outputs):
|
||||
if self.is_relu_layer(self.model.layers[layer_index]):
|
||||
yield [layer_index, layer_activations]
|
||||
layer_name = self.model.layers[layer_index].name
|
||||
# layer_weight is a list [W] (+ [b])
|
||||
layer_weight = self.model.layers[layer_index].get_weights()
|
||||
# with kernel and bias, the weights are saved as a list [W, b]. If only weights, it is [W]
|
||||
if type(layer_weight) is not list:
|
||||
raise ValueError("'Layer_weight' should be a list, but was {}".format(type(layer_weight)))
|
||||
|
||||
layer_weight_shape = np.shape(layer_weight[0])
|
||||
yield [layer_index, layer_activations, layer_name, layer_weight_shape]
|
||||
|
||||
def on_epoch_end(self, epoch, logs={}):
|
||||
for relu_activation in self.get_relu_activations():
|
||||
layer_index, activation_values = relu_activation
|
||||
total_neurons = activation_values.shape[-1]
|
||||
dead_neurons = np.sum(activation_values == 0)
|
||||
dead_neurons_share = dead_neurons / total_neurons
|
||||
if (self.verbose and dead_neurons > 0) or dead_neurons_share > self.dead_neurons_share_threshold:
|
||||
warnings.warn(
|
||||
'Layer #{} has {} dead neurons ({:.2%})!'
|
||||
.format(layer_index, dead_neurons, dead_neurons_share),
|
||||
RuntimeWarning
|
||||
)
|
||||
layer_index, activation_values, layer_name, layer_weight_shape = relu_activation
|
||||
|
||||
shape_act = activation_values.shape
|
||||
|
||||
weight_len = len(layer_weight_shape)
|
||||
act_len = len(shape_act)
|
||||
|
||||
# should work for both Conv and Flat
|
||||
if K.image_data_format() == 'channels_last':
|
||||
# features in last axis
|
||||
axis_filter = -1
|
||||
else:
|
||||
# features before the convolution axis, for weight_len the input and output have to be subtracted
|
||||
axis_filter = -1 - (weight_len - 2)
|
||||
|
||||
total_featuremaps = shape_act[axis_filter]
|
||||
|
||||
axis = tuple(
|
||||
i for i in range(act_len) if (i != axis_filter) and (i != (len(shape_act) + axis_filter)))
|
||||
|
||||
dead_neurons = np.sum(np.sum(activation_values, axis=axis) == 0)
|
||||
|
||||
dead_neurons_share = float(dead_neurons) / float(total_featuremaps)
|
||||
if (self.verbose and dead_neurons > 0) or dead_neurons_share >= self.dead_neurons_share_threshold:
|
||||
str_warning = 'Layer {} (#{}) has {} dead neurons ({:.2%})!'.format(layer_name, layer_index,
|
||||
dead_neurons, dead_neurons_share)
|
||||
|
||||
print(str_warning)
|
||||
|
||||
Regular → Executable
+2
-2
@@ -16,7 +16,7 @@ def load_data(path='conll2000.zip', min_freq=2):
|
||||
archive.close()
|
||||
|
||||
word_counts = Counter(row[0].lower() for sample in train for row in sample)
|
||||
vocab = ['<pad>', '<unk>'] + [w for w, f in word_counts.iteritems() if f >= min_freq]
|
||||
vocab = ['<pad>', '<unk>'] + [w for w, f in iter(word_counts.items()) if f >= min_freq]
|
||||
pos_tags = sorted(list(set(row[1] for sample in train + test for row in sample))) # in alphabetic order
|
||||
chunk_tags = sorted(list(set(row[2] for sample in train + test for row in sample))) # in alphabetic order
|
||||
|
||||
@@ -27,7 +27,7 @@ def load_data(path='conll2000.zip', min_freq=2):
|
||||
|
||||
def _parse_data(fh):
|
||||
string = fh.read()
|
||||
data = [[row.split() for row in sample.split('\n')] for sample in string.strip().split('\n\n')]
|
||||
data = [[row.split() for row in sample.split('\n')] for sample in string.decode().strip().split('\n\n')]
|
||||
fh.close()
|
||||
return data
|
||||
|
||||
|
||||
@@ -236,3 +236,50 @@ class SReLU(Layer):
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
get_custom_objects().update({'SReLU': SReLU})
|
||||
|
||||
|
||||
class Swish(Layer):
    """ Swish (Ramachandran et al., 2017)

    Computes `x * sigmoid(beta * x)` element-wise.

    # Input shape
        Arbitrary. Use the keyword argument `input_shape`
        (tuple of integers, does not include the samples axis)
        when using this layer as the first layer in a model.

    # Output shape
        Same shape as the input.

    # Arguments
        beta: float >= 0. Scaling factor
            if set to 1 and trainable set to False (default),
            Swish equals the SiLU activation (Elfwing et al., 2017)
        trainable: whether to learn the scaling factor during training or not

    # References
        - [Searching for Activation Functions](https://arxiv.org/abs/1710.05941)
        - [Sigmoid-weighted linear units for neural network function approximation in reinforcement learning](https://arxiv.org/abs/1702.03118)
    """

    def __init__(self, beta=1.0, trainable=False, **kwargs):
        super(Swish, self).__init__(**kwargs)
        self.supports_masking = True
        self.beta = beta
        self.trainable = trainable

    def build(self, input_shape):
        # The scaling factor is always a backend variable; it is only
        # registered as a trainable weight when `trainable` is set.
        self.scaling_factor = K.variable(self.beta,
                                         dtype=K.floatx(),
                                         name='scaling_factor')
        if self.trainable:
            self._trainable_weights.append(self.scaling_factor)
        super(Swish, self).build(input_shape)

    def call(self, inputs, mask=None):
        return inputs * K.sigmoid(self.scaling_factor * inputs)

    def get_config(self):
        # For a trainable layer report the current (learned) value of
        # beta. Before `build` there are no weights yet, so fall back to
        # the constructor value instead of raising IndexError.
        beta = self.beta
        if self.trainable:
            weights = self.get_weights()
            if weights:
                # `.tolist()` turns the NumPy value into plain Python
                # data so the config contains no array objects.
                beta = weights[0].tolist()
        config = {'beta': beta,
                  'trainable': self.trainable}
        base_config = super(Swish, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
get_custom_objects().update({'Swish': Swish})
|
||||
|
||||
@@ -16,220 +16,6 @@ from keras.utils.conv_utils import normalize_data_format
|
||||
import numpy as np
|
||||
|
||||
|
||||
class Deconvolution3D(Convolution3D):
    """Transposed convolution operator for filtering windows of 3-D inputs.

    The need for transposed convolutions generally arises from the desire to
    use a transformation going in the opposite direction
    of a normal convolution, i.e., from something that has the shape
    of the output of some convolution to something that has the shape
    of its input while maintaining a connectivity pattern
    that is compatible with said convolution.

    When using this layer as the first layer in a model,
    provide the keyword argument `input_shape`
    (tuple of integers, does not include the sample axis),
    e.g. `input_shape=(3, 128, 128, 128)` for a 128x128x128 volume with
    three channels.

    To pass the correct `output_shape` to this layer,
    one could use a test model to predict and observe the actual output shape.

    # Examples

    ```python
        # channels_first data format.
        # apply a 3x3x3 transposed convolution
        # with stride 1x1x1 and 3 output filters on a 12x12x12 image:
        model = Sequential()
        model.add(Deconvolution3D(3, (3, 3, 3),
                                  output_shape=(None, 3, 14, 14, 14),
                                  padding='valid',
                                  data_format='channels_first',
                                  input_shape=(3, 12, 12, 12)))

        # we can predict with the model and print the shape of the array.
        dummy_input = np.ones((32, 3, 12, 12, 12))
        preds = model.predict(dummy_input)
        print(preds.shape)  # (32, 3, 14, 14, 14)

        # apply a 3x3x3 transposed convolution
        # with stride 2x2x2 and 3 output filters on a 12x12x12 image:
        model = Sequential()
        model.add(Deconvolution3D(3, (3, 3, 3),
                                  output_shape=(None, 3, 25, 25, 25),
                                  strides=(2, 2, 2),
                                  padding='valid',
                                  data_format='channels_first',
                                  input_shape=(3, 12, 12, 12)))
        model.summary()

        # we can predict with the model and print the shape of the array.
        dummy_input = np.ones((32, 3, 12, 12, 12))
        preds = model.predict(dummy_input)
        print(preds.shape)  # (32, 3, 25, 25, 25)
    ```

    ```python
        # channels_last data format.
        # apply a 3x3x3 transposed convolution
        # with stride 1x1x1 and 3 output filters on a 12x12x12 image:
        model = Sequential()
        model.add(Deconvolution3D(3, (3, 3, 3),
                                  output_shape=(None, 14, 14, 14, 3),
                                  padding='valid',
                                  data_format='channels_last',
                                  input_shape=(12, 12, 12, 3)))

        # we can predict with the model and print the shape of the array.
        dummy_input = np.ones((32, 12, 12, 12, 3))
        preds = model.predict(dummy_input)
        print(preds.shape)  # (32, 14, 14, 14, 3)

        # apply a 3x3x3 transposed convolution
        # with stride 2x2x2 and 3 output filters on a 12x12x12 image:
        model = Sequential()
        model.add(Deconvolution3D(3, (3, 3, 3),
                                  output_shape=(None, 25, 25, 25, 3),
                                  strides=(2, 2, 2),
                                  padding='valid',
                                  data_format='channels_last',
                                  input_shape=(12, 12, 12, 3)))
        model.summary()

        # we can predict with the model and print the shape of the array.
        dummy_input = np.ones((32, 12, 12, 12, 3))
        preds = model.predict(dummy_input)
        print(preds.shape)  # (32, 25, 25, 25, 3)
    ```

    # Arguments
        filters: Number of transposed convolution filters to use.
        kernel_size: An integer or tuple/list of 3 integers, specifying the
            dimensions of the convolution window.
        output_shape: Output shape of the transposed convolution operation.
            tuple of integers
            `(nb_samples, filters, conv_dim1, conv_dim2, conv_dim3)`.
            A tuple of length 4 is taken to omit the batch size and
            is prefixed with `None`.
            It is better to use
            a dummy input and observe the actual output shape of
            a layer, as specified in the examples.
        kernel_initializer: name of initialization function for the
            weights of the layer
            (see [initializers](../initializers.md)).
            This parameter is only relevant if you don't pass
            a `weights` argument.
        bias_initializer: name of initialization function for the bias
            (see [initializers](../initializers.md)).
        activation: name of activation function to use
            (see [activations](../activations.md)),
            or alternatively, elementwise Theano/TensorFlow function.
            If you don't specify anything, no activation is applied
            (ie. "linear" activation: a(x) = x).
        weights: list of numpy arrays to set as initial weights.
        padding: 'valid', 'same' or 'full'
            ('full' requires the Theano backend).
        strides: tuple of length 3. Factor by which to oversample output.
            Also called strides elsewhere.
        kernel_regularizer: instance of [WeightRegularizer](../regularizers.md)
            (eg. L1 or L2 regularization), applied to the main weights matrix.
        bias_regularizer: instance of [WeightRegularizer](../regularizers.md),
            applied to the bias.
        activity_regularizer: instance of [ActivityRegularizer](../regularizers.md),
            applied to the network output.
        kernel_constraint: instance of the [constraints](../constraints.md) module
            (eg. maxnorm, nonneg), applied to the main weights matrix.
        bias_constraint: instance of the [constraints](../constraints.md) module,
            applied to the bias.
        data_format: 'channels_first' or 'channels_last'. In 'channels_first' mode, the channels dimension
            (the depth) is at index 1, in 'channels_last' mode it is at index 4.
            It defaults to the `image_data_format` value found in your
            Keras config file at `~/.keras/keras.json`.
            If you never set it, then it will be "channels_last".
        use_bias: whether to include a bias
            (i.e. make the layer affine rather than linear).

    # Input shape
        5D tensor with shape:
        `(samples, channels, conv_dim1, conv_dim2, conv_dim3)` if data_format='channels_first'
        or 5D tensor with shape:
        `(samples, conv_dim1, conv_dim2, conv_dim3, channels)` if data_format='channels_last'.

    # Output shape
        5D tensor with shape:
        `(samples, filters, nekernel_conv_dim1, nekernel_conv_dim2, nekernel_conv_dim3)` if data_format='channels_first'
        or 5D tensor with shape:
        `(samples, nekernel_conv_dim1, nekernel_conv_dim2, nekernel_conv_dim3, filters)` if data_format='channels_last'.
        `nekernel_conv_dim1`, `nekernel_conv_dim2` and `nekernel_conv_dim3` values might have changed due to padding.

    # References
        - [A guide to convolution arithmetic for deep learning](https://arxiv.org/abs/1603.07285v1)
        - [Transposed convolution arithmetic](http://deeplearning.net/software/theano_versions/dev/tutorial/conv_arithmetic.html#transposed-convolution-arithmetic)
        - [Deconvolutional Networks](http://www.matthewzeiler.com/pubs/cvpr2010/cvpr2010.pdf)
    """

    def __init__(self, filters, kernel_size,
                 output_shape, activation=None, weights=None,
                 padding='valid', strides=(1, 1, 1), data_format=None,
                 kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None,
                 kernel_constraint=None, bias_constraint=None,
                 use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', **kwargs):
        if padding not in {'valid', 'same', 'full'}:
            raise ValueError('Invalid border mode for Deconvolution3D: ' + str(padding))
        if len(output_shape) == 4:
            # missing the batch size
            output_shape = (None,) + tuple(output_shape)

        # Trailing underscore keeps this apart from the `output_shape`
        # attribute name.
        self.output_shape_ = output_shape

        super(Deconvolution3D, self).__init__(kernel_size=kernel_size,
                                              filters=filters,
                                              activation=activation,
                                              weights=weights,
                                              padding=padding,
                                              strides=strides,
                                              data_format=data_format,
                                              kernel_regularizer=kernel_regularizer,
                                              bias_regularizer=bias_regularizer,
                                              activity_regularizer=activity_regularizer,
                                              kernel_constraint=kernel_constraint,
                                              bias_constraint=bias_constraint,
                                              use_bias=use_bias,
                                              kernel_initializer=kernel_initializer,
                                              bias_initializer=bias_initializer,
                                              **kwargs)

    def compute_output_shape(self, input_shape):
        # The spatial dimensions come from the user-supplied output shape;
        # the batch size comes from the input, the channels from `filters`.
        if self.data_format == 'channels_first':
            conv_dim1 = self.output_shape_[2]
            conv_dim2 = self.output_shape_[3]
            conv_dim3 = self.output_shape_[4]
            return (input_shape[0], self.filters, conv_dim1, conv_dim2, conv_dim3)
        elif self.data_format == 'channels_last':
            conv_dim1 = self.output_shape_[1]
            conv_dim2 = self.output_shape_[2]
            conv_dim3 = self.output_shape_[3]
            return (input_shape[0], conv_dim1, conv_dim2, conv_dim3, self.filters)
        else:
            raise ValueError('Invalid data format: ' + str(self.data_format))

    def call(self, x, mask=None):
        # Reads the kernel value to pass its concrete shape to the backend.
        kernel_shape = K.get_value(self.kernel).shape
        output = K.deconv3d(x, self.kernel, self.output_shape_,
                            strides=self.strides,
                            padding=self.padding,
                            data_format=self.data_format,
                            filter_shape=kernel_shape)
        if self.use_bias:
            # Reshape the bias so it lines up with the channel axis.
            if self.data_format == 'channels_first':
                output += K.reshape(self.bias, (1, self.filters, 1, 1, 1))
            elif self.data_format == 'channels_last':
                output += K.reshape(self.bias, (1, 1, 1, 1, self.filters))
            else:
                raise ValueError('Invalid data_format: ' + str(self.data_format))
        output = self.activation(output)
        return output

    def get_config(self):
        config = {'output_shape': self.output_shape_}
        base_config = super(Deconvolution3D, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
|
||||
Deconv3D = Deconvolution3D
|
||||
get_custom_objects().update({'Deconvolution3D': Deconvolution3D})
|
||||
get_custom_objects().update({'Deconv3D': Deconv3D})
|
||||
|
||||
|
||||
class CosineConvolution2D(Layer):
|
||||
"""Cosine Normalized Convolution operator for filtering windows of two-dimensional inputs.
|
||||
Cosine Normalization: Using Cosine Similarity Instead of Dot Product in Neural Networks
|
||||
|
||||
@@ -219,7 +219,7 @@ class BatchRenormalization(Layer):
|
||||
self.initial_weights = weights
|
||||
self.r_max_value = r_max_value
|
||||
self.d_max_value = d_max_value
|
||||
self.t_delta = K.variable(np.array(t_delta))
|
||||
self.t_delta = t_delta
|
||||
self.beta_initializer = initializers.get(beta_initializer)
|
||||
self.gamma_initializer = initializers.get(gamma_initializer)
|
||||
self.moving_mean_initializer = initializers.get(moving_mean_initializer)
|
||||
@@ -266,11 +266,13 @@ class BatchRenormalization(Layer):
|
||||
name='{}_running_std'.format(self.name),
|
||||
trainable=False)
|
||||
|
||||
self.r_max = K.variable(np.ones((1,)), name='{}_r_max'.format(self.name))
|
||||
self.r_max = K.variable(1, name='{}_r_max'.format(self.name))
|
||||
|
||||
self.d_max = K.variable(np.zeros((1,)), name='{}_d_max'.format(self.name))
|
||||
self.d_max = K.variable(0, name='{}_d_max'.format(self.name))
|
||||
|
||||
self.t = K.variable(np.zeros((1,)), name='{}_t'.format(self.name))
|
||||
self.t = K.variable(0, name='{}_t'.format(self.name))
|
||||
|
||||
self.t_delta_tensor = K.constant(self.t_delta)
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
@@ -290,13 +292,11 @@ class BatchRenormalization(Layer):
|
||||
mean_batch, var_batch = K.moments(inputs, reduction_axes, shift=None, keep_dims=False)
|
||||
std_batch = (K.sqrt(var_batch + self.epsilon))
|
||||
|
||||
r_max_value = K.get_value(self.r_max)
|
||||
r = std_batch / (K.sqrt(self.running_variance + self.epsilon))
|
||||
r = K.stop_gradient(K.clip(r, 1 / r_max_value, r_max_value))
|
||||
r = K.stop_gradient(K.clip(r, 1 / self.r_max, self.r_max))
|
||||
|
||||
d_max_value = K.get_value(self.d_max)
|
||||
d = (mean_batch - self.running_mean) / K.sqrt(self.running_variance + self.epsilon)
|
||||
d = K.stop_gradient(K.clip(d, -d_max_value, d_max_value))
|
||||
d = K.stop_gradient(K.clip(d, -self.d_max, self.d_max))
|
||||
|
||||
if sorted(reduction_axes) == range(K.ndim(inputs))[:-1]:
|
||||
x_normed_batch = (inputs - mean_batch) / std_batch
|
||||
@@ -323,7 +323,7 @@ class BatchRenormalization(Layer):
|
||||
|
||||
self.add_update([K.update(self.r_max, r_val),
|
||||
K.update(self.d_max, d_val),
|
||||
K.update_add(self.t, self.t_delta)], x)
|
||||
K.update_add(self.t, self.t_delta_tensor)], inputs)
|
||||
|
||||
if training in {0, False}:
|
||||
return x_normed
|
||||
@@ -358,13 +358,15 @@ class BatchRenormalization(Layer):
|
||||
def get_config(self):
|
||||
config = {'epsilon': self.epsilon,
|
||||
'axis': self.axis,
|
||||
'center': self.center,
|
||||
'scale': self.scale,
|
||||
'momentum': self.momentum,
|
||||
'gamma_regularizer': initializers.serialize(self.gamma_regularizer),
|
||||
'beta_regularizer': initializers.serialize(self.beta_regularizer),
|
||||
'moving_mean_initializer': initializers.serialize(self.moving_mean_initializer),
|
||||
'moving_variance_initializer': initializers.serialize(self.moving_variance_initializer),
|
||||
'beta_constraint': constraints.serialize(self.beta_constraint),
|
||||
'gamma_constraint': constraints.serialize(self.gamma_constraint),
|
||||
'momentum': self.momentum,
|
||||
'r_max_value': self.r_max_value,
|
||||
'd_max_value': self.d_max_value,
|
||||
't_delta': self.t_delta}
|
||||
|
||||
@@ -8,5 +8,3 @@ from .. import initializers
|
||||
from .. import regularizers
|
||||
from keras.engine import Layer
|
||||
from keras.engine import InputSpec
|
||||
|
||||
from keras.layers.recurrent import _time_distributed_dense
|
||||
|
||||
@@ -2,7 +2,6 @@ from __future__ import absolute_import
|
||||
from keras.optimizers import Optimizer
|
||||
from .. import backend as K
|
||||
from keras.utils.generic_utils import get_custom_objects
|
||||
from keras.legacy import interfaces
|
||||
|
||||
|
||||
class FTML(Optimizer):
|
||||
@@ -31,7 +30,6 @@ class FTML(Optimizer):
|
||||
self.epsilon = epsilon
|
||||
self.inital_decay = decay
|
||||
|
||||
@interfaces.legacy_get_updates_support
|
||||
def get_updates(self, loss, params):
|
||||
grads = self.get_gradients(loss, params)
|
||||
self.updates = [K.update_add(self.iterations, 1)]
|
||||
|
||||
@@ -3,11 +3,32 @@ from setuptools import find_packages
|
||||
|
||||
|
||||
setup(name='keras_contrib',
|
||||
version='1.2.1',
|
||||
description='Keras community contributions',
|
||||
version='2.0.8',
|
||||
description='Keras Deep Learning for Python, Community Contributions',
|
||||
author='Fariz Rahman',
|
||||
author_email='farizrahman4u@gmail.com',
|
||||
url='https://github.com/farizrahman4u/keras-contrib',
|
||||
license='MIT',
|
||||
install_requires=['keras'],
|
||||
extras_require={
|
||||
'h5py': ['h5py'],
|
||||
'visualize': ['pydot>=1.2.0'],
|
||||
'tests': ['pytest',
|
||||
'pytest-pep8',
|
||||
'pytest-xdist',
|
||||
'pytest-cov'],
|
||||
},
|
||||
classifiers=[
|
||||
'Development Status :: 3 - Alpha',
|
||||
'Intended Audience :: Developers',
|
||||
'Intended Audience :: Education',
|
||||
'Intended Audience :: Science/Research',
|
||||
'License :: OSI Approved :: MIT License',
|
||||
'Programming Language :: Python :: 2',
|
||||
'Programming Language :: Python :: 2.7',
|
||||
'Programming Language :: Python :: 3',
|
||||
'Programming Language :: Python :: 3.6',
|
||||
'Topic :: Software Development :: Libraries',
|
||||
'Topic :: Software Development :: Libraries :: Python Modules'
|
||||
],
|
||||
packages=find_packages())
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import pytest
|
||||
from numpy.testing import assert_allclose
|
||||
import numpy as np
|
||||
import scipy.sparse as sparse
|
||||
|
||||
from keras import backend as K
|
||||
from keras.backend import theano_backend as KTH, floatx, set_floatx, variable
|
||||
@@ -157,8 +156,46 @@ class TestBackend(object):
|
||||
th_var_val = KTH.eval(th_var)
|
||||
tf_var_val = KTF.eval(tf_var)
|
||||
|
||||
assert_allclose(th_mean_val, tf_mean_val, rtol=1e-4)
|
||||
assert_allclose(th_var_val, tf_var_val, rtol=1e-4)
|
||||
# absolute tolerance needed when working with zeros
|
||||
assert_allclose(th_mean_val, tf_mean_val, rtol=1e-4, atol=1e-10)
|
||||
assert_allclose(th_var_val, tf_var_val, rtol=1e-4, atol=1e-10)
|
||||
|
||||
def test_clip(self):
|
||||
check_single_tensor_operation('clip', (4, 2), min_value=0.4, max_value=0.6)
|
||||
check_single_tensor_operation('clip', (4, 2), min_value=0.4, max_value=None)
|
||||
|
||||
cases = [
|
||||
# (x, min_value, max_value, expected)
|
||||
(1, 0, 2, 1),
|
||||
(1, 2, 0, 2),
|
||||
(-1, 0, 2, 0),
|
||||
(-1, 2, 0, 2),
|
||||
(3, 0, 2, 2),
|
||||
(3, 2, 0, 2),
|
||||
(1, 0, np.inf, 1),
|
||||
(1, np.inf, 0, np.inf),
|
||||
(1, 0, -np.inf, 0),
|
||||
(1, -np.inf, 0, 0),
|
||||
(-1, 0, -np.inf, 0),
|
||||
(-1, -np.inf, 0, -1),
|
||||
(1, 0, None, 1),
|
||||
(-1, 0, None, 0),
|
||||
|
||||
# NOTE: In the following two cases, Keras 2.0.8 raises an
|
||||
# error on all backends, but this is a sensible extension.
|
||||
(1, None, 0, 0),
|
||||
(-1, None, 0, -1),
|
||||
|
||||
# NOTE: In the following case, Keras 2.0.8 raises an error
|
||||
# for TensorFlow and Theano, but returns 0 for CNTK. This
|
||||
# extends the TensorFlow and Theano backends to match the
|
||||
# CNTK behavior instead of raising an error.
|
||||
(0, None, None, 0),
|
||||
]
|
||||
for K_, KC_ in [(KTF, KCTF), (KTH, KCTH)]:
|
||||
for x, min_value, max_value, expected in cases:
|
||||
actual = K_.eval(KC_.clip(K_.constant(x), min_value, max_value))
|
||||
assert_allclose(expected, actual, atol=1e-5)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
@@ -1,40 +1,191 @@
|
||||
import pytest
|
||||
import warnings
|
||||
import numpy as np
|
||||
import sys
|
||||
|
||||
if (sys.version_info > (3, 0)):
|
||||
from io import StringIO
|
||||
else:
|
||||
from StringIO import StringIO
|
||||
|
||||
from keras_contrib import callbacks
|
||||
from keras.models import Sequential
|
||||
from keras.layers import Dense
|
||||
from keras.layers import Dense, Conv2D, Flatten
|
||||
from keras import backend as K
|
||||
|
||||
n_out = 11 # with 1 neuron dead, 1/11 is just below the threshold of 10% with verbose = False
|
||||
|
||||
|
||||
def check_print(do_train, expected_warnings, nr_dead=None, perc_dead=None):
|
||||
"""
|
||||
Receive stdout to check if correct warning message is delivered
|
||||
:param nr_dead: int
|
||||
:param perc_dead: float, 10% should be written as 0.1
|
||||
"""
|
||||
|
||||
saved_stdout = sys.stdout
|
||||
|
||||
out = StringIO()
|
||||
out.flush()
|
||||
sys.stdout = out # overwrite current stdout
|
||||
|
||||
do_train()
|
||||
|
||||
stdoutput = out.getvalue().strip() # get prints, can be something like: "Layer dense (#0) has 2 dead neurons (20.00%)!"
|
||||
str_to_count = "dead neurons"
|
||||
count = stdoutput.count(str_to_count)
|
||||
|
||||
sys.stdout = saved_stdout # restore stdout
|
||||
out.close()
|
||||
|
||||
assert expected_warnings == count
|
||||
if expected_warnings and (nr_dead is not None):
|
||||
str_to_check = 'has {} dead'.format(nr_dead)
|
||||
assert str_to_check in stdoutput, '"{}" not in "{}"'.format(str_to_check, stdoutput)
|
||||
if expected_warnings and (perc_dead is not None):
|
||||
str_to_check = 'neurons ({:.2%})!'.format(perc_dead)
|
||||
assert str_to_check in stdoutput, '"{}" not in "{}"'.format(str_to_check, stdoutput)
|
||||
|
||||
|
||||
def test_DeadDeadReluDetector():
|
||||
def do_test(weights, expected_warnings, verbose):
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
dataset = np.ones((1, 1, 1)) # data to be fed as training
|
||||
n_samples = 9
|
||||
|
||||
input_shape = (n_samples, 3, 4) # 4 input features
|
||||
shape_out = (n_samples, 3, n_out) # 11 output features
|
||||
shape_weights = (4, n_out)
|
||||
|
||||
# ignore batch size
|
||||
input_shape_dense = tuple(input_shape[1:])
|
||||
|
||||
def do_test(weights, expected_warnings, verbose, nr_dead=None, perc_dead=None):
|
||||
|
||||
def do_train():
|
||||
dataset = np.ones(input_shape) # data to be fed as training
|
||||
model = Sequential()
|
||||
model.add(Dense(10, activation='relu', input_shape=(1, 1), use_bias=False, weights=[weights]))
|
||||
model.add(Dense(n_out, activation='relu', input_shape=input_shape_dense,
|
||||
use_bias=False, weights=[weights], name='dense'))
|
||||
model.compile(optimizer='sgd', loss='categorical_crossentropy')
|
||||
model.fit(
|
||||
dataset,
|
||||
np.ones((1, 1, 10)),
|
||||
np.ones(shape_out),
|
||||
batch_size=1,
|
||||
epochs=1,
|
||||
callbacks=[callbacks.DeadReluDetector(dataset, verbose=verbose)],
|
||||
verbose=False
|
||||
)
|
||||
assert len(w) == expected_warnings
|
||||
for warn_item in w:
|
||||
assert issubclass(warn_item.category, RuntimeWarning)
|
||||
assert "dead neurons" in str(warn_item.message)
|
||||
|
||||
weights_1_dead = np.ones((1, 10)) # weights that correspond to NN with 1/10 neurons dead
|
||||
check_print(do_train, expected_warnings, nr_dead, perc_dead)
|
||||
|
||||
weights_1_dead = np.ones(shape_weights) # weights that correspond to NN with 1/11 neurons dead
|
||||
weights_2_dead = np.ones(shape_weights) # weights that correspond to NN with 2/11 neurons dead
|
||||
weights_all_dead = np.zeros(shape_weights) # weights that correspond to all neurons dead
|
||||
|
||||
weights_1_dead[:, 0] = 0
|
||||
weights_2_dead = np.ones((1, 10)) # weights that correspond to NN with 2/10 neurons dead
|
||||
weights_2_dead[:, 0] = 0
|
||||
weights_2_dead[:, 1] = 0
|
||||
weights_2_dead[:, 0:2] = 0
|
||||
|
||||
do_test(weights_1_dead, verbose=True, expected_warnings=1)
|
||||
do_test(weights_1_dead, verbose=True, expected_warnings=1, nr_dead=1, perc_dead=1. / n_out)
|
||||
do_test(weights_1_dead, verbose=False, expected_warnings=0)
|
||||
do_test(weights_2_dead, verbose=True, expected_warnings=1)
|
||||
do_test(weights_2_dead, verbose=True, expected_warnings=1, nr_dead=2, perc_dead=2. / n_out)
|
||||
# do_test(weights_all_dead, verbose=True, expected_warnings=1, nr_dead=n_out, perc_dead=1.)
|
||||
|
||||
|
||||
def test_DeadDeadReluDetector_bias():
|
||||
n_samples = 9
|
||||
|
||||
input_shape = (n_samples, 4) # 4 input features
|
||||
shape_weights = (4, n_out)
|
||||
shape_bias = (n_out, )
|
||||
shape_out = (n_samples, n_out) # 11 output features
|
||||
|
||||
# ignore batch size
|
||||
input_shape_dense = tuple(input_shape[1:])
|
||||
|
||||
def do_test(weights, bias, expected_warnings, verbose, nr_dead=None, perc_dead=None):
|
||||
|
||||
def do_train():
|
||||
dataset = np.ones(input_shape) # data to be fed as training
|
||||
model = Sequential()
|
||||
model.add(Dense(n_out, activation='relu', input_shape=input_shape_dense,
|
||||
use_bias=True, weights=[weights, bias], name='dense'))
|
||||
model.compile(optimizer='sgd', loss='categorical_crossentropy')
|
||||
model.fit(
|
||||
dataset,
|
||||
np.ones(shape_out),
|
||||
batch_size=1,
|
||||
epochs=1,
|
||||
callbacks=[callbacks.DeadReluDetector(dataset, verbose=verbose)],
|
||||
verbose=False
|
||||
)
|
||||
|
||||
check_print(do_train, expected_warnings, nr_dead, perc_dead)
|
||||
|
||||
weights_1_dead = np.ones(shape_weights) # weights that correspond to NN with 1/11 neurons dead
|
||||
weights_2_dead = np.ones(shape_weights) # weights that correspond to NN with 2/11 neurons dead
|
||||
weights_all_dead = np.zeros(shape_weights) # weights that correspond to all neurons dead
|
||||
|
||||
weights_1_dead[:, 0] = 0
|
||||
weights_2_dead[:, 0:2] = 0
|
||||
|
||||
bias = np.zeros(shape_bias)
|
||||
|
||||
do_test(weights_1_dead, bias, verbose=True, expected_warnings=1, nr_dead=1, perc_dead=1. / n_out)
|
||||
do_test(weights_1_dead, bias, verbose=False, expected_warnings=0)
|
||||
do_test(weights_2_dead, bias, verbose=True, expected_warnings=1, nr_dead=2, perc_dead=2. / n_out)
|
||||
# do_test(weights_all_dead, bias, verbose=True, expected_warnings=1, nr_dead=n_out, perc_dead=1.)
|
||||
|
||||
|
||||
def test_DeadDeadReluDetector_conv():
|
||||
n_samples = 9
|
||||
|
||||
# (5, 5) kernel, 4 input featuremaps and 11 output featuremaps
|
||||
if K.image_data_format() == 'channels_last':
|
||||
input_shape = (n_samples, 5, 5, 4)
|
||||
else:
|
||||
input_shape = (n_samples, 4, 5, 5)
|
||||
|
||||
# ignore batch size
|
||||
input_shape_conv = tuple(input_shape[1:])
|
||||
shape_weights = (5, 5, 4, n_out)
|
||||
shape_out = (n_samples, n_out)
|
||||
|
||||
def do_test(weights_bias, expected_warnings, verbose, nr_dead=None, perc_dead=None):
|
||||
"""
|
||||
:param perc_dead: as float, 10% should be written as 0.1
|
||||
"""
|
||||
|
||||
def do_train():
|
||||
dataset = np.ones(input_shape) # data to be fed as training
|
||||
model = Sequential()
|
||||
model.add(Conv2D(n_out, (5, 5), activation='relu', input_shape=input_shape_conv,
|
||||
use_bias=True, weights=weights_bias, name='conv'))
|
||||
model.add(Flatten()) # to handle Theano's categorical crossentropy
|
||||
model.compile(optimizer='sgd', loss='categorical_crossentropy')
|
||||
model.fit(
|
||||
dataset,
|
||||
np.ones(shape_out),
|
||||
batch_size=1,
|
||||
epochs=1,
|
||||
callbacks=[callbacks.DeadReluDetector(dataset, verbose=verbose)],
|
||||
verbose=False
|
||||
)
|
||||
|
||||
check_print(do_train, expected_warnings, nr_dead, perc_dead)
|
||||
|
||||
weights_1_dead = np.ones(shape_weights) # weights that correspond to NN with 1/11 neurons dead
|
||||
weights_1_dead[..., 0] = 0
|
||||
weights_2_dead = np.ones(shape_weights) # weights that correspond to NN with 2/11 neurons dead
|
||||
weights_2_dead[..., 0:2] = 0
|
||||
weights_all_dead = np.zeros(shape_weights) # weights that correspond to NN with all neurons dead
|
||||
|
||||
bias = np.zeros((11, ))
|
||||
|
||||
weights_bias_1_dead = [weights_1_dead, bias]
|
||||
weights_bias_2_dead = [weights_2_dead, bias]
|
||||
weights_bias_all_dead = [weights_all_dead, bias]
|
||||
|
||||
do_test(weights_bias_1_dead, verbose=True, expected_warnings=1, nr_dead=1, perc_dead=1. / n_out)
|
||||
do_test(weights_bias_1_dead, verbose=False, expected_warnings=0)
|
||||
do_test(weights_bias_2_dead, verbose=True, expected_warnings=1, nr_dead=2, perc_dead=2. / n_out)
|
||||
# do_test(weights_bias_all_dead, verbose=True, expected_warnings=1, nr_dead=n_out, perc_dead=1.)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
@@ -26,5 +26,18 @@ def test_srelu_share():
|
||||
layer_test(advanced_activations.SReLU, kwargs={'shared_axes': 1},
|
||||
input_shape=(2, 3, 4))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_swish_constant():
|
||||
layer_test(advanced_activations.Swish, kwargs={'beta': 1.0, 'trainable': False},
|
||||
input_shape=(2, 3, 4))
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_swish_trainable():
|
||||
layer_test(advanced_activations.Swish, kwargs={'beta': 1.0, 'trainable': True},
|
||||
input_shape=(2, 3, 4))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__])
|
||||
|
||||
@@ -17,67 +17,6 @@ else:
|
||||
_convolution_border_modes = ['valid', 'same']
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_deconvolution_3d():
|
||||
num_samples = 6
|
||||
num_filter = 4
|
||||
stack_size = 2
|
||||
kernel_dim1 = 12
|
||||
kernel_dim2 = 10
|
||||
kernel_dim3 = 8
|
||||
|
||||
for batch_size in [None, num_samples]:
|
||||
for border_mode in _convolution_border_modes:
|
||||
for subsample in [(1, 1, 1), (2, 2, 2)]:
|
||||
if border_mode == 'same' and subsample != (1, 1, 1):
|
||||
continue
|
||||
|
||||
dim1 = conv_input_length(kernel_dim1, 7,
|
||||
border_mode,
|
||||
subsample[0])
|
||||
dim2 = conv_input_length(kernel_dim2, 5,
|
||||
border_mode,
|
||||
subsample[1])
|
||||
dim3 = conv_input_length(kernel_dim3, 3,
|
||||
border_mode,
|
||||
subsample[2])
|
||||
layer_test(convolutional.Deconvolution3D,
|
||||
kwargs={'filters': num_filter,
|
||||
'kernel_size': (7, 5, 3),
|
||||
'output_shape': (batch_size, num_filter, dim1, dim2, dim3),
|
||||
'padding': border_mode,
|
||||
'strides': subsample,
|
||||
'data_format': 'channels_first'},
|
||||
input_shape=(num_samples, stack_size, kernel_dim1, kernel_dim2, kernel_dim3),
|
||||
|
||||
fixed_batch_size=True, tolerance=None)
|
||||
|
||||
layer_test(convolutional.Deconvolution3D,
|
||||
kwargs={'filters': num_filter,
|
||||
'kernel_size': (7, 5, 3),
|
||||
'output_shape': (batch_size, num_filter, dim1, dim2, dim3),
|
||||
'padding': border_mode,
|
||||
'strides': subsample,
|
||||
'data_format': 'channels_first',
|
||||
'kernel_regularizer': 'l2',
|
||||
'bias_regularizer': 'l2',
|
||||
'activity_regularizer': 'l2'},
|
||||
input_shape=(num_samples, stack_size, kernel_dim1, kernel_dim2, kernel_dim3),
|
||||
fixed_batch_size=True, tolerance=None)
|
||||
|
||||
layer_test(convolutional.Deconvolution3D,
|
||||
kwargs={'filters': num_filter,
|
||||
'kernel_size': (7, 5, 3),
|
||||
'output_shape': (num_filter, dim1, dim2, dim3),
|
||||
'padding': border_mode,
|
||||
'strides': subsample,
|
||||
'data_format': 'channels_first',
|
||||
'kernel_regularizer': 'l2',
|
||||
'bias_regularizer': 'l2',
|
||||
'activity_regularizer': 'l2'},
|
||||
input_shape=(num_samples, stack_size, kernel_dim1, kernel_dim2, kernel_dim3), tolerance=None)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_cosineconvolution_2d():
|
||||
num_samples = 2
|
||||
|
||||
@@ -25,9 +25,7 @@ def basic_instancenorm_test():
|
||||
input_shape=(3, 4, 2))
|
||||
layer_test(normalization.InstanceNormalization,
|
||||
kwargs={'gamma_initializer': 'ones',
|
||||
'beta_initializer': 'ones',
|
||||
'moving_mean_initializer': 'zeros',
|
||||
'moving_variance_initializer': 'ones'},
|
||||
'beta_initializer': 'ones'},
|
||||
input_shape=(3, 4, 2))
|
||||
layer_test(normalization.InstanceNormalization,
|
||||
kwargs={'scale': False, 'center': False},
|
||||
@@ -190,7 +188,7 @@ def test_instancenorm_perchannel_correctness():
|
||||
for channel in range(3):
|
||||
activations = out[instance, channel]
|
||||
assert abs(activations.mean()) > 1e-2
|
||||
assert abs(activations.std() - 1.0) > 1e-2
|
||||
assert abs(activations.std() - 1.0) > 1e-6
|
||||
|
||||
# but values are still normalized per-instance
|
||||
activations = out[instance]
|
||||
@@ -229,10 +227,11 @@ def basic_batchrenorm_test():
|
||||
|
||||
@keras_test
|
||||
def test_batchrenorm_mode_0_or_2():
|
||||
for training in [1, 0]:
|
||||
model = Sequential()
|
||||
norm_m0 = normalization.BatchRenormalization(input_shape=(10,), momentum=0.8)
|
||||
model.add(norm_m0)
|
||||
for training in [1, 0, None]:
|
||||
ip = Input(shape=(10,))
|
||||
norm_m0 = normalization.BatchRenormalization(momentum=0.8)
|
||||
out = norm_m0(ip, training=training)
|
||||
model = Model(ip, out)
|
||||
model.compile(loss='mse', optimizer='sgd')
|
||||
|
||||
# centered on 5.0, variance 10.0
|
||||
@@ -306,5 +305,37 @@ def test_shared_batchrenorm():
|
||||
new_model.train_on_batch(x, x)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_batchrenorm_clipping_schedule():
|
||||
'''Test that the clipping schedule isn't fixed at r_max=1, d_max=0'''
|
||||
inp = Input(shape=(10,))
|
||||
bn = normalization.BatchRenormalization(t_delta=1.)
|
||||
out = bn(inp)
|
||||
model = Model(inp, out)
|
||||
model.compile('sgd', 'mse')
|
||||
|
||||
x = np.random.normal(5, 10, size=(2, 10))
|
||||
y = np.random.normal(5, 10, size=(2, 10))
|
||||
|
||||
r_max, d_max = K.get_value(bn.r_max), K.get_value(bn.d_max)
|
||||
assert r_max == 1
|
||||
assert d_max == 0
|
||||
|
||||
for i in range(10):
|
||||
model.train_on_batch(x, y)
|
||||
|
||||
r_max, d_max = K.get_value(bn.r_max), K.get_value(bn.d_max)
|
||||
assert_allclose([r_max, d_max], [3, 5], atol=1e-1)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_batchrenorm_get_config():
|
||||
'''Test that get_config works on a model with a batchrenorm layer.'''
|
||||
x = Input(shape=(10,))
|
||||
y = normalization.BatchRenormalization()(x)
|
||||
model = Model(x, y)
|
||||
model.get_config()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pytest.main([__file__])
|
||||
|
||||
@@ -1,12 +1,16 @@
|
||||
import pytest
|
||||
import os
|
||||
from keras import backend as K
|
||||
from keras.layers import Input, Dense
|
||||
from keras.models import Model
|
||||
from numpy.testing import assert_allclose
|
||||
from keras.utils.test_utils import keras_test
|
||||
|
||||
from keras_contrib.utils.save_load_utils import save_all_weights, load_all_weights
|
||||
|
||||
|
||||
@pytest.mark.skipif(K.backend() != 'tensorflow', reason='save_all_weights and load_all_weights only supported on TensorFlow')
|
||||
@keras_test
|
||||
def test_save_and_load_all_weights():
|
||||
'''
|
||||
Test save_all_weights and load_all_weights. Save and load optimizer and model weights but not configuration.
|
||||
@@ -33,15 +37,16 @@ def test_save_and_load_all_weights():
|
||||
ow1value[0, 0:3] = [4, 2, 0]
|
||||
K.set_value(ow1, ow1value)
|
||||
# save all weights
|
||||
save_all_weights(m1, "model.h5")
|
||||
save_all_weights(m1, 'model.h5')
|
||||
# new model
|
||||
m2 = make_model()
|
||||
# load all weights
|
||||
load_all_weights(m2, "model.h5")
|
||||
load_all_weights(m2, 'model.h5')
|
||||
# check weights
|
||||
assert_allclose(K.get_value(m2.layers[1].kernel)[0, 0:4], [1, 3, 3, 7])
|
||||
# check optimizer weights
|
||||
assert_allclose(K.get_value(m2.optimizer.weights[3])[0, 0:3], [4, 2, 0])
|
||||
os.remove('model.h5')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
Reference in New Issue
Block a user