Fix channels_first and add preprocess_input (#46)

qubvel · web-flow · commit 293f19a03aa7 · 2019-07-26T23:24:04.000+03:00
* Fix channels_first, add preprocess_input
* Fix for theano backend
diff --git a/efficientnet/model.py b/efficientnet/model.py
@@ -38,6 +38,7 @@
 from keras_applications.imagenet_utils import decode_predictions
 
 from . import get_submodules_from_kwargs
+from .preprocessing import preprocess_input
 
 backend = None
 layers = None
@@ -189,7 +190,7 @@ def mb_conv_block(inputs, block_args, drop_rate=None, relu_fn=swish, prefix='',
     has_se = (block_args.se_ratio is not None) and (0 < block_args.se_ratio <= 1)
     bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1
 
-    # workaround over non working dropout in tf.keras
+    # workaround for tf.keras, where dropout does not work with None in noise_shape
     Dropout = get_dropout(
         backend=backend,
         layers=layers,
@@ -226,8 +227,9 @@ def mb_conv_block(inputs, block_args, drop_rate=None, relu_fn=swish, prefix='',
             block_args.input_filters * block_args.se_ratio
         ))
         se_tensor = layers.GlobalAveragePooling2D(name=prefix + 'se_squeeze')(x)
-        se_tensor = layers.Reshape((1, 1, filters),
-                                   name=prefix + 'se_reshape')(se_tensor)
+        
+        target_shape = (1, 1, filters) if backend.image_data_format() == 'channels_last' else (filters, 1, 1)
+        se_tensor = layers.Reshape(target_shape, name=prefix + 'se_reshape')(se_tensor)
         se_tensor = layers.Conv2D(num_reduced_filters, 1,
                                   activation=relu_fn,
                                   padding='same',
@@ -243,8 +245,10 @@ def mb_conv_block(inputs, block_args, drop_rate=None, relu_fn=swish, prefix='',
         if backend.backend() == 'theano':
             # For the Theano backend, we have to explicitly make
             # the excitation weights broadcastable.
+            pattern = ([True, True, True, False] if backend.image_data_format() == 'channels_last'
+                       else [True, False, True, True])
             se_tensor = layers.Lambda(
-                lambda x: backend.pattern_broadcast(x, [True, True, True, False]),
+                lambda x: backend.pattern_broadcast(x, pattern),
                 name=prefix + 'se_broadcast')(se_tensor)
         x = layers.multiply([x, se_tensor], name=prefix + 'se_excite')