focal loss implementation for tf keras (#32)

AakashKumarNain · seanpmorgan · commit b53d8a7e6734 · 2019-03-20T22:28:29.000-04:00
* ENH: Add focal loss
diff --git a/tensorflow_addons/losses/BUILD b/tensorflow_addons/losses/BUILD
@@ -6,6 +6,7 @@ py_library(
     name = "losses",
     srcs = [
         "__init__.py",
+        "focal_loss.py",
         "lifted.py",
         "metric_learning.py",
         "sparsemax_loss.py",
@@ -18,6 +19,19 @@ py_library(
     ],
 )
 
+py_test(
+    name = "focal_loss_test",
+    size = "small",
+    srcs = [
+        "focal_loss_test.py",
+    ],
+    main = "focal_loss_test.py",
+    srcs_version = "PY2AND3",
+    deps = [
+        ":losses",
+    ],
+)
+
 py_test(
     name = "sparsemax_loss_test",
     size = "small",
diff --git a/tensorflow_addons/losses/README.md b/tensorflow_addons/losses/README.md
@@ -3,13 +3,15 @@
 ## Maintainers
 | Submodule  |  Maintainers  | Contact Info   |
 |:---------- |:----------- |:------------- |
+| focal_loss |  SIG-Addons | addons@tensorflow.org |
 | lifted |  SIG-Addons | addons@tensorflow.org |
 | sparsemax_loss |  SIG-Addons | addons@tensorflow.org |
 | triplet |  SIG-Addons | addons@tensorflow.org |
 
 ## Components
 | Submodule | Loss  | Reference               |
 |:----------------------- |:---------------------|:--------------------------|
+| focal_loss | SigmoidFocalCrossEntropy | https://arxiv.org/abs/1708.02002  |
 | lifted | LiftedStructLoss | https://arxiv.org/abs/1511.06452       |
 | sparsemax_loss | SparsemaxLoss |  https://arxiv.org/abs/1602.02068 | 
 | triplet | TripletSemiHardLoss | https://arxiv.org/abs/1503.03832       |
@@ -34,4 +36,5 @@ must:
  * Add a `py_test` to this sub-package's BUILD file.
 
 #### Documentation Requirements
+ * Update the table of contents in the project's central README.
  * Update the table of contents in this sub-package's README.
diff --git a/tensorflow_addons/losses/__init__.py b/tensorflow_addons/losses/__init__.py
@@ -18,6 +18,7 @@
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_addons.losses.lifted import lifted_struct_loss
+from tensorflow_addons.losses.focal_loss import sigmoid_focal_crossentropy, SigmoidFocalCrossEntropy
+from tensorflow_addons.losses.lifted import lifted_struct_loss, LiftedStructLoss
 from tensorflow_addons.losses.sparsemax_loss import sparsemax_loss, SparsemaxLoss
-from tensorflow_addons.losses.triplet import triplet_semihard_loss
+from tensorflow_addons.losses.triplet import triplet_semihard_loss, TripletSemiHardLoss
diff --git a/tensorflow_addons/losses/focal_loss.py b/tensorflow_addons/losses/focal_loss.py
@@ -0,0 +1,137 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Implements Focal loss."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+import tensorflow.keras.backend as K
+from tensorflow_addons.utils import keras_utils
+
+
+@keras_utils.register_keras_custom_object
+class SigmoidFocalCrossEntropy(keras_utils.LossFunctionWrapper):
+    """Implements the focal loss function.
+
+    Focal loss was first introduced in the RetinaNet paper
+    (https://arxiv.org/pdf/1708.02002.pdf). Focal loss is extremely useful for
+    classification when you have highly imbalanced classes. It down-weights
+    well-classified examples and focuses on hard examples. The loss value is
+    much higher for a misclassified sample than for a well-classified one. One
+    of the best use-cases of focal loss is object detection, where the
+    imbalance between the background class and other classes is extremely high.
+
+    Usage:
+
+    ```python
+    fl = tfa.losses.SigmoidFocalCrossEntropy()
+    loss = fl(
+      [[1], [1], [0]],
+      [[0.97], [0.91], [0.03]])
+    print('Loss: ', loss.numpy())  # Loss: approx. [[6.85e-06]
+                                   #                [1.91e-04]
+                                   #                [2.06e-05]]
+    ```
+    Usage with tf.keras API:
+
+    ```python
+    model = tf.keras.Model(inputs, outputs)
+    model.compile('sgd', loss=tfa.losses.SigmoidFocalCrossEntropy())
+    ```
+
+    Args:
+      from_logits: whether predictions are logits, default value is False
+      alpha: balancing factor, default value is 0.25
+      gamma: modulating factor, default value is 2.0
+
+    Returns:
+      Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the
+          shape of `y_true` with the last axis reduced to 1; otherwise, scalar.
+
+    Raises:
+        ValueError: If the shape of `sample_weight` is invalid or value of
+          `gamma` is less than zero
+    """
+
+    def __init__(self,
+                 from_logits=False,
+                 alpha=0.25,
+                 gamma=2.0,
+                 reduction=tf.keras.losses.Reduction.NONE,
+                 name='sigmoid_focal_crossentropy'):
+        super(SigmoidFocalCrossEntropy, self).__init__(
+            sigmoid_focal_crossentropy,
+            name=name,
+            reduction=reduction,
+            from_logits=from_logits,
+            alpha=alpha,
+            gamma=gamma)
+
+        self.from_logits = from_logits
+        self.alpha = alpha
+        self.gamma = gamma
+
+
+@keras_utils.register_keras_custom_object
+@tf.function
+def sigmoid_focal_crossentropy(y_true,
+                               y_pred,
+                               alpha=0.25,
+                               gamma=2.0,
+                               from_logits=False):
+    """Computes sigmoid focal cross-entropy, averaged over the last axis.
+
+    Args:
+        y_true: true targets tensor.
+        y_pred: predictions tensor.
+        alpha: balancing factor; not applied when `None` or 0.
+        gamma: modulating factor; not applied when `None` or 0.
+        from_logits: whether `y_pred` is logits instead of probabilities.
+    Returns:
+        Float `Tensor` of focal loss values, last axis reduced to size 1.
+    Raises:
+        ValueError: If `gamma` is less than zero.
+    """
+    if gamma and gamma < 0:
+        raise ValueError(
+            "Value of gamma should be greater than or equal to zero")
+
+    y_pred = tf.convert_to_tensor(y_pred)
+    y_true = tf.cast(y_true, y_pred.dtype)
+
+    bce = K.binary_crossentropy(y_true, y_pred, from_logits=from_logits)
+
+    # If logits are provided then convert the predictions into probabilities
+    if from_logits:
+        y_pred = K.sigmoid(y_pred)
+    else:
+        y_pred = K.clip(y_pred, K.epsilon(), 1. - K.epsilon())
+
+    p_t = (y_true * y_pred) + ((1 - y_true) * (1 - y_pred))
+    alpha_factor = 1
+    modulating_factor = 1
+
+    # Build both factors in y_pred's dtype; K.floatx() may differ from it.
+    if alpha:
+        alpha = tf.cast(alpha, dtype=y_pred.dtype)
+        alpha_factor = y_true * alpha + ((1 - alpha) * (1 - y_true))
+    if gamma:
+        gamma = tf.cast(gamma, dtype=y_pred.dtype)
+        modulating_factor = K.pow((1 - p_t), gamma)
+
+    return K.mean(
+        alpha_factor * modulating_factor * bce, axis=-1, keepdims=True)
diff --git a/tensorflow_addons/losses/focal_loss_test.py b/tensorflow_addons/losses/focal_loss_test.py
@@ -0,0 +1,112 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for focal loss."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import tensorflow as tf
+import tensorflow.keras.backend as K
+from tensorflow_addons.utils import test_utils
+from tensorflow_addons.losses import sigmoid_focal_crossentropy, SigmoidFocalCrossEntropy
+
+
+@test_utils.run_all_in_graph_and_eager_modes
+class SigmoidFocalCrossEntropyTest(tf.test.TestCase):
+    def test_config(self):
+        bce_obj = SigmoidFocalCrossEntropy(
+            reduction=tf.keras.losses.Reduction.NONE,
+            name='sigmoid_focal_crossentropy')
+        self.assertEqual(bce_obj.name, 'sigmoid_focal_crossentropy')
+        self.assertEqual(bce_obj.reduction, tf.keras.losses.Reduction.NONE)
+
+    def to_logit(self, prob):  # log-odds of a probability
+        logit = np.log(prob / (1. - prob))
+        return logit
+
+    def log10(self, x):  # base-10 log via change of base
+        numerator = tf.math.log(x)
+        denominator = tf.math.log(tf.constant(10, dtype=numerator.dtype))
+        return numerator / denominator
+
+    # Test with logits (`from_logits=True`)
+    def test_with_logits(self):
+        # predictions represented as logits
+        prediction_tensor = tf.constant(
+            [[self.to_logit(0.97)], [self.to_logit(0.91)],
+             [self.to_logit(0.73)], [self.to_logit(0.27)],
+             [self.to_logit(0.09)], [self.to_logit(0.03)]], tf.float32)
+        # Ground truth
+        target_tensor = tf.constant([[1], [1], [1], [0], [0], [0]], tf.float32)
+
+        fl = sigmoid_focal_crossentropy(
+            y_true=target_tensor,
+            y_pred=prediction_tensor,
+            from_logits=True,
+            alpha=None,
+            gamma=None)
+        bce = K.binary_crossentropy(
+            target_tensor, prediction_tensor, from_logits=True)
+
+        # When alpha and gamma are None, it should be equal to BCE
+        self.assertAllClose(fl, bce)
+
+        # When gamma==2.0 (alpha still None), only the modulating factor applies
+        fl = sigmoid_focal_crossentropy(
+            y_true=target_tensor,
+            y_pred=prediction_tensor,
+            from_logits=True,
+            alpha=None,
+            gamma=2.0)
+
+        # bce / fl = (1 - p_t) ** -2 here; compare its power-of-ten magnitude
+        order_of_ratio = tf.pow(10.0, tf.math.floor(self.log10(bce / fl)))
+        pow_values = tf.constant([[1000], [100], [10], [10], [100], [1000]])
+        self.assertAllClose(order_of_ratio, pow_values)
+
+    # Test without logits (predictions are already probabilities)
+    def test_without_logits(self):
+        # predictions represented as probabilities
+        prediction_tensor = tf.constant(
+            [[0.97], [0.91], [0.73], [0.27], [0.09], [0.03]], tf.float32)
+        # Ground truth
+        target_tensor = tf.constant([[1], [1], [1], [0], [0], [0]], tf.float32)
+
+        fl = sigmoid_focal_crossentropy(
+            y_true=target_tensor,
+            y_pred=prediction_tensor,
+            alpha=None,
+            gamma=None)
+        bce = K.binary_crossentropy(target_tensor, prediction_tensor)
+
+        # When alpha and gamma are None, it should be equal to BCE
+        self.assertAllClose(fl, bce)
+
+        # When gamma==2.0 (alpha still None), only the modulating factor applies
+        fl = sigmoid_focal_crossentropy(
+            y_true=target_tensor,
+            y_pred=prediction_tensor,
+            alpha=None,
+            gamma=2.0)
+
+        order_of_ratio = tf.pow(10.0, tf.math.floor(self.log10(bce / fl)))
+        pow_values = tf.constant([[1000], [100], [10], [10], [100], [1000]])
+        self.assertAllClose(order_of_ratio, pow_values)
+
+
+if __name__ == '__main__':
+    tf.test.main()