
Commit bbec769

AakashKumarNain authored and WindQAQ committed
GeLU activation as a layer (#424)
* add gelu activation
* add tests for gelu activation
* add gelu to imports
* include gelu in build file
* update tests and refactor
* refactor
* make compatible with every fp dtype and fulfill layer requirements
* add dummy model test
* code format
* code format and sanity check pass
* code format
* auto code format
* use fused gelu activation
* remove redundant test cases
1 parent 7e0f343 · commit bbec769

File tree

4 files changed: +112 −1 lines


tensorflow_addons/layers/BUILD

14 additions & 0 deletions

@@ -6,6 +6,7 @@ py_library(
     name = "layers",
     srcs = [
         "__init__.py",
+        "gelu.py",
         "maxout.py",
         "normalizations.py",
         "optical_flow.py",
@@ -23,6 +24,19 @@ py_library(
     ],
 )
 
+py_test(
+    name = "gelu_test",
+    size = "small",
+    srcs = [
+        "gelu_test.py",
+    ],
+    main = "gelu_test.py",
+    srcs_version = "PY2AND3",
+    deps = [
+        ":layers",
+    ],
+)
+
 py_test(
     name = "layers_wrappers_test",
     size = "small",

tensorflow_addons/layers/__init__.py

2 additions & 1 deletion

@@ -18,10 +18,11 @@
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow_addons.layers.gelu import GeLU
 from tensorflow_addons.layers.maxout import Maxout
 from tensorflow_addons.layers.normalizations import GroupNormalization
 from tensorflow_addons.layers.normalizations import InstanceNormalization
 from tensorflow_addons.layers.optical_flow import CorrelationCost
 from tensorflow_addons.layers.poincare import PoincareNormalize
 from tensorflow_addons.layers.sparsemax import Sparsemax
-from tensorflow_addons.layers.wrappers import WeightNormalization
+from tensorflow_addons.layers.wrappers import WeightNormalization
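
With this export in place, the layer becomes importable from the package's layers namespace. A minimal sketch of the intended import path after this commit (not part of the diff itself):

    from tensorflow_addons.layers import GeLU

    layer = GeLU()  # uses the default approximate=True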

tensorflow_addons/layers/gelu.py

57 additions & 0 deletions

@@ -0,0 +1,57 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Implements GeLU activation."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+from tensorflow_addons.utils import keras_utils
+from tensorflow_addons.activations import gelu
+
+
+@keras_utils.register_keras_custom_object
+class GeLU(tf.keras.layers.Layer):
+    """Gaussian Error Linear Unit.
+
+    A smoother version of ReLU generally used
+    in the BERT or BERT architecture based models.
+    Original paper: https://arxiv.org/abs/1606.08415
+
+    Input shape:
+        Arbitrary. Use the keyword argument `input_shape`
+        (tuple of integers, does not include the samples axis)
+        when using this layer as the first layer in a model.
+
+    Output shape:
+        Same shape as the input.
+    """
+
+    def __init__(self, approximate=True, **kwargs):
+        super(GeLU, self).__init__(**kwargs)
+        self.approximate = approximate
+        self.supports_masking = True
+
+    def call(self, inputs):
+        return gelu(inputs, approximate=self.approximate)
+
+    def get_config(self):
+        config = {'approximate': self.approximate}
+        base_config = super(GeLU, self).get_config()
+        return dict(list(base_config.items()) + list(config.items()))
+
+    def compute_output_shape(self, input_shape):
+        return input_shape
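
To show how the new layer is meant to slot into a model, here is a minimal sketch of a toy Keras model in the spirit of the "dummy model test" mentioned in the commit message. It is not part of the commit; the layer widths and input shape are invented for the example:

    import tensorflow as tf
    from tensorflow_addons.layers import GeLU

    # Toy model: Dense -> GeLU -> Dense. Sizes here are illustrative only.
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(16, input_shape=(8,)),
        GeLU(approximate=True),  # tanh-based approximation (the default)
        tf.keras.layers.Dense(1),
    ])

    # get_config() plus the register_keras_custom_object decorator let the
    # layer round-trip through model (de)serialization.
    restored = tf.keras.Sequential.from_config(model.get_config())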
tensorflow_addons/layers/gelu_test.py

39 additions & 0 deletions

@@ -0,0 +1,39 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for GeLU activation."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import tensorflow as tf
+from absl.testing import parameterized
+from tensorflow_addons.layers.gelu import GeLU
+from tensorflow_addons.utils import test_utils
+
+
+@parameterized.parameters([np.float16, np.float32, np.float64])
+@test_utils.run_all_in_graph_and_eager_modes
+class TestGeLU(tf.test.TestCase):
+    def test_random(self, dtype):
+        x = np.array([[0.5, 1.2, -0.3]]).astype(dtype)
+        val = np.array([[0.345714, 1.0617027, -0.11462909]]).astype(dtype)
+        test_utils.layer_test(
+            GeLU, kwargs={'dtype': dtype}, input_data=x, expected_output=val)
+
+
+if __name__ == '__main__':
+    tf.test.main()
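
For reference, the expected values in test_random follow from the tanh approximation of GeLU described in the paper linked from the layer's docstring. A small standalone NumPy check, not part of the commit, reproduces them:

    import numpy as np

    def gelu_approx(x):
        # 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))
        return 0.5 * x * (1.0 + np.tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * x ** 3)))

    print(gelu_approx(np.array([0.5, 1.2, -0.3])))
    # -> approximately [ 0.345714  1.061703 -0.114629], matching the test's expected output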
