Fix kwargs in crf_decode_forward (#2642)

tgsmith61591 · web-flow · commit b8cab7fd61af · 2022-01-20T17:25:01.000-08:00
* Address issue #2639
diff --git a/tensorflow_addons/text/crf.py b/tensorflow_addons/text/crf.py
@@ -490,7 +490,11 @@ def crf_decode_forward(
     mask = tf.sequence_mask(sequence_lengths, tf.shape(inputs)[1])
     crf_fwd_cell = CrfDecodeForwardRnnCell(transition_params, dtype=inputs.dtype)
     crf_fwd_layer = tf.keras.layers.RNN(
-        crf_fwd_cell, return_sequences=True, return_state=True, dtype=inputs.dtype
+        crf_fwd_cell,
+        return_sequences=True,
+        return_state=True,
+        dtype=inputs.dtype,
+        zero_output_for_mask=True,  # See: https://github.com/tensorflow/addons/issues/2639
     )
     return crf_fwd_layer(inputs, state, mask=mask)
 
diff --git a/tensorflow_addons/text/tests/crf_test.py b/tensorflow_addons/text/tests/crf_test.py
@@ -23,6 +23,7 @@
 
 from tensorflow_addons import text
 from tensorflow_addons.utils import test_utils
+from numpy.testing import assert_array_equal
 
 
 def calculate_sequence_score(inputs, transition_params, tag_indices, sequence_lengths):
@@ -559,3 +560,60 @@ def test_crf_decode_save_load(tmpdir):
             "seq_len": np.array([10]),
         }
     )
+
+
+@pytest.mark.parametrize(
+    "potentials,sequence_length",
+    [
+        # Masking case: lengths (8, 10) are shorter than the 12 timesteps.
+        pytest.param(
+            tf.random.normal([2, 12, 3]),
+            tf.constant([8, 10]),
+        ),
+        # No-masking case: every length equals the 8 available timesteps.
+        pytest.param(
+            tf.random.normal([4, 8, 10]),
+            tf.constant([8, 8, 8, 8]),
+        ),
+    ],
+)
+def test_crf_decode_forward_mask(potentials, sequence_length):
+    # Mimics the `_multi_seq_fn` closure in `crf_decode`: step 0 is the state, rest inputs.
+    initial_state = tf.slice(potentials, [0, 0, 0], [-1, 1, -1])
+    initial_state = tf.squeeze(initial_state, axis=[1])
+    inputs = tf.slice(potentials, [0, 1, 0], [-1, -1, -1])
+
+    sequence_length_less_one = tf.maximum(
+        tf.constant(0, dtype=tf.int32), sequence_length - 1
+    )
+
+    n_tags = potentials.shape[-1]
+    transition_params = tf.random.normal([n_tags, n_tags])
+
+    backpointers, _ = text.crf_decode_forward(
+        inputs, initial_state, transition_params, sequence_length_less_one
+    )
+
+    # True for valid timesteps; backpointers at the False (padded) steps should be 0.
+    mask = tf.sequence_mask(sequence_length_less_one, tf.shape(inputs)[1])
+
+    # Inverse mask as ints: 1 where the RNN should have masked the step, 0 where valid.
+    masked_indices = tf.cast(tf.logical_not(mask), tf.int32)
+
+    # Sanity check: each row has (number of timesteps - sequence length) masked steps.
+    exp_mask_sums = (
+        tf.repeat(inputs.shape[1], inputs.shape[0]) - sequence_length_less_one
+    )
+    mask_sums = tf.reduce_sum(masked_indices, axis=1)
+    assert_array_equal(
+        exp_mask_sums.numpy(),
+        mask_sums.numpy(),
+    )
+
+    # Multiply the backpointers by the inverse mask and require every product to be 0;
+    # any non-zero value means a masked timestep was not zeroed in the output.
+    masked_indices = tf.expand_dims(masked_indices, [2])
+    zeros = masked_indices * backpointers
+    assert tf.reduce_all(zeros == 0).numpy(), "Mask not applied correctly: {0}".format(
+        zeros
+    )