Merge pull request #4 from linjieccc/add_taskflow_ddparser

wawltor · web-flow · commit c09d901f85aa · 2021-09-07T16:27:06.000+08:00
Replace np.argmax with paddle.argmax
diff --git a/paddlenlp/taskflow/dependency_parsing.py b/paddlenlp/taskflow/dependency_parsing.py
@@ -78,7 +78,6 @@ class DDParserTask(Task):
     Args:
         task(string): The name of task.
         model(string): The model name in the task.
-        static_mode(bool): The flag to control in the static/dygraph mode.
         tree(bool): Ensure the output conforms to the tree structure.
         prob(bool): Whether to return the probability of predicted heads.
         use_pos(bool): Whether to return the postag.
@@ -167,7 +166,8 @@ def _construct_model(self, model):
             n_rels=len(self.rel_vocab),
             n_words=len(self.word_vocab),
             pad_index=self.word_pad_index,
-            eos_index=self.word_eos_index, )
+            bos_index=self.word_bos_index,
+            eos_index=self.word_eos_index,)
         # Load the model parameter for the predict
         state_dict = paddle.load(
             os.path.join(self._task_path, self.model, "model.pdparams"))
@@ -249,15 +249,12 @@ def _run_model(self, inputs):
             self.input_handles[0].copy_from_cpu(words)
             self.input_handles[1].copy_from_cpu(wp)
             self.predictor.run()
-            s_arc = self.output_handle[0].copy_to_cpu()
-            s_rel = self.output_handle[1].copy_to_cpu()
-            words = self.output_handle[2].copy_to_cpu()
+            arc_preds = self.output_handle[0].copy_to_cpu()
+            rel_preds = self.output_handle[1].copy_to_cpu()
+            s_arc = self.output_handle[2].copy_to_cpu()
+            mask = self.output_handle[3].copy_to_cpu().astype('bool')
 
-            mask = np.logical_and(
-                np.logical_and(words != self.word_pad_index,
-                               words != self.word_bos_index),
-                words != self.word_eos_index, )
-            arc_preds, rel_preds = decode(s_arc, s_rel, mask, self.tree)
+            arc_preds, rel_preds = decode(arc_preds, rel_preds, s_arc, mask, self.tree)
 
             arcs.extend([arc_pred[m] for arc_pred, m in zip(arc_preds, mask)])
             rels.extend([rel_pred[m] for rel_pred, m in zip(rel_preds, mask)])
@@ -458,16 +455,13 @@ def probability(s_arc, arc_preds):
     return arc_probs
 
 
-def decode(s_arc, s_rel, mask, tree=True):
-
-    lens = np.sum(mask.astype(int), axis=-1)
-    arc_preds = np.argmax(s_arc, axis=-1)
+def decode(arc_preds, rel_preds, s_arc, mask, tree):
+    """decode"""
+    lens = np.sum(mask, -1)
 
     bad = [not istree(seq[:i + 1]) for i, seq in zip(lens, arc_preds)]
     if tree and any(bad):
         arc_preds[bad] = eisner(s_arc[bad], mask[bad])
-
-    rel_preds = np.argmax(s_rel, axis=-1)
     rel_preds = [
         rel_pred[np.arange(len(arc_pred)), arc_pred]
         for arc_pred, rel_pred in zip(arc_preds, rel_preds)
@@ -704,4 +698,4 @@ def inorder_traversal(self, node):
 
 def istree(sequence):
     """Is the sequence a project tree"""
-    return DepTree(sequence).judge_legal()
+    return DepTree(sequence).judge_legal()
diff --git a/paddlenlp/taskflow/models/dependency_parsing_model.py b/paddlenlp/taskflow/models/dependency_parsing_model.py
@@ -25,11 +25,13 @@ def __init__(self,
                  n_rels,
                  n_words,
                  pad_index,
+                 bos_index,
                  eos_index,
                  n_mlp_arc=500,
                  n_mlp_rel=100):
         super(BiAffineParser, self).__init__()
         self.pad_index = pad_index
+        self.bos_index = bos_index
         self.eos_index = eos_index
 
         if encoding_model == "lstm-pe":
@@ -70,7 +72,14 @@ def forward(self, words, wp):
         s_arc_mask = paddle.unsqueeze(mask, 1)
         s_arc = s_arc * s_arc_mask + paddle.scale(
             paddle.cast(s_arc_mask, 'int32'), scale=1e5, bias=-1, bias_after_scale=False)
-        return s_arc, s_rel, words
+
+        mask = paddle.cast(paddle.logical_and(
+            paddle.logical_and(words != self.pad_index, words != self.bos_index),
+            words != self.eos_index,
+            ), 'int32')
+        arc_preds = paddle.argmax(s_arc, axis=-1)
+        rel_preds = paddle.argmax(s_rel, axis=-1)    
+        return arc_preds, rel_preds, s_arc, mask
         
 
 class MLP(nn.Layer):
@@ -236,5 +245,4 @@ def index_sample(x, index):
         out = paddle.reshape(out, shape=[x_s[0], x_s[1], -1])
     else:
         out = paddle.reshape(out, shape=[x_s[0], -1])
-    return out
-
+    return out