wangxicoding
diff --git a/examples/language_model/gpt/export_model.py
Lines changed: 4 additions & 1 deletion b/examples/language_model/gpt/export_model.py
Lines changed: 4 additions & 1 deletion
diff --git a/examples/language_model/gpt/predict.py
Lines changed: 4 additions & 2 deletions b/examples/language_model/gpt/predict.py
Lines changed: 4 additions & 2 deletions
diff --git a/examples/text_to_knowledge/ernie-ctm/predictor.py
Lines changed: 0 additions & 1 deletion b/examples/text_to_knowledge/ernie-ctm/predictor.py
Lines changed: 0 additions & 1 deletion
diff --git a/examples/text_to_knowledge/ernie-ctm/predictor.py
Lines changed: 461 additions & 0 deletions b/examples/text_to_knowledge/ernie-ctm/predictor.py
Lines changed: 461 additions & 0 deletions
diff --git a/examples/text_to_knowledge/wordtag/README.md
Lines changed: 11 additions & 3 deletions b/examples/text_to_knowledge/wordtag/README.md
Lines changed: 11 additions & 3 deletions
diff --git a/examples/text_to_knowledge/wordtag/predict.py
Lines changed: 7 additions & 6 deletions b/examples/text_to_knowledge/wordtag/predict.py
Lines changed: 7 additions & 6 deletions
@@ -58,8 +58,11 @@ def main():
     model_class, tokenizer_class = MODEL_CLASSES[args.model_type]
 
     # Build model and load trained parameters
-    model = model_class.from_pretrained(args.model_path, max_predict_len=32)
     tokenizer = tokenizer_class.from_pretrained(args.model_path)
+    model = model_class.from_pretrained(
+        args.model_path,
+        max_predict_len=32,
+        eol_token_id=tokenizer.eol_token_id)  # main() has no `self`; use the local tokenizer
     # Switch to eval mode
     model.eval()
     # Convert to static graph with specific input description
 
@@ -40,7 +40,9 @@ def __init__(self,
         self.tokenizer = tokenizer_class.from_pretrained(model_name_or_path)
         logger.info('Loading the model parameters, please wait...')
         self.model = model_class.from_pretrained(
-            model_name_or_path, max_predict_len=max_predict_len)
+            model_name_or_path,
+            max_predict_len=max_predict_len,
+            eol_token_id=self.tokenizer.eol_token_id)
         self.model.eval()
         logger.info('Model loaded.')
 
@@ -49,7 +51,7 @@ def predict(self, text):
         ids = self.tokenizer(text)["input_ids"]
         input_ids = paddle.to_tensor(
             np.array(ids).reshape(1, -1).astype('int64'))
-        out = self.model(input_ids, self.tokenizer.eol_token_id)
+        out = self.model(input_ids)
         out = [int(x) for x in out.numpy().reshape([-1])]
         logger.info(self.tokenizer.convert_ids_to_string(out))
 
 
@@ -54,9 +54,9 @@ Term-Linking示例程序可以对无标签数据可以启动模型预测, 例如
 
 执行下面的脚本即可快速获取上面两段文本的百科知识树链接的结果
 
-```bash
-from paddlenlp.taskflow import TaskFlow
-task = TaskFlow("text2knowledge", model="wordtag")
+```python
+from paddlenlp import Taskflow
+task = Taskflow("text2knowledge", model="wordtag")
 task(["热梅茶是一道以梅子为主要原料制作的茶饮",
       "《孤女》是2010年九州出版社出版的小说，作者是余兼羽"])
 # Support the input text directly
@@ -70,6 +70,14 @@ task("热梅茶是一道以梅子为主要原料制作的茶饮")
 {'text': '热梅茶是一道以梅子为主要原料制作的茶饮', 'items': [{'item': '热梅茶', 'offset': 0, 'wordtag_label': '饮食类_饮品', 'length': 3}, {'item': '是', 'offset': 3, 'wordtag_label': '肯定词', 'length': 1, 'termid': '肯定否定词_cb_是'}, {'item': '一道', 'offset': 4, 'wordtag_label': '数量词', 'length': 2}, {'item': '以', 'offset': 6, 'wordtag_label': '介词', 'length': 1, 'termid': '介词_cb_以'}, {'item': '梅子', 'offset': 7, 'wordtag_label': '饮食类', 'length': 2, 'termid': '饮食_cb_梅'}, {'item': '为', 'offset': 9, 'wordtag_label': '肯定词', 'length': 1, 'termid': '肯定否定词_cb_为'}, {'item': '主要原料', 'offset': 10, 'wordtag_label': '物体类', 'length': 4, 'termid': '物品_cb_主要原料'}, {'item': '制作', 'offset': 14, 'wordtag_label': '场景事件', 'length': 2, 'termid': '场景事件_cb_制作'}, {'item': '的', 'offset': 16, 'wordtag_label': '助词', 'length': 1, 'termid': '助词_cb_的'}, {'item': '茶饮', 'offset': 17, 'wordtag_label': '饮食类_饮品', 'length': 2, 'termid': '饮品_cb_茶饮'}]}
 ```
 
+同时我们也提供了基于上述 Taskflow 的 Python 执行脚本，具体的执行方式如下：
+```shell
+python predict.py --max_seq_len 128 --batch_size 2
+```
+其中参数释义如下：
+- `max_seq_len` 表示最大句子长度，超过该长度将被截断。
+- `batch_size` 表示每个预测批次的样本数目。
+
 ## WordTag后续计划
 
 1. 持续优化知识标注模型，获得更加精准的标注结果；
 
@@ -16,15 +16,13 @@
 import argparse
 
 import paddle
-
-from predictor import WordtagPredictor
+from paddlenlp import Taskflow
 
 
 def parse_args():
     parser = argparse.ArgumentParser()
 
     # yapf: disable
-    parser.add_argument("--data_dir", default="./data", type=str, help="The input data dir, should contain [train/test].json and [train/test]_metrics.json .")
     parser.add_argument("--max_seq_len", default=128, type=int, help="The maximum total input sequence length after tokenization. Sequences longer than this will be truncated, sequences shorter will be padded.", )
     parser.add_argument("--batch_size", default=32, type=int, help="Batch size per GPU/CPU for training.", )
     parser.add_argument("--device", default="gpu", type=str, choices=["cpu", "gpu", "xpu"] ,help="The device to select to train the model, is must be cpu/gpu/xpu.")
@@ -36,10 +34,13 @@ def parse_args():
 
 def do_predict(args):
     paddle.set_device(args.device)
-    predictor = WordtagPredictor(term_linking=True)
+    wordtag = Taskflow(
+        "text2knowledge",
+        model="wordtag",
+        batch_size=args.batch_size,
+        max_seq_length=args.max_seq_len)
     txts = ["《孤女》是2010年九州出版社出版的小说，作者是余兼羽。", "热梅茶是一道以梅子为主要原料制作的茶饮"]
-
-    res = predictor.run(txts)
+    res = wordtag(txts)
     print(res)