From ffab339a10cabba7bae505109a6545f7b8401a38 Mon Sep 17 00:00:00 2001 From: Amit Moryossef Date: Tue, 28 Apr 2026 09:19:49 +0000 Subject: [PATCH 1/2] Add Miyazaki et al. 2024 (Gloss Pair Encoding) to Text-to-Gloss Co-Authored-By: Claude Opus 4.7 (1M context) --- src/index.md | 2 ++ src/references.bib | 12 ++++++++++++ 2 files changed, 14 insertions(+) diff --git a/src/index.md b/src/index.md index 7835f8db..cb6cd3b5 100644 --- a/src/index.md +++ b/src/index.md @@ -725,6 +725,8 @@ complexity of the model. Testing their model on the RWTH-PHOENIX-Weather-2014T [@cihan2018neural], they demonstrated that injecting this additional information results in better translation quality. +@miyazaki-etal-2024-sign propose Gloss Pair Encoding (GPE), a BPE-inspired tokenization that merges frequently co-occurring gloss pairs (e.g., "book"+"building" $\to$ "library") into single tokens to match granularity between spoken and signed languages, and show on a Japanese-JSL news corpus that combining GPE-augmented and original training data improves text-to-gloss translation while merging non-manual markers (head-nods, pointing) hurts performance. + --- #### Video-to-Text diff --git a/src/references.bib b/src/references.bib index 5d2b8280..15f60c06 100644 --- a/src/references.bib +++ b/src/references.bib @@ -4554,6 +4554,14 @@ @inproceedings{roh-etal-2024-preprocessing Hwang, Eui Jun and Cho, Sukmin and Park, Jong C.", +} + +@inproceedings{miyazaki-etal-2024-sign, + title = "Sign Language Translation with Gloss Pair Encoding", + author = "Miyazaki, Taro and + Tan, Sihan and + Uchida, Tsubasa and + Kaneko, Hiroyuki", editor = "Efthimiou, Eleni and Fotinea, Stavroula-Evita and Hanke, Thomas and @@ -4572,3 +4580,7 @@ @inproceedings{roh-etal-2024-preprocessing url = "https://aclanthology.org/2024.signlang-1.36/", pages = "323--334" } + + url = "https://aclanthology.org/2024.signlang-1.29/", + pages = "262--268" +} From f5b7ef9bb1d57f1461a5c06dad157939ea4e050f Mon Sep 17 00:00:00 2001 From: AmitMY Date: Tue, 28 Apr 2026 09:55:40 +0000 Subject: [PATCH 2/2] Simplify miyazaki-etal one-liner (review pattern: concise) --- src/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/index.md b/src/index.md index cb6cd3b5..fbdfbfa9 100644 --- a/src/index.md +++ b/src/index.md @@ -725,7 +725,7 @@ complexity of the model. Testing their model on the RWTH-PHOENIX-Weather-2014T [@cihan2018neural], they demonstrated that injecting this additional information results in better translation quality. -@miyazaki-etal-2024-sign propose Gloss Pair Encoding (GPE), a BPE-inspired tokenization that merges frequently co-occurring gloss pairs (e.g., "book"+"building" $\to$ "library") into single tokens to match granularity between spoken and signed languages, and show on a Japanese-JSL news corpus that combining GPE-augmented and original training data improves text-to-gloss translation while merging non-manual markers (head-nods, pointing) hurts performance. +@miyazaki-etal-2024-sign propose Gloss Pair Encoding (GPE), a BPE-inspired tokenization that merges frequent gloss pairs into single tokens to better match granularity between spoken and signed languages, and show that combining GPE-augmented data with the original training data improves text-to-gloss translation on a Japanese-JSL corpus. ---