NVIDIA · tbartley94 · Feb 20, 2026 · Feb 17, 2026 · Feb 17, 2026 · Feb 19, 2026
diff --git a/nemo_text_processing/inverse_text_normalization/ko/data/whitelist.tsv b/nemo_text_processing/inverse_text_normalization/ko/data/whitelist.tsv
@@ -0,0 +1,35 @@
+박사	Dr.
+박사	dr.
+씨	Mr.
+씨	mr.
+양	Ms.
+양	ms.
+여사	Mrs.
+여사	mrs.
+산	mt.
+산	Mt.
+교수	Prof.
+교수	prof.
+시니어	sr.
+시니어	Sr.
+주니어	jr.
+주니어	Jr.
+대로	Ave.
+대로	ave.
+번호	no.
+번호	No.
+왼쪽 괄호	(
+오른쪽 괄호	)
+더하기	+
+마이너스	-
+시그마	Σ
+에타	η
+카파	κ
+오메가	ω
+시그마	σ
+알파	α
+뉴	ν
+델타	δ
+이오타	ι
+박사학위	Ph.D.
+등	etc.
diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/date.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/date.py
@@ -34,27 +34,25 @@ def __init__(self, cardinal: GraphFst):
         cardinal = cardinal.just_cardinals
         month = pynini.string_file(get_abs_path("data/months.tsv"))
 
-        spacing = pynini.closure(pynini.accep(NEMO_SPACE), 0, 1)
-
         year_suffix = pynini.cross("년", "")
         month_suffix = pynini.cross("월", "")
         day_suffix = pynini.cross("일", "")
 
-        year_component = (
-            pynutil.insert("year: \"") + cardinal + pynini.closure(year_suffix, 0, 1) + pynutil.insert("\"")
-        )
+        delete_space = pynini.closure(pynutil.delete(NEMO_SPACE), 0, 1)
+        between_fields = delete_space + pynutil.insert(NEMO_SPACE)
 
-        month_component = (
-            pynutil.insert("month: \"") + spacing + month + pynini.closure(month_suffix, 0, 1) + pynutil.insert("\"")
-        )
+        year_component = pynutil.insert("year: \"") + cardinal + year_suffix + pynutil.insert("\"")
+        month_component = pynutil.insert("month: \"") + month + month_suffix + pynutil.insert("\"")
+        day_component = pynutil.insert("day: \"") + cardinal + day_suffix + pynutil.insert("\"")
 
-        day_component = pynutil.insert("day: \"") + spacing + cardinal + day_suffix + spacing + pynutil.insert("\"")
+        graph_component = year_component | month_component
 
-        graph_component = year_component | month_component | day_component
         graph_date = (
-            pynini.closure(year_component, 0, 1)
-            + pynini.closure((pynutil.insert(NEMO_SPACE)) + month_component, 0, 1)
-            + pynini.closure((pynutil.insert(NEMO_SPACE)) + day_component, 0, 1)
+            year_component
+            | month_component
+            | (year_component + between_fields + month_component)
+            | (month_component + between_fields + day_component)
+            | (year_component + between_fields + month_component + between_fields + day_component)
         )
 
         final_graph = graph_component | graph_date

diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/decimal.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/decimal.py
@@ -41,7 +41,7 @@ def __init__(self, cardinal: GraphFst):
         cardinals = cardinal.just_cardinals
         graph_zero = pynini.string_file(get_abs_path("data/numbers/zero.tsv"))
         graph_digit = pynini.string_file(get_abs_path("data/numbers/digit.tsv"))
-        decimal_part = pynini.closure(graph_zero | graph_digit)
+        decimal_part = pynini.closure(graph_zero | graph_digit, 1)
 
         decimal_point = pynutil.delete("점")
         integer_part = pynutil.insert("integer_part: \"") + cardinals + pynutil.insert("\"")
@@ -55,7 +55,7 @@ def __init__(self, cardinal: GraphFst):
         )  # If decimal is used to express big numbers like  15000 -> "1.5만"
 
         self.decimal = graph_decimal_regular | graph_deicimal_larger
-        self.just_decimal = cardinals + pynini.cross("점", ".") + decimal_part
+        self.just_decimal = cardinals | (cardinals + pynini.cross("점", ".") + decimal_part)
 
         graph_sign = (
             pynutil.insert("negative: \"") + (pynini.cross("마이너스", "-") | pynini.accep("-")) + pynutil.insert("\"")

diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/money.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/money.py
@@ -28,17 +28,16 @@ class MoneyFst(GraphFst):
         cardinal: CardinalFst
     """
 
-    def __init__(self, cardinal: GraphFst):
+    def __init__(self, cardinal: GraphFst, decimal: GraphFst):
         super().__init__(name="money", kind="classify")
 
         cardinals = cardinal.just_cardinals
+        decimals = decimal.just_decimal
         currency = pynini.string_file(get_abs_path("data/currency.tsv"))
 
         # Accepting space if there are one between integer and currency
         spacing = pynini.closure(pynini.accep(NEMO_SPACE), 0, 1)
-
-        graph_integer = pynutil.insert("integer_part: \"") + cardinals + pynutil.insert("\"") + spacing
-
+        graph_integer = pynutil.insert('integer_part: "') + decimals + pynutil.insert('"') + spacing
         graph_unit = pynutil.insert(" currency: \"") + currency + pynutil.insert("\"")
 
         graph_final = graph_integer + graph_unit

diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/time.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/time.py
@@ -73,27 +73,25 @@ def __init__(self):
             pynutil.insert("seconds: \"") + (graph_0_to_59 + spacing + second_suffix) + pynutil.insert("\"")
         )
 
-        hour = pynini.closure(hour_component, 0, 1)
-        minute = pynini.closure(delete_space + minute_component, 0, 1)
-        second = pynini.closure(delete_space + second_component, 0, 1)
+        hm_opt = pynini.closure(delete_space + minute_component, 0, 1)
+        hs_opt = pynini.closure(delete_space + second_component, 0, 1)
 
-        graph_regular = hour + minute + second
+        hms = hour_component + hm_opt + hs_opt
+        ms = minute_component + pynini.closure(delete_space + second_component, 0, 1)
+        s_only = second_component
+
+        graph_regular = pynini.union(hms, ms, s_only).optimize()
 
         # 오전 = AM, 오후 = PM
-        prefix_words = (
-            pynini.union(
-                (pynini.accep("오전")), (pynini.accep("오후")), (pynini.accep("새벽")), (pynini.accep("아침"))
-            )
-            + spacing
-        )
-        prefix_tag = pynutil.insert("prefix: \"") + prefix_words + pynutil.insert("\"")
+        ampm_words = pynini.union("오전", "오후", "새벽", "아침")
+        ampm_tag = pynutil.insert('suffix: "') + ampm_words + pynutil.insert('"')
 
         # 전 = before, 후 = after
         suffix_words = pynini.accep("전") | pynini.accep("후")
         suffix_tag = pynutil.insert("suffix: \"") + suffix_words + pynutil.insert("\"")
 
         time_graph = (
-            pynini.closure(delete_space + prefix_tag, 0, 1)
+            pynini.closure(delete_space + ampm_tag, 0, 1)
             + graph_regular
             + pynini.closure(delete_space + suffix_tag, 0, 1)
         )

diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/tokenize_and_classify.py
@@ -18,7 +18,14 @@
 import pynini
 from pynini.lib import pynutil
 
-from nemo_text_processing.inverse_text_normalization.ko.graph_utils import INPUT_LOWER_CASED, GraphFst, generator_main
+from nemo_text_processing.inverse_text_normalization.ko.graph_utils import (
+    INPUT_LOWER_CASED,
+    NEMO_WHITE_SPACE,
+    GraphFst,
+    delete_extra_space,
+    delete_space,
+    generator_main,
+)
 from nemo_text_processing.inverse_text_normalization.ko.taggers.cardinal import CardinalFst
 from nemo_text_processing.inverse_text_normalization.ko.taggers.date import DateFst
 from nemo_text_processing.inverse_text_normalization.ko.taggers.decimal import DecimalFst
@@ -28,6 +35,7 @@
 from nemo_text_processing.inverse_text_normalization.ko.taggers.ordinal import OrdinalFst
 from nemo_text_processing.inverse_text_normalization.ko.taggers.telephone import TelephoneFst
 from nemo_text_processing.inverse_text_normalization.ko.taggers.time import TimeFst
+from nemo_text_processing.inverse_text_normalization.ko.taggers.whitelist import WhiteListFst
 from nemo_text_processing.inverse_text_normalization.ko.taggers.word import WordFst
 
 
@@ -56,7 +64,7 @@ def __init__(
         far_file = None
         if cache_dir is not None and cache_dir != "None":
             os.makedirs(cache_dir, exist_ok=True)
-            far_file = os.path.join(cache_dir, f"jp_itn_{input_case}.far")
+            far_file = os.path.join(cache_dir, f"ko_itn_{input_case}_tokenize.far")
         if not overwrite_cache and far_file and os.path.exists(far_file):
             self.fst = pynini.Far(far_file, mode="r")["tokenize_and_classify"]
             logging.info(f"ClassifyFst.fst was restored from {far_file}.")
@@ -81,7 +89,7 @@ def __init__(
             date = DateFst(cardinal)
             date_graph = date.fst
 
-            money = MoneyFst(cardinal)
+            money = MoneyFst(cardinal, decimal)
             money_graph = money.fst
 
             telephone = TelephoneFst()
@@ -91,6 +99,7 @@ def __init__(
             measure_graph = measure.fst
 
             word_graph = WordFst().fst
+            whitelist_graph = WhiteListFst().fst
 
             classify = (
                 pynutil.add_weight(cardinal_graph, 1.1)
@@ -103,12 +112,15 @@ def __init__(
                 | pynutil.add_weight(telephone_graph, 1.1)
                 | pynutil.add_weight(measure_graph, 1.1)
                 | pynutil.add_weight(word_graph, 100)
+                | pynutil.add_weight(whitelist_graph, 1.01)
             )
 
-            token = pynutil.insert("tokens { ") + classify + pynutil.insert(" } ")
-            tagger = pynini.closure(token, 1)
+            token = pynutil.insert("tokens { ") + classify + pynutil.insert(" }")
+            space = NEMO_WHITE_SPACE @ delete_extra_space
+            space_opt = pynini.closure(space, 0, 1)
 
-            self.fst = tagger
+            graph = delete_space + token + pynini.closure(space_opt + token) + delete_space
+            self.fst = graph.optimize()
 
             if far_file:
                 generator_main(far_file, {"tokenize_and_classify": self.fst})

diff --git a/nemo_text_processing/inverse_text_normalization/ko/taggers/whitelist.py b/nemo_text_processing/inverse_text_normalization/ko/taggers/whitelist.py
@@ -0,0 +1,36 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+# Copyright 2015 and onwards Google, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import pynini
+from pynini.lib import pynutil
+
+from nemo_text_processing.inverse_text_normalization.ko.graph_utils import GraphFst
+from nemo_text_processing.inverse_text_normalization.ko.utils import get_abs_path
+
+
+class WhiteListFst(GraphFst):
+    """
+    Tagger for whitelisted phrases: every entry in the first column of "data/whitelist.tsv" is rewritten to the
+    matching second-column entry and wrapped in a name field, e.g. 박사학위 -> name: "Ph.D."
+    """
+
+    def __init__(self):
+        super().__init__(name="whitelist", kind="classify")
+
+        # string_file compiles the two-column TSV into a transducer (first column in, second column out).
+        mapping = pynini.string_file(get_abs_path("data/whitelist.tsv"))
+        tagged = pynutil.insert('name: "') + mapping + pynutil.insert('"')
+        self.fst = tagged.optimize()
diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/measure.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/measure.py
@@ -68,11 +68,13 @@ def __init__(self):
         graph_fraction = (
             pynutil.delete("fraction {")
             + delete_space
-            + pynutil.delete('denominator: "')
+            + optional_sign
+            + delete_space
+            + pynutil.delete('numerator: "')
             + measurement
             + pynutil.delete('"')
             + delete_space
-            + pynutil.delete('numerator: "')
+            + pynutil.delete('denominator: "')
             + pynutil.insert("/")
             + measurement
             + pynutil.delete('"')

diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/verbalize.py
@@ -25,6 +25,7 @@
 from nemo_text_processing.inverse_text_normalization.ko.verbalizers.ordinal import OrdinalFst
 from nemo_text_processing.inverse_text_normalization.ko.verbalizers.telephone import TelephoneFst
 from nemo_text_processing.inverse_text_normalization.ko.verbalizers.time import TimeFst
+from nemo_text_processing.inverse_text_normalization.ko.verbalizers.whitelist import WhiteListFst
 from nemo_text_processing.inverse_text_normalization.ko.verbalizers.word import WordFst
 
 
@@ -67,6 +68,8 @@ def __init__(self):
         word = WordFst()
         word_graph = word.fst
 
+        whitelist_graph = WhiteListFst().fst
+
         graph = pynini.union(
             cardinal_graph,
             ordinal_graph,
@@ -78,5 +81,6 @@ def __init__(self):
             telephone_graph,
             measure_graph,
             word_graph,
+            whitelist_graph,
         )
         self.fst = graph
diff --git a/nemo_text_processing/inverse_text_normalization/ko/verbalizers/whitelist.py b/nemo_text_processing/inverse_text_normalization/ko/verbalizers/whitelist.py
@@ -0,0 +1,32 @@
+# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import pynini
+from pynini.lib import pynutil
+
+from nemo_text_processing.inverse_text_normalization.ko.graph_utils import NEMO_NOT_QUOTE, GraphFst
+
+
+class WhiteListFst(GraphFst):
+    '''
+    Verbalizer for whitelist tokens: strips the name field wrapper and emits its value unchanged, e.g. name: "Dr." -> Dr.
+    '''
+
+    def __init__(self, deterministic: bool = True, lm: bool = False):
+        super().__init__(name="whitelist", kind="verbalize", deterministic=deterministic)
+
+        # `lm` is accepted but never read in this class; `deterministic` is only forwarded to GraphFst.
+        graph = pynutil.delete("name: \"") + pynini.closure(NEMO_NOT_QUOTE) + pynutil.delete("\"")
+        self.fst = graph.optimize()
diff --git a/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_date.txt b/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_date.txt
@@ -1,16 +1,15 @@
 이천이십사년팔월이십팔일~2024년8월28일
-이천이십삼년 구월 오일~2023년 9월 5일
+이천이십삼년 구월 오일~2023년9월5일
 천구백구십구년십이월삼십일일~1999년12월31일
-이천년 이월 이십구일~2000년 2월 29일
+이천년 이월 이십구일~2000년2월29일
 이천십년시월십일~2010년10월10일
 이천이십일년유월십육일~2021년6월16일
 이천삼십년삼월십사일~2030년3월14일
-천구백팔십팔년 오월 이십일~1988년 5월 20일
-이천일년 칠월 구일~2001년 7월 9일
+천구백팔십팔년 오월 이십일~1988년5월20일
+이천일년 칠월 구일~2001년7월9일
 이천십팔년사월삼십일~2018년4월30일
 삼천년팔월십오일~3000년8월15일
-이천구년 일월이십일~2009년 1월20일
+이천구년 일월이십일~2009년1월20일
 이천삼십오년~2035년
 오월~5월
-이십사일~24일
 구천구백구십구년삼월일일~9999년3월1일
diff --git a/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_ordinal.txt b/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_ordinal.txt
@@ -14,12 +14,4 @@
 사십번째~40번째
 오십번째~50번째
 오십삼번째~53번째
-백번째~100번째
-한개~1개
-한마리~1마리
-열병~10병
-스물한송이~21송이
-사십그루~40그루
-여섯사람~6사람
-열다섯장~15장
-서른일곱권~37권
+백번째~100번째
diff --git a/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_time.txt b/tests/nemo_text_processing/ko/data_inverse_text_normalization/test_cases_time.txt
@@ -6,8 +6,8 @@
 열두시 반~12:30
 두시 오초~2:00:05
 두시 삼십분 오초~2:30:05
-오전두시~오전2:00
-오후네시반~오후4:30
+오전 두시~오전 2:00
+오후 네시 반~오후 4:30
 두시전~2:00 전
 두시십분후~2:10 후
 한시 십오분 삼십초~1:15:30