From 513fff52d51376f397794c4ad9b38ed4042cfafc Mon Sep 17 00:00:00 2001
From: Tarushi V <tarushiv@nvidia.com>
Date: Wed, 27 Nov 2024 15:17:49 +0530
Subject: [PATCH 01/28] Addition of whitelist and word classes

Signed-off-by: Tarushi V <tarushiv@nvidia.com>
---
 .../hi/data/whitelist/whitelist.tsv               | 13 +++++++++++++
 .../hi/data/whitelist/whitelist_fraction.tsv      |  3 ---
 .../hi/data/whitelist/whitelist_time.tsv          |  2 --
 .../hi/taggers/tokenize_and_classify.py           |  6 +++---
 .../hi/taggers/whitelist.py                       |  2 +-
 .../hi/verbalizers/verbalize.py                   |  4 +++-
 .../test_cases_whitelist.txt                      | 12 ++++++++++++
 .../test_cases_word.txt                           | 15 +++++++++++++++
 ...test_sparrowhawk_inverse_text_normalization.sh | 10 ++++++++++
 tests/nemo_text_processing/hi/test_whitelist.py   |  9 +++++++++
 tests/nemo_text_processing/hi/test_word.py        | 10 ++++++++++
 tools/text_processing_deployment/pynini_export.py |  2 +-
 12 files changed, 77 insertions(+), 11 deletions(-)
 create mode 100644 nemo_text_processing/inverse_text_normalization/hi/data/whitelist/whitelist.tsv
 delete mode 100644 nemo_text_processing/inverse_text_normalization/hi/data/whitelist/whitelist_fraction.tsv
 delete mode 100644 nemo_text_processing/inverse_text_normalization/hi/data/whitelist/whitelist_time.tsv
 create mode 100644 tests/nemo_text_processing/hi/data_inverse_text_normalization/test_cases_whitelist.txt
 create mode 100644 tests/nemo_text_processing/hi/data_inverse_text_normalization/test_cases_word.txt

diff --git a/nemo_text_processing/inverse_text_normalization/hi/data/whitelist/whitelist.tsv b/nemo_text_processing/inverse_text_normalization/hi/data/whitelist/whitelist.tsv
new file mode 100644
index 000000000..f9eb081b9
--- /dev/null
+++ b/nemo_text_processing/inverse_text_normalization/hi/data/whitelist/whitelist.tsv
@@ -0,0 +1,13 @@
+१/४	पाव
+१/२	आधा
+३/४	पौन
+१:३०	डेढ़ बजे
+२:३०	ढाई बजे
+१.५	डेढ़
+२.५	ढाई
+कु.	कुमारी
+स्मि.	श्रीमती
+श्री.	श्री
+श्री.	श्रीमान
+मा.	मास्टर
+डॉ.	डॉक्टर
\ No newline at end of file
diff --git a/nemo_text_processing/inverse_text_normalization/hi/data/whitelist/whitelist_fraction.tsv b/nemo_text_processing/inverse_text_normalization/hi/data/whitelist/whitelist_fraction.tsv
deleted file mode 100644
index d3596a955..000000000
--- a/nemo_text_processing/inverse_text_normalization/hi/data/whitelist/whitelist_fraction.tsv
+++ /dev/null
@@ -1,3 +0,0 @@
-१/४	पाव
-१/२	आधा 
-३/४	पौन
\ No newline at end of file
diff --git a/nemo_text_processing/inverse_text_normalization/hi/data/whitelist/whitelist_time.tsv b/nemo_text_processing/inverse_text_normalization/hi/data/whitelist/whitelist_time.tsv
deleted file mode 100644
index aaf5baf8b..000000000
--- a/nemo_text_processing/inverse_text_normalization/hi/data/whitelist/whitelist_time.tsv
+++ /dev/null
@@ -1,2 +0,0 @@
-१:३०	डेढ़
-२:३०	ढाई
\ No newline at end of file
diff --git a/nemo_text_processing/inverse_text_normalization/hi/taggers/tokenize_and_classify.py b/nemo_text_processing/inverse_text_normalization/hi/taggers/tokenize_and_classify.py
index 5267da2bb..9c8168aa0 100644
--- a/nemo_text_processing/inverse_text_normalization/hi/taggers/tokenize_and_classify.py
+++ b/nemo_text_processing/inverse_text_normalization/hi/taggers/tokenize_and_classify.py
@@ -35,7 +35,7 @@
 from nemo_text_processing.inverse_text_normalization.hi.taggers.punctuation import PunctuationFst
 from nemo_text_processing.inverse_text_normalization.hi.taggers.time import TimeFst
 from nemo_text_processing.inverse_text_normalization.hi.taggers.word import WordFst
-from nemo_text_processing.inverse_text_normalization.hi.verbalizers.whitelist import WhiteListFst
+from nemo_text_processing.inverse_text_normalization.hi.taggers.whitelist import WhiteListFst
 
 
 class ClassifyFst(GraphFst):
@@ -83,7 +83,7 @@ def __init__(
             money = MoneyFst(cardinal, decimal)
             money_graph = money.fst
             punct_graph = PunctuationFst().fst
-            # whitelist_graph = WhiteListFst(input_file=whitelist).fst
+            whitelist_graph = WhiteListFst().fst
             word_graph = WordFst().fst
 
             classify = (
@@ -96,7 +96,7 @@ def __init__(
                 | pynutil.add_weight(measure_graph, 1.1)
                 | pynutil.add_weight(money_graph, 1.1)
                 | pynutil.add_weight(word_graph, 100)
-                # |  pynutil.add_weight(whitelist_graph, 1.01)
+                | pynutil.add_weight(whitelist_graph, 1.01)
             )
 
             punct = pynutil.insert("tokens { ") + pynutil.add_weight(punct_graph, weight=1.1) + pynutil.insert(" }")
diff --git a/nemo_text_processing/inverse_text_normalization/hi/taggers/whitelist.py b/nemo_text_processing/inverse_text_normalization/hi/taggers/whitelist.py
index 2d522c4ba..caeab03b1 100644
--- a/nemo_text_processing/inverse_text_normalization/hi/taggers/whitelist.py
+++ b/nemo_text_processing/inverse_text_normalization/hi/taggers/whitelist.py
@@ -47,7 +47,7 @@ def __init__(self, input_case: str = INPUT_LOWER_CASED, input_file: str = None):
         super().__init__(name="whitelist", kind="classify")
 
         if input_file is None:
-            input_file = get_abs_path("data/whitelist.tsv")
+            input_file = get_abs_path("data/whitelist/whitelist.tsv")
 
         if not os.path.exists(input_file):
             raise ValueError(f"Whitelist file {input_file} not found")
diff --git a/nemo_text_processing/inverse_text_normalization/hi/verbalizers/verbalize.py b/nemo_text_processing/inverse_text_normalization/hi/verbalizers/verbalize.py
index b6f9bd70a..7aaef4fc3 100644
--- a/nemo_text_processing/inverse_text_normalization/hi/verbalizers/verbalize.py
+++ b/nemo_text_processing/inverse_text_normalization/hi/verbalizers/verbalize.py
@@ -22,6 +22,7 @@
 from nemo_text_processing.inverse_text_normalization.hi.verbalizers.money import MoneyFst
 from nemo_text_processing.inverse_text_normalization.hi.verbalizers.ordinal import OrdinalFst
 from nemo_text_processing.inverse_text_normalization.hi.verbalizers.time import TimeFst
+from nemo_text_processing.inverse_text_normalization.hi.verbalizers.word import WordFst
 from nemo_text_processing.inverse_text_normalization.hi.verbalizers.whitelist import WhiteListFst
 
 
@@ -44,12 +45,13 @@ def __init__(self):
         time_graph = TimeFst().fst
         measure_graph = MeasureFst(cardinal, decimal).fst
         money_graph = MoneyFst(cardinal, decimal).fst
-
+        word_graph = WordFst().fst
         whitelist_graph = WhiteListFst().fst
 
         graph = (
             cardinal_graph
             | whitelist_graph
+            | word_graph
             | ordinal_graph
             | decimal_graph
             | fraction_graph
diff --git a/tests/nemo_text_processing/hi/data_inverse_text_normalization/test_cases_whitelist.txt b/tests/nemo_text_processing/hi/data_inverse_text_normalization/test_cases_whitelist.txt
new file mode 100644
index 000000000..30824fced
--- /dev/null
+++ b/tests/nemo_text_processing/hi/data_inverse_text_normalization/test_cases_whitelist.txt
@@ -0,0 +1,12 @@
+डेढ़ बजे~१:३०
+ढाई बजे~२:३०
+मास्टर निखिल तनिष~मा. निखिल तनिष
+पाव~१/४
+श्रीमती ज्योत्सना~स्मि. ज्योत्सना
+डॉक्टर~डॉ.
+आधा कप चाय~१/२ कप चाय
+श्रीमान भारत कुमार~श्री. भारत कुमार
+डॉक्टर प्रशांत~डॉ. प्रशांत
+डेढ़~१.५
+कुमारी~कु.
+ढाई~२.५
\ No newline at end of file
diff --git a/tests/nemo_text_processing/hi/data_inverse_text_normalization/test_cases_word.txt b/tests/nemo_text_processing/hi/data_inverse_text_normalization/test_cases_word.txt
new file mode 100644
index 000000000..ce044e7cf
--- /dev/null
+++ b/tests/nemo_text_processing/hi/data_inverse_text_normalization/test_cases_word.txt
@@ -0,0 +1,15 @@
+नींद~नींद
+याहू!~याहू!
+-~-
+आआआ~आआआ
+आकाशगंगा~आकाशगंगा
+लटरपटर~लटरपटर
+कच्चा-पक्का~कच्चा-पक्का
+गुब्बारा~गुब्बारा
+चिट्ठी~चिट्ठी
+ढूंढना~ढूंढना
+लोहे का!~लोहे का!
+टाटा~टाटा
+~
+झ~झ
+संगीत~संगीत
\ No newline at end of file
diff --git a/tests/nemo_text_processing/hi/test_sparrowhawk_inverse_text_normalization.sh b/tests/nemo_text_processing/hi/test_sparrowhawk_inverse_text_normalization.sh
index 61093c60d..aec7299d5 100644
--- a/tests/nemo_text_processing/hi/test_sparrowhawk_inverse_text_normalization.sh
+++ b/tests/nemo_text_processing/hi/test_sparrowhawk_inverse_text_normalization.sh
@@ -63,6 +63,16 @@ testITNMoney() {
   runtest $input
 }
 
+testITNWord() {
+  input=$PROJECT_DIR/hi/data_inverse_text_normalization/test_cases_word.txt
+  runtest $input
+}
+
+testITNWhiteList() {
+  input=$PROJECT_DIR/hi/data_inverse_text_normalization/test_cases_whitelist.txt
+  runtest $input
+}
+
 
 # Load shUnit2
 . $PROJECT_DIR/../shunit2/shunit2
diff --git a/tests/nemo_text_processing/hi/test_whitelist.py b/tests/nemo_text_processing/hi/test_whitelist.py
index 4a090d823..c6a228e6e 100644
--- a/tests/nemo_text_processing/hi/test_whitelist.py
+++ b/tests/nemo_text_processing/hi/test_whitelist.py
@@ -15,6 +15,7 @@
 import pytest
 from parameterized import parameterized
 
+from nemo_text_processing.inverse_text_normalization.inverse_normalize import InverseNormalizer
 from nemo_text_processing.text_normalization.normalize import Normalizer
 
 from ..utils import CACHE_DIR, parse_test_case_file
@@ -24,6 +25,7 @@ class TestWhitelist:
     normalizer = Normalizer(
         input_case='cased', lang='hi', cache_dir=CACHE_DIR, overwrite_cache=False, post_process=False
     )
+    inverse_normalizer = InverseNormalizer(lang='hi', cache_dir=CACHE_DIR, overwrite_cache=False)
 
     @parameterized.expand(parse_test_case_file('hi/data_text_normalization/test_cases_whitelist.txt'))
     @pytest.mark.run_only_on('CPU')
@@ -31,3 +33,10 @@ class TestWhitelist:
     def test_norm(self, test_input, expected):
         pred = self.normalizer.normalize(test_input, verbose=False)
         assert pred.strip() == expected.strip()
+        
+    @parameterized.expand(parse_test_case_file('hi/data_inverse_text_normalization/test_cases_whitelist.txt'))
+    @pytest.mark.run_only_on('CPU')
+    @pytest.mark.unit
+    def test_denorm(self, test_input, expected):
+        pred = self.inverse_normalizer.inverse_normalize(test_input, verbose=False)
+        assert pred == expected
diff --git a/tests/nemo_text_processing/hi/test_word.py b/tests/nemo_text_processing/hi/test_word.py
index 4d6bd2261..30d809356 100644
--- a/tests/nemo_text_processing/hi/test_word.py
+++ b/tests/nemo_text_processing/hi/test_word.py
@@ -16,6 +16,7 @@
 from parameterized import parameterized
 
 from nemo_text_processing.text_normalization.normalize import Normalizer
+from nemo_text_processing.inverse_text_normalization.inverse_normalize import InverseNormalizer
 
 from ..utils import CACHE_DIR, parse_test_case_file
 
@@ -24,6 +25,8 @@ class TestWord:
     normalizer = Normalizer(
         input_case='cased', lang='hi', cache_dir=CACHE_DIR, overwrite_cache=False, post_process=True
     )
+    inverse_normalizer = InverseNormalizer(lang='hi', cache_dir=CACHE_DIR, overwrite_cache=False)
+
 
     @parameterized.expand(parse_test_case_file('hi/data_text_normalization/test_cases_word.txt'))
     @pytest.mark.run_only_on('CPU')
@@ -31,3 +34,10 @@ class TestWord:
     def test_norm(self, test_input, expected):
         pred = self.normalizer.normalize(test_input, verbose=False, punct_post_process=True)
         assert pred == expected
+        
+    @parameterized.expand(parse_test_case_file('hi/data_inverse_text_normalization/test_cases_word.txt'))
+    @pytest.mark.run_only_on('CPU')
+    @pytest.mark.unit
+    def test_denorm(self, test_input, expected):
+        pred = self.inverse_normalizer.inverse_normalize(test_input, verbose=False)
+        assert pred == expected
diff --git a/tools/text_processing_deployment/pynini_export.py b/tools/text_processing_deployment/pynini_export.py
index 23b1f7deb..6b82dfbec 100644
--- a/tools/text_processing_deployment/pynini_export.py
+++ b/tools/text_processing_deployment/pynini_export.py
@@ -21,7 +21,7 @@
 
 import pynini
 
-from nemo_text_processing.text_normalization.rw.graph_utils import generator_main
+from nemo_text_processing.text_normalization.en.graph_utils import generator_main
 
 # This script exports compiled grammars inside nemo_text_processing into OpenFst finite state archive files
 # tokenize_and_classify.far and verbalize.far for production purposes

From 535af69bb96d376cadcd9e8f03eebddc4afc3c06 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 27 Nov 2024 09:52:07 +0000
Subject: [PATCH 02/28] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 .../hi/taggers/tokenize_and_classify.py                      | 2 +-
 .../inverse_text_normalization/hi/verbalizers/verbalize.py   | 2 +-
 tests/nemo_text_processing/hi/test_whitelist.py              | 2 +-
 tests/nemo_text_processing/hi/test_word.py                   | 5 ++---
 4 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/nemo_text_processing/inverse_text_normalization/hi/taggers/tokenize_and_classify.py b/nemo_text_processing/inverse_text_normalization/hi/taggers/tokenize_and_classify.py
index 9c8168aa0..a5a371d90 100644
--- a/nemo_text_processing/inverse_text_normalization/hi/taggers/tokenize_and_classify.py
+++ b/nemo_text_processing/inverse_text_normalization/hi/taggers/tokenize_and_classify.py
@@ -34,8 +34,8 @@
 from nemo_text_processing.inverse_text_normalization.hi.taggers.ordinal import OrdinalFst
 from nemo_text_processing.inverse_text_normalization.hi.taggers.punctuation import PunctuationFst
 from nemo_text_processing.inverse_text_normalization.hi.taggers.time import TimeFst
-from nemo_text_processing.inverse_text_normalization.hi.taggers.word import WordFst
 from nemo_text_processing.inverse_text_normalization.hi.taggers.whitelist import WhiteListFst
+from nemo_text_processing.inverse_text_normalization.hi.taggers.word import WordFst
 
 
 class ClassifyFst(GraphFst):
diff --git a/nemo_text_processing/inverse_text_normalization/hi/verbalizers/verbalize.py b/nemo_text_processing/inverse_text_normalization/hi/verbalizers/verbalize.py
index 7aaef4fc3..d88bd25d9 100644
--- a/nemo_text_processing/inverse_text_normalization/hi/verbalizers/verbalize.py
+++ b/nemo_text_processing/inverse_text_normalization/hi/verbalizers/verbalize.py
@@ -22,8 +22,8 @@
 from nemo_text_processing.inverse_text_normalization.hi.verbalizers.money import MoneyFst
 from nemo_text_processing.inverse_text_normalization.hi.verbalizers.ordinal import OrdinalFst
 from nemo_text_processing.inverse_text_normalization.hi.verbalizers.time import TimeFst
-from nemo_text_processing.inverse_text_normalization.hi.verbalizers.word import WordFst
 from nemo_text_processing.inverse_text_normalization.hi.verbalizers.whitelist import WhiteListFst
+from nemo_text_processing.inverse_text_normalization.hi.verbalizers.word import WordFst
 
 
 class VerbalizeFst(GraphFst):
diff --git a/tests/nemo_text_processing/hi/test_whitelist.py b/tests/nemo_text_processing/hi/test_whitelist.py
index c6a228e6e..1e45e6a0e 100644
--- a/tests/nemo_text_processing/hi/test_whitelist.py
+++ b/tests/nemo_text_processing/hi/test_whitelist.py
@@ -33,7 +33,7 @@ class TestWhitelist:
     def test_norm(self, test_input, expected):
         pred = self.normalizer.normalize(test_input, verbose=False)
         assert pred.strip() == expected.strip()
-        
+
     @parameterized.expand(parse_test_case_file('hi/data_inverse_text_normalization/test_cases_whitelist.txt'))
     @pytest.mark.run_only_on('CPU')
     @pytest.mark.unit
diff --git a/tests/nemo_text_processing/hi/test_word.py b/tests/nemo_text_processing/hi/test_word.py
index 30d809356..6fc5883cc 100644
--- a/tests/nemo_text_processing/hi/test_word.py
+++ b/tests/nemo_text_processing/hi/test_word.py
@@ -15,8 +15,8 @@
 import pytest
 from parameterized import parameterized
 
-from nemo_text_processing.text_normalization.normalize import Normalizer
 from nemo_text_processing.inverse_text_normalization.inverse_normalize import InverseNormalizer
+from nemo_text_processing.text_normalization.normalize import Normalizer
 
 from ..utils import CACHE_DIR, parse_test_case_file
 
@@ -27,14 +27,13 @@ class TestWord:
     )
     inverse_normalizer = InverseNormalizer(lang='hi', cache_dir=CACHE_DIR, overwrite_cache=False)
 
-
     @parameterized.expand(parse_test_case_file('hi/data_text_normalization/test_cases_word.txt'))
     @pytest.mark.run_only_on('CPU')
     @pytest.mark.unit
     def test_norm(self, test_input, expected):
         pred = self.normalizer.normalize(test_input, verbose=False, punct_post_process=True)
         assert pred == expected
-        
+
     @parameterized.expand(parse_test_case_file('hi/data_inverse_text_normalization/test_cases_word.txt'))
     @pytest.mark.run_only_on('CPU')
     @pytest.mark.unit

From d4e380fdc8a65a8c9847575b991cf80e6873a50c Mon Sep 17 00:00:00 2001
From: Tarushi V <tarushiv@nvidia.com>
Date: Wed, 27 Nov 2024 16:43:57 +0530
Subject: [PATCH 03/28] Update Jenkins HI_TN_CACHE date to 11-27-24-0

Signed-off-by: Tarushi V <tarushiv@nvidia.com>
---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index e9cfcde12..fe6a75161 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -27,7 +27,7 @@ pipeline {
     HY_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/03-12-24-0'
     MR_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/03-12-24-1'
     JA_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/10-17-24-1'
-    HI_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/11-13-24-0'
+    HI_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/11-27-24-0'
     DEFAULT_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
   }
   stages {

From 60f87577e4418f35a06d313166864b8575873d6d Mon Sep 17 00:00:00 2001
From: Tarushi V <tarushiv@nvidia.com>
Date: Thu, 28 Nov 2024 15:23:19 +0530
Subject: [PATCH 04/28] Update Jenkins HI_TN_CACHE date to 11-28-24-0

Signed-off-by: Tarushi V <tarushiv@nvidia.com>
---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index fe6a75161..63fb1a01b 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -27,7 +27,7 @@ pipeline {
     HY_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/03-12-24-0'
     MR_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/03-12-24-1'
     JA_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/10-17-24-1'
-    HI_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/11-27-24-0'
+    HI_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/11-28-24-0'
     DEFAULT_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
   }
   stages {

From 9aa85c0cf927a7459d0e9ee00c91c109c1df7dc8 Mon Sep 17 00:00:00 2001
From: Tarushi V <tarushiv@nvidia.com>
Date: Fri, 29 Nov 2024 15:49:43 +0530
Subject: [PATCH 05/28] Update Jenkins HI_TN_CACHE date to 11-29-24-0

Signed-off-by: Tarushi V <tarushiv@nvidia.com>
---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 63fb1a01b..40dd4d626 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -27,7 +27,7 @@ pipeline {
     HY_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/03-12-24-0'
     MR_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/03-12-24-1'
     JA_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/10-17-24-1'
-    HI_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/11-28-24-0'
+    HI_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/11-29-24-0'
     DEFAULT_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
   }
   stages {

From bf6ebe3c5f5fd8841af8e0176abbbfc8b1116b23 Mon Sep 17 00:00:00 2001
From: Tarushi V <tarushiv@nvidia.com>
Date: Fri, 29 Nov 2024 17:30:48 +0530
Subject: [PATCH 06/28] Update Jenkins HI_TN_CACHE date to 11-29-24-1

Signed-off-by: Tarushi V <tarushiv@nvidia.com>
---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 40dd4d626..4883d7169 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -27,7 +27,7 @@ pipeline {
     HY_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/03-12-24-0'
     MR_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/03-12-24-1'
     JA_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/10-17-24-1'
-    HI_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/11-29-24-0'
+    HI_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/11-29-24-1'
     DEFAULT_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
   }
   stages {

From ba19d36ced05b630b88a4a9c404b01a0a5208442 Mon Sep 17 00:00:00 2001
From: Tarushi V <tarushiv@nvidia.com>
Date: Thu, 12 Dec 2024 15:54:29 +0530
Subject: [PATCH 07/28] Future implementations for date

Signed-off-by: Tarushi V <tarushiv@nvidia.com>
---
 .../hi/data/date/century.tsv                  |  3 ++
 .../hi/taggers/date.py                        | 48 ++++++++++++-----
 .../hi/verbalizers/date.py                    | 52 ++++++++++++++-----
 3 files changed, 76 insertions(+), 27 deletions(-)
 create mode 100644 nemo_text_processing/inverse_text_normalization/hi/data/date/century.tsv

diff --git a/nemo_text_processing/inverse_text_normalization/hi/data/date/century.tsv b/nemo_text_processing/inverse_text_normalization/hi/data/date/century.tsv
new file mode 100644
index 000000000..bd188a059
--- /dev/null
+++ b/nemo_text_processing/inverse_text_normalization/hi/data/date/century.tsv
@@ -0,0 +1,3 @@
+ई. पू.	ईसा पूर्व
+ई.	ईस्वी
+ई.	ईसवी
diff --git a/nemo_text_processing/inverse_text_normalization/hi/taggers/date.py b/nemo_text_processing/inverse_text_normalization/hi/taggers/date.py
index 61183ae72..f5c10ad7c 100644
--- a/nemo_text_processing/inverse_text_normalization/hi/taggers/date.py
+++ b/nemo_text_processing/inverse_text_normalization/hi/taggers/date.py
@@ -14,7 +14,7 @@
 # limitations under the License.
 import pynini
 from pynini.lib import pynutil
-
+ 
 from nemo_text_processing.inverse_text_normalization.hi.graph_utils import (
     NEMO_HI_DIGIT,
     GraphFst,
@@ -22,9 +22,9 @@
     delete_space,
     insert_space,
 )
-from nemo_text_processing.inverse_text_normalization.hi.utils import get_abs_path
-
-
+from nemo_text_processing.inverse_text_normalization.hi.utils import get_abs_path, apply_fst
+ 
+ 
 class DateFst(GraphFst):
     """
         Finite state transducer for classifying date, 
@@ -34,22 +34,25 @@ class DateFst(GraphFst):
         cardinal: CardinalFst
         date: DateFst
     """
-
+ 
     def __init__(self, cardinal: GraphFst):
         super().__init__(name="date", kind="classify")
-
+ 
         graph_year = pynutil.add_weight(
             pynini.compose(cardinal.graph_no_exception, pynini.closure(NEMO_HI_DIGIT, 1, 4)), 0.03
         )
-
+ 
         month_graph = pynini.string_file(get_abs_path("data/date/months.tsv"))
         graph_date_days = pynini.string_file(get_abs_path("data/date/date_days.tsv")).invert()
-
+        graph_century = pynini.string_file(get_abs_path("data/date/century.tsv")).invert()
+ 
+ 
         self.day = pynutil.insert("day: \"") + graph_date_days + pynutil.insert("\" ")
         self.month = pynutil.insert("month: \"") + month_graph + pynutil.insert("\" ")
         self.year = pynutil.insert("year: \"") + graph_year + pynutil.insert("\" ")
+        self.century = pynutil.insert("text: \"") + graph_century + pynutil.insert("\" ")
         insert_comma = pynutil.insert(", ")
-
+ 
         graph_day_month = self.day + delete_space + self.month
         graph_month_day = self.month + delete_space + self.day
         graph_month_day += pynutil.insert(" preserve_order: true")
@@ -58,9 +61,28 @@ def __init__(self, cardinal: GraphFst):
         graph_month_day_year += pynutil.insert(" preserve_order: true")
         graph_month_year = self.month + delete_space + self.year
         graph_saal = self.year
-
-        graph = graph_day_month | graph_month_day | graph_day_month_year | graph_month_day_year | graph_month_year
-        self.graph = graph.optimize()
-
+        graph_AD_BC = self.year + delete_space + self.century
+        graph_day_month_year_century = self.day + delete_space + self.month + delete_space + self.year + delete_space + self.century
+        graph_month_year_century = self.month + delete_space + self.year + delete_space + self.century
+        graph_year_range = self.year + delete_space + pynutil.delete("से") + delete_space + self.year
+ 
+ 
+        graph = graph_day_month | graph_month_day | graph_day_month_year | graph_month_day_year | graph_month_year | graph_saal | graph_AD_BC | graph_day_month_year_century | graph_month_year_century | graph_year_range
         final_graph = self.add_tokens(graph)
         self.fst = final_graph
+
+#from nemo_text_processing.inverse_text_normalization.hi.taggers.cardinal import CardinalFst
+#cardinal = CardinalFst()
+#date = DateFst(cardinal)
+#input_text = "पच्चीस मार्च दो हज़ार दस"
+#input_text = "छ: मार्च उन्नीस सौ नब्बे"
+#input_text = "छ: मार्च उन्नीस सौ नब्बे ईस्वी"
+#input_text = "छह मार्च दो हज़ार दस"
+#input_text = "तीन फ़रवरी"
+#input_text = "चौवालीस सौ ईसा पूर्व"
+#input_text = "फ़रवरी चौवालीस सौ ईसा पूर्व"
+#input_text = "चौवालीस सौ ईस्वी"
+#input_text = "उन्नीस सौ बीस से उन्नीस सौ छब्बीस"
+#input_text = "उन्नीस सौ बीस से छब्बीस"
+#output = apply_fst(input_text, date.fst)
+#print(output)
diff --git a/nemo_text_processing/inverse_text_normalization/hi/verbalizers/date.py b/nemo_text_processing/inverse_text_normalization/hi/verbalizers/date.py
index 5442777da..235c2ccb1 100644
--- a/nemo_text_processing/inverse_text_normalization/hi/verbalizers/date.py
+++ b/nemo_text_processing/inverse_text_normalization/hi/verbalizers/date.py
@@ -11,25 +11,27 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+ 
 import pynini
 from pynini.lib import pynutil
-
+ 
 from nemo_text_processing.inverse_text_normalization.hi.graph_utils import (
     NEMO_NOT_QUOTE,
     GraphFst,
     delete_extra_space,
     delete_space,
 )
-
-
+ 
+from nemo_text_processing.inverse_text_normalization.hi.utils import apply_fst
+ 
+ 
 class DateFst(GraphFst):
     """
     Finite state transducer for verbalizing date, e.g.
         date { month: "जनवरी" day: "५" year: "२०१२" preserve_order: true } -> जनवरी ५ २०१२
         date { day: "५" month: "जनवरी" year: "२०१२" preserve_order: true } -> ५ जनवरी २०१२
     """
-
+ 
     def __init__(self):
         super().__init__(name="date", kind="verbalize")
         month = (
@@ -61,22 +63,32 @@ def __init__(self):
             + pynini.closure(NEMO_NOT_QUOTE, 1)
             + pynutil.delete("\"")
         )
-        graph_fy = period + delete_space + year
+        graph_fy = year
+        graph_fy |= period + delete_space + year
+        #century
+        graph_century = year + delete_extra_space + period
         # month (day) year
         graph_mdy = month + delete_extra_space + day + pynutil.insert(",") + delete_extra_space + year
-
+ 
         # (day) month year
         graph_dmy = day + delete_extra_space + month + pynutil.insert(",") + delete_extra_space + year
-
+        # day month year century
+        graph_dmyc = day + delete_extra_space + month + pynutil.insert(",") + delete_extra_space + year + delete_extra_space + period
+ 
         # month year
         graph_my = month + pynini.closure(delete_extra_space + year, 0, 1)
-
+        # month year century
+        graph_myc = month + pynutil.insert(",") + delete_extra_space + year + delete_extra_space + period
+ 
         # month day
         graph_md = month + pynini.closure(delete_extra_space + day, 0, 1)
-
+ 
         # day month
         graph_dm = day + pynini.closure(delete_extra_space + month, 0, 1)
-
+        # date range
+        graph_year_range = year + delete_extra_space + pynutil.insert("-") + delete_extra_space + year
+ 
+ 
         optional_preserve_order = pynini.closure(
             pynutil.delete("preserve_order:") + delete_space + pynutil.delete("true") + delete_space
             | pynutil.delete("field_order:")
@@ -86,12 +98,24 @@ def __init__(self):
             + pynutil.delete("\"")
             + delete_space
         )
-
+ 
         final_graph = (
-            (graph_fy | graph_mdy | graph_dmy | graph_my | graph_md | graph_dm)
+            (graph_fy | graph_mdy | graph_dmy | graph_my | graph_md | graph_dm | graph_century | graph_dmyc | graph_myc | graph_year_range)
             + delete_space
             + optional_preserve_order
         )
-
+ 
         delete_tokens = self.delete_tokens(final_graph)
         self.fst = delete_tokens.optimize()
+date = DateFst()
+#input_text = 'date { period: "सन " year: "२०१९"  }'
+#input_text = 'date { day: "१७"month: "अप्रैल"year: "२००२" }'
+#input_text = 'date { day: "२५" month: "मार्च" year: "२०१०"  }'
+#input_text = 'date { day: "१७" month: "अक्टूबर" year: "२०१९"  }'
+#input_text = 'date { year: "४४००"  }'
+#input_text = 'date { year: "४४००" text: "ईस्वी" }'
+#input_text = 'date { year: "४४००" text: "ई. पू."  }'
+#input_text = 'date { day: "२५" month: "मार्च" year: "२०१०" text: "ई. पू." }'
+input_text = 'date { year: "१९२०" year: "१९२६" }'
+output = apply_fst(input_text, date.fst)
+print(output)

From 6452e610eb75c968528b08435f4e762900650570 Mon Sep 17 00:00:00 2001
From: Tarushi V <tarushiv@nvidia.com>
Date: Mon, 16 Dec 2024 16:12:52 +0530
Subject: [PATCH 08/28] Push rough date code for reference

Signed-off-by: Tarushi V <tarushiv@nvidia.com>
---
 .../hi/taggers/date.py                        | 22 ++++++++++--------
 .../hi/verbalizers/date.py                    | 23 +++++++++++++------
 .../test_cases_date.txt                       | 10 +++++++-
 3 files changed, 37 insertions(+), 18 deletions(-)

diff --git a/nemo_text_processing/inverse_text_normalization/hi/taggers/date.py b/nemo_text_processing/inverse_text_normalization/hi/taggers/date.py
index f5c10ad7c..51c4d1298 100644
--- a/nemo_text_processing/inverse_text_normalization/hi/taggers/date.py
+++ b/nemo_text_processing/inverse_text_normalization/hi/taggers/date.py
@@ -49,7 +49,7 @@ def __init__(self, cardinal: GraphFst):
  
         self.day = pynutil.insert("day: \"") + graph_date_days + pynutil.insert("\" ")
         self.month = pynutil.insert("month: \"") + month_graph + pynutil.insert("\" ")
-        self.year = pynutil.insert("year: \"") + graph_year + pynutil.insert("\" ")
+        self.year = pynutil.insert("year: \"") + graph_year + delete_space + pynini.cross("से", "-") + delete_space +  graph_year + delete_space + pynutil.insert("\" ")
         self.century = pynutil.insert("text: \"") + graph_century + pynutil.insert("\" ")
         insert_comma = pynutil.insert(", ")
  
@@ -64,16 +64,16 @@ def __init__(self, cardinal: GraphFst):
         graph_AD_BC = self.year + delete_space + self.century
         graph_day_month_year_century = self.day + delete_space + self.month + delete_space + self.year + delete_space + self.century
         graph_month_year_century = self.month + delete_space + self.year + delete_space + self.century
-        graph_year_range = self.year + delete_space + pynutil.delete("से") + delete_space + self.year
+        graph_year_range = self.year
+        graph_date_exceptions = self.month + delete_space + pynutil.delete("की") + delete_space + self.day
  
- 
-        graph = graph_day_month | graph_month_day | graph_day_month_year | graph_month_day_year | graph_month_year | graph_saal | graph_AD_BC | graph_day_month_year_century | graph_month_year_century | graph_year_range
+        graph = graph_day_month | graph_month_day | graph_day_month_year | graph_month_day_year | graph_month_year | graph_saal | graph_AD_BC | graph_day_month_year_century | graph_month_year_century | graph_year_range | graph_date_exceptions
         final_graph = self.add_tokens(graph)
         self.fst = final_graph
 
-#from nemo_text_processing.inverse_text_normalization.hi.taggers.cardinal import CardinalFst
-#cardinal = CardinalFst()
-#date = DateFst(cardinal)
+from nemo_text_processing.inverse_text_normalization.hi.taggers.cardinal import CardinalFst
+cardinal = CardinalFst()
+date = DateFst(cardinal)
 #input_text = "पच्चीस मार्च दो हज़ार दस"
 #input_text = "छ: मार्च उन्नीस सौ नब्बे"
 #input_text = "छ: मार्च उन्नीस सौ नब्बे ईस्वी"
@@ -81,8 +81,10 @@ def __init__(self, cardinal: GraphFst):
 #input_text = "तीन फ़रवरी"
 #input_text = "चौवालीस सौ ईसा पूर्व"
 #input_text = "फ़रवरी चौवालीस सौ ईसा पूर्व"
-#input_text = "चौवालीस सौ ईस्वी"
+input_text = "चौवालीस सौ ईस्वी"
 #input_text = "उन्नीस सौ बीस से उन्नीस सौ छब्बीस"
 #input_text = "उन्नीस सौ बीस से छब्बीस"
-#output = apply_fst(input_text, date.fst)
-#print(output)
+#input_text = "मार्च की दो"
+#input_text = "फ़रवरी की बीस"
+output = apply_fst(input_text, date.fst)
+print(output)
diff --git a/nemo_text_processing/inverse_text_normalization/hi/verbalizers/date.py b/nemo_text_processing/inverse_text_normalization/hi/verbalizers/date.py
index 235c2ccb1..922bcf049 100644
--- a/nemo_text_processing/inverse_text_normalization/hi/verbalizers/date.py
+++ b/nemo_text_processing/inverse_text_normalization/hi/verbalizers/date.py
@@ -65,18 +65,22 @@ def __init__(self):
         )
         graph_fy = year
         graph_fy |= period + delete_space + year
+        
         #century
         graph_century = year + delete_extra_space + period
+        
         # month (day) year
         graph_mdy = month + delete_extra_space + day + pynutil.insert(",") + delete_extra_space + year
  
         # (day) month year
         graph_dmy = day + delete_extra_space + month + pynutil.insert(",") + delete_extra_space + year
+        
         # day month year century
         graph_dmyc = day + delete_extra_space + month + pynutil.insert(",") + delete_extra_space + year + delete_extra_space + period
  
         # month year
         graph_my = month + pynini.closure(delete_extra_space + year, 0, 1)
+        
         # month year century
         graph_myc = month + pynutil.insert(",") + delete_extra_space + year + delete_extra_space + period
  
@@ -85,9 +89,12 @@ def __init__(self):
  
         # day month
         graph_dm = day + pynini.closure(delete_extra_space + month, 0, 1)
-        # date range
-        graph_year_range = year + delete_extra_space + pynutil.insert("-") + delete_extra_space + year
- 
+        
+        # year range
+        graph_year_range = year
+        
+        # date exceptions
+        #graph_date_exceptions = day + delete_extra_space + pynutil.insert("की") + delete_extra_space + month
  
         optional_preserve_order = pynini.closure(
             pynutil.delete("preserve_order:") + delete_space + pynutil.delete("true") + delete_space
@@ -107,7 +114,8 @@ def __init__(self):
  
         delete_tokens = self.delete_tokens(final_graph)
         self.fst = delete_tokens.optimize()
-date = DateFst()
+        
+#date = DateFst()
 #input_text = 'date { period: "सन " year: "२०१९"  }'
 #input_text = 'date { day: "१७"month: "अप्रैल"year: "२००२" }'
 #input_text = 'date { day: "२५" month: "मार्च" year: "२०१०"  }'
@@ -116,6 +124,7 @@ def __init__(self):
 #input_text = 'date { year: "४४००" text: "ईस्वी" }'
 #input_text = 'date { year: "४४००" text: "ई. पू."  }'
 #input_text = 'date { day: "२५" month: "मार्च" year: "२०१०" text: "ई. पू." }'
-input_text = 'date { year: "१९२०" year: "१९२६" }'
-output = apply_fst(input_text, date.fst)
-print(output)
+#input_text = 'date { year: "१९२०-२६" }'
+#input_text = 'date { month: "फ़रवरी" day: "२०" }'
+#output = apply_fst(input_text, date.fst)
+#print(output)
diff --git a/tests/nemo_text_processing/hi/data_inverse_text_normalization/test_cases_date.txt b/tests/nemo_text_processing/hi/data_inverse_text_normalization/test_cases_date.txt
index bdc450fdd..96d5cbadf 100644
--- a/tests/nemo_text_processing/hi/data_inverse_text_normalization/test_cases_date.txt
+++ b/tests/nemo_text_processing/hi/data_inverse_text_normalization/test_cases_date.txt
@@ -22,4 +22,12 @@
 सत्ताईस जुलाई दो हज़ार ग्यारह~२७ जुलाई, २०११
 जुलाई सत्ताईस~जुलाई २७
 वर्ष दो हज़ार उन्नीस~वर्ष २०१९
-सन उन्नीस सौ नब्बे~सन १९९०
\ No newline at end of file
+सन उन्नीस सौ नब्बे~सन १९९०
+उन्नीस सौ नब्बे से उन्नीस सौ इक्यानबे~१९९०-१९९१
+दो हज़ार पाँच से दो हज़ार उन्नीस~२००५-२०१९
+दो हज़ार पाँच से उन्नीस~२००५-१९
+चौंतीस सौ ईसा पूर्व~३४०० ई. पू.
+उन्नीस सौ बीस ईस्वी~१९२० ई.
+पच्चीस जनवरी अठारह सौ तिरेपन ईसवी~२५ जनवरी, १८५३ ई.
+इकत्तीस मई उन्नीस सौ नब्बे ईसवी~३१ मई, १९९० ई.
+पच्चीस ईसा पूर्व~२५ ई.पू.

From 3821339e20d72763723ec4bea8abd4ed95638105 Mon Sep 17 00:00:00 2001
From: Tarushi V <tarushiv@nvidia.com>
Date: Thu, 9 Jan 2025 14:32:34 +0530
Subject: [PATCH 09/28] Future implementations date.py

Signed-off-by: Tarushi V <tarushiv@nvidia.com>
---
 .../hi/data/date/century.tsv                  |  2 +-
 .../hi/taggers/date.py                        | 33 +++++--------------
 .../hi/verbalizers/date.py                    | 23 ++-----------
 .../test_cases_date.txt                       |  4 ++-
 tools/text_processing_deployment/Dockerfile   | 13 +++++---
 5 files changed, 23 insertions(+), 52 deletions(-)

diff --git a/nemo_text_processing/inverse_text_normalization/hi/data/date/century.tsv b/nemo_text_processing/inverse_text_normalization/hi/data/date/century.tsv
index bd188a059..da69e23eb 100644
--- a/nemo_text_processing/inverse_text_normalization/hi/data/date/century.tsv
+++ b/nemo_text_processing/inverse_text_normalization/hi/data/date/century.tsv
@@ -1,3 +1,3 @@
-ई. पू.	ईसा पूर्व
+ई.पू.	ईसा पूर्व
 ई.	ईस्वी
 ई.	ईसवी
diff --git a/nemo_text_processing/inverse_text_normalization/hi/taggers/date.py b/nemo_text_processing/inverse_text_normalization/hi/taggers/date.py
index 51c4d1298..d3fb48eca 100644
--- a/nemo_text_processing/inverse_text_normalization/hi/taggers/date.py
+++ b/nemo_text_processing/inverse_text_normalization/hi/taggers/date.py
@@ -22,7 +22,7 @@
     delete_space,
     insert_space,
 )
-from nemo_text_processing.inverse_text_normalization.hi.utils import get_abs_path, apply_fst
+from nemo_text_processing.inverse_text_normalization.hi.utils import get_abs_path
  
  
 class DateFst(GraphFst):
@@ -46,10 +46,10 @@ def __init__(self, cardinal: GraphFst):
         graph_date_days = pynini.string_file(get_abs_path("data/date/date_days.tsv")).invert()
         graph_century = pynini.string_file(get_abs_path("data/date/century.tsv")).invert()
  
- 
         self.day = pynutil.insert("day: \"") + graph_date_days + pynutil.insert("\" ")
         self.month = pynutil.insert("month: \"") + month_graph + pynutil.insert("\" ")
-        self.year = pynutil.insert("year: \"") + graph_year + delete_space + pynini.cross("से", "-") + delete_space +  graph_year + delete_space + pynutil.insert("\" ")
+        self.year = pynutil.insert("year: \"") + graph_year + pynutil.insert("\" ")
+        self.year_range = pynutil.insert("year: \"") + graph_year + delete_space + pynini.cross("से", "-") + delete_space +  graph_year + delete_space + pynutil.insert("\" ")
         self.century = pynutil.insert("text: \"") + graph_century + pynutil.insert("\" ")
         insert_comma = pynutil.insert(", ")
  
@@ -64,27 +64,12 @@ def __init__(self, cardinal: GraphFst):
         graph_AD_BC = self.year + delete_space + self.century
         graph_day_month_year_century = self.day + delete_space + self.month + delete_space + self.year + delete_space + self.century
         graph_month_year_century = self.month + delete_space + self.year + delete_space + self.century
-        graph_year_range = self.year
+        graph_year_range = self.year_range
+
         graph_date_exceptions = self.month + delete_space + pynutil.delete("की") + delete_space + self.day
- 
-        graph = graph_day_month | graph_month_day | graph_day_month_year | graph_month_day_year | graph_month_year | graph_saal | graph_AD_BC | graph_day_month_year_century | graph_month_year_century | graph_year_range | graph_date_exceptions
+        graph_date_exceptions += pynutil.insert("preserve_order: true")
+
+        
+        graph = (graph_day_month | graph_month_day | graph_day_month_year | graph_month_day_year | graph_month_year | graph_saal | graph_AD_BC | graph_day_month_year_century | graph_month_year_century | graph_year_range | graph_date_exceptions)
         final_graph = self.add_tokens(graph)
         self.fst = final_graph
-
-from nemo_text_processing.inverse_text_normalization.hi.taggers.cardinal import CardinalFst
-cardinal = CardinalFst()
-date = DateFst(cardinal)
-#input_text = "पच्चीस मार्च दो हज़ार दस"
-#input_text = "छ: मार्च उन्नीस सौ नब्बे"
-#input_text = "छ: मार्च उन्नीस सौ नब्बे ईस्वी"
-#input_text = "छह मार्च दो हज़ार दस"
-#input_text = "तीन फ़रवरी"
-#input_text = "चौवालीस सौ ईसा पूर्व"
-#input_text = "फ़रवरी चौवालीस सौ ईसा पूर्व"
-input_text = "चौवालीस सौ ईस्वी"
-#input_text = "उन्नीस सौ बीस से उन्नीस सौ छब्बीस"
-#input_text = "उन्नीस सौ बीस से छब्बीस"
-#input_text = "मार्च की दो"
-#input_text = "फ़रवरी की बीस"
-output = apply_fst(input_text, date.fst)
-print(output)
diff --git a/nemo_text_processing/inverse_text_normalization/hi/verbalizers/date.py b/nemo_text_processing/inverse_text_normalization/hi/verbalizers/date.py
index 922bcf049..1945f9e5c 100644
--- a/nemo_text_processing/inverse_text_normalization/hi/verbalizers/date.py
+++ b/nemo_text_processing/inverse_text_normalization/hi/verbalizers/date.py
@@ -21,9 +21,7 @@
     delete_extra_space,
     delete_space,
 )
- 
-from nemo_text_processing.inverse_text_normalization.hi.utils import apply_fst
- 
+  
  
 class DateFst(GraphFst):
     """
@@ -93,9 +91,6 @@ def __init__(self):
         # year range
         graph_year_range = year
         
-        # date exceptions
-        #graph_date_exceptions = day + delete_extra_space + pynutil.insert("की") + delete_extra_space + month
- 
         optional_preserve_order = pynini.closure(
             pynutil.delete("preserve_order:") + delete_space + pynutil.delete("true") + delete_space
             | pynutil.delete("field_order:")
@@ -105,7 +100,7 @@ def __init__(self):
             + pynutil.delete("\"")
             + delete_space
         )
- 
+
         final_graph = (
             (graph_fy | graph_mdy | graph_dmy | graph_my | graph_md | graph_dm | graph_century | graph_dmyc | graph_myc | graph_year_range)
             + delete_space
@@ -114,17 +109,3 @@ def __init__(self):
  
         delete_tokens = self.delete_tokens(final_graph)
         self.fst = delete_tokens.optimize()
-        
-#date = DateFst()
-#input_text = 'date { period: "सन " year: "२०१९"  }'
-#input_text = 'date { day: "१७"month: "अप्रैल"year: "२००२" }'
-#input_text = 'date { day: "२५" month: "मार्च" year: "२०१०"  }'
-#input_text = 'date { day: "१७" month: "अक्टूबर" year: "२०१९"  }'
-#input_text = 'date { year: "४४००"  }'
-#input_text = 'date { year: "४४००" text: "ईस्वी" }'
-#input_text = 'date { year: "४४००" text: "ई. पू."  }'
-#input_text = 'date { day: "२५" month: "मार्च" year: "२०१०" text: "ई. पू." }'
-#input_text = 'date { year: "१९२०-२६" }'
-#input_text = 'date { month: "फ़रवरी" day: "२०" }'
-#output = apply_fst(input_text, date.fst)
-#print(output)
diff --git a/tests/nemo_text_processing/hi/data_inverse_text_normalization/test_cases_date.txt b/tests/nemo_text_processing/hi/data_inverse_text_normalization/test_cases_date.txt
index 96d5cbadf..6d570a9c5 100644
--- a/tests/nemo_text_processing/hi/data_inverse_text_normalization/test_cases_date.txt
+++ b/tests/nemo_text_processing/hi/data_inverse_text_normalization/test_cases_date.txt
@@ -26,8 +26,10 @@
 उन्नीस सौ नब्बे से उन्नीस सौ इक्यानबे~१९९०-१९९१
 दो हज़ार पाँच से दो हज़ार उन्नीस~२००५-२०१९
 दो हज़ार पाँच से उन्नीस~२००५-१९
-चौंतीस सौ ईसा पूर्व~३४०० ई. पू.
+चौंतीस सौ ईसा पूर्व~३४०० ई.पू.
 उन्नीस सौ बीस ईस्वी~१९२० ई.
 पच्चीस जनवरी अठारह सौ तिरेपन ईसवी~२५ जनवरी, १८५३ ई.
 इकत्तीस मई उन्नीस सौ नब्बे ईसवी~३१ मई, १९९० ई.
 पच्चीस ईसा पूर्व~२५ ई.पू.
+मार्च की दो~मार्च २
+फ़रवरी की बीस~फ़रवरी २०
diff --git a/tools/text_processing_deployment/Dockerfile b/tools/text_processing_deployment/Dockerfile
index 22c2b8b92..be6fedcda 100644
--- a/tools/text_processing_deployment/Dockerfile
+++ b/tools/text_processing_deployment/Dockerfile
@@ -16,22 +16,25 @@
 # Dockerfile for C++ (inverse) text normalization backend Sparrowhawk https://github.com/google/sparrowhawk
 
 # set base image (host OS)
-FROM conda/miniconda3
+FROM continuumio/miniconda3 
+
 
 # set the working directory in the container
 WORKDIR /workspace
 
 # install dependencies
 RUN echo "deb http://archive.debian.org/debian stretch main contrib non-free" > /etc/apt/sources.list
+RUN apt-get update &&  apt-get upgrade -y &&   apt-get install -y --reinstall build-essential pkg-config  git make  wget
 RUN conda install conda-build -y
-RUN apt-get update &&     apt-get install -y --reinstall build-essential pkg-config &&     apt-get upgrade -y &&     apt-get install -y git &&     apt-get install make
+RUN conda install -c conda-forge thrax=1.3.4 -y
 RUN git clone https://github.com/google/re2 
 RUN cd re2 && git checkout tags/2022-02-01 && make && make install
-RUN apt-get install build-essential -y && apt-get install wget -y
 RUN wget https://github.com/protocolbuffers/protobuf/releases/download/v2.5.0/protobuf-2.5.0.tar.gz
 RUN tar xzvf protobuf-2.5.0.tar.gz
 RUN cd protobuf-2.5.0 && ./configure && make && make install && ldconfig
-RUN conda install -c conda-forge thrax=1.3.4 -y
+RUN printf  "# Conda lib path \n/opt/conda/lib" > /etc/ld.so.conf.d/conda.so.conf
+ENV CPPFLAGS="-I/opt/conda/include"
+ENV LDFLAGS="-L/opt/conda/lib"
 RUN git clone https://github.com/anand-nv/sparrowhawk.git && cd sparrowhawk &&  git checkout nemo_tests &&   apt-get install -y autoconf &&     bash autoreconf && ./configure && make && make install && ldconfig
 RUN git clone https://github.com/kward/shunit2.git
-RUN echo "DONE"
\ No newline at end of file
+RUN echo "DONE"

From 6ece14bf3c3f0340df1b69768eaaca39e86f0557 Mon Sep 17 00:00:00 2001
From: Tarushi V <tarushiv@nvidia.com>
Date: Thu, 9 Jan 2025 14:35:13 +0530
Subject: [PATCH 10/28] Cleanup

Signed-off-by: Tarushi V <tarushiv@nvidia.com>
---
 .../inverse_text_normalization/hi/taggers/date.py                | 1 -
 1 file changed, 1 deletion(-)

diff --git a/nemo_text_processing/inverse_text_normalization/hi/taggers/date.py b/nemo_text_processing/inverse_text_normalization/hi/taggers/date.py
index d3fb48eca..e1070fd71 100644
--- a/nemo_text_processing/inverse_text_normalization/hi/taggers/date.py
+++ b/nemo_text_processing/inverse_text_normalization/hi/taggers/date.py
@@ -69,7 +69,6 @@ def __init__(self, cardinal: GraphFst):
         graph_date_exceptions = self.month + delete_space + pynutil.delete("की") + delete_space + self.day
         graph_date_exceptions += pynutil.insert("preserve_order: true")
 
-        
         graph = (graph_day_month | graph_month_day | graph_day_month_year | graph_month_day_year | graph_month_year | graph_saal | graph_AD_BC | graph_day_month_year_century | graph_month_year_century | graph_year_range | graph_date_exceptions)
         final_graph = self.add_tokens(graph)
         self.fst = final_graph

From 6ec714c8a394845c4ba275a9620d310d6529a412 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 9 Jan 2025 09:08:17 +0000
Subject: [PATCH 11/28] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 .../hi/taggers/date.py                        | 45 ++++++++++----
 .../hi/verbalizers/date.py                    | 58 +++++++++++++------
 2 files changed, 73 insertions(+), 30 deletions(-)

diff --git a/nemo_text_processing/inverse_text_normalization/hi/taggers/date.py b/nemo_text_processing/inverse_text_normalization/hi/taggers/date.py
index e1070fd71..6859f0834 100644
--- a/nemo_text_processing/inverse_text_normalization/hi/taggers/date.py
+++ b/nemo_text_processing/inverse_text_normalization/hi/taggers/date.py
@@ -14,7 +14,7 @@
 # limitations under the License.
 import pynini
 from pynini.lib import pynutil
- 
+
 from nemo_text_processing.inverse_text_normalization.hi.graph_utils import (
     NEMO_HI_DIGIT,
     GraphFst,
@@ -23,8 +23,8 @@
     insert_space,
 )
 from nemo_text_processing.inverse_text_normalization.hi.utils import get_abs_path
- 
- 
+
+
 class DateFst(GraphFst):
     """
         Finite state transducer for classifying date, 
@@ -34,25 +34,34 @@ class DateFst(GraphFst):
         cardinal: CardinalFst
         date: DateFst
     """
- 
+
     def __init__(self, cardinal: GraphFst):
         super().__init__(name="date", kind="classify")
- 
+
         graph_year = pynutil.add_weight(
             pynini.compose(cardinal.graph_no_exception, pynini.closure(NEMO_HI_DIGIT, 1, 4)), 0.03
         )
- 
+
         month_graph = pynini.string_file(get_abs_path("data/date/months.tsv"))
         graph_date_days = pynini.string_file(get_abs_path("data/date/date_days.tsv")).invert()
         graph_century = pynini.string_file(get_abs_path("data/date/century.tsv")).invert()
- 
+
         self.day = pynutil.insert("day: \"") + graph_date_days + pynutil.insert("\" ")
         self.month = pynutil.insert("month: \"") + month_graph + pynutil.insert("\" ")
         self.year = pynutil.insert("year: \"") + graph_year + pynutil.insert("\" ")
-        self.year_range = pynutil.insert("year: \"") + graph_year + delete_space + pynini.cross("से", "-") + delete_space +  graph_year + delete_space + pynutil.insert("\" ")
+        self.year_range = (
+            pynutil.insert("year: \"")
+            + graph_year
+            + delete_space
+            + pynini.cross("से", "-")
+            + delete_space
+            + graph_year
+            + delete_space
+            + pynutil.insert("\" ")
+        )
         self.century = pynutil.insert("text: \"") + graph_century + pynutil.insert("\" ")
         insert_comma = pynutil.insert(", ")
- 
+
         graph_day_month = self.day + delete_space + self.month
         graph_month_day = self.month + delete_space + self.day
         graph_month_day += pynutil.insert(" preserve_order: true")
@@ -62,13 +71,27 @@ def __init__(self, cardinal: GraphFst):
         graph_month_year = self.month + delete_space + self.year
         graph_saal = self.year
         graph_AD_BC = self.year + delete_space + self.century
-        graph_day_month_year_century = self.day + delete_space + self.month + delete_space + self.year + delete_space + self.century
+        graph_day_month_year_century = (
+            self.day + delete_space + self.month + delete_space + self.year + delete_space + self.century
+        )
         graph_month_year_century = self.month + delete_space + self.year + delete_space + self.century
         graph_year_range = self.year_range
 
         graph_date_exceptions = self.month + delete_space + pynutil.delete("की") + delete_space + self.day
         graph_date_exceptions += pynutil.insert("preserve_order: true")
 
-        graph = (graph_day_month | graph_month_day | graph_day_month_year | graph_month_day_year | graph_month_year | graph_saal | graph_AD_BC | graph_day_month_year_century | graph_month_year_century | graph_year_range | graph_date_exceptions)
+        graph = (
+            graph_day_month
+            | graph_month_day
+            | graph_day_month_year
+            | graph_month_day_year
+            | graph_month_year
+            | graph_saal
+            | graph_AD_BC
+            | graph_day_month_year_century
+            | graph_month_year_century
+            | graph_year_range
+            | graph_date_exceptions
+        )
         final_graph = self.add_tokens(graph)
         self.fst = final_graph
diff --git a/nemo_text_processing/inverse_text_normalization/hi/verbalizers/date.py b/nemo_text_processing/inverse_text_normalization/hi/verbalizers/date.py
index 1945f9e5c..eacfb5765 100644
--- a/nemo_text_processing/inverse_text_normalization/hi/verbalizers/date.py
+++ b/nemo_text_processing/inverse_text_normalization/hi/verbalizers/date.py
@@ -11,25 +11,25 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
- 
+
 import pynini
 from pynini.lib import pynutil
- 
+
 from nemo_text_processing.inverse_text_normalization.hi.graph_utils import (
     NEMO_NOT_QUOTE,
     GraphFst,
     delete_extra_space,
     delete_space,
 )
-  
- 
+
+
 class DateFst(GraphFst):
     """
     Finite state transducer for verbalizing date, e.g.
         date { month: "जनवरी" day: "५" year: "२०१२" preserve_order: true } -> जनवरी ५ २०१२
         date { day: "५" month: "जनवरी" year: "२०१२" preserve_order: true } -> ५ जनवरी २०१२
     """
- 
+
     def __init__(self):
         super().__init__(name="date", kind="verbalize")
         month = (
@@ -63,34 +63,43 @@ def __init__(self):
         )
         graph_fy = year
         graph_fy |= period + delete_space + year
-        
-        #century
+
+        # century
         graph_century = year + delete_extra_space + period
-        
+
         # month (day) year
         graph_mdy = month + delete_extra_space + day + pynutil.insert(",") + delete_extra_space + year
- 
+
         # (day) month year
         graph_dmy = day + delete_extra_space + month + pynutil.insert(",") + delete_extra_space + year
-        
+
         # day month year century
-        graph_dmyc = day + delete_extra_space + month + pynutil.insert(",") + delete_extra_space + year + delete_extra_space + period
- 
+        graph_dmyc = (
+            day
+            + delete_extra_space
+            + month
+            + pynutil.insert(",")
+            + delete_extra_space
+            + year
+            + delete_extra_space
+            + period
+        )
+
         # month year
         graph_my = month + pynini.closure(delete_extra_space + year, 0, 1)
-        
+
         # month year century
         graph_myc = month + pynutil.insert(",") + delete_extra_space + year + delete_extra_space + period
- 
+
         # month day
         graph_md = month + pynini.closure(delete_extra_space + day, 0, 1)
- 
+
         # day month
         graph_dm = day + pynini.closure(delete_extra_space + month, 0, 1)
-        
+
         # year range
         graph_year_range = year
-        
+
         optional_preserve_order = pynini.closure(
             pynutil.delete("preserve_order:") + delete_space + pynutil.delete("true") + delete_space
             | pynutil.delete("field_order:")
@@ -102,10 +111,21 @@ def __init__(self):
         )
 
         final_graph = (
-            (graph_fy | graph_mdy | graph_dmy | graph_my | graph_md | graph_dm | graph_century | graph_dmyc | graph_myc | graph_year_range)
+            (
+                graph_fy
+                | graph_mdy
+                | graph_dmy
+                | graph_my
+                | graph_md
+                | graph_dm
+                | graph_century
+                | graph_dmyc
+                | graph_myc
+                | graph_year_range
+            )
             + delete_space
             + optional_preserve_order
         )
- 
+
         delete_tokens = self.delete_tokens(final_graph)
         self.fst = delete_tokens.optimize()

From 2adeee40617353c86cdb208f828c25a95f114a43 Mon Sep 17 00:00:00 2001
From: Tarushi V <tarushiv@nvidia.com>
Date: Wed, 15 Jan 2025 09:52:52 +0530
Subject: [PATCH 12/28] Updation of Jenkinsfile

Signed-off-by: Tarushi V <tarushiv@nvidia.com>
---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 6edad14a2..5e3916ce2 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -27,7 +27,7 @@ pipeline {
     HY_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/03-12-24-0'
     MR_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/03-12-24-1'
     JA_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/10-17-24-1'
-    HI_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/11-29-24-1'
+    HI_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/01-15-25-1'
     DEFAULT_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
   }
   stages {

From b5ede2f95817b7971af7b19a82eab2bebb2eaf79 Mon Sep 17 00:00:00 2001
From: Tarushi V <tarushiv@nvidia.com>
Date: Mon, 20 Jan 2025 12:19:53 +0530
Subject: [PATCH 13/28] Telephone.py-hindi itn

Signed-off-by: Tarushi V <tarushiv@nvidia.com>
---
 .../hi/data/telephone/eng_to_hindi_digit.tsv  | 10 ++
 .../telephone/teens_and_ties_eng_to_hin.tsv   | 90 +++++++++++++++++
 .../hi/taggers/telephone.py                   | 99 +++++++++++++++++++
 .../hi/taggers/tokenize_and_classify.py       |  4 +
 .../hi/verbalizers/telephone.py               | 73 ++++++++++++++
 .../hi/verbalizers/verbalize.py               |  3 +
 .../test_cases_telephone.txt                  | 25 +++++
 ..._sparrowhawk_inverse_text_normalization.sh |  5 +
 .../nemo_text_processing/hi/test_telephone.py | 31 ++++++
 9 files changed, 340 insertions(+)
 create mode 100644 nemo_text_processing/inverse_text_normalization/hi/data/telephone/eng_to_hindi_digit.tsv
 create mode 100644 nemo_text_processing/inverse_text_normalization/hi/data/telephone/teens_and_ties_eng_to_hin.tsv
 create mode 100644 nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py
 create mode 100644 nemo_text_processing/inverse_text_normalization/hi/verbalizers/telephone.py
 create mode 100644 tests/nemo_text_processing/hi/data_inverse_text_normalization/test_cases_telephone.txt
 create mode 100644 tests/nemo_text_processing/hi/test_telephone.py

diff --git a/nemo_text_processing/inverse_text_normalization/hi/data/telephone/eng_to_hindi_digit.tsv b/nemo_text_processing/inverse_text_normalization/hi/data/telephone/eng_to_hindi_digit.tsv
new file mode 100644
index 000000000..53c5e36cb
--- /dev/null
+++ b/nemo_text_processing/inverse_text_normalization/hi/data/telephone/eng_to_hindi_digit.tsv
@@ -0,0 +1,10 @@
+०	zero
+१	one
+२	two
+३	three
+४	four
+५	five
+६	six
+७	seven
+८	eight
+९	nine
diff --git a/nemo_text_processing/inverse_text_normalization/hi/data/telephone/teens_and_ties_eng_to_hin.tsv b/nemo_text_processing/inverse_text_normalization/hi/data/telephone/teens_and_ties_eng_to_hin.tsv
new file mode 100644
index 000000000..ac37b55f2
--- /dev/null
+++ b/nemo_text_processing/inverse_text_normalization/hi/data/telephone/teens_and_ties_eng_to_hin.tsv
@@ -0,0 +1,90 @@
+१०	ten
+११	eleven
+१२	twelve
+१३	thirteen
+१४	fourteen
+१५	fifteen
+१६	sixteen
+१७	seventeen
+१८	eighteen
+१९	nineteen
+२०	twenty
+२१	twenty one
+२२	twenty two
+२३	twenty three
+२४	twenty four
+२५	twenty five
+२६	twenty six
+२७	twenty seven
+२८	twenty eight
+२९	twenty nine
+३०	thirty
+३१	thirty one
+३२	thirty two
+३३	thirty three
+३४	thirty four
+३५	thirty five
+३६	thirty six
+३७	thirty seven
+३८	thirty eight
+३९	thirty nine
+४०	forty
+४१	forty one
+४२	forty two
+४३	forty three
+४४	forty four
+४५	forty five
+४६	forty six
+४७	forty seven
+४८	forty eight
+४९	forty nine
+५०	fifty
+५१	fifty one
+५२	fifty two
+५३	fifty three
+५४	fifty four
+५५	fifty five
+५६	fifty six
+५७	fifty seven
+५८	fifty eight
+५९	fifty nine
+६०	sixty
+६१	sixty one
+६२	sixty two
+६३	sixty three
+६४	sixty four
+६५	sixty five
+६६	sixty six
+६७	sixty seven
+६८	sixty eight
+६९	sixty nine
+७०	seventy
+७१	seventy one
+७२	seventy two
+७३	seventy three
+७४	seventy four
+७५	seventy five
+७६	seventy six
+७७	seventy seven
+७८	seventy eight
+७९	seventy nine
+८०	eighty
+८१	eighty one
+८२	eighty two
+८३	eighty three
+८४	eighty four
+८५	eighty five
+८६	eighty six
+८७	eighty seven
+८८	eighty eight
+८९	eighty nine
+९०	ninety
+९१	ninety one
+९२	ninety two
+९३	ninety three
+९४	ninety four
+९५	ninety five
+९६	ninety six
+९७	ninety seven
+९८	ninety eight
+९९	ninety nine
diff --git a/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py b/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py
new file mode 100644
index 000000000..63136e472
--- /dev/null
+++ b/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py
@@ -0,0 +1,99 @@
+# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pynini
+from pynini.lib import pynutil
+
+from nemo_text_processing.inverse_text_normalization.hi.graph_utils import (
+    NEMO_HI_DIGIT,
+    GraphFst,
+    delete_extra_space,
+    delete_space,
+    insert_space,
+)
+from nemo_text_processing.inverse_text_normalization.hi.utils import get_abs_path, apply_fst
+
+
+class TelephoneFst(GraphFst):
+    """
+    Finite state transducer for classifying telephone numbers, e.g.
+        प्लस इक्यानवे नौ आठ सात छह पांच चार तीन दो एक शून्य => tokens { name: "+९१ ९८७६५ ४३२१०" }
+    
+    Args:
+        cardinal: CardinalFst
+    """
+
+    def __init__(self, cardinal: GraphFst):
+        super().__init__(name="telephone", kind="classify")
+        
+        hindi_digit_graph = pynini.string_file(get_abs_path("data/numbers/digit.tsv")).invert()
+        hindi_digit_graph |= pynini.string_file(get_abs_path("data/numbers/zero.tsv")).invert()
+        
+        english_digit_graph = pynini.string_file(get_abs_path("data/telephone/eng_to_hindi_digit.tsv")).invert()
+        
+        country_code_graph_single_digits = pynini.string_file(get_abs_path("data/numbers/digit.tsv")).invert()
+        country_code_graph_single_digits |= pynini.string_file(get_abs_path("data/numbers/zero.tsv")).invert()
+        country_code_graph_single_digits |= pynini.string_file(get_abs_path("data/telephone/eng_to_hindi_digit.tsv")).invert()
+        
+        country_code_graph_double_digits = pynini.string_file(get_abs_path("data/numbers/teens_and_ties.tsv")).invert()
+        country_code_graph_double_digits |= pynini.string_file(get_abs_path("data/telephone/teens_and_ties_eng_to_hin.tsv")).invert()
+        
+        self.hindi_digit = pynutil.insert("number_part: \"") + pynini.closure(hindi_digit_graph + delete_space, 0, 9) + hindi_digit_graph + pynutil.insert("\" ")
+        self.english_digit = pynutil.insert("number_part: \"") + pynini.closure(english_digit_graph + delete_space, 0, 9) + english_digit_graph + delete_space + pynutil.insert("\" ")
+        
+        self.country_code_with_single_digits = pynutil.insert("country_code: \"") + pynini.closure(country_code_graph_single_digits + delete_space, 0, 2) + pynutil.insert("\" ")
+        self.country_code_with_double_digits = pynutil.insert("country_code: \"") + pynini.closure(country_code_graph_double_digits + delete_space, 0, 1) + pynutil.insert("\" ")
+        self.country_code = (self.country_code_with_single_digits | self.country_code_with_double_digits)
+        
+        self.city_code_with_single_digits = pynutil.insert("city_code: \"") + pynini.closure(country_code_graph_single_digits + delete_space, 0, 2) + pynutil.insert("\" ")
+        self.city_code_with_double_digits = pynutil.insert("city_code: \"") + pynini.closure(country_code_graph_double_digits + delete_space, 0, 1) + pynutil.insert("\" ")
+        self.city_code = (self.city_code_with_single_digits | self.city_code_with_double_digits)
+        
+        self.landline_hindi_digit = pynutil.insert("number_part: \"") + pynini.closure(hindi_digit_graph + delete_space, 0, 6) + hindi_digit_graph + pynutil.insert("\" ")
+        self.landline_english_digit = pynutil.insert("number_part: \"") + pynini.closure(english_digit_graph + delete_space, 0, 6) + english_digit_graph + pynutil.insert("\" ")
+        
+        delete_plus = pynini.union(
+            pynutil.delete("प्लस") | pynutil.delete("plus") | pynutil.delete("Plus") | pynutil.delete("PLUS")
+        )
+        
+        delete_zero = pynini.union(
+            pynutil.delete("शून्य") | pynutil.delete("zero") | pynutil.delete("Zero") | pynutil.delete("ZERO")
+        )
+        
+        graph_number_with_hindi_digit = delete_plus + delete_space + self.country_code + delete_space + self.hindi_digit
+        graph_number_with_english_digit = delete_plus + delete_space + self.country_code + self.english_digit
+        
+        graph_landline_with_hindi_digit = delete_zero + delete_space + self.city_code + delete_space + self.landline_hindi_digit
+        graph_landline_with_english_digit = delete_zero + delete_space + self.city_code + delete_space + self.landline_english_digit
+
+        graph = (graph_number_with_hindi_digit | graph_number_with_english_digit | graph_landline_with_hindi_digit | graph_landline_with_english_digit)
+        final_graph = self.add_tokens(graph)
+        self.fst = final_graph
+
+#from nemo_text_processing.inverse_text_normalization.hi.taggers.cardinal import CardinalFst
+#cardinal = CardinalFst()
+#telephone = TelephoneFst(cardinal)
+#input_text = "प्लस इक्यानवे nine four one one one two three four one two"
+#input_text = "प्लस इक्यानवे नौ आठ सात छह पांच चार तीन दो एक शून्य"
+#input_text = "plus nine eight zero nine four one one one two three four one"
+#input_text = "plus sixty two nine four one one one two three"
+#input_text = "प्लस  नौ एक नौ आठ सात छह पांच चार तीन दो एक शून्य"
+#input_text = 'Plus ninety one नौ सात छह चार एक zero five eight two three'
+#input_text = "plus eleven nine four one one one two three"
+#input_text = "zero eight zero two nine four one one one two" #landline example of bangalore
+#input_text = "zero eleven two nine four one one one two" #Delhi
+#input_text = "zero four zero two seven eight one eight three nine" #hyd
+#input_text = "शून्य सात नौ एक नौ आठ सात छह पांच चार"
+#output = apply_fst(input_text, telephone.fst)
+#print(output)
diff --git a/nemo_text_processing/inverse_text_normalization/hi/taggers/tokenize_and_classify.py b/nemo_text_processing/inverse_text_normalization/hi/taggers/tokenize_and_classify.py
index a5a371d90..2fda42cc6 100644
--- a/nemo_text_processing/inverse_text_normalization/hi/taggers/tokenize_and_classify.py
+++ b/nemo_text_processing/inverse_text_normalization/hi/taggers/tokenize_and_classify.py
@@ -34,6 +34,7 @@
 from nemo_text_processing.inverse_text_normalization.hi.taggers.ordinal import OrdinalFst
 from nemo_text_processing.inverse_text_normalization.hi.taggers.punctuation import PunctuationFst
 from nemo_text_processing.inverse_text_normalization.hi.taggers.time import TimeFst
+from nemo_text_processing.inverse_text_normalization.hi.taggers.telephone import TelephoneFst
 from nemo_text_processing.inverse_text_normalization.hi.taggers.whitelist import WhiteListFst
 from nemo_text_processing.inverse_text_normalization.hi.taggers.word import WordFst
 
@@ -82,6 +83,8 @@ def __init__(
             measure_graph = measure.fst
             money = MoneyFst(cardinal, decimal)
             money_graph = money.fst
+            telephone = TelephoneFst(cardinal)
+            telephone_graph = telephone.fst
             punct_graph = PunctuationFst().fst
             whitelist_graph = WhiteListFst().fst
             word_graph = WordFst().fst
@@ -95,6 +98,7 @@ def __init__(
                 | pynutil.add_weight(time_graph, 1.1)
                 | pynutil.add_weight(measure_graph, 1.1)
                 | pynutil.add_weight(money_graph, 1.1)
+                | pynutil.add_weight(telephone_graph, 1.1)
                 | pynutil.add_weight(word_graph, 100)
                 | pynutil.add_weight(whitelist_graph, 1.01)
             )
diff --git a/nemo_text_processing/inverse_text_normalization/hi/verbalizers/telephone.py b/nemo_text_processing/inverse_text_normalization/hi/verbalizers/telephone.py
new file mode 100644
index 000000000..5a475414d
--- /dev/null
+++ b/nemo_text_processing/inverse_text_normalization/hi/verbalizers/telephone.py
@@ -0,0 +1,73 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+# Copyright 2025 and onwards Google, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pynini
+from pynini.lib import pynutil
+
+from nemo_text_processing.text_normalization.hi.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space
+from nemo_text_processing.inverse_text_normalization.hi.utils import apply_fst
+
+
+class TelephoneFst(GraphFst):
+    """
+    Finite state transducer for verbalizing a telephone token ("+" is prepended to the country code), e.g.
+        telephone { country_code: "९१" number_part: "९८७६५४३२१०" }
+        -> +९१ ९८७६५४३२१०
+    """
+
+    def __init__(self, cardinal: GraphFst):
+        super().__init__(name="telephone", kind="verbalize")
+
+        number_part = pynutil.delete("number_part: \"") + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete("\"")
+        optional_country_code = pynini.closure(
+            pynutil.delete("country_code: \"")
+            + pynutil.insert("+")
+            + delete_space
+            + pynini.closure(NEMO_NOT_QUOTE, 1)
+            + pynutil.delete("\"")
+            + pynini.accep(" "),
+            0,
+            1,
+        )
+        optional_city_code = pynini.closure(
+            pynutil.delete("city_code: \"")
+            + pynutil.insert("०")
+            + delete_space
+            + pynini.closure(NEMO_NOT_QUOTE, 1)
+            + pynutil.delete("\"")
+            + pynini.accep(" "),
+            0,
+            1,
+        )
+        delete_tokens = self.delete_tokens(optional_country_code + number_part)
+        delete_tokens |= self.delete_tokens(optional_city_code + number_part)
+        self.fst = delete_tokens.optimize()
+        
+#from nemo_text_processing.inverse_text_normalization.hi.taggers.cardinal import CardinalFst
+#cardinal = CardinalFst()
+#telephone = TelephoneFst(cardinal)
+#input_text = 'telephone { country_code: "९१" number_part: "९८७६५४३२१०"  }'
+#input_text = 'telephone { country_code: "९१" number_part: "९४१११२३४१२"  }'
+#input_text = 'telephone { country_code: "९१" number_part: "९४२२२२२२२"  }'
+#input_text = 'telephone{ country_code: "९१" number_part: "११२३४५६७८९" }'
+#input_text = 'telephone{ country_code: "९१" number_part: "९८७६५४३२११" }'
+#input_text = 'telephone{ country_code: "९१" number_part: "९४५६७८९०१२" }'
+#input_text = 'telephone{ country_code: "९१" number_part: "९५६७८९०१२३" }'
+#input_text = 'telephone { city_code: "७९" number_part: "१९८७६५४"  }'
+#input_text = 'telephone { city_code: "४०" number_part: "२७८१८३९"  }'
+#input_text = 'telephone { city_code: "११" number_part: "२९४१११२"  }'
+#input_text = 'telephone { city_code: "८०" number_part: "२९४१११२"  }'
+#output = apply_fst(input_text, telephone.fst)
+#print(output)
diff --git a/nemo_text_processing/inverse_text_normalization/hi/verbalizers/verbalize.py b/nemo_text_processing/inverse_text_normalization/hi/verbalizers/verbalize.py
index d88bd25d9..411b08863 100644
--- a/nemo_text_processing/inverse_text_normalization/hi/verbalizers/verbalize.py
+++ b/nemo_text_processing/inverse_text_normalization/hi/verbalizers/verbalize.py
@@ -22,6 +22,7 @@
 from nemo_text_processing.inverse_text_normalization.hi.verbalizers.money import MoneyFst
 from nemo_text_processing.inverse_text_normalization.hi.verbalizers.ordinal import OrdinalFst
 from nemo_text_processing.inverse_text_normalization.hi.verbalizers.time import TimeFst
+from nemo_text_processing.inverse_text_normalization.hi.verbalizers.telephone import TelephoneFst
 from nemo_text_processing.inverse_text_normalization.hi.verbalizers.whitelist import WhiteListFst
 from nemo_text_processing.inverse_text_normalization.hi.verbalizers.word import WordFst
 
@@ -45,6 +46,7 @@ def __init__(self):
         time_graph = TimeFst().fst
         measure_graph = MeasureFst(cardinal, decimal).fst
         money_graph = MoneyFst(cardinal, decimal).fst
+        telephone_graph = TelephoneFst(cardinal).fst
         word_graph = WordFst().fst
         whitelist_graph = WhiteListFst().fst
 
@@ -59,5 +61,6 @@ def __init__(self):
             | time_graph
             | measure_graph
             | money_graph
+            | telephone_graph
         )
         self.fst = graph
diff --git a/tests/nemo_text_processing/hi/data_inverse_text_normalization/test_cases_telephone.txt b/tests/nemo_text_processing/hi/data_inverse_text_normalization/test_cases_telephone.txt
new file mode 100644
index 000000000..0c001b20f
--- /dev/null
+++ b/tests/nemo_text_processing/hi/data_inverse_text_normalization/test_cases_telephone.txt
@@ -0,0 +1,25 @@
+प्लस इक्यानवे nine four one one one two three four one two~+९१ ९४१११२३४१२
+प्लस इक्यानवे नौ आठ सात छह पांच चार तीन दो एक शून्य~+९१ ९८७६५४३२१०
+plus nine eight nine four one one one two three four zero one~+९८ ९४१११२३४०१
+plus sixty two nine four one one one two three~+६२ ९४१११२३
+प्लस  नौ एक नौ आठ सात छह पांच चार तीन दो एक शून्य~+९१ ९८७६५४३२१०
+plus eleven nine four one one one two three~+११ ९४१११२३
+zero eleven nine four one one one two three~०११ ९४१११२३
+शून्य ग्यारह नौ चार एक एक एक दो तीन~०११ ९४१११२३
+zero eight zero two nine four one one one two~०८० २९४१११२
+शून्य आठ शून्य दो नौ चार एक एक एक दो~०८० २९४१११२
+zero eleven two nine four one one one two~०११ २९४१११२
+शून्य ग्यारह दो नौ चार एक एक एक दो~०११ २९४१११२
+zero four zero two seven eight one eight three nine~०४० २७८१८३९
+शून्य चार शून्य दो सात आठ एक आठ तीन नौ~०४० २७८१८३९
+शून्य सात नौ एक नौ आठ सात छह पांच चार~०७९ १९८७६५४
+प्लस  नौ एक नौ तीन आठ दो सात एक चार छह पांच शून्य~+९१ ९३८२७१४६५०
+प्लस  नौ एक नौ शून्य पांच एक तीन चार आठ दो सात छह~+९१ ९०५१३४८२७६
+प्लस  नौ एक नौ चार तीन सात दो शून्य पांच छह एक आठ~+९१ ९४३७२०५६१८
+PLUS ninety one nine three eight two seven one four six five zero~+९१ ९३८२७१४६५०
+plus nine one nine zero five one three four eight two seven six~+९१ ९०५१३४८२७६
+plus ninety one nine four three seven two zero five six one eight~+९१ ९४३७२०५६१८
+ZERO seven three चार पाँच छह सात आठ नौ शून्य~०७३ ४५६७८९०
+शून्य चार शून्य पाँच चार एक दो सात तीन आठ~०४० ५४१२७३८
+ZERO seven three four five six seven eight nine zero~०७३ ४५६७८९०
+zero two eight seven six five four three two seven~०२८ ७६५४३२७
diff --git a/tests/nemo_text_processing/hi/test_sparrowhawk_inverse_text_normalization.sh b/tests/nemo_text_processing/hi/test_sparrowhawk_inverse_text_normalization.sh
index aec7299d5..a365a834d 100644
--- a/tests/nemo_text_processing/hi/test_sparrowhawk_inverse_text_normalization.sh
+++ b/tests/nemo_text_processing/hi/test_sparrowhawk_inverse_text_normalization.sh
@@ -63,6 +63,11 @@ testITNMoney() {
   runtest $input
 }
 
+testITNTelephone() {
+  input=$PROJECT_DIR/hi/data_inverse_text_normalization/test_cases_telephone.txt
+  runtest $input
+}
+
 testITNWord() {
   input=$PROJECT_DIR/hi/data_inverse_text_normalization/test_cases_word.txt
   runtest $input
diff --git a/tests/nemo_text_processing/hi/test_telephone.py b/tests/nemo_text_processing/hi/test_telephone.py
new file mode 100644
index 000000000..895f042b0
--- /dev/null
+++ b/tests/nemo_text_processing/hi/test_telephone.py
@@ -0,0 +1,31 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+from parameterized import parameterized
+
+from nemo_text_processing.inverse_text_normalization.inverse_normalize import InverseNormalizer
+from nemo_text_processing.text_normalization.normalize import Normalizer
+
+from ..utils import CACHE_DIR, parse_test_case_file
+
+
+class TestTelephone:
+    inverse_normalizer = InverseNormalizer(lang='hi', cache_dir=CACHE_DIR, overwrite_cache=False)
+    @parameterized.expand(parse_test_case_file('hi/data_inverse_text_normalization/test_cases_telephone.txt'))
+    @pytest.mark.run_only_on('CPU')
+    @pytest.mark.unit
+    def test_denorm(self, test_input, expected):
+        pred = self.inverse_normalizer.inverse_normalize(test_input, verbose=False)
+        assert pred.strip() == expected.strip()

From 461962bf9459af23a771c2354a18b38f237a8525 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 20 Jan 2025 06:50:39 +0000
Subject: [PATCH 14/28] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 .../hi/taggers/telephone.py                   | 143 ++++++++++++------
 .../hi/taggers/tokenize_and_classify.py       |   2 +-
 .../hi/verbalizers/telephone.py               |  37 ++---
 .../hi/verbalizers/verbalize.py               |   2 +-
 .../nemo_text_processing/hi/test_telephone.py |   1 +
 5 files changed, 120 insertions(+), 65 deletions(-)

diff --git a/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py b/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py
index 63136e472..04f7a8b23 100644
--- a/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py
+++ b/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py
@@ -22,7 +22,7 @@
     delete_space,
     insert_space,
 )
-from nemo_text_processing.inverse_text_normalization.hi.utils import get_abs_path, apply_fst
+from nemo_text_processing.inverse_text_normalization.hi.utils import apply_fst, get_abs_path
 
 
 class TelephoneFst(GraphFst):
@@ -36,64 +36,117 @@ class TelephoneFst(GraphFst):
 
     def __init__(self, cardinal: GraphFst):
         super().__init__(name="telephone", kind="classify")
-        
+
         hindi_digit_graph = pynini.string_file(get_abs_path("data/numbers/digit.tsv")).invert()
         hindi_digit_graph |= pynini.string_file(get_abs_path("data/numbers/zero.tsv")).invert()
-        
+
         english_digit_graph = pynini.string_file(get_abs_path("data/telephone/eng_to_hindi_digit.tsv")).invert()
-        
+
         country_code_graph_single_digits = pynini.string_file(get_abs_path("data/numbers/digit.tsv")).invert()
         country_code_graph_single_digits |= pynini.string_file(get_abs_path("data/numbers/zero.tsv")).invert()
-        country_code_graph_single_digits |= pynini.string_file(get_abs_path("data/telephone/eng_to_hindi_digit.tsv")).invert()
-        
+        country_code_graph_single_digits |= pynini.string_file(
+            get_abs_path("data/telephone/eng_to_hindi_digit.tsv")
+        ).invert()
+
         country_code_graph_double_digits = pynini.string_file(get_abs_path("data/numbers/teens_and_ties.tsv")).invert()
-        country_code_graph_double_digits |= pynini.string_file(get_abs_path("data/telephone/teens_and_ties_eng_to_hin.tsv")).invert()
-        
-        self.hindi_digit = pynutil.insert("number_part: \"") + pynini.closure(hindi_digit_graph + delete_space, 0, 9) + hindi_digit_graph + pynutil.insert("\" ")
-        self.english_digit = pynutil.insert("number_part: \"") + pynini.closure(english_digit_graph + delete_space, 0, 9) + english_digit_graph + delete_space + pynutil.insert("\" ")
-        
-        self.country_code_with_single_digits = pynutil.insert("country_code: \"") + pynini.closure(country_code_graph_single_digits + delete_space, 0, 2) + pynutil.insert("\" ")
-        self.country_code_with_double_digits = pynutil.insert("country_code: \"") + pynini.closure(country_code_graph_double_digits + delete_space, 0, 1) + pynutil.insert("\" ")
-        self.country_code = (self.country_code_with_single_digits | self.country_code_with_double_digits)
-        
-        self.city_code_with_single_digits = pynutil.insert("city_code: \"") + pynini.closure(country_code_graph_single_digits + delete_space, 0, 2) + pynutil.insert("\" ")
-        self.city_code_with_double_digits = pynutil.insert("city_code: \"") + pynini.closure(country_code_graph_double_digits + delete_space, 0, 1) + pynutil.insert("\" ")
-        self.city_code = (self.city_code_with_single_digits | self.city_code_with_double_digits)
-        
-        self.landline_hindi_digit = pynutil.insert("number_part: \"") + pynini.closure(hindi_digit_graph + delete_space, 0, 6) + hindi_digit_graph + pynutil.insert("\" ")
-        self.landline_english_digit = pynutil.insert("number_part: \"") + pynini.closure(english_digit_graph + delete_space, 0, 6) + english_digit_graph + pynutil.insert("\" ")
-        
+        country_code_graph_double_digits |= pynini.string_file(
+            get_abs_path("data/telephone/teens_and_ties_eng_to_hin.tsv")
+        ).invert()
+
+        self.hindi_digit = (
+            pynutil.insert("number_part: \"")
+            + pynini.closure(hindi_digit_graph + delete_space, 0, 9)
+            + hindi_digit_graph
+            + pynutil.insert("\" ")
+        )
+        self.english_digit = (
+            pynutil.insert("number_part: \"")
+            + pynini.closure(english_digit_graph + delete_space, 0, 9)
+            + english_digit_graph
+            + delete_space
+            + pynutil.insert("\" ")
+        )
+
+        self.country_code_with_single_digits = (
+            pynutil.insert("country_code: \"")
+            + pynini.closure(country_code_graph_single_digits + delete_space, 0, 2)
+            + pynutil.insert("\" ")
+        )
+        self.country_code_with_double_digits = (
+            pynutil.insert("country_code: \"")
+            + pynini.closure(country_code_graph_double_digits + delete_space, 0, 1)
+            + pynutil.insert("\" ")
+        )
+        self.country_code = self.country_code_with_single_digits | self.country_code_with_double_digits
+
+        self.city_code_with_single_digits = (
+            pynutil.insert("city_code: \"")
+            + pynini.closure(country_code_graph_single_digits + delete_space, 0, 2)
+            + pynutil.insert("\" ")
+        )
+        self.city_code_with_double_digits = (
+            pynutil.insert("city_code: \"")
+            + pynini.closure(country_code_graph_double_digits + delete_space, 0, 1)
+            + pynutil.insert("\" ")
+        )
+        self.city_code = self.city_code_with_single_digits | self.city_code_with_double_digits
+
+        self.landline_hindi_digit = (
+            pynutil.insert("number_part: \"")
+            + pynini.closure(hindi_digit_graph + delete_space, 0, 6)
+            + hindi_digit_graph
+            + pynutil.insert("\" ")
+        )
+        self.landline_english_digit = (
+            pynutil.insert("number_part: \"")
+            + pynini.closure(english_digit_graph + delete_space, 0, 6)
+            + english_digit_graph
+            + pynutil.insert("\" ")
+        )
+
         delete_plus = pynini.union(
             pynutil.delete("प्लस") | pynutil.delete("plus") | pynutil.delete("Plus") | pynutil.delete("PLUS")
         )
-        
+
         delete_zero = pynini.union(
             pynutil.delete("शून्य") | pynutil.delete("zero") | pynutil.delete("Zero") | pynutil.delete("ZERO")
         )
-        
-        graph_number_with_hindi_digit = delete_plus + delete_space + self.country_code + delete_space + self.hindi_digit
+
+        graph_number_with_hindi_digit = (
+            delete_plus + delete_space + self.country_code + delete_space + self.hindi_digit
+        )
         graph_number_with_english_digit = delete_plus + delete_space + self.country_code + self.english_digit
-        
-        graph_landline_with_hindi_digit = delete_zero + delete_space + self.city_code + delete_space + self.landline_hindi_digit
-        graph_landline_with_english_digit = delete_zero + delete_space + self.city_code + delete_space + self.landline_english_digit
 
-        graph = (graph_number_with_hindi_digit | graph_number_with_english_digit | graph_landline_with_hindi_digit | graph_landline_with_english_digit)
+        graph_landline_with_hindi_digit = (
+            delete_zero + delete_space + self.city_code + delete_space + self.landline_hindi_digit
+        )
+        graph_landline_with_english_digit = (
+            delete_zero + delete_space + self.city_code + delete_space + self.landline_english_digit
+        )
+
+        graph = (
+            graph_number_with_hindi_digit
+            | graph_number_with_english_digit
+            | graph_landline_with_hindi_digit
+            | graph_landline_with_english_digit
+        )
         final_graph = self.add_tokens(graph)
         self.fst = final_graph
 
-#from nemo_text_processing.inverse_text_normalization.hi.taggers.cardinal import CardinalFst
-#cardinal = CardinalFst()
-#telephone = TelephoneFst(cardinal)
-#input_text = "प्लस इक्यानवे nine four one one one two three four one two"
-#input_text = "प्लस इक्यानवे नौ आठ सात छह पांच चार तीन दो एक शून्य"
-#input_text = "plus nine eight zero nine four one one one two three four one"
-#input_text = "plus sixty two nine four one one one two three"
-#input_text = "प्लस  नौ एक नौ आठ सात छह पांच चार तीन दो एक शून्य"
-#input_text = 'Plus ninety one नौ सात छह चार एक zero five eight two three'
-#input_text = "plus eleven nine four one one one two three"
-#input_text = "zero eight zero two nine four one one one two" #landline example of bangalore
-#input_text = "zero eleven two nine four one one one two" #Delhi
-#input_text = "zero four zero two seven eight one eight three nine" #hyd
-#input_text = "शून्य सात नौ एक नौ आठ सात छह पांच चार"
-#output = apply_fst(input_text, telephone.fst)
-#print(output)
+
+# from nemo_text_processing.inverse_text_normalization.hi.taggers.cardinal import CardinalFst
+# cardinal = CardinalFst()
+# telephone = TelephoneFst(cardinal)
+# input_text = "प्लस इक्यानवे nine four one one one two three four one two"
+# input_text = "प्लस इक्यानवे नौ आठ सात छह पांच चार तीन दो एक शून्य"
+# input_text = "plus nine eight zero nine four one one one two three four one"
+# input_text = "plus sixty two nine four one one one two three"
+# input_text = "प्लस  नौ एक नौ आठ सात छह पांच चार तीन दो एक शून्य"
+# input_text = 'Plus ninety one नौ सात छह चार एक zero five eight two three'
+# input_text = "plus eleven nine four one one one two three"
+# input_text = "zero eight zero two nine four one one one two" #landline example of bangalore
+# input_text = "zero eleven two nine four one one one two" #Delhi
+# input_text = "zero four zero two seven eight one eight three nine" #hyd
+# input_text = "शून्य सात नौ एक नौ आठ सात छह पांच चार"
+# output = apply_fst(input_text, telephone.fst)
+# print(output)
diff --git a/nemo_text_processing/inverse_text_normalization/hi/taggers/tokenize_and_classify.py b/nemo_text_processing/inverse_text_normalization/hi/taggers/tokenize_and_classify.py
index 2fda42cc6..62554bd14 100644
--- a/nemo_text_processing/inverse_text_normalization/hi/taggers/tokenize_and_classify.py
+++ b/nemo_text_processing/inverse_text_normalization/hi/taggers/tokenize_and_classify.py
@@ -33,8 +33,8 @@
 from nemo_text_processing.inverse_text_normalization.hi.taggers.money import MoneyFst
 from nemo_text_processing.inverse_text_normalization.hi.taggers.ordinal import OrdinalFst
 from nemo_text_processing.inverse_text_normalization.hi.taggers.punctuation import PunctuationFst
-from nemo_text_processing.inverse_text_normalization.hi.taggers.time import TimeFst
 from nemo_text_processing.inverse_text_normalization.hi.taggers.telephone import TelephoneFst
+from nemo_text_processing.inverse_text_normalization.hi.taggers.time import TimeFst
 from nemo_text_processing.inverse_text_normalization.hi.taggers.whitelist import WhiteListFst
 from nemo_text_processing.inverse_text_normalization.hi.taggers.word import WordFst
 
diff --git a/nemo_text_processing/inverse_text_normalization/hi/verbalizers/telephone.py b/nemo_text_processing/inverse_text_normalization/hi/verbalizers/telephone.py
index 5a475414d..682c9416a 100644
--- a/nemo_text_processing/inverse_text_normalization/hi/verbalizers/telephone.py
+++ b/nemo_text_processing/inverse_text_normalization/hi/verbalizers/telephone.py
@@ -16,8 +16,8 @@
 import pynini
 from pynini.lib import pynutil
 
-from nemo_text_processing.text_normalization.hi.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space
 from nemo_text_processing.inverse_text_normalization.hi.utils import apply_fst
+from nemo_text_processing.text_normalization.hi.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space
 
 
 class TelephoneFst(GraphFst):
@@ -54,20 +54,21 @@ def __init__(self, cardinal: GraphFst):
         delete_tokens = self.delete_tokens(optional_country_code + number_part)
         delete_tokens |= self.delete_tokens(optional_city_code + number_part)
         self.fst = delete_tokens.optimize()
-        
-#from nemo_text_processing.inverse_text_normalization.hi.taggers.cardinal import CardinalFst
-#cardinal = CardinalFst()
-#telephone = TelephoneFst(cardinal)
-#input_text = 'telephone { country_code: "९१" number_part: "९८७६५४३२१०"  }'
-#input_text = 'telephone { country_code: "९१" number_part: "९४१११२३४१२"  }'
-#input_text = 'telephone { country_code: "९१" number_part: "९४२२२२२२२"  }'
-#input_text = 'telephone{ country_code: "९१" number_part: "११२३४५६७८९" }'
-#input_text = 'telephone{ country_code: "९१" number_part: "९८७६५४३२११" }'
-#input_text = 'telephone{ country_code: "९१" number_part: "९४५६७८९०१२" }'
-#input_text = 'telephone{ country_code: "९१" number_part: "९५६७८९०१२३" }'
-#input_text = 'telephone { city_code: "७९" number_part: "१९८७६५४"  }'
-#input_text = 'telephone { city_code: "४०" number_part: "२७८१८३९"  }'
-#input_text = 'telephone { city_code: "११" number_part: "२९४१११२"  }'
-#input_text = 'telephone { city_code: "८०" number_part: "२९४१११२"  }'
-#output = apply_fst(input_text, telephone.fst)
-#print(output)
+
+
+# from nemo_text_processing.inverse_text_normalization.hi.taggers.cardinal import CardinalFst
+# cardinal = CardinalFst()
+# telephone = TelephoneFst(cardinal)
+# input_text = 'telephone { country_code: "९१" number_part: "९८७६५४३२१०"  }'
+# input_text = 'telephone { country_code: "९१" number_part: "९४१११२३४१२"  }'
+# input_text = 'telephone { country_code: "९१" number_part: "९४२२२२२२२"  }'
+# input_text = 'telephone{ country_code: "९१" number_part: "११२३४५६७८९" }'
+# input_text = 'telephone{ country_code: "९१" number_part: "९८७६५४३२११" }'
+# input_text = 'telephone{ country_code: "९१" number_part: "९४५६७८९०१२" }'
+# input_text = 'telephone{ country_code: "९१" number_part: "९५६७८९०१२३" }'
+# input_text = 'telephone { city_code: "७९" number_part: "१९८७६५४"  }'
+# input_text = 'telephone { city_code: "४०" number_part: "२७८१८३९"  }'
+# input_text = 'telephone { city_code: "११" number_part: "२९४१११२"  }'
+# input_text = 'telephone { city_code: "८०" number_part: "२९४१११२"  }'
+# output = apply_fst(input_text, telephone.fst)
+# print(output)
diff --git a/nemo_text_processing/inverse_text_normalization/hi/verbalizers/verbalize.py b/nemo_text_processing/inverse_text_normalization/hi/verbalizers/verbalize.py
index 411b08863..165fe7a7e 100644
--- a/nemo_text_processing/inverse_text_normalization/hi/verbalizers/verbalize.py
+++ b/nemo_text_processing/inverse_text_normalization/hi/verbalizers/verbalize.py
@@ -21,8 +21,8 @@
 from nemo_text_processing.inverse_text_normalization.hi.verbalizers.measure import MeasureFst
 from nemo_text_processing.inverse_text_normalization.hi.verbalizers.money import MoneyFst
 from nemo_text_processing.inverse_text_normalization.hi.verbalizers.ordinal import OrdinalFst
-from nemo_text_processing.inverse_text_normalization.hi.verbalizers.time import TimeFst
 from nemo_text_processing.inverse_text_normalization.hi.verbalizers.telephone import TelephoneFst
+from nemo_text_processing.inverse_text_normalization.hi.verbalizers.time import TimeFst
 from nemo_text_processing.inverse_text_normalization.hi.verbalizers.whitelist import WhiteListFst
 from nemo_text_processing.inverse_text_normalization.hi.verbalizers.word import WordFst
 
diff --git a/tests/nemo_text_processing/hi/test_telephone.py b/tests/nemo_text_processing/hi/test_telephone.py
index 895f042b0..145f554a4 100644
--- a/tests/nemo_text_processing/hi/test_telephone.py
+++ b/tests/nemo_text_processing/hi/test_telephone.py
@@ -23,6 +23,7 @@
 
 class TestTelephone:
     inverse_normalizer = InverseNormalizer(lang='hi', cache_dir=CACHE_DIR, overwrite_cache=False)
+
     @parameterized.expand(parse_test_case_file('hi/data_inverse_text_normalization/test_cases_telephone.txt'))
     @pytest.mark.run_only_on('CPU')
     @pytest.mark.unit

From 3a011b9ff10efb191b0d73ec32924bf1319e1457 Mon Sep 17 00:00:00 2001
From: Tarushi V <tarushiv@nvidia.com>
Date: Mon, 20 Jan 2025 17:44:11 +0530
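Subject: [PATCH 15/28] Hindi ITN telephone: remove debug code, read extension field

Remove the commented-out debugging scaffolding and the apply_fst import
from the telephone tagger and verbalizer. The verbalizer now deletes
`extension: "` instead of `city_code: "`; the tagger is not changed in this
patch and still inserts `city_code: "`. One telephone test case is replaced
and the copyright year of test_telephone.py is updated to 2025.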

Signed-off-by: Tarushi V <tarushiv@nvidia.com>
---
 .../hi/taggers/telephone.py                   | 22 ++----------------
 .../hi/verbalizers/telephone.py               | 23 ++-----------------
 .../test_cases_telephone.txt                  |  2 +-
 .../nemo_text_processing/hi/test_telephone.py |  2 +-
 4 files changed, 6 insertions(+), 43 deletions(-)

diff --git a/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py b/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py
index 04f7a8b23..97df99eae 100644
--- a/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py
+++ b/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py
@@ -22,7 +22,7 @@
     delete_space,
     insert_space,
 )
-from nemo_text_processing.inverse_text_normalization.hi.utils import apply_fst, get_abs_path
+from nemo_text_processing.inverse_text_normalization.hi.utils import get_abs_path
 
 
 class TelephoneFst(GraphFst):
@@ -131,22 +131,4 @@ def __init__(self, cardinal: GraphFst):
             | graph_landline_with_english_digit
         )
         final_graph = self.add_tokens(graph)
-        self.fst = final_graph
-
-
-# from nemo_text_processing.inverse_text_normalization.hi.taggers.cardinal import CardinalFst
-# cardinal = CardinalFst()
-# telephone = TelephoneFst(cardinal)
-# input_text = "प्लस इक्यानवे nine four one one one two three four one two"
-# input_text = "प्लस इक्यानवे नौ आठ सात छह पांच चार तीन दो एक शून्य"
-# input_text = "plus nine eight zero nine four one one one two three four one"
-# input_text = "plus sixty two nine four one one one two three"
-# input_text = "प्लस  नौ एक नौ आठ सात छह पांच चार तीन दो एक शून्य"
-# input_text = 'Plus ninety one नौ सात छह चार एक zero five eight two three'
-# input_text = "plus eleven nine four one one one two three"
-# input_text = "zero eight zero two nine four one one one two" #landline example of bangalore
-# input_text = "zero eleven two nine four one one one two" #Delhi
-# input_text = "zero four zero two seven eight one eight three nine" #hyd
-# input_text = "शून्य सात नौ एक नौ आठ सात छह पांच चार"
-# output = apply_fst(input_text, telephone.fst)
-# print(output)
+        self.fst = final_graph
\ No newline at end of file
diff --git a/nemo_text_processing/inverse_text_normalization/hi/verbalizers/telephone.py b/nemo_text_processing/inverse_text_normalization/hi/verbalizers/telephone.py
index 682c9416a..e0c721dc5 100644
--- a/nemo_text_processing/inverse_text_normalization/hi/verbalizers/telephone.py
+++ b/nemo_text_processing/inverse_text_normalization/hi/verbalizers/telephone.py
@@ -16,7 +16,6 @@
 import pynini
 from pynini.lib import pynutil
 
-from nemo_text_processing.inverse_text_normalization.hi.utils import apply_fst
 from nemo_text_processing.text_normalization.hi.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space
 
 
@@ -42,7 +41,7 @@ def __init__(self, cardinal: GraphFst):
             1,
         )
         optional_city_code = pynini.closure(
-            pynutil.delete("city_code: \"")
+            pynutil.delete("extension: \"")
             + pynutil.insert("०")
             + delete_space
             + pynini.closure(NEMO_NOT_QUOTE, 1)
@@ -53,22 +52,4 @@ def __init__(self, cardinal: GraphFst):
         )
         delete_tokens = self.delete_tokens(optional_country_code + number_part)
         delete_tokens |= self.delete_tokens(optional_city_code + number_part)
-        self.fst = delete_tokens.optimize()
-
-
-# from nemo_text_processing.inverse_text_normalization.hi.taggers.cardinal import CardinalFst
-# cardinal = CardinalFst()
-# telephone = TelephoneFst(cardinal)
-# input_text = 'telephone { country_code: "९१" number_part: "९८७६५४३२१०"  }'
-# input_text = 'telephone { country_code: "९१" number_part: "९४१११२३४१२"  }'
-# input_text = 'telephone { country_code: "९१" number_part: "९४२२२२२२२"  }'
-# input_text = 'telephone{ country_code: "९१" number_part: "११२३४५६७८९" }'
-# input_text = 'telephone{ country_code: "९१" number_part: "९८७६५४३२११" }'
-# input_text = 'telephone{ country_code: "९१" number_part: "९४५६७८९०१२" }'
-# input_text = 'telephone{ country_code: "९१" number_part: "९५६७८९०१२३" }'
-# input_text = 'telephone { city_code: "७९" number_part: "१९८७६५४"  }'
-# input_text = 'telephone { city_code: "४०" number_part: "२७८१८३९"  }'
-# input_text = 'telephone { city_code: "११" number_part: "२९४१११२"  }'
-# input_text = 'telephone { city_code: "८०" number_part: "२९४१११२"  }'
-# output = apply_fst(input_text, telephone.fst)
-# print(output)
+        self.fst = delete_tokens.optimize()
\ No newline at end of file
diff --git a/tests/nemo_text_processing/hi/data_inverse_text_normalization/test_cases_telephone.txt b/tests/nemo_text_processing/hi/data_inverse_text_normalization/test_cases_telephone.txt
index 0c001b20f..34d031b41 100644
--- a/tests/nemo_text_processing/hi/data_inverse_text_normalization/test_cases_telephone.txt
+++ b/tests/nemo_text_processing/hi/data_inverse_text_normalization/test_cases_telephone.txt
@@ -1,7 +1,6 @@
 प्लस इक्यानवे nine four one one one two three four one two~+९१ ९४१११२३४१२
 प्लस इक्यानवे नौ आठ सात छह पांच चार तीन दो एक शून्य~+९१ ९८७६५४३२१०
 plus nine eight nine four one one one two three four zero one~+९८ ९४१११२३४०१
-plus sixty two nine four one one one two three~+६२ ९४१११२३
 प्लस  नौ एक नौ आठ सात छह पांच चार तीन दो एक शून्य~+९१ ९८७६५४३२१०
 plus eleven nine four one one one two three~+११ ९४१११२३
 zero eleven nine four one one one two three~०११ ९४१११२३
@@ -23,3 +22,4 @@ ZERO seven three चार पाँच छह सात आठ नौ शून
 शून्य चार शून्य पाँच चार एक दो सात तीन आठ~०४० ५४१२७३८
 ZERO seven three four five six seven eight nine zero~०७३ ४५६७८९०
 zero two eight seven six five four three two seven~०२८ ७६५४३२७
+PLUS eighty one nine seven four seven two zero zero one one eight~+८१ ९७४७२००११८
diff --git a/tests/nemo_text_processing/hi/test_telephone.py b/tests/nemo_text_processing/hi/test_telephone.py
index 145f554a4..b01b11871 100644
--- a/tests/nemo_text_processing/hi/test_telephone.py
+++ b/tests/nemo_text_processing/hi/test_telephone.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

From a5cf0500eadc33dc51a8e93d7d0c681b9400dfcd Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 21 Jan 2025 04:19:39 +0000
Subject: [PATCH 16/28] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 .../inverse_text_normalization/hi/taggers/telephone.py          | 2 +-
 .../inverse_text_normalization/hi/verbalizers/telephone.py      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py b/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py
index 97df99eae..1b938f241 100644
--- a/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py
+++ b/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py
@@ -131,4 +131,4 @@ def __init__(self, cardinal: GraphFst):
             | graph_landline_with_english_digit
         )
         final_graph = self.add_tokens(graph)
-        self.fst = final_graph
\ No newline at end of file
+        self.fst = final_graph
diff --git a/nemo_text_processing/inverse_text_normalization/hi/verbalizers/telephone.py b/nemo_text_processing/inverse_text_normalization/hi/verbalizers/telephone.py
index e0c721dc5..3f4b4de1f 100644
--- a/nemo_text_processing/inverse_text_normalization/hi/verbalizers/telephone.py
+++ b/nemo_text_processing/inverse_text_normalization/hi/verbalizers/telephone.py
@@ -52,4 +52,4 @@ def __init__(self, cardinal: GraphFst):
         )
         delete_tokens = self.delete_tokens(optional_country_code + number_part)
         delete_tokens |= self.delete_tokens(optional_city_code + number_part)
-        self.fst = delete_tokens.optimize()
\ No newline at end of file
+        self.fst = delete_tokens.optimize()

From 1c506e26c56b5abb3cd83e28b4c311b87249fc35 Mon Sep 17 00:00:00 2001
From: Tarushi V <tarushiv@nvidia.com>
Date: Tue, 21 Jan 2025 09:58:06 +0530
Subject: [PATCH 17/28] Telephone tagger: emit extension field instead of city_code

Signed-off-by: Tarushi V <tarushiv@nvidia.com>
---
 .../hi/taggers/telephone.py                   | 108 +++++-------------
 1 file changed, 28 insertions(+), 80 deletions(-)

diff --git a/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py b/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py
index 1b938f241..2449eaff5 100644
--- a/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py
+++ b/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py
@@ -36,99 +36,47 @@ class TelephoneFst(GraphFst):
 
     def __init__(self, cardinal: GraphFst):
         super().__init__(name="telephone", kind="classify")
-
+        
         hindi_digit_graph = pynini.string_file(get_abs_path("data/numbers/digit.tsv")).invert()
         hindi_digit_graph |= pynini.string_file(get_abs_path("data/numbers/zero.tsv")).invert()
-
+        
         english_digit_graph = pynini.string_file(get_abs_path("data/telephone/eng_to_hindi_digit.tsv")).invert()
-
+        
         country_code_graph_single_digits = pynini.string_file(get_abs_path("data/numbers/digit.tsv")).invert()
         country_code_graph_single_digits |= pynini.string_file(get_abs_path("data/numbers/zero.tsv")).invert()
-        country_code_graph_single_digits |= pynini.string_file(
-            get_abs_path("data/telephone/eng_to_hindi_digit.tsv")
-        ).invert()
-
+        country_code_graph_single_digits |= pynini.string_file(get_abs_path("data/telephone/eng_to_hindi_digit.tsv")).invert()
+        
         country_code_graph_double_digits = pynini.string_file(get_abs_path("data/numbers/teens_and_ties.tsv")).invert()
-        country_code_graph_double_digits |= pynini.string_file(
-            get_abs_path("data/telephone/teens_and_ties_eng_to_hin.tsv")
-        ).invert()
-
-        self.hindi_digit = (
-            pynutil.insert("number_part: \"")
-            + pynini.closure(hindi_digit_graph + delete_space, 0, 9)
-            + hindi_digit_graph
-            + pynutil.insert("\" ")
-        )
-        self.english_digit = (
-            pynutil.insert("number_part: \"")
-            + pynini.closure(english_digit_graph + delete_space, 0, 9)
-            + english_digit_graph
-            + delete_space
-            + pynutil.insert("\" ")
-        )
-
-        self.country_code_with_single_digits = (
-            pynutil.insert("country_code: \"")
-            + pynini.closure(country_code_graph_single_digits + delete_space, 0, 2)
-            + pynutil.insert("\" ")
-        )
-        self.country_code_with_double_digits = (
-            pynutil.insert("country_code: \"")
-            + pynini.closure(country_code_graph_double_digits + delete_space, 0, 1)
-            + pynutil.insert("\" ")
-        )
-        self.country_code = self.country_code_with_single_digits | self.country_code_with_double_digits
-
-        self.city_code_with_single_digits = (
-            pynutil.insert("city_code: \"")
-            + pynini.closure(country_code_graph_single_digits + delete_space, 0, 2)
-            + pynutil.insert("\" ")
-        )
-        self.city_code_with_double_digits = (
-            pynutil.insert("city_code: \"")
-            + pynini.closure(country_code_graph_double_digits + delete_space, 0, 1)
-            + pynutil.insert("\" ")
-        )
-        self.city_code = self.city_code_with_single_digits | self.city_code_with_double_digits
-
-        self.landline_hindi_digit = (
-            pynutil.insert("number_part: \"")
-            + pynini.closure(hindi_digit_graph + delete_space, 0, 6)
-            + hindi_digit_graph
-            + pynutil.insert("\" ")
-        )
-        self.landline_english_digit = (
-            pynutil.insert("number_part: \"")
-            + pynini.closure(english_digit_graph + delete_space, 0, 6)
-            + english_digit_graph
-            + pynutil.insert("\" ")
-        )
-
+        country_code_graph_double_digits |= pynini.string_file(get_abs_path("data/telephone/teens_and_ties_eng_to_hin.tsv")).invert()
+        
+        self.hindi_digit = pynutil.insert("number_part: \"") + pynini.closure(hindi_digit_graph + delete_space, 0, 9) + hindi_digit_graph + pynutil.insert("\" ")
+        self.english_digit = pynutil.insert("number_part: \"") + pynini.closure(english_digit_graph + delete_space, 0, 9) + english_digit_graph + delete_space + pynutil.insert("\" ")
+        
+        self.country_code_with_single_digits = pynutil.insert("country_code: \"") + pynini.closure(country_code_graph_single_digits + delete_space, 0, 2) + pynutil.insert("\" ")
+        self.country_code_with_double_digits = pynutil.insert("country_code: \"") + pynini.closure(country_code_graph_double_digits + delete_space, 0, 1) + pynutil.insert("\" ")
+        self.country_code = (self.country_code_with_single_digits | self.country_code_with_double_digits)
+        
+        self.city_code_with_single_digits = pynutil.insert("extension: \"") + pynini.closure(country_code_graph_single_digits + delete_space, 0, 2) + pynutil.insert("\" ")
+        self.city_code_with_double_digits = pynutil.insert("extension: \"") + pynini.closure(country_code_graph_double_digits + delete_space, 0, 1) + pynutil.insert("\" ")
+        self.city_code = (self.city_code_with_single_digits | self.city_code_with_double_digits)
+        
+        self.landline_hindi_digit = pynutil.insert("number_part: \"") + pynini.closure(hindi_digit_graph + delete_space, 0, 6) + hindi_digit_graph + pynutil.insert("\" ")
+        self.landline_english_digit = pynutil.insert("number_part: \"") + pynini.closure(english_digit_graph + delete_space, 0, 6) + english_digit_graph + pynutil.insert("\" ")
+        
         delete_plus = pynini.union(
             pynutil.delete("प्लस") | pynutil.delete("plus") | pynutil.delete("Plus") | pynutil.delete("PLUS")
         )
-
+        
         delete_zero = pynini.union(
             pynutil.delete("शून्य") | pynutil.delete("zero") | pynutil.delete("Zero") | pynutil.delete("ZERO")
         )
-
-        graph_number_with_hindi_digit = (
-            delete_plus + delete_space + self.country_code + delete_space + self.hindi_digit
-        )
+        
+        graph_number_with_hindi_digit = delete_plus + delete_space + self.country_code + delete_space + self.hindi_digit
         graph_number_with_english_digit = delete_plus + delete_space + self.country_code + self.english_digit
+        
+        graph_landline_with_hindi_digit = delete_zero + delete_space + self.city_code + delete_space + self.landline_hindi_digit
+        graph_landline_with_english_digit = delete_zero + delete_space + self.city_code + delete_space + self.landline_english_digit
 
-        graph_landline_with_hindi_digit = (
-            delete_zero + delete_space + self.city_code + delete_space + self.landline_hindi_digit
-        )
-        graph_landline_with_english_digit = (
-            delete_zero + delete_space + self.city_code + delete_space + self.landline_english_digit
-        )
-
-        graph = (
-            graph_number_with_hindi_digit
-            | graph_number_with_english_digit
-            | graph_landline_with_hindi_digit
-            | graph_landline_with_english_digit
-        )
+        graph = (graph_number_with_hindi_digit | graph_number_with_english_digit | graph_landline_with_hindi_digit | graph_landline_with_english_digit)
         final_graph = self.add_tokens(graph)
         self.fst = final_graph

From 4503378b6a8d50901135437bb39a77de0b98b168 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 21 Jan 2025 04:29:18 +0000
Subject: [PATCH 18/28] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 .../hi/taggers/telephone.py                   | 108 +++++++++++++-----
 1 file changed, 80 insertions(+), 28 deletions(-)

diff --git a/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py b/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py
index 2449eaff5..52960efcc 100644
--- a/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py
+++ b/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py
@@ -36,47 +36,99 @@ class TelephoneFst(GraphFst):
 
     def __init__(self, cardinal: GraphFst):
         super().__init__(name="telephone", kind="classify")
-        
+
         hindi_digit_graph = pynini.string_file(get_abs_path("data/numbers/digit.tsv")).invert()
         hindi_digit_graph |= pynini.string_file(get_abs_path("data/numbers/zero.tsv")).invert()
-        
+
         english_digit_graph = pynini.string_file(get_abs_path("data/telephone/eng_to_hindi_digit.tsv")).invert()
-        
+
         country_code_graph_single_digits = pynini.string_file(get_abs_path("data/numbers/digit.tsv")).invert()
         country_code_graph_single_digits |= pynini.string_file(get_abs_path("data/numbers/zero.tsv")).invert()
-        country_code_graph_single_digits |= pynini.string_file(get_abs_path("data/telephone/eng_to_hindi_digit.tsv")).invert()
-        
+        country_code_graph_single_digits |= pynini.string_file(
+            get_abs_path("data/telephone/eng_to_hindi_digit.tsv")
+        ).invert()
+
         country_code_graph_double_digits = pynini.string_file(get_abs_path("data/numbers/teens_and_ties.tsv")).invert()
-        country_code_graph_double_digits |= pynini.string_file(get_abs_path("data/telephone/teens_and_ties_eng_to_hin.tsv")).invert()
-        
-        self.hindi_digit = pynutil.insert("number_part: \"") + pynini.closure(hindi_digit_graph + delete_space, 0, 9) + hindi_digit_graph + pynutil.insert("\" ")
-        self.english_digit = pynutil.insert("number_part: \"") + pynini.closure(english_digit_graph + delete_space, 0, 9) + english_digit_graph + delete_space + pynutil.insert("\" ")
-        
-        self.country_code_with_single_digits = pynutil.insert("country_code: \"") + pynini.closure(country_code_graph_single_digits + delete_space, 0, 2) + pynutil.insert("\" ")
-        self.country_code_with_double_digits = pynutil.insert("country_code: \"") + pynini.closure(country_code_graph_double_digits + delete_space, 0, 1) + pynutil.insert("\" ")
-        self.country_code = (self.country_code_with_single_digits | self.country_code_with_double_digits)
-        
-        self.city_code_with_single_digits = pynutil.insert("extension: \"") + pynini.closure(country_code_graph_single_digits + delete_space, 0, 2) + pynutil.insert("\" ")
-        self.city_code_with_double_digits = pynutil.insert("extension: \"") + pynini.closure(country_code_graph_double_digits + delete_space, 0, 1) + pynutil.insert("\" ")
-        self.city_code = (self.city_code_with_single_digits | self.city_code_with_double_digits)
-        
-        self.landline_hindi_digit = pynutil.insert("number_part: \"") + pynini.closure(hindi_digit_graph + delete_space, 0, 6) + hindi_digit_graph + pynutil.insert("\" ")
-        self.landline_english_digit = pynutil.insert("number_part: \"") + pynini.closure(english_digit_graph + delete_space, 0, 6) + english_digit_graph + pynutil.insert("\" ")
-        
+        country_code_graph_double_digits |= pynini.string_file(
+            get_abs_path("data/telephone/teens_and_ties_eng_to_hin.tsv")
+        ).invert()
+
+        self.hindi_digit = (
+            pynutil.insert("number_part: \"")
+            + pynini.closure(hindi_digit_graph + delete_space, 0, 9)
+            + hindi_digit_graph
+            + pynutil.insert("\" ")
+        )
+        self.english_digit = (
+            pynutil.insert("number_part: \"")
+            + pynini.closure(english_digit_graph + delete_space, 0, 9)
+            + english_digit_graph
+            + delete_space
+            + pynutil.insert("\" ")
+        )
+
+        self.country_code_with_single_digits = (
+            pynutil.insert("country_code: \"")
+            + pynini.closure(country_code_graph_single_digits + delete_space, 0, 2)
+            + pynutil.insert("\" ")
+        )
+        self.country_code_with_double_digits = (
+            pynutil.insert("country_code: \"")
+            + pynini.closure(country_code_graph_double_digits + delete_space, 0, 1)
+            + pynutil.insert("\" ")
+        )
+        self.country_code = self.country_code_with_single_digits | self.country_code_with_double_digits
+
+        self.city_code_with_single_digits = (
+            pynutil.insert("extension: \"")
+            + pynini.closure(country_code_graph_single_digits + delete_space, 0, 2)
+            + pynutil.insert("\" ")
+        )
+        self.city_code_with_double_digits = (
+            pynutil.insert("extension: \"")
+            + pynini.closure(country_code_graph_double_digits + delete_space, 0, 1)
+            + pynutil.insert("\" ")
+        )
+        self.city_code = self.city_code_with_single_digits | self.city_code_with_double_digits
+
+        self.landline_hindi_digit = (
+            pynutil.insert("number_part: \"")
+            + pynini.closure(hindi_digit_graph + delete_space, 0, 6)
+            + hindi_digit_graph
+            + pynutil.insert("\" ")
+        )
+        self.landline_english_digit = (
+            pynutil.insert("number_part: \"")
+            + pynini.closure(english_digit_graph + delete_space, 0, 6)
+            + english_digit_graph
+            + pynutil.insert("\" ")
+        )
+
         delete_plus = pynini.union(
             pynutil.delete("प्लस") | pynutil.delete("plus") | pynutil.delete("Plus") | pynutil.delete("PLUS")
         )
-        
+
         delete_zero = pynini.union(
             pynutil.delete("शून्य") | pynutil.delete("zero") | pynutil.delete("Zero") | pynutil.delete("ZERO")
         )
-        
-        graph_number_with_hindi_digit = delete_plus + delete_space + self.country_code + delete_space + self.hindi_digit
+
+        graph_number_with_hindi_digit = (
+            delete_plus + delete_space + self.country_code + delete_space + self.hindi_digit
+        )
         graph_number_with_english_digit = delete_plus + delete_space + self.country_code + self.english_digit
-        
-        graph_landline_with_hindi_digit = delete_zero + delete_space + self.city_code + delete_space + self.landline_hindi_digit
-        graph_landline_with_english_digit = delete_zero + delete_space + self.city_code + delete_space + self.landline_english_digit
 
-        graph = (graph_number_with_hindi_digit | graph_number_with_english_digit | graph_landline_with_hindi_digit | graph_landline_with_english_digit)
+        graph_landline_with_hindi_digit = (
+            delete_zero + delete_space + self.city_code + delete_space + self.landline_hindi_digit
+        )
+        graph_landline_with_english_digit = (
+            delete_zero + delete_space + self.city_code + delete_space + self.landline_english_digit
+        )
+
+        graph = (
+            graph_number_with_hindi_digit
+            | graph_number_with_english_digit
+            | graph_landline_with_hindi_digit
+            | graph_landline_with_english_digit
+        )
         final_graph = self.add_tokens(graph)
         self.fst = final_graph

From eb269ef28cf77c6dc78fd7df2b1f5f0bbc3a244c Mon Sep 17 00:00:00 2001
From: Tarushi V <tarushiv@nvidia.com>
Date: Fri, 24 Jan 2025 12:13:01 +0530
Subject: [PATCH 19/28] telephone tagger with 3,4,5 digit std codes

Signed-off-by: Tarushi V <tarushiv@nvidia.com>
---
 .../hi/taggers/telephone.py                   | 146 +++++++++---------
 1 file changed, 72 insertions(+), 74 deletions(-)

diff --git a/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py b/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py
index 52960efcc..4ec0889aa 100644
--- a/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py
+++ b/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py
@@ -22,7 +22,7 @@
     delete_space,
     insert_space,
 )
-from nemo_text_processing.inverse_text_normalization.hi.utils import get_abs_path
+from nemo_text_processing.inverse_text_normalization.hi.utils import get_abs_path, apply_fst
 
 
 class TelephoneFst(GraphFst):
@@ -49,61 +49,49 @@ def __init__(self, cardinal: GraphFst):
         ).invert()
 
         country_code_graph_double_digits = pynini.string_file(get_abs_path("data/numbers/teens_and_ties.tsv")).invert()
-        country_code_graph_double_digits |= pynini.string_file(
-            get_abs_path("data/telephone/teens_and_ties_eng_to_hin.tsv")
-        ).invert()
-
-        self.hindi_digit = (
-            pynutil.insert("number_part: \"")
-            + pynini.closure(hindi_digit_graph + delete_space, 0, 9)
-            + hindi_digit_graph
-            + pynutil.insert("\" ")
-        )
-        self.english_digit = (
-            pynutil.insert("number_part: \"")
-            + pynini.closure(english_digit_graph + delete_space, 0, 9)
-            + english_digit_graph
-            + delete_space
-            + pynutil.insert("\" ")
-        )
-
-        self.country_code_with_single_digits = (
-            pynutil.insert("country_code: \"")
-            + pynini.closure(country_code_graph_single_digits + delete_space, 0, 2)
-            + pynutil.insert("\" ")
-        )
-        self.country_code_with_double_digits = (
-            pynutil.insert("country_code: \"")
-            + pynini.closure(country_code_graph_double_digits + delete_space, 0, 1)
-            + pynutil.insert("\" ")
-        )
-        self.country_code = self.country_code_with_single_digits | self.country_code_with_double_digits
-
-        self.city_code_with_single_digits = (
-            pynutil.insert("extension: \"")
-            + pynini.closure(country_code_graph_single_digits + delete_space, 0, 2)
-            + pynutil.insert("\" ")
-        )
-        self.city_code_with_double_digits = (
-            pynutil.insert("extension: \"")
-            + pynini.closure(country_code_graph_double_digits + delete_space, 0, 1)
-            + pynutil.insert("\" ")
-        )
-        self.city_code = self.city_code_with_single_digits | self.city_code_with_double_digits
-
-        self.landline_hindi_digit = (
-            pynutil.insert("number_part: \"")
-            + pynini.closure(hindi_digit_graph + delete_space, 0, 6)
-            + hindi_digit_graph
-            + pynutil.insert("\" ")
-        )
-        self.landline_english_digit = (
-            pynutil.insert("number_part: \"")
-            + pynini.closure(english_digit_graph + delete_space, 0, 6)
-            + english_digit_graph
-            + pynutil.insert("\" ")
-        )
-
+        country_code_graph_double_digits |= pynini.string_file(get_abs_path("data/telephone/teens_and_ties_eng_to_hin.tsv")).invert()
+        
+        self.hindi_digit = pynutil.insert("number_part: \"") + pynini.closure(hindi_digit_graph + delete_space, 0, 9) + hindi_digit_graph + pynutil.insert("\" ")
+        self.english_digit = pynutil.insert("number_part: \"") + pynini.closure(english_digit_graph + delete_space, 0, 9) + english_digit_graph + delete_space + pynutil.insert("\" ")
+        
+        self.country_code_with_single_digits = pynutil.insert("country_code: \"") + pynini.closure(country_code_graph_single_digits + delete_space, 0, 2) + pynutil.insert("\" ")
+        self.country_code_with_double_digits = pynutil.insert("country_code: \"") + pynini.closure(country_code_graph_double_digits + delete_space, 0, 1) + pynutil.insert("\" ")
+        self.country_code = (self.country_code_with_single_digits | self.country_code_with_double_digits)
+         
+        #two, three, four-digit extension code with zero
+        self.city_two_digit_code_hindi = pynutil.insert("extension: \"") + pynini.closure(hindi_digit_graph + delete_space, 0, 2) + pynutil.insert("\" ")
+        self.city_two_digit_code_english = pynutil.insert("extension: \"") + pynini.closure(english_digit_graph + delete_space, 0, 2) + pynutil.insert("\" ")
+        self.city_three_digit_code_hindi = pynutil.insert("extension: \"") + pynini.closure(hindi_digit_graph + delete_space, 0, 3) + pynutil.insert("\" ")
+        self.city_three_digit_code_english = pynutil.insert("extension: \"") + pynini.closure(english_digit_graph + delete_space, 0, 3) + pynutil.insert("\" ")
+        self.city_four_digit_code_hindi = pynutil.insert("extension: \"") + pynini.closure(hindi_digit_graph + delete_space, 0, 4) + pynutil.insert("\" ")
+        self.city_four_digit_code_english = pynutil.insert("extension: \"") + pynini.closure(english_digit_graph + delete_space, 0, 4) + pynutil.insert("\" ")
+        
+        #concise extensions graphs - 2,3,4-digit
+        self.city_two_digit_extension = self.city_two_digit_code_hindi | self.city_two_digit_code_english
+        self.city_three_digit_extension = self.city_three_digit_code_hindi | self.city_three_digit_code_english
+        self.city_four_digit_extension = (self.city_four_digit_code_hindi | self.city_four_digit_code_english)
+        
+        #7-digit landline graph for 2-digit extension in hindi and english digits
+        self.landline_with_extension_two_hindi = pynutil.insert("number_part: \"") + pynini.closure(hindi_digit_graph + delete_space, 6) + pynutil.insert("\" ")
+        self.landline_with_extension_two_english = pynutil.insert("number_part: \"") + pynini.closure(english_digit_graph + delete_space, 6) + pynutil.insert("\" ")
+        self.landline_two = (self.landline_with_extension_two_hindi | self.landline_with_extension_two_english)
+        
+        #7-digit landline graph for 3-digit extension in hindi and english digits
+        self.landline_with_extension_three_hindi = pynutil.insert("number_part: \"") + pynini.closure(hindi_digit_graph + delete_space, 7) + pynutil.insert("\" ")
+        self.landline_with_extension_three_english = pynutil.insert("number_part: \"") + pynini.closure(english_digit_graph + delete_space, 7) + pynutil.insert("\" ")
+        self.landline_three = (self.landline_with_extension_three_hindi | self.landline_with_extension_three_english)
+        
+        #7-digit landline graph for 4-digit extension in hindi and english digits
+        self.landline_with_extension_four_hindi = pynutil.insert("number_part: \"") + pynini.closure(hindi_digit_graph + delete_space, 8) + pynutil.insert("\" ")
+        self.landline_with_extension_four_english = pynutil.insert("number_part: \"") + pynini.closure(english_digit_graph + delete_space, 8) + pynutil.insert("\" ")
+        self.landline_four = (self.landline_with_extension_four_hindi | self.landline_with_extension_four_english)
+        
+        self.pincode_in_hindi = pynutil.insert("number_part: \"") + pynini.closure(hindi_digit_graph + delete_space, 0, 5) + hindi_digit_graph + pynutil.insert("\" ")
+        self.pincode_in_english = pynutil.insert("number_part: \"") + pynini.closure(english_digit_graph + delete_space, 0, 5) + english_digit_graph + pynutil.insert("\" ")
+        
+        self.credit_card_last_four_digits_in_hindi = pynutil.insert("number_part: \"") + pynini.closure(hindi_digit_graph + delete_space, 0, 3) + hindi_digit_graph + pynutil.insert("\" ")
+        self.credit_card_last_four_digits_in_english = pynutil.insert("number_part: \"") + pynini.closure(english_digit_graph + delete_space, 0, 3) + english_digit_graph + pynutil.insert("\" ")
+        
         delete_plus = pynini.union(
             pynutil.delete("प्लस") | pynutil.delete("plus") | pynutil.delete("Plus") | pynutil.delete("PLUS")
         )
@@ -111,24 +99,34 @@ def __init__(self, cardinal: GraphFst):
         delete_zero = pynini.union(
             pynutil.delete("शून्य") | pynutil.delete("zero") | pynutil.delete("Zero") | pynutil.delete("ZERO")
         )
-
-        graph_number_with_hindi_digit = (
-            delete_plus + delete_space + self.country_code + delete_space + self.hindi_digit
-        )
+        
+        graph_number_with_hindi_digit = delete_plus + delete_space + self.country_code + delete_space + self.hindi_digit
         graph_number_with_english_digit = delete_plus + delete_space + self.country_code + self.english_digit
-
-        graph_landline_with_hindi_digit = (
-            delete_zero + delete_space + self.city_code + delete_space + self.landline_hindi_digit
-        )
-        graph_landline_with_english_digit = (
-            delete_zero + delete_space + self.city_code + delete_space + self.landline_english_digit
-        )
-
-        graph = (
-            graph_number_with_hindi_digit
-            | graph_number_with_english_digit
-            | graph_landline_with_hindi_digit
-            | graph_landline_with_english_digit
-        )
+        
+        graph_landline_with_two_digit_extension = delete_zero + delete_space + self.city_two_digit_extension + delete_space + self.landline_two
+        graph_landline_with_three_digit_extension = delete_zero + delete_space + self.city_three_digit_extension + delete_space + self.landline_three
+        graph_landline_with_four_digit_extension = delete_zero + delete_space + self.city_four_digit_extension + delete_space + self.landline_four
+        
+        graph_pincode = self.pincode_in_hindi | self.pincode_in_english
+        
+        graph_credit_card_last_four_digits = self.credit_card_last_four_digits_in_hindi | self.credit_card_last_four_digits_in_english
+	
+        graph = (graph_number_with_hindi_digit | graph_number_with_english_digit | graph_landline_with_two_digit_extension | graph_landline_with_three_digit_extension | graph_landline_with_three_digit_extension | graph_pincode | graph_credit_card_last_four_digits)
+        
         final_graph = self.add_tokens(graph)
         self.fst = final_graph
+        
+#from nemo_text_processing.inverse_text_normalization.hi.taggers.cardinal import CardinalFst
+#cardinal = CardinalFst()
+#telephone = TelephoneFst(cardinal)
+
+#input_text = "zero eight zero two two nine four one one one" # zero+ two digit extension + landline in english
+#input_text = "zero eight zero nine two two nine four one one one" # zero + three digit extension + landline in english
+#input_text = "zero eight zero nine one two two nine four one one one" #zero + four digit extension + landline in english
+
+#input_text = "शून्य सात नौ एक नौ आठ सात छह पांच चार" #zero + two digit extension + landline in hindi
+#input_text = "शून्य सात नौ नौ एक नौ आठ सात छह पांच चार" #zero + three digit extension + landline in hindi
+#input_text = "शून्य सात नौ एक एक एक नौ आठ सात छह पांच चार" #zero+ four digit digit extension + landline in hindi
+
+#output = apply_fst(input_text, telephone.fst)
+#print(output)

From 26e9d7f50c7c27e1dbd946b0c2b16b241441e4fb Mon Sep 17 00:00:00 2001
From: Tarushi V <tarushiv@nvidia.com>
Date: Fri, 24 Jan 2025 12:17:50 +0530
Subject: [PATCH 20/28] Further additions - telephone verbalizer and test cases

Signed-off-by: Tarushi V <tarushiv@nvidia.com>
---
 .../hi/verbalizers/telephone.py                       | 11 +++++++++++
 .../test_cases_telephone.txt                          |  6 ++++++
 2 files changed, 17 insertions(+)

diff --git a/nemo_text_processing/inverse_text_normalization/hi/verbalizers/telephone.py b/nemo_text_processing/inverse_text_normalization/hi/verbalizers/telephone.py
index 3f4b4de1f..66ccb7927 100644
--- a/nemo_text_processing/inverse_text_normalization/hi/verbalizers/telephone.py
+++ b/nemo_text_processing/inverse_text_normalization/hi/verbalizers/telephone.py
@@ -17,6 +17,8 @@
 from pynini.lib import pynutil
 
 from nemo_text_processing.text_normalization.hi.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space
+from nemo_text_processing.inverse_text_normalization.hi.utils import apply_fst
+
 
 
 class TelephoneFst(GraphFst):
@@ -53,3 +55,12 @@ def __init__(self, cardinal: GraphFst):
         delete_tokens = self.delete_tokens(optional_country_code + number_part)
         delete_tokens |= self.delete_tokens(optional_city_code + number_part)
         self.fst = delete_tokens.optimize()
+        
+#from nemo_text_processing.inverse_text_normalization.hi.taggers.cardinal import CardinalFst
+#cardinal = CardinalFst()
+#telephone = TelephoneFst(cardinal)
+#input_text = 'telephone { number_part: "१९८७६५"  }'
+#input_text ='telephone { number_part: "३४०१"  }'
+#input_text = 'telephone { number_part: "०७९१"  }'
+#output = apply_fst(input_text, telephone.fst)
+#print(output)
diff --git a/tests/nemo_text_processing/hi/data_inverse_text_normalization/test_cases_telephone.txt b/tests/nemo_text_processing/hi/data_inverse_text_normalization/test_cases_telephone.txt
index 34d031b41..c5a2d574e 100644
--- a/tests/nemo_text_processing/hi/data_inverse_text_normalization/test_cases_telephone.txt
+++ b/tests/nemo_text_processing/hi/data_inverse_text_normalization/test_cases_telephone.txt
@@ -23,3 +23,9 @@ ZERO seven three चार पाँच छह सात आठ नौ शून
 ZERO seven three four five six seven eight nine zero~०७३ ४५६७८९०
 zero two eight seven six five four three two seven~०२८ ७६५४३२७
 PLUS eighty one nine seven four seven two zero zero one one eight~+८१ ९७४७२००११८
+एक एक शून्य शून्य सात शून्य दिल्ली के वसंत कुंज का पिनकोड है~११००७० दिल्ली के वसंत कुंज का पिनकोड है
+बंगलौर के बैयापानहली का पिनकोड पाँच छह शून्य शून्य तीन आठ है~बंगलौर के बैयापानहली का पिनकोड ५६००३८ है
+दिल्ली के वसंत कुंज का पिनकोड one one zero zero seven zero है~दिल्ली के वसंत कुंज का पिनकोड ११००७० है
+five six zero zero three eight बंगलौर के बैयापानहली का पिनकोड है~५६००३८ बंगलौर के बैयापानहली का पिनकोड है
+मेरे क्रेडिट कार्ड के आखिरी डिजिट शून्य शून्य तीन आठ हैं~मेरे क्रेडिट कार्ड के आखिरी डिजिट ००३८ हैं
+क्रेडिट कार्ड के आखिरी डिजिट four three seven two हैं~क्रेडिट कार्ड के आखिरी डिजिट ४३७२ हैं
\ No newline at end of file

From b743170959fe81fed20cc2d3a7c108e07dc5a9b0 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 24 Jan 2025 06:51:26 +0000
Subject: [PATCH 21/28] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 .../hi/taggers/telephone.py                   | 240 +++++++++++++-----
 .../hi/verbalizers/telephone.py               |  22 +-
 2 files changed, 185 insertions(+), 77 deletions(-)

diff --git a/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py b/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py
index 4ec0889aa..dd03551dc 100644
--- a/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py
+++ b/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py
@@ -22,7 +22,7 @@
     delete_space,
     insert_space,
 )
-from nemo_text_processing.inverse_text_normalization.hi.utils import get_abs_path, apply_fst
+from nemo_text_processing.inverse_text_normalization.hi.utils import apply_fst, get_abs_path
 
 
 class TelephoneFst(GraphFst):
@@ -49,49 +49,138 @@ def __init__(self, cardinal: GraphFst):
         ).invert()
 
         country_code_graph_double_digits = pynini.string_file(get_abs_path("data/numbers/teens_and_ties.tsv")).invert()
-        country_code_graph_double_digits |= pynini.string_file(get_abs_path("data/telephone/teens_and_ties_eng_to_hin.tsv")).invert()
-        
-        self.hindi_digit = pynutil.insert("number_part: \"") + pynini.closure(hindi_digit_graph + delete_space, 0, 9) + hindi_digit_graph + pynutil.insert("\" ")
-        self.english_digit = pynutil.insert("number_part: \"") + pynini.closure(english_digit_graph + delete_space, 0, 9) + english_digit_graph + delete_space + pynutil.insert("\" ")
-        
-        self.country_code_with_single_digits = pynutil.insert("country_code: \"") + pynini.closure(country_code_graph_single_digits + delete_space, 0, 2) + pynutil.insert("\" ")
-        self.country_code_with_double_digits = pynutil.insert("country_code: \"") + pynini.closure(country_code_graph_double_digits + delete_space, 0, 1) + pynutil.insert("\" ")
-        self.country_code = (self.country_code_with_single_digits | self.country_code_with_double_digits)
-         
-        #two, three, four-digit extension code with zero
-        self.city_two_digit_code_hindi = pynutil.insert("extension: \"") + pynini.closure(hindi_digit_graph + delete_space, 0, 2) + pynutil.insert("\" ")
-        self.city_two_digit_code_english = pynutil.insert("extension: \"") + pynini.closure(english_digit_graph + delete_space, 0, 2) + pynutil.insert("\" ")
-        self.city_three_digit_code_hindi = pynutil.insert("extension: \"") + pynini.closure(hindi_digit_graph + delete_space, 0, 3) + pynutil.insert("\" ")
-        self.city_three_digit_code_english = pynutil.insert("extension: \"") + pynini.closure(english_digit_graph + delete_space, 0, 3) + pynutil.insert("\" ")
-        self.city_four_digit_code_hindi = pynutil.insert("extension: \"") + pynini.closure(hindi_digit_graph + delete_space, 0, 4) + pynutil.insert("\" ")
-        self.city_four_digit_code_english = pynutil.insert("extension: \"") + pynini.closure(english_digit_graph + delete_space, 0, 4) + pynutil.insert("\" ")
-        
-        #concise extensions graphs - 2,3,4-digit
+        country_code_graph_double_digits |= pynini.string_file(
+            get_abs_path("data/telephone/teens_and_ties_eng_to_hin.tsv")
+        ).invert()
+
+        self.hindi_digit = (
+            pynutil.insert("number_part: \"")
+            + pynini.closure(hindi_digit_graph + delete_space, 0, 9)
+            + hindi_digit_graph
+            + pynutil.insert("\" ")
+        )
+        self.english_digit = (
+            pynutil.insert("number_part: \"")
+            + pynini.closure(english_digit_graph + delete_space, 0, 9)
+            + english_digit_graph
+            + delete_space
+            + pynutil.insert("\" ")
+        )
+
+        self.country_code_with_single_digits = (
+            pynutil.insert("country_code: \"")
+            + pynini.closure(country_code_graph_single_digits + delete_space, 0, 2)
+            + pynutil.insert("\" ")
+        )
+        self.country_code_with_double_digits = (
+            pynutil.insert("country_code: \"")
+            + pynini.closure(country_code_graph_double_digits + delete_space, 0, 1)
+            + pynutil.insert("\" ")
+        )
+        self.country_code = self.country_code_with_single_digits | self.country_code_with_double_digits
+
+        # two, three, four-digit extension code with zero
+        self.city_two_digit_code_hindi = (
+            pynutil.insert("extension: \"")
+            + pynini.closure(hindi_digit_graph + delete_space, 0, 2)
+            + pynutil.insert("\" ")
+        )
+        self.city_two_digit_code_english = (
+            pynutil.insert("extension: \"")
+            + pynini.closure(english_digit_graph + delete_space, 0, 2)
+            + pynutil.insert("\" ")
+        )
+        self.city_three_digit_code_hindi = (
+            pynutil.insert("extension: \"")
+            + pynini.closure(hindi_digit_graph + delete_space, 0, 3)
+            + pynutil.insert("\" ")
+        )
+        self.city_three_digit_code_english = (
+            pynutil.insert("extension: \"")
+            + pynini.closure(english_digit_graph + delete_space, 0, 3)
+            + pynutil.insert("\" ")
+        )
+        self.city_four_digit_code_hindi = (
+            pynutil.insert("extension: \"")
+            + pynini.closure(hindi_digit_graph + delete_space, 0, 4)
+            + pynutil.insert("\" ")
+        )
+        self.city_four_digit_code_english = (
+            pynutil.insert("extension: \"")
+            + pynini.closure(english_digit_graph + delete_space, 0, 4)
+            + pynutil.insert("\" ")
+        )
+
+        # concise extensions graphs - 2,3,4-digit
         self.city_two_digit_extension = self.city_two_digit_code_hindi | self.city_two_digit_code_english
         self.city_three_digit_extension = self.city_three_digit_code_hindi | self.city_three_digit_code_english
-        self.city_four_digit_extension = (self.city_four_digit_code_hindi | self.city_four_digit_code_english)
-        
-        #7-digit landline graph for 2-digit extension in hindi and english digits
-        self.landline_with_extension_two_hindi = pynutil.insert("number_part: \"") + pynini.closure(hindi_digit_graph + delete_space, 6) + pynutil.insert("\" ")
-        self.landline_with_extension_two_english = pynutil.insert("number_part: \"") + pynini.closure(english_digit_graph + delete_space, 6) + pynutil.insert("\" ")
-        self.landline_two = (self.landline_with_extension_two_hindi | self.landline_with_extension_two_english)
-        
-        #7-digit landline graph for 3-digit extension in hindi and english digits
-        self.landline_with_extension_three_hindi = pynutil.insert("number_part: \"") + pynini.closure(hindi_digit_graph + delete_space, 7) + pynutil.insert("\" ")
-        self.landline_with_extension_three_english = pynutil.insert("number_part: \"") + pynini.closure(english_digit_graph + delete_space, 7) + pynutil.insert("\" ")
-        self.landline_three = (self.landline_with_extension_three_hindi | self.landline_with_extension_three_english)
-        
-        #7-digit landline graph for 4-digit extension in hindi and english digits
-        self.landline_with_extension_four_hindi = pynutil.insert("number_part: \"") + pynini.closure(hindi_digit_graph + delete_space, 8) + pynutil.insert("\" ")
-        self.landline_with_extension_four_english = pynutil.insert("number_part: \"") + pynini.closure(english_digit_graph + delete_space, 8) + pynutil.insert("\" ")
-        self.landline_four = (self.landline_with_extension_four_hindi | self.landline_with_extension_four_english)
-        
-        self.pincode_in_hindi = pynutil.insert("number_part: \"") + pynini.closure(hindi_digit_graph + delete_space, 0, 5) + hindi_digit_graph + pynutil.insert("\" ")
-        self.pincode_in_english = pynutil.insert("number_part: \"") + pynini.closure(english_digit_graph + delete_space, 0, 5) + english_digit_graph + pynutil.insert("\" ")
-        
-        self.credit_card_last_four_digits_in_hindi = pynutil.insert("number_part: \"") + pynini.closure(hindi_digit_graph + delete_space, 0, 3) + hindi_digit_graph + pynutil.insert("\" ")
-        self.credit_card_last_four_digits_in_english = pynutil.insert("number_part: \"") + pynini.closure(english_digit_graph + delete_space, 0, 3) + english_digit_graph + pynutil.insert("\" ")
-        
+        self.city_four_digit_extension = self.city_four_digit_code_hindi | self.city_four_digit_code_english
+
+        # 7-digit landline graph for 2-digit extension in hindi and english digits
+        self.landline_with_extension_two_hindi = (
+            pynutil.insert("number_part: \"")
+            + pynini.closure(hindi_digit_graph + delete_space, 6)
+            + pynutil.insert("\" ")
+        )
+        self.landline_with_extension_two_english = (
+            pynutil.insert("number_part: \"")
+            + pynini.closure(english_digit_graph + delete_space, 6)
+            + pynutil.insert("\" ")
+        )
+        self.landline_two = self.landline_with_extension_two_hindi | self.landline_with_extension_two_english
+
+        # 7-digit landline graph for 3-digit extension in hindi and english digits
+        self.landline_with_extension_three_hindi = (
+            pynutil.insert("number_part: \"")
+            + pynini.closure(hindi_digit_graph + delete_space, 7)
+            + pynutil.insert("\" ")
+        )
+        self.landline_with_extension_three_english = (
+            pynutil.insert("number_part: \"")
+            + pynini.closure(english_digit_graph + delete_space, 7)
+            + pynutil.insert("\" ")
+        )
+        self.landline_three = self.landline_with_extension_three_hindi | self.landline_with_extension_three_english
+
+        # 7-digit landline graph for 4-digit extension in hindi and english digits
+        self.landline_with_extension_four_hindi = (
+            pynutil.insert("number_part: \"")
+            + pynini.closure(hindi_digit_graph + delete_space, 8)
+            + pynutil.insert("\" ")
+        )
+        self.landline_with_extension_four_english = (
+            pynutil.insert("number_part: \"")
+            + pynini.closure(english_digit_graph + delete_space, 8)
+            + pynutil.insert("\" ")
+        )
+        self.landline_four = self.landline_with_extension_four_hindi | self.landline_with_extension_four_english
+
+        self.pincode_in_hindi = (
+            pynutil.insert("number_part: \"")
+            + pynini.closure(hindi_digit_graph + delete_space, 0, 5)
+            + hindi_digit_graph
+            + pynutil.insert("\" ")
+        )
+        self.pincode_in_english = (
+            pynutil.insert("number_part: \"")
+            + pynini.closure(english_digit_graph + delete_space, 0, 5)
+            + english_digit_graph
+            + pynutil.insert("\" ")
+        )
+
+        self.credit_card_last_four_digits_in_hindi = (
+            pynutil.insert("number_part: \"")
+            + pynini.closure(hindi_digit_graph + delete_space, 0, 3)
+            + hindi_digit_graph
+            + pynutil.insert("\" ")
+        )
+        self.credit_card_last_four_digits_in_english = (
+            pynutil.insert("number_part: \"")
+            + pynini.closure(english_digit_graph + delete_space, 0, 3)
+            + english_digit_graph
+            + pynutil.insert("\" ")
+        )
+
         delete_plus = pynini.union(
             pynutil.delete("प्लस") | pynutil.delete("plus") | pynutil.delete("Plus") | pynutil.delete("PLUS")
         )
@@ -99,34 +188,53 @@ def __init__(self, cardinal: GraphFst):
         delete_zero = pynini.union(
             pynutil.delete("शून्य") | pynutil.delete("zero") | pynutil.delete("Zero") | pynutil.delete("ZERO")
         )
-        
-        graph_number_with_hindi_digit = delete_plus + delete_space + self.country_code + delete_space + self.hindi_digit
+
+        graph_number_with_hindi_digit = (
+            delete_plus + delete_space + self.country_code + delete_space + self.hindi_digit
+        )
         graph_number_with_english_digit = delete_plus + delete_space + self.country_code + self.english_digit
-        
-        graph_landline_with_two_digit_extension = delete_zero + delete_space + self.city_two_digit_extension + delete_space + self.landline_two
-        graph_landline_with_three_digit_extension = delete_zero + delete_space + self.city_three_digit_extension + delete_space + self.landline_three
-        graph_landline_with_four_digit_extension = delete_zero + delete_space + self.city_four_digit_extension + delete_space + self.landline_four
-        
+
+        graph_landline_with_two_digit_extension = (
+            delete_zero + delete_space + self.city_two_digit_extension + delete_space + self.landline_two
+        )
+        graph_landline_with_three_digit_extension = (
+            delete_zero + delete_space + self.city_three_digit_extension + delete_space + self.landline_three
+        )
+        graph_landline_with_four_digit_extension = (
+            delete_zero + delete_space + self.city_four_digit_extension + delete_space + self.landline_four
+        )
+
         graph_pincode = self.pincode_in_hindi | self.pincode_in_english
-        
-        graph_credit_card_last_four_digits = self.credit_card_last_four_digits_in_hindi | self.credit_card_last_four_digits_in_english
-	
-        graph = (graph_number_with_hindi_digit | graph_number_with_english_digit | graph_landline_with_two_digit_extension | graph_landline_with_three_digit_extension | graph_landline_with_three_digit_extension | graph_pincode | graph_credit_card_last_four_digits)
-        
+
+        graph_credit_card_last_four_digits = (
+            self.credit_card_last_four_digits_in_hindi | self.credit_card_last_four_digits_in_english
+        )
+
+        graph = (
+            graph_number_with_hindi_digit
+            | graph_number_with_english_digit
+            | graph_landline_with_two_digit_extension
+            | graph_landline_with_three_digit_extension
+            | graph_landline_with_three_digit_extension
+            | graph_pincode
+            | graph_credit_card_last_four_digits
+        )
+
         final_graph = self.add_tokens(graph)
         self.fst = final_graph
-        
-#from nemo_text_processing.inverse_text_normalization.hi.taggers.cardinal import CardinalFst
-#cardinal = CardinalFst()
-#telephone = TelephoneFst(cardinal)
 
-#input_text = "zero eight zero two two nine four one one one" # zero+ two digit extension + landline in english
-#input_text = "zero eight zero nine two two nine four one one one" # zero + three digit extension + landline in english
-#input_text = "zero eight zero nine one two two nine four one one one" #zero + four digit extension + landline in english
 
-#input_text = "शून्य सात नौ एक नौ आठ सात छह पांच चार" #zero + two digit extension + landline in hindi
-#input_text = "शून्य सात नौ नौ एक नौ आठ सात छह पांच चार" #zero + three digit extension + landline in hindi
-#input_text = "शून्य सात नौ एक एक एक नौ आठ सात छह पांच चार" #zero+ four digit digit extension + landline in hindi
+# from nemo_text_processing.inverse_text_normalization.hi.taggers.cardinal import CardinalFst
+# cardinal = CardinalFst()
+# telephone = TelephoneFst(cardinal)
+
+# input_text = "zero eight zero two two nine four one one one" # zero+ two digit extension + landline in english
+# input_text = "zero eight zero nine two two nine four one one one" # zero + three digit extension + landline in english
+# input_text = "zero eight zero nine one two two nine four one one one" #zero + four digit extension + landline in english
+
+# input_text = "शून्य सात नौ एक नौ आठ सात छह पांच चार" #zero + two digit extension + landline in hindi
+# input_text = "शून्य सात नौ नौ एक नौ आठ सात छह पांच चार" #zero + three digit extension + landline in hindi
+# input_text = "शून्य सात नौ एक एक एक नौ आठ सात छह पांच चार" #zero+ four digit digit extension + landline in hindi
 
-#output = apply_fst(input_text, telephone.fst)
-#print(output)
+# output = apply_fst(input_text, telephone.fst)
+# print(output)
diff --git a/nemo_text_processing/inverse_text_normalization/hi/verbalizers/telephone.py b/nemo_text_processing/inverse_text_normalization/hi/verbalizers/telephone.py
index 66ccb7927..a59a2ca97 100644
--- a/nemo_text_processing/inverse_text_normalization/hi/verbalizers/telephone.py
+++ b/nemo_text_processing/inverse_text_normalization/hi/verbalizers/telephone.py
@@ -16,9 +16,8 @@
 import pynini
 from pynini.lib import pynutil
 
-from nemo_text_processing.text_normalization.hi.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space
 from nemo_text_processing.inverse_text_normalization.hi.utils import apply_fst
-
+from nemo_text_processing.text_normalization.hi.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space
 
 
 class TelephoneFst(GraphFst):
@@ -55,12 +54,13 @@ def __init__(self, cardinal: GraphFst):
         delete_tokens = self.delete_tokens(optional_country_code + number_part)
         delete_tokens |= self.delete_tokens(optional_city_code + number_part)
         self.fst = delete_tokens.optimize()
-        
-#from nemo_text_processing.inverse_text_normalization.hi.taggers.cardinal import CardinalFst
-#cardinal = CardinalFst()
-#telephone = TelephoneFst(cardinal)
-#input_text = 'telephone { number_part: "१९८७६५"  }'
-#input_text ='telephone { number_part: "३४०१"  }'
-#input_text = 'telephone { number_part: "०७९१"  }'
-#output = apply_fst(input_text, telephone.fst)
-#print(output)
+
+
+# from nemo_text_processing.inverse_text_normalization.hi.taggers.cardinal import CardinalFst
+# cardinal = CardinalFst()
+# telephone = TelephoneFst(cardinal)
+# input_text = 'telephone { number_part: "१९८७६५"  }'
+# input_text ='telephone { number_part: "३४०१"  }'
+# input_text = 'telephone { number_part: "०७९१"  }'
+# output = apply_fst(input_text, telephone.fst)
+# print(output)

From ab5d886f951f34a2b2e8f9a3aa89450df6e2297c Mon Sep 17 00:00:00 2001
From: Tarushi V <tarushiv@nvidia.com>
Date: Mon, 27 Jan 2025 15:29:34 +0530
Subject: [PATCH 22/28] Jenkins: bump HI_TN_CACHE to 01-27-25-1

Signed-off-by: Tarushi V <tarushiv@nvidia.com>
---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 5e3916ce2..bbad2c3da 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -27,7 +27,7 @@ pipeline {
     HY_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/03-12-24-0'
     MR_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/03-12-24-1'
     JA_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/10-17-24-1'
-    HI_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/01-15-25-1'
+    HI_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/01-27-25-1'
     DEFAULT_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
   }
   stages {

From a0ff72e2426953ec63a63be411f959c6f529d4e5 Mon Sep 17 00:00:00 2001
From: Tarushi V <tarushiv@nvidia.com>
Date: Tue, 28 Jan 2025 12:09:58 +0530
Subject: [PATCH 23/28] Unify landline city-code graphs in telephone tagger; remove debug code

Signed-off-by: Tarushi V <tarushiv@nvidia.com>
---
 .../hi/taggers/telephone.py                   | 115 +++---------------
 .../hi/verbalizers/telephone.py               |  11 --
 .../test_cases_telephone.txt                  |  16 ++-
 .../nemo_text_processing/hi/test_telephone.py |   1 -
 4 files changed, 31 insertions(+), 112 deletions(-)

diff --git a/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py b/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py
index dd03551dc..cbfef8393 100644
--- a/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py
+++ b/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py
@@ -16,13 +16,10 @@
 from pynini.lib import pynutil
 
 from nemo_text_processing.inverse_text_normalization.hi.graph_utils import (
-    NEMO_HI_DIGIT,
     GraphFst,
-    delete_extra_space,
     delete_space,
-    insert_space,
 )
-from nemo_text_processing.inverse_text_normalization.hi.utils import apply_fst, get_abs_path
+from nemo_text_processing.inverse_text_normalization.hi.utils import get_abs_path
 
 
 class TelephoneFst(GraphFst):
@@ -80,80 +77,32 @@ def __init__(self, cardinal: GraphFst):
         self.country_code = self.country_code_with_single_digits | self.country_code_with_double_digits
 
         # two, three, four-digit extension code with zero
-        self.city_two_digit_code_hindi = (
+        self.city_code_hindi = (
             pynutil.insert("extension: \"")
-            + pynini.closure(hindi_digit_graph + delete_space, 0, 2)
+            + pynini.closure(hindi_digit_graph + delete_space, 2, 5)
             + pynutil.insert("\" ")
         )
-        self.city_two_digit_code_english = (
+        self.city_code_english = (
             pynutil.insert("extension: \"")
-            + pynini.closure(english_digit_graph + delete_space, 0, 2)
+            + pynini.closure(english_digit_graph + delete_space, 2, 5)
             + pynutil.insert("\" ")
         )
-        self.city_three_digit_code_hindi = (
-            pynutil.insert("extension: \"")
-            + pynini.closure(hindi_digit_graph + delete_space, 0, 3)
-            + pynutil.insert("\" ")
-        )
-        self.city_three_digit_code_english = (
-            pynutil.insert("extension: \"")
-            + pynini.closure(english_digit_graph + delete_space, 0, 3)
-            + pynutil.insert("\" ")
-        )
-        self.city_four_digit_code_hindi = (
-            pynutil.insert("extension: \"")
-            + pynini.closure(hindi_digit_graph + delete_space, 0, 4)
-            + pynutil.insert("\" ")
-        )
-        self.city_four_digit_code_english = (
-            pynutil.insert("extension: \"")
-            + pynini.closure(english_digit_graph + delete_space, 0, 4)
-            + pynutil.insert("\" ")
-        )
-
-        # concise extensions graphs - 2,3,4-digit
-        self.city_two_digit_extension = self.city_two_digit_code_hindi | self.city_two_digit_code_english
-        self.city_three_digit_extension = self.city_three_digit_code_hindi | self.city_three_digit_code_english
-        self.city_four_digit_extension = self.city_four_digit_code_hindi | self.city_four_digit_code_english
-
-        # 7-digit landline graph for 2-digit extension in hindi and english digits
-        self.landline_with_extension_two_hindi = (
-            pynutil.insert("number_part: \"")
-            + pynini.closure(hindi_digit_graph + delete_space, 6)
-            + pynutil.insert("\" ")
-        )
-        self.landline_with_extension_two_english = (
-            pynutil.insert("number_part: \"")
-            + pynini.closure(english_digit_graph + delete_space, 6)
-            + pynutil.insert("\" ")
-        )
-        self.landline_two = self.landline_with_extension_two_hindi | self.landline_with_extension_two_english
+ 
+        self.city_extension = self.city_code_hindi | self.city_code_english
 
-        # 7-digit landline graph for 3-digit extension in hindi and english digits
-        self.landline_with_extension_three_hindi = (
+        # 7-digit landline graph in hindi and english digits
+        self.landline_hindi = (
             pynutil.insert("number_part: \"")
             + pynini.closure(hindi_digit_graph + delete_space, 7)
             + pynutil.insert("\" ")
         )
-        self.landline_with_extension_three_english = (
+        self.landline_english = (
             pynutil.insert("number_part: \"")
             + pynini.closure(english_digit_graph + delete_space, 7)
             + pynutil.insert("\" ")
         )
-        self.landline_three = self.landline_with_extension_three_hindi | self.landline_with_extension_three_english
-
-        # 7-digit landline graph for 4-digit extension in hindi and english digits
-        self.landline_with_extension_four_hindi = (
-            pynutil.insert("number_part: \"")
-            + pynini.closure(hindi_digit_graph + delete_space, 8)
-            + pynutil.insert("\" ")
-        )
-        self.landline_with_extension_four_english = (
-            pynutil.insert("number_part: \"")
-            + pynini.closure(english_digit_graph + delete_space, 8)
-            + pynutil.insert("\" ")
-        )
-        self.landline_four = self.landline_with_extension_four_hindi | self.landline_with_extension_four_english
+        
+        self.landline = self.landline_hindi | self.landline_english
 
         self.pincode_in_hindi = (
             pynutil.insert("number_part: \"")
@@ -168,13 +117,13 @@ def __init__(self, cardinal: GraphFst):
             + pynutil.insert("\" ")
         )
 
-        self.credit_card_last_four_digits_in_hindi = (
+        self.credit_card_last_digits_hindi = (
             pynutil.insert("number_part: \"")
             + pynini.closure(hindi_digit_graph + delete_space, 0, 3)
             + hindi_digit_graph
             + pynutil.insert("\" ")
         )
-        self.credit_card_last_four_digits_in_english = (
+        self.credit_card_last_digits_english = (
             pynutil.insert("number_part: \"")
             + pynini.closure(english_digit_graph + delete_space, 0, 3)
             + english_digit_graph
@@ -194,47 +143,23 @@ def __init__(self, cardinal: GraphFst):
         )
         graph_number_with_english_digit = delete_plus + delete_space + self.country_code + self.english_digit
 
-        graph_landline_with_two_digit_extension = (
-            delete_zero + delete_space + self.city_two_digit_extension + delete_space + self.landline_two
-        )
-        graph_landline_with_three_digit_extension = (
-            delete_zero + delete_space + self.city_three_digit_extension + delete_space + self.landline_three
-        )
-        graph_landline_with_four_digit_extension = (
-            delete_zero + delete_space + self.city_four_digit_extension + delete_space + self.landline_four
+        graph_landline_with_extension = (
+            delete_zero + delete_space + self.city_extension + delete_space + self.landline
         )
 
         graph_pincode = self.pincode_in_hindi | self.pincode_in_english
 
-        graph_credit_card_last_four_digits = (
-            self.credit_card_last_four_digits_in_hindi | self.credit_card_last_four_digits_in_english
+        graph_credit_card_last_digits = (
+            self.credit_card_last_digits_hindi | self.credit_card_last_digits_english
         )
 
         graph = (
             graph_number_with_hindi_digit
             | graph_number_with_english_digit
-            | graph_landline_with_two_digit_extension
-            | graph_landline_with_three_digit_extension
-            | graph_landline_with_three_digit_extension
+            | graph_landline_with_extension
             | graph_pincode
-            | graph_credit_card_last_four_digits
+            | graph_credit_card_last_digits
         )
 
         final_graph = self.add_tokens(graph)
         self.fst = final_graph
-
-
-# from nemo_text_processing.inverse_text_normalization.hi.taggers.cardinal import CardinalFst
-# cardinal = CardinalFst()
-# telephone = TelephoneFst(cardinal)
-
-# input_text = "zero eight zero two two nine four one one one" # zero+ two digit extension + landline in english
-# input_text = "zero eight zero nine two two nine four one one one" # zero + three digit extension + landline in english
-# input_text = "zero eight zero nine one two two nine four one one one" #zero + four digit extension + landline in english
-
-# input_text = "शून्य सात नौ एक नौ आठ सात छह पांच चार" #zero + two digit extension + landline in hindi
-# input_text = "शून्य सात नौ नौ एक नौ आठ सात छह पांच चार" #zero + three digit extension + landline in hindi
-# input_text = "शून्य सात नौ एक एक एक नौ आठ सात छह पांच चार" #zero+ four digit digit extension + landline in hindi
-
-# output = apply_fst(input_text, telephone.fst)
-# print(output)
diff --git a/nemo_text_processing/inverse_text_normalization/hi/verbalizers/telephone.py b/nemo_text_processing/inverse_text_normalization/hi/verbalizers/telephone.py
index a59a2ca97..3f4b4de1f 100644
--- a/nemo_text_processing/inverse_text_normalization/hi/verbalizers/telephone.py
+++ b/nemo_text_processing/inverse_text_normalization/hi/verbalizers/telephone.py
@@ -16,7 +16,6 @@
 import pynini
 from pynini.lib import pynutil
 
-from nemo_text_processing.inverse_text_normalization.hi.utils import apply_fst
 from nemo_text_processing.text_normalization.hi.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space
 
 
@@ -54,13 +53,3 @@ def __init__(self, cardinal: GraphFst):
         delete_tokens = self.delete_tokens(optional_country_code + number_part)
         delete_tokens |= self.delete_tokens(optional_city_code + number_part)
         self.fst = delete_tokens.optimize()
-
-
-# from nemo_text_processing.inverse_text_normalization.hi.taggers.cardinal import CardinalFst
-# cardinal = CardinalFst()
-# telephone = TelephoneFst(cardinal)
-# input_text = 'telephone { number_part: "१९८७६५"  }'
-# input_text ='telephone { number_part: "३४०१"  }'
-# input_text = 'telephone { number_part: "०७९१"  }'
-# output = apply_fst(input_text, telephone.fst)
-# print(output)
diff --git a/tests/nemo_text_processing/hi/data_inverse_text_normalization/test_cases_telephone.txt b/tests/nemo_text_processing/hi/data_inverse_text_normalization/test_cases_telephone.txt
index c5a2d574e..0c51d8df0 100644
--- a/tests/nemo_text_processing/hi/data_inverse_text_normalization/test_cases_telephone.txt
+++ b/tests/nemo_text_processing/hi/data_inverse_text_normalization/test_cases_telephone.txt
@@ -3,12 +3,8 @@
 plus nine eight nine four one one one two three four zero one~+९८ ९४१११२३४०१
 प्लस  नौ एक नौ आठ सात छह पांच चार तीन दो एक शून्य~+९१ ९८७६५४३२१०
 plus eleven nine four one one one two three~+११ ९४१११२३
-zero eleven nine four one one one two three~०११ ९४१११२३
-शून्य ग्यारह नौ चार एक एक एक दो तीन~०११ ९४१११२३
 zero eight zero two nine four one one one two~०८० २९४१११२
 शून्य आठ शून्य दो नौ चार एक एक एक दो~०८० २९४१११२
-zero eleven two nine four one one one two~०११ २९४१११२
-शून्य ग्यारह दो नौ चार एक एक एक दो~०११ २९४१११२
 zero four zero two seven eight one eight three nine~०४० २७८१८३९
 शून्य चार शून्य दो सात आठ एक आठ तीन नौ~०४० २७८१८३९
 शून्य सात नौ एक नौ आठ सात छह पांच चार~०७९ १९८७६५४
@@ -23,9 +19,19 @@ ZERO seven three चार पाँच छह सात आठ नौ शून
 ZERO seven three four five six seven eight nine zero~०७३ ४५६७८९०
 zero two eight seven six five four three two seven~०२८ ७६५४३२७
 PLUS eighty one nine seven four seven two zero zero one one eight~+८१ ९७४७२००११८
+zero eight zero two two nine four one one one~०८० २२९४१११
+शून्य सात नौ एक नौ आठ सात छह पांच चार~०७९ १९८७६५४
+zero eight zero nine two two nine four one one one~०८०९ २२९४१११
+शून्य सात नौ नौ एक नौ आठ सात छह पांच चार~०७९९ १९८७६५४
+zero three one nine two two two nine four one one one~०३१९२ २२९४१११
+शून्य सात नौ एक एक एक नौ आठ सात छह पांच चार~०७९११ १९८७६५४
 एक एक शून्य शून्य सात शून्य दिल्ली के वसंत कुंज का पिनकोड है~११००७० दिल्ली के वसंत कुंज का पिनकोड है
 बंगलौर के बैयापानहली का पिनकोड पाँच छह शून्य शून्य तीन आठ है~बंगलौर के बैयापानहली का पिनकोड ५६००३८ है
 दिल्ली के वसंत कुंज का पिनकोड one one zero zero seven zero है~दिल्ली के वसंत कुंज का पिनकोड ११००७० है
 five six zero zero three eight बंगलौर के बैयापानहली का पिनकोड है~५६००३८ बंगलौर के बैयापानहली का पिनकोड है
 मेरे क्रेडिट कार्ड के आखिरी डिजिट शून्य शून्य तीन आठ हैं~मेरे क्रेडिट कार्ड के आखिरी डिजिट ००३८ हैं
-क्रेडिट कार्ड के आखिरी डिजिट four three seven two हैं~क्रेडिट कार्ड के आखिरी डिजिट ४३७२ हैं
\ No newline at end of file
+क्रेडिट कार्ड के आखिरी डिजिट four three seven two हैं~क्रेडिट कार्ड के आखिरी डिजिट ४३७२ हैं
+दिल्ली के वसंत कुंज का पिनकोड one one zero zero seven zero है~दिल्ली के वसंत कुंज का पिनकोड ११००७० है
+five six zero zero three eight बंगलौर के बैयापानहली का पिनकोड है~५६००३८ बंगलौर के बैयापानहली का पिनकोड है
+मेरे क्रेडिट कार्ड के आखिरी डिजिट शून्य शून्य तीन आठ हैं~मेरे क्रेडिट कार्ड के आखिरी डिजिट ००३८ हैं
+क्रेडिट कार्ड के आखिरी डिजिट four three seven two हैं~क्रेडिट कार्ड के आखिरी डिजिट ४३७२ हैं
diff --git a/tests/nemo_text_processing/hi/test_telephone.py b/tests/nemo_text_processing/hi/test_telephone.py
index b01b11871..7e43f7e82 100644
--- a/tests/nemo_text_processing/hi/test_telephone.py
+++ b/tests/nemo_text_processing/hi/test_telephone.py
@@ -16,7 +16,6 @@
 from parameterized import parameterized
 
 from nemo_text_processing.inverse_text_normalization.inverse_normalize import InverseNormalizer
-from nemo_text_processing.text_normalization.normalize import Normalizer
 
 from ..utils import CACHE_DIR, parse_test_case_file
 

From cac9be65f313670f22a149dc2b049b4cab67d722 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 28 Jan 2025 06:40:52 +0000
Subject: [PATCH 24/28] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 .../hi/taggers/telephone.py                     | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py b/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py
index cbfef8393..0162dacc4 100644
--- a/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py
+++ b/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py
@@ -15,10 +15,7 @@
 import pynini
 from pynini.lib import pynutil
 
-from nemo_text_processing.inverse_text_normalization.hi.graph_utils import (
-    GraphFst,
-    delete_space,
-)
+from nemo_text_processing.inverse_text_normalization.hi.graph_utils import GraphFst, delete_space
 from nemo_text_processing.inverse_text_normalization.hi.utils import get_abs_path
 
 
@@ -87,7 +84,7 @@ def __init__(self, cardinal: GraphFst):
             + pynini.closure(english_digit_graph + delete_space, 2, 5)
             + pynutil.insert("\" ")
         )
- 
+
         self.city_extension = self.city_code_hindi | self.city_code_english
 
         # 7-digit landline graph in hindi and english digits
@@ -101,7 +98,7 @@ def __init__(self, cardinal: GraphFst):
             + pynini.closure(english_digit_graph + delete_space, 7)
             + pynutil.insert("\" ")
         )
-        
+
         self.landline = self.landline_hindi | self.landline_english
 
         self.pincode_in_hindi = (
@@ -143,15 +140,11 @@ def __init__(self, cardinal: GraphFst):
         )
         graph_number_with_english_digit = delete_plus + delete_space + self.country_code + self.english_digit
 
-        graph_landline_with_extension = (
-            delete_zero + delete_space + self.city_extension + delete_space + self.landline
-        )
+        graph_landline_with_extension = delete_zero + delete_space + self.city_extension + delete_space + self.landline
 
         graph_pincode = self.pincode_in_hindi | self.pincode_in_english
 
-        graph_credit_card_last_digits = (
-            self.credit_card_last_digits_hindi | self.credit_card_last_digits_english
-        )
+        graph_credit_card_last_digits = self.credit_card_last_digits_hindi | self.credit_card_last_digits_english
 
         graph = (
             graph_number_with_hindi_digit

From e23887149980d3a19ba6036ea338c62ba640d884 Mon Sep 17 00:00:00 2001
From: Tarushi V <tarushiv@nvidia.com>
Date: Wed, 29 Jan 2025 17:39:39 +0530
Subject: [PATCH 25/28] Limit Hindi ITN landline numbers to exactly 7 digits

Signed-off-by: Tarushi V <tarushiv@nvidia.com>
---
 .../inverse_text_normalization/hi/taggers/telephone.py        | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py b/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py
index 0162dacc4..1d1d3c875 100644
--- a/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py
+++ b/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py
@@ -90,12 +90,12 @@ def __init__(self, cardinal: GraphFst):
         # 7-digit landline graph in hindi and english digits
         self.landline_hindi = (
             pynutil.insert("number_part: \"")
-            + pynini.closure(hindi_digit_graph + delete_space, 7)
+            + pynini.closure(hindi_digit_graph + delete_space, 7, 7)
             + pynutil.insert("\" ")
         )
         self.landline_english = (
             pynutil.insert("number_part: \"")
-            + pynini.closure(english_digit_graph + delete_space, 7)
+            + pynini.closure(english_digit_graph + delete_space, 7, 7)
             + pynutil.insert("\" ")
         )
 

From d4d27da30b975dac821929ae7e92da70ba3349a7 Mon Sep 17 00:00:00 2001
From: Tarushi V <tarushiv@nvidia.com>
Date: Thu, 30 Jan 2025 17:01:42 +0530
Subject: [PATCH 26/28] Bump HI_TN_CACHE and reformat Hindi ITN telephone tagger

Signed-off-by: Tarushi V <tarushiv@nvidia.com>
---
 Jenkinsfile                                     |  2 +-
 .../hi/taggers/telephone.py                     | 17 ++++++++++++-----
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index bbad2c3da..ba381f535 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -27,7 +27,7 @@ pipeline {
     HY_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/03-12-24-0'
     MR_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/03-12-24-1'
     JA_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/10-17-24-1'
-    HI_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/01-27-25-1'
+    HI_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/01-30-25-1'
     DEFAULT_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
   }
   stages {
diff --git a/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py b/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py
index 1d1d3c875..6e695f997 100644
--- a/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py
+++ b/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py
@@ -15,7 +15,10 @@
 import pynini
 from pynini.lib import pynutil
 
-from nemo_text_processing.inverse_text_normalization.hi.graph_utils import GraphFst, delete_space
+from nemo_text_processing.inverse_text_normalization.hi.graph_utils import (
+    GraphFst,
+    delete_space,
+)
 from nemo_text_processing.inverse_text_normalization.hi.utils import get_abs_path
 
 
@@ -84,7 +87,7 @@ def __init__(self, cardinal: GraphFst):
             + pynini.closure(english_digit_graph + delete_space, 2, 5)
             + pynutil.insert("\" ")
         )
-
+ 
         self.city_extension = self.city_code_hindi | self.city_code_english
 
         # 7-digit landline graph in hindi and english digits
@@ -98,7 +101,7 @@ def __init__(self, cardinal: GraphFst):
             + pynini.closure(english_digit_graph + delete_space, 7, 7)
             + pynutil.insert("\" ")
         )
-
+        
         self.landline = self.landline_hindi | self.landline_english
 
         self.pincode_in_hindi = (
@@ -140,11 +143,15 @@ def __init__(self, cardinal: GraphFst):
         )
         graph_number_with_english_digit = delete_plus + delete_space + self.country_code + self.english_digit
 
-        graph_landline_with_extension = delete_zero + delete_space + self.city_extension + delete_space + self.landline
+        graph_landline_with_extension = (
+            delete_zero + delete_space + self.city_extension + delete_space + self.landline
+        )
 
         graph_pincode = self.pincode_in_hindi | self.pincode_in_english
 
-        graph_credit_card_last_digits = self.credit_card_last_digits_hindi | self.credit_card_last_digits_english
+        graph_credit_card_last_digits = (
+            self.credit_card_last_digits_hindi | self.credit_card_last_digits_english
+        )
 
         graph = (
             graph_number_with_hindi_digit

From e072a016093c4013bdc09603e2aa8eec515ca63f Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 30 Jan 2025 11:33:24 +0000
Subject: [PATCH 27/28] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 .../hi/taggers/telephone.py                     | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py b/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py
index 6e695f997..1d1d3c875 100644
--- a/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py
+++ b/nemo_text_processing/inverse_text_normalization/hi/taggers/telephone.py
@@ -15,10 +15,7 @@
 import pynini
 from pynini.lib import pynutil
 
-from nemo_text_processing.inverse_text_normalization.hi.graph_utils import (
-    GraphFst,
-    delete_space,
-)
+from nemo_text_processing.inverse_text_normalization.hi.graph_utils import GraphFst, delete_space
 from nemo_text_processing.inverse_text_normalization.hi.utils import get_abs_path
 
 
@@ -87,7 +84,7 @@ def __init__(self, cardinal: GraphFst):
             + pynini.closure(english_digit_graph + delete_space, 2, 5)
             + pynutil.insert("\" ")
         )
- 
+
         self.city_extension = self.city_code_hindi | self.city_code_english
 
         # 7-digit landline graph in hindi and english digits
@@ -101,7 +98,7 @@ def __init__(self, cardinal: GraphFst):
             + pynini.closure(english_digit_graph + delete_space, 7, 7)
             + pynutil.insert("\" ")
         )
-        
+
         self.landline = self.landline_hindi | self.landline_english
 
         self.pincode_in_hindi = (
@@ -143,15 +140,11 @@ def __init__(self, cardinal: GraphFst):
         )
         graph_number_with_english_digit = delete_plus + delete_space + self.country_code + self.english_digit
 
-        graph_landline_with_extension = (
-            delete_zero + delete_space + self.city_extension + delete_space + self.landline
-        )
+        graph_landline_with_extension = delete_zero + delete_space + self.city_extension + delete_space + self.landline
 
         graph_pincode = self.pincode_in_hindi | self.pincode_in_english
 
-        graph_credit_card_last_digits = (
-            self.credit_card_last_digits_hindi | self.credit_card_last_digits_english
-        )
+        graph_credit_card_last_digits = self.credit_card_last_digits_hindi | self.credit_card_last_digits_english
 
         graph = (
             graph_number_with_hindi_digit

From f6084c3e95263b5d95def5aeac488b79fe2cf19e Mon Sep 17 00:00:00 2001
From: Tarushi V <tarushiv@nvidia.com>
Date: Thu, 3 Apr 2025 15:16:18 +0530
Subject: [PATCH 28/28] Update HI_TN_CACHE path in Jenkinsfile

Signed-off-by: Tarushi V <tarushiv@nvidia.com>
---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index ba381f535..82a0a4799 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -27,7 +27,7 @@ pipeline {
     HY_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/03-12-24-0'
     MR_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/03-12-24-1'
     JA_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/10-17-24-1'
-    HI_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/01-30-25-1'
+    HI_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/04-03-25-1'
     DEFAULT_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
   }
   stages {