From 949c76ed6fed6efa191ca1ed9cc9e29e42099ba4 Mon Sep 17 00:00:00 2001
From: kurt0cougar <kkleber000@gmail.com>
Date: Mon, 2 Sep 2024 06:28:51 +0200
Subject: [PATCH] Cardinals up to a hundred trillions, timeFST and
 transliteration (moving constants to data files).

Signed-off-by: kurt0cougar <kkleber000@gmail.com>
---
 .../text_normalization/normalize.py           |   3 +
 .../text_normalization/rw/__init__.py         |   1 +
 .../text_normalization/rw/data/__init__.py    |   1 +
 .../rw/data/cardinal/__init__.py              |  15 +
 .../rw/data/cardinal/digits.tsv               |   9 +
 .../rw/data/cardinal/digits_for_thousands.tsv |  10 +
 .../cardinal/digits_millions_trillions.tsv    |  10 +
 .../rw/data/cardinal/hundreds.tsv             |   9 +
 .../rw/data/cardinal/hundreds_of_millions.tsv |   9 +
 .../data/cardinal/hundreds_of_thousands.tsv   |   9 +
 .../data/cardinal/hundreds_of_trillions.tsv   |   9 +
 .../rw/data/cardinal/millions.tsv             |   9 +
 .../rw/data/cardinal/tens.tsv                 |   9 +
 .../rw/data/cardinal/tens_of_millions.tsv     |   9 +
 .../rw/data/cardinal/tens_of_thousands.tsv    |   9 +
 .../rw/data/cardinal/tens_of_trillions.tsv    |   9 +
 .../rw/data/cardinal/thousands.tsv            |  10 +
 .../rw/data/cardinal/trillions.tsv            |   9 +
 .../rw/data/time/__init__.py                  |  15 +
 .../text_normalization/rw/data/time/hours.tsv |  12 +
 .../rw/data/time/minutes.tsv                  |  60 ++++
 .../rw/data/whitelist/__init__.py             |   1 +
 .../text_normalization/rw/graph_utils.py      | 311 ++++++++++++++++++
 .../text_normalization/rw/taggers/__init__.py |   1 +
 .../text_normalization/rw/taggers/cardinal.py | 196 ++---------
 .../text_normalization/rw/taggers/time.py     |  82 +----
 .../rw/taggers/tokenize_and_classify.py       |   5 +-
 .../rw/taggers/whitelist.py                   |   3 +-
 .../rw/verbalizers/__init__.py                |   1 +
 .../text_normalization/rw/verbalizers/time.py |   7 +-
 .../rw/verbalizers/verbalize.py               |   3 +-
 .../rw/verbalizers/verbalize_final.py         |  30 +-
 .../pynini_export.py                          |   7 +-
 33 files changed, 611 insertions(+), 272 deletions(-)
 create mode 100644 nemo_text_processing/text_normalization/rw/data/cardinal/__init__.py
 create mode 100644 nemo_text_processing/text_normalization/rw/data/cardinal/digits.tsv
 create mode 100644 nemo_text_processing/text_normalization/rw/data/cardinal/digits_for_thousands.tsv
 create mode 100644 nemo_text_processing/text_normalization/rw/data/cardinal/digits_millions_trillions.tsv
 create mode 100644 nemo_text_processing/text_normalization/rw/data/cardinal/hundreds.tsv
 create mode 100644 nemo_text_processing/text_normalization/rw/data/cardinal/hundreds_of_millions.tsv
 create mode 100644 nemo_text_processing/text_normalization/rw/data/cardinal/hundreds_of_thousands.tsv
 create mode 100644 nemo_text_processing/text_normalization/rw/data/cardinal/hundreds_of_trillions.tsv
 create mode 100644 nemo_text_processing/text_normalization/rw/data/cardinal/millions.tsv
 create mode 100644 nemo_text_processing/text_normalization/rw/data/cardinal/tens.tsv
 create mode 100644 nemo_text_processing/text_normalization/rw/data/cardinal/tens_of_millions.tsv
 create mode 100644 nemo_text_processing/text_normalization/rw/data/cardinal/tens_of_thousands.tsv
 create mode 100644 nemo_text_processing/text_normalization/rw/data/cardinal/tens_of_trillions.tsv
 create mode 100644 nemo_text_processing/text_normalization/rw/data/cardinal/thousands.tsv
 create mode 100644 nemo_text_processing/text_normalization/rw/data/cardinal/trillions.tsv
 create mode 100644 nemo_text_processing/text_normalization/rw/data/time/__init__.py
 create mode 100644 nemo_text_processing/text_normalization/rw/data/time/hours.tsv
 create mode 100644 nemo_text_processing/text_normalization/rw/data/time/minutes.tsv
 create mode 100644 nemo_text_processing/text_normalization/rw/graph_utils.py

diff --git a/nemo_text_processing/text_normalization/normalize.py b/nemo_text_processing/text_normalization/normalize.py
index 14093dadf..c6d19f82f 100644
--- a/nemo_text_processing/text_normalization/normalize.py
+++ b/nemo_text_processing/text_normalization/normalize.py
@@ -165,6 +165,9 @@ def __init__(
         elif lang == 'hy':
             from nemo_text_processing.text_normalization.hy.taggers.tokenize_and_classify import ClassifyFst
             from nemo_text_processing.text_normalization.hy.verbalizers.verbalize_final import VerbalizeFinalFst
+        elif lang == 'rw':
+            from nemo_text_processing.text_normalization.rw.taggers.tokenize_and_classify import ClassifyFst
+            from nemo_text_processing.text_normalization.rw.verbalizers.verbalize_final import VerbalizeFinalFst
         else:
             raise NotImplementedError(f"Language {lang} has not been supported yet.")
 
diff --git a/nemo_text_processing/text_normalization/rw/__init__.py b/nemo_text_processing/text_normalization/rw/__init__.py
index b136ce06b..c921ca1b8 100644
--- a/nemo_text_processing/text_normalization/rw/__init__.py
+++ b/nemo_text_processing/text_normalization/rw/__init__.py
@@ -1,5 +1,6 @@
 # Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
 # Copyright (c) 2024, DIGITAL UMUGANDA
+# Copyright 2015 and onwards Google, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/nemo_text_processing/text_normalization/rw/data/__init__.py b/nemo_text_processing/text_normalization/rw/data/__init__.py
index 9fb50331b..9c4313114 100644
--- a/nemo_text_processing/text_normalization/rw/data/__init__.py
+++ b/nemo_text_processing/text_normalization/rw/data/__init__.py
@@ -1,5 +1,6 @@
 # Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
 # Copyright (c) 2024, DIGITAL UMUGANDA
+# Copyright 2015 and onwards Google, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/nemo_text_processing/text_normalization/rw/data/cardinal/__init__.py b/nemo_text_processing/text_normalization/rw/data/cardinal/__init__.py
new file mode 100644
index 000000000..9c4313114
--- /dev/null
+++ b/nemo_text_processing/text_normalization/rw/data/cardinal/__init__.py
@@ -0,0 +1,15 @@
+# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
+# Copyright (c) 2024, DIGITAL UMUGANDA
+# Copyright 2015 and onwards Google, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/nemo_text_processing/text_normalization/rw/data/cardinal/digits.tsv b/nemo_text_processing/text_normalization/rw/data/cardinal/digits.tsv
new file mode 100644
index 000000000..bf85b743b
--- /dev/null
+++ b/nemo_text_processing/text_normalization/rw/data/cardinal/digits.tsv
@@ -0,0 +1,9 @@
+rimwe	1
+kabiri	2
+gatatu	3
+kane	4
+gatanu	5
+gatandatu	6
+karindwi	7
+umunani	8
+icyenda	9
diff --git a/nemo_text_processing/text_normalization/rw/data/cardinal/digits_for_thousands.tsv b/nemo_text_processing/text_normalization/rw/data/cardinal/digits_for_thousands.tsv
new file mode 100644
index 000000000..ee31aadee
--- /dev/null
+++ b/nemo_text_processing/text_normalization/rw/data/cardinal/digits_for_thousands.tsv
@@ -0,0 +1,10 @@
+ 	0
+kimwe	1
+bibiri	2
+bitatu	3
+bine	4
+bitanu	5
+bitandatu	6
+birindwi	7
+umunani	8
+icyenda	9
diff --git a/nemo_text_processing/text_normalization/rw/data/cardinal/digits_millions_trillions.tsv b/nemo_text_processing/text_normalization/rw/data/cardinal/digits_millions_trillions.tsv
new file mode 100644
index 000000000..126ad90a3
--- /dev/null
+++ b/nemo_text_processing/text_normalization/rw/data/cardinal/digits_millions_trillions.tsv
@@ -0,0 +1,10 @@
+	0
+imwe	1
+ebyiri	2
+eshatu	3
+enye	4
+eshanu	5
+esheshatu	6
+zirindwi	7
+umunani	8
+icyenda	9
\ No newline at end of file
diff --git a/nemo_text_processing/text_normalization/rw/data/cardinal/hundreds.tsv b/nemo_text_processing/text_normalization/rw/data/cardinal/hundreds.tsv
new file mode 100644
index 000000000..a46623cc1
--- /dev/null
+++ b/nemo_text_processing/text_normalization/rw/data/cardinal/hundreds.tsv
@@ -0,0 +1,9 @@
+ijana	1
+magana_abiri	2
+magana_atatu	3
+magana_ane	4
+magana_atanu	5
+magana_atandatu	6
+magana_arindwi	7
+magana_inani	8
+magana_cyenda	9
diff --git a/nemo_text_processing/text_normalization/rw/data/cardinal/hundreds_of_millions.tsv b/nemo_text_processing/text_normalization/rw/data/cardinal/hundreds_of_millions.tsv
new file mode 100644
index 000000000..6e38c3ceb
--- /dev/null
+++ b/nemo_text_processing/text_normalization/rw/data/cardinal/hundreds_of_millions.tsv
@@ -0,0 +1,9 @@
+miliyoni_ijana	1
+miliyoni_magana_abiri	2
+miliyoni_magana_atatu	3
+miliyoni_magana_ane	4
+miliyoni_magana_atanu	5
+miliyoni_magana_atandatu	6
+miliyoni_magana_arindwi	7
+miliyoni_magana_inani	8
+miliyoni_magana_cyenda	9
diff --git a/nemo_text_processing/text_normalization/rw/data/cardinal/hundreds_of_thousands.tsv b/nemo_text_processing/text_normalization/rw/data/cardinal/hundreds_of_thousands.tsv
new file mode 100644
index 000000000..a73477c14
--- /dev/null
+++ b/nemo_text_processing/text_normalization/rw/data/cardinal/hundreds_of_thousands.tsv
@@ -0,0 +1,9 @@
+ibihumbi_ijana	1
+ibihumbi_magana_abiri	2
+ibihumbi_magana_atatu	3
+ibihumbi_magana_ane	4
+ibihumbi_magana_atanu	5
+ibihumbi_magana_atandatu	6
+ibihumbi_magana_arindwi	7
+ibihumbi_magana_inani	8
+ibihumbi_magana_cyenda	9
diff --git a/nemo_text_processing/text_normalization/rw/data/cardinal/hundreds_of_trillions.tsv b/nemo_text_processing/text_normalization/rw/data/cardinal/hundreds_of_trillions.tsv
new file mode 100644
index 000000000..00fc01aa4
--- /dev/null
+++ b/nemo_text_processing/text_normalization/rw/data/cardinal/hundreds_of_trillions.tsv
@@ -0,0 +1,9 @@
+tiriyoni_ijana	1
+tiriyoni_magana_abiri	2
+tiriyoni_magana_atatu	3
+tiriyoni_magana_ane	4
+tiriyoni_magana_atanu	5
+tiriyoni_magana_atandatu	6
+tiriyoni_magana_arindwi	7
+tiriyoni_magana_inani	8
+tiriyoni_magana_cyenda	9
diff --git a/nemo_text_processing/text_normalization/rw/data/cardinal/millions.tsv b/nemo_text_processing/text_normalization/rw/data/cardinal/millions.tsv
new file mode 100644
index 000000000..fded5ed55
--- /dev/null
+++ b/nemo_text_processing/text_normalization/rw/data/cardinal/millions.tsv
@@ -0,0 +1,9 @@
+miliyoni	1
+miliyoni_ebyiri	2
+miliyoni_eshatu	3
+miliyoni_enye	4
+miliyoni_eshanu	5
+miliyoni_esheshatu	6
+miliyoni_zirindwi	7
+miliyoni_umunani	8
+miliyoni_icyenda	9
diff --git a/nemo_text_processing/text_normalization/rw/data/cardinal/tens.tsv b/nemo_text_processing/text_normalization/rw/data/cardinal/tens.tsv
new file mode 100644
index 000000000..6e63c3875
--- /dev/null
+++ b/nemo_text_processing/text_normalization/rw/data/cardinal/tens.tsv
@@ -0,0 +1,9 @@
+ 	0
+makumyabiri	2
+mirongo_itatu	3
+mirongo_ine	4
+mirongo_itanu	5
+mirongo_itandatu	6
+mirongo_irindwi	7
+mirongo_inani	8
+mirongo_icyenda	9
diff --git a/nemo_text_processing/text_normalization/rw/data/cardinal/tens_of_millions.tsv b/nemo_text_processing/text_normalization/rw/data/cardinal/tens_of_millions.tsv
new file mode 100644
index 000000000..36f077d00
--- /dev/null
+++ b/nemo_text_processing/text_normalization/rw/data/cardinal/tens_of_millions.tsv
@@ -0,0 +1,9 @@
+miliyoni_cumi	1
+miliyoni_makumyabiri	2
+miliyoni_mirongo_itatu	3
+miliyoni_mirongo_ine	4
+miliyoni_mirongo_itanu	5
+miliyoni_mirongo_itandatu	6
+miliyoni_mirongo_irindwi	7
+miliyoni_mirongo_inani	8
+miliyoni_mirongo_icyenda	9
diff --git a/nemo_text_processing/text_normalization/rw/data/cardinal/tens_of_thousands.tsv b/nemo_text_processing/text_normalization/rw/data/cardinal/tens_of_thousands.tsv
new file mode 100644
index 000000000..f230751bf
--- /dev/null
+++ b/nemo_text_processing/text_normalization/rw/data/cardinal/tens_of_thousands.tsv
@@ -0,0 +1,9 @@
+ibihumbi_cumi	1
+ibihumbi_makumyabiri	2
+ibihumbi_mirongo_itatu	3
+ibihumbi_mirongo_ine	4
+ibihumbi_mirongo_itanu	5
+ibihumbi_mirongo_itandatu	6
+ibihumbi_mirongo_irindwi	7
+ibihumbi_mirongo_inani	8
+ibihumbi_mirongo_icyenda	9
diff --git a/nemo_text_processing/text_normalization/rw/data/cardinal/tens_of_trillions.tsv b/nemo_text_processing/text_normalization/rw/data/cardinal/tens_of_trillions.tsv
new file mode 100644
index 000000000..3cf483594
--- /dev/null
+++ b/nemo_text_processing/text_normalization/rw/data/cardinal/tens_of_trillions.tsv
@@ -0,0 +1,9 @@
+tiriyoni_icumi	1
+tiriyoni_makumyabiri	2
+tiriyoni_mirongo_itatu	3
+tiriyoni_mirongo_ine	4
+tiriyoni_mirongo_itanu	5
+tiriyoni_mirongo_itandatu	6
+tiriyoni_mirongo_irindwi	7
+tiriyoni_mirongo_inani	8
+tiriyoni_mirongo_icyenda	9
diff --git a/nemo_text_processing/text_normalization/rw/data/cardinal/thousands.tsv b/nemo_text_processing/text_normalization/rw/data/cardinal/thousands.tsv
new file mode 100644
index 000000000..39d262443
--- /dev/null
+++ b/nemo_text_processing/text_normalization/rw/data/cardinal/thousands.tsv
@@ -0,0 +1,10 @@
+igihumbi	1
+ibihumbi_bibiri	2
+ibihumbi_bitatu	3
+ibihumbi_bine	4
+ibihumbi_bitanu	5
+ibihumbi_bitandatu	6
+ibihumbi_birindwi	7
+ibihumbi_umunani	8
+ibihumbi_icyenda	9
+
diff --git a/nemo_text_processing/text_normalization/rw/data/cardinal/trillions.tsv b/nemo_text_processing/text_normalization/rw/data/cardinal/trillions.tsv
new file mode 100644
index 000000000..8098158df
--- /dev/null
+++ b/nemo_text_processing/text_normalization/rw/data/cardinal/trillions.tsv
@@ -0,0 +1,9 @@
+tiriyoni	1
+tiriyoni_ebyiri	2
+tiriyoni_eshatu	3
+tiriyoni_enye	4
+tiriyoni_eshanu	5
+tiriyoni_esheshatu	6
+tiriyoni_zirindwi	7
+tiriyoni_umunani	8
+tiriyoni_icyenda	9
diff --git a/nemo_text_processing/text_normalization/rw/data/time/__init__.py b/nemo_text_processing/text_normalization/rw/data/time/__init__.py
new file mode 100644
index 000000000..9c4313114
--- /dev/null
+++ b/nemo_text_processing/text_normalization/rw/data/time/__init__.py
@@ -0,0 +1,15 @@
+# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
+# Copyright (c) 2024, DIGITAL UMUGANDA
+# Copyright 2015 and onwards Google, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/nemo_text_processing/text_normalization/rw/data/time/hours.tsv b/nemo_text_processing/text_normalization/rw/data/time/hours.tsv
new file mode 100644
index 000000000..fae6f0898
--- /dev/null
+++ b/nemo_text_processing/text_normalization/rw/data/time/hours.tsv
@@ -0,0 +1,12 @@
+1	saa saba
+2	saa munani
+3	saa cyenda
+4	saa cumi
+5	saa cumi n'imwe
+6	saa cumi n'ebyiri
+7	saa moya
+8	saa mbiri
+9	saa tatu
+10	saa ine
+11	saa tanu
+12	saa sita
\ No newline at end of file
diff --git a/nemo_text_processing/text_normalization/rw/data/time/minutes.tsv b/nemo_text_processing/text_normalization/rw/data/time/minutes.tsv
new file mode 100644
index 000000000..c30327106
--- /dev/null
+++ b/nemo_text_processing/text_normalization/rw/data/time/minutes.tsv
@@ -0,0 +1,60 @@
+00	 
+01	 n'umunota umwe
+02	 n'iminota ibiri
+03	 n'iminota itatu
+04	 n'iminota ine
+05	 n'iminota itanu
+06	 n'iminota itandatu
+07	 n'iminota irindwi
+08	 n'iminota umunani
+09	 n'iminota icyenda
+10	 n'iminota icumi
+11	 n'iminota cumi n'umwe
+12	 n'iminota cumi n'ibiri
+13	 n'iminota cumi n'itatu
+14	 n'iminota cumi n'ine
+15	 n'iminota cumi n'itanu
+16	 n'iminota cumi n'itandatu
+17	 n'iminota cumi n'irindwi
+18	 n'iminota cumi n'umunani
+19	 n'iminota cumi n'icyenda
+20	 n'iminota makumyabiri
+21	 n'iminota makumyabiri n'umwe
+22	 n'iminota makumyabiri n'ibiri
+23	 n'iminota makumyabiri n'itatu
+24	 n'iminota makumyabiri n'ine
+25	 n'iminota makumyabiri n'itanu
+26	 n'iminota makumyabiri n'itandatu
+27	 n'iminota makumyabiri n'irindwi
+28	 n'iminota makumyabiri n'umunani
+29	 n'iminota makumyabiri n'icyenda
+30	 n'iminota mirongo itatu
+31	 n'iminota mirongo itatu n'umwe
+32	 n'iminota mirongo itatu n'ibiri
+33	 n'iminota mirongo itatu n'itatu
+34	 n'iminota mirongo itatu n'ine
+35	 n'iminota mirongo itatu n'itanu
+36	 n'iminota mirongo itatu n'itandatu
+37	 n'iminota mirongo itatu n'irindwi
+38	 n'iminota mirongo itatu n'umunani
+39	 n'iminota mirongo itatu n'icyenda
+40	 n'iminota mirongo ine
+41	 n'iminota mirongo ine n'umwe
+42	 n'iminota mirongo ine n'ibiri
+43	 n'iminota mirongo ine n'itatu
+44	 n'iminota mirongo ine n'ine
+45	 n'iminota mirongo ine n'itanu
+46	 n'iminota mirongo ine n'itandatu
+47	 n'iminota mirongo ine n'irindwi
+48	 n'iminota mirongo ine n'umunani
+49	 n'iminota mirongo ine n'icyenda
+50	 n'iminota mirongo itanu
+51	 n'iminota mirongo itanu n'umwe
+52	 n'iminota mirongo itanu n'ibiri
+53	 n'iminota mirongo itanu n'itatu
+54	 n'iminota mirongo itanu n'ine
+55	 n'iminota mirongo itanu n'itanu
+56	 n'iminota mirongo itanu n'itandatu
+57	 n'iminota mirongo itanu n'irindwi
+58	 n'iminota mirongo itanu n'umunani
+59	 n'iminota mirongo itanu n'icyenda
\ No newline at end of file
diff --git a/nemo_text_processing/text_normalization/rw/data/whitelist/__init__.py b/nemo_text_processing/text_normalization/rw/data/whitelist/__init__.py
index 9fb50331b..9c4313114 100644
--- a/nemo_text_processing/text_normalization/rw/data/whitelist/__init__.py
+++ b/nemo_text_processing/text_normalization/rw/data/whitelist/__init__.py
@@ -1,5 +1,6 @@
 # Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
 # Copyright (c) 2024, DIGITAL UMUGANDA
+# Copyright 2015 and onwards Google, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/nemo_text_processing/text_normalization/rw/graph_utils.py b/nemo_text_processing/text_normalization/rw/graph_utils.py
new file mode 100644
index 000000000..3744580d5
--- /dev/null
+++ b/nemo_text_processing/text_normalization/rw/graph_utils.py
@@ -0,0 +1,311 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+# Copyright (c) 2024, DIGITAL UMUGANDA
+# Copyright 2015 and onwards Google, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import string
+from pathlib import Path
+from typing import Dict
+
+import pynini
+from pynini import Far
+from pynini.examples import plurals
+from pynini.export import export
+from pynini.lib import byte, pynutil, utf8
+
+from nemo_text_processing.text_normalization.en.utils import get_abs_path, load_labels
+from nemo_text_processing.utils.logging import logger
+
+NEMO_CHAR = utf8.VALID_UTF8_CHAR
+
+NEMO_DIGIT = byte.DIGIT
+NEMO_LOWER = pynini.union(*string.ascii_lowercase).optimize()
+NEMO_UPPER = pynini.union(*string.ascii_uppercase).optimize()
+NEMO_ALPHA = pynini.union(NEMO_LOWER, NEMO_UPPER).optimize()
+NEMO_ALNUM = pynini.union(NEMO_DIGIT, NEMO_ALPHA).optimize()
+NEMO_VOWELS = pynini.union(*"aeiouAEIOU").optimize()
+NEMO_CONSONANTS = pynini.union(*"BCDFGHJKLMNPQRSTVWXYZbcdfghjklmnpqrstvwxyz").optimize()
+NEMO_HEX = pynini.union(*string.hexdigits).optimize()
+NEMO_NON_BREAKING_SPACE = "\u00A0"
+NEMO_SPACE = " "
+NEMO_WHITE_SPACE = pynini.union(" ", "\t", "\n", "\r", "\u00A0").optimize()
+NEMO_NOT_SPACE = pynini.difference(NEMO_CHAR, NEMO_WHITE_SPACE).optimize()
+NEMO_NOT_QUOTE = pynini.difference(NEMO_CHAR, r'"').optimize()
+
+NEMO_PUNCT = pynini.union(*map(pynini.escape, string.punctuation)).optimize()
+NEMO_GRAPH = pynini.union(NEMO_ALNUM, NEMO_PUNCT).optimize()
+
+NEMO_SIGMA = pynini.closure(NEMO_CHAR)
+NEMO_LOWER_NOT_A = pynini.union(
+    "b",
+    "c",
+    "d",
+    "e",
+    "f",
+    "g",
+    "h",
+    "i",
+    "j",
+    "k",
+    "l",
+    "m",
+    "n",
+    "o",
+    "p",
+    "q",
+    "r",
+    "s",
+    "t",
+    "u",
+    "v",
+    "w",
+    "x",
+    "y",
+    "z",
+).optimize()
+
+delete_space = pynutil.delete(pynini.closure(NEMO_WHITE_SPACE))
+delete_space_or_punct = NEMO_PUNCT | delete_space
+delete_zero_or_one_space = pynutil.delete(pynini.closure(NEMO_WHITE_SPACE, 0, 1))
+insert_space = pynutil.insert(" ")
+delete_extra_space = pynini.cross(pynini.closure(NEMO_WHITE_SPACE, 1), " ")
+delete_preserve_order = pynini.closure(
+    pynutil.delete(" preserve_order: true")
+    | (pynutil.delete(' field_order: "') + NEMO_NOT_QUOTE + pynutil.delete('"'))
+)
+
+
+# Common string literals; expand as you see fit.
+username_string = "username"
+double_quotes = '"'
+domain_string = "domain"
+protocol_string = "protocol"
+slash = "/"
+double_slash = "//"
+triple_slash = "///"
+file = "file"
+period = "."
+at = "@"
+colon = ":"
+https = "https"
+http = "http"
+www = "www"
+
+
+suppletive = pynini.string_file(get_abs_path("data/suppletive.tsv"))
+# _v = pynini.union("a", "e", "i", "o", "u")
+_c = pynini.union(
+    "b", "c", "d", "f", "g", "h", "j", "k", "l", "m", "n", "p", "q", "r", "s", "t", "v", "w", "x", "y", "z",
+)
+_ies = NEMO_SIGMA + _c + pynini.cross("y", "ies")
+_es = NEMO_SIGMA + pynini.union("s", "sh", "ch", "x", "z") + pynutil.insert("es")
+_s = NEMO_SIGMA + pynutil.insert("s")
+
+graph_plural = plurals._priority_union(
+    suppletive, plurals._priority_union(_ies, plurals._priority_union(_es, _s, NEMO_SIGMA), NEMO_SIGMA), NEMO_SIGMA,
+).optimize()
+
+SINGULAR_TO_PLURAL = graph_plural
+PLURAL_TO_SINGULAR = pynini.invert(graph_plural)
+TO_LOWER = pynini.union(*[pynini.cross(x, y) for x, y in zip(string.ascii_uppercase, string.ascii_lowercase)])
+TO_UPPER = pynini.invert(TO_LOWER)
+MIN_NEG_WEIGHT = -0.0001
+MIN_POS_WEIGHT = 0.0001
+INPUT_CASED = "cased"
+INPUT_LOWER_CASED = "lower_cased"
+MINUS = pynini.union("minus", "Minus").optimize()
+
+
+def capitalized_input_graph(
+    graph: "pynini.FstLike", original_graph_weight: float = None, capitalized_graph_weight: float = None,
+) -> "pynini.FstLike":
+    """
+    Allow graph input to be capitalized (e.g. for ITN)
+
+    Args:
+        graph: FstGraph
+        original_graph_weight: weight to add to the original `graph`
+        capitalized_graph_weight: weight to add to the capitalized graph
+    """
+    capitalized_graph = pynini.compose(TO_LOWER + NEMO_SIGMA, graph).optimize()
+
+    if original_graph_weight is not None:
+        graph = pynutil.add_weight(graph, weight=original_graph_weight)
+
+    if capitalized_graph_weight is not None:
+        capitalized_graph = pynutil.add_weight(capitalized_graph, weight=capitalized_graph_weight)
+
+    graph |= capitalized_graph
+    return graph
+
+
+def generator_main(file_name: str, graphs: Dict[str, "pynini.FstLike"]):
+    """
+    Exports graph as OpenFst finite state archive (FAR) file with given file name and rule name.
+
+    Args:
+        file_name: exported file name
+        graphs: Mapping of a rule name and Pynini WFST graph to be exported
+    """
+    exporter = export.Exporter(file_name)
+    for rule, graph in graphs.items():
+        exporter[rule] = graph.optimize()
+    exporter.close()
+    logger.info(f"Created {file_name}")
+
+
+def get_plurals(fst):
+    """
+    Given singular returns plurals
+
+    Args:
+        fst: Fst
+
+    Returns plurals to given singular forms
+    """
+    return SINGULAR_TO_PLURAL @ fst
+
+
+def get_singulars(fst):
+    """
+    Given plural returns singulars
+
+    Args:
+        fst: Fst
+
+    Returns singulars to given plural forms
+    """
+    return PLURAL_TO_SINGULAR @ fst
+
+
+def convert_space(fst) -> "pynini.FstLike":
+    """
+    Converts space to nonbreaking space.
+    Used only in tagger grammars for transducing token values within quotes, e.g. name: "hello kitty"
+    This makes the transducer significantly slower, so use it only when there could be spaces within quotes; otherwise leave it out.
+
+    Args:
+        fst: input fst
+
+    Returns output fst where breaking spaces are converted to non breaking spaces
+    """
+    return fst @ pynini.cdrewrite(pynini.cross(NEMO_SPACE, NEMO_NON_BREAKING_SPACE), "", "", NEMO_SIGMA)
+
+
+def string_map_cased(input_file: str, input_case: str = INPUT_LOWER_CASED):
+    labels = load_labels(input_file)
+
+    if input_case == INPUT_CASED:
+        additional_labels = []
+        for written, spoken, *weight in labels:
+            written_capitalized = written[0].upper() + written[1:]
+            additional_labels.extend(
+                [
+                    [written_capitalized, spoken.capitalize(),],  # first letter capitalized
+                    [
+                        written_capitalized,
+                        spoken.upper().replace(" AND ", " and "),
+                    ],  # all letters capitalized, keeping " and " lowercase
+                ]
+            )
+
+            spoken_no_space = spoken.replace(" ", "")
+            # add abbreviations without spaces (both lower and upper case), i.e. "BMW" not "B M W"
+            if len(spoken) == (2 * len(spoken_no_space) - 1):
+                logger.debug(f"This is weight {weight}")
+                if len(weight) == 0:
+                    additional_labels.extend(
+                        [[written, spoken_no_space], [written_capitalized, spoken_no_space.upper()],]
+                    )
+                else:
+                    additional_labels.extend(
+                        [
+                            [written, spoken_no_space, weight[0]],
+                            [written_capitalized, spoken_no_space.upper(), weight[0]],
+                        ]
+                    )
+        labels += additional_labels
+
+    whitelist = pynini.string_map(labels).invert().optimize()
+    return whitelist
+
+
+class GraphFst:
+    """
+    Base class for all grammar fsts.
+
+    Args:
+        name: name of grammar class
+        kind: either 'classify' or 'verbalize'
+        deterministic: if True will provide a single transduction option,
+            for False multiple transduction are generated (used for audio-based normalization)
+    """
+
+    def __init__(self, name: str, kind: str, deterministic: bool = True):
+        self.name = name
+        self.kind = kind
+        self._fst = None
+        self.deterministic = deterministic
+
+        self.far_path = Path(os.path.dirname(__file__) + "/grammars/" + kind + "/" + name + ".far")
+        if self.far_exist():
+            self._fst = Far(self.far_path, mode="r", arc_type="standard", far_type="default").get_fst()
+
+    def far_exist(self) -> bool:
+        """
+        Returns true if FAR can be loaded
+        """
+        return self.far_path.exists()
+
+    @property
+    def fst(self) -> "pynini.FstLike":
+        return self._fst
+
+    @fst.setter
+    def fst(self, fst):
+        self._fst = fst
+
+    def add_tokens(self, fst) -> "pynini.FstLike":
+        """
+        Wraps the class name around the given fst
+
+        Args:
+            fst: input fst
+
+        Returns:
+            Fst: fst
+        """
+        return pynutil.insert(f"{self.name} {{ ") + fst + pynutil.insert(" }")
+
+    def delete_tokens(self, fst) -> "pynini.FstLike":
+        """
+        Deletes the class name wrapper around the output of the given fst
+
+        Args:
+            fst: input fst
+
+        Returns:
+            Fst: fst
+        """
+        res = (
+            pynutil.delete(f"{self.name}")
+            + delete_space
+            + pynutil.delete("{")
+            + delete_space
+            + fst
+            + delete_space
+            + pynutil.delete("}")
+        )
+        return res @ pynini.cdrewrite(pynini.cross("\u00A0", " "), "", "", NEMO_SIGMA)
diff --git a/nemo_text_processing/text_normalization/rw/taggers/__init__.py b/nemo_text_processing/text_normalization/rw/taggers/__init__.py
index 90380542f..96d45783e 100644
--- a/nemo_text_processing/text_normalization/rw/taggers/__init__.py
+++ b/nemo_text_processing/text_normalization/rw/taggers/__init__.py
@@ -1,5 +1,6 @@
 # Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
 # Copyright (c) 2024, DIGITAL UMUGANDA
+# Copyright 2015 and onwards Google, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/nemo_text_processing/text_normalization/rw/taggers/cardinal.py b/nemo_text_processing/text_normalization/rw/taggers/cardinal.py
index 68abc5fbd..c80097a8e 100644
--- a/nemo_text_processing/text_normalization/rw/taggers/cardinal.py
+++ b/nemo_text_processing/text_normalization/rw/taggers/cardinal.py
@@ -1,5 +1,6 @@
 # Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
 # Copyright (c) 2024, DIGITAL UMUGANDA
+# Copyright 2015 and onwards Google, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -15,189 +16,40 @@
 
 import pynini
 from pynini.lib import pynutil
-import string
-from nemo_text_processing.text_normalization.en.graph_utils import GraphFst,NEMO_CHAR,insert_space
+from nemo_text_processing.text_normalization.rw.graph_utils import GraphFst,NEMO_CHAR,insert_space,NEMO_DIGIT,NEMO_ALPHA,NEMO_CONSONANTS,NEMO_VOWELS,delete_extra_space,delete_space
 from nemo_text_processing.text_normalization.rw.utils import get_abs_path
 
-def apply_fst(text, fst):
-    try:
-        print(pynini.shortestpath(text @ fst).string())
-        print(len(pynini.shortestpath(text @ fst).string()))
-
-    except pynini.FstOpError:
-        print(f"Error: no valid output with given'input: '{text}'")
 
 class CardinalFst(GraphFst):
     def __init__(self):
         super().__init__(name="cardinal", kind="classify")
-        alphabet = string.ascii_letters
-        rewrite_na_fst = pynini.cdrewrite(pynini.cross(" "," na "),pynini.union(*"aeiouAEIOU "),pynini.union(*"BCDFGHJKLMNPQRSTVWXYZbcdfghjklmnpqrstvwxyz"),NEMO_CHAR.closure())
-        rewrite_n_fst = pynini.cdrewrite(pynini.cross(" "," n'"),pynini.union(*"aeiouAEIOU "),pynini.union(*"aeiouAEIOU"),NEMO_CHAR.closure())
-        remove_underscore_fst = pynini.cdrewrite(pynini.cross("_"," "),pynini.union(*alphabet),pynini.union(*alphabet),NEMO_CHAR.closure())
-        remove_extra_space_fst = pynini.cdrewrite(pynini.cross("  "," "),pynini.union(*alphabet),pynini.union(*alphabet),NEMO_CHAR.closure())
-        remove_trailing_space_fst = pynini.cdrewrite(pynini.cross(pynini.accep(' ').closure(),''),pynini.union(*alphabet).closure(),'[EOS]',NEMO_CHAR.closure())
+        vowels_or_space = NEMO_VOWELS | " "
+        rewrite_na_fst = pynini.cdrewrite(pynini.cross(" "," na "),vowels_or_space,NEMO_CONSONANTS,NEMO_CHAR.closure())
+        rewrite_n_fst = pynini.cdrewrite(pynini.cross(" "," n'"),vowels_or_space,NEMO_VOWELS,NEMO_CHAR.closure())
+        remove_underscore_fst = pynini.cdrewrite(pynini.cross("_"," "),pynini.union(NEMO_ALPHA),pynini.union(NEMO_ALPHA),NEMO_CHAR.closure())
+        remove_extra_space_fst = pynini.cdrewrite(delete_extra_space,pynini.union(NEMO_ALPHA),pynini.union(NEMO_ALPHA),NEMO_CHAR.closure())
+        remove_trailing_space_fst = pynini.cdrewrite(delete_space,pynini.union(NEMO_ALPHA).closure(),'[EOS]',NEMO_CHAR.closure())
 
         rewrite_add_separator_fst = pynini.compose(rewrite_na_fst,rewrite_n_fst)
         ten_thousand = pynini.string_map([("ibihumbi_icumi","10")])
         ten = pynini.string_map([("icumi","10")])
-        digits = pynini.string_map([
-            ("rimwe","1"),
-            ("kabiri","2"),
-            ("gatatu","3"),
-            ("kane","4"),
-            ("gatanu","5"),
-            ("gatandatu","6"),
-            ("karindwi","7"),
-            ("umunani","8"),
-            ("icyenda","9"),
-        ])
-        digits_for_thousands = pynini.string_map([
-            ("","0"),
-            ("kimwe","1"),
-            ("bibiri","2"),
-            ("bitatu","3"),
-            ("bine","4"),
-            ("bitanu","5"),
-            ("bitandatu","6"),
-            ("birindwi","7"),
-            ("umunani","8"),
-            ("icyenda","9")
-        ]) 
-        digits_millions_trillions= pynini.string_map([
-            ("","0"),
-            ("imwe","1"),
-            ("ebyiri","2"),
-            ("eshatu","3"),
-            ("enye","4"),
-            ("eshanu","5"),
-            ("esheshatu","6"),
-            ("zirindwi","7"),
-            ("umunani","8"),
-            ("icyenda","9")
-        ]) 
-        tens = pynini.string_map([
-            (" ","0"),
-            ("makumyabiri","2"),
-            ("mirongo_itatu","3"),
-            ("mirongo_ine","4"),
-            ("mirongo_itanu","5"),
-            ("mirongo_itandatu","6"),
-            ("mirongo_irindwi","7"),
-            ("mirongo_inani","8"),
-            ("mirongo_icyenda","9")
-        ])
+        digits = pynini.string_file(get_abs_path("data/cardinal/digits.tsv"))
+        digits_for_thousands = pynini.string_file(get_abs_path("data/cardinal/digits_for_thousands.tsv")) 
+        digits_millions_trillions= pynini.string_file(get_abs_path("data/cardinal/digits_millions_trillions.tsv")) 
+        tens = pynini.string_file(get_abs_path("data/cardinal/tens.tsv"))
         tens_for_ends = pynini.string_map([("icumi","1")])|tens 
         tens_for_beginnings= pynini.string_map([("cumi","1")])|tens
-        hundreds = pynini.string_map([
-            ("ijana","1"),
-            ("magana_abiri","2"),
-            ("magana_atatu","3"),
-            ("magana_ane","4"),
-            ("magana_atanu","5"),
-            ("magana_atandatu","6"),
-            ("magana_arindwi","7"),
-            ("magana_inani","8"),
-            ("magana_cyenda","9")
-        ])
-        thousands = pynini.string_map([
-            ("igihumbi","1"),
-            ("ibihumbi_bibiri","2"),
-            ("ibihumbi_bitatu","3"),
-            ("ibihumbi_bine","4"),
-            ("ibihumbi_bitanu","5"),
-            ("ibihumbi_bitandatu","6"),
-            ("ibihumbi_birindwi","7"),
-            ("ibihumbi_umunani","8"),
-            ("ibihumbi_icyenda","9")
-        ])
-        tens_of_thousands = pynini.string_map([
-            ("ibihumbi_cumi","1"),
-            ("ibihumbi_makumyabiri","2"),
-            ("ibihumbi_mirongo_itatu","3"),
-            ("ibihumbi_mirongo_ine","4"),
-            ("ibihumbi_mirongo_itanu","5"),
-            ("ibihumbi_mirongo_itandatatu","6"),
-            ("ibihumbi_mirongo_irindwi","7"),
-            ("ibihumbi_mirongo_inani","8"),
-            ("ibihumbi_mirongo_icyenda","9")
-        ])
-        hundreds_of_thousands = pynini.string_map([
-            ("ibihumbi_ijana","1"),
-            ("ibihumbi_magana_abiri","2"),
-            ("ibihumbi_magana_atatu","3"),
-            ("ibihumbi_magana_ane","4"),
-            ("ibihumbi_magana_atanu","5"),
-            ("ibihumbi_magana_atandatu","6"),
-            ("ibihumbi_magana_arindwi","7"),
-            ("ibihumbi_magana_inani","8"),
-            ("ibihumbi_magana_cyenda","9")
-        ])
-        millions = pynini.string_map([
-            ("miliyoni","1"),
-            ("miliyoni_ebyiri","2"),
-            ("miliyoni_eshatu","3"),
-            ("miliyoni_enye","4"),
-            ("miliyoni_eshanu","5"),
-            ("miliyoni_esheshatu","6"),
-            ("miliyoni_zirindwi","7"),
-            ("miliyoni_umunani","8"),
-            ("miliyoni_icyenda","9")
-        ])
-        tens_of_millions = pynini.string_map([
-            ("miliyoni_cumi","1"),
-            ("miliyoni_makumyabiri","2"),
-            ("miliyoni_mirongo_itatu","3"),
-            ("miliyoni_mirongo_ine","4"),
-            ("miliyoni_mirongo_itanu","5"),
-            ("miliyoni_mirongo_itandatatu","6"),
-            ("miliyoni_mirongo_irindwi","7"),
-            ("miliyoni_mirongo_inani","8"),
-            ("miliyoni_mirongo_icyenda","9")
-        ])
-        hundreds_of_millions = pynini.string_map([
-            ("miliyoni_ijana","1"),
-            ("miliyoni_magana_abiri","2"),
-            ("miliyoni_magana_atatu","3"),
-            ("miliyoni_magana_ane","4"),
-            ("miliyoni_magana_atanu","5"),
-            ("miliyoni_magana_atandatu","6"),
-            ("miliyoni_magana_arindwi","7"),
-            ("miliyoni_magana_inani","8"),
-            ("miliyoni_magana_cyenda","9")
-        ])
-        trillions = pynini.string_map([
-            ("tiriyoni","1"),
-            ("tiriyoni_ebyiri","2"),
-            ("tiriyoni_eshatu","3"),
-            ("tiriyoni_enye","4"),
-            ("tiriyoni_eshanu","5"),
-            ("tiriyoni_esheshatu","6"),
-            ("tiriyoni_zirindwi","7"),
-            ("tiriyoni_umunani","8"),
-            ("tiriyoni_icyenda","9")
-        ])
-        tens_of_trillions = pynini.string_map([
-            ("tiriyoni_icumi","1"),
-            ("tiriyoni_makumyabiri","2"),
-            ("tiriyoni_mirongo_itatu","3"),
-            ("tiriyoni_mirongo_ine","4"),
-            ("tiriyoni_mirongo_itanu","5"),
-            ("tiriyoni_mirongo_itandatatu","6"),
-            ("tiriyoni_mirongo_irindwi","7"),
-            ("tiriyoni_mirongo_inani","8"),
-            ("tiriyoni_mirongo_icyenda","9")
-        ])
-        hundreds_of_trillions = pynini.string_map([
-            ("tiriyoni_ijana","1"),
-            ("tiriyoni_magana_abiri","2"),
-            ("tiriyoni_magana_atatu","3"),
-            ("tiriyoni_magana_ane","4"),
-            ("tiriyoni_magana_atanu","5"),
-            ("tiriyoni_magana_atandatu","6"),
-            ("tiriyoni_magana_arindwi","7"),
-            ("tiriyoni_magana_inani","8"),
-            ("tiriyoni_magana_cyenda","9")
-        ])
+        hundreds = pynini.string_file(get_abs_path("data/cardinal/hundreds.tsv"))
+        thousands = pynini.string_file(get_abs_path("data/cardinal/thousands.tsv"))
+        tens_of_thousands = pynini.string_file(get_abs_path("data/cardinal/tens_of_thousands.tsv"))
+        hundreds_of_thousands = pynini.string_file(get_abs_path("data/cardinal/hundreds_of_thousands.tsv"))
+        millions = pynini.string_file(get_abs_path("data/cardinal/millions.tsv"))
+        tens_of_millions = pynini.string_file(get_abs_path("data/cardinal/tens_of_millions.tsv"))
+        hundreds_of_millions = pynini.string_file(get_abs_path("data/cardinal/hundreds_of_millions.tsv"))
+        trillions = pynini.string_file(get_abs_path("data/cardinal/trillions.tsv"))
+        tens_of_trillions = pynini.string_file(get_abs_path("data/cardinal/tens_of_trillions.tsv"))
+        hundreds_of_trillions = pynini.string_file(get_abs_path("data/cardinal/hundreds_of_trillions.tsv"))
+
         THREE_ZEROS = "000"
         FOUR_ZEROS = "0000"
         FIVE_ZEROS = "00000"
@@ -208,7 +60,7 @@ def __init__(self):
         NINE_ZEROS = "000000000"
 
         zero = pynini.string_map([("zeru","0")])
-        rewrite_remove_comma_fst = pynini.cdrewrite(pynini.cross(",",""),pynini.union(*"0123456789"),pynini.union(*"0123456789"),NEMO_CHAR.closure())
+        rewrite_remove_comma_fst = pynini.cdrewrite(pynini.cross(",",""),pynini.union(NEMO_DIGIT),pynini.union(NEMO_DIGIT),NEMO_CHAR.closure())
         single_digits_graph = pynini.invert(digits | zero)
         single_digits_graph = single_digits_graph + pynini.closure(insert_space + single_digits_graph)
         remove_comma = rewrite_remove_comma_fst@single_digits_graph
diff --git a/nemo_text_processing/text_normalization/rw/taggers/time.py b/nemo_text_processing/text_normalization/rw/taggers/time.py
index 6b2a0d531..a07ae059e 100644
--- a/nemo_text_processing/text_normalization/rw/taggers/time.py
+++ b/nemo_text_processing/text_normalization/rw/taggers/time.py
@@ -1,5 +1,6 @@
 # Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
 # Copyright (c) 2024, DIGITAL UMUGANDA
+# Copyright 2015 and onwards Google, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,92 +14,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from nemo_text_processing.text_normalization.en.graph_utils import GraphFst
+from nemo_text_processing.text_normalization.rw.graph_utils import GraphFst
 import pynini
 from pynini.lib import pynutil
+from nemo_text_processing.text_normalization.rw.utils import get_abs_path
 
 
 class TimeFst(GraphFst):
     def __init__(self):
         super().__init__(name="time", kind="classify")
         
-        hours = pynini.string_map([
-            ('1', 'saa saba'),
-            ('2', 'saa munani'),
-            ('3', 'saa cyenda'),
-            ('4', 'saa cumi'),
-            ('5', "saa cumi n'imwe"),
-            ('6', "saa cumi n'ebyiri"),
-            ('7', 'saa moya'),
-            ('8', 'saa mbiri'),
-            ('9', 'saa tatu'),
-            ('10', 'saa ine'),
-            ('11', 'saa tanu'),
-            ('12', 'saa sita'),            
-        ])
+        hours = pynini.string_file(get_abs_path("data/time/hours.tsv"))
         
-        minutes = pynini.string_map([
-            ('00', ' '),
-            ('01', " n'umunota umwe") ,
-            ('02', " n'iminota ibiri") ,
-            ('03', " n'iminota itatu") ,
-            ('04', " n'iminota ine") ,
-            ('05', " n'iminota itanu") ,
-            ('06', " n'iminota itandatu") ,
-            ('07', " n'iminota irindwi") ,
-            ('08', " n'iminota umunani") ,
-            ('09', " n'iminota icyenda") ,
-            ('10', " n'iminota icumi") ,
-            ('11', " n'iminota cumi n'umwe") ,
-            ('12', " n'iminota cumi n'ibiri") ,
-            ('13', " n'iminota cumi n'itatu") ,
-            ('14', " n'iminota cumi n'ine") ,
-            ('15', " n'iminota cumi n'itanu") ,
-            ('16', " n'iminota cumi n'itandatu") ,
-            ('17', " n'iminota cumi n'irindwi") ,
-            ('18', " n'iminota cumi n'umunani") ,
-            ('19', " n'iminota cumi n'icyenda") ,
-            ('20', " n'iminota makumyabiri") ,
-            ('21', " n'iminota makumyabiri na rimwe") ,
-            ('22', " n'iminota makumyabiri n'ibiri") ,
-            ('23', " n'iminota makumyabiri n'itatu") ,
-            ('24', " n'iminota makumyabiri n'ine") ,
-            ('25', " n'iminota makumyabiri n'itanu") ,
-            ('26', " n'iminota makumyabiri n'itandatu") ,
-            ('27', " n'iminota makumyabiri n'irindwi") ,
-            ('28', " n'iminota makumyabiri n'umunani") ,
-            ('29', " n'iminota makumyabiri n'icyenda") ,
-            ('30', " n'iminota mirongo itatu") ,
-            ('31', " n'iminota mirongo itatu n'umwe") ,
-            ('32', " n'iminota mirongo itatu n'ibiri") ,
-            ('33', " n'iminota mirongo itatu n'itatu") ,
-            ('34', " n'iminota mirongo itatu n'ine") ,
-            ('35', " n'iminota mirongo itatu n'itanu") ,
-            ('36', " n'iminota mirongo itatu n'itandatu") ,
-            ('37', " n'iminota mirongo itatu n'irindwi") ,
-            ('38', " n'iminota mirongo itatu n'umunani") ,
-            ('39', " n'iminota mirongo itatu n'icyenda") ,
-            ('40', " n'iminota mirongo ine") ,
-            ('41', " n'iminota mirongo ine n'umwe") ,
-            ('42', " n'iminota mirongo ine n'ibiri") ,
-            ('43', " n'iminota mirongo ine n'itatu") ,
-            ('44', " n'iminota mirongo ine n'ine") ,
-            ('45', " n'iminota mirongo ine n'itanu") ,
-            ('46', " n'iminota mirongo ine n'itandatu") ,
-            ('47', " n'iminota mirongo ine n'irindwi") ,
-            ('48', " n'iminota mirongo ine n'umunani") ,
-            ('49', " n'iminota mirongo ine n'icyenda") ,
-            ('50', " n'iminota mirongo itanu") ,
-            ('51', " n'iminota mirongo itanu n'umwe") ,
-            ('52', " n'iminota mirongo itanu n'ibiri") ,
-            ('53', " n'iminota mirongo itanu n'itatu") ,
-            ('54', " n'iminota mirongo itanu n'ine") ,
-            ('55', " n'iminota mirongo itanu n'itanu") ,
-            ('56', " n'iminota mirongo itanu n'itandatu") ,
-            ('57', " n'iminota mirongo itanu n'irindwi") ,
-            ('58', " n'iminota mirongo itanu n'umunani") ,
-            ('59', " n'iminota mirongo itanu n'icyenda") ,
-        ])
+        minutes = pynini.string_file(get_abs_path("data/time/minutes.tsv"))
         
         final_graph = pynutil.insert("hours:\"")+hours+pynutil.insert("\"")+pynutil.delete(":")+pynutil.insert(" minutes:\"")+minutes+pynutil.insert("\"")
         final_graph = self.add_tokens(final_graph)
diff --git a/nemo_text_processing/text_normalization/rw/taggers/tokenize_and_classify.py b/nemo_text_processing/text_normalization/rw/taggers/tokenize_and_classify.py
index 3a034af13..e17841e10 100644
--- a/nemo_text_processing/text_normalization/rw/taggers/tokenize_and_classify.py
+++ b/nemo_text_processing/text_normalization/rw/taggers/tokenize_and_classify.py
@@ -1,5 +1,6 @@
 # Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
 # Copyright (c) 2024, DIGITAL UMUGANDA
+# Copyright 2015 and onwards Google, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from nemo_text_processing.text_normalization.en.graph_utils import GraphFst
+from nemo_text_processing.text_normalization.rw.graph_utils import GraphFst
 from nemo_text_processing.text_normalization.rw.taggers.time import TimeFst
 from nemo_text_processing.text_normalization.rw.taggers.whitelist import WhiteListFst
 from nemo_text_processing.text_normalization.rw.taggers.cardinal import CardinalFst
@@ -23,7 +24,7 @@
 import pynini
 from pynini.lib import pynutil
 import os
-from nemo_text_processing.text_normalization.en.graph_utils import (
+from nemo_text_processing.text_normalization.rw.graph_utils import (
     GraphFst,
     delete_extra_space,
     delete_space,
diff --git a/nemo_text_processing/text_normalization/rw/taggers/whitelist.py b/nemo_text_processing/text_normalization/rw/taggers/whitelist.py
index 0355d9741..288a1edda 100644
--- a/nemo_text_processing/text_normalization/rw/taggers/whitelist.py
+++ b/nemo_text_processing/text_normalization/rw/taggers/whitelist.py
@@ -1,5 +1,6 @@
 # Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
 # Copyright (c) 2024, DIGITAL UMUGANDA
+# Copyright 2015 and onwards Google, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,7 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from nemo_text_processing.text_normalization.en.graph_utils import GraphFst
+from nemo_text_processing.text_normalization.rw.graph_utils import GraphFst
 import pynini
 from pynini.lib import pynutil
 from nemo_text_processing.text_normalization.rw.utils import get_abs_path
diff --git a/nemo_text_processing/text_normalization/rw/verbalizers/__init__.py b/nemo_text_processing/text_normalization/rw/verbalizers/__init__.py
index 26cff59aa..2931cfd9b 100644
--- a/nemo_text_processing/text_normalization/rw/verbalizers/__init__.py
+++ b/nemo_text_processing/text_normalization/rw/verbalizers/__init__.py
@@ -1,5 +1,6 @@
 # Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
 # Copyright (c) 2024, DIGITAL UMUGANDA
+# Copyright 2015 and onwards Google, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/nemo_text_processing/text_normalization/rw/verbalizers/time.py b/nemo_text_processing/text_normalization/rw/verbalizers/time.py
index 90d1c17e4..99bcd7808 100644
--- a/nemo_text_processing/text_normalization/rw/verbalizers/time.py
+++ b/nemo_text_processing/text_normalization/rw/verbalizers/time.py
@@ -1,5 +1,6 @@
 # Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
 # Copyright (c) 2024, DIGITAL UMUGANDA
+# Copyright 2015 and onwards Google, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,7 +15,7 @@
 # limitations under the License.
 import pynini
 from pynini.lib import pynutil
-from nemo_text_processing.text_normalization.en.graph_utils import (
+from nemo_text_processing.text_normalization.rw.graph_utils import (
     GraphFst,
     delete_space,
     NEMO_CHAR
@@ -23,8 +24,8 @@
 class VerbalizeTimeFst(GraphFst):
     def __init__(self):
         super().__init__(name="time",kind="verbalize")
-        hour = (pynutil.delete("hours:")+delete_space+pynutil.delete("\"")+pynini.closure(NEMO_CHAR,1,60)+pynutil.delete("\"")+delete_space \
-                +pynutil.delete("minutes:")+delete_space+pynutil.delete("\"") + pynini.closure(NEMO_CHAR,1,60)+pynutil.delete("\""))
+        hour = (pynutil.delete("hours:")+delete_space+pynutil.delete("\"")+pynini.closure(NEMO_CHAR)+pynutil.delete("\"")+delete_space \
+                +pynutil.delete("minutes:")+delete_space+pynutil.delete("\"") + pynini.closure(NEMO_CHAR)+pynutil.delete("\""))
 
         graph = hour 
         delete_tokens = self.delete_tokens(graph)
diff --git a/nemo_text_processing/text_normalization/rw/verbalizers/verbalize.py b/nemo_text_processing/text_normalization/rw/verbalizers/verbalize.py
index 94bf7a038..9d3e69cd9 100644
--- a/nemo_text_processing/text_normalization/rw/verbalizers/verbalize.py
+++ b/nemo_text_processing/text_normalization/rw/verbalizers/verbalize.py
@@ -1,5 +1,6 @@
 # Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
 # Copyright (c) 2024, DIGITAL UMUGANDA
+# Copyright 2015 and onwards Google, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from nemo_text_processing.text_normalization.en.graph_utils import GraphFst
+from nemo_text_processing.text_normalization.rw.graph_utils import GraphFst
 from nemo_text_processing.text_normalization.rw.verbalizers.time import VerbalizeTimeFst
 from nemo_text_processing.text_normalization.en.verbalizers.cardinal import CardinalFst
 
diff --git a/nemo_text_processing/text_normalization/rw/verbalizers/verbalize_final.py b/nemo_text_processing/text_normalization/rw/verbalizers/verbalize_final.py
index e191fbf32..953bffdfe 100644
--- a/nemo_text_processing/text_normalization/rw/verbalizers/verbalize_final.py
+++ b/nemo_text_processing/text_normalization/rw/verbalizers/verbalize_final.py
@@ -1,5 +1,6 @@
 # Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
 # Copyright (c) 2024, DIGITAL UMUGANDA
+# Copyright 2015 and onwards Google, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -16,11 +17,14 @@
 from pynini.lib import pynutil
 from nemo_text_processing.text_normalization.rw.verbalizers.verbalize import VerbalizeFst
 from nemo_text_processing.text_normalization.en.verbalizers.word import WordFst
-from nemo_text_processing.text_normalization.en.graph_utils import (
+from nemo_text_processing.text_normalization.rw.graph_utils import (
     GraphFst,
     delete_extra_space,
+    delete_space_or_punct,
     delete_space,
+    NEMO_PUNCT,
     generator_main,
+    delete_space
 )
 import os
 
@@ -34,20 +38,20 @@ def __init__(self, cache_dir: str = None, overwrite_cache: bool = False,determin
         if not overwrite_cache and far_file and os.path.exists(far_file):
             self.fst = pynini.Far(far_file, mode="r")["verbalize"]
         else:
-            verbalize = VerbalizeFst().fst
-            word = WordFst().fst
-		    
+            verbalize = VerbalizeFst(deterministic=deterministic).fst
+            word = WordFst(deterministic=deterministic).fst
             types = verbalize | word
             graph = (
-		        pynutil.delete("tokens")
-		        + delete_space
-		        + pynutil.delete("{")
-		        + delete_space
-		        + types
-		        + delete_space
-		        + pynutil.delete("}")
-		    )
-            graph = delete_space + pynini.closure(graph + delete_extra_space) + graph + delete_space
+                pynutil.delete("tokens")
+                + delete_space
+                + pynutil.delete("{")
+                + delete_space
+                + types
+                + delete_space
+                + pynutil.delete("}")
+            )
+            graph = delete_space + pynini.closure(graph + delete_space) + graph + delete_space
+
 
 
             self.fst = graph
diff --git a/tools/text_processing_deployment/pynini_export.py b/tools/text_processing_deployment/pynini_export.py
index 596723091..0cbd53349 100644
--- a/tools/text_processing_deployment/pynini_export.py
+++ b/tools/text_processing_deployment/pynini_export.py
@@ -1,4 +1,5 @@
-# Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
+# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+# Copyright (c) 2024, DIGITAL UMUGANDA
 # Copyright 2015 and onwards Google, Inc.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -20,7 +21,7 @@
 
 import pynini
 
-from nemo_text_processing.text_normalization.en.graph_utils import generator_main
+from nemo_text_processing.text_normalization.rw.graph_utils import generator_main
 
 # This script exports compiled grammars inside nemo_text_processing into OpenFst finite state archive files
 # tokenize_and_classify.far and verbalize.far for production purposes
@@ -270,7 +271,7 @@ def parse_args():
             ClassifyFst as TNClassifyFst,
         )
         from nemo_text_processing.text_normalization.hy.verbalizers.verbalize import VerbalizeFst as TNVerbalizeFst
-	elif args.language == 'rw':
+    elif args.language == 'rw':
         from nemo_text_processing.text_normalization.rw.taggers.tokenize_and_classify import (
             ClassifyFst as TNClassifyFst,
         )