Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cardinals up to a hundred trillion, timeFST and transliteration #209

Merged
merged 7 commits into from
Sep 17, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions nemo_text_processing/text_normalization/normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,9 @@ def __init__(
elif lang == 'hy':
from nemo_text_processing.text_normalization.hy.taggers.tokenize_and_classify import ClassifyFst
from nemo_text_processing.text_normalization.hy.verbalizers.verbalize_final import VerbalizeFinalFst
elif lang == 'rw':
from nemo_text_processing.text_normalization.rw.taggers.tokenize_and_classify import ClassifyFst
from nemo_text_processing.text_normalization.rw.verbalizers.verbalize_final import VerbalizeFinalFst
else:
raise NotImplementedError(f"Language {lang} has not been supported yet.")

Expand Down
19 changes: 19 additions & 0 deletions nemo_text_processing/text_normalization/rw/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright (c) 2024, DIGITAL UMUGANDA
# Copyright 2015 and onwards Google, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from nemo_text_processing.text_normalization.rw.taggers.tokenize_and_classify import ClassifyFst
from nemo_text_processing.text_normalization.rw.verbalizers.verbalize import VerbalizeFst
from nemo_text_processing.text_normalization.rw.verbalizers.verbalize_final import VerbalizeFinalFst

15 changes: 15 additions & 0 deletions nemo_text_processing/text_normalization/rw/data/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright (c) 2024, DIGITAL UMUGANDA
# Copyright 2015 and onwards Google, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright (c) 2024, DIGITAL UMUGANDA
# Copyright 2015 and onwards Google, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
rimwe 1
kabiri 2
gatatu 3
kane 4
gatanu 5
gatandatu 6
karindwi 7
umunani 8
icyenda 9
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
0
kimwe 1
bibiri 2
bitatu 3
bine 4
bitanu 5
bitandatu 6
birindwi 7
umunani 8
icyenda 9
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
0
imwe 1
ebyiri 2
eshatu 3
enye 4
eshanu 5
esheshatu 6
zirindwi 7
umunani 8
icyenda 9
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
ijana 1
magana_abiri 2
magana_atatu 3
magana_ane 4
magana_atanu 5
magana_atandatu 6
magana_arindwi 7
magana_inani 8
magana_cyenda 9
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
miliyoni_ijana 1
miliyoni_magana_abiri 2
miliyoni_magana_atatu 3
miliyoni_magana_ane 4
miliyoni_magana_atanu 5
miliyoni_magana_atandatu 6
miliyoni_magana_arindwi 7
miliyoni_magana_inani 8
miliyoni_magana_cyenda 9
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
ibihumbi_ijana 1
ibihumbi_magana_abiri 2
ibihumbi_magana_atatu 3
ibihumbi_magana_ane 4
ibihumbi_magana_atanu 5
ibihumbi_magana_atandatu 6
ibihumbi_magana_arindwi 7
ibihumbi_magana_inani 8
ibihumbi_magana_cyenda 9
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
tiriyoni_ijana 1
tiriyoni_magana_abiri 2
tiriyoni_magana_atatu 3
tiriyoni_magana_ane 4
tiriyoni_magana_atanu 5
tiriyoni_magana_atandatu 6
tiriyoni_magana_arindwi 7
tiriyoni_magana_inani 8
tiriyoni_magana_cyenda 9
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
miliyoni 1
miliyoni_ebyiri 2
miliyoni_eshatu 3
miliyoni_enye 4
miliyoni_eshanu 5
miliyoni_esheshatu 6
miliyoni_zirindwi 7
miliyoni_umunani 8
miliyoni_icyenda 9
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
0
makumyabiri 2
mirongo_itatu 3
mirongo_ine 4
mirongo_itanu 5
mirongo_itandatu 6
mirongo_irindwi 7
mirongo_inani 8
mirongo_icyenda 9
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
miliyoni_cumi 1
miliyoni_makumyabiri 2
miliyoni_mirongo_itatu 3
miliyoni_mirongo_ine 4
miliyoni_mirongo_itanu 5
miliyoni_mirongo_itandatu 6
miliyoni_mirongo_irindwi 7
miliyoni_mirongo_inani 8
miliyoni_mirongo_icyenda 9
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
ibihumbi_cumi 1
ibihumbi_makumyabiri 2
ibihumbi_mirongo_itatu 3
ibihumbi_mirongo_ine 4
ibihumbi_mirongo_itanu 5
ibihumbi_mirongo_itandatu 6
ibihumbi_mirongo_irindwi 7
ibihumbi_mirongo_inani 8
ibihumbi_mirongo_icyenda 9
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
tiriyoni_icumi 1
tiriyoni_makumyabiri 2
tiriyoni_mirongo_itatu 3
tiriyoni_mirongo_ine 4
tiriyoni_mirongo_itanu 5
tiriyoni_mirongo_itandatu 6
tiriyoni_mirongo_irindwi 7
tiriyoni_mirongo_inani 8
tiriyoni_mirongo_icyenda 9
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
igihumbi 1
ibihumbi_bibiri 2
ibihumbi_bitatu 3
ibihumbi_bine 4
ibihumbi_bitanu 5
ibihumbi_bitandatu 6
ibihumbi_birindwi 7
ibihumbi_umunani 8
ibihumbi_icyenda 9

Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
tiriyoni 1
tiriyoni_ebyiri 2
tiriyoni_eshatu 3
tiriyoni_enye 4
tiriyoni_eshanu 5
tiriyoni_esheshatu 6
tiriyoni_zirindwi 7
tiriyoni_umunani 8
tiriyoni_icyenda 9
15 changes: 15 additions & 0 deletions nemo_text_processing/text_normalization/rw/data/time/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright (c) 2024, DIGITAL UMUGANDA
# Copyright 2015 and onwards Google, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
12 changes: 12 additions & 0 deletions nemo_text_processing/text_normalization/rw/data/time/hours.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
1 saa saba
2 saa munani
3 saa cyenda
4 saa cumi
5 saa cumi n'imwe
6 saa cumi n'ebyiri
7 saa moya
8 saa mbiri
9 saa tatu
10 saa ine
11 saa tanu
12 saa sita
60 changes: 60 additions & 0 deletions nemo_text_processing/text_normalization/rw/data/time/minutes.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
00
01 n'umunota umwe
02 n'iminota ibiri
03 n'iminota itatu
04 n'iminota ine
05 n'iminota itanu
06 n'iminota itandatu
07 n'iminota irindwi
08 n'iminota umunani
09 n'iminota icyenda
10 n'iminota icumi
11 n'iminota cumi n'umwe
12 n'iminota cumi n'ibiri
13 n'iminota cumi n'itatu
14 n'iminota cumi n'ine
15 n'iminota cumi n'itanu
16 n'iminota cumi n'itandatu
17 n'iminota cumi n'irindwi
18 n'iminota cumi n'umunani
19 n'iminota cumi n'icyenda
20 n'iminota makumyabiri
21 n'iminota makumyabiri n'umwe
22 n'iminota makumyabiri n'ibiri
23 n'iminota makumyabiri n'itatu
24 n'iminota makumyabiri n'ine
25 n'iminota makumyabiri n'itanu
26 n'iminota makumyabiri n'itandatu
27 n'iminota makumyabiri n'irindwi
28 n'iminota makumyabiri n'umunani
29 n'iminota makumyabiri n'icyenda
30 n'iminota mirongo itatu
31 n'iminota mirongo itatu n'umwe
32 n'iminota mirongo itatu n'ibiri
33 n'iminota mirongo itatu n'itatu
34 n'iminota mirongo itatu n'ine
35 n'iminota mirongo itatu n'itanu
36 n'iminota mirongo itatu n'itandatu
37 n'iminota mirongo itatu n'irindwi
38 n'iminota mirongo itatu n'umunani
39 n'iminota mirongo itatu n'icyenda
40 n'iminota mirongo ine
41 n'iminota mirongo ine n'umwe
42 n'iminota mirongo ine n'ibiri
43 n'iminota mirongo ine n'itatu
44 n'iminota mirongo ine n'ine
45 n'iminota mirongo ine n'itanu
46 n'iminota mirongo ine n'itandatu
47 n'iminota mirongo ine n'irindwi
48 n'iminota mirongo ine n'umunani
49 n'iminota mirongo ine n'icyenda
50 n'iminota mirongo itanu
51 n'iminota mirongo itanu n'umwe
52 n'iminota mirongo itanu n'ibiri
53 n'iminota mirongo itanu n'itatu
54 n'iminota mirongo itanu n'ine
55 n'iminota mirongo itanu n'itanu
56 n'iminota mirongo itanu n'itandatu
57 n'iminota mirongo itanu n'irindwi
58 n'iminota mirongo itanu n'umunani
59 n'iminota mirongo itanu n'icyenda
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright (c) 2024, DIGITAL UMUGANDA
# Copyright 2015 and onwards Google, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Loading