Skip to content

Commit

Permalink
Hindi TN support: Cardinal, Decimal, Fraction, Date
Browse files Browse the repository at this point in the history
Signed-off-by: Ankit Narwade <[email protected]>
  • Loading branch information
ankitnv committed Aug 29, 2024
1 parent fc3cf9c commit f5b564a
Show file tree
Hide file tree
Showing 9 changed files with 35 additions and 15 deletions.
23 changes: 23 additions & 0 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ pipeline {
HY_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/03-12-24-0'
MR_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/03-12-24-1'
JA_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/07-15-24-0'
HI_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/08-29-24-0'
DEFAULT_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
}
stages {
Expand Down Expand Up @@ -92,6 +93,23 @@ pipeline {

}
}
// Builds the Hindi (HI) text-normalization grammars by running normalize.py
// once on a sample Devanagari digit, writing the compiled grammars to
// HI_TN_CACHE. Runs only on 'main' or on change requests targeting 'main'.
stage('L0: Create HI TN/ITN Grammars') {
  when {
    anyOf {
      branch 'main'
      changeRequest target: 'main'
    }
  }
  failFast true
  parallel {
    stage('L0: Hi TN grammars') {
      steps {
        // --lang=hi is required: without an explicit language flag normalize.py
        // uses its default language (presumably English -- confirm against the
        // script's argument parser), so the cache would not contain Hindi grammars.
        sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --lang=hi --text="१" --cache_dir ${HI_TN_CACHE}'
      }
    }

  }
}

stage('L0: Create DE/ES TN/ITN Grammars') {
when {
Expand Down Expand Up @@ -323,6 +341,11 @@ pipeline {
sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/es/ -m "not pleasefixme" --cpu --tn_cache_dir ${ES_TN_CACHE}'
}
}
// Runs the Hindi (hi) pytest suite on CPU only (CUDA_VISIBLE_DEVICES is emptied
// and --cpu is passed), skipping tests marked "pleasefixme".
// Grammars are read from HI_TN_CACHE via --tn_cache_dir.
stage('L1: Run all HI TN/ITN tests (restore grammars from cache)') {
steps {
sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/hi/ -m "not pleasefixme" --cpu --tn_cache_dir ${HI_TN_CACHE}'
}
}
stage('L1: Run all Codeswitched ES/EN TN/ITN tests (restore grammars from cache)') {
steps {
sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/es_en/ -m "not pleasefixme" --cpu --tn_cache_dir ${ES_EN_TN_CACHE}'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,15 +61,12 @@ def create_larger_number_graph(digit_graph, suffix, zeros_counts, sub_graph):
graph_hundreds.optimize()
self.graph_hundreds = graph_hundreds


# Transducer for eleven hundred -> 1100 or twenty one hundred eleven -> 2111
graph_hundreds_as_thousand = create_graph_suffix(teens_and_ties, suffix_hundreds, 2)
graph_hundreds_as_thousand |= create_larger_number_graph(teens_and_ties, suffix_hundreds, 1, digit)
graph_hundreds_as_thousand |= create_larger_number_graph(teens_and_ties, suffix_hundreds, 0, teens_ties)
self.graph_hundreds_as_thousand = graph_hundreds_as_thousand



# Thousands and ten-thousands graph
suffix_thousands = pynutil.insert(" हज़ार")
graph_thousands = create_graph_suffix(digit, suffix_thousands, 3)
Expand Down Expand Up @@ -302,5 +299,4 @@ def create_larger_number_graph(digit_graph, suffix, zeros_counts, sub_graph):
self.final_graph = final_graph.optimize()
final_graph = optional_minus_graph + pynutil.insert("integer: \"") + self.final_graph + pynutil.insert("\"")
final_graph = self.add_tokens(final_graph)
self.fst = final_graph

self.fst = final_graph
3 changes: 1 addition & 2 deletions nemo_text_processing/text_normalization/hi/taggers/date.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,5 +71,4 @@ def __init__(self, cardinal: GraphFst):

self.final_graph = final_graph.optimize()

self.fst = self.add_tokens(self.final_graph)

self.fst = self.add_tokens(self.final_graph)
Original file line number Diff line number Diff line change
Expand Up @@ -81,5 +81,4 @@ def __init__(self, cardinal: GraphFst, deterministic: bool = True):
final_graph = optional_graph_negative + self.final_graph_wo_negative

final_graph = self.add_tokens(final_graph)
self.fst = final_graph.optimize()

self.fst = final_graph.optimize()
Original file line number Diff line number Diff line change
Expand Up @@ -61,5 +61,4 @@ def __init__(self, cardinal, deterministic: bool = True):

graph = self.graph
final_graph = self.add_tokens(graph)
self.fst = final_graph.optimize()

self.fst = final_graph.optimize()
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,4 @@ def __init__(self, deterministic: bool = True):

self.numbers = self.optional_sign + integer
delete_tokens = self.delete_tokens(self.numbers)
self.fst = delete_tokens.optimize()
self.fst = delete_tokens.optimize()
Original file line number Diff line number Diff line change
Expand Up @@ -69,5 +69,4 @@ def __init__(self):
final_graph = self.graph

delete_tokens = self.delete_tokens(final_graph)
self.fst = delete_tokens.optimize()

self.fst = delete_tokens.optimize()
Original file line number Diff line number Diff line change
Expand Up @@ -53,4 +53,4 @@ def __init__(self, cardinal: GraphFst, deterministic: bool = True):
graph = self.graph

delete_tokens = self.delete_tokens(graph)
self.fst = delete_tokens.optimize()
self.fst = delete_tokens.optimize()
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from nemo_text_processing.text_normalization.hi.verbalizers.decimal import DecimalFst
from nemo_text_processing.text_normalization.hi.verbalizers.fraction import FractionFst
from nemo_text_processing.text_normalization.hi.verbalizers.date import DateFst
from nemo_text_processing.text_normalization.hi.verbalizers.time import TimeFst
from nemo_text_processing.text_normalization.hi.verbalizers.whitelist import WhiteListFst


Expand Down Expand Up @@ -45,6 +46,9 @@ def __init__(self, deterministic: bool = True):

date = DateFst()
date_graph = date.fst

time = TimeFst()
time_graph = time.fst

whitelist_graph = WhiteListFst(deterministic=deterministic).fst

Expand All @@ -53,6 +57,7 @@ def __init__(self, deterministic: bool = True):
| decimal_graph
| fraction_graph
| date_graph
| time_graph
| whitelist_graph
)

Expand Down

0 comments on commit f5b564a

Please sign in to comment.