Skip to content

Commit

Permalink
electronic verbalizer fallback (#81)
Browse files (browse the repository at this point in the history)
* 0.1.8 release

Signed-off-by: Evelina <[email protected]>

* add elec fallback

Signed-off-by: Evelina <[email protected]>

* update ci

Signed-off-by: Evelina <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Signed-off-by: Evelina <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
ekmb and pre-commit-ci[bot] committed Jun 20, 2023
1 parent f32af30 commit 87e09f2
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 6 deletions.
2 changes: 1 addition & 1 deletion Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ pipeline {

AR_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
DE_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
EN_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
EN_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-14-23-0'
ES_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
ES_EN_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-13-23-1'
FR_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,22 +85,26 @@ def __init__(self, deterministic: bool = True):

domain_common = pynini.string_file(get_abs_path("data/electronic/domain.tsv"))

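# Permissive domain pattern with no required ".<tld>" suffix; it is added below as a
# heavily penalized (weight=100) alternative to the stricter `domain` pattern, as a safe fallback.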
domain_all = pynini.compose(
default_chars_symbols,
pynini.closure(TO_LOWER | NEMO_LOWER | NEMO_SPACE | pynutil.add_weight(dict_words, MIN_NEG_WEIGHT)),
)

domain = (
pynini.compose(
default_chars_symbols,
pynini.closure(TO_LOWER | NEMO_LOWER | NEMO_SPACE | pynutil.add_weight(dict_words, MIN_NEG_WEIGHT)),
)
domain_all
+ insert_space
+ plurals._priority_union(
domain_common, pynutil.add_weight(pynini.cross(".", "dot"), weight=0.0001), NEMO_SIGMA
)
+ pynini.closure(insert_space + default_chars_symbols, 0, 1)
)

domain = (
pynutil.delete("domain:")
+ delete_space
+ pynutil.delete("\"")
+ domain
+ (domain | pynutil.add_weight(domain_all, weight=100)).optimize()
+ delete_space
+ pynutil.delete("\"")
).optimize()
Expand Down

0 comments on commit 87e09f2

Please sign in to comment.