From eacfc8b24f2f03455e6b95afdd77f52bad8fd912 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xingchen=20Song=28=E5=AE=8B=E6=98=9F=E8=BE=B0=29?= Date: Tue, 25 Jun 2024 13:53:46 +0800 Subject: [PATCH] [tn] english, fix <p>
(#251) --- tn/english/rules/punctuation.py | 14 +++++++++----- tn/english/test/data/normalizer.txt | 1 + 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/tn/english/rules/punctuation.py b/tn/english/rules/punctuation.py index 3147b3b..68f0552 100644 --- a/tn/english/rules/punctuation.py +++ b/tn/english/rules/punctuation.py @@ -61,14 +61,15 @@ def build_tagger(self): punct = closure(self.punct | cross('\\', '\\\\\\') | cross('"', '\\"'), 1) - emphasis = ( + self.emphasis = ( accep("<") + (( closure(self.NOT_SPACE - union("<", ">"), 1) + # noqa closure(accep("/"), 0, 1)) # noqa | (accep("/") + closure(self.NOT_SPACE - union("<", ">"), 1))) + accep(">")) # noqa - punct = plurals._priority_union(emphasis, punct, closure(self.VCHAR)) + punct = plurals._priority_union(self.emphasis, punct, + closure(self.VCHAR)) self.graph = punct final_graph = insert("v: \"") + add_weight( @@ -78,7 +79,10 @@ def build_tagger(self): def build_verbalizer(self): punct = closure( - self.punct | cross('\\\\\\', '\\') | cross('\\"', '"') - | accep(" "), 1) - verbalizer = delete('v: "') + punct + delete('"') + self.punct | self.emphasis | cross('\\\\\\', '\\') + | cross('\\"', '"'), 1) + verbalizer = delete('v: "') + add_weight(accep(" "), -1.0).star \ + + punct \ + + add_weight(accep(" "), -1.0).star \ + + delete('"') self.verbalizer = self.delete_tokens(verbalizer) diff --git a/tn/english/test/data/normalizer.txt b/tn/english/test/data/normalizer.txt index fc5cc92..4b0c12d 100644 --- a/tn/english/test/data/normalizer.txt +++ b/tn/english/test/data/normalizer.txt @@ -4,3 +4,4 @@ The National Map, accessed April 1, 2011" Site Description of Koppers Co. From t The museum is open Mon.-Sun. children of 3-4 years 123 The plan will help you lose 3-4 pounds the first week, and 1-2 pounds the weeks thereafter. 
=> The museum is open Monday to Sunday children of three to four years one hundred and twenty three The plan will help you lose three to four pounds the first week, and one to two pounds the weeks thereafter. Try searching for 'Toyota' or 'Investment' => Try searching for 'Toyota' or 'Investment' "" => "" +The HTML tag

<p> defines a paragraph. => The HTML tag <p> defines a paragraph.