Merge pull request #710 from 0xCAFEBABE0/bug_cpu_hang

fix(bug): when generating text that contains only punctuation marks or…
FunAudioLLM · Dec 30, 2024 · 5aa3a46 · 5aa3a46
2 parents 3d0458a + b60c37b
commit 5aa3a46
Show file tree

Hide file tree

Showing 2 changed files with 12 additions and 1 deletion.
diff --git a/cosyvoice/cli/frontend.py b/cosyvoice/cli/frontend.py
@@ -31,7 +31,7 @@
     from tn.chinese.normalizer import Normalizer as ZhNormalizer
     from tn.english.normalizer import Normalizer as EnNormalizer
     use_ttsfrd = False
-from cosyvoice.utils.frontend_utils import contains_chinese, replace_blank, replace_corner_mark, remove_bracket, spell_out_number, split_paragraph
+from cosyvoice.utils.frontend_utils import contains_chinese, replace_blank, replace_corner_mark, remove_bracket, spell_out_number, split_paragraph, is_only_punctuation
 
 
 class CosyVoiceFrontEnd:
@@ -111,6 +111,10 @@ def text_normalize(self, text, split=True, text_frontend=True):
         if text_frontend is False:
             return [text] if split is True else text
         text = text.strip()
+        # Text that is empty after strip() or made up only of punctuation/symbol characters yields no segments:
+        # return an empty list. NOTE(review): a list is returned even when split is False - confirm callers expect that.
+        if is_only_punctuation(text):
+            return []
         if contains_chinese(text):
             if self.use_ttsfrd:
                 texts = [i["text"] for i in json.loads(self.frd.do_voicegen_frd(text))["sentences"]]

diff --git a/cosyvoice/utils/frontend_utils.py b/cosyvoice/utils/frontend_utils.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import re
+import regex
 chinese_char_pattern = re.compile(r'[\u4e00-\u9fff]+')
 
 
@@ -127,3 +128,9 @@ def replace_blank(text: str):
         else:
             out_str.append(c)
     return "".join(out_str)
+
+
+def is_only_punctuation(text):
+    # Return True when text is empty or consists solely of Unicode punctuation (\p{P}) and symbol (\p{S}) characters.
+    punctuation_pattern = r'^[\p{P}\p{S}]*$'  # '*' lets the empty string match; whitespace is not in the class
+    return bool(regex.fullmatch(punctuation_pattern, text))  # fullmatch must consume the entire string