Skip to content

Commit

Permalink
Merge pull request #710 from 0xCAFEBABE0/bug_cpu_hang
Browse files Browse the repository at this point in the history
fix(bug).when generating text that contains only punctuation marks or…
  • Loading branch information
aluminumbox authored Dec 30, 2024
2 parents 3d0458a + b60c37b commit 5aa3a46
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 1 deletion.
6 changes: 5 additions & 1 deletion cosyvoice/cli/frontend.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
from tn.chinese.normalizer import Normalizer as ZhNormalizer
from tn.english.normalizer import Normalizer as EnNormalizer
use_ttsfrd = False
from cosyvoice.utils.frontend_utils import contains_chinese, replace_blank, replace_corner_mark, remove_bracket, spell_out_number, split_paragraph
from cosyvoice.utils.frontend_utils import contains_chinese, replace_blank, replace_corner_mark, remove_bracket, spell_out_number, split_paragraph, is_only_punctuation


class CosyVoiceFrontEnd:
Expand Down Expand Up @@ -111,6 +111,10 @@ def text_normalize(self, text, split=True, text_frontend=True):
if text_frontend is False:
return [text] if split is True else text
text = text.strip()
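# Text made up solely of punctuation/symbols (or empty once stripped) is not passed on to
# the normalization steps below; an empty list is returned so the caller receives no segments.
# NOTE: the result is a list even when split is False, unlike the early return above.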
if is_only_punctuation(text):
return []
if contains_chinese(text):
if self.use_ttsfrd:
texts = [i["text"] for i in json.loads(self.frd.do_voicegen_frd(text))["sentences"]]
Expand Down
7 changes: 7 additions & 0 deletions cosyvoice/utils/frontend_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.

import re
import regex
chinese_char_pattern = re.compile(r'[\u4e00-\u9fff]+')


Expand Down Expand Up @@ -127,3 +128,9 @@ def replace_blank(text: str):
else:
out_str.append(c)
return "".join(out_str)


def is_only_punctuation(text):
    """Return True when *text* holds nothing but punctuation, symbols or whitespace.

    A character counts as punctuation or symbol when its Unicode general
    category starts with ``P`` or ``S``.  The empty string also returns
    True, because it contains no other kind of character.

    Whitespace is ignored, so inputs with blanks or newlines between the
    punctuation marks (for example ``"... !"``) are recognised too; the
    caller only strips leading and trailing whitespace before asking.

    Args:
        text: The string to inspect.

    Returns:
        bool: True if every character is punctuation, a symbol or
        whitespace (or if *text* is empty), otherwise False.
    """
    # Function-scope import keeps this block self-contained (stdlib only).
    import unicodedata

    return all(
        ch.isspace() or unicodedata.category(ch)[0] in "PS"
        for ch in text
    )

0 comments on commit 5aa3a46

Please sign in to comment.