Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(bug).when generating text that contains only punctuation marks or… #710

Merged
merged 7 commits into from
Dec 30, 2024
6 changes: 5 additions & 1 deletion cosyvoice/cli/frontend.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
from tn.chinese.normalizer import Normalizer as ZhNormalizer
from tn.english.normalizer import Normalizer as EnNormalizer
use_ttsfrd = False
from cosyvoice.utils.frontend_utils import contains_chinese, replace_blank, replace_corner_mark, remove_bracket, spell_out_number, split_paragraph
from cosyvoice.utils.frontend_utils import contains_chinese, replace_blank, replace_corner_mark, remove_bracket, spell_out_number, split_paragraph, is_only_punctuation


class CosyVoiceFrontEnd:
Expand Down Expand Up @@ -109,6 +109,10 @@ def _extract_speech_feat(self, speech):

def text_normalize(self, text, split=True):
text = text.strip()
# If the stripped text is empty or consists only of punctuation marks/symbols,
# return an empty list of texts so that downstream processing stays consistent.
if is_only_punctuation(text):
return []
if contains_chinese(text):
if self.use_ttsfrd:
texts = [i["text"] for i in json.loads(self.frd.do_voicegen_frd(text))["sentences"]]
Expand Down
7 changes: 7 additions & 0 deletions cosyvoice/utils/frontend_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.

import re
import unicodedata

import regex
# Matches a run of one or more characters in the range U+4E00-U+9FFF
# (the CJK Unified Ideographs block).
chinese_char_pattern = re.compile(r'[\u4e00-\u9fff]+')


Expand Down Expand Up @@ -127,3 +128,9 @@ def replace_blank(text: str):
else:
out_str.append(c)
return "".join(out_str)


def is_only_punctuation(text: str) -> bool:
    """Return True if *text* is empty or made up solely of punctuation/symbols.

    A character counts as punctuation or a symbol when its Unicode general
    category starts with ``P`` (Pc, Pd, Ps, Pe, Pi, Pf, Po) or ``S``
    (Sm, Sc, Sk, So) -- the same classes as the ``\\p{P}`` and ``\\p{S}``
    regex properties.

    Whitespace is *not* treated as punctuation, so callers that want to
    ignore surrounding blanks should strip the text first.

    Args:
        text: The string to inspect.

    Returns:
        True for the empty string or a string whose every character is a
        punctuation mark or symbol; False otherwise.
    """
    # all() over an empty string is True, which keeps the "empty counts as
    # punctuation-only" behaviour of the original `[...]*` pattern.
    return all(unicodedata.category(char)[0] in "PS" for char in text)
Loading