Skip to content

Commit

Permalink
v0.2.17: Fixes malformed transcript critical bug
Browse files Browse the repository at this point in the history
  • Loading branch information
souzatharsis committed Oct 31, 2024
1 parent 035862c commit a18b91a
Show file tree
Hide file tree
Showing 6 changed files with 126 additions and 11 deletions.
2 changes: 1 addition & 1 deletion podcastfy/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
# This file can be left empty for now
__version__ = "0.2.16" # or whatever version you're on
__version__ = "0.2.17" # or whatever version you're on
2 changes: 1 addition & 1 deletion podcastfy/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ content_generator:
gemini_model: "gemini-1.5-pro-latest"
max_output_tokens: 8192
prompt_template: "souzatharsis/podcastfy_multimodal_cleanmarkup"
prompt_commit: "3d5b42fc"
prompt_commit: "23094489"
content_extractor:
youtube_url_patterns:
- "youtube.com"
Expand Down
49 changes: 46 additions & 3 deletions podcastfy/content_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,12 +258,12 @@ def generate_qa_content(
image_file_paths, image_path_keys, input_texts
)

response_raw = self.chain.invoke(
self.response = self.chain.invoke(
prompt_params
) # in the future, make sure we have structured output

# Clean up scratchpad blocks from response
self.response = self.__clean_scratchpad(response_raw)
self.response = self.__clean_tss_markup(self.response)

logger.info(f"Content generated successfully")

Expand All @@ -278,6 +278,49 @@ def generate_qa_content(
raise


def __clean_tss_markup(self, input_text: str, additional_tags: List[str] = ["Person1", "Person2"]) -> str:
    """
    Remove unsupported TTS markup tags from the input text while preserving supported SSML tags.

    Supported SSML tags are kept intact, including their closing tags.
    Closing tags of the additional (speaker) tags are dropped and then
    re-inserted right before the next speaker tag (or at the end of the
    text), so a transcript with missing or misplaced closing speaker tags
    comes out with one closing tag per speaker turn.

    Args:
        input_text (str): The input text containing markup tags.
        additional_tags (List[str]): Optional list of additional tags to preserve.
            Defaults to ["Person1", "Person2"]. The list is only read, never modified.

    Returns:
        str: Cleaned text with unsupported markup tags removed.
    """
    # List of SSML tags supported by both OpenAI and ElevenLabs
    supported_tags = ["speak", "lang", "p", "phoneme", "s", "sub"]

    # Append additional tags to the supported tags list
    supported_tags.extend(additional_tags)
    supported_names = "|".join(re.escape(tag) for tag in supported_tags)

    # Match any tag whose name is not in the supported list. The optional "/"
    # must live INSIDE the lookahead: with "</?(?!...)" the regex engine can
    # backtrack to an empty "/?" and then sees "/name", which never matches a
    # supported name, so every closing tag (e.g. </speak>) would be removed.
    pattern = r"<(?!/?(?:" + supported_names + r")\b)[^>]+>"
    cleaned_text = re.sub(pattern, "", input_text)

    speaker_names = "|".join(re.escape(tag) for tag in additional_tags)
    if additional_tags:
        # Drop existing closing speaker tags on purpose; they are rebuilt
        # below from the positions of the opening tags, which also repairs
        # turns whose closing tag is missing.
        cleaned_text = re.sub(
            r"</(?:" + speaker_names + r")\b[^>]*>", "", cleaned_text
        )

    # Remove any leftover empty lines
    cleaned_text = re.sub(r"\n\s*\n", "\n", cleaned_text)

    # Close every speaker turn right before the next opening speaker tag,
    # or at the end of the text for the last turn.
    for tag in additional_tags:
        cleaned_text = re.sub(
            "<" + re.escape(tag) + r">(.*?)(?=<(?:" + speaker_names + r")>|$)",
            lambda m, tag=tag: f"<{tag}>{m.group(1)}</{tag}>",
            cleaned_text,
            flags=re.DOTALL,
        )

    return cleaned_text.replace("(scratchpad)", "").strip()




def main(seed: int = 42, is_local: bool = False) -> None:
"""
Generate Q&A content based on input text from input_text.txt using the specified LLM backend.
Expand Down Expand Up @@ -322,4 +365,4 @@ def main(seed: int = 42, is_local: bool = False) -> None:


if __name__ == "__main__":
main()
main()
80 changes: 76 additions & 4 deletions podcastfy/text_to_speech.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,17 +80,22 @@ def convert_to_speech(self, text: str, output_file: str) -> None:
Args:
text (str): Input text to convert to speech.
output_file (str): Path to save the output audio file.
Raises:
ValueError: If the input text is not properly formatted
"""

# Then clean up TSS markup
# Validate transcript format
#self._validate_transcript_format(text)

# Clean up TSS markup
cleaned_text = text

try:
with tempfile.TemporaryDirectory(dir=self.temp_audio_dir) as temp_dir:
audio_segments = self._generate_audio_segments(cleaned_text, temp_dir)
self._merge_audio_files(audio_segments, output_file)
logger.info(f"Audio saved to {output_file}")

except Exception as e:
logger.error(f"Error converting text to speech: {str(e)}")
raise
Expand Down Expand Up @@ -201,6 +206,73 @@ def _setup_directories(self) -> None:
if dir_path and not os.path.exists(dir_path):
os.makedirs(dir_path)

def _validate_transcript_format(self, text: str) -> None:
    """
    Validate that the input text follows the correct transcript format.

    Args:
        text (str): Input text to validate

    Raises:
        ValueError: If the text is not properly formatted

    The text should:
    1. Have alternating Person1 and Person2 tags
    2. Each opening tag should have a closing tag
    3. Tags should be properly nested
    """
    try:
        # Check for empty text
        if not text.strip():
            raise ValueError("Input text is empty")

        # Check for matching opening and closing tags
        person1_open = text.count("<Person1>")
        person1_close = text.count("</Person1>")
        person2_open = text.count("<Person2>")
        person2_close = text.count("</Person2>")

        if person1_open != person1_close:
            raise ValueError(f"Mismatched Person1 tags: {person1_open} opening tags and {person1_close} closing tags")
        if person2_open != person2_close:
            raise ValueError(f"Mismatched Person2 tags: {person2_open} opening tags and {person2_close} closing tags")

        # Two opening tags in a row for the same speaker can never be an
        # alternating transcript. The pair-count check below cannot see
        # this on its own: its lazy ".*?" (with DOTALL) may span over an
        # intervening tag, so e.g. Person1, Person1, Person2 still yields
        # the expected number of pairs.
        speakers = re.findall(r"<Person([12])>", text)
        repeated_speaker = any(
            current == following
            for current, following in zip(speakers, speakers[1:])
        )

        # Check for alternating pattern using regex
        pattern = r"<Person1>.*?</Person1>\s*<Person2>.*?</Person2>"
        matches = re.findall(pattern, text, re.DOTALL)

        # Calculate expected number of pairs
        expected_pairs = min(person1_open, person2_open)

        if repeated_speaker or len(matches) != expected_pairs:
            raise ValueError(
                "Tags are not properly alternating between Person1 and Person2. "
                "Each Person1 section should be followed by a Person2 section."
            )

        # Check for malformed tags (unclosed or improperly nested) by
        # replaying the tags in order against a stack of open tag names.
        stack = []
        for match in re.finditer(r"<(/?)Person([12])>", text):
            tag = match.group(0)
            closing, speaker = match.groups()
            name = f"Person{speaker}"
            if closing:
                if not stack or stack[-1] != name:
                    raise ValueError(f"Improperly nested tags near: {tag}")
                stack.pop()
            else:
                stack.append(name)

        if stack:
            raise ValueError(f"Unclosed tags: {', '.join(stack)}")

        logger.debug("Transcript format validation passed")

    except ValueError as e:
        logger.error(f"Transcript format validation failed: {str(e)}")
        raise
    except Exception as e:
        logger.error(f"Unexpected error during transcript validation: {str(e)}")
        # Chain the original exception so the root cause stays in the traceback.
        raise ValueError(f"Invalid transcript format: {str(e)}") from e


def main(seed: int = 42) -> None:
"""
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "podcastfy"
version = "0.2.16"
version = "0.2.17"
description = "An Open Source alternative to NotebookLM's podcast feature: Transforming Multimodal Content into Captivating Multilingual Audio Conversations with GenAI"
authors = ["Tharsis T. P. Souza"]
license = "Apache-2.0"
Expand Down
2 changes: 1 addition & 1 deletion tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def test_generate_transcript_only(sample_config):
for tag in re.findall(r"<Person2>.*?</Person2>", content)
)


@pytest.mark.skip(reason="Not supported yet")
def test_generate_podcast_from_urls_and_file(mock_files, sample_config):
result = runner.invoke(
app,
Expand Down

0 comments on commit a18b91a

Please sign in to comment.