add video

swyxio · swyxio · commit aa3da7a7ee01 · 2024-10-01T00:14:06.000-07:00
diff --git a/main.py b/main.py
@@ -3,10 +3,7 @@
 import json
 import requests
 import time
-<<<<<<< HEAD
 import subprocess
-=======
->>>>>>> 55931718a2f0d4000a8192254d25b14605265944
 from datetime import datetime
 from openai import OpenAI
 from pydub import AudioSegment
@@ -51,7 +48,6 @@ def generate_dialogue():
 
     log("BRAINSTORM", "Generating important news stories and discussion topics...")
     brainstorm_response = openai_client.chat.completions.create(
-<<<<<<< HEAD
         model=basemodel,
         messages=[
             {"role": "user", "content": "You are an AI assistant tasked with brainstorming important tech news stories and discussion topics. Focus on new models, data, news, rumors and hot topics. Ignore mundane support or debugging issues."},
@@ -75,43 +71,15 @@ def generate_dialogue():
     brainstormed_questions = questions_response.choices[0].message.content
     log("QUESTION_GEN", "Questions generation completed.")
     log("QUESTION_GEN_OUTPUT", brainstormed_questions)
-=======
-        model="gpt-4o",
-        messages=[
-            {"role": "system", "content": "You are an AI assistant tasked with brainstorming important tech news stories and discussion topics. Focus on new models, data, news, and hot topics. Ignore mundane support or debugging issues."},
-            {"role": "user", "content": f"Based on the following content, brainstorm the top 5 most important and interesting tech news stories or discussion items. For each topic, provide a brief explanation of why it's significant and how it relates to AI Engineering, machine learning, or tech innovation.\n\nContent: {content}"}
-        ]
-    )
-    
-    brainstormed_topics = brainstorm_response.choices[0].message.content
-    log("BRAINSTORM", "Topics generation completed.")
-
-    log("QUESTION_GEN", "Generating key questions for each topic...")
-    questions_response = openai_client.chat.completions.create(
-        model="gpt-4o",
-        messages=[
-            {"role": "system", "content": "You are an AI assistant tasked with generating insightful questions about tech news items."},
-            {"role": "user", "content": f"Based on the following brainstormed topics, generate 2-3 key questions for each topic that the reader or listener might want answered. These questions should be thought-provoking, slightly humorous, and encourage detailed explanations from Sarah.\n\nBrainstormed Topics:\n{brainstormed_topics}"}
-        ]
-    )
-    
-    brainstormed_questions = questions_response.choices[0].message.content
-    log("QUESTION_GEN", "Questions generation completed.")
->>>>>>> 55931718a2f0d4000a8192254d25b14605265944
 
     log("DIALOGUE_GEN", "Generating dialogue using OpenAI GPT-4...")
     start_time = time.time()
     response = openai_client.chat.completions.create(
         model="gpt-4o",
         messages=[
             {"role": "system", "content": "You are an AI assistant tasked with generating a dialogue about tech news."},
-<<<<<<< HEAD
             {"role": "user", "content": f"Based on the following brainstormed news items, questions, and original content, generate a dialogue discussing the top 5 news of the day for a show called AI News Pod. Include an introduction by a host (Charlie) mentioning today's date, {datetime.now().strftime('%Y-%m-%d')}, and then a discussion between two voices: Karan (male) and Sarah (female). The host (Charlie) should only speak briefly at the start, just mentioning the date and major topics (not introducing himself or the Karan or Sarah), and then at each change of topic, and introduce the headline news and facts that Karan and Sarah will then discuss. Sarah should introduce the news (mentioning the sources they are from) and answer questions, while Karan should make funny/amusing but technical observations for an AI Engineer audience and ask follow-up questions for Sarah to answer. Use the brainstormed questions as a guide for Karan's inquiries. Give credit to the source discussing these topics. End with Charlie again telling listeners to send feedback to @smol_ai on Twitter.\n\nBrainstormed Topics:\n{brainstormed_topics}\n\nBrainstormed Questions:\n{brainstormed_questions}\n\nOriginal Content:\n{content}"},
             {"role": "user", "content": f"Sarah is a 35-year-old AI engineer. She has a Ph.D. in Computer Science from MIT and spent 7 years working as a researcher at Google DeepMind. Sarah is known for her in-depth knowledge and no-nonsense approach to tech news. She's an avid rock climber and often uses climbing metaphors in her explanations, but also loves cooking Thai food and surfing. Her catchphrase is 'What a time to be alive!' and her favorite AI lab is DeepMind.\n\nKaran is a 60-year-old Irish stand-up comedian with a degree in Communications from NYU. He fell into tech journalism by accident when his comedy podcast about ridiculous tech gadgets went viral. Karan brings a fresh, humorous perspective to tech news, often pointing out the absurd and making witty pop culture references. He's a passionate gamer and often relates tech news to video game scenarios, famous movies and tv shows or science fiction/fantasy books. His catchphrase is 'Super easy, barely an inconvenience!' and his favorite AI lab is OpenAI, mainly because he finds their name 'kind of ironic'."}
-=======
-            {"role": "user", "content": f"Based on the following brainstormed news items, questions, and original content, generate a dialogue discussing the top 5 news of the day for a show called AI News Pod. Include an introduction by a host, and then a discussion between two voices: Alex (male) and Sarah (female). The host should only speak at the start, and then at each change of topic, and introduce the headline news and facts that Alex and Sarah will then discuss. Sarah should introduce the news (mentioning the sources they are from) and answer questions, while Alex should make funny/amusing observations and ask follow-up questions for Sarah to answer. Use the brainstormed questions as a guide for Alex's inquiries. Give credit to the source discussing these topics.\n\nBrainstormed Topics:\n{brainstormed_topics}\n\nBrainstormed Questions:\n{brainstormed_questions}\n\nOriginal Content:\n{content}"},
-            {"role": "user", "content": f"Sarah is a 35-year-old former software engineer turned tech journalist. She has a Ph.D. in Computer Science from MIT and spent 5 years working at Google DeepMind. Sarah is known for her in-depth knowledge and no-nonsense approach to tech news. She's an avid rock climber and often uses climbing metaphors in her explanations. Her catchphrase is 'Feel the AGI!' and her favorite AI lab is DeepMind.\n\nAlex is a 29-year-old stand-up comedian with a degree in Communications from NYU. He fell into tech journalism by accident when his comedy podcast about ridiculous tech gadgets went viral. Alex brings a fresh, humorous perspective to tech news, often pointing out the absurd and making witty pop culture references. He's a passionate gamer and often relates tech news to video game scenarios. His catchphrase is 'Super easy, barely an inconvenience!' and his favorite AI lab is OpenAI, mainly because he finds their name 'kind of ironic'."}
->>>>>>> 55931718a2f0d4000a8192254d25b14605265944
         ],
         functions=[
             {
@@ -143,7 +111,6 @@ def generate_dialogue():
 
     return response.choices[0].message.function_call.arguments
 
-<<<<<<< HEAD
 def text_to_speech_file(name: str, text: str, voice_id: str, temp_folder: str, history: list, use_cartesia: bool = False, progress: tuple = None) -> tuple:
     if progress:
         current, total = progress
@@ -235,46 +202,6 @@ def text_to_speech_file(name: str, text: str, voice_id: str, temp_folder: str, h
     end_time = time.time()
     log("TTS", f"Audio file saved: {os.path.basename(save_file_path)} (generated in {end_time - start_time:.2f} seconds)")
     return save_file_path, generation_id, duration_sec  # {{ edit_5 }}
-=======
-def text_to_speech_file(text: str, voice_id: str, temp_folder: str, history: list) -> tuple:
-    log("TTS", f"Converting text to speech for voice {voice_id}...")
-    start_time = time.time()
-    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}/stream"
-    
-    headers = {
-        "Accept": "audio/mpeg",
-        "Content-Type": "application/json",
-        "xi-api-key": ELEVENLABS_API_KEY
-    }
-
-    data = {
-        "text": text,
-        "model_id": "eleven_turbo_v2",
-        "voice_settings": {
-            "stability": 0.5,
-            "similarity_boost": 0.75
-        }
-    }
-
-    if history:
-        data["history"] = history[-3:]  # Use up to 3 previous generations
-
-    response = requests.post(url, json=data, headers=headers)
-
-    if response.status_code == 200:
-        save_file_path = os.path.join(temp_folder, f"{uuid.uuid4()}.mp3")
-        with open(save_file_path, "wb") as f:
-            f.write(response.content)
-
-        end_time = time.time()
-        log("TTS", f"Audio file saved: {save_file_path} (generated in {end_time - start_time:.2f} seconds)")
-
-        generation_id = response.headers.get("x-request-id")
-        return save_file_path, generation_id
-    else:
-        log("TTS_ERROR", f"Error: {response.status_code} - {response.text}")
-        return None, None
->>>>>>> 55931718a2f0d4000a8192254d25b14605265944
 
 def combine_audio_files(file_paths, output_file):
     log("AUDIO_COMBINE", "Combining audio files...")
@@ -287,27 +214,12 @@ def combine_audio_files(file_paths, output_file):
     combined = combined[:-200]  # Remove the last silence
     combined.export(output_file, format="mp3")
     end_time = time.time()
-<<<<<<< HEAD
     log("AUDIO_COMBINE", f"Audio files combined with 300ms gaps in {end_time - start_time:.2f} seconds")
-=======
-    log("AUDIO_COMBINE", f"Audio files combined in {end_time - start_time:.2f} seconds")
->>>>>>> 55931718a2f0d4000a8192254d25b14605265944
 
 def main():
     log("PROCESS_START", "Starting the dialogue generation and text-to-speech process...")
     dialogue_json = generate_dialogue()
     dialogue = json.loads(dialogue_json)['dialogue']
-<<<<<<< HEAD
-=======
-    
-    temp_folder = f"temp_{uuid.uuid4()}"
-    os.makedirs(temp_folder, exist_ok=True)
-    log("TEMP_FOLDER", f"Created temporary folder: {temp_folder}")
-    
-    voice_host_id = "ThT5KcBeYPX3keUQqHPh"  # Charlie pre-made voice
-    voice_alex_id = "pNInz6obpgDQGcFmaJgB"  # Adam pre-made voice for Alex
-    voice_sarah_id = "21m00Tcm4TlvDq8ikWAM"  # Rachel pre-made voice for Sarah
->>>>>>> 55931718a2f0d4000a8192254d25b14605265944
 
     temp_folder = f"temp_{datetime.now().strftime('%Y-%m-%d_%H-%M')}_{uuid.uuid4()}"
     os.makedirs(temp_folder, exist_ok=True)
@@ -323,7 +235,6 @@ def main():
     history_sarah = []
 
     log("DIALOGUE_PROCESS", f"Processing {len(dialogue)} dialogue lines...")
-<<<<<<< HEAD
     with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
         future_to_index = {
             executor.submit(
@@ -382,40 +293,6 @@ def main():
     else:
         log("AUDIO_ERROR", "No audio files were generated successfully.")
 
-=======
-    for i, line in enumerate(dialogue):
-        log("DIALOGUE_LINE", f"Processing line {i+1}/{len(dialogue)}: {line['speaker']}")
-        if line['speaker'] == "Host":
-            voice_id = voice_host_id
-            history = history_host
-        elif line['speaker'] == "Alex":
-            voice_id = voice_alex_id
-            history = history_alex
-        else:  # Sarah
-            voice_id = voice_sarah_id
-            history = history_sarah
-        
-        audio_file, generation_id = text_to_speech_file(line['text'], voice_id, temp_folder, history)
-        if audio_file:
-            audio_files.append(audio_file)
-            history.append({"text": line['text'], "generation_id": generation_id})
-        else:
-            log("TTS_FAIL", f"Failed to generate audio for line {i+1}")
-
-    if audio_files:
-        output_file = "combined_dialogue.mp3"
-        combine_audio_files(audio_files, output_file)
-        log("OUTPUT", f"Combined audio saved as: {output_file}")
-    else:
-        log("AUDIO_ERROR", "No audio files were generated successfully.")
-
-    dialogue_output_file = "dialogue_transcript.txt"
-    with open(dialogue_output_file, "w") as f:
-        for line in dialogue:
-            f.write(f"{line['speaker']}: {line['text']}\n\n")
-    log("OUTPUT", f"Dialogue transcript saved as: {dialogue_output_file}")
-
->>>>>>> 55931718a2f0d4000a8192254d25b14605265944
     log("PROCESS_END", "Process completed successfully!")
 
 if __name__ == "__main__":