Skip to content

Commit aa3da7a

Browse files
committed
add video
1 parent a912a9d commit aa3da7a

File tree

1 file changed

+0
-123
lines changed

1 file changed

+0
-123
lines changed

main.py

-123
Original file line number | Diff line number | Diff line change
@@ -3,10 +3,7 @@
33
import json
44
import requests
55
import time
6-
<<<<<<< HEAD
76
import subprocess
8-
=======
9-
>>>>>>> 55931718a2f0d4000a8192254d25b14605265944
107
from datetime import datetime
118
from openai import OpenAI
129
from pydub import AudioSegment
@@ -51,7 +48,6 @@ def generate_dialogue():
5148

5249
log("BRAINSTORM", "Generating important news stories and discussion topics...")
5350
brainstorm_response = openai_client.chat.completions.create(
54-
<<<<<<< HEAD
5551
model=basemodel,
5652
messages=[
5753
{"role": "user", "content": "You are an AI assistant tasked with brainstorming important tech news stories and discussion topics. Focus on new models, data, news, rumors and hot topics. Ignore mundane support or debugging issues."},
@@ -75,43 +71,15 @@ def generate_dialogue():
7571
brainstormed_questions = questions_response.choices[0].message.content
7672
log("QUESTION_GEN", "Questions generation completed.")
7773
log("QUESTION_GEN_OUTPUT", brainstormed_questions)
78-
=======
79-
model="gpt-4o",
80-
messages=[
81-
{"role": "system", "content": "You are an AI assistant tasked with brainstorming important tech news stories and discussion topics. Focus on new models, data, news, and hot topics. Ignore mundane support or debugging issues."},
82-
{"role": "user", "content": f"Based on the following content, brainstorm the top 5 most important and interesting tech news stories or discussion items. For each topic, provide a brief explanation of why it's significant and how it relates to AI Engineering, machine learning, or tech innovation.\n\nContent: {content}"}
83-
]
84-
)
85-
86-
brainstormed_topics = brainstorm_response.choices[0].message.content
87-
log("BRAINSTORM", "Topics generation completed.")
88-
89-
log("QUESTION_GEN", "Generating key questions for each topic...")
90-
questions_response = openai_client.chat.completions.create(
91-
model="gpt-4o",
92-
messages=[
93-
{"role": "system", "content": "You are an AI assistant tasked with generating insightful questions about tech news items."},
94-
{"role": "user", "content": f"Based on the following brainstormed topics, generate 2-3 key questions for each topic that the reader or listener might want answered. These questions should be thought-provoking, slightly humorous, and encourage detailed explanations from Sarah.\n\nBrainstormed Topics:\n{brainstormed_topics}"}
95-
]
96-
)
97-
98-
brainstormed_questions = questions_response.choices[0].message.content
99-
log("QUESTION_GEN", "Questions generation completed.")
100-
>>>>>>> 55931718a2f0d4000a8192254d25b14605265944
10174

10275
log("DIALOGUE_GEN", "Generating dialogue using OpenAI GPT-4...")
10376
start_time = time.time()
10477
response = openai_client.chat.completions.create(
10578
model="gpt-4o",
10679
messages=[
10780
{"role": "system", "content": "You are an AI assistant tasked with generating a dialogue about tech news."},
108-
<<<<<<< HEAD
10981
{"role": "user", "content": f"Based on the following brainstormed news items, questions, and original content, generate a dialogue discussing the top 5 news of the day for a show called AI News Pod. Include an introduction by a host (Charlie) mentioning today's date, {datetime.now().strftime('%Y-%m-%d')}, and then a discussion between two voices: Karan (male) and Sarah (female). The host (Charlie) should only speak briefly at the start, just mentioning the date and major topics (not introducing himself or the Karan or Sarah), and then at each change of topic, and introduce the headline news and facts that Karan and Sarah will then discuss. Sarah should introduce the news (mentioning the sources they are from) and answer questions, while Karan should make funny/amusing but technical observations for an AI Engineer audience and ask follow-up questions for Sarah to answer. Use the brainstormed questions as a guide for Karan's inquiries. Give credit to the source discussing these topics. End with Charlie again telling listeners to send feedback to @smol_ai on Twitter.\n\nBrainstormed Topics:\n{brainstormed_topics}\n\nBrainstormed Questions:\n{brainstormed_questions}\n\nOriginal Content:\n{content}"},
11082
{"role": "user", "content": f"Sarah is a 35-year-old AI engineer. She has a Ph.D. in Computer Science from MIT and spent 7 years working as a researcher at Google DeepMind. Sarah is known for her in-depth knowledge and no-nonsense approach to tech news. She's an avid rock climber and often uses climbing metaphors in her explanations, but also loves cooking Thai food and surfing. Her catchphrase is 'What a time to be alive!' and her favorite AI lab is DeepMind.\n\nKaran is a 60-year-old Irish stand-up comedian with a degree in Communications from NYU. He fell into tech journalism by accident when his comedy podcast about ridiculous tech gadgets went viral. Karan brings a fresh, humorous perspective to tech news, often pointing out the absurd and making witty pop culture references. He's a passionate gamer and often relates tech news to video game scenarios, famous movies and tv shows or science fiction/fantasy books. His catchphrase is 'Super easy, barely an inconvenience!' and his favorite AI lab is OpenAI, mainly because he finds their name 'kind of ironic'."}
111-
=======
112-
{"role": "user", "content": f"Based on the following brainstormed news items, questions, and original content, generate a dialogue discussing the top 5 news of the day for a show called AI News Pod. Include an introduction by a host, and then a discussion between two voices: Alex (male) and Sarah (female). The host should only speak at the start, and then at each change of topic, and introduce the headline news and facts that Alex and Sarah will then discuss. Sarah should introduce the news (mentioning the sources they are from) and answer questions, while Alex should make funny/amusing observations and ask follow-up questions for Sarah to answer. Use the brainstormed questions as a guide for Alex's inquiries. Give credit to the source discussing these topics.\n\nBrainstormed Topics:\n{brainstormed_topics}\n\nBrainstormed Questions:\n{brainstormed_questions}\n\nOriginal Content:\n{content}"},
113-
{"role": "user", "content": f"Sarah is a 35-year-old former software engineer turned tech journalist. She has a Ph.D. in Computer Science from MIT and spent 5 years working at Google DeepMind. Sarah is known for her in-depth knowledge and no-nonsense approach to tech news. She's an avid rock climber and often uses climbing metaphors in her explanations. Her catchphrase is 'Feel the AGI!' and her favorite AI lab is DeepMind.\n\nAlex is a 29-year-old stand-up comedian with a degree in Communications from NYU. He fell into tech journalism by accident when his comedy podcast about ridiculous tech gadgets went viral. Alex brings a fresh, humorous perspective to tech news, often pointing out the absurd and making witty pop culture references. He's a passionate gamer and often relates tech news to video game scenarios. His catchphrase is 'Super easy, barely an inconvenience!' and his favorite AI lab is OpenAI, mainly because he finds their name 'kind of ironic'."}
114-
>>>>>>> 55931718a2f0d4000a8192254d25b14605265944
11583
],
11684
functions=[
11785
{
@@ -143,7 +111,6 @@ def generate_dialogue():
143111

144112
return response.choices[0].message.function_call.arguments
145113

146-
<<<<<<< HEAD
147114
def text_to_speech_file(name: str, text: str, voice_id: str, temp_folder: str, history: list, use_cartesia: bool = False, progress: tuple = None) -> tuple:
148115
if progress:
149116
current, total = progress
@@ -235,46 +202,6 @@ def text_to_speech_file(name: str, text: str, voice_id: str, temp_folder: str, h
235202
end_time = time.time()
236203
log("TTS", f"Audio file saved: {os.path.basename(save_file_path)} (generated in {end_time - start_time:.2f} seconds)")
237204
return save_file_path, generation_id, duration_sec # {{ edit_5 }}
238-
=======
239-
def text_to_speech_file(text: str, voice_id: str, temp_folder: str, history: list) -> tuple:
240-
log("TTS", f"Converting text to speech for voice {voice_id}...")
241-
start_time = time.time()
242-
url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}/stream"
243-
244-
headers = {
245-
"Accept": "audio/mpeg",
246-
"Content-Type": "application/json",
247-
"xi-api-key": ELEVENLABS_API_KEY
248-
}
249-
250-
data = {
251-
"text": text,
252-
"model_id": "eleven_turbo_v2",
253-
"voice_settings": {
254-
"stability": 0.5,
255-
"similarity_boost": 0.75
256-
}
257-
}
258-
259-
if history:
260-
data["history"] = history[-3:] # Use up to 3 previous generations
261-
262-
response = requests.post(url, json=data, headers=headers)
263-
264-
if response.status_code == 200:
265-
save_file_path = os.path.join(temp_folder, f"{uuid.uuid4()}.mp3")
266-
with open(save_file_path, "wb") as f:
267-
f.write(response.content)
268-
269-
end_time = time.time()
270-
log("TTS", f"Audio file saved: {save_file_path} (generated in {end_time - start_time:.2f} seconds)")
271-
272-
generation_id = response.headers.get("x-request-id")
273-
return save_file_path, generation_id
274-
else:
275-
log("TTS_ERROR", f"Error: {response.status_code} - {response.text}")
276-
return None, None
277-
>>>>>>> 55931718a2f0d4000a8192254d25b14605265944
278205

279206
def combine_audio_files(file_paths, output_file):
280207
log("AUDIO_COMBINE", "Combining audio files...")
@@ -287,27 +214,12 @@ def combine_audio_files(file_paths, output_file):
287214
combined = combined[:-200] # Remove the last silence
288215
combined.export(output_file, format="mp3")
289216
end_time = time.time()
290-
<<<<<<< HEAD
291217
log("AUDIO_COMBINE", f"Audio files combined with 300ms gaps in {end_time - start_time:.2f} seconds")
292-
=======
293-
log("AUDIO_COMBINE", f"Audio files combined in {end_time - start_time:.2f} seconds")
294-
>>>>>>> 55931718a2f0d4000a8192254d25b14605265944
295218

296219
def main():
297220
log("PROCESS_START", "Starting the dialogue generation and text-to-speech process...")
298221
dialogue_json = generate_dialogue()
299222
dialogue = json.loads(dialogue_json)['dialogue']
300-
<<<<<<< HEAD
301-
=======
302-
303-
temp_folder = f"temp_{uuid.uuid4()}"
304-
os.makedirs(temp_folder, exist_ok=True)
305-
log("TEMP_FOLDER", f"Created temporary folder: {temp_folder}")
306-
307-
voice_host_id = "ThT5KcBeYPX3keUQqHPh" # Charlie pre-made voice
308-
voice_alex_id = "pNInz6obpgDQGcFmaJgB" # Adam pre-made voice for Alex
309-
voice_sarah_id = "21m00Tcm4TlvDq8ikWAM" # Rachel pre-made voice for Sarah
310-
>>>>>>> 55931718a2f0d4000a8192254d25b14605265944
311223

312224
temp_folder = f"temp_{datetime.now().strftime('%Y-%m-%d_%H-%M')}_{uuid.uuid4()}"
313225
os.makedirs(temp_folder, exist_ok=True)
@@ -323,7 +235,6 @@ def main():
323235
history_sarah = []
324236

325237
log("DIALOGUE_PROCESS", f"Processing {len(dialogue)} dialogue lines...")
326-
<<<<<<< HEAD
327238
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
328239
future_to_index = {
329240
executor.submit(
@@ -382,40 +293,6 @@ def main():
382293
else:
383294
log("AUDIO_ERROR", "No audio files were generated successfully.")
384295

385-
=======
386-
for i, line in enumerate(dialogue):
387-
log("DIALOGUE_LINE", f"Processing line {i+1}/{len(dialogue)}: {line['speaker']}")
388-
if line['speaker'] == "Host":
389-
voice_id = voice_host_id
390-
history = history_host
391-
elif line['speaker'] == "Alex":
392-
voice_id = voice_alex_id
393-
history = history_alex
394-
else: # Sarah
395-
voice_id = voice_sarah_id
396-
history = history_sarah
397-
398-
audio_file, generation_id = text_to_speech_file(line['text'], voice_id, temp_folder, history)
399-
if audio_file:
400-
audio_files.append(audio_file)
401-
history.append({"text": line['text'], "generation_id": generation_id})
402-
else:
403-
log("TTS_FAIL", f"Failed to generate audio for line {i+1}")
404-
405-
if audio_files:
406-
output_file = "combined_dialogue.mp3"
407-
combine_audio_files(audio_files, output_file)
408-
log("OUTPUT", f"Combined audio saved as: {output_file}")
409-
else:
410-
log("AUDIO_ERROR", "No audio files were generated successfully.")
411-
412-
dialogue_output_file = "dialogue_transcript.txt"
413-
with open(dialogue_output_file, "w") as f:
414-
for line in dialogue:
415-
f.write(f"{line['speaker']}: {line['text']}\n\n")
416-
log("OUTPUT", f"Dialogue transcript saved as: {dialogue_output_file}")
417-
418-
>>>>>>> 55931718a2f0d4000a8192254d25b14605265944
419296
log("PROCESS_END", "Process completed successfully!")
420297

421298
if __name__ == "__main__":

0 commit comments

Comments
 (0)