diff --git a/.gitignore b/.gitignore
index d44432a..4befdbf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
-/api.env
+/server/api.env
+/.venv
diff --git a/Dockerfile b/Dockerfile
deleted file mode 100644
index caa2793..0000000
--- a/Dockerfile
+++ /dev/null
@@ -1,18 +0,0 @@
-FROM python:3.10-slim
-WORKDIR /app
-
-# Copy requirements first to leverage Docker caching
-COPY requirements.txt requirements.txt
-RUN apt-get update && apt-get install -y ffmpeg
-# Install dependencies
-RUN pip install --no-cache-dir -r requirements.txt
-
-# Copy the rest of the application
-COPY . .
-
-# Expose the Flask app port
-EXPOSE 5000
-
-
-# Start the Flask app
-CMD ["python", "main.py"]
\ No newline at end of file
diff --git a/api.env b/api.env
deleted file mode 100644
index e7c472e..0000000
--- a/api.env
+++ /dev/null
@@ -1 +0,0 @@
-OPENAI_API_KEY=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
\ No newline at end of file
diff --git a/audio_processing.py b/audio_processing.py
deleted file mode 100644
index 27dc493..0000000
--- a/audio_processing.py
+++ /dev/null
@@ -1,246 +0,0 @@
-import concurrent.futures
-from io import BytesIO
-
-import requests
-from pydub import AudioSegment
-import os
-from faster_whisper import WhisperModel, BatchedInferencePipeline
-from urllib.parse import urlparse, unquote
-import openai
-import re
-from dotenv import load_dotenv
-from helpers.cache_helpers import cache, initiate_key, cache_audio, cached_rss_url, cached_source_url
-from helpers.file_helpers import allowed_file, save_file, sanitize_filename
-import logging
-from helpers.url_helpers import normalize_url, generate_cache_url, extract_name, extract_title
-
-# Configure logging
-logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
-
-logger = logging.getLogger(__name__)
-
-ALLOWED_EXTENSIONS = {'wav', 'mp3', 'flac'}
-
-
-load_dotenv("api.env")
-# Initialize the whisper model
-model = WhisperModel("small.en", device="cpu", compute_type="int8")
-batched_model = BatchedInferencePipeline(model=model)
-api_key = os.getenv('OPENAI_API_KEY')
-client = openai.OpenAI(api_key=api_key)
-
-
-def chunk_audio(file_path, chunk_length_ms=360000):
-    """Splits an audio file into smaller chunks."""
-    audio = AudioSegment.from_file(file_path)
-    chunks = [(audio[i:i + chunk_length_ms], len(audio[i:i + chunk_length_ms]) / 1000)
-              for i in range(0, len(audio), chunk_length_ms)]
-    return chunks  # Returns chunks in memory instead of saving files
-
-
-def transcribe(audio_name):
-    audio = AudioSegment.from_file(audio_name)  # Access the audio
-    duration = audio.duration_seconds
-    if duration < 360:  # Less than 6 minutes (360 seconds)
-        logger.info("Audio file is less than 6 minutes, skipping chunking.")
-        chunk_files = [(audio_name, duration)]  # No chunking, just use the original file
-    else:
-        # Split the audio file into chunks if it's longer than 6
-        chunk_files = chunk_audio(audio_name)
-    total_duration = 0  # This variable is used to track the total duration of the chunks
-    all_transcriptions = []
-
-    for i, (chunk, chunk_duration) in enumerate(chunk_files):
-        logger.info(f"[INFO] Processing chunk {i+1}/{len(chunk_files)} - Duration: {chunk_duration:.2f} seconds")
-
-        buffer = BytesIO()
-        chunk.export(buffer, format="mp3")
-        buffer.seek(0)
-        segments, _ = batched_model.transcribe(buffer, word_timestamps=True, batch_size=8)
-        # Extract word-level timestamps
-        word_timestamps = [
-            {
-                "start": word.start + total_duration,
-                "end": word.end + total_duration,
-                "text": word.word
-            }
-            for segment in segments
-            for word in segment.words
-        ]
-        # Format the data to send to ChatGPT
-        words_with_timestamps = "\n".join(
-            [f"[{w['start']}-{w['end']}] {w['text']}" for w in word_timestamps]
-        )
-        all_transcriptions.append(words_with_timestamps)
-        logger.info(f"[INFO] Finished transcribing chunk {i+1}/{len(chunk_files)}")
-
-        total_duration += chunk_duration
-
-    logger.info(f"[INFO] Transcription complete for {audio_name}")
-    return "\n".join(all_transcriptions)
-
-
-def detect_ads(transcript):
-
-    try:
-        completion = \
-            client.chat.completions.create(
-                model="gpt-4o-mini",
-                messages=[
-                    {
-                        "role": "system",
-                        "content": "You are a system that detects ads in audio transcriptions. "
-                                   "Based on the word-level timestamps provided, determine the start and end times of any ad segments. "
-                                   "For each ad segment, provide a 5-word summary of the ad. "
-                                   "Provide ad segments in the format: start: