Skip to content

Commit

Permalink
ADDED: Audio Summarizer using Gemini 1.5 Pro
Browse files Browse the repository at this point in the history
  • Loading branch information
AquibPy committed Apr 18, 2024
1 parent 8a3c8bc commit 369f1aa
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 2 deletions.
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,11 @@ Generative AI, powered by advanced machine learning models, enables the creation
- **Route:** `/RAG_PDF_Groq`
- **Description:** This endpoint uses the uploaded PDF to answer the provided prompt using Groq, with a default model of llama2-70b-4096 and alternatives such as mixtral-8x7b-32768 and gemma-7b-it.

### 13. Audio Summarizer

- **Route:** `/summarize_audio`
- **Description:** Endpoint to summarize an uploaded audio file using gemini-1.5-pro-latest.

## Usage

Each endpoint accepts specific parameters as described in the respective endpoint documentation. Users can make POST requests to these endpoints with the required parameters to perform the desired tasks.
Expand Down
21 changes: 19 additions & 2 deletions api.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
from settings import invoice_prompt,youtube_transcribe_prompt,text2sql_prompt,EMPLOYEE_DB,GEMINI_PRO,GEMINI_PRO_1_5
from mongo import MongoDB
from helper_functions import get_qa_chain,get_gemini_response,get_url_doc_qa,extract_transcript_details,\
get_gemini_response_health,get_gemini_pdf,read_sql_query,remove_substrings,questions_generator,groq_pdf
get_gemini_response_health,get_gemini_pdf,read_sql_query,remove_substrings,questions_generator,groq_pdf,\
summarize_audio
from langchain_groq import ChatGroq
from langchain.chains import ConversationChain
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
Expand Down Expand Up @@ -308,4 +309,20 @@ async def talk_pd_groq(pdf: UploadFile = File(...),prompt: str = Form(...),
print(result)
return ResponseText(response=out)
except Exception as e:
return ResponseText(response=f"Error: {str(e)}")
return ResponseText(response=f"Error: {str(e)}")

@app.post("/summarize_audio",description="""Endpoint to summarize an uploaded audio file using gemini-1.5-pro-latest.""")
async def summarize_audio_endpoint(audio_file: UploadFile = File(...)):
    """Summarize an uploaded audio file and log the summary through MongoDB.

    Args:
        audio_file: The uploaded audio file, forwarded unchanged to
            ``summarize_audio``.

    Returns:
        A ``ResponseText`` whose ``response`` is the summary text on success,
        or an ``"Error: ..."`` message if any step raised.
    """
    try:
        summary_text = await summarize_audio(audio_file)

        # Record the endpoint name and its output via the MongoDB helper.
        db = MongoDB()
        payload = {
            "endpoint" : "/summarize_audio",
            "output" : summary_text
        }
        mongo_data = {"Document": payload}
        result = db.insert_data(mongo_data)
        print(result)

        return ResponseText(response=summary_text)
    except Exception as e:
        # Use the same ResponseText envelope as the success path (and as the
        # other endpoints in this module) so clients always get one shape.
        return ResponseText(response=f"Error: {str(e)}")
Binary file added data/harvard.wav
Binary file not shown.
23 changes: 23 additions & 0 deletions helper_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from PyPDF2 import PdfReader
import sqlite3
from langchain_community.embeddings import GooglePalmEmbeddings
import tempfile

load_dotenv()
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
Expand Down Expand Up @@ -212,6 +213,28 @@ def groq_pdf(pdf,model):
)
return rag_chain

async def summarize_audio(audio_file):
    """Summarize the audio using Google's Generative API.

    The upload is spooled to a temporary file on disk because
    ``genai.upload_file`` is given a filesystem path. The temporary file is
    always removed before this coroutine returns or raises.

    Args:
        audio_file: An uploaded-file object exposing a ``filename`` attribute
            and an awaitable ``read()`` that returns the file's bytes.

    Returns:
        The ``text`` attribute of the model's response.

    Raises:
        Exception: If the upload cannot be read or written to the temporary
            file (the original error is chained as the cause). Errors raised
            by the ``genai`` calls propagate unchanged.
    """
    model = genai.GenerativeModel("models/gemini-1.5-pro-latest")

    audio_file_path = None
    try:
        # Save the audio file to a temporary file
        try:
            # The filename is client-supplied: keep only the extension of its
            # final path component so no path separators can leak into the
            # temporary file's name. A missing filename or extension simply
            # yields an empty suffix.
            client_name = os.path.basename(audio_file.filename or "")
            suffix = os.path.splitext(client_name)[1]
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
                # Remember the path before writing so a failed write is still
                # cleaned up by the outer ``finally``.
                audio_file_path = tmp_file.name
                tmp_file.write(await audio_file.read())
        except Exception as e:
            raise Exception(f"Error handling uploaded file: {e}") from e

        # NOTE(review): these two genai calls are not awaited, so they run
        # synchronously inside this coroutine.
        uploaded_audio = genai.upload_file(path=audio_file_path)
        response = model.generate_content(
            [
                "Please summarize the following audio.",
                uploaded_audio,
            ]
        )

        return response.text
    finally:
        # delete=False means the file outlives the ``with`` block; remove it
        # here so each request does not leak a file in the temp directory.
        if audio_file_path is not None:
            try:
                os.remove(audio_file_path)
            except OSError:
                # Best-effort cleanup: never mask the real result or error.
                pass

if __name__ == "__main__":
create_vector_db()
chain = get_qa_chain()
Expand Down

0 comments on commit 369f1aa

Please sign in to comment.