Skip to content

Commit

Permalink
ADDED: NoteGem
Browse files Browse the repository at this point in the history
  • Loading branch information
AquibPy committed Jun 12, 2024
1 parent e81b498 commit a4eb2ee
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 2 deletions.
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,16 @@ percentage, missing keywords, and profile summary.
- **Image Quality Note:** Indicates if certain aspects are 'Unable to be determined based on the provided image.'
- **Disclaimer:** Includes the disclaimer: "Consult with a Doctor before making any decisions."

### 24. NoteGem: Automated Note-Taking Assistant

- **Route:** `/NoteGem`
- **Description:** This API endpoint leverages the Google Gemini AI Model to generate comprehensive notes from YouTube video transcripts.
- **Feature:**
- **Input Video URL:** Users can provide a YouTube video URL for processing.
- **Transcript Extraction:** The API extracts the transcript from the provided YouTube video.
- **Error Handling for Transcripts:** If no transcript is available for the video (for example, because transcripts are disabled or none can be found), the endpoint returns a message indicating that the transcript is not available.
- **AI Summary Generation:** The AI model generates a structured summary of the transcript focusing on main points, critical information, key takeaways, examples or case studies, quotes, and actionable steps.

## Usage

Each endpoint accepts specific parameters as described in the respective endpoint documentation. Users can make POST requests to these endpoints with the required parameters to perform the desired tasks.
Expand Down
34 changes: 33 additions & 1 deletion api.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from mongo import MongoDB
from helper_functions import get_qa_chain,get_gemini_response,get_url_doc_qa,extract_transcript_details,\
get_gemini_response_health,get_gemini_pdf,read_sql_query,remove_substrings,questions_generator,groq_pdf,\
summarize_audio,chatbot_send_message,extraxt_pdf_text,advance_rag_llama_index,parse_sql_response
summarize_audio,chatbot_send_message,extraxt_pdf_text,advance_rag_llama_index,parse_sql_response, extract_video_id
from langchain_groq import ChatGroq
from langchain.chains.conversation.base import ConversationChain
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
Expand All @@ -38,6 +38,7 @@
from langchain_community.agent_toolkits import SQLDatabaseToolkit
import tempfile
import shutil
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound


os.environ["LANGCHAIN_TRACING_V2"]="true"
Expand Down Expand Up @@ -737,6 +738,37 @@ async def medigem(image_file: UploadFile = File(...)):
result = db.insert_data(mongo_data)
print(result)
return ResponseText(response=remove_substrings(response.text))

@app.post("/NoteGem", description="This API endpoint leverages the Google Gemini AI Model to generate comprehensive notes from YouTube video transcripts")
def process_video(video_url: str = Form(...)):
    """Generate notes for a YouTube video from its transcript.

    Steps:
      1. Resolve the video id from ``video_url`` via ``extract_video_id``.
      2. Fetch the transcript and join its ``"text"`` fields with spaces.
      3. Send ``settings.NOTE_GEN_PROMPT`` followed by the transcript to the
         Gemini model named by ``settings.GEMINI_PRO_1_5``.
      4. Store the request/response pair through ``MongoDB.insert_data``
         and return the generated text.

    Args:
        video_url: Form field holding the YouTube video URL.

    Returns:
        ``ResponseText`` wrapping the generated notes, or a plain dict
        ``{"transcript": "Transcript not available", "error": True}`` when
        the transcript is disabled or cannot be found.

    Raises:
        HTTPException: 400 when no video id can be extracted from the URL;
            500 for any other failure while fetching the transcript,
            generating the notes, or persisting the result.
    """
    video_id = extract_video_id(video_url)
    if not video_id:
        raise HTTPException(status_code=400, detail="Invalid YouTube URL")

    # Stage 1: transcript retrieval. A missing transcript is reported in the
    # response body rather than as an HTTP error.
    try:
        segments = YouTubeTranscriptApi.get_transcript(video_id)
        transcript = " ".join(segment["text"] for segment in segments)
    except (TranscriptsDisabled, NoTranscriptFound):
        return {"transcript": "Transcript not available", "error": True}
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))

    # Stage 2: note generation and persistence. Any failure here, including
    # a database error, is surfaced as a 500.
    try:
        gemini = genai.GenerativeModel(settings.GEMINI_PRO_1_5)
        response = gemini.generate_content(settings.NOTE_GEN_PROMPT + transcript)
        summary = response.text
        store = MongoDB()
        record = {
            "Document": {
                "endpoint": "/NoteGem",
                "video_url": video_url,
                "output": response.text,
            }
        }
        insert_result = store.insert_data(record)
        print(insert_result)
        return ResponseText(response=summary)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))

if __name__ == '__main__':
import uvicorn
Expand Down
15 changes: 15 additions & 0 deletions helper_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,21 @@ def parse_sql_response(response):
formatted_response = '\n'.join(formatted_sql_statements)
return formatted_response

def extract_video_id(url):
    """Extract the YouTube video id from a URL.

    Recognised URL shapes:
      * ``...watch?v=<id>`` (``v`` may appear anywhere in the query string)
      * ``...youtu.be/<id>``
      * ``...embed/<id>``

    The id is taken as everything after the marker up to the next ``#``,
    ``&`` or ``?``.

    Args:
        url: The URL to inspect. ``None`` or an empty string is accepted.

    Returns:
        The video id as a non-empty string, or ``None`` when the URL is
        empty or contains no recognisable, non-empty id.
    """
    if not url:
        return None

    patterns = (
        # Anchor on the query-parameter boundary so that parameters merely
        # ending in "v" (e.g. "rev=1") are not mistaken for the video id.
        r"(?<=[?&]v=)[^#&?]*",
        # Covers short links such as "youtu.be/<id>"; a separate "youtu.be/"
        # pattern would be unreachable because this one always matches first.
        r"(?<=be/)[^#&?]*",
        r"(?<=embed/)[^#&?]*",
    )
    for pattern in patterns:
        match = re.search(pattern, url)
        # An empty capture (e.g. "watch?v=") is not a usable id; keep looking.
        if match and match.group(0):
            return match.group(0)
    return None

if __name__ == "__main__":
create_vector_db()
chain = get_qa_chain()
Expand Down
15 changes: 14 additions & 1 deletion settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,4 +141,17 @@
Please provide me an output with these 4 headings Detailed Analysis, Findings Report, Recommendations and Next Steps and Treatment Suggestions.
"""
"""

# Instruction prefix for the /NoteGem endpoint. The caller appends the video
# transcript directly after this text, so the prompt deliberately ends with
# "Please summarize the following text:".
NOTE_GEN_PROMPT = """
You are a professional note-taker with expertise in distilling key insights from video content. Your task is to generate a comprehensive, yet concise set of notes from the provided video transcript. Focus on the following:
1. Main points
2. Critical information
3. Key takeaways
4. Examples or case studies
5. Quotes or important statements
6. Actionable steps or recommendations
Make sure the notes are well-structured and formatted as bullet points. The total length should not exceed 1000 words. Please summarize the following text:
"""

0 comments on commit a4eb2ee

Please sign in to comment.