3 changes: 3 additions & 0 deletions Contact_Me.txt
@@ -0,0 +1,3 @@
Name: Rishabh Agnihotri
email id: [email protected]
mobile no: 9198468894
106 changes: 106 additions & 0 deletions code.ipynb
@@ -0,0 +1,106 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import re\n",
"from transformers import pipeline\n",
"import wikipediaapi\n",
"\n",
"# Load the JSON data with UTF-8 encoding\n",
"with open('C:\\\\Users\\\\hp\\\\Desktop\\\\rishu_assignment\\\\news.article.json', 'r', encoding='utf-8') as file:\n",
" data = json.load(file)\n",
"\n",
"def preprocess_text(text):\n",
" # Remove special characters and extra spaces\n",
" text = re.sub(r'\\s+', ' ', text)\n",
" text = re.sub(r'[^\\w\\s]', '', text)\n",
" return text\n",
"\n",
"# Preprocess each article's text\n",
"for article in data:\n",
" article['cleaned_text'] = preprocess_text(article['articleBody'])\n",
"\n",
"# Filter relevant articles based on specific keywords\n",
"def is_relevant_article(article):\n",
" specific_keywords = [\"Al-Shifa Hospital\", \"Gaza hospital\", \"Israel airstrike\", \"Hamas attack\", \"Gaza conflict\"]\n",
" return any(keyword.lower() in article['cleaned_text'].lower() for keyword in specific_keywords)\n",
"\n",
"relevant_articles = [article for article in data if is_relevant_article(article)]\n",
"\n",
"# Load a pre-trained QA model\n",
"qa_pipeline = pipeline(\"question-answering\", model=\"distilbert-base-uncased-distilled-squad\")\n",
"\n",
"# Function to get additional context from Wikipedia with a specified user-agent\n",
"def get_wikipedia_context(topic):\n",
" user_agent = 'MyUserAgent/1.0 ([email protected])' # Replace with your own details\n",
" wiki_wiki = wikipediaapi.Wikipedia('en', headers={'User-Agent': user_agent})\n",
" page = wiki_wiki.page(topic)\n",
" if page.exists():\n",
" return page.summary\n",
" else:\n",
" return \"\"\n",
"\n",
"# Function to get combined context\n",
"def get_combined_context(relevant_articles, wiki_topic, max_length=5000):\n",
" context = \"\"\n",
" for article in relevant_articles:\n",
" if len(context) + len(article['cleaned_text']) > max_length:\n",
" break\n",
" context += article['cleaned_text'] + \" \"\n",
" \n",
" wiki_context = get_wikipedia_context(wiki_topic)\n",
" combined_context = context + \" \" + wiki_context\n",
" \n",
" # Debugging: Print combined context\n",
" print(\"Combined Context: \", combined_context[:1000]) # Print first 1000 characters to check content\n",
" \n",
" return combined_context\n",
"\n",
"# Function to get answer to a question\n",
"def get_answer(question, relevant_articles, wiki_topic):\n",
" combined_context = get_combined_context(relevant_articles, wiki_topic)\n",
" result = qa_pipeline(question=question, context=combined_context)\n",
" return result['answer']\n",
"\n",
"# Example question\n",
"question = \"What happened at the Al-Shifa Hospital?\"\n",
"wiki_topic = \"Al-Shifa Hospital Gaza conflict\"\n",
"answer = get_answer(question, relevant_articles, wiki_topic)\n",
"print(\"Answer: \", answer)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
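A note on the cell above: distilbert-base-uncased-distilled-squad can only attend to 512 tokens at a time, while the combined context built here can run to several thousand characters. The Hugging Face question-answering pipeline copes by splitting long contexts into overlapping windows; the sketch below makes that windowing explicit, assuming the cell above has already been executed (the max_seq_len and doc_stride values are illustrative choices, not taken from the notebook).

# Sketch: pass explicit windowing parameters so the long combined context
# is split into overlapping chunks that fit the model's input size.
result = qa_pipeline(
    question=question,
    context=get_combined_context(relevant_articles, wiki_topic),
    max_seq_len=384,   # tokens per window (the model's hard limit is 512)
    doc_stride=128,    # token overlap between consecutive windows
)
print(result["answer"], result["score"])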
19 changes: 19 additions & 0 deletions explanation.txt
@@ -0,0 +1,19 @@
I have completed the first part of Q1. Since the instructions state that only one of the two parts needs to be attempted, I did the first one.
Below is a step-by-step explanation:

1. Data Loading and Preprocessing
First, we load the JSON file containing the news articles. Since the raw text may contain special characters and extra spaces,
we clean it up (a small worked example of this cleaning step follows after step 6).
2. Filtering Relevant Articles
To ensure our QA system focuses on the Israel-Hamas war, we filter out irrelevant articles. We define a list of keywords related
to the conflict and keep only those articles that contain these keywords.
3. Load a Pre-trained QA Model
We use a pre-trained QA model from Hugging Face's Transformers library. In this case, we choose the distilbert-base-uncased-distilled-squad
model for its efficiency and accuracy in question answering tasks.
4. Augment Context with Wikipedia
To enhance the context available for the QA system, we fetch additional information from Wikipedia. This step helps provide a more
comprehensive background for the QA model.
5. Combine Context
We combine the filtered articles with the Wikipedia context. This combined context is then used by the QA model to answer questions.
6. Answer Questions
Finally, we create a function that uses the combined context to answer a given question.
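As mentioned in step 1, here is a small worked example of the cleaning function on a made-up string; it also shows why the keyword list used in step 2 should be cleaned the same way, since punctuation such as the hyphen in "Al-Shifa" is stripped from the article text.

import re

def preprocess_text(text):
    # Collapse runs of whitespace, then drop everything that is not a word character or a space
    text = re.sub(r'\s+', ' ', text)
    text = re.sub(r'[^\w\s]', '', text)
    return text

print(preprocess_text("Al-Shifa  Hospital,\nGaza."))   # -> AlShifa Hospital Gaza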
4 changes: 4 additions & 0 deletions requirements.txt
@@ -0,0 +1,4 @@
# json and re are Python standard-library modules and need no installation
transformers
torch
wikipedia-api
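Two small notes on this list: the wikipediaapi module imported in code.ipynb is distributed on PyPI as wikipedia-api, and the transformers question-answering pipeline also needs a deep-learning backend (PyTorch is assumed here). A quick sanity check after installation:

import transformers, torch, wikipediaapi
print(transformers.__version__, torch.__version__)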