Title: Enhancing Large Language Models with Perturbative Fine-Tuning: A Comprehensive Guide

Introduction:
In the realm of natural language processing (NLP), large language models (LLMs) have revolutionized the way we interact with and analyze textual data. However, while pre-trained LLMs offer remarkable capabilities out of the box, they often need to be fine-tuned for specific domains or tasks to reach their full potential. Perturbative Fine-Tuning (PEFT), a method developed by Dr. Kosaraju, enables researchers and practitioners to enhance LLMs with domain-specific knowledge. In this blog post, we'll explore the ins and outs of PEFT and how it lets us tailor LLMs to our specific needs.

Understanding PEFT:
PEFT is a systematic approach to fine-tuning pre-trained LLMs: domain-specific perturbations are introduced and the model is iteratively refined on task-specific data. At its core, PEFT leverages the wealth of information already encoded in a pre-trained LLM and augments it with domain-specific knowledge, yielding models that excel at specialized tasks.

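The core idea of perturbing frozen pre-trained weights can be sketched in a few lines. Note that this is an illustrative reading, not a reference implementation of Dr. Kosaraju's method: the low-rank parameterization of the perturbation and the layer sizes are assumptions made for the sake of the example.

```python
import numpy as np

rng = np.random.default_rng(0)

# Frozen pre-trained weight matrix (stand-in for one LLM layer).
W = rng.standard_normal((8, 8))

# Small trainable perturbation, parameterized as a low-rank product
# (an assumption): delta = A @ B with rank r, so only a few
# parameters need to be updated while W stays untouched.
r = 2
A = 0.01 * rng.standard_normal((8, r))
B = 0.01 * rng.standard_normal((r, 8))

def forward(x):
    """Apply the perturbed layer: frozen base weights plus learned delta."""
    return x @ (W + A @ B)

x = rng.standard_normal((1, 8))
y = forward(x)
```

Because the perturbation starts near zero, the perturbed model initially behaves almost exactly like the base model, and fine-tuning only has to learn the domain-specific correction.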
The PEFT Process:
1. Data Preparation: The journey begins with gathering and preprocessing domain-specific datasets tailored to the task at hand. This step ensures that the fine-tuned model learns from relevant examples and the nuances inherent to the target domain.

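A minimal sketch of this preparation step, assuming a simple labeled-text task; the cleaning rules, the example data, and the 80/20 split are illustrative choices, not prescribed by PEFT.

```python
import random
import re

def preprocess(text):
    """Lowercase and collapse whitespace -- minimal, domain-agnostic cleaning."""
    return re.sub(r"\s+", " ", text.lower()).strip()

def prepare(examples, train_frac=0.8, seed=0):
    """Clean each (text, label) pair and split into train/eval sets."""
    cleaned = [(preprocess(t), y) for t, y in examples]
    random.Random(seed).shuffle(cleaned)
    cut = int(len(cleaned) * train_frac)
    return cleaned[:cut], cleaned[cut:]

# Toy domain-specific examples (hypothetical legal vs. general text).
raw = [("The  CONTRACT is void.", 1), ("Great movie!", 0),
       ("Clause 4.2   applies.", 1), ("Loved the acting.", 0),
       ("Severability clause.", 1)]
train, evaluation = prepare(raw)
```

A held-out evaluation split is kept from the start so that the refinement loop in step 4 has untouched data to measure progress against.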
2. Model Selection: Next, we select a suitable pre-trained LLM as the foundation for the fine-tuning process. The choice of base model depends on factors such as architecture, pre-training data, and available computational resources.

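One way to make these trade-offs concrete is to filter candidate checkpoints against a compute budget. The candidate names and the "largest model that fits" heuristic below are purely illustrative assumptions, not part of PEFT itself.

```python
# Hypothetical candidate checkpoints: (name, parameter count in billions).
CANDIDATES = [("tiny-lm", 0.1), ("base-lm", 1.3), ("large-lm", 7.0)]

def select_base_model(max_params_b):
    """Pick the largest candidate that fits the parameter budget
    (assumption: larger models are generally stronger, subject to
    the available compute)."""
    fitting = [c for c in CANDIDATES if c[1] <= max_params_b]
    if not fitting:
        raise ValueError("no candidate fits the budget")
    return max(fitting, key=lambda c: c[1])[0]
```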
3. Perturbation and Fine-Tuning: This is the heart of PEFT. We introduce domain-specific perturbations to the pre-trained model and guide it to adapt and specialize in the target domain through iterative fine-tuning steps.

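The iterative refinement can be illustrated with a toy stand-in: the frozen "model" is a linear map, the domain data comes from a slightly shifted target map, and gradient descent updates only the perturbation. Real PEFT would operate on LLM layers; every object here is a simplifying assumption.

```python
import numpy as np

rng = np.random.default_rng(1)

W = rng.standard_normal((4, 4))      # frozen pre-trained weights
delta = np.zeros((4, 4))             # trainable perturbation, starts at zero
W_target = W + 0.3 * rng.standard_normal((4, 4))  # domain-adapted weights

# Task data generated by the target map (stand-in for domain examples).
X = rng.standard_normal((64, 4))
Y = X @ W_target

lr = 0.01
for step in range(500):
    pred = X @ (W + delta)
    # Gradient of the squared-error loss (up to a constant factor);
    # note that only the perturbation is updated -- W never changes.
    grad = 2 * X.T @ (pred - Y) / len(X)
    delta -= lr * grad

final_loss = float(np.mean((X @ (W + delta) - Y) ** 2))
```

After training, W + delta closely matches the domain-adapted target even though the base weights were never touched, which is the essence of perturbative adaptation.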
4. Evaluation and Refinement: Finally, we evaluate the performance of the fine-tuned model using appropriate metrics and benchmarks. Based on the results, we refine the model further, iterating until satisfactory performance is achieved.

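For a classification task, this evaluate-then-decide step might look like the following; the accuracy metric and the 0.9 stopping threshold are illustrative choices, not values prescribed by PEFT.

```python
def accuracy(predictions, labels):
    """Fraction of predictions that match the gold labels."""
    assert len(predictions) == len(labels)
    hits = sum(p == y for p, y in zip(predictions, labels))
    return hits / len(labels)

def needs_refinement(score, threshold=0.9):
    """Trigger another fine-tuning iteration while performance is
    below the target (the threshold is an assumption for this sketch)."""
    return score < threshold

# Toy evaluation run: 4 of 5 predictions are correct.
preds, gold = [1, 0, 1, 1, 0], [1, 0, 0, 1, 0]
score = accuracy(preds, gold)
```

Here the score of 0.8 falls below the threshold, so the loop would return to step 3 for another round of perturbation and fine-tuning.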
Case Studies and Experiments:
To illustrate the effectiveness of PEFT, let's look at some real-world case studies and experiments. From legal text classification to sentiment analysis, PEFT has consistently proven effective at enhancing LLMs for diverse applications. These examples show how PEFT helps researchers and practitioners unlock new possibilities in NLP.

Conclusion:
As the demand for specialized NLP solutions continues to grow, Perturbative Fine-Tuning stands out as a practical way to adapt general-purpose models. By integrating domain-specific knowledge into pre-trained LLMs, PEFT gives us powerful tools for tackling complex tasks and domains. As we pursue fine-tuning and specialization, PEFT offers a clear path toward stronger, more specialized performance in natural language processing.