Skip to content

Commit

Permalink
Mvp for llm4dev (#34)
Browse files Browse the repository at this point in the history
* migrations for file, knowledge_category and mapping embeddings to a file

* document upload now maps embeddings to its file model

* resetting the correct migrations

* added organization to knowledge category

* api to add a knowledge category for an org

* validation; knowledge category names to be unique

* api to delete the knowledge category; will wipe out all files and embeddings

* added knowledge category filter in create chat api and also added a get all categories api

* api to get all files of an organization

* api to delete a file; category compulsory for an org

* use category_id in document upload and chat api
  • Loading branch information
Ishankoradia committed Apr 23, 2024
1 parent 7896250 commit 7ab0c1a
Show file tree
Hide file tree
Showing 5 changed files with 362 additions and 6 deletions.
242 changes: 236 additions & 6 deletions llm/api.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import uuid
import os
import django
import json
import openai
from logging import basicConfig, INFO, getLogger

from pypdf import PdfReader
Expand All @@ -12,15 +14,14 @@
from rest_framework.decorators import api_view
from rest_framework.parsers import MultiPartParser
from rest_framework.views import APIView
import openai

from llm.utils.prompt import (
context_prompt_messages,
evaluate_criteria_score,
count_tokens_for_text,
)
from llm.utils.general import generate_session_id
from llm.models import Organization, Embedding, Message
from llm.models import Organization, Embedding, Message, File, KnowledgeCategory


basicConfig(level=INFO)
Expand Down Expand Up @@ -49,6 +50,12 @@ def create_chat(request):

openai.api_key = organization.openai_key

knowledge_cat = None
if "category_id" in request.data:
knowledge_cat = KnowledgeCategory.objects.filter(
id=request.data["category_id"]
).first()

question = request.data.get("question").strip()
system_prompt = (
request.data.get("system_prompt", None) or organization.system_prompt
Expand Down Expand Up @@ -120,11 +127,18 @@ def create_chat(request):
model="text-embedding-ada-002", input=question
)["data"][0]["embedding"]

embedding_results_query = Embedding.objects

if knowledge_cat:
embedding_results_query = embedding_results_query.filter(
file__knowledge_category=knowledge_cat
)

embedding_results = (
Embedding.objects.alias(
embedding_results_query.alias(
distance=L2Distance("text_vectors", prompt_embeddings),
)
.filter(distance__gt=0.7)
# .filter(distance__gt=0.7)
.order_by("-distance")
)
logger.info(
Expand Down Expand Up @@ -236,9 +250,35 @@ def post(self, request, format=None):

openai.api_key = org.openai_key

file = request.data["file"]
request_file = request.data["file"]

if "category_id" not in request.data:
return JsonResponse(
{"error": f"Please provide a category"},
status=status.HTTP_400_BAD_REQUEST,
)

knowledge_cat = KnowledgeCategory.objects.filter(
id=request.data["category_id"]
).first()

pdf_reader = PdfReader(file)
if not knowledge_cat:
return JsonResponse(
{"error": f"Category does not exist, please create one first"},
status=status.HTTP_404_NOT_FOUND,
)

logger.info("Using Knowledge Category : %s", knowledge_cat)

logger.info("Uploading file %s", request_file.name)

# Create the file object
file = File.objects.create(
knowledge_category=knowledge_cat,
name=request_file.name,
)

pdf_reader = PdfReader(request_file)
for page in pdf_reader.pages:
page_text = page.extract_text().replace("\n", " ")

Expand All @@ -259,6 +299,7 @@ def post(self, request, format=None):
text_vectors=embeddings,
organization=org,
num_tokens=count_tokens_for_text(page_text),
file=file,
)

return JsonResponse({"msg": f"Uploaded file {file.name} successfully"})
Expand Down Expand Up @@ -333,6 +374,7 @@ def set_examples_text(request):
'
"""
try:

org: Organization = request.org
logger.info(f"processing set examples text request for org {org.name}")

Expand Down Expand Up @@ -382,3 +424,191 @@ def set_openai_key(request):
{"error": f"Something went wrong"},
status=status.HTTP_500_INTERNAL_SERVER_ERROR,
)


@api_view(["POST"])
def create_knowledge_category(request):
    """
    Create a new knowledge category for the requesting org.

    Request body:
        name: display name of the category (required, non-blank string).

    The name is trimmed once up front so that the duplicate check and the
    stored value always agree; previously the check used the raw value
    while the row was created with the stripped one, letting names such as
    "foo " slip past the check.

    Responses:
        200 with ``name``, ``uuid`` and ``id`` of the created category.
        400 when the name is missing/blank or already used by this org.
        500 on any unexpected error.
    """
    try:
        org: Organization = request.org

        raw_name = request.data.get("name")

        # Reject missing / non-string / whitespace-only names explicitly
        # instead of letting `.strip()` blow up into a generic 500.
        if not isinstance(raw_name, str) or not raw_name.strip():
            return JsonResponse(
                {"error": "Please provide a name for the knowledge category"},
                status=status.HTTP_400_BAD_REQUEST,
            )

        name = raw_name.strip()

        if KnowledgeCategory.objects.filter(name=name, org=org).exists():
            return JsonResponse(
                {"error": f"Knowledge Category with name {name} already exists"},
                status=status.HTTP_400_BAD_REQUEST,
            )

        knowledge_cat = KnowledgeCategory.objects.create(name=name, org=org)

        return JsonResponse(
            {
                "name": knowledge_cat.name,
                "uuid": knowledge_cat.uuid,
                "id": knowledge_cat.id,
            },
            status=status.HTTP_200_OK,
        )

    except Exception as error:
        logger.error(f"Error: {error}")
        return JsonResponse(
            {"error": "Something went wrong"},
            status=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )


@api_view(["GET"])
def get_knowledge_categories(request):
    """
    List every knowledge category that belongs to the requesting org.

    Responds with ``{"data": [...]}`` where each entry carries the
    category's ``name``, ``uuid`` and ``id``; any unexpected error is
    logged and reported as a 500.
    """
    try:
        org: Organization = request.org

        categories = []
        for category in KnowledgeCategory.objects.filter(org=org):
            categories.append(
                {
                    "name": category.name,
                    "uuid": category.uuid,
                    "id": category.id,
                }
            )

        return JsonResponse({"data": categories}, status=status.HTTP_200_OK)

    except Exception as error:
        logger.error(f"Error: {error}")
        return JsonResponse(
            {"error": f"Something went wrong"},
            status=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )


@api_view(["DELETE"])
def delete_knowledge_category(request, category_uuid):
    """
    Delete one of the requesting org's knowledge categories.

    ``category_uuid`` is taken from the URL and must parse as a UUID.
    The lookup is scoped to the caller's org, so a category owned by
    another org is reported as not existing.

    Responses:
        200 when the category was deleted.
        400 when the UUID is malformed or no matching category exists.
        500 on any unexpected error.
    """
    try:
        org: Organization = request.org

        # Validate the path parameter before touching the database.
        try:
            uuid.UUID(category_uuid)
        except ValueError:
            return JsonResponse(
                {"error": "Invalid UUID"},
                status=status.HTTP_400_BAD_REQUEST,
            )

        category = KnowledgeCategory.objects.filter(
            uuid=category_uuid, org=org
        ).first()

        if category is None:
            return JsonResponse(
                {"error": f"Knowledge Category does not exists"},
                status=status.HTTP_400_BAD_REQUEST,
            )

        category.delete()

        return JsonResponse(
            {"msg": f"Category deleted successfully"},
            status=status.HTTP_200_OK,
        )

    except Exception as error:
        logger.error(f"Error: {error}")
        return JsonResponse(
            {"error": f"Something went wrong"},
            status=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )


@api_view(["GET"])
def get_documents(request):
    """
    Fetch all documents (files) uploaded by the requesting org.

    Files are matched through their knowledge category's org. Each entry
    in the ``data`` list contains the file's ``name`` and ``uuid`` plus a
    nested ``category`` object with the category's ``name``, ``uuid`` and
    ``id``.

    Responses:
        200 with ``{"data": [...]}``.
        500 on any unexpected error.
    """
    try:
        org: Organization = request.org

        # select_related pulls each file's category in the same query;
        # without it every `file.knowledge_category` access below would
        # issue its own SELECT (one extra query per file).
        files = File.objects.filter(knowledge_category__org=org).select_related(
            "knowledge_category"
        )

        return JsonResponse(
            {
                "data": [
                    {
                        "name": file.name,
                        "uuid": file.uuid,
                        "category": {
                            "name": file.knowledge_category.name,
                            "uuid": file.knowledge_category.uuid,
                            "id": file.knowledge_category.id,
                        },
                    }
                    for file in files
                ]
            },
            status=status.HTTP_200_OK,
        )

    except Exception as error:
        logger.error(f"Error: {error}")
        return JsonResponse(
            {"error": "Something went wrong"},
            status=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )


@api_view(["DELETE"])
def delete_document(request, file_uuid):
    """
    Delete a single uploaded document belonging to the requesting org.

    ``file_uuid`` is taken from the URL and must parse as a UUID. The
    file is looked up through its knowledge category's org, so files of
    other orgs are reported as not existing.

    Responses:
        200 when the file was deleted.
        400 when the UUID is malformed or no matching file exists.
        500 on any unexpected error.
    """
    try:
        org: Organization = request.org

        # Validate the path parameter before touching the database.
        try:
            uuid.UUID(file_uuid)
        except ValueError:
            return JsonResponse(
                {"error": "Invalid UUID"},
                status=status.HTTP_400_BAD_REQUEST,
            )

        document = File.objects.filter(
            uuid=file_uuid, knowledge_category__org=org
        ).first()

        if document is None:
            return JsonResponse(
                {"error": f"Document does not exists"},
                status=status.HTTP_400_BAD_REQUEST,
            )

        document.delete()

        return JsonResponse(
            {"msg": f"File and its embeddings deleted successfully"},
            status=status.HTTP_200_OK,
        )

    except Exception as error:
        logger.error(f"Error: {error}")
        return JsonResponse(
            {"error": f"Something went wrong"},
            status=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )
44 changes: 44 additions & 0 deletions llm/migrations/0013_knowledgecategory_file_embedding_file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Generated by Django 4.2.6 on 2024-04-17 03:00

from django.db import migrations, models
import django.db.models.deletion
import uuid


class Migration(migrations.Migration):
    """Add the KnowledgeCategory and File tables and link Embedding to File."""

    dependencies = [
        ('llm', '0012_embedding_num_tokens'),
    ]

    operations = [
        # A named bucket of knowledge owned by one organization.
        migrations.CreateModel(
            name='KnowledgeCategory',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                (
                    'uuid',
                    models.UUIDField(default=uuid.uuid4, editable=False, unique=True),
                ),
                (
                    'name',
                    models.CharField(default='default', max_length=255, unique=True),
                ),
                (
                    'org',
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        to='llm.organization',
                    ),
                ),
            ],
            options={
                'db_table': 'knowledge_category',
            },
        ),
        # An uploaded document, optionally attached to a knowledge category.
        migrations.CreateModel(
            name='File',
            fields=[
                ('id', models.AutoField(primary_key=True, serialize=False)),
                (
                    'uuid',
                    models.UUIDField(default=uuid.uuid4, editable=False, unique=True),
                ),
                ('name', models.CharField(max_length=255)),
                (
                    'knowledge_category',
                    models.ForeignKey(
                        null=True,
                        on_delete=django.db.models.deletion.SET_NULL,
                        to='llm.knowledgecategory',
                    ),
                ),
            ],
            options={
                'db_table': 'files',
            },
        ),
        # Nullable so embeddings that predate this migration remain valid;
        # deleting a file removes its embeddings.
        migrations.AddField(
            model_name='embedding',
            name='file',
            field=models.ForeignKey(
                null=True,
                on_delete=django.db.models.deletion.CASCADE,
                to='llm.file',
            ),
        ),
    ]
19 changes: 19 additions & 0 deletions llm/migrations/0014_alter_file_knowledge_category.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Generated by Django 4.2.6 on 2024-04-17 03:38

from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
    """Switch File.knowledge_category from SET_NULL to CASCADE on delete."""

    dependencies = [
        ('llm', '0013_knowledgecategory_file_embedding_file'),
    ]

    operations = [
        # Deleting a knowledge category now deletes its files as well
        # (previously the reference was set to NULL).
        migrations.AlterField(
            model_name='file',
            name='knowledge_category',
            field=models.ForeignKey(
                null=True,
                on_delete=django.db.models.deletion.CASCADE,
                to='llm.knowledgecategory',
            ),
        ),
    ]
Loading

0 comments on commit 7ab0c1a

Please sign in to comment.