Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mvp for llm4dev #34

Merged
merged 11 commits into from
Apr 23, 2024
242 changes: 236 additions & 6 deletions llm/api.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import uuid
import os
import django
import json
import openai
from logging import basicConfig, INFO, getLogger

from pypdf import PdfReader
Expand All @@ -12,15 +14,14 @@
from rest_framework.decorators import api_view
from rest_framework.parsers import MultiPartParser
from rest_framework.views import APIView
import openai

from llm.utils.prompt import (
context_prompt_messages,
evaluate_criteria_score,
count_tokens_for_text,
)
from llm.utils.general import generate_session_id
from llm.models import Organization, Embedding, Message
from llm.models import Organization, Embedding, Message, File, KnowledgeCategory


basicConfig(level=INFO)
Expand Down Expand Up @@ -49,6 +50,12 @@ def create_chat(request):

openai.api_key = organization.openai_key

knowledge_cat = None
if "category_id" in request.data:
knowledge_cat = KnowledgeCategory.objects.filter(
id=request.data["category_id"]
).first()

question = request.data.get("question").strip()
system_prompt = (
request.data.get("system_prompt", None) or organization.system_prompt
Expand Down Expand Up @@ -120,11 +127,18 @@ def create_chat(request):
model="text-embedding-ada-002", input=question
)["data"][0]["embedding"]

embedding_results_query = Embedding.objects

if knowledge_cat:
embedding_results_query = embedding_results_query.filter(
file__knowledge_category=knowledge_cat
)

embedding_results = (
Embedding.objects.alias(
embedding_results_query.alias(
distance=L2Distance("text_vectors", prompt_embeddings),
)
.filter(distance__gt=0.7)
# .filter(distance__gt=0.7)
.order_by("-distance")
)
logger.info(
Expand Down Expand Up @@ -236,9 +250,35 @@ def post(self, request, format=None):

openai.api_key = org.openai_key

file = request.data["file"]
request_file = request.data["file"]

if "category_id" not in request.data:
return JsonResponse(
{"error": f"Please provide a category"},
status=status.HTTP_400_BAD_REQUEST,
)

knowledge_cat = KnowledgeCategory.objects.filter(
id=request.data["category_id"]
).first()

pdf_reader = PdfReader(file)
if not knowledge_cat:
return JsonResponse(
{"error": f"Category does not exist, please create one first"},
status=status.HTTP_404_NOT_FOUND,
)

logger.info("Using Knowledge Category : %s", knowledge_cat)

logger.info("Uploading file %s", request_file.name)

# Create the file object
file = File.objects.create(
knowledge_category=knowledge_cat,
name=request_file.name,
)

pdf_reader = PdfReader(request_file)
for page in pdf_reader.pages:
page_text = page.extract_text().replace("\n", " ")

Expand All @@ -259,6 +299,7 @@ def post(self, request, format=None):
text_vectors=embeddings,
organization=org,
num_tokens=count_tokens_for_text(page_text),
file=file,
)

return JsonResponse({"msg": f"Uploaded file {file.name} successfully"})
Expand Down Expand Up @@ -333,6 +374,7 @@ def set_examples_text(request):
'
"""
try:

org: Organization = request.org
logger.info(f"processing set examples text request for org {org.name}")

Expand Down Expand Up @@ -382,3 +424,191 @@ def set_openai_key(request):
{"error": f"Something went wrong"},
status=status.HTTP_500_INTERNAL_SERVER_ERROR,
)


@api_view(["POST"])
def create_knowledge_category(request):
    """
    Create a new knowledge category for the requesting organization.

    Request body:
        name: display name of the category (required, non-blank string).
            Surrounding whitespace is stripped before it is stored.

    Responses:
        200: JSON object with the new category's ``name``, ``uuid`` and ``id``.
        400: ``name`` is missing/blank, or the organization already has a
            category with that name.
        500: any unexpected failure (logged).
    """
    try:
        org: Organization = request.org

        raw_name = request.data.get("name")

        # A missing or non-string name previously crashed on ``.strip()`` and
        # was reported as a 500; reject it explicitly as a client error.
        if not isinstance(raw_name, str) or not raw_name.strip():
            return JsonResponse(
                {"error": "Please provide a name for the category"},
                status=status.HTTP_400_BAD_REQUEST,
            )

        # Normalize *before* the duplicate check so the check compares the
        # exact value that will be stored.
        name = raw_name.strip()

        # NOTE(review): the duplicate check is scoped to the org; if the model
        # also enforces a global unique constraint on ``name``, a clash with
        # another org's category would still fall through to the 500 handler.
        if KnowledgeCategory.objects.filter(name=name, org=org).exists():
            return JsonResponse(
                {"error": f"Knowledge Category with name {name} already exists"},
                status=status.HTTP_400_BAD_REQUEST,
            )

        knowledge_cat = KnowledgeCategory.objects.create(name=name, org=org)

        return JsonResponse(
            {
                "name": knowledge_cat.name,
                "uuid": knowledge_cat.uuid,
                "id": knowledge_cat.id,
            },
            status=status.HTTP_200_OK,
        )

    except Exception as error:
        logger.error(f"Error: {error}")
        return JsonResponse(
            {"error": "Something went wrong"},
            status=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )


@api_view(["GET"])
def get_knowledge_categories(request):
    """
    List every knowledge category that belongs to the requesting organization.

    Responses:
        200: ``{"data": [...]}`` where each entry carries the category's
            ``name``, ``uuid`` and ``id``.
        500: any unexpected failure (logged).
    """
    try:
        org: Organization = request.org

        # Build the payload row by row from the org's categories.
        categories = []
        for category in KnowledgeCategory.objects.filter(org=org):
            categories.append(
                {
                    "name": category.name,
                    "uuid": category.uuid,
                    "id": category.id,
                }
            )

        return JsonResponse({"data": categories}, status=status.HTTP_200_OK)

    except Exception as error:
        logger.error(f"Error: {error}")
        failure = {"error": "Something went wrong"}
        return JsonResponse(failure, status=status.HTTP_500_INTERNAL_SERVER_ERROR)


@api_view(["DELETE"])
def delete_knowledge_category(request, category_uuid):
    """
    Delete one of the requesting organization's knowledge categories.

    Args:
        category_uuid: UUID of the category to delete, taken from the URL.

    Responses:
        200: the category was deleted.
        400: ``category_uuid`` is not a valid UUID, or no category with that
            UUID exists for the organization.
        500: any unexpected failure (logged).

    NOTE(review): files (and their embeddings) attached to the category are
    presumably removed by the foreign-key cascade -- confirm against the models.
    """
    try:
        org: Organization = request.org

        try:
            # ``str()`` lets this also accept an already-parsed ``uuid.UUID``
            # (e.g. from a ``<uuid:...>`` URL converter); passing such an
            # object straight to ``uuid.UUID`` fails with a non-ValueError
            # and would be reported as a 500.
            uuid.UUID(str(category_uuid))
        except ValueError:
            return JsonResponse(
                {"error": "Invalid UUID"},
                status=status.HTTP_400_BAD_REQUEST,
            )

        # Scoped to the org so one organization cannot delete another's data.
        knowledge_cat = KnowledgeCategory.objects.filter(
            uuid=category_uuid, org=org
        ).first()

        if not knowledge_cat:
            return JsonResponse(
                {"error": "Knowledge Category does not exists"},
                status=status.HTTP_400_BAD_REQUEST,
            )

        knowledge_cat.delete()

        return JsonResponse(
            {"msg": "Category deleted successfully"},
            status=status.HTTP_200_OK,
        )

    except Exception as error:
        logger.error(f"Error: {error}")
        return JsonResponse(
            {"error": "Something went wrong"},
            status=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )


@api_view(["GET"])
def get_documents(request):
    """
    List every document (file) uploaded under the requesting organization.

    Responses:
        200: ``{"data": [...]}`` where each entry carries the file's ``name``
            and ``uuid`` plus a nested ``category`` object (``name``, ``uuid``,
            ``id``) describing the knowledge category it belongs to.
        500: any unexpected failure (logged).
    """
    try:
        org: Organization = request.org

        # ``select_related`` joins the category in the same query; without it
        # every ``file.knowledge_category`` access below costs one extra
        # SELECT per file (N+1 queries).
        files = File.objects.filter(knowledge_category__org=org).select_related(
            "knowledge_category"
        )

        return JsonResponse(
            {
                "data": [
                    {
                        "name": file.name,
                        "uuid": file.uuid,
                        "category": {
                            "name": file.knowledge_category.name,
                            "uuid": file.knowledge_category.uuid,
                            "id": file.knowledge_category.id,
                        },
                    }
                    for file in files
                ]
            },
            status=status.HTTP_200_OK,
        )

    except Exception as error:
        logger.error(f"Error: {error}")
        return JsonResponse(
            {"error": "Something went wrong"},
            status=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )


@api_view(["DELETE"])
def delete_document(request, file_uuid):
    """
    Delete one document (file) uploaded under the requesting organization.

    Args:
        file_uuid: UUID of the file to delete, taken from the URL.

    Responses:
        200: the file was deleted (the success message states its embeddings
            are removed with it).
        400: ``file_uuid`` is not a valid UUID, or no file with that UUID
            exists for the organization.
        500: any unexpected failure (logged).
    """
    try:
        org: Organization = request.org

        try:
            # ``str()`` lets this also accept an already-parsed ``uuid.UUID``
            # (e.g. from a ``<uuid:...>`` URL converter); passing such an
            # object straight to ``uuid.UUID`` fails with a non-ValueError
            # and would be reported as a 500.
            uuid.UUID(str(file_uuid))
        except ValueError:
            return JsonResponse(
                {"error": "Invalid UUID"},
                status=status.HTTP_400_BAD_REQUEST,
            )

        # Scoped through the category's org so one organization cannot delete
        # another organization's files.
        file = File.objects.filter(uuid=file_uuid, knowledge_category__org=org).first()

        if not file:
            return JsonResponse(
                {"error": "Document does not exists"},
                status=status.HTTP_400_BAD_REQUEST,
            )

        file.delete()

        return JsonResponse(
            {"msg": "File and its embeddings deleted successfully"},
            status=status.HTTP_200_OK,
        )

    except Exception as error:
        logger.error(f"Error: {error}")
        return JsonResponse(
            {"error": "Something went wrong"},
            status=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )
44 changes: 44 additions & 0 deletions llm/migrations/0013_knowledgecategory_file_embedding_file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Generated by Django 4.2.6 on 2024-04-17 03:00

from django.db import migrations, models
import django.db.models.deletion
import uuid


class Migration(migrations.Migration):
    """Add the KnowledgeCategory and File models and link Embedding to File."""

    dependencies = [
        ("llm", "0012_embedding_num_tokens"),
    ]

    operations = [
        # Table ``knowledge_category``: a named category owned by an organization.
        migrations.CreateModel(
            name="KnowledgeCategory",
            fields=[
                ("id", models.AutoField(primary_key=True, serialize=False)),
                (
                    "uuid",
                    models.UUIDField(default=uuid.uuid4, editable=False, unique=True),
                ),
                (
                    "name",
                    models.CharField(default="default", max_length=255, unique=True),
                ),
                (
                    "org",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        to="llm.organization",
                    ),
                ),
            ],
            options={
                "db_table": "knowledge_category",
            },
        ),
        # Table ``files``: an uploaded file, optionally attached to a category.
        migrations.CreateModel(
            name="File",
            fields=[
                ("id", models.AutoField(primary_key=True, serialize=False)),
                (
                    "uuid",
                    models.UUIDField(default=uuid.uuid4, editable=False, unique=True),
                ),
                ("name", models.CharField(max_length=255)),
                (
                    "knowledge_category",
                    models.ForeignKey(
                        null=True,
                        on_delete=django.db.models.deletion.SET_NULL,
                        to="llm.knowledgecategory",
                    ),
                ),
            ],
            options={
                "db_table": "files",
            },
        ),
        # Nullable link from each embedding to its file; deleting the file
        # cascades to the embedding rows.
        migrations.AddField(
            model_name="embedding",
            name="file",
            field=models.ForeignKey(
                null=True,
                on_delete=django.db.models.deletion.CASCADE,
                to="llm.file",
            ),
        ),
    ]
19 changes: 19 additions & 0 deletions llm/migrations/0014_alter_file_knowledge_category.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Generated by Django 4.2.6 on 2024-04-17 03:38

from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
    """Switch File.knowledge_category from SET_NULL to CASCADE on delete."""

    dependencies = [
        ("llm", "0013_knowledgecategory_file_embedding_file"),
    ]

    operations = [
        # The column stays nullable; only the on-delete behavior changes.
        migrations.AlterField(
            model_name="file",
            name="knowledge_category",
            field=models.ForeignKey(
                null=True,
                on_delete=django.db.models.deletion.CASCADE,
                to="llm.knowledgecategory",
            ),
        ),
    ]
Loading