Skip to content

Commit

Permalink
Merge pull request #228 from UIUC-Chatbot/dependency_injection
Browse files Browse the repository at this point in the history
Major refactor introducing Dependency injection
  • Loading branch information
rohan-uiuc authored Mar 12, 2024
2 parents 107eef5 + cf5e4df commit bea873d
Show file tree
Hide file tree
Showing 33 changed files with 1,831 additions and 3,998 deletions.
2 changes: 1 addition & 1 deletion .env.template
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ SUPABASE_READ_ONLY=
SUPABASE_JWT_SECRET=

MATERIALS_SUPABASE_TABLE=uiuc_chatbot
NEW_NEW_NEWNEW_MATERIALS_SUPABASE_TABLE=documents
SUPABASE_DOCUMENTS_TABLE=documents

# QDRANT
QDRANT_COLLECTION_NAME=uiuc-chatbot
Expand Down
64 changes: 0 additions & 64 deletions ai_ta_backend/aws.py

This file was deleted.

23 changes: 11 additions & 12 deletions ai_ta_backend/beam/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import pytesseract
import sentry_sdk
import supabase
from beam import App, QueueDepthAutoscaler, RequestLatencyAutoscaler, Runtime
from beam import App, QueueDepthAutoscaler, Runtime # RequestLatencyAutoscaler,
from bs4 import BeautifulSoup
from git.repo import Repo
from langchain.document_loaders import (
Expand Down Expand Up @@ -962,7 +962,7 @@ def split_and_upload(self, texts: List[str], metadatas: List[Dict[str, Any]]):
}

response = self.supabase_client.table(
os.getenv('NEW_NEW_NEWNEW_MATERIALS_SUPABASE_TABLE')).insert(document).execute() # type: ignore
os.getenv('SUPABASE_DOCUMENTS_TABLE')).insert(document).execute() # type: ignore

# add to Nomic document map
if len(response.data) > 0:
Expand Down Expand Up @@ -991,7 +991,7 @@ def check_for_duplicates(self, texts: List[Dict], metadatas: List[Dict[str, Any]
For the given metadata, fetch docs from Supabase based on S3 path or URL.
If docs exist, concatenate their texts and compare with the current texts; if they are the same, return True.
"""
doc_table = os.getenv('NEW_NEW_NEWNEW_MATERIALS_SUPABASE_TABLE', '')
doc_table = os.getenv('SUPABASE_DOCUMENTS_TABLE', '')
course_name = metadatas[0]['course_name']
incoming_s3_path = metadatas[0]['s3_path']
url = metadatas[0]['url']
Expand Down Expand Up @@ -1090,8 +1090,8 @@ def delete_data(self, course_name: str, s3_path: str, source_url: str):
try:
# delete from Nomic
response = self.supabase_client.from_(
os.environ['NEW_NEW_NEWNEW_MATERIALS_SUPABASE_TABLE']).select("id, s3_path, contexts").eq(
's3_path', s3_path).eq('course_name', course_name).execute()
os.environ['SUPABASE_DOCUMENTS_TABLE']).select("id, s3_path, contexts").eq('s3_path', s3_path).eq(
'course_name', course_name).execute()
data = response.data[0] #single record fetched
nomic_ids_to_delete = []
context_count = len(data['contexts'])
Expand All @@ -1105,8 +1105,8 @@ def delete_data(self, course_name: str, s3_path: str, source_url: str):
sentry_sdk.capture_exception(e)

try:
self.supabase_client.from_(os.environ['NEW_NEW_NEWNEW_MATERIALS_SUPABASE_TABLE']).delete().eq(
's3_path', s3_path).eq('course_name', course_name).execute()
self.supabase_client.from_(os.environ['SUPABASE_DOCUMENTS_TABLE']).delete().eq('s3_path', s3_path).eq(
'course_name', course_name).execute()
except Exception as e:
print("Error in deleting file from supabase:", e)
sentry_sdk.capture_exception(e)
Expand Down Expand Up @@ -1134,9 +1134,8 @@ def delete_data(self, course_name: str, s3_path: str, source_url: str):
sentry_sdk.capture_exception(e)
try:
# delete from Nomic
response = self.supabase_client.from_(
os.environ['NEW_NEW_NEWNEW_MATERIALS_SUPABASE_TABLE']).select("id, url, contexts").eq(
'url', source_url).eq('course_name', course_name).execute()
response = self.supabase_client.from_(os.environ['SUPABASE_DOCUMENTS_TABLE']).select("id, url, contexts").eq(
'url', source_url).eq('course_name', course_name).execute()
data = response.data[0] #single record fetched
nomic_ids_to_delete = []
context_count = len(data['contexts'])
Expand All @@ -1151,8 +1150,8 @@ def delete_data(self, course_name: str, s3_path: str, source_url: str):

try:
# delete from Supabase
self.supabase_client.from_(os.environ['NEW_NEW_NEWNEW_MATERIALS_SUPABASE_TABLE']).delete().eq(
'url', source_url).eq('course_name', course_name).execute()
self.supabase_client.from_(os.environ['SUPABASE_DOCUMENTS_TABLE']).delete().eq('url', source_url).eq(
'course_name', course_name).execute()
except Exception as e:
print("Error in deleting file from supabase:", e)
sentry_sdk.capture_exception(e)
Expand Down
Loading

0 comments on commit bea873d

Please sign in to comment.