Skip to content

Commit

Permalink
Add new method to SQLDatabase, update environment variable usage, some debugging logs
Browse files Browse the repository at this point in the history
  • Loading branch information
rohan-uiuc committed Mar 8, 2024
1 parent 75251d1 commit f5a36fc
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 28 deletions.
6 changes: 3 additions & 3 deletions ai_ta_backend/database/aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ def __init__(self):
# S3
self.s3_client = boto3.client(
's3',
aws_access_key_id=os.getenv('AWS_ACCESS_KEY_ID'),
aws_secret_access_key=os.getenv('AWS_SECRET_ACCESS_KEY'),
aws_access_key_id=os.environ['AWS_ACCESS_KEY_ID'],
aws_secret_access_key=os.environ['AWS_SECRET_ACCESS_KEY'],
)

def upload_file(self, file_path: str, bucket_name: str, object_name: str):
Expand All @@ -22,7 +22,7 @@ def download_file(self, object_name: str, bucket_name: str, file_path: str):
self.s3_client.download_file(bucket_name, object_name, file_path)

def delete_file(self, bucket_name: str, s3_path: str):
self.s3_client.delete_object(Bucket=bucket_name, Key=s3_path)
return self.s3_client.delete_object(Bucket=bucket_name, Key=s3_path)

def generatePresignedUrl(self, object: str, bucket_name: str, s3_path: str, expiration: int = 3600):
# generate presigned URL
Expand Down
3 changes: 3 additions & 0 deletions ai_ta_backend/database/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,6 @@ def getDocsForIdsGte(self, course_name: str, first_id: int, fields: str = "*", l

def insertProjectInfo(self, project_info):
return self.supabase_client.table("projects").insert(project_info).execute()

def getAllFromLLMConvoMonitor(self, course_name: str):
return self.supabase_client.table("llm-convo-monitor").select("*").eq("course_name", course_name).execute()
6 changes: 3 additions & 3 deletions ai_ta_backend/database/vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ def __init__(self):
"""
# vector DB
self.qdrant_client = QdrantClient(
url=os.getenv('QDRANT_URL'),
api_key=os.getenv('QDRANT_API_KEY'),
url=os.environ['QDRANT_URL'],
api_key=os.environ['QDRANT_API_KEY'],
)

self.vectorstore = Qdrant(client=self.qdrant_client,
Expand Down Expand Up @@ -50,7 +50,7 @@ def delete_data(self, collection_name: str, key: str, value: str):
"""
Delete data from the vector database.
"""
self.qdrant_client.delete(
return self.qdrant_client.delete(
collection_name=collection_name,
points_selector=models.Filter(must=[
models.FieldCondition(
Expand Down
17 changes: 7 additions & 10 deletions ai_ta_backend/service/nomic_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ class NomicService():

@inject
def __init__(self, sentry: SentryService, sql: SQLDatabase):
nomic.login(os.getenv('NOMIC_API_KEY'))
nomic.login(os.environ['NOMIC_API_KEY'])
self.sentry = sentry
self.sql = sql

Expand Down Expand Up @@ -258,18 +258,15 @@ def create_nomic_map(self, course_name: str, log_data: list):
2. appends current embeddings and metadata to it
2. creates map if there are at least 20 queries
"""
nomic.login(os.getenv('NOMIC_API_KEY')) # login during start of flask app
nomic.login(os.environ['NOMIC_API_KEY']) # login during start of flask app
NOMIC_MAP_NAME_PREFIX = 'Conversation Map for '

print(f"in create_nomic_map() for {course_name}")
# initialize supabase
supabase_client = supabase.create_client( # type: ignore
supabase_url=os.getenv('SUPABASE_URL'), # type: ignore
supabase_key=os.getenv('SUPABASE_API_KEY')) # type: ignore

try:
# fetch all conversations with this new course (we expect <=20 conversations, because otherwise the map should be made already)
response = supabase_client.table("llm-convo-monitor").select("*").eq("course_name", course_name).execute()

response = self.sql.getAllFromLLMConvoMonitor(course_name)
data = response.data
df = pd.DataFrame(data)

Expand Down Expand Up @@ -627,7 +624,7 @@ def create_map(self, embeddings, metadata, map_name, index_name, topic_label_fie
topic_label_field: str
colorable_fields: list of str
"""
nomic.login(os.getenv('NOMIC_API_KEY'))
nomic.login(os.environ['NOMIC_API_KEY'])

try:
project = atlas.map_embeddings(embeddings=embeddings,
Expand All @@ -652,7 +649,7 @@ def append_to_map(self, embeddings, metadata, map_name):
metadata: pd.DataFrame of Nomic upload metadata
map_name: str
"""
nomic.login(os.getenv('NOMIC_API_KEY'))
nomic.login(os.environ['NOMIC_API_KEY'])
try:
project = atlas.AtlasProject(name=map_name, add_datums_if_exists=True)
with project.wait_for_project_lock():
Expand Down Expand Up @@ -714,7 +711,7 @@ def data_prep_for_doc_map(self, df: pd.DataFrame):
# openai_api_key=os.getenv('AZURE_OPENAI_KEY')) # type: ignore
embeddings_model = OpenAIEmbeddings(openai_api_type="openai",
openai_api_base="https://api.openai.com/v1/",
openai_api_key=os.getenv('VLADS_OPENAI_KEY')) # type: ignore
openai_api_key=os.environ['VLADS_OPENAI_KEY'])
embeddings = embeddings_model.embed_documents(texts)

metadata = pd.DataFrame(metadata)
Expand Down
32 changes: 20 additions & 12 deletions ai_ta_backend/service/retrieval_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,22 +34,22 @@ def __init__(self, vdb: VectorDatabase, sqlDb: SQLDatabase, aws: AWSStorage, pos
self.posthog = posthog
self.nomicService = nomicService

openai.api_key = os.getenv("OPENAI_API_KEY")
openai.api_key = os.environ["OPENAI_API_KEY"]

self.embeddings = OpenAIEmbeddings(
model='text-embedding-ada-002',
openai_api_base=os.getenv("AZURE_OPENAI_ENDPOINT"), # type:ignore
openai_api_base=os.environ["AZURE_OPENAI_ENDPOINT"],
openai_api_type=os.environ['OPENAI_API_TYPE'],
openai_api_key=os.getenv("AZURE_OPENAI_KEY"), # type:ignore
openai_api_version=os.getenv("OPENAI_API_VERSION"), # type:ignore
openai_api_key=os.environ["AZURE_OPENAI_KEY"],
openai_api_version=os.environ["OPENAI_API_VERSION"],
)

self.llm = AzureChatOpenAI(
temperature=0,
deployment_name=os.getenv("AZURE_OPENAI_ENGINE"), # type:ignore
openai_api_base=os.getenv("AZURE_OPENAI_ENDPOINT"), # type:ignore
openai_api_key=os.getenv("AZURE_OPENAI_KEY"), # type:ignore
openai_api_version=os.getenv("OPENAI_API_VERSION"), # type:ignore
deployment_name=os.environ["AZURE_OPENAI_ENGINE"],
openai_api_base=os.environ["AZURE_OPENAI_ENDPOINT"],
openai_api_key=os.environ["AZURE_OPENAI_KEY"],
openai_api_version=os.environ["OPENAI_API_VERSION"],
openai_api_type=os.environ['OPENAI_API_TYPE'],
)

Expand Down Expand Up @@ -150,7 +150,7 @@ def delete_data(self, course_name: str, s3_path: str, source_url: str):
# add delete from doc map logic here
try:
# Delete file from S3
bucket_name = os.getenv('S3_BUCKET_NAME')
bucket_name = os.environ['S3_BUCKET_NAME']
if bucket_name is None:
raise ValueError("S3_BUCKET_NAME environment variable is not set")

Expand All @@ -176,14 +176,18 @@ def delete_data(self, course_name: str, s3_path: str, source_url: str):

def delete_from_s3(self, bucket_name: str, s3_path: str):
try:
self.aws.delete_file(bucket_name, s3_path)
print("Deleting from S3")
response = self.aws.delete_file(bucket_name, s3_path)
print(f"AWS response: {response}")
except Exception as e:
print("Error in deleting file from s3:", e)
self.sentry.capture_exception(e)

def delete_from_qdrant(self, identifier_key: str, identifier_value: str):
try:
self.vdb.delete_data(os.environ['QDRANT_COLLECTION_NAME'], identifier_key, identifier_value)
print("Deleting from Qdrant")
response = self.vdb.delete_data(os.environ['QDRANT_COLLECTION_NAME'], identifier_key, identifier_value)
print(f"Qdrant response: {response}")
except Exception as e:
if "timed out" in str(e):
# Timed out is fine. Still deletes.
Expand Down Expand Up @@ -311,7 +315,9 @@ def format_for_json_mqr(self, found_docs) -> List[Dict]:

def delete_from_nomic_and_supabase(self, course_name: str, identifier_key: str, identifier_value: str):
try:
print(f"Deleting from Nomic and Supabase for {course_name} using {identifier_key}: {identifier_value}")
response = self.sqlDb.getMaterialsForCourseAndKeyAndValue(course_name, identifier_key, identifier_value)
print(f"Trying to delete materials: {response}")
data = response.data[0] # single record fetched
nomic_ids_to_delete = [str(data['id']) + "_" + str(i) for i in range(1, len(data['contexts']) + 1)]

Expand All @@ -325,7 +331,9 @@ def delete_from_nomic_and_supabase(self, course_name: str, identifier_key: str,
self.nomicService.delete_from_document_map(project_id, nomic_ids_to_delete)

# delete from Supabase
self.sqlDb.deleteMaterialsForCourseAndKeyAndValue(course_name, identifier_key, identifier_value)
print(f"Deleting from Supabase for {course_name} using {identifier_key}: {identifier_value}")
response = self.sqlDb.deleteMaterialsForCourseAndKeyAndValue(course_name, identifier_key, identifier_value)
print(f"Deleted from sql: {response}")
except Exception as e:
print(f"Error in deleting file from Nomic or Supabase using {identifier_key}: {identifier_value}", e)
self.sentry.capture_exception(e)
Expand Down

0 comments on commit f5a36fc

Please sign in to comment.