diff --git a/ai_ta_backend/database/aws.py b/ai_ta_backend/database/aws.py
index 1b2f63dc..68e61b68 100644
--- a/ai_ta_backend/database/aws.py
+++ b/ai_ta_backend/database/aws.py
@@ -11,8 +11,8 @@ def __init__(self):
     # S3
     self.s3_client = boto3.client(
         's3',
-        aws_access_key_id=os.getenv('AWS_ACCESS_KEY_ID'),
-        aws_secret_access_key=os.getenv('AWS_SECRET_ACCESS_KEY'),
+        aws_access_key_id=os.environ['AWS_ACCESS_KEY_ID'],
+        aws_secret_access_key=os.environ['AWS_SECRET_ACCESS_KEY'],
     )

   def upload_file(self, file_path: str, bucket_name: str, object_name: str):
@@ -22,7 +22,7 @@ def download_file(self, object_name: str, bucket_name: str, file_path: str):
     self.s3_client.download_file(bucket_name, object_name, file_path)

   def delete_file(self, bucket_name: str, s3_path: str):
-    self.s3_client.delete_object(Bucket=bucket_name, Key=s3_path)
+    return self.s3_client.delete_object(Bucket=bucket_name, Key=s3_path)

   def generatePresignedUrl(self, object: str, bucket_name: str, s3_path: str, expiration: int = 3600):
     # generate presigned URL
diff --git a/ai_ta_backend/database/sql.py b/ai_ta_backend/database/sql.py
index b8d4579c..a9819657 100644
--- a/ai_ta_backend/database/sql.py
+++ b/ai_ta_backend/database/sql.py
@@ -83,3 +83,6 @@ def getDocsForIdsGte(self, course_name: str, first_id: int, fields: str = "*", l

   def insertProjectInfo(self, project_info):
     return self.supabase_client.table("projects").insert(project_info).execute()
+
+  def getAllFromLLMConvoMonitor(self, course_name: str):
+    return self.supabase_client.table("llm-convo-monitor").select("*").eq("course_name", course_name).execute()
diff --git a/ai_ta_backend/database/vector.py b/ai_ta_backend/database/vector.py
index 7ac67eda..d22fc6ca 100644
--- a/ai_ta_backend/database/vector.py
+++ b/ai_ta_backend/database/vector.py
@@ -20,8 +20,8 @@ def __init__(self):
     """
     # vector DB
     self.qdrant_client = QdrantClient(
-        url=os.getenv('QDRANT_URL'),
-        api_key=os.getenv('QDRANT_API_KEY'),
+        url=os.environ['QDRANT_URL'],
+        api_key=os.environ['QDRANT_API_KEY'],
     )

     self.vectorstore = Qdrant(client=self.qdrant_client,
@@ -50,7 +50,7 @@ def delete_data(self, collection_name: str, key: str, value: str):
     """
     Delete data from the vector database.
     """
-    self.qdrant_client.delete(
+    return self.qdrant_client.delete(
         collection_name=collection_name,
         points_selector=models.Filter(must=[
             models.FieldCondition(
diff --git a/ai_ta_backend/service/nomic_service.py b/ai_ta_backend/service/nomic_service.py
index f9d33a59..2e660de0 100644
--- a/ai_ta_backend/service/nomic_service.py
+++ b/ai_ta_backend/service/nomic_service.py
@@ -61,7 +61,7 @@ class NomicService():

   @inject
   def __init__(self, sentry: SentryService, sql: SQLDatabase):
-    nomic.login(os.getenv('NOMIC_API_KEY'))
+    nomic.login(os.environ['NOMIC_API_KEY'])
     self.sentry = sentry
     self.sql = sql

@@ -258,18 +258,15 @@ def create_nomic_map(self, course_name: str, log_data: list):
     2. appends current embeddings and metadata to it
     2. creates map if there are at least 20 queries
     """
-    nomic.login(os.getenv('NOMIC_API_KEY'))  # login during start of flask app
+    nomic.login(os.environ['NOMIC_API_KEY'])  # login during start of flask app
     NOMIC_MAP_NAME_PREFIX = 'Conversation Map for '

     print(f"in create_nomic_map() for {course_name}")

-    # initialize supabase
-    supabase_client = supabase.create_client(  # type: ignore
-        supabase_url=os.getenv('SUPABASE_URL'),  # type: ignore
-        supabase_key=os.getenv('SUPABASE_API_KEY'))  # type: ignore
     try:
       # fetch all conversations with this new course (we expect <=20 conversations, because otherwise the map should be made already)
-      response = supabase_client.table("llm-convo-monitor").select("*").eq("course_name", course_name).execute()
+
+      response = self.sql.getAllFromLLMConvoMonitor(course_name)
       data = response.data
       df = pd.DataFrame(data)

@@ -627,7 +624,7 @@ def create_map(self, embeddings, metadata, map_name, index_name, topic_label_fie
       topic_label_field: str
       colorable_fields: list of str
     """
-    nomic.login(os.getenv('NOMIC_API_KEY'))
+    nomic.login(os.environ['NOMIC_API_KEY'])

     try:
       project = atlas.map_embeddings(embeddings=embeddings,
@@ -652,7 +649,7 @@ def append_to_map(self, embeddings, metadata, map_name):
       metadata: pd.DataFrame of Nomic upload metadata
       map_name: str
     """
-    nomic.login(os.getenv('NOMIC_API_KEY'))
+    nomic.login(os.environ['NOMIC_API_KEY'])
     try:
       project = atlas.AtlasProject(name=map_name, add_datums_if_exists=True)
       with project.wait_for_project_lock():
@@ -714,7 +711,7 @@ def data_prep_for_doc_map(self, df: pd.DataFrame):
     #                                       openai_api_key=os.getenv('AZURE_OPENAI_KEY'))  # type: ignore
     embeddings_model = OpenAIEmbeddings(openai_api_type="openai",
                                         openai_api_base="https://api.openai.com/v1/",
-                                        openai_api_key=os.getenv('VLADS_OPENAI_KEY'))  # type: ignore
+                                        openai_api_key=os.environ['VLADS_OPENAI_KEY'])
     embeddings = embeddings_model.embed_documents(texts)

     metadata = pd.DataFrame(metadata)
diff --git a/ai_ta_backend/service/retrieval_service.py b/ai_ta_backend/service/retrieval_service.py
index 91583323..3f13c311 100644
--- a/ai_ta_backend/service/retrieval_service.py
+++ b/ai_ta_backend/service/retrieval_service.py
@@ -34,22 +34,22 @@ def __init__(self, vdb: VectorDatabase, sqlDb: SQLDatabase, aws: AWSStorage, pos
     self.posthog = posthog
     self.nomicService = nomicService

-    openai.api_key = os.getenv("OPENAI_API_KEY")
+    openai.api_key = os.environ["OPENAI_API_KEY"]

     self.embeddings = OpenAIEmbeddings(
         model='text-embedding-ada-002',
-        openai_api_base=os.getenv("AZURE_OPENAI_ENDPOINT"),  # type:ignore
+        openai_api_base=os.environ["AZURE_OPENAI_ENDPOINT"],
         openai_api_type=os.environ['OPENAI_API_TYPE'],
-        openai_api_key=os.getenv("AZURE_OPENAI_KEY"),  # type:ignore
-        openai_api_version=os.getenv("OPENAI_API_VERSION"),  # type:ignore
+        openai_api_key=os.environ["AZURE_OPENAI_KEY"],
+        openai_api_version=os.environ["OPENAI_API_VERSION"],
     )

     self.llm = AzureChatOpenAI(
         temperature=0,
-        deployment_name=os.getenv("AZURE_OPENAI_ENGINE"),  # type:ignore
-        openai_api_base=os.getenv("AZURE_OPENAI_ENDPOINT"),  # type:ignore
-        openai_api_key=os.getenv("AZURE_OPENAI_KEY"),  # type:ignore
-        openai_api_version=os.getenv("OPENAI_API_VERSION"),  # type:ignore
+        deployment_name=os.environ["AZURE_OPENAI_ENGINE"],
+        openai_api_base=os.environ["AZURE_OPENAI_ENDPOINT"],
+        openai_api_key=os.environ["AZURE_OPENAI_KEY"],
+        openai_api_version=os.environ["OPENAI_API_VERSION"],
         openai_api_type=os.environ['OPENAI_API_TYPE'],
     )

@@ -150,7 +150,7 @@ def delete_data(self, course_name: str, s3_path: str, source_url: str):
     # add delete from doc map logic here
     try:
       # Delete file from S3
-      bucket_name = os.getenv('S3_BUCKET_NAME')
+      bucket_name = os.environ['S3_BUCKET_NAME']
       if bucket_name is None:
         raise ValueError("S3_BUCKET_NAME environment variable is not set")

@@ -176,14 +176,18 @@ def delete_data(self, course_name: str, s3_path: str, source_url: str):

   def delete_from_s3(self, bucket_name: str, s3_path: str):
     try:
-      self.aws.delete_file(bucket_name, s3_path)
+      print("Deleting from S3")
+      response = self.aws.delete_file(bucket_name, s3_path)
+      print(f"AWS response: {response}")
     except Exception as e:
       print("Error in deleting file from s3:", e)
       self.sentry.capture_exception(e)

   def delete_from_qdrant(self, identifier_key: str, identifier_value: str):
     try:
-      self.vdb.delete_data(os.environ['QDRANT_COLLECTION_NAME'], identifier_key, identifier_value)
+      print("Deleting from Qdrant")
+      response = self.vdb.delete_data(os.environ['QDRANT_COLLECTION_NAME'], identifier_key, identifier_value)
+      print(f"Qdrant response: {response}")
     except Exception as e:
       if "timed out" in str(e):
         # Timed out is fine. Still deletes.
@@ -311,7 +315,9 @@ def format_for_json_mqr(self, found_docs) -> List[Dict]:

   def delete_from_nomic_and_supabase(self, course_name: str, identifier_key: str, identifier_value: str):
     try:
+      print(f"Deleting from Nomic and Supabase for {course_name} using {identifier_key}: {identifier_value}")
       response = self.sqlDb.getMaterialsForCourseAndKeyAndValue(course_name, identifier_key, identifier_value)
+      print(f"Trying to delete materials: {response}")
       data = response.data[0]  # single record fetched
       nomic_ids_to_delete = [str(data['id']) + "_" + str(i) for i in range(1, len(data['contexts']) + 1)]

@@ -325,7 +331,9 @@ def delete_from_nomic_and_supabase(self, course_name: str, identifier_key: str,
         self.nomicService.delete_from_document_map(project_id, nomic_ids_to_delete)

       # delete from Supabase
-      self.sqlDb.deleteMaterialsForCourseAndKeyAndValue(course_name, identifier_key, identifier_value)
+      print(f"Deleting from Supabase for {course_name} using {identifier_key}: {identifier_value}")
+      response = self.sqlDb.deleteMaterialsForCourseAndKeyAndValue(course_name, identifier_key, identifier_value)
+      print(f"Deleted from sql: {response}")
     except Exception as e:
       print(f"Error in deleting file from Nomic or Supabase using {identifier_key}: {identifier_value}", e)
       self.sentry.capture_exception(e)
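A note on the pattern this diff applies throughout: os.getenv('KEY') returns None when the variable is unset, so a missing secret only surfaces later as a confusing downstream error, whereas os.environ['KEY'] raises KeyError immediately at startup. A minimal standalone sketch of that difference (not part of the PR; the variable name EXAMPLE_API_KEY is purely illustrative):

  import os

  # Hypothetical variable, assumed unset for this demonstration.
  os.environ.pop('EXAMPLE_API_KEY', None)

  # os.getenv: silently yields None; the failure shows up later, far from the cause.
  key = os.getenv('EXAMPLE_API_KEY')
  print(key)  # -> None

  # os.environ[...]: fails fast at the point of configuration.
  try:
    key = os.environ['EXAMPLE_API_KEY']
  except KeyError as e:
    print(f"Missing required environment variable: {e}")  # -> 'EXAMPLE_API_KEY'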