Skip to content

Commit

Permalink
Further print and logging refinement
Browse files Browse the repository at this point in the history
  • Loading branch information
KastanDay committed Dec 12, 2023
1 parent e01ee11 commit 154d45b
Showing 1 changed file with 3 additions and 8 deletions.
11 changes: 3 additions & 8 deletions ai_ta_backend/vector_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -1222,7 +1222,6 @@ def check_for_duplicates(self, texts: List[Dict], metadatas: List[Dict[str, Any]
incoming_s3_path = metadatas[0]['s3_path']
url = metadatas[0]['url']
original_filename = incoming_s3_path.split('/')[-1][37:] # remove the 37-char uuid prefix
print("Extracted filename from incoming s3_path: ", original_filename)

# check if uuid exists in s3_path -- not all s3_paths have uuids!
incoming_filename = incoming_s3_path.split('/')[-1]
Expand Down Expand Up @@ -1257,16 +1256,12 @@ def check_for_duplicates(self, texts: List[Dict], metadatas: List[Dict[str, Any]
for text in texts:
current_whole_text += text['input']

# print("supabase_whole_text: ", supabase_whole_text)
# print("current_whole_text: ", current_whole_text)

# compare with current texts
if supabase_whole_text == current_whole_text: # matches the previous file
print(f"The file 📄: {filename} is a duplicate!")
print(f"Duplicate ingested! 📄 s3_path: {filename}.")
return True

else: # the file is updated
print(f"The file 📄: {filename} seems to be updated! Deleting the older file...")
print(f"Updated file detected! Same filename, new contents. 📄 s3_path: {filename}")

# call the delete function on older docs
for content in supabase_contents:
Expand All @@ -1276,7 +1271,7 @@ def check_for_duplicates(self, texts: List[Dict], metadatas: List[Dict[str, Any]
return False

else: # filename does not already exist in Supabase, so its a brand new file
print(f"File 📄: {filename} is NOT a duplicate!")
print(f"NOT a duplicate! 📄s3_path: {filename}")
return False


Expand Down

0 comments on commit 154d45b

Please sign in to comment.