Skip to content

Commit

Permalink
added missing library to requirements.txt
Browse files Browse the repository at this point in the history
  • Loading branch information
star-nox committed Nov 16, 2023
1 parent 0a0e870 commit bcefb36
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 11 deletions.
18 changes: 7 additions & 11 deletions ai_ta_backend/vector_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -1242,28 +1242,24 @@ def check_for_duplicates(self, texts: List[Dict], metadatas: List[Dict[str, Any]

doc_table = os.getenv('NEW_NEW_NEWNEW_MATERIALS_SUPABASE_TABLE')
course_name = metadatas[0]['course_name']
s3_path = metadatas[0]['s3_path']
incoming_s3_path = metadatas[0]['s3_path']
url = metadatas[0]['url']
filename = metadatas[0]['readable_filename']

shorter_s3_path = s3_path.split('/')[-1]
match = re.match(r'^\w{37}_(.*)$', shorter_s3_path)
if match:
print("ID exists")
else:
print("No ID")
original_filename = incoming_s3_path.split('/')[-1][37:] # remove the 37-char uuid prefix
original_s3_path = "courses/" + course_name + "/" + original_filename # the older files will have this path


print("--------------------Checking for duplicates------------------------")
print("METADATAS: ", metadatas)
print("S3_PATH: ", s3_path)
print("S3_PATH: ", incoming_s3_path)
print("filename: ", filename)
print("SHORTER S3 PATH: ", shorter_s3_path) # will always have a 37-char prefix
print("OG S3 PATH: ", original_s3_path)
exit()

if s3_path:
filename = shorter_s3_path
supabase_contents = self.supabase_client.table(doc_table).select('contexts', 's3_path').eq('course_name', course_name).like('s3_path', '%' + shorter_s3_path + '%').execute()
filename = original_s3_path
supabase_contents = self.supabase_client.table(doc_table).select('contexts', 's3_path').eq('course_name', course_name).like('s3_path', '%' + original_s3_path + '%').execute()
elif url:
filename = url
supabase_contents = self.supabase_client.table(doc_table).select('contexts', 's3_path').eq('course_name', course_name).eq('url', url).execute()
Expand Down
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,5 @@ unstructured==0.10.29 # causes huge ~5.3 GB of installs. Probably from onnx: ht

# Not currently supporting coursera ingest
# cs-dlp @ git+https://github.com/raffaem/[email protected] # previously called coursera-dl

pypdf

0 comments on commit bcefb36

Please sign in to comment.