Commit 0a0e870
testing unique filenames in aws upload
star-nox committed Nov 16, 2023
1 parent 21f64fb commit 0a0e870
Showing 3 changed files with 14 additions and 3 deletions.
Deleted: 12168-headers.txt (empty file)
ai_ta_backend/aws.py: 2 additions, 0 deletions
@@ -38,6 +38,8 @@ def upload_data_files_to_s3(course_name: str, localdir: str) -> Optional[List[str]]:
   s3_paths_lock = Lock()

   def upload(myfile):
+    print("filename: ", myfile)
+    exit()
     s3_file = f"courses/{course_name}/{os.path.basename(myfile)}"
     s3.upload_file(myfile, os.getenv('S3_BUCKET_NAME'), s3_file)
     with s3_paths_lock:
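For reference, here is a minimal sketch of how a unique per-file S3 key could be built, assuming the scheme implied elsewhere in this commit: a 36-character uuid4 plus an underscore, i.e. the 37-character prefix the old [37:] slice stripped. The helper name make_unique_s3_key is illustrative, not the repository's actual API.

import os
import uuid
import boto3

def make_unique_s3_key(course_name: str, local_path: str) -> str:
  # Prepend a uuid4 (36 chars) plus '_' so repeated uploads of the
  # same filename never collide: 37-char prefix + original basename.
  unique_name = f"{uuid.uuid4()}_{os.path.basename(local_path)}"
  return f"courses/{course_name}/{unique_name}"

# Usage sketch, mirroring the upload() helper above:
# s3 = boto3.client('s3')
# s3.upload_file(myfile, os.getenv('S3_BUCKET_NAME'), make_unique_s3_key(course_name, myfile))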
ai_ta_backend/vector_database.py: 12 additions, 3 deletions
@@ -8,6 +8,7 @@
 import time
 import traceback
 import uuid
+import re
 from importlib import metadata
 from pathlib import Path
 from tempfile import NamedTemporaryFile
@@ -1243,14 +1244,22 @@ def check_for_duplicates(self, texts: List[Dict], metadatas: List[Dict[str, Any]])
     course_name = metadatas[0]['course_name']
     s3_path = metadatas[0]['s3_path']
     url = metadatas[0]['url']
+    filename = metadatas[0]['readable_filename']

-    shorter_s3_path = s3_path.split('/')[-1][37:]
+    shorter_s3_path = s3_path.split('/')[-1]
+    match = re.match(r'^\w{37}_(.*)$', shorter_s3_path)
+    if match:
+      print("ID exists")
+    else:
+      print("No ID")

     print("--------------------Checking for duplicates------------------------")
     print("METADATAS: ", metadatas)
     print("S3_PATH: ", s3_path)
     print("URL: ", url)
+    print("filename: ", filename)
+    print("SHORTER S3 PATH: ", shorter_s3_path)  # will always have a 37-char prefix

+    exit()

     if s3_path:
       filename = shorter_s3_path
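For context, a runnable sketch of the prefix check this hunk is probing, assuming the prefix is a standard 36-character uuid4 plus an underscore (37 characters total, matching the old [37:] slice). Note that a uuid4 string contains hyphens, which \w does not match, and r'^\w{37}_' expects the underscore at position 38, so the committed pattern would only match prefixes made of 37 word characters. The pattern below targets the uuid4 form instead; it is an assumption, not the repository's final logic.

import re
import uuid

# Matches "<uuid4>_<original filename>" and captures the original filename.
UUID_PREFIX = re.compile(r'^[0-9a-f]{8}(?:-[0-9a-f]{4}){3}-[0-9a-f]{12}_(.*)$', re.IGNORECASE)

def strip_uuid_prefix(basename: str) -> str:
  match = UUID_PREFIX.match(basename)
  if match:
    return match.group(1)  # prefix present: recover the readable filename
  return basename          # no prefix: already a plain filename

# Round-trip check with a freshly generated prefix:
prefixed = f"{uuid.uuid4()}_lecture-notes.pdf"
assert strip_uuid_prefix(prefixed) == "lecture-notes.pdf"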
