Skip to content

Commit

Permalink
adding more prints to bulk_ingest... I think I see why text files in …
Browse files Browse the repository at this point in the history
…particular are failing
  • Loading branch information
KastanDay committed Sep 27, 2023
1 parent b21aad7 commit 1124b7d
Showing 1 changed file with 10 additions and 0 deletions.
10 changes: 10 additions & 0 deletions ai_ta_backend/vector_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,10 @@ def _ingest_single(file_ingest_methods, s3_path, *args, **kwargs):
}
# 👆👆👆👆 ADD NEW INGEST METHODS HERE 👆👆👆👆🎉

print(f"Top of bulk_ingest. S3 paths {s3_paths}")
print(f"Top of bulk_ingest. Course_name {course_name}")
print(f"Top of bulk_ingest. kwargs {kwargs}")

success_status = {"success_ingest": [], "failure_ingest": []}
try:
if isinstance(s3_paths, str):
Expand All @@ -203,14 +207,20 @@ def _ingest_single(file_ingest_methods, s3_path, *args, **kwargs):
for s3_path in s3_paths:
with NamedTemporaryFile(suffix=Path(s3_path).suffix) as tmpfile:
self.s3_client.download_fileobj(Bucket=os.environ['S3_BUCKET_NAME'], Key=s3_path, Fileobj=tmpfile)
print("tmpfile.name", tmpfile.name)
mime_type = mimetypes.guess_type(tmpfile.name, strict=False)[0]
mime_category, extension = mime_type.split('/')
file_ext = "." + extension
print(f"Mime mime_category: {mime_category}")
print(f"Mime type: {mime_type}")
print(f"file extension: {file_ext}")

if file_ext in file_ingest_methods:
# Use specialized functions when possible, fallback to mimetype. Else raise error.
print(f"Using SPECIFIC file ingest methods")
_ingest_single(file_ingest_methods, s3_path, course_name, kwargs=kwargs)
elif mime_category in mimetype_ingest_methods:
print(f"Using GENERAL Mimetype ingest methods")
# mime type
_ingest_single(mimetype_ingest_methods, s3_path, course_name, kwargs=kwargs)
else:
Expand Down

0 comments on commit 1124b7d

Please sign in to comment.