Skip to content

Commit

Permalink
should fix text ingest, we were using the wrong file extension AND m…
Browse files Browse the repository at this point in the history
…ime type
  • Loading branch information
KastanDay committed Sep 27, 2023
1 parent 7a7d92b commit 1793b94
Showing 1 changed file with 5 additions and 5 deletions.
10 changes: 5 additions & 5 deletions ai_ta_backend/vector_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,17 +205,17 @@ def _ingest_single(file_ingest_methods, s3_path, *args, **kwargs):


for s3_path in s3_paths:
with NamedTemporaryFile(suffix=Path(s3_path).suffix) as tmpfile:
file_extension = Path(s3_path).suffix
with NamedTemporaryFile(suffix=file_extension) as tmpfile:
self.s3_client.download_fileobj(Bucket=os.environ['S3_BUCKET_NAME'], Key=s3_path, Fileobj=tmpfile)
print("tmpfile.name", tmpfile.name)
mime_type = mimetypes.guess_type(tmpfile.name, strict=False)[0]
mime_category, extension = mime_type.split('/')
file_ext = "." + extension
# file_ext = "." + extension
print(f"Mime mime_category: {mime_category}")
print(f"Mime type: {mime_type}")
print(f"file extension: {file_ext}")
print(f"file extension: {file_extension}")

if file_ext in file_ingest_methods:
if file_extension in file_ingest_methods:
# Use specialized functions when possible, fallback to mimetype. Else raise error.
print(f"Using SPECIFIC file ingest methods")
_ingest_single(file_ingest_methods, s3_path, course_name, kwargs=kwargs)
Expand Down

0 comments on commit 1793b94

Please sign in to comment.