Skip to content

Commit

Permalink
further refinement of bulk_ingest
Browse files Browse the repository at this point in the history
  • Loading branch information
KastanDay committed Sep 28, 2023
1 parent 165dc51 commit 6c3218d
Showing 1 changed file with 10 additions and 14 deletions.
24 changes: 10 additions & 14 deletions ai_ta_backend/vector_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,18 +161,14 @@ def get_context_stuffed_prompt(self, user_question: str, course_name: str, top_n


def bulk_ingest(self, s3_paths: Union[List[str], str], course_name: str, **kwargs) -> Dict[str, List[str]]:
def _ingest_single(ingest_method: Callable | None, s3_path, *args, **kwargs):
def _ingest_single(ingest_method: Callable, s3_path, *args, **kwargs):
"""Handle running an arbitrary ingest function for an individual file."""
if ingest_method == None:
success_status['failure_ingest'].append(f"We don't have a ingest method for this filetype: {s3_path}")
print(f"NO INGEST METHOD!! {success_status}")
# RUN INGEST METHOD
ret = ingest_method(s3_path, *args, **kwargs)
if ret == "Success":
success_status['success_ingest'].append(s3_path)
else:
# RUN INGEST METHOD
ret = ingest_method(s3_path, *args, **kwargs)
if ret == "Success":
success_status['success_ingest'].append(s3_path)
else:
success_status['failure_ingest'].append(s3_path)
success_status['failure_ingest'].append(s3_path)

# 👇👇👇👇 ADD NEW INGEST METHODS HERE 👇👇👇👇🎉
file_ingest_methods = {
Expand Down Expand Up @@ -210,15 +206,15 @@ def _ingest_single(ingest_method: Callable | None, s3_path, *args, **kwargs):

if file_extension in file_ingest_methods:
# Use specialized functions when possible, fallback to mimetype. Else raise error.
ingest_method = file_ingest_methods.get(file_extension)
ingest_method = file_ingest_methods[file_extension]
_ingest_single(ingest_method, s3_path, course_name, kwargs=kwargs)
elif mime_category in mimetype_ingest_methods:
# mime type
ingest_method = file_ingest_methods.get(mime_category)
# fallback to MimeType
ingest_method = file_ingest_methods[mime_category]
_ingest_single(ingest_method, s3_path, course_name, kwargs=kwargs)
else:
# failure
success_status['failure_ingest'].append(f"File ingest not supported for Mimetype: {mime_type}, with MimeCategory: {mime_category}, with file extension: {file_ext} for s3_path: {s3_path}")
success_status['failure_ingest'].append(f"We don't have a ingest method for this filetype: {file_extension} (with generic type {mime_type}), for file: {s3_path}")
continue

return success_status
Expand Down

0 comments on commit 6c3218d

Please sign in to comment.