Skip to content

Commit

Permalink
Final print cleanups
Browse files: browse the repository at this point in the history
  • Loading branch information
KastanDay committed Dec 12, 2023
1 parent 36145d3 commit b76b449
Showing 1 changed file with 3 additions and 5 deletions.
8 changes: 3 additions & 5 deletions ai_ta_backend/vector_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,10 +520,9 @@ def _ingest_single_pdf(self, s3_path: str, course_name: str, **kwargs):
pdf_texts = [page['text'] for page in pdf_pages_OCRed]

success_or_failure = self.split_and_upload(texts=pdf_texts, metadatas=metadatas)
print("PDF message: ", success_or_failure)
return success_or_failure
except Exception as e:
err = f"❌❌ Error in (PDF ingest): `{inspect.currentframe().f_code.co_name}`: {e}\nTraceback:\n", traceback.format_exc()
err = f"❌❌ Error in (PDF ingest): `{inspect.currentframe().f_code.co_name}`: {e}\nTraceback:\n", traceback.format_exc() # type: ignore
print(err)
return err
return "Success"
Expand Down Expand Up @@ -714,8 +713,8 @@ def split_and_upload(self, texts: List[str], metadatas: List[Dict[str, Any]]):
metadatas (List[Dict[str, Any]]): _description_
"""
print("In split and upload")
# print(f"metadatas: {metadatas}")
# print(f"Texts: {texts}")
print(f"metadatas: {metadatas}")
print(f"Texts: {texts}")
assert len(texts) == len(metadatas), f'must have equal number of text strings and metadata dicts. len(texts) is {len(texts)}. len(metadatas) is {len(metadatas)}'

try:
Expand All @@ -726,7 +725,6 @@ def split_and_upload(self, texts: List[str], metadatas: List[Dict[str, Any]]):
)
contexts: List[Document] = text_splitter.create_documents(texts=texts, metadatas=metadatas)
input_texts = [{'input': context.page_content, 'model': 'text-embedding-ada-002'} for context in contexts]
print("METADATAS: ", metadatas)

# check for duplicates
is_duplicate = self.check_for_duplicates(input_texts, metadatas)
Expand Down

0 comments on commit b76b449

Please sign in to comment.