Skip to content

Commit

Permalink
add chunk index - minor hotfix
Browse files Browse the repository at this point in the history
  • Loading branch information
KastanDay committed Oct 25, 2023
1 parent b0c6d3c commit daa0fa9
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions ai_ta_backend/vector_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -723,10 +723,11 @@ def split_and_upload(self, texts: List[str], metadatas: List[Dict[str, Any]]):
print(f"Texts: {texts}")
assert len(texts) == len(metadatas), f'must have equal number of text strings and metadata dicts. len(texts) is {len(texts)}. len(metadatas) is {len(metadatas)}'

# add chunk index & make s3path unique
# Add a chunk index to each metadata dict (for the Parent Document Retriever concept)
for i, meta in enumerate(metadatas):
meta['chunk_index'] = i
meta['s3_path'] = str(uuid.uuid4()) + "-" + meta['s3_path']
# Making the s3 path unique has to happen elsewhere: at every point where we upload to S3 (mostly the front end + web scrape), not here.
# meta['s3_path'] = str(uuid.uuid4()) + "-" + meta['s3_path']

try:
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
Expand Down

0 comments on commit daa0fa9

Please sign in to comment.