Skip to content

Commit

Permalink
add chunk index
Browse files — browse the repository at this point in the history
  • Branch information
KastanDay committed Oct 25, 2023
1 parent 89f61aa commit b0c6d3c
Showing 1 changed file with 5 additions and 0 deletions.
5 changes: 5 additions & 0 deletions ai_ta_backend/vector_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -723,6 +723,11 @@ def split_and_upload(self, texts: List[str], metadatas: List[Dict[str, Any]]):
print(f"Texts: {texts}")
assert len(texts) == len(metadatas), f'must have equal number of text strings and metadata dicts. len(texts) is {len(texts)}. len(metadatas) is {len(metadatas)}'

# add chunk index & make s3path unique
for i, meta in enumerate(metadatas):
meta['chunk_index'] = i
meta['s3_path'] = str(uuid.uuid4()) + "-" + meta['s3_path']

try:
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
chunk_size=1000,
Expand Down

0 comments on commit b0c6d3c

Please sign in to comment.