Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/lightspeed_rag_content/document_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -540,10 +540,11 @@ async def upload_file(chunk_indices: list[int]) -> str:
)
embedding = embedding_response.data[0].embedding

metadata = {**doc.get("metadata", {}), "source": index}
chunk = {
"content": doc["content"],
"chunk_id": doc["chunk_id"],
"metadata": doc.get("metadata", {}),
"metadata": metadata,
"chunk_metadata": doc["chunk_metadata"],
"embedding": embedding,
"embedding_model": embedding_model,
Expand Down Expand Up @@ -613,6 +614,7 @@ async def _upload_and_process_files( # noqa: C901 # pylint: disable=R0912,R091
attributes = {
**rag_doc.metadata, # type: ignore[union-attr]
"document_id": doc_uuid,
"source": index,
}
vs_file = await client.vector_stores.files.create(
vector_store_id=vector_store.id,
Expand Down
12 changes: 12 additions & 0 deletions tests/test_document_processor_llama_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,12 @@ def test_save_manual_chunking(self, mocker, llama_stack_processor):
assert call_kwargs["vector_store_id"] == "vs_123"
assert "chunks" in call_kwargs
assert len(call_kwargs["chunks"]) == 2
# Verify index name is embedded in chunk metadata as "source"
# and existing metadata keys are preserved
for chunk in call_kwargs["chunks"]:
assert chunk["metadata"]["source"] == mock.sentinel.index
assert "title" in chunk["metadata"]
assert "docs_url" in chunk["metadata"]

def test_save_auto_chunking(self, mocker, llama_stack_processor):
"""Test saving documents with automatic chunking workflow."""
Expand All @@ -566,3 +572,9 @@ def test_save_auto_chunking(self, mocker, llama_stack_processor):
# Verify files.create was called for each document (single file upload)
assert client.files.create.await_count == 2
assert client.vector_stores.files.create.await_count == 2
# Verify index name is embedded in file attributes as "source"
# and existing metadata keys are preserved
for call in client.vector_stores.files.create.await_args_list:
assert call.kwargs["attributes"]["source"] == mock.sentinel.index
assert "title" in call.kwargs["attributes"]
assert "document_id" in call.kwargs["attributes"]
Loading