Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 25 additions & 8 deletions tinytroupe/agent/grounding.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
import tinytroupe.utils as utils

from tinytroupe.agent import logger
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Document
from llama_index.readers.web import SimpleWebPageReader


#######################################################################################################################
Expand Down Expand Up @@ -125,14 +125,14 @@ def add_documents(self, new_documents, doc_to_name_func=None) -> list:
"""
# index documents by name
if len(new_documents) > 0:
# add the new documents to the list of documents
self.documents += new_documents

# process documents individually too
# process documents individually
for document in new_documents:

# out of an abundance of caution, we sanitize the text
document.text = utils.sanitize_raw_string(document.text)
sanitized_text = utils.sanitize_raw_string(document.text)
# clone the document with the new text and add it to the list
sanitized_document = self.clone_document_with_new_text(document, sanitized_text)
self.documents.append(sanitized_document)

if doc_to_name_func is not None:
name = doc_to_name_func(document)
Expand All @@ -149,7 +149,24 @@ def add_documents(self, new_documents, doc_to_name_func=None) -> list:
self.index = VectorStoreIndex.from_documents(self.documents)
else:
self.index.refresh(self.documents)



def clone_document_with_new_text(self, original_doc: Document, new_text: str) -> Document:
    """
    Builds a fresh llama-index Document that carries `new_text` as its text while
    reusing the remaining settings of `original_doc`.

    Here, "document" refers to llama-index's data structure that stores a unit of content.

    Args:
        original_doc (Document): The document whose settings are carried over.
        new_text (str): The text to place in the new document.

    Returns:
        Document: A newly constructed document; `original_doc` itself is not modified here.
    """
    # Attributes handed over unchanged from the original document. The values are
    # passed to the constructor as-is; this method does not copy them itself.
    # NOTE(review): whether Document's constructor copies them is not visible here - confirm.
    carried_over_fields = (
        "metadata",
        "excluded_llm_metadata_keys",
        "excluded_embed_metadata_keys",
        "metadata_separator",
        "metadata_template",
        "text_template",
        "relationships",
    )
    carried_over = {
        field_name: getattr(original_doc, field_name)
        for field_name in carried_over_fields
    }

    # Only the text differs from the original document.
    return Document(text=new_text, **carried_over)


@utils.post_init
Expand Down