Implement RAPTOR #15471
Replies: 6 comments 6 replies
-
@dosu how do I implement this while making sure that I can add more documents in the future if needed? I have the following initialisation for my class: self.redis_client = redis.from_url(
self.config.get("REDIS_VECTOR_STORE")
)
self.docstore = RedisDocumentStore.from_redis_client(
redis_client=self.redis_client, namespace=self.namespace
)
# TODO: UNcomment to use redis vector store
self.vector_store = RedisVectorStore(
index_name=self.namespace,
redis_url=self.config.get("REDIS_VECTOR_STORE"),
metadata_fields=["namespace"],
)
self.storage_context = StorageContext.from_defaults(
docstore=self.docstore,
index_store=RedisIndexStore.from_redis_client(
redis_client=self.redis_client, namespace=self.namespace
),
vector_store=self.vector_store,
)
try:
self.base_index = load_index_from_storage(self.storage_context)
print("[INFO] Index found at storage")
except ValueError as e:
print("[INFO] No index found at storage")
self.base_index = VectorStoreIndex(
nodes=[],
storage_context=self.storage_context,
store_nodes_override=True,
)
self.base_retriever = self.base_index.as_retriever(
similarity_top_k=self.similarity_top_k,
filters=MetadataFilters(
filters=[
ExactMatchFilter(key="namespace", value=self.namespace)
]
),
)
self.retriever = AutoMergingRetriever(
self.base_retriever, self.storage_context, verbose=verbose
)
self.recency_postprocessor = FixedRecencyPostprocessor(
top_k=recency, date_key="creation_date"
)
self.postprocessor = SentenceTransformerRerank(
model=self.config.get("RAG_RERANK_MODEL"),
top_n=similarity_top_k_reranker,
)
self.llm = OpenAI(
model=self.config.get("LLM_ANSWERING_MODEL"),
api_key=self.config.get("OPENAI_API_KEY"),
)
self.response_synthesizer = get_response_synthesizer(
response_mode=ResponseMode.COMPACT, llm=self.llm
) I also have the following helper functions: def create_document(self, text, filename):
documents = []
for idx, page in text.items():
document = Document(text=page)
current_date = datetime.now().strftime("%Y-%m-%d")
document.metadata = {
"filename": filename,
"page_number": idx,
"creation_date": current_date,
"last_accessed_date": current_date,
"last_modified_date": current_date,
"namespace": self.namespace,
}
documents.append(document)
return documents
def return_nodes(self, docs):
nodes = self.node_parser.get_nodes_from_documents(docs)
return nodes
def add_nodes_to_doc_store(self, all_nodes):
try:
self.docstore.add_documents(nodes=all_nodes)
except Exception as e:
print(f"An error occurred when adding documents to doc store: {e}")
traceback.print_exc()
raise Exception(
f"An error occurred when adding documents to doc store: {e}"
)
def process_streamlit_fetch_query_results(self, query="", verbose=False):
try:
bm24_retriever = BM25Retriever.from_defaults(
docstore=self.docstore, similarity_top_k=self.similarity_top_k
)
fusion_retriever = QueryFusionRetriever(
[self.retriever, bm24_retriever],
similarity_top_k=self.similarity_top_k,
num_queries=1, # set this to 1 to disable query generation
mode="reciprocal_rerank",
use_async=True,
verbose=verbose,
)
query_bundle = QueryBundle(query_str=query)
retrived_nodes = fusion_retriever.retrieve(query_bundle)
recency_nodes = self.recency_postprocessor.postprocess_nodes(
retrived_nodes, query_bundle=query_bundle
)
rerank_nodes = self.postprocessor.postprocess_nodes(
nodes=recency_nodes, query_bundle=query_bundle
)
px.active_session().url
return rerank_nodes
except Exception as e:
traceback.print_exc()
raise Exception(f"An error occurred retrieving: {e}") How can I modify it to use raptor ? |
Beta Was this translation helpful? Give feedback.
-
@dosu you forgot the HierarchicalNodeParser. Should it be used for adding the nodes? Could you also use QueryFusionRetriever to merge RAPTOR + BM25 in the generated code? |
Beta Was this translation helpful? Give feedback.
-
@dosu in the code below define my metadata filter properly: self.raptor_pack = RaptorPack(
documents=[],
embed_model=Settings.embed_model,
llm=self.llm,
vector_store=self.vector_store,
similarity_top_k=self.similarity_top_k,
mode="collapsed",
summary_module=self.summary_module,
)
self.bm25_retriever = BM25Retriever.from_defaults(
docstore=self.docstore, similarity_top_k=self.similarity_top_k
)
self.fusion_retriever = QueryFusionRetriever(
[self.raptor_pack.retriever, self.bm25_retriever],
similarity_top_k=self.similarity_top_k,
num_queries=1, # set this to 1 to disable query generation
mode="reciprocal_rerank",
use_async=True,
verbose=verbose,
)
self.node_parser = HierarchicalNodeParser.from_defaults()
try:
self.base_index = load_index_from_storage(self.storage_context)
print("[INFO] Index found at storage")
except ValueError as e:
print("[INFO] No index found at storage")
self.base_index = VectorStoreIndex(
nodes=[],
storage_context=self.storage_context,
store_nodes_override=True,
)
self.base_retriever = self.base_index.as_retriever(
similarity_top_k=self.similarity_top_k,
filters=MetadataFilters(
filters=[
ExactMatchFilter(key="namespace", value=self.namespace)
]
),
)
self.retriever = AutoMergingRetriever(
self.base_retriever, self.storage_context, verbose=verbose
)
self.recency_postprocessor = FixedRecencyPostprocessor(
top_k=recency, date_key="creation_date"
)
self.postprocessor = SentenceTransformerRerank(
model=self.config.get("RAG_RERANK_MODEL"),
top_n=similarity_top_k_reranker,
)
self.response_synthesizer = get_response_synthesizer(
response_mode=ResponseMode.COMPACT, llm=self.llm
) the code fragment here does not take into account my metadata filter: self.fusion_retriever = QueryFusionRetriever(
[self.raptor_pack.retriever, self.bm25_retriever],
similarity_top_k=self.similarity_top_k,
num_queries=1, # set this to 1 to disable query generation
mode="reciprocal_rerank",
use_async=True,
verbose=verbose,
) the filter should be something like: filters=MetadataFilters(
filters=[
ExactMatchFilter(key="namespace", value=self.namespace)
]
), this is being used in the function as: def process_streamlit_fetch_query_results(self, query="", verbose=False):
try:
query_bundle = QueryBundle(query_str=query)
retrived_nodes = self.fusion_retriever.retrieve(query_bundle)
recency_nodes = self.recency_postprocessor.postprocess_nodes(
retrived_nodes, query_bundle=query_bundle
)
rerank_nodes = self.postprocessor.postprocess_nodes(
nodes=recency_nodes, query_bundle=query_bundle
)
px.active_session().url
return rerank_nodes
except Exception as e:
traceback.print_exc()
raise Exception(f"An error occurred retrieving: {e}") |
Beta Was this translation helpful? Give feedback.
-
TypeError: QueryFusionRetriever.__init__() got an unexpected keyword argument 'filters' |
Beta Was this translation helpful? Give feedback.
-
this does not work. Can we do it here in the function? def process_streamlit_fetch_query_results(self, query="", verbose=False):
try:
query_bundle = QueryBundle(query_str=query)
retrived_nodes = self.fusion_retriever.retrieve(query_bundle)
recency_nodes = self.recency_postprocessor.postprocess_nodes(
retrived_nodes, query_bundle=query_bundle
)
rerank_nodes = self.postprocessor.postprocess_nodes(
nodes=recency_nodes, query_bundle=query_bundle
)
px.active_session().url
return rerank_nodes
except Exception as e:
traceback.print_exc()
raise Exception(f"An error occurred retrieving: {e}") |
Beta Was this translation helpful? Give feedback.
-
@dosu what I meant is: implement a metadata filter that filters based on this: MetadataFilters(
filters=[
ExactMatchFilter(key="namespace", value=self.namespace)
]
), def process_streamlit_fetch_query_results(self, query="", verbose=False):
try:
query_bundle = QueryBundle(query_str=query)
retrived_nodes = self.fusion_retriever.retrieve(query_bundle)
recency_nodes = self.recency_postprocessor.postprocess_nodes(
retrived_nodes, query_bundle=query_bundle
)
rerank_nodes = self.postprocessor.postprocess_nodes(
nodes=recency_nodes, query_bundle=query_bundle
)
px.active_session().url
return rerank_nodes
except Exception as e:
traceback.print_exc()
raise Exception(f"An error occurred retrieving: {e}") |
Beta Was this translation helpful? Give feedback.
-
I have the following initialisation for my class:
constructor code:
I also have the following helper functions:
How can I modify it to use RAPTOR?
@dosu
Beta Was this translation helpful? Give feedback.
All reactions