Add memory/chat history to a prompt with retrieved nodes processed as context #14789
Replies: 1 comment
To add memory/chat history into the prompt and use chat history during the retrieval process in your customized query engine, you can modify your code as follows.

**Step 1: Modify the Prompt to Include Chat History**

Update your prompt to include a placeholder for the chat history:

```python
customized_prompt = [
    (
        "system",
        "Return string to the given question using the provided Content and source_name in no particular order as references and use 'source_name' to improve the answer relevance for a given question and also provide citations using ' chunk_id ' to the 'Content' from which you answered the question. "
    ),
    (
        "user",
        "{chat_history}\n{context_str}"
    ),
    (
        "user",
        "You are a professional geologist and petroleum engineer that can answer questions based on the content that I have provided. Please respond to this question : ' {query_str} '. Answer should be properly formatted with all necessary special characters like new line, tab, hyperlinks, image links, and bullets. Answer only if you get the information from the content . If you get partial information then return the partial answer only. Always send relevant and correct chunk_ids with the answer fragments. "
    )
]
```
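The engine in Step 2 below loads this prompt through your `load_prompt` helper and later calls `.format(chat_history=..., context_str=..., query_str=...)` on the result, so whatever `load_prompt` returns must accept the new `chat_history` field. A minimal sketch of such a helper (hypothetical; your actual loader and file format may differ):

```python
def load_prompt(path: str) -> str:
    # Hypothetical loader: treats the file as one plain-text template
    # containing {chat_history}, {context_str}, and {query_str}
    # placeholders for str.format() to fill in later.
    with open(path, encoding="utf-8") as f:
        return f.read()
```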
**Step 2: Update the Query Engine to Handle Chat History**

Modify your `BaseQueryEngine` class:

```python
import logging

from llama_index.core import QueryBundle
from llama_index.core.vector_stores import (
    FilterCondition,
    MetadataFilter,
    MetadataFilters,
)
from llama_index.postprocessor.cohere_rerank import CohereRerank

# `load_prompt`, `cohere_api_key`, `MetadataSearchEngine`, and
# `embed_model` are assumed to be defined elsewhere in your project.


class BaseQueryEngine:
    def __init__(self, index, llm, similarity_top_k=20, use_rerank=True,
                 rerank_top_n=10, text_qa_template_path="prompts/prompt_1.txt",
                 filters=None):
        self.index = index
        self.similarity_top_k = similarity_top_k
        self.text_qa_template = load_prompt(text_qa_template_path)
        self.llm = llm
        self.filters = filters
        self.use_rerank = use_rerank
        self.chat_history = []  # Initialize chat history
        if self.use_rerank:
            self.reranker = CohereRerank(api_key=cohere_api_key, top_n=rerank_top_n)

    def query(self, question):
        retriever = self.index.as_retriever(
            similarity_top_k=self.similarity_top_k,
            filters=self.filters,
        )
        # Include chat history in the retrieval query so that follow-up
        # questions can resolve references to earlier turns
        full_query = "\n".join(msg["content"] for msg in self.chat_history) + "\n" + question
        retrieved_nodes = retriever.retrieve(full_query)
        if self.use_rerank:
            logging.info("Using rerank")
            query_bundle = QueryBundle(full_query)
            reranked_nodes = self.reranker.postprocess_nodes(retrieved_nodes, query_bundle)
        else:
            logging.info("Not using rerank")
            reranked_nodes = retrieved_nodes

        context_str = ""
        source_data = {}
        for i, node in enumerate(reranked_nodes):
            context_str += (
                f"Content: {node.text}. chunk_id: 'chk-{i+1}', "
                f"source_name: {node.metadata['file_name']}, page {node.metadata['page_label']}."
            )
            source_data[f"chk-{i+1}"] = {
                "text": node.text,
                "file_name": node.metadata["file_name"],
                "page": node.metadata["page_label"],
            }
        context_str = context_str.replace("\n", "")

        # Format the prompt with chat history
        chat_history_str = "\n".join(
            f"{msg['role']}: {msg['content']}" for msg in self.chat_history
        )
        fmt_qa_prompt = self.text_qa_template.format(
            chat_history=chat_history_str, context_str=context_str, query_str=question
        )
        response = self.llm.complete(fmt_qa_prompt)

        # Update chat history (store the response text, not the
        # CompletionResponse object, so later string joins don't break)
        self.chat_history.append({"role": "user", "content": question})
        self.chat_history.append({"role": "assistant", "content": str(response)})
        return response, source_data
```
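One design note on Step 2: `self.chat_history` grows without bound, and because it is prepended to both the retrieval query and the prompt, long sessions can drift retrieval off-topic or overflow the context window. A minimal mitigation (a sketch; the `max_turns` cap is a hypothetical tuning knob) is to keep only the most recent turns:

```python
def _trim_history(chat_history, max_turns=5):
    # Each turn appends two messages (user + assistant), so keeping
    # the last 2 * max_turns entries retains max_turns full turns.
    return chat_history[-2 * max_turns:]
```

Calling `self.chat_history = _trim_history(self.chat_history)` right after the two `append` calls keeps the window bounded.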
**Step 3: Use the Updated Query Engine**

When you use the `QueryEngineWithTfidfFilter` subclass, it inherits the chat-aware `query` method from `BaseQueryEngine` and only needs to set the metadata filters before delegating:

```python
class QueryEngineWithTfidfFilter(BaseQueryEngine):
    def __init__(self,
                 index,
                 llm,
                 semantic_weight=0.7,
                 filter_top_n=5,
                 similarity_top_k=20,
                 use_rerank=True,
                 rerank_top_n=10,
                 text_qa_template_path="prompts/prompt_1.txt",
                 file_path_df='metadata_embedding_db.pkl',
                 embed_model=embed_model
                 ):
        super().__init__(
            index=index,
            similarity_top_k=similarity_top_k,
            text_qa_template_path=text_qa_template_path,
            llm=llm,
            rerank_top_n=rerank_top_n,
            filters=None,
            use_rerank=use_rerank
        )
        self.metadata_search_engine = MetadataSearchEngine(
            df_path=file_path_df,
            embed_model=embed_model
        )
        self.top_n = filter_top_n
        self.semantic_weight = semantic_weight

    def query(self, question):
        metadata_search = self.metadata_search_engine.filter(
            question,
            top_n=self.top_n,
            tfidf_weight=1 - self.semantic_weight,
        )
        logging.info(f"Metadata filter results: {metadata_search}")
        self.filters = MetadataFilters(
            filters=[MetadataFilter(key="ReportFileName", value=item) for item in metadata_search],
            condition=FilterCondition.OR,
        )
        return super().query(question)
```

This setup ensures that the chat history is included in the prompt and used during the retrieval process, enhancing the relevance and context of the responses.
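For example, a multi-turn session could look like this (assuming `index` and `llm` are already constructed elsewhere in your setup):

```python
engine = QueryEngineWithTfidfFilter(index=index, llm=llm)

response, sources = engine.query("What is the porosity of the reservoir?")
print(response)

# The follow-up can rely on the first turn: the accumulated chat
# history is prepended to both the retrieval query and the prompt.
follow_up, sources = engine.query("How was it measured?")
print(follow_up)
```

As for how LlamaIndex handles this at a lower level: its built-in chat engines store the conversation in a `ChatMemoryBuffer`, and the condense-style chat modes use that history to rewrite the latest question into a standalone one before retrieval, which is the same idea as the manual `full_query` concatenation above but done with an LLM rewrite instead of plain string joining. If the built-in path fits your use case, a sketch:

```python
from llama_index.core.memory import ChatMemoryBuffer

memory = ChatMemoryBuffer.from_defaults(token_limit=3000)
chat_engine = index.as_chat_engine(
    chat_mode="condense_plus_context",  # condense question, then retrieve context
    memory=memory,
    llm=llm,
)
print(chat_engine.chat("What is the porosity of the reservoir?"))
print(chat_engine.chat("How was it measured?"))
```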
---
How does LlamaIndex, at a lower level, include memory/chat history in the RAG process? Is chat history also used for the retrieval step of a new question?
I have the following customized query engine and prompt; can you instruct me how to add memory/chat history into the prompt? Also, how do I use chat history for retrieval?
my customized prompt:

```python
[
    (
        "system",
        "Return string to the given question using the provided Content and source_name in no particular order as references and use 'source_name' to improve the answer relevance for a given question and also provide citations using ' chunk_id ' to the 'Content' from which you answered the question. "
    ),
    (
        "user",
        "{context_str}"
    ),
    (
        "user",
        "You are a professional geologist and petroleum engineer that can answer questions based on the content that I have provided. Please respond to this question : ' {query_str} '. Answer should be properly formatted with all necessary special characters like new line, tab, hyperlinks, image links, and bullets. Answer only if you get the information from the content . If you get partial information then return the partial answer only. Always send relevant and correct chunk_ids with the answer fragments. "
    )
]
```
my customized query engine:

```python
class BaseQueryEngine:
    def __init__(self, index, llm, similarity_top_k=20, use_rerank=True, rerank_top_n=10, text_qa_template_path="prompts/prompt_1.txt", filters=None):
        self.index = index
        self.similarity_top_k = similarity_top_k
        self.text_qa_template = load_prompt(text_qa_template_path)
        self.llm = llm
        self.filters = filters
        self.use_rerank = use_rerank
        if self.use_rerank:
            #self.reranker = SentenceTransformerRerank(model="cross-encoder/ms-marco-MiniLM-L-2-v2", top_n=rerank_top_n)
            self.reranker = CohereRerank(api_key=cohere_api_key, top_n=rerank_top_n)

class QueryEngineWithTfidfFilter(BaseQueryEngine):
    def __init__(self,
                 index,
                 llm,
                 semantic_weight=0.7,
                 filter_top_n=5,
                 similarity_top_k=20,
                 use_rerank=True,
                 rerank_top_n=10,
                 text_qa_template_path="prompts/prompt_1.txt",
                 file_path_df='metadata_embedding_db.pkl',
                 embed_model=embed_model
                 ):
```