rag_simple.py (forked from Azure-Samples/rag-postgres-openai-python)
from collections.abc import AsyncGenerator
from typing import Optional, Union

from openai import AsyncAzureOpenAI, AsyncOpenAI, AsyncStream
from openai.types.chat import ChatCompletion, ChatCompletionChunk, ChatCompletionMessageParam
from openai_messages_token_helper import build_messages, get_token_limit

from fastapi_app.api_models import (
    AIChatRoles,
    Message,
    RAGContext,
    RetrievalResponse,
    RetrievalResponseDelta,
    ThoughtStep,
)
from fastapi_app.postgres_models import Item
from fastapi_app.postgres_searcher import PostgresSearcher
from fastapi_app.rag_base import ChatParams, RAGChatBase


class SimpleRAGChat(RAGChatBase):
    """Simple RAG flow: retrieve matching rows from PostgreSQL, then have the chat model answer using them."""

    def __init__(
        self,
        *,
        searcher: PostgresSearcher,
        openai_chat_client: Union[AsyncOpenAI, AsyncAzureOpenAI],
        chat_model: str,
        chat_deployment: Optional[str],  # Not needed for non-Azure OpenAI
    ):
        self.searcher = searcher
        self.openai_chat_client = openai_chat_client
        self.chat_model = chat_model
        self.chat_deployment = chat_deployment
        self.chat_token_limit = get_token_limit(chat_model, default_to_minimum=True)

    async def prepare_context(
        self, chat_params: ChatParams
    ) -> tuple[list[ChatCompletionMessageParam], list[Item], list[ThoughtStep]]:
        """Retrieve relevant rows from the database and build a context for the chat model."""
        # Retrieve relevant rows from the database
        results = await self.searcher.search_and_embed(
            chat_params.original_user_query,
            top=chat_params.top,
            enable_vector_search=chat_params.enable_vector_search,
            enable_text_search=chat_params.enable_text_search,
        )
        sources_content = [f"[{item.id}]:{item.to_str_for_rag()}\n\n" for item in results]
        content = "\n".join(sources_content)

        # Generate a contextual and content-specific answer using the search results and chat history
        contextual_messages: list[ChatCompletionMessageParam] = build_messages(
            model=self.chat_model,
            system_prompt=chat_params.prompt_template,
            new_user_content=chat_params.original_user_query + "\n\nSources:\n" + content,
            past_messages=chat_params.past_messages,
            max_tokens=self.chat_token_limit - chat_params.response_token_limit,
            fallback_to_default=True,
        )

        # Capture the retrieval steps as thoughts so they can be surfaced alongside the answer
        thoughts = [
            ThoughtStep(
                title="Search query for database",
                description=chat_params.original_user_query,
                props={
                    "top": chat_params.top,
                    "vector_search": chat_params.enable_vector_search,
                    "text_search": chat_params.enable_text_search,
                },
            ),
            ThoughtStep(
                title="Search results",
                description=[result.to_dict() for result in results],
            ),
        ]
        return contextual_messages, results, thoughts

    async def answer(
        self,
        chat_params: ChatParams,
        contextual_messages: list[ChatCompletionMessageParam],
        results: list[Item],
        earlier_thoughts: list[ThoughtStep],
    ) -> RetrievalResponse:
        """Call the chat model once (non-streaming) and return the full answer with its retrieval context."""
        chat_completion_response: ChatCompletion = await self.openai_chat_client.chat.completions.create(
            # Azure OpenAI takes the deployment name as the model name
            model=self.chat_deployment if self.chat_deployment else self.chat_model,
            messages=contextual_messages,
            temperature=chat_params.temperature,
            max_tokens=chat_params.response_token_limit,
            n=1,
            stream=False,
            seed=chat_params.seed,
        )

        return RetrievalResponse(
            message=Message(
                content=str(chat_completion_response.choices[0].message.content), role=AIChatRoles.ASSISTANT
            ),
            context=RAGContext(
                data_points={item.id: item.to_dict() for item in results},
                thoughts=earlier_thoughts
                + [
                    ThoughtStep(
                        title="Prompt to generate answer",
                        description=contextual_messages,
                        props=(
                            {"model": self.chat_model, "deployment": self.chat_deployment}
                            if self.chat_deployment
                            else {"model": self.chat_model}
                        ),
                    ),
                ],
            ),
        )

    async def answer_stream(
        self,
        chat_params: ChatParams,
        contextual_messages: list[ChatCompletionMessageParam],
        results: list[Item],
        earlier_thoughts: list[ThoughtStep],
    ) -> AsyncGenerator[RetrievalResponseDelta, None]:
        """Stream the response: yield the retrieval context first, then the answer tokens as they arrive."""
        chat_completion_async_stream: AsyncStream[
            ChatCompletionChunk
        ] = await self.openai_chat_client.chat.completions.create(
            # Azure OpenAI takes the deployment name as the model name
            model=self.chat_deployment if self.chat_deployment else self.chat_model,
            messages=contextual_messages,
            temperature=chat_params.temperature,
            max_tokens=chat_params.response_token_limit,
            n=1,
            stream=True,
            seed=chat_params.seed,
        )

        yield RetrievalResponseDelta(
            context=RAGContext(
                data_points={item.id: item.to_dict() for item in results},
                thoughts=earlier_thoughts
                + [
                    ThoughtStep(
                        title="Prompt to generate answer",
                        description=contextual_messages,
                        props=(
                            {"model": self.chat_model, "deployment": self.chat_deployment}
                            if self.chat_deployment
                            else {"model": self.chat_model}
                        ),
                    ),
                ],
            ),
        )

        async for response_chunk in chat_completion_async_stream:
            # First response has empty choices and last response has empty content
            if response_chunk.choices and response_chunk.choices[0].delta.content:
                yield RetrievalResponseDelta(
                    delta=Message(content=str(response_chunk.choices[0].delta.content), role=AIChatRoles.ASSISTANT)
                )
        return
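

# --- Usage sketch (illustrative, not part of the original module) ---
# A minimal sketch of how SimpleRAGChat might be wired up, assuming a configured
# PostgresSearcher and an OpenAI client. The variable names and the model name
# "gpt-4o-mini" below are hypothetical placeholders, and ChatParams construction
# depends on fastapi_app.rag_base, so this is left as commented-out guidance
# rather than executable code:
#
#     chat = SimpleRAGChat(
#         searcher=searcher,                 # a configured PostgresSearcher
#         openai_chat_client=AsyncOpenAI(),  # or AsyncAzureOpenAI for Azure
#         chat_model="gpt-4o-mini",          # hypothetical model name
#         chat_deployment=None,              # set to the deployment name when using Azure OpenAI
#     )
#     contextual_messages, results, thoughts = await chat.prepare_context(chat_params)
#     response = await chat.answer(chat_params, contextual_messages, results, thoughts)
#     # or, to stream:
#     # async for delta in chat.answer_stream(chat_params, contextual_messages, results, thoughts):
#     #     ...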