Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable results to be grounded in Bing Search Snippets #2296

Draft
wants to merge 15 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions app/backend/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,14 @@
from approaches.promptmanager import PromptyManager
from approaches.retrievethenread import RetrieveThenReadApproach
from approaches.retrievethenreadvision import RetrieveThenReadVisionApproach
from bing_client import AsyncBingClient
from chat_history.cosmosdb import chat_history_cosmosdb_bp
from config import (
CONFIG_ASK_APPROACH,
CONFIG_ASK_VISION_APPROACH,
CONFIG_AUTH_CLIENT,
CONFIG_BING_SEARCH_CLIENT,
CONFIG_BING_SEARCH_ENABLED,
CONFIG_BLOB_CONTAINER_CLIENT,
CONFIG_CHAT_APPROACH,
CONFIG_CHAT_HISTORY_BROWSER_ENABLED,
Expand Down Expand Up @@ -299,6 +302,7 @@ def config():
"showSpeechOutputAzure": current_app.config[CONFIG_SPEECH_OUTPUT_AZURE_ENABLED],
"showChatHistoryBrowser": current_app.config[CONFIG_CHAT_HISTORY_BROWSER_ENABLED],
"showChatHistoryCosmos": current_app.config[CONFIG_CHAT_HISTORY_COSMOS_ENABLED],
"showBingSearchOption": current_app.config[CONFIG_BING_SEARCH_ENABLED],
}
)

Expand Down Expand Up @@ -466,6 +470,9 @@ async def setup_clients():
USE_SPEECH_OUTPUT_AZURE = os.getenv("USE_SPEECH_OUTPUT_AZURE", "").lower() == "true"
USE_CHAT_HISTORY_BROWSER = os.getenv("USE_CHAT_HISTORY_BROWSER", "").lower() == "true"
USE_CHAT_HISTORY_COSMOS = os.getenv("USE_CHAT_HISTORY_COSMOS", "").lower() == "true"
USE_BING_SEARCH = os.getenv("USE_BING_SEARCH", "").lower() == "true"
BING_SEARCH_API_KEY = os.getenv("BING_SEARCH_API_KEY")
BING_SEARCH_ENDPOINT = os.getenv("BING_SEARCH_ENDPOINT")

# WEBSITE_HOSTNAME is always set by App Service, RUNNING_IN_PRODUCTION is set in main.bicep
RUNNING_ON_AZURE = os.getenv("WEBSITE_HOSTNAME") is not None or os.getenv("RUNNING_IN_PRODUCTION") is not None
Expand Down Expand Up @@ -588,6 +595,19 @@ async def setup_clients():
# Wait until token is needed to fetch for the first time
current_app.config[CONFIG_SPEECH_SERVICE_TOKEN] = None

if USE_BING_SEARCH:
current_app.logger.info("USE_BING_SEARCH is true, setting up Bing search client")
if not BING_SEARCH_API_KEY:
raise ValueError("BING_SEARCH_API_KEY must be set when USE_BING_SEARCH is true")
if BING_SEARCH_ENDPOINT:
bing_search_client = AsyncBingClient(BING_SEARCH_API_KEY, BING_SEARCH_ENDPOINT)
else:
bing_search_client = AsyncBingClient(BING_SEARCH_API_KEY)
current_app.config[CONFIG_BING_SEARCH_CLIENT] = bing_search_client
else:
current_app.logger.info("USE_BING_SEARCH is false, Bing search client not set up")
bing_search_client = None

if OPENAI_HOST.startswith("azure"):
if OPENAI_HOST == "azure_custom":
current_app.logger.info("OPENAI_HOST is azure_custom, setting up Azure OpenAI custom client")
Expand Down Expand Up @@ -642,6 +662,7 @@ async def setup_clients():
current_app.config[CONFIG_SPEECH_OUTPUT_AZURE_ENABLED] = USE_SPEECH_OUTPUT_AZURE
current_app.config[CONFIG_CHAT_HISTORY_BROWSER_ENABLED] = USE_CHAT_HISTORY_BROWSER
current_app.config[CONFIG_CHAT_HISTORY_COSMOS_ENABLED] = USE_CHAT_HISTORY_COSMOS
current_app.config[CONFIG_BING_SEARCH_ENABLED] = USE_BING_SEARCH

prompt_manager = PromptyManager()

Expand Down Expand Up @@ -678,6 +699,7 @@ async def setup_clients():
query_language=AZURE_SEARCH_QUERY_LANGUAGE,
query_speller=AZURE_SEARCH_QUERY_SPELLER,
prompt_manager=prompt_manager,
bing_client=bing_search_client,
)

if USE_GPT4V:
Expand Down Expand Up @@ -724,6 +746,7 @@ async def setup_clients():
query_language=AZURE_SEARCH_QUERY_LANGUAGE,
query_speller=AZURE_SEARCH_QUERY_SPELLER,
prompt_manager=prompt_manager,
bing_client=bing_search_client,
)


Expand Down
4 changes: 4 additions & 0 deletions app/backend/approaches/approach.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from openai.types.chat import ChatCompletionMessageParam

from approaches.promptmanager import PromptManager
from bing_client import WebPage
from core.authentication import AuthenticationHelper


Expand Down Expand Up @@ -236,6 +237,9 @@ def get_citation(self, sourcepage: str, use_image_citation: bool) -> str:

return sourcepage

def get_links(self, webpages: list[WebPage]) -> list[str]:
    """Render each Bing web search hit as an "<id>: <snippet>" source line.

    The id doubles as the citation key, so the answer prompt can cite a
    web snippet the same way it cites an indexed document.
    """
    source_lines: list[str] = []
    for hit in webpages:
        source_lines.append(f"{hit.id}: {hit.snippet}")
    return source_lines

async def compute_text_embedding(self, q: str):
SUPPORTED_DIMENSIONS_MODEL = {
"text-embedding-ada-002": False,
Expand Down
118 changes: 78 additions & 40 deletions app/backend/approaches/chatreadretrieveread.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,10 @@
)
from openai_messages_token_helper import build_messages, get_token_limit

from approaches.approach import ThoughtStep
from approaches.approach import Document, ThoughtStep
from approaches.chatapproach import ChatApproach
from approaches.promptmanager import PromptManager
from bing_client import AsyncBingClient, WebPage
from core.authentication import AuthenticationHelper


Expand All @@ -39,10 +40,12 @@ def __init__(
content_field: str,
query_language: str,
query_speller: str,
prompt_manager: PromptManager
prompt_manager: PromptManager,
bing_client: Optional[AsyncBingClient] = None,
):
self.search_client = search_client
self.openai_client = openai_client
self.bing_client = bing_client
self.auth_helper = auth_helper
self.chatgpt_model = chatgpt_model
self.chatgpt_deployment = chatgpt_deployment
Expand All @@ -58,6 +61,9 @@ def __init__(
self.query_rewrite_prompt = self.prompt_manager.load_prompt("chat_query_rewrite.prompty")
self.query_rewrite_tools = self.prompt_manager.load_tools("chat_query_rewrite_tools.json")
self.answer_prompt = self.prompt_manager.load_prompt("chat_answer_question.prompty")
self.bing_answer_prompt = self.prompt_manager.load_prompt("chat_bing_answer_question.prompty")
self.bing_ground_rewrite_prompt = self.prompt_manager.load_prompt("chat_bing_ground_rewrite.prompty")
self.bing_ground_rewrite_tools = self.prompt_manager.load_tools("chat_bing_ground_rewrite_tools.json")

@overload
async def run_until_final_call(
Expand Down Expand Up @@ -89,6 +95,7 @@ async def run_until_final_call(
use_vector_search = overrides.get("retrieval_mode") in ["vectors", "hybrid", None]
use_semantic_ranker = True if overrides.get("semantic_ranker") else False
use_semantic_captions = True if overrides.get("semantic_captions") else False
use_bing_search = True if overrides.get("use_bing_search") else False
top = overrides.get("top", 3)
minimum_search_score = overrides.get("minimum_search_score", 0.0)
minimum_reranker_score = overrides.get("minimum_reranker_score", 0.0)
Expand All @@ -98,36 +105,45 @@ async def run_until_final_call(
if not isinstance(original_user_query, str):
raise ValueError("The most recent message content must be a string.")

# STEP 1: Generate an optimized keyword search query based on the chat history and the last question
async def keyword_rewrite(rendered_prompt, tools):
    """Ask the chat model (with function tools) to rewrite the conversation into a search query.

    Closure over the enclosing scope: reads ``self`` (model/deployment config and
    the OpenAI client), ``seed`` and ``original_user_query``.

    Returns a tuple of (the messages sent to the model, the extracted query text —
    presumably falling back to ``original_user_query`` when no tool call was made;
    see ``get_search_query``).
    """
    # Cap the rewrite response; the remaining budget goes to the prompt itself.
    query_response_token_limit = 100
    query_messages = build_messages(
        model=self.chatgpt_model,
        system_prompt=rendered_prompt.system_content,
        few_shots=rendered_prompt.few_shot_messages,
        past_messages=rendered_prompt.past_messages,
        new_user_content=rendered_prompt.new_user_content,
        tools=tools,
        max_tokens=self.chatgpt_token_limit - query_response_token_limit,
        fallback_to_default=self.ALLOW_NON_GPT_MODELS,
    )

    chat_completion: ChatCompletion = await self.openai_client.chat.completions.create(
        messages=query_messages,  # type: ignore
        # Azure OpenAI takes the deployment name as the model name
        model=self.chatgpt_deployment if self.chatgpt_deployment else self.chatgpt_model,
        temperature=0.0,  # Minimize creativity for search query generation
        max_tokens=query_response_token_limit,  # Setting too low risks malformed JSON, setting too high may affect performance
        n=1,
        tools=tools,
        seed=seed,
    )

    return query_messages, self.get_search_query(chat_completion, original_user_query)

rendered_query_prompt = self.prompt_manager.render_prompt(
self.query_rewrite_prompt, {"user_query": original_user_query, "past_messages": messages[:-1]}
)
tools: List[ChatCompletionToolParam] = self.query_rewrite_tools

# STEP 1: Generate an optimized keyword search query based on the chat history and the last question
query_response_token_limit = 100
query_messages = build_messages(
model=self.chatgpt_model,
system_prompt=rendered_query_prompt.system_content,
few_shots=rendered_query_prompt.few_shot_messages,
past_messages=rendered_query_prompt.past_messages,
new_user_content=rendered_query_prompt.new_user_content,
tools=tools,
max_tokens=self.chatgpt_token_limit - query_response_token_limit,
fallback_to_default=self.ALLOW_NON_GPT_MODELS,
)

chat_completion: ChatCompletion = await self.openai_client.chat.completions.create(
messages=query_messages, # type: ignore
# Azure OpenAI takes the deployment name as the model name
model=self.chatgpt_deployment if self.chatgpt_deployment else self.chatgpt_model,
temperature=0.0, # Minimize creativity for search query generation
max_tokens=query_response_token_limit, # Setting too low risks malformed JSON, setting too high may affect performance
n=1,
tools=tools,
seed=seed,
)

query_text = self.get_search_query(chat_completion, original_user_query)
query_messages, query_text = await keyword_rewrite(rendered_query_prompt, tools)
if use_bing_search and self.bing_client:
bing_search_prompt = self.prompt_manager.render_prompt(
self.bing_ground_rewrite_prompt,
{"user_query": original_user_query, "past_messages": messages[:-1]},
)
_, bing_query_text = await keyword_rewrite(bing_search_prompt, self.bing_ground_rewrite_tools)
bing_results = await self.bing_client.search(bing_query_text, lang=self.query_language)

# STEP 2: Retrieve relevant documents from the search index with the GPT optimized query

Expand All @@ -136,7 +152,7 @@ async def run_until_final_call(
if use_vector_search:
vectors.append(await self.compute_text_embedding(query_text))

results = await self.search(
results: list[Document] = await self.search(
top,
query_text,
filter,
Expand All @@ -151,16 +167,33 @@ async def run_until_final_call(

# STEP 3: Generate a contextual and content specific answer using the search results and chat history
text_sources = self.get_sources_content(results, use_semantic_captions, use_image_citation=False)
rendered_answer_prompt = self.prompt_manager.render_prompt(
self.answer_prompt,
self.get_system_prompt_variables(overrides.get("prompt_template"))
| {
"include_follow_up_questions": bool(overrides.get("suggest_followup_questions")),
"past_messages": messages[:-1],
"user_query": original_user_query,
"text_sources": text_sources,
},
)
web_sources: list[WebPage] = []
if use_bing_search and bing_results.totalEstimatedMatches > 0:
web_sources = bing_results.value[:2]
web_sources_text = self.get_links(web_sources)

rendered_answer_prompt = self.prompt_manager.render_prompt(
self.bing_answer_prompt,
self.get_system_prompt_variables(overrides.get("prompt_template"))
| {
"include_follow_up_questions": bool(overrides.get("suggest_followup_questions")),
"past_messages": messages[:-1],
"user_query": original_user_query,
"text_sources": text_sources,
"web_search_snippets": web_sources_text,
},
)
else:
rendered_answer_prompt = self.prompt_manager.render_prompt(
self.answer_prompt,
self.get_system_prompt_variables(overrides.get("prompt_template"))
| {
"include_follow_up_questions": bool(overrides.get("suggest_followup_questions")),
"past_messages": messages[:-1],
"user_query": original_user_query,
"text_sources": text_sources,
},
)

response_token_limit = 1024
messages = build_messages(
Expand All @@ -173,7 +206,7 @@ async def run_until_final_call(
)

extra_info = {
"data_points": {"text": text_sources},
"data_points": {"text": text_sources, "web_search": [hit.model_dump() for hit in web_sources]},
"thoughts": [
ThoughtStep(
"Prompt to generate search query",
Expand All @@ -184,6 +217,11 @@ async def run_until_final_call(
else {"model": self.chatgpt_model}
),
),
ThoughtStep("Bing search query", bing_query_text if use_bing_search else None, {}),
ThoughtStep(
"Bing search results",
[result.snippet for result in bing_results.value[:2]] if use_bing_search else None,
),
ThoughtStep(
"Search using generated search query",
query_text,
Expand Down
4 changes: 4 additions & 0 deletions app/backend/approaches/chatreadretrievereadvision.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from approaches.approach import ThoughtStep
from approaches.chatapproach import ChatApproach
from approaches.promptmanager import PromptManager
from bing_client import AsyncBingClient
from core.authentication import AuthenticationHelper
from core.imageshelper import fetch_image

Expand Down Expand Up @@ -46,10 +47,12 @@ def __init__(
vision_endpoint: str,
vision_token_provider: Callable[[], Awaitable[str]],
prompt_manager: PromptManager,
bing_client: Optional[AsyncBingClient] = None,
):
self.search_client = search_client
self.blob_container_client = blob_container_client
self.openai_client = openai_client
self.bing_client = bing_client
self.auth_helper = auth_helper
self.chatgpt_model = chatgpt_model
self.chatgpt_deployment = chatgpt_deployment
Expand Down Expand Up @@ -82,6 +85,7 @@ async def run_until_final_call(
use_vector_search = overrides.get("retrieval_mode") in ["vectors", "hybrid", None]
use_semantic_ranker = True if overrides.get("semantic_ranker") else False
use_semantic_captions = True if overrides.get("semantic_captions") else False
# use_bing_search = True if overrides.get("use_bing_search") else False
top = overrides.get("top", 3)
minimum_search_score = overrides.get("minimum_search_score", 0.0)
minimum_reranker_score = overrides.get("minimum_reranker_score", 0.0)
Expand Down
Loading
Loading