Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable results to be grounded in Bing Search Snippets #2296

Draft
wants to merge 15 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions app/backend/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,14 @@
from approaches.promptmanager import PromptyManager
from approaches.retrievethenread import RetrieveThenReadApproach
from approaches.retrievethenreadvision import RetrieveThenReadVisionApproach
from bing_client import AsyncBingClient
from chat_history.cosmosdb import chat_history_cosmosdb_bp
from config import (
CONFIG_ASK_APPROACH,
CONFIG_ASK_VISION_APPROACH,
CONFIG_AUTH_CLIENT,
CONFIG_BING_SEARCH_CLIENT,
CONFIG_BING_SEARCH_ENABLED,
CONFIG_BLOB_CONTAINER_CLIENT,
CONFIG_CHAT_APPROACH,
CONFIG_CHAT_HISTORY_BROWSER_ENABLED,
Expand Down Expand Up @@ -299,6 +302,7 @@ def config():
"showSpeechOutputAzure": current_app.config[CONFIG_SPEECH_OUTPUT_AZURE_ENABLED],
"showChatHistoryBrowser": current_app.config[CONFIG_CHAT_HISTORY_BROWSER_ENABLED],
"showChatHistoryCosmos": current_app.config[CONFIG_CHAT_HISTORY_COSMOS_ENABLED],
"showBingSearchOption": current_app.config[CONFIG_BING_SEARCH_ENABLED],
}
)

Expand Down Expand Up @@ -466,6 +470,9 @@ async def setup_clients():
USE_SPEECH_OUTPUT_AZURE = os.getenv("USE_SPEECH_OUTPUT_AZURE", "").lower() == "true"
USE_CHAT_HISTORY_BROWSER = os.getenv("USE_CHAT_HISTORY_BROWSER", "").lower() == "true"
USE_CHAT_HISTORY_COSMOS = os.getenv("USE_CHAT_HISTORY_COSMOS", "").lower() == "true"
USE_BING_SEARCH = os.getenv("USE_BING_SEARCH", "").lower() == "true"
BING_SEARCH_API_KEY = os.getenv("BING_SEARCH_API_KEY")
BING_SEARCH_ENDPOINT = os.getenv("BING_SEARCH_ENDPOINT")

# WEBSITE_HOSTNAME is always set by App Service, RUNNING_IN_PRODUCTION is set in main.bicep
RUNNING_ON_AZURE = os.getenv("WEBSITE_HOSTNAME") is not None or os.getenv("RUNNING_IN_PRODUCTION") is not None
Expand Down Expand Up @@ -588,6 +595,19 @@ async def setup_clients():
# Wait until token is needed to fetch for the first time
current_app.config[CONFIG_SPEECH_SERVICE_TOKEN] = None

if USE_BING_SEARCH:
current_app.logger.info("USE_BING_SEARCH is true, setting up Bing search client")
if not BING_SEARCH_API_KEY:
raise ValueError("BING_SEARCH_API_KEY must be set when USE_BING_SEARCH is true")
if BING_SEARCH_ENDPOINT:
bing_search_client = AsyncBingClient(BING_SEARCH_API_KEY, BING_SEARCH_ENDPOINT)
else:
bing_search_client = AsyncBingClient(BING_SEARCH_API_KEY)
current_app.config[CONFIG_BING_SEARCH_CLIENT] = bing_search_client
else:
current_app.logger.info("USE_BING_SEARCH is false, Bing search client not set up")
bing_search_client = None

if OPENAI_HOST.startswith("azure"):
if OPENAI_HOST == "azure_custom":
current_app.logger.info("OPENAI_HOST is azure_custom, setting up Azure OpenAI custom client")
Expand Down Expand Up @@ -642,6 +662,7 @@ async def setup_clients():
current_app.config[CONFIG_SPEECH_OUTPUT_AZURE_ENABLED] = USE_SPEECH_OUTPUT_AZURE
current_app.config[CONFIG_CHAT_HISTORY_BROWSER_ENABLED] = USE_CHAT_HISTORY_BROWSER
current_app.config[CONFIG_CHAT_HISTORY_COSMOS_ENABLED] = USE_CHAT_HISTORY_COSMOS
current_app.config[CONFIG_BING_SEARCH_ENABLED] = USE_BING_SEARCH

prompt_manager = PromptyManager()

Expand Down Expand Up @@ -678,6 +699,7 @@ async def setup_clients():
query_language=AZURE_SEARCH_QUERY_LANGUAGE,
query_speller=AZURE_SEARCH_QUERY_SPELLER,
prompt_manager=prompt_manager,
bing_client=bing_search_client,
)

if USE_GPT4V:
Expand Down Expand Up @@ -724,6 +746,7 @@ async def setup_clients():
query_language=AZURE_SEARCH_QUERY_LANGUAGE,
query_speller=AZURE_SEARCH_QUERY_SPELLER,
prompt_manager=prompt_manager,
bing_client=bing_search_client,
)


Expand Down
4 changes: 4 additions & 0 deletions app/backend/approaches/approach.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from openai.types.chat import ChatCompletionMessageParam

from approaches.promptmanager import PromptManager
from bing_client import WebPage
from core.authentication import AuthenticationHelper


Expand Down Expand Up @@ -236,6 +237,9 @@ def get_citation(self, sourcepage: str, use_image_citation: bool) -> str:

return sourcepage

def get_links(self, webpages: list[WebPage]) -> list[str]:
    """Render each Bing web search hit as an "<id>: <snippet>" source line.

    The id doubles as the citation key, so the answer prompt can cite a
    web snippet the same way it cites an indexed document.
    """
    source_lines: list[str] = []
    for hit in webpages:
        source_lines.append(f"{hit.id}: {hit.snippet}")
    return source_lines

async def compute_text_embedding(self, q: str):
SUPPORTED_DIMENSIONS_MODEL = {
"text-embedding-ada-002": False,
Expand Down
118 changes: 78 additions & 40 deletions app/backend/approaches/chatreadretrieveread.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,10 @@
)
from openai_messages_token_helper import build_messages, get_token_limit

from approaches.approach import ThoughtStep
from approaches.approach import Document, ThoughtStep
from approaches.chatapproach import ChatApproach
from approaches.promptmanager import PromptManager
from bing_client import AsyncBingClient, WebPage
from core.authentication import AuthenticationHelper


Expand All @@ -39,10 +40,12 @@ def __init__(
content_field: str,
query_language: str,
query_speller: str,
prompt_manager: PromptManager
prompt_manager: PromptManager,
bing_client: Optional[AsyncBingClient] = None,
):
self.search_client = search_client
self.openai_client = openai_client
self.bing_client = bing_client
self.auth_helper = auth_helper
self.chatgpt_model = chatgpt_model
self.chatgpt_deployment = chatgpt_deployment
Expand All @@ -58,6 +61,9 @@ def __init__(
self.query_rewrite_prompt = self.prompt_manager.load_prompt("chat_query_rewrite.prompty")
self.query_rewrite_tools = self.prompt_manager.load_tools("chat_query_rewrite_tools.json")
self.answer_prompt = self.prompt_manager.load_prompt("chat_answer_question.prompty")
self.bing_answer_prompt = self.prompt_manager.load_prompt("chat_bing_answer_question.prompty")
self.bing_ground_rewrite_prompt = self.prompt_manager.load_prompt("chat_bing_ground_rewrite.prompty")
self.bing_ground_rewrite_tools = self.prompt_manager.load_tools("chat_bing_ground_rewrite_tools.json")

@overload
async def run_until_final_call(
Expand Down Expand Up @@ -89,6 +95,7 @@ async def run_until_final_call(
use_vector_search = overrides.get("retrieval_mode") in ["vectors", "hybrid", None]
use_semantic_ranker = True if overrides.get("semantic_ranker") else False
use_semantic_captions = True if overrides.get("semantic_captions") else False
use_bing_search = True if overrides.get("use_bing_search") else False
top = overrides.get("top", 3)
minimum_search_score = overrides.get("minimum_search_score", 0.0)
minimum_reranker_score = overrides.get("minimum_reranker_score", 0.0)
Expand All @@ -98,36 +105,45 @@ async def run_until_final_call(
if not isinstance(original_user_query, str):
raise ValueError("The most recent message content must be a string.")

# STEP 1: Generate an optimized keyword search query based on the chat history and the last question
async def keyword_rewrite(rendered_prompt, tools):
    """Ask the chat model (with function tools) to rewrite the conversation into a search query.

    Closure over the enclosing scope: reads ``self`` (model/deployment config and
    the OpenAI client), ``seed`` and ``original_user_query``.

    Returns a tuple of (the messages sent to the model, the extracted query text —
    presumably falling back to ``original_user_query`` when no tool call was made;
    see ``get_search_query``).
    """
    # Cap the rewrite response; the remaining budget goes to the prompt itself.
    query_response_token_limit = 100
    query_messages = build_messages(
        model=self.chatgpt_model,
        system_prompt=rendered_prompt.system_content,
        few_shots=rendered_prompt.few_shot_messages,
        past_messages=rendered_prompt.past_messages,
        new_user_content=rendered_prompt.new_user_content,
        tools=tools,
        max_tokens=self.chatgpt_token_limit - query_response_token_limit,
        fallback_to_default=self.ALLOW_NON_GPT_MODELS,
    )

    chat_completion: ChatCompletion = await self.openai_client.chat.completions.create(
        messages=query_messages,  # type: ignore
        # Azure OpenAI takes the deployment name as the model name
        model=self.chatgpt_deployment if self.chatgpt_deployment else self.chatgpt_model,
        temperature=0.0,  # Minimize creativity for search query generation
        max_tokens=query_response_token_limit,  # Setting too low risks malformed JSON, setting too high may affect performance
        n=1,
        tools=tools,
        seed=seed,
    )

    return query_messages, self.get_search_query(chat_completion, original_user_query)

rendered_query_prompt = self.prompt_manager.render_prompt(
self.query_rewrite_prompt, {"user_query": original_user_query, "past_messages": messages[:-1]}
)
tools: List[ChatCompletionToolParam] = self.query_rewrite_tools

# STEP 1: Generate an optimized keyword search query based on the chat history and the last question
query_response_token_limit = 100
query_messages = build_messages(
model=self.chatgpt_model,
system_prompt=rendered_query_prompt.system_content,
few_shots=rendered_query_prompt.few_shot_messages,
past_messages=rendered_query_prompt.past_messages,
new_user_content=rendered_query_prompt.new_user_content,
tools=tools,
max_tokens=self.chatgpt_token_limit - query_response_token_limit,
fallback_to_default=self.ALLOW_NON_GPT_MODELS,
)

chat_completion: ChatCompletion = await self.openai_client.chat.completions.create(
messages=query_messages, # type: ignore
# Azure OpenAI takes the deployment name as the model name
model=self.chatgpt_deployment if self.chatgpt_deployment else self.chatgpt_model,
temperature=0.0, # Minimize creativity for search query generation
max_tokens=query_response_token_limit, # Setting too low risks malformed JSON, setting too high may affect performance
n=1,
tools=tools,
seed=seed,
)

query_text = self.get_search_query(chat_completion, original_user_query)
query_messages, query_text = await keyword_rewrite(rendered_query_prompt, tools)
if use_bing_search and self.bing_client:
bing_search_prompt = self.prompt_manager.render_prompt(
self.bing_ground_rewrite_prompt,
{"user_query": original_user_query, "past_messages": messages[:-1]},
)
_, bing_query_text = await keyword_rewrite(bing_search_prompt, self.bing_ground_rewrite_tools)
bing_results = await self.bing_client.search(bing_query_text, lang=self.query_language)

# STEP 2: Retrieve relevant documents from the search index with the GPT optimized query

Expand All @@ -136,7 +152,7 @@ async def run_until_final_call(
if use_vector_search:
vectors.append(await self.compute_text_embedding(query_text))

results = await self.search(
results: list[Document] = await self.search(
top,
query_text,
filter,
Expand All @@ -151,16 +167,33 @@ async def run_until_final_call(

# STEP 3: Generate a contextual and content specific answer using the search results and chat history
text_sources = self.get_sources_content(results, use_semantic_captions, use_image_citation=False)
rendered_answer_prompt = self.prompt_manager.render_prompt(
self.answer_prompt,
self.get_system_prompt_variables(overrides.get("prompt_template"))
| {
"include_follow_up_questions": bool(overrides.get("suggest_followup_questions")),
"past_messages": messages[:-1],
"user_query": original_user_query,
"text_sources": text_sources,
},
)
web_sources: list[WebPage] = []
if use_bing_search and bing_results.totalEstimatedMatches > 0:
web_sources = bing_results.value[:2]
web_sources_text = self.get_links(web_sources)

rendered_answer_prompt = self.prompt_manager.render_prompt(
self.bing_answer_prompt,
self.get_system_prompt_variables(overrides.get("prompt_template"))
| {
"include_follow_up_questions": bool(overrides.get("suggest_followup_questions")),
"past_messages": messages[:-1],
"user_query": original_user_query,
"text_sources": text_sources,
"web_search_snippets": web_sources_text,
},
)
else:
rendered_answer_prompt = self.prompt_manager.render_prompt(
self.answer_prompt,
self.get_system_prompt_variables(overrides.get("prompt_template"))
| {
"include_follow_up_questions": bool(overrides.get("suggest_followup_questions")),
"past_messages": messages[:-1],
"user_query": original_user_query,
"text_sources": text_sources,
},
)

response_token_limit = 1024
messages = build_messages(
Expand All @@ -173,7 +206,7 @@ async def run_until_final_call(
)

extra_info = {
"data_points": {"text": text_sources},
"data_points": {"text": text_sources, "web_search": [hit.model_dump() for hit in web_sources]},
"thoughts": [
ThoughtStep(
"Prompt to generate search query",
Expand All @@ -184,6 +217,11 @@ async def run_until_final_call(
else {"model": self.chatgpt_model}
),
),
ThoughtStep("Bing search query", bing_query_text if use_bing_search else None, {}),
ThoughtStep(
"Bing search results",
[result.snippet for result in bing_results.value[:2]] if use_bing_search else None,
),
ThoughtStep(
"Search using generated search query",
query_text,
Expand Down
4 changes: 4 additions & 0 deletions app/backend/approaches/chatreadretrievereadvision.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from approaches.approach import ThoughtStep
from approaches.chatapproach import ChatApproach
from approaches.promptmanager import PromptManager
from bing_client import AsyncBingClient
from core.authentication import AuthenticationHelper
from core.imageshelper import fetch_image

Expand Down Expand Up @@ -46,10 +47,12 @@ def __init__(
vision_endpoint: str,
vision_token_provider: Callable[[], Awaitable[str]],
prompt_manager: PromptManager,
bing_client: Optional[AsyncBingClient] = None,
):
self.search_client = search_client
self.blob_container_client = blob_container_client
self.openai_client = openai_client
self.bing_client = bing_client
self.auth_helper = auth_helper
self.chatgpt_model = chatgpt_model
self.chatgpt_deployment = chatgpt_deployment
Expand Down Expand Up @@ -82,6 +85,7 @@ async def run_until_final_call(
use_vector_search = overrides.get("retrieval_mode") in ["vectors", "hybrid", None]
use_semantic_ranker = True if overrides.get("semantic_ranker") else False
use_semantic_captions = True if overrides.get("semantic_captions") else False
# use_bing_search = True if overrides.get("use_bing_search") else False
top = overrides.get("top", 3)
minimum_search_score = overrides.get("minimum_search_score", 0.0)
minimum_reranker_score = overrides.get("minimum_reranker_score", 0.0)
Expand Down
Loading
Loading