feat(api): implement /ask api

Azure-Samples · Aug 31, 2023 · 64a2d81 · 64a2d81
1 parent 2d8d5e5
commit 64a2d81
Show file tree

Hide file tree

Showing 8 changed files with 202 additions and 12 deletions.
diff --git a/packages/api/src/lib/approaches/approach.ts b/packages/api/src/lib/approaches/approach.ts
@@ -1,16 +1,15 @@
 import { HistoryMessage } from '../message';
 
-export interface ChatQueryResponse {
+export interface ApproachResponse {
   data_points: string[];
   answer: string;
   thoughts: string;
 }
 
 export interface ChatApproach {
-  run(history: HistoryMessage[], overrides: Record<string, any>): Promise<ChatQueryResponse>;
+  run(history: HistoryMessage[], overrides: Record<string, any>): Promise<ApproachResponse>;
 }
 
-// TODO: improve typing
 export interface AskApproach {
-  run(q: string, overrides: Record<string, any>): Promise<any>;
+  run(q: string, overrides: Record<string, any>): Promise<ApproachResponse>;
 }
diff --git a/packages/api/src/lib/approaches/ask-retrieve-then-read.ts b/packages/api/src/lib/approaches/ask-retrieve-then-read.ts
@@ -0,0 +1,137 @@
+import { SearchClient } from '@azure/search-documents';
+import { OpenAiClients } from '../../plugins/openai.js';
+import { removeNewlines } from '../util/index.js';
+import { MessageBuilder } from '../message-builder.js';
+import { AskApproach } from './approach.js';
+import { messagesToString } from '../message.js';
+
+const SYSTEM_CHAT_TEMPLATE = `You are an intelligent assistant helping Contoso Inc employees with their healthcare plan questions and employee handbook questions.
+Use 'you' to refer to the individual asking the questions even if they ask with 'I'.
+Answer the following question using only the data provided in the sources below.
+For tabular information return it as an html table. Do not return markdown format.
+Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response.
+If you cannot answer using the sources below, say you don't know. Use below example to answer`;
+
+// Few-shot sample pair: QUESTION is an example user prompt (question plus named sources), ANSWER is the matching cited reply.
+const QUESTION = `
+'What is the deductible for the employee plan for a visit to Overlake in Bellevue?'
+
+Sources:
+info1.txt: deductibles depend on whether you are in-network or out-of-network. In-network deductibles are $500 for employee and $1000 for family. Out-of-network deductibles are $1000 for employee and $2000 for family.
+info2.pdf: Overlake is in-network for the employee plan.
+info3.pdf: Overlake is the name of the area that includes a park and ride near Bellevue.
+info4.pdf: In-network institutions include Overlake, Swedish and others in the region
+`;
+
+const ANSWER = `In-network deductibles are $500 for employee and $1000 for family [info1.txt] and Overlake is in-network for the employee plan [info2.pdf][info4.pdf].`;
+
+/**
+ * Simple retrieve-then-read implementation, using the Cognitive Search and OpenAI APIs directly.
+ * It first retrieves top documents from search, then constructs a prompt with them, and then uses
+ * OpenAI to generate a completion (answer) with that prompt.
+ */
+export class AskRetrieveThenReadApproach implements AskApproach {
+  constructor(
+    private search: SearchClient<any>,
+    private openai: OpenAiClients,
+    private chatGptModel: string,
+    private sourcePageField: string,
+    private contentField: string,
+  ) {}
+
+  /**
+   * Answers one question from the top search hits.
+   * @param q Question text; used as the search query (in text modes) and embedded in the prompt.
+   * @param overrides Optional settings: retrieval_mode, use_semantic_caption, top, exclude_category, prompt_template, temperature.
+   * @returns `data_points` (the sources sent to the model), `answer`, and `thoughts` (HTML trace of the prompt).
+   */
+  async run(q: string, overrides: Record<string, any>): Promise<any> {
+    const hasText = ['text', 'hybrid', undefined].includes(overrides?.retrieval_mode);
+    // const hasVectors = ['vectors', 'hybrid', undefined].includes(overrides?.retrieval_mode);
+    const useSemanticCaption = Boolean(overrides?.use_semantic_caption) && hasText;
+    const top = overrides?.top ? Number(overrides?.top) : 3;
+    const excludeCategory: string | undefined = overrides?.exclude_category;
+    // Double EVERY single quote: replace() with a string pattern would only escape the first one.
+    const filter = excludeCategory ? `category ne '${excludeCategory.replace(/'/g, "''")}'` : undefined;
+
+    // If retrieval mode includes vectors, compute an embedding for the query
+    // let queryVector;
+    // if (hasVectors) {
+    //   let openAiEmbeddings = await this.openai.getEmbeddings();
+    //   const result = await openAiEmbeddings.create({
+    //     model: 'text-embedding-ada-002',
+    //     input: queryText!,
+    //   });
+    //   queryVector = result.data[0].embedding;
+    // }
+
+    // Only keep the text query if the retrieval mode uses text, otherwise drop it
+    const queryText = hasText ? q : '';
+
+    // Use semantic L2 reranker if requested and if retrieval mode is text or hybrid (vectors + text)
+    // TODO: JS SDK is missing features: https://github.com/anfibiacreativa/azure-search-open-ai-javascript/issues/21
+    // if (overrides?.semantic_ranker && hasText) {
+    //   searchResults = await this.search.search(queryText, {
+    //     filter,
+    //     queryType: 'semantic',
+    //     queryLanguage: 'en-us',
+    //     querySpeller: 'lexicon',
+    //     semanticConfigurationName: 'default',
+    //     top,
+    //     queryCaption: useSemanticCaption ? 'extractive|highlight-false' : undefined,
+    //     vector: queryVector,
+    //     topK: queryVector ? 50 : undefined,
+    //     vectorFields: queryVector ? 'embedding' : undefined,
+    //   }
+    // } else {
+    const searchResults = await this.search.search(queryText, {
+      filter,
+      top,
+      // vector: queryVector,
+      // topK: queryVector ? 50 : undefined,
+      // vectorFields: queryVector ? 'embedding' : undefined,
+    });
+    // }
+
+    const results: string[] = [];
+    for await (const result of searchResults.results) {
+      // TODO: ensure typings
+      const hit = result as any;
+      const doc = hit.document;
+      // The plain query above may return no captions; fall back to the content field instead of crashing on undefined.
+      const captions: any[] | undefined = useSemanticCaption ? (hit.captions ?? hit['@search.captions']) : undefined;
+      const text = captions?.length ? captions.map((c: any) => c.text).join(' . ') : doc[this.contentField];
+      results.push(`${doc[this.sourcePageField]}: ${removeNewlines(text)}`);
+    }
+    const content = results.join('\n');
+
+    const messageBuilder = new MessageBuilder(overrides?.prompt_template || SYSTEM_CHAT_TEMPLATE, this.chatGptModel);
+
+    // Add user question
+    const userContent = `${q}\nSources:\n${content}`;
+    messageBuilder.appendMessage('user', userContent);
+
+    // Add shots/samples. This helps model to mimic response and make sure they match rules laid out in system message.
+    // The sample QUESTION is the user turn and the sample ANSWER is the assistant turn.
+    // NOTE(review): this call order assumes appendMessage inserts right after the system message - confirm in MessageBuilder.
+    messageBuilder.appendMessage('assistant', ANSWER);
+    messageBuilder.appendMessage('user', QUESTION);
+
+    const messages = messageBuilder.messages;
+    const openAiChat = await this.openai.getChat();
+    const chatCompletion = await openAiChat.completions.create({
+      model: this.chatGptModel,
+      messages,
+      temperature: overrides?.temperature ?? 0.3,
+      max_tokens: 1024,
+      n: 1,
+    });
+    // Convert every newline of the prompt trace to <br>, not just the first one.
+    const messageToDisplay = messagesToString(messages).replace(/\n/g, '<br>');
+    return {
+      data_points: results,
+      answer: chatCompletion.choices[0].message.content ?? '',
+      thoughts: `Question:<br>${queryText}<br><br>Prompt:<br>${messageToDisplay}`,
+    };
+  }
+}
diff --git a/packages/api/src/lib/approaches/chat-read-retrieve-read.ts b/packages/api/src/lib/approaches/chat-read-retrieve-read.ts
@@ -1,10 +1,10 @@
 import { SearchClient } from '@azure/search-documents';
-import { ChatApproach, ChatQueryResponse } from './approach.js';
+import { ChatApproach, ApproachResponse } from './approach.js';
 import { OpenAiClients } from '../../plugins/openai.js';
 import { removeNewlines } from '../util/index.js';
 import { MessageBuilder } from '../message-builder.js';
 import { getTokenLimit } from '../model-helpers.js';
-import { HistoryMessage, Message, messageToString } from '../message.js';
+import { HistoryMessage, Message, messagesToString } from '../message.js';
 
 const SYSTEM_MESSAGE_CHAT_CONVERSATION = `Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.
 Answer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.
@@ -44,7 +44,7 @@ export class ChatReadRetrieveRead implements ChatApproach {
   chatGptTokenLimit: number;
 
   constructor(
-    private search: SearchClient<unknown>,
+    private search: SearchClient<any>,
     private openai: OpenAiClients,
     private chatGptModel: string,
     private sourcePageField: string,
@@ -53,7 +53,7 @@ export class ChatReadRetrieveRead implements ChatApproach {
     this.chatGptTokenLimit = getTokenLimit(chatGptModel);
   }
 
-  async run(history: HistoryMessage[], overrides: Record<string, any>): Promise<ChatQueryResponse> {
+  async run(history: HistoryMessage[], overrides: Record<string, any>): Promise<ApproachResponse> {
     const hasText = ['text', 'hybrid', undefined].includes(overrides?.retrieval_mode);
     // const hasVectors = ['vectors', 'hybrid', undefined].includes(overrides?.retrieval_mode);
     const useSemanticCaption = Boolean(overrides?.use_semantic_caption) && hasText;
@@ -196,7 +196,7 @@ export class ChatReadRetrieveRead implements ChatApproach {
     });
 
     const chatContent = finalChatCompletion.choices[0].message.content ?? '';
-    const messageToDisplay = messages.map((m) => messageToString(m)).join('\n\n');
+    const messageToDisplay = messagesToString(messages);
 
     return {
       data_points: results,

diff --git a/packages/api/src/lib/approaches/index.ts b/packages/api/src/lib/approaches/index.ts
@@ -1,2 +1,3 @@
 export * from './approach.js';
+export * from './ask-retrieve-then-read.js';
 export * from './chat-read-retrieve-read.js';
diff --git a/packages/api/src/lib/message.ts b/packages/api/src/lib/message.ts
@@ -13,3 +13,7 @@ export interface HistoryMessage {
 export function messageToString(message: Message): string {
   return `${message.role}: ${message.content}`;
 }
+
+export function messagesToString(messages: Message[]): string {
+  return messages.map(messageToString).join('\n\n'); // one `role: content` entry per message, blank line between
+}
diff --git a/packages/api/src/plugins/approaches.ts b/packages/api/src/plugins/approaches.ts
@@ -1,5 +1,5 @@
 import fp from 'fastify-plugin';
-import { AskApproach, ChatApproach, ChatReadRetrieveRead } from '../lib/index.js';
+import { AskApproach, AskRetrieveThenReadApproach, ChatApproach, ChatReadRetrieveRead } from '../lib/index.js';
 
 export type Approaches = {
   chat: Record<string, ChatApproach>;
@@ -23,7 +23,15 @@ export default fp(
           config.kbFieldsContent,
         ),
       },
-      ask: {},
+      ask: {
+        rtr: new AskRetrieveThenReadApproach(
+          fastify.azure.search,
+          fastify.openai,
+          config.azureOpenAiChatGptModel,
+          config.kbFieldsSourcePage,
+          config.kbFieldsContent,
+        ),
+      },
     });
   },
   {

diff --git a/packages/api/src/routes/root.ts b/packages/api/src/routes/root.ts
@@ -9,6 +9,14 @@ export type ChatRequest = FastifyRequest<{
   };
 }>;
 
+export type AskRequest = FastifyRequest<{
+  Body: {
+    approach: string; // key looked up in fastify.approaches.ask by the /ask handler
+    question: string; // forwarded to the selected approach's run() as the question
+    overrides: Record<string, any>; // forwarded to run() unchanged
+  };
+}>;
+
 const root: FastifyPluginAsync = async (fastify, opts): Promise<void> => {
   fastify.get('/', async function (request, reply) {
     return { root: true };
@@ -46,6 +54,39 @@ const root: FastifyPluginAsync = async (fastify, opts): Promise<void> => {
       }
     },
   });
+
+  // POST /ask: runs the selected ask approach on a single question and replies with its result.
+  fastify.post('/ask', {
+    schema: {
+      body: {
+        type: 'object',
+        // Reject bodies without a question up front instead of running the approach with `undefined`.
+        required: ['approach', 'question'],
+        properties: {
+          approach: { type: 'string' },
+          question: { type: 'string' },
+        },
+      },
+    },
+    handler: async function (request: AskRequest, reply) {
+      const { approach, overrides, question } = request.body;
+      const askApproach = fastify.approaches.ask[approach];
+      if (!askApproach) {
+        reply.code(400);
+        return { error: `Ask approach "${approach}" is unknown or not implemented.` };
+      }
+
+      try {
+        return await askApproach.run(question, overrides);
+      } catch (_error: unknown) {
+        // Anything can be thrown; only read `.message` from real Error instances.
+        const message = _error instanceof Error ? _error.message : String(_error);
+        fastify.log.error(_error);
+        reply.code(500);
+        return { error: `Unknown server error: ${message}` };
+      }
+    },
+  });
 };
 
 export default root;
diff --git a/packages/webapp/vite.config.ts b/packages/webapp/vite.config.ts
@@ -24,7 +24,7 @@ export default defineConfig({
     },
     server: {
         proxy: {
-            "/ask": "http://localhost:50505",
+            "/ask": "http://localhost:3000",
             "/chat": "http://localhost:3000"
         }
     }