feat: ping llms (#19)
* feat: ping llms

* feat: error handling

* fix: return object instead of array

* feat: response time

* feat: integrate into models endpoint

* feat: say hello world

* chore: refactor
Jaszkowic authored Sep 25, 2024
1 parent 334f5cb commit 465dc04
Showing 6 changed files with 142 additions and 3 deletions.
8 changes: 7 additions & 1 deletion src/controllers/model-controller.ts
@@ -1,8 +1,11 @@
 import { Request, Response } from "express";
 import { ModelProvider, ModelResponse } from "../types/model-types";
 import { LLM_PARAMETERS } from "../llm-handlers/constants";
+import { getLlmStatus } from "./status-controller";
 
-export const getModels = (req: Request, res: Response<ModelResponse>) => {
+export const getModels = async (req: Request, res: Response<ModelResponse>) => {
+  const modelStatus = await getLlmStatus();
+
   res.json({
     models: [
       {
@@ -14,6 +17,7 @@ export const getModels = (req: Request, res: Response<ModelResponse>) => {
         isOpenSource: false,
         serverLocation: "USA",
         description: "Aktuelles Modell von OpenAI, gehostet von OpenAI.",
+        status: modelStatus["openai-gpt-4o-mini"],
       },
       {
         identifier: "azure-gpt-4o-mini",
@@ -25,6 +29,7 @@ export const getModels = (req: Request, res: Response<ModelResponse>) => {
         serverLocation: "Schweden",
         description:
           "Aktuelles Modell von OpenAI, datenschutzkonform gehostet von Microsoft Azure.",
+        status: modelStatus["azure-gpt-4o-mini"],
       },
       {
         identifier: "citylab-macstudio-llama-3.1",
@@ -36,6 +41,7 @@ export const getModels = (req: Request, res: Response<ModelResponse>) => {
         serverLocation: "Berlin",
         description:
           "Open Source - Modell von Meta, datenschutzkonform gehostet im CityLAB Berlin.",
+        status: modelStatus["citylab-macstudio-llama-3.1"],
       },
     ],
     parameters: LLM_PARAMETERS,
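With this change, every entry returned by the models endpoint embeds the result of its hello-world ping. A trimmed, hypothetical response for a single model might look like the following (the values are illustrative, and the other entries, remaining model fields, and the parameters block are omitted):

{
  "models": [
    {
      "identifier": "openai-gpt-4o-mini",
      "serverLocation": "USA",
      "status": {
        "llm": "openai-gpt-4o-mini",
        "status": 200,
        "healthy": true,
        "helloWorldResponse": "Hello, world!",
        "responseTimeMs": 842
      }
    }
  ]
}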
115 changes: 115 additions & 0 deletions src/controllers/status-controller.ts
@@ -0,0 +1,115 @@
+import {
+  LLMIdentifier,
+  resolveLlmHandler,
+} from "../llm-handlers/resolve-llm-handler";
+import { ModelStatus } from "../types/model-types";
+
+export const getLlmStatus = async () => {
+  const llms = [
+    "openai-gpt-4o-mini",
+    "azure-gpt-4o-mini",
+    "citylab-macstudio-llama-3.1",
+  ];
+
+  const pingResults = await Promise.all(
+    llms.map(async (llm) => {
+      const llmHandler = resolveLlmHandler(llm as LLMIdentifier);
+
+      try {
+        const then = Date.now();
+        const llmRespone = await llmHandler.chatCompletion([
+          {
+            role: "user",
+            content: "Say 'Hello, world!'.",
+          },
+        ]);
+
+        const status = llmRespone.status;
+
+        if (status !== 200) {
+          return {
+            llm: llm,
+            status: status,
+            healthy: false,
+            error: llmRespone.error?.message,
+            helloWorldResponse: undefined,
+            responseTimeMs: undefined,
+          };
+        }
+
+        const stream: NodeJS.ReadableStream = llmRespone.stream!;
+
+        const rawChunks: string = await new Promise((resolve, reject) => {
+          let data = "";
+          stream.on("data", (chunk) => {
+            data += chunk;
+          });
+          stream.on("end", () => {
+            const streamData = data.toString();
+            resolve(streamData);
+          });
+          stream.on("error", (error) => {
+            reject(error);
+          });
+        });
+
+        const now = Date.now();
+        const elapsed = now - then;
+
+        let llmResponse = "";
+        if (llm === "openai-gpt-4o-mini" || llm === "azure-gpt-4o-mini") {
+          llmResponse = rawChunks
+            .split("\n")
+            .filter((chunk) => chunk !== "")
+            .filter((chunk) => chunk !== "data: [DONE]")
+            .map((chunk) => chunk.replace("data: ", ""))
+            .map((chunk) => JSON.parse(chunk))
+            .map((chunk) => chunk.choices[0].delta)
+            .filter((chunk) => chunk.content !== "")
+            .filter((chunk) => Object.keys(chunk).length !== 0)
+            .map((chunk) => chunk.content)
+            .join("");
+        }
+        if (llm === "citylab-macstudio-llama-3.1") {
+          llmResponse = rawChunks
+            .split("\n")
+            .filter((chunk) => chunk !== "")
+            .map((chunk) => JSON.parse(chunk))
+            .map((chunk) => chunk.message)
+            .filter((chunk) => chunk.content !== "")
+            .filter((chunk) => Object.keys(chunk).length !== 0)
+            .map((chunk) => chunk.content)
+            .join("");
+        }
+
+        return {
+          llm: llm,
+          status: status,
+          healthy: status === 200,
+          error: undefined,
+          helloWorldResponse: llmResponse,
+          responseTimeMs: elapsed,
+        };
+      } catch (e: any) {
+        return {
+          llm: llm,
+          status: 500,
+          healthy: false,
+          error: e.message,
+          helloWorldResponse: undefined,
+          responseTimeMs: undefined,
+        };
+      }
+    })
+  );
+
+  const pingResultsObject: { [key: string]: any } = pingResults.reduce(
+    (acc, curr) => {
+      acc[curr.llm] = curr as ModelStatus;
+      return acc;
+    },
+    {} as { [key: string]: ModelStatus }
+  );
+
+  return pingResultsObject;
+};
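For orientation, getLlmStatus resolves to an object keyed by model identifier, one ModelStatus per entry in the llms array above. A minimal consumption sketch follows; the import paths assume a caller in src/, the checkOpenAi helper is hypothetical, and the values in the comment are illustrative, not actual output:

import { getLlmStatus } from "./controllers/status-controller";
import { ModelStatus } from "./types/model-types";

// Hypothetical helper: warns when a model fails its hello-world ping.
const checkOpenAi = async (): Promise<void> => {
  const modelStatus = await getLlmStatus();
  const openAiStatus: ModelStatus = modelStatus["openai-gpt-4o-mini"];
  // e.g. { status: 200, healthy: true, helloWorldResponse: "Hello, world!", responseTimeMs: 842 }
  if (!openAiStatus.healthy) {
    console.warn("openai-gpt-4o-mini failed its hello-world ping");
  }
};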
2 changes: 1 addition & 1 deletion src/index.ts
@@ -14,7 +14,7 @@ export const config: Config = parseConfig();
 const app = express();
 const port = 3000;
 
-app.use(express.json({ limit: '10mb'}));
+app.use(express.json({ limit: "10mb" }));
 app.use(corsMiddleware(config));
 app.use(rateLimitMiddleware(config));
 app.use(basicAuthMiddleware(config));
11 changes: 10 additions & 1 deletion src/llm-handlers/llm-handler-utils.ts
@@ -3,7 +3,16 @@ import { LLMResponse } from "../types/llm-handler-types";
 export const toCustomError = async (
   response: Response
 ): Promise<LLMResponse> => {
-  const rawError = (await response.json()).error;
+  let rawError: { code: string; message: string } = { code: "", message: "" };
+  try {
+    rawError = (await response.json()).error;
+  } catch (e) {
+    rawError = {
+      code: "unknown",
+      //@ts-ignore
+      message: e.message,
+    };
+  }
 
   if (rawError.code === "content_filter") {
     return {
1 change: 1 addition & 0 deletions src/llm-handlers/openai-handler.ts
@@ -28,6 +28,7 @@ export class OpenAILLMHandler implements LLMHandler {
       }),
     });
     if (moderationsResponse.status !== 200) {
+      throw new Error(`Failed to call OpenAI moderations endpoint`);
     }
     const res = await moderationsResponse.json();
     const flagged = await res.results[0].flagged;
8 changes: 8 additions & 0 deletions src/types/model-types.ts
@@ -7,6 +7,13 @@ export enum ModelProvider {
   Ollama = "Ollama",
 }
 
+export interface ModelStatus {
+  status: number;
+  healthy: boolean;
+  helloWorldResponse: string | undefined;
+  responseTimeMs: number | undefined;
+}
+
 export interface Model {
   identifier: LLMIdentifier;
   baseModelName: string;
@@ -16,6 +23,7 @@ export interface Model {
   isOpenSource: boolean;
   serverLocation: string;
   description: string;
+  status: ModelStatus;
 }
 
 export interface ModelResponse {