
Commit 8912a4d

Introduce builtin WebWorker support (#118)
This PR introduces WebWorker support to the WebLLM package.
1 parent 6e0dc08 commit 8912a4d
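
At its core, the change pairs a `ChatWorkerHandler` running inside the worker with a `ChatWorkerClient` on the main thread; a condensed sketch of the pattern, drawn from the diffs below:

```typescript
// worker.ts, runs inside the Web Worker and serves chat requests
import { ChatWorkerHandler, ChatModule } from "@mlc-ai/web-llm";

const handler = new ChatWorkerHandler(new ChatModule());
self.onmessage = (msg: MessageEvent) => handler.onmessage(msg);
```

```typescript
// main.ts, runs on the UI thread; ChatWorkerClient implements ChatInterface
import { ChatWorkerClient } from "@mlc-ai/web-llm";

const chat = new ChatWorkerClient(new Worker(
  new URL('./worker.ts', import.meta.url),
  {type: 'module'}
));
```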

18 files changed: +527 -23 lines

README.md (+44 -2)

@@ -65,8 +65,50 @@ async function main() {
 main();
 ```
 
-Finally, you can find a complete chat app example in
-[examples/simple-chat](examples/simple-chat/).
+### Using Web Worker
+
+WebLLM comes with API support for WebWorker, so you can hook
+the generation process into a separate worker thread. This keeps
+the computation in the worker from disrupting the UI.
+
+We first create a worker script that creates a ChatModule and
+hooks it up to a handler that serves requests.
+
+```typescript
+// worker.ts
+import { ChatWorkerHandler, ChatModule } from "@mlc-ai/web-llm";
+
+// Hook up a chat module to a worker handler
+const chat = new ChatModule();
+const handler = new ChatWorkerHandler(chat);
+self.onmessage = (msg: MessageEvent) => {
+  handler.onmessage(msg);
+};
+```
+
+Then, in the main logic, we create a `ChatWorkerClient` that
+implements the same `ChatInterface`. The rest of the logic remains the same.
+
+```typescript
+// main.ts
+import * as webllm from "@mlc-ai/web-llm";
+
+async function main() {
+  // Use a chat worker client instead of ChatModule here
+  const chat = new webllm.ChatWorkerClient(new Worker(
+    new URL('./worker.ts', import.meta.url),
+    {type: 'module'}
+  ));
+  // everything else remains the same
+}
+```
+
+### Build a ChatApp
+
+You can find a complete chat app example in [examples/simple-chat](examples/simple-chat/).
 
 ## Customized Model Weights

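Since `ChatWorkerClient` implements the same `ChatInterface`, init-progress and streaming generation callbacks are relayed back across the worker boundary; a condensed sketch, mirroring `examples/web-worker/src/main.ts` from this commit:

```typescript
import * as webllm from "@mlc-ai/web-llm";

async function demo() {
  const chat = new webllm.ChatWorkerClient(new Worker(
    new URL('./worker.ts', import.meta.url),
    {type: 'module'}
  ));

  // progress reports generated inside the worker arrive at this callback
  chat.setInitProgressCallback((report: webllm.InitProgressReport) => {
    console.log(report.text);
  });
  await chat.reload("vicuna-v1-7b-q4f32_0");

  // per-step streaming updates are forwarded from the worker as well
  const reply = await chat.generate(
    "What is the capital of Canada?",
    (_step: number, message: string) => console.log(message),
  );
  console.log(reply);
}
```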
examples/README.md (+1 -1)

@@ -6,5 +6,5 @@ Please send a pull request if you find things that belong here.
 ## Tutorial Examples
 
 - [get-started](get-started): minimum get started example.
+- [web-worker](web-worker): get started with web worker backed chat.
 - [simple-chat](simple-chat): a minimum and complete chat app.
-
examples/get-started/README.md (+1 -1)

@@ -7,7 +7,7 @@ To try it out, you can do the following steps
 - `@mlc-ai/web-llm` points to a valid npm version e.g.
   ```js
   "dependencies": {
-    "@mlc-ai/web-llm": "^0.1.3"
+    "@mlc-ai/web-llm": "^0.2.0"
   }
   ```
   Try this option if you would like to use WebLLM without building it yourself.

examples/simple-chat/README.md (+1 -1)

@@ -7,7 +7,7 @@ chat app based on WebLLM. To try it out, you can do the following steps
 - Option 1: `@mlc-ai/web-llm` points to a valid npm version e.g.
   ```js
   "dependencies": {
-    "@mlc-ai/web-llm": "^0.1.3"
+    "@mlc-ai/web-llm": "^0.2.0"
   }
   ```
   Try this option if you would like to use WebLLM.

examples/simple-chat/src/gh-config.js (+2 -1)

@@ -18,5 +18,6 @@ export default {
     "vicuna-v1-7b-q4f32_0": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/vicuna-v1-7b-q4f32_0-webgpu.wasm",
     "RedPajama-INCITE-Chat-3B-v1-q4f32_0": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/RedPajama-INCITE-Chat-3B-v1-q4f32_0-webgpu.wasm",
     "RedPajama-INCITE-Chat-3B-v1-q4f16_0": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/RedPajama-INCITE-Chat-3B-v1-q4f16_0-webgpu.wasm"
-  }
+  },
+  "use_web_worker": true
 }
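
The new `use_web_worker` flag lets the simple-chat app choose its backend at startup; a condensed restatement of the `simple_chat.ts` change later in this diff:

```typescript
import appConfig from "./app-config";
import { ChatInterface, ChatModule, ChatWorkerClient } from "@mlc-ai/web-llm";

// Pick the worker-backed client when the config asks for it,
// otherwise fall back to the in-thread ChatModule.
const chat: ChatInterface = appConfig.use_web_worker
  ? new ChatWorkerClient(new Worker(
      new URL('./worker.ts', import.meta.url),
      {type: 'module'}
    ))
  : new ChatModule();
```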

examples/simple-chat/src/llm_chat.html (-1)

@@ -1,7 +1,6 @@
 <select id="chatui-select">
 </select>
 
-
 <link href="./llm_chat.css" rel="stylesheet" type="text/css"/>
 
 <div class="chatui">

examples/simple-chat/src/mlc-local-config.js (+2 -1)

@@ -22,5 +22,6 @@ export default {
     "vicuna-v1-7b-q4f32_0": "http://localhost:8000/vicuna-v1-7b-q4f32_0/vicuna-v1-7b-q4f32_0-webgpu.wasm",
     "RedPajama-INCITE-Chat-3B-v1-q4f32_0": "http://localhost:8000/RedPajama-INCITE-Chat-3B-v1-q4f32_0/RedPajama-INCITE-Chat-3B-v1-q4f32_0-webgpu.wasm",
     "RedPajama-INCITE-Chat-3B-v1-q4f16_0": "http://localhost:8000/RedPajama-INCITE-Chat-3B-v1-q4f16_0/RedPajama-INCITE-Chat-3B-v1-q4f16_0-webgpu.wasm"
-  }
+  },
+  "use_web_worker": true
 }

examples/simple-chat/src/simple_chat.ts (+20 -12)

@@ -1,5 +1,5 @@
 import appConfig from "./app-config";
-import { ChatModule, ModelRecord } from "@mlc-ai/web-llm";
+import { ChatInterface, ChatModule, ChatWorkerClient, ModelRecord } from "@mlc-ai/web-llm";
 
 function getElementAndCheck(id: string): HTMLElement {
   const element = document.getElementById(id);
@@ -18,7 +18,7 @@ class ChatUI {
   private uiChat: HTMLElement;
   private uiChatInput: HTMLInputElement;
   private uiChatInfoLabel: HTMLLabelElement;
-  private chat: ChatModule;
+  private chat: ChatInterface;
   private config: AppConfig = appConfig;
   private selectedModel: string;
   private chatLoaded = false;
@@ -27,8 +27,9 @@ class ChatUI {
   // all requests sent to chat are sequentialized
   private chatRequestChain: Promise<void> = Promise.resolve();
 
-  constructor() {
-    this.chat = new ChatModule();
+  constructor(chat: ChatInterface) {
+    // the injected chat may use a web worker to run generation in the background
+    this.chat = chat;
     // get the elements
     this.uiChat = getElementAndCheck("chatui-chat");
     this.uiChatInput = getElementAndCheck("chatui-input") as HTMLInputElement;
@@ -156,9 +157,10 @@ class ChatUI {
   private resetChatHistory() {
     const clearTags = ["left", "right", "init", "error"];
     for (const tag of clearTags) {
-      const matches = this.uiChat.getElementsByClassName(`msg ${tag}-msg`);
+      // unpack into an array so the iterator is not affected by the removals below
+      const matches = [...this.uiChat.getElementsByClassName(`msg ${tag}-msg`)];
       for (const item of matches) {
-        item.remove();
+        this.uiChat.removeChild(item);
       }
     }
     if (this.uiChatInfoLabel !== undefined) {
@@ -211,11 +213,6 @@ class ChatUI {
 
     this.appendMessage("left", "");
     const callbackUpdateResponse = (step, msg) => {
-      if (msg.endsWith("##")) {
-        msg = msg.substring(0, msg.length - 2);
-      } else if (msg.endsWith("#")) {
-        msg = msg.substring(0, msg.length - 1);
-      }
       this.updateLastMessage("left", msg);
     };
 
@@ -233,4 +230,15 @@ class ChatUI {
   }
 }
 
-new ChatUI();
+const useWebWorker = appConfig.use_web_worker;
+let chat: ChatInterface;
+
+if (useWebWorker) {
+  chat = new ChatWorkerClient(new Worker(
+    new URL('./worker.ts', import.meta.url),
+    {type: 'module'}
+  ));
+} else {
+  chat = new ChatModule();
+}
+new ChatUI(chat);

examples/simple-chat/src/worker.ts (+8, new file)

@@ -0,0 +1,8 @@
+// Serve the chat workload through a web worker
+import { ChatWorkerHandler, ChatModule } from "@mlc-ai/web-llm";
+
+const chat = new ChatModule();
+const handler = new ChatWorkerHandler(chat);
+self.onmessage = (msg: MessageEvent) => {
+  handler.onmessage(msg);
+};

examples/web-worker/README.md (+25, new file)

@@ -0,0 +1,25 @@
+# WebLLM Get Started with WebWorker
+
+This folder provides a minimal demo showing the WebLLM API with a
+[WebWorker](https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Using_web_workers).
+The main benefit of a web worker is that all ML workloads run on a separate
+thread, so they are much less likely to block the UI.
+
+To try it out, you can follow these steps
+
+- Modify [package.json](package.json) to make sure either
+  - `@mlc-ai/web-llm` points to a valid npm version e.g.
+    ```js
+    "dependencies": {
+      "@mlc-ai/web-llm": "^0.2.0"
+    }
+    ```
+    Try this option if you would like to use WebLLM without building it yourself.
+  - Or keep the dependencies as `"file:../.."`, and follow the build-from-source
+    instructions in the project to build WebLLM locally. This option is more useful
+    for developers who would like to hack on the WebLLM core package.
+- Run the following command
+  ```bash
+  npm install
+  npm start
+  ```

examples/web-worker/package.json (+17, new file)

@@ -0,0 +1,17 @@
+{
+  "name": "get-started-web-worker",
+  "version": "0.1.0",
+  "private": true,
+  "scripts": {
+    "start": "parcel src/get_started.html --port 8888",
+    "build": "parcel build src/get_started.html --dist-dir lib"
+  },
+  "devDependencies": {
+    "parcel": "^2.8.3",
+    "typescript": "^4.9.5",
+    "tslib": "^2.3.1"
+  },
+  "dependencies": {
+    "@mlc-ai/web-llm": "file:../.."
+  }
+}
examples/web-worker/src/get_started.html (+22, new file)

@@ -0,0 +1,22 @@
+<!DOCTYPE html>
+<html>
+<script>
+  webLLMGlobal = {}
+</script>
+<body>
+  <h2>WebLLM Test Page</h2>
+  Open console to see output
+  <br/>
+  <br/>
+  <label id="init-label"> </label>
+
+  <h3>Prompt</h3>
+  <label id="prompt-label"> </label>
+
+  <h3>Response</h3>
+  <label id="generate-label"> </label>
+  <br/>
+  <label id="stats-label"> </label>
+
+  <script type="module" src="./main.ts"></script>
+</html>

examples/web-worker/src/main.ts (+41, new file)

@@ -0,0 +1,41 @@
+import * as webllm from "@mlc-ai/web-llm";
+
+function setLabel(id: string, text: string) {
+  const label = document.getElementById(id);
+  if (label == null) {
+    throw Error("Cannot find label " + id);
+  }
+  label.innerText = text;
+}
+
+async function main() {
+  // Use a chat worker client instead of ChatModule here
+  const chat = new webllm.ChatWorkerClient(new Worker(
+    new URL('./worker.ts', import.meta.url),
+    {type: 'module'}
+  ));
+
+  chat.setInitProgressCallback((report: webllm.InitProgressReport) => {
+    setLabel("init-label", report.text);
+  });
+
+  await chat.reload("vicuna-v1-7b-q4f32_0");
+
+  const generateProgressCallback = (_step: number, message: string) => {
+    setLabel("generate-label", message);
+  };
+
+  const prompt0 = "What is the capital of Canada?";
+  setLabel("prompt-label", prompt0);
+  const reply0 = await chat.generate(prompt0, generateProgressCallback);
+  console.log(reply0);
+
+  const prompt1 = "Can you write a poem about it?";
+  setLabel("prompt-label", prompt1);
+  const reply1 = await chat.generate(prompt1, generateProgressCallback);
+  console.log(reply1);
+
+  console.log(await chat.runtimeStatsText());
+}
+
+main();

examples/web-worker/src/worker.ts (+8, new file)

@@ -0,0 +1,8 @@
+import { ChatWorkerHandler, ChatModule } from "@mlc-ai/web-llm";
+
+// Hook up a chat module to a worker handler
+const chat = new ChatModule();
+const handler = new ChatWorkerHandler(chat);
+self.onmessage = (msg: MessageEvent) => {
+  handler.onmessage(msg);
+};

package-lock.json (+2 -2)

Some generated files are not rendered by default.

package.json (+1 -1)

@@ -1,6 +1,6 @@
 {
   "name": "@mlc-ai/web-llm",
-  "version": "0.1.3",
+  "version": "0.2.0",
   "description": "Hardware accelerated language model chats on browsers",
   "main": "lib/index.js",
   "types": "lib/index.d.ts",

src/index.ts (+5)

@@ -13,3 +13,8 @@ export {
 export {
   ChatModule,
 } from "./chat_module";
+
+export {
+  ChatWorkerHandler,
+  ChatWorkerClient
+} from "./web_worker";
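
Because `ChatWorkerClient` and `ChatModule` both implement `ChatInterface`, downstream code can be written against the interface alone; a small illustrative sketch (the `runPrompt` helper is hypothetical, not part of the package, and assumes the progress callback to `generate` is optional):

```typescript
import { ChatInterface } from "@mlc-ai/web-llm";

// Hypothetical helper: works with either an in-thread ChatModule
// or a worker-backed ChatWorkerClient, since both satisfy ChatInterface.
async function runPrompt(chat: ChatInterface, prompt: string): Promise<string> {
  return await chat.generate(prompt);
}
```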

0 commit comments
