<!-- index.html -->

<!DOCTYPE html>
<html lang="en">
  <head>
    <!-- Document metadata: encoding and viewport first, then title and icon. -->
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>llguidance-wasm demo</title>
    <link rel="icon" href="favicon.ico" type="image/x-icon" />
    <!--
      Stylesheets keep their relative order: style.css is loaded after
      pure-min.css, so on equal specificity its rules take precedence.
    -->
    <link rel="stylesheet" href="./pure-min.css" />
    <link rel="stylesheet" href="./style.css" />
  </head>
  <body>
    <div id="app">
      <!--
        Message area; both containers are empty in the markup. The live-region
        roles let assistive technology announce text inserted later
        (presumably by the page script, which is not visible here).
        role="alert" is announced assertively, role="status" politely.
      -->
      <div id="messages">
        <div id="msg-error" role="alert"></div>
        <div id="msg-progress" role="status"></div>
      </div>
      <h1>llguidance-wasm demo</h1>
      <p>
        This website will load the Phi-3 mini model (2.1 GB), store it in the
        browser cache, and then generate text based on the provided grammar. It
        uses the
        <a href="https://github.com/microsoft/llguidance"
          >Low-level Guidance (llguidance)</a
        >
        library compiled to WebAssembly and a bunch of
        <a href="https://github.com/mmoskal/llguidance-wasm">glue code</a>. The
        llguidance grammars are generated from TypeScript/JavaScript code via
        the
        <a href="https://github.com/mmoskal/guidance-ts">guidance-ts</a>
        library.
      </p>
      <p>
        To simulate
        <a
          href="https://github.com/mmoskal/web-llm/blob/main/src/window_ai_ll_iface.ts"
          >proposed APIs</a
        >
        that are not yet available in the browser, the site uses
        <a href="https://github.com/mlc-ai/web-llm">WebLLM</a>
        (see
        <a
          href="https://github.com/mmoskal/web-llm/blob/main/src/window_ai_ll.ts"
          >implementation</a
        >). WebLLM requires a GPU exposed via WebGPU. It seems to work in Edge
        on Mac and Windows.
      </p>
      <!--
        Prompt form: user message, grammar source and token limit, followed by
        the examples, output and stats containers and the cache-delete button.
        Element ids are left unchanged; presumably the page script looks them
        up by id (not visible here, confirm before renaming any of them).
      -->
      <form class="pure-form pure-form-stacked">
        <fieldset>
          <div class="pure-control-group">
            <label for="msg-user">Message from user to chat bot:</label>
            <!--
              NOTE(review): the placeholder reads "Assistant message" although
              the label describes the user's message; confirm intended wording.
            -->
            <textarea
              id="msg-user"
              class="pure-input-1"
              placeholder="Assistant message"
            ></textarea>
            <span class="pure-form-message">Instructions in plain text.</span>
          </div>

          <div class="pure-control-group">
            <label for="grammar">Grammar:</label>
            <!-- Spellcheck is off because the field holds code, not prose. -->
            <textarea
              id="grammar"
              rows="20"
              spellcheck="false"
              class="pure-input-1"
              placeholder="grm`...`"
            ></textarea>
            <span class="pure-form-message"
              >It will be eval()ed. You can use gen() and select() functions.
              <a href="https://github.com/mmoskal/guidance-ts"
                >More info</a
              ></span
            >
          </div>

          <div class="pure-control-group">
            <!-- The label targets the number input below, not the grammar. -->
            <label for="max-tokens">Max tokens:</label>
            <input id="max-tokens" type="number" value="100" />
          </div>

          <!-- Explicit type; "submit" is what a typeless button defaults to. -->
          <button
            id="generate"
            type="submit"
            class="pure-button pure-button-primary"
          >
            Generate!
          </button>
        </fieldset>

        <div id="examples">Examples:</div>

        <div>Output:</div>
        <div id="output"></div>

        <div>Stats:</div>
        <div id="stats"></div>

        <div>
          <!-- type="button" so this control never submits the form. -->
          <button id="del-model" type="button" class="pure-button">
            Delete cache (model)
          </button>
        </div>
      </form>
    </div>
    <!-- Entry point: imports main from ./bundle.js and calls it once. -->
    <script type="module">
      import { main as startDemo } from "./bundle.js";

      startDemo();
    </script>
  </body>
</html>