From 5c598c4000089c86027fdc0c5cc0fab0e8a77216 Mon Sep 17 00:00:00 2001
From: Tobi Lutke <tobi@shopify.com>
Date: Tue, 10 Mar 2026 12:06:02 -0400
Subject: [PATCH 1/2] fix(test): increase timeout for SDK search tests that
 trigger LLM expansion

These tests load the query expansion model on first call, which
consistently exceeds the 30s timeout on CI runners.
---
 test/sdk.test.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/test/sdk.test.ts b/test/sdk.test.ts
index 905fb2bf..db39dbcd 100644
--- a/test/sdk.test.ts
+++ b/test/sdk.test.ts
@@ -610,7 +610,7 @@ describe("search (unified API)", () => {
     expect(results[0]).toHaveProperty("title");
     expect(results[0]).toHaveProperty("bestChunk");
     expect(results[0]).toHaveProperty("docid");
-  });
+  }, 120_000);
 
   test("search() with intent and rerank:false returns results", async () => {
     const results = await store.search({
@@ -619,7 +619,7 @@ describe("search (unified API)", () => {
       rerank: false,
     });
     expect(results.length).toBeGreaterThan(0);
-  });
+  }, 120_000);
 
   test("search() with collection filter", async () => {
     const results = await store.search({

From c6e9ff39819e380c30dc8bd1b971b21cb4b16957 Mon Sep 17 00:00:00 2001
From: MoonCoder <74440188+2830201534@users.noreply.github.com>
Date: Wed, 11 Mar 2026 00:23:28 +0800
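Subject: [PATCH 2/2] feat(llm): allow all three models to be configured via
 environment variables

The embedding model could already be overridden with QMD_EMBED_MODEL.
Add QMD_RERANK_MODEL and QMD_GENERATE_MODEL so the rerank and generation
models can be overridden the same way; the built-in defaults are
unchanged when the variables are not set.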

---
 src/llm.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/llm.ts b/src/llm.ts
index 7b5b8d01..03af236a 100644
--- a/src/llm.ts
+++ b/src/llm.ts
@@ -194,9 +194,9 @@ export type RerankDocument = {
 // Format: hf:<user>/<repo>/<file>
 // Override via QMD_EMBED_MODEL env var (e.g. hf:Qwen/Qwen3-Embedding-0.6B-GGUF/qwen3-embedding-0.6b-q8_0.gguf)
 const DEFAULT_EMBED_MODEL = process.env.QMD_EMBED_MODEL ?? "hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf";
-const DEFAULT_RERANK_MODEL = "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf";
+const DEFAULT_RERANK_MODEL = process.env.QMD_RERANK_MODEL ?? "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf";
 // const DEFAULT_GENERATE_MODEL = "hf:ggml-org/Qwen3-0.6B-GGUF/Qwen3-0.6B-Q8_0.gguf";
-const DEFAULT_GENERATE_MODEL = "hf:tobil/qmd-query-expansion-1.7B-gguf/qmd-query-expansion-1.7B-q4_k_m.gguf";
+const DEFAULT_GENERATE_MODEL = process.env.QMD_GENERATE_MODEL ?? "hf:tobil/qmd-query-expansion-1.7B-gguf/qmd-query-expansion-1.7B-q4_k_m.gguf";
 
 // Alternative generation models for query expansion:
 // LiquidAI LFM2 - hybrid architecture optimized for edge/on-device inference