From 5c598c4000089c86027fdc0c5cc0fab0e8a77216 Mon Sep 17 00:00:00 2001 From: Tobi Lutke Date: Tue, 10 Mar 2026 12:06:02 -0400 Subject: [PATCH 1/2] fix(test): increase timeout for SDK search tests that trigger LLM expansion These tests load the query expansion model on first call, which consistently exceeds the 30s timeout on CI runners. --- test/sdk.test.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/sdk.test.ts b/test/sdk.test.ts index 905fb2bf..db39dbcd 100644 --- a/test/sdk.test.ts +++ b/test/sdk.test.ts @@ -610,7 +610,7 @@ describe("search (unified API)", () => { expect(results[0]).toHaveProperty("title"); expect(results[0]).toHaveProperty("bestChunk"); expect(results[0]).toHaveProperty("docid"); - }); + }, 120_000); test("search() with intent and rerank:false returns results", async () => { const results = await store.search({ @@ -619,7 +619,7 @@ describe("search (unified API)", () => { rerank: false, }); expect(results.length).toBeGreaterThan(0); - }); + }, 120_000); test("search() with collection filter", async () => { const results = await store.search({ From c6e9ff39819e380c30dc8bd1b971b21cb4b16957 Mon Sep 17 00:00:00 2001 From: MoonCoder <74440188+2830201534@users.noreply.github.com> Date: Wed, 11 Mar 2026 00:23:28 +0800 Subject: [PATCH 2/2] All three models support local environment variable configuration. --- src/llm.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/llm.ts b/src/llm.ts index 7b5b8d01..03af236a 100644 --- a/src/llm.ts +++ b/src/llm.ts @@ -194,9 +194,9 @@ export type RerankDocument = { // Format: hf:<user>/<repo>/<file> // Override via QMD_EMBED_MODEL env var (e.g. hf:Qwen/Qwen3-Embedding-0.6B-GGUF/qwen3-embedding-0.6b-q8_0.gguf) const DEFAULT_EMBED_MODEL = process.env.QMD_EMBED_MODEL ??
"hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf"; -const DEFAULT_RERANK_MODEL = "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf"; +const DEFAULT_RERANK_MODEL = process.env.QMD_RERANK_MODEL ?? "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf"; // const DEFAULT_GENERATE_MODEL = "hf:ggml-org/Qwen3-0.6B-GGUF/Qwen3-0.6B-Q8_0.gguf"; -const DEFAULT_GENERATE_MODEL = "hf:tobil/qmd-query-expansion-1.7B-gguf/qmd-query-expansion-1.7B-q4_k_m.gguf"; +const DEFAULT_GENERATE_MODEL = process.env.QMD_GENERATE_MODEL ?? "hf:tobil/qmd-query-expansion-1.7B-gguf/qmd-query-expansion-1.7B-q4_k_m.gguf"; // Alternative generation models for query expansion: // LiquidAI LFM2 - hybrid architecture optimized for edge/on-device inference