diff --git a/server/docker-compose.yml b/server/docker-compose.yml new file mode 100644 index 00000000..00387710 --- /dev/null +++ b/server/docker-compose.yml @@ -0,0 +1,68 @@ +name: qmd + +services: + qmd-embed: + image: ghcr.io/ggml-org/llama.cpp:server-cuda + container_name: qmd-embed + ports: + - "8081:8080" + volumes: + - qmd-models:/models + command: > + -m /models/embeddinggemma-300M-Q8_0.gguf + --embedding --pooling mean + --host 0.0.0.0 --port 8080 + -ngl 99 -b 4096 -ub 4096 -np 4 -c 16384 + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] + restart: unless-stopped + + qmd-rerank: + image: ghcr.io/ggml-org/llama.cpp:server-cuda + container_name: qmd-rerank + ports: + - "8082:8080" + volumes: + - qmd-models:/models + command: > + -m /models/qwen3-reranker-0.6b-q8_0.gguf + --reranking --pooling rank + --host 0.0.0.0 --port 8080 + -ngl 99 -b 2048 -ub 2048 -c 4096 + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] + restart: unless-stopped + + qmd-generate: + image: ghcr.io/ggml-org/llama.cpp:server-cuda + container_name: qmd-generate + ports: + - "8083:8080" + volumes: + - qmd-models:/models + command: > + -m /models/qmd-query-expansion-1.7B-q4_k_m.gguf + --host 0.0.0.0 --port 8080 + -ngl 99 -c 2048 + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] + restart: unless-stopped + +volumes: + qmd-models: + external: true diff --git a/src/cli/qmd.ts b/src/cli/qmd.ts index 52a076da..531a290b 100755 --- a/src/cli/qmd.ts +++ b/src/cli/qmd.ts @@ -32,6 +32,7 @@ import { hashContent, extractTitle, formatDocForEmbedding, + chunkDocument, chunkDocumentByTokens, clearCache, getCacheKey, @@ -74,8 +75,15 @@ import { generateEmbeddings, syncConfigToDb, type ReindexResult, + findLocalQmdDir, + initLocalQmdDir, + setCliQmdDir, + getCliQmdDir, + getEffectiveQmdDir, + setQmdDirConfigLoader, } from "../store.js"; import { 
disposeDefaultLlamaCpp, getDefaultLlamaCpp, withLLMSession, pullModels, DEFAULT_EMBED_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, DEFAULT_RERANK_MODEL_URI, DEFAULT_MODEL_CACHE_DIR } from "../llm.js"; +import { RemoteLLM, loadRemoteConfig, saveRemoteConfig, clearRemoteConfig, isRemoteConfigured, getDefaultRemoteLLM, disposeDefaultRemoteLLM, withRemoteLLMSession, loadQmdDirConfig, saveQmdDirConfig, clearQmdDirConfig } from "../llm-remote.js"; import { formatSearchResults, formatDocuments, @@ -95,6 +103,7 @@ import { listAllContexts, setConfigIndexName, loadConfig, + setConfigDirResolver, } from "../collections.js"; import { getEmbeddedQmdSkillContent, getEmbeddedQmdSkillFiles } from "../embedded-skills.js"; @@ -102,6 +111,42 @@ import { getEmbeddedQmdSkillContent, getEmbeddedQmdSkillFiles } from "../embedde // Tests must set INDEX_PATH or use createStore() with explicit path enableProductionMode(); +// Wire .qmd directory config resolution +setQmdDirConfigLoader(loadQmdDirConfig); +setConfigDirResolver(() => getEffectiveQmdDir()); + +// ============================================================================= +// LLM Backend Selection +// ============================================================================= + +let forceLocalMode = false; + +function setForceLocalMode(force: boolean): void { + forceLocalMode = force; +} + +function shouldUseRemote(): boolean { + if (forceLocalMode) return false; + return isRemoteConfigured(); +} + +async function withLLMSessionAuto<T>( + fn: (session: any) => Promise<T>, + options?: any +): Promise<T> { + if (shouldUseRemote()) { + return withRemoteLLMSession(fn, options); + } + return withLLMSession(fn, options); +} + +async function disposeAllLLM(): Promise<void> { + if (shouldUseRemote()) { + await disposeDefaultRemoteLLM(); + } + await disposeDefaultLlamaCpp(); +} + // ============================================================================= // Store/DB lifecycle (no legacy singletons in store.ts) //
============================================================================= @@ -2125,12 +2170,13 @@ async function vectorSearch(query: string, opts: OutputOptions, _model: string = checkIndexHealth(store.db); - await withLLMSession(async () => { + await withLLMSessionAuto(async () => { let results = await vectorSearchQuery(store, query, { collection: singleCollection, limit: opts.all ? 500 : (opts.limit || 10), minScore: opts.minScore || 0.3, intent: opts.intent, + llm: shouldUseRemote() ? getDefaultRemoteLLM() : undefined, hooks: { onExpand: (original, expanded) => { logExpansionTree(original, expanded); @@ -2181,7 +2227,7 @@ async function querySearch(query: string, opts: OutputOptions, _embedModel: stri // Intent can come from --intent flag or from intent: line in query document const intent = opts.intent || parsed?.intent; - await withLLMSession(async () => { + await withLLMSessionAuto(async () => { let results; if (parsed) { @@ -2234,6 +2280,7 @@ async function querySearch(query: string, opts: OutputOptions, _embedModel: stri candidateLimit: opts.candidateLimit, explain: !!opts.explain, intent, + llm: shouldUseRemote() ? 
getDefaultRemoteLLM() : undefined, hooks: { onStrongSignal: (score) => { process.stderr.write(`${c.dim}Strong BM25 signal (${score.toFixed(2)}) — skipping expansion${c.reset}\n`); @@ -2349,6 +2396,12 @@ function parseCLI() { http: { type: "boolean" }, daemon: { type: "boolean" }, port: { type: "string" }, + // Remote LLM options + local: { type: "boolean" }, + "embed-url": { type: "string" }, + "rerank-url": { type: "string" }, + "generate-url": { type: "string" }, + "generate-model": { type: "string" }, }, allowPositionals: true, strict: false, // Allow unknown options to pass through @@ -2543,6 +2596,11 @@ function showHelp(): void { console.log(" qmd context add/list/rm - Attach human-written summaries"); console.log(" qmd ls [collection[/path]] - Inspect indexed files"); console.log(""); + console.log("Remote LLM backend:"); + console.log(" qmd remote set - Configure remote embed/rerank/generate endpoints"); + console.log(" qmd remote status - Show remote config and check health"); + console.log(" qmd remote clear - Clear remote config (revert to local)"); + console.log(""); console.log("Maintenance:"); console.log(" qmd status - View index + collection health"); console.log(" qmd update [--pull] - Re-index collections (optionally git pull first)"); @@ -2592,6 +2650,7 @@ function showHelp(): void { console.log(""); console.log("Global options:"); console.log(" --index - Use a named index (default: index)"); + console.log(" --local - Force local LLM mode (ignore remote config)"); console.log(""); console.log("Search options:"); console.log(" -n - Max results (default 5, or 20 for --files/--json)"); @@ -2662,6 +2721,10 @@ if (isMain) { process.exit(0); } + if (cli.values.local) { + setForceLocalMode(true); + } + if (!cli.command || cli.values.help) { showHelp(); process.exit(cli.values.help ? 
0 : 1); @@ -3132,6 +3195,92 @@ if (isMain) { break; } + case "remote": { + const subcommand = cli.args[0]; + if (!subcommand) { + console.error("Usage: qmd remote "); + console.error(""); + console.error("Commands:"); + console.error(" qmd remote set "); + console.error(" qmd remote status - Show current remote configuration"); + console.error(" qmd remote clear - Clear remote config (use local mode)"); + process.exit(1); + } + + switch (subcommand) { + case "set": { + let embedUrl = cli.values["embed-url"] as string | undefined; + let rerankUrl = cli.values["rerank-url"] as string | undefined; + let generateUrl = cli.values["generate-url"] as string | undefined; + const generateModel = cli.values["generate-model"] as string | undefined; + + if (cli.args.length >= 4) { + embedUrl = cli.args[1]; + rerankUrl = cli.args[2]; + generateUrl = cli.args[3]; + } else if (cli.args.length === 2 && cli.args[1]) { + embedUrl = cli.args[1]; + rerankUrl = cli.args[1]; + generateUrl = cli.args[1]; + } + + if (!embedUrl && !rerankUrl && !generateUrl && !generateModel) { + console.error("Usage: qmd remote set "); + process.exit(1); + } + + const existingConfig = loadRemoteConfig(); + const newConfig = { + embedUrl: embedUrl ?? existingConfig.embedUrl, + rerankUrl: rerankUrl ?? existingConfig.rerankUrl, + generateUrl: generateUrl ?? existingConfig.generateUrl, + generateModel: generateModel ?? 
existingConfig.generateModel, + }; + + saveRemoteConfig(newConfig); + console.log("Remote configuration saved"); + console.log(` Embed: ${newConfig.embedUrl || "(not set)"}`); + console.log(` Rerank: ${newConfig.rerankUrl || "(not set)"}`); + console.log(` Generate: ${newConfig.generateUrl || "(not set)"}`); + if (newConfig.generateModel) { + console.log(` Generate model: ${newConfig.generateModel}`); + } + break; + } + case "status": { + const config = loadRemoteConfig(); + if (!config.embedUrl && !config.rerankUrl && !config.generateUrl) { + console.log("Remote mode: disabled (using local models)"); + } else { + console.log("Remote Configuration\n"); + console.log(` Embed: ${config.embedUrl || "(not set)"}`); + console.log(` Rerank: ${config.rerankUrl || "(not set)"}`); + console.log(` Generate: ${config.generateUrl || "(not set)"}`); + if (config.generateModel) { + console.log(` Generate model: ${config.generateModel}`); + } + console.log("\nChecking endpoint health..."); + const remote = new RemoteLLM(config); + const health = await remote.checkHealth(); + console.log(` Embed: ${health.embed ? "healthy" : "unreachable"}`); + console.log(` Rerank: ${health.rerank ? "healthy" : "unreachable"}`); + console.log(` Generate: ${health.generate ? 
"healthy" : "unreachable"}`); + } + break; + } + case "clear": { + clearRemoteConfig(); + console.log("Remote configuration cleared"); + console.log("Now using local models"); + break; + } + default: + console.error(`Unknown remote subcommand: ${subcommand}`); + process.exit(1); + } + break; + } + default: console.error(`Unknown command: ${cli.command}`); console.error("Run 'qmd --help' for usage."); @@ -3139,7 +3288,7 @@ if (isMain) { } if (cli.command !== "mcp") { - await disposeDefaultLlamaCpp(); + await disposeAllLLM(); process.exit(0); } diff --git a/src/collections.ts b/src/collections.ts index 257f144f..98ac6834 100644 --- a/src/collections.ts +++ b/src/collections.ts @@ -10,6 +10,20 @@ import { join, dirname } from "path"; import { homedir } from "os"; import YAML from "yaml"; +// ============================================================================ +// Config path resolution (avoids circular import with store) +// ============================================================================ + +let _qmdDirResolver: (() => string | null) | null = null; + +/** + * Set the resolver for the effective .qmd directory. + * When set, config is loaded from {qmdDir}/index.yml first. + */ +export function setConfigDirResolver(resolver: (() => string | null) | null): void { + _qmdDirResolver = resolver; +} + // ============================================================================ // Types // ============================================================================ @@ -99,28 +113,26 @@ export function setConfigIndexName(name: string): void { } } -function getConfigDir(): string { - // Allow override via QMD_CONFIG_DIR for testing +function getConfigFilePath(): string { + // 1. 
Test override (QMD_CONFIG_DIR) if (process.env.QMD_CONFIG_DIR) { - return process.env.QMD_CONFIG_DIR; + return join(process.env.QMD_CONFIG_DIR, "index.yml"); } - // Respect XDG Base Directory specification (consistent with store.ts) - if (process.env.XDG_CONFIG_HOME) { - return join(process.env.XDG_CONFIG_HOME, "qmd"); + // 2. Index-colocated config when using a .qmd directory + if (_qmdDirResolver) { + const qmdDir = _qmdDirResolver(); + if (qmdDir) { + return join(qmdDir, "index.yml"); + } } - return join(homedir(), ".config", "qmd"); + // 3. Fallback to global config + return join(homedir(), ".config", "qmd", "index.yml"); } -function getConfigFilePath(): string { - return join(getConfigDir(), `${currentIndexName}.yml`); -} - -/** - * Ensure config directory exists - */ function ensureConfigDir(): void { - const configDir = getConfigDir(); - if (!existsSync(configDir)) { + const configPath = getConfigFilePath(); + const configDir = dirname(configPath); + if (configDir && !existsSync(configDir)) { mkdirSync(configDir, { recursive: true }); } } @@ -130,10 +142,14 @@ function ensureConfigDir(): void { // ============================================================================ /** - * Load configuration from the configured source. - * - Inline config: returns the in-memory object directly - * - File-based: reads from YAML file (default ~/.config/qmd/index.yml) - * Returns empty config if file doesn't exist + * Load collection configuration. + * + * Resolution order for the config file: + * 1. QMD_CONFIG_DIR env var (test override) -> {QMD_CONFIG_DIR}/index.yml + * 2. .qmd directory via setConfigDirResolver() -> {qmdDir}/index.yml + * 3. Global fallback -> ~/.config/qmd/index.yml + * + * Returns empty config if file doesn't exist. */ export function loadConfig(): CollectionConfig { // SDK inline config mode @@ -163,9 +179,12 @@ export function loadConfig(): CollectionConfig { } /** - * Save configuration to the configured source.
- * - Inline config: updates the in-memory object (no file I/O) - * - File-based: writes to YAML file (default ~/.config/qmd/index.yml) + * Save collection configuration. + * + * Resolution order for the config file: + * 1. QMD_CONFIG_DIR env var (test override) -> {QMD_CONFIG_DIR}/index.yml + * 2. .qmd directory via setConfigDirResolver() -> {qmdDir}/index.yml + * 3. Global fallback -> ~/.config/qmd/index.yml */ export function saveConfig(config: CollectionConfig): void { // SDK inline config mode: update in place, no file I/O diff --git a/src/llm-remote.ts b/src/llm-remote.ts new file mode 100644 index 00000000..73b44a78 --- /dev/null +++ b/src/llm-remote.ts @@ -0,0 +1,759 @@ +/** + * llm-remote.ts - Remote LLM backend for QMD using HTTP endpoints + * + * Provides embeddings, text generation, and reranking via remote llama.cpp servers. + */ + +import { homedir } from "os"; +import { join } from "path"; +import { existsSync, readFileSync, writeFileSync, mkdirSync } from "fs"; + +import type { + LLM, + EmbeddingResult, + GenerateResult, + ModelInfo, + EmbedOptions, + GenerateOptions, + RerankOptions, + RerankDocument, + RerankDocumentResult, + RerankResult, + Queryable, + QueryType, + ILLMSession, + LLMSessionOptions, +} from "./llm.js"; + +// ============================================================================= +// Configuration +// ============================================================================= + +export type RemoteLLMConfig = { + embedUrl?: string; // e.g. "http://192.168.1.100:8081" + rerankUrl?: string; // e.g. "http://192.168.1.100:8082" + generateUrl?: string; // e.g. "http://192.168.1.100:8083" or "http://localhost:4000" (LiteLLM) + generateModel?: string; // e.g. 
"gpt-4o-mini" or "ollama/llama3" - required for LiteLLM, optional for llama.cpp +}; + +// Config file path +const CONFIG_DIR = join(homedir(), ".cache", "qmd"); +const CONFIG_FILE = join(CONFIG_DIR, "config.json"); + +/** + * Load remote config from file + */ +export function loadRemoteConfig(): RemoteLLMConfig { + try { + if (existsSync(CONFIG_FILE)) { + const data = JSON.parse(readFileSync(CONFIG_FILE, "utf-8")); + return data.remote || {}; + } + } catch (e) { + // Ignore errors, return empty config + } + return {}; +} + +/** + * Save remote config to file + */ +export function saveRemoteConfig(config: RemoteLLMConfig): void { + try { + if (!existsSync(CONFIG_DIR)) { + mkdirSync(CONFIG_DIR, { recursive: true }); + } + + let data: Record = {}; + if (existsSync(CONFIG_FILE)) { + try { + data = JSON.parse(readFileSync(CONFIG_FILE, "utf-8")); + } catch { + // Start fresh if parse fails + } + } + + data.remote = config; + writeFileSync(CONFIG_FILE, JSON.stringify(data, null, 2)); + } catch (e) { + console.error("Failed to save remote config:", e); + } +} + +/** + * Clear remote config + */ +export function clearRemoteConfig(): void { + try { + if (existsSync(CONFIG_FILE)) { + let data: Record = {}; + try { + data = JSON.parse(readFileSync(CONFIG_FILE, "utf-8")); + } catch { + // Start fresh + } + delete data.remote; + writeFileSync(CONFIG_FILE, JSON.stringify(data, null, 2)); + } + } catch (e) { + console.error("Failed to clear remote config:", e); + } +} + +// ============================================================================= +// QMD Directory Config (persistent path to .qmd folder) +// ============================================================================= + +/** + * Load saved qmdDir from config + */ +export function loadQmdDirConfig(): string | null { + try { + if (existsSync(CONFIG_FILE)) { + const data = JSON.parse(readFileSync(CONFIG_FILE, "utf-8")); + return data.qmdDir || null; + } + } catch { + // Ignore errors + } + return null; +} + +/** + 
* Save qmdDir to config + */ +export function saveQmdDirConfig(qmdDir: string): void { + try { + if (!existsSync(CONFIG_DIR)) { + mkdirSync(CONFIG_DIR, { recursive: true }); + } + + let data: Record = {}; + if (existsSync(CONFIG_FILE)) { + try { + data = JSON.parse(readFileSync(CONFIG_FILE, "utf-8")); + } catch { + // Start fresh + } + } + + data.qmdDir = qmdDir; + writeFileSync(CONFIG_FILE, JSON.stringify(data, null, 2)); + } catch (e) { + console.error("Failed to save qmdDir config:", e); + } +} + +/** + * Clear qmdDir from config + */ +export function clearQmdDirConfig(): void { + try { + if (existsSync(CONFIG_FILE)) { + let data: Record = {}; + try { + data = JSON.parse(readFileSync(CONFIG_FILE, "utf-8")); + } catch { + // Start fresh + } + delete data.qmdDir; + writeFileSync(CONFIG_FILE, JSON.stringify(data, null, 2)); + } + } catch { + // Ignore errors + } +} + +/** + * Check if remote mode is configured + */ +export function isRemoteConfigured(): boolean { + const config = loadRemoteConfig(); + return !!(config.embedUrl || config.rerankUrl || config.generateUrl); +} + +// ============================================================================= +// Remote LLM Implementation +// ============================================================================= + +/** + * LLM implementation using remote HTTP endpoints (llama.cpp servers) + */ +export class RemoteLLM implements LLM { + private embedUrl: string | null; + private rerankUrl: string | null; + private generateUrl: string | null; + private generateModel: string | null; + + constructor(config: RemoteLLMConfig = {}) { + // Load from saved config, then override with explicit config + const savedConfig = loadRemoteConfig(); + this.embedUrl = config.embedUrl ?? savedConfig.embedUrl ?? null; + this.rerankUrl = config.rerankUrl ?? savedConfig.rerankUrl ?? null; + this.generateUrl = config.generateUrl ?? savedConfig.generateUrl ?? null; + this.generateModel = config.generateModel ?? 
savedConfig.generateModel ?? null; + } + + /** + * Get embeddings via remote server (retries up to 3 times on transient errors) + */ + async embed(text: string, options: EmbedOptions = {}): Promise { + if (!this.embedUrl) { + console.error("No embed URL configured"); + return null; + } + + const MAX_RETRIES = 3; + for (let attempt = 0; attempt < MAX_RETRIES; attempt++) { + try { + const response = await fetch(`${this.embedUrl}/v1/embeddings`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + input: text, + model: "embeddinggemma", + }), + }); + + if (response.status === 400) { + // Client error — retrying won't help + console.error(`Embed request failed: ${response.status} ${response.statusText}`); + return null; + } + + if (!response.ok) { + console.error(`Embed request failed: ${response.status} ${response.statusText}`); + if (attempt < MAX_RETRIES - 1) { + await new Promise(r => setTimeout(r, 1000 * (attempt + 1))); + continue; + } + return null; + } + + const data = await response.json() as { + data: Array<{ embedding: number[] }>; + model: string; + }; + + if (!data.data || data.data.length === 0) { + console.error("No embedding data in response"); + return null; + } + + return { + embedding: data.data[0]!.embedding, + model: data.model || "remote-embed", + }; + } catch (error) { + if (attempt < MAX_RETRIES - 1) { + await new Promise(r => setTimeout(r, 1000 * (attempt + 1))); + continue; + } + console.error("Embedding error:", error); + return null; + } + } + return null; + } + + /** + * Batch embed multiple texts in a single API call. + * On batch failure, falls back to sequential individual requests (which have their own retries). 
+ */ + async embedBatch(texts: string[]): Promise<(EmbeddingResult | null)[]> { + if (texts.length === 0) return []; + if (!this.embedUrl) { + return texts.map(() => null); + } + + try { + // Send all texts in a single request (OpenAI API supports array input) + const response = await fetch(`${this.embedUrl}/v1/embeddings`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + input: texts, + model: "embeddinggemma", + }), + }); + + if (!response.ok) { + console.error(`Batch embed failed: ${response.status} ${response.statusText}`); + // Fall back to sequential individual requests (each has retries) + const results: (EmbeddingResult | null)[] = []; + for (const text of texts) { + results.push(await this.embed(text)); + } + return results; + } + + const data = await response.json() as { + data: Array<{ embedding: number[]; index: number }>; + model: string; + }; + + if (!data.data || data.data.length === 0) { + console.error("No embedding data in batch response"); + return texts.map(() => null); + } + + // Map results back to original order (API may return in different order) + const results: (EmbeddingResult | null)[] = new Array(texts.length).fill(null); + for (const item of data.data) { + if (item.index < texts.length) { + results[item.index] = { + embedding: item.embedding, + model: data.model || "remote-embed", + }; + } + } + return results; + } catch (error) { + console.error("Batch embedding error:", error); + // Fall back to sequential individual requests (each has retries) + const results: (EmbeddingResult | null)[] = []; + for (const text of texts) { + results.push(await this.embed(text)); + } + return results; + } + } + + /** + * Generate text via remote server + */ + async generate(prompt: string, options: GenerateOptions = {}): Promise { + if (!this.generateUrl) { + console.error("No generate URL configured"); + return null; + } + + try { + const body: Record = { + prompt, + max_tokens: options.maxTokens ?? 
150, + temperature: options.temperature ?? 0, + }; + if (this.generateModel) { + body.model = this.generateModel; + } + const response = await fetch(`${this.generateUrl}/v1/completions`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(body), + }); + + if (!response.ok) { + console.error(`Generate request failed: ${response.status} ${response.statusText}`); + return null; + } + + const data = await response.json() as { + choices: Array<{ text: string }>; + model: string; + }; + + if (!data.choices || data.choices.length === 0) { + console.error("No choices in response"); + return null; + } + + return { + text: data.choices[0]!.text, + model: data.model || "remote-generate", + done: true, + }; + } catch (error) { + console.error("Generate error:", error); + return null; + } + } + + /** + * Check if model exists (always returns true for remote) + */ + async modelExists(model: string): Promise { + return { name: model, exists: true }; + } + + /** + * Expand a search query into multiple variations + */ + async expandQuery( + query: string, + options: { context?: string; includeLexical?: boolean } = {} + ): Promise { + if (!this.generateUrl) { + // Fallback to original query + const fallback: Queryable[] = [{ type: 'vec', text: query }]; + if (options.includeLexical !== false) { + fallback.unshift({ type: 'lex', text: query }); + } + return fallback; + } + + const includeLexical = options.includeLexical ?? true; + const context = options.context; + + const prompt = `You are a search query optimization expert. Your task is to improve retrieval by rewriting queries and generating hypothetical documents. + +Original Query: ${query} + +${context ? `Additional Context, ONLY USE IF RELEVANT:\n\n${context}` : ""} + +## Step 1: Query Analysis +Identify entities, search intent, and missing context. + +## Step 2: Generate Hypothetical Document +Write a focused sentence passage that would answer the query. 
Include specific terminology and domain vocabulary. + +## Step 3: Query Rewrites +Generate 2-3 alternative search queries that resolve ambiguities. Use terminology from the hypothetical document. + +## Step 4: Final Retrieval Text +Output MAX ONE 'hyde' line FIRST, then 1-3 'lex' lines, then 1-3 'vec' lines. + + +hyde: {complete hypothetical document passage from Step 2 on a SINGLE LINE} +lex: {single search term} +vec: {single vector query} + + + +Example (FOR FORMAT ONLY - DO NOT COPY THIS CONTENT): +hyde: This is an example of a hypothetical document passage that would answer the example query. It contains multiple sentences and relevant vocabulary. +lex: example keyword 1 +lex: example keyword 2 +vec: example semantic query + + + +- DO NOT repeat the same line. +- Each 'lex:' line MUST be a different keyword variation based on the ORIGINAL QUERY. +- Each 'vec:' line MUST be a different semantic variation based on the ORIGINAL QUERY. +- The 'hyde:' line MUST be the full sentence passage from Step 2, but all on one line. +- DO NOT use the example content above. +${!includeLexical ? 
"- Do NOT output any 'lex:' lines" : ""} + + +Final Output:`; + + try { + const result = await this.generate(prompt, { maxTokens: 1000, temperature: 1 }); + if (!result) { + throw new Error("Generation failed"); + } + + const lines = result.text.trim().split("\n"); + const queryables: Queryable[] = lines.map((line: string) => { + const colonIdx = line.indexOf(":"); + if (colonIdx === -1) return null; + const type = line.slice(0, colonIdx).trim(); + if (type !== 'lex' && type !== 'vec' && type !== 'hyde') return null; + const text = line.slice(colonIdx + 1).trim(); + return { type: type as QueryType, text }; + }).filter((q: Queryable | null): q is Queryable => q !== null); + + // Filter out lex entries if not requested + if (!includeLexical) { + return queryables.filter(q => q.type !== 'lex'); + } + return queryables; + } catch (error) { + console.error("Query expansion failed:", error); + // Fallback to original query + const fallback: Queryable[] = [{ type: 'vec', text: query }]; + if (includeLexical) fallback.unshift({ type: 'lex', text: query }); + return fallback; + } + } + + /** + * Rerank documents by relevance to a query + */ + async rerank( + query: string, + documents: RerankDocument[], + options: RerankOptions = {} + ): Promise { + if (!this.rerankUrl) { + // Return documents in original order with default scores + return { + results: documents.map((doc, index) => ({ + file: doc.file, + score: 1 - (index * 0.1), // Decreasing scores + index, + })), + model: "no-rerank", + }; + } + + // If we have more than 10 documents, batch them to avoid overwhelming the server + const BATCH_SIZE = 10; + if (documents.length > BATCH_SIZE) { + try { + const allResults: RerankDocumentResult[] = []; + let modelName = "remote-rerank"; + + // Process in batches + for (let i = 0; i < documents.length; i += BATCH_SIZE) { + const batch = documents.slice(i, i + BATCH_SIZE); + const batchResult = await this.rerankBatch(query, batch, i); + allResults.push(...batchResult.results); 
+ modelName = batchResult.model; + } + + // Sort all results by score descending + allResults.sort((a, b) => b.score - a.score); + + return { + results: allResults, + model: modelName, + }; + } catch (error) { + console.error("Batch rerank error:", error); + // Fallback + return { + results: documents.map((doc, index) => ({ + file: doc.file, + score: 1 - (index * 0.1), + index, + })), + model: "rerank-fallback", + }; + } + } + + // Single batch - use existing logic + return this.rerankBatch(query, documents, 0); + } + + /** + * Rerank a single batch of documents + */ + private async rerankBatch( + query: string, + documents: RerankDocument[], + indexOffset: number + ): Promise { + try { + const texts = documents.map(doc => doc.text); + + const response = await fetch(`${this.rerankUrl}/v1/rerank`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + query, + documents: texts, + model: "qwen3-reranker", + }), + }); + + if (!response.ok) { + console.error(`Rerank request failed: ${response.status} ${response.statusText}`); + // Try to get error details from response body + try { + const errorText = await response.text(); + console.error(`Rerank error details: ${errorText}`); + } catch (e) { + // Ignore if we can't read the error body + } + throw new Error("Rerank request failed"); + } + + const data = await response.json() as { + results: Array<{ index: number; relevance_score: number }>; + model: string; + }; + + // Map results back to our format (with adjusted indices) + const results: RerankDocumentResult[] = data.results.map(item => ({ + file: documents[item.index]!.file, + score: item.relevance_score, + index: indexOffset + item.index, + })); + + // Sort by score descending + results.sort((a, b) => b.score - a.score); + + return { + results, + model: data.model || "remote-rerank", + }; + } catch (error) { + console.error("Rerank batch error:", error); + // Return documents in original order with default scores + return 
{ + results: documents.map((doc, index) => ({ + file: doc.file, + score: 1 - (index * 0.1), + index: indexOffset + index, + })), + model: "rerank-fallback", + }; + } + } + + /** + * Dispose (no-op for remote) + */ + async dispose(): Promise { + // Nothing to dispose for remote connections + } + + /** + * Check health of remote endpoints + */ + async checkHealth(): Promise<{ embed: boolean; rerank: boolean; generate: boolean }> { + const results = { embed: false, rerank: false, generate: false }; + + const checkEndpoint = async (url: string | null): Promise => { + if (!url) return false; + try { + const response = await fetch(`${url}/health`, { method: "GET" }); + return response.ok; + } catch { + return false; + } + }; + + [results.embed, results.rerank, results.generate] = await Promise.all([ + checkEndpoint(this.embedUrl), + checkEndpoint(this.rerankUrl), + checkEndpoint(this.generateUrl), + ]); + + return results; + } + + /** + * Get configured URLs + */ + getConfig(): RemoteLLMConfig { + return { + embedUrl: this.embedUrl || undefined, + rerankUrl: this.rerankUrl || undefined, + generateUrl: this.generateUrl || undefined, + generateModel: this.generateModel || undefined, + }; + } +} + +// ============================================================================= +// Remote LLM Session (implements ILLMSession for compatibility) +// ============================================================================= + +/** + * Session wrapper for RemoteLLM that implements ILLMSession interface. + * This allows RemoteLLM to be used with the existing withLLMSession pattern. 
+ */ +class RemoteLLMSession implements ILLMSession { + private llm: RemoteLLM; + private released = false; + private abortController: AbortController; + + constructor(llm: RemoteLLM, _options: LLMSessionOptions = {}) { + this.llm = llm; + this.abortController = new AbortController(); + } + + get isValid(): boolean { + return !this.released && !this.abortController.signal.aborted; + } + + get signal(): AbortSignal { + return this.abortController.signal; + } + + release(): void { + this.released = true; + this.abortController.abort(); + } + + async embed(text: string, options?: EmbedOptions): Promise { + if (!this.isValid) return null; + return this.llm.embed(text, options); + } + + async embedBatch(texts: string[]): Promise<(EmbeddingResult | null)[]> { + if (!this.isValid) return texts.map(() => null); + return this.llm.embedBatch(texts); + } + + async expandQuery( + query: string, + options?: { context?: string; includeLexical?: boolean } + ): Promise { + if (!this.isValid) return [{ type: 'vec', text: query }]; + return this.llm.expandQuery(query, options); + } + + async rerank( + query: string, + documents: RerankDocument[], + options?: RerankOptions + ): Promise { + if (!this.isValid) { + return { + results: documents.map((doc, index) => ({ + file: doc.file, + score: 1 - (index * 0.1), + index, + })), + model: "session-invalid", + }; + } + return this.llm.rerank(query, documents, options); + } +} + +/** + * Execute a function with a scoped RemoteLLM session. + * Compatible with the existing withLLMSession pattern. 
+ */ +export async function withRemoteLLMSession( + fn: (session: ILLMSession) => Promise, + options?: LLMSessionOptions +): Promise { + const llm = getDefaultRemoteLLM(); + const session = new RemoteLLMSession(llm, options); + + try { + return await fn(session); + } finally { + session.release(); + } +} + +// ============================================================================= +// Singleton for default RemoteLLM instance +// ============================================================================= + +let defaultRemoteLLM: RemoteLLM | null = null; + +/** + * Get the default RemoteLLM instance (creates one if needed) + */ +export function getDefaultRemoteLLM(): RemoteLLM { + if (!defaultRemoteLLM) { + defaultRemoteLLM = new RemoteLLM(); + } + return defaultRemoteLLM; +} + +/** + * Set a custom default RemoteLLM instance + */ +export function setDefaultRemoteLLM(llm: RemoteLLM | null): void { + defaultRemoteLLM = llm; +} + +/** + * Dispose the default RemoteLLM instance if it exists + */ +export async function disposeDefaultRemoteLLM(): Promise { + if (defaultRemoteLLM) { + await defaultRemoteLLM.dispose(); + defaultRemoteLLM = null; + } +} diff --git a/src/llm.ts b/src/llm.ts index 39ab28b5..11a22c26 100644 --- a/src/llm.ts +++ b/src/llm.ts @@ -317,6 +317,11 @@ export interface LLM { */ embed(text: string, options?: EmbedOptions): Promise; + /** + * Get embeddings for multiple texts in a single batch call + */ + embedBatch(texts: string[]): Promise<(EmbeddingResult | null)[]>; + /** * Generate text completion */ diff --git a/src/store.ts b/src/store.ts index aa5fae4f..a1c17cf5 100644 --- a/src/store.ts +++ b/src/store.ts @@ -25,6 +25,7 @@ import { formatDocForEmbedding, withLLMSessionForLlm, type LLMSessionOptions, + type LLM, type RerankDocument, type ILLMSession, } from "./llm.js"; @@ -427,6 +428,75 @@ export function enableProductionMode(): void { _productionMode = true; } +// 
============================================================================= +// .qmd directory resolution +// ============================================================================= + +/** + * Find a local .qmd directory by searching current directory and parents. + */ +export function findLocalQmdDir(startDir?: string): string | null { + let dir = startDir || getPwd(); + const root = resolve("/"); + + while (dir !== root) { + const qmdDir = resolve(dir, ".qmd"); + try { + const stat = statSync(qmdDir); + if (stat.isDirectory()) return qmdDir; + } catch { + // Directory doesn't exist, continue searching + } + const parent = resolve(dir, ".."); + if (parent === dir) break; + dir = parent; + } + return null; +} + +/** + * Initialize a local .qmd directory in the specified path. + */ +export function initLocalQmdDir(targetDir?: string): string { + const dir = targetDir || getPwd(); + const qmdDir = resolve(dir, ".qmd"); + mkdirSync(qmdDir, { recursive: true }); + return qmdDir; +} + +// CLI-provided qmdDir (highest priority) +let _cliQmdDir: string | null = null; + +export function setCliQmdDir(qmdDir: string | null): void { + _cliQmdDir = qmdDir; +} + +export function getCliQmdDir(): string | null { + return _cliQmdDir; +} + +// Config loader for saved qmdDir (avoids circular imports) +let _loadQmdDirConfig: (() => string | null) | null = null; + +export function setQmdDirConfigLoader(loader: () => string | null): void { + _loadQmdDirConfig = loader; +} + +/** + * Get the effective qmdDir with priority: + * 1. CLI flag (--qmd-dir) + * 2. Saved config (~/.cache/qmd/config.json) + * 3. 
Auto-discover (search upward for .qmd) + */ +export function getEffectiveQmdDir(): string | null { + if (_cliQmdDir) return _cliQmdDir; + if (_loadQmdDirConfig) { + const saved = _loadQmdDirConfig(); + if (saved) return saved; + } + return findLocalQmdDir(); +} + export function getDefaultDbPath(indexName: string = "index"): string { // Always allow override via INDEX_PATH (for testing) if (process.env.INDEX_PATH) { @@ -441,6 +511,13 @@ export function getDefaultDbPath(indexName: string = "index"): string { ); } + // Check for .qmd directory (CLI > config > auto-discover) + const qmdDir = getEffectiveQmdDir(); + if (qmdDir) { + return resolve(qmdDir, `${indexName}.sqlite`); + } + + // Fall back to global cache const cacheDir = process.env.XDG_CACHE_HOME || resolve(homedir(), ".cache"); const qmdCacheDir = resolve(cacheDir, "qmd"); try { mkdirSync(qmdCacheDir, { recursive: true }); } catch { } @@ -1012,8 +1089,8 @@ export type Store = { searchVec: (query: string, model: string, limit?: number, collectionName?: string, session?: ILLMSession, precomputedEmbedding?: number[]) => Promise; // Query expansion & reranking - expandQuery: (query: string, model?: string, intent?: string) => Promise; - rerank: (query: string, documents: { file: string; text: string }[], model?: string, intent?: string) => Promise<{ file: string; score: number }[]>; + expandQuery: (query: string, model?: string, intent?: string, llm?: LLM) => Promise; + rerank: (query: string, documents: { file: string; text: string }[], model?: string, intent?: string, llm?: LLM) => Promise<{ file: string; score: number }[]>; // Document retrieval findDocument: (filename: string, options?: { includeBody?: boolean }) => DocumentResult | DocumentNotFound; @@ -1364,8 +1441,8 @@ export function createStore(dbPath?: string): Store { searchVec: (query: string, model: string, limit?: number, collectionName?: string, session?: ILLMSession, precomputedEmbedding?: number[]) => searchVec(db, query, model, limit, 
collectionName, session, precomputedEmbedding), // Query expansion & reranking - expandQuery: (query: string, model?: string, intent?: string) => expandQuery(query, model, db, intent, store.llm), - rerank: (query: string, documents: { file: string; text: string }[], model?: string, intent?: string) => rerank(query, documents, model, db, intent, store.llm), + expandQuery: (query: string, model?: string, intent?: string, llm?: LLM) => expandQuery(query, model, db, intent, llm ?? store.llm), + rerank: (query: string, documents: { file: string; text: string }[], model?: string, intent?: string, llm?: LLM) => rerank(query, documents, model, db, intent, llm ?? store.llm), // Document retrieval findDocument: (filename: string, options?: { includeBody?: boolean }) => findDocument(db, filename, options), @@ -2820,7 +2897,7 @@ export function insertEmbedding( // Query expansion // ============================================================================= -export async function expandQuery(query: string, model: string = DEFAULT_QUERY_MODEL, db: Database, intent?: string, llmOverride?: LlamaCpp): Promise { +export async function expandQuery(query: string, model: string = DEFAULT_QUERY_MODEL, db: Database, intent?: string, llmOverride?: LLM): Promise { // Check cache first — stored as JSON preserving types const cacheKey = getCacheKey("expandQuery", { query, model, ...(intent && { intent }) }); const cached = getCachedResult(db, cacheKey); @@ -2859,7 +2936,7 @@ export async function expandQuery(query: string, model: string = DEFAULT_QUERY_M // Reranking // ============================================================================= -export async function rerank(query: string, documents: { file: string; text: string }[], model: string = DEFAULT_RERANK_MODEL, db: Database, intent?: string, llmOverride?: LlamaCpp): Promise<{ file: string; score: number }[]> { +export async function rerank(query: string, documents: { file: string; text: string }[], model: string = 
DEFAULT_RERANK_MODEL, db: Database, intent?: string, llmOverride?: LLM): Promise<{ file: string; score: number }[]> { // Prepend intent to rerank query so the reranker scores with domain context const rerankQuery = intent ? `${intent}\n\n${query}` : query; @@ -3530,6 +3607,7 @@ export interface HybridQueryOptions { intent?: string; // domain intent hint for disambiguation skipRerank?: boolean; // skip LLM reranking, use only RRF scores hooks?: SearchHooks; + llm?: LLM; // override LLM backend (default: local node-llama-cpp) } export interface HybridQueryResult { @@ -3604,7 +3682,7 @@ export async function hybridQuery( const expandStart = Date.now(); const expanded = hasStrongSignal ? [] - : await store.expandQuery(query, undefined, intent); + : await store.expandQuery(query, undefined, intent, options?.llm); hooks?.onExpand?.(query, expanded, Date.now() - expandStart); @@ -3651,21 +3729,26 @@ export async function hybridQuery( } // Batch embed all vector queries in a single call - const llm = getLlm(store); + const llm = options?.llm ?? 
getLlm(store); const textsToEmbed = vecQueries.map(q => formatQueryForEmbedding(q.text)); hooks?.onEmbedStart?.(textsToEmbed.length); const embedStart = Date.now(); const embeddings = await llm.embedBatch(textsToEmbed); hooks?.onEmbedDone?.(Date.now() - embedStart); - // Run sqlite-vec lookups with pre-computed embeddings - for (let i = 0; i < vecQueries.length; i++) { - const embedding = embeddings[i]?.embedding; - if (!embedding) continue; + // Average all embeddings into one vector for a single scan + const validEmbeddings = embeddings.filter(e => e !== null).map(e => e!.embedding); + if (validEmbeddings.length > 0) { + const dim = validEmbeddings[0]!.length; + const avgEmbedding = new Array(dim).fill(0); + for (const emb of validEmbeddings) { + for (let j = 0; j < dim; j++) avgEmbedding[j] += emb[j]!; + } + for (let j = 0; j < dim; j++) avgEmbedding[j] /= validEmbeddings.length; const vecResults = await store.searchVec( - vecQueries[i]!.text, DEFAULT_EMBED_MODEL, 20, collection, - undefined, embedding + query, DEFAULT_EMBED_MODEL, 20, collection, + undefined, avgEmbedding ); if (vecResults.length > 0) { for (const r of vecResults) docidMap.set(r.filepath, r.docid); @@ -3777,7 +3860,7 @@ export async function hybridQuery( hooks?.onRerankStart?.(chunksToRerank.length); const rerankStart = Date.now(); - const reranked = await store.rerank(query, chunksToRerank, undefined, intent); + const reranked = await store.rerank(query, chunksToRerank, undefined, intent, options?.llm); hooks?.onRerankDone?.(Date.now() - rerankStart); // Step 7: Blend RRF position score with reranker score @@ -3850,6 +3933,7 @@ export interface VectorSearchOptions { minScore?: number; // default 0.3 intent?: string; // domain intent hint for disambiguation hooks?: Pick; + llm?: LLM; } export interface VectorSearchResult { @@ -3888,29 +3972,39 @@ export async function vectorSearchQuery( // Expand query — filter to vec/hyde only (lex queries target FTS, not vector) const expandStart = 
Date.now(); - const allExpanded = await store.expandQuery(query, undefined, intent); + const allExpanded = await store.expandQuery(query, undefined, intent, options?.llm); const vecExpanded = allExpanded.filter(q => q.type !== 'lex'); options?.hooks?.onExpand?.(query, vecExpanded, Date.now() - expandStart); - // Run original + vec/hyde expanded through vector, sequentially — concurrent embed() hangs + // Batch embed all query texts, then average into single embedding for one scan const queryTexts = [query, ...vecExpanded.map(q => q.query)]; + const llm = options?.llm ?? getLlm(store); + const textsToEmbed = queryTexts.map(q => formatQueryForEmbedding(q)); + const embeddings = await llm.embedBatch(textsToEmbed); + + // Average all embeddings into one vector + const validEmbeddings = embeddings.filter(e => e !== null).map(e => e!.embedding); + if (validEmbeddings.length === 0) return []; + const dim = validEmbeddings[0]!.length; + const avgEmbedding = new Array(dim).fill(0); + for (const emb of validEmbeddings) { + for (let i = 0; i < dim; i++) avgEmbedding[i] += emb[i]!; + } + for (let i = 0; i < dim; i++) avgEmbedding[i] /= validEmbeddings.length; + + // Single scan with averaged embedding + const vecResults = await store.searchVec(query, DEFAULT_EMBED_MODEL, limit, collection, undefined, avgEmbedding); const allResults = new Map(); - for (const q of queryTexts) { - const vecResults = await store.searchVec(q, DEFAULT_EMBED_MODEL, limit, collection); - for (const r of vecResults) { - const existing = allResults.get(r.filepath); - if (!existing || r.score > existing.score) { - allResults.set(r.filepath, { - file: r.filepath, - displayPath: r.displayPath, - title: r.title, - body: r.body || "", - score: r.score, - context: store.getContextForFile(r.filepath), - docid: r.docid, - }); - } - } + for (const r of vecResults) { + allResults.set(r.filepath, { + file: r.filepath, + displayPath: r.displayPath, + title: r.title, + body: r.body || "", + score: r.score, + 
context: store.getContextForFile(r.filepath), + docid: r.docid, + }); } return Array.from(allResults.values()) @@ -3938,6 +4032,7 @@ export interface StructuredSearchOptions { /** Skip LLM reranking, use only RRF scores */ skipRerank?: boolean; hooks?: SearchHooks; + llm?: LLM; } /** @@ -4032,7 +4127,7 @@ export async function structuredSearch( s.type === 'vec' || s.type === 'hyde' ); if (vecSearches.length > 0) { - const llm = getLlm(store); + const llm = options?.llm ?? getLlm(store); const textsToEmbed = vecSearches.map(s => formatQueryForEmbedding(s.query)); hooks?.onEmbedStart?.(textsToEmbed.length); const embedStart = Date.now(); @@ -4167,7 +4262,7 @@ export async function structuredSearch( hooks?.onRerankStart?.(chunksToRerank.length); const rerankStart2 = Date.now(); - const reranked = await store.rerank(primaryQuery, chunksToRerank, undefined, intent); + const reranked = await store.rerank(primaryQuery, chunksToRerank, undefined, intent, options?.llm); hooks?.onRerankDone?.(Date.now() - rerankStart2); // Step 6: Blend RRF position score with reranker score