diff --git a/src/cli/qmd.ts b/src/cli/qmd.ts
index 22b5561c..bde0f4c5 100755
--- a/src/cli/qmd.ts
+++ b/src/cli/qmd.ts
@@ -69,6 +69,7 @@ import {
   DEFAULT_RERANK_MODEL,
   DEFAULT_GLOB,
   DEFAULT_MULTI_GET_MAX_BYTES,
+  getExcludeDirs,
   createStore,
   getDefaultDbPath,
   reindexCollection,
@@ -1467,7 +1468,6 @@ async function indexFiles(pwd?: string, globPattern: string = DEFAULT_GLOB, coll
   const db = getDb();
   const resolvedPwd = pwd || getPwd();
   const now = new Date().toISOString();
-  const excludeDirs = ["node_modules", ".git", ".cache", "vendor", "dist", "build"];
 
   // Clear Ollama cache on index
   clearCache(db);
@@ -1481,7 +1481,7 @@ async function indexFiles(pwd?: string, globPattern: string = DEFAULT_GLOB, coll
   progress.indeterminate();
 
   const allIgnore = [
-    ...excludeDirs.map(d => `**/${d}/**`),
+    ...getExcludeDirs().map(d => `**/${d}/**`),
     ...(ignorePatterns || []),
   ];
   const allFiles: string[] = await fastGlob(globPattern, {
diff --git a/src/store.ts b/src/store.ts
index f17404d8..9b7e1194 100644
--- a/src/store.ts
+++ b/src/store.ts
@@ -47,6 +47,26 @@ export const DEFAULT_MULTI_GET_MAX_BYTES = 10 * 1024; // 10KB
 export const DEFAULT_EMBED_MAX_DOCS_PER_BATCH = 64;
 export const DEFAULT_EMBED_MAX_BATCH_BYTES = 64 * 1024 * 1024; // 64MB
 
+/** Directory names that are always excluded from indexing. */
+const BASE_EXCLUDE_DIRS: readonly string[] = ["node_modules", ".git", ".cache", "vendor", "dist", "build"];
+
+/**
+ * Returns the directory names to skip when globbing for files to index.
+ *
+ * The built-in defaults come first, followed by any extra names from the
+ * comma-separated QMD_EXCLUDE_DIRS environment variable, which is read on every
+ * call. Extra entries are trimmed and stripped of trailing slashes (callers wrap
+ * each name in a recursive glob, so "tmp/" would otherwise never match), empty
+ * entries are dropped, and duplicates are removed.
+ */
+export function getExcludeDirs(): string[] {
+  const extra = (process.env.QMD_EXCLUDE_DIRS ?? "")
+    .split(",")
+    .map(d => d.trim().replace(/\/+$/, ""))
+    .filter(Boolean);
+  return [...new Set([...BASE_EXCLUDE_DIRS, ...extra])];
+}
+
 // Chunking: 900 tokens per chunk with 15% overlap
 // Increased from 800 to accommodate smart chunking finding natural break points
 export const CHUNK_SIZE_TOKENS = 900;
@@ -1087,10 +1107,9 @@ export async function reindexCollection(
 ): Promise {
   const db = store.db;
   const now = new Date().toISOString();
-  const excludeDirs = ["node_modules", ".git", ".cache", "vendor", "dist", "build"];
 
   const allIgnore = [
-    ...excludeDirs.map(d => `**/${d}/**`),
+    ...getExcludeDirs().map(d => `**/${d}/**`),
     ...(options?.ignorePatterns || []),
   ];
   const allFiles: string[] = await fastGlob(globPattern, {
diff --git a/test/store.helpers.unit.test.ts b/test/store.helpers.unit.test.ts
index eb7f8a63..7d46b66d 100644
--- a/test/store.helpers.unit.test.ts
+++ b/test/store.helpers.unit.test.ts
@@ -16,6 +16,7 @@ import {
   isDocid,
   handelize,
   cleanupOrphanedVectors,
+  getExcludeDirs,
 } from "../src/store";
 
 // =============================================================================
@@ -244,3 +245,68 @@ describe("handelize", () => {
     expect(isDocid("12345")).toBe(false);
   });
 });
+
+// =============================================================================
+// getExcludeDirs Tests
+// =============================================================================
+
+describe("getExcludeDirs", () => {
+  // Runs `fn` with QMD_EXCLUDE_DIRS set to `value` (unset when undefined) and
+  // restores the previous state afterwards, even if an assertion throws.
+  function withExcludeDirsEnv(value: string | undefined, fn: () => void): void {
+    const original = process.env.QMD_EXCLUDE_DIRS;
+    if (value === undefined) {
+      delete process.env.QMD_EXCLUDE_DIRS;
+    } else {
+      process.env.QMD_EXCLUDE_DIRS = value;
+    }
+    try {
+      fn();
+    } finally {
+      if (original === undefined) {
+        delete process.env.QMD_EXCLUDE_DIRS;
+      } else {
+        process.env.QMD_EXCLUDE_DIRS = original;
+      }
+    }
+  }
+
+  test("returns default exclude dirs when env var is unset", () => {
+    withExcludeDirsEnv(undefined, () => {
+      const dirs = getExcludeDirs();
+      expect(dirs).toContain("node_modules");
+      expect(dirs).toContain(".git");
+      expect(dirs).toContain("vendor");
+      expect(dirs).toContain("dist");
+      expect(dirs).toContain("build");
+      expect(dirs).toContain(".cache");
+    });
+  });
+
+  test("appends custom dirs from QMD_EXCLUDE_DIRS", () => {
+    withExcludeDirsEnv(".obsidian,tmp", () => {
+      const dirs = getExcludeDirs();
+      expect(dirs).toContain("node_modules");
+      expect(dirs).toContain(".obsidian");
+      expect(dirs).toContain("tmp");
+    });
+  });
+
+  test("handles whitespace in QMD_EXCLUDE_DIRS", () => {
+    withExcludeDirsEnv(" .obsidian , tmp , ", () => {
+      const dirs = getExcludeDirs();
+      expect(dirs).toContain(".obsidian");
+      expect(dirs).toContain("tmp");
+      expect(dirs).not.toContain("");
+    });
+  });
+
+  test("strips trailing slashes and drops duplicates", () => {
+    withExcludeDirsEnv("tmp/,dist,tmp", () => {
+      const dirs = getExcludeDirs();
+      expect(dirs).not.toContain("tmp/");
+      expect(dirs.filter(d => d === "tmp").length).toBe(1);
+      expect(dirs.filter(d => d === "dist").length).toBe(1);
+    });
+  });
+});