diff --git a/CHANGELOG.md b/CHANGELOG.md index 5ace379a..6befdfbf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## [Unreleased] +### Added + +- Per-collection `slugify` option (default: `true`). Set `slugify: false` in a collection's config to preserve original filenames (casing, spaces, and special characters) instead of converting them to lowercase kebab-case during indexing. + ### Fixes - Sync stale `bun.lock` (`better-sqlite3` 11.x → 12.x). CI and release diff --git a/src/cli/qmd.ts b/src/cli/qmd.ts index 22b5561c..7dfa4b92 100755 --- a/src/cli/qmd.ts +++ b/src/cli/qmd.ts @@ -57,6 +57,7 @@ import { getCollectionsWithoutContext, getTopLevelPathsWithoutContext, handelize, + normalizeRelativePath, hybridQuery, vectorSearchQuery, structuredSearch, @@ -559,6 +560,7 @@ async function updateCollections(): Promise { const result = await reindexCollection(storeInstance, col.pwd, col.glob_pattern, col.name, { ignorePatterns: yamlCol?.ignore, + slugify: yamlCol?.slugify, onProgress: (info) => { progress.set((info.current / info.total) * 100); const elapsed = (Date.now() - startTime) / 1000; @@ -1410,7 +1412,7 @@ async function collectionAdd(pwd: string, globPattern: string, name?: string): P // Create the collection and index files console.log(`Creating collection '${collName}'...`); const newColl = getCollectionFromYaml(collName); - await indexFiles(pwd, globPattern, collName, false, newColl?.ignore); + await indexFiles(pwd, globPattern, collName, false, newColl?.ignore, newColl?.slugify); console.log(`${c.green}✓${c.reset} Collection '${collName}' created successfully`); } @@ -1463,7 +1465,7 @@ function collectionRename(oldName: string, newName: string): void { console.log(` Virtual paths updated: ${c.cyan}qmd://${oldName}/${c.reset} → ${c.cyan}qmd://${newName}/${c.reset}`); } -async function indexFiles(pwd?: string, globPattern: string = DEFAULT_GLOB, collectionName?: string, suppressEmbedNotice: boolean = false, ignorePatterns?: string[]): Promise { +async function 
indexFiles(pwd?: string, globPattern: string = DEFAULT_GLOB, collectionName?: string, suppressEmbedNotice: boolean = false, ignorePatterns?: string[], slugify?: boolean): Promise { const db = getDb(); const resolvedPwd = pwd || getPwd(); const now = new Date().toISOString(); @@ -1511,7 +1513,7 @@ async function indexFiles(pwd?: string, globPattern: string = DEFAULT_GLOB, coll for (const relativeFile of files) { const filepath = getRealPath(resolve(resolvedPwd, relativeFile)); - const path = handelize(relativeFile); // Normalize path for token-friendliness + const path = (slugify ?? true) ? handelize(relativeFile) : normalizeRelativePath(relativeFile); seenPaths.add(path); let content: string; diff --git a/src/collections.ts b/src/collections.ts index 257f144f..3dec3551 100644 --- a/src/collections.ts +++ b/src/collections.ts @@ -31,6 +31,7 @@ export interface Collection { context?: ContextMap; // Optional context definitions update?: string; // Optional bash command to run during qmd update includeByDefault?: boolean; // Include in queries by default (default: true) + slugify?: boolean; // Slugify filenames to kebab-case during indexing (default: true) } /** diff --git a/src/index.ts b/src/index.ts index 22f3fa32..9ab7d7fb 100644 --- a/src/index.ts +++ b/src/index.ts @@ -480,6 +480,7 @@ export async function createStore(options: StoreOptions): Promise { for (const col of filtered) { const result = await reindexCollection(internal, col.path, col.pattern || "**/*.md", col.name, { ignorePatterns: col.ignore, + slugify: col.slugify, onProgress: updateOpts?.onProgress ? 
(info) => updateOpts.onProgress!({ collection: col.name, ...info }) : undefined, diff --git a/src/store.ts b/src/store.ts index f17404d8..85d1f1ae 100644 --- a/src/store.ts +++ b/src/store.ts @@ -306,6 +306,21 @@ function isWSL(): boolean { return !!(process.env.WSL_DISTRO_NAME || process.env.WSL_INTEROP); } +/** + * Normalize a relative file path for storage: forward-slash separators, + * no leading/trailing slashes, collapse runs of slashes. Preserves + * original filename casing, spaces, and special characters. + */ +export function normalizeRelativePath(path: string): string { + if (!path || path.trim() === '') { + throw new Error('normalizeRelativePath: path cannot be empty'); + } + return normalizePathSeparators(path) + .split('/') + .filter(Boolean) + .join('/'); +} + /** * Get the relative path from a prefix. * Returns null if path is not under prefix. @@ -1082,11 +1097,13 @@ export async function reindexCollection( collectionName: string, options?: { ignorePatterns?: string[]; + slugify?: boolean; onProgress?: (info: ReindexProgress) => void; } ): Promise { const db = store.db; const now = new Date().toISOString(); + const slugifyPaths = options?.slugify ?? true; const excludeDirs = ["node_modules", ".git", ".cache", "vendor", "dist", "build"]; const allIgnore = [ @@ -1112,7 +1129,7 @@ export async function reindexCollection( for (const relativeFile of files) { const filepath = getRealPath(resolve(collectionPath, relativeFile)); - const path = handelize(relativeFile); + const path = slugifyPaths ? 
handelize(relativeFile) : normalizeRelativePath(relativeFile); seenPaths.add(path); let content: string; diff --git a/test/cli.test.ts b/test/cli.test.ts index 7d6f5267..e09a0cef 100644 --- a/test/cli.test.ts +++ b/test/cli.test.ts @@ -836,7 +836,7 @@ describe("CLI ls Command", () => { test("lists files in a collection", async () => { const { stdout, exitCode } = await runQmd(["ls", "fixtures"], { dbPath: localDbPath }); expect(exitCode).toBe(0); - // handelize converts to lowercase + // Default slugify: true converts to lowercase expect(stdout).toContain("qmd://fixtures/readme.md"); expect(stdout).toContain("qmd://fixtures/notes/meeting.md"); }); @@ -846,7 +846,7 @@ describe("CLI ls Command", () => { expect(exitCode).toBe(0); expect(stdout).toContain("qmd://fixtures/notes/meeting.md"); expect(stdout).toContain("qmd://fixtures/notes/ideas.md"); - // Should not include files outside the prefix (handelize converts to lowercase) + // Should not include files outside the prefix (default slugify: true converts to lowercase) expect(stdout).not.toContain("qmd://fixtures/readme.md"); }); diff --git a/test/store.test.ts b/test/store.test.ts index c5755f81..790ff49c 100644 --- a/test/store.test.ts +++ b/test/store.test.ts @@ -9,7 +9,7 @@ import { describe, test, expect, beforeAll, afterAll, beforeEach, afterEach, vi } from "vitest"; import { openDatabase, loadSqliteVec } from "../src/db.js"; import type { Database } from "../src/db.js"; -import { unlink, mkdtemp, rmdir, writeFile } from "node:fs/promises"; +import { unlink, mkdtemp, rmdir, writeFile, mkdir } from "node:fs/promises"; import { tmpdir } from "node:os"; import { join } from "node:path"; import YAML from "yaml"; @@ -39,6 +39,7 @@ import { extractSnippet, getCacheKey, handelize, + reindexCollection, normalizeVirtualPath, isVirtualPath, parseVirtualPath, @@ -1136,6 +1137,93 @@ describe("Collections", () => { }); }); +// ============================================================================= +// Reindex slugify option Tests +// 
============================================================================= + +describe("reindexCollection slugify option", () => { + test("default (slugify: true) handleizes paths to kebab-case", async () => { + const store = await createTestStore(); + const collectionName = "music"; + + const collectionDir = join(testDir, "music-default"); + await mkdir(join(collectionDir, "bass-guitar"), { recursive: true }); + + await writeFile( + join(collectionDir, "bass-guitar", "BASS PLAYER Lesson 02.md"), + "# Bass Player Lesson 02\n\nLearn the basics of bass guitar." + ); + + await createTestCollection({ pwd: collectionDir, glob: "**/*.md", name: collectionName }); + await reindexCollection(store, collectionDir, "**/*.md", collectionName); + + const rows = store.db.prepare( + `SELECT path FROM documents WHERE collection = ? AND active = 1` + ).all(collectionName) as { path: string }[]; + + const paths = rows.map(r => r.path); + expect(paths).toContain("bass-guitar/bass-player-lesson-02.md"); + expect(paths).not.toContain("bass-guitar/BASS PLAYER Lesson 02.md"); + + await cleanupTestDb(store); + }); + + test("slugify: false preserves original filenames with spaces and uppercase", async () => { + const store = await createTestStore(); + const collectionName = "music"; + + const collectionDir = join(testDir, "music-preserve"); + await mkdir(join(collectionDir, "bass-guitar"), { recursive: true }); + + await writeFile( + join(collectionDir, "bass-guitar", "BASS PLAYER Lesson 02.md"), + "# Bass Player Lesson 02\n\nLearn the basics of bass guitar." + ); + await writeFile( + join(collectionDir, "bass-guitar", "simple-file.md"), + "# Simple File\n\nAlready lowercase no spaces." + ); + + await createTestCollection({ pwd: collectionDir, glob: "**/*.md", name: collectionName }); + await reindexCollection(store, collectionDir, "**/*.md", collectionName, { slugify: false }); + + const rows = store.db.prepare( + `SELECT path FROM documents WHERE collection = ? 
AND active = 1 ORDER BY path` + ).all(collectionName) as { path: string }[]; + + const paths = rows.map(r => r.path); + + expect(paths).toContain("bass-guitar/BASS PLAYER Lesson 02.md"); + expect(paths).toContain("bass-guitar/simple-file.md"); + expect(paths).not.toContain("bass-guitar/bass-player-lesson-02.md"); + + await cleanupTestDb(store); + }); + + test("slugify: false search results return original filenames", async () => { + const store = await createTestStore(); + const collectionName = "notes"; + + const collectionDir = join(testDir, "notes-preserve"); + await mkdir(collectionDir, { recursive: true }); + + await writeFile( + join(collectionDir, "My Meeting Notes.md"), + "# Meeting Notes\n\nDiscussed quarterly planning and budget reviews." + ); + + await createTestCollection({ pwd: collectionDir, glob: "**/*.md", name: collectionName }); + await reindexCollection(store, collectionDir, "**/*.md", collectionName, { slugify: false }); + + const results = store.searchFTS("quarterly planning", 5); + + expect(results.length).toBeGreaterThan(0); + expect(results[0]!.displayPath).toBe("notes/My Meeting Notes.md"); + + await cleanupTestDb(store); + }); +}); + // ============================================================================= // FTS Search Tests // =============================================================================