Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 50 additions & 25 deletions src/qmd.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1449,53 +1449,75 @@ async function indexFiles(pwd?: string, globPattern: string = DEFAULT_GLOB, coll
const seenPaths = new Set<string>();
const startTime = Date.now();

let skipped = 0;

// Wrap all inserts/updates in a single transaction for performance
db.exec("BEGIN TRANSACTION");

for (const relativeFile of files) {
const filepath = getRealPath(resolve(resolvedPwd, relativeFile));
const path = handelize(relativeFile); // Normalize path for token-friendliness
seenPaths.add(path);

let content: string;
try {
content = readFileSync(filepath, "utf-8");
} catch (err: any) {
// Skip files that can't be read (e.g. iCloud evicted files returning EAGAIN)
processed++;
progress.set((processed / total) * 100);
continue;
}

// Skip empty files - nothing useful to index
if (!content.trim()) {
processed++;
continue;
}

const hash = await hashContent(content);
const title = extractTitle(content, relativeFile);

// Check if document exists in this collection with this path
const existing = findActiveDocument(db, collectionName, path);

if (existing) {
// Fast path: skip file read if mtime hasn't changed
const stat = statSync(filepath);
const fileMtime = stat ? new Date(stat.mtime).toISOString() : null;
if (fileMtime && existing.modified_at === fileMtime) {
unchanged++;
skipped++;
processed++;
progress.set((processed / total) * 100);
const elapsed = (Date.now() - startTime) / 1000;
const rate = processed / elapsed;
const remaining = (total - processed) / rate;
const eta = processed > 2 ? ` ETA: ${formatETA(remaining)}` : "";
process.stderr.write(`\rIndexing: ${processed}/${total}${eta} `);
continue;
}

// mtime changed - read file and check content
const content = readFileSync(filepath, "utf-8");
if (!content.trim()) {
processed++;
continue;
}

const hash = await hashContent(content);
const title = extractTitle(content, relativeFile);

if (existing.hash === hash) {
// Hash unchanged, but check if title needs updating
// Content unchanged despite mtime change - update mtime in DB
if (existing.title !== title) {
updateDocumentTitle(db, existing.id, title, now);
updated++;
} else {
unchanged++;
}
// Update modified_at to current mtime so future runs skip this file
if (fileMtime) {
updateDocument(db, existing.id, existing.title, existing.hash, fileMtime);
}
} else {
// Content changed - insert new content hash and update document
insertContent(db, hash, content, now);
const stat = statSync(filepath);
updateDocument(db, existing.id, title, hash,
stat ? new Date(stat.mtime).toISOString() : now);
updateDocument(db, existing.id, title, hash, fileMtime || now);
updated++;
}
} else {
// New document - insert content and document
// New document - must read file
const content = readFileSync(filepath, "utf-8");
if (!content.trim()) {
processed++;
continue;
}

indexed++;
const hash = await hashContent(content);
const title = extractTitle(content, relativeFile);
insertContent(db, hash, content, now);
const stat = statSync(filepath);
insertDocument(db, collectionName, path, title, hash,
Expand All @@ -1522,14 +1544,17 @@ async function indexFiles(pwd?: string, globPattern: string = DEFAULT_GLOB, coll
}
}

// Commit the transaction
db.exec("COMMIT");

// Clean up orphaned content hashes (content not referenced by any document)
const orphanedContent = cleanupOrphanedContent(db);

// Check if vector index needs updating
const needsEmbedding = getHashesNeedingEmbedding(db);

progress.clear();
console.log(`\nIndexed: ${indexed} new, ${updated} updated, ${unchanged} unchanged, ${removed} removed`);
console.log(`\nIndexed: ${indexed} new, ${updated} updated, ${unchanged} unchanged, ${removed} removed${skipped > 0 ? ` (${skipped} skipped via mtime)` : ""}`);
if (orphanedContent > 0) {
console.log(`Cleaned up ${orphanedContent} orphaned content hash(es)`);
}
Expand Down
6 changes: 3 additions & 3 deletions src/store.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1346,11 +1346,11 @@ export function findActiveDocument(
db: Database,
collectionName: string,
path: string
): { id: number; hash: string; title: string } | null {
): { id: number; hash: string; title: string; modified_at: string | null } | null {
const row = db.prepare(`
SELECT id, hash, title FROM documents
SELECT id, hash, title, modified_at FROM documents
WHERE collection = ? AND path = ? AND active = 1
`).get(collectionName, path) as { id: number; hash: string; title: string } | undefined;
`).get(collectionName, path) as { id: number; hash: string; title: string; modified_at: string | null } | undefined;
return row ?? null;
}

Expand Down