Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 62 additions & 9 deletions src/cli/qmd.ts
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,21 @@ import {
syncConfigToDb,
type ReindexResult,
} from "../store.js";
import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, withLLMSession, pullModels, DEFAULT_EMBED_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, DEFAULT_RERANK_MODEL_URI, DEFAULT_MODEL_CACHE_DIR } from "../llm.js";
import {
disposeDefaultLlamaCpp,
getDefaultLlamaCpp,
withLLMSession,
pullModels,
DEFAULT_EMBED_MODEL_URI,
DEFAULT_GENERATE_MODEL_URI,
DEFAULT_RERANK_MODEL_URI,
DEFAULT_MODEL_CACHE_DIR,
setEmbedProvider,
resolveEmbedProvider,
getActiveEmbedModel,
getConfiguredEmbedDimensions,
getGoogleApiKey,
} from "../llm.js";
import {
formatSearchResults,
formatDocuments,
Expand Down Expand Up @@ -417,10 +431,20 @@ async function showStatus(): Promise<void> {
const match = uri.match(/^hf:([^/]+\/[^/]+)\//);
return match ? `https://huggingface.co/${match[1]}` : uri;
};
const provider = await resolveEmbedProvider();
const configuredDims = getConfiguredEmbedDimensions();
const embeddingModel = provider === "google"
? "Google Gemini Embedding 2"
: hfLink(DEFAULT_EMBED_MODEL_URI);
console.log(`\n${c.bold}Models${c.reset}`);
console.log(` Embedding: ${hfLink(DEFAULT_EMBED_MODEL_URI)}`);
console.log(` Provider: ${provider}${provider === "google" && configuredDims ? ` (${configuredDims}d)` : ""}`);
console.log(` Embedding: ${embeddingModel}`);
console.log(` Reranking: ${hfLink(DEFAULT_RERANK_MODEL_URI)}`);
console.log(` Generation: ${hfLink(DEFAULT_GENERATE_MODEL_URI)}`);
if (provider === "google") {
const keyStatus = getGoogleApiKey() ? "set" : "missing";
console.log(` GEMINI_API_KEY: ${keyStatus}`);
}
}

// Device / GPU info
Expand Down Expand Up @@ -1546,7 +1570,7 @@ async function indexFiles(pwd?: string, globPattern: string = DEFAULT_GLOB, coll
// Content changed - insert new content hash and update document
insertContent(db, hash, content, now);
const stat = statSync(filepath);
updateDocument(db, existing.id, title, hash,
updateDocument(db, existing.id, title, hash, "text",
stat ? new Date(stat.mtime).toISOString() : now);
updated++;
}
Expand All @@ -1555,7 +1579,7 @@ async function indexFiles(pwd?: string, globPattern: string = DEFAULT_GLOB, coll
indexed++;
insertContent(db, hash, content, now);
const stat = statSync(filepath);
insertDocument(db, collectionName, path, title, hash,
insertDocument(db, collectionName, path, title, hash, "text",
stat ? new Date(stat.birthtime).toISOString() : now,
stat ? new Date(stat.mtime).toISOString() : now);
}
Expand Down Expand Up @@ -1605,9 +1629,11 @@ function renderProgressBar(percent: number, width: number = 30): string {
return bar;
}

async function vectorIndex(model: string = DEFAULT_EMBED_MODEL, force: boolean = false): Promise<void> {
async function vectorIndex(model?: string, force: boolean = false): Promise<void> {
const storeInstance = getStore();
const db = storeInstance.db;
const provider = await resolveEmbedProvider();
const activeModel = model ?? await getActiveEmbedModel();

if (force) {
console.log(`${c.yellow}Force re-indexing: clearing all vectors...${c.reset}`);
Expand All @@ -1621,15 +1647,18 @@ async function vectorIndex(model: string = DEFAULT_EMBED_MODEL, force: boolean =
return;
}

console.log(`${c.dim}Model: ${model}${c.reset}\n`);
const dimensions = getConfiguredEmbedDimensions();
const dimInfo = provider === "google" && dimensions ? ` (${dimensions}d)` : "";
console.log(`${c.dim}Provider: ${provider}${dimInfo}${c.reset}`);
console.log(`${c.dim}Model: ${activeModel}${c.reset}\n`);
cursor.hide();
progress.indeterminate();

const startTime = Date.now();

const result = await generateEmbeddings(storeInstance, {
force,
model,
model: activeModel,
onProgress: (info) => {
if (info.totalBytes === 0) return;
const percent = (info.bytesProcessed / info.totalBytes) * 100;
Expand Down Expand Up @@ -2330,6 +2359,8 @@ function parseCLI() {
mask: { type: "string" }, // glob pattern
// Embed options
force: { type: "boolean", short: "f" },
provider: { type: "string" },
dimensions: { type: "string" },
// Update options
pull: { type: "boolean" }, // git pull before update
refresh: { type: "boolean" },
Expand Down Expand Up @@ -2357,6 +2388,23 @@ function parseCLI() {
setConfigIndexName(indexName);
}

// Read --provider case-insensitively; a non-string value is treated as "flag not supplied".
const providerValue = typeof values.provider === "string" ? values.provider.toLowerCase() : undefined;
if (providerValue === "google" || providerValue === "local") {
// Recognized backend: hand the normalized name to the LLM layer.
setEmbedProvider(providerValue);
} else if (providerValue != null) {
// Unrecognized backend: report and abort with exit code 1.
// Note that the message echoes the lowercased value, not the raw text the user typed.
console.error(`Invalid --provider value '${providerValue}'. Use 'local' or 'google'.`);
process.exit(1);
}

// Validate --dimensions when supplied. The check does not look at which provider was selected.
if (typeof values.dimensions === "string") {
// Surrounding whitespace is tolerated; the comparison is against the exact strings below.
const dimRaw = values.dimensions.trim();
if (!["768", "1536", "3072"].includes(dimRaw)) {
// Reject anything else and abort with exit code 1; the message shows the untrimmed input.
console.error(`Invalid --dimensions value '${values.dimensions}'. Use 768, 1536, or 3072.`);
process.exit(1);
}
// Publish the validated value through the process environment.
// NOTE(review): presumably getConfiguredEmbedDimensions() reads QMD_EMBED_DIMENSIONS — confirm in llm.js.
process.env.QMD_EMBED_DIMENSIONS = dimRaw;
}

// Determine output format
let format: OutputFormat = "cli";
if (values.csv) format = "csv";
Expand Down Expand Up @@ -2433,7 +2481,7 @@ function showHelp(): void {
console.log("Maintenance:");
console.log(" qmd status - View index + collection health");
console.log(" qmd update [--pull] - Re-index collections (optionally git pull first)");
console.log(" qmd embed [-f] - Generate/refresh vector embeddings");
console.log(" qmd embed [-f] [--provider local|google] [--dimensions 768|1536|3072] - Generate/refresh vector embeddings");
console.log(" qmd cleanup - Clear caches, vacuum DB");
console.log("");
console.log("Query syntax (qmd query):");
Expand Down Expand Up @@ -2494,6 +2542,11 @@ function showHelp(): void {
console.log(" --max-bytes <num> - Skip files larger than N bytes (default 10240)");
console.log(" --json/--csv/--md/--xml/--files - Same formats as search");
console.log("");
console.log("Embed options:");
console.log(" -f, --force - Force full re-embedding");
console.log(" --provider <local|google> - Embedding backend (or QMD_EMBED_PROVIDER)");
console.log(" --dimensions <n> - Gemini output dimensions (768/1536/3072)");
console.log("");
console.log(`Index: ${getDbPath()}`);
}

Expand Down Expand Up @@ -2794,7 +2847,7 @@ if (isMain) {
break;

case "embed":
await vectorIndex(DEFAULT_EMBED_MODEL, !!cli.values.force);
await vectorIndex(undefined, !!cli.values.force);
break;

case "pull": {
Expand Down
4 changes: 3 additions & 1 deletion src/collections.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ export interface NamedCollection extends Collection {
name: string;
}

// Glob pattern applied when a collection is added without an explicit pattern;
// exported so other modules can share the same default instead of repeating the literal.
export const DEFAULT_COLLECTION_PATTERN = "**/*.md";

// ============================================================================
// Configuration paths
// ============================================================================
Expand Down Expand Up @@ -269,7 +271,7 @@ export function updateCollectionSettings(
export function addCollection(
name: string,
path: string,
pattern: string = "**/*.md"
pattern: string = DEFAULT_COLLECTION_PATTERN
): void {
const config = loadConfig();

Expand Down
Loading