diff --git a/index.ts b/index.ts index 08aa5fc..70f7058 100644 --- a/index.ts +++ b/index.ts @@ -24,6 +24,12 @@ import { appendSelfImprovementEntry, ensureSelfImprovementLearningFiles } from " import type { MdMirrorWriter } from "./src/tools.js"; import { shouldSkipRetrieval } from "./src/adaptive-retrieval.js"; import { parseClawteamScopes, applyClawteamScopes } from "./src/clawteam-scope.js"; +import { + runCompaction, + shouldRunCompaction, + recordCompactionRun, + type CompactionConfig, +} from "./src/memory-compactor.js"; import { runWithReflectionTransientRetryOnce } from "./src/reflection-retry.js"; import { resolveReflectionSessionSearchDirs, stripResetSuffix } from "./src/session-recovery.js"; import { @@ -188,6 +194,14 @@ interface PluginConfig { mdMirror?: { enabled?: boolean; dir?: string }; workspaceBoundary?: WorkspaceBoundaryConfig; admissionControl?: AdmissionControlConfig; + memoryCompaction?: { + enabled?: boolean; + minAgeDays?: number; + similarityThreshold?: number; + minClusterSize?: number; + maxMemoriesToScan?: number; + cooldownHours?: number; + }; } type ReflectionThinkLevel = "off" | "minimal" | "low" | "medium" | "high"; @@ -1990,6 +2004,128 @@ const memoryLanceDBProPlugin = { } ); + // ======================================================================== + // Memory Compaction (Progressive Summarization) + // ======================================================================== + + if (config.enableManagementTools) { + api.registerTool({ + name: "memory_compact", + description: + "Consolidate semantically similar old memories into refined single entries " + + "(progressive summarization). Reduces noise and improves retrieval quality over time. " + + "Use dry_run:true first to preview the compaction plan without making changes.", + inputSchema: { + type: "object" as const, + properties: { + dry_run: { + type: "boolean", + description: "Preview clusters without writing changes. 
Default: false.", + }, + min_age_days: { + type: "number", + description: "Only compact memories at least this many days old. Default: 7.", + }, + similarity_threshold: { + type: "number", + description: "Cosine similarity threshold for clustering [0-1]. Default: 0.88.", + }, + scopes: { + type: "array", + items: { type: "string" }, + description: "Scope filter. Omit to compact all scopes.", + }, + }, + required: [], + }, + execute: async (args: Record<string, unknown>) => { + const compactionCfg: CompactionConfig = { + enabled: true, + minAgeDays: + typeof args.min_age_days === "number" + ? args.min_age_days + : (config.memoryCompaction?.minAgeDays ?? 7), + similarityThreshold: + typeof args.similarity_threshold === "number" + ? Math.max(0, Math.min(1, args.similarity_threshold)) + : (config.memoryCompaction?.similarityThreshold ?? 0.88), + minClusterSize: config.memoryCompaction?.minClusterSize ?? 2, + maxMemoriesToScan: config.memoryCompaction?.maxMemoriesToScan ?? 200, + dryRun: args.dry_run === true, + cooldownHours: config.memoryCompaction?.cooldownHours ?? 24, + }; + const scopes = + Array.isArray(args.scopes) && args.scopes.length > 0 + ? (args.scopes as string[]) + : undefined; + + const result = await runCompaction( + store, + embedder, + compactionCfg, + scopes, + api.logger, + ); + + return { + content: [ + { + type: "text", + text: JSON.stringify( + { + scanned: result.scanned, + clustersFound: result.clustersFound, + memoriesDeleted: result.memoriesDeleted, + memoriesCreated: result.memoriesCreated, + dryRun: result.dryRun, + summary: result.dryRun + ? 
`Dry run: found ${result.clustersFound} cluster(s) in ${result.scanned} memories — no changes made.` + : `Compacted ${result.memoriesDeleted} memories into ${result.memoriesCreated} consolidated entries.`, + }, + null, + 2, + ), + }, + ], + }; + }, + }); + } + + // Auto-compaction at gateway_start (if enabled, respects cooldown) + if (config.memoryCompaction?.enabled) { + api.on("gateway_start", () => { + const compactionStateFile = join( + dirname(resolvedDbPath), + ".compaction-state.json", + ); + const compactionCfg: CompactionConfig = { + enabled: true, + minAgeDays: config.memoryCompaction!.minAgeDays ?? 7, + similarityThreshold: config.memoryCompaction!.similarityThreshold ?? 0.88, + minClusterSize: config.memoryCompaction!.minClusterSize ?? 2, + maxMemoriesToScan: config.memoryCompaction!.maxMemoriesToScan ?? 200, + dryRun: false, + cooldownHours: config.memoryCompaction!.cooldownHours ?? 24, + }; + + shouldRunCompaction(compactionStateFile, compactionCfg.cooldownHours) + .then(async (should) => { + if (!should) return; + await recordCompactionRun(compactionStateFile); + const result = await runCompaction(store, embedder, compactionCfg, undefined, api.logger); + if (result.clustersFound > 0) { + api.logger.info( + `memory-compactor [auto]: compacted ${result.memoriesDeleted} → ${result.memoriesCreated} entries`, + ); + } + }) + .catch((err) => { + api.logger.warn(`memory-compactor [auto]: failed: ${String(err)}`); + }); + }); + } + // ======================================================================== // Register CLI Commands // ======================================================================== @@ -3669,6 +3805,24 @@ export function parsePluginConfig(value: unknown): PluginConfig { } : undefined, admissionControl: normalizeAdmissionControlConfig(cfg.admissionControl), + memoryCompaction: (() => { + const raw = + typeof cfg.memoryCompaction === "object" && cfg.memoryCompaction !== null + ? 
(cfg.memoryCompaction as Record<string, unknown>) + : null; + if (!raw) return undefined; + return { + enabled: raw.enabled === true, + minAgeDays: parsePositiveInt(raw.minAgeDays) ?? 7, + similarityThreshold: + typeof raw.similarityThreshold === "number" + ? Math.max(0, Math.min(1, raw.similarityThreshold)) + : 0.88, + minClusterSize: parsePositiveInt(raw.minClusterSize) ?? 2, + maxMemoriesToScan: parsePositiveInt(raw.maxMemoriesToScan) ?? 200, + cooldownHours: parsePositiveInt(raw.cooldownHours) ?? 24, + }; + })(), }; } diff --git a/openclaw.plugin.json b/openclaw.plugin.json index a2224f9..cf1f3e6 100644 --- a/openclaw.plugin.json +++ b/openclaw.plugin.json @@ -744,6 +744,49 @@ } } } + }, + "memoryCompaction": { + "type": "object", + "additionalProperties": false, + "description": "Progressive summarization: periodically consolidate semantically similar old memories into refined single entries, reducing noise and improving retrieval quality over time.", + "properties": { + "enabled": { + "type": "boolean", + "default": false, + "description": "Enable automatic compaction at gateway startup (respects cooldownHours)" + }, + "minAgeDays": { + "type": "integer", + "default": 7, + "minimum": 1, + "description": "Only compact memories at least this many days old" + }, + "similarityThreshold": { + "type": "number", + "default": 0.88, + "minimum": 0, + "maximum": 1, + "description": "Cosine similarity threshold for clustering. Higher = more conservative merges." 
+ }, + "minClusterSize": { + "type": "integer", + "default": 2, + "minimum": 2, + "description": "Minimum cluster size required to trigger a merge" + }, + "maxMemoriesToScan": { + "type": "integer", + "default": 200, + "minimum": 1, + "description": "Maximum number of memories to scan per compaction run" + }, + "cooldownHours": { + "type": "integer", + "default": 24, + "minimum": 1, + "description": "Minimum hours between automatic compaction runs" + } + } } }, "required": [ @@ -1217,6 +1260,25 @@ "placeholder": "30000", "help": "Request timeout for the smart-extraction / upgrade LLM in milliseconds", "advanced": true + }, + "memoryCompaction.enabled": { + "label": "Auto Compaction", + "help": "Automatically consolidate similar old memories at gateway startup. Also available on-demand via the memory_compact tool (requires enableManagementTools)." + }, + "memoryCompaction.minAgeDays": { + "label": "Min Age (days)", + "help": "Memories younger than this are never touched by compaction", + "advanced": true + }, + "memoryCompaction.similarityThreshold": { + "label": "Similarity Threshold", + "help": "How similar two memories must be to merge (0–1). 0.88 is a good starting point; raise to 0.92+ for conservative merges.", + "advanced": true + }, + "memoryCompaction.cooldownHours": { + "label": "Cooldown (hours)", + "help": "Minimum gap between automatic compaction runs", + "advanced": true } } } diff --git a/src/memory-compactor.ts b/src/memory-compactor.ts new file mode 100644 index 0000000..1c0b1ea --- /dev/null +++ b/src/memory-compactor.ts @@ -0,0 +1,403 @@ +/** + * Memory Compactor — Progressive Summarization + * + * Identifies clusters of semantically similar memories older than a configured + * age threshold and merges each cluster into a single, higher-quality entry. 
+ * + * Implements the "progressive summarization" pattern: memories get more refined + * over time as related fragments are consolidated, reducing noise and improving + * retrieval quality without requiring an external LLM call. + * + * Algorithm: + * 1. Load memories older than `minAgeDays` (with vectors). + * 2. Build similarity clusters using greedy cosine-similarity expansion. + * 3. For each cluster >= `minClusterSize`, merge into one entry: + * - text: deduplicated lines joined with newlines + * - importance: max of cluster members (never downgrade) + * - category: plurality vote + * - scope: shared scope (all members must share one) + * - metadata: marked { compacted: true, sourceCount: N } + * 4. Delete source entries, store merged entry. + */ + +import type { MemoryEntry } from "./store.js"; + +// ============================================================================ +// Types +// ============================================================================ + +export interface CompactionConfig { + /** Enable automatic compaction. Default: false */ + enabled: boolean; + /** Only compact memories at least this many days old. Default: 7 */ + minAgeDays: number; + /** Cosine similarity threshold for clustering [0, 1]. Default: 0.88 */ + similarityThreshold: number; + /** Minimum number of memories in a cluster to trigger merge. Default: 2 */ + minClusterSize: number; + /** Maximum memories to scan per compaction run. Default: 200 */ + maxMemoriesToScan: number; + /** Report plan without writing changes. Default: false */ + dryRun: boolean; + /** Run at most once per N hours (gateway_start guard). 
Default: 24 */ + cooldownHours: number; +} + +export interface CompactionEntry { + id: string; + text: string; + vector: number[]; + category: MemoryEntry["category"]; + scope: string; + importance: number; + timestamp: number; + metadata: string; +} + +export interface ClusterPlan { + /** Indices into the input entries array */ + memberIndices: number[]; + /** Proposed merged entry (without id/vector — computed by caller) */ + merged: { + text: string; + importance: number; + category: MemoryEntry["category"]; + scope: string; + metadata: string; + }; +} + +export interface CompactionResult { + /** Memories scanned (limited by maxMemoriesToScan) */ + scanned: number; + /** Clusters found with >= minClusterSize members */ + clustersFound: number; + /** Source memories deleted (0 when dryRun) */ + memoriesDeleted: number; + /** Merged memories created (0 when dryRun) */ + memoriesCreated: number; + /** Whether this was a dry run */ + dryRun: boolean; +} + +// ============================================================================ +// Math helpers +// ============================================================================ + +/** Dot product of two equal-length vectors. */ +function dot(a: number[], b: number[]): number { + let s = 0; + for (let i = 0; i < a.length; i++) s += a[i] * b[i]; + return s; +} + +/** L2 norm of a vector. */ +function norm(v: number[]): number { + return Math.sqrt(dot(v, v)); +} + +/** + * Cosine similarity in [0, 1]. + * Returns 0 if either vector has zero norm (avoids NaN). 
+ */ +export function cosineSimilarity(a: number[], b: number[]): number { + if (a.length === 0 || a.length !== b.length) return 0; + const na = norm(a); + const nb = norm(b); + if (na === 0 || nb === 0) return 0; + return Math.max(0, Math.min(1, dot(a, b) / (na * nb))); +} + +// ============================================================================ +// Cluster building +// ============================================================================ + +/** + * Greedy cluster expansion. + * + * Sort entries by importance DESC so the most valuable memory seeds each + * cluster. Expand each seed by collecting every unassigned entry whose + * cosine similarity with the seed is >= threshold. + * + * Returns an array of index-arrays (each inner array = one cluster). + * Only clusters with >= minClusterSize entries are returned. + */ +export function buildClusters( + entries: CompactionEntry[], + threshold: number, + minClusterSize: number, +): ClusterPlan[] { + if (entries.length < minClusterSize) return []; + + // Sort indices by importance desc (highest importance seeds first) + const order = entries + .map((_, i) => i) + .sort((a, b) => entries[b].importance - entries[a].importance); + + const assigned = new Uint8Array(entries.length); // 0 = unassigned + const plans: ClusterPlan[] = []; + + for (const seedIdx of order) { + if (assigned[seedIdx]) continue; + + const cluster: number[] = [seedIdx]; + assigned[seedIdx] = 1; + + const seedVec = entries[seedIdx].vector; + if (seedVec.length === 0) continue; // skip entries without vectors + + for (let j = 0; j < entries.length; j++) { + if (assigned[j]) continue; + const jVec = entries[j].vector; + if (jVec.length === 0) continue; + if (cosineSimilarity(seedVec, jVec) >= threshold) { + cluster.push(j); + assigned[j] = 1; + } + } + + if (cluster.length >= minClusterSize) { + const members = cluster.map((i) => entries[i]); + plans.push({ + memberIndices: cluster, + merged: buildMergedEntry(members), + }); + } + } + + 
return plans; +} + +// ============================================================================ +// Merge strategy +// ============================================================================ + +/** + * Merge a cluster of entries into a single proposed entry. + * + * Text strategy: deduplicate lines across all member texts, join with newline. + * This preserves all unique information while removing redundancy. + * + * Importance: max across cluster (never downgrade). + * Category: plurality vote; ties broken by member with highest importance. + * Scope: all members must share a scope (validated upstream). + */ +export function buildMergedEntry( + members: CompactionEntry[], +): ClusterPlan["merged"] { + // --- text: deduplicate lines --- + const seen = new Set(); + const lines: string[] = []; + for (const m of members) { + for (const line of m.text.split("\n")) { + const trimmed = line.trim(); + if (trimmed && !seen.has(trimmed.toLowerCase())) { + seen.add(trimmed.toLowerCase()); + lines.push(trimmed); + } + } + } + const text = lines.join("\n"); + + // --- importance: max --- + const importance = Math.min( + 1.0, + Math.max(...members.map((m) => m.importance)), + ); + + // --- category: plurality vote --- + const counts = new Map(); + for (const m of members) { + counts.set(m.category, (counts.get(m.category) ?? 
0) + 1); + } + let category: MemoryEntry["category"] = "other"; + let best = 0; + for (const [cat, count] of counts) { + if (count > best) { + best = count; + category = cat as MemoryEntry["category"]; + } + } + + // --- scope: use the first (all should match) --- + const scope = members[0].scope; + + // --- metadata --- + const metadata = JSON.stringify({ + compacted: true, + sourceCount: members.length, + compactedAt: Date.now(), + }); + + return { text, importance, category, scope, metadata }; +} + +// ============================================================================ +// Minimal store interface (duck-typed so no circular import) +// ============================================================================ + +export interface CompactorStore { + fetchForCompaction( + maxTimestamp: number, + scopeFilter?: string[], + limit?: number, + ): Promise<CompactionEntry[]>; + store(entry: { + text: string; + vector: number[]; + importance: number; + category: MemoryEntry["category"]; + scope: string; + metadata?: string; + }): Promise<unknown>; + delete(id: string, scopeFilter?: string[]): Promise<boolean>; +} + +export interface CompactorEmbedder { + embedPassage(text: string): Promise<number[]>; +} + +export interface CompactorLogger { + info(msg: string): void; + warn(msg: string): void; +} + +// ============================================================================ +// Main runner +// ============================================================================ + +/** + * Run a single compaction pass over memories in the given scopes. 
+ * + * @param store Storage backend (must support fetchForCompaction + store + delete) + * @param embedder Used to embed merged text before storage + * @param config Compaction configuration + * @param scopes Scope filter; undefined = all scopes + * @param logger Optional logger + */ +export async function runCompaction( + store: CompactorStore, + embedder: CompactorEmbedder, + config: CompactionConfig, + scopes?: string[], + logger?: CompactorLogger, +): Promise<CompactionResult> { + const cutoff = Date.now() - config.minAgeDays * 24 * 60 * 60 * 1000; + + const entries = await store.fetchForCompaction( + cutoff, + scopes, + config.maxMemoriesToScan, + ); + + if (entries.length === 0) { + return { + scanned: 0, + clustersFound: 0, + memoriesDeleted: 0, + memoriesCreated: 0, + dryRun: config.dryRun, + }; + } + + // Filter out entries without vectors (shouldn't happen but be safe) + const valid = entries.filter((e) => e.vector && e.vector.length > 0); + + const plans = buildClusters( + valid, + config.similarityThreshold, + config.minClusterSize, + ); + + if (config.dryRun) { + logger?.info( + `memory-compactor [dry-run]: scanned=${valid.length} clusters=${plans.length}`, + ); + return { + scanned: valid.length, + clustersFound: plans.length, + memoriesDeleted: 0, + memoriesCreated: 0, + dryRun: true, + }; + } + + let memoriesDeleted = 0; + let memoriesCreated = 0; + + for (const plan of plans) { + const members = plan.memberIndices.map((i) => valid[i]); + + try { + // Embed the merged text + const vector = await embedder.embedPassage(plan.merged.text); + + // Store merged entry + await store.store({ + text: plan.merged.text, + vector, + importance: plan.merged.importance, + category: plan.merged.category, + scope: plan.merged.scope, + metadata: plan.merged.metadata, + }); + memoriesCreated++; + + // Delete source entries + for (const m of members) { + const deleted = await store.delete(m.id); + if (deleted) memoriesDeleted++; + } + } catch (err) { + logger?.warn( + 
`memory-compactor: failed to merge cluster of ${members.length}: ${String(err)}`, + ); + } + } + + logger?.info( + `memory-compactor: scanned=${valid.length} clusters=${plans.length} ` + + `deleted=${memoriesDeleted} created=${memoriesCreated}`, + ); + + return { + scanned: valid.length, + clustersFound: plans.length, + memoriesDeleted, + memoriesCreated, + dryRun: false, + }; +} + +// ============================================================================ +// Cooldown helper +// ============================================================================ + +/** + * Check whether enough time has passed since the last compaction run. + * Uses a simple JSON file at `stateFile` to persist the last-run timestamp. + */ +export async function shouldRunCompaction( + stateFile: string, + cooldownHours: number, +): Promise<boolean> { + try { + const { readFile } = await import("node:fs/promises"); + const raw = await readFile(stateFile, "utf8"); + const state = JSON.parse(raw) as { lastRunAt?: number }; + if (typeof state.lastRunAt === "number") { + const elapsed = Date.now() - state.lastRunAt; + return elapsed >= cooldownHours * 60 * 60 * 1000; + } + } catch { + // File doesn't exist or is malformed — treat as never run + } + return true; +} + +export async function recordCompactionRun(stateFile: string): Promise<void> { + const { writeFile, mkdir } = await import("node:fs/promises"); + const { dirname } = await import("node:path"); + await mkdir(dirname(stateFile), { recursive: true }); + await writeFile(stateFile, JSON.stringify({ lastRunAt: Date.now() }), "utf8"); +} diff --git a/src/store.ts b/src/store.ts index 2bcf14e..ce80034 100644 --- a/src/store.ts +++ b/src/store.ts @@ -1108,4 +1108,49 @@ export class MemoryStore { return { success: false, error: msg }; } } + + /** + * Fetch memories older than `maxTimestamp` including their raw vectors. 
+ * Used exclusively by the memory compactor; vectors are intentionally + * omitted from `list()` for performance, but compaction needs them for + * cosine-similarity clustering. + */ + async fetchForCompaction( + maxTimestamp: number, + scopeFilter?: string[], + limit = 200, + ): Promise<MemoryEntry[]> { + await this.ensureInitialized(); + + const conditions: string[] = [`timestamp < ${maxTimestamp}`]; + + if (scopeFilter && scopeFilter.length > 0) { + const scopeConditions = scopeFilter + .map((scope) => `scope = '${escapeSqlLiteral(scope)}'`) + .join(" OR "); + conditions.push(`((${scopeConditions}) OR scope IS NULL)`); + } + + const whereClause = conditions.join(" AND "); + + const results = await this.table! + .query() + .where(whereClause) + .toArray(); + + return results + .slice(0, limit) + .map( + (row): MemoryEntry => ({ + id: row.id as string, + text: row.text as string, + vector: Array.isArray(row.vector) ? (row.vector as number[]) : [], + category: row.category as MemoryEntry["category"], + scope: (row.scope as string | undefined) ?? 
"global", + importance: Number(row.importance), + timestamp: Number(row.timestamp), + metadata: (row.metadata as string) || "{}", + }), + ); + } } diff --git a/test/memory-compactor.test.mjs b/test/memory-compactor.test.mjs new file mode 100644 index 0000000..42b9a3a --- /dev/null +++ b/test/memory-compactor.test.mjs @@ -0,0 +1,292 @@ +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import jitiFactory from "jiti"; + +const jiti = jitiFactory(import.meta.url, { interopDefault: true }); + +const { + cosineSimilarity, + buildClusters, + buildMergedEntry, + runCompaction, +} = jiti("../src/memory-compactor.ts"); + +// ============================================================================ +// Helpers +// ============================================================================ + +function vec(dims, ...values) { + // Create a vector of `dims` dimensions, placing `values` at the first positions + const v = new Array(dims).fill(0); + values.forEach((val, i) => { v[i] = val; }); + return v; +} + +function entry(overrides = {}) { + return { + id: overrides.id ?? "id-" + Math.random().toString(36).slice(2), + text: overrides.text ?? "some memory", + vector: overrides.vector ?? vec(4, 1, 0, 0, 0), + category: overrides.category ?? "fact", + scope: overrides.scope ?? "global", + importance: overrides.importance ?? 0.5, + timestamp: overrides.timestamp ?? Date.now() - 8 * 24 * 60 * 60 * 1000, + metadata: overrides.metadata ?? 
"{}", + }; +} + +function makeStore(entries = []) { + const db = new Map(entries.map((e) => [e.id, { ...e }])); + return { + stored: [], + deleted: [], + async fetchForCompaction(_maxTs, _scopes, limit = 200) { + return [...db.values()].slice(0, limit); + }, + async store(e) { + const newEntry = { id: "merged-" + Math.random().toString(36).slice(2), ...e }; + db.set(newEntry.id, newEntry); + this.stored.push(newEntry); + return newEntry; + }, + async delete(id) { + if (db.has(id)) { db.delete(id); this.deleted.push(id); return true; } + return false; + }, + }; +} + +function makeEmbedder(dim = 4) { + return { + async embedPassage(text) { + // Deterministic fake embedding: hash first char into first dimension + const v = new Array(dim).fill(0); + v[0] = (text.charCodeAt(0) % 10) / 10; + return v; + }, + }; +} + +const defaultConfig = { + enabled: true, + minAgeDays: 7, + similarityThreshold: 0.88, + minClusterSize: 2, + maxMemoriesToScan: 200, + dryRun: false, + cooldownHours: 24, +}; + +// ============================================================================ +// cosineSimilarity +// ============================================================================ + +describe("cosineSimilarity", () => { + it("returns 1.0 for identical vectors", () => { + const v = vec(4, 1, 2, 3, 4); + assert.equal(cosineSimilarity(v, v), 1.0); + }); + + it("returns 0 for orthogonal vectors", () => { + assert.equal(cosineSimilarity(vec(4, 1, 0, 0, 0), vec(4, 0, 1, 0, 0)), 0); + }); + + it("returns ~0.71 for 45-degree vectors", () => { + const sim = cosineSimilarity(vec(2, 1, 0), vec(2, 1, 1)); + assert.ok(sim > 0.7 && sim < 0.72, `expected ~0.71, got ${sim}`); + }); + + it("returns 0 for zero-norm vector without NaN", () => { + assert.equal(cosineSimilarity(vec(4), vec(4, 1, 0, 0, 0)), 0); + }); + + it("returns 0 for mismatched dimensions", () => { + assert.equal(cosineSimilarity([1, 0], [1, 0, 0]), 0); + }); + + it("clamps result to [0, 1]", () => { + // Floating point can 
produce tiny values outside [0,1] + const v = vec(4, 0.9999999, 0.0000001, 0, 0); + const sim = cosineSimilarity(v, v); + assert.ok(sim >= 0 && sim <= 1); + }); +}); + +// ============================================================================ +// buildClusters +// ============================================================================ + +describe("buildClusters", () => { + it("returns empty array when fewer entries than minClusterSize", () => { + const e = entry({ vector: vec(4, 1, 0, 0, 0) }); + const result = buildClusters([e], 0.9, 2); + assert.deepEqual(result, []); + }); + + it("clusters two very similar entries", () => { + const a = entry({ vector: vec(4, 1, 0.01, 0, 0), importance: 0.8 }); + const b = entry({ vector: vec(4, 1, 0.02, 0, 0), importance: 0.5 }); + const clusters = buildClusters([a, b], 0.88, 2); + assert.equal(clusters.length, 1); + assert.equal(clusters[0].memberIndices.length, 2); + }); + + it("does not cluster orthogonal entries", () => { + const a = entry({ vector: vec(4, 1, 0, 0, 0) }); + const b = entry({ vector: vec(4, 0, 1, 0, 0) }); + const clusters = buildClusters([a, b], 0.88, 2); + assert.equal(clusters.length, 0); + }); + + it("seeds cluster with highest-importance entry", () => { + const lo = entry({ vector: vec(4, 1, 0, 0, 0), importance: 0.3 }); + const hi = entry({ vector: vec(4, 1, 0.01, 0, 0), importance: 0.9 }); + const clusters = buildClusters([lo, hi], 0.88, 2); + assert.equal(clusters.length, 1); + // The merged importance should reflect the hi entry + assert.equal(clusters[0].merged.importance, 0.9); + }); + + it("produces two separate clusters for two disjoint similar pairs", () => { + const a1 = entry({ vector: vec(4, 1, 0.01, 0, 0), importance: 0.9 }); + const a2 = entry({ vector: vec(4, 1, 0.02, 0, 0), importance: 0.6 }); + const b1 = entry({ vector: vec(4, 0, 0, 1, 0.01), importance: 0.8 }); + const b2 = entry({ vector: vec(4, 0, 0, 1, 0.02), importance: 0.5 }); + const clusters = buildClusters([a1, a2, 
b1, b2], 0.88, 2); + assert.equal(clusters.length, 2); + }); + + it("skips entries with empty vectors", () => { + const a = entry({ vector: [], importance: 0.9 }); + const b = entry({ vector: [], importance: 0.5 }); + const clusters = buildClusters([a, b], 0.5, 2); + assert.equal(clusters.length, 0); + }); +}); + +// ============================================================================ +// buildMergedEntry +// ============================================================================ + +describe("buildMergedEntry", () => { + it("deduplicates identical lines across members", () => { + const a = entry({ text: "learned TypeScript\nuses vim" }); + const b = entry({ text: "learned TypeScript\nprefers dark mode" }); + const merged = buildMergedEntry([a, b]); + const lines = merged.text.split("\n"); + const tsLines = lines.filter((l) => l.includes("TypeScript")); + assert.equal(tsLines.length, 1, "duplicate line should appear once"); + }); + + it("preserves unique lines from all members", () => { + const a = entry({ text: "uses vim" }); + const b = entry({ text: "prefers dark mode" }); + const merged = buildMergedEntry([a, b]); + assert.ok(merged.text.includes("uses vim")); + assert.ok(merged.text.includes("prefers dark mode")); + }); + + it("takes max importance", () => { + const a = entry({ importance: 0.4 }); + const b = entry({ importance: 0.9 }); + const c = entry({ importance: 0.6 }); + const merged = buildMergedEntry([a, b, c]); + assert.equal(merged.importance, 0.9); + }); + + it("caps importance at 1.0", () => { + const a = entry({ importance: 1.0 }); + const b = entry({ importance: 1.0 }); + const merged = buildMergedEntry([a, b]); + assert.ok(merged.importance <= 1.0); + }); + + it("uses plurality category", () => { + const a = entry({ category: "preference" }); + const b = entry({ category: "fact" }); + const c = entry({ category: "fact" }); + const merged = buildMergedEntry([a, b, c]); + assert.equal(merged.category, "fact"); + }); + + it("marks 
metadata as compacted with sourceCount", () => { + const members = [entry(), entry(), entry()]; + const merged = buildMergedEntry(members); + const meta = JSON.parse(merged.metadata); + assert.equal(meta.compacted, true); + assert.equal(meta.sourceCount, 3); + assert.ok(typeof meta.compactedAt === "number"); + }); +}); + +// ============================================================================ +// runCompaction +// ============================================================================ + +describe("runCompaction", () => { + it("merges a similar pair and reports correct counts", async () => { + const a = entry({ text: "pref: dark mode", vector: vec(4, 1, 0.01, 0, 0), importance: 0.7 }); + const b = entry({ text: "pref: always dark theme", vector: vec(4, 1, 0.02, 0, 0), importance: 0.5 }); + const store = makeStore([a, b]); + const embedder = makeEmbedder(4); + + const result = await runCompaction(store, embedder, defaultConfig); + + assert.equal(result.clustersFound, 1); + assert.equal(result.memoriesDeleted, 2); + assert.equal(result.memoriesCreated, 1); + assert.equal(result.dryRun, false); + assert.equal(store.stored.length, 1); + assert.equal(store.deleted.length, 2); + }); + + it("dry-run does not write anything", async () => { + const a = entry({ vector: vec(4, 1, 0.01, 0, 0) }); + const b = entry({ vector: vec(4, 1, 0.02, 0, 0) }); + const store = makeStore([a, b]); + + const result = await runCompaction(store, makeEmbedder(), { + ...defaultConfig, + dryRun: true, + }); + + assert.equal(result.dryRun, true); + assert.equal(result.memoriesDeleted, 0); + assert.equal(result.memoriesCreated, 0); + assert.equal(store.stored.length, 0); + assert.equal(store.deleted.length, 0); + assert.equal(result.clustersFound, 1); + }); + + it("returns zero counts when no entries are available", async () => { + const store = makeStore([]); + const result = await runCompaction(store, makeEmbedder(), defaultConfig); + assert.equal(result.scanned, 0); + 
assert.equal(result.clustersFound, 0); + }); + + it("skips singleton clusters (no merge when similarity below threshold)", async () => { + const a = entry({ vector: vec(4, 1, 0, 0, 0) }); + const b = entry({ vector: vec(4, 0, 1, 0, 0) }); // orthogonal + const store = makeStore([a, b]); + + const result = await runCompaction(store, makeEmbedder(), defaultConfig); + + assert.equal(result.clustersFound, 0); + assert.equal(result.memoriesDeleted, 0); + }); + + it("respects maxMemoriesToScan limit", async () => { + const entries = Array.from({ length: 10 }, (_, i) => + entry({ vector: vec(4, 1, i * 0.001, 0, 0) }) + ); + const store = makeStore(entries); + + const result = await runCompaction(store, makeEmbedder(), { + ...defaultConfig, + maxMemoriesToScan: 3, + }); + + assert.ok(result.scanned <= 3); + }); +});