From 5a5b0d2786a4467901d86156e0f179fe7410055c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=AE=89=E9=97=B2=E9=9D=99=E9=9B=85?= Date: Wed, 25 Mar 2026 14:19:51 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20memory=20compaction=20=E2=80=94=20progr?= =?UTF-8?q?essive=20summarization=20for=20stored=20memories?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a `MemoryCompactor` that periodically consolidates semantically similar old memories into single refined entries, inspired by the progressive summarization pattern (MemOS). Over time, related memory fragments are merged rather than accumulated, reducing retrieval noise and keeping the LanceDB index lean. Key additions: - `src/memory-compactor.ts`: pure, dependency-free compaction module with cosine-similarity clustering, greedy seed expansion, and rule-based merge (dedup lines, max importance, plurality category) - `store.ts`: new `fetchForCompaction()` method that fetches old entries with vectors (intentionally omitted from `list()` for performance) - `index.ts`: `memory_compact` management tool (requires `enableManagementTools: true`) + optional auto-compaction at `gateway_start` with configurable cooldown - `openclaw.plugin.json`: `memoryCompaction` config schema + uiHints - `test/memory-compactor.test.mjs`: 23 tests, 100% pass Config example: memoryCompaction: enabled: true # auto-run at gateway_start minAgeDays: 7 # only touch memories ≥ 7 days old similarityThreshold: 0.88 cooldownHours: 24 Co-Authored-By: Claude Sonnet 4.6 --- index.ts | 154 +++++++++++++ openclaw.plugin.json | 62 +++++ src/memory-compactor.ts | 403 +++++++++++++++++++++++++++++++++ src/store.ts | 45 ++++ test/memory-compactor.test.mjs | 292 ++++++++++++++++++++++++ 5 files changed, 956 insertions(+) create mode 100644 src/memory-compactor.ts create mode 100644 test/memory-compactor.test.mjs diff --git a/index.ts b/index.ts index 08aa5fc..70f7058 100644 --- a/index.ts +++ b/index.ts @@ -24,6 +24,12 @@ 
import { appendSelfImprovementEntry, ensureSelfImprovementLearningFiles } from " import type { MdMirrorWriter } from "./src/tools.js"; import { shouldSkipRetrieval } from "./src/adaptive-retrieval.js"; import { parseClawteamScopes, applyClawteamScopes } from "./src/clawteam-scope.js"; +import { + runCompaction, + shouldRunCompaction, + recordCompactionRun, + type CompactionConfig, +} from "./src/memory-compactor.js"; import { runWithReflectionTransientRetryOnce } from "./src/reflection-retry.js"; import { resolveReflectionSessionSearchDirs, stripResetSuffix } from "./src/session-recovery.js"; import { @@ -188,6 +194,14 @@ interface PluginConfig { mdMirror?: { enabled?: boolean; dir?: string }; workspaceBoundary?: WorkspaceBoundaryConfig; admissionControl?: AdmissionControlConfig; + memoryCompaction?: { + enabled?: boolean; + minAgeDays?: number; + similarityThreshold?: number; + minClusterSize?: number; + maxMemoriesToScan?: number; + cooldownHours?: number; + }; } type ReflectionThinkLevel = "off" | "minimal" | "low" | "medium" | "high"; @@ -1990,6 +2004,128 @@ const memoryLanceDBProPlugin = { } ); + // ======================================================================== + // Memory Compaction (Progressive Summarization) + // ======================================================================== + + if (config.enableManagementTools) { + api.registerTool({ + name: "memory_compact", + description: + "Consolidate semantically similar old memories into refined single entries " + + "(progressive summarization). Reduces noise and improves retrieval quality over time. " + + "Use dry_run:true first to preview the compaction plan without making changes.", + inputSchema: { + type: "object" as const, + properties: { + dry_run: { + type: "boolean", + description: "Preview clusters without writing changes. Default: false.", + }, + min_age_days: { + type: "number", + description: "Only compact memories at least this many days old. 
Default: 7.", + }, + similarity_threshold: { + type: "number", + description: "Cosine similarity threshold for clustering [0-1]. Default: 0.88.", + }, + scopes: { + type: "array", + items: { type: "string" }, + description: "Scope filter. Omit to compact all scopes.", + }, + }, + required: [], + }, + execute: async (args: Record) => { + const compactionCfg: CompactionConfig = { + enabled: true, + minAgeDays: + typeof args.min_age_days === "number" + ? args.min_age_days + : (config.memoryCompaction?.minAgeDays ?? 7), + similarityThreshold: + typeof args.similarity_threshold === "number" + ? Math.max(0, Math.min(1, args.similarity_threshold)) + : (config.memoryCompaction?.similarityThreshold ?? 0.88), + minClusterSize: config.memoryCompaction?.minClusterSize ?? 2, + maxMemoriesToScan: config.memoryCompaction?.maxMemoriesToScan ?? 200, + dryRun: args.dry_run === true, + cooldownHours: config.memoryCompaction?.cooldownHours ?? 24, + }; + const scopes = + Array.isArray(args.scopes) && args.scopes.length > 0 + ? (args.scopes as string[]) + : undefined; + + const result = await runCompaction( + store, + embedder, + compactionCfg, + scopes, + api.logger, + ); + + return { + content: [ + { + type: "text", + text: JSON.stringify( + { + scanned: result.scanned, + clustersFound: result.clustersFound, + memoriesDeleted: result.memoriesDeleted, + memoriesCreated: result.memoriesCreated, + dryRun: result.dryRun, + summary: result.dryRun + ? 
`Dry run: found ${result.clustersFound} cluster(s) in ${result.scanned} memories — no changes made.` + : `Compacted ${result.memoriesDeleted} memories into ${result.memoriesCreated} consolidated entries.`, + }, + null, + 2, + ), + }, + ], + }; + }, + }); + } + + // Auto-compaction at gateway_start (if enabled, respects cooldown) + if (config.memoryCompaction?.enabled) { + api.on("gateway_start", () => { + const compactionStateFile = join( + dirname(resolvedDbPath), + ".compaction-state.json", + ); + const compactionCfg: CompactionConfig = { + enabled: true, + minAgeDays: config.memoryCompaction!.minAgeDays ?? 7, + similarityThreshold: config.memoryCompaction!.similarityThreshold ?? 0.88, + minClusterSize: config.memoryCompaction!.minClusterSize ?? 2, + maxMemoriesToScan: config.memoryCompaction!.maxMemoriesToScan ?? 200, + dryRun: false, + cooldownHours: config.memoryCompaction!.cooldownHours ?? 24, + }; + + shouldRunCompaction(compactionStateFile, compactionCfg.cooldownHours) + .then(async (should) => { + if (!should) return; + await recordCompactionRun(compactionStateFile); + const result = await runCompaction(store, embedder, compactionCfg, undefined, api.logger); + if (result.clustersFound > 0) { + api.logger.info( + `memory-compactor [auto]: compacted ${result.memoriesDeleted} → ${result.memoriesCreated} entries`, + ); + } + }) + .catch((err) => { + api.logger.warn(`memory-compactor [auto]: failed: ${String(err)}`); + }); + }); + } + // ======================================================================== // Register CLI Commands // ======================================================================== @@ -3669,6 +3805,24 @@ export function parsePluginConfig(value: unknown): PluginConfig { } : undefined, admissionControl: normalizeAdmissionControlConfig(cfg.admissionControl), + memoryCompaction: (() => { + const raw = + typeof cfg.memoryCompaction === "object" && cfg.memoryCompaction !== null + ? 
(cfg.memoryCompaction as Record) + : null; + if (!raw) return undefined; + return { + enabled: raw.enabled === true, + minAgeDays: parsePositiveInt(raw.minAgeDays) ?? 7, + similarityThreshold: + typeof raw.similarityThreshold === "number" + ? Math.max(0, Math.min(1, raw.similarityThreshold)) + : 0.88, + minClusterSize: parsePositiveInt(raw.minClusterSize) ?? 2, + maxMemoriesToScan: parsePositiveInt(raw.maxMemoriesToScan) ?? 200, + cooldownHours: parsePositiveInt(raw.cooldownHours) ?? 24, + }; + })(), }; } diff --git a/openclaw.plugin.json b/openclaw.plugin.json index a2224f9..cf1f3e6 100644 --- a/openclaw.plugin.json +++ b/openclaw.plugin.json @@ -744,6 +744,49 @@ } } } + }, + "memoryCompaction": { + "type": "object", + "additionalProperties": false, + "description": "Progressive summarization: periodically consolidate semantically similar old memories into refined single entries, reducing noise and improving retrieval quality over time.", + "properties": { + "enabled": { + "type": "boolean", + "default": false, + "description": "Enable automatic compaction at gateway startup (respects cooldownHours)" + }, + "minAgeDays": { + "type": "integer", + "default": 7, + "minimum": 1, + "description": "Only compact memories at least this many days old" + }, + "similarityThreshold": { + "type": "number", + "default": 0.88, + "minimum": 0, + "maximum": 1, + "description": "Cosine similarity threshold for clustering. Higher = more conservative merges." 
+ }, + "minClusterSize": { + "type": "integer", + "default": 2, + "minimum": 2, + "description": "Minimum cluster size required to trigger a merge" + }, + "maxMemoriesToScan": { + "type": "integer", + "default": 200, + "minimum": 1, + "description": "Maximum number of memories to scan per compaction run" + }, + "cooldownHours": { + "type": "integer", + "default": 24, + "minimum": 1, + "description": "Minimum hours between automatic compaction runs" + } + } } }, "required": [ @@ -1217,6 +1260,25 @@ "placeholder": "30000", "help": "Request timeout for the smart-extraction / upgrade LLM in milliseconds", "advanced": true + }, + "memoryCompaction.enabled": { + "label": "Auto Compaction", + "help": "Automatically consolidate similar old memories at gateway startup. Also available on-demand via the memory_compact tool (requires enableManagementTools)." + }, + "memoryCompaction.minAgeDays": { + "label": "Min Age (days)", + "help": "Memories younger than this are never touched by compaction", + "advanced": true + }, + "memoryCompaction.similarityThreshold": { + "label": "Similarity Threshold", + "help": "How similar two memories must be to merge (0–1). 0.88 is a good starting point; raise to 0.92+ for conservative merges.", + "advanced": true + }, + "memoryCompaction.cooldownHours": { + "label": "Cooldown (hours)", + "help": "Minimum gap between automatic compaction runs", + "advanced": true } } } diff --git a/src/memory-compactor.ts b/src/memory-compactor.ts new file mode 100644 index 0000000..1c0b1ea --- /dev/null +++ b/src/memory-compactor.ts @@ -0,0 +1,403 @@ +/** + * Memory Compactor — Progressive Summarization + * + * Identifies clusters of semantically similar memories older than a configured + * age threshold and merges each cluster into a single, higher-quality entry. 
/**
 * Memory Compactor — Progressive Summarization
 *
 * Finds groups of semantically similar memories that are older than a
 * configured age and folds each group into one consolidated entry. Related
 * fragments converge over time, which cuts retrieval noise without any
 * external LLM call.
 *
 * Pipeline:
 *   1. Load memories older than `minAgeDays`, vectors included.
 *   2. Grow clusters greedily by cosine similarity.
 *   3. For every cluster with at least `minClusterSize` members build one
 *      merged entry:
 *        - text:       lines deduplicated across members, newline-joined
 *        - importance: maximum over the members (never downgraded)
 *        - category:   plurality vote
 *        - scope:      the scope shared by the members
 *        - metadata:   `{ compacted: true, sourceCount: N }`
 *   4. Store the merged entry and delete its sources.
 */

import type { MemoryEntry } from "./store.js";

// ============================================================================
// Types
// ============================================================================

/** Tunables for one compaction pass. */
export interface CompactionConfig {
  /** Turns automatic compaction on. Default: false */
  enabled: boolean;
  /** Minimum age, in days, a memory must have to be considered. Default: 7 */
  minAgeDays: number;
  /** Cosine-similarity cut-off in [0, 1] used when clustering. Default: 0.88 */
  similarityThreshold: number;
  /** Smallest cluster that triggers a merge. Default: 2 */
  minClusterSize: number;
  /** Upper bound on memories examined per run. Default: 200 */
  maxMemoriesToScan: number;
  /** When true, only the plan is reported and nothing is written. Default: false */
  dryRun: boolean;
  /** Minimum hours between automatic runs (gateway_start guard). Default: 24 */
  cooldownHours: number;
}

/** A stored memory together with its embedding vector, as seen by the compactor. */
export interface CompactionEntry {
  id: string;
  text: string;
  vector: number[];
  category: MemoryEntry["category"];
  scope: string;
  importance: number;
  timestamp: number;
  metadata: string;
}

/** One cluster and the entry proposed to replace it. */
export interface ClusterPlan {
  /** Positions of the members inside the array handed to the clusterer. */
  memberIndices: number[];
  /** Proposed replacement; id and vector are supplied later by the caller. */
  merged: {
    text: string;
    importance: number;
    category: MemoryEntry["category"];
    scope: string;
    metadata: string;
  };
}

/** Counters describing what a compaction pass did. */
export interface CompactionResult {
  /** Number of memories examined (bounded by `maxMemoriesToScan`). */
  scanned: number;
  /** Number of clusters that reached `minClusterSize`. */
  clustersFound: number;
  /** Source memories removed; always 0 for a dry run. */
  memoriesDeleted: number;
  /** Merged memories written; always 0 for a dry run. */
  memoriesCreated: number;
  /** True when the pass was a dry run. */
  dryRun: boolean;
}

// ============================================================================
// Math helpers
// ============================================================================

/**
 * Dot product, accumulated in index order over `a`.
 * Callers are expected to pass vectors of equal length.
 */
function dot(a: number[], b: number[]): number {
  let total = 0;
  for (const [i, component] of a.entries()) {
    total += component * b[i];
  }
  return total;
}

/** Euclidean (L2) length of `v`. */
function norm(v: number[]): number {
  const squaredLength = dot(v, v);
  return Math.sqrt(squaredLength);
}
/**
 * Cosine similarity of two vectors, clamped to [0, 1].
 *
 * Returns 0 when either vector is empty, the lengths differ, or either vector
 * has zero norm (this avoids a NaN from dividing by zero). Negative
 * similarities are clamped to 0.
 *
 * The dot product and both squared norms are accumulated in a single pass, in
 * index order, and combined as `dot / (sqrt(aa) * sqrt(bb))`.
 */
export function cosineSimilarity(a: number[], b: number[]): number {
  if (a.length === 0 || a.length !== b.length) return 0;

  let ab = 0;
  let aa = 0;
  let bb = 0;
  for (let i = 0; i < a.length; i++) {
    ab += a[i] * b[i];
    aa += a[i] * a[i];
    bb += b[i] * b[i];
  }

  const na = Math.sqrt(aa);
  const nb = Math.sqrt(bb);
  if (na === 0 || nb === 0) return 0;
  return Math.max(0, Math.min(1, ab / (na * nb)));
}

// ============================================================================
// Cluster building
// ============================================================================

/**
 * Greedy cluster expansion.
 *
 * Entries are visited in descending importance so the most valuable memory
 * seeds each cluster. A seed collects every still-unassigned entry that
 * (a) lives in the same scope as the seed and (b) has cosine similarity with
 * the seed >= `threshold`.
 *
 * The scope check matters because the merged entry inherits a single scope;
 * without it, memories from one scope could be folded into another.
 *
 * @param entries        Candidate memories (with vectors).
 * @param threshold      Minimum cosine similarity to join a seed's cluster.
 * @param minClusterSize Minimum members for a cluster to be returned. Values
 *                       below 2 are treated as 2: "merging" a single memory
 *                       would only delete and re-create it.
 * @returns One plan per qualifying cluster; `memberIndices` index into `entries`.
 */
export function buildClusters(
  entries: CompactionEntry[],
  threshold: number,
  minClusterSize: number,
): ClusterPlan[] {
  const minSize = Math.max(2, minClusterSize);
  if (entries.length < minSize) return [];

  // Visit order: highest importance first. Array.prototype.sort is stable, so
  // equal-importance entries keep their input order.
  const order = entries
    .map((_, i) => i)
    .sort((a, b) => entries[b].importance - entries[a].importance);

  const assigned = new Uint8Array(entries.length); // 0 = unassigned
  const plans: ClusterPlan[] = [];

  for (const seedIdx of order) {
    if (assigned[seedIdx]) continue;
    assigned[seedIdx] = 1;

    const seed = entries[seedIdx];
    if (seed.vector.length === 0) continue; // cannot compare without a vector

    const cluster: number[] = [seedIdx];
    for (let j = 0; j < entries.length; j++) {
      if (assigned[j]) continue;
      const candidate = entries[j];
      if (candidate.scope !== seed.scope) continue; // never merge across scopes
      if (candidate.vector.length === 0) continue;
      if (cosineSimilarity(seed.vector, candidate.vector) >= threshold) {
        cluster.push(j);
        assigned[j] = 1;
      }
    }

    if (cluster.length >= minSize) {
      plans.push({
        memberIndices: cluster,
        merged: buildMergedEntry(cluster.map((i) => entries[i])),
      });
    }
  }

  return plans;
}

// ============================================================================
// Merge strategy
// ============================================================================

/**
 * Merge a cluster of entries into a single proposed entry.
 *
 * - text: every non-blank line of every member, trimmed, with
 *   case-insensitive duplicates dropped (first spelling wins), newline-joined.
 * - importance: maximum across members, capped at 1.0.
 * - category: plurality vote; a tie goes to the category whose best member has
 *   the higher importance, and a remaining tie to the category seen first.
 * - scope: taken from the first member.
 * - metadata: JSON with `compacted`, `sourceCount` and `compactedAt`.
 *
 * @throws Error when `members` is empty.
 */
export function buildMergedEntry(
  members: CompactionEntry[],
): ClusterPlan["merged"] {
  if (members.length === 0) {
    throw new Error("buildMergedEntry: cannot merge an empty cluster");
  }

  // --- text: deduplicate lines ---
  const seen = new Set<string>();
  const lines: string[] = [];
  for (const m of members) {
    for (const line of m.text.split("\n")) {
      const trimmed = line.trim();
      if (!trimmed) continue;
      const key = trimmed.toLowerCase();
      if (seen.has(key)) continue;
      seen.add(key);
      lines.push(trimmed);
    }
  }
  const text = lines.join("\n");

  // --- importance: max, capped ---
  const importance = Math.min(
    1.0,
    Math.max(...members.map((m) => m.importance)),
  );

  // --- category: plurality vote, ties broken by highest member importance ---
  const tally = new Map<
    MemoryEntry["category"],
    { count: number; topImportance: number }
  >();
  for (const m of members) {
    const slot = tally.get(m.category);
    if (slot) {
      slot.count += 1;
      slot.topImportance = Math.max(slot.topImportance, m.importance);
    } else {
      tally.set(m.category, { count: 1, topImportance: m.importance });
    }
  }
  let category: MemoryEntry["category"] = "other";
  let bestCount = 0;
  let bestImportance = Number.NEGATIVE_INFINITY;
  for (const [cat, slot] of tally) {
    const wins =
      slot.count > bestCount ||
      (slot.count === bestCount && slot.topImportance > bestImportance);
    if (wins) {
      bestCount = slot.count;
      bestImportance = slot.topImportance;
      category = cat;
    }
  }

  // --- scope: members are expected to share one; take the first ---
  const scope = members[0].scope;

  // --- metadata ---
  const metadata = JSON.stringify({
    compacted: true,
    sourceCount: members.length,
    compactedAt: Date.now(),
  });

  return { text, importance, category, scope, metadata };
}

// ============================================================================
// Minimal store interface (duck-typed so no circular import)
// ============================================================================

/** The subset of the storage backend that the compactor relies on. */
export interface CompactorStore {
  /** Return entries with `timestamp` below `maxTimestamp`, vectors included. */
  fetchForCompaction(
    maxTimestamp: number,
    scopeFilter?: string[],
    limit?: number,
  ): Promise<CompactionEntry[]>;
  /** Persist a new entry. */
  store(entry: {
    text: string;
    vector: number[];
    importance: number;
    category: MemoryEntry["category"];
    scope: string;
    metadata?: string;
  }): Promise<MemoryEntry>;
  /** Remove an entry by id; the compactor counts a truthy result as a deletion. */
  delete(id: string, scopeFilter?: string[]): Promise<boolean>;
}

/** Produces the embedding for a merged text. */
export interface CompactorEmbedder {
  embedPassage(text: string): Promise<number[]>;
}

/** Logging sink used for progress and failure messages. */
export interface CompactorLogger {
  info(msg: string): void;
  warn(msg: string): void;
}
// ============================================================================
// Main runner
// ============================================================================

/**
 * Run a single compaction pass over memories in the given scopes.
 *
 * For each cluster the merged text is embedded and stored first, and only
 * then are the source entries deleted, so a failure while embedding or
 * storing leaves the sources untouched. The steps are not transactional: if a
 * source cannot be deleted it is logged and left in place next to the merged
 * entry.
 *
 * @param store    Storage backend (must support fetchForCompaction + store + delete)
 * @param embedder Used to embed merged text before storage
 * @param config   Compaction configuration
 * @param scopes   Scope filter; undefined = all scopes
 * @param logger   Optional logger
 * @returns Counters for the pass; deletions/creations are 0 for a dry run.
 */
export async function runCompaction(
  store: CompactorStore,
  embedder: CompactorEmbedder,
  config: CompactionConfig,
  scopes?: string[],
  logger?: CompactorLogger,
): Promise<CompactionResult> {
  // Config may come from unvalidated caller input. A negative age would move
  // the cutoff into the future (touching brand-new memories) and NaN would
  // produce a NaN cutoff, so clamp to >= 0 and fall back to the documented
  // default of 7 days.
  const minAgeDays = Number.isFinite(config.minAgeDays)
    ? Math.max(0, config.minAgeDays)
    : 7;
  const cutoff = Date.now() - minAgeDays * 24 * 60 * 60 * 1000;

  const entries = await store.fetchForCompaction(
    cutoff,
    scopes,
    config.maxMemoriesToScan,
  );

  if (entries.length === 0) {
    return {
      scanned: 0,
      clustersFound: 0,
      memoriesDeleted: 0,
      memoriesCreated: 0,
      dryRun: config.dryRun,
    };
  }

  // Entries without a vector cannot be compared; drop them defensively.
  const valid = entries.filter((e) => e.vector && e.vector.length > 0);

  const plans = buildClusters(
    valid,
    config.similarityThreshold,
    config.minClusterSize,
  );

  if (config.dryRun) {
    logger?.info(
      `memory-compactor [dry-run]: scanned=${valid.length} clusters=${plans.length}`,
    );
    return {
      scanned: valid.length,
      clustersFound: plans.length,
      memoriesDeleted: 0,
      memoriesCreated: 0,
      dryRun: true,
    };
  }

  let memoriesDeleted = 0;
  let memoriesCreated = 0;

  for (const plan of plans) {
    const members = plan.memberIndices.map((i) => valid[i]);

    try {
      const vector = await embedder.embedPassage(plan.merged.text);
      // Storing an entry without a vector and then deleting its sources
      // would leave a memory that this module can no longer compare.
      if (!Array.isArray(vector) || vector.length === 0) {
        throw new Error("embedder returned an empty vector");
      }

      await store.store({
        text: plan.merged.text,
        vector,
        importance: plan.merged.importance,
        category: plan.merged.category,
        scope: plan.merged.scope,
        metadata: plan.merged.metadata,
      });
    } catch (err) {
      logger?.warn(
        `memory-compactor: failed to merge cluster of ${members.length}: ${String(err)}`,
      );
      continue; // sources stay untouched
    }
    memoriesCreated++;

    // Delete each source independently so one failure does not strand the
    // remaining sources of this cluster as duplicates of the merged entry.
    for (const m of members) {
      try {
        const deleted = await store.delete(m.id);
        if (deleted) memoriesDeleted++;
      } catch (err) {
        logger?.warn(
          `memory-compactor: failed to delete merged source ${m.id}: ${String(err)}`,
        );
      }
    }
  }

  logger?.info(
    `memory-compactor: scanned=${valid.length} clusters=${plans.length} ` +
      `deleted=${memoriesDeleted} created=${memoriesCreated}`,
  );

  return {
    scanned: valid.length,
    clustersFound: plans.length,
    memoriesDeleted,
    memoriesCreated,
    dryRun: false,
  };
}

// ============================================================================
// Cooldown helper
// ============================================================================

/**
 * Check whether enough time has passed since the last compaction run.
 *
 * Reads `{ "lastRunAt": <epoch ms> }` from `stateFile`. A missing, unreadable
 * or malformed file counts as "never run". A `lastRunAt` in the future (for
 * example after a clock adjustment) is also treated as stale, so compaction
 * is not blocked until the clock catches up.
 *
 * @param stateFile     Path of the JSON state file.
 * @param cooldownHours Minimum hours that must have elapsed since the last run.
 * @returns true when a run is allowed.
 */
export async function shouldRunCompaction(
  stateFile: string,
  cooldownHours: number,
): Promise<boolean> {
  try {
    const { readFile } = await import("node:fs/promises");
    const parsed: unknown = JSON.parse(await readFile(stateFile, "utf8"));
    const lastRunAt =
      typeof parsed === "object" && parsed !== null
        ? (parsed as { lastRunAt?: unknown }).lastRunAt
        : undefined;
    if (typeof lastRunAt === "number") {
      const elapsed = Date.now() - lastRunAt;
      if (elapsed < 0) return true;
      return elapsed >= cooldownHours * 60 * 60 * 1000;
    }
  } catch {
    // Deliberate best-effort: file missing or malformed means "never run".
  }
  return true;
}

/**
 * Persist "now" as the last compaction time, creating the parent directory
 * of `stateFile` if needed.
 */
export async function recordCompactionRun(stateFile: string): Promise<void> {
  const { writeFile, mkdir } = await import("node:fs/promises");
  const { dirname } = await import("node:path");
  await mkdir(dirname(stateFile), { recursive: true });
  await writeFile(stateFile, JSON.stringify({ lastRunAt: Date.now() }), "utf8");
}
/**
 * Fetch memories whose `timestamp` is below `maxTimestamp`, including their
 * raw vectors. Intended for the memory compactor, which needs vectors for
 * cosine-similarity clustering (per the original note, `list()` omits them
 * for performance).
 *
 * The row limit is pushed into the query so at most `limit` rows (and their
 * vectors) are materialised. No ordering is requested, so which rows are
 * returned when more than `limit` match is up to the backend.
 *
 * @param maxTimestamp Exclusive upper bound on `timestamp`; a non-finite
 *                     value yields an empty result.
 * @param scopeFilter  When non-empty, only rows in one of these scopes (or
 *                     with a NULL scope) are returned.
 * @param limit        Maximum rows to return (default 200). Non-finite values
 *                     fall back to 200; values below 1 yield an empty result.
 * @returns Matching entries; `vector` is `[]` when the row has no usable vector.
 */
async fetchForCompaction(
  maxTimestamp: number,
  scopeFilter?: string[],
  limit = 200,
): Promise<MemoryEntry[]> {
  await this.ensureInitialized();

  // The cutoff is interpolated into the filter string; NaN/Infinity would
  // produce an invalid or meaningless predicate.
  if (!Number.isFinite(maxTimestamp)) return [];
  const maxRows = Number.isFinite(limit) ? Math.floor(limit) : 200;
  if (maxRows < 1) return [];

  const conditions: string[] = [`timestamp < ${maxTimestamp}`];

  if (scopeFilter && scopeFilter.length > 0) {
    const scopeConditions = scopeFilter
      .map((scope) => `scope = '${escapeSqlLiteral(scope)}'`)
      .join(" OR ");
    conditions.push(`((${scopeConditions}) OR scope IS NULL)`);
  }

  const whereClause = conditions.join(" AND ");

  const results = await this.table!
    .query()
    .where(whereClause)
    .limit(maxRows)
    .toArray();

  return results.map((row): MemoryEntry => {
    // NOTE(review): depending on the driver, a vector column may come back as
    // a typed array or another iterable rather than a plain Array — confirm.
    // Array.isArray() is false for those, which would make every vector [].
    const rawVector = row.vector as unknown;
    let vector: number[] = [];
    if (Array.isArray(rawVector)) {
      vector = rawVector as number[];
    } else if (
      typeof rawVector === "object" &&
      rawVector !== null &&
      typeof (rawVector as Iterable<number>)[Symbol.iterator] === "function"
    ) {
      vector = Array.from(rawVector as Iterable<number>);
    }

    return {
      id: row.id as string,
      text: row.text as string,
      vector,
      category: row.category as MemoryEntry["category"],
      scope: (row.scope as string | undefined) ?? "global",
      importance: Number(row.importance),
      timestamp: Number(row.timestamp),
      metadata: (row.metadata as string) || "{}",
    };
  });
}
import { describe, it } from "node:test";
import assert from "node:assert/strict";
import jitiFactory from "jiti";

const jiti = jitiFactory(import.meta.url, { interopDefault: true });

const compactor = jiti("../src/memory-compactor.ts");
const { cosineSimilarity, buildClusters, buildMergedEntry, runCompaction } =
  compactor;

// ============================================================================
// Helpers
// ============================================================================

/** Zero-filled vector of `dims` slots whose leading slots are set from `values`. */
function vec(dims, ...values) {
  const out = new Array(dims).fill(0);
  for (const [slot, val] of values.entries()) {
    out[slot] = val;
  }
  return out;
}

/** Build a compaction entry; any field in `overrides` that is null/undefined gets a default. */
function entry(overrides = {}) {
  const defaults = {
    id: "id-" + Math.random().toString(36).slice(2),
    text: "some memory",
    vector: vec(4, 1, 0, 0, 0),
    category: "fact",
    scope: "global",
    importance: 0.5,
    timestamp: Date.now() - 8 * 24 * 60 * 60 * 1000,
    metadata: "{}",
  };
  const built = {};
  for (const [field, fallback] of Object.entries(defaults)) {
    built[field] = overrides[field] ?? fallback;
  }
  return built;
}

/** In-memory store stub that records what was stored and deleted. */
function makeStore(entries = []) {
  const rows = new Map();
  for (const e of entries) {
    rows.set(e.id, { ...e });
  }
  const stored = [];
  const deleted = [];

  return {
    stored,
    deleted,
    async fetchForCompaction(_maxTs, _scopes, limit = 200) {
      return Array.from(rows.values()).slice(0, limit);
    },
    async store(e) {
      const created = { id: "merged-" + Math.random().toString(36).slice(2), ...e };
      rows.set(created.id, created);
      stored.push(created);
      return created;
    },
    async delete(id) {
      if (!rows.has(id)) return false;
      rows.delete(id);
      deleted.push(id);
      return true;
    },
  };
}

/** Deterministic fake embedder: first char code (mod 10) / 10 goes into slot 0. */
function makeEmbedder(dim = 4) {
  const embedPassage = async (text) => {
    const out = new Array(dim).fill(0);
    out[0] = (text.charCodeAt(0) % 10) / 10;
    return out;
  };
  return { embedPassage };
}

const defaultConfig = {
  enabled: true,
  minAgeDays: 7,
  similarityThreshold: 0.88,
  minClusterSize: 2,
  maxMemoriesToScan: 200,
  dryRun: false,
  cooldownHours: 24,
};

// ============================================================================
// cosineSimilarity
// ============================================================================

describe("cosineSimilarity", () => {
  it("returns 1.0 for identical vectors", () => {
    const same = vec(4, 1, 2, 3, 4);
    assert.equal(cosineSimilarity(same, same), 1.0);
  });

  it("returns 0 for orthogonal vectors", () => {
    const xAxis = vec(4, 1, 0, 0, 0);
    const yAxis = vec(4, 0, 1, 0, 0);
    assert.equal(cosineSimilarity(xAxis, yAxis), 0);
  });

  it("returns ~0.71 for 45-degree vectors", () => {
    const sim = cosineSimilarity(vec(2, 1, 0), vec(2, 1, 1));
    assert.ok(sim > 0.7 && sim < 0.72, `expected ~0.71, got ${sim}`);
  });

  it("returns 0 for zero-norm vector without NaN", () => {
    const zero = vec(4);
    assert.equal(cosineSimilarity(zero, vec(4, 1, 0, 0, 0)), 0);
  });

  it("returns 0 for mismatched dimensions", () => {
    assert.equal(cosineSimilarity([1, 0], [1, 0, 0]), 0);
  });

  it("clamps result to [0, 1]", () => {
    // Rounding can nudge the raw ratio slightly outside [0, 1].
    const nearlyUnit = vec(4, 0.9999999, 0.0000001, 0, 0);
    const sim = cosineSimilarity(nearlyUnit, nearlyUnit);
    assert.ok(sim >= 0 && sim <= 1);
  });
});

// ============================================================================
// buildClusters
// ============================================================================

describe("buildClusters", () => {
  it("returns empty array when fewer entries than minClusterSize", () => {
    const only = entry({ vector: vec(4, 1, 0, 0, 0) });
    assert.deepEqual(buildClusters([only], 0.9, 2), []);
  });

  it("clusters two very similar entries", () => {
    const first = entry({ vector: vec(4, 1, 0.01, 0, 0), importance: 0.8 });
    const second = entry({ vector: vec(4, 1, 0.02, 0, 0), importance: 0.5 });
    const found = buildClusters([first, second], 0.88, 2);
    assert.equal(found.length, 1);
    assert.equal(found[0].memberIndices.length, 2);
  });

  it("does not cluster orthogonal entries", () => {
    const first = entry({ vector: vec(4, 1, 0, 0, 0) });
    const second = entry({ vector: vec(4, 0, 1, 0, 0) });
    const found = buildClusters([first, second], 0.88, 2);
    assert.equal(found.length, 0);
  });

  it("seeds cluster with highest-importance entry", () => {
    const minor = entry({ vector: vec(4, 1, 0, 0, 0), importance: 0.3 });
    const major = entry({ vector: vec(4, 1, 0.01, 0, 0), importance: 0.9 });
    const found = buildClusters([minor, major], 0.88, 2);
    assert.equal(found.length, 1);
    // Merged importance must come from the more important member.
    assert.equal(found[0].merged.importance, 0.9);
  });

  it("produces two separate clusters for two disjoint similar pairs", () => {
    const pairA = [
      entry({ vector: vec(4, 1, 0.01, 0, 0), importance: 0.9 }),
      entry({ vector: vec(4, 1, 0.02, 0, 0), importance: 0.6 }),
    ];
    const pairB = [
      entry({ vector: vec(4, 0, 0, 1, 0.01), importance: 0.8 }),
      entry({ vector: vec(4, 0, 0, 1, 0.02), importance: 0.5 }),
    ];
    const found = buildClusters([...pairA, ...pairB], 0.88, 2);
    assert.equal(found.length, 2);
  });

  it("skips entries with empty vectors", () => {
    const first = entry({ vector: [], importance: 0.9 });
    const second = entry({ vector: [], importance: 0.5 });
    const found = buildClusters([first, second], 0.5, 2);
    assert.equal(found.length, 0);
  });
});

// ============================================================================
// buildMergedEntry
// ============================================================================

describe("buildMergedEntry", () => {
  it("deduplicates identical lines across members", () => {
    const first = entry({ text: "learned TypeScript\nuses vim" });
    const second = entry({ text: "learned TypeScript\nprefers dark mode" });
    const result = buildMergedEntry([first, second]);
    const hits = result.text.split("\n").filter((l) => l.includes("TypeScript"));
    assert.equal(hits.length, 1, "duplicate line should appear once");
  });

  it("preserves unique lines from all members", () => {
    const first = entry({ text: "uses vim" });
    const second = entry({ text: "prefers dark mode" });
    const result = buildMergedEntry([first, second]);
    assert.ok(result.text.includes("uses vim"));
    assert.ok(result.text.includes("prefers dark mode"));
  });

  it("takes max importance", () => {
    const group = [0.4, 0.9, 0.6].map((importance) => entry({ importance }));
    const result = buildMergedEntry(group);
    assert.equal(result.importance, 0.9);
  });

  it("caps importance at 1.0", () => {
    const group = [entry({ importance: 1.0 }), entry({ importance: 1.0 })];
    const result = buildMergedEntry(group);
    assert.ok(result.importance <= 1.0);
  });

  it("uses plurality category", () => {
    const group = ["preference", "fact", "fact"].map((category) =>
      entry({ category }),
    );
    const result = buildMergedEntry(group);
    assert.equal(result.category, "fact");
  });

  it("marks metadata as compacted with sourceCount", () => {
    const group = [entry(), entry(), entry()];
    const result = buildMergedEntry(group);
    const parsed = JSON.parse(result.metadata);
    assert.equal(parsed.compacted, true);
    assert.equal(parsed.sourceCount, 3);
    assert.ok(typeof parsed.compactedAt === "number");
  });
});

// ============================================================================
// runCompaction
// ============================================================================

describe("runCompaction", () => {
  it("merges a similar pair and reports correct counts", async () => {
    const first = entry({
      text: "pref: dark mode",
      vector: vec(4, 1, 0.01, 0, 0),
      importance: 0.7,
    });
    const second = entry({
      text: "pref: always dark theme",
      vector: vec(4, 1, 0.02, 0, 0),
      importance: 0.5,
    });
    const backing = makeStore([first, second]);
    const embedder = makeEmbedder(4);

    const outcome = await runCompaction(backing, embedder, defaultConfig);

    assert.equal(outcome.clustersFound, 1);
    assert.equal(outcome.memoriesDeleted, 2);
    assert.equal(outcome.memoriesCreated, 1);
    assert.equal(outcome.dryRun, false);
    assert.equal(backing.stored.length, 1);
    assert.equal(backing.deleted.length, 2);
  });

  it("dry-run does not write anything", async () => {
    const first = entry({ vector: vec(4, 1, 0.01, 0, 0) });
    const second = entry({ vector: vec(4, 1, 0.02, 0, 0) });
    const backing = makeStore([first, second]);
    const dryConfig = { ...defaultConfig, dryRun: true };

    const outcome = await runCompaction(backing, makeEmbedder(), dryConfig);

    assert.equal(outcome.dryRun, true);
    assert.equal(outcome.memoriesDeleted, 0);
    assert.equal(outcome.memoriesCreated, 0);
    assert.equal(backing.stored.length, 0);
    assert.equal(backing.deleted.length, 0);
    assert.equal(outcome.clustersFound, 1);
  });

  it("returns zero counts when no entries are available", async () => {
    const backing = makeStore([]);
    const outcome = await runCompaction(backing, makeEmbedder(), defaultConfig);
    assert.equal(outcome.scanned, 0);
    assert.equal(outcome.clustersFound, 0);
  });

  it("skips singleton clusters (no merge when similarity below threshold)", async () => {
    const first = entry({ vector: vec(4, 1, 0, 0, 0) });
    const second = entry({ vector: vec(4, 0, 1, 0, 0) }); // orthogonal to `first`
    const backing = makeStore([first, second]);

    const outcome = await runCompaction(backing, makeEmbedder(), defaultConfig);

    assert.equal(outcome.clustersFound, 0);
    assert.equal(outcome.memoriesDeleted, 0);
  });

  it("respects maxMemoriesToScan limit", async () => {
    const many = Array.from({ length: 10 }, (_, i) =>
      entry({ vector: vec(4, 1, i * 0.001, 0, 0) }),
    );
    const backing = makeStore(many);
    const cappedConfig = { ...defaultConfig, maxMemoriesToScan: 3 };

    const outcome = await runCompaction(backing, makeEmbedder(), cappedConfig);

    assert.ok(outcome.scanned <= 3);
  });
});