Skip to content
199 changes: 198 additions & 1 deletion cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
*/

import type { Command } from "commander";
import { readFileSync } from "node:fs";
import { readFileSync, type Dirent } from "node:fs";
import { mkdir, readFile, rm, writeFile } from "node:fs/promises";
import { homedir } from "node:os";
import path from "node:path";
Expand Down Expand Up @@ -1036,6 +1036,203 @@ export function registerMemoryCLI(program: Command, context: CLIContext): void {
}
});

/**
* import-markdown: Import memories from Markdown memory files into the plugin store.
* Targets MEMORY.md and memory/YYYY-MM-DD.md files found in OpenClaw workspaces.
*/
/**
 * import-markdown: Import memories from Markdown memory files into the plugin store.
 * Targets MEMORY.md and memory/YYYY-MM-DD.md files found in OpenClaw workspaces.
 *
 * Scan order: each workspace subdirectory's MEMORY.md and memory/*.md, then —
 * only when no workspace filter was given — the flat workspace/memory/ dir.
 * Entries are Markdown bullets ("- ", "* ", "+ "); each becomes one store record.
 */
memory
  .command("import-markdown [workspace-glob]")
  .description("Import memories from Markdown files (MEMORY.md, memory/YYYY-MM-DD.md) into the plugin store")
  .option("--dry-run", "Show what would be imported without importing")
  .option("--scope <scope>", "Import into specific scope (default: global)")
  .option(
    "--openclaw-home <path>",
    "OpenClaw home directory (default: ~/.openclaw)",
  )
  .option(
    "--dedup",
    "Skip entries already in store (scope-aware exact match, requires store.bm25Search)",
  )
  .option(
    "--min-text-length <n>",
    "Minimum text length to import (default: 5)",
    "5",
  )
  .option(
    "--importance <n>",
    "Importance score for imported entries, 0.0-1.0 (default: 0.7)",
    "0.7",
  )
  .action(async (workspaceGlob, options) => {
    const openclawHome = options.openclawHome
      ? path.resolve(options.openclawHome)
      : path.join(homedir(), ".openclaw");

    const workspaceDir = path.join(openclawHome, "workspace");
    let imported = 0;
    let skipped = 0;
    let foundFiles = 0;

    // Embeddings are required to write vectors to the store; bail early.
    if (!context.embedder) {
      console.error(
        "import-markdown requires an embedder. Use via plugin CLI or ensure embedder is configured.",
      );
      process.exit(1);
    }

    // Infer workspace scope from openclaw.json agents list
    // (flat memory/ files have no per-file metadata, so we derive scope from config)
    const fsPromises = await import("node:fs/promises");
    let workspaceScope = ""; // empty = no scope override for nested workspaces
    try {
      const configPath = path.join(openclawHome, "openclaw.json");
      const configContent = await fsPromises.readFile(configPath, "utf-8");
      const config = JSON.parse(configContent);
      const agentsList: Array<{ id?: string; workspace?: string }> = config?.agents?.list ?? [];
      // Match an agent whose configured workspace is exactly our workspace dir.
      const matched = agentsList.find((a) => {
        if (!a.workspace) return false;
        return path.normalize(a.workspace) === workspaceDir;
      });
      if (matched?.id) {
        workspaceScope = matched.id;
      }
    } catch { /* use default */ }

    // Scan workspace directories
    let workspaceEntries: Dirent[];
    try {
      workspaceEntries = await fsPromises.readdir(workspaceDir, { withFileTypes: true });
    } catch {
      console.error(`Failed to read workspace directory: ${workspaceDir}`);
      process.exit(1);
    }

    // Collect all markdown files to scan, tagged with the workspace they came from.
    const mdFiles: Array<{ filePath: string; scope: string }> = [];

    for (const entry of workspaceEntries) {
      if (!entry.isDirectory()) continue;
      // Optional positional arg acts as a substring filter on workspace names.
      if (workspaceGlob && !entry.name.includes(workspaceGlob)) continue;

      const workspacePath = path.join(workspaceDir, entry.name);

      // MEMORY.md — stat() as an existence probe; absence is not an error.
      const memoryMd = path.join(workspacePath, "MEMORY.md");
      try {
        await fsPromises.stat(memoryMd);
        mdFiles.push({ filePath: memoryMd, scope: entry.name });
      } catch { /* not found */ }

      // memory/ directory — daily notes named YYYY-MM-DD*.md
      const memoryDir = path.join(workspacePath, "memory");
      try {
        const stats = await fsPromises.stat(memoryDir);
        if (stats.isDirectory()) {
          const files = await fsPromises.readdir(memoryDir);
          for (const f of files) {
            if (f.endsWith(".md") && /^\d{4}-\d{2}-\d{2}/.test(f)) {
              mdFiles.push({ filePath: path.join(memoryDir, f), scope: entry.name });
            }
          }
        }
      } catch { /* not found */ }
    }

    // Also scan the flat `workspace/memory/` directory directly under workspace root
    // (not inside any workspace subdirectory — supports James's actual structure).
    // This scan runs regardless of whether nested workspace mdFiles were found,
    // so flat memory is always reachable even when all nested workspaces are empty.
    // Skip if a specific workspace was requested (workspaceGlob), to avoid importing
    // root flat memory when the user meant to import only one workspace.
    if (!workspaceGlob) {
      const flatMemoryDir = path.join(workspaceDir, "memory");
      try {
        const stats = await fsPromises.stat(flatMemoryDir);
        if (stats.isDirectory()) {
          const files = await fsPromises.readdir(flatMemoryDir);
          for (const f of files) {
            if (f.endsWith(".md") && /^\d{4}-\d{2}-\d{2}/.test(f)) {
              mdFiles.push({ filePath: path.join(flatMemoryDir, f), scope: workspaceScope || "shared" });
            }
          }
        }
      } catch { /* not found */ }
    }

    if (mdFiles.length === 0) {
      console.log("No Markdown memory files found.");
      return;
    }

    const targetScope = options.scope || "global";
    // Guard against non-numeric input (e.g. --min-text-length abc): NaN would
    // make the `text.length < minTextLength` check below always false, silently
    // importing every bullet. Fall back to the documented default instead.
    const parsedMinLength = Number.parseInt(options.minTextLength ?? "5", 10);
    const minTextLength = Number.isNaN(parsedMinLength) ? 5 : parsedMinLength;
    // NaN-check and clamp importance to the documented 0.0-1.0 range so a
    // malformed --importance value never reaches the store.
    const parsedImportance = Number.parseFloat(options.importance ?? "0.7");
    const importanceDefault = Number.isNaN(parsedImportance)
      ? 0.7
      : Math.min(1, Math.max(0, parsedImportance));
    const dedupEnabled = !!options.dedup;

    // Parse each file for memory entries (lines starting with "- ")
    for (const { filePath, scope } of mdFiles) {
      foundFiles++;
      let content = await fsPromises.readFile(filePath, "utf-8");
      // Strip UTF-8 BOM (e.g. from Windows Notepad-saved files)
      content = content.replace(/^\uFEFF/, "");
      // Normalize line endings: handle both CRLF (\r\n) and LF (\n)
      const lines = content.split(/\r?\n/);

      for (const line of lines) {
        // Skip non-memory lines
        // Supports: "- text", "* text", "+ text" (standard Markdown bullet formats)
        if (!/^[-*+]\s/.test(line)) continue;
        const text = line.slice(2).trim();
        if (text.length < minTextLength) { skipped++; continue; }

        // ── Deduplication check (scope-aware exact match) ───────────────────
        // Run even in dry-run so --dry-run --dedup reports accurate counts
        if (dedupEnabled) {
          try {
            const existing = await context.store.bm25Search(text, 1, [targetScope]);
            if (existing.length > 0 && existing[0].entry.text === text) {
              skipped++;
              if (!options.dryRun) {
                console.log(`  [skip] already imported: ${text.slice(0, 60)}${text.length > 60 ? "..." : ""}`);
              }
              continue;
            }
          } catch {
            // bm25Search not available on this store implementation; proceed with import
          }
        }

        if (options.dryRun) {
          console.log(`  [dry-run] would import: ${text.slice(0, 80)}${text.length > 80 ? "..." : ""}`);
          imported++;
          continue;
        }

        try {
          // Embed then persist; per-entry failures are logged and counted, not fatal.
          const vector = await context.embedder!.embedPassage(text);
          await context.store.store({
            text,
            vector,
            importance: importanceDefault,
            category: "other",
            scope: targetScope,
            metadata: JSON.stringify({ importedFrom: filePath, sourceScope: scope }),
          });
          imported++;
        } catch (err) {
          console.warn(`  Failed to import: ${text.slice(0, 60)}... — ${err}`);
          skipped++;
        }
      }
    }

    if (options.dryRun) {
      console.log(`\nDRY RUN — found ${foundFiles} files, ${imported} entries would be imported, ${skipped} skipped${dedupEnabled ? " [dedup enabled]" : ""}`);
    } else {
      console.log(`\nImport complete: ${imported} imported, ${skipped} skipped (scanned ${foundFiles} files)${dedupEnabled ? " [dedup enabled]" : ""}`);
    }
  });

// Re-embed an existing LanceDB into the current target DB (A/B testing)
memory
.command("reembed")
Expand Down
Loading
Loading