diff --git a/CLAUDE.md b/CLAUDE.md index d5ea9ff..3ae2b79 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -13,6 +13,8 @@ use the tools — ingest meetings, answer queries, maintain the brain, enrich fr - `src/core/engine.ts` — Pluggable engine interface (BrainEngine) - `src/core/postgres-engine.ts` — Postgres + pgvector implementation +- `src/core/sqlite-engine.ts` — SQLite + FTS5 + vec0 implementation +- `src/sqlite-schema.sql` — SQLite DDL (FTS5 triggers, vec0 virtual table) - `src/core/db.ts` — Connection management, schema initialization - `src/core/import-file.ts` — Shared single-file import (used by import + sync) - `src/core/sync.ts` — Pure sync functions (manifest parsing, filtering, slug conversion) diff --git a/Dockerfile.sqlite b/Dockerfile.sqlite new file mode 100644 index 0000000..e3f5f49 --- /dev/null +++ b/Dockerfile.sqlite @@ -0,0 +1,30 @@ +# GBrain SQLite engine with vec0 extension pre-installed. +# Usage: +# docker build -f Dockerfile.sqlite -t gbrain-sqlite . +# docker run -v ~/.gbrain:/root/.gbrain gbrain-sqlite +# +# Examples: +# docker run -v ~/.gbrain:/root/.gbrain gbrain-sqlite init --sqlite +# docker run -v ~/.gbrain:/root/.gbrain -v /path/to/notes:/notes gbrain-sqlite import /notes +# +# For vector search with vec0, the extension is pre-installed at /usr/lib/vec0.so. +# Without Docker, download the binary for your platform from: +# https://github.com/asg017/sqlite-vec/releases + +FROM oven/bun:1 AS base + +# Install vec0 extension for vector search +# Note: update the URL to the latest release for your target platform +RUN apt-get update && apt-get install -y wget ca-certificates && \ + wget -qO /usr/lib/vec0.so \ + https://github.com/asg017/sqlite-vec/releases/latest/download/vec0-linux-x86_64.so && \ + chmod +x /usr/lib/vec0.so && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +WORKDIR /app +COPY package.json bun.lock* ./ +RUN bun install --frozen-lockfile + +COPY . . 
+ +ENTRYPOINT ["bun", "run", "src/cli.ts"] diff --git a/docs/SQLITE_ENGINE.md b/docs/SQLITE_ENGINE.md index fcac44c..ee4f5cf 100644 --- a/docs/SQLITE_ENGINE.md +++ b/docs/SQLITE_ENGINE.md @@ -1,6 +1,6 @@ # SQLite Engine Design -## Status: Designed, not built. Community PRs welcome. +## Status: Built. See `src/core/sqlite-engine.ts`. The pluggable engine interface (`docs/ENGINES.md`) means anyone can add a SQLite backend without touching the CLI, MCP server, or skills. This document is the full plan. diff --git a/src/cli.ts b/src/cli.ts index 03a3891..8325d1a 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -1,12 +1,13 @@ #!/usr/bin/env bun import { PostgresEngine } from './core/postgres-engine.ts'; +import { SQLiteEngine } from './core/sqlite-engine.ts'; import { loadConfig, toEngineConfig } from './core/config.ts'; import type { BrainEngine } from './core/engine.ts'; import { VERSION } from './version.ts'; const COMMAND_HELP: Record = { - init: 'Usage: gbrain init [--supabase|--url ]\n\nCreate brain (guided wizard).', + init: 'Usage: gbrain init [--supabase|--sqlite|--url ] [--path ]\n\nCreate brain (guided wizard).', upgrade: 'Usage: gbrain upgrade\n\nSelf-update the CLI.\n\nDetects install method (bun, binary, clawhub) and runs the appropriate update.', get: 'Usage: gbrain get \n\nRead a page by slug (supports fuzzy matching).', put: 'Usage: gbrain put [< file.md]\n\nWrite or update a page from stdin.', @@ -236,11 +237,14 @@ async function main() { async function connectEngine(): Promise { const config = loadConfig(); if (!config) { - console.error('No brain configured. Run: gbrain init --supabase'); + console.error('No brain configured. Run: gbrain init --supabase or gbrain init --sqlite'); process.exit(1); } - const engine = new PostgresEngine(); + const engine = config.engine === 'sqlite' + ? 
new SQLiteEngine() + : new PostgresEngine(); + await engine.connect(toEngineConfig(config)); return engine; } @@ -252,7 +256,7 @@ USAGE gbrain [options] SETUP - init [--supabase|--url ] Create brain (guided wizard) + init [--supabase|--sqlite|--url] Create brain (guided wizard) upgrade Self-update PAGES diff --git a/src/commands/files.ts b/src/commands/files.ts index 73bff70..24b3217 100644 --- a/src/commands/files.ts +++ b/src/commands/files.ts @@ -2,20 +2,7 @@ import { readFileSync, readdirSync, statSync, existsSync } from 'fs'; import { join, relative, extname, basename } from 'path'; import { createHash } from 'crypto'; import type { BrainEngine } from '../core/engine.ts'; -import * as db from '../core/db.ts'; - -interface FileRecord { - id: number; - page_slug: string | null; - filename: string; - storage_path: string; - storage_url: string; - mime_type: string | null; - size_bytes: number; - content_hash: string; - metadata: Record; - created_at: string; -} +import type { FileInput } from '../core/types.ts'; const MIME_TYPES: Record = { '.jpg': 'image/jpeg', '.jpeg': 'image/jpeg', '.png': 'image/png', @@ -42,16 +29,16 @@ export async function runFiles(engine: BrainEngine, args: string[]) { switch (subcommand) { case 'list': - await listFiles(args[1]); + await listFiles(engine, args[1]); break; case 'upload': - await uploadFile(args.slice(1)); + await uploadFile(engine, args.slice(1)); break; case 'sync': - await syncFiles(args[1]); + await syncFiles(engine, args[1]); break; case 'verify': - await verifyFiles(); + await verifyFiles(engine); break; default: console.error(`Usage: gbrain files [args]`); @@ -63,14 +50,8 @@ export async function runFiles(engine: BrainEngine, args: string[]) { } } -async function listFiles(slug?: string) { - const sql = db.getConnection(); - let rows; - if (slug) { - rows = await sql`SELECT * FROM files WHERE page_slug = ${slug} ORDER BY filename`; - } else { - rows = await sql`SELECT * FROM files ORDER BY page_slug, filename LIMIT 
100`; - } +async function listFiles(engine: BrainEngine, slug?: string) { + const rows = await engine.getFiles(slug); if (rows.length === 0) { console.log(slug ? `No files for page: ${slug}` : 'No files stored.'); @@ -84,7 +65,7 @@ async function listFiles(slug?: string) { } } -async function uploadFile(args: string[]) { +async function uploadFile(engine: BrainEngine, args: string[]) { const filePath = args.find(a => !a.startsWith('--')); const pageSlug = args.find((a, i) => args[i - 1] === '--page') || null; @@ -99,32 +80,30 @@ async function uploadFile(args: string[]) { const storagePath = pageSlug ? `${pageSlug}/${filename}` : `unsorted/${hash.slice(0, 8)}-${filename}`; const mimeType = getMimeType(filePath); - const sql = db.getConnection(); - - // Check for existing file by hash - const existing = await sql`SELECT id FROM files WHERE content_hash = ${hash} AND storage_path = ${storagePath}`; - if (existing.length > 0) { + const existing = await engine.findFileByHash(hash, storagePath); + if (existing) { console.log(`File already uploaded (hash match): ${storagePath}`); return; } // TODO: actual Supabase Storage upload goes here - // For now, record metadata in Postgres const storageUrl = `https://storage.supabase.co/brain-files/${storagePath}`; - await sql` - INSERT INTO files (page_slug, filename, storage_path, storage_url, mime_type, size_bytes, content_hash, metadata) - VALUES (${pageSlug}, ${filename}, ${storagePath}, ${storageUrl}, ${mimeType}, ${stat.size}, ${hash}, ${'{}'}::jsonb) - ON CONFLICT (storage_path) DO UPDATE SET - content_hash = EXCLUDED.content_hash, - size_bytes = EXCLUDED.size_bytes, - mime_type = EXCLUDED.mime_type - `; + const file: FileInput = { + page_slug: pageSlug, + filename, + storage_path: storagePath, + storage_url: storageUrl, + mime_type: mimeType, + size_bytes: stat.size, + content_hash: hash, + }; + await engine.upsertFile(file); console.log(`Uploaded: ${storagePath} (${Math.round(stat.size / 1024)}KB)`); } -async function 
syncFiles(dir?: string) { +async function syncFiles(engine: BrainEngine, dir?: string) { if (!dir || !existsSync(dir)) { console.error('Usage: gbrain files sync '); process.exit(1); @@ -150,27 +129,25 @@ async function syncFiles(dir?: string) { const mimeType = getMimeType(filePath); const stat = statSync(filePath); - const sql = db.getConnection(); - const existing = await sql`SELECT id FROM files WHERE content_hash = ${hash} AND storage_path = ${storagePath}`; - if (existing.length > 0) { + const existing = await engine.findFileByHash(hash, storagePath); + if (existing) { skipped++; continue; } - // Infer page slug from directory structure const pathParts = relativePath.split('/'); const pageSlug = pathParts.length > 1 ? pathParts.slice(0, -1).join('/') : null; - const storageUrl = `https://storage.supabase.co/brain-files/${storagePath}`; - await sql` - INSERT INTO files (page_slug, filename, storage_path, storage_url, mime_type, size_bytes, content_hash, metadata) - VALUES (${pageSlug}, ${filename}, ${storagePath}, ${storageUrl}, ${mimeType}, ${stat.size}, ${hash}, ${'{}'}::jsonb) - ON CONFLICT (storage_path) DO UPDATE SET - content_hash = EXCLUDED.content_hash, - size_bytes = EXCLUDED.size_bytes, - mime_type = EXCLUDED.mime_type - `; + await engine.upsertFile({ + page_slug: pageSlug, + filename, + storage_path: storagePath, + storage_url: storageUrl, + mime_type: mimeType, + size_bytes: stat.size, + content_hash: hash, + }); uploaded++; } @@ -178,9 +155,8 @@ async function syncFiles(dir?: string) { console.log(`\n\nFiles sync complete: ${uploaded} uploaded, ${skipped} skipped (unchanged)`); } -async function verifyFiles() { - const sql = db.getConnection(); - const rows = await sql`SELECT * FROM files ORDER BY storage_path`; +async function verifyFiles(engine: BrainEngine) { + const rows = await engine.getFiles(); if (rows.length === 0) { console.log('No files to verify.'); @@ -189,11 +165,8 @@ async function verifyFiles() { let verified = 0; let mismatches = 
0; - let missing = 0; for (const row of rows) { - // Note: full verification would check Supabase Storage hash - // For now, verify the DB record exists and has valid data if (!row.content_hash || !row.storage_path) { mismatches++; console.error(` MISMATCH: ${row.storage_path} (missing hash or path)`); @@ -202,11 +175,10 @@ async function verifyFiles() { } } - if (mismatches === 0 && missing === 0) { + if (mismatches === 0) { console.log(`${verified} files verified, 0 mismatches, 0 missing`); } else { - console.error(`VERIFY FAILED: ${mismatches} mismatches, ${missing} missing.`); - console.error(`Run: gbrain files sync --retry-failed`); + console.error(`VERIFY FAILED: ${mismatches} mismatches.`); process.exit(1); } } @@ -224,7 +196,6 @@ function collectFiles(dir: string): string[] { if (stat.isDirectory()) { walk(full); } else if (!entry.endsWith('.md')) { - // Non-markdown files are candidates for storage files.push(full); } } diff --git a/src/commands/init.ts b/src/commands/init.ts index cabfc6b..e8967ae 100644 --- a/src/commands/init.ts +++ b/src/commands/init.ts @@ -1,24 +1,56 @@ import { execSync } from 'child_process'; import { PostgresEngine } from '../core/postgres-engine.ts'; -import { saveConfig, type GBrainConfig } from '../core/config.ts'; +import { SQLiteEngine } from '../core/sqlite-engine.ts'; +import { saveConfig, getConfigDir, type GBrainConfig } from '../core/config.ts'; +import { join } from 'path'; export async function runInit(args: string[]) { - const isSupabase = args.includes('--supabase'); + const isSqlite = args.includes('--sqlite'); const urlIndex = args.indexOf('--url'); const manualUrl = urlIndex !== -1 ? args[urlIndex + 1] : null; + const pathIndex = args.indexOf('--path'); + const manualPath = pathIndex !== -1 ? 
args[pathIndex + 1] : null; + if (isSqlite) { + await initSqlite(manualPath); + } else { + await initPostgres(manualUrl); + } +} + +async function initSqlite(dbPath: string | null) { + const resolvedPath = dbPath || join(getConfigDir(), 'brain.db'); + console.log(`Creating SQLite brain at ${resolvedPath}...`); + + const engine = new SQLiteEngine(); + await engine.connect({ engine: 'sqlite', database_path: resolvedPath }); + + console.log('Running schema migration...'); + await engine.initSchema(); + + const config: GBrainConfig = { + engine: 'sqlite', + database_path: resolvedPath, + }; + saveConfig(config); + console.log('Config saved to ~/.gbrain/config.json'); + + const stats = await engine.getStats(); + await engine.disconnect(); + + console.log(`\nBrain ready. ${stats.page_count} pages.`); + console.log('Next: gbrain import to add your markdown.'); +} + +async function initPostgres(manualUrl: string | null) { let databaseUrl: string; if (manualUrl) { databaseUrl = manualUrl; - } else if (isSupabase) { - databaseUrl = await supabaseWizard(); } else { - // Default to supabase wizard databaseUrl = await supabaseWizard(); } - // Connect and init schema console.log('Connecting to database...'); const engine = new PostgresEngine(); await engine.connect({ database_url: databaseUrl }); @@ -26,7 +58,6 @@ export async function runInit(args: string[]) { console.log('Running schema migration...'); await engine.initSchema(); - // Save config const config: GBrainConfig = { engine: 'postgres', database_url: databaseUrl, @@ -34,7 +65,6 @@ export async function runInit(args: string[]) { saveConfig(config); console.log('Config saved to ~/.gbrain/config.json'); - // Verify const stats = await engine.getStats(); await engine.disconnect(); @@ -43,7 +73,6 @@ export async function runInit(args: string[]) { } async function supabaseWizard(): Promise { - // Try Supabase CLI auto-provision try { execSync('bunx supabase --version', { stdio: 'pipe' }); console.log('Supabase CLI 
detected.'); @@ -55,7 +84,6 @@ async function supabaseWizard(): Promise { console.log('Or provide a connection URL directly.'); } - // Fallback to manual URL console.log('\nEnter your Supabase/Postgres connection URL:'); console.log(' Format: postgresql://user:password@host:port/database'); console.log(' Find it: Supabase Dashboard > Settings > Database > Connection string\n'); diff --git a/src/core/engine.ts b/src/core/engine.ts index 126cffc..7853e5f 100644 --- a/src/core/engine.ts +++ b/src/core/engine.ts @@ -8,6 +8,7 @@ import type { PageVersion, BrainStats, BrainHealth, IngestLogEntry, IngestLogInput, + FileRecord, FileInput, EngineConfig, } from './types.ts'; @@ -74,4 +75,9 @@ export interface BrainEngine { // Config getConfig(key: string): Promise; setConfig(key: string, value: string): Promise; + + // Files + getFiles(slug?: string): Promise; + upsertFile(file: FileInput): Promise; + findFileByHash(contentHash: string, storagePath: string): Promise; } diff --git a/src/core/index.ts b/src/core/index.ts index f5f946c..8fb3826 100644 --- a/src/core/index.ts +++ b/src/core/index.ts @@ -1,4 +1,6 @@ export type { BrainEngine } from './engine.ts'; export { PostgresEngine } from './postgres-engine.ts'; +export { SQLiteEngine } from './sqlite-engine.ts'; export * from './types.ts'; export { parseMarkdown, serializeMarkdown, splitBody } from './markdown.ts'; +export { validateSlug, contentHash } from './utils.ts'; diff --git a/src/core/postgres-engine.ts b/src/core/postgres-engine.ts index 1fb6791..44c73a1 100644 --- a/src/core/postgres-engine.ts +++ b/src/core/postgres-engine.ts @@ -1,5 +1,5 @@ -import { createHash } from 'crypto'; import type { BrainEngine } from './engine.ts'; +import { validateSlug, contentHash } from './utils.ts'; import type { Page, PageInput, PageFilters, PageType, Chunk, ChunkInput, @@ -10,6 +10,7 @@ import type { PageVersion, BrainStats, BrainHealth, IngestLogEntry, IngestLogInput, + FileRecord, FileInput, EngineConfig, } from './types.ts'; 
import * as db from './db.ts'; @@ -548,19 +549,39 @@ export class PostgresEngine implements BrainEngine { ON CONFLICT (key) DO UPDATE SET value = EXCLUDED.value `; } -} -// Helpers -function validateSlug(slug: string): void { - if (!slug || /\.\./.test(slug) || /^\//.test(slug) || !/^[a-z0-9][a-z0-9/_-]*$/.test(slug)) { - throw new Error(`Invalid slug: "${slug}". Slugs must be lowercase alphanumeric with / - _ separators, no path traversal.`); + // Files + async getFiles(slug?: string): Promise { + const sql = db.getConnection(); + let rows; + if (slug) { + rows = await sql`SELECT * FROM files WHERE page_slug = ${slug} ORDER BY filename`; + } else { + rows = await sql`SELECT * FROM files ORDER BY page_slug, filename LIMIT 100`; + } + return rows.map(rowToFileRecord); + } + + async upsertFile(file: FileInput): Promise { + const sql = db.getConnection(); + await sql` + INSERT INTO files (page_slug, filename, storage_path, storage_url, mime_type, size_bytes, content_hash, metadata) + VALUES (${file.page_slug}, ${file.filename}, ${file.storage_path}, ${file.storage_url}, ${file.mime_type}, ${file.size_bytes}, ${file.content_hash}, ${JSON.stringify(file.metadata || {})}::jsonb) + ON CONFLICT (storage_path) DO UPDATE SET + content_hash = EXCLUDED.content_hash, + size_bytes = EXCLUDED.size_bytes, + mime_type = EXCLUDED.mime_type + `; } -} -function contentHash(compiledTruth: string, timeline: string): string { - return createHash('sha256').update(compiledTruth + '\n---\n' + timeline).digest('hex'); + async findFileByHash(contentHash: string, storagePath: string): Promise { + const sql = db.getConnection(); + const rows = await sql`SELECT * FROM files WHERE content_hash = ${contentHash} AND storage_path = ${storagePath}`; + return rows.length > 0 ? 
rowToFileRecord(rows[0]) : null; + } } +// Helpers function rowToPage(row: Record): Page { return { id: row.id as number, @@ -602,3 +623,18 @@ function rowToSearchResult(row: Record): SearchResult { stale: Boolean(row.stale), }; } + +function rowToFileRecord(row: Record): FileRecord { + return { + id: row.id as number, + page_slug: row.page_slug as string | null, + filename: row.filename as string, + storage_path: row.storage_path as string, + storage_url: row.storage_url as string, + mime_type: row.mime_type as string | null, + size_bytes: row.size_bytes as number | null, + content_hash: row.content_hash as string, + metadata: (typeof row.metadata === 'string' ? JSON.parse(row.metadata) : row.metadata) as Record, + created_at: new Date(row.created_at as string), + }; +} diff --git a/src/core/sqlite-engine.ts b/src/core/sqlite-engine.ts new file mode 100644 index 0000000..abece50 --- /dev/null +++ b/src/core/sqlite-engine.ts @@ -0,0 +1,834 @@ +import { Database } from 'bun:sqlite'; +import { readFileSync } from 'fs'; +import { join, dirname } from 'path'; +import { validateSlug, contentHash } from './utils.ts'; +import type { BrainEngine } from './engine.ts'; +import type { + Page, PageInput, PageFilters, PageType, + Chunk, ChunkInput, + SearchResult, SearchOpts, + Link, GraphNode, + TimelineEntry, TimelineInput, TimelineOpts, + RawData, + PageVersion, + BrainStats, BrainHealth, + IngestLogEntry, IngestLogInput, + FileRecord, FileInput, + EngineConfig, +} from './types.ts'; + +export class SQLiteEngine implements BrainEngine { + private db: Database | null = null; + private hasVec0 = false; + private vec0Warned = false; + + // ── Lifecycle ──────────────────────────────────────────── + + async connect(config: EngineConfig): Promise { + const dbPath = config.database_path; + if (!dbPath) { + throw new Error('database_path is required for SQLite engine'); + } + this.db = new Database(dbPath, { create: true }); + this.db.exec('PRAGMA journal_mode=WAL'); + 
this.db.exec('PRAGMA foreign_keys=ON'); + + // Try loading vec0 extension + try { + this.db.loadExtension('vec0'); + this.hasVec0 = true; + } catch { + this.hasVec0 = false; + } + } + + async disconnect(): Promise { + if (this.db) { + this.db.close(); + this.db = null; + } + } + + async initSchema(): Promise { + const conn = this.getDb(); + const schemaPath = join(dirname(new URL(import.meta.url).pathname), '..', 'sqlite-schema.sql'); + const schemaSql = readFileSync(schemaPath, 'utf-8'); + + // bun:sqlite exec handles multi-statement SQL including triggers correctly + conn.exec(schemaSql); + + // Create vec0 virtual table if extension loaded + if (this.hasVec0) { + try { + conn.exec(` + CREATE VIRTUAL TABLE IF NOT EXISTS chunks_vec USING vec0( + chunk_id INTEGER PRIMARY KEY, + embedding float[1536] + ) + `); + } catch { + this.hasVec0 = false; + } + } + } + + async transaction(fn: (engine: BrainEngine) => Promise): Promise { + const conn = this.getDb(); + // NOTE: bun:sqlite is synchronous. All SQLiteEngine methods execute + // synchronously even though they return Promises. The `await fn(this)` + // resolves via microtasks before any other macrotask can execute, so + // the BEGIN...COMMIT block is safe in single-threaded Bun environments. + conn.exec('BEGIN'); + try { + const result = await fn(this); + conn.exec('COMMIT'); + return result; + } catch (e) { + conn.exec('ROLLBACK'); + throw e; + } + } + + // ── Pages CRUD ─────────────────────────────────────────── + + async getPage(slug: string): Promise { + const conn = this.getDb(); + const row = conn.query(` + SELECT id, slug, type, title, compiled_truth, timeline, frontmatter, content_hash, created_at, updated_at + FROM pages WHERE slug = ? 
+ `).get(slug) as Record | null; + if (!row) return null; + return this.rowToPage(row); + } + + async putPage(slug: string, page: PageInput): Promise { + validateSlug(slug); + const conn = this.getDb(); + const hash = contentHash(page.compiled_truth, page.timeline || ''); + const frontmatter = JSON.stringify(page.frontmatter || {}); + const now = new Date().toISOString(); + + conn.query(` + INSERT INTO pages (slug, type, title, compiled_truth, timeline, frontmatter, content_hash, created_at, updated_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) + ON CONFLICT (slug) DO UPDATE SET + type = excluded.type, + title = excluded.title, + compiled_truth = excluded.compiled_truth, + timeline = excluded.timeline, + frontmatter = excluded.frontmatter, + content_hash = excluded.content_hash, + updated_at = ? + `).run(slug, page.type, page.title, page.compiled_truth, page.timeline || '', frontmatter, hash, now, now, now); + + return (await this.getPage(slug))!; + } + + async deletePage(slug: string): Promise { + const conn = this.getDb(); + conn.query('DELETE FROM pages WHERE slug = ?').run(slug); + } + + async listPages(filters?: PageFilters): Promise { + const conn = this.getDb(); + const limit = filters?.limit || 100; + const offset = filters?.offset || 0; + + let sql: string; + let params: unknown[]; + + if (filters?.type && filters?.tag) { + sql = ` + SELECT p.* FROM pages p + JOIN tags t ON t.page_id = p.id + WHERE p.type = ? AND t.tag = ? + ORDER BY p.updated_at DESC LIMIT ? OFFSET ? + `; + params = [filters.type, filters.tag, limit, offset]; + } else if (filters?.type) { + sql = `SELECT * FROM pages WHERE type = ? ORDER BY updated_at DESC LIMIT ? OFFSET ?`; + params = [filters.type, limit, offset]; + } else if (filters?.tag) { + sql = ` + SELECT p.* FROM pages p + JOIN tags t ON t.page_id = p.id + WHERE t.tag = ? + ORDER BY p.updated_at DESC LIMIT ? OFFSET ? + `; + params = [filters.tag, limit, offset]; + } else { + sql = `SELECT * FROM pages ORDER BY updated_at DESC LIMIT ? 
OFFSET ?`; + params = [limit, offset]; + } + + const rows = conn.query(sql).all(...params) as Record[]; + return rows.map(r => this.rowToPage(r)); + } + + async resolveSlugs(partial: string): Promise { + const conn = this.getDb(); + + // Exact match first + const exact = conn.query('SELECT slug FROM pages WHERE slug = ?').get(partial) as { slug: string } | null; + if (exact) return [exact.slug]; + + // LIKE fuzzy match + const fuzzy = conn.query(` + SELECT slug FROM pages + WHERE slug LIKE ? OR title LIKE ? + ORDER BY + CASE WHEN slug LIKE ? THEN 1 + WHEN title LIKE ? THEN 2 + ELSE 3 + END + LIMIT 5 + `).all(`%${partial}%`, `%${partial}%`, `%${partial}%`, `%${partial}%`) as { slug: string }[]; + + return fuzzy.map(r => r.slug); + } + + // ── Search ─────────────────────────────────────────────── + + async searchKeyword(query: string, opts?: SearchOpts): Promise { + const conn = this.getDb(); + const limit = opts?.limit || 20; + const ftsQuery = toFts5Query(query); + if (!ftsQuery) return []; + + try { + // Get matching pages ranked by FTS5, then join to first chunk for result display + const rows = conn.query(` + SELECT + p.slug, p.id as page_id, p.title, p.type, + cc.chunk_text, cc.chunk_source, + (-pages_fts.rank) AS score, + 0 AS stale + FROM pages_fts + JOIN pages p ON p.id = pages_fts.rowid + LEFT JOIN content_chunks cc ON cc.page_id = p.id + AND cc.id = (SELECT id FROM content_chunks WHERE page_id = p.id ORDER BY chunk_index LIMIT 1) + WHERE pages_fts MATCH ? + ORDER BY pages_fts.rank + LIMIT ? + `).all(ftsQuery, limit) as Record[]; + + // Filter out rows with no chunk (page exists in FTS but has no chunks) + return rows + .filter(r => r.chunk_text != null) + .map(r => this.rowToSearchResult(r)); + } catch { + return []; + } + } + + async searchVector(embedding: Float32Array, opts?: SearchOpts): Promise { + if (!this.hasVec0) { + if (!this.vec0Warned) { + console.warn('vec0 extension not loaded — vector search unavailable. 
Using keyword search only.'); + this.vec0Warned = true; + } + return []; + } + + const conn = this.getDb(); + const limit = opts?.limit || 20; + const vecStr = '[' + Array.from(embedding).join(',') + ']'; + + try { + const rows = conn.query(` + SELECT + p.slug, p.id as page_id, p.title, p.type, + cc.chunk_text, cc.chunk_source, + (1.0 - v.distance) AS score, + 0 AS stale + FROM chunks_vec v + JOIN content_chunks cc ON cc.id = v.chunk_id + JOIN pages p ON p.id = cc.page_id + WHERE v.embedding MATCH ? + ORDER BY v.distance + LIMIT ? + `).all(vecStr, limit) as Record[]; + + return rows.map(r => this.rowToSearchResult(r)); + } catch { + return []; + } + } + + // ── Chunks ─────────────────────────────────────────────── + + async upsertChunks(slug: string, chunks: ChunkInput[]): Promise { + const conn = this.getDb(); + const pageRow = conn.query('SELECT id FROM pages WHERE slug = ?').get(slug) as { id: number } | null; + if (!pageRow) throw new Error(`Page not found: ${slug}`); + const pageId = pageRow.id; + + // Clean up vec0 first + if (this.hasVec0) { + const existingIds = conn.query('SELECT id FROM content_chunks WHERE page_id = ?').all(pageId) as { id: number }[]; + for (const { id } of existingIds) { + try { conn.query('DELETE FROM chunks_vec WHERE chunk_id = ?').run(id); } catch {} + } + } + + conn.query('DELETE FROM content_chunks WHERE page_id = ?').run(pageId); + if (chunks.length === 0) return; + + const now = new Date().toISOString(); + const insertChunk = conn.query(` + INSERT INTO content_chunks (page_id, chunk_index, chunk_text, chunk_source, model, token_count, embedded_at) + VALUES (?, ?, ?, ?, ?, ?, ?) + `); + + for (const chunk of chunks) { + const runResult = insertChunk.run( + pageId, chunk.chunk_index, chunk.chunk_text, chunk.chunk_source, + chunk.model || 'text-embedding-3-large', chunk.token_count || null, + chunk.embedding ? 
now : null, + ); + + if (this.hasVec0 && chunk.embedding) { + const chunkId = runResult.lastInsertRowid; + const vecStr = '[' + Array.from(chunk.embedding).join(',') + ']'; + try { conn.query('INSERT INTO chunks_vec (chunk_id, embedding) VALUES (?, ?)').run(chunkId, vecStr); } catch {} + } + } + } + + async getChunks(slug: string): Promise { + const conn = this.getDb(); + const rows = conn.query(` + SELECT cc.* FROM content_chunks cc + JOIN pages p ON p.id = cc.page_id + WHERE p.slug = ? + ORDER BY cc.chunk_index + `).all(slug) as Record[]; + return rows.map(r => this.rowToChunk(r)); + } + + async deleteChunks(slug: string): Promise { + const conn = this.getDb(); + if (this.hasVec0) { + const ids = conn.query(` + SELECT cc.id FROM content_chunks cc + JOIN pages p ON p.id = cc.page_id + WHERE p.slug = ? + `).all(slug) as { id: number }[]; + for (const { id } of ids) { + try { conn.query('DELETE FROM chunks_vec WHERE chunk_id = ?').run(id); } catch {} + } + } + conn.query(` + DELETE FROM content_chunks + WHERE page_id = (SELECT id FROM pages WHERE slug = ?) + `).run(slug); + } + + // ── Links ──────────────────────────────────────────────── + + async addLink(from: string, to: string, context?: string, linkType?: string): Promise { + const conn = this.getDb(); + conn.query(` + INSERT INTO links (from_page_id, to_page_id, link_type, context) + SELECT f.id, t.id, ?, ? + FROM pages f, pages t + WHERE f.slug = ? AND t.slug = ? + ON CONFLICT (from_page_id, to_page_id) DO UPDATE SET + link_type = excluded.link_type, + context = excluded.context + `).run(linkType || '', context || '', from, to); + } + + async removeLink(from: string, to: string): Promise { + const conn = this.getDb(); + conn.query(` + DELETE FROM links + WHERE from_page_id = (SELECT id FROM pages WHERE slug = ?) + AND to_page_id = (SELECT id FROM pages WHERE slug = ?) 
+ `).run(from, to); + } + + async getLinks(slug: string): Promise { + const conn = this.getDb(); + return conn.query(` + SELECT f.slug as from_slug, t.slug as to_slug, l.link_type, l.context + FROM links l + JOIN pages f ON f.id = l.from_page_id + JOIN pages t ON t.id = l.to_page_id + WHERE f.slug = ? + `).all(slug) as Link[]; + } + + async getBacklinks(slug: string): Promise { + const conn = this.getDb(); + return conn.query(` + SELECT f.slug as from_slug, t.slug as to_slug, l.link_type, l.context + FROM links l + JOIN pages f ON f.id = l.from_page_id + JOIN pages t ON t.id = l.to_page_id + WHERE t.slug = ? + `).all(slug) as Link[]; + } + + async traverseGraph(slug: string, depth: number = 5): Promise { + const conn = this.getDb(); + const rows = conn.query(` + WITH RECURSIVE graph AS ( + SELECT p.id, p.slug, p.title, p.type, 0 as depth + FROM pages p WHERE p.slug = ? + + UNION + + SELECT p2.id, p2.slug, p2.title, p2.type, g.depth + 1 + FROM graph g + JOIN links l ON l.from_page_id = g.id + JOIN pages p2 ON p2.id = l.to_page_id + WHERE g.depth < ? + ) + SELECT DISTINCT g.slug, g.title, g.type, g.depth, + ( + SELECT json_group_array(json_object('to_slug', p3.slug, 'link_type', l2.link_type)) + FROM links l2 + JOIN pages p3 ON p3.id = l2.to_page_id + WHERE l2.from_page_id = g.id + ) as links + FROM graph g + ORDER BY g.depth, g.slug + `).all(slug, depth) as Record[]; + + return rows.map(r => ({ + slug: r.slug as string, + title: r.title as string, + type: r.type as PageType, + depth: r.depth as number, + links: r.links ? JSON.parse(r.links as string) : [], + })); + } + + // ── Tags ───────────────────────────────────────────────── + + async addTag(slug: string, tag: string): Promise { + const conn = this.getDb(); + conn.query(` + INSERT INTO tags (page_id, tag) + SELECT id, ? FROM pages WHERE slug = ? 
/**
 * Return timeline entries for a page, newest date first.
 *
 * @param slug Slug of the page whose entries are wanted.
 * @param opts Optional filters: `after` / `before` are inclusive bounds on the
 *             entry date and may be used independently or together; `limit`
 *             caps the number of rows (default 100).
 * @returns Matching entries; empty when the page has none or does not exist.
 */
async getTimeline(slug: string, opts?: TimelineOpts): Promise<TimelineEntry[]> {
  const conn = this.getDb();
  const limit = opts?.limit || 100;

  // Build the WHERE clause from whichever bounds are present so that a
  // `before`-only filter is honoured instead of being silently dropped.
  const conditions = ['p.slug = ?'];
  const params: unknown[] = [slug];
  if (opts?.after) {
    conditions.push('te.date >= ?');
    params.push(opts.after);
  }
  if (opts?.before) {
    conditions.push('te.date <= ?');
    params.push(opts.before);
  }
  params.push(limit);

  const sql = `
    SELECT te.* FROM timeline_entries te
    JOIN pages p ON p.id = te.page_id
    WHERE ${conditions.join(' AND ')}
    ORDER BY te.date DESC LIMIT ?
  `;

  // created_at is filled by SQLite's datetime('now'): UTC, formatted as
  // "YYYY-MM-DD HH:MM:SS" with no zone marker. Handing that to `new Date`
  // as-is makes JS read it as local time, so mark it as UTC explicitly.
  const parseTimestamp = (value: string): Date =>
    new Date(/^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}$/.test(value) ? `${value.replace(' ', 'T')}Z` : value);

  const rows = conn.query(sql).all(...params) as Record<string, unknown>[];
  return rows.map(r => ({
    id: r.id as number,
    page_id: r.page_id as number,
    date: r.date as string,
    source: r.source as string,
    summary: r.summary as string,
    detail: r.detail as string,
    created_at: parseTimestamp(r.created_at as string),
  }));
}
/**
 * List every stored snapshot of a page, most recent first.
 *
 * `snapshot_at` has one-second resolution, so snapshots taken within the same
 * second would otherwise come back in arbitrary order; the autoincrement id is
 * used as a tie-break, matching how createVersion picks the latest row.
 *
 * @param slug Slug of the page whose history is wanted.
 * @returns Snapshots ordered newest to oldest; empty if there are none.
 */
async getVersions(slug: string): Promise<PageVersion[]> {
  const conn = this.getDb();
  const rows = conn.query(`
    SELECT pv.* FROM page_versions pv
    JOIN pages p ON p.id = pv.page_id
    WHERE p.slug = ?
    ORDER BY pv.snapshot_at DESC, pv.id DESC
  `).all(slug) as Record<string, unknown>[];

  return rows.map(r => ({
    id: r.id as number,
    page_id: r.page_id as number,
    compiled_truth: r.compiled_truth as string,
    // frontmatter is persisted as a JSON string column.
    frontmatter: JSON.parse(r.frontmatter as string),
    snapshot_at: new Date(r.snapshot_at as string),
  }));
}
/**
 * Compute brain health metrics in a single query.
 *
 * - `page_count`: total pages.
 * - `embed_coverage`: fraction of chunks with `embedded_at` set (0 when there
 *   are no chunks; the divisor is clamped to 1).
 * - `stale_pages`: pages whose `updated_at` is older than their newest
 *   timeline entry.
 * - `orphan_pages`: pages that no link points to.
 * - `dead_links`: links whose target page row is missing.
 * - `missing_embeddings`: chunks with no `embedded_at`.
 *
 * @returns The metrics above, each coerced to a JS number.
 */
async getHealth(): Promise<BrainHealth> {
  const conn = this.getDb();
  // Timestamps are compared through julianday() rather than as raw text:
  // updated_at may hold an ISO string ("…T…Z", written from JS) while
  // created_at uses datetime('now') ("YYYY-MM-DD HH:MM:SS"), and a lexical
  // comparison across the two formats gives the wrong answer.
  const h = conn.query(`
    SELECT
      (SELECT count(*) FROM pages) as page_count,
      CAST((SELECT count(*) FROM content_chunks WHERE embedded_at IS NOT NULL) AS REAL) /
        MAX((SELECT count(*) FROM content_chunks), 1) as embed_coverage,
      (SELECT count(*) FROM pages p
       WHERE julianday(p.updated_at) <
         (SELECT MAX(julianday(te.created_at)) FROM timeline_entries te WHERE te.page_id = p.id)
      ) as stale_pages,
      (SELECT count(*) FROM pages p
       WHERE NOT EXISTS (SELECT 1 FROM links l WHERE l.to_page_id = p.id)
      ) as orphan_pages,
      (SELECT count(*) FROM links l
       WHERE NOT EXISTS (SELECT 1 FROM pages p WHERE p.id = l.to_page_id)
      ) as dead_links,
      (SELECT count(*) FROM content_chunks WHERE embedded_at IS NULL) as missing_embeddings
  `).get() as Record<string, unknown>;

  return {
    page_count: Number(h.page_count),
    embed_coverage: Number(h.embed_coverage),
    stale_pages: Number(h.stale_pages),
    orphan_pages: Number(h.orphan_pages),
    dead_links: Number(h.dead_links),
    missing_embeddings: Number(h.missing_embeddings),
  };
}
row.value : null; + } + + async setConfig(key: string, value: string): Promise { + const conn = this.getDb(); + conn.query(` + INSERT INTO config (key, value) VALUES (?, ?) + ON CONFLICT (key) DO UPDATE SET value = excluded.value + `).run(key, value); + } + + // ── Files ──────────────────────────────────────────────── + + async getFiles(slug?: string): Promise { + const conn = this.getDb(); + let rows: Record[]; + if (slug) { + rows = conn.query('SELECT * FROM files WHERE page_slug = ? ORDER BY filename').all(slug) as Record[]; + } else { + rows = conn.query('SELECT * FROM files ORDER BY page_slug, filename LIMIT 100').all() as Record[]; + } + return rows.map(r => this.rowToFileRecord(r)); + } + + async upsertFile(file: FileInput): Promise { + const conn = this.getDb(); + const now = new Date().toISOString(); + conn.query(` + INSERT INTO files (page_slug, filename, storage_path, storage_url, mime_type, size_bytes, content_hash, metadata, created_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) + ON CONFLICT (storage_path) DO UPDATE SET + content_hash = excluded.content_hash, + size_bytes = excluded.size_bytes, + mime_type = excluded.mime_type + `).run( + file.page_slug, file.filename, file.storage_path, file.storage_url, + file.mime_type, file.size_bytes, file.content_hash, + JSON.stringify(file.metadata || {}), now, + ); + } + + async findFileByHash(contentHash: string, storagePath: string): Promise { + const conn = this.getDb(); + const row = conn.query('SELECT * FROM files WHERE content_hash = ? AND storage_path = ?').get(contentHash, storagePath) as Record | null; + return row ? this.rowToFileRecord(row) : null; + } + + // ── Private helpers ────────────────────────────────────── + + private getDb(): Database { + if (!this.db) { + throw new Error('SQLite database not connected. 
Call connect() first.'); + } + return this.db; + } + + private rowToPage(row: Record): Page { + return { + id: row.id as number, + slug: row.slug as string, + type: row.type as PageType, + title: row.title as string, + compiled_truth: row.compiled_truth as string, + timeline: row.timeline as string, + frontmatter: typeof row.frontmatter === 'string' ? JSON.parse(row.frontmatter) : row.frontmatter as Record, + content_hash: row.content_hash as string | undefined, + created_at: new Date(row.created_at as string), + updated_at: new Date(row.updated_at as string), + }; + } + + private rowToChunk(row: Record): Chunk { + return { + id: row.id as number, + page_id: row.page_id as number, + chunk_index: row.chunk_index as number, + chunk_text: row.chunk_text as string, + chunk_source: row.chunk_source as 'compiled_truth' | 'timeline', + embedding: null, + model: row.model as string, + token_count: row.token_count as number | null, + embedded_at: row.embedded_at ? new Date(row.embedded_at as string) : null, + }; + } + + private rowToSearchResult(row: Record): SearchResult { + return { + slug: row.slug as string, + page_id: row.page_id as number, + title: row.title as string, + type: row.type as PageType, + chunk_text: row.chunk_text as string, + chunk_source: row.chunk_source as 'compiled_truth' | 'timeline', + score: Number(row.score), + stale: Boolean(row.stale), + }; + } + + private rowToFileRecord(row: Record): FileRecord { + return { + id: row.id as number, + page_slug: row.page_slug as string | null, + filename: row.filename as string, + storage_path: row.storage_path as string, + storage_url: row.storage_url as string, + mime_type: row.mime_type as string | null, + size_bytes: row.size_bytes as number | null, + content_hash: row.content_hash as string, + metadata: typeof row.metadata === 'string' ? 
// ── FTS5 query helper ────────────────────────────────────

/** Characters FTS5 accepts in an unquoted bareword: ASCII alphanumerics, underscore, non-ASCII. */
const FTS5_BAREWORD_CHAR = /[A-Za-z0-9_\u0080-\uffff]/;
const FTS5_PLAIN_BAREWORD = /^[A-Za-z0-9_\u0080-\uffff]+$/;
/** Characters that carry meaning in FTS5 query syntax (prefix, anchor, column filter, grouping, NEAR). */
const FTS5_SYNTAX_CHAR = /[*^:(){}+,]/;

/**
 * Wrap a token in double quotes when FTS5 would reject it as a bareword.
 *
 * Tokens such as `john-doe`, `people/john-doe` or `it's` contain punctuation
 * that is illegal outside a quoted string; quoted, the tokenizer splits them
 * into an adjacent-word phrase. Tokens left untouched: plain barewords, tokens
 * with nothing searchable in them (e.g. a lone `-`), and tokens that use FTS5
 * syntax characters, which are assumed to be deliberate.
 */
function quoteFts5Token(token: string): string {
  if (!FTS5_BAREWORD_CHAR.test(token)) return token;
  if (FTS5_PLAIN_BAREWORD.test(token)) return token;
  if (FTS5_SYNTAX_CHAR.test(token)) return token;
  // Tokens never contain '"' here: such input takes the pass-through branch.
  return `"${token}"`;
}

/**
 * Translate a free-form search string into an FTS5 MATCH expression.
 *
 * - Blank input yields `''`.
 * - Input that already contains a double quote, ` OR ` or ` NOT ` is treated
 *   as hand-written FTS5 and returned unchanged (after trimming).
 * - Otherwise whitespace-separated tokens are AND-ed implicitly; a token
 *   written as `-term` becomes `NOT term`.
 * - Tokens with punctuation that FTS5 cannot parse as a bareword are quoted.
 *
 * @param input Raw user query.
 * @returns An FTS5 query string. It may still be invalid FTS5 for unusual
 *          input; callers are expected to handle the resulting SQLite error.
 */
export function toFts5Query(input: string): string {
  const trimmed = input.trim();
  if (!trimmed) return '';

  // If user already wrote FTS5 syntax, pass through.
  if (trimmed.includes('"') || trimmed.includes(' OR ') || trimmed.includes(' NOT ')) {
    return trimmed;
  }

  const positiveParts: string[] = [];
  const negativeParts: string[] = [];

  for (const token of trimmed.split(/\s+/)) {
    // A lone '-' is not a negation; it stays a positive token.
    if (token.startsWith('-') && token.length > 1) {
      negativeParts.push(quoteFts5Token(token.slice(1)));
    } else {
      positiveParts.push(quoteFts5Token(token));
    }
  }

  // FTS5 NOT is a binary operator and needs a positive term on its left.
  // With only negated terms there is nothing to subtract from, so fall back
  // to a plain keyword search on those terms instead of emitting a bare NOT.
  if (positiveParts.length === 0) {
    return negativeParts.join(' ');
  }

  const result = positiveParts.join(' ');
  if (negativeParts.length === 0) return result;
  return result + ' NOT ' + negativeParts.join(' NOT ');
}
/**
 * Reject slugs that are empty, contain `..`, start with `/`, or are not made
 * of lowercase alphanumeric segments (with `-` / `_`) separated by `/`.
 *
 * @param slug Candidate page slug.
 * @throws Error when the slug is not acceptable.
 */
export function validateSlug(slug: string): void {
  const wellFormed =
    Boolean(slug) &&
    !/\.\./.test(slug) &&
    !/^\//.test(slug) &&
    /^[a-z0-9][a-z0-9_-]*(?:\/[a-z0-9][a-z0-9_-]*)*$/.test(slug);

  if (wellFormed) return;

  throw new Error(`Invalid slug: "${slug}". Slugs must be lowercase alphanumeric with / - _ separators, no path traversal.`);
}

/**
 * SHA-256 fingerprint of a page's content.
 *
 * @param compiledTruth The page's compiled-truth text.
 * @param timeline      The page's timeline text.
 * @returns Hex digest of the two parts joined by a `\n---\n` separator.
 */
export function contentHash(compiledTruth: string, timeline: string): string {
  const hasher = createHash('sha256');
  hasher.update(`${compiledTruth}\n---\n${timeline}`);
  return hasher.digest('hex');
}
+ +-- ============================================================ +-- pages +-- ============================================================ +CREATE TABLE IF NOT EXISTS pages ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + slug TEXT NOT NULL UNIQUE, + type TEXT NOT NULL, + title TEXT NOT NULL, + compiled_truth TEXT NOT NULL DEFAULT '', + timeline TEXT NOT NULL DEFAULT '', + frontmatter TEXT NOT NULL DEFAULT '{}', + content_hash TEXT, + created_at TEXT NOT NULL DEFAULT (datetime('now')), + updated_at TEXT NOT NULL DEFAULT (datetime('now')) +); + +CREATE INDEX IF NOT EXISTS idx_pages_type ON pages(type); + +-- ============================================================ +-- Full-text search via FTS5 +-- ============================================================ +CREATE VIRTUAL TABLE IF NOT EXISTS pages_fts USING fts5( + title, + compiled_truth, + timeline, + content='pages', + content_rowid='id', + tokenize='porter unicode61' +); + +-- Triggers to keep FTS5 in sync +CREATE TRIGGER IF NOT EXISTS pages_fts_insert AFTER INSERT ON pages BEGIN + INSERT INTO pages_fts(rowid, title, compiled_truth, timeline) + VALUES (new.id, new.title, new.compiled_truth, new.timeline); +END; + +CREATE TRIGGER IF NOT EXISTS pages_fts_update AFTER UPDATE ON pages BEGIN + INSERT INTO pages_fts(pages_fts, rowid, title, compiled_truth, timeline) + VALUES ('delete', old.id, old.title, old.compiled_truth, old.timeline); + INSERT INTO pages_fts(rowid, title, compiled_truth, timeline) + VALUES (new.id, new.title, new.compiled_truth, new.timeline); +END; + +CREATE TRIGGER IF NOT EXISTS pages_fts_delete AFTER DELETE ON pages BEGIN + INSERT INTO pages_fts(pages_fts, rowid, title, compiled_truth, timeline) + VALUES ('delete', old.id, old.title, old.compiled_truth, old.timeline); +END; + +-- ============================================================ +-- content_chunks +-- ============================================================ +CREATE TABLE IF NOT EXISTS content_chunks ( + id INTEGER PRIMARY KEY 
AUTOINCREMENT, + page_id INTEGER NOT NULL REFERENCES pages(id) ON DELETE CASCADE, + chunk_index INTEGER NOT NULL, + chunk_text TEXT NOT NULL, + chunk_source TEXT NOT NULL DEFAULT 'compiled_truth', + model TEXT NOT NULL DEFAULT 'text-embedding-3-large', + token_count INTEGER, + embedded_at TEXT, + created_at TEXT NOT NULL DEFAULT (datetime('now')) +); + +CREATE INDEX IF NOT EXISTS idx_chunks_page ON content_chunks(page_id); + +-- Note: embeddings stored in chunks_vec virtual table (vec0), not in this table. +-- If vec0 is unavailable, vector search returns empty results. + +-- ============================================================ +-- links +-- ============================================================ +CREATE TABLE IF NOT EXISTS links ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + from_page_id INTEGER NOT NULL REFERENCES pages(id) ON DELETE CASCADE, + to_page_id INTEGER NOT NULL REFERENCES pages(id) ON DELETE CASCADE, + link_type TEXT NOT NULL DEFAULT '', + context TEXT NOT NULL DEFAULT '', + created_at TEXT NOT NULL DEFAULT (datetime('now')), + UNIQUE(from_page_id, to_page_id) +); + +CREATE INDEX IF NOT EXISTS idx_links_from ON links(from_page_id); +CREATE INDEX IF NOT EXISTS idx_links_to ON links(to_page_id); + +-- ============================================================ +-- tags +-- ============================================================ +CREATE TABLE IF NOT EXISTS tags ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + page_id INTEGER NOT NULL REFERENCES pages(id) ON DELETE CASCADE, + tag TEXT NOT NULL, + UNIQUE(page_id, tag) +); + +CREATE INDEX IF NOT EXISTS idx_tags_tag ON tags(tag); +CREATE INDEX IF NOT EXISTS idx_tags_page_id ON tags(page_id); + +-- ============================================================ +-- raw_data +-- ============================================================ +CREATE TABLE IF NOT EXISTS raw_data ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + page_id INTEGER NOT NULL REFERENCES pages(id) ON DELETE CASCADE, + source TEXT NOT 
NULL, + data TEXT NOT NULL, + fetched_at TEXT NOT NULL DEFAULT (datetime('now')), + UNIQUE(page_id, source) +); + +CREATE INDEX IF NOT EXISTS idx_raw_data_page ON raw_data(page_id); + +-- ============================================================ +-- timeline_entries +-- ============================================================ +CREATE TABLE IF NOT EXISTS timeline_entries ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + page_id INTEGER NOT NULL REFERENCES pages(id) ON DELETE CASCADE, + date TEXT NOT NULL, + source TEXT NOT NULL DEFAULT '', + summary TEXT NOT NULL, + detail TEXT NOT NULL DEFAULT '', + created_at TEXT NOT NULL DEFAULT (datetime('now')) +); + +CREATE INDEX IF NOT EXISTS idx_timeline_page ON timeline_entries(page_id); +CREATE INDEX IF NOT EXISTS idx_timeline_date ON timeline_entries(date); + +-- ============================================================ +-- page_versions +-- ============================================================ +CREATE TABLE IF NOT EXISTS page_versions ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + page_id INTEGER NOT NULL REFERENCES pages(id) ON DELETE CASCADE, + compiled_truth TEXT NOT NULL, + frontmatter TEXT NOT NULL DEFAULT '{}', + snapshot_at TEXT NOT NULL DEFAULT (datetime('now')) +); + +CREATE INDEX IF NOT EXISTS idx_versions_page ON page_versions(page_id); + +-- ============================================================ +-- ingest_log +-- ============================================================ +CREATE TABLE IF NOT EXISTS ingest_log ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + source_type TEXT NOT NULL, + source_ref TEXT NOT NULL, + pages_updated TEXT NOT NULL DEFAULT '[]', + summary TEXT NOT NULL DEFAULT '', + created_at TEXT NOT NULL DEFAULT (datetime('now')) +); + +-- ============================================================ +-- config +-- ============================================================ +CREATE TABLE IF NOT EXISTS config ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL +); + +INSERT OR IGNORE INTO 
config (key, value) VALUES + ('version', '1'), + ('engine', 'sqlite'), + ('embedding_model', 'text-embedding-3-large'), + ('embedding_dimensions', '1536'), + ('chunk_strategy', 'semantic'); + +-- ============================================================ +-- files +-- ============================================================ +CREATE TABLE IF NOT EXISTS files ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + page_slug TEXT REFERENCES pages(slug) ON DELETE SET NULL ON UPDATE CASCADE, + filename TEXT NOT NULL, + storage_path TEXT NOT NULL UNIQUE, + storage_url TEXT NOT NULL, + mime_type TEXT, + size_bytes INTEGER, + content_hash TEXT NOT NULL, + metadata TEXT NOT NULL DEFAULT '{}', + created_at TEXT NOT NULL DEFAULT (datetime('now')) +); + +CREATE INDEX IF NOT EXISTS idx_files_page ON files(page_slug); +CREATE INDEX IF NOT EXISTS idx_files_hash ON files(content_hash); diff --git a/test/fts5-query.test.ts b/test/fts5-query.test.ts new file mode 100644 index 0000000..e1122b4 --- /dev/null +++ b/test/fts5-query.test.ts @@ -0,0 +1,47 @@ +import { describe, test, expect } from 'bun:test'; +import { toFts5Query } from '../src/core/sqlite-engine.ts'; + +describe('toFts5Query', () => { + test('empty string returns empty', () => { + expect(toFts5Query('')).toBe(''); + expect(toFts5Query(' ')).toBe(''); + }); + + test('simple terms pass through', () => { + expect(toFts5Query('hello world')).toBe('hello world'); + }); + + test('negation converts -term to NOT', () => { + const result = toFts5Query('quantum -classical'); + expect(result).toContain('quantum'); + expect(result).toContain('NOT classical'); + }); + + test('bare negation (all negative) returns plain terms', () => { + // Should not produce bare NOT (FTS5 syntax error) + const result = toFts5Query('-foo -bar'); + expect(result).not.toMatch(/^NOT/); + }); + + test('quoted phrases pass through', () => { + expect(toFts5Query('"exact match"')).toBe('"exact match"'); + }); + + test('OR passes through', () => { + 
expect(toFts5Query('cats OR dogs')).toBe('cats OR dogs'); + }); + + test('NOT passes through', () => { + expect(toFts5Query('cats NOT dogs')).toBe('cats NOT dogs'); + }); + + test('single word with no operators', () => { + expect(toFts5Query('quantum')).toBe('quantum'); + }); + + test('lone hyphen is not negation', () => { + const result = toFts5Query('-'); + // '-' alone has length 1, so it should pass through as-is + expect(result).toBe('-'); + }); +}); diff --git a/test/sqlite-engine.test.ts b/test/sqlite-engine.test.ts new file mode 100644 index 0000000..15ac2a8 --- /dev/null +++ b/test/sqlite-engine.test.ts @@ -0,0 +1,404 @@ +import { describe, test, expect, beforeAll, afterAll } from 'bun:test'; +import { SQLiteEngine } from '../src/core/sqlite-engine.ts'; +import { unlinkSync } from 'fs'; +import { join } from 'path'; +import { tmpdir } from 'os'; + +const TEST_DB = join(tmpdir(), `gbrain-test-${Date.now()}.db`); + +describe('SQLiteEngine', () => { + let engine: SQLiteEngine; + + beforeAll(async () => { + engine = new SQLiteEngine(); + await engine.connect({ engine: 'sqlite', database_path: TEST_DB }); + await engine.initSchema(); + }); + + afterAll(async () => { + await engine.disconnect(); + try { unlinkSync(TEST_DB); } catch {} + try { unlinkSync(TEST_DB + '-wal'); } catch {} + try { unlinkSync(TEST_DB + '-shm'); } catch {} + }); + + describe('lifecycle', () => { + test('WAL mode is enabled and config seeded', async () => { + const config = await engine.getConfig('engine'); + expect(config).toBe('sqlite'); + }); + }); + + describe('pages CRUD', () => { + test('putPage + getPage round-trip', async () => { + const page = await engine.putPage('people/john-doe', { + type: 'person', + title: 'John Doe', + compiled_truth: 'A test person.', + timeline: '2024-01-01: Born', + frontmatter: { role: 'engineer' }, + }); + expect(page.slug).toBe('people/john-doe'); + expect(page.title).toBe('John Doe'); + expect(page.type).toBe('person'); + 
expect(page.compiled_truth).toBe('A test person.'); + expect(page.timeline).toBe('2024-01-01: Born'); + expect(page.frontmatter).toEqual({ role: 'engineer' }); + expect(page.content_hash).toBeDefined(); + expect(page.id).toBeGreaterThan(0); + + const fetched = await engine.getPage('people/john-doe'); + expect(fetched).not.toBeNull(); + expect(fetched!.slug).toBe('people/john-doe'); + expect(fetched!.title).toBe('John Doe'); + }); + + test('putPage upserts on conflict', async () => { + await engine.putPage('people/jane', { + type: 'person', + title: 'Jane v1', + compiled_truth: 'v1', + }); + const updated = await engine.putPage('people/jane', { + type: 'person', + title: 'Jane v2', + compiled_truth: 'v2', + }); + expect(updated.title).toBe('Jane v2'); + + const all = await engine.listPages(); + const janes = all.filter(p => p.slug === 'people/jane'); + expect(janes).toHaveLength(1); + }); + + test('getPage returns null for missing slug', async () => { + const page = await engine.getPage('nonexistent/slug'); + expect(page).toBeNull(); + }); + + test('deletePage removes the page', async () => { + await engine.putPage('people/to-delete', { + type: 'person', + title: 'Delete Me', + compiled_truth: 'bye', + }); + await engine.deletePage('people/to-delete'); + const page = await engine.getPage('people/to-delete'); + expect(page).toBeNull(); + }); + + test('listPages returns pages', async () => { + const pages = await engine.listPages(); + expect(pages.length).toBeGreaterThan(0); + expect(pages.some(p => p.slug === 'people/john-doe')).toBe(true); + }); + + test('listPages filters by type', async () => { + await engine.putPage('companies/acme', { + type: 'company', + title: 'ACME Corp', + compiled_truth: 'A company.', + }); + const companies = await engine.listPages({ type: 'company' }); + expect(companies.every(p => p.type === 'company')).toBe(true); + expect(companies.some(p => p.slug === 'companies/acme')).toBe(true); + }); + + test('listPages filters by tag', async () 
=> { + await engine.addTag('people/john-doe', 'vip'); + const tagged = await engine.listPages({ tag: 'vip' }); + expect(tagged.some(p => p.slug === 'people/john-doe')).toBe(true); + }); + + test('listPages filters by type + tag', async () => { + const results = await engine.listPages({ type: 'person', tag: 'vip' }); + expect(results.every(p => p.type === 'person')).toBe(true); + }); + + test('resolveSlugs finds exact match', async () => { + const slugs = await engine.resolveSlugs('people/john-doe'); + expect(slugs).toContain('people/john-doe'); + }); + + test('resolveSlugs finds partial match via LIKE', async () => { + const slugs = await engine.resolveSlugs('john'); + expect(slugs.length).toBeGreaterThan(0); + }); + + test('validateSlug rejects invalid slugs', async () => { + await expect(engine.putPage('../bad', { + type: 'person', + title: 'Bad', + compiled_truth: '', + })).rejects.toThrow('Invalid slug'); + }); + }); + + describe('FTS5 keyword search', () => { + test('searchKeyword finds pages by content', async () => { + await engine.putPage('test/searchable', { + type: 'concept', + title: 'Quantum Computing', + compiled_truth: 'Quantum computing uses qubits for parallel computation.', + }); + await engine.upsertChunks('test/searchable', [{ + chunk_index: 0, + chunk_text: 'Quantum computing uses qubits for parallel computation.', + chunk_source: 'compiled_truth', + }]); + + const results = await engine.searchKeyword('quantum computing'); + expect(results.length).toBeGreaterThan(0); + expect(results[0].slug).toBe('test/searchable'); + expect(results[0].score).toBeGreaterThan(0); + }); + + test('searchKeyword returns empty for no match', async () => { + const results = await engine.searchKeyword('xyznonexistent123'); + expect(results).toEqual([]); + }); + + test('searchKeyword handles empty query', async () => { + const results = await engine.searchKeyword(''); + expect(results).toEqual([]); + }); + }); + + describe('chunks', () => { + test('upsertChunks + 
getChunks round-trip', async () => { + await engine.upsertChunks('people/john-doe', [ + { chunk_index: 0, chunk_text: 'Chunk zero.', chunk_source: 'compiled_truth' }, + { chunk_index: 1, chunk_text: 'Chunk one.', chunk_source: 'timeline' }, + ]); + const chunks = await engine.getChunks('people/john-doe'); + expect(chunks).toHaveLength(2); + expect(chunks[0].chunk_text).toBe('Chunk zero.'); + expect(chunks[1].chunk_text).toBe('Chunk one.'); + }); + + test('upsertChunks replaces existing chunks', async () => { + await engine.upsertChunks('people/john-doe', [ + { chunk_index: 0, chunk_text: 'Replacement.', chunk_source: 'compiled_truth' }, + ]); + const chunks = await engine.getChunks('people/john-doe'); + expect(chunks).toHaveLength(1); + expect(chunks[0].chunk_text).toBe('Replacement.'); + }); + + test('deleteChunks removes all chunks', async () => { + await engine.deleteChunks('people/john-doe'); + const chunks = await engine.getChunks('people/john-doe'); + expect(chunks).toHaveLength(0); + }); + }); + + describe('links', () => { + test('addLink + getLinks', async () => { + await engine.putPage('test/link-from', { type: 'concept', title: 'From', compiled_truth: '' }); + await engine.putPage('test/link-to', { type: 'concept', title: 'To', compiled_truth: '' }); + await engine.addLink('test/link-from', 'test/link-to', 'related', 'knows'); + + const links = await engine.getLinks('test/link-from'); + expect(links).toHaveLength(1); + expect(links[0].to_slug).toBe('test/link-to'); + expect(links[0].link_type).toBe('knows'); + }); + + test('getBacklinks returns incoming links', async () => { + const backlinks = await engine.getBacklinks('test/link-to'); + expect(backlinks).toHaveLength(1); + expect(backlinks[0].from_slug).toBe('test/link-from'); + }); + + test('removeLink removes the link', async () => { + await engine.removeLink('test/link-from', 'test/link-to'); + const links = await engine.getLinks('test/link-from'); + expect(links).toHaveLength(0); + }); + + 
test('traverseGraph returns connected nodes', async () => { + await engine.addLink('test/link-from', 'test/link-to', 'related', 'knows'); + const graph = await engine.traverseGraph('test/link-from', 2); + expect(graph.length).toBeGreaterThan(0); + expect(graph[0].slug).toBe('test/link-from'); + expect(graph[0].depth).toBe(0); + }); + }); + + describe('tags', () => { + test('addTag + getTags', async () => { + await engine.addTag('people/john-doe', 'engineer'); + const tags = await engine.getTags('people/john-doe'); + expect(tags).toContain('engineer'); + }); + + test('removeTag', async () => { + await engine.removeTag('people/john-doe', 'engineer'); + const tags = await engine.getTags('people/john-doe'); + expect(tags).not.toContain('engineer'); + }); + + test('addTag is idempotent', async () => { + await engine.addTag('people/john-doe', 'test-tag'); + await engine.addTag('people/john-doe', 'test-tag'); + const tags = await engine.getTags('people/john-doe'); + expect(tags.filter(t => t === 'test-tag')).toHaveLength(1); + }); + }); + + describe('timeline', () => { + test('addTimelineEntry + getTimeline', async () => { + await engine.addTimelineEntry('people/john-doe', { + date: '2024-06-15', + source: 'manual', + summary: 'Joined company', + detail: 'Started as engineer', + }); + const timeline = await engine.getTimeline('people/john-doe'); + expect(timeline.length).toBeGreaterThan(0); + expect(timeline[0].summary).toBe('Joined company'); + }); + + test('getTimeline filters by date range', async () => { + await engine.addTimelineEntry('people/john-doe', { + date: '2023-01-01', + summary: 'Old event', + }); + const filtered = await engine.getTimeline('people/john-doe', { after: '2024-01-01' }); + expect(filtered.every(e => e.date >= '2024-01-01')).toBe(true); + }); + }); + + describe('raw data', () => { + test('putRawData + getRawData', async () => { + await engine.putRawData('people/john-doe', 'linkedin', { url: 'https://linkedin.com/in/john' }); + const data = await 
engine.getRawData('people/john-doe', 'linkedin'); + expect(data).toHaveLength(1); + expect((data[0].data as { url: string }).url).toBe('https://linkedin.com/in/john'); + }); + + test('putRawData upserts on conflict', async () => { + await engine.putRawData('people/john-doe', 'linkedin', { url: 'https://linkedin.com/in/john-v2' }); + const data = await engine.getRawData('people/john-doe', 'linkedin'); + expect(data).toHaveLength(1); + expect((data[0].data as { url: string }).url).toBe('https://linkedin.com/in/john-v2'); + }); + + test('getRawData without source returns all', async () => { + await engine.putRawData('people/john-doe', 'github', { user: 'johndoe' }); + const all = await engine.getRawData('people/john-doe'); + expect(all.length).toBeGreaterThanOrEqual(2); + }); + }); + + describe('versions', () => { + test('createVersion + getVersions', async () => { + const version = await engine.createVersion('people/john-doe'); + expect(version.id).toBeGreaterThan(0); + expect(version.compiled_truth).toBeDefined(); + + const versions = await engine.getVersions('people/john-doe'); + expect(versions.length).toBeGreaterThan(0); + }); + + test('revertToVersion restores content', async () => { + const before = await engine.getPage('people/john-doe'); + const version = await engine.createVersion('people/john-doe'); + + await engine.putPage('people/john-doe', { + type: 'person', + title: 'John Doe MODIFIED', + compiled_truth: 'modified content', + }); + + await engine.revertToVersion('people/john-doe', version.id); + const after = await engine.getPage('people/john-doe'); + expect(after!.compiled_truth).toBe(before!.compiled_truth); + }); + }); + + describe('stats + health', () => { + test('getStats returns counts', async () => { + const stats = await engine.getStats(); + expect(stats.page_count).toBeGreaterThan(0); + expect(stats.pages_by_type).toBeDefined(); + expect(typeof stats.page_count).toBe('number'); + }); + + test('getHealth returns health metrics', async () => { + 
const health = await engine.getHealth(); + expect(health.page_count).toBeGreaterThan(0); + expect(typeof health.embed_coverage).toBe('number'); + expect(typeof health.orphan_pages).toBe('number'); + }); + }); + + describe('ingest log', () => { + /* Logs one directory ingest and reads the log back with limit 10. The log[0] assertions presume the newest entry comes first - TODO confirm ordering in the engine. */ test('logIngest + getIngestLog', async () => { + await engine.logIngest({ + source_type: 'directory', + source_ref: '/test/dir', + pages_updated: ['people/john-doe'], + summary: 'Test ingest', + }); + const log = await engine.getIngestLog({ limit: 10 }); + expect(log.length).toBeGreaterThan(0); + expect(log[0].source_type).toBe('directory'); + expect(log[0].pages_updated).toContain('people/john-doe'); + }); + }); + + describe('sync', () => { + /* Creates test/old-slug, renames it with updateSlug, then expects the old slug to read as null and the new slug to resolve to a page. */ test('updateSlug renames a page', async () => { + await engine.putPage('test/old-slug', { type: 'concept', title: 'Rename Me', compiled_truth: '' }); + await engine.updateSlug('test/old-slug', 'test/new-slug'); + expect(await engine.getPage('test/old-slug')).toBeNull(); + expect(await engine.getPage('test/new-slug')).not.toBeNull(); + }); + }); + + /* Key/value config store: seeded value, write, overwrite, and missing-key lookup. */ describe('config', () => { + /* Expects the key version to already hold the string 1. NOTE(review): presumably seeded by schema initialization - verify against the SQLite DDL. */ test('getConfig returns seeded values', async () => { + const version = await engine.getConfig('version'); + expect(version).toBe('1'); + }); + + /* A value written with setConfig must read back unchanged. */ test('setConfig + getConfig round-trip', async () => { + await engine.setConfig('test_key', 'test_value'); + const value = await engine.getConfig('test_key'); + expect(value).toBe('test_value'); + }); + + /* Writes test_key again with a different value; this only exercises the overwrite path when the round-trip test has already run. */ test('setConfig upserts', async () => { + await engine.setConfig('test_key', 'updated_value'); + const value = await engine.getConfig('test_key'); + expect(value).toBe('updated_value'); + }); + + /* A key that was never written must yield exactly null. */ test('getConfig returns null for missing key', async () => { + const value = await engine.getConfig('nonexistent_key'); + expect(value).toBeNull(); + }); + }); + + /* transaction() passes an engine handle (eng) to the callback; these tests check that writes made through it are kept on success and discarded when the callback throws. */ describe('transaction', () => { + test('commits on success', async () => { + await engine.transaction(async (eng) => { + await eng.putPage('test/tx-page', { type: 'concept', title: 'TX Page', compiled_truth: '' });
+ }); + const page = await engine.getPage('test/tx-page'); + expect(page).not.toBeNull(); + }); + + /* Throws inside the transaction callback after a putPage and expects the page to be absent afterwards. NOTE(review): the empty catch swallows every error, so an unexpected failure in putPage or in transaction() itself would also let this test pass - consider asserting the rejection explicitly. */ test('rolls back on error', async () => { + try { + await engine.transaction(async (eng) => { + await eng.putPage('test/tx-rollback', { type: 'concept', title: 'Rollback', compiled_truth: '' }); + throw new Error('Intentional rollback'); + }); + } catch {} + const page = await engine.getPage('test/tx-rollback'); + expect(page).toBeNull(); + }); + }); +}); diff --git a/test/utils.test.ts b/test/utils.test.ts new file mode 100644 index 0000000..664fc65 --- /dev/null +++ b/test/utils.test.ts @@ -0,0 +1,58 @@ +import { describe, test, expect } from 'bun:test'; +import { validateSlug, contentHash } from '../src/core/utils.ts'; + +/* validateSlug: well-formed slugs must not throw; every malformed shape in this group must throw with a message matching the string Invalid slug. */ describe('validateSlug', () => { + /* Covers nested paths, a single character, and a mix of underscore, hyphen and digits. */ test('accepts valid slugs', () => { + expect(() => validateSlug('people/john-doe')).not.toThrow(); + expect(() => validateSlug('companies/acme')).not.toThrow(); + expect(() => validateSlug('a')).not.toThrow(); + expect(() => validateSlug('a/b/c')).not.toThrow(); + expect(() => validateSlug('test_slug-123')).not.toThrow(); + }); + + test('rejects empty slug', () => { + expect(() => validateSlug('')).toThrow('Invalid slug'); + }); + + /* A .. segment is rejected both at the start and in the middle of a slug. */ test('rejects path traversal', () => { + expect(() => validateSlug('../etc/passwd')).toThrow('Invalid slug'); + expect(() => validateSlug('people/../secrets')).toThrow('Invalid slug'); + }); + + test('rejects leading slash', () => { + expect(() => validateSlug('/people/john')).toThrow('Invalid slug'); + }); + + test('rejects uppercase', () => { + expect(() => validateSlug('People/John')).toThrow('Invalid slug'); + }); + + test('rejects trailing slash', () => { + expect(() => validateSlug('people/john/')).toThrow('Invalid slug'); + expect(() => validateSlug('a/')).toThrow('Invalid slug'); + }); + + test('rejects consecutive slashes', () => { + expect(() => validateSlug('people//john')).toThrow('Invalid slug'); + }); +}); + +/* contentHash is called with two string arguments; the tests in this group check that equal inputs give equal results, that different inputs give different results, and that the result has length 64, including for two empty strings. */ describe('contentHash', () => { + test('returns consistent
SHA-256 for same input', () => { + const h1 = contentHash('truth', 'timeline'); + const h2 = contentHash('truth', 'timeline'); + expect(h1).toBe(h2); + expect(h1).toHaveLength(64); + }); + + test('different input produces different hash', () => { + const h1 = contentHash('a', 'b'); + const h2 = contentHash('c', 'd'); + expect(h1).not.toBe(h2); + }); + + test('handles empty strings', () => { + const h = contentHash('', ''); + expect(h).toHaveLength(64); + }); +});