diff --git a/src/cli/index.ts b/src/cli/index.ts index 5453cf7..9fae9d5 100644 --- a/src/cli/index.ts +++ b/src/cli/index.ts @@ -4,7 +4,7 @@ import pc from "picocolors"; import { ExitCode } from "./exit-code"; import { parseArgs } from "./parse-args"; import type { CliCommand } from "./types"; -import { setSilentMode, symbols, ui } from "./ui"; +import { setSilentMode, setVerboseMode, symbols, ui } from "./ui"; export const CLI_NAME = "docs-cache"; @@ -23,18 +23,22 @@ Commands: init Create a new config interactively Global options: - --source (add only) - --target (add only) --config --cache-dir --offline --fail-on-miss --lock-only - --target-dir (add only) --concurrency --json --timeout-ms --silent + --verbose + +Add options: + --source + --target + --target-dir + --id `; const printHelp = () => { @@ -45,70 +49,20 @@ const printError = (message: string) => { process.stderr.write(`${symbols.error} ${message}\n`); }; -const parseAddEntries = (rawArgs: string[]) => { - const commandIndex = rawArgs.findIndex((arg) => !arg.startsWith("-")); - const tail = commandIndex === -1 ? [] : rawArgs.slice(commandIndex + 1); - const entries: Array<{ repo: string; targetDir?: string }> = []; - let lastIndex = -1; - const skipNextFor = new Set([ - "--config", - "--cache-dir", - "--concurrency", - "--timeout-ms", - ]); - for (let index = 0; index < tail.length; index += 1) { - const arg = tail[index]; - if (arg === "--source") { - const next = tail[index + 1]; - if (!next || next.startsWith("-")) { - throw new Error("--source expects a value."); - } - entries.push({ repo: next }); - lastIndex = entries.length - 1; - index += 1; - continue; - } - if (arg === "--target" || arg === "--target-dir") { - const next = tail[index + 1]; - if (!next || next.startsWith("-")) { - throw new Error("--target expects a value."); - } - if (lastIndex === -1) { - throw new Error("--target must follow a --source entry."); - } - entries[lastIndex].targetDir = next; - index += 1; - continue; - } - if (skipNextFor.has(arg)) { - index += 1; - continue; - } - if (arg.startsWith("--")) { - continue; - } - entries.push({ repo: arg }); - lastIndex = entries.length - 1; - } - return entries; -}; - -const runCommand = async (parsed: CliCommand, rawArgs: string[]) => { +const runCommand = async (parsed: CliCommand) => { const command = parsed.command; const options = parsed.options; - const positionals = parsed.args; if (command === "add") { const { addSources } = await import("../add"); const { runSync } = await import("../sync"); - const entries = parseAddEntries(rawArgs); - if (entries.length === 0) { + if (parsed.entries.length === 0) { throw new Error( "Usage: docs-cache add [--source --target ] ", ); } const result = await addSources({ configPath: options.config, - entries, + entries: parsed.entries, }); if (!options.offline) { await runSync({ @@ -120,6 +74,7 @@ const runCommand = async (parsed: CliCommand, rawArgs: string[]) => { failOnMiss: options.failOnMiss, sourceFilter: result.sources.map((source) => source.id), timeoutMs: options.timeoutMs, + verbose: options.verbose, }); } else if (!options.json) { ui.line(`${symbols.warn} Offline: skipped sync`); @@ -159,12 +114,12 @@ const runCommand = async (parsed: CliCommand, rawArgs: string[]) => { if (command === "remove") { const { removeSources } = await import("../remove"); const { pruneCache } = await import("../prune"); - if (positionals.length === 0) { + if (parsed.ids.length === 0) { throw new Error("Usage: docs-cache remove "); } const result = await removeSources({ configPath: options.config, - ids: positionals, + ids: parsed.ids, }); if (options.json) { process.stdout.write(`${JSON.stringify(result, null, 2)}\n`); @@ -287,6 +242,7 @@ const runCommand = async (parsed: CliCommand, rawArgs: string[]) => { offline: options.offline, failOnMiss: options.failOnMiss, timeoutMs: options.timeoutMs, + verbose: options.verbose, }); if (options.json) { process.stdout.write(`${JSON.stringify(plan, null, 2)}\n`); @@ -347,10 +303,10 @@ export async function main(): Promise { process.on("unhandledRejection", errorHandler); const parsed = parseArgs(); - const _rawArgs = parsed.rawArgs; // Set silent mode if the flag is present setSilentMode(parsed.options.silent); + setVerboseMode(parsed.options.verbose); if (parsed.help) { printHelp(); @@ -372,13 +328,7 @@ export async function main(): Promise { process.exit(ExitCode.InvalidArgument); } - if (parsed.command !== "add" && parsed.options.targetDir) { - printError(`${CLI_NAME}: --target-dir is only valid for add.`); - printHelp(); - process.exit(ExitCode.InvalidArgument); - } - - await runCommand(parsed.parsed, parsed.rawArgs); + await runCommand(parsed.parsed); } catch (error) { errorHandler(error as Error); } diff --git a/src/cli/parse-args.ts b/src/cli/parse-args.ts index ecd090d..a26fd6b 100644 --- a/src/cli/parse-args.ts +++ b/src/cli/parse-args.ts @@ -2,7 +2,7 @@ import process from "node:process"; import cac from "cac"; import { ExitCode } from "./exit-code"; -import type { CliCommand, CliOptions } from "./types"; +import type { AddEntry, CliCommand, CliOptions } from "./types"; const COMMANDS = [ "add", @@ -26,28 +26,180 @@ export type ParsedArgs = { parsed: CliCommand; }; +const ADD_ONLY_OPTIONS = new Set([ + "--source", + "--target", + "--target-dir", + "--id", +]); +const POSITIONAL_SKIP_OPTIONS = new Set([ + "--config", + "--cache-dir", + "--concurrency", + "--timeout-ms", +]); + +const parseAddEntries = (rawArgs: string[]): AddEntry[] => { + const commandIndex = rawArgs.findIndex((arg) => !arg.startsWith("-")); + const tail = commandIndex === -1 ? [] : rawArgs.slice(commandIndex + 1); + const entries: AddEntry[] = []; + let lastIndex = -1; + let pendingId: string | null = null; + let lastWasRepoAdded = false; + const skipNextFor = new Set([ + "--config", + "--cache-dir", + "--concurrency", + "--timeout-ms", + ]); + for (let index = 0; index < tail.length; index += 1) { + const arg = tail[index]; + if (arg === "--id" || arg.startsWith("--id=")) { + const rawValue = arg === "--id" ? tail[index + 1] : arg.slice(5); + if (!rawValue || rawValue.startsWith("-")) { + throw new Error("--id expects a value."); + } + if (arg === "--id") { + index += 1; + } + if ( + lastWasRepoAdded && + lastIndex !== -1 && + entries[lastIndex]?.id === undefined && + pendingId === null + ) { + entries[lastIndex].id = rawValue; + lastWasRepoAdded = false; + continue; + } + if (pendingId !== null) { + throw new Error("--id must be followed by a source."); + } + pendingId = rawValue; + lastWasRepoAdded = false; + continue; + } + if (arg === "--source") { + const next = tail[index + 1]; + if (!next || next.startsWith("-")) { + throw new Error("--source expects a value."); + } + entries.push({ repo: next, ...(pendingId ? { id: pendingId } : {}) }); + lastIndex = entries.length - 1; + pendingId = null; + lastWasRepoAdded = true; + index += 1; + continue; + } + if (arg === "--target" || arg === "--target-dir") { + const next = tail[index + 1]; + if (!next || next.startsWith("-")) { + throw new Error("--target expects a value."); + } + if (lastIndex === -1) { + throw new Error("--target must follow a --source entry."); + } + entries[lastIndex].targetDir = next; + index += 1; + lastWasRepoAdded = false; + continue; + } + if (skipNextFor.has(arg)) { + index += 1; + lastWasRepoAdded = false; + continue; + } + if (arg.startsWith("--")) { + lastWasRepoAdded = false; + continue; + } + entries.push({ repo: arg, ...(pendingId ? { id: pendingId } : {}) }); + lastIndex = entries.length - 1; + pendingId = null; + lastWasRepoAdded = true; + } + if (pendingId !== null) { + throw new Error("--id must be followed by a source."); + } + return entries; +}; + +const parsePositionals = (rawArgs: string[]) => { + const commandIndex = rawArgs.findIndex((arg) => !arg.startsWith("-")); + const tail = commandIndex === -1 ? [] : rawArgs.slice(commandIndex + 1); + const positionals: string[] = []; + for (let index = 0; index < tail.length; index += 1) { + const arg = tail[index]; + if (POSITIONAL_SKIP_OPTIONS.has(arg)) { + index += 1; + continue; + } + if (arg.startsWith("--")) { + continue; + } + positionals.push(arg); + } + return positionals; +}; + +const assertAddOnlyOptions = (command: Command | null, rawArgs: string[]) => { + if (command === "add") { + return; + } + for (const arg of rawArgs) { + if (ADD_ONLY_OPTIONS.has(arg)) { + throw new Error(`${arg} is only valid for add.`); + } + if ( + arg.startsWith("--id=") || + arg.startsWith("--source=") || + arg.startsWith("--target=") || + arg.startsWith("--target-dir=") + ) { + throw new Error(`${arg.split("=")[0]} is only valid for add.`); + } + } +}; + export const parseArgs = (argv = process.argv): ParsedArgs => { try { const cli = cac("docs-cache"); cli - .option("--source ", "Source repo (add only)") - .option("--target ", "Target directory for source (add only)") .option("--config ", "Path to config file") .option("--cache-dir ", "Override cache directory") .option("--offline", "Disable network access") .option("--fail-on-miss", "Fail when required sources are missing") .option("--lock-only", "Update lock without materializing files") .option("--prune", "Prune cache on remove") - .option("--target-dir ", "Target directory for add") .option("--concurrency ", "Concurrency limit") .option("--json", "Output JSON") .option("--timeout-ms ", "Network timeout in milliseconds") .option("--silent", "Suppress non-error output") + .option("--verbose", "Enable verbose logging") .help(); + cli + .command("add [repo...]", "Add sources to the config") + .option("--source ", "Source repo") + .option("--target ", "Target directory for source") + .option("--target-dir ", "Target directory for source") + .option("--id ", "Source id"); + + cli.command("remove ", "Remove sources from the config and targets"); + cli.command("sync", "Synchronize cache with config"); + cli.command("status", "Show cache status"); + cli.command("clean", "Remove project cache"); + cli.command("clean-cache", "Clear global git cache"); + cli.command("prune", "Remove unused data"); + cli.command("verify", "Validate cache integrity"); + cli.command("init", "Create a new config interactively"); + const result = cli.parse(argv, { run: false }); - const command = result.args[0] as Command | undefined; + const rawArgs = argv.slice(2); + const commandIndex = rawArgs.findIndex((arg) => !arg.startsWith("-")); + const command = + commandIndex === -1 ? undefined : (rawArgs[commandIndex] as Command); if (command && !COMMANDS.includes(command)) { throw new Error(`Unknown command '${command}'.`); } @@ -59,7 +211,6 @@ export const parseArgs = (argv = process.argv): ParsedArgs => { failOnMiss: Boolean(result.options.failOnMiss), lockOnly: Boolean(result.options.lockOnly), prune: Boolean(result.options.prune), - targetDir: result.options.targetDir, concurrency: result.options.concurrency ? Number(result.options.concurrency) : undefined, @@ -68,6 +219,7 @@ export const parseArgs = (argv = process.argv): ParsedArgs => { ? Number(result.options.timeoutMs) : undefined, silent: Boolean(result.options.silent), + verbose: Boolean(result.options.verbose), }; if (options.concurrency !== undefined && options.concurrency < 1) { @@ -77,18 +229,63 @@ export const parseArgs = (argv = process.argv): ParsedArgs => { throw new Error("--timeout-ms must be a positive number."); } - const rawArgs = argv.slice(2); + assertAddOnlyOptions(command ?? null, rawArgs); + let addEntries: AddEntry[] | null = null; + const positionals = (() => { + switch (command ?? null) { + case "add": + addEntries = parseAddEntries(rawArgs); + return addEntries.map((entry) => entry.repo); + case "remove": + return parsePositionals(rawArgs); + default: + return parsePositionals(rawArgs); + } + })(); + let parsed: CliCommand; + switch (command ?? null) { + case "add": + parsed = { + command: "add", + entries: addEntries ?? parseAddEntries(rawArgs), + options, + }; + break; + case "remove": + parsed = { command: "remove", ids: positionals, options }; + break; + case "sync": + parsed = { command: "sync", options }; + break; + case "status": + parsed = { command: "status", options }; + break; + case "clean": + parsed = { command: "clean", options }; + break; + case "clean-cache": + parsed = { command: "clean-cache", options }; + break; + case "prune": + parsed = { command: "prune", options }; + break; + case "verify": + parsed = { command: "verify", options }; + break; + case "init": + parsed = { command: "init", options }; + break; + default: + parsed = { command: null, options }; + break; + } return { command: command ?? null, options, - positionals: result.args.slice(1), + positionals, rawArgs, help: Boolean(result.options.help), - parsed: { - command: command ?? null, - args: result.args.slice(1), - options, - }, + parsed, }; } catch (error) { const message = error instanceof Error ? error.message : String(error); diff --git a/src/cli/types.ts b/src/cli/types.ts index b415f94..b4a0621 100644 --- a/src/cli/types.ts +++ b/src/cli/types.ts @@ -5,21 +5,27 @@ export type CliOptions = { failOnMiss: boolean; lockOnly: boolean; prune: boolean; - targetDir?: string; concurrency?: number; json: boolean; timeoutMs?: number; silent: boolean; + verbose: boolean; +}; + +export type AddEntry = { + id?: string; + repo: string; + targetDir?: string; }; export type CliCommand = - | { command: "add"; args: string[]; options: CliOptions } - | { command: "remove"; args: string[]; options: CliOptions } - | { command: "sync"; args: string[]; options: CliOptions } - | { command: "status"; args: string[]; options: CliOptions } - | { command: "clean"; args: string[]; options: CliOptions } - | { command: "clean-cache"; args: string[]; options: CliOptions } - | { command: "prune"; args: string[]; options: CliOptions } - | { command: "verify"; args: string[]; options: CliOptions } - | { command: "init"; args: string[]; options: CliOptions } - | { command: null; args: string[]; options: CliOptions }; + | { command: "add"; entries: AddEntry[]; options: CliOptions } + | { command: "remove"; ids: string[]; options: CliOptions } + | { command: "sync"; options: CliOptions } + | { command: "status"; options: CliOptions } + | { command: "clean"; options: CliOptions } + | { command: "clean-cache"; options: CliOptions } + | { command: "prune"; options: CliOptions } + | { command: "verify"; options: CliOptions } + | { command: "init"; options: CliOptions } + | { command: null; options: CliOptions }; diff --git a/src/cli/ui.ts b/src/cli/ui.ts index 23d2871..76a09e5 100644 --- a/src/cli/ui.ts +++ b/src/cli/ui.ts @@ -10,11 +10,16 @@ export const symbols = { }; let _silentMode = false; +let _verboseMode = false; export const setSilentMode = (silent: boolean) => { _silentMode = silent; }; +export const setVerboseMode = (verbose: boolean) => { + _verboseMode = verbose; +}; + export const ui = { // Formatters path: (value: string) => { @@ -54,4 +59,9 @@ export const ui = { ` ${icon} ${action} ${pc.bold(subject)}${details ? ` ${pc.dim(details)}` : ""}\n`, ); }, + + debug: (text: string) => { + if (_silentMode || !_verboseMode) return; + process.stdout.write(`${pc.dim("•")} ${pc.dim(text)}\n`); + }, }; diff --git a/src/git/fetch-source.ts b/src/git/fetch-source.ts index ac1d183..29d9acd 100644 --- a/src/git/fetch-source.ts +++ b/src/git/fetch-source.ts @@ -1,10 +1,8 @@ -import { execFile } from "node:child_process"; import { createHash } from "node:crypto"; import { mkdir, mkdtemp, readFile, rm } from "node:fs/promises"; import { tmpdir } from "node:os"; import path from "node:path"; import { pathToFileURL } from "node:url"; -import { promisify } from "node:util"; import { execa } from "execa"; @@ -12,8 +10,6 @@ import { getErrnoCode } from "../errors"; import { assertSafeSourceId } from "../source-id"; import { exists, resolveGitCacheDir } from "./cache-dir"; -const execFileAsync = promisify(execFile); - const DEFAULT_TIMEOUT_MS = 120000; // 120 seconds (2 minutes) const DEFAULT_GIT_DEPTH = 1; const DEFAULT_RM_RETRIES = 3; @@ -21,7 +17,12 @@ const DEFAULT_RM_BACKOFF_MS = 100; const git = async ( args: string[], - options?: { cwd?: string; timeoutMs?: number; allowFileProtocol?: boolean }, + options?: { + cwd?: string; + timeoutMs?: number; + allowFileProtocol?: boolean; + logger?: (message: string) => void; + }, ) => { const pathValue = process.env.PATH ?? process.env.Path; const pathExtValue = @@ -46,10 +47,15 @@ const git = async ( configs.push("-c", "protocol.file.allow=never"); } - await execa("git", [...configs, ...args], { + const commandArgs = [...configs, ...args]; + const commandLabel = `git ${commandArgs.join(" ")}`; + options?.logger?.(commandLabel); + const subprocess = execa("git", commandArgs, { cwd: options?.cwd, timeout: options?.timeoutMs ?? DEFAULT_TIMEOUT_MS, maxBuffer: 10 * 1024 * 1024, // 10MB buffer for large repos + stdout: "pipe", + stderr: "pipe", env: { ...process.env, ...(pathValue ? { PATH: pathValue, Path: pathValue } : {}), @@ -73,6 +79,22 @@ const git = async ( ...(process.platform === "win32" ? {} : { GIT_ASKPASS: "/bin/false" }), }, }); + if (options?.logger) { + const forward = (stream: NodeJS.ReadableStream | null) => { + if (!stream) return; + stream.on("data", (chunk) => { + const text = + chunk instanceof Buffer ? chunk.toString("utf8") : String(chunk); + for (const line of text.split(/\r?\n/)) { + if (!line) continue; + options.logger?.(`${commandLabel} | ${line}`); + } + }); + }; + forward(subprocess.stdout); + forward(subprocess.stderr); + } + await subprocess; }; const removeDir = async (dirPath: string, retries = DEFAULT_RM_RETRIES) => { @@ -134,12 +156,17 @@ const isPartialClone = async (repoPath: string) => { const ensureCommitAvailable = async ( repoPath: string, commit: string, - options?: { timeoutMs?: number; allowFileProtocol?: boolean }, + options?: { + timeoutMs?: number; + allowFileProtocol?: boolean; + logger?: (message: string) => void; + }, ) => { try { await git(["-C", repoPath, "cat-file", "-e", `${commit}^{commit}`], { timeoutMs: options?.timeoutMs, allowFileProtocol: options?.allowFileProtocol, + logger: options?.logger, }); return; } catch { @@ -148,6 +175,7 @@ const ensureCommitAvailable = async ( await git(["-C", repoPath, "fetch", "origin", commit], { timeoutMs: options?.timeoutMs, allowFileProtocol: options?.allowFileProtocol, + logger: options?.logger, }); }; @@ -159,6 +187,7 @@ type FetchParams = { cacheDir: string; include?: string[]; timeoutMs?: number; + logger?: (message: string) => void; }; type FetchResult = { @@ -167,32 +196,6 @@ type FetchResult = { fromCache: boolean; }; -const runGitArchive = async ( - repo: string, - resolvedCommit: string, - outDir: string, - timeoutMs?: number, -) => { - const archivePath = path.join(outDir, "archive.tar"); - await git( - [ - "archive", - "--remote", - repo, - "--format=tar", - "--output", - archivePath, - resolvedCommit, - ], - { timeoutMs }, - ); - await execFileAsync("tar", ["-xf", archivePath, "-C", outDir], { - timeout: timeoutMs ?? DEFAULT_TIMEOUT_MS, - maxBuffer: 1024 * 1024, - }); - await rm(archivePath, { force: true }); -}; - const isSparseEligible = (include?: string[]) => { if (!include || include.length === 0) { return false; @@ -243,15 +246,17 @@ const cloneRepo = async (params: FetchParams, outDir: string) => { } } cloneArgs.push(params.repo, outDir); - await git(cloneArgs, { timeoutMs: params.timeoutMs }); + await git(cloneArgs, { timeoutMs: params.timeoutMs, logger: params.logger }); await ensureCommitAvailable(outDir, params.resolvedCommit, { timeoutMs: params.timeoutMs, + logger: params.logger, }); if (useSparse) { const sparsePaths = extractSparsePaths(params.include); if (sparsePaths.length > 0) { await git(["-C", outDir, "sparse-checkout", "set", ...sparsePaths], { timeoutMs: params.timeoutMs, + logger: params.logger, }); } } @@ -259,66 +264,68 @@ const cloneRepo = async (params: FetchParams, outDir: string) => { ["-C", outDir, "checkout", "--quiet", "--detach", params.resolvedCommit], { timeoutMs: params.timeoutMs, + logger: params.logger, }, ); }; // Clone or update a repository using persistent cache -const cloneOrUpdateRepo = async (params: FetchParams, outDir: string) => { +const cloneOrUpdateRepo = async ( + params: FetchParams, + outDir: string, +): Promise<{ usedCache: boolean }> => { const cachePath = getPersistentCachePath(params.repo); const cacheExists = await exists(cachePath); + const cacheValid = cacheExists && (await isValidGitRepo(cachePath)); const isCommitRef = /^[0-9a-f]{7,40}$/i.test(params.ref); const useSparse = isSparseEligible(params.include); + let usedCache = cacheValid; const cacheRoot = resolveGitCacheDir(); - // Ensure the git cache directory exists await mkdir(cacheRoot, { recursive: true }); - // If cache exists and is valid, try to fetch and update - if (cacheExists && (await isValidGitRepo(cachePath))) { + if (cacheValid) { if (await isPartialClone(cachePath)) { await removeDir(cachePath); await cloneRepo(params, cachePath); + usedCache = false; } else { try { - // Fetch the specific ref or commit const fetchArgs = ["fetch", "origin"]; if (!isCommitRef) { - // Fetch specific branch/tag const refSpec = params.ref === "HEAD" ? "HEAD" : `${params.ref}:refs/remotes/origin/${params.ref}`; fetchArgs.push(refSpec, "--depth", String(DEFAULT_GIT_DEPTH)); } else { - // For commit refs, fetch the default branch and hope the commit is there fetchArgs.push("--depth", String(DEFAULT_GIT_DEPTH)); } await git(["-C", cachePath, ...fetchArgs], { timeoutMs: params.timeoutMs, + logger: params.logger, }); await ensureCommitAvailable(cachePath, params.resolvedCommit, { timeoutMs: params.timeoutMs, + logger: params.logger, }); } catch (_error) { - // Fetch failed, remove corrupt cache and re-clone await removeDir(cachePath); await cloneRepo(params, cachePath); + usedCache = false; } } } else { - // No cache or invalid - do fresh clone if (cacheExists) { await removeDir(cachePath); } await cloneRepo(params, cachePath); + usedCache = false; } - // Now copy from cache to outDir with the specific commit checked out await mkdir(outDir, { recursive: true }); - // Clone from local cache (much faster than from remote) const localCloneArgs = [ "clone", "--no-checkout", @@ -347,6 +354,7 @@ const cloneOrUpdateRepo = async (params: FetchParams, outDir: string) => { await git(localCloneArgs, { timeoutMs: params.timeoutMs, allowFileProtocol: true, + logger: params.logger, }); if (useSparse) { @@ -355,6 +363,7 @@ const cloneOrUpdateRepo = async (params: FetchParams, outDir: string) => { await git(["-C", outDir, "sparse-checkout", "set", ...sparsePaths], { timeoutMs: params.timeoutMs, allowFileProtocol: true, + logger: params.logger, }); } } @@ -362,6 +371,7 @@ const cloneOrUpdateRepo = async (params: FetchParams, outDir: string) => { await ensureCommitAvailable(outDir, params.resolvedCommit, { timeoutMs: params.timeoutMs, allowFileProtocol: true, + logger: params.logger, }); await git( @@ -369,57 +379,31 @@ const cloneOrUpdateRepo = async (params: FetchParams, outDir: string) => { { timeoutMs: params.timeoutMs, allowFileProtocol: true, + logger: params.logger, }, ); -}; -const archiveRepo = async (params: FetchParams) => { - const tempDir = await mkdtemp( - path.join(tmpdir(), `docs-cache-${params.sourceId}-`), - ); - try { - await runGitArchive( - params.repo, - params.resolvedCommit, - tempDir, - params.timeoutMs, - ); - return tempDir; - } catch (error) { - await removeDir(tempDir); - throw error; - } + return { usedCache }; }; export const fetchSource = async ( params: FetchParams, ): Promise => { assertSafeSourceId(params.sourceId, "sourceId"); + const tempDir = await mkdtemp( + path.join(tmpdir(), `docs-cache-${params.sourceId}-`), + ); try { - const archiveDir = await archiveRepo(params); + const { usedCache } = await cloneOrUpdateRepo(params, tempDir); return { - repoDir: archiveDir, + repoDir: tempDir, cleanup: async () => { - await removeDir(archiveDir); + await removeDir(tempDir); }, - fromCache: false, + fromCache: usedCache, }; - } catch { - const tempDir = await mkdtemp( - path.join(tmpdir(), `docs-cache-${params.sourceId}-`), - ); - try { - await cloneOrUpdateRepo(params, tempDir); - return { - repoDir: tempDir, - cleanup: async () => { - await removeDir(tempDir); - }, - fromCache: true, - }; - } catch (error) { - await removeDir(tempDir); - throw error; - } + } catch (error) { + await removeDir(tempDir); + throw error; } }; diff --git a/src/git/resolve-remote.ts b/src/git/resolve-remote.ts index d6ab631..f49fbcb 100644 --- a/src/git/resolve-remote.ts +++ b/src/git/resolve-remote.ts @@ -12,6 +12,7 @@ type ResolveRemoteParams = { ref: string; allowHosts: string[]; timeoutMs?: number; + logger?: (message: string) => void; }; const BLOCKED_PROTOCOLS = new Set(["file:", "ftp:", "data:", "javascript:"]); @@ -34,14 +35,9 @@ const assertAllowedProtocol = (repo: string) => { const parseRepoHost = (repo: string) => { assertAllowedProtocol(repo); - if (repo.startsWith("git@")) { - const atIndex = repo.indexOf("@"); - const colonIndex = repo.indexOf(":", atIndex + 1); - if (colonIndex === -1) { - return null; - } - const host = repo.slice(atIndex + 1, colonIndex); - return host || null; + const scpMatch = repo.match(/^[^@]+@([^:]+):/); + if (scpMatch) { + return scpMatch[1] || null; } try { @@ -64,7 +60,10 @@ export const enforceHostAllowlist = (repo: string, allowHosts: string[]) => { } const normalizedHost = host.toLowerCase(); const allowed = allowHosts.map((entry) => entry.toLowerCase()); - if (!allowed.includes(normalizedHost)) { + const isAllowed = allowed.some( + (entry) => normalizedHost === entry || normalizedHost.endsWith(`.${entry}`), + ); + if (!isAllowed) { throw new Error( `Host '${host}' is not in allowHosts for '${redactRepoUrl(repo)}'.`, ); @@ -83,6 +82,8 @@ export const parseLsRemote = (stdout: string) => { export const resolveRemoteCommit = async (params: ResolveRemoteParams) => { enforceHostAllowlist(params.repo, params.allowHosts); + const repoLabel = redactRepoUrl(params.repo); + params.logger?.(`git ls-remote ${repoLabel} ${params.ref}`); const { stdout } = await execFileAsync( "git", ["ls-remote", params.repo, params.ref], diff --git a/src/materialize.ts b/src/materialize.ts index 6e5fcee..e261c42 100644 --- a/src/materialize.ts +++ b/src/materialize.ts @@ -14,7 +14,7 @@ import os from "node:os"; import path from "node:path"; import { pipeline } from "node:stream/promises"; import fg from "fast-glob"; - +import { symbols, ui } from "./cli/ui"; import { getErrnoCode } from "./errors"; import { MANIFEST_FILENAME } from "./manifest"; import { getCacheLayout, toPosixPath } from "./paths"; @@ -30,6 +30,7 @@ type MaterializeParams = { maxFiles?: number; ignoreHidden?: boolean; unwrapSingleRootDir?: boolean; + json?: boolean; }; type ResolvedMaterializeParams = { @@ -42,6 +43,7 @@ type ResolvedMaterializeParams = { maxFiles?: number; ignoreHidden: boolean; unwrapSingleRootDir: boolean; + json: boolean; }; type ManifestStats = { @@ -52,6 +54,44 @@ type ManifestStats = { const normalizePath = (value: string) => toPosixPath(value); +const escapeParens = (value: string) => { + let output = ""; + let escaped = false; + for (const char of value) { + if (escaped) { + output += char; + escaped = false; + continue; + } + if (char === "\\") { + output += char; + escaped = true; + continue; + } + if (char === "(" || char === ")") { + output += `\\${char}`; + continue; + } + output += char; + } + return output; +}; + +const normalizeIncludePatterns = (patterns: string[]) => { + return patterns.map((pattern) => { + let decoded = pattern; + if (pattern.includes("%")) { + try { + decoded = decodeURIComponent(pattern); + } catch { + decoded = pattern; + } + } + const hasExtGlob = /(^|[^\\])[@!+?*]\(/.test(decoded); + return hasExtGlob ? decoded : escapeParens(decoded); + }); +}; + const STREAM_COPY_THRESHOLD_MB = Number( process.env.DOCS_CACHE_STREAM_THRESHOLD_MB ?? "2", ); @@ -132,6 +172,7 @@ const resolveMaterializeParams = ( exclude: params.exclude ?? [], ignoreHidden: params.ignoreHidden ?? false, unwrapSingleRootDir: params.unwrapSingleRootDir ?? false, + json: params.json ?? false, }); const acquireLock = async (lockPath: string, timeoutMs = 5000) => { @@ -194,13 +235,19 @@ export const materializeSource = async (params: MaterializeParams) => { ...(resolved.ignoreHidden ? [".*", "**/.*", "**/.*/**"] : []), ...resolved.exclude, ]; - const files = await fg(resolved.include, { + const includePatterns = normalizeIncludePatterns(resolved.include); + const files = await fg(includePatterns, { cwd: resolved.repoDir, ignore: ignorePatterns, dot: true, onlyFiles: true, followSymbolicLinks: false, }); + if (!resolved.json && includePatterns.length > 0 && files.length === 0) { + ui.line( + `${symbols.warn} No files matched include patterns for ${resolved.sourceId}: ${includePatterns.join(", ")}`, + ); + } const entries = files .map((relativePath) => ({ relativePath, @@ -393,7 +440,8 @@ export const computeManifestHash = async ( params: MaterializeParams, ): Promise => { assertSafeSourceId(params.sourceId, "sourceId"); - const files = await fg(params.include, { + const includePatterns = normalizeIncludePatterns(params.include); + const files = await fg(includePatterns, { cwd: params.repoDir, ignore: [ ".git/**", diff --git a/src/sync.ts b/src/sync.ts index d1a343a..60d7807 100644 --- a/src/sync.ts +++ b/src/sync.ts @@ -27,6 +27,7 @@ type SyncOptions = { lockOnly: boolean; offline: boolean; failOnMiss: boolean; + verbose?: boolean; concurrency?: number; sourceFilter?: string[]; timeoutMs?: number; @@ -204,6 +205,7 @@ export const getSyncPlan = async ( ref: source.ref, allowHosts: defaults.allowHosts, timeoutMs: options.timeoutMs, + logger: options.verbose && !options.json ? ui.debug : undefined, }); const upToDate = lockEntry?.resolvedCommit === resolved.resolvedCommit && @@ -399,6 +401,7 @@ export const runSync = async (options: SyncOptions, deps: SyncDeps = {}) => { cacheDir: plan.cacheDir, include: source.include ?? defaults.include, timeoutMs: options.timeoutMs, + logger: options.verbose && !options.json ? ui.debug : undefined, }); if (!options.json) { ui.step( @@ -453,6 +456,7 @@ export const runSync = async (options: SyncOptions, deps: SyncDeps = {}) => { maxFiles: source.maxFiles ?? defaults.maxFiles, ignoreHidden: source.ignoreHidden ?? defaults.ignoreHidden, unwrapSingleRootDir: source.unwrapSingleRootDir, + json: options.json, }); if (source.targetDir) { const resolvedTarget = resolveTargetDir( diff --git a/src/toc.ts b/src/toc.ts index 09c73db..2503e97 100644 --- a/src/toc.ts +++ b/src/toc.ts @@ -1,5 +1,6 @@ import { access, readFile, rm, writeFile } from "node:fs/promises"; import path from "node:path"; +import { symbols, ui } from "./cli/ui"; import type { DocsCacheResolvedSource, TocFormat } from "./config"; import type { DocsCacheLock } from "./lock"; import { DEFAULT_TOC_FILENAME, resolveTargetDir, toPosixPath } from "./paths"; @@ -232,7 +233,18 @@ export const writeToc = async (params: { // Missing TOC; regenerate below. } } + let existingContent: string | null = null; + try { + existingContent = await readFile(sourceTocPath, "utf8"); + } catch { + existingContent = null; + } const sourceTocContent = generateSourceToc(entry, tocFormat); + if (existingContent !== null && existingContent !== sourceTocContent) { + ui.line( + `${symbols.warn} Overwriting existing ${DEFAULT_TOC_FILENAME} for ${id}`, + ); + } await writeFile(sourceTocPath, sourceTocContent, "utf8"); } else { // Remove TOC.md if it exists but toc is disabled diff --git a/tests/cli-add.test.js b/tests/cli-add.test.js index 91da773..88f36c3 100644 --- a/tests/cli-add.test.js +++ b/tests/cli-add.test.js @@ -48,6 +48,51 @@ test("add supports multiple github shorthands", async () => { ); }); +test("add supports explicit ids", async () => { + const tmpPath = path.join(tmpdir(), `docs-config-${Date.now()}-id.json`); + await execFileAsync("node", [ + "bin/docs-cache.mjs", + "add", + "--offline", + "--id", + "ux-design", + "https://github.com/fbosch/docs-cache.git", + "--config", + tmpPath, + ]); + + const raw = await readFile(tmpPath, "utf8"); + const config = JSON.parse(raw); + assert.equal(config.sources[0].id, "ux-design"); + assert.equal( + config.sources[0].repo, + "https://github.com/fbosch/docs-cache.git", + ); +}); + +test("add supports explicit ids for multiple sources", async () => { + const tmpPath = path.join(tmpdir(), `docs-config-${Date.now()}-ids.json`); + await execFileAsync("node", [ + "bin/docs-cache.mjs", + "add", + "--offline", + "--id", + "ux-nixos", + "fbosch/nixos", + "--id", + "ux-dotfiles", + "fbosch/dotfiles", + "--config", + tmpPath, + ]); + + const raw = await readFile(tmpPath, "utf8"); + const config = JSON.parse(raw); + assert.equal(config.sources.length, 2); + assert.equal(config.sources[0].id, "ux-nixos"); + assert.equal(config.sources[1].id, "ux-dotfiles"); +}); + test("add skips existing sources", async () => { const tmpPath = path.join(tmpdir(), `docs-config-${Date.now()}-skip.json`); await execFileAsync("node", [ diff --git a/tests/cli-parse.test.js b/tests/cli-parse.test.js index 90551b5..c077bb9 100644 --- a/tests/cli-parse.test.js +++ b/tests/cli-parse.test.js @@ -117,6 +117,23 @@ test("parseArgs handles --silent flag", async (t) => { assert.equal(result.options.silent, true); }); +test("parseArgs handles --verbose flag", async (t) => { + const module = await loadCliModule(); + if (!module) { + t.skip("CLI not built yet"); + return; + } + const result = module.parseArgs([ + "node", + "docs-cache", + "status", + "--verbose", + ]); + + assert.equal(result.command, "status"); + assert.equal(result.options.verbose, true); +}); + test("parseArgs accepts clean-cache command", async (t) => { const module = await loadCliModule(); if (!module) { diff --git a/tests/resolve-remote.test.js b/tests/resolve-remote.test.js index 08dc003..2ee19f7 100644 --- a/tests/resolve-remote.test.js +++ b/tests/resolve-remote.test.js @@ -25,3 +25,20 @@ test("enforceHostAllowlist rejects disallowed host", () => { /allowHosts/i, ); }); + +test("enforceHostAllowlist allows subdomain for allowHosts entry", () => { + assert.doesNotThrow(() => + enforceHostAllowlist("https://acme.visualstudio.com/repo.git", [ + "visualstudio.com", + ]), + ); +}); + +test("enforceHostAllowlist allows scp-style ssh", () => { + assert.doesNotThrow(() => + enforceHostAllowlist( + "user@vs-ssh.visualstudio.com:v3/org/project/repo.git", + ["visualstudio.com"], + ), + ); +}); diff --git a/tests/sync-materialize.test.js b/tests/sync-materialize.test.js index 546def3..74d3417 100644 --- a/tests/sync-materialize.test.js +++ b/tests/sync-materialize.test.js @@ -82,6 +82,130 @@ test("sync materializes via mocked fetch", async () => { assert.equal(lock.sources.local.fileCount, 1); }); +test("sync warns when include matches no files", async () => { + const tmpRoot = path.join( + tmpdir(), + `docs-cache-sync-miss-${Date.now().toString(36)}`, + ); + await mkdir(tmpRoot, { recursive: true }); + const cacheDir = path.join(tmpRoot, ".docs"); + const repoDir = path.join(tmpRoot, "repo"); + const configPath = path.join(tmpRoot, "docs.config.json"); + + await mkdir(repoDir, { recursive: true }); + await writeFile(path.join(repoDir, "README.md"), "hello", "utf8"); + + const config = { + $schema: + "https://raw.githubusercontent.com/fbosch/docs-cache/main/docs.config.schema.json", + sources: [ + { + id: "local", + repo: "https://example.com/repo.git", + include: ["docs/**/*.md"], + }, + ], + }; + await writeFile(configPath, `${JSON.stringify(config, null, 2)}\n`, "utf8"); + + const warnings = []; + const originalWrite = process.stdout.write.bind(process.stdout); + process.stdout.write = (chunk) => { + const text = chunk instanceof Uint8Array ? chunk.toString() : chunk; + if (text.includes("No files matched include patterns")) { + warnings.push(text); + } + return originalWrite(chunk); + }; + + try { + await runSync( + { + configPath, + cacheDirOverride: cacheDir, + json: false, + lockOnly: false, + offline: false, + failOnMiss: false, + }, + { + resolveRemoteCommit: async () => ({ + repo: "https://example.com/repo.git", + ref: "HEAD", + resolvedCommit: "abc123", + }), + fetchSource: async () => ({ + repoDir, + cleanup: async () => undefined, + }), + }, + ); + } finally { + process.stdout.write = originalWrite; + } + + assert.ok(warnings.length > 0, "expected include mismatch warning"); +}); + +test("sync decodes percent-encoded include patterns", async () => { + const tmpRoot = path.join( + tmpdir(), + `docs-cache-sync-encoded-${Date.now().toString(36)}`, + ); + await mkdir(tmpRoot, { recursive: true }); + const cacheDir = path.join(tmpRoot, ".docs"); + const repoDir = path.join(tmpRoot, "repo"); + const configPath = path.join(tmpRoot, "docs.config.json"); + + const encodedDir = "Design%20Notes"; + const decodedDir = "Design Notes"; + await mkdir(path.join(repoDir, decodedDir), { recursive: true }); + await writeFile(path.join(repoDir, decodedDir, "README.md"), "hello", "utf8"); + + const config = { + $schema: + "https://raw.githubusercontent.com/fbosch/docs-cache/main/docs.config.schema.json", + sources: [ + { + id: "local", + repo: "https://example.com/repo.git", + include: [`${encodedDir}/**/*.md`], + }, + ], + }; + await writeFile(configPath, `${JSON.stringify(config, null, 2)}\n`, "utf8"); + + await runSync( + { + configPath, + cacheDirOverride: cacheDir, + json: false, + lockOnly: false, + offline: false, + failOnMiss: false, + }, + { + resolveRemoteCommit: async () => ({ + repo: "https://example.com/repo.git", + ref: "HEAD", + resolvedCommit: "abc123", + }), + fetchSource: async () => ({ + repoDir, + cleanup: async () => undefined, + }), + }, + ); + + const materializedPath = path.join( + cacheDir, + "local", + decodedDir, + "README.md", + ); + assert.equal(await readFile(materializedPath, "utf8"), "hello"); +}); + test("sync re-materializes when docs missing even if commit unchanged", async () => { const tmpRoot = path.join( tmpdir(), diff --git a/tests/sync-toc.test.js b/tests/sync-toc.test.js index 294db76..e8f10e2 100644 --- a/tests/sync-toc.test.js +++ b/tests/sync-toc.test.js @@ -422,3 +422,78 @@ test("sync does not rewrite TOC.md when commit matches", async () => { "TOC.md should not be rewritten when commit matches", ); }); + +test("sync warns when overwriting existing TOC.md", async () => { + const tmpRoot = path.join( + tmpdir(), + `docs-cache-toc-overwrite-${Date.now().toString(36)}`, + ); + await mkdir(tmpRoot, { recursive: true }); + const cacheDir = path.join(tmpRoot, ".docs"); + const repoDir = path.join(tmpRoot, "repo"); + const configPath = path.join(tmpRoot, "docs.config.json"); + + await mkdir(repoDir, { recursive: true }); + await writeFile(path.join(repoDir, "README.md"), "hello", "utf8"); + + const config = { + $schema: + "https://raw.githubusercontent.com/fbosch/docs-cache/main/docs.config.schema.json", + sources: [ + { + id: "local", + repo: "https://example.com/repo.git", + }, + ], + }; + await writeFile(configPath, `${JSON.stringify(config, null, 2)}\n`, "utf8"); + + const warnings = []; + const originalWrite = process.stdout.write.bind(process.stdout); + process.stdout.write = (chunk) => { + const text = chunk instanceof Uint8Array ? chunk.toString() : chunk; + if (text.includes("Overwriting existing TOC.md")) { + warnings.push(text); + } + return originalWrite(chunk); + }; + + try { + await runSync( + { + configPath, + cacheDirOverride: cacheDir, + json: false, + lockOnly: false, + offline: false, + failOnMiss: false, + }, + { + resolveRemoteCommit: async () => ({ + repo: "https://example.com/repo.git", + ref: "HEAD", + resolvedCommit: "abc123", + }), + fetchSource: async () => ({ + repoDir, + cleanup: async () => undefined, + }), + materializeSource: async ({ cacheDir: cacheRoot, sourceId }) => { + const outDir = path.join(cacheRoot, sourceId); + await mkdir(outDir, { recursive: true }); + await writeFile( + path.join(outDir, ".manifest.jsonl"), + `${JSON.stringify({ path: "README.md", size: 5 })}\n`, + ); + await writeFile(path.join(outDir, "README.md"), "hello", "utf8"); + await writeFile(path.join(outDir, "TOC.md"), "old toc", "utf8"); + return { bytes: 5, fileCount: 1 }; + }, + }, + ); + } finally { + process.stdout.write = originalWrite; + } + + assert.ok(warnings.length > 0, "expected overwrite warning for TOC.md"); +});