Skip to content
24 changes: 22 additions & 2 deletions src/git/fetch-source.ts
Original file line number Diff line number Diff line change
Expand Up @@ -286,8 +286,28 @@ type CloneResult = {
/**
 * Reports whether a pattern contains a glob metacharacter.
 *
 * Only `*`, `?` and `[` are considered; any other text is treated as literal.
 *
 * @param pattern - Pattern to inspect.
 * @returns `true` when at least one of `*`, `?` or `[` occurs in the pattern.
 */
const patternHasGlob = (pattern: string) => /[*?[]/.test(pattern);

/**
 * Normalizes include patterns: backslashes become forward slashes and
 * empty entries are dropped.
 *
 * @param include - Optional list of patterns; a missing list is treated as empty.
 * @returns The normalized, non-empty patterns in their original order.
 */
const normalizeSparsePatterns = (include?: string[]) => {
  const cleaned: string[] = [];
  for (const raw of include ?? []) {
    const withForwardSlashes = raw.replace(/\\/g, "/");
    if (withForwardSlashes) {
      cleaned.push(withForwardSlashes);
    }
  }
  return cleaned;
};
/**
 * Expands shell-style brace groups in a pattern into the list of plain
 * patterns they stand for, e.g. `**\/*.{md,mdx}` -> `**\/*.md`, `**\/*.mdx`.
 *
 * Groups may appear anywhere in the pattern (not only after a dot), may occur
 * several times (`a.{b,c}/d.{e,f}` yields all four combinations, first group
 * varying slowest) and may be nested (`{a,{b,c}}`). Whitespace around each
 * alternative is trimmed. A `{` without a matching `}` and an empty `{}` are
 * kept as literal text.
 *
 * @param pattern - Pattern that may contain brace groups.
 * @returns Every expansion of the pattern, in left-to-right alternative
 *   order; `[pattern]` when there is nothing to expand.
 */
const expandBracePattern = (pattern: string): string[] => {
  const open = pattern.indexOf("{");
  if (open === -1) {
    return [pattern];
  }

  // Walk from the first "{" to its matching "}", tracking nesting depth so
  // that only commas belonging to this group split its alternatives.
  const alternatives: string[] = [];
  let depth = 0;
  let close = -1;
  let segmentStart = open + 1;
  for (let i = open; i < pattern.length; i += 1) {
    const ch = pattern.charAt(i);
    if (ch === "{") {
      depth += 1;
    } else if (ch === "}") {
      depth -= 1;
      if (depth === 0) {
        alternatives.push(pattern.slice(segmentStart, i));
        close = i;
        break;
      }
    } else if (ch === "," && depth === 1) {
      alternatives.push(pattern.slice(segmentStart, i));
      segmentStart = i + 1;
    }
  }

  // An unmatched "{" or an empty "{}" is literal text: keep it verbatim and
  // continue expanding whatever follows it.
  let literalEnd = -1;
  if (close === -1) {
    literalEnd = open + 1;
  } else if (close === open + 1) {
    literalEnd = close + 1;
  }
  if (literalEnd !== -1) {
    const head = pattern.slice(0, literalEnd);
    return expandBracePattern(pattern.slice(literalEnd)).map(
      (rest) => `${head}${rest}`,
    );
  }

  const prefix = pattern.slice(0, open);
  const suffix = pattern.slice(close + 1);
  // Recursing on the rebuilt pattern expands nested groups inside the chosen
  // alternative as well as any later groups in the suffix.
  return alternatives.flatMap((alternative) =>
    expandBracePattern(`${prefix}${alternative.trim()}${suffix}`),
  );
};
Comment thread
fbosch marked this conversation as resolved.
Comment thread
coderabbitai[bot] marked this conversation as resolved.

/**
 * Prepares the `include` patterns for git sparse-checkout.
 *
 * Each pattern has its backslashes converted to forward slashes; patterns
 * that are empty after that are dropped. The remaining patterns are passed
 * through `expandBracePattern`, so one input pattern may yield several
 * output patterns.
 *
 * @param include - Optional list of patterns; a missing list is treated as empty.
 * @returns The normalized and brace-expanded patterns, in input order.
 */
const normalizeSparsePatterns = (include?: string[]): string[] =>
  (include ?? []).flatMap((pattern) => {
    const normalized = pattern.replace(/\\/g, "/");
    // Expand brace patterns for git sparse-checkout compatibility.
    return normalized ? expandBracePattern(normalized) : [];
  });

/**
 * Reports whether a pattern is written as a directory, i.e. its last
 * character is `/`.
 *
 * @param pattern - Pattern to inspect.
 * @returns `true` when the pattern ends with a forward slash.
 */
const isDirectoryLiteral = (pattern: string) => {
  const lastChar = pattern.charAt(pattern.length - 1);
  return lastChar === "/";
};

Expand Down
60 changes: 60 additions & 0 deletions tests/integration-real-repos.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -123,3 +123,63 @@ test("integration clears partial clone cache before sync", async (t) => {
await rm(tmpRoot, { recursive: true, force: true });
}
});

// Opt-in integration test: syncs a source that has no `include` setting and
// checks that files (README.md in particular) still end up in the cache.
test("integration uses default include pattern without explicit config", async (t) => {
  if (!shouldRun()) {
    t.skip("Set DOCS_CACHE_INTEGRATION=1 to run integration tests");
    return;
  }

  const uniqueSuffix = Date.now().toString(36);
  const tmpRoot = path.join(tmpdir(), `docs-cache-defaults-${uniqueSuffix}`);
  const cacheDir = path.join(tmpRoot, ".docs");
  const configPath = path.join(tmpRoot, "docs.config.json");
  const repo = "https://github.com/glanceapp/glance.git";

  await mkdir(tmpRoot, { recursive: true });

  // The source deliberately omits `include` so the defaults are exercised.
  const glanceSource = { id: "glance", repo };
  const config = {
    $schema:
      "https://raw.githubusercontent.com/fbosch/docs-cache/main/docs.config.schema.json",
    sources: [glanceSource],
  };
  const serializedConfig = `${JSON.stringify(config, null, 2)}\n`;
  await writeFile(configPath, serializedConfig, "utf8");

  try {
    await runSync({
      configPath,
      cacheDirOverride: cacheDir,
      json: false,
      lockOnly: false,
      offline: false,
      failOnMiss: false,
    });

    const lockPath = path.join(tmpRoot, DEFAULT_LOCK_FILENAME);
    const lock = JSON.parse(await readFile(lockPath, "utf8"));
    assert.ok(lock.sources.glance);

    // The default pattern includes **/*.{md,mdx,markdown,mkd,txt,rst,adoc,asciidoc};
    // the repository has markdown files, so at least one file must be synced.
    const syncedCount = lock.sources.glance.fileCount;
    assert.ok(
      syncedCount > 0,
      `Expected files to be synced with default include pattern, got ${syncedCount}`,
    );

    // Confirm that a concrete .md file made it into the cache directory.
    const readmeContent = await readFile(
      path.join(cacheDir, "glance", "README.md"),
      "utf8",
    );
    assert.ok(
      readmeContent.length > 0,
      "Expected README.md to be synced and have content",
    );
  } finally {
    await rm(tmpRoot, { recursive: true, force: true });
  }
});
267 changes: 267 additions & 0 deletions tests/sparse-brace-expansion.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,267 @@
import assert from "node:assert/strict";
import { createHash } from "node:crypto";
import { chmod, mkdir, readFile, rm, writeFile } from "node:fs/promises";
import { tmpdir } from "node:os";
import path from "node:path";
import { test } from "node:test";

import { runSync } from "../dist/api.mjs";

// Returns the first 16 hex characters of the SHA-256 digest of the repo URL.
// The tests use it as the directory name under the git cache root.
const hashRepoUrl = (repo) => {
  const hexDigest = createHash("sha256").update(repo).digest("hex");
  return hexDigest.slice(0, 16);
};

// Writes a fake `git` executable into `binDir` so tests can observe git
// invocations without running real git.
//
// The generated Node script appends its command-line arguments (one JSON
// array per line) to `logPath`, then:
//   - for `ls-remote`: prints a fixed fake commit SHA for HEAD and exits 0;
//   - for `clone`: creates the directory named by the last argument;
//   - for `checkout` and everything else: exits 0.
//
// binDir  - directory that receives the shim file(s).
// logPath - file the shim appends its arguments to.
const writeGitShim = async (binDir, logPath) => {
// On Windows the script is stored as git.js and started via git.cmd (below);
// elsewhere it is stored as an executable file named git.
const scriptPath = path.join(
binDir,
process.platform === "win32" ? "git.js" : "git",
);
// Source of the shim. logPath is embedded with JSON.stringify so it becomes a
// valid string literal inside the generated script.
const payload = `#!/usr/bin/env node
const fs = require("node:fs");
const path = require("node:path");

const logPath = ${JSON.stringify(logPath)};
fs.appendFileSync(logPath,
JSON.stringify(process.argv.slice(2)) + "\\n",
"utf8",
);

const args = process.argv.slice(2);
const isWin = process.platform === "win32";
const normalize = (value) => (isWin ? value.toLowerCase() : value);

if (args.map(normalize).includes("ls-remote")) {
// Return a fake commit SHA
console.log("abc123def456789012345678901234567890abcd\\tHEAD");
process.exit(0);
}

if (args.map(normalize).includes("clone")) {
const outDir = args[args.length - 1];
fs.mkdirSync(outDir, { recursive: true });
}

if (args.map(normalize).includes("checkout")) {
process.exit(0);
}

process.exit(0);
`;
await writeFile(scriptPath, payload, "utf8");
if (process.platform !== "win32") {
// Non-Windows: mark the script executable; the shebang line runs it with node.
await chmod(scriptPath, 0o755);
return;
}
// Windows: add a git.cmd wrapper that runs the script with the current Node
// binary and forwards all arguments (%*).
const cmdPath = path.join(binDir, "git.cmd");
const cmdPayload = `@echo off
"${process.execPath}" "${scriptPath}" %*
`;
await writeFile(cmdPath, cmdPayload, "utf8");
};

// Runs a sync against the git shim with an explicit brace pattern in
// `include` and verifies, from the shim's argument log, that
// `git sparse-checkout set` received the expanded patterns.
test("sync expands brace patterns for git sparse-checkout", async () => {
  const tmpRoot = path.join(
    tmpdir(),
    `docs-cache-brace-${Date.now().toString(36)}`,
  );
  const binDir = path.join(tmpRoot, "bin");
  const logPath = path.join(tmpRoot, "git.log");
  const cacheDir = path.join(tmpRoot, ".docs");
  const configPath = path.join(tmpRoot, "docs.config.json");
  const gitCacheRoot = path.join(tmpRoot, "git-cache");
  const repo = "https://example.com/repo.git";
  const repoHash = hashRepoUrl(repo);
  const cachePath = path.join(gitCacheRoot, repoHash);

  await mkdir(binDir, { recursive: true });
  await mkdir(cachePath, { recursive: true });
  await writeGitShim(binDir, logPath);
  await writeFile(logPath, "", "utf8");

  const config = {
    $schema:
      "https://raw.githubusercontent.com/fbosch/docs-cache/main/docs.config.schema.json",
    defaults: {
      allowHosts: ["example.com"],
    },
    sources: [
      {
        id: "test",
        repo,
        include: ["**/*.{md,mdx,txt}"],
      },
    ],
  };
  await writeFile(configPath, `${JSON.stringify(config, null, 2)}\n`, "utf8");

  // Snapshot every environment variable this test touches. Assigning
  // `undefined` to process.env stores the string "undefined", so variables
  // that were unset beforehand must be deleted on restore, not reassigned.
  const touchedEnvKeys = ["PATH", "Path", "PATHEXT", "DOCS_CACHE_GIT_DIR"];
  const savedEnv = new Map(
    touchedEnvKeys.map((key) => [key, process.env[key]]),
  );
  const restoreEnv = () => {
    for (const [key, value] of savedEnv) {
      if (value === undefined) {
        delete process.env[key];
      } else {
        process.env[key] = value;
      }
    }
  };

  // Put the shim directory first on the search path so it shadows real git.
  const basePath = process.env.PATH ?? process.env.Path ?? "";
  const nextPath = `${binDir}${path.delimiter}${basePath}`;
  process.env.PATH = nextPath;
  process.env.Path = nextPath;
  if (process.platform === "win32") {
    // Lets the git.cmd wrapper written by writeGitShim be resolved.
    process.env.PATHEXT = ".CMD;.BAT;.EXE;.COM";
  }
  process.env.DOCS_CACHE_GIT_DIR = gitCacheRoot;

  try {
    await runSync({
      configPath,
      cacheDirOverride: cacheDir,
      json: false,
      lockOnly: false,
      offline: false,
      failOnMiss: false,
    });

    // Each log line is the JSON-encoded argument list of one shim invocation;
    // keep only the sparse-checkout calls and ignore unparsable lines.
    const logRaw = await readFile(logPath, "utf8");
    const sparseArgs = logRaw
      .split("\n")
      .filter(Boolean)
      .flatMap((line) => {
        try {
          const args = JSON.parse(line);
          return Array.isArray(args) && args.includes("sparse-checkout")
            ? [args]
            : [];
        } catch {
          return [];
        }
      });

    assert.ok(
      sparseArgs.length > 0,
      "Expected sparse-checkout to be called",
    );

    // **/*.{md,mdx,txt} should have been expanded into
    // **/*.md, **/*.mdx and **/*.txt.
    const hasExpandedPatterns = sparseArgs.some((args) => {
      const setIndex = args.indexOf("set");
      if (setIndex === -1) return false;
      // Everything after "set" that is not an option flag (e.g. "--no-cone")
      // is a pattern; this does not depend on how many flags are passed.
      const patterns = args
        .slice(setIndex + 1)
        .filter((arg) => typeof arg === "string" && !arg.startsWith("--"));
      return (
        patterns.includes("**/*.md") &&
        patterns.includes("**/*.mdx") &&
        patterns.includes("**/*.txt")
      );
    });

    assert.ok(
      hasExpandedPatterns,
      `Expected brace patterns to be expanded. Got: ${JSON.stringify(sparseArgs, null, 2)}`,
    );
  } finally {
    restoreEnv();
    await rm(tmpRoot, { recursive: true, force: true });
  }
});
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated

// Runs a sync against the git shim for a source without `include` and
// verifies, from the shim's argument log, that the default brace pattern
// reached `git sparse-checkout set` in expanded form.
test("sync expands default brace pattern when no include specified", async () => {
  const tmpRoot = path.join(
    tmpdir(),
    `docs-cache-default-brace-${Date.now().toString(36)}`,
  );
  const binDir = path.join(tmpRoot, "bin");
  const logPath = path.join(tmpRoot, "git.log");
  const cacheDir = path.join(tmpRoot, ".docs");
  const configPath = path.join(tmpRoot, "docs.config.json");
  const gitCacheRoot = path.join(tmpRoot, "git-cache");
  const repo = "https://example.com/repo.git";
  const repoHash = hashRepoUrl(repo);
  const cachePath = path.join(gitCacheRoot, repoHash);

  await mkdir(binDir, { recursive: true });
  await mkdir(cachePath, { recursive: true });
  await writeGitShim(binDir, logPath);
  await writeFile(logPath, "", "utf8");

  const config = {
    $schema:
      "https://raw.githubusercontent.com/fbosch/docs-cache/main/docs.config.schema.json",
    defaults: {
      allowHosts: ["example.com"],
    },
    sources: [
      {
        id: "test",
        repo,
        // No include - should use default pattern with brace expansion
      },
    ],
  };
  await writeFile(configPath, `${JSON.stringify(config, null, 2)}\n`, "utf8");

  // Snapshot every environment variable this test touches. Assigning
  // `undefined` to process.env stores the string "undefined", so variables
  // that were unset beforehand must be deleted on restore, not reassigned.
  const touchedEnvKeys = ["PATH", "Path", "PATHEXT", "DOCS_CACHE_GIT_DIR"];
  const savedEnv = new Map(
    touchedEnvKeys.map((key) => [key, process.env[key]]),
  );
  const restoreEnv = () => {
    for (const [key, value] of savedEnv) {
      if (value === undefined) {
        delete process.env[key];
      } else {
        process.env[key] = value;
      }
    }
  };

  // Put the shim directory first on the search path so it shadows real git.
  const basePath = process.env.PATH ?? process.env.Path ?? "";
  const nextPath = `${binDir}${path.delimiter}${basePath}`;
  process.env.PATH = nextPath;
  process.env.Path = nextPath;
  if (process.platform === "win32") {
    // Lets the git.cmd wrapper written by writeGitShim be resolved.
    process.env.PATHEXT = ".CMD;.BAT;.EXE;.COM";
  }
  process.env.DOCS_CACHE_GIT_DIR = gitCacheRoot;

  try {
    await runSync({
      configPath,
      cacheDirOverride: cacheDir,
      json: false,
      lockOnly: false,
      offline: false,
      failOnMiss: false,
    });

    // Each log line is the JSON-encoded argument list of one shim invocation;
    // keep only the sparse-checkout calls and ignore unparsable lines.
    const logRaw = await readFile(logPath, "utf8");
    const sparseArgs = logRaw
      .split("\n")
      .filter(Boolean)
      .flatMap((line) => {
        try {
          const args = JSON.parse(line);
          return Array.isArray(args) && args.includes("sparse-checkout")
            ? [args]
            : [];
        } catch {
          return [];
        }
      });

    assert.ok(
      sparseArgs.length > 0,
      "Expected sparse-checkout to be called with default patterns",
    );

    // Default is **/*.{md,mdx,markdown,mkd,txt,rst,adoc,asciidoc}; a
    // representative subset of its expansions is checked here.
    const hasExpandedDefaults = sparseArgs.some((args) => {
      const setIndex = args.indexOf("set");
      if (setIndex === -1) return false;
      // Everything after "set" that is not an option flag (e.g. "--no-cone")
      // is a pattern; this does not depend on how many flags are passed.
      const patterns = args
        .slice(setIndex + 1)
        .filter((arg) => typeof arg === "string" && !arg.startsWith("--"));
      return (
        patterns.includes("**/*.md") &&
        patterns.includes("**/*.mdx") &&
        patterns.includes("**/*.markdown") &&
        patterns.includes("**/*.txt")
      );
    });

    assert.ok(
      hasExpandedDefaults,
      `Expected default brace patterns to be expanded. Got: ${JSON.stringify(sparseArgs, null, 2)}`,
    );
  } finally {
    restoreEnv();
    await rm(tmpRoot, { recursive: true, force: true });
  }
});
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Loading