Skip to content

Commit 0d70e98

Browse files
committed
feat(git): add partial clone support and retry rm
1 parent 25d2931 commit 0d70e98

4 files changed

Lines changed: 440 additions & 41 deletions

File tree

src/git/fetch-source.ts

Lines changed: 108 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import { execFile } from "node:child_process";
22
import { createHash } from "node:crypto";
3-
import { mkdir, mkdtemp, rm } from "node:fs/promises";
3+
import { mkdir, mkdtemp, readFile, rm } from "node:fs/promises";
44
import { tmpdir } from "node:os";
55
import path from "node:path";
66
import { pathToFileURL } from "node:url";
@@ -12,6 +12,8 @@ import { exists, resolveGitCacheDir } from "./cache-dir";
1212
const execFileAsync = promisify(execFile);
1313

1414
const DEFAULT_TIMEOUT_MS = 120000; // 120 seconds (2 minutes)
15+
const DEFAULT_RM_RETRIES = 3;
16+
const DEFAULT_RM_BACKOFF_MS = 100;
1517

1618
const git = async (
1719
args: string[],
@@ -62,6 +64,26 @@ const git = async (
6264
});
6365
};
6466

67+
/**
 * Recursively deletes `dirPath`, retrying when the removal fails with an
 * error code that is treated as transient.
 *
 * Only `ENOTEMPTY`, `EBUSY` and `EPERM` are retried; every other error is
 * rethrown immediately. The wait between attempts grows linearly:
 * `DEFAULT_RM_BACKOFF_MS * attemptNumber`.
 *
 * @param dirPath - Path to remove (passed to `rm` with `recursive` and `force`).
 * @param retries - Number of additional attempts after the first one.
 *   Negative or NaN values are treated as 0 so that one removal attempt is
 *   always made and a failure is never silently dropped.
 * @throws The first non-retryable error, or the last retryable error once
 *   the retry budget is exhausted.
 */
const removeDir = async (
  dirPath: string,
  retries = DEFAULT_RM_RETRIES,
): Promise<void> => {
  const retryableCodes = new Set(["ENOTEMPTY", "EBUSY", "EPERM"]);
  // Normalise the budget: NaN/negative previously skipped the loop entirely,
  // which returned without removing anything and without reporting an error.
  const maxRetries = Number.isNaN(retries) ? 0 : Math.max(0, retries);

  for (let attempt = 0; ; attempt += 1) {
    try {
      await rm(dirPath, { recursive: true, force: true });
      return;
    } catch (error) {
      // Anything thrown may in principle be a non-Error value; only read
      // `.code` from real Error objects.
      const code =
        error instanceof Error
          ? (error as NodeJS.ErrnoException).code
          : undefined;
      const retryable = code !== undefined && retryableCodes.has(code);
      // `>=` (rather than `===`) guarantees the last error is rethrown even
      // when `retries` is not an integer.
      if (!retryable || attempt >= maxRetries) {
        throw error;
      }
      await new Promise<void>((resolve) => {
        setTimeout(resolve, DEFAULT_RM_BACKOFF_MS * (attempt + 1));
      });
    }
  }
};
86+
6587
// Hash a repo URL to create a safe directory name
6688
const hashRepoUrl = (repo: string): string => {
6789
return createHash("sha256").update(repo).digest("hex").substring(0, 16);
@@ -83,6 +105,21 @@ const isValidGitRepo = async (repoPath: string): Promise<boolean> => {
83105
}
84106
};
85107

108+
/**
 * Reports whether the repository at `repoPath` is configured as a partial
 * clone.
 *
 * Reads `<repoPath>/.git/config` and looks for one of the configuration keys
 * `promisor`, `partialclonefilter` or `partialclone` (case-insensitive).
 * The keys are matched as settings at the start of a config line, not as
 * arbitrary substrings, so a remote URL or branch name that merely contains
 * e.g. "promisor" does not count as a partial clone.
 *
 * @param repoPath - Root directory of the repository to inspect.
 * @returns `true` when one of the keys is present; `false` otherwise,
 *   including when the config file cannot be read.
 */
const isPartialClone = async (repoPath: string): Promise<boolean> => {
  // Optional leading whitespace, optional same-line `[section]` header, the
  // key, then `=`, a trailing comment, or end of line (valueless boolean key).
  const partialCloneKey =
    /^[ \t]*(?:\[[^\]\r\n]*\][ \t]*)?(?:promisor|partialclonefilter|partialclone)[ \t]*(?:=|[#;]|\r?$)/im;

  let raw: string;
  try {
    raw = await readFile(path.join(repoPath, ".git", "config"), "utf8");
  } catch {
    // Missing or unreadable config: treat as "not a partial clone".
    return false;
  }
  return partialCloneKey.test(raw);
};
122+
86123
type FetchParams = {
87124
sourceId: string;
88125
repo: string;
@@ -148,15 +185,18 @@ const extractSparsePaths = (include?: string[]) => {
148185
const cloneRepo = async (params: FetchParams, outDir: string) => {
149186
const isCommitRef = /^[0-9a-f]{7,40}$/i.test(params.ref);
150187
const useSparse = isSparseEligible(params.include);
151-
const cloneArgs = [
152-
"clone",
153-
"--no-checkout",
154-
"--filter=blob:none",
155-
"--depth",
156-
String(params.depth),
157-
"--recurse-submodules=no",
158-
"--no-tags",
159-
];
188+
const buildCloneArgs = () => {
189+
const cloneArgs = [
190+
"clone",
191+
"--no-checkout",
192+
"--depth",
193+
String(params.depth),
194+
"--recurse-submodules=no",
195+
"--no-tags",
196+
];
197+
return cloneArgs;
198+
};
199+
const cloneArgs = buildCloneArgs();
160200
if (useSparse) {
161201
cloneArgs.push("--sparse");
162202
}
@@ -197,33 +237,38 @@ const cloneOrUpdateRepo = async (params: FetchParams, outDir: string) => {
197237

198238
// If cache exists and is valid, try to fetch and update
199239
if (cacheExists && (await isValidGitRepo(cachePath))) {
200-
try {
201-
// Fetch the specific ref or commit
202-
const fetchArgs = ["fetch", "origin"];
203-
if (!isCommitRef) {
204-
// Fetch specific branch/tag
205-
const refSpec =
206-
params.ref === "HEAD"
207-
? "HEAD"
208-
: `${params.ref}:refs/remotes/origin/${params.ref}`;
209-
fetchArgs.push(refSpec, "--depth", String(params.depth));
210-
} else {
211-
// For commit refs, fetch the default branch and hope the commit is there
212-
fetchArgs.push("--depth", String(params.depth));
213-
}
214-
215-
await git(["-C", cachePath, ...fetchArgs], {
216-
timeoutMs: params.timeoutMs,
217-
});
218-
} catch (_error) {
219-
// Fetch failed, remove corrupt cache and re-clone
220-
await rm(cachePath, { recursive: true, force: true });
240+
if (await isPartialClone(cachePath)) {
241+
await removeDir(cachePath);
221242
await cloneRepo(params, cachePath);
243+
} else {
244+
try {
245+
// Fetch the specific ref or commit
246+
const fetchArgs = ["fetch", "origin"];
247+
if (!isCommitRef) {
248+
// Fetch specific branch/tag
249+
const refSpec =
250+
params.ref === "HEAD"
251+
? "HEAD"
252+
: `${params.ref}:refs/remotes/origin/${params.ref}`;
253+
fetchArgs.push(refSpec, "--depth", String(params.depth));
254+
} else {
255+
// For commit refs, fetch the default branch and hope the commit is there
256+
fetchArgs.push("--depth", String(params.depth));
257+
}
258+
259+
await git(["-C", cachePath, ...fetchArgs], {
260+
timeoutMs: params.timeoutMs,
261+
});
262+
} catch (_error) {
263+
// Fetch failed, remove corrupt cache and re-clone
264+
await removeDir(cachePath);
265+
await cloneRepo(params, cachePath);
266+
}
222267
}
223268
} else {
224269
// No cache or invalid - do fresh clone
225270
if (cacheExists) {
226-
await rm(cachePath, { recursive: true, force: true });
271+
await removeDir(cachePath);
227272
}
228273
await cloneRepo(params, cachePath);
229274
}
@@ -235,12 +280,14 @@ const cloneOrUpdateRepo = async (params: FetchParams, outDir: string) => {
235280
const localCloneArgs = [
236281
"clone",
237282
"--no-checkout",
238-
"--filter=blob:none",
239283
"--depth",
240284
String(params.depth),
241285
"--recurse-submodules=no",
242286
"--no-tags",
243287
];
288+
if (await isPartialClone(cachePath)) {
289+
localCloneArgs.splice(2, 0, "--filter=blob:none");
290+
}
244291

245292
if (useSparse) {
246293
localCloneArgs.push("--sparse");
@@ -255,16 +302,35 @@ const cloneOrUpdateRepo = async (params: FetchParams, outDir: string) => {
255302

256303
const cacheUrl = pathToFileURL(cachePath).href;
257304
localCloneArgs.push(cacheUrl, outDir);
258-
await git(localCloneArgs, {
259-
timeoutMs: params.timeoutMs,
260-
allowFileProtocol: true,
261-
});
305+
let allowLocalFilter = true;
306+
if (allowLocalFilter) {
307+
localCloneArgs.splice(2, 0, "--filter=blob:none");
308+
}
309+
try {
310+
await git(localCloneArgs, {
311+
timeoutMs: params.timeoutMs,
312+
allowFileProtocol: true,
313+
});
314+
} catch (error) {
315+
if (!allowLocalFilter || !isFilterUnsupported(error)) {
316+
throw error;
317+
}
318+
allowLocalFilter = false;
319+
const fallbackArgs = localCloneArgs.filter(
320+
(arg) => arg !== "--filter=blob:none",
321+
);
322+
await git(fallbackArgs, {
323+
timeoutMs: params.timeoutMs,
324+
allowFileProtocol: true,
325+
});
326+
}
262327

263328
if (useSparse) {
264329
const sparsePaths = extractSparsePaths(params.include);
265330
if (sparsePaths.length > 0) {
266331
await git(["-C", outDir, "sparse-checkout", "set", ...sparsePaths], {
267332
timeoutMs: params.timeoutMs,
333+
allowFileProtocol: true,
268334
});
269335
}
270336
}
@@ -273,6 +339,7 @@ const cloneOrUpdateRepo = async (params: FetchParams, outDir: string) => {
273339
["-C", outDir, "checkout", "--quiet", "--detach", params.resolvedCommit],
274340
{
275341
timeoutMs: params.timeoutMs,
342+
allowFileProtocol: true,
276343
},
277344
);
278345
};
@@ -290,7 +357,7 @@ const archiveRepo = async (params: FetchParams) => {
290357
);
291358
return tempDir;
292359
} catch (error) {
293-
await rm(tempDir, { recursive: true, force: true });
360+
await removeDir(tempDir);
294361
throw error;
295362
}
296363
};
@@ -302,7 +369,7 @@ export const fetchSource = async (params: FetchParams) => {
302369
return {
303370
repoDir: archiveDir,
304371
cleanup: async () => {
305-
await rm(archiveDir, { recursive: true, force: true });
372+
await removeDir(archiveDir);
306373
},
307374
};
308375
} catch {
@@ -314,11 +381,11 @@ export const fetchSource = async (params: FetchParams) => {
314381
return {
315382
repoDir: tempDir,
316383
cleanup: async () => {
317-
await rm(tempDir, { recursive: true, force: true });
384+
await removeDir(tempDir);
318385
},
319386
};
320387
} catch (error) {
321-
await rm(tempDir, { recursive: true, force: true });
388+
await removeDir(tempDir);
322389
throw error;
323390
}
324391
}
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
import assert from "node:assert/strict";
2+
import { createHash } from "node:crypto";
3+
import { chmod, mkdir, readFile, rm, writeFile } from "node:fs/promises";
4+
import { tmpdir } from "node:os";
5+
import path from "node:path";
6+
import { test } from "node:test";
7+
8+
import { runSync } from "../dist/api.mjs";
9+
10+
/**
 * Derives the cache directory name for a repo URL: the first 16 hex
 * characters of the URL's SHA-256 digest.
 */
const hashRepoUrl = (repo) => {
  const hexDigest = createHash("sha256").update(repo).digest("hex");
  return hexDigest.slice(0, 16);
};
12+
13+
/**
 * Writes a fake `git` executable into `binDir` for use by the tests.
 *
 * The generated Node script appends the JSON-encoded argument list of each
 * invocation as one line to `logPath`, exits with status 1 when the arguments
 * include "archive", creates the directory named by the last argument when
 * the arguments include "clone", and otherwise exits with status 0.
 *
 * On win32 the script is written as `git.js` together with a `git.cmd`
 * wrapper; on other platforms it is written as `git` and made executable.
 *
 * @param binDir - Directory that receives the shim file(s).
 * @param logPath - File the shim appends its argument log to.
 */
const writeGitShim = async (binDir, logPath) => {
14+
const scriptPath = path.join(
15+
binDir,
16+
process.platform === "win32" ? "git.js" : "git",
17+
);
18+
const payload = `#!/usr/bin/env node
19+
const fs = require("node:fs");
20+
const path = require("node:path");
21+
22+
const logPath = ${JSON.stringify(logPath)};
23+
fs.appendFileSync(logPath, \
24+
JSON.stringify(process.argv.slice(2)) + "\\n",
25+
"utf8",
26+
);
27+
28+
const args = process.argv.slice(2);
29+
if (args.includes("archive")) {
30+
process.exit(1);
31+
}
32+
33+
if (args.includes("clone")) {
34+
const outDir = args[args.length - 1];
35+
fs.mkdirSync(outDir, { recursive: true });
36+
}
37+
38+
process.exit(0);
39+
`;
40+
await writeFile(scriptPath, payload, "utf8");
41+
// Non-Windows: mark the script executable (it carries its own shebang) and stop.
if (process.platform !== "win32") {
42+
await chmod(scriptPath, 0o755);
43+
return;
44+
}
45+
// Windows: add a git.cmd wrapper that runs the script with the current Node binary.
const cmdPath = path.join(binDir, "git.cmd");
46+
const cmdPayload = `@echo off
47+
"${process.execPath}" "${scriptPath}" %*
48+
`;
49+
await writeFile(cmdPath, cmdPayload, "utf8");
50+
};
51+
52+
// Runs a sync against a shimmed `git` binary and checks that the `checkout`
// and `sparse-checkout` invocations carry `protocol.file.allow=always`.
test("sync uses file protocol allowlist for local cache checkout", async () => {
  const tmpRoot = path.join(
    tmpdir(),
    `docs-cache-git-protocol-${Date.now().toString(36)}`,
  );
  const binDir = path.join(tmpRoot, "bin");
  const logPath = path.join(tmpRoot, "git.log");
  const cacheDir = path.join(tmpRoot, ".docs");
  const configPath = path.join(tmpRoot, "docs.config.json");
  const gitCacheRoot = path.join(tmpRoot, "git-cache");
  const repo = "https://example.com/repo.git";
  const repoHash = hashRepoUrl(repo);
  const cachePath = path.join(gitCacheRoot, repoHash);

  // Snapshot every environment variable this test mutates so that all of
  // them can be put back afterwards.
  const previousEnv = new Map(
    ["PATH", "DOCS_CACHE_GIT_DIR", "GIT_TERMINAL_PROMPT"].map((key) => [
      key,
      process.env[key],
    ]),
  );
  const restoreEnv = () => {
    for (const [key, value] of previousEnv) {
      if (value === undefined) {
        // Assigning `undefined` to process.env stores the string
        // "undefined"; a variable that was unset has to be deleted.
        delete process.env[key];
      } else {
        process.env[key] = value;
      }
    }
  };

  // Setup lives inside the try block so the temp directory and environment
  // are cleaned up even when a setup step fails.
  try {
    await mkdir(binDir, { recursive: true });
    await mkdir(cachePath, { recursive: true });
    await writeGitShim(binDir, logPath);
    await writeFile(logPath, "", "utf8");

    const config = {
      $schema:
        "https://raw.githubusercontent.com/fbosch/docs-cache/main/docs.config.schema.json",
      sources: [
        {
          id: "local",
          repo,
          include: ["docs"],
        },
      ],
    };
    await writeFile(configPath, `${JSON.stringify(config, null, 2)}\n`, "utf8");

    // Put the shim directory first on PATH so it is found before any real git.
    process.env.PATH = `${binDir}${path.delimiter}${previousEnv.get("PATH") ?? ""}`;
    process.env.DOCS_CACHE_GIT_DIR = gitCacheRoot;
    process.env.GIT_TERMINAL_PROMPT = "0";

    await runSync(
      {
        configPath,
        cacheDirOverride: cacheDir,
        json: false,
        lockOnly: false,
        offline: false,
        failOnMiss: false,
      },
      {
        resolveRemoteCommit: async () => ({
          repo,
          ref: "HEAD",
          resolvedCommit: "abc123",
        }),
      },
    );

    // The shim logs one JSON array of arguments per invocation.
    const logRaw = await readFile(logPath, "utf8");
    const entries = logRaw
      .split("\n")
      .filter(Boolean)
      .map((line) => JSON.parse(line));
    assert.ok(entries.length > 0, "expected git shim to be invoked");
    const checkout = entries.find((args) => args.includes("checkout"));
    assert.ok(checkout, "expected checkout to run via git shim");
    assert.ok(
      checkout.includes("protocol.file.allow=always"),
      "expected checkout to allow file protocol",
    );
    const sparse = entries.find((args) => args.includes("sparse-checkout"));
    assert.ok(sparse, "expected sparse-checkout to run via git shim");
    assert.ok(
      sparse.includes("protocol.file.allow=always"),
      "expected sparse-checkout to allow file protocol",
    );
  } finally {
    restoreEnv();
    await rm(tmpRoot, { recursive: true, force: true });
  }
});

0 commit comments

Comments
 (0)