Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion src/resources/extensions/gsd/auto.ts
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,11 @@ export type {
import { autoSession as s } from "./auto-runtime-state.js";
import { gsdHome } from "./gsd-home.js";
import { createWorkspace, scopeMilestone } from "./workspace.js";
import { registerAutoWorker, markWorkerStopping } from "./db/auto-workers.js";
import {
registerAutoWorker,
markWorkerStopping,
markWorkerStoppingByPid,
} from "./db/auto-workers.js";
import { releaseMilestoneLease } from "./db/milestone-leases.js";
import { normalizeRealPath } from "./paths.js";

Expand Down Expand Up @@ -919,6 +923,7 @@ export function checkRemoteAutoSession(projectRoot: string): {

if (!isLockProcessAlive(lock)) {
// Stale lock from a dead process — not a live remote session
markWorkerStoppingByPid(normalizeRealPath(projectRoot), lock.pid);
return { running: false };
}

Expand Down
3 changes: 3 additions & 0 deletions src/resources/extensions/gsd/crash-recovery.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import {
getAllAutoWorkers,
markWorkerCrashed,
markWorkerStopping,
markWorkerStoppingByPid,
type AutoWorkerRow,
} from "./db/auto-workers.js";
import { forceReleaseLeasesForWorker } from "./db/milestone-leases.js";
Expand Down Expand Up @@ -234,6 +235,8 @@ export function clearLock(basePath: string): void {
deleteRuntimeKv("worker", staleWorker.worker_id, SESSION_FILE_KV_KEY);
return;
}
const lock = readLegacyLock(basePath);
if (lock?.pid) markWorkerStoppingByPid(projectRoot, lock.pid);
const worker = findActiveWorkerForCurrentProcess(projectRoot);
if (worker) deleteRuntimeKv("worker", worker.worker_id, SESSION_FILE_KV_KEY);

Expand Down
25 changes: 25 additions & 0 deletions src/resources/extensions/gsd/db/auto-workers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,31 @@ export function markWorkerStopping(workerId: string): void {
});
}

/**
 * Mark the active worker row for a specific PID/project root as stopping.
 *
 * Used when a dead PID is detected from lock metadata before the worker's
 * heartbeat has expired.
 *
 * This is a silent no-op when the database is unavailable, when no adapter is
 * returned, or when `pid` is not a positive integer. Only rows whose status is
 * currently 'active' are updated; rows in any other status are left untouched.
 *
 * @param projectRootRealpath - Value matched against the
 *   `project_root_realpath` column.
 * @param pid - Process id matched against the `pid` column.
 */
export function markWorkerStoppingByPid(
  projectRootRealpath: string,
  pid: number,
): void {
  if (!isDbAvailable()) return;
  // Reject NaN, fractional, zero and negative values up front.
  if (!Number.isInteger(pid) || pid <= 0) return;

  // Guard instead of asserting non-null: a missing adapter is treated the
  // same way as an unavailable database.
  const db = _getAdapter();
  if (!db) return;

  transaction(() => {
    db.prepare(
      `UPDATE workers
SET status = 'stopping'
WHERE pid = :pid
AND project_root_realpath = :project_root
AND status = 'active'`,
    ).run({
      ":pid": pid,
      ":project_root": projectRootRealpath,
    });
  });
}

/**
* Return all workers whose status is 'active' AND whose heartbeat is within
* the TTL window. Workers older than the TTL are NOT auto-marked crashed
Expand Down
17 changes: 15 additions & 2 deletions src/resources/extensions/gsd/session-lock.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@
import { createRequire } from "node:module";
import { existsSync, readFileSync, readdirSync, mkdirSync, unlinkSync, rmSync, statSync } from "node:fs";
import { join, dirname } from "node:path";
import { gsdRoot } from "./paths.js";
import { gsdRoot, normalizeRealPath } from "./paths.js";
import { atomicWriteSync } from "./atomic-write.js";
import { markWorkerStoppingByPid } from "./db/auto-workers.js";

const _require = createRequire(import.meta.url);

Expand Down Expand Up @@ -281,6 +282,13 @@ export function acquireSessionLock(basePath: string): SessionLockResult {
// Clean up numbered lock file variants from cloud sync conflicts (#1315)
cleanupStrayLockFiles(basePath);

// If lock metadata points to a dead PID, mark that worker row stopping so
// crash diagnostics do not keep surfacing it as active.
const existingPreflight = readExistingLockData(lp);
if (existingPreflight?.pid && !isPidAlive(existingPreflight.pid)) {
markWorkerStoppingByPid(normalizeRealPath(basePath), existingPreflight.pid);
}
Comment thread
jeremymcs marked this conversation as resolved.

// Write our lock data first (the content is informational; the OS lock is the real guard)
const lockData: SessionLockData = {
pid: process.pid,
Expand Down Expand Up @@ -308,7 +316,9 @@ export function acquireSessionLock(basePath: string): SessionLockResult {
const lockDir = lockTarget + ".lock";
if (existsSync(lockDir)) {
const existingData = readExistingLockData(lp);
const isOrphan = !existingData || (existingData.pid && !isPidAlive(existingData.pid));
const deadPid = existingData?.pid && !isPidAlive(existingData.pid) ? existingData.pid : null;
if (deadPid) markWorkerStoppingByPid(normalizeRealPath(basePath), deadPid);
const isOrphan = !existingData || !!deadPid;
if (isOrphan) {
try { rmSync(lockDir, { recursive: true, force: true }); } catch { /* best-effort */ }
try { if (existsSync(lp)) unlinkSync(lp); } catch { /* best-effort */ }
Expand Down Expand Up @@ -344,6 +354,9 @@ export function acquireSessionLock(basePath: string): SessionLockResult {
// Check: if auto.lock is gone and no process is alive, the lock dir is stale.
const existingData = readExistingLockData(lp);
const existingPid = existingData?.pid;
if (existingPid && !isPidAlive(existingPid)) {
markWorkerStoppingByPid(normalizeRealPath(basePath), existingPid);
}

// If no lock file or no alive process, try to clean up and re-acquire (#1245)
if (!existingData || (existingPid && !isPidAlive(existingPid))) {
Expand Down
13 changes: 13 additions & 0 deletions src/resources/extensions/gsd/tests/auto-workers.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import {
heartbeatAutoWorker,
markWorkerCrashed,
markWorkerStopping,
markWorkerStoppingByPid,
getActiveAutoWorkers,
getAutoWorker,
} from "../db/auto-workers.ts";
Expand Down Expand Up @@ -71,6 +72,18 @@ test("markWorkerStopping flips status to stopping", (t) => {
assert.equal(row.status, "stopping");
});

// Registering a worker and then targeting that row's own pid + project root
// must move the row's status to "stopping".
test("markWorkerStoppingByPid flips matching active row to stopping", (t) => {
  const projectDir = makeBase();
  t.after(() => cleanup(projectDir));
  openDatabase(join(projectDir, ".gsd", "gsd.db"));

  const workerId = registerAutoWorker({ projectRootRealpath: projectDir });

  // Use the pid stored on the freshly registered row as the lookup key.
  const { pid: registeredPid } = getAutoWorker(workerId)!;
  markWorkerStoppingByPid(projectDir, registeredPid);

  const { status } = getAutoWorker(workerId)!;
  assert.equal(status, "stopping");
});

test("markWorkerCrashed flips status to crashed", (t) => {
const base = makeBase();
t.after(() => cleanup(base));
Expand Down
35 changes: 35 additions & 0 deletions src/resources/extensions/gsd/tests/session-lock-regression.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ import {
isSessionLockHeld,
} from '../session-lock.ts';
import { gsdRoot } from '../paths.ts';
import { openDatabase, closeDatabase, _getAdapter } from "../gsd-db.ts";
import { registerAutoWorker, getAutoWorker } from "../db/auto-workers.ts";
import { normalizeRealPath } from "../paths.ts";
import { describe, test } from 'node:test';
import assert from 'node:assert/strict';

Expand Down Expand Up @@ -94,6 +97,38 @@ describe('session-lock-regression', async () => {
}
}

// ─── 2b. Dead lock PID is marked stopping in workers table ────────────
// Writes an auto.lock whose PID belongs to no running process, points a
// registered worker row at that PID, and checks that acquiring the session
// lock both succeeds and leaves the worker row in "stopping".
console.log('\n=== 2b. dead lock PID marks worker stopping ===');
{
  const base = mkdtempSync(join(tmpdir(), 'gsd-session-lock-'));
  mkdirSync(join(base, '.gsd'), { recursive: true });

  try {
    openDatabase(join(base, ".gsd", "gsd.db"));
    const projectRoot = normalizeRealPath(base);
    const workerId = registerAutoWorker({ projectRootRealpath: projectRoot });

    // A fixed PID can belong to a live process on hosts with a large pid
    // range, which would make this test flaky. Probe with signal 0 (existence
    // check only, nothing is delivered) until a PID reports ESRCH. Success or
    // any other error (e.g. EPERM) means the PID is in use, so keep scanning.
    const firstCandidate = 99999;
    const candidateLimit = firstCandidate + 1000;
    let deadPid = firstCandidate;
    for (; deadPid < candidateLimit; deadPid++) {
      try {
        process.kill(deadPid, 0);
      } catch (err: unknown) {
        if ((err as NodeJS.ErrnoException).code === "ESRCH") break;
      }
    }
    assert.ok(deadPid < candidateLimit, "found an unused PID for the stale lock");

    writeFileSync(join(gsdRoot(base), "auto.lock"), JSON.stringify({
      pid: deadPid,
      startedAt: new Date().toISOString(),
      unitType: "starting",
      unitId: "bootstrap",
      unitStartedAt: new Date().toISOString(),
    }));
    // Align worker PID with stale lock metadata.
    _getAdapter()?.prepare("UPDATE workers SET pid = :pid WHERE worker_id = :id")
      .run({ ":pid": deadPid, ":id": workerId });

    const result = acquireSessionLock(base);
    assert.ok(result.acquired, "acquire recovers stale lock");
    assert.equal(getAutoWorker(workerId)?.status, "stopping");
    releaseSessionLock(base);
  } finally {
    // Always close the DB and remove the temp project, even on assertion failure.
    try { closeDatabase(); } catch { /* noop */ }
    rmSync(base, { recursive: true, force: true });
  }
}

// ─── 3. updateSessionLock preserves lock data ─────────────────────────
console.log('\n=== 3. updateSessionLock writes metadata ===');
{
Expand Down
Loading