diff --git a/src/resources/extensions/gsd/auto.ts b/src/resources/extensions/gsd/auto.ts index be8a5c2d30..0f6ff98309 100644 --- a/src/resources/extensions/gsd/auto.ts +++ b/src/resources/extensions/gsd/auto.ts @@ -299,7 +299,11 @@ export type { import { autoSession as s } from "./auto-runtime-state.js"; import { gsdHome } from "./gsd-home.js"; import { createWorkspace, scopeMilestone } from "./workspace.js"; -import { registerAutoWorker, markWorkerStopping } from "./db/auto-workers.js"; +import { + registerAutoWorker, + markWorkerStopping, + markWorkerStoppingByPid, +} from "./db/auto-workers.js"; import { releaseMilestoneLease } from "./db/milestone-leases.js"; import { normalizeRealPath } from "./paths.js"; @@ -919,6 +923,7 @@ export function checkRemoteAutoSession(projectRoot: string): { if (!isLockProcessAlive(lock)) { // Stale lock from a dead process — not a live remote session + markWorkerStoppingByPid(normalizeRealPath(projectRoot), lock.pid); return { running: false }; } diff --git a/src/resources/extensions/gsd/crash-recovery.ts b/src/resources/extensions/gsd/crash-recovery.ts index d77b807a8b..e4996e7f3e 100644 --- a/src/resources/extensions/gsd/crash-recovery.ts +++ b/src/resources/extensions/gsd/crash-recovery.ts @@ -32,6 +32,7 @@ import { getAllAutoWorkers, markWorkerCrashed, markWorkerStopping, + markWorkerStoppingByPid, type AutoWorkerRow, } from "./db/auto-workers.js"; import { forceReleaseLeasesForWorker } from "./db/milestone-leases.js"; @@ -234,6 +235,8 @@ export function clearLock(basePath: string): void { deleteRuntimeKv("worker", staleWorker.worker_id, SESSION_FILE_KV_KEY); return; } + const lock = readLegacyLock(basePath); + if (lock?.pid) markWorkerStoppingByPid(projectRoot, lock.pid); const worker = findActiveWorkerForCurrentProcess(projectRoot); if (worker) deleteRuntimeKv("worker", worker.worker_id, SESSION_FILE_KV_KEY); diff --git a/src/resources/extensions/gsd/db/auto-workers.ts b/src/resources/extensions/gsd/db/auto-workers.ts index 
263e0a5984..0bf6508d0a 100644 --- a/src/resources/extensions/gsd/db/auto-workers.ts +++ b/src/resources/extensions/gsd/db/auto-workers.ts @@ -157,6 +157,31 @@ export function markWorkerStopping(workerId: string): void { }); } +/** + * Mark the active worker row for a specific PID/project root as stopping. + * Used when we detect a dead PID from lock metadata before heartbeat expiry. Returns without touching the database when it is unavailable or when pid is not a positive integer; otherwise a single UPDATE, run inside transaction(), changes only rows that match both pid and project_root_realpath (compared verbatim) and are currently active. + */ +export function markWorkerStoppingByPid( + projectRootRealpath: string, + pid: number, +): void { + if (!isDbAvailable()) return; + if (!Number.isInteger(pid) || pid <= 0) return; + const db = _getAdapter()!; + transaction(() => { + db.prepare( + `UPDATE workers + SET status = 'stopping' + WHERE pid = :pid + AND project_root_realpath = :project_root + AND status = 'active'`, + ).run({ + ":pid": pid, + ":project_root": projectRootRealpath, + }); + }); +} + /** * Return all workers whose status is 'active' AND whose heartbeat is within * the TTL window. Workers older than the TTL are NOT auto-marked crashed diff --git a/src/resources/extensions/gsd/session-lock.ts b/src/resources/extensions/gsd/session-lock.ts index d392d2e59a..72ca8af500 100644 --- a/src/resources/extensions/gsd/session-lock.ts +++ b/src/resources/extensions/gsd/session-lock.ts @@ -19,8 +19,9 @@ import { createRequire } from "node:module"; import { existsSync, readFileSync, readdirSync, mkdirSync, unlinkSync, rmSync, statSync } from "node:fs"; import { join, dirname } from "node:path"; -import { gsdRoot } from "./paths.js"; +import { gsdRoot, normalizeRealPath } from "./paths.js"; import { atomicWriteSync } from "./atomic-write.js"; +import { markWorkerStoppingByPid } from "./db/auto-workers.js"; const _require = createRequire(import.meta.url); @@ -281,6 +282,13 @@ export function acquireSessionLock(basePath: string): SessionLockResult { // Clean up numbered lock file variants from cloud sync conflicts (#1315) cleanupStrayLockFiles(basePath); + // If lock metadata points to a dead PID, mark that worker row stopping 
so + // crash diagnostics do not keep surfacing it as active. + const existingPreflight = readExistingLockData(lp); + if (existingPreflight?.pid && !isPidAlive(existingPreflight.pid)) { + markWorkerStoppingByPid(normalizeRealPath(basePath), existingPreflight.pid); + } + // Write our lock data first (the content is informational; the OS lock is the real guard) const lockData: SessionLockData = { pid: process.pid, @@ -308,7 +316,9 @@ export function acquireSessionLock(basePath: string): SessionLockResult { const lockDir = lockTarget + ".lock"; if (existsSync(lockDir)) { const existingData = readExistingLockData(lp); - const isOrphan = !existingData || (existingData.pid && !isPidAlive(existingData.pid)); + const deadPid = existingData?.pid && !isPidAlive(existingData.pid) ? existingData.pid : null; + if (deadPid) markWorkerStoppingByPid(normalizeRealPath(basePath), deadPid); + const isOrphan = !existingData || !!deadPid; if (isOrphan) { try { rmSync(lockDir, { recursive: true, force: true }); } catch { /* best-effort */ } try { if (existsSync(lp)) unlinkSync(lp); } catch { /* best-effort */ } @@ -344,6 +354,9 @@ export function acquireSessionLock(basePath: string): SessionLockResult { // Check: if auto.lock is gone and no process is alive, the lock dir is stale. 
const existingData = readExistingLockData(lp); const existingPid = existingData?.pid; + if (existingPid && !isPidAlive(existingPid)) { + markWorkerStoppingByPid(normalizeRealPath(basePath), existingPid); + } // If no lock file or no alive process, try to clean up and re-acquire (#1245) if (!existingData || (existingPid && !isPidAlive(existingPid))) { diff --git a/src/resources/extensions/gsd/tests/auto-workers.test.ts b/src/resources/extensions/gsd/tests/auto-workers.test.ts index c9c896d259..c265c3dad7 100644 --- a/src/resources/extensions/gsd/tests/auto-workers.test.ts +++ b/src/resources/extensions/gsd/tests/auto-workers.test.ts @@ -13,6 +13,7 @@ import { heartbeatAutoWorker, markWorkerCrashed, markWorkerStopping, + markWorkerStoppingByPid, getActiveAutoWorkers, getAutoWorker, } from "../db/auto-workers.ts"; @@ -71,6 +72,18 @@ test("markWorkerStopping flips status to stopping", (t) => { assert.equal(row.status, "stopping"); }); +test("markWorkerStoppingByPid flips matching active row to stopping", (t) => { + const base = makeBase(); + t.after(() => cleanup(base)); + openDatabase(join(base, ".gsd", "gsd.db")); + + const id = registerAutoWorker({ projectRootRealpath: base }); + const pid = getAutoWorker(id)!.pid; + markWorkerStoppingByPid(base, pid); /* same unnormalized base that was passed at registration, and the PID read back from the row, so both WHERE filters match verbatim */ + const row = getAutoWorker(id)!; + assert.equal(row.status, "stopping"); +}); + test("markWorkerCrashed flips status to crashed", (t) => { const base = makeBase(); t.after(() => cleanup(base)); diff --git a/src/resources/extensions/gsd/tests/session-lock-regression.test.ts b/src/resources/extensions/gsd/tests/session-lock-regression.test.ts index a494bc3c5a..587949128e 100644 --- a/src/resources/extensions/gsd/tests/session-lock-regression.test.ts +++ b/src/resources/extensions/gsd/tests/session-lock-regression.test.ts @@ -25,6 +25,9 @@ import { isSessionLockHeld, } from '../session-lock.ts'; import { gsdRoot } from '../paths.ts'; +import { openDatabase, closeDatabase, _getAdapter } from "../gsd-db.ts"; +import { 
registerAutoWorker, getAutoWorker } from "../db/auto-workers.ts"; +import { normalizeRealPath } from "../paths.ts"; import { describe, test } from 'node:test'; import assert from 'node:assert/strict'; @@ -94,6 +97,38 @@ describe('session-lock-regression', async () => { } } + // ─── 2b. Dead lock PID is marked stopping in workers table ──────────── + console.log('\n=== 2b. dead lock PID marks worker stopping ==='); + { + const base = mkdtempSync(join(tmpdir(), 'gsd-session-lock-')); + mkdirSync(join(base, '.gsd'), { recursive: true }); + + try { + openDatabase(join(base, ".gsd", "gsd.db")); + const projectRoot = normalizeRealPath(base); + const workerId = registerAutoWorker({ projectRootRealpath: projectRoot }); + const deadPid = 99999; /* NOTE(review): presumes no live process has PID 99999 on the test host; the final status assertion only holds if that PID is dead. Confirm, or derive a guaranteed-dead PID instead. */ + writeFileSync(join(gsdRoot(base), "auto.lock"), JSON.stringify({ + pid: deadPid, + startedAt: new Date().toISOString(), + unitType: "starting", + unitId: "bootstrap", + unitStartedAt: new Date().toISOString(), + })); + // Align worker PID with stale lock metadata. + _getAdapter()?.prepare("UPDATE workers SET pid = :pid WHERE worker_id = :id") + .run({ ":pid": deadPid, ":id": workerId }); + + const result = acquireSessionLock(base); + assert.ok(result.acquired, "acquire recovers stale lock"); + assert.equal(getAutoWorker(workerId)?.status, "stopping"); + releaseSessionLock(base); + } finally { + try { closeDatabase(); } catch { /* noop */ } + rmSync(base, { recursive: true, force: true }); + } + } + // ─── 3. updateSessionLock preserves lock data ───────────────────────── console.log('\n=== 3. updateSessionLock writes metadata ==='); {