diff --git a/packages/api/src/domains/cats/services/agents/invocation/invoke-helpers.ts b/packages/api/src/domains/cats/services/agents/invocation/invoke-helpers.ts index 524f58f08..8d5f44894 100644 --- a/packages/api/src/domains/cats/services/agents/invocation/invoke-helpers.ts +++ b/packages/api/src/domains/cats/services/agents/invocation/invoke-helpers.ts @@ -44,6 +44,12 @@ export function classifyResumeFailure(message: string | undefined): ResumeFailur if (/(No conversation found with session ID|no rollout found|missing_rollout)/i.test(message)) { return 'missing_session'; } + // #300: Gemini exit code 42 = "no conversation found" — treat as missing_session + // so retry-without-session can recover automatically. + // Scoped to "Gemini CLI:" prefix to avoid cross-provider false positives. + if (/Gemini CLI:.*CLI 异常退出 \(code:\s*42\b/i.test(message)) { + return 'missing_session'; + } if (/CLI 异常退出 \(code:\s*(?:\d+|null)(?:,\s*signal:\s*[^)]+)?\)/i.test(message)) { return 'cli_exit'; } diff --git a/packages/api/src/domains/cats/services/agents/invocation/invoke-single-cat.ts b/packages/api/src/domains/cats/services/agents/invocation/invoke-single-cat.ts index 889ce5ffe..225de9b28 100644 --- a/packages/api/src/domains/cats/services/agents/invocation/invoke-single-cat.ts +++ b/packages/api/src/domains/cats/services/agents/invocation/invoke-single-cat.ts @@ -494,6 +494,11 @@ export async function* invokeSingleCat(deps: InvocationDeps, params: InvocationP } } } + // NOTE (#300): When chain is empty but Redis holds a stale sessionId (e.g. after + // restart clears in-memory chain store), the stale ID may cause --resume to fail + // (Gemini exit 42). We intentionally do NOT clear sessionId here because an empty + // chain is also the normal state for fresh threads. Instead, RC2 self-heals: + // classifyResumeFailure maps exit code 42 → missing_session → retry without session. } catch { // R9 P1: Fail-closed — if chain store read fails, discard sessionId. // Rationale: requestSeal accepted = hard seal boundary. When we can't diff --git a/packages/api/test/invoke-single-cat.test.js b/packages/api/test/invoke-single-cat.test.js index bb2e3c3aa..f24f2f910 100644 --- a/packages/api/test/invoke-single-cat.test.js +++ b/packages/api/test/invoke-single-cat.test.js @@ -1120,6 +1120,12 @@ describe('invokeSingleCat audit events (P1 fix)', () => { // isTransientCliExitCode1 (line 1393), so missing_session takes priority → shouldRetryWithoutSession assert.equal(classifyResumeFailure('Gemini CLI: CLI 异常退出 (code: 1, signal: none)'), 'cli_exit'); assert.equal(classifyResumeFailure('Gemini CLI: CLI 异常退出 (code: null, signal: SIGTERM)'), 'cli_exit'); + // #300: Gemini exit code 42 = missing session, must trigger retry-without-session + assert.equal( + classifyResumeFailure('Gemini CLI: CLI 异常退出 (code: 42, signal: none)'), + 'missing_session', + '#300: Gemini exit code 42 must be classified as missing_session, not cli_exit', + ); assert.equal(classifyResumeFailure('authentication failed: login required'), 'auth'); assert.equal( classifyResumeFailure( @@ -2113,6 +2119,67 @@ describe('invokeSingleCat audit events (P1 fix)', () => { assert.equal(optionsSeen[0].sessionId, undefined, 'getChain() failure must discard sessionId (fail-closed, R9 P1)'); }); + it('#300: Gemini exit code 42 triggers self-heal retry without session', async () => { + // Scenario: after restart, stale sessionId causes Gemini CLI to exit with code 42 + // ("no conversation found"). classifyResumeFailure must map this to missing_session, + // triggering retry-without-session to recover automatically. + let invokeCount = 0; + const sessionDeletes = []; + const optionsSeen = []; + const service = { + async *invoke(_prompt, options) { + optionsSeen.push({ ...options }); + invokeCount++; + if (invokeCount === 1) { + yield { + type: 'error', + catId: 'gemini25', + error: 'Gemini CLI: CLI 异常退出 (code: 42, signal: none)', + timestamp: Date.now(), + }; + yield { type: 'done', catId: 'gemini25', timestamp: Date.now() }; + return; + } + yield { type: 'session_init', catId: 'gemini25', sessionId: 'fresh-sess', timestamp: Date.now() }; + yield { type: 'text', catId: 'gemini25', content: 'recovered', timestamp: Date.now() }; + yield { type: 'done', catId: 'gemini25', timestamp: Date.now() }; + }, + }; + + const deps = makeDeps(); + deps.sessionManager = { + get: async () => 'stale-session-from-redis', + store: async () => {}, + delete: async (u, c, t) => { + sessionDeletes.push(`${u}:${c}:${t}`); + }, + }; + + const msgs = await collect( + invokeSingleCat(deps, { + catId: 'gemini25', + service, + prompt: 'test', + userId: 'u1', + threadId: 'thread-gemini-code42', + isLastCat: true, + }), + ); + + assert.equal(invokeCount, 2, '#300: should retry once after Gemini exit code 42'); + assert.equal(optionsSeen[0].sessionId, 'stale-session-from-redis', 'first attempt uses stale session'); + assert.equal(optionsSeen[1].sessionId, undefined, 'retry attempt drops --resume session'); + assert.deepEqual( + sessionDeletes, + ['u1:gemini25:thread-gemini-code42'], + '#300: stale session must be deleted from persistent store before retry', + ); + assert.ok( + msgs.some((m) => m.type === 'text' && m.content === 'recovered'), + '#300: should recover via retry without session', + ); + }); + it('R11 P1-1: uses active record cliSessionId when it differs from sessionManager (RED)', async () => { // Scenario: sessionManager.get() returns 'cli-old' but the active SessionRecord // has cliSessionId='cli-new' (CLI restarted and session_init updated the record).