Skip to content

Commit fa61ff6

Browse files
authored
🤖 Inject mode transition context for flaky test #224 (#225)
## Problem

Integration test `should include mode-specific instructions in system message` is flaky (#224). When testing mode switches mid-conversation, models sometimes respond with the wrong mode marker (`[PLAN_MODE_ACTIVE]` instead of `[EXEC_MODE_ACTIVE]`).

**Root cause**: Models see conflicting signals:

- System message says "You're in EXEC mode"
- Conversation history shows they just said they were in PLAN mode
- User prompt is vague ("Please respond.") with no transition signal

Models sometimes prioritize conversation consistency over system instructions.

## Solution

**Inject mode transition as a temporal user message in the conversation flow.**

When mode changes, insert a synthetic user message before the final user message:

```
[Mode switched from plan to exec. Follow exec mode instructions.]
```

**Benefits**:

- ✅ Temporal - transition happens in natural conversation flow
- ✅ Models handle in-message context better than system changes
- ✅ Simple - no metadata persistence complexity
- ✅ Works for both tests and production usage

## Implementation

- Added `injectModeTransition()` to `modelMessageTransform.ts`
- Operates on `CmuxMessage[]` where `metadata.mode` is available
- Called after `addInterruptedSentinel`, before converting to `ModelMessage`
- Mode persisted in assistant message metadata for next request
- Added comprehensive unit tests (5 test cases)

## Testing

- ✅ All unit tests pass
- ✅ CI will verify improved integration test reliability
- ✅ Works for real-world mode switches mid-conversation

Closes #224
1 parent b44e1f0 commit fa61ff6

File tree

4 files changed

+222
-1
lines changed

4 files changed

+222
-1
lines changed

src/services/aiService.ts

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import {
2222
validateAnthropicCompliance,
2323
addInterruptedSentinel,
2424
filterEmptyAssistantMessages,
25+
injectModeTransition,
2526
} from "@/utils/messages/modelMessageTransform";
2627
import { applyCacheControl } from "@/utils/ai/cacheStrategy";
2728
import type { HistoryService } from "./historyService";
@@ -450,9 +451,12 @@ export class AIService extends EventEmitter {
450451
// Add [CONTINUE] sentinel to partial messages (for model context)
451452
const messagesWithSentinel = addInterruptedSentinel(filteredMessages);
452453

454+
// Inject mode transition context if mode changed from last assistant message
455+
const messagesWithModeContext = injectModeTransition(messagesWithSentinel, mode);
456+
453457
// Apply centralized tool-output redaction BEFORE converting to provider ModelMessages
454458
// This keeps the persisted/UI history intact while trimming heavy fields for the request
455-
const redactedForProvider = applyToolOutputRedaction(messagesWithSentinel);
459+
const redactedForProvider = applyToolOutputRedaction(messagesWithModeContext);
456460
log.debug_obj(`${workspaceId}/2a_redacted_messages.json`, redactedForProvider);
457461

458462
// Convert CmuxMessage to ModelMessage format using Vercel AI SDK utility
@@ -525,6 +529,7 @@ export class AIService extends EventEmitter {
525529
timestamp: Date.now(),
526530
model: modelString,
527531
systemMessageTokens,
532+
mode, // Track the mode for this assistant response
528533
});
529534

530535
// Append to history to get historySequence assigned
@@ -676,6 +681,7 @@ export class AIService extends EventEmitter {
676681
{
677682
systemMessageTokens,
678683
timestamp: Date.now(),
684+
mode, // Pass mode so it persists in final history entry
679685
},
680686
providerOptions,
681687
maxOutputTokens,

src/types/message.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ export interface CmuxMetadata {
1818
errorType?: StreamErrorType; // Error type/category if stream failed
1919
compacted?: boolean; // Whether this message is a compacted summary of previous history
2020
toolPolicy?: ToolPolicy; // Tool policy active when this message was sent (user messages only)
21+
mode?: string; // The mode (plan/exec/etc) active when this message was sent (assistant messages only)
2122
}
2223

2324
// Extended tool part type that supports interrupted tool calls (input-available state)

src/utils/messages/modelMessageTransform.test.ts

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import {
44
transformModelMessages,
55
validateAnthropicCompliance,
66
addInterruptedSentinel,
7+
injectModeTransition,
78
} from "./modelMessageTransform";
89
import type { CmuxMessage } from "@/types/message";
910

@@ -712,3 +713,138 @@ describe("modelMessageTransform", () => {
712713
});
713714
});
714715
});
716+
717+
/**
 * Unit tests for injectModeTransition.
 *
 * Covers the single case where a transition message is injected (the requested mode differs
 * from the mode recorded on the last assistant message) and the cases where the history must
 * be returned unchanged: same mode, no prior assistant mode, no requested mode, and no user
 * message to inject before.
 */
describe("injectModeTransition", () => {
  // Builds a text-only message fixture; `mode` is only set on metadata when provided.
  const textMessage = (
    id: string,
    role: "user" | "assistant",
    text: string,
    timestamp: number,
    mode?: string
  ): CmuxMessage => ({
    id,
    role,
    parts: [{ type: "text", text }],
    metadata: mode === undefined ? { timestamp } : { timestamp, mode },
  });

  it("should inject transition message when mode changes", () => {
    const messages: CmuxMessage[] = [
      textMessage("user-1", "user", "Let's plan a feature", 1000),
      textMessage("assistant-1", "assistant", "Here's the plan...", 2000, "plan"),
      textMessage("user-2", "user", "Now execute it", 3000),
    ];

    const result = injectModeTransition(messages, "exec");

    // Expected order: user, assistant, mode-transition, user
    expect(result.length).toBe(4);

    // The synthetic transition sits in the third slot
    const transition = result[2];
    expect(transition.role).toBe("user");
    expect(transition.metadata?.synthetic).toBe(true);
    expect(transition.parts[0]).toMatchObject({
      type: "text",
      text: "[Mode switched from plan to exec. Follow exec mode instructions.]",
    });

    // Every original message survives; the final user message moves one slot to the right
    expect(result[0]).toEqual(messages[0]);
    expect(result[1]).toEqual(messages[1]);
    expect(result[3]).toEqual(messages[2]);
  });

  it("should not inject transition when mode is the same", () => {
    const messages: CmuxMessage[] = [
      textMessage("user-1", "user", "Let's plan", 1000),
      textMessage("assistant-1", "assistant", "Planning...", 2000, "plan"),
      textMessage("user-2", "user", "Continue planning", 3000),
    ];

    const result = injectModeTransition(messages, "plan");

    // History comes back untouched
    expect(result.length).toBe(3);
    expect(result).toEqual(messages);
  });

  it("should not inject transition when no previous mode exists", () => {
    const messages: CmuxMessage[] = [textMessage("user-1", "user", "Hello", 1000)];

    const result = injectModeTransition(messages, "exec");

    // No assistant message exists to compare modes against
    expect(result.length).toBe(1);
    expect(result).toEqual(messages);
  });

  it("should not inject transition when no mode specified", () => {
    const messages: CmuxMessage[] = [
      textMessage("user-1", "user", "Hello", 1000),
      textMessage("assistant-1", "assistant", "Hi", 2000, "plan"),
      textMessage("user-2", "user", "Continue", 3000),
    ];

    const result = injectModeTransition(messages, undefined);

    // History comes back untouched
    expect(result.length).toBe(3);
    expect(result).toEqual(messages);
  });

  it("should handle conversation with no user messages", () => {
    const messages: CmuxMessage[] = [
      textMessage("assistant-1", "assistant", "Hi", 2000, "plan"),
    ];

    const result = injectModeTransition(messages, "exec");

    // Without a user message there is nothing to inject before
    expect(result.length).toBe(1);
    expect(result).toEqual(messages);
  });
});

src/utils/messages/modelMessageTransform.ts

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,84 @@ export function addInterruptedSentinel(messages: CmuxMessage[]): CmuxMessage[] {
106106
return result;
107107
}
108108

109+
/**
 * Inject mode transition context when the mode changes mid-conversation.
 *
 * The mode recorded in `metadata.mode` of the most recent assistant message is compared with
 * `currentMode`. When the two differ, a synthetic user message announcing the switch is
 * inserted immediately before the final user message, so the model sees the transition in
 * the conversation flow right ahead of the request it is about to answer.
 *
 * The input array is never mutated. It is returned as-is (same reference) when:
 * - `currentMode` is not provided or `messages` is empty,
 * - no assistant message exists, or the last one carries no mode,
 * - the last assistant mode equals `currentMode`,
 * - no user message follows the last assistant message. Injecting in front of an earlier
 *   user message would place the "switched" note before the assistant turn that was still
 *   running in the old mode, which misrepresents the order of events.
 *
 * @param messages The conversation history
 * @param currentMode The mode for the upcoming assistant response (e.g., "plan", "exec")
 * @returns Messages with mode transition context injected if needed
 */
export function injectModeTransition(messages: CmuxMessage[], currentMode?: string): CmuxMessage[] {
  // Without a target mode or any history there is nothing to compare.
  if (!currentMode || messages.length === 0) {
    return messages;
  }

  // One backward scan finds the most recent assistant and user messages (no array copy).
  let lastAssistantIndex = -1;
  let lastUserIndex = -1;
  for (let i = messages.length - 1; i >= 0; i--) {
    const role = messages[i].role;
    if (role === "assistant" && lastAssistantIndex === -1) {
      lastAssistantIndex = i;
    } else if (role === "user" && lastUserIndex === -1) {
      lastUserIndex = i;
    }
    if (lastAssistantIndex !== -1 && lastUserIndex !== -1) {
      break;
    }
  }

  // No assistant turn yet means there is no previous mode to transition from.
  if (lastAssistantIndex === -1) {
    return messages;
  }

  const lastMode = messages[lastAssistantIndex].metadata?.mode;

  // No transition if the previous mode is unknown or unchanged.
  if (!lastMode || lastMode === currentMode) {
    return messages;
  }

  // The note must land after the old-mode assistant turn. This also covers the
  // "no user message at all" case, where lastUserIndex is still -1.
  if (lastUserIndex < lastAssistantIndex) {
    return messages;
  }

  // Read the clock once so the id suffix and the timestamp always agree.
  const now = Date.now();
  const transitionMessage: CmuxMessage = {
    id: `mode-transition-${now}`,
    role: "user",
    parts: [
      {
        type: "text",
        text: `[Mode switched from ${lastMode} to ${currentMode}. Follow ${currentMode} mode instructions.]`,
      },
    ],
    metadata: {
      timestamp: now,
      synthetic: true,
    },
  };

  // Everything before the last user message, the transition note, then the rest.
  return [
    ...messages.slice(0, lastUserIndex),
    transitionMessage,
    ...messages.slice(lastUserIndex),
  ];
}
186+
109187
/**
110188
* Split assistant messages with mixed text and tool calls into separate messages
111189
* to comply with Anthropic's requirement that tool_use blocks must be immediately

0 commit comments

Comments
 (0)