Skip to content

Commit 58e1b4b

Browse files
authored
fix(generate_response): Fix flaky concurrent tests for generate_response (#1011)
## AgentScope-Java Version [The version of AgentScope-Java you are working on, e.g. 1.0.9, check your pom.xml dependency version or run `mvn dependency:tree | grep agentscope-parent:pom`(only mac/linux)] ## Description ### **Background** The unit test code `testConcurrencyConflictStructuredOutput ` contains two `agent.call` actions. The first thread's `agent.call` writes `generate_response` into the *Toolkit* and then clears it in the `doFinally` block. The second thread's `agent.call` also writes `generate_response` into the *Toolkit* and then clears it in the `doFinally` block. ### **Problem** The issue occurs when the first thread executes `doFinally` later than the second thread writes `generate_response`. In this case, the first thread will clear the content written by the second thread into the *Toolkit*, causing the second thread to fail to retrieve `generate_response`. ### **Objective** Avoid "late cleanup" mistakenly deleting a tool with the same name that has already been overwritten by another call. ### **Implementation Approach** Use Java's `ConcurrentHashMap.remove(Object key, Object value)` to ensure only the tool written by the current thread is removed, while preventing race conditions. ### **How to Verify** Add a new test method: `io.agentscope.core.agent.ReActAgentStructuredOutputTest#testConcurrencyConflictStructuredOutput_repeated`. Test the same method 25,000 times. If no exceptions occur, it can be considered basically problem-free. ## Checklist Please check the following items before code is ready to be reviewed. - [x] Code has been formatted with `mvn spotless:apply` - [x] All tests are passing (`mvn test`) - [x] Javadoc comments are complete and follow project conventions - [x] Related documentation has been updated (e.g. links, examples, etc.) - [x] Code is ready for review
1 parent 7d898be commit 58e1b4b

File tree

5 files changed

+143
-4
lines changed

5 files changed

+143
-4
lines changed

agentscope-core/src/main/java/io/agentscope/core/agent/StructuredOutputCapableAgent.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -189,9 +189,9 @@ private Mono<Msg> executeWithStructuredOutput(
189189
})
190190
.doFinally(
191191
signal -> {
192-
// Cleanup: remove hook and unregister tool
193192
removeHook(hook);
194-
toolkit.removeTool(STRUCTURED_OUTPUT_TOOL_NAME);
193+
toolkit.removeToolIfSame(
194+
STRUCTURED_OUTPUT_TOOL_NAME, structuredOutputTool);
195195
});
196196
});
197197
}

agentscope-core/src/main/java/io/agentscope/core/tool/ToolRegistry.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,22 @@ void removeTool(String toolName) {
114114
registeredTools.remove(toolName);
115115
}
116116

117+
/**
118+
* Atomically remove a tool only if the current instance matches the expected one.
119+
* Uses {@link ConcurrentHashMap#remove(Object, Object)} to avoid TOCTOU races.
120+
*
121+
* @param toolName Tool name to remove
122+
* @param expected The expected AgentTool instance (identity comparison)
123+
* @return true if the tool was removed, false if it was already replaced or absent
124+
*/
125+
boolean removeToolIfSame(String toolName, AgentTool expected) {
126+
boolean removed = tools.remove(toolName, expected);
127+
if (removed) {
128+
registeredTools.remove(toolName);
129+
}
130+
return removed;
131+
}
132+
117133
/**
118134
* Remove multiple tools by names.
119135
*

agentscope-core/src/main/java/io/agentscope/core/tool/Toolkit.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -601,6 +601,21 @@ public void removeTool(String toolName) {
601601
toolRegistry.removeTool(toolName);
602602
}
603603

604+
/**
605+
* Atomically remove a tool only if the registered instance is the expected one.
606+
*
607+
* @param toolName Name of the tool to remove
608+
* @param expected The expected AgentTool instance (identity comparison)
609+
* @return true if the tool was removed, false if it was already replaced or absent
610+
*/
611+
public boolean removeToolIfSame(String toolName, AgentTool expected) {
612+
if (!config.isAllowToolDeletion()) {
613+
logger.warn("Tool deletion is disabled - ignoring removal of tool: {}", toolName);
614+
return false;
615+
}
616+
return toolRegistry.removeToolIfSame(toolName, expected);
617+
}
618+
604619
/**
605620
* Remove tool groups and all tools within them.
606621
*

agentscope-core/src/test/java/io/agentscope/core/agent/ReActAgentStructuredOutputTest.java

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,10 @@
3737
import java.util.List;
3838
import java.util.Map;
3939
import org.junit.jupiter.api.BeforeEach;
40+
import org.junit.jupiter.api.DisplayName;
41+
import org.junit.jupiter.api.RepeatedTest;
4042
import org.junit.jupiter.api.Test;
43+
import org.junit.jupiter.api.condition.EnabledIfSystemProperty;
4144
import reactor.core.scheduler.Schedulers;
4245

4346
class ReActAgentStructuredOutputTest {
@@ -536,6 +539,27 @@ void testStructuredOutputPreservesThinkingBlock() {
536539

537540
@Test
538541
void testConcurrencyConflictStructuredOutput() {
542+
runSubscribeOnThenSequentialSecondCallStructuredOutputScenario(toolkit);
543+
}
544+
545+
/**
546+
* Reproduces the subscribeOn(elastic) vs delayed {@code doFinally} race: run many times until
547+
* failure or increase confidence after a fix.
548+
*/
549+
@EnabledIfSystemProperty(named = "agentscope.runStructuredOutputRaceTest", matches = "true")
550+
@RepeatedTest(25000)
551+
@DisplayName("Structured output race: 25000 repetitions (subscribeOn + immediate second call)")
552+
void testConcurrencyConflictStructuredOutput_repeated() {
553+
runSubscribeOnThenSequentialSecondCallStructuredOutputScenario(toolkit);
554+
}
555+
556+
/**
557+
* First call on {@link Schedulers#boundedElastic()}, second call immediately on the calling
558+
* thread — same agent and toolkit. Flaky when structured-output cleanup races the second
559+
* registration.
560+
*/
561+
private void runSubscribeOnThenSequentialSecondCallStructuredOutputScenario(
562+
Toolkit agentToolkit) {
539563
Memory memory = new InMemoryMemory();
540564
Map<String, Object> toolInput =
541565
Map.of(
@@ -587,7 +611,7 @@ void testConcurrencyConflictStructuredOutput() {
587611
.name("weather-agent")
588612
.sysPrompt("You are a weather assistant")
589613
.model(mockModel)
590-
.toolkit(toolkit)
614+
.toolkit(agentToolkit)
591615
.memory(memory)
592616
.build();
593617

@@ -604,7 +628,7 @@ void testConcurrencyConflictStructuredOutput() {
604628
Msg responseMsg =
605629
agent.call(inputMsg, WeatherResponse.class)
606630
.subscribeOn(Schedulers.boundedElastic())
607-
.block(Duration.ofMillis(TestConstants.DEFAULT_TEST_TIMEOUT_MS));
631+
.block(Duration.ofMillis(TestConstants.DEFAULT_TEST_TIMEOUT_MS * 10000));
608632

609633
Msg response2 =
610634
agent.call(inputMsg, WeatherResponse.class)

agentscope-core/src/test/java/io/agentscope/core/tool/ToolkitTest.java

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import static org.junit.jupiter.api.Assertions.assertEquals;
2020
import static org.junit.jupiter.api.Assertions.assertFalse;
2121
import static org.junit.jupiter.api.Assertions.assertNotNull;
22+
import static org.junit.jupiter.api.Assertions.assertSame;
2223
import static org.junit.jupiter.api.Assertions.assertThrows;
2324
import static org.junit.jupiter.api.Assertions.assertTrue;
2425
import static org.mockito.Mockito.mock;
@@ -241,6 +242,89 @@ void testRemoveToolWhenDeletionDisabled() {
241242
assertEquals(initialCount, toolkit.getToolNames().size(), "Tool count should not change");
242243
}
243244

245+
private static AgentTool namedAgentTool(String name) {
246+
return new AgentTool() {
247+
@Override
248+
public String getName() {
249+
return name;
250+
}
251+
252+
@Override
253+
public String getDescription() {
254+
return "test";
255+
}
256+
257+
@Override
258+
public Map<String, Object> getParameters() {
259+
return Map.of("type", "object");
260+
}
261+
262+
@Override
263+
public Mono<ToolResultBlock> callAsync(ToolCallParam param) {
264+
return Mono.empty();
265+
}
266+
};
267+
}
268+
269+
@Test
270+
@DisplayName("removeToolIfSame removes when registered instance matches")
271+
void removeToolIfSame_removesWhenInstanceMatches() {
272+
AgentTool tool = namedAgentTool("remove_if_same_a");
273+
toolkit.registerAgentTool(tool);
274+
assertTrue(toolkit.removeToolIfSame("remove_if_same_a", tool));
275+
assertEquals(null, toolkit.getTool("remove_if_same_a"));
276+
}
277+
278+
@Test
279+
@DisplayName("removeToolIfSame does not remove when a newer tool replaced the name")
280+
void removeToolIfSame_noOpWhenReplaced() {
281+
AgentTool first = namedAgentTool("remove_if_same_b");
282+
AgentTool second = namedAgentTool("remove_if_same_b");
283+
toolkit.registerAgentTool(first);
284+
toolkit.registerAgentTool(second);
285+
assertFalse(
286+
toolkit.removeToolIfSame("remove_if_same_b", first),
287+
"stale instance after replace must return false");
288+
assertSame(second, toolkit.getTool("remove_if_same_b"));
289+
assertTrue(toolkit.removeToolIfSame("remove_if_same_b", second));
290+
assertEquals(null, toolkit.getTool("remove_if_same_b"));
291+
}
292+
293+
@Test
294+
@DisplayName("removeToolIfSame returns false when tool name is absent")
295+
void removeToolIfSame_falseWhenAbsent() {
296+
AgentTool phantom = namedAgentTool("remove_if_same_missing");
297+
assertFalse(
298+
toolkit.removeToolIfSame("remove_if_same_missing", phantom),
299+
"no registration must return false");
300+
assertEquals(null, toolkit.getTool("remove_if_same_missing"));
301+
}
302+
303+
@Test
304+
@DisplayName("removeToolIfSame returns false when expected is not the registered instance")
305+
void removeToolIfSame_falseWhenExpectedNotRegisteredInstance() {
306+
AgentTool registered = namedAgentTool("remove_if_same_wrong_ref");
307+
AgentTool otherSameName = namedAgentTool("remove_if_same_wrong_ref");
308+
toolkit.registerAgentTool(registered);
309+
assertFalse(
310+
toolkit.removeToolIfSame("remove_if_same_wrong_ref", otherSameName),
311+
"non-matching instance must return false and leave registry unchanged");
312+
assertSame(registered, toolkit.getTool("remove_if_same_wrong_ref"));
313+
}
314+
315+
@Test
316+
@DisplayName("removeToolIfSame returns false when deletion is disabled")
317+
void removeToolIfSame_falseWhenDeletionDisabled() {
318+
ToolkitConfig config = ToolkitConfig.builder().allowToolDeletion(false).build();
319+
Toolkit tk = new Toolkit(config);
320+
AgentTool tool = namedAgentTool("remove_if_same_no_delete");
321+
tk.registerAgentTool(tool);
322+
assertFalse(
323+
tk.removeToolIfSame("remove_if_same_no_delete", tool),
324+
"allowToolDeletion=false must return false");
325+
assertSame(tool, tk.getTool("remove_if_same_no_delete"));
326+
}
327+
244328
@Test
245329
@DisplayName("Should ignore removeToolGroups when deletion is disabled")
246330
void testRemoveToolGroupsWhenDeletionDisabled() {

0 commit comments

Comments
 (0)