NOTE(review): this patch was rebuilt from a copy whose line breaks and
indentation had been collapsed. The context-line indentation in the two
agents/ hunks is inferred, not recovered -- confirm it against the target
files before applying. The blob id on the tests/ index line predates the
edits made to that file here.

diff --git a/agents/s03_todo_write.py b/agents/s03_todo_write.py
index 9ca805c05..9722ffc59 100644
--- a/agents/s03_todo_write.py
+++ b/agents/s03_todo_write.py
@@ -186,7 +186,7 @@ def agent_loop(messages: list):
                     used_todo = True
         rounds_since_todo = 0 if used_todo else rounds_since_todo + 1
         if rounds_since_todo >= 3:
-            results.insert(0, {"type": "text", "text": "Update your todos."})
+            results.append({"type": "text", "text": "Update your todos."})
         messages.append({"role": "user", "content": results})
 
 
diff --git a/agents/s_full.py b/agents/s_full.py
index d4dcfd3c6..9aa916d96 100644
--- a/agents/s_full.py
+++ b/agents/s_full.py
@@ -698,7 +698,7 @@ def agent_loop(messages: list):
         # s03: nag reminder (only when todo workflow is active)
         rounds_without_todo = 0 if used_todo else rounds_without_todo + 1
         if TODO.has_open_items() and rounds_without_todo >= 3:
-            results.insert(0, {"type": "text", "text": "Update your todos."})
+            results.append({"type": "text", "text": "Update your todos."})
         messages.append({"role": "user", "content": results})
         # s06: manual compress
         if manual_compress:
diff --git a/tests/test_tool_result_ordering.py b/tests/test_tool_result_ordering.py
new file mode 100644
index 000000000..c3d9e31f6
--- /dev/null
+++ b/tests/test_tool_result_ordering.py
@@ -0,0 +1,122 @@
+"""Regression tests for block ordering in tool-result user messages.
+
+Each agent loop answers a tool-use round with a single user message.  When
+the "Update your todos." reminder is added to that message it must come after
+the tool_result blocks, not before them.
+"""
+
+import os
+import sys
+import types
+import unittest
+from pathlib import Path
+from types import SimpleNamespace
+from unittest import mock
+
+
+REPO_ROOT = Path(__file__).resolve().parents[1]
+if str(REPO_ROOT) not in sys.path:
+    sys.path.insert(0, str(REPO_ROOT))
+
+# Provide a model id before the agent modules are imported below
+# (presumably they read MODEL_ID at import time -- confirm).
+os.environ.setdefault("MODEL_ID", "test-model")
+
+fake_anthropic = types.ModuleType("anthropic")
+
+
+class FakeAnthropic:
+    """Stand-in for ``anthropic.Anthropic`` that accepts any arguments."""
+
+    def __init__(self, *args, **kwargs):
+        self.messages = SimpleNamespace(create=None)
+
+
+# Register stub modules so the agent imports below resolve without the real
+# packages; ``setdefault`` leaves an already-imported module untouched.
+setattr(fake_anthropic, "Anthropic", FakeAnthropic)
+sys.modules.setdefault("anthropic", fake_anthropic)
+
+fake_dotenv = types.ModuleType("dotenv")
+setattr(fake_dotenv, "load_dotenv", lambda *args, **kwargs: None)
+sys.modules.setdefault("dotenv", fake_dotenv)
+
+# Must come after the stub registration above, hence the late import.
+import agents.s03_todo_write as s03_todo_write  # noqa: E402
+import agents.s_full as s_full  # noqa: E402
+
+
+class FakeMessagesAPI:
+    """Scripted replacement for ``client.messages``."""
+
+    def __init__(self, responses):
+        self._responses = iter(responses)
+
+    def create(self, **kwargs):
+        """Return the next scripted response; request arguments are ignored."""
+        try:
+            return next(self._responses)
+        except StopIteration:
+            # Fail the test with a readable message instead of leaking a bare
+            # StopIteration out of the agent loop.
+            raise AssertionError(
+                "agent_loop asked for more responses than the test scripted"
+            ) from None
+
+
+def make_tool_use_response(tool_id: str, tool_name: str, tool_input: dict):
+    """Build a fake response whose only content block is one tool_use."""
+    tool_use = SimpleNamespace(
+        type="tool_use", id=tool_id, name=tool_name, input=tool_input
+    )
+    return SimpleNamespace(stop_reason="tool_use", content=[tool_use])
+
+
+def scripted_responses():
+    """Return three bash tool-use rounds followed by an end_turn reply."""
+    tool_rounds = [
+        make_tool_use_response(f"tool-{n}", "bash", {"command": "pwd"})
+        for n in (1, 2, 3)
+    ]
+    return tool_rounds + [SimpleNamespace(stop_reason="end_turn", content="done")]
+
+
+class ToolResultOrderingTests(unittest.TestCase):
+    """The todo reminder must follow the tool results it is sent with."""
+
+    def run_scripted_loop(self, module):
+        """Run ``module.agent_loop`` on the scripted API; return the messages.
+
+        ``client`` and ``TOOL_HANDLERS`` are patched on *module* only while the
+        loop runs, with the ``bash`` handler replaced by a stub returning "ok".
+        """
+        messages = [{"role": "user", "content": "do work"}]
+        fake_api = FakeMessagesAPI(scripted_responses())
+        fake_client = SimpleNamespace(messages=fake_api)
+        handlers = {**module.TOOL_HANDLERS, "bash": lambda **kwargs: "ok"}
+        with mock.patch.object(module, "client", fake_client):
+            with mock.patch.object(module, "TOOL_HANDLERS", handlers):
+                module.agent_loop(messages)
+        return messages
+
+    def assert_reminder_follows_tool_results(self, messages):
+        """Assert the third round's message starts with a result, ends with text."""
+        # Assumes the final assistant reply is the last message, which makes
+        # messages[-2] the user message built in the third tool round.
+        third_user_message = messages[-2]["content"]
+        self.assertEqual(third_user_message[0]["type"], "tool_result")
+        self.assertEqual(third_user_message[-1]["type"], "text")
+
+    def test_s03_places_tool_results_before_reminders(self):
+        messages = self.run_scripted_loop(s03_todo_write)
+        self.assert_reminder_follows_tool_results(messages)
+
+    def test_s_full_places_tool_results_before_reminders(self):
+        # s_full only sends the reminder while TODO.has_open_items() is true.
+        with mock.patch.object(s_full.TODO, "has_open_items", lambda: True):
+            messages = self.run_scripted_loop(s_full)
+        self.assert_reminder_follows_tool_results(messages)
+
+
+if __name__ == "__main__":
+    unittest.main()