diff --git a/agents/s03_todo_write.py b/agents/s03_todo_write.py
index 9ca805c05..9722ffc59 100644
--- a/agents/s03_todo_write.py
+++ b/agents/s03_todo_write.py
@@ -186,7 +186,7 @@ def agent_loop(messages: list):
used_todo = True
rounds_since_todo = 0 if used_todo else rounds_since_todo + 1
if rounds_since_todo >= 3:
- results.insert(0, {"type": "text", "text": "Update your todos."})
+ results.append({"type": "text", "text": "Update your todos."})
messages.append({"role": "user", "content": results})
diff --git a/agents/s_full.py b/agents/s_full.py
index d4dcfd3c6..9aa916d96 100644
--- a/agents/s_full.py
+++ b/agents/s_full.py
@@ -698,7 +698,7 @@ def agent_loop(messages: list):
# s03: nag reminder (only when todo workflow is active)
rounds_without_todo = 0 if used_todo else rounds_without_todo + 1
if TODO.has_open_items() and rounds_without_todo >= 3:
- results.insert(0, {"type": "text", "text": "Update your todos."})
+ results.append({"type": "text", "text": "Update your todos."})
messages.append({"role": "user", "content": results})
# s06: manual compress
if manual_compress:
diff --git a/tests/test_tool_result_ordering.py b/tests/test_tool_result_ordering.py
new file mode 100644
index 000000000..c3d9e31f6
--- /dev/null
+++ b/tests/test_tool_result_ordering.py
@@ -0,0 +1,137 @@
+"""Regression tests: tool results must precede the todo reminder.
+
+When an agent loop adds its todo reminder to a user message that also carries
+tool results, the reminder text block has to come after the ``tool_result``
+blocks. Stub ``anthropic`` and ``dotenv`` modules are registered when the real
+ones have not been imported yet, so importing the agent modules does not
+require them.
+"""
+
+import os
+import sys
+import types
+import unittest
+from pathlib import Path
+from types import SimpleNamespace
+from unittest import mock
+
+
+REPO_ROOT = Path(__file__).resolve().parents[1]
+if str(REPO_ROOT) not in sys.path:
+    sys.path.insert(0, str(REPO_ROOT))
+
+# Default MODEL_ID before the agent imports below (presumably read at import).
+os.environ.setdefault("MODEL_ID", "test-model")
+
+# The exact block both agent loops append as the nag reminder.
+REMINDER_BLOCK = {"type": "text", "text": "Update your todos."}
+
+fake_anthropic = types.ModuleType("anthropic")
+
+
+class FakeAnthropic:
+    """Stub for ``anthropic.Anthropic``; accepts and ignores any arguments."""
+
+    def __init__(self, *args, **kwargs):
+        self.messages = SimpleNamespace(create=None)
+
+
+# setdefault keeps an already-imported real module and installs the stub
+# otherwise, so the agent imports below work without third-party packages.
+setattr(fake_anthropic, "Anthropic", FakeAnthropic)
+sys.modules.setdefault("anthropic", fake_anthropic)
+
+fake_dotenv = types.ModuleType("dotenv")
+setattr(fake_dotenv, "load_dotenv", lambda *args, **kwargs: None)
+sys.modules.setdefault("dotenv", fake_dotenv)
+
+import agents.s03_todo_write as s03_todo_write  # noqa: E402
+import agents.s_full as s_full  # noqa: E402
+
+
+class FakeMessagesAPI:
+    """Scripted replacement for ``client.messages``: replays canned responses."""
+
+    def __init__(self, responses):
+        self._responses = iter(responses)
+
+    def create(self, **kwargs):
+        """Return the next scripted response; request kwargs are ignored.
+
+        Raises:
+            AssertionError: if more responses are requested than were scripted.
+        """
+        try:
+            return next(self._responses)
+        except StopIteration:
+            raise AssertionError(
+                "agent loop requested more responses than the test scripted"
+            ) from None
+
+
+def make_tool_use_response(tool_id: str, tool_name: str, tool_input: dict):
+    """Build a fake API response that contains a single ``tool_use`` block."""
+    return SimpleNamespace(
+        stop_reason="tool_use",
+        content=[
+            SimpleNamespace(
+                type="tool_use", id=tool_id, name=tool_name, input=tool_input
+            )
+        ],
+    )
+
+
+class ToolResultOrderingTests(unittest.TestCase):
+    """The todo reminder must come after the round's ``tool_result`` blocks."""
+
+    def _third_round_content(self, agent_module):
+        """Run ``agent_module.agent_loop`` for three bash rounds and a final turn.
+
+        Returns the ``content`` of the second-to-last message, which the tests
+        treat as the user message produced by the third tool round.
+        """
+        messages = [{"role": "user", "content": "do work"}]
+        fake_api = FakeMessagesAPI(
+            [
+                make_tool_use_response("tool-1", "bash", {"command": "pwd"}),
+                make_tool_use_response("tool-2", "bash", {"command": "pwd"}),
+                make_tool_use_response("tool-3", "bash", {"command": "pwd"}),
+                SimpleNamespace(stop_reason="end_turn", content="done"),
+            ]
+        )
+        stub_handlers = {**agent_module.TOOL_HANDLERS, "bash": lambda **kwargs: "ok"}
+        patch_client = mock.patch.object(
+            agent_module, "client", SimpleNamespace(messages=fake_api)
+        )
+        patch_handlers = mock.patch.object(
+            agent_module, "TOOL_HANDLERS", stub_handlers
+        )
+        # patch.object restores both module globals even if agent_loop raises.
+        with patch_client, patch_handlers:
+            agent_module.agent_loop(messages)
+        return messages[-2]["content"]
+
+    def _assert_results_then_reminder(self, content):
+        """Assert all ``tool_result`` blocks lead and the reminder is last."""
+        self.assertTrue(content, "expected a non-empty content list")
+        block_types = [block["type"] for block in content]
+        result_count = block_types.count("tool_result")
+        self.assertGreater(result_count, 0)
+        # Every tool_result must sit in the leading run: none after other blocks.
+        self.assertEqual(block_types[:result_count], ["tool_result"] * result_count)
+        self.assertEqual(content[-1], REMINDER_BLOCK)
+
+    def test_s03_places_tool_results_before_reminders(self):
+        content = self._third_round_content(s03_todo_write)
+        self._assert_results_then_reminder(content)
+
+    def test_s_full_places_tool_results_before_reminders(self):
+        # s_full only nags while TODO reports open items, so force that on.
+        # Patching through mock also removes the override from TODO afterwards.
+        with mock.patch.object(s_full.TODO, "has_open_items", return_value=True):
+            content = self._third_round_content(s_full)
+        self._assert_results_then_reminder(content)
+
+
+if __name__ == "__main__":
+    unittest.main()