From 09a7f908b45a09520d540d5f04acb5dd4fcbbcb3 Mon Sep 17 00:00:00 2001 From: KiteEater <3305126471@qq.com> Date: Sun, 3 May 2026 22:07:30 +0800 Subject: [PATCH 1/3] fix: preload subagent skills in system prompt --- .../harness/deerflow/subagents/executor.py | 58 ++++++++++--------- .../deerflow/tools/builtins/task_tool.py | 5 +- backend/tests/test_subagent_executor.py | 57 ++++++++++++++++++ backend/tests/test_task_tool_core_logic.py | 4 +- 4 files changed, 92 insertions(+), 32 deletions(-) diff --git a/backend/packages/harness/deerflow/subagents/executor.py b/backend/packages/harness/deerflow/subagents/executor.py index b42cebacf4..92fa901560 100644 --- a/backend/packages/harness/deerflow/subagents/executor.py +++ b/backend/packages/harness/deerflow/subagents/executor.py @@ -13,7 +13,7 @@ from langchain.agents import create_agent from langchain.tools import BaseTool -from langchain_core.messages import AIMessage, HumanMessage, SystemMessage +from langchain_core.messages import AIMessage, HumanMessage from langchain_core.runnables import RunnableConfig from deerflow.agents.thread_state import SandboxState, ThreadDataState, ThreadState @@ -166,7 +166,7 @@ def __init__( logger.info(f"[trace={self.trace_id}] SubagentExecutor initialized: {config.name} with {len(self.tools)} tools") - def _create_agent(self): + def _create_agent(self, system_prompt: str | None = None): """Create the agent instance.""" model_name = _get_model_name(self.config, self.parent_model) model = create_chat_model(name=model_name, thinking_enabled=False) @@ -180,27 +180,24 @@ def _create_agent(self): model=model, tools=self.tools, middleware=middlewares, - system_prompt=self.config.system_prompt, + system_prompt=system_prompt or self.config.system_prompt, state_schema=ThreadState, ) - async def _load_skill_messages(self) -> list[SystemMessage]: - """Load skill content as conversation items based on config.skills. + async def _load_skill_prompt(self) -> str: + """Load skill content into a single prompt block based on config.skills. - Aligned with Codex's pattern: each subagent loads its own skills - per-session and injects them as conversation items (developer messages), - not as system prompt text. The config.skills whitelist controls which - skills are loaded: + The config.skills whitelist controls which skills are loaded: - None: load all enabled skills - []: no skills - ["skill-a", "skill-b"]: only these skills Returns: - List of SystemMessages containing skill content. + System-prompt text containing preloaded skill content. """ if self.config.skills is not None and len(self.config.skills) == 0: logger.info(f"[trace={self.trace_id}] Subagent {self.config.name} skills=[] — skipping skill loading") - return [] + return "" try: from deerflow.skills.loader import load_skills @@ -210,11 +207,11 @@ async def _load_skill_messages(self) -> list[SystemMessage]: logger.info(f"[trace={self.trace_id}] Subagent {self.config.name} loaded {len(all_skills)} enabled skills from disk") except Exception: logger.warning(f"[trace={self.trace_id}] Failed to load skills for subagent {self.config.name}", exc_info=True) - return [] + return "" if not all_skills: logger.info(f"[trace={self.trace_id}] Subagent {self.config.name} no enabled skills found") - return [] + return "" # Filter by config.skills whitelist if self.config.skills is not None: @@ -224,21 +221,34 @@ async def _load_skill_messages(self) -> list[SystemMessage]: skills = all_skills if not skills: - return [] + return "" - # Read each skill's SKILL.md content and create conversation items - messages = [] + # Read each skill's SKILL.md content and merge it into the single + # system prompt. Do not add SystemMessages to state: OpenAI-compatible + # providers require system messages to appear only at the beginning. + skill_blocks = [] for skill in skills: try: content = await asyncio.to_thread(skill.skill_file.read_text, encoding="utf-8") content = content.strip() if content: - messages.append(SystemMessage(content=f'\n{content}\n')) + skill_blocks.append(f'\n{content}\n') logger.info(f"[trace={self.trace_id}] Subagent {self.config.name} loaded skill: {skill.name}") except Exception: logger.debug(f"[trace={self.trace_id}] Failed to read skill {skill.name}", exc_info=True) - return messages + if not skill_blocks: + return "" + + joined_skill_blocks = "\n\n".join(skill_blocks) + return f"\nThe following skills are preloaded. Follow them when relevant.\n\n{joined_skill_blocks}\n" + + def _build_system_prompt(self, skill_prompt: str) -> str: + """Build the per-run subagent system prompt.""" + if not skill_prompt: + return self.config.system_prompt + base_prompt = self.config.system_prompt.rstrip() + return f"{base_prompt}\n\n{skill_prompt}" if base_prompt else skill_prompt async def _build_initial_state(self, task: str) -> dict[str, Any]: """Build the initial state for agent execution. @@ -249,14 +259,7 @@ async def _build_initial_state(self, task: str) -> dict[str, Any]: Returns: Initial state dictionary. """ - # Load skills as conversation items (Codex pattern) - skill_messages = await self._load_skill_messages() - - messages: list = [] - # Skill content injected as developer/system messages before the task - messages.extend(skill_messages) - # Then the actual task - messages.append(HumanMessage(content=task)) + messages = [HumanMessage(content=task)] state: dict[str, Any] = { "messages": messages, @@ -294,7 +297,8 @@ async def _aexecute(self, task: str, result_holder: SubagentResult | None = None ) try: - agent = self._create_agent() + skill_prompt = await self._load_skill_prompt() + agent = self._create_agent(system_prompt=self._build_system_prompt(skill_prompt)) state = await self._build_initial_state(task) # Build config with thread_id for sandbox access and recursion limit diff --git a/backend/packages/harness/deerflow/tools/builtins/task_tool.py b/backend/packages/harness/deerflow/tools/builtins/task_tool.py index 59613272cc..385ad035c8 100644 --- a/backend/packages/harness/deerflow/tools/builtins/task_tool.py +++ b/backend/packages/harness/deerflow/tools/builtins/task_tool.py @@ -87,9 +87,8 @@ async def task_tool( # Build config overrides overrides: dict = {} - # Skills are loaded by SubagentExecutor per-session (aligned with Codex's pattern: - # each subagent loads its own skills based on config, injected as conversation items). - # No longer appended to system_prompt here. + # Skills are preloaded by SubagentExecutor per-session and merged into the + # subagent system prompt. They are not appended to the task config here. if max_turns is not None: overrides["max_turns"] = max_turns diff --git a/backend/tests/test_subagent_executor.py b/backend/tests/test_subagent_executor.py index a6a62c2b65..f443571935 100644 --- a/backend/tests/test_subagent_executor.py +++ b/backend/tests/test_subagent_executor.py @@ -17,6 +17,7 @@ import sys import threading from datetime import datetime +from types import ModuleType, SimpleNamespace from unittest.mock import MagicMock, patch import pytest @@ -1040,3 +1041,59 @@ def test_cleanup_removes_cancelled_task(self, executor_module, classes): executor_module.cleanup_background_task(task_id) assert task_id not in executor_module._background_tasks + + +# ----------------------------------------------------------------------------- +# Skill Preload Tests +# ----------------------------------------------------------------------------- + + +class TestSkillPreload: + @pytest.mark.anyio + async def test_preloaded_skills_are_sent_in_the_leading_system_message(self, classes, base_config, monkeypatch): + """Preloaded skills must reach the model without creating later SystemMessages.""" + from langchain_core.language_models.fake_chat_models import FakeListChatModel + from langchain_core.messages import AIMessage, HumanMessage, SystemMessage + from langchain_core.outputs import ChatGeneration, ChatResult + + SubagentExecutor = classes["SubagentExecutor"] + captured_messages = [] + + class CapturingChatModel(FakeListChatModel): + def _generate(self, messages, stop=None, run_manager=None, **kwargs): + captured_messages.append(messages) + return ChatResult(generations=[ChatGeneration(message=AIMessage(content="done"))]) + + skill = SimpleNamespace( + name="demo-skill", + description="Demo workflow", + skill_file=SimpleNamespace(read_text=MagicMock(return_value="Demo skill instructions")), + ) + config = classes["SubagentConfig"]( + name=base_config.name, + description=base_config.description, + system_prompt=base_config.system_prompt, + skills=["demo-skill"], + max_turns=base_config.max_turns, + timeout_seconds=base_config.timeout_seconds, + ) + executor = SubagentExecutor(config=config, tools=[], thread_id="test-thread") + + middleware_module = ModuleType("deerflow.agents.middlewares.tool_error_handling_middleware") + middleware_module.build_subagent_runtime_middlewares = lambda *, lazy_init=True: [] + monkeypatch.setitem(sys.modules, "deerflow.agents.middlewares.tool_error_handling_middleware", middleware_module) + monkeypatch.setattr("deerflow.subagents.executor.ThreadState", None) + monkeypatch.setattr("deerflow.subagents.executor.create_chat_model", lambda **kwargs: CapturingChatModel(responses=["done"])) + + with patch("deerflow.skills.loader.load_skills", return_value=[skill]): + result = await executor._aexecute("Do the task") + + assert result.result == "done" + assert captured_messages + messages = captured_messages[0] + system_indexes = [index for index, message in enumerate(messages) if isinstance(message, SystemMessage)] + assert system_indexes == [0] + assert base_config.system_prompt in messages[0].content + assert '' in messages[0].content + assert "Demo skill instructions" in messages[0].content + assert [message.content for message in messages if isinstance(message, HumanMessage)] == ["Do the task"] diff --git a/backend/tests/test_task_tool_core_logic.py b/backend/tests/test_task_tool_core_logic.py index 1ae008df2c..6b1ba693a8 100644 --- a/backend/tests/test_task_tool_core_logic.py +++ b/backend/tests/test_task_tool_core_logic.py @@ -165,8 +165,8 @@ def execute_async(self, prompt, task_id=None): assert captured["executor_kwargs"]["thread_id"] == "thread-1" assert captured["executor_kwargs"]["parent_model"] == "ark-model" assert captured["executor_kwargs"]["config"].max_turns == 7 - # Skills are no longer appended to system_prompt; they are loaded per-session - # by SubagentExecutor and injected as conversation items (Codex pattern). + # Skills are loaded per-session by SubagentExecutor and merged into the + # runtime system prompt, not appended to the stored config here. assert captured["executor_kwargs"]["config"].system_prompt == "Base system prompt" get_available_tools.assert_called_once_with(model_name="ark-model", groups=None, subagent_enabled=False) From d417a9db72367ace62a6e80a41ddcbbf94e77304 Mon Sep 17 00:00:00 2001 From: KiteEater <3305126471@qq.com> Date: Sun, 3 May 2026 23:30:14 +0800 Subject: [PATCH 2/3] Fix subagent skill preload message ordering --- backend/tests/test_subagent_executor.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/backend/tests/test_subagent_executor.py b/backend/tests/test_subagent_executor.py index 7c3e4c938a..c8ceaf26af 100644 --- a/backend/tests/test_subagent_executor.py +++ b/backend/tests/test_subagent_executor.py @@ -267,7 +267,7 @@ def fake_create_agent(**kwargs): assert captured["agent"]["system_prompt"] == base_config.system_prompt @pytest.mark.anyio - async def test_load_skill_messages_uses_explicit_app_config_for_skill_storage( + async def test_load_skill_prompt_uses_explicit_app_config_for_skill_storage( self, classes, base_config, @@ -297,11 +297,11 @@ def fake_get_or_new_skill_storage(*, app_config=None): thread_id="test-thread", ) - messages = await executor._load_skill_messages() + skill_prompt = await executor._load_skill_prompt() assert captured["app_config"] is app_config - assert len(messages) == 1 - assert "Use demo skill" in messages[0].content + assert '' in skill_prompt + assert "Use demo skill" in skill_prompt # ----------------------------------------------------------------------------- @@ -1342,12 +1342,17 @@ def _generate(self, messages, stop=None, run_manager=None, **kwargs): executor = SubagentExecutor(config=config, tools=[], thread_id="test-thread") middleware_module = ModuleType("deerflow.agents.middlewares.tool_error_handling_middleware") - middleware_module.build_subagent_runtime_middlewares = lambda *, lazy_init=True: [] + middleware_module.build_subagent_runtime_middlewares = lambda **kwargs: [] monkeypatch.setitem(sys.modules, "deerflow.agents.middlewares.tool_error_handling_middleware", middleware_module) monkeypatch.setattr("deerflow.subagents.executor.ThreadState", None) monkeypatch.setattr("deerflow.subagents.executor.create_chat_model", lambda **kwargs: CapturingChatModel(responses=["done"])) - with patch("deerflow.skills.loader.load_skills", return_value=[skill]): + def fake_get_or_new_skill_storage(*, app_config=None): + return SimpleNamespace(load_skills=lambda *, enabled_only: [skill]) + + monkeypatch.setattr("deerflow.skills.storage.get_or_new_skill_storage", fake_get_or_new_skill_storage) + + with patch.object(skill.skill_file, "read_text", return_value="Demo skill instructions"): result = await executor._aexecute("Do the task") assert result.result == "done" From cb9d294fa01ef5b59c61d52f09449d3d2a661ff8 Mon Sep 17 00:00:00 2001 From: KiteEater <3305126471@qq.com> Date: Sun, 3 May 2026 23:40:52 +0800 Subject: [PATCH 3/3] Refine subagent prompt handling and preload tests --- .../packages/harness/deerflow/subagents/executor.py | 2 +- backend/tests/test_subagent_executor.py | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/backend/packages/harness/deerflow/subagents/executor.py b/backend/packages/harness/deerflow/subagents/executor.py index a8b0963953..9f26340f3c 100644 --- a/backend/packages/harness/deerflow/subagents/executor.py +++ b/backend/packages/harness/deerflow/subagents/executor.py @@ -285,7 +285,7 @@ def _create_agent(self, system_prompt: str | None = None): model=model, tools=self.tools, middleware=middlewares, - system_prompt=system_prompt or self.config.system_prompt, + system_prompt=self.config.system_prompt if system_prompt is None else system_prompt, state_schema=ThreadState, ) diff --git a/backend/tests/test_subagent_executor.py b/backend/tests/test_subagent_executor.py index c8ceaf26af..c8f25e31e1 100644 --- a/backend/tests/test_subagent_executor.py +++ b/backend/tests/test_subagent_executor.py @@ -1311,6 +1311,19 @@ def test_cleanup_removes_cancelled_task(self, executor_module, classes): class TestSkillPreload: + @pytest.mark.anyio + async def test_build_initial_state_keeps_only_task_human_message(self, classes, base_config): + """Initial state should not inject skill content as extra system messages.""" + from langchain_core.messages import HumanMessage + + SubagentExecutor = classes["SubagentExecutor"] + executor = SubagentExecutor(config=base_config, tools=[], thread_id="test-thread") + + state = await executor._build_initial_state("Do the task") + + assert [type(message) for message in state["messages"]] == [HumanMessage] + assert [message.content for message in state["messages"]] == ["Do the task"] + @pytest.mark.anyio async def test_preloaded_skills_are_sent_in_the_leading_system_message(self, classes, base_config, monkeypatch): """Preloaded skills must reach the model without creating later SystemMessages."""