From 5dccdc5690edc959e0e10a293403388dcecf887f Mon Sep 17 00:00:00 2001 From: fang-tech Date: Sat, 28 Mar 2026 19:45:08 +0800 Subject: [PATCH 1/6] feat(skill): add code execution prompt injection and codeExecutionInstruction option - AgentSkillPromptProvider: add DEFAULT_CODE_EXECUTION_INSTRUCTION template and conditional append logic when code execution is enabled with an upload dir - AgentSkillPromptProvider: expose setCodeExecutionEnable/setUploadDir/setCodeExecutionInstruction - SkillBox.CodeExecutionBuilder: add codeExecutionInstruction() builder method - SkillBox: wire prompt provider on enable() and after uploadSkillFiles() - SkillBox: remove deprecated no-arg constructor Co-Authored-By: Claude Sonnet 4.6 (1M context) --- .../core/skill/AgentSkillPromptProvider.java | 75 ++++++++- .../io/agentscope/core/skill/SkillBox.java | 37 +++-- .../skill/util/JarSkillRepositoryAdapter.java | 50 ------ .../skill/AgentSkillPromptProviderTest.java | 134 ++++++++++++++++ .../agentscope/core/skill/SkillBoxTest.java | 145 ++++++++++++++++-- docs/_toc.yml | 2 +- docs/en/task/agent-skill.md | 76 +++++++-- docs/zh/task/agent-skill.md | 76 +++++++-- 8 files changed, 482 insertions(+), 113 deletions(-) delete mode 100644 agentscope-core/src/main/java/io/agentscope/core/skill/util/JarSkillRepositoryAdapter.java diff --git a/agentscope-core/src/main/java/io/agentscope/core/skill/AgentSkillPromptProvider.java b/agentscope-core/src/main/java/io/agentscope/core/skill/AgentSkillPromptProvider.java index b762c6bc8..0f6f56f26 100644 --- a/agentscope-core/src/main/java/io/agentscope/core/skill/AgentSkillPromptProvider.java +++ b/agentscope-core/src/main/java/io/agentscope/core/skill/AgentSkillPromptProvider.java @@ -15,6 +15,8 @@ */ package io.agentscope.core.skill; +import java.nio.file.Path; + /** * Generates skill system prompts for agents to understand available skills. 
* @@ -31,6 +33,9 @@ public class AgentSkillPromptProvider { private final SkillRegistry skillRegistry; private final String instruction; private final String template; + private boolean codeExecutionEable; + private String uploadDir; + private String codeExecutionInstruction; public static final String DEFAULT_AGENT_SKILL_INSTRUCTION = """ @@ -44,11 +49,10 @@ public class AgentSkillPromptProvider { - The skill will be activated and its documentation loaded with detailed instructions - Additional resources (scripts, assets, references) can be loaded using the same tool with different paths - Path Information: - When you load a skill, the response will include: - - Exact paths to all skill resources - - Code examples for accessing skill files - - Usage instructions specific to that skill + Example: + 1. User asks to analyze data → find a matching skill below (e.g. data-analysis_builtin) + 2. Load it: load_skill_through_path(skillId="data-analysis_builtin", path="SKILL.md") + 3. Follow the instructions returned by the skill Template fields explanation: - : The skill's display name @@ -60,6 +64,42 @@ public class AgentSkillPromptProvider { """; + // First %s = uploadDir absolute path (description), Second %s = uploadDir absolute path + // (example paths) + public static final String DEFAULT_CODE_EXECUTION_INSTRUCTION = + """ + + ## Code Execution + + + You have access to the execute_shell_command tool. When a task can be accomplished by running\s + a pre-deployed skill script, you MUST execute it yourself using execute_shell_command rather\s + than describing or suggesting commands to the user. + + Skills root directory: %s + Each skill's files are located under a subdirectory named by its : + %s//scripts/ + %s//assets/ + + Workflow: + 1. After loading a skill, use ls to explore its directory structure and discover available scripts/assets + 2. Once you find the right script, execute it immediately with its absolute path + 3. 
If execution fails, diagnose and retry — do not fall back to describing the command + + Rules: + - Always use absolute paths when executing scripts + - If a script exists for the task, run it directly — do not rewrite its logic inline + - If asset/data files exist for the task, read them directly — do not recreate them + + Example: + # Explore what scripts are available for a skill + execute_shell_command(command="ls %s/data-analysis_builtin/scripts/") + + # Run an existing Python script with absolute path + execute_shell_command(command="python3 %s/data-analysis_builtin/scripts/analyze.py") + + """; + // skillName, skillDescription, skillId public static final String DEFAULT_AGENT_SKILL_TEMPLATE = """ @@ -126,6 +166,31 @@ public String getSkillSystemPrompt() { // Close available_skills tag sb.append(""); + // Conditionally append code execution instructions + if (codeExecutionEable && uploadDir != null) { + String template = + codeExecutionInstruction != null + ? codeExecutionInstruction + : DEFAULT_CODE_EXECUTION_INSTRUCTION; + sb.append( + String.format(template, uploadDir, uploadDir, uploadDir, uploadDir, uploadDir)); + } + return sb.toString(); } + + public void setCodeExecutionEnable(boolean codeExecutionEable) { + this.codeExecutionEable = codeExecutionEable; + } + + public void setUploadDir(Path uploadDir) { + this.uploadDir = uploadDir != null ? uploadDir.toAbsolutePath().toString() : null; + } + + public void setCodeExecutionInstruction(String codeExecutionInstruction) { + this.codeExecutionInstruction = + codeExecutionInstruction == null || codeExecutionInstruction.isBlank() + ? 
null + : codeExecutionInstruction; + } } diff --git a/agentscope-core/src/main/java/io/agentscope/core/skill/SkillBox.java b/agentscope-core/src/main/java/io/agentscope/core/skill/SkillBox.java index 9eaabbb16..9dcbeaa06 100644 --- a/agentscope-core/src/main/java/io/agentscope/core/skill/SkillBox.java +++ b/agentscope-core/src/main/java/io/agentscope/core/skill/SkillBox.java @@ -54,18 +54,6 @@ public class SkillBox implements StateModule { private SkillFileFilter fileFilter; private boolean autoUploadSkill = true; - /** - * Creates a SkillBox without a toolkit. - * - *

This constructor will be removed in the next release. A SkillBox must hold a - * {@link Toolkit} to operate correctly. Relying on automatic toolkit assignment makes - * behavior less explicit and harder to reason about. - */ - @Deprecated - public SkillBox() { - this(null, null, null); - } - public SkillBox(Toolkit toolkit) { this(toolkit, null, null); } @@ -717,6 +705,7 @@ private Path ensureUploadDirExists() { } } + skillPromptProvider.setUploadDir(uploadDir); return uploadDir; } @@ -855,6 +844,7 @@ public static class CodeExecutionBuilder { private boolean withShellCalled = false; private boolean enableRead = false; private boolean enableWrite = false; + private String codeExecutionInstruction; CodeExecutionBuilder(SkillBox skillBox) { this.skillBox = skillBox; @@ -993,6 +983,23 @@ public CodeExecutionBuilder withWrite() { return this; } + /** + * Set a custom code execution instruction for the system prompt. + * + *

The instruction is appended to the skill system prompt when code execution is enabled. + * Use {@code %s} as a placeholder for the upload directory absolute path (description line + * and example paths). Expanded via {@code String.format}: at most 5 placeholders; write % as %%. + * + *

Pass {@code null} or blank to use the default instruction. + * + * @param instruction Custom code execution instruction template + * @return This builder for chaining + */ + public CodeExecutionBuilder codeExecutionInstruction(String instruction) { + this.codeExecutionInstruction = instruction; + return this; + } + /** * Apply the configuration and enable code execution. * @@ -1017,8 +1024,7 @@ public void enable() { } // Handle replacement: remove existing tool group if present - if (skillBox.toolkit != null - && skillBox.toolkit.getToolGroup("skill_code_execution_tool_group") != null) { + if (skillBox.toolkit.getToolGroup("skill_code_execution_tool_group") != null) { skillBox.toolkit.removeToolGroups(List.of("skill_code_execution_tool_group")); logger.info("Replacing existing code execution configuration"); } @@ -1107,6 +1113,9 @@ public void enable() { shellEnabled, enableRead, enableWrite); + + skillBox.skillPromptProvider.setCodeExecutionEnable(true); + skillBox.skillPromptProvider.setCodeExecutionInstruction(codeExecutionInstruction); } /** diff --git a/agentscope-core/src/main/java/io/agentscope/core/skill/util/JarSkillRepositoryAdapter.java b/agentscope-core/src/main/java/io/agentscope/core/skill/util/JarSkillRepositoryAdapter.java deleted file mode 100644 index f3cd2eb08..000000000 --- a/agentscope-core/src/main/java/io/agentscope/core/skill/util/JarSkillRepositoryAdapter.java +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright 2024-2026 the original author or authors. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.agentscope.core.skill.util; - -import io.agentscope.core.skill.repository.ClasspathSkillRepository; -import java.io.IOException; - -/** - * @deprecated Use {@link ClasspathSkillRepository} instead. - */ -@Deprecated -public class JarSkillRepositoryAdapter extends ClasspathSkillRepository { - - /** - * Creates an adapter for loading skills from resources. - * - * @param resourcePath The path to the skill under resources, e.g., "writing-skills" - * @throws IOException if initialization fails - */ - @Deprecated - public JarSkillRepositoryAdapter(String resourcePath) throws IOException { - super(resourcePath); - } - - /** - * Creates an adapter for loading skills from resources using a specific ClassLoader. - * - * @param resourcePath The path to the skill under resources, e.g., "writing-skills" - * @param classLoader The ClassLoader to use for loading resources - * @throws IOException if initialization fails - */ - @Deprecated - protected JarSkillRepositoryAdapter(String resourcePath, ClassLoader classLoader) - throws IOException { - super(resourcePath, classLoader); - } -} diff --git a/agentscope-core/src/test/java/io/agentscope/core/skill/AgentSkillPromptProviderTest.java b/agentscope-core/src/test/java/io/agentscope/core/skill/AgentSkillPromptProviderTest.java index 437ea880c..058747e8e 100644 --- a/agentscope-core/src/test/java/io/agentscope/core/skill/AgentSkillPromptProviderTest.java +++ b/agentscope-core/src/test/java/io/agentscope/core/skill/AgentSkillPromptProviderTest.java @@ -17,11 +17,14 @@ package io.agentscope.core.skill; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; +import java.nio.file.Path; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.DisplayName; import 
org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; class AgentSkillPromptProviderTest { @@ -118,4 +121,135 @@ void testSpecialCharactersInDescription() { assertTrue(prompt.contains("Description with \"quotes\" and 'apostrophes'")); } + + @Test + @DisplayName("Should not include code execution section when not enabled") + void testNoCodeExecutionSectionByDefault() { + AgentSkill skill = new AgentSkill("test_skill", "Test Skill", "# Content", null); + RegisteredSkill registered = new RegisteredSkill("test_skill_custom"); + skillRegistry.registerSkill("test_skill_custom", skill, registered); + + String prompt = provider.getSkillSystemPrompt(); + + assertFalse(prompt.contains("## Code Execution")); + assertFalse(prompt.contains("")); + } + + @Test + @DisplayName("Should not include code execution section when enabled but uploadDir not set") + void testNoCodeExecutionSectionWhenEnabledButNoUploadDir() { + AgentSkill skill = new AgentSkill("test_skill", "Test Skill", "# Content", null); + RegisteredSkill registered = new RegisteredSkill("test_skill_custom"); + skillRegistry.registerSkill("test_skill_custom", skill, registered); + + provider.setCodeExecutionEnable(true); + // uploadDir not set + + String prompt = provider.getSkillSystemPrompt(); + + assertFalse(prompt.contains("## Code Execution")); + assertFalse(prompt.contains("")); + } + + @Test + @DisplayName("Should include code execution section with uploadDir when enabled") + void testCodeExecutionSectionIncludedWhenEnabled(@TempDir Path tempDir) { + AgentSkill skill = new AgentSkill("test_skill", "Test Skill", "# Content", null); + RegisteredSkill registered = new RegisteredSkill("test_skill_custom"); + skillRegistry.registerSkill("test_skill_custom", skill, registered); + + provider.setCodeExecutionEnable(true); + provider.setUploadDir(tempDir); + + String prompt = provider.getSkillSystemPrompt(); + + assertTrue(prompt.contains("## Code Execution")); + assertTrue(prompt.contains("")); + 
assertTrue(prompt.contains("")); + assertTrue(prompt.contains(tempDir.toAbsolutePath().toString())); + } + + @Test + @DisplayName("Should include skill-id based path pattern in code execution section") + void testCodeExecutionSectionContainsSkillIdPattern(@TempDir Path tempDir) { + AgentSkill skill = new AgentSkill("test_skill", "Test Skill", "# Content", null); + RegisteredSkill registered = new RegisteredSkill("test_skill_custom"); + skillRegistry.registerSkill("test_skill_custom", skill, registered); + + provider.setCodeExecutionEnable(true); + provider.setUploadDir(tempDir); + + String prompt = provider.getSkillSystemPrompt(); + String uploadDirStr = tempDir.toAbsolutePath().toString(); + + assertTrue(prompt.contains(uploadDirStr + "//scripts/")); + assertTrue(prompt.contains(uploadDirStr + "//assets/")); + } + + @Test + @DisplayName("Should include absolute path instruction in code execution section") + void testCodeExecutionSectionMentionsAbsolutePaths(@TempDir Path tempDir) { + AgentSkill skill = new AgentSkill("test_skill", "Test Skill", "# Content", null); + RegisteredSkill registered = new RegisteredSkill("test_skill_custom"); + skillRegistry.registerSkill("test_skill_custom", skill, registered); + + provider.setCodeExecutionEnable(true); + provider.setUploadDir(tempDir); + + String prompt = provider.getSkillSystemPrompt(); + + assertTrue(prompt.contains("absolute paths")); + assertTrue(prompt.contains("existing scripts")); + } + + @Test + @DisplayName("Code execution section should appear after ") + void testCodeExecutionSectionAppearsAfterAvailableSkills(@TempDir Path tempDir) { + AgentSkill skill = new AgentSkill("test_skill", "Test Skill", "# Content", null); + RegisteredSkill registered = new RegisteredSkill("test_skill_custom"); + skillRegistry.registerSkill("test_skill_custom", skill, registered); + + provider.setCodeExecutionEnable(true); + provider.setUploadDir(tempDir); + + String prompt = provider.getSkillSystemPrompt(); + + int 
availableSkillsEnd = prompt.indexOf(""); + int codeExecutionStart = prompt.indexOf("## Code Execution"); + assertTrue(availableSkillsEnd < codeExecutionStart); + } + + @Test + @DisplayName("Should use custom code execution instruction when set") + void testCustomCodeExecutionInstruction(@TempDir Path tempDir) { + AgentSkill skill = new AgentSkill("test_skill", "Test Skill", "# Content", null); + RegisteredSkill registered = new RegisteredSkill("test_skill_custom"); + skillRegistry.registerSkill("test_skill_custom", skill, registered); + + provider.setCodeExecutionEnable(true); + provider.setUploadDir(tempDir); + provider.setCodeExecutionInstruction("## Custom Section\nSkills dir: %s"); + + String prompt = provider.getSkillSystemPrompt(); + + assertTrue(prompt.contains("## Custom Section")); + assertTrue(prompt.contains("Skills dir: " + tempDir.toAbsolutePath())); + assertFalse(prompt.contains("## Code Execution")); + } + + @Test + @DisplayName("Should fall back to default instruction when null is set") + void testNullCodeExecutionInstructionUsesDefault(@TempDir Path tempDir) { + AgentSkill skill = new AgentSkill("test_skill", "Test Skill", "# Content", null); + RegisteredSkill registered = new RegisteredSkill("test_skill_custom"); + skillRegistry.registerSkill("test_skill_custom", skill, registered); + + provider.setCodeExecutionEnable(true); + provider.setUploadDir(tempDir); + provider.setCodeExecutionInstruction(null); + + String prompt = provider.getSkillSystemPrompt(); + + assertTrue(prompt.contains("## Code Execution")); + } } diff --git a/agentscope-core/src/test/java/io/agentscope/core/skill/SkillBoxTest.java b/agentscope-core/src/test/java/io/agentscope/core/skill/SkillBoxTest.java index e3f5a13db..c7e76f07e 100644 --- a/agentscope-core/src/test/java/io/agentscope/core/skill/SkillBoxTest.java +++ b/agentscope-core/src/test/java/io/agentscope/core/skill/SkillBoxTest.java @@ -407,19 +407,6 @@ void testEnableCodeExecutionWithExistingDir() throws IOException { 
assertEquals(0, Files.list(skillBox.getCodeExecutionWorkDir()).count()); } - @Test - @DisplayName("Should throw exception when enabling code execution without toolkit") - void testEnableCodeExecutionWithoutToolkit() { - SkillBox skillBoxWithoutToolkit = new SkillBox(); - - IllegalStateException exception = - assertThrows( - IllegalStateException.class, - () -> skillBoxWithoutToolkit.codeExecution().withShell().enable()); - assertEquals( - "Must bind toolkit before enabling code execution", exception.getMessage()); - } - @Test @DisplayName("Should upload skill files to upload directory organized by skill ID") void testUploadSkillFilesToUploadDir() throws IOException { @@ -950,6 +937,138 @@ private boolean isErrorResult(ToolResultBlock result) { .anyMatch(text -> text != null && text.startsWith("Error:")); } + @Nested + @DisplayName("Skill Prompt Code Execution Integration Tests") + class SkillPromptCodeExecutionTest { + + @TempDir Path tempDir; + + @Test + @DisplayName("Should not include code execution section in prompt before enabling") + void testPromptHasNoCodeExecutionSectionBeforeEnable() { + AgentSkill skill = new AgentSkill("data-analysis", "Analyze data", "# Content", null); + skillBox.registerSkill(skill); + + String prompt = skillBox.getSkillPrompt(); + + assertFalse(prompt.contains("## Code Execution")); + assertFalse(prompt.contains("")); + } + + @Test + @DisplayName("Should include code execution section with uploadDir after enable and upload") + void testPromptIncludesUploadDirAfterEnableAndUpload() { + AgentSkill skill = + new AgentSkill( + "data-analysis", + "Analyze data", + "# Content", + Map.of("scripts/analyze.py", "print('hello')")); + skillBox.registerSkill(skill); + + skillBox.codeExecution().workDir(tempDir.toString()).withShell().enable(); + skillBox.uploadSkillFiles(); + + String prompt = skillBox.getSkillPrompt(); + String expectedUploadDir = tempDir.resolve("skills").toAbsolutePath().toString(); + + assertTrue(prompt.contains("## Code 
Execution")); + assertTrue(prompt.contains("")); + assertTrue(prompt.contains(expectedUploadDir)); + } + + @Test + @DisplayName("Should show skill-id based subdirectory pattern in code execution section") + void testPromptShowsSkillIdSubdirectoryPattern() { + AgentSkill skill = + new AgentSkill( + "data-analysis", + "Analyze data", + "# Content", + Map.of("scripts/analyze.py", "print('hello')")); + skillBox.registerSkill(skill); + + skillBox.codeExecution().workDir(tempDir.toString()).withShell().enable(); + skillBox.uploadSkillFiles(); + + String prompt = skillBox.getSkillPrompt(); + String uploadDir = tempDir.resolve("skills").toAbsolutePath().toString(); + + assertTrue(prompt.contains(uploadDir + "//scripts/")); + assertTrue(prompt.contains(uploadDir + "//assets/")); + } + + @Test + @DisplayName("Should instruct LLM to use existing scripts and absolute paths") + void testPromptInstructsAbsolutePathsAndExistingScripts() { + AgentSkill skill = + new AgentSkill( + "data-analysis", + "Analyze data", + "# Content", + Map.of("scripts/analyze.py", "print('hello')")); + skillBox.registerSkill(skill); + + skillBox.codeExecution().workDir(tempDir.toString()).withShell().enable(); + skillBox.uploadSkillFiles(); + + String prompt = skillBox.getSkillPrompt(); + + assertTrue(prompt.contains("absolute paths")); + assertTrue(prompt.contains("existing scripts")); + } + + @Test + @DisplayName("Should use custom code execution instruction via builder") + void testCustomCodeExecutionInstructionViaBuilder() { + AgentSkill skill = + new AgentSkill( + "data-analysis", + "Analyze data", + "# Content", + Map.of("scripts/analyze.py", "print('hello')")); + skillBox.registerSkill(skill); + + String customInstruction = "## My Custom Section\nRoot: %s"; + skillBox.codeExecution() + .workDir(tempDir.toString()) + .codeExecutionInstruction(customInstruction) + .withShell() + .enable(); + skillBox.uploadSkillFiles(); + + String prompt = skillBox.getSkillPrompt(); + String expectedUploadDir = 
tempDir.resolve("skills").toAbsolutePath().toString(); + + assertTrue(prompt.contains("## My Custom Section")); + assertTrue(prompt.contains("Root: " + expectedUploadDir)); + assertFalse(prompt.contains("## Code Execution")); + } + + @Test + @DisplayName("Code execution section should appear after in prompt") + void testCodeExecutionSectionOrderInPrompt() { + AgentSkill skill = + new AgentSkill( + "data-analysis", + "Analyze data", + "# Content", + Map.of("scripts/analyze.py", "print('hello')")); + skillBox.registerSkill(skill); + + skillBox.codeExecution().workDir(tempDir.toString()).withShell().enable(); + skillBox.uploadSkillFiles(); + + String prompt = skillBox.getSkillPrompt(); + + int availableSkillsEnd = prompt.indexOf(""); + int codeExecutionStart = prompt.indexOf("## Code Execution"); + assertTrue(availableSkillsEnd >= 0); + assertTrue(codeExecutionStart >= 0); + assertTrue(availableSkillsEnd < codeExecutionStart); + } + } + /** * Test tool class with @Tool annotated methods for testing tool object * registration. diff --git a/docs/_toc.yml b/docs/_toc.yml index 62e1d74d3..b86f254cb 100644 --- a/docs/_toc.yml +++ b/docs/_toc.yml @@ -108,7 +108,7 @@ parts: - file: zh/task/agent-as-tool title: Agent as Tool - file: zh/task/agent-skill - title: 智能体技能 + title: Agent Skill - file: zh/task/rag title: RAG - file: zh/task/hook diff --git a/docs/en/task/agent-skill.md b/docs/en/task/agent-skill.md index 1a849478e..3b94bd6d6 100644 --- a/docs/en/task/agent-skill.md +++ b/docs/en/task/agent-skill.md @@ -201,7 +201,7 @@ ReActAgent agent = ReActAgent.builder() ### Feature 2: Code Execution Capabilities -Provides an isolated code execution folder for Skills, supporting Shell commands, file read/write operations, etc. Uses Builder pattern for flexible configuration of required tools. +Provides an isolated code execution environment for Skills, supporting Shell commands, file read/write operations, etc. Uses Builder pattern to compose tools and configuration on demand. 
**Basic Usage**: @@ -216,36 +216,42 @@ skillBox.codeExecution() .enable(); ``` +**Configuration Reference**: + +- **Tool Selection**: Combine `withShell()`, `withRead()`, `withWrite()` as needed — only explicitly enabled tools are registered +- **`workDir`**: Shared working directory for all tools. Created automatically when specified; if omitted, a temporary directory `agentscope-code-execution-*` is created lazily and cleaned up on JVM exit +- **`uploadDir`**: Upload location for Skill resource files; defaults to `workDir/skills` +- **File Filtering**: Controls which resource files are allowed to upload. Defaults to `scripts/`, `assets/` folders and `.py`, `.js`, `.sh` extensions. Adjust with `includeFolders()`/`includeExtensions()`, or fully customize with `fileFilter()` (the two approaches are mutually exclusive) +- **Custom Shell**: `withShell(customShellTool)` accepts a custom tool whose `baseDir` is automatically overridden with `workDir` while preserving its security policy + **Custom Configuration**: ```java -// Customize working directory and Shell command whitelist +// Specify directory + custom Shell + file filtering ShellCommandTool customShell = new ShellCommandTool( - null, // baseDir will be automatically set to workDir + null, // baseDir will be automatically overridden with workDir Set.of("python3", "node", "npm"), - command -> askUserApproval(command) // Optional command approval callback + command -> askUserApproval(command) ); skillBox.codeExecution() - .workDir("/path/to/workdir") // Specify working directory - .withShell(customShell) // Use custom Shell tool - .withRead() // Enable file reading - .withWrite() // Enable file writing + .workDir("/data/agent-workspace") // working directory + .uploadDir("/data/agent-workspace/my-skills") // optional, defaults to workDir/skills + .includeFolders(Set.of("scripts/", "data/")) // optional, customize upload folders + .includeExtensions(Set.of(".py", ".json")) // optional, customize upload extensions 
+ .withShell(customShell) + .withRead() + .withWrite() .enable(); -// Or enable only file operations, without Shell +// Or use a fully custom file filter (mutually exclusive with includeFolders/includeExtensions) skillBox.codeExecution() + .fileFilter(path -> path.endsWith(".py")) // or SkillFileFilter.acceptAll() .withRead() .withWrite() .enable(); ``` -**Core Features**: -- **Unified Working Directory**: All tools share the same `workDir`, ensuring file isolation -- **Selective Enabling**: Flexibly combine Shell, read file, and write file tools as needed -- **Flexible Configuration**: Supports custom ShellCommandTool to meet customization requirements -- **Automatic Management**: Automatically creates temporary directory when `workDir` is not specified, with automatic cleanup on program exit - ### Feature 3: Skill Persistence Storage **Why is this feature needed?** @@ -333,6 +339,46 @@ try (NacosSkillRepository repository = new NacosSkillRepository(aiService, "name > Note: Add the `agentscope-extensions-nacos-skill` dependency. +### Feature 4: Custom Skill Prompts + +When SkillBox injects a system prompt into the Agent, it generates a description entry for each registered Skill so the LLM can decide when to load which Skill. The two components of this prompt can be customized via the constructor: + +- **`instruction`**: The prompt header, explaining how to use Skills (how to load them, path conventions, etc.). Defaults to a built-in `load_skill_through_path` usage guide +- **`template`**: The format template for each Skill entry, containing three `%s` placeholders corresponding to `name`, `description`, and `skillId` in order + +When code execution is enabled, the section appended after `` can also be customized via `.codeExecutionInstruction()`: + +- **`codeExecutionInstruction`**: Template for the code execution section; `%s` placeholders are replaced with the `uploadDir` absolute path (up to 5 substitutions). Passing `null` or blank uses the built-in default. 
+ +Passing `null` or a blank string for any of these uses the built-in default. + +**Example**: + +```java +// Customize instruction and template +String customInstruction = """ + ## Available Skills + When a task matches a skill, load it with load_skill_through_path. + """; + +String customTemplate = """ + - **%s**: %s (id: %s) + """; + +SkillBox skillBox = new SkillBox(toolkit, customInstruction, customTemplate); + +// Customize the code execution section (takes effect when code execution is enabled) +skillBox.codeExecution() + .workDir("/data/workspace") + .codeExecutionInstruction(""" + ## Script Execution + Skills root directory: %s + Always use absolute paths when running scripts. + """) + .withShell() + .enable(); +``` + ### Performance Optimization Recommendations 1. **Control SKILL.md Size**: Keep under 5k tokens, recommended 1.5-2k tokens diff --git a/docs/zh/task/agent-skill.md b/docs/zh/task/agent-skill.md index 6906be639..43d51600c 100644 --- a/docs/zh/task/agent-skill.md +++ b/docs/zh/task/agent-skill.md @@ -198,7 +198,7 @@ ReActAgent agent = ReActAgent.builder() ### 功能 2: 代码执行能力 -为 Skill 提供隔离的代码执行文件夹,支持 Shell 命令、文件读写等操作。使用 Builder 模式灵活配置所需工具。 +为 Skill 提供隔离的代码执行环境,支持 Shell 命令、文件读写等操作。使用 Builder 模式按需组合工具和配置。 **基础用法**: @@ -213,36 +213,42 @@ skillBox.codeExecution() .enable(); ``` +**配置说明**: + +- **工具选择**: 按需组合 `withShell()`、`withRead()`、`withWrite()`,仅注册显式启用的工具 +- **`workDir`**: 所有工具共享的工作目录。指定时自动创建;未指定时延迟创建临时目录 `agentscope-code-execution-*`,JVM 退出自动清理 +- **`uploadDir`**: Skill 资源文件的上传位置,默认为 `workDir/skills` +- **文件过滤**: 控制允许上传的资源文件类型,默认接受 `scripts/`、`assets/` 目录及 `.py`、`.js`、`.sh` 扩展名。可通过 `includeFolders()`/`includeExtensions()` 调整,或用 `fileFilter()` 完全自定义(两种方式互斥) +- **自定义 Shell**: `withShell(customShellTool)` 支持传入自定义工具,其 `baseDir` 会被自动覆盖为 `workDir`,安全策略保持不变 + **自定义配置**: ```java -// 自定义工作目录和 Shell 命令白名单 +// 指定目录 + 自定义 Shell + 文件过滤 ShellCommandTool customShell = new ShellCommandTool( - null, // baseDir 会被自动设置为 workDir + null, // baseDir 会被自动覆盖为 workDir 
Set.of("python3", "node", "npm"), - command -> askUserApproval(command) // 可选的命令审批回调 + command -> askUserApproval(command) ); skillBox.codeExecution() - .workDir("/path/to/workdir") // 指定工作目录 - .withShell(customShell) // 使用自定义 Shell 工具 - .withRead() // 启用文件读取 - .withWrite() // 启用文件写入 + .workDir("/data/agent-workspace") // 工作目录 + .uploadDir("/data/agent-workspace/my-skills") // 可选,默认 workDir/skills + .includeFolders(Set.of("scripts/", "data/")) // 可选,自定义上传文件夹 + .includeExtensions(Set.of(".py", ".json")) // 可选,自定义上传扩展名 + .withShell(customShell) + .withRead() + .withWrite() .enable(); -// 或仅启用文件操作,不启用 Shell +// 或使用完全自定义的文件过滤器(与 includeFolders/includeExtensions 互斥) skillBox.codeExecution() + .fileFilter(path -> path.endsWith(".py")) // 或 SkillFileFilter.acceptAll() .withRead() .withWrite() .enable(); ``` -**核心特性**: -- **统一工作目录**: 所有工具共享同一 `workDir`,确保文件隔离 -- **选择性启用**: 根据需求灵活组合 Shell、读文件、写文件工具 -- **灵活配置**: 支持自定义 ShellCommandTool, 满足定制化的ShellCommandTool需求 -- **自动管理**: 未指定 `workDir` 时自动创建临时目录,程序退出时自动清理 - ### 功能 3: Skill 持久化存储 **为什么需要这个功能?** @@ -327,6 +333,46 @@ try (NacosSkillRepository repository = new NacosSkillRepository(aiService, "name > 注意: 需引入 `agentscope-extensions-nacos-skill` 依赖 +### 功能 4: 自定义 Skill 提示词 + +SkillBox 在注入给 Agent 的系统提示词中,会为每个已注册的 Skill 生成描述信息,供 LLM 判断何时加载哪个 Skill。通过构造函数可自定义提示词的两个组成部分: + +- **`instruction`**: 提示词头部,说明 Skill 的使用方式(如何加载、路径约定等)。默认包含 `load_skill_through_path` 的调用说明 +- **`template`**: 每个 Skill 条目的格式模板,包含三个 `%s` 占位符,依次对应 `name`、`description`、`skillId` + +开启代码执行后,还可通过 `.codeExecutionInstruction()` 自定义追加在 `` 之后的代码执行说明段落: + +- **`codeExecutionInstruction`**: 代码执行说明模板,`%s` 占位符会被替换为 `uploadDir` 的绝对路径(最多替换 5 次)。传 `null` 或空字符串时使用内置默认值 + +三者传 `null` 或空字符串时均使用内置默认值。 + +**示例代码**: + +```java +// 自定义 instruction 和 template +String customInstruction = """ + ## 可用技能 + 当任务匹配某个技能时,使用 load_skill_through_path 加载它。 + """; + +String customTemplate = """ + - **%s**: %s (id: %s) + """; + +SkillBox skillBox = new SkillBox(toolkit, customInstruction, 
customTemplate); + +// 自定义代码执行说明(开启代码执行后生效) +skillBox.codeExecution() + .workDir("/data/workspace") + .codeExecutionInstruction(""" + ## 脚本执行 + 技能脚本根目录: %s + 执行时请使用绝对路径。 + """) + .withShell() + .enable(); +``` + ### 性能优化建议 1. **控制 SKILL.md 大小**: 保持在 5k tokens 以内,建议 1.5-2k tokens From 4c4fc2a1277da4220eaa4b8f18fb0204d2d4e508 Mon Sep 17 00:00:00 2001 From: fang-tech Date: Sat, 28 Mar 2026 19:46:21 +0800 Subject: [PATCH 2/6] test(skill): add e2e benchmark with purpose-built skills covering 4 recall dimensions Skills under e2e-skills/ are designed specifically for benchmarking: - data-transform vs data-report: semantic discrimination (similar domain, different intent) - image-resize: distractor resistance (must not fire for non-image tasks) - log-parser: fuzzy trigger (indirect problem description should still match) - git-changelog: code execution recall (LLM must reference deployed script path) SkillE2ETest collects pass/fail per scenario without asserting individually; @AfterAll asserts overall recall rate >= 75% and prints a summary table. 
Co-Authored-By: Claude Sonnet 4.6 (1M context) --- .../io/agentscope/core/e2e/SkillE2ETest.java | 445 ++++++++++++++++++ .../resources/e2e-skills/data-report/SKILL.md | 17 + .../data-report/scripts/summarize.py | 51 ++ .../e2e-skills/data-transform/SKILL.md | 19 + .../data-transform/scripts/csv_to_json.py | 26 + .../data-transform/scripts/json_to_csv.py | 33 ++ .../e2e-skills/git-changelog/SKILL.md | 18 + .../scripts/generate_changelog.py | 78 +++ .../e2e-skills/image-resize/SKILL.md | 17 + .../e2e-skills/image-resize/scripts/resize.py | 51 ++ .../resources/e2e-skills/log-parser/SKILL.md | 18 + .../log-parser/scripts/extract_errors.py | 56 +++ 12 files changed, 829 insertions(+) create mode 100644 agentscope-core/src/test/java/io/agentscope/core/e2e/SkillE2ETest.java create mode 100644 agentscope-core/src/test/resources/e2e-skills/data-report/SKILL.md create mode 100644 agentscope-core/src/test/resources/e2e-skills/data-report/scripts/summarize.py create mode 100644 agentscope-core/src/test/resources/e2e-skills/data-transform/SKILL.md create mode 100644 agentscope-core/src/test/resources/e2e-skills/data-transform/scripts/csv_to_json.py create mode 100644 agentscope-core/src/test/resources/e2e-skills/data-transform/scripts/json_to_csv.py create mode 100644 agentscope-core/src/test/resources/e2e-skills/git-changelog/SKILL.md create mode 100644 agentscope-core/src/test/resources/e2e-skills/git-changelog/scripts/generate_changelog.py create mode 100644 agentscope-core/src/test/resources/e2e-skills/image-resize/SKILL.md create mode 100644 agentscope-core/src/test/resources/e2e-skills/image-resize/scripts/resize.py create mode 100644 agentscope-core/src/test/resources/e2e-skills/log-parser/SKILL.md create mode 100644 agentscope-core/src/test/resources/e2e-skills/log-parser/scripts/extract_errors.py diff --git a/agentscope-core/src/test/java/io/agentscope/core/e2e/SkillE2ETest.java b/agentscope-core/src/test/java/io/agentscope/core/e2e/SkillE2ETest.java new file mode 
100644 index 000000000..545e6c206 --- /dev/null +++ b/agentscope-core/src/test/java/io/agentscope/core/e2e/SkillE2ETest.java @@ -0,0 +1,445 @@ +/* + * Copyright 2024-2026 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.agentscope.core.e2e; + +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import io.agentscope.core.ReActAgent; +import io.agentscope.core.agent.test.TestUtils; +import io.agentscope.core.e2e.providers.ModelProvider; +import io.agentscope.core.hook.Hook; +import io.agentscope.core.hook.HookEvent; +import io.agentscope.core.hook.PostActingEvent; +import io.agentscope.core.memory.InMemoryMemory; +import io.agentscope.core.message.MsgRole; +import io.agentscope.core.message.ToolUseBlock; +import io.agentscope.core.skill.AgentSkill; +import io.agentscope.core.skill.SkillBox; +import io.agentscope.core.skill.repository.ClasspathSkillRepository; +import io.agentscope.core.tool.Toolkit; +import io.agentscope.core.tool.coding.ShellCommandTool; +import java.io.IOException; +import java.nio.file.Path; +import java.time.Duration; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.DisplayName; 
+import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.api.parallel.Execution; +import org.junit.jupiter.api.parallel.ExecutionMode; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; +import reactor.core.publisher.Mono; + +/** + * E2E benchmark tests for Skill recall and code execution recall. + * + *

 <p>Uses purpose-built benchmark skills under {@code src/test/resources/e2e-skills/} that are + * designed to stress-test four dimensions: + * + *
 <ul> + *
   <li>Semantic discrimination: {@code data-transform} vs {@code data-report} have + * intentionally similar domains — the LLM must distinguish format-conversion from + * statistical analysis. + *
   <li>Distractor resistance: {@code image-resize} is a decoy for non-image tasks; it + * must not be selected when the prompt has nothing to do with images. + *
   <li>Fuzzy trigger: {@code log-parser} should be triggered by indirect problem + * descriptions ("my app is crashing") without the user explicitly mentioning logs. + *
   <li>Code execution recall: {@code git-changelog} and others have pre-deployed scripts; + * the LLM must reference the script's absolute path rather than writing equivalent code + * inline. + *
 </ul> + * + *
 <p>Pass criterion: overall recall rate ≥ 75% across all providers and scenarios. + * Individual test methods never fail; {@link #assertRecallRates()} {@code @AfterAll} does. + * + *
 <p>
 Requirements: {@code ENABLE_E2E_TESTS=true} + at least one API key env var. + */ +@Tag("e2e") +@Tag("skill") +@ExtendWith(E2ETestCondition.class) +@Execution(ExecutionMode.CONCURRENT) +@DisplayName("Skill E2E Benchmark Tests") +class SkillE2ETest { + + private static final String SKILLS_CLASSPATH = "e2e-skills"; + private static final Duration TIMEOUT = Duration.ofSeconds(120); + private static final double RECALL_THRESHOLD = 0.75; + + /** All benchmark skills registered simultaneously — the LLM must choose the right one. */ + private static final List<String> ALL_SKILL_NAMES = + List.of("data-transform", "data-report", "image-resize", "log-parser", "git-changelog"); + + // key = "SKILL_RECALL/<skill>/<provider>" or "CODE_EXEC/<skill>/<provider>" + private static final Map<String, Boolean> RESULTS = new ConcurrentHashMap<>(); + + // ------------------------------------------------------------------------- + // Skill recall benchmark scenarios + // + // Dimension 1 — Semantic discrimination (data-transform vs data-report) + // Both involve "data" but one is format conversion, the other is analysis. + // Dimension 2 — Distractor resistance (image-resize must NOT fire for non-image tasks) + // Dimension 3 — Fuzzy trigger (log-parser via indirect problem description) + // ------------------------------------------------------------------------- + + /** Semantic discrimination: must pick data-transform, not data-report. */ + @ParameterizedTest + @MethodSource("io.agentscope.core.e2e.ProviderFactory#getToolProviders") + @DisplayName("[semantic] data-transform: convert CSV to JSON") + void testSkillRecall_dataTransform(ModelProvider provider) throws IOException { + runSkillRecallTest( + provider, + "data-transform", + "I have a CSV file exported from Excel and I need to convert it to JSON" + + " so my frontend application can consume it."); + } + + /** Semantic discrimination: must pick data-report, not data-transform. 
*/ + @ParameterizedTest + @MethodSource("io.agentscope.core.e2e.ProviderFactory#getToolProviders") + @DisplayName("[semantic] data-report: summarize dataset statistics") + void testSkillRecall_dataReport(ModelProvider provider) throws IOException { + runSkillRecallTest( + provider, + "data-report", + "I have a CSV file with sales numbers across multiple regions and I need" + + " a summary showing averages, totals, and standard deviations."); + } + + /** Fuzzy trigger: indirect description should still trigger log-parser. */ + @ParameterizedTest + @MethodSource("io.agentscope.core.e2e.ProviderFactory#getToolProviders") + @DisplayName("[fuzzy] log-parser: implicit debugging prompt") + void testSkillRecall_logParser_fuzzy(ModelProvider provider) throws IOException { + runSkillRecallTest( + provider, + "log-parser", + "My app keeps crashing in production and I have no idea why." + + " Can you help me figure out what's going wrong?"); + } + + // ------------------------------------------------------------------------- + // Code execution recall benchmark scenarios + // + // Each test loads ONE skill, uploads its scripts to a temp dir, then verifies + // the LLM references the deployed script path in a shell command. + // ------------------------------------------------------------------------- + + /** Code execution recall: LLM must reference the deployed extract_errors.py script. */ + @ParameterizedTest + @MethodSource("io.agentscope.core.e2e.ProviderFactory#getToolProviders") + @DisplayName("[code-exec] log-parser: run extract_errors.py on a log file") + void testCodeExecRecall_logParser(ModelProvider provider, @TempDir Path tempDir) + throws IOException { + runCodeExecutionRecallTest( + provider, + "log-parser", + "scripts/extract_errors.py", + "I have a server log file at /var/log/app.log that's 500 MB." 
+ + " I need to extract all ERROR and WARN entries with their" + + " timestamps into a structured format.", + tempDir); + } + + /** Code execution recall: LLM must reference the deployed generate_changelog.py script. */ + @ParameterizedTest + @MethodSource("io.agentscope.core.e2e.ProviderFactory#getToolProviders") + @DisplayName("[code-exec] git-changelog: run generate_changelog.py between two tags") + void testCodeExecRecall_gitChangelog(ModelProvider provider, @TempDir Path tempDir) + throws IOException { + runCodeExecutionRecallTest( + provider, + "git-changelog", + "scripts/generate_changelog.py", + "I need to produce a Markdown changelog for our upcoming release," + + " covering all commits between the v1.5.0 and HEAD tags.", + tempDir); + } + + // ------------------------------------------------------------------------- + // Aggregate recall-rate assertion (runs once after all tests complete) + // ------------------------------------------------------------------------- + + @AfterAll + static void assertRecallRates() { + if (RESULTS.isEmpty()) { + return; + } + + long skillTotal = + RESULTS.keySet().stream().filter(k -> k.startsWith("SKILL_RECALL")).count(); + long skillHits = + RESULTS.entrySet().stream() + .filter(e -> e.getKey().startsWith("SKILL_RECALL") && e.getValue()) + .count(); + + long codeTotal = RESULTS.keySet().stream().filter(k -> k.startsWith("CODE_EXEC")).count(); + long codeHits = + RESULTS.entrySet().stream() + .filter(e -> e.getKey().startsWith("CODE_EXEC") && e.getValue()) + .count(); + + long totalRuns = skillTotal + codeTotal; + long totalHits = skillHits + codeHits; + + double skillRate = skillTotal > 0 ? (double) skillHits / skillTotal : 1.0; + double codeRate = codeTotal > 0 ? (double) codeHits / codeTotal : 1.0; + double overallRate = totalRuns > 0 ? 
(double) totalHits / totalRuns : 1.0; + + String sep = "=".repeat(62); + System.out.println("\n" + sep); + System.out.println(" SKILL BENCHMARK RECALL SUMMARY"); + System.out.println(sep); + + System.out.println("\n Skill recall:"); + RESULTS.entrySet().stream() + .filter(e -> e.getKey().startsWith("SKILL_RECALL")) + .sorted(Map.Entry.comparingByKey()) + .forEach( + e -> { + String label = e.getKey().substring("SKILL_RECALL/".length()); + System.out.printf( + " %s %s%n", e.getValue() ? "[PASS]" : "[FAIL]", label); + }); + + System.out.println("\n Code execution recall:"); + RESULTS.entrySet().stream() + .filter(e -> e.getKey().startsWith("CODE_EXEC")) + .sorted(Map.Entry.comparingByKey()) + .forEach( + e -> { + String label = e.getKey().substring("CODE_EXEC/".length()); + System.out.printf( + " %s %s%n", e.getValue() ? "[PASS]" : "[FAIL]", label); + }); + + System.out.println(); + System.out.printf( + " Skill recall: %d / %d (%.0f%%)%n", + skillHits, skillTotal, skillRate * 100); + System.out.printf( + " Code execution recall: %d / %d (%.0f%%)%n", + codeHits, codeTotal, codeRate * 100); + System.out.printf( + " Overall: %d / %d (%.0f%%) threshold >= %.0f%%%n", + totalHits, totalRuns, overallRate * 100, RECALL_THRESHOLD * 100); + System.out.println(sep + "\n"); + + assertTrue( + overallRate >= RECALL_THRESHOLD, + String.format( + "Recall rate %.0f%% (%d/%d) is below the %.0f%% threshold." 
+ + " skill=%.0f%% (%d/%d), code=%.0f%% (%d/%d)", + overallRate * 100, + totalHits, + totalRuns, + RECALL_THRESHOLD * 100, + skillRate * 100, + skillHits, + skillTotal, + codeRate * 100, + codeHits, + codeTotal)); + } + + // ------------------------------------------------------------------------- + // Shared helpers + // ------------------------------------------------------------------------- + + private void runSkillRecallTest(ModelProvider provider, String targetSkillName, String prompt) + throws IOException { + String resultKey = "SKILL_RECALL/" + targetSkillName + "/" + provider.getProviderName(); + System.out.println( + "\n>>> Skill Recall [" + targetSkillName + "] | " + provider.getProviderName()); + + try (ClasspathSkillRepository repo = new ClasspathSkillRepository(SKILLS_CLASSPATH)) { + + Toolkit toolkit = new Toolkit(); + SkillBox skillBox = new SkillBox(toolkit); + + for (String name : ALL_SKILL_NAMES) { + AgentSkill skill = repo.getSkill(name); + assertNotNull(skill, "Benchmark skill not found in classpath: " + name); + skillBox.registration().skill(skill).apply(); + } + skillBox.registerSkillLoadTool(); + + AgentSkill targetSkill = repo.getSkill(targetSkillName); + String expectedSkillId = targetSkill.getSkillId(); + System.out.println(" expected skillId : " + expectedSkillId); + System.out.println(" prompt : " + prompt); + + AtomicBoolean loadedCorrectSkill = new AtomicBoolean(false); + + ReActAgent agent = + provider.createAgentBuilder("SkillRecallAgent-" + targetSkillName, toolkit) + .memory(new InMemoryMemory()) + .maxIters(3) + .skillBox(skillBox) + .hook( + new Hook() { + @Override + public <T extends HookEvent> Mono<T> onEvent(T event) { + if (event instanceof PostActingEvent postActing) { + ToolUseBlock toolUse = postActing.getToolUse(); + if (toolUse != null + && "load_skill_through_path" + .equals(toolUse.getName()) + && expectedSkillId.equals( + toolUse.getInput() + .get("skillId"))) { + loadedCorrectSkill.set(true); + postActing.stopAgent(); + } + } + return 
Mono.just(event); + } + }) + .build(); + + try { + agent.call(TestUtils.createUserMessage("User", prompt)).block(TIMEOUT); + } catch (Exception e) { + System.out.println(" agent error: " + e.getMessage()); + } + + boolean passed = loadedCorrectSkill.get(); + RESULTS.put(resultKey, passed); + + if (passed) { + System.out.println( + "<<< [PASS] Skill recall [" + + targetSkillName + + "] | " + + provider.getProviderName()); + } else { + System.out.println( + "<<< [FAIL] Skill recall [" + + targetSkillName + + "] | " + + provider.getProviderName() + + " — did not call load_skill_through_path(skillId='" + + expectedSkillId + + "')"); + } + } + } + + private void runCodeExecutionRecallTest( + ModelProvider provider, + String skillName, + String expectedScriptRelativePath, + String prompt, + Path tempDir) + throws IOException { + String resultKey = "CODE_EXEC/" + skillName + "/" + provider.getProviderName(); + System.out.println( + "\n>>> Code Execution Recall [" + skillName + "] | " + provider.getProviderName()); + + try (ClasspathSkillRepository repo = new ClasspathSkillRepository(SKILLS_CLASSPATH)) { + + AgentSkill skill = repo.getSkill(skillName); + assertNotNull(skill, "Benchmark skill not found in classpath: " + skillName); + + Toolkit toolkit = new Toolkit(); + SkillBox skillBox = new SkillBox(toolkit); + skillBox.registration().skill(skill).apply(); + skillBox.registerSkillLoadTool(); + + String expectedScriptAbsPath = + tempDir.resolve("skills") + .resolve(skill.getSkillId()) + .resolve(expectedScriptRelativePath) + .toAbsolutePath() + .toString(); + System.out.println(" expected script : " + expectedScriptAbsPath); + System.out.println(" prompt : " + prompt); + + AtomicReference<String> capturedCommand = new AtomicReference<>(""); + AtomicBoolean calledCorrectScript = new AtomicBoolean(false); + + ShellCommandTool interceptingShell = + new ShellCommandTool( + null, + Set.of("ls"), + command -> { + capturedCommand.set(command); + if 
(command.contains(expectedScriptAbsPath)) { + calledCorrectScript.set(true); + throw new RuntimeException( + "Script reference detected — test passed"); + } + return false; + }); + + skillBox.codeExecution() + .workDir(tempDir.toString()) + .withShell(interceptingShell) + .enable(); + + ReActAgent agent = + provider.createAgentBuilder("CodeExecAgent-" + skillName, toolkit) + .memory(new InMemoryMemory()) + .maxIters(6) + .skillBox(skillBox) + .build(); + + try { + agent.call(TestUtils.createUserMessage("User", prompt)).block(TIMEOUT); + } catch (Exception e) { + // RuntimeException thrown by callback on successful detection is expected + } + + // Print tool call trace for diagnosis + agent.getMemory().getMessages().stream() + .filter(msg -> msg.getRole() == MsgRole.ASSISTANT) + .filter(msg -> msg.hasContentBlocks(ToolUseBlock.class)) + .flatMap(msg -> msg.getContentBlocks(ToolUseBlock.class).stream()) + .forEach( + tb -> + System.out.println( + " tool: " + tb.getName() + " " + tb.getInput())); + + boolean passed = calledCorrectScript.get(); + RESULTS.put(resultKey, passed); + + if (passed) { + System.out.println( + "<<< [PASS] Code execution recall [" + + skillName + + "] | " + + provider.getProviderName()); + } else { + System.out.println( + "<<< [FAIL] Code execution recall [" + + skillName + + "] | " + + provider.getProviderName() + + " — last command: '" + + capturedCommand.get() + + "'"); + } + } + } +} diff --git a/agentscope-core/src/test/resources/e2e-skills/data-report/SKILL.md b/agentscope-core/src/test/resources/e2e-skills/data-report/SKILL.md new file mode 100644 index 000000000..8e6933770 --- /dev/null +++ b/agentscope-core/src/test/resources/e2e-skills/data-report/SKILL.md @@ -0,0 +1,17 @@ +--- +name: data-report +description: Use this skill when the task involves analyzing data to compute statistics, aggregations, or summaries, and presenting the results as a report. 
Suitable for tasks like "summarize this dataset", "calculate averages and totals", "generate a weekly sales report", or "show me trends in this data". Do NOT use for simple format conversion between file types. +--- +# Data Report Skill + +Analyzes structured data and generates statistical summary reports. + +## Available Scripts + +- `scripts/summarize.py` — Compute descriptive statistics (count, mean, min, max, stddev) for numeric columns and output a Markdown report + +## Usage + +``` +python3 scripts/summarize.py --input data.csv --output report.md +``` diff --git a/agentscope-core/src/test/resources/e2e-skills/data-report/scripts/summarize.py b/agentscope-core/src/test/resources/e2e-skills/data-report/scripts/summarize.py new file mode 100644 index 000000000..68f37420e --- /dev/null +++ b/agentscope-core/src/test/resources/e2e-skills/data-report/scripts/summarize.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python3 +"""Compute descriptive statistics for CSV numeric columns and output a Markdown report.""" +import argparse +import csv +import math +import sys +from collections import defaultdict + + +def summarize(input_path: str, output_path: str | None) -> None: + with open(input_path, newline="", encoding="utf-8") as f: + rows = list(csv.DictReader(f)) + if not rows: + print("Empty input", file=sys.stderr) + sys.exit(1) + + numeric: dict[str, list[float]] = defaultdict(list) + for row in rows: + for k, v in row.items(): + try: + numeric[k].append(float(v)) + except (ValueError, TypeError): + pass + + lines = [f"# Data Report\n\n**Rows:** {len(rows)}\n"] + for col, vals in numeric.items(): + n = len(vals) + mean = sum(vals) / n + variance = sum((x - mean) ** 2 for x in vals) / n + lines.append(f"## {col}") + lines.append(f"- Count: {n}") + lines.append(f"- Min: {min(vals):.4g}") + lines.append(f"- Max: {max(vals):.4g}") + lines.append(f"- Mean: {mean:.4g}") + lines.append(f"- Std: {math.sqrt(variance):.4g}\n") + + report = "\n".join(lines) + if output_path: + with 
open(output_path, "w", encoding="utf-8") as f: + f.write(report) + print(f"Report written to {output_path}") + else: + print(report) + + +if __name__ == "__main__": + p = argparse.ArgumentParser(description="Generate data summary report") + p.add_argument("--input", required=True, help="Input CSV file") + p.add_argument("--output", help="Output Markdown file (stdout if omitted)") + args = p.parse_args() + summarize(args.input, args.output) diff --git a/agentscope-core/src/test/resources/e2e-skills/data-transform/SKILL.md b/agentscope-core/src/test/resources/e2e-skills/data-transform/SKILL.md new file mode 100644 index 000000000..d8cfbd730 --- /dev/null +++ b/agentscope-core/src/test/resources/e2e-skills/data-transform/SKILL.md @@ -0,0 +1,19 @@ +--- +name: data-transform +description: Use this skill when the task involves converting or reformatting structured data between file formats such as CSV, JSON, XML, or YAML. Suitable for tasks like "convert this CSV to JSON", "reformat my data file", or "change the file format of this dataset". Do NOT use for statistical analysis, aggregation, or report generation. +--- +# Data Transform Skill + +Converts structured data files between formats (CSV ↔ JSON ↔ XML ↔ YAML). 
+ +## Available Scripts + +- `scripts/csv_to_json.py` — Convert a CSV file to JSON array format +- `scripts/json_to_csv.py` — Flatten a JSON array into a CSV file + +## Usage + +``` +python3 scripts/csv_to_json.py --input data.csv --output data.json +python3 scripts/json_to_csv.py --input data.json --output data.csv +``` diff --git a/agentscope-core/src/test/resources/e2e-skills/data-transform/scripts/csv_to_json.py b/agentscope-core/src/test/resources/e2e-skills/data-transform/scripts/csv_to_json.py new file mode 100644 index 000000000..7bb255b6d --- /dev/null +++ b/agentscope-core/src/test/resources/e2e-skills/data-transform/scripts/csv_to_json.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python3 +"""Convert a CSV file to a JSON array.""" +import argparse +import csv +import json +import sys + + +def csv_to_json(input_path: str, output_path: str | None) -> None: + with open(input_path, newline="", encoding="utf-8") as f: + rows = list(csv.DictReader(f)) + result = json.dumps(rows, indent=2, ensure_ascii=False) + if output_path: + with open(output_path, "w", encoding="utf-8") as f: + f.write(result) + print(f"Written {len(rows)} records to {output_path}") + else: + print(result) + + +if __name__ == "__main__": + p = argparse.ArgumentParser(description="Convert CSV to JSON") + p.add_argument("--input", required=True, help="Input CSV file") + p.add_argument("--output", help="Output JSON file (stdout if omitted)") + args = p.parse_args() + csv_to_json(args.input, args.output) diff --git a/agentscope-core/src/test/resources/e2e-skills/data-transform/scripts/json_to_csv.py b/agentscope-core/src/test/resources/e2e-skills/data-transform/scripts/json_to_csv.py new file mode 100644 index 000000000..fefa62e85 --- /dev/null +++ b/agentscope-core/src/test/resources/e2e-skills/data-transform/scripts/json_to_csv.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python3 +"""Flatten a JSON array to CSV.""" +import argparse +import csv +import json +import sys + + +def json_to_csv(input_path: str, 
output_path: str | None) -> None: + with open(input_path, encoding="utf-8") as f: + rows = json.load(f) + if not rows: + print("Empty input", file=sys.stderr) + sys.exit(1) + fieldnames = list(rows[0].keys()) + if output_path: + with open(output_path, "w", newline="", encoding="utf-8") as f: + w = csv.DictWriter(f, fieldnames=fieldnames) + w.writeheader() + w.writerows(rows) + print(f"Written {len(rows)} rows to {output_path}") + else: + w = csv.DictWriter(sys.stdout, fieldnames=fieldnames) + w.writeheader() + w.writerows(rows) + + +if __name__ == "__main__": + p = argparse.ArgumentParser(description="Convert JSON array to CSV") + p.add_argument("--input", required=True, help="Input JSON file") + p.add_argument("--output", help="Output CSV file (stdout if omitted)") + args = p.parse_args() + json_to_csv(args.input, args.output) diff --git a/agentscope-core/src/test/resources/e2e-skills/git-changelog/SKILL.md b/agentscope-core/src/test/resources/e2e-skills/git-changelog/SKILL.md new file mode 100644 index 000000000..26bf1f259 --- /dev/null +++ b/agentscope-core/src/test/resources/e2e-skills/git-changelog/SKILL.md @@ -0,0 +1,18 @@ +--- +name: git-changelog +description: Use this skill when the task involves generating a changelog, release notes, or commit summary from a Git repository's history. Suitable for tasks like "generate a changelog for v2.0", "summarize what changed since last release", or "create release notes from git commits". Requires access to a Git repository. +--- +# Git Changelog Skill + +Generates formatted changelogs and release notes from Git commit history. 
+ +## Available Scripts + +- `scripts/generate_changelog.py` — Read git log between two refs and produce a grouped Markdown changelog (features, fixes, chores) + +## Usage + +``` +python3 scripts/generate_changelog.py --from v1.0.0 --to HEAD --output CHANGELOG.md +python3 scripts/generate_changelog.py --from v1.0.0 --to v2.0.0 +``` diff --git a/agentscope-core/src/test/resources/e2e-skills/git-changelog/scripts/generate_changelog.py b/agentscope-core/src/test/resources/e2e-skills/git-changelog/scripts/generate_changelog.py new file mode 100644 index 000000000..49bd3d93c --- /dev/null +++ b/agentscope-core/src/test/resources/e2e-skills/git-changelog/scripts/generate_changelog.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python3 +"""Generate a grouped Markdown changelog from git commit history.""" +import argparse +import re +import subprocess +import sys +from collections import defaultdict + + +COMMIT_RE = re.compile(r"^(?P<type>feat|fix|chore|docs|refactor|perf|test|ci|build)(\(.*?\))?!?: (?P<desc>.+)") +TYPE_LABELS = { + "feat": "Features", + "fix": "Bug Fixes", + "perf": "Performance", + "refactor": "Refactoring", + "docs": "Documentation", + "chore": "Chores", + "test": "Tests", + "ci": "CI", + "build": "Build", +} + + +def get_commits(from_ref: str, to_ref: str) -> list[str]: + result = subprocess.run( + ["git", "log", f"{from_ref}..{to_ref}", "--pretty=format:%s"], + capture_output=True, + text=True, + check=True, + ) + return [line.strip() for line in result.stdout.splitlines() if line.strip()] + + +def generate_changelog(from_ref: str, to_ref: str, output_path: str | None) -> None: + try: + commits = get_commits(from_ref, to_ref) + except subprocess.CalledProcessError as e: + print(f"git error: {e.stderr}", file=sys.stderr) + sys.exit(1) + + grouped: dict[str, list[str]] = defaultdict(list) + uncategorized = [] + for msg in commits: + m = COMMIT_RE.match(msg) + if m: + grouped[m.group("type")].append(m.group("desc")) + else: + uncategorized.append(msg) + + lines = [f"# 
Changelog\n\n**Range:** `{from_ref}` → `{to_ref}`\n"] + for ctype, label in TYPE_LABELS.items(): + if ctype in grouped: + lines.append(f"## {label}") + for desc in grouped[ctype]: + lines.append(f"- {desc}") + lines.append("") + if uncategorized: + lines.append("## Other") + for msg in uncategorized: + lines.append(f"- {msg}") + lines.append("") + + changelog = "\n".join(lines) + if output_path: + with open(output_path, "w", encoding="utf-8") as f: + f.write(changelog) + print(f"Changelog written to {output_path} ({len(commits)} commits)") + else: + print(changelog) + + +if __name__ == "__main__": + p = argparse.ArgumentParser(description="Generate changelog from git history") + p.add_argument("--from", dest="from_ref", required=True, help="Start ref (tag or commit)") + p.add_argument("--to", dest="to_ref", default="HEAD", help="End ref (default: HEAD)") + p.add_argument("--output", help="Output Markdown file (stdout if omitted)") + args = p.parse_args() + generate_changelog(args.from_ref, args.to_ref, args.output) diff --git a/agentscope-core/src/test/resources/e2e-skills/image-resize/SKILL.md b/agentscope-core/src/test/resources/e2e-skills/image-resize/SKILL.md new file mode 100644 index 000000000..b7df94924 --- /dev/null +++ b/agentscope-core/src/test/resources/e2e-skills/image-resize/SKILL.md @@ -0,0 +1,17 @@ +--- +name: image-resize +description: Use this skill when the task involves resizing, scaling, or compressing image files. Suitable for tasks like "resize these photos to 800px wide", "compress images to reduce file size", or "batch scale all JPEGs in a folder". Only relevant for image processing tasks — do NOT use for data files, text, or non-image tasks. +--- +# Image Resize Skill + +Batch resizes and compresses image files (JPEG, PNG, WebP). 
+ +## Available Scripts + +- `scripts/resize.py` — Resize one or more images to a target width/height; preserves aspect ratio by default + +## Usage + +``` +python3 scripts/resize.py --input ./photos/ --width 800 --output ./resized/ +``` diff --git a/agentscope-core/src/test/resources/e2e-skills/image-resize/scripts/resize.py b/agentscope-core/src/test/resources/e2e-skills/image-resize/scripts/resize.py new file mode 100644 index 000000000..ee299eea9 --- /dev/null +++ b/agentscope-core/src/test/resources/e2e-skills/image-resize/scripts/resize.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python3 +"""Resize and optionally compress image files using Pillow.""" +import argparse +import os +import sys +from pathlib import Path + +try: + from PIL import Image +except ImportError: + print("Pillow not installed. Run: pip install Pillow", file=sys.stderr) + sys.exit(1) + + +def resize_images(input_path: str, output_dir: str, width: int, height: int | None) -> None: + src = Path(input_path) + out = Path(output_dir) + out.mkdir(parents=True, exist_ok=True) + + files = list(src.glob("*")) if src.is_dir() else [src] + supported = {".jpg", ".jpeg", ".png", ".webp"} + images = [f for f in files if f.suffix.lower() in supported] + + if not images: + print("No supported images found.", file=sys.stderr) + sys.exit(1) + + for img_path in images: + with Image.open(img_path) as img: + orig_w, orig_h = img.size + if height is None: + ratio = width / orig_w + new_size = (width, int(orig_h * ratio)) + else: + new_size = (width, height) + resized = img.resize(new_size, Image.LANCZOS) + dest = out / img_path.name + resized.save(dest) + print(f" {img_path.name}: {orig_w}x{orig_h} -> {new_size[0]}x{new_size[1]}") + + print(f"Done. 
{len(images)} image(s) written to {out}") + + +if __name__ == "__main__": + p = argparse.ArgumentParser(description="Batch resize images") + p.add_argument("--input", required=True, help="Input image file or directory") + p.add_argument("--width", required=True, type=int, help="Target width in pixels") + p.add_argument("--height", type=int, help="Target height (maintains aspect ratio if omitted)") + p.add_argument("--output", required=True, help="Output directory") + args = p.parse_args() + resize_images(args.input, args.output, args.width, args.height) diff --git a/agentscope-core/src/test/resources/e2e-skills/log-parser/SKILL.md b/agentscope-core/src/test/resources/e2e-skills/log-parser/SKILL.md new file mode 100644 index 000000000..cf13808c8 --- /dev/null +++ b/agentscope-core/src/test/resources/e2e-skills/log-parser/SKILL.md @@ -0,0 +1,18 @@ +--- +name: log-parser +description: Use this skill when the task involves inspecting, searching, or extracting information from application log files. Suitable for tasks like "my application is crashing and I need to find the errors", "scan these logs for warnings", "extract all exceptions from this log file", or "find slow requests in my server logs". Triggered by debugging, incident investigation, or log analysis needs. +--- +# Log Parser Skill + +Parses application log files to extract errors, warnings, and patterns. 
+ +## Available Scripts + +- `scripts/extract_errors.py` — Scan a log file and extract all ERROR/WARN lines with timestamps into a structured summary + +## Usage + +``` +python3 scripts/extract_errors.py --input app.log --output errors.json +python3 scripts/extract_errors.py --input app.log --level ERROR +``` diff --git a/agentscope-core/src/test/resources/e2e-skills/log-parser/scripts/extract_errors.py b/agentscope-core/src/test/resources/e2e-skills/log-parser/scripts/extract_errors.py new file mode 100644 index 000000000..457dc47e0 --- /dev/null +++ b/agentscope-core/src/test/resources/e2e-skills/log-parser/scripts/extract_errors.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python3 +"""Extract ERROR and WARN lines from application log files.""" +import argparse +import json +import re +import sys +from pathlib import Path + +LOG_PATTERN = re.compile( + r"(?P<timestamp>\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}(?:\.\d+)?)" + r".*?\b(?P<level>ERROR|WARN(?:ING)?)\b" + r"(?P<rest>.*)" +) + + +def extract_errors(input_path: str, output_path: str | None, level: str) -> None: + levels = {"ERROR", "WARN", "WARNING"} if level == "ALL" else {level, level + "ING"} + entries = [] + + with open(input_path, encoding="utf-8", errors="replace") as f: + for lineno, line in enumerate(f, 1): + m = LOG_PATTERN.search(line) + if m and m.group("level") in levels: + entries.append( + { + "line": lineno, + "timestamp": m.group("timestamp"), + "level": m.group("level"), + "message": m.group("rest").strip(), + } + ) + + summary = { + "source": input_path, + "total_matches": len(entries), + "entries": entries, + } + + result = json.dumps(summary, indent=2, ensure_ascii=False) + if output_path: + with open(output_path, "w", encoding="utf-8") as f: + f.write(result) + print(f"Found {len(entries)} entries. 
Written to {output_path}") + else: + print(result) + + +if __name__ == "__main__": + p = argparse.ArgumentParser(description="Extract errors/warnings from log files") + p.add_argument("--input", required=True, help="Log file path") + p.add_argument("--output", help="Output JSON file (stdout if omitted)") + p.add_argument( + "--level", default="ALL", choices=["ALL", "ERROR", "WARN"], help="Filter by level" + ) + args = p.parse_args() + extract_errors(args.input, args.output, args.level) From 17b5f784e1519c4e50343b5f12f104db4be3ef4b Mon Sep 17 00:00:00 2001 From: fang-tech Date: Sat, 28 Mar 2026 20:18:48 +0800 Subject: [PATCH 3/6] fix(skill): fix test assertion wording and add license headers to benchmark scripts - Fix AgentSkillPromptProviderTest: update assertion from "existing scripts" to "existing Python script" to match actual prompt template text - Add Apache 2.0 license headers to all 6 e2e benchmark Python scripts Co-Authored-By: Claude Sonnet 4.6 (1M context) --- .../core/skill/AgentSkillPromptProviderTest.java | 2 +- .../e2e-skills/data-report/scripts/summarize.py | 16 +++++++++++++++- .../data-transform/scripts/csv_to_json.py | 16 +++++++++++++++- .../data-transform/scripts/json_to_csv.py | 16 +++++++++++++++- .../git-changelog/scripts/generate_changelog.py | 16 +++++++++++++++- .../e2e-skills/image-resize/scripts/resize.py | 16 +++++++++++++++- .../log-parser/scripts/extract_errors.py | 16 +++++++++++++++- 7 files changed, 91 insertions(+), 7 deletions(-) diff --git a/agentscope-core/src/test/java/io/agentscope/core/skill/AgentSkillPromptProviderTest.java b/agentscope-core/src/test/java/io/agentscope/core/skill/AgentSkillPromptProviderTest.java index 058747e8e..b0158ca2a 100644 --- a/agentscope-core/src/test/java/io/agentscope/core/skill/AgentSkillPromptProviderTest.java +++ b/agentscope-core/src/test/java/io/agentscope/core/skill/AgentSkillPromptProviderTest.java @@ -199,7 +199,7 @@ void testCodeExecutionSectionMentionsAbsolutePaths(@TempDir Path 
tempDir) { String prompt = provider.getSkillSystemPrompt(); assertTrue(prompt.contains("absolute paths")); - assertTrue(prompt.contains("existing scripts")); + assertTrue(prompt.contains("existing Python script")); } @Test diff --git a/agentscope-core/src/test/resources/e2e-skills/data-report/scripts/summarize.py b/agentscope-core/src/test/resources/e2e-skills/data-report/scripts/summarize.py index 68f37420e..478050b5b 100644 --- a/agentscope-core/src/test/resources/e2e-skills/data-report/scripts/summarize.py +++ b/agentscope-core/src/test/resources/e2e-skills/data-report/scripts/summarize.py @@ -1,4 +1,18 @@ #!/usr/bin/env python3 +# Copyright 2024-2026 the original author or authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ """Compute descriptive statistics for CSV numeric columns and output a Markdown report.""" import argparse import csv @@ -48,4 +62,4 @@ def summarize(input_path: str, output_path: str | None) -> None: p.add_argument("--input", required=True, help="Input CSV file") p.add_argument("--output", help="Output Markdown file (stdout if omitted)") args = p.parse_args() - summarize(args.input, args.output) + summarize(args.input, args.output) \ No newline at end of file diff --git a/agentscope-core/src/test/resources/e2e-skills/data-transform/scripts/csv_to_json.py b/agentscope-core/src/test/resources/e2e-skills/data-transform/scripts/csv_to_json.py index 7bb255b6d..c626288a3 100644 --- a/agentscope-core/src/test/resources/e2e-skills/data-transform/scripts/csv_to_json.py +++ b/agentscope-core/src/test/resources/e2e-skills/data-transform/scripts/csv_to_json.py @@ -1,4 +1,18 @@ #!/usr/bin/env python3 +# Copyright 2024-2026 the original author or authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ """Convert a CSV file to a JSON array.""" import argparse import csv @@ -23,4 +37,4 @@ def csv_to_json(input_path: str, output_path: str | None) -> None: p.add_argument("--input", required=True, help="Input CSV file") p.add_argument("--output", help="Output JSON file (stdout if omitted)") args = p.parse_args() - csv_to_json(args.input, args.output) + csv_to_json(args.input, args.output) \ No newline at end of file diff --git a/agentscope-core/src/test/resources/e2e-skills/data-transform/scripts/json_to_csv.py b/agentscope-core/src/test/resources/e2e-skills/data-transform/scripts/json_to_csv.py index fefa62e85..a568e316d 100644 --- a/agentscope-core/src/test/resources/e2e-skills/data-transform/scripts/json_to_csv.py +++ b/agentscope-core/src/test/resources/e2e-skills/data-transform/scripts/json_to_csv.py @@ -1,4 +1,18 @@ #!/usr/bin/env python3 +# Copyright 2024-2026 the original author or authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ """Flatten a JSON array to CSV.""" import argparse import csv @@ -30,4 +44,4 @@ def json_to_csv(input_path: str, output_path: str | None) -> None: p.add_argument("--input", required=True, help="Input JSON file") p.add_argument("--output", help="Output CSV file (stdout if omitted)") args = p.parse_args() - json_to_csv(args.input, args.output) + json_to_csv(args.input, args.output) \ No newline at end of file diff --git a/agentscope-core/src/test/resources/e2e-skills/git-changelog/scripts/generate_changelog.py b/agentscope-core/src/test/resources/e2e-skills/git-changelog/scripts/generate_changelog.py index 49bd3d93c..fd2c811e4 100644 --- a/agentscope-core/src/test/resources/e2e-skills/git-changelog/scripts/generate_changelog.py +++ b/agentscope-core/src/test/resources/e2e-skills/git-changelog/scripts/generate_changelog.py @@ -1,4 +1,18 @@ #!/usr/bin/env python3 +# Copyright 2024-2026 the original author or authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ """Generate a grouped Markdown changelog from git commit history.""" import argparse import re @@ -75,4 +89,4 @@ def generate_changelog(from_ref: str, to_ref: str, output_path: str | None) -> N p.add_argument("--to", dest="to_ref", default="HEAD", help="End ref (default: HEAD)") p.add_argument("--output", help="Output Markdown file (stdout if omitted)") args = p.parse_args() - generate_changelog(args.from_ref, args.to_ref, args.output) + generate_changelog(args.from_ref, args.to_ref, args.output) \ No newline at end of file diff --git a/agentscope-core/src/test/resources/e2e-skills/image-resize/scripts/resize.py b/agentscope-core/src/test/resources/e2e-skills/image-resize/scripts/resize.py index ee299eea9..6bb88f312 100644 --- a/agentscope-core/src/test/resources/e2e-skills/image-resize/scripts/resize.py +++ b/agentscope-core/src/test/resources/e2e-skills/image-resize/scripts/resize.py @@ -1,4 +1,18 @@ #!/usr/bin/env python3 +# Copyright 2024-2026 the original author or authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ """Resize and optionally compress image files using Pillow.""" import argparse import os @@ -48,4 +62,4 @@ def resize_images(input_path: str, output_dir: str, width: int, height: int | No p.add_argument("--height", type=int, help="Target height (maintains aspect ratio if omitted)") p.add_argument("--output", required=True, help="Output directory") args = p.parse_args() - resize_images(args.input, args.output, args.width, args.height) + resize_images(args.input, args.output, args.width, args.height) \ No newline at end of file diff --git a/agentscope-core/src/test/resources/e2e-skills/log-parser/scripts/extract_errors.py b/agentscope-core/src/test/resources/e2e-skills/log-parser/scripts/extract_errors.py index 457dc47e0..e2a80c227 100644 --- a/agentscope-core/src/test/resources/e2e-skills/log-parser/scripts/extract_errors.py +++ b/agentscope-core/src/test/resources/e2e-skills/log-parser/scripts/extract_errors.py @@ -1,4 +1,18 @@ #!/usr/bin/env python3 +# Copyright 2024-2026 the original author or authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ """Extract ERROR and WARN lines from application log files.""" import argparse import json @@ -53,4 +67,4 @@ def extract_errors(input_path: str, output_path: str | None, level: str) -> None "--level", default="ALL", choices=["ALL", "ERROR", "WARN"], help="Filter by level" ) args = p.parse_args() - extract_errors(args.input, args.output, args.level) + extract_errors(args.input, args.output, args.level) \ No newline at end of file From cf64ac5922137a77cf00fe63e1dbd041ad501bf9 Mon Sep 17 00:00:00 2001 From: fang-tech Date: Sat, 28 Mar 2026 20:55:01 +0800 Subject: [PATCH 4/6] fix(skill): fix test assertion wording and add license headers to benchmark scripts - Fix AgentSkillPromptProviderTest: update assertion from "existing scripts" to "existing Python script" to match actual prompt template text - Add Apache 2.0 license headers to all 6 e2e benchmark Python scripts Co-Authored-By: Claude Sonnet 4.6 (1M context) --- .../java/io/agentscope/core/skill/AgentSkillPromptProvider.java | 2 +- .../io/agentscope/core/skill/AgentSkillPromptProviderTest.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/agentscope-core/src/main/java/io/agentscope/core/skill/AgentSkillPromptProvider.java b/agentscope-core/src/main/java/io/agentscope/core/skill/AgentSkillPromptProvider.java index 0f6f56f26..59d4d5b02 100644 --- a/agentscope-core/src/main/java/io/agentscope/core/skill/AgentSkillPromptProvider.java +++ b/agentscope-core/src/main/java/io/agentscope/core/skill/AgentSkillPromptProvider.java @@ -95,7 +95,7 @@ public class AgentSkillPromptProvider { # Explore what scripts are available for a skill execute_shell_command(command="ls %s/data-analysis_builtin/scripts/") - # Run an existing Python script with absolute path + # Run an existing script with absolute path execute_shell_command(command="python3 %s/data-analysis_builtin/scripts/analyze.py") """; diff --git a/agentscope-core/src/test/java/io/agentscope/core/skill/AgentSkillPromptProviderTest.java 
b/agentscope-core/src/test/java/io/agentscope/core/skill/AgentSkillPromptProviderTest.java index b0158ca2a..389e2d1da 100644 --- a/agentscope-core/src/test/java/io/agentscope/core/skill/AgentSkillPromptProviderTest.java +++ b/agentscope-core/src/test/java/io/agentscope/core/skill/AgentSkillPromptProviderTest.java @@ -199,7 +199,7 @@ void testCodeExecutionSectionMentionsAbsolutePaths(@TempDir Path tempDir) { String prompt = provider.getSkillSystemPrompt(); assertTrue(prompt.contains("absolute paths")); - assertTrue(prompt.contains("existing Python script")); + assertTrue(prompt.contains("existing script")); } @Test From 1a13dca5013b27054b90ad8baec82998eea83bb2 Mon Sep 17 00:00:00 2001 From: fang-tech Date: Sat, 28 Mar 2026 21:40:38 +0800 Subject: [PATCH 5/6] apply copilot suggestions --- .../core/skill/AgentSkillPromptProvider.java | 28 +++++++++++++++---- .../io/agentscope/core/skill/SkillBox.java | 7 +++-- .../io/agentscope/core/e2e/SkillE2ETest.java | 21 +++++++++----- docs/en/task/agent-skill.md | 2 +- docs/zh/task/agent-skill.md | 2 +- 5 files changed, 43 insertions(+), 17 deletions(-) diff --git a/agentscope-core/src/main/java/io/agentscope/core/skill/AgentSkillPromptProvider.java b/agentscope-core/src/main/java/io/agentscope/core/skill/AgentSkillPromptProvider.java index 59d4d5b02..3593b8b9d 100644 --- a/agentscope-core/src/main/java/io/agentscope/core/skill/AgentSkillPromptProvider.java +++ b/agentscope-core/src/main/java/io/agentscope/core/skill/AgentSkillPromptProvider.java @@ -33,7 +33,7 @@ public class AgentSkillPromptProvider { private final SkillRegistry skillRegistry; private final String instruction; private final String template; - private boolean codeExecutionEable; + private boolean codeExecutionEnabled; private String uploadDir; private String codeExecutionInstruction; @@ -64,8 +64,7 @@ public class AgentSkillPromptProvider { """; - // First %s = uploadDir absolute path (description), Second %s = uploadDir absolute path - // (example paths) + // 
Every %s placeholder in the template will be replaced with the uploadDir absolute path public static final String DEFAULT_CODE_EXECUTION_INSTRUCTION = """ @@ -172,21 +171,40 @@ public String getSkillSystemPrompt() { codeExecutionInstruction != null ? codeExecutionInstruction : DEFAULT_CODE_EXECUTION_INSTRUCTION; - sb.append( - String.format(template, uploadDir, uploadDir, uploadDir, uploadDir, uploadDir)); + sb.append(template.replace("%s", uploadDir)); } return sb.toString(); } + /** + * Sets whether code execution instructions are included in the skill system prompt. + * + * @param codeExecutionEable {@code true} to append code execution instructions + */ public void setCodeExecutionEnable(boolean codeExecutionEable) { this.codeExecutionEable = codeExecutionEable; } + /** + * Sets the upload directory whose absolute path replaces every {@code %s} + * placeholder in the code execution instruction template. + * + * @param uploadDir the upload directory path, or {@code null} to disable path substitution + */ public void setUploadDir(Path uploadDir) { this.uploadDir = uploadDir != null ? uploadDir.toAbsolutePath().toString() : null; } + /** + * Sets a custom code execution instruction template. + * + *

Every {@code %s} placeholder in the template will be replaced with + * the {@code uploadDir} absolute path. Pass {@code null} or blank to + * fall back to {@link #DEFAULT_CODE_EXECUTION_INSTRUCTION}. + * + * @param codeExecutionInstruction the custom template, or {@code null}/blank for default + */ public void setCodeExecutionInstruction(String codeExecutionInstruction) { this.codeExecutionInstruction = codeExecutionInstruction == null || codeExecutionInstruction.isBlank() diff --git a/agentscope-core/src/main/java/io/agentscope/core/skill/SkillBox.java b/agentscope-core/src/main/java/io/agentscope/core/skill/SkillBox.java index 9dcbeaa06..33f6d007f 100644 --- a/agentscope-core/src/main/java/io/agentscope/core/skill/SkillBox.java +++ b/agentscope-core/src/main/java/io/agentscope/core/skill/SkillBox.java @@ -987,8 +987,8 @@ public CodeExecutionBuilder withWrite() { * Set a custom code execution instruction for the system prompt. * *

The instruction is appended to the skill system prompt when code execution is enabled. - * Use {@code %s} as a placeholder for the upload directory absolute path — it will be - * substituted up to 5 times (for the description line and example paths). + * Use {@code %s} as a placeholder for the upload directory absolute path — every + * occurrence will be replaced with the actual path. * *

Pass {@code null} or blank to use the default instruction. * @@ -1114,7 +1114,8 @@ public void enable() { enableRead, enableWrite); - skillBox.skillPromptProvider.setCodeExecutionEnable(true); + boolean injectCodeExecutionPrompt = shellEnabled || codeExecutionInstruction != null; + skillBox.skillPromptProvider.setCodeExecutionEnable(injectCodeExecutionPrompt); skillBox.skillPromptProvider.setCodeExecutionInstruction(codeExecutionInstruction); } diff --git a/agentscope-core/src/test/java/io/agentscope/core/e2e/SkillE2ETest.java b/agentscope-core/src/test/java/io/agentscope/core/e2e/SkillE2ETest.java index 545e6c206..8171d92eb 100644 --- a/agentscope-core/src/test/java/io/agentscope/core/e2e/SkillE2ETest.java +++ b/agentscope-core/src/test/java/io/agentscope/core/e2e/SkillE2ETest.java @@ -387,8 +387,6 @@ private void runCodeExecutionRecallTest( capturedCommand.set(command); if (command.contains(expectedScriptAbsPath)) { calledCorrectScript.set(true); - throw new RuntimeException( - "Script reference detected — test passed"); } return false; }); @@ -398,18 +396,27 @@ private void runCodeExecutionRecallTest( .withShell(interceptingShell) .enable(); + Hook earlyExitHook = + new Hook() { + @Override + public Mono onEvent(T event) { + if (event instanceof PostActingEvent postActing + && calledCorrectScript.get()) { + postActing.stopAgent(); + } + return Mono.just(event); + } + }; + ReActAgent agent = provider.createAgentBuilder("CodeExecAgent-" + skillName, toolkit) .memory(new InMemoryMemory()) .maxIters(6) .skillBox(skillBox) + .hook(earlyExitHook) .build(); - try { - agent.call(TestUtils.createUserMessage("User", prompt)).block(TIMEOUT); - } catch (Exception e) { - // RuntimeException thrown by callback on successful detection is expected - } + agent.call(TestUtils.createUserMessage("User", prompt)).block(TIMEOUT); // Print tool call trace for diagnosis agent.getMemory().getMessages().stream() diff --git a/docs/en/task/agent-skill.md b/docs/en/task/agent-skill.md index 
3b94bd6d6..6bf8e7d90 100644 --- a/docs/en/task/agent-skill.md +++ b/docs/en/task/agent-skill.md @@ -348,7 +348,7 @@ When SkillBox injects a system prompt into the Agent, it generates a description When code execution is enabled, the section appended after `` can also be customized via `.codeExecutionInstruction()`: -- **`codeExecutionInstruction`**: Template for the code execution section; `%s` placeholders are replaced with the `uploadDir` absolute path (up to 5 substitutions). Passing `null` or blank uses the built-in default. +- **`codeExecutionInstruction`**: Template for the code execution section; every `%s` placeholder will be replaced with the `uploadDir` absolute path. Passing `null` or blank uses the built-in default. Passing `null` or a blank string for any of these uses the built-in default. diff --git a/docs/zh/task/agent-skill.md b/docs/zh/task/agent-skill.md index 43d51600c..7b96a6161 100644 --- a/docs/zh/task/agent-skill.md +++ b/docs/zh/task/agent-skill.md @@ -342,7 +342,7 @@ SkillBox 在注入给 Agent 的系统提示词中,会为每个已注册的 Skill 开启代码执行后,还可通过 `.codeExecutionInstruction()` 自定义追加在 `` 之后的代码执行说明段落: -- **`codeExecutionInstruction`**: 代码执行说明模板,`%s` 占位符会被替换为 `uploadDir` 的绝对路径(最多替换 5 次)。传 `null` 或空字符串时使用内置默认值 +- **`codeExecutionInstruction`**: 代码执行说明模板,所有 `%s` 占位符都会被替换为 `uploadDir` 的绝对路径。传 `null` 或空字符串时使用内置默认值 三者传 `null` 或空字符串时均使用内置默认值。 From e69294d68f0eb2e903d70ef48e51a1699f2858ed Mon Sep 17 00:00:00 2001 From: fang-tech Date: Sat, 28 Mar 2026 21:40:38 +0800 Subject: [PATCH 6/6] apply copilot suggestions --- .../agentscope/core/skill/AgentSkillPromptProvider.java | 8 ++++---- .../test/java/io/agentscope/core/skill/SkillBoxTest.java | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/agentscope-core/src/main/java/io/agentscope/core/skill/AgentSkillPromptProvider.java b/agentscope-core/src/main/java/io/agentscope/core/skill/AgentSkillPromptProvider.java index 3593b8b9d..a7e6a6625 100644 --- 
a/agentscope-core/src/main/java/io/agentscope/core/skill/AgentSkillPromptProvider.java +++ b/agentscope-core/src/main/java/io/agentscope/core/skill/AgentSkillPromptProvider.java @@ -166,7 +166,7 @@ public String getSkillSystemPrompt() { sb.append(""); // Conditionally append code execution instructions - if (codeExecutionEable && uploadDir != null) { + if (codeExecutionEnabled && uploadDir != null) { String template = codeExecutionInstruction != null ? codeExecutionInstruction @@ -180,10 +180,10 @@ public String getSkillSystemPrompt() { /** * Sets whether code execution instructions are included in the skill system prompt. * - * @param codeExecutionEable {@code true} to append code execution instructions + * @param codeExecutionEnabled {@code true} to append code execution instructions */ - public void setCodeExecutionEnable(boolean codeExecutionEable) { - this.codeExecutionEable = codeExecutionEable; + public void setCodeExecutionEnable(boolean codeExecutionEnabled) { + this.codeExecutionEnabled = codeExecutionEnabled; } /** diff --git a/agentscope-core/src/test/java/io/agentscope/core/skill/SkillBoxTest.java b/agentscope-core/src/test/java/io/agentscope/core/skill/SkillBoxTest.java index c7e76f07e..cc97eb46d 100644 --- a/agentscope-core/src/test/java/io/agentscope/core/skill/SkillBoxTest.java +++ b/agentscope-core/src/test/java/io/agentscope/core/skill/SkillBoxTest.java @@ -1015,7 +1015,7 @@ void testPromptInstructsAbsolutePathsAndExistingScripts() { String prompt = skillBox.getSkillPrompt(); assertTrue(prompt.contains("absolute paths")); - assertTrue(prompt.contains("existing scripts")); + assertTrue(prompt.contains("existing script")); } @Test