diff --git a/.github/actions/strands-agent-runner/action.yml b/.github/actions/strands-agent-runner/action.yml index 6d4c2d7fb..d0e93effe 100644 --- a/.github/actions/strands-agent-runner/action.yml +++ b/.github/actions/strands-agent-runner/action.yml @@ -149,7 +149,7 @@ runs: STRANDS_TOOL_CONSOLE_MODE: 'enabled' BYPASS_TOOL_CONSENT: 'true' run: | - uv run --no-project ${{ runner.temp }}/strands-agent-runner/.github/scripts/python/agent_runner.py "$INPUT_TASK" + uv run --no-project ${{ runner.temp }}/strands-agent-runner/.github/scripts/python/agent_runner.py - name: Capture repository state shell: bash diff --git a/.github/agent-sops/task-release-notes.sop.md b/.github/agent-sops/task-release-notes.sop.md index 5f024da82..8434b9de5 100644 --- a/.github/agent-sops/task-release-notes.sop.md +++ b/.github/agent-sops/task-release-notes.sop.md @@ -8,6 +8,22 @@ You analyze merged pull requests between two git references (tags or branches), **Important**: You are executing in an ephemeral environment. Any files you create (test files, notes, etc.) will be discarded after execution. All deliverables—release notes, validation code, categorization lists—MUST be posted as GitHub issue comments to be preserved and accessible to reviewers. +## Key Principles + +These principles apply throughout the entire workflow and are referenced by name in later sections. + +### Principle 1: Ephemeral Environment +You are executing in an ephemeral environment. All deliverables MUST be posted as GitHub issue comments to be preserved. + +### Principle 2: PR Descriptions May Be Stale +PR descriptions are written at PR creation and may become outdated after code review. Reviewers often request structural changes, API modifications, or feature adjustments that are implemented but NOT reflected in the original description. You MUST cross-reference descriptions with review comments and treat merged code as the source of truth. 
+ +### Principle 3: Validation Is Mandatory +You MUST attempt to validate EVERY code example with behavioral tests. The engineer review fallback is only for cases where you have genuinely tried and failed with documented evidence. + +### Principle 4: Never Remove Features +You MUST NOT remove a feature from release notes because validation failed. Always include a code sample—either validated or marked for engineer review. + ## Steps ### 1. Setup and Input Processing @@ -62,10 +78,10 @@ For each PR identified (from release or API query), fetch additional metadata ne - You MUST retrieve additional metadata for PRs being considered for Major Features or Major Bug Fixes: - PR description/body (essential for understanding the change) - PR labels (if any) + - PR review comments and conversation threads (per **Principle 2**) - You SHOULD retrieve for Major Feature candidates: - Files changed in the PR (to find code examples) -- You MAY retrieve: - - PR review comments if helpful for understanding the change +- You MUST retrieve PR review comments for Major Feature and Major Bug Fix candidates to identify post-description changes - You SHOULD minimize API calls by only fetching detailed metadata for PRs that appear significant based on title/prefix - You MUST track this data for use in categorization and release notes generation @@ -89,18 +105,24 @@ Extract categorization signals from PR titles using conventional commit prefixes - You SHOULD record the prefix-based category for each PR - You MAY encounter PRs without conventional commit prefixes -#### 2.2 Analyze PR Descriptions +#### 2.2 Analyze PR Descriptions and Review Comments Use LLM analysis to understand the significance and user impact of each change. 
**Constraints:** - You MUST read and analyze the PR description for each PR +- Per **Principle 2**, you MUST also review PR comments and review threads to identify changes made after the initial description: + - Look for reviewer comments requesting changes to the implementation + - Look for author responses confirming changes were made + - Look for "LGTM" or approval comments that reference specific modifications + - Pay special attention to comments about API changes, renamed methods, or restructured code +- You MUST treat the actual merged code as the source of truth when descriptions conflict with review feedback - You MUST assess the user-facing impact of the change: - Does it introduce new functionality users will interact with? - Does it fix a bug that users experienced? - Is it purely internal with no user-visible changes? - You MUST identify if the change introduces breaking changes -- You SHOULD identify if the PR includes code examples in its description +- You SHOULD identify if the PR includes code examples in its description (but verify they match the final implementation) - You SHOULD note any links to documentation or related issues - You MAY consider the size and complexity of the change @@ -152,6 +174,10 @@ Present the categorized PRs to the user for review and confirmation. - You MUST wait for user confirmation or recategorization before proceeding - You SHOULD update your categorization based on user feedback - You MAY iterate on categorization if the user requests changes +- When the user promotes a PR to "Major Features" that was not previously in that category: + - You MUST perform Step 3 (Code Snippet Extraction) for the newly promoted PR + - You MUST perform Step 4 (Code Validation) for any code snippets extracted or generated + - You MUST include the validation code for newly promoted features in the Validation Comment (Step 6.1) ### 3. 
Code Snippet Extraction and Generation @@ -163,12 +189,16 @@ Search merged PRs for existing code that demonstrates the new feature. **Constraints:** - You MUST search each Major Feature PR for existing code examples in: - - Test files (especially integration tests or example tests) + - Test files (especially integration tests or example tests) - these are most reliable as they reflect the final implementation - Example applications or scripts in `examples/` directory - - Code snippets in the PR description + - Code snippets in the PR description (but verify per **Principle 2**) - Documentation updates that include code examples - README updates with usage examples -- You MUST prioritize test files that show real usage of the feature +- You MUST cross-reference any examples from PR descriptions with: + - Review comments that may have requested API changes + - The actual merged code to ensure the example is still accurate + - Test files which reflect the working implementation +- You MUST prioritize test files that show real usage of the feature (these are validated against the final code) - You SHOULD look for the simplest, most focused examples - You SHOULD prefer examples that are already validated (from test files) - You MAY examine multiple PRs if a feature spans several PRs @@ -208,60 +238,178 @@ When existing examples are insufficient, generate new code snippets. ### 4. Code Validation -**Note**: This phase is REQUIRED for all code snippets (extracted or generated) that will appear in Major Features sections. Validation must occur AFTER snippets have been extracted or generated in Step 3. +**Note**: This phase is REQUIRED for all code snippets (extracted or generated) that will appear in Major Features sections. Per **Principle 3**, you MUST attempt validation for every example. -#### 4.1 Create Temporary Test Files +#### 4.1 Validation Requirements -Create temporary test files to validate the code snippets. 
+Validation tests MUST verify the actual behavior of the feature, not just syntax correctness. A test that only checks whether code parses or imports succeed is NOT valid validation. + +**Available Testing Resources:** +- **Amazon Bedrock**: You have access to Bedrock models for testing. Use Bedrock when a feature requires a real model provider. +- **Project test fixtures**: The project includes mocked model providers and test utilities in `tests/fixtures/` +- **Integration test patterns**: Examine `tests_integ/` for patterns that test real model interactions + +**Features that genuinely cannot be validated (rare):** +- Features requiring paid third-party API credentials with no mock option AND no Bedrock alternative +- Features requiring specific hardware (GPU, TPU) +- Features requiring live network access to specific external services that cannot be mocked **Constraints:** - You MUST create a temporary test file for each code snippet - You MUST place test files in an appropriate test directory based on the project structure - You MUST include all necessary imports and setup code in the test file - You MUST wrap the snippet in a proper test case +- You MUST include assertions that verify the feature's actual behavior: + - Assert that outputs match expected values + - Assert that state changes occur as expected + - Assert that callbacks/hooks are invoked correctly + - Assert that return types and structures are correct +- You MUST NOT write tests that only verify: + - Code parses without syntax errors + - Imports succeed + - Objects can be instantiated without checking behavior + - Functions can be called without checking results - You SHOULD use the project's testing framework -- You MAY need to mock dependencies or setup test fixtures +- You SHOULD mock external dependencies (APIs, databases) but still verify behavior with mocks +- You MAY need to setup test fixtures that enable behavioral verification - You MAY include additional test code that doesn't appear 
in the release notes -**Example test file structure** (language-specific format will vary): +**Example of GOOD validation** (verifies behavior): +```python +def test_structured_output_validation(): + """Verify that structured output actually validates against the schema.""" + from pydantic import BaseModel + + class UserResponse(BaseModel): + name: str + age: int + + agent = Agent(model=mock_model, output_schema=UserResponse) + result = agent("Get user info") + + # Behavioral assertions - verify the feature works + assert isinstance(result.output, UserResponse) + assert hasattr(result.output, 'name') + assert hasattr(result.output, 'age') + assert isinstance(result.output.age, int) ``` -# Test structure depends on the project's testing framework -# Include necessary imports, setup, and the snippet being validated -# Add assertions to verify the code works correctly + +**Example of BAD validation** (only verifies syntax): +```python +def test_structured_output_syntax(): + """BAD: This only verifies the code runs without errors.""" + from pydantic import BaseModel + + class UserResponse(BaseModel): + name: str + age: int + + # BAD: No assertions about behavior + agent = Agent(model=mock_model, output_schema=UserResponse) + # BAD: Just calling without checking results proves nothing + agent("Get user info") ``` -#### 4.2 Run Validation Tests +#### 4.2 Validation Workflow -Execute tests to ensure code snippets are valid and functional. +For each Major Feature, follow this workflow in order: + +1. **Write a test file** with behavioral assertions +2. **Run the test** using the project's test framework +3. **If it fails**, try these approaches in order: + - Try using Bedrock instead of other model providers + - Try installing missing dependencies + - Try mocking external services + - Try using project test fixtures (`tests/fixtures/mocked_model_provider.py`) + - Try simplifying the example +4. **Document each attempt** and its result in the Validation Comment +5. 
**Only after documented failures** can you use the engineer review fallback **Constraints:** - You MUST run the appropriate test command for the project (e.g., `npm test`, `pytest`, `go test`) - You MUST verify that the test passes successfully +- You MUST verify that assertions actually executed (not skipped or short-circuited) - You MUST check that the code compiles without errors in compiled languages +- You MUST ensure tests include meaningful assertions about feature behavior - You SHOULD run type checking if applicable (e.g., `npm run type-check`, `mypy`) +- You SHOULD review test output to confirm behavioral assertions passed - You MAY need to adjust imports or setup code if tests fail -- You MAY need to install additional dependencies if required -**Fallback validation** (if test execution fails or is not possible): -- You MUST at minimum validate syntax using the appropriate language tools -- You MUST ensure the code is syntactically correct -- You MUST verify all referenced types and modules exist +**Installing Dependencies:** +- You MUST attempt to install missing dependencies when tests fail due to import errors +- You SHOULD check the project's `pyproject.toml`, `package.json`, or equivalent for optional dependency groups +- You SHOULD use the project's package manager to install dependencies (e.g., `pip install`, `npm install`, `hatch`) +- For Python projects with optional extras, try: `pip install -e ".[extra_name]"` or `pip install package_name` +- You SHOULD only fall back to mocking if the dependency cannot be installed (e.g., requires paid API keys, proprietary software) + +**Example of mocking external dependencies:** +```python +def test_custom_http_client(): + """Verify custom HTTP client is passed to the provider.""" + from unittest.mock import Mock, patch + + custom_client = Mock() + + with patch('strands.models.openai.OpenAI') as mock_openai: + from strands.models.openai import OpenAIModel + model = OpenAIModel(http_client=custom_client) + 
+ # Verify the custom client was passed + mock_openai.assert_called_once() + call_kwargs = mock_openai.call_args[1] + assert call_kwargs.get('http_client') == custom_client +``` + +#### 4.3 Engineer Review Fallback -#### 4.3 Handle Validation Failures +When validation genuinely fails after documented attempts, use this fallback. Per **Principle 4**, you MUST still include the feature with a code sample. -Address any validation failures before including snippets in release notes. +**Required proof before using this fallback:** +1. Created an actual test file (show the code in the validation comment) +2. Ran the test and received an actual error (show the error message) +3. Tried at least ONE alternative approach (Bedrock, mocking, simplified example) +4. Documented each attempt and its failure reason **Constraints:** -- You MUST NOT include unvalidated code snippets in release notes -- You MUST revise the code snippet if validation fails -- You MUST re-run validation after making changes -- You SHOULD examine the actual implementation in the PR if generated code fails -- You SHOULD simplify the example if complexity is causing validation issues -- You MAY extract a different example from the PR if the current one cannot be validated -- You MAY seek clarification if you cannot create a valid example -- You MUST preserve the test file content to include in the GitHub issue comment (Step 6.2) +- You MUST NOT mark examples as needing validation without actually attempting validation first +- You MUST NOT use vague reasons like "complex setup required" - be specific about what you tried and what error you got +- You MUST show your test code and error messages in the Validation Comment +- You MUST try Bedrock for any feature that works with multiple model providers before giving up +- You MUST try mocking for provider-specific features before giving up +- You MUST document all validation attempts (successful AND failed) in the Validation Comment +- You MUST preserve the 
test file content to include in the GitHub issue comment (Step 6.1) +- You MUST note in the validation comment what specific behavior each test verifies - You MAY delete temporary test files after capturing their content, as the environment is ephemeral +**Process when validation genuinely fails:** +1. **Extract a code sample from the PR** - Use code from: + - The PR description's code examples + - Test files added in the PR + - The actual implementation (simplified for readability) + - Documentation updates in the PR +2. **Include the sample in the release notes** with a clear callout that it needs engineer validation +3. **Document the validation attempts and failures** in the Validation Comment (Step 6.1) + +**Format for unvalidated code examples:** +```markdown +### Feature Name - [PR#123](link) + +Description of the feature and its impact. + +\`\`\`python +# ⚠️ NEEDS ENGINEER VALIDATION +# Validation attempted: [describe test created and error received] +# Alternative attempts: [what else you tried and why it failed] + +# Code sample extracted from PR description/tests +from strands import Agent +from strands.models.openai import OpenAIModel + +model = OpenAIModel(http_client=custom_client) +agent = Agent(model=model) +\`\`\` +``` + ### 5. Release Notes Formatting #### 5.1 Format Major Features Section @@ -289,9 +437,16 @@ Create the Major Features section with concise descriptions and code examples. Agents can now validate responses against predefined schemas with configurable retry behavior for non-conforming outputs. 
-\`\`\`[language] -# Code example in the project's programming language -# Show the feature in action with clear, focused code +\`\`\`python +from strands import Agent +from pydantic import BaseModel + +class Response(BaseModel): + answer: str + +agent = Agent(output_schema=Response) +result = agent("What is 2+2?") +print(result.output.answer) \`\`\` See the [Structured Output docs](https://docs.example.com/structured-output) for configuration options. @@ -336,63 +491,82 @@ Add a horizontal rule to separate your content from GitHub's auto-generated sect - This visually separates your curated content from GitHub's auto-generated "What's Changed" and "New Contributors" sections - You MUST NOT include a "Full Changelog" link—GitHub adds this automatically -**Example format**: -```markdown -## Major Bug Fixes - -- **Critical Fix** - [PR#124](https://github.com/owner/repo/pull/124) - Description of what was fixed. - ---- -``` - ### 6. Output Delivery -**Critical**: You are running in an ephemeral environment. All files created during execution (test files, temporary notes, etc.) will be deleted when the workflow completes. You MUST post all deliverables as GitHub issue comments—this is the only way to preserve your work and make it accessible to reviewers. +Per **Principle 1**, all deliverables must be posted as GitHub issue comments. -**Comment Structure**: Post exactly two comments on the GitHub issue: +**Comment Structure**: Post exactly three comments on the GitHub issue: 1. **Validation Comment** (first): Contains all validation code for all features in one batched comment 2. **Release Notes Comment** (second): Contains the final formatted release notes +3. **Exclusions Comment** (third): Documents any features that were excluded and why + +This ordering allows reviewers to see the validation evidence, review the release notes, and understand any exclusion decisions. -This ordering allows reviewers to see the validation evidence before reviewing the release notes. 
+**Iteration Comments**: If the user requests changes after the initial comments are posted: +- Post additional validation comments for any re-validated code +- Post updated release notes as new comments (do not edit previous comments) +- This creates an audit trail of changes and validations #### 6.1 Post Validation Code Comment Batch all validation code into a single GitHub issue comment. **Constraints:** -- You MUST post ONE comment containing ALL validation code for ALL features +- You MUST post ONE comment containing validation attempts for ALL Major Features +- You MUST show test code for EVERY feature - both successful and failed attempts - You MUST NOT post separate comments for each feature's validation - You MUST post this comment BEFORE the release notes comment - You MUST include all test files created during validation (Step 4) in this single comment +- You MUST document what specific behavior each test verifies (not just "validates the code works") - You MUST NOT reference local file paths—the ephemeral environment will be destroyed - You MUST clearly label this comment as "Code Validation Tests" -- You MUST include a note explaining that this code was used to validate the snippets in the release notes -- You SHOULD use collapsible `
<details>` sections to organize validation code by feature: - ```markdown - ## Code Validation Tests +- You SHOULD use collapsible `
<details>` sections to organize validation code by feature +- You SHOULD include a brief description of what behavior is being verified for each test - The following test code was used to validate the code examples in the release notes. +**Format:** +```markdown +## Code Validation Tests + +The following test code was used to validate the code examples in the release notes. - <details>
- <summary>Validation: Feature Name 1</summary> +<details>
+<summary>✅ Validated: Feature Name 1</summary> - \`\`\`typescript - [Full test file for feature 1] - \`\`\` +**Behavior verified:** This test confirms that the new `output_schema` parameter causes the agent to return a validated Pydantic model instance with the correct field types. - </details>
+\`\`\`python +[Full test file for feature 1 with behavioral assertions] +\`\`\` - <details>
- <summary>Validation: Feature Name 2</summary> +**Test output:** PASSED - \`\`\`typescript - [Full test file for feature 2] - \`\`\` +</details>
- </details>
- ``` -- This allows reviewers to copy and run the validation code themselves +<details>
+<summary>⚠️ Could Not Validate: Feature Name 2</summary> + +**Attempt 1: Direct test with mocked model** +\`\`\`python +[Test code that was attempted] +\`\`\` +**Error received:** +\`\`\` +[Actual error message from running the test] +\`\`\` + +**Attempt 2: Test with Bedrock** +\`\`\`python +[Alternative test code attempted] +\`\`\` +**Error received:** +\`\`\` +[Actual error message] +\`\`\` + +**Conclusion:** Could not validate because [specific reason based on actual errors]. Code sample in release notes extracted from PR description. + +</details>
+``` #### 6.2 Post Release Notes Comment @@ -408,95 +582,117 @@ Post the formatted release notes as a single GitHub issue comment. - You MAY use markdown formatting in the comment - If comment posting is deferred, continue with the workflow and note the deferred status -## Examples +#### 6.3 Post Exclusions Comment -### Example 1: Major Features Section with Code +Document any features with unvalidated code samples and any other notable decisions. +**Constraints:** +- You MUST post this comment as the FINAL comment on the GitHub issue +- You MUST include this comment if ANY of the following occurred: + - A Major Feature has an unvalidated code sample (marked for engineer review) + - A feature's scope or description was significantly different from the PR description + - You relied on review comments rather than the PR description to understand a feature +- You MUST clearly explain the reasoning for each unvalidated sample +- You SHOULD include this comment even if all code samples were validated, with a simple note: "All code samples were successfully validated. No engineer review required." +- You MUST NOT skip this comment—it provides critical transparency for reviewers + +**Format:** ```markdown -## Major Features - -### Managed MCP Connections - [PR#895](https://github.com/org/repo/pull/895) - -MCP Connections via ToolProviders allow the Agent to manage connection lifecycles automatically, eliminating the need for manual context managers. This experimental interface simplifies MCP tool integration significantly. +## Release Notes Review Notes -\`\`\`[language] -# Code example in the project's programming language -# Demonstrate the key feature usage -# Keep it focused and concise -\`\`\` +The following items require attention during review: -See the [MCP docs](https://docs.example.com/mcp) for details. 
+### ⚠️ Features with Unvalidated Code Samples -### Async Streaming for Multi-Agent Systems - [PR#961](https://github.com/org/repo/pull/961) +These features have code samples extracted from PRs but could not be automatically validated. An engineer must verify these examples before publishing: -Multi-agent systems now support async streaming, enabling real-time event streaming from agent teams as they collaborate. +- **PR#123 - Feature Title**: + - Code source: PR description / test files / implementation + - Validation attempted: [what you tried] + - Failure reason: [why it failed, e.g., "requires OpenAI API credentials", "complex multi-service integration"] + - Action needed: Engineer should verify the code sample works as shown -\`\`\`[language] -# Another code example -# Show the feature in action -# Include only essential code -\`\`\` +### Description vs. Implementation Discrepancies +- **PR#101 - Feature Title**: PR description stated [X] but review comments and final implementation show [Y]. Release notes reflect the actual merged behavior. ``` -### Example 2: Major Bug Fixes Section +#### 6.4 Handle User Feedback on Release Notes -```markdown ---- +When the user requests changes to the release notes after they have been posted, re-validate as needed. -## Major Bug Fixes - -- **Guardrails Redaction Fix** - [PR#1072](https://github.com/strands-agents/sdk-python/pull/1072) - Fixed input/output message redaction when `guardrails_trace="enabled_full"`, ensuring sensitive data is properly protected in traces. - -- **Tool Result Block Redaction** - [PR#1080](https://github.com/strands-agents/sdk-python/pull/1080) - Properly redact tool result blocks to prevent conversation corruption when using content filtering or PII redaction. 
+**Constraints:** +- You MUST re-run validation (Step 4) when the user requests changes that affect code examples: + - Modified code snippets + - New code examples for features that previously had none + - Replacement examples for features +- You MUST perform full extraction (Step 3) and validation (Step 4) when the user requests: + - Adding a new feature to the release notes that wasn't previously included + - Promoting a bug fix to include a code example +- You MUST NOT make changes to code examples without re-validating them +- You MUST post updated validation code as a new comment when re-validation occurs +- You MUST post the revised release notes as a new comment (do not edit previous comments) +- You SHOULD note in the updated release notes comment what changed from the previous version +- You MAY skip re-validation only for changes that do not affect code: + - Wording changes to descriptions + - Fixing typos + - Reordering features + - Removing features (no validation needed for removal) -- **Orphaned Tool Use Fix** - [PR#1123](https://github.com/strands-agents/sdk-python/pull/1123) - Fixed broken conversations caused by orphaned `toolUse` blocks, improving reliability when tools fail or are interrupted. -``` +## Examples -### Example 3: Complete Release Notes Structure +### Example 1: Complete Release Notes ```markdown ## Major Features -### Feature Name - [PR#123](https://github.com/owner/repo/pull/123) +### Managed MCP Connections - [PR#895](https://github.com/org/repo/pull/895) -Description of the feature and its impact. +MCP Connections via ToolProviders allow the Agent to manage connection lifecycles automatically, eliminating the need for manual context managers. This experimental interface simplifies MCP tool integration significantly. 
+ +\`\`\`python +from strands import Agent +from strands.tools import MCPToolProvider -\`\`\`[language] -# Code example demonstrating the feature +provider = MCPToolProvider(server_config) +agent = Agent(tools=[provider]) +result = agent("Use the MCP tools") \`\`\` ---- +See the [MCP docs](https://docs.example.com/mcp) for details. -## Major Bug Fixes +### Custom HTTP Client Support - [PR#1366](https://github.com/org/repo/pull/1366) -- **Critical Fix** - [PR#124](https://github.com/owner/repo/pull/124) - Description of what was fixed and why it matters. +OpenAI model provider now accepts a custom HTTP client, enabling proxy configuration, custom timeouts, and request logging. ---- -``` +\`\`\`python +# ⚠️ NEEDS ENGINEER VALIDATION +# Validation attempted: mocked OpenAI client, received import error +# Alternative attempts: Bedrock (not applicable - OpenAI-specific) -Note: The trailing `---` separates your content from GitHub's auto-generated "What's Changed" and "New Contributors" sections that follow. +from strands.models.openai import OpenAIModel +import httpx -### Example 4: Issue Comment with Release Notes +custom_client = httpx.Client(proxy="http://proxy.example.com:8080") +model = OpenAIModel(client_args={"http_client": custom_client}) +\`\`\` -```markdown -Release notes for v1.15.0: +--- -## Major Features +## Major Bug Fixes -### Managed MCP Connections - [PR#895](https://github.com/strands-agents/sdk-typescript/pull/895) +- **Guardrails Redaction Fix** - [PR#1072](https://github.com/strands-agents/sdk-python/pull/1072) + Fixed input/output message redaction when `guardrails_trace="enabled_full"`, ensuring sensitive data is properly protected in traces. -We've introduced MCP Connections via ToolProviders... +- **Tool Result Block Redaction** - [PR#1080](https://github.com/strands-agents/sdk-python/pull/1080) + Properly redact tool result blocks to prevent conversation corruption when using content filtering or PII redaction. -[... rest of release notes ...] 
+- **Orphaned Tool Use Fix** - [PR#1123](https://github.com/strands-agents/sdk-python/pull/1123) + Fixed broken conversations caused by orphaned `toolUse` blocks, improving reliability when tools fail or are interrupted. --- ``` -When this content is added to the GitHub release, GitHub will automatically append the "What's Changed" and "New Contributors" sections below the separator. +Note: The trailing `---` separates your content from GitHub's auto-generated "What's Changed" and "New Contributors" sections that follow. ## Troubleshooting @@ -519,14 +715,7 @@ If you encounter GitHub API rate limit errors: ### Code Validation Failures -If code validation fails for a snippet: -1. Review the test output to understand the failure reason -2. Check if the feature requires additional dependencies or setup -3. Examine the actual implementation in the PR to understand correct usage -4. Try simplifying the example to focus on core functionality -5. Consider using a different example from the PR -6. If unable to validate, note the issue in the release notes comment and skip the code example for that feature -7. Leave a comment on the issue noting which features couldn't include validated code examples +Follow the validation workflow in Section 4.2. If all attempts fail, use the engineer review fallback per Section 4.3. Per **Principle 4**, always include a code sample. ### Large PR Sets (>100 PRs) @@ -561,26 +750,23 @@ When GitHub tools or git operations are deferred (GITHUB_WRITE=false): - The operations will be executed after agent completion - Do not retry or attempt alternative approaches for deferred operations -### Unable to Extract Suitable Code Examples +### Stale PR Descriptions -If no suitable code examples can be found or generated for a feature: -1. Examine the PR description more carefully for usage information -2. Look at related documentation changes -3. Consider whether the feature actually needs a code example (some features are self-explanatory) -4. 
Generate a minimal example based on the API changes, even if you can't fully validate it -5. Mark the example as "conceptual" if validation isn't possible -6. Consider omitting the code example if it would be misleading +Per **Principle 2**: Review PR comments for context on what changed, examine merged code (especially test files), and use test files as the authoritative source for code examples. ## Desired Outcome * Focused release notes highlighting Major Features and Major Bug Fixes with concise descriptions (2-3 sentences, no bullet points) -* Working, validated code examples for all major features +* Code examples for ALL major features - either validated or marked for engineer review +* Validated code examples have passing behavioral tests +* Unvalidated code examples are clearly marked with the engineer validation warning and extracted from PR sources * Well-formatted markdown that renders properly on GitHub * Release notes posted as a comment on the GitHub issue for review +* Review notes comment documenting any features with unvalidated code samples that need engineer attention **Important**: Your generated release notes will be prepended to GitHub's auto-generated release notes. GitHub automatically generates: - "What's Changed" section listing all PRs with authors and links - "New Contributors" section acknowledging first-time contributors - "Full Changelog" comparison link -You should NOT include these sections—focus exclusively on Major Features and Major Bug Fixes that benefit from detailed descriptions and code examples. Minor changes (refactors, docs, tests, chores, etc.) will be covered by GitHub's automatic changelog. \ No newline at end of file +You should NOT include these sections—focus exclusively on Major Features and Major Bug Fixes that benefit from detailed descriptions and code examples. Minor changes (refactors, docs, tests, chores, etc.) will be covered by GitHub's automatic changelog. 
diff --git a/.github/scripts/javascript/process-input.cjs b/.github/scripts/javascript/process-input.cjs index b7ed29263..d0380a97b 100644 --- a/.github/scripts/javascript/process-input.cjs +++ b/.github/scripts/javascript/process-input.cjs @@ -8,9 +8,10 @@ async function getIssueInfo(github, context, inputs) { const issueId = context.eventName === 'workflow_dispatch' ? inputs.issue_id : context.payload.issue.number.toString(); + const commentBody = context.payload.comment?.body || ''; const command = context.eventName === 'workflow_dispatch' ? inputs.command - : (context.payload.comment.body.match(/^\/strands\s*(.*?)$/m)?.[1]?.trim() || ''); + : (commentBody.startsWith('/strands') ? commentBody.slice('/strands'.length).trim() : ''); console.log(`Event: ${context.eventName}, Issue ID: ${issueId}, Command: "${command}"`); @@ -76,10 +77,29 @@ function buildPrompts(mode, issueId, isPullRequest, command, branchName, inputs) const scriptFile = scriptFiles[mode] || scriptFiles['refiner']; const systemPrompt = fs.readFileSync(scriptFile, 'utf8'); + // Extract the user's feedback/instructions after the mode keyword + // e.g., "release-notes Move #123 to Major Features" -> "Move #123 to Major Features" + const modeKeywords = { + 'release-notes': /^(?:release-notes|release notes)\s*/i, + 'implementer': /^implement\s*/i, + 'refiner': /^refine\s*/i + }; + + const modePattern = modeKeywords[mode]; + const userFeedback = modePattern ? command.replace(modePattern, '').trim() : command.trim(); + let prompt = (isPullRequest) ? 
'The pull request id is:' : 'The issue id is:'; - prompt += `${issueId}\n${command}\nreview and continue`; + prompt += `${issueId}\n`; + + // If there's substantial user feedback beyond just the command keyword, include it as the main instruction + // Otherwise, use the default "review and continue" for initial triggers + if (userFeedback && userFeedback.length > 0) { + prompt += userFeedback; + } else { + prompt += 'review and continue'; + } return { sessionId, systemPrompt, prompt }; } diff --git a/.github/scripts/python/agent_runner.py b/.github/scripts/python/agent_runner.py index db10ceadb..9d92c2ac4 100644 --- a/.github/scripts/python/agent_runner.py +++ b/.github/scripts/python/agent_runner.py @@ -142,13 +142,12 @@ def run_agent(query: str): def main() -> None: """Main entry point for the agent runner.""" try: - # Read task from command line arguments - if len(sys.argv) < 2: - raise ValueError("Task argument is required") - - task = " ".join(sys.argv[1:]) - if not task.strip(): - raise ValueError("Task cannot be empty") + # Prefer INPUT_TASK env var (avoids shell escaping issues), fall back to CLI args + task = os.getenv("INPUT_TASK", "").strip() + if not task and len(sys.argv) > 1: + task = " ".join(sys.argv[1:]).strip() + if not task: + raise ValueError("Task is required (via INPUT_TASK env var or CLI argument)") print(f"🤖 Running agent with task: {task}") run_agent(task) diff --git a/.github/scripts/python/handoff_to_user.py b/.github/scripts/python/handoff_to_user.py index 07ad331f1..e3c8d1edf 100644 --- a/.github/scripts/python/handoff_to_user.py +++ b/.github/scripts/python/handoff_to_user.py @@ -4,13 +4,29 @@ from strands.types.tools import ToolContext from strands_tools.utils import console_util +from github_tools import add_issue_comment + + @tool(context=True) -def handoff_to_user(message: str, tool_context: ToolContext) -> str: +def handoff_to_user( + message: str, + tool_context: ToolContext, + post_comment: bool, + issue_number: int | None = 
None, +) -> str: """ - Hand off control to the user with a message. + Hand off control to the user with a message. This stops the agent execution + and waits for the user to respond before continuing. Args: message: The message to give to the user + post_comment: If true, post the message as a comment on the GitHub issue/PR. + Only set this to true when user intervention or feedback is required + before continuing (e.g., clarification needed, approval required, + or a decision must be made). Do not post a comment for simple status updates + or completion messages. If you are asking a question to the user this MUST + be true. + issue_number: The issue or PR number to comment on (required if post_comment is true) Returns: The users response after handing back control @@ -25,6 +41,19 @@ def handoff_to_user(message: str, tool_context: ToolContext) -> str: ) ) + # Post comment to GitHub if requested + if post_comment: + if issue_number is None: + console.print( + Panel( + "Cannot post comment: issue_number is required when post_comment is true", + title="[bold red]Error", + border_style="red", + ) + ) + else: + add_issue_comment(issue_number, message) + request_state = { "stop_event_loop": True } diff --git a/install_output.log b/install_output.log new file mode 100644 index 000000000..4b0217208 --- /dev/null +++ b/install_output.log @@ -0,0 +1,256 @@ +Obtaining file:///home/runner/work/sdk-python/sdk-python + Installing build dependencies: started + Installing build dependencies: finished with status 'done' + Checking if build backend supports build_editable: started + Checking if build backend supports build_editable: finished with status 'done' + Getting requirements to build editable: started + Getting requirements to build editable: finished with status 'done' + Installing backend dependencies: started + Installing backend dependencies: finished with status 'done' + Preparing editable metadata (pyproject.toml): started + Preparing editable metadata 
(pyproject.toml): finished with status 'done' +Requirement already satisfied: boto3<2.0.0,>=1.26.0 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from strands-agents==0.1.dev1+g252f896b4) (1.42.32) +Requirement already satisfied: botocore<2.0.0,>=1.29.0 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from strands-agents==0.1.dev1+g252f896b4) (1.42.32) +Requirement already satisfied: docstring-parser<1.0,>=0.15 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from strands-agents==0.1.dev1+g252f896b4) (0.17.0) +Requirement already satisfied: jsonschema<5.0.0,>=4.0.0 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from strands-agents==0.1.dev1+g252f896b4) (4.26.0) +Requirement already satisfied: mcp<2.0.0,>=1.11.0 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from strands-agents==0.1.dev1+g252f896b4) (1.25.0) +Requirement already satisfied: opentelemetry-api<2.0.0,>=1.30.0 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from strands-agents==0.1.dev1+g252f896b4) (1.39.1) +Requirement already satisfied: opentelemetry-instrumentation-threading<1.00b0,>=0.51b0 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from strands-agents==0.1.dev1+g252f896b4) (0.60b1) +Requirement already satisfied: opentelemetry-sdk<2.0.0,>=1.30.0 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from strands-agents==0.1.dev1+g252f896b4) (1.39.1) +Requirement already satisfied: pydantic<3.0.0,>=2.4.0 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from strands-agents==0.1.dev1+g252f896b4) (2.12.5) +Requirement already satisfied: typing-extensions<5.0.0,>=4.13.2 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from strands-agents==0.1.dev1+g252f896b4) (4.15.0) +Requirement already satisfied: watchdog<7.0.0,>=6.0.0 in 
/opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from strands-agents==0.1.dev1+g252f896b4) (6.0.0) +Collecting commitizen<5.0.0,>=4.4.0 (from strands-agents==0.1.dev1+g252f896b4) + Downloading commitizen-4.12.0-py3-none-any.whl.metadata (13 kB) +Collecting hatch<2.0.0,>=1.0.0 (from strands-agents==0.1.dev1+g252f896b4) + Downloading hatch-1.16.3-py3-none-any.whl.metadata (5.6 kB) +Collecting moto<6.0.0,>=5.1.0 (from strands-agents==0.1.dev1+g252f896b4) + Downloading moto-5.1.20-py3-none-any.whl.metadata (12 kB) +Collecting mypy<2.0.0,>=1.15.0 (from strands-agents==0.1.dev1+g252f896b4) + Downloading mypy-1.19.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (2.2 kB) +Collecting pre-commit<4.6.0,>=3.2.0 (from strands-agents==0.1.dev1+g252f896b4) + Downloading pre_commit-4.5.1-py2.py3-none-any.whl.metadata (1.2 kB) +Collecting pytest-asyncio<1.4.0,>=1.0.0 (from strands-agents==0.1.dev1+g252f896b4) + Downloading pytest_asyncio-1.3.0-py3-none-any.whl.metadata (4.1 kB) +Collecting pytest-cov<8.0.0,>=7.0.0 (from strands-agents==0.1.dev1+g252f896b4) + Downloading pytest_cov-7.0.0-py3-none-any.whl.metadata (31 kB) +Collecting pytest-xdist<4.0.0,>=3.0.0 (from strands-agents==0.1.dev1+g252f896b4) + Downloading pytest_xdist-3.8.0-py3-none-any.whl.metadata (3.0 kB) +Collecting pytest<9.0.0,>=8.0.0 (from strands-agents==0.1.dev1+g252f896b4) + Downloading pytest-8.4.2-py3-none-any.whl.metadata (7.7 kB) +Collecting ruff<0.15.0,>=0.13.0 (from strands-agents==0.1.dev1+g252f896b4) + Downloading ruff-0.14.13-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (26 kB) +Requirement already satisfied: jmespath<2.0.0,>=0.7.1 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from boto3<2.0.0,>=1.26.0->strands-agents==0.1.dev1+g252f896b4) (1.0.1) +Requirement already satisfied: s3transfer<0.17.0,>=0.16.0 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from 
boto3<2.0.0,>=1.26.0->strands-agents==0.1.dev1+g252f896b4) (0.16.0) +Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from botocore<2.0.0,>=1.29.0->strands-agents==0.1.dev1+g252f896b4) (2.9.0.post0) +Requirement already satisfied: urllib3!=2.2.0,<3,>=1.25.4 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from botocore<2.0.0,>=1.29.0->strands-agents==0.1.dev1+g252f896b4) (2.6.3) +Collecting questionary<3.0,>=2.0 (from commitizen<5.0.0,>=4.4.0->strands-agents==0.1.dev1+g252f896b4) + Downloading questionary-2.1.1-py3-none-any.whl.metadata (5.4 kB) +Collecting prompt-toolkit!=3.0.52 (from commitizen<5.0.0,>=4.4.0->strands-agents==0.1.dev1+g252f896b4) + Downloading prompt_toolkit-3.0.51-py3-none-any.whl.metadata (6.4 kB) +Collecting decli<1.0,>=0.6.0 (from commitizen<5.0.0,>=4.4.0->strands-agents==0.1.dev1+g252f896b4) + Downloading decli-0.6.3-py3-none-any.whl.metadata (17 kB) +Requirement already satisfied: colorama<1.0,>=0.4.1 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from commitizen<5.0.0,>=4.4.0->strands-agents==0.1.dev1+g252f896b4) (0.4.6) +Collecting termcolor<4.0.0,>=1.1.0 (from commitizen<5.0.0,>=4.4.0->strands-agents==0.1.dev1+g252f896b4) + Downloading termcolor-3.3.0-py3-none-any.whl.metadata (6.5 kB) +Requirement already satisfied: packaging>=19 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from commitizen<5.0.0,>=4.4.0->strands-agents==0.1.dev1+g252f896b4) (26.0) +Collecting tomlkit<1.0.0,>=0.8.0 (from commitizen<5.0.0,>=4.4.0->strands-agents==0.1.dev1+g252f896b4) + Downloading tomlkit-0.14.0-py3-none-any.whl.metadata (2.8 kB) +Collecting jinja2>=2.10.3 (from commitizen<5.0.0,>=4.4.0->strands-agents==0.1.dev1+g252f896b4) + Downloading jinja2-3.1.6-py3-none-any.whl.metadata (2.9 kB) +Collecting pyyaml>=3.8 (from commitizen<5.0.0,>=4.4.0->strands-agents==0.1.dev1+g252f896b4) + Downloading 
pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (2.4 kB) +Collecting argcomplete<3.7,>=1.12.1 (from commitizen<5.0.0,>=4.4.0->strands-agents==0.1.dev1+g252f896b4) + Downloading argcomplete-3.6.3-py3-none-any.whl.metadata (16 kB) +Requirement already satisfied: charset-normalizer<4,>=2.1.0 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from commitizen<5.0.0,>=4.4.0->strands-agents==0.1.dev1+g252f896b4) (3.4.4) +Collecting deprecated<2,>=1.2.13 (from commitizen<5.0.0,>=4.4.0->strands-agents==0.1.dev1+g252f896b4) + Downloading deprecated-1.3.1-py2.py3-none-any.whl.metadata (5.9 kB) +Requirement already satisfied: wrapt<3,>=1.10 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from deprecated<2,>=1.2.13->commitizen<5.0.0,>=4.4.0->strands-agents==0.1.dev1+g252f896b4) (1.17.3) +Collecting backports-zstd>=1.0.0 (from hatch<2.0.0,>=1.0.0->strands-agents==0.1.dev1+g252f896b4) + Downloading backports_zstd-1.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (6.9 kB) +Requirement already satisfied: click>=8.0.6 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from hatch<2.0.0,>=1.0.0->strands-agents==0.1.dev1+g252f896b4) (8.3.1) +Collecting hatchling>=1.27.0 (from hatch<2.0.0,>=1.0.0->strands-agents==0.1.dev1+g252f896b4) + Using cached hatchling-1.28.0-py3-none-any.whl.metadata (3.8 kB) +Requirement already satisfied: httpx>=0.22.0 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from hatch<2.0.0,>=1.0.0->strands-agents==0.1.dev1+g252f896b4) (0.28.1) +Collecting hyperlink>=21.0.0 (from hatch<2.0.0,>=1.0.0->strands-agents==0.1.dev1+g252f896b4) + Downloading hyperlink-21.0.0-py2.py3-none-any.whl.metadata (1.5 kB) +Collecting keyring>=23.5.0 (from hatch<2.0.0,>=1.0.0->strands-agents==0.1.dev1+g252f896b4) + Downloading keyring-25.7.0-py3-none-any.whl.metadata (21 kB) +Collecting 
pexpect~=4.8 (from hatch<2.0.0,>=1.0.0->strands-agents==0.1.dev1+g252f896b4) + Downloading pexpect-4.9.0-py2.py3-none-any.whl.metadata (2.5 kB) +Collecting platformdirs>=2.5.0 (from hatch<2.0.0,>=1.0.0->strands-agents==0.1.dev1+g252f896b4) + Downloading platformdirs-4.5.1-py3-none-any.whl.metadata (12 kB) +Collecting pyproject-hooks (from hatch<2.0.0,>=1.0.0->strands-agents==0.1.dev1+g252f896b4) + Downloading pyproject_hooks-1.2.0-py3-none-any.whl.metadata (1.3 kB) +Requirement already satisfied: rich>=11.2.0 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from hatch<2.0.0,>=1.0.0->strands-agents==0.1.dev1+g252f896b4) (14.2.0) +Collecting shellingham>=1.4.0 (from hatch<2.0.0,>=1.0.0->strands-agents==0.1.dev1+g252f896b4) + Downloading shellingham-1.5.4-py2.py3-none-any.whl.metadata (3.5 kB) +Collecting tomli-w>=1.0 (from hatch<2.0.0,>=1.0.0->strands-agents==0.1.dev1+g252f896b4) + Downloading tomli_w-1.2.0-py3-none-any.whl.metadata (5.7 kB) +Collecting userpath~=1.7 (from hatch<2.0.0,>=1.0.0->strands-agents==0.1.dev1+g252f896b4) + Downloading userpath-1.9.2-py3-none-any.whl.metadata (3.0 kB) +Collecting uv>=0.5.23 (from hatch<2.0.0,>=1.0.0->strands-agents==0.1.dev1+g252f896b4) + Downloading uv-0.9.26-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB) +Collecting virtualenv>=20.26.6 (from hatch<2.0.0,>=1.0.0->strands-agents==0.1.dev1+g252f896b4) + Downloading virtualenv-20.36.1-py3-none-any.whl.metadata (4.7 kB) +Requirement already satisfied: attrs>=22.2.0 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from jsonschema<5.0.0,>=4.0.0->strands-agents==0.1.dev1+g252f896b4) (25.4.0) +Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from jsonschema<5.0.0,>=4.0.0->strands-agents==0.1.dev1+g252f896b4) (2025.9.1) +Requirement already satisfied: referencing>=0.28.4 in 
/opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from jsonschema<5.0.0,>=4.0.0->strands-agents==0.1.dev1+g252f896b4) (0.37.0) +Requirement already satisfied: rpds-py>=0.25.0 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from jsonschema<5.0.0,>=4.0.0->strands-agents==0.1.dev1+g252f896b4) (0.30.0) +Requirement already satisfied: anyio>=4.5 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from mcp<2.0.0,>=1.11.0->strands-agents==0.1.dev1+g252f896b4) (4.12.1) +Requirement already satisfied: httpx-sse>=0.4 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from mcp<2.0.0,>=1.11.0->strands-agents==0.1.dev1+g252f896b4) (0.4.3) +Requirement already satisfied: pydantic-settings>=2.5.2 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from mcp<2.0.0,>=1.11.0->strands-agents==0.1.dev1+g252f896b4) (2.12.0) +Requirement already satisfied: pyjwt>=2.10.1 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from pyjwt[crypto]>=2.10.1->mcp<2.0.0,>=1.11.0->strands-agents==0.1.dev1+g252f896b4) (2.10.1) +Requirement already satisfied: python-multipart>=0.0.9 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from mcp<2.0.0,>=1.11.0->strands-agents==0.1.dev1+g252f896b4) (0.0.21) +Requirement already satisfied: sse-starlette>=1.6.1 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from mcp<2.0.0,>=1.11.0->strands-agents==0.1.dev1+g252f896b4) (3.2.0) +Requirement already satisfied: starlette>=0.27 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from mcp<2.0.0,>=1.11.0->strands-agents==0.1.dev1+g252f896b4) (0.52.1) +Requirement already satisfied: typing-inspection>=0.4.1 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from mcp<2.0.0,>=1.11.0->strands-agents==0.1.dev1+g252f896b4) (0.4.2) +Requirement already satisfied: uvicorn>=0.31.1 in 
/opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from mcp<2.0.0,>=1.11.0->strands-agents==0.1.dev1+g252f896b4) (0.40.0) +Requirement already satisfied: cryptography>=35.0.0 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from moto<6.0.0,>=5.1.0->strands-agents==0.1.dev1+g252f896b4) (46.0.3) +Requirement already satisfied: requests>=2.5 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from moto<6.0.0,>=5.1.0->strands-agents==0.1.dev1+g252f896b4) (2.32.5) +Collecting xmltodict (from moto<6.0.0,>=5.1.0->strands-agents==0.1.dev1+g252f896b4) + Downloading xmltodict-1.0.2-py3-none-any.whl.metadata (15 kB) +Collecting werkzeug!=2.2.0,!=2.2.1,>=0.5 (from moto<6.0.0,>=5.1.0->strands-agents==0.1.dev1+g252f896b4) + Downloading werkzeug-3.1.5-py3-none-any.whl.metadata (4.0 kB) +Collecting responses!=0.25.5,>=0.15.0 (from moto<6.0.0,>=5.1.0->strands-agents==0.1.dev1+g252f896b4) + Downloading responses-0.25.8-py3-none-any.whl.metadata (47 kB) +Collecting mypy_extensions>=1.0.0 (from mypy<2.0.0,>=1.15.0->strands-agents==0.1.dev1+g252f896b4) + Downloading mypy_extensions-1.1.0-py3-none-any.whl.metadata (1.1 kB) +Collecting pathspec>=0.9.0 (from mypy<2.0.0,>=1.15.0->strands-agents==0.1.dev1+g252f896b4) + Using cached pathspec-1.0.3-py3-none-any.whl.metadata (13 kB) +Collecting librt>=0.6.2 (from mypy<2.0.0,>=1.15.0->strands-agents==0.1.dev1+g252f896b4) + Downloading librt-0.7.8-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (1.3 kB) +Requirement already satisfied: importlib-metadata<8.8.0,>=6.0 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from opentelemetry-api<2.0.0,>=1.30.0->strands-agents==0.1.dev1+g252f896b4) (8.7.1) +Requirement already satisfied: zipp>=3.20 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from importlib-metadata<8.8.0,>=6.0->opentelemetry-api<2.0.0,>=1.30.0->strands-agents==0.1.dev1+g252f896b4) 
(3.23.0) +Requirement already satisfied: opentelemetry-instrumentation==0.60b1 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from opentelemetry-instrumentation-threading<1.00b0,>=0.51b0->strands-agents==0.1.dev1+g252f896b4) (0.60b1) +Requirement already satisfied: opentelemetry-semantic-conventions==0.60b1 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from opentelemetry-instrumentation==0.60b1->opentelemetry-instrumentation-threading<1.00b0,>=0.51b0->strands-agents==0.1.dev1+g252f896b4) (0.60b1) +Collecting ptyprocess>=0.5 (from pexpect~=4.8->hatch<2.0.0,>=1.0.0->strands-agents==0.1.dev1+g252f896b4) + Downloading ptyprocess-0.7.0-py2.py3-none-any.whl.metadata (1.3 kB) +Collecting cfgv>=2.0.0 (from pre-commit<4.6.0,>=3.2.0->strands-agents==0.1.dev1+g252f896b4) + Downloading cfgv-3.5.0-py2.py3-none-any.whl.metadata (8.9 kB) +Collecting identify>=1.0.0 (from pre-commit<4.6.0,>=3.2.0->strands-agents==0.1.dev1+g252f896b4) + Downloading identify-2.6.16-py2.py3-none-any.whl.metadata (4.4 kB) +Collecting nodeenv>=0.11.1 (from pre-commit<4.6.0,>=3.2.0->strands-agents==0.1.dev1+g252f896b4) + Downloading nodeenv-1.10.0-py2.py3-none-any.whl.metadata (24 kB) +Requirement already satisfied: annotated-types>=0.6.0 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from pydantic<3.0.0,>=2.4.0->strands-agents==0.1.dev1+g252f896b4) (0.7.0) +Requirement already satisfied: pydantic-core==2.41.5 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from pydantic<3.0.0,>=2.4.0->strands-agents==0.1.dev1+g252f896b4) (2.41.5) +Collecting iniconfig>=1 (from pytest<9.0.0,>=8.0.0->strands-agents==0.1.dev1+g252f896b4) + Downloading iniconfig-2.3.0-py3-none-any.whl.metadata (2.5 kB) +Collecting pluggy<2,>=1.5 (from pytest<9.0.0,>=8.0.0->strands-agents==0.1.dev1+g252f896b4) + Using cached pluggy-1.6.0-py3-none-any.whl.metadata (4.8 kB) +Requirement already satisfied: pygments>=2.7.2 in 
/opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from pytest<9.0.0,>=8.0.0->strands-agents==0.1.dev1+g252f896b4) (2.19.2) +Collecting coverage>=7.10.6 (from coverage[toml]>=7.10.6->pytest-cov<8.0.0,>=7.0.0->strands-agents==0.1.dev1+g252f896b4) + Downloading coverage-7.13.1-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl.metadata (8.5 kB) +Collecting execnet>=2.1 (from pytest-xdist<4.0.0,>=3.0.0->strands-agents==0.1.dev1+g252f896b4) + Downloading execnet-2.1.2-py3-none-any.whl.metadata (2.9 kB) +Requirement already satisfied: six>=1.5 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from python-dateutil<3.0.0,>=2.1->botocore<2.0.0,>=1.29.0->strands-agents==0.1.dev1+g252f896b4) (1.17.0) +Requirement already satisfied: wcwidth in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from prompt-toolkit!=3.0.52->commitizen<5.0.0,>=4.4.0->strands-agents==0.1.dev1+g252f896b4) (0.3.0) +Requirement already satisfied: idna>=2.8 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from anyio>=4.5->mcp<2.0.0,>=1.11.0->strands-agents==0.1.dev1+g252f896b4) (3.11) +Requirement already satisfied: cffi>=2.0.0 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from cryptography>=35.0.0->moto<6.0.0,>=5.1.0->strands-agents==0.1.dev1+g252f896b4) (2.0.0) +Requirement already satisfied: pycparser in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from cffi>=2.0.0->cryptography>=35.0.0->moto<6.0.0,>=5.1.0->strands-agents==0.1.dev1+g252f896b4) (3.0) +Collecting trove-classifiers (from hatchling>=1.27.0->hatch<2.0.0,>=1.0.0->strands-agents==0.1.dev1+g252f896b4) + Using cached trove_classifiers-2026.1.14.14-py3-none-any.whl.metadata (2.4 kB) +Requirement already satisfied: certifi in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from httpx>=0.22.0->hatch<2.0.0,>=1.0.0->strands-agents==0.1.dev1+g252f896b4) (2026.1.4) 
+Requirement already satisfied: httpcore==1.* in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from httpx>=0.22.0->hatch<2.0.0,>=1.0.0->strands-agents==0.1.dev1+g252f896b4) (1.0.9) +Requirement already satisfied: h11>=0.16 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from httpcore==1.*->httpx>=0.22.0->hatch<2.0.0,>=1.0.0->strands-agents==0.1.dev1+g252f896b4) (0.16.0) +Collecting MarkupSafe>=2.0 (from jinja2>=2.10.3->commitizen<5.0.0,>=4.4.0->strands-agents==0.1.dev1+g252f896b4) + Downloading markupsafe-3.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (2.7 kB) +Collecting SecretStorage>=3.2 (from keyring>=23.5.0->hatch<2.0.0,>=1.0.0->strands-agents==0.1.dev1+g252f896b4) + Downloading secretstorage-3.5.0-py3-none-any.whl.metadata (4.0 kB) +Collecting jeepney>=0.4.2 (from keyring>=23.5.0->hatch<2.0.0,>=1.0.0->strands-agents==0.1.dev1+g252f896b4) + Downloading jeepney-0.9.0-py3-none-any.whl.metadata (1.2 kB) +Collecting jaraco.classes (from keyring>=23.5.0->hatch<2.0.0,>=1.0.0->strands-agents==0.1.dev1+g252f896b4) + Downloading jaraco.classes-3.4.0-py3-none-any.whl.metadata (2.6 kB) +Collecting jaraco.functools (from keyring>=23.5.0->hatch<2.0.0,>=1.0.0->strands-agents==0.1.dev1+g252f896b4) + Downloading jaraco_functools-4.4.0-py3-none-any.whl.metadata (3.0 kB) +Collecting jaraco.context (from keyring>=23.5.0->hatch<2.0.0,>=1.0.0->strands-agents==0.1.dev1+g252f896b4) + Downloading jaraco_context-6.1.0-py3-none-any.whl.metadata (4.3 kB) +Requirement already satisfied: python-dotenv>=0.21.0 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from pydantic-settings>=2.5.2->mcp<2.0.0,>=1.11.0->strands-agents==0.1.dev1+g252f896b4) (1.2.1) +Requirement already satisfied: markdown-it-py>=2.2.0 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from rich>=11.2.0->hatch<2.0.0,>=1.0.0->strands-agents==0.1.dev1+g252f896b4) (4.0.0) 
+Requirement already satisfied: mdurl~=0.1 in /opt/hostedtoolcache/Python/3.13.11/x64/lib/python3.13/site-packages (from markdown-it-py>=2.2.0->rich>=11.2.0->hatch<2.0.0,>=1.0.0->strands-agents==0.1.dev1+g252f896b4) (0.1.2) +Collecting distlib<1,>=0.3.7 (from virtualenv>=20.26.6->hatch<2.0.0,>=1.0.0->strands-agents==0.1.dev1+g252f896b4) + Downloading distlib-0.4.0-py2.py3-none-any.whl.metadata (5.2 kB) +Collecting filelock<4,>=3.20.1 (from virtualenv>=20.26.6->hatch<2.0.0,>=1.0.0->strands-agents==0.1.dev1+g252f896b4) + Downloading filelock-3.20.3-py3-none-any.whl.metadata (2.1 kB) +Collecting more-itertools (from jaraco.classes->keyring>=23.5.0->hatch<2.0.0,>=1.0.0->strands-agents==0.1.dev1+g252f896b4) + Downloading more_itertools-10.8.0-py3-none-any.whl.metadata (39 kB) +Downloading commitizen-4.12.0-py3-none-any.whl (84 kB) +Downloading argcomplete-3.6.3-py3-none-any.whl (43 kB) +Downloading decli-0.6.3-py3-none-any.whl (8.0 kB) +Downloading deprecated-1.3.1-py2.py3-none-any.whl (11 kB) +Downloading hatch-1.16.3-py3-none-any.whl (141 kB) +Downloading moto-5.1.20-py3-none-any.whl (6.4 MB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.4/6.4 MB 212.4 MB/s 0:00:00 +Downloading mypy-1.19.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (13.6 MB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 13.6/13.6 MB 280.9 MB/s 0:00:00 +Downloading pexpect-4.9.0-py2.py3-none-any.whl (63 kB) +Downloading pre_commit-4.5.1-py2.py3-none-any.whl (226 kB) +Downloading pytest-8.4.2-py3-none-any.whl (365 kB) +Using cached pluggy-1.6.0-py3-none-any.whl (20 kB) +Downloading pytest_asyncio-1.3.0-py3-none-any.whl (15 kB) +Downloading pytest_cov-7.0.0-py3-none-any.whl (22 kB) +Downloading pytest_xdist-3.8.0-py3-none-any.whl (46 kB) +Downloading questionary-2.1.1-py3-none-any.whl (36 kB) +Downloading prompt_toolkit-3.0.51-py3-none-any.whl (387 kB) +Downloading ruff-0.14.13-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.9 MB) + 
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 13.9/13.9 MB 253.3 MB/s 0:00:00 +Downloading termcolor-3.3.0-py3-none-any.whl (7.7 kB) +Downloading tomlkit-0.14.0-py3-none-any.whl (39 kB) +Downloading userpath-1.9.2-py3-none-any.whl (9.1 kB) +Downloading backports_zstd-1.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (490 kB) +Downloading cfgv-3.5.0-py2.py3-none-any.whl (7.4 kB) +Downloading coverage-7.13.1-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl (252 kB) +Downloading execnet-2.1.2-py3-none-any.whl (40 kB) +Using cached hatchling-1.28.0-py3-none-any.whl (76 kB) +Downloading hyperlink-21.0.0-py2.py3-none-any.whl (74 kB) +Downloading identify-2.6.16-py2.py3-none-any.whl (99 kB) +Downloading iniconfig-2.3.0-py3-none-any.whl (7.5 kB) +Downloading jinja2-3.1.6-py3-none-any.whl (134 kB) +Downloading keyring-25.7.0-py3-none-any.whl (39 kB) +Downloading jeepney-0.9.0-py3-none-any.whl (49 kB) +Downloading librt-0.7.8-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (193 kB) +Downloading markupsafe-3.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (22 kB) +Downloading mypy_extensions-1.1.0-py3-none-any.whl (5.0 kB) +Downloading nodeenv-1.10.0-py2.py3-none-any.whl (23 kB) +Using cached pathspec-1.0.3-py3-none-any.whl (55 kB) +Downloading platformdirs-4.5.1-py3-none-any.whl (18 kB) +Downloading ptyprocess-0.7.0-py2.py3-none-any.whl (13 kB) +Downloading pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (801 kB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 801.6/801.6 kB 120.7 MB/s 0:00:00 +Downloading responses-0.25.8-py3-none-any.whl (34 kB) +Downloading secretstorage-3.5.0-py3-none-any.whl (15 kB) +Downloading shellingham-1.5.4-py2.py3-none-any.whl (9.8 kB) +Downloading tomli_w-1.2.0-py3-none-any.whl (6.7 kB) +Downloading uv-0.9.26-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (23.3 MB) + 
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 23.3/23.3 MB 256.8 MB/s 0:00:00 +Downloading virtualenv-20.36.1-py3-none-any.whl (6.0 MB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.0/6.0 MB 246.9 MB/s 0:00:00 +Downloading distlib-0.4.0-py2.py3-none-any.whl (469 kB) +Downloading filelock-3.20.3-py3-none-any.whl (16 kB) +Downloading werkzeug-3.1.5-py3-none-any.whl (225 kB) +Downloading jaraco.classes-3.4.0-py3-none-any.whl (6.8 kB) +Downloading jaraco_context-6.1.0-py3-none-any.whl (7.1 kB) +Downloading jaraco_functools-4.4.0-py3-none-any.whl (10 kB) +Downloading more_itertools-10.8.0-py3-none-any.whl (69 kB) +Downloading pyproject_hooks-1.2.0-py3-none-any.whl (10 kB) +Using cached trove_classifiers-2026.1.14.14-py3-none-any.whl (14 kB) +Downloading xmltodict-1.0.2-py3-none-any.whl (13 kB) +Building wheels for collected packages: strands-agents + Building editable for strands-agents (pyproject.toml): started + Building editable for strands-agents (pyproject.toml): finished with status 'done' + Created wheel for strands-agents: filename=strands_agents-0.1.dev1+g252f896b4-py3-none-any.whl size=10025 sha256=4b43428675806af2535aeae6fab9ed55519696f758f1b8f1c3a180704457d448 + Stored in directory: /tmp/pip-ephem-wheel-cache-xw66ray_/wheels/94/60/63/fc2d04fbd73b5e7d5ee8ee2c7af924f44becb4b085a98d9503 +Successfully built strands-agents +Installing collected packages: trove-classifiers, ptyprocess, distlib, xmltodict, uv, userpath, tomlkit, tomli-w, termcolor, shellingham, ruff, pyyaml, pyproject-hooks, prompt-toolkit, pluggy, platformdirs, pexpect, pathspec, nodeenv, mypy_extensions, more-itertools, MarkupSafe, librt, jeepney, jaraco.context, iniconfig, identify, hyperlink, filelock, execnet, deprecated, decli, coverage, cfgv, backports-zstd, argcomplete, werkzeug, virtualenv, responses, questionary, pytest, mypy, jinja2, jaraco.functools, jaraco.classes, hatchling, SecretStorage, pytest-xdist, pytest-cov, pytest-asyncio, pre-commit, commitizen, keyring, moto, hatch, strands-agents 
+ Attempting uninstall: prompt-toolkit + Found existing installation: prompt_toolkit 3.0.52 + Uninstalling prompt_toolkit-3.0.52: + Successfully uninstalled prompt_toolkit-3.0.52 + Attempting uninstall: strands-agents + Found existing installation: strands-agents 1.23.0 + Uninstalling strands-agents-1.23.0: + Successfully uninstalled strands-agents-1.23.0 + +ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts. +strands-agents-tools 0.2.19 requires strands-agents>=1.0.0, but you have strands-agents 0.1.dev1+g252f896b4 which is incompatible. +Successfully installed MarkupSafe-3.0.3 SecretStorage-3.5.0 argcomplete-3.6.3 backports-zstd-1.3.0 cfgv-3.5.0 commitizen-4.12.0 coverage-7.13.1 decli-0.6.3 deprecated-1.3.1 distlib-0.4.0 execnet-2.1.2 filelock-3.20.3 hatch-1.16.3 hatchling-1.28.0 hyperlink-21.0.0 identify-2.6.16 iniconfig-2.3.0 jaraco.classes-3.4.0 jaraco.context-6.1.0 jaraco.functools-4.4.0 jeepney-0.9.0 jinja2-3.1.6 keyring-25.7.0 librt-0.7.8 more-itertools-10.8.0 moto-5.1.20 mypy-1.19.1 mypy_extensions-1.1.0 nodeenv-1.10.0 pathspec-1.0.3 pexpect-4.9.0 platformdirs-4.5.1 pluggy-1.6.0 pre-commit-4.5.1 prompt-toolkit-3.0.51 ptyprocess-0.7.0 pyproject-hooks-1.2.0 pytest-8.4.2 pytest-asyncio-1.3.0 pytest-cov-7.0.0 pytest-xdist-3.8.0 pyyaml-6.0.3 questionary-2.1.1 responses-0.25.8 ruff-0.14.13 shellingham-1.5.4 strands-agents-0.1.dev1+g252f896b4 termcolor-3.3.0 tomli-w-1.2.0 tomlkit-0.14.0 trove-classifiers-2026.1.14.14 userpath-1.9.2 uv-0.9.26 virtualenv-20.36.1 werkzeug-3.1.5 xmltodict-1.0.2 diff --git a/src/strands/event_loop/event_loop.py b/src/strands/event_loop/event_loop.py index fcb530a0d..a63dbae30 100644 --- a/src/strands/event_loop/event_loop.py +++ b/src/strands/event_loop/event_loop.py @@ -15,7 +15,7 @@ from opentelemetry import trace as trace_api -from ..hooks import AfterModelCallEvent, BeforeModelCallEvent, MessageAddedEvent 
+from ..hooks import AfterModelCallEvent, AfterToolsEvent, BeforeModelCallEvent, BeforeToolsEvent, MessageAddedEvent from ..telemetry.metrics import Trace from ..telemetry.tracer import Tracer, get_tracer from ..tools._validator import validate_and_prepare_tools @@ -31,6 +31,7 @@ StructuredOutputEvent, ToolInterruptEvent, ToolResultMessageEvent, + ToolsInterruptEvent, TypedEvent, ) from ..types.content import Message, Messages @@ -485,14 +486,34 @@ async def _handle_tool_execution( tool_uses = [tool_use for tool_use in tool_uses if tool_use["toolUseId"] not in tool_use_ids] interrupts = [] - tool_events = agent.tool_executor._execute( - agent, tool_uses, tool_results, cycle_trace, cycle_span, invocation_state, structured_output_context - ) - async for tool_event in tool_events: - if isinstance(tool_event, ToolInterruptEvent): - interrupts.extend(tool_event["tool_interrupt_event"]["interrupts"]) - - yield tool_event + batch_interrupted = False + + # Fire BeforeToolsEvent if there are tools to execute + if tool_uses: + before_event = BeforeToolsEvent(agent=agent, message=message, tool_uses=tool_uses) + _, batch_interrupts = await agent.hooks.invoke_callbacks_async(before_event) + + if batch_interrupts: + batch_interrupted = True + interrupts.extend(batch_interrupts) + # Yield ToolsInterruptEvent for batch-level interrupts + yield ToolsInterruptEvent(tool_uses, batch_interrupts) + + # Only execute tools if not interrupted at batch level + if not batch_interrupted: + tool_events = agent.tool_executor._execute( + agent, tool_uses, tool_results, cycle_trace, cycle_span, invocation_state, structured_output_context + ) + async for tool_event in tool_events: + if isinstance(tool_event, ToolInterruptEvent): + interrupts.extend(tool_event["tool_interrupt_event"]["interrupts"]) + + yield tool_event + + # Fire AfterToolsEvent if there are tools and no batch-level interrupt + if tool_uses and not batch_interrupted: + after_event = AfterToolsEvent(agent=agent, message=message, 
tool_uses=tool_uses) + await agent.hooks.invoke_callbacks_async(after_event) structured_output_result = None if structured_output_context.is_enabled: diff --git a/src/strands/hooks/__init__.py b/src/strands/hooks/__init__.py index 30163f207..8f15cc02b 100644 --- a/src/strands/hooks/__init__.py +++ b/src/strands/hooks/__init__.py @@ -33,10 +33,12 @@ def log_end(self, event: AfterInvocationEvent) -> None: AfterInvocationEvent, AfterModelCallEvent, AfterToolCallEvent, + AfterToolsEvent, AgentInitializedEvent, BeforeInvocationEvent, BeforeModelCallEvent, BeforeToolCallEvent, + BeforeToolsEvent, MessageAddedEvent, ) from .registry import BaseHookEvent, HookCallback, HookEvent, HookProvider, HookRegistry @@ -45,7 +47,9 @@ def log_end(self, event: AfterInvocationEvent) -> None: "AgentInitializedEvent", "BeforeInvocationEvent", "BeforeToolCallEvent", + "BeforeToolsEvent", "AfterToolCallEvent", + "AfterToolsEvent", "BeforeModelCallEvent", "AfterModelCallEvent", "AfterInvocationEvent", diff --git a/src/strands/hooks/events.py b/src/strands/hooks/events.py index 5e11524d1..17e608440 100644 --- a/src/strands/hooks/events.py +++ b/src/strands/hooks/events.py @@ -173,6 +173,69 @@ def should_reverse_callbacks(self) -> bool: return True +@dataclass +class BeforeToolsEvent(HookEvent, _Interruptible): + """Event triggered before a batch of tools are executed. + + This event is fired after the model returns tool use blocks but before + the tools are executed. Hook providers can use this event to inspect, + log, or implement approval workflows for tool batches. + + The event is interruptible, allowing hook callbacks to pause execution + and request user approval before proceeding with tool execution. + + Attributes: + message: The message from the model containing tool use blocks. + tool_uses: List of tools that will be executed in this batch. 
+ """ + + message: Message + tool_uses: list[ToolUse] + + @override + def _interrupt_id(self, name: str) -> str: + """Unique id for the interrupt. + + Args: + name: User defined name for the interrupt. + + Returns: + Interrupt id. + """ + # Use a stable ID based on the tool use IDs in the batch + tool_ids = "|".join(str(tool_use.get("toolUseId", "")) for tool_use in self.tool_uses) + return f"v1:before_tools:{tool_ids}:{uuid.uuid5(uuid.NAMESPACE_OID, name)}" + + +@dataclass +class AfterToolsEvent(HookEvent): + """Event triggered after a batch of tools complete execution. + + This event is fired after all tools in a batch have been executed, + before the tool results are added to the conversation. Hook providers + can use this event for cleanup, logging, or batch-level post-processing. + + Note: This event uses reverse callback ordering, meaning callbacks registered + later will be invoked first during cleanup. + + Note: Tool results are available in the tool result message created after + this event. This event receives the original assistant message with tool uses, + not the result message. + + Attributes: + message: The original message from the model containing tool use blocks. + tool_uses: List of tools that were executed in this batch. + """ + + message: Message + tool_uses: list[ToolUse] + + @property + def should_reverse_callbacks(self) -> bool: + """True to invoke callbacks in reverse order.""" + return True + + @dataclass class BeforeModelCallEvent(HookEvent): """Event triggered before the model is invoked. diff --git a/src/strands/tools/executors/_executor.py b/src/strands/tools/executors/_executor.py index 5d01c5d48..9e20294d0 100644 --- a/src/strands/tools/executors/_executor.py +++ b/src/strands/tools/executors/_executor.py @@ -324,6 +324,9 @@ def _execute( ) -> AsyncGenerator[TypedEvent, None]: """Execute the given tools according to this executor's strategy. 
+ BeforeToolsEvent and AfterToolsEvent hooks are triggered by the event loop, + not by the executor implementations. + Args: agent: The agent for which tools are being executed. tool_uses: Metadata and inputs for the tools to be executed. diff --git a/src/strands/types/_events.py b/src/strands/types/_events.py index d64357cf8..6985df418 100644 --- a/src/strands/types/_events.py +++ b/src/strands/types/_events.py @@ -358,6 +358,28 @@ def interrupts(self) -> list[Interrupt]: return cast(list[Interrupt], self["tool_interrupt_event"]["interrupts"]) +class ToolsInterruptEvent(TypedEvent): + """Event emitted when a batch of tools is interrupted before execution. + + This event is fired when BeforeToolsEvent callbacks raise interrupts, + preventing the batch of tools from executing. + """ + + def __init__(self, tool_uses: list[ToolUse], interrupts: list[Interrupt]) -> None: + """Set batch interrupt in the event payload. + + Args: + tool_uses: The list of tools that would have been executed + interrupts: The interrupts raised during BeforeToolsEvent + """ + super().__init__({"tools_interrupt_event": {"tool_uses": tool_uses, "interrupts": interrupts}}) + + @property + def interrupts(self) -> list[Interrupt]: + """The interrupt instances.""" + return cast(list[Interrupt], self["tools_interrupt_event"]["interrupts"]) + + class ModelMessageEvent(TypedEvent): """Event emitted when the model invocation has completed. 
diff --git a/test_output.log b/test_output.log new file mode 100644 index 000000000..726a4d159 --- /dev/null +++ b/test_output.log @@ -0,0 +1 @@ +/bin/sh: 1: hatch: not found diff --git a/tests/fixtures/mock_hook_provider.py b/tests/fixtures/mock_hook_provider.py index 091f44d06..7e5aead27 100644 --- a/tests/fixtures/mock_hook_provider.py +++ b/tests/fixtures/mock_hook_provider.py @@ -5,10 +5,12 @@ AfterInvocationEvent, AfterModelCallEvent, AfterToolCallEvent, + AfterToolsEvent, AgentInitializedEvent, BeforeInvocationEvent, BeforeModelCallEvent, BeforeToolCallEvent, + BeforeToolsEvent, HookEvent, HookProvider, HookRegistry, @@ -25,6 +27,8 @@ def __init__(self, event_types: list[Type] | Literal["all"]): AfterInvocationEvent, BeforeToolCallEvent, AfterToolCallEvent, + BeforeToolsEvent, + AfterToolsEvent, BeforeModelCallEvent, AfterModelCallEvent, MessageAddedEvent, diff --git a/tests/strands/agent/test_agent_hooks.py b/tests/strands/agent/test_agent_hooks.py index 00b9d368a..841dc2fb5 100644 --- a/tests/strands/agent/test_agent_hooks.py +++ b/tests/strands/agent/test_agent_hooks.py @@ -9,10 +9,12 @@ AfterInvocationEvent, AfterModelCallEvent, AfterToolCallEvent, + AfterToolsEvent, AgentInitializedEvent, BeforeInvocationEvent, BeforeModelCallEvent, BeforeToolCallEvent, + BeforeToolsEvent, MessageAddedEvent, ) from strands.types.content import Messages @@ -476,7 +478,272 @@ async def handle_after_model_call(self, event): # Should be called 3 times: initial + 2 retries assert retry_hook.call_count == 3 - assert retry_hook.retry_count == 2 + + +# Tests for BeforeToolsEvent and AfterToolsEvent + + +def test_before_tools_event_triggered(agent, hook_provider, agent_tool, tool_use): + """Verify that BeforeToolsEvent is triggered before tool batch execution.""" + # Add batch event tracking + batch_hook_provider = MockHookProvider([BeforeToolsEvent, AfterToolsEvent]) + agent.hooks.add_hook(batch_hook_provider) + + result = agent("test message") + + # Check that BeforeToolsEvent 
was triggered + batch_length, batch_events = batch_hook_provider.get_events() + assert batch_length == 2 # BeforeToolsEvent and AfterToolsEvent + + before_event = next(batch_events) + assert isinstance(before_event, BeforeToolsEvent) + assert before_event.agent == agent + assert len(before_event.tool_uses) == 1 + assert before_event.tool_uses[0]["name"] == "tool_decorated" + assert "toolUse" in before_event.message["content"][0] + + +def test_after_tools_event_triggered(agent, hook_provider, agent_tool, tool_use): + """Verify that AfterToolsEvent is triggered after all tools complete.""" + # Add batch event tracking + batch_hook_provider = MockHookProvider([BeforeToolsEvent, AfterToolsEvent]) + agent.hooks.add_hook(batch_hook_provider) + + result = agent("test message") + + # Check that AfterToolsEvent was triggered + batch_length, batch_events = batch_hook_provider.get_events() + assert batch_length == 2 + + before_event = next(batch_events) + after_event = next(batch_events) + + assert isinstance(after_event, AfterToolsEvent) + assert after_event.agent == agent + assert len(after_event.tool_uses) == 1 + assert after_event.tool_uses[0]["name"] == "tool_decorated" + assert "toolUse" in after_event.message["content"][0] + + +def test_after_tools_event_reverse_ordering(): + """Verify that AfterToolsEvent uses reverse callback ordering.""" + execution_order = [] + + class OrderTrackingHook1: + def register_hooks(self, registry): + registry.add_callback(AfterToolsEvent, lambda event: execution_order.append("hook1")) + + class OrderTrackingHook2: + def register_hooks(self, registry): + registry.add_callback(AfterToolsEvent, lambda event: execution_order.append("hook2")) + + @strands.tools.tool + def sample_tool(x: int) -> int: + return x * 2 + + tool_use = {"name": "sample_tool", "toolUseId": "123", "input": {"x": 5}} + agent_messages: Messages = [ + {"role": "assistant", "content": [{"toolUse": tool_use}]}, + {"role": "assistant", "content": [{"text": "Done"}]}, + ] + 
model = MockedModelProvider(agent_messages) + + agent = Agent( + model=model, + tools=[sample_tool], + hooks=[OrderTrackingHook1(), OrderTrackingHook2()], + ) + + agent("test") + + # AfterToolsEvent should execute in reverse order: hook2 before hook1 + assert execution_order == ["hook2", "hook1"] + + +def test_before_tools_event_with_multiple_tools(): + """Verify that BeforeToolsEvent contains all tools in batch.""" + batch_hook_provider = MockHookProvider([BeforeToolsEvent, AfterToolsEvent]) + + @strands.tools.tool + def tool1(x: int) -> int: + return x + 1 + + @strands.tools.tool + def tool2(y: int) -> int: + return y * 2 + + tool_use_1 = {"name": "tool1", "toolUseId": "123", "input": {"x": 5}} + tool_use_2 = {"name": "tool2", "toolUseId": "456", "input": {"y": 10}} + + agent_messages: Messages = [ + {"role": "assistant", "content": [{"toolUse": tool_use_1}, {"toolUse": tool_use_2}]}, + {"role": "assistant", "content": [{"text": "Done"}]}, + ] + model = MockedModelProvider(agent_messages) + + agent = Agent( + model=model, + tools=[tool1, tool2], + hooks=[batch_hook_provider], + ) + + agent("test") + + batch_length, batch_events = batch_hook_provider.get_events() + before_event = next(batch_events) + + assert isinstance(before_event, BeforeToolsEvent) + assert len(before_event.tool_uses) == 2 + assert before_event.tool_uses[0]["name"] == "tool1" + assert before_event.tool_uses[1]["name"] == "tool2" + + +def test_batch_events_not_triggered_without_tools(): + """Verify that batch events are not triggered when no tools are present.""" + batch_hook_provider = MockHookProvider([BeforeToolsEvent, AfterToolsEvent]) + + # Response with no tool uses + agent_messages: Messages = [ + {"role": "assistant", "content": [{"text": "No tools used"}]}, + ] + model = MockedModelProvider(agent_messages) + + agent = Agent( + model=model, + hooks=[batch_hook_provider], + ) + + agent("test") + + # No batch events should be triggered + batch_length, _ = batch_hook_provider.get_events() + 
assert batch_length == 0 + + +def test_before_tools_event_interrupt(): + """Verify that BeforeToolsEvent interrupt stops batch execution.""" + batch_hook_provider = MockHookProvider([BeforeToolsEvent, AfterToolsEvent]) + tool_hook_provider = MockHookProvider([BeforeToolCallEvent, AfterToolCallEvent]) + + class InterruptHook: + def register_hooks(self, registry): + registry.add_callback(BeforeToolsEvent, self.interrupt_batch) + + def interrupt_batch(self, event: BeforeToolsEvent): + # Interrupt without providing response + event.interrupt("batch-approval", reason="Need approval") + + @strands.tools.tool + def sample_tool(x: int) -> int: + return x * 2 + + tool_use = {"name": "sample_tool", "toolUseId": "123", "input": {"x": 5}} + agent_messages: Messages = [ + {"role": "assistant", "content": [{"toolUse": tool_use}]}, + ] + model = MockedModelProvider(agent_messages) + + agent = Agent( + model=model, + tools=[sample_tool], + hooks=[InterruptHook(), batch_hook_provider, tool_hook_provider], + ) + + result = agent("test") + + # Agent should stop with interrupt + assert result.stop_reason == "interrupt" + assert len(result.interrupts) == 1 + assert result.interrupts[0].name == "batch-approval" + + # Only BeforeToolsEvent should be triggered (AfterToolsEvent NOT fired on batch interrupt) + batch_length, batch_events = batch_hook_provider.get_events() + assert batch_length == 1 # Only BeforeToolsEvent + event1 = next(batch_events) + assert isinstance(event1, BeforeToolsEvent) + + # No individual tool events should be triggered (tools didn't execute) + tool_length, _ = tool_hook_provider.get_events() + assert tool_length == 0 + + +@pytest.mark.asyncio +async def test_before_tools_event_interrupt_async(): + """Verify that BeforeToolsEvent interrupt works in async context.""" + batch_hook_provider = MockHookProvider([BeforeToolsEvent, AfterToolsEvent]) + + class AsyncInterruptHook: + def register_hooks(self, registry): + registry.add_callback(BeforeToolsEvent, 
self.interrupt_batch) + + async def interrupt_batch(self, event: BeforeToolsEvent): + event.interrupt("async-batch-approval", reason="Async approval needed") + + @strands.tools.tool + def sample_tool(x: int) -> int: + return x * 2 + + tool_use = {"name": "sample_tool", "toolUseId": "123", "input": {"x": 5}} + agent_messages: Messages = [ + {"role": "assistant", "content": [{"toolUse": tool_use}]}, + ] + model = MockedModelProvider(agent_messages) + + agent = Agent( + model=model, + tools=[sample_tool], + hooks=[AsyncInterruptHook(), batch_hook_provider], + ) + + # Call agent synchronously but the hook is async + result = agent("test") + + assert result.stop_reason == "interrupt" + assert len(result.interrupts) == 1 + assert result.interrupts[0].name == "async-batch-approval" + + # Only BeforeToolsEvent should be triggered (AfterToolsEvent NOT fired on batch interrupt) + batch_length, _ = batch_hook_provider.get_events() + assert batch_length == 1 + + +def test_batch_events_with_tool_events(): + """Verify that batch events and per-tool events are triggered in correct order.""" + all_hook_provider = MockHookProvider([ + BeforeToolsEvent, + AfterToolsEvent, + BeforeToolCallEvent, + AfterToolCallEvent, + ]) + + @strands.tools.tool + def sample_tool(x: int) -> int: + return x * 2 + + tool_use = {"name": "sample_tool", "toolUseId": "123", "input": {"x": 5}} + agent_messages: Messages = [ + {"role": "assistant", "content": [{"toolUse": tool_use}]}, + {"role": "assistant", "content": [{"text": "Done"}]}, + ] + model = MockedModelProvider(agent_messages) + + agent = Agent( + model=model, + tools=[sample_tool], + hooks=[all_hook_provider], + ) + + agent("test") + + event_length, events = all_hook_provider.get_events() + assert event_length == 4 + + # Expected order: BeforeToolsEvent, BeforeToolCallEvent, AfterToolCallEvent, AfterToolsEvent + event_list = list(events) + assert isinstance(event_list[0], BeforeToolsEvent) + assert isinstance(event_list[1], BeforeToolCallEvent) 
+ assert isinstance(event_list[2], AfterToolCallEvent) + assert isinstance(event_list[3], AfterToolsEvent) @pytest.mark.asyncio