From 912c890e670adcca79532fcc3f2333db76c51be3 Mon Sep 17 00:00:00 2001 From: Hongbo Wei Date: Wed, 17 Dec 2025 13:46:36 -0800 Subject: [PATCH 01/10] feat: add version flag and submodule version+data info --- .github/ISSUE_TEMPLATE/bug_report.md | 28 + .github/ISSUE_TEMPLATE/feature_request.md | 20 + .github/workflows/build_and_publish.yml | 8 + .github/workflows/pr-checks.yml | 47 ++ .github/workflows/slack_notifications.yml | 30 + CHANGELOG.md | 21 + Cargo.toml | 11 +- README.md | 62 +- RELEASE.md | 52 +- dylint.toml | 4 + .../src/aws/iam_client.rs | 6 +- .../src/aws/mod.rs | 2 +- .../src/aws/policy_naming.rs | 6 +- .../src/commands/apply.rs | 7 +- iam-policy-autopilot-access-denied/src/lib.rs | 2 +- .../src/synthesis/policy_builder.rs | 2 + .../src/types.rs | 5 + iam-policy-autopilot-cli/src/commands.rs | 25 +- iam-policy-autopilot-cli/src/main.rs | 37 +- iam-policy-autopilot-lints/.cargo/config.toml | 6 + iam-policy-autopilot-lints/Cargo.toml | 23 + iam-policy-autopilot-lints/README.md | 76 ++ iam-policy-autopilot-lints/rust-toolchain | 6 + iam-policy-autopilot-lints/src/lib.rs | 14 + .../src/node_kind_literal.rs | 113 +++ iam-policy-autopilot-lints/ui/main.rs | 80 +++ iam-policy-autopilot-lints/ui/main.stderr | 35 + .../src/tools/fix_access_denied.rs | 2 + .../generate_policy_for_access_denied.rs | 3 + .../Cargo.toml | 8 + .../build.rs | 166 ++++- .../src/api/generate_policies.rs | 12 +- .../src/api/get_submodule_version.rs | 28 + .../src/api/mod.rs | 2 + .../src/api/model.rs | 16 + .../src/embedded_data.rs | 214 +++--- .../src/enrichment/engine.rs | 14 +- .../src/enrichment/resource_matcher.rs | 231 ++++-- .../src/enrichment/service_reference.rs | 49 +- .../src/extraction/go/disambiguation.rs | 404 +++++++++-- .../src/extraction/go/extractor.rs | 659 ++++++++++++++++-- .../src/extraction/go/features_extractor.rs | 1 + .../src/extraction/go/mod.rs | 1 + .../src/extraction/go/node_kinds.rs | 40 ++ .../src/extraction/go/waiter_extractor.rs | 1 + 
.../src/extraction/javascript/shared.rs | 19 +- .../src/extraction/mod.rs | 7 + .../python/boto3_resources_model.rs | 10 +- .../python/common/argument_extractor.rs | 8 +- .../src/extraction/python/disambiguation.rs | 2 + .../extraction/python/disambiguation_tests.rs | 2 + .../src/extraction/python/mod.rs | 1 + .../src/extraction/python/node_kinds.rs | 20 + .../python/resource_direct_calls_extractor.rs | 2 + .../src/extraction/sdk_model.rs | 24 +- .../src/extraction/waiter_model.rs | 30 - .../src/lib.rs | 17 + .../tests/go_extraction_integration_test.rs | 4 +- .../tests/go_sdk_features_test.rs | 6 +- .../tests/public_api_integration_test.rs | 8 +- install.sh | 349 ++++++++++ power-iam-policy-autopilot/POWER.md | 48 ++ power-iam-policy-autopilot/mcp.json | 11 + pyproject.toml | 4 +- 64 files changed, 2709 insertions(+), 442 deletions(-) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/feature_request.md create mode 100644 .github/workflows/slack_notifications.yml create mode 100644 CHANGELOG.md create mode 100644 dylint.toml create mode 100644 iam-policy-autopilot-lints/.cargo/config.toml create mode 100644 iam-policy-autopilot-lints/Cargo.toml create mode 100644 iam-policy-autopilot-lints/README.md create mode 100644 iam-policy-autopilot-lints/rust-toolchain create mode 100644 iam-policy-autopilot-lints/src/lib.rs create mode 100644 iam-policy-autopilot-lints/src/node_kind_literal.rs create mode 100644 iam-policy-autopilot-lints/ui/main.rs create mode 100644 iam-policy-autopilot-lints/ui/main.stderr create mode 100644 iam-policy-autopilot-policy-generation/src/api/get_submodule_version.rs create mode 100644 iam-policy-autopilot-policy-generation/src/extraction/go/node_kinds.rs create mode 100644 iam-policy-autopilot-policy-generation/src/extraction/python/node_kinds.rs create mode 100755 install.sh create mode 100644 power-iam-policy-autopilot/POWER.md create mode 100644 power-iam-policy-autopilot/mcp.json diff --git 
a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..1066e1b --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,28 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: '' +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Please share the command, input, and output that can be used to reproduce the bug. + +Note: Please make sure to redact sensitive information from your policies, ARNs, and account IDs. + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Environment (please complete the following information):** + - OS: [e.g., macOS, Linux, Windows] + - IAM Policy Autopilot Version: [e.g., 0.1.0] (run `iam-policy-autopilot --version` or `uvx iam-policy-autopilot --version`) + - Installation Method: [e.g., uvx, pip, direct install] + - MCP Client (if applicable): [e.g., Kiro, Claude Desktop, N/A] + +**Additional context** +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..bc2df62 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,20 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: '' +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of the problem. For example: "I'm unable to generate policies for [specific scenario]" or "The tool doesn't support [specific use case]" + +**Describe the solution you'd like** +A clear and concise description of the feature or enhancement you'd like to see. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. 
+ +**Additional context** +Add any other context, examples, or use cases that would help us understand your feature request. diff --git a/.github/workflows/build_and_publish.yml b/.github/workflows/build_and_publish.yml index 5d4c4bf..ba0fd5d 100644 --- a/.github/workflows/build_and_publish.yml +++ b/.github/workflows/build_and_publish.yml @@ -4,9 +4,14 @@ on: release: types: [published] +permissions: + contents: read + jobs: build_wheels: name: Build ${{ matrix.target }} wheel + permissions: + contents: read # Define the matrix for runners and Rust targets strategy: fail-fast: false @@ -59,6 +64,8 @@ jobs: test_wheels: name: Test ${{ matrix.target }} wheel needs: build_wheels + permissions: + contents: read strategy: fail-fast: false matrix: @@ -242,6 +249,7 @@ jobs: runs-on: ubuntu-latest environment: Release permissions: + contents: read id-token: write steps: diff --git a/.github/workflows/pr-checks.yml b/.github/workflows/pr-checks.yml index 902ac50..a96b601 100644 --- a/.github/workflows/pr-checks.yml +++ b/.github/workflows/pr-checks.yml @@ -4,10 +4,15 @@ on: pull_request: branches: [ main, prerelease, releases/** ] +permissions: + contents: read + jobs: commit-messages: name: Validate Commit Messages runs-on: ubuntu-latest + permissions: + contents: read steps: - uses: actions/checkout@v4 with: @@ -49,6 +54,8 @@ jobs: name: Test Linux needs: commit-messages runs-on: ubuntu-latest + permissions: + contents: read strategy: matrix: target: [x86_64-unknown-linux-gnu, aarch64-unknown-linux-gnu] @@ -82,6 +89,8 @@ jobs: name: Test Windows needs: commit-messages runs-on: windows-latest + permissions: + contents: read strategy: matrix: target: [x86_64-pc-windows-msvc, aarch64-pc-windows-msvc] @@ -103,6 +112,8 @@ jobs: name: Test macOS needs: commit-messages runs-on: macos-latest + permissions: + contents: read strategy: matrix: target: [x86_64-apple-darwin, aarch64-apple-darwin] @@ -124,6 +135,8 @@ jobs: name: Format needs: commit-messages runs-on: ubuntu-latest + 
permissions: + contents: read steps: - uses: actions/checkout@v4 @@ -138,6 +151,8 @@ jobs: name: Clippy needs: commit-messages runs-on: ubuntu-latest + permissions: + contents: read steps: - uses: actions/checkout@v4 with: @@ -151,3 +166,35 @@ jobs: - name: Run clippy run: cargo clippy --workspace -- -D warnings + + custom-lints: + name: Custom Lints (dylint) + needs: commit-messages + runs-on: ubuntu-latest + permissions: + contents: read + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Install Rust stable + uses: dtolnay/rust-toolchain@stable + + - name: Install Rust nightly + uses: dtolnay/rust-toolchain@nightly + with: + components: rustc-dev, llvm-tools + + - uses: Swatinem/rust-cache@v2 + with: + cache-directories: | + ~/.dylint_drivers + + - name: Install dylint + run: cargo install cargo-dylint dylint-link + + - name: Run custom lints + run: cargo dylint --all --workspace + # Note: dylint automatically uses the nightly toolchain to build the lint library + # (specified in iam-policy-autopilot-lints/rust-toolchain) and stable to check the workspace diff --git a/.github/workflows/slack_notifications.yml b/.github/workflows/slack_notifications.yml new file mode 100644 index 0000000..6a1aa0d --- /dev/null +++ b/.github/workflows/slack_notifications.yml @@ -0,0 +1,30 @@ +name: Slack Notifier + +on: + issues: + issue_comment: + pull_request_target: + types: [opened, reopened, edited] + pull_request_review_comment: + pull_request_review: + discussion: + discussion_comment: + +permissions: + contents: read + +jobs: + notify: + runs-on: ubuntu-latest + permissions: + contents: read + steps: + - name: Send Event Notification + continue-on-error: true + uses: rtCamp/action-slack-notify@cdf0a2130cbcdfd82ba5fcac8e076370bf381b36 + env: + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }} + SLACK_TITLE: '${{ github.event_name }}: ${{ github.event.repository.name }}' + SLACK_FOOTER: '' + MSG_MINIMAL: true + SLACK_MESSAGE: 'By ${{ 
github.event.sender.login }} - ${{ github.event.action }} - ${{ github.event.issue.html_url || github.event.pull_request.html_url || github.event.discussion.html_url || github.event.comment.html_url }}' \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..8318de6 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,21 @@ +## [0.1.2] - 2025-12-15 + +## Fixed + +- Use SDK info to find the operation from a method name. Fixes a bug where `modify_db_cluster` (and similar names) was renamed incorrectly to `ModifyDbCluster` instead of `ModifyDBCluster`. (#70) +- Reduce false positive findings by fixing Go SDK parameter extraction. It now uses required arguments correctly to disambiguate possible services. (#50) + +## Added + +- Added installation script for MacOS and Linux. (#44) + +## Changed + +- We now add the policy ID `IamPolicyAutopilot` in the access denied workflow. (#48) +- Updated Cargo.toml description. (#46) + +## [0.1.1] - 2025-11-26 + +### 🚀 Features + +- Initial release diff --git a/Cargo.toml b/Cargo.toml index 9600611..67f5ef1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,14 +6,15 @@ members = [ "iam-policy-autopilot-access-denied", "iam-policy-autopilot-cli" ] +exclude = ["iam-policy-autopilot-lints"] resolver = "2" [workspace.package] -version = "0.1.0" +version = "0.1.2" edition = "2021" license = "Apache-2.0" repository = "https://github.com/awslabs/iam-policy-autopilot" -description = "VS Code extension for AWS IAM permission analysis and management" +description = "An open source Model Context Protocol (MCP) server and command-line tool that helps your AI coding assistants quickly create baseline IAM policies that you can refine as your application evolves, so you can build faster." 
# Shared dependency versions across workspace [workspace.dependencies] @@ -45,7 +46,7 @@ criterion = "0.5" proptest = "1.0" # CLI-specific dependencies -clap = { version = "4.5", features = ["derive", "env"] } +clap = { version = "4.5", features = ["derive", "env", "cargo"] } env_logger = "0.11" log = "0.4" walkdir = "2.0" @@ -63,7 +64,9 @@ serial_test = "3.0" atty = "0.2" chrono = { version = "0.4", features = ["serde"] } uuid = { version = "1.8", features = ["v4"] } -sha2 = "0.10" +sha2 = "0.10.9" +git2 = "0.20.3" +relative-path = "2.0.1" url = "2.5" percent-encoding = "2.3" aws-sdk-iam = "1.89.0" diff --git a/README.md b/README.md index 4d938dd..d819383 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,6 @@ +[![awslabs/iam-policy-autopilot License](https://img.shields.io/badge/license-Apache%202-blue)](https://github.com/awslabs/iam-policy-autopilot/blob/main/LICENSE) +[![GitHub CI Status](https://img.shields.io/github/actions/workflow/status/awslabs/iam-policy-autopilot/build_and_publish.yml?label=CI&logo=GitHub)](https://github.com/awslabs/iam-policy-autopilot/actions/workflows/build_and_publish.yml) [![PyPI - Version](https://img.shields.io/pypi/v/iam-policy-autopilot?logo=Python&logoColor=white)](https://pypi.org/project/iam-policy-autopilot/) + # IAM Policy Autopilot An open source Model Context Protocol (MCP) server and command-line tool that helps your AI coding assistants quickly create baseline IAM policies that you can refine as your application evolves, so you can build faster. IAM Policy Autopilot analyzes your application code locally to generate identity-based policies for application roles, enabling faster IAM policy creation and reducing access troubleshooting time. IAM Policy Autopilot supports applications built in Python, Go, and TypeScript. 
@@ -39,7 +42,7 @@ IAM Policy Autopilot stays up to date with the latest AWS services and features ### Review and refine policies generated by IAM Policy Autopilot -IAM Policy Autopilot generates policies to provide a starting point that you can refine as your application matures. Review the generated policies so that they align with your security requirements before deploying them. +IAM Policy Autopilot generates baseline policies to provide a starting point that you can refine as your application matures. Review the generated policies to ensure they align with your security requirements before deploying them. ### Understand the IAM Policy Autopilot scope @@ -49,6 +52,24 @@ IAM Policy Autopilot produces IAM identity-based policies, but doesn't support r IAM Policy Autopilot generates policies with specific actions based on deterministic analysis of your code. When you use the MCP server integration, your AI coding assistant receives this policy and might modify it when creating infrastructure-as-code templates. For example, you might see the assistant add specific resource Amazon Resource Names (ARNs) or include KMS key IDs based on additional context from your code. These changes come from your coding assistant's interpretation of your broader code context, not from the static analysis provided by IAM Policy Autopilot. Always review content generated by your coding assistant before deployment to verify that it meets your security requirements. +### Use service hints for accurate policies + +IAM Policy Autopilot's static analysis may include permissions for AWS services your application doesn't use. This happens when method names in your code match AWS SDK calls from multiple services. 
For example, a method called `listAccounts()` might generate permissions for both [AWS Organizations](https://docs.aws.amazon.com/organizations/latest/APIReference/API_ListAccounts.html) and [Amazon Chime](https://docs.aws.amazon.com/chime/latest/APIReference/API_ListAccounts.html) services. + +**Recommended approach**: Use the `--service-hints` option to specify only the AWS services your application actually uses. This helps IAM Policy Autopilot scope down which SDK calls to analyze, but the final policy may still include actions from other services if they're required by the operations you perform: + +```bash +# More accurate - specify only services you use +iam-policy-autopilot generate-policies ./src/app.py --service-hints s3 iam organizations --pretty + +# Less accurate - may include unnecessary permissions +iam-policy-autopilot generate-policies ./src/app.py --pretty +``` + +This significantly reduces unnecessary permissions and generates more targeted policies. Note that the final policy may still include actions from services not in your hints if they're required for the operations you perform (e.g., KMS actions for S3 encryption). + +**Note**: When using the MCP server integration with AI coding assistants, the assistant is expected to automatically provide appropriate service hints based on your code context. The `--service-hints` option is primarily for CLI usage. + ## Getting Started ### Installation @@ -67,6 +88,16 @@ Install [pip](https://pip.pypa.io/en/stable/installation/). pip install iam-policy-autopilot ``` +#### Option 3: Direct installation (MacOS/Linux only) + +To install the latest release directly, run the following script to download and install as a system utility. + +```bash +curl -sSL https://github.com/awslabs/iam-policy-autopilot/raw/refs/heads/main/install.sh | sudo sh +``` + +This will install the latest release directly to `/usr/local/bin/iam-policy-autopilot`. 
+ ### AWS Configuration IAM Policy Autopilot requires AWS credentials to apply policy fixes and upload policies for AccessDenied debugging. @@ -191,6 +222,34 @@ Add to your Claude Desktop configuration file: } ``` +### Kiro Power Configuration + +IAM Policy Autopilot has an associated [Kiro power](https://kiro.dev/blog/introducing-powers/) configuration inside the `power-iam-policy-autopilot` directory. This can be used to install a corresponding Kiro power in your Kiro editor. + +#### Enabling the IAM Policy Autopilot Kiro Power + +To enable the IAM Policy Autopilot Kiro Power, first install the `uv` package manager by [following these instructions](https://docs.astral.sh/uv/getting-started/installation/). Then, do the following steps within Kiro: +1. Go to the "Powers" menu in the menubar on the left-hand-side. +2. Click `Add Custom Power` -> `Import power from Github` +3. In the text prompt that then appears, enter `https://github.com/awslabs/iam-policy-autopilot/tree/main/power-iam-policy-autopilot`. +4. Kiro should automatically install a new Kiro power called `IAM Policy Autopilot` within your Kiro code editor. This power should be visible in the `Powers` menu. + +If the above steps for installing the power from a GitHub repository URL does not work, you can also clone the repository and import the power directly, by doing the following: +1. Clone the git repository `https://github.com/awslabs/iam-policy-autopilot`, and remember the directory to where you cloned the repo. +2. Go to the "Powers" menu in the menubar on the left-hand-side. +3. Click `Add Custom Power` -> `Import power from a folder` +4. In the text prompt that then appears, select the `power-iam-policy-autopilot` folder in your cloned repository. For instance, if the repository is cloned to `~/workplace/iam-policy-autopilot`, you should select or enter `~/workplace/iam-policy-autopilot/power-iam-policy-autopilot`. +5. 
Kiro should automatically install a new Kiro power called `IAM Policy Autopilot` within your Kiro code editor. This power should be visible in the `Powers` menu. + +#### Why use IAM Policy Autopilot's Kiro Power? + +Kiro powers generally offer [a more refined experience than traditional MCP servers](https://kiro.dev/blog/introducing-powers/) because they enable MCP tools to be loaded more selectively & deliberately, reducing LLM token usage and avoiding LLM context overcrowding. + +IAM Policy Autopilot's Kiro power specifically enhances the traditional MCP experience, for multiple reasons: +1. This Kiro Power provides your LLM agent with **more steering guidance**, offering it more information on the specific use cases and best practices of our MCP tooling. +2. This Kiro power prompts your LLM agent to give a **tutorial of the MCP tools** offered by IAM Policy Autopilot, allowing you to better understand how our MCP tooling assists your use case. +3. This Kiro Power provides your LLM agent with **step-by-step onboarding validation**, allowing it to detect any problems with installations and provide remediation steps for those problems. + ## CLI Usage The `iam-policy-autopilot` CLI tool provides three main commands: @@ -232,6 +291,7 @@ iam-policy-autopilot generate-policies \ Options: - `--region ` - AWS region for resource ARNs - `--account ` - AWS account ID for resource ARNs +- `--service-hints ` - Limit analysis to only the services your application actually uses if you know them. This helps reduce unnecessary permissions. 
- `--upload-policies ` - Upload generated policies to AWS IAM with the specified prefix - `--pretty` - Pretty-print JSON output diff --git a/RELEASE.md b/RELEASE.md index a800cc4..af6be17 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -33,6 +33,7 @@ git pull origin main # Create release branch (replace X.Y.Z with version number) git checkout -b release/X.Y.Z ``` +*Note: It's recommended to create the release branch directly in the awslabs/iam-policy-autopilot rather than in a forked repo.* #### Optional: Cherry-pick Specific Commits @@ -152,49 +153,26 @@ Or manually create the PR through the GitHub web interface. ### 6. Merge and Create Release -After PR approval and merge: +#### Using the GitHub Web Interface -1. **Create a Git Tag:** - ```bash - # Checkout main and pull latest - git checkout main - git status # confirm your local changes - git pull origin main +It's recommended to create the new release and tag directly via the GitHub web interface, where you can automatically generate release notes, create a tag, and draft a release before publishing it. - # Create annotated tag - git tag -a X.Y.Z -m "Release X.Y.Z" +Notes: +- The new tag should be the same as the version to be released +- Make sure to select the correct release branch as the target when creating the tag + - The main branch can be used if it's identical to the release branch (i.e., no cherry-picked commits in the release branch) +- Be sure to `Save draft` and review it once before publishing the release. - # Push tag to remote - git push origin X.Y.Z - ``` -2. 
**Create GitHub Release:** +### Automated Build and Publish - Using GitHub CLI: - ```bash - gh release create X.Y.Z \ - --title "Release X.Y.Z" \ - --notes-file CHANGELOG.md \ - --latest - ``` - - Or manually through GitHub web interface: - - Go to repository → Releases → Draft a new release - - Choose the tag `X.Y.Z` - - Set release title: `Release X.Y.Z` - - Copy relevant section from CHANGELOG.md to release notes - - Check "Set as the latest release" - - Click "Publish release" ***One-way Door decision, make sure to review all the details*** - -3. **Automated Build and Publish:** - - The GitHub Actions workflow (`build_and_publish.yml`) will automatically: - - Build wheels for all supported platforms (Linux, Windows, macOS) - - Test the wheels on each platform - - Verify version matches the release tag - - Publish to PyPI (if tests pass) +Once a release is published, the GitHub Actions workflow (`build_and_publish.yml`) will automatically: +- Build wheels for all supported platforms (Linux, Windows, macOS) +- Test the wheels on each platform +- Verify version matches the release tag +- Publish to PyPI (if tests pass) - Monitor the workflow progress at: `https://github.com/awslabs/iam-policy-autopilot/actions` +Monitor the workflow progress at: `https://github.com/awslabs/iam-policy-autopilot/actions` ## Post-Release diff --git a/dylint.toml b/dylint.toml new file mode 100644 index 0000000..dd12b06 --- /dev/null +++ b/dylint.toml @@ -0,0 +1,4 @@ +[workspace.metadata.dylint] +libraries = [ + { path = "iam-policy-autopilot-lints" } +] diff --git a/iam-policy-autopilot-access-denied/src/aws/iam_client.rs b/iam-policy-autopilot-access-denied/src/aws/iam_client.rs index 2ebdab2..e8c213c 100644 --- a/iam-policy-autopilot-access-denied/src/aws/iam_client.rs +++ b/iam-policy-autopilot-access-denied/src/aws/iam_client.rs @@ -181,10 +181,14 @@ pub async fn find_canonical_policy( #[cfg(test)] mod tests { use super::*; - use crate::types::{ActionType, Statement}; + use 
crate::{ + aws::policy_naming::POLICY_PREFIX, + types::{ActionType, Statement}, + }; fn sample_policy() -> PolicyDocument { PolicyDocument { + id: Some(POLICY_PREFIX.to_string()), version: "2012-10-17".to_string(), statement: vec![Statement { sid: "Test".into(), diff --git a/iam-policy-autopilot-access-denied/src/aws/mod.rs b/iam-policy-autopilot-access-denied/src/aws/mod.rs index a6a2e1f..23f8077 100644 --- a/iam-policy-autopilot-access-denied/src/aws/mod.rs +++ b/iam-policy-autopilot-access-denied/src/aws/mod.rs @@ -1,7 +1,7 @@ //! AWS SDK integration: IAM client wrapper, principal parsing, policy naming. pub(crate) mod iam_client; -pub(crate) mod policy_naming; +pub mod policy_naming; pub mod principal; pub(crate) mod sts; diff --git a/iam-policy-autopilot-access-denied/src/aws/policy_naming.rs b/iam-policy-autopilot-access-denied/src/aws/policy_naming.rs index f042465..393b66c 100644 --- a/iam-policy-autopilot-access-denied/src/aws/policy_naming.rs +++ b/iam-policy-autopilot-access-denied/src/aws/policy_naming.rs @@ -6,7 +6,7 @@ use std::sync::OnceLock; // AWS IAM policy name character limit (128 characters) // Reference: https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_iam-quotas.html const MAX_POLICY_NAME_LENGTH: usize = 128; -const POLICY_PREFIX: &str = "IamPolicyAutopilot"; +pub const POLICY_PREFIX: &str = "IamPolicyAutopilot"; fn sanitize_component(component: &str) -> String { static SANITIZE_REGEX: OnceLock = OnceLock::new(); @@ -59,8 +59,8 @@ pub fn build_statement_sid(action: &str, date: &str, existing_sids: &[String]) - let date_no_hyphens = date.replace("-", ""); let base_sid = format!( - "IamPolicyAutopilot{}{}{}", - service_cap, action_cap, date_no_hyphens + "{}{}{}{}", + POLICY_PREFIX, service_cap, action_cap, date_no_hyphens ); let mut sid = base_sid.clone(); diff --git a/iam-policy-autopilot-access-denied/src/commands/apply.rs b/iam-policy-autopilot-access-denied/src/commands/apply.rs index 0595bad..3761aee 100644 --- 
a/iam-policy-autopilot-access-denied/src/commands/apply.rs +++ b/iam-policy-autopilot-access-denied/src/commands/apply.rs @@ -1,7 +1,7 @@ //! Apply logic for IAM Policy Autopilot service use crate::aws::iam_client::{find_canonical_policy, put_inline_policy}; -use crate::aws::policy_naming::{build_canonical_policy_name, build_statement_sid}; +use crate::aws::policy_naming::{build_canonical_policy_name, build_statement_sid, POLICY_PREFIX}; use crate::aws::principal::resolve_principal; use crate::aws::sts::caller_account_id; use crate::synthesis::build_single_statement; @@ -82,6 +82,10 @@ impl super::service::IamPolicyAutopilotService { sort_statements(&mut merged_statements); let policy_doc = crate::types::PolicyDocument { + id: existing + .document + .id + .or_else(|| Some(POLICY_PREFIX.to_string())), version: "2012-10-17".to_string(), statement: merged_statements, }; @@ -92,6 +96,7 @@ impl super::service::IamPolicyAutopilotService { let stmt = build_single_statement(action.clone(), plan.diagnosis.resource.clone(), sid); let policy_doc = crate::types::PolicyDocument { + id: Some(POLICY_PREFIX.to_string()), version: "2012-10-17".to_string(), statement: vec![stmt], }; diff --git a/iam-policy-autopilot-access-denied/src/lib.rs b/iam-policy-autopilot-access-denied/src/lib.rs index 9ea09f4..c3d38f9 100644 --- a/iam-policy-autopilot-access-denied/src/lib.rs +++ b/iam-policy-autopilot-access-denied/src/lib.rs @@ -4,7 +4,7 @@ //! - Principal ARN resolution and basic IAM operations (inline policies) //! 
-mod aws; +pub mod aws; pub mod commands; mod error; mod parsing; diff --git a/iam-policy-autopilot-access-denied/src/synthesis/policy_builder.rs b/iam-policy-autopilot-access-denied/src/synthesis/policy_builder.rs index 11d79ff..7488a69 100644 --- a/iam-policy-autopilot-access-denied/src/synthesis/policy_builder.rs +++ b/iam-policy-autopilot-access-denied/src/synthesis/policy_builder.rs @@ -27,6 +27,7 @@ pub fn build_inline_allow(actions: Vec, resource: String) -> PolicyDocum }; PolicyDocument { + id: Some("IamPolicyAutopilot".to_string()), version: "2012-10-17".to_string(), statement: vec![statement], } @@ -90,6 +91,7 @@ mod tests { } _ => panic!("expected multiple"), } + assert_eq!(policy.id, Some("IamPolicyAutopilot".to_string())); } #[test] diff --git a/iam-policy-autopilot-access-denied/src/types.rs b/iam-policy-autopilot-access-denied/src/types.rs index 8d29a68..1d0075f 100644 --- a/iam-policy-autopilot-access-denied/src/types.rs +++ b/iam-policy-autopilot-access-denied/src/types.rs @@ -69,6 +69,8 @@ impl ActionType { #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] #[serde(rename_all = "PascalCase")] pub struct PolicyDocument { + #[serde(skip_serializing_if = "Option::is_none")] + pub id: Option, pub version: String, pub statement: Vec, } @@ -156,6 +158,8 @@ impl Statement { #[cfg(test)] mod tests { + use crate::aws::policy_naming::POLICY_PREFIX; + use super::*; #[test] @@ -251,6 +255,7 @@ mod tests { ), actions: vec!["s3:GetObject".to_string()], policy: PolicyDocument { + id: Some(POLICY_PREFIX.to_string()), version: "2012-10-17".to_string(), statement: vec![], }, diff --git a/iam-policy-autopilot-cli/src/commands.rs b/iam-policy-autopilot-cli/src/commands.rs index 2e45f6b..f3cf086 100644 --- a/iam-policy-autopilot-cli/src/commands.rs +++ b/iam-policy-autopilot-cli/src/commands.rs @@ -4,10 +4,10 @@ use crate::{output, types::ExitCode}; use iam_policy_autopilot_access_denied::{ApplyError, ApplyOptions, DenialType}; - fn is_tty() -> bool { 
atty::is(atty::Stream::Stdin) && atty::is(atty::Stream::Stderr) } +use clap::{crate_version, Parser, Subcommand}; /// Returns Some(true) if user confirmed, Some(false) if declined, None if not in TTY. fn prompt_yes_no() -> Option { @@ -143,6 +143,29 @@ async fn fix_access_denied_with_service( } } +pub fn print_version_info() -> anyhow::Result<()> { + let boto3_version_metadata = + iam_policy_autopilot_policy_generation::api::get_boto3_version_info()?; + let botocore_version_metadata = + iam_policy_autopilot_policy_generation::api::get_botocore_version_info()?; + println!("{}", crate_version!()); + println!( + "boto3 version: commit_id={}, commit_tag={}, data_hash={}", + boto3_version_metadata.git_commit_hash, + boto3_version_metadata.git_tag.unwrap_or("None".to_string()), + boto3_version_metadata.data_hash + ); + println!( + "botocore version: commit_id={}, commit_tag={}, data_hash={}", + botocore_version_metadata.git_commit_hash, + botocore_version_metadata + .git_tag + .unwrap_or("None".to_string()), + botocore_version_metadata.data_hash + ); + Ok(()) +} + fn handle_apply_error(apply_error: ApplyError) -> ExitCode { match apply_error { ApplyError::UnsupportedDenialType => { diff --git a/iam-policy-autopilot-cli/src/main.rs b/iam-policy-autopilot-cli/src/main.rs index ddea279..865d187 100644 --- a/iam-policy-autopilot-cli/src/main.rs +++ b/iam-policy-autopilot-cli/src/main.rs @@ -20,7 +20,7 @@ use std::path::PathBuf; use std::process; use anyhow::{Context, Result}; -use clap::{Parser, Subcommand}; +use clap::{crate_version, Parser, Subcommand}; use iam_policy_autopilot_policy_generation::api::model::{ AwsContext, ExtractSdkCallsConfig, GeneratePolicyConfig, }; @@ -36,6 +36,8 @@ mod types; use iam_policy_autopilot_mcp_server::{start_mcp_server, McpTransport}; use types::ExitCode; +use crate::commands::print_version_info; + /// Default port for mcp server for Http Transport static MCP_HTTP_DEFAULT_PORT: u16 = 8001; @@ -100,26 +102,28 @@ impl GeneratePolicyCliConfig 
{ } const SERVICE_HINTS_LONG_HELP: &str = - "Space-separated list of AWS service names to filter extracted SDK calls. \ -Filters the result of source code analysis, an action from a service not provided as a hint \ -may still be included in the final policy, if IAM Policy Autopilot determines the action may \ -be required for the SDK call."; + "Space-separated list of AWS service names to filter which SDK calls are analyzed. \ +This helps reduce unnecessary permissions by limiting analysis to only the services your application actually uses. \ +For example, if your code only uses S3 and IAM services, specify '--service-hints s3 iam' to avoid \ +analyzing unrelated method calls that might match other services like Chime. \ +Note: The final policy may still include actions from services not in your hints if they are \ +required for the operations you perform (e.g., KMS actions for S3 encryption)."; #[derive(Parser, Debug)] #[command( name = "iam-policy-autopilot", author, version, + disable_version_flag = true, about = "Generate IAM policies from source code and fix AccessDenied errors", long_about = "Unified tool that combines IAM policy generation from source code analysis \ with automatic AccessDenied error fixing. Supports three main operations:\n\n\ • fix-access-denied: Fix AccessDenied errors by analyzing and applying IAM policy changes\n\ • generate-policies: Complete pipeline with enrichment for policy generation\n\ • mcp-server: Start MCP server for IDE integration. 
Uses STDIO transport by default.\n\n\ -Examples:\n \ iam-policy-autopilot fix-access-denied 'User: arn:aws:iam::123456789012:user/testuser is not authorized to perform: s3:GetObject on resource: arn:aws:s3:::my-bucket/my-key because no identity-based policy allows the s3:GetObject action'\n \ iam-policy-autopilot generate-policies tests/resources/test_example.py --region us-east-1 --account 123456789012 --pretty\n \ -iam-policy-autopilot generate-policies tests/resources/test_example.py --region cn-north-1 --account 123456789012\n \ +iam-policy-autopilot generate-policies tests/resources/test_example.py --service-hints s3 iam --region us-east-1 --account 123456789012 --pretty\n \ iam-policy-autopilot mcp-server\n \ iam-policy-autopilot mcp-server --transport http --port 8001" )] @@ -227,7 +231,9 @@ This flag has no effect on the generate-policies subcommand." #[command(long_about = "\ Generates complete IAM policy documents from source files. By default, all \ policies are merged into a single optimized policy document. \ -Optionally takes AWS context (region and account) for accurate ARN generation.")] +Optionally takes AWS context (region and account) for accurate ARN generation.\n\n\ +TIP: Use --service-hints to specify the particular AWS services that your application uses if you know them. \ +The final policy may still include actions from other services if required for your operations.")] GeneratePolicies { /// Source files to analyze for SDK method extraction #[arg(required = true, num_args = 1..)] @@ -346,6 +352,13 @@ for direct integration with IDEs and tools. 'http' starts an HTTP server for net Only used when --transport=http. 
The server will bind to 127.0.0.1 (localhost) on the specified port.")] port: u16, }, + + #[command( + about = "Print version information.", + short_flag = 'V', + long_flag = "version" + )] + Version {}, } /// Initialize logging based on configuration @@ -608,6 +621,14 @@ async fn main() { } } } + + Commands::Version {} => match print_version_info() { + Ok(()) => ExitCode::Success, + Err(e) => { + print_cli_command_error(e); + ExitCode::Error + } + }, }; process::exit(code.into()); diff --git a/iam-policy-autopilot-lints/.cargo/config.toml b/iam-policy-autopilot-lints/.cargo/config.toml new file mode 100644 index 0000000..226eca5 --- /dev/null +++ b/iam-policy-autopilot-lints/.cargo/config.toml @@ -0,0 +1,6 @@ +[target.'cfg(all())'] +rustflags = ["-C", "linker=dylint-link"] + +# For Rust versions 1.74.0 and onward, the following alternative can be used +# (see https://github.com/rust-lang/cargo/pull/12535): +# linker = "dylint-link" diff --git a/iam-policy-autopilot-lints/Cargo.toml b/iam-policy-autopilot-lints/Cargo.toml new file mode 100644 index 0000000..800344f --- /dev/null +++ b/iam-policy-autopilot-lints/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "iam_policy_autopilot_lints" +version = "0.1.0" +description = "Custom dylint lints for IAM Policy Autopilot project" +edition = "2021" +publish = false + +[lib] +crate-type = ["cdylib"] + +[dependencies] +# clippy_utils is not published to crates.io, so we use a git dependency +# This revision was chosen because it compiles with nightly-2025-09-18 (see rust-toolchain) +# The revision doesn't need to exactly match the nightly date - any recent clippy commit +# that's compatible with the nightly toolchain will work. This one was verified to build successfully. 
+clippy_utils = { git = "https://github.com/rust-lang/rust-clippy", rev = "20ce69b9a63bcd2756cd906fe0964d1e901e042a" } +dylint_linting = "5.0.0" + +[dev-dependencies] +dylint_testing = "5.0.0" + +[package.metadata.rust-analyzer] +rustc_private = true diff --git a/iam-policy-autopilot-lints/README.md b/iam-policy-autopilot-lints/README.md new file mode 100644 index 0000000..dce9664 --- /dev/null +++ b/iam-policy-autopilot-lints/README.md @@ -0,0 +1,76 @@ +# IAM Policy Autopilot Custom Lints + +Custom [dylint](https://github.com/trailofbits/dylint) lints for enforcing project-specific patterns. + +## Available Lints + +### `node_kind_literal` + +Enforces use of constants instead of string literals when comparing with `.kind()` method calls. + +**Bad:** +```rust +if node.kind() == "composite_literal" { + // ... +} +``` + +**Good:** +```rust +use crate::extraction::go::node_kinds::COMPOSITE_LITERAL; +if node.kind() == COMPOSITE_LITERAL { + // ... +} +``` + +## Usage + +### Install dylint + +```bash +cargo install cargo-dylint dylint-link +``` + +### Run lints + +```bash +# Check all workspace packages +cargo dylint --all --workspace + +# Check specific package +cargo dylint --all --package iam-policy-autopilot-policy-generation + +# Check all targets (including tests) +cargo dylint --all --workspace -- --all-targets +``` + +## CI Integration + +The lints run automatically on every PR via `.github/workflows/pr-checks.yml`. + +## Development + +### Test the lints + +```bash +cd iam-policy-autopilot-lints +cargo test +``` + +### Update test expectations + +If you modify a lint's output, update the expected stderr file manually: + +1. Run `cargo test` to see the diff +2. Update the corresponding `.stderr` file in `ui/` directory +3. Run `cargo test` again to verify + +### Add a new lint + +1. Create `src/my_new_lint.rs` +2. Add `mod my_new_lint;` to `src/lib.rs` +3. Add test cases in `ui/my_new_lint.rs` +4. Create expected output in `ui/my_new_lint.stderr` +5. 
Run `cargo test` to verify + +See [dylint documentation](https://github.com/trailofbits/dylint) for details on writing lints. diff --git a/iam-policy-autopilot-lints/rust-toolchain b/iam-policy-autopilot-lints/rust-toolchain new file mode 100644 index 0000000..97f5532 --- /dev/null +++ b/iam-policy-autopilot-lints/rust-toolchain @@ -0,0 +1,6 @@ +# Nightly toolchain required for building dylint custom lints +# Dylint lints need access to rustc internals (rustc-dev) which are only available on nightly +# Running the lints (via cargo dylint) works on stable Rust +[toolchain] +channel = "nightly-2025-09-18" +components = ["llvm-tools-preview", "rustc-dev"] diff --git a/iam-policy-autopilot-lints/src/lib.rs b/iam-policy-autopilot-lints/src/lib.rs new file mode 100644 index 0000000..e6fb5b3 --- /dev/null +++ b/iam-policy-autopilot-lints/src/lib.rs @@ -0,0 +1,14 @@ +//! Custom lints for IAM Policy Autopilot + +#![feature(rustc_private)] +#![warn(unused_extern_crates)] + +extern crate rustc_ast; +extern crate rustc_hir; + +mod node_kind_literal; + +#[test] +fn ui() { + dylint_testing::ui_test(env!("CARGO_PKG_NAME"), "ui"); +} diff --git a/iam-policy-autopilot-lints/src/node_kind_literal.rs b/iam-policy-autopilot-lints/src/node_kind_literal.rs new file mode 100644 index 0000000..44d4eb6 --- /dev/null +++ b/iam-policy-autopilot-lints/src/node_kind_literal.rs @@ -0,0 +1,113 @@ +//! Lint to enforce use of constants instead of string literals for node kinds + +use clippy_utils::diagnostics::span_lint_and_help; +use rustc_ast::LitKind; +use rustc_hir::{BinOpKind, Expr, ExprKind}; +use rustc_lint::LateLintPass; + +dylint_linting::declare_late_lint! { + /// ### What it does + /// Detects string literals used in comparisons with `.kind()` method calls, + /// which typically indicate Tree-sitter node kind checks that should use constants. + /// + /// ### Why is this bad? 
+ /// Using string literals for node kinds: + /// - Lacks compile-time checking + /// - Misses IDE autocomplete support + /// - Makes refactoring harder + /// - Can lead to typos + /// + /// ### Example + /// ```rust + /// // Bad - string literal + /// if node.kind() == "composite_literal" { + /// // ... + /// } + /// ``` + /// + /// Use instead: + /// ```rust + /// // Good - constant from node_kinds module + /// use crate::extraction::go::node_kinds::COMPOSITE_LITERAL; + /// if node.kind() == COMPOSITE_LITERAL { + /// // ... + /// } + /// ``` + pub NODE_KIND_LITERAL, + Deny, + "use of string literals in comparisons with .kind() method calls" +} + +/// Check if an expression is a call to the `.kind()` method on a tree-sitter Node +fn is_kind_method_call<'tcx>(cx: &rustc_lint::LateContext<'tcx>, expr: &'tcx Expr<'_>) -> bool { + if let ExprKind::MethodCall(path_segment, receiver, _, _) = &expr.kind { + if path_segment.ident.name.as_str() != "kind" { + return false; + } + + // Check if the receiver type contains "Node" in its path + // This catches tree_sitter::Node and similar types + let receiver_ty = cx.typeck_results().expr_ty(receiver); + let ty_str = format!("{receiver_ty:?}"); + ty_str.contains("Node") + } else { + false + } +} + +impl<'tcx> LateLintPass<'tcx> for NodeKindLiteral { + fn check_expr(&mut self, cx: &rustc_lint::LateContext<'tcx>, expr: &'tcx Expr<'_>) { + // Check if this is a binary operation (== or !=) + if let ExprKind::Binary(op, left, right) = &expr.kind { + // Only check equality and inequality operations + if !matches!(op.node, BinOpKind::Eq | BinOpKind::Ne) { + return; + } + + // Check if one side is a .kind() call and the other is a string literal + let (kind_call, literal_expr) = if is_kind_method_call(cx, left) { + if let ExprKind::Lit(_) = right.kind { + (Some(left), Some(right)) + } else { + (None, None) + } + } else if is_kind_method_call(cx, right) { + if let ExprKind::Lit(_) = left.kind { + (Some(right), Some(left)) + } else { + 
(None, None) + } + } else { + (None, None) + }; + + if let (Some(_kind_call), Some(literal_expr)) = (kind_call, literal_expr) { + if let ExprKind::Lit(lit) = &literal_expr.kind { + if let LitKind::Str(symbol, _) = lit.node { + let literal_value = symbol.as_str(); + let constant_name = literal_value.to_uppercase(); + + let msg = format!( + "comparing .kind() with string literal \"{}\"", + literal_value + ); + let help = format!( + "define and use a constant like `const {}: &str = \"{}\";` in a node_kinds module", + constant_name, + literal_value + ); + + span_lint_and_help( + cx, + NODE_KIND_LITERAL, + literal_expr.span, + msg, + None, + help, + ); + } + } + } + } + } +} diff --git a/iam-policy-autopilot-lints/ui/main.rs b/iam-policy-autopilot-lints/ui/main.rs new file mode 100644 index 0000000..446a51a --- /dev/null +++ b/iam-policy-autopilot-lints/ui/main.rs @@ -0,0 +1,80 @@ +// Test cases for node_kind_literal lint + +struct Node; + +impl Node { + fn kind(&self) -> &str { + "test" + } +} + +fn test_kind_comparisons() { + let node = Node; + + // This should trigger a warning - .kind() compared with string literal + if node.kind() == "composite_literal" { + println!("found composite literal"); + } + + // This should trigger a warning - reversed comparison + if "unary_expression" == node.kind() { + println!("found unary expression"); + } + + // This should trigger a warning - inequality comparison + if node.kind() != "literal_value" { + println!("not a literal value"); + } + + // This should trigger a warning - any string literal with .kind() + if node.kind() == "some_new_node_type" { + println!("found new node type"); + } +} + +fn test_allowed_comparisons() { + let node = Node; + + // These should NOT trigger warnings (not comparing with .kind()) + let name = "my_function"; + let message = "Hello, world!"; + + // This is fine - not comparing with .kind() + if name == "test" { + println!("{}", message); + } + + // This is fine - just assigning a string + let 
node_kind_value = "composite_literal"; + println!("{}", node_kind_value); + + // This is fine - comparing .kind() with a constant (not a literal) + const EXPECTED_KIND: &str = "expected"; + if node.kind() == EXPECTED_KIND { + println!("matched expected kind"); + } +} + +// Test that non-Node types with kind() methods don't trigger the lint +struct OtherType; + +impl OtherType { + fn kind(&self) -> &str { + "other" + } +} + +fn test_non_node_kind() { + let other = OtherType; + + // This should NOT trigger a warning - not a tree-sitter Node type + if other.kind() == "some_string" { + println!("other type kind"); + } +} + +fn main() { + test_kind_comparisons(); + test_allowed_comparisons(); + test_non_node_kind(); +} diff --git a/iam-policy-autopilot-lints/ui/main.stderr b/iam-policy-autopilot-lints/ui/main.stderr new file mode 100644 index 0000000..df075bd --- /dev/null +++ b/iam-policy-autopilot-lints/ui/main.stderr @@ -0,0 +1,35 @@ +error: comparing .kind() with string literal "composite_literal" + --> $DIR/main.rs:15:23 + | +LL | if node.kind() == "composite_literal" { + | ^^^^^^^^^^^^^^^^^^^ + | + = help: define and use a constant like `const COMPOSITE_LITERAL: &str = "composite_literal";` in a node_kinds module + = note: `#[deny(node_kind_literal)]` on by default + +error: comparing .kind() with string literal "unary_expression" + --> $DIR/main.rs:20:8 + | +LL | if "unary_expression" == node.kind() { + | ^^^^^^^^^^^^^^^^^^ + | + = help: define and use a constant like `const UNARY_EXPRESSION: &str = "unary_expression";` in a node_kinds module + +error: comparing .kind() with string literal "literal_value" + --> $DIR/main.rs:25:23 + | +LL | if node.kind() != "literal_value" { + | ^^^^^^^^^^^^^^^ + | + = help: define and use a constant like `const LITERAL_VALUE: &str = "literal_value";` in a node_kinds module + +error: comparing .kind() with string literal "some_new_node_type" + --> $DIR/main.rs:30:23 + | +LL | if node.kind() == "some_new_node_type" { + | 
^^^^^^^^^^^^^^^^^^^^ + | + = help: define and use a constant like `const SOME_NEW_NODE_TYPE: &str = "some_new_node_type";` in a node_kinds module + +error: aborting due to 4 previous errors + diff --git a/iam-policy-autopilot-mcp-server/src/tools/fix_access_denied.rs b/iam-policy-autopilot-mcp-server/src/tools/fix_access_denied.rs index bf2d17d..34c7415 100644 --- a/iam-policy-autopilot-mcp-server/src/tools/fix_access_denied.rs +++ b/iam-policy-autopilot-mcp-server/src/tools/fix_access_denied.rs @@ -144,6 +144,7 @@ pub async fn fix_access_denied( mod tests { use super::*; use anyhow::anyhow; + use iam_policy_autopilot_access_denied::aws::policy_naming::POLICY_PREFIX; // Note: These tests focus on the service layer mocking. // Full integration tests with RequestContext would require more complex setup. @@ -303,6 +304,7 @@ mod tests { ), actions: vec!["s3:GetObject".to_string()], policy: PolicyDocument { + id: Some(POLICY_PREFIX.to_string()), version: "2012-10-17".to_string(), statement: vec![], }, diff --git a/iam-policy-autopilot-mcp-server/src/tools/generate_policy_for_access_denied.rs b/iam-policy-autopilot-mcp-server/src/tools/generate_policy_for_access_denied.rs index 12ea55a..2d81d91 100644 --- a/iam-policy-autopilot-mcp-server/src/tools/generate_policy_for_access_denied.rs +++ b/iam-policy-autopilot-mcp-server/src/tools/generate_policy_for_access_denied.rs @@ -39,6 +39,7 @@ pub async fn generate_policy_for_access_denied( mod tests { use super::*; use anyhow::anyhow; + use iam_policy_autopilot_access_denied::aws::policy_naming::POLICY_PREFIX; use iam_policy_autopilot_access_denied::{ DenialType, ParsedDenial, PlanResult, PolicyDocument, }; @@ -50,6 +51,7 @@ mod tests { }; let sample_policy = PolicyDocument { + id: Some(POLICY_PREFIX.to_string()), version: "2012-10-17".to_string(), statement: vec![], }; @@ -98,6 +100,7 @@ mod tests { }; let sample_policy = PolicyDocument { + id: Some(POLICY_PREFIX.to_string()), version: "2012-10-17".to_string(), statement: 
vec![], }; diff --git a/iam-policy-autopilot-policy-generation/Cargo.toml b/iam-policy-autopilot-policy-generation/Cargo.toml index ea4f64e..3b939c8 100644 --- a/iam-policy-autopilot-policy-generation/Cargo.toml +++ b/iam-policy-autopilot-policy-generation/Cargo.toml @@ -27,6 +27,10 @@ serde_json.workspace = true tokio.workspace = true async-trait.workspace = true strsim.workspace = true +sha2.workspace = true +git2.workspace = true +relative-path.workspace = true + # Build dependencies [build-dependencies] @@ -40,6 +44,10 @@ tokio-util.workspace = true # JSON processing serde_json.workspace = true +sha2.workspace = true +git2.workspace = true +relative-path.workspace = true + [features] default = [] # Development features diff --git a/iam-policy-autopilot-policy-generation/build.rs b/iam-policy-autopilot-policy-generation/build.rs index da04430..ecc3b7d 100644 --- a/iam-policy-autopilot-policy-generation/build.rs +++ b/iam-policy-autopilot-policy-generation/build.rs @@ -1,9 +1,25 @@ +use git2::Commit; +use git2::Describe; +use git2::DescribeFormatOptions; +use git2::DescribeOptions; +use git2::Reference; +use git2::Repository; +use relative_path::PathExt; +use relative_path::RelativePathBuf; use serde::{Deserialize, Serialize}; use serde_json::Value; -use std::collections::HashMap; +use sha2::digest::consts::B0; +use sha2::digest::consts::B1; +use sha2::digest::generic_array::GenericArray; +use sha2::digest::typenum::UInt; +use sha2::digest::typenum::UTerm; +use sha2::{Digest, Sha256}; +use std::collections::BTreeMap; use std::env; use std::fs; +use std::io; use std::path::Path; +use std::path::PathBuf; /// Simplified service definition with fields removed #[derive(Debug, Clone, Serialize, Deserialize)] @@ -11,8 +27,8 @@ struct SimplifiedServiceDefinition { #[serde(skip_serializing_if = "Option::is_none")] version: Option, metadata: ServiceMetadata, - operations: HashMap, - shapes: HashMap, + operations: BTreeMap, + shapes: BTreeMap, } /// Service metadata from 
AWS service definitions @@ -37,8 +53,8 @@ struct SimplifiedOperation { struct SimplifiedShape { #[serde(rename = "type")] type_name: String, - #[serde(default, skip_serializing_if = "HashMap::is_empty")] - members: HashMap, + #[serde(default, skip_serializing_if = "BTreeMap::is_empty")] + members: BTreeMap, #[serde(skip_serializing_if = "Option::is_none")] required: Option>, } @@ -49,6 +65,17 @@ struct ShapeReference { shape: String, } +// Git version and commit hash for boto3 and botocore +#[derive(Debug, Clone, Serialize, Deserialize)] +struct GitSubmoduleVersion { + #[serde(rename = "gitCommit")] + git_commit_hash: String, + #[serde(rename = "gitTag")] + git_tag: Option, + #[serde(rename = "dataHash")] + data_hash: String, +} + fn main() { println!("cargo:rerun-if-changed=resources/config/sdks/botocore-data"); println!("cargo:rerun-if-changed=resources/config/sdks/boto3"); @@ -123,6 +150,60 @@ fn main() { // Copy the boto3 directory to the workspace location copy_dir_recursive(&boto3_dir, workspace_boto3_embed_dir) .expect("Failed to copy boto3 simplified data"); + + let workspace_submodule_version_embed_dir = PathBuf::from("target/submodule-version-info"); + + // Remove existing directory if it exists + if workspace_submodule_version_embed_dir.exists() { + fs::remove_dir_all(&workspace_submodule_version_embed_dir) + .expect("Failed to remove existing submodule version directory"); + } + fs::create_dir_all(&workspace_submodule_version_embed_dir) + .expect("Failed to create submodule version directory"); + + let boto3_submodule_dir = Path::new("resources/config/sdks/boto3"); + let boto3_repo = + Repository::open(&boto3_submodule_dir).expect("Failed to open boto3 repository"); + + let boto3_info = GitSubmoduleVersion { + git_commit_hash: get_repository_commit(&boto3_repo) + .expect("Failed to get boto3 repository commit"), + git_tag: get_repository_tag(&boto3_repo).expect("Failed to get boto3 repository tag"), + data_hash: format!( + "{:X}", + 
sha2sum_recursive(&boto3_dir, &boto3_dir) + .expect("Failed to compute checksum over simplified boto3 data") + ), + }; + + let boto3_submodule_version_dir = + &workspace_submodule_version_embed_dir.join("boto3_version.json"); + let boto3_info_json = + serde_json::to_string(&boto3_info).expect("Failed to serialize boto3 version metadata"); + fs::write(boto3_submodule_version_dir, boto3_info_json) + .expect("Failed to write boto3 version metadata"); + + let botocore_submodule_dir = Path::new("resources/config/sdks/botocore-data"); + let botocore_repo = + Repository::open(botocore_submodule_dir).expect("Failed to open botocore repository"); + + let botocore_info = GitSubmoduleVersion { + git_commit_hash: get_repository_commit(&botocore_repo) + .expect("Failed to get botocore repository commit"), + git_tag: get_repository_tag(&botocore_repo).expect("Failed to get botocore repository tag"), + data_hash: format!( + "{:X}", + sha2sum_recursive(&simplified_dir, &simplified_dir) + .expect("Failed to compute checksum over simplified botocore data") + ), + }; + + let botocore_submodule_version_dir = + &workspace_submodule_version_embed_dir.join("botocore_version.json"); + let botocore_info_json = serde_json::to_string(&botocore_info) + .expect("Failed to serialize botocore version metadata"); + fs::write(botocore_submodule_version_dir, botocore_info_json) + .expect("Failed to write botocore version metadata"); } fn process_botocore_data( @@ -297,8 +378,8 @@ fn extract_metadata( fn simplify_operations( operations_value: Option<&Value>, -) -> Result, Box> { - let mut simplified_operations = HashMap::new(); +) -> Result, Box> { + let mut simplified_operations = BTreeMap::new(); if let Some(Value::Object(operations)) = operations_value { for (op_name, op_value) in operations { @@ -313,8 +394,8 @@ fn simplify_operations( fn simplify_shapes( shapes_value: Option<&Value>, -) -> Result, Box> { - let mut simplified_shapes = HashMap::new(); +) -> Result, Box> { + let mut 
simplified_shapes = BTreeMap::new(); if let Some(Value::Object(shapes)) = shapes_value { for (shape_name, shape_value) in shapes { @@ -344,6 +425,47 @@ fn copy_dir_recursive(src: &Path, dst: &Path) -> Result<(), Box Result< + GenericArray, B0>, B0>, B0>, B0>, B0>>, + Box, +> { + let mut hash_table: BTreeMap< + RelativePathBuf, + GenericArray, B0>, B0>, B0>, B0>, B0>>, + > = BTreeMap::new(); + // let next_root = if (root.is_none()) {Some(cwd)} else {root}; + + let mut dir_entry_list = fs::read_dir(cwd)? + .map(|res| res.map(|e| e.path())) + .collect::, io::Error>>()?; + dir_entry_list.sort(); + + for entry_path in dir_entry_list { + let relt_path = entry_path.clone().relative_to(root)?; + if (entry_path.is_dir()) { + hash_table.insert(relt_path.clone(), sha2sum_recursive(&entry_path, root)?); + } else { + hash_table.insert( + relt_path.clone(), + Sha256::default() + .chain_update(fs::read(entry_path)?) + .finalize(), + ); + } + } + + let mut sha2 = Sha256::new(); + for entry in hash_table { + sha2.update(entry.0.into_string()); + sha2.update(entry.1); + } + + Ok(sha2.finalize()) +} + fn process_boto3_data( boto3_path: &Path, output_dir: &Path, @@ -417,3 +539,29 @@ fn process_boto3_service_version( Ok(has_resources_file) } + +fn get_repository_tag(repo: &Repository) -> Result, Box> { + // we want to do this: git describe --exact-match --tags + let mut describe_options = DescribeOptions::new(); + describe_options.max_candidates_tags(0); + describe_options.describe_tags(); + + Ok(repo + .describe(&describe_options) + .map(|desc| { + Option::Some( + desc.format(Option::None) + .expect("Failed to format describe result"), + ) + }) + .unwrap_or_default()) +} + +fn get_repository_commit(repo: &Repository) -> Result> { + Ok(repo + .revparse_single("HEAD")? 
+ .into_commit() + .expect("Failed to get HEAD commit hash") + .id() + .to_string()) +} diff --git a/iam-policy-autopilot-policy-generation/src/api/generate_policies.rs b/iam-policy-autopilot-policy-generation/src/api/generate_policies.rs index 682efd0..c75d6ad 100644 --- a/iam-policy-autopilot-policy-generation/src/api/generate_policies.rs +++ b/iam-policy-autopilot-policy-generation/src/api/generate_policies.rs @@ -34,6 +34,14 @@ pub async fn generate_policies( .await .context("Failed to process source files")?; + // Relies on the invariant that all source files must be of the same language, which we + // enforce in process_source_files + let sdk = extracted_methods + .metadata + .source_files + .first() + .map_or(crate::SdkType::Other, |f| f.language.sdk_type()); + let extracted_methods = extracted_methods .methods .into_iter() @@ -53,7 +61,9 @@ pub async fn generate_policies( let mut enrichment_engine = EnrichmentEngine::new(config.disable_file_system_cache)?; // Run the complete enrichment pipeline - let enriched_results = enrichment_engine.enrich_methods(&extracted_methods).await?; + let enriched_results = enrichment_engine + .enrich_methods(&extracted_methods, sdk) + .await?; let enrichment_duration = pipeline_start.elapsed(); trace!("Enrichment pipeline completed in {:?}", enrichment_duration); diff --git a/iam-policy-autopilot-policy-generation/src/api/get_submodule_version.rs b/iam-policy-autopilot-policy-generation/src/api/get_submodule_version.rs new file mode 100644 index 0000000..f156bd1 --- /dev/null +++ b/iam-policy-autopilot-policy-generation/src/api/get_submodule_version.rs @@ -0,0 +1,28 @@ +use crate::errors::{ExtractorError, Result}; +use crate::{api::model::GitSubmoduleMetadata, embedded_data::GitSubmoduleVersionInfo}; + +/// Gets the version information for the boto3 submodule. +/// +/// # Returns +/// +/// Returns the Git submodule metadata for boto3, including commit hash and version information. 
+/// +/// # Errors +/// +/// Returns an error if the boto3 version information cannot be retrieved. +pub fn get_boto3_version_info() -> Result { + GitSubmoduleVersionInfo::get_boto3_version_info() +} + +/// Gets the version information for the botocore submodule. +/// +/// # Returns +/// +/// Returns the Git submodule metadata for botocore, including commit hash and version information. +/// +/// # Errors +/// +/// Returns an error if the botocore version information cannot be retrieved. +pub fn get_botocore_version_info() -> Result { + GitSubmoduleVersionInfo::get_botocore_version_info() +} diff --git a/iam-policy-autopilot-policy-generation/src/api/mod.rs b/iam-policy-autopilot-policy-generation/src/api/mod.rs index 2bc4be8..8d67d9a 100644 --- a/iam-policy-autopilot-policy-generation/src/api/mod.rs +++ b/iam-policy-autopilot-policy-generation/src/api/mod.rs @@ -2,7 +2,9 @@ mod extract_sdk_calls; mod generate_policies; +mod get_submodule_version; pub use extract_sdk_calls::extract_sdk_calls; pub use generate_policies::generate_policies; +pub use get_submodule_version::{get_boto3_version_info, get_botocore_version_info}; mod common; pub mod model; diff --git a/iam-policy-autopilot-policy-generation/src/api/model.rs b/iam-policy-autopilot-policy-generation/src/api/model.rs index ff40859..e37ccac 100644 --- a/iam-policy-autopilot-policy-generation/src/api/model.rs +++ b/iam-policy-autopilot-policy-generation/src/api/model.rs @@ -1,6 +1,8 @@ //! 
Defined model for API use std::path::PathBuf; +use serde::{Deserialize, Serialize}; + /// Configuration for generate_policies Api #[derive(Debug, Clone)] pub struct GeneratePolicyConfig { @@ -49,6 +51,20 @@ pub struct AwsContext { pub account: String, } +/// Exposes git version and commit hash for boto3 and botocore +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct GitSubmoduleMetadata { + /// the git commit hash + #[serde(rename = "gitCommit")] + pub git_commit_hash: String, + /// the git commit tag + #[serde(rename = "gitTag")] + pub git_tag: Option, + /// the simplified data file hash + #[serde(rename = "dataHash")] + pub data_hash: String, +} + impl AwsContext { /// Creates a new AwsContext with the partition automatically derived from the region. /// diff --git a/iam-policy-autopilot-policy-generation/src/embedded_data.rs b/iam-policy-autopilot-policy-generation/src/embedded_data.rs index e08b3e3..3e895f8 100644 --- a/iam-policy-autopilot-policy-generation/src/embedded_data.rs +++ b/iam-policy-autopilot-policy-generation/src/embedded_data.rs @@ -5,10 +5,14 @@ //! have been simplified to remove documentation and examples, reducing binary size //! while maintaining all essential functionality. 
+use std::borrow::Cow; +use std::collections::HashMap; + +use crate::api::model::GitSubmoduleMetadata; use crate::errors::{ExtractorError, Result}; use crate::extraction::sdk_model::SdkServiceDefinition; -use crate::providers::JsonProvider; use rust_embed::RustEmbed; +use serde::{Deserialize, Serialize}; /// Embedded AWS service definitions with compression /// @@ -18,7 +22,7 @@ use rust_embed::RustEmbed; #[derive(RustEmbed)] #[folder = "target/botocore-data-simplified"] #[include = "*.json"] -pub struct Botocore; +struct BotocoreRaw; /// Embedded AWS boto3 resource definitions /// @@ -27,7 +31,12 @@ pub struct Botocore; #[derive(RustEmbed)] #[folder = "target/boto3-data-simplified"] #[include = "*.json"] -pub struct Boto3Resources; +struct Boto3ResourcesRaw; + +#[derive(RustEmbed)] +#[folder = "target/submodule-version-info"] +#[include = "*.json"] +struct GitSubmoduleVersionInfoRaw; /// Embedded boto3 utilities mapping /// @@ -36,40 +45,23 @@ pub struct Boto3Resources; #[derive(RustEmbed)] #[folder = "resources/config/sdks"] #[include = "boto3_utilities_mapping.json"] -pub struct Boto3Utilities; +struct Boto3UtilitiesRaw; -impl Boto3Utilities { +impl Boto3UtilitiesRaw { /// Get the boto3 utilities mapping configuration - pub fn get_utilities_mapping() -> Option> { + fn get_utilities_mapping() -> Option> { Self::get("boto3_utilities_mapping.json").map(|file| file.data) } } -/// Embedded JavaScript SDK v3 libraries mapping -/// -/// This struct provides access to the JavaScript SDK v3 libraries mapping configuration -/// that defines how lib-* submodule commands map to client-* commands. 
-#[derive(RustEmbed)] -#[folder = "resources/config/sdks"] -#[include = "js_v3_libraries.json"] -pub(crate) struct JsV3Libraries; - -impl JsV3Libraries { - /// Get the JavaScript SDK v3 libraries mapping configuration - pub fn get_libraries_mapping() -> Option> { - Self::get("js_v3_libraries.json").map(|file| file.data) - } -} - -impl Boto3Resources { +impl Boto3ResourcesRaw { /// Get a boto3 resources definition file by service name and API version - pub fn get_resources_definition(service: &str, api_version: &str) -> Option> { + fn get_resources_definition(service: &str, api_version: &str) -> Option> { let start_time = std::time::Instant::now(); let json_path = format!("{}/{}/resources-1.json", service, api_version); if let Some(file) = Self::get(&json_path) { let file_size = file.data.len(); - let result = Some(file.data.to_vec()); let total_time = start_time.elapsed(); if total_time.as_millis() > 10 { @@ -82,14 +74,14 @@ impl Boto3Resources { ); } - result + Some(file.data) } else { None } } /// Build a complete service-to-versions map for boto3 resources - pub(crate) fn build_service_versions_map() -> std::collections::HashMap> { + fn build_service_versions_map() -> std::collections::HashMap> { log::debug!("Building boto3 service versions map..."); let start_time = std::time::Instant::now(); @@ -99,7 +91,7 @@ impl Boto3Resources { > = std::collections::HashMap::new(); let mut file_count = 0; - for file_path in Boto3Resources::iter() { + for file_path in Self::iter() { file_count += 1; let path_parts: Vec<&str> = file_path.split('/').collect(); if path_parts.len() >= 2 { @@ -130,15 +122,14 @@ impl Boto3Resources { } } -impl Botocore { +impl BotocoreRaw { /// Get a service definition file by service name and API version - pub fn get_service_definition(service: &str, api_version: &str) -> Option> { + fn get_service_definition(service: &str, api_version: &str) -> Option> { let start_time = std::time::Instant::now(); let json_path = format!("{}/{}/service-2.json", 
service, api_version); if let Some(file) = Self::get(&json_path) { let file_size = file.data.len(); - let result = Some(file.data.to_vec()); let total_time = start_time.elapsed(); if total_time.as_millis() > 10 { @@ -151,32 +142,26 @@ impl Botocore { ); } - result + Some(file.data) } else { None } } /// Get a waiters definition file by service name and API version - pub fn get_waiters( - service: &str, - api_version: &str, - ) -> Option> { + fn get_waiters(service: &str, api_version: &str) -> Option> { let path = format!("{}/{}/waiters-2.json", service, api_version); Self::get(&path).map(|file| file.data) } /// Get a paginators definition file by service name and API version - pub fn get_paginators( - service: &str, - api_version: &str, - ) -> Option> { + fn get_paginators(service: &str, api_version: &str) -> Option> { let path = format!("{}/{}/paginators-1.json", service, api_version); Self::get(&path).map(|file| file.data) } /// Build a complete service-to-versions map in a single iteration - pub(crate) fn build_service_versions_map() -> std::collections::HashMap> { + fn build_service_versions_map() -> std::collections::HashMap> { log::debug!("Building service versions map..."); let start_time = std::time::Instant::now(); @@ -186,7 +171,7 @@ impl Botocore { > = std::collections::HashMap::new(); let mut file_count = 0; - for file_path in Botocore::iter() { + for file_path in BotocoreRaw::iter() { file_count += 1; let path_parts: Vec<&str> = file_path.split('/').collect(); if path_parts.len() >= 2 { @@ -221,9 +206,9 @@ impl Botocore { /// /// Provides convenient access to embedded boto3 resource definitions with /// automatic JSON parsing. 
-pub(crate) struct EmbeddedBoto3Data; +pub(crate) struct Boto3Data; -impl EmbeddedBoto3Data { +impl Boto3Data { /// Get raw boto3 resources data by service name and API version /// /// # Arguments @@ -232,18 +217,21 @@ impl EmbeddedBoto3Data { /// /// # Returns /// Raw resources JSON data or None if not found - pub fn get_resources_raw(service: &str, api_version: &str) -> Option> { - Boto3Resources::get_resources_definition(service, api_version) + pub(crate) fn get_resources_raw( + service: &str, + api_version: &str, + ) -> Option> { + Boto3ResourcesRaw::get_resources_definition(service, api_version) } /// Build a complete service-to-versions map for boto3 resources pub(crate) fn build_service_versions_map() -> std::collections::HashMap> { - Boto3Resources::build_service_versions_map() + Boto3ResourcesRaw::build_service_versions_map() } /// Get the boto3 utilities mapping configuration from embedded data - pub(crate) fn get_utilities_mapping() -> Option> { - Boto3Utilities::get_utilities_mapping() + pub(crate) fn get_utilities_mapping() -> Option> { + Boto3UtilitiesRaw::get_utilities_mapping() } } @@ -251,9 +239,9 @@ impl EmbeddedBoto3Data { /// /// Provides convenient access to embedded AWS service definitions with /// automatic decompression and JSON parsing. 
-pub(crate) struct EmbeddedServiceData; +pub(crate) struct BotocoreData; -impl EmbeddedServiceData { +impl BotocoreData { /// Get a parsed service definition by service name and API version /// /// # Arguments @@ -262,40 +250,46 @@ impl EmbeddedServiceData { /// /// # Returns /// Parsed service definition or error if not found or parsing fails - pub(crate) async fn get_service_definition( + pub(crate) fn get_service_definition( service: &str, api_version: &str, ) -> Result { - let data = Botocore::get_service_definition(service, api_version).ok_or_else(|| { + let data = BotocoreRaw::get_service_definition(service, api_version).ok_or_else(|| { ExtractorError::validation(format!( "Service definition not found for {}/{}", service, api_version )) })?; - let json_str = std::str::from_utf8(&data).map_err(|e| { - ExtractorError::validation(format!("Invalid UTF-8 in embedded data: {}", e)) - })?; - - JsonProvider::parse(json_str).await.map_err(|e| { + serde_json::from_slice(&data).map_err(|e| { ExtractorError::sdk_processing_with_source( service, - "Failed to parse embedded service definition", + "Failed to parse service definition", e, ) }) } - /// Get raw waiters data by service name and API version + /// Get waiters data by service name and API version /// /// # Arguments /// * `service` - Service name (e.g., "s3", "ec2", "lambda") /// * `api_version` - API version (e.g., "2006-03-01", "2016-11-15") /// /// # Returns - /// Raw waiters JSON data or None if not found - pub fn get_waiters_raw(service: &str, api_version: &str) -> Option> { - Botocore::get_waiters(service, api_version).map(|data| data.to_vec()) + /// Waiters JSON data or None if not found + pub(crate) fn get_waiters( + service: &str, + api_version: &str, + ) -> Option> { + let waiters_data = BotocoreRaw::get_waiters(service, api_version)?; + + match serde_json::from_slice::( + &waiters_data, + ) { + Ok(waiters_desc) => Some(waiters_desc.waiters), + Err(_) => None, + } } /// Get raw paginators data by service 
name and API version @@ -307,13 +301,45 @@ impl EmbeddedServiceData { /// # Returns /// Raw paginators JSON data or None if not found #[allow(dead_code)] - pub fn get_paginators_raw(service: &str, api_version: &str) -> Option> { - Botocore::get_paginators(service, api_version).map(|data| data.to_vec()) + pub(crate) fn get_paginators_raw(service: &str, api_version: &str) -> Option> { + BotocoreRaw::get_paginators(service, api_version).map(|data| data.to_vec()) } /// Build a complete service-to-versions map in a single iteration pub(crate) fn build_service_versions_map() -> std::collections::HashMap> { - Botocore::build_service_versions_map() + BotocoreRaw::build_service_versions_map() + } +} + +/// Embedded submodule version data manager +/// +/// Provides access to git submodule information, compiled during build.rs +pub(crate) struct GitSubmoduleVersionInfo; + +impl GitSubmoduleVersionInfo { + pub(crate) fn get_boto3_version_info() -> Result { + let boto3_file = GitSubmoduleVersionInfoRaw::get("boto3_version.json") + .expect("boto3 version metadata file not found"); + + serde_json::from_slice(&boto3_file.data).map_err(|e| { + ExtractorError::sdk_processing_with_source( + "reading boto3_version.json", + "Failed to parse boto3 metadata file", + e, + ) + }) + } + pub(crate) fn get_botocore_version_info() -> Result { + let botocore_file = GitSubmoduleVersionInfoRaw::get("botocore_version.json") + .expect("botocore version metadata file not found"); + + serde_json::from_slice(&botocore_file.data).map_err(|e| { + ExtractorError::sdk_processing_with_source( + "reading botocore_version.json", + "Failed to parse botocore_version metadata file", + e, + ) + }) } } @@ -323,25 +349,25 @@ mod tests { #[test] fn test_botocore_get_service_definition_returns_none_for_invalid_service() { - let result = Botocore::get_service_definition("nonexistent-service", "2023-01-01"); + let result = BotocoreRaw::get_service_definition("nonexistent-service", "2023-01-01"); 
assert!(result.is_none()); } #[test] fn test_botocore_get_waiters_returns_none_for_invalid_service() { - let result = Botocore::get_waiters("nonexistent-service", "2023-01-01"); + let result = BotocoreRaw::get_waiters("nonexistent-service", "2023-01-01"); assert!(result.is_none()); } #[test] fn test_botocore_get_paginators_returns_none_for_invalid_service() { - let result = Botocore::get_paginators("nonexistent-service", "2023-01-01"); + let result = BotocoreRaw::get_paginators("nonexistent-service", "2023-01-01"); assert!(result.is_none()); } #[test] fn test_build_service_versions_map_returns_hashmap() { - let service_versions = Botocore::build_service_versions_map(); + let service_versions = BotocoreRaw::build_service_versions_map(); // Should return a HashMap assert!(service_versions.is_empty() || !service_versions.is_empty()); @@ -369,8 +395,8 @@ mod tests { #[test] fn test_build_service_versions_map_consistency() { // Call the function twice and ensure results are consistent - let map1 = Botocore::build_service_versions_map(); - let map2 = Botocore::build_service_versions_map(); + let map1 = BotocoreRaw::build_service_versions_map(); + let map2 = BotocoreRaw::build_service_versions_map(); assert_eq!( map1, map2, @@ -380,8 +406,8 @@ mod tests { #[test] fn test_embedded_service_data_build_service_versions_map_delegates() { - let embedded_result = EmbeddedServiceData::build_service_versions_map(); - let botocore_result = Botocore::build_service_versions_map(); + let embedded_result = BotocoreData::build_service_versions_map(); + let botocore_result = BotocoreRaw::build_service_versions_map(); assert_eq!( embedded_result, botocore_result, @@ -389,10 +415,9 @@ mod tests { ); } - #[tokio::test] - async fn test_embedded_service_data_get_service_definition_invalid_service() { - let result = - EmbeddedServiceData::get_service_definition("nonexistent-service", "2023-01-01").await; + #[test] + fn test_embedded_service_data_get_service_definition_invalid_service() { + let 
result = BotocoreData::get_service_definition("nonexistent-service", "2023-01-01"); assert!( result.is_err(), @@ -411,7 +436,7 @@ mod tests { #[test] fn test_embedded_service_data_get_waiters_raw_invalid_service() { - let result = EmbeddedServiceData::get_waiters_raw("nonexistent-service", "2023-01-01"); + let result = BotocoreData::get_waiters("nonexistent-service", "2023-01-01"); assert!( result.is_none(), "Should return None for nonexistent service" @@ -420,7 +445,7 @@ mod tests { #[test] fn test_embedded_service_data_get_paginators_raw_invalid_service() { - let result = EmbeddedServiceData::get_paginators_raw("nonexistent-service", "2023-01-01"); + let result = BotocoreData::get_paginators_raw("nonexistent-service", "2023-01-01"); assert!( result.is_none(), "Should return None for nonexistent service" @@ -429,7 +454,7 @@ mod tests { #[test] fn test_service_versions_map_structure() { - let service_versions = Botocore::build_service_versions_map(); + let service_versions = BotocoreRaw::build_service_versions_map(); for (service, versions) in &service_versions { // Service names should not contain path separators @@ -486,13 +511,13 @@ mod tests { // This test ensures the timing logic doesn't panic // We can't easily test the actual logging without setting up a logger, // but we can ensure the code path works - let result = Botocore::get_service_definition("nonexistent-service", "2023-01-01"); + let result = BotocoreRaw::get_service_definition("nonexistent-service", "2023-01-01"); assert!(result.is_none()); } #[test] fn test_service_versions_map_no_duplicates() { - let service_versions = Botocore::build_service_versions_map(); + let service_versions = BotocoreRaw::build_service_versions_map(); for (service, versions) in &service_versions { // Check that there are no duplicate versions @@ -512,18 +537,29 @@ mod tests { #[test] fn test_embedded_data_methods_handle_empty_strings() { // Test edge cases with empty strings - let result1 = 
#[test] fn test_get_botocore_version_info_happy_path() {
.load_fas_maps_for_services(&unique_services, &service_cfg) .await?; - let resource_matcher = ResourceMatcher::new(service_cfg, fas_maps); + let resource_matcher = ResourceMatcher::new(service_cfg, fas_maps, sdk); let enriched_calls = self .enrich_all_methods(extracted_methods, &resource_matcher) .await?; @@ -182,7 +183,9 @@ mod tests { let start_time = Instant::now(); - let service_index = match ServiceDiscovery::load_service_index(Language::Python).await { + const LANGUAGE: Language = Language::Python; + + let service_index = match ServiceDiscovery::load_service_index(LANGUAGE).await { Ok(result) => result, Err(e) => { panic!("Failed to discover services: {}", e); @@ -205,7 +208,10 @@ mod tests { println!("\nRunning enrichment on all operations..."); let enrichment_start = Instant::now(); - match enrichment_engine.enrich_methods(&sdk_method_calls).await { + match enrichment_engine + .enrich_methods(&sdk_method_calls, LANGUAGE.sdk_type()) + .await + { Ok(enriched_calls) => { let enrichment_duration = enrichment_start.elapsed(); diff --git a/iam-policy-autopilot-policy-generation/src/enrichment/resource_matcher.rs b/iam-policy-autopilot-policy-generation/src/enrichment/resource_matcher.rs index 4123325..d9e21c3 100644 --- a/iam-policy-autopilot-policy-generation/src/enrichment/resource_matcher.rs +++ b/iam-policy-autopilot-policy-generation/src/enrichment/resource_matcher.rs @@ -14,7 +14,7 @@ use crate::enrichment::service_reference::ServiceReference; use crate::enrichment::{Condition, ServiceReferenceLoader}; use crate::errors::{ExtractorError, Result}; use crate::service_configuration::ServiceConfiguration; -use crate::SdkMethodCall; +use crate::{SdkMethodCall, SdkType}; /// ResourceMatcher coordinates OperationAction maps and Service Reference data to generate enriched method calls /// @@ -25,6 +25,7 @@ use crate::SdkMethodCall; pub(crate) struct ResourceMatcher { service_cfg: Arc, fas_maps: OperationFasMaps, + sdk: SdkType, } // TODO: Make this configurable: 
https://github.com/awslabs/iam-policy-autopilot/issues/19 @@ -33,10 +34,15 @@ const RESOURCE_CUTOFF: usize = 5; impl ResourceMatcher { /// Create a new ResourceMatcher instance #[must_use] - pub(crate) fn new(service_cfg: Arc, fas_maps: OperationFasMaps) -> Self { + pub(crate) fn new( + service_cfg: Arc, + fas_maps: OperationFasMaps, + sdk: SdkType, + ) -> Self { Self { service_cfg, fas_maps, + sdk, } } @@ -110,7 +116,7 @@ impl ResourceMatcher { Some(operation_fas_map) => { let service_operation_name = operation.service_operation_name(&self.service_cfg); - log::debug!("Looking up {}", service_operation_name); + log::debug!("Looking up operation {}", service_operation_name); if let Some(additional_operations) = operation_fas_map .fas_operations @@ -183,22 +189,46 @@ impl ResourceMatcher { parsed_call.name ); - let initial = FasOperation::new( - parsed_call.name.to_case(Case::Pascal), - service_name.to_string(), - Vec::new(), - ); - + let initial = { + let initial_service_name = self + .service_cfg + .rename_service_service_reference(service_name); + // Determine the initial operation name, with special handling for Python's boto3 method names + let initial_operation_name = if self.sdk == SdkType::Boto3 { + // Try to load service reference and look up the boto3 method mapping + service_reference_loader + .load(&initial_service_name) + .await? + .and_then(|service_ref| { + log::debug!("Looking up method {}", parsed_call.name); + service_ref + .boto3_method_to_operation + .get(&parsed_call.name) + .map(|op| { + log::debug!("got {:?}", op); + op.split(':').nth(1).unwrap_or(op).to_string() + }) + }) + // Fallback to PascalCase conversion if mapping not found + // This should not be reachable, but if for some reason we cannot use the SDF, + // we try converting to PascalCase, knowing that this is flawed in some cases: + // think `AddRoleToDBInstance` (actual name) + // vs. 
`AddRoleToDbInstance` (converted name) + .unwrap_or_else(|| parsed_call.name.to_case(Case::Pascal)) + } else { + // For non-Boto3 SDKs we use the extracted name as-is + parsed_call.name.clone() + }; + FasOperation::new(initial_operation_name, service_name.to_string(), Vec::new()) + }; // Use fixed-point algorithm to safely expand FAS operations until no new operations are found let operations = self.expand_fas_operations_to_fixed_point(initial)?; let mut enriched_actions = vec![]; for operation in operations { - let service = operation.service(&self.service_cfg); + let service_name = operation.service(&self.service_cfg); // Find the corresponding SDF using the cache - let service_reference = service_reference_loader - .load(&operation.service(&self.service_cfg)) - .await?; + let service_reference = service_reference_loader.load(&service_name).await?; match service_reference { None => { @@ -207,9 +237,8 @@ impl ResourceMatcher { Some(service_reference) => { log::debug!("Creating actions for {:?}", operation); log::debug!(" with context {:?}", operation.context); - if let Some(operation_to_authorized_actions) = service_reference_loader - .get_operation_to_authorized_actions(&service) - .await? 
+ if let Some(operation_to_authorized_actions) = + &service_reference.operation_to_authorized_actions { log::debug!( "Looking up {}", @@ -372,6 +401,8 @@ impl ResourceMatcher { #[cfg(test)] mod tests { + use std::collections::HashMap; + use super::*; use crate::enrichment::mock_remote_service_reference; use crate::enrichment::operation_fas_map::{FasContext, FasOperation, OperationFasMap}; @@ -384,6 +415,16 @@ mod tests { } } + fn create_empty_service_config() -> Arc { + Arc::new(ServiceConfiguration { + rename_services_operation_action_map: HashMap::new(), + rename_services_service_reference: HashMap::new(), + smithy_botocore_service_name_mapping: HashMap::new(), + rename_operations: HashMap::new(), + resource_overrides: HashMap::new(), + }) + } + #[tokio::test] async fn test_enrich_method_call() { use std::collections::HashMap; @@ -410,7 +451,7 @@ mod tests { let (_, service_reference_loader) = mock_remote_service_reference::setup_mock_server_with_loader().await; - let matcher = ResourceMatcher::new(Arc::new(service_cfg), HashMap::new()); + let matcher = ResourceMatcher::new(Arc::new(service_cfg), HashMap::new(), SdkType::Boto3); let parsed_call = create_test_parsed_method_call(); // Create operation action map file @@ -486,7 +527,7 @@ mod tests { resource_overrides: HashMap::new(), }; - let matcher = ResourceMatcher::new(Arc::new(service_cfg), HashMap::new()); + let matcher = ResourceMatcher::new(Arc::new(service_cfg), HashMap::new(), SdkType::Boto3); let (mock_server, loader) = mock_remote_service_reference::setup_mock_server_with_loader().await; @@ -550,15 +591,9 @@ mod tests { use std::collections::HashMap; // Service configuration without s3 in no_operation_action_map - let service_cfg = ServiceConfiguration { - rename_services_operation_action_map: HashMap::new(), - rename_services_service_reference: HashMap::new(), - smithy_botocore_service_name_mapping: HashMap::new(), - rename_operations: HashMap::new(), - resource_overrides: HashMap::new(), - }; + let 
service_cfg = create_empty_service_config(); - let matcher = ResourceMatcher::new(Arc::new(service_cfg), HashMap::new()); + let matcher = ResourceMatcher::new(service_cfg, HashMap::new(), SdkType::Boto3); let parsed_call = SdkMethodCall { name: "get_object".to_string(), possible_services: vec!["s3".to_string()], @@ -685,7 +720,7 @@ mod tests { } ] })).await; - let matcher = ResourceMatcher::new(Arc::new(service_cfg), HashMap::new()); + let matcher = ResourceMatcher::new(Arc::new(service_cfg), HashMap::new(), SdkType::Boto3); // Create SdkMethodCall for connectparticipant:send_message let parsed_call = SdkMethodCall { @@ -774,7 +809,7 @@ mod tests { ) .await; - let matcher = ResourceMatcher::new(Arc::new(service_cfg), HashMap::new()); + let matcher = ResourceMatcher::new(Arc::new(service_cfg), HashMap::new(), SdkType::Boto3); // Create parsed method call for get_user let parsed_call = SdkMethodCall { @@ -849,7 +884,7 @@ mod tests { let (_, service_reference_loader) = mock_remote_service_reference::setup_mock_server_with_loader().await; - let matcher = ResourceMatcher::new(Arc::new(service_cfg), HashMap::new()); + let matcher = ResourceMatcher::new(Arc::new(service_cfg), HashMap::new(), SdkType::Boto3); // Create parsed method call for get_object let parsed_call = SdkMethodCall { @@ -912,13 +947,7 @@ mod tests { use std::collections::HashMap; // Create a simple service configuration - let service_cfg = Arc::new(ServiceConfiguration { - rename_services_operation_action_map: HashMap::new(), - rename_services_service_reference: HashMap::new(), - smithy_botocore_service_name_mapping: HashMap::new(), - rename_operations: HashMap::new(), - resource_overrides: HashMap::new(), - }); + let service_cfg = create_empty_service_config(); // Create a mock FAS map with no cycles: A -> B -> C (linear chain) let mut fas_maps = HashMap::new(); @@ -973,7 +1002,7 @@ mod tests { }), ); - let matcher = ResourceMatcher::new(service_cfg.clone(), fas_maps); + let matcher = 
ResourceMatcher::new(service_cfg.clone(), fas_maps, SdkType::Other); // Test expansion starting from GetObject let initial = @@ -1012,13 +1041,7 @@ mod tests { use std::collections::HashMap; // Create a simple service configuration - let service_cfg = Arc::new(ServiceConfiguration { - rename_services_operation_action_map: HashMap::new(), - rename_services_service_reference: HashMap::new(), - smithy_botocore_service_name_mapping: HashMap::new(), - rename_operations: HashMap::new(), - resource_overrides: HashMap::new(), - }); + let service_cfg = create_empty_service_config(); // Create a mock FAS map with a cycle: A -> B -> A let mut fas_maps = HashMap::new(); @@ -1064,7 +1087,7 @@ mod tests { }), ); - let matcher = ResourceMatcher::new(service_cfg.clone(), fas_maps); + let matcher = ResourceMatcher::new(service_cfg.clone(), fas_maps, SdkType::Other); // Test expansion starting from GetObject - should detect cycle and terminate let initial = @@ -1102,13 +1125,7 @@ mod tests { use std::collections::HashMap; // Create a service configuration - let service_cfg = Arc::new(ServiceConfiguration { - rename_services_operation_action_map: HashMap::new(), - rename_services_service_reference: HashMap::new(), - smithy_botocore_service_name_mapping: HashMap::new(), - rename_operations: HashMap::new(), - resource_overrides: HashMap::new(), - }); + let service_cfg = create_empty_service_config(); let mut fas_maps = HashMap::new(); @@ -1142,7 +1159,7 @@ mod tests { ); } - let matcher = ResourceMatcher::new(service_cfg.clone(), fas_maps); + let matcher = ResourceMatcher::new(service_cfg.clone(), fas_maps, SdkType::Other); let initial = FasOperation::new("GetObject".to_string(), "service-a".to_string(), Vec::new()); @@ -1168,15 +1185,9 @@ mod tests { async fn test_fas_expansion_empty_initial() { use std::collections::HashMap; - let service_cfg = Arc::new(ServiceConfiguration { - rename_services_operation_action_map: HashMap::new(), - rename_services_service_reference: HashMap::new(), 
- smithy_botocore_service_name_mapping: HashMap::new(), - rename_operations: HashMap::new(), - resource_overrides: HashMap::new(), - }); + let service_cfg = create_empty_service_config(); - let matcher = ResourceMatcher::new(service_cfg.clone(), HashMap::new()); + let matcher = ResourceMatcher::new(service_cfg.clone(), HashMap::new(), SdkType::Other); let initial = FasOperation::new( "NonExistentOperation".to_string(), @@ -1206,13 +1217,7 @@ mod tests { use std::collections::HashMap; // Create a simple service configuration - let service_cfg = Arc::new(ServiceConfiguration { - rename_services_operation_action_map: HashMap::new(), - rename_services_service_reference: HashMap::new(), - smithy_botocore_service_name_mapping: HashMap::new(), - rename_operations: HashMap::new(), - resource_overrides: HashMap::new(), - }); + let service_cfg = create_empty_service_config(); // Create a FAS map where A -> A with empty context (self-referential) let mut fas_maps = HashMap::new(); @@ -1235,7 +1240,7 @@ mod tests { }), ); - let matcher = ResourceMatcher::new(service_cfg.clone(), fas_maps); + let matcher = ResourceMatcher::new(service_cfg.clone(), fas_maps, SdkType::Other); // Test expansion starting from GetObject with empty context let initial = FasOperation::new( @@ -1265,4 +1270,92 @@ mod tests { println!("✓ Test passed: Self-cycle with empty context handled correctly"); } + + /// Helper function to create RDS service reference mock with multiple DB operations + /// Includes operations with and without SDK method mappings to test different scenarios + async fn mock_rds_service_reference(mock_server: &wiremock::MockServer) { + mock_remote_service_reference::mock_server_service_reference_response( + mock_server, + "rds", + serde_json::json!({ + "Name": "rds", + "Resources": [ + { + "Name": "database-cluster", + "ARNFormats": ["arn:${Partition}:rds:${Region}:${Account}:cluster:${DatabaseClusterIdentifier}"] + } + ], + "Actions": [ + { + "Name": "ModifyDBCluster", + 
"Resources": [{"Name": "database-cluster"}] + } + ], + "Operations": [ + { + "Name": "ModifyDBCluster", + "AuthorizedActions": [{"Name": "ModifyDBCluster", "Service": "rds"}], + "SDK": [{"Name": "rds", "Method": "modify_db_cluster", "Package": "Boto3"}] + } + ] + }), + ) + .await; + } + + #[tokio::test] + async fn test_boto3_method_name_requires_lookup() { + // Test that boto3 methods are correctly mapped using service reference SDK mapping + let config = create_empty_service_config(); + let matcher = ResourceMatcher::new(config, HashMap::new(), SdkType::Boto3); + + let (mock_server, loader) = + mock_remote_service_reference::setup_mock_server_with_loader().await; + + mock_rds_service_reference(&mock_server).await; + + let parsed_method = SdkMethodCall { + name: "modify_db_cluster".to_string(), + possible_services: vec!["rds".to_string()], + metadata: None, + }; + + let result = matcher.enrich_method_call(&parsed_method, &loader).await; + assert!(result.is_ok()); + + let enriched_calls = result.unwrap(); + assert_eq!(enriched_calls.len(), 1); + assert_eq!(enriched_calls[0].method_name, "modify_db_cluster"); + assert_eq!(enriched_calls[0].service, "rds"); + assert_eq!(enriched_calls[0].actions[0].name, "rds:ModifyDBCluster"); + } + + #[tokio::test] + async fn test_non_boto3_sdk_uses_extracted_name_directly() { + // Test that non-Boto3 SDKs (e.g., Go) use the extracted operation name directly without renaming + let config = create_empty_service_config(); + let matcher = ResourceMatcher::new(config, HashMap::new(), SdkType::Other); + + let (mock_server, loader) = + mock_remote_service_reference::setup_mock_server_with_loader().await; + + mock_rds_service_reference(&mock_server).await; + + // Go SDK extracts operation names in PascalCase (e.g., CreateDBCluster) + let parsed_method = SdkMethodCall { + name: "ModifyDBCluster".to_string(), + possible_services: vec!["rds".to_string()], + metadata: None, + }; + + let result = matcher.enrich_method_call(&parsed_method, 
&loader).await; + assert!(result.is_ok()); + + let enriched_calls = result.unwrap(); + assert_eq!(enriched_calls.len(), 1); + assert_eq!(enriched_calls[0].method_name, "ModifyDBCluster"); + assert_eq!(enriched_calls[0].service, "rds"); + // Should use the operation name directly without any transformation + assert_eq!(enriched_calls[0].actions[0].name, "rds:ModifyDBCluster"); + } } diff --git a/iam-policy-autopilot-policy-generation/src/enrichment/service_reference.rs b/iam-policy-autopilot-policy-generation/src/enrichment/service_reference.rs index 953f1a5..d35d48e 100644 --- a/iam-policy-autopilot-policy-generation/src/enrichment/service_reference.rs +++ b/iam-policy-autopilot-policy-generation/src/enrichment/service_reference.rs @@ -23,7 +23,6 @@ const IAM_POLICY_AUTOPILOT: &str = "IAMPolicyAutopilot"; // Cache files for 6 hours. // We can allow cache duration override in future. const DEFAULT_CACHE_DURATION_IN_SECONDS: u64 = 21600; -pub(crate) type OperationToAuthorizedActionMap = HashMap; /// Service Reference data structure /// /// Represents the complete service reference loaded from service reference endpoint. 
@@ -41,6 +40,8 @@ pub(crate) struct ServiceReference { /// Operation to authorized action mapping /// Note: Only partial service and operations have this data pub(crate) operation_to_authorized_actions: Option>, + /// Map from boto method names (snake_case) to operation names + pub(crate) boto3_method_to_operation: HashMap, } impl<'de> Deserialize<'de> for ServiceReference { @@ -90,22 +91,38 @@ impl<'de> Deserialize<'de> for ServiceReference { } } - let operation_to_authorized_actions = if operations.is_empty() { - None - } else { - Some( - operations - .into_iter() - .map(|operation| (operation.name.clone(), operation)) - .collect(), - ) - }; + let operation_to_authorized_actions: Option> = + if operations.is_empty() { + None + } else { + Some( + operations + .into_iter() + .map(|operation| (operation.name.clone(), operation)) + .collect(), + ) + }; + + // Build boto3_method_to_operation map + let mut boto3_method_to_operation = HashMap::new(); + if let Some(ref op_map) = operation_to_authorized_actions { + for (operation_name, operation) in op_map { + for sdk_method in &operation.sdk { + // Only add entries for Boto3 package where service name matches + if sdk_method.package == "Boto3" && sdk_method.name == temp.name { + boto3_method_to_operation + .insert(sdk_method.method.clone(), operation_name.clone()); + } + } + } + } Ok(ServiceReference { actions: temp.actions, service_name: temp.name, resources: temp.resources, operation_to_authorized_actions, + boto3_method_to_operation, }) } } @@ -484,16 +501,6 @@ impl RemoteServiceReferenceLoader { None => Ok(None), } } - - pub(crate) async fn get_operation_to_authorized_actions( - &self, - service_name: &str, - ) -> crate::errors::Result> { - Ok(self - .load(service_name) - .await? 
- .and_then(|sr| sr.operation_to_authorized_actions)) - } } #[cfg(test)] diff --git a/iam-policy-autopilot-policy-generation/src/extraction/go/disambiguation.rs b/iam-policy-autopilot-policy-generation/src/extraction/go/disambiguation.rs index 18e3521..91a5a38 100644 --- a/iam-policy-autopilot-policy-generation/src/extraction/go/disambiguation.rs +++ b/iam-policy-autopilot-policy-generation/src/extraction/go/disambiguation.rs @@ -152,6 +152,12 @@ impl<'a> GoMethodDisambiguator<'a> { None => return false, // Input shape not found }; + log::debug!( + "Validating {} against service '{}' operation '{}'", + method_call.name, + service_ref.service_name, + service_ref.operation_name + ); // Validate parameters against the input shape self.validate_parameters_against_shape(&metadata.parameters, input_shape, has_context) } @@ -204,8 +210,8 @@ impl<'a> GoMethodDisambiguator<'a> { .iter() .filter_map(|p| match p { Parameter::Positional { - value, type_annotation, + struct_fields, .. } => { // Skip context parameters @@ -215,69 +221,74 @@ impl<'a> GoMethodDisambiguator<'a> { } } - let value_str = value.as_string(); - // Extract field names from struct literals - if value_str.contains('{') && value_str.contains('}') { - Some(self.extract_struct_field_names(value_str)) - } else { - None - } + // Use AST-extracted fields + struct_fields.as_ref().cloned() } _ => None, }) .flatten() .collect(); + log::debug!("Extracted parameters from code: {:?}", provided_params); + // Get required parameters from the shape - let _required_params: HashSet = shape + let required_params: HashSet = shape .required .as_ref() .map(|req| req.iter().cloned().collect()) .unwrap_or_default(); + log::debug!("Required parameters from AWS model: {:?}", required_params); + + // Get all valid parameters from the shape (lowercase for case-insensitive comparison) + // AWS models are inconsistent - some use PascalCase, some use camelCase + // TODO: Canonicalize casing during deserialization instead of at 
comparison time + // See: https://github.com/awslabs/iam-policy-autopilot/issues/57 + let valid_params_lower: HashSet = + shape.members.keys().map(|k| k.to_lowercase()).collect(); + log::debug!( + "Valid parameters from AWS model: {:?}", + shape.members.keys() + ); - // Get all valid parameters from the shape - let valid_params: HashSet = shape.members.keys().cloned().collect(); - - // For Go, we're more lenient about missing required parameters since they might be - // provided through struct initialization or have default values - // We mainly validate that provided parameters are valid - - // Check that all provided parameters are valid + // Check that all provided parameters are valid (case-insensitive) for provided_param in &provided_params { - if !valid_params.contains(provided_param) { + let provided_lower = provided_param.to_lowercase(); + if !valid_params_lower.contains(&provided_lower) { + log::debug!( + "Rejecting: parameter '{}' not found in AWS model (case-insensitive)", + provided_param + ); return false; // Invalid parameter provided } } // If we have no parameters extracted (e.g., using variables instead of struct literals), // we accept the method call since we can't validate variable contents - // This is more lenient but prevents false negatives - true - } + // This prevents false negatives when parameters are passed via variables + if provided_params.is_empty() { + log::debug!("Accepting: no parameters extracted (likely using variables)"); + return true; + } - /// Extract field names from a struct literal string - fn extract_struct_field_names(&self, struct_literal: &str) -> Vec { - let mut field_names = Vec::new(); - - // Find the content between braces - if let Some(start) = struct_literal.find('{') { - if let Some(end) = struct_literal.rfind('}') { - let content = &struct_literal[start + 1..end]; - - // Simple parsing - split by commas and extract field names - for field_part in content.split(',') { - let field_part = field_part.trim(); - if 
let Some(colon_pos) = field_part.find(':') { - let field_name = field_part[..colon_pos].trim().to_string(); - if !field_name.is_empty() { - field_names.push(field_name); - } - } - } + // Validate that all required parameters are present (case-insensitive) + // Convert provided params to lowercase once for efficient lookup + let provided_params_lower: HashSet = + provided_params.iter().map(|p| p.to_lowercase()).collect(); + + for required_param in &required_params { + let required_lower = required_param.to_lowercase(); + if !provided_params_lower.contains(&required_lower) { + log::debug!( + "Rejecting: missing required parameter '{}' (provided: {:?})", + required_param, + provided_params + ); + return false; // Required parameter missing } } - field_names + log::debug!("Accepting: all validations passed"); + true } /// Filter services based on what's actually imported in the Go file @@ -329,6 +340,57 @@ mod tests { let mut services = HashMap::new(); let mut method_lookup = HashMap::new(); + // Create a test service definition for SQS + let mut sqs_operations = HashMap::new(); + let mut sqs_shapes = HashMap::new(); + + sqs_operations.insert( + "CreateQueue".to_string(), + Operation { + name: "CreateQueue".to_string(), + input: Some(ShapeReference { + shape: "CreateQueueRequest".to_string(), + }), + }, + ); + + let mut create_queue_members = HashMap::new(); + create_queue_members.insert( + "QueueName".to_string(), + ShapeReference { + shape: "String".to_string(), + }, + ); + create_queue_members.insert( + "Attributes".to_string(), + ShapeReference { + shape: "QueueAttributeMap".to_string(), + }, + ); + + sqs_shapes.insert( + "CreateQueueRequest".to_string(), + Shape { + type_name: "structure".to_string(), + members: create_queue_members, + required: Some(vec!["QueueName".to_string()]), + }, + ); + + services.insert( + "sqs".to_string(), + SdkServiceDefinition { + version: Some("2.0".to_string()), + metadata: ServiceMetadata { + api_version: "2012-11-05".to_string(), + 
service_id: "SQS".to_string(), + }, + operations: sqs_operations, + shapes: sqs_shapes, + waiters: HashMap::new(), + }, + ); + // Create a test service definition for S3 let mut s3_operations = HashMap::new(); let mut s3_shapes = HashMap::new(); @@ -504,6 +566,14 @@ mod tests { ], ); + method_lookup.insert( + "CreateQueue".to_string(), + vec![ServiceMethodRef { + service_name: "sqs".to_string(), + operation_name: "CreateQueue".to_string(), + }], + ); + ServiceModelIndex { services, method_lookup, @@ -525,6 +595,7 @@ mod tests { value: ParameterValue::Unresolved("context.TODO()".to_string()), position: 0, type_annotation: Some("context.Context".to_string()), + struct_fields: None, }, Parameter::Positional { value: ParameterValue::Unresolved( @@ -532,7 +603,8 @@ mod tests { .to_string(), ), position: 1, - type_annotation: Some("*s3.ListObjectsV2Input".to_string()), + type_annotation: Some("s3.ListObjectsV2Input".to_string()), + struct_fields: Some(vec!["Bucket".to_string()]), }, ], return_type: None, @@ -551,11 +623,11 @@ mod tests { fn test_context_parameter_detection() { let service_index = create_test_service_index(); let disambiguator = GoMethodDisambiguator::new(&service_index); - let context_param = Parameter::Positional { value: ParameterValue::Unresolved("context.TODO()".to_string()), position: 0, type_annotation: Some("context.Context".to_string()), + struct_fields: None, }; assert!(disambiguator.is_context_parameter(&context_param)); @@ -564,25 +636,12 @@ mod tests { value: ParameterValue::Unresolved("someValue".to_string()), position: 1, type_annotation: None, + struct_fields: None, }; assert!(!disambiguator.is_context_parameter(®ular_param)); } - #[test] - fn test_struct_field_name_extraction() { - let service_index = create_test_service_index(); - let disambiguator = GoMethodDisambiguator::new(&service_index); - - let struct_literal = - "&s3.ListObjectsV2Input{ Bucket: aws.String(\"my-bucket\"), MaxKeys: aws.Int32(10) }"; - let field_names = 
disambiguator.extract_struct_field_names(struct_literal); - - assert_eq!(field_names.len(), 2); - assert!(field_names.contains(&"Bucket".to_string())); - assert!(field_names.contains(&"MaxKeys".to_string())); - } - #[test] fn test_non_aws_method_call_filtered_out() { let service_index = create_test_service_index(); @@ -596,6 +655,7 @@ mod tests { value: ParameterValue::Unresolved("someParam".to_string()), position: 0, type_annotation: None, + struct_fields: None, }], return_type: None, start_position: (1, 1), @@ -633,6 +693,7 @@ mod tests { value: ParameterValue::Unresolved("context.TODO()".to_string()), position: 0, type_annotation: Some("context.Context".to_string()), + struct_fields: None, }, Parameter::Positional { value: ParameterValue::Unresolved( @@ -640,7 +701,8 @@ mod tests { .to_string(), ), position: 1, - type_annotation: Some("*s3.ListObjectsV2Input".to_string()), + type_annotation: Some("s3.ListObjectsV2Input".to_string()), + struct_fields: Some(vec!["Bucket".to_string()]), }, ], return_type: None, @@ -674,6 +736,7 @@ mod tests { value: ParameterValue::Unresolved("context.TODO()".to_string()), position: 0, type_annotation: Some("context.Context".to_string()), + struct_fields: None, }, Parameter::Positional { value: ParameterValue::Unresolved( @@ -681,7 +744,8 @@ mod tests { .to_string(), ), position: 1, - type_annotation: Some("*s3.ListObjectsV2Input".to_string()), + type_annotation: Some("s3.ListObjectsV2Input".to_string()), + struct_fields: Some(vec!["Bucket".to_string()]), }, ], return_type: None, @@ -721,6 +785,7 @@ mod tests { value: ParameterValue::Unresolved("context.TODO()".to_string()), position: 0, type_annotation: Some("context.Context".to_string()), + struct_fields: None, }, Parameter::Positional { value: ParameterValue::Unresolved( @@ -728,7 +793,8 @@ mod tests { .to_string(), ), position: 1, - type_annotation: Some("*myS3.ListObjectsV2Input".to_string()), + type_annotation: Some("myS3.ListObjectsV2Input".to_string()), + struct_fields: 
Some(vec!["Bucket".to_string()]), }, ], return_type: None, @@ -742,4 +808,222 @@ mod tests { assert_eq!(result.len(), 1); assert_eq!(result[0].possible_services, vec!["s3"]); } + + #[test] + fn test_missing_required_parameters_filtered_out() { + let service_index = create_test_service_index(); + let disambiguator = GoMethodDisambiguator::new(&service_index); + + // GetObject requires both Bucket and Key, but we only provide Bucket + let method_call = SdkMethodCall { + name: "GetObject".to_string(), + possible_services: Vec::new(), + metadata: Some(SdkMethodCallMetadata { + parameters: vec![ + Parameter::Positional { + value: ParameterValue::Unresolved("context.TODO()".to_string()), + position: 0, + type_annotation: Some("context.Context".to_string()), + struct_fields: None, + }, + Parameter::Positional { + value: ParameterValue::Unresolved( + "&s3.GetObjectInput{ Bucket: aws.String(\"my-bucket\") }".to_string(), + ), + position: 1, + type_annotation: Some("s3.GetObjectInput".to_string()), + struct_fields: Some(vec!["Bucket".to_string()]), + }, + ], + return_type: None, + start_position: (1, 1), + end_position: (1, 50), + receiver: Some("client".to_string()), + }), + }; + + let result = disambiguator.disambiguate_method_calls(vec![method_call], None); + // Should be filtered out because Key is required but missing + assert_eq!(result.len(), 0); + } + + #[test] + fn test_all_required_parameters_present() { + let service_index = create_test_service_index(); + let disambiguator = GoMethodDisambiguator::new(&service_index); + + // GetObject requires both Bucket and Key, and we provide both + let method_call = SdkMethodCall { + name: "GetObject".to_string(), + possible_services: Vec::new(), + metadata: Some(SdkMethodCallMetadata { + parameters: vec![ + Parameter::Positional { + value: ParameterValue::Unresolved("context.TODO()".to_string()), + position: 0, + type_annotation: Some("context.Context".to_string()), + struct_fields: None, + }, + Parameter::Positional { + 
value: ParameterValue::Unresolved( + "&s3.GetObjectInput{ Bucket: aws.String(\"my-bucket\"), Key: aws.String(\"my-key\") }".to_string(), + ), + position: 1, + type_annotation: Some("s3.GetObjectInput".to_string()), + struct_fields: Some(vec!["Bucket".to_string(), "Key".to_string()]), + }, + ], + return_type: None, + start_position: (1, 1), + end_position: (1, 50), + receiver: Some("client".to_string()), + }), + }; + + let result = disambiguator.disambiguate_method_calls(vec![method_call], None); + // Should pass because all required parameters are present + assert_eq!(result.len(), 1); + assert!(result[0].possible_services.contains(&"s3".to_string())); + } + + #[test] + fn test_variable_based_parameters_accepted() { + let service_index = create_test_service_index(); + let disambiguator = GoMethodDisambiguator::new(&service_index); + + // Using a variable for input - can't extract fields, so should be accepted + let method_call = SdkMethodCall { + name: "GetObject".to_string(), + possible_services: Vec::new(), + metadata: Some(SdkMethodCallMetadata { + parameters: vec![ + Parameter::Positional { + value: ParameterValue::Unresolved("context.TODO()".to_string()), + position: 0, + type_annotation: Some("context.Context".to_string()), + struct_fields: None, + }, + Parameter::Positional { + value: ParameterValue::Unresolved("getObjectInput".to_string()), + position: 1, + type_annotation: Some("s3.GetObjectInput".to_string()), + struct_fields: Some(vec!["Bucket".to_string(), "Key".to_string()]), + }, + ], + return_type: None, + start_position: (1, 1), + end_position: (1, 50), + receiver: Some("client".to_string()), + }), + }; + + let result = disambiguator.disambiguate_method_calls(vec![method_call], None); + // Should be accepted because we can't validate variable contents + assert_eq!(result.len(), 1); + assert!(result[0].possible_services.contains(&"s3".to_string())); + } + + #[test] + fn test_disambiguate_by_required_parameters() { + let service_index = 
create_test_service_index(); + let disambiguator = GoMethodDisambiguator::new(&service_index); + + // GetObject exists in both s3 and s3control + // s3 requires: Bucket, Key + // s3control requires: AccountId, Bucket, Key + // If we only provide Bucket and Key, it should match s3 but not s3control + let method_call = SdkMethodCall { + name: "GetObject".to_string(), + possible_services: Vec::new(), + metadata: Some(SdkMethodCallMetadata { + parameters: vec![ + Parameter::Positional { + value: ParameterValue::Unresolved("context.TODO()".to_string()), + position: 0, + type_annotation: Some("context.Context".to_string()), + struct_fields: None, + }, + Parameter::Positional { + value: ParameterValue::Unresolved( + "&s3.GetObjectInput{ Bucket: aws.String(\"my-bucket\"), Key: aws.String(\"my-key\") }".to_string(), + ), + position: 1, + type_annotation: Some("s3.GetObjectInput".to_string()), + struct_fields: Some(vec!["Bucket".to_string(), "Key".to_string()]), + }, + ], + return_type: None, + start_position: (1, 1), + end_position: (1, 50), + receiver: Some("client".to_string()), + }), + }; + + let result = disambiguator.disambiguate_method_calls(vec![method_call], None); + // Should only match s3, not s3control (which requires AccountId) + assert_eq!(result.len(), 1); + assert_eq!(result[0].possible_services, vec!["s3"]); + assert!(!result[0] + .possible_services + .contains(&"s3control".to_string())); + } + + #[test] + fn test_sqs_create_queue_with_nested_map_not_filtered() { + let service_index = create_test_service_index(); + let disambiguator = GoMethodDisambiguator::new(&service_index); + + // Test SQS CreateQueue with nested map keys in struct literal: + // result, err := sqsClient.CreateQueue(ctx, &sqs.CreateQueueInput{ + // QueueName: aws.String(queueName), + // Attributes: map[string]string{ + // "VisibilityTimeout": "60", + // "MessageRetentionPeriod": "345600", + // }, + // }) + // + // With AST-based extraction, we extract only QueueName and Attributes 
(top-level fields). + // We do NOT extract VisibilityTimeout or MessageRetentionPeriod (nested map keys). + // This should pass validation because: + // - QueueName (required) is present + // - Attributes (optional) is present and valid + + let method_call = SdkMethodCall { + name: "CreateQueue".to_string(), + possible_services: Vec::new(), + metadata: Some(SdkMethodCallMetadata { + parameters: vec![ + Parameter::Positional { + value: ParameterValue::Unresolved("context.TODO()".to_string()), + position: 0, + type_annotation: Some("context.Context".to_string()), + struct_fields: None, + }, + Parameter::Positional { + value: ParameterValue::Unresolved( + "&sqs.CreateQueueInput{ QueueName: aws.String(queueName), Attributes: map[string]string{\"VisibilityTimeout\": \"60\", \"MessageRetentionPeriod\": \"345600\"} }".to_string(), + ), + position: 1, + type_annotation: Some("sqs.CreateQueueInput".to_string()), + // AST-extracted fields: only top-level fields, NOT nested map keys + struct_fields: Some(vec!["QueueName".to_string(), "Attributes".to_string()]), + }, + ], + return_type: None, + start_position: (1, 1), + end_position: (1, 50), + receiver: Some("sqsClient".to_string()), + }), + }; + + let result = disambiguator.disambiguate_method_calls(vec![method_call], None); + + // Should NOT be filtered out because: + // - QueueName (required) is present in struct_fields + // - Attributes (optional) is present and valid + // - VisibilityTimeout and MessageRetentionPeriod are NOT in struct_fields (correctly not extracted) + assert_eq!(result.len(), 1, "CreateQueue should not be filtered out"); + assert_eq!(result[0].possible_services, vec!["sqs"]); + assert_eq!(result[0].name, "CreateQueue"); + } } diff --git a/iam-policy-autopilot-policy-generation/src/extraction/go/extractor.rs b/iam-policy-autopilot-policy-generation/src/extraction/go/extractor.rs index afe29d6..2a85e69 100644 --- a/iam-policy-autopilot-policy-generation/src/extraction/go/extractor.rs +++ 
b/iam-policy-autopilot-policy-generation/src/extraction/go/extractor.rs @@ -3,6 +3,7 @@ use crate::extraction::extractor::{Extractor, ExtractorResult}; use crate::extraction::go::disambiguation::GoMethodDisambiguator; use crate::extraction::go::features_extractor::GoFeaturesExtractor; +use crate::extraction::go::node_kinds; use crate::extraction::go::paginator_extractor::GoPaginatorExtractor; use crate::extraction::go::types::{GoImportInfo, ImportInfo}; use crate::extraction::go::waiter_extractor::GoWaiterExtractor; @@ -156,9 +157,37 @@ rule: return None; }; - // Extract arguments - get_multiple_matches returns Vec directly - let args_nodes = env.get_multiple_matches("ARGS"); - let arguments = self.extract_arguments(&args_nodes); + // Extract arguments - ARGS captures the entire argument_list node + // We need to get its children to access individual arguments + let arguments = if let Some(args_node) = env.get_match("ARGS") { + log::debug!("Matched argument_list node: {:?}", args_node.text()); + + // Get the children of the argument_list node (excluding parentheses) + let arg_children: Vec<_> = args_node + .children() + .filter(|child| { + // Filter out parentheses and commas, keep only actual argument nodes + let kind = child.kind(); + kind != node_kinds::LEFT_PAREN + && kind != node_kinds::RIGHT_PAREN + && kind != node_kinds::COMMA + }) + .collect(); + + log::debug!("Found {} argument children", arg_children.len()); + for (i, child) in arg_children.iter().enumerate() { + log::debug!( + "Argument [{}]: kind={}, text={:?}", + i, + child.kind(), + child.text() + ); + } + + self.extract_arguments_with_ast(&arg_children) + } else { + vec![] + }; // Get position information let node = node_match.get_node(); @@ -180,14 +209,15 @@ rule: Some(method_call) } - /// Extract arguments from argument nodes - fn extract_arguments( + /// Extract arguments from argument nodes with AST-based field extraction + fn extract_arguments_with_ast( &self, args_nodes: 
&[ast_grep_core::Node>], ) -> Vec { let mut parameters = Vec::new(); for (position, arg_node) in args_nodes.iter().enumerate() { + log::debug!("Extracting parameter from: {:?}", arg_node.text()); let arg_text = arg_node.text().to_string(); // Check if this is a context parameter (first parameter in Go AWS SDK calls) @@ -196,9 +226,22 @@ rule: } // Check if this is a struct literal (&Type{...}) else if self.is_struct_literal(arg_node) { - if let Some(param) = self.parse_struct_literal(arg_node, position) { - parameters.push(param); - } + // Use AST-based extraction for struct fields + let type_annotation = self.extract_type_from_struct_literal(&arg_node.text()); + let fields = self.extract_struct_fields_from_ast(arg_node); + + // Store as struct literal with proper field extraction + parameters.push(Parameter::Positional { + value: ParameterValue::Unresolved(arg_text), + position, + type_annotation, + struct_fields: Some(fields.clone()), + }); + + log::debug!( + "Extracted {} struct fields from composite literal", + fields.len() + ); } // Otherwise, it's a general expression else { @@ -206,9 +249,139 @@ rule: } } + log::debug!("Extracted {} parameters", parameters.len()); parameters } + /// Extract type name from struct literal text + fn extract_type_from_struct_literal(&self, text: &str) -> Option { + let trimmed = text.trim(); + if trimmed.starts_with('&') { + if let Some(brace_pos) = trimmed.find('{') { + let type_part = trimmed[1..brace_pos].trim(); + return Some(type_part.to_string()); + } + } + None + } + + /// Extract top-level struct fields from AST node + fn extract_struct_fields_from_ast( + &self, + node: &ast_grep_core::Node>, + ) -> Vec { + let field_names = Vec::new(); + + log::debug!( + "Extracting struct fields from AST node: kind={}", + node.kind() + ); + log::debug!( + "Node text (first 100 chars): {:?}", + &node.text().chars().take(100).collect::() + ); + + // Check if this node is directly a composite_literal + if node.kind() == 
node_kinds::COMPOSITE_LITERAL { + log::debug!("Node is directly a composite_literal"); + return self.extract_fields_from_composite_literal(node); + } + + // Check immediate children for unary_expression (for &Type{...} pattern) + for child in node.children() { + log::debug!("Checking child node: kind={}", child.kind()); + if child.kind() == node_kinds::UNARY_EXPRESSION { + // Check the unary_expression's children for composite_literal + for unary_child in child.children() { + log::debug!("Checking unary child: kind={}", unary_child.kind()); + if unary_child.kind() == node_kinds::COMPOSITE_LITERAL { + log::debug!("Found composite_literal under unary_expression"); + return self.extract_fields_from_composite_literal(&unary_child); + } + } + } else if child.kind() == node_kinds::COMPOSITE_LITERAL { + log::debug!("Found composite_literal as direct child"); + return self.extract_fields_from_composite_literal(&child); + } + } + + log::debug!("No composite_literal found"); + field_names + } + + /// Extract field names from a composite_literal node + /// + /// A `composite_literal` is a tree-sitter AST node type representing Go's composite literal syntax. + /// In Go, composite literals construct values for structs, arrays, slices, and maps. + /// + /// For our purposes, we focus on struct literals with named fields. 
+ /// + /// # References + /// + /// - Go Language Spec: + /// - Tree-sitter Go Grammar: + /// (search for `composite_literal` to see the grammar definition) + /// + /// # Examples of composite_literals + /// + /// ```go + /// // Struct literal (what we extract from) + /// &s3.GetObjectInput{ + /// Bucket: aws.String("my-bucket"), // Field: "Bucket" + /// Key: aws.String("my-key"), // Field: "Key" + /// } + /// + /// // Array literal (not extracted) + /// []string{"a", "b", "c"} + /// + /// // Map literal (not extracted - we only get top-level fields) + /// map[string]string{ + /// "key1": "value1", + /// "key2": "value2", + /// } + /// ``` + /// + /// This function extracts only the top-level field names from struct literals, + /// not nested structures or map keys. + fn extract_fields_from_composite_literal( + &self, + composite_literal: &ast_grep_core::Node>, + ) -> Vec { + let mut field_names = Vec::new(); + + log::debug!("Extracting fields from composite_literal, looking for literal_value..."); + + // Find the literal_value child which contains the fields. + // A composite_literal has exactly one literal_value child in the AST. 
+ let literal_value = composite_literal + .children() + .find(|child| child.kind() == node_kinds::LITERAL_VALUE); + + if let Some(literal_value) = literal_value { + log::debug!("Found literal_value, extracting keyed_elements..."); + // Extract field names from keyed_element nodes + for element in literal_value.children() { + log::debug!("Literal_value child: kind={}", element.kind()); + if element.kind() == node_kinds::KEYED_ELEMENT { + // Get the first child, which is the field name (literal_element) + if let Some(field_name_node) = element.children().next() { + log::debug!( + "Keyed_element first child: kind={}, text={:?}", + field_name_node.kind(), + field_name_node.text() + ); + if field_name_node.kind() == node_kinds::LITERAL_ELEMENT { + field_names.push(field_name_node.text().to_string()); + } + } + } + } + } + + log::debug!("Field names extracted from AST: {:?}", field_names); + field_names + } + /// Check if a node represents a context parameter fn is_context_parameter( &self, @@ -227,48 +400,6 @@ rule: let trimmed = text.trim(); trimmed.starts_with('&') && trimmed.contains('{') && trimmed.ends_with('}') } - - /// Parse a struct literal node - fn parse_struct_literal( - &self, - node: &ast_grep_core::Node>, - position: usize, - ) -> Option { - let text = node.text(); - - // Extract type name from &TypeName{...} - let type_start = if text.starts_with('&') { 1 } else { 0 }; - let brace_pos = text.find('{')?; - let type_name = text[type_start..brace_pos].trim().to_string(); - - // Extract fields from the struct literal - let fields_text = &text[brace_pos + 1..text.len() - 1]; - let fields = self.parse_struct_fields(fields_text); - - Some(Parameter::struct_literal(type_name, fields, position)) - } - - /// Parse struct fields from the content between braces - fn parse_struct_fields(&self, fields_text: &str) -> Vec { - let mut fields = Vec::new(); - - // Simple parsing - split by commas and extract field: value pairs - // This is a basic implementation; a more 
robust version would use proper AST parsing - for field_part in fields_text.split(',') { - let field_part = field_part.trim(); - if field_part.is_empty() { - continue; - } - - if let Some(colon_pos) = field_part.find(':') { - let name = field_part[..colon_pos].trim().to_string(); - let value = field_part[colon_pos + 1..].trim().to_string(); - fields.push(StructField { name, value }); - } - } - - fields - } } /// Represents a field in a Go struct literal @@ -404,6 +535,7 @@ impl Parameter { value: ParameterValue::Unresolved(expression), position, type_annotation: Some("context.Context".to_string()), + struct_fields: None, } } @@ -423,7 +555,8 @@ impl Parameter { Parameter::Positional { value: ParameterValue::Unresolved(format!("&{}{{ {} }}", type_name, fields_str)), position, - type_annotation: Some(format!("*{}", type_name)), + type_annotation: Some(type_name), + struct_fields: Some(fields.iter().map(|f| f.name.clone()).collect()), } } @@ -433,6 +566,7 @@ impl Parameter { value: ParameterValue::Unresolved(value), position, type_annotation: None, + struct_fields: None, } } } @@ -847,19 +981,6 @@ func main() { ); } - #[test] - fn test_struct_field_parsing() { - let extractor = GoExtractor::new(); - let fields_text = r#"Bucket: aws.String("my-bucket"), MaxKeys: aws.Int32(10)"#; - let fields = extractor.parse_struct_fields(fields_text); - - assert_eq!(fields.len(), 2); - assert_eq!(fields[0].name, "Bucket"); - assert_eq!(fields[0].value, r#"aws.String("my-bucket")"#); - assert_eq!(fields[1].name, "MaxKeys"); - assert_eq!(fields[1].value, "aws.Int32(10)"); - } - #[tokio::test] async fn test_import_extraction_and_filtering() { use crate::extraction::sdk_model::{ @@ -1213,4 +1334,414 @@ func main() { println!("✅ CloudWatch Logs service name mismatch test passed - method calls preserved when Go import name doesn't match AWS service name"); } + + #[tokio::test] + async fn test_multiline_struct_literal_argument_extraction() { + let extractor = GoExtractor::new(); + + // Test 
code with multi-line struct literal + let test_code = r#" +package main + +import ( + "context" + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/s3" +) + +type BucketBasics struct { + S3Client *s3.Client +} + +func (basics BucketBasics) DownloadFile(ctx context.Context, bucketName string, objectKey string, fileName string) error { + result, err := basics.S3Client.GetObject(ctx, &s3.GetObjectInput{ + Bucket: aws.String(bucketName), + Key: aws.String(objectKey), + }) + if err != nil { + return err + } + return nil +} + "#; + + let result = extractor.parse(test_code).await; + let method_calls = result.method_calls_ref(); + + println!( + "Multi-line struct literal test - Found {} method calls:", + method_calls.len() + ); + for call in method_calls { + println!( + " - {} (receiver: {:?})", + call.name, + call.metadata.as_ref().and_then(|m| m.receiver.as_ref()) + ); + if let Some(metadata) = &call.metadata { + println!(" Parameters: {} params", metadata.parameters.len()); + for (i, param) in metadata.parameters.iter().enumerate() { + match param { + Parameter::Positional { + value, + type_annotation, + .. 
+ } => { + println!( + " [{}] value: {:?}, type: {:?}", + i, + value.as_string(), + type_annotation + ); + } + _ => println!(" [{}] {:?}", i, param), + } + } + } + } + + // Find the GetObject call + let get_object_calls: Vec<_> = method_calls + .iter() + .filter(|call| call.name == "GetObject") + .collect(); + + assert_eq!( + get_object_calls.len(), + 1, + "Should find exactly 1 GetObject call" + ); + + let get_object_call = get_object_calls[0]; + assert_eq!( + get_object_call + .metadata + .as_ref() + .unwrap() + .receiver + .as_ref() + .unwrap(), + "basics.S3Client" + ); + + // Verify we extracted the parameters + let params = &get_object_call.metadata.as_ref().unwrap().parameters; + + // Should have at least 2 parameters: ctx and the struct literal + assert!( + params.len() >= 2, + "Should have at least 2 parameters (ctx and struct literal)" + ); + + // First parameter should be context + match ¶ms[0] { + Parameter::Positional { value, .. } => { + let value_str = value.as_string(); + assert!( + value_str == "ctx" || value_str.contains("context"), + "First parameter should be context, got: {}", + value_str + ); + } + _ => panic!("Expected positional parameter for context"), + } + + // Second parameter should be the struct literal with Bucket and Key fields + match ¶ms[1] { + Parameter::Positional { value, .. 
} => { + let value_str = value.as_string(); + println!("Struct literal parameter: {}", value_str); + + // Verify it's a struct literal + assert!( + value_str.contains("GetObjectInput"), + "Should contain GetObjectInput type" + ); + assert!(value_str.contains("Bucket"), "Should contain Bucket field"); + assert!(value_str.contains("Key"), "Should contain Key field"); + assert!( + value_str.contains("aws.String"), + "Should contain aws.String calls" + ); + } + _ => panic!("Expected positional parameter for struct literal"), + } + + println!("✅ Multi-line struct literal argument extraction test passed!"); + } +} +#[cfg(test)] +mod test_struct_fields { + use crate::extraction::extractor::Extractor; + use crate::extraction::go::extractor::GoExtractor; + use crate::extraction::Parameter; + + /// Test extraction of struct literals with multiple fields + #[tokio::test] + async fn test_extraction_multiple_fields() { + let extractor = GoExtractor::new(); + + let code = r#" +package main + +import ( + "context" + "github.com/aws/aws-sdk-go-v2/service/s3" + "github.com/aws/aws-sdk-go-v2/aws" +) + +func test() { + client := s3.NewFromConfig(cfg) + result, _ := client.GetObject(context.TODO(), &s3.GetObjectInput{ + Bucket: aws.String("my-bucket"), + Key: aws.String("my-key"), + }) +} +"#; + + let result = extractor.parse(code).await; + let method_calls = result.method_calls_ref(); + + let call = method_calls + .iter() + .find(|c| c.name == "GetObject") + .expect("Should find GetObject call"); + + let metadata = call.metadata.as_ref().expect("Should have metadata"); + assert_eq!(metadata.parameters.len(), 2); + + // Check context parameter + if let Parameter::Positional { + struct_fields, + type_annotation, + .. 
+ } = &metadata.parameters[0] + { + assert_eq!(type_annotation.as_deref(), Some("context.Context")); + assert!( + struct_fields.is_none(), + "Context should not have struct_fields" + ); + } else { + panic!("Expected Positional parameter for context"); + } + + // Check struct literal parameter + if let Parameter::Positional { + struct_fields, + type_annotation, + .. + } = &metadata.parameters[1] + { + assert_eq!(type_annotation.as_deref(), Some("s3.GetObjectInput")); + assert!( + struct_fields.is_some(), + "struct_fields should be Some for struct literal" + ); + let fields = struct_fields.as_ref().unwrap(); + assert_eq!(fields.len(), 2); + assert!(fields.contains(&"Bucket".to_string())); + assert!(fields.contains(&"Key".to_string())); + } else { + panic!("Expected Positional parameter for struct literal"); + } + } + + /// Test that variable references don't extract struct fields + #[tokio::test] + async fn test_extraction_variable_parameter() { + let extractor = GoExtractor::new(); + + let code = r#" +package main + +import ( + "context" + "github.com/aws/aws-sdk-go-v2/service/s3" +) + +func test() { + client := s3.NewFromConfig(cfg) + getObjectInput := &s3.GetObjectInput{ + Bucket: aws.String("my-bucket"), + Key: aws.String("my-key"), + } + result, _ := client.GetObject(context.TODO(), getObjectInput) +} +"#; + + let result = extractor.parse(code).await; + let method_calls = result.method_calls_ref(); + + let call = method_calls + .iter() + .find(|c| c.name == "GetObject") + .expect("Should find GetObject call"); + + let metadata = call.metadata.as_ref().expect("Should have metadata"); + + // Check variable parameter - should NOT extract fields + if let Parameter::Positional { + struct_fields, + value, + .. 
+ } = &metadata.parameters[1] + { + assert_eq!(value.as_string(), "getObjectInput"); + assert!( + struct_fields.is_none(), + "struct_fields should be None for variable reference" + ); + } else { + panic!("Expected Positional parameter"); + } + } + + /// Test that nested maps only extract top-level fields (critical for SQS CreateQueue case) + #[tokio::test] + async fn test_extraction_nested_map_only_top_level() { + let extractor = GoExtractor::new(); + + let code = r#" +package main + +import ( + "context" + "github.com/aws/aws-sdk-go-v2/service/sqs" + "github.com/aws/aws-sdk-go-v2/aws" +) + +func test() { + client := sqs.NewFromConfig(cfg) + result, _ := client.CreateQueue(context.TODO(), &sqs.CreateQueueInput{ + QueueName: aws.String("my-queue"), + Attributes: map[string]string{ + "VisibilityTimeout": "60", + "MessageRetentionPeriod": "345600", + }, + }) +} +"#; + + let result = extractor.parse(code).await; + let method_calls = result.method_calls_ref(); + + let call = method_calls + .iter() + .find(|c| c.name == "CreateQueue") + .expect("Should find CreateQueue call"); + + let metadata = call.metadata.as_ref().expect("Should have metadata"); + + // Check struct literal parameter + if let Parameter::Positional { struct_fields, .. 
} = &metadata.parameters[1] { + assert!(struct_fields.is_some()); + let fields = struct_fields.as_ref().unwrap(); + + // Should only extract top-level fields + assert_eq!(fields.len(), 2); + assert!(fields.contains(&"QueueName".to_string())); + assert!(fields.contains(&"Attributes".to_string())); + + // Should NOT contain nested map keys + assert!(!fields.contains(&"VisibilityTimeout".to_string())); + assert!(!fields.contains(&"MessageRetentionPeriod".to_string())); + } else { + panic!("Expected Positional parameter"); + } + } + + /// Test that JSON strings in payloads are not parsed as struct fields + #[tokio::test] + async fn test_extraction_json_payload_not_parsed() { + let extractor = GoExtractor::new(); + + let code = r#" +package main + +import ( + "context" + "github.com/aws/aws-sdk-go-v2/service/lambda" + "github.com/aws/aws-sdk-go-v2/aws" +) + +func test() { + client := lambda.NewFromConfig(cfg) + result, _ := client.Invoke(context.TODO(), &lambda.InvokeInput{ + FunctionName: aws.String("my-function"), + Payload: []byte(`{"action": "process", "data": {"id": 123}}`), + }) +} +"#; + + let result = extractor.parse(code).await; + let method_calls = result.method_calls_ref(); + + let call = method_calls + .iter() + .find(|c| c.name == "Invoke") + .expect("Should find Invoke call"); + + let metadata = call.metadata.as_ref().expect("Should have metadata"); + + // Check struct literal parameter + if let Parameter::Positional { struct_fields, .. 
} = &metadata.parameters[1] { + assert!(struct_fields.is_some()); + let fields = struct_fields.as_ref().unwrap(); + + // Should only extract Go struct fields, not JSON keys + assert_eq!(fields.len(), 2); + assert!(fields.contains(&"FunctionName".to_string())); + assert!(fields.contains(&"Payload".to_string())); + + // Should NOT extract JSON keys from the string literal + assert!(!fields.contains(&"action".to_string())); + assert!(!fields.contains(&"data".to_string())); + assert!(!fields.contains(&"id".to_string())); + } else { + panic!("Expected Positional parameter"); + } + } + + /// Test that empty struct literals result in empty field list + #[tokio::test] + async fn test_extraction_empty_struct() { + let extractor = GoExtractor::new(); + + let code = r#" +package main + +import ( + "context" + "github.com/aws/aws-sdk-go-v2/service/sts" +) + +func test() { + client := sts.NewFromConfig(cfg) + result, _ := client.GetCallerIdentity(context.TODO(), &sts.GetCallerIdentityInput{}) +} +"#; + + let result = extractor.parse(code).await; + let method_calls = result.method_calls_ref(); + + let call = method_calls + .iter() + .find(|c| c.name == "GetCallerIdentity") + .expect("Should find GetCallerIdentity call"); + + let metadata = call.metadata.as_ref().expect("Should have metadata"); + + // Check struct literal parameter + if let Parameter::Positional { struct_fields, .. 
} = &metadata.parameters[1] { + assert!( + struct_fields.is_some(), + "struct_fields should be Some even for empty struct" + ); + let fields = struct_fields.as_ref().unwrap(); + assert_eq!(fields.len(), 0, "Should have 0 fields for empty struct"); + } else { + panic!("Expected Positional parameter"); + } + } } diff --git a/iam-policy-autopilot-policy-generation/src/extraction/go/features_extractor.rs b/iam-policy-autopilot-policy-generation/src/extraction/go/features_extractor.rs index 0309cf0..bd58dab 100644 --- a/iam-policy-autopilot-policy-generation/src/extraction/go/features_extractor.rs +++ b/iam-policy-autopilot-policy-generation/src/extraction/go/features_extractor.rs @@ -190,6 +190,7 @@ rule: value: crate::extraction::ParameterValue::Unresolved("synthetic".to_string()), position: i, type_annotation: None, + struct_fields: None, }) .collect() } else { diff --git a/iam-policy-autopilot-policy-generation/src/extraction/go/mod.rs b/iam-policy-autopilot-policy-generation/src/extraction/go/mod.rs index f457e35..11d4b2d 100644 --- a/iam-policy-autopilot-policy-generation/src/extraction/go/mod.rs +++ b/iam-policy-autopilot-policy-generation/src/extraction/go/mod.rs @@ -3,6 +3,7 @@ pub(crate) mod disambiguation; pub(crate) mod extractor; pub(crate) mod features; pub(crate) mod features_extractor; +pub(crate) mod node_kinds; pub(crate) mod paginator_extractor; pub(crate) mod types; pub(crate) mod utils; diff --git a/iam-policy-autopilot-policy-generation/src/extraction/go/node_kinds.rs b/iam-policy-autopilot-policy-generation/src/extraction/go/node_kinds.rs new file mode 100644 index 0000000..010ad9b --- /dev/null +++ b/iam-policy-autopilot-policy-generation/src/extraction/go/node_kinds.rs @@ -0,0 +1,40 @@ +//! Tree-sitter node kind constants for Go AST +//! +//! These constants represent the node kinds returned by Tree-sitter's Go grammar. +//! Using constants instead of string literals provides: +//! - Compile-time checking of constant names +//! 
- IDE autocomplete support +//! - Centralized documentation of node kinds +//! - Easier refactoring +//! +//! Note: The actual values come from the Tree-sitter Go grammar and cannot be +//! changed. We're just providing named constants to avoid magic strings. + +/// A composite literal node (e.g., `Type{field: value}`) +pub(crate) const COMPOSITE_LITERAL: &str = "composite_literal"; + +/// A unary expression node (e.g., `&value`, `*ptr`) +pub(crate) const UNARY_EXPRESSION: &str = "unary_expression"; + +/// A literal value node containing struct field assignments +pub(crate) const LITERAL_VALUE: &str = "literal_value"; + +/// A keyed element in a composite literal (e.g., `field: value`) +pub(crate) const KEYED_ELEMENT: &str = "keyed_element"; + +/// A literal element representing a field name or simple value +pub(crate) const LITERAL_ELEMENT: &str = "literal_element"; + +/// An argument list node containing function/method arguments +/// Note: Currently only used in YAML pattern strings, not in Rust code comparisons +#[allow(dead_code)] +pub(crate) const ARGUMENT_LIST: &str = "argument_list"; + +/// Left parenthesis token +pub(crate) const LEFT_PAREN: &str = "("; + +/// Right parenthesis token +pub(crate) const RIGHT_PAREN: &str = ")"; + +/// Comma separator token +pub(crate) const COMMA: &str = ","; diff --git a/iam-policy-autopilot-policy-generation/src/extraction/go/waiter_extractor.rs b/iam-policy-autopilot-policy-generation/src/extraction/go/waiter_extractor.rs index e110a9c..ceb7b7c 100644 --- a/iam-policy-autopilot-policy-generation/src/extraction/go/waiter_extractor.rs +++ b/iam-policy-autopilot-policy-generation/src/extraction/go/waiter_extractor.rs @@ -340,6 +340,7 @@ impl<'a> GoWaiterExtractor<'a> { value: ParameterValue::Unresolved(param_name.clone()), position, type_annotation: None, + struct_fields: None, }); } } diff --git a/iam-policy-autopilot-policy-generation/src/extraction/javascript/shared.rs 
b/iam-policy-autopilot-policy-generation/src/extraction/javascript/shared.rs index 58f0075..ce8e105 100644 --- a/iam-policy-autopilot-policy-generation/src/extraction/javascript/shared.rs +++ b/iam-policy-autopilot-policy-generation/src/extraction/javascript/shared.rs @@ -5,9 +5,26 @@ use crate::extraction::javascript::types::JavaScriptScanResults; use crate::extraction::{Parameter, ParameterValue, SdkMethodCall, SdkMethodCallMetadata}; +use rust_embed::RustEmbed; use serde::Deserialize; use std::collections::HashMap; +/// Embedded JavaScript SDK v3 libraries mapping +/// +/// This struct provides access to the JavaScript SDK v3 libraries mapping configuration +/// that defines how lib-* submodule commands map to client-* commands. +#[derive(RustEmbed)] +#[folder = "resources/config/sdks"] +#[include = "js_v3_libraries.json"] +struct JsV3Libraries; + +impl JsV3Libraries { + /// Get the JavaScript SDK v3 libraries mapping configuration + fn get_libraries_mapping() -> Option> { + Self::get("js_v3_libraries.json").map(|file| file.data) + } +} + /// JSON structure for JS v3 libraries mapping /// /// The AWS SDK fro Javascript defines in aws-sdk-js-v3/lib @@ -24,7 +41,7 @@ struct JsV3LibrariesMapping { /// Load JS v3 libraries mapping from embedded data fn load_libraries_mapping() -> Option { - let content_bytes = crate::embedded_data::JsV3Libraries::get_libraries_mapping()?; + let content_bytes = JsV3Libraries::get_libraries_mapping()?; let content = std::str::from_utf8(&content_bytes).ok()?; diff --git a/iam-policy-autopilot-policy-generation/src/extraction/mod.rs b/iam-policy-autopilot-policy-generation/src/extraction/mod.rs index ff0c4a2..91be7da 100644 --- a/iam-policy-autopilot-policy-generation/src/extraction/mod.rs +++ b/iam-policy-autopilot-policy-generation/src/extraction/mod.rs @@ -189,6 +189,9 @@ pub mod core { /// This enum eliminates the conceptual confusion of the previous struct design /// where unpacked parameters had a "name" field containing 
expressions. /// Each variant contains only the fields that make sense for that parameter type. + /// + /// TODO: Refactor enum variant fields into separate structs to enable Default trait + /// implementation and improve ergonomics. See: https://github.com/awslabs/iam-policy-autopilot/issues/61 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] pub(crate) enum Parameter { /// Positional argument (e.g., first, second argument in call) @@ -199,6 +202,9 @@ pub mod core { position: usize, /// Type annotation if available (e.g., "str", "int") type_annotation: Option, + /// For Go struct literals: extracted top-level field names (not nested) + #[serde(skip_serializing_if = "Option::is_none")] + struct_fields: Option>, }, /// Named keyword argument (e.g., Bucket='my-bucket') Keyword { @@ -237,6 +243,7 @@ pub mod output { /// List of all extracted methods pub methods: Vec, /// Metadata about the extraction process + /// INVARIANT: all source_files must have the same language pub metadata: ExtractionMetadata, } diff --git a/iam-policy-autopilot-policy-generation/src/extraction/python/boto3_resources_model.rs b/iam-policy-autopilot-policy-generation/src/extraction/python/boto3_resources_model.rs index 95b57e7..df5789f 100644 --- a/iam-policy-autopilot-policy-generation/src/extraction/python/boto3_resources_model.rs +++ b/iam-policy-autopilot-policy-generation/src/extraction/python/boto3_resources_model.rs @@ -2,7 +2,7 @@ //! //! Parses boto3 resources JSON specifications and utility mappings for resource-based AWS SDK patterns. 
-use crate::embedded_data::EmbeddedBoto3Data; +use crate::embedded_data::Boto3Data; use convert_case::{Case, Casing}; use serde::Deserialize; use std::collections::HashMap; @@ -22,7 +22,7 @@ pub enum OperationType { /// Extract service names from embedded boto3 utilities mapping fn extract_services_from_embedded_utilities_mapping() -> Result, String> { - let content_bytes = EmbeddedBoto3Data::get_utilities_mapping() + let content_bytes = Boto3Data::get_utilities_mapping() .ok_or_else(|| "Boto3 utilities mapping not found in embedded data".to_string())?; let content = std::str::from_utf8(&content_bytes) @@ -302,7 +302,7 @@ impl Boto3ResourcesModel { /// Loads resource specifications from embedded boto3 data pub fn load_from_embedded(service_name: &str) -> Result { // Get service versions from embedded data - let service_versions = EmbeddedBoto3Data::build_service_versions_map(); + let service_versions = Boto3Data::build_service_versions_map(); // Find the service and get its latest version let versions = service_versions.get(service_name).ok_or_else(|| { @@ -317,7 +317,7 @@ impl Boto3ResourcesModel { .ok_or_else(|| format!("No versions found for service '{}'", service_name))?; // Get the resources data - let resources_data = EmbeddedBoto3Data::get_resources_raw(service_name, latest_version) + let resources_data = Boto3Data::get_resources_raw(service_name, latest_version) .ok_or_else(|| { format!( "Resources data not found for {}/{}", @@ -347,7 +347,7 @@ impl Boto3ResourcesModel { /// Merge utility methods from embedded mapping into model fn merge_utility_methods_from_embedded(model: &mut Boto3ResourcesModel) -> Result<(), String> { - let content_bytes = EmbeddedBoto3Data::get_utilities_mapping() + let content_bytes = Boto3Data::get_utilities_mapping() .ok_or_else(|| "Boto3 utilities mapping not found in embedded data".to_string())?; let content = std::str::from_utf8(&content_bytes) diff --git 
a/iam-policy-autopilot-policy-generation/src/extraction/python/common/argument_extractor.rs b/iam-policy-autopilot-policy-generation/src/extraction/python/common/argument_extractor.rs index c5123ee..c5150e7 100644 --- a/iam-policy-autopilot-policy-generation/src/extraction/python/common/argument_extractor.rs +++ b/iam-policy-autopilot-policy-generation/src/extraction/python/common/argument_extractor.rs @@ -4,6 +4,7 @@ //! from ast-grep nodes, handling keyword arguments, positional arguments, //! and dictionary unpacking consistently across all Python extractors. +use crate::extraction::python::node_kinds; use crate::extraction::{Parameter, ParameterValue}; use ast_grep_language::Python; @@ -20,7 +21,7 @@ impl ArgumentExtractor { for arg_node in args_nodes { // Filter out comment nodes - if arg_node.kind() == "comment" { + if arg_node.kind() == node_kinds::COMMENT { continue; } @@ -52,6 +53,7 @@ impl ArgumentExtractor { value: Self::extract_parameter_value(&arg_text), position: parameter_position, type_annotation: None, + struct_fields: None, }); parameter_position += 1; } @@ -64,14 +66,14 @@ impl ArgumentExtractor { pub fn is_keyword_argument( node: &ast_grep_core::Node>, ) -> bool { - node.kind() == "keyword_argument" + node.kind() == node_kinds::KEYWORD_ARGUMENT } /// Check if a node represents dictionary unpacking (**kwargs) pub fn is_dictionary_splat( node: &ast_grep_core::Node>, ) -> bool { - node.kind() == "dictionary_splat" + node.kind() == node_kinds::DICTIONARY_SPLAT } /// Parse a keyword argument node diff --git a/iam-policy-autopilot-policy-generation/src/extraction/python/disambiguation.rs b/iam-policy-autopilot-policy-generation/src/extraction/python/disambiguation.rs index c9f9bea..6214fdf 100644 --- a/iam-policy-autopilot-policy-generation/src/extraction/python/disambiguation.rs +++ b/iam-policy-autopilot-policy-generation/src/extraction/python/disambiguation.rs @@ -166,6 +166,8 @@ impl<'a> MethodDisambiguator<'a> { .unwrap_or_default(); // Get all 
valid parameters from the shape + // TODO: Make this case-insensitive like Go disambiguation to handle inconsistent + // AWS model casing. See: https://github.com/awslabs/iam-policy-autopilot/issues/57 let valid_params: HashSet = shape.members.keys().cloned().collect(); // Check that all required parameters are provided diff --git a/iam-policy-autopilot-policy-generation/src/extraction/python/disambiguation_tests.rs b/iam-policy-autopilot-policy-generation/src/extraction/python/disambiguation_tests.rs index 9b9fcd1..1858dbd 100644 --- a/iam-policy-autopilot-policy-generation/src/extraction/python/disambiguation_tests.rs +++ b/iam-policy-autopilot-policy-generation/src/extraction/python/disambiguation_tests.rs @@ -538,6 +538,7 @@ def example(): value: ParameterValue::Resolved("positional_value".to_string()), position: 3, type_annotation: None, + struct_fields: None, }, ], return_type: None, @@ -588,6 +589,7 @@ def example(): value: ParameterValue::Resolved("positional_value".to_string()), position: 2, type_annotation: None, + struct_fields: None, }, ], return_type: None, diff --git a/iam-policy-autopilot-policy-generation/src/extraction/python/mod.rs b/iam-policy-autopilot-policy-generation/src/extraction/python/mod.rs index 0de60bd..686a8ec 100644 --- a/iam-policy-autopilot-policy-generation/src/extraction/python/mod.rs +++ b/iam-policy-autopilot-policy-generation/src/extraction/python/mod.rs @@ -4,6 +4,7 @@ pub(crate) mod extractor; pub(crate) mod boto3_resources_model; pub(crate) mod common; pub(crate) mod disambiguation; +pub(crate) mod node_kinds; pub(crate) mod paginator_extractor; pub(crate) mod resource_direct_calls_extractor; pub(crate) mod waiters_extractor; diff --git a/iam-policy-autopilot-policy-generation/src/extraction/python/node_kinds.rs b/iam-policy-autopilot-policy-generation/src/extraction/python/node_kinds.rs new file mode 100644 index 0000000..75d9890 --- /dev/null +++ b/iam-policy-autopilot-policy-generation/src/extraction/python/node_kinds.rs 
@@ -0,0 +1,20 @@ +//! Tree-sitter node kind constants for Python AST +//! +//! These constants represent the node kinds returned by Tree-sitter's Python grammar. +//! Using constants instead of string literals provides: +//! - Compile-time checking of constant names +//! - IDE autocomplete support +//! - Centralized documentation of node kinds +//! - Easier refactoring +//! +//! Note: The actual values come from the Tree-sitter Python grammar and cannot be +//! changed. We're just providing named constants to avoid magic strings. + +/// A comment node +pub(crate) const COMMENT: &str = "comment"; + +/// A keyword argument in a function call (e.g., `key=value`) +pub(crate) const KEYWORD_ARGUMENT: &str = "keyword_argument"; + +/// A dictionary splat/unpacking operator (e.g., `**kwargs`) +pub(crate) const DICTIONARY_SPLAT: &str = "dictionary_splat"; diff --git a/iam-policy-autopilot-policy-generation/src/extraction/python/resource_direct_calls_extractor.rs b/iam-policy-autopilot-policy-generation/src/extraction/python/resource_direct_calls_extractor.rs index 289f8d9..7d32128 100644 --- a/iam-policy-autopilot-policy-generation/src/extraction/python/resource_direct_calls_extractor.rs +++ b/iam-policy-autopilot-policy-generation/src/extraction/python/resource_direct_calls_extractor.rs @@ -351,6 +351,7 @@ impl<'a> ResourceDirectCallsExtractor<'a> { value: value.clone(), position: parameters.len(), type_annotation: type_annotation.clone(), + struct_fields: None, } } } else { @@ -784,6 +785,7 @@ impl<'a> ResourceDirectCallsExtractor<'a> { value: value.clone(), position: combined_parameters.len() + i, type_annotation: type_annotation.clone(), + struct_fields: None, }, Parameter::DictionarySplat { expression, .. 
} => Parameter::DictionarySplat { expression: expression.clone(), diff --git a/iam-policy-autopilot-policy-generation/src/extraction/sdk_model.rs b/iam-policy-autopilot-policy-generation/src/extraction/sdk_model.rs index 3f3c184..5089527 100644 --- a/iam-policy-autopilot-policy-generation/src/extraction/sdk_model.rs +++ b/iam-policy-autopilot-policy-generation/src/extraction/sdk_model.rs @@ -16,7 +16,7 @@ use tokio::task::JoinSet; use convert_case::{Case, Casing}; use serde::{Deserialize, Serialize}; -use crate::embedded_data::EmbeddedServiceData; +use crate::embedded_data::BotocoreData; use crate::errors::{ExtractorError, Result}; use crate::Language; @@ -104,6 +104,8 @@ pub(crate) struct Shape { #[serde(rename = "type")] pub(crate) type_name: String, /// Map of member name to shape reference for structure types + /// TODO: Canonicalize member keys to lowercase during deserialization to handle + /// inconsistent casing in AWS models. See: https://github.com/awslabs/iam-policy-autopilot/issues/57 #[serde(default)] pub(crate) members: HashMap, /// Required parameters @@ -173,7 +175,7 @@ impl ServiceDiscovery { log::debug!("Starting optimized service discovery..."); // Use the optimized single-iteration approach - let service_versions_map = EmbeddedServiceData::build_service_versions_map(); + let service_versions_map = BotocoreData::build_service_versions_map(); let mut services = Vec::new(); for (service_name, api_versions) in service_versions_map { @@ -326,26 +328,14 @@ impl ServiceDiscovery { })?; // Load service definition from embedded data - let service_definition = EmbeddedServiceData::get_service_definition( + let service_definition = BotocoreData::get_service_definition( &service_info.name, &service_info.api_version, - ) - .await - .map_err(|e| { - ExtractorError::sdk_processing_with_source( - &service_info.name, - "Failed to load embedded service definition", - e, - ) - })?; + )?; // Load waiters from embedded data let waiters = - 
crate::extraction::waiter_model::WaitersRegistry::load_waiters_from_embedded( - &service_info.name, - &service_info.api_version, - ) - .await; + BotocoreData::get_waiters(&service_info.name, &service_info.api_version); let service_time = service_start.elapsed(); if service_time.as_millis() > 100 { diff --git a/iam-policy-autopilot-policy-generation/src/extraction/waiter_model.rs b/iam-policy-autopilot-policy-generation/src/extraction/waiter_model.rs index d78d6b4..7726051 100644 --- a/iam-policy-autopilot-policy-generation/src/extraction/waiter_model.rs +++ b/iam-policy-autopilot-policy-generation/src/extraction/waiter_model.rs @@ -4,8 +4,6 @@ //! mappings from waiter names to their underlying SDK operations. //! Waiters are available across all AWS SDKs (Python boto3, JavaScript/TypeScript, Go, etc.) -use crate::embedded_data::EmbeddedServiceData; -use crate::providers::JsonProvider; use serde::{Deserialize, Serialize}; use std::collections::HashMap; @@ -26,31 +24,3 @@ pub(crate) struct WaitersDescription { pub(crate) version: u32, pub(crate) waiters: HashMap, } - -/// Registry of all waiters across all AWS services -#[derive(Debug, Clone)] -pub struct WaitersRegistry; - -impl WaitersRegistry { - /// Load waiters from embedded data for a specific service - /// - /// # Arguments - /// * `service_name` - Service name (e.g., "ec2", "s3") - /// * `api_version` - API version (e.g., "2016-11-15", "2006-03-01") - /// - /// # Returns - /// HashMap of waiter names to waiter entries, or None if no waiters found - pub async fn load_waiters_from_embedded( - service_name: &str, - api_version: &str, - ) -> Option> { - let waiters_data = EmbeddedServiceData::get_waiters_raw(service_name, api_version)?; - - let waiters_str = std::str::from_utf8(&waiters_data).ok()?; - - match JsonProvider::parse::(waiters_str).await { - Ok(waiters_desc) => Some(waiters_desc.waiters), - Err(_) => None, // Silently skip on parse error - } - } -} diff --git 
a/iam-policy-autopilot-policy-generation/src/lib.rs b/iam-policy-autopilot-policy-generation/src/lib.rs index 2e848ed..4a68c3f 100644 --- a/iam-policy-autopilot-policy-generation/src/lib.rs +++ b/iam-policy-autopilot-policy-generation/src/lib.rs @@ -59,6 +59,23 @@ pub enum Language { TypeScript, } +impl Language { + fn sdk_type(&self) -> SdkType { + match self { + Self::Python => SdkType::Boto3, + _ => SdkType::Other, + } + } +} + +/// SdkType used, for Boto3 we look up the method name in the SDF +#[derive(Debug, Copy, Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)] +#[allow(missing_docs)] +pub enum SdkType { + Boto3, + Other, +} + impl Language { /// Attempts to parse a language from a string representation. /// diff --git a/iam-policy-autopilot-policy-generation/tests/go_extraction_integration_test.rs b/iam-policy-autopilot-policy-generation/tests/go_extraction_integration_test.rs index 50fb787..bdd2ab4 100644 --- a/iam-policy-autopilot-policy-generation/tests/go_extraction_integration_test.rs +++ b/iam-policy-autopilot-policy-generation/tests/go_extraction_integration_test.rs @@ -5,7 +5,7 @@ //! enrichment and policy generation through the public API. 
use iam_policy_autopilot_policy_generation::{ - EnrichmentEngine, ExtractionEngine, Language, PolicyGenerationEngine, SourceFile, + EnrichmentEngine, ExtractionEngine, Language, PolicyGenerationEngine, SdkType, SourceFile, }; use std::path::PathBuf; @@ -91,7 +91,7 @@ async fn test_go_extraction_to_policy_generation_integration() { let mut enrichment_engine = EnrichmentEngine::new(false).unwrap(); match enrichment_engine - .enrich_methods(&extracted_methods.methods) + .enrich_methods(&extracted_methods.methods, SdkType::Other) .await { Ok(enriched_calls) => { diff --git a/iam-policy-autopilot-policy-generation/tests/go_sdk_features_test.rs b/iam-policy-autopilot-policy-generation/tests/go_sdk_features_test.rs index 5b62bf9..87f5fad 100644 --- a/iam-policy-autopilot-policy-generation/tests/go_sdk_features_test.rs +++ b/iam-policy-autopilot-policy-generation/tests/go_sdk_features_test.rs @@ -8,7 +8,7 @@ //! Based on go-analysis.json which documents operations requiring IAM permissions. use iam_policy_autopilot_policy_generation::{ - EnrichmentEngine, ExtractionEngine, Language, PolicyGenerationEngine, SourceFile, + EnrichmentEngine, ExtractionEngine, Language, PolicyGenerationEngine, SdkType, SourceFile, }; use std::path::PathBuf; @@ -74,7 +74,7 @@ func main() { let mut enrichment_engine = EnrichmentEngine::new(false).unwrap(); let enriched = enrichment_engine - .enrich_methods(&extracted.methods) + .enrich_methods(&extracted.methods, SdkType::Other) .await .expect("Enrichment should succeed"); @@ -154,7 +154,7 @@ func main() { let mut enrichment_engine = EnrichmentEngine::new(false).unwrap(); let enriched = enrichment_engine - .enrich_methods(&extracted.methods) + .enrich_methods(&extracted.methods, SdkType::Other) .await .expect("Enrichment should succeed"); diff --git a/iam-policy-autopilot-policy-generation/tests/public_api_integration_test.rs b/iam-policy-autopilot-policy-generation/tests/public_api_integration_test.rs index a32ead8..33195f6 100644 --- 
a/iam-policy-autopilot-policy-generation/tests/public_api_integration_test.rs +++ b/iam-policy-autopilot-policy-generation/tests/public_api_integration_test.rs @@ -6,7 +6,7 @@ use iam_policy_autopilot_policy_generation::{ EnrichmentEngine, ExtractionEngine, FileSystemProvider, JsonProvider, Language, - PolicyGenerationEngine, SourceFile, + PolicyGenerationEngine, SdkType, SourceFile, }; use std::io::Write; use std::path::PathBuf; @@ -91,7 +91,7 @@ def download_object(): let mut enrichment_engine = EnrichmentEngine::new(false).unwrap(); let enriched_methods = enrichment_engine - .enrich_methods(&extracted_methods.methods) + .enrich_methods(&extracted_methods.methods, SdkType::Boto3) .await .expect("Enrichment should succeed"); @@ -283,7 +283,7 @@ def multi_service_operations(): let mut enrichment_engine = EnrichmentEngine::new(false).unwrap(); let enriched = enrichment_engine - .enrich_methods(&extracted.methods) + .enrich_methods(&extracted.methods, SdkType::Boto3) .await .expect("Should enrich methods"); @@ -435,7 +435,7 @@ def start_policy_generation(): // Enrich the methods let mut enrichment_engine = EnrichmentEngine::new(false).unwrap(); let enriched = enrichment_engine - .enrich_methods(&extracted.methods) + .enrich_methods(&extracted.methods, SdkType::Boto3) .await .expect("Enrichment should succeed"); diff --git a/install.sh b/install.sh new file mode 100755 index 0000000..4d0014f --- /dev/null +++ b/install.sh @@ -0,0 +1,349 @@ +#!/bin/sh +# IAM Policy Autopilot Installation Script +# +# This script automatically downloads and installs the IAM Policy Autopilot CLI tool +# for Unix-like systems (macOS and Linux). 
+# +# Supported platforms: +# - macOS (Darwin): x86_64, ARM64 +# - Linux: x86_64, ARM64 +# +# Usage: +# # Fresh installation (one-liner): +# curl -sSL https://github.com/awslabs/iam-policy-autopilot/raw/refs/heads/main/install.sh | sh +# +# # Update existing installation (same command): +# curl -sSL https://github.com/awslabs/iam-policy-autopilot/raw/refs/heads/main/install.sh | sh +# +# # Manual installation with sudo: +# curl -sSL https://github.com/awslabs/iam-policy-autopilot/raw/refs/heads/main/install.sh | sudo sh +# +# # Download and run locally: +# curl -sSL https://github.com/awslabs/iam-policy-autopilot/raw/refs/heads/main/install.sh -o install.sh +# chmod +x install.sh +# ./install.sh +# +# The script will automatically detect if you have an existing installation and update it +# if a newer version is available. If you're already running the latest version, it will +# notify you and exit without making changes. + +# Repository configuration +GITHUB_ORG="awslabs" +GITHUB_REPO="iam-policy-autopilot" + +set -e + +# Global variables for cleanup +TEMP_FILE="" +TEMP_DIR="" + +# Cleanup function +cleanup() { + if [ -n "$TEMP_FILE" ] && [ -f "$TEMP_FILE" ]; then + rm -f "$TEMP_FILE" + fi + if [ -n "$TEMP_DIR" ] && [ -d "$TEMP_DIR" ]; then + rm -rf "$TEMP_DIR" + fi +} + +# Set up cleanup trap for temporary files +trap cleanup EXIT INT TERM + +# Error handling function +error() { + echo "Error: $1" >&2 + cleanup + exit 1 +} + +# Platform detection function +detect_platform() { + echo "Detecting platform..." + + # Detect OS + OS=$(uname -s | tr '[:upper:]' '[:lower:]') + + case "$OS" in + darwin) + OS="apple-darwin" + ;; + linux) + OS="unknown-linux-gnu" + ;; + *) + error "Unsupported operating system: $OS. Supported: macOS (Darwin), Linux" + ;; + esac + + # Detect architecture + ARCH=$(uname -m) + + case "$ARCH" in + x86_64) + # Keep as is + ;; + aarch64|arm64) + ARCH="aarch64" + ;; + i686|i386) + error "32-bit x86 architecture is not supported. 
Supported: x86_64, ARM64" + ;; + *) + error "Unsupported architecture: $ARCH. Supported: x86_64, ARM64" + ;; + esac + + # Construct platform target triple + PLATFORM="${ARCH}-${OS}" + + echo "Detected platform: $PLATFORM" +} + +# GitHub API integration function +get_latest_release() { + echo "Fetching latest release information from GitHub..." + + # Query GitHub API for latest release + RELEASE_JSON=$(curl -sL \ + -H "Accept: application/vnd.github+json" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + "https://api.github.com/repos/$GITHUB_ORG/$GITHUB_REPO/releases/latest" 2>&1) + + # Check if curl command succeeded + if [ $? -ne 0 ]; then + error "Failed to fetch release information from GitHub. Please check your internet connection and try again." + fi + + # Check if response contains error + if echo "$RELEASE_JSON" | grep -q '"message".*"Not Found"'; then + error "GitHub repository or release not found. Please verify the repository exists." + fi + + if echo "$RELEASE_JSON" | grep -q '"message".*"API rate limit exceeded"'; then + error "GitHub API rate limit exceeded. Please try again later." + fi + + # Extract version tag + VERSION=$(echo "$RELEASE_JSON" | grep '"tag_name"' | sed -E 's/.*"tag_name"[[:space:]]*:[[:space:]]*"([^"]+)".*/\1/' | head -n 1) + + if [ -z "$VERSION" ]; then + error "Failed to parse version from GitHub API response. The response may be malformed." + fi + + echo "Latest version: $VERSION" + + # Find matching binary asset based on platform + # Look for assets that contain the platform string in their name + ASSET_URL=$(echo "$RELEASE_JSON" | grep '"browser_download_url"' | grep "$PLATFORM" | sed -E 's/.*"browser_download_url"[[:space:]]*:[[:space:]]*"([^"]+)".*/\1/' | head -n 1) + + if [ -z "$ASSET_URL" ]; then + error "No binary found for platform: $PLATFORM. This platform may not be supported yet. 
Please report this at https://github.com/$GITHUB_ORG/$GITHUB_REPO/issues" + fi + + # Extract asset filename from URL + ASSET_NAME=$(basename "$ASSET_URL") + + echo "Found binary: $ASSET_NAME" + echo "Download URL: $ASSET_URL" +} + +# Version comparison function +check_existing_version() { + INSTALL_DIR="/usr/local/bin" + BINARY_PATH="$INSTALL_DIR/iam-policy-autopilot" + + echo "Checking for existing installation..." + + # Check if binary exists in installation directory + if [ -f "$BINARY_PATH" ]; then + echo "Found existing installation at $BINARY_PATH" + + # Try to get installed version by executing --version command + INSTALLED_VERSION=$("$BINARY_PATH" --version 2>/dev/null | awk '{print $2}') + + # If version detection failed, proceed with installation + if [ -z "$INSTALLED_VERSION" ]; then + echo "Could not determine installed version. Proceeding with installation." + return 0 + fi + + echo "Installed version: $INSTALLED_VERSION" + + # Remove 'v' prefix from VERSION if present for comparison + LATEST_VERSION_CLEAN="${VERSION#v}" + + # Compare installed version with latest release version + if [ "$INSTALLED_VERSION" = "$LATEST_VERSION_CLEAN" ]; then + echo "" + echo "iam-policy-autopilot $INSTALLED_VERSION is already installed and up-to-date" + exit 0 + fi + + echo "Updating iam-policy-autopilot from $INSTALLED_VERSION to $LATEST_VERSION_CLEAN" + else + echo "No existing installation found" + echo "Installing iam-policy-autopilot ${VERSION#v}" + fi +} + +# Download binary function +download_binary() { + echo "" + echo "Downloading binary..." + + # Create temporary file for download + TEMP_FILE=$(mktemp) + + echo "Downloading from $ASSET_URL" + + # Try curl first, fallback to wget + if command -v curl >/dev/null 2>&1; then + # Use curl with progress bar + if ! curl -L --progress-bar -o "$TEMP_FILE" "$ASSET_URL"; then + error "Failed to download binary using curl. Please check your internet connection and try again." 
+ fi + elif command -v wget >/dev/null 2>&1; then + # Use wget with progress display + if ! wget -q --show-progress -O "$TEMP_FILE" "$ASSET_URL"; then + error "Failed to download binary using wget. Please check your internet connection and try again." + fi + else + error "Neither curl nor wget found. Please install curl or wget and try again." + fi + + # Verify download succeeded by checking file size + if [ ! -s "$TEMP_FILE" ]; then + error "Downloaded file appears to be empty or corrupted. Please try again." + fi + + echo "Download complete" + + # Handle archived formats + # Check if the downloaded file is a tar.gz archive + if echo "$ASSET_NAME" | grep -q "\.tar\.gz$"; then + echo "Extracting tar.gz archive..." + + # Create temporary directory for extraction + TEMP_DIR=$(mktemp -d) + + # Extract archive + if ! tar -xzf "$TEMP_FILE" -C "$TEMP_DIR"; then + error "Failed to extract tar.gz archive. The file may be corrupted." + fi + + # Find the binary in the extracted files + # Look for file named iam-policy-autopilot (without extension) + EXTRACTED_BINARY=$(find "$TEMP_DIR" -type f -name "iam-policy-autopilot" | head -n 1) + + if [ -z "$EXTRACTED_BINARY" ]; then + error "Could not find iam-policy-autopilot binary in extracted archive." + fi + + # Move extracted binary to temp file location + mv "$EXTRACTED_BINARY" "$TEMP_FILE" + + # Clean up extraction directory + rm -rf "$TEMP_DIR" + + echo "Extraction complete" + elif echo "$ASSET_NAME" | grep -q "\.zip$"; then + echo "Extracting zip archive..." + + # Create temporary directory for extraction + TEMP_DIR=$(mktemp -d) + + # Check if unzip is available + if ! command -v unzip >/dev/null 2>&1; then + error "unzip command not found. Please install unzip to extract .zip archives." + fi + + # Extract archive + if ! unzip -q "$TEMP_FILE" -d "$TEMP_DIR"; then + error "Failed to extract zip archive. The file may be corrupted." 
+ fi + + # Find the binary in the extracted files + EXTRACTED_BINARY=$(find "$TEMP_DIR" -type f -name "iam-policy-autopilot" | head -n 1) + + if [ -z "$EXTRACTED_BINARY" ]; then + error "Could not find iam-policy-autopilot binary in extracted archive." + fi + + # Move extracted binary to temp file location + mv "$EXTRACTED_BINARY" "$TEMP_FILE" + + # Clean up extraction directory + rm -rf "$TEMP_DIR" + + echo "Extraction complete" + fi + + # Verify the final binary file exists and is not empty + if [ ! -s "$TEMP_FILE" ]; then + error "Binary file is missing or empty after processing. Please try again." + fi + + echo "Binary ready for installation" +} + +# Installation function +install_binary() { + INSTALL_DIR="/usr/local/bin" + BINARY_NAME="iam-policy-autopilot" + INSTALL_PATH="$INSTALL_DIR/$BINARY_NAME" + + echo "" + echo "Installing binary..." + + # Check write permissions for installation directory + if [ ! -w "$INSTALL_DIR" ]; then + error "No write permission to $INSTALL_DIR. Try running with sudo: curl -sSL https://github.com/$GITHUB_ORG/$GITHUB_REPO/raw/refs/heads/main/install.sh | sudo sh" + fi + + # Preserve permissions when updating existing installation + # If binary already exists, save its permissions + EXISTING_PERMS="" + if [ -f "$INSTALL_PATH" ]; then + # Get current permissions in octal format (e.g., 755) + EXISTING_PERMS=$(stat -f "%Lp" "$INSTALL_PATH" 2>/dev/null || stat -c "%a" "$INSTALL_PATH" 2>/dev/null) + echo "Preserving existing permissions: $EXISTING_PERMS" + fi + + # Move downloaded binary to installation directory + if ! mv "$TEMP_FILE" "$INSTALL_PATH"; then + error "Failed to move binary to $INSTALL_PATH. Please check permissions and try again." 
+ fi + + # Set executable permissions + # If we preserved permissions from existing installation, restore them + # Otherwise, set default executable permissions (755) + if [ -n "$EXISTING_PERMS" ]; then + chmod "$EXISTING_PERMS" "$INSTALL_PATH" + else + chmod +x "$INSTALL_PATH" + fi + + # Get the installed version for success message + FINAL_VERSION=$("$INSTALL_PATH" --version 2>/dev/null | awk '{print $2}' || echo "${VERSION#v}") + + echo "" + echo "==========================================" + echo "Installation complete!" + echo "==========================================" + echo "Version: $FINAL_VERSION" + echo "Location: $INSTALL_PATH" + echo "" + echo "Run 'iam-policy-autopilot --help' to get started" +} + +# Main execution +main() { + detect_platform + get_latest_release + check_existing_version + download_binary + install_binary +} + +main diff --git a/power-iam-policy-autopilot/POWER.md b/power-iam-policy-autopilot/POWER.md new file mode 100644 index 0000000..dad966d --- /dev/null +++ b/power-iam-policy-autopilot/POWER.md @@ -0,0 +1,48 @@ +--- +name: "iam-policy-autopilot-power" +displayName: "IAM Policy Autopilot" +description: "AWS IAM Policy Autopilot analyzes your application code locally to generate identity-based policies for application roles, enabling faster IAM policy creation and reducing access troubleshooting time" +keywords: ["IAM", "AWS", "policy", "policies", "identity", "identity-based", "permission", "access", "denial", "deny", "denied", "lambda"] +mcpServers: "iam-policy-autopilot-mcp" +--- + +# Onboarding + +Before using IAM Policy Autopilot, make sure that the following are done and configured: +## Onboarding Part 1: validate that the binary command `iam-policy-autopilot` works, AND create necessary command aliases +This Kiro Power's requires the use of the `iam-policy-autopilot@latest` package from `uv`/`uvx`; this is because this Kiro power's MCP server's configuration requires running `uvx iam-policy-autopilot@latest mcp-server`. 
Just for context: although users may have installed the `iam-policy-autopilot` binary in some other way, we specifically do not want to use those other binaries because we want `uv`/`uvx` to fetch the latest version of its managed `iam-policy-autopilot` binary. As such, do this pre-requisite step to set up and make sure `uvx iam-policy-autopilot@latest` works, *and also* (because `uv/uvx` shell environment variables need to get picked up by the Kiro IDE) that the `iam-policy-autopilot-mcp` MCP server associated with this kiro power is successfully enabled: + +1. try running `uvx iam-policy-autopilot@latest --version`; it should return `iam-policy-autopilot: version-id`, where "version-id" is something like "0.1.0". For context, this command fetches the latest version of the `iam-policy-autopilot` binary from the `uv` server, and runs it. This command should return something like `iam-policy-autopilot: version-id`, where "version-id" is something like "0.1.0". + - [remediation step 1] if this `uvx iam-policy-autopilot@latest --version` command fails specifically because `uvx` does not exist, then we can try to remediate this by installing that `uvx` command ourselves. To do this, run `curl -LsSf https://astral.sh/uv/install.sh | sh`; if the user does not have `curl`, run instead the command `wget -qO- https://astral.sh/uv/install.sh | sh`. Then, add `uvx` to path by running either `source $HOME/.local/bin/env` for sh/bash/zsh shells, or `source $HOME/.local/bin/env.fish` for fish shells. Finally, check that the installation has succeeded by running `uvx iam-policy-autopilot@latest --version` and seeing if it returns something like `iam-policy-autopilot: version-id`, where "version-id" is something like "0.1.0". If this check succeeds, proceed to "step 2". If this check has failed, FAIL THE ENTIRE INSTALLATION PROCESS and tell the user that `uvx iam-policy-autopilot@latest` has failed, and they must get this command working themselves, e.g. 
by installing `uvx` via the instructions in https://docs.astral.sh/uv/getting-started/installation/. +2. Make sure that the associated MCP server `iam-policy-autopilot-mcp` is up and running. Check that MCP calls to that server work. If they do not work, even though `uvx iam-policy-autopilot@latest --version` works, this may be because the Kiro IDE needs to pick up the latest shell environment changes. If they do not work, try to remediate the environment variables for the Kiro IDE. Then, wait a few seconds and try to reload the MCP server itself, **not the power**, and see if it works. If this is still unsuccessful, warn the user that they may manually go to the MCP server tab and click "Retry" on the `iam-policy-autopilot` MCP server there, to try to get it to reconnect. + + +## Onboarding Part 2 (optional): validate that the `aws` bin command exists, and that AWS credentials are configured. +This onboarding part consists of two steps. These two steps are optional if the user just wants to generate IAM policies for their applications or access denial errors (e.g. with the `generate_application_policies` and `generate_policy_for_access_denied` MCP tools in this kiro power), as policy generation does not require the `aws` cli or aws credentials. However, if the user wants to deploy their policy fixes to their AWS account (e.g. with the `fix_access_denied` MCP tool in this Kiro power), this requires both the following two steps to be performed, as both the `aws` cli and the user's active AWS credentials are required. + +1. First, check that calling `aws --version` in the CLI does not return a "command not found" exception; it should instead return something like this: `aws-cli/2.27.18 Python/3.13.3 Darwin/25.1.0 exe/x86_64`. + - if this does not work, FINISH the kiro power onboarding process, but WARN the user that they will not be able to perform the above actions that require the `aws` command and configured credentials. 
Tell the user to follow the setup guide for the AWS CLI in https://docs.aws.amazon.com/cli/v1/userguide/cli-chap-install.html, and then configure credentials by telling them to look at this link: https://docs.aws.amazon.com/cli/v1/userguide/cli-configure-files.html. +2. Second, check that calling `aws configure list` in the CLI returns a table like the following below, AND THAT the `access_key` and `secret_key` entries in the table have values that are set. + - if this does not work, PROCEED with the kiro power onboarding process, but WARN the user that they need to configure aws credentials, by telling them to look at this link: https://docs.aws.amazon.com/cli/v1/userguide/cli-configure-files.html. +``` + Name Value Type Location + ---- ----- ---- -------- + profile None None +access_key ****************NIUM shared-credentials-file +secret_key ****************TYnY shared-credentials-file + region us-west-2 config-file ~/.aws/config +``` + +## If both onboarding parts 1 and 2 have succeeded, then onboarding is complete. If onboarding part 1 has succeeded but `aws configure list` in onboarding part 2 returned empty values for either the access key or secret key, then onboarding is complete, but warn the user that they must configure their aws credentials. + + +--- +# Best Practices and Ideal Use Cases + +To understand the best practices and use cases of this MCP server's tools, please read through ALL the instructions and use cases in the descriptions of the `generate_application_policies`, `generate_policy_for_access_denied`, and `fix_access_denied` tools in this IAM Policy Autopilot MCP server (`iam-policy-autopilot-mcp`). + +Specifically, there are certain cases when this MCP server excels: +- **generating IAM policies for a code file used in an AWS deployment (e.g. AWS Lambda function)**: the `generate_application_policies` tool in the `iam-policy-autopilot-mcp` MCP server does exactly this. 
Take a look through ALL the instructions and use cases for this tool, to better understand how it is useful. +- **troubleshooting/resolving AWS IAM access denied errors**: the `generate_policy_for_access_denied` and `fix_access_denied` tools in the `iam-policy-autopilot-mcp` MCP server can be used in combination to fix IAM access denied errors. Take a look through ALL the instructions and use cases for each of those tools, to better understand how they are useful. For instance: if the user gives you an AWS access denied error they saw and asks you to diagnose/resolve it, OR if the user asks you to test an AWS deployment and you see an access denied error when testing, then you can do the following: + 1. invoke the `generate_policy_for_access_denied` tool, passing in the access denied error you saw. Follow ALL the instructions in that tool. This tool should return an IAM policy to you, which should contain a fix for the access denied error. + 2. Then call the `fix_access_denied` tool using both that IAM policy returned from the `generate_policy_for_access_denied` tool, as well as the original access denied error. This will deploy the IAM policy generated by the `generate_policy_for_access_denied` tool to the AWS account, in an attempt to fix the access denied error. 
\ No newline at end of file diff --git a/power-iam-policy-autopilot/mcp.json b/power-iam-policy-autopilot/mcp.json new file mode 100644 index 0000000..87dc44f --- /dev/null +++ b/power-iam-policy-autopilot/mcp.json @@ -0,0 +1,11 @@ +{ + "mcpServers": { + "iam-policy-autopilot-mcp": { + "command": "uvx", + "args": ["iam-policy-autopilot@latest", "mcp-server"], + "env": {}, + "disabled": false, + "autoApprove": [] + } + } +} \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 68b6853..dd82cd4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,8 +5,8 @@ build-backend = "maturin" # Standard Python package metadata [project] name = "iam-policy-autopilot" # The name it will have on PyPI -version = "0.1.0" -description = "A unified toolset for AWS IAM policy management, providing both proactive policy generation from source code analysis and reactive error fixing from AccessDenied messages." +version = "0.1.2" +description = "An open source Model Context Protocol (MCP) server and command-line tool that helps your AI coding assistants quickly create baseline IAM policies that you can refine as your application evolves, so you can build faster. IAM Policy Autopilot analyzes your application code locally to generate identity-based policies for application roles, enabling faster IAM policy creation and reducing access troubleshooting time. IAM Policy Autopilot supports applications built in Python, Go, and TypeScript." 
readme = "README.md" requires-python = ">=3.8" license = { file = "LICENSE" } From a6077d4458f34eb6d3fa348f1b65c94d11fa50c2 Mon Sep 17 00:00:00 2001 From: Hongbo Wei Date: Fri, 19 Dec 2025 14:24:08 -0800 Subject: [PATCH 02/10] refactor: address comments in PR#87 --- Cargo.toml | 2 +- iam-policy-autopilot-cli/src/commands.rs | 42 +++-- iam-policy-autopilot-cli/src/main.rs | 9 +- .../Cargo.toml | 4 +- .../build.rs | 176 ++++++++---------- .../resources/config/sdks/boto3 | 2 +- .../src/api/get_submodule_version.rs | 2 +- .../src/api/model.rs | 6 +- .../src/embedded_data.rs | 1 - 9 files changed, 117 insertions(+), 127 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 67f5ef1..deeaf8f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -64,7 +64,7 @@ serial_test = "3.0" atty = "0.2" chrono = { version = "0.4", features = ["serde"] } uuid = { version = "1.8", features = ["v4"] } -sha2 = "0.10.9" +aws-lc-rs = "1.15.2" git2 = "0.20.3" relative-path = "2.0.1" url = "2.5" diff --git a/iam-policy-autopilot-cli/src/commands.rs b/iam-policy-autopilot-cli/src/commands.rs index f3cf086..159b05b 100644 --- a/iam-policy-autopilot-cli/src/commands.rs +++ b/iam-policy-autopilot-cli/src/commands.rs @@ -7,7 +7,7 @@ use iam_policy_autopilot_access_denied::{ApplyError, ApplyOptions, DenialType}; fn is_tty() -> bool { atty::is(atty::Stream::Stdin) && atty::is(atty::Stream::Stderr) } -use clap::{crate_version, Parser, Subcommand}; +use clap::crate_version; /// Returns Some(true) if user confirmed, Some(false) if declined, None if not in TTY. 
fn prompt_yes_no() -> Option { @@ -143,26 +143,28 @@ async fn fix_access_denied_with_service( } } -pub fn print_version_info() -> anyhow::Result<()> { - let boto3_version_metadata = - iam_policy_autopilot_policy_generation::api::get_boto3_version_info()?; - let botocore_version_metadata = - iam_policy_autopilot_policy_generation::api::get_botocore_version_info()?; +pub fn print_version_info(debug: bool) -> anyhow::Result<()> { println!("{}", crate_version!()); - println!( - "boto3 version: commit_id={}, commit_tag={}, data_hash={}", - boto3_version_metadata.git_commit_hash, - boto3_version_metadata.git_tag.unwrap_or("None".to_string()), - boto3_version_metadata.data_hash - ); - println!( - "botocore version: commit_id={}, commit_tag={}, data_hash={}", - botocore_version_metadata.git_commit_hash, - botocore_version_metadata - .git_tag - .unwrap_or("None".to_string()), - botocore_version_metadata.data_hash - ); + if debug { + let boto3_version_metadata = + iam_policy_autopilot_policy_generation::api::get_boto3_version_info()?; + let botocore_version_metadata = + iam_policy_autopilot_policy_generation::api::get_botocore_version_info()?; + println!( + "boto3 version: commit_id={}, commit_tag={}, data_hash={}", + boto3_version_metadata.git_commit_hash, + boto3_version_metadata.git_tag.unwrap_or("None".to_string()), + boto3_version_metadata.data_hash + ); + println!( + "botocore version: commit_id={}, commit_tag={}, data_hash={}", + botocore_version_metadata.git_commit_hash, + botocore_version_metadata + .git_tag + .unwrap_or("None".to_string()), + botocore_version_metadata.data_hash + ); + } Ok(()) } diff --git a/iam-policy-autopilot-cli/src/main.rs b/iam-policy-autopilot-cli/src/main.rs index 865d187..e7baf54 100644 --- a/iam-policy-autopilot-cli/src/main.rs +++ b/iam-policy-autopilot-cli/src/main.rs @@ -20,7 +20,7 @@ use std::path::PathBuf; use std::process; use anyhow::{Context, Result}; -use clap::{crate_version, Parser, Subcommand}; +use clap::{Parser, Subcommand}; 
use iam_policy_autopilot_policy_generation::api::model::{ AwsContext, ExtractSdkCallsConfig, GeneratePolicyConfig, }; @@ -358,7 +358,10 @@ Only used when --transport=http. The server will bind to 127.0.0.1 (localhost) o short_flag = 'V', long_flag = "version" )] - Version {}, + Version { + #[arg(short = 'd', long = "debug", default_value_t = false, hide = true)] + debug: bool, + }, } /// Initialize logging based on configuration @@ -622,7 +625,7 @@ async fn main() { } } - Commands::Version {} => match print_version_info() { + Commands::Version { debug } => match print_version_info(debug) { Ok(()) => ExitCode::Success, Err(e) => { print_cli_command_error(e); diff --git a/iam-policy-autopilot-policy-generation/Cargo.toml b/iam-policy-autopilot-policy-generation/Cargo.toml index 3b939c8..3b91aa2 100644 --- a/iam-policy-autopilot-policy-generation/Cargo.toml +++ b/iam-policy-autopilot-policy-generation/Cargo.toml @@ -27,7 +27,7 @@ serde_json.workspace = true tokio.workspace = true async-trait.workspace = true strsim.workspace = true -sha2.workspace = true +aws-lc-rs.workspace = true git2.workspace = true relative-path.workspace = true @@ -44,7 +44,7 @@ tokio-util.workspace = true # JSON processing serde_json.workspace = true -sha2.workspace = true +aws-lc-rs.workspace = true git2.workspace = true relative-path.workspace = true diff --git a/iam-policy-autopilot-policy-generation/build.rs b/iam-policy-autopilot-policy-generation/build.rs index ecc3b7d..57fe1ce 100644 --- a/iam-policy-autopilot-policy-generation/build.rs +++ b/iam-policy-autopilot-policy-generation/build.rs @@ -1,19 +1,9 @@ -use git2::Commit; -use git2::Describe; -use git2::DescribeFormatOptions; -use git2::DescribeOptions; -use git2::Reference; -use git2::Repository; +use aws_lc_rs::digest::{Context, Digest, SHA256}; +use git2::{DescribeOptions, Repository}; use relative_path::PathExt; use relative_path::RelativePathBuf; use serde::{Deserialize, Serialize}; use serde_json::Value; -use 
sha2::digest::consts::B0; -use sha2::digest::consts::B1; -use sha2::digest::generic_array::GenericArray; -use sha2::digest::typenum::UInt; -use sha2::digest::typenum::UTerm; -use sha2::{Digest, Sha256}; use std::collections::BTreeMap; use std::env; use std::fs; @@ -65,15 +55,73 @@ struct ShapeReference { shape: String, } -// Git version and commit hash for boto3 and botocore +/// This must be an exact copy of GitSubmoduleMetadata in model.rs #[derive(Debug, Clone, Serialize, Deserialize)] -struct GitSubmoduleVersion { +pub struct GitSubmoduleVersion { + /// the commit of boto3/botocore, returned on calls to iam-policy-autopilot --version --debug #[serde(rename = "gitCommit")] - git_commit_hash: String, + pub git_commit_hash: String, + /// the git tag of boto3/botocore, returned on calls to iam-policy-autopilot --version --debug #[serde(rename = "gitTag")] - git_tag: Option, + pub git_tag: Option, + /// the sha hash of boto3/botocore simplified models, returned on calls to iam-policy-autopilot --version --debug #[serde(rename = "dataHash")] - data_hash: String, + pub data_hash: String, +} + +impl GitSubmoduleVersion { + fn new(git_path: &Path, data_path: &PathBuf) -> GitSubmoduleVersion { + let repository = Repository::open(&git_path) + .expect(&format!("Failed to open repository at path {:?}", git_path)); + GitSubmoduleVersion { + git_commit_hash: get_repository_commit(&repository).expect(&format!( + "Failed to get repository commit at path {:?}", + git_path + )), + git_tag: get_repository_tag(&repository).expect(&format!( + "Failed to get repository tag at path {:?}", + git_path + )), + data_hash: format!( + "{:?}", + Self::sha2sum_recursive(&data_path, &data_path).expect(&format!( + "Failed to compute checksum over data at path {:?}", + data_path + )) + ), + } + } + + fn sha2sum_recursive(cwd: &Path, root: &Path) -> Result> { + let mut hash_table: BTreeMap = BTreeMap::new(); + + let mut dir_entry_list = fs::read_dir(cwd)? 
+ .map(|res| res.map(|e| e.path())) + .collect::, io::Error>>()?; + dir_entry_list.sort(); + + for entry_path in dir_entry_list { + let relt_path = entry_path.clone().relative_to(root)?; + if entry_path.is_dir() { + hash_table.insert( + relt_path.clone(), + Self::sha2sum_recursive(&entry_path, root)?, + ); + } else { + let mut sha2_context = Context::new(&SHA256); + sha2_context.update(&fs::read(entry_path)?); + hash_table.insert(relt_path.clone(), sha2_context.finish()); + } + } + + let mut sha2_context = Context::new(&SHA256); + for entry in hash_table { + sha2_context.update(&entry.0.into_string().as_bytes()); + sha2_context.update(entry.1.as_ref()); + } + + Ok(sha2_context.finish()) + } } fn main() { @@ -161,49 +209,28 @@ fn main() { fs::create_dir_all(&workspace_submodule_version_embed_dir) .expect("Failed to create submodule version directory"); - let boto3_submodule_dir = Path::new("resources/config/sdks/boto3"); - let boto3_repo = - Repository::open(&boto3_submodule_dir).expect("Failed to open boto3 repository"); - - let boto3_info = GitSubmoduleVersion { - git_commit_hash: get_repository_commit(&boto3_repo) - .expect("Failed to get boto3 repository commit"), - git_tag: get_repository_tag(&boto3_repo).expect("Failed to get boto3 repository tag"), - data_hash: format!( - "{:X}", - sha2sum_recursive(&boto3_dir, &boto3_dir) - .expect("Failed to compute checksum over simplified boto3 data") - ), - }; + let boto3_info = GitSubmoduleVersion::new(Path::new("resources/config/sdks/boto3"), &boto3_dir); - let boto3_submodule_version_dir = - &workspace_submodule_version_embed_dir.join("boto3_version.json"); let boto3_info_json = serde_json::to_string(&boto3_info).expect("Failed to serialize boto3 version metadata"); - fs::write(boto3_submodule_version_dir, boto3_info_json) - .expect("Failed to write boto3 version metadata"); - - let botocore_submodule_dir = Path::new("resources/config/sdks/botocore-data"); - let botocore_repo = - 
Repository::open(botocore_submodule_dir).expect("Failed to open botocore repository"); - - let botocore_info = GitSubmoduleVersion { - git_commit_hash: get_repository_commit(&botocore_repo) - .expect("Failed to get botocore repository commit"), - git_tag: get_repository_tag(&botocore_repo).expect("Failed to get botocore repository tag"), - data_hash: format!( - "{:X}", - sha2sum_recursive(&simplified_dir, &simplified_dir) - .expect("Failed to compute checksum over simplified botocore data") - ), - }; + fs::write( + &workspace_submodule_version_embed_dir.join("boto3_version.json"), + boto3_info_json, + ) + .expect("Failed to write boto3 version metadata"); + + let botocore_info = GitSubmoduleVersion::new( + Path::new("resources/config/sdks/botocore-data"), + &simplified_dir, + ); - let botocore_submodule_version_dir = - &workspace_submodule_version_embed_dir.join("botocore_version.json"); let botocore_info_json = serde_json::to_string(&botocore_info) .expect("Failed to serialize botocore version metadata"); - fs::write(botocore_submodule_version_dir, botocore_info_json) - .expect("Failed to write botocore version metadata"); + fs::write( + &workspace_submodule_version_embed_dir.join("botocore_version.json"), + botocore_info_json, + ) + .expect("Failed to write botocore version metadata"); } fn process_botocore_data( @@ -425,47 +452,6 @@ fn copy_dir_recursive(src: &Path, dst: &Path) -> Result<(), Box Result< - GenericArray, B0>, B0>, B0>, B0>, B0>>, - Box, -> { - let mut hash_table: BTreeMap< - RelativePathBuf, - GenericArray, B0>, B0>, B0>, B0>, B0>>, - > = BTreeMap::new(); - // let next_root = if (root.is_none()) {Some(cwd)} else {root}; - - let mut dir_entry_list = fs::read_dir(cwd)? 
- .map(|res| res.map(|e| e.path())) - .collect::, io::Error>>()?; - dir_entry_list.sort(); - - for entry_path in dir_entry_list { - let relt_path = entry_path.clone().relative_to(root)?; - if (entry_path.is_dir()) { - hash_table.insert(relt_path.clone(), sha2sum_recursive(&entry_path, root)?); - } else { - hash_table.insert( - relt_path.clone(), - Sha256::default() - .chain_update(fs::read(entry_path)?) - .finalize(), - ); - } - } - - let mut sha2 = Sha256::new(); - for entry in hash_table { - sha2.update(entry.0.into_string()); - sha2.update(entry.1); - } - - Ok(sha2.finalize()) -} - fn process_boto3_data( boto3_path: &Path, output_dir: &Path, diff --git a/iam-policy-autopilot-policy-generation/resources/config/sdks/boto3 b/iam-policy-autopilot-policy-generation/resources/config/sdks/boto3 index bcabe71..b4d31fc 160000 --- a/iam-policy-autopilot-policy-generation/resources/config/sdks/boto3 +++ b/iam-policy-autopilot-policy-generation/resources/config/sdks/boto3 @@ -1 +1 @@ -Subproject commit bcabe71e19937809cf24e55d5fbeab71de8ea037 +Subproject commit b4d31fc1798a1cfa269433e8e8c9a7516f276896 diff --git a/iam-policy-autopilot-policy-generation/src/api/get_submodule_version.rs b/iam-policy-autopilot-policy-generation/src/api/get_submodule_version.rs index f156bd1..aaf425d 100644 --- a/iam-policy-autopilot-policy-generation/src/api/get_submodule_version.rs +++ b/iam-policy-autopilot-policy-generation/src/api/get_submodule_version.rs @@ -1,4 +1,4 @@ -use crate::errors::{ExtractorError, Result}; +use crate::errors::Result; use crate::{api::model::GitSubmoduleMetadata, embedded_data::GitSubmoduleVersionInfo}; /// Gets the version information for the boto3 submodule. 
diff --git a/iam-policy-autopilot-policy-generation/src/api/model.rs b/iam-policy-autopilot-policy-generation/src/api/model.rs index e37ccac..273f7c3 100644 --- a/iam-policy-autopilot-policy-generation/src/api/model.rs +++ b/iam-policy-autopilot-policy-generation/src/api/model.rs @@ -54,13 +54,13 @@ pub struct AwsContext { /// Exposes git version and commit hash for boto3 and botocore #[derive(Debug, Clone, Serialize, Deserialize)] pub struct GitSubmoduleMetadata { - /// the git commit hash + /// the commit of boto3/botocore, returned on calls to iam-policy-autopilot --version --debug #[serde(rename = "gitCommit")] pub git_commit_hash: String, - /// the git commit tag + /// the git tag of boto3/botocore, returned on calls to iam-policy-autopilot --version --debug #[serde(rename = "gitTag")] pub git_tag: Option, - /// the simplified data file hash + /// the sha hash of boto3/botocore simplified models, returned on calls to iam-policy-autopilot --version --debug #[serde(rename = "dataHash")] pub data_hash: String, } diff --git a/iam-policy-autopilot-policy-generation/src/embedded_data.rs b/iam-policy-autopilot-policy-generation/src/embedded_data.rs index 3e895f8..bd9ddf1 100644 --- a/iam-policy-autopilot-policy-generation/src/embedded_data.rs +++ b/iam-policy-autopilot-policy-generation/src/embedded_data.rs @@ -12,7 +12,6 @@ use crate::api::model::GitSubmoduleMetadata; use crate::errors::{ExtractorError, Result}; use crate::extraction::sdk_model::SdkServiceDefinition; use rust_embed::RustEmbed; -use serde::{Deserialize, Serialize}; /// Embedded AWS service definitions with compression /// From 869f80ab3195966c0d02f5ab933e76ba6c05a69e Mon Sep 17 00:00:00 2001 From: Hongbo Wei Date: Fri, 19 Dec 2025 14:34:14 -0800 Subject: [PATCH 03/10] refactor: fix clippy --- .../build.rs | 25 ++++++++----------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/iam-policy-autopilot-policy-generation/build.rs b/iam-policy-autopilot-policy-generation/build.rs index 
57fe1ce..8dcd6b3 100644 --- a/iam-policy-autopilot-policy-generation/build.rs +++ b/iam-policy-autopilot-policy-generation/build.rs @@ -71,20 +71,17 @@ pub struct GitSubmoduleVersion { impl GitSubmoduleVersion { fn new(git_path: &Path, data_path: &PathBuf) -> GitSubmoduleVersion { - let repository = Repository::open(&git_path) - .expect(&format!("Failed to open repository at path {:?}", git_path)); + let repository = Repository::open(git_path) + .unwrap_or_else(|_| panic!("Failed to open repository at path {:?}", git_path)); GitSubmoduleVersion { - git_commit_hash: get_repository_commit(&repository).expect(&format!( - "Failed to get repository commit at path {:?}", - git_path - )), - git_tag: get_repository_tag(&repository).expect(&format!( - "Failed to get repository tag at path {:?}", - git_path - )), + git_commit_hash: get_repository_commit(&repository).unwrap_or_else(|_| { + panic!("Failed to get repository commit at path {:?}", git_path) + }), + git_tag: get_repository_tag(&repository) + .unwrap_or_else(|_| panic!("Failed to get repository tag at path {:?}", git_path)), data_hash: format!( "{:?}", - Self::sha2sum_recursive(&data_path, &data_path).expect(&format!( + Self::sha2sum_recursive(data_path, data_path).unwrap_or_else(|_| panic!( "Failed to compute checksum over data at path {:?}", data_path )) @@ -116,7 +113,7 @@ impl GitSubmoduleVersion { let mut sha2_context = Context::new(&SHA256); for entry in hash_table { - sha2_context.update(&entry.0.into_string().as_bytes()); + sha2_context.update(entry.0.into_string().as_bytes()); sha2_context.update(entry.1.as_ref()); } @@ -214,7 +211,7 @@ fn main() { let boto3_info_json = serde_json::to_string(&boto3_info).expect("Failed to serialize boto3 version metadata"); fs::write( - &workspace_submodule_version_embed_dir.join("boto3_version.json"), + workspace_submodule_version_embed_dir.join("boto3_version.json"), boto3_info_json, ) .expect("Failed to write boto3 version metadata"); @@ -227,7 +224,7 @@ fn main() { let 
botocore_info_json = serde_json::to_string(&botocore_info) .expect("Failed to serialize botocore version metadata"); fs::write( - &workspace_submodule_version_embed_dir.join("botocore_version.json"), + workspace_submodule_version_embed_dir.join("botocore_version.json"), botocore_info_json, ) .expect("Failed to write botocore version metadata"); From fbf5280d56f0cd2ca4961f8dd01e51baeecadf87 Mon Sep 17 00:00:00 2001 From: Hongbo Wei Date: Fri, 19 Dec 2025 14:40:38 -0800 Subject: [PATCH 04/10] fix: add openssl, libssl-dev dependenies to pr build workflow --- .github/workflows/pr-checks.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/pr-checks.yml b/.github/workflows/pr-checks.yml index a96b601..8f1e66a 100644 --- a/.github/workflows/pr-checks.yml +++ b/.github/workflows/pr-checks.yml @@ -75,6 +75,7 @@ jobs: run: | sudo apt-get update sudo apt-get install -y gcc-aarch64-linux-gnu + sudo apt-get install -y pkg-config libssl-dev echo '[target.aarch64-unknown-linux-gnu]' >> ~/.cargo/config.toml echo 'linker = "aarch64-linux-gnu-gcc"' >> ~/.cargo/config.toml @@ -128,6 +129,9 @@ jobs: - uses: Swatinem/rust-cache@v2 + - name: Update dependencies + run: brew update && brew install pkg-config openssl + - name: Build run: cargo build --workspace --target ${{ matrix.target }} --verbose --release From 59b4b638fda4dfbeb25a3e117a545bfa49d5cb6c Mon Sep 17 00:00:00 2001 From: Hongbo Wei Date: Fri, 19 Dec 2025 14:53:50 -0800 Subject: [PATCH 05/10] fix: try to resolve build failure with openssl vendored https://github.com/cross-rs/cross/wiki/Recipes#openssl --- .github/workflows/pr-checks.yml | 4 ---- Cargo.toml | 1 + iam-policy-autopilot-policy-generation/Cargo.toml | 1 + 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pr-checks.yml b/.github/workflows/pr-checks.yml index 8f1e66a..a96b601 100644 --- a/.github/workflows/pr-checks.yml +++ b/.github/workflows/pr-checks.yml @@ -75,7 +75,6 @@ jobs: run: | sudo apt-get update sudo 
apt-get install -y gcc-aarch64-linux-gnu - sudo apt-get install -y pkg-config libssl-dev echo '[target.aarch64-unknown-linux-gnu]' >> ~/.cargo/config.toml echo 'linker = "aarch64-linux-gnu-gcc"' >> ~/.cargo/config.toml @@ -129,9 +128,6 @@ jobs: - uses: Swatinem/rust-cache@v2 - - name: Update dependencies - run: brew update && brew install pkg-config openssl - - name: Build run: cargo build --workspace --target ${{ matrix.target }} --verbose --release diff --git a/Cargo.toml b/Cargo.toml index deeaf8f..ff7a8a5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,6 +29,7 @@ ast-grep-core = "0.39" schemars = { version = "^1", features = ["derive"] } rust-embed = { version = "8.9", features = ["compression", "include-exclude"] } reqwest = { version = "0.12.4", features = ["rustls-tls"], default-features = false } +openssl = { version = "0.10", features = ["vendored"] } # Native async runtime and parallel processing tokio = { version = "1.0", features = ["fs", "rt", "rt-multi-thread", "macros", "signal"] } diff --git a/iam-policy-autopilot-policy-generation/Cargo.toml b/iam-policy-autopilot-policy-generation/Cargo.toml index 3b91aa2..a897564 100644 --- a/iam-policy-autopilot-policy-generation/Cargo.toml +++ b/iam-policy-autopilot-policy-generation/Cargo.toml @@ -30,6 +30,7 @@ strsim.workspace = true aws-lc-rs.workspace = true git2.workspace = true relative-path.workspace = true +openssl.workspace = true # Build dependencies From 01b266bada4b4ba9d3fdfe207a01c2a41e7fe9e6 Mon Sep 17 00:00:00 2001 From: Hongbo Wei Date: Mon, 5 Jan 2026 14:55:38 -0800 Subject: [PATCH 06/10] refactor: reset BTreeMap to HashMap in build.rs serde structs, add comments to structs in build.rs --- .../build.rs | 38 ++++++++++++++----- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/iam-policy-autopilot-policy-generation/build.rs b/iam-policy-autopilot-policy-generation/build.rs index 8dcd6b3..b683f2b 100644 --- a/iam-policy-autopilot-policy-generation/build.rs +++ 
b/iam-policy-autopilot-policy-generation/build.rs @@ -2,23 +2,39 @@ use aws_lc_rs::digest::{Context, Digest, SHA256}; use git2::{DescribeOptions, Repository}; use relative_path::PathExt; use relative_path::RelativePathBuf; +use serde::Serializer; use serde::{Deserialize, Serialize}; use serde_json::Value; use std::collections::BTreeMap; +use std::collections::HashMap; use std::env; use std::fs; use std::io; use std::path::Path; use std::path::PathBuf; +/// For use with serde's [serialize_with] attribute; copied from https://stackoverflow.com/questions/42723065/how-to-sort-hashmap-keys-when-serializing-with-serde +fn ordered_map( + value: &HashMap, + serializer: S, +) -> Result +where + S: Serializer, +{ + let ordered: BTreeMap<_, _> = value.iter().collect(); + ordered.serialize(serializer) +} + /// Simplified service definition with fields removed #[derive(Debug, Clone, Serialize, Deserialize)] struct SimplifiedServiceDefinition { #[serde(skip_serializing_if = "Option::is_none")] version: Option, metadata: ServiceMetadata, - operations: BTreeMap, - shapes: BTreeMap, + #[serde(serialize_with = "ordered_map")] + operations: HashMap, + #[serde(serialize_with = "ordered_map")] + shapes: HashMap, } /// Service metadata from AWS service definitions @@ -43,8 +59,12 @@ struct SimplifiedOperation { struct SimplifiedShape { #[serde(rename = "type")] type_name: String, - #[serde(default, skip_serializing_if = "BTreeMap::is_empty")] - members: BTreeMap, + #[serde( + default, + skip_serializing_if = "HashMap::is_empty", + serialize_with = "ordered_map" + )] + members: HashMap, #[serde(skip_serializing_if = "Option::is_none")] required: Option>, } @@ -55,7 +75,7 @@ struct ShapeReference { shape: String, } -/// This must be an exact copy of GitSubmoduleMetadata in model.rs +/// This must be an exact copy of GitSubmoduleMetadata in model.rs. 
This struct contains the boto3/botocore submodule version info; it then gets serialized into a JSON file, which is then referenced via the GitSubmoduleVersionInfoRaw RustEmbed struct in embedded_data.rs, so that this version info can get read when --version in the CLI is called. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct GitSubmoduleVersion { /// the commit of boto3/botocore, returned on calls to iam-policy-autopilot --version --debug @@ -402,8 +422,8 @@ fn extract_metadata( fn simplify_operations( operations_value: Option<&Value>, -) -> Result, Box> { - let mut simplified_operations = BTreeMap::new(); +) -> Result, Box> { + let mut simplified_operations = HashMap::new(); if let Some(Value::Object(operations)) = operations_value { for (op_name, op_value) in operations { @@ -418,8 +438,8 @@ fn simplify_operations( fn simplify_shapes( shapes_value: Option<&Value>, -) -> Result, Box> { - let mut simplified_shapes = BTreeMap::new(); +) -> Result, Box> { + let mut simplified_shapes = HashMap::new(); if let Some(Value::Object(shapes)) = shapes_value { for (shape_name, shape_value) in shapes { From c4a739cd4acb90ca0f947b42304ee1e90b4327ff Mon Sep 17 00:00:00 2001 From: Hongbo Wei Date: Mon, 5 Jan 2026 15:15:23 -0800 Subject: [PATCH 07/10] refactor: reset to btree --- .../build.rs | 36 +++++-------------- 1 file changed, 8 insertions(+), 28 deletions(-) diff --git a/iam-policy-autopilot-policy-generation/build.rs b/iam-policy-autopilot-policy-generation/build.rs index b683f2b..1c74c4b 100644 --- a/iam-policy-autopilot-policy-generation/build.rs +++ b/iam-policy-autopilot-policy-generation/build.rs @@ -2,39 +2,23 @@ use aws_lc_rs::digest::{Context, Digest, SHA256}; use git2::{DescribeOptions, Repository}; use relative_path::PathExt; use relative_path::RelativePathBuf; -use serde::Serializer; use serde::{Deserialize, Serialize}; use serde_json::Value; use std::collections::BTreeMap; -use std::collections::HashMap; use std::env; use std::fs; use std::io; use 
std::path::Path; use std::path::PathBuf; -/// For use with serde's [serialize_with] attribute; copied from https://stackoverflow.com/questions/42723065/how-to-sort-hashmap-keys-when-serializing-with-serde -fn ordered_map( - value: &HashMap, - serializer: S, -) -> Result -where - S: Serializer, -{ - let ordered: BTreeMap<_, _> = value.iter().collect(); - ordered.serialize(serializer) -} - /// Simplified service definition with fields removed #[derive(Debug, Clone, Serialize, Deserialize)] struct SimplifiedServiceDefinition { #[serde(skip_serializing_if = "Option::is_none")] version: Option, metadata: ServiceMetadata, - #[serde(serialize_with = "ordered_map")] - operations: HashMap, - #[serde(serialize_with = "ordered_map")] - shapes: HashMap, + operations: BTreeMap, + shapes: BTreeMap, } /// Service metadata from AWS service definitions @@ -59,12 +43,8 @@ struct SimplifiedOperation { struct SimplifiedShape { #[serde(rename = "type")] type_name: String, - #[serde( - default, - skip_serializing_if = "HashMap::is_empty", - serialize_with = "ordered_map" - )] - members: HashMap, + #[serde(default, skip_serializing_if = "BTreeMap::is_empty")] + members: BTreeMap, #[serde(skip_serializing_if = "Option::is_none")] required: Option>, } @@ -422,8 +402,8 @@ fn extract_metadata( fn simplify_operations( operations_value: Option<&Value>, -) -> Result, Box> { - let mut simplified_operations = HashMap::new(); +) -> Result, Box> { + let mut simplified_operations = BTreeMap::new(); if let Some(Value::Object(operations)) = operations_value { for (op_name, op_value) in operations { @@ -438,8 +418,8 @@ fn simplify_operations( fn simplify_shapes( shapes_value: Option<&Value>, -) -> Result, Box> { - let mut simplified_shapes = HashMap::new(); +) -> Result, Box> { + let mut simplified_shapes = BTreeMap::new(); if let Some(Value::Object(shapes)) = shapes_value { for (shape_name, shape_value) in shapes { From 340b7e705e9be42ab0b9fdae9a1e784388b7aaec Mon Sep 17 00:00:00 2001 From: Hongbo Wei 
Date: Wed, 7 Jan 2026 14:14:39 -0800 Subject: [PATCH 08/10] refactor: share submodule struct between build.rs and api/model.rs in policy-generation; other refactors --- iam-policy-autopilot-cli/src/commands.rs | 6 +-- iam-policy-autopilot-cli/src/main.rs | 6 +-- .../Cargo.toml | 4 -- .../build.rs | 37 +++++++------------ .../resources/config/sdks/boto3 | 2 +- .../src/api/model.rs | 14 +------ .../src/shared_submodule_model.rs | 12 ++++++ 7 files changed, 33 insertions(+), 48 deletions(-) create mode 100644 iam-policy-autopilot-policy-generation/src/shared_submodule_model.rs diff --git a/iam-policy-autopilot-cli/src/commands.rs b/iam-policy-autopilot-cli/src/commands.rs index 159b05b..25060c5 100644 --- a/iam-policy-autopilot-cli/src/commands.rs +++ b/iam-policy-autopilot-cli/src/commands.rs @@ -3,11 +3,11 @@ //! all denial types with appropriate branching logic. use crate::{output, types::ExitCode}; +use clap::crate_version; use iam_policy_autopilot_access_denied::{ApplyError, ApplyOptions, DenialType}; fn is_tty() -> bool { atty::is(atty::Stream::Stdin) && atty::is(atty::Stream::Stderr) } -use clap::crate_version; /// Returns Some(true) if user confirmed, Some(false) if declined, None if not in TTY. fn prompt_yes_no() -> Option { @@ -143,9 +143,9 @@ async fn fix_access_denied_with_service( } } -pub fn print_version_info(debug: bool) -> anyhow::Result<()> { +pub fn print_version_info(verbose: bool) -> anyhow::Result<()> { println!("{}", crate_version!()); - if debug { + if verbose { let boto3_version_metadata = iam_policy_autopilot_policy_generation::api::get_boto3_version_info()?; let botocore_version_metadata = diff --git a/iam-policy-autopilot-cli/src/main.rs b/iam-policy-autopilot-cli/src/main.rs index e7baf54..c9158cc 100644 --- a/iam-policy-autopilot-cli/src/main.rs +++ b/iam-policy-autopilot-cli/src/main.rs @@ -359,8 +359,8 @@ Only used when --transport=http.
The server will bind to 127.0.0.1 (localhost) o long_flag = "version" )] Version { - #[arg(short = 'd', long = "debug", default_value_t = false, hide = true)] - debug: bool, + #[arg(long = "verbose", default_value_t = false, hide = true)] + verbose: bool, }, } @@ -625,7 +625,7 @@ async fn main() { } } - Commands::Version { debug } => match print_version_info(debug) { + Commands::Version { verbose } => match print_version_info(verbose) { Ok(()) => ExitCode::Success, Err(e) => { print_cli_command_error(e); diff --git a/iam-policy-autopilot-policy-generation/Cargo.toml b/iam-policy-autopilot-policy-generation/Cargo.toml index 9420812..0a6d469 100644 --- a/iam-policy-autopilot-policy-generation/Cargo.toml +++ b/iam-policy-autopilot-policy-generation/Cargo.toml @@ -27,10 +27,6 @@ serde_json.workspace = true tokio.workspace = true async-trait.workspace = true strsim.workspace = true -aws-lc-rs.workspace = true -git2.workspace = true -relative-path.workspace = true -openssl.workspace = true # Build dependencies diff --git a/iam-policy-autopilot-policy-generation/build.rs b/iam-policy-autopilot-policy-generation/build.rs index 1c74c4b..d44af7f 100644 --- a/iam-policy-autopilot-policy-generation/build.rs +++ b/iam-policy-autopilot-policy-generation/build.rs @@ -55,25 +55,13 @@ struct ShapeReference { shape: String, } -/// This must be an exact copy of GitSubmoduleMetadata in model.rs. This struct contains the boto3/botocore submodule version info; it then gets serialized into a JSON file, which is then referenced via the GitSubmoduleVersionInfoRaw RustEmbed struct in embedded_data.rs, so that this version info can get read when --version in the CLI is called. 
-#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct GitSubmoduleVersion { - /// the commit of boto3/botocore, returned on calls to iam-policy-autopilot --version --debug - #[serde(rename = "gitCommit")] - pub git_commit_hash: String, - /// the git tag of boto3/botocore, returned on calls to iam-policy-autopilot --version --debug - #[serde(rename = "gitTag")] - pub git_tag: Option, - /// the sha hash of boto3/botocore simplified models, returned on calls to iam-policy-autopilot --version --debug - #[serde(rename = "dataHash")] - pub data_hash: String, -} +include!("src/shared_submodule_model.rs"); -impl GitSubmoduleVersion { - fn new(git_path: &Path, data_path: &PathBuf) -> GitSubmoduleVersion { +impl GitSubmoduleMetadata { + fn new(git_path: &Path, data_path: &PathBuf) -> GitSubmoduleMetadata { let repository = Repository::open(git_path) .unwrap_or_else(|_| panic!("Failed to open repository at path {:?}", git_path)); - GitSubmoduleVersion { + GitSubmoduleMetadata { git_commit_hash: get_repository_commit(&repository).unwrap_or_else(|_| { panic!("Failed to get repository commit at path {:?}", git_path) }), @@ -89,6 +77,9 @@ impl GitSubmoduleVersion { } } + /// Recursively computes a deterministic SHA-256 hash of a directory tree. Hashes each file's contents + /// and subdirectory recursively, then combines all hashes with their relative paths in sorted order + /// to produce a single hash representing the entire directory structure and contents. 
fn sha2sum_recursive(cwd: &Path, root: &Path) -> Result> { let mut hash_table: BTreeMap = BTreeMap::new(); @@ -100,14 +91,11 @@ impl GitSubmoduleVersion { for entry_path in dir_entry_list { let relt_path = entry_path.clone().relative_to(root)?; if entry_path.is_dir() { - hash_table.insert( - relt_path.clone(), - Self::sha2sum_recursive(&entry_path, root)?, - ); + hash_table.insert(relt_path, Self::sha2sum_recursive(&entry_path, root)?); } else { let mut sha2_context = Context::new(&SHA256); sha2_context.update(&fs::read(entry_path)?); - hash_table.insert(relt_path.clone(), sha2_context.finish()); + hash_table.insert(relt_path, sha2_context.finish()); } } @@ -206,7 +194,8 @@ fn main() { fs::create_dir_all(&workspace_submodule_version_embed_dir) .expect("Failed to create submodule version directory"); - let boto3_info = GitSubmoduleVersion::new(Path::new("resources/config/sdks/boto3"), &boto3_dir); + let boto3_info = + GitSubmoduleMetadata::new(Path::new("resources/config/sdks/boto3"), &boto3_dir); let boto3_info_json = serde_json::to_string(&boto3_info).expect("Failed to serialize boto3 version metadata"); @@ -216,7 +205,7 @@ fn main() { ) .expect("Failed to write boto3 version metadata"); - let botocore_info = GitSubmoduleVersion::new( + let botocore_info = GitSubmoduleMetadata::new( Path::new("resources/config/sdks/botocore-data"), &simplified_dir, ); @@ -523,8 +512,8 @@ fn process_boto3_service_version( Ok(has_resources_file) } +/// Performs git describe --exact-match --tags fn get_repository_tag(repo: &Repository) -> Result, Box> { - // we want to do this: git describe --exact-match --tags let mut describe_options = DescribeOptions::new(); describe_options.max_candidates_tags(0); describe_options.describe_tags(); diff --git a/iam-policy-autopilot-policy-generation/resources/config/sdks/boto3 b/iam-policy-autopilot-policy-generation/resources/config/sdks/boto3 index b4d31fc..bcabe71 160000 --- a/iam-policy-autopilot-policy-generation/resources/config/sdks/boto3 
+++ b/iam-policy-autopilot-policy-generation/resources/config/sdks/boto3 @@ -1 +1 @@ -Subproject commit b4d31fc1798a1cfa269433e8e8c9a7516f276896 +Subproject commit bcabe71e19937809cf24e55d5fbeab71de8ea037 diff --git a/iam-policy-autopilot-policy-generation/src/api/model.rs b/iam-policy-autopilot-policy-generation/src/api/model.rs index 273f7c3..77c9bb0 100644 --- a/iam-policy-autopilot-policy-generation/src/api/model.rs +++ b/iam-policy-autopilot-policy-generation/src/api/model.rs @@ -51,19 +51,7 @@ pub struct AwsContext { pub account: String, } -/// Exposes git version and commit hash for boto3 and botocore -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct GitSubmoduleMetadata { - /// the commit of boto3/botocore, returned on calls to iam-policy-autopilot --version --debug - #[serde(rename = "gitCommit")] - pub git_commit_hash: String, - /// the git tag of boto3/botocore, returned on calls to iam-policy-autopilot --version --debug - #[serde(rename = "gitTag")] - pub git_tag: Option, - /// the sha hash of boto3/botocore simplified models, returned on calls to iam-policy-autopilot --version --debug - #[serde(rename = "dataHash")] - pub data_hash: String, -} +include!("../shared_submodule_model.rs"); impl AwsContext { /// Creates a new AwsContext with the partition automatically derived from the region. 
diff --git a/iam-policy-autopilot-policy-generation/src/shared_submodule_model.rs b/iam-policy-autopilot-policy-generation/src/shared_submodule_model.rs new file mode 100644 index 0000000..df02012 --- /dev/null +++ b/iam-policy-autopilot-policy-generation/src/shared_submodule_model.rs @@ -0,0 +1,12 @@ + +/// Exposes git version and commit hash for boto3 and botocore +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "PascalCase")] +pub struct GitSubmoduleMetadata { + /// the commit of boto3/botocore, returned on calls to iam-policy-autopilot --version --debug + pub git_commit_hash: String, + /// the git tag of boto3/botocore, returned on calls to iam-policy-autopilot --version --debug + pub git_tag: Option, + /// the sha hash of boto3/botocore simplified models, returned on calls to iam-policy-autopilot --version --debug + pub data_hash: String, +} \ No newline at end of file From 0b8b719998d29dc8b90dff34daed969227357a93 Mon Sep 17 00:00:00 2001 From: Hongbo Wei Date: Wed, 7 Jan 2026 14:21:01 -0800 Subject: [PATCH 09/10] refactor: minor refactor --- iam-policy-autopilot-policy-generation/build.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/iam-policy-autopilot-policy-generation/build.rs b/iam-policy-autopilot-policy-generation/build.rs index d44af7f..761edcd 100644 --- a/iam-policy-autopilot-policy-generation/build.rs +++ b/iam-policy-autopilot-policy-generation/build.rs @@ -100,9 +100,9 @@ impl GitSubmoduleMetadata { } let mut sha2_context = Context::new(&SHA256); - for entry in hash_table { - sha2_context.update(entry.0.into_string().as_bytes()); - sha2_context.update(entry.1.as_ref()); + for (path, digest) in hash_table { + sha2_context.update(path.into_string().as_bytes()); + sha2_context.update(digest.as_ref()); } Ok(sha2_context.finish()) From ef97c392d1f2e83120e3a17630cd7bf9dfe89711 Mon Sep 17 00:00:00 2001 From: Hongbo Wei Date: Fri, 9 Jan 2026 13:13:56 -0800 Subject: [PATCH 10/10] docs: update comments and 
fix typos in shared_submodule_model.rs --- .../src/shared_submodule_model.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/iam-policy-autopilot-policy-generation/src/shared_submodule_model.rs b/iam-policy-autopilot-policy-generation/src/shared_submodule_model.rs index df02012..9b1acf5 100644 --- a/iam-policy-autopilot-policy-generation/src/shared_submodule_model.rs +++ b/iam-policy-autopilot-policy-generation/src/shared_submodule_model.rs @@ -1,12 +1,14 @@ - /// Exposes git version and commit hash for boto3 and botocore +/// The struct defined here is used in both build.rs and model.rs. +/// To share this struct in both the library and the build step, we define it here, +/// and use include!(...) to include it in both uses. #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(rename_all = "PascalCase")] pub struct GitSubmoduleMetadata { - /// the commit of boto3/botocore, returned on calls to iam-policy-autopilot --version --debug + /// the commit of boto3/botocore, returned on calls to iam-policy-autopilot --version --verbose pub git_commit_hash: String, - /// the git tag of boto3/botocore, returned on calls to iam-policy-autopilot --version --debug + /// the git tag of boto3/botocore, returned on calls to iam-policy-autopilot --version --verbose pub git_tag: Option, - /// the sha hash of boto3/botocore simplified models, returned on calls to iam-policy-autopilot --version --debug + /// the sha hash of boto3/botocore simplified models, returned on calls to iam-policy-autopilot --version --verbose pub data_hash: String, } \ No newline at end of file