diff --git a/.beads/.gitignore b/.beads/.gitignore new file mode 100644 index 00000000..374adb81 --- /dev/null +++ b/.beads/.gitignore @@ -0,0 +1,32 @@ +# SQLite databases +*.db +*.db?* +*.db-journal +*.db-wal +*.db-shm + +# Daemon runtime files +daemon.lock +daemon.log +daemon.pid +bd.sock + +# Local version tracking (prevents upgrade notification spam after git ops) +.local_version + +# Legacy database files +db.sqlite +bd.db + +# Merge artifacts (temporary files from 3-way merge) +beads.base.jsonl +beads.base.meta.json +beads.left.jsonl +beads.left.meta.json +beads.right.jsonl +beads.right.meta.json + +# Keep JSONL exports and config (source of truth for git) +!issues.jsonl +!metadata.json +!config.json diff --git a/.beads/README.md b/.beads/README.md new file mode 100644 index 00000000..50f281f0 --- /dev/null +++ b/.beads/README.md @@ -0,0 +1,81 @@ +# Beads - AI-Native Issue Tracking + +Welcome to Beads! This repository uses **Beads** for issue tracking - a modern, AI-native tool designed to live directly in your repository, alongside your code. + +## What is Beads? + +Beads is issue tracking that lives in your repo, making it perfect for AI coding agents and developers who want their issues close to their code. No web UI required - everything works through the CLI and integrates seamlessly with git. + +**Learn more:** [github.com/steveyegge/beads](https://github.com/steveyegge/beads) + +## Quick Start + +### Essential Commands + +```bash +# Create new issues +bd create "Add user authentication" + +# View all issues +bd list + +# View issue details +bd show <issue-id> + +# Update issue status +bd update <issue-id> --status in_progress +bd update <issue-id> --status done + +# Sync with git remote +bd sync +``` + +### Working with Issues + +Issues in Beads are: +- **Git-native**: Stored in `.beads/issues.jsonl` and synced like code +- **AI-friendly**: CLI-first design works perfectly with AI coding agents +- **Branch-aware**: Issues can follow your branch workflow +- **Always in sync**: Auto-syncs with your commits + +## Why Beads?
+ +✨ **AI-Native Design** +- Built specifically for AI-assisted development workflows +- CLI-first interface works seamlessly with AI coding agents +- No context switching to web UIs + +🚀 **Developer Focused** +- Issues live in your repo, right next to your code +- Works offline, syncs when you push +- Fast, lightweight, and stays out of your way + +🔧 **Git Integration** +- Automatic sync with git commits +- Branch-aware issue tracking +- Intelligent JSONL merge resolution + +## Get Started with Beads + +Try Beads in your own projects: + +```bash +# Install Beads +curl -sSL https://raw.githubusercontent.com/steveyegge/beads/main/scripts/install.sh | bash + +# Initialize in your repo +bd init + +# Create your first issue +bd create "Try out Beads" +``` + +## Learn More + +- **Documentation**: [github.com/steveyegge/beads/docs](https://github.com/steveyegge/beads/tree/main/docs) + +- **Quick Start Guide**: Run `bd quickstart` +- **Examples**: [github.com/steveyegge/beads/examples](https://github.com/steveyegge/beads/tree/main/examples) + +--- + +*Beads: Issue tracking that moves at the speed of thought* ⚡ diff --git a/.beads/config.yaml b/.beads/config.yaml new file mode 100644 index 00000000..f2427856 --- /dev/null +++ b/.beads/config.yaml @@ -0,0 +1,62 @@ +# Beads Configuration File +# This file configures default behavior for all bd commands in this repository +# All settings can also be set via environment variables (BD_* prefix) +# or overridden with command-line flags + +# Issue prefix for this repository (used by bd init) +# If not set, bd init will auto-detect from directory name +# Example: issue-prefix: "myproject" creates issues like "myproject-1", "myproject-2", etc. +# issue-prefix: "" + +# Use no-db mode: load from JSONL, no SQLite, write back after each command +# When true, bd will use .beads/issues.jsonl as the source of truth +# instead of SQLite database +# no-db: false + +# Disable daemon for RPC communication (forces direct database access) +# no-daemon: false + +# Disable auto-flush of database to JSONL after mutations +# no-auto-flush: false + +# Disable auto-import from JSONL when it's newer than database +# no-auto-import: false + +# Enable JSON output by default +# json: false + +# Default actor for audit trails (overridden by BD_ACTOR or --actor) +# actor: "" + +# Path to database (overridden by BEADS_DB or --db) +# db: "" + +# Auto-start daemon if not running (can also use BEADS_AUTO_START_DAEMON) +# auto-start-daemon: true + +# Debounce interval for auto-flush (can also use BEADS_FLUSH_DEBOUNCE) +# flush-debounce: "5s" + +# Git branch for beads commits (bd sync will commit to this branch) +# IMPORTANT: Set this for team projects so all clones use the same sync branch. +# This setting persists across clones (unlike database config which is gitignored). +# Can also use BEADS_SYNC_BRANCH env var for local override. +# If not set, bd sync will require you to run 'bd config set sync.branch <branch>'. +# sync-branch: "beads-sync" + +# Multi-repo configuration (experimental - bd-307) +# Allows hydrating from multiple repositories and routing writes to the correct JSONL +# repos: +# primary: "."
# Primary repo (where this database lives) +# additional: # Additional repos to hydrate from (read-only) +# - ~/beads-planning # Personal planning repo +# - ~/work-planning # Work planning repo + +# Integration settings (access with 'bd config get/set') +# These are stored in the database, not in this file: +# - jira.url +# - jira.project +# - linear.url +# - linear.api-key +# - github.org +# - github.repo diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl index 82d70d02..5e1640eb 100644 --- a/.beads/issues.jsonl +++ b/.beads/issues.jsonl @@ -1,4 +1,20 @@ +{"id":"contrast-ai-smartfix-action-0dj","title":"Create shared test helper modules","description":"Create test_helpers/ directory with agent_patches.py, build_patches.py, api_patches.py, common_patches.py containing reusable patch lists for consistent mocking across test files.","status":"closed","priority":0,"issue_type":"task","created_at":"2026-01-05T14:34:30.374819-05:00","updated_at":"2026-01-05T15:35:09.050783-05:00","closed_at":"2026-01-05T15:35:09.050783-05:00"} +{"id":"contrast-ai-smartfix-action-31m","title":"Improve Build/Test Workflow test coverage (19-27% → 70%)","description":"Add comprehensive tests for build_runner.py and formatter.py. Cover build execution, output capture, timeouts, formatter execution and graceful degradation. Test command validation security.","status":"closed","priority":1,"issue_type":"feature","created_at":"2026-01-05T14:34:31.598607-05:00","updated_at":"2026-01-05T15:46:45.143289-05:00","closed_at":"2026-01-05T15:46:45.143289-05:00","dependencies":[{"issue_id":"contrast-ai-smartfix-action-31m","depends_on_id":"contrast-ai-smartfix-action-gnt","type":"blocks","created_at":"2026-01-05T14:34:54.906973-05:00","created_by":"daemon"},{"issue_id":"contrast-ai-smartfix-action-31m","depends_on_id":"contrast-ai-smartfix-action-0dj","type":"blocks","created_at":"2026-01-05T14:34:55.017205-05:00","created_by":"daemon"}]} +{"id":"contrast-ai-smartfix-action-61p","title":"Improve Entry Point test coverage (32% → 60%)","description":"Add comprehensive tests for main.py orchestration. Cover SmartFix agent workflow, external agent workflows (Copilot/Claude), merge/closed handlers, version checking, config validation, top-level error handling.","status":"closed","priority":2,"issue_type":"feature","created_at":"2026-01-05T14:34:32.67711-05:00","updated_at":"2026-01-06T16:21:19.536285-05:00","closed_at":"2026-01-06T16:21:19.536285-05:00","dependencies":[{"issue_id":"contrast-ai-smartfix-action-61p","depends_on_id":"contrast-ai-smartfix-action-gnt","type":"blocks","created_at":"2026-01-05T14:34:55.345082-05:00","created_by":"daemon"},{"issue_id":"contrast-ai-smartfix-action-61p","depends_on_id":"contrast-ai-smartfix-action-0dj","type":"blocks","created_at":"2026-01-05T14:34:55.451589-05:00","created_by":"daemon"}]} +{"id":"contrast-ai-smartfix-action-6t4","title":"Add error logging to command validation","description":"Add _log_config_message() call in src/config.py line 206 before re-raising ConfigurationError. 
Log the CommandValidationError details to provide immediate feedback when command validation fails.","status":"in_progress","priority":2,"issue_type":"task","created_at":"2026-01-07T15:24:23.903371-05:00","updated_at":"2026-01-08T13:09:55.916809-05:00"} {"id":"contrast-ai-smartfix-action-8re","title":"Integrate validation into Config class","description":"Integrate command validation into src/config.py Config.__init__():\n- Import command_validator module\n- Add _validate_command() method to Config class\n- Call validation for BUILD_COMMAND after reading from environment (line ~82)\n- Call validation for FORMATTING_COMMAND after reading from environment (line ~84)\n- Ensure CommandValidationError is caught and converted to ConfigurationError\n- Follow existing config validation patterns (similar to _check_contrast_config_values_exist)\n- Maintain fail-fast behavior with clear error messages\n\nRelated to JIRA: AIML-337","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-23T09:50:37.859629-05:00","updated_at":"2025-12-23T10:34:27.5546-05:00","closed_at":"2025-12-23T10:34:27.5546-05:00","dependencies":[{"issue_id":"contrast-ai-smartfix-action-8re","depends_on_id":"contrast-ai-smartfix-action-gad","type":"blocks","created_at":"2025-12-23T09:51:11.594923-05:00","created_by":"daemon"}]} +{"id":"contrast-ai-smartfix-action-9hu","title":"Improve API Integration test coverage (28-66% → 70%)","description":"Add comprehensive tests for contrast_api.py, closed_handler.py, merge_handler.py. Cover HTTP error handling, timeouts, credit tracking, PR event handling, remediation status notifications.","status":"closed","priority":1,"issue_type":"feature","created_at":"2026-01-05T14:34:32.133956-05:00","updated_at":"2026-01-05T15:59:37.520196-05:00","closed_at":"2026-01-05T15:59:37.520196-05:00","dependencies":[{"issue_id":"contrast-ai-smartfix-action-9hu","depends_on_id":"contrast-ai-smartfix-action-gnt","type":"blocks","created_at":"2026-01-05T14:34:55.126797-05:00","created_by":"daemon"},{"issue_id":"contrast-ai-smartfix-action-9hu","depends_on_id":"contrast-ai-smartfix-action-0dj","type":"blocks","created_at":"2026-01-05T14:34:55.236813-05:00","created_by":"daemon"}]} +{"id":"contrast-ai-smartfix-action-cpi","title":"Improve SCM Operations test coverage (38-68% → 75%)","description":"Expand tests for github_operations.py (38% → 75%), fill gaps in git_operations.py and scm_operations.py. 
Focus on GitHub-specific operations: PR/issue creation, labels, GraphQL queries.","status":"closed","priority":3,"issue_type":"feature","created_at":"2026-01-05T14:34:33.129041-05:00","updated_at":"2026-01-06T16:38:37.943404-05:00","closed_at":"2026-01-06T16:38:37.943404-05:00","dependencies":[{"issue_id":"contrast-ai-smartfix-action-cpi","depends_on_id":"contrast-ai-smartfix-action-gnt","type":"blocks","created_at":"2026-01-05T14:34:55.560893-05:00","created_by":"daemon"},{"issue_id":"contrast-ai-smartfix-action-cpi","depends_on_id":"contrast-ai-smartfix-action-0dj","type":"blocks","created_at":"2026-01-05T14:34:55.670578-05:00","created_by":"daemon"}]} +{"id":"contrast-ai-smartfix-action-dtc","title":"Test event_loop_utils.py (13% → 70%)","description":"Add comprehensive tests for event_loop_utils.py async event loop management.\n\nCurrent coverage: 13%\nTarget: 70%\n\nCritical paths:\n- _run_agent_in_event_loop: event loop creation/cleanup, platform-specific setup (Windows vs Unix)\n- Agent execution with success/exceptions/timeouts\n- Event loop cleanup on errors\n- Multiple sequential agent runs\n\nTest cases:\n- Successful agent execution\n- Agent exception handling with cleanup\n- Timeout scenarios\n- Platform-specific behavior (Windows ProactorEventLoop)\n- Event loop contamination prevention\n\nMocking:\n- Agent execution (LLM calls)\n- asyncio.new_event_loop, asyncio.set_event_loop\n- platform.system\n- File operations\n\nReference: docs/plans/2026-01-05-test-coverage-improvement-design.md lines 100-135","status":"closed","priority":1,"issue_type":"task","created_at":"2026-01-05T16:38:21.109343-05:00","updated_at":"2026-01-06T13:07:30.1321-05:00","closed_at":"2026-01-06T13:07:30.1321-05:00"} +{"id":"contrast-ai-smartfix-action-e0r","title":"Test sub_agent_executor.py (50% → 70%)","description":"Add comprehensive tests for sub_agent_executor.py sub-agent orchestration logic.\n\nCurrent coverage: 50%\nTarget: 70%\n\nCritical paths:\n- Spawning sub-agents\n- Collecting results\n- Handling sub-agent failures\n- Timeout management\n\nTest cases:\n- Execute sub-agent successfully\n- Sub-agent throws exception (isolation)\n- Sub-agent timeout\n- Multiple parallel sub-agents\n- Result aggregation\n\nMocking:\n- LLM client calls\n- Agent execution\n- All I/O operations\n\nReference: docs/plans/2026-01-05-test-coverage-improvement-design.md lines 136-157","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-05T16:38:36.097839-05:00","updated_at":"2026-01-06T16:07:23.475848-05:00","closed_at":"2026-01-06T16:07:23.475848-05:00","dependencies":[{"issue_id":"contrast-ai-smartfix-action-e0r","depends_on_id":"contrast-ai-smartfix-action-uoy","type":"blocks","created_at":"2026-01-05T16:38:41.752737-05:00","created_by":"daemon"}]} {"id":"contrast-ai-smartfix-action-gad","title":"Create command validation module","description":"Create src/smartfix/config/command_validator.py with:\n- ALLOWED_COMMANDS constant (full allowlist for Java, .NET, Python, PHP, NodeJS, build tools, shell utilities)\n- BLOCKED_PATTERNS regex list (command substitution, eval, exec, dangerous operations)\n- validate_shell_command() - ensures sh/bash only execute .sh files, blocks -c flag\n- validate_redirect() - allows relative paths only, blocks absolute paths and .. 
traversal\n- contains_dangerous_patterns() - checks command against blocked patterns\n- split_command_chain() - parses commands by operators (\u0026\u0026, ||, ;, |)\n- parse_command_segment() - extracts executable and arguments from command segment\n- extract_redirects() - finds file redirects in command\n- validate_command() - main validation function that orchestrates all checks\n- CommandValidationError exception class\n\nRelated to JIRA: AIML-337","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-23T09:49:50.169966-05:00","updated_at":"2025-12-23T10:28:03.234049-05:00","closed_at":"2025-12-23T10:28:03.234049-05:00"} +{"id":"contrast-ai-smartfix-action-gnt","title":"Add coverage target to Makefile","description":"Add 'make coverage' target to run tests with coverage.py and generate HTML reports. This is a prerequisite for all other test work.","status":"closed","priority":0,"issue_type":"task","created_at":"2026-01-05T14:34:17.89705-05:00","updated_at":"2026-01-05T14:54:39.771242-05:00","closed_at":"2026-01-05T14:54:39.771242-05:00"} +{"id":"contrast-ai-smartfix-action-iuo","title":"Improve Agent Orchestration test coverage (10-27% → 70%)","description":"Add comprehensive tests for smartfix_agent.py, event_loop_utils.py, sub_agent_executor.py, mcp_manager.py. Focus on remediation workflow, async event loops, sub-agent execution, and MCP connection lifecycle. Use full mocking pattern.","status":"closed","priority":1,"issue_type":"feature","created_at":"2026-01-05T14:34:31.000974-05:00","updated_at":"2026-01-05T16:38:48.182515-05:00","closed_at":"2026-01-05T16:38:48.182515-05:00","dependencies":[{"issue_id":"contrast-ai-smartfix-action-iuo","depends_on_id":"contrast-ai-smartfix-action-gnt","type":"blocks","created_at":"2026-01-05T14:34:54.665564-05:00","created_by":"daemon"},{"issue_id":"contrast-ai-smartfix-action-iuo","depends_on_id":"contrast-ai-smartfix-action-0dj","type":"blocks","created_at":"2026-01-05T14:34:54.794003-05:00","created_by":"daemon"}]} +{"id":"contrast-ai-smartfix-action-ix3","title":"Add tests for source-based command validation","description":"Create comprehensive tests for the new source parameter in _validate_command. Test cases: 1) config-sourced commands skip validation (including dangerous patterns), 2) ai_detected commands go through full validation, 3) backward compatibility with default parameter. Add tests to test/test_config_integration.py.","status":"closed","priority":2,"issue_type":"task","created_at":"2026-01-07T15:28:06.081383-05:00","updated_at":"2026-01-08T11:01:55.118122-05:00","closed_at":"2026-01-08T11:00:36.265023-05:00","dependencies":[{"issue_id":"contrast-ai-smartfix-action-ix3","depends_on_id":"contrast-ai-smartfix-action-ue9","type":"blocks","created_at":"2026-01-07T15:31:18.793851-05:00","created_by":"daemon"}]} +{"id":"contrast-ai-smartfix-action-p4l","title":"Implement graceful degradation for formatter failures","description":"Code review identified that formatter.py calls error_exit() on all failures, but the design plan requires graceful degradation (log warning and continue). Formatter should be best-effort - if it fails, log a warning and allow the remediation to continue. This aligns with industry best practices where formatters are optional quality-of-life tools.\n\nAffected file: src/smartfix/domains/workflow/formatter.py:75\nRequired changes:\n1. Replace error_exit() call with warning log\n2. Update tests to verify warnings instead of error_exit calls\n3. 
Add test for continued execution after formatter failure\n\nReference: Test Coverage Improvement Plan - Build/Test Workflow section","status":"closed","priority":1,"issue_type":"bug","created_at":"2026-01-05T15:46:33.475021-05:00","updated_at":"2026-01-05T16:29:50.210724-05:00","closed_at":"2026-01-05T16:29:50.210728-05:00"} {"id":"contrast-ai-smartfix-action-q9y","title":"Create comprehensive test suite for command validation","description":"Create test/test_command_validation.py with comprehensive test coverage:\n- Test all allowed commands validate successfully (Java, .NET, Python, PHP, NodeJS, build tools)\n- Test blocked executables are rejected with proper error messages\n- Test command chaining with operators (\u0026\u0026, ||, ;, |)\n- Test shell script execution: sh/bash with .sh files allowed, -c flag blocked\n- Test redirect validation: relative paths pass, absolute paths and .. traversal fail\n- Test dangerous pattern detection: command substitution, eval, exec, piping to shell\n- Test edge cases: empty commands, whitespace, special characters, complex chains\n- Test error messages are clear and actionable\n- Ensure all tests pass and maintain existing test coverage (165+ tests)\n\nRelated to JIRA: AIML-337","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-23T09:51:20.598647-05:00","updated_at":"2025-12-23T10:35:10.53909-05:00","closed_at":"2025-12-23T10:35:10.53909-05:00","dependencies":[{"issue_id":"contrast-ai-smartfix-action-q9y","depends_on_id":"contrast-ai-smartfix-action-gad","type":"blocks","created_at":"2025-12-23T09:51:56.723262-05:00","created_by":"daemon"},{"issue_id":"contrast-ai-smartfix-action-q9y","depends_on_id":"contrast-ai-smartfix-action-8re","type":"blocks","created_at":"2025-12-23T09:51:56.843412-05:00","created_by":"daemon"}]} +{"id":"contrast-ai-smartfix-action-t5c","title":"Add sbt to command allowlist","description":"Add 'sbt' (Scala Build Tool) to the ALLOWED_COMMANDS list in src/smartfix/config/command_validator.py line 46, alongside other Java/Scala build tools (mvn, gradle, ant).","status":"closed","priority":1,"issue_type":"task","created_at":"2026-01-07T15:18:12.66074-05:00","updated_at":"2026-01-08T13:09:55.918101-05:00","closed_at":"2026-01-08T11:44:58.178248-05:00"} +{"id":"contrast-ai-smartfix-action-ue9","title":"Add source parameter to _validate_command for config vs AI-generated commands","description":"Modify _validate_command() in src/config.py to accept a 'source' parameter ('config' or 'ai_detected'). Config-sourced commands (from action.yml inputs) should skip allowlist validation as they come from trusted humans. AI-generated commands (future feature) should go through full validation. Update Config.__init__ to pass source='config' for BUILD_COMMAND and FORMATTING_COMMAND. Add logging to indicate when validation is skipped. 
This prepares infrastructure for the AI agent feature coming within next 1-2 weeks.","status":"closed","priority":1,"issue_type":"feature","created_at":"2026-01-07T15:25:48.489324-05:00","updated_at":"2026-01-08T09:59:52.425005-05:00","closed_at":"2026-01-08T09:58:40.224241-05:00"} +{"id":"contrast-ai-smartfix-action-uoy","title":"Test smartfix_agent.py (25% → 70%)","description":"Add comprehensive tests for smartfix_agent.py remediation workflow state machine.\n\nCurrent coverage: 25%\nTarget: 70%\n\nCritical paths:\n- Initial build validation (pass/fail)\n- Fix agent execution (success/failure)\n- QA loop (success/retry/failure)\n- Session completion (all failure categories)\n\nTest cases:\n- Build fails before fix → INITIAL_BUILD_FAILURE\n- Fix agent succeeds, no build command → immediate success\n- Fix agent succeeds, QA passes → success\n- Fix agent succeeds, QA exhausts retries → QA_FAILURE\n- Fix agent throws exception → AGENT_FAILURE\n- Build never passes after fix → BUILD_FAILURE\n\nMocking:\n- _run_ai_fix_agent (return mock PR body or None)\n- _run_ai_qa_agent (return success/failure)\n- run_build_command (return success/failure + output)\n- All git operations\n\nReference: docs/plans/2026-01-05-test-coverage-improvement-design.md lines 78-99","status":"closed","priority":1,"issue_type":"task","created_at":"2026-01-05T16:38:29.814352-05:00","updated_at":"2026-01-06T14:43:29.3548-05:00","closed_at":"2026-01-06T14:43:29.3548-05:00","dependencies":[{"issue_id":"contrast-ai-smartfix-action-uoy","depends_on_id":"contrast-ai-smartfix-action-dtc","type":"blocks","created_at":"2026-01-05T16:38:41.610258-05:00","created_by":"daemon"}]} {"id":"contrast-ai-smartfix-action-wuv","title":"Update documentation for command allowlist","description":"Update README.md and related documentation:\n- Add new section explaining command allowlist security feature\n- Document all allowed commands organized by language/ecosystem\n- Document allowed operators for chaining (\u0026\u0026, ||, ;, |)\n- Document allowed redirect patterns (relative paths only)\n- Provide examples of valid commands for each language\n- Provide examples of blocked commands and why they fail\n- Document error messages users might encounter\n- Add troubleshooting section for common validation failures\n- Update security.md if present to mention this security feature\n\nRelated to JIRA: AIML-337","status":"closed","priority":3,"issue_type":"task","created_at":"2025-12-23T09:52:22.925762-05:00","updated_at":"2025-12-23T10:40:22.502328-05:00","closed_at":"2025-12-23T10:40:22.502328-05:00","dependencies":[{"issue_id":"contrast-ai-smartfix-action-wuv","depends_on_id":"contrast-ai-smartfix-action-q9y","type":"blocks","created_at":"2025-12-23T09:52:26.450395-05:00","created_by":"daemon"}]} +{"id":"contrast-ai-smartfix-action-z4l","title":"Create LLM-as-judge prompt for E2E test repo PR reviews","description":"Create a reusable code review prompt for evaluating SmartFix's cybersecurity fixes in E2E test repos.\n\nPROMPT STRUCTURE:\n1. **Discovery Phase**\n - Fetch open PRs using gh CLI\n - Identify PRs created by SmartFix\n - Extract vulnerability description from PR body\n - Identify vulnerability type (SQL injection, XSS, path traversal, etc.)\n\n2. 
**Evaluation Criteria (Vulnerability-Agnostic)**\n - Does the fix address the specific vulnerability described in PR body?\n - Are all attack vectors for that vulnerability type blocked?\n - Input validation appropriate for the vulnerability type\n - Output encoding/sanitization where needed\n - No introduction of new vulnerabilities\n - Test coverage for the specific vulnerability\n\n3. **Review Process**\n - Parse PR body to understand the vulnerability\n - Fetch PR diff and files changed\n - Analyze if code changes eliminate the vulnerability\n - Check for incomplete fixes or edge cases\n - **CRITICAL: If any problems are flagged:**\n - Read surrounding code context (not just the diff)\n - Trace the data flow to confirm the vulnerability path\n - Verify the issue truly exists before reporting it\n - Check if existing code already handles the concern\n - Avoid false positives by validating assumptions\n - Verify test cases cover the vulnerability scenario\n - Assess if fix follows security best practices for that vuln type\n\n4. **Report Format**\n - Vulnerability type and description (from PR)\n - Fix effectiveness assessment\n - Does fix address the vulnerability? (YES/NO/PARTIAL)\n - Strengths of the fix\n - Gaps or weaknesses in the fix (ONLY if confirmed by reading surrounding code)\n - Attack vectors still exploitable (if any, with code evidence)\n - Test coverage assessment\n - Recommendations for improvement\n\nLOCATION:\ntest/prompts/smartfix-security-review.md\n\nUSAGE:\nUser runs Claude Code in E2E test repo and provides the prompt, which instructs Claude to review all open SmartFix PRs by matching code changes against the vulnerability described in each PR body.","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-23T13:12:19.227567-05:00","updated_at":"2025-12-23T13:52:04.993954-05:00","closed_at":"2025-12-23T13:52:04.993954-05:00"} diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..807d5983 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,3 @@ + +# Use bd merge for beads JSONL files +.beads/issues.jsonl merge=beads diff --git a/.gitignore b/.gitignore index 8f54d64f..2da8c2a4 100644 --- a/.gitignore +++ b/.gitignore @@ -178,3 +178,10 @@ cython_debug/ # PyPI configuration file .pypirc + +# bv (beads viewer) local config and caches +.bv/ + +# Local development and planning artifacts +docs/plans/ +.claude/learnings/ diff --git a/src/config.py b/src/config.py index a00017a4..0298eb35 100644 --- a/src/config.py +++ b/src/config.py @@ -23,6 +23,8 @@ from pathlib import Path from typing import Optional, Any, Dict, List +from src.smartfix.config.command_validator import validate_command, CommandValidationError + def _log_config_message(message: str, is_error: bool = False, is_warning: bool = False): """A minimal logger for use only within the config module before full logging is set up.""" @@ -81,8 +83,16 @@ def __init__(self, env: Dict[str, str] = os.environ, testing: bool = False): else: self.BUILD_COMMAND = self._get_env_var("BUILD_COMMAND", required=is_build_command_required) + # Validate BUILD_COMMAND if present + if not testing: + self._validate_command("BUILD_COMMAND", self.BUILD_COMMAND) + self.FORMATTING_COMMAND = self._get_env_var("FORMATTING_COMMAND", required=False) + # Validate FORMATTING_COMMAND if present + if not testing: + self._validate_command("FORMATTING_COMMAND", self.FORMATTING_COMMAND) + # --- Validated and normalized settings --- self.MAX_QA_ATTEMPTS = self._get_validated_int("MAX_QA_ATTEMPTS", default=6, 
min_val=0, max_val=10) self.MAX_OPEN_PRS = self._get_validated_int("MAX_OPEN_PRS", default=5, min_val=0) @@ -175,6 +185,49 @@ def _check_contrast_config_values_exist(self): if not all([self.CONTRAST_HOST, self.CONTRAST_ORG_ID, self.CONTRAST_APP_ID, self.CONTRAST_AUTHORIZATION_KEY, self.CONTRAST_API_KEY]): raise ConfigurationError("Error: Missing one or more Contrast API configuration variables (HOST, ORG_ID, APP_ID, AUTH_KEY, API_KEY).") + def _validate_command(self, var_name: str, command: Optional[str], source: str = "config") -> None: + """ + Validate a command against the allowlist. + + Args: + var_name: Name of the config variable (for error messages) + command: Command string to validate (can be None) + source: Command source, either "config" (from action.yml, trusted) or + "ai_detected" (generated by AI agent, requires validation). + Default: "config" + + Raises: + ConfigurationError: If command fails validation + + Notes: + - "config" source: Commands from action.yml inputs are trusted (from humans) + and skip allowlist validation + - "ai_detected" source: Commands generated by AI agents go through full + allowlist validation for security + """ + if not command: + # Empty or None commands are allowed (handled by required flag in _get_env_var) + return + + # Skip validation for config-sourced commands (from humans via action.yml) + if source == "config": + _log_config_message( + f"{var_name} from action config (trusted source), skipping allowlist validation" + ) + return + + # Validate AI-generated commands through allowlist + try: + validate_command(var_name, command) + except CommandValidationError as e: + # Log the validation failure for debugging + _log_config_message( + f"Command validation failed for {var_name}: {str(e)}", + is_error=True + ) + # Convert CommandValidationError to ConfigurationError + raise ConfigurationError(str(e)) from e + def _get_coding_agent(self) -> str: from src.smartfix.shared.coding_agents import CodingAgents coding_agent = self._get_env_var("CODING_AGENT", required=False, default="SMARTFIX") diff --git a/src/smartfix/config/__init__.py b/src/smartfix/config/__init__.py index e3f12444..772f3ec7 100644 --- a/src/smartfix/config/__init__.py +++ b/src/smartfix/config/__init__.py @@ -13,6 +13,15 @@ - TelemetryConfig: Telemetry and observability configuration """ +from src.smartfix.config.command_validator import ( + validate_command, + CommandValidationError, + ALLOWED_COMMANDS, +) + __all__ = [ # Configuration components will be exported as they are implemented + "validate_command", + "CommandValidationError", + "ALLOWED_COMMANDS", ] diff --git a/src/smartfix/config/command_validator.py b/src/smartfix/config/command_validator.py new file mode 100644 index 00000000..0ef580de --- /dev/null +++ b/src/smartfix/config/command_validator.py @@ -0,0 +1,473 @@ +# - +# #%L +# Contrast AI SmartFix +# %% +# Copyright (C) 2025 Contrast Security, Inc. +# %% +# Contact: support@contrastsecurity.com +# License: Commercial +# NOTICE: This Software and the patented inventions embodied within may only be +# used as part of Contrast Security's commercial offerings. Even though it is +# made available through public repositories, use of this Software is subject to +# the applicable End User Licensing Agreement found at +# https://www.contrastsecurity.com/enduser-terms-0317a or as otherwise agreed +# between Contrast Security and the End User. 
The Software may not be reverse +# engineered, modified, repackaged, sold, redistributed or otherwise used in a +# way not consistent with the End User License Agreement. +# #L% +# + +""" +Command Validation Module + +Validates build and format commands against an allowlist to prevent +arbitrary command execution in GitHub Actions workflows. +""" + +import re +import shlex +from typing import List, Tuple, Optional + + +class CommandValidationError(Exception): + """Raised when a command fails allowlist validation.""" + pass + + +# Allowed executables for build and format commands +ALLOWED_COMMANDS: List[str] = [ + # .NET + 'dotnet', 'msbuild', 'nuget', + 'nunit-console', 'nunit3-console', 'xunit.console', + 'vstest.console', 'mstest', + 'csharpier', + + # Java / Scala + 'mvn', 'gradle', 'ant', 'sbt', 'junit', 'testng', + './gradlew', './mvnw', 'gradlew', 'mvnw', # Wrapper scripts + 'google-java-format', 'checkstyle', + + # Python + 'pip', 'pip3', 'python', 'python3', + 'pytest', 'nose2', 'unittest', 'coverage', + 'poetry', 'pipenv', 'uv', 'tox', 'virtualenv', + 'black', 'autopep8', 'yapf', 'isort', 'ruff', + 'flake8', 'pylint', + + # Node.js / JavaScript / TypeScript + 'npm', 'npx', 'yarn', 'node', 'pnpm', 'bun', + 'jest', 'mocha', 'jasmine', 'karma', 'ava', 'vitest', 'nyc', + 'prettier', 'eslint', 'standard', + + # PHP + 'composer', 'php', 'phpunit', 'pest', 'codeception', + 'php-cs-fixer', 'phpcbf', + + # Multi-language formatters + 'clang-format', # Used for Java, JavaScript, Protobuf, etc. + + # Build tools + 'make', 'cmake', 'ninja', 'bazel', 'ctest', + + # Shell utilities + 'echo', 'sh', 'bash', 'grep', 'sed', 'awk', 'cat', 'tee' +] + +# Allowed operators for chaining commands +ALLOWED_OPERATORS = ['&&', '||', ';', '|'] + +# Maximum command length and complexity +MAX_COMMAND_LENGTH = 10000 # characters +MAX_SEGMENTS = 50 # maximum number of chained commands + +# Dangerous patterns to block (raw strings for regex compilation) +_BLOCKED_PATTERN_STRINGS = [ + r'\$\(', # Command substitution $(...) + r'`', # Backtick command substitution + r'\$\{', # Variable expansion ${...} + r'\beval\s', # eval command + r'\bexec\s', # exec command + r'\brm\s+-rf', # Dangerous rm + r'\bcurl.*\|', # curl piped to interpreter + r'\bwget.*\|', # wget piped to interpreter + r'>\s*/dev/', # Writing to devices + r';\s*rm\b', # rm after command separator + r'\|\s*sh\b', # Piping to shell + r'\|\s*bash\b', # Piping to bash + r'(?<!&)&(?!&)(?![0-9])', # Background execution (&) but not &&, >&1, >&2 + r'<<\(', # Process substitution output +] + +# Pre-compile patterns for performance (65% speedup) +BLOCKED_PATTERNS = [re.compile(pattern) for pattern in _BLOCKED_PATTERN_STRINGS] + +# Dangerous interpreter flags that allow arbitrary code execution +DANGEROUS_INTERPRETER_FLAGS = { + 'node': ['-e', '--eval'], # node -e "code" + 'python': ['-c'], # python -c "code" + 'python3': ['-c'], # python3 -c "code" + 'ruby': ['-e'], # ruby -e "code" + 'perl': ['-e'], # perl -e "code" +} + +# Allowed Python -m modules (for safe subprocess execution) +ALLOWED_PYTHON_MODULES = [ + 'pytest', 'unittest', 'coverage', 'pip', 'venv', 'virtualenv', + 'black', 'autopep8', 'yapf', 'isort', 'ruff', 'flake8', 'pylint', + 'mypy', 'tox', 'nose2', 'poetry', 'pipenv', +] + + +def find_dangerous_pattern(command: str) -> Optional[str]: + """ + Find first dangerous pattern in command.
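+ + Example (illustrative): find_dangerous_pattern("rm -rf build") returns + r'\brm\s+-rf', while find_dangerous_pattern("npm test") returns None.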
+ + Args: + command: Command string to check + + Returns: + The matched pattern string if found, None if safe + """ + for i, pattern in enumerate(BLOCKED_PATTERNS): + if pattern.search(command): + return _BLOCKED_PATTERN_STRINGS[i] # Return the original pattern string + return None + + +def validate_interpreter_flags(executable: str, args: List[str]) -> bool: + """ + Validates that interpreter commands don't use dangerous flags. + + Args: + executable: The executable name + args: List of arguments + + Returns: + True if valid, False if uses dangerous flags + """ + if executable not in DANGEROUS_INTERPRETER_FLAGS: + return True # Not a dangerous interpreter + + dangerous_flags = DANGEROUS_INTERPRETER_FLAGS[executable] + for flag in dangerous_flags: + if flag in args: + return False + + return True + + +def validate_python_module(args: List[str]) -> bool: + """ + Validates Python -m flag usage is restricted to allowed modules. + + Args: + args: List of arguments to python/python3 + + Returns: + True if valid, False if uses disallowed -m module + """ + # Check if -m flag is present + if '-m' not in args: + return True # No -m flag, no restriction needed + + # Find the module name after -m + try: + m_index = args.index('-m') + if m_index + 1 < len(args): + module_name = args[m_index + 1] + # Check if module is in allowlist + return module_name in ALLOWED_PYTHON_MODULES + except (ValueError, IndexError): + pass + + return False # -m flag present but no valid module + + +def validate_redirect(redirect_path: str) -> bool: + """ + Validates file redirects are safe. + + Args: + redirect_path: Path in redirect (e.g., "build.log" from "> build.log") + + Returns: + True if safe, False if dangerous + """ + # Block absolute paths + if redirect_path.startswith('/'): + return False + + # Block parent directory traversal + if '..' in redirect_path: + return False + + # Block home directory expansion + if redirect_path.startswith('~'): + return False + + return True + + +def extract_redirects(segment: str) -> List[str]: + """ + Extract file redirect paths from a command segment. + + Args: + segment: Command segment to analyze + + Returns: + List of redirect file paths found + """ + redirects = [] + + # Match redirects like: > file, >> file, 2> file, 2>> file, 3> file, etc. + # A single pattern covers both > and >>; special redirects such as 2>&1 and + # >&2 never match because '&' is excluded from the path character class. + redirect_pattern = r'(\d*)>{1,2}\s*([^\s&|;]+)' + + for _fd, redirect_path in re.findall(redirect_pattern, segment): + # Defensive: skip anything that still looks like a fd redirect (e.g. &1) + if not redirect_path.startswith('&'): + redirects.append(redirect_path) + + return redirects + + +def validate_shell_command(executable: str, args: List[str]) -> bool: + """ + Validates shell commands (sh/bash). + Only allows: sh ./script.sh or bash ./script.sh + Blocks: sh -c "command" or bash -c "..."
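+ + Examples (illustrative): validate_shell_command('bash', ['./build.sh']) + returns True; validate_shell_command('bash', ['-c', 'echo hi']) returns False.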
+ + Args: + executable: The executable name + args: List of arguments + + Returns: + True if valid, False if invalid + """ + if executable not in ['sh', 'bash']: + return True # Not a shell command + + # Must have at least one argument + if not args: + return False + + # Block inline execution: -c flag + if '-c' in args: + return False + + # First non-flag argument must be a .sh file + script_path = next((arg for arg in args if not arg.startswith('-')), None) + if not script_path: + return False + + return script_path.endswith('.sh') + + +def parse_command_segment(segment: str) -> Tuple[str, List[str]]: + """ + Parse command segment into executable and arguments. + + Args: + segment: Command segment string + + Returns: + Tuple of (executable, arguments list) + """ + # Strip whitespace and handle empty segments + segment = segment.strip() + if not segment: + return '', [] + + # Remove redirects for parsing (they're validated separately) + # This handles cases like "npm test > output.txt" + segment_for_parsing = re.sub(r'\d*>{1,2}\s*[^\s&|;]+', '', segment).strip() + + try: + # Use shlex to properly handle quoted strings and arguments + parts = shlex.split(segment_for_parsing) + except ValueError: + # If shlex fails, fall back to simple split + parts = segment_for_parsing.split() + + if not parts: + return '', [] + + executable = parts[0] + args = parts[1:] if len(parts) > 1 else [] + + return executable, args + + +def split_command_chain(command: str) -> List[Tuple[str, str]]: + """ + Split command by operators, return list of (command, operator) tuples. + + Args: + command: Full command string with potential operators + + Returns: + List of (command_segment, operator) tuples. + Last tuple has empty string for operator. + + Example: + "npm install && npm test" -> [("npm install", "&&"), ("npm test", "")] + """ + # Pattern to match operators while preserving them + operator_pattern = '(' + '|'.join(re.escape(op) for op in ALLOWED_OPERATORS) + ')' + + # Split by operators + parts = re.split(operator_pattern, command) + + # Group into (command, operator) pairs + segments = [] + i = 0 + while i < len(parts): + cmd = parts[i].strip() + if i + 1 < len(parts) and parts[i + 1] in ALLOWED_OPERATORS: + operator = parts[i + 1] + i += 2 + else: + operator = '' + i += 1 + + if cmd: # Skip empty segments + segments.append((cmd, operator)) + + return segments + + +def validate_command(var_name: str, command: str) -> None: # noqa: C901 + """ + Validates a command against the allowlist. + + Args: + var_name: Name of the config variable (for error messages) + command: Command string to validate + + Raises: + CommandValidationError: If command fails validation + + Note: + Complexity is necessary for comprehensive security validation. + """ + # Check for empty or whitespace-only commands + if not command or not command.strip(): + raise CommandValidationError( + f"Error: {var_name} is empty or contains only whitespace.\n" + f"Please provide a valid build or format command." + ) + + # Check command length limit + if len(command) > MAX_COMMAND_LENGTH: + raise CommandValidationError( + f"Error: {var_name} exceeds maximum length of {MAX_COMMAND_LENGTH} characters.\n" + f"Command length: {len(command)}\n" + f"Please simplify your command or split into multiple steps." + ) + + # Block raw newline characters (before handling line continuations) + if '\n' in command or '\r' in command: + # Check if they're escaped (backslash-newline is OK) + unescaped_newlines = re.findall(r'(?<!\\)[\n\r]', command) + if unescaped_newlines: + raise CommandValidationError( + f"Error: {var_name} contains unescaped newline characters.\n" + f"Chain commands with allowed operators (&&, ||, ;) instead of raw line breaks." + ) + + # Split the command into segments by allowed operators + segments = split_command_chain(command) + + # Check command complexity limit + if len(segments) >
MAX_SEGMENTS: + raise CommandValidationError( + f"Error: {var_name} exceeds maximum complexity of {MAX_SEGMENTS} chained commands.\n" + f"Command has {len(segments)} segments.\n" + f"Please simplify your command or split into multiple steps." + ) + + for segment, operator in segments: + # Validate operator (if present) + if operator and operator not in ALLOWED_OPERATORS: + raise CommandValidationError( + f"Error: {var_name} uses disallowed operator: {operator}\n" + f"Allowed operators: {', '.join(ALLOWED_OPERATORS)}\n" + f"Command: {command}" + ) + + # Parse command into executable and arguments + executable, args = parse_command_segment(segment) + + if not executable: + continue # Skip empty segments + + # Validate executable is in allowlist + if executable not in ALLOWED_COMMANDS: + raise CommandValidationError( + f"Error: {var_name} uses disallowed command: {executable}\n" + f"Command: {command}\n" + f"See documentation for allowed build and format commands." + ) + + # Special validation for shell commands + if not validate_shell_command(executable, args): + raise CommandValidationError( + f"Error: {var_name} uses shell command incorrectly: {segment}\n" + f"Shell commands (sh/bash) can only execute .sh files.\n" + f"Blocked: sh -c, bash -c\n" + f"Allowed: sh ./build.sh" + ) + + # Validate interpreter flags (node -e, python -c, etc.) + if not validate_interpreter_flags(executable, args): + flags = DANGEROUS_INTERPRETER_FLAGS.get(executable, []) + raise CommandValidationError( + f"Error: {var_name} uses dangerous interpreter flag: {executable} with {flags}\n" + f"Blocked flags allow arbitrary code execution.\n" + f"Command segment: {segment}\n" + f"Use script files instead of inline code execution." + ) + + # Validate Python -m flag usage + if executable in ['python', 'python3']: + if not validate_python_module(args): + raise CommandValidationError( + f"Error: {var_name} uses disallowed Python module with -m flag.\n" + f"Command segment: {segment}\n" + f"Allowed modules: {', '.join(ALLOWED_PYTHON_MODULES)}\n" + f"For other modules, execute them directly if they provide CLI tools." + ) + + # Validate redirects if present + redirects = extract_redirects(segment) + for redirect_path in redirects: + if not validate_redirect(redirect_path): + raise CommandValidationError( + f"Error: {var_name} contains unsafe file redirect: {redirect_path}\n" + f"Redirects must be to relative paths without '..' traversal.\n" + f"Command: {command}" + ) diff --git a/src/smartfix/domains/workflow/formatter.py b/src/smartfix/domains/workflow/formatter.py index 3e6fc4fc..eb412227 100644 --- a/src/smartfix/domains/workflow/formatter.py +++ b/src/smartfix/domains/workflow/formatter.py @@ -47,14 +47,16 @@ def run_formatting_command(formatting_command: Optional[str], repo_root: Path, r return changed_files log(f"\n--- Running Formatting Command: {formatting_command} ---") - # Modified to match run_command signature which returns only stdout + # Use shell=True to preserve shell operators like &&, ||, ; etc. + # The command is validated by command_validator.py before reaching here. 
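+ # Note: shell=True hands the string to the system shell, which is what makes + # operator chaining work; the tradeoff is that the string must come from a + # trusted or validated source (config-sourced commands are treated as trusted, + # AI-generated ones go through the allowlist - see _validate_command in src/config.py).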
current_dir = os.getcwd() try: os.chdir(str(repo_root)) # Change to repo root directory try: format_output = run_command( - formatting_command.split(), # Split string into list for Popen - check=False # Don't exit on failure, we'll check status + formatting_command, # Pass as string for shell=True + check=False, # Don't exit on failure, we'll check status + shell=True # Enable shell operators (&&, ||, |, ;) ) format_success = True # If no exception was raised, consider it successful except Exception as e: diff --git a/src/utils.py b/src/utils.py index 9cb64335..d9845a12 100644 --- a/src/utils.py +++ b/src/utils.py @@ -160,41 +160,53 @@ def __init__(self, message, return_code, command, stdout=None, stderr=None): self.stderr = stderr -def run_command(command, env=None, check=True): +def run_command(command, env=None, check=True, shell=False): # noqa: C901 """ Runs a shell command and returns its stdout. Prints command, stdout/stderr based on DEBUG_MODE. Exits on error if check=True. Args: - command: List of command and arguments to run + command: List of command and arguments to run, or string if shell=True env: Optional environment variables dictionary check: Whether to exit on command failure + shell: Whether to run the command through the shell (for operators like &&, ||, etc.) Returns: str: Command stdout output Raises: SystemExit: If check=True and command fails + + Note: + Complexity is necessary for proper command execution and error handling. """ try: # Show command and options for better debugging - options_text = f"Options: check={check}" + options_text = f"Options: check={check}, shell={shell}" if env and env.get('GITHUB_TOKEN'): # Don't print the actual token options_text += ", GITHUB_TOKEN=***" # Mask GITHUB_TOKEN value in command parts config = get_config() - masked_command = [] - for part in command: - # Note: the gh envvars "GITHUB_ENTERPRISE_TOKEN" and "GITHUB_TOKEN" have the same value as config.GITHUB_TOKEN - if config.GITHUB_TOKEN and config.GITHUB_TOKEN in part: - masked_command.append(part.replace(config.GITHUB_TOKEN, "***")) - else: - masked_command.append(part) + if shell: + # For shell commands, mask token in the string + masked_command = command + if config.GITHUB_TOKEN and config.GITHUB_TOKEN in command: + masked_command = command.replace(config.GITHUB_TOKEN, "***") + debug_log(f"::group::Running command: {masked_command}") + else: + # For list commands, mask token in each part + masked_command = [] + for part in command: + # Note: the gh envvars "GITHUB_ENTERPRISE_TOKEN" and "GITHUB_TOKEN" have the same value as config.GITHUB_TOKEN + if config.GITHUB_TOKEN and config.GITHUB_TOKEN in part: + masked_command.append(part.replace(config.GITHUB_TOKEN, "***")) + else: + masked_command.append(part) + debug_log(f"::group::Running command: {' '.join(masked_command)}") - debug_log(f"::group::Running command: {' '.join(masked_command)}") debug_log(f" {options_text}") # Merge with current environment to preserve essential variables like PATH @@ -210,7 +222,8 @@ def run_command(command, env=None, check=True): encoding='utf-8', errors='replace', check=False, # We'll handle errors ourselves - env=full_env + env=full_env, + shell=shell ) debug_log(f" Return Code: {process.returncode}") @@ -246,14 +259,15 @@ def run_command(command, env=None, check=True): debug_log(f" Command stderr:\n---\n{stderr_text}\n---") if check and process.returncode != 0: - error_message_for_log = f"Error: Command failed with return code {process.returncode}: {' '.join(command)}" + command_str = command if 
shell else ' '.join(command) + error_message_for_log = f"Error: Command failed with return code {process.returncode}: {command_str}" log(error_message_for_log, is_error=True) error_details = process.stderr.strip() if process.stderr else "No error output available" log(f"Error details: {error_details}", is_error=True) raise CommandExecutionError( - message=f"Command '{' '.join(command)}' failed with return code {process.returncode}.", + message=f"Command '{command_str}' failed with return code {process.returncode}.", return_code=process.returncode, - command=' '.join(command), + command=command_str, stdout=process.stdout.strip() if process.stdout else None, stderr=error_details ) diff --git a/test/prompts/README.md b/test/prompts/README.md new file mode 100644 index 00000000..62939c94 --- /dev/null +++ b/test/prompts/README.md @@ -0,0 +1,63 @@ +# SmartFix Review Prompts + +This directory contains reusable prompts for evaluating SmartFix's automated security fixes. + +## Available Prompts + +### smartfix-pr-review.md + +**Purpose:** Systematically review open PRs in E2E test repositories to evaluate the quality and effectiveness of SmartFix's cybersecurity fixes. + +**When to Use:** +- After SmartFix has created PRs in your E2E test repositories +- To validate that security vulnerabilities are properly fixed +- To assess SmartFix's fix quality across multiple vulnerability types +- Before merging SmartFix PRs into your test baseline + +**How to Use:** + +1. Navigate to an E2E test repository (Java, Python, .NET, Node.js, etc.) +2. Run Claude Code with the prompt: + ```bash + claude "Please follow the review instructions in test/prompts/smartfix-pr-review.md from the contrast-ai-smartfix-action repo" + ``` + + Or if you've copied the prompt locally: + ```bash + claude "$(cat path/to/smartfix-pr-review.md)" + ``` + +3. Claude will: + - Discover all open SmartFix PRs + - Extract vulnerability descriptions from PR bodies + - Analyze code changes and verify fixes + - Read surrounding code to confirm findings + - Generate detailed security assessment reports + +**What You'll Get:** +- Per-PR security fix effectiveness scores (0-100) +- Analysis of strengths and weaknesses +- Test coverage assessment +- Specific recommendations for improvements +- Summary statistics across all PRs +- Pattern analysis of SmartFix's fix quality + +**Example Output Structure:** +``` +PR #123: Fix SQL Injection in User Login +- Fix Status: YES +- Confidence: HIGH +- Score: 95/100 +- Strengths: Proper input validation, parameterized queries +- Test Coverage: Comprehensive +- Recommendations: Add edge case for Unicode characters +``` + +## Contributing New Prompts + +When adding new review prompts: +1. Name files descriptively: `<topic>-review.md` +2. Include clear step-by-step instructions +3. Emphasize verification and avoiding false positives +4. Provide structured output format examples +5. Update this README with usage instructions diff --git a/test/prompts/smartfix-pr-review.md b/test/prompts/smartfix-pr-review.md new file mode 100644 index 00000000..79e81959 --- /dev/null +++ b/test/prompts/smartfix-pr-review.md @@ -0,0 +1,282 @@ +# SmartFix Security Fix Review Prompt + +You are an expert security code reviewer evaluating PRs created by Contrast's SmartFix AI agent in end-to-end test repositories. Your goal is to assess whether SmartFix's code changes effectively fix the security vulnerabilities described in each PR. + +## Your Mission + +For each open PR created by SmartFix: +1.
Extract the vulnerability description from the PR body +2. Analyze the code changes in the PR diff +3. Verify the fix eliminates the specific vulnerability +4. Confirm findings by reading surrounding code context +5. Provide a detailed security assessment + +## Step-by-Step Review Process + +### Step 1: Discover Open SmartFix PRs + +```bash +gh pr list --state open --json number,title,author,body,url +``` + +Filter for PRs created by SmartFix or automated agents. For each PR, proceed to Step 2. + +### Step 2: Extract Vulnerability Information + +Read the PR body carefully and extract: +- **Vulnerability Type**: SQL injection, XSS, path traversal, command injection, etc. +- **Vulnerability Description**: How the vulnerability manifests +- **Attack Vector**: How an attacker could exploit it +- **Affected Code**: Which files/functions are vulnerable +- **Severity**: Critical, High, Medium, Low + +### Step 3: Fetch and Analyze the PR Diff + +```bash +gh pr diff <pr-number> +``` + +Analyze the code changes: +- What files were modified? +- What specific lines changed? +- What was added, removed, or modified? + +### Step 4: Evaluate Fix Effectiveness + +Ask these critical questions: + +**Primary Question:** +- Does this code change eliminate the vulnerability described in the PR body? + +**Security Validation:** +- Are all attack vectors for this vulnerability type blocked? +- Is input validation appropriate for the vulnerability type? +- Is output encoding/sanitization applied where needed? +- Are edge cases handled (empty input, special characters, encoding bypasses)? +- Could an attacker bypass this fix with a modified payload? + +**Code Quality:** +- Does the fix introduce new vulnerabilities? +- Is the fix minimal and focused (not over-engineering)? +- Does the fix follow security best practices for this vulnerability type? + +### Step 5: Verify Findings with Context (CRITICAL) + +**BEFORE flagging ANY problem, you MUST:** + +1. **Read surrounding code context** - Not just the diff lines + ```bash + # Read the full file to understand context + gh pr view <pr-number> --json files -q '.files[].path' | while read file; do + echo "=== $file ===" + cat "$file" + done + ``` + +2. **Trace the data flow** + - Where does user input come from? + - How is it processed before reaching the vulnerable code? + - Are there validation layers already in place? + - What happens to the data after the fix? + +3. **Confirm the vulnerability path exists** + - Does the attack vector described in the PR actually exist? + - Is there existing code that already mitigates this? + - Could the reviewer be missing defensive layers? + +4. **Check existing tests** + ```bash + # Look for test files related to the fix + find . -name "*test*" -o -name "*spec*" | xargs grep -l "<vulnerability-keyword>" + ``` + +5. **Validate assumptions** + - Is this a real security issue or a false positive? + - Does the framework/library already handle this? + - Are there other callers of this function that are safe? + +**DO NOT report issues based solely on diff inspection. Always verify with full code context.** + +### Step 6: Assess Test Coverage + +```bash +# Find test files in the PR +gh pr view <pr-number> --json files -q '.files[] | select(.path | contains("test") or contains("spec")) | .path' +``` + +Evaluate test coverage: +- Are there tests specifically for this vulnerability? +- Do tests cover the attack vector described in the PR? +- Do tests include edge cases (boundary conditions, encoding variations)? +- Are negative tests included (verify exploit attempts fail)?
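+
+As a concrete illustration of the "negative test" pattern, here is a minimal,
+self-contained Python sketch (invented names and schema, not taken from any
+reviewed PR; the repository under review will have its own framework
+conventions). It fires the exact tautology payload at a parameterized-query
+fix and asserts that the exploit fails while a legitimate lookup still works:
+
+```python
+import sqlite3
+import unittest
+
+
+def find_user_by_name(conn, name):
+    # Parameterized query: the driver treats `name` strictly as data, never SQL.
+    return conn.execute(
+        "SELECT id, name FROM users WHERE name = ?", (name,)
+    ).fetchone()
+
+
+class TestSqlInjectionFix(unittest.TestCase):
+    def setUp(self):
+        self.conn = sqlite3.connect(":memory:")
+        self.conn.execute("CREATE TABLE users (id INTEGER PRIMARY KEY, name TEXT)")
+        self.conn.execute("INSERT INTO users (name) VALUES ('alice')")
+
+    def test_injection_payload_returns_no_rows(self):
+        # Negative test: the classic tautology payload must match nothing.
+        self.assertIsNone(find_user_by_name(self.conn, "' OR '1'='1"))
+
+    def test_legitimate_lookup_still_works(self):
+        self.assertIsNotNone(find_user_by_name(self.conn, "alice"))
+
+
+if __name__ == "__main__":
+    unittest.main()
+```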
+ +**Good test patterns:** +- Tests that attempt the exact attack described in PR body +- Tests with malicious payloads relevant to the vulnerability type +- Tests that verify error handling for invalid/dangerous input +- Integration tests showing end-to-end protection + +**Insufficient test patterns:** +- Only happy-path tests +- Tests that don't exercise the vulnerability code path +- Missing tests for edge cases mentioned in the PR + +### Step 7: Generate Security Assessment Report + +For each PR reviewed, provide this structured report: + +--- + +## PR #<number>: <title> + +**PR URL:** <url> + +### Vulnerability Summary +- **Type:** <vulnerability type> +- **Severity:** <severity> +- **Description:** <how the vulnerability manifests> +- **Attack Vector:** <how it can be exploited> + +### Fix Effectiveness Assessment + +**Fix Status:** [YES / PARTIAL / NO] +- **YES** = Fix completely eliminates the vulnerability +- **PARTIAL** = Fix addresses some but not all attack vectors +- **NO** = Fix does not address the vulnerability + +**Confidence Level:** [HIGH / MEDIUM / LOW] + +### Code Analysis + +**Files Changed:** +- `path/to/file1.ext` - <summary of changes> +- `path/to/file2.ext` - <summary of changes> + +**What the Fix Does:** +<plain-language explanation of the change> + +**How It Blocks the Attack:** +<explanation tied to the attack vector> + +### Strengths of the Fix + +✅ <strength> +✅ <strength> +✅ <strength> + +### Weaknesses or Gaps (ONLY if confirmed by code inspection) + +⚠️ <confirmed weakness> +⚠️ <confirmed weakness> + +**Note:** Only include weaknesses that you have confirmed by: +1. Reading the full affected files (not just diff) +2. Tracing the data flow to confirm the issue path +3. Verifying the issue isn't already handled elsewhere + +### Attack Vectors Still Exploitable (if any) + +❌ <attack vector> + - **Code Location:** `file:line` + - **Payload Example:** `<payload>` + - **Why Still Vulnerable:** <reason> + +### Test Coverage Assessment + +**Tests Included:** [YES / NO / PARTIAL] + +**Test Quality:** +- Covers the vulnerability: [YES / NO] +- Includes attack payloads: [YES / NO] +- Tests edge cases: [YES / NO] +- Has negative tests: [YES / NO] + +**Test Files Reviewed:** +- `path/to/test1` - <what it covers> +- `path/to/test2` - <what it covers> + +**Gaps in Test Coverage:** +<missing scenarios, if any> + +### Recommendations + +1. **Critical:** <must-fix item> +2. **Important:** <should-fix item> +3. **Enhancement:** <nice-to-have> + +### Overall Score + +**Security Fix Effectiveness: X/100** + +Scoring rubric: +- 90-100: Excellent - Comprehensive fix with no gaps +- 70-89: Good - Solid fix with minor improvements possible +- 50-69: Adequate - Fix works but has notable gaps +- 30-49: Weak - Partial fix, significant vulnerabilities remain +- 0-29: Ineffective - Does not address the vulnerability + +**Scoring Breakdown:** +- Vulnerability eliminated: X/40 +- Attack vectors blocked: X/20 +- Code quality: X/15 +- Test coverage: X/15 +- Edge cases handled: X/10 + +--- + +## Summary Across All PRs + +After reviewing all open SmartFix PRs, provide: + +### Overall Statistics +- **Total PRs Reviewed:** X +- **Effective Fixes (90-100):** X +- **Good Fixes (70-89):** X +- **Adequate Fixes (50-69):** X +- **Weak/Ineffective (0-49):** X + +### Pattern Analysis +**Common Strengths:** +- <strength> +- <strength> + +**Common Weaknesses:** +- <weakness> +- <weakness> + +**Vulnerability Types Handled Well:** +- <type>: <why> + +**Vulnerability Types Needing Improvement:** +- <type>: <why> + +### High-Priority Action Items + +1. **PR #X:** <action item> +2.
**PR #Y:** + +--- + +## Important Reminders + +**Verification is Mandatory:** +- Read full files, not just diffs +- Trace data flow to confirm vulnerability paths +- Check for existing defensive layers +- Validate all assumptions with code evidence +- Avoid false positives by thorough investigation + +**Be Thorough but Fair:** +- SmartFix is an AI agent - evaluate objectively +- Credit good security practices +- Only flag real issues backed by evidence +- Acknowledge when fixes are comprehensive +- Provide constructive, actionable feedback + +**Focus on Security:** +- Primary question: "Does this fix the vulnerability?" +- Secondary: "Could an attacker bypass this?" +- Consider the vulnerability type's specific attack vectors +- Think like an attacker trying to break the fix + +Begin your review now. Good hunting! 🔍🛡️ diff --git a/test/test_command_validation.py b/test/test_command_validation.py new file mode 100644 index 00000000..c8a98f7b --- /dev/null +++ b/test/test_command_validation.py @@ -0,0 +1,323 @@ +# - +# #%L +# Contrast AI SmartFix +# %% +# Copyright (C) 2025 Contrast Security, Inc. +# %% +# Contact: support@contrastsecurity.com +# License: Commercial +# #L% +# + +""" +Tests for command validation module. +""" + +import unittest +from src.smartfix.config.command_validator import ( + validate_command, + CommandValidationError, +) + + +class TestAllowedCommands(unittest.TestCase): + """Test that all allowed commands validate successfully.""" + + def test_java_commands(self): + """Test Java ecosystem commands.""" + valid_commands = [ + "mvn clean install", + "gradle build", + "./gradlew test", + "ant compile", + ] + for cmd in valid_commands: + validate_command("BUILD_COMMAND", cmd) # Should not raise + + def test_dotnet_commands(self): + """Test .NET ecosystem commands.""" + valid_commands = [ + "dotnet build", + "msbuild MyProject.sln", + "dotnet test", + ] + for cmd in valid_commands: + validate_command("BUILD_COMMAND", cmd) # Should not raise + + def test_python_commands(self): + """Test Python ecosystem commands.""" + valid_commands = [ + "pytest tests/", + "python -m pytest", + "black .", + "pip install -r requirements.txt", + ] + for cmd in valid_commands: + validate_command("BUILD_COMMAND", cmd) # Should not raise + + def test_nodejs_commands(self): + """Test Node.js ecosystem commands.""" + valid_commands = [ + "npm install", + "npm test", + "yarn build", + "prettier --write .", + ] + for cmd in valid_commands: + validate_command("BUILD_COMMAND", cmd) # Should not raise + + def test_php_commands(self): + """Test PHP ecosystem commands.""" + valid_commands = [ + "composer install", + "phpunit tests/", + "php-cs-fixer fix", + ] + for cmd in valid_commands: + validate_command("BUILD_COMMAND", cmd) # Should not raise + + def test_build_tools(self): + """Test general build tools.""" + valid_commands = [ + "make all", + "cmake .", + "echo 'Building...'", + ] + for cmd in valid_commands: + validate_command("BUILD_COMMAND", cmd) # Should not raise + + +class TestCommandChaining(unittest.TestCase): + """Test commands with operators.""" + + def test_and_operator(self): + """Test && operator for sequential execution.""" + cmd = "npm install && npm test" + validate_command("BUILD_COMMAND", cmd) # Should not raise + + def test_or_operator(self): + """Test || operator for fallback.""" + cmd = "npm test || echo 'Tests failed'" + validate_command("BUILD_COMMAND", cmd) # Should not raise + + def test_semicolon_operator(self): + """Test ; operator for sequential execution.""" + cmd = "npm 
install ; npm test" + validate_command("BUILD_COMMAND", cmd) # Should not raise + + def test_pipe_operator(self): + """Test | operator for piping.""" + cmd = "npm test | grep 'passing'" + validate_command("BUILD_COMMAND", cmd) # Should not raise + + def test_complex_chain(self): + """Test complex command chain.""" + cmd = "npm install && npm test || echo 'Build failed'" + validate_command("BUILD_COMMAND", cmd) # Should not raise + + +class TestShellScriptValidation(unittest.TestCase): + """Test shell script execution validation.""" + + def test_sh_with_script_file(self): + """Test sh executing .sh file is allowed.""" + cmd = "sh ./build.sh" + validate_command("BUILD_COMMAND", cmd) # Should not raise + + def test_bash_with_script_file(self): + """Test bash executing .sh file is allowed.""" + cmd = "bash ./scripts/test.sh" + validate_command("BUILD_COMMAND", cmd) # Should not raise + + def test_sh_with_c_flag_blocked(self): + """Test sh -c is blocked.""" + cmd = "sh -c 'npm install'" + with self.assertRaisesRegex(CommandValidationError, "shell command incorrectly"): + validate_command("BUILD_COMMAND", cmd) + + def test_bash_with_c_flag_blocked(self): + """Test bash -c is blocked.""" + cmd = "bash -c 'make build'" + with self.assertRaisesRegex(CommandValidationError, "shell command incorrectly"): + validate_command("BUILD_COMMAND", cmd) + + def test_sh_without_sh_extension_blocked(self): + """Test sh with non-.sh file is blocked.""" + cmd = "sh ./build" + with self.assertRaisesRegex(CommandValidationError, "shell command incorrectly"): + validate_command("BUILD_COMMAND", cmd) + + +class TestRedirectValidation(unittest.TestCase): + """Test file redirect validation.""" + + def test_relative_redirect_allowed(self): + """Test redirect to relative path is allowed.""" + cmd = "npm test > build.log" + validate_command("BUILD_COMMAND", cmd) # Should not raise + + def test_append_redirect_allowed(self): + """Test append redirect is allowed.""" + cmd = "npm test >> output.txt" + validate_command("BUILD_COMMAND", cmd) # Should not raise + + def test_stderr_redirect_allowed(self): + """Test stderr redirect is allowed.""" + cmd = "npm test 2> error.log" + validate_command("BUILD_COMMAND", cmd) # Should not raise + + def test_combined_redirect_allowed(self): + """Test combined stdout/stderr redirect is allowed.""" + cmd = "npm test > output.txt 2>&1" + validate_command("BUILD_COMMAND", cmd) # Should not raise + + def test_absolute_path_redirect_blocked(self): + """Test redirect to absolute path is blocked.""" + cmd = "npm test > /etc/passwd" + with self.assertRaisesRegex(CommandValidationError, "unsafe file redirect"): + validate_command("BUILD_COMMAND", cmd) + + def test_parent_traversal_redirect_blocked(self): + """Test redirect with .. 
traversal is blocked.""" + cmd = "npm test > ../../../etc/hosts" + with self.assertRaisesRegex(CommandValidationError, "unsafe file redirect"): + validate_command("BUILD_COMMAND", cmd) + + def test_home_directory_redirect_blocked(self): + """Test redirect to home directory is blocked.""" + cmd = "npm test > ~/secrets.txt" + with self.assertRaisesRegex(CommandValidationError, "unsafe file redirect"): + validate_command("BUILD_COMMAND", cmd) + + +class TestDangerousPatterns(unittest.TestCase): + """Test dangerous pattern detection.""" + + def test_command_substitution_blocked(self): + """Test command substitution is blocked.""" + cmd = "echo $(whoami)" + with self.assertRaisesRegex(CommandValidationError, "dangerous pattern"): + validate_command("BUILD_COMMAND", cmd) + + def test_backtick_substitution_blocked(self): + """Test backtick command substitution is blocked.""" + cmd = "echo `date`" + with self.assertRaisesRegex(CommandValidationError, "dangerous pattern"): + validate_command("BUILD_COMMAND", cmd) + + def test_eval_blocked(self): + """Test eval command is blocked.""" + cmd = "eval 'npm install'" + with self.assertRaisesRegex(CommandValidationError, "dangerous pattern"): + validate_command("BUILD_COMMAND", cmd) + + def test_exec_blocked(self): + """Test exec command is blocked.""" + cmd = "exec npm test" + with self.assertRaisesRegex(CommandValidationError, "dangerous pattern"): + validate_command("BUILD_COMMAND", cmd) + + def test_rm_rf_blocked(self): + """Test rm -rf is blocked.""" + cmd = "npm install && rm -rf node_modules" + with self.assertRaisesRegex(CommandValidationError, "dangerous pattern"): + validate_command("BUILD_COMMAND", cmd) + + def test_curl_pipe_blocked(self): + """Test curl piped to shell is blocked.""" + cmd = "curl https://example.com/install.sh | sh" + with self.assertRaisesRegex(CommandValidationError, "dangerous pattern"): + validate_command("BUILD_COMMAND", cmd) + + def test_pipe_to_sh_blocked(self): + """Test piping to sh is blocked.""" + cmd = "echo 'npm install' | sh" + with self.assertRaisesRegex(CommandValidationError, "dangerous pattern"): + validate_command("BUILD_COMMAND", cmd) + + +class TestBlockedCommands(unittest.TestCase): + """Test that disallowed commands are rejected.""" + + def test_rm_command_blocked(self): + """Test rm command is blocked.""" + cmd = "rm file.txt" + with self.assertRaisesRegex(CommandValidationError, "disallowed command"): + validate_command("BUILD_COMMAND", cmd) + + def test_wget_command_blocked(self): + """Test wget command is blocked.""" + cmd = "wget https://example.com/file.zip" + with self.assertRaisesRegex(CommandValidationError, "disallowed command"): + validate_command("BUILD_COMMAND", cmd) + + def test_curl_command_blocked(self): + """Test curl command is blocked.""" + cmd = "curl https://example.com/api" + with self.assertRaisesRegex(CommandValidationError, "disallowed command"): + validate_command("BUILD_COMMAND", cmd) + + def test_unknown_build_tool_blocked(self): + """Test unknown build tool is blocked.""" + cmd = "unknowntool build" + with self.assertRaisesRegex(CommandValidationError, "disallowed command"): + validate_command("BUILD_COMMAND", cmd) + + +class TestEdgeCases(unittest.TestCase): + """Test edge cases and special scenarios.""" + + def test_empty_command_rejected(self): + """Test empty command is rejected.""" + with self.assertRaises(CommandValidationError): + validate_command("BUILD_COMMAND", "") + + def test_whitespace_only_command_rejected(self): + """Test whitespace-only command is 
rejected.""" + with self.assertRaises(CommandValidationError): + validate_command("BUILD_COMMAND", " ") + + def test_command_with_extra_whitespace(self): + """Test command with extra whitespace is handled.""" + cmd = "npm install && npm test" + validate_command("BUILD_COMMAND", cmd) # Should not raise + + def test_command_with_newlines(self): + """Test command with newlines in chain.""" + cmd = "npm install && \\\nnpm test" + validate_command("BUILD_COMMAND", cmd) # Should not raise + + +class TestErrorMessages(unittest.TestCase): + """Test that error messages are clear and actionable.""" + + def test_disallowed_command_error_message(self): + """Test error message for disallowed command.""" + cmd = "wget https://example.com/file" + with self.assertRaisesRegex( + CommandValidationError, + "BUILD_COMMAND uses disallowed command: wget" + ): + validate_command("BUILD_COMMAND", cmd) + + def test_dangerous_pattern_error_message(self): + """Test error message for dangerous pattern.""" + cmd = "echo $(whoami)" + with self.assertRaisesRegex( + CommandValidationError, + "BUILD_COMMAND contains dangerous pattern" + ): + validate_command("BUILD_COMMAND", cmd) + + def test_shell_command_error_message(self): + """Test error message for improper shell usage.""" + cmd = "sh -c 'npm install'" + with self.assertRaisesRegex( + CommandValidationError, + "Shell commands \\(sh/bash\\) can only execute \\.sh files" + ): + validate_command("BUILD_COMMAND", cmd) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_config_integration.py b/test/test_config_integration.py index d221abe6..d55cb3d7 100644 --- a/test/test_config_integration.py +++ b/test/test_config_integration.py @@ -163,6 +163,83 @@ def test_all_feature_flags_work_together(self): self.assertTrue(config.SKIP_WRITING_SECURITY_TEST) self.assertFalse(config.ENABLE_FULL_TELEMETRY) + def test_config_sourced_commands_skip_validation(self): + """Test that config-sourced commands skip allowlist validation.""" + # Set a command that would normally be blocked (dangerous pattern) + os.environ['BUILD_COMMAND'] = 'rm -rf /tmp/test && echo "done"' + reset_config() + + # Should not raise an error because config-sourced commands skip validation + config = get_config(testing=True) + + # Verify the dangerous command was accepted + self.assertEqual(config.BUILD_COMMAND, 'rm -rf /tmp/test && echo "done"') + + def test_config_sourced_commands_with_command_substitution(self): + """Test that config-sourced commands skip validation even with command substitution.""" + # Command substitution would normally be blocked + os.environ['BUILD_COMMAND'] = 'echo $(date) && npm test' + reset_config() + + # Should not raise an error + config = get_config(testing=True) + self.assertEqual(config.BUILD_COMMAND, 'echo $(date) && npm test') + + def test_config_sourced_commands_with_piping_to_shell(self): + """Test that config-sourced commands skip validation even with shell piping.""" + # Piping to shell would normally be blocked + os.environ['BUILD_COMMAND'] = 'curl http://example.com/script.sh | sh' + reset_config() + + # Should not raise an error + config = get_config(testing=True) + self.assertEqual(config.BUILD_COMMAND, 'curl http://example.com/script.sh | sh') + + def test_config_sourced_formatting_command_skips_validation(self): + """Test that config-sourced FORMATTING_COMMAND also skips validation.""" + # Set a formatting command with dangerous pattern + os.environ['FORMATTING_COMMAND'] = 'prettier --write src/ && eval "echo test"' + reset_config() + + # Should 
not raise an error + config = get_config(testing=True) + self.assertEqual(config.FORMATTING_COMMAND, 'prettier --write src/ && eval "echo test"') + + def test_backward_compatibility_default_parameter(self): + """Test that existing code works without passing source parameter (backward compatibility).""" + # Use a safe command to test backward compatibility + os.environ['BUILD_COMMAND'] = 'mvn clean test' + reset_config() + + # Should work as before, defaulting to config source + config = get_config(testing=True) + self.assertEqual(config.BUILD_COMMAND, 'mvn clean test') + + def test_empty_commands_allowed_regardless_of_source(self): + """Test that empty/None commands are allowed for both sources.""" + # Don't set BUILD_COMMAND - it should be None/empty + if 'BUILD_COMMAND' in os.environ: + del os.environ['BUILD_COMMAND'] + reset_config() + + # Should not raise an error + config = get_config(testing=True) + # BUILD_COMMAND should be None or empty string + self.assertIn(config.BUILD_COMMAND, [None, '']) + + def test_both_build_and_format_commands_skip_validation(self): + """Test that both BUILD_COMMAND and FORMATTING_COMMAND skip validation.""" + os.environ['BUILD_COMMAND'] = 'npm test' + os.environ['FORMATTING_COMMAND'] = 'prettier --write .' + reset_config() + + # Both commands should be accepted without errors + config = get_config(testing=True) + + # Verify both commands are set correctly + self.assertEqual(config.BUILD_COMMAND, 'npm test') + self.assertEqual(config.FORMATTING_COMMAND, 'prettier --write .') + if __name__ == '__main__': unittest.main()
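
The module under test, `src/smartfix/config/command_validator.py`, is not included in this diff. For orientation, the sketch below shows one validator shape that is consistent with the assertions above: an allowlist of build tools plus dangerous-pattern and redirect checks. It is an inferred illustration, not the shipped implementation; the allowlist contents, regexes, and error messages are all assumptions reverse-engineered from the tests.

```python
import re


class CommandValidationError(Exception):
    """Raised when a configured command fails validation."""


# Assumed allowlist, inferred from TestAllowedCommands above.
ALLOWED_TOOLS = {
    "mvn", "gradle", "./gradlew", "ant",            # Java
    "dotnet", "msbuild",                            # .NET
    "pytest", "python", "black", "pip",             # Python
    "npm", "yarn", "prettier",                      # Node.js
    "composer", "phpunit", "php-cs-fixer",          # PHP
    "make", "cmake", "echo", "grep",                # general tools
}

# Assumed dangerous patterns, inferred from TestDangerousPatterns above.
DANGEROUS_PATTERNS = [
    r"\$\(",                # $(...) command substitution
    r"`",                   # backtick command substitution
    r"\beval\b",            # eval
    r"\bexec\b",            # exec
    r"\brm\s+-rf\b",        # recursive delete
    r"\|\s*(?:sh|bash)\b",  # piping anything into a shell
]


def validate_command(name: str, command: str) -> None:
    """Validate a configured command against the allowlist and patterns."""
    if not command or not command.strip():
        raise CommandValidationError(f"{name} is empty")

    # Treat shell line continuations (backslash + newline) as spaces.
    command = command.replace("\\\n", " ")

    for pattern in DANGEROUS_PATTERNS:
        if re.search(pattern, command):
            raise CommandValidationError(
                f"{name} contains dangerous pattern: {pattern}")

    # Redirect targets must be relative paths inside the workspace.
    for target in re.findall(r"\d?>>?\s*(\S+)", command):
        if target == "&1":
            continue  # "2>&1" merges streams; no file is written
        if target.startswith(("/", "~")) or ".." in target:
            raise CommandValidationError(
                f"{name} uses unsafe file redirect: {target}")

    # Validate each sub-command of a chain (&&, ||, ;, |) separately.
    for sub in re.split(r"&&|\|\||;|\|", command):
        tokens = sub.split()
        if not tokens:
            continue
        head = tokens[0]
        if head in ("sh", "bash"):
            # Shells may only run .sh scripts, so "sh -c ..." is rejected.
            if len(tokens) < 2 or not tokens[1].endswith(".sh"):
                raise CommandValidationError(
                    f"{name} uses shell command incorrectly: Shell commands "
                    "(sh/bash) can only execute .sh files")
        elif head not in ALLOWED_TOOLS:
            raise CommandValidationError(
                f"{name} uses disallowed command: {head}")
```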