diff --git a/.github/workflows/wrkr-sarif.yml b/.github/workflows/wrkr-sarif.yml new file mode 100644 index 0000000..33e5d7f --- /dev/null +++ b/.github/workflows/wrkr-sarif.yml @@ -0,0 +1,31 @@ +name: wrkr-sarif + +on: + workflow_dispatch: + +permissions: + contents: read + security-events: write + +jobs: + scan-and-upload: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Go + uses: actions/setup-go@v5 + with: + go-version-file: go.mod + + - name: Build wrkr + run: go build -o .tmp/wrkr ./cmd/wrkr + + - name: Run scan with SARIF output + run: ./.tmp/wrkr scan --path ./scenarios/wrkr/scan-mixed-org/repos --sarif --sarif-path ./.tmp/wrkr.sarif --json >/dev/null + + - name: Upload SARIF to GitHub Security + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: ./.tmp/wrkr.sarif diff --git a/.gitignore b/.gitignore index 3e73073..d21affc 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,9 @@ venv/ # Local analysis scratch .tmp/ .wrkr/ +!scenarios/wrkr/extension-detectors/repos/ext-repo/.wrkr/ +!scenarios/wrkr/extension-detectors/repos/ext-repo/.wrkr/detectors/ +!scenarios/wrkr/extension-detectors/repos/ext-repo/.wrkr/detectors/extensions.json *.sarif /wrkr /wrkr.exe diff --git a/Makefile b/Makefile index 8a84330..c545e37 100644 --- a/Makefile +++ b/Makefile @@ -3,6 +3,7 @@ SHELL := /bin/bash GO ?= go PKGS := ./... 
GOFILES := $(shell git ls-files '*.go') +DOCS_SITE_NPM_CACHE ?= $(CURDIR)/.tmp/npm-cache .PHONY: fmt lint lint-fast test test-fast test-integration test-e2e test-contracts test-scenarios \ test-hardening test-chaos test-perf test-risk-lane build hooks prepush prepush-full codeql lint-ci \ @@ -61,19 +62,23 @@ test-docs-storyline: @scripts/run_docs_smoke.sh --subset docs-site-install: - @cd docs-site && npm ci + @mkdir -p "$(DOCS_SITE_NPM_CACHE)" + @cd docs-site && NPM_CONFIG_CACHE="$(DOCS_SITE_NPM_CACHE)" npm ci docs-site-lint: - @cd docs-site && npm run lint + @mkdir -p "$(DOCS_SITE_NPM_CACHE)" + @cd docs-site && NPM_CONFIG_CACHE="$(DOCS_SITE_NPM_CACHE)" npm run lint docs-site-build: - @cd docs-site && npm run build + @mkdir -p "$(DOCS_SITE_NPM_CACHE)" + @cd docs-site && NPM_CONFIG_CACHE="$(DOCS_SITE_NPM_CACHE)" npm run build docs-site-check: @python3 scripts/check_docs_site_validation.py --report wrkr-out/docs_site_validation_report.json docs-site-audit-prod: - @cd docs-site && npm audit --omit=dev --audit-level=high + @mkdir -p "$(DOCS_SITE_NPM_CACHE)" + @cd docs-site && NPM_CONFIG_CACHE="$(DOCS_SITE_NPM_CACHE)" npm audit --omit=dev --audit-level=high test-adapter-parity: @scripts/test_adapter_parity.sh diff --git a/README.md b/README.md index f2ad57f..0e58393 100644 --- a/README.md +++ b/README.md @@ -81,7 +81,7 @@ make build Expected JSON keys by command family: -- `scan`: `status`, `target`, `findings`, `ranked_findings`, `top_findings`, `attack_paths`, `top_attack_paths`, `inventory`, `privilege_budget`, `agent_privilege_map`, `repo_exposure_summaries`, `profile`, `posture_score` (optional: `policy_warnings`, `report`) +- `scan`: `status`, `target`, `findings`, `ranked_findings`, `top_findings`, `attack_paths`, `top_attack_paths`, `inventory`, `privilege_budget`, `agent_privilege_map`, `repo_exposure_summaries`, `profile`, `posture_score` (optional: `detector_errors`, `partial_result`, `source_errors`, `source_degraded`, `policy_warnings`, `report`, `sarif`) 
- `report`: `status`, `generated_at`, `top_findings`, `attack_paths`, `top_attack_paths`, `total_tools`, `tool_type_breakdown`, `compliance_gap_count`, `privilege_budget`, `summary` (optional: `md_path`, `pdf_path`) - `score`: `score`, `grade`, `breakdown`, `weighted_breakdown`, `weights`, `trend_delta` (optional: `attack_paths`, `top_attack_paths`) - `evidence`: `status`, `output_dir`, `manifest_path`, `chain_path`, `framework_coverage`, `report_artifacts` @@ -127,6 +127,8 @@ Acquisition behavior: - `--path`: local, offline, fully deterministic. - `--repo` and `--org`: require `--github-api` or `WRKR_GITHUB_API_BASE`; unavailable acquisition fails closed with exit `7`. - Invalid target combinations fail with exit `6`. +- `--timeout <duration>` bounds scan runtime. Timeout returns JSON error code `scan_timeout` (exit `1`); signal/parent cancellation returns `scan_canceled` (exit `1`). +- GitHub retry behavior is bounded and rate-limit aware (`Retry-After`/`X-RateLimit-Reset`); repeated transient failures enter cooldown degradation and are surfaced in partial-result output. ## Production Target Policy @@ -240,6 +242,7 @@ wrkr lifecycle wrkr manifest generate wrkr regress init|run wrkr score +wrkr version wrkr verify --chain wrkr evidence wrkr fix @@ -255,6 +258,7 @@ All commands support `--json`. 
Human-readable rationale is available via `--expl - Policy authoring: [`docs/policy_authoring.md`](docs/policy_authoring.md) - Failure taxonomy and exits: [`docs/failure_taxonomy_exit_codes.md`](docs/failure_taxonomy_exit_codes.md) - Threat model: [`docs/threat_model.md`](docs/threat_model.md) +- Compatibility and versioning policy: [`docs/trust/compatibility-and-versioning.md`](docs/trust/compatibility-and-versioning.md) - Compatibility matrix: [`docs/contracts/compatibility_matrix.md`](docs/contracts/compatibility_matrix.md) - Trust docs: [`docs/trust/`](docs/trust/) - Intent pages: [`docs/intent/`](docs/intent/) diff --git a/action/action.yaml b/action/action.yaml index 219eb5b..5b1f09e 100755 --- a/action/action.yaml +++ b/action/action.yaml @@ -1,10 +1,10 @@ name: Wrkr Action -description: Deterministic Wrkr scheduled and PR-mode execution for AI posture visibility. +description: Deterministic Wrkr scheduled, PR-mode, and SARIF execution for AI posture visibility. author: Clyra-AI inputs: mode: - description: scheduled or pr + description: scheduled, pr, or sarif required: false default: scheduled top: @@ -31,6 +31,10 @@ inputs: description: deterministic marker for PR comment upsert identity required: false default: "wrkr-action-pr-mode-v1" + sarif_path: + description: SARIF output path when mode=sarif + required: false + default: "./.tmp/wrkr.sarif" runs: using: composite @@ -40,6 +44,7 @@ runs: env: WRKR_ACTION_BLOCK_THRESHOLD: ${{ inputs.block_threshold }} WRKR_ACTION_COMMENT_FINGERPRINT: ${{ inputs.comment_fingerprint }} + WRKR_ACTION_SARIF_PATH: ${{ inputs.sarif_path }} run: | set -euo pipefail - "${{ github.action_path }}/entrypoint.sh" "${{ inputs.mode }}" "${{ inputs.top }}" "${{ inputs.target_mode }}" "${{ inputs.target_value }}" "${{ inputs.config_path }}" + "${{ github.action_path }}/entrypoint.sh" "${{ inputs.mode }}" "${{ inputs.top }}" "${{ inputs.target_mode }}" "${{ inputs.target_value }}" "${{ inputs.config_path }}" "${{ inputs.sarif_path }}" 
diff --git a/action/entrypoint.sh b/action/entrypoint.sh index 9970584..ecfc047 100755 --- a/action/entrypoint.sh +++ b/action/entrypoint.sh @@ -6,11 +6,12 @@ top="${2:-5}" target_mode="${3:-}" target_value="${4:-}" config_path="${5:-}" +sarif_path="${6:-${WRKR_ACTION_SARIF_PATH:-./.tmp/wrkr.sarif}}" summary_path="${WRKR_ACTION_SUMMARY_PATH:-./.tmp/wrkr-action-summary.md}" comment_fingerprint="${WRKR_ACTION_COMMENT_FINGERPRINT:-wrkr-action-pr-mode-v1}" block_threshold="${WRKR_ACTION_BLOCK_THRESHOLD:-0}" -if [[ "${mode}" != "scheduled" && "${mode}" != "pr" ]]; then +if [[ "${mode}" != "scheduled" && "${mode}" != "pr" && "${mode}" != "sarif" ]]; then echo "unsupported mode: ${mode}" >&2 exit 6 fi @@ -64,6 +65,10 @@ else exit 6 fi +if [[ "${mode}" == "sarif" ]]; then + scan_args+=(--sarif --sarif-path "${sarif_path}") +fi + scan_json="$(run_wrkr scan "${scan_args[@]}")" run_wrkr report --top "${top}" --md --md-path "${summary_path}" --template operator --share-profile internal --json >/dev/null score_json="$(run_wrkr score --json)" @@ -190,3 +195,6 @@ fi # Deterministic mode marker for workflow consumers. 
echo "wrkr_action_mode=${mode}" echo "wrkr_action_summary=${summary_path}" +if [[ "${mode}" == "sarif" ]]; then + echo "wrkr_action_sarif=${sarif_path}" +fi diff --git a/cmd/wrkr/main.go b/cmd/wrkr/main.go index d143e8d..2e6fe5f 100644 --- a/cmd/wrkr/main.go +++ b/cmd/wrkr/main.go @@ -1,11 +1,16 @@ package main import ( + "context" "os" + "os/signal" + "syscall" "github.com/Clyra-AI/wrkr/core/cli" ) func main() { - os.Exit(cli.Run(os.Args[1:], os.Stdout, os.Stderr)) + ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM) + defer stop() + os.Exit(cli.RunWithContext(ctx, os.Args[1:], os.Stdout, os.Stderr)) } diff --git a/core/cli/jsonmode.go b/core/cli/jsonmode.go new file mode 100644 index 0000000..fe11240 --- /dev/null +++ b/core/cli/jsonmode.go @@ -0,0 +1,24 @@ +package cli + +import ( + "strconv" + "strings" +) + +// wantsJSONOutput inspects raw args to decide whether errors should be emitted as JSON. +func wantsJSONOutput(args []string) bool { + for _, arg := range args { + if arg == "--json" { + return true + } + if strings.HasPrefix(arg, "--json=") { + value := strings.TrimPrefix(arg, "--json=") + parsed, err := strconv.ParseBool(value) + if err != nil { + return true + } + return parsed + } + } + return false +} diff --git a/core/cli/jsonmode_test.go b/core/cli/jsonmode_test.go new file mode 100644 index 0000000..8d3c8d7 --- /dev/null +++ b/core/cli/jsonmode_test.go @@ -0,0 +1,55 @@ +package cli + +import ( + "bytes" + "encoding/json" + "testing" +) + +func TestSharedJSONModeParsingCases(t *testing.T) { + t.Parallel() + + cases := []struct { + name string + args []string + want bool + }{ + {name: "explicit json flag", args: []string{"--json"}, want: true}, + {name: "json true", args: []string{"--json=true"}, want: true}, + {name: "json false", args: []string{"--json=false"}, want: false}, + {name: "malformed bool falls back to json errors", args: []string{"--json=maybe"}, want: true}, + {name: "no json flag", args: 
[]string{"scan", "--path", "."}, want: false}, + } + + for _, tc := range cases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + if got := wantsJSONOutput(tc.args); got != tc.want { + t.Fatalf("wantsJSONOutput(%v)=%v, want %v", tc.args, got, tc.want) + } + }) + } +} + +func TestMalformedJSONModeFlagEmitsJSONErrorsAcrossCommands(t *testing.T) { + t.Parallel() + + commands := [][]string{ + {"--json=maybe", "--nope"}, + {"scan", "--json=maybe", "--path"}, + } + + for _, cmd := range commands { + var out bytes.Buffer + var errOut bytes.Buffer + code := Run(cmd, &out, &errOut) + if code != exitInvalidInput { + t.Fatalf("expected exit %d for %v, got %d", exitInvalidInput, cmd, code) + } + var payload map[string]any + if err := json.Unmarshal(errOut.Bytes(), &payload); err != nil { + t.Fatalf("expected JSON error payload for %v, got %q (%v)", cmd, errOut.String(), err) + } + } +} diff --git a/core/cli/root.go b/core/cli/root.go index 690bae3..a058255 100644 --- a/core/cli/root.go +++ b/core/cli/root.go @@ -1,12 +1,12 @@ package cli import ( + "context" "encoding/json" "errors" "flag" "fmt" "io" - "strconv" "strings" ) @@ -24,12 +24,20 @@ const ( // Run executes the wrkr CLI root command and returns a stable process exit code. func Run(args []string, stdout io.Writer, stderr io.Writer) int { + return RunWithContext(context.Background(), args, stdout, stderr) +} + +// RunWithContext executes the wrkr CLI root command with a caller-provided context. 
+func RunWithContext(ctx context.Context, args []string, stdout io.Writer, stderr io.Writer) int { + if ctx == nil { + ctx = context.Background() + } if len(args) == 0 { _, _ = fmt.Fprintln(stdout, "wrkr") return exitSuccess } - if code, handled := runKnownSubcommand(args[0], args[1:], stdout, stderr); handled { + if code, handled := runKnownSubcommand(ctx, args[0], args[1:], stdout, stderr); handled { return code } @@ -40,12 +48,12 @@ func Run(args []string, stdout io.Writer, stderr io.Writer) int { return runRootFlags(args, stdout, stderr) } -func runKnownSubcommand(name string, args []string, stdout io.Writer, stderr io.Writer) (int, bool) { +func runKnownSubcommand(ctx context.Context, name string, args []string, stdout io.Writer, stderr io.Writer) (int, bool) { switch name { case "init": return runInit(args, stdout, stderr), true case "scan": - return runScan(args, stdout, stderr), true + return runScanWithContext(ctx, args, stdout, stderr), true case "action": return runAction(args, stdout, stderr), true case "report": @@ -70,20 +78,22 @@ func runKnownSubcommand(name string, args []string, stdout io.Writer, stderr io. 
return runEvidence(args, stdout, stderr), true case "fix": return runFix(args, stdout, stderr), true + case "version": + return runVersion(args, stdout, stderr), true case "help": - return runHelp(args, stdout, stderr), true + return runHelp(ctx, args, stdout, stderr), true default: return 0, false } } -func runHelp(args []string, stdout io.Writer, stderr io.Writer) int { +func runHelp(ctx context.Context, args []string, stdout io.Writer, stderr io.Writer) int { if len(args) == 0 || isHelpFlag(args[0]) { return runRootFlags([]string{"--help"}, stdout, stderr) } helpArgs := append(append([]string{}, args[1:]...), "--help") - if code, handled := runKnownSubcommand(args[0], helpArgs, stdout, stderr); handled { + if code, handled := runKnownSubcommand(ctx, args[0], helpArgs, stdout, stderr); handled { return code } @@ -103,6 +113,7 @@ func runRootFlags(args []string, stdout io.Writer, stderr io.Writer) int { jsonOut := fs.Bool("json", false, "emit machine-readable output") quiet := fs.Bool("quiet", false, "suppress non-error output") explain := fs.Bool("explain", false, "emit human-readable rationale") + version := fs.Bool("version", false, "print wrkr version") fs.Usage = func() { writeRootUsage(fs.Output(), fs) } @@ -116,6 +127,9 @@ func runRootFlags(args []string, stdout io.Writer, stderr io.Writer) int { if *quiet && *explain && !*jsonOut { return emitError(stderr, jsonRequested || *jsonOut, "invalid_input", "--quiet and --explain cannot be used together", exitInvalidInput) } + if *version { + return emitVersion(stdout, jsonRequested || *jsonOut, *jsonOut) + } if *jsonOut { _ = json.NewEncoder(stdout).Encode(map[string]any{ @@ -157,6 +171,7 @@ func writeRootUsage(out io.Writer, fs *flag.FlagSet) { _, _ = fmt.Fprintln(out, " verify verify proof chain integrity") _, _ = fmt.Fprintln(out, " evidence build compliance-ready evidence bundles") _, _ = fmt.Fprintln(out, " fix apply deterministic remediations") + _, _ = fmt.Fprintln(out, " version print wrkr version") _, _ 
= fmt.Fprintln(out, "") _, _ = fmt.Fprintln(out, "Examples:") _, _ = fmt.Fprintln(out, " wrkr scan --path . --json") @@ -197,20 +212,3 @@ func emitError(stderr io.Writer, jsonOut bool, code, message string, exitCode in } return exitCode } - -func wantsJSONOutput(args []string) bool { - for _, arg := range args { - if arg == "--json" { - return true - } - if strings.HasPrefix(arg, "--json=") { - value := strings.TrimPrefix(arg, "--json=") - parsed, err := strconv.ParseBool(value) - if err != nil { - return true - } - return parsed - } - } - return false -} diff --git a/core/cli/scan.go b/core/cli/scan.go index 0af2759..11248b9 100644 --- a/core/cli/scan.go +++ b/core/cli/scan.go @@ -7,11 +7,7 @@ import ( "flag" "fmt" "io" - "math" "os" - "path/filepath" - "runtime/debug" - "sort" "strings" "time" @@ -22,13 +18,10 @@ import ( "github.com/Clyra-AI/wrkr/core/detect" detectdefaults "github.com/Clyra-AI/wrkr/core/detect/defaults" "github.com/Clyra-AI/wrkr/core/diff" - "github.com/Clyra-AI/wrkr/core/identity" + exportsarif "github.com/Clyra-AI/wrkr/core/export/sarif" "github.com/Clyra-AI/wrkr/core/lifecycle" "github.com/Clyra-AI/wrkr/core/manifest" - "github.com/Clyra-AI/wrkr/core/model" - "github.com/Clyra-AI/wrkr/core/policy" "github.com/Clyra-AI/wrkr/core/policy/approvedtools" - policyeval "github.com/Clyra-AI/wrkr/core/policy/eval" "github.com/Clyra-AI/wrkr/core/policy/productiontargets" profilemodel "github.com/Clyra-AI/wrkr/core/policy/profile" profileeval "github.com/Clyra-AI/wrkr/core/policy/profileeval" @@ -37,13 +30,14 @@ import ( "github.com/Clyra-AI/wrkr/core/risk" "github.com/Clyra-AI/wrkr/core/score" "github.com/Clyra-AI/wrkr/core/source" - "github.com/Clyra-AI/wrkr/core/source/github" - "github.com/Clyra-AI/wrkr/core/source/local" - "github.com/Clyra-AI/wrkr/core/source/org" "github.com/Clyra-AI/wrkr/core/state" ) -func runScan(args []string, stdout io.Writer, stderr io.Writer) int { +func runScanWithContext(parentCtx context.Context, args []string, stdout 
io.Writer, stderr io.Writer) int { + if parentCtx == nil { + parentCtx = context.Background() + } + jsonRequested := wantsJSONOutput(args) fs := flag.NewFlagSet("scan", flag.ContinueOnError) @@ -59,6 +53,7 @@ func runScan(args []string, stdout io.Writer, stderr io.Writer) int { repo := fs.String("repo", "", "scan one repo owner/repo") orgTarget := fs.String("org", "", "scan an organization") pathTarget := fs.String("path", "", "scan local pre-cloned repositories") + timeout := fs.Duration("timeout", 0, "optional scan timeout (0 disables)") diffMode := fs.Bool("diff", false, "show only changes since previous scan") enrich := fs.Bool("enrich", false, "enable non-deterministic enrichment lookups (network required)") baselinePath := fs.String("baseline", "", "optional fallback baseline when local state is absent") @@ -76,10 +71,15 @@ func runScan(args []string, stdout io.Writer, stderr io.Writer) int { reportTemplate := fs.String("report-template", string(reportcore.TemplateOperator), "scan summary template [exec|operator|audit|public]") reportShareProfile := fs.String("report-share-profile", string(reportcore.ShareProfileInternal), "scan summary share profile [internal|public]") reportTop := fs.Int("report-top", 5, "number of top findings included in scan summary artifact") + sarifOut := fs.Bool("sarif", false, "emit SARIF artifact") + sarifPath := fs.String("sarif-path", "wrkr.sarif", "SARIF output path") if code, handled := parseFlags(fs, args, stderr, jsonRequested || *jsonOut); handled { return code } + if *timeout < 0 { + return emitError(stderr, jsonRequested || *jsonOut, "invalid_input", "--timeout must be >= 0", exitInvalidInput) + } productionTargetsFile := strings.TrimSpace(*productionTargetsPath) if *productionTargetsStrict && productionTargetsFile == "" { return emitError( @@ -118,24 +118,31 @@ func runScan(args []string, stdout io.Writer, stderr io.Writer) int { } statePath := state.ResolvePath(*statePathFlag) - ctx := context.Background() + ctx := 
parentCtx + cancel := func() {} + if *timeout > 0 { + ctx, cancel = context.WithTimeout(parentCtx, *timeout) + } + defer cancel() scanStartedAt := time.Now().UTC().Truncate(time.Second) manifestOut, findings, err := acquireSources(ctx, targetMode, targetValue, *githubBaseURL, *githubToken, statePath) if err != nil { - return emitError(stderr, jsonRequested || *jsonOut, "runtime_failure", err.Error(), exitRuntime) + return emitScanRuntimeError(stderr, jsonRequested || *jsonOut, err) } scopes := detectorScopes(manifestOut) + detectorErrors := []detect.DetectorError{} if len(scopes) > 0 { registry, regErr := detectdefaults.Registry() if regErr != nil { - return emitError(stderr, jsonRequested || *jsonOut, "runtime_failure", regErr.Error(), exitRuntime) + return emitScanRuntimeError(stderr, jsonRequested || *jsonOut, regErr) } detected, runErr := registry.Run(ctx, scopes, detect.Options{Enrich: *enrich}) if runErr != nil { - return emitError(stderr, jsonRequested || *jsonOut, "runtime_failure", runErr.Error(), exitRuntime) + return emitScanRuntimeError(stderr, jsonRequested || *jsonOut, runErr) } - findings = append(findings, detected...) + findings = append(findings, detected.Findings...) + detectorErrors = append(detectorErrors, detected.DetectorErrors...) 
policyFindings, policyErr := evaluatePolicies(scopes, findings, strings.TrimSpace(*policyPath)) if policyErr != nil { @@ -147,7 +154,7 @@ func runScan(args []string, stdout io.Writer, stderr io.Writer) int { previousSnapshot, loadPreviousErr := loadPreviousSnapshot(statePath, strings.TrimSpace(*baselinePath)) if loadPreviousErr != nil { - return emitError(stderr, jsonRequested || *jsonOut, "runtime_failure", loadPreviousErr.Error(), exitRuntime) + return emitScanRuntimeError(stderr, jsonRequested || *jsonOut, loadPreviousErr) } now := time.Now().UTC().Truncate(time.Second) @@ -161,27 +168,27 @@ func runScan(args []string, stdout io.Writer, stderr io.Writer) int { manifestPath := manifest.ResolvePath(statePath) previousManifest, manifestErr := loadManifest(manifestPath) if manifestErr != nil { - return emitError(stderr, jsonRequested || *jsonOut, "runtime_failure", manifestErr.Error(), exitRuntime) + return emitScanRuntimeError(stderr, jsonRequested || *jsonOut, manifestErr) } baseContexts := buildFindingContexts(riskReport) observed := observedTools(findings, baseContexts) nextManifest, transitions := lifecycle.Reconcile(previousManifest, observed, now) if err := manifest.Save(manifestPath, nextManifest); err != nil { - return emitError(stderr, jsonRequested || *jsonOut, "runtime_failure", err.Error(), exitRuntime) + return emitScanRuntimeError(stderr, jsonRequested || *jsonOut, err) } chainPath := lifecycle.ChainPath(statePath) chain, chainErr := lifecycle.LoadChain(chainPath) if chainErr != nil { - return emitError(stderr, jsonRequested || *jsonOut, "runtime_failure", chainErr.Error(), exitRuntime) + return emitScanRuntimeError(stderr, jsonRequested || *jsonOut, chainErr) } for _, transition := range transitions { if err := lifecycle.AppendTransitionRecord(chain, transition, "lifecycle_transition"); err != nil { - return emitError(stderr, jsonRequested || *jsonOut, "runtime_failure", err.Error(), exitRuntime) + return emitScanRuntimeError(stderr, jsonRequested || 
*jsonOut, err) } } if err := lifecycle.SaveChain(chainPath, chain); err != nil { - return emitError(stderr, jsonRequested || *jsonOut, "runtime_failure", err.Error(), exitRuntime) + return emitScanRuntimeError(stderr, jsonRequested || *jsonOut, err) } identityByAgent := map[string]manifest.IdentityRecord{} @@ -274,7 +281,7 @@ func runScan(args []string, stdout io.Writer, stderr io.Writer) int { Previous: previousScore, }) if _, err := proofemit.EmitScan(statePath, now, findings, riskReport, profileResult, postureScore, transitions); err != nil { - return emitError(stderr, jsonRequested || *jsonOut, "runtime_failure", err.Error(), exitRuntime) + return emitScanRuntimeError(stderr, jsonRequested || *jsonOut, err) } snapshot := state.Snapshot{ @@ -289,7 +296,7 @@ func runScan(args []string, stdout io.Writer, stderr io.Writer) int { Transitions: transitions, } if err := state.Save(statePath, snapshot); err != nil { - return emitError(stderr, jsonRequested || *jsonOut, "runtime_failure", err.Error(), exitRuntime) + return emitScanRuntimeError(stderr, jsonRequested || *jsonOut, err) } payload := map[string]any{ @@ -297,10 +304,19 @@ func runScan(args []string, stdout io.Writer, stderr io.Writer) int { "target": manifestOut.Target, "source_manifest": manifestOut, } + if len(manifestOut.Failures) > 0 { + payload["partial_result"] = true + payload["source_errors"] = manifestOut.Failures + payload["source_degraded"] = hasDegradedFailures(manifestOut.Failures) + } + if len(detectorErrors) > 0 { + payload["detector_errors"] = detectorErrors + } if len(productionTargetWarnings) > 0 { payload["policy_warnings"] = append([]string(nil), productionTargetWarnings...) 
} scanReportPath := "" + scanSARIFPath := "" if *diffMode { previousFindings := []source.Finding{} @@ -344,7 +360,7 @@ func runScan(args []string, stdout io.Writer, stderr io.Writer) int { if isArtifactPathError(reportErr) { return emitError(stderr, jsonRequested || *jsonOut, "invalid_input", reportErr.Error(), exitInvalidInput) } - return emitError(stderr, jsonRequested || *jsonOut, "runtime_failure", reportErr.Error(), exitRuntime) + return emitScanRuntimeError(stderr, jsonRequested || *jsonOut, reportErr) } scanReportPath = mdOutPath payload["report"] = map[string]any{ @@ -353,12 +369,32 @@ func runScan(args []string, stdout io.Writer, stderr io.Writer) int { "share_profile": string(shareProfile), } } + if *sarifOut { + path, pathErr := resolveArtifactOutputPath(*sarifPath) + if pathErr != nil { + return emitError(stderr, jsonRequested || *jsonOut, "invalid_input", pathErr.Error(), exitInvalidInput) + } + report := exportsarif.Build(findings, wrkrVersion()) + if writeErr := exportsarif.Write(path, report); writeErr != nil { + return emitScanRuntimeError(stderr, jsonRequested || *jsonOut, writeErr) + } + scanSARIFPath = path + payload["sarif"] = map[string]any{ + "path": path, + } + } if *jsonOut { _ = json.NewEncoder(stdout).Encode(payload) return exitSuccess } if !*quiet { + for _, sourceFailure := range manifestOut.Failures { + _, _ = fmt.Fprintf(stderr, "warning: source repo=%s reason=%s\n", sourceFailure.Repo, sourceFailure.Reason) + } + for _, detectorErr := range detectorErrors { + _, _ = fmt.Fprintf(stderr, "warning: detector=%s repo=%s org=%s code=%s class=%s message=%s\n", detectorErr.Detector, detectorErr.Repo, detectorErr.Org, detectorErr.Code, detectorErr.Class, detectorErr.Message) + } for _, warning := range productionTargetWarnings { _, _ = fmt.Fprintf(stderr, "warning: %s\n", warning) } @@ -371,392 +407,22 @@ func runScan(args []string, stdout io.Writer, stderr io.Writer) int { if scanReportPath != "" { _, _ = fmt.Fprintf(stdout, "scan report: 
%s\n", scanReportPath) } + if scanSARIFPath != "" { + _, _ = fmt.Fprintf(stdout, "scan sarif: %s\n", scanSARIFPath) + } return exitSuccess } _, _ = fmt.Fprintln(stdout, "wrkr scan complete") return exitSuccess } -func resolveScanTarget(repo, orgInput, pathInput, configPath string) (config.TargetMode, string, config.Config, error) { - mode, value, err := resolveTarget(repo, orgInput, pathInput) - if err == nil { - return mode, value, config.Default(), nil - } - if strings.TrimSpace(repo) != "" || strings.TrimSpace(orgInput) != "" || strings.TrimSpace(pathInput) != "" { - return "", "", config.Config{}, err - } - - resolvedPath, pathErr := config.ResolvePath(configPath) - if pathErr != nil { - return "", "", config.Config{}, pathErr - } - cfg, loadErr := config.Load(resolvedPath) - if loadErr != nil { - return "", "", config.Config{}, fmt.Errorf("no target provided and no usable config default target (%v)", loadErr) - } - return cfg.DefaultTarget.Mode, cfg.DefaultTarget.Value, cfg, nil -} - -func acquireSources(ctx context.Context, mode config.TargetMode, value, githubBaseURL, githubToken, statePath string) (source.Manifest, []source.Finding, error) { - connector := github.NewConnector(githubBaseURL, githubToken, nil) - - manifestOut := source.Manifest{Target: source.Target{Mode: string(mode), Value: value}} - var findings []source.Finding - materializeRoot := "" - if mode == config.TargetRepo || mode == config.TargetOrg { - root, err := prepareMaterializedRoot(statePath) - if err != nil { - return source.Manifest{}, nil, err - } - materializeRoot = root - } - - sourceFinding := func(repoManifest source.RepoManifest, orgName, permission string) source.Finding { - return source.Finding{ - FindingType: "source_discovery", - Severity: "low", - ToolType: "source_repo", - Location: repoManifest.Location, - Repo: repoManifest.Repo, - Org: orgName, - Permissions: []string{permission}, - Detector: "source", - } - } - - switch mode { - case config.TargetRepo: - repoManifest, 
err := connector.AcquireRepo(ctx, value) - if err != nil { - return source.Manifest{}, nil, err - } - materialized, materializeErr := connector.MaterializeRepo(ctx, repoManifest.Repo, materializeRoot) - if materializeErr != nil { - return source.Manifest{}, nil, fmt.Errorf("materialize repo %s: %w", repoManifest.Repo, materializeErr) - } - manifestOut.Repos = []source.RepoManifest{materialized} - owner := strings.Split(value, "/")[0] - findings = append(findings, sourceFinding(materialized, owner, "repo.contents.read")) - case config.TargetOrg: - repos, failures, err := org.Acquire(ctx, value, connector, connector) - if err != nil { - return source.Manifest{}, nil, err - } - materializedRepos := make([]source.RepoManifest, 0, len(repos)) - for _, repoManifest := range repos { - materialized, materializeErr := connector.MaterializeRepo(ctx, repoManifest.Repo, materializeRoot) - if materializeErr != nil { - return source.Manifest{}, nil, fmt.Errorf("materialize org repo %s: %w", repoManifest.Repo, materializeErr) - } - materializedRepos = append(materializedRepos, materialized) - } - manifestOut.Repos = materializedRepos - manifestOut.Failures = failures - for _, repoManifest := range materializedRepos { - findings = append(findings, sourceFinding(repoManifest, value, "repo.contents.read")) - } - case config.TargetPath: - repos, err := local.Acquire(value) - if err != nil { - return source.Manifest{}, nil, err - } - manifestOut.Repos = repos - for _, repoManifest := range repos { - repoManifest.Location = filepath.ToSlash(repoManifest.Location) - findings = append(findings, sourceFinding(repoManifest, "local", "filesystem.read")) - } - default: - return source.Manifest{}, nil, fmt.Errorf("unsupported target mode %q", mode) - } - - manifestOut = source.SortManifest(manifestOut) - source.SortFindings(findings) - return manifestOut, findings, nil -} - -func prepareMaterializedRoot(statePath string) (string, error) { - cleanState := 
filepath.Clean(strings.TrimSpace(statePath)) - if cleanState == "" || cleanState == "." { - return "", fmt.Errorf("state path is required for materialized source acquisition") - } - root := filepath.Join(filepath.Dir(cleanState), "materialized-sources") - if err := os.RemoveAll(root); err != nil { - return "", fmt.Errorf("reset materialized source root: %w", err) - } - if err := os.MkdirAll(root, 0o750); err != nil { - return "", fmt.Errorf("create materialized source root: %w", err) - } - return root, nil -} - -func evaluatePolicies(scopes []detect.Scope, findings []source.Finding, customPolicyPath string) ([]source.Finding, error) { - byRepo := map[string][]source.Finding{} - for _, finding := range findings { - key := finding.Org + "::" + finding.Repo - byRepo[key] = append(byRepo[key], finding) - } - - out := make([]source.Finding, 0) - for _, scope := range scopes { - rules, err := policy.LoadRules(customPolicyPath, scope.Root) - if err != nil { - return nil, err - } - key := scope.Org + "::" + scope.Repo - policyFindings := policyeval.Evaluate(scope.Repo, scope.Org, byRepo[key], rules) - out = append(out, policyFindings...) - } - source.SortFindings(out) - return out, nil -} - -func detectorScopes(manifestOut source.Manifest) []detect.Scope { - scopes := make([]detect.Scope, 0, len(manifestOut.Repos)) - for _, repo := range manifestOut.Repos { - info, err := os.Stat(repo.Location) // #nosec G703 -- repo locations come from deterministic source acquisition inputs for current scan scope. 
- if err != nil || !info.IsDir() { - continue - } - orgName := deriveOrg(manifestOut.Target, repo) - scopes = append(scopes, detect.Scope{Org: orgName, Repo: repo.Repo, Root: repo.Location}) - } - return scopes -} - -func deriveOrg(target source.Target, repo source.RepoManifest) string { - switch target.Mode { - case string(config.TargetOrg): - if strings.TrimSpace(target.Value) == "" { - return "local" - } - return target.Value - case string(config.TargetRepo): - parts := strings.Split(repo.Repo, "/") - if len(parts) > 1 && strings.TrimSpace(parts[0]) != "" { - return parts[0] - } - parts = strings.Split(target.Value, "/") - if len(parts) > 1 { - return parts[0] - } +func emitScanRuntimeError(stderr io.Writer, jsonOut bool, err error) int { + switch { + case errors.Is(err, context.DeadlineExceeded): + return emitError(stderr, jsonOut, "scan_timeout", "scan exceeded configured timeout", exitRuntime) + case errors.Is(err, context.Canceled): + return emitError(stderr, jsonOut, "scan_canceled", "scan canceled by signal or parent context", exitRuntime) default: - return "local" - } - return "local" -} - -func loadPreviousSnapshot(statePath, baselinePath string) (*state.Snapshot, error) { - previous, err := state.Load(statePath) - if err == nil { - return &previous, nil - } - if !errors.Is(err, os.ErrNotExist) && !strings.Contains(strings.ToLower(err.Error()), "no such file") { - return nil, err - } - if strings.TrimSpace(baselinePath) != "" { - fallback, fallbackErr := state.Load(baselinePath) - if fallbackErr == nil { - return &fallback, nil - } - if !errors.Is(fallbackErr, os.ErrNotExist) && !strings.Contains(strings.ToLower(fallbackErr.Error()), "no such file") { - return nil, fallbackErr - } - } - return nil, nil -} - -func loadManifest(path string) (manifest.Manifest, error) { - loaded, err := manifest.Load(path) - if err == nil { - return loaded, nil - } - if errors.Is(err, os.ErrNotExist) || strings.Contains(strings.ToLower(err.Error()), "no such file") { - 
return manifest.Manifest{Version: manifest.Version, Identities: []manifest.IdentityRecord{}}, nil - } - return manifest.Manifest{}, err -} - -func buildFindingContexts(report risk.Report) map[string]agginventory.ToolContext { - out := map[string]agginventory.ToolContext{} - for _, item := range report.Ranked { - key := agginventory.KeyForFinding(item.Finding) - existing := out[key] - if item.Score > existing.RiskScore { - existing = agginventory.ToolContext{ - EndpointClass: item.EndpointClass, - DataClass: item.DataClass, - AutonomyLevel: item.AutonomyLevel, - RiskScore: item.Score, - } - } - out[key] = existing - } - return out -} - -func observedTools(findings []source.Finding, contexts map[string]agginventory.ToolContext) []lifecycle.ObservedTool { - byAgent := map[string]lifecycle.ObservedTool{} - for _, finding := range findings { - if !model.IsIdentityBearingFinding(finding) { - continue - } - org := strings.TrimSpace(finding.Org) - if org == "" { - org = "local" - } - toolID := identity.ToolID(finding.ToolType, finding.Location) - agentID := identity.AgentID(toolID, org) - ctx := contexts[agginventory.KeyForFinding(finding)] - candidate := lifecycle.ObservedTool{ - AgentID: agentID, - ToolID: toolID, - ToolType: finding.ToolType, - Org: org, - Repo: finding.Repo, - Location: finding.Location, - DataClass: ctx.DataClass, - EndpointClass: ctx.EndpointClass, - AutonomyLevel: ctx.AutonomyLevel, - RiskScore: ctx.RiskScore, - } - existing, ok := byAgent[agentID] - if !ok || candidate.RiskScore >= existing.RiskScore { - byAgent[agentID] = candidate - } - } - out := make([]lifecycle.ObservedTool, 0, len(byAgent)) - for _, item := range byAgent { - out = append(out, item) - } - sort.Slice(out, func(i, j int) bool { return out[i].AgentID < out[j].AgentID }) - return out -} - -func enrichFindingContexts(findings []source.Finding, base map[string]agginventory.ToolContext, identities map[string]manifest.IdentityRecord) map[string]agginventory.ToolContext { - out := 
map[string]agginventory.ToolContext{} - for key, value := range base { - out[key] = value - } - for _, finding := range findings { - org := strings.TrimSpace(finding.Org) - if org == "" { - org = "local" - } - toolID := identity.ToolID(finding.ToolType, finding.Location) - agentID := identity.AgentID(toolID, org) - record, exists := identities[agentID] - if !exists { - continue - } - key := agginventory.KeyForFinding(finding) - ctx := out[key] - ctx.ApprovalStatus = fallback(record.ApprovalState, "missing") - ctx.LifecycleState = fallback(record.Status, identity.StateDiscovered) - if ctx.DataClass == "" { - ctx.DataClass = record.DataClass - } - if ctx.EndpointClass == "" { - ctx.EndpointClass = record.EndpointClass - } - if ctx.AutonomyLevel == "" { - ctx.AutonomyLevel = record.AutonomyLevel - } - if record.RiskScore > ctx.RiskScore { - ctx.RiskScore = record.RiskScore - } - out[key] = ctx - } - return out -} - -func buildScanMethodology(manifestOut source.Manifest, findings []source.Finding, startedAt, completedAt time.Time) agginventory.MethodologySummary { - fileSet := map[string]struct{}{} - detectorCounts := map[string]int{} - for _, finding := range findings { - repo := strings.TrimSpace(finding.Repo) - location := strings.TrimSpace(finding.Location) - if repo != "" && location != "" { - fileSet[repo+"::"+location] = struct{}{} - } - detector := strings.TrimSpace(finding.Detector) - if detector == "" { - detector = "unknown" - } - detectorCounts[detector]++ - } - - detectors := make([]agginventory.MethodologyDetector, 0, len(detectorCounts)) - for detectorID, count := range detectorCounts { - detectors = append(detectors, agginventory.MethodologyDetector{ - ID: detectorID, - Version: "v1", - FindingCount: count, - }) - } - sort.Slice(detectors, func(i, j int) bool { - return detectors[i].ID < detectors[j].ID - }) - - started := startedAt.UTC().Truncate(time.Second) - completed := completedAt.UTC().Truncate(time.Second) - if completed.Before(started) { - 
completed = started - } - durationSeconds := math.Round(completed.Sub(started).Seconds()*100) / 100 - - return agginventory.MethodologySummary{ - WrkrVersion: scanWrkrVersion(), - ScanStartedAt: started.Format(time.RFC3339), - ScanCompletedAt: completed.Format(time.RFC3339), - ScanDurationSeconds: durationSeconds, - RepoCount: len(manifestOut.Repos), - FileCountProcessed: len(fileSet), - Detectors: detectors, - } -} - -func scanWrkrVersion() string { - info, ok := debug.ReadBuildInfo() - if !ok { - return "devel" - } - version := strings.TrimSpace(info.Main.Version) - if version == "" || version == "(devel)" { - return "devel" - } - return version -} - -func repoRootFromScopes(scopes []detect.Scope) string { - if len(scopes) == 0 { - return "" - } - sort.Slice(scopes, func(i, j int) bool { - if scopes[i].Org != scopes[j].Org { - return scopes[i].Org < scopes[j].Org - } - if scopes[i].Repo != scopes[j].Repo { - return scopes[i].Repo < scopes[j].Repo - } - return scopes[i].Root < scopes[j].Root - }) - return scopes[0].Root -} - -func fallback(value, defaultValue string) string { - if strings.TrimSpace(value) == "" { - return defaultValue - } - return value -} - -func driftTransitionCount(transitions []lifecycle.Transition) int { - count := 0 - for _, transition := range transitions { - switch strings.TrimSpace(transition.Trigger) { - case "removed", "reappeared", "modified": - count++ - } + return emitError(stderr, jsonOut, "runtime_failure", err.Error(), exitRuntime) } - return count } diff --git a/core/cli/scan_helpers.go b/core/cli/scan_helpers.go new file mode 100644 index 0000000..c491717 --- /dev/null +++ b/core/cli/scan_helpers.go @@ -0,0 +1,437 @@ +package cli + +import ( + "context" + "errors" + "fmt" + "math" + "os" + "path/filepath" + "sort" + "strings" + "time" + + agginventory "github.com/Clyra-AI/wrkr/core/aggregate/inventory" + "github.com/Clyra-AI/wrkr/core/config" + "github.com/Clyra-AI/wrkr/core/detect" + "github.com/Clyra-AI/wrkr/core/identity" + 
"github.com/Clyra-AI/wrkr/core/lifecycle" + "github.com/Clyra-AI/wrkr/core/manifest" + "github.com/Clyra-AI/wrkr/core/model" + "github.com/Clyra-AI/wrkr/core/policy" + policyeval "github.com/Clyra-AI/wrkr/core/policy/eval" + "github.com/Clyra-AI/wrkr/core/risk" + "github.com/Clyra-AI/wrkr/core/source" + "github.com/Clyra-AI/wrkr/core/source/github" + "github.com/Clyra-AI/wrkr/core/source/local" + "github.com/Clyra-AI/wrkr/core/source/org" + "github.com/Clyra-AI/wrkr/core/state" +) + +func resolveScanTarget(repo, orgInput, pathInput, configPath string) (config.TargetMode, string, config.Config, error) { + mode, value, err := resolveTarget(repo, orgInput, pathInput) + if err == nil { + return mode, value, config.Default(), nil + } + if strings.TrimSpace(repo) != "" || strings.TrimSpace(orgInput) != "" || strings.TrimSpace(pathInput) != "" { + return "", "", config.Config{}, err + } + + resolvedPath, pathErr := config.ResolvePath(configPath) + if pathErr != nil { + return "", "", config.Config{}, pathErr + } + cfg, loadErr := config.Load(resolvedPath) + if loadErr != nil { + return "", "", config.Config{}, fmt.Errorf("no target provided and no usable config default target (%v)", loadErr) + } + return cfg.DefaultTarget.Mode, cfg.DefaultTarget.Value, cfg, nil +} + +func acquireSources(ctx context.Context, mode config.TargetMode, value, githubBaseURL, githubToken, statePath string) (source.Manifest, []source.Finding, error) { + if ctxErr := ctx.Err(); ctxErr != nil { + return source.Manifest{}, nil, ctxErr + } + + connector := github.NewConnector(githubBaseURL, githubToken, nil) + + manifestOut := source.Manifest{Target: source.Target{Mode: string(mode), Value: value}} + var findings []source.Finding + materializeRoot := "" + if mode == config.TargetRepo || mode == config.TargetOrg { + root, err := prepareMaterializedRoot(statePath) + if err != nil { + return source.Manifest{}, nil, err + } + materializeRoot = root + } + + sourceFinding := func(repoManifest 
source.RepoManifest, orgName, permission string) source.Finding { + return source.Finding{ + FindingType: "source_discovery", + Severity: "low", + ToolType: "source_repo", + Location: repoManifest.Location, + Repo: repoManifest.Repo, + Org: orgName, + Permissions: []string{permission}, + Detector: "source", + } + } + + switch mode { + case config.TargetRepo: + if ctxErr := ctx.Err(); ctxErr != nil { + return source.Manifest{}, nil, ctxErr + } + repoManifest, err := connector.AcquireRepo(ctx, value) + if err != nil { + return source.Manifest{}, nil, err + } + materialized, materializeErr := connector.MaterializeRepo(ctx, repoManifest.Repo, materializeRoot) + if materializeErr != nil { + return source.Manifest{}, nil, fmt.Errorf("materialize repo %s: %w", repoManifest.Repo, materializeErr) + } + manifestOut.Repos = []source.RepoManifest{materialized} + owner := strings.Split(value, "/")[0] + findings = append(findings, sourceFinding(materialized, owner, "repo.contents.read")) + case config.TargetOrg: + if ctxErr := ctx.Err(); ctxErr != nil { + return source.Manifest{}, nil, ctxErr + } + repos, failures, err := org.Acquire(ctx, value, connector, connector) + if err != nil { + return source.Manifest{}, nil, err + } + if ctxErr := ctx.Err(); ctxErr != nil { + return source.Manifest{}, nil, ctxErr + } + materializedRepos := make([]source.RepoManifest, 0, len(repos)) + for _, repoManifest := range repos { + if ctxErr := ctx.Err(); ctxErr != nil { + return source.Manifest{}, nil, ctxErr + } + materialized, materializeErr := connector.MaterializeRepo(ctx, repoManifest.Repo, materializeRoot) + if materializeErr != nil { + reason := materializeErr.Error() + if github.IsDegradedError(materializeErr) { + reason = "connector_degraded: " + reason + } + failures = append(failures, source.RepoFailure{ + Repo: repoManifest.Repo, + Reason: reason, + }) + continue + } + materializedRepos = append(materializedRepos, materialized) + } + manifestOut.Repos = materializedRepos + 
manifestOut.Failures = failures + for _, repoManifest := range materializedRepos { + findings = append(findings, sourceFinding(repoManifest, value, "repo.contents.read")) + } + case config.TargetPath: + if ctxErr := ctx.Err(); ctxErr != nil { + return source.Manifest{}, nil, ctxErr + } + repos, err := local.Acquire(value) + if err != nil { + return source.Manifest{}, nil, err + } + manifestOut.Repos = repos + for _, repoManifest := range repos { + repoManifest.Location = filepath.ToSlash(repoManifest.Location) + findings = append(findings, sourceFinding(repoManifest, "local", "filesystem.read")) + } + default: + return source.Manifest{}, nil, fmt.Errorf("unsupported target mode %q", mode) + } + + manifestOut = source.SortManifest(manifestOut) + source.SortFindings(findings) + return manifestOut, findings, nil +} + +func prepareMaterializedRoot(statePath string) (string, error) { + cleanState := filepath.Clean(strings.TrimSpace(statePath)) + if cleanState == "" || cleanState == "." { + return "", fmt.Errorf("state path is required for materialized source acquisition") + } + root := filepath.Join(filepath.Dir(cleanState), "materialized-sources") + if err := os.RemoveAll(root); err != nil { + return "", fmt.Errorf("reset materialized source root: %w", err) + } + if err := os.MkdirAll(root, 0o750); err != nil { + return "", fmt.Errorf("create materialized source root: %w", err) + } + return root, nil +} + +func evaluatePolicies(scopes []detect.Scope, findings []source.Finding, customPolicyPath string) ([]source.Finding, error) { + byRepo := map[string][]source.Finding{} + for _, finding := range findings { + key := finding.Org + "::" + finding.Repo + byRepo[key] = append(byRepo[key], finding) + } + + out := make([]source.Finding, 0) + for _, scope := range scopes { + rules, err := policy.LoadRules(customPolicyPath, scope.Root) + if err != nil { + return nil, err + } + key := scope.Org + "::" + scope.Repo + policyFindings := policyeval.Evaluate(scope.Repo, scope.Org, 
byRepo[key], rules) + out = append(out, policyFindings...) + } + source.SortFindings(out) + return out, nil +} + +func detectorScopes(manifestOut source.Manifest) []detect.Scope { + scopes := make([]detect.Scope, 0, len(manifestOut.Repos)) + for _, repo := range manifestOut.Repos { + location := strings.TrimSpace(repo.Location) + if location == "" { + continue + } + orgName := deriveOrg(manifestOut.Target, repo) + scopes = append(scopes, detect.Scope{Org: orgName, Repo: repo.Repo, Root: location}) + } + return scopes +} + +func deriveOrg(target source.Target, repo source.RepoManifest) string { + switch target.Mode { + case string(config.TargetOrg): + if strings.TrimSpace(target.Value) == "" { + return "local" + } + return target.Value + case string(config.TargetRepo): + parts := strings.Split(repo.Repo, "/") + if len(parts) > 1 && strings.TrimSpace(parts[0]) != "" { + return parts[0] + } + parts = strings.Split(target.Value, "/") + if len(parts) > 1 { + return parts[0] + } + default: + return "local" + } + return "local" +} + +func loadPreviousSnapshot(statePath, baselinePath string) (*state.Snapshot, error) { + previous, err := state.Load(statePath) + if err == nil { + return &previous, nil + } + if !errors.Is(err, os.ErrNotExist) && !strings.Contains(strings.ToLower(err.Error()), "no such file") { + return nil, err + } + if strings.TrimSpace(baselinePath) != "" { + fallback, fallbackErr := state.Load(baselinePath) + if fallbackErr == nil { + return &fallback, nil + } + if !errors.Is(fallbackErr, os.ErrNotExist) && !strings.Contains(strings.ToLower(fallbackErr.Error()), "no such file") { + return nil, fallbackErr + } + } + return nil, nil +} + +func loadManifest(path string) (manifest.Manifest, error) { + loaded, err := manifest.Load(path) + if err == nil { + return loaded, nil + } + if errors.Is(err, os.ErrNotExist) || strings.Contains(strings.ToLower(err.Error()), "no such file") { + return manifest.Manifest{Version: manifest.Version, Identities: 
[]manifest.IdentityRecord{}}, nil + } + return manifest.Manifest{}, err +} + +func buildFindingContexts(report risk.Report) map[string]agginventory.ToolContext { + out := map[string]agginventory.ToolContext{} + for _, item := range report.Ranked { + key := agginventory.KeyForFinding(item.Finding) + existing := out[key] + if item.Score > existing.RiskScore { + existing = agginventory.ToolContext{ + EndpointClass: item.EndpointClass, + DataClass: item.DataClass, + AutonomyLevel: item.AutonomyLevel, + RiskScore: item.Score, + } + } + out[key] = existing + } + return out +} + +func observedTools(findings []source.Finding, contexts map[string]agginventory.ToolContext) []lifecycle.ObservedTool { + byAgent := map[string]lifecycle.ObservedTool{} + for _, finding := range findings { + if !model.IsIdentityBearingFinding(finding) { + continue + } + org := strings.TrimSpace(finding.Org) + if org == "" { + org = "local" + } + toolID := identity.ToolID(finding.ToolType, finding.Location) + agentID := identity.AgentID(toolID, org) + ctx := contexts[agginventory.KeyForFinding(finding)] + candidate := lifecycle.ObservedTool{ + AgentID: agentID, + ToolID: toolID, + ToolType: finding.ToolType, + Org: org, + Repo: finding.Repo, + Location: finding.Location, + DataClass: ctx.DataClass, + EndpointClass: ctx.EndpointClass, + AutonomyLevel: ctx.AutonomyLevel, + RiskScore: ctx.RiskScore, + } + existing, ok := byAgent[agentID] + if !ok || candidate.RiskScore >= existing.RiskScore { + byAgent[agentID] = candidate + } + } + out := make([]lifecycle.ObservedTool, 0, len(byAgent)) + for _, item := range byAgent { + out = append(out, item) + } + sort.Slice(out, func(i, j int) bool { return out[i].AgentID < out[j].AgentID }) + return out +} + +func enrichFindingContexts(findings []source.Finding, base map[string]agginventory.ToolContext, identities map[string]manifest.IdentityRecord) map[string]agginventory.ToolContext { + out := map[string]agginventory.ToolContext{} + for key, value := range base 
{ + out[key] = value + } + for _, finding := range findings { + org := strings.TrimSpace(finding.Org) + if org == "" { + org = "local" + } + toolID := identity.ToolID(finding.ToolType, finding.Location) + agentID := identity.AgentID(toolID, org) + record, exists := identities[agentID] + if !exists { + continue + } + key := agginventory.KeyForFinding(finding) + ctx := out[key] + ctx.ApprovalStatus = fallback(record.ApprovalState, "missing") + ctx.LifecycleState = fallback(record.Status, identity.StateDiscovered) + if ctx.DataClass == "" { + ctx.DataClass = record.DataClass + } + if ctx.EndpointClass == "" { + ctx.EndpointClass = record.EndpointClass + } + if ctx.AutonomyLevel == "" { + ctx.AutonomyLevel = record.AutonomyLevel + } + if record.RiskScore > ctx.RiskScore { + ctx.RiskScore = record.RiskScore + } + out[key] = ctx + } + return out +} + +func buildScanMethodology(manifestOut source.Manifest, findings []source.Finding, startedAt, completedAt time.Time) agginventory.MethodologySummary { + fileSet := map[string]struct{}{} + detectorCounts := map[string]int{} + for _, finding := range findings { + repo := strings.TrimSpace(finding.Repo) + location := strings.TrimSpace(finding.Location) + if repo != "" && location != "" { + fileSet[repo+"::"+location] = struct{}{} + } + detector := strings.TrimSpace(finding.Detector) + if detector == "" { + detector = "unknown" + } + detectorCounts[detector]++ + } + + detectors := make([]agginventory.MethodologyDetector, 0, len(detectorCounts)) + for detectorID, count := range detectorCounts { + detectors = append(detectors, agginventory.MethodologyDetector{ + ID: detectorID, + Version: "v1", + FindingCount: count, + }) + } + sort.Slice(detectors, func(i, j int) bool { + return detectors[i].ID < detectors[j].ID + }) + + started := startedAt.UTC().Truncate(time.Second) + completed := completedAt.UTC().Truncate(time.Second) + if completed.Before(started) { + completed = started + } + durationSeconds := 
math.Round(completed.Sub(started).Seconds()*100) / 100 + + return agginventory.MethodologySummary{ + WrkrVersion: wrkrVersion(), + ScanStartedAt: started.Format(time.RFC3339), + ScanCompletedAt: completed.Format(time.RFC3339), + ScanDurationSeconds: durationSeconds, + RepoCount: len(manifestOut.Repos), + FileCountProcessed: len(fileSet), + Detectors: detectors, + } +} + +func repoRootFromScopes(scopes []detect.Scope) string { + if len(scopes) == 0 { + return "" + } + sort.Slice(scopes, func(i, j int) bool { + if scopes[i].Org != scopes[j].Org { + return scopes[i].Org < scopes[j].Org + } + if scopes[i].Repo != scopes[j].Repo { + return scopes[i].Repo < scopes[j].Repo + } + return scopes[i].Root < scopes[j].Root + }) + return scopes[0].Root +} + +func fallback(value, defaultValue string) string { + if strings.TrimSpace(value) == "" { + return defaultValue + } + return value +} + +func driftTransitionCount(transitions []lifecycle.Transition) int { + count := 0 + for _, transition := range transitions { + switch strings.TrimSpace(transition.Trigger) { + case "removed", "reappeared", "modified": + count++ + } + } + return count +} + +func hasDegradedFailures(failures []source.RepoFailure) bool { + for _, failure := range failures { + if strings.Contains(strings.ToLower(strings.TrimSpace(failure.Reason)), "degraded") { + return true + } + } + return false +} diff --git a/core/cli/scan_partial_errors_test.go b/core/cli/scan_partial_errors_test.go new file mode 100644 index 0000000..8355e8b --- /dev/null +++ b/core/cli/scan_partial_errors_test.go @@ -0,0 +1,131 @@ +package cli + +import ( + "bytes" + "encoding/json" + "fmt" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "runtime" + "testing" +) + +func TestScanContinuesOnDetectorError(t *testing.T) { + t.Parallel() + + if runtime.GOOS == "windows" { + t.Skip("permission fixture is not portable on windows") + } + + tmp := t.TempDir() + reposPath := filepath.Join(tmp, "repos") + if err := 
os.MkdirAll(reposPath, 0o755); err != nil { + t.Fatalf("mkdir repos: %v", err) + } + + goodRepo := filepath.Join(reposPath, "alpha") + if err := os.MkdirAll(filepath.Join(goodRepo, ".codex"), 0o755); err != nil { + t.Fatalf("mkdir good repo: %v", err) + } + if err := os.WriteFile(filepath.Join(goodRepo, ".codex", "config.toml"), []byte("approval_policy = \"never\"\n"), 0o600); err != nil { + t.Fatalf("write codex config: %v", err) + } + + badRepo := filepath.Join(reposPath, "beta") + if err := os.MkdirAll(badRepo, 0o755); err != nil { + t.Fatalf("mkdir bad repo: %v", err) + } + if err := os.Chmod(badRepo, 0o000); err != nil { + t.Skipf("chmod 000 unsupported in current environment: %v", err) + } + defer func() { + _ = os.Chmod(badRepo, 0o755) + }() + + var out bytes.Buffer + var errOut bytes.Buffer + statePath := filepath.Join(tmp, "state.json") + code := Run([]string{"scan", "--path", reposPath, "--state", statePath, "--json"}, &out, &errOut) + if code != 0 { + t.Fatalf("scan failed unexpectedly: exit=%d stderr=%s", code, errOut.String()) + } + + var payload map[string]any + if err := json.Unmarshal(out.Bytes(), &payload); err != nil { + t.Fatalf("parse scan output: %v", err) + } + + findings, ok := payload["findings"].([]any) + if !ok || len(findings) == 0 { + t.Fatalf("expected findings to be preserved, got %v", payload["findings"]) + } + detectorErrors, ok := payload["detector_errors"].([]any) + if !ok || len(detectorErrors) == 0 { + t.Fatalf("expected detector_errors in payload, got %v", payload["detector_errors"]) + } + firstErr, ok := detectorErrors[0].(map[string]any) + if !ok { + t.Fatalf("unexpected detector error payload type: %T", detectorErrors[0]) + } + for _, key := range []string{"detector", "org", "repo", "code", "class", "message"} { + if _, present := firstErr[key]; !present { + t.Fatalf("detector error missing key %q: %v", key, firstErr) + } + } +} + +func TestScanOrgMaterializationFailureReturnsPartialResult(t *testing.T) { + t.Parallel() + + 
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/orgs/acme/repos": + _, _ = fmt.Fprint(w, `[{"full_name":"acme/a"},{"full_name":"acme/b"}]`) + case "/repos/acme/a": + _, _ = fmt.Fprint(w, `{"full_name":"acme/a","default_branch":"main"}`) + case "/repos/acme/b": + _, _ = fmt.Fprint(w, `{"full_name":"acme/b","default_branch":"main"}`) + case "/repos/acme/a/git/trees/main": + _, _ = fmt.Fprint(w, `{"tree":[]}`) + case "/repos/acme/b/git/trees/main": + w.WriteHeader(http.StatusBadGateway) + _, _ = fmt.Fprint(w, `{"message":"upstream unavailable"}`) + default: + t.Fatalf("unexpected path: %s", r.URL.Path) + } + })) + defer server.Close() + + tmp := t.TempDir() + statePath := filepath.Join(tmp, "state.json") + var out bytes.Buffer + var errOut bytes.Buffer + + code := Run([]string{ + "scan", + "--org", "acme", + "--github-api", server.URL, + "--state", statePath, + "--json", + }, &out, &errOut) + if code != 0 { + t.Fatalf("scan failed unexpectedly: exit=%d stderr=%s", code, errOut.String()) + } + + var payload map[string]any + if err := json.Unmarshal(out.Bytes(), &payload); err != nil { + t.Fatalf("parse scan output: %v", err) + } + if partial, ok := payload["partial_result"].(bool); !ok || !partial { + t.Fatalf("expected partial_result=true, got %v", payload["partial_result"]) + } + sourceErrors, ok := payload["source_errors"].([]any) + if !ok || len(sourceErrors) == 0 { + t.Fatalf("expected source_errors, got %v", payload["source_errors"]) + } + if degraded, ok := payload["source_degraded"].(bool); !ok || degraded { + t.Fatalf("expected source_degraded=false for non-degraded failure, got %v", payload["source_degraded"]) + } +} diff --git a/core/cli/scan_sarif_test.go b/core/cli/scan_sarif_test.go new file mode 100644 index 0000000..ab22749 --- /dev/null +++ b/core/cli/scan_sarif_test.go @@ -0,0 +1,60 @@ +package cli + +import ( + "bytes" + "encoding/json" + "os" + "path/filepath" + "reflect" + 
"testing" +) + +func TestScanSARIFModeDoesNotAlterNativeOutput(t *testing.T) { + t.Parallel() + + repoRoot := mustFindRepoRoot(t) + scanPath := filepath.Join(repoRoot, "scenarios", "wrkr", "scan-mixed-org", "repos") + tmp := t.TempDir() + stateA := filepath.Join(tmp, "state-a.json") + stateB := filepath.Join(tmp, "state-b.json") + sarifPath := filepath.Join(tmp, "wrkr.sarif") + + var outA bytes.Buffer + var errA bytes.Buffer + if code := Run([]string{"scan", "--path", scanPath, "--state", stateA, "--json"}, &outA, &errA); code != 0 { + t.Fatalf("baseline scan failed: code=%d stderr=%s", code, errA.String()) + } + var payloadA map[string]any + if err := json.Unmarshal(outA.Bytes(), &payloadA); err != nil { + t.Fatalf("parse baseline scan payload: %v", err) + } + + var outB bytes.Buffer + var errB bytes.Buffer + if code := Run([]string{"scan", "--path", scanPath, "--state", stateB, "--sarif", "--sarif-path", sarifPath, "--json"}, &outB, &errB); code != 0 { + t.Fatalf("sarif scan failed: code=%d stderr=%s", code, errB.String()) + } + var payloadB map[string]any + if err := json.Unmarshal(outB.Bytes(), &payloadB); err != nil { + t.Fatalf("parse sarif scan payload: %v", err) + } + + if _, present := payloadB["sarif"]; !present { + t.Fatalf("expected sarif metadata in payload, got %v", payloadB) + } + if !reflect.DeepEqual(payloadA["findings"], payloadB["findings"]) { + t.Fatal("expected SARIF mode to preserve native findings output") + } + + data, err := os.ReadFile(sarifPath) + if err != nil { + t.Fatalf("read sarif output: %v", err) + } + var sarifEnvelope map[string]any + if err := json.Unmarshal(data, &sarifEnvelope); err != nil { + t.Fatalf("parse sarif output json: %v", err) + } + if sarifEnvelope["version"] != "2.1.0" { + t.Fatalf("expected sarif version 2.1.0, got %v", sarifEnvelope["version"]) + } +} diff --git a/core/cli/scan_timeout_test.go b/core/cli/scan_timeout_test.go new file mode 100644 index 0000000..1f68756 --- /dev/null +++ 
b/core/cli/scan_timeout_test.go @@ -0,0 +1,132 @@ +package cli + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "net/http" + "net/http/httptest" + "path/filepath" + "testing" + "time" +) + +func TestScanTimeoutDeadlineExceeded(t *testing.T) { + t.Parallel() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + time.Sleep(250 * time.Millisecond) + _, _ = w.Write([]byte(`{"full_name":"acme/backend","default_branch":"main"}`)) + })) + defer server.Close() + + tmp := t.TempDir() + statePath := filepath.Join(tmp, "state.json") + var out bytes.Buffer + var errOut bytes.Buffer + + code := Run([]string{ + "scan", + "--repo", "acme/backend", + "--github-api", server.URL, + "--state", statePath, + "--timeout", "20ms", + "--json", + }, &out, &errOut) + if code != 1 { + t.Fatalf("expected exit 1 for timeout, got %d (stderr=%s)", code, errOut.String()) + } + if out.Len() != 0 { + t.Fatalf("expected no stdout on timeout, got %q", out.String()) + } + assertErrorCode(t, errOut.Bytes(), "scan_timeout") +} + +func TestScanCancellationStopsAcquisitionAndDetection(t *testing.T) { + t.Parallel() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + _, _ = w.Write([]byte(`{"full_name":"acme/backend","default_branch":"main"}`)) + })) + defer server.Close() + + tmp := t.TempDir() + statePath := filepath.Join(tmp, "state.json") + var out bytes.Buffer + var errOut bytes.Buffer + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + code := RunWithContext(ctx, []string{ + "scan", + "--repo", "acme/backend", + "--github-api", server.URL, + "--state", statePath, + "--json", + }, &out, &errOut) + if code != 1 { + t.Fatalf("expected exit 1 for canceled scan, got %d (stderr=%s)", code, errOut.String()) + } + if out.Len() != 0 { + t.Fatalf("expected no stdout on canceled scan, got %q", out.String()) + } + assertErrorCode(t, errOut.Bytes(), "scan_canceled") +} + +func 
TestScanOrgTimeoutDuringAcquireReturnsTimeoutError(t *testing.T) { + t.Parallel() + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/orgs/acme/repos": + _, _ = fmt.Fprint(w, `[{"full_name":"acme/a"}]`) + case "/repos/acme/a": + time.Sleep(250 * time.Millisecond) + _, _ = fmt.Fprint(w, `{"full_name":"acme/a","default_branch":"main"}`) + default: + t.Fatalf("unexpected path: %s", r.URL.Path) + } + })) + defer server.Close() + + tmp := t.TempDir() + statePath := filepath.Join(tmp, "state.json") + var out bytes.Buffer + var errOut bytes.Buffer + + code := Run([]string{ + "scan", + "--org", "acme", + "--github-api", server.URL, + "--state", statePath, + "--timeout", "20ms", + "--json", + }, &out, &errOut) + if code != 1 { + t.Fatalf("expected exit 1 for timeout, got %d (stderr=%s)", code, errOut.String()) + } + if out.Len() != 0 { + t.Fatalf("expected no stdout on timeout, got %q", out.String()) + } + assertErrorCode(t, errOut.Bytes(), "scan_timeout") +} + +func assertErrorCode(t *testing.T, payload []byte, expected string) { + t.Helper() + + var envelope map[string]any + if err := json.Unmarshal(payload, &envelope); err != nil { + t.Fatalf("parse error payload: %v (%q)", err, string(payload)) + } + errorPayload, ok := envelope["error"].(map[string]any) + if !ok { + t.Fatalf("expected error object in payload, got %v", envelope) + } + if errorPayload["code"] != expected { + t.Fatalf("expected error code %q, got %v", expected, errorPayload["code"]) + } + if errorPayload["exit_code"] != float64(1) { + t.Fatalf("expected exit_code=1, got %v", errorPayload["exit_code"]) + } +} diff --git a/core/cli/version.go b/core/cli/version.go new file mode 100644 index 0000000..73b8092 --- /dev/null +++ b/core/cli/version.go @@ -0,0 +1,55 @@ +package cli + +import ( + "encoding/json" + "flag" + "fmt" + "io" + "runtime/debug" + "strings" +) + +func runVersion(args []string, stdout io.Writer, stderr io.Writer) 
int { + jsonRequested := wantsJSONOutput(args) + + fs := flag.NewFlagSet("version", flag.ContinueOnError) + if jsonRequested { + fs.SetOutput(io.Discard) + } else { + fs.SetOutput(stderr) + } + + jsonOut := fs.Bool("json", false, "emit machine-readable output") + if code, handled := parseFlags(fs, args, stderr, jsonRequested || *jsonOut); handled { + return code + } + if fs.NArg() != 0 { + return emitError(stderr, jsonRequested || *jsonOut, "invalid_input", fmt.Sprintf("unsupported argument %q", fs.Arg(0)), exitInvalidInput) + } + return emitVersion(stdout, jsonRequested || *jsonOut, *jsonOut) +} + +func emitVersion(stdout io.Writer, jsonRequested bool, jsonOut bool) int { + version := wrkrVersion() + if jsonRequested || jsonOut { + _ = json.NewEncoder(stdout).Encode(map[string]any{ + "status": "ok", + "version": version, + }) + return exitSuccess + } + _, _ = fmt.Fprintf(stdout, "wrkr %s\n", version) + return exitSuccess +} + +func wrkrVersion() string { + info, ok := debug.ReadBuildInfo() + if !ok { + return "devel" + } + version := strings.TrimSpace(info.Main.Version) + if version == "" || version == "(devel)" { + return "devel" + } + return version +} diff --git a/core/cli/version_test.go b/core/cli/version_test.go new file mode 100644 index 0000000..323947e --- /dev/null +++ b/core/cli/version_test.go @@ -0,0 +1,66 @@ +package cli + +import ( + "bytes" + "encoding/json" + "strings" + "testing" +) + +func TestVersionCommandHumanAndJSON(t *testing.T) { + t.Parallel() + + var humanOut bytes.Buffer + var humanErr bytes.Buffer + if code := Run([]string{"version"}, &humanOut, &humanErr); code != 0 { + t.Fatalf("version command failed: code=%d stderr=%q", code, humanErr.String()) + } + if !strings.HasPrefix(strings.TrimSpace(humanOut.String()), "wrkr ") { + t.Fatalf("unexpected human version output: %q", humanOut.String()) + } + + var jsonOut bytes.Buffer + var jsonErr bytes.Buffer + if code := Run([]string{"version", "--json"}, &jsonOut, &jsonErr); code != 0 { + 
t.Fatalf("version --json failed: code=%d stderr=%q", code, jsonErr.String()) + } + var payload map[string]any + if err := json.Unmarshal(jsonOut.Bytes(), &payload); err != nil { + t.Fatalf("parse version json: %v", err) + } + if payload["status"] != "ok" { + t.Fatalf("unexpected status: %v", payload["status"]) + } + if strings.TrimSpace(payload["version"].(string)) == "" { + t.Fatalf("expected non-empty version in payload: %v", payload) + } +} + +func TestRootVersionFlag(t *testing.T) { + t.Parallel() + + var out bytes.Buffer + var errOut bytes.Buffer + if code := Run([]string{"--version"}, &out, &errOut); code != 0 { + t.Fatalf("--version failed: code=%d stderr=%q", code, errOut.String()) + } + if !strings.HasPrefix(strings.TrimSpace(out.String()), "wrkr ") { + t.Fatalf("unexpected --version output: %q", out.String()) + } + + out.Reset() + errOut.Reset() + if code := Run([]string{"--version", "--json"}, &out, &errOut); code != 0 { + t.Fatalf("--version --json failed: code=%d stderr=%q", code, errOut.String()) + } + var payload map[string]any + if err := json.Unmarshal(out.Bytes(), &payload); err != nil { + t.Fatalf("parse --version --json output: %v", err) + } + if payload["status"] != "ok" { + t.Fatalf("unexpected status: %v", payload["status"]) + } + if strings.TrimSpace(payload["version"].(string)) == "" { + t.Fatalf("expected non-empty version in payload: %v", payload) + } +} diff --git a/core/detect/a2a/detector.go b/core/detect/a2a/detector.go index 036830d..f6cfd8f 100644 --- a/core/detect/a2a/detector.go +++ b/core/detect/a2a/detector.go @@ -3,7 +3,6 @@ package a2a import ( "context" "fmt" - "os" "path/filepath" "sort" "strings" @@ -30,9 +29,8 @@ type agentCard struct { } func (Detector) Detect(_ context.Context, scope detect.Scope, _ detect.Options) ([]model.Finding, error) { - info, err := os.Stat(scope.Root) - if err != nil || !info.IsDir() { - return nil, nil + if err := detect.ValidateScopeRoot(scope.Root); err != nil { + return nil, err } policy, _, 
policyErr := mcpgateway.LoadPolicy(scope.Root) diff --git a/core/detect/ciagent/detector.go b/core/detect/ciagent/detector.go index fe2a4cc..ea0094f 100644 --- a/core/detect/ciagent/detector.go +++ b/core/detect/ciagent/detector.go @@ -21,9 +21,8 @@ func New() Detector { return Detector{} } func (Detector) ID() string { return detectorID } func (Detector) Detect(_ context.Context, scope detect.Scope, _ detect.Options) ([]model.Finding, error) { - info, err := os.Stat(scope.Root) - if err != nil || !info.IsDir() { - return nil, nil + if err := detect.ValidateScopeRoot(scope.Root); err != nil { + return nil, err } files := make([]string, 0) diff --git a/core/detect/claude/detector.go b/core/detect/claude/detector.go index 94155cc..ac8db01 100644 --- a/core/detect/claude/detector.go +++ b/core/detect/claude/detector.go @@ -3,7 +3,6 @@ package claude import ( "context" "fmt" - "os" "strings" "github.com/Clyra-AI/wrkr/core/detect" @@ -31,9 +30,8 @@ type mcpV1 struct { } func (Detector) Detect(_ context.Context, scope detect.Scope, _ detect.Options) ([]model.Finding, error) { - info, err := os.Stat(scope.Root) - if err != nil || !info.IsDir() { - return nil, nil + if err := detect.ValidateScopeRoot(scope.Root); err != nil { + return nil, err } findings := make([]model.Finding, 0) diff --git a/core/detect/codex/detector.go b/core/detect/codex/detector.go index 6487418..6820530 100644 --- a/core/detect/codex/detector.go +++ b/core/detect/codex/detector.go @@ -3,7 +3,6 @@ package codex import ( "context" "fmt" - "os" "strings" "github.com/Clyra-AI/wrkr/core/detect" @@ -25,9 +24,8 @@ type configModel struct { } func (Detector) Detect(_ context.Context, scope detect.Scope, _ detect.Options) ([]model.Finding, error) { - info, err := os.Stat(scope.Root) - if err != nil || !info.IsDir() { - return nil, nil + if err := detect.ValidateScopeRoot(scope.Root); err != nil { + return nil, err } findings := make([]model.Finding, 0) diff --git a/core/detect/compiledaction/detector.go 
b/core/detect/compiledaction/detector.go index 68f6e36..64691b7 100644 --- a/core/detect/compiledaction/detector.go +++ b/core/detect/compiledaction/detector.go @@ -32,9 +32,8 @@ type actionDoc struct { } func (Detector) Detect(_ context.Context, scope detect.Scope, _ detect.Options) ([]model.Finding, error) { - info, err := os.Stat(scope.Root) - if err != nil || !info.IsDir() { - return nil, nil + if err := detect.ValidateScopeRoot(scope.Root); err != nil { + return nil, err } files, walkErr := detect.WalkFiles(scope.Root) diff --git a/core/detect/copilot/detector.go b/core/detect/copilot/detector.go index 7559ed3..a4f41a1 100644 --- a/core/detect/copilot/detector.go +++ b/core/detect/copilot/detector.go @@ -3,7 +3,6 @@ package copilot import ( "context" "fmt" - "os" "strings" "github.com/Clyra-AI/wrkr/core/detect" @@ -25,9 +24,8 @@ type mcpConfig struct { } func (Detector) Detect(_ context.Context, scope detect.Scope, _ detect.Options) ([]model.Finding, error) { - info, err := os.Stat(scope.Root) - if err != nil || !info.IsDir() { - return nil, nil + if err := detect.ValidateScopeRoot(scope.Root); err != nil { + return nil, err } findings := make([]model.Finding, 0) diff --git a/core/detect/cursor/detector.go b/core/detect/cursor/detector.go index 3eeb6b4..6e50568 100644 --- a/core/detect/cursor/detector.go +++ b/core/detect/cursor/detector.go @@ -34,9 +34,8 @@ type ruleFrontmatter struct { } func (Detector) Detect(_ context.Context, scope detect.Scope, _ detect.Options) ([]model.Finding, error) { - info, err := os.Stat(scope.Root) - if err != nil || !info.IsDir() { - return nil, nil + if err := detect.ValidateScopeRoot(scope.Root); err != nil { + return nil, err } findings := make([]model.Finding, 0) diff --git a/core/detect/defaults/defaults.go b/core/detect/defaults/defaults.go index c965dc4..db044c5 100644 --- a/core/detect/defaults/defaults.go +++ b/core/detect/defaults/defaults.go @@ -10,6 +10,7 @@ import ( "github.com/Clyra-AI/wrkr/core/detect/copilot" 
"github.com/Clyra-AI/wrkr/core/detect/cursor" "github.com/Clyra-AI/wrkr/core/detect/dependency" + "github.com/Clyra-AI/wrkr/core/detect/extension" "github.com/Clyra-AI/wrkr/core/detect/gaitpolicy" "github.com/Clyra-AI/wrkr/core/detect/mcp" "github.com/Clyra-AI/wrkr/core/detect/mcpgateway" @@ -34,6 +35,7 @@ func Registry() (*detect.Registry, error) { skills.New(), gaitpolicy.New(), dependency.New(), + extension.New(), secrets.New(), compiledaction.New(), ciagent.New(), diff --git a/core/detect/defaults/defaults_test.go b/core/detect/defaults/defaults_test.go index 9418a1b..b74e3fc 100644 --- a/core/detect/defaults/defaults_test.go +++ b/core/detect/defaults/defaults_test.go @@ -31,10 +31,14 @@ func TestRegistryRunsCrossDetectorCoverage(t *testing.T) { if err != nil { t.Fatalf("create detector registry: %v", err) } - findings, err := registry.Run(context.Background(), scopes, detect.Options{}) + result, err := registry.Run(context.Background(), scopes, detect.Options{}) if err != nil { t.Fatalf("run detector registry: %v", err) } + if len(result.DetectorErrors) != 0 { + t.Fatalf("expected no detector errors from fixtures, got %+v", result.DetectorErrors) + } + findings := result.Findings if len(findings) == 0 { t.Fatal("expected detector findings from mixed-org fixtures") } diff --git a/core/detect/dependency/detector.go b/core/detect/dependency/detector.go index ceae587..f79e2e1 100644 --- a/core/detect/dependency/detector.go +++ b/core/detect/dependency/detector.go @@ -26,9 +26,8 @@ func (Detector) ID() string { return detectorID } var aiKeywords = []string{"openai", "anthropic", "langchain", "llama", "cohere", "mistral", "gemini", "agent", "copilot"} func (Detector) Detect(_ context.Context, scope detect.Scope, _ detect.Options) ([]model.Finding, error) { - info, err := os.Stat(scope.Root) - if err != nil || !info.IsDir() { - return nil, nil + if err := detect.ValidateScopeRoot(scope.Root); err != nil { + return nil, err } findings := make([]model.Finding, 0) diff 
--git a/core/detect/detect.go b/core/detect/detect.go index 48b1bf3..dd74ce9 100644 --- a/core/detect/detect.go +++ b/core/detect/detect.go @@ -2,7 +2,9 @@ package detect import ( "context" + "errors" "fmt" + "os" "sort" "strings" @@ -32,6 +34,22 @@ type Registry struct { detectors map[string]Detector } +// DetectorError captures a non-fatal detector failure tied to one scope. +type DetectorError struct { + Detector string `json:"detector"` + Org string `json:"org"` + Repo string `json:"repo"` + Code string `json:"code"` + Class string `json:"class"` + Message string `json:"message"` +} + +// RunResult contains deterministic findings and non-fatal detector errors. +type RunResult struct { + Findings []model.Finding `json:"findings"` + DetectorErrors []DetectorError `json:"detector_errors,omitempty"` +} + func NewRegistry() *Registry { return &Registry{detectors: map[string]Detector{}} } @@ -51,9 +69,9 @@ func (r *Registry) Register(detector Detector) error { return nil } -func (r *Registry) Run(ctx context.Context, scopes []Scope, options Options) ([]model.Finding, error) { +func (r *Registry) Run(ctx context.Context, scopes []Scope, options Options) (RunResult, error) { if len(r.detectors) == 0 || len(scopes) == 0 { - return nil, nil + return RunResult{}, nil } sortedScopes := append([]Scope(nil), scopes...) 
@@ -73,16 +91,99 @@ func (r *Registry) Run(ctx context.Context, scopes []Scope, options Options) ([] } sort.Strings(ids) - findings := make([]model.Finding, 0) + result := RunResult{ + Findings: make([]model.Finding, 0), + DetectorErrors: make([]DetectorError, 0), + } for _, scope := range sortedScopes { + select { + case <-ctx.Done(): + return result, ctx.Err() + default: + } + if rootErr := ValidateScopeRoot(scope.Root); rootErr != nil { + result.DetectorErrors = append(result.DetectorErrors, buildDetectorError(scope, "scope", rootErr)) + continue + } for _, id := range ids { + select { + case <-ctx.Done(): + return result, ctx.Err() + default: + } items, err := r.detectors[id].Detect(ctx, scope, options) if err != nil { - return nil, fmt.Errorf("run detector %s: %w", id, err) + if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { + return result, err + } + result.DetectorErrors = append(result.DetectorErrors, buildDetectorError(scope, id, err)) + continue } - findings = append(findings, items...) + result.Findings = append(result.Findings, items...) 
+ } + } + model.SortFindings(result.Findings) + sort.Slice(result.DetectorErrors, func(i, j int) bool { + a := result.DetectorErrors[i] + b := result.DetectorErrors[j] + if a.Org != b.Org { + return a.Org < b.Org + } + if a.Repo != b.Repo { + return a.Repo < b.Repo + } + if a.Detector != b.Detector { + return a.Detector < b.Detector } + if a.Code != b.Code { + return a.Code < b.Code + } + return a.Message < b.Message + }) + if len(result.Findings) == 0 { + result.Findings = nil + } + if len(result.DetectorErrors) == 0 { + result.DetectorErrors = nil + } + return result, nil +} + +func buildDetectorError(scope Scope, detector string, err error) DetectorError { + code, class := classifyDetectorError(err) + return DetectorError{ + Detector: strings.TrimSpace(detector), + Org: strings.TrimSpace(scope.Org), + Repo: strings.TrimSpace(scope.Repo), + Code: code, + Class: class, + Message: strings.TrimSpace(err.Error()), + } +} + +func classifyDetectorError(err error) (string, string) { + switch { + case err == nil: + return "detector_error", "runtime" + case errors.Is(err, os.ErrPermission): + return "permission_denied", "filesystem" + case errors.Is(err, os.ErrNotExist): + return "path_not_found", "filesystem" + } + + lower := strings.ToLower(strings.TrimSpace(err.Error())) + switch { + case strings.Contains(lower, "permission denied"): + return "permission_denied", "filesystem" + case strings.Contains(lower, "no such file") || strings.Contains(lower, "not found"): + return "path_not_found", "filesystem" + case strings.Contains(lower, "invalid extension descriptor"): + return "invalid_extension_descriptor", "extension" + case strings.Contains(lower, "not a directory"): + return "invalid_scope", "filesystem" + case strings.Contains(lower, "i/o error") || strings.Contains(lower, "input/output"): + return "io_error", "filesystem" + default: + return "detector_error", "runtime" } - model.SortFindings(findings) - return findings, nil } diff --git a/core/detect/detect_test.go 
b/core/detect/detect_test.go index 683bb4b..76be4df 100644 --- a/core/detect/detect_test.go +++ b/core/detect/detect_test.go @@ -2,37 +2,52 @@ package detect import ( "context" + "errors" + "fmt" + "os" "testing" "github.com/Clyra-AI/wrkr/core/model" ) type fakeDetector struct { - id string - findings []model.Finding + id string + detectF func(scope Scope) ([]model.Finding, error) } func (f fakeDetector) ID() string { return f.id } -func (f fakeDetector) Detect(_ context.Context, _ Scope, _ Options) ([]model.Finding, error) { - return append([]model.Finding(nil), f.findings...), nil +func (f fakeDetector) Detect(_ context.Context, scope Scope, _ Options) ([]model.Finding, error) { + if f.detectF == nil { + return nil, nil + } + return f.detectF(scope) } func TestRegistryRunDeterministicOrder(t *testing.T) { t.Parallel() + root := t.TempDir() registry := NewRegistry() - if err := registry.Register(fakeDetector{id: "b", findings: []model.Finding{{Severity: model.SeverityLow, FindingType: "b", ToolType: "b", Location: "2", Org: "o"}}}); err != nil { + if err := registry.Register(fakeDetector{id: "b", detectF: func(scope Scope) ([]model.Finding, error) { + return []model.Finding{{Severity: model.SeverityLow, FindingType: "b", ToolType: "b", Location: "2", Org: scope.Org, Repo: scope.Repo}}, nil + }}); err != nil { t.Fatalf("register b: %v", err) } - if err := registry.Register(fakeDetector{id: "a", findings: []model.Finding{{Severity: model.SeverityCritical, FindingType: "a", ToolType: "a", Location: "1", Org: "o"}}}); err != nil { + if err := registry.Register(fakeDetector{id: "a", detectF: func(scope Scope) ([]model.Finding, error) { + return []model.Finding{{Severity: model.SeverityCritical, FindingType: "a", ToolType: "a", Location: "1", Org: scope.Org, Repo: scope.Repo}}, nil + }}); err != nil { t.Fatalf("register a: %v", err) } - findings, err := registry.Run(context.Background(), []Scope{{Org: "o", Repo: "r", Root: "/tmp"}}, Options{}) + result, err := 
registry.Run(context.Background(), []Scope{{Org: "o", Repo: "r", Root: root}}, Options{}) if err != nil { t.Fatalf("run registry: %v", err) } + if len(result.DetectorErrors) != 0 { + t.Fatalf("expected no detector errors, got %+v", result.DetectorErrors) + } + findings := result.Findings if len(findings) != 2 { t.Fatalf("expected 2 findings, got %d", len(findings)) } @@ -41,6 +56,105 @@ func TestRegistryRunDeterministicOrder(t *testing.T) { } } +func TestRegistryContinuesOnDetectorError(t *testing.T) { + t.Parallel() + + root := t.TempDir() + registry := NewRegistry() + if err := registry.Register(fakeDetector{id: "good", detectF: func(scope Scope) ([]model.Finding, error) { + return []model.Finding{{ + FindingType: "tool_config", + Severity: model.SeverityLow, + ToolType: "codex", + Location: ".codex/config.toml", + Org: scope.Org, + Repo: scope.Repo, + Detector: "good", + }}, nil + }}); err != nil { + t.Fatalf("register good: %v", err) + } + if err := registry.Register(fakeDetector{id: "bad", detectF: func(_ Scope) ([]model.Finding, error) { + return nil, fmt.Errorf("read file: %w", os.ErrPermission) + }}); err != nil { + t.Fatalf("register bad: %v", err) + } + + result, err := registry.Run(context.Background(), []Scope{{Org: "acme", Repo: "backend", Root: root}}, Options{}) + if err != nil { + t.Fatalf("run registry: %v", err) + } + if len(result.Findings) != 1 { + t.Fatalf("expected findings to be preserved, got %+v", result.Findings) + } + if len(result.DetectorErrors) != 1 { + t.Fatalf("expected one detector error, got %+v", result.DetectorErrors) + } + detectorErr := result.DetectorErrors[0] + if detectorErr.Detector != "bad" || detectorErr.Repo != "backend" || detectorErr.Org != "acme" { + t.Fatalf("unexpected detector error context: %+v", detectorErr) + } + if detectorErr.Code != "permission_denied" || detectorErr.Class != "filesystem" { + t.Fatalf("unexpected detector error classification: %+v", detectorErr) + } +} + +func 
TestRegistryScopeValidationIsSurfacedAndDeterministic(t *testing.T) { + t.Parallel() + + root := t.TempDir() + fileScope := root + "/not-a-dir.txt" + if err := os.WriteFile(fileScope, []byte("x"), 0o600); err != nil { + t.Fatalf("write file scope: %v", err) + } + missingScope := root + "/missing" + + registry := NewRegistry() + if err := registry.Register(fakeDetector{id: "noop", detectF: func(_ Scope) ([]model.Finding, error) { return nil, nil }}); err != nil { + t.Fatalf("register noop: %v", err) + } + + result, err := registry.Run(context.Background(), []Scope{ + {Org: "acme", Repo: "repo-b", Root: missingScope}, + {Org: "acme", Repo: "repo-a", Root: fileScope}, + }, Options{}) + if err != nil { + t.Fatalf("run registry: %v", err) + } + if len(result.Findings) != 0 { + t.Fatalf("expected no findings, got %+v", result.Findings) + } + if len(result.DetectorErrors) != 2 { + t.Fatalf("expected two scope errors, got %+v", result.DetectorErrors) + } + if result.DetectorErrors[0].Repo != "repo-a" || result.DetectorErrors[0].Detector != "scope" { + t.Fatalf("expected deterministic sorting by repo/detector, got %+v", result.DetectorErrors) + } + if result.DetectorErrors[0].Code != "invalid_scope" && result.DetectorErrors[0].Code != "detector_error" { + t.Fatalf("expected invalid scope classification, got %+v", result.DetectorErrors[0]) + } + if result.DetectorErrors[1].Code != "path_not_found" { + t.Fatalf("expected missing scope classification, got %+v", result.DetectorErrors[1]) + } +} + +func TestRegistryReturnsContextCancellation(t *testing.T) { + t.Parallel() + + root := t.TempDir() + registry := NewRegistry() + if err := registry.Register(fakeDetector{id: "cancel", detectF: func(_ Scope) ([]model.Finding, error) { + return nil, context.Canceled + }}); err != nil { + t.Fatalf("register cancel detector: %v", err) + } + + _, err := registry.Run(context.Background(), []Scope{{Org: "o", Repo: "r", Root: root}}, Options{}) + if !errors.Is(err, context.Canceled) { + 
t.Fatalf("expected context cancellation, got %v", err) + } +} + func TestRegistryRejectsDuplicateIDs(t *testing.T) { t.Parallel() diff --git a/core/detect/extension/detector.go b/core/detect/extension/detector.go new file mode 100644 index 0000000..123a92d --- /dev/null +++ b/core/detect/extension/detector.go @@ -0,0 +1,132 @@ +package extension + +import ( + "context" + "errors" + "fmt" + "regexp" + "sort" + "strings" + + "github.com/Clyra-AI/wrkr/core/detect" + "github.com/Clyra-AI/wrkr/core/model" +) + +const ( + detectorID = "extension" + descriptorFilePath = ".wrkr/detectors/extensions.json" + descriptorVersion = "v1" +) + +var descriptorIDRE = regexp.MustCompile(`^[a-zA-Z0-9._-]+$`) + +type Detector struct{} + +func New() Detector { return Detector{} } + +func (Detector) ID() string { return detectorID } + +type descriptorEnvelope struct { + Version string `json:"version"` + Detectors []detectorDescriptorV1 `json:"detectors"` +} + +type detectorDescriptorV1 struct { + ID string `json:"id"` + FindingType string `json:"finding_type"` + ToolType string `json:"tool_type"` + Location string `json:"location"` + Severity string `json:"severity"` + Remediation string `json:"remediation"` + Permissions []string `json:"permissions"` + Evidence []model.Evidence `json:"evidence"` +} + +func (Detector) Detect(_ context.Context, scope detect.Scope, _ detect.Options) ([]model.Finding, error) { + if err := detect.ValidateScopeRoot(scope.Root); err != nil { + return nil, err + } + if !detect.FileExists(scope.Root, descriptorFilePath) { + return nil, nil + } + + var envelope descriptorEnvelope + if parseErr := detect.ParseJSONFile(detectorID, scope.Root, descriptorFilePath, &envelope); parseErr != nil { + return nil, fmt.Errorf("invalid extension descriptor parse_error: %s", parseErr.Message) + } + if strings.TrimSpace(envelope.Version) != descriptorVersion { + return nil, fmt.Errorf("invalid extension descriptor version: expected %q", descriptorVersion) + } + if 
len(envelope.Detectors) == 0 { + return nil, nil + } + + descriptors := append([]detectorDescriptorV1(nil), envelope.Detectors...) + sort.Slice(descriptors, func(i, j int) bool { + return strings.TrimSpace(descriptors[i].ID) < strings.TrimSpace(descriptors[j].ID) + }) + + seen := map[string]struct{}{} + findings := make([]model.Finding, 0, len(descriptors)) + for _, descriptor := range descriptors { + id := strings.TrimSpace(descriptor.ID) + if _, exists := seen[id]; exists { + return nil, fmt.Errorf("invalid extension descriptor %q: duplicate id", id) + } + seen[id] = struct{}{} + + if validateErr := validateDescriptor(descriptor); validateErr != nil { + return nil, fmt.Errorf("invalid extension descriptor %q: %w", id, validateErr) + } + + findings = append(findings, model.Finding{ + FindingType: descriptor.FindingType, + Severity: strings.ToLower(strings.TrimSpace(descriptor.Severity)), + ToolType: descriptor.ToolType, + Location: descriptor.Location, + Repo: scope.Repo, + Org: fallbackOrg(scope.Org), + Detector: detectorID, + Permissions: descriptor.Permissions, + Remediation: descriptor.Remediation, + Evidence: append([]model.Evidence{ + {Key: "extension_id", Value: id}, + {Key: "descriptor_version", Value: descriptorVersion}, + }, descriptor.Evidence...), + }) + } + model.SortFindings(findings) + return findings, nil +} + +func validateDescriptor(descriptor detectorDescriptorV1) error { + id := strings.TrimSpace(descriptor.ID) + if id == "" { + return errors.New("id is required") + } + if !descriptorIDRE.MatchString(id) { + return errors.New("id must match [a-zA-Z0-9._-]+") + } + if strings.TrimSpace(descriptor.FindingType) == "" { + return errors.New("finding_type is required") + } + if strings.TrimSpace(descriptor.ToolType) == "" { + return errors.New("tool_type is required") + } + if strings.TrimSpace(descriptor.Location) == "" { + return errors.New("location is required") + } + switch strings.ToLower(strings.TrimSpace(descriptor.Severity)) { + case 
model.SeverityCritical, model.SeverityHigh, model.SeverityMedium, model.SeverityLow, model.SeverityInfo: + default: + return errors.New("severity must be one of critical|high|medium|low|info") + } + return nil +} + +func fallbackOrg(org string) string { + if strings.TrimSpace(org) == "" { + return "local" + } + return strings.TrimSpace(org) +} diff --git a/core/detect/extension/detector_test.go b/core/detect/extension/detector_test.go new file mode 100644 index 0000000..be72332 --- /dev/null +++ b/core/detect/extension/detector_test.go @@ -0,0 +1,119 @@ +package extension + +import ( + "context" + "os" + "path/filepath" + "reflect" + "strings" + "testing" + + "github.com/Clyra-AI/wrkr/core/detect" +) + +func TestExtensionRegistryDeterministicOrdering(t *testing.T) { + t.Parallel() + + root := t.TempDir() + descriptorDir := filepath.Join(root, ".wrkr", "detectors") + if err := os.MkdirAll(descriptorDir, 0o755); err != nil { + t.Fatalf("mkdir descriptor dir: %v", err) + } + payload := []byte(`{ + "version": "v1", + "detectors": [ + {"id":"zeta","finding_type":"custom_zeta","tool_type":"custom_detector","location":".custom/zeta.yaml","severity":"low"}, + {"id":"alpha","finding_type":"custom_alpha","tool_type":"custom_detector","location":".custom/alpha.yaml","severity":"low"} + ] +}`) + if err := os.WriteFile(filepath.Join(descriptorDir, "extensions.json"), payload, 0o600); err != nil { + t.Fatalf("write descriptors: %v", err) + } + + scope := detect.Scope{Org: "acme", Repo: "backend", Root: root} + first, err := New().Detect(context.Background(), scope, detect.Options{}) + if err != nil { + t.Fatalf("first detect: %v", err) + } + second, err := New().Detect(context.Background(), scope, detect.Options{}) + if err != nil { + t.Fatalf("second detect: %v", err) + } + if !reflect.DeepEqual(first, second) { + t.Fatalf("expected deterministic output across runs\nfirst=%+v\nsecond=%+v", first, second) + } + if len(first) != 2 { + t.Fatalf("expected 2 extension findings, got 
%d", len(first)) + } + if first[0].FindingType != "custom_alpha" || first[1].FindingType != "custom_zeta" { + t.Fatalf("expected deterministic finding ordering by normalized sort, got %+v", first) + } +} + +func TestInvalidExtensionDescriptorFailsClosed(t *testing.T) { + t.Parallel() + + root := t.TempDir() + descriptorDir := filepath.Join(root, ".wrkr", "detectors") + if err := os.MkdirAll(descriptorDir, 0o755); err != nil { + t.Fatalf("mkdir descriptor dir: %v", err) + } + payload := []byte(`{ + "version": "v1", + "detectors": [ + {"id":"", "finding_type":"custom", "tool_type":"custom", "location":".custom/policy.yaml", "severity":"low"} + ] +}`) + if err := os.WriteFile(filepath.Join(descriptorDir, "extensions.json"), payload, 0o600); err != nil { + t.Fatalf("write descriptors: %v", err) + } + + scope := detect.Scope{Org: "acme", Repo: "backend", Root: root} + _, err := New().Detect(context.Background(), scope, detect.Options{}) + if err == nil { + t.Fatal("expected invalid extension descriptor to fail closed") + } + if got := err.Error(); got == "" || !containsAll(got, "invalid extension descriptor", "id is required") { + t.Fatalf("unexpected error message: %v", err) + } +} + +func TestExtensionRegistryNormalizesSeverity(t *testing.T) { + t.Parallel() + + root := t.TempDir() + descriptorDir := filepath.Join(root, ".wrkr", "detectors") + if err := os.MkdirAll(descriptorDir, 0o755); err != nil { + t.Fatalf("mkdir descriptor dir: %v", err) + } + payload := []byte(`{ + "version": "v1", + "detectors": [ + {"id":"alpha","finding_type":"custom_alpha","tool_type":"custom_detector","location":".custom/alpha.yaml","severity":"HIGH"} + ] +}`) + if err := os.WriteFile(filepath.Join(descriptorDir, "extensions.json"), payload, 0o600); err != nil { + t.Fatalf("write descriptors: %v", err) + } + + scope := detect.Scope{Org: "acme", Repo: "backend", Root: root} + findings, err := New().Detect(context.Background(), scope, detect.Options{}) + if err != nil { + t.Fatalf("detect: 
%v", err) + } + if len(findings) != 1 { + t.Fatalf("expected 1 finding, got %d", len(findings)) + } + if findings[0].Severity != "high" { + t.Fatalf("expected normalized severity high, got %q", findings[0].Severity) + } +} + +func containsAll(value string, fragments ...string) bool { + for _, fragment := range fragments { + if !strings.Contains(value, fragment) { + return false + } + } + return true +} diff --git a/core/detect/gaitpolicy/detector.go b/core/detect/gaitpolicy/detector.go index ed89687..90b32da 100644 --- a/core/detect/gaitpolicy/detector.go +++ b/core/detect/gaitpolicy/detector.go @@ -22,9 +22,8 @@ func New() Detector { return Detector{} } func (Detector) ID() string { return detectorID } func (Detector) Detect(_ context.Context, scope detect.Scope, _ detect.Options) ([]model.Finding, error) { - info, err := os.Stat(scope.Root) - if err != nil || !info.IsDir() { - return nil, nil + if err := detect.ValidateScopeRoot(scope.Root); err != nil { + return nil, err } blocked, files, err := LoadBlockedTools(scope.Root) diff --git a/core/detect/mcp/detector.go b/core/detect/mcp/detector.go index 820d9dd..d977415 100644 --- a/core/detect/mcp/detector.go +++ b/core/detect/mcp/detector.go @@ -42,9 +42,8 @@ var pinRE = regexp.MustCompile(`@[0-9]+`) var packageRE = regexp.MustCompile(`(@[A-Za-z0-9._-]+/[A-Za-z0-9._-]+|[A-Za-z0-9._-]+)(?:@([A-Za-z0-9._-]+))?`) func (Detector) Detect(ctx context.Context, scope detect.Scope, options detect.Options) ([]model.Finding, error) { - info, err := os.Stat(scope.Root) - if err != nil || !info.IsDir() { - return nil, nil + if err := detect.ValidateScopeRoot(scope.Root); err != nil { + return nil, err } var enrichService enrich.Service diff --git a/core/detect/mcpgateway/detector.go b/core/detect/mcpgateway/detector.go index ab4e451..6c13335 100644 --- a/core/detect/mcpgateway/detector.go +++ b/core/detect/mcpgateway/detector.go @@ -3,7 +3,6 @@ package mcpgateway import ( "context" "fmt" - "os" "path/filepath" "sort" "strings" 
@@ -77,9 +76,8 @@ func New() Detector { return Detector{} } func (Detector) ID() string { return detectorID } func (Detector) Detect(_ context.Context, scope detect.Scope, _ detect.Options) ([]model.Finding, error) { - info, err := os.Stat(scope.Root) - if err != nil || !info.IsDir() { - return nil, nil + if err := detect.ValidateScopeRoot(scope.Root); err != nil { + return nil, err } policy, parseErrors, err := LoadPolicy(scope.Root) diff --git a/core/detect/promptchannel/detector.go b/core/detect/promptchannel/detector.go index 2d1862d..8935010 100644 --- a/core/detect/promptchannel/detector.go +++ b/core/detect/promptchannel/detector.go @@ -37,9 +37,8 @@ func New() Detector { return Detector{} } func (Detector) ID() string { return detectorID } func (Detector) Detect(_ context.Context, scope detect.Scope, _ detect.Options) ([]model.Finding, error) { - info, err := os.Stat(scope.Root) - if err != nil || !info.IsDir() { - return nil, nil + if err := detect.ValidateScopeRoot(scope.Root); err != nil { + return nil, err } files, err := detect.WalkFiles(scope.Root) diff --git a/core/detect/scope.go b/core/detect/scope.go new file mode 100644 index 0000000..e64a887 --- /dev/null +++ b/core/detect/scope.go @@ -0,0 +1,23 @@ +package detect + +import ( + "fmt" + "os" + "strings" +) + +// ValidateScopeRoot ensures detector scope roots are valid directories. 
+func ValidateScopeRoot(root string) error { + clean := strings.TrimSpace(root) + if clean == "" { + return fmt.Errorf("scope root is required") + } + info, err := os.Stat(clean) + if err != nil { + return fmt.Errorf("stat scope root: %w", err) + } + if !info.IsDir() { + return fmt.Errorf("scope root is not a directory: %s", clean) + } + return nil +} diff --git a/core/detect/secrets/detector.go b/core/detect/secrets/detector.go index 7f62d3c..30ca529 100644 --- a/core/detect/secrets/detector.go +++ b/core/detect/secrets/detector.go @@ -24,9 +24,8 @@ func (Detector) ID() string { return detectorID } var workflowSecretRE = regexp.MustCompile(`secrets\.([A-Za-z0-9_]+)`) func (Detector) Detect(_ context.Context, scope detect.Scope, _ detect.Options) ([]model.Finding, error) { - info, err := os.Stat(scope.Root) - if err != nil || !info.IsDir() { - return nil, nil + if err := detect.ValidateScopeRoot(scope.Root); err != nil { + return nil, err } findings := make([]model.Finding, 0) diff --git a/core/detect/skills/detector.go b/core/detect/skills/detector.go index 557ae75..b3370ba 100644 --- a/core/detect/skills/detector.go +++ b/core/detect/skills/detector.go @@ -28,9 +28,8 @@ type frontmatter struct { } func (Detector) Detect(_ context.Context, scope detect.Scope, _ detect.Options) ([]model.Finding, error) { - info, err := os.Stat(scope.Root) - if err != nil || !info.IsDir() { - return nil, nil + if err := detect.ValidateScopeRoot(scope.Root); err != nil { + return nil, err } files, walkErr := detect.WalkFiles(scope.Root) diff --git a/core/detect/webmcp/detector.go b/core/detect/webmcp/detector.go index 16cf625..f32b69c 100644 --- a/core/detect/webmcp/detector.go +++ b/core/detect/webmcp/detector.go @@ -32,9 +32,8 @@ type declaration struct { } func (Detector) Detect(_ context.Context, scope detect.Scope, _ detect.Options) ([]model.Finding, error) { - info, err := os.Stat(scope.Root) - if err != nil || !info.IsDir() { - return nil, nil + if err := 
detect.ValidateScopeRoot(scope.Root); err != nil { + return nil, err } policy, _, policyErr := mcpgateway.LoadPolicy(scope.Root) diff --git a/core/detect/webmcp/detector_test.go b/core/detect/webmcp/detector_test.go index 73ba3c6..441240e 100644 --- a/core/detect/webmcp/detector_test.go +++ b/core/detect/webmcp/detector_test.go @@ -4,6 +4,7 @@ import ( "context" "os" "path/filepath" + "strings" "testing" "github.com/Clyra-AI/wrkr/core/detect" @@ -68,6 +69,25 @@ func TestDetectWebMCPParseErrorForInvalidJavaScript(t *testing.T) { } } +func TestWebMCPParserRejectsRuntimeEvalPath(t *testing.T) { + t.Parallel() + + repoRoot := mustFindRepoRoot(t) + payload, err := os.ReadFile(filepath.Join(repoRoot, "core", "detect", "webmcp", "detector.go")) + if err != nil { + t.Fatalf("read detector source: %v", err) + } + source := string(payload) + for _, forbidden := range []string{"goja.New(", ".RunString(", ".RunProgram(", "AssertFunction(", ".Call("} { + if strings.Contains(source, forbidden) { + t.Fatalf("webmcp detector must remain AST-parse-only; found forbidden token %q", forbidden) + } + } + if !strings.Contains(source, "goja/parser") || !strings.Contains(source, "goja/ast") { + t.Fatal("expected parser-only goja imports for AST analysis") + } +} + func mustFindWebMCPFinding(t *testing.T, findings []model.Finding) model.Finding { t.Helper() for _, finding := range findings { @@ -119,3 +139,22 @@ func writeFile(t *testing.T, root, rel, content string) { t.Fatalf("write %s: %v", rel, err) } } + +func mustFindRepoRoot(t *testing.T) string { + t.Helper() + + wd, err := os.Getwd() + if err != nil { + t.Fatalf("getwd: %v", err) + } + for { + if _, statErr := os.Stat(filepath.Join(wd, "go.mod")); statErr == nil { + return wd + } + next := filepath.Dir(wd) + if next == wd { + t.Fatal("could not find repo root") + } + wd = next + } +} diff --git a/core/export/sarif/sarif.go b/core/export/sarif/sarif.go new file mode 100644 index 0000000..22ebb67 --- /dev/null +++ 
b/core/export/sarif/sarif.go @@ -0,0 +1,186 @@ +package sarif + +import ( + "encoding/json" + "fmt" + "os" + "sort" + "strings" + + "github.com/Clyra-AI/wrkr/core/model" +) + +const ( + schemaURL = "https://json.schemastore.org/sarif-2.1.0.json" + version = "2.1.0" +) + +type Report struct { + Schema string `json:"$schema"` + Version string `json:"version"` + Runs []Run `json:"runs"` +} + +type Run struct { + Tool Tool `json:"tool"` + Results []Result `json:"results,omitempty"` +} + +type Tool struct { + Driver Driver `json:"driver"` +} + +type Driver struct { + Name string `json:"name"` + Version string `json:"version,omitempty"` + InformationURI string `json:"informationUri,omitempty"` + Rules []Rule `json:"rules,omitempty"` +} + +type Rule struct { + ID string `json:"id"` + ShortDescription Message `json:"shortDescription,omitempty"` +} + +type Result struct { + RuleID string `json:"ruleId,omitempty"` + Level string `json:"level,omitempty"` + Message Message `json:"message"` + Locations []Location `json:"locations,omitempty"` +} + +type Message struct { + Text string `json:"text"` +} + +type Location struct { + PhysicalLocation PhysicalLocation `json:"physicalLocation"` +} + +type PhysicalLocation struct { + ArtifactLocation ArtifactLocation `json:"artifactLocation"` +} + +type ArtifactLocation struct { + URI string `json:"uri"` +} + +// Build maps Wrkr findings to a deterministic SARIF report. +func Build(findings []model.Finding, wrkrVersion string) Report { + sorted := append([]model.Finding(nil), findings...) 
+ model.SortFindings(sorted) + + rulesByID := map[string]Rule{} + results := make([]Result, 0, len(sorted)) + for _, finding := range sorted { + ruleID := sarifRuleID(finding) + rulesByID[ruleID] = Rule{ + ID: ruleID, + ShortDescription: Message{ + Text: strings.TrimSpace(finding.FindingType), + }, + } + results = append(results, Result{ + RuleID: ruleID, + Level: severityToSARIFLevel(finding.Severity), + Message: Message{ + Text: findingMessage(finding), + }, + Locations: []Location{ + { + PhysicalLocation: PhysicalLocation{ + ArtifactLocation: ArtifactLocation{ + URI: fallbackLocation(finding.Location), + }, + }, + }, + }, + }) + } + + ruleIDs := make([]string, 0, len(rulesByID)) + for id := range rulesByID { + ruleIDs = append(ruleIDs, id) + } + sort.Strings(ruleIDs) + + rules := make([]Rule, 0, len(ruleIDs)) + for _, id := range ruleIDs { + rules = append(rules, rulesByID[id]) + } + + return Report{ + Schema: schemaURL, + Version: version, + Runs: []Run{ + { + Tool: Tool{ + Driver: Driver{ + Name: "wrkr", + Version: strings.TrimSpace(wrkrVersion), + InformationURI: "https://github.com/Clyra-AI/wrkr", + Rules: rules, + }, + }, + Results: results, + }, + }, + } +} + +// Write persists a SARIF report at path. +func Write(path string, report Report) error { + file, err := os.Create(path) // #nosec G304 -- output path is caller-controlled and validated by CLI path guards before write. 
+ if err != nil { + return fmt.Errorf("create sarif output: %w", err) + } + defer func() { + _ = file.Close() + }() + encoder := json.NewEncoder(file) + encoder.SetIndent("", " ") + if err := encoder.Encode(report); err != nil { + return fmt.Errorf("encode sarif output: %w", err) + } + return nil +} + +func findingMessage(finding model.Finding) string { + parts := []string{strings.TrimSpace(finding.FindingType)} + if detector := strings.TrimSpace(finding.Detector); detector != "" { + parts = append(parts, "detector="+detector) + } + if repo := strings.TrimSpace(finding.Repo); repo != "" { + parts = append(parts, "repo="+repo) + } + return strings.Join(parts, " ") +} + +func fallbackLocation(location string) string { + value := strings.TrimSpace(location) + if value == "" { + return "unknown" + } + return value +} + +func sarifRuleID(finding model.Finding) string { + if id := strings.TrimSpace(finding.RuleID); id != "" { + return id + } + if findingType := strings.TrimSpace(finding.FindingType); findingType != "" { + return findingType + } + return "wrkr_finding" +} + +func severityToSARIFLevel(severity string) string { + switch strings.ToLower(strings.TrimSpace(severity)) { + case model.SeverityCritical, model.SeverityHigh: + return "error" + case model.SeverityMedium, model.SeverityLow: + return "warning" + default: + return "note" + } +} diff --git a/core/export/sarif/sarif_test.go b/core/export/sarif/sarif_test.go new file mode 100644 index 0000000..010485a --- /dev/null +++ b/core/export/sarif/sarif_test.go @@ -0,0 +1,93 @@ +package sarif + +import ( + "encoding/json" + "os" + "path/filepath" + "reflect" + "testing" + + "github.com/Clyra-AI/wrkr/core/model" +) + +func TestSARIFEmitterBuildDeterministic(t *testing.T) { + t.Parallel() + + findings := []model.Finding{ + { + FindingType: "policy_violation", + RuleID: "WRKR-001", + Severity: model.SeverityHigh, + ToolType: "policy", + Location: ".wrkr/policy.yaml", + Repo: "backend", + Org: "acme", + Detector: 
"policy", + }, + { + FindingType: "tool_config", + Severity: model.SeverityLow, + ToolType: "codex", + Location: ".codex/config.toml", + Repo: "backend", + Org: "acme", + Detector: "codex", + }, + } + + first := Build(findings, "v1.2.3") + second := Build(findings, "v1.2.3") + if !reflect.DeepEqual(first, second) { + t.Fatalf("expected deterministic SARIF build output\nfirst=%+v\nsecond=%+v", first, second) + } + if first.Version != version { + t.Fatalf("unexpected SARIF version: %s", first.Version) + } + if first.Schema != schemaURL { + t.Fatalf("unexpected SARIF schema url: %s", first.Schema) + } + if len(first.Runs) != 1 || len(first.Runs[0].Results) != 2 { + t.Fatalf("unexpected SARIF run/result counts: %+v", first) + } +} + +func TestSARIFEmitterValidatesAgainstSchema(t *testing.T) { + t.Parallel() + + report := Build([]model.Finding{ + { + FindingType: "custom_extension_finding", + Severity: model.SeverityMedium, + ToolType: "custom_detector", + Location: ".custom/policy.yaml", + Repo: "ext-repo", + Org: "local", + Detector: "extension", + }, + }, "devel") + + tmp := t.TempDir() + path := filepath.Join(tmp, "wrkr.sarif") + if err := Write(path, report); err != nil { + t.Fatalf("write sarif: %v", err) + } + + payload, err := os.ReadFile(path) + if err != nil { + t.Fatalf("read sarif output: %v", err) + } + var envelope map[string]any + if err := json.Unmarshal(payload, &envelope); err != nil { + t.Fatalf("parse sarif output json: %v", err) + } + if envelope["version"] != version { + t.Fatalf("unexpected sarif version in output: %v", envelope["version"]) + } + if envelope["$schema"] != schemaURL { + t.Fatalf("unexpected sarif schema URL: %v", envelope["$schema"]) + } + runs, ok := envelope["runs"].([]any) + if !ok || len(runs) != 1 { + t.Fatalf("expected exactly one sarif run, got %v", envelope["runs"]) + } +} diff --git a/core/source/github/connector.go b/core/source/github/connector.go index 5de4fbd..e9eb012 100644 --- a/core/source/github/connector.go +++ 
b/core/source/github/connector.go @@ -7,13 +7,16 @@ import ( "errors" "fmt" "io" + "math" "net/http" "net/url" "os" "path" "path/filepath" "sort" + "strconv" "strings" + "sync" "time" "github.com/Clyra-AI/wrkr/core/source" @@ -25,11 +28,20 @@ type HTTPClient interface { // Connector acquires GitHub repos/org lists with deterministic request semantics. type Connector struct { - BaseURL string - Token string - HTTPClient HTTPClient - MaxRetries int - Backoff time.Duration + BaseURL string + Token string + HTTPClient HTTPClient + MaxRetries int + Backoff time.Duration + MaxBackoff time.Duration + FailureThreshold int + Cooldown time.Duration + + mu sync.Mutex + consecutiveFailures int + cooldownUntil time.Time + nowFn func() time.Time + sleepFn func(context.Context, time.Duration) error } func NewConnector(baseURL, token string, client HTTPClient) *Connector { @@ -37,14 +49,42 @@ func NewConnector(baseURL, token string, client HTTPClient) *Connector { client = &http.Client{Timeout: 10 * time.Second} } return &Connector{ - BaseURL: strings.TrimRight(baseURL, "/"), - Token: token, - HTTPClient: client, - MaxRetries: 2, - Backoff: 25 * time.Millisecond, + BaseURL: strings.TrimRight(baseURL, "/"), + Token: token, + HTTPClient: client, + MaxRetries: 2, + Backoff: 25 * time.Millisecond, + MaxBackoff: 2 * time.Second, + FailureThreshold: 3, + Cooldown: 10 * time.Second, + nowFn: time.Now, + sleepFn: sleepWithContext, } } +// DegradedError indicates connector circuit-breaker degradation. 
+type DegradedError struct { + CooldownUntil time.Time + Cause string +} + +func (e *DegradedError) Error() string { + cause := strings.TrimSpace(e.Cause) + if cause == "" { + cause = "upstream transient failures exceeded threshold" + } + if e.CooldownUntil.IsZero() { + return "connector degraded: " + cause + } + return fmt.Sprintf("connector degraded until %s: %s", e.CooldownUntil.UTC().Format(time.RFC3339), cause) +} + +// IsDegradedError reports whether err represents connector degradation. +func IsDegradedError(err error) bool { + var degraded *DegradedError + return errors.As(err, &degraded) +} + func (c *Connector) AcquireRepo(ctx context.Context, repo string) (source.RepoManifest, error) { if err := validateRepo(repo); err != nil { return source.RepoManifest{}, err @@ -169,6 +209,9 @@ func (c *Connector) MaterializeRepo(ctx context.Context, repo string, materializ sort.Slice(tree, func(i, j int) bool { return tree[i].Path < tree[j].Path }) for _, item := range tree { + if ctxErr := ctx.Err(); ctxErr != nil { + return source.RepoManifest{}, ctxErr + } if item.Type != "blob" || strings.TrimSpace(item.Path) == "" { continue } @@ -300,8 +343,16 @@ func safeJoin(root, rel string) (string, error) { } func (c *Connector) doGETWithRetry(ctx context.Context, endpoint string) ([]byte, error) { + if degradeErr := c.checkDegraded(); degradeErr != nil { + return nil, degradeErr + } + var lastErr error for attempt := 0; attempt <= c.MaxRetries; attempt++ { + if ctxErr := ctx.Err(); ctxErr != nil { + return nil, ctxErr + } + req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil) if err != nil { return nil, fmt.Errorf("build request: %w", err) @@ -312,6 +363,7 @@ func (c *Connector) doGETWithRetry(ctx context.Context, endpoint string) ([]byte req.Header.Set("Accept", "application/vnd.github+json") resp, err := c.HTTPClient.Do(req) + retryDelay := c.jitteredBackoff(attempt) if err != nil { lastErr = fmt.Errorf("request failed: %w", err) } else { @@ -321,27 
+373,218 @@ func (c *Connector) doGETWithRetry(ctx context.Context, endpoint string) ([]byte return nil, fmt.Errorf("read response body: %w", readErr) } if resp.StatusCode >= 200 && resp.StatusCode < 300 { + c.recordSuccess() return body, nil } if !isRetryable(resp.StatusCode) { + c.resetFailureStreak() return nil, fmt.Errorf("github API status %d: %s", resp.StatusCode, strings.TrimSpace(string(body))) } + retryDelay = c.retryDelayForResponse(resp, attempt) lastErr = fmt.Errorf("github API transient status %d", resp.StatusCode) } if attempt == c.MaxRetries { break } - select { - case <-ctx.Done(): - return nil, ctx.Err() - case <-time.After(c.Backoff * time.Duration(attempt+1)): + if sleepErr := c.sleep(ctx, retryDelay); sleepErr != nil { + return nil, sleepErr } } if lastErr == nil { lastErr = errors.New("request failed") } - return nil, lastErr + return nil, c.recordFailure(lastErr) +} + +func (c *Connector) now() time.Time { + if c.nowFn != nil { + return c.nowFn() + } + return time.Now() +} + +func (c *Connector) sleep(ctx context.Context, duration time.Duration) error { + if c.sleepFn != nil { + return c.sleepFn(ctx, duration) + } + return sleepWithContext(ctx, duration) +} + +func (c *Connector) jitteredBackoff(attempt int) time.Duration { + backoff := c.Backoff + if backoff <= 0 { + backoff = 25 * time.Millisecond + } + maxBackoff := c.MaxBackoff + if maxBackoff <= 0 { + maxBackoff = 2 * time.Second + } + + shift := attempt + if shift > 8 { + shift = 8 + } + base := float64(backoff) * math.Pow(2, float64(shift)) + delay := time.Duration(base) + if delay > maxBackoff { + delay = maxBackoff + } + + // Deterministic bounded jitter in [-20%, +20%]. 
+ jitterPct := (attempt*37)%41 - 20 + jitter := delay * time.Duration(jitterPct) / 100 + delay += jitter + + minDelay := backoff / 2 + if minDelay <= 0 { + minDelay = time.Millisecond + } + if delay < minDelay { + delay = minDelay + } + if delay > maxBackoff { + delay = maxBackoff + } + return delay +} + +func (c *Connector) retryDelayForResponse(resp *http.Response, attempt int) time.Duration { + if resp == nil { + return c.jitteredBackoff(attempt) + } + if resp.StatusCode != http.StatusTooManyRequests { + return c.jitteredBackoff(attempt) + } + + now := c.now() + if wait, ok := parseRetryAfter(resp.Header.Get("Retry-After"), now); ok { + return wait + } + if wait, ok := parseRateLimitReset(resp.Header.Get("X-RateLimit-Reset"), now); ok { + return wait + } + return c.jitteredBackoff(attempt) +} + +func parseRetryAfter(raw string, now time.Time) (time.Duration, bool) { + value := strings.TrimSpace(raw) + if value == "" { + return 0, false + } + if seconds, err := strconv.Atoi(value); err == nil { + if seconds < 0 { + return 0, false + } + return time.Duration(seconds) * time.Second, true + } + when, err := http.ParseTime(value) + if err != nil { + return 0, false + } + wait := when.Sub(now) + if wait < 0 { + return 0, false + } + return wait, true +} + +func parseRateLimitReset(raw string, now time.Time) (time.Duration, bool) { + value := strings.TrimSpace(raw) + if value == "" { + return 0, false + } + epoch, err := strconv.ParseInt(value, 10, 64) + if err != nil || epoch <= 0 { + return 0, false + } + wait := time.Unix(epoch, 0).Sub(now) + if wait < 0 { + return 0, false + } + return wait, true +} + +func (c *Connector) checkDegraded() error { + threshold := c.FailureThreshold + cooldown := c.Cooldown + if threshold <= 0 || cooldown <= 0 { + return nil + } + + c.mu.Lock() + defer c.mu.Unlock() + + if c.cooldownUntil.IsZero() { + return nil + } + now := c.now() + if now.Before(c.cooldownUntil) { + return &DegradedError{ + CooldownUntil: c.cooldownUntil, + Cause: 
"cooldown active after repeated upstream failures", + } + } + c.cooldownUntil = time.Time{} + c.consecutiveFailures = 0 + return nil +} + +func (c *Connector) recordSuccess() { + c.mu.Lock() + defer c.mu.Unlock() + c.consecutiveFailures = 0 + c.cooldownUntil = time.Time{} +} + +func (c *Connector) resetFailureStreak() { + c.mu.Lock() + defer c.mu.Unlock() + c.consecutiveFailures = 0 +} + +func (c *Connector) recordFailure(lastErr error) error { + if lastErr == nil { + return errors.New("request failed") + } + if errors.Is(lastErr, context.Canceled) || errors.Is(lastErr, context.DeadlineExceeded) { + return lastErr + } + + threshold := c.FailureThreshold + cooldown := c.Cooldown + if threshold <= 0 || cooldown <= 0 { + return lastErr + } + + c.mu.Lock() + defer c.mu.Unlock() + + c.consecutiveFailures++ + if c.consecutiveFailures < threshold { + return lastErr + } + + c.cooldownUntil = c.now().Add(cooldown) + c.consecutiveFailures = 0 + return &DegradedError{ + CooldownUntil: c.cooldownUntil, + Cause: lastErr.Error(), + } +} + +func sleepWithContext(ctx context.Context, duration time.Duration) error { + if duration <= 0 { + return nil + } + timer := time.NewTimer(duration) + defer timer.Stop() + select { + case <-ctx.Done(): + return ctx.Err() + case <-timer.C: + return nil + } } func isRetryable(code int) bool { diff --git a/core/source/github/connector_test.go b/core/source/github/connector_test.go index fe0a974..99bde73 100644 --- a/core/source/github/connector_test.go +++ b/core/source/github/connector_test.go @@ -13,6 +13,7 @@ import ( "strings" "sync/atomic" "testing" + "time" ) func TestAcquireRepoRequiresBaseURL(t *testing.T) { @@ -343,3 +344,150 @@ func TestMaterializeRepoFailsClosedOnTruncatedTree(t *testing.T) { t.Fatalf("expected truncated error, got %v", err) } } + +func TestConnectorHonorsRetryAfter429(t *testing.T) { + t.Parallel() + + var attempts int32 + var slept []time.Duration + server := httptest.NewServer(http.HandlerFunc(func(w 
http.ResponseWriter, _ *http.Request) { + current := atomic.AddInt32(&attempts, 1) + if current == 1 { + w.Header().Set("Retry-After", "4") + w.WriteHeader(http.StatusTooManyRequests) + _, _ = fmt.Fprint(w, `{"message":"rate limited"}`) + return + } + _, _ = fmt.Fprint(w, `{"full_name":"acme/backend"}`) + })) + defer server.Close() + + connector := NewConnector(server.URL, "", server.Client()) + connector.MaxRetries = 2 + connector.sleepFn = func(_ context.Context, duration time.Duration) error { + slept = append(slept, duration) + return nil + } + + if _, err := connector.AcquireRepo(context.Background(), "acme/backend"); err != nil { + t.Fatalf("acquire repo: %v", err) + } + if attempts != 2 { + t.Fatalf("expected two attempts, got %d", attempts) + } + if len(slept) == 0 || slept[0] != 4*time.Second { + t.Fatalf("expected retry-after sleep of 4s, got %v", slept) + } +} + +func TestConnectorCircuitBreakerCooldown(t *testing.T) { + t.Parallel() + + var attempts int32 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + atomic.AddInt32(&attempts, 1) + w.WriteHeader(http.StatusBadGateway) + _, _ = fmt.Fprint(w, `{"message":"upstream down"}`) + })) + defer server.Close() + + now := time.Unix(1_700_000_000, 0) + connector := NewConnector(server.URL, "", server.Client()) + connector.MaxRetries = 0 + connector.FailureThreshold = 2 + connector.Cooldown = 30 * time.Second + connector.nowFn = func() time.Time { return now } + connector.sleepFn = func(_ context.Context, _ time.Duration) error { return nil } + + _, err := connector.AcquireRepo(context.Background(), "acme/backend") + if err == nil { + t.Fatal("expected first upstream failure") + } + if IsDegradedError(err) { + t.Fatalf("first failure should not open circuit yet: %v", err) + } + + _, err = connector.AcquireRepo(context.Background(), "acme/backend") + if err == nil { + t.Fatal("expected second failure") + } + if !IsDegradedError(err) { + t.Fatalf("expected degradation 
on threshold breach, got %v", err) + } + + attemptsAtOpen := atomic.LoadInt32(&attempts) + _, err = connector.AcquireRepo(context.Background(), "acme/backend") + if err == nil { + t.Fatal("expected circuit-open degraded error") + } + if !IsDegradedError(err) { + t.Fatalf("expected degraded error while cooldown active, got %v", err) + } + if got := atomic.LoadInt32(&attempts); got != attemptsAtOpen { + t.Fatalf("expected no upstream calls while cooldown active, got before=%d after=%d", attemptsAtOpen, got) + } + + now = now.Add(31 * time.Second) + _, err = connector.AcquireRepo(context.Background(), "acme/backend") + if err == nil { + t.Fatal("expected upstream request after cooldown expiry") + } + if IsDegradedError(err) { + t.Fatalf("expected non-degraded upstream error after cooldown expiry, got %v", err) + } +} + +func TestConnectorNonRetryableStatusResetsTransientFailureStreak(t *testing.T) { + t.Parallel() + + var attempts int32 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/repos/acme/backend" { + t.Fatalf("unexpected path: %s", r.URL.Path) + } + switch atomic.AddInt32(&attempts, 1) { + case 1: + w.WriteHeader(http.StatusBadGateway) + _, _ = fmt.Fprint(w, `{"message":"transient-1"}`) + case 2: + w.WriteHeader(http.StatusNotFound) + _, _ = fmt.Fprint(w, `{"message":"missing"}`) + default: + w.WriteHeader(http.StatusBadGateway) + _, _ = fmt.Fprint(w, `{"message":"transient-2"}`) + } + })) + defer server.Close() + + now := time.Unix(1_700_000_000, 0) + connector := NewConnector(server.URL, "", server.Client()) + connector.MaxRetries = 0 + connector.FailureThreshold = 2 + connector.Cooldown = 30 * time.Second + connector.nowFn = func() time.Time { return now } + connector.sleepFn = func(_ context.Context, _ time.Duration) error { return nil } + + _, err := connector.AcquireRepo(context.Background(), "acme/backend") + if err == nil { + t.Fatal("expected first transient failure") + } + if 
IsDegradedError(err) { + t.Fatalf("first transient failure must not open cooldown, got %v", err) + } + + _, err = connector.AcquireRepo(context.Background(), "acme/backend") + if err == nil { + t.Fatal("expected non-retryable status failure") + } + if IsDegradedError(err) { + t.Fatalf("non-retryable status should not open cooldown, got %v", err) + } + + _, err = connector.AcquireRepo(context.Background(), "acme/backend") + if err == nil { + t.Fatal("expected second transient failure") + } + if IsDegradedError(err) { + t.Fatalf("transient streak should reset after non-retryable status, got %v", err) + } +} diff --git a/docs/README.md b/docs/README.md index b3fc8f8..480e586 100644 --- a/docs/README.md +++ b/docs/README.md @@ -30,13 +30,16 @@ This file defines where each topic lives so docs remain contract-aligned and non - Failure taxonomy and exits: `docs/failure_taxonomy_exit_codes.md` - Policy authoring: `docs/policy_authoring.md` - Built-in policy rules: `docs/policy_builtin_rules.md` +- Extension detectors: `docs/extensions/detectors.md` - Threat model: `docs/threat_model.md` ## Contracts and Trust - Manifest spec: `docs/specs/wrkr-manifest.md` +- Compatibility and versioning policy: `docs/trust/compatibility-and-versioning.md` - Compatibility matrix: `docs/contracts/compatibility_matrix.md` - Deterministic guarantees: `docs/trust/deterministic-guarantees.md` +- goja AST-only guardrails: `docs/trust/goja-ast-only.md` - MCP enrich quality model: `docs/trust/mcp-enrich-quality-model.md` - Proof verification: `docs/trust/proof-chain-verification.md` - Security and privacy posture: `docs/trust/security-and-privacy.md` diff --git a/docs/architecture.md b/docs/architecture.md index 860ea79..81ae046 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -34,6 +34,7 @@ flowchart LR ## Deterministic Invariants - Structured configs are parsed with typed decoders where possible. 
+- WebMCP JavaScript parsing is AST-only (`goja/parser` + `goja/ast`), never runtime eval. - Secret values are never emitted. - Risk ordering uses deterministic tie-breakers. - Exit codes are stable API contracts. diff --git a/docs/commands/index.md b/docs/commands/index.md index 2fbcfd6..b5b6da6 100644 --- a/docs/commands/index.md +++ b/docs/commands/index.md @@ -4,8 +4,9 @@ Wrkr CLI surfaces are deterministic and file-based by default. ## Command index -- `wrkr` (root flags: `--json`, `--quiet`, `--explain`) +- `wrkr` (root flags: `--json`, `--quiet`, `--explain`, `--version`) - `wrkr help [command]` +- `wrkr version` - `wrkr init` - `wrkr scan` - `wrkr action` diff --git a/docs/commands/root.md b/docs/commands/root.md index ac933d9..75e56f1 100644 --- a/docs/commands/root.md +++ b/docs/commands/root.md @@ -4,7 +4,8 @@ ```bash wrkr [flags] -wrkr [--json] [--quiet] [--explain] +wrkr [--json] [--quiet] [--explain] [--version] +wrkr --version [--json] wrkr help [command] ``` @@ -15,6 +16,7 @@ Root help output emits a deterministic command catalog plus global flags. - `--json`: emit machine-readable output. - `--quiet`: suppress non-error output. - `--explain`: emit human-readable rationale. +- `--version`: print Wrkr version (supports `--json`). ## Discoverability @@ -40,3 +42,4 @@ wrkr help scan ``` Expected JSON keys for root-flag mode (`wrkr --json`): `status`, `message`. +Expected JSON keys for version mode (`wrkr --version --json`): `status`, `version`. 
diff --git a/docs/commands/scan.md b/docs/commands/scan.md index beac19e..4628200 100644 --- a/docs/commands/scan.md +++ b/docs/commands/scan.md @@ -3,7 +3,7 @@ ## Synopsis ```bash -wrkr scan [--repo | --org | --path ] [--diff] [--enrich] [--baseline ] [--config ] [--state ] [--policy ] [--approved-tools ] [--production-targets ] [--production-targets-strict] [--profile baseline|standard|strict] [--github-api ] [--github-token ] [--report-md] [--report-md-path ] [--report-template exec|operator|audit|public] [--report-share-profile internal|public] [--report-top ] [--json] [--quiet] [--explain] +wrkr scan [--repo | --org | --path ] [--timeout ] [--diff] [--enrich] [--baseline ] [--config ] [--state ] [--policy ] [--approved-tools ] [--production-targets ] [--production-targets-strict] [--profile baseline|standard|strict] [--github-api ] [--github-token ] [--report-md] [--report-md-path ] [--report-template exec|operator|audit|public] [--report-share-profile internal|public] [--report-top ] [--sarif] [--sarif-path ] [--json] [--quiet] [--explain] ``` Exactly one target source is required: `--repo`, `--org`, or `--path`. @@ -23,6 +23,7 @@ Acquisition behavior is fail-closed by target: - `--repo` - `--org` - `--path` +- `--timeout` - `--diff` - `--enrich` - `--baseline` @@ -40,6 +41,8 @@ Acquisition behavior is fail-closed by target: - `--report-template` - `--report-share-profile` - `--report-top` +- `--sarif` +- `--sarif-path` ## Example @@ -52,6 +55,9 @@ wrkr scan --org acme --github-api https://api.github.com --json ``` Expected JSON keys include `status`, `target`, `findings`, `ranked_findings`, `top_findings`, `attack_paths`, `top_attack_paths`, `inventory`, `privilege_budget`, `agent_privilege_map`, `repo_exposure_summaries`, `profile`, `posture_score`, and optional `report` when summary output is requested. +`detector_errors` is included when non-fatal detector failures occur and partial scan results are preserved. 
+`partial_result`, `source_errors`, and `source_degraded` are included when source acquisition/materialization has non-fatal failures. +`sarif.path` is included when `--sarif` output is requested. `inventory.methodology` emits machine-readable scan metadata (`wrkr_version`, timing, repo/file counts, detector inventory). `inventory.tools[*]` includes deterministic `approval_classification` (`approved|unapproved|unknown`), and `inventory.approval_summary` emits aggregate approval-gap ratios for campaign/report workflows. `inventory.tools[*]` also emits report-ready `tool_category` and deterministic `confidence_score` (`0.00-1.00`) for inventory breakdown tables. @@ -64,6 +70,24 @@ Prompt-channel findings use stable reason codes and evidence hashes only (`patte When `--enrich` is enabled, MCP findings include enrich provenance and quality fields: `source`, `as_of`, `package`, `version`, `advisory_count`, `registry_status`, `enrich_quality` (`ok|partial|stale|unavailable`), `advisory_schema`, `registry_schema`, and `enrich_errors`. When production target policy loading is non-fatal (`--production-targets` without `--production-targets-strict`), output may include `policy_warnings`. +Timeout/cancellation contract: + +- `--timeout ` bounds end-to-end scan runtime (`0` disables timeout). +- When timeout is exceeded, JSON error code is `scan_timeout` with exit code `1`. +- When canceled by signal or parent context, JSON error code is `scan_canceled` with exit code `1`. + +Retry/degradation contract: + +- GitHub connector retries retryable failures with bounded jittered backoff. +- HTTP `429` honors `Retry-After` and `X-RateLimit-Reset` wait semantics before retry. +- Repeated transient failures trigger connector cooldown degradation; scan surfaces this in partial-result output (`source_degraded=true` when applicable). + +SARIF contract: + +- `--sarif` emits a SARIF `2.1.0` report from scan findings. +- `--sarif-path` selects output path (default `wrkr.sarif`). 
+- Native `scan --json` payloads and proof outputs remain unchanged; SARIF is additive. + Approved-tools policy example: [`docs/examples/approved-tools.v1.yaml`](../examples/approved-tools.v1.yaml). Production target policy files are YAML and schema-validated (`schemas/v1/policy/production-targets.schema.json`), with exact/prefix matching only. Example: [`docs/examples/production-targets.v1.yaml`](../examples/production-targets.v1.yaml). @@ -82,3 +106,5 @@ Emerging discovery surfaces are static-only in default deterministic mode: - A2A detection uses repo-hosted agent-card JSON files only. - MCP gateway posture is derived from local config files only. - No live endpoint probing is performed by default. + +Custom extension detectors are loaded from `.wrkr/detectors/extensions.json` when present in scanned repositories. See [`docs/extensions/detectors.md`](../extensions/detectors.md). diff --git a/docs/commands/version.md b/docs/commands/version.md new file mode 100644 index 0000000..5adad47 --- /dev/null +++ b/docs/commands/version.md @@ -0,0 +1,14 @@ +# wrkr version + +## Synopsis + +```bash +wrkr version [--json] +wrkr --version [--json] +``` + +## Contract + +- Human output prints `wrkr `. +- JSON output emits `status` and `version`. +- Exit code is `0` on success. diff --git a/docs/extensions/detectors.md b/docs/extensions/detectors.md new file mode 100644 index 0000000..7ebd560 --- /dev/null +++ b/docs/extensions/detectors.md @@ -0,0 +1,41 @@ +# Custom Detector Extensions + +Wrkr supports deterministic file-based detector extensions via repository-local descriptor files. + +## Descriptor path + +- `.wrkr/detectors/extensions.json` + +## Schema contract + +- Schema: `schemas/v1/findings/extension-detectors.schema.json` +- Version field is required and currently `v1`. 
+ +## Example + +```json +{ + "version": "v1", + "detectors": [ + { + "id": "custom_mcp_review", + "finding_type": "custom_mcp_review_required", + "tool_type": "custom_detector", + "location": ".mcp.json", + "severity": "medium", + "remediation": "Review custom MCP trust posture before approval.", + "permissions": ["mcp.access"], + "evidence": [ + {"key": "owner", "value": "security-team"} + ] + } + ] +} +``` + +## Determinism and failure behavior + +- Descriptors are loaded and validated with strict typed parsing. +- Descriptor IDs are deterministically ordered before emission. +- Invalid descriptors fail closed as detector errors with stable code/class (`invalid_extension_descriptor`, `extension`). +- Extension findings are additive and do not bypass built-in detector/risk/proof boundaries. diff --git a/docs/trust/goja-ast-only.md b/docs/trust/goja-ast-only.md new file mode 100644 index 0000000..5d81288 --- /dev/null +++ b/docs/trust/goja-ast-only.md @@ -0,0 +1,20 @@ +# goja AST-Only Guardrails + +Wrkr uses `github.com/dop251/goja` parser/AST packages for WebMCP JavaScript declaration parsing. + +## Rationale + +- JavaScript declaration surfaces are often embedded in repository files. +- AST parsing gives deterministic structural analysis without runtime side effects. +- Parser-based analysis improves resilience versus regex-only parsing while keeping fail-closed behavior. + +## Guardrails + +- Allowed usage: `goja/parser` and `goja/ast` for static parse/tree traversal only. +- Disallowed usage: runtime evaluation paths (`goja.New`, `RunString`, `RunProgram`, dynamic function execution). +- Detector behavior remains file-based and static; no live JS execution. + +## Enforcement + +- Unit test `TestWebMCPParserRejectsRuntimeEvalPath` blocks runtime-eval token regressions. +- Parse errors are surfaced as structured findings (`parse_error`) rather than silently skipped. 
diff --git a/internal/scenarios/contracts_test.go b/internal/scenarios/contracts_test.go index 855047b..41797f3 100644 --- a/internal/scenarios/contracts_test.go +++ b/internal/scenarios/contracts_test.go @@ -23,6 +23,7 @@ func TestScenarioContracts(t *testing.T) { "scenarios/wrkr/a2a-agent-cards/repos", "scenarios/wrkr/webmcp-declarations/repos", "scenarios/wrkr/prompt-channel-poisoning/repos", + "scenarios/wrkr/extension-detectors/repos", "scenarios/wrkr/attack-path-correlation/repos", "scenarios/wrkr/mcp-enrich-supplychain/repos", "scenarios/cross-product/proof-record-interop/records-from-all-3.jsonl", diff --git a/internal/scenarios/epic11_scenario_test.go b/internal/scenarios/epic11_scenario_test.go new file mode 100644 index 0000000..c88a83c --- /dev/null +++ b/internal/scenarios/epic11_scenario_test.go @@ -0,0 +1,46 @@ +//go:build scenario + +package scenarios + +import ( + "path/filepath" + "testing" +) + +func TestScenarioExtensionDetectorExecution(t *testing.T) { + t.Parallel() + + repoRoot := mustFindRepoRoot(t) + scanPath := filepath.Join(repoRoot, "scenarios", "wrkr", "extension-detectors", "repos") + payload := runScenarioCommandJSON(t, []string{"scan", "--path", scanPath, "--json"}) + + if errorsValue, present := payload["detector_errors"]; present { + if list, ok := errorsValue.([]any); ok && len(list) > 0 { + t.Fatalf("expected no detector errors for valid extension scenario, got %v", errorsValue) + } + } + + findings, ok := payload["findings"].([]any) + if !ok || len(findings) == 0 { + t.Fatalf("expected findings from extension scenario, got %v", payload["findings"]) + } + + foundCustom := false + for _, item := range findings { + finding, castOK := item.(map[string]any) + if !castOK { + continue + } + if finding["finding_type"] != "custom_extension_finding" { + continue + } + if finding["detector"] != "extension" { + t.Fatalf("expected detector=extension for custom finding, got %v", finding["detector"]) + } + foundCustom = true + break + } + if 
!foundCustom { + t.Fatalf("expected custom_extension_finding in scenario output, got %v", findings) + } +} diff --git a/internal/testutil/detectors/harness.go b/internal/testutil/detectors/harness.go index 90563f1..690d262 100644 --- a/internal/testutil/detectors/harness.go +++ b/internal/testutil/detectors/harness.go @@ -19,9 +19,12 @@ func RunFixture(t *testing.T, fixtureRoot, org, repo string, detectorList ...det } } - findings, err := registry.Run(context.Background(), []detect.Scope{{Org: org, Repo: repo, Root: fixtureRoot}}, detect.Options{}) + result, err := registry.Run(context.Background(), []detect.Scope{{Org: org, Repo: repo, Root: fixtureRoot}}, detect.Options{}) if err != nil { t.Fatalf("run detector registry: %v", err) } - return findings + if len(result.DetectorErrors) != 0 { + t.Fatalf("unexpected detector errors: %+v", result.DetectorErrors) + } + return result.Findings } diff --git a/scenarios/wrkr/extension-detectors/repos/ext-repo/.custom/policy.yaml b/scenarios/wrkr/extension-detectors/repos/ext-repo/.custom/policy.yaml new file mode 100644 index 0000000..167d0b5 --- /dev/null +++ b/scenarios/wrkr/extension-detectors/repos/ext-repo/.custom/policy.yaml @@ -0,0 +1 @@ +policy: custom-extension diff --git a/scenarios/wrkr/extension-detectors/repos/ext-repo/.wrkr/detectors/extensions.json b/scenarios/wrkr/extension-detectors/repos/ext-repo/.wrkr/detectors/extensions.json new file mode 100644 index 0000000..86b7ebc --- /dev/null +++ b/scenarios/wrkr/extension-detectors/repos/ext-repo/.wrkr/detectors/extensions.json @@ -0,0 +1,17 @@ +{ + "version": "v1", + "detectors": [ + { + "id": "custom_extension_story13", + "finding_type": "custom_extension_finding", + "tool_type": "custom_detector", + "location": ".custom/policy.yaml", + "severity": "low", + "remediation": "Review extension-defined policy finding.", + "permissions": ["mcp.access"], + "evidence": [ + {"key": "owner", "value": "security"} + ] + } + ] +} diff --git a/schemas/v1/README.md 
b/schemas/v1/README.md index 94f8c6a..22f88fc 100644 --- a/schemas/v1/README.md +++ b/schemas/v1/README.md @@ -3,6 +3,7 @@ This directory contains versioned JSON/YAML schemas for Wrkr runtime and artifact contracts. - `cli/`: shared CLI success/error envelope contracts. +- `findings/`: finding and extension-detector descriptor contracts. - `manifest/`: open `wrkr-manifest.yaml` interoperability specification. - `regress/`: posture regression baseline and drift-result contracts. - `report/`: deterministic shareable report-summary contracts. diff --git a/schemas/v1/findings/extension-detectors.schema.json b/schemas/v1/findings/extension-detectors.schema.json new file mode 100644 index 0000000..8cda544 --- /dev/null +++ b/schemas/v1/findings/extension-detectors.schema.json @@ -0,0 +1,53 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://schemas.clyra.ai/wrkr/v1/extension-detectors.schema.json", + "title": "Wrkr Extension Detector Descriptors", + "type": "object", + "required": ["version", "detectors"], + "properties": { + "version": { + "type": "string", + "enum": ["v1"] + }, + "detectors": { + "type": "array", + "items": { + "type": "object", + "required": ["id", "finding_type", "tool_type", "location", "severity"], + "properties": { + "id": { + "type": "string", + "pattern": "^[a-zA-Z0-9._-]+$" + }, + "finding_type": {"type": "string", "minLength": 1}, + "tool_type": {"type": "string", "minLength": 1}, + "location": {"type": "string", "minLength": 1}, + "severity": { + "type": "string", + "enum": ["critical", "high", "medium", "low", "info"] + }, + "remediation": {"type": "string"}, + "permissions": { + "type": "array", + "items": {"type": "string"}, + "uniqueItems": true + }, + "evidence": { + "type": "array", + "items": { + "type": "object", + "required": ["key", "value"], + "properties": { + "key": {"type": "string"}, + "value": {"type": "string"} + }, + "additionalProperties": false + } + } + }, + "additionalProperties": false + 
} + } + }, + "additionalProperties": false +} diff --git a/testinfra/contracts/story7_contracts_test.go b/testinfra/contracts/story7_contracts_test.go index 9c0db29..56d30ff 100644 --- a/testinfra/contracts/story7_contracts_test.go +++ b/testinfra/contracts/story7_contracts_test.go @@ -65,6 +65,7 @@ func TestStory7SchemaContractsStable(t *testing.T) { "schemas/v1/evidence/evidence-bundle.schema.json", "schemas/v1/export/appendix-export.schema.json", "schemas/v1/export/inventory-export.schema.json", + "schemas/v1/findings/extension-detectors.schema.json", "schemas/v1/findings/finding.schema.json", "schemas/v1/identity/identity-manifest.schema.json", "schemas/v1/inventory/inventory.schema.json",