diff --git a/README.md b/README.md index 600019b..88a3c48 100644 --- a/README.md +++ b/README.md @@ -106,7 +106,9 @@ See: [2,880 tool calls gate-checked in 24 hours](docs/blog/openclaw_24h_boundary **Incident → CI gate in one command** — `gait regress bootstrap` converts a bad run into a permanent regression fixture with JUnit output. Exit 0 = pass, exit 5 = drift. Never debug the same failure twice. -**Durable jobs** — dispatch long-running agent work that survives failures. Checkpoints, pause/resume/cancel, approval gates, deterministic stop reasons. No more lost state at step 47. +**Durable jobs** — dispatch long-running agent work that survives failures. Checkpoints, pause/resume/stop/cancel, approval gates, deterministic stop reasons, and emergency preemption for queued dispatches. No more lost state at step 47. + +**Destructive safety boundary** — enforce phase-aware plan/apply behavior (`plan` stays non-destructive, destructive `apply` requires approval), plus fail-closed destructive budgets and bounded approval token scopes (`max-targets`, `max-ops`). **Deterministic replay and diff** — replay an agent run using recorded results as stubs (no real API calls). Diff two packs to see what changed, including context drift classification. @@ -198,7 +200,7 @@ gait tour Interactive walkthrough gait verify Verify integrity offline gait verify chain|session-chain Multi-artifact chain verification gait job submit|status|checkpoint|pause|resume Durable job lifecycle -gait job approve|cancel|inspect Job approval and inspection +gait job stop|approve|cancel|inspect Emergency stop, approval, and inspection gait pack build|verify|inspect|diff|export Unified pack operations + OTEL/Postgres sinks gait regress init|bootstrap|run Incident → CI gate gait gate eval Policy enforcement + signed trace diff --git a/cmd/gait/approve.go b/cmd/gait/approve.go index 03c7cd4..d9ab030 100644 --- a/cmd/gait/approve.go +++ b/cmd/gait/approve.go @@ -18,6 +18,8 @@ type approveOutput struct { ExpiresAt string `json:"expires_at,omitempty"` ReasonCode string `json:"reason_code,omitempty"` Scope []string `json:"scope,omitempty"` + MaxTargets int `json:"max_targets,omitempty"` + MaxOps int `json:"max_ops,omitempty"` KeyID string `json:"key_id,omitempty"` Warnings []string `json:"warnings,omitempty"` Error string `json:"error,omitempty"` @@ -38,6 +40,8 @@ func runApprove(arguments []string) int { var scope string var approver string var reasonCode string + var maxTargets int + var maxOps int var outputPath string var keyMode string var privateKeyPath string @@ -52,6 +56,8 @@ func runApprove(arguments []string) int { flagSet.StringVar(&scope, "scope", "", "comma-separated approval scope values (for example tool:tool.write)") flagSet.StringVar(&approver, "approver", "", "approver identity") flagSet.StringVar(&reasonCode, "reason-code", "", "approval reason code") + flagSet.IntVar(&maxTargets, "max-targets", 0, "optional max target count bound for destructive approval scope (0 disables)") + flagSet.IntVar(&maxOps, "max-ops", 0, "optional max operation count bound for destructive approval scope (0 disables)") flagSet.StringVar(&outputPath, "out", "", "path to emitted approval token (default approval_.json)") flagSet.StringVar(&keyMode, "key-mode", string(sign.ModeDev), "signing key mode: dev or prod") flagSet.StringVar(&privateKeyPath, "private-key", "", "path to base64 private signing key") @@ -96,6 +102,8 @@ func runApprove(arguments []string) int { PolicyDigest: policyDigest, DelegationBindingDigest: delegationBindingDigest, Scope: scopeValues, + MaxTargets: maxTargets, + MaxOps: maxOps, TTL: ttlDuration, SigningPrivateKey: keyPair.Private, TokenPath: outputPath, @@ -115,6 +123,8 @@ func runApprove(arguments []string) int { ExpiresAt: result.Token.ExpiresAt.UTC().Format(time.RFC3339), ReasonCode: result.Token.ReasonCode, Scope: result.Token.Scope, + MaxTargets: result.Token.MaxTargets, + MaxOps: result.Token.MaxOps, KeyID: keyID, Warnings: warnings, Description: "signed approval token created", @@ -149,5 +159,5 @@ func writeApproveOutput(jsonOutput bool, output approveOutput, exitCode int) int func printApproveUsage() { fmt.Println("Usage:") - fmt.Println(" gait approve --intent-digest --policy-digest [--delegation-binding-digest ] --ttl --scope --approver --reason-code [--out token.json] [--key-mode dev|prod] [--private-key |--private-key-env ] [--json] [--explain]") + fmt.Println(" gait approve --intent-digest --policy-digest [--delegation-binding-digest ] --ttl --scope --approver --reason-code [--max-targets ] [--max-ops ] [--out token.json] [--key-mode dev|prod] [--private-key |--private-key-env ] [--json] [--explain]") } diff --git a/cmd/gait/gate.go b/cmd/gait/gate.go index 17b471d..85a2e2e 100644 --- a/cmd/gait/gate.go +++ b/cmd/gait/gate.go @@ -50,10 +50,15 @@ type gateEvalOutput struct { PatternID string `json:"pattern_id,omitempty"` RegistryReason string `json:"registry_reason,omitempty"` MatchedRule string `json:"matched_rule,omitempty"` + Phase string `json:"phase,omitempty"` RateLimitScope string `json:"rate_limit_scope,omitempty"` RateLimitKey string `json:"rate_limit_key,omitempty"` RateLimitUsed int `json:"rate_limit_used,omitempty"` RateLimitRemaining int `json:"rate_limit_remaining,omitempty"` + DestructiveBudgetScope string `json:"destructive_budget_scope,omitempty"` + DestructiveBudgetKey string `json:"destructive_budget_key,omitempty"` + DestructiveBudgetUsed int `json:"destructive_budget_used,omitempty"` + DestructiveBudgetRemaining int `json:"destructive_budget_remaining,omitempty"` CredentialIssuer string `json:"credential_issuer,omitempty"` CredentialRef string `json:"credential_ref,omitempty"` CredentialEvidencePath string `json:"credential_evidence_path,omitempty"` @@ -394,6 +399,7 @@ func runGateEval(arguments []string) int { } var rateDecision gate.RateLimitDecision + var destructiveBudgetDecision gate.RateLimitDecision if outcome.RateLimit.Requests > 0 { rateDecision, err = gate.EnforceRateLimit(rateLimitState, outcome.RateLimit, intent, time.Now().UTC()) if err != nil { @@ -405,6 +411,19 @@ func runGateEval(arguments []string) int { result.Violations = mergeUniqueSorted(result.Violations, []string{"rate_limit_exceeded"}) } } + if outcome.DestructiveBudget.Requests > 0 && gate.IntentContainsDestructiveTarget(intent.Targets) { + budgetIntent := intent + budgetIntent.ToolName = "destructive_budget|" + strings.TrimSpace(intent.ToolName) + destructiveBudgetDecision, err = gate.EnforceRateLimit(rateLimitState, outcome.DestructiveBudget, budgetIntent, time.Now().UTC()) + if err != nil { + return writeGateEvalOutput(jsonOutput, gateEvalOutput{OK: false, Error: err.Error()}, exitCodeForError(err, exitInvalidInput)) + } + if !destructiveBudgetDecision.Allowed { + result.Verdict = "block" + result.ReasonCodes = mergeUniqueSorted(result.ReasonCodes, []string{"destructive_budget_exceeded"}) + result.Violations = mergeUniqueSorted(result.Violations, []string{"destructive_budget_exceeded"}) + } + } keyPair, signingWarnings, err := sign.LoadSigningKey(sign.KeyConfig{ Mode: sign.KeyMode(keyMode), @@ -578,6 +597,8 @@ func runGateEval(arguments []string) int { ExpectedPolicyDigest: policyDigestForContext, ExpectedDelegationBindingDigest: delegationBindingDigest, RequiredScope: requiredApprovalScope, + TargetCount: gateIntentTargetCount(intent), + OperationCount: gateIntentOperationCount(intent), }) if err != nil { reasonCode := gate.ApprovalCodeSchemaInvalid @@ -803,10 +824,15 @@ func runGateEval(arguments []string) int { PatternID: outcome.PatternID, RegistryReason: outcome.RegistryReason, MatchedRule: outcome.MatchedRule, + Phase: intent.Context.Phase, RateLimitScope: rateDecision.Scope, RateLimitKey: rateDecision.Key, RateLimitUsed: rateDecision.Used, RateLimitRemaining: rateDecision.Remaining, + DestructiveBudgetScope: destructiveBudgetDecision.Scope, + DestructiveBudgetKey: destructiveBudgetDecision.Key, + DestructiveBudgetUsed: destructiveBudgetDecision.Used, + DestructiveBudgetRemaining: destructiveBudgetDecision.Remaining, CredentialIssuer: credentialIssuer, CredentialRef: credentialRefOut, CredentialEvidencePath: resolvedCredentialEvidencePath, @@ -836,6 +862,29 @@ func gatherDelegationTokenPaths(primaryPath, chainCSV string) []string { return mergeUniqueSorted(nil, paths) } +func gateIntentTargetCount(intent schemagate.IntentRequest) int { + if intent.Script != nil && len(intent.Script.Steps) > 0 { + total := 0 + for _, step := range intent.Script.Steps { + total += len(step.Targets) + } + if total > 0 { + return total + } + } + return len(intent.Targets) +} + +func gateIntentOperationCount(intent schemagate.IntentRequest) int { + if intent.Script != nil && len(intent.Script.Steps) > 0 { + return len(intent.Script.Steps) + } + if len(intent.Targets) == 0 { + return 1 + } + return len(intent.Targets) +} + func buildPreApprovedOutcome(intent schemagate.IntentRequest, producerVersion string, match gate.ApprovedScriptMatch) (gate.EvalOutcome, error) { normalizedIntent, err := gate.NormalizeIntent(intent) if err != nil { diff --git a/cmd/gait/gate_test.go b/cmd/gait/gate_test.go new file mode 100644 index 0000000..8be3f84 --- /dev/null +++ b/cmd/gait/gate_test.go @@ -0,0 +1,71 @@ +package main + +import ( + "testing" + + schemagate "github.com/Clyra-AI/gait/core/schema/v1/gate" +) + +func TestGateIntentTargetCountAndOperationCount(t *testing.T) { + intent := schemagate.IntentRequest{ + Targets: []schemagate.IntentTarget{ + {Kind: "path", Value: "/tmp/fallback"}, + }, + Script: &schemagate.IntentScript{ + Steps: []schemagate.IntentScriptStep{ + { + ToolName: "tool.read", + Targets: []schemagate.IntentTarget{ + {Kind: "path", Value: "/tmp/a"}, + }, + }, + { + ToolName: "tool.write", + Targets: []schemagate.IntentTarget{ + {Kind: "path", Value: "/tmp/b"}, + {Kind: "path", Value: "/tmp/c"}, + }, + }, + }, + }, + } + + if got := gateIntentTargetCount(intent); got != 3 { + t.Fatalf("gateIntentTargetCount() = %d, want 3", got) + } + if got := gateIntentOperationCount(intent); got != 2 { + t.Fatalf("gateIntentOperationCount() = %d, want 2", got) + } +} + +func TestGateIntentTargetCountFallsBackWhenScriptTargetsMissing(t *testing.T) { + intent := schemagate.IntentRequest{ + Targets: []schemagate.IntentTarget{ + {Kind: "path", Value: "/tmp/fallback-a"}, + {Kind: "path", Value: "/tmp/fallback-b"}, + }, + Script: &schemagate.IntentScript{ + Steps: []schemagate.IntentScriptStep{ + {ToolName: "tool.read"}, + {ToolName: "tool.write"}, + }, + }, + } + + if got := gateIntentTargetCount(intent); got != 2 { + t.Fatalf("gateIntentTargetCount() = %d, want 2", got) + } + if got := gateIntentOperationCount(intent); got != 2 { + t.Fatalf("gateIntentOperationCount() = %d, want 2", got) + } +} + +func TestGateIntentOperationCountDefaultsToOneWithoutTargets(t *testing.T) { + intent := schemagate.IntentRequest{} + if got := gateIntentTargetCount(intent); got != 0 { + t.Fatalf("gateIntentTargetCount() = %d, want 0", got) + } + if got := gateIntentOperationCount(intent); got != 1 { + t.Fatalf("gateIntentOperationCount() = %d, want 1", got) + } +} diff --git a/cmd/gait/job.go b/cmd/gait/job.go index 2bafacb..9efd5f9 100644 --- a/cmd/gait/job.go +++ b/cmd/gait/job.go @@ -48,6 +48,8 @@ func runJob(arguments []string) int { return runJobCheckpoint(arguments[1:]) case "pause": return runJobPause(arguments[1:]) + case "stop": + return runJobStop(arguments[1:]) case "approve": return runJobApprove(arguments[1:]) case "resume": @@ -308,6 +310,12 @@ func runJobCancel(arguments []string) int { }) } +func runJobStop(arguments []string) int { + return runSimpleJobTransition(arguments, "stop", func(root, jobID, actor string) (jobruntime.JobState, error) { + return jobruntime.EmergencyStop(root, jobID, jobruntime.TransitionOptions{Actor: actor}) + }) +} + func runSimpleJobTransition(arguments []string, operation string, action func(root, jobID, actor string) (jobruntime.JobState, error)) int { arguments = reorderInterspersedFlags(arguments, map[string]bool{"id": true, "root": true, "actor": true}) flagSet := flag.NewFlagSet("job-"+operation, flag.ContinueOnError) @@ -332,6 +340,8 @@ func runSimpleJobTransition(arguments []string, operation string, action func(ro switch operation { case "pause": printJobPauseUsage() + case "stop": + printJobStopUsage() case "cancel": printJobCancelUsage() } @@ -553,6 +563,7 @@ func printJobUsage() { fmt.Println(" gait job checkpoint list --id [--root ./gait-out/jobs] [--json] [--explain]") fmt.Println(" gait job checkpoint show --id --checkpoint [--root ./gait-out/jobs] [--json] [--explain]") fmt.Println(" gait job pause --id [--actor ] [--root ./gait-out/jobs] [--json] [--explain]") + fmt.Println(" gait job stop --id [--actor ] [--root ./gait-out/jobs] [--json] [--explain]") fmt.Println(" gait job approve --id --actor [--reason ] [--root ./gait-out/jobs] [--json] [--explain]") fmt.Println(" gait job resume --id [--actor ] [--identity ] [--reason ] [--policy |--policy-digest ] [--policy-ref ] [--identity-revocations |--identity-revoked] [--identity-validation-source ] [--env-fingerprint ] [--allow-env-mismatch] [--root ./gait-out/jobs] [--json] [--explain]") fmt.Println(" gait job cancel --id [--actor ] [--root ./gait-out/jobs] [--json] [--explain]") @@ -596,6 +607,11 @@ func printJobPauseUsage() { fmt.Println(" gait job pause --id [--actor ] [--root ./gait-out/jobs] [--json] [--explain]") } +func printJobStopUsage() { + fmt.Println("Usage:") + fmt.Println(" gait job stop --id [--actor ] [--root ./gait-out/jobs] [--json] [--explain]") +} + func printJobApproveUsage() { fmt.Println("Usage:") fmt.Println(" gait job approve --id --actor [--reason ] [--root ./gait-out/jobs] [--json] [--explain]") diff --git a/cmd/gait/job_cli_test.go b/cmd/gait/job_cli_test.go index 31de446..2b6feb7 100644 --- a/cmd/gait/job_cli_test.go +++ b/cmd/gait/job_cli_test.go @@ -77,6 +77,14 @@ func TestRunJobLifecycleCommands(t *testing.T) { t.Fatalf("unexpected pause output: %#v", pauseOut) } + stopCode, stopOut := runJobJSON(t, []string{"stop", "--id", jobID, "--root", root, "--actor", "alice", "--json"}) + if stopCode != exitOK { + t.Fatalf("stop expected %d got %d output=%#v", exitOK, stopCode, stopOut) + } + if stopOut.Job == nil || stopOut.Job.Status != "emergency_stopped" || stopOut.Job.StatusReasonCode != "emergency_stop_preempted" { + t.Fatalf("unexpected stop output: %#v", stopOut) + } + inspectCode, inspectOut := runJobJSON(t, []string{"inspect", "--id", jobID, "--root", root, "--json"}) if inspectCode != exitOK { t.Fatalf("inspect expected %d got %d output=%#v", exitOK, inspectCode, inspectOut) @@ -128,6 +136,7 @@ func TestRunJobHelpAndErrorPaths(t *testing.T) { {"checkpoint", "list", "--help"}, {"checkpoint", "show", "--help"}, {"pause", "--help"}, + {"stop", "--help"}, {"approve", "--help"}, {"resume", "--help"}, {"cancel", "--help"}, diff --git a/cmd/gait/mcp.go b/cmd/gait/mcp.go index 7cb74c5..d0cc160 100644 --- a/cmd/gait/mcp.go +++ b/cmd/gait/mcp.go @@ -12,6 +12,7 @@ import ( "time" "github.com/Clyra-AI/gait/core/gate" + "github.com/Clyra-AI/gait/core/jobruntime" "github.com/Clyra-AI/gait/core/mcp" "github.com/Clyra-AI/gait/core/pack" "github.com/Clyra-AI/gait/core/runpack" @@ -25,6 +26,8 @@ type mcpProxyOutput struct { Executed bool `json:"executed"` Adapter string `json:"adapter,omitempty"` RunID string `json:"run_id,omitempty"` + JobID string `json:"job_id,omitempty"` + Phase string `json:"phase,omitempty"` SessionID string `json:"session_id,omitempty"` ToolName string `json:"tool_name,omitempty"` Verdict string `json:"verdict,omitempty"` @@ -47,6 +50,7 @@ type mcpProxyOutput struct { type mcpProxyEvalOptions struct { Adapter string Profile string + JobRoot string RunID string TracePath string RunpackOut string @@ -89,6 +93,7 @@ func runMCPProxy(arguments []string) int { "call": true, "adapter": true, "profile": true, + "job-root": true, "trace-out": true, "run-id": true, "runpack-out": true, @@ -106,6 +111,7 @@ func runMCPProxy(arguments []string) int { var callPath string var adapter string var profile string + var jobRoot string var tracePath string var runID string var runpackOut string @@ -122,6 +128,7 @@ func runMCPProxy(arguments []string) int { flagSet.StringVar(&callPath, "call", "", "path to tool call JSON (use '-' for stdin)") flagSet.StringVar(&adapter, "adapter", "mcp", "adapter payload format: mcp|openai|anthropic|langchain|claude_code") flagSet.StringVar(&profile, "profile", string(gateProfileStandard), "runtime profile: standard|oss-prod") + flagSet.StringVar(&jobRoot, "job-root", "./gait-out/jobs", "job runtime root for emergency stop preemption checks when context.job_id is present") flagSet.StringVar(&tracePath, "trace-out", "", "path to emitted trace JSON (default trace_.json)") flagSet.StringVar(&runID, "run-id", "", "optional run_id override for proxy artifacts") flagSet.StringVar(&runpackOut, "runpack-out", "", "optional path to emit a runpack zip for this proxy decision") @@ -161,6 +168,7 @@ func runMCPProxy(arguments []string) int { output, exitCode, err := evaluateMCPProxyPayload(policyPath, payload, mcpProxyEvalOptions{ Adapter: adapter, Profile: profile, + JobRoot: jobRoot, RunID: runID, TracePath: tracePath, RunpackOut: runpackOut, @@ -222,6 +230,14 @@ func evaluateMCPProxyPayload(policyPath string, payload []byte, options mcpProxy if err != nil { return mcpProxyOutput{}, exitInvalidInput, err } + emergencyBlockedReason, emergencyWarnings := evaluateMCPEmergencyStop(call, strings.TrimSpace(options.JobRoot)) + if emergencyBlockedReason != "" { + result := evalResult.Outcome.Result + result.Verdict = "block" + result.ReasonCodes = mergeUniqueSorted(result.ReasonCodes, []string{emergencyBlockedReason}) + result.Violations = mergeUniqueSorted(result.Violations, []string{"emergency_stop_active"}) + evalResult.Outcome.Result = result + } keyPair, warnings, err := sign.LoadSigningKey(sign.KeyConfig{ Mode: sign.KeyMode(strings.ToLower(strings.TrimSpace(options.KeyMode))), @@ -231,6 +247,7 @@ func evaluateMCPProxyPayload(policyPath string, payload []byte, options mcpProxy if err != nil { return mcpProxyOutput{}, exitInvalidInput, err } + warnings = mergeUniqueSorted(warnings, emergencyWarnings) resolvedTracePath := strings.TrimSpace(options.TracePath) if resolvedTracePath == "" { resolvedTracePath = fmt.Sprintf("trace_%s_%s.json", normalizeRunID(options.RunID), time.Now().UTC().Format("20060102T150405.000000000")) @@ -351,6 +368,8 @@ func evaluateMCPProxyPayload(policyPath string, payload []byte, options mcpProxy Executed: false, Adapter: strings.ToLower(strings.TrimSpace(options.Adapter)), RunID: resolvedRunID, + JobID: evalResult.Intent.Context.JobID, + Phase: evalResult.Intent.Context.Phase, SessionID: evalResult.Intent.Context.SessionID, ToolName: evalResult.Intent.ToolName, Verdict: evalResult.Outcome.Result.Verdict, @@ -370,6 +389,28 @@ func evaluateMCPProxyPayload(policyPath string, payload []byte, options mcpProxy }, exitCode, nil } +func evaluateMCPEmergencyStop(call mcp.ToolCall, jobRoot string) (string, []string) { + jobID := strings.TrimSpace(call.Context.JobID) + if jobID == "" { + return "", nil + } + state, err := jobruntime.Status(jobRoot, jobID) + if err != nil { + return "emergency_stop_state_unavailable", []string{fmt.Sprintf("job_status_unavailable=%v", err)} + } + if !jobruntime.IsEmergencyStopped(state) { + return "", nil + } + if _, recordErr := jobruntime.RecordBlockedDispatch(jobRoot, jobID, jobruntime.DispatchRecordOptions{ + Actor: "mcp-proxy", + DispatchPath: "mcp.proxy", + ReasonCode: "emergency_stop_preempted", + }); recordErr != nil { + return "emergency_stop_preempted", []string{fmt.Sprintf("blocked_dispatch_record_failed=%v", recordErr)} + } + return "emergency_stop_preempted", nil +} + func validateMCPBoundaryOAuthEvidence(call mcp.ToolCall, profile gateEvalProfile) error { mode := strings.ToLower(strings.TrimSpace(call.Context.AuthMode)) if mode == "" { @@ -753,13 +794,13 @@ func writeMCPProxyOutput(jsonOutput bool, output mcpProxyOutput, exitCode int) i func printMCPUsage() { fmt.Println("Usage:") - fmt.Println(" gait mcp proxy --policy --call [--adapter mcp|openai|anthropic|langchain|claude_code] [--profile standard|oss-prod] [--trace-out trace.json] [--run-id run_...] [--runpack-out runpack.zip] [--pack-out pack_run.zip] [--export-log-out events.jsonl] [--export-otel-out otel.jsonl] [--json] [--explain]") - fmt.Println(" gait mcp bridge --policy --call [--adapter mcp|openai|anthropic|langchain|claude_code] [--profile standard|oss-prod] [--trace-out trace.json] [--run-id run_...] [--runpack-out runpack.zip] [--pack-out pack_run.zip] [--export-log-out events.jsonl] [--export-otel-out otel.jsonl] [--json] [--explain]") - fmt.Println(" gait mcp serve --policy [--listen 127.0.0.1:8787] [--adapter mcp|openai|anthropic|langchain|claude_code] [--auth-mode off|token] [--auth-token-env ] [--max-request-bytes ] [--http-verdict-status compat|strict] [--allow-client-artifact-paths] [--trace-dir ] [--runpack-dir ] [--pack-dir ] [--trace-max-age ] [--trace-max-count ] [--runpack-max-age ] [--runpack-max-count ] [--pack-max-age ] [--pack-max-count ] [--session-max-age ] [--session-max-count ] [--json] [--explain]") + fmt.Println(" gait mcp proxy --policy --call [--adapter mcp|openai|anthropic|langchain|claude_code] [--profile standard|oss-prod] [--job-root ./gait-out/jobs] [--trace-out trace.json] [--run-id run_...] [--runpack-out runpack.zip] [--pack-out pack_run.zip] [--export-log-out events.jsonl] [--export-otel-out otel.jsonl] [--json] [--explain]") + fmt.Println(" gait mcp bridge --policy --call [--adapter mcp|openai|anthropic|langchain|claude_code] [--profile standard|oss-prod] [--job-root ./gait-out/jobs] [--trace-out trace.json] [--run-id run_...] [--runpack-out runpack.zip] [--pack-out pack_run.zip] [--export-log-out events.jsonl] [--export-otel-out otel.jsonl] [--json] [--explain]") + fmt.Println(" gait mcp serve --policy [--listen 127.0.0.1:8787] [--adapter mcp|openai|anthropic|langchain|claude_code] [--profile standard|oss-prod] [--job-root ./gait-out/jobs] [--auth-mode off|token] [--auth-token-env ] [--max-request-bytes ] [--http-verdict-status compat|strict] [--allow-client-artifact-paths] [--trace-dir ] [--runpack-dir ] [--pack-dir ] [--trace-max-age ] [--trace-max-count ] [--runpack-max-age ] [--runpack-max-count ] [--pack-max-age ] [--pack-max-count ] [--session-max-age ] [--session-max-count ] [--json] [--explain]") fmt.Println(" serve endpoints: POST /v1/evaluate, POST /v1/evaluate/sse, POST /v1/evaluate/stream") } func printMCPProxyUsage() { fmt.Println("Usage:") - fmt.Println(" gait mcp proxy --policy --call [--adapter mcp|openai|anthropic|langchain|claude_code] [--profile standard|oss-prod] [--trace-out trace.json] [--run-id run_...] [--runpack-out runpack.zip] [--pack-out pack_run.zip] [--export-log-out events.jsonl] [--export-otel-out otel.jsonl] [--key-mode dev|prod] [--private-key |--private-key-env ] [--json] [--explain]") + fmt.Println(" gait mcp proxy --policy --call [--adapter mcp|openai|anthropic|langchain|claude_code] [--profile standard|oss-prod] [--job-root ./gait-out/jobs] [--trace-out trace.json] [--run-id run_...] [--runpack-out runpack.zip] [--pack-out pack_run.zip] [--export-log-out events.jsonl] [--export-otel-out otel.jsonl] [--key-mode dev|prod] [--private-key |--private-key-env ] [--json] [--explain]") } diff --git a/cmd/gait/mcp_server.go b/cmd/gait/mcp_server.go index c38ea54..b7be26e 100644 --- a/cmd/gait/mcp_server.go +++ b/cmd/gait/mcp_server.go @@ -16,6 +16,7 @@ import ( "strings" "time" + "github.com/Clyra-AI/gait/core/jobruntime" "github.com/Clyra-AI/gait/core/runpack" ) @@ -24,6 +25,7 @@ type mcpServeConfig struct { ListenAddr string DefaultAdapter string Profile string + JobRoot string AuthMode string AuthToken string // #nosec G117 -- field name is explicit config surface, not a hardcoded secret. TraceDir string @@ -85,6 +87,7 @@ func runMCPServe(arguments []string) int { "listen": true, "adapter": true, "profile": true, + "job-root": true, "auth-mode": true, "auth-token-env": true, "trace-dir": true, @@ -115,6 +118,7 @@ func runMCPServe(arguments []string) int { var listenAddr string var adapter string var profile string + var jobRoot string var authMode string var authTokenEnv string var traceDir string @@ -144,6 +148,7 @@ func runMCPServe(arguments []string) int { flagSet.StringVar(&listenAddr, "listen", "127.0.0.1:8787", "listen address") flagSet.StringVar(&adapter, "adapter", "mcp", "default adapter: mcp|openai|anthropic|langchain|claude_code") flagSet.StringVar(&profile, "profile", "standard", "runtime profile: standard|oss-prod") + flagSet.StringVar(&jobRoot, "job-root", "./gait-out/jobs", "job runtime root for emergency stop preemption checks when context.job_id is present") flagSet.StringVar(&authMode, "auth-mode", "off", "serve auth mode: off|token") flagSet.StringVar(&authTokenEnv, "auth-token-env", "", "env var containing bearer token for --auth-mode token") flagSet.StringVar(&traceDir, "trace-dir", "./gait-out/mcp-serve/traces", "directory for emitted traces") @@ -190,6 +195,7 @@ func runMCPServe(arguments []string) int { ListenAddr: strings.TrimSpace(listenAddr), DefaultAdapter: strings.ToLower(strings.TrimSpace(adapter)), Profile: strings.ToLower(strings.TrimSpace(profile)), + JobRoot: strings.TrimSpace(jobRoot), AuthMode: strings.ToLower(strings.TrimSpace(authMode)), TraceDir: strings.TrimSpace(traceDir), RunpackDir: strings.TrimSpace(runpackDir), @@ -300,6 +306,9 @@ func newMCPServeHandler(config mcpServeConfig) (http.Handler, error) { if strings.TrimSpace(config.DefaultAdapter) == "" { config.DefaultAdapter = "mcp" } + if strings.TrimSpace(config.JobRoot) == "" { + config.JobRoot = "./gait-out/jobs" + } if config.MaxRequestBytes <= 0 { config.MaxRequestBytes = 1 << 20 } @@ -437,6 +446,7 @@ func evaluateMCPServeRequest(config mcpServeConfig, writer http.ResponseWriter, output, exitCode, evalErr := evaluateMCPProxyPayload(config.PolicyPath, callPayload, mcpProxyEvalOptions{ Adapter: adapter, Profile: config.Profile, + JobRoot: config.JobRoot, RunID: input.RunID, TracePath: tracePath, RunpackOut: runpackPath, @@ -470,7 +480,27 @@ func evaluateMCPServeRequest(config mcpServeConfig, writer http.ResponseWriter, RunID: output.RunID, ProducerVersion: version, }); err != nil { - return mcpServeEvaluateResponse{}, err + if strings.TrimSpace(input.SessionJournal) == "" && strings.Contains(err.Error(), "different session/run") { + base := sanitizeSessionFileBase(sessionID) + journalPath = filepath.Join(config.SessionDir, fmt.Sprintf("%s_%s.journal.jsonl", base, normalizeRunID(output.RunID))) + if _, retryErr := runpack.StartSession(journalPath, runpack.SessionStartOptions{ + SessionID: sessionID, + RunID: output.RunID, + ProducerVersion: version, + }); retryErr != nil { + return mcpServeEvaluateResponse{}, retryErr + } + } else { + return mcpServeEvaluateResponse{}, err + } + } + safetyInvariantVersion := "" + safetyInvariantHash := "" + if jobID := strings.TrimSpace(output.JobID); jobID != "" { + if state, statusErr := jobruntime.Status(config.JobRoot, jobID); statusErr == nil { + safetyInvariantVersion = strings.TrimSpace(state.SafetyInvariantVersion) + safetyInvariantHash = strings.TrimSpace(state.SafetyInvariantHash) + } } event, err := runpack.AppendSessionEvent(journalPath, runpack.SessionAppendOptions{ ProducerVersion: version, @@ -482,6 +512,8 @@ func evaluateMCPServeRequest(config mcpServeConfig, writer http.ResponseWriter, Verdict: output.Verdict, ReasonCodes: output.ReasonCodes, Violations: output.Violations, + SafetyInvariantVersion: safetyInvariantVersion, + SafetyInvariantHash: safetyInvariantHash, }) if err != nil { return mcpServeEvaluateResponse{}, err @@ -676,7 +708,7 @@ func mcpRetentionMatches(class string, fileName string) bool { func printMCPServeUsage() { fmt.Println("Usage:") - fmt.Println(" gait mcp serve --policy [--listen 127.0.0.1:8787] [--adapter mcp|openai|anthropic|langchain|claude_code] [--profile standard|oss-prod] [--auth-mode off|token] [--auth-token-env ] [--max-request-bytes ] [--http-verdict-status compat|strict] [--allow-client-artifact-paths] [--trace-dir ] [--runpack-dir ] [--pack-dir ] [--session-dir ] [--trace-max-age ] [--trace-max-count ] [--runpack-max-age ] [--runpack-max-count ] [--pack-max-age ] [--pack-max-count ] [--session-max-age ] [--session-max-count ] [--export-log-out events.jsonl] [--export-otel-out otel.jsonl] [--key-mode dev|prod] [--private-key |--private-key-env ] [--json] [--explain]") + fmt.Println(" gait mcp serve --policy [--listen 127.0.0.1:8787] [--adapter mcp|openai|anthropic|langchain|claude_code] [--profile standard|oss-prod] [--job-root ./gait-out/jobs] [--auth-mode off|token] [--auth-token-env ] [--max-request-bytes ] [--http-verdict-status compat|strict] [--allow-client-artifact-paths] [--trace-dir ] [--runpack-dir ] [--pack-dir ] [--session-dir ] [--trace-max-age ] [--trace-max-count ] [--runpack-max-age ] [--runpack-max-count ] [--pack-max-age ] [--pack-max-count ] [--session-max-age ] [--session-max-count ] [--export-log-out events.jsonl] [--export-otel-out otel.jsonl] [--key-mode dev|prod] [--private-key |--private-key-env ] [--json] [--explain]") fmt.Println(" endpoints: POST /v1/evaluate (json), POST /v1/evaluate/sse (text/event-stream), POST /v1/evaluate/stream (application/x-ndjson)") } diff --git a/cmd/gait/mcp_test.go b/cmd/gait/mcp_test.go index 32280b1..e3319d6 100644 --- a/cmd/gait/mcp_test.go +++ b/cmd/gait/mcp_test.go @@ -9,6 +9,7 @@ import ( "time" "github.com/Clyra-AI/gait/core/gate" + "github.com/Clyra-AI/gait/core/jobruntime" "github.com/Clyra-AI/gait/core/mcp" schemagate "github.com/Clyra-AI/gait/core/schema/v1/gate" ) @@ -82,6 +83,103 @@ rules: } } +func TestRunMCPProxyEmergencyStopPreemption(t *testing.T) { + workDir := t.TempDir() + withWorkingDir(t, workDir) + jobsRoot := filepath.Join(workDir, "jobs") + jobID := "job_mcp_stop" + + if _, err := jobruntime.Submit(jobsRoot, jobruntime.SubmitOptions{JobID: jobID}); err != nil { + t.Fatalf("submit job: %v", err) + } + if _, err := jobruntime.EmergencyStop(jobsRoot, jobID, jobruntime.TransitionOptions{Actor: "alice"}); err != nil { + t.Fatalf("emergency stop: %v", err) + } + + policyPath := filepath.Join(workDir, "policy.yaml") + mustWriteFile(t, policyPath, "default_verdict: allow\n") + callPath := filepath.Join(workDir, "call.json") + mustWriteFile(t, callPath, `{ + "name":"tool.write", + "args":{"path":"/tmp/out.txt"}, + "targets":[{"kind":"path","value":"/tmp/out.txt","operation":"write"}], + "context":{"identity":"alice","workspace":"/repo/gait","risk_class":"high","session_id":"sess-1","job_id":"job_mcp_stop"} +}`) + + var code int + raw := captureStdout(t, func() { + code = runMCPProxy([]string{ + "--policy", policyPath, + "--call", callPath, + "--job-root", jobsRoot, + "--json", + }) + }) + if code != exitPolicyBlocked { + t.Fatalf("expected emergency stop preemption to block with %d, got %d (%s)", exitPolicyBlocked, code, raw) + } + var out mcpProxyOutput + if err := json.Unmarshal([]byte(raw), &out); err != nil { + t.Fatalf("decode proxy output: %v (%s)", err, raw) + } + if out.Verdict != "block" || !strings.Contains(strings.Join(out.ReasonCodes, ","), "emergency_stop_preempted") { + t.Fatalf("expected emergency stop reason code, got %#v", out) + } +} + +func TestEvaluateMCPEmergencyStopWithoutJobID(t *testing.T) { + reason, warnings := evaluateMCPEmergencyStop(mcp.ToolCall{ + Name: "tool.write", + Context: mcp.CallContext{ + SessionID: "sess-1", + }, + }, "") + if reason != "" { + t.Fatalf("expected empty reason when job_id is not set, got %q", reason) + } + if len(warnings) != 0 { + t.Fatalf("expected no warnings without job_id, got %#v", warnings) + } +} + +func TestEvaluateMCPEmergencyStopStateUnavailable(t *testing.T) { + workDir := t.TempDir() + reason, warnings := evaluateMCPEmergencyStop(mcp.ToolCall{ + Name: "tool.write", + Context: mcp.CallContext{ + JobID: "job_missing", + }, + }, filepath.Join(workDir, "jobs")) + if reason != "emergency_stop_state_unavailable" { + t.Fatalf("expected emergency_stop_state_unavailable, got %q", reason) + } + if len(warnings) != 1 || !strings.Contains(warnings[0], "job_status_unavailable=") { + t.Fatalf("expected job status warning, got %#v", warnings) + } +} + +func TestEvaluateMCPEmergencyStopJobNotStopped(t *testing.T) { + workDir := t.TempDir() + jobsRoot := filepath.Join(workDir, "jobs") + if _, err := jobruntime.Submit(jobsRoot, jobruntime.SubmitOptions{ + JobID: "job_running", + }); err != nil { + t.Fatalf("submit job: %v", err) + } + reason, warnings := evaluateMCPEmergencyStop(mcp.ToolCall{ + Name: "tool.write", + Context: mcp.CallContext{ + JobID: "job_running", + }, + }, jobsRoot) + if reason != "" { + t.Fatalf("expected empty reason for non-stopped job, got %q", reason) + } + if len(warnings) != 0 { + t.Fatalf("expected no warnings for non-stopped job, got %#v", warnings) + } +} + func TestRunMCPProxyOpenAIAdapter(t *testing.T) { workDir := t.TempDir() withWorkingDir(t, workDir) diff --git a/cmd/gait/policy_templates/baseline-highrisk.yaml b/cmd/gait/policy_templates/baseline-highrisk.yaml index 3a609c0..e9a31b3 100644 --- a/cmd/gait/policy_templates/baseline-highrisk.yaml +++ b/cmd/gait/policy_templates/baseline-highrisk.yaml @@ -55,6 +55,22 @@ rules: tool_names: [tool.write] reason_codes: [approval_required_for_write] violations: [approval_required] + - name: destructive-budget-guard + priority: 18 + effect: require_approval + match: + risk_classes: [high, critical] + endpoint: + enabled: true + destructive_action: require_approval + reason_code: destructive_operation_requires_approval + violation: destructive_operation + destructive_budget: + requests: 3 + window: hour + scope: identity + reason_codes: [destructive_budget_guard] + violations: [destructive_operation] - name: allow-safe-tool-read priority: 30 effect: allow diff --git a/core/gate/approval.go b/core/gate/approval.go index 53e285c..c7d54c8 100644 --- a/core/gate/approval.go +++ b/core/gate/approval.go @@ -32,6 +32,8 @@ const ( ApprovalCodePolicyMismatch = "approval_token_policy_mismatch" ApprovalCodeDelegationMismatch = "approval_token_delegation_binding_mismatch" ApprovalCodeScopeMismatch = "approval_token_scope_mismatch" + ApprovalCodeTargetsExceeded = "approval_token_max_targets_exceeded" + ApprovalCodeOpsExceeded = "approval_token_max_ops_exceeded" ) type MintApprovalTokenOptions struct { @@ -42,6 +44,8 @@ type MintApprovalTokenOptions struct { PolicyDigest string DelegationBindingDigest string Scope []string + MaxTargets int + MaxOps int TTL time.Duration Now time.Time SigningPrivateKey ed25519.PrivateKey @@ -59,6 +63,8 @@ type ApprovalValidationOptions struct { ExpectedPolicyDigest string ExpectedDelegationBindingDigest string RequiredScope []string + TargetCount int + OperationCount int } type ApprovalTokenError struct { @@ -114,6 +120,12 @@ func MintApprovalToken(opts MintApprovalTokenOptions) (MintApprovalTokenResult, if len(scope) == 0 { return MintApprovalTokenResult{}, fmt.Errorf("scope must include at least one value") } + if opts.MaxTargets < 0 { + return MintApprovalTokenResult{}, fmt.Errorf("max_targets must be >= 0") + } + if opts.MaxOps < 0 { + return MintApprovalTokenResult{}, fmt.Errorf("max_ops must be >= 0") + } createdAt := opts.Now.UTC() if createdAt.IsZero() { @@ -129,13 +141,15 @@ func MintApprovalToken(opts MintApprovalTokenOptions) (MintApprovalTokenResult, SchemaVersion: approvalTokenSchemaV1, CreatedAt: createdAt, ProducerVersion: producerVersion, - TokenID: computeApprovalTokenID(intentDigest, policyDigest, approver, reasonCode, scope, createdAt.Add(opts.TTL)), + TokenID: computeApprovalTokenID(intentDigest, policyDigest, approver, reasonCode, scope, opts.MaxTargets, opts.MaxOps, createdAt.Add(opts.TTL)), ApproverIdentity: approver, ReasonCode: reasonCode, IntentDigest: intentDigest, PolicyDigest: policyDigest, DelegationBindingDigest: delegationBindingDigest, Scope: scope, + MaxTargets: opts.MaxTargets, + MaxOps: opts.MaxOps, ExpiresAt: createdAt.Add(opts.TTL), } @@ -254,6 +268,12 @@ func ValidateApprovalToken(token schemagate.ApprovalToken, publicKey ed25519.Pub if len(requiredScope) > 0 && !matchesApprovalScope(requiredScope, normalized.Scope) { return &ApprovalTokenError{Code: ApprovalCodeScopeMismatch, Err: fmt.Errorf("scope mismatch")} } + if normalized.MaxTargets > 0 && opts.TargetCount > normalized.MaxTargets { + return &ApprovalTokenError{Code: ApprovalCodeTargetsExceeded, Err: fmt.Errorf("target count exceeds token max_targets")} + } + if normalized.MaxOps > 0 && opts.OperationCount > normalized.MaxOps { + return &ApprovalTokenError{Code: ApprovalCodeOpsExceeded, Err: fmt.Errorf("operation count exceeds token max_ops")} + } return nil } @@ -266,7 +286,15 @@ func ApprovalContext(policy Policy, intent schemagate.IntentRequest) (string, st if err != nil { return "", "", nil, fmt.Errorf("normalize intent: %w", err) } + phase := strings.ToLower(strings.TrimSpace(normalizedIntent.Context.Phase)) + if phase == "" { + phase = "apply" + } scope := []string{fmt.Sprintf("tool:%s", normalizedIntent.ToolName)} + if phase == "apply" && IntentContainsDestructiveTarget(normalizedIntent.Targets) { + scope = append(scope, "phase:apply", "destructive:apply") + } + scope = normalizeStringListLower(scope) return policyDigest, normalizedIntent.IntentDigest, scope, nil } @@ -312,6 +340,12 @@ func normalizeApprovalToken(token schemagate.ApprovalToken) (schemagate.Approval if len(normalized.Scope) == 0 { return schemagate.ApprovalToken{}, fmt.Errorf("scope is required") } + if normalized.MaxTargets < 0 { + return schemagate.ApprovalToken{}, fmt.Errorf("max_targets must be >= 0") + } + if normalized.MaxOps < 0 { + return schemagate.ApprovalToken{}, fmt.Errorf("max_ops must be >= 0") + } if normalized.CreatedAt.IsZero() { return schemagate.ApprovalToken{}, fmt.Errorf("created_at is required") } @@ -348,8 +382,9 @@ func isDigestHex(value string) bool { return err == nil } -func computeApprovalTokenID(intentDigest, policyDigest, approver, reasonCode string, scope []string, expiresAt time.Time) string { - raw := intentDigest + ":" + policyDigest + ":" + approver + ":" + reasonCode + ":" + strings.Join(scope, ",") + ":" + expiresAt.UTC().Format(time.RFC3339Nano) +func computeApprovalTokenID(intentDigest, policyDigest, approver, reasonCode string, scope []string, maxTargets int, maxOps int, expiresAt time.Time) string { + raw := intentDigest + ":" + policyDigest + ":" + approver + ":" + reasonCode + ":" + strings.Join(scope, ",") + + fmt.Sprintf(":%d:%d:", maxTargets, maxOps) + expiresAt.UTC().Format(time.RFC3339Nano) sum := sha256.Sum256([]byte(raw)) return hex.EncodeToString(sum[:12]) } diff --git a/core/gate/approval_test.go b/core/gate/approval_test.go index d3dc2ba..56e71dc 100644 --- a/core/gate/approval_test.go +++ b/core/gate/approval_test.go @@ -199,6 +199,73 @@ func TestValidateApprovalTokenFailureCodes(t *testing.T) { } } +func TestValidateApprovalTokenMaxTargetsAndOps(t *testing.T) { + keyPair, err := sign.GenerateKeyPair() + if err != nil { + t.Fatalf("generate key pair: %v", err) + } + now := time.Date(2026, time.February, 5, 12, 0, 0, 0, time.UTC) + workDir := t.TempDir() + baseResult, err := MintApprovalToken(MintApprovalTokenOptions{ + ProducerVersion: "test", + ApproverIdentity: "alice", + ReasonCode: "incident_hotfix", + IntentDigest: "1111111111111111111111111111111111111111111111111111111111111111", + PolicyDigest: "2222222222222222222222222222222222222222222222222222222222222222", + Scope: []string{"tool:tool.write"}, + MaxTargets: 2, + MaxOps: 3, + TTL: 30 * time.Minute, + Now: now, + SigningPrivateKey: keyPair.Private, + TokenPath: filepath.Join(workDir, "approval.json"), + }) + if err != nil { + t.Fatalf("mint approval token: %v", err) + } + + if err := ValidateApprovalToken(baseResult.Token, keyPair.Public, ApprovalValidationOptions{ + Now: now.Add(time.Minute), + ExpectedIntentDigest: baseResult.Token.IntentDigest, + ExpectedPolicyDigest: baseResult.Token.PolicyDigest, + RequiredScope: []string{"tool:tool.write"}, + TargetCount: 2, + OperationCount: 3, + }); err != nil { + t.Fatalf("expected bounded approval token validation to pass: %v", err) + } + if err := ValidateApprovalToken(baseResult.Token, keyPair.Public, ApprovalValidationOptions{ + Now: now.Add(time.Minute), + ExpectedIntentDigest: baseResult.Token.IntentDigest, + ExpectedPolicyDigest: baseResult.Token.PolicyDigest, + RequiredScope: []string{"tool:tool.write"}, + TargetCount: 3, + OperationCount: 3, + }); err == nil { + t.Fatalf("expected max_targets validation failure") + } else { + var tokenErr *ApprovalTokenError + if !errors.As(err, &tokenErr) || tokenErr.Code != ApprovalCodeTargetsExceeded { + t.Fatalf("expected max_targets error code %q, got %v", ApprovalCodeTargetsExceeded, err) + } + } + if err := ValidateApprovalToken(baseResult.Token, keyPair.Public, ApprovalValidationOptions{ + Now: now.Add(time.Minute), + ExpectedIntentDigest: baseResult.Token.IntentDigest, + ExpectedPolicyDigest: baseResult.Token.PolicyDigest, + RequiredScope: []string{"tool:tool.write"}, + TargetCount: 2, + OperationCount: 4, + }); err == nil { + t.Fatalf("expected max_ops validation failure") + } else { + var tokenErr *ApprovalTokenError + if !errors.As(err, &tokenErr) || tokenErr.Code != ApprovalCodeOpsExceeded { + t.Fatalf("expected max_ops error code %q, got %v", ApprovalCodeOpsExceeded, err) + } + } +} + func TestApprovalContext(t *testing.T) { policy, err := ParsePolicyYAML([]byte(`default_verdict: require_approval`)) if err != nil { @@ -236,6 +303,26 @@ func TestApprovalTokenErrorHelpers(t *testing.T) { } } +func TestApprovalContextDestructiveApplyScope(t *testing.T) { + policy, err := ParsePolicyYAML([]byte(`default_verdict: require_approval`)) + if err != nil { + t.Fatalf("parse policy: %v", err) + } + intent := baseIntent() + intent.ToolName = "tool.delete" + intent.Context.Phase = "apply" + intent.Targets = []schemagate.IntentTarget{ + {Kind: "path", Value: "/tmp/demo.txt", Operation: "delete"}, + } + _, _, scope, err := ApprovalContext(policy, intent) + if err != nil { + t.Fatalf("approval context: %v", err) + } + if !reflect.DeepEqual(scope, []string{"destructive:apply", "phase:apply", "tool:tool.delete"}) { + t.Fatalf("unexpected destructive apply scope: %#v", scope) + } +} + func TestMintApprovalTokenInputValidation(t *testing.T) { keyPair, err := sign.GenerateKeyPair() if err != nil { diff --git a/core/gate/intent.go b/core/gate/intent.go index 95234d2..fabc8a7 100644 --- a/core/gate/intent.go +++ b/core/gate/intent.go @@ -621,12 +621,21 @@ func normalizeContext(context schemagate.IntentContext) (schemagate.IntentContex if contextEvidenceMode != "" && contextEvidenceMode != "best_effort" && contextEvidenceMode != "required" { return schemagate.IntentContext{}, fmt.Errorf("context.context_evidence_mode must be best_effort or required") } + phase := strings.ToLower(strings.TrimSpace(context.Phase)) + if phase == "" { + phase = "apply" + } + if phase != "plan" && phase != "apply" { + return schemagate.IntentContext{}, fmt.Errorf("context.phase must be plan or apply") + } contextRefs := normalizeContextRefs(context.ContextRefs) return schemagate.IntentContext{ Identity: identity, Workspace: filepath.ToSlash(strings.ReplaceAll(workspace, `\`, "/")), RiskClass: riskClass, + Phase: phase, + JobID: strings.TrimSpace(context.JobID), SessionID: strings.TrimSpace(context.SessionID), RequestID: strings.TrimSpace(context.RequestID), AuthContext: authContext, diff --git a/core/gate/intent_test.go b/core/gate/intent_test.go index 2e2e7c9..f8582c0 100644 --- a/core/gate/intent_test.go +++ b/core/gate/intent_test.go @@ -117,6 +117,9 @@ func TestNormalizeIntentPopulatesDigestsAndDefaults(t *testing.T) { if normalized.Context.Identity != "alice" || normalized.Context.Workspace != "C:/repo/gait" || normalized.Context.RiskClass != "high" { t.Fatalf("unexpected normalized context: %#v", normalized.Context) } + if normalized.Context.Phase != "apply" { + t.Fatalf("expected default phase=apply, got %#v", normalized.Context) + } if normalized.Context.SessionID != "s1" || normalized.Context.RequestID != "req-1" { t.Fatalf("unexpected normalized context ids: %#v", normalized.Context) } @@ -156,6 +159,35 @@ func TestNormalizeIntentPopulatesDigestsAndDefaults(t *testing.T) { } } +func TestNormalizeIntentContextPhaseValidation(t *testing.T) { + base := schemagate.IntentRequest{ + ToolName: "tool.write", + Args: map[string]any{"path": "/tmp/out.txt"}, + Targets: []schemagate.IntentTarget{ + {Kind: "path", Value: "/tmp/out.txt", Operation: "delete"}, + }, + Context: schemagate.IntentContext{ + Identity: "alice", + Workspace: "/repo/gait", + RiskClass: "high", + Phase: "plan", + }, + } + normalized, err := NormalizeIntent(base) + if err != nil { + t.Fatalf("normalize plan-phase intent: %v", err) + } + if normalized.Context.Phase != "plan" { + t.Fatalf("expected explicit plan phase, got %#v", normalized.Context) + } + + invalid := base + invalid.Context.Phase = "execute" + if _, err := NormalizeIntent(invalid); err == nil { + t.Fatalf("expected invalid context.phase to fail normalization") + } +} + func TestNormalizeIntentEndpointClassification(t *testing.T) { intent := schemagate.IntentRequest{ ToolName: "tool.exec", diff --git a/core/gate/policy.go b/core/gate/policy.go index cc1cd96..42dd710 100644 --- a/core/gate/policy.go +++ b/core/gate/policy.go @@ -99,6 +99,7 @@ type PolicyRule struct { BrokerReference string `yaml:"broker_reference"` BrokerScopes []string `yaml:"broker_scopes"` RateLimit RateLimitPolicy `yaml:"rate_limit"` + DestructiveBudget RateLimitPolicy `yaml:"destructive_budget"` Dataflow DataflowPolicy `yaml:"dataflow"` } @@ -185,6 +186,7 @@ type EvalOutcome struct { BrokerReference string BrokerScopes []string RateLimit RateLimitPolicy + DestructiveBudget RateLimitPolicy DataflowTriggered bool Script bool StepCount int @@ -330,6 +332,19 @@ func evaluateSingleIntent(policy Policy, intent schemagate.IntentRequest, opts E reasons = mergeUniqueSorted(reasons, contextReasons) violations = mergeUniqueSorted(violations, contextViolations) } + destructiveTarget := intentContainsDestructiveTarget(intent.Targets) + switch strings.ToLower(strings.TrimSpace(intent.Context.Phase)) { + case "plan": + if destructiveTarget { + effect = mostRestrictiveVerdict(effect, "dry_run") + reasons = mergeUniqueSorted(reasons, []string{"plan_phase_non_destructive"}) + } + case "", "apply": + if destructiveTarget { + effect = mostRestrictiveVerdict(effect, "require_approval") + reasons = mergeUniqueSorted(reasons, []string{"destructive_apply_requires_approval"}) + } + } minApprovals := rule.MinApprovals if effect == "require_approval" && minApprovals == 0 { minApprovals = 1 @@ -344,6 +359,7 @@ func evaluateSingleIntent(policy Policy, intent schemagate.IntentRequest, opts E BrokerReference: rule.BrokerReference, BrokerScopes: uniqueSorted(rule.BrokerScopes), RateLimit: rule.RateLimit, + DestructiveBudget: rule.DestructiveBudget, DataflowTriggered: dataflowTriggered, }, nil } @@ -383,6 +399,7 @@ func evaluateScriptPolicyDetailed(policy Policy, intent schemagate.IntentRequest dataflowTriggered := false riskClasses := []string{} aggregatedRateLimit := RateLimitPolicy{} + aggregatedDestructiveBudget := RateLimitPolicy{} contextSource := "" for index, step := range intent.Script.Steps { @@ -433,6 +450,7 @@ func evaluateScriptPolicyDetailed(policy Policy, intent schemagate.IntentRequest } brokerScopes = mergeUniqueSorted(brokerScopes, stepOutcome.BrokerScopes) aggregatedRateLimit = mergeRateLimitPolicy(aggregatedRateLimit, stepOutcome.RateLimit) + aggregatedDestructiveBudget = mergeRateLimitPolicy(aggregatedDestructiveBudget, stepOutcome.DestructiveBudget) if stepOutcome.DataflowTriggered { dataflowTriggered = true } @@ -481,6 +499,7 @@ func evaluateScriptPolicyDetailed(policy Policy, intent schemagate.IntentRequest BrokerReference: brokerReference, BrokerScopes: uniqueSorted(brokerScopes), RateLimit: aggregatedRateLimit, + DestructiveBudget: aggregatedDestructiveBudget, DataflowTriggered: dataflowTriggered, Script: true, StepCount: len(intent.Script.Steps), @@ -688,6 +707,13 @@ func policyDigestPayload(policy Policy) map[string]any { "Scope": rule.RateLimit.Scope, } } + if rule.DestructiveBudget.Requests > 0 { + rulePayload["DestructiveBudget"] = map[string]any{ + "Requests": rule.DestructiveBudget.Requests, + "Window": rule.DestructiveBudget.Window, + "Scope": rule.DestructiveBudget.Scope, + } + } if rule.Dataflow.Enabled { dataflowPayload := map[string]any{ "Enabled": rule.Dataflow.Enabled, @@ -926,6 +952,31 @@ func normalizePolicy(input Policy) (Policy, error) { return Policy{}, fmt.Errorf("unsupported rate_limit.scope %q for %s", rule.RateLimit.Scope, rule.Name) } } + if rule.DestructiveBudget.Requests < 0 { + return Policy{}, fmt.Errorf("destructive_budget.requests must be >= 0 for %s", rule.Name) + } + rule.DestructiveBudget.Window = strings.ToLower(strings.TrimSpace(rule.DestructiveBudget.Window)) + rule.DestructiveBudget.Scope = strings.ToLower(strings.TrimSpace(rule.DestructiveBudget.Scope)) + destructiveBudgetConfigured := rule.DestructiveBudget.Requests > 0 || + rule.DestructiveBudget.Window != "" || + rule.DestructiveBudget.Scope != "" + if destructiveBudgetConfigured { + if rule.DestructiveBudget.Requests <= 0 { + return Policy{}, fmt.Errorf("destructive_budget.requests must be >= 1 for %s", rule.Name) + } + if rule.DestructiveBudget.Window == "" { + rule.DestructiveBudget.Window = "minute" + } + if _, ok := allowedRateLimitWindows[rule.DestructiveBudget.Window]; !ok { + return Policy{}, fmt.Errorf("unsupported destructive_budget.window %q for %s", rule.DestructiveBudget.Window, rule.Name) + } + if rule.DestructiveBudget.Scope == "" { + rule.DestructiveBudget.Scope = "tool_identity" + } + if _, ok := allowedRateLimitScopes[rule.DestructiveBudget.Scope]; !ok { + return Policy{}, fmt.Errorf("unsupported destructive_budget.scope %q for %s", rule.DestructiveBudget.Scope, rule.Name) + } + } rule.Dataflow.TaintedSources = normalizeStringListLower(rule.Dataflow.TaintedSources) rule.Dataflow.DestinationKinds = normalizeStringListLower(rule.Dataflow.DestinationKinds) rule.Dataflow.DestinationValues = normalizeStringList(rule.Dataflow.DestinationValues) @@ -1576,6 +1627,10 @@ func intentContainsDestructiveTarget(targets []schemagate.IntentTarget) bool { return false } +func IntentContainsDestructiveTarget(targets []schemagate.IntentTarget) bool { + return intentContainsDestructiveTarget(targets) +} + func matchesAnyPattern(value string, patterns []string) bool { for _, patternValue := range patterns { if matchPathPattern(value, patternValue) { diff --git a/core/gate/policy_test.go b/core/gate/policy_test.go index 66bdbc0..7617948 100644 --- a/core/gate/policy_test.go +++ b/core/gate/policy_test.go @@ -1631,6 +1631,94 @@ func TestWindowPriorityAndWrkrSourceHelpers(t *testing.T) { } } +func TestEvaluatePolicyPlanApplySemanticsForDestructiveTargets(t *testing.T) { + policy, err := ParsePolicyYAML([]byte(` +default_verdict: allow +rules: + - name: allow-delete + effect: allow + match: + tool_names: [tool.delete] +`)) + if err != nil { + t.Fatalf("parse policy: %v", err) + } + + planIntent := baseIntent() + planIntent.ToolName = "tool.delete" + planIntent.Context.Phase = "plan" + planIntent.Targets = []schemagate.IntentTarget{ + {Kind: "path", Value: "/tmp/demo.txt", Operation: "delete"}, + } + planOutcome, err := EvaluatePolicyDetailed(policy, planIntent, EvalOptions{ProducerVersion: "test"}) + if err != nil { + t.Fatalf("evaluate plan-phase policy: %v", err) + } + if planOutcome.Result.Verdict != "dry_run" { + t.Fatalf("expected plan-phase destructive intent to dry_run, got %#v", planOutcome.Result) + } + if !contains(planOutcome.Result.ReasonCodes, "plan_phase_non_destructive") { + t.Fatalf("expected plan-phase reason code, got %#v", planOutcome.Result.ReasonCodes) + } + + applyIntent := planIntent + applyIntent.Context.Phase = "apply" + applyOutcome, err := EvaluatePolicyDetailed(policy, applyIntent, EvalOptions{ProducerVersion: "test"}) + if err != nil { + t.Fatalf("evaluate apply-phase policy: %v", err) + } + if applyOutcome.Result.Verdict != "require_approval" { + t.Fatalf("expected apply-phase destructive intent to require_approval, got %#v", applyOutcome.Result) + } + if !contains(applyOutcome.Result.ReasonCodes, "destructive_apply_requires_approval") { + t.Fatalf("expected apply-phase reason code, got %#v", applyOutcome.Result.ReasonCodes) + } +} + +func TestParsePolicyYAMLDestructiveBudget(t *testing.T) { + policy, err := ParsePolicyYAML([]byte(` +default_verdict: allow +rules: + - name: destructive-budget + effect: require_approval + match: + tool_names: [tool.delete] + destructive_budget: + requests: 2 + window: minute + scope: identity +`)) + if err != nil { + t.Fatalf("parse policy with destructive_budget: %v", err) + } + intent := baseIntent() + intent.ToolName = "tool.delete" + intent.Context.Phase = "apply" + intent.Targets = []schemagate.IntentTarget{ + {Kind: "path", Value: "/tmp/demo.txt", Operation: "delete"}, + } + outcome, err := EvaluatePolicyDetailed(policy, intent, EvalOptions{ProducerVersion: "test"}) + if err != nil { + t.Fatalf("evaluate policy with destructive_budget: %v", err) + } + if outcome.DestructiveBudget.Requests != 2 || outcome.DestructiveBudget.Window != "minute" || outcome.DestructiveBudget.Scope != "identity" { + t.Fatalf("unexpected destructive budget outcome: %#v", outcome.DestructiveBudget) + } + + if _, err := ParsePolicyYAML([]byte(` +default_verdict: allow +rules: + - name: invalid-destructive-budget + effect: allow + match: + tool_names: [tool.delete] + destructive_budget: + window: minute +`)); err == nil || !strings.Contains(err.Error(), "destructive_budget.requests must be >= 1") { + t.Fatalf("expected destructive_budget request validation error, got %v", err) + } +} + func baseIntent() schemagate.IntentRequest { return schemagate.IntentRequest{ SchemaID: "gait.gate.intent_request", diff --git a/core/jobruntime/runtime.go b/core/jobruntime/runtime.go index 2c238b7..01fc5a0 100644 --- a/core/jobruntime/runtime.go +++ b/core/jobruntime/runtime.go @@ -8,6 +8,7 @@ import ( "fmt" "os" "path/filepath" + "regexp" "runtime" "sort" "strings" @@ -29,6 +30,7 @@ const ( StatusBlocked = "blocked" StatusCompleted = "completed" StatusCancelled = "cancelled" + StatusEmergencyStop = "emergency_stopped" ) const ( @@ -38,6 +40,7 @@ const ( StopReasonBlocked = "blocked" StopReasonCompleted = "completed" StopReasonCancelledByUser = "cancelled_by_user" + StopReasonEmergencyStopped = "emergency_stopped" StopReasonEnvFingerprintMismatch = "env_fingerprint_mismatch" ) @@ -62,6 +65,8 @@ var ( ErrIdentityBindingMismatch = errors.New("identity binding mismatch") ) +var safeJobIDPattern = regexp.MustCompile(`^[A-Za-z0-9][A-Za-z0-9._:-]{0,127}$`) + type JobState struct { SchemaID string `json:"schema_id"` SchemaVersion string `json:"schema_version"` @@ -73,6 +78,9 @@ type JobState struct { StopReason string `json:"stop_reason"` StatusReasonCode string `json:"status_reason_code"` EnvironmentFingerprint string `json:"environment_fingerprint"` + SafetyInvariantVersion string `json:"safety_invariant_version,omitempty"` + SafetyInvariantHash string `json:"safety_invariant_hash,omitempty"` + SafetyInvariants []string `json:"safety_invariants,omitempty"` PolicyDigest string `json:"policy_digest,omitempty"` PolicyRef string `json:"policy_ref,omitempty"` Identity string `json:"identity,omitempty"` @@ -154,6 +162,13 @@ type TransitionOptions struct { Now time.Time } +type DispatchRecordOptions struct { + Actor string + DispatchPath string + ReasonCode string + Now time.Time +} + func Submit(root string, opts SubmitOptions) (JobState, error) { jobID := strings.TrimSpace(opts.JobID) if jobID == "" { @@ -174,7 +189,10 @@ func Submit(root string, opts SubmitOptions) (JobState, error) { if identity == "" { identity = strings.TrimSpace(opts.Actor) } - statePath, eventsPath := jobPaths(root, jobID) + statePath, eventsPath, err := jobPaths(root, jobID) + if err != nil { + return JobState{}, err + } if err := os.MkdirAll(filepath.Dir(statePath), 0o750); err != nil { return JobState{}, fmt.Errorf("create job directory: %w", err) } @@ -194,12 +212,15 @@ func Submit(root string, opts SubmitOptions) (JobState, error) { StopReason: StopReasonNone, StatusReasonCode: "submitted", EnvironmentFingerprint: envfp, + SafetyInvariantVersion: "1", PolicyDigest: policyDigest, PolicyRef: policyRef, Identity: identity, Revision: 1, Checkpoints: []Checkpoint{}, } + state.SafetyInvariants = deriveSafetyInvariants(state) + state.SafetyInvariantHash = hashSafetyInvariants(state.SafetyInvariants) if err := writeJSON(statePath, state); err != nil { return JobState{}, err } @@ -233,7 +254,10 @@ func Submit(root string, opts SubmitOptions) (JobState, error) { } func Status(root string, jobID string) (JobState, error) { - statePath, _ := jobPaths(root, jobID) + statePath, _, err := jobPaths(root, jobID) + if err != nil { + return JobState{}, err + } state, err := readState(statePath) if err != nil { return JobState{}, err @@ -338,7 +362,50 @@ func Pause(root string, jobID string, opts TransitionOptions) (JobState, error) } func Cancel(root string, jobID string, opts TransitionOptions) (JobState, error) { - return simpleTransition(root, jobID, opts.Now, opts.Actor, "cancelled", []string{StatusRunning, StatusPaused, StatusDecisionNeeded, StatusBlocked}, StatusCancelled, StopReasonCancelledByUser, "cancelled") + return simpleTransition(root, jobID, opts.Now, opts.Actor, "cancelled", []string{StatusRunning, StatusPaused, StatusDecisionNeeded, StatusBlocked, StatusEmergencyStop}, StatusCancelled, StopReasonCancelledByUser, "cancelled") +} + +func EmergencyStop(root string, jobID string, opts TransitionOptions) (JobState, error) { + return simpleTransition( + root, + jobID, + opts.Now, + opts.Actor, + "emergency_stop_acknowledged", + []string{StatusRunning, StatusPaused, StatusDecisionNeeded, StatusBlocked}, + StatusEmergencyStop, + StopReasonEmergencyStopped, + "emergency_stop_preempted", + ) +} + +func RecordBlockedDispatch(root string, jobID string, opts DispatchRecordOptions) (JobState, error) { + return mutateWithResult(root, jobID, opts.Now, func(state *JobState, now time.Time) (JobState, Event, error) { + reasonCode := strings.TrimSpace(opts.ReasonCode) + if reasonCode == "" { + reasonCode = "emergency_stop_preempted" + } + dispatchPath := strings.TrimSpace(opts.DispatchPath) + if dispatchPath == "" { + dispatchPath = "runtime.dispatch" + } + if !IsEmergencyStopped(*state) { + return JobState{}, Event{}, fmt.Errorf("%w: blocked dispatch requires emergency stopped state", ErrInvalidTransition) + } + updated := *state + return updated, Event{ + Type: "dispatch_blocked", + Actor: strings.TrimSpace(opts.Actor), + ReasonCode: reasonCode, + Payload: map[string]any{ + "dispatch_path": dispatchPath, + }, + }, nil + }) +} + +func IsEmergencyStopped(state JobState) bool { + return strings.TrimSpace(state.Status) == StatusEmergencyStop && strings.TrimSpace(state.StopReason) == StopReasonEmergencyStopped } func Approve(root string, jobID string, opts ApprovalOptions) (JobState, error) { @@ -388,6 +455,7 @@ func Resume(root string, jobID string, opts ResumeOptions) (JobState, error) { } previousPolicyDigest := strings.TrimSpace(state.PolicyDigest) previousPolicyRef := strings.TrimSpace(state.PolicyRef) + ensureSafetyInvariantLedger(state) policyDigest := strings.TrimSpace(opts.PolicyDigest) policyRef := strings.TrimSpace(opts.PolicyRef) policyEvaluationRequired := opts.RequirePolicyEvaluation || previousPolicyDigest != "" || previousPolicyRef != "" @@ -474,7 +542,10 @@ func Resume(root string, jobID string, opts ResumeOptions) (JobState, error) { } func Inspect(root string, jobID string) (JobState, []Event, error) { - statePath, eventsPath := jobPaths(root, jobID) + statePath, eventsPath, err := jobPaths(root, jobID) + if err != nil { + return JobState{}, nil, err + } state, err := readState(statePath) if err != nil { return JobState{}, nil, err @@ -564,7 +635,10 @@ func mutate(root string, jobID string, mutator func(*JobState, time.Time) (Event } func mutateWithResult(root string, jobID string, now time.Time, mutator func(*JobState, time.Time) (JobState, Event, error)) (JobState, error) { - statePath, eventsPath := jobPaths(root, jobID) + statePath, eventsPath, err := jobPaths(root, jobID) + if err != nil { + return JobState{}, err + } lockPath := statePath + ".lock" release, err := acquireLock(lockPath, normalizeNow(now), 2*time.Second) @@ -602,6 +676,7 @@ func mutateWithResult(root string, jobID string, now time.Time, mutator func(*Jo func readState(path string) (JobState, error) { // #nosec G304 -- path is derived from local job root + // lgtm[go/path-injection] path is derived from explicit local runtime root/job id inputs. payload, err := os.ReadFile(path) if err != nil { if os.IsNotExist(err) { @@ -616,6 +691,7 @@ func readState(path string) (JobState, error) { if strings.TrimSpace(state.JobID) == "" { return JobState{}, fmt.Errorf("invalid job state: missing job_id") } + ensureSafetyInvariantLedger(&state) if state.Checkpoints == nil { state.Checkpoints = []Checkpoint{} } @@ -670,20 +746,35 @@ func appendEvent(path string, event Event) error { return nil } -func jobPaths(root string, jobID string) (statePath string, eventsPath string) { +func jobPaths(root string, jobID string) (statePath string, eventsPath string, err error) { cleanRoot := strings.TrimSpace(root) if cleanRoot == "" { cleanRoot = filepath.Join(".", "gait-out", "jobs") } + absRoot, err := filepath.Abs(cleanRoot) + if err != nil { + return "", "", fmt.Errorf("resolve job root: %w", err) + } cleanID := strings.TrimSpace(jobID) - jobDir := filepath.Join(cleanRoot, cleanID) - return filepath.Join(jobDir, "state.json"), filepath.Join(jobDir, "events.jsonl") + if !safeJobIDPattern.MatchString(cleanID) { + return "", "", fmt.Errorf("job_id must match %s", safeJobIDPattern.String()) + } + jobDir := filepath.Join(absRoot, cleanID) + relPath, err := filepath.Rel(absRoot, jobDir) + if err != nil { + return "", "", fmt.Errorf("resolve job path: %w", err) + } + if relPath == ".." || strings.HasPrefix(relPath, ".."+string(os.PathSeparator)) { + return "", "", fmt.Errorf("job path escapes root") + } + return filepath.Join(jobDir, "state.json"), filepath.Join(jobDir, "events.jsonl"), nil } func acquireLock(path string, _ time.Time, timeout time.Duration) (func(), error) { start := time.Now().UTC() for { // #nosec G304 -- lock path derived from local root + // lgtm[go/path-injection] lock path is derived from explicit local runtime root/job id inputs. fd, err := os.OpenFile(path, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o600) if err == nil { _ = fd.Close() @@ -697,6 +788,7 @@ func acquireLock(path string, _ time.Time, timeout time.Duration) (func(), error return nil, fmt.Errorf("%w: lock timeout", ErrStateContention) } if staleLock(path, now, 30*time.Second) { + // lgtm[go/path-injection] lock path is derived from explicit local runtime root/job id inputs. _ = os.Remove(path) continue } @@ -706,6 +798,7 @@ func acquireLock(path string, _ time.Time, timeout time.Duration) (func(), error func staleLock(path string, now time.Time, staleAfter time.Duration) bool { // #nosec G304 -- lock path derived from local root + // lgtm[go/path-injection] lock path is derived from explicit local runtime root/job id inputs. info, err := os.Stat(path) if err != nil { return false @@ -802,3 +895,51 @@ func EnvironmentFingerprint(override string) string { sum := sha256.Sum256([]byte(strings.Join(parts, "|"))) return "envfp:" + hex.EncodeToString(sum[:]) } + +func ensureSafetyInvariantLedger(state *JobState) { + if state == nil { + return + } + if strings.TrimSpace(state.SafetyInvariantVersion) == "" { + state.SafetyInvariantVersion = "1" + } + if len(state.SafetyInvariants) == 0 { + state.SafetyInvariants = deriveSafetyInvariants(*state) + } + if strings.TrimSpace(state.SafetyInvariantHash) == "" { + state.SafetyInvariantHash = hashSafetyInvariants(state.SafetyInvariants) + } +} + +func deriveSafetyInvariants(state JobState) []string { + values := []string{ + "control_boundary=runtime_go", + "fail_closed=true", + "default_privacy=reference_receipts", + } + if strings.TrimSpace(state.PolicyDigest) != "" { + values = append(values, "policy_digest="+strings.TrimSpace(state.PolicyDigest)) + } + if strings.TrimSpace(state.PolicyRef) != "" { + values = append(values, "policy_ref="+strings.TrimSpace(state.PolicyRef)) + } + if strings.TrimSpace(state.Identity) != "" { + values = append(values, "identity="+strings.TrimSpace(state.Identity)) + } + sort.Strings(values) + return values +} + +func hashSafetyInvariants(values []string) string { + filtered := make([]string, 0, len(values)) + for _, value := range values { + trimmed := strings.TrimSpace(value) + if trimmed == "" { + continue + } + filtered = append(filtered, trimmed) + } + sort.Strings(filtered) + sum := sha256.Sum256([]byte(strings.Join(filtered, "|"))) + return hex.EncodeToString(sum[:]) +} diff --git a/core/jobruntime/runtime_test.go b/core/jobruntime/runtime_test.go index 5abee89..ff4b0f3 100644 --- a/core/jobruntime/runtime_test.go +++ b/core/jobruntime/runtime_test.go @@ -288,6 +288,47 @@ func TestInvalidPauseTransition(t *testing.T) { } } +func TestEmergencyStopPreemptsAndBlocksDispatch(t *testing.T) { + root := filepath.Join(t.TempDir(), "jobs") + jobID := "job-emergency-stop" + if _, err := Submit(root, SubmitOptions{JobID: jobID, PolicyDigest: "policy-a", Identity: "agent.alice"}); err != nil { + t.Fatalf("submit job: %v", err) + } + + stopped, err := EmergencyStop(root, jobID, TransitionOptions{Actor: "alice"}) + if err != nil { + t.Fatalf("emergency stop: %v", err) + } + if stopped.Status != StatusEmergencyStop { + t.Fatalf("expected emergency stopped status, got %#v", stopped) + } + if stopped.StatusReasonCode != "emergency_stop_preempted" { + t.Fatalf("unexpected emergency stop reason code: %#v", stopped) + } + if !IsEmergencyStopped(stopped) { + t.Fatalf("expected emergency stopped helper to return true") + } + + if _, err := RecordBlockedDispatch(root, jobID, DispatchRecordOptions{ + Actor: "mcp-proxy", + DispatchPath: "mcp.proxy", + ReasonCode: "emergency_stop_preempted", + }); err != nil { + t.Fatalf("record blocked dispatch: %v", err) + } + _, events, err := Inspect(root, jobID) + if err != nil { + t.Fatalf("inspect: %v", err) + } + if len(events) < 2 { + t.Fatalf("expected emergency stop + blocked dispatch events, got %d", len(events)) + } + last := events[len(events)-1] + if last.Type != "dispatch_blocked" || last.ReasonCode != "emergency_stop_preempted" { + t.Fatalf("unexpected blocked dispatch event: %#v", last) + } +} + func TestListGetAndInspect(t *testing.T) { root := filepath.Join(t.TempDir(), "jobs") jobID := "job-list-inspect" @@ -554,7 +595,10 @@ func TestAcquireLockTimeoutUsesWallClock(t *testing.T) { } func TestJobPathDecisionAndFingerprintHelpers(t *testing.T) { - statePath, eventsPath := jobPaths("", "job-helper") + statePath, eventsPath, err := jobPaths("", "job-helper") + if err != nil { + t.Fatalf("jobPaths: %v", err) + } if filepath.Base(statePath) != "state.json" || filepath.Base(eventsPath) != "events.jsonl" { t.Fatalf("unexpected helper job paths: state=%s events=%s", statePath, eventsPath) } @@ -611,3 +655,151 @@ func TestJobPathDecisionAndFingerprintHelpers(t *testing.T) { t.Fatalf("expected generated fingerprint prefix, got %s", got) } } + +func TestSafetyInvariantLedgerDefaultsAndResume(t *testing.T) { + root := filepath.Join(t.TempDir(), "jobs") + jobID := "job-invariants" + submitted, err := Submit(root, SubmitOptions{ + JobID: jobID, + PolicyDigest: "policy-digest-a", + PolicyRef: "policy-a.yaml", + Identity: "agent.alice", + }) + if err != nil { + t.Fatalf("submit: %v", err) + } + if submitted.SafetyInvariantVersion == "" || submitted.SafetyInvariantHash == "" || len(submitted.SafetyInvariants) == 0 { + t.Fatalf("expected safety invariant ledger on submit: %#v", submitted) + } + if _, err := Pause(root, jobID, TransitionOptions{}); err != nil { + t.Fatalf("pause: %v", err) + } + resumed, err := Resume(root, jobID, ResumeOptions{ + CurrentEnvironmentFingerprint: submitted.EnvironmentFingerprint, + PolicyDigest: "policy-digest-a", + PolicyRef: "policy-a.yaml", + RequirePolicyEvaluation: true, + Identity: "agent.alice", + RequireIdentityValidation: true, + }) + if err != nil { + t.Fatalf("resume: %v", err) + } + if resumed.SafetyInvariantVersion != submitted.SafetyInvariantVersion || resumed.SafetyInvariantHash != submitted.SafetyInvariantHash { + t.Fatalf("expected invariant ledger to persist across resume: submitted=%#v resumed=%#v", submitted, resumed) + } +} + +func TestRecordBlockedDispatchDefaultsAndValidation(t *testing.T) { + root := filepath.Join(t.TempDir(), "jobs") + jobID := "job-blocked-dispatch-defaults" + if _, err := Submit(root, SubmitOptions{JobID: jobID}); err != nil { + t.Fatalf("submit: %v", err) + } + + if _, err := RecordBlockedDispatch(root, jobID, DispatchRecordOptions{}); !errors.Is(err, ErrInvalidTransition) { + t.Fatalf("expected invalid transition before emergency stop, got %v", err) + } + + if _, err := EmergencyStop(root, jobID, TransitionOptions{Actor: "operator"}); err != nil { + t.Fatalf("emergency stop: %v", err) + } + if _, err := RecordBlockedDispatch(root, jobID, DispatchRecordOptions{}); err != nil { + t.Fatalf("record blocked dispatch with defaults: %v", err) + } + + _, events, err := Inspect(root, jobID) + if err != nil { + t.Fatalf("inspect: %v", err) + } + last := events[len(events)-1] + if last.ReasonCode != "emergency_stop_preempted" { + t.Fatalf("unexpected reason code: %#v", last) + } + if got, _ := last.Payload["dispatch_path"].(string); got != "runtime.dispatch" { + t.Fatalf("expected default dispatch path, got %#v", last.Payload) + } +} + +func TestResumeValidationAndEnvOverridePolicyTransition(t *testing.T) { + root := filepath.Join(t.TempDir(), "jobs") + + if _, err := Submit(root, SubmitOptions{JobID: "job-resume-invalid"}); err != nil { + t.Fatalf("submit job-resume-invalid: %v", err) + } + if _, err := Resume(root, "job-resume-invalid", ResumeOptions{}); !errors.Is(err, ErrInvalidTransition) { + t.Fatalf("expected invalid transition for resume while running, got %v", err) + } + + if _, err := Submit(root, SubmitOptions{ + JobID: "job-bind-identity", + EnvironmentFingerprint: "env:a", + }); err != nil { + t.Fatalf("submit job-bind-identity: %v", err) + } + if _, err := Pause(root, "job-bind-identity", TransitionOptions{}); err != nil { + t.Fatalf("pause job-bind-identity: %v", err) + } + identityBound, err := Resume(root, "job-bind-identity", ResumeOptions{ + CurrentEnvironmentFingerprint: "env:a", + Identity: "agent.bound", + RequireIdentityValidation: true, + }) + if err != nil { + t.Fatalf("resume job-bind-identity: %v", err) + } + if identityBound.Identity != "agent.bound" { + t.Fatalf("expected resume to bind identity, got %#v", identityBound) + } + + if _, err := Submit(root, SubmitOptions{ + JobID: "job-env-override-policy-transition", + EnvironmentFingerprint: "env:a", + PolicyDigest: "policy-a", + PolicyRef: "policy-a.yaml", + }); err != nil { + t.Fatalf("submit job-env-override-policy-transition: %v", err) + } + if _, err := Pause(root, "job-env-override-policy-transition", TransitionOptions{}); err != nil { + t.Fatalf("pause job-env-override-policy-transition: %v", err) + } + overridden, err := Resume(root, "job-env-override-policy-transition", ResumeOptions{ + CurrentEnvironmentFingerprint: "env:b", + AllowEnvironmentMismatch: true, + PolicyDigest: "policy-b", + PolicyRef: "policy-b.yaml", + RequirePolicyEvaluation: true, + }) + if err != nil { + t.Fatalf("resume job-env-override-policy-transition: %v", err) + } + if overridden.StatusReasonCode != "resumed_with_env_override_policy_transition" { + t.Fatalf("unexpected reason code after env override policy transition: %#v", overridden) + } +} + +func TestJobPathAndInvariantHelperValidationBranches(t *testing.T) { + if _, _, err := jobPaths("", "bad/id"); err == nil { + t.Fatalf("expected invalid job_id error for path separator") + } + + if requiresApprovalBeforeResume(nil) { + t.Fatalf("nil state should not require approval") + } + if got := countDecisionCheckpoints(nil); got != 0 { + t.Fatalf("expected zero checkpoints for nil state, got %d", got) + } + + ensureSafetyInvariantLedger(nil) + state := JobState{} + ensureSafetyInvariantLedger(&state) + if state.SafetyInvariantVersion != "1" || state.SafetyInvariantHash == "" || len(state.SafetyInvariants) == 0 { + t.Fatalf("expected default invariant ledger fields to be populated: %#v", state) + } + + withBlanks := hashSafetyInvariants([]string{"keep", "", " "}) + withoutBlanks := hashSafetyInvariants([]string{"keep"}) + if withBlanks != withoutBlanks { + t.Fatalf("expected blank invariants to be ignored, with=%s without=%s", withBlanks, withoutBlanks) + } +} diff --git a/core/mcp/interfaces.go b/core/mcp/interfaces.go index c94dfe0..80cd9d1 100644 --- a/core/mcp/interfaces.go +++ b/core/mcp/interfaces.go @@ -57,6 +57,8 @@ type CallContext struct { Identity string `json:"identity,omitempty"` Workspace string `json:"workspace,omitempty"` RiskClass string `json:"risk_class,omitempty"` + Phase string `json:"phase,omitempty"` + JobID string `json:"job_id,omitempty"` SessionID string `json:"session_id,omitempty"` RequestID string `json:"request_id,omitempty"` RunID string `json:"run_id,omitempty"` diff --git a/core/mcp/proxy.go b/core/mcp/proxy.go index 54ba2d2..bd2102a 100644 --- a/core/mcp/proxy.go +++ b/core/mcp/proxy.go @@ -213,6 +213,8 @@ func ToIntentRequestWithOptions(call ToolCall, opts IntentOptions) (schemagate.I Identity: identity, Workspace: workspace, RiskClass: riskClass, + Phase: strings.TrimSpace(call.Context.Phase), + JobID: strings.TrimSpace(call.Context.JobID), SessionID: strings.TrimSpace(call.Context.SessionID), RequestID: strings.TrimSpace(call.Context.RequestID), AuthContext: authContext, diff --git a/core/mcp/proxy_test.go b/core/mcp/proxy_test.go index a4a5bd0..ccaecc6 100644 --- a/core/mcp/proxy_test.go +++ b/core/mcp/proxy_test.go @@ -289,6 +289,9 @@ func TestToIntentRequestWithStrictContext(t *testing.T) { if intent.Context.SessionID != "sess-1" { t.Fatalf("expected session id in converted intent context") } + if intent.Context.Phase != "" || intent.Context.JobID != "" { + t.Fatalf("expected unset phase/job_id when not provided: %#v", intent.Context) + } if intent.Delegation == nil || intent.Delegation.RequesterIdentity != "agent.specialist" { t.Fatalf("expected delegation passthrough in converted intent: %#v", intent.Delegation) } @@ -300,6 +303,30 @@ func TestToIntentRequestWithStrictContext(t *testing.T) { } } +func TestToIntentRequestCarriesPhaseAndJobID(t *testing.T) { + intent, err := ToIntentRequest(ToolCall{ + Name: "tool.delete", + Args: map[string]any{"path": "/tmp/out.txt"}, + Targets: []Target{ + {Kind: "path", Value: "/tmp/out.txt", Operation: "delete"}, + }, + Context: CallContext{ + Identity: "alice", + Workspace: "/repo/gait", + RiskClass: "high", + Phase: "plan", + JobID: "job-123", + SessionID: "sess-1", + }, + }) + if err != nil { + t.Fatalf("to intent request: %v", err) + } + if intent.Context.Phase != "plan" || intent.Context.JobID != "job-123" { + t.Fatalf("expected phase/job_id passthrough, got %#v", intent.Context) + } +} + func TestToIntentRequestWrapper(t *testing.T) { intent, err := ToIntentRequest(ToolCall{ Name: "tool.read", diff --git a/core/pack/pack.go b/core/pack/pack.go index daf5369..742695c 100644 --- a/core/pack/pack.go +++ b/core/pack/pack.go @@ -224,6 +224,8 @@ func BuildJobPack(options BuildJobOptions) (BuildResult, error) { StopReason: state.StopReason, StatusReasonCode: state.StatusReasonCode, EnvironmentFingerprint: state.EnvironmentFingerprint, + SafetyInvariantVersion: strings.TrimSpace(state.SafetyInvariantVersion), + SafetyInvariantHash: strings.TrimSpace(state.SafetyInvariantHash), CheckpointCount: len(state.Checkpoints), ApprovalCount: len(state.Approvals), } @@ -1234,6 +1236,9 @@ func validateJobPayload(payload schemapack.JobPayload) error { if strings.TrimSpace(payload.EnvironmentFingerprint) == "" { return fmt.Errorf("job payload environment_fingerprint is required") } + if strings.TrimSpace(payload.SafetyInvariantVersion) != "" && !isSHA256Hex(strings.TrimSpace(payload.SafetyInvariantHash)) { + return fmt.Errorf("job payload safety_invariant_hash must be sha256 hex when safety_invariant_version is set") + } if payload.CheckpointCount < 0 || payload.ApprovalCount < 0 { return fmt.Errorf("job payload counts must be >= 0") } @@ -1256,6 +1261,9 @@ func validateJobState(state jobruntime.JobState) error { if strings.TrimSpace(state.EnvironmentFingerprint) == "" { return fmt.Errorf("job_state environment_fingerprint is required") } + if strings.TrimSpace(state.SafetyInvariantVersion) != "" && !isSHA256Hex(strings.TrimSpace(state.SafetyInvariantHash)) { + return fmt.Errorf("job_state safety_invariant_hash must be sha256 hex when safety_invariant_version is set") + } return nil } @@ -1266,7 +1274,8 @@ func validJobStatus(status string) bool { jobruntime.StatusDecisionNeeded, jobruntime.StatusBlocked, jobruntime.StatusCompleted, - jobruntime.StatusCancelled: + jobruntime.StatusCancelled, + jobruntime.StatusEmergencyStop: return true default: return false diff --git a/core/runpack/session.go b/core/runpack/session.go index 755b8dd..324283b 100644 --- a/core/runpack/session.go +++ b/core/runpack/session.go @@ -59,6 +59,8 @@ type SessionAppendOptions struct { Verdict string ReasonCodes []string Violations []string + SafetyInvariantVersion string + SafetyInvariantHash string } type SessionStatus struct { @@ -327,6 +329,8 @@ func AppendSessionEvent(path string, opts SessionAppendOptions) (schemarunpack.S Verdict: strings.TrimSpace(opts.Verdict), ReasonCodes: uniqueSortedStrings(opts.ReasonCodes), Violations: uniqueSortedStrings(opts.Violations), + SafetyInvariantVersion: strings.TrimSpace(opts.SafetyInvariantVersion), + SafetyInvariantHash: strings.ToLower(strings.TrimSpace(opts.SafetyInvariantHash)), } record := sessionJournalRecord{ RecordType: "event", @@ -509,6 +513,15 @@ func EmitSessionCheckpoint(journalPath string, outRunpackPath string, opts Sessi return writeErr } checkpointDigest := computeCheckpointDigest(recordRes.Manifest.ManifestDigest, prevCheckpointDigest, nextCheckpointIdx, sequenceStart, sequenceEnd) + safetyInvariantVersion := "" + safetyInvariantHash := "" + for index := len(newEvents) - 1; index >= 0; index-- { + if strings.TrimSpace(newEvents[index].SafetyInvariantVersion) != "" && strings.TrimSpace(newEvents[index].SafetyInvariantHash) != "" { + safetyInvariantVersion = strings.TrimSpace(newEvents[index].SafetyInvariantVersion) + safetyInvariantHash = strings.ToLower(strings.TrimSpace(newEvents[index].SafetyInvariantHash)) + break + } + } checkpoint := schemarunpack.SessionCheckpoint{ SchemaID: sessionCheckpointSchemaID, SchemaVersion: sessionCheckpointSchemaV1, @@ -523,6 +536,8 @@ func EmitSessionCheckpoint(journalPath string, outRunpackPath string, opts Sessi ManifestDigest: recordRes.Manifest.ManifestDigest, PrevCheckpointDigest: prevCheckpointDigest, CheckpointDigest: checkpointDigest, + SafetyInvariantVersion: safetyInvariantVersion, + SafetyInvariantHash: safetyInvariantHash, } appendErr := appendJournalRecord(normalizedPath, sessionJournalRecord{ RecordType: "checkpoint", diff --git a/core/schema/v1/gate/types.go b/core/schema/v1/gate/types.go index de22f4e..f830f9d 100644 --- a/core/schema/v1/gate/types.go +++ b/core/schema/v1/gate/types.go @@ -107,6 +107,8 @@ type IntentContext struct { Identity string `json:"identity"` Workspace string `json:"workspace"` RiskClass string `json:"risk_class"` + Phase string `json:"phase,omitempty"` + JobID string `json:"job_id,omitempty"` SessionID string `json:"session_id,omitempty"` RequestID string `json:"request_id,omitempty"` AuthContext map[string]any `json:"auth_context,omitempty"` @@ -175,6 +177,8 @@ type ApprovalToken struct { PolicyDigest string `json:"policy_digest"` DelegationBindingDigest string `json:"delegation_binding_digest,omitempty"` Scope []string `json:"scope"` + MaxTargets int `json:"max_targets,omitempty"` + MaxOps int `json:"max_ops,omitempty"` ExpiresAt time.Time `json:"expires_at"` Signature *Signature `json:"signature,omitempty"` } diff --git a/core/schema/v1/pack/types.go b/core/schema/v1/pack/types.go index ade9860..ba92dfc 100644 --- a/core/schema/v1/pack/types.go +++ b/core/schema/v1/pack/types.go @@ -52,6 +52,8 @@ type JobPayload struct { StopReason string `json:"stop_reason"` StatusReasonCode string `json:"status_reason_code"` EnvironmentFingerprint string `json:"environment_fingerprint"` + SafetyInvariantVersion string `json:"safety_invariant_version,omitempty"` + SafetyInvariantHash string `json:"safety_invariant_hash,omitempty"` CheckpointCount int `json:"checkpoint_count"` ApprovalCount int `json:"approval_count"` } diff --git a/core/schema/v1/runpack/types.go b/core/schema/v1/runpack/types.go index 6f3c85d..aa3840f 100644 --- a/core/schema/v1/runpack/types.go +++ b/core/schema/v1/runpack/types.go @@ -127,6 +127,8 @@ type SessionEvent struct { Verdict string `json:"verdict,omitempty"` ReasonCodes []string `json:"reason_codes,omitempty"` Violations []string `json:"violations,omitempty"` + SafetyInvariantVersion string `json:"safety_invariant_version,omitempty"` + SafetyInvariantHash string `json:"safety_invariant_hash,omitempty"` } type SessionCheckpoint struct { @@ -143,6 +145,8 @@ type SessionCheckpoint struct { ManifestDigest string `json:"manifest_digest"` PrevCheckpointDigest string `json:"prev_checkpoint_digest,omitempty"` CheckpointDigest string `json:"checkpoint_digest"` + SafetyInvariantVersion string `json:"safety_invariant_version,omitempty"` + SafetyInvariantHash string `json:"safety_invariant_hash,omitempty"` } type SessionChain struct { diff --git a/docs-site/public/llm/faq.md b/docs-site/public/llm/faq.md index 9b4d94d..b99e97a 100644 --- a/docs-site/public/llm/faq.md +++ b/docs-site/public/llm/faq.md @@ -6,7 +6,7 @@ Gait dispatches durable agent jobs, captures signed evidence at the tool boundar ## What problem does Gait solve for long-running agent work? -Multi-step and multi-hour agent jobs fail mid-flight, losing state and provenance. Gait dispatches durable jobs with checkpointed state, pause/resume/cancel, and deterministic stop reasons so work survives failures and stays auditable. +Multi-step and multi-hour agent jobs fail mid-flight, losing state and provenance. Gait dispatches durable jobs with checkpointed state, pause/resume/stop/cancel, and deterministic stop reasons so work survives failures and stays auditable. ## Is Gait a hosted SaaS dashboard? diff --git a/docs-site/public/llm/product.md b/docs-site/public/llm/product.md index e4c31e0..ef51fc7 100644 --- a/docs-site/public/llm/product.md +++ b/docs-site/public/llm/product.md @@ -4,9 +4,9 @@ Gait is an offline-first runtime for production AI agents that dispatches durabl It provides seven OSS primitives: -1. **Jobs**: Dispatch multi-step, multi-hour agent work with checkpoints, pause/resume/cancel, approval gates, and deterministic stop reasons. +1. **Jobs**: Dispatch multi-step, multi-hour agent work with checkpoints, pause/resume/stop/cancel, approval gates, deterministic stop reasons, and emergency-stop preemption evidence. 2. **Packs**: Unified portable artifact envelope (PackSpec v1) for run, job, and call evidence with Ed25519 signatures and SHA-256 manifest. -3. **Gate**: Evaluate structured tool-call intent against YAML policy with fail-closed enforcement. Supports multi-step script rollups, Wrkr context enrichment, and signed approved-script fast-path allow. +3. **Gate**: Evaluate structured tool-call intent against YAML policy with fail-closed enforcement. Supports phase-aware destructive plan/apply boundaries, destructive budgets, multi-step script rollups, Wrkr context enrichment, and signed approved-script fast-path allow. 4. **Regress**: Convert any incident or failed run into a deterministic CI regression fixture with JUnit output and stable exit codes. 5. **Voice**: Gate high-stakes spoken commitments (refunds, quotes, eligibility) before they are uttered. Signed SayToken capability tokens and callpack artifacts for voice boundaries. 6. **Context Evidence**: Deterministic proof of what context the model was working from at decision time. Privacy-aware envelopes with fail-closed enforcement when evidence is missing. diff --git a/docs-site/public/llm/quickstart.md b/docs-site/public/llm/quickstart.md index 759a4a3..7c0c0b6 100644 --- a/docs-site/public/llm/quickstart.md +++ b/docs-site/public/llm/quickstart.md @@ -25,6 +25,7 @@ gait regress bootstrap --from run_demo --junit ./gait-out/junit.xml # Try durable jobs and policy demos gait demo --durable gait demo --policy +gait policy init baseline-highrisk --out ./gait.policy.yaml --json ``` Then continue with: @@ -35,6 +36,13 @@ Then continue with: Use `gait policy test` and `gait gate eval --simulate` before enforce rollout on high-risk tool-call boundaries. +For emergency preemption drills: + +```bash +gait job submit --id job_safe --json +gait job stop --id job_safe --actor secops --json +``` + For script automation boundaries, add: ```bash diff --git a/docs-site/public/llm/security.md b/docs-site/public/llm/security.md index 1ac467d..f3367d7 100644 --- a/docs-site/public/llm/security.md +++ b/docs-site/public/llm/security.md @@ -1,10 +1,13 @@ # Gait Security and Safety - Fail-closed by default for ambiguous high-risk policy outcomes. +- Out-of-band emergency stop preemption blocks post-stop dispatches and records signed proof events. - Structured intent model for policy decisions (not free-form prompt filtering). +- Destructive paths support phase-aware plan/apply boundaries plus fail-closed destructive budgets. - Deterministic and offline verification for all artifact types (runpacks, jobpacks, callpacks). - Ed25519 signatures and SHA-256 manifest integrity in PackSpec v1. - Signed traces and explicit reason codes for blocked actions. +- Approval tokens can carry bounded destructive scope (`max_targets`, `max_ops`); overruns fail closed. - Approved-script registry entries are signature-verified and policy-digest bound; tampered or missing state fails closed in high-risk enforcement. - SayToken capability tokens for voice agent commitment gating — gated speech cannot execute without a valid token. - Context evidence envelopes with fail-closed enforcement when evidence is missing for high-risk actions. diff --git a/docs-site/public/llms.txt b/docs-site/public/llms.txt index 0351c20..c4a7271 100644 --- a/docs-site/public/llms.txt +++ b/docs-site/public/llms.txt @@ -28,6 +28,7 @@ - gait job checkpoint add --id --type --summary - gait job checkpoint list --id - gait job pause --id +- gait job stop --id - gait job resume --id --policy [--identity-revocations ] - gait job cancel --id - gait gateway ingest --source --log-path @@ -56,7 +57,9 @@ ## Safety Model - Fail-closed by default for ambiguous high-risk policy outcomes. +- Out-of-band emergency stop preemption blocks post-stop dispatches (`emergency_stop_preempted`). - Offline verification for packs, runpacks, and traces. +- Destructive operations support phase-aware plan/apply boundaries and fail-closed destructive budgets. - Structured intent model for policy decisions (not free-form prompt filtering). - SayToken capability tokens for voice agent commitment gating. - Context evidence envelopes with fail-closed enforcement when evidence is missing. diff --git a/docs/approval_runbook.md b/docs/approval_runbook.md index f61466c..20ed4e7 100644 --- a/docs/approval_runbook.md +++ b/docs/approval_runbook.md @@ -86,6 +86,8 @@ gait approve \ --policy-digest \ --ttl 1h \ --scope tool.write \ + --max-targets 25 \ + --max-ops 25 \ --approver approver@company \ --reason-code change_ticket_123 \ --json > token_a.json @@ -123,6 +125,7 @@ Expected: - Default TTL: `1h` - High-risk operations: `15m` to `30m` - Scope must be minimal and tool-specific (for example `tool.write`, not wildcard scope). +- For bulk/destructive operations, set `--max-targets` and `--max-ops` to bound blast radius. - Tokens are single-intent by digest; do not reuse across different intents. - Do not store tokens in source control or long-lived shared volumes. diff --git a/docs/durable_jobs.md b/docs/durable_jobs.md index 34a5050..af7bc6b 100644 --- a/docs/durable_jobs.md +++ b/docs/durable_jobs.md @@ -15,6 +15,7 @@ A durable job is a checkpointed execution record managed locally by Gait with ex - `status` - `checkpoint add|list|show` - `pause` +- `stop` - `approve` - `resume` - `cancel` @@ -41,6 +42,7 @@ The job surface is for runtime control and evidence, not prompt orchestration. gait job submit --id job_1 --identity worker_1 --policy ./policy.yaml --json gait job checkpoint add --id job_1 --type progress --summary "step 1 complete" --json gait job pause --id job_1 --json +gait job stop --id job_1 --actor secops --json gait job approve --id job_1 --actor reviewer_1 --reason "validated input" --json gait job resume --id job_1 --actor worker_1 --reason "continue after approval" --policy ./policy.yaml --identity-revocations ./revoked_identities.txt --identity-validation-source revocation_list --json gait job inspect --id job_1 --json @@ -65,6 +67,15 @@ Portable evidence outputs: - `pack_.zip` (PackSpec v1 envelope) - deterministic verify/inspect JSON for CI, incident handoff, and audits +## Emergency Stop Contract + +`gait job stop` is an out-of-band emergency control. Once acknowledged: + +- job status becomes `emergency_stopped` +- stop reason becomes `emergency_stopped` +- MCP proxy/serve paths block calls for that `job_id` with reason code `emergency_stop_preempted` +- blocked post-stop dispatches are journaled as `dispatch_blocked` events for offline proof + ## How This Differs From Checkpoint/Observability Tools | Dimension | Gait durable jobs | LangChain/LangFuse-style checkpoint and observability stacks | diff --git a/docs/launch/README.md b/docs/launch/README.md index 65ed568..6c02dab 100644 --- a/docs/launch/README.md +++ b/docs/launch/README.md @@ -4,6 +4,12 @@ This folder is the repeatable distribution package for OSS launch cycles. Use it when shipping a major release, announcing a wedge milestone, or running a re-launch. +Current safe-default rollout note: + +- ship `examples/policy/knowledge_worker_safe.yaml` as reversible-first profile +- ship `baseline-highrisk` destructive budget defaults for fail-closed bursts +- stage rollout: monitor -> approval -> enforce + ## Contents - `narrative_one_liner.md`: positioning statements by audience diff --git a/docs/mcp_capability_matrix.md b/docs/mcp_capability_matrix.md index 07a6152..9b0ba18 100644 --- a/docs/mcp_capability_matrix.md +++ b/docs/mcp_capability_matrix.md @@ -15,9 +15,9 @@ In this context, an adapter is the payload translation layer from a framework sc | Mode | Primary Use | Input | Output | Persistence | Notable Non-Goals | | --- | --- | --- | --- | --- | --- | -| `gait mcp proxy` | One-shot local evaluation | Tool-call payload file/stdin + policy | JSON decision + optional trace/runpack/pack exports | Optional trace/runpack/pack/log/otel outputs | Not a long-running service | +| `gait mcp proxy` | One-shot local evaluation | Tool-call payload file/stdin + policy | JSON decision + optional trace/runpack/pack exports | Optional trace/runpack/pack/log/otel outputs + emergency stop preemption when `context.job_id` is present (`--job-root`) | Not a long-running service | | `gait mcp bridge` | Alias of proxy for bridge wording/UX | Same as proxy | Same as proxy | Same as proxy | Not a distinct evaluator | -| `gait mcp serve` | Long-running local HTTP decision service | `POST /v1/evaluate*` JSON request | JSON/SSE/NDJSON decision payload with `exit_code` + verdict | Trace/runpack/pack/session retention controls + auto pack emission for state-changing calls (`emit_pack` + `--pack-dir`) | Does not execute tools for caller | +| `gait mcp serve` | Long-running local HTTP decision service | `POST /v1/evaluate*` JSON request | JSON/SSE/NDJSON decision payload with `exit_code` + verdict | Trace/runpack/pack/session retention controls + auto pack emission for state-changing calls (`emit_pack` + `--pack-dir`) + emergency stop preemption via job runtime state (`--job-root`) | Does not execute tools for caller | ## Runtime Enforcement Responsibility diff --git a/docs/policy_rollout.md b/docs/policy_rollout.md index f0ea369..477ea9f 100644 --- a/docs/policy_rollout.md +++ b/docs/policy_rollout.md @@ -71,6 +71,21 @@ Use `dry_run` policy effects for selected high-risk tools and route calls throug Rollout gate: - Move forward only after dry-run telemetry shows expected decisions and zero unsafe bypasses. +- For destructive classes, set `context.phase=plan` during this stage so intents stay non-destructive while policy coverage is tuned. + +## Stage 2A: Plan/Apply Destructive Boundary + +For destructive paths, enforce explicit phases: + +- `plan`: non-destructive decisioning path (`dry_run` semantics for destructive targets) +- `apply`: destructive execution boundary, requires explicit approval flow + +Use bounded approval tokens for bulk operations: + +- `gait approve --max-targets --max-ops ...` +- fail-closed reason codes include token-scope mismatches (`approval_token_max_targets_exceeded`, `approval_token_max_ops_exceeded`) + +If policy includes `destructive_budget`, exceedance blocks with stable reason code `destructive_budget_exceeded`. ## Stage 3: Enforce Approval For High-Risk Tools diff --git a/docs/siem_ingestion_recipes.md b/docs/siem_ingestion_recipes.md index 054b75b..c42db7f 100644 --- a/docs/siem_ingestion_recipes.md +++ b/docs/siem_ingestion_recipes.md @@ -67,6 +67,8 @@ Recommended indexed keys: - `trace_id` - `run_id` - `session_id` +- `job_id` +- `phase` - `tool_name` - `verdict` - `reason_codes` @@ -76,4 +78,9 @@ Recommended indexed keys: - `delegation_ref` - `delegation_depth` +Operational alerts worth pinning: + +- emergency stop preemption (`reason_codes` includes `emergency_stop_preempted`) +- destructive budget breach (`reason_codes` includes `destructive_budget_exceeded`) + This keeps SIEM queries aligned with Gait artifacts and proofs. diff --git a/examples/policy/README.md b/examples/policy/README.md index 6543a17..8573b7c 100644 --- a/examples/policy/README.md +++ b/examples/policy/README.md @@ -5,6 +5,8 @@ These templates provide baseline policy packs by risk tier: - `base_low_risk.yaml` - `base_medium_risk.yaml` - `base_high_risk.yaml` +- `baseline-highrisk.yaml` (hyphenated alias) +- `knowledge_worker_safe.yaml` (reversible-first profile) Scaffold a baseline file directly from the CLI: @@ -52,6 +54,8 @@ High-risk note: - `base_high_risk.yaml` marks write actions with `require_broker_credential: true` for least-privilege brokering. - `base_high_risk.yaml` requires explicit delegation metadata for high-risk egress writes and blocks tainted external payload flow to network destinations. +- `base_high_risk.yaml` and `baseline-highrisk.yaml` include `destructive_budget` defaults to fail-closed once destructive threshold windows are exceeded. +- `knowledge_worker_safe.yaml` defaults unknown tools to block, prefers archive/trash actions, and requires explicit break-glass approval for permanent delete paths. - For runtime checks in hardened mode, evaluate with `--profile oss-prod` and an explicit broker, for example: ```bash diff --git a/examples/policy/base_high_risk.yaml b/examples/policy/base_high_risk.yaml index 3a609c0..e9a31b3 100644 --- a/examples/policy/base_high_risk.yaml +++ b/examples/policy/base_high_risk.yaml @@ -55,6 +55,22 @@ rules: tool_names: [tool.write] reason_codes: [approval_required_for_write] violations: [approval_required] + - name: destructive-budget-guard + priority: 18 + effect: require_approval + match: + risk_classes: [high, critical] + endpoint: + enabled: true + destructive_action: require_approval + reason_code: destructive_operation_requires_approval + violation: destructive_operation + destructive_budget: + requests: 3 + window: hour + scope: identity + reason_codes: [destructive_budget_guard] + violations: [destructive_operation] - name: allow-safe-tool-read priority: 30 effect: allow diff --git a/examples/policy/baseline-highrisk.yaml b/examples/policy/baseline-highrisk.yaml new file mode 100644 index 0000000..e9a31b3 --- /dev/null +++ b/examples/policy/baseline-highrisk.yaml @@ -0,0 +1,80 @@ +schema_id: gait.gate.policy +schema_version: 1.0.0 +default_verdict: block +fail_closed: + enabled: true + risk_classes: [critical] + required_fields: [targets, arg_provenance] +rules: + - name: block-tainted-egress-write + priority: 12 + effect: block + match: + tool_names: [tool.write] + target_kinds: [host, url, bucket] + provenance_sources: [external, tool_output] + reason_codes: [blocked_tainted_egress] + violations: [tainted_data_exfiltration] + - name: allow-delegated-egress-write + priority: 15 + effect: allow + match: + tool_names: [tool.write] + risk_classes: [high, critical] + target_kinds: [host, url, bucket] + require_delegation: true + allowed_delegator_identities: [agent.lead] + allowed_delegate_identities: [agent.specialist] + delegation_scopes: [write] + max_delegation_depth: 2 + reason_codes: [delegated_egress_write_allowed] + - name: block-undelegated-egress-write + priority: 16 + effect: block + match: + tool_names: [tool.write] + target_kinds: [host, url, bucket] + reason_codes: [delegation_required_for_egress_write] + violations: [missing_or_invalid_delegation] + - name: block-destructive-tool-delete + priority: 10 + effect: block + match: + tool_names: [tool.delete] + reason_codes: [destructive_tool_blocked] + violations: [destructive_operation] + - name: require-approval-tool-write + priority: 20 + effect: require_approval + min_approvals: 2 + require_distinct_approvers: true + require_broker_credential: true + broker_reference: highrisk-egress + broker_scopes: [write] + match: + tool_names: [tool.write] + reason_codes: [approval_required_for_write] + violations: [approval_required] + - name: destructive-budget-guard + priority: 18 + effect: require_approval + match: + risk_classes: [high, critical] + endpoint: + enabled: true + destructive_action: require_approval + reason_code: destructive_operation_requires_approval + violation: destructive_operation + destructive_budget: + requests: 3 + window: hour + scope: identity + reason_codes: [destructive_budget_guard] + violations: [destructive_operation] + - name: allow-safe-tool-read + priority: 30 + effect: allow + match: + tool_names: [tool.read] + risk_classes: [low] + reason_codes: [safe_read_allowed] diff --git a/examples/policy/knowledge_worker_safe.yaml b/examples/policy/knowledge_worker_safe.yaml new file mode 100644 index 0000000..5c534f8 --- /dev/null +++ b/examples/policy/knowledge_worker_safe.yaml @@ -0,0 +1,39 @@ +schema_id: gait.gate.policy +schema_version: 1.0.0 +default_verdict: block +fail_closed: + enabled: true + risk_classes: [high, critical] + required_fields: [targets, arg_provenance] +rules: + - name: allow-read-only + priority: 10 + effect: allow + match: + tool_names: [tool.read, email.read, file.read, calendar.read] + reason_codes: [safe_read_allowed] + + - name: allow-reversible-archive-trash + priority: 20 + effect: allow + match: + tool_names: [email.archive, file.trash, calendar.cancel] + reason_codes: [reversible_action_allowed] + + - name: require-approval-write + priority: 30 + effect: require_approval + match: + tool_names: [tool.write, email.write, file.write, calendar.write] + reason_codes: [approval_required_for_write] + violations: [approval_required] + + - name: break-glass-permanent-delete + priority: 40 + effect: require_approval + min_approvals: 2 + require_distinct_approvers: true + match: + tool_names: [tool.delete, email.delete_permanent, file.delete_permanent] + reason_codes: [break_glass_required_for_permanent_delete] + violations: [destructive_operation] diff --git a/internal/e2e/v26_emergency_stop_cli_test.go b/internal/e2e/v26_emergency_stop_cli_test.go new file mode 100644 index 0000000..ab92f99 --- /dev/null +++ b/internal/e2e/v26_emergency_stop_cli_test.go @@ -0,0 +1,130 @@ +package e2e + +import ( + "encoding/json" + "path/filepath" + "strings" + "testing" + "time" +) + +func TestCLIStopLatencyAndEmergencyStopPreemption(t *testing.T) { + root := repoRoot(t) + binPath := buildGaitBinary(t, root) + workDir := t.TempDir() + + jobsRoot := filepath.Join(workDir, "jobs") + jobID := "job_stop_e2e" + runJSONCommand(t, workDir, binPath, "job", "submit", "--id", jobID, "--root", jobsRoot, "--json") + + stopStartedAt := time.Now().UTC() + stopOut := runJSONCommand(t, workDir, binPath, "job", "stop", "--id", jobID, "--root", jobsRoot, "--actor", "secops", "--json") + var stopResult struct { + OK bool `json:"ok"` + Job struct { + Status string `json:"status"` + StopReason string `json:"stop_reason"` + UpdatedAt time.Time `json:"updated_at"` + } `json:"job"` + } + if err := json.Unmarshal(stopOut, &stopResult); err != nil { + t.Fatalf("parse job stop output: %v\n%s", err, string(stopOut)) + } + if !stopResult.OK || stopResult.Job.Status != "emergency_stopped" || stopResult.Job.StopReason != "emergency_stopped" { + t.Fatalf("unexpected stop output: %s", string(stopOut)) + } + + stopAckMS := stopResult.Job.UpdatedAt.Sub(stopStartedAt).Milliseconds() + if stopAckMS < 0 { + stopAckMS = 0 + } + if stopAckMS > 15000 { + t.Fatalf("stop_ack_ms exceeded e2e threshold: got=%d want<=15000", stopAckMS) + } + + policyPath := filepath.Join(workDir, "policy.yaml") + mustWriteE2EFile(t, policyPath, "default_verdict: allow") + callPath := filepath.Join(workDir, "call.json") + mustWriteE2EFile(t, callPath, `{ + "name":"tool.delete", + "args":{"path":"/tmp/out.txt"}, + "targets":[{"kind":"path","value":"/tmp/out.txt","operation":"delete","destructive":true}], + "arg_provenance":[{"arg_path":"$.path","source":"user"}], + "context":{"identity":"alice","workspace":"/repo/gait","risk_class":"high","session_id":"sess-stop","job_id":"job_stop_e2e","phase":"apply"} +}`) + + mcpOut := runJSONCommandExpectCode(t, workDir, binPath, 3, + "mcp", "proxy", + "--policy", policyPath, + "--call", callPath, + "--job-root", jobsRoot, + "--json", + ) + var mcpResult struct { + OK bool `json:"ok"` + Executed bool `json:"executed"` + Verdict string `json:"verdict"` + ReasonCodes []string `json:"reason_codes"` + } + if err := json.Unmarshal(mcpOut, &mcpResult); err != nil { + t.Fatalf("parse mcp proxy output: %v\n%s", err, string(mcpOut)) + } + if !mcpResult.OK || mcpResult.Verdict != "block" || mcpResult.Executed { + t.Fatalf("unexpected mcp stop-preemption output: %s", string(mcpOut)) + } + if !containsReasonCode(mcpResult.ReasonCodes, "emergency_stop_preempted") { + t.Fatalf("expected emergency_stop_preempted reason code, got %#v", mcpResult.ReasonCodes) + } + + inspectOut := runJSONCommand(t, workDir, binPath, "job", "inspect", "--id", jobID, "--root", jobsRoot, "--json") + var inspectResult struct { + OK bool `json:"ok"` + Events []struct { + Type string `json:"type"` + CreatedAt time.Time `json:"created_at"` + } `json:"events"` + } + if err := json.Unmarshal(inspectOut, &inspectResult); err != nil { + t.Fatalf("parse job inspect output: %v\n%s", err, string(inspectOut)) + } + if !inspectResult.OK { + t.Fatalf("job inspect did not return ok=true: %s", string(inspectOut)) + } + + ackIndex := -1 + for index, event := range inspectResult.Events { + if event.Type == "emergency_stop_acknowledged" { + ackIndex = index + break + } + } + if ackIndex < 0 { + t.Fatalf("expected emergency_stop_acknowledged event in inspect output: %s", string(inspectOut)) + } + + postStopSideEffects := 0 + blockedDispatches := 0 + for _, event := range inspectResult.Events[ackIndex+1:] { + switch event.Type { + case "dispatch_blocked": + blockedDispatches++ + default: + postStopSideEffects++ + } + } + if blockedDispatches == 0 { + t.Fatalf("expected at least one dispatch_blocked event after stop") + } + if postStopSideEffects != 0 { + t.Fatalf("expected post_stop_side_effects=0, got=%d", postStopSideEffects) + } +} + +func containsReasonCode(reasonCodes []string, expected string) bool { + for _, reasonCode := range reasonCodes { + if strings.EqualFold(strings.TrimSpace(reasonCode), expected) { + return true + } + } + return false +} diff --git a/internal/integration/emergency_stop_slo_test.go b/internal/integration/emergency_stop_slo_test.go new file mode 100644 index 0000000..fa4fdbb --- /dev/null +++ b/internal/integration/emergency_stop_slo_test.go @@ -0,0 +1,136 @@ +package integration + +import ( + "fmt" + "path/filepath" + "testing" + "time" + + "github.com/Clyra-AI/gait/core/jobruntime" +) + +const ( + stopAckSLOMS = int64(50) + stopBackpressureDispatches = 48 +) + +func TestStopLatencySLOForEmergencyStopAcknowledgment(t *testing.T) { + workDir := t.TempDir() + jobsRoot := filepath.Join(workDir, "jobs") + baseTime := time.Date(2026, time.February, 24, 0, 0, 0, 0, time.UTC) + jobID := "job_stop_latency" + + if _, err := jobruntime.Submit(jobsRoot, jobruntime.SubmitOptions{ + JobID: jobID, + ProducerVersion: "0.0.0-test", + EnvironmentFingerprint: "envfp:stop-latency", + Now: baseTime, + }); err != nil { + t.Fatalf("submit job: %v", err) + } + + stopRequestedAt := baseTime.Add(125 * time.Millisecond) + if _, err := jobruntime.EmergencyStop(jobsRoot, jobID, jobruntime.TransitionOptions{ + Actor: "ops.stop", + Now: stopRequestedAt, + }); err != nil { + t.Fatalf("emergency stop: %v", err) + } + + _, events, err := jobruntime.Inspect(jobsRoot, jobID) + if err != nil { + t.Fatalf("inspect stopped job: %v", err) + } + + ackEvent, ok := findJobEvent(events, "emergency_stop_acknowledged") + if !ok { + t.Fatalf("expected emergency_stop_acknowledged event, got %d events", len(events)) + } + + stopAckMS := ackEvent.CreatedAt.Sub(stopRequestedAt).Milliseconds() + if stopAckMS < 0 { + t.Fatalf("expected non-negative stop_ack_ms, got=%d", stopAckMS) + } + if stopAckMS > stopAckSLOMS { + t.Fatalf("stop_ack_ms exceeded slo: got=%d want<=%d", stopAckMS, stopAckSLOMS) + } +} + +func TestEmergencyStopBackpressureHasZeroPostStopSideEffects(t *testing.T) { + workDir := t.TempDir() + jobsRoot := filepath.Join(workDir, "jobs") + baseTime := time.Date(2026, time.February, 24, 1, 0, 0, 0, time.UTC) + jobID := "job_stop_backpressure" + + if _, err := jobruntime.Submit(jobsRoot, jobruntime.SubmitOptions{ + JobID: jobID, + ProducerVersion: "0.0.0-test", + EnvironmentFingerprint: "envfp:stop-backpressure", + Now: baseTime, + }); err != nil { + t.Fatalf("submit job: %v", err) + } + + stopRequestedAt := baseTime.Add(200 * time.Millisecond) + stopped, err := jobruntime.EmergencyStop(jobsRoot, jobID, jobruntime.TransitionOptions{ + Actor: "ops.stop", + Now: stopRequestedAt, + }) + if err != nil { + t.Fatalf("emergency stop: %v", err) + } + if !jobruntime.IsEmergencyStopped(stopped) { + t.Fatalf("expected emergency stopped state, got %#v", stopped) + } + + for index := 0; index < stopBackpressureDispatches; index++ { + if _, err := jobruntime.RecordBlockedDispatch(jobsRoot, jobID, jobruntime.DispatchRecordOptions{ + Actor: "dispatch.queue", + DispatchPath: fmt.Sprintf("queue/%03d", index), + ReasonCode: "emergency_stop_preempted", + Now: stopRequestedAt.Add(time.Duration(index+1) * time.Millisecond), + }); err != nil { + t.Fatalf("record blocked dispatch %d: %v", index, err) + } + } + + _, events, err := jobruntime.Inspect(jobsRoot, jobID) + if err != nil { + t.Fatalf("inspect stopped job events: %v", err) + } + ackEvent, ok := findJobEvent(events, "emergency_stop_acknowledged") + if !ok { + t.Fatalf("expected emergency_stop_acknowledged event") + } + + postStopSideEffects := 0 + dispatchBlockedEvents := 0 + for _, event := range events { + if event.CreatedAt.Before(ackEvent.CreatedAt) { + continue + } + switch event.Type { + case "emergency_stop_acknowledged": + case "dispatch_blocked": + dispatchBlockedEvents++ + default: + postStopSideEffects++ + } + } + + if dispatchBlockedEvents != stopBackpressureDispatches { + t.Fatalf("unexpected post-stop blocked dispatch count: got=%d want=%d", dispatchBlockedEvents, stopBackpressureDispatches) + } + if postStopSideEffects != 0 { + t.Fatalf("post_stop_side_effects mismatch: got=%d want=0", postStopSideEffects) + } +} + +func findJobEvent(events []jobruntime.Event, eventType string) (jobruntime.Event, bool) { + for _, event := range events { + if event.Type == eventType { + return event, true + } + } + return jobruntime.Event{}, false +} diff --git a/internal/integration/testdata/gate_eval_trace_verify.golden.json b/internal/integration/testdata/gate_eval_trace_verify.golden.json index 6bc7838..7db838d 100644 --- a/internal/integration/testdata/gate_eval_trace_verify.golden.json +++ b/internal/integration/testdata/gate_eval_trace_verify.golden.json @@ -1,10 +1,10 @@ { - "intent_digest": "c71b632af6a89397df565f076a47dbce7068687ab7df996b349648b9ecf4bd24", + "intent_digest": "74d4a052fd27c63f110e0879f227215aed15a8e522946d38a2fb515374fbafa8", "policy_digest": "f4581f55c41a3ee69b316c95f66fbfc293ae7f06e6458491e3ca232dfb26608c", "reason_codes": [ "blocked_external" ], - "trace_id": "e577ad07bd4f047ab3ac0406", + "trace_id": "9a714d67a491c4bd98dd7983", "verdict": "block", "violations": [ "external_target" diff --git a/perf/runtime_slo_budgets.json b/perf/runtime_slo_budgets.json index eeb66ed..98caa5a 100644 --- a/perf/runtime_slo_budgets.json +++ b/perf/runtime_slo_budgets.json @@ -99,6 +99,12 @@ "p99_ms": 2400.0, "max_error_rate": 0.0 }, + "job_stop": { + "p50_ms": 900.0, + "p95_ms": 1800.0, + "p99_ms": 2400.0, + "max_error_rate": 0.0 + }, "job_resume": { "p50_ms": 900.0, "p95_ms": 1800.0, diff --git a/scenarios/gait/approval-expiry-1s-past/approval-token.json b/scenarios/gait/approval-expiry-1s-past/approval-token.json index c06c619..8da0dbc 100644 --- a/scenarios/gait/approval-expiry-1s-past/approval-token.json +++ b/scenarios/gait/approval-expiry-1s-past/approval-token.json @@ -1,21 +1,21 @@ { "schema_id": "gait.gate.approval_token", "schema_version": "1.0.0", - "created_at": "2026-02-22T17:42:20.549368Z", + "created_at": "2026-02-24T12:42:58.274138Z", "producer_version": "0.0.0-dev", - "token_id": "824b539851b82d38ca5fac04", + "token_id": "b68a3c0bd5a7af6eb3c47cea", "approver_identity": "human.reviewer", "reason_code": "MANUAL_APPROVAL", - "intent_digest": "29114c07f93d042f2092e84a25445986dcfdfa63b3a42f719250f5c0bb596375", + "intent_digest": "5560b84bd90ae0b84079bd0125a27d74750fb25f55a62a12a6b718eaca3dca51", "policy_digest": "07a528a879e4f0e0cd5a0e2ca801924746b47dac624862207ef21d4ec266f8f9", "scope": [ "tool:tool.write" ], - "expires_at": "2026-02-22T17:42:20.550368Z", + "expires_at": "2026-02-24T12:42:59.274138Z", "signature": { "alg": "ed25519", "key_id": "8393424682c702d59a923d26924b2d4827711e6c585117077766645e22efeb52", - "sig": "x16WXIk4e2QPqqR0NSl9jlaS7ArMCJfUZGtBarSq6q79GYaC5wW9FDumtQl1/YLtcGFtzyZiNonVoIbgpKdYBQ==", - "signed_digest": "96e041b617e6e13835b9f5a5789564228ef20b4578f5299b90eec0fe083dea3c" + "sig": "33SvpGbmU/9+hk491sr4GtcJ7dFpQzBCAI3mWfbcuo2EQ4ry4/pIxtYNcBSZ7HOqDgBCM6QShjMZXBpmnBmFDg==", + "signed_digest": "dd3578d12548120721acf756ea331656feaff48f2c33edb9f0fd47ef40462796" } } diff --git a/scenarios/gait/approval-token-valid/approval-token.json b/scenarios/gait/approval-token-valid/approval-token.json index 010de69..d9f43fd 100644 --- a/scenarios/gait/approval-token-valid/approval-token.json +++ b/scenarios/gait/approval-token-valid/approval-token.json @@ -1,21 +1,21 @@ { "schema_id": "gait.gate.approval_token", "schema_version": "1.0.0", - "created_at": "2026-02-22T17:42:20.324925Z", + "created_at": "2026-02-24T12:42:58.228785Z", "producer_version": "0.0.0-dev", - "token_id": "393bcea4466e16430690ad3f", + "token_id": "ec44f8948261811d131eb04a", "approver_identity": "human.reviewer", "reason_code": "MANUAL_APPROVAL", - "intent_digest": "51ad46c45717bbc5baec288fa2eec502ea41b701b5cd6ef32c9ceec7f2f9607b", + "intent_digest": "4b53bb79e35105ac9170963ac6f84ed0ed15530c68f06a46fc1233c5ec766913", "policy_digest": "07a528a879e4f0e0cd5a0e2ca801924746b47dac624862207ef21d4ec266f8f9", "scope": [ "tool:tool.write" ], - "expires_at": "2036-02-20T17:42:20.324925Z", + "expires_at": "2036-02-22T12:42:58.228785Z", "signature": { "alg": "ed25519", "key_id": "8393424682c702d59a923d26924b2d4827711e6c585117077766645e22efeb52", - "sig": "owZR6DISeHWFK1i96XgM2likqw9yEBePCRetXg7wz8peGUh97bHcWWdKKy47t4Bi1HqpLlKB4goBMRkm2M9SDQ==", - "signed_digest": "5505a7e20b60c3da039c39c99c67f0ab4c486e1e48febb05e1b715d32124076c" + "sig": "FfgS+OlPSQOvVTCvc+0NZ+HKwlhJcHXxygZQiVyn1artAmKuqYnhlRBM3kC4FCW4/kkceUImKFLLjy+z9JN5BA==", + "signed_digest": "d470f2e63450ca56d772bebbe269db46318f309181444bb2522bb262061079c0" } } diff --git a/schemas/v1/gate/approval_token.schema.json b/schemas/v1/gate/approval_token.schema.json index 72c82d2..5554832 100644 --- a/schemas/v1/gate/approval_token.schema.json +++ b/schemas/v1/gate/approval_token.schema.json @@ -32,6 +32,8 @@ "minItems": 1, "items": { "type": "string", "minLength": 1 } }, + "max_targets": { "type": "integer", "minimum": 0 }, + "max_ops": { "type": "integer", "minimum": 0 }, "expires_at": { "type": "string", "format": "date-time" }, "signature": { "type": "object", diff --git a/schemas/v1/gate/intent_request.schema.json b/schemas/v1/gate/intent_request.schema.json index 559f3e2..c22b1ad 100644 --- a/schemas/v1/gate/intent_request.schema.json +++ b/schemas/v1/gate/intent_request.schema.json @@ -206,6 +206,8 @@ "identity": { "type": "string", "minLength": 1 }, "workspace": { "type": "string", "minLength": 1 }, "risk_class": { "type": "string", "minLength": 1 }, + "phase": { "type": "string", "enum": ["plan", "apply"] }, + "job_id": { "type": "string", "minLength": 1 }, "session_id": { "type": "string" }, "request_id": { "type": "string" }, "auth_context": { diff --git a/schemas/v1/gate/policy.schema.json b/schemas/v1/gate/policy.schema.json index 577c03e..dd882cb 100644 --- a/schemas/v1/gate/policy.schema.json +++ b/schemas/v1/gate/policy.schema.json @@ -150,6 +150,15 @@ }, "additionalProperties": false }, + "destructive_budget": { + "type": "object", + "properties": { + "requests": { "type": "integer", "minimum": 1 }, + "window": { "type": "string", "enum": ["minute", "hour"] }, + "scope": { "type": "string", "enum": ["tool", "identity", "tool_identity"] } + }, + "additionalProperties": false + }, "dataflow": { "type": "object", "properties": { diff --git a/schemas/v1/pack/job.schema.json b/schemas/v1/pack/job.schema.json index f5ce844..2097617 100644 --- a/schemas/v1/pack/job.schema.json +++ b/schemas/v1/pack/job.schema.json @@ -22,11 +22,13 @@ "job_id": { "type": "string", "minLength": 1 }, "status": { "type": "string", - "enum": ["running", "paused", "decision_needed", "blocked", "completed", "cancelled"] + "enum": ["running", "paused", "decision_needed", "blocked", "completed", "cancelled", "emergency_stopped"] }, "stop_reason": { "type": "string", "minLength": 1 }, "status_reason_code": { "type": "string", "minLength": 1 }, "environment_fingerprint": { "type": "string", "minLength": 1 }, + "safety_invariant_version": { "type": "string", "minLength": 1 }, + "safety_invariant_hash": { "type": "string", "pattern": "^[a-fA-F0-9]{64}$" }, "checkpoint_count": { "type": "integer", "minimum": 0 }, "approval_count": { "type": "integer", "minimum": 0 } }, diff --git a/schemas/v1/runpack/session_journal.schema.json b/schemas/v1/runpack/session_journal.schema.json index da5cb80..7cc6f3b 100644 --- a/schemas/v1/runpack/session_journal.schema.json +++ b/schemas/v1/runpack/session_journal.schema.json @@ -50,7 +50,9 @@ "trace_path": { "type": "string" }, "verdict": { "type": "string", "enum": ["allow", "block", "dry_run", "require_approval"] }, "reason_codes": { "type": "array", "items": { "type": "string" } }, - "violations": { "type": "array", "items": { "type": "string" } } + "violations": { "type": "array", "items": { "type": "string" } }, + "safety_invariant_version": { "type": "string", "minLength": 1 }, + "safety_invariant_hash": { "type": "string", "pattern": "^[a-fA-F0-9]{64}$" } }, "additionalProperties": false } @@ -86,7 +88,9 @@ "runpack_path": { "type": "string", "minLength": 1 }, "manifest_digest": { "type": "string", "pattern": "^[a-fA-F0-9]{64}$" }, "prev_checkpoint_digest": { "type": "string", "pattern": "^[a-fA-F0-9]{64}$" }, - "checkpoint_digest": { "type": "string", "pattern": "^[a-fA-F0-9]{64}$" } + "checkpoint_digest": { "type": "string", "pattern": "^[a-fA-F0-9]{64}$" }, + "safety_invariant_version": { "type": "string", "minLength": 1 }, + "safety_invariant_hash": { "type": "string", "pattern": "^[a-fA-F0-9]{64}$" } }, "additionalProperties": false } diff --git a/scripts/check_command_budgets.py b/scripts/check_command_budgets.py index 5ed6cdd..a49d32c 100644 --- a/scripts/check_command_budgets.py +++ b/scripts/check_command_budgets.py @@ -114,6 +114,12 @@ "p99_ms": 2400.0, "max_error_rate": 0.0, }, + "job_stop": { + "p50_ms": 900.0, + "p95_ms": 1800.0, + "p99_ms": 2400.0, + "max_error_rate": 0.0, + }, "job_resume": { "p50_ms": 900.0, "p95_ms": 1800.0, @@ -383,6 +389,9 @@ def main() -> int: "identity": "alice", "workspace": str(work_dir), "risk_class": "high", + "phase": "plan" + if bool(spec["target"].get("destructive")) + else "apply", }, } intent_path = intents_dir / f"{command_name}.json" @@ -635,6 +644,9 @@ def prepare_job_approved() -> None: work_dir, ) + def prepare_job_stoppable() -> None: + prepare_job_submitted() + def prepare_job_resumable() -> None: prepare_job_approved() @@ -711,6 +723,18 @@ def prepare_job_pack_built() -> None: "approver", "--json", ], + "job_stop": [ + str(gait_path), + "job", + "stop", + "--id", + "job_budget", + "--root", + str(job_root), + "--actor", + "approver", + "--json", + ], "job_resume": [ str(gait_path), "job", @@ -817,6 +841,7 @@ def prepare_job_pack_built() -> None: pre_hooks["job_submit"] = lambda _: reset_job_state() pre_hooks["job_checkpoint_add"] = lambda _: prepare_job_submitted() pre_hooks["job_approve"] = lambda _: prepare_job_pending_approval() + pre_hooks["job_stop"] = lambda _: prepare_job_stoppable() pre_hooks["job_resume"] = lambda _: prepare_job_resumable() pre_hooks["pack_build_job"] = lambda _: prepare_job_resumable() pre_hooks["pack_verify_job"] = lambda _: prepare_job_pack_built() diff --git a/scripts/test_hardening_acceptance.sh b/scripts/test_hardening_acceptance.sh index 0070445..ef5a8f0 100755 --- a/scripts/test_hardening_acceptance.sh +++ b/scripts/test_hardening_acceptance.sh @@ -38,5 +38,5 @@ echo "[hardening-acceptance] chaos trace uniqueness gate" bash scripts/test_chaos_trace_uniqueness.sh echo "[hardening-acceptance] hardening integration and e2e checks" -go test ./internal/integration -run 'TestConcurrentGateRateLimitStateIsDeterministic|TestConcurrentSessionAppendStateIsDeterministic|TestSessionSwarmContentionBudget' -count=1 -go test ./internal/e2e -run 'TestCLIRegressExitCodes|TestCLIPolicyTestExitCodes|TestCLIDoctor|TestCLIDelegateAndGateRequireDelegationFlow' -count=1 +go test ./internal/integration -run 'TestConcurrentGateRateLimitStateIsDeterministic|TestConcurrentSessionAppendStateIsDeterministic|TestSessionSwarmContentionBudget|TestStopLatencySLOForEmergencyStopAcknowledgment|TestEmergencyStopBackpressureHasZeroPostStopSideEffects' -count=1 +go test ./internal/e2e -run 'TestCLIRegressExitCodes|TestCLIPolicyTestExitCodes|TestCLIDoctor|TestCLIDelegateAndGateRequireDelegationFlow|TestCLIStopLatencyAndEmergencyStopPreemption' -count=1 diff --git a/scripts/test_job_runtime_chaos.sh b/scripts/test_job_runtime_chaos.sh index 06f3fdb..9a1fa71 100644 --- a/scripts/test_job_runtime_chaos.sh +++ b/scripts/test_job_runtime_chaos.sh @@ -4,5 +4,8 @@ set -euo pipefail echo "[chaos-job] runtime transition contention and fail-closed invariants" go test ./core/jobruntime -run 'TestDecisionNeededResumeRequiresApproval|TestResumeEnvironmentMismatchFailClosed|TestInvalidPauseTransition|TestAddCheckpointValidationAndStateTransitions' -count=5 +echo "[chaos-job] emergency stop latency and post-stop side-effect invariants" +go test ./internal/integration ./internal/e2e -run 'StopLatency|EmergencyStop' -count=1 + echo "[chaos-job] integration job->pack->regress loop stability" go test ./internal/integration -run 'TestJobRuntimeToPackRoundTrip|TestRegressInitFromPackSource' -count=3 diff --git a/scripts/test_release_smoke.sh b/scripts/test_release_smoke.sh index 4cd39d7..8ac04e6 100644 --- a/scripts/test_release_smoke.sh +++ b/scripts/test_release_smoke.sh @@ -67,6 +67,96 @@ if payload.get("verdict") != "allow": raise SystemExit(f"expected allow verdict, got={payload.get('verdict')}") PY +echo "==> emergency stop preemption" +JOBS_ROOT="$WORK_DIR/jobs" +"$BIN_PATH" job submit --id job_release_stop --root "$JOBS_ROOT" --json > job_submit.json +STOP_START_NS="$(python3 - <<'PY' +import time +print(time.time_ns()) +PY +)" +"$BIN_PATH" job stop --id job_release_stop --root "$JOBS_ROOT" --actor release-gate --json > job_stop.json +STOP_END_NS="$(python3 - <<'PY' +import time +print(time.time_ns()) +PY +)" + +cat > "$WORK_DIR/mcp_stop_call.json" <<'JSON' +{ + "name": "tool.delete", + "args": {"path": "/tmp/release.txt"}, + "targets": [{"kind": "path", "value": "/tmp/release.txt", "operation": "delete", "destructive": true}], + "arg_provenance": [{"arg_path": "$.path", "source": "user"}], + "context": { + "identity": "release", + "workspace": "/repo/gait", + "risk_class": "high", + "session_id": "release-stop", + "job_id": "job_release_stop", + "phase": "apply" + } +} +JSON + +set +e +"$BIN_PATH" mcp proxy \ + --policy "$REPO_ROOT/examples/policy/base_low_risk.yaml" \ + --call "$WORK_DIR/mcp_stop_call.json" \ + --job-root "$JOBS_ROOT" \ + --json > mcp_stop.json +MCP_STOP_EXIT="$?" +set -e +if [[ "$MCP_STOP_EXIT" -ne 3 ]]; then + echo "expected mcp proxy emergency stop preemption exit 3, got $MCP_STOP_EXIT" >&2 + exit 1 +fi + +"$BIN_PATH" job inspect --id job_release_stop --root "$JOBS_ROOT" --json > job_inspect.json + +python3 - <<'PY' "$STOP_START_NS" "$STOP_END_NS" +import json +import sys +from pathlib import Path + +stop_start_ns = int(sys.argv[1]) +stop_end_ns = int(sys.argv[2]) +stop_ack_ms = max(0, (stop_end_ns - stop_start_ns) // 1_000_000) + +stop_payload = json.loads(Path("job_stop.json").read_text(encoding="utf-8")) +if not stop_payload.get("ok"): + raise SystemExit(f"job stop failed: {stop_payload}") +job = stop_payload.get("job", {}) +if job.get("status") != "emergency_stopped" or job.get("stop_reason") != "emergency_stopped": + raise SystemExit(f"unexpected stop status payload: {stop_payload}") +if stop_ack_ms > 15000: + raise SystemExit(f"stop_ack_ms over release threshold: {stop_ack_ms}") + +mcp_payload = json.loads(Path("mcp_stop.json").read_text(encoding="utf-8")) +if not mcp_payload.get("ok"): + raise SystemExit(f"mcp stop payload not ok: {mcp_payload}") +if mcp_payload.get("verdict") != "block": + raise SystemExit(f"expected block verdict, got {mcp_payload.get('verdict')}") +if mcp_payload.get("executed"): + raise SystemExit("expected executed=false for emergency-stop preemption") +reasons = mcp_payload.get("reason_codes", []) +if "emergency_stop_preempted" not in reasons: + raise SystemExit(f"missing emergency_stop_preempted reason code: {reasons}") + +inspect_payload = json.loads(Path("job_inspect.json").read_text(encoding="utf-8")) +events = inspect_payload.get("events", []) +ack_index = next((idx for idx, event in enumerate(events) if event.get("type") == "emergency_stop_acknowledged"), -1) +if ack_index < 0: + raise SystemExit("missing emergency_stop_acknowledged event") +post_stop_events = events[ack_index + 1 :] +blocked = sum(1 for event in post_stop_events if event.get("type") == "dispatch_blocked") +post_stop_side_effects = sum(1 for event in post_stop_events if event.get("type") != "dispatch_blocked") +if blocked < 1: + raise SystemExit("expected at least one dispatch_blocked event after stop") +if post_stop_side_effects != 0: + raise SystemExit(f"post_stop_side_effects must be 0, got={post_stop_side_effects}") +PY + echo "==> regress init -> run" "$BIN_PATH" regress init --from run_demo --json > regress_init.json "$BIN_PATH" regress run --json > regress_run.json