diff --git a/cmd/obol/main.go b/cmd/obol/main.go index 39f3a00..563f968 100644 --- a/cmd/obol/main.go +++ b/cmd/obol/main.go @@ -7,6 +7,7 @@ import ( "path/filepath" "strings" + "github.com/ObolNetwork/obol-stack/internal/agent" "github.com/ObolNetwork/obol-stack/internal/app" "github.com/ObolNetwork/obol-stack/internal/config" "github.com/ObolNetwork/obol-stack/internal/executor" @@ -45,6 +46,9 @@ COMMANDS: stack down Stop the Obol Stack stack purge Delete stack config (use --force to also delete data) + Obol Agent: + agent init Initialize Obol Agent with Google API key + Kubernetes Tools (with auto-configured KUBECONFIG): kubectl Run kubectl with stack kubeconfig (passthrough) helm Run helm with stack kubeconfig (passthrough) @@ -151,6 +155,43 @@ GLOBAL OPTIONS: }, }, // ============================================================ + // Obol Agent Commands + // ============================================================ + { + Name: "agent", + Usage: "Manage Obol Agent", + Subcommands: []*cli.Command{ + { + Name: "init", + Usage: "Initialize Obol Agent with Google API key", + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "google-api-key", + Aliases: []string{"g"}, + Usage: "Google API key for Obol Agent (required for AI features)", + EnvVars: []string{"GOOGLE_API_KEY"}, + }, + }, + Action: func(c *cli.Context) error { + googleAPIKey := c.String("google-api-key") + if err := agent.Init(cfg, googleAPIKey); err != nil { + // Init can fail before it has created its own logger, so record the failure here. + stackID := stack.GetStackID(cfg) + l, cleanup := logging.NewSlogLogger(logging.LoggerConfig{ + StateDir: cfg.StateDir, + StackID: stackID, + }) + // Run the logger's cleanup function instead of discarding it. + defer cleanup() + l.Error("Failed to initialize agent", "error", err.Error()) + return err + } + return nil + }, + }, + }, + }, + // ============================================================ // Kubernetes Tool Passthroughs (with auto-configured KUBECONFIG) // ============================================================ { diff --git a/internal/agent/agent.go b/internal/agent/agent.go new file mode 100644 index 0000000..e42db90 
--- /dev/null +++ b/internal/agent/agent.go @@ -0,0 +1,94 @@ +// Package agent sets up the in-cluster resources required by the Obol Agent. +package agent + +import ( + "encoding/base64" + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/ObolNetwork/obol-stack/internal/config" + "github.com/ObolNetwork/obol-stack/internal/executor" + "github.com/ObolNetwork/obol-stack/internal/logging" + "github.com/ObolNetwork/obol-stack/internal/stack" +) + +const ( + kubeconfigFile = "kubeconfig.yaml" +) + +// Init creates the "agent" namespace and stores googleAPIKey in the +// "obol-agent-google-api-key" secret inside it, using the stack's kubectl +// binary and kubeconfig. Both resources are applied with "kubectl apply", so +// calling Init again updates them in place. +// +// It returns an error if the kubeconfig cannot be accessed, the stack ID is +// empty, googleAPIKey is empty, or any kubectl invocation fails. +func Init(cfg *config.Config, googleAPIKey string) error { + kubeconfigPath := filepath.Join(cfg.ConfigDir, kubeconfigFile) + + // Check if kubeconfig exists (stack must be running) + if _, err := os.Stat(kubeconfigPath); err != nil { + if os.IsNotExist(err) { + return fmt.Errorf("stack not running, use 'obol stack up' first") + } + // Permission or I/O problems must not be mistaken for a usable kubeconfig. + return fmt.Errorf("failed to access kubeconfig %s: %w", kubeconfigPath, err) + } + + // Get stack ID for logging + stackID := stack.GetStackID(cfg) + if stackID == "" { + return fmt.Errorf("stack ID not found, run 'obol stack init' first") + } + + // Create logger and executor + l, cleanup := logging.NewSlogLogger(logging.LoggerConfig{ + StateDir: cfg.StateDir, + StackID: stackID, + }) + defer cleanup() + + exec := executor.New(l.Logger) + defer exec.Close() + + // Validate Google API key was provided + if googleAPIKey == "" { + l.Error("Google API key required") + return fmt.Errorf("Google API key required via --google-api-key flag or GOOGLE_API_KEY environment variable") + } + + l.Info("Initializing Obol Agent") + l.Info("Creating Google API key secret for Obol Agent") + + kubectlPath := filepath.Join(cfg.BinDir, "kubectl") + + // Create namespace (idempotent) + nsCmd := exec.Command(kubectlPath, "--kubeconfig", kubeconfigPath, "create", "namespace", "agent", "--dry-run=client", "-o", "yaml") + nsYAML, err := nsCmd.Output() + if err != nil { + return fmt.Errorf("failed to generate namespace manifest: %w", err) + } + applyNs := exec.CommandWithOutput(kubectlPath, "--kubeconfig", kubeconfigPath, "apply", "-f", "-") + applyNs.SetStdin(strings.NewReader(string(nsYAML))) + if err 
:= applyNs.Run(); err != nil { + return fmt.Errorf("failed to create agent namespace: %w", err) + } + + // Create secret (idempotent). The manifest is built in-process (kubectl accepts + // JSON as well as YAML) and piped over stdin, so the API key never appears in + // a kubectl argument list. + secretManifest := fmt.Sprintf(`{"apiVersion":"v1","kind":"Secret","metadata":{"name":"obol-agent-google-api-key","namespace":"agent"},"type":"Opaque","data":{"GOOGLE_API_KEY":"%s"}}`, base64.StdEncoding.EncodeToString([]byte(googleAPIKey))) + applySecret := exec.CommandWithOutput(kubectlPath, "--kubeconfig", kubeconfigPath, "apply", "-f", "-") + applySecret.SetStdin(strings.NewReader(secretManifest)) + if err := applySecret.Run(); err != nil { + return fmt.Errorf("failed to create Google API key secret: %w", err) + } + + l.Success("Google API key secret created") + l.Success("Obol Agent initialized successfully") + l.Info("The Obol Agent deployment will now have access to Google API services") + + return nil +} diff --git a/internal/embed/defaults/obol-stack-defaults/templates/obol-agent.yaml b/internal/embed/defaults/obol-stack-defaults/templates/obol-agent.yaml new file mode 100644 index 0000000..a48f847 --- /dev/null +++ b/internal/embed/defaults/obol-stack-defaults/templates/obol-agent.yaml @@ -0,0 +1,283 @@ +--- +# Obol Agent Kubernetes Manifest +# This manifest deploys the Obol AI Agent with namespace-scoped RBAC permissions +# The agent can read cluster-wide resources (nodes, namespaces) but can only modify +# resources in specific namespaces: agent, ethereum, l1, monitoring + +#------------------------------------------------------------------------------ +# ServiceAccount - Identity for the Obol Agent pod +#------------------------------------------------------------------------------ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: obol-agent + namespace: agent + +--- +#------------------------------------------------------------------------------ +# ClusterRole - Read-only access to 
cluster-wide resources +# Allows the agent to list namespaces and nodes across the entire cluster +#------------------------------------------------------------------------------ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: obol-agent-cluster-reader +rules: + - apiGroups: [""] + resources: ["namespaces", "nodes"] + verbs: ["get", "list", "watch"] # Read-only access + +--- +#------------------------------------------------------------------------------ +# ClusterRoleBinding - Grants cluster-wide read access to the agent +#------------------------------------------------------------------------------ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: obol-agent-cluster-reader-binding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: obol-agent-cluster-reader +subjects: + - kind: ServiceAccount + name: obol-agent + namespace: agent + +--- +#------------------------------------------------------------------------------ +# Namespace-Scoped Roles +# These roles grant create/update/patch permissions ONLY in specific namespaces +# Permissions: get, list, watch, create, update, patch (no delete) +#------------------------------------------------------------------------------ + +# Role for 'agent' namespace +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: obol-agent-role + namespace: agent +rules: + - apiGroups: [""] # Core API group + resources: ["pods", "services", "endpoints", "persistentvolumeclaims", "configmaps", "secrets"] + verbs: ["get", "list", "watch", "create", "update", "patch"] + - apiGroups: ["apps"] # Apps API group + resources: ["deployments", "statefulsets", "daemonsets", "replicasets"] + verbs: ["get", "list", "watch", "create", "update", "patch"] + - apiGroups: ["batch"] # Batch API group + resources: ["jobs", "cronjobs"] + verbs: ["get", "list", "watch", "create", "update", "patch"] + - apiGroups: [""] + resources: ["pods/log"] # 
Access to pod logs + verbs: ["get"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: obol-agent-binding + namespace: agent +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: obol-agent-role +subjects: + - kind: ServiceAccount + name: obol-agent + namespace: agent + +--- +# Role for 'ethereum' namespace +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: obol-agent-role + namespace: ethereum +rules: + - apiGroups: [""] + resources: ["pods", "services", "endpoints", "persistentvolumeclaims", "configmaps", "secrets"] + verbs: ["get", "list", "watch", "create", "update", "patch"] + - apiGroups: ["apps"] + resources: ["deployments", "statefulsets", "daemonsets", "replicasets"] + verbs: ["get", "list", "watch", "create", "update", "patch"] + - apiGroups: ["batch"] + resources: ["jobs", "cronjobs"] + verbs: ["get", "list", "watch", "create", "update", "patch"] + - apiGroups: [""] + resources: ["pods/log"] + verbs: ["get"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: obol-agent-binding + namespace: ethereum +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: obol-agent-role +subjects: + - kind: ServiceAccount + name: obol-agent + namespace: agent + +--- +# Role for 'l1' namespace +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: obol-agent-role + namespace: l1 +rules: + - apiGroups: [""] + resources: ["pods", "services", "endpoints", "persistentvolumeclaims", "configmaps", "secrets"] + verbs: ["get", "list", "watch", "create", "update", "patch"] + - apiGroups: ["apps"] + resources: ["deployments", "statefulsets", "daemonsets", "replicasets"] + verbs: ["get", "list", "watch", "create", "update", "patch"] + - apiGroups: ["batch"] + resources: ["jobs", "cronjobs"] + verbs: ["get", "list", "watch", "create", "update", "patch"] + - apiGroups: [""] + resources: ["pods/log"] + verbs: ["get"] +--- +apiVersion: 
rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: obol-agent-binding + namespace: l1 +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: obol-agent-role +subjects: + - kind: ServiceAccount + name: obol-agent + namespace: agent + +--- +# Role for 'monitoring' namespace +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: obol-agent-role + namespace: monitoring +rules: + - apiGroups: [""] + resources: ["pods", "services", "endpoints", "persistentvolumeclaims", "configmaps", "secrets"] + verbs: ["get", "list", "watch", "create", "update", "patch"] + - apiGroups: ["apps"] + resources: ["deployments", "statefulsets", "daemonsets", "replicasets"] + verbs: ["get", "list", "watch", "create", "update", "patch"] + - apiGroups: ["batch"] + resources: ["jobs", "cronjobs"] + verbs: ["get", "list", "watch", "create", "update", "patch"] + - apiGroups: [""] + resources: ["pods/log"] + verbs: ["get"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: obol-agent-binding + namespace: monitoring +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: obol-agent-role +subjects: + - kind: ServiceAccount + name: obol-agent + namespace: agent + +--- +#------------------------------------------------------------------------------ +# Deployment - Obol Agent Application +# The agent provides AI-powered Kubernetes and Obol cluster management via MCP +#------------------------------------------------------------------------------ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: obol-agent + namespace: agent + labels: + app: obol-agent +spec: + replicas: 1 # Single instance deployment + selector: + matchLabels: + app: obol-agent + template: + metadata: + labels: + app: obol-agent + spec: + serviceAccountName: obol-agent # Uses the ServiceAccount created above for RBAC + containers: + - name: obol-agent + image: 
us-east4-docker.pkg.dev/prj-d-playgrounds-f0cb/obol-agent/obol-agent-ag-ui:latest + imagePullPolicy: Always # Always pull latest image + ports: + - name: http + containerPort: 8000 + protocol: TCP + env: + # REQUIRED: Google API key from Kubernetes secret + # Secret created via: obol agent init --google-api-key=<key> + - name: GOOGLE_API_KEY + valueFrom: + secretKeyRef: + name: obol-agent-google-api-key + key: GOOGLE_API_KEY + optional: true # Allow deployment even if secret doesn't exist + + # PUBLIC_MODE controls Kubernetes MCP access + # false = Enable Kubernetes API access (uses RBAC permissions above) + # true = Disable Kubernetes API access (for public deployments) + - name: PUBLIC_MODE + value: "false" + + # Health checks ensure the pod is ready to receive traffic + livenessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 30 + periodSeconds: 10 + readinessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 10 + periodSeconds: 5 + + # Resource limits prevent the agent from consuming too many cluster resources + resources: + requests: # Minimum guaranteed resources + cpu: 500m # 0.5 CPU cores + memory: 1Gi # 1 GiB RAM + limits: # Maximum allowed resources + cpu: 2000m # 2 CPU cores + memory: 4Gi # 4 GiB RAM + +--- +#------------------------------------------------------------------------------ +# Service - Exposes the Obol Agent within the cluster +# Access the agent at: http://obol-agent.agent.svc.cluster.local:8000 +#------------------------------------------------------------------------------ +apiVersion: v1 +kind: Service +metadata: + name: obol-agent + namespace: agent + labels: + app: obol-agent +spec: + type: ClusterIP # Internal cluster access only (use Ingress for external access) + ports: + - port: 8000 # Service port + targetPort: http # Container port name + protocol: TCP + name: http + selector: + app: obol-agent # Routes traffic to pods with this label