diff --git a/.gitignore b/.gitignore index de86135..a723b2b 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ *.log .ansible/ .vault_pass +backups/ # Secrets and credentials *.env @@ -13,3 +14,13 @@ vault.yml id_rsa* host_vars/ group_vars/ + +# Keep inventory vars tracked (except vault secrets) +!inventories/ +!inventories/**/ +!inventories/**/group_vars/ +!inventories/**/group_vars/*.yml +!inventories/**/host_vars/ +!inventories/**/host_vars/.gitkeep +inventories/**/group_vars/vault.yml +inventories/**/group_vars/vault.manual.refactor.yml diff --git a/AGENT_HANDOFF.md b/AGENT_HANDOFF.md new file mode 100644 index 0000000..3617b11 --- /dev/null +++ b/AGENT_HANDOFF.md @@ -0,0 +1,27 @@ +# Codex Agent Handoff + +## Current State (2026-03-01) +- Enterprise install works end-to-end for `dev-main` + `andrea` profiles; control-plane Spins deployed via Docker Compose stacks under `/home/efra/openclaw-control-plane/` and managed via `make reinstall CONFIRM=1 ENV=dev LIMIT=zennook`. +- Postgres auth reconciliation now uses profile-specific admin credentials before flagging health check failures (`roles/openclaw_control_plane/tasks/profile.yml`). +- Smoke flow verifies queue + control API by checking `/tasks/{taskId}` until the task reaches a terminal status; `ops/smoke.sh` now retries and reports last status. +- OAuth login is manageable via `make oauth-login PROFILES="dev-main andrea" OAUTH_PROVIDER=openai-codex`, automatically sources `/etc/openclaw/secrets/.env`, and lists auth profiles when done (`ops/oauth-login.sh`). +- Secrets refactor pipeline adds `ops/secrets-refactor.sh`, `ops/validate-secrets.sh`, and the Makefile target `make secrets-refactor` (docs updated accordingly). + +## Handoff Checklist +1. Confirm `/etc/openclaw/secrets/dev-main.env` and `/etc/openclaw/secrets/andrea.env` contain the required temp tokens; backups are stored under `backups/`. +2. 
Run `make reinstall CONFIRM=1 ENV=dev LIMIT=zennook` if the environment is dirty again; the playbooks already handle purge/install/smoke in one shot. +3. After OAuth login you still need to populate `auth-profiles.json` for each agent; run `openclaw --profile models auth list` to see active entries. +4. Verify Telegram tokens via `cat /etc/openclaw/secrets/dev-main.env` (mask the values in outputs). They are also referenced in `inventories/dev/group_vars/vault.yml` and each control-plane `.env` file. + +## Next Steps for Codex agent +- Finish the implementation plan for Ansible multi-agent deployment (already captured elsewhere, but double-check architecture docs and inventory). Copy actionable instructions into the reserved roadmap file. +- When writing PRs, include `@codex` mention, request a full review, and ask for architecture implementation plan per earlier requirements. +- Keep `ops/oauth-login.sh` and `ops/smoke.sh` in sync with any profile additions (e.g., add new profile names to `PROFILES` in `Makefile`). + +## Useful Commands +- `make secrets-refactor ENV=dev LIMIT=zennook` +- `make reinstall CONFIRM=1 ENV=dev LIMIT=zennook` +- `make oauth-login PROFILES="dev-main andrea" OAUTH_PROVIDER=openai-codex` +- `make smoke ENV=dev LIMIT=zennook` + +Keep notes in this file before handing off to another Codex agent; update the `next steps` section if you take new actions. 
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..31488f0 --- /dev/null +++ b/Makefile @@ -0,0 +1,73 @@ +SHELL := /usr/bin/env bash + +.DEFAULT_GOAL := help + +ENV ?= dev +INVENTORY ?= inventories/$(ENV)/hosts.yml +LIMIT ?= zennook +PROFILES ?= dev-main andrea +OAUTH_PROVIDER ?= openai-codex +MODEL_REF ?= openai-codex/gpt-5.3-codex + +.PHONY: help backup purge install auto-install cloudflare auth-sync oauth-login smoke reinstall secrets-refactor + +help: + @echo "OpenClaw Ops Targets" + @echo "" + @echo " make backup Backup current OpenClaw + control-plane state" + @echo " make purge CONFIRM=1 Purge deployed state and containers" + @echo " make install Install/reconcile enterprise + control-plane" + @echo " make auto-install Automated install flow (auth-sync + install + smoke)" + @echo " make secrets-refactor Build manual secrets migration file + validate vault" + @echo " make cloudflare Reconcile Cloudflare tunnel/service only" + @echo " make auth-sync Sync Codex creds from /home/efra/.codex to OpenClaw profiles" + @echo " make oauth-login Alias to make auth-sync (legacy name)" + @echo " make smoke Run post-install smoke checks" + @echo " make reinstall CONFIRM=1 backup + purge + install + smoke" + @echo "" + @echo "Variables:" + @echo " ENV=$(ENV) INVENTORY=$(INVENTORY) LIMIT=$(LIMIT)" + @echo " PROFILES='$(PROFILES)' OAUTH_PROVIDER=$(OAUTH_PROVIDER) MODEL_REF=$(MODEL_REF)" + @echo " AUTO_PURGE=0 AUTO_BACKUP=0 (used by auto-install)" + +backup: + @ENV="$(ENV)" INVENTORY="$(INVENTORY)" LIMIT="$(LIMIT)" ./ops/backup.sh + +purge: + @if [[ "$(CONFIRM)" != "1" ]]; then echo "Use: make purge CONFIRM=1"; exit 1; fi + @ENV="$(ENV)" INVENTORY="$(INVENTORY)" LIMIT="$(LIMIT)" ./ops/purge.sh --yes + +install: + @ENV="$(ENV)" INVENTORY="$(INVENTORY)" LIMIT="$(LIMIT)" ./ops/install.sh + +auto-install: + @if [[ "$(AUTO_BACKUP)" == "1" ]]; then \ + $(MAKE) backup ENV="$(ENV)" INVENTORY="$(INVENTORY)" LIMIT="$(LIMIT)"; \ + fi + @if [[ "$(AUTO_PURGE)" == "1" ]]; 
then \ + $(MAKE) purge CONFIRM=1 ENV="$(ENV)" INVENTORY="$(INVENTORY)" LIMIT="$(LIMIT)"; \ + fi + @$(MAKE) auth-sync ENV="$(ENV)" INVENTORY="$(INVENTORY)" LIMIT="$(LIMIT)" PROFILES="$(PROFILES)" OAUTH_PROVIDER="$(OAUTH_PROVIDER)" MODEL_REF="$(MODEL_REF)" + @$(MAKE) install ENV="$(ENV)" INVENTORY="$(INVENTORY)" LIMIT="$(LIMIT)" + @$(MAKE) smoke ENV="$(ENV)" INVENTORY="$(INVENTORY)" LIMIT="$(LIMIT)" + +secrets-refactor: + @ENV="$(ENV)" INVENTORY="$(INVENTORY)" LIMIT="$(LIMIT)" ./ops/secrets-refactor.sh + +cloudflare: + @ENV="$(ENV)" INVENTORY="$(INVENTORY)" LIMIT="$(LIMIT)" ./ops/cloudflare-reconcile.sh + +auth-sync: + @ENV="$(ENV)" INVENTORY="$(INVENTORY)" LIMIT="$(LIMIT)" PROFILES="$(PROFILES)" OAUTH_PROVIDER="$(OAUTH_PROVIDER)" MODEL_REF="$(MODEL_REF)" ./ops/auth-sync.sh + +oauth-login: auth-sync + +smoke: + @ENV="$(ENV)" INVENTORY="$(INVENTORY)" LIMIT="$(LIMIT)" ./ops/smoke.sh + +reinstall: + @if [[ "$(CONFIRM)" != "1" ]]; then echo "Use: make reinstall CONFIRM=1"; exit 1; fi + @$(MAKE) backup ENV="$(ENV)" INVENTORY="$(INVENTORY)" LIMIT="$(LIMIT)" + @$(MAKE) purge CONFIRM=1 ENV="$(ENV)" INVENTORY="$(INVENTORY)" LIMIT="$(LIMIT)" + @$(MAKE) install ENV="$(ENV)" INVENTORY="$(INVENTORY)" LIMIT="$(LIMIT)" + @$(MAKE) smoke ENV="$(ENV)" INVENTORY="$(INVENTORY)" LIMIT="$(LIMIT)" diff --git a/README.md b/README.md index 8bf4903..90995eb 100644 --- a/README.md +++ b/README.md @@ -1,293 +1,196 @@ -# OpenClaw Ansible Installer +# ClawOps Protocol Suite (Ansible Base) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![Lint](https://github.com/openclaw/openclaw-ansible/actions/workflows/lint.yml/badge.svg)](https://github.com/openclaw/openclaw-ansible/actions/workflows/lint.yml) [![Ansible](https://img.shields.io/badge/Ansible-2.14+-blue.svg)](https://www.ansible.com/) -[![Multi-OS](https://img.shields.io/badge/OS-Debian%20%7C%20Ubuntu-orange.svg)](https://www.debian.org/) 
+[![Multi-OS](https://img.shields.io/badge/OS-Debian%20%7C%20Ubuntu%20%7C%20Fedora-orange.svg)](https://www.debian.org/) -Automated, hardened installation of [OpenClaw](https://github.com/openclaw/openclaw) with Docker and Tailscale VPN support for Debian/Ubuntu Linux. +Suite operativa para llevar OpenClaw a un estándar de despliegue enterprise: perfiles múltiples, colas Stage 2, auth-sync centralizado, smoke tests y protocolos day-2 reproducibles. -## ⚠️ macOS Support: Deprecated & Disabled +## Por Qué Nace Esta Suite -**Effective 2026-02-06, support for bare-metal macOS installations has been removed from this playbook.** +Nace para resolver un problema operativo concreto: OpenClaw funciona como producto, pero en entornos reales faltaba una capa robusta de infraestructura y protocolo para operar múltiples perfiles y agentes de forma repetible. -### Why? -The underlying project currently requires system-level permissions and configurations that introduce significant security risks when executed on a primary host OS. To protect user data and system integrity, we have disabled bare-metal execution. +Esta suite aparece para cerrar la brecha entre: -### What does this mean? -* The playbook will now explicitly fail if run on a `Darwin` (macOS) system. -* We strongly discourage manual workarounds to bypass this check. -* **Future Support:** We are evaluating a virtualization-first strategy (using Vagrant or Docker) to provide a sandboxed environment for this project in the future. +- "funciona en una máquina" y "opera estable en equipos/ambientes". +- "instalación manual" y "ciclo completo backup-purge-install-smoke". +- "credenciales dispersas" y "auth-sync controlado por perfil/agente". +- "ejecución sin trazabilidad" y "observabilidad/control con API y eventos". 
-## Features +## Falencias Que Cubre (Frente a Uso Base de OpenClaw) -- 🔒 **Firewall-first**: UFW firewall + Docker isolation -- 🛡️ **Fail2ban**: SSH brute-force protection out of the box -- 🔄 **Auto-updates**: Automatic security patches via unattended-upgrades -- 🔐 **Tailscale VPN**: Secure remote access without exposing services -- 🐳 **Docker**: Docker CE with security hardening -- 🚀 **One-command install**: Complete setup in minutes -- 🔧 **Auto-configuration**: DBus, systemd, environment setup -- 📦 **pnpm installation**: Uses `pnpm install -g openclaw@latest` +1. Falta de protocolo multi-perfil/multi-agente: se añade `openclaw_enterprise` y servicios por perfil. +2. Falta de orquestación de colas y control central: se añade Stage 2 (`ingress/router/worker/broker/control-api`) con NATS+Postgres. +3. Falta de sincronización de credenciales a escala: se añade `make auth-sync` con escritura de `auth-profiles.json` por agente. +4. Falta de operación day-2 unificada: se estandariza `make backup/purge/install/smoke/reinstall`. +5. Falta de validación post-despliegue: se añade smoke de salud + flujo de cola terminal. +6. Falta de visibilidad en full mode: se integra Prometheus/Grafana/Uptime Kuma. -## Quick Start +## Qué Es y Qué No Es -### Release Mode (Recommended) +### Qué es -Install the latest stable version from npm: +- Base Ansible de despliegue y operación (protocolo operativo). +- Suite de automatización para OpenClaw en escenarios enterprise. +- Capa de estandarización para equipos DevOps/Platform. -```bash -curl -fsSL https://raw.githubusercontent.com/openclaw/openclaw-ansible/main/install.sh | bash -``` +### Qué no es -### Development Mode +- No reemplaza el repositorio principal de OpenClaw. +- No es una reescritura del core de OpenClaw. +- No es un instalador "one-click" opaco; es infraestructura explícita y auditable. 
-Install from source for development or testing: +## Identidad de la Suite -```bash -# Clone the installer -git clone https://github.com/openclaw/openclaw-ansible.git -cd openclaw-ansible +Nombre operativo recomendado: `ClawOps Protocol Suite`. -# Install in development mode -ansible-playbook playbook.yml --ask-become-pass -e openclaw_install_mode=development -``` +Descripción corta recomendada para GitHub: -## What Gets Installed +`Suite Ansible para operación enterprise de OpenClaw con Stage 2 Control Plane (NATS/NestJS), auth-sync Codex, smoke tests y ciclo day-2 reproducible.` -- Tailscale (mesh VPN) -- UFW firewall (SSH + Tailscale ports only) -- Docker CE + Compose V2 (for sandboxes) -- Node.js 22.x + pnpm -- OpenClaw on host (not containerized) -- Systemd service (auto-start) +## Arquitectura Rápida -## Post-Install +```mermaid +flowchart LR + A[Makefile + ops scripts] --> B[playbooks enterprise/control-plane] + B --> C[role: openclaw] + B --> D[role: openclaw_enterprise] + B --> E[role: openclaw_control_plane] + B --> F[role: openclaw_cloudflare_tunnel] -After installation completes, switch to the openclaw user: + D --> G[Gateway profiles] + E --> H[Stage 2 full/lite] + F --> I[Cloudflare loopback exposure opcional] -```bash -sudo su - openclaw + H --> H1[NATS JetStream] + H --> H2[Postgres] + H --> H3[Ingress/Router/Workers/Broker] + H --> H4[Control API] + H --> H5[Prom/Grafana/Kuma full mode] ``` -Then run the quick-start onboarding wizard: - -```bash -openclaw onboard --install-daemon +## Flujo Mensajería/Cola + +```mermaid +sequenceDiagram + autonumber + participant ING as ingress + participant RT as router + participant NATS as NATS + participant WK as worker- + participant BR as broker + participant PG as postgres + participant API as control-api + + ING->>NATS: tasks.ingress + RT->>NATS: tasks.agent. + WK->>NATS: results.agent. 
+ BR->>PG: upsert tasks + task_events + API->>PG: consulta + confirm/reject ``` -This will: -- Guide you through the setup wizard -- Configure your messaging provider (WhatsApp/Telegram/Signal) -- Install and start the daemon service - -### Alternative Manual Setup +## Operación Recomendada ```bash -# Configure manually -openclaw configure - -# Login to provider -openclaw providers login - -# Test gateway -openclaw gateway - -# Install as daemon -openclaw daemon install -openclaw daemon start - -# Check status -openclaw status -openclaw logs +make backup +make purge CONFIRM=1 +make install +make auth-sync PROFILES="dev-main andrea" OAUTH_PROVIDER=openai-codex +make smoke ``` -## Installation Modes - -### Release Mode (Default) -- Installs via `pnpm install -g openclaw@latest` -- Gets latest stable version from npm registry -- Automatic updates via `pnpm install -g openclaw@latest` -- **Recommended for production** - -### Development Mode -- Clones from `https://github.com/openclaw/openclaw.git` -- Builds from source with `pnpm build` -- Symlinks binary to `~/.local/bin/openclaw` -- Adds helpful aliases: - - `openclaw-rebuild` - Rebuild after code changes - - `openclaw-dev` - Navigate to repo directory - - `openclaw-pull` - Pull, install deps, and rebuild -- **Recommended for development and testing** - -Enable with: `-e openclaw_install_mode=development` - -## Security - -- **Public ports**: SSH (22), Tailscale (41641/udp) only -- **Fail2ban**: SSH brute-force protection (5 attempts → 1 hour ban) -- **Automatic updates**: Security patches via unattended-upgrades -- **Docker isolation**: Containers can't expose ports externally (DOCKER-USER chain) -- **Non-root**: OpenClaw runs as unprivileged user -- **Scoped sudo**: Limited to service management (not full root) -- **Systemd hardening**: NoNewPrivileges, PrivateTmp, ProtectSystem - -Verify: `nmap -p- YOUR_SERVER_IP` should show only port 22 open. 
- -### Security Note - -For high-security environments, audit before running: +Ciclo completo: ```bash -git clone https://github.com/openclaw/openclaw-ansible.git -cd openclaw-ansible -# Review playbook.yml and roles/ -ansible-playbook playbook.yml --check --diff # Dry run -ansible-playbook playbook.yml --ask-become-pass +make reinstall CONFIRM=1 ``` -## Documentation - -- [Configuration Guide](docs/configuration.md) - All configuration options -- [Development Mode](docs/development-mode.md) - Build from source -- [Security Architecture](docs/security.md) - Security details -- [Technical Details](docs/architecture.md) - Architecture overview -- [Troubleshooting](docs/troubleshooting.md) - Common issues -- [Agent Guidelines](AGENTS.md) - AI agent instructions - -## Requirements - -- Debian 11+ or Ubuntu 20.04+ -- Root/sudo access -- Internet connection - -## What Gets Installed - -- Tailscale (mesh VPN) -- UFW firewall (SSH + Tailscale ports only) -- Docker CE + Compose V2 (for sandboxes) -- Node.js 22.x + pnpm -- OpenClaw on host (not containerized) -- Systemd service (auto-start) - -## Manual Installation - -### Release Mode (Default) - -```bash -# Install dependencies -sudo apt update && sudo apt install -y ansible git - -# Clone repository -git clone https://github.com/openclaw/openclaw-ansible.git -cd openclaw-ansible - -# Install Ansible collections -ansible-galaxy collection install -r requirements.yml - -# Run installation -./run-playbook.sh -``` - -### Development Mode - -Build from source for development: - -```bash -# Same as above, but with development mode flag -./run-playbook.sh -e openclaw_install_mode=development - -# Or directly: -ansible-playbook playbook.yml --ask-become-pass -e openclaw_install_mode=development -``` - -This will: -- Clone openclaw repo to `~/code/openclaw` -- Run `pnpm install` and `pnpm build` -- Symlink binary to `~/.local/bin/openclaw` -- Add development aliases to `.bashrc` - -## Configuration Options - -All configuration 
variables can be found in [`roles/openclaw/defaults/main.yml`](roles/openclaw/defaults/main.yml). - -You can override them in three ways: - -### 1. Via Command Line - -```bash -ansible-playbook playbook.yml --ask-become-pass \ - -e openclaw_install_mode=development \ - -e "openclaw_ssh_keys=['ssh-ed25519 AAAAC3... user@host']" +## Targets Operativos + +| Target | Propósito | +|---|---| +| `make backup` | Respaldo de estado conocido | +| `make purge CONFIRM=1` | Purga runtime (destructivo) | +| `make install` | Reconciliación enterprise + control-plane | +| `make secrets-refactor` | Genera base de migración de secretos | +| `make cloudflare` | Reconciliación exclusiva del túnel | +| `make auth-sync` | Sincroniza credenciales Codex por perfil/agente | +| `make oauth-login` | Alias legado de auth-sync | +| `make smoke` | Prueba de salud + flujo de cola | +| `make reinstall CONFIRM=1` | Ciclo end-to-end | + +Variables clave: + +- `ENV` +- `INVENTORY` +- `LIMIT` +- `PROFILES` +- `OAUTH_PROVIDER` +- `MODEL_REF` + +## Auth-Sync No Interactivo + +`ops/auth-sync.sh`: + +1. Lee credenciales fuente (por defecto `/home/efra/.codex/*`). +2. Copia credenciales a `/home/openclaw/.codex`. +3. Escribe `auth-profiles.json` por agente en perfiles destino. +4. Fija modelo por perfil con `openclaw --profile models set `. + +Overrides vía `/home/efra/.env`: + +- `EFRA_CODEX_HOME` +- `EFRA_CODEX_AUTH_DEFAULT` +- `EFRA_CODEX_AUTH_ANDREA` + +## Pruebas y Calidad Operativa + +- `make smoke`: salud ingress/control-api + simulación de cola hasta estado terminal. +- `tests/run-tests.sh`: convergencia/verificación/idempotencia en harness Docker. +- `ansible-playbook --syntax-check`: validación de sintaxis de playbooks. + +## Estructura del Repo + +```text +. +├── playbook.yml +├── playbooks/ +├── roles/ +├── control-plane/ +├── inventories/ +├── ops/ +├── docs/ +└── tests/ ``` -### 2. 
Via Variables File +## Nota Legal Importante (MIT) -```bash -# Create vars.yml -cat > vars.yml << EOF -openclaw_install_mode: development -openclaw_ssh_keys: - - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGxxxxxxxx user@host" - - "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAAB... user@host" -openclaw_repo_url: "https://github.com/YOUR_USERNAME/openclaw.git" -openclaw_repo_branch: "feature-branch" -tailscale_authkey: "tskey-auth-xxxxxxxxxxxxx" -EOF - -# Use it -ansible-playbook playbook.yml --ask-become-pass -e @vars.yml -``` - -### 3. Edit Defaults Directly - -Edit `roles/openclaw/defaults/main.yml` before running the playbook. +Sí se puede modificar gran parte del repositorio, documentación y branding. -### Available Variables +Pero **no** se debe eliminar el cumplimiento de licencia MIT en copias sustanciales del software. En la práctica, eso implica mantener los avisos de licencia/copyright aplicables en los artefactos distribuidos. -| Variable | Default | Description | -|----------|---------|-------------| -| `openclaw_user` | `openclaw` | System user name | -| `openclaw_home` | `/home/openclaw` | User home directory | -| `openclaw_install_mode` | `release` | `release` or `development` | -| `openclaw_ssh_keys` | `[]` | List of SSH public keys | -| `openclaw_repo_url` | `https://github.com/openclaw/openclaw.git` | Git repository (dev mode) | -| `openclaw_repo_branch` | `main` | Git branch (dev mode) | -| `tailscale_authkey` | `""` | Tailscale auth key for auto-connect | -| `nodejs_version` | `22.x` | Node.js version to install | +Por eso, se puede crear identidad propia de suite, pero no borrar obligaciones legales de atribución/licencia. -See [`roles/openclaw/defaults/main.yml`](roles/openclaw/defaults/main.yml) for the complete list. +## Compatibilidad y SO -### Common Configuration Examples - -#### SSH Keys for Remote Access - -```bash -ansible-playbook playbook.yml --ask-become-pass \ - -e "openclaw_ssh_keys=['ssh-ed25519 AAAAC3... 
user@host']" -``` +- Debian +- Ubuntu +- Fedora -#### Development Mode with Custom Repository - -```bash -ansible-playbook playbook.yml --ask-become-pass \ - -e openclaw_install_mode=development \ - -e openclaw_repo_url=https://github.com/YOUR_USERNAME/openclaw.git \ - -e openclaw_repo_branch=feature-branch -``` - -#### Tailscale Auto-Connect - -```bash -ansible-playbook playbook.yml --ask-become-pass \ - -e tailscale_authkey=tskey-auth-xxxxxxxxxxxxx -``` +macOS bare-metal está bloqueado en este repo por política de seguridad operativa. -## License +## Documentación Principal -MIT - see [LICENSE](LICENSE) +- [Architecture](docs/architecture.md) +- [Enterprise Deployment](docs/enterprise-deployment.md) +- [Operations Workflow](docs/operations-workflow.md) +- [Operator Runbook](docs/operator-runbook.md) +- [Stage 2 Control Plane](docs/control-plane-stage2.md) +- [Installed Runtime Layout](docs/architecture-installed-layout.md) +- [Cloudflare Tunnel](docs/cloudflare-tunnel.md) +- [Troubleshooting](docs/troubleshooting.md) -## Support +## Licencia -- OpenClaw: https://github.com/openclaw/openclaw -- This installer: https://github.com/openclaw/openclaw-ansible/issues +MIT. Ver [LICENSE](LICENSE). 
diff --git a/ansible.cfg b/ansible.cfg new file mode 100644 index 0000000..432de83 --- /dev/null +++ b/ansible.cfg @@ -0,0 +1,10 @@ +[defaults] +roles_path = ./roles +inventory = ./inventories/dev/hosts.yml +interpreter_python = auto_silent +host_key_checking = False +retry_files_enabled = False +stdout_callback = yaml + +[privilege_escalation] +become = True diff --git a/control-plane/.dockerignore b/control-plane/.dockerignore new file mode 100644 index 0000000..840fb73 --- /dev/null +++ b/control-plane/.dockerignore @@ -0,0 +1,4 @@ +node_modules +dist +.git +npm-debug.log diff --git a/control-plane/Dockerfile b/control-plane/Dockerfile new file mode 100644 index 0000000..e53d274 --- /dev/null +++ b/control-plane/Dockerfile @@ -0,0 +1,22 @@ +FROM node:22-bookworm-slim AS deps +WORKDIR /app +COPY package.json ./ +RUN npm install --omit=optional + +FROM node:22-bookworm-slim AS build +WORKDIR /app +COPY --from=deps /app/node_modules ./node_modules +COPY package.json ./ +COPY tsconfig.json tsconfig.build.json ./ +COPY src ./src +RUN npm run build + +FROM node:22-bookworm-slim AS runtime +WORKDIR /app +ENV NODE_ENV=production +COPY --from=deps /app/node_modules ./node_modules +COPY --from=build /app/dist ./dist +COPY sql ./sql +ARG SERVICE=ingress +ENV SERVICE_NAME=${SERVICE} +CMD ["sh", "-c", "node dist/${SERVICE_NAME}/main.js"] diff --git a/control-plane/README.md b/control-plane/README.md new file mode 100644 index 0000000..bd3bdbb --- /dev/null +++ b/control-plane/README.md @@ -0,0 +1,70 @@ +# ClawOps Stage 2 Control Plane + +Control-plane de la suite operativa: microservicios NestJS + NATS JetStream para ruteo multi-agente, persistencia de estados y control de ejecución. + +## Servicios + +- `ingress`: recibe tráfico Telegram/API y publica `tasks.ingress`. +- `router`: clasifica y enruta a `tasks.agent.`. +- `worker`: consume por agente y publica `results.agent.`. +- `broker`: persiste resultados/eventos y puede responder a Telegram. 
+- `control-api`: consulta tareas, cola y decisiones (`confirm/reject`). + +## Qué Falencia Resuelve + +1. Falta de bus/eventos para tareas multi-agente. +2. Falta de estado persistente de ejecución. +3. Falta de API de control para operaciones y confirmaciones. +4. Falta de trazabilidad de eventos por tarea. + +## Contrato de Mensajes + +### Task envelope + +- `taskId` +- `profile` +- `source.channel/chatId/userId` +- `text` +- `intent` +- `targetAgent` +- `status` + +### Result envelope + +- `taskId` +- `profile` +- `fromAgent` +- `status` +- `summary` +- `fullResponse` +- `needsConfirmation` + +## Ejecución Local + +```bash +pnpm install +pnpm run build +pnpm run start:ingress +pnpm run start:router +pnpm run start:worker +pnpm run start:broker +pnpm run start:control-api +``` + +## Variables de Entorno Relevantes + +- `OPENCLAW_PROFILE` +- `NATS_URL` +- `NATS_STREAM` +- `POSTGRES_URL` +- `WORKER_AGENT_ID` +- `WORKER_EXEC_MODE` +- `OPENCLAW_BIN` +- `OPENCLAW_HOME` +- `OPENCLAW_ENV_FILE` +- `OPENCLAW_UID` +- `OPENCLAW_GID` + +## Nota + +Este paquete se instala y reconcilia desde Ansible (`role: openclaw_control_plane`) y forma parte de la ClawOps Protocol Suite. 
diff --git a/control-plane/package.json b/control-plane/package.json new file mode 100644 index 0000000..278c453 --- /dev/null +++ b/control-plane/package.json @@ -0,0 +1,33 @@ +{ + "name": "openclaw-control-plane", + "version": "0.1.0", + "private": true, + "description": "NestJS microservices control plane for OpenClaw profiles", + "scripts": { + "build": "tsc -p tsconfig.build.json", + "start:ingress": "node dist/ingress/main.js", + "start:router": "node dist/router/main.js", + "start:worker": "node dist/worker/main.js", + "start:broker": "node dist/broker/main.js", + "start:control-api": "node dist/control-api/main.js", + "lint": "tsc -p tsconfig.build.json --noEmit" + }, + "dependencies": { + "@nestjs/common": "^10.4.2", + "@nestjs/core": "^10.4.2", + "@nestjs/platform-express": "^10.4.2", + "@nestjs/config": "^3.2.3", + "class-transformer": "^0.5.1", + "class-validator": "^0.14.1", + "nats": "^2.29.3", + "pg": "^8.13.1", + "prom-client": "^15.1.3", + "reflect-metadata": "^0.2.2", + "rxjs": "^7.8.1" + }, + "devDependencies": { + "@types/node": "^22.13.8", + "@types/pg": "^8.11.10", + "typescript": "^5.7.3" + } +} diff --git a/control-plane/sql/001_init.sql b/control-plane/sql/001_init.sql new file mode 100644 index 0000000..43b8b10 --- /dev/null +++ b/control-plane/sql/001_init.sql @@ -0,0 +1,29 @@ +CREATE TABLE IF NOT EXISTS tasks ( + task_id TEXT PRIMARY KEY, + profile TEXT NOT NULL, + source_channel TEXT, + chat_id TEXT, + user_id TEXT, + intent TEXT, + target_agent TEXT, + status TEXT NOT NULL, + needs_confirmation BOOLEAN NOT NULL DEFAULT FALSE, + text_payload TEXT, + summary TEXT, + result_payload JSONB, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() +); + +CREATE TABLE IF NOT EXISTS task_events ( + id BIGSERIAL PRIMARY KEY, + task_id TEXT NOT NULL, + profile TEXT NOT NULL, + event_type TEXT NOT NULL, + from_agent TEXT, + payload JSONB, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() +); + +CREATE INDEX 
IF NOT EXISTS idx_tasks_updated_at ON tasks(updated_at DESC); +CREATE INDEX IF NOT EXISTS idx_events_task_id ON task_events(task_id); diff --git a/control-plane/src/broker/broker.module.ts b/control-plane/src/broker/broker.module.ts new file mode 100644 index 0000000..852faa9 --- /dev/null +++ b/control-plane/src/broker/broker.module.ts @@ -0,0 +1,8 @@ +import { Module } from '@nestjs/common'; + +import { BrokerRunner } from './broker.runner'; + +@Module({ + providers: [BrokerRunner] +}) +export class BrokerModule {} diff --git a/control-plane/src/broker/broker.runner.ts b/control-plane/src/broker/broker.runner.ts new file mode 100644 index 0000000..bb69cc1 --- /dev/null +++ b/control-plane/src/broker/broker.runner.ts @@ -0,0 +1,146 @@ +import { Injectable, Logger, OnModuleDestroy, OnModuleInit } from '@nestjs/common'; +import type { ConsumerMessages, NatsConnection } from 'nats'; +import type { Pool } from 'pg'; + +import { loadConfig } from '../common/config'; +import { type TaskResult } from '../common/contracts'; +import { type ServiceMetrics, initMetrics, startMetricsServer } from '../common/metrics'; +import { connectNats, decodeJson, ensureConsumer, ensureStream } from '../common/nats'; +import { createPgPool, migrate } from '../common/postgres'; + +@Injectable() +export class BrokerRunner implements OnModuleInit, OnModuleDestroy { + private readonly logger = new Logger(BrokerRunner.name); + private readonly cfg = loadConfig('broker'); + private nc: NatsConnection | null = null; + private pg: Pool | null = null; + private messages: ConsumerMessages | null = null; + private metrics: ServiceMetrics | null = null; + private metricsServer: ReturnType | null = null; + + async onModuleInit(): Promise { + this.metrics = initMetrics('broker'); + this.metricsServer = startMetricsServer(this.cfg.metricsPort, this.metrics.registry); + + this.pg = createPgPool(this.cfg.pgUrl); + await migrate(this.pg); + + this.nc = await connectNats(this.cfg.natsUrl); + await 
ensureStream(this.nc, this.cfg.natsStream); + const consumer = await ensureConsumer(this.nc, this.cfg.natsStream, `${this.cfg.profile}-broker`, 'results.agent.*'); + this.messages = await consumer.consume(); + + this.run().catch((error: unknown) => this.logger.error(`Broker loop failed: ${String(error)}`)); + + this.logger.log('Broker started'); + } + + async onModuleDestroy(): Promise { + this.messages?.close(); + await this.nc?.drain(); + await this.pg?.end(); + this.metricsServer?.close(); + } + + private async run(): Promise { + if (!this.messages || !this.nc || !this.pg) { + throw new Error('Broker is not initialized'); + } + + for await (const msg of this.messages) { + try { + const result = decodeJson(msg.data); + await this.persist(result); + await this.maybeSendTelegram(result); + + this.metrics?.handledMessages.inc(); + msg.ack(); + } catch (error) { + this.metrics?.failedMessages.inc(); + msg.nak(); + this.logger.error(`Broker failed to process result: ${String(error)}`); + } + } + } + + private async persist(result: TaskResult): Promise { + if (!this.pg) { + return; + } + + await this.pg.query( + ` + INSERT INTO tasks ( + task_id, profile, source_channel, chat_id, user_id, intent, target_agent, + status, needs_confirmation, summary, result_payload, updated_at + ) VALUES ($1, $2, $3, $4, $5, NULL, $6, $7, $8, $9, $10::jsonb, NOW()) + ON CONFLICT (task_id) + DO UPDATE SET + status = EXCLUDED.status, + needs_confirmation = EXCLUDED.needs_confirmation, + summary = EXCLUDED.summary, + result_payload = EXCLUDED.result_payload, + target_agent = EXCLUDED.target_agent, + updated_at = NOW(); + `, + [ + result.taskId, + result.profile, + result.source?.channel ?? 'telegram', + result.source?.chatId ?? null, + result.source?.userId ?? 
null, + result.fromAgent, + result.status, + result.needsConfirmation, + result.summary, + JSON.stringify(result) + ] + ); + + await this.pg.query( + ` + INSERT INTO task_events (task_id, profile, event_type, from_agent, payload) + VALUES ($1, $2, $3, $4, $5::jsonb) + `, + [result.taskId, result.profile, 'result', result.fromAgent, JSON.stringify(result)] + ); + } + + private async maybeSendTelegram(result: TaskResult): Promise { + if (!this.cfg.telegramBotToken) { + return; + } + + const chatId = result.source?.chatId ?? this.cfg.telegramDefaultChatId; + if (!chatId) { + return; + } + + const lines = [ + `[agent=${result.fromAgent}] [task=${result.taskId}]`, + result.summary, + result.fullResponse + ]; + + if (result.needsConfirmation) { + lines.push(`Accion pendiente. Responde: confirmar ${result.taskId} o rechazar ${result.taskId}`); + } + + const response = await fetch( + `https://api.telegram.org/bot${this.cfg.telegramBotToken}/sendMessage`, + { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + chat_id: chatId, + text: lines.join('\n') + }) + } + ); + + if (!response.ok) { + const body = await response.text(); + this.logger.warn(`Telegram send failed: ${response.status} ${body}`); + } + } +} diff --git a/control-plane/src/broker/main.ts b/control-plane/src/broker/main.ts new file mode 100644 index 0000000..cdc5d22 --- /dev/null +++ b/control-plane/src/broker/main.ts @@ -0,0 +1,14 @@ +import 'reflect-metadata'; +import { NestFactory } from '@nestjs/core'; + +import { BrokerModule } from './broker.module'; + +async function bootstrap(): Promise { + await NestFactory.createApplicationContext(BrokerModule, { bufferLogs: true }); +} + +bootstrap().catch((error: unknown) => { + // eslint-disable-next-line no-console + console.error(error); + process.exit(1); +}); diff --git a/control-plane/src/common/config.ts b/control-plane/src/common/config.ts new file mode 100644 index 0000000..255dbab --- /dev/null +++ 
b/control-plane/src/common/config.ts @@ -0,0 +1,51 @@ +export interface AppConfig { + serviceName: string; + profile: string; + natsUrl: string; + natsStream: string; + metricsPort: number; + pgUrl: string; + telegramBotToken: string; + telegramDefaultChatId: string; + routerForcedAgent: string; + workerAgentId: string; + workerExecMode: string; + openclawBin: string; + openclawHome: string; + openclawEnvFile: string; + openclawGatewayToken: string; + openclawTimeoutMs: number; + openclawBundledPluginsDir: string; +} + +function intFromEnv(name: string, fallback: number): number { + const value = process.env[name]; + if (!value) { + return fallback; + } + const parsed = Number.parseInt(value, 10); + return Number.isNaN(parsed) ? fallback : parsed; +} + +export function loadConfig(serviceName: string): AppConfig { + return { + serviceName, + profile: process.env.OPENCLAW_PROFILE ?? 'efra-core', + natsUrl: process.env.NATS_URL ?? 'nats://nats:4222', + natsStream: process.env.NATS_STREAM ?? 'OPENCLAW_TASKS', + metricsPort: intFromEnv('METRICS_PORT', 9400), + pgUrl: process.env.POSTGRES_URL ?? 'postgres://openclaw:openclaw@postgres:5432/openclaw_control', + telegramBotToken: process.env.TELEGRAM_BOT_TOKEN ?? '', + telegramDefaultChatId: process.env.TELEGRAM_DEFAULT_CHAT_ID ?? '', + routerForcedAgent: process.env.ROUTER_FORCED_AGENT ?? '', + workerAgentId: process.env.WORKER_AGENT_ID ?? 'main', + workerExecMode: process.env.WORKER_EXEC_MODE ?? 'stub', + openclawBin: process.env.OPENCLAW_BIN ?? '/home/openclaw/.local/bin/openclaw', + openclawHome: process.env.OPENCLAW_HOME ?? '/home/openclaw', + openclawEnvFile: process.env.OPENCLAW_ENV_FILE ?? `/etc/openclaw/secrets/${process.env.OPENCLAW_PROFILE ?? 'dev-main'}.env`, + openclawGatewayToken: process.env.OPENCLAW_GATEWAY_TOKEN ?? '', + openclawTimeoutMs: intFromEnv('OPENCLAW_TIMEOUT_MS', 120000), + openclawBundledPluginsDir: + process.env.OPENCLAW_BUNDLED_PLUGINS_DIR ?? 
'/home/openclaw/.openclaw/bundled-extensions' + }; +} diff --git a/control-plane/src/common/contracts.ts b/control-plane/src/common/contracts.ts new file mode 100644 index 0000000..59814bc --- /dev/null +++ b/control-plane/src/common/contracts.ts @@ -0,0 +1,59 @@ +export type TaskStatus = + | 'NEW' + | 'ROUTED' + | 'RUNNING' + | 'WAITING_CONFIRMATION' + | 'DONE' + | 'FAILED' + | 'DEAD_LETTER'; + +export interface TaskEnvelope { + taskId: string; + profile: string; + source: { + channel: 'telegram' | 'api' | 'system'; + chatId?: string; + userId?: string; + username?: string; + }; + text: string; + intent?: string; + targetAgent?: string; + priority?: number; + budgetTokens?: number; + status: TaskStatus; + metadata?: Record; + createdAt: string; +} + +export interface TaskResult { + taskId: string; + profile: string; + fromAgent: string; + status: Extract; + summary: string; + fullResponse: string; + needsConfirmation: boolean; + suggestedAction?: string; + tokenUsage?: number; + costEstimate?: number; + source?: TaskEnvelope['source']; + createdAt: string; +} + +export interface ConfirmationCommand { + taskId: string; + profile: string; + decision: 'confirm' | 'reject'; + note?: string; + actor: string; + createdAt: string; +} + +export interface QueueStats { + stream: string; + messages: number; + bytes: number; + firstSeq: number; + lastSeq: number; +} diff --git a/control-plane/src/common/intents.ts b/control-plane/src/common/intents.ts new file mode 100644 index 0000000..0c18c7d --- /dev/null +++ b/control-plane/src/common/intents.ts @@ -0,0 +1,65 @@ +interface RoutingRule { + intent: string; + agent: string; + description: string; + keywords: string[]; +} + +const ROUTING_RULES: RoutingRule[] = [ + { + intent: 'browser.login', + agent: 'browser-login', + description: 'Login/OAuth and managed browser operations', + keywords: ['login', 'browser', 'portal', 'cookie', 'captcha'] + }, + { + intent: 'deploy.coolify', + agent: 'coolify-ops', + description: 'Coolify 
service lifecycle and deployment operations', + keywords: ['coolify', 'deploy', 'release', 'rollback', 'service up', 'service down'] + }, + { + intent: 'research.analysis', + agent: 'research', + description: 'Research, comparisons, and technical analysis', + keywords: ['investiga', 'analiza', 'research', 'comparar', 'resumen', 'benchmark'] + } +]; + +export function classifyIntent(text: string): { intent: string; targetAgent: string } { + const lowered = text.toLowerCase(); + for (const rule of ROUTING_RULES) { + if (rule.keywords.some((word) => lowered.includes(word))) { + return { intent: rule.intent, targetAgent: rule.agent }; + } + } + + return { intent: 'general.main', targetAgent: 'main' }; +} + +export function actionNeedsConfirmation(text: string): boolean { + const lowered = text.toLowerCase(); + return ['delete', 'drop', 'destroy', 'stop', 'down', 'wipe', 'rm -rf', 'shutdown'].some((token) => + lowered.includes(token) + ); +} + +export function listAvailableAgents(): Array<{ id: string; intent: string; description: string }> { + const agents = new Map(); + + agents.set('main', { + id: 'main', + intent: 'general.main', + description: 'General coordinator and fallback' + }); + + for (const rule of ROUTING_RULES) { + agents.set(rule.agent, { + id: rule.agent, + intent: rule.intent, + description: rule.description + }); + } + + return Array.from(agents.values()); +} diff --git a/control-plane/src/common/metrics.ts b/control-plane/src/common/metrics.ts new file mode 100644 index 0000000..0b4ec8f --- /dev/null +++ b/control-plane/src/common/metrics.ts @@ -0,0 +1,49 @@ +import http from 'node:http'; +import { Counter, Registry, collectDefaultMetrics } from 'prom-client'; + +export interface ServiceMetrics { + registry: Registry; + handledMessages: Counter; + failedMessages: Counter; +} + +export function initMetrics(service: string): ServiceMetrics { + const registry = new Registry(); + collectDefaultMetrics({ register: registry, prefix: `${service}_` }); + + 
const handledMessages = new Counter({ + name: `${service}_handled_messages_total`, + help: `Handled messages by ${service}`, + registers: [registry] + }); + + const failedMessages = new Counter({ + name: `${service}_failed_messages_total`, + help: `Failed messages by ${service}`, + registers: [registry] + }); + + return { registry, handledMessages, failedMessages }; +} + +export function startMetricsServer(port: number, registry: Registry): http.Server { + const server = http.createServer(async (_req, res) => { + if (_req.url === '/health') { + res.statusCode = 200; + res.end('ok'); + return; + } + + if (_req.url === '/metrics') { + res.setHeader('Content-Type', registry.contentType); + res.end(await registry.metrics()); + return; + } + + res.statusCode = 404; + res.end('not found'); + }); + + server.listen(port, '0.0.0.0'); + return server; +} diff --git a/control-plane/src/common/nats.ts b/control-plane/src/common/nats.ts new file mode 100644 index 0000000..3d6bb19 --- /dev/null +++ b/control-plane/src/common/nats.ts @@ -0,0 +1,89 @@ +import { + AckPolicy, + type ConnectionOptions, + DeliverPolicy, + ReplayPolicy, + RetentionPolicy, + StringCodec, + connect, + type Consumer, + type NatsConnection +} from 'nats'; + +const sc = StringCodec(); + +export async function connectNats(servers: string): Promise { + const options: ConnectionOptions = { + servers: [] + }; + + options.servers = servers + .split(',') + .map((v) => v.trim()) + .filter((v) => v.length > 0) + .map((raw) => { + const url = new URL(raw.includes('://') ? 
raw : `nats://${raw}`); + + if (url.username && options.user === undefined) { + options.user = decodeURIComponent(url.username); + } + if (url.password && options.pass === undefined) { + options.pass = decodeURIComponent(url.password); + } + + url.username = ''; + url.password = ''; + + return `${url.protocol}//${url.host}`; + }); + + return connect(options); +} + +export async function ensureStream(nc: NatsConnection, streamName: string): Promise { + const jsm = await nc.jetstreamManager(); + + try { + await jsm.streams.info(streamName); + } catch { + await jsm.streams.add({ + name: streamName, + subjects: ['tasks.>', 'results.>', 'control.>'], + retention: RetentionPolicy.Limits, + max_age: 7 * 24 * 60 * 60 * 1_000_000_000 + }); + } +} + +export async function ensureConsumer( + nc: NatsConnection, + streamName: string, + durableName: string, + filterSubject: string +): Promise { + const jsm = await nc.jetstreamManager(); + + try { + await jsm.consumers.info(streamName, durableName); + } catch { + await jsm.consumers.add(streamName, { + durable_name: durableName, + ack_policy: AckPolicy.Explicit, + deliver_policy: DeliverPolicy.All, + filter_subject: filterSubject, + max_ack_pending: 200, + replay_policy: ReplayPolicy.Instant + }); + } + + const js = nc.jetstream(); + return js.consumers.get(streamName, durableName); +} + +export function encodeJson(payload: unknown): Uint8Array { + return sc.encode(JSON.stringify(payload)); +} + +export function decodeJson(payload: Uint8Array): T { + return JSON.parse(sc.decode(payload)) as T; +} diff --git a/control-plane/src/common/postgres.ts b/control-plane/src/common/postgres.ts new file mode 100644 index 0000000..1b476cc --- /dev/null +++ b/control-plane/src/common/postgres.ts @@ -0,0 +1,39 @@ +import { Pool } from 'pg'; + +export function createPgPool(connectionString: string): Pool { + return new Pool({ connectionString }); +} + +export async function migrate(pool: Pool): Promise { + await pool.query(` + CREATE TABLE IF 
NOT EXISTS tasks ( + task_id TEXT PRIMARY KEY, + profile TEXT NOT NULL, + source_channel TEXT, + chat_id TEXT, + user_id TEXT, + intent TEXT, + target_agent TEXT, + status TEXT NOT NULL, + needs_confirmation BOOLEAN NOT NULL DEFAULT FALSE, + text_payload TEXT, + summary TEXT, + result_payload JSONB, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() + ); + + CREATE TABLE IF NOT EXISTS task_events ( + id BIGSERIAL PRIMARY KEY, + task_id TEXT NOT NULL, + profile TEXT NOT NULL, + event_type TEXT NOT NULL, + from_agent TEXT, + payload JSONB, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() + ); + + CREATE INDEX IF NOT EXISTS idx_tasks_updated_at ON tasks(updated_at DESC); + CREATE INDEX IF NOT EXISTS idx_events_task_id ON task_events(task_id); + `); +} diff --git a/control-plane/src/control-api/control.controller.ts b/control-plane/src/control-api/control.controller.ts new file mode 100644 index 0000000..cde1294 --- /dev/null +++ b/control-plane/src/control-api/control.controller.ts @@ -0,0 +1,46 @@ +import { Body, Controller, Get, Param, ParseIntPipe, Post, Query } from '@nestjs/common'; + +import { ControlService } from './control.service'; + +@Controller() +export class ControlController { + constructor(private readonly controlService: ControlService) {} + + @Get('/health') + health(): { ok: true } { + return { ok: true }; + } + + @Get('/tasks') + async tasks(@Query('limit', new ParseIntPipe({ optional: true })) limit?: number): Promise { + return this.controlService.listTasks(limit ?? 100); + } + + @Get('/tasks/:taskId') + async task(@Param('taskId') taskId: string): Promise { + return this.controlService.getTask(taskId); + } + + @Post('/tasks/:taskId/confirm') + async confirm( + @Param('taskId') taskId: string, + @Body() body: { actor?: string; note?: string } + ): Promise<{ ok: true }> { + await this.controlService.setDecision(taskId, 'confirm', body.actor ?? 
'operator', body.note); + return { ok: true }; + } + + @Post('/tasks/:taskId/reject') + async reject( + @Param('taskId') taskId: string, + @Body() body: { actor?: string; note?: string } + ): Promise<{ ok: true }> { + await this.controlService.setDecision(taskId, 'reject', body.actor ?? 'operator', body.note); + return { ok: true }; + } + + @Get('/queues') + async queues(): Promise { + return this.controlService.queueStats(); + } +} diff --git a/control-plane/src/control-api/control.module.ts b/control-plane/src/control-api/control.module.ts new file mode 100644 index 0000000..a904782 --- /dev/null +++ b/control-plane/src/control-api/control.module.ts @@ -0,0 +1,10 @@ +import { Module } from '@nestjs/common'; + +import { ControlController } from './control.controller'; +import { ControlService } from './control.service'; + +@Module({ + controllers: [ControlController], + providers: [ControlService] +}) +export class ControlModule {} diff --git a/control-plane/src/control-api/control.service.ts b/control-plane/src/control-api/control.service.ts new file mode 100644 index 0000000..fa3fc67 --- /dev/null +++ b/control-plane/src/control-api/control.service.ts @@ -0,0 +1,110 @@ +import { Injectable, Logger, OnModuleDestroy, OnModuleInit } from '@nestjs/common'; +import type { NatsConnection } from 'nats'; +import type { Pool } from 'pg'; + +import { type ConfirmationCommand, type QueueStats } from '../common/contracts'; +import { loadConfig } from '../common/config'; +import { connectNats, encodeJson } from '../common/nats'; +import { createPgPool, migrate } from '../common/postgres'; + +@Injectable() +export class ControlService implements OnModuleInit, OnModuleDestroy { + private readonly logger = new Logger(ControlService.name); + private readonly cfg = loadConfig('control_api'); + private pg: Pool | null = null; + private nc: NatsConnection | null = null; + + async onModuleInit(): Promise { + this.pg = createPgPool(this.cfg.pgUrl); + await migrate(this.pg); + + 
this.nc = await connectNats(this.cfg.natsUrl); + this.logger.log('Control API ready'); + } + + async onModuleDestroy(): Promise { + await this.nc?.drain(); + await this.pg?.end(); + } + + async listTasks(limit = 100): Promise { + const safeLimit = Math.max(1, Math.min(limit, 500)); + const result = await this.pg?.query( + `SELECT task_id, profile, source_channel, chat_id, user_id, target_agent, status, needs_confirmation, summary, updated_at + FROM tasks + ORDER BY updated_at DESC + LIMIT $1`, + [safeLimit] + ); + + return result?.rows ?? []; + } + + async getTask(taskId: string): Promise { + const result = await this.pg?.query( + `SELECT task_id, profile, source_channel, chat_id, user_id, target_agent, status, needs_confirmation, summary, result_payload, updated_at + FROM tasks + WHERE task_id = $1`, + [taskId] + ); + + return result?.rows?.[0] ?? null; + } + + async setDecision(taskId: string, decision: 'confirm' | 'reject', actor: string, note?: string): Promise { + const command: ConfirmationCommand = { + taskId, + profile: this.cfg.profile, + decision, + note, + actor, + createdAt: new Date().toISOString() + }; + + const nextStatus = decision === 'confirm' ? 'DONE' : 'FAILED'; + + const updated = await this.pg?.query( + ` + UPDATE tasks + SET status = $1, + needs_confirmation = FALSE, + updated_at = NOW() + WHERE task_id = $2 + AND profile = $3 + AND needs_confirmation = TRUE + `, + [nextStatus, taskId, this.cfg.profile] + ); + + if ((updated?.rowCount ?? 
0) === 0) { + this.logger.warn(`Decision ${decision} for task ${taskId} did not match a pending confirmation row`); + } + + this.nc?.publish(`control.${decision}.${this.cfg.profile}`, encodeJson(command)); + + await this.pg?.query( + ` + INSERT INTO task_events (task_id, profile, event_type, from_agent, payload) + VALUES ($1, $2, $3, $4, $5::jsonb) + `, + [taskId, this.cfg.profile, `decision_${decision}`, actor, JSON.stringify({ ...command, status: nextStatus })] + ); + } + + async queueStats(): Promise { + if (!this.nc) { + return null; + } + + const jsm = await this.nc.jetstreamManager(); + const info = await jsm.streams.info(this.cfg.natsStream); + + return { + stream: info.config.name, + messages: info.state.messages, + bytes: info.state.bytes, + firstSeq: info.state.first_seq, + lastSeq: info.state.last_seq + }; + } +} diff --git a/control-plane/src/control-api/main.ts b/control-plane/src/control-api/main.ts new file mode 100644 index 0000000..940ae35 --- /dev/null +++ b/control-plane/src/control-api/main.ts @@ -0,0 +1,28 @@ +import 'reflect-metadata'; +import { Logger } from '@nestjs/common'; +import { NestFactory } from '@nestjs/core'; + +import { loadConfig } from '../common/config'; +import { initMetrics, startMetricsServer } from '../common/metrics'; +import { ControlModule } from './control.module'; + +async function bootstrap(): Promise { + const cfg = loadConfig('control_api'); + const logger = new Logger('ControlApiMain'); + + const app = await NestFactory.create(ControlModule, { bufferLogs: true }); + const port = Number.parseInt(process.env.HTTP_PORT ?? 
'39090', 10); + await app.listen(port, '0.0.0.0'); + + const metrics = initMetrics('control_api'); + startMetricsServer(cfg.metricsPort, metrics.registry); + + logger.log(`Control API listening on :${port}`); + logger.log(`Metrics listening on :${cfg.metricsPort}`); +} + +bootstrap().catch((error: unknown) => { + // eslint-disable-next-line no-console + console.error(error); + process.exit(1); +}); diff --git a/control-plane/src/ingress/ingress.controller.ts b/control-plane/src/ingress/ingress.controller.ts new file mode 100644 index 0000000..4e7fb43 --- /dev/null +++ b/control-plane/src/ingress/ingress.controller.ts @@ -0,0 +1,36 @@ +import { Body, Controller, Get, HttpCode, Post } from '@nestjs/common'; + +import { IngressService } from './ingress.service'; + +@Controller() +export class IngressController { + constructor(private readonly ingressService: IngressService) {} + + @Get('/health') + health(): { ok: true } { + return { ok: true }; + } + + @Post('/telegram/webhook') + @HttpCode(202) + async telegramWebhook(@Body() body: unknown): Promise<{ accepted: true; taskId: string }> { + const result = await this.ingressService.ingestTelegram(body as never); + return { accepted: true, taskId: result.taskId }; + } + + @Post('/ingress/simulate') + @HttpCode(202) + async simulate( + @Body() body: { text: string; chatId?: string; userId?: string; username?: string } + ): Promise<{ accepted: true; taskId: string }> { + const result = await this.ingressService.ingestTelegram({ + message: { + text: body.text, + chat: { id: body.chatId ?? 'local-sim' }, + from: { id: body.userId ?? 'local-user', username: body.username ?? 
'simulator' } + } + }); + + return { accepted: true, taskId: result.taskId }; + } +} diff --git a/control-plane/src/ingress/ingress.module.ts b/control-plane/src/ingress/ingress.module.ts new file mode 100644 index 0000000..b6c682d --- /dev/null +++ b/control-plane/src/ingress/ingress.module.ts @@ -0,0 +1,10 @@ +import { Module } from '@nestjs/common'; + +import { IngressController } from './ingress.controller'; +import { IngressService } from './ingress.service'; + +@Module({ + controllers: [IngressController], + providers: [IngressService] +}) +export class IngressModule {} diff --git a/control-plane/src/ingress/ingress.service.ts b/control-plane/src/ingress/ingress.service.ts new file mode 100644 index 0000000..6892998 --- /dev/null +++ b/control-plane/src/ingress/ingress.service.ts @@ -0,0 +1,118 @@ +import { Injectable, Logger, OnModuleDestroy, OnModuleInit } from '@nestjs/common'; +import { randomUUID } from 'node:crypto'; +import type { NatsConnection } from 'nats'; + +import { type AppConfig, loadConfig } from '../common/config'; +import { type TaskEnvelope } from '../common/contracts'; +import { listAvailableAgents } from '../common/intents'; +import { connectNats, encodeJson, ensureStream } from '../common/nats'; + +interface TelegramUpdate { + message?: { + text?: string; + chat?: { id?: number | string }; + from?: { id?: number | string; username?: string }; + }; +} + +@Injectable() +export class IngressService implements OnModuleInit, OnModuleDestroy { + private readonly logger = new Logger(IngressService.name); + private readonly cfg: AppConfig = loadConfig('ingress'); + private nc: NatsConnection | null = null; + + async onModuleInit(): Promise { + this.nc = await connectNats(this.cfg.natsUrl); + await ensureStream(this.nc, this.cfg.natsStream); + this.logger.log(`Connected to NATS at ${this.cfg.natsUrl}`); + } + + async onModuleDestroy(): Promise { + await this.nc?.drain(); + } + + async ingestTelegram(body: TelegramUpdate): Promise<{ taskId: string 
}> { + if (!this.nc) { + throw new Error('NATS is not ready'); + } + + const text = body.message?.text?.trim() ?? ''; + const chatId = body.message?.chat?.id ? String(body.message.chat.id) : this.cfg.telegramDefaultChatId; + const userId = body.message?.from?.id ? String(body.message.from.id) : undefined; + const username = body.message?.from?.username; + + if (!text) { + throw new Error('Message text is required'); + } + + if (await this.tryHandleTelegramCommand(text, chatId)) { + return { taskId: `cmd-${randomUUID()}` }; + } + + const task: TaskEnvelope = { + taskId: randomUUID(), + profile: this.cfg.profile, + source: { + channel: 'telegram', + chatId, + userId, + username + }, + text, + status: 'NEW', + priority: 5, + budgetTokens: 4000, + createdAt: new Date().toISOString() + }; + + this.nc.publish('tasks.ingress', encodeJson(task)); + this.logger.log(`Queued task ${task.taskId} for ingress`); + + return { taskId: task.taskId }; + } + + private async tryHandleTelegramCommand(text: string, chatId: string): Promise { + if (!this.isAgentsCommand(text)) { + return false; + } + + if (!this.cfg.telegramBotToken || !chatId) { + return false; + } + + const agents = listAvailableAgents(); + const lines = [ + `Available agents (${this.cfg.profile}):`, + ...agents.map((agent) => `- ${agent.id}: ${agent.description} [${agent.intent}]`), + '', + 'Usage: send a normal request and the router will select the target by intent.' + ]; + + await this.sendTelegramMessage(chatId, lines.join('\n')); + return true; + } + + private isAgentsCommand(text: string): boolean { + const firstToken = text.trim().split(/\s+/)[0]?.toLowerCase() ?? 
''; + return firstToken === '/agents' || firstToken.startsWith('/agents@'); + } + + private async sendTelegramMessage(chatId: string, text: string): Promise { + const response = await fetch( + `https://api.telegram.org/bot${this.cfg.telegramBotToken}/sendMessage`, + { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + chat_id: chatId, + text + }) + } + ); + + if (!response.ok) { + const body = await response.text(); + throw new Error(`Telegram send failed: ${response.status} ${body}`); + } + } +} diff --git a/control-plane/src/ingress/main.ts b/control-plane/src/ingress/main.ts new file mode 100644 index 0000000..b639c65 --- /dev/null +++ b/control-plane/src/ingress/main.ts @@ -0,0 +1,29 @@ +import 'reflect-metadata'; +import { Logger } from '@nestjs/common'; +import { NestFactory } from '@nestjs/core'; + +import { loadConfig } from '../common/config'; +import { initMetrics, startMetricsServer } from '../common/metrics'; +import { IngressModule } from './ingress.module'; + +async function bootstrap(): Promise { + const cfg = loadConfig('ingress'); + const logger = new Logger('IngressMain'); + + const app = await NestFactory.create(IngressModule, { bufferLogs: true }); + + const port = Number.parseInt(process.env.HTTP_PORT ?? 
'3000', 10); + await app.listen(port, '0.0.0.0'); + + const metrics = initMetrics('ingress'); + startMetricsServer(cfg.metricsPort, metrics.registry); + + logger.log(`Ingress service listening on :${port}`); + logger.log(`Metrics listening on :${cfg.metricsPort}`); +} + +bootstrap().catch((error: unknown) => { + // eslint-disable-next-line no-console + console.error(error); + process.exit(1); +}); diff --git a/control-plane/src/router/main.ts b/control-plane/src/router/main.ts new file mode 100644 index 0000000..98abd4c --- /dev/null +++ b/control-plane/src/router/main.ts @@ -0,0 +1,14 @@ +import 'reflect-metadata'; +import { NestFactory } from '@nestjs/core'; + +import { RouterModule } from './router.module'; + +async function bootstrap(): Promise { + await NestFactory.createApplicationContext(RouterModule, { bufferLogs: true }); +} + +bootstrap().catch((error: unknown) => { + // eslint-disable-next-line no-console + console.error(error); + process.exit(1); +}); diff --git a/control-plane/src/router/router.module.ts b/control-plane/src/router/router.module.ts new file mode 100644 index 0000000..c4bcd00 --- /dev/null +++ b/control-plane/src/router/router.module.ts @@ -0,0 +1,8 @@ +import { Module } from '@nestjs/common'; + +import { RouterRunner } from './router.runner'; + +@Module({ + providers: [RouterRunner] +}) +export class RouterModule {} diff --git a/control-plane/src/router/router.runner.ts b/control-plane/src/router/router.runner.ts new file mode 100644 index 0000000..e364ad9 --- /dev/null +++ b/control-plane/src/router/router.runner.ts @@ -0,0 +1,73 @@ +import { Injectable, Logger, OnModuleDestroy, OnModuleInit } from '@nestjs/common'; +import type { ConsumerMessages, NatsConnection } from 'nats'; + +import { loadConfig } from '../common/config'; +import { classifyIntent } from '../common/intents'; +import { type ServiceMetrics, initMetrics, startMetricsServer } from '../common/metrics'; +import { type TaskEnvelope } from '../common/contracts'; +import { 
connectNats, decodeJson, encodeJson, ensureConsumer, ensureStream } from '../common/nats'; + +@Injectable() +export class RouterRunner implements OnModuleInit, OnModuleDestroy { + private readonly logger = new Logger(RouterRunner.name); + private readonly cfg = loadConfig('router'); + private nc: NatsConnection | null = null; + private messages: ConsumerMessages | null = null; + private metrics: ServiceMetrics | null = null; + private metricsServer: ReturnType | null = null; + + async onModuleInit(): Promise { + this.metrics = initMetrics('router'); + this.metricsServer = startMetricsServer(this.cfg.metricsPort, this.metrics.registry); + + this.nc = await connectNats(this.cfg.natsUrl); + await ensureStream(this.nc, this.cfg.natsStream); + const consumer = await ensureConsumer(this.nc, this.cfg.natsStream, `${this.cfg.profile}-router`, 'tasks.ingress'); + this.messages = await consumer.consume(); + + this.run().catch((error: unknown) => { + this.logger.error(`Router loop failed: ${String(error)}`); + }); + + this.logger.log(`Router running for profile ${this.cfg.profile}`); + } + + async onModuleDestroy(): Promise { + this.messages?.close(); + await this.nc?.drain(); + this.metricsServer?.close(); + } + + private async run(): Promise { + if (!this.messages || !this.nc) { + throw new Error('Router is not initialized'); + } + + for await (const msg of this.messages) { + try { + const task = decodeJson(msg.data); + const forced = this.cfg.routerForcedAgent.trim(); + const routed = forced + ? 
{ intent: `forced.${forced}`, targetAgent: forced } + : classifyIntent(task.text); + + const enrichedTask: TaskEnvelope = { + ...task, + intent: routed.intent, + targetAgent: routed.targetAgent, + status: 'ROUTED' + }; + + this.nc.publish(`tasks.agent.${routed.targetAgent}`, encodeJson(enrichedTask)); + this.nc.publish('tasks.events', encodeJson({ type: 'task_routed', taskId: task.taskId, routed })); + + this.metrics?.handledMessages.inc(); + msg.ack(); + } catch (error) { + this.metrics?.failedMessages.inc(); + msg.nak(); + this.logger.error(`Failed to route task: ${String(error)}`); + } + } + } +} diff --git a/control-plane/src/worker/main.ts b/control-plane/src/worker/main.ts new file mode 100644 index 0000000..3175342 --- /dev/null +++ b/control-plane/src/worker/main.ts @@ -0,0 +1,14 @@ +import 'reflect-metadata'; +import { NestFactory } from '@nestjs/core'; + +import { WorkerModule } from './worker.module'; + +async function bootstrap(): Promise { + await NestFactory.createApplicationContext(WorkerModule, { bufferLogs: true }); +} + +bootstrap().catch((error: unknown) => { + // eslint-disable-next-line no-console + console.error(error); + process.exit(1); +}); diff --git a/control-plane/src/worker/worker.module.ts b/control-plane/src/worker/worker.module.ts new file mode 100644 index 0000000..b473b59 --- /dev/null +++ b/control-plane/src/worker/worker.module.ts @@ -0,0 +1,8 @@ +import { Module } from '@nestjs/common'; + +import { WorkerRunner } from './worker.runner'; + +@Module({ + providers: [WorkerRunner] +}) +export class WorkerModule {} diff --git a/control-plane/src/worker/worker.runner.ts b/control-plane/src/worker/worker.runner.ts new file mode 100644 index 0000000..75736bc --- /dev/null +++ b/control-plane/src/worker/worker.runner.ts @@ -0,0 +1,198 @@ +import { Injectable, Logger, OnModuleDestroy, OnModuleInit } from '@nestjs/common'; +import { execFile } from 'node:child_process'; +import { promisify } from 'node:util'; +import type { 
ConsumerMessages, NatsConnection } from 'nats'; + +import { loadConfig } from '../common/config'; +import { type TaskEnvelope, type TaskResult } from '../common/contracts'; +import { actionNeedsConfirmation } from '../common/intents'; +import { type ServiceMetrics, initMetrics, startMetricsServer } from '../common/metrics'; +import { connectNats, decodeJson, encodeJson, ensureConsumer, ensureStream } from '../common/nats'; + +const execFileAsync = promisify(execFile); + +@Injectable() +export class WorkerRunner implements OnModuleInit, OnModuleDestroy { + private readonly logger = new Logger(WorkerRunner.name); + private readonly cfg = loadConfig('worker'); + private nc: NatsConnection | null = null; + private messages: ConsumerMessages | null = null; + private metrics: ServiceMetrics | null = null; + private metricsServer: ReturnType | null = null; + + async onModuleInit(): Promise { + this.metrics = initMetrics(`worker_${this.cfg.workerAgentId.replace('-', '_')}`); + this.metricsServer = startMetricsServer(this.cfg.metricsPort, this.metrics.registry); + + this.nc = await connectNats(this.cfg.natsUrl); + await ensureStream(this.nc, this.cfg.natsStream); + + const durable = `${this.cfg.profile}-worker-${this.cfg.workerAgentId}`; + const filter = `tasks.agent.${this.cfg.workerAgentId}`; + const consumer = await ensureConsumer(this.nc, this.cfg.natsStream, durable, filter); + this.messages = await consumer.consume(); + + this.run().catch((error: unknown) => this.logger.error(`Worker loop failed: ${String(error)}`)); + + this.logger.log(`Worker ${this.cfg.workerAgentId} online`); + } + + async onModuleDestroy(): Promise { + this.messages?.close(); + await this.nc?.drain(); + this.metricsServer?.close(); + } + + private async run(): Promise { + if (!this.messages || !this.nc) { + throw new Error('Worker is not initialized'); + } + + for await (const msg of this.messages) { + try { + const task = decodeJson(msg.data); + const result = await this.processTask(task); + 
this.nc.publish(`results.agent.${this.cfg.workerAgentId}`, encodeJson(result)); + this.metrics?.handledMessages.inc(); + msg.ack(); + } catch (error) { + this.metrics?.failedMessages.inc(); + msg.nak(); + this.logger.error(`Failed task processing: ${String(error)}`); + } + } + } + + private async processTask(task: TaskEnvelope): Promise { + const needsConfirmation = actionNeedsConfirmation(task.text); + + if (needsConfirmation) { + return { + taskId: task.taskId, + profile: task.profile, + fromAgent: this.cfg.workerAgentId, + status: 'WAITING_CONFIRMATION', + summary: `Task ${task.taskId} routed to ${this.cfg.workerAgentId}`, + fullResponse: `Action requires confirmation before execution: ${task.text}`, + needsConfirmation: true, + suggestedAction: `confirmar ${task.taskId}`, + tokenUsage: Math.min(300, task.text.length * 2), + costEstimate: 0, + source: task.source, + createdAt: new Date().toISOString() + }; + } + + if (this.cfg.workerExecMode === 'openclaw') { + return this.processWithOpenClaw(task); + } + + return { + taskId: task.taskId, + profile: task.profile, + fromAgent: this.cfg.workerAgentId, + status: 'DONE', + summary: `Task ${task.taskId} routed to ${this.cfg.workerAgentId}`, + fullResponse: `Processed by ${this.cfg.workerAgentId}: ${task.text}`, + needsConfirmation: false, + tokenUsage: Math.min(300, task.text.length * 2), + costEstimate: 0, + source: task.source, + createdAt: new Date().toISOString() + }; + } + + private async processWithOpenClaw(task: TaskEnvelope): Promise { + const env = { + ...process.env, + HOME: this.cfg.openclawHome, + OPENCLAW_HOME: this.cfg.openclawHome, + OPENCLAW_GATEWAY_TOKEN: this.cfg.openclawGatewayToken, + OPENCLAW_BUNDLED_PLUGINS_DIR: this.cfg.openclawBundledPluginsDir + }; + + try { + const { stdout } = await execFileAsync( + this.cfg.openclawBin, + [ + '--profile', + this.cfg.profile, + 'agent', + '--agent', + this.cfg.workerAgentId, + '--message', + task.text, + '--json' + ], + { + env, + timeout: 
this.cfg.openclawTimeoutMs, + maxBuffer: 1024 * 1024 + } + ); + + const parsed = this.extractJson(stdout) as Record; + const payloads = Array.isArray(parsed?.payloads) + ? parsed.payloads + : Array.isArray(parsed?.result?.payloads) + ? parsed.result.payloads + : []; + const text = payloads + .map((item: { text?: string } | undefined) => item?.text ?? '') + .filter((line: string) => line.trim().length > 0) + .join('\n') + .trim(); + + const meta = parsed?.meta ?? parsed?.result?.meta ?? {}; + const totalTokens = Number(meta?.agentMeta?.usage?.total ?? 0); + + return { + taskId: task.taskId, + profile: task.profile, + fromAgent: this.cfg.workerAgentId, + status: 'DONE', + summary: `Task ${task.taskId} handled by OpenClaw agent ${this.cfg.workerAgentId}`, + fullResponse: text || `Agent ${this.cfg.workerAgentId} completed with empty text payload.`, + needsConfirmation: false, + tokenUsage: Number.isFinite(totalTokens) && totalTokens > 0 ? totalTokens : undefined, + costEstimate: 0, + source: task.source, + createdAt: new Date().toISOString() + }; + } catch (error) { + const detail = error instanceof Error ? 
error.message : String(error); + this.logger.warn(`OpenClaw exec failed for ${task.taskId}: ${detail}`); + + return { + taskId: task.taskId, + profile: task.profile, + fromAgent: this.cfg.workerAgentId, + status: 'FAILED', + summary: `Task ${task.taskId} failed in OpenClaw agent ${this.cfg.workerAgentId}`, + fullResponse: `Agent execution failed: ${detail}`, + needsConfirmation: false, + costEstimate: 0, + source: task.source, + createdAt: new Date().toISOString() + }; + } + } + + private extractJson(stdout: string): Record { + const trimmed = stdout.trim(); + if (!trimmed) { + return {}; + } + + try { + return JSON.parse(trimmed) as Record; + } catch { + const start = trimmed.indexOf('{'); + const end = trimmed.lastIndexOf('}'); + if (start >= 0 && end > start) { + return JSON.parse(trimmed.slice(start, end + 1)) as Record; + } + throw new Error('openclaw returned non-JSON output'); + } + } +} diff --git a/control-plane/tsconfig.build.json b/control-plane/tsconfig.build.json new file mode 100644 index 0000000..eacde2c --- /dev/null +++ b/control-plane/tsconfig.build.json @@ -0,0 +1,7 @@ +{ + "extends": "./tsconfig.json", + "compilerOptions": { + "noEmit": false + }, + "exclude": ["node_modules", "dist", "**/*.spec.ts"] +} diff --git a/control-plane/tsconfig.json b/control-plane/tsconfig.json new file mode 100644 index 0000000..7ca417a --- /dev/null +++ b/control-plane/tsconfig.json @@ -0,0 +1,22 @@ +{ + "compilerOptions": { + "module": "commonjs", + "declaration": false, + "removeComments": false, + "emitDecoratorMetadata": true, + "experimentalDecorators": true, + "allowSyntheticDefaultImports": true, + "target": "ES2022", + "sourceMap": true, + "outDir": "./dist", + "baseUrl": "./", + "incremental": false, + "skipLibCheck": true, + "strict": true, + "moduleResolution": "node", + "esModuleInterop": true, + "types": ["node"] + }, + "include": ["src/**/*.ts"], + "exclude": ["node_modules", "dist"] +} diff --git a/docs/architecture-installed-layout.md 
b/docs/architecture-installed-layout.md new file mode 100644 index 0000000..daff9f0 --- /dev/null +++ b/docs/architecture-installed-layout.md @@ -0,0 +1,234 @@ +--- +title: Installed Runtime Layout (dev-main + efra-core) +summary: Detailed installed system structure, runtime topology, and permissions for OpenClaw enterprise profile dev-main and control-plane profile efra-core. +--- + +# Installed Runtime Layout (dev-main + efra-core) + +This document describes how Ansible leaves the system installed for: + +- Gateway enterprise profile: `dev-main` +- Control-plane profile (full mode): `efra-core` + +## 1) Disk Layout + Permissions (Detailed) + +```mermaid +flowchart LR + subgraph SRC["Ansible source (/home/efra/openclaw-ansible)"] + INV["inventories/dev/group_vars/all.yml"] + ER["role: openclaw_enterprise"] + CR["role: openclaw_control_plane"] + AS["ops/auth-sync.sh (make auth-sync)"] + end + + subgraph ETC["System artifacts (/etc + systemd)"] + EROOT["/etc/openclaw\\n750 root:openclaw"] + ESEC["/etc/openclaw/secrets\\n750 root:openclaw"] + EDEV["/etc/openclaw/secrets/dev-main.env\\n640 root:openclaw"] + EAND["/etc/openclaw/secrets/andrea.env\\n640 root:openclaw"] + UDEV["/etc/systemd/system/openclaw-gateway-dev-main.service\\n644 root:root"] + UAND["/etc/systemd/system/openclaw-gateway-andrea.service\\n644 root:root"] + end + + subgraph GW["Gateway profile dev-main (/home/openclaw/.openclaw-dev-main)"] + GROOT["state dir\\n755 openclaw:openclaw"] + GCFG["openclaw.json\\n600 openclaw:openclaw"] + GAG["agents/\\n755 openclaw:openclaw"] + + AMAIN["agents/main/agent\\n700 openclaw:openclaw"] + ARES["agents/research/agent\\n700 openclaw:openclaw"] + ABRO["agents/browser-login/agent\\n700 openclaw:openclaw"] + ACOO["agents/coolify-ops/agent\\n700 openclaw:openclaw"] + + PMAIN["auth-profiles.json (main)\\n600 openclaw:openclaw"] + PRES["auth-profiles.json (research)\\n600 openclaw:openclaw"] + PBRO["auth-profiles.json (browser-login)\\n600 openclaw:openclaw"] + 
PCOO["auth-profiles.json (coolify-ops)\\n600 openclaw:openclaw"] + + WMAIN["workspace\\n755 openclaw:openclaw"] + WRES["workspace-research\\n755 openclaw:openclaw"] + WBRO["workspace-browser-login\\n755 openclaw:openclaw"] + WCOO["workspace-coolify-ops\\n755 openclaw:openclaw"] + + SMAIN["agents/main/sessions\\n755 openclaw:openclaw"] + GWPROC["systemd: openclaw-gateway-dev-main\\nUser=openclaw Group=openclaw\\nbind 127.0.0.1:19011"] + end + + subgraph COD["Codex creds path"] + ECOD["/home/efra/.codex\\n(source creds)"] + OCOD["/home/openclaw/.codex\\n700 openclaw:openclaw"] + OAUTH1["/home/openclaw/.codex/auth.json\\n600 openclaw:openclaw"] + OAUTH2["/home/openclaw/.codex/auth-andrea.json\\n600 openclaw:openclaw"] + end + + subgraph CPSRC["Control-plane build source"] + CPROOT["/opt/openclaw/control-plane/source\\n755 efra:efra"] + end + + subgraph CP["Control-plane profile efra-core (/home/efra/openclaw-control-plane/efra-core)"] + CPDIR["project dir\\n755 efra:efra"] + CPENV[".env\\n640 efra:efra"] + CPC["docker-compose.yml\\n644 efra:efra"] + CPP["prometheus/prometheus.yml\\n644 efra:efra"] + CPG["grafana/provisioning/datasources/datasource.yml\\n644 efra:efra"] + + DROOT["data/\\n755 efra:efra"] + DNATS["data/nats\\n755 root:root"] + DPG["data/postgres\\n700 uid70:root"] + DPROM["data/prometheus\\n755 root:root"] + DGRA["data/grafana\\n755 root:root"] + DUK["data/uptime-kuma\\n755 root:root"] + end + + subgraph RT["Docker runtime (project ocp-efra-core)"] + ING["ingress\\n127.0.0.1:30101->3000"] + API["control-api\\n127.0.0.1:39101->39090"] + NATS["nats\\n127.0.0.1:14222->4222"] + PG["postgres"] + ROU["router"] + BRK["broker"] + WM["worker-main"] + WR["worker-research"] + WBL["worker-browser-login\\n(host network + shm 1gb)"] + WCO["worker-coolify-ops"] + PRO["prometheus\\n127.0.0.1:39091->9090"] + GRA["grafana\\n127.0.0.1:31001->3000"] + UK["uptime-kuma\\n127.0.0.1:31081->3001"] + end + + INV --> ER + INV --> CR + INV --> AS + + ER --> EROOT + ER --> ESEC + ER -->
EDEV + ER --> UDEV + ER --> GROOT + ER --> GCFG + UDEV --> GWPROC + ESEC --> EDEV + ESEC --> EAND + GROOT --> GAG + GAG --> AMAIN --> PMAIN + GAG --> ARES --> PRES + GAG --> ABRO --> PBRO + GAG --> ACOO --> PCOO + GROOT --> WMAIN + GROOT --> WRES + GROOT --> WBRO + GROOT --> WCOO + GAG --> SMAIN + GCFG --> GWPROC + + ECOD --> AS --> OCOD + AS --> OAUTH1 + AS --> OAUTH2 + AS --> PMAIN + AS --> PRES + AS --> PBRO + AS --> PCOO + AS --> GCFG + + CR --> CPROOT + CR --> CPDIR + CR --> CPENV + CR --> CPC + CR --> CPP + CR --> CPG + CR --> DROOT + DROOT --> DNATS + DROOT --> DPG + DROOT --> DPROM + DROOT --> DGRA + DROOT --> DUK + + CPC --> RT + CPENV --> RT + CPROOT --> RT + + RT --> ING + RT --> API + RT --> NATS + RT --> PG + RT --> ROU + RT --> BRK + RT --> WM + RT --> WR + RT --> WBL + RT --> WCO + RT --> PRO + RT --> GRA + RT --> UK + + WM --> GWPROC + WR --> GWPROC + WBL --> GWPROC + WCO --> GWPROC +``` + +## 2) Runtime Message Flow (full efra-core) + +```mermaid +sequenceDiagram + autonumber + participant TG as Telegram/API client + participant ING as ingress :30101 + participant NATS as NATS JetStream :14222 + participant RT as router + participant WK as worker- + participant OC as openclaw CLI (worker exec mode=openclaw) + participant GW as gateway dev-main :19011 + participant BR as broker + participant PG as Postgres + participant CA as control-api :39101 + + TG->>ING: POST /telegram/webhook or /ingress/simulate + ING->>NATS: publish tasks.ingress + RT->>NATS: consume tasks.ingress + RT->>NATS: publish tasks.agent. + WK->>NATS: consume tasks.agent. + WK->>OC: openclaw --profile dev-main agent --agent + OC->>GW: uses /etc/openclaw/secrets/dev-main.env + GW-->>OC: agent response + WK->>NATS: publish results.agent. 
+ BR->>NATS: consume results.agent.* + BR->>PG: upsert task + events + CA->>PG: GET /tasks/ + BR-->>TG: optional Telegram sendMessage +``` + +## 3) Auth Sync Flow (non-interactive) + +```mermaid +flowchart TB + A["/home/efra/.codex/auth.json (+ auth-andrea.json)\\ncredential source"] --> B["make auth-sync\\nops/auth-sync.sh"] + B --> C["/home/openclaw/.codex\\n700 openclaw:openclaw"] + C --> D["auth.json / auth-andrea.json\\n600 openclaw:openclaw"] + + B --> E["/home/openclaw/.openclaw-dev-main/agents/main/agent/auth-profiles.json\\n600 openclaw:openclaw"] + B --> F[".../agents/research/agent/auth-profiles.json\\n600 openclaw:openclaw"] + B --> G[".../agents/browser-login/agent/auth-profiles.json\\n600 openclaw:openclaw"] + B --> H[".../agents/coolify-ops/agent/auth-profiles.json\\n600 openclaw:openclaw"] + + B --> I["openclaw.json (dev-main)\\nset model: openai-codex/gpt-5.3-codex"] +``` + +## 4) Quick Permission Matrix (critical paths) + +| Path | Mode | Owner:Group | Purpose | +|---|---:|---|---| +| `/etc/openclaw` | `750` | `root:openclaw` | OpenClaw system config root | +| `/etc/openclaw/secrets` | `750` | `root:openclaw` | per-profile env secrets | +| `/etc/openclaw/secrets/dev-main.env` | `640` | `root:openclaw` | gateway/profile runtime secrets | +| `/etc/systemd/system/openclaw-gateway-dev-main.service` | `644` | `root:root` | gateway unit | +| `/home/openclaw/.openclaw-dev-main` | `755` | `openclaw:openclaw` | profile state root | +| `/home/openclaw/.openclaw-dev-main/openclaw.json` | `600` | `openclaw:openclaw` | profile config | +| `/home/openclaw/.openclaw-dev-main/agents/*/agent` | `700` | `openclaw:openclaw` | per-agent private state | +| `/home/openclaw/.openclaw-dev-main/agents/*/agent/auth-profiles.json` | `600` | `openclaw:openclaw` | provider auth store | +| `/home/openclaw/.codex` | `700` | `openclaw:openclaw` | local codex credential mirror | +| `/home/openclaw/.codex/auth*.json` | `600` | `openclaw:openclaw` | codex oauth tokens | +| 
`/home/efra/openclaw-control-plane/efra-core/.env` | `640` | `efra:efra` | compose secrets/env | +| `/home/efra/openclaw-control-plane/efra-core/data/postgres` | `700` | `uid70:root` | postgres persistent volume | +| `/opt/openclaw/control-plane/source` | `755` | `efra:efra` | service build source synced by ansible | + diff --git a/docs/architecture.md b/docs/architecture.md index 1034660..f54fea2 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -1,132 +1,104 @@ --- -title: Architecture -description: Technical implementation details +title: ClawOps Suite Architecture +summary: Arquitectura técnica de la suite operativa sobre OpenClaw (roles, flujos, capas y controles). --- -# Architecture +# ClawOps Suite Architecture -## Component Overview +## Objetivo Arquitectónico -``` -┌─────────────────────────────────────────┐ -│ UFW Firewall (SSH only) │ -└──────────────┬──────────────────────────┘ - │ -┌──────────────┴──────────────────────────┐ -│ DOCKER-USER Chain (iptables) │ -│ Blocks all external container access │ -└──────────────┬──────────────────────────┘ - │ -┌──────────────┴──────────────────────────┐ -│ Docker Daemon │ -│ - Non-root containers │ -│ - Localhost-only binding │ -└──────────────┬──────────────────────────┘ - │ -┌──────────────┴──────────────────────────┐ -│ OpenClaw Container │ -│ User: openclaw │ -│ Port: 127.0.0.1:3000 │ -└──────────────────────────────────────────┘ -``` +Separar claramente tres capas: -## File Structure +1. Capa producto (OpenClaw runtime). +2. Capa plataforma (Ansible roles/playbooks). +3. Capa operación (Makefile + `ops/*.sh` + smoke/runbooks). -``` -/opt/openclaw/ -├── Dockerfile -├── docker-compose.yml +Esta separación permite operación reproducible y control de drift en entornos reales. 
-/home/openclaw/.openclaw/ -├── config.yml -├── sessions/ -└── credentials/ +## Mapa de Componentes -/etc/systemd/system/ -└── openclaw.service +```mermaid +flowchart TB + subgraph OPS[Operation Layer] + MK[Makefile] + SH[ops/*.sh] + SM[smoke + backup/purge/install] + end -/etc/docker/ -└── daemon.json + subgraph IA[Infrastructure as Code Layer] + PB[playbooks/enterprise.yml] + R1[role openclaw] + R2[role openclaw_enterprise] + R3[role openclaw_control_plane] + R4[role openclaw_cloudflare_tunnel] + end -/etc/ufw/ -└── after.rules (DOCKER-USER chain) -``` + subgraph RT[Runtime Layer] + GW[Gateway profiles] + CP[Stage 2 Control Plane] + CF[Cloudflare tunnel opcional] + end -## Service Management + MK --> SH --> PB + PB --> R1 + PB --> R2 + PB --> R3 + PB --> R4 -OpenClaw runs as a systemd service that manages the Docker container: - -```bash -# Systemd controls Docker Compose -systemd → docker compose → openclaw container + R2 --> GW + R3 --> CP + R4 --> CF ``` -## Installation Flow - -1. **Tailscale Setup** (`tailscale.yml`) - - Add Tailscale repository - - Install Tailscale package - - Display connection instructions - -2. **User Creation** (`user.yml`) - - Create `openclaw` system user +## Stage 2 Runtime (Full/Lite) -3. **Docker Installation** (`docker.yml`) - - Install Docker CE + Compose V2 - - Add user to docker group - - Create `/etc/docker` directory +```mermaid +flowchart LR + IN[ingress] --> N[(NATS JetStream)] + RT[router] --> N + W[worker-*] --> N + B[broker] --> N + B --> P[(Postgres)] + A[control-api] --> P -4. **Firewall Setup** (`firewall.yml`) - - Install UFW - - Configure DOCKER-USER chain - - Configure Docker daemon (`/etc/docker/daemon.json`) - - Allow SSH (22/tcp) and Tailscale (41641/udp) + O[observability full mode]:::obs + O --> PR[prometheus] + O --> GR[grafana] + O --> UK[uptime-kuma] -5. **Node.js Installation** (`nodejs.yml`) - - Add NodeSource repository - - Install Node.js 22.x - - Install pnpm globally - -6. 
**OpenClaw Setup** (`openclaw.yml`) - - Create directories - - Generate configs from templates - - Build Docker image - - Start container via Compose - - Install systemd service - -## Key Design Decisions - -### Why UFW + DOCKER-USER? + classDef obs fill:#eef,stroke:#99c,stroke-width:1px; +``` -Docker manipulates iptables directly, bypassing UFW. The DOCKER-USER chain is evaluated before Docker's FORWARD chain, allowing us to block traffic before Docker sees it. +## Falencias Cubiertas por Diseño -### Why Localhost Binding? +| Falencia operativa | Respuesta en la suite | +|---|---| +| Instalación no repetible | Playbooks + defaults + inventarios por ambiente | +| Drift entre perfiles/agentes | Perfiles declarativos + reconciliación Ansible | +| Sin control de cola/estado | NATS + broker + control-api + PostgreSQL | +| Confirmaciones sin transición persistida | `control-api` actualiza estado y eventos en DB | +| Credenciales manuales por agente | `auth-sync` no interactivo por perfil/agente | +| Day-2 artesanal | Targets `make` estandarizados | -Defense in depth. Even if DOCKER-USER fails, localhost binding prevents external access. +## Seguridad Operativa -### Why Systemd Service? +- Secrets por perfil en `/etc/openclaw/secrets/*.env`. +- Servicios con aislamiento de usuario/perfil. +- Endpoints internos en loopback (publicación externa opcional por tunnel). +- Workers con UID/GID parametrizados para evitar supuestos rígidos de host. -- Auto-start on boot -- Clean lifecycle management -- Integration with system logs -- Dependency management (after Docker) +## Rutas Críticas -### Why Non-Root Container? +- Playbook enterprise: `playbooks/enterprise.yml` +- Roles: `roles/openclaw*` +- Control-plane source: `control-plane/` +- Inventarios: `inventories/*` +- Operación: `ops/*`, `Makefile` -Principle of least privilege. If container is compromised, attacker has limited privileges. 
+## Decisión de Compatibilidad -## Ansible Task Order +macOS bare-metal se considera fuera del modelo de ejecución seguro/soportado para esta suite. -``` -main.yml -├── tailscale.yml (VPN setup) -├── user.yml (create openclaw user) -├── docker.yml (install Docker, create /etc/docker) -├── firewall.yml (configure UFW + Docker daemon) -├── nodejs.yml (Node.js + pnpm) -└── openclaw.yml (container setup) -``` +## Relación con OpenClaw -Order matters: Docker must be installed before firewall configuration because: -1. `/etc/docker` directory must exist for `daemon.json` -2. Docker service must exist to be restarted after config changes +Esta suite es una capa de protocolo y operación sobre OpenClaw; no reemplaza el producto. diff --git a/docs/cloudflare-tunnel.md b/docs/cloudflare-tunnel.md new file mode 100644 index 0000000..789cae0 --- /dev/null +++ b/docs/cloudflare-tunnel.md @@ -0,0 +1,68 @@ +--- +title: Cloudflare Tunnel Exposure +summary: Publish OpenClaw and control-plane local services via Cloudflare subdomains +--- + +# Cloudflare Tunnel Exposure + +This role ports the same model used in `/home/efra/develop/cloudflare-tunnel` into Ansible so +subdomain exposure can be reconciled together with enterprise deployment. + +## What it manages + +- Installs `cloudflared` (Debian/Ubuntu). +- Writes tunnel credentials and `config.yml`. 
+- Installs/starts a dedicated systemd service: + - `cloudflared-.service` +- Optionally reconciles DNS CNAME records with: + - `cloudflared tunnel route dns` + +## Inventory variables + +Set in `inventories//group_vars/all.yml`: + +```yaml +openclaw_cloudflare_tunnel_enabled: true +openclaw_cloudflare_tunnel_name: "zennook-openclaw" +openclaw_cloudflare_tunnel_run_user: efra +openclaw_cloudflare_tunnel_run_group: efra +openclaw_cloudflare_tunnel_id: "{{ vault_cloudflare_tunnel_id }}" +openclaw_cloudflare_tunnel_credentials_json: "{{ vault_cloudflare_tunnel_credentials_json }}" +openclaw_cloudflare_tunnel_manage_dns: false +openclaw_cloudflare_tunnel_ingress: + - hostname: "efra-core-ingress.example.com" + service: "http://127.0.0.1:30101" + - hostname: "efra-core-control.example.com" + service: "http://127.0.0.1:39101" +``` + +If you already have tunnel credentials on host (like `/home/efra/.cloudflared/.json`): + +```yaml +openclaw_cloudflare_tunnel_manage_credentials_file: false +openclaw_cloudflare_tunnel_credentials_file: "/home/efra/.cloudflared/.json" +``` + +Set secrets in `inventories//group_vars/vault.yml`: + +```yaml +vault_cloudflare_tunnel_id: "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" +vault_cloudflare_tunnel_credentials_json: | + {"AccountTag":"...","TunnelSecret":"...","TunnelID":"...","TunnelName":"..."} +``` + +## Run + +```bash +# Full enterprise reconcile (includes cloudflare role when enabled) +make install ENV=dev LIMIT=zennook + +# Cloudflare-only reconcile +make cloudflare ENV=dev LIMIT=zennook +``` + +## Notes + +- The role assumes the tunnel already exists in Cloudflare. +- `openclaw_cloudflare_tunnel_manage_dns` is off by default to avoid accidental DNS writes. +- If DNS route reconcile is needed, first ensure `cloudflared tunnel login` was performed on the host. 
diff --git a/docs/control-plane-stage2.md b/docs/control-plane-stage2.md new file mode 100644 index 0000000..baae511 --- /dev/null +++ b/docs/control-plane-stage2.md @@ -0,0 +1,60 @@ +--- +title: Stage 2 Control Plane (ClawOps Suite) +summary: Capa de orquestación de cola/estado para operación multi-agente en OpenClaw enterprise. +--- + +# Stage 2 Control Plane + +## Contexto + +Stage 2 es la respuesta a una necesidad operativa: cuando hay múltiples agentes y perfiles, hace falta un plano de control explícito para enrutar, persistir, observar y decidir. + +## Modos + +- `full`: + - NATS + Postgres + - ingress/router/broker/control-api + - workers múltiples + - observabilidad (Prometheus/Grafana/Uptime Kuma) +- `lite`: + - NATS + Postgres + - ingress/router-forced-main/worker-main/broker/control-api + +## Flujo Operativo + +1. `ingress` publica tarea. +2. `router` decide destino. +3. `worker` ejecuta. +4. `broker` persiste y publica salida. +5. `control-api` consulta estados y aplica decisiones. + +## Endpoints Principales + +- Ingress: `http://127.0.0.1:/telegram/webhook` +- Simulación: `http://127.0.0.1:/ingress/simulate` +- Control API: `http://127.0.0.1:/tasks` +- Cola: `http://127.0.0.1:/queues` + +## Endurecimientos Incluidos + +- Health probe con defaults coherentes por modo (`full`/`lite`). +- UID/GID de worker parametrizado (`OPENCLAW_UID/OPENCLAW_GID`). +- Confirm/reject con transición real de estado en DB. +- Reconciliación SQL de password con escaping seguro. + +## Integración con la Suite + +Se habilita vía `openclaw_control_plane_enabled` y perfiles en inventario. 
+ +Despliegue recomendado: + +```bash +make install +make smoke +``` + +## Referencias + +- [Operator Runbook](operator-runbook.md) +- [Operations Workflow](operations-workflow.md) +- [Architecture](architecture.md) diff --git a/docs/enterprise-deployment.md b/docs/enterprise-deployment.md new file mode 100644 index 0000000..4e12978 --- /dev/null +++ b/docs/enterprise-deployment.md @@ -0,0 +1,92 @@ +--- +title: Enterprise Deployment (ClawOps Suite) +summary: Despliegue multi-ambiente y multi-perfil con OpenClaw + Stage 2 bajo un protocolo operativo único. +--- + +# Enterprise Deployment + +## Propósito + +Estandarizar despliegues enterprise donde un solo host o conjunto de hosts necesita: + +- múltiples perfiles gateway, +- múltiples agentes por perfil, +- control de colas/estado, +- operación repetible y auditable. + +## Qué se despliega + +- `playbooks/enterprise.yml` +- `roles/openclaw` +- `roles/openclaw_enterprise` +- `roles/openclaw_control_plane` +- `roles/openclaw_cloudflare_tunnel` (opcional) + +## Modelo Multi-Perfil + +Cada perfil define al menos: + +- estado (`state_dir`, `config_path`, `workspace_root`), +- puerto gateway, +- secretos de entorno, +- lista de agentes, +- políticas de tools/modelos/sandbox. + +## Modelo Stage 2 + +Dos modos soportados: + +- `full`: cola completa + observabilidad. +- `lite`: camino mínimo para ejecución directa. + +## Ejecución + +```bash +ansible-playbook -i inventories/dev/hosts.yml playbooks/enterprise.yml --become +``` + +o + +```bash +./run-enterprise-playbook.sh dev +``` + +## Comportamiento de Rollout + +- `serial` configurable. +- tolerancia configurable a hosts no disponibles. +- ejecución progresiva para reducir riesgo de corte total. + +## Secrets y Gobernanza + +Variables sensibles deben residir en vault por ambiente: + +- tokens gateway, +- credenciales NATS/Postgres, +- tokens Telegram, +- credenciales tunnel si aplica. 
+ +La suite escribe archivos de entorno por perfil y separa secretos de configuración funcional. + +## Qué Falencia Cubre Este Modo Enterprise + +1. Evita mezcla de estados entre perfiles. +2. Permite aislar rutas de agentes por contexto de negocio. +3. Habilita crecimiento incremental sin re-arquitectura manual. +4. Reduce dependencia de pasos ad-hoc en operadores individuales. + +## Integración con Operación Day-2 + +Para operación continua usar: + +- `make install` +- `make auth-sync` +- `make smoke` +- `make backup` +- `make purge CONFIRM=1` + +## Referencias + +- [Operations Workflow](operations-workflow.md) +- [Stage 2 Control Plane](control-plane-stage2.md) +- [Operator Runbook](operator-runbook.md) diff --git a/docs/epics/EPIC-002-control-plane-stage2.md b/docs/epics/EPIC-002-control-plane-stage2.md new file mode 100644 index 0000000..f199f95 --- /dev/null +++ b/docs/epics/EPIC-002-control-plane-stage2.md @@ -0,0 +1,32 @@ +# EPIC-002 - Stage 2 Queue Control Plane + +Status: In Progress +Owner: platform/devops + +## Goal + +Deliver a reusable package that adds queue orchestration, routing-by-intent, and telemetry for OpenClaw multi-agent deployments. + +## Scope + +- NATS JetStream task bus +- NestJS microservices control plane +- `full` and `lite` deployment modes +- Inventory-driven packaging for reuse in other profiles + +## Acceptance Criteria + +1. `efra-core` profile deploys full stack from Ansible. +2. `andrea` profile deploys lite stack from Ansible. +3. Telegram ingress can route and persist tasks with source + agent attribution. +4. Control API exposes task list and queue stats. +5. Full mode exposes Prometheus/Grafana/Uptime Kuma. +6. Documentation includes install, operations, rollback, and secrets map. + +## Evidence + +- `playbooks/enterprise.yml` includes `openclaw_control_plane` role. +- `roles/openclaw_control_plane/*` templates and tasks render full/lite stacks. +- `control-plane/src/*` contains ingress/router/worker/broker/control-api. 
+- `inventories/dev/group_vars/all.yml` defines `efra-core` (full) and `andrea` (lite). +- `docs/control-plane-stage2.md` documents runbook. diff --git a/docs/evidence/STAGE2-validation.md b/docs/evidence/STAGE2-validation.md new file mode 100644 index 0000000..f663eec --- /dev/null +++ b/docs/evidence/STAGE2-validation.md @@ -0,0 +1,13 @@ +# Stage 2 Validation Notes + +Date: 2026-03-01 + +## Commands + +- `ANSIBLE_ROLES_PATH=roles ansible-playbook -i inventories/dev/hosts.yml playbooks/enterprise.yml --syntax-check` -> PASS +- `npm install` and build for `control-plane` -> pending in this environment due to a network timeout during package fetch. + +## Observations + +- Ansible role wiring is valid and syntax-check passes. +- Node dependency install did not complete in this execution environment (timeout without output), so runtime smoke tests are pending a host with outbound npm access. diff --git a/docs/operations-workflow.md b/docs/operations-workflow.md new file mode 100644 index 0000000..89b1ac3 --- /dev/null +++ b/docs/operations-workflow.md @@ -0,0 +1,79 @@ +--- +title: Operations Workflow (ClawOps Suite) +summary: Protocolo day-2 para operar OpenClaw enterprise de manera repetible. +--- + +# Operations Workflow + +## Idea Central + +La suite define un protocolo simple: cada operación crítica debe tener un comando único y repetible. + +Por eso `Makefile` expone comandos estables y `ops/*.sh` encapsula la implementación. + +## Ciclo Canónico + +```bash +make backup +make purge CONFIRM=1 +make install +make auth-sync PROFILES="dev-main andrea" OAUTH_PROVIDER=openai-codex +make smoke +``` + +Para ejecución completa: + +```bash +make reinstall CONFIRM=1 +``` + +## Comandos y Rol Operativo + +- `make backup`: preserva estado operativo antes de cambios. +- `make purge`: limpia estado runtime para reinstalación controlada. +- `make install`: reconcilia enterprise + control-plane. +- `make auth-sync`: propaga credenciales Codex a perfiles/agentes.
+- `make smoke`: valida salud + flujo cola end-to-end. + +## Auth-Sync como Control de Deriva + +`auth-sync` existe para resolver una falencia operativa común: credenciales divergentes por agente/perfil. + +Estrategia: + +1. Fuente central en `/home/efra/.codex`. +2. Espejo en `/home/openclaw/.codex`. +3. Escritura determinista de `auth-profiles.json` por agente. +4. Alineación de modelo por perfil. + +## Validación de Secretos + +`make install` ejecuta validación previa de secretos para bloquear despliegues incompletos. + +Complemento: + +```bash +make secrets-refactor +``` + +Genera base de migración manual para homogeneizar vault por ambiente. + +## Qué Falencias Cubre Este Workflow + +1. Cambios manuales no auditables. +2. Reinstalaciones inconsistentes. +3. Pérdida de estado por no hacer backup previo. +4. Despliegues "verdes" sin smoke real de cola. + +## Defaults de Operación + +- `ENV=dev` +- `INVENTORY=inventories/dev/hosts.yml` +- `LIMIT=zennook` +- `PROFILES="dev-main andrea"` + +## Referencias + +- [Operator Runbook](operator-runbook.md) +- [Enterprise Deployment](enterprise-deployment.md) +- [Installed Runtime Layout](architecture-installed-layout.md) diff --git a/docs/operator-runbook.md b/docs/operator-runbook.md new file mode 100644 index 0000000..449d789 --- /dev/null +++ b/docs/operator-runbook.md @@ -0,0 +1,469 @@ +--- +title: Operator Runbook (Profiles, Agents, Auth Sync, Queues) +summary: End-to-end step-by-step guide to deploy, operate, extend, and troubleshoot multi-profile OpenClaw with Stage 2 control-plane. +--- + +# Operator Runbook + +This runbook is the canonical step-by-step guide to: + +- Install and reconcile OpenClaw with Ansible. +- Operate multi-profile gateways (`dev-main`, `andrea`, and new profiles). +- Operate Stage 2 control-plane (`full` and `lite`). +- Sync OpenAI Codex credentials per profile. +- Create new profiles and new agents safely. +- Route Telegram traffic to agents and validate queue execution. 
+ +Use this with: + +- [Enterprise Deployment](enterprise-deployment.md) +- [Stage 2 Control Plane](control-plane-stage2.md) +- [Operations Workflow](operations-workflow.md) +- [Troubleshooting](troubleshooting.md) + +## 1. Architecture and Responsibilities + +### 1.1 Logical topology + +```mermaid +flowchart LR + A[Ansible control host] --> B[openclaw role] + A --> C[openclaw_enterprise role] + A --> D[openclaw_control_plane role] + A --> E[openclaw_cloudflare_tunnel role] + + C --> F[Gateway profile: dev-main] + C --> G[Gateway profile: andrea] + + D --> H[Control-plane: efra-core full] + D --> I[Control-plane: andrea lite] + + H --> H1[NATS] + H --> H2[Postgres] + H --> H3[Ingress] + H --> H4[Router] + H --> H5[Workers] + H --> H6[Broker] + H --> H7[Control API] + H --> H8[Prometheus/Grafana/Kuma] + + I --> I1[NATS] + I --> I2[Postgres] + I --> I3[Ingress] + I --> I4[Router forced main] + I --> I5[Worker main] + I --> I6[Broker] + I --> I7[Control API] +``` + +### 1.2 Message flow (Telegram to agent and back) + +```mermaid +sequenceDiagram + autonumber + participant TG as Telegram + participant ING as ingress + participant RT as router + participant NATS as NATS JetStream + participant WK as worker- + participant BR as broker + participant PG as PostgreSQL + + TG->>ING: POST /telegram/webhook + ING->>NATS: publish tasks.ingress + RT->>NATS: consume tasks.ingress + RT->>NATS: publish tasks.agent. + WK->>NATS: consume tasks.agent. + WK->>NATS: publish results.agent. + BR->>NATS: consume results.agent.* + BR->>PG: upsert tasks + insert task_events + BR->>TG: sendMessage (final response) +``` + +### 1.3 Component map in this repository + +- Enterprise playbook: `playbooks/enterprise.yml` +- Gateway profile role: `roles/openclaw_enterprise` +- Control-plane role: `roles/openclaw_control_plane` +- Stage 2 source code: `control-plane/` +- Environment inventory: `inventories/dev/group_vars/all.yml` + +## 2. 
Prerequisites + +- Supported OS on target nodes: Debian, Ubuntu, Fedora. +- Sudo privileges on target node(s). +- `ansible`, `git`, `python3`. +- Telegram Bot token(s) in Vault or inventory variables. +- OpenClaw gateway token per profile (`OPENCLAW_GATEWAY_TOKEN`). + +Optional: + +- Cloudflare tunnel credentials for remote subdomains. +- Tailscale enabled on target nodes. + +## 3. Full Install / Reconcile from Zero + +The recommended operator path is Makefile + `ops/*.sh`. + +```bash +cd /home/efra/openclaw-ansible + +# 1) Backup current runtime +make backup ENV=dev LIMIT=zennook + +# 2) Purge runtime (explicit confirmation required) +make purge CONFIRM=1 ENV=dev LIMIT=zennook + +# 3) Deploy enterprise profiles + control-plane +make install ENV=dev LIMIT=zennook + +# 4) Optional Cloudflare reconcile +make cloudflare ENV=dev LIMIT=zennook + +# 5) Credential sync (non-interactive) +make auth-sync ENV=dev LIMIT=zennook PROFILES="dev-main andrea" OAUTH_PROVIDER=openai-codex +# legacy alias +make oauth-login ENV=dev LIMIT=zennook PROFILES="dev-main andrea" OAUTH_PROVIDER=openai-codex + +# 6) Run smoke tests +make smoke ENV=dev LIMIT=zennook +``` + +Equivalent direct Ansible command: + +```bash +ansible-playbook -i inventories/dev/hosts.yml playbooks/enterprise.yml --ask-become-pass --limit zennook +``` + +## 4. Provider Auth Sync (OpenAI Codex) + +### 4.1 Why sync per profile + +Auth state is profile-specific (`--profile `), and each profile has its own +`auth-profiles.json` under its `agents/*/agent` directories. + +### 4.2 Standard sync command + +```bash +make auth-sync PROFILES="dev-main andrea" OAUTH_PROVIDER=openai-codex +``` + +`make oauth-login` remains available as a compatibility alias and runs the same sync workflow. 
+ +### 4.3 Credential sources + +By default: + +- `EFRA_CODEX_AUTH_DEFAULT=/home/efra/.codex/auth.json` +- `EFRA_CODEX_AUTH_ANDREA=/home/efra/.codex/auth-andrea.json` + +If `/home/efra/.env` exists, `auth-sync` loads it first so these paths can be overridden. + +### 4.4 Verify auth/model state + +```bash +sudo -u openclaw -H /home/openclaw/.local/bin/openclaw --profile dev-main models status --probe +sudo -u openclaw -H /home/openclaw/.local/bin/openclaw --profile andrea models status --probe +``` + +## 5. Create a New Profile (Gateway + Optional Control-Plane) + +This section describes the exact files and steps. + +### 5.1 Add gateway profile to inventory + +Edit `inventories/dev/group_vars/all.yml` and add an item under `openclaw_enterprise_profiles`: + +```yaml +- name: ops-lab + gateway_port: 19041 + gateway_bind: loopback + state_dir: /home/openclaw/.openclaw-ops-lab + config_path: /home/openclaw/.openclaw-ops-lab/openclaw.json + workspace_root: /home/openclaw/.openclaw-ops-lab/workspace + model_primary: openai/gpt-5-mini + model_fallbacks: + - anthropic/claude-sonnet-4-5 + tools_profile: coding + sandbox_mode: non-main + sandbox_scope: session + agents: + - id: main + default: true + workspace: /home/openclaw/.openclaw-ops-lab/workspace + env: + OPENCLAW_GATEWAY_TOKEN: "{{ vault_openclaw_gateway_token_ops_lab }}" + OPENAI_API_KEY: "" + ANTHROPIC_API_KEY: "" +``` + +### 5.2 Add secrets in Vault + +Edit `inventories/dev/group_vars/vault.yml`: + +```yaml +vault_openclaw_gateway_token_ops_lab: "replace-with-strong-random-token" +``` + +Encrypt if needed: + +```bash +ansible-vault encrypt inventories/dev/group_vars/vault.yml +``` + +### 5.3 (Optional) Add control-plane profile for this gateway profile + +Under `openclaw_control_plane_profiles`: + +```yaml +- name: ops-lab + mode: lite + gateway_profile: ops-lab + project_dir: /home/efra/openclaw-control-plane/ops-lab + ingress_port: 30121 + control_api_port: 39121 + telegram_bot_token: "{{ 
vault_telegram_bot_token_ops_lab }}" + telegram_default_chat_id: "{{ vault_telegram_default_chat_id_ops_lab }}" + postgres_password: "{{ vault_openclaw_cp_postgres_password_ops_lab }}" + nats_user: queue + nats_password: "{{ vault_openclaw_cp_nats_password_ops_lab }}" +``` + +Vault keys: + +```yaml +vault_telegram_bot_token_ops_lab: "" +vault_telegram_default_chat_id_ops_lab: "" +vault_openclaw_cp_postgres_password_ops_lab: "replace" +vault_openclaw_cp_nats_password_ops_lab: "replace" +``` + +### 5.4 Deploy and verify + +```bash +make install ENV=dev LIMIT=zennook +sudo -u openclaw -H /home/openclaw/.local/bin/openclaw --profile ops-lab status --all +``` + +### 5.5 Login provider for new profile + +```bash +make auth-sync PROFILES="ops-lab" OAUTH_PROVIDER=openai-codex +``` + +## 6. Create a New Agent Inside an Existing Profile + +### 6.1 Add agent in inventory profile + +Inside the profile `agents:` list in `inventories/dev/group_vars/all.yml`: + +```yaml +- id: qa + workspace: /home/openclaw/.openclaw-dev-main/workspace-qa + tools: + profile: coding +``` + +Redeploy: + +```bash +make install ENV=dev LIMIT=zennook +``` + +### 6.2 Create identity/memory files for the new agent workspace + +```bash +sudo -u openclaw -H install -d -m 700 /home/openclaw/.openclaw-dev-main/workspace-qa/memory + +sudo -u openclaw -H bash -lc 'cat > /home/openclaw/.openclaw-dev-main/workspace-qa/IDENTITY.md < /home/openclaw/.openclaw-dev-main/workspace-qa/MEMORY.md < /home/openclaw/.openclaw-dev-main/workspace-qa/AGENTS.md < `browser-login` keywords: `login`, `browser`, `portal`, `cookie`, `captcha` +- `deploy.coolify` -> `coolify-ops` keywords: `coolify`, `deploy`, `release`, `rollback`, `service up`, `service down` +- `research.analysis` -> `research` keywords: `investiga`, `analiza`, `research`, `comparar`, `resumen`, `benchmark` +- fallback -> `main` + +### 7.3 Browser-login worker networking note + +For browser workflows, `worker-browser-login` uses host networking in full mode 
template:
+
+- `network_mode: host`
+- `shm_size: "1gb"`
+- `NATS_URL` override to loopback-published NATS (`127.0.0.1:14222`)
+
+This avoids hanging tasks when browser automation needs host-level gateway/browser relay access.
+
+## 8. Daily Command Reference (Per Profile)
+
+Replace `<profile>` with `dev-main`, `andrea`, or your custom profile.
+
+### 8.1 Health and status
+
+```bash
+sudo -u openclaw -H /home/openclaw/.local/bin/openclaw --profile <profile> status --all
+sudo -u openclaw -H /home/openclaw/.local/bin/openclaw --profile <profile> doctor --fix
+sudo -u openclaw -H /home/openclaw/.local/bin/openclaw --profile <profile> security audit --deep
+```
+
+### 8.2 Gateway lifecycle
+
+```bash
+sudo -u openclaw -H /home/openclaw/.local/bin/openclaw --profile <profile> gateway status
+sudo -u openclaw -H /home/openclaw/.local/bin/openclaw --profile <profile> gateway stop
+sudo -u openclaw -H /home/openclaw/.local/bin/openclaw --profile <profile> gateway start
+```
+
+### 8.3 Agent command execution
+
+```bash
+sudo -u openclaw -H /home/openclaw/.local/bin/openclaw --profile <profile> agent --agent main --message "hola" --json
+sudo -u openclaw -H /home/openclaw/.local/bin/openclaw --profile <profile> agent --agent research --message "investiga X" --json
+```
+
+### 8.4 Onboarding
+
+```bash
+sudo -u openclaw -H /home/openclaw/.local/bin/openclaw --profile <profile> onboard --install-daemon
+```
+
+## 9. Stage 2 Validation Checklist
+
+```bash
+# service health
+curl -fsS http://127.0.0.1:30101/health
+curl -fsS http://127.0.0.1:39101/health
+
+# queue stats
+curl -fsS http://127.0.0.1:39101/queues
+
+# simulate ingress
+curl -fsS -X POST http://127.0.0.1:30101/ingress/simulate \
+  -H "content-type: application/json" \
+  -d '{"text":"investiga como te llamas y cual es tu labor","chatId":"local-sim"}'
+```
+
+Telegram E2E checklist:
+
+1. Send `/agents` and verify catalog.
+2. Send `investiga ...` and verify `[agent=research]`.
+3. Send `login ...` and verify `[agent=browser-login]`.
+4. Send `coolify deploy ...` and verify `[agent=coolify-ops]`.
+5. Send generic `hola` and verify `[agent=main]`.
+
+## 10. Troubleshooting Matrix
+
+### 10.1 Missing gateway token
+
+Symptom:
+
+- `MissingEnvVarError: Missing env var "OPENCLAW_GATEWAY_TOKEN"`
+
+Actions:
+
+1. Check profile env file exists:
+   - `/etc/openclaw/secrets/<profile>.env`
+2. Confirm token is present.
+3. Re-run:
+   - `openclaw --profile <profile> doctor --fix`
+
+### 10.2 Wrong sudo syntax
+
+Symptom:
+
+- `sudo: unrecognized option '--profile'`
+
+Cause:
+
+- `--profile` belongs to `openclaw`, not `sudo`.
+
+Correct:
+
+```bash
+sudo -u openclaw -H /home/openclaw/.local/bin/openclaw --profile dev-main doctor --fix
+```
+
+### 10.3 Auth sync cannot read Codex credentials
+
+Action:
+
+1. Ensure `/home/efra/.codex/auth.json` exists.
+2. If using per-profile creds, ensure `/home/efra/.codex/auth-andrea.json` exists.
+3. If custom paths are needed, export them in `/home/efra/.env`:
+   - `EFRA_CODEX_AUTH_DEFAULT=...`
+   - `EFRA_CODEX_AUTH_ANDREA=...`
+4. Re-run `make auth-sync`.
+
+### 10.4 Browser task hangs or no Telegram response
+
+Actions:
+
+1. Check worker backlog in NATS (`num_ack_pending` / `num_pending`).
+2. Ensure `worker-browser-login` has host networking in full stack template.
+3. Ensure browser relay is attached (`tabs > 0`) before screenshot/login tasks.
+
+### 10.5 Shell completion missing file
+
+Symptom:
+
+- `bash: .../completions/openclaw.bash: No existe ...`
+
+Actions:
+
+1. `openclaw --profile <profile> doctor --fix`
+2. If needed, create completion directory/file and fix ownership.
+
+## 11. Safe Change Process (Recommended)
+
+When adding profiles/agents:
+
+1. Update inventory + Vault.
+2. `make install`.
+3. Run auth sync for target profiles.
+4. Run smoke checks.
+5. Validate Telegram E2E.
+6. Commit changes with docs + inventory + role/template updates together.
+
+This keeps code, runtime behavior, and operational docs aligned. 
diff --git a/docs/stories/STORY-201-full-stack-efra-core.md b/docs/stories/STORY-201-full-stack-efra-core.md new file mode 100644 index 0000000..bdab2a2 --- /dev/null +++ b/docs/stories/STORY-201-full-stack-efra-core.md @@ -0,0 +1,14 @@ +# STORY-201 - Deploy full stack for efra-core + +Status: Done + +## Acceptance Criteria + +- Full compose includes NATS, Postgres, ingress, router, broker, 4 workers, control-api, Prometheus, Grafana, Uptime Kuma. +- Ports bind to loopback only. +- Secrets are read from per-profile variables. + +## Evidence + +- `roles/openclaw_control_plane/templates/docker-compose.full.yml.j2` +- `inventories/dev/group_vars/all.yml` (`openclaw_control_plane_profiles[efra-core]`) diff --git a/docs/stories/STORY-202-lite-stack-andrea.md b/docs/stories/STORY-202-lite-stack-andrea.md new file mode 100644 index 0000000..2cb6d38 --- /dev/null +++ b/docs/stories/STORY-202-lite-stack-andrea.md @@ -0,0 +1,14 @@ +# STORY-202 - Deploy lite stack for andrea + +Status: Done + +## Acceptance Criteria + +- Lite compose includes only minimal services. +- Router forced route to `main` worker. +- Independent profile variables and secrets. + +## Evidence + +- `roles/openclaw_control_plane/templates/docker-compose.lite.yml.j2` +- `inventories/dev/group_vars/all.yml` (`openclaw_control_plane_profiles[andrea]`) diff --git a/docs/stories/STORY-203-intent-routing-and-result-broker.md b/docs/stories/STORY-203-intent-routing-and-result-broker.md new file mode 100644 index 0000000..70b72f7 --- /dev/null +++ b/docs/stories/STORY-203-intent-routing-and-result-broker.md @@ -0,0 +1,16 @@ +# STORY-203 - Intent routing and broker attribution + +Status: Done + +## Acceptance Criteria + +- Router classifies intent and sends to target subject. +- Worker output includes `fromAgent`, `taskId`, and confirmation flag. +- Broker persists task result and sends Telegram reply with attribution. 
+ +## Evidence + +- `control-plane/src/router/router.runner.ts` +- `control-plane/src/worker/worker.runner.ts` +- `control-plane/src/broker/broker.runner.ts` +- `control-plane/src/common/contracts.ts` diff --git a/docs/stories/STORY-204-observability-and-control-api.md b/docs/stories/STORY-204-observability-and-control-api.md new file mode 100644 index 0000000..bd73543 --- /dev/null +++ b/docs/stories/STORY-204-observability-and-control-api.md @@ -0,0 +1,16 @@ +# STORY-204 - Observability and control API + +Status: Done + +## Acceptance Criteria + +- Full mode exports metrics for Prometheus. +- Grafana is provisioned with default datasource. +- Control API provides task listing, details, decisions, and queue stats. + +## Evidence + +- `roles/openclaw_control_plane/templates/prometheus.yml.j2` +- `roles/openclaw_control_plane/templates/grafana-datasources.yml.j2` +- `control-plane/src/control-api/control.controller.ts` +- `control-plane/src/control-api/control.service.ts` diff --git a/docs/stories/STORY-205-cloudflare-subdomain-exposure.md b/docs/stories/STORY-205-cloudflare-subdomain-exposure.md new file mode 100644 index 0000000..a06fbd5 --- /dev/null +++ b/docs/stories/STORY-205-cloudflare-subdomain-exposure.md @@ -0,0 +1,20 @@ +# STORY-205 - Cloudflare subdomain exposure + +Status: Done + +## Acceptance Criteria + +- Cloudflare Tunnel can be managed from Ansible as an optional role. +- Ingress routes map subdomains to loopback services (ingress/control-api/grafana/etc.). +- Operators can run a dedicated Cloudflare reconcile command without re-deploying all stacks. 
+ +## Evidence + +- `roles/openclaw_cloudflare_tunnel/defaults/main.yml` +- `roles/openclaw_cloudflare_tunnel/tasks/main.yml` +- `roles/openclaw_cloudflare_tunnel/templates/cloudflared-config.yml.j2` +- `roles/openclaw_cloudflare_tunnel/templates/cloudflared.service.j2` +- `playbooks/cloudflare-only.yml` +- `ops/cloudflare-reconcile.sh` +- `Makefile` +- `docs/cloudflare-tunnel.md` diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index f4ee2a2..19e605f 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -138,3 +138,59 @@ ansible-playbook playbook.yml --ask-become-pass sudo systemctl start docker # Re-run playbook ``` + +## Gateway Unreachable After Tailscale Exposure Change (dev-main) + +**Symptom**: +- `openclaw --profile dev-main status --all` shows gateway unreachable (`ECONNREFUSED 127.0.0.1:18789`) +- `gateway probe` may show `Connect: ok` but `RPC: failed - timeout` +- Mixed state after switching to `gateway.bind=tailnet` or enabling internal `gateway.tailscale.mode=serve` + +**Root cause**: +- Local profile clients still target loopback (`ws://127.0.0.1:18789`) while gateway binding/exposure was changed. +- Residual `tailscale ssh`/forward processes can remain attached to the service cgroup. +- Internal Tailscale serve from non-interactive service users may fail depending on tailnet policy. 
+ +**Remediation (safe baseline)**: +```bash +# 1) Keep gateway local-only +sudo -iu openclaw /home/openclaw/.local/bin/openclaw --profile dev-main config set gateway.bind loopback +sudo -iu openclaw /home/openclaw/.local/bin/openclaw --profile dev-main config set gateway.tailscale.mode off + +# 2) Restart user service using openclaw user DBus +uid=$(id -u openclaw) +export XDG_RUNTIME_DIR=/run/user/$uid +export DBUS_SESSION_BUS_ADDRESS=unix:path=$XDG_RUNTIME_DIR/bus +sudo -u openclaw XDG_RUNTIME_DIR=$XDG_RUNTIME_DIR DBUS_SESSION_BUS_ADDRESS=$DBUS_SESSION_BUS_ADDRESS \ + systemctl --user restart openclaw-gateway-dev-main.service +sudo -u openclaw XDG_RUNTIME_DIR=$XDG_RUNTIME_DIR DBUS_SESSION_BUS_ADDRESS=$DBUS_SESSION_BUS_ADDRESS \ + systemctl --user enable openclaw-gateway-dev-main.service + +# 3) Validate +sudo -iu openclaw /home/openclaw/.local/bin/openclaw --profile dev-main gateway probe +sudo -iu openclaw /home/openclaw/.local/bin/openclaw --profile dev-main status --all +``` + +**Expected healthy probe**: +- `Local loopback ws://127.0.0.1:18789` +- `Connect: ok` +- `RPC: ok` + +**Expose dashboard over Tailscale (recommended pattern)**: +- Keep OpenClaw bind on loopback. 
+- Expose separately with Tailscale Serve (HTTPS path), only if Serve is enabled in tailnet admin: +```bash +sudo tailscale serve --bg http://127.0.0.1:18789 +tailscale serve status +``` + +**Security follow-up**: +- If a token appeared in process command lines (`OPENCLAW_GATEWAY_TOKEN=...`), rotate it immediately: +```bash +sudo -iu openclaw /home/openclaw/.local/bin/openclaw --profile dev-main doctor --generate-gateway-token +uid=$(id -u openclaw) +export XDG_RUNTIME_DIR=/run/user/$uid +export DBUS_SESSION_BUS_ADDRESS=unix:path=$XDG_RUNTIME_DIR/bus +sudo -u openclaw XDG_RUNTIME_DIR=$XDG_RUNTIME_DIR DBUS_SESSION_BUS_ADDRESS=$DBUS_SESSION_BUS_ADDRESS \ + systemctl --user restart openclaw-gateway-dev-main.service +``` diff --git a/inventories/README.md b/inventories/README.md new file mode 100644 index 0000000..b454156 --- /dev/null +++ b/inventories/README.md @@ -0,0 +1,86 @@ +# Inventories + +Each environment has: + +- `hosts.yml` +- `group_vars/all.yml` +- optional `group_vars/vault.yml` (encrypted with `ansible-vault`) +- `group_vars/vault.example.yml` template + +`hosts.yml` can also include metadata-only node groups (for example `openclaw_mobile_nodes`) +to track Android/iOS companions that pair over Gateway WebSocket. Keep deployment targets +inside `openclaw_gateway`; mobile nodes are not SSH-managed by Ansible. 
+ +Resilience knobs (set in `group_vars/all.yml` or `-e`): + +- `openclaw_rollout_serial` (default `1`) +- `openclaw_ignore_unreachable` (default `true`) +- `openclaw_max_fail_percentage` (default `100`) + +Stage 2 control-plane knobs: + +- `openclaw_control_plane_enabled` +- `openclaw_control_plane_manage_stack` +- `openclaw_control_plane_profiles` (`mode: full|lite`) + +Cloudflare subdomain exposure knobs: + +- `openclaw_cloudflare_tunnel_enabled` +- `openclaw_cloudflare_tunnel_name` +- `openclaw_cloudflare_tunnel_id` +- `openclaw_cloudflare_tunnel_credentials_json` (from vault) +- `openclaw_cloudflare_tunnel_manage_credentials_file` (set false to reuse existing credential file) +- `openclaw_cloudflare_tunnel_credentials_file` (host path to existing `.json`) +- `openclaw_cloudflare_tunnel_ingress` (list of `hostname` + `service`) +- `openclaw_cloudflare_tunnel_manage_dns` (optional route reconcile) + +Example: + +```bash +ansible-vault create inventories/prod/group_vars/vault.yml +``` + +Or bootstrap from template: + +```bash +cp inventories/prod/group_vars/vault.example.yml inventories/prod/group_vars/vault.yml +ansible-vault encrypt inventories/prod/group_vars/vault.yml +``` + +`vault.yml` should define secrets referenced in `group_vars/all.yml`, for example: + +```yaml +vault_openclaw_gateway_token_prod_main: "..." +vault_openclaw_gateway_token_prod_rescue: "..." +vault_openai_api_key_prod: "..." +vault_anthropic_api_key_prod: "..." +``` + +`vault_openai_api_key_*` / `vault_anthropic_api_key_*` can be left empty when using OAuth/browser login flows. + +## Android node flow (metadata + operations) + +1. Keep Android inventory entries under `openclaw_mobile_nodes` (metadata only). + - Suggested metadata: `openclaw_node_tailnet_ip`, `openclaw_node_magicdns`, `openclaw_node_gateway`. + - Discover from control host: `sudo tailscale status --json`. +2. 
Deploy/upgrade gateways with: + - `ansible-playbook -i inventories/dev/hosts.yml playbooks/enterprise.yml --ask-become-pass` +3. Pair Android from a gateway host: + - `openclaw nodes pending` + - `openclaw nodes approve ` +4. Verify runtime connectivity: + - `openclaw nodes status` + +## Browser login agent flow + +Enterprise profile examples include `browser-login` with browser-only access: + +- `tools.profile: full` +- `tools.allow: ["browser"]` +- `sandbox.mode: "off"` +- `browser.defaultProfile: openclaw` + +Manual login runbook on gateway host: + +- `openclaw browser --browser-profile openclaw start` +- `openclaw browser --browser-profile openclaw open https://x.com` diff --git a/inventories/dev/group_vars/all.yml b/inventories/dev/group_vars/all.yml new file mode 100644 index 0000000..84655d2 --- /dev/null +++ b/inventories/dev/group_vars/all.yml @@ -0,0 +1,230 @@ +--- +openclaw_install_mode: release +openclaw_enterprise_enabled: true +openclaw_rollout_serial: 1 +openclaw_ignore_unreachable: true +openclaw_max_fail_percentage: 100 +openclaw_enterprise_profiles: + - name: dev-main + gateway_port: 19011 + gateway_bind: loopback + browser: + enabled: true + defaultProfile: openclaw + state_dir: /home/openclaw/.openclaw-dev-main + config_path: /home/openclaw/.openclaw-dev-main/openclaw.json + workspace_root: /home/openclaw/.openclaw-dev-main/workspace + model_primary: openai/gpt-5-mini + model_fallbacks: + - anthropic/claude-sonnet-4-5 + - openai/gpt-5.2 + tools_profile: coding + sandbox_mode: non-main + sandbox_scope: session + user_profile: + name: Efrain Garay + call: Efra + timezone: America/Santiago + notes: + - Prefer direct answers with practical execution details. + - Keep communication clear, human, and low on filler text. 
+ agents: + - id: main + default: true + name: Menicius Core + identity: + name: Menicius + theme: operations orchestrator + emoji: ":compass:" + workspace: /home/openclaw/.openclaw-dev-main/workspace + - id: research + name: Russell Research + identity: + name: Russell + theme: evidence analyst + emoji: ":microscope:" + workspace: /home/openclaw/.openclaw-dev-main/workspace-research + tools: + profile: coding + - id: browser-login + name: Andtera Browser + identity: + name: Andtera + theme: browser login specialist + emoji: ":key:" + workspace: /home/openclaw/.openclaw-dev-main/workspace-browser-login + sandbox: + mode: "off" + tools: + profile: full + allow: + - browser + - id: coolify-ops + name: Forge Ops + identity: + name: Forge + theme: deployment reliability engineer + emoji: ":gear:" + workspace: /home/openclaw/.openclaw-dev-main/workspace-coolify-ops + tools: + profile: coding + agent_personas: + - id: main + display_name: Menicius Core + identity_name: Menicius + creature: Systems navigator + role: Primary operator and execution lead + vibe: Direct, clear, and pragmatic + tone: Calm and action oriented + mission: Convert requests into executable outcomes and close loops with evidence. + responsibilities: + - Triage requests and define an execution path. + - Coordinate specialized agents when needed. + - Return clean human summaries with concrete next steps. + - id: research + display_name: Russell Research + identity_name: Russell + creature: Research analyst + role: Deep analysis and verification specialist + vibe: Methodical and factual + tone: Precise and neutral + mission: Produce high-confidence findings backed by explicit sources or repo evidence. + responsibilities: + - Investigate docs, code, and references before conclusions. + - Separate facts, assumptions, and open questions. + - Highlight risks, regressions, and validation gaps. 
+ - id: browser-login + display_name: Andtera Browser + identity_name: Andtera + creature: Browser workflow operator + role: Human-in-the-loop auth and web flow specialist + vibe: Careful and procedural + tone: Simple and step-by-step + mission: Complete browser-based authentication and guided web tasks safely. + responsibilities: + - Handle login, verification, and manual browser checkpoints. + - Report exact steps, status, and blockers. + - Avoid broad changes outside browser scope. + - id: coolify-ops + display_name: Forge Ops + identity_name: Forge + creature: Infrastructure engineer + role: Coolify and deployment reliability specialist + vibe: Operational and resilient + tone: Direct with measurable outcomes + mission: Keep deployments stable and recover quickly from service failures. + responsibilities: + - Validate deployment health and runtime services. + - Apply targeted fixes with rollback-aware steps. + - Document evidence from logs, checks, and post-fix validation. + bindings: [] + env: + OPENCLAW_GATEWAY_TOKEN: "{{ vault_openclaw_gateway_token_dev_main | default('replace-me-dev-main-gateway-token') }}" + OPENAI_API_KEY: "{{ vault_openai_api_key_dev | default('') }}" + ANTHROPIC_API_KEY: "{{ vault_anthropic_api_key_dev | default('') }}" + + - name: andrea + gateway_port: 19031 + gateway_bind: loopback + state_dir: /home/openclaw/.openclaw-andrea + config_path: /home/openclaw/.openclaw-andrea/openclaw.json + workspace_root: /home/openclaw/.openclaw-andrea/workspace + model_primary: openai/gpt-5-mini + model_fallbacks: + - anthropic/claude-sonnet-4-5 + tools_profile: messaging + sandbox_mode: non-main + sandbox_scope: session + user_profile: + name: Efrain Garay + call: Efra + timezone: America/Santiago + notes: + - Keep tone warm and simple for personal assistant interactions. + - Stay concise unless more detail is requested. 
+ agents: + - id: main + default: true + name: Andrea Concierge + identity: + name: Andrea + theme: personal concierge assistant + emoji: ":sparkles:" + workspace: /home/openclaw/.openclaw-andrea/workspace + agent_personas: + - id: main + display_name: Andrea Concierge + identity_name: Andrea + creature: Personal assistant + role: Messaging-first daily support + vibe: Friendly, clear, and calm + tone: Human and concise + mission: Help with everyday requests quickly while keeping responses clean and natural. + responsibilities: + - Prioritize direct user requests and lightweight follow-through. + - Keep instructions clear and easy to execute. + - Use Markdown code blocks whenever sharing technical snippets. + bindings: [] + env: + OPENCLAW_GATEWAY_TOKEN: "{{ vault_openclaw_gateway_token_andrea | default('replace-me-andrea-gateway-token') }}" + OPENAI_API_KEY: "{{ vault_openai_api_key_dev | default('') }}" + ANTHROPIC_API_KEY: "{{ vault_anthropic_api_key_dev | default('') }}" + +# Stage 2 control-plane package (NATS + NestJS microservices) +openclaw_control_plane_enabled: true +openclaw_control_plane_manage_stack: true +openclaw_control_plane_profiles: + - name: efra-core + mode: full + gateway_profile: dev-main + project_dir: /home/efra/openclaw-control-plane/efra-core + ingress_port: 30101 + control_api_port: 39101 + grafana_port: 31001 + prometheus_port: 39091 + uptime_kuma_port: 31081 + telegram_bot_token: "{{ vault_telegram_bot_token_efra_core | default('') }}" + telegram_default_chat_id: "{{ vault_telegram_default_chat_id_efra_core | default('') }}" + worker_exec_mode: openclaw + openclaw_env_file: /etc/openclaw/secrets/dev-main.env + openclaw_gateway_token: "{{ vault_openclaw_gateway_token_dev_main | default('replace-me-dev-main-gateway-token') }}" + postgres_password: "{{ vault_openclaw_cp_postgres_password_efra_core | default('efra-core-postgres-local') }}" + nats_user: queue + nats_password: "{{ vault_openclaw_cp_nats_password_efra_core | 
default('efra-core-nats-local') }}" + + - name: andrea + mode: lite + gateway_profile: andrea + project_dir: /home/efra/openclaw-control-plane/andrea + ingress_port: 30111 + control_api_port: 39111 + telegram_bot_token: "{{ vault_telegram_bot_token_andrea | default('') }}" + telegram_default_chat_id: "{{ vault_telegram_default_chat_id_andrea | default('') }}" + postgres_password: "{{ vault_openclaw_cp_postgres_password_andrea | default('andrea-postgres-local') }}" + nats_user: queue + nats_password: "{{ vault_openclaw_cp_nats_password_andrea | default('andrea-nats-local') }}" + +# Optional: expose local loopback services via Cloudflare Tunnel subdomains. +# This host uses a dedicated tunnel for openclaw-ansible services. +openclaw_cloudflare_domain: "hegga.cl" +openclaw_cloudflare_tunnel_enabled: "{{ inventory_hostname == 'zennook' }}" +openclaw_cloudflare_tunnel_name: "openclaw-zennook" +openclaw_cloudflare_tunnel_run_user: efra +openclaw_cloudflare_tunnel_run_group: efra +openclaw_cloudflare_tunnel_id: "68554472-f797-431c-bfa3-42480ea7e5c6" +openclaw_cloudflare_tunnel_manage_credentials_file: false +openclaw_cloudflare_tunnel_credentials_file: "/home/efra/.cloudflared/68554472-f797-431c-bfa3-42480ea7e5c6.json" +openclaw_cloudflare_tunnel_manage_dns: false +openclaw_cloudflare_tunnel_ingress: + - hostname: "dev-main-dashboard.{{ openclaw_cloudflare_domain }}" + service: "http://127.0.0.1:19011" + - hostname: "efra-core-ingress.{{ openclaw_cloudflare_domain }}" + service: "http://127.0.0.1:30101" + - hostname: "efra-core-control.{{ openclaw_cloudflare_domain }}" + service: "http://127.0.0.1:39101" + - hostname: "efra-core-grafana.{{ openclaw_cloudflare_domain }}" + service: "http://127.0.0.1:31001" + - hostname: "andrea-ingress.{{ openclaw_cloudflare_domain }}" + service: "http://127.0.0.1:30111" + - hostname: "andrea-control.{{ openclaw_cloudflare_domain }}" + service: "http://127.0.0.1:39111" diff --git a/inventories/dev/group_vars/vault.example.yml 
b/inventories/dev/group_vars/vault.example.yml new file mode 100644 index 0000000..70a6588 --- /dev/null +++ b/inventories/dev/group_vars/vault.example.yml @@ -0,0 +1,21 @@ +--- +vault_openclaw_gateway_token_dev_main: "replace-with-temp-token-dev-main" +vault_openclaw_gateway_token_andrea: "replace-with-temp-token-andrea" +vault_openai_api_key_dev: "replace-with-temp-openai-key-dev" +vault_anthropic_api_key_dev: "replace-with-temp-anthropic-key-dev" + +# Stage 2 control-plane secrets +vault_openclaw_cp_postgres_password_efra_core: "replace-with-strong-password" +vault_openclaw_cp_nats_password_efra_core: "replace-with-strong-password" +vault_telegram_bot_token_efra_core: "replace-with-bot-token" +vault_telegram_default_chat_id_efra_core: "replace-with-chat-id" + +vault_openclaw_cp_postgres_password_andrea: "replace-with-strong-password" +vault_openclaw_cp_nats_password_andrea: "replace-with-strong-password" +vault_telegram_bot_token_andrea: "replace-with-bot-token" +vault_telegram_default_chat_id_andrea: "replace-with-chat-id" + +# Cloudflare Tunnel secrets +vault_cloudflare_tunnel_id: "replace-with-tunnel-uuid" +vault_cloudflare_tunnel_credentials_json: | + {"AccountTag":"replace","TunnelSecret":"replace","TunnelID":"replace","TunnelName":"replace"} diff --git a/inventories/dev/host_vars/.gitkeep b/inventories/dev/host_vars/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/inventories/dev/hosts.yml b/inventories/dev/hosts.yml new file mode 100644 index 0000000..30c6ff1 --- /dev/null +++ b/inventories/dev/hosts.yml @@ -0,0 +1,29 @@ +--- +all: + vars: + ansible_user: efra + ansible_become: true + ansible_become_method: sudo + children: + openclaw_gateway: + hosts: + zennook: + ansible_host: 100.127.119.39 + ansible_connection: local + ansible_user: efra + fedora: + ansible_host: 100.109.82.18 + ansible_user: clawadmin + openclaw_mobile_nodes: + vars: + # Mobile nodes connect to gateway WS and are paired from gateway CLI. 
+ # They are inventory metadata only (not Ansible SSH targets). + openclaw_node_transport: gateway_ws + openclaw_node_role: node + hosts: + android_efra: + openclaw_node_platform: android + openclaw_node_enabled: true + openclaw_node_gateway: zennook + openclaw_node_tailnet_ip: 100.78.147.33 + openclaw_node_magicdns: poco-f8-ultra.tail486c6b.ts.net diff --git a/inventories/prod/group_vars/all.yml b/inventories/prod/group_vars/all.yml new file mode 100644 index 0000000..09a26b3 --- /dev/null +++ b/inventories/prod/group_vars/all.yml @@ -0,0 +1,82 @@ +--- +openclaw_install_mode: release +openclaw_enterprise_enabled: true +openclaw_enterprise_profiles: + - name: prod-main + gateway_port: 18789 + gateway_bind: loopback + browser: + enabled: true + defaultProfile: openclaw + state_dir: /home/openclaw/.openclaw-prod-main + config_path: /home/openclaw/.openclaw-prod-main/openclaw.json + workspace_root: /home/openclaw/.openclaw-prod-main/workspace + model_primary: anthropic/claude-opus-4-6 + model_fallbacks: + - openai/gpt-5.2 + - anthropic/claude-sonnet-4-5 + - openai/gpt-5-mini + tools_profile: coding + sandbox_mode: non-main + sandbox_scope: session + max_concurrent: 8 + context_tokens: 250000 + agents: + - id: main + default: true + workspace: /home/openclaw/.openclaw-prod-main/workspace + - id: ops + workspace: /home/openclaw/.openclaw-prod-main/workspace-ops + tools: + profile: messaging + - id: research + workspace: /home/openclaw/.openclaw-prod-main/workspace-research + tools: + profile: coding + - id: browser-login + workspace: /home/openclaw/.openclaw-prod-main/workspace-browser-login + sandbox: + mode: "off" + tools: + profile: full + allow: + - browser + bindings: [] + env: + OPENCLAW_GATEWAY_TOKEN: "{{ vault_openclaw_gateway_token_prod_main | default('') }}" + OPENAI_API_KEY: "{{ vault_openai_api_key_prod | default('') }}" + ANTHROPIC_API_KEY: "{{ vault_anthropic_api_key_prod | default('') }}" + + - name: prod-rescue + gateway_port: 19789 + gateway_bind: 
loopback + browser: + enabled: true + defaultProfile: openclaw + state_dir: /home/openclaw/.openclaw-prod-rescue + config_path: /home/openclaw/.openclaw-prod-rescue/openclaw.json + workspace_root: /home/openclaw/.openclaw-prod-rescue/workspace + model_primary: openai/gpt-5-mini + model_fallbacks: + - anthropic/claude-sonnet-4-5 + - openai/gpt-5.2 + tools_profile: messaging + sandbox_mode: all + sandbox_scope: agent + agents: + - id: main + default: true + workspace: /home/openclaw/.openclaw-prod-rescue/workspace + - id: browser-login + workspace: /home/openclaw/.openclaw-prod-rescue/workspace-browser-login + sandbox: + mode: "off" + tools: + profile: full + allow: + - browser + bindings: [] + env: + OPENCLAW_GATEWAY_TOKEN: "{{ vault_openclaw_gateway_token_prod_rescue | default('') }}" + OPENAI_API_KEY: "{{ vault_openai_api_key_prod | default('') }}" + ANTHROPIC_API_KEY: "{{ vault_anthropic_api_key_prod | default('') }}" diff --git a/inventories/prod/group_vars/vault.example.yml b/inventories/prod/group_vars/vault.example.yml new file mode 100644 index 0000000..2acb1fd --- /dev/null +++ b/inventories/prod/group_vars/vault.example.yml @@ -0,0 +1,5 @@ +--- +vault_openclaw_gateway_token_prod_main: "replace-with-temp-token-prod-main" +vault_openclaw_gateway_token_prod_rescue: "replace-with-temp-token-prod-rescue" +vault_openai_api_key_prod: "replace-with-temp-openai-key-prod" +vault_anthropic_api_key_prod: "replace-with-temp-anthropic-key-prod" diff --git a/inventories/prod/host_vars/.gitkeep b/inventories/prod/host_vars/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/inventories/prod/hosts.yml b/inventories/prod/hosts.yml new file mode 100644 index 0000000..1aaff16 --- /dev/null +++ b/inventories/prod/hosts.yml @@ -0,0 +1,16 @@ +--- +all: + children: + openclaw_gateway: + hosts: + prod-gateway-1: + ansible_host: 10.30.10.11 + ansible_user: ubuntu + prod-gateway-2: + ansible_host: 10.30.10.12 + ansible_user: ubuntu + openclaw_mobile_nodes: + vars: + 
openclaw_node_transport: gateway_ws + openclaw_node_role: node + hosts: {} diff --git a/inventories/research/group_vars/all.yml b/inventories/research/group_vars/all.yml new file mode 100644 index 0000000..8907e59 --- /dev/null +++ b/inventories/research/group_vars/all.yml @@ -0,0 +1,41 @@ +--- +openclaw_install_mode: development +openclaw_enterprise_enabled: true +openclaw_enterprise_profiles: + - name: research-main + gateway_port: 19211 + gateway_bind: loopback + browser: + enabled: true + defaultProfile: openclaw + state_dir: /home/openclaw/.openclaw-research-main + config_path: /home/openclaw/.openclaw-research-main/openclaw.json + workspace_root: /home/openclaw/.openclaw-research-main/workspace + model_primary: openai/gpt-5.2 + model_fallbacks: + - anthropic/claude-opus-4-6 + - openai/gpt-5-mini + tools_profile: coding + sandbox_mode: all + sandbox_scope: session + agents: + - id: main + default: true + workspace: /home/openclaw/.openclaw-research-main/workspace + - id: experiments + workspace: /home/openclaw/.openclaw-research-main/workspace-experiments + tools: + profile: coding + - id: browser-login + workspace: /home/openclaw/.openclaw-research-main/workspace-browser-login + sandbox: + mode: "off" + tools: + profile: full + allow: + - browser + bindings: [] + env: + OPENCLAW_GATEWAY_TOKEN: "{{ vault_openclaw_gateway_token_research_main | default('') }}" + OPENAI_API_KEY: "{{ vault_openai_api_key_research | default('') }}" + ANTHROPIC_API_KEY: "{{ vault_anthropic_api_key_research | default('') }}" diff --git a/inventories/research/group_vars/vault.example.yml b/inventories/research/group_vars/vault.example.yml new file mode 100644 index 0000000..a4ef5d9 --- /dev/null +++ b/inventories/research/group_vars/vault.example.yml @@ -0,0 +1,4 @@ +--- +vault_openclaw_gateway_token_research_main: "replace-with-temp-token-research-main" +vault_openai_api_key_research: "replace-with-temp-openai-key-research" +vault_anthropic_api_key_research: 
"replace-with-temp-anthropic-key-research" diff --git a/inventories/research/host_vars/.gitkeep b/inventories/research/host_vars/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/inventories/research/hosts.yml b/inventories/research/hosts.yml new file mode 100644 index 0000000..40618f5 --- /dev/null +++ b/inventories/research/hosts.yml @@ -0,0 +1,13 @@ +--- +all: + children: + openclaw_gateway: + hosts: + rsch-gateway-1: + ansible_host: 10.40.10.11 + ansible_user: ubuntu + openclaw_mobile_nodes: + vars: + openclaw_node_transport: gateway_ws + openclaw_node_role: node + hosts: {} diff --git a/inventories/staging/group_vars/all.yml b/inventories/staging/group_vars/all.yml new file mode 100644 index 0000000..4ce74d9 --- /dev/null +++ b/inventories/staging/group_vars/all.yml @@ -0,0 +1,41 @@ +--- +openclaw_install_mode: release +openclaw_enterprise_enabled: true +openclaw_enterprise_profiles: + - name: stg-main + gateway_port: 19111 + gateway_bind: loopback + browser: + enabled: true + defaultProfile: openclaw + state_dir: /home/openclaw/.openclaw-stg-main + config_path: /home/openclaw/.openclaw-stg-main/openclaw.json + workspace_root: /home/openclaw/.openclaw-stg-main/workspace + model_primary: anthropic/claude-sonnet-4-5 + model_fallbacks: + - openai/gpt-5.2 + - openai/gpt-5-mini + tools_profile: coding + sandbox_mode: non-main + sandbox_scope: session + agents: + - id: main + default: true + workspace: /home/openclaw/.openclaw-stg-main/workspace + - id: ops + workspace: /home/openclaw/.openclaw-stg-main/workspace-ops + tools: + profile: messaging + - id: browser-login + workspace: /home/openclaw/.openclaw-stg-main/workspace-browser-login + sandbox: + mode: "off" + tools: + profile: full + allow: + - browser + bindings: [] + env: + OPENCLAW_GATEWAY_TOKEN: "{{ vault_openclaw_gateway_token_stg_main | default('') }}" + OPENAI_API_KEY: "{{ vault_openai_api_key_stg | default('') }}" + ANTHROPIC_API_KEY: "{{ vault_anthropic_api_key_stg | default('') }}" 
diff --git a/inventories/staging/group_vars/vault.example.yml b/inventories/staging/group_vars/vault.example.yml new file mode 100644 index 0000000..17738dd --- /dev/null +++ b/inventories/staging/group_vars/vault.example.yml @@ -0,0 +1,4 @@ +--- +vault_openclaw_gateway_token_stg_main: "replace-with-temp-token-stg-main" +vault_openai_api_key_stg: "replace-with-temp-openai-key-stg" +vault_anthropic_api_key_stg: "replace-with-temp-anthropic-key-stg" diff --git a/inventories/staging/host_vars/.gitkeep b/inventories/staging/host_vars/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/inventories/staging/hosts.yml b/inventories/staging/hosts.yml new file mode 100644 index 0000000..7c74f3e --- /dev/null +++ b/inventories/staging/hosts.yml @@ -0,0 +1,13 @@ +--- +all: + children: + openclaw_gateway: + hosts: + stg-gateway-1: + ansible_host: 10.20.10.11 + ansible_user: ubuntu + openclaw_mobile_nodes: + vars: + openclaw_node_transport: gateway_ws + openclaw_node_role: node + hosts: {} diff --git a/ops/auth-sync.sh b/ops/auth-sync.sh new file mode 100755 index 0000000..28028b6 --- /dev/null +++ b/ops/auth-sync.sh @@ -0,0 +1,244 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=ops/common.sh +source "${SCRIPT_DIR}/common.sh" + +provider="${OAUTH_PROVIDER:-openai-codex}" +profiles_raw="${PROFILES:-dev-main andrea}" +model_ref="${MODEL_REF:-openai-codex/gpt-5.3-codex}" + +efra_env_file="${EFRA_ENV_FILE:-/home/efra/.env}" +if [[ -f "${efra_env_file}" ]]; then + # shellcheck disable=SC1090 + source "${efra_env_file}" +fi + +source_codex_home="${EFRA_CODEX_HOME:-/home/efra/.codex}" +default_auth="${EFRA_CODEX_AUTH_DEFAULT:-${source_codex_home}/auth.json}" +andrea_auth="${EFRA_CODEX_AUTH_ANDREA:-${source_codex_home}/auth-andrea.json}" + +[[ "${provider}" == "openai-codex" ]] || die "auth-sync supports only OAUTH_PROVIDER=openai-codex" +[[ -f "${default_auth}" ]] || die "Missing default Codex 
auth file: ${default_auth}" + +if [[ ! -f "${andrea_auth}" ]]; then + log "Andrea auth file not found (${andrea_auth}); falling back to default credential." + andrea_auth="${default_auth}" +fi + +log "Syncing Codex credentials from ${source_codex_home} into OpenClaw profiles: ${profiles_raw}" + +run_sudo env \ + PROFILES_RAW="${profiles_raw}" \ + DEFAULT_AUTH="${default_auth}" \ + ANDREA_AUTH="${andrea_auth}" \ + node - <<'NODE' +const fs = require("fs"); +const path = require("path"); +const cp = require("child_process"); + +function decodeJwtPayload(token) { + try { + const parts = String(token || "").split("."); + if (parts.length < 2) { + return null; + } + const b64 = parts[1].replace(/-/g, "+").replace(/_/g, "/"); + const pad = "=".repeat((4 - (b64.length % 4)) % 4); + return JSON.parse(Buffer.from(b64 + pad, "base64").toString("utf8")); + } catch { + return null; + } +} + +function readCredential(filePath) { + const raw = JSON.parse(fs.readFileSync(filePath, "utf8")); + const tokens = raw && raw.tokens ? raw.tokens : {}; + const access = tokens.access_token; + const refresh = tokens.refresh_token; + const accountId = tokens.account_id; + if (typeof access !== "string" || !access || typeof refresh !== "string" || !refresh) { + throw new Error(`Missing access/refresh token in ${filePath}`); + } + + const accessPayload = decodeJwtPayload(access) || {}; + const idPayload = decodeJwtPayload(tokens.id_token) || {}; + + const expSec = Number(accessPayload.exp); + const expires = Number.isFinite(expSec) && expSec > 0 ? expSec * 1000 : Date.now() + 60 * 60 * 1000; + const email = + typeof idPayload.email === "string" && idPayload.email.trim() ? 
idPayload.email.trim() : "default"; + + const credential = { + type: "oauth", + provider: "openai-codex", + access, + refresh, + expires, + }; + if (typeof accountId === "string" && accountId) { + credential.accountId = accountId; + } + if (email !== "default") { + credential.email = email; + } + + return { credential, email }; +} + +function ensureDir(dirPath, uid, gid) { + fs.mkdirSync(dirPath, { recursive: true, mode: 0o700 }); + fs.chownSync(dirPath, uid, gid); +} + +function ensureProfileSkeleton(profileDir, uid, gid) { + // Recursive mkdir can leave intermediate directories owned by root. + // Ensure profile roots are writable by openclaw before model configuration. + ensureDir(profileDir, uid, gid); + ensureDir(path.join(profileDir, "agents"), uid, gid); + ensureDir(path.join(profileDir, "agents", "main"), uid, gid); +} + +function loadStore(storePath) { + try { + const parsed = JSON.parse(fs.readFileSync(storePath, "utf8")); + if (!parsed || typeof parsed !== "object") { + return { version: 1, profiles: {} }; + } + if (!parsed.profiles || typeof parsed.profiles !== "object") { + parsed.profiles = {}; + } + if (typeof parsed.version !== "number") { + parsed.version = 1; + } + return parsed; + } catch { + return { version: 1, profiles: {} }; + } +} + +function writeStore(storePath, store, uid, gid) { + ensureDir(path.dirname(storePath), uid, gid); + fs.writeFileSync(storePath, `${JSON.stringify(store, null, 2)}\n`, { mode: 0o600 }); + fs.chownSync(storePath, uid, gid); + fs.chmodSync(storePath, 0o600); +} + +function resolveProfileDir(profileName) { + if (profileName === "default" || profileName === "main") { + return "/home/openclaw/.openclaw"; + } + return `/home/openclaw/.openclaw-${profileName}`; +} + +function collectAgentDirs(profileDir) { + const dirs = new Set([path.join(profileDir, "agents", "main", "agent")]); + const configPath = path.join(profileDir, "openclaw.json"); + + try { + const cfg = JSON.parse(fs.readFileSync(configPath, "utf8")); + const 
list = cfg && cfg.agents && Array.isArray(cfg.agents.list) ? cfg.agents.list : []; + for (const item of list) { + if (!item || typeof item !== "object") { + continue; + } + if (typeof item.agentDir === "string" && item.agentDir.trim()) { + dirs.add(item.agentDir.trim()); + continue; + } + const id = typeof item.id === "string" && item.id.trim() ? item.id.trim() : "main"; + dirs.add(path.join(profileDir, "agents", id, "agent")); + } + } catch { + // Keep default main agent dir. + } + + return Array.from(dirs); +} + +const profilesRaw = process.env.PROFILES_RAW || "dev-main andrea"; +const profiles = profilesRaw + .split(/\s+/) + .map((v) => v.trim()) + .filter(Boolean); + +if (profiles.length === 0) { + throw new Error("PROFILES_RAW resolved to an empty profile list."); +} + +const defaultAuth = process.env.DEFAULT_AUTH; +const andreaAuth = process.env.ANDREA_AUTH || defaultAuth; +if (!defaultAuth) { + throw new Error("DEFAULT_AUTH is required."); +} + +const defaultCred = readCredential(defaultAuth); +const andreaCred = readCredential(andreaAuth); + +const uid = Number(cp.execSync("id -u openclaw", { encoding: "utf8" }).trim()); +const gid = Number(cp.execSync("id -g openclaw", { encoding: "utf8" }).trim()); + +const codexDir = "/home/openclaw/.codex"; +ensureDir(codexDir, uid, gid); +for (const [src, name] of [ + [defaultAuth, "auth.json"], + [andreaAuth, "auth-andrea.json"], +]) { + fs.copyFileSync(src, path.join(codexDir, name)); + fs.chownSync(path.join(codexDir, name), uid, gid); + fs.chmodSync(path.join(codexDir, name), 0o600); +} + +let stores = 0; +for (const profile of profiles) { + const profileDir = resolveProfileDir(profile); + const selected = profile === "andrea" ? 
andreaCred : defaultCred; + ensureProfileSkeleton(profileDir, uid, gid); + const agentDirs = collectAgentDirs(profileDir); + + for (const agentDir of agentDirs) { + ensureDir(agentDir, uid, gid); + const storePath = path.join(agentDir, "auth-profiles.json"); + const store = loadStore(storePath); + store.version = 1; + if (!store.profiles || typeof store.profiles !== "object") { + store.profiles = {}; + } + + store.profiles["openai-codex:default"] = selected.credential; + if (!store.order || typeof store.order !== "object") { + store.order = {}; + } + + if (selected.email && selected.email !== "default") { + const emailProfile = `openai-codex:${selected.email}`; + store.profiles[emailProfile] = selected.credential; + store.order["openai-codex"] = [emailProfile, "openai-codex:default"]; + } else { + store.order["openai-codex"] = ["openai-codex:default"]; + } + + writeStore(storePath, store, uid, gid); + stores += 1; + } + + console.log(`Synced auth-profiles for profile=${profile} agentDirs=${agentDirs.length}`); +} + +console.log(`SYNC_OK profiles=${profiles.length} stores=${stores}`); +NODE + +for profile in ${profiles_raw}; do + log "Configuring default model for profile=${profile} -> ${model_ref}" + profile_env="/etc/openclaw/secrets/${profile}.env" + run_sudo -u openclaw -H bash -lc \ + "set -euo pipefail; \ + export HOME=/home/openclaw; \ + export OPENCLAW_BUNDLED_PLUGINS_DIR=/home/openclaw/.openclaw/bundled-extensions; \ + if [[ -f '${profile_env}' ]]; then set -a; source '${profile_env}'; set +a; fi; \ + /home/openclaw/.local/bin/openclaw --profile '${profile}' models set '${model_ref}' >/dev/null; \ + /home/openclaw/.local/bin/openclaw --profile '${profile}' models status --plain" +done + +log "Credential sync completed for profiles: ${profiles_raw}" diff --git a/ops/backup.sh b/ops/backup.sh new file mode 100755 index 0000000..c3522fa --- /dev/null +++ b/ops/backup.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname 
"${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=ops/common.sh +source "${SCRIPT_DIR}/common.sh" + +need_cmd tar +need_cmd date + +backup_root="${BACKUP_DIR:-${ROOT_DIR}/backups}" +timestamp="$(date +%Y%m%d-%H%M%S)" +archive_path="${backup_root}/openclaw-backup-${timestamp}.tar.gz" + +mkdir -p "${backup_root}" + +candidates=( + "${ROOT_DIR}/inventories/dev/group_vars/all.yml" + "${ROOT_DIR}/inventories/dev/group_vars/vault.yml" + "/etc/openclaw" + "/opt/openclaw/control-plane" + "/home/efra/openclaw-control-plane" + "/home/openclaw/.openclaw" + "/home/openclaw/.openclaw-dev-main" + "/home/openclaw/.openclaw-andrea" + "/home/efra/.openclaw" + "/home/efra/.openclaw-dev-main" + "/home/efra/.openclaw-andrea" + "/home/openclaw/.config/systemd/user/openclaw-gateway-dev-main.service" + "/home/openclaw/.config/systemd/user/openclaw-gateway-andrea.service" + "/home/efra/.config/systemd/user/openclaw-gateway-dev-main.service" + "/home/efra/.config/systemd/user/openclaw-gateway-andrea.service" +) + +existing=() +for path in "${candidates[@]}"; do + if run_sudo test -e "${path}"; then + existing+=("${path}") + fi +done + +(( ${#existing[@]} > 0 )) || die "No known OpenClaw paths found to backup." + +log "Creating backup archive: ${archive_path}" +run_sudo tar -czf "${archive_path}" "${existing[@]}" + +if [[ "$(id -u)" -ne 0 ]]; then + run_sudo chown "$(id -u):$(id -g)" "${archive_path}" || true +fi + +log "Backup completed with ${#existing[@]} paths." 
+log "Archive ready: ${archive_path}" diff --git a/ops/cloudflare-reconcile.sh b/ops/cloudflare-reconcile.sh new file mode 100755 index 0000000..591a7b0 --- /dev/null +++ b/ops/cloudflare-reconcile.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=ops/common.sh +source "${SCRIPT_DIR}/common.sh" + +ansible_bin="$(resolve_ansible_bin)" +inventory_file="$(resolve_inventory)" +limit_host="$(resolve_limit)" + +need_cmd "${ansible_bin}" +[[ -f "${inventory_file}" ]] || die "Inventory not found: ${inventory_file}" + +extra_args=() +if [[ -n "${ANSIBLE_EXTRA_ARGS:-}" ]]; then + # shellcheck disable=SC2206 + extra_args=( ${ANSIBLE_EXTRA_ARGS} ) +fi + +log "Reconciling Cloudflare tunnel (inventory=${inventory_file}, limit=${limit_host})." +"${ansible_bin}" \ + -i "${inventory_file}" \ + "${ROOT_DIR}/playbooks/cloudflare-only.yml" \ + -l "${limit_host}" \ + --become \ + "${extra_args[@]}" + +log "Cloudflare reconcile completed." diff --git a/ops/common.sh b/ops/common.sh new file mode 100755 index 0000000..b430ab0 --- /dev/null +++ b/ops/common.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" + +log() { + printf '[ops] %s\n' "$*" +} + +die() { + printf '[ops] ERROR: %s\n' "$*" >&2 + exit 1 +} + +need_cmd() { + local cmd="$1" + command -v "$cmd" >/dev/null 2>&1 || die "Missing command: $cmd" +} + +run_sudo() { + if [[ "$(id -u)" -eq 0 ]]; then + "$@" + else + sudo "$@" + fi +} + +resolve_inventory() { + local env_name="${ENV:-dev}" + printf '%s' "${INVENTORY:-${ROOT_DIR}/inventories/${env_name}/hosts.yml}" +} + +resolve_limit() { + printf '%s' "${LIMIT:-zennook}" +} + +resolve_ansible_bin() { + printf '%s' "${ANSIBLE_PLAYBOOK_BIN:-ansible-playbook}" +} diff --git a/ops/install.sh b/ops/install.sh new file mode 100755 index 0000000..a07e1f9 --- /dev/null +++ b/ops/install.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=ops/common.sh +source "${SCRIPT_DIR}/common.sh" + +ansible_bin="$(resolve_ansible_bin)" +inventory_file="$(resolve_inventory)" +limit_host="$(resolve_limit)" +inventory_dir="$(cd "$(dirname "${inventory_file}")" && pwd)" +vault_file="${VAULT_FILE:-${inventory_dir}/group_vars/vault.yml}" + +need_cmd "${ansible_bin}" + +[[ -f "${inventory_file}" ]] || die "Inventory not found: ${inventory_file}" + +log "Validating required secrets before install." +"${SCRIPT_DIR}/validate-secrets.sh" + +extra_args=() +if [[ -f "${vault_file}" ]]; then + extra_args+=( -e "@${vault_file}" ) + log "Including vault variables file: ${vault_file}" +fi + +if [[ -n "${ANSIBLE_EXTRA_ARGS:-}" ]]; then + # shellcheck disable=SC2206 + extra_args+=( ${ANSIBLE_EXTRA_ARGS} ) +fi + +log "Running enterprise install (inventory=${inventory_file}, limit=${limit_host})." +"${ansible_bin}" \ + -i "${inventory_file}" \ + "${ROOT_DIR}/playbooks/enterprise.yml" \ + -l "${limit_host}" \ + --become \ + -e openclaw_control_plane_enabled=true \ + -e openclaw_control_plane_manage_stack=true \ + "${extra_args[@]}" + +log "Running control-plane reconciliation playbook." 
+"${ansible_bin}" \ + -i "${inventory_file}" \ + "${ROOT_DIR}/playbooks/control-plane-only.yml" \ + -l "${limit_host}" \ + --become \ + -e openclaw_control_plane_enabled=true \ + -e openclaw_control_plane_manage_stack=true \ + "${extra_args[@]}" + +log "Install completed." diff --git a/ops/oauth-login.sh b/ops/oauth-login.sh new file mode 100755 index 0000000..b504df9 --- /dev/null +++ b/ops/oauth-login.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=ops/common.sh +source "${SCRIPT_DIR}/common.sh" + +log "make oauth-login now delegates to non-interactive credential sync (auth-sync)." +exec "${SCRIPT_DIR}/auth-sync.sh" diff --git a/ops/purge.sh b/ops/purge.sh new file mode 100755 index 0000000..9528b10 --- /dev/null +++ b/ops/purge.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=ops/common.sh +source "${SCRIPT_DIR}/common.sh" + +need_cmd docker + +if [[ "${1:-}" != "--yes" ]]; then + die "This command is destructive. Re-run with: ./ops/purge.sh --yes" +fi + +log "Stopping/removing control-plane compose stacks (efra-core, andrea)." +for profile in efra-core andrea; do + compose_file="/home/efra/openclaw-control-plane/${profile}/docker-compose.yml" + project_name="ocp-${profile}" + if run_sudo test -f "${compose_file}"; then + run_sudo docker compose -f "${compose_file}" -p "${project_name}" down --remove-orphans --volumes || true + fi +done + +log "Stopping known OpenClaw gateway services (best effort)." 
+for user_name in openclaw efra; do + if id "${user_name}" >/dev/null 2>&1; then + run_sudo -u "${user_name}" bash -lc \ + "systemctl --user stop openclaw-gateway-dev-main.service openclaw-gateway-andrea.service >/dev/null 2>&1 || true" + fi +done + +run_sudo pkill -f "openclaw-gateway" || true + +purge_paths=( + "/opt/openclaw/control-plane" + "/home/efra/openclaw-control-plane" + "/home/openclaw/.openclaw" + "/home/openclaw/.openclaw-dev-main" + "/home/openclaw/.openclaw-andrea" + "/home/efra/.openclaw" + "/home/efra/.openclaw-dev-main" + "/home/efra/.openclaw-andrea" +) + +log "Removing OpenClaw runtime/state directories." +for path in "${purge_paths[@]}"; do + if run_sudo test -e "${path}"; then + run_sudo rm -rf "${path}" + log "Removed: ${path}" + fi +done + +log "Purge complete." +log "Note: /etc/openclaw was intentionally preserved (secrets/config bootstrap)." diff --git a/ops/secrets-refactor.sh b/ops/secrets-refactor.sh new file mode 100755 index 0000000..201a52e --- /dev/null +++ b/ops/secrets-refactor.sh @@ -0,0 +1,78 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=ops/common.sh +source "${SCRIPT_DIR}/common.sh" + +need_cmd awk +need_cmd sed +need_cmd tr + +inventory_file="$(resolve_inventory)" +inventory_dir="$(cd "$(dirname "${inventory_file}")" && pwd)" +vault_file="${VAULT_FILE:-${inventory_dir}/group_vars/vault.yml}" +manual_file="${inventory_dir}/group_vars/vault.manual.refactor.yml" + +extract_env_value() { + local file="$1" + local key="$2" + if run_sudo test -f "${file}"; then + run_sudo awk -F= -v key="$key" '$1 == key {print $2; exit}' "${file}" | tr -d '\r' || true + fi +} + +current_or_default() { + local key="$1" + local fallback="$2" + if [[ -f "${vault_file}" ]]; then + local current + current="$(awk -F': *' -v key="$key" '$1 == key {sub(/^["'"'"']/, "", $2); sub(/["'"'"']$/, "", $2); print $2; exit}' "${vault_file}")" + if [[ -n "${current}" ]]; then + printf 
'%s' "${current}" + return 0 + fi + fi + printf '%s' "${fallback}" +} + +dev_main_gateway_token="$(extract_env_value /etc/openclaw/secrets/dev-main.env OPENCLAW_GATEWAY_TOKEN)" +andrea_gateway_token="$(extract_env_value /etc/openclaw/secrets/andrea.env OPENCLAW_GATEWAY_TOKEN)" + +efra_cp_postgres="$(extract_env_value /home/efra/openclaw-control-plane/efra-core/.env POSTGRES_PASSWORD)" +efra_cp_nats="$(extract_env_value /home/efra/openclaw-control-plane/efra-core/.env NATS_PASSWORD)" +efra_tg_token="$(extract_env_value /home/efra/openclaw-control-plane/efra-core/.env TELEGRAM_BOT_TOKEN)" +efra_tg_chat="$(extract_env_value /home/efra/openclaw-control-plane/efra-core/.env TELEGRAM_DEFAULT_CHAT_ID)" + +andrea_cp_postgres="$(extract_env_value /home/efra/openclaw-control-plane/andrea/.env POSTGRES_PASSWORD)" +andrea_cp_nats="$(extract_env_value /home/efra/openclaw-control-plane/andrea/.env NATS_PASSWORD)" +andrea_tg_token="$(extract_env_value /home/efra/openclaw-control-plane/andrea/.env TELEGRAM_BOT_TOKEN)" +andrea_tg_chat="$(extract_env_value /home/efra/openclaw-control-plane/andrea/.env TELEGRAM_DEFAULT_CHAT_ID)" + +mkdir -p "${inventory_dir}/group_vars" +umask 077 +cat > "${manual_file}" </dev/null + log "Health OK: ${url}" +} + +simulate_and_assert() { + local ingress_port="$1" + local control_port="$2" + local profile_label="$3" + local payload resp task_id task_json status try + + payload=$(cat </dev/null || true)" + if [[ -n "${task_json}" ]]; then + status="$(printf '%s' "${task_json}" | sed -n 's/.*"status":"\([^"]*\)".*/\1/p')" + if [[ -n "${status}" && "${status}" != "PENDING" && "${status}" != "QUEUED" && "${status}" != "RUNNING" ]]; then + break + fi + fi + sleep 1 + done + + [[ -n "${status}" ]] || die "Task ${task_id} did not become visible in control API (${profile_label})." + if [[ "${status}" == "PENDING" || "${status}" == "QUEUED" || "${status}" == "RUNNING" ]]; then + die "Task ${task_id} did not reach terminal status in time (${profile_label}). 
Last status=${status}" + fi + + log "Queue flow OK (${profile_label}) taskId=${task_id} status=${status}" +} + +log "Checking docker compose stack status." +run_sudo docker compose -f /home/efra/openclaw-control-plane/efra-core/docker-compose.yml -p ocp-efra-core ps >/dev/null +run_sudo docker compose -f /home/efra/openclaw-control-plane/andrea/docker-compose.yml -p ocp-andrea ps >/dev/null + +check_url "http://127.0.0.1:39101/health" +check_url "http://127.0.0.1:30101/health" +check_url "http://127.0.0.1:39111/health" +check_url "http://127.0.0.1:30111/health" + +simulate_and_assert 30101 39101 "efra-core" +simulate_and_assert 30111 39111 "andrea" + +log "Smoke checks completed successfully." diff --git a/ops/validate-secrets.sh b/ops/validate-secrets.sh new file mode 100755 index 0000000..289c29d --- /dev/null +++ b/ops/validate-secrets.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=ops/common.sh +source "${SCRIPT_DIR}/common.sh" + +inventory_file="$(resolve_inventory)" +inventory_dir="$(cd "$(dirname "${inventory_file}")" && pwd)" +vault_file="${VAULT_FILE:-${inventory_dir}/group_vars/vault.yml}" + +[[ -f "${vault_file}" ]] || die "Missing vault file: ${vault_file}" + +read_yaml_value() { + local key="$1" + awk -F': *' -v key="$key" '$1 == key {sub(/^["'"'"']/, "", $2); sub(/["'"'"']$/, "", $2); print $2; exit}' "${vault_file}" +} + +is_placeholder() { + local v="${1:-}" + [[ -z "${v}" ]] && return 0 + [[ "${v}" =~ replace-with|replace-me|changeme|example|dummy|temp-token|temp-key ]] +} + +required_keys=( + "vault_openclaw_gateway_token_dev_main" + "vault_openclaw_gateway_token_andrea" + "vault_openclaw_cp_postgres_password_efra_core" + "vault_openclaw_cp_nats_password_efra_core" + "vault_openclaw_cp_postgres_password_andrea" + "vault_openclaw_cp_nats_password_andrea" +) + +missing=() +for key in "${required_keys[@]}"; do + value="$(read_yaml_value "${key}")" + if 
is_placeholder "${value}"; then + missing+=("${key}") + fi +done + +if (( ${#missing[@]} > 0 )); then + printf '[ops] ERROR: vault secrets missing or placeholders detected:\n' >&2 + for key in "${missing[@]}"; do + printf ' - %s\n' "${key}" >&2 + done + printf '[ops] Run: make secrets-refactor\n' >&2 + exit 1 +fi + +log "Vault secret validation passed: ${vault_file}" diff --git a/playbooks/cloudflare-only.yml b/playbooks/cloudflare-only.yml new file mode 100644 index 0000000..a26c168 --- /dev/null +++ b/playbooks/cloudflare-only.yml @@ -0,0 +1,9 @@ +--- +- name: Reconcile Cloudflare tunnel exposure + hosts: openclaw_gateway + become: true + vars: + ansible_python_interpreter: /usr/bin/python3 + roles: + - role: openclaw_cloudflare_tunnel + when: openclaw_cloudflare_tunnel_enabled | bool diff --git a/playbooks/control-plane-only.yml b/playbooks/control-plane-only.yml new file mode 100644 index 0000000..5d5a642 --- /dev/null +++ b/playbooks/control-plane-only.yml @@ -0,0 +1,9 @@ +--- +- name: Deploy only OpenClaw control-plane stacks + hosts: openclaw_gateway + become: false + vars: + ansible_python_interpreter: /usr/bin/python3 + roles: + - role: openclaw_control_plane + when: openclaw_control_plane_enabled | bool diff --git a/playbooks/enterprise.yml b/playbooks/enterprise.yml new file mode 100644 index 0000000..2e0033e --- /dev/null +++ b/playbooks/enterprise.yml @@ -0,0 +1,36 @@ +--- +- name: Deploy OpenClaw enterprise topology + hosts: openclaw_gateway + become: true + strategy: linear + serial: "{{ openclaw_rollout_serial | default(1) }}" + ignore_unreachable: "{{ openclaw_ignore_unreachable | default(true) }}" + any_errors_fatal: false + max_fail_percentage: "{{ openclaw_max_fail_percentage | default(100) }}" + + vars: + ansible_python_interpreter: /usr/bin/python3 + + pre_tasks: + - name: Report unsupported OS hosts (skipping deployment roles) + ansible.builtin.debug: + msg: >- + Skipping OpenClaw roles on {{ inventory_hostname }} ({{ ansible_distribution }} 
+ {{ ansible_distribution_version }}): supported distributions are Debian/Ubuntu/Fedora. + when: ansible_distribution not in ['Debian', 'Ubuntu', 'Fedora'] + + roles: + - role: openclaw + when: ansible_distribution in ['Debian', 'Ubuntu', 'Fedora'] + - role: openclaw_enterprise + when: + - openclaw_enterprise_enabled | bool + - ansible_distribution in ['Debian', 'Ubuntu', 'Fedora'] + - role: openclaw_control_plane + when: + - openclaw_control_plane_enabled | bool + - ansible_distribution in ['Debian', 'Ubuntu', 'Fedora'] + - role: openclaw_cloudflare_tunnel + when: + - openclaw_cloudflare_tunnel_enabled | bool + - ansible_distribution in ['Debian', 'Ubuntu', 'Fedora'] diff --git a/roles/openclaw/tasks/docker-redhat.yml b/roles/openclaw/tasks/docker-redhat.yml new file mode 100644 index 0000000..ca11dda --- /dev/null +++ b/roles/openclaw/tasks/docker-redhat.yml @@ -0,0 +1,50 @@ +--- +# RedHat/Fedora-specific Docker installation (dnf-based) + +- name: Install required system packages for Docker (RedHat/Fedora) + ansible.builtin.dnf: + name: + - dnf-plugins-core + - ca-certificates + - curl + state: present + update_cache: true + +- name: Add Docker repository (RedHat/Fedora) + ansible.builtin.get_url: + url: "https://download.docker.com/linux/fedora/docker-ce.repo" + dest: /etc/yum.repos.d/docker-ce.repo + owner: root + group: root + mode: '0644' + +- name: Install Docker CE (RedHat/Fedora) + ansible.builtin.dnf: + name: + - docker-ce + - docker-ce-cli + - containerd.io + - docker-buildx-plugin + - docker-compose-plugin + state: present + update_cache: true + +- name: Ensure docker group exists + ansible.builtin.group: + name: docker + state: present + +- name: Ensure Docker service is started and enabled + ansible.builtin.systemd: + name: docker + state: started + enabled: true + +- name: Add user to docker group + ansible.builtin.user: + name: "{{ openclaw_user }}" + groups: docker + append: true + +- name: Reset SSH connection to apply docker group + 
ansible.builtin.meta: reset_connection diff --git a/roles/openclaw/tasks/firewall-redhat.yml b/roles/openclaw/tasks/firewall-redhat.yml new file mode 100644 index 0000000..64cd7e9 --- /dev/null +++ b/roles/openclaw/tasks/firewall-redhat.yml @@ -0,0 +1,64 @@ +--- +# RedHat/Fedora-specific firewall and security hardening (firewalld) + +- name: Install security packages (RedHat/Fedora) + ansible.builtin.dnf: + name: + - fail2ban + - firewalld + - dnf-automatic + state: present + update_cache: true + +- name: Configure fail2ban for SSH protection + ansible.builtin.copy: + dest: /etc/fail2ban/jail.local + owner: root + group: root + mode: '0644' + content: | + # OpenClaw security hardening - SSH protection + [DEFAULT] + bantime = 3600 + findtime = 600 + maxretry = 5 + backend = systemd + + [sshd] + enabled = true + port = ssh + filter = sshd + notify: Restart fail2ban + +- name: Enable and start fail2ban + ansible.builtin.systemd: + name: fail2ban + state: started + enabled: true + +- name: Enable and start firewalld + ansible.builtin.systemd: + name: firewalld + state: started + enabled: true + +- name: Allow SSH service in firewalld + ansible.posix.firewalld: + service: ssh + state: enabled + permanent: true + immediate: true + +- name: Allow Tailscale UDP port 41641 in firewalld + ansible.posix.firewalld: + port: 41641/udp + state: enabled + permanent: true + immediate: true + when: tailscale_enabled | bool + +- name: Enable automatic update timer (dnf-automatic) + ansible.builtin.systemd: + name: dnf-automatic.timer + state: started + enabled: true diff --git a/roles/openclaw/tasks/main.yml b/roles/openclaw/tasks/main.yml index 81a5dd9..bee5b32 100644 --- a/roles/openclaw/tasks/main.yml +++ b/roles/openclaw/tasks/main.yml @@ -1,21 +1,53 @@ --- +- name: Validate supported OS family for openclaw role + ansible.builtin.assert: + that: + - ansible_os_family in ['Debian', 'RedHat'] + fail_msg: >- + Unsupported OS family '{{ ansible_os_family }}' on {{ inventory_hostname 
}}. + Supported families: Debian, RedHat. + - name: Include system tools installation tasks ansible.builtin.include_tasks: system-tools.yml -- name: Include Tailscale installation tasks +- name: Include Tailscale installation tasks (Debian/Ubuntu) ansible.builtin.include_tasks: tailscale-linux.yml - when: tailscale_enabled | bool + when: + - tailscale_enabled | bool + - ansible_os_family == 'Debian' + +- name: Include Tailscale installation tasks (RedHat/Fedora) + ansible.builtin.include_tasks: tailscale-redhat.yml + when: + - tailscale_enabled | bool + - ansible_os_family == 'RedHat' - name: Include user creation tasks ansible.builtin.include_tasks: user.yml -- name: Include Docker installation tasks +- name: Include Docker installation tasks (Debian/Ubuntu) ansible.builtin.include_tasks: docker-linux.yml - when: not ci_test + when: + - not ci_test + - ansible_os_family == 'Debian' -- name: Include firewall configuration tasks +- name: Include Docker installation tasks (RedHat/Fedora) + ansible.builtin.include_tasks: docker-redhat.yml + when: + - not ci_test + - ansible_os_family == 'RedHat' + +- name: Include firewall configuration tasks (Debian/Ubuntu) ansible.builtin.include_tasks: firewall-linux.yml - when: not ci_test + when: + - not ci_test + - ansible_os_family == 'Debian' + +- name: Include firewall configuration tasks (RedHat/Fedora) + ansible.builtin.include_tasks: firewall-redhat.yml + when: + - not ci_test + - ansible_os_family == 'RedHat' - name: Include Node.js installation tasks ansible.builtin.include_tasks: nodejs.yml diff --git a/roles/openclaw/tasks/nodejs-debian.yml b/roles/openclaw/tasks/nodejs-debian.yml new file mode 100644 index 0000000..94a4a5b --- /dev/null +++ b/roles/openclaw/tasks/nodejs-debian.yml @@ -0,0 +1,71 @@ +--- +- name: Install required packages for Node.js + ansible.builtin.apt: + name: + - ca-certificates + - curl + - gnupg + state: present + +- name: Create directory for NodeSource GPG key + ansible.builtin.file: + path: 
/etc/apt/keyrings + state: directory + mode: '0755' + +- name: Add NodeSource GPG key + ansible.builtin.shell: + cmd: | + set -o pipefail + curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | \ + gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg + chmod a+r /etc/apt/keyrings/nodesource.gpg + creates: /etc/apt/keyrings/nodesource.gpg + executable: /bin/bash + +- name: Remove legacy NodeSource deb822 source file + ansible.builtin.file: + path: /etc/apt/sources.list.d/nodesource.sources + state: absent + +- name: Add NodeSource repository + ansible.builtin.copy: + dest: /etc/apt/sources.list.d/nodesource.list + mode: '0644' + content: | + deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_{{ nodejs_version }} nodistro main + +- name: Update apt cache after adding NodeSource repo + ansible.builtin.apt: + update_cache: true + +- name: Install Node.js + ansible.builtin.apt: + name: nodejs + state: present + +- name: Check if pnpm is already installed + ansible.builtin.command: pnpm --version + register: pnpm_check + failed_when: false + changed_when: false + +- name: Install pnpm globally + ansible.builtin.command: npm install -g pnpm + when: pnpm_check.rc != 0 + +- name: Verify Node.js installation + ansible.builtin.command: node --version + register: node_version + changed_when: false + +- name: Verify pnpm installation + ansible.builtin.command: pnpm --version + register: pnpm_version + changed_when: false + +- name: Display Node.js and pnpm versions + ansible.builtin.debug: + msg: + - "Node.js version: {{ node_version.stdout }}" + - "pnpm version: {{ pnpm_version.stdout }}" diff --git a/roles/openclaw/tasks/nodejs-redhat.yml b/roles/openclaw/tasks/nodejs-redhat.yml new file mode 100644 index 0000000..7c6304a --- /dev/null +++ b/roles/openclaw/tasks/nodejs-redhat.yml @@ -0,0 +1,49 @@ +--- +- name: Install required packages for Node.js (RedHat/Fedora) + ansible.builtin.dnf: + name: + - ca-certificates + - curl + - gnupg2 
+ state: present + update_cache: true + +- name: Add NodeSource repository (RedHat/Fedora) + ansible.builtin.shell: + cmd: | + set -o pipefail + curl -fsSL https://rpm.nodesource.com/setup_{{ nodejs_version }} | bash - + creates: /etc/yum.repos.d/nodesource-nodejs.repo + executable: /bin/bash + +- name: Install Node.js (RedHat/Fedora) + ansible.builtin.dnf: + name: nodejs + state: present + update_cache: true + +- name: Check if pnpm is already installed + ansible.builtin.command: pnpm --version + register: pnpm_check + failed_when: false + changed_when: false + +- name: Install pnpm globally + ansible.builtin.command: npm install -g pnpm + when: pnpm_check.rc != 0 + +- name: Verify Node.js installation + ansible.builtin.command: node --version + register: node_version + changed_when: false + +- name: Verify pnpm installation + ansible.builtin.command: pnpm --version + register: pnpm_version + changed_when: false + +- name: Display Node.js and pnpm versions + ansible.builtin.debug: + msg: + - "Node.js version: {{ node_version.stdout }}" + - "pnpm version: {{ pnpm_version.stdout }}" diff --git a/roles/openclaw/tasks/nodejs.yml b/roles/openclaw/tasks/nodejs.yml index 8c1ecf4..1f58d1c 100644 --- a/roles/openclaw/tasks/nodejs.yml +++ b/roles/openclaw/tasks/nodejs.yml @@ -1,69 +1,8 @@ --- -- name: Install required packages for Node.js - ansible.builtin.apt: - name: - - ca-certificates - - curl - - gnupg - state: present +- name: Include Node.js tasks for Debian/Ubuntu + ansible.builtin.include_tasks: nodejs-debian.yml + when: ansible_os_family == 'Debian' -- name: Create directory for NodeSource GPG key - ansible.builtin.file: - path: /etc/apt/keyrings - state: directory - mode: '0755' - -- name: Add NodeSource GPG key - ansible.builtin.shell: - cmd: | - set -o pipefail - curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | \ - gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg - chmod a+r /etc/apt/keyrings/nodesource.gpg - creates: 
/etc/apt/keyrings/nodesource.gpg - executable: /bin/bash - -- name: Add NodeSource repository - ansible.builtin.shell: - cmd: | - set -o pipefail - echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] \ - https://deb.nodesource.com/node_{{ nodejs_version }} nodistro main" | \ - tee /etc/apt/sources.list.d/nodesource.list > /dev/null - creates: /etc/apt/sources.list.d/nodesource.list - executable: /bin/bash - -- name: Update apt cache after adding NodeSource repo - ansible.builtin.apt: - update_cache: true - -- name: Install Node.js - ansible.builtin.apt: - name: nodejs - state: present - -- name: Check if pnpm is already installed - ansible.builtin.command: pnpm --version - register: pnpm_check - failed_when: false - changed_when: false - -- name: Install pnpm globally - ansible.builtin.command: npm install -g pnpm - when: pnpm_check.rc != 0 - -- name: Verify Node.js installation - ansible.builtin.command: node --version - register: node_version - changed_when: false - -- name: Verify pnpm installation - ansible.builtin.command: pnpm --version - register: pnpm_version - changed_when: false - -- name: Display Node.js and pnpm versions - ansible.builtin.debug: - msg: - - "Node.js version: {{ node_version.stdout }}" - - "pnpm version: {{ pnpm_version.stdout }}" +- name: Include Node.js tasks for RedHat/Fedora + ansible.builtin.include_tasks: nodejs-redhat.yml + when: ansible_os_family == 'RedHat' diff --git a/roles/openclaw/tasks/openclaw.yml b/roles/openclaw/tasks/openclaw.yml index 4f1a361..2cec52b 100644 --- a/roles/openclaw/tasks/openclaw.yml +++ b/roles/openclaw/tasks/openclaw.yml @@ -44,14 +44,14 @@ - "{{ openclaw_home }}/.local/share/pnpm/store" - "{{ openclaw_home }}/.local/bin" -- name: Ensure pnpm directories have correct ownership +- name: Ensure pnpm directory tree ownership is correct ansible.builtin.file: path: "{{ openclaw_home }}/.local/share/pnpm" state: directory owner: "{{ openclaw_user }}" group: "{{ openclaw_user }}" recurse: true - mode: '0755' 
+ when: not (ci_test | default(false) | bool) - name: Configure pnpm for openclaw user ansible.builtin.shell: diff --git a/roles/openclaw/tasks/system-tools-linux.yml b/roles/openclaw/tasks/system-tools-linux.yml index 712952f..74f5838 100644 --- a/roles/openclaw/tasks/system-tools-linux.yml +++ b/roles/openclaw/tasks/system-tools-linux.yml @@ -1,6 +1,11 @@ --- # Linux-specific system tools installation (apt-based) +- name: Remove legacy NodeSource deb822 source file before apt operations + ansible.builtin.file: + path: /etc/apt/sources.list.d/nodesource.sources + state: absent + - name: Install essential system tools (Linux - apt) ansible.builtin.apt: name: diff --git a/roles/openclaw/tasks/system-tools-redhat.yml b/roles/openclaw/tasks/system-tools-redhat.yml new file mode 100644 index 0000000..3dddca1 --- /dev/null +++ b/roles/openclaw/tasks/system-tools-redhat.yml @@ -0,0 +1,55 @@ +--- +# RedHat/Fedora-specific system tools installation (dnf-based) + +- name: Install essential system tools (RedHat/Fedora - dnf) + ansible.builtin.dnf: + name: + # Editors + - vim-enhanced + - nano + # Version control + - git + - git-lfs + # Network tools + - curl + - wget + - nmap-ncat + - net-tools + - bind-utils + - iputils + - traceroute + - tcpdump + - nmap + - socat + - telnet + # Debugging tools + - strace + - lsof + - gdb + - htop + - iotop + - iftop + - sysstat + - procps-ng + # System utilities + - tmux + - tree + - jq + - unzip + - rsync + - less + # Build essentials for development + - gcc + - gcc-c++ + - make + - file + state: present + update_cache: true + +- name: Deploy global vim configuration (RedHat/Fedora) + ansible.builtin.template: + src: vimrc.j2 + dest: /etc/vimrc.local + owner: root + group: root + mode: '0644' diff --git a/roles/openclaw/tasks/system-tools.yml b/roles/openclaw/tasks/system-tools.yml index d4e0695..13f1e9a 100644 --- a/roles/openclaw/tasks/system-tools.yml +++ b/roles/openclaw/tasks/system-tools.yml @@ -1,8 +1,13 @@ --- -# Main system 
tools orchestration - Linux only +# Main system tools orchestration by OS family -- name: Include Linux system tools installation +- name: Include Debian/Ubuntu system tools installation ansible.builtin.include_tasks: system-tools-linux.yml + when: ansible_os_family == 'Debian' + +- name: Include RedHat/Fedora system tools installation + ansible.builtin.include_tasks: system-tools-redhat.yml + when: ansible_os_family == 'RedHat' # Common tasks for all operating systems diff --git a/roles/openclaw/tasks/tailscale-redhat.yml b/roles/openclaw/tasks/tailscale-redhat.yml new file mode 100644 index 0000000..c42d305 --- /dev/null +++ b/roles/openclaw/tasks/tailscale-redhat.yml @@ -0,0 +1,44 @@ +--- +# RedHat/Fedora-specific Tailscale installation + +- name: Add Tailscale repository (RedHat/Fedora) + ansible.builtin.get_url: + url: https://pkgs.tailscale.com/stable/fedora/tailscale.repo + dest: /etc/yum.repos.d/tailscale.repo + owner: root + group: root + mode: '0644' + +- name: Install Tailscale + ansible.builtin.dnf: + name: tailscale + state: present + update_cache: true + +- name: Enable Tailscale service (RedHat/Fedora) + ansible.builtin.systemd: + name: tailscaled + enabled: true + state: started + +- name: Check if Tailscale is already connected (RedHat/Fedora) + ansible.builtin.command: tailscale status --json + register: tailscale_status_redhat + changed_when: false + failed_when: false + +- name: Display Tailscale auth URL if not connected (RedHat/Fedora) + ansible.builtin.debug: + msg: + - "============================================" + - "Tailscale installed but not connected yet" + - "============================================" + - "" + - "To connect this machine to your Tailnet:" + - "Run: sudo tailscale up" + - "" + - "For unattended installation, use an auth key:" + - "sudo tailscale up --authkey tskey-auth-xxxxx" + - "" + - "Get auth key from: https://login.tailscale.com/admin/settings/keys" + when: tailscale_status_redhat.rc != 0 diff --git 
a/roles/openclaw/tasks/user.yml b/roles/openclaw/tasks/user.yml index ef669e4..c2f618d 100644 --- a/roles/openclaw/tasks/user.yml +++ b/roles/openclaw/tasks/user.yml @@ -112,17 +112,17 @@ ansible.builtin.command: "id -u {{ openclaw_user }}" register: openclaw_uid changed_when: false - when: ansible_os_family == 'Debian' and not ci_test + when: ansible_system == 'Linux' and not ci_test - name: Display openclaw user ID ansible.builtin.debug: msg: "OpenClaw user ID: {{ openclaw_uid.stdout }}" - when: ansible_os_family == 'Debian' and not ci_test + when: ansible_system == 'Linux' and not ci_test - name: Enable lingering for openclaw user (allows systemd user services without login) ansible.builtin.command: "loginctl enable-linger {{ openclaw_user }}" changed_when: false - when: ansible_os_family == 'Debian' and not ci_test + when: ansible_system == 'Linux' and not ci_test - name: Create runtime directory for openclaw user ansible.builtin.file: @@ -131,12 +131,12 @@ owner: "{{ openclaw_user }}" group: "{{ openclaw_user }}" mode: '0700' - when: ansible_os_family == 'Debian' and not ci_test + when: ansible_system == 'Linux' and not ci_test - name: Store openclaw UID as fact for later use ansible.builtin.set_fact: openclaw_uid_value: "{{ openclaw_uid.stdout }}" - when: ansible_os_family == 'Debian' and not ci_test + when: ansible_system == 'Linux' and not ci_test # SSH key configuration - name: Create .ssh directory for openclaw user @@ -174,7 +174,7 @@ owner: "{{ openclaw_user }}" group: "{{ openclaw_user }}" mode: '0644' - when: ansible_os_family == 'Debian' and not ci_test + when: ansible_system == 'Linux' and not ci_test - name: Set DBUS_SESSION_BUS_ADDRESS in .bashrc for openclaw user ansible.builtin.blockinfile: @@ -189,4 +189,4 @@ owner: "{{ openclaw_user }}" group: "{{ openclaw_user }}" mode: '0644' - when: ansible_os_family == 'Debian' and not ci_test + when: ansible_system == 'Linux' and not ci_test diff --git a/roles/openclaw_cloudflare_tunnel/defaults/main.yml 
b/roles/openclaw_cloudflare_tunnel/defaults/main.yml new file mode 100644 index 0000000..a7af97e --- /dev/null +++ b/roles/openclaw_cloudflare_tunnel/defaults/main.yml @@ -0,0 +1,33 @@ +--- +openclaw_cloudflare_tunnel_enabled: false +openclaw_cloudflare_tunnel_require_supported_os: true + +openclaw_cloudflare_tunnel_name: "openclaw-{{ inventory_hostname }}" +openclaw_cloudflare_tunnel_service_name: "cloudflared-{{ openclaw_cloudflare_tunnel_name }}" + +openclaw_cloudflare_tunnel_run_user: "{{ ansible_user | default('openclaw') }}" +openclaw_cloudflare_tunnel_run_group: "{{ ansible_user | default('openclaw') }}" + +openclaw_cloudflare_tunnel_workdir: >- + /home/{{ openclaw_cloudflare_tunnel_run_user }}/.openclaw-cloudflare/{{ openclaw_cloudflare_tunnel_name }} +openclaw_cloudflare_tunnel_config_path: "{{ openclaw_cloudflare_tunnel_workdir }}/config.yml" +openclaw_cloudflare_tunnel_credentials_file: >- + {{ openclaw_cloudflare_tunnel_workdir }}/{{ openclaw_cloudflare_tunnel_id }}.json + +openclaw_cloudflare_tunnel_id: "" +openclaw_cloudflare_tunnel_credentials_json: "" +openclaw_cloudflare_tunnel_manage_credentials_file: true +openclaw_cloudflare_tunnel_metrics_port: 40500 + +# List of public hostnames to expose through this tunnel. +# Example: +# openclaw_cloudflare_tunnel_ingress: +# - hostname: ingress.example.com +# service: http://127.0.0.1:30101 +# - hostname: grafana.example.com +# service: http://127.0.0.1:31001 +openclaw_cloudflare_tunnel_ingress: [] + +# Optional one-time DNS reconcile. 
+openclaw_cloudflare_tunnel_manage_dns: false +openclaw_cloudflare_tunnel_dns_tunnel_name: "{{ openclaw_cloudflare_tunnel_name }}" diff --git a/roles/openclaw_cloudflare_tunnel/tasks/main.yml b/roles/openclaw_cloudflare_tunnel/tasks/main.yml new file mode 100644 index 0000000..300b787 --- /dev/null +++ b/roles/openclaw_cloudflare_tunnel/tasks/main.yml @@ -0,0 +1,192 @@ +--- +- name: Validate OS support for Cloudflare Tunnel role + ansible.builtin.assert: + that: + - ansible_distribution in ['Debian', 'Ubuntu'] + fail_msg: >- + openclaw_cloudflare_tunnel currently supports Debian/Ubuntu hosts only. + Set openclaw_cloudflare_tunnel_enabled=false on this host or extend the role for this OS. + when: + - openclaw_cloudflare_tunnel_enabled | bool + - openclaw_cloudflare_tunnel_require_supported_os | bool + +- name: Validate required Cloudflare Tunnel settings + ansible.builtin.assert: + that: + - openclaw_cloudflare_tunnel_name | length > 0 + - openclaw_cloudflare_tunnel_id | length > 0 + - openclaw_cloudflare_tunnel_ingress | length > 0 + fail_msg: >- + Set openclaw_cloudflare_tunnel_id and at least one + openclaw_cloudflare_tunnel_ingress route before enabling the role. + when: openclaw_cloudflare_tunnel_enabled | bool + +- name: Validate inline credentials when role manages credentials file + ansible.builtin.assert: + that: + - openclaw_cloudflare_tunnel_credentials_json | length > 0 + fail_msg: >- + openclaw_cloudflare_tunnel_manage_credentials_file=true requires + openclaw_cloudflare_tunnel_credentials_json. + when: + - openclaw_cloudflare_tunnel_enabled | bool + - openclaw_cloudflare_tunnel_manage_credentials_file | bool + +- name: Validate external credentials file path when inline write is disabled + ansible.builtin.assert: + that: + - openclaw_cloudflare_tunnel_credentials_file | length > 0 + fail_msg: >- + openclaw_cloudflare_tunnel_manage_credentials_file=false requires + openclaw_cloudflare_tunnel_credentials_file pointing to an existing file. 
+ when: + - openclaw_cloudflare_tunnel_enabled | bool + - not (openclaw_cloudflare_tunnel_manage_credentials_file | bool) + +- name: Check external credentials file exists + ansible.builtin.stat: + path: "{{ openclaw_cloudflare_tunnel_credentials_file }}" + register: openclaw_cloudflare_tunnel_credentials_file_stat + when: + - openclaw_cloudflare_tunnel_enabled | bool + - not (openclaw_cloudflare_tunnel_manage_credentials_file | bool) + +- name: Fail when external credentials file is missing + ansible.builtin.assert: + that: + - openclaw_cloudflare_tunnel_credentials_file_stat.stat.exists + - openclaw_cloudflare_tunnel_credentials_file_stat.stat.isreg + fail_msg: >- + Cloudflare credentials file not found at {{ openclaw_cloudflare_tunnel_credentials_file }}. + Either provide this file on host or set openclaw_cloudflare_tunnel_manage_credentials_file=true + with openclaw_cloudflare_tunnel_credentials_json. + when: + - openclaw_cloudflare_tunnel_enabled | bool + - not (openclaw_cloudflare_tunnel_manage_credentials_file | bool) +- name: Validate Cloudflare ingress route schema + ansible.builtin.assert: + that: + - route.hostname is defined + - route.hostname | length > 0 + - route.service is defined + - route.service | length > 0 + fail_msg: "Each ingress route requires non-empty hostname and service." 
+ loop: "{{ openclaw_cloudflare_tunnel_ingress }}" + loop_control: + loop_var: route + when: openclaw_cloudflare_tunnel_enabled | bool + +- name: Resolve Cloudflare apt codename + ansible.builtin.set_fact: + openclaw_cloudflare_tunnel_apt_codename: >- + {{ 'bookworm' if ansible_distribution_release == 'trixie' else ansible_distribution_release }} + when: + - openclaw_cloudflare_tunnel_enabled | bool + - ansible_distribution in ['Debian', 'Ubuntu'] + +- name: Install Cloudflare Tunnel prerequisites + ansible.builtin.apt: + name: + - ca-certificates + - curl + - lsb-release + state: present + update_cache: true + when: + - openclaw_cloudflare_tunnel_enabled | bool + - ansible_distribution in ['Debian', 'Ubuntu'] + +- name: Install Cloudflare GPG key + ansible.builtin.get_url: + url: https://pkg.cloudflare.com/cloudflare-main.gpg + dest: /usr/share/keyrings/cloudflare-main.gpg + mode: '0644' + when: + - openclaw_cloudflare_tunnel_enabled | bool + - ansible_distribution in ['Debian', 'Ubuntu'] + +- name: Configure Cloudflare apt repository + ansible.builtin.apt_repository: + repo: >- + deb [signed-by=/usr/share/keyrings/cloudflare-main.gpg] + https://pkg.cloudflare.com/cloudflared {{ openclaw_cloudflare_tunnel_apt_codename }} main + filename: cloudflared + state: present + when: + - openclaw_cloudflare_tunnel_enabled | bool + - ansible_distribution in ['Debian', 'Ubuntu'] + +- name: Install cloudflared package + ansible.builtin.apt: + name: cloudflared + state: present + update_cache: true + when: + - openclaw_cloudflare_tunnel_enabled | bool + - ansible_distribution in ['Debian', 'Ubuntu'] + +- name: Ensure Cloudflare tunnel working directory exists + ansible.builtin.file: + path: "{{ openclaw_cloudflare_tunnel_workdir }}" + state: directory + owner: "{{ openclaw_cloudflare_tunnel_run_user }}" + group: "{{ openclaw_cloudflare_tunnel_run_group }}" + mode: '0750' + when: openclaw_cloudflare_tunnel_enabled | bool + +- name: Write Cloudflare tunnel credentials file + 
ansible.builtin.copy: + dest: "{{ openclaw_cloudflare_tunnel_credentials_file }}" + content: "{{ openclaw_cloudflare_tunnel_credentials_json }}\n" + owner: "{{ openclaw_cloudflare_tunnel_run_user }}" + group: "{{ openclaw_cloudflare_tunnel_run_group }}" + mode: '0600' + no_log: true + when: + - openclaw_cloudflare_tunnel_enabled | bool + - openclaw_cloudflare_tunnel_manage_credentials_file | bool + +- name: Render Cloudflare tunnel config + ansible.builtin.template: + src: cloudflared-config.yml.j2 + dest: "{{ openclaw_cloudflare_tunnel_config_path }}" + owner: "{{ openclaw_cloudflare_tunnel_run_user }}" + group: "{{ openclaw_cloudflare_tunnel_run_group }}" + mode: '0640' + when: openclaw_cloudflare_tunnel_enabled | bool + +- name: Install Cloudflare tunnel systemd service unit + ansible.builtin.template: + src: cloudflared.service.j2 + dest: "/etc/systemd/system/{{ openclaw_cloudflare_tunnel_service_name }}.service" + owner: root + group: root + mode: '0644' + when: openclaw_cloudflare_tunnel_enabled | bool + +- name: Ensure Cloudflare tunnel service is enabled and running + ansible.builtin.systemd: + name: "{{ openclaw_cloudflare_tunnel_service_name }}" + daemon_reload: true + enabled: true + state: started + when: openclaw_cloudflare_tunnel_enabled | bool + +- name: Reconcile Cloudflare DNS routes for ingress hostnames + ansible.builtin.command: + cmd: >- + cloudflared tunnel route dns + {{ openclaw_cloudflare_tunnel_dns_tunnel_name }} + {{ route.hostname }} + register: openclaw_cloudflare_dns_route + changed_when: "'Added CNAME' in (openclaw_cloudflare_dns_route.stdout | default(''))" + failed_when: >- + openclaw_cloudflare_dns_route.rc != 0 and + ('already exists' not in ((openclaw_cloudflare_dns_route.stdout | default('')) | lower)) and + ('already exists' not in ((openclaw_cloudflare_dns_route.stderr | default('')) | lower)) + loop: "{{ openclaw_cloudflare_tunnel_ingress }}" + loop_control: + loop_var: route + when: + - openclaw_cloudflare_tunnel_enabled | 
bool + - openclaw_cloudflare_tunnel_manage_dns | bool diff --git a/roles/openclaw_cloudflare_tunnel/templates/cloudflared-config.yml.j2 b/roles/openclaw_cloudflare_tunnel/templates/cloudflared-config.yml.j2 new file mode 100644 index 0000000..7595ebc --- /dev/null +++ b/roles/openclaw_cloudflare_tunnel/templates/cloudflared-config.yml.j2 @@ -0,0 +1,9 @@ +tunnel: {{ openclaw_cloudflare_tunnel_id }} +credentials-file: {{ openclaw_cloudflare_tunnel_credentials_file }} +metrics: 127.0.0.1:{{ openclaw_cloudflare_tunnel_metrics_port }} +ingress: +{% for route in openclaw_cloudflare_tunnel_ingress %} + - hostname: {{ route.hostname }} + service: {{ route.service }} +{% endfor %} + - service: http_status:404 diff --git a/roles/openclaw_cloudflare_tunnel/templates/cloudflared.service.j2 b/roles/openclaw_cloudflare_tunnel/templates/cloudflared.service.j2 new file mode 100644 index 0000000..8f5df0b --- /dev/null +++ b/roles/openclaw_cloudflare_tunnel/templates/cloudflared.service.j2 @@ -0,0 +1,21 @@ +[Unit] +Description=Cloudflare Tunnel ({{ openclaw_cloudflare_tunnel_name }}) +After=network-online.target +Wants=network-online.target + +[Service] +Type=simple +User={{ openclaw_cloudflare_tunnel_run_user }} +Group={{ openclaw_cloudflare_tunnel_run_group }} +WorkingDirectory={{ openclaw_cloudflare_tunnel_workdir }} +ExecStart=/usr/bin/cloudflared tunnel --config {{ openclaw_cloudflare_tunnel_config_path }} run +Restart=always +RestartSec=5 +NoNewPrivileges=true +PrivateTmp=true +ProtectSystem=full +ProtectHome=false +ReadWritePaths={{ openclaw_cloudflare_tunnel_workdir }} + +[Install] +WantedBy=multi-user.target diff --git a/roles/openclaw_control_plane/defaults/main.yml b/roles/openclaw_control_plane/defaults/main.yml new file mode 100644 index 0000000..f8ab83b --- /dev/null +++ b/roles/openclaw_control_plane/defaults/main.yml @@ -0,0 +1,48 @@ +--- +openclaw_control_plane_enabled: false +openclaw_control_plane_manage_stack: true +openclaw_control_plane_require_secrets: true 
+openclaw_control_plane_runtime_root: /opt/openclaw/control-plane +openclaw_control_plane_source_dir: "{{ playbook_dir }}/../control-plane" +openclaw_control_plane_project_prefix: ocp +openclaw_control_plane_default_nats_stream: OPENCLAW_TASKS +openclaw_control_plane_owner: "{{ ansible_user | default('openclaw') }}" +openclaw_control_plane_group: "{{ ansible_user | default('openclaw') }}" +openclaw_control_plane_env_owner: "{{ openclaw_control_plane_owner }}" +openclaw_control_plane_env_group: "{{ openclaw_control_plane_group }}" +openclaw_control_plane_worker_uid: "{{ openclaw_uid_value | default('1000') }}" +openclaw_control_plane_worker_gid: "{{ openclaw_gid_value | default(openclaw_control_plane_worker_uid) }}" +openclaw_control_plane_health_retries: 20 +openclaw_control_plane_health_delay: 3 + +# Modes: +# - full: all services (router + broker + workers + observability) +# - lite: direct/simple path (ingress + router-forced-main + worker-main + broker + control-api) +openclaw_control_plane_profiles: [] + +# Example: +# openclaw_control_plane_profiles: +# - name: efra-core +# mode: full +# gateway_profile: dev-main +# project_dir: /opt/openclaw/control-plane/efra-core +# ingress_port: 30101 +# control_api_port: 39101 +# grafana_port: 31001 +# prometheus_port: 39091 +# telegram_bot_token: "{{ vault_telegram_bot_token_efra_core }}" +# telegram_default_chat_id: "{{ vault_telegram_default_chat_id_efra_core }}" +# postgres_password: "{{ vault_openclaw_cp_postgres_password_efra_core }}" +# nats_user: queue +# nats_password: "{{ vault_openclaw_cp_nats_password_efra_core }}" +# +# - name: andrea +# mode: lite +# gateway_profile: andrea +# ingress_port: 30111 +# control_api_port: 39111 +# telegram_bot_token: "{{ vault_telegram_bot_token_andrea }}" +# telegram_default_chat_id: "{{ vault_telegram_default_chat_id_andrea }}" +# postgres_password: "{{ vault_openclaw_cp_postgres_password_andrea }}" +# nats_user: queue +# nats_password: "{{ 
vault_openclaw_cp_nats_password_andrea }}" diff --git a/roles/openclaw_control_plane/tasks/main.yml b/roles/openclaw_control_plane/tasks/main.yml new file mode 100644 index 0000000..5ce5b83 --- /dev/null +++ b/roles/openclaw_control_plane/tasks/main.yml @@ -0,0 +1,88 @@ +--- +- name: Validate control-plane profiles list + ansible.builtin.assert: + that: + - openclaw_control_plane_profiles | length > 0 + fail_msg: >- + openclaw_control_plane_enabled=true requires openclaw_control_plane_profiles + with at least one profile. + +- name: Validate control-plane profile schema + ansible.builtin.assert: + that: + - profile.name is defined + - profile.name | length > 0 + - profile.mode is defined + - profile.mode in ['full', 'lite'] + fail_msg: "Each control-plane profile requires non-empty name and mode in [full, lite]." + loop: "{{ openclaw_control_plane_profiles }}" + loop_control: + loop_var: profile + no_log: true + +- name: Validate unique control-plane profile names + ansible.builtin.assert: + that: + - (openclaw_control_plane_profiles | map(attribute='name') | list | unique | length) == + (openclaw_control_plane_profiles | length) + fail_msg: "Control-plane profile names must be unique." + +- name: Validate required secrets in control-plane profile + ansible.builtin.assert: + that: + - profile.postgres_password is defined + - profile.postgres_password | length > 0 + - profile.nats_password is defined + - profile.nats_password | length > 0 + fail_msg: "Profile {{ profile.name }} requires postgres_password and nats_password." 
+ when: openclaw_control_plane_require_secrets | bool + loop: "{{ openclaw_control_plane_profiles }}" + loop_control: + loop_var: profile + no_log: true + +- name: Resolve runtime UID/GID for openclaw worker user + ansible.builtin.getent: + database: passwd + key: "{{ openclaw_user | default('openclaw') }}" + +- name: Set effective control-plane worker UID/GID from system account + ansible.builtin.set_fact: + openclaw_control_plane_worker_uid: >- + {{ + ansible_facts.getent_passwd[openclaw_user | default('openclaw')][1] + }} + openclaw_control_plane_worker_gid: >- + {{ + ansible_facts.getent_passwd[openclaw_user | default('openclaw')][2] + }} + +- name: Ensure control-plane runtime root exists + ansible.builtin.file: + path: "{{ openclaw_control_plane_runtime_root }}" + state: directory + owner: "{{ openclaw_control_plane_owner }}" + group: "{{ openclaw_control_plane_group }}" + mode: '0755' + +- name: Ensure control-plane source directory exists on target + ansible.builtin.file: + path: "{{ openclaw_control_plane_runtime_root }}/source" + state: directory + owner: "{{ openclaw_control_plane_owner }}" + group: "{{ openclaw_control_plane_group }}" + mode: '0755' + +- name: Sync control-plane source to target + ansible.builtin.copy: + src: "{{ openclaw_control_plane_source_dir }}/" + dest: "{{ openclaw_control_plane_runtime_root }}/source/" + owner: "{{ openclaw_control_plane_owner }}" + group: "{{ openclaw_control_plane_group }}" + mode: preserve + +- name: Configure and deploy each control-plane profile + ansible.builtin.include_tasks: profile.yml + loop: "{{ openclaw_control_plane_profiles }}" + loop_control: + loop_var: profile diff --git a/roles/openclaw_control_plane/tasks/profile.yml b/roles/openclaw_control_plane/tasks/profile.yml new file mode 100644 index 0000000..704ed18 --- /dev/null +++ b/roles/openclaw_control_plane/tasks/profile.yml @@ -0,0 +1,132 @@ +--- +- name: Set control-plane project dir fact + ansible.builtin.set_fact: + 
openclaw_control_plane_profile_dir: "{{ profile.project_dir | default(openclaw_control_plane_runtime_root ~ '/' ~ profile.name) }}" + +- name: Ensure profile project root exists + ansible.builtin.file: + path: "{{ openclaw_control_plane_profile_dir }}" + state: directory + owner: "{{ openclaw_control_plane_owner }}" + group: "{{ openclaw_control_plane_group }}" + mode: '0755' + +- name: Ensure profile project directories exist + ansible.builtin.file: + path: "{{ openclaw_control_plane_profile_dir }}/{{ item }}" + state: directory + owner: "{{ openclaw_control_plane_owner }}" + group: "{{ openclaw_control_plane_group }}" + mode: '0755' + loop: + - data + - prometheus + - grafana/provisioning/datasources + - grafana/dashboards + - loki + +- name: Render control-plane env file + ansible.builtin.template: + src: control-plane.env.j2 + dest: "{{ openclaw_control_plane_profile_dir }}/.env" + owner: "{{ openclaw_control_plane_env_owner }}" + group: "{{ openclaw_control_plane_env_group }}" + mode: '0640' + no_log: true + +- name: Render compose stack for full mode + ansible.builtin.template: + src: docker-compose.full.yml.j2 + dest: "{{ openclaw_control_plane_profile_dir }}/docker-compose.yml" + owner: "{{ openclaw_control_plane_owner }}" + group: "{{ openclaw_control_plane_group }}" + mode: '0644' + when: profile.mode == 'full' + +- name: Render compose stack for lite mode + ansible.builtin.template: + src: docker-compose.lite.yml.j2 + dest: "{{ openclaw_control_plane_profile_dir }}/docker-compose.yml" + owner: "{{ openclaw_control_plane_owner }}" + group: "{{ openclaw_control_plane_group }}" + mode: '0644' + when: profile.mode == 'lite' + +- name: Render Prometheus config + ansible.builtin.template: + src: prometheus.yml.j2 + dest: "{{ openclaw_control_plane_profile_dir }}/prometheus/prometheus.yml" + owner: "{{ openclaw_control_plane_owner }}" + group: "{{ openclaw_control_plane_group }}" + mode: '0644' + +- name: Render Grafana datasource provisioning + 
ansible.builtin.template: + src: grafana-datasources.yml.j2 + dest: "{{ openclaw_control_plane_profile_dir }}/grafana/provisioning/datasources/datasource.yml" + owner: "{{ openclaw_control_plane_owner }}" + group: "{{ openclaw_control_plane_group }}" + mode: '0644' + +- name: Deploy control-plane stack with Docker Compose + community.docker.docker_compose_v2: + project_src: "{{ openclaw_control_plane_profile_dir }}" + project_name: "{{ openclaw_control_plane_project_prefix }}-{{ profile.name }}" + files: + - docker-compose.yml + state: present + build: always + remove_orphans: true + when: openclaw_control_plane_manage_stack | bool + +- name: Wait for profile postgres container to accept local connections + ansible.builtin.shell: | + docker exec \ + -e PGPASSWORD='{{ profile.postgres_admin_password | default(profile.postgres_password) | replace("'", "'\"'\"'") }}' \ + {{ openclaw_control_plane_project_prefix }}-{{ profile.name }}-postgres-1 \ + psql -h 127.0.0.1 \ + -U {{ profile.postgres_admin_user | default(profile.postgres_user | default('openclaw')) }} \ + -d {{ profile.postgres_db | default('openclaw_control') }} \ + -tAc 'select 1' + register: profile_postgres_ready + retries: 30 + delay: 2 + until: + - profile_postgres_ready.rc == 0 + - "'1' in profile_postgres_ready.stdout" + changed_when: false + when: openclaw_control_plane_manage_stack | bool + +- name: Reconcile postgres role password for control-plane profile + ansible.builtin.shell: | + docker exec -i \ + -e PGPASSWORD='{{ profile.postgres_admin_password | default(profile.postgres_password) | replace("'", "'\"'\"'") }}' \ + {{ openclaw_control_plane_project_prefix }}-{{ profile.name }}-postgres-1 \ + psql -h 127.0.0.1 \ + -U {{ profile.postgres_admin_user | default(profile.postgres_user | default('openclaw')) }} \ + -d {{ profile.postgres_db | default('openclaw_control') }} \ + -v ON_ERROR_STOP=1 <<'SQL' + DO $$ + BEGIN + IF NOT EXISTS (SELECT 1 FROM pg_roles WHERE rolname = '{{ profile.postgres_user 
| default('openclaw') }}') THEN + CREATE ROLE {{ profile.postgres_user | default('openclaw') }} LOGIN PASSWORD '{{ profile.postgres_password | replace("'", "''") }}'; + ELSE + ALTER ROLE {{ profile.postgres_user | default('openclaw') }} WITH LOGIN PASSWORD '{{ profile.postgres_password | replace("'", "''") }}'; + END IF; + END + $$; + SQL + register: profile_postgres_reconcile + changed_when: false + when: openclaw_control_plane_manage_stack | bool + +- name: Probe control API health + ansible.builtin.uri: + url: "http://127.0.0.1:{{ profile.control_api_port | default((profile.mode == 'lite') | ternary(39111, 39101)) }}/health" + method: GET + status_code: 200 + register: profile_health + retries: "{{ openclaw_control_plane_health_retries }}" + delay: "{{ openclaw_control_plane_health_delay }}" + until: profile_health is succeeded + when: openclaw_control_plane_manage_stack | bool diff --git a/roles/openclaw_control_plane/templates/control-plane.env.j2 b/roles/openclaw_control_plane/templates/control-plane.env.j2 new file mode 100644 index 0000000..1994589 --- /dev/null +++ b/roles/openclaw_control_plane/templates/control-plane.env.j2 @@ -0,0 +1,22 @@ +OPENCLAW_PROFILE={{ profile.gateway_profile | default(profile.name) }} +NATS_URL=nats://{{ profile.nats_user | default('queue') }}:{{ profile.nats_password }}@nats:4222 +NATS_STREAM={{ profile.nats_stream | default(openclaw_control_plane_default_nats_stream) }} +NATS_USER={{ profile.nats_user | default('queue') }} +NATS_PASSWORD={{ profile.nats_password }} +POSTGRES_USER={{ profile.postgres_user | default('openclaw') }} +POSTGRES_PASSWORD={{ profile.postgres_password }} +POSTGRES_DB={{ profile.postgres_db | default('openclaw_control') }} +POSTGRES_URL=postgres://{{ profile.postgres_user | default('openclaw') }}:{{ profile.postgres_password }}@postgres:5432/{{ profile.postgres_db | default('openclaw_control') }} +TELEGRAM_BOT_TOKEN={{ profile.telegram_bot_token | default('') }} +TELEGRAM_DEFAULT_CHAT_ID={{ 
profile.telegram_default_chat_id | default('') }} +ROUTER_FORCED_AGENT={{ profile.router_forced_agent | default('') }} +WORKER_EXEC_MODE={{ profile.worker_exec_mode | default('stub') }} +OPENCLAW_BIN={{ profile.openclaw_bin | default('/home/openclaw/.local/bin/openclaw') }} +OPENCLAW_HOME={{ profile.openclaw_home | default('/home/openclaw') }} +OPENCLAW_UID={{ profile.openclaw_worker_uid | default(openclaw_control_plane_worker_uid) }} +OPENCLAW_GID={{ profile.openclaw_worker_gid | default(openclaw_control_plane_worker_gid) }} +OPENCLAW_ENV_FILE={{ profile.openclaw_env_file | default('/etc/openclaw/secrets/' ~ (profile.gateway_profile | default(profile.name)) ~ '.env') }} +OPENCLAW_GATEWAY_TOKEN={{ profile.openclaw_gateway_token | default('') }} +OPENCLAW_TIMEOUT_MS={{ profile.openclaw_timeout_ms | default(120000) }} +OPENCLAW_BUNDLED_PLUGINS_DIR={{ profile.openclaw_bundled_plugins_dir | default('/home/openclaw/.openclaw/bundled-extensions') }} +GRAFANA_ADMIN_PASSWORD={{ profile.grafana_admin_password | default('openclaw') }} diff --git a/roles/openclaw_control_plane/templates/docker-compose.full.yml.j2 b/roles/openclaw_control_plane/templates/docker-compose.full.yml.j2 new file mode 100644 index 0000000..23d42e9 --- /dev/null +++ b/roles/openclaw_control_plane/templates/docker-compose.full.yml.j2 @@ -0,0 +1,235 @@ +services: + nats: + image: nats:2.10-alpine + command: ["-js", "-sd", "/data", "-m", "8222", "--user", "${NATS_USER}", "--pass", "${NATS_PASSWORD}"] + restart: unless-stopped + ports: + - "127.0.0.1:{{ profile.nats_host_port | default(14222) }}:4222" + volumes: + - ./data/nats:/data + + postgres: + image: postgres:16-alpine + restart: unless-stopped + environment: + POSTGRES_USER: ${POSTGRES_USER} + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} + POSTGRES_DB: ${POSTGRES_DB} + volumes: + - ./data/postgres:/var/lib/postgresql/data + + ingress: + build: + context: {{ openclaw_control_plane_runtime_root }}/source + dockerfile: Dockerfile + args: + SERVICE: ingress 
+ env_file: .env + environment: + HTTP_PORT: 3000 + METRICS_PORT: 9401 + restart: unless-stopped + depends_on: + - nats + - postgres + ports: + - "127.0.0.1:{{ profile.ingress_port | default(30101) }}:3000" + + router: + build: + context: {{ openclaw_control_plane_runtime_root }}/source + dockerfile: Dockerfile + args: + SERVICE: router + env_file: .env + environment: + METRICS_PORT: 9402 + ROUTER_FORCED_AGENT: "" + restart: unless-stopped + depends_on: + - nats + - postgres + + broker: + build: + context: {{ openclaw_control_plane_runtime_root }}/source + dockerfile: Dockerfile + args: + SERVICE: broker + env_file: .env + environment: + METRICS_PORT: 9403 + restart: unless-stopped + depends_on: + - nats + - postgres + + worker-main: + build: + context: {{ openclaw_control_plane_runtime_root }}/source + dockerfile: Dockerfile + args: + SERVICE: worker + env_file: .env + environment: + METRICS_PORT: 9411 + WORKER_AGENT_ID: main + WORKER_EXEC_MODE: ${WORKER_EXEC_MODE} + OPENCLAW_BIN: ${OPENCLAW_BIN} + OPENCLAW_HOME: ${OPENCLAW_HOME} + OPENCLAW_ENV_FILE: ${OPENCLAW_ENV_FILE} + OPENCLAW_TIMEOUT_MS: ${OPENCLAW_TIMEOUT_MS} + OPENCLAW_BUNDLED_PLUGINS_DIR: ${OPENCLAW_BUNDLED_PLUGINS_DIR} +{% if profile.worker_exec_mode | default('stub') == 'openclaw' %} + user: "${OPENCLAW_UID}:${OPENCLAW_GID}" + volumes: + - /home/openclaw:/home/openclaw +{% endif %} + restart: unless-stopped + depends_on: + - nats + - postgres + + worker-research: + build: + context: {{ openclaw_control_plane_runtime_root }}/source + dockerfile: Dockerfile + args: + SERVICE: worker + env_file: .env + environment: + METRICS_PORT: 9412 + WORKER_AGENT_ID: research + WORKER_EXEC_MODE: ${WORKER_EXEC_MODE} + OPENCLAW_BIN: ${OPENCLAW_BIN} + OPENCLAW_HOME: ${OPENCLAW_HOME} + OPENCLAW_ENV_FILE: ${OPENCLAW_ENV_FILE} + OPENCLAW_TIMEOUT_MS: ${OPENCLAW_TIMEOUT_MS} + OPENCLAW_BUNDLED_PLUGINS_DIR: ${OPENCLAW_BUNDLED_PLUGINS_DIR} +{% if profile.worker_exec_mode | default('stub') == 'openclaw' %} + user: 
"${OPENCLAW_UID}:${OPENCLAW_GID}" + volumes: + - /home/openclaw:/home/openclaw +{% endif %} + restart: unless-stopped + depends_on: + - nats + - postgres + + worker-browser-login: + build: + context: {{ openclaw_control_plane_runtime_root }}/source + dockerfile: Dockerfile + args: + SERVICE: worker + env_file: .env + environment: + METRICS_PORT: 9413 + WORKER_AGENT_ID: browser-login + NATS_URL: nats://{{ profile.nats_user | default('queue') }}:{{ profile.nats_password }}@127.0.0.1:{{ profile.nats_host_port | default(14222) }} + WORKER_EXEC_MODE: ${WORKER_EXEC_MODE} + OPENCLAW_BIN: ${OPENCLAW_BIN} + OPENCLAW_HOME: ${OPENCLAW_HOME} + OPENCLAW_ENV_FILE: ${OPENCLAW_ENV_FILE} + OPENCLAW_TIMEOUT_MS: ${OPENCLAW_TIMEOUT_MS} + OPENCLAW_BUNDLED_PLUGINS_DIR: ${OPENCLAW_BUNDLED_PLUGINS_DIR} +{% if profile.worker_exec_mode | default('stub') == 'openclaw' %} + network_mode: host + shm_size: "1gb" + user: "${OPENCLAW_UID}:${OPENCLAW_GID}" + volumes: + - /home/openclaw:/home/openclaw +{% endif %} + restart: unless-stopped + depends_on: + - nats + - postgres + + worker-coolify-ops: + build: + context: {{ openclaw_control_plane_runtime_root }}/source + dockerfile: Dockerfile + args: + SERVICE: worker + env_file: .env + environment: + METRICS_PORT: 9414 + WORKER_AGENT_ID: coolify-ops + WORKER_EXEC_MODE: ${WORKER_EXEC_MODE} + OPENCLAW_BIN: ${OPENCLAW_BIN} + OPENCLAW_HOME: ${OPENCLAW_HOME} + OPENCLAW_ENV_FILE: ${OPENCLAW_ENV_FILE} + OPENCLAW_TIMEOUT_MS: ${OPENCLAW_TIMEOUT_MS} + OPENCLAW_BUNDLED_PLUGINS_DIR: ${OPENCLAW_BUNDLED_PLUGINS_DIR} +{% if profile.worker_exec_mode | default('stub') == 'openclaw' %} + user: "${OPENCLAW_UID}:${OPENCLAW_GID}" + volumes: + - /home/openclaw:/home/openclaw +{% endif %} + restart: unless-stopped + depends_on: + - nats + - postgres + + control-api: + build: + context: {{ openclaw_control_plane_runtime_root }}/source + dockerfile: Dockerfile + args: + SERVICE: control-api + env_file: .env + environment: + HTTP_PORT: 39090 + METRICS_PORT: 9405 + restart: 
unless-stopped + depends_on: + - nats + - postgres + ports: + - "127.0.0.1:{{ profile.control_api_port | default(39101) }}:39090" + + nats-exporter: + image: natsio/prometheus-nats-exporter:0.17.2 + command: ["-varz", "http://nats:8222"] + restart: unless-stopped + depends_on: + - nats + + prometheus: + image: prom/prometheus:v2.54.1 + user: "0:0" + command: + - --config.file=/etc/prometheus/prometheus.yml + - --storage.tsdb.path=/prometheus + volumes: + - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro + - ./data/prometheus:/prometheus + restart: unless-stopped + ports: + - "127.0.0.1:{{ profile.prometheus_port | default(39091) }}:9090" + depends_on: + - nats-exporter + - ingress + - control-api + + grafana: + image: grafana/grafana:11.2.2 + user: "0:0" + environment: + GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_ADMIN_PASSWORD} + volumes: + - ./data/grafana:/var/lib/grafana + - ./grafana/provisioning/datasources:/etc/grafana/provisioning/datasources:ro + restart: unless-stopped + depends_on: + - prometheus + ports: + - "127.0.0.1:{{ profile.grafana_port | default(31001) }}:3000" + + uptime-kuma: + image: louislam/uptime-kuma:1.23.13 + restart: unless-stopped + volumes: + - ./data/uptime-kuma:/app/data + ports: + - "127.0.0.1:{{ profile.uptime_kuma_port | default(31081) }}:3001" diff --git a/roles/openclaw_control_plane/templates/docker-compose.lite.yml.j2 b/roles/openclaw_control_plane/templates/docker-compose.lite.yml.j2 new file mode 100644 index 0000000..08a0c0b --- /dev/null +++ b/roles/openclaw_control_plane/templates/docker-compose.lite.yml.j2 @@ -0,0 +1,106 @@ +services: + nats: + image: nats:2.10-alpine + command: ["-js", "-sd", "/data", "-m", "8222", "--user", "${NATS_USER}", "--pass", "${NATS_PASSWORD}"] + restart: unless-stopped + volumes: + - ./data/nats:/data + + postgres: + image: postgres:16-alpine + restart: unless-stopped + environment: + POSTGRES_USER: ${POSTGRES_USER} + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} + POSTGRES_DB: 
${POSTGRES_DB} + volumes: + - ./data/postgres:/var/lib/postgresql/data + + ingress: + build: + context: {{ openclaw_control_plane_runtime_root }}/source + dockerfile: Dockerfile + args: + SERVICE: ingress + env_file: .env + environment: + HTTP_PORT: 3000 + METRICS_PORT: 9401 + restart: unless-stopped + depends_on: + - nats + - postgres + ports: + - "127.0.0.1:{{ profile.ingress_port | default(30111) }}:3000" + + router: + build: + context: {{ openclaw_control_plane_runtime_root }}/source + dockerfile: Dockerfile + args: + SERVICE: router + env_file: .env + environment: + METRICS_PORT: 9402 + ROUTER_FORCED_AGENT: main + restart: unless-stopped + depends_on: + - nats + - postgres + + worker-main: + build: + context: {{ openclaw_control_plane_runtime_root }}/source + dockerfile: Dockerfile + args: + SERVICE: worker + env_file: .env + environment: + METRICS_PORT: 9411 + WORKER_AGENT_ID: main + WORKER_EXEC_MODE: ${WORKER_EXEC_MODE} + OPENCLAW_BIN: ${OPENCLAW_BIN} + OPENCLAW_HOME: ${OPENCLAW_HOME} + OPENCLAW_ENV_FILE: ${OPENCLAW_ENV_FILE} + OPENCLAW_TIMEOUT_MS: ${OPENCLAW_TIMEOUT_MS} + OPENCLAW_BUNDLED_PLUGINS_DIR: ${OPENCLAW_BUNDLED_PLUGINS_DIR} +{% if profile.worker_exec_mode | default('stub') == 'openclaw' %} + user: "${OPENCLAW_UID}:${OPENCLAW_GID}" + volumes: + - /home/openclaw:/home/openclaw +{% endif %} + restart: unless-stopped + depends_on: + - nats + - postgres + + broker: + build: + context: {{ openclaw_control_plane_runtime_root }}/source + dockerfile: Dockerfile + args: + SERVICE: broker + env_file: .env + environment: + METRICS_PORT: 9403 + restart: unless-stopped + depends_on: + - nats + - postgres + + control-api: + build: + context: {{ openclaw_control_plane_runtime_root }}/source + dockerfile: Dockerfile + args: + SERVICE: control-api + env_file: .env + environment: + HTTP_PORT: 39090 + METRICS_PORT: 9405 + restart: unless-stopped + depends_on: + - nats + - postgres + ports: + - "127.0.0.1:{{ profile.control_api_port | default(39111) }}:39090" diff 
--git a/roles/openclaw_control_plane/templates/grafana-datasources.yml.j2 b/roles/openclaw_control_plane/templates/grafana-datasources.yml.j2 new file mode 100644 index 0000000..bb009bb --- /dev/null +++ b/roles/openclaw_control_plane/templates/grafana-datasources.yml.j2 @@ -0,0 +1,9 @@ +apiVersion: 1 + +datasources: + - name: Prometheus + type: prometheus + access: proxy + url: http://prometheus:9090 + isDefault: true + editable: false diff --git a/roles/openclaw_control_plane/templates/prometheus.yml.j2 b/roles/openclaw_control_plane/templates/prometheus.yml.j2 new file mode 100644 index 0000000..d8cd375 --- /dev/null +++ b/roles/openclaw_control_plane/templates/prometheus.yml.j2 @@ -0,0 +1,41 @@ +global: + scrape_interval: 15s + +scrape_configs: + - job_name: nats + static_configs: + - targets: ['nats-exporter:7777'] + + - job_name: ingress + static_configs: + - targets: ['ingress:9401'] + + - job_name: router + static_configs: + - targets: ['router:9402'] + + - job_name: broker + static_configs: + - targets: ['broker:9403'] + + - job_name: worker-main + static_configs: + - targets: ['worker-main:9411'] + + - job_name: control-api + static_configs: + - targets: ['control-api:9405'] + +{% if profile.mode == 'full' %} + - job_name: worker-research + static_configs: + - targets: ['worker-research:9412'] + + - job_name: worker-browser-login + static_configs: + - targets: ['worker-browser-login:9413'] + + - job_name: worker-coolify-ops + static_configs: + - targets: ['worker-coolify-ops:9414'] +{% endif %} diff --git a/roles/openclaw_enterprise/defaults/main.yml b/roles/openclaw_enterprise/defaults/main.yml new file mode 100644 index 0000000..838a692 --- /dev/null +++ b/roles/openclaw_enterprise/defaults/main.yml @@ -0,0 +1,47 @@ +--- +openclaw_enterprise_enabled: false +openclaw_enterprise_manage_services: true +openclaw_enterprise_require_secrets: true +openclaw_enterprise_require_provider_api_keys: false +openclaw_enterprise_secret_dir: /etc/openclaw/secrets 
+openclaw_enterprise_openclaw_bin: "{{ openclaw_home }}/.local/bin/openclaw" + +# Example profile object fields: +# - name: prod-main +# gateway_port: 18789 +# gateway_bind: loopback +# state_dir: /home/openclaw/.openclaw-prod-main +# config_path: /home/openclaw/.openclaw-prod-main/openclaw.json +# workspace_root: /home/openclaw/.openclaw-prod-main/workspace +# env: +# OPENCLAW_GATEWAY_TOKEN: "{{ vault_openclaw_gateway_token_prod_main }}" +# OPENAI_API_KEY: "{{ vault_openai_api_key_prod }}" +# ANTHROPIC_API_KEY: "{{ vault_anthropic_api_key_prod }}" +# agents: +# - id: main +# default: true +# workspace: /home/openclaw/.openclaw-prod-main/workspace +# - id: ops +# workspace: /home/openclaw/.openclaw-prod-main/workspace-ops +# tools: +# profile: messaging +# bindings: [] +openclaw_enterprise_profiles: [] + +openclaw_enterprise_default_models: + "openai/gpt-5.2": + alias: openai-premium + params: + temperature: 0.2 + "openai/gpt-5-mini": + alias: openai-fast + params: + temperature: 0.1 + "anthropic/claude-opus-4-6": + alias: anthropic-premium + params: + temperature: 0.2 + "anthropic/claude-sonnet-4-5": + alias: anthropic-fast + params: + temperature: 0.1 diff --git a/roles/openclaw_enterprise/handlers/main.yml b/roles/openclaw_enterprise/handlers/main.yml new file mode 100644 index 0000000..17e4edc --- /dev/null +++ b/roles/openclaw_enterprise/handlers/main.yml @@ -0,0 +1,14 @@ +--- +- name: Reload systemd daemon + ansible.builtin.systemd: + daemon_reload: true + +- name: Restart enterprise gateways + ansible.builtin.systemd: + name: "openclaw-gateway-{{ profile.name }}" + state: restarted + enabled: true + loop: "{{ openclaw_enterprise_profiles }}" + loop_control: + loop_var: profile + when: openclaw_enterprise_manage_services | bool diff --git a/roles/openclaw_enterprise/tasks/main.yml b/roles/openclaw_enterprise/tasks/main.yml new file mode 100644 index 0000000..0a844a6 --- /dev/null +++ b/roles/openclaw_enterprise/tasks/main.yml @@ -0,0 +1,256 @@ +--- +- name: 
Validate enterprise profile list is present + ansible.builtin.assert: + that: + - openclaw_enterprise_profiles | length > 0 + fail_msg: "openclaw_enterprise_enabled=true requires openclaw_enterprise_profiles with at least one profile." + +- name: Validate required profile keys + ansible.builtin.assert: + that: + - profile.name is defined + - profile.name | length > 0 + - profile.gateway_port is defined + - profile.gateway_port | int > 0 + fail_msg: "Each profile must define non-empty name and positive gateway_port." + loop: "{{ openclaw_enterprise_profiles }}" + loop_control: + loop_var: profile + no_log: true + +- name: Validate unique enterprise profile names + ansible.builtin.assert: + that: + - (openclaw_enterprise_profiles | map(attribute='name') | list | unique | length) == + (openclaw_enterprise_profiles | length) + fail_msg: "Each profile name must be unique." + +- name: Validate unique enterprise gateway ports + ansible.builtin.assert: + that: + - (openclaw_enterprise_profiles | map(attribute='gateway_port') | list | unique | length) == + (openclaw_enterprise_profiles | length) + fail_msg: "Each profile gateway_port must be unique." + +- name: Ensure secrets directory exists + ansible.builtin.file: + path: "{{ openclaw_enterprise_secret_dir }}" + state: directory + owner: root + group: "{{ openclaw_user }}" + mode: '0750' + +- name: Check OpenClaw binary exists for enterprise services + ansible.builtin.stat: + path: "{{ openclaw_enterprise_openclaw_bin }}" + register: openclaw_enterprise_bin + when: openclaw_enterprise_manage_services | bool + +- name: Fail when OpenClaw binary is missing + ansible.builtin.fail: + msg: >- + OpenClaw binary not found at {{ openclaw_enterprise_openclaw_bin }}. + Ensure role 'openclaw' completed successfully before enabling enterprise services. 
+ when: + - openclaw_enterprise_manage_services | bool + - not openclaw_enterprise_bin.stat.exists + +- name: Ensure profile state directory exists + ansible.builtin.file: + path: "{{ profile.state_dir | default(openclaw_home ~ '/.openclaw-' ~ profile.name) }}" + state: directory + owner: "{{ openclaw_user }}" + group: "{{ openclaw_user }}" + mode: '0755' + loop: "{{ openclaw_enterprise_profiles }}" + loop_control: + loop_var: profile + +- name: Ensure profile workspace directory exists + ansible.builtin.file: + path: >- + {{ + profile.workspace_root + | default((profile.state_dir | default(openclaw_home ~ '/.openclaw-' ~ profile.name)) ~ '/workspace') + }} + state: directory + owner: "{{ openclaw_user }}" + group: "{{ openclaw_user }}" + mode: '0755' + loop: "{{ openclaw_enterprise_profiles }}" + loop_control: + loop_var: profile + +- name: Ensure per-agent workspace directories exist + ansible.builtin.file: + path: "{{ agent_workspace }}" + state: directory + owner: "{{ openclaw_user }}" + group: "{{ openclaw_user }}" + mode: '0755' + vars: + agent_workspace: >- + {{ + item.1.workspace + | default( + item.0.workspace_root + | default((item.0.state_dir | default(openclaw_home ~ '/.openclaw-' ~ item.0.name)) ~ '/workspace') + ) + }} + loop: "{{ openclaw_enterprise_profiles | subelements('agents', skip_missing=True) }}" + +- name: Seed AGENTS.md per agent workspace + ansible.builtin.template: + src: workspace-agents.md.j2 + dest: "{{ agent_workspace }}/AGENTS.md" + owner: "{{ openclaw_user }}" + group: "{{ openclaw_user }}" + mode: '0644' + vars: + profile: "{{ item.0 }}" + agent: "{{ item.1 }}" + agent_workspace: >- + {{ + item.1.workspace + | default( + item.0.workspace_root + | default((item.0.state_dir | default(openclaw_home ~ '/.openclaw-' ~ item.0.name)) ~ '/workspace') + ) + }} + agent_persona: "{{ (item.0.agent_personas | default([]) | selectattr('id', 'equalto', item.1.id) | list | first | default({})) }}" + loop: "{{ openclaw_enterprise_profiles | 
subelements('agents', skip_missing=True) }}" + +- name: Seed SOUL.md per agent workspace + ansible.builtin.template: + src: workspace-soul.md.j2 + dest: "{{ agent_workspace }}/SOUL.md" + owner: "{{ openclaw_user }}" + group: "{{ openclaw_user }}" + mode: '0644' + vars: + profile: "{{ item.0 }}" + agent: "{{ item.1 }}" + agent_workspace: >- + {{ + item.1.workspace + | default( + item.0.workspace_root + | default((item.0.state_dir | default(openclaw_home ~ '/.openclaw-' ~ item.0.name)) ~ '/workspace') + ) + }} + agent_persona: "{{ (item.0.agent_personas | default([]) | selectattr('id', 'equalto', item.1.id) | list | first | default({})) }}" + loop: "{{ openclaw_enterprise_profiles | subelements('agents', skip_missing=True) }}" + +- name: Seed IDENTITY.md per agent workspace + ansible.builtin.template: + src: workspace-identity.md.j2 + dest: "{{ agent_workspace }}/IDENTITY.md" + owner: "{{ openclaw_user }}" + group: "{{ openclaw_user }}" + mode: '0644' + vars: + profile: "{{ item.0 }}" + agent: "{{ item.1 }}" + agent_workspace: >- + {{ + item.1.workspace + | default( + item.0.workspace_root + | default((item.0.state_dir | default(openclaw_home ~ '/.openclaw-' ~ item.0.name)) ~ '/workspace') + ) + }} + agent_persona: "{{ (item.0.agent_personas | default([]) | selectattr('id', 'equalto', item.1.id) | list | first | default({})) }}" + loop: "{{ openclaw_enterprise_profiles | subelements('agents', skip_missing=True) }}" + +- name: Seed USER.md per agent workspace + ansible.builtin.template: + src: workspace-user.md.j2 + dest: "{{ agent_workspace }}/USER.md" + owner: "{{ openclaw_user }}" + group: "{{ openclaw_user }}" + mode: '0644' + vars: + profile: "{{ item.0 }}" + agent: "{{ item.1 }}" + agent_workspace: >- + {{ + item.1.workspace + | default( + item.0.workspace_root + | default((item.0.state_dir | default(openclaw_home ~ '/.openclaw-' ~ item.0.name)) ~ '/workspace') + ) + }} + agent_persona: "{{ (item.0.agent_personas | default([]) | selectattr('id', 'equalto', 
item.1.id) | list | first | default({})) }}" + loop: "{{ openclaw_enterprise_profiles | subelements('agents', skip_missing=True) }}" + +- name: Validate required secret keys in profile env + ansible.builtin.assert: + that: + - profile.env is defined + - profile.env.OPENCLAW_GATEWAY_TOKEN is defined + - profile.env.OPENCLAW_GATEWAY_TOKEN | length > 0 + - >- + (not (openclaw_enterprise_require_provider_api_keys | bool)) or + (profile.env.OPENAI_API_KEY is defined and profile.env.OPENAI_API_KEY | length > 0) + - >- + (not (openclaw_enterprise_require_provider_api_keys | bool)) or + (profile.env.ANTHROPIC_API_KEY is defined and profile.env.ANTHROPIC_API_KEY | length > 0) + fail_msg: >- + Profile {{ profile.name }} must define env.OPENCLAW_GATEWAY_TOKEN. + OPENAI/ANTHROPIC keys are required only when + openclaw_enterprise_require_provider_api_keys=true. + when: openclaw_enterprise_require_secrets | bool + loop: "{{ openclaw_enterprise_profiles }}" + loop_control: + loop_var: profile + no_log: true + +- name: Render enterprise profile config + ansible.builtin.template: + src: openclaw-profile.json.j2 + dest: "{{ profile.config_path | default((profile.state_dir | default(openclaw_home ~ '/.openclaw-' ~ profile.name)) ~ '/openclaw.json') }}" + owner: "{{ openclaw_user }}" + group: "{{ openclaw_user }}" + mode: '0640' + loop: "{{ openclaw_enterprise_profiles }}" + loop_control: + loop_var: profile + notify: Restart enterprise gateways + +- name: Render per-profile secret environment file + ansible.builtin.template: + src: profile.env.j2 + dest: "{{ openclaw_enterprise_secret_dir }}/{{ profile.name }}.env" + owner: root + group: "{{ openclaw_user }}" + mode: '0640' + loop: "{{ openclaw_enterprise_profiles }}" + loop_control: + loop_var: profile + notify: Restart enterprise gateways + no_log: true + +- name: Render per-profile systemd unit + ansible.builtin.template: + src: openclaw-gateway-profile.service.j2 + dest: "/etc/systemd/system/openclaw-gateway-{{ profile.name 
}}.service" + owner: root + group: root + mode: '0644' + loop: "{{ openclaw_enterprise_profiles }}" + loop_control: + loop_var: profile + when: openclaw_enterprise_manage_services | bool + notify: + - Reload systemd daemon + - Restart enterprise gateways + +- name: Ensure enterprise gateway services are enabled and started + ansible.builtin.systemd: + name: "openclaw-gateway-{{ profile.name }}" + enabled: true + state: started + loop: "{{ openclaw_enterprise_profiles }}" + loop_control: + loop_var: profile + when: openclaw_enterprise_manage_services | bool diff --git a/roles/openclaw_enterprise/templates/openclaw-gateway-profile.service.j2 b/roles/openclaw_enterprise/templates/openclaw-gateway-profile.service.j2 new file mode 100644 index 0000000..5054271 --- /dev/null +++ b/roles/openclaw_enterprise/templates/openclaw-gateway-profile.service.j2 @@ -0,0 +1,34 @@ +[Unit] +Description=OpenClaw Gateway ({{ profile.name }}) +After=network-online.target docker.service +Wants=network-online.target +Requires=docker.service + +[Service] +Type=simple +User={{ openclaw_user }} +Group={{ openclaw_user }} +WorkingDirectory={{ openclaw_home }} + +EnvironmentFile={{ openclaw_enterprise_secret_dir }}/{{ profile.name }}.env +Environment="PNPM_HOME={{ openclaw_home }}/.local/share/pnpm" +Environment="PATH={{ openclaw_home }}/.local/bin:{{ openclaw_home }}/.local/share/pnpm:/usr/local/bin:/usr/bin:/bin" +Environment="HOME={{ openclaw_home }}" +Environment="XDG_RUNTIME_DIR=/run/user/{{ openclaw_uid_value | default('1000') }}" + +ExecStart={{ openclaw_enterprise_openclaw_bin }} --profile {{ profile.name }} gateway --bind {{ profile.gateway_bind | default('loopback') }} --port {{ profile.gateway_port }} +Restart=always +RestartSec=5 +NoNewPrivileges=true +PrivateTmp=true +ProtectSystem=strict +ProtectHome=false +ReadWritePaths={{ profile.state_dir | default(openclaw_home ~ '/.openclaw-' ~ profile.name) }} +ReadWritePaths={{ openclaw_home }}/.local + +StandardOutput=journal 
+StandardError=journal +SyslogIdentifier=openclaw-{{ profile.name }} + +[Install] +WantedBy=multi-user.target diff --git a/roles/openclaw_enterprise/templates/openclaw-profile.json.j2 b/roles/openclaw_enterprise/templates/openclaw-profile.json.j2 new file mode 100644 index 0000000..58ec8d3 --- /dev/null +++ b/roles/openclaw_enterprise/templates/openclaw-profile.json.j2 @@ -0,0 +1,58 @@ +{ + "gateway": { + "mode": "local", + "port": {{ profile.gateway_port }}, + "bind": "{{ profile.gateway_bind | default('loopback') }}", + "auth": { + "mode": "token", + "token": "${OPENCLAW_GATEWAY_TOKEN}" + } + }, + "session": { + "dmScope": "{{ profile.dm_scope | default('per-account-channel-peer') }}" + }, + "agents": { + "defaults": { + "workspace": "{{ profile.workspace_root | default((profile.state_dir | default(openclaw_home ~ '/.openclaw-' ~ profile.name)) ~ '/workspace') }}", + "maxConcurrent": {{ profile.max_concurrent | default(4) }}, + "contextTokens": {{ profile.context_tokens | default(200000) }}, + "models": {{ (profile.model_catalog | default(openclaw_enterprise_default_models)) | to_nice_json(indent=6) }}, + "model": { + "primary": "{{ profile.model_primary | default('anthropic/claude-sonnet-4-5') }}", + "fallbacks": {{ profile.model_fallbacks | default(['openai/gpt-5.2', 'openai/gpt-5-mini']) | to_json }} + }, + "sandbox": { + "mode": "{{ profile.sandbox_mode | default('non-main') }}", + "scope": "{{ profile.sandbox_scope | default('session') }}" + } + }, + "list": {{ profile.agents | default([]) | to_nice_json(indent=4) }} + }, + "tools": { + "profile": "{{ profile.tools_profile | default('coding') }}" + }, + "bindings": {{ profile.bindings | default([]) | to_nice_json(indent=2) }}, +{% if profile.browser is defined %} + "browser": {{ profile.browser | to_nice_json(indent=2) }}, +{% endif %} + "auth": { + "profiles": {{ profile.auth_profiles | default({ + 'openai:primary': {'provider': 'openai', 'mode': 'api_key'}, + 'openai:secondary': {'provider': 'openai', 
'mode': 'api_key'}, + 'anthropic:primary': {'provider': 'anthropic', 'mode': 'api_key'}, + 'anthropic:secondary': {'provider': 'anthropic', 'mode': 'api_key'} + }) | to_nice_json(indent=4) }}, + "order": {{ profile.auth_order | default({ + 'openai': ['openai:primary', 'openai:secondary'], + 'anthropic': ['anthropic:primary', 'anthropic:secondary'] + }) | to_nice_json(indent=4) }} + }, + "logging": { + "level": "{{ profile.logging_level | default('info') }}", + "consoleLevel": "{{ profile.console_level | default('info') }}", + "redactSensitive": "tools" + }, + "diagnostics": { + "enabled": {{ profile.diagnostics_enabled | default(true) | to_json }} + } +} diff --git a/roles/openclaw_enterprise/templates/profile.env.j2 b/roles/openclaw_enterprise/templates/profile.env.j2 new file mode 100644 index 0000000..12da597 --- /dev/null +++ b/roles/openclaw_enterprise/templates/profile.env.j2 @@ -0,0 +1,10 @@ +OPENCLAW_PROFILE={{ profile.name }} +OPENCLAW_STATE_DIR={{ profile.state_dir | default(openclaw_home ~ '/.openclaw-' ~ profile.name) }} +OPENCLAW_CONFIG_PATH={{ profile.config_path | default((profile.state_dir | default(openclaw_home ~ '/.openclaw-' ~ profile.name)) ~ '/openclaw.json') }} +OPENCLAW_GATEWAY_PORT={{ profile.gateway_port }} +OPENCLAW_GATEWAY_TOKEN={{ profile.env.OPENCLAW_GATEWAY_TOKEN | default('') }} +OPENAI_API_KEY={{ profile.env.OPENAI_API_KEY | default('') }} +ANTHROPIC_API_KEY={{ profile.env.ANTHROPIC_API_KEY | default('') }} +{% for k, v in (profile.env_extra | default({})).items() %} +{{ k }}={{ v }} +{% endfor %} diff --git a/roles/openclaw_enterprise/templates/workspace-agents.md.j2 b/roles/openclaw_enterprise/templates/workspace-agents.md.j2 new file mode 100644 index 0000000..b5c68db --- /dev/null +++ b/roles/openclaw_enterprise/templates/workspace-agents.md.j2 @@ -0,0 +1,36 @@ +# AGENTS.md - {{ agent_persona.display_name | default(agent.name | default(agent.id)) }} + +Profile: `{{ profile.name }}` +Agent id: `{{ agent.id }}` + +## Mission +{{ 
agent_persona.mission | default('Deliver reliable outcomes for this agent scope, with clear execution and traceability.') }} + +## Core tasks +{% set responsibilities = agent_persona.responsibilities | default([]) %} +{% if responsibilities | length > 0 %} +{% for item in responsibilities %} +- {{ item }} +{% endfor %} +{% else %} +- Understand the request context before acting. +- Execute only the work that belongs to this agent scope. +- Return concise outcomes with next actionable step. +{% endif %} + +## Response style +- Keep responses clean and human. +- Avoid filler or repetitive phrasing. +- When sharing code, commands, config, or payloads, always use Markdown fenced blocks. +- Explain technical decisions briefly when they affect risk, cost, or behavior. + +## Operating protocol +- Investigate local context first (files, config, logs) before asking for clarification. +- Ask for confirmation before destructive actions or external/public actions. +- Prefer reversible operations when possible. +- Report what changed and how it was validated. + +## Session baseline +- Read `SOUL.md` for tone and boundaries. +- Read `USER.md` for user preferences. +- Keep this file updated when scope or responsibilities change. 
diff --git a/roles/openclaw_enterprise/templates/workspace-identity.md.j2 b/roles/openclaw_enterprise/templates/workspace-identity.md.j2 new file mode 100644 index 0000000..6e49a8f --- /dev/null +++ b/roles/openclaw_enterprise/templates/workspace-identity.md.j2 @@ -0,0 +1,7 @@ +# IDENTITY.md - {{ agent_persona.display_name | default(agent.name | default(agent.id)) }} + +- Name: {{ agent_persona.identity_name | default(agent.identity.name | default(agent.name | default(agent.id))) }} +- Creature: {{ agent_persona.creature | default('Digital operator') }} +- Vibe: {{ agent_persona.vibe | default(agent.identity.theme | default('Focused and pragmatic')) }} +- Emoji: {{ agent_persona.emoji | default(agent.identity.emoji | default(':openclaw:')) }} +- Avatar: {{ agent_persona.avatar | default(agent.identity.avatar | default('')) }} diff --git a/roles/openclaw_enterprise/templates/workspace-soul.md.j2 b/roles/openclaw_enterprise/templates/workspace-soul.md.j2 new file mode 100644 index 0000000..5746991 --- /dev/null +++ b/roles/openclaw_enterprise/templates/workspace-soul.md.j2 @@ -0,0 +1,22 @@ +# SOUL.md - {{ agent_persona.display_name | default(agent.name | default(agent.id)) }} + +## Identity core +- Role: {{ agent_persona.role | default('Specialized OpenClaw operator') }} +- Vibe: {{ agent_persona.vibe | default('Direct, calm, and practical') }} +- Tone: {{ agent_persona.tone | default('Professional, concise, and respectful') }} + +## Boundaries +- Never expose private data from unrelated sessions or channels. +- Do not simulate completion when work is still pending. +- Do not run destructive commands without explicit confirmation. +- Do not perform external/public actions unless clearly requested. + +## Working principles +- Clarity first: state assumptions and constraints early. +- Precision over verbosity: enough detail to execute safely. +- Ownership: close loops, validate outcomes, report evidence. +- Reliability: prefer deterministic steps over guesswork. 
+ +## Behavioral requirement +- Output must feel human and clear. +- Any technical snippet must be formatted as Markdown code block. diff --git a/roles/openclaw_enterprise/templates/workspace-user.md.j2 b/roles/openclaw_enterprise/templates/workspace-user.md.j2 new file mode 100644 index 0000000..40ef136 --- /dev/null +++ b/roles/openclaw_enterprise/templates/workspace-user.md.j2 @@ -0,0 +1,22 @@ +# USER.md - About Your Human + +- Name: {{ profile.user_profile.name | default('Efrain') }} +- What to call them: {{ profile.user_profile.call | default('Efra') }} +- Pronouns: {{ profile.user_profile.pronouns | default('') }} +- Timezone: {{ profile.user_profile.timezone | default('America/Santiago') }} + +## Notes +{% set notes = profile.user_profile.notes | default([]) %} +{% if notes | length > 0 %} +{% for note in notes %} +- {{ note }} +{% endfor %} +{% else %} +- Operator expects practical outcomes and minimal fluff. +- Keep communication clear and actionable. +{% endif %} + +## Response contract +- Human-readable answers. +- Use Markdown fenced code blocks for all code/commands/config snippets. +- Separate findings, actions, and next steps clearly. diff --git a/run-enterprise-playbook.sh b/run-enterprise-playbook.sh new file mode 100755 index 0000000..d096b0f --- /dev/null +++ b/run-enterprise-playbook.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +set -euo pipefail + +ENVIRONMENT=${1:-dev} +INVENTORY="inventories/${ENVIRONMENT}/hosts.yml" +PLAYBOOK="playbooks/enterprise.yml" +VAULT_VARS_FILE="inventories/${ENVIRONMENT}/group_vars/vault.yml" + +if [[ ! -f "$INVENTORY" ]]; then + echo "Inventory not found: $INVENTORY" >&2 + echo "Usage: $0 [ansible extra args...]" >&2 + exit 1 +fi + +shift || true + +EXTRA_ARGS=("$@") + +if [[ -f "$VAULT_VARS_FILE" ]]; then + EXTRA_ARGS+=("-e" "@${VAULT_VARS_FILE}") +fi + +ansible-playbook -i "$INVENTORY" "$PLAYBOOK" --ask-become-pass "${EXTRA_ARGS[@]}"