diff --git a/.github/workflows/clawdash-image.yml b/.github/workflows/clawdash-image.yml
new file mode 100644
index 0000000..b882113
--- /dev/null
+++ b/.github/workflows/clawdash-image.yml
@@ -0,0 +1,58 @@
+name: Clawdash Image
+
+on:
+ push:
+ branches:
+ - master
+ tags:
+ - "v*"
+ pull_request:
+ paths:
+ - "cmd/clawdash/**"
+ - "internal/clawdash/**"
+ - "dockerfiles/clawdash/**"
+ - "go.mod"
+ - "go.sum"
+ - ".github/workflows/clawdash-image.yml"
+ workflow_dispatch:
+
+permissions:
+ contents: read
+ packages: write
+
+jobs:
+ build-and-publish:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+
+ - uses: docker/setup-qemu-action@v3
+
+ - uses: docker/setup-buildx-action@v3
+
+ - uses: docker/login-action@v3
+ if: github.event_name != 'pull_request'
+ with:
+ registry: ghcr.io
+ username: ${{ github.actor }}
+ password: ${{ secrets.GITHUB_TOKEN }}
+
+ - id: meta
+ uses: docker/metadata-action@v5
+ with:
+ images: ghcr.io/${{ github.repository_owner }}/clawdash
+ tags: |
+ type=raw,value=latest,enable={{is_default_branch}}
+ type=ref,event=tag
+ type=sha,format=short
+
+ - uses: docker/build-push-action@v6
+ with:
+ context: .
+ file: dockerfiles/clawdash/Dockerfile
+ platforms: linux/amd64,linux/arm64
+ push: ${{ github.event_name != 'pull_request' }}
+ tags: ${{ steps.meta.outputs.tags }}
+ labels: ${{ steps.meta.outputs.labels }}
+ cache-from: type=gha
+ cache-to: type=gha,mode=max
diff --git a/README.md b/README.md
index c3b7072..0c2be82 100644
--- a/README.md
+++ b/README.md
@@ -35,11 +35,13 @@ claw build -t quickstart-assistant ./agents/assistant
claw up -f claw-pod.yml -d
# Verify
-claw ps -f claw-pod.yml # assistant + cllama-passthrough both running
+claw ps -f claw-pod.yml # assistant + cllama both running
claw health -f claw-pod.yml # both healthy
```
-Open **http://localhost:8081** — the cllama governance proxy dashboard. Watch every LLM call in real time: which agent, which model, token counts, cost.
+Open **http://localhost:8181** — the cllama governance proxy dashboard. Watch every LLM call in real time: which agent, which model, token counts, cost.
+
+Open **http://localhost:8082** — the Clawdapus Dash fleet dashboard. View live service health, topology wiring, and per-service drill-down status.
Message `@quickstart-bot` in your Discord server. The bot responds through the proxy — it has no direct API access. The dashboard updates live.
@@ -59,6 +61,23 @@ claw up -d
claw agent add researcher
```
+## Dashboard Screenshots
+
+Fleet view with integrated cost status:
+
+
+
+If a cllama build does not expose `GET /costs/api`, Clawdapus Dash surfaces an explicit "cost emission not available yet" state instead of linking to a dead page.
+API data is authoritative; log-derived cost estimation is opt-in via `CLAWDASH_COST_LOG_FALLBACK=1`.
+
+Topology view:
+
+
+
+Service detail view:
+
+
+
---
## Install
@@ -248,9 +267,9 @@ When a reasoning model tries to govern itself, the guardrails are part of the sa
- **Identity resolution:** Single proxy serves an entire pod. Bearer tokens resolve which agent is calling.
- **Cost accounting:** Extracts token usage from every response, multiplies by pricing table, tracks per agent/provider/model.
- **Audit logging:** Structured JSON on stdout — timestamp, agent, model, latency, tokens, cost, intervention reason.
-- **Operator dashboard:** Real-time web UI at port 8081 — agent activity, provider status, cost breakdown.
+- **Operator dashboard:** Real-time web UI at host port 8181 by default (container `:8081`) — agent activity, provider status, cost breakdown.
-The reference implementation is [`cllama-passthrough`](https://github.com/mostlydev/cllama-passthrough) — a zero-dependency Go binary that implements the transport layer (identity, routing, cost tracking). Future proxy types (`cllama-policy`) will add bidirectional interception: evaluating outbound prompts and amending inbound responses against the agent's behavioral contract.
+The reference implementation is [`cllama`](https://github.com/mostlydev/cllama) — a zero-dependency Go binary that implements the transport layer (identity, routing, cost tracking). Future proxy types (`cllama-policy`) will add bidirectional interception: evaluating outbound prompts and amending inbound responses against the agent's behavioral contract.
See the [cllama specification](./docs/CLLAMA_SPEC.md) for the full standard.
diff --git a/cmd/claw/compose_manifest.go b/cmd/claw/compose_manifest.go
new file mode 100644
index 0000000..ae744f7
--- /dev/null
+++ b/cmd/claw/compose_manifest.go
@@ -0,0 +1,132 @@
+package main
+
+import (
+ "encoding/json"
+ "fmt"
+ "path/filepath"
+ "sort"
+ "strings"
+
+ "github.com/mostlydev/clawdapus/internal/clawdash"
+ "github.com/mostlydev/clawdapus/internal/cllama"
+ "github.com/mostlydev/clawdapus/internal/driver"
+ "github.com/mostlydev/clawdapus/internal/pod"
+)
+
+func writePodManifest(runtimeDir string, p *pod.Pod, resolved map[string]*driver.ResolvedClaw, proxies []pod.CllamaProxyConfig) (string, error) {
+ manifest := buildPodManifest(p, resolved, proxies)
+ data, err := json.MarshalIndent(manifest, "", " ")
+ if err != nil {
+ return "", fmt.Errorf("encode pod manifest: %w", err)
+ }
+
+ path := filepath.Join(runtimeDir, "pod-manifest.json")
+ if err := writeRuntimeFile(path, append(data, '\n'), 0644); err != nil {
+ return "", fmt.Errorf("write pod manifest %q: %w", path, err)
+ }
+ return path, nil
+}
+
+func buildPodManifest(p *pod.Pod, resolved map[string]*driver.ResolvedClaw, proxies []pod.CllamaProxyConfig) *clawdash.PodManifest {
+ out := &clawdash.PodManifest{
+ PodName: p.Name,
+ Services: make(map[string]clawdash.ServiceManifest, len(p.Services)),
+ }
+
+ names := make([]string, 0, len(p.Services))
+ for name := range p.Services {
+ names = append(names, name)
+ }
+ sort.Strings(names)
+
+ for _, name := range names {
+ svc := p.Services[name]
+ manifest := clawdash.ServiceManifest{
+ ImageRef: svc.Image,
+ Count: 1,
+ }
+ if svc.Claw != nil && svc.Claw.Count > 0 {
+ manifest.Count = svc.Claw.Count
+ }
+
+ if rc, ok := resolved[name]; ok && rc != nil {
+ manifest.ClawType = rc.ClawType
+ manifest.Agent = rc.Agent
+ manifest.Models = cloneStringMap(rc.Models)
+ manifest.Handles = rc.Handles
+ manifest.PeerHandles = rc.PeerHandles
+ manifest.Surfaces = toSurfaceManifest(rc.Surfaces)
+ manifest.Skills = resolvedSkillNames(rc.Skills)
+ manifest.Invocations = append([]driver.Invocation(nil), rc.Invocations...)
+ manifest.Cllama = append([]string(nil), rc.Cllama...)
+ if rc.Count > 0 {
+ manifest.Count = rc.Count
+ }
+ } else if svc.Claw != nil {
+ manifest.Handles = svc.Claw.Handles
+ manifest.Surfaces = toSurfaceManifest(svc.Claw.Surfaces)
+ manifest.Cllama = append([]string(nil), svc.Claw.Cllama...)
+ }
+
+ out.Services[name] = manifest
+ }
+
+ if len(proxies) > 0 {
+ out.Proxies = make([]clawdash.ProxyManifest, 0, len(proxies))
+ for _, proxy := range proxies {
+ out.Proxies = append(out.Proxies, clawdash.ProxyManifest{
+ ProxyType: proxy.ProxyType,
+ ServiceName: cllama.ProxyServiceName(proxy.ProxyType),
+ Image: proxy.Image,
+ })
+ }
+ sort.Slice(out.Proxies, func(i, j int) bool {
+ return out.Proxies[i].ServiceName < out.Proxies[j].ServiceName
+ })
+ }
+
+ return out
+}
+
+func toSurfaceManifest(in []driver.ResolvedSurface) []clawdash.SurfaceManifest {
+ if len(in) == 0 {
+ return nil
+ }
+ out := make([]clawdash.SurfaceManifest, 0, len(in))
+ for _, s := range in {
+ out = append(out, clawdash.SurfaceManifest{
+ Scheme: s.Scheme,
+ Target: s.Target,
+ AccessMode: s.AccessMode,
+ Ports: append([]string(nil), s.Ports...),
+ ChannelConfig: s.ChannelConfig,
+ })
+ }
+ return out
+}
+
+func resolvedSkillNames(in []driver.ResolvedSkill) []string {
+ if len(in) == 0 {
+ return nil
+ }
+ out := make([]string, 0, len(in))
+ for _, sk := range in {
+ name := strings.TrimSpace(sk.Name)
+ if name == "" {
+ continue
+ }
+ out = append(out, name)
+ }
+ return out
+}
+
+func cloneStringMap(in map[string]string) map[string]string {
+ if len(in) == 0 {
+ return nil
+ }
+ out := make(map[string]string, len(in))
+ for k, v := range in {
+ out[k] = v
+ }
+ return out
+}
diff --git a/cmd/claw/compose_manifest_test.go b/cmd/claw/compose_manifest_test.go
new file mode 100644
index 0000000..493dc55
--- /dev/null
+++ b/cmd/claw/compose_manifest_test.go
@@ -0,0 +1,128 @@
+package main
+
+import (
+ "encoding/json"
+ "os"
+ "path/filepath"
+ "testing"
+
+ "github.com/mostlydev/clawdapus/internal/driver"
+ "github.com/mostlydev/clawdapus/internal/pod"
+)
+
+func TestBuildPodManifestIncludesResolvedState(t *testing.T) {
+ p := &pod.Pod{
+ Name: "fleet",
+ Services: map[string]*pod.Service{
+ "bot": {
+ Image: "bot:latest",
+ Claw: &pod.ClawBlock{Count: 2},
+ },
+ "redis": {
+ Image: "redis:7",
+ },
+ },
+ }
+
+ resolved := map[string]*driver.ResolvedClaw{
+ "bot": {
+ ServiceName: "bot",
+ ImageRef: "bot:latest",
+ ClawType: "openclaw",
+ Agent: "AGENTS.md",
+ Models: map[string]string{
+ "primary": "anthropic/claude-sonnet-4-20250514",
+ },
+ Count: 2,
+ Handles: map[string]*driver.HandleInfo{
+ "discord": {ID: "123", Username: "fleet-bot"},
+ },
+ PeerHandles: map[string]map[string]*driver.HandleInfo{
+ "analyst": {
+ "discord": {ID: "456", Username: "analyst-bot"},
+ },
+ },
+ Surfaces: []driver.ResolvedSurface{
+ {Scheme: "channel", Target: "discord"},
+ {Scheme: "service", Target: "redis", Ports: []string{"6379"}},
+ },
+ Skills: []driver.ResolvedSkill{
+ {Name: "risk-limits.md", HostPath: "/host/risk-limits.md"},
+ },
+ Invocations: []driver.Invocation{
+ {Schedule: "0 * * * *", Message: "status pulse", Name: "status", To: "123"},
+ },
+ Cllama: []string{"passthrough"},
+ },
+ }
+ proxies := []pod.CllamaProxyConfig{
+ {ProxyType: "passthrough", Image: "ghcr.io/mostlydev/cllama:latest"},
+ }
+
+ got := buildPodManifest(p, resolved, proxies)
+ if got.PodName != "fleet" {
+ t.Fatalf("expected podName=fleet, got %q", got.PodName)
+ }
+ if len(got.Services) != 2 {
+ t.Fatalf("expected 2 services, got %d", len(got.Services))
+ }
+
+ botSvc := got.Services["bot"]
+ if botSvc.ClawType != "openclaw" {
+ t.Fatalf("expected claw type openclaw, got %q", botSvc.ClawType)
+ }
+ if botSvc.Count != 2 {
+ t.Fatalf("expected count 2, got %d", botSvc.Count)
+ }
+ if len(botSvc.Skills) != 1 || botSvc.Skills[0] != "risk-limits.md" {
+ t.Fatalf("expected skill name-only serialization, got %v", botSvc.Skills)
+ }
+ if len(botSvc.Cllama) != 1 || botSvc.Cllama[0] != "passthrough" {
+ t.Fatalf("expected cllama passthrough, got %v", botSvc.Cllama)
+ }
+
+ redisSvc := got.Services["redis"]
+ if redisSvc.ClawType != "" {
+ t.Fatalf("expected non-claw service clawType empty, got %q", redisSvc.ClawType)
+ }
+ if redisSvc.Count != 1 {
+ t.Fatalf("expected non-claw count 1, got %d", redisSvc.Count)
+ }
+
+ if len(got.Proxies) != 1 {
+ t.Fatalf("expected 1 proxy, got %d", len(got.Proxies))
+ }
+ if got.Proxies[0].ServiceName != "cllama" {
+ t.Fatalf("expected proxy service cllama, got %q", got.Proxies[0].ServiceName)
+ }
+}
+
+func TestWritePodManifestWritesJSONFile(t *testing.T) {
+ dir := t.TempDir()
+ p := &pod.Pod{
+ Name: "test-pod",
+ Services: map[string]*pod.Service{
+ "bot": {Image: "bot:latest"},
+ },
+ }
+
+ path, err := writePodManifest(dir, p, nil, nil)
+ if err != nil {
+ t.Fatalf("writePodManifest returned error: %v", err)
+ }
+ if path != filepath.Join(dir, "pod-manifest.json") {
+ t.Fatalf("unexpected manifest path %q", path)
+ }
+
+ raw, err := os.ReadFile(path)
+ if err != nil {
+ t.Fatalf("read manifest: %v", err)
+ }
+ var decoded map[string]interface{}
+ if err := json.Unmarshal(raw, &decoded); err != nil {
+ t.Fatalf("manifest is not valid json: %v", err)
+ }
+ if decoded["podName"] != "test-pod" {
+ t.Fatalf("expected podName=test-pod, got %v", decoded["podName"])
+ }
+}
diff --git a/cmd/claw/compose_up.go b/cmd/claw/compose_up.go
index 0378d95..a9d9e18 100644
--- a/cmd/claw/compose_up.go
+++ b/cmd/claw/compose_up.go
@@ -248,6 +248,7 @@ func runComposeUp(podFile string) error {
cllamaEnabled, cllamaAgents := detectCllama(resolvedClaws)
proxies := make([]pod.CllamaProxyConfig, 0)
+ cllamaDashboardPort := envOrDefault("CLLAMA_UI_PORT", "8181")
if cllamaEnabled {
proxyTypes := collectProxyTypes(resolvedClaws)
if len(proxyTypes) > 1 {
@@ -387,9 +388,10 @@ func runComposeUp(podFile string) error {
for _, proxyType := range proxyTypes {
proxies = append(proxies, pod.CllamaProxyConfig{
ProxyType: proxyType,
- Image: fmt.Sprintf("ghcr.io/mostlydev/cllama-%s:latest", proxyType),
+ Image: cllama.ProxyImageRef(proxyType),
ContextHostDir: filepath.Join(runtimeDir, "context"),
AuthHostDir: authDir,
+ DashboardPort: cllamaDashboardPort,
Environment: proxyEnv,
PodName: p.Name,
})
@@ -398,6 +400,21 @@ func runComposeUp(podFile string) error {
strings.Join(proxyTypes, ", "), strings.Join(cllamaAgents, ", "))
}
+ manifestPath, err := writePodManifest(runtimeDir, p, resolvedClaws, proxies)
+ if err != nil {
+ return err
+ }
+ fmt.Printf("[claw] wrote %s\n", manifestPath)
+
+ p.Clawdash = &pod.ClawdashConfig{
+ Image: "ghcr.io/mostlydev/clawdash:latest",
+ Addr: envOrDefault("CLAWDASH_ADDR", ":8082"),
+ ManifestHostPath: manifestPath,
+ DockerSockHostPath: "/var/run/docker.sock",
+ CllamaCostsURL: firstIf(cllamaEnabled, fmt.Sprintf("http://localhost:%s", cllamaDashboardPort)),
+ PodName: p.Name,
+ }
+
// Pass 2: materialize after cllama tokens/context are resolved.
for _, name := range sortedResolvedClawNames(resolvedClaws) {
rc := resolvedClaws[name]
@@ -783,6 +800,21 @@ func shortContainerIDForPostApply(id string) string {
return id[:12]
}
+func envOrDefault(key, fallback string) string {
+ v := strings.TrimSpace(os.Getenv(key))
+ if v == "" {
+ return fallback
+ }
+ return v
+}
+
+func firstIf(ok bool, value string) string {
+ if ok {
+ return value
+ }
+ return ""
+}
+
// resolveChannelID looks up a channel by name in the discord handle's guild topology.
// Returns the channel ID if found, empty string otherwise.
// Searches all guilds in the discord handle.
diff --git a/cmd/claw/spike_test.go b/cmd/claw/spike_test.go
index 52cb369..9a18ad1 100644
--- a/cmd/claw/spike_test.go
+++ b/cmd/claw/spike_test.go
@@ -199,8 +199,8 @@ func TestSpikeComposeUp(t *testing.T) {
if !ok {
t.Fatalf("openclaw.json: missing models.providers.%s object", provider)
}
- if got := entry["baseUrl"]; got != "http://cllama-passthrough:8080/v1" {
- t.Errorf("openclaw.json: expected models.providers.%s.baseUrl=http://cllama-passthrough:8080/v1, got %v", provider, got)
+ if got := entry["baseUrl"]; got != "http://cllama:8080/v1" {
+ t.Errorf("openclaw.json: expected models.providers.%s.baseUrl=http://cllama:8080/v1, got %v", provider, got)
}
providerToken, _ := entry["apiKey"].(string)
if matched, _ := regexp.MatchString(`^tiverton:[0-9a-f]{48}$`, providerToken); !matched {
@@ -296,8 +296,8 @@ func TestSpikeComposeUp(t *testing.T) {
t.Errorf("compose.generated.yml: expected to contain %q", want)
}
}
- if !strings.Contains(composeSrc, "cllama-passthrough:") {
- t.Errorf("compose.generated.yml: expected cllama-passthrough service")
+ if !strings.Contains(composeSrc, "cllama:") {
+ t.Errorf("compose.generated.yml: expected cllama service")
}
if !strings.Contains(composeSrc, "CLAW_CONTEXT_ROOT: /claw/context") {
t.Errorf("compose.generated.yml: expected cllama context root env")
@@ -442,7 +442,7 @@ func TestSpikeComposeUp(t *testing.T) {
t.Errorf("allen: ANTHROPIC_BASE_URL not set: %v", errE)
} else {
allenBaseURL := strings.TrimSpace(string(allenEnvOut))
- if !strings.Contains(allenBaseURL, "cllama-passthrough") {
+ if !strings.Contains(allenBaseURL, "cllama") {
t.Errorf("allen: ANTHROPIC_BASE_URL should point to cllama proxy, got %q", allenBaseURL)
} else {
t.Logf("allen ANTHROPIC_BASE_URL: %s", allenBaseURL)
@@ -518,8 +518,8 @@ func TestSpikeComposeUp(t *testing.T) {
if got := microCfg["model"]; got != "claude-sonnet-4" {
t.Errorf("microclaw.config.yaml: expected model=claude-sonnet-4, got %v", got)
}
- if got := microCfg["llm_base_url"]; got != "http://cllama-passthrough:8080/v1" {
- t.Errorf("microclaw.config.yaml: expected llm_base_url=http://cllama-passthrough:8080/v1, got %v", got)
+ if got := microCfg["llm_base_url"]; got != "http://cllama:8080/v1" {
+ t.Errorf("microclaw.config.yaml: expected llm_base_url=http://cllama:8080/v1, got %v", got)
}
microContainer := spikeContainerName("micro")
diff --git a/cmd/clawdash/handler.go b/cmd/clawdash/handler.go
new file mode 100644
index 0000000..d2cb28e
--- /dev/null
+++ b/cmd/clawdash/handler.go
@@ -0,0 +1,800 @@
+package main
+
+import (
+ "bufio"
+ "bytes"
+ "context"
+ "embed"
+ "encoding/json"
+ "fmt"
+ "html/template"
+ "io"
+ "net/http"
+ "net/url"
+ "os"
+ "slices"
+ "sort"
+ "strconv"
+ "strings"
+ "time"
+
+ containerapi "github.com/docker/docker/api/types/container"
+ "github.com/docker/docker/api/types/filters"
+ "github.com/docker/docker/client"
+ "github.com/docker/docker/pkg/stdcopy"
+ manifestpkg "github.com/mostlydev/clawdapus/internal/clawdash"
+ "github.com/mostlydev/clawdapus/internal/cllama"
+ "github.com/mostlydev/clawdapus/internal/driver"
+)
+
+//go:embed templates/*.html
+var templateFS embed.FS
+
+type statusSource interface {
+ Snapshot(ctx context.Context, serviceNames []string) (map[string]serviceStatus, error)
+}
+
+type handler struct {
+ manifest *manifestpkg.PodManifest
+ statusSource statusSource
+ cllamaCostsURL string
+ costLogFallback bool
+ httpClient *http.Client
+ tpl *template.Template
+}
+
+func newHandler(manifest *manifestpkg.PodManifest, source statusSource, cllamaCostsURL string, costLogFallback bool) http.Handler {
+ funcs := template.FuncMap{
+ "statusClass": statusClass,
+ "pathEscape": url.PathEscape,
+ "join": strings.Join,
+ "title": strings.Title, //nolint:staticcheck // simple title-case for badges.
+ "truncate": truncate,
+ "statusLabel": statusLabel,
+ "hasStatusData": hasStatusData,
+ }
+ tpl := template.Must(template.New("clawdash").Funcs(funcs).ParseFS(templateFS, "templates/*.html"))
+ return &handler{
+ manifest: manifest,
+ statusSource: source,
+ cllamaCostsURL: strings.TrimSpace(cllamaCostsURL),
+ costLogFallback: costLogFallback,
+ httpClient: &http.Client{
+ Timeout: 2 * time.Second,
+ },
+ tpl: tpl,
+ }
+}
+
+func (h *handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
+ switch {
+ case r.Method == http.MethodGet && r.URL.Path == "/":
+ h.renderFleet(w, r)
+ return
+ case r.Method == http.MethodGet && r.URL.Path == "/topology":
+ h.renderTopology(w, r)
+ return
+ case r.Method == http.MethodGet && strings.HasPrefix(r.URL.Path, "/detail/"):
+ h.renderDetail(w, r)
+ return
+ case r.Method == http.MethodGet && r.URL.Path == "/api/status":
+ h.renderAPIStatus(w, r)
+ return
+ case r.Method == http.MethodGet && r.URL.Path == "/healthz":
+ w.WriteHeader(http.StatusOK)
+ _, _ = w.Write([]byte("ok"))
+ return
+ default:
+ http.NotFound(w, r)
+ return
+ }
+}
+
+type fleetPageData struct {
+ PodName string
+ ActiveTab string
+ Agents []fleetCard
+ Proxies []fleetCard
+ Infrastructure []fleetCard
+ HasCllama bool
+ CllamaCostsURL string
+ HasCostLink bool
+ HasCostSummary bool
+ CostSummary cllamaCostSummary
+ CostSummaryErr string
+ StatusError string
+ HasStatusErrors bool
+}
+
+type cllamaCostSummary struct {
+ TotalCostUSD float64
+ Requests int
+ ProxyCount int
+ Source string
+}
+
+type fleetCard struct {
+ ServiceName string
+ DetailPath string
+ RoleBadge string
+ RoleClass string
+ ClawType string
+ Status string
+ StatusClass string
+ Uptime string
+ Model string
+ Handles []handleRow
+ ProxyType string
+ Count int
+ RunningCount int
+}
+
+type handleRow struct {
+ Platform string
+ Username string
+}
+
+func (h *handler) renderFleet(w http.ResponseWriter, r *http.Request) {
+ statuses, statusErr := h.snapshot(r.Context())
+ data := h.buildFleetPageData(r.Context(), statuses, statusErr)
+ w.Header().Set("Content-Type", "text/html; charset=utf-8")
+ _ = h.tpl.ExecuteTemplate(w, "fleet.html", data)
+}
+
+func (h *handler) buildFleetPageData(ctx context.Context, statuses map[string]serviceStatus, statusErr string) fleetPageData {
+ serviceNames := sortedServiceNames(h.manifest.Services)
+ proxyByService := make(map[string]manifestpkg.ProxyManifest, len(h.manifest.Proxies))
+ for _, p := range h.manifest.Proxies {
+ proxyByService[p.ServiceName] = p
+ }
+
+ agents := make([]fleetCard, 0)
+ infra := make([]fleetCard, 0)
+ for _, name := range serviceNames {
+ svc := h.manifest.Services[name]
+ status := statuses[name]
+ card := fleetCard{
+ ServiceName: name,
+ DetailPath: "/detail/" + url.PathEscape(name),
+ Status: status.Status,
+ StatusClass: statusClass(status.Status),
+ Uptime: status.Uptime,
+ Model: primaryModel(svc.Models),
+ Handles: sortedHandles(svc.Handles),
+ Count: svc.Count,
+ RunningCount: status.Running,
+ }
+ if card.Count < 1 {
+ card.Count = 1
+ }
+
+ if svc.ClawType != "" {
+ card.RoleBadge = svc.ClawType
+ card.RoleClass = "badge-cyan"
+ card.ClawType = svc.ClawType
+ card.ProxyType = joinNonEmpty(svc.Cllama, ", ")
+ agents = append(agents, card)
+ continue
+ }
+
+ if proxy, ok := proxyByService[name]; ok {
+ card.RoleBadge = "proxy"
+ card.RoleClass = "badge-amber"
+ card.ProxyType = proxy.ProxyType
+ agents = append(agents, card)
+ continue
+ }
+
+ card.RoleBadge = "native"
+ card.RoleClass = "badge-green"
+ infra = append(infra, card)
+ }
+
+ proxies := make([]fleetCard, 0, len(h.manifest.Proxies))
+ for _, proxy := range h.manifest.Proxies {
+ status := statuses[proxy.ServiceName]
+ proxies = append(proxies, fleetCard{
+ ServiceName: proxy.ServiceName,
+ DetailPath: "/detail/" + url.PathEscape(proxy.ServiceName),
+ RoleBadge: "proxy",
+ RoleClass: "badge-amber",
+ Status: status.Status,
+ StatusClass: statusClass(status.Status),
+ Uptime: status.Uptime,
+ ProxyType: proxy.ProxyType,
+ Count: 1,
+ })
+ }
+ sort.Slice(proxies, func(i, j int) bool { return proxies[i].ServiceName < proxies[j].ServiceName })
+
+ costSummary, costErr := h.fetchCllamaCostSummary(ctx)
+
+ return fleetPageData{
+ PodName: h.manifest.PodName,
+ ActiveTab: "fleet",
+ Agents: agents,
+ Proxies: proxies,
+ Infrastructure: infra,
+ HasCllama: len(proxies) > 0,
+ CllamaCostsURL: h.cllamaCostsURL,
+ HasCostLink: costSummary != nil && costSummary.Source == "api" && strings.TrimSpace(h.cllamaCostsURL) != "",
+ HasCostSummary: costSummary != nil,
+ CostSummary: firstCostSummary(costSummary),
+ CostSummaryErr: costErr,
+ StatusError: statusErr,
+ HasStatusErrors: statusErr != "",
+ }
+}
+
+type detailPageData struct {
+ PodName string
+ ActiveTab string
+ ServiceName string
+ ImageRef string
+ Count int
+ IsProxy bool
+ Status serviceStatus
+ StatusClass string
+ StatusError string
+ Surfaces []manifestpkg.SurfaceManifest
+ Handles []handleDetailRow
+ Skills []string
+ Invocations []driver.Invocation
+ Models []modelRow
+ Cllama []cllamaDetailRow
+ HasStatusErrors bool
+}
+
+type handleDetailRow struct {
+ Platform string
+ Username string
+ ID string
+ Guilds []driver.GuildInfo
+}
+
+type modelRow struct {
+ Slot string
+ Model string
+}
+
+type cllamaDetailRow struct {
+ ProxyType string
+ ServiceName string
+ TokenStatus string
+}
+
+func (h *handler) renderDetail(w http.ResponseWriter, r *http.Request) {
+ raw := strings.TrimPrefix(r.URL.Path, "/detail/")
+ name, err := url.PathUnescape(raw)
+ if err != nil || strings.TrimSpace(name) == "" {
+ http.NotFound(w, r)
+ return
+ }
+
+ statuses, statusErr := h.snapshot(r.Context())
+ data, ok := h.buildDetailPageData(name, statuses, statusErr)
+ if !ok {
+ http.NotFound(w, r)
+ return
+ }
+
+ w.Header().Set("Content-Type", "text/html; charset=utf-8")
+ _ = h.tpl.ExecuteTemplate(w, "detail.html", data)
+}
+
+func (h *handler) buildDetailPageData(name string, statuses map[string]serviceStatus, statusErr string) (detailPageData, bool) {
+ svc, ok := h.manifest.Services[name]
+ proxyInfo, isProxy := h.proxyByServiceName(name)
+ if !ok && !isProxy {
+ return detailPageData{}, false
+ }
+
+ if !ok && isProxy {
+ svc = manifestpkg.ServiceManifest{
+ ImageRef: proxyInfo.Image,
+ Count: 1,
+ }
+ }
+ if svc.Count < 1 {
+ svc.Count = 1
+ }
+
+ models := make([]modelRow, 0, len(svc.Models))
+ for slot, modelRef := range svc.Models {
+ models = append(models, modelRow{Slot: slot, Model: modelRef})
+ }
+ sort.Slice(models, func(i, j int) bool { return models[i].Slot < models[j].Slot })
+
+ handleRows := make([]handleDetailRow, 0, len(svc.Handles))
+ for platform, info := range svc.Handles {
+ if info == nil {
+ continue
+ }
+ handleRows = append(handleRows, handleDetailRow{
+ Platform: platform,
+ Username: info.Username,
+ ID: info.ID,
+ Guilds: info.Guilds,
+ })
+ }
+ sort.Slice(handleRows, func(i, j int) bool { return handleRows[i].Platform < handleRows[j].Platform })
+
+ cllamaRows := make([]cllamaDetailRow, 0)
+ proxyByType := make(map[string]string, len(h.manifest.Proxies))
+ for _, p := range h.manifest.Proxies {
+ proxyByType[p.ProxyType] = p.ServiceName
+ }
+ tokenStatus := "absent"
+ if statuses[name].HasCllamaToken {
+ tokenStatus = "present"
+ }
+ for _, proxyType := range svc.Cllama {
+ serviceName := proxyByType[proxyType]
+ if serviceName == "" {
+ serviceName = cllama.ProxyServiceName(proxyType)
+ }
+ cllamaRows = append(cllamaRows, cllamaDetailRow{
+ ProxyType: proxyType,
+ ServiceName: serviceName,
+ TokenStatus: tokenStatus,
+ })
+ }
+ if isProxy {
+ cllamaRows = append(cllamaRows, cllamaDetailRow{
+ ProxyType: proxyInfo.ProxyType,
+ ServiceName: proxyInfo.ServiceName,
+ TokenStatus: "absent",
+ })
+ }
+
+ status := statuses[name]
+ if status.Service == "" {
+ status = unknownStatus(name)
+ }
+
+ return detailPageData{
+ PodName: h.manifest.PodName,
+ ActiveTab: "detail",
+ ServiceName: name,
+ ImageRef: firstNonEmpty(svc.ImageRef, proxyInfo.Image),
+ Count: svc.Count,
+ IsProxy: isProxy,
+ Status: status,
+ StatusClass: statusClass(status.Status),
+ StatusError: statusErr,
+ Surfaces: svc.Surfaces,
+ Handles: handleRows,
+ Skills: slices.Clone(svc.Skills),
+ Invocations: slices.Clone(svc.Invocations),
+ Models: models,
+ Cllama: cllamaRows,
+ HasStatusErrors: statusErr != "",
+ }, true
+}
+
+func (h *handler) renderTopology(w http.ResponseWriter, r *http.Request) {
+ statuses, statusErr := h.snapshot(r.Context())
+ data := buildTopologyPageData(h.manifest, statuses, statusErr)
+ w.Header().Set("Content-Type", "text/html; charset=utf-8")
+ _ = h.tpl.ExecuteTemplate(w, "topology.html", data)
+}
+
+type apiStatusResponse struct {
+ GeneratedAt string `json:"generatedAt"`
+ Services map[string]serviceStatus `json:"services"`
+ Error string `json:"error,omitempty"`
+}
+
+func (h *handler) renderAPIStatus(w http.ResponseWriter, r *http.Request) {
+ statuses, err := h.snapshot(r.Context())
+ resp := apiStatusResponse{
+ GeneratedAt: time.Now().UTC().Format(time.RFC3339),
+ Services: statuses,
+ }
+ code := http.StatusOK
+ if err != "" {
+ resp.Error = err
+ code = http.StatusServiceUnavailable
+ }
+ w.Header().Set("Content-Type", "application/json; charset=utf-8")
+ w.WriteHeader(code)
+ _ = json.NewEncoder(w).Encode(resp)
+}
+
+func (h *handler) snapshot(ctx context.Context) (map[string]serviceStatus, string) {
+ names := h.allServiceNames()
+ timeoutCtx, cancel := context.WithTimeout(ctx, 4*time.Second)
+ defer cancel()
+
+ statuses, err := h.statusSource.Snapshot(timeoutCtx, names)
+ if err == nil {
+ return statuses, ""
+ }
+ fallback := make(map[string]serviceStatus, len(names))
+ for _, name := range names {
+ fallback[name] = unknownStatus(name)
+ }
+ return fallback, fmt.Sprintf("live status unavailable: %v", err)
+}
+
+func (h *handler) allServiceNames() []string {
+ set := make(map[string]struct{}, len(h.manifest.Services)+len(h.manifest.Proxies))
+ for name := range h.manifest.Services {
+ set[name] = struct{}{}
+ }
+ for _, proxy := range h.manifest.Proxies {
+ if strings.TrimSpace(proxy.ServiceName) != "" {
+ set[proxy.ServiceName] = struct{}{}
+ }
+ }
+ names := make([]string, 0, len(set))
+ for name := range set {
+ names = append(names, name)
+ }
+ sort.Strings(names)
+ return names
+}
+
+func (h *handler) proxyByServiceName(name string) (manifestpkg.ProxyManifest, bool) {
+ for _, proxy := range h.manifest.Proxies {
+ if proxy.ServiceName == name {
+ return proxy, true
+ }
+ }
+ return manifestpkg.ProxyManifest{}, false
+}
+
+func readManifest(path string) (*manifestpkg.PodManifest, error) {
+ raw, err := os.ReadFile(path)
+ if err != nil {
+ return nil, err
+ }
+ var manifest manifestpkg.PodManifest
+ if err := json.Unmarshal(raw, &manifest); err != nil {
+ return nil, err
+ }
+ if manifest.Services == nil {
+ manifest.Services = make(map[string]manifestpkg.ServiceManifest)
+ }
+ return &manifest, nil
+}
+
+func sortedServiceNames(services map[string]manifestpkg.ServiceManifest) []string {
+ names := make([]string, 0, len(services))
+ for name := range services {
+ names = append(names, name)
+ }
+ sort.Strings(names)
+ return names
+}
+
+func sortedHandles(handles map[string]*driver.HandleInfo) []handleRow {
+ out := make([]handleRow, 0, len(handles))
+ for platform, info := range handles {
+ if info == nil {
+ continue
+ }
+ username := info.Username
+ if strings.TrimSpace(username) == "" {
+ username = info.ID
+ }
+ out = append(out, handleRow{
+ Platform: platform,
+ Username: username,
+ })
+ }
+ sort.Slice(out, func(i, j int) bool { return out[i].Platform < out[j].Platform })
+ return out
+}
+
+func primaryModel(models map[string]string) string {
+ if len(models) == 0 {
+ return ""
+ }
+ if primary := strings.TrimSpace(models["primary"]); primary != "" {
+ return primary
+ }
+ keys := make([]string, 0, len(models))
+ for k := range models {
+ keys = append(keys, k)
+ }
+ sort.Strings(keys)
+ for _, k := range keys {
+ if strings.TrimSpace(models[k]) != "" {
+ return models[k]
+ }
+ }
+ return ""
+}
+
+func statusClass(status string) string {
+ switch strings.ToLower(strings.TrimSpace(status)) {
+ case "healthy", "running":
+ return "status-healthy"
+ case "starting":
+ return "status-starting"
+ case "unhealthy", "stopped", "dead", "exited":
+ return "status-unhealthy"
+ default:
+ return "status-unknown"
+ }
+}
+
+func statusLabel(status string) string {
+ s := strings.TrimSpace(status)
+ if s == "" {
+ return "unknown"
+ }
+ return s
+}
+
+func hasStatusData(value string) bool {
+ return strings.TrimSpace(value) != ""
+}
+
+func truncate(s string, n int) string {
+ if n <= 0 {
+ return ""
+ }
+ if len(s) <= n {
+ return s
+ }
+ if n <= 3 {
+ return s[:n]
+ }
+ return s[:n-3] + "..."
+}
+
+func joinNonEmpty(items []string, sep string) string {
+ out := make([]string, 0, len(items))
+ for _, item := range items {
+ item = strings.TrimSpace(item)
+ if item == "" {
+ continue
+ }
+ out = append(out, item)
+ }
+ return strings.Join(out, sep)
+}
+
+func firstNonEmpty(values ...string) string {
+ for _, v := range values {
+ if strings.TrimSpace(v) != "" {
+ return v
+ }
+ }
+ return ""
+}
+
+func firstCostSummary(in *cllamaCostSummary) cllamaCostSummary {
+ if in == nil {
+ return cllamaCostSummary{}
+ }
+ return *in
+}
+
+func (h *handler) fetchCllamaCostSummary(ctx context.Context) (*cllamaCostSummary, string) {
+ if len(h.manifest.Proxies) == 0 {
+ return nil, ""
+ }
+ summary, apiErr := h.fetchCllamaCostSummaryFromAPI(ctx)
+ if summary != nil {
+ summary.Source = "api"
+ return summary, ""
+ }
+ if !h.costLogFallback {
+ return nil, apiErr
+ }
+ if summary, err := h.fetchCllamaCostSummaryFromLogs(ctx); summary != nil {
+ summary.Source = "logs"
+ if strings.TrimSpace(apiErr) != "" {
+ return summary, fmt.Sprintf("cost API unavailable (%s); showing log-derived estimate", apiErr)
+ }
+ if strings.TrimSpace(err) != "" {
+ return summary, err
+ }
+ return summary, "showing log-derived estimate"
+ }
+ if strings.TrimSpace(apiErr) != "" {
+ return nil, apiErr
+ }
+ return nil, "no cllama cost data available"
+}
+
+// fetchCllamaCostSummaryFromAPI polls every proxy's /costs/api endpoint
+// (cllama's stable machine-readable cost interface) and sums totals across
+// all proxies that answered. It returns (nil, reason) when no proxy
+// responded; the reason is the last per-proxy failure seen. Proxies with a
+// blank service name are skipped.
+func (h *handler) fetchCllamaCostSummaryFromAPI(ctx context.Context) (*cllamaCostSummary, string) {
+ summary := &cllamaCostSummary{}
+ success := 0
+ lastErr := ""
+
+ for _, proxy := range h.manifest.Proxies {
+ serviceName := strings.TrimSpace(proxy.ServiceName)
+ if serviceName == "" {
+ continue
+ }
+ // 8081 is the proxy's internal container port (host mappings may differ).
+ endpoint := fmt.Sprintf("http://%s:8081/costs/api", serviceName)
+ req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
+ if err != nil {
+ lastErr = fmt.Sprintf("build request for %s: %v", serviceName, err)
+ continue
+ }
+
+ resp, err := h.httpClient.Do(req)
+ if err != nil {
+ lastErr = fmt.Sprintf("%s unavailable: %v", serviceName, err)
+ continue
+ }
+ payload, decodeErr := decodeCostAPIResponse(resp, serviceName)
+ if decodeErr != "" {
+ lastErr = decodeErr
+ continue
+ }
+
+ summary.TotalCostUSD += asFloat(payload["total_cost_usd"])
+ summary.Requests += asInt(payload["total_requests"])
+ success++
+ }
+
+ if success == 0 {
+ if strings.TrimSpace(lastErr) == "" {
+ lastErr = "no cllama cost emission endpoint detected"
+ }
+ return nil, lastErr
+ }
+ summary.ProxyCount = success
+ return summary, ""
+}
+
+// decodeCostAPIResponse decodes one /costs/api response, guaranteeing the
+// body is closed on every path. On a non-200 status the body is drained so
+// the HTTP transport can reuse the connection. Returns a non-empty error
+// string on failure.
+func decodeCostAPIResponse(resp *http.Response, serviceName string) (map[string]interface{}, string) {
+ defer resp.Body.Close()
+ if resp.StatusCode != http.StatusOK {
+ _, _ = io.Copy(io.Discard, resp.Body)
+ return nil, fmt.Sprintf("%s missing /costs/api (status %d)", serviceName, resp.StatusCode)
+ }
+ var payload map[string]interface{}
+ if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil {
+ return nil, fmt.Sprintf("%s invalid JSON from /costs/api", serviceName)
+ }
+ return payload, ""
+}
+
+// fetchCllamaCostSummaryFromLogs estimates cost totals by scanning the last
+// 500 stdout log lines of each proxy container for structured cost records.
+// It is a best-effort fallback for when /costs/api is unreachable; per-proxy
+// failures are recorded in lastErr but do not abort the scan. Returns
+// (nil, reason) when no proxy's logs could be read.
+func (h *handler) fetchCllamaCostSummaryFromLogs(ctx context.Context) (*cllamaCostSummary, string) {
+ cli, err := client.NewClientWithOpts(client.FromEnv, client.WithAPIVersionNegotiation())
+ if err != nil {
+ return nil, fmt.Sprintf("docker client unavailable for cost log fallback: %v", err)
+ }
+ defer cli.Close()
+
+ summary := &cllamaCostSummary{}
+ success := 0
+ lastErr := ""
+
+ for _, proxy := range h.manifest.Proxies {
+ serviceName := strings.TrimSpace(proxy.ServiceName)
+ if serviceName == "" {
+ continue
+ }
+
+ containerID, err := findProxyContainerID(ctx, cli, h.manifest.PodName, serviceName)
+ if err != nil {
+ lastErr = fmt.Sprintf("%s container lookup failed: %v", serviceName, err)
+ continue
+ }
+
+ rc, err := cli.ContainerLogs(ctx, containerID, containerapi.LogsOptions{
+ ShowStdout: true,
+ ShowStderr: false,
+ Tail: "500",
+ })
+ if err != nil {
+ lastErr = fmt.Sprintf("%s log read failed: %v", serviceName, err)
+ continue
+ }
+
+ // Docker multiplexes stdout/stderr into one stream; demux before parsing.
+ var stdout bytes.Buffer
+ var stderr bytes.Buffer
+ _, copyErr := stdcopy.StdCopy(&stdout, &stderr, rc)
+ _ = rc.Close()
+ // NOTE(review): prefer errors.Is(copyErr, io.EOF) — a wrapped EOF would
+ // slip past this direct comparison.
+ if copyErr != nil && copyErr != io.EOF {
+ lastErr = fmt.Sprintf("%s log decode failed: %v", serviceName, copyErr)
+ continue
+ }
+
+ total, reqs := parseCostSummaryFromLogs(stdout.String())
+ summary.TotalCostUSD += total
+ summary.Requests += reqs
+ success++
+ }
+
+ if success == 0 {
+ if strings.TrimSpace(lastErr) == "" {
+ lastErr = "no proxy logs available for cost fallback"
+ }
+ return nil, lastErr
+ }
+ summary.ProxyCount = success
+ return summary, ""
+}
+
+// findProxyContainerID resolves the Docker container ID for a proxy service
+// by matching the claw.pod and claw.service labels. All:true includes
+// stopped containers (their retained logs are still useful). If several
+// containers match, the first listed is returned.
+func findProxyContainerID(ctx context.Context, cli *client.Client, podName, serviceName string) (string, error) {
+ args := filters.NewArgs(
+ filters.Arg("label", "claw.pod="+strings.TrimSpace(podName)),
+ filters.Arg("label", "claw.service="+strings.TrimSpace(serviceName)),
+ )
+ containers, err := cli.ContainerList(ctx, containerapi.ListOptions{
+ All: true,
+ Filters: args,
+ })
+ if err != nil {
+ return "", err
+ }
+ if len(containers) == 0 {
+ return "", fmt.Errorf("not found")
+ }
+ return containers[0].ID, nil
+}
+
+// parseCostSummaryFromLogs scans log output line by line, summing the
+// cost_usd field of every JSON object that carries one and counting each
+// such line as one request. Non-JSON and malformed lines are ignored.
+// NOTE(review): scanner.Err() is not checked and bufio.Scanner's default
+// 64KB line limit silently drops longer lines — acceptable for a
+// best-effort estimate, but worth confirming log line sizes.
+func parseCostSummaryFromLogs(logs string) (float64, int) {
+ total := 0.0
+ requests := 0
+ scanner := bufio.NewScanner(strings.NewReader(logs))
+ for scanner.Scan() {
+ line := strings.TrimSpace(scanner.Text())
+ // Cheap pre-filter: only attempt JSON decoding on object-looking lines.
+ if line == "" || !strings.HasPrefix(line, "{") {
+ continue
+ }
+ var payload map[string]interface{}
+ if err := json.Unmarshal([]byte(line), &payload); err != nil {
+ continue
+ }
+ if _, ok := payload["cost_usd"]; !ok {
+ continue
+ }
+ total += asFloat(payload["cost_usd"])
+ requests++
+ }
+ return total, requests
+}
+
+// asFloat coerces a loosely-typed JSON value to float64, accepting numeric
+// types, json.Number, and numeric strings. Anything unparsable (including
+// nil) yields 0.
+func asFloat(v interface{}) float64 {
+ switch n := v.(type) {
+ case float64:
+ return n
+ case float32:
+ return float64(n)
+ case int:
+ return float64(n)
+ case int64:
+ return float64(n)
+ case json.Number:
+ f, err := n.Float64()
+ if err == nil {
+ return f
+ }
+ case string:
+ f, err := strconv.ParseFloat(strings.TrimSpace(n), 64)
+ if err == nil {
+ return f
+ }
+ }
+ return 0
+}
+
+// asInt coerces a loosely-typed JSON value to int, accepting numeric types
+// (floats are truncated), json.Number, and integer strings. Anything
+// unparsable (including nil) yields 0.
+func asInt(v interface{}) int {
+ switch n := v.(type) {
+ case float64:
+ return int(n)
+ case float32:
+ return int(n)
+ case int:
+ return n
+ case int64:
+ return int(n)
+ case json.Number:
+ i, err := n.Int64()
+ if err == nil {
+ return int(i)
+ }
+ case string:
+ i, err := strconv.Atoi(strings.TrimSpace(n))
+ if err == nil {
+ return i
+ }
+ }
+ return 0
+}
diff --git a/cmd/clawdash/handler_test.go b/cmd/clawdash/handler_test.go
new file mode 100644
index 0000000..9ffa694
--- /dev/null
+++ b/cmd/clawdash/handler_test.go
@@ -0,0 +1,200 @@
+package main
+
+import (
+ "context"
+ "encoding/json"
+ "fmt"
+ "io"
+ "net/http"
+ "net/http/httptest"
+ "strings"
+ "testing"
+ "time"
+
+ manifestpkg "github.com/mostlydev/clawdapus/internal/clawdash"
+ "github.com/mostlydev/clawdapus/internal/driver"
+)
+
+type fakeStatusSource struct {
+ statuses map[string]serviceStatus
+ err error
+}
+
+func (f fakeStatusSource) Snapshot(_ context.Context, _ []string) (map[string]serviceStatus, error) {
+ if f.err != nil {
+ return nil, f.err
+ }
+ return f.statuses, nil
+}
+
+func testManifest() *manifestpkg.PodManifest {
+ return &manifestpkg.PodManifest{
+ PodName: "fleet",
+ Services: map[string]manifestpkg.ServiceManifest{
+ "bot": {
+ ClawType: "openclaw",
+ ImageRef: "bot:latest",
+ Count: 1,
+ Surfaces: []manifestpkg.SurfaceManifest{
+ {Scheme: "channel", Target: "discord"},
+ {Scheme: "service", Target: "api"},
+ {Scheme: "volume", Target: "shared-data"},
+ },
+ Cllama: []string{"passthrough"},
+ Handles: map[string]*driver.HandleInfo{
+ "discord": {ID: "123", Username: "fleet-bot"},
+ },
+ },
+ "api": {
+ ImageRef: "api:latest",
+ Count: 1,
+ },
+ },
+ Proxies: []manifestpkg.ProxyManifest{
+ {ProxyType: "passthrough", ServiceName: "cllama", Image: "cllama:latest"},
+ },
+ }
+}
+
+func testStatuses() map[string]serviceStatus {
+ return map[string]serviceStatus{
+ "bot": {
+ Service: "bot",
+ Status: "healthy",
+ State: "running",
+ Uptime: "3m 2s",
+ Instances: 1,
+ Running: 1,
+ },
+ "api": {
+ Service: "api",
+ Status: "running",
+ State: "running",
+ Uptime: "8m 10s",
+ Instances: 1,
+ Running: 1,
+ },
+ "cllama": {
+ Service: "cllama",
+ Status: "healthy",
+ State: "running",
+ Uptime: "3m 1s",
+ Instances: 1,
+ Running: 1,
+ },
+ }
+}
+
+func TestFleetPageRenders(t *testing.T) {
+ h := newHandler(testManifest(), fakeStatusSource{statuses: testStatuses()}, "http://localhost:8181", false)
+ req := httptest.NewRequest(http.MethodGet, "/", nil)
+ w := httptest.NewRecorder()
+ h.ServeHTTP(w, req)
+
+ if w.Code != http.StatusOK {
+ t.Fatalf("expected 200, got %d", w.Code)
+ }
+ body := w.Body.String()
+ if !strings.Contains(body, "Fleet Overview") {
+ t.Fatalf("expected fleet heading in body")
+ }
+ if !strings.Contains(body, "bot") {
+ t.Fatalf("expected service name in body")
+ }
+ if !strings.Contains(body, "Costs") {
+ t.Fatalf("expected costs panel in body")
+ }
+ if !strings.Contains(body, "Cost emission not available yet") {
+ t.Fatalf("expected costs emission warning in body")
+ }
+ if strings.Contains(body, "Open cllama dashboard") {
+ t.Fatalf("expected costs link to be hidden when /costs/api is unavailable")
+ }
+}
+
+func TestFleetPageShowsCostLinkWhenCostAPIAvailable(t *testing.T) {
+ raw := newHandler(testManifest(), fakeStatusSource{statuses: testStatuses()}, "http://localhost:8181", false)
+ h, ok := raw.(*handler)
+ if !ok {
+ t.Fatal("expected *handler")
+ }
+ h.httpClient = &http.Client{
+ Timeout: time.Second,
+ Transport: roundTripFunc(func(req *http.Request) (*http.Response, error) {
+ if req.URL.String() != "http://cllama:8081/costs/api" {
+ return nil, fmt.Errorf("unexpected URL: %s", req.URL.String())
+ }
+ return &http.Response{
+ StatusCode: http.StatusOK,
+ Header: make(http.Header),
+ Body: io.NopCloser(strings.NewReader(`{"total_cost_usd":1.2345,"total_requests":42}`)),
+ }, nil
+ }),
+ }
+
+ req := httptest.NewRequest(http.MethodGet, "/", nil)
+ w := httptest.NewRecorder()
+ h.ServeHTTP(w, req)
+
+ if w.Code != http.StatusOK {
+ t.Fatalf("expected 200, got %d", w.Code)
+ }
+ body := w.Body.String()
+ if !strings.Contains(body, "$1.2345") {
+ t.Fatalf("expected rendered API cost summary, got body:\n%s", body)
+ }
+ if !strings.Contains(body, "Open cllama dashboard") {
+ t.Fatalf("expected costs link when API summary is available")
+ }
+}
+
+func TestTopologyPageRenders(t *testing.T) {
+ h := newHandler(testManifest(), fakeStatusSource{statuses: testStatuses()}, "http://localhost:8181", false)
+ req := httptest.NewRequest(http.MethodGet, "/topology", nil)
+ w := httptest.NewRecorder()
+ h.ServeHTTP(w, req)
+
+ if w.Code != http.StatusOK {
+ t.Fatalf("expected 200, got %d", w.Code)
+ }
+ if !strings.Contains(w.Body.String(), "Topology") {
+ t.Fatalf("expected topology title in body")
+ }
+}
+
+func TestAPIStatusJSON(t *testing.T) {
+ h := newHandler(testManifest(), fakeStatusSource{statuses: testStatuses()}, "http://localhost:8181", false)
+ req := httptest.NewRequest(http.MethodGet, "/api/status", nil)
+ w := httptest.NewRecorder()
+ h.ServeHTTP(w, req)
+
+ if w.Code != http.StatusOK {
+ t.Fatalf("expected 200, got %d body=%s", w.Code, w.Body.String())
+ }
+ var payload struct {
+ Services map[string]serviceStatus `json:"services"`
+ }
+ if err := json.Unmarshal(w.Body.Bytes(), &payload); err != nil {
+ t.Fatalf("invalid json: %v", err)
+ }
+ if payload.Services["bot"].Status != "healthy" {
+ t.Fatalf("expected bot healthy, got %q", payload.Services["bot"].Status)
+ }
+}
+
+func TestDetailMissingServiceNotFound(t *testing.T) {
+ h := newHandler(testManifest(), fakeStatusSource{statuses: testStatuses()}, "http://localhost:8181", false)
+ req := httptest.NewRequest(http.MethodGet, "/detail/missing", nil)
+ w := httptest.NewRecorder()
+ h.ServeHTTP(w, req)
+
+ if w.Code != http.StatusNotFound {
+ t.Fatalf("expected 404, got %d", w.Code)
+ }
+}
+
+type roundTripFunc func(*http.Request) (*http.Response, error)
+
+func (f roundTripFunc) RoundTrip(req *http.Request) (*http.Response, error) {
+ return f(req)
+}
diff --git a/cmd/clawdash/main.go b/cmd/clawdash/main.go
new file mode 100644
index 0000000..4df53a4
--- /dev/null
+++ b/cmd/clawdash/main.go
@@ -0,0 +1,125 @@
+package main
+
+import (
+ "context"
+ "errors"
+ "fmt"
+ "net/http"
+ "os"
+ "os/signal"
+ "strings"
+ "syscall"
+ "time"
+)
+
+// main dispatches between the HTTP server (default) and the -healthcheck
+// subcommand. The subcommand exists so the container image can probe itself
+// without a shell (the Dockerfile's HEALTHCHECK invokes it in exec form).
+func main() {
+ cfg := loadConfig()
+
+ if len(os.Args) > 1 && strings.TrimSpace(os.Args[1]) == "-healthcheck" {
+ if err := runHealthcheck(cfg); err != nil {
+ fmt.Fprintln(os.Stderr, err.Error())
+ os.Exit(1)
+ }
+ return
+ }
+
+ if err := run(cfg); err != nil {
+ fmt.Fprintln(os.Stderr, err.Error())
+ os.Exit(1)
+ }
+}
+
+// config carries the environment-driven runtime settings for clawdash.
+type config struct {
+ Addr string // listen address for the dashboard HTTP server
+ ManifestPath string // path to the pod manifest JSON
+ CllamaCostsURL string // optional external URL for the cllama costs dashboard link
+ CostLogFallback bool // when true, derive cost estimates from proxy logs if /costs/api fails
+}
+
+// loadConfig reads configuration from CLAWDASH_* environment variables,
+// applying defaults for the listen address (:8082) and manifest path.
+func loadConfig() config {
+ return config{
+ Addr: envOr("CLAWDASH_ADDR", ":8082"),
+ ManifestPath: envOr("CLAWDASH_MANIFEST", "/claw/pod-manifest.json"),
+ CllamaCostsURL: strings.TrimSpace(os.Getenv("CLAWDASH_CLLAMA_COSTS_URL")),
+ CostLogFallback: envBool(
+ "CLAWDASH_COST_LOG_FALLBACK",
+ ),
+ }
+}
+
+// run starts the dashboard HTTP server and blocks until either the server
+// fails or SIGINT/SIGTERM arrives, in which case it performs a graceful
+// shutdown with a 5s deadline. Returns nil on clean shutdown.
+func run(cfg config) error {
+ manifest, err := readManifest(cfg.ManifestPath)
+ if err != nil {
+ return fmt.Errorf("clawdash: read manifest: %w", err)
+ }
+
+ source, err := newDockerStatusSource(manifest.PodName)
+ if err != nil {
+ return fmt.Errorf("clawdash: docker client: %w", err)
+ }
+ defer source.Close()
+
+ h := newHandler(manifest, source, cfg.CllamaCostsURL, cfg.CostLogFallback)
+ srv := &http.Server{
+ Addr: cfg.Addr,
+ Handler: h,
+ ReadHeaderTimeout: 10 * time.Second,
+ }
+
+ // Buffered so the server goroutine can exit even if we stop listening.
+ errCh := make(chan error, 1)
+ go func() {
+ fmt.Fprintf(os.Stderr, "clawdash ui listening on %s\n", cfg.Addr)
+ errCh <- srv.ListenAndServe()
+ }()
+
+ sigCh := make(chan os.Signal, 1)
+ signal.Notify(sigCh, os.Interrupt, syscall.SIGTERM)
+
+ select {
+ case sig := <-sigCh:
+ ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+ defer cancel()
+ // The signal value itself is irrelevant; any registered signal triggers shutdown.
+ _ = sig
+ return srv.Shutdown(ctx)
+ case err := <-errCh:
+ // ErrServerClosed means Shutdown was already invoked elsewhere; not a failure.
+ if errors.Is(err, http.ErrServerClosed) {
+ return nil
+ }
+ return err
+ }
+}
+
+// runHealthcheck verifies the process's preconditions for serving: a
+// readable manifest with a pod name and a reachable Docker daemon (pinged
+// with a 3s timeout). Used by the container HEALTHCHECK; returns nil when
+// healthy.
+func runHealthcheck(cfg config) error {
+ manifest, err := readManifest(cfg.ManifestPath)
+ if err != nil {
+ return fmt.Errorf("clawdash healthcheck: read manifest: %w", err)
+ }
+ if strings.TrimSpace(manifest.PodName) == "" {
+ return fmt.Errorf("clawdash healthcheck: manifest podName is empty")
+ }
+ source, err := newDockerStatusSource(manifest.PodName)
+ if err != nil {
+ return fmt.Errorf("clawdash healthcheck: docker client: %w", err)
+ }
+ defer source.Close()
+
+ ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
+ defer cancel()
+ if err := source.Ping(ctx); err != nil {
+ return fmt.Errorf("clawdash healthcheck: docker ping failed: %w", err)
+ }
+ return nil
+}
+
+// envOr returns the trimmed value of the environment variable key, or
+// fallback when it is unset or whitespace-only.
+func envOr(key, fallback string) string {
+ v := strings.TrimSpace(os.Getenv(key))
+ if v == "" {
+ return fallback
+ }
+ return v
+}
+
+// envBool interprets the environment variable key as a boolean, accepting
+// 1/true/yes/on (case-insensitive). Anything else — including unset — is false.
+func envBool(key string) bool {
+ v := strings.ToLower(strings.TrimSpace(os.Getenv(key)))
+ return v == "1" || v == "true" || v == "yes" || v == "on"
+}
diff --git a/cmd/clawdash/status.go b/cmd/clawdash/status.go
new file mode 100644
index 0000000..5f1cb0a
--- /dev/null
+++ b/cmd/clawdash/status.go
@@ -0,0 +1,290 @@
+package main
+
+import (
+ "context"
+ "fmt"
+ "sort"
+ "strings"
+ "time"
+
+ "github.com/docker/docker/api/types"
+ "github.com/docker/docker/api/types/container"
+ "github.com/docker/docker/api/types/filters"
+ "github.com/docker/docker/client"
+)
+
+// serviceStatus is the aggregated, JSON-serializable health snapshot for
+// one pod service (possibly backed by multiple container instances).
+type serviceStatus struct {
+ Service string `json:"service"`
+ Status string `json:"status"` // normalized status (healthy/running/starting/unhealthy/stopped/unknown)
+ State string `json:"state"` // raw Docker container state of the worst instance
+ Health string `json:"health,omitempty"` // Docker healthcheck status, if the container defines one
+ Uptime string `json:"uptime"` // human-formatted uptime of the longest-running instance, "-" if none
+ ContainerID string `json:"containerId,omitempty"` // short (12-char) ID of the worst instance
+ Instances int `json:"instances"`
+ Running int `json:"running"`
+ HasCllamaToken bool `json:"hasCllamaToken,omitempty"` // true if any instance has a non-empty CLLAMA_TOKEN env var
+}
+
+// dockerStatusSource produces serviceStatus snapshots by querying the
+// Docker daemon for containers labeled with this pod's name.
+type dockerStatusSource struct {
+ podName string
+ cli *client.Client
+ now func() time.Time // injectable clock for deterministic uptime in tests
+}
+
+// instance is the per-container view extracted from a Docker inspect,
+// before aggregation into a serviceStatus.
+type instance struct {
+ id string
+ status string // normalized status (see normalizeStatus)
+ state string // raw Docker state string, lowercased
+ health string // healthcheck status, lowercased; "" when no healthcheck
+ startedAt time.Time // zero when unparsable or never started
+ running bool
+ hasCllamaToken bool
+}
+
+// newDockerStatusSource builds a status source against the local Docker
+// daemon (configured from the environment, with API version negotiation).
+// The caller owns the returned source and must Close it.
+func newDockerStatusSource(podName string) (*dockerStatusSource, error) {
+ cli, err := client.NewClientWithOpts(client.FromEnv, client.WithAPIVersionNegotiation())
+ if err != nil {
+ return nil, err
+ }
+ return &dockerStatusSource{
+ podName: podName,
+ cli: cli,
+ now: time.Now,
+ }, nil
+}
+
+// Close releases the underlying Docker client connection.
+func (d *dockerStatusSource) Close() error {
+ return d.cli.Close()
+}
+
+// Ping checks connectivity to the Docker daemon; the ping payload is discarded.
+func (d *dockerStatusSource) Ping(ctx context.Context) error {
+ _, err := d.cli.Ping(ctx)
+ return err
+}
+
+// Snapshot returns a status entry for every requested service name. Each
+// name starts as "unknown" and is overwritten when matching containers
+// (labeled claw.pod=<podName>, including stopped ones) are found and
+// inspected. Containers belonging to services outside serviceNames are
+// ignored.
+// NOTE(review): a failed ContainerInspect is silently skipped, leaving the
+// service "unknown" — consider surfacing the error.
+func (d *dockerStatusSource) Snapshot(ctx context.Context, serviceNames []string) (map[string]serviceStatus, error) {
+ nameSet := make(map[string]struct{}, len(serviceNames))
+ out := make(map[string]serviceStatus, len(serviceNames))
+ for _, name := range serviceNames {
+ nameSet[name] = struct{}{}
+ out[name] = unknownStatus(name)
+ }
+
+ args := filters.NewArgs(filters.Arg("label", "claw.pod="+d.podName))
+ containers, err := d.cli.ContainerList(ctx, container.ListOptions{
+ All: true,
+ Filters: args,
+ })
+ if err != nil {
+ return nil, err
+ }
+
+ // Group per-container instances by service so replicas aggregate together.
+ buckets := make(map[string][]instance)
+ for _, c := range containers {
+ serviceName := serviceNameFromLabels(c.Labels, c.Names)
+ if serviceName == "" {
+ continue
+ }
+ if _, ok := nameSet[serviceName]; !ok {
+ continue
+ }
+
+ inspect, err := d.cli.ContainerInspect(ctx, c.ID)
+ if err != nil {
+ continue
+ }
+ inst := containerToInstance(inspect)
+ buckets[serviceName] = append(buckets[serviceName], inst)
+ }
+
+ now := d.now()
+ for serviceName, instances := range buckets {
+ out[serviceName] = aggregateInstances(serviceName, instances, now)
+ }
+
+ return out, nil
+}
+
+// unknownStatus is the placeholder snapshot for a service with no visible
+// containers.
+func unknownStatus(service string) serviceStatus {
+ return serviceStatus{
+ Service: service,
+ Status: "unknown",
+ State: "unknown",
+ Uptime: "-",
+ Instances: 0,
+ Running: 0,
+ }
+}
+
+// serviceNameFromLabels derives a container's service name, preferring the
+// claw.service label, then the Docker Compose service label, then the first
+// container name (with Docker's leading "/" stripped). Returns "" when
+// nothing usable is found.
+func serviceNameFromLabels(labels map[string]string, names []string) string {
+ if labels == nil {
+ labels = map[string]string{}
+ }
+ if v := strings.TrimSpace(labels["claw.service"]); v != "" {
+ return v
+ }
+ if v := strings.TrimSpace(labels["com.docker.compose.service"]); v != "" {
+ return v
+ }
+ if len(names) > 0 {
+ return strings.TrimPrefix(names[0], "/")
+ }
+ return ""
+}
+
+// containerToInstance flattens a Docker inspect result into an instance,
+// defensively tolerating nil State/Config sections. It also records whether
+// the container was launched with a non-empty CLLAMA_TOKEN env var, which
+// the UI uses to show proxy-token wiring.
+func containerToInstance(info types.ContainerJSON) instance {
+ state := "unknown"
+ health := ""
+ running := false
+ startedAt := time.Time{}
+
+ if info.ContainerJSONBase != nil && info.State != nil {
+ state = strings.ToLower(strings.TrimSpace(info.State.Status))
+ running = info.State.Running
+ if info.State.Health != nil {
+ health = strings.ToLower(strings.TrimSpace(info.State.Health.Status))
+ }
+ // StartedAt is an RFC3339Nano string; unparsable values leave the zero time.
+ if started := strings.TrimSpace(info.State.StartedAt); started != "" {
+ if ts, err := time.Parse(time.RFC3339Nano, started); err == nil {
+ startedAt = ts
+ }
+ }
+ }
+
+ hasToken := false
+ if info.Config != nil {
+ for _, raw := range info.Config.Env {
+ k, v, ok := strings.Cut(raw, "=")
+ if !ok {
+ continue
+ }
+ if strings.TrimSpace(k) == "CLLAMA_TOKEN" && strings.TrimSpace(v) != "" {
+ hasToken = true
+ break
+ }
+ }
+ }
+
+ return instance{
+ id: info.ID,
+ status: normalizeStatus(state, running, health),
+ state: state,
+ health: health,
+ startedAt: startedAt,
+ running: running,
+ hasCllamaToken: hasToken,
+ }
+}
+
+// aggregateInstances collapses a service's container instances into one
+// serviceStatus. The headline status/state/container come from the "worst"
+// instance (highest statusSeverity); uptime is that of the longest-running
+// instance; HasCllamaToken is true if any instance has a token. An empty
+// instance slice yields unknownStatus.
+func aggregateInstances(service string, instances []instance, now time.Time) serviceStatus {
+ if len(instances) == 0 {
+ return unknownStatus(service)
+ }
+
+ // Sort worst-first so instances[0] drives the headline fields.
+ sort.Slice(instances, func(i, j int) bool {
+ return statusSeverity(instances[i].status) > statusSeverity(instances[j].status)
+ })
+ worst := instances[0]
+
+ running := 0
+ hasToken := false
+ longest := time.Duration(0)
+ for _, inst := range instances {
+ if inst.running {
+ running++
+ if !inst.startedAt.IsZero() {
+ if dur := now.Sub(inst.startedAt); dur > longest {
+ longest = dur
+ }
+ }
+ }
+ if inst.hasCllamaToken {
+ hasToken = true
+ }
+ }
+
+ uptime := "-"
+ if longest > 0 {
+ uptime = formatDuration(longest)
+ }
+
+ return serviceStatus{
+ Service: service,
+ Status: worst.status,
+ State: worst.state,
+ Health: worst.health,
+ Uptime: uptime,
+ ContainerID: shortID(worst.id),
+ Instances: len(instances),
+ Running: running,
+ HasCllamaToken: hasToken,
+ }
+}
+
+// normalizeStatus maps raw Docker state + healthcheck status onto the
+// dashboard's vocabulary. A running container reports its healthcheck
+// status when one exists, else "running". Non-running containers map
+// restarting/created/paused to "starting", terminal (or empty) states to
+// "stopped", and anything else passes through unchanged.
+func normalizeStatus(state string, running bool, health string) string {
+ if running {
+ if health == "healthy" || health == "unhealthy" || health == "starting" {
+ return health
+ }
+ return "running"
+ }
+
+ switch state {
+ case "restarting", "created", "paused":
+ return "starting"
+ case "dead", "exited", "removing", "":
+ return "stopped"
+ default:
+ return state
+ }
+}
+
+// statusSeverity ranks normalized statuses for worst-instance selection:
+// higher is worse. Unrecognized statuses rank alongside "unhealthy" so
+// surprises are surfaced rather than hidden.
+func statusSeverity(status string) int {
+ switch status {
+ case "healthy":
+ return 0
+ case "running":
+ return 1
+ case "starting":
+ return 2
+ case "unknown":
+ return 2
+ case "unhealthy":
+ return 3
+ case "stopped":
+ return 4
+ default:
+ return 3
+ }
+}
+
+// formatDuration renders a duration with its two most significant units:
+// "Xh Ym" above an hour, "Xm Ys" above a minute, otherwise "Xs". Negative
+// durations are clamped to zero.
+func formatDuration(d time.Duration) string {
+ if d < 0 {
+ d = 0
+ }
+ d = d.Round(time.Second)
+ h := int(d / time.Hour)
+ d -= time.Duration(h) * time.Hour
+ m := int(d / time.Minute)
+ d -= time.Duration(m) * time.Minute
+ s := int(d / time.Second)
+
+ if h > 0 {
+ return fmt.Sprintf("%dh %dm", h, m)
+ }
+ if m > 0 {
+ return fmt.Sprintf("%dm %ds", m, s)
+ }
+ return fmt.Sprintf("%ds", s)
+}
+
+// shortID trims a container ID to Docker's conventional 12-character short
+// form; shorter or empty input is returned as-is (after trimming).
+func shortID(id string) string {
+ id = strings.TrimSpace(id)
+ if id == "" {
+ return ""
+ }
+ if len(id) <= 12 {
+ return id
+ }
+ return id[:12]
+}
diff --git a/cmd/clawdash/templates/detail.html b/cmd/clawdash/templates/detail.html
new file mode 100644
index 0000000..d917db0
--- /dev/null
+++ b/cmd/clawdash/templates/detail.html
@@ -0,0 +1,573 @@
+
+
+
+
+
+ clawdapus dash - detail
+
+
+
+
+
+
+
+
+
+
+
+
+
{{.ServiceName}}
+
image: {{if .ImageRef}}{{.ImageRef}}{{else}}-{{end}} {{if gt .Count 1}}| replicas: {{.Count}}{{end}}
+
+ {{if .IsProxy}}
+
proxy
+ {{end}}
+
+
+ {{if .HasStatusErrors}}
+ {{.StatusError}}
+ {{end}}
+
+
+
+ Status
+
+
+
+
Health
+
+
+
+ {{statusLabel .Status.Status}}
+
+
+
+
+
Uptime
+
{{.Status.Uptime}}
+
+
+
Container
+
{{if .Status.ContainerID}}{{.Status.ContainerID}}{{else}}-{{end}}
+
+
+
Instances
+
{{.Status.Running}} / {{.Status.Instances}}
+
+
+
+
+
+
+ {{if .Surfaces}}
+
+
+ Surfaces
+
+
+
+ | Scheme | Target | Access | Ports |
+
+
+ {{range .Surfaces}}
+
+ | {{.Scheme}} |
+ {{.Target}} |
+ {{if .AccessMode}}{{.AccessMode}}{{else}}-{{end}} |
+ {{if .Ports}}{{join .Ports ", "}}{{else}}-{{end}} |
+
+ {{end}}
+
+
+
+
+
+ {{end}}
+
+ {{if .Handles}}
+
+
+ Handles
+
+
+
+ | Platform | Username | ID | Guilds |
+
+
+ {{range .Handles}}
+
+ | {{.Platform}} |
+ {{if .Username}}{{.Username}}{{else}}-{{end}} |
+ {{.ID}} |
+
+ {{if .Guilds}}
+ {{range .Guilds}}
+
+ {{if .Name}}{{.Name}}{{else}}guild{{end}} ({{.ID}})
+ {{if .Channels}}{{range .Channels}}{{if .Name}}{{.Name}}{{else}}{{.ID}}{{end}} ({{.ID}}) {{end}}{{else}}No channels{{end}}
+
+ {{end}}
+ {{else}}
+ -
+ {{end}}
+ |
+
+ {{end}}
+
+
+
+
+
+ {{end}}
+
+ {{if .Skills}}
+
+
+ Skills
+
+
+ {{range .Skills}}
+ - {{.}}
+ {{end}}
+
+
+
+
+ {{end}}
+
+ {{if .Invocations}}
+
+
+ Invoke
+
+
+
+ | Name | Schedule | Message | To |
+
+
+ {{range .Invocations}}
+
+ | {{if .Name}}{{.Name}}{{else}}-{{end}} |
+ {{if .Schedule}}{{.Schedule}}{{else}}-{{end}} |
+ {{if .Message}}{{truncate .Message 120}}{{else}}-{{end}} |
+ {{if .To}}{{.To}}{{else}}-{{end}} |
+
+ {{end}}
+
+
+
+
+
+ {{end}}
+
+ {{if .Models}}
+
+
+ Models
+
+
+
+ | Slot | Provider / Model |
+
+
+ {{range .Models}}
+
+ | {{.Slot}} |
+ {{.Model}} |
+
+ {{end}}
+
+
+
+
+
+ {{end}}
+
+ {{if .Cllama}}
+
+
+ Cllama
+
+
+
+ | Proxy Type | Proxy Service | Token |
+
+
+ {{range .Cllama}}
+
+ | {{.ProxyType}} |
+ {{.ServiceName}} |
+ {{.TokenStatus}} |
+
+ {{end}}
+
+
+
+
+
+ {{end}}
+
+
+
+
+
+
+
diff --git a/cmd/clawdash/templates/fleet.html b/cmd/clawdash/templates/fleet.html
new file mode 100644
index 0000000..a956121
--- /dev/null
+++ b/cmd/clawdash/templates/fleet.html
@@ -0,0 +1,638 @@
+
+
+
+
+
+ clawdapus dash - fleet
+
+
+
+
+
+
+
+
+
+ Fleet Overview
+ Live view of agents, proxies, and infrastructure for pod {{.PodName}}.
+ {{if not .HasCllama}}
+ This pod has no cllama proxy, so the cllama costs dashboard link is hidden.
+ {{end}}
+
+ {{if .HasStatusErrors}}
+ {{.StatusError}}
+ {{end}}
+
+ {{if .HasCllama}}
+
+
+
+ {{if .HasCostSummary}}
+
+
+
Total USD
+
${{printf "%.4f" .CostSummary.TotalCostUSD}}
+
+
+
Requests
+
{{.CostSummary.Requests}}
+
+
+
Proxies Reporting
+
{{.CostSummary.ProxyCount}}
+
+
+ {{if .CostSummaryErr}}
+
{{.CostSummaryErr}}
+ {{end}}
+ {{else}}
+
Cost emission not available yet: {{.CostSummaryErr}}
+ {{end}}
+ {{if .HasCostLink}}
+
Open cllama dashboard
+ {{end}}
+
+
+ {{end}}
+
+
+
+ {{if .Agents}}
+
+ {{else}}
+ No claw agents found in this pod manifest.
+ {{end}}
+
+
+ {{if .HasCllama}}
+
+ {{end}}
+
+
+
+ {{if .Infrastructure}}
+
+ {{else}}
+ No native infrastructure services detected.
+ {{end}}
+
+
+
+
+
+
+
+
diff --git a/cmd/clawdash/templates/topology.html b/cmd/clawdash/templates/topology.html
new file mode 100644
index 0000000..f1b24bb
--- /dev/null
+++ b/cmd/clawdash/templates/topology.html
@@ -0,0 +1,382 @@
+
+
+
+
+
+ clawdapus dash - topology
+
+
+
+
+
+
+
+
+
+ Topology
+
+ Channels, agents, {{if .HasCllama}}proxies, {{end}}services, and volumes with live health markers.
+
+
+ {{if .HasStatusErrors}}
+ {{.StatusError}}
+ {{end}}
+
+
+
+ {{range .Lanes}}
+
{{.Title}}
+ {{end}}
+
+
+ {{if .HasNodes}}
+
+
+
+ {{range .Nodes}}
+
+ {{.Label}}
+
+
+ {{end}}
+
+
+ Hover a node to isolate connected paths. Colors: cyan channel, amber service, green volume/host{{if .HasCllama}}, purple proxy{{end}}.
+
+ {{else}}
+
No topology nodes are available for this pod snapshot yet.
+ {{end}}
+
+
+
+
+
+
+
+
diff --git a/cmd/clawdash/topology.go b/cmd/clawdash/topology.go
new file mode 100644
index 0000000..29b1829
--- /dev/null
+++ b/cmd/clawdash/topology.go
@@ -0,0 +1,340 @@
+package main
+
+import (
+ "fmt"
+ "sort"
+ "strings"
+
+ manifestpkg "github.com/mostlydev/clawdapus/internal/clawdash"
+ "github.com/mostlydev/clawdapus/internal/cllama"
+)
+
+type topologyPageData struct {
+ PodName string
+ ActiveTab string
+ Lanes []topologyLane
+ CanvasWidth int
+ CanvasHeight int
+ Nodes []topologyNode
+ Edges []topologyEdge
+ HasNodes bool
+ HasCllama bool
+ StatusError string
+ HasStatusErrors bool
+}
+
+type topologyLane struct {
+ Key string
+ Title string
+}
+
+type topologyNode struct {
+ ID string
+ Label string
+ Lane string
+ ServiceName string
+ X int
+ Y int
+ Width int
+ Height int
+ Status string
+ StatusClass string
+ Neighbors string
+}
+
+type topologyEdge struct {
+ FromID string
+ ToID string
+ Path string
+ Color string
+}
+
+type edgeDef struct {
+ fromLane string
+ fromName string
+ toLane string
+ toName string
+ kind string
+}
+
+// buildTopologyPageData turns a pod manifest plus live statuses into the
+// lane/node/edge layout rendered by the topology template. Lanes flow
+// left-to-right (channels → agents → [proxies] → services → volumes); the
+// proxy lane only appears when the pod has cllama proxies. Node positions
+// are computed on a fixed grid; edges are cubic Bézier paths between lane
+// columns. statusErr is passed through for banner display.
+func buildTopologyPageData(manifest *manifestpkg.PodManifest, statuses map[string]serviceStatus, statusErr string) topologyPageData {
+ // Index proxies by type so agent cllama references resolve to services.
+ proxyByType := make(map[string]string, len(manifest.Proxies))
+ proxyNames := make([]string, 0, len(manifest.Proxies))
+ for _, proxy := range manifest.Proxies {
+ proxyByType[proxy.ProxyType] = proxy.ServiceName
+ proxyNames = append(proxyNames, proxy.ServiceName)
+ }
+ sort.Strings(proxyNames)
+
+ agentNames := make([]string, 0)
+ channelSet := map[string]struct{}{}
+ serviceSet := map[string]struct{}{}
+ volumeSet := map[string]struct{}{}
+ edgeDefs := make([]edgeDef, 0)
+
+ // Services with a ClawType are agents; the rest are plain infrastructure.
+ serviceNames := sortedServiceNames(manifest.Services)
+ for _, serviceName := range serviceNames {
+ svc := manifest.Services[serviceName]
+ if svc.ClawType != "" {
+ agentNames = append(agentNames, serviceName)
+ } else {
+ serviceSet[serviceName] = struct{}{}
+ }
+ }
+ sort.Strings(agentNames)
+
+ // Walk each agent's surfaces to collect nodes and directed edges.
+ for _, agent := range agentNames {
+ svc := manifest.Services[agent]
+ for _, surface := range svc.Surfaces {
+ switch surface.Scheme {
+ case "channel":
+ channelSet[surface.Target] = struct{}{}
+ edgeDefs = append(edgeDefs, edgeDef{
+ fromLane: "channel", fromName: surface.Target,
+ toLane: "agent", toName: agent,
+ kind: "channel",
+ })
+ case "service":
+ serviceSet[surface.Target] = struct{}{}
+ edgeDefs = append(edgeDefs, edgeDef{
+ fromLane: "agent", fromName: agent,
+ toLane: "service", toName: surface.Target,
+ kind: "service",
+ })
+ case "volume":
+ volumeSet[surface.Target] = struct{}{}
+ edgeDefs = append(edgeDefs, edgeDef{
+ fromLane: "agent", fromName: agent,
+ toLane: "volume", toName: surface.Target,
+ kind: "volume",
+ })
+ case "host":
+ // Host mounts share the volume lane, prefixed for disambiguation.
+ hostTarget := "host:" + surface.Target
+ volumeSet[hostTarget] = struct{}{}
+ edgeDefs = append(edgeDefs, edgeDef{
+ fromLane: "agent", fromName: agent,
+ toLane: "volume", toName: hostTarget,
+ kind: "volume",
+ })
+ }
+ }
+
+ // Agent→proxy edges; fall back to the conventional proxy service name
+ // when the manifest doesn't declare a proxy of this type.
+ for _, proxyType := range svc.Cllama {
+ proxyService := proxyByType[proxyType]
+ if proxyService == "" {
+ proxyService = cllama.ProxyServiceName(proxyType)
+ if strings.TrimSpace(proxyService) != "" {
+ proxyNames = append(proxyNames, proxyService)
+ }
+ }
+ edgeDefs = append(edgeDefs, edgeDef{
+ fromLane: "agent", fromName: agent,
+ toLane: "proxy", toName: proxyService,
+ kind: "proxy",
+ })
+ }
+ }
+
+ proxyNames = uniqueSorted(proxyNames)
+ channels := sortedSet(channelSet)
+ services := sortedSet(serviceSet)
+ volumes := sortedSet(volumeSet)
+
+ // Fixed layout grid (pixels): node size, lane spacing, row spacing.
+ const (
+ nodeW = 172
+ nodeH = 44
+ xStart = 24
+ yStart = 52
+ laneGap = 220
+ rowGap = 68
+ canvasPad = 36
+ minRows = 3
+ )
+
+ lanesMeta := make([]topologyLane, 0, 5)
+ lanesMeta = append(lanesMeta, topologyLane{Key: "channel", Title: "Channels"})
+ lanesMeta = append(lanesMeta, topologyLane{Key: "agent", Title: "Agents"})
+ hasCllama := len(proxyNames) > 0
+ if hasCllama {
+ lanesMeta = append(lanesMeta, topologyLane{Key: "proxy", Title: "Proxies"})
+ }
+ lanesMeta = append(lanesMeta, topologyLane{Key: "service", Title: "Services"})
+ lanesMeta = append(lanesMeta, topologyLane{Key: "volume", Title: "Volumes"})
+
+ laneX := make(map[string]int, len(lanesMeta))
+ for i, lane := range lanesMeta {
+ laneX[lane.Key] = xStart + laneGap*i
+ }
+
+ type laneNodes struct {
+ lane string
+ names []string
+ }
+ lanes := make([]laneNodes, 0, len(lanesMeta))
+ for _, lane := range lanesMeta {
+ names := []string{}
+ switch lane.Key {
+ case "channel":
+ names = channels
+ case "agent":
+ names = agentNames
+ case "proxy":
+ names = proxyNames
+ case "service":
+ names = services
+ case "volume":
+ names = volumes
+ }
+ lanes = append(lanes, laneNodes{lane: lane.Key, names: names})
+ }
+
+ // Materialize positioned nodes; only agent/proxy/service lanes map to a
+ // real container service, so only they get live status coloring.
+ nodeMap := make(map[string]topologyNode)
+ nodes := make([]topologyNode, 0)
+ maxRows := minRows
+ for _, lane := range lanes {
+ if len(lane.names) > maxRows {
+ maxRows = len(lane.names)
+ }
+ for row, name := range lane.names {
+ serviceName := ""
+ switch lane.lane {
+ case "agent", "proxy", "service":
+ serviceName = name
+ }
+ status := statuses[serviceName]
+ if strings.TrimSpace(status.Status) == "" {
+ status = unknownStatus(serviceName)
+ }
+ if serviceName == "" {
+ status.Status = "n/a"
+ status.Uptime = "-"
+ }
+
+ node := topologyNode{
+ ID: topologyNodeID(lane.lane, name),
+ Label: name,
+ Lane: lane.lane,
+ ServiceName: serviceName,
+ X: laneX[lane.lane],
+ Y: yStart + row*rowGap,
+ Width: nodeW,
+ Height: nodeH,
+ Status: status.Status,
+ StatusClass: statusClass(status.Status),
+ }
+ nodes = append(nodes, node)
+ nodeMap[nodeKey(lane.lane, name)] = node
+ }
+ }
+
+ // Build deduplicated Bézier edges and a per-node neighbor index used by
+ // the template's hover-to-isolate behavior.
+ neighborMap := make(map[string]map[string]struct{})
+ edges := make([]topologyEdge, 0)
+ seenEdges := make(map[string]struct{})
+ for _, edge := range edgeDefs {
+ from, okFrom := nodeMap[nodeKey(edge.fromLane, edge.fromName)]
+ to, okTo := nodeMap[nodeKey(edge.toLane, edge.toName)]
+ if !okFrom || !okTo {
+ continue
+ }
+
+ key := from.ID + ">" + to.ID + ":" + edge.kind
+ if _, exists := seenEdges[key]; exists {
+ continue
+ }
+ seenEdges[key] = struct{}{}
+
+ x1 := from.X + from.Width
+ y1 := from.Y + from.Height/2
+ x2 := to.X
+ y2 := to.Y + to.Height/2
+ mid := (x1 + x2) / 2
+
+ edges = append(edges, topologyEdge{
+ FromID: from.ID,
+ ToID: to.ID,
+ Path: fmt.Sprintf("M %d %d C %d %d, %d %d, %d %d", x1, y1, mid, y1, mid, y2, x2, y2),
+ Color: topologyEdgeColor(edge.kind),
+ })
+
+ if neighborMap[from.ID] == nil {
+ neighborMap[from.ID] = map[string]struct{}{}
+ }
+ if neighborMap[to.ID] == nil {
+ neighborMap[to.ID] = map[string]struct{}{}
+ }
+ neighborMap[from.ID][to.ID] = struct{}{}
+ neighborMap[to.ID][from.ID] = struct{}{}
+ }
+
+ for i := range nodes {
+ neighbors := sortedSet(neighborMap[nodes[i].ID])
+ nodes[i].Neighbors = strings.Join(neighbors, ",")
+ }
+
+ canvasWidth := xStart + laneGap*(len(lanesMeta)-1) + nodeW + canvasPad
+ canvasHeight := yStart + maxRows*rowGap + canvasPad
+ if canvasHeight < 300 {
+ canvasHeight = 300
+ }
+
+ return topologyPageData{
+ PodName: manifest.PodName,
+ ActiveTab: "topology",
+ Lanes: lanesMeta,
+ CanvasWidth: canvasWidth,
+ CanvasHeight: canvasHeight,
+ Nodes: nodes,
+ Edges: edges,
+ HasNodes: len(nodes) > 0,
+ HasCllama: hasCllama,
+ StatusError: statusErr,
+ HasStatusErrors: statusErr != "",
+ }
+}
+
+// topologyNodeID builds a DOM-safe element id from a lane and node name:
+// lowercased, with spaces and /:._  collapsed to hyphens, prefixed by lane.
+func topologyNodeID(lane, name string) string {
+ safe := strings.ToLower(strings.TrimSpace(name))
+ replacer := strings.NewReplacer(" ", "-", "/", "-", ":", "-", ".", "-", "_", "-")
+ safe = replacer.Replace(safe)
+ return lane + "-" + safe
+}
+
+// nodeKey is the lookup key for nodeMap; "|" avoids collisions between
+// lane/name pairs (unlike topologyNodeID, names here are not sanitized).
+func nodeKey(lane, name string) string {
+ return lane + "|" + name
+}
+
+// topologyEdgeColor maps an edge kind to its CSS custom-property color;
+// unknown kinds fall back to the neutral line color.
+func topologyEdgeColor(kind string) string {
+ switch kind {
+ case "channel":
+ return "var(--cyan)"
+ case "service":
+ return "var(--amber)"
+ case "volume":
+ return "var(--green)"
+ case "proxy":
+ return "var(--purple)"
+ default:
+ return "var(--line-bright)"
+ }
+}
+
+// sortedSet returns a set's members as a sorted slice (deterministic
+// ordering for rendering).
+func sortedSet(set map[string]struct{}) []string {
+ out := make([]string, 0, len(set))
+ for v := range set {
+ out = append(out, v)
+ }
+ sort.Strings(out)
+ return out
+}
+
+// uniqueSorted deduplicates and sorts items, trimming whitespace and
+// discarding entries that are empty after trimming.
+func uniqueSorted(items []string) []string {
+ set := make(map[string]struct{}, len(items))
+ for _, item := range items {
+ item = strings.TrimSpace(item)
+ if item == "" {
+ continue
+ }
+ set[item] = struct{}{}
+ }
+ return sortedSet(set)
+}
diff --git a/dockerfiles/clawdash/Dockerfile b/dockerfiles/clawdash/Dockerfile
new file mode 100644
index 0000000..2f6ed1e
--- /dev/null
+++ b/dockerfiles/clawdash/Dockerfile
@@ -0,0 +1,13 @@
+FROM golang:1.23 AS build
+WORKDIR /src
+COPY go.mod go.sum* ./
+RUN go mod download 2>/dev/null || true
+COPY . .
+RUN CGO_ENABLED=0 go build -o /clawdash ./cmd/clawdash
+
+FROM gcr.io/distroless/static-debian12
+COPY --from=build /clawdash /clawdash
+EXPOSE 8082
+HEALTHCHECK --interval=15s --timeout=5s --retries=3 \
+ CMD ["/clawdash", "-healthcheck"]
+ENTRYPOINT ["/clawdash"]
diff --git a/docs/plans/2026-02-26-cllama-cost-hooks.md b/docs/plans/2026-02-26-cllama-cost-hooks.md
index 9b901b7..0fea657 100644
--- a/docs/plans/2026-02-26-cllama-cost-hooks.md
+++ b/docs/plans/2026-02-26-cllama-cost-hooks.md
@@ -6,6 +6,8 @@
**Architecture:** The proxy already intercepts every LLM request and response. Cost hooks read the `usage` block from OpenAI-compatible responses, multiply by a pricing table, and aggregate in-memory per agent. A new `internal/cost/` package owns the pricing table and accumulator. The logger gains `tokens_in`, `tokens_out`, `cost_usd` fields. The UI gains a `/costs` page. No persistent storage — costs reset on proxy restart (persistence is a future concern; structured logs are the durable record).
+**Emission contract for fleet dashboards:** `GET /costs/api` is the stable machine-readable interface for downstream dashboards (including Clawdapus Dash). UI routing (`/` vs `/costs`) may evolve, but `/costs/api` should remain stable and versioned when shape changes.
+
**Tech Stack:** Go 1.23, `sync` (thread-safe accumulator), `encoding/json` (response parsing), `html/template` (UI), `time` (windowed stats)
**Repo:** `mostlydev/cllama-passthrough` (at `/Users/wojtek/dev/ai/clawdapus/cllama-passthrough`)
diff --git a/docs/plans/2026-02-28-clawdash-dashboard-design.md b/docs/plans/2026-02-28-clawdash-dashboard-design.md
new file mode 100644
index 0000000..9c4d95c
--- /dev/null
+++ b/docs/plans/2026-02-28-clawdash-dashboard-design.md
@@ -0,0 +1,202 @@
+# Clawdapus Dash (`clawdash`) — Fleet Dashboard Design
+
+**Date:** 2026-02-28
+**Status:** IMPLEMENTED
+
+## Overview
+
+Clawdapus Dash (`clawdash`) is a standalone operator-facing container auto-injected into every Clawdapus pod. It provides fleet-level observability: a single pane of glass showing all agents, services, cllama proxies, surfaces, and their live health status.
+
+## Architecture
+
+### Container Model
+
+- **Service name:** `clawdash`
+- **Image:** `ghcr.io/mostlydev/clawdash:latest` (Go binary, embedded HTML templates)
+- **Port:** `:8082` (configurable via `CLAWDASH_ADDR`)
+- **Hardening:** `read_only: true`, `tmpfs: [/tmp]`, `restart: on-failure` — same as all claw services
+- **Network:** `claw-internal`
+- **Labels:** `claw.pod: <podName>`, `claw.role: dashboard`
+
+### Data Sources
+
+Two complementary inputs:
+
+1. **`/claw/pod-manifest.json`** (read-only bind mount) — Generated by `compose_up.go` during the materialize pass. Contains the full resolved pod topology as a static snapshot:
+ - Per-service: name, clawType, imageRef, agent file, models, count
+ - Handles: platform, ID, username, guilds (with channels)
+ - Surfaces: scheme, target, accessMode, ports, channelConfig
+ - Skills: name list
+ - Invocations: schedule, message, to, name
+ - Cllama: proxy types, proxy service names
+ - Peer relationships: peerHandles map
+
+2. **`/var/run/docker.sock`** (read-only bind mount) — Live container status via Docker API:
+ - Container state: running, stopped, restarting
+ - Health check results: healthy, unhealthy, starting
+ - Uptime duration
+ - No exec, no lifecycle ops — strictly read-only
+
+### Injection into Compose
+
+Same pattern as cllama sidecar injection in `compose_emit.go`. A new `ClawdashConfig` struct passed to `EmitCompose`, which adds the `clawdash` service entry to `compose.generated.yml`. Always injected when the pod has any `x-claw` services (same `hasClaw` gate).
+
+### Manifest Generation
+
+During `claw up`, after Pass 1 (inspect + resolve all services) but before Pass 2 (materialize), `compose_up.go` serializes the resolved pod state into `pod-manifest.json` in the runtime dir. This is a JSON serialization of:
+
+```go
+type PodManifest struct {
+ PodName string `json:"podName"`
+ Services map[string]ServiceManifest `json:"services"`
+ Proxies []ProxyManifest `json:"proxies"`
+}
+
+type ServiceManifest struct {
+ ClawType string `json:"clawType"`
+ ImageRef string `json:"imageRef"`
+ Agent string `json:"agent"`
+ Models map[string]string `json:"models,omitempty"`
+ Count int `json:"count"`
+ Handles map[string]*driver.HandleInfo `json:"handles,omitempty"`
+ Surfaces []SurfaceManifest `json:"surfaces,omitempty"`
+ Skills []string `json:"skills,omitempty"`
+ Invocations []driver.Invocation `json:"invocations,omitempty"`
+ Cllama []string `json:"cllama,omitempty"`
+}
+
+type SurfaceManifest struct {
+ Scheme string `json:"scheme"`
+ Target string `json:"target"`
+ AccessMode string `json:"accessMode,omitempty"`
+ Ports []string `json:"ports,omitempty"`
+}
+
+type ProxyManifest struct {
+ ProxyType string `json:"proxyType"`
+ ServiceName string `json:"serviceName"`
+ Image string `json:"image"`
+}
+```
+
+Skills are serialized as name-only (no host paths — those are meaningless inside the dashboard container).
+
+## Pages
+
+### 1. Fleet Overview (`/`)
+
+The home page. A card grid showing every service in the pod.
+
+**Card grouping** (section headers):
+- **Agents** — services with a clawType (openclaw, nanoclaw, etc.)
+- **Proxies** — cllama services
+- **Infrastructure** — non-claw services (databases, caches, etc.)
+
+**Agent card contents:**
+- Service name (bold) + claw type badge (small pill)
+- Health status: green/amber/red dot + status text (healthy, unhealthy, starting, stopped)
+- Model: primary model slot value (e.g. `anthropic/claude-sonnet-4-20250514`)
+- Handles: platform icon(s) + username (e.g. Discord icon + `@fleet-bot`)
+- Cllama indicator: proxy type badge if proxied (e.g. `passthrough`)
+- Uptime: human-readable duration
+- Count: ordinal badge if count > 1 (e.g. `x3`)
+
+**Proxy card contents:**
+- Service name + `proxy` role badge
+- Health dot + status
+- Proxy type
+- Uptime
+
+**Infrastructure card contents:**
+- Service name + `native` badge
+- Health dot (from Docker healthcheck if configured)
+- Uptime
+
+**Click action:** Navigates to `/detail/:service`
+
+### 2. Topology (`/topology`)
+
+Layered column wiring diagram showing how pod components connect.
+
+**Five swim lanes (left to right):**
+
+```
+Channels │ Agents │ Proxies │ Services │ Volumes
+──────────┼──────────┼───────────┼────────────┼─────────
+discord ─── bot-a ─── cllama │ │ shared-data
+discord ─── bot-b ─┘ ─── postgres │
+ └── bot-c │ └ workspace
+```
+
+**Implementation:**
+- HTML nodes positioned in CSS grid columns
+- SVG `<line>` or `<path>` elements for connections, drawn between node edges
+- Color-coded by surface scheme:
+ - Cyan (`--cyan`) for channel surfaces
+ - Amber (`--amber`) for service surfaces
+ - Green (`--green`) for volume/host surfaces
+ - Purple (`--purple`) for cllama proxy links
+- Hover: highlighting a node dims all unconnected nodes/lines (CSS opacity transition)
+- Health dots on each node (same as fleet cards)
+
+**Data flow:** Derived entirely from `pod-manifest.json` — surfaces define the edges, services define the nodes. Proxies appear in the middle column based on each agent's `cllama` field. Channels/services/volumes appear based on surface scheme.
+
+**No JS framework.** Pure HTML + inline SVG + CSS. Connection line coordinates computed server-side in Go template rendering (node positions are deterministic from the grid layout).
+
+### 3. Detail (`/detail/:service`)
+
+Drill-down view for a single service. Sections rendered as collapsible panels:
+
+| Section | Content |
+|---------|---------|
+| **Status** | Health dot, status text, uptime, container ID (short), image ref |
+| **Surfaces** | Table: scheme, target, access mode, ports |
+| **Handles** | Table: platform, username, ID, guilds (expandable with channels) |
+| **Skills** | Bulleted list of skill names |
+| **Invoke** | Table: name, cron schedule, message (truncated), target channel |
+| **Models** | Table: slot, provider/model |
+| **Cllama** | Proxy type(s), proxy service name, token status (present/absent — never shown) |
+
+## Visual Design
+
+Inherits cllama's design language for visual cohesion across the pod's operator tools:
+
+- **Fonts:** Geist Mono (monospace, labels/badges), Outfit (sans-serif, body text)
+- **Color palette:** Same CSS variables — `--bg: #0c1017`, `--cyan: #22d3ee`, `--amber: #f0a500`, `--green: #34d399`, `--red: #ef4444`, `--purple: #a78bfa`
+- **Scan-line overlay:** Same subtle `repeating-linear-gradient` texture
+- **Top bar:** `CLAWDAPUS DASH` brand in Geist Mono uppercase, nav tabs (Fleet / Topology / Detail)
+- **Cards:** `--bg-raised` background, `--line` border, subtle hover glow
+- **Badges/pills:** Rounded, small, colored by type (cyan for claw types, amber for proxy, green for native)
+
+## Live Updates
+
+Smooth partial refresh without full-page reloads:
+
+- A small `