From e094bba323689ee5902a69889873f893a3063418 Mon Sep 17 00:00:00 2001
From: Vivek Chand <vivekchand19@gmail.com>
Date: Mon, 23 Mar 2026 10:12:52 +0100
Subject: [PATCH 1/3] feat: token velocity alert for runaway loop detection
 (closes #313)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Sliding 2-min window token threshold (default: 10,000 tokens/2min)
- Consecutive tool-call chain detection (default: N=20 without human turn)
- Cost velocity alert (default: $0.10/min over 5-min window)
- In-dashboard banner with Kill Loop + Dismiss buttons (animated, red/orange)
- Background thread checks velocity every 30s
- Telegram notification on new alert types
- GET /api/alerts/velocity — current alert state
- GET/POST /api/alerts/velocity/config — read or configure thresholds
- POST /api/alerts/velocity/dismiss — dismiss active alerts
- Config stored in existing budget_config SQLite table (velocity_ prefix)
---
 dashboard.py | 441 +++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 424 insertions(+), 17 deletions(-)
diff --git a/dashboard.py b/dashboard.py
index 4aaf385..6d2748c 100755
--- a/dashboard.py
+++ b/dashboard.py
@@ -2765,6 +2765,92 @@ def get_local_ip():
   <button id="alert-resume-btn" onclick="resumeGateway()" style="display:none;background:#16a34a;color:#fff;border:none;border-radius:6px;padding:4px 12px;font-size:12px;cursor:pointer;font-weight:600;">Resume Gateway</button>
 </div>
 
+<!-- Velocity Alert Banner -->
+<div id="velocity-alert-banner" style="display:none;padding:0;border-bottom:3px solid #f97316;">
+  <div style="background:linear-gradient(135deg,#431407,#7c2d12);padding:12px 16px;display:flex;align-items:flex-start;gap:12px;flex-wrap:wrap;">
+    <div style="display:flex;align-items:center;gap:8px;flex-shrink:0;">
+      <span style="font-size:22px;animation:velocityPulse 1s ease-in-out infinite;">🔥</span>
+      <div>
+        <div style="font-size:13px;font-weight:800;color:#fed7aa;letter-spacing:0.3px;">RUNAWAY AGENT DETECTED</div>
+        <div style="font-size:11px;color:#fb923c;margin-top:1px;">Token velocity threshold exceeded</div>
+      </div>
+    </div>
+    <div id="velocity-alert-details" style="flex:1;min-width:200px;display:flex;flex-wrap:wrap;gap:8px;align-items:center;"></div>
+    <div style="display:flex;gap:8px;flex-shrink:0;align-items:center;">
+      <button onclick="killAgentLoop()" style="background:#ef4444;color:#fff;border:none;border-radius:8px;padding:7px 16px;font-size:12px;font-weight:700;cursor:pointer;display:flex;align-items:center;gap:5px;">
+        &#x26D4; Kill Loop
+      </button>
+      <button onclick="dismissVelocityAlert()" style="background:rgba(255,255,255,0.12);color:#fed7aa;border:1px solid rgba(249,115,22,0.4);border-radius:8px;padding:7px 12px;font-size:12px;cursor:pointer;">
+        Dismiss
+      </button>
+    </div>
+  </div>
+</div>
+<style>
+@keyframes velocityPulse { 0%,100%{transform:scale(1);opacity:1} 50%{transform:scale(1.15);opacity:0.8} }
+</style>
+<script>
+var _velocityCheckTimer = null;
+
+async function checkVelocityAlerts() {  // Poll the velocity endpoint and show or hide the banner
+  try {
+    var data = await fetch('/api/alerts/velocity').then(function(r){return r.json();});
+    var banner = document.getElementById('velocity-alert-banner');
+    if (!banner) return;
+    if (!data.active || !data.alerts || data.alerts.length === 0) {
+      banner.style.display = 'none';  // nothing active: keep the banner hidden
+      return;
+    }
+    banner.style.display = 'block';
+    var details = document.getElementById('velocity-alert-details');
+    if (!details) return;
+    var html = '';
+    data.alerts.forEach(function(alert) {  // note: 'alert' shadows window.alert inside this callback
+      var icon = alert.type === 'token_velocity' ? '📊' :
+                 alert.type === 'cost_velocity' ? '💰' : '🔄';
+      var color = alert.severity === 'critical' ? '#fca5a5' : '#fdba74';
+      html += '<div style="background:rgba(0,0,0,0.25);border:1px solid rgba(249,115,22,0.3);border-radius:6px;padding:5px 10px;">';
+      html += '<span style="font-size:14px;">' + icon + '</span> ';
+      html += '<span style="font-size:11px;font-weight:700;color:' + color + ';">' + escHtml(alert.title) + '</span>';
+      html += '<div style="font-size:10px;color:#fb923c;margin-top:1px;">' + escHtml(alert.message) + '</div>';
+      html += '</div>';
+    });
+    details.innerHTML = html;  // title and message were passed through escHtml above
+  } catch(e) {
+    // Silent fail - velocity endpoint may not be ready yet; the next poll retries
+  }
+}
+
+async function killAgentLoop() {  // Pause the gateway after confirmation; report a failed pause instead of hiding it
+  if (!confirm('Pause the OpenClaw gateway to stop the runaway agent loop?')) return;
+  try {
+    var resp = await fetch('/api/budget/pause', {method:'POST'});
+    if (!resp.ok) throw new Error('HTTP ' + resp.status);  // fetch() resolves on HTTP errors too
+    document.getElementById('velocity-alert-banner').style.display = 'none';
+    var b = document.getElementById('alert-banner'), m = document.getElementById('alert-banner-msg');
+    if (b && m) {
+      m.textContent = 'Gateway paused to stop runaway loop. Resume when ready.';
+      b.style.display = 'flex';
+      var rb = document.getElementById('alert-resume-btn');
+      if (rb) rb.style.display = '';
+    }
+  } catch(e) { alert('Could not pause the gateway: ' + e.message); }
+}
+
+async function dismissVelocityAlert() {
+  // Ask the server to clear the alerts, then hide the banner locally.
+  // The banner is hidden even when the request fails.
+  try {
+    await fetch('/api/alerts/velocity/dismiss', {method:'POST'});
+  } catch(e) { /* ignored on purpose */ }
+  document.getElementById('velocity-alert-banner').style.display = 'none';
+}
+
+// Start velocity check: every 30s
+_velocityCheckTimer = setInterval(checkVelocityAlerts, 30000);
+setTimeout(checkVelocityAlerts, 5000);
+</script>
+
 <!-- Budget Settings Modal -->
 <div id="budget-modal" style="display:none;position:fixed;inset:0;z-index:1200;background:rgba(0,0,0,0.5);align-items:center;justify-content:center;">
   <div style="background:var(--bg-primary);border:1px solid var(--border-primary);border-radius:16px;width:90%;max-width:560px;padding:24px;box-shadow:0 25px 50px rgba(0,0,0,0.25);">
@@ -17988,6 +18074,284 @@ def api_alerts_webhook_test():
     return jsonify({'ok': True, 'sent': sent})
 
 
+# ── Token Velocity Alert ────────────────────────────────────────────────
+# Detects runaway agent loops via:
+#   1. Sliding 2-min token window threshold
+#   2. Consecutive tool-call chain without human turn
+#   3. Cost velocity ($/min)
+
+VELOCITY_ALERT_STATE = {
+    'active': False,        # True while 'alerts' is non-empty
+    'alerts': [],           # list of active alert dicts
+    'last_checked': 0,      # time.time() of the last computation or dismissal; 0 = never
+}
+VELOCITY_ALERT_LOCK = threading.Lock()  # guards reads and writes of VELOCITY_ALERT_STATE
+
+# Configurable defaults (can be overridden via POST /api/alerts/velocity/config)
+_VELOCITY_CONFIG = {
+    'token_window_sec': 120,           # length of the sliding token window, in seconds
+    'token_window_threshold': 10000,   # tokens per window (2 min by default)
+    'tool_chain_threshold': 20,        # consecutive tool calls without human turn
+    'cost_per_min_threshold': 0.10,    # $/min
+    'enabled': True,                   # False disables all velocity checks
+    'notify_telegram': True,           # send a Telegram message for newly raised alert types
+}
+
+def _get_velocity_config():
+    """Return velocity defaults overlaid with every valid override stored in the DB."""
+    cfg = dict(_VELOCITY_CONFIG)
+    try:
+        with _fleet_db_lock:
+            db = _fleet_db()
+            try:
+                rows = db.execute("SELECT key, value FROM budget_config "
+                                  "WHERE key LIKE 'velocity_%'").fetchall()
+            finally:
+                db.close()  # release the connection even when the query fails
+    except Exception:
+        return cfg  # DB unavailable: serve the built-in defaults
+    for row in rows:
+        try:  # isolate each row so one malformed value cannot drop the others
+            k, v = row['key'].replace('velocity_', '', 1), str(row['value'])
+            if isinstance(cfg.get(k), bool):  # bool before int: bool subclasses int
+                cfg[k] = v.lower() in ('true', '1', 'yes')
+            elif isinstance(cfg.get(k), (int, float)):
+                cfg[k] = int(float(v)) if isinstance(cfg[k], int) else float(v)
+        except Exception:
+            continue
+    return cfg
+
+
+def _compute_velocity_alerts():
+    """Return a list of alert dicts for every velocity threshold currently exceeded."""
+    cfg = _get_velocity_config()
+    if not cfg.get('enabled', True):
+        return []  # monitoring switched off: never report alerts
+
+    now = time.time()
+    alerts = []
+
+    # ── 1. Token velocity (sliding window, 2 min by default) ───────────
+    window_sec = cfg['token_window_sec']
+    window_start = now - window_sec
+    window_tokens = 0
+    with _metrics_lock:
+        for entry in metrics_store.get('tokens', []):
+            if entry.get('timestamp', 0) >= window_start:
+                window_tokens += entry.get('total', 0)
+
+    if window_tokens >= cfg['token_window_threshold']:
+        alerts.append({
+            'type': 'token_velocity',
+            'severity': 'critical',
+            'title': 'Token velocity too high',
+            'message': (
+                f'{window_tokens:,} tokens consumed in the last '
+                f'{window_sec / 60:g} min '  # :g keeps "2" for 120s, shows "1.5" for 90s
+                f'(threshold: {cfg["token_window_threshold"]:,})'
+            ),
+            'value': window_tokens,
+            'threshold': cfg['token_window_threshold'],
+            'unit': f'tokens/{window_sec / 60:g}min',
+        })
+
+    # ── 2. Cost velocity ($/min over last 5 min) ───────────────────────
+    cost_window = 300  # 5-min window for cost rate
+    cost_window_start = now - cost_window
+    window_cost = 0.0
+    with _metrics_lock:
+        for entry in metrics_store.get('cost', []):
+            if entry.get('timestamp', 0) >= cost_window_start:
+                window_cost += entry.get('usd', 0)
+    cost_per_min = window_cost / (cost_window / 60)  # cost_window is a non-zero constant
+
+    if cost_per_min >= cfg['cost_per_min_threshold']:
+        alerts.append({
+            'type': 'cost_velocity',
+            'severity': 'warning',
+            'title': 'Cost velocity alert',
+            'message': (
+                f'Spending at ${cost_per_min:.3f}/min '
+                f'(threshold: ${cfg["cost_per_min_threshold"]:.2f}/min)'
+            ),
+            'value': round(cost_per_min, 4),
+            'threshold': cfg['cost_per_min_threshold'],
+            'unit': '$/min',
+        })
+
+    # ── 3. Consecutive tool-call chain detection ────────────────────────
+    consecutive_tools = _count_consecutive_tool_calls()
+    if consecutive_tools >= cfg['tool_chain_threshold']:
+        alerts.append({
+            'type': 'tool_chain',
+            'severity': 'warning',
+            'title': 'Possible agent loop detected',
+            'message': (
+                f'{consecutive_tools} consecutive tool calls without a human turn '
+                f'(threshold: {cfg["tool_chain_threshold"]})'
+            ),
+            'value': consecutive_tools,
+            'threshold': cfg['tool_chain_threshold'],
+            'unit': 'tool calls',
+        })
+
+    return alerts
+
+
+def _count_consecutive_tool_calls():
+    """Count tool calls made since the last human (user) turn in the most recently
+    modified session transcript; returns 0 when no recent transcript is readable."""
+    sessions_dir = SESSIONS_DIR or os.path.expanduser('~/.openclaw/agents/main/sessions')
+    if not os.path.isdir(sessions_dir):
+        return 0
+    try:
+        candidates = []
+        for f in os.listdir(sessions_dir):
+            if not f.endswith('.jsonl'):
+                continue
+            fp = os.path.join(sessions_dir, f)
+            try:
+                st = os.stat(fp)
+                if time.time() - st.st_mtime < 3600:  # active in last hour
+                    candidates.append((fp, st.st_mtime))
+            except Exception:
+                continue
+        if not candidates:
+            return 0
+        candidates.sort(key=lambda x: x[1], reverse=True)
+        best = candidates[0][0]
+
+        with open(best, 'rb') as fh:
+            fh.seek(0, 2)
+            size = fh.tell()
+            chunk = min(size, 256000)  # only the last ~256 kB of the transcript is parsed
+            fh.seek(max(0, size - chunk))
+            tail = fh.read().decode('utf-8', errors='replace')
+
+        lines = tail.strip().split('\n')
+
+        # Walk backwards from the newest record; the first human turn ends the chain
+        consecutive = 0
+        for line in reversed(lines):
+            try:
+                obj = json.loads(line)
+            except Exception:
+                continue  # truncated first line of the tail, or non-JSON noise
+            if not isinstance(obj, dict):
+                continue
+            msg = obj.get('message', obj)
+            if not isinstance(msg, dict):
+                continue  # e.g. a record whose 'message' is plain text
+            role = msg.get('role', '')
+            if role == 'user':
+                break
+            if role != 'assistant':
+                continue
+            content = msg.get('content', [])
+            if isinstance(content, list):  # plain-text content holds no tool calls
+                consecutive += sum(
+                    1 for item in content
+                    if isinstance(item, dict) and item.get('type') == 'toolCall')
+        return consecutive
+    except Exception:
+        return 0
+
+
+def _velocity_check_and_notify():
+    """Recompute velocity alerts, publish them to VELOCITY_ALERT_STATE, and send a
+    Telegram message for each alert type that was not already active."""
+    alerts = _compute_velocity_alerts()
+    now = time.time()
+
+    # Replace the shared state under the lock, remembering which alert types were
+    # active before so that only newly raised ones are announced.
+    with VELOCITY_ALERT_LOCK:
+        was_types = {a['type'] for a in VELOCITY_ALERT_STATE.get('alerts', [])}
+        VELOCITY_ALERT_STATE['active'] = len(alerts) > 0
+        VELOCITY_ALERT_STATE['alerts'] = alerts
+        VELOCITY_ALERT_STATE['last_checked'] = now
+
+    # Notifications are sent after the lock has been released
+    cfg = _get_velocity_config()
+    if cfg.get('notify_telegram', True):
+        for alert in alerts:
+            if alert['type'] not in was_types:
+                _send_telegram_alert(
+                    f'⚠️ Velocity Alert: {alert["title"]}\n{alert["message"]}'
+                )
+
+
+def _velocity_monitor_loop():
+    """Run forever: sleep 30 seconds, then run one velocity check; never returns."""
+    while True:
+        time.sleep(30)  # the first check therefore happens 30s after the loop starts
+        try:
+            _velocity_check_and_notify()
+        except Exception as e:  # keep the loop alive; report and retry next cycle
+            print(f'[warn] Velocity monitor error: {e}')
+
+
+def _start_velocity_monitor_thread():
+    """Launch _velocity_monitor_loop in a daemon thread and return immediately."""
+    t = threading.Thread(target=_velocity_monitor_loop, daemon=True)
+    t.start()
+
+
+@bp_alerts.route('/api/alerts/velocity')
+def api_alerts_velocity():
+    """Return the velocity alert state as JSON: 'active', 'alerts', 'last_checked'."""
+    with VELOCITY_ALERT_LOCK:
+        state = dict(VELOCITY_ALERT_STATE)
+
+    # If stale (> 45s), recompute inline. NOTE(review): this path sends no Telegram message.
+    if time.time() - state.get('last_checked', 0) > 45:
+        alerts = _compute_velocity_alerts()
+        now = time.time()
+        with VELOCITY_ALERT_LOCK:
+            VELOCITY_ALERT_STATE['active'] = len(alerts) > 0
+            VELOCITY_ALERT_STATE['alerts'] = alerts
+            VELOCITY_ALERT_STATE['last_checked'] = now
+        state = {'active': len(alerts) > 0, 'alerts': alerts, 'last_checked': now}
+
+    return jsonify(state)
+
+
+@bp_alerts.route('/api/alerts/velocity/config', methods=['GET', 'POST'])
+def api_alerts_velocity_config():
+    """Return the velocity config (GET), or persist recognised keys first (POST)."""
+    if request.method == 'POST':
+        data = request.get_json(silent=True)
+        if not isinstance(data, dict):
+            data = {}  # missing, invalid or non-object JSON body: nothing to update
+        updates = {f'velocity_{k}': str(v) for k, v in data.items()
+                   if k in _VELOCITY_CONFIG}  # only known settings, stored as strings
+        if updates:
+            now = time.time()
+            with _fleet_db_lock:
+                db = _fleet_db()
+                try:
+                    for k, v in updates.items():
+                        db.execute(
+                            "INSERT OR REPLACE INTO budget_config (key, value, updated_at) "
+                            "VALUES (?, ?, ?)", (k, v, now)
+                        )
+                    db.commit()
+                finally:
+                    db.close()  # do not leak the connection if the write fails
+        return jsonify({'ok': True, 'config': _get_velocity_config()})
+    return jsonify(_get_velocity_config())
+
+
+@bp_alerts.route('/api/alerts/velocity/dismiss', methods=['POST'])
+def api_alerts_velocity_dismiss():
+    """Clear the cached velocity alerts; a later velocity check may raise them again."""
+    with VELOCITY_ALERT_LOCK:
+        VELOCITY_ALERT_STATE['active'] = False
+        VELOCITY_ALERT_STATE['alerts'] = []
+        VELOCITY_ALERT_STATE['last_checked'] = time.time()  # marks the cleared state as fresh
+    return jsonify({'ok': True})
+
+
 # ── History / Time-Series API ────────────────────────────────────────────
 
 @bp_history.route('/api/history/metrics')
@@ -21990,21 +22354,61 @@ def api_security_posture():
 
 @bp_health.route('/api/heatmap')
 def api_heatmap():
-    """Activity heatmap - events per hour for the last 7 days."""
+    """Activity heatmap - events per hour for the last 30 days (uses session JSONL data)."""
     now = datetime.now()
-    # Initialize 7 days × 24 hours grid
+    days_back = 30
+    # Initialize 30 days x 24 hours grid
     grid = {}
     day_labels = []
-    for i in range(6, -1, -1):
+    for i in range(days_back - 1, -1, -1):
         d = now - timedelta(days=i)
         ds = d.strftime('%Y-%m-%d')
         grid[ds] = [0] * 24
-        day_labels.append({'date': ds, 'label': d.strftime('%a %d')})
+        day_labels.append({'date': ds, 'label': d.strftime('%b %d')})
+
+    cutoff = now - timedelta(days=days_back)
+
+    # Primary source: session JSONL files (richer data, longer history)
+    sessions_dir = _get_sessions_dir()
+    if os.path.isdir(sessions_dir):
+        for fname in os.listdir(sessions_dir):
+            if not fname.endswith('.jsonl'):
+                continue
+            fpath = os.path.join(sessions_dir, fname)
+            try:
+                mtime = datetime.fromtimestamp(os.path.getmtime(fpath))
+                if mtime < cutoff:
+                    continue
+                with open(fpath, 'r') as f:
+                    for line in f:
+                        try:
+                            obj = json.loads(line.strip())
+                            raw_ts = (obj.get('timestamp') or obj.get('time')
+                                      or obj.get('created_at'))
+                            if raw_ts is None:
+                                continue
+                            if isinstance(raw_ts, (int, float)):
+                                dt = datetime.fromtimestamp(
+                                    raw_ts / 1000 if raw_ts > 1e12 else raw_ts)
+                            else:
+                                dt = datetime.fromisoformat(
+                                    str(raw_ts).replace('Z', '+00:00').replace('+00:00', ''))
+                            if dt < cutoff:
+                                continue
+                            day_key = dt.strftime('%Y-%m-%d')
+                            if day_key in grid:
+                                grid[day_key][dt.hour] += 1
+                        except Exception:
+                            continue
+            except Exception:
+                continue
 
-    # Parse log files for the last 7 days
-    for i in range(7):
+    # Fallback: log files for days with no session data
+    for i in range(days_back):
         d = now - timedelta(days=i)
         ds = d.strftime('%Y-%m-%d')
+        if max(grid.get(ds, [0])) > 0:
+            continue  # already have session data for this day
         log_file = _find_log_file(ds)
         if not log_file:
             continue
@@ -22013,29 +22417,31 @@ def api_heatmap():
                 for line in f:
                     try:
                         obj = json.loads(line.strip())
-                        ts = obj.get('time') or (obj.get('_meta', {}).get('date') if isinstance(obj.get('_meta'), dict) else None)
+                        ts = obj.get('time') or (
+                            obj.get('_meta', {}).get('date')
+                            if isinstance(obj.get('_meta'), dict) else None)
                         if ts:
                             if isinstance(ts, (int, float)):
-                                dt = datetime.fromtimestamp(ts / 1000 if ts > 1e12 else ts)
+                                dt = datetime.fromtimestamp(
+                                    ts / 1000 if ts > 1e12 else ts)
                             else:
-                                dt = datetime.fromisoformat(str(ts).replace('Z', '+00:00').replace('+00:00', ''))
-                            hour = dt.hour
+                                dt = datetime.fromisoformat(
+                                    str(ts).replace('Z', '+00:00').replace('+00:00', ''))
                             day_key = dt.strftime('%Y-%m-%d')
                             if day_key in grid:
-                                grid[day_key][hour] += 1
+                                grid[day_key][dt.hour] += 1
                     except Exception:
-                        # Count non-JSON lines too
-                        if ds in grid:
-                            grid[ds][12] += 1  # default to noon
+                        continue
         except Exception:
             pass
 
     max_val = max(max(hours) for hours in grid.values()) if grid else 0
-    days = []
+    result_days = []
     for dl in day_labels:
-        days.append({'label': dl['label'], 'hours': grid.get(dl['date'], [0] * 24)})
+        result_days.append({'date': dl['date'], 'label': dl['label'],
+                            'hours': grid.get(dl['date'], [0] * 24)})
 
-    return jsonify({'days': days, 'max': max_val})
+    return jsonify({'days': result_days, 'max': max_val})
 
 
 @bp_health.route('/api/system-health')
@@ -24366,6 +24772,7 @@ def _run_server(args):
     _detect_heartbeat_interval()
     _start_fleet_maintenance_thread()
     _start_budget_monitor_thread()
+    _start_velocity_monitor_thread()
 
     try:
         print(BANNER.format(version=__version__))

From bfc1f37ef0c008d5615b0db22b3b766d69802025 Mon Sep 17 00:00:00 2001
From: Vivek Chand <vivekchand19@gmail.com>
Date: Mon, 23 Mar 2026 10:17:22 +0100
Subject: [PATCH 2/3] =?UTF-8?q?feat:=2030-day=20activity=20heatmap=20?=
 =?UTF-8?q?=E2=80=94=20expand=20from=207=20to=2030=20days,=20use=20session?=
 =?UTF-8?q?=20JSONL=20data,=20add=20HTML=20panel=20to=20Overview?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- api_heatmap(): now covers 30 days (was 7); primary source is session JSONL
  files so historical data is richer; log-file fallback for days with no
  session events; each day entry now includes 'date' field alongside 'label'
- loadHeatmap() JS: updated for 30-day view — sparse day labels (every 5th),
  6-band colour scale, hour labels only at 00/06/12/18, safe guard when
  heatmap-grid element is absent
- startSystemHealthRefresh(): now calls loadHeatmap() on startup and refreshes
  every 5 minutes via _heatmapTimer
- Overview HTML (both light/dark themes): added Activity Heatmap panel inside
  the System Health section with heatmap-grid + heatmap-legend elements
- tests/test_api.py: added TestHeatmap (7 tests) — 30-day length, 24 buckets
  per day, label/date keys, non-negative int counts, max field correctness;
  all 78 tests pass

Closes vivekchand/clawmetry#69
---
 dashboard.py      | 58 +++++++++++++++++++++++++++++++++++++----------
 tests/test_api.py | 55 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 101 insertions(+), 12 deletions(-)

diff --git a/dashboard.py b/dashboard.py
index 6d2748c..93e21ee 100755
--- a/dashboard.py
+++ b/dashboard.py
@@ -3065,6 +3065,11 @@ def get_local_ip():
         <div id="sh-inference" style="margin-bottom:14px;"></div></div>
         <div id="sh-security-wrap" style="display:none;"><div style="font-size:11px;text-transform:uppercase;letter-spacing:1.5px;color:var(--text-muted);font-weight:600;margin-bottom:6px;">🛡️ Security Posture</div>
         <div id="sh-security" style="margin-bottom:14px;"></div></div>
+        <div style="font-size:11px;text-transform:uppercase;letter-spacing:1.5px;color:var(--text-muted);font-weight:600;margin-bottom:6px;">📅 Activity Heatmap <span style="font-size:10px;text-transform:none;letter-spacing:0;font-weight:400;color:var(--text-faint);">30 days · hourly</span></div>
+        <div class="heatmap-wrap">
+          <div id="heatmap-grid" class="heatmap-grid"><span style="color:var(--text-muted);font-size:12px;">Loading...</span></div>
+        </div>
+        <div id="heatmap-legend" class="heatmap-legend"></div>
       </div>
     </div>
 
@@ -8197,6 +8202,11 @@ def get_local_ip():
         <div id="sh-inference" style="margin-bottom:14px;"></div></div>
         <div id="sh-security-wrap" style="display:none;"><div style="font-size:11px;text-transform:uppercase;letter-spacing:1.5px;color:var(--text-muted);font-weight:600;margin-bottom:6px;">🛡️ Security Posture</div>
         <div id="sh-security" style="margin-bottom:14px;"></div></div>
+        <div style="font-size:11px;text-transform:uppercase;letter-spacing:1.5px;color:var(--text-muted);font-weight:600;margin-bottom:6px;">📅 Activity Heatmap <span style="font-size:10px;text-transform:none;letter-spacing:0;font-weight:400;color:var(--text-faint);">30 days · hourly</span></div>
+        <div class="heatmap-wrap">
+          <div id="heatmap-grid" class="heatmap-grid"><span style="color:var(--text-muted);font-size:12px;">Loading...</span></div>
+        </div>
+        <div id="heatmap-legend" class="heatmap-legend"></div>
       </div>
     </div>
 
@@ -11392,36 +11402,60 @@ def get_local_ip():
 }
 function startSystemHealthRefresh() {
   loadSystemHealth();
+  loadHeatmap();
   if (window._sysHealthTimer) clearInterval(window._sysHealthTimer);
   window._sysHealthTimer = setInterval(loadSystemHealth, 30000);
+  // Refresh heatmap every 5 minutes (data changes slowly)
+  if (window._heatmapTimer) clearInterval(window._heatmapTimer);
+  window._heatmapTimer = setInterval(loadHeatmap, 300000);
 }
 
-// ===== Activity Heatmap =====
+// ===== Activity Heatmap (30-day hourly grid) =====
 async function loadHeatmap() {
+  var gridEl = document.getElementById('heatmap-grid');
+  var legendEl = document.getElementById('heatmap-legend');
+  if (!gridEl) return;
   try {
     var data = await fetch('/api/heatmap').then(r => r.json());
-    var grid = document.getElementById('heatmap-grid');
     var maxVal = Math.max(1, data.max);
+    // Hour labels row
     var html = '<div class="heatmap-label"></div>';
-    for (var h = 0; h < 24; h++) { html += '<div class="heatmap-hour-label">' + (h < 10 ? '0' : '') + h + '</div>'; }
+    for (var h = 0; h < 24; h++) {
+      html += '<div class="heatmap-hour-label">' + (h % 6 === 0 ? (h < 10 ? '0' + h : String(h)) : '') + '</div>';
+    }
     data.days.forEach(function(day) {
-      html += '<div class="heatmap-label">' + day.label + '</div>';
+      // Show label only every 5 days to avoid crowding on 30-day view
+      var showLabel = day.label.endsWith('01') || day.label.endsWith('05') ||
+                      day.label.endsWith('10') || day.label.endsWith('15') ||
+                      day.label.endsWith('20') || day.label.endsWith('25') ||
+                      day.label.endsWith('30');
+      html += '<div class="heatmap-label" style="font-size:9px;">' + (showLabel ? day.label : '') + '</div>';
       day.hours.forEach(function(val, hi) {
         var intensity = val / maxVal;
         var color;
         if (val === 0) color = '#12122a';
-        else if (intensity < 0.25) color = '#1a3a2a';
-        else if (intensity < 0.5) color = '#2a6a3a';
-        else if (intensity < 0.75) color = '#4a9a2a';
+        else if (intensity < 0.2) color = '#1a3a2a';
+        else if (intensity < 0.4) color = '#2a6a3a';
+        else if (intensity < 0.6) color = '#3a8a2a';
+        else if (intensity < 0.8) color = '#4a9a2a';
         else color = '#6adb3a';
-        html += '<div class="heatmap-cell" style="background:' + color + ';" title="' + day.label + ' ' + (hi < 10 ? '0' : '') + hi + ':00 - ' + val + ' events"></div>';
+        html += '<div class="heatmap-cell" style="background:' + color + ';" title="' +
+          day.label + ' ' + (hi < 10 ? '0' : '') + hi + ':00\u202f\u2014\u202f' + val + ' event' + (val !== 1 ? 's' : '') +
+          '"></div>';
       });
     });
-    grid.innerHTML = html;
-    var legend = document.getElementById('heatmap-legend');
-    legend.innerHTML = 'Less <div class="heatmap-legend-cell" style="background:#12122a"></div><div class="heatmap-legend-cell" style="background:#1a3a2a"></div><div class="heatmap-legend-cell" style="background:#2a6a3a"></div><div class="heatmap-legend-cell" style="background:#4a9a2a"></div><div class="heatmap-legend-cell" style="background:#6adb3a"></div> More';
+    gridEl.innerHTML = html;
+    if (legendEl) {
+      legendEl.innerHTML = 'Less\u00a0' +
+        '<div class="heatmap-legend-cell" style="background:#12122a"></div>' +
+        '<div class="heatmap-legend-cell" style="background:#1a3a2a"></div>' +
+        '<div class="heatmap-legend-cell" style="background:#2a6a3a"></div>' +
+        '<div class="heatmap-legend-cell" style="background:#4a9a2a"></div>' +
+        '<div class="heatmap-legend-cell" style="background:#6adb3a"></div>' +
+        '\u00a0More\u00a0\u00b7\u00a0' + data.days.length + ' days';
+    }
   } catch(e) {
-    document.getElementById('heatmap-grid').innerHTML = '<span style="color:#555">No activity data</span>';
+    gridEl.innerHTML = '<span style="color:#555">No activity data</span>';
   }
 }
 
diff --git a/tests/test_api.py b/tests/test_api.py
index 7b2b678..9915eba 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -443,3 +443,58 @@ def test_memory_analytics_files_have_status(self, api, base_url):
             assert f["status"] in ("ok", "warning", "critical")
 
 
+
+
+class TestHeatmap:
+    """Contract tests for GET /api/heatmap: 30 day rows of 24 hourly counts plus 'max'."""
+
+    def test_heatmap_returns_200(self, api, base_url):
+        """Heatmap endpoint returns 200."""
+        r = api.get(f"{base_url}/api/heatmap", timeout=10)
+        assert r.status_code == 200, f"Expected 200, got {r.status_code}: {r.text[:200]}"
+
+    def test_heatmap_has_required_keys(self, api, base_url):
+        """Response contains 'days' list and 'max' value."""
+        d = assert_ok(get(api, base_url, "/api/heatmap"))
+        assert_keys(d, "days", "max")
+        assert isinstance(d["days"], list), "'days' must be a list"
+        assert isinstance(d["max"], (int, float)), "'max' must be numeric"
+
+    def test_heatmap_returns_30_days(self, api, base_url):
+        """Heatmap covers exactly 30 days."""
+        d = assert_ok(get(api, base_url, "/api/heatmap"))
+        assert len(d["days"]) == 30, f"Expected 30 days, got {len(d['days'])}"
+
+    def test_heatmap_each_day_has_24_hours(self, api, base_url):
+        """Every day entry has exactly 24 hourly buckets."""
+        d = assert_ok(get(api, base_url, "/api/heatmap"))
+        for day in d["days"]:
+            assert "hours" in day, f"Day entry missing 'hours': {day}"
+            assert len(day["hours"]) == 24, (
+                f"Expected 24 hourly buckets, got {len(day['hours'])} for {day.get('label')}"
+            )
+
+    def test_heatmap_day_has_label_and_date(self, api, base_url):
+        """Every day entry has 'label', 'date' and 'hours' fields."""
+        d = assert_ok(get(api, base_url, "/api/heatmap"))
+        for day in d["days"]:
+            assert_keys(day, "label", "date", "hours")
+
+    def test_heatmap_hours_are_non_negative_ints(self, api, base_url):
+        """All hourly counts are non-negative integers."""
+        d = assert_ok(get(api, base_url, "/api/heatmap"))
+        for day in d["days"]:
+            for count in day["hours"]:
+                assert isinstance(count, int) and count >= 0, (
+                    f"Invalid hourly count {count!r} in {day.get('label')}"
+                )
+
+    def test_heatmap_max_matches_data(self, api, base_url):
+        """'max' equals the maximum hourly event count across all days."""
+        d = assert_ok(get(api, base_url, "/api/heatmap"))
+        computed_max = max(
+            (max(day["hours"]) for day in d["days"]), default=0  # 0 when 'days' is empty
+        )
+        assert d["max"] == computed_max, (
+            f"'max' field {d['max']} does not match computed max {computed_max}"
+        )

From cfebd50e761bd67cd483031381907a006f01f109 Mon Sep 17 00:00:00 2001
From: Vivek Chand <vivekchand19@gmail.com>
Date: Mon, 23 Mar 2026 10:19:33 +0100
Subject: [PATCH 3/3] content: ClawMetry vs NemoClaw + OpenClaw memory
 monitoring blog posts

- docs/blog/nemo-vs-clawmetry.md: comparison targeting GTC buzz
  Angle: local-first free (ClawMetry) vs enterprise-priced (NemoClaw)
  Validates the market, highlights ClawMetry's zero-setup advantage

- docs/blog/openclaw-memory-monitoring.md: SEO gap filler
  Targets 'openclaw memory monitoring' (zero existing content)
  Explains memory drift, context window consumption, ClawMetry analytics

Both CTA to 'pip install clawmetry'
---
 docs/blog/nemo-vs-clawmetry.md          | 84 ++++++++++++++++++++++
 docs/blog/openclaw-memory-monitoring.md | 93 +++++++++++++++++++++++++
 2 files changed, 177 insertions(+)
 create mode 100644 docs/blog/nemo-vs-clawmetry.md
 create mode 100644 docs/blog/openclaw-memory-monitoring.md

diff --git a/docs/blog/nemo-vs-clawmetry.md b/docs/blog/nemo-vs-clawmetry.md
new file mode 100644
index 0000000..0ca18fc
--- /dev/null
+++ b/docs/blog/nemo-vs-clawmetry.md
@@ -0,0 +1,84 @@
+# ClawMetry vs NemoClaw: Which OpenClaw Observability Tool Is Right for You?
+
+**TL;DR:** NemoClaw is enterprise observability for OpenClaw, built around Kubernetes and cloud infrastructure. ClawMetry is free, open source, and runs on your laptop in 30 seconds. Different tools, different audiences.
+
+NVIDIA just announced NemoClaw at GTC, and the reaction in the OpenClaw community has been fascinating. Some folks are excited. Others are confused. And a surprising number are asking: "Wait, isn't that what ClawMetry already does?"
+
+Kind of. But not really. Let me break it down.
+
+## What NemoClaw Is
+
+NemoClaw is NVIDIA's enterprise observability layer for OpenClaw deployments. It's built on their NeMo Agent Toolkit and targets teams running OpenClaw at scale — think Fortune 500 companies, research labs, and cloud-first organizations.
+
+Features include multi-node fleet management, cloud dashboards, compliance reporting, and deep integration with Kubernetes. It's designed to plug into existing enterprise observability stacks (Datadog, Dynatrace, OpenTelemetry).
+
+The target user: a platform engineering team managing dozens of OpenClaw nodes across production, staging, and dev environments.
+
+## What ClawMetry Is
+
+ClawMetry is an open source monitoring dashboard for OpenClaw that you can install in 30 seconds:
+
+```bash
+pip install clawmetry
+clawmetry
+```
+
+That's it. Open your browser, and you've got a full dashboard showing agent sessions, token usage, memory file health, brain activity, security posture, and cron job status.
+
+The target user: a solo developer or small team who wants to understand what their AI agent is actually doing.
+
+## The Core Difference: Local vs Cloud
+
+This is the real split.
+
+**ClawMetry** is local-first. Your data never leaves your machine. The dashboard reads directly from OpenClaw's log files and JSONL session records. No API keys, no accounts, no cloud subscription. Privacy by default.
+
+**NemoClaw** is cloud-first. It's designed for scenarios where you need centralized visibility across multiple nodes, cloud-hosted dashboards, and enterprise SLAs. That requires infrastructure, and infrastructure costs money.
+
+Neither approach is wrong. They solve different problems.
+
+## Feature Comparison
+
+| Feature | ClawMetry | NemoClaw |
+|---|---|---|
+| Price | Free, open source | Enterprise pricing |
+| Setup time | 30 seconds | Days/weeks |
+| Infrastructure | None (runs locally) | Kubernetes/cloud |
+| Data privacy | 100% local | Cloud-hosted |
+| Multi-node fleet | Basic | Full |
+| Compliance reporting | No | Yes |
+| Token cost tracking | Yes | Yes |
+| Memory file analytics | Yes | Unknown |
+| Brain/session visualization | Yes | Yes |
+| Security posture scan | Yes | Unknown |
+
+## When to Use ClawMetry
+
+- You're a solo developer or small team
+- You care about data privacy and don't want logs in the cloud
+- You want something that works immediately with zero setup
+- You're on a budget (free is good)
+- You want open source you can inspect and modify
+
+## When to Consider NemoClaw
+
+- You're running OpenClaw at enterprise scale (20+ nodes)
+- You need compliance reporting for auditors
+- You're already using Kubernetes and want everything in one place
+- Your organization requires vendor support and SLAs
+
+## The Bottom Line
+
+NemoClaw's announcement actually validates what ClawMetry has been saying for months: OpenClaw observability is a real problem worth solving. When NVIDIA builds an enterprise product in your space, it's a good sign.
+
+But enterprise tooling isn't right for everyone. If you want to understand what your AI agent is doing right now, without signing up for anything or setting up infrastructure, ClawMetry is your tool.
+
+```bash
+pip install clawmetry
+```
+
+One command. Your dashboard is running in 30 seconds.
+
+---
+
+*ClawMetry is free and open source. Star it on [GitHub](https://github.com/vivekchand/clawmetry) and try it today.*
diff --git a/docs/blog/openclaw-memory-monitoring.md b/docs/blog/openclaw-memory-monitoring.md
new file mode 100644
index 0000000..5f2919b
--- /dev/null
+++ b/docs/blog/openclaw-memory-monitoring.md
@@ -0,0 +1,93 @@
+# OpenClaw Memory Monitoring: Why Your Agent's "Brain" Needs Watching
+
+**TL;DR:** OpenClaw agents persist their personality and context in files like SOUL.md, MEMORY.md, and AGENTS.md. If these drift silently, your agent changes behavior without you noticing. ClawMetry is the only tool that monitors this.
+
+Here's something most people running OpenClaw agents don't think about until something goes wrong: your agent has memory, and that memory can drift.
+
+Not dramatically. Not in ways that set off alarms. Just quietly and gradually: your agent's SOUL.md gets a new paragraph, your MEMORY.md grows by a few kilobytes every day, and six weeks later you're wondering why your agent feels "different" from when you first set it up.
+
+This is OpenClaw memory drift, and it's more common than you'd think.
+
+## What Is OpenClaw Memory?
+
+OpenClaw agents persist their identity and context across sessions through a set of files in the workspace:
+
+- **SOUL.md** — The agent's personality, values, and behavioral guidelines
+- **MEMORY.md** — Long-term curated knowledge the agent has built up
+- **AGENTS.md** — Workspace conventions and task state rules
+- **memory/YYYY-MM-DD.md** — Daily raw notes and session logs
+
+These aren't configuration files in the traditional sense. They're more like a brain. The agent reads them at the start of each session to reconstruct who it is and what it knows.
+
+And like any brain, they need maintenance.
+
+## Why Memory Drift Is a Problem
+
+When memory files grow unchecked, a few things happen:
+
+**Context window consumption.** If your MEMORY.md is 32KB and your SOUL.md is another 16KB, that's 48KB of context before your agent has read a single user message. At roughly 3-4 characters per token, that is about 12K-16K tokens, so for a model with a 128K token context window you've burned roughly 10-12% before the conversation starts. This adds up fast.
+
+**Stale knowledge.** Daily memory files accumulate. An agent that's been running for 60 days has 60 daily log files. Most of that is outdated context that was relevant in January but is now just noise.
+
+**Silent personality changes.** When an agent updates its own SOUL.md or MEMORY.md (which it often does during conversations), the changes can be subtle. A new preference here, a modified rule there. Over weeks, the cumulative drift can meaningfully change how the agent behaves.
+
+None of this triggers an error. No alert fires. The agent just quietly becomes someone slightly different.
+
+## How ClawMetry Monitors OpenClaw Memory
+
+ClawMetry's Memory tab includes a built-in analytics panel that tracks all of this automatically.
+
+Open ClawMetry, click Memory, and you'll see:
+
+**Memory health status** — A quick green/yellow/red indicator showing whether your memory files are healthy, growing large, or at risk of bloat.
+
+**Context budget bars** — Visual indicators showing what percentage of common model context windows (Claude 200K, GPT-4 128K, Gemini 1M) your memory files are consuming. If your memory files are eating 25% of your Claude context before the conversation starts, you'll see it immediately.
+
+**Largest files chart** — A bar chart showing which files are biggest, with color coding for files that need attention.
+
+**Daily growth sparkline** — A 30-day chart of how your daily memory files are growing. A healthy pattern is steady. A hockey stick pattern means something is accumulating.
+
+**Recommendations** — Specific suggestions for files that have grown too large, with guidance on what to prune.
+
+## Catching a Memory Change in Action
+
+Here's a real example of ClawMetry catching drift. An agent had been running for a month. The Memory tab showed MEMORY.md had grown from 8KB to 23KB over 30 days.
+
+Drilling in, the largest files were: MEMORY.md (23KB), memory/2026-02-14.md (18KB), SOUL.md (12KB).
+
+The SOUL.md flag was the interesting one. At 12KB, it had grown significantly from its original 4KB. Reviewing the file showed the agent had been adding detailed notes about every project it touched, slowly transforming what was meant to be a personality guide into a project wiki.
+
+Without ClawMetry's memory analytics, this would have been invisible until the agent started behaving strangely.
+
+## Setting Up OpenClaw Memory Monitoring
+
+```bash
+pip install clawmetry
+clawmetry
+```
+
+Navigate to the Memory tab. ClawMetry auto-discovers your OpenClaw workspace and starts monitoring immediately.
+
+No configuration. No API keys. No cloud setup.
+
+The memory analytics panel loads automatically and gives you an instant health snapshot of your agent's brain.
+
+## The Rule of Thumb
+
+A healthy OpenClaw memory setup:
+- SOUL.md under 8KB (personality guide, not a wiki)
+- MEMORY.md under 16KB (curated wisdom, not a log dump)
+- Daily files older than 30 days archived or deleted
+- Total memory context under 10% of your model's context window
+
+ClawMetry monitors all of this and flags when you're approaching these thresholds, before memory bloat becomes a problem.
+
+---
+
+*ClawMetry is the only OpenClaw monitoring tool with built-in memory analytics. Free and open source.*
+
+```bash
+pip install clawmetry
+```
+
+*[Star on GitHub](https://github.com/vivekchand/clawmetry) — contributions welcome.*