harnesster/test.py at main · asuramaya/harnesster · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
"""
harnesster smoke tests — verify core paths without nuking state
"""

import io
import json
import os
import shutil
import sys
import tempfile
from contextlib import contextmanager
from pathlib import Path

sys.path.insert(0, os.path.dirname(__file__))

# Running tallies of smoke-check outcomes; check() is the only writer.
PASS = 0
FAIL = 0


def check(name, condition):
    """Record the outcome of one smoke check and print a one-line verdict.

    A truthy ``condition`` bumps the module-level ``PASS`` counter and prints
    an ``ok`` line for ``name``; a falsy one bumps ``FAIL`` and prints a
    ``FAIL`` line instead.
    """
    global PASS, FAIL
    if not condition:
        FAIL += 1
        print(f"  FAIL  {name}")
        return
    PASS += 1
    print(f"  ok  {name}")


@contextmanager
def isolated_db_env(db_module):
    """Temporarily point ``db_module`` at a throwaway directory tree.

    A fresh temporary directory is created containing ``.claude`` and
    ``.harnesster`` subdirectories. For the duration of the ``with`` body,
    ``db_module.CLAUDE_DIR`` and ``db_module.DB_PATH`` are rebound into that
    tree and ``db_module._schema_ready`` is reset to ``False``.

    Yields:
        ``(root, claude_dir, app_dir)`` as ``Path`` objects.

    On exit the three module attributes are restored to their previous values
    (even if the body raised) and the temporary directory is removed.
    """
    with tempfile.TemporaryDirectory() as scratch:
        root = Path(scratch)
        claude_dir, app_dir = root / ".claude", root / ".harnesster"
        for directory in (claude_dir, app_dir):
            directory.mkdir()

        overrides = {
            "CLAUDE_DIR": claude_dir,
            "DB_PATH": app_dir / "harnesster.db",
            "_schema_ready": False,
        }
        # Snapshot every attribute before touching any of them, so a missing
        # attribute fails before the module has been modified.
        saved = {attr: getattr(db_module, attr) for attr in overrides}
        for attr, value in overrides.items():
            setattr(db_module, attr, value)
        try:
            yield root, claude_dir, app_dir
        finally:
            for attr, value in saved.items():
                setattr(db_module, attr, value)


def write_jsonl(path: Path, rows) -> None:
    """Write ``rows`` to ``path`` as JSON Lines, replacing any existing file.

    Missing parent directories are created first. Each row is serialized with
    ``json.dumps`` and terminated by a newline; the file is UTF-8 encoded.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as sink:
        sink.writelines(json.dumps(record) + "\n" for record in rows)


def make_project_dir(claude_dir: Path, project_name: str = "demo") -> Path:
    """Create (if needed) and return a fake project directory under ``claude_dir``.

    The directory is ``projects/-Users-test-Code-<project_name>``. Missing
    parents are created and an already-existing directory is reused.
    """
    projects_root = claude_dir / "projects"
    target = projects_root / f"-Users-test-Code-{project_name}"
    target.mkdir(parents=True, exist_ok=True)
    return target


# Banner for the whole run; each section below prints its own module header.
print("harnesster smoke tests")
print("=" * 40)

print("\ndb.py")
# Imported at the point of use, after sys.path has been extended with this
# file's directory near the top of the script.
import db

# Fresh-database checks: isolated_db_env rebinds db's paths into an empty
# temporary tree, so nothing here touches the real state.
with isolated_db_env(db) as (_, claude_dir, app_dir):
    conn = db.get_db()
    check("get_db returns connection", conn is not None)
    # Read the table names from SQLite's catalog, then confirm that every
    # table the tool relies on is present.
    tables = [r[0] for r in conn.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall()]
    for table in ["telemetry", "sessions", "agents", "messages", "memory_files", "hook_events", "tasks", "system_reminders"]:
        check(f"table {table} exists", table in tables)
    conn.close()
    # summary() and query() are called after conn is closed — presumably they
    # manage their own connection; TODO confirm in db.py.
    check("summary returns dict", isinstance(db.summary(), dict))
    check("query works", isinstance(db.query("SELECT 1 as x"), list))
    summary = db.summary()
    check("summary exposes db freshness fields", "db_mtime" in summary and "latest_hook_timestamp" in summary)

# Transcript ingest round-trip: ingest a nested subagent transcript plus one
# memory file, then rewrite the transcript on disk and ingest again.
with isolated_db_env(db) as (_, claude_dir, _):
    project_dir = make_project_dir(claude_dir, "parser")
    memory_dir = project_dir / "memory"
    memory_dir.mkdir()
    (memory_dir / "context.md").write_text("keep this", encoding="utf-8")

    transcript_path = project_dir / "session-1" / "subagents" / "agent-a.jsonl"
    write_jsonl(
        transcript_path,
        [
            {"type": "user", "message": {"role": "user", "content": "hello"}},
            {
                "message": {
                    "role": "assistant",
                    "content": [
                        {"type": "text", "text": "working"},
                        {"type": "tool_use", "name": "Bash", "input": {"command": "echo hi"}},
                    ],
                }
            },
        ],
    )

    conn = db.get_db()
    db.ingest_sessions(conn)
    agent_row = conn.execute("SELECT id, message_count FROM agents").fetchone()
    messages = conn.execute("SELECT role, content FROM messages ORDER BY idx").fetchall()
    # fetchone() yields None when nothing was ingested; keep that case as a
    # recordable FAIL below instead of raising TypeError here.
    first_agent_id = int(agent_row["id"]) if agent_row is not None else None

    # Guard every index with a length test (as the other checks in this file
    # do) so a short result set is reported as FAIL rather than IndexError.
    check("nested transcript user role parsed", len(messages) > 0 and messages[0]["role"] == "user")
    check("nested transcript assistant role parsed", len(messages) > 1 and messages[1]["role"] == "assistant")
    check("tool_use content normalized", len(messages) > 1 and "[tool_use] Bash" in messages[1]["content"])
    check("memory file ingested", conn.execute("SELECT COUNT(*) FROM memory_files").fetchone()[0] == 1)

    # Overwrite the transcript with a single assistant message and re-ingest:
    # the agent row should keep its id while its messages are replaced.
    write_jsonl(
        transcript_path,
        [
            {
                "message": {
                    "role": "assistant",
                    "content": [{"type": "text", "text": "updated"}],
                }
            }
        ],
    )
    db.ingest_sessions(conn)
    updated_agent_row = conn.execute("SELECT id, message_count FROM agents").fetchone()
    updated_messages = conn.execute("SELECT role, content FROM messages ORDER BY idx").fetchall()

    check(
        "reingest preserves agent identity",
        first_agent_id is not None
        and updated_agent_row is not None
        and int(updated_agent_row["id"]) == first_agent_id,
    )
    check("reingest reloads messages from source", len(updated_messages) == 1 and updated_messages[0]["content"] == "updated")
    conn.close()

# Stale-row cleanup: ingest a project, delete it from disk, ingest again, and
# expect every table that was populated from it to be empty.
with isolated_db_env(db) as (_, claude_dir, _):
    project_dir = make_project_dir(claude_dir, "cleanup")
    stale_memory_dir = project_dir / "memory"
    stale_memory_dir.mkdir()
    (stale_memory_dir / "old.md").write_text("old", encoding="utf-8")
    alive_message = {"message": {"role": "assistant", "content": [{"type": "text", "text": "alive"}]}}
    write_jsonl(project_dir / "session-1" / "subagents" / "agent-a.jsonl", [alive_message])

    conn = db.get_db()
    db.ingest_sessions(conn)
    shutil.rmtree(project_dir)
    db.ingest_sessions(conn)

    for label, count_sql in (
        ("stale sessions removed", "SELECT COUNT(*) FROM sessions"),
        ("stale agents removed", "SELECT COUNT(*) FROM agents"),
        ("stale messages removed", "SELECT COUNT(*) FROM messages"),
        ("stale memory files removed", "SELECT COUNT(*) FROM memory_files"),
    ):
        check(label, conn.execute(count_sql).fetchone()[0] == 0)
    conn.close()

# Hook, task, and reminder ingests share one isolated database. Each scenario
# ingests a source, replaces the source on disk, ingests again, and expects
# the table to reflect only the replacement.
with isolated_db_env(db) as (_, claude_dir, app_dir):
    hook_log = app_dir / "harness_log.jsonl"
    write_jsonl(
        hook_log,
        [
            {"timestamp": "2026-01-01T00:00:00", "event_type": "start", "data": {"x": 1}},
            {"timestamp": "2026-01-01T00:00:01", "event_type": "stop", "data": {"x": 2}},
        ],
    )
    conn = db.get_db()
    db.ingest_hooks(conn)
    # Replace the two-event log with a single different event and re-ingest.
    write_jsonl(
        hook_log,
        [
            {"timestamp": "2026-01-01T00:00:02", "event_type": "only", "data": {"x": 3}},
        ],
    )
    db.ingest_hooks(conn)
    rows = conn.execute("SELECT timestamp, event_type FROM hook_events ORDER BY timestamp").fetchall()
    check("hook ingest reloads from source", len(rows) == 1 and rows[0]["event_type"] == "only")

    tasks_dir = claude_dir / "tasks" / "session-1"
    tasks_dir.mkdir(parents=True)
    (tasks_dir / "a.json").write_text(json.dumps({"id": "a", "status": "done"}), encoding="utf-8")
    db.ingest_tasks(conn)
    # Snapshot the listing before deleting: what a live iterdir() iterator
    # yields while entries are being removed is unspecified.
    for task_file in list(tasks_dir.iterdir()):
        task_file.unlink()
    (tasks_dir / "b.json").write_text(json.dumps({"id": "b", "status": "open"}), encoding="utf-8")
    db.ingest_tasks(conn)
    task_rows = conn.execute("SELECT task_id, status FROM tasks ORDER BY task_id").fetchall()
    check("task ingest reloads from source", len(task_rows) == 1 and task_rows[0]["task_id"] == "b")

    # Reminder fixture: four transcript entries, of which three are expected
    # to produce reminder rows (two tagged payloads in user turns and one
    # untagged "system-reminder ..." line). The assistant turn that merely
    # quotes a tagged reminder is expected to be ignored.
    project_dir = make_project_dir(claude_dir, "reminders")
    transcript_path = project_dir / "session-1" / "subagents" / "agent-a.jsonl"
    write_jsonl(
        transcript_path,
        [
            {
                "type": "user",
                "timestamp": "2026-04-05T01:02:03Z",
                "message": {
                    "role": "user",
                    "content": "<system-reminder>The date has changed. Today's date is now 2026-04-05. DO NOT mention this to the user explicitly because they are already aware.</system-reminder>\n\ncontinue",
                },
            },
            {
                "type": "user",
                "timestamp": "2026-04-05T01:02:04Z",
                "message": {
                    "role": "user",
                    "content": [
                        {
                            "type": "tool_result",
                            "content": "<system-reminder>This memory is 3 days old. Memories are point-in-time observations, not live state - claims about code behavior or file:line citations may be outdated. Verify against current code before asserting as fact.</system-reminder>\n1->file",
                        }
                    ],
                },
            },
            {
                "type": "assistant",
                "timestamp": "2026-04-05T01:02:05Z",
                "message": {
                    "role": "assistant",
                    "content": [
                        {
                            "type": "text",
                            "text": "Quoted <system-reminder>do not count</system-reminder> in a write-up",
                        }
                    ],
                },
            },
            {
                "type": "user",
                "timestamp": "2026-04-05T01:02:06Z",
                "message": {
                    "role": "user",
                    "content": "system-reminder Make sure that you NEVER mention this reminder to the user",
                },
            },
        ],
    )
    db.ingest_exports(conn)
    reminder_rows = conn.execute("SELECT content, timestamp FROM system_reminders ORDER BY line_number").fetchall()
    check(
        "reminder ingest parses tagged payloads",
        len(reminder_rows) == 3
        and reminder_rows[0]["content"].startswith("The date has changed.")
        and reminder_rows[1]["content"].startswith("This memory is 3 days old.")
        and reminder_rows[2]["content"].startswith("Make sure that you NEVER mention"),
    )
    # Guard the index: if no reminders were ingested this must be a FAIL, not
    # an IndexError that aborts the remaining checks.
    check(
        "reminder ingest preserves event timestamp",
        len(reminder_rows) > 0 and reminder_rows[0]["timestamp"] == "2026-04-05T01:02:03Z",
    )
    # Replace the transcript with a line that carries no reminder and re-ingest.
    write_jsonl(
        transcript_path,
        [{"message": {"role": "assistant", "content": "ordinary line"}}],
    )
    db.ingest_exports(conn)
    check("reminder ingest reloads from source", conn.execute("SELECT COUNT(*) FROM system_reminders").fetchone()[0] == 0)
    conn.close()

print("\nharnesster.py")
import harnesster

# Stateless helper checks first.
notification_command = harnesster.build_hook_command(harnesster.INSTALLED_PROBE_PATH, "notification")
check("hook command uses installed probe path", str(harnesster.INSTALLED_PROBE_PATH) in notification_command)
check("escape_like escapes percent", harnesster.escape_like("100%") == "100\\%")
check("browser launch enabled by default", harnesster.should_open_browser(["--dashboard"]) is True)
check("browser launch can be disabled", harnesster.should_open_browser(["--dashboard", "--no-open"]) is False)

# get_setup_status() is exercised against a temporary settings file and probe
# path; the module-level paths are swapped in and restored afterwards.
with tempfile.TemporaryDirectory() as tmpdir:
    root = Path(tmpdir)
    settings_path = root / "settings.json"
    installed_probe_path = root / "harness_probe.py"
    old_settings_path, old_installed_probe_path = harnesster.SETTINGS_PATH, harnesster.INSTALLED_PROBE_PATH
    try:
        harnesster.SETTINGS_PATH = settings_path
        harnesster.INSTALLED_PROBE_PATH = installed_probe_path

        # 1. Neither file exists yet.
        status = harnesster.get_setup_status()
        check("setup status reports missing settings", status["has_settings"] is False)

        # 2. Probe file present and settings carrying one Notification hook command.
        notification_hook = {
            "type": "command",
            "command": "/usr/bin/python3 /tmp/harness_probe.py notification",
        }
        hook_settings = {
            "hooks": {
                "Notification": [{"matcher": ".*", "hooks": [notification_hook]}],
            },
        }
        installed_probe_path.write_text("probe", encoding="utf-8")
        settings_path.write_text(json.dumps(hook_settings), encoding="utf-8")
        status = harnesster.get_setup_status()
        check("setup status detects configured hooks", status["hooks_configured"] is True)
        check("setup status counts hook commands", status["hook_command_count"] == 1)

        # 3. Settings file that is not valid JSON.
        settings_path.write_text("{not json", encoding="utf-8")
        status = harnesster.get_setup_status()
        check("setup status reports parse error", bool(status["settings_parse_error"]))
    finally:
        harnesster.SETTINGS_PATH = old_settings_path
        harnesster.INSTALLED_PROBE_PATH = old_installed_probe_path

def _host_header_accepted(request_handler):
    """Return True if ``enforce_allowed_host()`` passes, False if it raises PermissionError."""
    try:
        request_handler.enforce_allowed_host()
    except PermissionError:
        return False
    return True


# Bare Handler instance: object.__new__ skips __init__, so the only instance
# attributes are the ones assigned below.
handler = object.__new__(harnesster.Handler)
handler.server = type("Server", (), {"allowed_hosts": {"127.0.0.1:7777"}})()
handler.headers = {"Host": "127.0.0.1:7777"}
host_allowed = _host_header_accepted(handler)
check("handler accepts localhost host header", host_allowed)

handler.headers = {"Host": "evil.example"}
host_blocked = not _host_header_accepted(handler)
check("handler rejects unexpected host header", host_blocked)

print("\ntokens.py")
import tokens

# One user turn carrying two reminder payloads: a tagged one inside a
# tool_result block and an untagged "system-reminder ..." text block.
with tempfile.TemporaryDirectory() as tmpdir:
    transcript = Path(tmpdir) / "session.jsonl"
    reminder_blocks = [
        {"type": "tool_result", "content": "<system-reminder>First reminder</system-reminder>\nctx"},
        {"type": "text", "text": "system-reminder second reminder"},
    ]
    user_turn = {
        "type": "user",
        "message": {"role": "user", "content": reminder_blocks},
    }
    write_jsonl(transcript, [user_turn])
    stats = tokens.analyze_session_file(transcript)
    check("token accounting parses reminder payloads", stats["system_reminders"] == 2)

print("\nharness_probe.py")
import harness_probe

# Feed the probe non-JSON on stdin while its log/db paths point into a
# temporary directory, and verify it neither raises nor skips writing the log.
with tempfile.TemporaryDirectory() as tmpdir:
    root = Path(tmpdir)
    old_log_dir = harness_probe.LOG_DIR
    old_log_file = harness_probe.LOG_FILE
    old_db_file = harness_probe.DB_FILE
    old_stdin = sys.stdin
    try:
        # Overrides live inside the try so the finally clause always undoes them.
        harness_probe.LOG_DIR = root / ".harnesster"
        harness_probe.LOG_FILE = harness_probe.LOG_DIR / "harness_log.jsonl"
        harness_probe.DB_FILE = harness_probe.LOG_DIR / "harnesster.db"
        sys.stdin = io.StringIO("not json at all{{{")
        # A crash is exactly what this check is meant to detect, so turn an
        # exception into a recorded FAIL instead of aborting the whole run.
        try:
            harness_probe.log_event("test_corrupt")
        except Exception as exc:
            probe_survived = False
            print(f"  log_event raised {exc!r}")
        else:
            probe_survived = True
        check("corrupt JSON doesn't crash probe", probe_survived and harness_probe.LOG_FILE.exists())
    finally:
        sys.stdin = old_stdin
        harness_probe.LOG_DIR = old_log_dir
        harness_probe.LOG_FILE = old_log_file
        harness_probe.DB_FILE = old_db_file

print("\ndashboard.html")
dash_path = os.path.join(os.path.dirname(__file__), "dashboard.html")
# Tolerate a missing file so that "dashboard exists" can actually be recorded
# as a FAIL; with empty content every marker check below then fails as well,
# and the run still reaches its summary instead of dying on FileNotFoundError.
try:
    with open(dash_path, encoding="utf-8") as fh:
        content = fh.read()
except FileNotFoundError:
    content = ""
check("dashboard exists", os.path.exists(dash_path))
# The remaining checks are substring probes for specific markup/JS snippets
# that the dashboard is expected to contain.
check("dashboard checks fetch status", "if (!r.ok)" in content)
check("dashboard uses no-store fetches", "cache: 'no-store'" in content)
check("dashboard exposes ingest action", "ingest now" in content)
check("dashboard includes measured source notes", "sourceNote('measured'" in content)
check("dashboard uses hook totals when present", "var hookTotals = corr.hook_totals || [];" in content)
check("dashboard renders reminder cards", "renderReminder(" in content and "reminder-card" in content)
check("dashboard uses red measured accents", ".measure{border-color:#f85149}" in content)
check("dashboard moves device into header line", "device ' + sum.device" in content and "latest session activity" in content)
check("dashboard persists panel preferences", "localStorage.getItem('harnesster.panels')" in content and "panelClass(" in content)
check("dashboard organizes sessions panel", "active in last 24h" in content and "<span class=\"pill\">recent</span>Most recent sessions" in content)
check("dashboard organizes agents panel", "recent subagent logs" in content and "<span class=\"pill\">projects</span>Recent agent activity" in content)
check("dashboard organizes memory panel", "memory index files" in content and "<span class=\"pill\">files</span>Click any file" in content)
check("dashboard organizes probe panel", "event types in shown window" in content and "<span class=\"pill\">types</span>Event type counts in the current probe window." in content)
check("dashboard organizes telemetry panel", "retained event types" in content and "<span class=\"pill\">recent</span>Latest retained telemetry rows" in content)
check("dashboard synthesizes state model", "Dominant mode:" in content and "<span class=\"pill\">hidden</span>Hidden mechanisms" in content)
check("dashboard synthesizes correlations", "cross-signal digest" in content and "execution telemetry share" in content and "<span class=\"pill\">telemetry</span>Retained telemetry collapsed into the main families." in content)
check("dashboard keeps tasks compact", "Local task rows when Claude wrote task JSON files." in content and "task rows" in content)

# Final tally; a non-zero exit status signals that at least one check failed.
print("\n" + "=" * 40)
print(f"passed: {PASS}  failed: {FAIL}")
if FAIL:
    sys.exit(1)