diff --git a/bin/lib/debug.js b/bin/lib/debug.js new file mode 100644 index 000000000..5465b8042 --- /dev/null +++ b/bin/lib/debug.js @@ -0,0 +1,4 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +module.exports = require("../../dist/lib/debug"); diff --git a/bin/nemoclaw.js b/bin/nemoclaw.js index 0d20c5e41..a5eb719d1 100755 --- a/bin/nemoclaw.js +++ b/bin/nemoclaw.js @@ -794,15 +794,48 @@ function stop() { } function debug(args) { - const result = spawnSync("bash", [path.join(SCRIPTS, "debug.sh"), ...args], { - stdio: "inherit", - cwd: ROOT, - env: { - ...process.env, - SANDBOX_NAME: registry.listSandboxes().defaultSandbox || "", - }, - }); - exitWithSpawnResult(result); + const { runDebug } = require("./lib/debug"); + const opts = {}; + for (let i = 0; i < args.length; i++) { + switch (args[i]) { + case "--help": + case "-h": + console.log("Collect NemoClaw diagnostic information\n"); + console.log("Usage: nemoclaw debug [--quick] [--output FILE] [--sandbox NAME]\n"); + console.log("Options:"); + console.log(" --quick, -q Only collect minimal diagnostics"); + console.log(" --output, -o FILE Write a tarball to FILE"); + console.log(" --sandbox NAME Target sandbox name"); + process.exit(0); + break; + case "--quick": + case "-q": + opts.quick = true; + break; + case "--output": + case "-o": + if (!args[i + 1] || args[i + 1].startsWith("-")) { + console.error("Error: --output requires a file path argument"); + process.exit(1); + } + opts.output = args[++i]; + break; + case "--sandbox": + if (!args[i + 1] || args[i + 1].startsWith("-")) { + console.error("Error: --sandbox requires a name argument"); + process.exit(1); + } + opts.sandboxName = args[++i]; + break; + default: + console.error(`Unknown option: ${args[i]}`); + process.exit(1); + } + } + if (!opts.sandboxName) { + opts.sandboxName = registry.listSandboxes().defaultSandbox || undefined; + } + runDebug(opts); } function uninstall(args) { diff --git a/src/lib/debug.test.ts b/src/lib/debug.test.ts new file mode 100644 index 000000000..7aa76d195 --- /dev/null +++ b/src/lib/debug.test.ts @@ -0,0 +1,52 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { describe, it, expect } from "vitest"; +// Import from compiled dist/ so coverage is attributed correctly. +import { redact } from "../../dist/lib/debug"; + +describe("redact", () => { + it("redacts NVIDIA_API_KEY=value patterns", () => { + const key = ["NVIDIA", "API", "KEY"].join("_"); + expect(redact(`${key}=some-value`)).toBe(`${key}=`); + }); + + it("redacts generic KEY/TOKEN/SECRET/PASSWORD env vars", () => { + expect(redact("API_KEY=secret123")).toBe("API_KEY="); + expect(redact("MY_TOKEN=tok_abc")).toBe("MY_TOKEN="); + expect(redact("DB_PASSWORD=hunter2")).toBe("DB_PASSWORD="); + expect(redact("MY_SECRET=s3cret")).toBe("MY_SECRET="); + expect(redact("CREDENTIAL=cred")).toBe("CREDENTIAL="); + }); + + it("redacts nvapi- prefixed keys", () => { + expect(redact("using key nvapi-AbCdEfGhIj1234")).toBe("using key "); + }); + + it("redacts classic GitHub personal access tokens (ghp_)", () => { + expect(redact("token: ghp_" + "a".repeat(36))).toBe("token: "); + }); + + it("redacts fine-grained GitHub personal access tokens (github_pat_)", () => { + expect(redact("token: github_pat_" + "A".repeat(40))).toBe("token: "); + }); + + it("redacts Bearer tokens", () => { + expect(redact("Authorization: Bearer eyJhbGciOiJIUzI1NiJ9.payload.sig")).toBe( + "Authorization: Bearer ", + ); + }); + + it("handles multiple patterns in one string", () => { + const input = "API_KEY=secret nvapi-abcdefghijk Bearer tok123"; + const result = redact(input); + expect(result).not.toContain("secret"); + expect(result).not.toContain("nvapi-abcdefghijk"); + expect(result).not.toContain("tok123"); + }); + + it("leaves clean text unchanged", () => { + const clean = "Hello world, no secrets here"; + expect(redact(clean)).toBe(clean); + }); +}); diff --git a/src/lib/debug.ts b/src/lib/debug.ts new file mode 100644 index 000000000..2ce3bd480 --- /dev/null +++ b/src/lib/debug.ts @@ -0,0 +1,487 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { execFileSync, spawnSync } from "node:child_process"; +import { existsSync, mkdtempSync, rmSync, unlinkSync, writeFileSync } from "node:fs"; +import { platform, tmpdir } from "node:os"; +import { basename, dirname, join } from "node:path"; + +// --------------------------------------------------------------------------- +// Types +// --------------------------------------------------------------------------- + +export interface DebugOptions { + /** Target sandbox name (auto-detected if omitted). */ + sandboxName?: string; + /** Only collect minimal diagnostics. */ + quick?: boolean; + /** Write a tarball to this path. */ + output?: string; +} + +// --------------------------------------------------------------------------- +// Colour helpers — respect NO_COLOR +// --------------------------------------------------------------------------- + +const useColor = !process.env.NO_COLOR && process.stdout.isTTY; +const GREEN = useColor ? "\x1b[0;32m" : ""; +const YELLOW = useColor ? "\x1b[1;33m" : ""; +const CYAN = useColor ? "\x1b[0;36m" : ""; +const NC = useColor ? "\x1b[0m" : ""; + +function info(msg: string): void { + console.log(`${GREEN}[debug]${NC} ${msg}`); +} + +function warn(msg: string): void { + console.log(`${YELLOW}[debug]${NC} ${msg}`); +} + +function section(title: string): void { + console.log(`\n${CYAN}═══ ${title} ═══${NC}\n`); +} + +// --------------------------------------------------------------------------- +// Secret redaction +// --------------------------------------------------------------------------- + +const REDACT_PATTERNS: [RegExp, string][] = [ + [/(NVIDIA_API_KEY|API_KEY|TOKEN|SECRET|PASSWORD|CREDENTIAL|_KEY)=\S+/gi, "$1="], + [/nvapi-[A-Za-z0-9_-]{10,}/g, ""], + [/(?:ghp_|github_pat_)[A-Za-z0-9_]{30,}/g, ""], + [/(Bearer )\S+/gi, "$1"], +]; + +export function redact(text: string): string { + let result = text; + for (const [pattern, replacement] of REDACT_PATTERNS) { + result = result.replace(pattern, replacement); + } + return result; +} + +// --------------------------------------------------------------------------- +// Command runner +// --------------------------------------------------------------------------- + +const isMacOS = platform() === "darwin"; +const TIMEOUT_MS = 30_000; + +function commandExists(cmd: string): boolean { + try { + // Use sh -c with the command as a separate argument to avoid shell injection. + // While cmd values are hardcoded internally, this is defensive. + execFileSync("sh", ["-c", `command -v "$1"`, "--", cmd], { + stdio: ["ignore", "ignore", "ignore"], + }); + return true; + } catch { + return false; + } +} + +function collect(collectDir: string, label: string, command: string, args: string[]): void { + const filename = label.replace(/[ /]/g, (c) => (c === " " ? "_" : "-")); + const outfile = join(collectDir, `${filename}.txt`); + + if (!commandExists(command)) { + const msg = ` (${command} not found, skipping)`; + console.log(msg); + writeFileSync(outfile, msg + "\n"); + return; + } + + const result = spawnSync(command, args, { + timeout: TIMEOUT_MS, + stdio: ["ignore", "pipe", "pipe"], + encoding: "utf-8", + }); + + const raw = (result.stdout ?? "") + "\n" + (result.stderr ?? ""); + const redacted = redact(raw); + writeFileSync(outfile, redacted); + console.log(redacted.trimEnd()); + + if (result.status !== 0) { + console.log(" (command exited with non-zero status)"); + } +} + +/** Run a shell one-liner via `sh -c`. */ +function collectShell(collectDir: string, label: string, shellCmd: string): void { + const filename = label.replace(/[ /]/g, (c) => (c === " " ? "_" : "-")); + const outfile = join(collectDir, `${filename}.txt`); + + const result = spawnSync("sh", ["-c", shellCmd], { + timeout: TIMEOUT_MS, + stdio: ["ignore", "pipe", "pipe"], + encoding: "utf-8", + }); + + const raw = (result.stdout ?? "") + "\n" + (result.stderr ?? ""); + const redacted = redact(raw); + writeFileSync(outfile, redacted); + console.log(redacted.trimEnd()); + + if (result.status !== 0) { + console.log(" (command exited with non-zero status)"); + } +} + +// --------------------------------------------------------------------------- +// Auto-detect sandbox name +// --------------------------------------------------------------------------- + +function detectSandboxName(): string { + if (!commandExists("openshell")) return "default"; + try { + const output = execFileSync("openshell", ["sandbox", "list"], { + encoding: "utf-8", + timeout: 10_000, + stdio: ["ignore", "pipe", "ignore"], + }); + const lines = output.split("\n").filter((l) => l.trim().length > 0); + for (const line of lines) { + const first = line.trim().split(/\s+/)[0]; + if (first && first.toLowerCase() !== "name") return first; + } + } catch { + /* ignore */ + } + return "default"; +} + +// --------------------------------------------------------------------------- +// Diagnostic sections +// --------------------------------------------------------------------------- + +function collectSystem(collectDir: string, quick: boolean): void { + section("System"); + collect(collectDir, "date", "date", []); + collect(collectDir, "uname", "uname", ["-a"]); + collect(collectDir, "uptime", "uptime", []); + + if (isMacOS) { + collectShell( + collectDir, + "memory", + 'echo "Physical: $(($(sysctl -n hw.memsize) / 1048576)) MB"; vm_stat', + ); + } else { + collect(collectDir, "free", "free", ["-m"]); + } + + if (!quick) { + collect(collectDir, "df", "df", ["-h"]); + } +} + +function collectProcesses(collectDir: string, quick: boolean): void { + section("Processes"); + if (isMacOS) { + collectShell( + collectDir, + "ps-cpu", + "ps -eo pid,ppid,comm,%mem,%cpu | sort -k5 -rn | head -30", + ); + } else { + collectShell( + collectDir, + "ps-cpu", + "ps -eo pid,ppid,cmd,%mem,%cpu --sort=-%cpu | head -30", + ); + } + + if (!quick) { + if (isMacOS) { + collectShell( + collectDir, + "ps-mem", + "ps -eo pid,ppid,comm,%mem,%cpu | sort -k4 -rn | head -30", + ); + collectShell(collectDir, "top", "top -l 1 | head -50"); + } else { + collectShell( + collectDir, + "ps-mem", + "ps -eo pid,ppid,cmd,%mem,%cpu --sort=-%mem | head -30", + ); + collectShell(collectDir, "top", "top -b -n 1 | head -50"); + } + } +} + +function collectGpu(collectDir: string, quick: boolean): void { + section("GPU"); + collect(collectDir, "nvidia-smi", "nvidia-smi", []); + + if (!quick) { + collect(collectDir, "nvidia-smi-dmon", "nvidia-smi", [ + "dmon", + "-s", + "pucvmet", + "-c", + "10", + ]); + collect(collectDir, "nvidia-smi-query", "nvidia-smi", [ + "--query-gpu=name,utilization.gpu,utilization.memory,memory.total,memory.used,temperature.gpu,power.draw", + "--format=csv", + ]); + } +} + +function collectDocker(collectDir: string, quick: boolean): void { + section("Docker"); + collect(collectDir, "docker-ps", "docker", ["ps", "-a"]); + collect(collectDir, "docker-stats", "docker", ["stats", "--no-stream"]); + + if (!quick) { + collect(collectDir, "docker-info", "docker", ["info"]); + collect(collectDir, "docker-df", "docker", ["system", "df"]); + } + + // NemoClaw-labelled containers + if (commandExists("docker")) { + try { + const output = execFileSync( + "docker", + ["ps", "-a", "--filter", "label=com.nvidia.nemoclaw", "--format", "{{.Names}}"], + { encoding: "utf-8", timeout: TIMEOUT_MS, stdio: ["ignore", "pipe", "ignore"] }, + ); + const containers = output.split("\n").filter((c) => c.trim().length > 0); + for (const cid of containers) { + collect(collectDir, `docker-logs-${cid}`, "docker", ["logs", "--tail", "200", cid]); + if (!quick) { + collect(collectDir, `docker-inspect-${cid}`, "docker", ["inspect", cid]); + } + } + } catch { + /* docker not available or timed out */ + } + } +} + +function collectOpenshell( + collectDir: string, + sandboxName: string, + quick: boolean, +): void { + section("OpenShell"); + collect(collectDir, "openshell-status", "openshell", ["status"]); + collect(collectDir, "openshell-sandbox-list", "openshell", ["sandbox", "list"]); + collect(collectDir, "openshell-sandbox-get", "openshell", ["sandbox", "get", sandboxName]); + collect(collectDir, "openshell-logs", "openshell", ["logs", sandboxName]); + + if (!quick) { + collect(collectDir, "openshell-gateway-info", "openshell", ["gateway", "info"]); + } +} + +function collectSandboxInternals( + collectDir: string, + sandboxName: string, + quick: boolean, +): void { + if (!commandExists("openshell")) return; + + // Check if sandbox exists + try { + const output = execFileSync("openshell", ["sandbox", "list"], { + encoding: "utf-8", + timeout: 10_000, + stdio: ["ignore", "pipe", "ignore"], + }); + const names = output + .split("\n") + .map((l) => l.trim().split(/\s+/)[0]) + .filter((n) => n && n.toLowerCase() !== "name"); + if (!names.includes(sandboxName)) return; + } catch { + return; + } + + section("Sandbox Internals"); + + // Generate temporary SSH config + const sshConfigPath = join(tmpdir(), `nemoclaw-ssh-${String(Date.now())}`); + try { + const sshResult = spawnSync("openshell", ["sandbox", "ssh-config", sandboxName], { + timeout: TIMEOUT_MS, + stdio: ["ignore", "pipe", "ignore"], + encoding: "utf-8", + }); + if (sshResult.status !== 0) { + warn(`Could not generate SSH config for sandbox '${sandboxName}', skipping internals`); + return; + } + writeFileSync(sshConfigPath, sshResult.stdout ?? ""); + + const sshHost = `openshell-${sandboxName}`; + const sshBase = [ + "-F", + sshConfigPath, + "-o", + "StrictHostKeyChecking=no", + "-o", + "ConnectTimeout=10", + sshHost, + ]; + + // Use collect() with array args — no shell interpolation of sandboxName + collect(collectDir, "sandbox-ps", "ssh", [...sshBase, "ps", "-ef"]); + collect(collectDir, "sandbox-free", "ssh", [...sshBase, "free", "-m"]); + if (!quick) { + collect(collectDir, "sandbox-top", "ssh", [ + ...sshBase, + "top", + "-b", + "-n", + "1", + ]); + collect(collectDir, "sandbox-gateway-log", "ssh", [ + ...sshBase, + "tail", + "-200", + "/tmp/gateway.log", + ]); + } + } finally { + if (existsSync(sshConfigPath)) { + unlinkSync(sshConfigPath); + } + } +} + +function collectNetwork(collectDir: string): void { + section("Network"); + if (isMacOS) { + collectShell(collectDir, "listening", "netstat -anp tcp | grep LISTEN"); + collect(collectDir, "ifconfig", "ifconfig", []); + collect(collectDir, "routes", "netstat", ["-rn"]); + collect(collectDir, "dns-config", "scutil", ["--dns"]); + } else { + collect(collectDir, "ss", "ss", ["-ltnp"]); + collect(collectDir, "ip-addr", "ip", ["addr"]); + collect(collectDir, "ip-route", "ip", ["route"]); + collectShell(collectDir, "resolv-conf", "cat /etc/resolv.conf"); + } + collect(collectDir, "nslookup", "nslookup", ["integrate.api.nvidia.com"]); + collectShell( + collectDir, + "curl-models", + 'code=$(curl -s -o /dev/null -w "%{http_code}" https://integrate.api.nvidia.com/v1/models); echo "HTTP $code"; if [ "$code" -ge 200 ] && [ "$code" -lt 500 ]; then echo "NIM API reachable"; else echo "NIM API unreachable"; exit 1; fi', + ); + collectShell(collectDir, "lsof-net", "lsof -i -P -n 2>/dev/null | head -50"); + collect(collectDir, "lsof-18789", "lsof", ["-i", ":18789"]); +} + +function collectOnboardSession(collectDir: string, repoDir: string): void { + section("Onboard Session"); + const helperPath = join(repoDir, "bin", "lib", "onboard-session.js"); + if (!existsSync(helperPath) || !commandExists("node")) { + console.log(" (onboard session helper not available, skipping)"); + return; + } + + const script = [ + "const helper = require(process.argv[1]);", + "const summary = helper.summarizeForDebug();", + "if (!summary) { process.stdout.write('No onboard session state found.\\n'); process.exit(0); }", + "process.stdout.write(JSON.stringify(summary, null, 2) + '\\n');", + ].join(" "); + + collect(collectDir, "onboard-session-summary", "node", ["-e", script, helperPath]); +} + +function collectKernel(collectDir: string): void { + section("Kernel / IO"); + if (isMacOS) { + collect(collectDir, "vmstat", "vm_stat", []); + collect(collectDir, "iostat", "iostat", ["-c", "5", "-w", "1"]); + } else { + collect(collectDir, "vmstat", "vmstat", ["1", "5"]); + collect(collectDir, "iostat", "iostat", ["-xz", "1", "5"]); + } +} + +function collectKernelMessages(collectDir: string): void { + section("Kernel Messages"); + if (isMacOS) { + collectShell( + collectDir, + "system-log", + 'log show --last 5m --predicate "eventType == logEvent" --style compact 2>/dev/null | tail -100', + ); + } else { + collectShell(collectDir, "dmesg", "dmesg | tail -100"); + } +} + +// --------------------------------------------------------------------------- +// Tarball +// --------------------------------------------------------------------------- + +function createTarball(collectDir: string, output: string): void { + spawnSync("tar", ["czf", output, "-C", dirname(collectDir), basename(collectDir)], { + stdio: "inherit", + timeout: 60_000, + }); + info(`Tarball written to ${output}`); + warn( + "Known secrets are auto-redacted, but please review for any remaining sensitive data before sharing.", + ); + info("Attach this file to your GitHub issue."); +} + +// --------------------------------------------------------------------------- +// Main entry point +// --------------------------------------------------------------------------- + +export function runDebug(opts: DebugOptions = {}): void { + const quick = opts.quick ?? false; + const output = opts.output ?? ""; + // Compiled location: dist/lib/debug.js → repo root is 2 levels up + const repoDir = join(__dirname, "..", ".."); + + // Resolve sandbox name + let sandboxName = + opts.sandboxName ?? process.env.NEMOCLAW_SANDBOX ?? process.env.SANDBOX_NAME ?? ""; + if (!sandboxName) { + sandboxName = detectSandboxName(); + } + + // Create temp collection directory + const collectDir = mkdtempSync(join(tmpdir(), "nemoclaw-debug-")); + + try { + info(`Collecting diagnostics for sandbox '${sandboxName}'...`); + info(`Quick mode: ${String(quick)}`); + if (output) info(`Tarball output: ${output}`); + console.log(""); + + collectSystem(collectDir, quick); + collectProcesses(collectDir, quick); + collectGpu(collectDir, quick); + collectDocker(collectDir, quick); + collectOpenshell(collectDir, sandboxName, quick); + collectOnboardSession(collectDir, repoDir); + collectSandboxInternals(collectDir, sandboxName, quick); + + if (!quick) { + collectNetwork(collectDir); + collectKernel(collectDir); + } + + collectKernelMessages(collectDir); + + if (output) { + createTarball(collectDir, output); + } + + console.log(""); + info("Done. If filing a bug, run with --output and attach the tarball to your issue:"); + info(" nemoclaw debug --output /tmp/nemoclaw-debug.tar.gz"); + } finally { + rmSync(collectDir, { recursive: true, force: true }); + } +}