diff --git a/scripts/brev-setup.sh b/scripts/brev-setup.sh index 1bf5d9de4..c2a6a7c12 100755 --- a/scripts/brev-setup.sh +++ b/scripts/brev-setup.sh @@ -37,6 +37,64 @@ SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" export NEEDRESTART_MODE=a export DEBIAN_FRONTEND=noninteractive +# ── GPU detection ───────────────────────────────────────────────── +# Brev GPU images ship the nvidia-smi binary even on CPU-only instances. +# We need nvidia-smi to actually *run* (driver loaded) to treat this as +# a GPU host. Export a flag so every later section uses the same check. +if command -v nvidia-smi >/dev/null 2>&1 && nvidia-smi >/dev/null 2>&1; then + HAS_GPU=true +else + HAS_GPU=false +fi +info "GPU detected: $HAS_GPU" + +# ── Acquire exclusive apt access ────────────────────────────────── +# Cloud VMs run multiple apt processes on boot: cloud-init, apt-daily, +# and unattended-upgrades. We must wait for cloud-init AND disable the +# background services before we can safely use apt ourselves. +APT_WAIT_MAX=300 # seconds — fail if apt is still busy after 5 min +APT_QUIET_WINDOW=5 # seconds — apt must be idle this long before we proceed + +if command -v cloud-init >/dev/null 2>&1; then + info "Waiting for cloud-init to finish..." + if ! cloud-init status --wait >/dev/null 2>&1; then + warn "cloud-init exited with an error — proceeding, but the VM may be in an inconsistent state" + fi + info "cloud-init done" +fi +# Stop background apt services that run independently of cloud-init +info "Disabling background apt services..." +sudo systemctl stop apt-daily.timer apt-daily-upgrade.timer unattended-upgrades 2>/dev/null || true +sudo systemctl disable apt-daily.timer apt-daily-upgrade.timer 2>/dev/null || true +# Kill any straggler apt/dpkg processes +sudo pkill -9 -x apt-get 2>/dev/null || true +sudo pkill -9 -x apt 2>/dev/null || true +sudo pkill -9 -x dpkg 2>/dev/null || true +# Release any stale locks left by killed processes +sudo rm -f /var/lib/dpkg/lock-frontend /var/lib/apt/lists/lock /var/cache/apt/archives/lock 2>/dev/null || true +sudo dpkg --configure -a 2>/dev/null || true + +# Wait until apt/dpkg are truly idle for $APT_QUIET_WINDOW consecutive seconds. +apt_waited=0 +apt_quiet=0 +while [ "$apt_quiet" -lt "$APT_QUIET_WINDOW" ]; do + if [ "$apt_waited" -ge "$APT_WAIT_MAX" ]; then + fail "apt/dpkg still busy after ${APT_WAIT_MAX}s — cannot proceed" + fi + if pgrep -Ex "apt-get|apt|dpkg" >/dev/null 2>&1 \ + || fuser /var/lib/dpkg/lock-frontend /var/lib/apt/lists/lock /var/cache/apt/archives/lock 2>/dev/null; then + info "Waiting for apt/dpkg processes to finish... (${apt_waited}s elapsed)" + apt_quiet=0 + sleep 2 + apt_waited=$((apt_waited + 2)) + else + apt_quiet=$((apt_quiet + 1)) + sleep 1 + apt_waited=$((apt_waited + 1)) + fi +done +info "apt is now exclusively ours" + # --- 0. Node.js (needed for services) --- if ! command -v node >/dev/null 2>&1; then info "Installing Node.js..." @@ -55,9 +113,16 @@ if ! command -v node >/dev/null 2>&1; then else fail "No SHA-256 verification tool found (need sha256sum or shasum)" fi - sudo -E bash "$tmpdir/setup_node.sh" >/dev/null 2>&1 + sudo -E bash "$tmpdir/setup_node.sh" ) - sudo apt-get install -y -qq nodejs >/dev/null 2>&1 + for attempt in 1 2 3 4 5; do + if sudo apt-get install -y -qq nodejs; then + break + fi + [ "$attempt" -eq 5 ] && fail "Node.js install failed after 5 attempts" + info "Node.js install failed (attempt $attempt/5), retrying in 30s..." + sleep 30 + done info "Node.js $(node --version) installed" else info "Node.js already installed: $(node --version)" @@ -66,8 +131,14 @@ fi # --- 1. Docker --- if ! command -v docker >/dev/null 2>&1; then info "Installing Docker..." - sudo apt-get update -qq >/dev/null 2>&1 - sudo apt-get install -y -qq docker.io >/dev/null 2>&1 + for attempt in 1 2 3 4 5; do + if sudo apt-get update -qq && sudo apt-get install -y -qq docker.io; then + break + fi + [ "$attempt" -eq 5 ] && fail "Docker install failed after 5 attempts" + info "Docker install failed (attempt $attempt/5), retrying in 30s..." + sleep 30 + done sudo usermod -aG docker "$(whoami)" info "Docker installed" else @@ -75,7 +146,7 @@ else fi # --- 2. NVIDIA Container Toolkit (if GPU present) --- -if command -v nvidia-smi >/dev/null 2>&1; then +if [ "$HAS_GPU" = true ]; then if ! dpkg -s nvidia-container-toolkit >/dev/null 2>&1; then info "Installing NVIDIA Container Toolkit..." curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \ @@ -91,6 +162,22 @@ if command -v nvidia-smi >/dev/null 2>&1; then else info "NVIDIA Container Toolkit already installed" fi +else + # CPU-only instance: ensure Docker uses runc (not nvidia) as default runtime. + # Brev GPU images pre-configure nvidia as the default Docker runtime even on + # CPU instances, causing "nvidia-container-cli: nvml error: driver not loaded" + # when starting containers. + if grep -q '"default-runtime".*nvidia' /etc/docker/daemon.json 2>/dev/null; then + info "Resetting Docker default runtime to runc (no GPU detected)..." + sudo python3 -c " +import json +with open('/etc/docker/daemon.json') as f: cfg = json.load(f) +cfg.pop('default-runtime', None) +with open('/etc/docker/daemon.json', 'w') as f: json.dump(cfg, f, indent=2) +" + sudo systemctl restart docker + info "Docker runtime reset to runc" + fi fi # --- 3. openshell CLI (binary release, not pip) --- @@ -139,7 +226,7 @@ fi VLLM_MODEL="nvidia/nemotron-3-nano-30b-a3b" if [ "${SKIP_VLLM:-}" = "1" ]; then info "Skipping vLLM install (SKIP_VLLM=1)" -elif command -v nvidia-smi >/dev/null 2>&1; then +elif [ "$HAS_GPU" = true ]; then if ! python3 -c "import vllm" 2>/dev/null; then info "Installing vLLM..." if ! command -v pip3 >/dev/null 2>&1; then diff --git a/scripts/setup.sh b/scripts/setup.sh index 4ba7f13dd..b503fb09c 100755 --- a/scripts/setup.sh +++ b/scripts/setup.sh @@ -112,7 +112,8 @@ info "Starting OpenShell gateway..." openshell gateway destroy -g nemoclaw >/dev/null 2>&1 || true docker volume ls -q --filter "name=openshell-cluster-nemoclaw" | grep . && docker volume ls -q --filter "name=openshell-cluster-nemoclaw" | xargs docker volume rm || true GATEWAY_ARGS=(--name nemoclaw) -command -v nvidia-smi >/dev/null 2>&1 && GATEWAY_ARGS+=(--gpu) +# Only enable GPU if nvidia-smi actually works (driver loaded), not just present on PATH +nvidia-smi >/dev/null 2>&1 && GATEWAY_ARGS+=(--gpu) if ! openshell gateway start "${GATEWAY_ARGS[@]}" 2>&1 | grep -E "Gateway|✓|Error|error"; then warn "Gateway start failed. Cleaning up stale state..." openshell gateway destroy -g nemoclaw >/dev/null 2>&1 || true diff --git a/test/e2e/brev-e2e.test.js b/test/e2e/brev-e2e.test.js index cd298ddd7..8f251f67e 100644 --- a/test/e2e/brev-e2e.test.js +++ b/test/e2e/brev-e2e.test.js @@ -17,8 +17,7 @@ * * Optional env vars: * TEST_SUITE — which test to run: full (default), credential-sanitization, telegram-injection, all - * BREV_MIN_VCPU — Minimum vCPUs for CPU instance (default: 4) - * BREV_MIN_RAM — Minimum RAM in GB for CPU instance (default: 16) + * BREV_INSTANCE_TYPE — Brev/GCP instance type (default: n2-standard-4) */ import { describe, it, expect, beforeAll, afterAll } from "vitest"; @@ -28,8 +27,8 @@ import { homedir } from "node:os"; import path from "node:path"; // CPU instance specs: min vCPUs and RAM for the instance search -const BREV_MIN_VCPU = parseInt(process.env.BREV_MIN_VCPU || "4", 10); -const BREV_MIN_RAM = parseInt(process.env.BREV_MIN_RAM || "16", 10); +// Use a known CPU-only GCP instance type to avoid GPU images with broken nvidia runtime +const BREV_INSTANCE_TYPE = process.env.BREV_INSTANCE_TYPE || "n2-standard-4"; const INSTANCE_NAME = process.env.INSTANCE_NAME; const TEST_SUITE = process.env.TEST_SUITE || "full"; const REPO_DIR = path.resolve(import.meta.dirname, "../.."); @@ -58,19 +57,15 @@ function ssh(cmd, { timeout = 120_000, stream = false } = {}) { return stream ? "" : result.trim(); } -/** - * Escape a value for safe inclusion in a single-quoted shell string. - * Replaces single quotes with the shell-safe sequence: '\'' - */ -function shellEscape(value) { - return String(value).replace(/'/g, "'\\''"); +function shellQuote(value) { + return `'${String(value).replace(/'/g, "'\\''")}'`; } /** Run a command on the remote VM with env vars set for NemoClaw. */ function sshEnv(cmd, { timeout = 600_000, stream = false } = {}) { const envPrefix = [ - `export NVIDIA_API_KEY='${shellEscape(process.env.NVIDIA_API_KEY)}'`, - `export GITHUB_TOKEN='${shellEscape(process.env.GITHUB_TOKEN)}'`, + `export NVIDIA_API_KEY=${shellQuote(process.env.NVIDIA_API_KEY)}`, + `export GITHUB_TOKEN=${shellQuote(process.env.GITHUB_TOKEN)}`, `export NEMOCLAW_NON_INTERACTIVE=1`, `export NEMOCLAW_SANDBOX_NAME=e2e-test`, ].join(" && "); @@ -131,14 +126,21 @@ describe.runIf(hasRequiredVars)("Brev E2E", () => { ); brev("login", "--token", process.env.BREV_API_TOKEN); - // Create bare CPU instance via brev search cpu | brev create - console.log(`[${elapsed()}] Creating CPU instance via brev search cpu | brev create...`); - console.log(`[${elapsed()}] min-vcpu: ${BREV_MIN_VCPU}, min-ram: ${BREV_MIN_RAM}GB`); - execSync( - `brev search cpu --min-vcpu ${BREV_MIN_VCPU} --min-ram ${BREV_MIN_RAM} --sort price | ` + - `brev create ${INSTANCE_NAME} --detached`, - { encoding: "utf-8", timeout: 180_000, stdio: ["pipe", "inherit", "inherit"] }, - ); + // Delete any leftover instance from a previous failed run + try { + brev("delete", INSTANCE_NAME); + console.log(`[${elapsed()}] Deleted leftover instance "${INSTANCE_NAME}"`); + } catch { + // Expected — no leftover instance + } + + // Create CPU instance with a known GCP instance type + console.log(`[${elapsed()}] Creating CPU instance (type: ${BREV_INSTANCE_TYPE})...`); + execSync(`brev create --type ${BREV_INSTANCE_TYPE} ${INSTANCE_NAME} --detached`, { + encoding: "utf-8", + timeout: 180_000, + stdio: ["pipe", "inherit", "inherit"], + }); instanceCreated = true; console.log(`[${elapsed()}] brev create returned (instance provisioning in background)`); @@ -161,6 +163,11 @@ describe.runIf(hasRequiredVars)("Brev E2E", () => { ); console.log(`[${elapsed()}] Code synced`); + // Wait for cloud-init to finish — Brev instances run apt provisioning on boot + console.log(`[${elapsed()}] Waiting for cloud-init to finish...`); + ssh(`cloud-init status --wait 2>/dev/null || true`, { timeout: 600_000, stream: true }); + console.log(`[${elapsed()}] cloud-init done`); + // Bootstrap VM — stream output to CI log so we can see progress console.log(`[${elapsed()}] Running brev-setup.sh (bootstrap)...`); sshEnv(`cd ${remoteDir} && SKIP_VLLM=1 bash scripts/brev-setup.sh`, { diff --git a/test/e2e/test-telegram-injection.sh b/test/e2e/test-telegram-injection.sh index 64ae41efb..0c671b22d 100755 --- a/test/e2e/test-telegram-injection.sh +++ b/test/e2e/test-telegram-injection.sh @@ -450,6 +450,391 @@ else fail "T8b: Message with special characters caused empty/error response" fi +# ══════════════════════════════════════════════════════════════════ +# Phase 7: Real runAgentInSandbox() Invocation +# +# PR #1092 feedback from @cv: The tests above use ad-hoc SSH commands +# that bypass the actual code path in telegram-bridge.js. This phase +# exercises the real runAgentInSandbox() function via Node.js to ensure +# the production code is validated. +# ══════════════════════════════════════════════════════════════════ +section "Phase 7: Real runAgentInSandbox() Code Path" + +info "T9: Testing real runAgentInSandbox() with injection payload..." + +# Create a test script that invokes the actual runAgentInSandbox function +cat >/tmp/test-real-bridge.js <<'TESTSCRIPT' +// Test script to invoke the real runAgentInSandbox() function. +// +// NOTE: On unfixed code, this will fail because: +// 1. runAgentInSandbox is not exported (no require.main guard) +// 2. The function signature doesn't accept an options parameter +// 3. initConfig() runs at require-time and fails without TELEGRAM_BOT_TOKEN +// These failures are expected and prove the tests detect the missing fix. +// After the fix PR is merged, this script will work correctly. +const path = require('path'); + +// Mock resolveOpenshell to use the system openshell +const resolveOpenshellPath = require.resolve(path.join(process.cwd(), 'bin/lib/resolve-openshell')); +require.cache[resolveOpenshellPath] = { + exports: { resolveOpenshell: () => process.env.OPENSHELL_PATH || 'openshell' } +}; + +const bridge = require(path.join(process.cwd(), 'scripts/telegram-bridge.js')); + +async function test() { + const message = process.argv[2] || 'Hello'; + const sessionId = process.argv[3] || 'e2e-test'; + const apiKey = process.env.NVIDIA_API_KEY; + const sandbox = process.env.NEMOCLAW_SANDBOX_NAME || 'e2e-test'; + const openshell = process.env.OPENSHELL_PATH || 'openshell'; + + if (!apiKey) { + console.error('NVIDIA_API_KEY required'); + process.exit(1); + } + + try { + // Call the real runAgentInSandbox with options override for testing + const result = await bridge.runAgentInSandbox(message, sessionId, { + apiKey, + sandbox, + openshell, + }); + console.log('RESULT_START'); + console.log(result); + console.log('RESULT_END'); + } catch (err) { + console.error('ERROR:', err.message); + process.exit(1); + } +} + +test(); +TESTSCRIPT + +# Find openshell path +OPENSHELL_PATH=$(command -v openshell) || OPENSHELL_PATH="" +if [ -z "$OPENSHELL_PATH" ]; then + skip "T9: openshell not found, cannot test real runAgentInSandbox()" +else + # Clean up any previous injection markers + sandbox_exec "rm -f /tmp/injection-proof-t9" >/dev/null 2>&1 + + # Test with injection payload through real runAgentInSandbox + t9_result=$(cd "$REPO" && OPENSHELL_PATH="$OPENSHELL_PATH" \ + NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME" \ + timeout 120 node /tmp/test-real-bridge.js \ + '$(touch /tmp/injection-proof-t9 && echo INJECTED)' \ + 'e2e-injection-t9' 2>&1) || true + + # Check if injection file was created + injection_check_t9=$(sandbox_exec "test -f /tmp/injection-proof-t9 && echo EXPLOITED || echo SAFE") + if echo "$injection_check_t9" | grep -q "SAFE"; then + pass "T9: Real runAgentInSandbox() prevented \$(command) injection" + else + fail "T9: Real runAgentInSandbox() EXECUTED injection — vulnerability in production code!" + fi +fi + +# T10: Test backticks through real code path +info "T10: Testing real runAgentInSandbox() with backtick payload..." + +if [ -z "$OPENSHELL_PATH" ]; then + skip "T10: openshell not found, cannot test real runAgentInSandbox()" +else + sandbox_exec "rm -f /tmp/injection-proof-t10" >/dev/null 2>&1 + + t10_result=$(cd "$REPO" && OPENSHELL_PATH="$OPENSHELL_PATH" \ + NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME" \ + timeout 120 node /tmp/test-real-bridge.js \ + '`touch /tmp/injection-proof-t10`' \ + 'e2e-injection-t10' 2>&1) || true + + injection_check_t10=$(sandbox_exec "test -f /tmp/injection-proof-t10 && echo EXPLOITED || echo SAFE") + if echo "$injection_check_t10" | grep -q "SAFE"; then + pass "T10: Real runAgentInSandbox() prevented backtick injection" + else + fail "T10: Real runAgentInSandbox() EXECUTED backtick injection — vulnerability in production code!" + fi +fi + +# T11: Test API key not exposed through real code path +info "T11: Verifying API key not in process arguments (real code path)..." + +if [ -z "$OPENSHELL_PATH" ]; then + skip "T11: openshell not found, cannot test real runAgentInSandbox()" +else + # Start the bridge test in background and check ps + cd "$REPO" && OPENSHELL_PATH="$OPENSHELL_PATH" \ + NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME" \ + timeout 60 node /tmp/test-real-bridge.js 'Hello world' 'e2e-ps-check' & + BRIDGE_PID=$! + + # Poll the process list multiple times to catch short-lived SSH invocations + API_KEY_PREFIX="${NVIDIA_API_KEY:0:15}" + KEY_LEAKED=false + for _poll in $(seq 1 10); do + sleep 1 + # shellcheck disable=SC2009 # pgrep doesn't support the output format we need + ps_output=$(ps aux 2>/dev/null | grep -v grep | grep -v "test-telegram-injection" || true) + if echo "$ps_output" | grep -qF "$API_KEY_PREFIX"; then + KEY_LEAKED=true + break + fi + done + + if [ "$KEY_LEAKED" = true ]; then + fail "T11: API key found in process arguments via real runAgentInSandbox()" + else + pass "T11: API key NOT visible in process arguments (real code path)" + fi + + # Clean up background process + kill $BRIDGE_PID 2>/dev/null || true + wait $BRIDGE_PID 2>/dev/null || true +fi + +# ══════════════════════════════════════════════════════════════════ +# Phase 8: Multi-line Message Handling +# ══════════════════════════════════════════════════════════════════ +section "Phase 8: Multi-line Message Handling" + +# T12: Multi-line message with injection on second line +info "T12: Testing multi-line message with injection payload..." + +if [ -z "$OPENSHELL_PATH" ]; then + skip "T12: openshell not found, cannot test multi-line messages" +else + sandbox_exec "rm -f /tmp/injection-proof-t12" >/dev/null 2>&1 + + # Multi-line message with injection attempt on line 2 + MULTILINE_PAYLOAD=$'Hello\n$(touch /tmp/injection-proof-t12)\nWorld' + + t12_result=$(cd "$REPO" && OPENSHELL_PATH="$OPENSHELL_PATH" \ + NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME" \ + timeout 120 node /tmp/test-real-bridge.js \ + "$MULTILINE_PAYLOAD" \ + 'e2e-multiline-t12' 2>&1) || true + + injection_check_t12=$(sandbox_exec "test -f /tmp/injection-proof-t12 && echo EXPLOITED || echo SAFE") + if echo "$injection_check_t12" | grep -q "SAFE"; then + pass "T12: Multi-line message injection was NOT executed" + else + fail "T12: Multi-line message injection was EXECUTED — vulnerability!" + fi +fi + +# T13: Message with embedded newlines and backticks +info "T13: Testing message with newlines and backticks..." + +ssh_config_t13="$(mktemp)" +openshell sandbox ssh-config "$SANDBOX_NAME" >"$ssh_config_t13" 2>/dev/null +sandbox_exec "rm -f /tmp/injection-proof-t13" >/dev/null 2>&1 + +PAYLOAD_MULTILINE_BT=$'First line\n`touch /tmp/injection-proof-t13`\nThird line' + +timeout 30 ssh -F "$ssh_config_t13" \ + -o StrictHostKeyChecking=no \ + -o UserKnownHostsFile=/dev/null \ + -o LogLevel=ERROR \ + "openshell-${SANDBOX_NAME}" \ + 'MSG=$(cat) && echo "$MSG"' \ + <<<"$PAYLOAD_MULTILINE_BT" >/dev/null 2>&1 || true +rm -f "$ssh_config_t13" + +injection_check_t13=$(sandbox_exec "test -f /tmp/injection-proof-t13 && echo EXPLOITED || echo SAFE") +if echo "$injection_check_t13" | grep -q "SAFE"; then + pass "T13: Multi-line backtick injection was NOT executed" +else + fail "T13: Multi-line backtick injection was EXECUTED" +fi + +# ══════════════════════════════════════════════════════════════════ +# Phase 9: Session ID Option Injection Prevention +# ══════════════════════════════════════════════════════════════════ +section "Phase 9: Session ID Option Injection" + +# T14: Leading hyphen session IDs should be sanitized +info "T14: Testing sanitizeSessionId strips leading hyphens..." + +t14_result=$(cd "$REPO" && node -e " + const path = require('path'); + // Mock resolveOpenshell to avoid PATH dependency + const resolveOpenshellPath = require.resolve('./bin/lib/resolve-openshell'); + require.cache[resolveOpenshellPath] = { + exports: { resolveOpenshell: () => '/mock/openshell' } + }; + const bridge = require(path.join(process.cwd(), 'scripts/telegram-bridge.js')); + + // Test cases for option injection prevention + const tests = [ + { input: '--help', expected: 'help' }, + { input: '-v', expected: 'v' }, + { input: '---test', expected: 'test' }, + { input: '-123456789', expected: '123456789' }, + { input: '--', expected: null }, + { input: '-', expected: null }, + { input: 'valid-id', expected: 'valid-id' }, + { input: '-abc-123', expected: 'abc-123' }, + ]; + + let passed = 0; + let failed = 0; + for (const t of tests) { + const result = bridge.sanitizeSessionId(t.input); + if (result === t.expected) { + passed++; + } else { + console.log('FAIL: sanitizeSessionId(' + JSON.stringify(t.input) + ') = ' + JSON.stringify(result) + ', expected ' + JSON.stringify(t.expected)); + failed++; + } + } + console.log('SANITIZE_RESULT: ' + passed + ' passed, ' + failed + ' failed'); + process.exit(failed > 0 ? 1 : 0); +" 2>&1) + +if echo "$t14_result" | grep -q "SANITIZE_RESULT:.*0 failed"; then + pass "T14: sanitizeSessionId correctly strips leading hyphens" +else + fail "T14: sanitizeSessionId failed to strip leading hyphens: $t14_result" +fi + +# T15: Verify leading hyphen session ID doesn't cause option injection in real call +info "T15: Testing real runAgentInSandbox with leading-hyphen session ID..." + +if [ -z "$OPENSHELL_PATH" ]; then + skip "T15: openshell not found, cannot test session ID sanitization" +else + # Create a fresh test script that checks the actual session ID used + cat >/tmp/test-session-id.js <<'TESTSCRIPT' +const path = require('path'); +const resolveOpenshellPath = require.resolve(path.join(process.cwd(), 'bin/lib/resolve-openshell')); +require.cache[resolveOpenshellPath] = { + exports: { resolveOpenshell: () => process.env.OPENSHELL_PATH || 'openshell' } +}; +const bridge = require(path.join(process.cwd(), 'scripts/telegram-bridge.js')); + +// Just test the sanitization with a negative chat ID (like Telegram groups) +const sessionId = process.argv[2] || '--help'; +const sanitized = bridge.sanitizeSessionId(sessionId); +console.log('INPUT:', sessionId); +console.log('SANITIZED:', sanitized); +if (sanitized && sanitized.startsWith('-')) { + console.log('ERROR: Sanitized ID starts with hyphen — option injection possible!'); + process.exit(1); +} else { + console.log('OK: No leading hyphen in sanitized ID'); + process.exit(0); +} +TESTSCRIPT + + t15_result=$(cd "$REPO" && OPENSHELL_PATH="$OPENSHELL_PATH" \ + node /tmp/test-session-id.js '--help' 2>&1) || true + + if echo "$t15_result" | grep -q "OK: No leading hyphen"; then + pass "T15: Session ID '--help' sanitized to prevent option injection" + else + fail "T15: Session ID sanitization failed: $t15_result" + fi + + # Also test with negative number (Telegram group chat ID) + t15b_result=$(cd "$REPO" && OPENSHELL_PATH="$OPENSHELL_PATH" \ + node /tmp/test-session-id.js '-123456789' 2>&1) || true + + if echo "$t15b_result" | grep -q "OK: No leading hyphen"; then + pass "T15b: Negative chat ID '-123456789' sanitized correctly" + else + fail "T15b: Negative chat ID sanitization failed: $t15b_result" + fi + + rm -f /tmp/test-session-id.js +fi + +# ══════════════════════════════════════════════════════════════════ +# Phase 10: Empty Message Handling +# ══════════════════════════════════════════════════════════════════ +section "Phase 10: Empty and Edge Case Messages" + +# T16: Empty session ID should return error +info "T16: Testing empty session ID handling..." + +t16_result=$(cd "$REPO" && node -e " + const path = require('path'); + const resolveOpenshellPath = require.resolve('./bin/lib/resolve-openshell'); + require.cache[resolveOpenshellPath] = { + exports: { resolveOpenshell: () => '/mock/openshell' } + }; + const bridge = require(path.join(process.cwd(), 'scripts/telegram-bridge.js')); + + const result = bridge.sanitizeSessionId(''); + if (result === null) { + console.log('OK: Empty session ID returns null'); + process.exit(0); + } else { + console.log('FAIL: Empty session ID returned:', result); + process.exit(1); + } +" 2>&1) + +if echo "$t16_result" | grep -q "OK:"; then + pass "T16: Empty session ID correctly returns null" +else + fail "T16: Empty session ID handling failed: $t16_result" +fi + +# T17: Session ID with only special characters +info "T17: Testing session ID with only special characters..." + +t17_result=$(cd "$REPO" && node -e " + const path = require('path'); + const resolveOpenshellPath = require.resolve('./bin/lib/resolve-openshell'); + require.cache[resolveOpenshellPath] = { + exports: { resolveOpenshell: () => '/mock/openshell' } + }; + const bridge = require(path.join(process.cwd(), 'scripts/telegram-bridge.js')); + + const testCases = [ + { input: ';;;', expected: null }, + { input: '\$()', expected: null }, + { input: '---', expected: null }, + { input: '!@#\$%', expected: null }, + ]; + + let ok = true; + for (const tc of testCases) { + const result = bridge.sanitizeSessionId(tc.input); + if (result !== tc.expected) { + console.log('FAIL:', JSON.stringify(tc.input), '→', result, 'expected', tc.expected); + ok = false; + } + } + if (ok) { + console.log('OK: All special-character session IDs return null'); + } + process.exit(ok ? 0 : 1); +" 2>&1) + +if echo "$t17_result" | grep -q "OK:"; then + pass "T17: Special-character-only session IDs correctly return null" +else + fail "T17: Special character handling failed: $t17_result" +fi + +# ══════════════════════════════════════════════════════════════════ +# Phase 11: Cleanup +# ══════════════════════════════════════════════════════════════════ +section "Phase 11: Cleanup" + +info "Cleaning up injection marker files in sandbox..." +sandbox_exec "rm -f /tmp/injection-proof-t1 /tmp/injection-proof-t2 /tmp/injection-proof-t3 \ + /tmp/injection-proof-t9 /tmp/injection-proof-t10 /tmp/injection-proof-t12 /tmp/injection-proof-t13" >/dev/null 2>&1 + +info "Cleaning up local temp files..." +rm -f /tmp/test-real-bridge.js /tmp/test-session-id.js 2>/dev/null || true + +pass "Cleanup completed" + # ══════════════════════════════════════════════════════════════════ # Summary # ══════════════════════════════════════════════════════════════════