fix: address CodeRabbit review — timeout, pipefail, fail-closed probes, shell injection in test

jyaunches · jyaunches · commit 32687e3a4620 · 2026-03-30T12:18:31.000-04:00
- Bump e2e-brev workflow timeout-minutes from 60 to 90
- Add fail-fast when launchable setup exceeds 40-min wait
- Add pipefail to remote pipeline commands in runRemoteTest and npm ci
- Fix backtick shell injection in validateName test loop (use process.argv)
- Make sandbox_exec fail closed: print a __PROBE_FAILED__ sentinel and return 1 when ssh-config fails or SSH exits non-zero with no output
- Add probe failure checks in C6/C7 sandbox assertions
diff --git a/.github/workflows/e2e-brev.yaml b/.github/workflows/e2e-brev.yaml
@@ -96,7 +96,7 @@ jobs:
   e2e-brev:
     if: github.repository == 'NVIDIA/NemoClaw'
     runs-on: ubuntu-latest
-    timeout-minutes: 60
+    timeout-minutes: 90
     steps:
       - name: Checkout target branch
         uses: actions/checkout@v6
diff --git a/test/e2e/brev-e2e.test.js b/test/e2e/brev-e2e.test.js
@@ -111,6 +111,7 @@ function waitForSsh(maxAttempts = 60, intervalMs = 5_000) {
 
 function runRemoteTest(scriptPath) {
   const cmd = [
+    `set -o pipefail`,
     `source ~/.nvm/nvm.sh 2>/dev/null || true`,
     `cd ${remoteDir}`,
     `export npm_config_prefix=$HOME/.local`,
@@ -205,6 +206,14 @@ describe.runIf(hasRequiredVars)("Brev E2E", () => {
         execSync(`sleep ${setupPollInterval / 1000}`);
       }
 
+      // Fail fast if neither readiness marker appeared within the timeout
+      if (Date.now() - setupStart >= setupMaxWait) {
+        throw new Error(
+          `Launchable setup did not complete within ${setupMaxWait / 60_000} minutes. ` +
+          `Neither '=== Ready ===' in /tmp/launch-plugin.log nor install-ran marker found.`,
+        );
+      }
+
       // The launch script installs Docker, OpenShell CLI, clones NemoClaw main,
       // and sets up code-server — but it does NOT run `nemoclaw onboard` (that's
       // deferred to an interactive code-server terminal). So at this point we have:
@@ -222,7 +231,7 @@ describe.runIf(hasRequiredVars)("Brev E2E", () => {
 
       // Install deps for our branch
       console.log(`[${elapsed()}] Running npm ci to sync dependencies...`);
-      sshWithSecrets(`source ~/.nvm/nvm.sh 2>/dev/null || true && cd ${remoteDir} && npm ci --ignore-scripts 2>&1 | tail -5`, { timeout: 300_000, stream: true });
+      sshWithSecrets(`set -o pipefail && source ~/.nvm/nvm.sh 2>/dev/null || true && cd ${remoteDir} && npm ci --ignore-scripts 2>&1 | tail -5`, { timeout: 300_000, stream: true });
       console.log(`[${elapsed()}] Dependencies synced`);
 
       // Run nemoclaw onboard (non-interactive) — this is the path real users take.
diff --git a/test/e2e/test-credential-sanitization.sh b/test/e2e/test-credential-sanitization.sh
@@ -69,24 +69,35 @@ fi
 
 SANDBOX_NAME="${NEMOCLAW_SANDBOX_NAME:-e2e-test}"
 
-# Run a command inside the sandbox and capture output
+# Run a command inside the sandbox and capture output.
+# Prints __PROBE_FAILED__ and returns 1 if ssh-config generation fails, or if SSH
+# exits non-zero with no output, so callers can tell "no output" from "probe never ran".
 sandbox_exec() {
   local cmd="$1"
   local ssh_config
   ssh_config="$(mktemp)"
-  openshell sandbox ssh-config "$SANDBOX_NAME" >"$ssh_config" 2>/dev/null
+  if ! openshell sandbox ssh-config "$SANDBOX_NAME" >"$ssh_config" 2>/dev/null; then
+    rm -f "$ssh_config"
+    echo "__PROBE_FAILED__"
+    return 1
+  fi
 
   local result
+  local rc=0
   result=$(timeout 60 ssh -F "$ssh_config" \
     -o StrictHostKeyChecking=no \
     -o UserKnownHostsFile=/dev/null \
     -o ConnectTimeout=10 \
     -o LogLevel=ERROR \
     "openshell-${SANDBOX_NAME}" \
     "$cmd" \
-    2>&1) || true
+    2>&1) || rc=$?
 
   rm -f "$ssh_config"
+  if [ "$rc" -ne 0 ] && [ -z "$result" ]; then
+    echo "__PROBE_FAILED__"
+    return 1
+  fi
   echo "$result"
 }
 
@@ -396,7 +407,9 @@ section "Phase 2: Runtime Sandbox Credential Check"
 info "C6: Checking for auth-profiles.json inside sandbox..."
 c6_result=$(sandbox_exec "find /sandbox -name 'auth-profiles.json' 2>/dev/null | head -5")
 
-if [ -z "$c6_result" ]; then
+if [ "$c6_result" = "__PROBE_FAILED__" ]; then
+  fail "C6: Sandbox probe failed — SSH did not execute; cannot verify auth-profiles.json absence"
+elif [ -z "$c6_result" ]; then
   pass "C6: No auth-profiles.json found inside sandbox"
 else
   fail "C6: auth-profiles.json found inside sandbox: $c6_result"
@@ -411,7 +424,9 @@ c7_nvapi=$(sandbox_exec "grep -r 'nvapi-' /sandbox/.openclaw/ /sandbox/.nemoclaw
 c7_ghp=$(sandbox_exec "grep -r 'ghp_' /sandbox/.openclaw/ /sandbox/.nemoclaw/ 2>/dev/null | grep -v 'STRIPPED' | grep -v '/policies/' | head -5" || true)
 c7_npm=$(sandbox_exec "grep -r 'npm_' /sandbox/.openclaw/ /sandbox/.nemoclaw/ 2>/dev/null | grep -v 'STRIPPED' | grep -v '/policies/' | head -5" || true)
 
-if [ -z "$c7_nvapi" ] && [ -z "$c7_ghp" ] && [ -z "$c7_npm" ]; then
+if [ "$c7_nvapi" = "__PROBE_FAILED__" ] || [ "$c7_ghp" = "__PROBE_FAILED__" ] || [ "$c7_npm" = "__PROBE_FAILED__" ]; then
+  fail "C7: Sandbox probe failed — SSH did not execute; cannot verify secret absence"
+elif [ -z "$c7_nvapi" ] && [ -z "$c7_ghp" ] && [ -z "$c7_npm" ]; then
   pass "C7: No secret patterns (nvapi-, ghp_, npm_) found in sandbox config"
 else
   fail "C7: Secret patterns found in sandbox — nvapi: ${c7_nvapi:0:100}, ghp: ${c7_ghp:0:100}, npm: ${c7_npm:0:100}"
diff --git a/test/e2e/test-telegram-injection.sh b/test/e2e/test-telegram-injection.sh
@@ -379,17 +379,18 @@ else
   fail "T7: SANDBOX_NAME '--help' was ACCEPTED — option injection possible!"
 fi
 
-# Additional invalid names
+# Additional invalid names — pass via process.argv so the name is never
+# interpolated into the node -e source (no quoting or escaping pitfalls).
 for invalid_name in '$(whoami)' '`id`' 'foo bar' '../etc/passwd' 'UPPERCASE'; do
   t_result=$(cd "$REPO" && node -e "
     const { validateName } = require('./bin/lib/runner');
     try {
-      validateName('$invalid_name', 'SANDBOX_NAME');
+      validateName(process.argv[1], 'SANDBOX_NAME');
       console.log('ACCEPTED');
     } catch (e) {
       console.log('REJECTED');
     }
-  " 2>&1)
+  " -- "$invalid_name" 2>&1)
 
   if echo "$t_result" | grep -q "REJECTED"; then
     pass "T6/T7 extra: SANDBOX_NAME '${invalid_name}' correctly rejected"