diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js
index 37e78ecd7..e74d49921 100644
--- a/bin/lib/onboard.js
+++ b/bin/lib/onboard.js
@@ -185,6 +185,28 @@ function hasStaleGateway(gwInfoOutput) {
   return typeof gwInfoOutput === "string" && gwInfoOutput.length > 0 && gwInfoOutput.includes(GATEWAY_NAME);
 }
 
+const ANSI_ESCAPE = String.fromCharCode(27);
+const ANSI_REGEX = new RegExp(ANSI_ESCAPE + "\\[[0-9;]*[A-Za-z]", "g");
+// Returns `value` with every `ESC [ <digits/semicolons> <letter>` sequence removed.
+function stripAnsi(value = "") {
+  return value.replace(ANSI_REGEX, "");
+}
+
+// Gateway name from the first "Gateway: <name>" line of `statusOutput`, or "".
+function getActiveGatewayName(statusOutput = "") {
+  if (typeof statusOutput !== "string" || statusOutput.length === 0) return "";
+  // Strip ANSI sequences first so they cannot end up inside the captured name.
+  const gatewayLine = /^\s*Gateway:\s+(.+?)\s*$/m.exec(stripAnsi(statusOutput));
+  if (gatewayLine === null) return "";
+  return gatewayLine[1].trim();
+}
+
+// Healthy = status text contains "Connected", its active gateway is GATEWAY_NAME, and gateway info mentions it.
+function isGatewayHealthy(statusOutput = "", gwInfoOutput = "") {
+  if (typeof statusOutput !== "string" || !statusOutput.includes("Connected")) return false;
+  return getActiveGatewayName(statusOutput) === GATEWAY_NAME && hasStaleGateway(gwInfoOutput);
+}
+
 function streamSandboxCreate(command, env = process.env, options = {}) {
   const child = spawn("bash", ["-lc", command], {
     cwd: ROOT,
@@ -1237,8 +1259,16 @@ async function preflight() {
   // A previous onboard run may have left the gateway container and port
   // forward running.  If a NemoClaw-owned gateway is still present, tear
   // it down so the port check below doesn't fail on our own leftovers.
+  const gatewayStatus = runCaptureOpenshell(["status"], { ignoreError: true });
   const gwInfo = runCaptureOpenshell(["gateway", "info", "-g", GATEWAY_NAME], { ignoreError: true });
-  if (hasStaleGateway(gwInfo)) {
+  const healthyGateway = isGatewayHealthy(gatewayStatus, gwInfo);
+  if (healthyGateway) {
+    console.log("  Reusing existing NemoClaw gateway...");
+    runOpenshell(["forward", "stop", "18789"], { ignoreError: true });
+    runOpenshell(["gateway", "select", GATEWAY_NAME], { ignoreError: true });
+    process.env.OPENSHELL_GATEWAY = GATEWAY_NAME;
+    console.log("  ✓ Existing gateway selected");
+  } else if (hasStaleGateway(gwInfo)) {
     console.log("  Cleaning up previous NemoClaw session...");
     runOpenshell(["forward", "stop", "18789"], { ignoreError: true });
     runOpenshell(["gateway", "destroy", "-g", GATEWAY_NAME], { ignoreError: true });
@@ -1251,6 +1281,10 @@ async function preflight() {
     { port: 18789, label: "NemoClaw dashboard" },
   ];
   for (const { port, label } of requiredPorts) {
+    if (port === 8080 && healthyGateway) {
+      console.log(`  ✓ Port ${port} already in use by active NemoClaw gateway (${label})`);
+      continue;
+    }
     const portCheck = await checkPortAvailable(port);
     if (!portCheck.ok) {
       console.error("");
@@ -1305,11 +1339,21 @@ function destroyGateway() {
 
 // ── Step 2: Gateway ──────────────────────────────────────────────
 
-async function startGateway(_gpu) {
+async function startGatewayWithOptions(_gpu, { exitOnFailure = true } = {}) {
   step(3, 7, "Starting OpenShell gateway");
 
-  // Clean up any previous gateway and its Docker volumes
-  destroyGateway();
+  const gatewayStatus = runCaptureOpenshell(["status"], { ignoreError: true });
+  const gwInfo = runCaptureOpenshell(["gateway", "info", "-g", GATEWAY_NAME], { ignoreError: true });
+  if (isGatewayHealthy(gatewayStatus, gwInfo)) {
+    console.log("  ✓ Reusing existing gateway");
+    runOpenshell(["gateway", "select", GATEWAY_NAME], { ignoreError: true });
+    process.env.OPENSHELL_GATEWAY = GATEWAY_NAME;
+    return;
+  }
+
+  if (hasStaleGateway(gwInfo)) {
+    runOpenshell(["gateway", "destroy", "-g", GATEWAY_NAME], { ignoreError: true });
+  }
 
   const gwArgs = ["--name", GATEWAY_NAME];
   // Do NOT pass --gpu here. On DGX Spark (and most GPU hosts), inference is
@@ -1332,22 +1376,29 @@ async function startGateway(_gpu) {
   if (startResult.status !== 0) {
     console.error("  Gateway failed to start. Cleaning up stale state...");
     destroyGateway();
-    console.error("  Stale state removed. Please rerun: nemoclaw onboard");
-    process.exit(1);
+    if (exitOnFailure) {
+      console.error("  Stale state removed. Please rerun: nemoclaw onboard");
+      process.exit(1);
+    }
+    throw new Error("Gateway failed to start");
   }
 
   // Verify health
   for (let i = 0; i < 5; i++) {
     const status = runCaptureOpenshell(["status"], { ignoreError: true });
-    if (status.includes("Connected")) {
+    const gwInfo = runCaptureOpenshell(["gateway", "info", "-g", GATEWAY_NAME], { ignoreError: true });
+    if (isGatewayHealthy(status, gwInfo)) {
       console.log("  ✓ Gateway is healthy");
       break;
     }
     if (i === 4) {
       console.error("  Gateway health check failed. Cleaning up stale state...");
       destroyGateway();
-      console.error("  Stale state removed. Please rerun: nemoclaw onboard");
-      process.exit(1);
+      if (exitOnFailure) {
+        console.error("  Stale state removed. Please rerun: nemoclaw onboard");
+        process.exit(1);
+      }
+      throw new Error("Gateway failed to start");
     }
     sleep(2);
   }
@@ -1364,6 +1415,14 @@ async function startGateway(_gpu) {
   process.env.OPENSHELL_GATEWAY = GATEWAY_NAME;
 }
 
+// Thin wrappers over startGatewayWithOptions: onboarding exits the process on failure, recovery throws instead.
+async function startGateway(_gpu) {
+  return startGatewayWithOptions(_gpu, { exitOnFailure: true });
+}
+async function startGatewayForRecovery(_gpu) {
+  return startGatewayWithOptions(_gpu, { exitOnFailure: false });
+}
+
 // ── Step 3: Sandbox ──────────────────────────────────────────────
 
 async function createSandbox(gpu, model, provider, preferredInferenceApi = null) {
@@ -1415,6 +1474,7 @@ async function createSandbox(gpu, model, provider, preferredInferenceApi = null)
   run(`cp -r "${path.join(ROOT, "nemoclaw-blueprint")}" "${buildCtx}/nemoclaw-blueprint"`);
   run(`cp -r "${path.join(ROOT, "scripts")}" "${buildCtx}/scripts"`);
   run(`rm -rf "${buildCtx}/nemoclaw/node_modules"`, { ignoreError: true });
+  run(`bash "${buildCtx}/scripts/clean-staged-tree.sh" "${buildCtx}/nemoclaw-blueprint"`, { ignoreError: true });
 
   // Create sandbox (use -- echo to avoid dropping into interactive shell)
   // Pass the base policy so sandbox starts in proxy mode (required for policy updates later)
@@ -1551,9 +1611,7 @@ async function setupNim(gpu) {
   const options = [];
   options.push({
     key: "build",
-    label:
-      "NVIDIA Endpoints" +
-      (!ollamaRunning && !(EXPERIMENTAL && vllmRunning) ? " (recommended)" : ""),
+    label: "NVIDIA Endpoints",
   });
   options.push({ key: "openai", label: "OpenAI" });
   options.push({ key: "custom", label: "Other OpenAI-compatible endpoint" });
@@ -2155,7 +2213,7 @@ async function setupPolicies(sandboxName) {
 // ── Dashboard ────────────────────────────────────────────────────
 
 const CONTROL_UI_PORT = 18789;
-const CONTROL_UI_CHAT_PATH = "/chat?session=main";
+const CONTROL_UI_PATH = "/";
 
 function findOpenclawJsonPath(dir) {
   if (!fs.existsSync(dir)) return null;
@@ -2201,17 +2259,13 @@ function fetchGatewayAuthTokenFromSandbox(sandboxName) {
   }
 }
 
-function buildControlUiChatUrls(token) {
+function buildControlUiUrls(token) {
   const hash = token ? `#token=${token}` : "";
-  const pathChat = `${CONTROL_UI_CHAT_PATH}${hash}`;
-  const bases = [
-    `http://127.0.0.1:${CONTROL_UI_PORT}`,
-    `http://localhost:${CONTROL_UI_PORT}`,
-  ];
+  const baseUrl = `http://127.0.0.1:${CONTROL_UI_PORT}`;
+  const urls = [`${baseUrl}${CONTROL_UI_PATH}${hash}`];
   const chatUi = (process.env.CHAT_UI_URL || "").trim().replace(/\/$/, "");
-  const urls = bases.map((b) => `${b}${pathChat}`);
-  if (chatUi && /^https?:\/\//i.test(chatUi) && !bases.includes(chatUi)) {
-    urls.push(`${chatUi}${pathChat}`);
+  if (chatUi && /^https?:\/\//i.test(chatUi) && chatUi !== baseUrl) {
+    urls.push(`${chatUi}${CONTROL_UI_PATH}${hash}`);
   }
   return [...new Set(urls)];
 }
@@ -2239,22 +2293,26 @@ function printDashboard(sandboxName, model, provider, nimContainer = null) {
   console.log(`  Model        ${model} (${providerLabel})`);
   console.log(`  NIM          ${nimLabel}`);
   console.log(`  ${"─".repeat(50)}`);
-  console.log(`  Next:`);
+  console.log(`  Run:         nemoclaw ${sandboxName} connect`);
+  console.log(`  Status:      nemoclaw ${sandboxName} status`);
+  console.log(`  Logs:        nemoclaw ${sandboxName} logs --follow`);
+  console.log("");
   if (token) {
-    note("  URLs below embed the gateway token — treat them like a password.");
-    console.log(`  Control UI:  copy one line into your browser (port ${CONTROL_UI_PORT} must be forwarded):`);
-    for (const u of buildControlUiChatUrls(token)) {
-      console.log(`    ${u}`);
+    console.log("  OpenClaw UI (tokenized URL; treat it like a password)");
+    console.log(`  Port ${CONTROL_UI_PORT} must be forwarded before opening this URL.`);
+    for (const url of buildControlUiUrls(token)) {
+      console.log(`  ${url}`);
     }
   } else {
     note("  Could not read gateway token from the sandbox (download failed).");
-    console.log(`  Control UI:  http://127.0.0.1:${CONTROL_UI_PORT}${CONTROL_UI_CHAT_PATH}`);
+    console.log("  OpenClaw UI");
+    console.log(`  Port ${CONTROL_UI_PORT} must be forwarded before opening this URL.`);
+    for (const url of buildControlUiUrls()) {
+      console.log(`  ${url}`);
+    }
     console.log(`  Token:       nemoclaw ${sandboxName} connect  →  jq -r '.gateway.auth.token' /sandbox/.openclaw/openclaw.json`);
     console.log(`               append  #token=<token>  to the URL, or see /tmp/gateway.log inside the sandbox.`);
   }
-  console.log(`  Run:         nemoclaw ${sandboxName} connect`);
-  console.log(`  Status:      nemoclaw ${sandboxName} status`);
-  console.log(`  Logs:        nemoclaw ${sandboxName} logs --follow`);
   console.log(`  ${"─".repeat(50)}`);
   console.log("");
 }
@@ -2297,12 +2355,16 @@ module.exports = {
   getInstalledOpenshellVersion,
   getStableGatewayImageRef,
   hasStaleGateway,
+  isGatewayHealthy,
   isSandboxReady,
   onboard,
+  preflight,
   pruneStaleSandboxEntry,
   runCaptureOpenshell,
   setupInference,
   setupNim,
+  startGateway,
+  startGatewayForRecovery,
   writeSandboxConfigSyncFile,
   patchStagedDockerfile,
 };
diff --git a/bin/nemoclaw.js b/bin/nemoclaw.js
index 868b00b83..3d82ffa96 100755
--- a/bin/nemoclaw.js
+++ b/bin/nemoclaw.js
@@ -20,7 +20,9 @@ const R = _useColor ? "\x1b[0m" : "";
 const _RD = _useColor ? "\x1b[1;31m" : "";
 const YW = _useColor ? "\x1b[1;33m" : "";
 
-const { ROOT, SCRIPTS, run, runCapture, runInteractive, shellQuote, validateName } = require("./lib/runner");
+const { ROOT, SCRIPTS, run, runCapture: _runCapture, runInteractive, shellQuote, validateName } = require("./lib/runner");
+const { resolveOpenshell } = require("./lib/resolve-openshell");
+const { startGatewayForRecovery } = require("./lib/onboard");
 const {
   ensureApiKey,
   ensureGithubToken,
@@ -41,6 +43,263 @@ const GLOBAL_COMMANDS = new Set([
 ]);
 
 const REMOTE_UNINSTALL_URL = "https://raw.githubusercontent.com/NVIDIA/NemoClaw/refs/heads/main/uninstall.sh";
+// Cached result of resolveOpenshell(); stays null until a lookup succeeds.
+let OPENSHELL_BIN = null;
+
+// Resolve the openshell CLI once and reuse the cached value afterwards.
+// Prints an install hint and exits with status 1 when nothing is resolved.
+function getOpenshellBinary() {
+  OPENSHELL_BIN = OPENSHELL_BIN || resolveOpenshell();
+  if (OPENSHELL_BIN) return OPENSHELL_BIN;
+  console.error("openshell CLI not found. Install OpenShell before using sandbox commands.");
+  process.exit(1);
+  return OPENSHELL_BIN;
+}
+
+// Run `openshell <args>` synchronously; a non-zero status exits the process unless opts.ignoreError is set.
+function runOpenshell(args, opts = {}) {
+  const stdio = opts.stdio ?? "inherit";
+  const env = { ...process.env, ...opts.env };
+  const result = spawnSync(getOpenshellBinary(), args, { cwd: ROOT, env, encoding: "utf-8", stdio });
+  if (opts.ignoreError || result.status === 0) {
+    return result;
+  }
+  // result.status can be null (e.g. the child was terminated by a signal); exit with 1 in that case.
+  console.error(`  Command failed (exit ${result.status}): openshell ${args.join(" ")}`);
+  process.exit(result.status || 1);
+  return result;
+}
+
+// Run `openshell <args>` with stdin ignored and stdout/stderr captured.
+// Returns { status, output }: status falls back to 1 when spawnSync reports none;
+// output is stdout followed by stderr (stderr omitted when opts.ignoreError), trimmed as a whole.
+function captureOpenshell(args, opts = {}) {
+  const env = { ...process.env, ...opts.env };
+  const stdio = ["ignore", "pipe", "pipe"];
+  const child = spawnSync(getOpenshellBinary(), args, { cwd: ROOT, env, encoding: "utf-8", stdio });
+  const stdout = child.stdout || "";
+  const stderr = opts.ignoreError ? "" : child.stderr || "";
+  const output = `${stdout}${stderr}`.trim();
+  return { status: child.status ?? 1, output };
+}
+
+function stripAnsi(value = "") { // String(value) minus every ESC[...<letter> sequence (colours, cursor moves, erase)
+  // eslint-disable-next-line no-control-regex
+  return String(value).replace(/\x1b\[[0-9;]*[A-Za-z]/g, "");
+}
+
+function hasNamedGateway(output = "") { // true when the ANSI-stripped output contains "Gateway: nemoclaw"
+  return stripAnsi(output).includes("Gateway: nemoclaw");
+}
+
+function getActiveGatewayName(output = "") {
+  // Name from the first "Gateway: <name>" line of the ANSI-stripped output; "" when there is no such line.
+  return stripAnsi(output).match(/^\s*Gateway:\s+(.+?)\s*$/m)?.[1].trim() ?? "";
+}
+
+// Classify the "nemoclaw" gateway from `openshell status` and `openshell gateway info`.
+// Returns { state, status, gatewayInfo } (both raw outputs), where state is one of
+// healthy_named, named_unreachable, named_unhealthy, connected_other, missing_named.
+function getNamedGatewayLifecycleState() {
+  const { output: status } = captureOpenshell(["status"]);
+  const { output: gatewayInfo } = captureOpenshell(["gateway", "info", "-g", "nemoclaw"]);
+  const plainStatus = stripAnsi(status);
+  const isConnected = /^\s*Status:\s*Connected\b/im.test(plainStatus);
+  const isRefusing = /Connection refused|client error \(Connect\)|tcp connect error/i.test(plainStatus);
+  // Status names nemoclaw as the active gateway and gateway info mentions it as well.
+  const isNamedActive = getActiveGatewayName(status) === "nemoclaw" && hasNamedGateway(gatewayInfo);
+
+  let state = "missing_named";
+  if (isNamedActive && isConnected) {
+    state = "healthy_named";
+  } else if (isNamedActive) {
+    state = isRefusing ? "named_unreachable" : "named_unhealthy";
+  } else if (isConnected) {
+    state = "connected_other";
+  }
+  return { state, status, gatewayInfo };
+}
+
+// Try to bring the "nemoclaw" gateway back to the healthy_named state: first by
+// re-selecting it, then (for unhealthy / unreachable / other-gateway states) by
+// starting it again. Resolves to { recovered, before, after, attempted, via? }.
+async function recoverNamedGatewayRuntime() {
+  const selectNamedGateway = () => runOpenshell(["gateway", "select", "nemoclaw"], { ignoreError: true });
+  const restartable = new Set(["named_unhealthy", "named_unreachable", "connected_other"]);
+  const before = getNamedGatewayLifecycleState();
+  if (before.state === "healthy_named") {
+    return { recovered: true, before, after: before, attempted: false };
+  }
+
+  selectNamedGateway();
+  let after = getNamedGatewayLifecycleState();
+  if (after.state === "healthy_named") {
+    process.env.OPENSHELL_GATEWAY = "nemoclaw";
+    return { recovered: true, before, after, attempted: true, via: "select" };
+  }
+
+  if (restartable.has(before.state) || restartable.has(after.state)) {
+    try {
+      await startGatewayForRecovery();
+    } catch {
+      // Deliberately ignored: the lifecycle re-check below classifies the
+      // outcome, so a failed start still yields the usual result shape.
+    }
+    selectNamedGateway();
+    after = getNamedGatewayLifecycleState();
+    if (after.state === "healthy_named") {
+      process.env.OPENSHELL_GATEWAY = "nemoclaw";
+      return { recovered: true, before, after, attempted: true, via: "start" };
+    }
+  }
+  return { recovered: false, before, after, attempted: true };
+}
+
+// Classify `openshell sandbox get <name>` as present / missing / gateway_error / unknown_error.
+function getSandboxGatewayState(sandboxName) {
+  const { status, output } = captureOpenshell(["sandbox", "get", sandboxName]);
+  const gatewayFault = /transport error|Connection refused|handshake verification failed|Missing gateway auth token|device identity required/i;
+  let state = "unknown_error";
+  if (status === 0) {
+    state = "present";
+  } else if (/NotFound|sandbox not found/i.test(output)) {
+    state = "missing";
+  } else if (gatewayFault.test(output)) {
+    state = "gateway_error";
+  }
+  return { state, output };
+}
+
+// Print a remediation hint for the first known gateway failure signature found
+// in `output` (ANSI sequences are stripped before matching). Nothing is printed
+// when no signature matches. `writer` is called once per line; `sandboxName`
+// is only used in the generic "not reachable" hint.
+function printGatewayLifecycleHint(output = "", sandboxName = "", writer = console.error) {
+  const text = stripAnsi(output);
+  const connectFailure = /Connection refused|client error \(Connect\)|tcp connect error/i.test(text);
+  const namesNemoclaw = /Gateway:\s+nemoclaw/i.test(text);
+
+  if (/No gateway configured/i.test(text)) {
+    writer("  The selected NemoClaw gateway is no longer configured or its metadata/runtime has been lost.");
+    writer("  Start the gateway again with `openshell gateway start --name nemoclaw` before expecting existing sandboxes to reconnect.");
+    writer("  If the gateway has to be rebuilt from scratch, recreate the affected sandbox afterward.");
+  } else if (connectFailure && namesNemoclaw) {
+    writer("  The selected NemoClaw gateway exists in metadata, but its API is refusing connections after restart.");
+    writer("  This usually means the gateway runtime did not come back cleanly after the restart.");
+    writer("  Retry `openshell gateway start --name nemoclaw`; if it stays in this state, rebuild the gateway before expecting existing sandboxes to reconnect.");
+  } else if (/handshake verification failed/i.test(text)) {
+    writer("  This looks like gateway identity drift after restart.");
+    writer("  Existing sandboxes may still be recorded locally, but the current gateway no longer trusts their prior connection state.");
+    writer("  Try re-establishing the NemoClaw gateway/runtime first. If the sandbox is still unreachable, recreate just that sandbox with `nemoclaw onboard`.");
+  } else if (/Connection refused|transport error/i.test(text)) {
+    // Generic reachability failure that did not match the nemoclaw-specific case above.
+    writer(`  The sandbox '${sandboxName}' may still exist, but the current gateway/runtime is not reachable.`);
+    writer("  Check `openshell status`, verify the active gateway, and retry.");
+  } else if (/Missing gateway auth token|device identity required/i.test(text)) {
+    writer("  The gateway is reachable, but the current auth or device identity state is not usable.");
+    writer("  Verify the active gateway and retry after re-establishing the runtime.");
+  }
+}
+
+// Look up a sandbox in the live gateway, attempting one gateway recovery when
+// the lookup fails with a gateway-level error.
+//
+// Any lookup whose state is not "gateway_error" is returned unchanged.
+// After a gateway_error the result is one of:
+//   - the retried lookup plus { recoveredGateway: true, recoveryVia }
+//   - state "identity_drift" (the retry still reports a handshake failure)
+//   - state "gateway_missing_after_restart" / "gateway_unreachable_after_restart"
+//   - the original lookup plus { gatewayRecoveryFailed: true }
+async function getReconciledSandboxGatewayState(sandboxName) {
+  const lookup = getSandboxGatewayState(sandboxName);
+  if (lookup.state !== "gateway_error") {
+    return lookup;
+  }
+
+  const recovery = await recoverNamedGatewayRuntime();
+  if (recovery.recovered) {
+    const retried = getSandboxGatewayState(sandboxName);
+    const recoveryInfo = { recoveredGateway: true, recoveryVia: recovery.via || null };
+    const settled = retried.state === "present" || retried.state === "missing";
+    if (!settled && /handshake verification failed/i.test(retried.output)) {
+      return { state: "identity_drift", output: retried.output, ...recoveryInfo };
+    }
+    return { ...retried, ...recoveryInfo };
+  }
+
+  // Recovery did not succeed: classify why, starting from a fresh status reading.
+  const latestLifecycle = getNamedGatewayLifecycleState();
+  const latestStatus = stripAnsi(latestLifecycle.status || "");
+  if (/No gateway configured/i.test(latestStatus)) {
+    return {
+      state: "gateway_missing_after_restart",
+      output: latestLifecycle.status || lookup.output,
+    };
+  }
+  const refusing = /Connection refused|client error \(Connect\)|tcp connect error/i.test(latestStatus);
+  if (refusing && /Gateway:\s+nemoclaw/i.test(latestStatus)) {
+    return {
+      state: "gateway_unreachable_after_restart",
+      output: latestLifecycle.status || lookup.output,
+    };
+  }
+  // Otherwise fall back to the lifecycle states observed during the recovery attempt.
+  if (recovery.after?.state === "named_unreachable" || recovery.before?.state === "named_unreachable") {
+    return {
+      state: "gateway_unreachable_after_restart",
+      output: recovery.after?.status || recovery.before?.status || lookup.output,
+    };
+  }
+  return { ...lookup, gatewayRecoveryFailed: true };
+}
+
+// Confirm that `sandboxName` exists in the live gateway and return its lookup.
+// For any other outcome, print guidance to stderr and exit with status 1; a
+// sandbox reported missing is also removed from the local registry.
+async function ensureLiveSandboxOrExit(sandboxName) {
+  const lookup = await getReconciledSandboxGatewayState(sandboxName);
+  if (lookup.state === "present") return lookup;
+  if (lookup.state === "missing") {
+    registry.removeSandbox(sandboxName);
+    console.error(`  Sandbox '${sandboxName}' is not present in the live OpenShell gateway.`);
+    console.error("  Removed stale local registry entry.");
+    console.error("  Run `nemoclaw list` to confirm the remaining sandboxes, or `nemoclaw onboard` to create a new one.");
+    process.exit(1);
+  }
+  // Failure states reported as: headline, raw gateway output (if any), two follow-up lines.
+  const guidanceByState = new Map([
+    ["identity_drift", [
+      `  Sandbox '${sandboxName}' is recorded locally, but the gateway trust material rotated after restart.`,
+      "  Existing sandbox connections cannot be reattached safely after this gateway identity change.",
+      "  Recreate this sandbox with `nemoclaw onboard` once the gateway runtime is stable.",
+    ]],
+    ["gateway_unreachable_after_restart", [
+      `  Sandbox '${sandboxName}' may still exist, but the selected NemoClaw gateway is still refusing connections after restart.`,
+      "  Retry `openshell gateway start --name nemoclaw` and verify `openshell status` is healthy before reconnecting.",
+      "  If the gateway never becomes healthy, rebuild the gateway and then recreate the affected sandbox.",
+    ]],
+    ["gateway_missing_after_restart", [
+      `  Sandbox '${sandboxName}' may still exist locally, but the NemoClaw gateway is no longer configured after restart/rebuild.`,
+      "  Start the gateway again with `openshell gateway start --name nemoclaw` before retrying.",
+      "  If the gateway had to be rebuilt from scratch, recreate the affected sandbox afterward.",
+    ]],
+  ]);
+  const guidance = guidanceByState.get(lookup.state);
+  if (guidance) {
+    const [headline, ...followUps] = guidance;
+    console.error(headline);
+    if (lookup.output) console.error(lookup.output);
+    followUps.forEach((line) => console.error(line));
+    process.exit(1);
+  }
+  console.error(`  Unable to verify sandbox '${sandboxName}' against the live OpenShell gateway.`);
+  if (lookup.output) {
+    console.error(lookup.output);
+  }
+  printGatewayLifecycleHint(lookup.output, sandboxName);
+  console.error("  Check `openshell status` and the active gateway, then retry.");
+  process.exit(1);
+}
 
 function resolveUninstallScript() {
   const candidates = [
@@ -298,17 +557,22 @@ function listSandboxes() {
 
 // ── Sandbox-scoped actions ───────────────────────────────────────
 
-function sandboxConnect(sandboxName) {
-  const qn = shellQuote(sandboxName);
+async function sandboxConnect(sandboxName) {
+  await ensureLiveSandboxOrExit(sandboxName);
   // Ensure port forward is alive before connecting
-  run(`openshell forward start --background 18789 ${qn} 2>/dev/null || true`, { ignoreError: true });
-  runInteractive(`openshell sandbox connect ${qn}`);
+  runOpenshell(["forward", "start", "--background", "18789", sandboxName], { ignoreError: true });
+  const result = spawnSync(getOpenshellBinary(), ["sandbox", "connect", sandboxName], {
+    stdio: "inherit",
+    cwd: ROOT,
+    env: process.env,
+  });
+  exitWithSpawnResult(result);
 }
 
-function sandboxStatus(sandboxName) {
+async function sandboxStatus(sandboxName) {
   const sb = registry.getSandbox(sandboxName);
   const live = parseGatewayInference(
-    runCapture("openshell inference get 2>/dev/null", { ignoreError: true })
+    captureOpenshell(["inference", "get"], { ignoreError: true }).output
   );
   if (sb) {
     console.log("");
@@ -319,8 +583,51 @@ function sandboxStatus(sandboxName) {
     console.log(`    Policies: ${(sb.policies || []).join(", ") || "none"}`);
   }
 
-  // openshell info
-  run(`openshell sandbox get ${shellQuote(sandboxName)} 2>/dev/null || true`, { ignoreError: true });
+  const lookup = await getReconciledSandboxGatewayState(sandboxName);
+  if (lookup.state === "present") {
+    console.log("");
+    if (lookup.recoveredGateway) {
+      console.log(`  Recovered NemoClaw gateway runtime via ${lookup.recoveryVia || "gateway reattach"}.`);
+      console.log("");
+    }
+    console.log(lookup.output);
+  } else if (lookup.state === "missing") {
+    registry.removeSandbox(sandboxName);
+    console.log("");
+    console.log(`  Sandbox '${sandboxName}' is not present in the live OpenShell gateway.`);
+    console.log("  Removed stale local registry entry.");
+  } else if (lookup.state === "identity_drift") {
+    console.log("");
+    console.log(`  Sandbox '${sandboxName}' is recorded locally, but the gateway trust material rotated after restart.`);
+    if (lookup.output) {
+      console.log(lookup.output);
+    }
+    console.log("  Existing sandbox connections cannot be reattached safely after this gateway identity change.");
+    console.log("  Recreate this sandbox with `nemoclaw onboard` once the gateway runtime is stable.");
+  } else if (lookup.state === "gateway_unreachable_after_restart") {
+    console.log("");
+    console.log(`  Sandbox '${sandboxName}' may still exist, but the selected NemoClaw gateway is still refusing connections after restart.`);
+    if (lookup.output) {
+      console.log(lookup.output);
+    }
+    console.log("  Retry `openshell gateway start --name nemoclaw` and verify `openshell status` is healthy before reconnecting.");
+    console.log("  If the gateway never becomes healthy, rebuild the gateway and then recreate the affected sandbox.");
+  } else if (lookup.state === "gateway_missing_after_restart") {
+    console.log("");
+    console.log(`  Sandbox '${sandboxName}' may still exist locally, but the NemoClaw gateway is no longer configured after restart/rebuild.`);
+    if (lookup.output) {
+      console.log(lookup.output);
+    }
+    console.log("  Start the gateway again with `openshell gateway start --name nemoclaw` before retrying.");
+    console.log("  If the gateway had to be rebuilt from scratch, recreate the affected sandbox afterward.");
+  } else {
+    console.log("");
+    console.log(`  Could not verify sandbox '${sandboxName}' against the live OpenShell gateway.`);
+    if (lookup.output) {
+      console.log(lookup.output);
+    }
+    printGatewayLifecycleHint(lookup.output, sandboxName, console.log);
+  }
 
   // NIM health
   const nimStat = sb && sb.nimContainer ? nim.nimStatusByName(sb.nimContainer) : nim.nimStatus(sandboxName);
@@ -332,8 +639,9 @@ function sandboxStatus(sandboxName) {
 }
 
 function sandboxLogs(sandboxName, follow) {
-  const followFlag = follow ? " --tail" : "";
-  run(`openshell logs ${shellQuote(sandboxName)}${followFlag}`);
+  const args = ["logs", sandboxName];
+  if (follow) args.push("--follow");
+  runOpenshell(args);
 }
 
 async function sandboxPolicyAdd(sandboxName) {
@@ -390,7 +698,7 @@ async function sandboxDestroy(sandboxName, args = []) {
   else nim.stopNimContainer(sandboxName);
 
   console.log(`  Deleting sandbox '${sandboxName}'...`);
-  run(`openshell sandbox delete ${shellQuote(sandboxName)} 2>/dev/null || true`, { ignoreError: true });
+  runOpenshell(["sandbox", "delete", sandboxName], { ignoreError: true });
 
   registry.removeSandbox(sandboxName);
   console.log(`  ${G}✓${R} Sandbox '${sandboxName}' destroyed`);
@@ -488,8 +796,8 @@ const [cmd, ...args] = process.argv.slice(2);
     const actionArgs = args.slice(1);
 
     switch (action) {
-      case "connect":     sandboxConnect(cmd); break;
-      case "status":      sandboxStatus(cmd); break;
+      case "connect":     await sandboxConnect(cmd); break;
+      case "status":      await sandboxStatus(cmd); break;
       case "logs":        sandboxLogs(cmd, actionArgs.includes("--follow")); break;
       case "policy-add":  await sandboxPolicyAdd(cmd); break;
       case "policy-list": sandboxPolicyList(cmd); break;
diff --git a/scripts/clean-staged-tree.sh b/scripts/clean-staged-tree.sh
new file mode 100755
index 000000000..93a550e21
--- /dev/null
+++ b/scripts/clean-staged-tree.sh
@@ -0,0 +1,15 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+# Removes .venv, .pytest_cache and every __pycache__ directory found under the given <directory>.
+set -euo pipefail
+
+target_dir="${1:-}"
+
+if [ -z "$target_dir" ]; then
+  echo "usage: $0 <directory>" >&2
+  exit 1
+fi
+# -prune keeps find from descending into a matched __pycache__; find/rm errors are silenced and ignored.
+rm -rf "$target_dir/.venv" "$target_dir/.pytest_cache"
+find "$target_dir" -type d -name __pycache__ -prune -exec rm -rf {} + 2>/dev/null || true
diff --git a/scripts/nemoclaw-start.sh b/scripts/nemoclaw-start.sh
index 518ef8555..203794ed7 100755
--- a/scripts/nemoclaw-start.sh
+++ b/scripts/nemoclaw-start.sh
@@ -18,12 +18,12 @@ set -euo pipefail
 # Harden: limit process count to prevent fork bombs (ref: #809)
 # Best-effort: some container runtimes (e.g., brev) restrict ulimit
 # modification, returning "Invalid argument". Warn but don't block startup.
-if ! ulimit -Hu 512 2>/dev/null; then
-  echo "[SECURITY] Could not set hard nproc limit (container runtime may restrict ulimit)" >&2
-fi
 if ! ulimit -Su 512 2>/dev/null; then
   echo "[SECURITY] Could not set soft nproc limit (container runtime may restrict ulimit)" >&2
 fi
+if ! ulimit -Hu 512 2>/dev/null; then
+  echo "[SECURITY] Could not set hard nproc limit (container runtime may restrict ulimit)" >&2
+fi
 
 # SECURITY: Lock down PATH so the agent cannot inject malicious binaries
 # into commands executed by the entrypoint or auto-pair watcher.
diff --git a/scripts/setup.sh b/scripts/setup.sh
index 99cd40f2f..81bd7a2c2 100755
--- a/scripts/setup.sh
+++ b/scripts/setup.sh
@@ -201,6 +201,7 @@ cp -r "$REPO_DIR/nemoclaw" "$BUILD_CTX/nemoclaw"
 cp -r "$REPO_DIR/nemoclaw-blueprint" "$BUILD_CTX/nemoclaw-blueprint"
 cp -r "$REPO_DIR/scripts" "$BUILD_CTX/scripts"
 rm -rf "$BUILD_CTX/nemoclaw/node_modules"
+bash "$BUILD_CTX/scripts/clean-staged-tree.sh" "$BUILD_CTX/nemoclaw-blueprint" 2>/dev/null || true
 
 # Capture full output to a temp file so we can filter for display but still
 # detect failures. The raw log is kept on failure for debugging.
diff --git a/test/cli.test.js b/test/cli.test.js
index 82dd5ee64..7cfb06e0d 100644
--- a/test/cli.test.js
+++ b/test/cli.test.js
@@ -3,16 +3,22 @@
 
 import { describe, it, expect } from "vitest";
 import { execSync } from "node:child_process";
+import fs from "node:fs";
+import os from "node:os";
 import path from "node:path";
 
 const CLI = path.join(import.meta.dirname, "..", "bin", "nemoclaw.js");
 
 function run(args) {
+  return runWithEnv(args);
+}
+
+function runWithEnv(args, env = {}, timeout = 10000) {
   try {
     const out = execSync(`node "${CLI}" ${args}`, {
       encoding: "utf-8",
-      timeout: 10000,
-      env: { ...process.env, HOME: "/tmp/nemoclaw-cli-test-" + Date.now() },
+      timeout,
+      env: { ...process.env, HOME: "/tmp/nemoclaw-cli-test-" + Date.now(), ...env },
     });
     return { code: 0, out };
   } catch (err) {
@@ -90,4 +96,588 @@ describe("CLI dispatch", () => {
     expect(r.out.includes("Troubleshooting")).toBeTruthy();
     expect(r.out.includes("nemoclaw debug")).toBeTruthy();
   });
+
+  it("passes --follow through to openshell logs", () => {
+    const home = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-cli-logs-follow-"));
+    const localBin = path.join(home, "bin");
+    const registryDir = path.join(home, ".nemoclaw");
+    const markerFile = path.join(home, "logs-args");
+    fs.mkdirSync(localBin, { recursive: true });
+    fs.mkdirSync(registryDir, { recursive: true });
+    fs.writeFileSync(
+      path.join(registryDir, "sandboxes.json"),
+      JSON.stringify({
+        sandboxes: {
+          alpha: {
+            name: "alpha",
+            model: "test-model",
+            provider: "nvidia-prod",
+            gpuEnabled: false,
+            policies: [],
+          },
+        },
+        defaultSandbox: "alpha",
+      }),
+      { mode: 0o600 }
+    );
+    fs.writeFileSync(
+      path.join(localBin, "openshell"),
+      [
+        "#!/usr/bin/env bash",
+        `marker_file=${JSON.stringify(markerFile)}`,
+        "printf '%s ' \"$@\" > \"$marker_file\"",
+        "exit 0",
+      ].join("\n"),
+      { mode: 0o755 }
+    );
+
+    const r = runWithEnv("alpha logs --follow", {
+      HOME: home,
+      PATH: `${localBin}:${process.env.PATH || ""}`,
+    });
+
+    expect(r.code).toBe(0);
+    expect(fs.readFileSync(markerFile, "utf8")).toContain("logs alpha --follow");
+  });
+
+  it("removes stale registry entries when connect targets a missing live sandbox", () => {
+    const home = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-cli-stale-connect-"));
+    const localBin = path.join(home, "bin");
+    const registryDir = path.join(home, ".nemoclaw");
+    fs.mkdirSync(localBin, { recursive: true });
+    fs.mkdirSync(registryDir, { recursive: true });
+    fs.writeFileSync(
+      path.join(registryDir, "sandboxes.json"),
+      JSON.stringify({
+        sandboxes: {
+          alpha: {
+            name: "alpha",
+            model: "test-model",
+            provider: "nvidia-prod",
+            gpuEnabled: false,
+            policies: [],
+          },
+        },
+        defaultSandbox: "alpha",
+      }),
+      { mode: 0o600 }
+    );
+    fs.writeFileSync(
+      path.join(localBin, "openshell"),
+      [
+        "#!/usr/bin/env bash",
+        "if [ \"$1\" = \"sandbox\" ] && [ \"$2\" = \"get\" ] && [ \"$3\" = \"alpha\" ]; then",
+        "  echo 'Error: status: NotFound, message: \"sandbox not found\"' >&2",
+        "  exit 1",
+        "fi",
+        "exit 0",
+      ].join("\n"),
+      { mode: 0o755 }
+    );
+
+    const r = runWithEnv("alpha connect", {
+      HOME: home,
+      PATH: `${localBin}:${process.env.PATH || ""}`,
+    });
+
+    expect(r.code).toBe(1);
+    expect(r.out.includes("Removed stale local registry entry")).toBeTruthy();
+    const saved = JSON.parse(fs.readFileSync(path.join(registryDir, "sandboxes.json"), "utf8"));
+    expect(saved.sandboxes.alpha).toBeUndefined();
+  });
+
+  it("keeps registry entries when status hits a gateway-level transport error", () => {
+    const home = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-cli-gateway-error-"));
+    const localBin = path.join(home, "bin");
+    const registryDir = path.join(home, ".nemoclaw");
+    fs.mkdirSync(localBin, { recursive: true });
+    fs.mkdirSync(registryDir, { recursive: true });
+    fs.writeFileSync(
+      path.join(registryDir, "sandboxes.json"),
+      JSON.stringify({
+        sandboxes: {
+          alpha: {
+            name: "alpha",
+            model: "test-model",
+            provider: "nvidia-prod",
+            gpuEnabled: false,
+            policies: [],
+          },
+        },
+        defaultSandbox: "alpha",
+      }),
+      { mode: 0o600 }
+    );
+    fs.writeFileSync(
+      path.join(localBin, "openshell"),
+      [
+        "#!/usr/bin/env bash",
+        "if [ \"$1\" = \"sandbox\" ] && [ \"$2\" = \"get\" ] && [ \"$3\" = \"alpha\" ]; then",
+        "  echo 'Error: transport error: handshake verification failed' >&2",
+        "  exit 1",
+        "fi",
+        "exit 0",
+      ].join("\n"),
+      { mode: 0o755 }
+    );
+
+    const r = runWithEnv("alpha status", {
+      HOME: home,
+      PATH: `${localBin}:${process.env.PATH || ""}`,
+    }, 25000);
+
+    expect(r.code).toBe(0);
+    expect(r.out.includes("Could not verify sandbox 'alpha'")).toBeTruthy();
+    expect(r.out.includes("gateway identity drift after restart")).toBeTruthy();
+    const saved = JSON.parse(fs.readFileSync(path.join(registryDir, "sandboxes.json"), "utf8"));
+    expect(saved.sandboxes.alpha).toBeTruthy();
+  }, 25000);
+
+  it("recovers status after gateway runtime is reattached", () => {
+    const home = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-cli-recover-status-"));
+    const localBin = path.join(home, "bin");
+    const registryDir = path.join(home, ".nemoclaw");
+    const stateFile = path.join(home, "sandbox-get-count");
+    fs.mkdirSync(localBin, { recursive: true });
+    fs.mkdirSync(registryDir, { recursive: true });
+    fs.writeFileSync(
+      path.join(registryDir, "sandboxes.json"),
+      JSON.stringify({
+        sandboxes: {
+          alpha: {
+            name: "alpha",
+            model: "test-model",
+            provider: "nvidia-prod",
+            gpuEnabled: false,
+            policies: [],
+          },
+        },
+        defaultSandbox: "alpha",
+      }),
+      { mode: 0o600 }
+    );
+    fs.writeFileSync(
+      path.join(localBin, "openshell"),
+      [
+        "#!/usr/bin/env bash",
+        `state_file=${JSON.stringify(stateFile)}`,
+        "count=$(cat \"$state_file\" 2>/dev/null || echo 0)",
+        "if [ \"$1\" = \"sandbox\" ] && [ \"$2\" = \"get\" ] && [ \"$3\" = \"alpha\" ]; then",
+        "  count=$((count + 1))",
+        "  echo \"$count\" > \"$state_file\"",
+        "  if [ \"$count\" -eq 1 ]; then",
+        "    echo 'Error: transport error: Connection refused' >&2",
+        "    exit 1",
+        "  fi",
+        "  echo 'Sandbox: alpha'",
+        "  exit 0",
+        "fi",
+        "if [ \"$1\" = \"status\" ]; then",
+        "  echo 'Server Status'",
+        "  echo",
+        "  echo '  Gateway: nemoclaw'",
+        "  echo '  Status: Connected'",
+        "  exit 0",
+        "fi",
+        "if [ \"$1\" = \"gateway\" ] && [ \"$2\" = \"info\" ] && [ \"$3\" = \"-g\" ] && [ \"$4\" = \"nemoclaw\" ]; then",
+        "  echo 'Gateway Info'",
+        "  echo",
+        "  echo '  Gateway: nemoclaw'",
+        "  exit 0",
+        "fi",
+        "exit 0",
+      ].join("\n"),
+      { mode: 0o755 }
+    );
+
+    const r = runWithEnv("alpha status", {
+      HOME: home,
+      PATH: `${localBin}:${process.env.PATH || ""}`,
+    });
+
+    expect(r.code).toBe(0);
+    expect(r.out.includes("Recovered NemoClaw gateway runtime")).toBeTruthy();
+    expect(r.out.includes("Sandbox: alpha")).toBeTruthy();
+  });
+
+  it("does not treat a different connected gateway as a healthy nemoclaw gateway", () => {
+    const home = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-cli-mixed-gateway-"));
+    const localBin = path.join(home, "bin");
+    const registryDir = path.join(home, ".nemoclaw");
+    fs.mkdirSync(localBin, { recursive: true });
+    fs.mkdirSync(registryDir, { recursive: true });
+    fs.writeFileSync(
+      path.join(registryDir, "sandboxes.json"),
+      JSON.stringify({
+        sandboxes: {
+          alpha: {
+            name: "alpha",
+            model: "test-model",
+            provider: "nvidia-prod",
+            gpuEnabled: false,
+            policies: [],
+          },
+        },
+        defaultSandbox: "alpha",
+      }),
+      { mode: 0o600 }
+    );
+    fs.writeFileSync(
+      path.join(localBin, "openshell"),
+      [
+        "#!/usr/bin/env bash",
+        "if [ \"$1\" = \"sandbox\" ] && [ \"$2\" = \"get\" ] && [ \"$3\" = \"alpha\" ]; then",
+        "  echo 'Error: transport error: Connection refused' >&2",
+        "  exit 1",
+        "fi",
+        "if [ \"$1\" = \"status\" ]; then",
+        "  echo 'Server Status'",
+        "  echo",
+        "  echo '  Gateway: openshell'",
+        "  echo '  Status: Connected'",
+        "  exit 0",
+        "fi",
+        "if [ \"$1\" = \"gateway\" ] && [ \"$2\" = \"info\" ] && [ \"$3\" = \"-g\" ] && [ \"$4\" = \"nemoclaw\" ]; then",
+        "  echo 'Gateway Info'",
+        "  echo",
+        "  echo '  Gateway: nemoclaw'",
+        "  exit 0",
+        "fi",
+        "if [ \"$1\" = \"gateway\" ] && [ \"$2\" = \"select\" ] && [ \"$3\" = \"nemoclaw\" ]; then",
+        "  exit 0",
+        "fi",
+        "if [ \"$1\" = \"gateway\" ] && [ \"$2\" = \"start\" ] && [ \"$3\" = \"--name\" ] && [ \"$4\" = \"nemoclaw\" ]; then",
+        "  exit 0",
+        "fi",
+        "if [ \"$1\" = \"inference\" ] && [ \"$2\" = \"get\" ]; then",
+        "  exit 0",
+        "fi",
+        "exit 0",
+      ].join("\n"),
+      { mode: 0o755 }
+    );
+
+    const r = runWithEnv("alpha status", {
+      HOME: home,
+      PATH: `${localBin}:${process.env.PATH || ""}`,
+    }, 25000);
+
+    expect(r.code).toBe(0);
+    expect(r.out.includes("Recovered NemoClaw gateway runtime")).toBeFalsy();
+    expect(r.out.includes("Could not verify sandbox 'alpha'")).toBeTruthy();
+    expect(r.out.includes("verify the active gateway")).toBeTruthy();
+  }, 25000);
+
+  it("matches ANSI-decorated gateway transport errors when printing lifecycle hints", () => {
+    const home = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-cli-ansi-transport-hint-"));
+    const localBin = path.join(home, "bin");
+    const registryDir = path.join(home, ".nemoclaw");
+    fs.mkdirSync(localBin, { recursive: true });
+    fs.mkdirSync(registryDir, { recursive: true });
+    fs.writeFileSync(
+      path.join(registryDir, "sandboxes.json"),
+      JSON.stringify({
+        sandboxes: {
+          alpha: {
+            name: "alpha",
+            model: "test-model",
+            provider: "nvidia-prod",
+            gpuEnabled: false,
+            policies: [],
+          },
+        },
+        defaultSandbox: "alpha",
+      }),
+      { mode: 0o600 }
+    );
+    fs.writeFileSync(
+      path.join(localBin, "openshell"),
+      [
+        "#!/usr/bin/env bash",
+        "if [ \"$1\" = \"sandbox\" ] && [ \"$2\" = \"get\" ] && [ \"$3\" = \"alpha\" ]; then",
+        "  printf '\\033[31mError: trans\\033[0mport error: Connec\\033[33mtion refused\\033[0m\\n' >&2",
+        "  exit 1",
+        "fi",
+        "if [ \"$1\" = \"status\" ]; then",
+        "  echo 'Server Status'",
+        "  echo",
+        "  echo '  Gateway: openshell'",
+        "  echo '  Status: Disconnected'",
+        "  exit 0",
+        "fi",
+        "if [ \"$1\" = \"gateway\" ] && [ \"$2\" = \"info\" ] && [ \"$3\" = \"-g\" ] && [ \"$4\" = \"nemoclaw\" ]; then",
+        "  printf 'Gateway Info\\n\\n  Gateway: openshell\\n'",
+        "  exit 0",
+        "fi",
+        "if [ \"$1\" = \"gateway\" ] && [ \"$2\" = \"select\" ] && [ \"$3\" = \"nemoclaw\" ]; then",
+        "  exit 0",
+        "fi",
+        "exit 0",
+      ].join("\n"),
+      { mode: 0o755 }
+    );
+
+    const r = runWithEnv("alpha status", {
+      HOME: home,
+      PATH: `${localBin}:${process.env.PATH || ""}`,
+    }, 25000);
+
+    expect(r.code).toBe(0);
+    expect(r.out.includes("current gateway/runtime is not reachable")).toBeTruthy();
+  }, 25000);
+
+  it("matches ANSI-decorated gateway auth errors when printing lifecycle hints", () => {
+    const home = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-cli-ansi-auth-hint-"));
+    const localBin = path.join(home, "bin");
+    const registryDir = path.join(home, ".nemoclaw");
+    fs.mkdirSync(localBin, { recursive: true });
+    fs.mkdirSync(registryDir, { recursive: true });
+    fs.writeFileSync(
+      path.join(registryDir, "sandboxes.json"),
+      JSON.stringify({
+        sandboxes: {
+          alpha: {
+            name: "alpha",
+            model: "test-model",
+            provider: "nvidia-prod",
+            gpuEnabled: false,
+            policies: [],
+          },
+        },
+        defaultSandbox: "alpha",
+      }),
+      { mode: 0o600 }
+    );
+    fs.writeFileSync(
+      path.join(localBin, "openshell"),
+      [
+        "#!/usr/bin/env bash",
+        "if [ \"$1\" = \"sandbox\" ] && [ \"$2\" = \"get\" ] && [ \"$3\" = \"alpha\" ]; then",
+        "  printf '\\033[31mMissing gateway auth\\033[0m token\\n' >&2",
+        "  exit 1",
+        "fi",
+        "if [ \"$1\" = \"status\" ]; then",
+        "  echo 'Server Status'",
+        "  echo",
+        "  echo '  Gateway: openshell'",
+        "  echo '  Status: Disconnected'",
+        "  exit 0",
+        "fi",
+        "if [ \"$1\" = \"gateway\" ] && [ \"$2\" = \"info\" ] && [ \"$3\" = \"-g\" ] && [ \"$4\" = \"nemoclaw\" ]; then",
+        "  printf 'Gateway Info\\n\\n  Gateway: openshell\\n'",
+        "  exit 0",
+        "fi",
+        "if [ \"$1\" = \"gateway\" ] && [ \"$2\" = \"select\" ] && [ \"$3\" = \"nemoclaw\" ]; then",
+        "  exit 0",
+        "fi",
+        "exit 0",
+      ].join("\n"),
+      { mode: 0o755 }
+    );
+
+    const r = runWithEnv("alpha status", {
+      HOME: home,
+      PATH: `${localBin}:${process.env.PATH || ""}`,
+    }, 25000);
+
+    expect(r.code).toBe(0);
+    expect(r.out.includes("Verify the active gateway and retry after re-establishing the runtime.")).toBeTruthy();
+  }, 25000);
+
+  it("explains unrecoverable gateway trust rotation after restart", () => {
+    const home = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-cli-identity-drift-"));
+    const localBin = path.join(home, "bin");
+    const registryDir = path.join(home, ".nemoclaw");
+    fs.mkdirSync(localBin, { recursive: true });
+    fs.mkdirSync(registryDir, { recursive: true });
+    fs.writeFileSync(
+      path.join(registryDir, "sandboxes.json"),
+      JSON.stringify({
+        sandboxes: {
+          alpha: {
+            name: "alpha",
+            model: "test-model",
+            provider: "nvidia-prod",
+            gpuEnabled: false,
+            policies: [],
+          },
+        },
+        defaultSandbox: "alpha",
+      }),
+      { mode: 0o600 }
+    );
+    fs.writeFileSync(
+      path.join(localBin, "openshell"), // stub CLI: `sandbox get alpha` fails the handshake, gateway reports healthy
+      [
+        "#!/usr/bin/env bash",
+        "if [ \"$1\" = \"sandbox\" ] && [ \"$2\" = \"get\" ] && [ \"$3\" = \"alpha\" ]; then",
+        "  echo 'Error: transport error: handshake verification failed' >&2",
+        "  exit 1",
+        "fi",
+        "if [ \"$1\" = \"status\" ]; then",
+        "  echo 'Server Status'",
+        "  echo",
+        "  echo '  Gateway: nemoclaw'",
+        "  echo '  Status: Connected'",
+        "  exit 0",
+        "fi",
+        "if [ \"$1\" = \"gateway\" ] && [ \"$2\" = \"info\" ] && [ \"$3\" = \"-g\" ] && [ \"$4\" = \"nemoclaw\" ]; then",
+        "  echo 'Gateway Info'",
+        "  echo",
+        "  echo '  Gateway: nemoclaw'",
+        "  exit 0",
+        "fi",
+        "exit 0",
+      ].join("\n"),
+      { mode: 0o755 }
+    );
+
+    const statusResult = runWithEnv("alpha status", {
+      HOME: home,
+      PATH: `${localBin}:${process.env.PATH || ""}`,
+    }, 25000);
+    expect(statusResult.code).toBe(0);
+    expect(statusResult.out.includes("gateway trust material rotated after restart")).toBeTruthy();
+    expect(statusResult.out.includes("cannot be reattached safely")).toBeTruthy();
+
+    const connectResult = runWithEnv("alpha connect", {
+      HOME: home,
+      PATH: `${localBin}:${process.env.PATH || ""}`,
+    });
+    expect(connectResult.code).toBe(1);
+    expect(connectResult.out.includes("gateway trust material rotated after restart")).toBeTruthy();
+    expect(connectResult.out.includes("Recreate this sandbox")).toBeTruthy();
+  }, 25000); // the status exec alone is allowed 25s, so give the test the same budget as its sibling lifecycle tests
+
+  it("explains when gateway metadata exists but the restarted API is still refusing connections", () => {
+    const home = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-cli-gateway-unreachable-"));
+    const localBin = path.join(home, "bin");
+    const registryDir = path.join(home, ".nemoclaw");
+    fs.mkdirSync(localBin, { recursive: true });
+    fs.mkdirSync(registryDir, { recursive: true });
+    fs.writeFileSync(
+      path.join(registryDir, "sandboxes.json"),
+      JSON.stringify({
+        sandboxes: {
+          alpha: {
+            name: "alpha",
+            model: "test-model",
+            provider: "nvidia-prod",
+            gpuEnabled: false,
+            policies: [],
+          },
+        },
+        defaultSandbox: "alpha",
+      }),
+      { mode: 0o600 }
+    );
+    fs.writeFileSync(
+      path.join(localBin, "openshell"),
+      [
+        "#!/usr/bin/env bash",
+        "if [ \"$1\" = \"sandbox\" ] && [ \"$2\" = \"get\" ] && [ \"$3\" = \"alpha\" ]; then",
+        "  echo 'Error: transport error: Connection refused' >&2",
+        "  exit 1",
+        "fi",
+        "if [ \"$1\" = \"status\" ]; then",
+        "  echo 'Server Status'",
+        "  echo",
+        "  echo '  Gateway: nemoclaw'",
+        "  echo '  Server: https://127.0.0.1:8080'",
+        "  echo 'Error: client error (Connect)' >&2",
+        "  echo 'Connection refused (os error 111)' >&2",
+        "  exit 1",
+        "fi",
+        "if [ \"$1\" = \"gateway\" ] && [ \"$2\" = \"info\" ] && [ \"$3\" = \"-g\" ] && [ \"$4\" = \"nemoclaw\" ]; then",
+        "  echo 'Gateway Info'",
+        "  echo",
+        "  echo '  Gateway: nemoclaw'",
+        "  exit 0",
+        "fi",
+        "if [ \"$1\" = \"gateway\" ] && [ \"$2\" = \"select\" ] && [ \"$3\" = \"nemoclaw\" ]; then",
+        "  exit 0",
+        "fi",
+        "if [ \"$1\" = \"gateway\" ] && [ \"$2\" = \"start\" ] && [ \"$3\" = \"--name\" ] && [ \"$4\" = \"nemoclaw\" ]; then",
+        "  exit 0",
+        "fi",
+        "exit 0",
+      ].join("\n"),
+      { mode: 0o755 }
+    );
+
+    const statusResult = runWithEnv("alpha status", {
+      HOME: home,
+      PATH: `${localBin}:${process.env.PATH || ""}`,
+    }, 25000);
+    expect(statusResult.code).toBe(0);
+    expect(statusResult.out.includes("gateway is still refusing connections after restart")).toBeTruthy();
+    expect(statusResult.out.includes("Retry `openshell gateway start --name nemoclaw`")).toBeTruthy();
+
+    const connectResult = runWithEnv("alpha connect", {
+      HOME: home,
+      PATH: `${localBin}:${process.env.PATH || ""}`,
+    });
+    expect(connectResult.code).toBe(1);
+    expect(connectResult.out.includes("gateway is still refusing connections after restart")).toBeTruthy();
+    expect(connectResult.out.includes("If the gateway never becomes healthy")).toBeTruthy();
+  }, 25000);
+
+  it("explains when the named gateway is no longer configured after restart or rebuild", () => {
+    const home = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-cli-gateway-missing-"));
+    const localBin = path.join(home, "bin");
+    const registryDir = path.join(home, ".nemoclaw");
+    fs.mkdirSync(localBin, { recursive: true });
+    fs.mkdirSync(registryDir, { recursive: true });
+    fs.writeFileSync(
+      path.join(registryDir, "sandboxes.json"),
+      JSON.stringify({
+        sandboxes: {
+          alpha: {
+            name: "alpha",
+            model: "test-model",
+            provider: "nvidia-prod",
+            gpuEnabled: false,
+            policies: [],
+          },
+        },
+        defaultSandbox: "alpha",
+      }),
+      { mode: 0o600 }
+    );
+    fs.writeFileSync(
+      path.join(localBin, "openshell"), // stub CLI: no gateway configured, and `gateway info` / `gateway start` both fail
+      [
+        "#!/usr/bin/env bash",
+        "if [ \"$1\" = \"sandbox\" ] && [ \"$2\" = \"get\" ] && [ \"$3\" = \"alpha\" ]; then",
+        "  echo 'Error: transport error: Connection refused' >&2",
+        "  exit 1",
+        "fi",
+        "if [ \"$1\" = \"status\" ]; then",
+        "  echo 'Gateway Status'",
+        "  echo",
+        "  echo '  Status: No gateway configured.'",
+        "  exit 0",
+        "fi",
+        "if [ \"$1\" = \"gateway\" ] && [ \"$2\" = \"info\" ] && [ \"$3\" = \"-g\" ] && [ \"$4\" = \"nemoclaw\" ]; then",
+        "  exit 1",
+        "fi",
+        "if [ \"$1\" = \"gateway\" ] && [ \"$2\" = \"select\" ] && [ \"$3\" = \"nemoclaw\" ]; then",
+        "  exit 0",
+        "fi",
+        "if [ \"$1\" = \"gateway\" ] && [ \"$2\" = \"start\" ] && [ \"$3\" = \"--name\" ] && [ \"$4\" = \"nemoclaw\" ]; then",
+        "  exit 1",
+        "fi",
+        "exit 0",
+      ].join("\n"),
+      { mode: 0o755 }
+    );
+
+    const statusResult = runWithEnv("alpha status", {
+      HOME: home,
+      PATH: `${localBin}:${process.env.PATH || ""}`,
+    }, 25000); // exec timeout matches the 25s per-test timeout so the child is not killed at the 10s default
+    expect(statusResult.code).toBe(0);
+    expect(statusResult.out.includes("gateway is no longer configured after restart/rebuild")).toBeTruthy();
+    expect(statusResult.out.includes("Start the gateway again")).toBeTruthy();
+  }, 25000);
 });
diff --git a/test/e2e/test-double-onboard.sh b/test/e2e/test-double-onboard.sh
index f70d6533e..da2f4a065 100755
--- a/test/e2e/test-double-onboard.sh
+++ b/test/e2e/test-double-onboard.sh
@@ -2,28 +2,25 @@
 # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
-# Double onboard: verify that consecutive `nemoclaw onboard` runs recover
-# automatically from stale state (gateway, port forward, registry entries)
-# left behind by a previous run.
+# Double onboard / lifecycle recovery:
+#   - prove repeat onboard reuses the healthy shared NemoClaw gateway
+#   - prove onboarding a second sandbox does not destroy the first sandbox
+#   - prove stale registry entries are reconciled against live OpenShell state
+#   - prove gateway rebuilds surface the expected lifecycle guidance
 #
-# Regression test for issues #21, #22, #140, #152, #397.
-#
-# Key insight: running onboard without NVIDIA_API_KEY in non-interactive
-# mode causes process.exit(1) at step 4, but steps 1-3 (preflight,
-# gateway, sandbox) complete first — naturally simulating an unclean exit.
-#
-# Prerequisites:
-#   - Docker running
-#   - openshell CLI installed
-#   - nemoclaw CLI installed
-#   - NVIDIA_API_KEY must NOT be set
-#
-# Usage:
-#   unset NVIDIA_API_KEY
-#   bash test/e2e/test-double-onboard.sh
+# This script intentionally uses a local fake OpenAI-compatible endpoint so it
+# matches the current onboarding flow. Older versions of this test relied on a
+# missing/invalid NVIDIA_API_KEY causing a late failure after sandbox creation;
+# that no longer reflects current non-interactive onboarding behavior.
 
 set -uo pipefail
 
+if [ -z "${NEMOCLAW_E2E_NO_TIMEOUT:-}" ] && command -v timeout >/dev/null 2>&1; then # re-exec under a watchdog; skipped when no `timeout` binary exists so the script still runs
+  export NEMOCLAW_E2E_NO_TIMEOUT=1 # marks the re-executed child so it does not wrap itself again
+  TIMEOUT_SECONDS="${NEMOCLAW_E2E_TIMEOUT_SECONDS:-900}"
+  exec timeout -s TERM "$TIMEOUT_SECONDS" "$0" "$@"
+fi
+
 PASS=0
 FAIL=0
 TOTAL=0
@@ -44,22 +41,144 @@ section() {
 }
 info() { printf '\033[1;34m  [info]\033[0m %s\n' "$1"; }
 
+registry_has() { # usage: registry_has <sandbox_name>; succeeds when the registry file exists and mentions that name
+  local sandbox_name="$1"
+  [ -f "$REGISTRY" ] && grep -qF -- "$sandbox_name" "$REGISTRY" # -F and --: match the name literally, never as a regex or an option
+}
+
 SANDBOX_A="e2e-double-a"
 SANDBOX_B="e2e-double-b"
 REGISTRY="$HOME/.nemoclaw/sandboxes.json"
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"
+REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
+FAKE_HOST="127.0.0.1"
+FAKE_PORT="${NEMOCLAW_FAKE_PORT:-18080}"
+FAKE_BASE_URL="http://${FAKE_HOST}:${FAKE_PORT}/v1"
+FAKE_LOG="$(mktemp)"
+FAKE_PID=""
+
+if command -v node >/dev/null 2>&1 && [ -f "$REPO_ROOT/bin/nemoclaw.js" ]; then
+  NEMOCLAW_CMD=(node "$REPO_ROOT/bin/nemoclaw.js")
+else
+  NEMOCLAW_CMD=(nemoclaw)
+fi
+
+# shellcheck disable=SC2329
+cleanup() { # EXIT-trap handler: stop the background fake endpoint (if one was started) and delete its log file
+  if [ -n "$FAKE_PID" ] && kill -0 "$FAKE_PID" 2>/dev/null; then # only signal a PID that is set and still alive
+    kill "$FAKE_PID" 2>/dev/null || true
+    wait "$FAKE_PID" 2>/dev/null || true # reap the background job; failures are deliberately ignored
+  fi
+  rm -f "$FAKE_LOG"
+}
+trap cleanup EXIT
+
+start_fake_openai() { # launch the inline Python HTTP stub in the background; returns 0 once /models answers, 1 otherwise
+  python3 - "$FAKE_HOST" "$FAKE_PORT" >"$FAKE_LOG" 2>&1 <<'PY' &
+import json
+import sys
+from http.server import BaseHTTPRequestHandler, HTTPServer
+
+HOST = sys.argv[1]
+PORT = int(sys.argv[2])
+
+
+class Handler(BaseHTTPRequestHandler):
+    def _send(self, status, payload):  # serialize payload as JSON and write a complete response
+        body = json.dumps(payload).encode("utf-8")
+        self.send_response(status)
+        self.send_header("Content-Type", "application/json")
+        self.send_header("Content-Length", str(len(body)))
+        self.end_headers()
+        self.wfile.write(body)
+
+    def log_message(self, format, *args):  # override that emits nothing, silencing per-request logging
+        return
+
+    def do_GET(self):  # serves only the model listing; every other path gets a 404
+        if self.path in ("/v1/models", "/models"):
+            self._send(200, {"data": [{"id": "test-model", "object": "model"}]})
+            return
+        self._send(404, {"error": {"message": "not found"}})
+
+    def do_POST(self):  # canned replies for chat completions and responses; other paths get a 404
+        length = int(self.headers.get("Content-Length", "0"))
+        if length:
+            self.rfile.read(length)  # drain the request body; its content is ignored
+        if self.path in ("/v1/chat/completions", "/chat/completions"):
+            self._send(
+                200,
+                {
+                    "id": "chatcmpl-test",
+                    "object": "chat.completion",
+                    "choices": [{"index": 0, "message": {"role": "assistant", "content": "ok"}, "finish_reason": "stop"}],
+                },
+            )
+            return
+        if self.path in ("/v1/responses", "/responses"):
+            self._send(
+                200,
+                {
+                    "id": "resp-test",
+                    "object": "response",
+                    "output": [{"type": "message", "role": "assistant", "content": [{"type": "output_text", "text": "ok"}]}],
+                },
+            )
+            return
+        self._send(404, {"error": {"message": "not found"}})
+
+
+HTTPServer((HOST, PORT), Handler).serve_forever()
+PY
+  FAKE_PID=$! # PID of the backgrounded python process; cleanup() uses it to stop the server
+
+  for _ in $(seq 1 20); do # poll up to 20 times, one second apart, until the models route responds
+    if curl -sf "${FAKE_BASE_URL}/models" >/dev/null 2>&1; then
+      return 0
+    fi
+    sleep 1
+  done
+
+  return 1 # the endpoint never became reachable within the polling window
+}
+
+run_onboard() { # usage: run_onboard <sandbox_name> [recreate]; sets RUN_ONBOARD_EXIT and RUN_ONBOARD_OUTPUT
+  local sandbox_name="$1"
+  local recreate="${2:-0}" # "1" adds NEMOCLAW_RECREATE_SANDBOX=1 to the onboard environment
+  local log_file
+  log_file="$(mktemp)" # combined stdout/stderr is written to a temp file and read back afterwards
+
+  local -a env_args=( # environment for the non-interactive onboard, pointed at the fake endpoint URL
+    "COMPATIBLE_API_KEY=dummy"
+    "NEMOCLAW_NON_INTERACTIVE=1"
+    "NEMOCLAW_PROVIDER=custom"
+    "NEMOCLAW_ENDPOINT_URL=${FAKE_BASE_URL}"
+    "NEMOCLAW_MODEL=test-model"
+    "NEMOCLAW_SANDBOX_NAME=${sandbox_name}"
+    "NEMOCLAW_POLICY_MODE=skip"
+  )
+  if [ "$recreate" = "1" ]; then
+    env_args+=("NEMOCLAW_RECREATE_SANDBOX=1")
+  fi
+
+  env "${env_args[@]}" "${NEMOCLAW_CMD[@]}" onboard --non-interactive >"$log_file" 2>&1
+  RUN_ONBOARD_EXIT=$? # must be captured immediately after the onboard command
+  RUN_ONBOARD_OUTPUT="$(cat "$log_file")"
+  rm -f "$log_file"
+}
+
+run_nemoclaw() { # run the CLI command held in the NEMOCLAW_CMD array, forwarding all arguments unchanged
+  "${NEMOCLAW_CMD[@]}" "$@"
+}
 
 # ══════════════════════════════════════════════════════════════════
 # Phase 0: Pre-cleanup
 # ══════════════════════════════════════════════════════════════════
 section "Phase 0: Pre-cleanup"
 info "Destroying any leftover test sandboxes/gateway from previous runs..."
-# Use nemoclaw destroy (not just openshell sandbox delete) to also clean
-# the nemoclaw registry at ~/.nemoclaw/sandboxes.json.  Stale registry
-# entries from a previous run would cause Phase 2 to exit with
-# "Sandbox already exists" before the test even starts.
-if command -v nemoclaw >/dev/null 2>&1; then
-  nemoclaw "$SANDBOX_A" destroy --yes 2>/dev/null || true
-  nemoclaw "$SANDBOX_B" destroy --yes 2>/dev/null || true
+if [ -x "$REPO_ROOT/bin/nemoclaw.js" ] || command -v nemoclaw >/dev/null 2>&1; then
+  run_nemoclaw "$SANDBOX_A" destroy --yes 2>/dev/null || true
+  run_nemoclaw "$SANDBOX_B" destroy --yes 2>/dev/null || true
 fi
 openshell sandbox delete "$SANDBOX_A" 2>/dev/null || true
 openshell sandbox delete "$SANDBOX_B" 2>/dev/null || true
@@ -68,7 +187,7 @@ openshell gateway destroy -g nemoclaw 2>/dev/null || true
 pass "Pre-cleanup complete"
 
 # ══════════════════════════════════════════════════════════════════
-# Phase 1: Prerequisites
+# Phase 1: Prerequisites + fake endpoint
 # ══════════════════════════════════════════════════════════════════
 section "Phase 1: Prerequisites"
 
@@ -86,51 +205,53 @@ else
   exit 1
 fi
 
-if command -v nemoclaw >/dev/null 2>&1; then
-  pass "nemoclaw CLI installed"
+if [ -x "$REPO_ROOT/bin/nemoclaw.js" ] || command -v nemoclaw >/dev/null 2>&1; then
+  pass "nemoclaw CLI available"
 else
   fail "nemoclaw CLI not found — cannot continue"
   exit 1
 fi
 
-if [ -n "${NVIDIA_API_KEY:-}" ]; then
-  fail "NVIDIA_API_KEY is set — this test requires it UNSET (unset NVIDIA_API_KEY)"
+if command -v python3 >/dev/null 2>&1; then
+  pass "python3 installed"
+else
+  fail "python3 not found — cannot continue"
   exit 1
+fi
+
+if start_fake_openai; then
+  pass "Fake OpenAI-compatible endpoint started at ${FAKE_BASE_URL}"
 else
-  pass "NVIDIA_API_KEY is not set (required for controlled step-4 exit)"
+  fail "Failed to start fake OpenAI-compatible endpoint"
+  info "Fake server log:"
+  sed 's/^/    /' "$FAKE_LOG"
+  exit 1
 fi
 
 # ══════════════════════════════════════════════════════════════════
-# Phase 2: First onboard (e2e-double-a) — leaves stale state
+# Phase 2: First onboard (e2e-double-a)
 # ══════════════════════════════════════════════════════════════════
 section "Phase 2: First onboard ($SANDBOX_A)"
-info "Running nemoclaw onboard — expect exit 1 (no API key)..."
+info "Running successful non-interactive onboard against local compatible endpoint..."
 
-# Write to temp file to avoid openshell FD inheritance blocking $()
-ONBOARD_LOG="$(mktemp)"
-NEMOCLAW_NON_INTERACTIVE=1 \
-  NEMOCLAW_SANDBOX_NAME="$SANDBOX_A" \
-  NEMOCLAW_POLICY_MODE=skip \
-  nemoclaw onboard --non-interactive >"$ONBOARD_LOG" 2>&1
-exit1=$?
-output1="$(cat "$ONBOARD_LOG")"
-rm -f "$ONBOARD_LOG"
+run_onboard "$SANDBOX_A"
+output1="$RUN_ONBOARD_OUTPUT"
+exit1="$RUN_ONBOARD_EXIT"
 
-if [ $exit1 -eq 1 ]; then
-  pass "First onboard exited 1 (step 4 failed as expected)"
+if [ "$exit1" -eq 0 ]; then
+  pass "First onboard completed successfully"
 else
-  fail "First onboard exited $exit1 (expected 1)"
+  fail "First onboard exited $exit1 (expected 0)"
 fi
 
 if grep -q "Sandbox '${SANDBOX_A}' created" <<<"$output1"; then
-  pass "Sandbox '$SANDBOX_A' created (step 3 completed)"
+  pass "Sandbox '$SANDBOX_A' created"
 else
-  fail "Sandbox creation not confirmed in output"
+  fail "Sandbox '$SANDBOX_A' creation not confirmed in output"
 fi
 
-# Verify stale state was left behind
 if openshell gateway info -g nemoclaw 2>/dev/null | grep -q "nemoclaw"; then
-  pass "Gateway is still running (stale state)"
+  pass "Gateway is running after first onboard"
 else
   fail "Gateway is not running after first onboard"
 fi
@@ -141,96 +262,76 @@ else
   fail "Sandbox '$SANDBOX_A' not found in openshell"
 fi
 
-if [ -f "$REGISTRY" ] && grep -q "$SANDBOX_A" "$REGISTRY"; then
+if registry_has "$SANDBOX_A"; then
   pass "Registry contains '$SANDBOX_A'"
 else
   fail "Registry does not contain '$SANDBOX_A'"
 fi
 
-info "Stale state confirmed — NOT cleaning up before next onboard"
-
 # ══════════════════════════════════════════════════════════════════
-# Phase 3: Second onboard — SAME name (e2e-double-a)
+# Phase 3: Second onboard — SAME name (recreate)
 # ══════════════════════════════════════════════════════════════════
-section "Phase 3: Second onboard ($SANDBOX_A — same name, stale state)"
+section "Phase 3: Second onboard ($SANDBOX_A — same name, recreate)"
 info "Running nemoclaw onboard with NEMOCLAW_RECREATE_SANDBOX=1..."
 
-ONBOARD_LOG="$(mktemp)"
-NEMOCLAW_NON_INTERACTIVE=1 \
-  NEMOCLAW_SANDBOX_NAME="$SANDBOX_A" \
-  NEMOCLAW_RECREATE_SANDBOX=1 \
-  NEMOCLAW_POLICY_MODE=skip \
-  nemoclaw onboard --non-interactive >"$ONBOARD_LOG" 2>&1
-exit2=$?
-output2="$(cat "$ONBOARD_LOG")"
-rm -f "$ONBOARD_LOG"
+run_onboard "$SANDBOX_A" "1"
+output2="$RUN_ONBOARD_OUTPUT"
+exit2="$RUN_ONBOARD_EXIT"
 
-# Step 4 still fails (no API key), but steps 1-3 should succeed
-if [ $exit2 -eq 1 ]; then
-  pass "Second onboard exited 1 (step 4 failed as expected)"
+if [ "$exit2" -eq 0 ]; then
+  pass "Second onboard completed successfully"
 else
-  fail "Second onboard exited $exit2 (expected 1)"
+  fail "Second onboard exited $exit2 (expected 0)"
 fi
 
-if grep -q "Cleaning up previous NemoClaw session" <<<"$output2"; then
-  pass "Stale session cleanup fired on second onboard"
+if grep -q "Reusing existing NemoClaw gateway" <<<"$output2"; then
+  pass "Healthy gateway reused on second onboard"
 else
-  fail "Stale session cleanup did NOT fire (regression: #397)"
+  fail "Healthy gateway was not reused on second onboard"
 fi
 
 if grep -q "Port 8080 is not available" <<<"$output2"; then
-  fail "Port 8080 conflict detected (regression: #21)"
+  fail "Port 8080 conflict detected (regression)"
 else
-  pass "No port 8080 conflict"
+  pass "No port 8080 conflict on second onboard"
 fi
 
 if grep -q "Port 18789 is not available" <<<"$output2"; then
-  fail "Port 18789 conflict detected"
+  fail "Port 18789 conflict detected on second onboard"
 else
-  pass "No port 18789 conflict"
+  pass "No port 18789 conflict on second onboard"
 fi
 
-if grep -q "Sandbox '${SANDBOX_A}' created" <<<"$output2"; then
-  pass "Sandbox '$SANDBOX_A' recreated"
-else
-  fail "Sandbox '$SANDBOX_A' was not recreated"
-fi
-
-if openshell gateway info -g nemoclaw 2>/dev/null | grep -q "nemoclaw"; then
-  pass "Gateway running after second onboard"
+if openshell sandbox get "$SANDBOX_A" >/dev/null 2>&1; then
+  pass "Sandbox '$SANDBOX_A' still exists after recreate"
 else
-  fail "Gateway not running after second onboard"
+  fail "Sandbox '$SANDBOX_A' missing after recreate"
 fi
 
 # ══════════════════════════════════════════════════════════════════
-# Phase 4: Third onboard — DIFFERENT name (e2e-double-b)
+# Phase 4: Third onboard — DIFFERENT name
 # ══════════════════════════════════════════════════════════════════
-section "Phase 4: Third onboard ($SANDBOX_B — different name, stale state)"
+section "Phase 4: Third onboard ($SANDBOX_B — different name)"
 info "Running nemoclaw onboard with new sandbox name..."
 
-ONBOARD_LOG="$(mktemp)"
-NEMOCLAW_NON_INTERACTIVE=1 \
-  NEMOCLAW_SANDBOX_NAME="$SANDBOX_B" \
-  NEMOCLAW_POLICY_MODE=skip \
-  nemoclaw onboard --non-interactive >"$ONBOARD_LOG" 2>&1
-exit3=$?
-output3="$(cat "$ONBOARD_LOG")"
-rm -f "$ONBOARD_LOG"
+run_onboard "$SANDBOX_B"
+output3="$RUN_ONBOARD_OUTPUT"
+exit3="$RUN_ONBOARD_EXIT"
 
-if [ $exit3 -eq 1 ]; then
-  pass "Third onboard exited 1 (step 4 failed as expected)"
+if [ "$exit3" -eq 0 ]; then
+  pass "Third onboard completed successfully"
 else
-  fail "Third onboard exited $exit3 (expected 1)"
+  fail "Third onboard exited $exit3 (expected 0)"
 fi
 
-if grep -q "Cleaning up previous NemoClaw session" <<<"$output3"; then
-  pass "Stale session cleanup fired on third onboard"
+if grep -q "Reusing existing NemoClaw gateway" <<<"$output3"; then
+  pass "Healthy gateway reused on third onboard"
 else
-  fail "Stale session cleanup did NOT fire on third onboard"
+  fail "Healthy gateway was not reused on third onboard"
 fi
 
 if grep -q "Port 8080 is not available" <<<"$output3"; then
-  fail "Port 8080 conflict on third onboard (regression)"
+  fail "Port 8080 conflict on third onboard"
 else
   pass "No port 8080 conflict on third onboard"
 fi
@@ -241,19 +342,100 @@ else
   pass "No port 18789 conflict on third onboard"
 fi
 
-if grep -q "Sandbox '${SANDBOX_B}' created" <<<"$output3"; then
+if openshell sandbox get "$SANDBOX_B" >/dev/null 2>&1; then
   pass "Sandbox '$SANDBOX_B' created"
 else
   fail "Sandbox '$SANDBOX_B' was not created"
 fi
 
+if openshell sandbox get "$SANDBOX_A" >/dev/null 2>&1; then
+  pass "First sandbox '$SANDBOX_A' still exists after creating '$SANDBOX_B'"
+else
+  fail "First sandbox '$SANDBOX_A' disappeared after creating '$SANDBOX_B' (regression: #849)"
+fi
+
+# ══════════════════════════════════════════════════════════════════
+# Phase 5: Stale registry reconciliation
+# ══════════════════════════════════════════════════════════════════
+section "Phase 5: Stale registry reconciliation"
+info "Deleting '$SANDBOX_A' directly in OpenShell to leave a stale NemoClaw registry entry..."
+
+openshell sandbox delete "$SANDBOX_A" 2>/dev/null || true
+
+if registry_has "$SANDBOX_A"; then
+  pass "Registry still contains stale '$SANDBOX_A' entry"
+else
+  fail "Registry was unexpectedly cleaned before status reconciliation"
+fi
+
+STATUS_LOG="$(mktemp)"
+run_nemoclaw "$SANDBOX_A" status >"$STATUS_LOG" 2>&1
+status_exit=$?
+status_output="$(cat "$STATUS_LOG")"
+rm -f "$STATUS_LOG"
+
+if [ "$status_exit" -eq 0 ]; then
+  pass "Stale sandbox status exited 0"
+else
+  fail "Stale sandbox status exited $status_exit (expected 0)"
+fi
+
+if grep -q "Removed stale local registry entry" <<<"$status_output"; then
+  pass "Stale registry entry was reconciled during status"
+else
+  fail "Stale registry reconciliation message missing"
+fi
+
+if registry_has "$SANDBOX_A"; then
+  fail "Registry still contains '$SANDBOX_A' after status reconciliation"
+else
+  pass "Registry entry for '$SANDBOX_A' removed after status reconciliation"
+fi
+
 # ══════════════════════════════════════════════════════════════════
-# Phase 5: Final cleanup
+# Phase 6: Gateway lifecycle response
 # ══════════════════════════════════════════════════════════════════
-section "Phase 5: Final cleanup"
+section "Phase 6: Gateway lifecycle response"
+info "Stopping the NemoClaw gateway runtime to verify current lifecycle behavior..."
 
-nemoclaw "$SANDBOX_A" destroy --yes 2>/dev/null || true
-nemoclaw "$SANDBOX_B" destroy --yes 2>/dev/null || true
+openshell forward stop 18789 2>/dev/null || true
+openshell gateway stop -g nemoclaw 2>/dev/null || true
+
+GATEWAY_LOG="$(mktemp)"
+run_nemoclaw "$SANDBOX_B" status >"$GATEWAY_LOG" 2>&1
+gateway_status_exit=$?
+gateway_status_output="$(cat "$GATEWAY_LOG")"
+rm -f "$GATEWAY_LOG"
+
+if [ "$gateway_status_exit" -eq 0 ]; then
+  pass "Post-stop status exited 0"
+else
+  fail "Post-stop status exited $gateway_status_exit (expected 0)"
+fi
+
+if grep -qE \
+  "Recovered NemoClaw gateway runtime|gateway is no longer configured after restart/rebuild|gateway is still refusing connections after restart|gateway trust material rotated after restart" \
+  <<<"$gateway_status_output"; then
+  pass "Gateway lifecycle response was explicit after gateway stop"
+else
+  fail "Gateway lifecycle response was not explicit after gateway stop"
+  info "Observed status output:"
+  printf '%s\n' "$gateway_status_output" | sed 's/^/    /'
+fi
+
+if registry_has "$SANDBOX_B"; then
+  pass "Registry still contains '$SANDBOX_B' after gateway stop"
+else
+  fail "Registry is missing '$SANDBOX_B' after gateway stop"
+fi
+
+# ══════════════════════════════════════════════════════════════════
+# Phase 7: Final cleanup
+# ══════════════════════════════════════════════════════════════════
+section "Phase 7: Final cleanup"
+
+run_nemoclaw "$SANDBOX_A" destroy --yes 2>/dev/null || true
+run_nemoclaw "$SANDBOX_B" destroy --yes 2>/dev/null || true
 openshell sandbox delete "$SANDBOX_A" 2>/dev/null || true
 openshell sandbox delete "$SANDBOX_B" 2>/dev/null || true
 openshell forward stop 18789 2>/dev/null || true
@@ -279,9 +461,6 @@ fi
 
 pass "Final cleanup complete"
 
-# ══════════════════════════════════════════════════════════════════
-# Summary
-# ══════════════════════════════════════════════════════════════════
 echo ""
 echo "========================================"
 echo "  Double Onboard E2E Results:"
@@ -291,7 +470,7 @@ echo "    Total:   $TOTAL"
 echo "========================================"
 
 if [ "$FAIL" -eq 0 ]; then
-  printf '\n\033[1;32m  Double onboard PASSED — stale state recovery verified.\033[0m\n'
+  printf '\n\033[1;32m  Double onboard and lifecycle recovery PASSED.\033[0m\n'
   exit 0
 else
   printf '\n\033[1;31m  %d test(s) failed.\033[0m\n' "$FAIL"
diff --git a/test/gateway-cleanup.test.js b/test/gateway-cleanup.test.js
index 5043a2373..799680048 100644
--- a/test/gateway-cleanup.test.js
+++ b/test/gateway-cleanup.test.js
@@ -23,16 +23,17 @@ describe("gateway cleanup: Docker volumes removed on failure (#17)", () => {
 
   it("onboard.js: volume cleanup runs on gateway start failure", () => {
     const content = fs.readFileSync(path.join(ROOT, "bin/lib/onboard.js"), "utf-8");
-    // The startGateway function should call destroyGateway after a failed start
-    const startGwBlock = content.match(/async function startGateway[\s\S]*?^}/m);
+    const startGwBlock = content.match(/async function startGatewayWithOptions[\s\S]*?^}/m);
     expect(startGwBlock).toBeTruthy();
 
-    // Count calls to destroyGateway — should be at least 3:
-    // 1. pre-cleanup before start
-    // 2. after start failure
-    // 3. after health check failure
-    const calls = (startGwBlock[0].match(/destroyGateway\(\)/g) || []).length;
-    expect(calls).toBeGreaterThanOrEqual(3);
+    // Cleanup paths this test pins in startGatewayWithOptions:
+    // 1. stale gateway metadata is destroyed directly before start, if present
+    // 2. destroyGateway() runs after start failure
+    // 3. destroyGateway() runs after health check failure (so >= 2 calls in total)
+    expect(startGwBlock[0]).toContain('if (hasStaleGateway(gwInfo))');
+    expect(startGwBlock[0]).toContain('runOpenshell(["gateway", "destroy", "-g", GATEWAY_NAME]');
+    const destroyCalls = (startGwBlock[0].match(/destroyGateway\(\)/g) || []).length;
+    expect(destroyCalls).toBeGreaterThanOrEqual(2);
   });
 
   it("uninstall.sh: includes Docker volume cleanup", () => {
diff --git a/test/onboard.test.js b/test/onboard.test.js
index f1240a9ed..8a8046b52 100644
--- a/test/onboard.test.js
+++ b/test/onboard.test.js
@@ -12,6 +12,7 @@ import {
   buildSandboxConfigSyncScript,
   getFutureShellPathHint,
   getInstalledOpenshellVersion,
   getSandboxInferenceConfig,
   getStableGatewayImageRef,
+  isGatewayHealthy,
   patchStagedDockerfile,
@@ -152,6 +153,44 @@ describe("onboard helpers", () => {
     expect(getStableGatewayImageRef("bogus")).toBe(null);
   });
 
+  it("recognizes only a connected named NemoClaw gateway as healthy", () => {
+    // Connected, active gateway is nemoclaw, and its metadata exists: healthy.
+    expect(
+      isGatewayHealthy(
+        "Server Status\n\n  Gateway: nemoclaw\n  Status: Connected",
+        "Gateway Info\n\n  Gateway: nemoclaw\n  Gateway endpoint: https://127.0.0.1:8080"
+      )
+    ).toBe(true);
+    // Colored CLI output: escape codes are stripped before the gateway name is matched.
+    expect(
+      isGatewayHealthy(
+        "Server Status\n\n  Gateway: \u001b[1mnemoclaw\u001b[0m\n  Status: \u001b[32mConnected\u001b[0m",
+        "Gateway Info\n\n  Gateway: nemoclaw\n  Gateway endpoint: https://127.0.0.1:8080"
+      )
+    ).toBe(true);
+    // Connected, but a different gateway is the active one.
+    expect(
+      isGatewayHealthy(
+        "Server Status\n\n  Gateway: openshell\n  Status: Connected",
+        "Gateway Info\n\n  Gateway: nemoclaw\n  Gateway endpoint: https://127.0.0.1:8080"
+      )
+    ).toBe(false);
+    // A different gateway is active and no nemoclaw metadata exists.
+    expect(
+      isGatewayHealthy(
+        "Server Status\n\n  Gateway: openshell\n  Status: Connected",
+        "Error: no gateway metadata found"
+      )
+    ).toBe(false);
+    // "Disconnected" must not be mistaken for "Connected".
+    expect(
+      isGatewayHealthy(
+        "Server Status\n\n  Gateway: nemoclaw\n  Status: Disconnected",
+        "Gateway Info\n\n  Gateway: nemoclaw\n  Gateway endpoint: https://127.0.0.1:8080"
+      )
+    ).toBe(false);
+  });
+
   it("returns a future-shell PATH hint for user-local openshell installs", () => {
     expect(getFutureShellPathHint("/home/test/.local/bin", "/usr/local/bin:/usr/bin")).toBe(
       'export PATH="/home/test/.local/bin:$PATH"'
@@ -436,6 +464,68 @@ console.log(JSON.stringify({ liveExists, sandbox: registry.getSandbox("my-assist
     assert.equal(payload.sandbox, null);
   });
 
+  it("reuses an existing healthy gateway instead of destroying it", () => {
+    const repoRoot = path.join(import.meta.dirname, "..");
+    const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-onboard-gateway-reuse-"));
+    const fakeBin = path.join(tmpDir, "bin");
+    const scriptPath = path.join(tmpDir, "gateway-reuse-check.js");
+    const onboardPath = JSON.stringify(path.join(repoRoot, "bin", "lib", "onboard.js"));
+    const runnerPath = JSON.stringify(path.join(repoRoot, "bin", "lib", "runner.js"));
+
+    fs.mkdirSync(fakeBin, { recursive: true });
+    fs.writeFileSync(path.join(fakeBin, "openshell"), "#!/usr/bin/env bash\nexit 0\n", { mode: 0o755 });
+
+    const script = String.raw`
+const runner = require(${runnerPath});
+const commands = [];
+
+runner.run = (command, opts = {}) => {
+  commands.push(command);
+  return { status: 0 };
+};
+runner.runCapture = (command) => {
+  if (command.includes("'status'")) {
+    return "Server Status\n\n  Gateway: nemoclaw\n  Status: Connected";
+  }
+  if (command.includes("'gateway' 'info' '-g' 'nemoclaw'")) {
+    return "Gateway Info\n\n  Gateway: nemoclaw\n  Gateway endpoint: https://127.0.0.1:8080";
+  }
+  if (command.includes("'--version'")) {
+    return "openshell 0.0.12";
+  }
+  return "";
+};
+
+const { startGateway } = require(${onboardPath});
+
+(async () => {
+  await startGateway(null);
+  console.log(JSON.stringify(commands));
+})().catch((error) => {
+  console.error(error);
+  process.exit(1);
+});
+`;
+    fs.writeFileSync(scriptPath, script);
+
+    const result = spawnSync(process.execPath, [scriptPath], {
+      cwd: repoRoot,
+      encoding: "utf-8",
+      env: {
+        ...process.env,
+        HOME: tmpDir,
+        PATH: `${fakeBin}:${process.env.PATH || ""}`,
+      },
+    });
+
+    assert.equal(result.status, 0, result.stderr);
+    const commands = JSON.parse(result.stdout.trim().split("\n").pop());
+    assert.equal(commands.length, 1);
+    assert.match(commands[0], /gateway' 'select' 'nemoclaw'/);
+    assert.doesNotMatch(commands[0], /gateway' 'destroy'/);
+    assert.doesNotMatch(commands[0], /gateway' 'start'/);
+  });
+
   it("builds the sandbox without uploading an external OpenClaw config file", async () => {
     const repoRoot = path.join(import.meta.dirname, "..");
     const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-onboard-create-sandbox-"));