diff --git a/bin/lib/onboard-session.js b/bin/lib/onboard-session.js
new file mode 100644
index 000000000..819790173
--- /dev/null
+++ b/bin/lib/onboard-session.js
@@ -0,0 +1,432 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+const fs = require("fs");
+const path = require("path");
+
+const SESSION_VERSION = 1; // schema version; normalizeSession() rejects data whose `version` differs
+const SESSION_DIR = path.join(process.env.HOME || "/tmp", ".nemoclaw"); // uses /tmp when HOME is unset or empty
+const SESSION_FILE = path.join(SESSION_DIR, "onboard-session.json"); // persisted onboarding session state
+const LOCK_FILE = path.join(SESSION_DIR, "onboard.lock"); // created by acquireOnboardLock(), removed by releaseOnboardLock()
+const VALID_STEP_STATES = new Set(["pending", "in_progress", "complete", "failed", "skipped"]); // statuses accepted by validateStep()
+
+// Creates SESSION_DIR, including missing parent directories, requesting mode 0o700.
+// With `recursive: true`, fs.mkdirSync does not throw when the directory already exists.
+function ensureSessionDir() { fs.mkdirSync(SESSION_DIR, { mode: 0o700, recursive: true }); }
+
+// Accessor for the on-disk location of the onboarding session file.
+// Returns the module-level SESSION_FILE constant unchanged.
+function sessionPath() { return SESSION_FILE; }
+
+// Accessor for the on-disk location of the onboarding lock file.
+// Returns the module-level LOCK_FILE constant unchanged.
+function lockPath() { return LOCK_FILE; }
+
+// Builds the initial step table: one fresh "pending" record per onboarding step.
+// Every call returns newly allocated objects, so callers may mutate the result
+// without affecting other sessions.
+function defaultSteps() {
+  const stepNames = [
+    "preflight", "gateway", "sandbox", "provider_selection",
+    "inference", "openclaw", "policies",
+  ];
+  const blankStep = () => ({ status: "pending", startedAt: null, completedAt: null, error: null });
+  return Object.fromEntries(stepNames.map((stepName) => [stepName, blankStep()]));
+}
+
+// Creates a new in-progress, resumable session object. Any truthy field in
+// `overrides` replaces the corresponding default; `overrides.steps` entries are
+// shallow-merged over defaultSteps(). Nothing is written to disk here.
+function createSession(overrides = {}) {
+  const timestamp = new Date().toISOString();
+  const orNull = (value) => value || null;
+  const presets = overrides.policyPresets;
+  return {
+    version: SESSION_VERSION,
+    sessionId: overrides.sessionId || `${Date.now()}-${Math.random().toString(36).slice(2, 10)}`,
+    resumable: true,
+    status: "in_progress",
+    mode: overrides.mode || "interactive",
+    startedAt: overrides.startedAt || timestamp,
+    updatedAt: overrides.updatedAt || timestamp,
+    lastStepStarted: orNull(overrides.lastStepStarted),
+    lastCompletedStep: orNull(overrides.lastCompletedStep),
+    failure: orNull(overrides.failure),
+    sandboxName: orNull(overrides.sandboxName),
+    provider: orNull(overrides.provider),
+    model: orNull(overrides.model),
+    endpointUrl: orNull(overrides.endpointUrl),
+    credentialEnv: orNull(overrides.credentialEnv),
+    preferredInferenceApi: orNull(overrides.preferredInferenceApi),
+    nimContainer: orNull(overrides.nimContainer),
+    policyPresets: Array.isArray(presets) ? presets.filter((entry) => typeof entry === "string") : null,
+    metadata: { gatewayName: overrides.metadata?.gatewayName || "nemoclaw" },
+    steps: Object.assign(defaultSteps(), overrides.steps || {}),
+  };
+}
+
+// True for non-null, non-array values whose typeof is "object"; false for
+// everything else (primitives, functions, arrays, null, undefined).
+function isObject(value) { return !(value === null || Array.isArray(value)) && typeof value === "object"; }
+
+// Masks known credential shapes in free text and truncates to 240 characters; null for non-strings.
+function redactSensitiveText(value) {
+  if (typeof value !== "string") return null;
+  let text = value.replace(/(NVIDIA_API_KEY|OPENAI_API_KEY|ANTHROPIC_API_KEY|GEMINI_API_KEY|COMPATIBLE_API_KEY|COMPATIBLE_ANTHROPIC_API_KEY)=\S+/gi, "$1=<REDACTED>");
+  text = text.replace(/Bearer\s+\S+/gi, "Bearer <REDACTED>");
+  for (const tokenPattern of [/nvapi-[A-Za-z0-9_-]{10,}/g, /ghp_[A-Za-z0-9]{20,}/g, /sk-[A-Za-z0-9_-]{10,}/g]) {
+    text = text.replace(tokenPattern, "<REDACTED>");
+  }
+  return text.slice(0, 240);
+}
+
+// Normalizes a failure record (message redacted); null when nothing usable remains.
+function sanitizeFailure(input) {
+  if (!input) return null;
+  const failure = { step: typeof input.step === "string" ? input.step : null, message: redactSensitiveText(input.message) };
+  failure.recordedAt = typeof input.recordedAt === "string" ? input.recordedAt : new Date().toISOString();
+  return failure.step || failure.message ? failure : null;
+}
+
+// A step record is valid when it is a plain (non-array) object whose `status`
+// is one of VALID_STEP_STATES. Other fields are not inspected.
+function validateStep(step) {
+  return isObject(step) && VALID_STEP_STATES.has(step.status);
+}
+
+// Strips secrets from a URL before it is persisted or logged: drops userinfo and the
+// fragment, and masks query parameters with credential-like names (token, signature,
+// key, api_key, secret, ...). Unparseable strings go through redactSensitiveText().
+function redactUrl(value) {
+  if (typeof value !== "string" || value.length === 0) return null;
+  const sensitiveParam = /(^|[-_])(?:signature|sig|token|auth|access_token|accesstoken|key|apikey|secret|password)$/i;
+  try {
+    const url = new URL(value);
+    url.username = "";
+    url.password = "";
+    for (const key of new Set(url.searchParams.keys())) {
+      if (sensitiveParam.test(key)) url.searchParams.set(key, "<REDACTED>");
+    }
+    url.hash = "";
+    return url.toString();
+  } catch {
+    return redactSensitiveText(value);
+  }
+}
+
+// Validates and sanitizes session-shaped data. Returns null unless `data` is a plain object
+// with the current SESSION_VERSION; otherwise rebuilds it via createSession(), keeping string
+// fields only, redacting endpointUrl/failure/step errors, and copying only known, valid steps.
+// eslint-disable-next-line complexity
+function normalizeSession(data) {
+  if (!isObject(data) || data.version !== SESSION_VERSION) return null;
+  const str = (value) => (typeof value === "string" ? value : null);
+  const gatewayName = isObject(data.metadata) ? str(data.metadata.gatewayName) : null; // strings only, as in filterSafeUpdates()
+  const normalized = createSession({
+    sessionId: str(data.sessionId),
+    mode: str(data.mode),
+    startedAt: str(data.startedAt),
+    updatedAt: str(data.updatedAt),
+    sandboxName: str(data.sandboxName),
+    provider: str(data.provider),
+    model: str(data.model),
+    endpointUrl: redactUrl(str(data.endpointUrl)),
+    credentialEnv: str(data.credentialEnv),
+    preferredInferenceApi: str(data.preferredInferenceApi),
+    nimContainer: str(data.nimContainer),
+    policyPresets: data.policyPresets, // createSession() keeps string entries of arrays, else null
+    lastStepStarted: str(data.lastStepStarted),
+    lastCompletedStep: str(data.lastCompletedStep),
+    failure: sanitizeFailure(data.failure),
+    metadata: gatewayName ? { gatewayName } : undefined,
+  });
+  normalized.resumable = data.resumable !== false;
+  normalized.status = str(data.status) ?? normalized.status;
+  for (const [name, step] of Object.entries(isObject(data.steps) ? data.steps : {})) {
+    if (!Object.prototype.hasOwnProperty.call(normalized.steps, name) || !validateStep(step)) continue;
+    normalized.steps[name] = {
+      status: step.status,
+      startedAt: str(step.startedAt),
+      completedAt: str(step.completedAt),
+      error: redactSensitiveText(step.error),
+    };
+  }
+  return normalized;
+}
+
+// Reads, parses, and normalizes SESSION_FILE. Returns null when the file is missing,
+// unreadable, not valid JSON, or rejected by normalizeSession(); never throws.
+function loadSession() {
+  try {
+    if (fs.existsSync(SESSION_FILE) === false) return null;
+    const raw = fs.readFileSync(SESSION_FILE, "utf-8");
+    return normalizeSession(JSON.parse(raw));
+  } catch {
+    return null;
+  }
+}
+
+function saveSession(session) { // normalizes, stamps updatedAt, and writes the session via temp file + rename; returns what was saved
+  const normalized = { ...(normalizeSession(session) || createSession()), updatedAt: new Date().toISOString() }; // invalid input -> fresh session
+  ensureSessionDir();
+  const tmpFile = path.join(SESSION_DIR, `.onboard-session.${process.pid}.${Date.now()}.${Math.random().toString(36).slice(2, 8)}.tmp`);
+  try {
+    fs.writeFileSync(tmpFile, JSON.stringify(normalized, null, 2), { mode: 0o600 });
+    fs.renameSync(tmpFile, SESSION_FILE); // the rename replaces SESSION_FILE in a single step
+    return normalized;
+  } finally {
+    fs.rmSync(tmpFile, { force: true }); // removes the temp file if write/rename failed; no-op after a successful rename
+  }
+}
+
+// Deletes the persisted session file if it exists; filesystem errors are swallowed.
+function clearSession() {
+  try {
+    if (!fs.existsSync(SESSION_FILE)) return;
+    fs.unlinkSync(SESSION_FILE);
+  } catch {
+    // Ignored: clearing the session is best-effort.
+  }
+}
+
+// Parses lock-file JSON into { pid, startedAt, command }. Returns null for invalid JSON
+// or a non-numeric `pid`; non-string startedAt/command are normalized to null.
+function parseLockFile(contents) {
+  const str = (value) => (typeof value === "string" ? value : null);
+  let parsed;
+  try {
+    parsed = JSON.parse(contents);
+  } catch {
+    return null;
+  }
+  if (typeof parsed?.pid !== "number") return null;
+  return { pid: parsed.pid, startedAt: str(parsed.startedAt), command: str(parsed.command) };
+}
+
+// Probes `pid` with signal 0, which performs the existence/permission check without
+// delivering a signal. An EPERM failure is treated as alive (the signal was refused,
+// not the pid missing); any other failure, or an invalid pid, means not alive.
+function isProcessAlive(pid) {
+  if (!(Number.isInteger(pid) && pid > 0)) return false;
+  let alive = true;
+  try { process.kill(pid, 0); } catch (error) { alive = error?.code === "EPERM"; }
+  return alive;
+}
+
+// Takes the onboarding lock by exclusively creating LOCK_FILE and writing this process's
+// pid/start time/command into it. Returns { acquired: true } on success, the live holder's
+// details when another process owns the lock, or { acquired: false, stale: true } when the
+// lock could not be taken after one retry. A lock whose holder is dead is removed and retried.
+// Throws filesystem errors other than the EEXIST/ENOENT races handled below.
+function acquireOnboardLock(command = null) {
+  ensureSessionDir();
+  const record = { pid: process.pid, startedAt: new Date().toISOString(), command: typeof command === "string" ? command : null };
+  const payload = JSON.stringify(record, null, 2);
+
+  for (let attempt = 0; attempt < 2; attempt++) {
+    let fd = null;
+    try {
+      fd = fs.openSync(LOCK_FILE, "wx", 0o600);
+    } catch (error) {
+      if (error?.code !== "EEXIST") throw error;
+    }
+    if (fd !== null) {
+      let written = false;
+      try {
+        fs.writeFileSync(fd, payload);
+        written = true;
+      } finally {
+        // Always release the descriptor; a lock we created but could not fill in is removed.
+        fs.closeSync(fd);
+        if (!written) fs.rmSync(LOCK_FILE, { force: true });
+      }
+      return { acquired: true, lockFile: LOCK_FILE, stale: false };
+    }
+
+    let existing;
+    try {
+      existing = parseLockFile(fs.readFileSync(LOCK_FILE, "utf8"));
+    } catch (readError) {
+      if (readError?.code === "ENOENT") continue; // lock vanished between open and read
+      throw readError;
+    }
+    if (!existing) continue; // unparseable (presumably mid-write elsewhere): retry without removing it
+    if (isProcessAlive(existing.pid)) {
+      return {
+        acquired: false,
+        lockFile: LOCK_FILE,
+        stale: false,
+        holderPid: existing.pid,
+        holderStartedAt: existing.startedAt,
+        holderCommand: existing.command,
+      };
+    }
+
+    try {
+      fs.unlinkSync(LOCK_FILE); // holder is gone: drop the stale lock and retry
+    } catch (unlinkError) {
+      if (unlinkError?.code !== "ENOENT") throw unlinkError;
+    }
+  }
+
+  return { acquired: false, lockFile: LOCK_FILE, stale: true };
+}
+
+// Removes LOCK_FILE only when it parses and records this process's pid. Missing or
+// unparseable locks, locks held by another pid, and filesystem errors return silently.
+function releaseOnboardLock() {
+  let holder = null;
+  try {
+    if (!fs.existsSync(LOCK_FILE)) return;
+    holder = parseLockFile(fs.readFileSync(LOCK_FILE, "utf8"));
+  } catch {
+    return;
+  }
+  if (holder?.pid !== process.pid) return;
+  try {
+    fs.unlinkSync(LOCK_FILE);
+  } catch {
+    // Ignored: releasing the lock never throws.
+  }
+}
+
+// Loads the session (or creates one), applies `mutator` (a falsy return keeps the loaded object), and saves the result.
+function updateSession(mutator) {
+  const session = loadSession() || createSession();
+  return saveSession((typeof mutator === "function" && mutator(session)) || session);
+}
+
+// Marks `stepName` as in progress: resets its timestamps and error, clears any session
+// failure, and saves. An unknown step name changes nothing, but the session is still saved.
+function markStepStarted(stepName) {
+  return updateSession((session) => {
+    const step = session.steps[stepName];
+    if (step) {
+      Object.assign(step, { status: "in_progress", startedAt: new Date().toISOString(), completedAt: null, error: null });
+      session.lastStepStarted = stepName;
+      session.failure = null;
+      session.status = "in_progress";
+    }
+    return session;
+  });
+}
+
+// Marks `stepName` complete, clears failure state, and merges the whitelisted fields
+// of `updates` (see filterSafeUpdates) into the session before saving.
+function markStepComplete(stepName, updates = {}) {
+  return updateSession((session) => {
+    const step = session.steps[stepName];
+    if (!step) return session;
+    Object.assign(step, { status: "complete", completedAt: new Date().toISOString(), error: null });
+    session.lastCompletedStep = stepName;
+    session.failure = null;
+    Object.assign(session, filterSafeUpdates(updates));
+    return session;
+  });
+}
+
+// Records a failure for `stepName`: the step and the session both become "failed"
+// and the redacted message is stored on the step and in session.failure.
+function markStepFailed(stepName, message = null) {
+  return updateSession((session) => {
+    const step = session.steps[stepName];
+    if (step) {
+      const recordedAt = new Date().toISOString();
+      step.status = "failed";
+      step.completedAt = null;
+      step.error = redactSensitiveText(message);
+      session.failure = sanitizeFailure({ step: stepName, message, recordedAt });
+      session.status = "failed";
+    }
+    return session;
+  });
+}
+
+// Finalizes onboarding: merges whitelisted `updates`, then marks the session complete and non-resumable.
+function completeSession(updates = {}) {
+  const finalState = { status: "complete", resumable: false, failure: null };
+  return updateSession((session) => {
+    // Caller-supplied fields are applied first; the final state is applied last.
+    Object.assign(session, filterSafeUpdates(updates), finalState);
+    return session;
+  });
+}
+
+// Whitelists what a caller may merge into a session: known string fields (endpointUrl via
+// redactUrl), string-only policyPresets, and metadata.gatewayName. Everything else is dropped.
+function filterSafeUpdates(updates) {
+  const safe = {};
+  if (!isObject(updates)) return safe;
+  const stringFields = [
+    "sandboxName", "provider", "model", "endpointUrl",
+    "credentialEnv", "preferredInferenceApi", "nimContainer",
+  ];
+  for (const field of stringFields) {
+    if (typeof updates[field] !== "string") continue;
+    safe[field] = field === "endpointUrl" ? redactUrl(updates[field]) : updates[field];
+  }
+  if (Array.isArray(updates.policyPresets)) {
+    safe.policyPresets = updates.policyPresets.filter((entry) => typeof entry === "string");
+  }
+  const gatewayName = isObject(updates.metadata) ? updates.metadata.gatewayName : undefined;
+  if (typeof gatewayName === "string") safe.metadata = { gatewayName };
+  return safe;
+}
+
+// Produces a plain snapshot of `session` (default: the persisted one) for debug
+// output. Copies a fixed set of fields, re-redacts endpointUrl, and reduces each
+// step to its four known fields. Returns null when there is no session.
+function summarizeForDebug(session = loadSession()) {
+  if (!session) return null;
+  const summarizeStep = (step) => ({
+    status: step.status,
+    startedAt: step.startedAt,
+    completedAt: step.completedAt,
+    error: step.error,
+  });
+  const stepSummaries = Object.entries(session.steps).map(([name, step]) => [name, summarizeStep(step)]);
+  return {
+    version: session.version,
+    sessionId: session.sessionId,
+    status: session.status,
+    resumable: session.resumable,
+    mode: session.mode,
+    startedAt: session.startedAt,
+    updatedAt: session.updatedAt,
+    sandboxName: session.sandboxName,
+    provider: session.provider,
+    model: session.model,
+    endpointUrl: redactUrl(session.endpointUrl),
+    credentialEnv: session.credentialEnv,
+    preferredInferenceApi: session.preferredInferenceApi,
+    nimContainer: session.nimContainer,
+    policyPresets: session.policyPresets,
+    lastStepStarted: session.lastStepStarted,
+    lastCompletedStep: session.lastCompletedStep,
+    failure: session.failure,
+    steps: Object.fromEntries(stepSummaries),
+  };
+}
+
+module.exports = {
+  LOCK_FILE,
+  SESSION_DIR,
+  SESSION_FILE,
+  SESSION_VERSION,
+  acquireOnboardLock,
+  clearSession,
+  completeSession,
+  createSession,
+  loadSession,
+  markStepComplete,
+  markStepFailed,
+  markStepStarted,
+  lockPath,
+  redactUrl,
+  saveSession,
+  releaseOnboardLock,
+  sessionPath,
+  redactSensitiveText,
+  summarizeForDebug,
+  updateSession,
+};
diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js
index 955894113..e58c64502 100644
--- a/bin/lib/onboard.js
+++ b/bin/lib/onboard.js
@@ -24,6 +24,7 @@ const {
   CLOUD_MODEL_OPTIONS,
   DEFAULT_CLOUD_MODEL,
   getProviderSelectionConfig,
+  parseGatewayInference,
 } = require("./inference-config");
 const {
   inferContainerRuntime,
@@ -34,6 +35,7 @@ const { resolveOpenshell } = require("./resolve-openshell");
 const { prompt, ensureApiKey, getCredential, saveCredential } = require("./credentials");
 const registry = require("./registry");
 const nim = require("./nim");
+const onboardSession = require("./onboard-session");
 const policies = require("./policies");
 const { checkPortAvailable } = require("./preflight");
 const EXPERIMENTAL = process.env.NEMOCLAW_EXPERIMENTAL === "1";
@@ -182,29 +184,90 @@ function isSandboxReady(output, sandboxName) {
  * @returns {boolean}
  */
 function hasStaleGateway(gwInfoOutput) {
-  return typeof gwInfoOutput === "string" && gwInfoOutput.length > 0 && gwInfoOutput.includes(GATEWAY_NAME);
+  const cleanOutput =
+    typeof gwInfoOutput === "string"
+      ? // eslint-disable-next-line no-control-regex
+        gwInfoOutput.replace(/\x1b\[[0-9;]*m/g, "")
+      : "";
+  return (
+    cleanOutput.length > 0 &&
+    cleanOutput.includes(`Gateway: ${GATEWAY_NAME}`) &&
+    !cleanOutput.includes("No gateway metadata found")
+  );
+}
+
+// Name after the first "Gateway:" label in CLI output (ANSI color codes stripped); null if absent or not a string.
+function getReportedGatewayName(output = "") {
+  if (typeof output !== "string") return null;
+  // eslint-disable-next-line no-control-regex
+  const [, gatewayName = null] = output.replace(/\x1b\[[0-9;]*m/g, "").match(/^\s*Gateway:\s+([^\s]+)/m) || [];
+  return gatewayName;
+}
+
+// True when `statusOutput` is a string containing "Connected" (case-sensitive
+// substring match); non-string input is never considered connected.
+function isGatewayConnected(statusOutput = "") { return typeof statusOutput !== "string" ? false : statusOutput.includes("Connected"); }
+
+// True when the gateway info output reports a "Gateway endpoint:" line and does not
+// contain the "No gateway metadata found" message. Non-string input yields false.
+function hasActiveGatewayInfo(activeGatewayInfoOutput = "") {
+  if (typeof activeGatewayInfoOutput !== "string") return false;
+  if (activeGatewayInfoOutput.includes("No gateway metadata found")) return false;
+  return activeGatewayInfoOutput.includes("Gateway endpoint:");
+}
+
+function isSelectedGateway(statusOutput = "", gatewayName = GATEWAY_NAME) {
+  return getReportedGatewayName(statusOutput) === gatewayName;
 }
 
-const ANSI_ESCAPE = String.fromCharCode(27);
-const ANSI_REGEX = new RegExp(`${ANSI_ESCAPE}\\[[0-9;]*[A-Za-z]`, "g");
+function isGatewayHealthy(statusOutput = "", gwInfoOutput = "", activeGatewayInfoOutput = "") {
+  const namedGatewayKnown = hasStaleGateway(gwInfoOutput);
+  if (!namedGatewayKnown || !isGatewayConnected(statusOutput)) return false;
 
-function stripAnsi(value = "") {
-  return value.replace(ANSI_REGEX, "");
+  const activeGatewayName = getReportedGatewayName(statusOutput) || getReportedGatewayName(activeGatewayInfoOutput);
+  return activeGatewayName === GATEWAY_NAME;
 }
 
-function getActiveGatewayName(statusOutput = "") {
-  if (typeof statusOutput !== "string" || statusOutput.length === 0) {
-    return "";
+function getGatewayReuseState(statusOutput = "", gwInfoOutput = "", activeGatewayInfoOutput = "") {
+  if (isGatewayHealthy(statusOutput, gwInfoOutput, activeGatewayInfoOutput)) {
+    return "healthy";
   }
-  const match = stripAnsi(statusOutput)
-    .match(/^\s*Gateway:\s+(.+?)\s*$/m);
-  return match ? match[1].trim() : "";
+  const connected = isGatewayConnected(statusOutput);
+  const activeGatewayName = getReportedGatewayName(statusOutput) || getReportedGatewayName(activeGatewayInfoOutput);
+  if (connected && activeGatewayName === GATEWAY_NAME) {
+    return "active-unnamed";
+  }
+  if (connected && activeGatewayName && activeGatewayName !== GATEWAY_NAME) {
+    return "foreign-active";
+  }
+  if (hasStaleGateway(gwInfoOutput)) {
+    return "stale";
+  }
+  if (hasActiveGatewayInfo(activeGatewayInfoOutput)) {
+    return "active-unnamed";
+  }
+  return "missing";
+}
+
+function getSandboxStateFromOutputs(sandboxName, getOutput = "", listOutput = "") {
+  if (!sandboxName) return "missing";
+  if (!getOutput) return "missing";
+  return isSandboxReady(listOutput, sandboxName) ? "ready" : "not_ready";
 }
 
-function isGatewayHealthy(statusOutput = "", gwInfoOutput = "") {
-  const connected = typeof statusOutput === "string" && statusOutput.includes("Connected");
-  const activeGateway = getActiveGatewayName(statusOutput);
-  return connected && activeGateway === GATEWAY_NAME && hasStaleGateway(gwInfoOutput);
+// Queries openshell for `sandboxName` and classifies the result via getSandboxStateFromOutputs().
+function getSandboxReuseState(sandboxName) {
+  if (!sandboxName) return "missing";
+  const capture = (args) => runCaptureOpenshell(args, { ignoreError: true });
+  return getSandboxStateFromOutputs(sandboxName, capture(["sandbox", "get", sandboxName]), capture(["sandbox", "list"]));
+}
+
+function repairRecordedSandbox(sandboxName) {
+  if (!sandboxName) return;
+  note(`  [resume] Cleaning up recorded sandbox '${sandboxName}' before recreating it.`);
+  runOpenshell(["forward", "stop", "18789"], { ignoreError: true });
+  runOpenshell(["sandbox", "delete", sandboxName], { ignoreError: true });
+  registry.removeSandbox(sandboxName);
 }
 
 function streamSandboxCreate(command, env = process.env, options = {}) {
@@ -375,6 +438,15 @@ function formatEnvAssignment(name, value) {
   return `${name}=${value}`;
 }
 
+// Looks up the stored credential for `envName` and, when one exists, exports it
+// into process.env. Returns the value, or null if the name or value is missing.
+function hydrateCredentialEnv(envName) {
+  const value = envName ? getCredential(envName) : null;
+  if (!value) return null;
+  process.env[envName] = value;
+  return value;
+}
+
 function getCurlTimingArgs() {
   return ["--connect-timeout 5", "--max-time 20"];
 }
@@ -413,6 +485,11 @@ function verifyInferenceRoute(_provider, _model) {
   }
 }
 
+function isInferenceRouteReady(provider, model) { // true only when the live gateway route reports exactly this provider and model
+  const live = parseGatewayInference(runCaptureOpenshell(["inference", "get"], { ignoreError: true }));
+  return live ? live.provider === provider && live.model === model : false;
+}
+
 function sandboxExistsInGateway(sandboxName) {
   const output = runCaptureOpenshell(["sandbox", "get", sandboxName], { ignoreError: true });
   return Boolean(output);
@@ -442,6 +519,10 @@ exit
 `.trim();
 }
 
+// Treats OpenClaw as ready when fetchGatewayAuthTokenFromSandbox(sandboxName)
+// returns a truthy value; the value itself is not exposed to the caller.
+function isOpenclawReady(sandboxName) { return !!fetchGatewayAuthTokenFromSandbox(sandboxName); }
+
 function writeSandboxConfigSyncFile(script, tmpDir = os.tmpdir(), now = Date.now()) {
   const scriptFile = path.join(tmpDir, `nemoclaw-sync-${now}.sh`);
   fs.writeFileSync(scriptFile, `${script}\n`, { mode: 0o600 });
@@ -956,6 +1037,98 @@ async function promptManualModelId(promptLabel, errorLabel, validator = null) {
     return trimmed;
   }
 }
+// Copy filter for build contexts. Keeps the source root itself and any path under it,
+// except macOS metadata files (".DS_Store", "._*") and paths with a segment named
+// after one of the excluded cache/dependency/VCS directories.
+function shouldIncludeBuildContextPath(sourceRoot, candidatePath) {
+  const relativePath = path.relative(sourceRoot, candidatePath);
+  if (relativePath === "") return true;
+
+  const fileName = path.basename(candidatePath);
+  if (fileName === ".DS_Store" || fileName.startsWith("._")) return false;
+
+  // Matched against every path segment, at any depth.
+  const skippedSegments = [
+    ".venv",
+    ".ruff_cache",
+    ".pytest_cache",
+    ".mypy_cache",
+    "__pycache__",
+    "node_modules",
+    ".git",
+  ];
+  return relativePath.split(path.sep).every((segment) => !skippedSegments.includes(segment));
+}
+
+// Recursively copies `sourceDir` to `destinationDir`, skipping every path rejected
+// by shouldIncludeBuildContextPath() relative to `sourceDir`.
+function copyBuildContextDir(sourceDir, destinationDir) {
+  const filter = (candidatePath) => shouldIncludeBuildContextPath(sourceDir, candidatePath);
+  fs.cpSync(sourceDir, destinationDir, { recursive: true, filter });
+}
+
+// Classifies sandbox-create output into a failure kind plus an `uploadedToGateway`
+// flag derived from upload progress markers. The first matching rule below decides
+// the kind; output matching no rule is reported as "unknown".
+function classifySandboxCreateFailure(output = "") {
+  const text = String(output || "");
+  const uploadMarkers = [/\[progress\]\s+Uploaded to gateway/i, /Image .*available in the gateway/i];
+  const uploadedToGateway = uploadMarkers.some((marker) => marker.test(text));
+
+  // Ordered by priority.
+  const rules = [
+    {
+      kind: "image_transfer_timeout",
+      pattern: /failed to read image export stream|Timeout error/i,
+    },
+    {
+      kind: "image_transfer_reset",
+      pattern: /Connection reset by peer/i,
+    },
+    {
+      kind: "sandbox_create_incomplete",
+      pattern: /Created sandbox:/i,
+      // Always reported as uploaded, regardless of the progress markers above.
+      uploadedToGateway: true,
+    },
+  ];
+  const matched = rules.find((rule) => rule.pattern.test(text));
+
+  return {
+    kind: matched ? matched.kind : "unknown",
+    uploadedToGateway: matched?.uploadedToGateway ?? uploadedToGateway,
+  };
+}
+
+// Prints recovery guidance for a failed sandbox create to stderr, tailored to the
+// kind reported by classifySandboxCreateFailure(output). Every branch suggests
+// `nemoclaw onboard --resume`.
+function printSandboxCreateRecoveryHints(output = "") {
+  const { kind, uploadedToGateway } = classifySandboxCreateFailure(output);
+  const say = (line) => console.error(line);
+  switch (kind) {
+    case "image_transfer_timeout":
+      say("  Hint: image upload into the OpenShell gateway timed out.");
+      say("  Recovery: nemoclaw onboard --resume");
+      if (uploadedToGateway) say("  Progress reached the gateway upload stage, so resume may be able to reuse existing gateway state.");
+      say("  If this repeats, check Docker memory and retry on a host with more RAM.");
+      break;
+    case "image_transfer_reset":
+      say("  Hint: the image push/import stream was interrupted.");
+      say("  Recovery: nemoclaw onboard --resume");
+      if (uploadedToGateway) say("  The image appears to have reached the gateway before the stream failed.");
+      say("  If this repeats, restart Docker or the gateway and retry.");
+      break;
+    case "sandbox_create_incomplete":
+      say("  Hint: sandbox creation started but the create stream did not finish cleanly.");
+      say("  Recovery: nemoclaw onboard --resume");
+      say("  Check: openshell sandbox list        # verify whether the sandbox became ready");
+      break;
+    default:
+      say("  Recovery: nemoclaw onboard --resume");
+      say("  Or:      nemoclaw onboard");
+  }
+}
 
 async function promptCloudModel() {
   console.log("");
@@ -1074,6 +1247,86 @@ function prepareOllamaModel(model, installedModels = []) {
   return validateOllamaModel(model, runCapture);
 }
 
+// Reads NEMOCLAW_SANDBOX_NAME, trimmed and lower-cased; null when unset or blank.
+function getRequestedSandboxNameHint() {
+  const { NEMOCLAW_SANDBOX_NAME: raw } = process.env;
+  if (typeof raw !== "string") return null;
+  return raw.trim().toLowerCase() || null;
+}
+
+// Compares the sandbox name requested via the environment with the one recorded in
+// `session`. Returns both names when they differ; null when either is missing or they match.
+function getResumeSandboxConflict(session) {
+  const requestedSandboxName = getRequestedSandboxNameHint();
+  const recordedSandboxName = session?.sandboxName;
+  if (!requestedSandboxName || !recordedSandboxName) return null;
+  if (requestedSandboxName === recordedSandboxName) return null;
+  return { requestedSandboxName, recordedSandboxName };
+}
+
+// Provider hint for this run: the result of getNonInteractiveProvider() in
+// non-interactive mode, otherwise null.
+function getRequestedProviderHint(nonInteractive = isNonInteractive()) { if (!nonInteractive) return null; return getNonInteractiveProvider(); }
+
+// Model hint for non-interactive runs, resolved for the requested provider ("cloud" when none); null when interactive.
+function getRequestedModelHint(nonInteractive = isNonInteractive()) {
+  const providerKey = nonInteractive ? getRequestedProviderHint(nonInteractive) || "cloud" : null;
+  return providerKey === null ? null : getNonInteractiveModel(providerKey);
+}
+
+// Resolves a provider selection key to its effective provider name: the
+// `providerName` from REMOTE_PROVIDER_CONFIG when the key is listed there, a fixed
+// alias for the local providers, or the key itself. Falsy keys yield null.
+function getEffectiveProviderName(providerKey) {
+  if (!providerKey) return null;
+  const remote = REMOTE_PROVIDER_CONFIG[providerKey];
+  if (remote) return remote.providerName;
+
+  // Fixed aliases for keys that are not in REMOTE_PROVIDER_CONFIG.
+  const localAliases = new Map([
+    ["nim-local", "nvidia-nim"],
+    ["ollama", "ollama-local"],
+    ["vllm", "vllm-local"],
+  ]);
+  const alias = localAliases.get(providerKey);
+  return alias ?? providerKey;
+}
+
+// Lists mismatches between what this invocation requests and what `session` recorded.
+// The sandbox name comes from getResumeSandboxConflict(); provider and model hints are
+// only available in non-interactive mode. Each entry is { field, requested, recorded };
+// an empty array means nothing conflicts.
+function getResumeConfigConflicts(session, opts = {}) {
+  const nonInteractive = opts.nonInteractive ?? isNonInteractive();
+  const conflicts = [];
+  // Records a conflict only when both sides are set and differ.
+  const addIfDifferent = (field, requested, recorded) => {
+    if (requested && recorded && requested !== recorded) {
+      conflicts.push({ field, requested, recorded });
+    }
+  };
+
+  const sandboxConflict = getResumeSandboxConflict(session);
+  if (sandboxConflict) {
+    conflicts.push({
+      field: "sandbox",
+      requested: sandboxConflict.requestedSandboxName,
+      recorded: sandboxConflict.recordedSandboxName,
+    });
+  }
+
+  // Compare the effective provider name, not the raw selection key.
+  const requestedProvider = getEffectiveProviderName(
+    getRequestedProviderHint(nonInteractive)
+  );
+  addIfDifferent("provider", requestedProvider, session?.provider);
+
+  const requestedModel = getRequestedModelHint(nonInteractive);
+  addIfDifferent("model", requestedModel, session?.model);
+
+  return conflicts;
+}
+
 function isDockerRunning() {
   try {
     runCapture("docker info", { ignoreError: false });
@@ -1133,6 +1386,13 @@ function sleep(seconds) {
   require("child_process").spawnSync("sleep", [String(seconds)]);
 }
 
+// Destroys the gateway, then removes its Docker volumes: `gateway destroy` leaves them, which breaks the next start.
+function destroyGateway() {
+  runOpenshell(["gateway", "destroy", "-g", GATEWAY_NAME], { ignoreError: true });
+  const listVolumes = `docker volume ls -q --filter "name=openshell-cluster-${GATEWAY_NAME}"`;
+  run(`${listVolumes} | grep . && ${listVolumes} | xargs docker volume rm || true`, { ignoreError: true });
+}
+
 async function ensureNamedCredential(envName, label, helpUrl = null) {
   let key = getCredential(envName);
   if (key) {
@@ -1256,20 +1516,15 @@ async function preflight() {
     console.log("  Add that export to your shell profile, or open a new terminal before running openshell directly.");
   }
 
-  // Clean up stale NemoClaw session before checking ports.
-  // A previous onboard run may have left the gateway container and port
-  // forward running.  If a NemoClaw-owned gateway is still present, tear
-  // it down so the port check below doesn't fail on our own leftovers.
+  // Clean up stale or unnamed NemoClaw gateway state before checking ports.
+  // A healthy named gateway can be reused later in onboarding, so avoid
+  // tearing it down here. If some other gateway is active, do not treat it
+  // as NemoClaw state; let the port checks surface the conflict instead.
   const gatewayStatus = runCaptureOpenshell(["status"], { ignoreError: true });
   const gwInfo = runCaptureOpenshell(["gateway", "info", "-g", GATEWAY_NAME], { ignoreError: true });
-  const healthyGateway = isGatewayHealthy(gatewayStatus, gwInfo);
-  if (healthyGateway) {
-    console.log("  Reusing existing NemoClaw gateway...");
-    runOpenshell(["forward", "stop", "18789"], { ignoreError: true });
-    runOpenshell(["gateway", "select", GATEWAY_NAME], { ignoreError: true });
-    process.env.OPENSHELL_GATEWAY = GATEWAY_NAME;
-    console.log("  ✓ Existing gateway selected");
-  } else if (hasStaleGateway(gwInfo)) {
+  const activeGatewayInfo = runCaptureOpenshell(["gateway", "info"], { ignoreError: true });
+  const gatewayReuseState = getGatewayReuseState(gatewayStatus, gwInfo, activeGatewayInfo);
+  if (gatewayReuseState === "stale" || gatewayReuseState === "active-unnamed") {
     console.log("  Cleaning up previous NemoClaw session...");
     runOpenshell(["forward", "stop", "18789"], { ignoreError: true });
     runOpenshell(["gateway", "destroy", "-g", GATEWAY_NAME], { ignoreError: true });
@@ -1282,12 +1537,12 @@ async function preflight() {
     { port: 18789, label: "NemoClaw dashboard" },
   ];
   for (const { port, label } of requiredPorts) {
-    if (port === 8080 && healthyGateway) {
-      console.log(`  ✓ Port ${port} already in use by active NemoClaw gateway (${label})`);
-      continue;
-    }
     const portCheck = await checkPortAvailable(port);
     if (!portCheck.ok) {
+      if ((port === 8080 || port === 18789) && gatewayReuseState === "healthy") {
+        console.log(`  ✓ Port ${port} already owned by healthy NemoClaw runtime (${label})`);
+        continue;
+      }
       console.error("");
       console.error(`  !! Port ${port} is not available.`);
       console.error(`     ${label} needs this port.`);
@@ -1329,23 +1584,15 @@ async function preflight() {
   return gpu;
 }
 
-// ── Gateway cleanup ──────────────────────────────────────────────
-
-function destroyGateway() {
-  runOpenshell(["gateway", "destroy", "-g", GATEWAY_NAME], { ignoreError: true });
-  // openshell gateway destroy doesn't remove Docker volumes, which leaves
-  // corrupted cluster state that breaks the next gateway start. Clean them up.
-  run(`docker volume ls -q --filter "name=openshell-cluster-${GATEWAY_NAME}" | grep . && docker volume ls -q --filter "name=openshell-cluster-${GATEWAY_NAME}" | xargs docker volume rm || true`, { ignoreError: true });
-}
-
 // ── Step 2: Gateway ──────────────────────────────────────────────
 
 async function startGatewayWithOptions(_gpu, { exitOnFailure = true } = {}) {
-  step(3, 7, "Starting OpenShell gateway");
+  step(2, 7, "Starting OpenShell gateway");
 
   const gatewayStatus = runCaptureOpenshell(["status"], { ignoreError: true });
   const gwInfo = runCaptureOpenshell(["gateway", "info", "-g", GATEWAY_NAME], { ignoreError: true });
-  if (isGatewayHealthy(gatewayStatus, gwInfo)) {
+  const activeGatewayInfo = runCaptureOpenshell(["gateway", "info"], { ignoreError: true });
+  if (isGatewayHealthy(gatewayStatus, gwInfo, activeGatewayInfo)) {
     console.log("  ✓ Reusing existing gateway");
     runOpenshell(["gateway", "select", GATEWAY_NAME], { ignoreError: true });
     process.env.OPENSHELL_GATEWAY = GATEWAY_NAME;
@@ -1362,15 +1609,9 @@ async function startGatewayWithOptions(_gpu, { exitOnFailure = true } = {}) {
   // sandbox itself does not need direct GPU access. Passing --gpu causes
   // FailedPrecondition errors when the gateway's k3s device plugin cannot
   // allocate GPUs. See: https://build.nvidia.com/spark/nemoclaw/instructions
-  const gatewayEnv = {};
-  const openshellVersion = getInstalledOpenshellVersion();
-  const stableGatewayImage = openshellVersion
-    ? `ghcr.io/nvidia/openshell/cluster:${openshellVersion}`
-    : null;
-  if (stableGatewayImage && openshellVersion) {
-    gatewayEnv.OPENSHELL_CLUSTER_IMAGE = stableGatewayImage;
-    gatewayEnv.IMAGE_TAG = openshellVersion;
-    console.log(`  Using pinned OpenShell gateway image: ${stableGatewayImage}`);
+  const gatewayEnv = getGatewayStartEnv();
+  if (gatewayEnv.OPENSHELL_CLUSTER_IMAGE) {
+    console.log(`  Using pinned OpenShell gateway image: ${gatewayEnv.OPENSHELL_CLUSTER_IMAGE}`);
   }
 
   const startResult = runOpenshell(["gateway", "start", ...gwArgs], { ignoreError: true, env: gatewayEnv });
@@ -1384,11 +1625,11 @@ async function startGatewayWithOptions(_gpu, { exitOnFailure = true } = {}) {
     throw new Error("Gateway failed to start");
   }
 
-  // Verify health
   for (let i = 0; i < 5; i++) {
     const status = runCaptureOpenshell(["status"], { ignoreError: true });
-    const gwInfo = runCaptureOpenshell(["gateway", "info", "-g", GATEWAY_NAME], { ignoreError: true });
-    if (isGatewayHealthy(status, gwInfo)) {
+    const namedInfo = runCaptureOpenshell(["gateway", "info", "-g", GATEWAY_NAME], { ignoreError: true });
+    const currentInfo = runCaptureOpenshell(["gateway", "info"], { ignoreError: true });
+    if (isGatewayHealthy(status, namedInfo, currentInfo)) {
       console.log("  ✓ Gateway is healthy");
       break;
     }
@@ -1410,7 +1651,6 @@ async function startGatewayWithOptions(_gpu, { exitOnFailure = true } = {}) {
     console.log("  Patching CoreDNS for Colima...");
     run(`bash "${path.join(SCRIPTS, "fix-coredns.sh")}" ${GATEWAY_NAME} 2>&1 || true`, { ignoreError: true });
   }
-  // Give DNS a moment to propagate
   sleep(5);
   runOpenshell(["gateway", "select", GATEWAY_NAME], { ignoreError: true });
   process.env.OPENSHELL_GATEWAY = GATEWAY_NAME;
@@ -1424,11 +1664,52 @@ async function startGatewayForRecovery(_gpu) {
   return startGatewayWithOptions(_gpu, { exitOnFailure: false });
 }
 
-// ── Step 3: Sandbox ──────────────────────────────────────────────
+function getGatewayStartEnv() {
+  // Env overrides for `openshell gateway start`: pin the cluster image to the version
+  // returned by getInstalledOpenshellVersion(), or override nothing when it is falsy.
+  const version = getInstalledOpenshellVersion();
+  if (!version) {
+    return {};
+  }
+  return {
+    OPENSHELL_CLUSTER_IMAGE: `ghcr.io/nvidia/openshell/cluster:${version}`,
+    IMAGE_TAG: version,
+  };
+}
 
-async function createSandbox(gpu, model, provider, preferredInferenceApi = null) {
-  step(5, 7, "Creating sandbox");
+async function recoverGatewayRuntime() {
+  // Best-effort recovery of the NemoClaw gateway; resolves to true once a fresh status read
+  // contains "Connected" and isSelectedGateway() accepts it, false if that never happens.
+  const connectedToNamedGateway = () => {
+    const report = runCaptureOpenshell(["status"], { ignoreError: true });
+    return report.includes("Connected") && isSelectedGateway(report);
+  };
+  // Fast path: re-selecting the named gateway may be all that is needed.
+  runOpenshell(["gateway", "select", GATEWAY_NAME], { ignoreError: true });
+  if (connectedToNamedGateway()) {
+    process.env.OPENSHELL_GATEWAY = GATEWAY_NAME;
+    return true;
+  }
+  // Otherwise start it again, re-select it, and poll the status up to five times.
+  runOpenshell(["gateway", "start", "--name", GATEWAY_NAME], { ignoreError: true, env: getGatewayStartEnv() });
+  runOpenshell(["gateway", "select", GATEWAY_NAME], { ignoreError: true });
+  for (let attempt = 0; attempt < 5; attempt += 1) {
+    if (!connectedToNamedGateway()) {
+      sleep(2);
+      continue;
+    }
+    process.env.OPENSHELL_GATEWAY = GATEWAY_NAME;
+    if (shouldPatchCoredns(getContainerRuntime())) {
+      run(`bash "${path.join(SCRIPTS, "fix-coredns.sh")}" ${GATEWAY_NAME} 2>&1 || true`, { ignoreError: true });
+    }
+    return true;
+  }
+  return false;
+}
+
+// ── Step 3: Sandbox ──────────────────────────────────────────────
 
+async function promptValidatedSandboxName() {
   const nameAnswer = await promptOrDefault(
     "  Sandbox name (lowercase, numbers, hyphens) [my-assistant]: ",
     "NEMOCLAW_SANDBOX_NAME", "my-assistant"
@@ -1444,23 +1725,35 @@ async function createSandbox(gpu, model, provider, preferredInferenceApi = null)
     process.exit(1);
   }
 
+  return sandboxName;
+}
+
+// eslint-disable-next-line complexity
+async function createSandbox(gpu, model, provider, preferredInferenceApi = null, sandboxNameOverride = null) {
+  step(5, 7, "Creating sandbox");
+
+  const sandboxName = sandboxNameOverride || (await promptValidatedSandboxName());
+
   // Reconcile local registry state with the live OpenShell gateway state.
   const liveExists = pruneStaleSandboxEntry(sandboxName);
 
   if (liveExists) {
-    if (isNonInteractive()) {
-      if (process.env.NEMOCLAW_RECREATE_SANDBOX !== "1") {
-        console.error(`  Sandbox '${sandboxName}' already exists.`);
-        console.error("  Set NEMOCLAW_RECREATE_SANDBOX=1 to recreate it in non-interactive mode.");
-        process.exit(1);
+    const existingSandboxState = getSandboxReuseState(sandboxName);
+    if (existingSandboxState === "ready" && process.env.NEMOCLAW_RECREATE_SANDBOX !== "1") {
+      if (isNonInteractive()) {
+        note(`  [non-interactive] Sandbox '${sandboxName}' exists and is ready — reusing it`);
+      } else {
+        console.log(`  Sandbox '${sandboxName}' already exists and is ready.`);
+        console.log("  Reusing existing sandbox.");
+        console.log("  Set NEMOCLAW_RECREATE_SANDBOX=1 to recreate it instead.");
       }
-      note(`  [non-interactive] Sandbox '${sandboxName}' exists — recreating`);
+      return sandboxName;
+    }
+
+    if (existingSandboxState === "ready") {
+      note(`  Sandbox '${sandboxName}' exists and is ready — recreating by explicit request.`);
     } else {
-      const recreate = await prompt(`  Sandbox '${sandboxName}' already exists. Recreate? [y/N]: `);
-      if (recreate.toLowerCase() !== "y") {
-        console.log("  Keeping existing sandbox.");
-        return sandboxName;
-      }
+      note(`  Sandbox '${sandboxName}' exists but is not ready — recreating it.`);
     }
     // Destroy old sandbox
     runOpenshell(["sandbox", "delete", sandboxName], { ignoreError: true });
@@ -1471,11 +1764,9 @@ async function createSandbox(gpu, model, provider, preferredInferenceApi = null)
   const buildCtx = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-build-"));
   const stagedDockerfile = path.join(buildCtx, "Dockerfile");
   fs.copyFileSync(path.join(ROOT, "Dockerfile"), stagedDockerfile);
-  run(`cp -r "${path.join(ROOT, "nemoclaw")}" "${buildCtx}/nemoclaw"`);
-  run(`cp -r "${path.join(ROOT, "nemoclaw-blueprint")}" "${buildCtx}/nemoclaw-blueprint"`);
-  run(`cp -r "${path.join(ROOT, "scripts")}" "${buildCtx}/scripts"`);
-  run(`rm -rf "${buildCtx}/nemoclaw/node_modules"`, { ignoreError: true });
-  run(`bash "${buildCtx}/scripts/clean-staged-tree.sh" "${buildCtx}/nemoclaw-blueprint"`, { ignoreError: true });
+  copyBuildContextDir(path.join(ROOT, "nemoclaw"), path.join(buildCtx, "nemoclaw"));
+  copyBuildContextDir(path.join(ROOT, "nemoclaw-blueprint"), path.join(buildCtx, "nemoclaw-blueprint"));
+  copyBuildContextDir(path.join(ROOT, "scripts"), path.join(buildCtx, "scripts"));
 
   // Create sandbox (use -- echo to avoid dropping into interactive shell)
   // Pass the base policy so sandbox starts in proxy mode (required for policy updates later)
@@ -1539,7 +1830,7 @@ async function createSandbox(gpu, model, provider, preferredInferenceApi = null)
       console.error(createResult.output);
     }
     console.error("  Try:  openshell sandbox list        # check gateway state");
-    console.error("  Try:  nemoclaw onboard              # retry from scratch");
+    printSandboxCreateRecoveryHints(createResult.output);
     process.exit(createResult.status || 1);
   }
 
@@ -1595,7 +1886,7 @@ async function createSandbox(gpu, model, provider, preferredInferenceApi = null)
 
 // eslint-disable-next-line complexity
 async function setupNim(gpu) {
-  step(2, 7, "Configuring inference (NIM)");
+  step(3, 7, "Configuring inference (NIM)");
 
   let model = null;
   let provider = REMOTE_PROVIDER_CONFIG.build.providerName;
@@ -1613,7 +1904,9 @@ async function setupNim(gpu) {
   const options = [];
   options.push({
     key: "build",
-    label: "NVIDIA Endpoints",
+    label:
+      "NVIDIA Endpoints" +
+      (!ollamaRunning && !(EXPERIMENTAL && vllmRunning) ? " (recommended)" : ""),
   });
   options.push({ key: "openai", label: "OpenAI" });
   options.push({ key: "custom", label: "Other OpenAI-compatible endpoint" });
@@ -2003,7 +2296,10 @@ async function setupInference(sandboxName, model, provider, endpointUrl = null,
       : Object.values(REMOTE_PROVIDER_CONFIG).find((entry) => entry.providerName === provider);
     const resolvedCredentialEnv = credentialEnv || (config && config.credentialEnv);
     const resolvedEndpointUrl = endpointUrl || (config && config.endpointUrl);
-    const env = resolvedCredentialEnv ? { [resolvedCredentialEnv]: process.env[resolvedCredentialEnv] } : {};
+    const credentialValue = hydrateCredentialEnv(resolvedCredentialEnv);
+    const env = resolvedCredentialEnv && credentialValue
+      ? { [resolvedCredentialEnv]: credentialValue }
+      : {};
     upsertProvider(provider, config.providerType, resolvedCredentialEnv, resolvedEndpointUrl, env);
     const args = ["inference", "set"];
     if (config.skipVerify) {
@@ -2077,7 +2373,7 @@ async function setupOpenclaw(sandboxName, model, provider) {
 // ── Step 7: Policy presets ───────────────────────────────────────
 
 // eslint-disable-next-line complexity
-async function setupPolicies(sandboxName) {
+async function _setupPolicies(sandboxName) {
   step(7, 7, "Policy presets");
 
   const suggestions = ["pypi", "npm"];
@@ -2214,6 +2510,144 @@ async function setupPolicies(sandboxName) {
   console.log("  ✓ Policies applied");
 }
 
+function arePolicyPresetsApplied(sandboxName, selectedPresets = []) {
+  if (!(Array.isArray(selectedPresets) && selectedPresets.length > 0)) return false; // an empty selection never counts as applied
+  const live = new Set(policies.getAppliedPresets(sandboxName)); // presets currently applied to the sandbox
+  return !selectedPresets.some((name) => !live.has(name)); // every selected preset must already be live
+}
+
+// eslint-disable-next-line complexity
+async function setupPoliciesWithSelection(sandboxName, options = {}) {
+  // Step 7: choose policy presets (replayed, env-driven or prompted) and apply them to the sandbox.
+  // Returns the chosen preset names ([] when skipped); invalid input or an unready sandbox exits the process.
+  const selectedPresets = Array.isArray(options.selectedPresets) ? options.selectedPresets : null;
+  const onSelection = typeof options.onSelection === "function" ? options.onSelection : null;
+  step(7, 7, "Policy presets");
+
+  const suggestions = ["pypi", "npm"];
+  if (getCredential("TELEGRAM_BOT_TOKEN")) suggestions.push("telegram");
+  if (getCredential("SLACK_BOT_TOKEN") || process.env.SLACK_BOT_TOKEN) suggestions.push("slack");
+  if (getCredential("DISCORD_BOT_TOKEN") || process.env.DISCORD_BOT_TOKEN) suggestions.push("discord");
+  const allPresets = policies.listPresets();
+  const applied = policies.getAppliedPresets(sandboxName);
+  let chosen = selectedPresets;
+  // Replay path: the caller supplied a non-empty selection (onboard() passes the recorded presets on resume).
+  if (chosen && chosen.length > 0) {
+    if (onSelection) onSelection(chosen);
+    if (!waitForSandboxReady(sandboxName)) {
+      console.error(`  Sandbox '${sandboxName}' was not ready for policy application.`);
+      process.exit(1);
+    }
+    note(`  [resume] Reapplying policy presets: ${chosen.join(", ")}`);
+    for (const name of chosen) {
+      if (applied.includes(name)) continue; // already applied to this sandbox
+      policies.applyPreset(sandboxName, name);
+    }
+    return chosen;
+  }
+  // Non-interactive path: NEMOCLAW_POLICY_MODE and NEMOCLAW_POLICY_PRESETS decide the selection.
+  if (isNonInteractive()) {
+    const policyMode = (process.env.NEMOCLAW_POLICY_MODE || "suggested").trim().toLowerCase();
+    chosen = suggestions; // default selection unless the mode below overrides it
+
+    if (policyMode === "skip" || policyMode === "none" || policyMode === "no") {
+      note("  [non-interactive] Skipping policy presets.");
+      return [];
+    }
+
+    if (policyMode === "custom" || policyMode === "list") {
+      chosen = parsePolicyPresetEnv(process.env.NEMOCLAW_POLICY_PRESETS);
+      if (chosen.length === 0) {
+        console.error("  NEMOCLAW_POLICY_PRESETS is required when NEMOCLAW_POLICY_MODE=custom.");
+        process.exit(1);
+      }
+    } else if (policyMode === "suggested" || policyMode === "default" || policyMode === "auto") {
+      const envPresets = parsePolicyPresetEnv(process.env.NEMOCLAW_POLICY_PRESETS);
+      if (envPresets.length > 0) {
+        chosen = envPresets;
+      }
+    } else {
+      console.error(`  Unsupported NEMOCLAW_POLICY_MODE: ${policyMode}`);
+      console.error("  Valid values: suggested, custom, skip");
+      process.exit(1);
+    }
+
+    const knownPresets = new Set(allPresets.map((p) => p.name));
+    const invalidPresets = chosen.filter((name) => !knownPresets.has(name));
+    if (invalidPresets.length > 0) {
+      console.error(`  Unknown policy preset(s): ${invalidPresets.join(", ")}`);
+      process.exit(1);
+    }
+    // Report the selection before applying it (onboard() stores it in the session via onSelection).
+    if (onSelection) onSelection(chosen);
+    if (!waitForSandboxReady(sandboxName)) {
+      console.error(`  Sandbox '${sandboxName}' was not ready for policy application.`);
+      process.exit(1);
+    }
+    note(`  [non-interactive] Applying policy presets: ${chosen.join(", ")}`);
+    for (const name of chosen) {
+      for (let attempt = 0; attempt < 3; attempt += 1) { // up to 3 attempts per preset
+        try {
+          policies.applyPreset(sandboxName, name);
+          break;
+        } catch (err) {
+          const message = err && err.message ? err.message : String(err);
+          if (message.includes("Unimplemented")) {
+            console.error("  OpenShell policy updates are not supported by this gateway build.");
+            console.error("  This is a known issue tracked in NemoClaw #536.");
+            throw err;
+          }
+          if (!message.includes("sandbox not found") || attempt === 2) { // only "sandbox not found" is retried
+            throw err;
+          }
+          sleep(2);
+        }
+      }
+    }
+    return chosen;
+  }
+  // Interactive path: list every preset (● = already applied), then prompt for the selection.
+  console.log("");
+  console.log("  Available policy presets:");
+  allPresets.forEach((p) => {
+    const marker = applied.includes(p.name) ? "●" : "○";
+    const suggested = suggestions.includes(p.name) ? " (suggested)" : "";
+    console.log(`    ${marker} ${p.name} — ${p.description}${suggested}`);
+  });
+  console.log("");
+
+  const answer = await prompt(`  Apply suggested presets (${suggestions.join(", ")})? [Y/n/list]: `);
+
+  if (answer.toLowerCase() === "n") {
+    console.log("  Skipping policy presets.");
+    return [];
+  }
+
+  let interactiveChoice = suggestions;
+  if (answer.toLowerCase() === "list") {
+    const custom = await prompt("  Enter preset names (comma-separated): ");
+    interactiveChoice = parsePolicyPresetEnv(custom);
+  }
+
+  const knownPresets = new Set(allPresets.map((p) => p.name));
+  const invalidPresets = interactiveChoice.filter((name) => !knownPresets.has(name));
+  if (invalidPresets.length > 0) {
+    console.error(`  Unknown policy preset(s): ${invalidPresets.join(", ")}`);
+    process.exit(1);
+  }
+
+  if (onSelection) onSelection(interactiveChoice);
+  if (!waitForSandboxReady(sandboxName)) {
+    console.error(`  Sandbox '${sandboxName}' was not ready for policy application.`);
+    process.exit(1);
+  }
+
+  for (const name of interactiveChoice) {
+    policies.applyPreset(sandboxName, name);
+  }
+  return interactiveChoice;
+}
+
 // ── Dashboard ────────────────────────────────────────────────────
 
 const CONTROL_UI_PORT = 18789;
@@ -2321,54 +2755,322 @@ function printDashboard(sandboxName, model, provider, nimContainer = null) {
   console.log("");
 }
 
+function startRecordedStep(stepName, updates = {}) {
+  // Mark the step as started, then copy any string-valued sandboxName/provider/model into the session.
+  onboardSession.markStepStarted(stepName);
+  if (Object.keys(updates).length === 0) return;
+  onboardSession.updateSession((session) => {
+    for (const field of ["sandboxName", "provider", "model"]) {
+      if (typeof updates[field] === "string") session[field] = updates[field];
+    }
+    return session;
+  });
+}
+
+function resumeStepMessage(stepName, detail) { // announce that an already-finished step is skipped on resume
+  console.log(`  [resume] Skipping ${stepName}${!detail ? "" : ` (${detail})`}`);
+}
+
 // ── Main ─────────────────────────────────────────────────────────
 
+// eslint-disable-next-line complexity
 async function onboard(opts = {}) {
+  // Run (or, with opts.resume === true, continue) the recorded onboarding steps while holding the onboarding lock.
   NON_INTERACTIVE = opts.nonInteractive || process.env.NEMOCLAW_NON_INTERACTIVE === "1";
   delete process.env.OPENSHELL_GATEWAY;
+  const resume = opts.resume === true;
+  const lockResult = onboardSession.acquireOnboardLock(
+    `nemoclaw onboard${resume ? " --resume" : ""}${isNonInteractive() ? " --non-interactive" : ""}`
+  );
+  if (!lockResult.acquired) {
+    console.error("  Another NemoClaw onboarding run is already in progress.");
+    if (lockResult.holderPid) {
+      console.error(`  Lock holder PID: ${lockResult.holderPid}`);
+    }
+    if (lockResult.holderStartedAt) {
+      console.error(`  Started: ${lockResult.holderStartedAt}`);
+    }
+    console.error("  Wait for it to finish, or remove the stale lock if the previous run crashed:");
+    console.error(`    rm -f "${lockResult.lockFile}"`);
+    process.exit(1);
+  }
 
-  console.log("");
-  console.log("  NemoClaw Onboarding");
-  if (isNonInteractive()) note("  (non-interactive mode)");
-  console.log("  ===================");
-
-  const gpu = await preflight();
-  const { model, provider, endpointUrl, credentialEnv, preferredInferenceApi, nimContainer } = await setupNim(gpu);
-  process.env.NEMOCLAW_OPENSHELL_BIN = getOpenshellBinary();
-  await startGateway(gpu);
-  await setupInference(GATEWAY_NAME, model, provider, endpointUrl, credentialEnv);
-  // The key is now stored in openshell's provider config. Clear it from our
-  // process environment so new child processes don't inherit it. Note: this
-  // does NOT clear /proc/pid/environ (kernel snapshot is immutable after exec),
-  // but it prevents run()'s { ...process.env } from propagating the key.
-  delete process.env.NVIDIA_API_KEY;
-  const sandboxName = await createSandbox(gpu, model, provider, preferredInferenceApi);
-  if (nimContainer) {
-    registry.updateSandbox(sandboxName, { nimContainer });
-  }
-  await setupOpenclaw(sandboxName, model, provider);
-  await setupPolicies(sandboxName);
-  printDashboard(sandboxName, model, provider, nimContainer);
+  let lockReleased = false;
+  const releaseOnboardLock = () => {
+    if (lockReleased) return;
+    lockReleased = true;
+    onboardSession.releaseOnboardLock();
+  };
+  process.once("exit", releaseOnboardLock); // process.exit() bypasses the finally block below, so release on exit too
+
+  try {
+    let session;
+    if (resume) {
+      session = onboardSession.loadSession();
+      if (!session || session.resumable === false) {
+        console.error("  No resumable onboarding session was found.");
+        console.error("  Run: nemoclaw onboard");
+        process.exit(1);
+      }
+      const resumeConflicts = getResumeConfigConflicts(session, { nonInteractive: isNonInteractive() });
+      if (resumeConflicts.length > 0) {
+        for (const conflict of resumeConflicts) {
+          if (conflict.field === "sandbox") {
+            console.error(
+              `  Resumable state belongs to sandbox '${conflict.recorded}', not '${conflict.requested}'.`
+            );
+          } else {
+            console.error(
+              `  Resumable state recorded ${conflict.field} '${conflict.recorded}', not '${conflict.requested}'.`
+            );
+          }
+        }
+        console.error("  Run: nemoclaw onboard              # start a fresh onboarding session");
+        console.error("  Or rerun with the original settings to continue that session.");
+        process.exit(1);
+      }
+      onboardSession.updateSession((current) => {
+        current.mode = isNonInteractive() ? "non-interactive" : "interactive";
+        current.failure = null;
+        current.status = "in_progress";
+        return current;
+      });
+      session = onboardSession.loadSession();
+    } else {
+      session = onboardSession.saveSession(
+        onboardSession.createSession({
+          mode: isNonInteractive() ? "non-interactive" : "interactive",
+          metadata: { gatewayName: "nemoclaw" },
+        })
+      );
+    }
+
+    let completed = false;
+    process.once("exit", (code) => { // a non-zero exit before completion marks the last started step as failed
+      if (!completed && code !== 0) {
+        const current = onboardSession.loadSession();
+        const failedStep = current?.lastStepStarted;
+        if (failedStep) {
+          onboardSession.markStepFailed(failedStep, "Onboarding exited before the step completed.");
+        }
+      }
+    });
+
+    console.log("");
+    console.log("  NemoClaw Onboarding");
+    if (isNonInteractive()) note("  (non-interactive mode)");
+    if (resume) note("  (resume mode)");
+    console.log("  ===================");
+
+    let gpu;
+    const resumePreflight = resume && session?.steps?.preflight?.status === "complete";
+    if (resumePreflight) {
+      resumeStepMessage("preflight", "cached");
+      gpu = nim.detectGpu();
+    } else {
+      startRecordedStep("preflight");
+      gpu = await preflight();
+      onboardSession.markStepComplete("preflight");
+    }
+
+    const gatewayStatus = runCaptureOpenshell(["status"], { ignoreError: true });
+    const gatewayInfo = runCaptureOpenshell(["gateway", "info", "-g", GATEWAY_NAME], { ignoreError: true });
+    const activeGatewayInfo = runCaptureOpenshell(["gateway", "info"], { ignoreError: true });
+    const gatewayReuseState = getGatewayReuseState(gatewayStatus, gatewayInfo, activeGatewayInfo);
+    const canReuseHealthyGateway = gatewayReuseState === "healthy";
+    const resumeGateway = resume && session?.steps?.gateway?.status === "complete" && canReuseHealthyGateway;
+    if (resumeGateway) {
+      resumeStepMessage("gateway", "running");
+    } else if (!resume && canReuseHealthyGateway) {
+      note("  Reusing healthy NemoClaw gateway.");
+    } else {
+      if (resume && session?.steps?.gateway?.status === "complete") {
+        if (gatewayReuseState === "active-unnamed") {
+          note("  [resume] Gateway is active but named metadata is missing; recreating it safely.");
+        } else if (gatewayReuseState === "foreign-active") {
+          note("  [resume] A different OpenShell gateway is active; NemoClaw will not reuse it.");
+        } else if (gatewayReuseState === "stale") {
+          note("  [resume] Recorded gateway is unhealthy; recreating it.");
+        } else {
+          note("  [resume] Recorded gateway state is unavailable; recreating it.");
+        }
+      }
+      startRecordedStep("gateway");
+      await startGateway(gpu);
+      onboardSession.markStepComplete("gateway");
+    }
+
+    let sandboxName = session?.sandboxName || null;
+    let model = session?.model || null;
+    let provider = session?.provider || null;
+    let endpointUrl = session?.endpointUrl || null;
+    let credentialEnv = session?.credentialEnv || null;
+    let preferredInferenceApi = session?.preferredInferenceApi || null;
+    let nimContainer = session?.nimContainer || null;
+    const resumeProviderSelection =
+      resume &&
+      session?.steps?.provider_selection?.status === "complete" &&
+      typeof provider === "string" &&
+      typeof model === "string";
+    if (resumeProviderSelection) {
+      resumeStepMessage("provider selection", `${provider} / ${model}`);
+      hydrateCredentialEnv(credentialEnv);
+    } else {
+      startRecordedStep("provider_selection", { sandboxName });
+      const selection = await setupNim(gpu);
+      model = selection.model;
+      provider = selection.provider;
+      endpointUrl = selection.endpointUrl;
+      credentialEnv = selection.credentialEnv;
+      preferredInferenceApi = selection.preferredInferenceApi;
+      nimContainer = selection.nimContainer;
+      onboardSession.markStepComplete("provider_selection", {
+        sandboxName,
+        provider,
+        model,
+        endpointUrl,
+        credentialEnv,
+        preferredInferenceApi,
+        nimContainer,
+      });
+    }
+
+    process.env.NEMOCLAW_OPENSHELL_BIN = getOpenshellBinary();
+    const resumeInference =
+      resume &&
+      typeof provider === "string" &&
+      typeof model === "string" &&
+      isInferenceRouteReady(provider, model);
+    if (resumeInference) {
+      resumeStepMessage("inference", `${provider} / ${model}`);
+      onboardSession.markStepComplete("inference", { sandboxName, provider, model, nimContainer });
+    } else {
+      startRecordedStep("inference", { sandboxName, provider, model });
+      await setupInference(GATEWAY_NAME, model, provider, endpointUrl, credentialEnv);
+      delete process.env.NVIDIA_API_KEY; // openshell now holds the key; keep it out of child-process environments
+      onboardSession.markStepComplete("inference", { sandboxName, provider, model, nimContainer });
+    }
+
+    const sandboxReuseState = getSandboxReuseState(sandboxName);
+    const resumeSandbox = resume && session?.steps?.sandbox?.status === "complete" && sandboxReuseState === "ready";
+    if (resumeSandbox) {
+      resumeStepMessage("sandbox", sandboxName);
+    } else {
+      if (resume && session?.steps?.sandbox?.status === "complete") {
+        if (sandboxReuseState === "not_ready") {
+          note(`  [resume] Recorded sandbox '${sandboxName}' exists but is not ready; recreating it.`);
+          repairRecordedSandbox(sandboxName);
+        } else {
+          note("  [resume] Recorded sandbox state is unavailable; recreating it.");
+          if (sandboxName) {
+            registry.removeSandbox(sandboxName);
+          }
+        }
+      }
+      sandboxName = sandboxName || (await promptValidatedSandboxName());
+      startRecordedStep("sandbox", { sandboxName, provider, model });
+      sandboxName = await createSandbox(gpu, model, provider, preferredInferenceApi, sandboxName);
+      onboardSession.markStepComplete("sandbox", { sandboxName, provider, model, nimContainer });
+    }
+
+    // Tie the NIM container to the sandbox only now: on a fresh run sandboxName is still null during the inference step.
+    if (nimContainer) {
+      registry.updateSandbox(sandboxName, { nimContainer });
+    }
+
+    const resumeOpenclaw = resume && sandboxName && isOpenclawReady(sandboxName);
+    if (resumeOpenclaw) {
+      resumeStepMessage("openclaw", sandboxName);
+      onboardSession.markStepComplete("openclaw", { sandboxName, provider, model });
+    } else {
+      startRecordedStep("openclaw", { sandboxName, provider, model });
+      await setupOpenclaw(sandboxName, model, provider);
+      onboardSession.markStepComplete("openclaw", { sandboxName, provider, model });
+    }
+
+    const recordedPolicyPresets = Array.isArray(session?.policyPresets) ? session.policyPresets : null;
+    const resumePolicies =
+      resume &&
+      sandboxName &&
+      arePolicyPresetsApplied(sandboxName, recordedPolicyPresets || []);
+    if (resumePolicies) {
+      resumeStepMessage("policies", (recordedPolicyPresets || []).join(", "));
+      onboardSession.markStepComplete("policies", { sandboxName, provider, model, policyPresets: recordedPolicyPresets || [] });
+    } else {
+      startRecordedStep("policies", {
+        sandboxName,
+        provider,
+        model,
+        policyPresets: recordedPolicyPresets || [],
+      });
+      const appliedPolicyPresets = await setupPoliciesWithSelection(sandboxName, {
+        selectedPresets:
+          resume &&
+          session?.steps?.policies?.status !== "complete" &&
+          Array.isArray(recordedPolicyPresets) &&
+          recordedPolicyPresets.length > 0
+            ? recordedPolicyPresets
+            : null,
+        onSelection: (policyPresets) => {
+          onboardSession.updateSession((current) => {
+            current.policyPresets = policyPresets;
+            return current;
+          });
+        },
+      });
+      onboardSession.markStepComplete("policies", {
+        sandboxName,
+        provider,
+        model,
+        policyPresets: appliedPolicyPresets,
+      });
+    }
+
+    onboardSession.completeSession({ sandboxName, provider, model });
+    completed = true;
+    printDashboard(sandboxName, model, provider, nimContainer);
+  } finally {
+    releaseOnboardLock();
+  }
 }
 
 module.exports = {
   buildSandboxConfigSyncScript,
-  getFutureShellPathHint,
+  copyBuildContextDir, // stages the build-context directories for createSandbox()
+  classifySandboxCreateFailure,
   createSandbox,
+  getFutureShellPathHint,
+  getGatewayStartEnv, // env overrides passed to `openshell gateway start`
+  getGatewayReuseState,
   getSandboxInferenceConfig,
   getInstalledOpenshellVersion,
+  getRequestedModelHint,
+  getRequestedProviderHint,
   getStableGatewayImageRef,
-  hasStaleGateway,
+  getResumeConfigConflicts,
   isGatewayHealthy,
+  hasStaleGateway,
+  getRequestedSandboxNameHint,
+  getResumeSandboxConflict,
+  getSandboxReuseState,
+  getSandboxStateFromOutputs,
   isSandboxReady,
   onboard,
-  preflight,
+  onboardSession, // session/lock helper object that onboard() records its steps through
+  printSandboxCreateRecoveryHints,
   pruneStaleSandboxEntry,
+  repairRecordedSandbox,
+  recoverGatewayRuntime, // selects/starts the named gateway and polls until it reports Connected
+  startGatewayForRecovery,
   runCaptureOpenshell,
   setupInference,
   setupNim,
-  startGateway,
-  startGatewayForRecovery,
+  isInferenceRouteReady,
+  isOpenclawReady,
+  arePolicyPresetsApplied,
+  setupPoliciesWithSelection, // step 7; returns the chosen preset names
+  hydrateCredentialEnv,
+  shouldIncludeBuildContextPath,
   writeSandboxConfigSyncFile,
   patchStagedDockerfile,
 };
diff --git a/bin/lib/runtime-recovery.js b/bin/lib/runtime-recovery.js
new file mode 100644
index 000000000..b4c1301c0
--- /dev/null
+++ b/bin/lib/runtime-recovery.js
@@ -0,0 +1,81 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+const onboardSession = require("./onboard-session");
+
+function stripAnsi(text) { // remove ANSI SGR sequences (ESC [ ... m); falsy input yields ""
+  // eslint-disable-next-line no-control-regex
+  return (text ? String(text) : "").replace(/\x1b\[[0-9;]*m/g, "");
+}
+
+function parseLiveSandboxNames(listOutput = "") {
+  // Return a Set holding the first column of every data row in a sandbox listing.
+  const names = new Set();
+  for (const rawLine of stripAnsi(listOutput).split("\n")) {
+    const line = rawLine.trim();
+    if (!line) continue;
+    // Header / empty-list rows. NAME must be a whole word so that sandboxes such as
+    // "namespace-dev" are not mistaken for the header (one called exactly "name" still is).
+    if (/^(NAME(\s|$)|No sandboxes found\.?$)/i.test(line)) continue;
+    if (/^Error:/i.test(line)) continue;
+    // `line` is trimmed and non-empty, so its first whitespace-separated field always exists.
+    names.add(line.split(/\s+/)[0]);
+  }
+  return names;
+}
+
+function classifySandboxLookup(output = "") {
+  // Classify the text of a sandbox lookup as missing, unavailable or present.
+  // Rules are tried in order, so an explicit "not found" wins over gateway-error text.
+  const text = stripAnsi(output).trim();
+
+  const rules = [
+    { when: /^$/, state: "missing", reason: "empty" },
+    { when: /sandbox not found|status:\s*NotFound/i, state: "missing", reason: "not_found" },
+    {
+      when: /transport error|client error|Connection reset by peer|Connection refused|No active gateway|Gateway: .*Error/i,
+      state: "unavailable",
+      reason: "gateway_unavailable",
+    },
+  ];
+  const hit = rules.find((rule) => rule.when.test(text));
+  return hit ? { state: hit.state, reason: hit.reason } : { state: "present", reason: "ok" };
+}
+
+function classifyGatewayStatus(output = "") {
+  // Classify gateway status text (presumably `openshell status` output; confirm with callers)
+  // as connected, unavailable or inactive, returning { state, reason }.
+  const clean = stripAnsi(output).trim();
+  if (!clean) return { state: "inactive", reason: "empty" };
+  // "Connected" must be a whole word and not negated: a bare /Connected/i test also
+  // accepts "Disconnected" and "Not connected" and would report them as connected.
+  if (/\bConnected\b/i.test(clean) && !/\bnot\s+connected\b/i.test(clean)) {
+    return { state: "connected", reason: "ok" };
+  }
+  const gatewayDown =
+    /No active gateway|transport error|client error|Connection reset by peer|Connection refused|Gateway: .*Error/i;
+  if (gatewayDown.test(clean)) {
+    return { state: "unavailable", reason: "gateway_unavailable" };
+  }
+  return { state: "inactive", reason: "not_connected" };
+}
+
+function shouldAttemptGatewayRecovery({ sandboxState = "missing", gatewayState = "inactive" } = {}) {
+  return !(sandboxState !== "unavailable" || gatewayState === "connected"); // lookup unavailable AND gateway not connected
+}
+
+function getRecoveryCommand() {
+  // Suggest --resume whenever a saved onboarding session exists and has not
+  // been flagged as non-resumable; otherwise suggest a fresh onboarding run.
+  const saved = onboardSession.loadSession();
+  const canResume = Boolean(saved) && saved.resumable !== false;
+  return canResume ? "nemoclaw onboard --resume" : "nemoclaw onboard";
+}
+
+module.exports = { // output classifiers plus the recovery-command hint
+  classifyGatewayStatus,
+  classifySandboxLookup,
+  getRecoveryCommand,
+  parseLiveSandboxNames,
+  shouldAttemptGatewayRecovery,
+};
diff --git a/bin/nemoclaw.js b/bin/nemoclaw.js
index b070b7e9a..00a430b1d 100755
--- a/bin/nemoclaw.js
+++ b/bin/nemoclaw.js
@@ -134,7 +134,7 @@ async function recoverNamedGatewayRuntime() {
   }
 
   const shouldStartGateway = [before.state, after.state].some((state) =>
-    ["named_unhealthy", "named_unreachable", "connected_other"].includes(state)
+    ["missing_named", "named_unhealthy", "named_unreachable", "connected_other"].includes(state)
   );
 
   if (shouldStartGateway) {
@@ -334,15 +334,16 @@ function exitWithSpawnResult(result) {
 
 async function onboard(args) {
   const { onboard: runOnboard } = require("./lib/onboard");
-  const allowedArgs = new Set(["--non-interactive"]);
+  const allowedArgs = new Set(["--non-interactive", "--resume"]); // Exact flag strings accepted here; any other argument is reported and exits with code 1.
   const unknownArgs = args.filter((arg) => !allowedArgs.has(arg));
   if (unknownArgs.length > 0) {
     console.error(`  Unknown onboard option(s): ${unknownArgs.join(", ")}`);
-    console.error("  Usage: nemoclaw onboard [--non-interactive]");
+    console.error("  Usage: nemoclaw onboard [--non-interactive] [--resume]");
     process.exit(1);
   }
   const nonInteractive = args.includes("--non-interactive");
-  await runOnboard({ nonInteractive });
+  const resume = args.includes("--resume"); // true when --resume appears anywhere in args.
+  await runOnboard({ nonInteractive, resume }); // Both flags are forwarded to runOnboard as booleans.
 }
 
 async function setup() {
diff --git a/install.sh b/install.sh
index 0f67d0766..f81ac7f45 100755
--- a/install.sh
+++ b/install.sh
@@ -124,18 +124,38 @@ print_banner() {
 
 print_done() {
   local elapsed=$((SECONDS - _INSTALL_START))
-  local sandbox_name
-  sandbox_name="$(resolve_default_sandbox_name)"
   info "=== Installation complete ==="
   printf "\n"
   printf "  ${C_GREEN}${C_BOLD}NemoClaw${C_RESET}  ${C_DIM}(%ss)${C_RESET}\n" "$elapsed"
   printf "\n"
-  printf "  ${C_GREEN}Your OpenClaw Sandbox is live.${C_RESET}\n"
-  printf "  ${C_DIM}Sandbox in, break things, and tell us what you find.${C_RESET}\n"
-  printf "\n"
-  printf "  ${C_GREEN}Next:${C_RESET}\n"
-  printf "  %s$%s nemoclaw %s connect\n" "$C_GREEN" "$C_RESET" "$sandbox_name"
-  printf "  %ssandbox@%s$%s openclaw tui\n" "$C_GREEN" "$sandbox_name" "$C_RESET"
+  if [[ "$ONBOARD_RAN" == true ]]; then
+    local sandbox_name
+    sandbox_name="$(resolve_default_sandbox_name)"
+    printf "  ${C_GREEN}Your OpenClaw Sandbox is live.${C_RESET}\n"
+    printf "  ${C_DIM}Sandbox in, break things, and tell us what you find.${C_RESET}\n"
+    printf "\n"
+    printf "  ${C_GREEN}Next:${C_RESET}\n"
+    printf "  %s$%s nemoclaw %s connect\n" "$C_GREEN" "$C_RESET" "$sandbox_name"
+    printf "  %ssandbox@%s$%s openclaw tui\n" "$C_GREEN" "$sandbox_name" "$C_RESET"
+  elif [[ "$NEMOCLAW_READY_NOW" == true ]]; then
+    printf "  ${C_GREEN}NemoClaw CLI is ready in this shell.${C_RESET}\n"
+    printf "  ${C_DIM}Onboarding has not run yet.${C_RESET}\n"
+    printf "\n"
+    printf "  ${C_GREEN}Next:${C_RESET}\n"
+    printf "  %s$%s nemoclaw onboard\n" "$C_GREEN" "$C_RESET"
+  else
+    printf "  ${C_GREEN}NemoClaw CLI is installed.${C_RESET}\n"
+    printf "  ${C_DIM}Onboarding did not run because this shell cannot resolve 'nemoclaw' yet.${C_RESET}\n"
+    printf "\n"
+    printf "  ${C_GREEN}Next:${C_RESET}\n"
+    if [[ -n "$NEMOCLAW_RECOVERY_EXPORT_DIR" ]]; then
+      printf "  %s$%s export PATH=\"%s:\$PATH\"\n" "$C_GREEN" "$C_RESET" "$NEMOCLAW_RECOVERY_EXPORT_DIR"
+    fi
+    if [[ -n "$NEMOCLAW_RECOVERY_PROFILE" ]]; then
+      printf "  %s$%s source %s\n" "$C_GREEN" "$C_RESET" "$NEMOCLAW_RECOVERY_PROFILE"
+    fi
+    printf "  %s$%s nemoclaw onboard\n" "$C_GREEN" "$C_RESET"
+  fi
   printf "\n"
   printf "  ${C_BOLD}GitHub${C_RESET}  ${C_DIM}https://github.com/nvidia/nemoclaw${C_RESET}\n"
   printf "  ${C_BOLD}Docs${C_RESET}    ${C_DIM}https://docs.nvidia.com/nemoclaw/latest/${C_RESET}\n"
@@ -218,6 +238,10 @@ MIN_NPM_MAJOR=10
 RUNTIME_REQUIREMENT_MSG="NemoClaw requires Node.js >=${MIN_NODE_VERSION} and npm >=${MIN_NPM_MAJOR}."
 NEMOCLAW_SHIM_DIR="${HOME}/.local/bin"
 ORIGINAL_PATH="${PATH:-}"
+NEMOCLAW_READY_NOW=false
+NEMOCLAW_RECOVERY_PROFILE=""
+NEMOCLAW_RECOVERY_EXPORT_DIR=""
+ONBOARD_RAN=false
 
 # Compare two semver strings (major.minor.patch). Returns 0 if $1 >= $2.
 # Rejects prerelease suffixes (e.g. "22.16.0-rc.1") to avoid arithmetic errors.
@@ -248,6 +272,30 @@ ensure_nvm_loaded() {
   fi
 }
 
+detect_shell_profile() {  # Print (without a trailing newline) the rc-file path chosen from the basename of $SHELL.
+  local profile="$HOME/.bashrc"  # Default result unless a case arm below overrides it.
+  case "$(basename "${SHELL:-}")" in  # An unset or empty SHELL matches none of the named arms and lands in *).
+    zsh)
+      profile="$HOME/.zshrc"
+      ;;
+    fish)
+      profile="$HOME/.config/fish/config.fish"
+      ;;
+    tcsh)
+      profile="$HOME/.tcshrc"
+      ;;
+    csh)
+      profile="$HOME/.cshrc"
+      ;;
+    *)  # Every other shell name, including bash.
+      if [[ ! -f "$HOME/.bashrc" && -f "$HOME/.profile" ]]; then  # Switch to ~/.profile only when ~/.bashrc is absent and ~/.profile exists.
+        profile="$HOME/.profile"
+      fi
+      ;;
+  esac
+  printf "%s" "$profile"  # Written to stdout so callers can capture it with $(...).
+}
+
 # Refresh PATH so that npm global bin is discoverable.
 # After nvm installs Node.js the global bin lives under the nvm prefix,
 # which may not yet be on PATH in the current session.
@@ -509,30 +557,30 @@ install_nemoclaw() {
 # ---------------------------------------------------------------------------
 verify_nemoclaw() {
   if command_exists nemoclaw; then
+    NEMOCLAW_READY_NOW=true
     info "Verified: nemoclaw is available at $(command -v nemoclaw)"
     return 0
   fi
 
-  # nemoclaw not on PATH — try to diagnose and suggest a fix
-  warn "nemoclaw is not on PATH after installation."
-
   local npm_bin
   npm_bin="$(npm config get prefix 2>/dev/null)/bin" || true
 
   if [[ -n "$npm_bin" && -x "$npm_bin/nemoclaw" ]]; then
     ensure_nemoclaw_shim || true
     if command_exists nemoclaw; then
+      NEMOCLAW_READY_NOW=true
       info "Verified: nemoclaw is available at $(command -v nemoclaw)"
       return 0
     fi
 
-    warn "Found nemoclaw at $npm_bin/nemoclaw but could not expose it on PATH."
-    warn ""
-    warn "Add one of these directories to your shell profile:"
-    warn "  $NEMOCLAW_SHIM_DIR"
-    warn "  $npm_bin"
-    warn ""
-    warn "Continuing — nemoclaw is installed but requires a PATH update."
+    NEMOCLAW_RECOVERY_PROFILE="$(detect_shell_profile)"
+    if [[ -x "$NEMOCLAW_SHIM_DIR/nemoclaw" ]]; then
+      NEMOCLAW_RECOVERY_EXPORT_DIR="$NEMOCLAW_SHIM_DIR"
+    else
+      NEMOCLAW_RECOVERY_EXPORT_DIR="$npm_bin"
+    fi
+    warn "Found nemoclaw at $npm_bin/nemoclaw but this shell still cannot resolve it."
+    warn "Onboarding will be skipped until PATH is updated."
     return 0
   else
     warn "Could not locate the nemoclaw executable."
@@ -547,14 +595,33 @@ verify_nemoclaw() {
 # ---------------------------------------------------------------------------
 run_onboard() {
   info "Running nemoclaw onboard…"
+  local -a onboard_cmd=(onboard)
+  if command_exists node && [[ -f "${HOME}/.nemoclaw/onboard-session.json" ]]; then
+    if node -e '
+      const fs = require("fs");
+      const file = process.argv[1];
+      try {
+        const data = JSON.parse(fs.readFileSync(file, "utf8"));
+        const resumable = data && data.resumable !== false;
+        const status = data && data.status;
+        process.exit(resumable && status && status !== "complete" ? 0 : 1);
+      } catch {
+        process.exit(1);
+      }
+    ' "${HOME}/.nemoclaw/onboard-session.json"; then
+      info "Found an interrupted onboarding session — resuming it."
+      onboard_cmd+=(--resume)
+    fi
+  fi
   if [ "${NON_INTERACTIVE:-}" = "1" ]; then
-    nemoclaw onboard --non-interactive
+    onboard_cmd+=(--non-interactive)
+    nemoclaw "${onboard_cmd[@]}"
   elif [ -t 0 ]; then
-    nemoclaw onboard
+    nemoclaw "${onboard_cmd[@]}"
   elif exec 3</dev/tty; then
     info "Installer stdin is piped; attaching onboarding to /dev/tty…"
     local status=0
-    nemoclaw onboard <&3 || status=$?
+    nemoclaw "${onboard_cmd[@]}" <&3 || status=$?
     exec 3<&-
     return "$status"
   else
@@ -565,30 +632,32 @@ run_onboard() {
 # 6. Post-install message (printed last — after onboarding — so PATH hints stay visible)
 # ---------------------------------------------------------------------------
 post_install_message() {
-  # Only show shell reload instructions when Node was installed via a
-  # version manager that modifies PATH in shell profile files.
-  # nvm and fnm require sourcing the profile; nodesource/brew install to
-  # system paths already on PATH.
-  if [[ ! -s "${NVM_DIR:-$HOME/.nvm}/nvm.sh" ]]; then
+  if [[ "$NEMOCLAW_READY_NOW" == true ]]; then  # nemoclaw already resolves in this shell, so no PATH hint is printed.
     return 0
   fi
 
-  local profile="$HOME/.bashrc"
-  if [[ -n "${ZSH_VERSION:-}" ]] || [[ "$(basename "${SHELL:-}")" == "zsh" ]]; then
-    profile="$HOME/.zshrc"
-  elif [[ ! -f "$HOME/.bashrc" && -f "$HOME/.profile" ]]; then
-    profile="$HOME/.profile"
+  if [[ -z "$NEMOCLAW_RECOVERY_EXPORT_DIR" ]]; then  # No directory was recorded for the export hint, so there is nothing to suggest.
+    return 0
+  fi
+
+  if [[ -z "$NEMOCLAW_RECOVERY_PROFILE" ]]; then  # Fill in the rc-file guess when none was recorded earlier.
+    NEMOCLAW_RECOVERY_PROFILE="$(detect_shell_profile)"
   fi
 
   echo ""
   echo "  ──────────────────────────────────────────────────"
-  warn "Your current shell may not have the updated PATH."
+  warn "Your current shell cannot resolve 'nemoclaw' yet."
   echo ""
   echo "  To use nemoclaw now, run:"
   echo ""
-  echo "    source $profile"
+  echo "    export PATH=\"${NEMOCLAW_RECOVERY_EXPORT_DIR}:\$PATH\""  # \$PATH is escaped so the printed command shows a literal $PATH.
+  echo "    source ${NEMOCLAW_RECOVERY_PROFILE}"
+  echo ""
+  echo "  Then run:"
+  echo ""
+  echo "    nemoclaw onboard"
   echo ""
-  echo "  Or open a new terminal window."
+  echo "  Or open a new terminal window after updating your shell profile."
   echo "  ──────────────────────────────────────────────────"
   echo ""
 }
@@ -635,8 +704,9 @@ main() {
   step 3 "Onboarding"
   if command_exists nemoclaw; then
     run_onboard
+    ONBOARD_RAN=true
   else
-    warn "Skipping onboarding — nemoclaw is not on PATH. Run 'nemoclaw onboard' after updating your PATH."
+    warn "Skipping onboarding — this shell still cannot resolve 'nemoclaw'."
   fi
 
   print_done
diff --git a/scripts/debug.sh b/scripts/debug.sh
index 045f38fc9..2426d4287 100755
--- a/scripts/debug.sh
+++ b/scripts/debug.sh
@@ -107,6 +107,16 @@ elif command -v gtimeout >/dev/null 2>&1; then
   TIMEOUT_BIN="gtimeout"
 fi
 
+SCRIPT_DIR=""
+REPO_ROOT=""
+ONBOARD_SESSION_HELPER=""  # Stays empty unless the script's own path resolves to a real file below.
+SCRIPT_PATH="${BASH_SOURCE[0]:-}"  # Empty when BASH_SOURCE[0] is unset or empty.
+if [ -n "$SCRIPT_PATH" ] && [ -f "$SCRIPT_PATH" ]; then  # Resolve paths only when this script exists as a file on disk.
+  SCRIPT_DIR="$(cd "$(dirname "$SCRIPT_PATH")" && pwd)"
+  REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"  # Parent of the directory that holds this script.
+  ONBOARD_SESSION_HELPER="${REPO_ROOT}/bin/lib/onboard-session.js"
+fi
+
 # Redact known sensitive patterns (API keys, tokens, passwords in env/args).
 redact() {
   sed -E \
@@ -243,6 +253,24 @@ if [ "$QUICK" = false ]; then
   collect "openshell-gateway-info" openshell gateway info
 fi
 
+# -- Onboard session state --
+
+section "Onboard Session"
+if [ -n "$ONBOARD_SESSION_HELPER" ] && [ -f "$ONBOARD_SESSION_HELPER" ] && command -v node >/dev/null 2>&1; then  # Needs a resolved helper path, the helper file itself, and node on PATH.
+  # shellcheck disable=SC2016
+  collect "onboard-session-summary" node -e '
+    const helper = require(process.argv[1]);
+    const summary = helper.summarizeForDebug();
+    if (!summary) {
+      process.stdout.write("No onboard session state found.\n");
+      process.exit(0);
+    }
+    process.stdout.write(`${JSON.stringify(summary, null, 2)}\n`);
+  ' "$ONBOARD_SESSION_HELPER"  # The helper path is the only extra argument, read by the inline script as process.argv[1].
+else
+  echo "  (onboard session helper not available, skipping)"
+fi
+
 # -- Sandbox internals (via SSH using openshell ssh-config) --
 
 if command -v openshell &>/dev/null \
diff --git a/scripts/install.sh b/scripts/install.sh
index e94becca3..d67afea45 100755
--- a/scripts/install.sh
+++ b/scripts/install.sh
@@ -2,527 +2,31 @@
 # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
-# NemoClaw curl-pipe-bash installer.
-#
-# Usage:
-#   curl -fsSL https://raw.githubusercontent.com/NVIDIA/NemoClaw/main/scripts/install.sh | bash
+# Legacy installer compatibility wrapper.
+# The supported installer entrypoint is the repository-root install.sh:
+#   curl -fsSL https://www.nvidia.com/nemoclaw.sh | bash
 
 set -euo pipefail
 
-RED='\033[0;31m'
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-NC='\033[0m'
+ROOT_INSTALLER_URL="https://www.nvidia.com/nemoclaw.sh"  # Shown in the deprecation and error messages below.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"  # Absolute directory of this script; falls back to $0 when BASH_SOURCE[0] is unset or empty.
+ROOT_INSTALLER="${SCRIPT_DIR%/scripts}/install.sh"  # Strips a trailing /scripts; if SCRIPT_DIR does not end in /scripts this points at install.sh inside SCRIPT_DIR itself.
 
-info() { echo -e "${GREEN}[install]${NC} $1"; }
-warn() { echo -e "${YELLOW}[install]${NC} $1"; }
-fail() {
-  echo -e "${RED}[install]${NC} $1"
-  exit 1
+warn_legacy_path() {  # Print a two-line deprecation notice naming the supported installer URL.
+  cat >&2 <<EOF
+[install] deprecated compatibility wrapper: scripts/install.sh
+[install] supported installer: ${ROOT_INSTALLER_URL}
+EOF
 }
 
-define_runtime_helpers() {
-  socket_exists() {
-    local socket_path="$1"
-
-    if [ -n "${NEMOCLAW_TEST_SOCKET_PATHS:-}" ]; then
-      case ":$NEMOCLAW_TEST_SOCKET_PATHS:" in
-        *":$socket_path:"*) return 0 ;;
-      esac
-    fi
-
-    [ -S "$socket_path" ]
-  }
-
-  find_colima_docker_socket() {
-    local home_dir="${1:-${HOME:-/tmp}}"
-    local socket_path
-
-    for socket_path in \
-      "$home_dir/.colima/default/docker.sock" \
-      "$home_dir/.config/colima/default/docker.sock"; do
-      if socket_exists "$socket_path"; then
-        printf '%s\n' "$socket_path"
-        return 0
-      fi
-    done
-
-    return 1
-  }
-
-  find_docker_desktop_socket() {
-    local home_dir="${1:-${HOME:-/tmp}}"
-    local socket_path="$home_dir/.docker/run/docker.sock"
-
-    if socket_exists "$socket_path"; then
-      printf '%s\n' "$socket_path"
-      return 0
-    fi
-
-    return 1
-  }
-
-  detect_docker_host() {
-    if [ -n "${DOCKER_HOST:-}" ]; then
-      printf '%s\n' "$DOCKER_HOST"
-      return 0
-    fi
-
-    local home_dir="${1:-${HOME:-/tmp}}"
-    local socket_path
-
-    if socket_path="$(find_colima_docker_socket "$home_dir")"; then
-      printf 'unix://%s\n' "$socket_path"
-      return 0
-    fi
-
-    if socket_path="$(find_docker_desktop_socket "$home_dir")"; then
-      printf 'unix://%s\n' "$socket_path"
-      return 0
-    fi
-
-    return 1
-  }
-}
-
-SCRIPT_PATH="${BASH_SOURCE[0]-}"
-SCRIPT_DIR=""
-if [ -n "$SCRIPT_PATH" ]; then
-  SCRIPT_DIR="$(cd "$(dirname "$SCRIPT_PATH")" && pwd)"
-fi
-
-if [ -n "$SCRIPT_DIR" ] && [ -f "$SCRIPT_DIR/lib/runtime.sh" ]; then
-  # shellcheck source=/dev/null
-  . "$SCRIPT_DIR/lib/runtime.sh"
-else
-  define_runtime_helpers
-fi
-
-# Ensure nvm environment is loaded in the current shell.
-# Skip if node is already on PATH — sourcing nvm.sh can reset PATH and
-# override the caller's node/npm (e.g. in test environments with stubs).
-ensure_nvm_loaded() {
-  command -v node &>/dev/null && return 0
-  if [ -z "${NVM_DIR:-}" ]; then
-    export NVM_DIR="$HOME/.nvm"
-  fi
-  if [ -s "$NVM_DIR/nvm.sh" ]; then
-    # shellcheck source=/dev/null
-    . "$NVM_DIR/nvm.sh"
-  fi
-}
-
-# Refresh PATH so that npm global bin is discoverable.
-refresh_path() {
-  ensure_nvm_loaded
-
-  local npm_bin
-  npm_bin="$(npm config get prefix 2>/dev/null)/bin" || true
-  if [ -n "$npm_bin" ] && [ -d "$npm_bin" ]; then
-    case ":$PATH:" in
-      *":$npm_bin:"*) ;; # already on PATH
-      *) export PATH="$npm_bin:$PATH" ;;
-    esac
-  fi
-}
-
-MIN_NODE_VERSION="22.16.0"
-MIN_NPM_MAJOR=10
-RUNTIME_REQUIREMENT_MSG="NemoClaw requires Node.js >=${MIN_NODE_VERSION} and npm >=${MIN_NPM_MAJOR}."
-
-OS="$(uname -s)"
-ARCH="$(uname -m)"
-
-case "$OS" in
-  Darwin) OS_LABEL="macOS" ;;
-  Linux) OS_LABEL="Linux" ;;
-  *) fail "Unsupported OS: $OS" ;;
-esac
-
-case "$ARCH" in
-  x86_64 | amd64) ARCH_LABEL="x86_64" ;;
-  aarch64 | arm64) ARCH_LABEL="aarch64" ;;
-  *) fail "Unsupported architecture: $ARCH" ;;
-esac
-
-info "Detected $OS_LABEL ($ARCH_LABEL)"
-
-# ── Detect Node.js version manager ──────────────────────────────
-
-NODE_MGR="none"
-NEED_RESHIM=false
-
-if command -v asdf >/dev/null 2>&1 && asdf plugin list 2>/dev/null | grep -q nodejs; then
-  NODE_MGR="asdf"
-elif [ -n "${NVM_DIR:-}" ] && [ -s "${NVM_DIR}/nvm.sh" ]; then
-  NODE_MGR="nvm"
-elif [ -s "$HOME/.nvm/nvm.sh" ]; then
-  export NVM_DIR="$HOME/.nvm"
-  NODE_MGR="nvm"
-elif command -v fnm >/dev/null 2>&1; then
-  NODE_MGR="fnm"
-elif command -v brew >/dev/null 2>&1 && [ "$OS" = "Darwin" ]; then
-  NODE_MGR="brew"
-elif [ "$OS" = "Linux" ]; then
-  NODE_MGR="nodesource"
-fi
-
-info "Node.js manager: $NODE_MGR"
-
-# Compare two semver strings (major.minor.patch). Returns 0 if $1 >= $2.
-# Rejects prerelease suffixes (e.g. "22.16.0-rc.1") to avoid arithmetic errors.
-version_gte() {
-  [[ "$1" =~ ^[0-9]+(\.[0-9]+){0,2}$ ]] || return 1
-  [[ "$2" =~ ^[0-9]+(\.[0-9]+){0,2}$ ]] || return 1
-  local -a a b
-  IFS=. read -ra a <<<"$1"
-  IFS=. read -ra b <<<"$2"
-  for i in 0 1 2; do
-    local ai=${a[$i]:-0} bi=${b[$i]:-0}
-    if ((ai > bi)); then return 0; fi
-    if ((ai < bi)); then return 1; fi
-  done
-  return 0
-}
-
-version_major() {
-  printf '%s\n' "${1#v}" | cut -d. -f1
-}
-
-ensure_supported_runtime() {
-  command -v node >/dev/null 2>&1 || fail "${RUNTIME_REQUIREMENT_MSG} Node.js was not found on PATH."
-  command -v npm >/dev/null 2>&1 || fail "${RUNTIME_REQUIREMENT_MSG} npm was not found on PATH."
-
-  local node_version npm_version node_major npm_major
-  node_version="$(node -v 2>/dev/null || true)"
-  npm_version="$(npm --version 2>/dev/null || true)"
-  node_major="$(version_major "$node_version")"
-  npm_major="$(version_major "$npm_version")"
-
-  [[ "$node_major" =~ ^[0-9]+$ ]] || fail "Could not determine Node.js version from '${node_version}'. ${RUNTIME_REQUIREMENT_MSG}"
-  [[ "$npm_major" =~ ^[0-9]+$ ]] || fail "Could not determine npm version from '${npm_version}'. ${RUNTIME_REQUIREMENT_MSG}"
-
-  if ! version_gte "${node_version#v}" "$MIN_NODE_VERSION" || ((npm_major < MIN_NPM_MAJOR)); then
-    fail "Unsupported runtime detected: Node.js ${node_version:-unknown}, npm ${npm_version:-unknown}. ${RUNTIME_REQUIREMENT_MSG} Upgrade Node.js and rerun the installer."
-  fi
-
-  info "Runtime OK: Node.js ${node_version}, npm ${npm_version}"
-}
-
-# ── Install Node.js 22 if needed ────────────────────────────────
-
-install_node() {
-  local current_version=""
-  if command -v node >/dev/null 2>&1; then
-    current_version="$(node -v 2>/dev/null | sed 's/^v//')"
-  fi
-
-  if [ -n "$current_version" ] && version_gte "$current_version" "$MIN_NODE_VERSION"; then
-    info "Node.js v${current_version} meets minimum requirement (>= v${MIN_NODE_VERSION})"
-    return 0
-  fi
-
-  info "Installing Node.js 22..."
+warn_legacy_path
 
-  case "$NODE_MGR" in
-    asdf)
-      local latest_22
-      latest_22="$(asdf list all nodejs 2>/dev/null | grep '^22\.' | tail -1)"
-      [ -n "$latest_22" ] || fail "Could not find Node.js 22 in asdf"
-      asdf install nodejs "$latest_22"
-      asdf global nodejs "$latest_22"
-      NEED_RESHIM=true
-      ;;
-    nvm)
-      # shellcheck source=/dev/null
-      . "${NVM_DIR}/nvm.sh"
-      nvm install 22
-      nvm use 22
-      nvm alias default 22
-      ;;
-    fnm)
-      fnm install 22
-      fnm use 22
-      fnm default 22
-      ;;
-    brew)
-      brew install node@22
-      brew link --overwrite node@22 2>/dev/null || true
-      ;;
-    nodesource)
-      curl -fsSL https://deb.nodesource.com/setup_22.x | sudo -E bash - >/dev/null 2>&1
-      sudo apt-get install -y -qq nodejs >/dev/null 2>&1
-      ;;
-    none)
-      fail "No Node.js version manager found. Install Node.js >=${MIN_NODE_VERSION} manually, then re-run."
-      ;;
-  esac
-
-  info "Node.js $(node -v) installed"
-}
-
-install_node
-ensure_supported_runtime
-
-# ── Install Docker ───────────────────────────────────────────────
-
-install_docker() {
-  if command -v docker >/dev/null 2>&1 && docker info >/dev/null 2>&1; then
-    info "Docker already running"
-    return 0
-  fi
-
-  if command -v docker >/dev/null 2>&1; then
-    # Docker installed but not running
-    if [ "$OS" = "Darwin" ]; then
-      local colima_socket=""
-      local docker_desktop_socket=""
-      colima_socket="$(find_colima_docker_socket || true)"
-      docker_desktop_socket="$(find_docker_desktop_socket || true)"
-
-      if [ -n "${DOCKER_HOST:-}" ]; then
-        fail "Docker is installed but the selected runtime is not running. Start the runtime behind DOCKER_HOST (${DOCKER_HOST}) and re-run."
-      fi
-
-      if [ -n "$colima_socket" ] && [ -n "$docker_desktop_socket" ]; then
-        fail "Both Colima and Docker Desktop are available on this Mac. Start the runtime you want explicitly and re-run, or set DOCKER_HOST to select one."
-      fi
-
-      if [ -n "$docker_desktop_socket" ]; then
-        fail "Docker Desktop appears to be installed but is not running. Start Docker Desktop and re-run."
-      fi
-
-      if command -v colima >/dev/null 2>&1; then
-        info "Starting Colima..."
-        colima start
-        return 0
-      fi
-    fi
-    fail "Docker is installed but not running. Please start Docker and re-run."
-  fi
-
-  info "Installing Docker..."
-
-  case "$OS" in
-    Darwin)
-      if ! command -v brew >/dev/null 2>&1; then
-        fail "Homebrew required to install Docker on macOS. Install from https://brew.sh"
-      fi
-      info "Installing Colima + Docker CLI via Homebrew..."
-      brew install colima docker
-      info "Starting Colima..."
-      colima start
-      ;;
-    Linux)
-      sudo apt-get update -qq >/dev/null 2>&1
-      sudo apt-get install -y -qq docker.io >/dev/null 2>&1
-      sudo usermod -aG docker "$(whoami)"
-      info "Docker installed. You may need to log out and back in for group changes."
-      ;;
-  esac
-
-  if ! docker info >/dev/null 2>&1; then
-    fail "Docker installed but not running. Start Docker and re-run."
-  fi
-
-  info "Docker is running"
-}
-
-install_docker
-
-# ── Install OpenShell CLI binary ─────────────────────────────────
-
-install_openshell() {
-  if command -v openshell >/dev/null 2>&1; then
-    info "openshell already installed: $(openshell --version 2>&1 || echo 'unknown')"
-    return 0
-  fi
-
-  info "Installing openshell CLI..."
-
-  case "$OS" in
-    Darwin)
-      case "$ARCH_LABEL" in
-        x86_64) ASSET="openshell-x86_64-apple-darwin.tar.gz" ;;
-        aarch64) ASSET="openshell-aarch64-apple-darwin.tar.gz" ;;
-      esac
-      ;;
-    Linux)
-      case "$ARCH_LABEL" in
-        x86_64) ASSET="openshell-x86_64-unknown-linux-musl.tar.gz" ;;
-        aarch64) ASSET="openshell-aarch64-unknown-linux-musl.tar.gz" ;;
-      esac
-      ;;
-  esac
-
-  tmpdir="$(mktemp -d)"
-  if command -v gh >/dev/null 2>&1; then
-    GH_TOKEN="${GITHUB_TOKEN:-}" gh release download --repo NVIDIA/OpenShell \
-      --pattern "$ASSET" --dir "$tmpdir"
-  else
-    # Fallback: curl latest release
-    curl -fsSL "https://github.com/NVIDIA/OpenShell/releases/latest/download/$ASSET" \
-      -o "$tmpdir/$ASSET"
-  fi
-
-  tar xzf "$tmpdir/$ASSET" -C "$tmpdir"
-
-  if [ -w /usr/local/bin ]; then
-    install -m 755 "$tmpdir/openshell" /usr/local/bin/openshell
-  else
-    sudo install -m 755 "$tmpdir/openshell" /usr/local/bin/openshell
-  fi
-
-  rm -rf "$tmpdir"
-  info "openshell $(openshell --version 2>&1 || echo '') installed"
-}
-
-install_openshell
-
-# ── Pre-extract openclaw workaround (GH-503) ────────────────────
-# The openclaw npm tarball is missing directory entries for extensions/,
-# skills/, and dist/plugin-sdk/config/. npm's tar extractor hard-fails on
-# these but system tar handles them fine. We pre-extract openclaw into
-# node_modules BEFORE npm install so npm sees the dep is already satisfied.
-pre_extract_openclaw() {
-  local install_dir="$1"
-  local openclaw_version
-  openclaw_version=$(node -e "console.log(require('${install_dir}/package.json').dependencies.openclaw)" 2>/dev/null) || openclaw_version=""
-
-  if [ -z "$openclaw_version" ]; then
-    warn "Could not determine openclaw version — skipping pre-extraction"
-    return 1
-  fi
-
-  info "Pre-extracting openclaw@${openclaw_version} with system tar (GH-503 workaround)…"
-  local tmpdir
-  tmpdir="$(mktemp -d)"
-  if npm pack "openclaw@${openclaw_version}" --pack-destination "$tmpdir" >/dev/null 2>&1; then
-    local tgz
-    tgz="$(find "$tmpdir" -maxdepth 1 -name 'openclaw-*.tgz' -print -quit)"
-    if [ -n "$tgz" ] && [ -f "$tgz" ]; then
-      if mkdir -p "${install_dir}/node_modules/openclaw" \
-        && tar xzf "$tgz" -C "${install_dir}/node_modules/openclaw" --strip-components=1; then
-        info "openclaw pre-extracted successfully"
-      else
-        warn "Failed to extract openclaw tarball"
-        rm -rf "$tmpdir"
-        return 1
-      fi
-    else
-      warn "npm pack succeeded but tarball not found"
-      rm -rf "$tmpdir"
-      return 1
-    fi
-  else
-    warn "Failed to download openclaw tarball"
-    rm -rf "$tmpdir"
-    return 1
-  fi
-  rm -rf "$tmpdir"
-}
-
-# ── Resolve release tag ──────────────────────────────────────────
-# Priority: NEMOCLAW_INSTALL_TAG env var > GitHub releases API > "main" fallback.
-resolve_release_tag() {
-  if [ -n "${NEMOCLAW_INSTALL_TAG:-}" ]; then
-    printf "%s" "$NEMOCLAW_INSTALL_TAG"
-    return 0
-  fi
-
-  local response tag
-  response="$(curl -fsSL --max-time 10 \
-    https://api.github.com/repos/NVIDIA/NemoClaw/releases/latest 2>/dev/null)" || true
-  tag="$(printf '%s' "$response" \
-    | grep '"tag_name"' \
-    | sed -E 's/.*"tag_name":[[:space:]]*"([^"]+)".*/\1/' \
-    | head -1 || true)"
-
-  if [ -n "$tag" ] && printf '%s' "$tag" | grep -qE '^v[0-9]'; then
-    printf "%s" "$tag"
-  else
-    printf "main"
-  fi
-}
-
-# ── Install NemoClaw CLI ─────────────────────────────────────────
-
-info "Installing nemoclaw CLI..."
-# Resolve the latest release tag so we never install raw main.
-NEMOCLAW_RELEASE_REF="$(resolve_release_tag)"
-info "Resolved install ref: ${NEMOCLAW_RELEASE_REF}"
-# Clone first so we can pre-extract openclaw before npm install (GH-503).
-# npm install -g git+https://... does this internally but we can't hook
-# into its extraction pipeline, so we do it ourselves.
-NEMOCLAW_SRC="${HOME}/.nemoclaw/source"
-rm -rf "$NEMOCLAW_SRC"
-mkdir -p "$(dirname "$NEMOCLAW_SRC")"
-git clone --depth 1 --branch "$NEMOCLAW_RELEASE_REF" https://github.com/NVIDIA/NemoClaw.git "$NEMOCLAW_SRC"
-pre_extract_openclaw "$NEMOCLAW_SRC" || warn "Pre-extraction failed — npm install may fail if openclaw tarball is broken"
-# Use sudo for npm link only when the global prefix directory is not writable
-# by the current user (e.g., system-managed nodesource installs to /usr).
-SUDO=""
-NPM_GLOBAL_PREFIX="$(npm config get prefix 2>/dev/null)" || true
-if [ -n "$NPM_GLOBAL_PREFIX" ] && [ ! -w "$NPM_GLOBAL_PREFIX" ] && [ "$(id -u)" -ne 0 ]; then
-  SUDO="sudo"
-fi
-(cd "$NEMOCLAW_SRC" && npm install --ignore-scripts && cd nemoclaw && npm install --ignore-scripts && npm run build && cd .. && $SUDO npm link)
-
-if [ "$NEED_RESHIM" = true ]; then
-  info "Reshimming asdf..."
-  asdf reshim nodejs
-fi
-
-refresh_path
-
-# ── Verify ───────────────────────────────────────────────────────
-
-if ! command -v nemoclaw >/dev/null 2>&1; then
-  # Try refreshing PATH one more time
-  refresh_path
-fi
-
-if ! command -v nemoclaw >/dev/null 2>&1; then
-  npm_bin="$(npm config get prefix 2>/dev/null)/bin" || true
-  if [ -n "$npm_bin" ] && [ -x "$npm_bin/nemoclaw" ]; then
-    warn "nemoclaw installed at $npm_bin/nemoclaw but not on current PATH."
-    warn ""
-    warn "Add it to your shell profile:"
-    warn "  echo 'export PATH=\"$npm_bin:\$PATH\"' >> ~/.bashrc"
-    warn "  source ~/.bashrc"
-    warn ""
-    warn "Or for zsh:"
-    warn "  echo 'export PATH=\"$npm_bin:\$PATH\"' >> ~/.zshrc"
-    warn "  source ~/.zshrc"
-  else
-    fail "nemoclaw installation failed. Binary not found."
-  fi
+if [[ "$SCRIPT_DIR" != */scripts || ! -f "$ROOT_INSTALLER" ]]; then  # Require a scripts/ parent as well as an existing root installer: otherwise ROOT_INSTALLER is install.sh in this wrapper's own directory, and a copy saved as install.sh would exec itself forever.
+  cat <<EOF >&2
+[install] scripts/install.sh only works from a NemoClaw repository checkout.
+[install] supported installer: ${ROOT_INSTALLER_URL}
+EOF
+  exit 1
 fi
 
-echo ""
-info "Installation complete!"
-info "nemoclaw $(nemoclaw --version 2>/dev/null || echo 'v0.1.0') is ready."
-echo ""
-echo "  Run \`nemoclaw onboard\` to get started"
-echo ""
-
-# ── Post-install: shell reload instructions ──────────────────
-
-if [ "$NODE_MGR" = "nvm" ] || [ "$NODE_MGR" = "fnm" ]; then
-  profile="$HOME/.bashrc"
-  if [ -n "${ZSH_VERSION:-}" ] || [ "$(basename "${SHELL:-}")" = "zsh" ]; then
-    profile="$HOME/.zshrc"
-  elif [ ! -f "$HOME/.bashrc" ] && [ -f "$HOME/.profile" ]; then
-    profile="$HOME/.profile"
-  fi
-  echo "  ──────────────────────────────────────────────────"
-  warn "Your current shell may not have the updated PATH."
-  echo ""
-  echo "  To use nemoclaw now, run:"
-  echo ""
-  echo "    source $profile"
-  echo ""
-  echo "  Or open a new terminal window."
-  echo "  ──────────────────────────────────────────────────"
-  echo ""
-fi
+exec bash "$ROOT_INSTALLER" "$@"
diff --git a/test/cli.test.js b/test/cli.test.js
index 7cfb06e0d..aba5f865c 100644
--- a/test/cli.test.js
+++ b/test/cli.test.js
@@ -68,6 +68,12 @@ describe("CLI dispatch", () => {
     expect(r.out.includes("Unknown onboard option")).toBeTruthy();
   });
 
+  it("accepts onboard --resume in CLI parsing", () => {
+    const r = run("onboard --resume --non-interactiv"); // The misspelled second flag is deliberate: it triggers the unknown-option exit path.
+    expect(r.code).toBe(1);
+    expect(r.out.includes("Unknown onboard option(s): --non-interactiv")).toBeTruthy(); // Only the typo is listed, which shows --resume itself was accepted.
+  });
+
   it("debug --help exits 0 and shows usage", () => {
     const r = run("debug --help");
     expect(r.code).toBe(0);
@@ -81,6 +87,7 @@ describe("CLI dispatch", () => {
     expect(r.code).toBe(0);
     expect(r.out.includes("Collecting diagnostics")).toBeTruthy();
     expect(r.out.includes("System")).toBeTruthy();
+    expect(r.out.includes("Onboard Session")).toBeTruthy();
     expect(r.out.includes("Done")).toBeTruthy();
   });
 
diff --git a/test/e2e/test-onboard-repair.sh b/test/e2e/test-onboard-repair.sh
new file mode 100755
index 000000000..5e14763e1
--- /dev/null
+++ b/test/e2e/test-onboard-repair.sh
@@ -0,0 +1,331 @@
+#!/bin/bash
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# E2E: resume repair and invalidation behavior.
+#
+# Regression coverage for issue #446.
+# Validates that:
+#   1. Resume recreates a missing recorded sandbox instead of assuming it still exists.
+#   2. Resume rejects a different requested sandbox name on the same host.
+#   3. Resume rejects explicit provider/model changes that conflict with recorded state.
+#
+# Prerequisites:
+#   - Docker running
+#   - openshell CLI installed
+#   - Node.js available
+#   - NVIDIA_API_KEY set to a valid nvapi-* key before starting the test
+#
+# Usage:
+#   NVIDIA_API_KEY=nvapi-... bash test/e2e/test-onboard-repair.sh
+
+set -uo pipefail
+
+PASS=0
+FAIL=0
+SKIP=0
+TOTAL=0
+
+pass() { # record one passing check ($1 = description) and print it in green
+  PASS=$((PASS + 1))
+  TOTAL=$((TOTAL + 1))
+  printf '\033[32m  PASS: %s\033[0m\n' "$1"
+}
+fail() { # record one failing check ($1 = description) and print it in red
+  FAIL=$((FAIL + 1))
+  TOTAL=$((TOTAL + 1))
+  printf '\033[31m  FAIL: %s\033[0m\n' "$1"
+}
+skip() { # record one skipped check ($1 = description) and print it in yellow
+  SKIP=$((SKIP + 1))
+  TOTAL=$((TOTAL + 1))
+  printf '\033[33m  SKIP: %s\033[0m\n' "$1"
+}
+section() { # print a blank line followed by a highlighted "=== $1 ===" phase banner
+  printf '\n'
+  printf '\033[1;36m=== %s ===\033[0m\n' "$1"
+}
+info() { printf '\033[1;34m  [info]\033[0m %s\n' "$1"; } # informational line; does not touch the PASS/FAIL/SKIP counters
+
+if [ -d /workspace ] && [ -f /workspace/install.sh ]; then
+  REPO="/workspace"
+elif [ -f "$(cd "$(dirname "$0")/../.." && pwd)/install.sh" ]; then
+  REPO="$(cd "$(dirname "$0")/../.." && pwd)"
+else
+  echo "ERROR: Cannot find repo root."
+  exit 1
+fi
+
+run_nemoclaw() { # run the CLI entry point from this checkout ($REPO), forwarding every argument unchanged
+  node "$REPO/bin/nemoclaw.js" "$@"
+}
+
+SANDBOX_NAME="${NEMOCLAW_SANDBOX_NAME:-e2e-repair}"
+OTHER_SANDBOX_NAME="${NEMOCLAW_OTHER_SANDBOX_NAME:-e2e-other}"
+SESSION_FILE="$HOME/.nemoclaw/onboard-session.json"
+RESTORE_API_KEY="${NVIDIA_API_KEY:-}"
+
+# ══════════════════════════════════════════════════════════════════
+# Phase 0: Pre-cleanup
+# ══════════════════════════════════════════════════════════════════
+section "Phase 0: Pre-cleanup"
+info "Destroying any leftover sandbox/gateway from previous runs..."
+run_nemoclaw "$SANDBOX_NAME" destroy 2>/dev/null || true
+run_nemoclaw "$OTHER_SANDBOX_NAME" destroy 2>/dev/null || true
+openshell sandbox delete "$SANDBOX_NAME" 2>/dev/null || true
+openshell sandbox delete "$OTHER_SANDBOX_NAME" 2>/dev/null || true
+openshell forward stop 18789 2>/dev/null || true
+openshell gateway destroy -g nemoclaw 2>/dev/null || true
+rm -f "$SESSION_FILE"
+pass "Pre-cleanup complete"
+
+# ══════════════════════════════════════════════════════════════════
+# Phase 1: Prerequisites
+# ══════════════════════════════════════════════════════════════════
+section "Phase 1: Prerequisites"
+
+if docker info >/dev/null 2>&1; then
+  pass "Docker is running"
+else
+  fail "Docker is not running — cannot continue"
+  exit 1
+fi
+
+if command -v openshell >/dev/null 2>&1; then
+  pass "openshell CLI installed"
+else
+  fail "openshell CLI not found — cannot continue"
+  exit 1
+fi
+
+if command -v node >/dev/null 2>&1; then
+  pass "Node.js available"
+else
+  fail "Node.js not found — cannot continue"
+  exit 1
+fi
+
+if [[ -n "$RESTORE_API_KEY" && "$RESTORE_API_KEY" == nvapi-* ]]; then
+  pass "NVIDIA_API_KEY is set (starts with nvapi-)"
+else
+  fail "NVIDIA_API_KEY not set or invalid — required for resume completion"
+  exit 1
+fi
+
+NVIDIA_API_KEY="$RESTORE_API_KEY" node -e '
+const { saveCredential } = require(process.argv[1]);
+saveCredential("NVIDIA_API_KEY", process.env.NVIDIA_API_KEY); // read from env: argv would expose the key in process listings
+' "$REPO/bin/lib/credentials.js" || { fail "Could not store NVIDIA_API_KEY for resume hydration"; exit 1; }
+pass "Stored NVIDIA_API_KEY in ~/.nemoclaw/credentials.json for resume hydration"
+
+# ══════════════════════════════════════════════════════════════════
+# Phase 2: Create interrupted resumable state
+# ══════════════════════════════════════════════════════════════════
+section "Phase 2: Create interrupted state"
+info "Running onboard with an invalid policy mode to create resumable state..."
+
+FIRST_LOG="$(mktemp)"
+NEMOCLAW_NON_INTERACTIVE=1 \
+  NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME" \
+  NEMOCLAW_RECREATE_SANDBOX=1 \
+  NEMOCLAW_POLICY_MODE=invalid \
+  node "$REPO/bin/nemoclaw.js" onboard --non-interactive >"$FIRST_LOG" 2>&1
+first_exit=$?
+first_output="$(cat "$FIRST_LOG")"
+rm -f "$FIRST_LOG"
+
+if [ $first_exit -eq 1 ]; then
+  pass "First onboard exited 1 (expected interrupted run)"
+else
+  fail "First onboard exited $first_exit (expected 1)"
+  echo "$first_output"
+  exit 1
+fi
+
+if [ -f "$SESSION_FILE" ]; then
+  pass "Onboard session file created"
+else
+  fail "Onboard session file missing after interrupted run"
+fi
+
+if echo "$first_output" | grep -q "Unsupported NEMOCLAW_POLICY_MODE: invalid"; then
+  pass "First run failed at policy setup as intended"
+else
+  fail "First run did not fail at the expected policy step"
+fi
+
+if openshell sandbox get "$SANDBOX_NAME" >/dev/null 2>&1; then
+  pass "Sandbox '$SANDBOX_NAME' exists after interrupted run"
+else
+  fail "Sandbox '$SANDBOX_NAME' not found after interrupted run"
+fi
+
+# ══════════════════════════════════════════════════════════════════
+# Phase 3: Repair missing sandbox on resume
+# ══════════════════════════════════════════════════════════════════
+section "Phase 3: Repair missing sandbox"
+info "Deleting the recorded sandbox under the session, then resuming..."
+
+openshell sandbox delete "$SANDBOX_NAME" >/dev/null 2>&1 || true
+openshell forward stop 18789 >/dev/null 2>&1 || true
+
+if openshell sandbox get "$SANDBOX_NAME" >/dev/null 2>&1; then
+  fail "Sandbox '$SANDBOX_NAME' still exists after forced deletion"
+else
+  pass "Sandbox '$SANDBOX_NAME' removed to simulate stale recorded state"
+fi
+
+REPAIR_LOG="$(mktemp)"
+env -u NVIDIA_API_KEY \
+  NEMOCLAW_NON_INTERACTIVE=1 \
+  NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME" \
+  NEMOCLAW_POLICY_MODE=skip \
+  node "$REPO/bin/nemoclaw.js" onboard --resume --non-interactive >"$REPAIR_LOG" 2>&1
+repair_exit=$?
+repair_output="$(cat "$REPAIR_LOG")"
+rm -f "$REPAIR_LOG"
+
+if [ $repair_exit -eq 0 ]; then
+  pass "Resume completed after repairing missing sandbox"
+else
+  fail "Resume exited $repair_exit during missing-sandbox repair"
+  echo "$repair_output"
+  exit 1
+fi
+
+if echo "$repair_output" | grep -q "\[resume\] Skipping preflight (cached)"; then
+  pass "Repair resume skipped preflight"
+else
+  fail "Repair resume did not skip preflight"
+fi
+
+if echo "$repair_output" | grep -q "\[resume\] Skipping gateway (running)"; then
+  pass "Repair resume skipped gateway"
+else
+  fail "Repair resume did not skip gateway"
+fi
+
+if echo "$repair_output" | grep -q "\[resume\] Recorded sandbox state is unavailable; recreating it."; then
+  pass "Repair resume detected missing sandbox"
+else
+  fail "Repair resume did not report missing sandbox recreation"
+fi
+
+if echo "$repair_output" | grep -q "\[5/7\] Creating sandbox"; then
+  pass "Repair resume recreated sandbox"
+else
+  fail "Repair resume did not rerun sandbox creation"
+fi
+
+if run_nemoclaw "$SANDBOX_NAME" status >/dev/null 2>&1; then
+  pass "Repaired sandbox '$SANDBOX_NAME' is manageable"
+else
+  fail "Repaired sandbox '$SANDBOX_NAME' status failed"
+fi
+
+# ══════════════════════════════════════════════════════════════════
+# Phase 4: Reject conflicting sandbox
+# ══════════════════════════════════════════════════════════════════
+section "Phase 4: Reject conflicting sandbox"
+info "Attempting resume with a different sandbox name..."
+
+SANDBOX_CONFLICT_LOG="$(mktemp)"
+env -u NVIDIA_API_KEY \
+  NEMOCLAW_NON_INTERACTIVE=1 \
+  NEMOCLAW_SANDBOX_NAME="$OTHER_SANDBOX_NAME" \
+  NEMOCLAW_POLICY_MODE=skip \
+  node "$REPO/bin/nemoclaw.js" onboard --resume --non-interactive >"$SANDBOX_CONFLICT_LOG" 2>&1
+sandbox_conflict_exit=$?
+sandbox_conflict_output="$(cat "$SANDBOX_CONFLICT_LOG")"
+rm -f "$SANDBOX_CONFLICT_LOG"
+
+if [ $sandbox_conflict_exit -eq 1 ]; then
+  pass "Resume rejected conflicting sandbox name"
+else
+  fail "Resume exited $sandbox_conflict_exit for conflicting sandbox (expected 1)"
+fi
+
+if echo "$sandbox_conflict_output" | grep -qF -- "Resumable state belongs to sandbox '${SANDBOX_NAME}', not '${OTHER_SANDBOX_NAME}'."; then # fixed-string match: sandbox names are data, not regex
+  pass "Conflicting sandbox message is explicit"
+else
+  fail "Conflicting sandbox message missing or incorrect"
+fi
+
+# ══════════════════════════════════════════════════════════════════
+# Phase 5: Reject conflicting provider/model
+# ══════════════════════════════════════════════════════════════════
+section "Phase 5: Reject conflicting provider and model"
+info "Attempting resume with conflicting provider/model inputs..."
+
+PROVIDER_CONFLICT_LOG="$(mktemp)"
+env -u NVIDIA_API_KEY \
+  NEMOCLAW_NON_INTERACTIVE=1 \
+  NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME" \
+  NEMOCLAW_PROVIDER=openai \
+  NEMOCLAW_MODEL=gpt-5.4 \
+  NEMOCLAW_POLICY_MODE=skip \
+  node "$REPO/bin/nemoclaw.js" onboard --resume --non-interactive >"$PROVIDER_CONFLICT_LOG" 2>&1
+provider_conflict_exit=$?
+provider_conflict_output="$(cat "$PROVIDER_CONFLICT_LOG")"
+rm -f "$PROVIDER_CONFLICT_LOG"
+
+if [ $provider_conflict_exit -eq 1 ]; then
+  pass "Resume rejected conflicting provider/model"
+else
+  fail "Resume exited $provider_conflict_exit for conflicting provider/model (expected 1)"
+fi
+
+if echo "$provider_conflict_output" | grep -Eq "Resumable state recorded provider '.*', not '.*'\."; then
+  pass "Conflicting provider message is explicit"
+else
+  fail "Conflicting provider message missing or incorrect"
+fi
+
+if echo "$provider_conflict_output" | grep -Eq "Resumable state recorded model '.*', not 'gpt-5\.4'\."; then # dot in the model name escaped so it only matches a literal "."
+  pass "Conflicting model message is explicit"
+else
+  fail "Conflicting model message missing or incorrect"
+fi
+
+# ══════════════════════════════════════════════════════════════════
+# Phase 6: Final cleanup
+# ══════════════════════════════════════════════════════════════════
+section "Phase 6: Final cleanup"
+
+run_nemoclaw "$SANDBOX_NAME" destroy 2>/dev/null || true
+run_nemoclaw "$OTHER_SANDBOX_NAME" destroy 2>/dev/null || true
+openshell sandbox delete "$SANDBOX_NAME" 2>/dev/null || true
+openshell sandbox delete "$OTHER_SANDBOX_NAME" 2>/dev/null || true
+openshell forward stop 18789 2>/dev/null || true
+openshell gateway destroy -g nemoclaw 2>/dev/null || true
+rm -f "$SESSION_FILE"
+
+if openshell sandbox get "$SANDBOX_NAME" >/dev/null 2>&1; then
+  fail "Sandbox '$SANDBOX_NAME' still exists after cleanup"
+else
+  pass "Sandbox '$SANDBOX_NAME' cleaned up"
+fi
+
+if [ -f "$SESSION_FILE" ]; then
+  fail "Onboard session file still exists after cleanup"
+else
+  pass "Onboard session file cleaned up"
+fi
+
+pass "Final cleanup complete"
+
+# ══════════════════════════════════════════════════════════════════
+# Summary
+# ══════════════════════════════════════════════════════════════════
+echo ""
+echo "========================================"
+echo "  PASS: $PASS"
+echo "  FAIL: $FAIL"
+echo "  SKIP: $SKIP"
+echo " TOTAL: $TOTAL"
+echo "========================================"
+echo ""
+
+if [ $FAIL -ne 0 ]; then
+  exit 1
+fi
diff --git a/test/e2e/test-onboard-resume.sh b/test/e2e/test-onboard-resume.sh
new file mode 100755
index 000000000..2ccef1fc3
--- /dev/null
+++ b/test/e2e/test-onboard-resume.sh
@@ -0,0 +1,339 @@
+#!/bin/bash
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# E2E: interrupted onboard -> resume -> verify completion.
+#
+# Regression test for issue #446.
+# Validates that:
+#   1. A non-interactive onboard run can fail after sandbox creation while leaving resumable state.
+#   2. The onboard session file records the interrupted state safely.
+#   3. `nemoclaw onboard --resume --non-interactive` skips cached preflight,
+#      gateway, and sandbox work, then completes by hydrating the stored credential.
+#
+# Prerequisites:
+#   - Docker running
+#   - openshell CLI installed
+#   - Node.js available
+#   - NVIDIA_API_KEY set to a valid nvapi-* key before starting the test
+#
+# Usage:
+#   NVIDIA_API_KEY=nvapi-... bash test/e2e/test-onboard-resume.sh
+
+set -uo pipefail
+
+if [ "${NEMOCLAW_E2E_NO_TIMEOUT:-0}" != "1" ]; then # skipped when the variable is already 1 (set below before re-exec, or by the caller)
+  TIMEOUT_SECONDS="${TIMEOUT_SECONDS:-600}" # time budget handed to the timeout binary; caller may override
+  TIMEOUT_BIN=""
+  if command -v timeout >/dev/null 2>&1; then
+    TIMEOUT_BIN="timeout"
+  elif command -v gtimeout >/dev/null 2>&1; then # presumably the GNU coreutils name on macOS/Homebrew — TODO confirm
+    TIMEOUT_BIN="gtimeout"
+  fi
+
+  if [ -n "$TIMEOUT_BIN" ]; then # if neither binary was found, the script continues below with no time limit
+    export NEMOCLAW_E2E_NO_TIMEOUT=1 # guard so the re-executed copy does not wrap itself again
+    exec "$TIMEOUT_BIN" -s TERM "$TIMEOUT_SECONDS" "$0" "$@" # replaces this process; "$0" is run directly, not via bash
+  fi
+fi
+
+PASS=0
+FAIL=0
+SKIP=0
+TOTAL=0
+
+pass() { # record one passing check ($1 = description) and print it in green
+  PASS=$((PASS + 1))
+  TOTAL=$((TOTAL + 1))
+  printf '\033[32m  PASS: %s\033[0m\n' "$1"
+}
+fail() { # record one failing check ($1 = description) and print it in red
+  FAIL=$((FAIL + 1))
+  TOTAL=$((TOTAL + 1))
+  printf '\033[31m  FAIL: %s\033[0m\n' "$1"
+}
+skip() { # record one skipped check ($1 = description) and print it in yellow
+  SKIP=$((SKIP + 1))
+  TOTAL=$((TOTAL + 1))
+  printf '\033[33m  SKIP: %s\033[0m\n' "$1"
+}
+section() { # print a blank line followed by a highlighted "=== $1 ===" phase banner
+  printf '\n'
+  printf '\033[1;36m=== %s ===\033[0m\n' "$1"
+}
+info() { printf '\033[1;34m  [info]\033[0m %s\n' "$1"; } # informational line; does not touch the PASS/FAIL/SKIP counters
+
+if [ -d /workspace ] && [ -f /workspace/install.sh ]; then
+  REPO="/workspace"
+elif [ -f "$(cd "$(dirname "$0")/../.." && pwd)/install.sh" ]; then
+  REPO="$(cd "$(dirname "$0")/../.." && pwd)"
+else
+  echo "ERROR: Cannot find repo root."
+  exit 1
+fi
+
+run_nemoclaw() { # run the CLI entry point from this checkout ($REPO), forwarding every argument unchanged
+  node "$REPO/bin/nemoclaw.js" "$@"
+}
+
+SANDBOX_NAME="${NEMOCLAW_SANDBOX_NAME:-e2e-resume}"
+SESSION_FILE="$HOME/.nemoclaw/onboard-session.json"
+REGISTRY="$HOME/.nemoclaw/sandboxes.json"
+RESTORE_API_KEY="${NVIDIA_API_KEY:-}"
+
+# ══════════════════════════════════════════════════════════════════
+# Phase 0: Pre-cleanup
+# ══════════════════════════════════════════════════════════════════
+section "Phase 0: Pre-cleanup"
+info "Destroying any leftover sandbox/gateway from previous runs..."
+run_nemoclaw "$SANDBOX_NAME" destroy 2>/dev/null || true
+openshell sandbox delete "$SANDBOX_NAME" 2>/dev/null || true
+openshell forward stop 18789 2>/dev/null || true
+openshell gateway destroy -g nemoclaw 2>/dev/null || true
+rm -f "$SESSION_FILE"
+pass "Pre-cleanup complete"
+
+# ══════════════════════════════════════════════════════════════════
+# Phase 1: Prerequisites
+# ══════════════════════════════════════════════════════════════════
+section "Phase 1: Prerequisites"
+
+if docker info >/dev/null 2>&1; then
+  pass "Docker is running"
+else
+  fail "Docker is not running — cannot continue"
+  exit 1
+fi
+
+if command -v openshell >/dev/null 2>&1; then
+  pass "openshell CLI installed"
+else
+  fail "openshell CLI not found — cannot continue"
+  exit 1
+fi
+
+if command -v node >/dev/null 2>&1; then
+  pass "Node.js available"
+else
+  fail "Node.js not found — cannot continue"
+  exit 1
+fi
+
+if [[ -n "$RESTORE_API_KEY" && "$RESTORE_API_KEY" == nvapi-* ]]; then
+  pass "NVIDIA_API_KEY is set (starts with nvapi-)"
+else
+  fail "NVIDIA_API_KEY not set or invalid — required for resume completion"
+  exit 1
+fi
+
+if curl -sf --max-time 10 https://integrate.api.nvidia.com/v1/models >/dev/null 2>&1; then
+  pass "Network access to integrate.api.nvidia.com"
+else
+  fail "Cannot reach integrate.api.nvidia.com"
+  exit 1
+fi
+
+NVIDIA_API_KEY="$RESTORE_API_KEY" node -e '
+const { saveCredential } = require(process.argv[1]);
+saveCredential("NVIDIA_API_KEY", process.env.NVIDIA_API_KEY); // read from env: argv would expose the key in process listings
+' "$REPO/bin/lib/credentials.js" || { fail "Could not store NVIDIA_API_KEY for resume hydration"; exit 1; }
+pass "Stored NVIDIA_API_KEY in ~/.nemoclaw/credentials.json for resume hydration"
+
+# ══════════════════════════════════════════════════════════════════
+# Phase 2: First onboard (forced failure after sandbox creation)
+# ══════════════════════════════════════════════════════════════════
+section "Phase 2: First onboard (interrupted)"
+info "Running onboard with an invalid policy mode to create resumable state..."
+
+FIRST_LOG="$(mktemp)"
+NEMOCLAW_NON_INTERACTIVE=1 \
+  NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME" \
+  NEMOCLAW_RECREATE_SANDBOX=1 \
+  NEMOCLAW_POLICY_MODE=invalid \
+  node "$REPO/bin/nemoclaw.js" onboard --non-interactive >"$FIRST_LOG" 2>&1
+first_exit=$?
+first_output="$(cat "$FIRST_LOG")"
+rm -f "$FIRST_LOG"
+
+if [ $first_exit -eq 1 ]; then
+  pass "First onboard exited 1 (expected interrupted run)"
+else
+  fail "First onboard exited $first_exit (expected 1)"
+  echo "$first_output"
+  exit 1
+fi
+
+if echo "$first_output" | grep -qF -- "Sandbox '${SANDBOX_NAME}' created"; then # fixed-string match: the sandbox name is data, not regex
+  pass "Sandbox '$SANDBOX_NAME' created before interruption"
+else
+  fail "Sandbox creation not confirmed in first run output"
+fi
+
+if echo "$first_output" | grep -q "Unsupported NEMOCLAW_POLICY_MODE: invalid"; then
+  pass "First run failed at policy setup as intended"
+else
+  fail "First run did not fail at the expected policy step"
+fi
+
+if openshell sandbox get "$SANDBOX_NAME" >/dev/null 2>&1; then
+  pass "Sandbox '$SANDBOX_NAME' exists after interrupted run"
+else
+  fail "Sandbox '$SANDBOX_NAME' not found after interrupted run"
+fi
+
+if [ -f "$SESSION_FILE" ]; then
+  pass "Onboard session file created"
+else
+  fail "Onboard session file missing after interrupted run"
+fi
+
+node -e '
+const fs = require("fs");
+const file = process.argv[1];
+const data = JSON.parse(fs.readFileSync(file, "utf8"));
+if (data.status !== "failed") process.exit(1);
+if (data.lastCompletedStep !== "openclaw") process.exit(2);
+if (!data.failure || data.failure.step !== "policies") process.exit(3);
+' "$SESSION_FILE"
+case $? in
+  0) pass "Session file recorded openclaw completion and policy failure" ;;
+  *) fail "Session file did not record the expected interrupted state" ;;
+esac
+
+# ══════════════════════════════════════════════════════════════════
+# Phase 3: Resume and complete
+# ══════════════════════════════════════════════════════════════════
+section "Phase 3: Resume"
+info "Running onboard --resume with NVIDIA_API_KEY removed from env..."
+
+RESUME_LOG="$(mktemp)"
+env -u NVIDIA_API_KEY \
+  NEMOCLAW_NON_INTERACTIVE=1 \
+  NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME" \
+  NEMOCLAW_POLICY_MODE=skip \
+  node "$REPO/bin/nemoclaw.js" onboard --resume --non-interactive >"$RESUME_LOG" 2>&1
+resume_exit=$?
+resume_output="$(cat "$RESUME_LOG")"
+rm -f "$RESUME_LOG"
+
+if [ $resume_exit -eq 0 ]; then
+  pass "Resume completed successfully"
+else
+  fail "Resume exited $resume_exit (expected 0)"
+  echo "$resume_output"
+  exit 1
+fi
+
+if echo "$resume_output" | grep -q "\[resume\] Skipping preflight (cached)"; then
+  pass "Resume skipped preflight"
+else
+  fail "Resume did not skip preflight"
+fi
+
+if echo "$resume_output" | grep -q "\[resume\] Skipping gateway (running)"; then
+  pass "Resume skipped gateway"
+else
+  fail "Resume did not skip gateway"
+fi
+
+if echo "$resume_output" | grep -qF -- "[resume] Skipping sandbox (${SANDBOX_NAME})"; then # fixed-string match: brackets, parens and the name are all literal
+  pass "Resume skipped sandbox"
+else
+  fail "Resume did not skip sandbox"
+fi
+
+if echo "$resume_output" | grep -q "\[1/7\] Preflight checks"; then
+  fail "Resume reran preflight unexpectedly"
+else
+  pass "Resume did not rerun preflight"
+fi
+
+if echo "$resume_output" | grep -q "\[2/7\] Starting OpenShell gateway"; then
+  fail "Resume reran gateway startup unexpectedly"
+else
+  pass "Resume did not rerun gateway startup"
+fi
+
+if echo "$resume_output" | grep -q "\[5/7\] Creating sandbox"; then
+  fail "Resume reran sandbox creation unexpectedly"
+else
+  pass "Resume did not rerun sandbox creation"
+fi
+
+if echo "$resume_output" | grep -q "\[4/7\] Setting up inference provider"; then
+  pass "Resume continued with inference setup"
+else
+  fail "Resume did not continue with inference setup"
+fi
+
+if run_nemoclaw "$SANDBOX_NAME" status >/dev/null 2>&1; then
+  pass "Sandbox '$SANDBOX_NAME' is manageable after resume"
+else
+  fail "Sandbox '$SANDBOX_NAME' status failed after resume"
+fi
+
+node -e '
+const fs = require("fs");
+const file = process.argv[1];
+const data = JSON.parse(fs.readFileSync(file, "utf8"));
+if (data.status !== "complete") process.exit(1);
+if (data.provider !== "nvidia-prod") process.exit(2);
+if (data.steps.preflight.status !== "complete") process.exit(3);
+if (data.steps.gateway.status !== "complete") process.exit(4);
+if (data.steps.sandbox.status !== "complete") process.exit(5);
+if (data.steps.provider_selection.status !== "complete") process.exit(6);
+if (data.steps.inference.status !== "complete") process.exit(7);
+if (data.steps.openclaw.status !== "complete") process.exit(8);
+if (data.steps.policies.status !== "complete") process.exit(9);
+' "$SESSION_FILE"
+case $? in
+  0) pass "Session file recorded full completion after resume" ;;
+  *) fail "Session file did not record the expected completed state after resume" ;;
+esac
+
+if [ -f "$REGISTRY" ] && grep -qF -- "$SANDBOX_NAME" "$REGISTRY"; then # -F/--: the name is matched literally and never parsed as a grep option
+  pass "Registry contains resumed sandbox entry"
+else
+  fail "Registry does not contain resumed sandbox entry"
+fi
+
+# ══════════════════════════════════════════════════════════════════
+# Phase 4: Final cleanup
+# ══════════════════════════════════════════════════════════════════
+section "Phase 4: Final cleanup"
+
+run_nemoclaw "$SANDBOX_NAME" destroy 2>/dev/null || true
+openshell sandbox delete "$SANDBOX_NAME" 2>/dev/null || true
+openshell forward stop 18789 2>/dev/null || true
+openshell gateway destroy -g nemoclaw 2>/dev/null || true
+rm -f "$SESSION_FILE"
+
+if openshell sandbox get "$SANDBOX_NAME" >/dev/null 2>&1; then
+  fail "Sandbox '$SANDBOX_NAME' still exists after cleanup"
+else
+  pass "Sandbox '$SANDBOX_NAME' cleaned up"
+fi
+
+if [ -f "$SESSION_FILE" ]; then
+  fail "Onboard session file still exists after cleanup"
+else
+  pass "Onboard session file cleaned up"
+fi
+
+pass "Final cleanup complete"
+
+# ══════════════════════════════════════════════════════════════════
+# Summary
+# ══════════════════════════════════════════════════════════════════
+echo ""
+echo "========================================"
+echo "  PASS: $PASS"
+echo "  FAIL: $FAIL"
+echo "  SKIP: $SKIP"
+echo " TOTAL: $TOTAL"
+echo "========================================"
+echo ""
+
+if [ $FAIL -ne 0 ]; then
+  exit 1
+fi
diff --git a/test/install-preflight.test.js b/test/install-preflight.test.js
index 2d5e11504..e24fe17e4 100644
--- a/test/install-preflight.test.js
+++ b/test/install-preflight.test.js
@@ -8,7 +8,8 @@ import path from "node:path";
 import { spawnSync } from "node:child_process";
 
 const INSTALLER = path.join(import.meta.dirname, "..", "install.sh");
-const CURL_PIPE_INSTALLER = path.join(import.meta.dirname, "..", "scripts", "install.sh");
+const CURL_PIPE_INSTALLER = path.join(import.meta.dirname, "..", "install.sh");
+const LEGACY_INSTALLER_WRAPPER = path.join(import.meta.dirname, "..", "scripts", "install.sh");
 const GITHUB_INSTALL_URL = "git+https://github.com/NVIDIA/NemoClaw.git";
 const TEST_SYSTEM_PATH = "/usr/bin:/bin";
 
@@ -284,181 +285,22 @@ exit 98
     expect(output).not.toMatch(/npm install -g nemoclaw/);
   });
 
-  it("does not silently prefer Colima when both macOS runtimes are available", () => {
-    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-macos-runtime-choice-"));
-    const fakeBin = path.join(tmp, "bin");
-    const colimaSocket = path.join(tmp, ".colima/default/docker.sock");
-    const dockerDesktopSocket = path.join(tmp, ".docker/run/docker.sock");
-    fs.mkdirSync(fakeBin);
-
-    writeExecutable(
-      path.join(fakeBin, "node"),
-      `#!/usr/bin/env bash
-if [ "$1" = "-v" ] || [ "$1" = "--version" ]; then
-  echo "v22.16.0"
-  exit 0
-fi
-exit 99
-`,
-    );
-
-    writeExecutable(
-      path.join(fakeBin, "npm"),
-      `#!/usr/bin/env bash
-if [ "$1" = "--version" ]; then
-  echo "10.9.2"
-  exit 0
-fi
-echo "/tmp/npm-prefix"
-exit 0
-`,
-    );
-
-    writeExecutable(
-      path.join(fakeBin, "docker"),
-      `#!/usr/bin/env bash
-if [ "$1" = "info" ]; then
-  exit 1
-fi
-exit 0
-`,
-    );
-
-    writeExecutable(
-      path.join(fakeBin, "colima"),
-      `#!/usr/bin/env bash
-echo "colima should not be started" >&2
-exit 97
-`,
-    );
-
-    writeExecutable(
-      path.join(fakeBin, "uname"),
-      `#!/usr/bin/env bash
-if [ "$1" = "-s" ]; then
-  echo "Darwin"
-  exit 0
-fi
-if [ "$1" = "-m" ]; then
-  echo "arm64"
-  exit 0
-fi
-echo "Darwin"
-`,
-    );
-
-    const result = spawnSync("bash", [CURL_PIPE_INSTALLER], {
+  it("legacy scripts/install.sh delegates to the root installer from a repo checkout", () => {
+    const result = spawnSync("bash", [LEGACY_INSTALLER_WRAPPER, "--help"], {
       cwd: path.join(import.meta.dirname, ".."),
       encoding: "utf-8",
-      env: {
-        ...process.env,
-        HOME: tmp,
-        PATH: `${fakeBin}:${TEST_SYSTEM_PATH}`,
-        NEMOCLAW_TEST_SOCKET_PATHS: `${colimaSocket}:${dockerDesktopSocket}`,
-      },
     });
 
     const output = `${result.stdout}${result.stderr}`;
-    expect(result.status).not.toBe(0);
-    expect(output).toMatch(/Both Colima and Docker Desktop are available/);
-    expect(output).not.toMatch(/colima should not be started/);
+    expect(result.status).toBe(0);
+    expect(output).toMatch(/deprecated compatibility wrapper/);
+    expect(output).toMatch(/https:\/\/www\.nvidia\.com\/nemoclaw\.sh/);
+    expect(output).toMatch(/NemoClaw Installer/);
   });
 
-  it("can run via stdin without a sibling runtime.sh file", () => {
-    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-curl-pipe-installer-"));
-    const fakeBin = path.join(tmp, "bin");
-    const prefix = path.join(tmp, "prefix");
-    fs.mkdirSync(fakeBin);
-    fs.mkdirSync(path.join(prefix, "bin"), { recursive: true });
-
-    writeExecutable(
-      path.join(fakeBin, "node"),
-      `#!/usr/bin/env bash
-if [ "$1" = "-v" ] || [ "$1" = "--version" ]; then
-  echo "v22.16.0"
-  exit 0
-fi
-if [ "$1" = "-e" ]; then
-  exit 1
-fi
-exit 99
-`,
-    );
-
-    writeExecutable(
-      path.join(fakeBin, "git"),
-      `#!/usr/bin/env bash
-if [ "$1" = "clone" ]; then
-  target="\${@: -1}"
-  mkdir -p "$target/nemoclaw"
-  echo '{"name":"nemoclaw","version":"0.1.0","dependencies":{"openclaw":"2026.3.11"}}' > "$target/package.json"
-  echo '{"name":"nemoclaw-plugin","version":"0.1.0"}' > "$target/nemoclaw/package.json"
-  exit 0
-fi
-exit 0
-`,
-    );
-
-    writeExecutable(
-      path.join(fakeBin, "npm"),
-      `#!/usr/bin/env bash
-set -euo pipefail
-if [ "$1" = "--version" ]; then
-  echo "10.9.2"
-  exit 0
-fi
-if [ "$1" = "config" ] && [ "$2" = "get" ] && [ "$3" = "prefix" ]; then
-  echo "$NPM_PREFIX"
-  exit 0
-fi
-if [ "$1" = "pack" ]; then
-  exit 1
-fi
-if [ "$1" = "install" ] && [[ "$*" == *"--ignore-scripts"* ]]; then
-  exit 0
-fi
-if [ "$1" = "run" ]; then
-  exit 0
-fi
-if [ "$1" = "link" ]; then
-  cat > "$NPM_PREFIX/bin/nemoclaw" <<'EOS'
-#!/usr/bin/env bash
-if [ "$1" = "--version" ]; then
-  echo "v0.1.0-test"
-  exit 0
-fi
-exit 0
-EOS
-  chmod +x "$NPM_PREFIX/bin/nemoclaw"
-  exit 0
-fi
-echo "unexpected npm invocation: $*" >&2
-exit 98
-`,
-    );
-
-    writeExecutable(
-      path.join(fakeBin, "docker"),
-      `#!/usr/bin/env bash
-if [ "$1" = "info" ]; then
-  exit 0
-fi
-exit 0
-`,
-    );
-
-    writeExecutable(
-      path.join(fakeBin, "openshell"),
-      `#!/usr/bin/env bash
-if [ "$1" = "--version" ]; then
-  echo "openshell 0.0.9"
-  exit 0
-fi
-exit 0
-`,
-    );
-
-    const scriptContents = fs.readFileSync(CURL_PIPE_INSTALLER, "utf-8");
+  it("legacy scripts/install.sh fails clearly when run without the repo root installer", () => {
+    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-legacy-installer-stdin-"));
+    const scriptContents = fs.readFileSync(LEGACY_INSTALLER_WRAPPER, "utf-8");
     const result = spawnSync("bash", [], {
       cwd: tmp,
       input: scriptContents,
@@ -466,16 +308,16 @@ exit 0
       env: {
         ...process.env,
         HOME: tmp,
-        PATH: `${fakeBin}:${TEST_SYSTEM_PATH}`,
-        NEMOCLAW_NON_INTERACTIVE: "1",
-        NPM_PREFIX: prefix,
+        PATH: TEST_SYSTEM_PATH,
       },
     });
 
     const output = `${result.stdout}${result.stderr}`;
-    expect(result.status).toBe(0);
-    expect(output).toMatch(/Installation complete!/);
-    expect(output).toMatch(/nemoclaw v0\.1\.0-test is ready/);
+    expect(result.status).not.toBe(0);
+    expect(output).toMatch(/deprecated compatibility wrapper/);
+    expect(output).toMatch(/supported installer/);
+    expect(output).toMatch(/https:\/\/www\.nvidia\.com\/nemoclaw\.sh/);
+    expect(output).toMatch(/only works from a NemoClaw repository checkout/);
   });
 
   it("--help exits 0 and shows install usage", () => {
@@ -580,6 +422,70 @@ fi`,
     expect(log).not.toMatch(new RegExp(GITHUB_INSTALL_URL.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")));
   });
 
+  it("auto-resumes an interrupted onboarding session during install", () => {
+    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-install-resume-")); // used below as both HOME and cwd of the installer run
+    const fakeBin = path.join(tmp, "bin");
+    const prefix = path.join(tmp, "prefix");
+    const onboardLog = path.join(tmp, "onboard.log"); // the stub nemoclaw appends its arguments here
+    fs.mkdirSync(fakeBin);
+    fs.mkdirSync(path.join(prefix, "bin"), { recursive: true });
+    fs.mkdirSync(path.join(tmp, ".nemoclaw"), { recursive: true });
+
+    fs.writeFileSync(
+      path.join(tmp, ".nemoclaw", "onboard-session.json"),
+      JSON.stringify({ resumable: true, status: "in_progress" }, null, 2), // pre-existing session marked resumable and still in progress
+    );
+
+    writeNodeStub(fakeBin);
+    writeNpmStub(
+      fakeBin,
+      `if [ "$1" = "pack" ]; then
+  tmpdir="$4"
+  mkdir -p "$tmpdir/package"
+  tar -czf "$tmpdir/openclaw-2026.3.11.tgz" -C "$tmpdir" package
+  exit 0
+fi
+if [ "$1" = "install" ]; then exit 0; fi
+if [ "$1" = "run" ] && [ "$2" = "build" ]; then exit 0; fi
+if [ "$1" = "link" ]; then
+  cat > "$NPM_PREFIX/bin/nemoclaw" <<'EOS'
+#!/usr/bin/env bash
+printf '%s\\n' "$*" >> "$NEMOCLAW_ONBOARD_LOG"
+exit 0
+EOS
+  chmod +x "$NPM_PREFIX/bin/nemoclaw"
+  exit 0
+fi`,
+    );
+
+    fs.writeFileSync(
+      path.join(tmp, "package.json"),
+      JSON.stringify({ name: "nemoclaw", version: "0.1.0" }, null, 2),
+    );
+    fs.mkdirSync(path.join(tmp, "nemoclaw"), { recursive: true });
+    fs.writeFileSync(
+      path.join(tmp, "nemoclaw", "package.json"),
+      JSON.stringify({ name: "nemoclaw-plugin", version: "0.1.0" }, null, 2),
+    );
+
+    const result = spawnSync("bash", [INSTALLER], {
+      cwd: tmp,
+      encoding: "utf-8",
+      env: {
+        ...process.env,
+        HOME: tmp,
+        PATH: `${fakeBin}:${TEST_SYSTEM_PATH}`, // stubs take precedence over system binaries
+        NEMOCLAW_NON_INTERACTIVE: "1",
+        NPM_PREFIX: prefix,
+        NEMOCLAW_ONBOARD_LOG: onboardLog,
+      },
+    });
+
+    expect(result.status).toBe(0);
+    expect(`${result.stdout}${result.stderr}`).toMatch(/Found an interrupted onboarding session — resuming it\./);
+    expect(fs.readFileSync(onboardLog, "utf-8")).toMatch(/^onboard --resume --non-interactive$/m); // the stub was invoked with exactly these arguments
+  });
+
   it("spin() non-TTY: dumps wrapped-command output and exits non-zero on failure", () => {
     const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-install-spin-fail-"));
     const fakeBin = path.join(tmp, "bin");
@@ -744,6 +650,121 @@ exit 0
     expect(fs.readlinkSync(shimPath)).toBe(path.join(prefix, "bin", "nemoclaw"));
     expect(`${result.stdout}${result.stderr}`).toMatch(/Created user-local shim/);
   });
+
+  it("does not print PATH recovery instructions when nemoclaw is already usable in this shell", () => { // runs the installer against stub tools and asserts that no PATH-recovery advice is printed
+    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-install-ready-shell-")); // scratch dir; also passed to the installer as HOME and cwd
+    const fakeBin = path.join(tmp, "bin"); // holds the stub executables; placed first on PATH below
+    const prefix = path.join(tmp, "prefix"); // exported to the stubs as NPM_PREFIX
+    const nvmDir = path.join(tmp, ".nvm"); // exported to the installer as NVM_DIR
+    fs.mkdirSync(fakeBin);
+    fs.mkdirSync(path.join(prefix, "bin"), { recursive: true });
+    fs.mkdirSync(nvmDir, { recursive: true });
+    fs.writeFileSync(path.join(nvmDir, "nvm.sh"), "# stub nvm\n"); // placeholder nvm.sh that contains only a comment
+
+    writeExecutable( // node stub: prints v22.16.0 for -v/--version, exits 1 for -e, exits 99 otherwise
+      path.join(fakeBin, "node"),
+      `#!/usr/bin/env bash
+if [ "$1" = "-v" ] || [ "$1" = "--version" ]; then
+  echo "v22.16.0"
+  exit 0
+fi
+if [ "$1" = "-e" ]; then
+  exit 1
+fi
+exit 99
+`,
+    );
+
+    writeExecutable( // git stub: "clone" writes minimal package.json files into its last argument; everything else exits 0
+      path.join(fakeBin, "git"),
+      `#!/usr/bin/env bash
+if [ "$1" = "clone" ]; then
+  target="\${@: -1}"
+  mkdir -p "$target/nemoclaw"
+  echo '{"name":"nemoclaw","version":"0.1.0","dependencies":{"openclaw":"2026.3.11"}}' > "$target/package.json"
+  echo '{"name":"nemoclaw-plugin","version":"0.1.0"}' > "$target/nemoclaw/package.json"
+  exit 0
+fi
+exit 0
+`,
+    );
+
+    writeExecutable( // npm stub: "pack" fails, "link" drops a nemoclaw stub into $NPM_PREFIX/bin, unrecognized invocations exit 98
+      path.join(fakeBin, "npm"),
+      `#!/usr/bin/env bash
+set -euo pipefail
+if [ "$1" = "--version" ]; then
+  echo "10.9.2"
+  exit 0
+fi
+if [ "$1" = "config" ] && [ "$2" = "get" ] && [ "$3" = "prefix" ]; then
+  echo "$NPM_PREFIX"
+  exit 0
+fi
+if [ "$1" = "pack" ]; then
+  exit 1
+fi
+if [ "$1" = "install" ] && [[ "$*" == *"--ignore-scripts"* ]]; then
+  exit 0
+fi
+if [ "$1" = "run" ]; then
+  exit 0
+fi
+if [ "$1" = "link" ]; then
+  cat > "$NPM_PREFIX/bin/nemoclaw" <<'EOS'
+#!/usr/bin/env bash
+if [ "$1" = "onboard" ] || [ "$1" = "--version" ]; then
+  exit 0
+fi
+exit 0
+EOS
+  chmod +x "$NPM_PREFIX/bin/nemoclaw"
+  exit 0
+fi
+echo "unexpected npm invocation: $*" >&2
+exit 98
+`,
+    );
+
+    writeExecutable( // docker stub: exits 0 for every invocation
+      path.join(fakeBin, "docker"),
+      `#!/usr/bin/env bash
+if [ "$1" = "info" ]; then
+  exit 0
+fi
+exit 0
+`,
+    );
+
+    writeExecutable( // openshell stub: prints "openshell 0.0.9" for --version, exits 0 otherwise
+      path.join(fakeBin, "openshell"),
+      `#!/usr/bin/env bash
+if [ "$1" = "--version" ]; then
+  echo "openshell 0.0.9"
+  exit 0
+fi
+exit 0
+`,
+    );
+
+    const result = spawnSync("bash", [INSTALLER], {
+      cwd: tmp,
+      encoding: "utf-8",
+      env: {
+        ...process.env,
+        HOME: tmp,
+        PATH: `${fakeBin}:${TEST_SYSTEM_PATH}`, // stub directory comes before the system path entries
+        NEMOCLAW_NON_INTERACTIVE: "1",
+        NPM_PREFIX: prefix,
+        NVM_DIR: nvmDir,
+      },
+    });
+
+    const output = `${result.stdout}${result.stderr}`; // assertions inspect stdout and stderr combined
+    expect(result.status).toBe(0);
+    expect(output).not.toMatch(/current shell cannot resolve 'nemoclaw'/);
+    expect(output).not.toMatch(/source .*\.bashrc|source .*\.zshrc|source .*\.profile/); // no "source <rc file>" hint for any of the three rc files
+  });
 });
 
 // ---------------------------------------------------------------------------
diff --git a/test/nemoclaw-cli-recovery.test.js b/test/nemoclaw-cli-recovery.test.js
new file mode 100644
index 000000000..f3ba3df0a
--- /dev/null
+++ b/test/nemoclaw-cli-recovery.test.js
@@ -0,0 +1,105 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import assert from "node:assert/strict";
+import { spawnSync } from "node:child_process";
+import fs from "node:fs";
+import os from "node:os";
+import path from "node:path";
+import { describe, it } from "vitest";
+
+describe("nemoclaw CLI runtime recovery", () => {
+  it("recovers sandbox status when openshell is only available via the resolved fallback path", () => {
+    const repoRoot = path.join(import.meta.dirname, "..");
+    const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-cli-recovery-"));
+    const homeLocalBin = path.join(tmpDir, ".local", "bin");
+    const stateDir = path.join(tmpDir, "state");
+    const registryDir = path.join(tmpDir, ".nemoclaw");
+    const openshellPath = path.join(homeLocalBin, "openshell");
+    const stateFile = path.join(stateDir, "openshell-state.json");
+
+    fs.mkdirSync(homeLocalBin, { recursive: true });
+    fs.mkdirSync(stateDir, { recursive: true });
+    fs.mkdirSync(registryDir, { recursive: true });
+    fs.writeFileSync(
+      path.join(registryDir, "sandboxes.json"),
+      JSON.stringify({
+        defaultSandbox: "my-assistant",
+        sandboxes: {
+          "my-assistant": {
+            name: "my-assistant",
+            model: "nvidia/nemotron-3-super-120b-a12b",
+            provider: "nvidia-prod",
+            gpuEnabled: false,
+            policies: [],
+          },
+        },
+      }),
+      { mode: 0o600 }
+    );
+    fs.writeFileSync(stateFile, JSON.stringify({ statusCalls: 0, sandboxGetCalls: 0 }));
+    fs.writeFileSync(
+      openshellPath,
+      `#!${process.execPath}
+const fs = require("fs");
+const path = require("path");
+const statePath = ${JSON.stringify(stateFile)};
+const args = process.argv.slice(2);
+const state = JSON.parse(fs.readFileSync(statePath, "utf8"));
+
+if (args[0] === "status") {
+  state.statusCalls += 1;
+  fs.writeFileSync(statePath, JSON.stringify(state));
+  if (state.statusCalls === 1) {
+    process.stdout.write("Error:   × No active gateway\\n");
+  } else {
+    process.stdout.write("Gateway: nemoclaw\\nStatus: Connected\\n");
+  }
+  process.exit(0);
+}
+
+if (args[0] === "gateway" && (args[1] === "start" || args[1] === "select")) {
+  fs.writeFileSync(statePath, JSON.stringify(state));
+  process.exit(0);
+}
+
+if (args[0] === "gateway" && args[1] === "info") {
+  process.stdout.write("Gateway: nemoclaw\\nGateway endpoint: https://127.0.0.1:8080\\n");
+  process.exit(0);
+}
+
+if (args[0] === "sandbox" && args[1] === "get" && args[2] === "my-assistant") {
+  state.sandboxGetCalls += 1;
+  fs.writeFileSync(statePath, JSON.stringify(state));
+  if (state.sandboxGetCalls === 1) {
+    process.stdout.write("Error:   × transport error\\n  ╰─▶ Connection reset by peer (os error 104)\\n");
+    process.exit(1);
+  }
+  process.stdout.write("Sandbox:\\n\\n  Id: abc\\n  Name: my-assistant\\n  Namespace: openshell\\n  Phase: Ready\\n");
+  process.exit(0);
+}
+
+if (args[0] === "logs") {
+  process.exit(0);
+}
+
+process.exit(0);
+`,
+      { mode: 0o755 }
+    );
+
+    const result = spawnSync(process.execPath, [path.join(repoRoot, "bin", "nemoclaw.js"), "my-assistant", "status"], {
+      cwd: repoRoot,
+      encoding: "utf-8",
+      env: {
+        ...process.env,
+        HOME: tmpDir,
+        PATH: "/usr/bin:/bin",
+      },
+    });
+
+    assert.equal(result.status, 0, result.stderr);
+    assert.match(result.stdout, /Recovered NemoClaw gateway runtime via (start|select)/);
+    assert.match(result.stdout, /Phase: Ready/);
+  });
+});
diff --git a/test/onboard-session.test.js b/test/onboard-session.test.js
new file mode 100644
index 000000000..08dc5d30f
--- /dev/null
+++ b/test/onboard-session.test.js
@@ -0,0 +1,222 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import { afterEach, beforeEach, describe, expect, it } from "vitest";
+import fs from "node:fs";
+import os from "node:os";
+import path from "node:path";
+import { createRequire } from "node:module";
+
+const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-onboard-session-"));
+const require = createRequire(import.meta.url);
+const modulePath = require.resolve("../bin/lib/onboard-session");
+const originalHome = process.env.HOME;
+let session;
+
+beforeEach(() => {
+  process.env.HOME = tmpDir;
+  delete require.cache[modulePath];
+  session = require("../bin/lib/onboard-session");
+  session.clearSession();
+  session.releaseOnboardLock();
+});
+
+afterEach(() => {
+  delete require.cache[modulePath];
+  if (originalHome === undefined) {
+    delete process.env.HOME;
+  } else {
+    process.env.HOME = originalHome;
+  }
+});
+
+describe("onboard session", () => {
+  it("starts empty", () => {
+    expect(session.loadSession()).toBeNull();
+  });
+
+  it("creates and persists a session with restrictive permissions", () => {
+    const created = session.createSession({ mode: "non-interactive" });
+    const saved = session.saveSession(created);
+    const stat = fs.statSync(session.SESSION_FILE);
+    const dirStat = fs.statSync(path.dirname(session.SESSION_FILE));
+
+    expect(saved.mode).toBe("non-interactive");
+    expect(fs.existsSync(session.SESSION_FILE)).toBe(true);
+    expect(stat.mode & 0o777).toBe(0o600);
+    expect(dirStat.mode & 0o777).toBe(0o700);
+  });
+
+  it("redacts credential-bearing endpoint URLs before persisting them", () => {
+    session.saveSession(session.createSession());
+    session.markStepComplete("provider_selection", {
+      endpointUrl:
+        "https://alice:secret@example.com/v1/models?token=abc123&sig=def456&X-Amz-Signature=ghi789&keep=yes#token=frag",
+    });
+
+    const loaded = session.loadSession();
+    expect(loaded.endpointUrl).toBe(
+      "https://example.com/v1/models?token=%3CREDACTED%3E&sig=%3CREDACTED%3E&X-Amz-Signature=%3CREDACTED%3E&keep=yes"
+    );
+    expect(session.summarizeForDebug().endpointUrl).toBe(loaded.endpointUrl);
+  });
+
+  it("marks steps started, completed, and failed", () => {
+    session.saveSession(session.createSession());
+    session.markStepStarted("gateway");
+    let loaded = session.loadSession();
+    expect(loaded.steps.gateway.status).toBe("in_progress");
+    expect(loaded.lastStepStarted).toBe("gateway");
+    expect(loaded.steps.gateway.completedAt).toBeNull();
+
+    session.markStepComplete("gateway", { sandboxName: "my-assistant" });
+    loaded = session.loadSession();
+    expect(loaded.steps.gateway.status).toBe("complete");
+    expect(loaded.sandboxName).toBe("my-assistant");
+    expect(loaded.steps.gateway.completedAt).toBeTruthy();
+
+    session.markStepFailed("sandbox", "Sandbox creation failed");
+    loaded = session.loadSession();
+    expect(loaded.steps.sandbox.status).toBe("failed");
+    expect(loaded.steps.sandbox.completedAt).toBeNull();
+    expect(loaded.failure.step).toBe("sandbox");
+    expect(loaded.failure.message).toMatch(/Sandbox creation failed/);
+  });
+
+  it("persists safe provider metadata without persisting secrets", () => {
+    session.saveSession(session.createSession());
+    session.markStepComplete("provider_selection", {
+      provider: "nvidia-nim",
+      model: "nvidia/test-model",
+      sandboxName: "my-assistant",
+      endpointUrl: "https://example.com/v1",
+      credentialEnv: "NVIDIA_API_KEY",
+      preferredInferenceApi: "openai-completions",
+      nimContainer: "nim-123",
+      policyPresets: ["pypi", "npm"],
+      apiKey: "nvapi-secret",
+      metadata: {
+        gatewayName: "nemoclaw",
+        token: "secret",
+      },
+    });
+
+    const loaded = session.loadSession();
+    expect(loaded.provider).toBe("nvidia-nim");
+    expect(loaded.model).toBe("nvidia/test-model");
+    expect(loaded.sandboxName).toBe("my-assistant");
+    expect(loaded.endpointUrl).toBe("https://example.com/v1");
+    expect(loaded.credentialEnv).toBe("NVIDIA_API_KEY");
+    expect(loaded.preferredInferenceApi).toBe("openai-completions");
+    expect(loaded.nimContainer).toBe("nim-123");
+    expect(loaded.policyPresets).toEqual(["pypi", "npm"]);
+    expect(loaded.apiKey).toBeUndefined();
+    expect(loaded.metadata.gatewayName).toBe("nemoclaw");
+    expect(loaded.metadata.token).toBeUndefined();
+  });
+
+  it("does not clear existing metadata when updates omit whitelisted metadata fields", () => {
+    session.saveSession(session.createSession({ metadata: { gatewayName: "nemoclaw" } }));
+    session.markStepComplete("provider_selection", {
+      metadata: {
+        token: "should-not-persist",
+      },
+    });
+
+    const loaded = session.loadSession();
+    expect(loaded.metadata.gatewayName).toBe("nemoclaw");
+    expect(loaded.metadata.token).toBeUndefined();
+  });
+
+  it("returns null for corrupt session data", () => {
+    fs.mkdirSync(path.dirname(session.SESSION_FILE), { recursive: true });
+    fs.writeFileSync(session.SESSION_FILE, "not-json");
+    expect(session.loadSession()).toBeNull();
+  });
+
+  it("acquires and releases the onboard lock", () => {
+    const acquired = session.acquireOnboardLock("nemoclaw onboard");
+    expect(acquired.acquired).toBe(true);
+    expect(fs.existsSync(session.LOCK_FILE)).toBe(true);
+
+    const secondAttempt = session.acquireOnboardLock("nemoclaw onboard --resume");
+    expect(secondAttempt.acquired).toBe(false);
+    expect(secondAttempt.holderPid).toBe(process.pid);
+
+    session.releaseOnboardLock();
+    expect(fs.existsSync(session.LOCK_FILE)).toBe(false);
+  });
+
+  it("replaces a stale onboard lock", () => {
+    fs.mkdirSync(path.dirname(session.LOCK_FILE), { recursive: true });
+    fs.writeFileSync(
+      session.LOCK_FILE,
+      JSON.stringify({ pid: 999999, startedAt: "2026-03-25T00:00:00.000Z", command: "nemoclaw onboard" }),
+      { mode: 0o600 }
+    );
+
+    const acquired = session.acquireOnboardLock("nemoclaw onboard --resume");
+    expect(acquired.acquired).toBe(true);
+
+    const written = JSON.parse(fs.readFileSync(session.LOCK_FILE, "utf8"));
+    expect(written.pid).toBe(process.pid);
+  });
+
+  it("treats unreadable or transient lock contents as a retry, not a stale lock", () => {
+    fs.mkdirSync(path.dirname(session.LOCK_FILE), { recursive: true });
+    fs.writeFileSync(session.LOCK_FILE, "{not-json", { mode: 0o600 });
+
+    const acquired = session.acquireOnboardLock("nemoclaw onboard --resume");
+    expect(acquired.acquired).toBe(false);
+    expect(acquired.stale).toBe(true);
+    expect(fs.existsSync(session.LOCK_FILE)).toBe(true);
+  });
+
+  it("ignores malformed lock files when releasing the onboard lock", () => {
+    fs.mkdirSync(path.dirname(session.LOCK_FILE), { recursive: true });
+    fs.writeFileSync(session.LOCK_FILE, "{not-json", { mode: 0o600 });
+
+    session.releaseOnboardLock();
+    expect(fs.existsSync(session.LOCK_FILE)).toBe(true);
+  });
+
+  it("redacts sensitive values from persisted failure messages", () => {
+    session.saveSession(session.createSession());
+    session.markStepFailed(
+      "inference",
+      "provider auth failed with NVIDIA_API_KEY=nvapi-secret Bearer topsecret sk-secret-value ghp_1234567890123456789012345"
+    );
+
+    const loaded = session.loadSession();
+    expect(loaded.steps.inference.error).toContain("NVIDIA_API_KEY=<REDACTED>");
+    expect(loaded.steps.inference.error).toContain("Bearer <REDACTED>");
+    expect(loaded.steps.inference.error).not.toContain("nvapi-secret");
+    expect(loaded.steps.inference.error).not.toContain("topsecret");
+    expect(loaded.steps.inference.error).not.toContain("sk-secret-value");
+    expect(loaded.steps.inference.error).not.toContain("ghp_1234567890123456789012345");
+    expect(loaded.failure.message).toBe(loaded.steps.inference.error);
+  });
+
+  it("summarizes the session for debug output", () => {
+    session.saveSession(session.createSession({ sandboxName: "my-assistant" }));
+    session.markStepStarted("preflight");
+    session.markStepComplete("preflight");
+    session.completeSession();
+    const summary = session.summarizeForDebug();
+
+    expect(summary.sandboxName).toBe("my-assistant");
+    expect(summary.steps.preflight.status).toBe("complete");
+    expect(summary.steps.preflight.startedAt).toBeTruthy();
+    expect(summary.steps.preflight.completedAt).toBeTruthy();
+    expect(summary.resumable).toBe(false);
+  });
+
+  it("keeps debug summaries redacted when failures were sanitized", () => {
+    session.saveSession(session.createSession({ sandboxName: "my-assistant" }));
+    session.markStepFailed("provider_selection", "Bearer abcdefghijklmnopqrstuvwxyz");
+    const summary = session.summarizeForDebug();
+
+    expect(summary.failure.message).toContain("Bearer <REDACTED>");
+    expect(summary.failure.message).not.toContain("abcdefghijklmnopqrstuvwxyz");
+  });
+});
diff --git a/test/onboard.test.js b/test/onboard.test.js
index 8a8046b52..16b7e5453 100644
--- a/test/onboard.test.js
+++ b/test/onboard.test.js
@@ -10,16 +10,60 @@ import { describe, expect, it } from "vitest";
 
 import {
   buildSandboxConfigSyncScript,
+  classifySandboxCreateFailure,
+  getGatewayReuseState,
   getFutureShellPathHint,
-  getInstalledOpenshellVersion,
-  isGatewayHealthy,
   getSandboxInferenceConfig,
+  getInstalledOpenshellVersion,
+  getRequestedModelHint,
+  getRequestedProviderHint,
+  getRequestedSandboxNameHint,
+  getResumeConfigConflicts,
+  getResumeSandboxConflict,
+  getSandboxStateFromOutputs,
   getStableGatewayImageRef,
+  isGatewayHealthy,
   patchStagedDockerfile,
+  printSandboxCreateRecoveryHints,
+  shouldIncludeBuildContextPath,
   writeSandboxConfigSyncFile,
 } from "../bin/lib/onboard";
 
 describe("onboard helpers", () => {
+  it("classifies sandbox create timeout failures and tracks upload progress", () => {
+    expect(
+      classifySandboxCreateFailure("Error: failed to read image export stream\nTimeout error").kind
+    ).toBe("image_transfer_timeout");
+    expect(
+      classifySandboxCreateFailure(
+        [
+          "  Pushing image openshell/sandbox-from:123 into gateway \"nemoclaw\"",
+          "  [progress] Uploaded to gateway",
+          "Error: failed to read image export stream",
+        ].join("\n")
+      )
+    ).toEqual({
+      kind: "image_transfer_timeout",
+      uploadedToGateway: true,
+    });
+  });
+
+  it("classifies sandbox create connection resets and incomplete create streams", () => {
+    expect(classifySandboxCreateFailure("Connection reset by peer").kind).toBe("image_transfer_reset");
+    expect(
+      classifySandboxCreateFailure(
+        [
+          "  Image openshell/sandbox-from:123 is available in the gateway.",
+          "Created sandbox: my-assistant",
+          "Error: stream closed unexpectedly",
+        ].join("\n")
+      )
+    ).toEqual({
+      kind: "sandbox_create_incomplete",
+      uploadedToGateway: true,
+    });
+  });
+
   it("builds a sandbox sync script that only writes nemoclaw config", () => {
     const script = buildSandboxConfigSyncScript({
       endpointType: "custom",
@@ -153,33 +197,222 @@ describe("onboard helpers", () => {
     expect(getStableGatewayImageRef("bogus")).toBe(null);
   });
 
-  it("recognizes only a connected named NemoClaw gateway as healthy", () => {
+  it("treats the gateway as healthy only when nemoclaw is running and connected", () => { // only the first case, where every output names a connected nemoclaw gateway, expects true
     expect(
       isGatewayHealthy(
-        "Server Status\n\n  Gateway: nemoclaw\n  Status: Connected",
+        "Gateway status: Connected\nGateway: nemoclaw",
+        "Gateway Info\n\n  Gateway: nemoclaw\n  Gateway endpoint: https://127.0.0.1:8080",
         "Gateway Info\n\n  Gateway: nemoclaw\n  Gateway endpoint: https://127.0.0.1:8080"
       )
     ).toBe(true);
     expect(
       isGatewayHealthy(
-        "Server Status\n\n  Gateway: openshell\n  Status: Connected",
-        "Gateway Info\n\n  Gateway: nemoclaw\n  Gateway endpoint: https://127.0.0.1:8080"
+        "\u001b[1mServer Status\u001b[0m\n\n  Gateway: openshell\n  Server: https://127.0.0.1:8080\n  Status: Connected",
+        "Error:   × No gateway metadata found for 'nemoclaw'.",
+        "Gateway Info\n\n  Gateway: openshell\n  Gateway endpoint: https://127.0.0.1:8080"
       )
     ).toBe(false);
     expect(
       isGatewayHealthy(
         "Server Status\n\n  Gateway: openshell\n  Status: Connected",
-        "Error: no gateway metadata found"
+        "Gateway Info\n\n  Gateway: nemoclaw\n  Gateway endpoint: https://127.0.0.1:8080",
+        "Gateway Info\n\n  Gateway: openshell\n  Gateway endpoint: https://127.0.0.1:8080"
       )
     ).toBe(false);
+    expect(isGatewayHealthy("Gateway status: Disconnected", "Gateway: nemoclaw")).toBe(false); // two-argument call: disconnected status
+    expect(isGatewayHealthy("Gateway status: Connected", "Gateway: something-else")).toBe(false); // two-argument call: connected, but second output names another gateway
+  });
+
+  it("classifies gateway reuse states conservatively", () => { // states exercised below: healthy, foreign-active, stale, active-unnamed, missing
     expect(
-      isGatewayHealthy(
-        "Server Status\n\n  Gateway: nemoclaw\n  Status: Disconnected",
+      getGatewayReuseState(
+        "Gateway status: Connected\nGateway: nemoclaw",
+        "Gateway Info\n\n  Gateway: nemoclaw\n  Gateway endpoint: https://127.0.0.1:8080",
+        "Gateway Info\n\n  Gateway: nemoclaw\n  Gateway endpoint: https://127.0.0.1:8080"
+      )
+    ).toBe("healthy");
+    expect(
+      getGatewayReuseState(
+        "Gateway status: Connected",
+        "Error:   × No gateway metadata found for 'nemoclaw'.",
+        "Gateway Info\n\n  Gateway: openshell\n  Gateway endpoint: https://127.0.0.1:8080"
+      )
+    ).toBe("foreign-active");
+    expect(
+      getGatewayReuseState(
+        "Server Status\n\n  Gateway: openshell\n  Status: Connected",
+        "Gateway Info\n\n  Gateway: nemoclaw\n  Gateway endpoint: https://127.0.0.1:8080",
+        "Gateway Info\n\n  Gateway: openshell\n  Gateway endpoint: https://127.0.0.1:8080"
+      )
+    ).toBe("foreign-active");
+    expect(
+      getGatewayReuseState(
+        "Gateway status: Disconnected",
         "Gateway Info\n\n  Gateway: nemoclaw\n  Gateway endpoint: https://127.0.0.1:8080"
       )
+    ).toBe("stale");
+    expect(
+      getGatewayReuseState(
+        "Gateway status: Connected\nGateway: nemoclaw",
+        "",
+        "Gateway Info\n\n  Gateway: nemoclaw\n  Gateway endpoint: https://127.0.0.1:8080"
+      )
+    ).toBe("active-unnamed");
+    expect(
+      getGatewayReuseState(
+        "Gateway status: Connected",
+        "",
+        "Gateway Info\n\n  Gateway: openshell\n  Gateway endpoint: https://127.0.0.1:8080"
+      )
+    ).toBe("foreign-active");
+    expect(getGatewayReuseState("", "")).toBe("missing"); // two empty outputs, third argument omitted
+  });
+
+  it("classifies sandbox reuse states from openshell outputs", () => {
+    expect(
+      getSandboxStateFromOutputs(
+        "my-assistant",
+        "Name: my-assistant",
+        "my-assistant   Ready   2m ago"
+      )
+    ).toBe("ready");
+    expect(
+      getSandboxStateFromOutputs(
+        "my-assistant",
+        "Name: my-assistant",
+        "my-assistant   NotReady   init failed"
+      )
+    ).toBe("not_ready");
+    expect(getSandboxStateFromOutputs("my-assistant", "", "")).toBe("missing");
+  });
+
+  it("filters local-only artifacts out of the sandbox build context", () => { // a regular source file is kept; the .venv, .ruff_cache and ._pyvenv.cfg paths are rejected
+    expect(
+      shouldIncludeBuildContextPath(
+        "/repo/nemoclaw-blueprint",
+        "/repo/nemoclaw-blueprint/orchestrator/main.py"
+      )
+    ).toBe(true);
+    expect(
+      shouldIncludeBuildContextPath(
+        "/repo/nemoclaw-blueprint",
+        "/repo/nemoclaw-blueprint/.venv/bin/python"
+      )
+    ).toBe(false);
+    expect(
+      shouldIncludeBuildContextPath(
+        "/repo/nemoclaw-blueprint",
+        "/repo/nemoclaw-blueprint/.ruff_cache/cache"
+      )
+    ).toBe(false);
+    expect(
+      shouldIncludeBuildContextPath(
+        "/repo/nemoclaw-blueprint",
+        "/repo/nemoclaw-blueprint/._pyvenv.cfg"
+      )
     ).toBe(false);
   });
 
+  it("normalizes sandbox name hints from the environment", () => {
+    const previous = process.env.NEMOCLAW_SANDBOX_NAME;
+    process.env.NEMOCLAW_SANDBOX_NAME = "  My-Assistant  ";
+    try {
+      expect(getRequestedSandboxNameHint()).toBe("my-assistant");
+    } finally {
+      if (previous === undefined) {
+        delete process.env.NEMOCLAW_SANDBOX_NAME;
+      } else {
+        process.env.NEMOCLAW_SANDBOX_NAME = previous;
+      }
+    }
+  });
+
+  it("detects resume conflicts when a different sandbox is requested", () => {
+    const previous = process.env.NEMOCLAW_SANDBOX_NAME;
+    process.env.NEMOCLAW_SANDBOX_NAME = "other-sandbox";
+    try {
+      expect(getResumeSandboxConflict({ sandboxName: "my-assistant" })).toEqual({
+        requestedSandboxName: "other-sandbox",
+        recordedSandboxName: "my-assistant",
+      });
+      expect(getResumeSandboxConflict({ sandboxName: "other-sandbox" })).toBe(null);
+    } finally {
+      if (previous === undefined) {
+        delete process.env.NEMOCLAW_SANDBOX_NAME;
+      } else {
+        process.env.NEMOCLAW_SANDBOX_NAME = previous;
+      }
+    }
+  });
+
+  it("returns provider and model hints only for non-interactive runs", () => {
+    const previousProvider = process.env.NEMOCLAW_PROVIDER;
+    const previousModel = process.env.NEMOCLAW_MODEL;
+    process.env.NEMOCLAW_PROVIDER = "cloud";
+    process.env.NEMOCLAW_MODEL = "nvidia/test-model";
+    try {
+      expect(getRequestedProviderHint(true)).toBe("build");
+      expect(getRequestedModelHint(true)).toBe("nvidia/test-model");
+      expect(getRequestedProviderHint(false)).toBe(null);
+      expect(getRequestedModelHint(false)).toBe(null);
+    } finally {
+      if (previousProvider === undefined) {
+        delete process.env.NEMOCLAW_PROVIDER;
+      } else {
+        process.env.NEMOCLAW_PROVIDER = previousProvider;
+      }
+      if (previousModel === undefined) {
+        delete process.env.NEMOCLAW_MODEL;
+      } else {
+        process.env.NEMOCLAW_MODEL = previousModel;
+      }
+    }
+  });
+
+  it("detects resume conflicts for explicit provider and model changes", () => {
+    const previousProvider = process.env.NEMOCLAW_PROVIDER;
+    const previousModel = process.env.NEMOCLAW_MODEL;
+    process.env.NEMOCLAW_PROVIDER = "cloud";
+    process.env.NEMOCLAW_MODEL = "nvidia/other-model";
+    try {
+      // Provider conflict uses a two-stage alias chain in non-interactive mode:
+      // "cloud" first resolves to the requested hint, then that hint resolves
+      // to the effective provider name "nvidia-prod" for conflict comparison.
+      expect(
+        getResumeConfigConflicts(
+          {
+            sandboxName: "my-assistant",
+            provider: "nvidia-nim",
+            model: "nvidia/nemotron-3-super-120b-a12b",
+          },
+          { nonInteractive: true }
+        )
+      ).toEqual([
+        {
+          field: "provider",
+          requested: "nvidia-prod",
+          recorded: "nvidia-nim",
+        },
+        {
+          field: "model",
+          requested: "nvidia/other-model",
+          recorded: "nvidia/nemotron-3-super-120b-a12b",
+        },
+      ]);
+    } finally {
+      if (previousProvider === undefined) {
+        delete process.env.NEMOCLAW_PROVIDER;
+      } else {
+        process.env.NEMOCLAW_PROVIDER = previousProvider;
+      }
+      if (previousModel === undefined) {
+        delete process.env.NEMOCLAW_MODEL;
+      } else {
+        process.env.NEMOCLAW_MODEL = previousModel;
+      }
+    }
+  });
+
   it("returns a future-shell PATH hint for user-local openshell installs", () => {
     expect(getFutureShellPathHint("/home/test/.local/bin", "/usr/local/bin:/usr/bin")).toBe(
       'export PATH="/home/test/.local/bin:$PATH"'
@@ -273,6 +506,175 @@ const { setupInference } = require(${onboardPath});
     assert.match(commands[2].command, /inference' 'set'/);
   });
 
+  it("detects when the live inference route already matches the requested provider and model", () => {
+    const repoRoot = path.join(import.meta.dirname, "..");
+    const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-inference-ready-"));
+    const fakeOpenshell = path.join(tmpDir, "openshell");
+    const scriptPath = path.join(tmpDir, "inference-ready-check.js");
+    const onboardPath = JSON.stringify(path.join(repoRoot, "bin", "lib", "onboard.js"));
+
+    fs.writeFileSync(
+      fakeOpenshell,
+      `#!/usr/bin/env bash
+if [ "$1" = "inference" ] && [ "$2" = "get" ]; then
+  cat <<'EOF'
+Gateway inference:
+
+  Route: inference.local
+  Provider: nvidia-prod
+  Model: nvidia/nemotron-3-super-120b-a12b
+  Version: 1
+EOF
+  exit 0
+fi
+exit 1
+`,
+      { mode: 0o755 }
+    );
+
+    fs.writeFileSync(
+      scriptPath,
+      `
+const { isInferenceRouteReady } = require(${onboardPath});
+console.log(JSON.stringify({
+  same: isInferenceRouteReady("nvidia-prod", "nvidia/nemotron-3-super-120b-a12b"),
+  otherModel: isInferenceRouteReady("nvidia-prod", "nvidia/other-model"),
+  otherProvider: isInferenceRouteReady("openai-api", "nvidia/nemotron-3-super-120b-a12b"),
+}));
+`
+    );
+
+    const result = spawnSync(process.execPath, [scriptPath], {
+      cwd: repoRoot,
+      encoding: "utf-8",
+      env: {
+        ...process.env,
+        PATH: `${tmpDir}:${process.env.PATH || ""}`,
+      },
+    });
+
+    try {
+      expect(result.status).toBe(0);
+      expect(JSON.parse(result.stdout.trim())).toEqual({
+        same: true,
+        otherModel: false,
+        otherProvider: false,
+      });
+    } finally {
+      fs.rmSync(tmpDir, { recursive: true, force: true });
+    }
+  });
+
+  it("detects when OpenClaw is already configured inside the sandbox", () => {
+    const repoRoot = path.join(import.meta.dirname, "..");
+    const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-openclaw-ready-"));
+    const fakeOpenshell = path.join(tmpDir, "openshell");
+    const scriptPath = path.join(tmpDir, "openclaw-ready-check.js");
+    const onboardPath = JSON.stringify(path.join(repoRoot, "bin", "lib", "onboard.js"));
+
+    fs.writeFileSync(
+      fakeOpenshell,
+      `#!/usr/bin/env bash
+if [ "$1" = "sandbox" ] && [ "$2" = "download" ]; then
+  dest="\${@: -1}"
+  mkdir -p "$dest/sandbox/.openclaw"
+  cat > "$dest/sandbox/.openclaw/openclaw.json" <<'EOF'
+{"gateway":{"auth":{"token":"test-token"}}}
+EOF
+  exit 0
+fi
+exit 1
+`,
+      { mode: 0o755 }
+    );
+
+    fs.writeFileSync(
+      scriptPath,
+      `
+const { isOpenclawReady } = require(${onboardPath});
+console.log(JSON.stringify({
+  ready: isOpenclawReady("my-assistant"),
+}));
+`
+    );
+
+    const result = spawnSync(process.execPath, [scriptPath], {
+      cwd: repoRoot,
+      encoding: "utf-8",
+      env: {
+        ...process.env,
+        PATH: `${tmpDir}:${process.env.PATH || ""}`,
+      },
+    });
+
+    try {
+      expect(result.status).toBe(0);
+      expect(JSON.parse(result.stdout.trim())).toEqual({ ready: true });
+    } finally {
+      fs.rmSync(tmpDir, { recursive: true, force: true });
+    }
+  });
+
+  it("detects when recorded policy presets are already applied", () => {
+    const repoRoot = path.join(import.meta.dirname, "..");
+    const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-policy-ready-"));
+    const registryDir = path.join(tmpDir, ".nemoclaw");
+    const registryFile = path.join(registryDir, "sandboxes.json");
+    const scriptPath = path.join(tmpDir, "policy-ready-check.js");
+    const onboardPath = JSON.stringify(path.join(repoRoot, "bin", "lib", "onboard.js"));
+
+    fs.mkdirSync(registryDir, { recursive: true });
+    fs.writeFileSync(
+      registryFile,
+      JSON.stringify(
+        {
+          sandboxes: {
+            "my-assistant": {
+              name: "my-assistant",
+              policies: ["pypi", "npm"],
+            },
+          },
+          defaultSandbox: "my-assistant",
+        },
+        null,
+        2
+      )
+    );
+
+    fs.writeFileSync(
+      scriptPath,
+      `
+const { arePolicyPresetsApplied } = require(${onboardPath});
+console.log(JSON.stringify({
+  ready: arePolicyPresetsApplied("my-assistant", ["pypi", "npm"]),
+  missing: arePolicyPresetsApplied("my-assistant", ["pypi", "slack"]),
+  empty: arePolicyPresetsApplied("my-assistant", []),
+}));
+`
+    );
+
+    const result = spawnSync(process.execPath, [scriptPath], {
+      cwd: repoRoot,
+      encoding: "utf-8",
+      env: {
+        ...process.env,
+        HOME: tmpDir,
+      },
+    });
+
+    try {
+      expect(result.status).toBe(0);
+      const payload = JSON.parse(result.stdout.trim());
+      expect(payload).toEqual({
+        ready: true,
+        missing: false,
+        empty: false,
+      });
+    } finally {
+      fs.rmSync(tmpDir, { recursive: true, force: true });
+    }
+  });
+
   it("uses native Anthropic provider creation without embedding the secret in argv", () => {
     const repoRoot = path.join(import.meta.dirname, "..");
     const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-onboard-anthropic-"));
@@ -415,29 +817,56 @@ const { setupInference } = require(${onboardPath});
     assert.match(commands[3].command, /inference' 'set' '--no-verify'/);
   });
 
-  it("drops stale local sandbox registry entries when the live sandbox is gone", () => {
+  it("hydrates stored provider credentials when setupInference runs without process env set", () => {
     const repoRoot = path.join(import.meta.dirname, "..");
-    const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-onboard-stale-sandbox-"));
+    const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-onboard-resume-cred-"));
     const fakeBin = path.join(tmpDir, "bin");
-    const scriptPath = path.join(tmpDir, "stale-sandbox-check.js");
+    const scriptPath = path.join(tmpDir, "setup-resume-credential-check.js");
     const onboardPath = JSON.stringify(path.join(repoRoot, "bin", "lib", "onboard.js"));
-    const registryPath = JSON.stringify(path.join(repoRoot, "bin", "lib", "registry.js"));
     const runnerPath = JSON.stringify(path.join(repoRoot, "bin", "lib", "runner.js"));
+    const registryPath = JSON.stringify(path.join(repoRoot, "bin", "lib", "registry.js"));
+    const credentialsPath = JSON.stringify(path.join(repoRoot, "bin", "lib", "credentials.js"));
 
     fs.mkdirSync(fakeBin, { recursive: true });
     fs.writeFileSync(path.join(fakeBin, "openshell"), "#!/usr/bin/env bash\nexit 0\n", { mode: 0o755 });
 
     const script = String.raw`
-const registry = require(${registryPath});
 const runner = require(${runnerPath});
-runner.runCapture = (command) => (command.includes("'sandbox' 'get' 'my-assistant'") ? "" : "");
+const registry = require(${registryPath});
+const credentials = require(${credentialsPath});
 
-registry.registerSandbox({ name: "my-assistant" });
+const commands = [];
+runner.run = (command, opts = {}) => {
+  commands.push({ command, env: opts.env || null });
+  return { status: 0 };
+};
+runner.runCapture = (command) => {
+  if (command.includes("inference") && command.includes("get")) {
+    return [
+      "Gateway inference:",
+      "",
+      "  Route: inference.local",
+      "  Provider: openai-api",
+      "  Model: gpt-5.4",
+      "  Version: 1",
+    ].join("\n");
+  }
+  return "";
+};
+registry.updateSandbox = () => true;
 
-const { pruneStaleSandboxEntry } = require(${onboardPath});
+credentials.saveCredential("OPENAI_API_KEY", "sk-stored-secret");
+delete process.env.OPENAI_API_KEY;
 
-const liveExists = pruneStaleSandboxEntry("my-assistant");
-console.log(JSON.stringify({ liveExists, sandbox: registry.getSandbox("my-assistant") }));
+const { setupInference } = require(${onboardPath});
+
+(async () => {
+  await setupInference("test-box", "gpt-5.4", "openai-api", "https://api.openai.com/v1", "OPENAI_API_KEY");
+  console.log(JSON.stringify({ commands, openai: process.env.OPENAI_API_KEY || null }));
+})().catch((error) => {
+  console.error(error);
+  process.exit(1);
+});
 `;
     fs.writeFileSync(scriptPath, script);
 
@@ -452,59 +881,35 @@ console.log(JSON.stringify({ liveExists, sandbox: registry.getSandbox("my-assist
     });
 
     assert.equal(result.status, 0, result.stderr);
-    const payloadLine = result.stdout
-      .trim()
-      .split("\n")
-      .slice()
-      .reverse()
-      .find((line) => line.startsWith("{") && line.endsWith("}"));
-    assert.ok(payloadLine, `expected JSON payload in stdout:\n${result.stdout}`);
-    const payload = JSON.parse(payloadLine);
-    assert.equal(payload.liveExists, false);
-    assert.equal(payload.sandbox, null);
+    const payload = JSON.parse(result.stdout.trim().split("\n").pop());
+    assert.equal(payload.openai, "sk-stored-secret");
+    assert.equal(payload.commands[1].env.OPENAI_API_KEY, "sk-stored-secret");
+    assert.doesNotMatch(payload.commands[1].command, /sk-stored-secret/);
   });
 
-  it("reuses an existing healthy gateway instead of destroying it", () => {
+  it("drops stale local sandbox registry entries when the live sandbox is gone", () => {
     const repoRoot = path.join(import.meta.dirname, "..");
-    const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-onboard-gateway-reuse-"));
+    const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-onboard-stale-sandbox-"));
     const fakeBin = path.join(tmpDir, "bin");
-    const scriptPath = path.join(tmpDir, "gateway-reuse-check.js");
+    const scriptPath = path.join(tmpDir, "stale-sandbox-check.js");
     const onboardPath = JSON.stringify(path.join(repoRoot, "bin", "lib", "onboard.js"));
+    const registryPath = JSON.stringify(path.join(repoRoot, "bin", "lib", "registry.js"));
     const runnerPath = JSON.stringify(path.join(repoRoot, "bin", "lib", "runner.js"));
 
     fs.mkdirSync(fakeBin, { recursive: true });
     fs.writeFileSync(path.join(fakeBin, "openshell"), "#!/usr/bin/env bash\nexit 0\n", { mode: 0o755 });
 
     const script = String.raw`
+const registry = require(${registryPath});
 const runner = require(${runnerPath});
-const commands = [];
+runner.runCapture = (command) => (command.includes("'sandbox' 'get' 'my-assistant'") ? "" : "");
 
-runner.run = (command, opts = {}) => {
-  commands.push(command);
-  return { status: 0 };
-};
-runner.runCapture = (command) => {
-  if (command.includes("'status'")) {
-    return "Server Status\n\n  Gateway: nemoclaw\n  Status: Connected";
-  }
-  if (command.includes("'gateway' 'info' '-g' 'nemoclaw'")) {
-    return "Gateway Info\n\n  Gateway: nemoclaw\n  Gateway endpoint: https://127.0.0.1:8080";
-  }
-  if (command.includes("'--version'")) {
-    return "openshell 0.0.12";
-  }
-  return "";
-};
+registry.registerSandbox({ name: "my-assistant" });
 
-const { startGateway } = require(${onboardPath});
+const { pruneStaleSandboxEntry } = require(${onboardPath});
 
-(async () => {
-  await startGateway(null);
-  console.log(JSON.stringify(commands));
-})().catch((error) => {
-  console.error(error);
-  process.exit(1);
-});
+const liveExists = pruneStaleSandboxEntry("my-assistant");
+console.log(JSON.stringify({ liveExists, sandbox: registry.getSandbox("my-assistant") }));
 `;
     fs.writeFileSync(scriptPath, script);
 
@@ -519,11 +924,16 @@ const { startGateway } = require(${onboardPath});
     });
 
     assert.equal(result.status, 0, result.stderr);
-    const commands = JSON.parse(result.stdout.trim().split("\n").pop());
-    assert.equal(commands.length, 1);
-    assert.match(commands[0], /gateway' 'select' 'nemoclaw'/);
-    assert.doesNotMatch(commands[0], /gateway' 'destroy'/);
-    assert.doesNotMatch(commands[0], /gateway' 'start'/);
+    const payloadLine = result.stdout
+      .trim()
+      .split("\n")
+      .slice()
+      .reverse()
+      .find((line) => line.startsWith("{") && line.endsWith("}"));
+    assert.ok(payloadLine, `expected JSON payload in stdout:\n${result.stdout}`);
+    const payload = JSON.parse(payloadLine);
+    assert.equal(payload.liveExists, false);
+    assert.equal(payload.sandbox, null);
   });
 
   it("builds the sandbox without uploading an external OpenClaw config file", async () => {
@@ -737,6 +1147,54 @@ const { createSandbox } = require(${onboardPath});
     assert.equal(payload.stderrDestroyCalls, 1);
   });
 
+  it("prints resume guidance when sandbox image upload times out", () => {
+    const createOutput = [
+      "  Pushing image openshell/sandbox-from:123 into gateway nemoclaw",
+      "  [progress] Uploaded to gateway",
+      "Error: failed to read image export stream",
+      "Timeout error",
+    ].join("\n");
+    // Swap console.error for a recorder while the hints print; the finally block restores it.
+    const captured = [];
+    const realConsoleError = console.error;
+    console.error = (...parts) => captured.push(parts.join(" "));
+    try {
+      printSandboxCreateRecoveryHints(createOutput);
+    } finally {
+      console.error = realConsoleError;
+    }
+
+    const printed = captured.join("\n");
+    const expectedHints = [
+      /Hint: image upload into the OpenShell gateway timed out\./,
+      /Recovery: nemoclaw onboard --resume/,
+      /Progress reached the gateway upload stage, so resume may be able to reuse existing gateway state\./,
+    ];
+    expectedHints.forEach((hint) => assert.match(printed, hint));
+  });
+
+  it("prints resume guidance when sandbox image upload resets after transfer progress", () => {
+    const createOutput = [
+      "  Pushing image openshell/sandbox-from:123 into gateway nemoclaw",
+      "  [progress] Uploaded to gateway",
+      "Error: Connection reset by peer",
+    ].join("\n");
+    // Record console.error output during the call; the finally block puts the original back.
+    const captured = [];
+    const realConsoleError = console.error;
+    console.error = (...parts) => captured.push(parts.join(" "));
+    try {
+      printSandboxCreateRecoveryHints(createOutput);
+    } finally {
+      console.error = realConsoleError;
+    }
+
+    const printed = captured.join("\n");
+    assert.match(printed, /Hint: the image push\/import stream was interrupted\./);
+    assert.match(printed, /Recovery: nemoclaw onboard --resume/);
+    assert.match(printed, /The image appears to have reached the gateway before the stream failed\./);
+  });
+
   it("accepts gateway inference when system inference is separately not configured", () => {
     const repoRoot = path.join(import.meta.dirname, "..");
     const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-onboard-inference-get-"));
diff --git a/test/runtime-recovery.test.js b/test/runtime-recovery.test.js
new file mode 100644
index 000000000..b6870877c
--- /dev/null
+++ b/test/runtime-recovery.test.js
@@ -0,0 +1,74 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import { describe, expect, it } from "vitest";
+
+import {
+  classifyGatewayStatus,
+  classifySandboxLookup,
+  parseLiveSandboxNames,
+  shouldAttemptGatewayRecovery,
+} from "../bin/lib/runtime-recovery";
+
+// Contract tests for the output parsers and classifiers exported by bin/lib/runtime-recovery.
+describe("runtime recovery helpers", () => {
+  it("parses live sandbox names from openshell sandbox list output", () => {
+    const listing = [
+      "NAME              NAMESPACE  CREATED              PHASE",
+      "alpha             openshell  2026-03-24 10:00:00  Ready",
+      "beta              openshell  2026-03-24 10:01:00  Provisioning",
+    ].join("\n");
+    const names = Array.from(parseLiveSandboxNames(listing));
+    expect(names).toEqual(["alpha", "beta"]);
+  });
+
+  it("treats no-sandboxes output as an empty set", () => {
+    const names = Array.from(parseLiveSandboxNames("No sandboxes found."));
+    expect(names).toEqual([]);
+  });
+
+  it("classifies missing sandbox lookups", () => {
+    // An explicit NotFound error and completely empty output are both expected to be "missing".
+    for (const output of ['Error:   × status: NotFound, message: "sandbox not found"', ""]) {
+      expect(classifySandboxLookup(output).state).toBe("missing");
+    }
+  });
+
+  it("classifies transport and gateway failures as unavailable", () => {
+    const outputs = [
+      "Error:   × transport error\n  ╰─▶ Connection reset by peer (os error 104)",
+      "Error:   × client error (Connect)\n  ╰─▶ Connection refused (os error 111)",
+    ];
+    for (const output of outputs) {
+      expect(classifySandboxLookup(output).state).toBe("unavailable");
+    }
+  });
+
+  it("classifies successful sandbox lookups as present", () => {
+    const details = [
+      "Sandbox:",
+      "",
+      "  Id: abc",
+      "  Name: my-assistant",
+      "  Namespace: openshell",
+      "  Phase: Ready",
+    ].join("\n");
+    expect(classifySandboxLookup(details).state).toBe("present");
+  });
+
+  it("classifies gateway status output for restart recovery", () => {
+    const stateOf = (output) => classifyGatewayStatus(output).state;
+    expect(stateOf("Gateway: nemoclaw\nStatus: Connected")).toBe("connected");
+    expect(stateOf("Error:   × No active gateway")).toBe("unavailable");
+    expect(stateOf("")).toBe("inactive");
+  });
+
+  it("only attempts gateway recovery when sandbox access is unavailable and gateway is down", () => {
+    const attempt = (sandboxState, gatewayState) => shouldAttemptGatewayRecovery({ sandboxState, gatewayState });
+    expect(attempt("unavailable", "unavailable")).toBe(true);
+    expect(attempt("unavailable", "inactive")).toBe(true);
+    expect(attempt("present", "unavailable")).toBe(false);
+    expect(attempt("missing", "inactive")).toBe(false);
+    expect(attempt("unavailable", "connected")).toBe(false);
+  });
+});
diff --git a/test/uninstall.test.js b/test/uninstall.test.js
index 60e7e977e..cd0178638 100644
--- a/test/uninstall.test.js
+++ b/test/uninstall.test.js
@@ -9,6 +9,21 @@ import { spawnSync } from "node:child_process";
 
 const UNINSTALL_SCRIPT = path.join(import.meta.dirname, "..", "uninstall.sh");
 
+/**
+ * Builds an environment for spawned shells in which `npm` resolves to a no-op stub.
+ *
+ * @param {string} tmp - Scratch directory; the stub is written to `<tmp>/bin/npm`.
+ * @returns {Object} Copy of process.env with `<tmp>/bin` placed first on PATH.
+ */
+function createFakeNpmEnv(tmp) {
+  const fakeBin = path.join(tmp, "bin");
+  fs.mkdirSync(fakeBin, { recursive: true });
+  // The stub is an executable bash script that does nothing and exits 0.
+  fs.writeFileSync(path.join(fakeBin, "npm"), "#!/usr/bin/env bash\nexit 0\n", { mode: 0o755 });
+  const inheritedPath = process.env.PATH || "/usr/bin:/bin";
+  return { ...process.env, PATH: `${fakeBin}:${inheritedPath}` };
+}
+
 describe("uninstall CLI flags", () => {
   it("--help exits 0 and shows usage", () => {
     const result = spawnSync("bash", [UNINSTALL_SCRIPT, "--help"], {
@@ -76,8 +91,11 @@ describe("uninstall helpers", () => {
     const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-uninstall-shim-"));
     const shimDir = path.join(tmp, ".local", "bin");
     const shimPath = path.join(shimDir, "nemoclaw");
+    const targetPath = path.join(tmp, "prefix", "bin", "nemoclaw");
     fs.mkdirSync(shimDir, { recursive: true });
-    fs.writeFileSync(shimPath, "#!/usr/bin/env bash\n", { mode: 0o755 });
+    fs.mkdirSync(path.dirname(targetPath), { recursive: true });
+    fs.writeFileSync(targetPath, "#!/usr/bin/env bash\n", { mode: 0o755 });
+    fs.symlinkSync(targetPath, shimPath);
 
     const result = spawnSync(
       "bash",
@@ -85,10 +103,54 @@ describe("uninstall helpers", () => {
       {
         cwd: path.join(import.meta.dirname, ".."),
         encoding: "utf-8",
+        env: createFakeNpmEnv(tmp),
       },
     );
 
     expect(result.status).toBe(0);
     expect(fs.existsSync(shimPath)).toBe(false);
   });
+
+  it("preserves a user-managed nemoclaw file in the shim directory", () => {
+    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-uninstall-preserve-"));
+    const shimPath = path.join(tmp, ".local", "bin", "nemoclaw");
+    try {
+      // A regular file (not a symlink) at the shim location stands in for a user-managed binary.
+      fs.mkdirSync(path.dirname(shimPath), { recursive: true });
+      fs.writeFileSync(shimPath, "#!/usr/bin/env bash\n", { mode: 0o755 });
+
+      const command = `HOME="${tmp}" source "${UNINSTALL_SCRIPT}"; remove_nemoclaw_cli`;
+      const result = spawnSync("bash", ["-lc", command], {
+        cwd: path.join(import.meta.dirname, ".."),
+        encoding: "utf-8",
+        env: createFakeNpmEnv(tmp),
+      });
+      expect(result.status).toBe(0);
+      expect(fs.existsSync(shimPath)).toBe(true);
+      expect(`${result.stdout}${result.stderr}`).toMatch(/not an installer-managed shim/);
+    } finally {
+      fs.rmSync(tmp, { recursive: true, force: true }); // do not leave the scratch dir behind
+    }
+  });
+
+  it("removes the onboard session file as part of NemoClaw state cleanup", () => {
+    const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-uninstall-session-"));
+    const stateDir = path.join(tmp, ".nemoclaw");
+    const sessionPath = path.join(stateDir, "onboard-session.json");
+    try {
+      fs.mkdirSync(stateDir, { recursive: true });
+      fs.writeFileSync(sessionPath, JSON.stringify({ status: "complete" }));
+      // Source the script with HOME redirected to the scratch dir, then call only the state cleanup.
+      const command = `HOME="${tmp}" source "${UNINSTALL_SCRIPT}"; remove_nemoclaw_state`;
+      const result = spawnSync("bash", ["-lc", command], {
+        cwd: path.join(import.meta.dirname, ".."),
+        encoding: "utf-8",
+      });
+      expect(result.status).toBe(0);
+      expect(fs.existsSync(sessionPath)).toBe(false);
+      expect(fs.existsSync(stateDir)).toBe(false);
+    } finally {
+      fs.rmSync(tmp, { recursive: true, force: true }); // do not leave the scratch dir behind
+    }
+  });
 });
diff --git a/uninstall.sh b/uninstall.sh
index 42aa4e3de..c8bf4d4d6 100755
--- a/uninstall.sh
+++ b/uninstall.sh
@@ -7,7 +7,7 @@
 #   - NemoClaw helper services
 #   - All OpenShell sandboxes plus the NemoClaw gateway/providers
 #   - NemoClaw/OpenShell/OpenClaw Docker images built or pulled for the sandbox flow
-#   - ~/.nemoclaw plus ~/.config/{openshell,nemoclaw} state
+#   - ~/.nemoclaw plus ~/.config/{openshell,nemoclaw} state, including onboard-session.json
 #   - Global nemoclaw npm install/link
 #   - OpenShell binary if it was installed to the standard installer path
 #
@@ -305,8 +305,10 @@ remove_nemoclaw_cli() {
     warn "npm not found; skipping nemoclaw npm uninstall."
   fi
 
-  if [ -L "${NEMOCLAW_SHIM_DIR}/nemoclaw" ] || [ -f "${NEMOCLAW_SHIM_DIR}/nemoclaw" ]; then
+  if [ -L "${NEMOCLAW_SHIM_DIR}/nemoclaw" ]; then
     remove_path "${NEMOCLAW_SHIM_DIR}/nemoclaw"
+  elif [ -f "${NEMOCLAW_SHIM_DIR}/nemoclaw" ]; then
+    warn "Leaving ${NEMOCLAW_SHIM_DIR}/nemoclaw in place because it is not an installer-managed shim."
   fi
 }