diff --git a/bin/lib/onboard-session.js b/bin/lib/onboard-session.js new file mode 100644 index 000000000..819790173 --- /dev/null +++ b/bin/lib/onboard-session.js @@ -0,0 +1,432 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +const fs = require("fs"); +const path = require("path"); + +const SESSION_VERSION = 1; +const SESSION_DIR = path.join(process.env.HOME || "/tmp", ".nemoclaw"); +const SESSION_FILE = path.join(SESSION_DIR, "onboard-session.json"); +const LOCK_FILE = path.join(SESSION_DIR, "onboard.lock"); +const VALID_STEP_STATES = new Set(["pending", "in_progress", "complete", "failed", "skipped"]); + +function ensureSessionDir() { + fs.mkdirSync(SESSION_DIR, { recursive: true, mode: 0o700 }); +} + +function sessionPath() { + return SESSION_FILE; +} + +function lockPath() { + return LOCK_FILE; +} + +function defaultSteps() { + return { + preflight: { status: "pending", startedAt: null, completedAt: null, error: null }, + gateway: { status: "pending", startedAt: null, completedAt: null, error: null }, + sandbox: { status: "pending", startedAt: null, completedAt: null, error: null }, + provider_selection: { status: "pending", startedAt: null, completedAt: null, error: null }, + inference: { status: "pending", startedAt: null, completedAt: null, error: null }, + openclaw: { status: "pending", startedAt: null, completedAt: null, error: null }, + policies: { status: "pending", startedAt: null, completedAt: null, error: null }, + }; +} + +function createSession(overrides = {}) { + const now = new Date().toISOString(); + return { + version: SESSION_VERSION, + sessionId: overrides.sessionId || `${Date.now()}-${Math.random().toString(36).slice(2, 10)}`, + resumable: true, + status: "in_progress", + mode: overrides.mode || "interactive", + startedAt: overrides.startedAt || now, + updatedAt: overrides.updatedAt || now, + lastStepStarted: overrides.lastStepStarted || null, + lastCompletedStep: overrides.lastCompletedStep || null, + failure: overrides.failure || null, + sandboxName: overrides.sandboxName || null, + provider: overrides.provider || null, + model: overrides.model || null, + endpointUrl: overrides.endpointUrl || null, + credentialEnv: overrides.credentialEnv || null, + preferredInferenceApi: overrides.preferredInferenceApi || null, + nimContainer: overrides.nimContainer || null, + policyPresets: Array.isArray(overrides.policyPresets) ? overrides.policyPresets.filter((value) => typeof value === "string") : null, + metadata: { + gatewayName: overrides.metadata?.gatewayName || "nemoclaw", + }, + steps: { + ...defaultSteps(), + ...(overrides.steps || {}), + }, + }; +} + +function isObject(value) { + return typeof value === "object" && value !== null && !Array.isArray(value); +} + +function redactSensitiveText(value) { + if (typeof value !== "string") return null; + return value + .replace(/(NVIDIA_API_KEY|OPENAI_API_KEY|ANTHROPIC_API_KEY|GEMINI_API_KEY|COMPATIBLE_API_KEY|COMPATIBLE_ANTHROPIC_API_KEY)=\S+/gi, "$1=") + .replace(/Bearer\s+\S+/gi, "Bearer ") + .replace(/nvapi-[A-Za-z0-9_-]{10,}/g, "") + .replace(/ghp_[A-Za-z0-9]{20,}/g, "") + .replace(/sk-[A-Za-z0-9_-]{10,}/g, "") + .slice(0, 240); +} + +function sanitizeFailure(input) { + if (!input) return null; + const step = typeof input.step === "string" ? input.step : null; + const message = redactSensitiveText(input.message); + const recordedAt = typeof input.recordedAt === "string" ? input.recordedAt : new Date().toISOString(); + return step || message ? { step, message, recordedAt } : null; +} + +function validateStep(step) { + if (!isObject(step)) return false; + if (!VALID_STEP_STATES.has(step.status)) return false; + return true; +} + +function redactUrl(value) { + if (typeof value !== "string" || value.length === 0) return null; + try { + const url = new URL(value); + if (url.username || url.password) { + url.username = ""; + url.password = ""; + } + for (const key of [...url.searchParams.keys()]) { + if (/(^|[-_])(?:signature|sig|token|auth|access_token)$/i.test(key)) { + url.searchParams.set(key, ""); + } + } + url.hash = ""; + return url.toString(); + } catch { + return redactSensitiveText(value); + } +} + +// eslint-disable-next-line complexity +function normalizeSession(data) { + if (!isObject(data) || data.version !== SESSION_VERSION) return null; + const normalized = createSession({ + sessionId: typeof data.sessionId === "string" ? data.sessionId : undefined, + mode: typeof data.mode === "string" ? data.mode : undefined, + startedAt: typeof data.startedAt === "string" ? data.startedAt : undefined, + updatedAt: typeof data.updatedAt === "string" ? data.updatedAt : undefined, + sandboxName: typeof data.sandboxName === "string" ? data.sandboxName : null, + provider: typeof data.provider === "string" ? data.provider : null, + model: typeof data.model === "string" ? data.model : null, + endpointUrl: typeof data.endpointUrl === "string" ? redactUrl(data.endpointUrl) : null, + credentialEnv: typeof data.credentialEnv === "string" ? data.credentialEnv : null, + preferredInferenceApi: typeof data.preferredInferenceApi === "string" ? data.preferredInferenceApi : null, + nimContainer: typeof data.nimContainer === "string" ? data.nimContainer : null, + policyPresets: Array.isArray(data.policyPresets) ? data.policyPresets.filter((value) => typeof value === "string") : null, + lastStepStarted: typeof data.lastStepStarted === "string" ? data.lastStepStarted : null, + lastCompletedStep: typeof data.lastCompletedStep === "string" ? data.lastCompletedStep : null, + failure: sanitizeFailure(data.failure), + metadata: isObject(data.metadata) ? data.metadata : undefined, + }); + normalized.resumable = data.resumable !== false; + normalized.status = typeof data.status === "string" ? data.status : normalized.status; + + if (isObject(data.steps)) { + for (const [name, step] of Object.entries(data.steps)) { + if (Object.prototype.hasOwnProperty.call(normalized.steps, name) && validateStep(step)) { + normalized.steps[name] = { + status: step.status, + startedAt: typeof step.startedAt === "string" ? step.startedAt : null, + completedAt: typeof step.completedAt === "string" ? step.completedAt : null, + error: redactSensitiveText(step.error), + }; + } + } + } + + return normalized; +} + +function loadSession() { + try { + if (!fs.existsSync(SESSION_FILE)) { + return null; + } + const parsed = JSON.parse(fs.readFileSync(SESSION_FILE, "utf-8")); + return normalizeSession(parsed); + } catch { + return null; + } +} + +function saveSession(session) { + const normalized = normalizeSession(session) || createSession(); + normalized.updatedAt = new Date().toISOString(); + ensureSessionDir(); + const tmpFile = path.join( + SESSION_DIR, + `.onboard-session.${process.pid}.${Date.now()}.${Math.random().toString(36).slice(2, 8)}.tmp` + ); + fs.writeFileSync(tmpFile, JSON.stringify(normalized, null, 2), { mode: 0o600 }); + fs.renameSync(tmpFile, SESSION_FILE); + return normalized; +} + +function clearSession() { + try { + if (fs.existsSync(SESSION_FILE)) { + fs.unlinkSync(SESSION_FILE); + } + } catch { + return; + } +} + +function parseLockFile(contents) { + try { + const parsed = JSON.parse(contents); + if (typeof parsed?.pid !== "number") return null; + return { + pid: parsed.pid, + startedAt: typeof parsed.startedAt === "string" ? parsed.startedAt : null, + command: typeof parsed.command === "string" ? parsed.command : null, + }; + } catch { + return null; + } +} + +function isProcessAlive(pid) { + if (!Number.isInteger(pid) || pid <= 0) return false; + try { + process.kill(pid, 0); + return true; + } catch (error) { + return error?.code === "EPERM"; + } +} + +function acquireOnboardLock(command = null) { + ensureSessionDir(); + const payload = JSON.stringify( + { + pid: process.pid, + startedAt: new Date().toISOString(), + command: typeof command === "string" ? command : null, + }, + null, + 2 + ); + + for (let attempt = 0; attempt < 2; attempt++) { + try { + const fd = fs.openSync(LOCK_FILE, "wx", 0o600); + fs.writeFileSync(fd, payload); + fs.closeSync(fd); + return { acquired: true, lockFile: LOCK_FILE, stale: false }; + } catch (error) { + if (error?.code !== "EEXIST") { + throw error; + } + + let existing; + try { + existing = parseLockFile(fs.readFileSync(LOCK_FILE, "utf8")); + } catch (readError) { + if (readError?.code === "ENOENT") { + continue; + } + throw readError; + } + if (!existing) { + continue; + } + if (existing && isProcessAlive(existing.pid)) { + return { + acquired: false, + lockFile: LOCK_FILE, + stale: false, + holderPid: existing.pid, + holderStartedAt: existing.startedAt, + holderCommand: existing.command, + }; + } + + try { + fs.unlinkSync(LOCK_FILE); + } catch (unlinkError) { + if (unlinkError?.code !== "ENOENT") { + throw unlinkError; + } + } + } + } + + return { acquired: false, lockFile: LOCK_FILE, stale: true }; +} + +function releaseOnboardLock() { + try { + if (!fs.existsSync(LOCK_FILE)) return; + let existing = null; + try { + existing = parseLockFile(fs.readFileSync(LOCK_FILE, "utf8")); + } catch (error) { + if (error?.code === "ENOENT") return; + throw error; + } + if (!existing) return; + if (existing.pid !== process.pid) return; + fs.unlinkSync(LOCK_FILE); + } catch { + return; + } +} + +function updateSession(mutator) { + const current = loadSession() || createSession(); + const next = typeof mutator === "function" ? mutator(current) || current : current; + return saveSession(next); +} + +function markStepStarted(stepName) { + return updateSession((session) => { + const step = session.steps[stepName]; + if (!step) return session; + step.status = "in_progress"; + step.startedAt = new Date().toISOString(); + step.completedAt = null; + step.error = null; + session.lastStepStarted = stepName; + session.failure = null; + session.status = "in_progress"; + return session; + }); +} + +function markStepComplete(stepName, updates = {}) { + return updateSession((session) => { + const step = session.steps[stepName]; + if (!step) return session; + step.status = "complete"; + step.completedAt = new Date().toISOString(); + step.error = null; + session.lastCompletedStep = stepName; + session.failure = null; + Object.assign(session, filterSafeUpdates(updates)); + return session; + }); +} + +function markStepFailed(stepName, message = null) { + return updateSession((session) => { + const step = session.steps[stepName]; + if (!step) return session; + step.status = "failed"; + step.completedAt = null; + step.error = redactSensitiveText(message); + session.failure = sanitizeFailure({ + step: stepName, + message, + recordedAt: new Date().toISOString(), + }); + session.status = "failed"; + return session; + }); +} + +function completeSession(updates = {}) { + return updateSession((session) => { + Object.assign(session, filterSafeUpdates(updates)); + session.status = "complete"; + session.resumable = false; + session.failure = null; + return session; + }); +} + +function filterSafeUpdates(updates) { + const safe = {}; + if (!isObject(updates)) return safe; + if (typeof updates.sandboxName === "string") safe.sandboxName = updates.sandboxName; + if (typeof updates.provider === "string") safe.provider = updates.provider; + if (typeof updates.model === "string") safe.model = updates.model; + if (typeof updates.endpointUrl === "string") safe.endpointUrl = redactUrl(updates.endpointUrl); + if (typeof updates.credentialEnv === "string") safe.credentialEnv = updates.credentialEnv; + if (typeof updates.preferredInferenceApi === "string") safe.preferredInferenceApi = updates.preferredInferenceApi; + if (typeof updates.nimContainer === "string") safe.nimContainer = updates.nimContainer; + if (Array.isArray(updates.policyPresets)) { + safe.policyPresets = updates.policyPresets.filter((value) => typeof value === "string"); + } + if (isObject(updates.metadata) && typeof updates.metadata.gatewayName === "string") { + safe.metadata = { + gatewayName: updates.metadata.gatewayName, + }; + } + return safe; +} + +function summarizeForDebug(session = loadSession()) { + if (!session) return null; + return { + version: session.version, + sessionId: session.sessionId, + status: session.status, + resumable: session.resumable, + mode: session.mode, + startedAt: session.startedAt, + updatedAt: session.updatedAt, + sandboxName: session.sandboxName, + provider: session.provider, + model: session.model, + endpointUrl: redactUrl(session.endpointUrl), + credentialEnv: session.credentialEnv, + preferredInferenceApi: session.preferredInferenceApi, + nimContainer: session.nimContainer, + policyPresets: session.policyPresets, + lastStepStarted: session.lastStepStarted, + lastCompletedStep: session.lastCompletedStep, + failure: session.failure, + steps: Object.fromEntries( + Object.entries(session.steps).map(([name, step]) => [ + name, + { + status: step.status, + startedAt: step.startedAt, + completedAt: step.completedAt, + error: step.error, + }, + ]) + ), + }; +} + +module.exports = { + LOCK_FILE, + SESSION_DIR, + SESSION_FILE, + SESSION_VERSION, + acquireOnboardLock, + clearSession, + completeSession, + createSession, + loadSession, + markStepComplete, + markStepFailed, + markStepStarted, + lockPath, + redactUrl, + saveSession, + releaseOnboardLock, + sessionPath, + redactSensitiveText, + summarizeForDebug, + updateSession, +}; diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js index 955894113..e58c64502 100644 --- a/bin/lib/onboard.js +++ b/bin/lib/onboard.js @@ -24,6 +24,7 @@ const { CLOUD_MODEL_OPTIONS, DEFAULT_CLOUD_MODEL, getProviderSelectionConfig, + parseGatewayInference, } = require("./inference-config"); const { inferContainerRuntime, @@ -34,6 +35,7 @@ const { resolveOpenshell } = require("./resolve-openshell"); const { prompt, ensureApiKey, getCredential, saveCredential } = require("./credentials"); const registry = require("./registry"); const nim = require("./nim"); +const onboardSession = require("./onboard-session"); const policies = require("./policies"); const { checkPortAvailable } = require("./preflight"); const EXPERIMENTAL = process.env.NEMOCLAW_EXPERIMENTAL === "1"; @@ -182,29 +184,90 @@ function isSandboxReady(output, sandboxName) { * @returns {boolean} */ function hasStaleGateway(gwInfoOutput) { - return typeof gwInfoOutput === "string" && gwInfoOutput.length > 0 && gwInfoOutput.includes(GATEWAY_NAME); + const cleanOutput = + typeof gwInfoOutput === "string" + ? // eslint-disable-next-line no-control-regex + gwInfoOutput.replace(/\x1b\[[0-9;]*m/g, "") + : ""; + return ( + cleanOutput.length > 0 && + cleanOutput.includes(`Gateway: ${GATEWAY_NAME}`) && + !cleanOutput.includes("No gateway metadata found") + ); +} + +function getReportedGatewayName(output = "") { + if (typeof output !== "string") return null; + // eslint-disable-next-line no-control-regex + const cleanOutput = output.replace(/\x1b\[[0-9;]*m/g, ""); + const match = cleanOutput.match(/^\s*Gateway:\s+([^\s]+)/m); + return match ? match[1] : null; +} + +function isGatewayConnected(statusOutput = "") { + return typeof statusOutput === "string" && statusOutput.includes("Connected"); +} + +function hasActiveGatewayInfo(activeGatewayInfoOutput = "") { + return ( + typeof activeGatewayInfoOutput === "string" && + activeGatewayInfoOutput.includes("Gateway endpoint:") && + !activeGatewayInfoOutput.includes("No gateway metadata found") + ); +} + +function isSelectedGateway(statusOutput = "", gatewayName = GATEWAY_NAME) { + return getReportedGatewayName(statusOutput) === gatewayName; } -const ANSI_ESCAPE = String.fromCharCode(27); -const ANSI_REGEX = new RegExp(`${ANSI_ESCAPE}\\[[0-9;]*[A-Za-z]`, "g"); +function isGatewayHealthy(statusOutput = "", gwInfoOutput = "", activeGatewayInfoOutput = "") { + const namedGatewayKnown = hasStaleGateway(gwInfoOutput); + if (!namedGatewayKnown || !isGatewayConnected(statusOutput)) return false; -function stripAnsi(value = "") { - return value.replace(ANSI_REGEX, ""); + const activeGatewayName = getReportedGatewayName(statusOutput) || getReportedGatewayName(activeGatewayInfoOutput); + return activeGatewayName === GATEWAY_NAME; } -function getActiveGatewayName(statusOutput = "") { - if (typeof statusOutput !== "string" || statusOutput.length === 0) { - return ""; +function getGatewayReuseState(statusOutput = "", gwInfoOutput = "", activeGatewayInfoOutput = "") { + if (isGatewayHealthy(statusOutput, gwInfoOutput, activeGatewayInfoOutput)) { + return "healthy"; } - const match = stripAnsi(statusOutput) - .match(/^\s*Gateway:\s+(.+?)\s*$/m); - return match ? match[1].trim() : ""; + const connected = isGatewayConnected(statusOutput); + const activeGatewayName = getReportedGatewayName(statusOutput) || getReportedGatewayName(activeGatewayInfoOutput); + if (connected && activeGatewayName === GATEWAY_NAME) { + return "active-unnamed"; + } + if (connected && activeGatewayName && activeGatewayName !== GATEWAY_NAME) { + return "foreign-active"; + } + if (hasStaleGateway(gwInfoOutput)) { + return "stale"; + } + if (hasActiveGatewayInfo(activeGatewayInfoOutput)) { + return "active-unnamed"; + } + return "missing"; +} + +function getSandboxStateFromOutputs(sandboxName, getOutput = "", listOutput = "") { + if (!sandboxName) return "missing"; + if (!getOutput) return "missing"; + return isSandboxReady(listOutput, sandboxName) ? "ready" : "not_ready"; } -function isGatewayHealthy(statusOutput = "", gwInfoOutput = "") { - const connected = typeof statusOutput === "string" && statusOutput.includes("Connected"); - const activeGateway = getActiveGatewayName(statusOutput); - return connected && activeGateway === GATEWAY_NAME && hasStaleGateway(gwInfoOutput); +function getSandboxReuseState(sandboxName) { + if (!sandboxName) return "missing"; + const getOutput = runCaptureOpenshell(["sandbox", "get", sandboxName], { ignoreError: true }); + const listOutput = runCaptureOpenshell(["sandbox", "list"], { ignoreError: true }); + return getSandboxStateFromOutputs(sandboxName, getOutput, listOutput); +} + +function repairRecordedSandbox(sandboxName) { + if (!sandboxName) return; + note(` [resume] Cleaning up recorded sandbox '${sandboxName}' before recreating it.`); + runOpenshell(["forward", "stop", "18789"], { ignoreError: true }); + runOpenshell(["sandbox", "delete", sandboxName], { ignoreError: true }); + registry.removeSandbox(sandboxName); } function streamSandboxCreate(command, env = process.env, options = {}) { @@ -375,6 +438,15 @@ function formatEnvAssignment(name, value) { return `${name}=${value}`; } +function hydrateCredentialEnv(envName) { + if (!envName) return null; + const value = getCredential(envName); + if (value) { + process.env[envName] = value; + } + return value || null; +} + function getCurlTimingArgs() { return ["--connect-timeout 5", "--max-time 20"]; } @@ -413,6 +485,11 @@ function verifyInferenceRoute(_provider, _model) { } } +function isInferenceRouteReady(provider, model) { + const live = parseGatewayInference(runCaptureOpenshell(["inference", "get"], { ignoreError: true })); + return Boolean(live && live.provider === provider && live.model === model); +} + function sandboxExistsInGateway(sandboxName) { const output = runCaptureOpenshell(["sandbox", "get", sandboxName], { ignoreError: true }); return Boolean(output); @@ -442,6 +519,10 @@ exit `.trim(); } +function isOpenclawReady(sandboxName) { + return Boolean(fetchGatewayAuthTokenFromSandbox(sandboxName)); +} + function writeSandboxConfigSyncFile(script, tmpDir = os.tmpdir(), now = Date.now()) { const scriptFile = path.join(tmpDir, `nemoclaw-sync-${now}.sh`); fs.writeFileSync(scriptFile, `${script}\n`, { mode: 0o600 }); @@ -956,6 +1037,98 @@ async function promptManualModelId(promptLabel, errorLabel, validator = null) { return trimmed; } } +function shouldIncludeBuildContextPath(sourceRoot, candidatePath) { + const relative = path.relative(sourceRoot, candidatePath); + if (!relative || relative === "") return true; + + const segments = relative.split(path.sep); + const basename = path.basename(candidatePath); + const excludedSegments = new Set([ + ".venv", + ".ruff_cache", + ".pytest_cache", + ".mypy_cache", + "__pycache__", + "node_modules", + ".git", + ]); + + if (basename === ".DS_Store" || basename.startsWith("._")) { + return false; + } + + return !segments.some((segment) => excludedSegments.has(segment)); +} + +function copyBuildContextDir(sourceDir, destinationDir) { + fs.cpSync(sourceDir, destinationDir, { + recursive: true, + filter: (candidatePath) => shouldIncludeBuildContextPath(sourceDir, candidatePath), + }); +} + +function classifySandboxCreateFailure(output = "") { + const text = String(output || ""); + const uploadedToGateway = + /\[progress\]\s+Uploaded to gateway/i.test(text) || + /Image .*available in the gateway/i.test(text); + + if (/failed to read image export stream|Timeout error/i.test(text)) { + return { + kind: "image_transfer_timeout", + uploadedToGateway, + }; + } + + if (/Connection reset by peer/i.test(text)) { + return { + kind: "image_transfer_reset", + uploadedToGateway, + }; + } + + if (/Created sandbox:/i.test(text)) { + return { + kind: "sandbox_create_incomplete", + uploadedToGateway: true, + }; + } + + return { + kind: "unknown", + uploadedToGateway, + }; +} + +function printSandboxCreateRecoveryHints(output = "") { + const failure = classifySandboxCreateFailure(output); + if (failure.kind === "image_transfer_timeout") { + console.error(" Hint: image upload into the OpenShell gateway timed out."); + console.error(" Recovery: nemoclaw onboard --resume"); + if (failure.uploadedToGateway) { + console.error(" Progress reached the gateway upload stage, so resume may be able to reuse existing gateway state."); + } + console.error(" If this repeats, check Docker memory and retry on a host with more RAM."); + return; + } + if (failure.kind === "image_transfer_reset") { + console.error(" Hint: the image push/import stream was interrupted."); + console.error(" Recovery: nemoclaw onboard --resume"); + if (failure.uploadedToGateway) { + console.error(" The image appears to have reached the gateway before the stream failed."); + } + console.error(" If this repeats, restart Docker or the gateway and retry."); + return; + } + if (failure.kind === "sandbox_create_incomplete") { + console.error(" Hint: sandbox creation started but the create stream did not finish cleanly."); + console.error(" Recovery: nemoclaw onboard --resume"); + console.error(" Check: openshell sandbox list # verify whether the sandbox became ready"); + return; + } + console.error(" Recovery: nemoclaw onboard --resume"); + console.error(" Or: nemoclaw onboard"); +} async function promptCloudModel() { console.log(""); @@ -1074,6 +1247,86 @@ function prepareOllamaModel(model, installedModels = []) { return validateOllamaModel(model, runCapture); } +function getRequestedSandboxNameHint() { + const raw = process.env.NEMOCLAW_SANDBOX_NAME; + if (typeof raw !== "string") return null; + const normalized = raw.trim().toLowerCase(); + return normalized || null; +} + +function getResumeSandboxConflict(session) { + const requestedSandboxName = getRequestedSandboxNameHint(); + if (!requestedSandboxName || !session?.sandboxName) { + return null; + } + return requestedSandboxName !== session.sandboxName + ? { requestedSandboxName, recordedSandboxName: session.sandboxName } + : null; +} + +function getRequestedProviderHint(nonInteractive = isNonInteractive()) { + return nonInteractive ? getNonInteractiveProvider() : null; +} + +function getRequestedModelHint(nonInteractive = isNonInteractive()) { + if (!nonInteractive) return null; + const providerKey = getRequestedProviderHint(nonInteractive) || "cloud"; + return getNonInteractiveModel(providerKey); +} + +function getEffectiveProviderName(providerKey) { + if (!providerKey) return null; + if (REMOTE_PROVIDER_CONFIG[providerKey]) { + return REMOTE_PROVIDER_CONFIG[providerKey].providerName; + } + + switch (providerKey) { + case "nim-local": + return "nvidia-nim"; + case "ollama": + return "ollama-local"; + case "vllm": + return "vllm-local"; + default: + return providerKey; + } +} + +function getResumeConfigConflicts(session, opts = {}) { + const conflicts = []; + const nonInteractive = opts.nonInteractive ?? isNonInteractive(); + + const sandboxConflict = getResumeSandboxConflict(session); + if (sandboxConflict) { + conflicts.push({ + field: "sandbox", + requested: sandboxConflict.requestedSandboxName, + recorded: sandboxConflict.recordedSandboxName, + }); + } + + const requestedProvider = getRequestedProviderHint(nonInteractive); + const effectiveRequestedProvider = getEffectiveProviderName(requestedProvider); + if (effectiveRequestedProvider && session?.provider && effectiveRequestedProvider !== session.provider) { + conflicts.push({ + field: "provider", + requested: effectiveRequestedProvider, + recorded: session.provider, + }); + } + + const requestedModel = getRequestedModelHint(nonInteractive); + if (requestedModel && session?.model && requestedModel !== session.model) { + conflicts.push({ + field: "model", + requested: requestedModel, + recorded: session.model, + }); + } + + return conflicts; +} + function isDockerRunning() { try { runCapture("docker info", { ignoreError: false }); @@ -1133,6 +1386,13 @@ function sleep(seconds) { require("child_process").spawnSync("sleep", [String(seconds)]); } +function destroyGateway() { + runOpenshell(["gateway", "destroy", "-g", GATEWAY_NAME], { ignoreError: true }); + // openshell gateway destroy doesn't remove Docker volumes, which leaves + // corrupted cluster state that breaks the next gateway start. Clean them up. + run(`docker volume ls -q --filter "name=openshell-cluster-${GATEWAY_NAME}" | grep . && docker volume ls -q --filter "name=openshell-cluster-${GATEWAY_NAME}" | xargs docker volume rm || true`, { ignoreError: true }); +} + async function ensureNamedCredential(envName, label, helpUrl = null) { let key = getCredential(envName); if (key) { @@ -1256,20 +1516,15 @@ async function preflight() { console.log(" Add that export to your shell profile, or open a new terminal before running openshell directly."); } - // Clean up stale NemoClaw session before checking ports. - // A previous onboard run may have left the gateway container and port - // forward running. If a NemoClaw-owned gateway is still present, tear - // it down so the port check below doesn't fail on our own leftovers. + // Clean up stale or unnamed NemoClaw gateway state before checking ports. + // A healthy named gateway can be reused later in onboarding, so avoid + // tearing it down here. If some other gateway is active, do not treat it + // as NemoClaw state; let the port checks surface the conflict instead. const gatewayStatus = runCaptureOpenshell(["status"], { ignoreError: true }); const gwInfo = runCaptureOpenshell(["gateway", "info", "-g", GATEWAY_NAME], { ignoreError: true }); - const healthyGateway = isGatewayHealthy(gatewayStatus, gwInfo); - if (healthyGateway) { - console.log(" Reusing existing NemoClaw gateway..."); - runOpenshell(["forward", "stop", "18789"], { ignoreError: true }); - runOpenshell(["gateway", "select", GATEWAY_NAME], { ignoreError: true }); - process.env.OPENSHELL_GATEWAY = GATEWAY_NAME; - console.log(" ✓ Existing gateway selected"); - } else if (hasStaleGateway(gwInfo)) { + const activeGatewayInfo = runCaptureOpenshell(["gateway", "info"], { ignoreError: true }); + const gatewayReuseState = getGatewayReuseState(gatewayStatus, gwInfo, activeGatewayInfo); + if (gatewayReuseState === "stale" || gatewayReuseState === "active-unnamed") { console.log(" Cleaning up previous NemoClaw session..."); runOpenshell(["forward", "stop", "18789"], { ignoreError: true }); runOpenshell(["gateway", "destroy", "-g", GATEWAY_NAME], { ignoreError: true }); @@ -1282,12 +1537,12 @@ async function preflight() { { port: 18789, label: "NemoClaw dashboard" }, ]; for (const { port, label } of requiredPorts) { - if (port === 8080 && healthyGateway) { - console.log(` ✓ Port ${port} already in use by active NemoClaw gateway (${label})`); - continue; - } const portCheck = await checkPortAvailable(port); if (!portCheck.ok) { + if ((port === 8080 || port === 18789) && gatewayReuseState === "healthy") { + console.log(` ✓ Port ${port} already owned by healthy NemoClaw runtime (${label})`); + continue; + } console.error(""); console.error(` !! Port ${port} is not available.`); console.error(` ${label} needs this port.`); @@ -1329,23 +1584,15 @@ async function preflight() { return gpu; } -// ── Gateway cleanup ────────────────────────────────────────────── - -function destroyGateway() { - runOpenshell(["gateway", "destroy", "-g", GATEWAY_NAME], { ignoreError: true }); - // openshell gateway destroy doesn't remove Docker volumes, which leaves - // corrupted cluster state that breaks the next gateway start. Clean them up. - run(`docker volume ls -q --filter "name=openshell-cluster-${GATEWAY_NAME}" | grep . && docker volume ls -q --filter "name=openshell-cluster-${GATEWAY_NAME}" | xargs docker volume rm || true`, { ignoreError: true }); -} - // ── Step 2: Gateway ────────────────────────────────────────────── async function startGatewayWithOptions(_gpu, { exitOnFailure = true } = {}) { - step(3, 7, "Starting OpenShell gateway"); + step(2, 7, "Starting OpenShell gateway"); const gatewayStatus = runCaptureOpenshell(["status"], { ignoreError: true }); const gwInfo = runCaptureOpenshell(["gateway", "info", "-g", GATEWAY_NAME], { ignoreError: true }); - if (isGatewayHealthy(gatewayStatus, gwInfo)) { + const activeGatewayInfo = runCaptureOpenshell(["gateway", "info"], { ignoreError: true }); + if (isGatewayHealthy(gatewayStatus, gwInfo, activeGatewayInfo)) { console.log(" ✓ Reusing existing gateway"); runOpenshell(["gateway", "select", GATEWAY_NAME], { ignoreError: true }); process.env.OPENSHELL_GATEWAY = GATEWAY_NAME; @@ -1362,15 +1609,9 @@ async function startGatewayWithOptions(_gpu, { exitOnFailure = true } = {}) { // sandbox itself does not need direct GPU access. Passing --gpu causes // FailedPrecondition errors when the gateway's k3s device plugin cannot // allocate GPUs. See: https://build.nvidia.com/spark/nemoclaw/instructions - const gatewayEnv = {}; - const openshellVersion = getInstalledOpenshellVersion(); - const stableGatewayImage = openshellVersion - ? `ghcr.io/nvidia/openshell/cluster:${openshellVersion}` - : null; - if (stableGatewayImage && openshellVersion) { - gatewayEnv.OPENSHELL_CLUSTER_IMAGE = stableGatewayImage; - gatewayEnv.IMAGE_TAG = openshellVersion; - console.log(` Using pinned OpenShell gateway image: ${stableGatewayImage}`); + const gatewayEnv = getGatewayStartEnv(); + if (gatewayEnv.OPENSHELL_CLUSTER_IMAGE) { + console.log(` Using pinned OpenShell gateway image: ${gatewayEnv.OPENSHELL_CLUSTER_IMAGE}`); } const startResult = runOpenshell(["gateway", "start", ...gwArgs], { ignoreError: true, env: gatewayEnv }); @@ -1384,11 +1625,11 @@ async function startGatewayWithOptions(_gpu, { exitOnFailure = true } = {}) { throw new Error("Gateway failed to start"); } - // Verify health for (let i = 0; i < 5; i++) { const status = runCaptureOpenshell(["status"], { ignoreError: true }); - const gwInfo = runCaptureOpenshell(["gateway", "info", "-g", GATEWAY_NAME], { ignoreError: true }); - if (isGatewayHealthy(status, gwInfo)) { + const namedInfo = runCaptureOpenshell(["gateway", "info", "-g", GATEWAY_NAME], { ignoreError: true }); + const currentInfo = runCaptureOpenshell(["gateway", "info"], { ignoreError: true }); + if (isGatewayHealthy(status, namedInfo, currentInfo)) { console.log(" ✓ Gateway is healthy"); break; } @@ -1410,7 +1651,6 @@ async function startGatewayWithOptions(_gpu, { exitOnFailure = true } = {}) { console.log(" Patching CoreDNS for Colima..."); run(`bash "${path.join(SCRIPTS, "fix-coredns.sh")}" ${GATEWAY_NAME} 2>&1 || true`, { ignoreError: true }); } - // Give DNS a moment to propagate sleep(5); runOpenshell(["gateway", "select", GATEWAY_NAME], { ignoreError: true }); process.env.OPENSHELL_GATEWAY = GATEWAY_NAME; @@ -1424,11 +1664,52 @@ async function startGatewayForRecovery(_gpu) { return startGatewayWithOptions(_gpu, { exitOnFailure: false }); } -// ── Step 3: Sandbox ────────────────────────────────────────────── +function getGatewayStartEnv() { + const gatewayEnv = {}; + const openshellVersion = getInstalledOpenshellVersion(); + const stableGatewayImage = openshellVersion + ? `ghcr.io/nvidia/openshell/cluster:${openshellVersion}` + : null; + if (stableGatewayImage && openshellVersion) { + gatewayEnv.OPENSHELL_CLUSTER_IMAGE = stableGatewayImage; + gatewayEnv.IMAGE_TAG = openshellVersion; + } + return gatewayEnv; +} -async function createSandbox(gpu, model, provider, preferredInferenceApi = null) { - step(5, 7, "Creating sandbox"); +async function recoverGatewayRuntime() { + runOpenshell(["gateway", "select", GATEWAY_NAME], { ignoreError: true }); + let status = runCaptureOpenshell(["status"], { ignoreError: true }); + if (status.includes("Connected") && isSelectedGateway(status)) { + process.env.OPENSHELL_GATEWAY = GATEWAY_NAME; + return true; + } + + runOpenshell(["gateway", "start", "--name", GATEWAY_NAME], { + ignoreError: true, + env: getGatewayStartEnv(), + }); + runOpenshell(["gateway", "select", GATEWAY_NAME], { ignoreError: true }); + + for (let i = 0; i < 5; i++) { + status = runCaptureOpenshell(["status"], { ignoreError: true }); + if (status.includes("Connected") && isSelectedGateway(status)) { + process.env.OPENSHELL_GATEWAY = GATEWAY_NAME; + const runtime = getContainerRuntime(); + if (shouldPatchCoredns(runtime)) { + run(`bash "${path.join(SCRIPTS, "fix-coredns.sh")}" ${GATEWAY_NAME} 2>&1 || true`, { ignoreError: true }); + } + return true; + } + sleep(2); + } + + return false; +} + +// ── Step 3: Sandbox ────────────────────────────────────────────── +async function promptValidatedSandboxName() { const nameAnswer = await promptOrDefault( " Sandbox name (lowercase, numbers, hyphens) [my-assistant]: ", "NEMOCLAW_SANDBOX_NAME", "my-assistant" @@ -1444,23 +1725,35 @@ async function createSandbox(gpu, model, provider, preferredInferenceApi = null) process.exit(1); } + return sandboxName; +} + +// eslint-disable-next-line complexity +async function createSandbox(gpu, model, provider, preferredInferenceApi = null, sandboxNameOverride = null) { + step(5, 7, "Creating sandbox"); + + const sandboxName = sandboxNameOverride || (await promptValidatedSandboxName()); + // Reconcile local registry state with the live OpenShell gateway state. const liveExists = pruneStaleSandboxEntry(sandboxName); if (liveExists) { - if (isNonInteractive()) { - if (process.env.NEMOCLAW_RECREATE_SANDBOX !== "1") { - console.error(` Sandbox '${sandboxName}' already exists.`); - console.error(" Set NEMOCLAW_RECREATE_SANDBOX=1 to recreate it in non-interactive mode."); - process.exit(1); + const existingSandboxState = getSandboxReuseState(sandboxName); + if (existingSandboxState === "ready" && process.env.NEMOCLAW_RECREATE_SANDBOX !== "1") { + if (isNonInteractive()) { + note(` [non-interactive] Sandbox '${sandboxName}' exists and is ready — reusing it`); + } else { + console.log(` Sandbox '${sandboxName}' already exists and is ready.`); + console.log(" Reusing existing sandbox."); + console.log(" Set NEMOCLAW_RECREATE_SANDBOX=1 to recreate it instead."); } - note(` [non-interactive] Sandbox '${sandboxName}' exists — recreating`); + return sandboxName; + } + + if (existingSandboxState === "ready") { + note(` Sandbox '${sandboxName}' exists and is ready — recreating by explicit request.`); } else { - const recreate = await prompt(` Sandbox '${sandboxName}' already exists. Recreate? [y/N]: `); - if (recreate.toLowerCase() !== "y") { - console.log(" Keeping existing sandbox."); - return sandboxName; - } + note(` Sandbox '${sandboxName}' exists but is not ready — recreating it.`); } // Destroy old sandbox runOpenshell(["sandbox", "delete", sandboxName], { ignoreError: true }); @@ -1471,11 +1764,9 @@ async function createSandbox(gpu, model, provider, preferredInferenceApi = null) const buildCtx = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-build-")); const stagedDockerfile = path.join(buildCtx, "Dockerfile"); fs.copyFileSync(path.join(ROOT, "Dockerfile"), stagedDockerfile); - run(`cp -r "${path.join(ROOT, "nemoclaw")}" "${buildCtx}/nemoclaw"`); - run(`cp -r "${path.join(ROOT, "nemoclaw-blueprint")}" "${buildCtx}/nemoclaw-blueprint"`); - run(`cp -r "${path.join(ROOT, "scripts")}" "${buildCtx}/scripts"`); - run(`rm -rf "${buildCtx}/nemoclaw/node_modules"`, { ignoreError: true }); - run(`bash "${buildCtx}/scripts/clean-staged-tree.sh" "${buildCtx}/nemoclaw-blueprint"`, { ignoreError: true }); + copyBuildContextDir(path.join(ROOT, "nemoclaw"), path.join(buildCtx, "nemoclaw")); + copyBuildContextDir(path.join(ROOT, "nemoclaw-blueprint"), path.join(buildCtx, "nemoclaw-blueprint")); + copyBuildContextDir(path.join(ROOT, "scripts"), path.join(buildCtx, "scripts")); // Create sandbox (use -- echo to avoid dropping into interactive shell) // Pass the base policy so sandbox starts in proxy mode (required for policy updates later) @@ -1539,7 +1830,7 @@ async function createSandbox(gpu, model, provider, preferredInferenceApi = null) console.error(createResult.output); } console.error(" Try: openshell sandbox list # check gateway state"); - console.error(" Try: nemoclaw onboard # retry from scratch"); + printSandboxCreateRecoveryHints(createResult.output); process.exit(createResult.status || 1); } @@ -1595,7 +1886,7 @@ async function createSandbox(gpu, model, provider, preferredInferenceApi = null) // eslint-disable-next-line complexity async function setupNim(gpu) { - step(2, 7, "Configuring inference (NIM)"); + step(3, 7, "Configuring inference (NIM)"); let model = null; let provider = REMOTE_PROVIDER_CONFIG.build.providerName; @@ -1613,7 +1904,9 @@ async function setupNim(gpu) { const options = []; options.push({ key: "build", - label: "NVIDIA Endpoints", + label: + "NVIDIA Endpoints" + + (!ollamaRunning && !(EXPERIMENTAL && vllmRunning) ? " (recommended)" : ""), }); options.push({ key: "openai", label: "OpenAI" }); options.push({ key: "custom", label: "Other OpenAI-compatible endpoint" }); @@ -2003,7 +2296,10 @@ async function setupInference(sandboxName, model, provider, endpointUrl = null, : Object.values(REMOTE_PROVIDER_CONFIG).find((entry) => entry.providerName === provider); const resolvedCredentialEnv = credentialEnv || (config && config.credentialEnv); const resolvedEndpointUrl = endpointUrl || (config && config.endpointUrl); - const env = resolvedCredentialEnv ? { [resolvedCredentialEnv]: process.env[resolvedCredentialEnv] } : {}; + const credentialValue = hydrateCredentialEnv(resolvedCredentialEnv); + const env = resolvedCredentialEnv && credentialValue + ? { [resolvedCredentialEnv]: credentialValue } + : {}; upsertProvider(provider, config.providerType, resolvedCredentialEnv, resolvedEndpointUrl, env); const args = ["inference", "set"]; if (config.skipVerify) { @@ -2077,7 +2373,7 @@ async function setupOpenclaw(sandboxName, model, provider) { // ── Step 7: Policy presets ─────────────────────────────────────── // eslint-disable-next-line complexity -async function setupPolicies(sandboxName) { +async function _setupPolicies(sandboxName) { step(7, 7, "Policy presets"); const suggestions = ["pypi", "npm"]; @@ -2214,6 +2510,144 @@ async function setupPolicies(sandboxName) { console.log(" ✓ Policies applied"); } +function arePolicyPresetsApplied(sandboxName, selectedPresets = []) { + if (!Array.isArray(selectedPresets) || selectedPresets.length === 0) return false; + const applied = new Set(policies.getAppliedPresets(sandboxName)); + return selectedPresets.every((preset) => applied.has(preset)); +} + +// eslint-disable-next-line complexity +async function setupPoliciesWithSelection(sandboxName, options = {}) { + const selectedPresets = Array.isArray(options.selectedPresets) ? options.selectedPresets : null; + const onSelection = typeof options.onSelection === "function" ? options.onSelection : null; + + step(7, 7, "Policy presets"); + + const suggestions = ["pypi", "npm"]; + if (getCredential("TELEGRAM_BOT_TOKEN")) suggestions.push("telegram"); + if (getCredential("SLACK_BOT_TOKEN") || process.env.SLACK_BOT_TOKEN) suggestions.push("slack"); + if (getCredential("DISCORD_BOT_TOKEN") || process.env.DISCORD_BOT_TOKEN) suggestions.push("discord"); + + const allPresets = policies.listPresets(); + const applied = policies.getAppliedPresets(sandboxName); + let chosen = selectedPresets; + + if (chosen && chosen.length > 0) { + if (onSelection) onSelection(chosen); + if (!waitForSandboxReady(sandboxName)) { + console.error(` Sandbox '${sandboxName}' was not ready for policy application.`); + process.exit(1); + } + note(` [resume] Reapplying policy presets: ${chosen.join(", ")}`); + for (const name of chosen) { + if (applied.includes(name)) continue; + policies.applyPreset(sandboxName, name); + } + return chosen; + } + + if (isNonInteractive()) { + const policyMode = (process.env.NEMOCLAW_POLICY_MODE || "suggested").trim().toLowerCase(); + chosen = suggestions; + + if (policyMode === "skip" || policyMode === "none" || policyMode === "no") { + note(" [non-interactive] Skipping policy presets."); + return []; + } + + if (policyMode === "custom" || policyMode === "list") { + chosen = parsePolicyPresetEnv(process.env.NEMOCLAW_POLICY_PRESETS); + if (chosen.length === 0) { + console.error(" NEMOCLAW_POLICY_PRESETS is required when NEMOCLAW_POLICY_MODE=custom."); + process.exit(1); + } + } else if (policyMode === "suggested" || policyMode === "default" || policyMode === "auto") { + const envPresets = parsePolicyPresetEnv(process.env.NEMOCLAW_POLICY_PRESETS); + if (envPresets.length > 0) { + chosen = envPresets; + } + } else { + console.error(` Unsupported NEMOCLAW_POLICY_MODE: ${policyMode}`); + console.error(" Valid values: suggested, custom, skip"); + process.exit(1); + } + + const knownPresets = new Set(allPresets.map((p) => p.name)); + const invalidPresets = chosen.filter((name) => !knownPresets.has(name)); + if (invalidPresets.length > 0) { + console.error(` Unknown policy preset(s): ${invalidPresets.join(", ")}`); + process.exit(1); + } + + if (onSelection) onSelection(chosen); + if (!waitForSandboxReady(sandboxName)) { + console.error(` Sandbox '${sandboxName}' was not ready for policy application.`); + process.exit(1); + } + note(` [non-interactive] Applying policy presets: ${chosen.join(", ")}`); + for (const name of chosen) { + for (let attempt = 0; attempt < 3; attempt += 1) { + try { + policies.applyPreset(sandboxName, name); + break; + } catch (err) { + const message = err && err.message ? err.message : String(err); + if (message.includes("Unimplemented")) { + console.error(" OpenShell policy updates are not supported by this gateway build."); + console.error(" This is a known issue tracked in NemoClaw #536."); + throw err; + } + if (!message.includes("sandbox not found") || attempt === 2) { + throw err; + } + sleep(2); + } + } + } + return chosen; + } + + console.log(""); + console.log(" Available policy presets:"); + allPresets.forEach((p) => { + const marker = applied.includes(p.name) ? "●" : "○"; + const suggested = suggestions.includes(p.name) ? " (suggested)" : ""; + console.log(` ${marker} ${p.name} — ${p.description}${suggested}`); + }); + console.log(""); + + const answer = await prompt(` Apply suggested presets (${suggestions.join(", ")})? [Y/n/list]: `); + + if (answer.toLowerCase() === "n") { + console.log(" Skipping policy presets."); + return []; + } + + let interactiveChoice = suggestions; + if (answer.toLowerCase() === "list") { + const custom = await prompt(" Enter preset names (comma-separated): "); + interactiveChoice = parsePolicyPresetEnv(custom); + } + + const knownPresets = new Set(allPresets.map((p) => p.name)); + const invalidPresets = interactiveChoice.filter((name) => !knownPresets.has(name)); + if (invalidPresets.length > 0) { + console.error(` Unknown policy preset(s): ${invalidPresets.join(", ")}`); + process.exit(1); + } + + if (onSelection) onSelection(interactiveChoice); + if (!waitForSandboxReady(sandboxName)) { + console.error(` Sandbox '${sandboxName}' was not ready for policy application.`); + process.exit(1); + } + + for (const name of interactiveChoice) { + policies.applyPreset(sandboxName, name); + } + return interactiveChoice; +} + // ── Dashboard ──────────────────────────────────────────────────── const CONTROL_UI_PORT = 18789; @@ -2321,54 +2755,322 @@ function printDashboard(sandboxName, model, provider, nimContainer = null) { console.log(""); } +function startRecordedStep(stepName, updates = {}) { + onboardSession.markStepStarted(stepName); + if (Object.keys(updates).length > 0) { + onboardSession.updateSession((session) => { + if (typeof updates.sandboxName === "string") session.sandboxName = updates.sandboxName; + if (typeof updates.provider === "string") session.provider = updates.provider; + if (typeof updates.model === "string") session.model = updates.model; + return session; + }); + } +} + +function resumeStepMessage(stepName, detail) { + console.log(` [resume] Skipping ${stepName}${detail ? ` (${detail})` : ""}`); +} + // ── Main ───────────────────────────────────────────────────────── +// eslint-disable-next-line complexity async function onboard(opts = {}) { NON_INTERACTIVE = opts.nonInteractive || process.env.NEMOCLAW_NON_INTERACTIVE === "1"; delete process.env.OPENSHELL_GATEWAY; + const resume = opts.resume === true; + const lockResult = onboardSession.acquireOnboardLock( + `nemoclaw onboard${resume ? " --resume" : ""}${isNonInteractive() ? " --non-interactive" : ""}` + ); + if (!lockResult.acquired) { + console.error(" Another NemoClaw onboarding run is already in progress."); + if (lockResult.holderPid) { + console.error(` Lock holder PID: ${lockResult.holderPid}`); + } + if (lockResult.holderStartedAt) { + console.error(` Started: ${lockResult.holderStartedAt}`); + } + console.error(" Wait for it to finish, or remove the stale lock if the previous run crashed:"); + console.error(` rm -f "${lockResult.lockFile}"`); + process.exit(1); + } - console.log(""); - console.log(" NemoClaw Onboarding"); - if (isNonInteractive()) note(" (non-interactive mode)"); - console.log(" ==================="); - - const gpu = await preflight(); - const { model, provider, endpointUrl, credentialEnv, preferredInferenceApi, nimContainer } = await setupNim(gpu); - process.env.NEMOCLAW_OPENSHELL_BIN = getOpenshellBinary(); - await startGateway(gpu); - await setupInference(GATEWAY_NAME, model, provider, endpointUrl, credentialEnv); - // The key is now stored in openshell's provider config. Clear it from our - // process environment so new child processes don't inherit it. Note: this - // does NOT clear /proc/pid/environ (kernel snapshot is immutable after exec), - // but it prevents run()'s { ...process.env } from propagating the key. - delete process.env.NVIDIA_API_KEY; - const sandboxName = await createSandbox(gpu, model, provider, preferredInferenceApi); - if (nimContainer) { - registry.updateSandbox(sandboxName, { nimContainer }); - } - await setupOpenclaw(sandboxName, model, provider); - await setupPolicies(sandboxName); - printDashboard(sandboxName, model, provider, nimContainer); + let lockReleased = false; + const releaseOnboardLock = () => { + if (lockReleased) return; + lockReleased = true; + onboardSession.releaseOnboardLock(); + }; + process.once("exit", releaseOnboardLock); + + try { + let session; + if (resume) { + session = onboardSession.loadSession(); + if (!session || session.resumable === false) { + console.error(" No resumable onboarding session was found."); + console.error(" Run: nemoclaw onboard"); + process.exit(1); + } + const resumeConflicts = getResumeConfigConflicts(session, { nonInteractive: isNonInteractive() }); + if (resumeConflicts.length > 0) { + for (const conflict of resumeConflicts) { + if (conflict.field === "sandbox") { + console.error( + ` Resumable state belongs to sandbox '${conflict.recorded}', not '${conflict.requested}'.` + ); + } else { + console.error( + ` Resumable state recorded ${conflict.field} '${conflict.recorded}', not '${conflict.requested}'.` + ); + } + } + console.error(" Run: nemoclaw onboard # start a fresh onboarding session"); + console.error(" Or rerun with the original settings to continue that session."); + process.exit(1); + } + onboardSession.updateSession((current) => { + current.mode = isNonInteractive() ? "non-interactive" : "interactive"; + current.failure = null; + current.status = "in_progress"; + return current; + }); + session = onboardSession.loadSession(); + } else { + session = onboardSession.saveSession( + onboardSession.createSession({ + mode: isNonInteractive() ? "non-interactive" : "interactive", + metadata: { gatewayName: "nemoclaw" }, + }) + ); + } + + let completed = false; + process.once("exit", (code) => { + if (!completed && code !== 0) { + const current = onboardSession.loadSession(); + const failedStep = current?.lastStepStarted; + if (failedStep) { + onboardSession.markStepFailed(failedStep, "Onboarding exited before the step completed."); + } + } + }); + + console.log(""); + console.log(" NemoClaw Onboarding"); + if (isNonInteractive()) note(" (non-interactive mode)"); + if (resume) note(" (resume mode)"); + console.log(" ==================="); + + let gpu; + const resumePreflight = resume && session?.steps?.preflight?.status === "complete"; + if (resumePreflight) { + resumeStepMessage("preflight", "cached"); + gpu = nim.detectGpu(); + } else { + startRecordedStep("preflight"); + gpu = await preflight(); + onboardSession.markStepComplete("preflight"); + } + + const gatewayStatus = runCaptureOpenshell(["status"], { ignoreError: true }); + const gatewayInfo = runCaptureOpenshell(["gateway", "info", "-g", GATEWAY_NAME], { ignoreError: true }); + const activeGatewayInfo = runCaptureOpenshell(["gateway", "info"], { ignoreError: true }); + const gatewayReuseState = getGatewayReuseState(gatewayStatus, gatewayInfo, activeGatewayInfo); + const canReuseHealthyGateway = gatewayReuseState === "healthy"; + const resumeGateway = resume && session?.steps?.gateway?.status === "complete" && canReuseHealthyGateway; + if (resumeGateway) { + resumeStepMessage("gateway", "running"); + } else if (!resume && canReuseHealthyGateway) { + note(" Reusing healthy NemoClaw gateway."); + } else { + if (resume && session?.steps?.gateway?.status === "complete") { + if (gatewayReuseState === "active-unnamed") { + note(" [resume] Gateway is active but named metadata is missing; recreating it safely."); + } else if (gatewayReuseState === "foreign-active") { + note(" [resume] A different OpenShell gateway is active; NemoClaw will not reuse it."); + } else if (gatewayReuseState === "stale") { + note(" [resume] Recorded gateway is unhealthy; recreating it."); + } else { + note(" [resume] Recorded gateway state is unavailable; recreating it."); + } + } + startRecordedStep("gateway"); + await startGateway(gpu); + onboardSession.markStepComplete("gateway"); + } + + let sandboxName = session?.sandboxName || null; + let model = session?.model || null; + let provider = session?.provider || null; + let endpointUrl = session?.endpointUrl || null; + let credentialEnv = session?.credentialEnv || null; + let preferredInferenceApi = session?.preferredInferenceApi || null; + let nimContainer = session?.nimContainer || null; + const resumeProviderSelection = + resume && + session?.steps?.provider_selection?.status === "complete" && + typeof provider === "string" && + typeof model === "string"; + if (resumeProviderSelection) { + resumeStepMessage("provider selection", `${provider} / ${model}`); + hydrateCredentialEnv(credentialEnv); + } else { + startRecordedStep("provider_selection", { sandboxName }); + const selection = await setupNim(gpu); + model = selection.model; + provider = selection.provider; + endpointUrl = selection.endpointUrl; + credentialEnv = selection.credentialEnv; + preferredInferenceApi = selection.preferredInferenceApi; + nimContainer = selection.nimContainer; + onboardSession.markStepComplete("provider_selection", { + sandboxName, + provider, + model, + endpointUrl, + credentialEnv, + preferredInferenceApi, + nimContainer, + }); + } + + process.env.NEMOCLAW_OPENSHELL_BIN = getOpenshellBinary(); + const resumeInference = + resume && + typeof provider === "string" && + typeof model === "string" && + isInferenceRouteReady(provider, model); + if (resumeInference) { + resumeStepMessage("inference", `${provider} / ${model}`); + if (nimContainer) { + registry.updateSandbox(sandboxName, { nimContainer }); + } + onboardSession.markStepComplete("inference", { sandboxName, provider, model, nimContainer }); + } else { + startRecordedStep("inference", { sandboxName, provider, model }); + await setupInference(GATEWAY_NAME, model, provider, endpointUrl, credentialEnv); + delete process.env.NVIDIA_API_KEY; + if (nimContainer) { + registry.updateSandbox(sandboxName, { nimContainer }); + } + onboardSession.markStepComplete("inference", { sandboxName, provider, model, nimContainer }); + } + + const sandboxReuseState = getSandboxReuseState(sandboxName); + const resumeSandbox = resume && session?.steps?.sandbox?.status === "complete" && sandboxReuseState === "ready"; + if (resumeSandbox) { + resumeStepMessage("sandbox", sandboxName); + } else { + if (resume && session?.steps?.sandbox?.status === "complete") { + if (sandboxReuseState === "not_ready") { + note(` [resume] Recorded sandbox '${sandboxName}' exists but is not ready; recreating it.`); + repairRecordedSandbox(sandboxName); + } else { + note(" [resume] Recorded sandbox state is unavailable; recreating it."); + if (sandboxName) { + registry.removeSandbox(sandboxName); + } + } + } + sandboxName = sandboxName || (await promptValidatedSandboxName()); + startRecordedStep("sandbox", { sandboxName, provider, model }); + sandboxName = await createSandbox(gpu, model, provider, preferredInferenceApi, sandboxName); + onboardSession.markStepComplete("sandbox", { sandboxName, provider, model, nimContainer }); + } + + const resumeOpenclaw = resume && sandboxName && isOpenclawReady(sandboxName); + if (resumeOpenclaw) { + resumeStepMessage("openclaw", sandboxName); + onboardSession.markStepComplete("openclaw", { sandboxName, provider, model }); + } else { + startRecordedStep("openclaw", { sandboxName, provider, model }); + await setupOpenclaw(sandboxName, model, provider); + onboardSession.markStepComplete("openclaw", { sandboxName, provider, model }); + } + + const recordedPolicyPresets = Array.isArray(session?.policyPresets) ? session.policyPresets : null; + const resumePolicies = + resume && + sandboxName && + arePolicyPresetsApplied(sandboxName, recordedPolicyPresets || []); + if (resumePolicies) { + resumeStepMessage("policies", (recordedPolicyPresets || []).join(", ")); + onboardSession.markStepComplete("policies", { sandboxName, provider, model, policyPresets: recordedPolicyPresets || [] }); + } else { + startRecordedStep("policies", { + sandboxName, + provider, + model, + policyPresets: recordedPolicyPresets || [], + }); + const appliedPolicyPresets = await setupPoliciesWithSelection(sandboxName, { + selectedPresets: + resume && + session?.steps?.policies?.status !== "complete" && + Array.isArray(recordedPolicyPresets) && + recordedPolicyPresets.length > 0 + ? recordedPolicyPresets + : null, + onSelection: (policyPresets) => { + onboardSession.updateSession((current) => { + current.policyPresets = policyPresets; + return current; + }); + }, + }); + onboardSession.markStepComplete("policies", { + sandboxName, + provider, + model, + policyPresets: appliedPolicyPresets, + }); + } + + onboardSession.completeSession({ sandboxName, provider, model }); + completed = true; + printDashboard(sandboxName, model, provider, nimContainer); + } finally { + releaseOnboardLock(); + } } module.exports = { buildSandboxConfigSyncScript, - getFutureShellPathHint, + copyBuildContextDir, + classifySandboxCreateFailure, createSandbox, + getFutureShellPathHint, + getGatewayStartEnv, + getGatewayReuseState, getSandboxInferenceConfig, getInstalledOpenshellVersion, + getRequestedModelHint, + getRequestedProviderHint, getStableGatewayImageRef, - hasStaleGateway, + getResumeConfigConflicts, isGatewayHealthy, + hasStaleGateway, + getRequestedSandboxNameHint, + getResumeSandboxConflict, + getSandboxReuseState, + getSandboxStateFromOutputs, isSandboxReady, onboard, - preflight, + onboardSession, + printSandboxCreateRecoveryHints, pruneStaleSandboxEntry, + repairRecordedSandbox, + recoverGatewayRuntime, + startGatewayForRecovery, runCaptureOpenshell, setupInference, setupNim, - startGateway, - startGatewayForRecovery, + isInferenceRouteReady, + isOpenclawReady, + arePolicyPresetsApplied, + setupPoliciesWithSelection, + hydrateCredentialEnv, + shouldIncludeBuildContextPath, writeSandboxConfigSyncFile, patchStagedDockerfile, }; diff --git a/bin/lib/runtime-recovery.js b/bin/lib/runtime-recovery.js new file mode 100644 index 000000000..b4c1301c0 --- /dev/null +++ b/bin/lib/runtime-recovery.js @@ -0,0 +1,81 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +const onboardSession = require("./onboard-session"); + +function stripAnsi(text) { + // eslint-disable-next-line no-control-regex + return String(text || "").replace(/\x1b\[[0-9;]*m/g, ""); +} + +function parseLiveSandboxNames(listOutput = "") { + const clean = stripAnsi(listOutput); + const names = new Set(); + for (const rawLine of clean.split("\n")) { + const line = rawLine.trim(); + if (!line) continue; + if (/^(NAME|No sandboxes found\.?$)/i.test(line)) continue; + if (/^Error:/i.test(line)) continue; + const cols = line.split(/\s+/); + if (cols[0]) { + names.add(cols[0]); + } + } + return names; +} + +function classifySandboxLookup(output = "") { + const clean = stripAnsi(output).trim(); + if (!clean) { + return { state: "missing", reason: "empty" }; + } + if (/sandbox not found|status:\s*NotFound/i.test(clean)) { + return { state: "missing", reason: "not_found" }; + } + if ( + /transport error|client error|Connection reset by peer|Connection refused|No active gateway|Gateway: .*Error/i.test( + clean + ) + ) { + return { state: "unavailable", reason: "gateway_unavailable" }; + } + return { state: "present", reason: "ok" }; +} + +function classifyGatewayStatus(output = "") { + const clean = stripAnsi(output).trim(); + if (!clean) { + return { state: "inactive", reason: "empty" }; + } + if (/Connected/i.test(clean)) { + return { state: "connected", reason: "ok" }; + } + if ( + /No active gateway|transport error|client error|Connection reset by peer|Connection refused|Gateway: .*Error/i.test( + clean + ) + ) { + return { state: "unavailable", reason: "gateway_unavailable" }; + } + return { state: "inactive", reason: "not_connected" }; +} + +function shouldAttemptGatewayRecovery({ sandboxState = "missing", gatewayState = "inactive" } = {}) { + return sandboxState === "unavailable" && gatewayState !== "connected"; +} + +function getRecoveryCommand() { + const session = onboardSession.loadSession(); + if (session && session.resumable !== false) { + return "nemoclaw onboard --resume"; + } + return "nemoclaw onboard"; +} + +module.exports = { + classifyGatewayStatus, + classifySandboxLookup, + getRecoveryCommand, + parseLiveSandboxNames, + shouldAttemptGatewayRecovery, +}; diff --git a/bin/nemoclaw.js b/bin/nemoclaw.js index b070b7e9a..00a430b1d 100755 --- a/bin/nemoclaw.js +++ b/bin/nemoclaw.js @@ -134,7 +134,7 @@ async function recoverNamedGatewayRuntime() { } const shouldStartGateway = [before.state, after.state].some((state) => - ["named_unhealthy", "named_unreachable", "connected_other"].includes(state) + ["missing_named", "named_unhealthy", "named_unreachable", "connected_other"].includes(state) ); if (shouldStartGateway) { @@ -334,15 +334,16 @@ function exitWithSpawnResult(result) { async function onboard(args) { const { onboard: runOnboard } = require("./lib/onboard"); - const allowedArgs = new Set(["--non-interactive"]); + const allowedArgs = new Set(["--non-interactive", "--resume"]); const unknownArgs = args.filter((arg) => !allowedArgs.has(arg)); if (unknownArgs.length > 0) { console.error(` Unknown onboard option(s): ${unknownArgs.join(", ")}`); - console.error(" Usage: nemoclaw onboard [--non-interactive]"); + console.error(" Usage: nemoclaw onboard [--non-interactive] [--resume]"); process.exit(1); } const nonInteractive = args.includes("--non-interactive"); - await runOnboard({ nonInteractive }); + const resume = args.includes("--resume"); + await runOnboard({ nonInteractive, resume }); } async function setup() { diff --git a/install.sh b/install.sh index 0f67d0766..f81ac7f45 100755 --- a/install.sh +++ b/install.sh @@ -124,18 +124,38 @@ print_banner() { print_done() { local elapsed=$((SECONDS - _INSTALL_START)) - local sandbox_name - sandbox_name="$(resolve_default_sandbox_name)" info "=== Installation complete ===" printf "\n" printf " ${C_GREEN}${C_BOLD}NemoClaw${C_RESET} ${C_DIM}(%ss)${C_RESET}\n" "$elapsed" printf "\n" - printf " ${C_GREEN}Your OpenClaw Sandbox is live.${C_RESET}\n" - printf " ${C_DIM}Sandbox in, break things, and tell us what you find.${C_RESET}\n" - printf "\n" - printf " ${C_GREEN}Next:${C_RESET}\n" - printf " %s$%s nemoclaw %s connect\n" "$C_GREEN" "$C_RESET" "$sandbox_name" - printf " %ssandbox@%s$%s openclaw tui\n" "$C_GREEN" "$sandbox_name" "$C_RESET" + if [[ "$ONBOARD_RAN" == true ]]; then + local sandbox_name + sandbox_name="$(resolve_default_sandbox_name)" + printf " ${C_GREEN}Your OpenClaw Sandbox is live.${C_RESET}\n" + printf " ${C_DIM}Sandbox in, break things, and tell us what you find.${C_RESET}\n" + printf "\n" + printf " ${C_GREEN}Next:${C_RESET}\n" + printf " %s$%s nemoclaw %s connect\n" "$C_GREEN" "$C_RESET" "$sandbox_name" + printf " %ssandbox@%s$%s openclaw tui\n" "$C_GREEN" "$sandbox_name" "$C_RESET" + elif [[ "$NEMOCLAW_READY_NOW" == true ]]; then + printf " ${C_GREEN}NemoClaw CLI is ready in this shell.${C_RESET}\n" + printf " ${C_DIM}Onboarding has not run yet.${C_RESET}\n" + printf "\n" + printf " ${C_GREEN}Next:${C_RESET}\n" + printf " %s$%s nemoclaw onboard\n" "$C_GREEN" "$C_RESET" + else + printf " ${C_GREEN}NemoClaw CLI is installed.${C_RESET}\n" + printf " ${C_DIM}Onboarding did not run because this shell cannot resolve 'nemoclaw' yet.${C_RESET}\n" + printf "\n" + printf " ${C_GREEN}Next:${C_RESET}\n" + if [[ -n "$NEMOCLAW_RECOVERY_EXPORT_DIR" ]]; then + printf " %s$%s export PATH=\"%s:\$PATH\"\n" "$C_GREEN" "$C_RESET" "$NEMOCLAW_RECOVERY_EXPORT_DIR" + fi + if [[ -n "$NEMOCLAW_RECOVERY_PROFILE" ]]; then + printf " %s$%s source %s\n" "$C_GREEN" "$C_RESET" "$NEMOCLAW_RECOVERY_PROFILE" + fi + printf " %s$%s nemoclaw onboard\n" "$C_GREEN" "$C_RESET" + fi printf "\n" printf " ${C_BOLD}GitHub${C_RESET} ${C_DIM}https://github.com/nvidia/nemoclaw${C_RESET}\n" printf " ${C_BOLD}Docs${C_RESET} ${C_DIM}https://docs.nvidia.com/nemoclaw/latest/${C_RESET}\n" @@ -218,6 +238,10 @@ MIN_NPM_MAJOR=10 RUNTIME_REQUIREMENT_MSG="NemoClaw requires Node.js >=${MIN_NODE_VERSION} and npm >=${MIN_NPM_MAJOR}." NEMOCLAW_SHIM_DIR="${HOME}/.local/bin" ORIGINAL_PATH="${PATH:-}" +NEMOCLAW_READY_NOW=false +NEMOCLAW_RECOVERY_PROFILE="" +NEMOCLAW_RECOVERY_EXPORT_DIR="" +ONBOARD_RAN=false # Compare two semver strings (major.minor.patch). Returns 0 if $1 >= $2. # Rejects prerelease suffixes (e.g. "22.16.0-rc.1") to avoid arithmetic errors. @@ -248,6 +272,30 @@ ensure_nvm_loaded() { fi } +detect_shell_profile() { + local profile="$HOME/.bashrc" + case "$(basename "${SHELL:-}")" in + zsh) + profile="$HOME/.zshrc" + ;; + fish) + profile="$HOME/.config/fish/config.fish" + ;; + tcsh) + profile="$HOME/.tcshrc" + ;; + csh) + profile="$HOME/.cshrc" + ;; + *) + if [[ ! -f "$HOME/.bashrc" && -f "$HOME/.profile" ]]; then + profile="$HOME/.profile" + fi + ;; + esac + printf "%s" "$profile" +} + # Refresh PATH so that npm global bin is discoverable. # After nvm installs Node.js the global bin lives under the nvm prefix, # which may not yet be on PATH in the current session. @@ -509,30 +557,30 @@ install_nemoclaw() { # --------------------------------------------------------------------------- verify_nemoclaw() { if command_exists nemoclaw; then + NEMOCLAW_READY_NOW=true info "Verified: nemoclaw is available at $(command -v nemoclaw)" return 0 fi - # nemoclaw not on PATH — try to diagnose and suggest a fix - warn "nemoclaw is not on PATH after installation." - local npm_bin npm_bin="$(npm config get prefix 2>/dev/null)/bin" || true if [[ -n "$npm_bin" && -x "$npm_bin/nemoclaw" ]]; then ensure_nemoclaw_shim || true if command_exists nemoclaw; then + NEMOCLAW_READY_NOW=true info "Verified: nemoclaw is available at $(command -v nemoclaw)" return 0 fi - warn "Found nemoclaw at $npm_bin/nemoclaw but could not expose it on PATH." - warn "" - warn "Add one of these directories to your shell profile:" - warn " $NEMOCLAW_SHIM_DIR" - warn " $npm_bin" - warn "" - warn "Continuing — nemoclaw is installed but requires a PATH update." + NEMOCLAW_RECOVERY_PROFILE="$(detect_shell_profile)" + if [[ -x "$NEMOCLAW_SHIM_DIR/nemoclaw" ]]; then + NEMOCLAW_RECOVERY_EXPORT_DIR="$NEMOCLAW_SHIM_DIR" + else + NEMOCLAW_RECOVERY_EXPORT_DIR="$npm_bin" + fi + warn "Found nemoclaw at $npm_bin/nemoclaw but this shell still cannot resolve it." + warn "Onboarding will be skipped until PATH is updated." return 0 else warn "Could not locate the nemoclaw executable." @@ -547,14 +595,33 @@ verify_nemoclaw() { # --------------------------------------------------------------------------- run_onboard() { info "Running nemoclaw onboard…" + local -a onboard_cmd=(onboard) + if command_exists node && [[ -f "${HOME}/.nemoclaw/onboard-session.json" ]]; then + if node -e ' + const fs = require("fs"); + const file = process.argv[1]; + try { + const data = JSON.parse(fs.readFileSync(file, "utf8")); + const resumable = data && data.resumable !== false; + const status = data && data.status; + process.exit(resumable && status && status !== "complete" ? 0 : 1); + } catch { + process.exit(1); + } + ' "${HOME}/.nemoclaw/onboard-session.json"; then + info "Found an interrupted onboarding session — resuming it." + onboard_cmd+=(--resume) + fi + fi if [ "${NON_INTERACTIVE:-}" = "1" ]; then - nemoclaw onboard --non-interactive + onboard_cmd+=(--non-interactive) + nemoclaw "${onboard_cmd[@]}" elif [ -t 0 ]; then - nemoclaw onboard + nemoclaw "${onboard_cmd[@]}" elif exec 3/dev/null 2>&1; then TIMEOUT_BIN="gtimeout" fi +SCRIPT_DIR="" +REPO_ROOT="" +ONBOARD_SESSION_HELPER="" +SCRIPT_PATH="${BASH_SOURCE[0]:-}" +if [ -n "$SCRIPT_PATH" ] && [ -f "$SCRIPT_PATH" ]; then + SCRIPT_DIR="$(cd "$(dirname "$SCRIPT_PATH")" && pwd)" + REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" + ONBOARD_SESSION_HELPER="${REPO_ROOT}/bin/lib/onboard-session.js" +fi + # Redact known sensitive patterns (API keys, tokens, passwords in env/args). redact() { sed -E \ @@ -243,6 +253,24 @@ if [ "$QUICK" = false ]; then collect "openshell-gateway-info" openshell gateway info fi +# -- Onboard session state -- + +section "Onboard Session" +if [ -n "$ONBOARD_SESSION_HELPER" ] && [ -f "$ONBOARD_SESSION_HELPER" ] && command -v node >/dev/null 2>&1; then + # shellcheck disable=SC2016 + collect "onboard-session-summary" node -e ' + const helper = require(process.argv[1]); + const summary = helper.summarizeForDebug(); + if (!summary) { + process.stdout.write("No onboard session state found.\n"); + process.exit(0); + } + process.stdout.write(`${JSON.stringify(summary, null, 2)}\n`); + ' "$ONBOARD_SESSION_HELPER" +else + echo " (onboard session helper not available, skipping)" +fi + # -- Sandbox internals (via SSH using openshell ssh-config) -- if command -v openshell &>/dev/null \ diff --git a/scripts/install.sh b/scripts/install.sh index e94becca3..d67afea45 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -2,527 +2,31 @@ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # -# NemoClaw curl-pipe-bash installer. -# -# Usage: -# curl -fsSL https://raw.githubusercontent.com/NVIDIA/NemoClaw/main/scripts/install.sh | bash +# Legacy installer compatibility wrapper. +# The supported installer entrypoint is the repository-root install.sh: +# curl -fsSL https://www.nvidia.com/nemoclaw.sh | bash set -euo pipefail -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -NC='\033[0m' +ROOT_INSTALLER_URL="https://www.nvidia.com/nemoclaw.sh" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" +ROOT_INSTALLER="${SCRIPT_DIR%/scripts}/install.sh" -info() { echo -e "${GREEN}[install]${NC} $1"; } -warn() { echo -e "${YELLOW}[install]${NC} $1"; } -fail() { - echo -e "${RED}[install]${NC} $1" - exit 1 +warn_legacy_path() { + cat >&2 </dev/null && return 0 - if [ -z "${NVM_DIR:-}" ]; then - export NVM_DIR="$HOME/.nvm" - fi - if [ -s "$NVM_DIR/nvm.sh" ]; then - # shellcheck source=/dev/null - . "$NVM_DIR/nvm.sh" - fi -} - -# Refresh PATH so that npm global bin is discoverable. -refresh_path() { - ensure_nvm_loaded - - local npm_bin - npm_bin="$(npm config get prefix 2>/dev/null)/bin" || true - if [ -n "$npm_bin" ] && [ -d "$npm_bin" ]; then - case ":$PATH:" in - *":$npm_bin:"*) ;; # already on PATH - *) export PATH="$npm_bin:$PATH" ;; - esac - fi -} - -MIN_NODE_VERSION="22.16.0" -MIN_NPM_MAJOR=10 -RUNTIME_REQUIREMENT_MSG="NemoClaw requires Node.js >=${MIN_NODE_VERSION} and npm >=${MIN_NPM_MAJOR}." - -OS="$(uname -s)" -ARCH="$(uname -m)" - -case "$OS" in - Darwin) OS_LABEL="macOS" ;; - Linux) OS_LABEL="Linux" ;; - *) fail "Unsupported OS: $OS" ;; -esac - -case "$ARCH" in - x86_64 | amd64) ARCH_LABEL="x86_64" ;; - aarch64 | arm64) ARCH_LABEL="aarch64" ;; - *) fail "Unsupported architecture: $ARCH" ;; -esac - -info "Detected $OS_LABEL ($ARCH_LABEL)" - -# ── Detect Node.js version manager ────────────────────────────── - -NODE_MGR="none" -NEED_RESHIM=false - -if command -v asdf >/dev/null 2>&1 && asdf plugin list 2>/dev/null | grep -q nodejs; then - NODE_MGR="asdf" -elif [ -n "${NVM_DIR:-}" ] && [ -s "${NVM_DIR}/nvm.sh" ]; then - NODE_MGR="nvm" -elif [ -s "$HOME/.nvm/nvm.sh" ]; then - export NVM_DIR="$HOME/.nvm" - NODE_MGR="nvm" -elif command -v fnm >/dev/null 2>&1; then - NODE_MGR="fnm" -elif command -v brew >/dev/null 2>&1 && [ "$OS" = "Darwin" ]; then - NODE_MGR="brew" -elif [ "$OS" = "Linux" ]; then - NODE_MGR="nodesource" -fi - -info "Node.js manager: $NODE_MGR" - -# Compare two semver strings (major.minor.patch). Returns 0 if $1 >= $2. -# Rejects prerelease suffixes (e.g. "22.16.0-rc.1") to avoid arithmetic errors. -version_gte() { - [[ "$1" =~ ^[0-9]+(\.[0-9]+){0,2}$ ]] || return 1 - [[ "$2" =~ ^[0-9]+(\.[0-9]+){0,2}$ ]] || return 1 - local -a a b - IFS=. read -ra a <<<"$1" - IFS=. read -ra b <<<"$2" - for i in 0 1 2; do - local ai=${a[$i]:-0} bi=${b[$i]:-0} - if ((ai > bi)); then return 0; fi - if ((ai < bi)); then return 1; fi - done - return 0 -} - -version_major() { - printf '%s\n' "${1#v}" | cut -d. -f1 -} - -ensure_supported_runtime() { - command -v node >/dev/null 2>&1 || fail "${RUNTIME_REQUIREMENT_MSG} Node.js was not found on PATH." - command -v npm >/dev/null 2>&1 || fail "${RUNTIME_REQUIREMENT_MSG} npm was not found on PATH." - - local node_version npm_version node_major npm_major - node_version="$(node -v 2>/dev/null || true)" - npm_version="$(npm --version 2>/dev/null || true)" - node_major="$(version_major "$node_version")" - npm_major="$(version_major "$npm_version")" - - [[ "$node_major" =~ ^[0-9]+$ ]] || fail "Could not determine Node.js version from '${node_version}'. ${RUNTIME_REQUIREMENT_MSG}" - [[ "$npm_major" =~ ^[0-9]+$ ]] || fail "Could not determine npm version from '${npm_version}'. ${RUNTIME_REQUIREMENT_MSG}" - - if ! version_gte "${node_version#v}" "$MIN_NODE_VERSION" || ((npm_major < MIN_NPM_MAJOR)); then - fail "Unsupported runtime detected: Node.js ${node_version:-unknown}, npm ${npm_version:-unknown}. ${RUNTIME_REQUIREMENT_MSG} Upgrade Node.js and rerun the installer." - fi - - info "Runtime OK: Node.js ${node_version}, npm ${npm_version}" -} - -# ── Install Node.js 22 if needed ──────────────────────────────── - -install_node() { - local current_version="" - if command -v node >/dev/null 2>&1; then - current_version="$(node -v 2>/dev/null | sed 's/^v//')" - fi - - if [ -n "$current_version" ] && version_gte "$current_version" "$MIN_NODE_VERSION"; then - info "Node.js v${current_version} meets minimum requirement (>= v${MIN_NODE_VERSION})" - return 0 - fi - - info "Installing Node.js 22..." +warn_legacy_path - case "$NODE_MGR" in - asdf) - local latest_22 - latest_22="$(asdf list all nodejs 2>/dev/null | grep '^22\.' | tail -1)" - [ -n "$latest_22" ] || fail "Could not find Node.js 22 in asdf" - asdf install nodejs "$latest_22" - asdf global nodejs "$latest_22" - NEED_RESHIM=true - ;; - nvm) - # shellcheck source=/dev/null - . "${NVM_DIR}/nvm.sh" - nvm install 22 - nvm use 22 - nvm alias default 22 - ;; - fnm) - fnm install 22 - fnm use 22 - fnm default 22 - ;; - brew) - brew install node@22 - brew link --overwrite node@22 2>/dev/null || true - ;; - nodesource) - curl -fsSL https://deb.nodesource.com/setup_22.x | sudo -E bash - >/dev/null 2>&1 - sudo apt-get install -y -qq nodejs >/dev/null 2>&1 - ;; - none) - fail "No Node.js version manager found. Install Node.js >=${MIN_NODE_VERSION} manually, then re-run." - ;; - esac - - info "Node.js $(node -v) installed" -} - -install_node -ensure_supported_runtime - -# ── Install Docker ─────────────────────────────────────────────── - -install_docker() { - if command -v docker >/dev/null 2>&1 && docker info >/dev/null 2>&1; then - info "Docker already running" - return 0 - fi - - if command -v docker >/dev/null 2>&1; then - # Docker installed but not running - if [ "$OS" = "Darwin" ]; then - local colima_socket="" - local docker_desktop_socket="" - colima_socket="$(find_colima_docker_socket || true)" - docker_desktop_socket="$(find_docker_desktop_socket || true)" - - if [ -n "${DOCKER_HOST:-}" ]; then - fail "Docker is installed but the selected runtime is not running. Start the runtime behind DOCKER_HOST (${DOCKER_HOST}) and re-run." - fi - - if [ -n "$colima_socket" ] && [ -n "$docker_desktop_socket" ]; then - fail "Both Colima and Docker Desktop are available on this Mac. Start the runtime you want explicitly and re-run, or set DOCKER_HOST to select one." - fi - - if [ -n "$docker_desktop_socket" ]; then - fail "Docker Desktop appears to be installed but is not running. Start Docker Desktop and re-run." - fi - - if command -v colima >/dev/null 2>&1; then - info "Starting Colima..." - colima start - return 0 - fi - fi - fail "Docker is installed but not running. Please start Docker and re-run." - fi - - info "Installing Docker..." - - case "$OS" in - Darwin) - if ! command -v brew >/dev/null 2>&1; then - fail "Homebrew required to install Docker on macOS. Install from https://brew.sh" - fi - info "Installing Colima + Docker CLI via Homebrew..." - brew install colima docker - info "Starting Colima..." - colima start - ;; - Linux) - sudo apt-get update -qq >/dev/null 2>&1 - sudo apt-get install -y -qq docker.io >/dev/null 2>&1 - sudo usermod -aG docker "$(whoami)" - info "Docker installed. You may need to log out and back in for group changes." - ;; - esac - - if ! docker info >/dev/null 2>&1; then - fail "Docker installed but not running. Start Docker and re-run." - fi - - info "Docker is running" -} - -install_docker - -# ── Install OpenShell CLI binary ───────────────────────────────── - -install_openshell() { - if command -v openshell >/dev/null 2>&1; then - info "openshell already installed: $(openshell --version 2>&1 || echo 'unknown')" - return 0 - fi - - info "Installing openshell CLI..." - - case "$OS" in - Darwin) - case "$ARCH_LABEL" in - x86_64) ASSET="openshell-x86_64-apple-darwin.tar.gz" ;; - aarch64) ASSET="openshell-aarch64-apple-darwin.tar.gz" ;; - esac - ;; - Linux) - case "$ARCH_LABEL" in - x86_64) ASSET="openshell-x86_64-unknown-linux-musl.tar.gz" ;; - aarch64) ASSET="openshell-aarch64-unknown-linux-musl.tar.gz" ;; - esac - ;; - esac - - tmpdir="$(mktemp -d)" - if command -v gh >/dev/null 2>&1; then - GH_TOKEN="${GITHUB_TOKEN:-}" gh release download --repo NVIDIA/OpenShell \ - --pattern "$ASSET" --dir "$tmpdir" - else - # Fallback: curl latest release - curl -fsSL "https://github.com/NVIDIA/OpenShell/releases/latest/download/$ASSET" \ - -o "$tmpdir/$ASSET" - fi - - tar xzf "$tmpdir/$ASSET" -C "$tmpdir" - - if [ -w /usr/local/bin ]; then - install -m 755 "$tmpdir/openshell" /usr/local/bin/openshell - else - sudo install -m 755 "$tmpdir/openshell" /usr/local/bin/openshell - fi - - rm -rf "$tmpdir" - info "openshell $(openshell --version 2>&1 || echo '') installed" -} - -install_openshell - -# ── Pre-extract openclaw workaround (GH-503) ──────────────────── -# The openclaw npm tarball is missing directory entries for extensions/, -# skills/, and dist/plugin-sdk/config/. npm's tar extractor hard-fails on -# these but system tar handles them fine. We pre-extract openclaw into -# node_modules BEFORE npm install so npm sees the dep is already satisfied. -pre_extract_openclaw() { - local install_dir="$1" - local openclaw_version - openclaw_version=$(node -e "console.log(require('${install_dir}/package.json').dependencies.openclaw)" 2>/dev/null) || openclaw_version="" - - if [ -z "$openclaw_version" ]; then - warn "Could not determine openclaw version — skipping pre-extraction" - return 1 - fi - - info "Pre-extracting openclaw@${openclaw_version} with system tar (GH-503 workaround)…" - local tmpdir - tmpdir="$(mktemp -d)" - if npm pack "openclaw@${openclaw_version}" --pack-destination "$tmpdir" >/dev/null 2>&1; then - local tgz - tgz="$(find "$tmpdir" -maxdepth 1 -name 'openclaw-*.tgz' -print -quit)" - if [ -n "$tgz" ] && [ -f "$tgz" ]; then - if mkdir -p "${install_dir}/node_modules/openclaw" \ - && tar xzf "$tgz" -C "${install_dir}/node_modules/openclaw" --strip-components=1; then - info "openclaw pre-extracted successfully" - else - warn "Failed to extract openclaw tarball" - rm -rf "$tmpdir" - return 1 - fi - else - warn "npm pack succeeded but tarball not found" - rm -rf "$tmpdir" - return 1 - fi - else - warn "Failed to download openclaw tarball" - rm -rf "$tmpdir" - return 1 - fi - rm -rf "$tmpdir" -} - -# ── Resolve release tag ────────────────────────────────────────── -# Priority: NEMOCLAW_INSTALL_TAG env var > GitHub releases API > "main" fallback. -resolve_release_tag() { - if [ -n "${NEMOCLAW_INSTALL_TAG:-}" ]; then - printf "%s" "$NEMOCLAW_INSTALL_TAG" - return 0 - fi - - local response tag - response="$(curl -fsSL --max-time 10 \ - https://api.github.com/repos/NVIDIA/NemoClaw/releases/latest 2>/dev/null)" || true - tag="$(printf '%s' "$response" \ - | grep '"tag_name"' \ - | sed -E 's/.*"tag_name":[[:space:]]*"([^"]+)".*/\1/' \ - | head -1 || true)" - - if [ -n "$tag" ] && printf '%s' "$tag" | grep -qE '^v[0-9]'; then - printf "%s" "$tag" - else - printf "main" - fi -} - -# ── Install NemoClaw CLI ───────────────────────────────────────── - -info "Installing nemoclaw CLI..." -# Resolve the latest release tag so we never install raw main. -NEMOCLAW_RELEASE_REF="$(resolve_release_tag)" -info "Resolved install ref: ${NEMOCLAW_RELEASE_REF}" -# Clone first so we can pre-extract openclaw before npm install (GH-503). -# npm install -g git+https://... does this internally but we can't hook -# into its extraction pipeline, so we do it ourselves. -NEMOCLAW_SRC="${HOME}/.nemoclaw/source" -rm -rf "$NEMOCLAW_SRC" -mkdir -p "$(dirname "$NEMOCLAW_SRC")" -git clone --depth 1 --branch "$NEMOCLAW_RELEASE_REF" https://github.com/NVIDIA/NemoClaw.git "$NEMOCLAW_SRC" -pre_extract_openclaw "$NEMOCLAW_SRC" || warn "Pre-extraction failed — npm install may fail if openclaw tarball is broken" -# Use sudo for npm link only when the global prefix directory is not writable -# by the current user (e.g., system-managed nodesource installs to /usr). -SUDO="" -NPM_GLOBAL_PREFIX="$(npm config get prefix 2>/dev/null)" || true -if [ -n "$NPM_GLOBAL_PREFIX" ] && [ ! -w "$NPM_GLOBAL_PREFIX" ] && [ "$(id -u)" -ne 0 ]; then - SUDO="sudo" -fi -(cd "$NEMOCLAW_SRC" && npm install --ignore-scripts && cd nemoclaw && npm install --ignore-scripts && npm run build && cd .. && $SUDO npm link) - -if [ "$NEED_RESHIM" = true ]; then - info "Reshimming asdf..." - asdf reshim nodejs -fi - -refresh_path - -# ── Verify ─────────────────────────────────────────────────────── - -if ! command -v nemoclaw >/dev/null 2>&1; then - # Try refreshing PATH one more time - refresh_path -fi - -if ! command -v nemoclaw >/dev/null 2>&1; then - npm_bin="$(npm config get prefix 2>/dev/null)/bin" || true - if [ -n "$npm_bin" ] && [ -x "$npm_bin/nemoclaw" ]; then - warn "nemoclaw installed at $npm_bin/nemoclaw but not on current PATH." - warn "" - warn "Add it to your shell profile:" - warn " echo 'export PATH=\"$npm_bin:\$PATH\"' >> ~/.bashrc" - warn " source ~/.bashrc" - warn "" - warn "Or for zsh:" - warn " echo 'export PATH=\"$npm_bin:\$PATH\"' >> ~/.zshrc" - warn " source ~/.zshrc" - else - fail "nemoclaw installation failed. Binary not found." - fi +if [[ ! -f "$ROOT_INSTALLER" ]]; then + cat <&2 +[install] scripts/install.sh only works from a NemoClaw repository checkout. +[install] supported installer: ${ROOT_INSTALLER_URL} +EOF + exit 1 fi -echo "" -info "Installation complete!" -info "nemoclaw $(nemoclaw --version 2>/dev/null || echo 'v0.1.0') is ready." -echo "" -echo " Run \`nemoclaw onboard\` to get started" -echo "" - -# ── Post-install: shell reload instructions ────────────────── - -if [ "$NODE_MGR" = "nvm" ] || [ "$NODE_MGR" = "fnm" ]; then - profile="$HOME/.bashrc" - if [ -n "${ZSH_VERSION:-}" ] || [ "$(basename "${SHELL:-}")" = "zsh" ]; then - profile="$HOME/.zshrc" - elif [ ! -f "$HOME/.bashrc" ] && [ -f "$HOME/.profile" ]; then - profile="$HOME/.profile" - fi - echo " ──────────────────────────────────────────────────" - warn "Your current shell may not have the updated PATH." - echo "" - echo " To use nemoclaw now, run:" - echo "" - echo " source $profile" - echo "" - echo " Or open a new terminal window." - echo " ──────────────────────────────────────────────────" - echo "" -fi +exec bash "$ROOT_INSTALLER" "$@" diff --git a/test/cli.test.js b/test/cli.test.js index 7cfb06e0d..aba5f865c 100644 --- a/test/cli.test.js +++ b/test/cli.test.js @@ -68,6 +68,12 @@ describe("CLI dispatch", () => { expect(r.out.includes("Unknown onboard option")).toBeTruthy(); }); + it("accepts onboard --resume in CLI parsing", () => { + const r = run("onboard --resume --non-interactiv"); + expect(r.code).toBe(1); + expect(r.out.includes("Unknown onboard option(s): --non-interactiv")).toBeTruthy(); + }); + it("debug --help exits 0 and shows usage", () => { const r = run("debug --help"); expect(r.code).toBe(0); @@ -81,6 +87,7 @@ describe("CLI dispatch", () => { expect(r.code).toBe(0); expect(r.out.includes("Collecting diagnostics")).toBeTruthy(); expect(r.out.includes("System")).toBeTruthy(); + expect(r.out.includes("Onboard Session")).toBeTruthy(); expect(r.out.includes("Done")).toBeTruthy(); }); diff --git a/test/e2e/test-onboard-repair.sh b/test/e2e/test-onboard-repair.sh new file mode 100755 index 000000000..5e14763e1 --- /dev/null +++ b/test/e2e/test-onboard-repair.sh @@ -0,0 +1,331 @@ +#!/bin/bash +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# E2E: resume repair and invalidation behavior. +# +# Regression coverage for issue #446. +# Validates that: +# 1. Resume recreates a missing recorded sandbox instead of assuming it still exists. +# 2. Resume rejects a different requested sandbox name on the same host. +# 3. Resume rejects explicit provider/model changes that conflict with recorded state. +# +# Prerequisites: +# - Docker running +# - openshell CLI installed +# - Node.js available +# - NVIDIA_API_KEY set to a valid nvapi-* key before starting the test +# +# Usage: +# NVIDIA_API_KEY=nvapi-... bash test/e2e/test-onboard-repair.sh + +set -uo pipefail + +PASS=0 +FAIL=0 +SKIP=0 +TOTAL=0 + +pass() { + ((PASS++)) + ((TOTAL++)) + printf '\033[32m PASS: %s\033[0m\n' "$1" +} +fail() { + ((FAIL++)) + ((TOTAL++)) + printf '\033[31m FAIL: %s\033[0m\n' "$1" +} +skip() { + ((SKIP++)) + ((TOTAL++)) + printf '\033[33m SKIP: %s\033[0m\n' "$1" +} +section() { + echo "" + printf '\033[1;36m=== %s ===\033[0m\n' "$1" +} +info() { printf '\033[1;34m [info]\033[0m %s\n' "$1"; } + +if [ -d /workspace ] && [ -f /workspace/install.sh ]; then + REPO="/workspace" +elif [ -f "$(cd "$(dirname "$0")/../.." && pwd)/install.sh" ]; then + REPO="$(cd "$(dirname "$0")/../.." && pwd)" +else + echo "ERROR: Cannot find repo root." + exit 1 +fi + +run_nemoclaw() { + node "$REPO/bin/nemoclaw.js" "$@" +} + +SANDBOX_NAME="${NEMOCLAW_SANDBOX_NAME:-e2e-repair}" +OTHER_SANDBOX_NAME="${NEMOCLAW_OTHER_SANDBOX_NAME:-e2e-other}" +SESSION_FILE="$HOME/.nemoclaw/onboard-session.json" +RESTORE_API_KEY="${NVIDIA_API_KEY:-}" + +# ══════════════════════════════════════════════════════════════════ +# Phase 0: Pre-cleanup +# ══════════════════════════════════════════════════════════════════ +section "Phase 0: Pre-cleanup" +info "Destroying any leftover sandbox/gateway from previous runs..." +run_nemoclaw "$SANDBOX_NAME" destroy 2>/dev/null || true +run_nemoclaw "$OTHER_SANDBOX_NAME" destroy 2>/dev/null || true +openshell sandbox delete "$SANDBOX_NAME" 2>/dev/null || true +openshell sandbox delete "$OTHER_SANDBOX_NAME" 2>/dev/null || true +openshell forward stop 18789 2>/dev/null || true +openshell gateway destroy -g nemoclaw 2>/dev/null || true +rm -f "$SESSION_FILE" +pass "Pre-cleanup complete" + +# ══════════════════════════════════════════════════════════════════ +# Phase 1: Prerequisites +# ══════════════════════════════════════════════════════════════════ +section "Phase 1: Prerequisites" + +if docker info >/dev/null 2>&1; then + pass "Docker is running" +else + fail "Docker is not running — cannot continue" + exit 1 +fi + +if command -v openshell >/dev/null 2>&1; then + pass "openshell CLI installed" +else + fail "openshell CLI not found — cannot continue" + exit 1 +fi + +if command -v node >/dev/null 2>&1; then + pass "Node.js available" +else + fail "Node.js not found — cannot continue" + exit 1 +fi + +if [[ -n "$RESTORE_API_KEY" && "$RESTORE_API_KEY" == nvapi-* ]]; then + pass "NVIDIA_API_KEY is set (starts with nvapi-)" +else + fail "NVIDIA_API_KEY not set or invalid — required for resume completion" + exit 1 +fi + +node -e ' +const { saveCredential } = require(process.argv[1]); +saveCredential("NVIDIA_API_KEY", process.argv[2]); +' "$REPO/bin/lib/credentials.js" "$RESTORE_API_KEY" +pass "Stored NVIDIA_API_KEY in ~/.nemoclaw/credentials.json for resume hydration" + +# ══════════════════════════════════════════════════════════════════ +# Phase 2: Create interrupted resumable state +# ══════════════════════════════════════════════════════════════════ +section "Phase 2: Create interrupted state" +info "Running onboard with an invalid policy mode to create resumable state..." + +FIRST_LOG="$(mktemp)" +NEMOCLAW_NON_INTERACTIVE=1 \ + NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME" \ + NEMOCLAW_RECREATE_SANDBOX=1 \ + NEMOCLAW_POLICY_MODE=invalid \ + node "$REPO/bin/nemoclaw.js" onboard --non-interactive >"$FIRST_LOG" 2>&1 +first_exit=$? +first_output="$(cat "$FIRST_LOG")" +rm -f "$FIRST_LOG" + +if [ $first_exit -eq 1 ]; then + pass "First onboard exited 1 (expected interrupted run)" +else + fail "First onboard exited $first_exit (expected 1)" + echo "$first_output" + exit 1 +fi + +if [ -f "$SESSION_FILE" ]; then + pass "Onboard session file created" +else + fail "Onboard session file missing after interrupted run" +fi + +if echo "$first_output" | grep -q "Unsupported NEMOCLAW_POLICY_MODE: invalid"; then + pass "First run failed at policy setup as intended" +else + fail "First run did not fail at the expected policy step" +fi + +if openshell sandbox get "$SANDBOX_NAME" >/dev/null 2>&1; then + pass "Sandbox '$SANDBOX_NAME' exists after interrupted run" +else + fail "Sandbox '$SANDBOX_NAME' not found after interrupted run" +fi + +# ══════════════════════════════════════════════════════════════════ +# Phase 3: Repair missing sandbox on resume +# ══════════════════════════════════════════════════════════════════ +section "Phase 3: Repair missing sandbox" +info "Deleting the recorded sandbox under the session, then resuming..." + +openshell sandbox delete "$SANDBOX_NAME" >/dev/null 2>&1 || true +openshell forward stop 18789 >/dev/null 2>&1 || true + +if openshell sandbox get "$SANDBOX_NAME" >/dev/null 2>&1; then + fail "Sandbox '$SANDBOX_NAME' still exists after forced deletion" +else + pass "Sandbox '$SANDBOX_NAME' removed to simulate stale recorded state" +fi + +REPAIR_LOG="$(mktemp)" +env -u NVIDIA_API_KEY \ + NEMOCLAW_NON_INTERACTIVE=1 \ + NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME" \ + NEMOCLAW_POLICY_MODE=skip \ + node "$REPO/bin/nemoclaw.js" onboard --resume --non-interactive >"$REPAIR_LOG" 2>&1 +repair_exit=$? +repair_output="$(cat "$REPAIR_LOG")" +rm -f "$REPAIR_LOG" + +if [ $repair_exit -eq 0 ]; then + pass "Resume completed after repairing missing sandbox" +else + fail "Resume exited $repair_exit during missing-sandbox repair" + echo "$repair_output" + exit 1 +fi + +if echo "$repair_output" | grep -q "\[resume\] Skipping preflight (cached)"; then + pass "Repair resume skipped preflight" +else + fail "Repair resume did not skip preflight" +fi + +if echo "$repair_output" | grep -q "\[resume\] Skipping gateway (running)"; then + pass "Repair resume skipped gateway" +else + fail "Repair resume did not skip gateway" +fi + +if echo "$repair_output" | grep -q "\[resume\] Recorded sandbox state is unavailable; recreating it."; then + pass "Repair resume detected missing sandbox" +else + fail "Repair resume did not report missing sandbox recreation" +fi + +if echo "$repair_output" | grep -q "\[5/7\] Creating sandbox"; then + pass "Repair resume recreated sandbox" +else + fail "Repair resume did not rerun sandbox creation" +fi + +if run_nemoclaw "$SANDBOX_NAME" status >/dev/null 2>&1; then + pass "Repaired sandbox '$SANDBOX_NAME' is manageable" +else + fail "Repaired sandbox '$SANDBOX_NAME' status failed" +fi + +# ══════════════════════════════════════════════════════════════════ +# Phase 4: Reject conflicting sandbox +# ══════════════════════════════════════════════════════════════════ +section "Phase 4: Reject conflicting sandbox" +info "Attempting resume with a different sandbox name..." + +SANDBOX_CONFLICT_LOG="$(mktemp)" +env -u NVIDIA_API_KEY \ + NEMOCLAW_NON_INTERACTIVE=1 \ + NEMOCLAW_SANDBOX_NAME="$OTHER_SANDBOX_NAME" \ + NEMOCLAW_POLICY_MODE=skip \ + node "$REPO/bin/nemoclaw.js" onboard --resume --non-interactive >"$SANDBOX_CONFLICT_LOG" 2>&1 +sandbox_conflict_exit=$? +sandbox_conflict_output="$(cat "$SANDBOX_CONFLICT_LOG")" +rm -f "$SANDBOX_CONFLICT_LOG" + +if [ $sandbox_conflict_exit -eq 1 ]; then + pass "Resume rejected conflicting sandbox name" +else + fail "Resume exited $sandbox_conflict_exit for conflicting sandbox (expected 1)" +fi + +if echo "$sandbox_conflict_output" | grep -q "Resumable state belongs to sandbox '${SANDBOX_NAME}', not '${OTHER_SANDBOX_NAME}'."; then + pass "Conflicting sandbox message is explicit" +else + fail "Conflicting sandbox message missing or incorrect" +fi + +# ══════════════════════════════════════════════════════════════════ +# Phase 5: Reject conflicting provider/model +# ══════════════════════════════════════════════════════════════════ +section "Phase 5: Reject conflicting provider and model" +info "Attempting resume with conflicting provider/model inputs..." + +PROVIDER_CONFLICT_LOG="$(mktemp)" +env -u NVIDIA_API_KEY \ + NEMOCLAW_NON_INTERACTIVE=1 \ + NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME" \ + NEMOCLAW_PROVIDER=openai \ + NEMOCLAW_MODEL=gpt-5.4 \ + NEMOCLAW_POLICY_MODE=skip \ + node "$REPO/bin/nemoclaw.js" onboard --resume --non-interactive >"$PROVIDER_CONFLICT_LOG" 2>&1 +provider_conflict_exit=$? +provider_conflict_output="$(cat "$PROVIDER_CONFLICT_LOG")" +rm -f "$PROVIDER_CONFLICT_LOG" + +if [ $provider_conflict_exit -eq 1 ]; then + pass "Resume rejected conflicting provider/model" +else + fail "Resume exited $provider_conflict_exit for conflicting provider/model (expected 1)" +fi + +if echo "$provider_conflict_output" | grep -Eq "Resumable state recorded provider '.*', not '.*'\."; then + pass "Conflicting provider message is explicit" +else + fail "Conflicting provider message missing or incorrect" +fi + +if echo "$provider_conflict_output" | grep -Eq "Resumable state recorded model '.*', not 'gpt-5.4'\."; then + pass "Conflicting model message is explicit" +else + fail "Conflicting model message missing or incorrect" +fi + +# ══════════════════════════════════════════════════════════════════ +# Phase 6: Final cleanup +# ══════════════════════════════════════════════════════════════════ +section "Phase 6: Final cleanup" + +run_nemoclaw "$SANDBOX_NAME" destroy 2>/dev/null || true +run_nemoclaw "$OTHER_SANDBOX_NAME" destroy 2>/dev/null || true +openshell sandbox delete "$SANDBOX_NAME" 2>/dev/null || true +openshell sandbox delete "$OTHER_SANDBOX_NAME" 2>/dev/null || true +openshell forward stop 18789 2>/dev/null || true +openshell gateway destroy -g nemoclaw 2>/dev/null || true +rm -f "$SESSION_FILE" + +if openshell sandbox get "$SANDBOX_NAME" >/dev/null 2>&1; then + fail "Sandbox '$SANDBOX_NAME' still exists after cleanup" +else + pass "Sandbox '$SANDBOX_NAME' cleaned up" +fi + +if [ -f "$SESSION_FILE" ]; then + fail "Onboard session file still exists after cleanup" +else + pass "Onboard session file cleaned up" +fi + +pass "Final cleanup complete" + +# ══════════════════════════════════════════════════════════════════ +# Summary +# ══════════════════════════════════════════════════════════════════ +echo "" +echo "========================================" +echo " PASS: $PASS" +echo " FAIL: $FAIL" +echo " SKIP: $SKIP" +echo " TOTAL: $TOTAL" +echo "========================================" +echo "" + +if [ $FAIL -ne 0 ]; then + exit 1 +fi diff --git a/test/e2e/test-onboard-resume.sh b/test/e2e/test-onboard-resume.sh new file mode 100755 index 000000000..2ccef1fc3 --- /dev/null +++ b/test/e2e/test-onboard-resume.sh @@ -0,0 +1,339 @@ +#!/bin/bash +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# E2E: interrupted onboard -> resume -> verify completion. +# +# Regression test for issue #446. +# Validates that: +# 1. A non-interactive onboard run can fail after sandbox creation while leaving resumable state. +# 2. The onboard session file records the interrupted state safely. +# 3. `nemoclaw onboard --resume --non-interactive` skips cached preflight, +# gateway, and sandbox work, then completes by hydrating the stored credential. +# +# Prerequisites: +# - Docker running +# - openshell CLI installed +# - Node.js available +# - NVIDIA_API_KEY set to a valid nvapi-* key before starting the test +# +# Usage: +# NVIDIA_API_KEY=nvapi-... bash test/e2e/test-onboard-resume.sh + +set -uo pipefail + +if [ "${NEMOCLAW_E2E_NO_TIMEOUT:-0}" != "1" ]; then + TIMEOUT_SECONDS="${TIMEOUT_SECONDS:-600}" + TIMEOUT_BIN="" + if command -v timeout >/dev/null 2>&1; then + TIMEOUT_BIN="timeout" + elif command -v gtimeout >/dev/null 2>&1; then + TIMEOUT_BIN="gtimeout" + fi + + if [ -n "$TIMEOUT_BIN" ]; then + export NEMOCLAW_E2E_NO_TIMEOUT=1 + exec "$TIMEOUT_BIN" -s TERM "$TIMEOUT_SECONDS" "$0" "$@" + fi +fi + +PASS=0 +FAIL=0 +SKIP=0 +TOTAL=0 + +pass() { + ((PASS++)) + ((TOTAL++)) + printf '\033[32m PASS: %s\033[0m\n' "$1" +} +fail() { + ((FAIL++)) + ((TOTAL++)) + printf '\033[31m FAIL: %s\033[0m\n' "$1" +} +skip() { + ((SKIP++)) + ((TOTAL++)) + printf '\033[33m SKIP: %s\033[0m\n' "$1" +} +section() { + echo "" + printf '\033[1;36m=== %s ===\033[0m\n' "$1" +} +info() { printf '\033[1;34m [info]\033[0m %s\n' "$1"; } + +if [ -d /workspace ] && [ -f /workspace/install.sh ]; then + REPO="/workspace" +elif [ -f "$(cd "$(dirname "$0")/../.." && pwd)/install.sh" ]; then + REPO="$(cd "$(dirname "$0")/../.." && pwd)" +else + echo "ERROR: Cannot find repo root." + exit 1 +fi + +run_nemoclaw() { + node "$REPO/bin/nemoclaw.js" "$@" +} + +SANDBOX_NAME="${NEMOCLAW_SANDBOX_NAME:-e2e-resume}" +SESSION_FILE="$HOME/.nemoclaw/onboard-session.json" +REGISTRY="$HOME/.nemoclaw/sandboxes.json" +RESTORE_API_KEY="${NVIDIA_API_KEY:-}" + +# ══════════════════════════════════════════════════════════════════ +# Phase 0: Pre-cleanup +# ══════════════════════════════════════════════════════════════════ +section "Phase 0: Pre-cleanup" +info "Destroying any leftover sandbox/gateway from previous runs..." +run_nemoclaw "$SANDBOX_NAME" destroy 2>/dev/null || true +openshell sandbox delete "$SANDBOX_NAME" 2>/dev/null || true +openshell forward stop 18789 2>/dev/null || true +openshell gateway destroy -g nemoclaw 2>/dev/null || true +rm -f "$SESSION_FILE" +pass "Pre-cleanup complete" + +# ══════════════════════════════════════════════════════════════════ +# Phase 1: Prerequisites +# ══════════════════════════════════════════════════════════════════ +section "Phase 1: Prerequisites" + +if docker info >/dev/null 2>&1; then + pass "Docker is running" +else + fail "Docker is not running — cannot continue" + exit 1 +fi + +if command -v openshell >/dev/null 2>&1; then + pass "openshell CLI installed" +else + fail "openshell CLI not found — cannot continue" + exit 1 +fi + +if command -v node >/dev/null 2>&1; then + pass "Node.js available" +else + fail "Node.js not found — cannot continue" + exit 1 +fi + +if [[ -n "$RESTORE_API_KEY" && "$RESTORE_API_KEY" == nvapi-* ]]; then + pass "NVIDIA_API_KEY is set (starts with nvapi-)" +else + fail "NVIDIA_API_KEY not set or invalid — required for resume completion" + exit 1 +fi + +if curl -sf --max-time 10 https://integrate.api.nvidia.com/v1/models >/dev/null 2>&1; then + pass "Network access to integrate.api.nvidia.com" +else + fail "Cannot reach integrate.api.nvidia.com" + exit 1 +fi + +node -e ' +const { saveCredential } = require(process.argv[1]); +saveCredential("NVIDIA_API_KEY", process.argv[2]); +' "$REPO/bin/lib/credentials.js" "$RESTORE_API_KEY" +pass "Stored NVIDIA_API_KEY in ~/.nemoclaw/credentials.json for resume hydration" + +# ══════════════════════════════════════════════════════════════════ +# Phase 2: First onboard (forced failure after sandbox creation) +# ══════════════════════════════════════════════════════════════════ +section "Phase 2: First onboard (interrupted)" +info "Running onboard with an invalid policy mode to create resumable state..." + +FIRST_LOG="$(mktemp)" +NEMOCLAW_NON_INTERACTIVE=1 \ + NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME" \ + NEMOCLAW_RECREATE_SANDBOX=1 \ + NEMOCLAW_POLICY_MODE=invalid \ + node "$REPO/bin/nemoclaw.js" onboard --non-interactive >"$FIRST_LOG" 2>&1 +first_exit=$? +first_output="$(cat "$FIRST_LOG")" +rm -f "$FIRST_LOG" + +if [ $first_exit -eq 1 ]; then + pass "First onboard exited 1 (expected interrupted run)" +else + fail "First onboard exited $first_exit (expected 1)" + echo "$first_output" + exit 1 +fi + +if echo "$first_output" | grep -q "Sandbox '${SANDBOX_NAME}' created"; then + pass "Sandbox '$SANDBOX_NAME' created before interruption" +else + fail "Sandbox creation not confirmed in first run output" +fi + +if echo "$first_output" | grep -q "Unsupported NEMOCLAW_POLICY_MODE: invalid"; then + pass "First run failed at policy setup as intended" +else + fail "First run did not fail at the expected policy step" +fi + +if openshell sandbox get "$SANDBOX_NAME" >/dev/null 2>&1; then + pass "Sandbox '$SANDBOX_NAME' exists after interrupted run" +else + fail "Sandbox '$SANDBOX_NAME' not found after interrupted run" +fi + +if [ -f "$SESSION_FILE" ]; then + pass "Onboard session file created" +else + fail "Onboard session file missing after interrupted run" +fi + +node -e ' +const fs = require("fs"); +const file = process.argv[1]; +const data = JSON.parse(fs.readFileSync(file, "utf8")); +if (data.status !== "failed") process.exit(1); +if (data.lastCompletedStep !== "openclaw") process.exit(2); +if (!data.failure || data.failure.step !== "policies") process.exit(3); +' "$SESSION_FILE" +case $? in + 0) pass "Session file recorded openclaw completion and policy failure" ;; + *) fail "Session file did not record the expected interrupted state" ;; +esac + +# ══════════════════════════════════════════════════════════════════ +# Phase 3: Resume and complete +# ══════════════════════════════════════════════════════════════════ +section "Phase 3: Resume" +info "Running onboard --resume with NVIDIA_API_KEY removed from env..." + +RESUME_LOG="$(mktemp)" +env -u NVIDIA_API_KEY \ + NEMOCLAW_NON_INTERACTIVE=1 \ + NEMOCLAW_SANDBOX_NAME="$SANDBOX_NAME" \ + NEMOCLAW_POLICY_MODE=skip \ + node "$REPO/bin/nemoclaw.js" onboard --resume --non-interactive >"$RESUME_LOG" 2>&1 +resume_exit=$? +resume_output="$(cat "$RESUME_LOG")" +rm -f "$RESUME_LOG" + +if [ $resume_exit -eq 0 ]; then + pass "Resume completed successfully" +else + fail "Resume exited $resume_exit (expected 0)" + echo "$resume_output" + exit 1 +fi + +if echo "$resume_output" | grep -q "\[resume\] Skipping preflight (cached)"; then + pass "Resume skipped preflight" +else + fail "Resume did not skip preflight" +fi + +if echo "$resume_output" | grep -q "\[resume\] Skipping gateway (running)"; then + pass "Resume skipped gateway" +else + fail "Resume did not skip gateway" +fi + +if echo "$resume_output" | grep -q "\[resume\] Skipping sandbox (${SANDBOX_NAME})"; then + pass "Resume skipped sandbox" +else + fail "Resume did not skip sandbox" +fi + +if echo "$resume_output" | grep -q "\[1/7\] Preflight checks"; then + fail "Resume reran preflight unexpectedly" +else + pass "Resume did not rerun preflight" +fi + +if echo "$resume_output" | grep -q "\[2/7\] Starting OpenShell gateway"; then + fail "Resume reran gateway startup unexpectedly" +else + pass "Resume did not rerun gateway startup" +fi + +if echo "$resume_output" | grep -q "\[5/7\] Creating sandbox"; then + fail "Resume reran sandbox creation unexpectedly" +else + pass "Resume did not rerun sandbox creation" +fi + +if echo "$resume_output" | grep -q "\[4/7\] Setting up inference provider"; then + pass "Resume continued with inference setup" +else + fail "Resume did not continue with inference setup" +fi + +if run_nemoclaw "$SANDBOX_NAME" status >/dev/null 2>&1; then + pass "Sandbox '$SANDBOX_NAME' is manageable after resume" +else + fail "Sandbox '$SANDBOX_NAME' status failed after resume" +fi + +node -e ' +const fs = require("fs"); +const file = process.argv[1]; +const data = JSON.parse(fs.readFileSync(file, "utf8")); +if (data.status !== "complete") process.exit(1); +if (data.provider !== "nvidia-prod") process.exit(2); +if (data.steps.preflight.status !== "complete") process.exit(3); +if (data.steps.gateway.status !== "complete") process.exit(4); +if (data.steps.sandbox.status !== "complete") process.exit(5); +if (data.steps.provider_selection.status !== "complete") process.exit(6); +if (data.steps.inference.status !== "complete") process.exit(7); +if (data.steps.openclaw.status !== "complete") process.exit(8); +if (data.steps.policies.status !== "complete") process.exit(9); +' "$SESSION_FILE" +case $? in + 0) pass "Session file recorded full completion after resume" ;; + *) fail "Session file did not record the expected completed state after resume" ;; +esac + +if [ -f "$REGISTRY" ] && grep -q "$SANDBOX_NAME" "$REGISTRY"; then + pass "Registry contains resumed sandbox entry" +else + fail "Registry does not contain resumed sandbox entry" +fi + +# ══════════════════════════════════════════════════════════════════ +# Phase 4: Final cleanup +# ══════════════════════════════════════════════════════════════════ +section "Phase 4: Final cleanup" + +run_nemoclaw "$SANDBOX_NAME" destroy 2>/dev/null || true +openshell sandbox delete "$SANDBOX_NAME" 2>/dev/null || true +openshell forward stop 18789 2>/dev/null || true +openshell gateway destroy -g nemoclaw 2>/dev/null || true +rm -f "$SESSION_FILE" + +if openshell sandbox get "$SANDBOX_NAME" >/dev/null 2>&1; then + fail "Sandbox '$SANDBOX_NAME' still exists after cleanup" +else + pass "Sandbox '$SANDBOX_NAME' cleaned up" +fi + +if [ -f "$SESSION_FILE" ]; then + fail "Onboard session file still exists after cleanup" +else + pass "Onboard session file cleaned up" +fi + +pass "Final cleanup complete" + +# ══════════════════════════════════════════════════════════════════ +# Summary +# ══════════════════════════════════════════════════════════════════ +echo "" +echo "========================================" +echo " PASS: $PASS" +echo " FAIL: $FAIL" +echo " SKIP: $SKIP" +echo " TOTAL: $TOTAL" +echo "========================================" +echo "" + +if [ $FAIL -ne 0 ]; then + exit 1 +fi diff --git a/test/install-preflight.test.js b/test/install-preflight.test.js index 2d5e11504..e24fe17e4 100644 --- a/test/install-preflight.test.js +++ b/test/install-preflight.test.js @@ -8,7 +8,8 @@ import path from "node:path"; import { spawnSync } from "node:child_process"; const INSTALLER = path.join(import.meta.dirname, "..", "install.sh"); -const CURL_PIPE_INSTALLER = path.join(import.meta.dirname, "..", "scripts", "install.sh"); +const CURL_PIPE_INSTALLER = path.join(import.meta.dirname, "..", "install.sh"); +const LEGACY_INSTALLER_WRAPPER = path.join(import.meta.dirname, "..", "scripts", "install.sh"); const GITHUB_INSTALL_URL = "git+https://github.com/NVIDIA/NemoClaw.git"; const TEST_SYSTEM_PATH = "/usr/bin:/bin"; @@ -284,181 +285,22 @@ exit 98 expect(output).not.toMatch(/npm install -g nemoclaw/); }); - it("does not silently prefer Colima when both macOS runtimes are available", () => { - const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-macos-runtime-choice-")); - const fakeBin = path.join(tmp, "bin"); - const colimaSocket = path.join(tmp, ".colima/default/docker.sock"); - const dockerDesktopSocket = path.join(tmp, ".docker/run/docker.sock"); - fs.mkdirSync(fakeBin); - - writeExecutable( - path.join(fakeBin, "node"), - `#!/usr/bin/env bash -if [ "$1" = "-v" ] || [ "$1" = "--version" ]; then - echo "v22.16.0" - exit 0 -fi -exit 99 -`, - ); - - writeExecutable( - path.join(fakeBin, "npm"), - `#!/usr/bin/env bash -if [ "$1" = "--version" ]; then - echo "10.9.2" - exit 0 -fi -echo "/tmp/npm-prefix" -exit 0 -`, - ); - - writeExecutable( - path.join(fakeBin, "docker"), - `#!/usr/bin/env bash -if [ "$1" = "info" ]; then - exit 1 -fi -exit 0 -`, - ); - - writeExecutable( - path.join(fakeBin, "colima"), - `#!/usr/bin/env bash -echo "colima should not be started" >&2 -exit 97 -`, - ); - - writeExecutable( - path.join(fakeBin, "uname"), - `#!/usr/bin/env bash -if [ "$1" = "-s" ]; then - echo "Darwin" - exit 0 -fi -if [ "$1" = "-m" ]; then - echo "arm64" - exit 0 -fi -echo "Darwin" -`, - ); - - const result = spawnSync("bash", [CURL_PIPE_INSTALLER], { + it("legacy scripts/install.sh delegates to the root installer from a repo checkout", () => { + const result = spawnSync("bash", [LEGACY_INSTALLER_WRAPPER, "--help"], { cwd: path.join(import.meta.dirname, ".."), encoding: "utf-8", - env: { - ...process.env, - HOME: tmp, - PATH: `${fakeBin}:${TEST_SYSTEM_PATH}`, - NEMOCLAW_TEST_SOCKET_PATHS: `${colimaSocket}:${dockerDesktopSocket}`, - }, }); const output = `${result.stdout}${result.stderr}`; - expect(result.status).not.toBe(0); - expect(output).toMatch(/Both Colima and Docker Desktop are available/); - expect(output).not.toMatch(/colima should not be started/); + expect(result.status).toBe(0); + expect(output).toMatch(/deprecated compatibility wrapper/); + expect(output).toMatch(/https:\/\/www\.nvidia\.com\/nemoclaw\.sh/); + expect(output).toMatch(/NemoClaw Installer/); }); - it("can run via stdin without a sibling runtime.sh file", () => { - const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-curl-pipe-installer-")); - const fakeBin = path.join(tmp, "bin"); - const prefix = path.join(tmp, "prefix"); - fs.mkdirSync(fakeBin); - fs.mkdirSync(path.join(prefix, "bin"), { recursive: true }); - - writeExecutable( - path.join(fakeBin, "node"), - `#!/usr/bin/env bash -if [ "$1" = "-v" ] || [ "$1" = "--version" ]; then - echo "v22.16.0" - exit 0 -fi -if [ "$1" = "-e" ]; then - exit 1 -fi -exit 99 -`, - ); - - writeExecutable( - path.join(fakeBin, "git"), - `#!/usr/bin/env bash -if [ "$1" = "clone" ]; then - target="\${@: -1}" - mkdir -p "$target/nemoclaw" - echo '{"name":"nemoclaw","version":"0.1.0","dependencies":{"openclaw":"2026.3.11"}}' > "$target/package.json" - echo '{"name":"nemoclaw-plugin","version":"0.1.0"}' > "$target/nemoclaw/package.json" - exit 0 -fi -exit 0 -`, - ); - - writeExecutable( - path.join(fakeBin, "npm"), - `#!/usr/bin/env bash -set -euo pipefail -if [ "$1" = "--version" ]; then - echo "10.9.2" - exit 0 -fi -if [ "$1" = "config" ] && [ "$2" = "get" ] && [ "$3" = "prefix" ]; then - echo "$NPM_PREFIX" - exit 0 -fi -if [ "$1" = "pack" ]; then - exit 1 -fi -if [ "$1" = "install" ] && [[ "$*" == *"--ignore-scripts"* ]]; then - exit 0 -fi -if [ "$1" = "run" ]; then - exit 0 -fi -if [ "$1" = "link" ]; then - cat > "$NPM_PREFIX/bin/nemoclaw" <<'EOS' -#!/usr/bin/env bash -if [ "$1" = "--version" ]; then - echo "v0.1.0-test" - exit 0 -fi -exit 0 -EOS - chmod +x "$NPM_PREFIX/bin/nemoclaw" - exit 0 -fi -echo "unexpected npm invocation: $*" >&2 -exit 98 -`, - ); - - writeExecutable( - path.join(fakeBin, "docker"), - `#!/usr/bin/env bash -if [ "$1" = "info" ]; then - exit 0 -fi -exit 0 -`, - ); - - writeExecutable( - path.join(fakeBin, "openshell"), - `#!/usr/bin/env bash -if [ "$1" = "--version" ]; then - echo "openshell 0.0.9" - exit 0 -fi -exit 0 -`, - ); - - const scriptContents = fs.readFileSync(CURL_PIPE_INSTALLER, "utf-8"); + it("legacy scripts/install.sh fails clearly when run without the repo root installer", () => { + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-legacy-installer-stdin-")); + const scriptContents = fs.readFileSync(LEGACY_INSTALLER_WRAPPER, "utf-8"); const result = spawnSync("bash", [], { cwd: tmp, input: scriptContents, @@ -466,16 +308,16 @@ exit 0 env: { ...process.env, HOME: tmp, - PATH: `${fakeBin}:${TEST_SYSTEM_PATH}`, - NEMOCLAW_NON_INTERACTIVE: "1", - NPM_PREFIX: prefix, + PATH: TEST_SYSTEM_PATH, }, }); const output = `${result.stdout}${result.stderr}`; - expect(result.status).toBe(0); - expect(output).toMatch(/Installation complete!/); - expect(output).toMatch(/nemoclaw v0\.1\.0-test is ready/); + expect(result.status).not.toBe(0); + expect(output).toMatch(/deprecated compatibility wrapper/); + expect(output).toMatch(/supported installer/); + expect(output).toMatch(/https:\/\/www\.nvidia\.com\/nemoclaw\.sh/); + expect(output).toMatch(/only works from a NemoClaw repository checkout/); }); it("--help exits 0 and shows install usage", () => { @@ -580,6 +422,70 @@ fi`, expect(log).not.toMatch(new RegExp(GITHUB_INSTALL_URL.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))); }); + it("auto-resumes an interrupted onboarding session during install", () => { + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-install-resume-")); + const fakeBin = path.join(tmp, "bin"); + const prefix = path.join(tmp, "prefix"); + const onboardLog = path.join(tmp, "onboard.log"); + fs.mkdirSync(fakeBin); + fs.mkdirSync(path.join(prefix, "bin"), { recursive: true }); + fs.mkdirSync(path.join(tmp, ".nemoclaw"), { recursive: true }); + + fs.writeFileSync( + path.join(tmp, ".nemoclaw", "onboard-session.json"), + JSON.stringify({ resumable: true, status: "in_progress" }, null, 2), + ); + + writeNodeStub(fakeBin); + writeNpmStub( + fakeBin, + `if [ "$1" = "pack" ]; then + tmpdir="$4" + mkdir -p "$tmpdir/package" + tar -czf "$tmpdir/openclaw-2026.3.11.tgz" -C "$tmpdir" package + exit 0 +fi +if [ "$1" = "install" ]; then exit 0; fi +if [ "$1" = "run" ] && [ "$2" = "build" ]; then exit 0; fi +if [ "$1" = "link" ]; then + cat > "$NPM_PREFIX/bin/nemoclaw" <<'EOS' +#!/usr/bin/env bash +printf '%s\\n' "$*" >> "$NEMOCLAW_ONBOARD_LOG" +exit 0 +EOS + chmod +x "$NPM_PREFIX/bin/nemoclaw" + exit 0 +fi`, + ); + + fs.writeFileSync( + path.join(tmp, "package.json"), + JSON.stringify({ name: "nemoclaw", version: "0.1.0" }, null, 2), + ); + fs.mkdirSync(path.join(tmp, "nemoclaw"), { recursive: true }); + fs.writeFileSync( + path.join(tmp, "nemoclaw", "package.json"), + JSON.stringify({ name: "nemoclaw-plugin", version: "0.1.0" }, null, 2), + ); + + const result = spawnSync("bash", [INSTALLER], { + cwd: tmp, + encoding: "utf-8", + env: { + ...process.env, + HOME: tmp, + PATH: `${fakeBin}:${TEST_SYSTEM_PATH}`, + NEMOCLAW_NON_INTERACTIVE: "1", + NPM_PREFIX: prefix, + NEMOCLAW_ONBOARD_LOG: onboardLog, + }, + }); + + expect(result.status).toBe(0); + expect(`${result.stdout}${result.stderr}`).toMatch(/Found an interrupted onboarding session — resuming it\./); + expect(fs.readFileSync(onboardLog, "utf-8")).toMatch(/^onboard --resume --non-interactive$/m); + }); + it("spin() non-TTY: dumps wrapped-command output and exits non-zero on failure", () => { const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-install-spin-fail-")); const fakeBin = path.join(tmp, "bin"); @@ -744,6 +650,121 @@ exit 0 expect(fs.readlinkSync(shimPath)).toBe(path.join(prefix, "bin", "nemoclaw")); expect(`${result.stdout}${result.stderr}`).toMatch(/Created user-local shim/); }); + + it("does not print PATH recovery instructions when nemoclaw is already usable in this shell", () => { + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-install-ready-shell-")); + const fakeBin = path.join(tmp, "bin"); + const prefix = path.join(tmp, "prefix"); + const nvmDir = path.join(tmp, ".nvm"); + fs.mkdirSync(fakeBin); + fs.mkdirSync(path.join(prefix, "bin"), { recursive: true }); + fs.mkdirSync(nvmDir, { recursive: true }); + fs.writeFileSync(path.join(nvmDir, "nvm.sh"), "# stub nvm\n"); + + writeExecutable( + path.join(fakeBin, "node"), + `#!/usr/bin/env bash +if [ "$1" = "-v" ] || [ "$1" = "--version" ]; then + echo "v22.16.0" + exit 0 +fi +if [ "$1" = "-e" ]; then + exit 1 +fi +exit 99 +`, + ); + + writeExecutable( + path.join(fakeBin, "git"), + `#!/usr/bin/env bash +if [ "$1" = "clone" ]; then + target="\${@: -1}" + mkdir -p "$target/nemoclaw" + echo '{"name":"nemoclaw","version":"0.1.0","dependencies":{"openclaw":"2026.3.11"}}' > "$target/package.json" + echo '{"name":"nemoclaw-plugin","version":"0.1.0"}' > "$target/nemoclaw/package.json" + exit 0 +fi +exit 0 +`, + ); + + writeExecutable( + path.join(fakeBin, "npm"), + `#!/usr/bin/env bash +set -euo pipefail +if [ "$1" = "--version" ]; then + echo "10.9.2" + exit 0 +fi +if [ "$1" = "config" ] && [ "$2" = "get" ] && [ "$3" = "prefix" ]; then + echo "$NPM_PREFIX" + exit 0 +fi +if [ "$1" = "pack" ]; then + exit 1 +fi +if [ "$1" = "install" ] && [[ "$*" == *"--ignore-scripts"* ]]; then + exit 0 +fi +if [ "$1" = "run" ]; then + exit 0 +fi +if [ "$1" = "link" ]; then + cat > "$NPM_PREFIX/bin/nemoclaw" <<'EOS' +#!/usr/bin/env bash +if [ "$1" = "onboard" ] || [ "$1" = "--version" ]; then + exit 0 +fi +exit 0 +EOS + chmod +x "$NPM_PREFIX/bin/nemoclaw" + exit 0 +fi +echo "unexpected npm invocation: $*" >&2 +exit 98 +`, + ); + + writeExecutable( + path.join(fakeBin, "docker"), + `#!/usr/bin/env bash +if [ "$1" = "info" ]; then + exit 0 +fi +exit 0 +`, + ); + + writeExecutable( + path.join(fakeBin, "openshell"), + `#!/usr/bin/env bash +if [ "$1" = "--version" ]; then + echo "openshell 0.0.9" + exit 0 +fi +exit 0 +`, + ); + + const result = spawnSync("bash", [INSTALLER], { + cwd: tmp, + encoding: "utf-8", + env: { + ...process.env, + HOME: tmp, + PATH: `${fakeBin}:${TEST_SYSTEM_PATH}`, + NEMOCLAW_NON_INTERACTIVE: "1", + NPM_PREFIX: prefix, + NVM_DIR: nvmDir, + }, + }); + + const output = `${result.stdout}${result.stderr}`; + expect(result.status).toBe(0); + expect(output).not.toMatch(/current shell cannot resolve 'nemoclaw'/); + expect(output).not.toMatch(/source .*\.bashrc|source .*\.zshrc|source .*\.profile/); + }); }); // --------------------------------------------------------------------------- diff --git a/test/nemoclaw-cli-recovery.test.js b/test/nemoclaw-cli-recovery.test.js new file mode 100644 index 000000000..f3ba3df0a --- /dev/null +++ b/test/nemoclaw-cli-recovery.test.js @@ -0,0 +1,105 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import assert from "node:assert/strict"; +import { spawnSync } from "node:child_process"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import { describe, it } from "vitest"; + +describe("nemoclaw CLI runtime recovery", () => { + it("recovers sandbox status when openshell is only available via the resolved fallback path", () => { + const repoRoot = path.join(import.meta.dirname, ".."); + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-cli-recovery-")); + const homeLocalBin = path.join(tmpDir, ".local", "bin"); + const stateDir = path.join(tmpDir, "state"); + const registryDir = path.join(tmpDir, ".nemoclaw"); + const openshellPath = path.join(homeLocalBin, "openshell"); + const stateFile = path.join(stateDir, "openshell-state.json"); + + fs.mkdirSync(homeLocalBin, { recursive: true }); + fs.mkdirSync(stateDir, { recursive: true }); + fs.mkdirSync(registryDir, { recursive: true }); + fs.writeFileSync( + path.join(registryDir, "sandboxes.json"), + JSON.stringify({ + defaultSandbox: "my-assistant", + sandboxes: { + "my-assistant": { + name: "my-assistant", + model: "nvidia/nemotron-3-super-120b-a12b", + provider: "nvidia-prod", + gpuEnabled: false, + policies: [], + }, + }, + }), + { mode: 0o600 } + ); + fs.writeFileSync(stateFile, JSON.stringify({ statusCalls: 0, sandboxGetCalls: 0 })); + fs.writeFileSync( + openshellPath, + `#!${process.execPath} +const fs = require("fs"); +const path = require("path"); +const statePath = ${JSON.stringify(stateFile)}; +const args = process.argv.slice(2); +const state = JSON.parse(fs.readFileSync(statePath, "utf8")); + +if (args[0] === "status") { + state.statusCalls += 1; + fs.writeFileSync(statePath, JSON.stringify(state)); + if (state.statusCalls === 1) { + process.stdout.write("Error: × No active gateway\\n"); + } else { + process.stdout.write("Gateway: nemoclaw\\nStatus: Connected\\n"); + } + process.exit(0); +} + +if (args[0] === "gateway" && (args[1] === "start" || args[1] === "select")) { + fs.writeFileSync(statePath, JSON.stringify(state)); + process.exit(0); +} + +if (args[0] === "gateway" && args[1] === "info") { + process.stdout.write("Gateway: nemoclaw\\nGateway endpoint: https://127.0.0.1:8080\\n"); + process.exit(0); +} + +if (args[0] === "sandbox" && args[1] === "get" && args[2] === "my-assistant") { + state.sandboxGetCalls += 1; + fs.writeFileSync(statePath, JSON.stringify(state)); + if (state.sandboxGetCalls === 1) { + process.stdout.write("Error: × transport error\\n ╰─▶ Connection reset by peer (os error 104)\\n"); + process.exit(1); + } + process.stdout.write("Sandbox:\\n\\n Id: abc\\n Name: my-assistant\\n Namespace: openshell\\n Phase: Ready\\n"); + process.exit(0); +} + +if (args[0] === "logs") { + process.exit(0); +} + +process.exit(0); +`, + { mode: 0o755 } + ); + + const result = spawnSync(process.execPath, [path.join(repoRoot, "bin", "nemoclaw.js"), "my-assistant", "status"], { + cwd: repoRoot, + encoding: "utf-8", + env: { + ...process.env, + HOME: tmpDir, + PATH: "/usr/bin:/bin", + }, + }); + + assert.equal(result.status, 0, result.stderr); + assert.match(result.stdout, /Recovered NemoClaw gateway runtime via (start|select)/); + assert.match(result.stdout, /Phase: Ready/); + }); +}); diff --git a/test/onboard-session.test.js b/test/onboard-session.test.js new file mode 100644 index 000000000..08dc5d30f --- /dev/null +++ b/test/onboard-session.test.js @@ -0,0 +1,222 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import { createRequire } from "node:module"; + +const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-onboard-session-")); +const require = createRequire(import.meta.url); +const modulePath = require.resolve("../bin/lib/onboard-session"); +const originalHome = process.env.HOME; +let session; + +beforeEach(() => { + process.env.HOME = tmpDir; + delete require.cache[modulePath]; + session = require("../bin/lib/onboard-session"); + session.clearSession(); + session.releaseOnboardLock(); +}); + +afterEach(() => { + delete require.cache[modulePath]; + if (originalHome === undefined) { + delete process.env.HOME; + } else { + process.env.HOME = originalHome; + } +}); + +describe("onboard session", () => { + it("starts empty", () => { + expect(session.loadSession()).toBeNull(); + }); + + it("creates and persists a session with restrictive permissions", () => { + const created = session.createSession({ mode: "non-interactive" }); + const saved = session.saveSession(created); + const stat = fs.statSync(session.SESSION_FILE); + const dirStat = fs.statSync(path.dirname(session.SESSION_FILE)); + + expect(saved.mode).toBe("non-interactive"); + expect(fs.existsSync(session.SESSION_FILE)).toBe(true); + expect(stat.mode & 0o777).toBe(0o600); + expect(dirStat.mode & 0o777).toBe(0o700); + }); + + it("redacts credential-bearing endpoint URLs before persisting them", () => { + session.saveSession(session.createSession()); + session.markStepComplete("provider_selection", { + endpointUrl: + "https://alice:secret@example.com/v1/models?token=abc123&sig=def456&X-Amz-Signature=ghi789&keep=yes#token=frag", + }); + + const loaded = session.loadSession(); + expect(loaded.endpointUrl).toBe( + "https://example.com/v1/models?token=%3CREDACTED%3E&sig=%3CREDACTED%3E&X-Amz-Signature=%3CREDACTED%3E&keep=yes" + ); + expect(session.summarizeForDebug().endpointUrl).toBe(loaded.endpointUrl); + }); + + it("marks steps started, completed, and failed", () => { + session.saveSession(session.createSession()); + session.markStepStarted("gateway"); + let loaded = session.loadSession(); + expect(loaded.steps.gateway.status).toBe("in_progress"); + expect(loaded.lastStepStarted).toBe("gateway"); + expect(loaded.steps.gateway.completedAt).toBeNull(); + + session.markStepComplete("gateway", { sandboxName: "my-assistant" }); + loaded = session.loadSession(); + expect(loaded.steps.gateway.status).toBe("complete"); + expect(loaded.sandboxName).toBe("my-assistant"); + expect(loaded.steps.gateway.completedAt).toBeTruthy(); + + session.markStepFailed("sandbox", "Sandbox creation failed"); + loaded = session.loadSession(); + expect(loaded.steps.sandbox.status).toBe("failed"); + expect(loaded.steps.sandbox.completedAt).toBeNull(); + expect(loaded.failure.step).toBe("sandbox"); + expect(loaded.failure.message).toMatch(/Sandbox creation failed/); + }); + + it("persists safe provider metadata without persisting secrets", () => { + session.saveSession(session.createSession()); + session.markStepComplete("provider_selection", { + provider: "nvidia-nim", + model: "nvidia/test-model", + sandboxName: "my-assistant", + endpointUrl: "https://example.com/v1", + credentialEnv: "NVIDIA_API_KEY", + preferredInferenceApi: "openai-completions", + nimContainer: "nim-123", + policyPresets: ["pypi", "npm"], + apiKey: "nvapi-secret", + metadata: { + gatewayName: "nemoclaw", + token: "secret", + }, + }); + + const loaded = session.loadSession(); + expect(loaded.provider).toBe("nvidia-nim"); + expect(loaded.model).toBe("nvidia/test-model"); + expect(loaded.sandboxName).toBe("my-assistant"); + expect(loaded.endpointUrl).toBe("https://example.com/v1"); + expect(loaded.credentialEnv).toBe("NVIDIA_API_KEY"); + expect(loaded.preferredInferenceApi).toBe("openai-completions"); + expect(loaded.nimContainer).toBe("nim-123"); + expect(loaded.policyPresets).toEqual(["pypi", "npm"]); + expect(loaded.apiKey).toBeUndefined(); + expect(loaded.metadata.gatewayName).toBe("nemoclaw"); + expect(loaded.metadata.token).toBeUndefined(); + }); + + it("does not clear existing metadata when updates omit whitelisted metadata fields", () => { + session.saveSession(session.createSession({ metadata: { gatewayName: "nemoclaw" } })); + session.markStepComplete("provider_selection", { + metadata: { + token: "should-not-persist", + }, + }); + + const loaded = session.loadSession(); + expect(loaded.metadata.gatewayName).toBe("nemoclaw"); + expect(loaded.metadata.token).toBeUndefined(); + }); + + it("returns null for corrupt session data", () => { + fs.mkdirSync(path.dirname(session.SESSION_FILE), { recursive: true }); + fs.writeFileSync(session.SESSION_FILE, "not-json"); + expect(session.loadSession()).toBeNull(); + }); + + it("acquires and releases the onboard lock", () => { + const acquired = session.acquireOnboardLock("nemoclaw onboard"); + expect(acquired.acquired).toBe(true); + expect(fs.existsSync(session.LOCK_FILE)).toBe(true); + + const secondAttempt = session.acquireOnboardLock("nemoclaw onboard --resume"); + expect(secondAttempt.acquired).toBe(false); + expect(secondAttempt.holderPid).toBe(process.pid); + + session.releaseOnboardLock(); + expect(fs.existsSync(session.LOCK_FILE)).toBe(false); + }); + + it("replaces a stale onboard lock", () => { + fs.mkdirSync(path.dirname(session.LOCK_FILE), { recursive: true }); + fs.writeFileSync( + session.LOCK_FILE, + JSON.stringify({ pid: 999999, startedAt: "2026-03-25T00:00:00.000Z", command: "nemoclaw onboard" }), + { mode: 0o600 } + ); + + const acquired = session.acquireOnboardLock("nemoclaw onboard --resume"); + expect(acquired.acquired).toBe(true); + + const written = JSON.parse(fs.readFileSync(session.LOCK_FILE, "utf8")); + expect(written.pid).toBe(process.pid); + }); + + it("treats unreadable or transient lock contents as a retry, not a stale lock", () => { + fs.mkdirSync(path.dirname(session.LOCK_FILE), { recursive: true }); + fs.writeFileSync(session.LOCK_FILE, "{not-json", { mode: 0o600 }); + + const acquired = session.acquireOnboardLock("nemoclaw onboard --resume"); + expect(acquired.acquired).toBe(false); + expect(acquired.stale).toBe(true); + expect(fs.existsSync(session.LOCK_FILE)).toBe(true); + }); + + it("ignores malformed lock files when releasing the onboard lock", () => { + fs.mkdirSync(path.dirname(session.LOCK_FILE), { recursive: true }); + fs.writeFileSync(session.LOCK_FILE, "{not-json", { mode: 0o600 }); + + session.releaseOnboardLock(); + expect(fs.existsSync(session.LOCK_FILE)).toBe(true); + }); + + it("redacts sensitive values from persisted failure messages", () => { + session.saveSession(session.createSession()); + session.markStepFailed( + "inference", + "provider auth failed with NVIDIA_API_KEY=nvapi-secret Bearer topsecret sk-secret-value ghp_1234567890123456789012345" + ); + + const loaded = session.loadSession(); + expect(loaded.steps.inference.error).toContain("NVIDIA_API_KEY="); + expect(loaded.steps.inference.error).toContain("Bearer "); + expect(loaded.steps.inference.error).not.toContain("nvapi-secret"); + expect(loaded.steps.inference.error).not.toContain("topsecret"); + expect(loaded.steps.inference.error).not.toContain("sk-secret-value"); + expect(loaded.steps.inference.error).not.toContain("ghp_1234567890123456789012345"); + expect(loaded.failure.message).toBe(loaded.steps.inference.error); + }); + + it("summarizes the session for debug output", () => { + session.saveSession(session.createSession({ sandboxName: "my-assistant" })); + session.markStepStarted("preflight"); + session.markStepComplete("preflight"); + session.completeSession(); + const summary = session.summarizeForDebug(); + + expect(summary.sandboxName).toBe("my-assistant"); + expect(summary.steps.preflight.status).toBe("complete"); + expect(summary.steps.preflight.startedAt).toBeTruthy(); + expect(summary.steps.preflight.completedAt).toBeTruthy(); + expect(summary.resumable).toBe(false); + }); + + it("keeps debug summaries redacted when failures were sanitized", () => { + session.saveSession(session.createSession({ sandboxName: "my-assistant" })); + session.markStepFailed("provider_selection", "Bearer abcdefghijklmnopqrstuvwxyz"); + const summary = session.summarizeForDebug(); + + expect(summary.failure.message).toContain("Bearer "); + expect(summary.failure.message).not.toContain("abcdefghijklmnopqrstuvwxyz"); + }); +}); diff --git a/test/onboard.test.js b/test/onboard.test.js index 8a8046b52..16b7e5453 100644 --- a/test/onboard.test.js +++ b/test/onboard.test.js @@ -10,16 +10,60 @@ import { describe, expect, it } from "vitest"; import { buildSandboxConfigSyncScript, + classifySandboxCreateFailure, + getGatewayReuseState, getFutureShellPathHint, - getInstalledOpenshellVersion, - isGatewayHealthy, getSandboxInferenceConfig, + getInstalledOpenshellVersion, + getRequestedModelHint, + getRequestedProviderHint, + getRequestedSandboxNameHint, + getResumeConfigConflicts, + getResumeSandboxConflict, + getSandboxStateFromOutputs, getStableGatewayImageRef, + isGatewayHealthy, patchStagedDockerfile, + printSandboxCreateRecoveryHints, + shouldIncludeBuildContextPath, writeSandboxConfigSyncFile, } from "../bin/lib/onboard"; describe("onboard helpers", () => { + it("classifies sandbox create timeout failures and tracks upload progress", () => { + expect( + classifySandboxCreateFailure("Error: failed to read image export stream\nTimeout error").kind + ).toBe("image_transfer_timeout"); + expect( + classifySandboxCreateFailure( + [ + " Pushing image openshell/sandbox-from:123 into gateway \"nemoclaw\"", + " [progress] Uploaded to gateway", + "Error: failed to read image export stream", + ].join("\n") + ) + ).toEqual({ + kind: "image_transfer_timeout", + uploadedToGateway: true, + }); + }); + + it("classifies sandbox create connection resets and incomplete create streams", () => { + expect(classifySandboxCreateFailure("Connection reset by peer").kind).toBe("image_transfer_reset"); + expect( + classifySandboxCreateFailure( + [ + " Image openshell/sandbox-from:123 is available in the gateway.", + "Created sandbox: my-assistant", + "Error: stream closed unexpectedly", + ].join("\n") + ) + ).toEqual({ + kind: "sandbox_create_incomplete", + uploadedToGateway: true, + }); + }); + it("builds a sandbox sync script that only writes nemoclaw config", () => { const script = buildSandboxConfigSyncScript({ endpointType: "custom", @@ -153,33 +197,222 @@ describe("onboard helpers", () => { expect(getStableGatewayImageRef("bogus")).toBe(null); }); - it("recognizes only a connected named NemoClaw gateway as healthy", () => { + it("treats the gateway as healthy only when nemoclaw is running and connected", () => { expect( isGatewayHealthy( - "Server Status\n\n Gateway: nemoclaw\n Status: Connected", + "Gateway status: Connected\nGateway: nemoclaw", + "Gateway Info\n\n Gateway: nemoclaw\n Gateway endpoint: https://127.0.0.1:8080", "Gateway Info\n\n Gateway: nemoclaw\n Gateway endpoint: https://127.0.0.1:8080" ) ).toBe(true); expect( isGatewayHealthy( - "Server Status\n\n Gateway: openshell\n Status: Connected", - "Gateway Info\n\n Gateway: nemoclaw\n Gateway endpoint: https://127.0.0.1:8080" + "\u001b[1mServer Status\u001b[0m\n\n Gateway: openshell\n Server: https://127.0.0.1:8080\n Status: Connected", + "Error: × No gateway metadata found for 'nemoclaw'.", + "Gateway Info\n\n Gateway: openshell\n Gateway endpoint: https://127.0.0.1:8080" ) ).toBe(false); expect( isGatewayHealthy( "Server Status\n\n Gateway: openshell\n Status: Connected", - "Error: no gateway metadata found" + "Gateway Info\n\n Gateway: nemoclaw\n Gateway endpoint: https://127.0.0.1:8080", + "Gateway Info\n\n Gateway: openshell\n Gateway endpoint: https://127.0.0.1:8080" ) ).toBe(false); + expect(isGatewayHealthy("Gateway status: Disconnected", "Gateway: nemoclaw")).toBe(false); + expect(isGatewayHealthy("Gateway status: Connected", "Gateway: something-else")).toBe(false); + }); + + it("classifies gateway reuse states conservatively", () => { expect( - isGatewayHealthy( - "Server Status\n\n Gateway: nemoclaw\n Status: Disconnected", + getGatewayReuseState( + "Gateway status: Connected\nGateway: nemoclaw", + "Gateway Info\n\n Gateway: nemoclaw\n Gateway endpoint: https://127.0.0.1:8080", + "Gateway Info\n\n Gateway: nemoclaw\n Gateway endpoint: https://127.0.0.1:8080" + ) + ).toBe("healthy"); + expect( + getGatewayReuseState( + "Gateway status: Connected", + "Error: × No gateway metadata found for 'nemoclaw'.", + "Gateway Info\n\n Gateway: openshell\n Gateway endpoint: https://127.0.0.1:8080" + ) + ).toBe("foreign-active"); + expect( + getGatewayReuseState( + "Server Status\n\n Gateway: openshell\n Status: Connected", + "Gateway Info\n\n Gateway: nemoclaw\n Gateway endpoint: https://127.0.0.1:8080", + "Gateway Info\n\n Gateway: openshell\n Gateway endpoint: https://127.0.0.1:8080" + ) + ).toBe("foreign-active"); + expect( + getGatewayReuseState( + "Gateway status: Disconnected", "Gateway Info\n\n Gateway: nemoclaw\n Gateway endpoint: https://127.0.0.1:8080" ) + ).toBe("stale"); + expect( + getGatewayReuseState( + "Gateway status: Connected\nGateway: nemoclaw", + "", + "Gateway Info\n\n Gateway: nemoclaw\n Gateway endpoint: https://127.0.0.1:8080" + ) + ).toBe("active-unnamed"); + expect( + getGatewayReuseState( + "Gateway status: Connected", + "", + "Gateway Info\n\n Gateway: openshell\n Gateway endpoint: https://127.0.0.1:8080" + ) + ).toBe("foreign-active"); + expect(getGatewayReuseState("", "")).toBe("missing"); + }); + + it("classifies sandbox reuse states from openshell outputs", () => { + expect( + getSandboxStateFromOutputs( + "my-assistant", + "Name: my-assistant", + "my-assistant Ready 2m ago" + ) + ).toBe("ready"); + expect( + getSandboxStateFromOutputs( + "my-assistant", + "Name: my-assistant", + "my-assistant NotReady init failed" + ) + ).toBe("not_ready"); + expect(getSandboxStateFromOutputs("my-assistant", "", "")).toBe("missing"); + }); + + it("filters local-only artifacts out of the sandbox build context", () => { + expect( + shouldIncludeBuildContextPath( + "/repo/nemoclaw-blueprint", + "/repo/nemoclaw-blueprint/orchestrator/main.py" + ) + ).toBe(true); + expect( + shouldIncludeBuildContextPath( + "/repo/nemoclaw-blueprint", + "/repo/nemoclaw-blueprint/.venv/bin/python" + ) + ).toBe(false); + expect( + shouldIncludeBuildContextPath( + "/repo/nemoclaw-blueprint", + "/repo/nemoclaw-blueprint/.ruff_cache/cache" + ) + ).toBe(false); + expect( + shouldIncludeBuildContextPath( + "/repo/nemoclaw-blueprint", + "/repo/nemoclaw-blueprint/._pyvenv.cfg" + ) ).toBe(false); }); + it("normalizes sandbox name hints from the environment", () => { + const previous = process.env.NEMOCLAW_SANDBOX_NAME; + process.env.NEMOCLAW_SANDBOX_NAME = " My-Assistant "; + try { + expect(getRequestedSandboxNameHint()).toBe("my-assistant"); + } finally { + if (previous === undefined) { + delete process.env.NEMOCLAW_SANDBOX_NAME; + } else { + process.env.NEMOCLAW_SANDBOX_NAME = previous; + } + } + }); + + it("detects resume conflicts when a different sandbox is requested", () => { + const previous = process.env.NEMOCLAW_SANDBOX_NAME; + process.env.NEMOCLAW_SANDBOX_NAME = "other-sandbox"; + try { + expect(getResumeSandboxConflict({ sandboxName: "my-assistant" })).toEqual({ + requestedSandboxName: "other-sandbox", + recordedSandboxName: "my-assistant", + }); + expect(getResumeSandboxConflict({ sandboxName: "other-sandbox" })).toBe(null); + } finally { + if (previous === undefined) { + delete process.env.NEMOCLAW_SANDBOX_NAME; + } else { + process.env.NEMOCLAW_SANDBOX_NAME = previous; + } + } + }); + + it("returns provider and model hints only for non-interactive runs", () => { + const previousProvider = process.env.NEMOCLAW_PROVIDER; + const previousModel = process.env.NEMOCLAW_MODEL; + process.env.NEMOCLAW_PROVIDER = "cloud"; + process.env.NEMOCLAW_MODEL = "nvidia/test-model"; + try { + expect(getRequestedProviderHint(true)).toBe("build"); + expect(getRequestedModelHint(true)).toBe("nvidia/test-model"); + expect(getRequestedProviderHint(false)).toBe(null); + expect(getRequestedModelHint(false)).toBe(null); + } finally { + if (previousProvider === undefined) { + delete process.env.NEMOCLAW_PROVIDER; + } else { + process.env.NEMOCLAW_PROVIDER = previousProvider; + } + if (previousModel === undefined) { + delete process.env.NEMOCLAW_MODEL; + } else { + process.env.NEMOCLAW_MODEL = previousModel; + } + } + }); + + it("detects resume conflicts for explicit provider and model changes", () => { + const previousProvider = process.env.NEMOCLAW_PROVIDER; + const previousModel = process.env.NEMOCLAW_MODEL; + process.env.NEMOCLAW_PROVIDER = "cloud"; + process.env.NEMOCLAW_MODEL = "nvidia/other-model"; + try { + // Provider conflict uses a two-stage alias chain in non-interactive mode: + // "cloud" first resolves to the requested hint, then that hint resolves + // to the effective provider name "nvidia-prod" for conflict comparison. + expect( + getResumeConfigConflicts( + { + sandboxName: "my-assistant", + provider: "nvidia-nim", + model: "nvidia/nemotron-3-super-120b-a12b", + }, + { nonInteractive: true } + ) + ).toEqual([ + { + field: "provider", + requested: "nvidia-prod", + recorded: "nvidia-nim", + }, + { + field: "model", + requested: "nvidia/other-model", + recorded: "nvidia/nemotron-3-super-120b-a12b", + }, + ]); + } finally { + if (previousProvider === undefined) { + delete process.env.NEMOCLAW_PROVIDER; + } else { + process.env.NEMOCLAW_PROVIDER = previousProvider; + } + if (previousModel === undefined) { + delete process.env.NEMOCLAW_MODEL; + } else { + process.env.NEMOCLAW_MODEL = previousModel; + } + } + }); + it("returns a future-shell PATH hint for user-local openshell installs", () => { expect(getFutureShellPathHint("/home/test/.local/bin", "/usr/local/bin:/usr/bin")).toBe( 'export PATH="/home/test/.local/bin:$PATH"' @@ -273,6 +506,175 @@ const { setupInference } = require(${onboardPath}); assert.match(commands[2].command, /inference' 'set'/); }); + it("detects when the live inference route already matches the requested provider and model", () => { + const repoRoot = path.join(import.meta.dirname, ".."); + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-inference-ready-")); + const fakeOpenshell = path.join(tmpDir, "openshell"); + const scriptPath = path.join(tmpDir, "inference-ready-check.js"); + const onboardPath = JSON.stringify(path.join(repoRoot, "bin", "lib", "onboard.js")); + + fs.writeFileSync( + fakeOpenshell, + `#!/usr/bin/env bash +if [ "$1" = "inference" ] && [ "$2" = "get" ]; then + cat <<'EOF' +Gateway inference: + + Route: inference.local + Provider: nvidia-prod + Model: nvidia/nemotron-3-super-120b-a12b + Version: 1 +EOF + exit 0 +fi +exit 1 +`, + { mode: 0o755 } + ); + + fs.writeFileSync( + scriptPath, + ` +const { isInferenceRouteReady } = require(${onboardPath}); +console.log(JSON.stringify({ + same: isInferenceRouteReady("nvidia-prod", "nvidia/nemotron-3-super-120b-a12b"), + otherModel: isInferenceRouteReady("nvidia-prod", "nvidia/other-model"), + otherProvider: isInferenceRouteReady("openai-api", "nvidia/nemotron-3-super-120b-a12b"), +})); +` + ); + + const result = spawnSync(process.execPath, [scriptPath], { + cwd: repoRoot, + encoding: "utf-8", + env: { + ...process.env, + PATH: `${tmpDir}:${process.env.PATH || ""}`, + }, + }); + + try { + expect(result.status).toBe(0); + expect(JSON.parse(result.stdout.trim())).toEqual({ + same: true, + otherModel: false, + otherProvider: false, + }); + } finally { + fs.rmSync(tmpDir, { recursive: true, force: true }); + } + }); + + it("detects when OpenClaw is already configured inside the sandbox", () => { + const repoRoot = path.join(import.meta.dirname, ".."); + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-openclaw-ready-")); + const fakeOpenshell = path.join(tmpDir, "openshell"); + const scriptPath = path.join(tmpDir, "openclaw-ready-check.js"); + const onboardPath = JSON.stringify(path.join(repoRoot, "bin", "lib", "onboard.js")); + + fs.writeFileSync( + fakeOpenshell, + `#!/usr/bin/env bash +if [ "$1" = "sandbox" ] && [ "$2" = "download" ]; then + dest="\${@: -1}" + mkdir -p "$dest/sandbox/.openclaw" + cat > "$dest/sandbox/.openclaw/openclaw.json" <<'EOF' +{"gateway":{"auth":{"token":"test-token"}}} +EOF + exit 0 +fi +exit 1 +`, + { mode: 0o755 } + ); + + fs.writeFileSync( + scriptPath, + ` +const { isOpenclawReady } = require(${onboardPath}); +console.log(JSON.stringify({ + ready: isOpenclawReady("my-assistant"), +})); +` + ); + + const result = spawnSync(process.execPath, [scriptPath], { + cwd: repoRoot, + encoding: "utf-8", + env: { + ...process.env, + PATH: `${tmpDir}:${process.env.PATH || ""}`, + }, + }); + + try { + expect(result.status).toBe(0); + expect(JSON.parse(result.stdout.trim())).toEqual({ ready: true }); + } finally { + fs.rmSync(tmpDir, { recursive: true, force: true }); + } + }); + + it("detects when recorded policy presets are already applied", () => { + const repoRoot = path.join(import.meta.dirname, ".."); + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-policy-ready-")); + const registryDir = path.join(tmpDir, ".nemoclaw"); + const registryFile = path.join(registryDir, "sandboxes.json"); + const scriptPath = path.join(tmpDir, "policy-ready-check.js"); + const onboardPath = JSON.stringify(path.join(repoRoot, "bin", "lib", "onboard.js")); + + fs.mkdirSync(registryDir, { recursive: true }); + fs.writeFileSync( + registryFile, + JSON.stringify( + { + sandboxes: { + "my-assistant": { + name: "my-assistant", + policies: ["pypi", "npm"], + }, + }, + defaultSandbox: "my-assistant", + }, + null, + 2 + ) + ); + + fs.writeFileSync( + scriptPath, + ` +const { arePolicyPresetsApplied } = require(${onboardPath}); +console.log(JSON.stringify({ + ready: arePolicyPresetsApplied("my-assistant", ["pypi", "npm"]), + missing: arePolicyPresetsApplied("my-assistant", ["pypi", "slack"]), + empty: arePolicyPresetsApplied("my-assistant", []), +})); +` + ); + + const result = spawnSync(process.execPath, [scriptPath], { + cwd: repoRoot, + encoding: "utf-8", + env: { + ...process.env, + HOME: tmpDir, + }, + }); + + try { + expect(result.status).toBe(0); + const payload = JSON.parse(result.stdout.trim()); + expect(payload).toEqual({ + ready: true, + missing: false, + empty: false, + }); + } finally { + fs.rmSync(tmpDir, { recursive: true, force: true }); + } + }); + it("uses native Anthropic provider creation without embedding the secret in argv", () => { const repoRoot = path.join(import.meta.dirname, ".."); const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-onboard-anthropic-")); @@ -415,29 +817,56 @@ const { setupInference } = require(${onboardPath}); assert.match(commands[3].command, /inference' 'set' '--no-verify'/); }); - it("drops stale local sandbox registry entries when the live sandbox is gone", () => { + it("hydrates stored provider credentials when setupInference runs without process env set", () => { const repoRoot = path.join(import.meta.dirname, ".."); - const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-onboard-stale-sandbox-")); + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-onboard-resume-cred-")); const fakeBin = path.join(tmpDir, "bin"); - const scriptPath = path.join(tmpDir, "stale-sandbox-check.js"); + const scriptPath = path.join(tmpDir, "setup-resume-credential-check.js"); const onboardPath = JSON.stringify(path.join(repoRoot, "bin", "lib", "onboard.js")); - const registryPath = JSON.stringify(path.join(repoRoot, "bin", "lib", "registry.js")); const runnerPath = JSON.stringify(path.join(repoRoot, "bin", "lib", "runner.js")); + const registryPath = JSON.stringify(path.join(repoRoot, "bin", "lib", "registry.js")); + const credentialsPath = JSON.stringify(path.join(repoRoot, "bin", "lib", "credentials.js")); fs.mkdirSync(fakeBin, { recursive: true }); fs.writeFileSync(path.join(fakeBin, "openshell"), "#!/usr/bin/env bash\nexit 0\n", { mode: 0o755 }); const script = String.raw` -const registry = require(${registryPath}); const runner = require(${runnerPath}); -runner.runCapture = (command) => (command.includes("'sandbox' 'get' 'my-assistant'") ? "" : ""); +const registry = require(${registryPath}); +const credentials = require(${credentialsPath}); -registry.registerSandbox({ name: "my-assistant" }); +const commands = []; +runner.run = (command, opts = {}) => { + commands.push({ command, env: opts.env || null }); + return { status: 0 }; +}; +runner.runCapture = (command) => { + if (command.includes("inference") && command.includes("get")) { + return [ + "Gateway inference:", + "", + " Route: inference.local", + " Provider: openai-api", + " Model: gpt-5.4", + " Version: 1", + ].join("\n"); + } + return ""; +}; +registry.updateSandbox = () => true; -const { pruneStaleSandboxEntry } = require(${onboardPath}); +credentials.saveCredential("OPENAI_API_KEY", "sk-stored-secret"); +delete process.env.OPENAI_API_KEY; -const liveExists = pruneStaleSandboxEntry("my-assistant"); -console.log(JSON.stringify({ liveExists, sandbox: registry.getSandbox("my-assistant") })); +const { setupInference } = require(${onboardPath}); + +(async () => { + await setupInference("test-box", "gpt-5.4", "openai-api", "https://api.openai.com/v1", "OPENAI_API_KEY"); + console.log(JSON.stringify({ commands, openai: process.env.OPENAI_API_KEY || null })); +})().catch((error) => { + console.error(error); + process.exit(1); +}); `; fs.writeFileSync(scriptPath, script); @@ -452,59 +881,35 @@ console.log(JSON.stringify({ liveExists, sandbox: registry.getSandbox("my-assist }); assert.equal(result.status, 0, result.stderr); - const payloadLine = result.stdout - .trim() - .split("\n") - .slice() - .reverse() - .find((line) => line.startsWith("{") && line.endsWith("}")); - assert.ok(payloadLine, `expected JSON payload in stdout:\n${result.stdout}`); - const payload = JSON.parse(payloadLine); - assert.equal(payload.liveExists, false); - assert.equal(payload.sandbox, null); + const payload = JSON.parse(result.stdout.trim().split("\n").pop()); + assert.equal(payload.openai, "sk-stored-secret"); + assert.equal(payload.commands[1].env.OPENAI_API_KEY, "sk-stored-secret"); + assert.doesNotMatch(payload.commands[1].command, /sk-stored-secret/); }); - it("reuses an existing healthy gateway instead of destroying it", () => { + it("drops stale local sandbox registry entries when the live sandbox is gone", () => { const repoRoot = path.join(import.meta.dirname, ".."); - const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-onboard-gateway-reuse-")); + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-onboard-stale-sandbox-")); const fakeBin = path.join(tmpDir, "bin"); - const scriptPath = path.join(tmpDir, "gateway-reuse-check.js"); + const scriptPath = path.join(tmpDir, "stale-sandbox-check.js"); const onboardPath = JSON.stringify(path.join(repoRoot, "bin", "lib", "onboard.js")); + const registryPath = JSON.stringify(path.join(repoRoot, "bin", "lib", "registry.js")); const runnerPath = JSON.stringify(path.join(repoRoot, "bin", "lib", "runner.js")); fs.mkdirSync(fakeBin, { recursive: true }); fs.writeFileSync(path.join(fakeBin, "openshell"), "#!/usr/bin/env bash\nexit 0\n", { mode: 0o755 }); const script = String.raw` +const registry = require(${registryPath}); const runner = require(${runnerPath}); -const commands = []; +runner.runCapture = (command) => (command.includes("'sandbox' 'get' 'my-assistant'") ? "" : ""); -runner.run = (command, opts = {}) => { - commands.push(command); - return { status: 0 }; -}; -runner.runCapture = (command) => { - if (command.includes("'status'")) { - return "Server Status\n\n Gateway: nemoclaw\n Status: Connected"; - } - if (command.includes("'gateway' 'info' '-g' 'nemoclaw'")) { - return "Gateway Info\n\n Gateway: nemoclaw\n Gateway endpoint: https://127.0.0.1:8080"; - } - if (command.includes("'--version'")) { - return "openshell 0.0.12"; - } - return ""; -}; +registry.registerSandbox({ name: "my-assistant" }); -const { startGateway } = require(${onboardPath}); +const { pruneStaleSandboxEntry } = require(${onboardPath}); -(async () => { - await startGateway(null); - console.log(JSON.stringify(commands)); -})().catch((error) => { - console.error(error); - process.exit(1); -}); +const liveExists = pruneStaleSandboxEntry("my-assistant"); +console.log(JSON.stringify({ liveExists, sandbox: registry.getSandbox("my-assistant") })); `; fs.writeFileSync(scriptPath, script); @@ -519,11 +924,16 @@ const { startGateway } = require(${onboardPath}); }); assert.equal(result.status, 0, result.stderr); - const commands = JSON.parse(result.stdout.trim().split("\n").pop()); - assert.equal(commands.length, 1); - assert.match(commands[0], /gateway' 'select' 'nemoclaw'/); - assert.doesNotMatch(commands[0], /gateway' 'destroy'/); - assert.doesNotMatch(commands[0], /gateway' 'start'/); + const payloadLine = result.stdout + .trim() + .split("\n") + .slice() + .reverse() + .find((line) => line.startsWith("{") && line.endsWith("}")); + assert.ok(payloadLine, `expected JSON payload in stdout:\n${result.stdout}`); + const payload = JSON.parse(payloadLine); + assert.equal(payload.liveExists, false); + assert.equal(payload.sandbox, null); }); it("builds the sandbox without uploading an external OpenClaw config file", async () => { @@ -737,6 +1147,54 @@ const { createSandbox } = require(${onboardPath}); assert.equal(payload.stderrDestroyCalls, 1); }); + it("prints resume guidance when sandbox image upload times out", () => { + const errors = []; + const originalError = console.error; + console.error = (...args) => errors.push(args.join(" ")); + try { + printSandboxCreateRecoveryHints( + [ + " Pushing image openshell/sandbox-from:123 into gateway nemoclaw", + " [progress] Uploaded to gateway", + "Error: failed to read image export stream", + "Timeout error", + ].join("\n") + ); + } finally { + console.error = originalError; + } + + const joined = errors.join("\n"); + assert.match(joined, /Hint: image upload into the OpenShell gateway timed out\./); + assert.match(joined, /Recovery: nemoclaw onboard --resume/); + assert.match( + joined, + /Progress reached the gateway upload stage, so resume may be able to reuse existing gateway state\./ + ); + }); + + it("prints resume guidance when sandbox image upload resets after transfer progress", () => { + const errors = []; + const originalError = console.error; + console.error = (...args) => errors.push(args.join(" ")); + try { + printSandboxCreateRecoveryHints( + [ + " Pushing image openshell/sandbox-from:123 into gateway nemoclaw", + " [progress] Uploaded to gateway", + "Error: Connection reset by peer", + ].join("\n") + ); + } finally { + console.error = originalError; + } + + const joined = errors.join("\n"); + assert.match(joined, /Hint: the image push\/import stream was interrupted\./); + assert.match(joined, /Recovery: nemoclaw onboard --resume/); + assert.match(joined, /The image appears to have reached the gateway before the stream failed\./); + }); + it("accepts gateway inference when system inference is separately not configured", () => { const repoRoot = path.join(import.meta.dirname, ".."); const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-onboard-inference-get-")); diff --git a/test/runtime-recovery.test.js b/test/runtime-recovery.test.js new file mode 100644 index 000000000..b6870877c --- /dev/null +++ b/test/runtime-recovery.test.js @@ -0,0 +1,74 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { describe, expect, it } from "vitest"; + +import { + classifyGatewayStatus, + classifySandboxLookup, + parseLiveSandboxNames, + shouldAttemptGatewayRecovery, +} from "../bin/lib/runtime-recovery"; + +describe("runtime recovery helpers", () => { + it("parses live sandbox names from openshell sandbox list output", () => { + expect( + Array.from( + parseLiveSandboxNames( + [ + "NAME NAMESPACE CREATED PHASE", + "alpha openshell 2026-03-24 10:00:00 Ready", + "beta openshell 2026-03-24 10:01:00 Provisioning", + ].join("\n") + ) + ) + ).toEqual(["alpha", "beta"]); + }); + + it("treats no-sandboxes output as an empty set", () => { + expect(Array.from(parseLiveSandboxNames("No sandboxes found."))).toEqual([]); + }); + + it("classifies missing sandbox lookups", () => { + expect(classifySandboxLookup('Error: × status: NotFound, message: "sandbox not found"').state).toBe("missing"); + expect(classifySandboxLookup("").state).toBe("missing"); + }); + + it("classifies transport and gateway failures as unavailable", () => { + expect(classifySandboxLookup("Error: × transport error\n ╰─▶ Connection reset by peer (os error 104)").state).toBe( + "unavailable" + ); + expect(classifySandboxLookup("Error: × client error (Connect)\n ╰─▶ Connection refused (os error 111)").state).toBe( + "unavailable" + ); + }); + + it("classifies successful sandbox lookups as present", () => { + expect( + classifySandboxLookup( + [ + "Sandbox:", + "", + " Id: abc", + " Name: my-assistant", + " Namespace: openshell", + " Phase: Ready", + ].join("\n") + ).state + ).toBe("present"); + }); + + it("classifies gateway status output for restart recovery", () => { + expect(classifyGatewayStatus("Gateway: nemoclaw\nStatus: Connected").state).toBe("connected"); + expect(classifyGatewayStatus("Error: × No active gateway").state).toBe("unavailable"); + expect(classifyGatewayStatus("").state).toBe("inactive"); + }); + + it("only attempts gateway recovery when sandbox access is unavailable and gateway is down", () => { + expect(shouldAttemptGatewayRecovery({ sandboxState: "unavailable", gatewayState: "unavailable" })).toBe(true); + expect(shouldAttemptGatewayRecovery({ sandboxState: "unavailable", gatewayState: "inactive" })).toBe(true); + expect(shouldAttemptGatewayRecovery({ sandboxState: "present", gatewayState: "unavailable" })).toBe(false); + expect(shouldAttemptGatewayRecovery({ sandboxState: "missing", gatewayState: "inactive" })).toBe(false); + expect(shouldAttemptGatewayRecovery({ sandboxState: "unavailable", gatewayState: "connected" })).toBe(false); + }); +}); diff --git a/test/uninstall.test.js b/test/uninstall.test.js index 60e7e977e..cd0178638 100644 --- a/test/uninstall.test.js +++ b/test/uninstall.test.js @@ -9,6 +9,21 @@ import { spawnSync } from "node:child_process"; const UNINSTALL_SCRIPT = path.join(import.meta.dirname, "..", "uninstall.sh"); +function createFakeNpmEnv(tmp) { + const fakeBin = path.join(tmp, "bin"); + const npmPath = path.join(fakeBin, "npm"); + fs.mkdirSync(fakeBin, { recursive: true }); + fs.writeFileSync( + npmPath, + "#!/usr/bin/env bash\nexit 0\n", + { mode: 0o755 } + ); + return { + ...process.env, + PATH: `${fakeBin}:${process.env.PATH || "/usr/bin:/bin"}`, + }; +} + describe("uninstall CLI flags", () => { it("--help exits 0 and shows usage", () => { const result = spawnSync("bash", [UNINSTALL_SCRIPT, "--help"], { @@ -76,8 +91,11 @@ describe("uninstall helpers", () => { const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-uninstall-shim-")); const shimDir = path.join(tmp, ".local", "bin"); const shimPath = path.join(shimDir, "nemoclaw"); + const targetPath = path.join(tmp, "prefix", "bin", "nemoclaw"); fs.mkdirSync(shimDir, { recursive: true }); - fs.writeFileSync(shimPath, "#!/usr/bin/env bash\n", { mode: 0o755 }); + fs.mkdirSync(path.dirname(targetPath), { recursive: true }); + fs.writeFileSync(targetPath, "#!/usr/bin/env bash\n", { mode: 0o755 }); + fs.symlinkSync(targetPath, shimPath); const result = spawnSync( "bash", @@ -85,10 +103,54 @@ describe("uninstall helpers", () => { { cwd: path.join(import.meta.dirname, ".."), encoding: "utf-8", + env: createFakeNpmEnv(tmp), }, ); expect(result.status).toBe(0); expect(fs.existsSync(shimPath)).toBe(false); }); + + it("preserves a user-managed nemoclaw file in the shim directory", () => { + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-uninstall-preserve-")); + const shimDir = path.join(tmp, ".local", "bin"); + const shimPath = path.join(shimDir, "nemoclaw"); + fs.mkdirSync(shimDir, { recursive: true }); + fs.writeFileSync(shimPath, "#!/usr/bin/env bash\n", { mode: 0o755 }); + + const result = spawnSync( + "bash", + ["-lc", `HOME="${tmp}" source "${UNINSTALL_SCRIPT}"; remove_nemoclaw_cli`], + { + cwd: path.join(import.meta.dirname, ".."), + encoding: "utf-8", + env: createFakeNpmEnv(tmp), + }, + ); + + expect(result.status).toBe(0); + expect(fs.existsSync(shimPath)).toBe(true); + expect(`${result.stdout}${result.stderr}`).toMatch(/not an installer-managed shim/); + }); + + it("removes the onboard session file as part of NemoClaw state cleanup", () => { + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-uninstall-session-")); + const stateDir = path.join(tmp, ".nemoclaw"); + const sessionPath = path.join(stateDir, "onboard-session.json"); + fs.mkdirSync(stateDir, { recursive: true }); + fs.writeFileSync(sessionPath, JSON.stringify({ status: "complete" })); + + const result = spawnSync( + "bash", + ["-lc", `HOME="${tmp}" source "${UNINSTALL_SCRIPT}"; remove_nemoclaw_state`], + { + cwd: path.join(import.meta.dirname, ".."), + encoding: "utf-8", + }, + ); + + expect(result.status).toBe(0); + expect(fs.existsSync(sessionPath)).toBe(false); + expect(fs.existsSync(stateDir)).toBe(false); + }); }); diff --git a/uninstall.sh b/uninstall.sh index 42aa4e3de..c8bf4d4d6 100755 --- a/uninstall.sh +++ b/uninstall.sh @@ -7,7 +7,7 @@ # - NemoClaw helper services # - All OpenShell sandboxes plus the NemoClaw gateway/providers # - NemoClaw/OpenShell/OpenClaw Docker images built or pulled for the sandbox flow -# - ~/.nemoclaw plus ~/.config/{openshell,nemoclaw} state +# - ~/.nemoclaw plus ~/.config/{openshell,nemoclaw} state, including onboard-session.json # - Global nemoclaw npm install/link # - OpenShell binary if it was installed to the standard installer path # @@ -305,8 +305,10 @@ remove_nemoclaw_cli() { warn "npm not found; skipping nemoclaw npm uninstall." fi - if [ -L "${NEMOCLAW_SHIM_DIR}/nemoclaw" ] || [ -f "${NEMOCLAW_SHIM_DIR}/nemoclaw" ]; then + if [ -L "${NEMOCLAW_SHIM_DIR}/nemoclaw" ]; then remove_path "${NEMOCLAW_SHIM_DIR}/nemoclaw" + elif [ -f "${NEMOCLAW_SHIM_DIR}/nemoclaw" ]; then + warn "Leaving ${NEMOCLAW_SHIM_DIR}/nemoclaw in place because it is not an installer-managed shim." fi }