-
Notifications
You must be signed in to change notification settings - Fork 2.1k
fix: harden installer and onboard resiliency #961
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 7 commits
Commits
Show all changes
12 commits
Select commit
Hold shift + click to select a range
7b70543
fix: harden installer and onboard resiliency
kjw3 59a8936
fix: address installer and debug review follow-ups
kjw3 345f8c6
fix: harden onboard resume across later setup steps
kjw3 071ee70
test: simplify payload extraction in onboard tests
kjw3 6d52a60
test: keep onboard payload extraction target-compatible
kjw3 d9b0634
chore: merge origin/main into fix/446-installer-resiliency-signed
kjw3 d8174d7
chore: align onboard session lint with complexity rule
kjw3 33768ea
Merge branch 'main' into fix/446-installer-resiliency-signed
kjw3 3a57d02
Merge branch 'main' into fix/446-installer-resiliency-signed
kjw3 49a02f2
fix: harden onboard session safety and lock handling
kjw3 e1e990b
fix: tighten onboard session redaction and metadata handling
kjw3 91b3ee7
chore: merge origin/main into fix/446-installer-resiliency-signed
kjw3 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,391 @@ | ||
| // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
| // SPDX-License-Identifier: Apache-2.0 | ||
|
|
||
| const fs = require("fs"); | ||
| const path = require("path"); | ||
|
|
||
| const SESSION_VERSION = 1; | ||
| const SESSION_DIR = path.join(process.env.HOME || "/tmp", ".nemoclaw"); | ||
| const SESSION_FILE = path.join(SESSION_DIR, "onboard-session.json"); | ||
| const LOCK_FILE = path.join(SESSION_DIR, "onboard.lock"); | ||
| const VALID_STEP_STATES = new Set(["pending", "in_progress", "complete", "failed", "skipped"]); | ||
|
|
||
| function ensureSessionDir() { | ||
| fs.mkdirSync(SESSION_DIR, { recursive: true, mode: 0o700 }); | ||
| } | ||
|
|
||
| function sessionPath() { | ||
| return SESSION_FILE; | ||
| } | ||
|
|
||
| function lockPath() { | ||
| return LOCK_FILE; | ||
| } | ||
|
|
||
| function defaultSteps() { | ||
| return { | ||
| preflight: { status: "pending", startedAt: null, completedAt: null, error: null }, | ||
| gateway: { status: "pending", startedAt: null, completedAt: null, error: null }, | ||
| sandbox: { status: "pending", startedAt: null, completedAt: null, error: null }, | ||
| provider_selection: { status: "pending", startedAt: null, completedAt: null, error: null }, | ||
| inference: { status: "pending", startedAt: null, completedAt: null, error: null }, | ||
| openclaw: { status: "pending", startedAt: null, completedAt: null, error: null }, | ||
| policies: { status: "pending", startedAt: null, completedAt: null, error: null }, | ||
| }; | ||
| } | ||
|
|
||
| function createSession(overrides = {}) { | ||
| const now = new Date().toISOString(); | ||
| return { | ||
| version: SESSION_VERSION, | ||
| sessionId: overrides.sessionId || `${Date.now()}-${Math.random().toString(36).slice(2, 10)}`, | ||
| resumable: true, | ||
| status: "in_progress", | ||
| mode: overrides.mode || "interactive", | ||
| startedAt: overrides.startedAt || now, | ||
| updatedAt: overrides.updatedAt || now, | ||
| lastStepStarted: overrides.lastStepStarted || null, | ||
| lastCompletedStep: overrides.lastCompletedStep || null, | ||
| failure: overrides.failure || null, | ||
| sandboxName: overrides.sandboxName || null, | ||
| provider: overrides.provider || null, | ||
| model: overrides.model || null, | ||
| endpointUrl: overrides.endpointUrl || null, | ||
| credentialEnv: overrides.credentialEnv || null, | ||
| preferredInferenceApi: overrides.preferredInferenceApi || null, | ||
| nimContainer: overrides.nimContainer || null, | ||
| policyPresets: Array.isArray(overrides.policyPresets) ? overrides.policyPresets.filter((value) => typeof value === "string") : null, | ||
| metadata: { | ||
| gatewayName: overrides.metadata?.gatewayName || "nemoclaw", | ||
| }, | ||
| steps: { | ||
| ...defaultSteps(), | ||
| ...(overrides.steps || {}), | ||
| }, | ||
| }; | ||
| } | ||
|
|
||
| function isObject(value) { | ||
| return typeof value === "object" && value !== null && !Array.isArray(value); | ||
| } | ||
|
|
||
| function redactSensitiveText(value) { | ||
| if (typeof value !== "string") return null; | ||
| return value | ||
| .replace(/(NVIDIA_API_KEY|OPENAI_API_KEY|ANTHROPIC_API_KEY|GEMINI_API_KEY|COMPATIBLE_API_KEY|COMPATIBLE_ANTHROPIC_API_KEY)=\S+/gi, "$1=<REDACTED>") | ||
| .replace(/Bearer\s+\S+/gi, "Bearer <REDACTED>") | ||
| .replace(/nvapi-[A-Za-z0-9_-]{10,}/g, "<REDACTED>") | ||
| .replace(/ghp_[A-Za-z0-9]{20,}/g, "<REDACTED>") | ||
| .replace(/sk-[A-Za-z0-9_-]{10,}/g, "<REDACTED>") | ||
| .slice(0, 240); | ||
| } | ||
|
|
||
| function sanitizeFailure(input) { | ||
| if (!input) return null; | ||
| const step = typeof input.step === "string" ? input.step : null; | ||
| const message = redactSensitiveText(input.message); | ||
| const recordedAt = typeof input.recordedAt === "string" ? input.recordedAt : new Date().toISOString(); | ||
| return step || message ? { step, message, recordedAt } : null; | ||
| } | ||
|
|
||
| function validateStep(step) { | ||
| if (!isObject(step)) return false; | ||
| if (!VALID_STEP_STATES.has(step.status)) return false; | ||
| return true; | ||
| } | ||
|
|
||
| // eslint-disable-next-line complexity | ||
| function normalizeSession(data) { | ||
| if (!isObject(data) || data.version !== SESSION_VERSION) return null; | ||
| const normalized = createSession({ | ||
| sessionId: typeof data.sessionId === "string" ? data.sessionId : undefined, | ||
| mode: typeof data.mode === "string" ? data.mode : undefined, | ||
| startedAt: typeof data.startedAt === "string" ? data.startedAt : undefined, | ||
| updatedAt: typeof data.updatedAt === "string" ? data.updatedAt : undefined, | ||
| sandboxName: typeof data.sandboxName === "string" ? data.sandboxName : null, | ||
| provider: typeof data.provider === "string" ? data.provider : null, | ||
| model: typeof data.model === "string" ? data.model : null, | ||
| endpointUrl: typeof data.endpointUrl === "string" ? data.endpointUrl : null, | ||
| credentialEnv: typeof data.credentialEnv === "string" ? data.credentialEnv : null, | ||
| preferredInferenceApi: typeof data.preferredInferenceApi === "string" ? data.preferredInferenceApi : null, | ||
| nimContainer: typeof data.nimContainer === "string" ? data.nimContainer : null, | ||
| policyPresets: Array.isArray(data.policyPresets) ? data.policyPresets.filter((value) => typeof value === "string") : null, | ||
| lastStepStarted: typeof data.lastStepStarted === "string" ? data.lastStepStarted : null, | ||
| lastCompletedStep: typeof data.lastCompletedStep === "string" ? data.lastCompletedStep : null, | ||
| failure: sanitizeFailure(data.failure), | ||
| metadata: isObject(data.metadata) ? data.metadata : undefined, | ||
| }); | ||
| normalized.resumable = data.resumable !== false; | ||
| normalized.status = typeof data.status === "string" ? data.status : normalized.status; | ||
|
|
||
| if (isObject(data.steps)) { | ||
| for (const [name, step] of Object.entries(data.steps)) { | ||
| if (Object.prototype.hasOwnProperty.call(normalized.steps, name) && validateStep(step)) { | ||
| normalized.steps[name] = { | ||
| status: step.status, | ||
| startedAt: typeof step.startedAt === "string" ? step.startedAt : null, | ||
| completedAt: typeof step.completedAt === "string" ? step.completedAt : null, | ||
| error: redactSensitiveText(step.error), | ||
| }; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| return normalized; | ||
| } | ||
|
|
||
| function loadSession() { | ||
| try { | ||
| if (!fs.existsSync(SESSION_FILE)) { | ||
| return null; | ||
| } | ||
| const parsed = JSON.parse(fs.readFileSync(SESSION_FILE, "utf-8")); | ||
| return normalizeSession(parsed); | ||
| } catch { | ||
| return null; | ||
| } | ||
| } | ||
|
|
||
| function saveSession(session) { | ||
| const normalized = normalizeSession(session) || createSession(); | ||
| normalized.updatedAt = new Date().toISOString(); | ||
| ensureSessionDir(); | ||
| const tmpFile = path.join( | ||
| SESSION_DIR, | ||
| `.onboard-session.${process.pid}.${Date.now()}.${Math.random().toString(36).slice(2, 8)}.tmp` | ||
| ); | ||
| fs.writeFileSync(tmpFile, JSON.stringify(normalized, null, 2), { mode: 0o600 }); | ||
| fs.renameSync(tmpFile, SESSION_FILE); | ||
| return normalized; | ||
| } | ||
|
|
||
| function clearSession() { | ||
| try { | ||
| if (fs.existsSync(SESSION_FILE)) { | ||
| fs.unlinkSync(SESSION_FILE); | ||
| } | ||
| } catch { | ||
| return; | ||
| } | ||
| } | ||
|
|
||
| function parseLockFile(contents) { | ||
| try { | ||
| const parsed = JSON.parse(contents); | ||
| if (typeof parsed?.pid !== "number") return null; | ||
| return { | ||
| pid: parsed.pid, | ||
| startedAt: typeof parsed.startedAt === "string" ? parsed.startedAt : null, | ||
| command: typeof parsed.command === "string" ? parsed.command : null, | ||
| }; | ||
| } catch { | ||
| return null; | ||
| } | ||
| } | ||
|
|
||
| function isProcessAlive(pid) { | ||
| if (!Number.isInteger(pid) || pid <= 0) return false; | ||
| try { | ||
| process.kill(pid, 0); | ||
| return true; | ||
| } catch (error) { | ||
| return error?.code === "EPERM"; | ||
| } | ||
| } | ||
|
|
||
| function acquireOnboardLock(command = null) { | ||
| ensureSessionDir(); | ||
| const payload = JSON.stringify( | ||
| { | ||
| pid: process.pid, | ||
| startedAt: new Date().toISOString(), | ||
| command: typeof command === "string" ? command : null, | ||
| }, | ||
| null, | ||
| 2 | ||
| ); | ||
|
|
||
| for (let attempt = 0; attempt < 2; attempt++) { | ||
| try { | ||
| const fd = fs.openSync(LOCK_FILE, "wx", 0o600); | ||
| fs.writeFileSync(fd, payload); | ||
| fs.closeSync(fd); | ||
| return { acquired: true, lockFile: LOCK_FILE, stale: false }; | ||
| } catch (error) { | ||
| if (error?.code !== "EEXIST") { | ||
| throw error; | ||
| } | ||
|
|
||
| const existing = parseLockFile(fs.readFileSync(LOCK_FILE, "utf8")); | ||
| if (existing && isProcessAlive(existing.pid)) { | ||
| return { | ||
| acquired: false, | ||
| lockFile: LOCK_FILE, | ||
| stale: false, | ||
| holderPid: existing.pid, | ||
| holderStartedAt: existing.startedAt, | ||
| holderCommand: existing.command, | ||
| }; | ||
| } | ||
|
|
||
| try { | ||
| fs.unlinkSync(LOCK_FILE); | ||
| } catch (unlinkError) { | ||
| if (unlinkError?.code !== "ENOENT") { | ||
| throw unlinkError; | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| return { acquired: false, lockFile: LOCK_FILE, stale: true }; | ||
| } | ||
|
|
||
| function releaseOnboardLock() { | ||
| try { | ||
| if (!fs.existsSync(LOCK_FILE)) return; | ||
| const existing = parseLockFile(fs.readFileSync(LOCK_FILE, "utf8")); | ||
| if (existing && existing.pid !== process.pid) return; | ||
| fs.unlinkSync(LOCK_FILE); | ||
| } catch { | ||
| return; | ||
| } | ||
| } | ||
|
|
||
| function updateSession(mutator) { | ||
| const current = loadSession() || createSession(); | ||
| const next = typeof mutator === "function" ? mutator(current) || current : current; | ||
| return saveSession(next); | ||
| } | ||
|
|
||
| function markStepStarted(stepName) { | ||
| return updateSession((session) => { | ||
| const step = session.steps[stepName]; | ||
| if (!step) return session; | ||
| step.status = "in_progress"; | ||
| step.startedAt = new Date().toISOString(); | ||
| step.error = null; | ||
| session.lastStepStarted = stepName; | ||
| session.failure = null; | ||
| session.status = "in_progress"; | ||
| return session; | ||
| }); | ||
coderabbitai[bot] marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } | ||
|
|
||
| function markStepComplete(stepName, updates = {}) { | ||
| return updateSession((session) => { | ||
| const step = session.steps[stepName]; | ||
| if (!step) return session; | ||
| step.status = "complete"; | ||
| step.completedAt = new Date().toISOString(); | ||
| step.error = null; | ||
| session.lastCompletedStep = stepName; | ||
| session.failure = null; | ||
| Object.assign(session, filterSafeUpdates(updates)); | ||
| return session; | ||
| }); | ||
| } | ||
|
|
||
| function markStepFailed(stepName, message = null) { | ||
| return updateSession((session) => { | ||
| const step = session.steps[stepName]; | ||
| if (!step) return session; | ||
| step.status = "failed"; | ||
| step.error = redactSensitiveText(message); | ||
| session.failure = sanitizeFailure({ | ||
| step: stepName, | ||
| message, | ||
| recordedAt: new Date().toISOString(), | ||
| }); | ||
| session.status = "failed"; | ||
| return session; | ||
| }); | ||
| } | ||
|
|
||
| function completeSession(updates = {}) { | ||
| return updateSession((session) => { | ||
| Object.assign(session, filterSafeUpdates(updates)); | ||
| session.status = "complete"; | ||
| session.failure = null; | ||
| return session; | ||
coderabbitai[bot] marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| }); | ||
| } | ||
|
|
||
| function filterSafeUpdates(updates) { | ||
| const safe = {}; | ||
| if (!isObject(updates)) return safe; | ||
| if (typeof updates.sandboxName === "string") safe.sandboxName = updates.sandboxName; | ||
| if (typeof updates.provider === "string") safe.provider = updates.provider; | ||
| if (typeof updates.model === "string") safe.model = updates.model; | ||
| if (typeof updates.endpointUrl === "string") safe.endpointUrl = updates.endpointUrl; | ||
coderabbitai[bot] marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| if (typeof updates.credentialEnv === "string") safe.credentialEnv = updates.credentialEnv; | ||
| if (typeof updates.preferredInferenceApi === "string") safe.preferredInferenceApi = updates.preferredInferenceApi; | ||
| if (typeof updates.nimContainer === "string") safe.nimContainer = updates.nimContainer; | ||
| if (Array.isArray(updates.policyPresets)) { | ||
| safe.policyPresets = updates.policyPresets.filter((value) => typeof value === "string"); | ||
| } | ||
| if (isObject(updates.metadata)) { | ||
| safe.metadata = {}; | ||
| if (typeof updates.metadata.gatewayName === "string") { | ||
| safe.metadata.gatewayName = updates.metadata.gatewayName; | ||
| } | ||
coderabbitai[bot] marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| } | ||
| return safe; | ||
| } | ||
|
|
||
| function summarizeForDebug(session = loadSession()) { | ||
| if (!session) return null; | ||
| return { | ||
| version: session.version, | ||
| sessionId: session.sessionId, | ||
| status: session.status, | ||
| resumable: session.resumable, | ||
| mode: session.mode, | ||
| startedAt: session.startedAt, | ||
| updatedAt: session.updatedAt, | ||
| sandboxName: session.sandboxName, | ||
| provider: session.provider, | ||
| model: session.model, | ||
| endpointUrl: session.endpointUrl, | ||
| credentialEnv: session.credentialEnv, | ||
| preferredInferenceApi: session.preferredInferenceApi, | ||
| nimContainer: session.nimContainer, | ||
| policyPresets: session.policyPresets, | ||
| lastStepStarted: session.lastStepStarted, | ||
| lastCompletedStep: session.lastCompletedStep, | ||
| failure: session.failure, | ||
| steps: Object.fromEntries( | ||
| Object.entries(session.steps).map(([name, step]) => [ | ||
| name, | ||
| { | ||
| status: step.status, | ||
| startedAt: step.startedAt, | ||
| completedAt: step.completedAt, | ||
| error: step.error, | ||
| }, | ||
| ]) | ||
| ), | ||
| }; | ||
| } | ||
|
|
||
| module.exports = { | ||
| LOCK_FILE, | ||
| SESSION_DIR, | ||
| SESSION_FILE, | ||
| SESSION_VERSION, | ||
| acquireOnboardLock, | ||
| clearSession, | ||
| completeSession, | ||
| createSession, | ||
| loadSession, | ||
| markStepComplete, | ||
| markStepFailed, | ||
| markStepStarted, | ||
| lockPath, | ||
| saveSession, | ||
| releaseOnboardLock, | ||
| sessionPath, | ||
| redactSensitiveText, | ||
| summarizeForDebug, | ||
| updateSession, | ||
| }; | ||
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.