@@ -1760,117 +1760,128 @@ async function createSandbox(gpu, model, provider, preferredInferenceApi = null,
17601760 registry . removeSandbox ( sandboxName ) ;
17611761 }
17621762
1763- // Stage build context
1763+ // Stage build context.
1764+ // The build context contains source code, scripts, and potentially API keys
1765+ // in env args, so it must not persist in /tmp after a failed sandbox create.
1766+ // run() calls process.exit() on failure (bypassing try/finally), so we
1767+ // register a process 'exit' handler to guarantee cleanup in all cases.
17641768 const buildCtx = fs . mkdtempSync ( path . join ( os . tmpdir ( ) , "nemoclaw-build-" ) ) ;
1765- const stagedDockerfile = path . join ( buildCtx , "Dockerfile" ) ;
1766- fs . copyFileSync ( path . join ( ROOT , "Dockerfile" ) , stagedDockerfile ) ;
1767- copyBuildContextDir ( path . join ( ROOT , "nemoclaw" ) , path . join ( buildCtx , "nemoclaw" ) ) ;
1768- copyBuildContextDir ( path . join ( ROOT , "nemoclaw-blueprint" ) , path . join ( buildCtx , "nemoclaw-blueprint" ) ) ;
1769- copyBuildContextDir ( path . join ( ROOT , "scripts" ) , path . join ( buildCtx , "scripts" ) ) ;
1770-
1771- // Create sandbox (use -- echo to avoid dropping into interactive shell)
1772- // Pass the base policy so sandbox starts in proxy mode (required for policy updates later)
1773- const basePolicyPath = path . join ( ROOT , "nemoclaw-blueprint" , "policies" , "openclaw-sandbox.yaml" ) ;
1774- const createArgs = [
1775- "--from" , `${ buildCtx } /Dockerfile` ,
1776- "--name" , sandboxName ,
1777- "--policy" , basePolicyPath ,
1778- ] ;
1779- // --gpu is intentionally omitted. See comment in startGateway().
1780-
1781- console . log ( ` Creating sandbox '${ sandboxName } ' (this takes a few minutes on first run)...` ) ;
1782- const chatUiUrl = process . env . CHAT_UI_URL || "http://127.0.0.1:18789" ;
1783- patchStagedDockerfile ( stagedDockerfile , model , chatUiUrl , String ( Date . now ( ) ) , provider , preferredInferenceApi ) ;
1784- // Only pass non-sensitive env vars to the sandbox. NVIDIA_API_KEY is NOT
1785- // needed inside the sandbox — inference is proxied through the OpenShell
1786- // gateway which injects the stored credential server-side. The gateway
1787- // also strips any Authorization headers sent by the sandbox client.
1788- // See: crates/openshell-sandbox/src/proxy.rs (header stripping),
1789- // crates/openshell-router/src/backend.rs (server-side auth injection).
1790- const envArgs = [ formatEnvAssignment ( "CHAT_UI_URL" , chatUiUrl ) ] ;
1791- const sandboxEnv = { ...process . env } ;
1792- delete sandboxEnv . NVIDIA_API_KEY ;
1793- const discordToken = getCredential ( "DISCORD_BOT_TOKEN" ) || process . env . DISCORD_BOT_TOKEN ;
1794- if ( discordToken ) {
1795- sandboxEnv . DISCORD_BOT_TOKEN = discordToken ;
1796- }
1797- const slackToken = getCredential ( "SLACK_BOT_TOKEN" ) || process . env . SLACK_BOT_TOKEN ;
1798- if ( slackToken ) {
1799- sandboxEnv . SLACK_BOT_TOKEN = slackToken ;
1800- }
1801-
1802- // Run without piping through awk — the pipe masked non-zero exit codes
1803- // from openshell because bash returns the status of the last pipeline
1804- // command (awk, always 0) unless pipefail is set. Removing the pipe
1805- // lets the real exit code flow through to run().
1806- const createCommand = `${ openshellShellCommand ( [
1807- "sandbox" ,
1808- "create" ,
1809- ...createArgs ,
1810- "--" ,
1811- "env" ,
1812- ...envArgs ,
1813- "nemoclaw-start" ,
1814- ] ) } 2>&1`;
1815- const createResult = await streamSandboxCreate ( createCommand , sandboxEnv , {
1816- readyCheck : ( ) => {
1817- const list = runCaptureOpenshell ( [ "sandbox" , "list" ] , { ignoreError : true } ) ;
1818- return isSandboxReady ( list , sandboxName ) ;
1819- } ,
1820- } ) ;
1769+ const cleanupBuildCtx = ( ) => {
1770+ try { fs . rmSync ( buildCtx , { recursive : true , force : true } ) ; } catch { }
1771+ } ;
1772+ process . on ( "exit" , cleanupBuildCtx ) ;
18211773
1822- // Clean up build context regardless of outcome
1823- run ( `rm -rf "${ buildCtx } "` , { ignoreError : true } ) ;
1774+ try {
1775+ const stagedDockerfile = path . join ( buildCtx , "Dockerfile" ) ;
1776+ fs . copyFileSync ( path . join ( ROOT , "Dockerfile" ) , stagedDockerfile ) ;
1777+ copyBuildContextDir ( path . join ( ROOT , "nemoclaw" ) , path . join ( buildCtx , "nemoclaw" ) ) ;
1778+ copyBuildContextDir ( path . join ( ROOT , "nemoclaw-blueprint" ) , path . join ( buildCtx , "nemoclaw-blueprint" ) ) ;
1779+ copyBuildContextDir ( path . join ( ROOT , "scripts" ) , path . join ( buildCtx , "scripts" ) ) ;
1780+
1781+ // Create sandbox (use -- echo to avoid dropping into interactive shell)
1782+ // Pass the base policy so sandbox starts in proxy mode (required for policy updates later)
1783+ const basePolicyPath = path . join ( ROOT , "nemoclaw-blueprint" , "policies" , "openclaw-sandbox.yaml" ) ;
1784+ const createArgs = [
1785+ "--from" , `${ buildCtx } /Dockerfile` ,
1786+ "--name" , sandboxName ,
1787+ "--policy" , basePolicyPath ,
1788+ ] ;
1789+ // --gpu is intentionally omitted. See comment in startGateway().
1790+
1791+ console . log ( ` Creating sandbox '${ sandboxName } ' (this takes a few minutes on first run)...` ) ;
1792+ const chatUiUrl = process . env . CHAT_UI_URL || "http://127.0.0.1:18789" ;
1793+ patchStagedDockerfile ( stagedDockerfile , model , chatUiUrl , String ( Date . now ( ) ) , provider , preferredInferenceApi ) ;
1794+ // Only pass non-sensitive env vars to the sandbox. NVIDIA_API_KEY is NOT
1795+ // needed inside the sandbox — inference is proxied through the OpenShell
1796+ // gateway which injects the stored credential server-side. The gateway
1797+ // also strips any Authorization headers sent by the sandbox client.
1798+ // See: crates/openshell-sandbox/src/proxy.rs (header stripping),
1799+ // crates/openshell-router/src/backend.rs (server-side auth injection).
1800+ const envArgs = [ formatEnvAssignment ( "CHAT_UI_URL" , chatUiUrl ) ] ;
1801+ const sandboxEnv = { ...process . env } ;
1802+ delete sandboxEnv . NVIDIA_API_KEY ;
1803+ const discordToken = getCredential ( "DISCORD_BOT_TOKEN" ) || process . env . DISCORD_BOT_TOKEN ;
1804+ if ( discordToken ) {
1805+ sandboxEnv . DISCORD_BOT_TOKEN = discordToken ;
1806+ }
1807+ const slackToken = getCredential ( "SLACK_BOT_TOKEN" ) || process . env . SLACK_BOT_TOKEN ;
1808+ if ( slackToken ) {
1809+ sandboxEnv . SLACK_BOT_TOKEN = slackToken ;
1810+ }
1811+
1812+ // Run without piping through awk — the pipe masked non-zero exit codes
1813+ // from openshell because bash returns the status of the last pipeline
1814+ // command (awk, always 0) unless pipefail is set. Removing the pipe
1815+ // lets the real exit code flow through to run().
1816+ const createCommand = `${ openshellShellCommand ( [
1817+ "sandbox" ,
1818+ "create" ,
1819+ ...createArgs ,
1820+ "--" ,
1821+ "env" ,
1822+ ...envArgs ,
1823+ "nemoclaw-start" ,
1824+ ] ) } 2>&1`;
1825+ const createResult = await streamSandboxCreate ( createCommand , sandboxEnv , {
1826+ readyCheck : ( ) => {
1827+ const list = runCaptureOpenshell ( [ "sandbox" , "list" ] , { ignoreError : true } ) ;
1828+ return isSandboxReady ( list , sandboxName ) ;
1829+ } ,
1830+ } ) ;
18241831
1825- if ( createResult . status !== 0 ) {
1826- console . error ( "" ) ;
1827- console . error ( ` Sandbox creation failed (exit ${ createResult . status } ).` ) ;
1828- if ( createResult . output ) {
1832+ if ( createResult . status !== 0 ) {
18291833 console . error ( "" ) ;
1830- console . error ( createResult . output ) ;
1831- }
1832- console . error ( " Try: openshell sandbox list # check gateway state" ) ;
1833- printSandboxCreateRecoveryHints ( createResult . output ) ;
1834- process . exit ( createResult . status || 1 ) ;
1835- }
1836-
1837- // Wait for sandbox to reach Ready state in k3s before registering.
1838- // On WSL2 + Docker Desktop the pod can take longer to initialize;
1839- // without this gate, NemoClaw registers a phantom sandbox that
1840- // causes "sandbox not found" on every subsequent connect/status call.
1841- console . log ( " Waiting for sandbox to become ready..." ) ;
1842- let ready = false ;
1843- for ( let i = 0 ; i < 30 ; i ++ ) {
1844- const list = runCaptureOpenshell ( [ "sandbox" , "list" ] , { ignoreError : true } ) ;
1845- if ( isSandboxReady ( list , sandboxName ) ) {
1846- ready = true ;
1847- break ;
1834+ console . error ( ` Sandbox creation failed (exit ${ createResult . status } ).` ) ;
1835+ if ( createResult . output ) {
1836+ console . error ( "" ) ;
1837+ console . error ( createResult . output ) ;
1838+ }
1839+ console . error ( " Try: openshell sandbox list # check gateway state" ) ;
1840+ printSandboxCreateRecoveryHints ( createResult . output ) ;
1841+ process . exit ( createResult . status || 1 ) ;
1842+ }
1843+
1844+ // Wait for sandbox to reach Ready state in k3s before registering.
1845+ // On WSL2 + Docker Desktop the pod can take longer to initialize;
1846+ // without this gate, NemoClaw registers a phantom sandbox that
1847+ // causes "sandbox not found" on every subsequent connect/status call.
1848+ console . log ( " Waiting for sandbox to become ready..." ) ;
1849+ let ready = false ;
1850+ for ( let i = 0 ; i < 30 ; i ++ ) {
1851+ const list = runCaptureOpenshell ( [ "sandbox" , "list" ] , { ignoreError : true } ) ;
1852+ if ( isSandboxReady ( list , sandboxName ) ) {
1853+ ready = true ;
1854+ break ;
1855+ }
1856+ require ( "child_process" ) . spawnSync ( "sleep" , [ "2" ] ) ;
18481857 }
1849- require ( "child_process" ) . spawnSync ( "sleep" , [ "2" ] ) ;
1850- }
18511858
1852- if ( ! ready ) {
1853- // Clean up the orphaned sandbox so the next onboard retry with the same
1854- // name doesn't fail on "sandbox already exists".
1855- const delResult = runOpenshell ( [ "sandbox" , "delete" , sandboxName ] , { ignoreError : true } ) ;
1856- console . error ( "" ) ;
1857- console . error ( ` Sandbox '${ sandboxName } ' was created but did not become ready within 60s.` ) ;
1858- if ( delResult . status === 0 ) {
1859- console . error ( " The orphaned sandbox has been removed — you can safely retry." ) ;
1860- } else {
1861- console . error ( ` Could not remove the orphaned sandbox. Manual cleanup:` ) ;
1862- console . error ( ` openshell sandbox delete "${ sandboxName } "` ) ;
1859+ if ( ! ready ) {
1860+ // Clean up the orphaned sandbox so the next onboard retry with the same
1861+ // name doesn't fail on "sandbox already exists".
1862+ const delResult = runOpenshell ( [ "sandbox" , "delete" , sandboxName ] , { ignoreError : true } ) ;
1863+ console . error ( "" ) ;
1864+ console . error ( ` Sandbox '${ sandboxName } ' was created but did not become ready within 60s.` ) ;
1865+ if ( delResult . status === 0 ) {
1866+ console . error ( " The orphaned sandbox has been removed — you can safely retry." ) ;
1867+ } else {
1868+ console . error ( ` Could not remove the orphaned sandbox. Manual cleanup:` ) ;
1869+ console . error ( ` openshell sandbox delete "${ sandboxName } "` ) ;
1870+ }
1871+ console . error ( " Retry: nemoclaw onboard" ) ;
1872+ process . exit ( 1 ) ;
18631873 }
1864- console . error ( " Retry: nemoclaw onboard" ) ;
1865- process . exit ( 1 ) ;
1866- }
18671874
1868- // Release any stale forward on port 18789 before claiming it for the new sandbox.
1869- // A previous onboard run may have left the port forwarded to a different sandbox,
1870- // which would silently prevent the new sandbox's dashboard from being reachable.
1871- runOpenshell ( [ "forward" , "stop" , "18789" ] , { ignoreError : true } ) ;
1872- // Forward dashboard port to the new sandbox
1873- runOpenshell ( [ "forward" , "start" , "--background" , "18789" , sandboxName ] , { ignoreError : true } ) ;
1875+ // Release any stale forward on port 18789 before claiming it for the new sandbox.
1876+ // A previous onboard run may have left the port forwarded to a different sandbox,
1877+ // which would silently prevent the new sandbox's dashboard from being reachable.
1878+ runOpenshell ( [ "forward" , "stop" , "18789" ] , { ignoreError : true } ) ;
1879+ // Forward dashboard port to the new sandbox
1880+ runOpenshell ( [ "forward" , "start" , "--background" , "18789" , sandboxName ] , { ignoreError : true } ) ;
1881+ } finally {
1882+ cleanupBuildCtx ( ) ;
1883+ process . removeListener ( "exit" , cleanupBuildCtx ) ;
1884+ }
18741885
18751886 // Register only after confirmed ready — prevents phantom entries
18761887 registry . registerSandbox ( {
0 commit comments