@@ -191,9 +191,10 @@ class BaseCodeExecutor(BaseModel):
191191
192192The effective timeout is resolved as:
193193``` python
194+ input_t = code_execution_input.timeout_seconds
194195timeout = (
195- code_execution_input.timeout_seconds
196- ?? self .default_timeout_seconds
196+ input_t if input_t is not None
197+ else self .default_timeout_seconds
197198)
198199```
199200
@@ -213,9 +214,10 @@ run it in a separate thread with a join timeout:
213214import threading
214215
215216def execute_code (self , invocation_context , code_execution_input ):
217+ input_t = code_execution_input.timeout_seconds
216218 timeout = (
217- code_execution_input.timeout_seconds
218- or self .default_timeout_seconds
219+ input_t if input_t is not None
220+ else self .default_timeout_seconds
219221 )
220222 if timeout is None :
221223 # No timeout: current behavior (blocking exec)
@@ -268,9 +270,10 @@ running indefinitely after the join timeout expires.
268270import threading
269271
270272def execute_code (self , invocation_context , code_execution_input ):
273+ input_t = code_execution_input.timeout_seconds
271274 timeout = (
272- code_execution_input.timeout_seconds
273- or self .default_timeout_seconds
275+ input_t if input_t is not None
276+ else self .default_timeout_seconds
274277 )
275278
276279 # Create the exec instance
@@ -279,28 +282,32 @@ def execute_code(self, invocation_context, code_execution_input):
279282 [' python3' , ' -c' , code_execution_input.code],
280283 )[' Id' ]
281284
282- # Start a timer to kill the exec's PID on timeout
285+ # Start a timer to kill the exec on timeout.
286+ #
287+ # NOTE on PID namespaces: exec_inspect().Pid returns the
288+ # host-namespace PID of the exec'd process. Passing that PID
289+ # to `kill` inside the container will not match (different PID
290+ # namespace). Instead, we use the Docker API to kill from the
291+ # host side, or find the container-namespace PID.
283292 timer = None
284293 timed_out = threading.Event()
285294 if timeout is not None :
286295 def _kill_exec ():
287296 timed_out.set()
288297 try :
289- # Get the PID of the exec'd process
290- info = self ._client.api.exec_inspect(exec_id)
291- pid = info.get(' Pid' , 0 )
292- if pid > 0 :
293- self ._container.exec_run(
294- [' kill' , ' -9' , str (pid)],
295- detach = True ,
296- )
297- except Exception :
298- # Fallback: kill all non-init processes
298+ # Approach: find exec'd process by its command
299+ # inside the container's PID namespace, then kill.
300+ # `exec_inspect` gives us the command; `pkill -f`
301+ # matches it inside the container.
299302 self ._container.exec_run(
300- [' sh' , ' -c' ,
301- ' kill -9 $(ps -o pid= | grep -v "^\\ s*1$")' ],
303+ [' pkill' , ' -9' , ' -f' , ' python3 -c' ],
302304 detach = True ,
303305 )
306+ except Exception :
307+ pass
308+ # If the above fails or is insufficient, the next
309+ # execute_code() call will detect the zombie and
310+ # restart the container.
304311 timer = threading.Timer(timeout, _kill_exec)
305312 timer.start()
306313
@@ -317,21 +324,34 @@ def execute_code(self, invocation_context, code_execution_input):
317324 # ... parse output as before
318325```
319326
320- ** Why targeted PID kill, not ` kill -9 -1 ` :**
321- - ` kill -9 -1 ` kills * all* processes in the container, including the
322- init/shell process that keeps the container alive. This would force a
323- container restart on the next call.
324- - Targeted kill via ` exec_inspect ` → PID only terminates the timed-out
325- process, leaving the container healthy for subsequent calls.
326- - The fallback (` kill all non-init ` ) is a safety net if ` exec_inspect `
327- fails (e.g., Docker API version mismatch).
328-
329- ** Alternative — container restart on timeout:** Simpler but more costly.
330- Stop and restart the container after timeout. Acceptable if stateful mode
331- is not in use (no accumulated state to preserve).
332-
333- ** Recommendation:** Use Docker exec kill as the primary approach. This is
334- more robust than thread+join and properly cleans up runaway processes.
327+ ** PID namespace considerations:**
328+ - ` exec_inspect(exec_id)['Pid'] ` returns the ** host-namespace** PID.
329+ Running ` kill <host-pid> ` inside the container operates in the
330+ ** container PID namespace** and will target a different (or
331+ non-existent) process. This is a common Docker pitfall.
332+ - The correct approaches are:
333+ 1 . ** ` pkill -f ` inside container** — Match the exec'd command string
334+ within the container's PID namespace. Works for ` python3 -c ` but
335+ may be too broad if multiple execs are running concurrently.
336+ 2 . ** Host-side kill via ` docker top ` + ` os.kill ` ** — Use
337+ ` container.top() ` to map container PIDs to host PIDs, then
338+ ` os.kill(host_pid, 9) ` from the host. More precise but requires
339+ host-level permissions.
340+ 3 . ** Container restart** — Simplest and most reliable. Acceptable
341+ when stateful mode is not in use.
342+
343+ ** Recovery after timeout kill:**
344+ - ** Stateless mode:** No recovery needed. Next ` execute_code() ` call
345+ starts a fresh process in the same container.
346+ - ** Stateful mode (cumulative replay):** The timed-out block is NOT
347+ appended to history (append-after-success invariant), so replay
348+ remains clean. However, if ` pkill ` killed a persistent interpreter
349+ (Phase 2 / Option A), the executor must detect this and restart
350+ it before the next call.
351+
352+ ** Recommendation:** Use ` pkill -f ` as the primary approach for Phase 1.
353+ Migrate to host-side kill or container restart for more robust cleanup
354+ in Phase 2 when persistent processes are introduced.
335355
336356#### 4.2.4 ` GkeCodeExecutor ` — Already Implemented
337357
@@ -345,9 +365,10 @@ class GkeCodeExecutor(BaseCodeExecutor):
345365
346366In ` execute_code() ` , resolve timeout from per-invocation input first:
347367``` python
368+ input_t = code_execution_input.timeout_seconds
348369timeout = (
349- code_execution_input.timeout_seconds
350- or self .default_timeout_seconds
370+ input_t if input_t is not None
371+ else self .default_timeout_seconds
351372)
352373```
353374
@@ -657,9 +678,15 @@ in a stateful executor, with no isolation between different skills or
657678invocations.
658679
659680** Action items:**
660- 1 . ` ExecuteSkillScriptTool ` should generate a deterministic
661- ` execution_id ` from the skill name + invocation context (e.g.,
662- ` f"skill:{skill_name}:{invocation_id}" ` )
681+ 1 . ` ExecuteSkillScriptTool ` should generate a ** session-stable**
682+ ` execution_id ` scoped to skill + agent. The key must persist
683+ across turns so that stateful code history is preserved:
684+ ``` python
685+ execution_id = f " skill: { skill_name} : { session.id} : { agent_name} "
686+ ```
687+ Using ` invocation_id ` would be incorrect here — it changes every
688+ turn, defeating statefulness. ` session.id ` is stable for the
689+ lifetime of the conversation.
6636902 . Pass ` execution_id ` to ` CodeExecutionInput `
6646913 . This enables future stateful skill scripts where a skill can
665692 maintain state across multiple calls within the same session
@@ -799,7 +826,7 @@ class LocalSandboxCodeExecutor(BaseCodeExecutor):
799826 - Optional chroot or tmpdir working directory
800827 """
801828
802- timeout_seconds : int = 30
829+ default_timeout_seconds : int = 30
803830 max_memory_mb: int = 256
804831 max_cpu_seconds: int = 30
805832 allowed_env_vars: list[str ] = []
@@ -819,35 +846,53 @@ class LocalSandboxCodeExecutor(BaseCodeExecutor):
819846 if k in os.environ}
820847 env[' PATH' ] = ' /usr/bin:/usr/local/bin'
821848
849+ input_t = code_execution_input.timeout_seconds
822850 timeout = (
823- code_execution_input.timeout_seconds
824- or self .default_timeout_seconds
825- or self .max_cpu_seconds
851+ input_t if input_t is not None
852+ else self .default_timeout_seconds
826853 )
827-
828- # Use process_group (Python 3.11+) instead of
829- # preexec_fn, which is not fork-safe with threads.
830- # process_group=0 places the child in its own
831- # process group, enabling clean group kill on
832- # timeout via os.killpg().
854+ if timeout is None :
855+ timeout = self .max_cpu_seconds
856+
857+ import sys
858+ # Prefer process_group (3.11+) over preexec_fn
859+ # (not fork-safe with threads).
860+ spawn_kwargs = {}
861+ if sys.version_info >= (3 , 11 ):
862+ spawn_kwargs[' process_group' ] = 0
863+ else :
864+ # Fallback for 3.10; caveat: not fork-safe
865+ def _set_limits ():
866+ import resource
867+ resource.setrlimit(
868+ resource.RLIMIT_CPU ,
869+ (self .max_cpu_seconds,) * 2 ,
870+ )
871+ mem = self .max_memory_mb * 1024 * 1024
872+ resource.setrlimit(
873+ resource.RLIMIT_AS , (mem, mem),
874+ )
875+ spawn_kwargs[' preexec_fn' ] = _set_limits
876+
877+ cmd = [
878+ ' python3' , ' -c' ,
879+ f ' import resource; '
880+ f ' resource.setrlimit(resource.RLIMIT_CPU, '
881+ f ' ( { self .max_cpu_seconds} , '
882+ f ' { self .max_cpu_seconds} )); '
883+ f ' resource.setrlimit(resource.RLIMIT_AS, '
884+ f ' ( { self .max_memory_mb * 1024 * 1024 } , '
885+ f ' { self .max_memory_mb * 1024 * 1024 } )); '
886+ f ' exec(open( { f.name!r } ).read()) ' ,
887+ ]
833888 result = subprocess.run(
834- [
835- ' python3' , ' -c' ,
836- f ' import resource; '
837- f ' resource.setrlimit(resource.RLIMIT_CPU, '
838- f ' ( { self .max_cpu_seconds} , '
839- f ' { self .max_cpu_seconds} )); '
840- f ' resource.setrlimit(resource.RLIMIT_AS, '
841- f ' ( { self .max_memory_mb * 1024 * 1024 } , '
842- f ' { self .max_memory_mb * 1024 * 1024 } )); '
843- f ' exec(open( { f.name!r } ).read()) ' ,
844- ],
889+ cmd,
845890 capture_output = True ,
846891 text = True ,
847892 timeout = timeout,
848893 env = env,
849- process_group = 0 , # Python 3.11+, fork-safe
850894 cwd = tempfile.gettempdir(),
895+ ** spawn_kwargs,
851896 )
852897
853898 return CodeExecutionResult(
@@ -859,7 +904,9 @@ class LocalSandboxCodeExecutor(BaseCodeExecutor):
859904
860905** Platform considerations:**
861906- ` resource.setrlimit ` is Unix-only (Linux, macOS)
862- - ` process_group=0 ` requires Python 3.11+ (the minimum for ADK)
907+ - ` process_group=0 ` requires Python 3.11+ (ADK supports >=3.10, so
908+ this must be gated with a version check or use ` preexec_fn ` as
909+ fallback on 3.10)
863910- On Windows, use ` subprocess.CREATE_NO_WINDOW ` and
864911 ` subprocess.Popen ` with ` creationflags ` for job object limits
865912- Fallback to timeout-only on platforms without ` resource ` module
@@ -873,14 +920,17 @@ class LocalSandboxCodeExecutor(BaseCodeExecutor):
873920 in its own process group, enabling clean ` os.killpg() ` on timeout.
874921- Resource limits are set via an inline ` -c ` wrapper script instead
875922 of ` preexec_fn ` , avoiding the fork-safety issue entirely.
923+ - On Python 3.10 (ADK minimum is ` >=3.10 ` ), fall back to
924+ ` preexec_fn=set_limits ` with a documented caveat about thread safety.
876925
877926** Dependencies:** None (stdlib only). This is the key advantage over
878927` ContainerCodeExecutor ` .
879928
880929** Limitations:**
881930- Less isolation than containers (shared filesystem, kernel)
882931- Cannot restrict network access without OS-level firewall rules
883- - Requires Python 3.11+ (already the ADK minimum)
932+ - ` process_group ` requires Python 3.11+; falls back to ` preexec_fn `
933+ on 3.10 (ADK minimum is ` >=3.10 ` per ` pyproject.toml ` )
884934
885935#### 6.3.3 Tier 3: Promote ` ContainerCodeExecutor ` as Default
886936
@@ -1035,13 +1085,21 @@ class CodeExecutionInput:
10351085
10361086### 7.4 Testing Strategy
10371087
1038- | Category | Approach |
1039- | ----------| ----------|
1040- | Unit tests | Mock-based tests for each executor (existing pattern) |
1041- | Integration tests | Real executor tests (like the ones added for ` ExecuteSkillScriptTool ` ) |
1042- | Timeout tests | Scripts with ` time.sleep() ` to verify timeout enforcement |
1043- | Security tests | Scripts attempting blocked operations to verify restrictions |
1044- | Stateful tests | Multi-call sequences verifying variable persistence |
1088+ ** Current test coverage gaps:** Unit tests exist for
1089+ ` UnsafeLocalCodeExecutor ` , ` GkeCodeExecutor ` ,
1090+ ` AgentEngineSandboxCodeExecutor ` , ` BuiltInCodeExecutor ` , and
1091+ ` CodeExecutorContext ` , but ** no unit test file exists for
1092+ ` ContainerCodeExecutor ` ** (` tests/unittests/code_executors/ ` has no
1093+ ` test_container_code_executor.py ` ). Likewise, ` LocalSandboxCodeExecutor `
1094+ is new and has no tests yet.
1095+
1096+ | Category | Approach | New tests needed |
1097+ | ----------| ----------| -----------------|
1098+ | Unit tests | Mock-based tests per executor | ** Add ` test_container_code_executor.py ` ** , add ` test_local_sandbox_code_executor.py ` |
1099+ | Integration tests | Real executor tests (like ` ExecuteSkillScriptTool ` integration tests) | Add Docker-based container tests (CI-gated) |
1100+ | Timeout tests | Scripts with ` time.sleep() ` to verify enforcement | Per-executor timeout tests |
1101+ | Security tests | Scripts attempting blocked operations | ` restrict_builtins ` bypass attempts, env var leakage |
1102+ | Stateful tests | Multi-call sequences verifying variable persistence | Append-after-success, failure-does-not-poison, ` execution_id ` isolation |
10451103
10461104---
10471105
0 commit comments