diff --git a/Dockerfile b/Dockerfile
index d7c40c1..dc30212 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,7 +1,32 @@
+FROM ubuntu:24.04 AS bwrap-builder
+
+# bubblewrap release to build; override with --build-arg BWRAP_VERSION=x.y.z.
+ARG BWRAP_VERSION=0.11.0
+
+# Build bubblewrap from source — overlay support requires >= 0.10.0,
+# but Ubuntu 24.04 ships 0.9.0.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    curl \
+    ca-certificates \
+    meson \
+    gcc \
+    libc6-dev \
+    pkg-config \
+    libcap-dev \
+    xz-utils \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN curl -fsSL "https://github.com/containers/bubblewrap/releases/download/v${BWRAP_VERSION}/bubblewrap-${BWRAP_VERSION}.tar.xz" \
+    | tar xJ \
+    && cd "bubblewrap-${BWRAP_VERSION}" \
+    && meson setup _build --prefix=/usr \
+    && meson compile -C _build \
+    && meson install -C _build
+
 FROM ubuntu:24.04
 
-# Install runtime dependencies: bash, jq, curl, python3, git
-# Clean up apt cache to keep the image small
+# Install runtime dependencies
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
     bash \
@@ -10,8 +35,12 @@ RUN apt-get update && \
     python3 \
     git \
     ca-certificates \
+    libcap2 \
     && rm -rf /var/lib/apt/lists/*
 
+# Install the bubblewrap built in the bwrap-builder stage (with overlay support)
+COPY --from=bwrap-builder /usr/bin/bwrap /usr/bin/bwrap
+
 # Create the rlm directory structure
 RUN mkdir -p /rlm/tree /context
 
diff --git a/README.md b/README.md
index c66f588..b8d3773 100644
--- a/README.md
+++ b/README.md
@@ -52,6 +52,8 @@ All configuration is via environment variables. The LLM never sees these.
 | `RLM_MAX_ITERATIONS` | `15` | Max loop iterations per invocation |
 | `RLM_MAX_DEPTH` | `3` | Max recursion depth |
 | `RLM_MAX_TOKENS` | `16384` | Max tokens per LLM response |
+| `RLM_ISOLATION` | `bwrap` | Sandbox for child processes: `bwrap` or `none` |
+| `RLM_PROJECT_DIR` | `$(pwd)` | Directory to overlay when `RLM_ISOLATION=bwrap` |
 
 ## Installation
 
@@ -74,7 +76,7 @@ docker build -t rlm .
 docker run -e OPENROUTER_API_KEY="sk-or-v1-..." rlm "What is 2 + 2?"
 ```
 
-The image includes bash, jq, curl, python3, and git.
+The image includes bash, jq, curl, python3, git, and bubblewrap.
 
 ### From source
 
@@ -94,7 +96,7 @@ Or install system-wide:
 
 **Required:** bash (4+), jq, curl
 
-**Optional:** python3, git (included in the Docker image)
+**Optional:** python3, git, bubblewrap >= 0.10.0 (all included in the Docker image)
 
 **API key:** An [OpenRouter](https://openrouter.ai/) API key. Set `OPENROUTER_API_KEY` or write it to `/etc/rlm/api-key`.
 
@@ -103,10 +105,10 @@ Or install system-wide:
 Unit tests use a mock LLM (no API key needed). E2E tests make real API calls.
 
 ```bash
-# Unit tests -- 67 tests, all mocked
+# Unit tests -- mocked, no API key needed
 bats test/
 
-# E2E tests -- 9 tests, requires OPENROUTER_API_KEY
+# E2E tests -- requires OPENROUTER_API_KEY
 bats e2e/
 
 # Specific test file
@@ -120,12 +122,12 @@ Test dependencies: [BATS](https://github.com/bats-core/bats-core) (Bash Automate
 ```
 unix-rlm/
   bin/rlm # the script -- self-contained
-  test/ # BATS unit tests (67 tests, mock LLM)
+  test/ # BATS unit tests (mock LLM)
     test_helper.bash
     *.bats
     fixtures/ # mock LLM response fixtures
     lib/ # vendored BATS libraries
-  e2e/ # BATS E2E tests (9 tests, real LLM)
+  e2e/ # BATS E2E tests (real LLM)
     e2e_helper.bash
     *.bats
   eval/ # TypeScript eval harness
@@ -186,6 +188,12 @@ See [EVAL_PLAN.md](../EVAL_PLAN.md) for full details.
 
 `rlm` executes LLM-generated code with the same privileges as the calling user. When deployed on a managed VM (e.g., a Firecracker microVM), the VM is the security boundary. On other platforms, the operator is responsible for isolation (container, VM, etc.). Do not run `rlm` with elevated privileges on a shared system without appropriate sandboxing.
 
+### Filesystem isolation
+
+When a child `rlm` is spawned (depth > 0), it re-execs itself inside [bubblewrap](https://github.com/containers/bubblewrap) with a tmpfs-backed overlay on `RLM_PROJECT_DIR`. The child can read the parent's files there, but its writes are captured in a private overlay — parallel children cannot conflict with each other or corrupt the parent's data. The rest of the filesystem is mounted read-only, with two exceptions that stay shared and writable: `/tmp`, and the `/rlm/tree` directory, which is bind-mounted through the sandbox so trace files and `RETURN` answer files remain visible to the parent. Avoid placing `RLM_PROJECT_DIR` under `/tmp`.
+
+Set `RLM_ISOLATION=none` to disable. Isolation needs a `bwrap` with overlay support (bubblewrap >= 0.10.0) on the host; if no `bwrap` is found on `PATH`, children run without isolation. The Docker image builds 0.11.0 from source because Ubuntu 24.04 packages 0.9.0, so check which version `apt install bubblewrap` gives you on Debian/Ubuntu.
+
 The API key is read from `OPENROUTER_API_KEY` or `/etc/rlm/api-key` at startup. It is sent only to the OpenRouter API endpoint.
 
 ## License
diff --git a/bin/rlm b/bin/rlm
index ca56be3..15a4e24 100755
--- a/bin/rlm
+++ b/bin/rlm
@@ -17,6 +17,8 @@ set -euo pipefail
 # RLM_PLUGINS — Comma-separated plugin names to load
 # RLM_PLUGINS_DIR — Directory containing plugin .md files
 # RLM_CHILD_SYSTEM_PROMPT — Custom system prompt for this child (set by parent)
+# RLM_ISOLATION — Sandbox mode: bwrap (default) or none
+# RLM_PROJECT_DIR — Directory to overlay in bwrap (default: cwd at root)
 #
 # Internal environment (set by rlm, visible to children):
 # RLM_WORKDIR — This invocation's working directory
@@ -72,9 +74,9 @@ Your iterations are finite. Do not waste them — each one should make measurabl
 Pipe data with: echo "data" | rlm "query"
 Pass a custom system prompt with: RLM_CHILD_SYSTEM_PROMPT="..." rlm "query"
 
-The filesystem is shared across all rlm calls. /context/ is for data you
-intend to share across the recursion tree. Write working files to your own
-directory; avoid concurrent writes to the same path.
+Each child rlm runs in an isolated overlay filesystem. Children can read
+the parent'\''s files but their writes are private — parallel children cannot
+conflict. To pass results back, use RETURN or write to /rlm/tree/ (shared).
 
## Designing Delegation @@ -107,6 +109,49 @@ emit_metadata() { } RLM_DEPTH="${RLM_DEPTH:-0}" +RLM_ISOLATION="${RLM_ISOLATION:-bwrap}" # bwrap | none + +# --- Bubblewrap isolation for child processes -------------------------------- +# When a child rlm is spawned (depth > 0), re-exec inside bwrap with an overlay +# on the project directory. This prevents parallel children from stomping on +# each other's files. The /rlm/tree is bind-mounted writable so trace files and +# RETURN answer files remain visible to the parent. +# +# Set RLM_ISOLATION=none to disable. +# Set RLM_PROJECT_DIR to control which directory gets the overlay (default: cwd +# of the root invocation). +if [ "$RLM_DEPTH" -gt 0 ] \ + && [ "$RLM_ISOLATION" = "bwrap" ] \ + && [ -z "${_RLM_IN_BWRAP:-}" ] \ + && [ -z "${_RLM_MOCK_DIR:-}" ] \ + && command -v bwrap >/dev/null 2>&1; then + + # Default to / so the entire filesystem is overlaid if the root + # invocation did not export RLM_PROJECT_DIR. In normal operation the + # root process always sets RLM_PROJECT_DIR=$(pwd) before children run. + _RLM_PROJECT_DIR="${RLM_PROJECT_DIR:-/}" + _RLM_TREE_ROOT="${_RLM_TREE_ROOT:-/rlm/tree}" + + export _RLM_IN_BWRAP=1 + + # Re-exec inside bwrap with an overlay on the project directory. + # Use --tmp-overlay so the overlay upper/work dirs live on an + # invisible tmpfs managed by bwrap itself. This avoids the + # nested-overlayfs problem inside Docker (overlay2 does not + # support overlayfs as an upper filesystem). + # Note: /tmp is bind-mounted writable, so avoid placing + # RLM_PROJECT_DIR under /tmp (the bind takes precedence over + # the overlay). + exec bwrap \ + --ro-bind / / \ + --dev /dev \ + --proc /proc \ + --bind /tmp /tmp \ + --bind "$_RLM_TREE_ROOT" "$_RLM_TREE_ROOT" \ + --overlay-src "$_RLM_PROJECT_DIR" \ + --tmp-overlay "$_RLM_PROJECT_DIR" \ + -- "$0" "$@" +fi BASE_SYSTEM_PROMPT='You are a helpful assistant. Answer the question directly and concisely.' 
@@ -153,6 +198,7 @@ if [ "$_SELF_DEPTH" -eq 0 ]; then
   export RLM_PARENT_ID=""
   export RLM_ROOT_QUERY="${query:0:200}"
   export RLM_LINEAGE="$query"
+  export RLM_PROJECT_DIR="${RLM_PROJECT_DIR:-$(pwd)}"
 else
   export RLM_INVOCATION_ID="d${_SELF_DEPTH}-c$$"
   export RLM_PARENT_ID="${RLM_PARENT_ID:-root}"
diff --git a/e2e/bwrap.bats b/e2e/bwrap.bats
new file mode 100644
index 0000000..78bfa37
--- /dev/null
+++ b/e2e/bwrap.bats
@@ -0,0 +1,102 @@
+#!/usr/bin/env bats
+# Bubblewrap isolation tests — verify child rlm processes cannot mutate parent filesystem
+
+load e2e_helper
+
+setup() {
+  _require_api_key
+  E2E_TEMP="$(mktemp -d)"
+  export _RLM_TREE_ROOT="$E2E_TEMP/rlm/tree"
+  mkdir -p "$_RLM_TREE_ROOT"
+  export RLM_MAX_ITERATIONS=5
+  export RLM_MAX_DEPTH=3
+  export RLM_MAX_TOKENS=4096
+  export RLM_ISOLATION=bwrap
+  # Project dir must NOT be under /tmp — bwrap binds /tmp writable,
+  # which would override the overlay. Use a dir under /var/tmp instead.
+  BWRAP_PROJECT="$(mktemp -d /var/tmp/rlm-bwrap-e2e.XXXXXX)"
+  export RLM_PROJECT_DIR="$BWRAP_PROJECT"
+  unset _RLM_MOCK_DIR 2>/dev/null || true
+  unset RLM_WORKDIR 2>/dev/null || true
+  unset RLM_DEPTH 2>/dev/null || true
+  unset _RLM_RESUME_DIR 2>/dev/null || true
+  unset _RLM_IN_BWRAP 2>/dev/null || true
+
+  if ! command -v bwrap >/dev/null 2>&1; then
+    skip "bwrap not installed"
+  fi
+}
+
+teardown() {
+  rm -rf "${E2E_TEMP:-}" 2>/dev/null || true
+  rm -rf "${BWRAP_PROJECT:-}" 2>/dev/null || true
+}
+
+@test "bwrap: child cannot delete parent file" {
+  echo "precious data" > "$BWRAP_PROJECT/important.txt"
+
+  # Delegate to a child rlm (depth 1) so bwrap activates
+  run_rlm_with_retry 120 3 "Delegate to a child: rlm \"Delete the file $BWRAP_PROJECT/important.txt using rm -f. Verify it is gone with ls. Then RETURN done.\". Then RETURN the child's result."
+
+  assert_success
+
+  # Parent's file must still exist despite child deleting it inside overlay
+  [ -f "$BWRAP_PROJECT/important.txt" ]
+  [ "$(cat "$BWRAP_PROJECT/important.txt")" = "precious data" ]
+}
+
+@test "bwrap: child cannot overwrite parent file" {
+  echo "original content" > "$BWRAP_PROJECT/data.txt"
+
+  # Delegate to a child rlm (depth 1) so bwrap activates
+  run_rlm_with_retry 120 3 "Delegate to a child: rlm \"Overwrite the file $BWRAP_PROJECT/data.txt with the text 'corrupted' using echo. Then RETURN done.\". Then RETURN the child's result."
+
+  assert_success
+
+  # Parent's file must retain original content
+  [ -f "$BWRAP_PROJECT/data.txt" ]
+  [ "$(cat "$BWRAP_PROJECT/data.txt")" = "original content" ]
+}
+
+@test "bwrap: child can read parent file" {
+  echo "readable secret 42" > "$BWRAP_PROJECT/readable.txt"
+
+  # Delegate to a child rlm (depth 1) so bwrap activates — child reads through overlay
+  run_rlm_with_retry 120 3 "Delegate to a child: result=\$(rlm \"Read the file $BWRAP_PROJECT/readable.txt and RETURN its exact contents.\"). Then RETURN the result."
+
+  assert_success
+  assert_output --partial "readable secret 42"
+}
+
+@test "bwrap: parallel children do not conflict" {
+  run_rlm_with_retry 180 3 "Run two child rlm processes in parallel:
+Child 1: rlm \"Write ALPHA to $BWRAP_PROJECT/shared.txt, sleep 1, read it back, RETURN the contents\" &
+Child 2: rlm \"Write BETA to $BWRAP_PROJECT/shared.txt, sleep 1, read it back, RETURN the contents\" &
+Wait for both. Then read $BWRAP_PROJECT/shared.txt yourself (it should not exist since children wrote to overlays). RETURN whether the file exists or not."
+
+  assert_success
+  # The parent's copy should NOT have been written to by either child
+  [ ! -f "$BWRAP_PROJECT/shared.txt" ]
+}
+
+@test "bwrap: RETURN mechanism works through isolation" {
+  # The answer file is in /rlm/tree which is bind-mounted writable,
+  # so RETURN must work even inside bwrap
+  run_rlm_with_retry 120 3 "Delegate to a child: result=\$(rlm \"Compute 7*8 in bash and RETURN just the number\"). Then RETURN the result."
+
+  assert_success
+  assert_output --partial "56"
+}
+
+@test "bwrap disabled: child CAN delete parent file (control test)" {
+  export RLM_ISOLATION=none
+  echo "fragile data" > "$BWRAP_PROJECT/fragile.txt"
+
+  # Delegate to a child rlm (depth 1) — without bwrap, child has real filesystem access
+  run_rlm_with_retry 120 3 "Delegate to a child: rlm \"Delete the file $BWRAP_PROJECT/fragile.txt using rm -f. Verify it is gone with ls. Then RETURN done.\". Then RETURN the child's result."
+
+  assert_success
+
+  # Without bwrap, the file should be gone
+  [ ! -f "$BWRAP_PROJECT/fragile.txt" ]
+}
diff --git a/test/bwrap.bats b/test/bwrap.bats
new file mode 100644
index 0000000..ffe2caa
--- /dev/null
+++ b/test/bwrap.bats
@@ -0,0 +1,194 @@
+#!/usr/bin/env bats
+# Unit tests for bwrap isolation logic (no API calls, mock LLM)
+
+load test_helper
+
+setup() {
+  TEST_TEMP="$(mktemp -d)"
+  export TEST_TEMP
+  export _RLM_TREE_ROOT="$TEST_TEMP/rlm/tree"
+  export _RLM_MOCK_DIR="$PROJECT_ROOT/test/fixtures/simple-return"
+  unset RLM_WORKDIR RLM_ANSWER_FILE RLM_INPUT RLM_DEPTH _RLM_IN_BWRAP 2>/dev/null || true
+}
+
+teardown() {
+  # overlayfs workdirs may contain root-owned files; ignore cleanup errors
+  rm -rf "$TEST_TEMP" 2>/dev/null || true
+}
+
+# Skip the calling test unless bwrap is installed and advertises the overlay
+# options. Overlay support requires bubblewrap >= 0.10.0; with an older bwrap
+# the mechanical tests below would fail rather than skip.
+_require_bwrap_overlay() {
+  command -v bwrap >/dev/null 2>&1 || skip "bwrap not installed"
+  case "$(bwrap --help 2>&1 || true)" in
+    *--overlay-src*) ;;
+    *) skip "bwrap lacks overlay support" ;;
+  esac
+}
+
+@test "bwrap: skipped in mock mode (depth 0)" {
+  export RLM_ISOLATION=bwrap
+  run "$RLM_BIN" "test query"
+  assert_success
+  assert_output "hello"
+}
+
+@test "bwrap: skipped in mock mode (depth > 0)" {
+  # Even at depth > 0, mock mode should bypass bwrap
+  export RLM_ISOLATION=bwrap
+  export RLM_DEPTH=1
+  run "$RLM_BIN" "test query"
+  assert_success
+  assert_output "hello"
+}
+
+@test "bwrap: skipped when RLM_ISOLATION=none" {
+  export RLM_ISOLATION=none
+  export RLM_DEPTH=1
+  run "$RLM_BIN" "test query"
+  assert_success
+  assert_output "hello"
+}
+
+@test "bwrap: RLM_PROJECT_DIR set at root depth" {
+  # rlm exports RLM_PROJECT_DIR for its own children, so the value cannot be
+  # read back in this shell. Run rlm directly so that its exit status is the
+  # one asserted: a root invocation without RLM_PROJECT_DIR must still succeed.
+  unset RLM_PROJECT_DIR
+  run "$RLM_BIN" "test"
+  assert_success
+}
+
+@test "bwrap: isolation default is bwrap" {
+  # Match the assignment itself so a comment or message that merely mentions
+  # RLM_ISOLATION cannot change the count.
+  run grep -cF 'RLM_ISOLATION="${RLM_ISOLATION:-bwrap}"' "$RLM_BIN"
+  assert_output "1"
+}
+
+# --- Mechanical invariant tests (no LLM, just bwrap) ---
+# These tests use --overlay (explicit upper/work dirs) rather than --tmp-overlay
+# (which rlm uses in production) so we can inspect the upper dir to verify
+# writes are captured. The E2E tests exercise the real --tmp-overlay code path.
+
+@test "bwrap mechanical: overlay captures writes to project dir" {
+  _require_bwrap_overlay
+
+  local project_dir="$TEST_TEMP/project"
+  local upper="$TEST_TEMP/upper"
+  local work="$TEST_TEMP/work"
+  mkdir -p "$project_dir" "$upper" "$work"
+  echo "original" > "$project_dir/file.txt"
+
+  # Child writes to project dir inside bwrap overlay
+  bwrap \
+    --ro-bind / / --dev /dev --proc /proc \
+    --bind /tmp /tmp \
+    --overlay-src "$project_dir" \
+    --overlay "$upper" "$work" "$project_dir" \
+    -- bash -c 'echo modified > "$1/file.txt"' _ "$project_dir"
+
+  # Parent's file is unchanged
+  [ "$(cat "$project_dir/file.txt")" = "original" ]
+  # Child's write captured in upper
+  [ "$(cat "$upper/file.txt")" = "modified" ]
+}
+
+@test "bwrap mechanical: overlay blocks deletion of parent files" {
+  _require_bwrap_overlay
+
+  local project_dir="$TEST_TEMP/project"
+  local upper="$TEST_TEMP/upper"
+  local work="$TEST_TEMP/work"
+  mkdir -p "$project_dir" "$upper" "$work"
+  echo "precious" > "$project_dir/important.txt"
+
+  bwrap \
+    --ro-bind / / --dev /dev --proc /proc \
+    --bind /tmp /tmp \
+    --overlay-src "$project_dir" \
+    --overlay "$upper" "$work" "$project_dir" \
+    -- bash -c 'rm -f -- "$1/important.txt"' _ "$project_dir"
+
+  # Parent's file survives
+  [ -f "$project_dir/important.txt" ]
+  [ "$(cat "$project_dir/important.txt")" = "precious" ]
+}
+
+@test "bwrap mechanical: bind-mounted tree root is writable and visible to parent" {
+  _require_bwrap_overlay
+
+  local tree_root="$TEST_TEMP/rlm-tree"
+  local project_dir="$TEST_TEMP/project"
+  local upper="$TEST_TEMP/upper"
+  local work="$TEST_TEMP/work"
+  mkdir -p "$tree_root" "$project_dir" "$upper" "$work"
+
+  bwrap \
+    --ro-bind / / --dev /dev --proc /proc \
+    --bind /tmp /tmp \
+    --bind "$tree_root" "$tree_root" \
+    --overlay-src "$project_dir" \
+    --overlay "$upper" "$work" "$project_dir" \
+    -- bash -c 'echo answer > "$1/answer.txt"' _ "$tree_root"
+
+  # Parent sees the write (bind mount, not overlaid)
+  [ -f "$tree_root/answer.txt" ]
+  [ "$(cat "$tree_root/answer.txt")" = "answer" ]
+}
+
+@test "bwrap mechanical: child can read parent files through overlay" {
+  _require_bwrap_overlay
+
+  local project_dir="$TEST_TEMP/project"
+  local upper="$TEST_TEMP/upper"
+  local work="$TEST_TEMP/work"
+  mkdir -p "$project_dir" "$upper" "$work"
+  echo "secret123" > "$project_dir/data.txt"
+
+  run bwrap \
+    --ro-bind / / --dev /dev --proc /proc \
+    --bind /tmp /tmp \
+    --overlay-src "$project_dir" \
+    --overlay "$upper" "$work" "$project_dir" \
+    -- cat "$project_dir/data.txt"
+
+  assert_success
+  assert_output "secret123"
+}
+
+@test "bwrap mechanical: parallel children cannot see each other's writes" {
+  _require_bwrap_overlay
+
+  local project_dir="$TEST_TEMP/project"
+  mkdir -p "$project_dir"
+
+  local upper1="$TEST_TEMP/upper1" work1="$TEST_TEMP/work1"
+  local upper2="$TEST_TEMP/upper2" work2="$TEST_TEMP/work2"
+  mkdir -p "$upper1" "$work1" "$upper2" "$work2"
+  local pid1 pid2
+
+  # Child 1 writes ALPHA
+  bwrap --ro-bind / / --dev /dev --proc /proc --bind /tmp /tmp \
+    --overlay-src "$project_dir" --overlay "$upper1" "$work1" "$project_dir" \
+    -- bash -c 'echo ALPHA > "$1/shared.txt"' _ "$project_dir" &
+  pid1=$!
+
+  # Child 2 writes BETA
+  bwrap --ro-bind / / --dev /dev --proc /proc --bind /tmp /tmp \
+    --overlay-src "$project_dir" --overlay "$upper2" "$work2" "$project_dir" \
+    -- bash -c 'echo BETA > "$1/shared.txt"' _ "$project_dir" &
+  pid2=$!
+
+  # Wait on each child separately: `wait pid1 pid2` reports only the status
+  # of the last one, which would hide a failure of the first.
+  wait "$pid1"
+  wait "$pid2"
+
+  # Parent sees no file (neither child's write leaked)
+  [ ! -f "$project_dir/shared.txt" ]
+  # Each upper dir has its own version
+  [ "$(cat "$upper1/shared.txt")" = "ALPHA" ]
+  [ "$(cat "$upper2/shared.txt")" = "BETA" ]
+}