Skip to content

Commit

Permalink
feat: introduce landlock based sandboxing
Browse files Browse the repository at this point in the history
Co-authored-by: Quentin Kaiser <[email protected]>
  • Loading branch information
2 people authored and László Vaskó committed Oct 13, 2024
1 parent bfc4aed commit 098e9dd
Show file tree
Hide file tree
Showing 8 changed files with 223 additions and 4 deletions.
2 changes: 1 addition & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ lz4 = "^4.3.2"
lief = "^0.15.1"
cryptography = ">=41.0,<44.0"
treelib = "^1.7.0"
unblob-native = "^0.1.1"
unblob-native = "^0.1.2"
jefferson = "^0.4.5"
rich = "^13.3.5"
pyfatfs = "^1.0.5"
Expand Down
27 changes: 27 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
DEFAULT_SKIP_MAGIC,
ExtractionConfig,
)
from unblob.testing import is_sandbox_available
from unblob.ui import (
NullProgressReporter,
ProgressReporter,
Expand Down Expand Up @@ -425,3 +426,29 @@ def test_clear_skip_magics(
assert sorted(process_file_mock.call_args.args[0].skip_magic) == sorted(
skip_magic
), fail_message


@pytest.mark.skipif(
not is_sandbox_available(), reason="Sandboxing is only available on Linux"
)
def test_sandbox_escape(tmp_path: Path):
runner = CliRunner()

in_path = tmp_path / "input"
in_path.touch()
extract_dir = tmp_path / "extract-dir"
params = ["--extract-dir", str(extract_dir), str(in_path)]

unrelated_file = tmp_path / "unrelated"

process_file_mock = mock.MagicMock(
side_effect=lambda *_args, **_kwargs: unrelated_file.write_text(
"sandbox escape"
)
)
with mock.patch.object(unblob.cli, "process_file", process_file_mock):
result = runner.invoke(unblob.cli.cli, params)

assert result.exit_code != 0
assert isinstance(result.exception, PermissionError)
process_file_mock.assert_called_once()
57 changes: 57 additions & 0 deletions tests/test_sandbox.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from pathlib import Path

import pytest

from unblob.processing import ExtractionConfig
from unblob.sandbox import Sandbox
from unblob.testing import is_sandbox_available

pytestmark = pytest.mark.skipif(
not is_sandbox_available(), reason="Sandboxing only works on Linux"
)


@pytest.fixture
def log_path(tmp_path):
return tmp_path / "unblob.log"


@pytest.fixture
def extraction_config(extraction_config, tmp_path):
extraction_config.extract_root = tmp_path / "extract" / "root"
# parent has to exist
extraction_config.extract_root.parent.mkdir()
return extraction_config


@pytest.fixture
def sandbox(extraction_config: ExtractionConfig, log_path: Path):
return Sandbox(extraction_config, log_path, None)


def test_necessary_resources_can_be_created_in_sandbox(
sandbox: Sandbox, extraction_config: ExtractionConfig, log_path: Path
):
directory_in_extract_root = extraction_config.extract_root / "path" / "to" / "dir"
file_in_extract_root = directory_in_extract_root / "file"

sandbox.run(extraction_config.extract_root.mkdir, parents=True)
sandbox.run(directory_in_extract_root.mkdir, parents=True)

sandbox.run(file_in_extract_root.touch)
sandbox.run(file_in_extract_root.write_text, "file content")

# log-file is already opened
log_path.touch()
sandbox.run(log_path.write_text, "log line")


def test_access_outside_sandbox_is_not_possible(sandbox: Sandbox, tmp_path: Path):
unrelated_dir = tmp_path / "unrelated" / "path"
unrelated_file = tmp_path / "unrelated-file"

with pytest.raises(PermissionError):
sandbox.run(unrelated_dir.mkdir, parents=True)

with pytest.raises(PermissionError):
sandbox.run(unrelated_file.touch)
4 changes: 3 additions & 1 deletion unblob/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
ExtractionConfig,
process_file,
)
from .sandbox import Sandbox
from .ui import NullProgressReporter, RichConsoleProgressReporter

logger = get_logger()
Expand Down Expand Up @@ -301,7 +302,8 @@ def cli(
)

logger.info("Start processing file", file=file)
process_results = process_file(config, file, report_file)
sandbox = Sandbox(config, log_path, report_file)
process_results = sandbox.run(process_file, config, file, report_file)
if verbose == 0:
if skip_extraction:
print_scan_report(process_results)
Expand Down
1 change: 0 additions & 1 deletion unblob/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,6 @@ def get_extract_dir_for(self, path: Path) -> Path:
return extract_dir.expanduser().resolve()


@terminate_gracefully
def process_file(
config: ExtractionConfig, input_path: Path, report_file: Optional[Path] = None
) -> ProcessResult:
Expand Down
118 changes: 118 additions & 0 deletions unblob/sandbox.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
import ctypes
import sys
import threading
from pathlib import Path
from typing import Callable, Iterable, Optional, Type, TypeVar

from structlog import get_logger
from unblob_native.sandbox import (
AccessFS,
SandboxError,
restrict_access,
)

if sys.version_info >= (3, 10):
from typing import ParamSpec
else:
from typing_extensions import ParamSpec

from unblob.processing import ExtractionConfig

logger = get_logger()

P = ParamSpec("P")
R = TypeVar("R")


class Sandbox:
"""Configures restricted file-systems to run functions in.
When calling ``run()``, a separate thread will be configured with
minimum required file-system permissions. All subprocesses spawned
from that thread will honor the restrictions.
"""

def __init__(
self,
config: ExtractionConfig,
log_path: Path,
report_file: Optional[Path],
extra_restrictions: Iterable[AccessFS] = (),
):
self.restrictions = [
# Python, shared libraries, extractor binaries and so on
AccessFS.read("/"),
# Multiprocessing
AccessFS.read_write("/dev/shm"), # noqa: S108
# Extracted contents
AccessFS.read_write(config.extract_root),
AccessFS.make_dir(config.extract_root.parent),
AccessFS.read_write(log_path),
*extra_restrictions,
]

if report_file:
self.restrictions += [
AccessFS.read_write(report_file),
AccessFS.make_reg(report_file.parent),
]

def run(self, callback: Callable[P, R], *args: P.args, **kwargs: P.kwargs) -> R:
"""Run callback with restricted filesystem access."""
exception = None
result = None

def _run_in_thread(callback, *args, **kwargs):
nonlocal exception, result

self._try_enter_sandbox()
try:
result = callback(*args, **kwargs)
except BaseException as e:
exception = e

thread = threading.Thread(
target=_run_in_thread, args=(callback, *args), kwargs=kwargs
)
thread.start()

try:
thread.join()
except KeyboardInterrupt:
raise_in_thread(thread, KeyboardInterrupt)
thread.join()

if exception:
raise exception # pyright: ignore[reportGeneralTypeIssues]
return result # pyright: ignore[reportReturnType]

def _try_enter_sandbox(self):
try:
restrict_access(*self.restrictions)
except SandboxError:
logger.warning(
"Sandboxing FS access is unavailable on this system, skipping."
)


def raise_in_thread(thread: threading.Thread, exctype: Type) -> None:
if thread.ident is None:
raise RuntimeError("Thread is not started")

res = ctypes.pythonapi.PyThreadState_SetAsyncExc(
ctypes.c_ulong(thread.ident), ctypes.py_object(exctype)
)

# success
if res == 1:
return

# Need to revert the call to restore interpreter state
ctypes.pythonapi.PyThreadState_SetAsyncExc(ctypes.c_ulong(thread.ident), None)

# Thread could have exited since
if res == 0:
return

# Something bad have happened
raise RuntimeError("Could not raise exception in thread", thread.ident)
16 changes: 16 additions & 0 deletions unblob/testing.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import binascii
import glob
import io
import platform
import shlex
import subprocess
from pathlib import Path
Expand All @@ -10,6 +11,7 @@
from lark.lark import Lark
from lark.visitors import Discard, Transformer
from pytest_cov.embed import cleanup_on_sigterm
from unblob_native.sandbox import AccessFS, SandboxError, restrict_access

from unblob.finder import build_hyperscan_database
from unblob.logging import configure_logger
Expand Down Expand Up @@ -217,3 +219,17 @@ def start(self, s):
rv.write(line.data)

return rv.getvalue()


def is_sandbox_available():
is_sandbox_available = True

try:
restrict_access(AccessFS.read_write("/"))
except SandboxError:
is_sandbox_available = False

if platform.architecture == "x86_64" and platform.system == "linux":
assert is_sandbox_available, "Sandboxing should work at least on Linux-x86_64"

return is_sandbox_available

0 comments on commit 098e9dd

Please sign in to comment.