From 18fb699e9d57f3c1ce12b80d5f95c9ca7d9b86c5 Mon Sep 17 00:00:00 2001 From: yangyufan <936930947@qq.com> Date: Fri, 8 May 2026 21:45:03 +0800 Subject: [PATCH 1/2] fix(uploads): add Windows support for safe symlink-protected uploads --- .../harness/deerflow/uploads/manager.py | 83 ++++++++++++++----- 1 file changed, 62 insertions(+), 21 deletions(-) diff --git a/backend/packages/harness/deerflow/uploads/manager.py b/backend/packages/harness/deerflow/uploads/manager.py index 1a1b63f09b..8a2f417b91 100644 --- a/backend/packages/harness/deerflow/uploads/manager.py +++ b/backend/packages/harness/deerflow/uploads/manager.py @@ -121,40 +121,81 @@ def open_upload_file_no_symlink(base_dir: Path, filename: str) -> tuple[Path, ob Upload directories may be mounted into local sandboxes. A sandbox process can therefore leave a symlink at a future upload filename. Normal ``Path.write_bytes`` follows that link and can overwrite files outside the uploads directory with - gateway privileges. This helper rejects symlink destinations and uses - ``O_NOFOLLOW`` where available so the final path component cannot be raced into - a symlink between validation and open. + gateway privileges. This helper rejects symlink destinations using ``O_NOFOLLOW`` + on POSIX; on Windows it uses ``os.lstat`` to pre-check and ``os.open`` without + ``O_NOFOLLOW`` (Windows lacks this flag), relying on the pre-check to catch + existing symlinks and path-traversal validation to prevent escapes. """ - safe_name = normalize_filename(filename) - dest = base_dir / safe_name + safe_name = normalize_filename(filename) # 去除路径遍历字符,得到安全的文件名 + dest = base_dir / safe_name # 拼接到上传目录,得到完整目标路径 try: - st = os.lstat(dest) + st = os.lstat(dest) # 获取文件属性(不跟随 symlink) except FileNotFoundError: - st = None + st = None # 文件不存在,st 为 None,后续直接创建 - if st is not None and not stat.S_ISREG(st.st_mode): + if st is not None and not stat.S_ISREG(st.st_mode): # S_ISREG = 普通文件(regular file) raise UnsafeUploadPathError(f"Upload destination is not a regular file: {safe_name}") validate_path_traversal(dest, base_dir) - if not hasattr(os, "O_NOFOLLOW"): - raise UnsafeUploadPathError("Upload writes require O_NOFOLLOW support") + has_nofollow = hasattr(os, "O_NOFOLLOW") + + # 原逻辑:利用 O_NOFOLLOW 在 open() 时拒绝 symlink + if has_nofollow: + flags = os.O_WRONLY | os.O_CREAT | os.O_NOFOLLOW + if hasattr(os, "O_NONBLOCK"): + flags |= os.O_NONBLOCK + + try: + fd = os.open(dest, flags, 0o600) + except OSError as exc: + if exc.errno in {errno.ELOOP, errno.EISDIR, errno.ENOTDIR, errno.ENXIO, errno.EAGAIN}: + raise UnsafeUploadPathError(f"Unsafe upload destination: {safe_name}") from exc + raise + + try: + opened_stat = os.fstat(fd) + if not stat.S_ISREG(opened_stat.st_mode) or opened_stat.st_nlink != 1: + raise UnsafeUploadPathError(f"Upload destination is not an exclusive regular file: {safe_name}") + + os.ftruncate(fd, 0) + + fh = os.fdopen(fd, "wb") + fd = -1 + finally: + if fd >= 0: + os.close(fd) + return dest, fh + + # Windows 分支:系统没有 O_NOFOLLOW,改用"双重 lstat 检查 + 事后 fstat 检查" + if st is not None and st.st_nlink > 1: # 硬链接数量 > 1 → 拒绝 + raise UnsafeUploadPathError(f"Upload destination has multiple links: {safe_name}") + + flags = os.O_WRONLY | os.O_CREAT + + # Windows 默认文本模式会做 \n ↔ \r\n 自动转换,破坏二进制文件 + # O_BINARY 告诉系统不要做任何换行符转换 + if hasattr(os, "O_BINARY"): + flags |= os.O_BINARY + try: + pre_open_st = os.lstat(dest) + except FileNotFoundError: + pre_open_st = None - flags = os.O_WRONLY | os.O_CREAT | os.O_NOFOLLOW - if hasattr(os, "O_NONBLOCK"): - flags |= os.O_NONBLOCK + # 如果文件存在,必须是普通文件(不是 symlink/目录/设备) + if pre_open_st is not None and not stat.S_ISREG(pre_open_st.st_mode): + raise UnsafeUploadPathError(f"Upload destination is not a regular file: {safe_name}") + # 硬链接数量 > 1 → 拒绝(truncate 会影响其他文件名) + if pre_open_st is not None and pre_open_st.st_nlink > 1: + raise UnsafeUploadPathError(f"Upload destination has multiple links: {safe_name}") - try: - fd = os.open(dest, flags, 0o600) - except OSError as exc: - if exc.errno in {errno.ELOOP, errno.EISDIR, errno.ENOTDIR, errno.ENXIO, errno.EAGAIN}: - raise UnsafeUploadPathError(f"Unsafe upload destination: {safe_name}") from exc - raise + fd = os.open(dest, flags, 0o600) # 以 0o600 权限打开/创建文件 + # 这就是用户建议的"先打开、再检查、再清空"流程! try: - opened_stat = os.fstat(fd) - if not stat.S_ISREG(opened_stat.st_mode) or opened_stat.st_nlink != 1: + opened_stat = os.fstat(fd) # 通过 fd 获取打开后的真实 inode 属性 + if not stat.S_ISREG(opened_stat.st_mode) or opened_stat.st_nlink > 1: raise UnsafeUploadPathError(f"Upload destination is not an exclusive regular file: {safe_name}") os.ftruncate(fd, 0) fh = os.fdopen(fd, "wb") From 353e919180e2ea8c6476a090e5b9e230bc307eb5 Mon Sep 17 00:00:00 2001 From: yangyufan <936930947@qq.com> Date: Sat, 9 May 2026 16:52:13 +0800 Subject: [PATCH 2/2] fix(uploads): update tests and translate comments; --- .../harness/deerflow/uploads/manager.py | 45 ++++++++++--------- backend/tests/test_uploads_manager.py | 15 ++++--- 2 files changed, 34 insertions(+), 26 deletions(-) diff --git a/backend/packages/harness/deerflow/uploads/manager.py b/backend/packages/harness/deerflow/uploads/manager.py index 8a2f417b91..d8ce5527c0 100644 --- a/backend/packages/harness/deerflow/uploads/manager.py +++ b/backend/packages/harness/deerflow/uploads/manager.py @@ -122,27 +122,28 @@ def open_upload_file_no_symlink(base_dir: Path, filename: str) -> tuple[Path, ob therefore leave a symlink at a future upload filename. Normal ``Path.write_bytes`` follows that link and can overwrite files outside the uploads directory with gateway privileges. This helper rejects symlink destinations using ``O_NOFOLLOW`` - on POSIX; on Windows it uses ``os.lstat`` to pre-check and ``os.open`` without - ``O_NOFOLLOW`` (Windows lacks this flag), relying on the pre-check to catch - existing symlinks and path-traversal validation to prevent escapes. + on POSIX. On Windows (which lacks ``O_NOFOLLOW``), it uses dual ``lstat`` checks + and ``fstat`` validation after ``open()`` to reduce the TOCTOU window; this does + not eliminate all races but makes exploitation significantly harder. Path-traversal + validation prevents escapes from *base_dir* in both cases. """ - safe_name = normalize_filename(filename) # 去除路径遍历字符,得到安全的文件名 - dest = base_dir / safe_name # 拼接到上传目录,得到完整目标路径 + safe_name = normalize_filename(filename) + dest = base_dir / safe_name try: - st = os.lstat(dest) # 获取文件属性(不跟随 symlink) + st = os.lstat(dest) except FileNotFoundError: - st = None # 文件不存在,st 为 None,后续直接创建 + st = None - if st is not None and not stat.S_ISREG(st.st_mode): # S_ISREG = 普通文件(regular file) + if st is not None and not stat.S_ISREG(st.st_mode): raise UnsafeUploadPathError(f"Upload destination is not a regular file: {safe_name}") validate_path_traversal(dest, base_dir) has_nofollow = hasattr(os, "O_NOFOLLOW") - # 原逻辑:利用 O_NOFOLLOW 在 open() 时拒绝 symlink if has_nofollow: + # POSIX: O_NOFOLLOW makes open() fail with ELOOP if dest is a symlink. flags = os.O_WRONLY | os.O_CREAT | os.O_NOFOLLOW if hasattr(os, "O_NONBLOCK"): flags |= os.O_NONBLOCK @@ -158,9 +159,7 @@ def open_upload_file_no_symlink(base_dir: Path, filename: str) -> tuple[Path, ob opened_stat = os.fstat(fd) if not stat.S_ISREG(opened_stat.st_mode) or opened_stat.st_nlink != 1: raise UnsafeUploadPathError(f"Upload destination is not an exclusive regular file: {safe_name}") - os.ftruncate(fd, 0) - fh = os.fdopen(fd, "wb") fd = -1 finally: @@ -168,33 +167,37 @@ def open_upload_file_no_symlink(base_dir: Path, filename: str) -> tuple[Path, ob os.close(fd) return dest, fh - # Windows 分支:系统没有 O_NOFOLLOW,改用"双重 lstat 检查 + 事后 fstat 检查" - if st is not None and st.st_nlink > 1: # 硬链接数量 > 1 → 拒绝 + # Windows: no O_NOFOLLOW available. Uses a second lstat immediately before open() + # to narrow the TOCTOU window, then fstat after open() as a further defence. + # Note: a narrow race window remains between the pre-open lstat and open(); the + # path-traversal check mitigates escapes from base_dir but cannot prevent an + # attacker who can atomically replace dest with a symlink after the check. + if st is not None and st.st_nlink > 1: raise UnsafeUploadPathError(f"Upload destination has multiple links: {safe_name}") flags = os.O_WRONLY | os.O_CREAT - - # Windows 默认文本模式会做 \n ↔ \r\n 自动转换,破坏二进制文件 - # O_BINARY 告诉系统不要做任何换行符转换 if hasattr(os, "O_BINARY"): flags |= os.O_BINARY + try: pre_open_st = os.lstat(dest) except FileNotFoundError: pre_open_st = None - # 如果文件存在,必须是普通文件(不是 symlink/目录/设备) if pre_open_st is not None and not stat.S_ISREG(pre_open_st.st_mode): raise UnsafeUploadPathError(f"Upload destination is not a regular file: {safe_name}") - # 硬链接数量 > 1 → 拒绝(truncate 会影响其他文件名) if pre_open_st is not None and pre_open_st.st_nlink > 1: raise UnsafeUploadPathError(f"Upload destination has multiple links: {safe_name}") - fd = os.open(dest, flags, 0o600) # 以 0o600 权限打开/创建文件 + try: + fd = os.open(dest, flags, 0o600) + except OSError as exc: + if exc.errno in {errno.EISDIR, errno.ENOTDIR, errno.ENXIO, errno.EAGAIN}: + raise UnsafeUploadPathError(f"Unsafe upload destination: {safe_name}") from exc + raise - # 这就是用户建议的"先打开、再检查、再清空"流程! try: - opened_stat = os.fstat(fd) # 通过 fd 获取打开后的真实 inode 属性 + opened_stat = os.fstat(fd) if not stat.S_ISREG(opened_stat.st_mode) or opened_stat.st_nlink > 1: raise UnsafeUploadPathError(f"Upload destination is not an exclusive regular file: {safe_name}") os.ftruncate(fd, 0) diff --git a/backend/tests/test_uploads_manager.py b/backend/tests/test_uploads_manager.py index 2cf1ae7fb9..733e9e4020 100644 --- a/backend/tests/test_uploads_manager.py +++ b/backend/tests/test_uploads_manager.py @@ -126,15 +126,18 @@ def test_overwrites_existing_regular_file_with_single_link(self, tmp_path): assert dest.read_bytes() == b"new contents" assert os.stat(dest).st_nlink == 1 - def test_fails_closed_without_no_follow_support(self, tmp_path, monkeypatch): + def test_fallback_without_no_follow_support_succeeds(self, tmp_path, monkeypatch): monkeypatch.delattr(os, "O_NOFOLLOW", raising=False) - with pytest.raises(UnsafeUploadPathError, match="O_NOFOLLOW"): - write_upload_file_no_symlink(tmp_path, "notes.txt", b"hello") - - assert not (tmp_path / "notes.txt").exists() + # When O_NOFOLLOW is absent (Windows), the function falls back to + # a dual-lstat + fstat approach and succeeds. + result = write_upload_file_no_symlink(tmp_path, "notes.txt", b"hello") + assert result == tmp_path / "notes.txt" + assert (tmp_path / "notes.txt").read_bytes() == b"hello" def test_open_uses_nonblocking_flag_when_available(self, tmp_path): + if not hasattr(os, "O_NONBLOCK"): + pytest.skip("O_NONBLOCK not available on this platform") with patch("deerflow.uploads.manager.os.open", side_effect=OSError(errno.ENXIO, "no reader")) as open_mock: with pytest.raises(UnsafeUploadPathError, match="Unsafe upload destination"): write_upload_file_no_symlink(tmp_path, "pipe.txt", b"hello") @@ -144,6 +147,8 @@ def test_open_uses_nonblocking_flag_when_available(self, tmp_path): @pytest.mark.parametrize("open_errno", [errno.ENXIO, errno.EAGAIN]) def test_nonblocking_special_file_open_errors_are_unsafe(self, tmp_path, open_errno): + if not hasattr(os, "O_NONBLOCK"): + pytest.skip("O_NONBLOCK not available on this platform") with patch("deerflow.uploads.manager.os.open", side_effect=OSError(open_errno, "would block")): with pytest.raises(UnsafeUploadPathError, match="Unsafe upload destination"): write_upload_file_no_symlink(tmp_path, "pipe.txt", b"hello")