Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions backend/CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,7 @@ Multi-file upload with automatic document conversion:
- Rejects directory inputs before copying so uploads stay all-or-nothing
- Reuses one conversion worker per request when called from an active event loop
- Files stored in thread-isolated directories
- Duplicate filenames in a single upload request are auto-renamed with `_N` suffixes so later files do not truncate earlier files
- Agent receives uploaded file list via `UploadsMiddleware`

See [docs/FILE_UPLOAD.md](docs/FILE_UPLOAD.md) for details.
Expand Down
2 changes: 1 addition & 1 deletion backend/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ FastAPI application providing REST endpoints for frontend integration:
| `POST /api/memory/reload` | Force memory reload |
| `GET /api/memory/config` | Memory configuration |
| `GET /api/memory/status` | Combined config + data |
| `POST /api/threads/{id}/uploads` | Upload files (auto-converts PDF/PPT/Excel/Word to Markdown, rejects directory paths) |
| `POST /api/threads/{id}/uploads` | Upload files (auto-converts PDF/PPT/Excel/Word to Markdown, rejects directory paths, auto-renames duplicate filenames in one request) |
| `GET /api/threads/{id}/uploads/list` | List uploaded files |
| `DELETE /api/threads/{id}` | Delete DeerFlow-managed local thread data after LangGraph thread deletion; unexpected failures are logged server-side and return a generic 500 detail |
| `GET /api/threads/{id}/artifacts/{path}` | Serve generated artifacts |
Expand Down
10 changes: 9 additions & 1 deletion backend/app/gateway/routers/uploads.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from deerflow.uploads.manager import (
PathTraversalError,
UnsafeUploadPathError,
claim_unique_filename,
delete_file_safe,
enrich_file_listing,
ensure_uploads_dir,
Expand Down Expand Up @@ -192,6 +193,10 @@ async def upload_files(
sandbox_sync_targets = []
skipped_files = []
total_size = 0
# Track filenames within this request so duplicate form parts do not
# silently truncate each other. Existing uploads keep the historical
# overwrite behavior for a single replacement upload.
seen_filenames: set[str] = set()

sandbox_provider = get_sandbox_provider()
sync_to_sandbox = not _uses_thread_data_mounts(sandbox_provider)
Expand All @@ -208,7 +213,8 @@ async def upload_files(
continue

try:
safe_filename = normalize_filename(file.filename)
original_filename = normalize_filename(file.filename)
safe_filename = claim_unique_filename(original_filename, seen_filenames)
except ValueError:
logger.warning(f"Skipping file with unsafe filename: {file.filename!r}")
continue
Expand Down Expand Up @@ -236,6 +242,8 @@ async def upload_files(
"virtual_path": virtual_path,
"artifact_url": upload_artifact_url(thread_id, safe_filename),
}
if safe_filename != original_filename:
file_info["original_filename"] = original_filename

logger.info(f"Saved file: {safe_filename} ({file_size} bytes) to {file_info['path']}")

Expand Down
33 changes: 33 additions & 0 deletions backend/tests/test_uploads_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,39 @@ def test_upload_files_writes_thread_storage_and_skips_local_sandbox_sync(tmp_pat
sandbox.update_file.assert_not_called()


def test_upload_files_auto_renames_duplicate_form_filenames(tmp_path):
thread_uploads_dir = tmp_path / "uploads"
thread_uploads_dir.mkdir(parents=True)

provider = MagicMock()
provider.uses_thread_data_mounts = True

with (
patch.object(uploads, "get_uploads_dir", return_value=thread_uploads_dir),
patch.object(uploads, "ensure_uploads_dir", return_value=thread_uploads_dir),
patch.object(uploads, "get_sandbox_provider", return_value=provider),
):
Comment on lines +71 to +75
result = asyncio.run(
call_unwrapped(
uploads.upload_files,
"thread-local",
request=MagicMock(),
files=[
UploadFile(filename="data.txt", file=BytesIO(b"first")),
UploadFile(filename="data.txt", file=BytesIO(b"second")),
],
config=SimpleNamespace(),
)
)

assert result.success is True
assert [file_info["filename"] for file_info in result.files] == ["data.txt", "data_1.txt"]
assert "original_filename" not in result.files[0]
assert result.files[1]["original_filename"] == "data.txt"
assert (thread_uploads_dir / "data.txt").read_bytes() == b"first"
assert (thread_uploads_dir / "data_1.txt").read_bytes() == b"second"


def test_upload_files_skips_acquire_when_thread_data_is_mounted(tmp_path):
thread_uploads_dir = tmp_path / "uploads"
thread_uploads_dir.mkdir(parents=True)
Expand Down
Loading