Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 14 additions & 9 deletions services/storage/src/simcore_service_storage/simcore_s3_dsm.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@
expand_directory,
get_accessible_project_ids,
get_directory_file_id,
is_nested_level_file_id,
list_child_paths_from_repository,
list_child_paths_from_s3,
)
Expand All @@ -104,6 +105,7 @@
_NO_CONCURRENCY: Final[int] = 1
_MAX_PARALLEL_S3_CALLS: Final[NonNegativeInt] = 10


_logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -731,15 +733,18 @@ async def delete_file(
connection=connection, file_ids=[file_id]
)

if parent_dir_fmds := await file_meta_data_repo.list_filter_with_partial_file_id(
connection=connection,
user_or_project_filter=UserOrProjectFilter(
user_id=user_id, project_ids=[]
),
file_id_prefix=compute_file_id_prefix(file_id, 2),
partial_file_id=None,
is_directory=True,
sha256_checksum=None,
# NOTE: if the file was at root level, we do not have to update the parent (not tracked in the DB)
if is_nested_level_file_id(file_id) and (
parent_dir_fmds := await file_meta_data_repo.list_filter_with_partial_file_id(
connection=connection,
user_or_project_filter=UserOrProjectFilter(
user_id=user_id, project_ids=[]
),
file_id_prefix=compute_file_id_prefix(file_id, 2),
partial_file_id=None,
is_directory=True,
sha256_checksum=None,
)
):
parent_dir_fmd = max(
parent_dir_fmds, key=lambda fmd: len(fmd.file_id)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import logging
from contextlib import suppress
from pathlib import Path
from typing import TypeAlias
from typing import Final, TypeAlias
from uuid import uuid4

from aws_library.s3 import S3MetaData, SimcoreS3API
from aws_library.s3._constants import STREAM_READER_CHUNK_SIZE
from aws_library.s3._constants import S3_OBJECT_DELIMITER, STREAM_READER_CHUNK_SIZE
from aws_library.s3._models import S3ObjectKey
from common_library.json_serialization import json_dumps, json_loads
from models_library.api_schemas_storage.storage_schemas import S3BucketName
Expand Down Expand Up @@ -34,6 +34,8 @@

_logger = logging.getLogger(__name__)

ROOT_FILE_ID_LEVELS: Final[int] = 3


async def _list_all_files_in_folder(
*,
Expand Down Expand Up @@ -140,8 +142,17 @@ async def get_directory_file_id(


def compute_file_id_prefix(file_id: str, levels: int) -> str:
    """Return the leading ``levels`` path components of ``file_id``.

    Leading and trailing S3 object delimiters are stripped before the
    identifier is split, so the returned prefix never starts or ends with
    a delimiter. Standard slice semantics apply: when ``levels`` exceeds
    the number of components, the whole stripped identifier is returned.

    Args:
        file_id: delimiter-separated file identifier.
        levels: number of leading components to keep.

    Returns:
        The kept components joined with ``S3_OBJECT_DELIMITER``.
    """
    components = file_id.strip(S3_OBJECT_DELIMITER).split(S3_OBJECT_DELIMITER)
    return S3_OBJECT_DELIMITER.join(components[:levels])


def get_file_id_level(file_id: str) -> int:
    """Count the delimiter-separated components of ``file_id``.

    Surrounding delimiters are ignored. Empty components are counted:
    an empty identifier yields 1, and each pair of consecutive delimiters
    inside the identifier contributes one extra (empty) component.
    """
    trimmed_file_id = file_id.strip(S3_OBJECT_DELIMITER)
    return len(trimmed_file_id.split(S3_OBJECT_DELIMITER))


def is_nested_level_file_id(file_id: str) -> bool:
    """Tell whether ``file_id`` has more than ``ROOT_FILE_ID_LEVELS`` components."""
    level = get_file_id_level(file_id)
    return level > ROOT_FILE_ID_LEVELS


def create_random_export_name(user_id: UserID) -> StorageFileID:
Expand Down
7 changes: 7 additions & 0 deletions services/storage/tests/unit/test_simcore_s3_dsm.py
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,7 @@ async def test_search_directories(
# Create directories with different naming patterns
test_directories = [
("test_dir_1", 3, 2), # directory name, subdir_count, file_count
("test_dir_1/subdir_a", 1, 1),
("test_dir_2", 2, 3),
("data_folder", 1, 2),
("backup_directory", 2, 1),
Expand Down Expand Up @@ -629,3 +630,9 @@ async def test_search_directories(
file_names_only = {f.file_name for f in backup_results if not f.is_directory}
assert "backup_directory" in directory_names
assert "backup_config.json" in file_names_only

# Test 6: Search for subdirectories
subdir_results = await _search_files_by_pattern(
simcore_s3_dsm, user_id, "*subdir_*", project_id
)
assert len(subdir_results) == 1 # Only subdir_a
100 changes: 100 additions & 0 deletions services/storage/tests/unit/test_simcore_s3_dsm_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
_replace_node_id_project_id_in_path,
compute_file_id_prefix,
ensure_user_selection_from_same_base_directory,
get_file_id_level,
is_nested_level_file_id,
)


Expand Down Expand Up @@ -129,3 +131,101 @@ def test_ensure_user_selection_from_same_base_directory(
)
def test__replace_node_id_project_id_in_path(path: str, expected: str):
assert _replace_node_id_project_id_in_path(_IDS_NAMES_MAP, path) == expected


@pytest.mark.parametrize(
    "file_id, expected_level",
    [
        # Degenerate inputs: stripping leaves "", which splits to [""]
        ("", 1),
        ("/", 1),
        ("//", 1),
        # One component
        ("project_id", 1),
        ("api", 1),
        # Two components
        ("project_id/node_id", 2),
        ("api/node_id", 2),
        (
            "b21a3b80-d578-4b33-a224-e24ee2e4966a/42b9cc07-60f5-4d29-a063-176d1467901c",
            2,
        ),
        # Three components
        ("project_id/node_id/file.txt", 3),
        ("api/node_id/folder", 3),
        # Deeper nesting
        ("project_id/node_id/folder/subfolder", 4),
        ("project_id/node_id/folder/subfolder/file.txt", 5),
        (
            "b21a3b80-d578-4b33-a224-e24ee2e4966a/42b9cc07-60f5-4d29-a063-176d1467901c/my/amazing/sub/folder/with/a/file.bin",
            9,
        ),
        (
            "api/42b9cc07-60f5-4d29-a063-176d1467901c/my/amazing/sub/folder/with/a/file.bin",
            9,
        ),
        # Surrounding slashes are stripped before counting
        ("/project_id/node_id/", 2),
        ("//project_id/node_id//", 2),
        ("/project_id/node_id/file.txt/", 3),
        # Consecutive inner slashes produce empty components that are counted
        ("project_id//node_id", 3),  # ["project_id", "", "node_id"]
        ("project_id///node_id", 4),  # ["project_id", "", "", "node_id"]
        (
            "project_id/node_id//file.txt",
            4,
        ),  # ["project_id", "node_id", "", "file.txt"]
    ],
)
def test_get_file_id_level(file_id: str, expected_level: int):
    """Check the component count computed for each kind of file id."""
    computed_level = get_file_id_level(file_id)
    assert computed_level == expected_level


@pytest.mark.parametrize(
    "file_id, expected_is_nested",
    [
        # ROOT_FILE_ID_LEVELS = 3, so nested files have > 3 levels
        # Not nested (levels <= 3)
        ("", False),  # Level 1
        ("/", False),  # Level 1
        ("project_id", False),  # Level 1
        ("project_id/node_id", False),  # Level 2
        ("api/node_id", False),  # Level 2
        ("project_id/node_id/file.txt", False),  # Level 3 (exactly ROOT_FILE_ID_LEVELS)
        ("api/node_id/folder", False),  # Level 3
        (
            "b21a3b80-d578-4b33-a224-e24ee2e4966a/42b9cc07-60f5-4d29-a063-176d1467901c/file.txt",
            False,
        ),  # Level 3
        ("//project_id/node_id/folder//", False),  # Level 3 after stripping
        # Nested (levels > 3)
        ("project_id/node_id/folder/file.txt", True),  # Level 4
        ("project_id/node_id/folder/subfolder", True),  # Level 4
        ("project_id/node_id/folder/subfolder/file.txt", True),  # Level 5
        ("api/node_id/nested/folder/file.txt", True),  # Level 5
        (
            "b21a3b80-d578-4b33-a224-e24ee2e4966a/42b9cc07-60f5-4d29-a063-176d1467901c/my/amazing/sub/folder/with/a/file.bin",
            True,
        ),  # Level 9
        (
            "api/42b9cc07-60f5-4d29-a063-176d1467901c/my/amazing/sub/folder/with/a/file.bin",
            True,
        ),  # Level 9
        # With leading/trailing slashes
        ("/project_id/node_id/folder/file.txt/", True),  # Level 4 after stripping
        # Edge cases with multiple consecutive slashes
        (
            "project_id//node_id//file.txt",
            True,
        ),  # Level 5: ["project_id", "", "node_id", "", "file.txt"]
        (
            "project_id/node_id//folder/file.txt",
            True,
        ),  # Level 5: ["project_id", "node_id", "", "folder", "file.txt"]
        # Boundary cases (exactly at the threshold)
        ("project_id/node_id/exactly_three_levels", False),  # Level 3
        ("project_id/node_id/four/levels", True),  # Level 4
    ],
)
def test_is_nested_level_file_id(file_id: str, expected_is_nested: bool):
    """Check that only file ids deeper than ROOT_FILE_ID_LEVELS are nested."""
    assert is_nested_level_file_id(file_id) == expected_is_nested
Loading