diff --git a/packages/aws-library/requirements/_base.txt b/packages/aws-library/requirements/_base.txt
index 325529a68118..0102e5c679e0 100644
--- a/packages/aws-library/requirements/_base.txt
+++ b/packages/aws-library/requirements/_base.txt
@@ -244,6 +244,8 @@ protobuf==5.29.3
     #   opentelemetry-proto
 psutil==6.1.1
     # via -r requirements/../../../packages/service-library/requirements/_base.in
+pycryptodome==3.21.0
+    # via stream-zip
 pydantic==2.10.6
     # via
     #   -c requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
@@ -368,6 +370,8 @@ six==1.17.0
     # via python-dateutil
 sniffio==1.3.1
     # via anyio
+stream-zip==0.0.83
+    # via -r requirements/../../../packages/service-library/requirements/_base.in
 tenacity==9.0.0
     # via -r requirements/../../../packages/service-library/requirements/_base.in
 toolz==1.0.0
diff --git a/packages/aws-library/src/aws_library/s3/_client.py b/packages/aws-library/src/aws_library/s3/_client.py
index 29f639b90a58..ec504df74444 100644
--- a/packages/aws-library/src/aws_library/s3/_client.py
+++ b/packages/aws-library/src/aws_library/s3/_client.py
@@ -14,9 +14,12 @@
 from botocore import exceptions as botocore_exc
 from botocore.client import Config
 from models_library.basic_types import SHA256Str
+from models_library.bytes_iters import BytesIter, DataSize
 from models_library.storage_schemas import ETag, S3BucketName, UploadedPart
 from pydantic import AnyUrl, ByteSize, TypeAdapter
+from servicelib.bytes_iters import DEFAULT_READ_CHUNK_SIZE, BytesStreamer
 from servicelib.logging_utils import log_catch, log_context
+from servicelib.s3_utils import FileLikeReader
 from servicelib.utils import limited_gather
 from settings_library.s3 import S3Settings
 from types_aiobotocore_s3 import S3Client
@@ -470,6 +473,57 @@ async def copy_objects_recursively(
             limit=_MAX_CONCURRENT_COPY,
         )
 
+    async def get_bytes_streamer_from_object(
+        self,
+        bucket_name: S3BucketName,
+        object_key: S3ObjectKey,
+        *,
+        chunk_size: int = DEFAULT_READ_CHUNK_SIZE,
+    ) -> BytesStreamer:
+        """stream-reads an object from S3 chunk by chunk"""
+
+        # NOTE: `download_fileobj` cannot be used to implement this because
+        # it buffers the entire file in memory instead of reading it
+        # chunk by chunk
+
+        # HEAD is a cheap request: it returns the object's size without fetching the body
+        head_response = await self._client.head_object(
+            Bucket=bucket_name, Key=object_key
+        )
+        data_size = DataSize(head_response["ContentLength"])
+
+        async def _() -> BytesIter:
+            # download the file in chunks
+            position = 0
+            while position < data_size:
+                # calculate the byte range for this chunk
+                end = min(position + chunk_size - 1, data_size - 1)
+                range_header = f"bytes={position}-{end}"
+
+                # download the chunk
+                response = await self._client.get_object(
+                    Bucket=bucket_name, Key=object_key, Range=range_header
+                )
+
+                chunk = await response["Body"].read()
+
+                # yield the chunk for processing
+                yield chunk
+
+                position += chunk_size
+
+        return BytesStreamer(data_size, _)
+
+    @s3_exception_handler(_logger)
+    async def upload_object_from_file_like(
+        self,
+        bucket_name: S3BucketName,
+        object_key: S3ObjectKey,
+        file_like_reader: FileLikeReader,
+    ) -> None:
+        """stream-writes an object to S3 from a file-like reader
+        (e.g. a `FileLikeBytesIterReader` wrapping an `AsyncIterable[bytes]`)"""
+        await self._client.upload_fileobj(file_like_reader, bucket_name, object_key)  # type: ignore[arg-type]
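+
+    # NOTE: usage sketch (illustrative only; `src`/`dst` are placeholders):
+    # the two methods above compose into a constant-memory S3-to-S3 copy:
+    #
+    #   streamer = await s3_api.get_bytes_streamer_from_object(bucket, src)
+    #   await s3_api.upload_object_from_file_like(
+    #       bucket, dst, FileLikeBytesIterReader(streamer.bytes_iter_callable())
+    #   )
+
     @staticmethod
     def is_multipart(file_size: ByteSize) -> bool:
         return file_size >= MULTIPART_UPLOADS_MIN_TOTAL_SIZE
diff --git a/packages/aws-library/tests/test_s3_client.py b/packages/aws-library/tests/test_s3_client.py
index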
d3e585aa5f7a..cc5957d2ab68 100644 --- a/packages/aws-library/tests/test_s3_client.py +++ b/packages/aws-library/tests/test_s3_client.py @@ -1,26 +1,35 @@ -# pylint:disable=unused-variable -# pylint:disable=unused-argument +# pylint:disable=contextmanager-generator-missing-cleanup +# pylint:disable=no-name-in-module +# pylint:disable=protected-access # pylint:disable=redefined-outer-name # pylint:disable=too-many-arguments -# pylint:disable=protected-access -# pylint:disable=no-name-in-module +# pylint:disable=unused-argument +# pylint:disable=unused-variable import asyncio import filecmp import json import logging +import random +import time from collections import defaultdict -from collections.abc import AsyncIterator, Awaitable, Callable +from collections.abc import AsyncIterator, Awaitable, Callable, Iterator +from contextlib import contextmanager from dataclasses import dataclass from pathlib import Path from typing import Any +from unittest.mock import AsyncMock, Mock +import aiofiles import botocore.exceptions import pytest from aiohttp import ClientSession from aws_library.s3._client import S3ObjectKey, SimcoreS3API -from aws_library.s3._constants import MULTIPART_UPLOADS_MIN_TOTAL_SIZE +from aws_library.s3._constants import ( + MULTIPART_COPY_THRESHOLD, + MULTIPART_UPLOADS_MIN_TOTAL_SIZE, +) from aws_library.s3._errors import ( S3BucketInvalidError, S3DestinationNotEmptyError, @@ -34,6 +43,12 @@ from moto.server import ThreadedMotoServer from pydantic import AnyUrl, ByteSize, TypeAdapter from pytest_benchmark.plugin import BenchmarkFixture +from pytest_mock import MockerFixture +from pytest_simcore.helpers.comparing import ( + assert_same_contents, + assert_same_file_content, + get_files_info_from_path, +) from pytest_simcore.helpers.logging_tools import log_context from pytest_simcore.helpers.parametrizations import ( byte_size_ids, @@ -44,7 +59,13 @@ upload_file_to_presigned_link, ) from pytest_simcore.helpers.typing_env import EnvVarsDict -from servicelib.utils import limited_as_completed +from servicelib.archiving_utils import unarchive_dir +from servicelib.bytes_iters import ArchiveEntries, DiskStreamReader, get_zip_bytes_iter +from servicelib.bytes_iters._models import DataSize +from servicelib.file_utils import remove_directory +from servicelib.progress_bar import ProgressBarData +from servicelib.s3_utils import FileLikeBytesIterReader +from servicelib.utils import limited_as_completed, limited_gather from settings_library.s3 import S3Settings from types_aiobotocore_s3 import S3Client from types_aiobotocore_s3.literals import BucketLocationConstraintType @@ -346,7 +367,7 @@ def set_log_levels_for_noisy_libraries() -> None: @pytest.fixture async def with_uploaded_folder_on_s3( create_folder_of_size_with_multiple_files: Callable[ - [ByteSize, ByteSize, ByteSize], Path + [ByteSize, ByteSize, ByteSize, Path | None], Path ], upload_file: Callable[[Path, Path], Awaitable[UploadedFile]], directory_size: ByteSize, @@ -355,7 +376,7 @@ async def with_uploaded_folder_on_s3( ) -> list[UploadedFile]: # create random files of random size and upload to S3 folder = create_folder_of_size_with_multiple_files( - ByteSize(directory_size), ByteSize(min_file_size), ByteSize(max_file_size) + ByteSize(directory_size), ByteSize(min_file_size), ByteSize(max_file_size), None ) list_uploaded_files = [] @@ -1375,3 +1396,245 @@ def run_async_test(dst_folder: str) -> None: ) benchmark.pedantic(run_async_test, setup=dst_folder_setup, rounds=4) + + +async def test_read_from_bytes_streamer( + 
mocked_s3_server_envs: EnvVarsDict, + with_uploaded_file_on_s3: UploadedFile, + simcore_s3_api: SimcoreS3API, + with_s3_bucket: S3BucketName, + fake_file_name: Path, +): + async with aiofiles.open(fake_file_name, "wb") as f: + bytes_streamer = await simcore_s3_api.get_bytes_streamer_from_object( + with_s3_bucket, with_uploaded_file_on_s3.s3_key, chunk_size=1024 + ) + assert isinstance(bytes_streamer.data_size, DataSize) + async for chunk in bytes_streamer.with_progress_bytes_iter(AsyncMock()): + await f.write(chunk) + + assert bytes_streamer.data_size == fake_file_name.stat().st_size + + await assert_same_file_content(with_uploaded_file_on_s3.local_path, fake_file_name) + + +@pytest.mark.parametrize("upload_from_s3", [True, False]) +async def test_upload_object_from_file_like( + mocked_s3_server_envs: EnvVarsDict, + with_uploaded_file_on_s3: UploadedFile, + simcore_s3_api: SimcoreS3API, + with_s3_bucket: S3BucketName, + upload_from_s3: bool, +): + object_key = "read_from_s3_write_to_s3" + + if upload_from_s3: + bytes_streamer = await simcore_s3_api.get_bytes_streamer_from_object( + with_s3_bucket, with_uploaded_file_on_s3.s3_key + ) + assert isinstance(bytes_streamer.data_size, DataSize) + await simcore_s3_api.upload_object_from_file_like( + with_s3_bucket, + object_key, + FileLikeBytesIterReader( + bytes_streamer.with_progress_bytes_iter(AsyncMock()) + ), + ) + else: + await simcore_s3_api.upload_object_from_file_like( + with_s3_bucket, + object_key, + FileLikeBytesIterReader( + DiskStreamReader(with_uploaded_file_on_s3.local_path) + .get_bytes_streamer() + .bytes_iter_callable() + ), + ) + + await simcore_s3_api.delete_object(bucket=with_s3_bucket, object_key=object_key) + + +@contextmanager +def _folder_with_files( + create_folder_of_size_with_multiple_files: Callable[ + [ByteSize, ByteSize, ByteSize, Path | None], Path + ], + target_folder: Path, +) -> Iterator[dict[str, Path]]: + target_folder.mkdir(parents=True, exist_ok=True) + folder_path = create_folder_of_size_with_multiple_files( + TypeAdapter(ByteSize).validate_python("10MiB"), + TypeAdapter(ByteSize).validate_python("10KiB"), + TypeAdapter(ByteSize).validate_python("100KiB"), + target_folder, + ) + + relative_names_to_paths = get_files_info_from_path(folder_path) + + yield relative_names_to_paths + + for file in relative_names_to_paths.values(): + file.unlink() + + +@pytest.fixture +def path_local_files_for_archive( + tmp_path: Path, + create_folder_of_size_with_multiple_files: Callable[ + [ByteSize, ByteSize, ByteSize, Path | None], Path + ], +) -> Iterator[Path]: + dir_path = tmp_path / "not_uploaded" + with _folder_with_files(create_folder_of_size_with_multiple_files, dir_path): + yield dir_path + + +@pytest.fixture +async def path_s3_files_for_archive( + tmp_path: Path, + create_folder_of_size_with_multiple_files: Callable[ + [ByteSize, ByteSize, ByteSize, Path | None], Path + ], + s3_client: S3Client, + with_s3_bucket: S3BucketName, +) -> AsyncIterator[Path]: + dir_path = tmp_path / "stored_in_s3" + with _folder_with_files( + create_folder_of_size_with_multiple_files, dir_path + ) as relative_names_to_paths: + await limited_gather( + *( + s3_client.upload_file( + Filename=f"{file}", Bucket=with_s3_bucket, Key=s3_object_key + ) + for s3_object_key, file in relative_names_to_paths.items() + ), + limit=10, + ) + yield dir_path + + await delete_all_object_versions( + s3_client, with_s3_bucket, relative_names_to_paths.keys() + ) + + +@pytest.fixture +def archive_download_path(tmp_path: Path, faker: Faker) -> Iterator[Path]: + 
+    path = tmp_path / f"downloaded_archive_{faker.uuid4()}.zip"
+    yield path
+    if path.exists():
+        path.unlink()
+
+
+@pytest.fixture
+async def extracted_archive_path(tmp_path: Path, faker: Faker) -> AsyncIterator[Path]:
+    path = tmp_path / f"decompressed_archive{faker.uuid4()}"
+    path.mkdir(parents=True, exist_ok=True)
+    assert path.is_dir()
+    yield path
+    await remove_directory(path)
+    assert not path.is_dir()
+
+
+@pytest.fixture
+async def archive_s3_object_key(
+    with_s3_bucket: S3BucketName, simcore_s3_api: SimcoreS3API
+) -> AsyncIterator[S3ObjectKey]:
+    s3_object_key = "read_from_s3_write_to_s3"
+    yield s3_object_key
+    await simcore_s3_api.delete_object(bucket=with_s3_bucket, object_key=s3_object_key)
+
+
+@pytest.fixture
+def mocked_progress_bar_cb(mocker: MockerFixture) -> Mock:
+    def _progress_cb(*args, **kwargs) -> None:
+        print(f"received progress: {args}, {kwargs}")
+
+    return mocker.Mock(side_effect=_progress_cb)
+
+
+async def test_workflow_compress_s3_objects_and_local_files_in_a_single_archive_then_upload_to_s3(
+    mocked_s3_server_envs: EnvVarsDict,
+    path_local_files_for_archive: Path,
+    path_s3_files_for_archive: Path,
+    archive_download_path: Path,
+    extracted_archive_path: Path,
+    simcore_s3_api: SimcoreS3API,
+    with_s3_bucket: S3BucketName,
+    s3_client: S3Client,
+    archive_s3_object_key: S3ObjectKey,
+    mocked_progress_bar_cb: Mock,
+):
+    # In this test:
+    # - files are read from disk and from S3
+    # - a zip archive is created on the fly
+    # - the zip archive is streamed to S3 as soon as chunks inside it are created
+    # It uses no disk space and constant memory for the entire operation.
+
+    # 1. assemble and upload zip archive
+
+    archive_entries: ArchiveEntries = []
+
+    local_files = get_files_info_from_path(path_local_files_for_archive)
+    for file_name, file_path in local_files.items():
+        archive_entries.append(
+            (
+                file_name,
+                DiskStreamReader(file_path).get_bytes_streamer(),
+            )
+        )
+
+    s3_files = get_files_info_from_path(path_s3_files_for_archive)
+
+    for s3_object_key in s3_files:
+        archive_entries.append(
+            (
+                s3_object_key,
+                await simcore_s3_api.get_bytes_streamer_from_object(
+                    with_s3_bucket, s3_object_key
+                ),
+            )
+        )
+
+    # shuffle the order of files in the archive:
+    # some will be read from S3 and some from disk
+    random.shuffle(archive_entries)
+
+    started = time.time()
+
+    async with ProgressBarData(
+        num_steps=1,
+        progress_report_cb=mocked_progress_bar_cb,
+        description="root_bar",
+    ) as progress_bar:
+        await simcore_s3_api.upload_object_from_file_like(
+            with_s3_bucket,
+            archive_s3_object_key,
+            FileLikeBytesIterReader(
+                get_zip_bytes_iter(
+                    archive_entries,
+                    progress_bar=progress_bar,
+                    chunk_size=MULTIPART_COPY_THRESHOLD,
+                )
+            ),
+        )
+
+    duration = time.time() - started
+    print(f"Zip created on S3 in {duration:.2f} seconds")
+
+    # 2. download zip archive from S3
+    print(f"downloading {archive_download_path}")
+    await s3_client.download_file(
+        with_s3_bucket, archive_s3_object_key, f"{archive_download_path}"
+    )
+
+    # 3. extract archive
+    await unarchive_dir(archive_download_path, extracted_archive_path)
+
+    # 4. compare
+    print("comparing files")
+    all_files_in_zip = get_files_info_from_path(path_local_files_for_archive) | s3_files
+
+    await assert_same_contents(
+        all_files_in_zip, get_files_info_from_path(extracted_archive_path)
+    )
diff --git a/packages/models-library/src/models_library/bytes_iters.py b/packages/models-library/src/models_library/bytes_iters.py
new file mode 100644
index 000000000000..5ec9bb961f3e
--- /dev/null
+++ b/packages/models-library/src/models_library/bytes_iters.py
@@ -0,0 +1,9 @@
+from collections.abc import AsyncIterable, Callable
+from typing import TypeAlias
+
+from pydantic import ByteSize
+
+BytesIter: TypeAlias = AsyncIterable[bytes]
+
+BytesIterCallable: TypeAlias = Callable[[], BytesIter]
+DataSize: TypeAlias = ByteSize
diff --git a/packages/pytest-simcore/src/pytest_simcore/file_extra.py b/packages/pytest-simcore/src/pytest_simcore/file_extra.py
index d2c85ee3de09..a761a571c2df 100644
--- a/packages/pytest-simcore/src/pytest_simcore/file_extra.py
+++ b/packages/pytest-simcore/src/pytest_simcore/file_extra.py
@@ -1,6 +1,7 @@
 import logging
 from collections.abc import Callable
 from pathlib import Path
+from typing import Iterable
 
 import pytest
 from faker import Faker
@@ -8,6 +9,17 @@
 from pytest_simcore.helpers.logging_tools import log_context
 
 
+@pytest.fixture
+def fake_file_name(tmp_path: Path, faker: Faker) -> Iterable[Path]:
+    file = tmp_path / faker.file_name()
+
+    yield file
+
+    if file.exists():
+        file.unlink()
+    assert not file.exists()
+
+
 @pytest.fixture
 def create_file_of_size(tmp_path: Path, faker: Faker) -> Callable[[ByteSize], Path]:
     # NOTE: cleanup is done by tmp_path fixture
@@ -60,11 +72,12 @@ def _create_random_content(
 @pytest.fixture
 def create_folder_of_size_with_multiple_files(
     tmp_path: Path, faker: Faker
-) -> Callable[[ByteSize, ByteSize, ByteSize], Path]:
+) -> Callable[[ByteSize, ByteSize, ByteSize, Path | None], Path]:
     def _create_folder_of_size_with_multiple_files(
         directory_size: ByteSize,
         file_min_size: ByteSize,
         file_max_size: ByteSize,
+        working_directory: Path | None,
        depth: NonNegativeInt | None = None,
     ) -> Path:
         # Helper function to create random files and directories
@@ -72,17 +85,18 @@ def _create_folder_of_size_with_multiple_files(
         assert file_min_size <= file_max_size
 
         # Recursively create content in the temporary directory
+        folder_path = working_directory or tmp_path
         remaining_size = directory_size
         with log_context(
             logging.INFO,
             msg=f"creating {directory_size.human_readable()} of random files "
-            f"(up to {file_max_size.human_readable()}) in {tmp_path}",
+            f"(up to {file_max_size.human_readable()}) in {folder_path}",
         ) as ctx:
             num_files_created = 0
             while remaining_size > 0:
                 remaining_size = _create_random_content(
                     faker,
-                    base_dir=tmp_path,
+                    base_dir=folder_path,
                     file_min_size=file_min_size,
                     file_max_size=file_max_size,
                     remaining_size=remaining_size,
@@ -90,6 +104,6 @@
                 )
                 num_files_created += 1
             ctx.logger.info("created %s files", num_files_created)
-        return tmp_path
+        return folder_path
 
     return _create_folder_of_size_with_multiple_files
diff --git a/packages/pytest-simcore/src/pytest_simcore/helpers/comparing.py b/packages/pytest-simcore/src/pytest_simcore/helpers/comparing.py
new file mode 100644
index 000000000000..31d2e9868066
--- /dev/null
+++ b/packages/pytest-simcore/src/pytest_simcore/helpers/comparing.py
@@ -0,0 +1,61 @@
+import asyncio
+import hashlib
+from concurrent.futures import ProcessPoolExecutor
+from pathlib import Path
+from typing import TypeAlias
+
+import aiofiles
+from servicelib.file_utils import create_sha256_checksum
+
+_FilesInfo: TypeAlias = dict[str, Path]
+
+
+def get_relative_to(folder: Path, file: Path) -> str:
+    return f"{file.relative_to(folder)}"
+
+
+async def assert_same_file_content(path_1: Path, path_2: Path) -> None:
+    async with aiofiles.open(path_1, "rb") as f1, aiofiles.open(path_2, "rb") as f2:
+        checksum_1 = await create_sha256_checksum(f1)
+        checksum_2 = await create_sha256_checksum(f2)
+    assert checksum_1 == checksum_2
+
+
+def get_files_info_from_path(folder: Path) -> _FilesInfo:
+    return {get_relative_to(folder, f): f for f in folder.rglob("*") if f.is_file()}
+
+
+def compute_hash(file_path: Path) -> tuple[Path, str]:
+    with Path.open(file_path, "rb") as file_to_hash:
+        file_hash = hashlib.md5()  # noqa: S324
+        chunk = file_to_hash.read(8192)
+        while chunk:
+            file_hash.update(chunk)
+            chunk = file_to_hash.read(8192)
+
+    return file_path, file_hash.hexdigest()
+
+
+async def compute_hashes(file_paths: list[Path]) -> dict[Path, str]:
+    """given a list of files computes hashes for the files on a process pool"""
+
+    loop = asyncio.get_event_loop()
+
+    with ProcessPoolExecutor() as process_pool_executor:
+        tasks = [
+            loop.run_in_executor(process_pool_executor, compute_hash, file_path)
+            for file_path in file_paths
+        ]
+        # NOTE: compute_hash returns a tuple[Path, str], so gathering the tasks
+        # yields a list[tuple[Path, str]], which dict() maps to dict[Path, str]
+        return dict(await asyncio.gather(*tasks))
+
+
+async def assert_same_contents(file_info1: _FilesInfo, file_info2: _FilesInfo) -> None:
+    assert set(file_info1.keys()) == set(file_info2.keys())
+
+    hashes_1 = await compute_hashes(list(file_info1.values()))
+    hashes_2 = await compute_hashes(list(file_info2.values()))
+
+    for key in file_info1:
+        assert hashes_1[file_info1[key]] == hashes_2[file_info2[key]]
diff --git a/packages/pytest-simcore/src/pytest_simcore/simcore_storage_service.py b/packages/pytest-simcore/src/pytest_simcore/simcore_storage_service.py
index a14e61a1ba54..c0c6c26fbe1b 100644
--- a/packages/pytest-simcore/src/pytest_simcore/simcore_storage_service.py
+++ b/packages/pytest-simcore/src/pytest_simcore/simcore_storage_service.py
@@ -53,7 +53,7 @@ async def storage_service(
     assert storage_endpoint.host is not None
     assert storage_endpoint.port is not None
     mocker.patch(
-        "simcore_sdk.node_ports_common._filemanager._get_https_link_if_storage_secure",
+        "simcore_sdk.node_ports_common._filemanager_utils._get_https_link_if_storage_secure",
         replace_storage_endpoint(storage_endpoint.host, storage_endpoint.port),
     )
diff --git a/packages/service-library/requirements/_base.in b/packages/service-library/requirements/_base.in
index 7961b0970986..027a631287e1 100644
--- a/packages/service-library/requirements/_base.in
+++ b/packages/service-library/requirements/_base.in
@@ -18,15 +18,16 @@ arrow # date/time
 faststream
 opentelemetry-api
 opentelemetry-exporter-otlp
-opentelemetry-instrumentation-requests
-opentelemetry-instrumentation-redis
 opentelemetry-instrumentation-logging
+opentelemetry-instrumentation-redis
+opentelemetry-instrumentation-requests
 opentelemetry-sdk
 psutil
 pydantic
 pyinstrument
 pyyaml
 redis
+stream-zip
 tenacity
 toolz
 tqdm
diff --git a/packages/service-library/requirements/_base.txt b/packages/service-library/requirements/_base.txt
index e22224d901ab..14773018e1c4 100644
--- a/packages/service-library/requirements/_base.txt
+++ b/packages/service-library/requirements/_base.txt
@@ -173,6 +173,8 @@ protobuf==5.29.3
     #   opentelemetry-proto
 psutil==6.1.1
     # via -r requirements/_base.in
+pycryptodome==3.21.0
+    # via stream-zip
 pydantic==2.10.6
     # via
     #   -c requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt
@@ -254,6 +256,8 @@ six==1.17.0
     # via python-dateutil
 sniffio==1.3.1
     # via anyio
+stream-zip==0.0.83
+    # via -r requirements/_base.in
 tenacity==9.0.0
     # via -r requirements/_base.in
 toolz==1.0.0
diff --git a/packages/service-library/src/servicelib/bytes_iters/__init__.py b/packages/service-library/src/servicelib/bytes_iters/__init__.py
new file mode 100644
index 000000000000..9d4fb6704df5
--- /dev/null
+++ b/packages/service-library/src/servicelib/bytes_iters/__init__.py
@@ -0,0 +1,15 @@
+from ._constants import DEFAULT_READ_CHUNK_SIZE
+from ._input import DiskStreamReader
+from ._models import BytesStreamer
+from ._output import DiskStreamWriter
+from ._stream_zip import ArchiveEntries, ArchiveFileEntry, get_zip_bytes_iter
+
+__all__: tuple[str, ...] = (
+    "ArchiveEntries",
+    "ArchiveFileEntry",
+    "BytesStreamer",
+    "DEFAULT_READ_CHUNK_SIZE",
+    "DiskStreamReader",
+    "DiskStreamWriter",
+    "get_zip_bytes_iter",
+)
diff --git a/packages/service-library/src/servicelib/bytes_iters/_constants.py b/packages/service-library/src/servicelib/bytes_iters/_constants.py
new file mode 100644
index 000000000000..d7259d34b7ae
--- /dev/null
+++ b/packages/service-library/src/servicelib/bytes_iters/_constants.py
@@ -0,0 +1,5 @@
+from typing import Final
+
+from pydantic import ByteSize, TypeAdapter
+
+DEFAULT_READ_CHUNK_SIZE: Final[int] = TypeAdapter(ByteSize).validate_python("1MiB")
diff --git a/packages/service-library/src/servicelib/bytes_iters/_input.py b/packages/service-library/src/servicelib/bytes_iters/_input.py
new file mode 100644
index 000000000000..becec0981fcd
--- /dev/null
+++ b/packages/service-library/src/servicelib/bytes_iters/_input.py
@@ -0,0 +1,25 @@
+from pathlib import Path
+
+import aiofiles
+from models_library.bytes_iters import BytesIter, DataSize
+
+from ._constants import DEFAULT_READ_CHUNK_SIZE
+from ._models import BytesStreamer
+
+
+class DiskStreamReader:
+    def __init__(self, file_path: Path, *, chunk_size: int = DEFAULT_READ_CHUNK_SIZE):
+        self.file_path = file_path
+        self.chunk_size = chunk_size
+
+    def get_bytes_streamer(self) -> BytesStreamer:
+        async def _() -> BytesIter:
+            async with aiofiles.open(self.file_path, "rb") as f:
+                while True:
+                    chunk = await f.read(self.chunk_size)
+                    if not chunk:
+                        break
+
+                    yield chunk
+
+        return BytesStreamer(DataSize(self.file_path.stat().st_size), _)
diff --git a/packages/service-library/src/servicelib/bytes_iters/_models.py b/packages/service-library/src/servicelib/bytes_iters/_models.py
new file mode 100644
index 000000000000..9eeec804b5b0
--- /dev/null
+++ b/packages/service-library/src/servicelib/bytes_iters/_models.py
@@ -0,0 +1,18 @@
+from dataclasses import dataclass
+
+from models_library.bytes_iters import BytesIter, BytesIterCallable, DataSize
+
+from ..progress_bar import ProgressBarData
+
+
+@dataclass(frozen=True)
+class BytesStreamer:
+    data_size: DataSize
+    bytes_iter_callable: BytesIterCallable
+
+    async def with_progress_bytes_iter(
+        self, progress_bar: ProgressBarData
+    ) -> BytesIter:
+        async for chunk in self.bytes_iter_callable():
+            await progress_bar.update(len(chunk))
+            yield chunk
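+
+# NOTE: usage sketch (illustrative): a BytesStreamer is typically produced by
+# `DiskStreamReader.get_bytes_streamer()` or `get_bytes_streamer_from_object()`:
+#
+#   async for chunk in bytes_streamer.with_progress_bytes_iter(progress_bar):
+#       ...  # each chunk is at most DEFAULT_READ_CHUNK_SIZE bytes
diff --git a/packages/service-library/src/servicelib/bytes_iters/_output.py b/packages/service-library/src/servicelib/bytes_iters/_output.py
new file mode 100644
index 000000000000..9995ce4d33b5
--- /dev/null
+++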
b/packages/service-library/src/servicelib/bytes_iters/_output.py
@@ -0,0 +1,29 @@
+from pathlib import Path
+
+import aiofiles
+from models_library.bytes_iters import BytesIter
+
+from ..s3_utils import FileLikeBytesIterReader
+
+
+class DiskStreamWriter:
+    def __init__(self, destination_path: Path):
+        self.destination_path = destination_path
+
+    async def write_from_bytes_iter(self, stream: BytesIter) -> None:
+        async with aiofiles.open(self.destination_path, "wb") as f:
+            async for chunk in stream:
+                await f.write(chunk)
+                await f.flush()
+
+    async def write_from_file_like(
+        self, file_like_reader: FileLikeBytesIterReader
+    ) -> None:
+        async with aiofiles.open(self.destination_path, "wb") as f:
+            while True:
+                chunk = await file_like_reader.read(100)
+                if not chunk:
+                    break
+
+                await f.write(chunk)
+                await f.flush()
diff --git a/packages/service-library/src/servicelib/bytes_iters/_stream_zip.py b/packages/service-library/src/servicelib/bytes_iters/_stream_zip.py
new file mode 100644
index 000000000000..3f1f89a0e49c
--- /dev/null
+++ b/packages/service-library/src/servicelib/bytes_iters/_stream_zip.py
@@ -0,0 +1,54 @@
+from collections.abc import AsyncIterable
+from datetime import UTC, datetime
+from stat import S_IFREG
+from typing import TypeAlias
+
+from models_library.bytes_iters import BytesIter, DataSize
+from stream_zip import ZIP_32, AsyncMemberFile, async_stream_zip
+
+from ..progress_bar import ProgressBarData
+from ._models import BytesStreamer
+
+FileNameInArchive: TypeAlias = str
+ArchiveFileEntry: TypeAlias = tuple[FileNameInArchive, BytesStreamer]
+ArchiveEntries: TypeAlias = list[ArchiveFileEntry]
+
+
+async def _member_files_iter(
+    archive_entries: ArchiveEntries, progress_bar: ProgressBarData
+) -> AsyncIterable[AsyncMemberFile]:
+    for file_name, byte_streamer in archive_entries:
+        yield (
+            file_name,
+            datetime.now(UTC),
+            S_IFREG | 0o600,
+            ZIP_32,
+            byte_streamer.with_progress_bytes_iter(progress_bar=progress_bar),
+        )
+
+
+async def get_zip_bytes_iter(
+    archive_entries: ArchiveEntries,
+    *,
+    progress_bar: ProgressBarData | None = None,
+    chunk_size: int,
+) -> BytesIter:
+    # NOTE: this is a CPU-bound task: even though the event loop is not blocked,
+    # the CPU is still used for compressing the content
+    if progress_bar is None:
+        progress_bar = ProgressBarData(num_steps=1, description="zip archive stream")
+
+    total_stream_length = DataSize(
+        sum(bytes_streamer.data_size for _, bytes_streamer in archive_entries)
+    )
+    description = f"files: count={len(archive_entries)}, size={total_stream_length.human_readable()}"
+
+    async with progress_bar.sub_progress(
+        steps=total_stream_length, description=description, progress_unit="Byte"
+    ) as sub_progress:
+        # NOTE: do not disable compression or the streams will be
+        # loaded fully in memory before yielding their content
+        async for chunk in async_stream_zip(
+            _member_files_iter(archive_entries, sub_progress), chunk_size=chunk_size
+        ):
+            yield chunk
diff --git a/packages/service-library/src/servicelib/s3_utils.py b/packages/service-library/src/servicelib/s3_utils.py
new file mode 100644
index 000000000000..f9492af2e321
--- /dev/null
+++ b/packages/service-library/src/servicelib/s3_utils.py
@@ -0,0 +1,32 @@
+from typing import Protocol
+
+from models_library.bytes_iters import BytesIter
+
+
+class FileLikeReader(Protocol):
+    """minimal interface for uploading from file objects to S3"""
+
+    async def read(self, size: int) -> bytes:
+        ...
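+
+# NOTE: usage sketch for the adapter below (illustrative only): it buffers an
+# AsyncIterable[bytes] so that S3's `upload_fileobj` can call `read(size)` on it:
+#
+#   reader = FileLikeBytesIterReader(bytes_streamer.bytes_iter_callable())
+#   first_kib = await reader.read(1024)  # returns fewer bytes only at end of stream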
+
+
+class FileLikeBytesIterReader(FileLikeReader):
+    def __init__(self, bytes_iter: BytesIter):
+        self._bytes_iter = bytes_iter
+        self._buffer = bytearray()
+        self._async_iterator = self._get_iterator()
+
+    async def _get_iterator(self):
+        async for chunk in self._bytes_iter:
+            yield chunk
+
+    async def read(self, size: int) -> bytes:
+        while len(self._buffer) < size:
+            try:
+                chunk = await anext(self._async_iterator)
+                self._buffer.extend(chunk)
+            except StopAsyncIteration:
+                break  # End of file
+
+        result, self._buffer = self._buffer[:size], self._buffer[size:]
+        return bytes(result)
diff --git a/packages/service-library/tests/archiving_utils/test_archiving_utils.py b/packages/service-library/tests/archiving_utils/test_archiving_utils.py
index 3aab7383644f..ba47ee00c0b0 100644
--- a/packages/service-library/tests/archiving_utils/test_archiving_utils.py
+++ b/packages/service-library/tests/archiving_utils/test_archiving_utils.py
@@ -4,19 +4,18 @@
 # pylint:disable=no-name-in-module
 
 import asyncio
-import hashlib
 import os
 import secrets
 import string
 import tempfile
 from collections.abc import Callable, Iterable
-from concurrent.futures import ProcessPoolExecutor
 from pathlib import Path
 
 import pytest
 from faker import Faker
 from pydantic import ByteSize, TypeAdapter
 from pytest_benchmark.plugin import BenchmarkFixture
+from pytest_simcore.helpers.comparing import compute_hash, compute_hashes
 from servicelib.archiving_utils import archive_dir, unarchive_dir
 
 
@@ -92,32 +91,6 @@ def get_all_files_in_dir(dir_path: Path) -> set[Path]:
     }
 
 
-def _compute_hash(file_path: Path) -> tuple[Path, str]:
-    with Path.open(file_path, "rb") as file_to_hash:
-        file_hash = hashlib.md5()  # noqa: S324
-        chunk = file_to_hash.read(8192)
-        while chunk:
-            file_hash.update(chunk)
-            chunk = file_to_hash.read(8192)
-
-    return file_path, file_hash.hexdigest()
-
-
-async def compute_hashes(file_paths: list[Path]) -> dict[Path, str]:
-    """given a list of files computes hashes for the files on a process pool"""
-
-    loop = asyncio.get_event_loop()
-
-    with ProcessPoolExecutor() as prcess_pool_executor:
-        tasks = [
-            loop.run_in_executor(prcess_pool_executor, _compute_hash, file_path)
-            for file_path in file_paths
-        ]
-        # pylint: disable=unnecessary-comprehension
-        # see return value of _compute_hash it is a tuple, mapping list[Tuple[Path,str]] to Dict[Path, str] here
-        return dict(await asyncio.gather(*tasks))
-
-
 def full_file_path_from_dir_and_subdirs(dir_path: Path) -> list[Path]:
     return [x for x in dir_path.rglob("*") if x.is_file()]
 
@@ -423,8 +396,8 @@ async def test_regression_archive_hash_does_not_change(
     await archive_dir(mixed_file_types, second_archive, compress=compress)
     assert second_archive.exists()
 
-    _, first_hash = _compute_hash(first_archive)
-    _, second_hash = _compute_hash(second_archive)
+    _, first_hash = compute_hash(first_archive)
+    _, second_hash = compute_hash(second_archive)
 
     assert first_hash == second_hash
diff --git a/packages/service-library/tests/test_bytes_iters.py b/packages/service-library/tests/test_bytes_iters.py
new file mode 100644
index 000000000000..32c3037a9f0e
--- /dev/null
+++ b/packages/service-library/tests/test_bytes_iters.py
@@ -0,0 +1,137 @@
+# pylint: disable=redefined-outer-name
+# pylint: disable=unused-argument
+
+import secrets
+from collections.abc import AsyncIterable
+from pathlib import Path
+from unittest.mock import Mock
+
+import pytest
+from faker import Faker
+from pytest_mock import MockerFixture
+from pytest_simcore.helpers.comparing import (
+    assert_same_contents,
+    get_files_info_from_path,
+    get_relative_to,
+)
+from servicelib.archiving_utils import unarchive_dir
+from servicelib.bytes_iters import (
+    ArchiveEntries,
+    DiskStreamReader,
+    DiskStreamWriter,
+    get_zip_bytes_iter,
+)
+from servicelib.file_utils import remove_directory
+from servicelib.progress_bar import ProgressBarData
+from servicelib.s3_utils import FileLikeBytesIterReader
+
+
+def _ensure_dir(path: Path) -> Path:
+    path.mkdir(parents=True, exist_ok=True)
+    assert path.exists()
+    assert path.is_dir()
+    return path
+
+
+@pytest.fixture
+def local_files_dir(tmp_path: Path) -> Path:
+    # Content to add to the zip
+    return _ensure_dir(tmp_path / "local_files_dir")
+
+
+@pytest.fixture
+def local_archive_path(tmp_path: Path) -> Path:
+    # local destination of archive (either from S3 or archived locally)
+    return tmp_path / "archive.zip"
+
+
+@pytest.fixture
+def local_unpacked_archive(tmp_path: Path) -> Path:
+    # contents of unpacked archive
+    return _ensure_dir(tmp_path / "unpacked_archive")
+
+
+def _rand_range(lower: int, upper: int) -> int:
+    # random int in the inclusive range [lower, upper]
+    return secrets.randbelow(upper - lower + 1) + lower
+
+
+def _generate_files_in_path(faker: Faker, base_dir: Path, *, prefix: str = "") -> None:
+    # mixed small text files and binary files
+    (base_dir / "empty").mkdir()
+
+    (base_dir / "d1").mkdir()
+    for i in range(_rand_range(10, 40)):
+        (base_dir / "d1" / f"{prefix}f{i}.txt").write_text(faker.text())
+        (base_dir / "d1" / f"{prefix}b{i}.bin").write_bytes(faker.json_bytes())
+
+    (base_dir / "d1" / "sd1").mkdir()
+    for i in range(_rand_range(10, 40)):
+        (base_dir / "d1" / "sd1" / f"{prefix}f{i}.txt").write_text(faker.text())
+        (base_dir / "d1" / "sd1" / f"{prefix}b{i}.bin").write_bytes(faker.json_bytes())
+
+    (base_dir / "fancy-names").mkdir()
+    for fancy_name in (
+        "i have some spaces in my name",
+        "(%$)&%$()",
+        " ",
+    ):
+        (base_dir / "fancy-names" / fancy_name).write_text(faker.text())
+
+
+@pytest.fixture
+async def prepare_content(local_files_dir: Path, faker: Faker) -> AsyncIterable[None]:
+    _generate_files_in_path(faker, local_files_dir, prefix="local_")
+    yield
+    await remove_directory(local_files_dir, only_children=True)
+
+
+@pytest.fixture
+def mocked_progress_bar_cb(mocker: MockerFixture) -> Mock:
+    def _progress_cb(*args, **kwargs) -> None:
+        print(f"received progress: {args}, {kwargs}")
+
+    return mocker.Mock(side_effect=_progress_cb)
+
+
+@pytest.mark.parametrize("use_file_like", [True, False])
+async def test_get_zip_bytes_iter(
+    mocked_progress_bar_cb: Mock,
+    prepare_content: None,
+    local_files_dir: Path,
+    local_archive_path: Path,
+    local_unpacked_archive: Path,
+    use_file_like: bool,
+):
+    # 1. generate archive from sources
+    archive_files: ArchiveEntries = []
+    for file in (x for x in local_files_dir.rglob("*") if x.is_file()):
+        archive_name = get_relative_to(local_files_dir, file)
+
+        archive_files.append(
+            (archive_name, DiskStreamReader(file).get_bytes_streamer())
+        )
+
+    writer = DiskStreamWriter(local_archive_path)
+
+    async with ProgressBarData(
+        num_steps=1,
+        progress_report_cb=mocked_progress_bar_cb,
+        description="root_bar",
+    ) as root:
+        bytes_iter = get_zip_bytes_iter(
+            archive_files, progress_bar=root, chunk_size=1024
+        )
+
+        if use_file_like:
+            await writer.write_from_file_like(FileLikeBytesIterReader(bytes_iter))
+        else:
+            await writer.write_from_bytes_iter(bytes_iter)
+
+    # 2. extract archive using existing tools
+    await unarchive_dir(local_archive_path, local_unpacked_archive)
+
+    # 3.
compare files in directories (same paths & sizes) + await assert_same_contents( + get_files_info_from_path(local_files_dir), + get_files_info_from_path(local_unpacked_archive), + ) diff --git a/packages/simcore-sdk/requirements/_base.txt b/packages/simcore-sdk/requirements/_base.txt index 8ed488aef03a..5a5912436bbf 100644 --- a/packages/simcore-sdk/requirements/_base.txt +++ b/packages/simcore-sdk/requirements/_base.txt @@ -267,6 +267,8 @@ psycopg2-binary==2.9.10 # via # aiopg # sqlalchemy +pycryptodome==3.21.0 + # via stream-zip pydantic==2.10.6 # via # -c requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt @@ -415,6 +417,8 @@ sqlalchemy==1.4.54 # -r requirements/../../../packages/postgres-database/requirements/_base.in # aiopg # alembic +stream-zip==0.0.83 + # via -r requirements/../../../packages/service-library/requirements/_base.in tenacity==9.0.0 # via # -r requirements/../../../packages/service-library/requirements/_base.in diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/_filemanager.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/_filemanager_utils.py similarity index 98% rename from packages/simcore-sdk/src/simcore_sdk/node_ports_common/_filemanager.py rename to packages/simcore-sdk/src/simcore_sdk/node_ports_common/_filemanager_utils.py index 034e35357a8f..9042568652f5 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/_filemanager.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/_filemanager_utils.py @@ -54,7 +54,7 @@ def _get_https_link_if_storage_secure(url: str) -> str: return url -async def _complete_upload( +async def complete_upload( session: ClientSession, upload_completion_link: AnyUrl, parts: list[UploadedPart], @@ -118,7 +118,7 @@ async def _complete_upload( raise exceptions.S3TransferError(msg) -async def _resolve_location_id( +async def resolve_location_id( client_session: ClientSession, user_id: UserID, store_name: LocationName | None, @@ -136,7 +136,7 @@ async def _resolve_location_id( return store_id -async def _abort_upload( +async def abort_upload( session: ClientSession, abort_upload_link: AnyUrl, *, reraise_exceptions: bool ) -> None: # abort the upload correctly, so it can revert back to last version diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/filemanager.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/filemanager.py index 6ddb18d68f2d..ca0a54875ea8 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/filemanager.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/filemanager.py @@ -30,8 +30,7 @@ from yarl import URL from ..node_ports_common.client_session_manager import ClientSessionContextManager -from . import aws_s3_cli, exceptions, r_clone, storage_client -from ._filemanager import _abort_upload, _complete_upload, _resolve_location_id +from . 
import _filemanager_utils, aws_s3_cli, exceptions, r_clone, storage_client from .file_io_utils import ( LogRedirectCB, UploadableFileObject, @@ -48,7 +47,7 @@ async def complete_file_upload( client_session: ClientSession | None = None, ) -> ETag: async with ClientSessionContextManager(client_session) as session: - e_tag: ETag | None = await _complete_upload( + e_tag: ETag | None = await _filemanager_utils.complete_upload( session=session, upload_completion_link=upload_completion_link, parts=uploaded_parts, @@ -75,7 +74,9 @@ async def get_download_link_from_s3( :raises exceptions.StorageServerIssue """ async with ClientSessionContextManager(client_session) as session: - store_id = await _resolve_location_id(session, user_id, store_name, store_id) + store_id = await _filemanager_utils.resolve_location_id( + session, user_id, store_name, store_id + ) file_link = await storage_client.get_download_file_link( session=session, file_id=s3_object, @@ -99,7 +100,9 @@ async def get_upload_links_from_s3( sha256_checksum: SHA256Str | None, ) -> tuple[LocationID, FileUploadSchema]: async with ClientSessionContextManager(client_session) as session: - store_id = await _resolve_location_id(session, user_id, store_name, store_id) + store_id = await _filemanager_utils.resolve_location_id( + session, user_id, store_name, store_id + ) file_links = await storage_client.get_upload_file_links( session=session, file_id=s3_object, @@ -144,7 +147,9 @@ async def download_path_from_s3( ) async with ClientSessionContextManager(client_session) as session: - store_id = await _resolve_location_id(session, user_id, store_name, store_id) + store_id = await _filemanager_utils.resolve_location_id( + session, user_id, store_name, store_id + ) file_meta_data: FileMetaDataGet = await _get_file_meta_data( user_id=user_id, s3_object=s3_object, @@ -259,7 +264,7 @@ async def abort_upload( """ async with ClientSessionContextManager(client_session) as session: - await _abort_upload( + await _filemanager_utils.abort_upload( session=session, abort_upload_link=abort_upload_link, reraise_exceptions=True, @@ -425,13 +430,13 @@ async def _upload_path( # pylint: disable=too-many-arguments ) as exc: _logger.exception("The upload failed with an unexpected error:") if upload_links: - await _abort_upload( + await _filemanager_utils.abort_upload( session, upload_links.links.abort_upload, reraise_exceptions=False ) raise exceptions.S3TransferError from exc except CancelledError: if upload_links: - await _abort_upload( + await _filemanager_utils.abort_upload( session, upload_links.links.abort_upload, reraise_exceptions=False ) raise @@ -485,7 +490,7 @@ async def _upload_to_s3( progress_bar=progress_bar, ) # complete the upload - e_tag = await _complete_upload( + e_tag = await _filemanager_utils.complete_upload( session, upload_links.links.complete_upload, uploaded_parts, diff --git a/packages/simcore-sdk/tests/unit/test_storage_client.py b/packages/simcore-sdk/tests/unit/test_storage_client.py index c739b7c07ebd..c094b7a04b9f 100644 --- a/packages/simcore-sdk/tests/unit/test_storage_client.py +++ b/packages/simcore-sdk/tests/unit/test_storage_client.py @@ -24,7 +24,9 @@ from pytest_simcore.helpers.monkeypatch_envs import EnvVarsDict, setenvs_from_dict from servicelib.aiohttp import status from simcore_sdk.node_ports_common import exceptions -from simcore_sdk.node_ports_common._filemanager import _get_https_link_if_storage_secure +from simcore_sdk.node_ports_common._filemanager_utils import ( + _get_https_link_if_storage_secure, +) from 
simcore_sdk.node_ports_common.storage_client import ( LinkType, delete_file, diff --git a/services/agent/requirements/_base.txt b/services/agent/requirements/_base.txt index b4bb9f796d9a..5a05454ee00d 100644 --- a/services/agent/requirements/_base.txt +++ b/services/agent/requirements/_base.txt @@ -264,6 +264,8 @@ protobuf==5.28.3 # opentelemetry-proto psutil==6.1.0 # via -r requirements/../../../packages/service-library/requirements/_base.in +pycryptodome==3.21.0 + # via stream-zip pydantic==2.10.2 # via # -c requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt @@ -401,6 +403,8 @@ starlette==0.41.3 # -c requirements/../../../requirements/constraints.txt # fastapi # prometheus-fastapi-instrumentator +stream-zip==0.0.83 + # via -r requirements/../../../packages/service-library/requirements/_base.in tenacity==9.0.0 # via -r requirements/../../../packages/service-library/requirements/_base.in toolz==1.0.0 diff --git a/services/api-server/requirements/_base.txt b/services/api-server/requirements/_base.txt index ccac839c8978..59d1e3d5f64c 100644 --- a/services/api-server/requirements/_base.txt +++ b/services/api-server/requirements/_base.txt @@ -529,6 +529,8 @@ psycopg2-binary==2.9.10 # sqlalchemy pycparser==2.22 # via cffi +pycryptodome==3.21.0 + # via stream-zip pydantic==2.10.3 # via # -c requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt @@ -799,6 +801,10 @@ starlette==0.41.3 # -c requirements/../../../requirements/constraints.txt # fastapi # prometheus-fastapi-instrumentator +stream-zip==0.0.83 + # via + # -r requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in tenacity==9.0.0 # via # -r requirements/../../../packages/service-library/requirements/_base.in diff --git a/services/autoscaling/requirements/_base.txt b/services/autoscaling/requirements/_base.txt index 7e342b2ba6f3..e1fb697d2a2e 100644 --- a/services/autoscaling/requirements/_base.txt +++ b/services/autoscaling/requirements/_base.txt @@ -479,6 +479,8 @@ psutil==6.1.0 # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/_base.in # distributed +pycryptodome==3.21.0 + # via stream-zip pydantic==2.10.3 # via # -c requirements/../../../packages/aws-library/requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt @@ -716,6 +718,10 @@ starlette==0.41.3 # -c requirements/../../../requirements/constraints.txt # fastapi # prometheus-fastapi-instrumentator +stream-zip==0.0.83 + # via + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/service-library/requirements/_base.in tblib==3.0.0 # via # -c requirements/../../../services/dask-sidecar/requirements/_dask-distributed.txt diff --git a/services/catalog/requirements/_base.txt b/services/catalog/requirements/_base.txt index 3bfd3ff9b98c..8d4f1a21569f 100644 --- a/services/catalog/requirements/_base.txt +++ b/services/catalog/requirements/_base.txt @@ -341,6 +341,8 @@ psutil==6.1.0 # via -r requirements/../../../packages/service-library/requirements/_base.in psycopg2-binary==2.9.10 # via sqlalchemy +pycryptodome==3.21.0 + # via stream-zip pydantic==2.10.3 # via # -c 
requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt @@ -522,6 +524,8 @@ starlette==0.41.3 # -c requirements/../../../requirements/constraints.txt # fastapi # prometheus-fastapi-instrumentator +stream-zip==0.0.83 + # via -r requirements/../../../packages/service-library/requirements/_base.in tenacity==9.0.0 # via # -r requirements/../../../packages/service-library/requirements/_base.in diff --git a/services/clusters-keeper/requirements/_base.txt b/services/clusters-keeper/requirements/_base.txt index 869f41bbd6c7..19d74db0e9df 100644 --- a/services/clusters-keeper/requirements/_base.txt +++ b/services/clusters-keeper/requirements/_base.txt @@ -477,6 +477,8 @@ psutil==6.1.0 # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/_base.in # distributed +pycryptodome==3.21.0 + # via stream-zip pydantic==2.10.3 # via # -c requirements/../../../packages/aws-library/requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt @@ -714,6 +716,10 @@ starlette==0.41.3 # -c requirements/../../../requirements/constraints.txt # fastapi # prometheus-fastapi-instrumentator +stream-zip==0.0.83 + # via + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/service-library/requirements/_base.in tblib==3.0.0 # via # -c requirements/../../../services/dask-sidecar/requirements/_dask-distributed.txt diff --git a/services/dask-sidecar/requirements/_base.txt b/services/dask-sidecar/requirements/_base.txt index 8bef9cdda226..87ea5af27fc4 100644 --- a/services/dask-sidecar/requirements/_base.txt +++ b/services/dask-sidecar/requirements/_base.txt @@ -317,6 +317,8 @@ psutil==6.1.0 # via # -r requirements/../../../packages/service-library/requirements/_base.in # distributed +pycryptodome==3.21.0 + # via stream-zip pydantic==2.10.3 # via # -c requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt @@ -486,6 +488,8 @@ sniffio==1.3.1 # via anyio sortedcontainers==2.4.0 # via distributed +stream-zip==0.0.83 + # via -r requirements/../../../packages/service-library/requirements/_base.in tblib==3.0.0 # via distributed tenacity==9.0.0 diff --git a/services/datcore-adapter/requirements/_base.txt b/services/datcore-adapter/requirements/_base.txt index 6e221cd0a67d..c582684c7730 100644 --- a/services/datcore-adapter/requirements/_base.txt +++ b/services/datcore-adapter/requirements/_base.txt @@ -289,6 +289,8 @@ protobuf==4.25.4 # opentelemetry-proto psutil==6.0.0 # via -r requirements/../../../packages/service-library/requirements/_base.in +pycryptodome==3.21.0 + # via stream-zip pydantic==2.10.2 # via # -c requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt @@ -437,6 +439,8 @@ starlette==0.41.0 # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../requirements/constraints.txt # fastapi +stream-zip==0.0.83 + # via -r requirements/../../../packages/service-library/requirements/_base.in tenacity==8.5.0 # via -r requirements/../../../packages/service-library/requirements/_base.in toolz==0.12.1 diff --git a/services/director-v2/requirements/_base.txt b/services/director-v2/requirements/_base.txt index 849294433571..0dc92ca0e738 100644 --- 
a/services/director-v2/requirements/_base.txt +++ b/services/director-v2/requirements/_base.txt @@ -588,6 +588,8 @@ psycopg2-binary==2.9.9 # via # aiopg # sqlalchemy +pycryptodome==3.21.0 + # via stream-zip pydantic==2.10.2 # via # -c requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt @@ -947,6 +949,10 @@ starlette==0.41.2 # -c requirements/../../../packages/simcore-sdk/requirements/../../../requirements/constraints.txt # -c requirements/../../../requirements/constraints.txt # fastapi +stream-zip==0.0.83 + # via + # -r requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in tblib==3.0.0 # via # -r requirements/../../../services/dask-sidecar/requirements/_dask-distributed.txt diff --git a/services/director/requirements/_base.txt b/services/director/requirements/_base.txt index e5f0aaa40b92..59dc5bf6e708 100644 --- a/services/director/requirements/_base.txt +++ b/services/director/requirements/_base.txt @@ -299,6 +299,8 @@ protobuf==5.28.3 # opentelemetry-proto psutil==6.1.0 # via -r requirements/../../../packages/service-library/requirements/_base.in +pycryptodome==3.21.0 + # via stream-zip pydantic==2.10.2 # via # -c requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt @@ -443,6 +445,8 @@ starlette==0.41.3 # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../requirements/constraints.txt # fastapi +stream-zip==0.0.83 + # via -r requirements/../../../packages/service-library/requirements/_base.in tenacity==9.0.0 # via # -r requirements/../../../packages/service-library/requirements/_base.in diff --git a/services/dynamic-scheduler/requirements/_base.txt b/services/dynamic-scheduler/requirements/_base.txt index 90db0013db18..a1b42fff0641 100644 --- a/services/dynamic-scheduler/requirements/_base.txt +++ b/services/dynamic-scheduler/requirements/_base.txt @@ -350,6 +350,8 @@ psutil==6.1.0 # via -r requirements/../../../packages/service-library/requirements/_base.in psycopg2-binary==2.9.10 # via sqlalchemy +pycryptodome==3.21.0 + # via stream-zip pydantic==2.10.2 # via # -c requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt @@ -534,6 +536,8 @@ starlette==0.41.3 # -c requirements/../../../requirements/constraints.txt # fastapi # prometheus-fastapi-instrumentator +stream-zip==0.0.83 + # via -r requirements/../../../packages/service-library/requirements/_base.in tenacity==9.0.0 # via -r requirements/../../../packages/service-library/requirements/_base.in toolz==1.0.0 diff --git a/services/dynamic-sidecar/requirements/_base.txt b/services/dynamic-sidecar/requirements/_base.txt index 614193f285df..d80c66683c28 100644 --- a/services/dynamic-sidecar/requirements/_base.txt +++ b/services/dynamic-sidecar/requirements/_base.txt @@ -452,6 +452,8 @@ psycopg2-binary==2.9.10 # via # aiopg # sqlalchemy +pycryptodome==3.21.0 + # via stream-zip pydantic==2.10.2 # via # -c requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt @@ -748,6 +750,10 @@ starlette==0.41.3 # -c requirements/../../../requirements/constraints.txt # fastapi # prometheus-fastapi-instrumentator +stream-zip==0.0.83 + # via + # -r requirements/../../../packages/service-library/requirements/_base.in + # -r 
requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in tenacity==9.0.0 # via # -r requirements/../../../packages/service-library/requirements/_base.in diff --git a/services/dynamic-sidecar/tests/integration/test_modules_long_running_tasks.py b/services/dynamic-sidecar/tests/integration/test_modules_long_running_tasks.py index 4156c091e7e0..7302e23f2095 100644 --- a/services/dynamic-sidecar/tests/integration/test_modules_long_running_tasks.py +++ b/services/dynamic-sidecar/tests/integration/test_modules_long_running_tasks.py @@ -166,7 +166,7 @@ async def simcore_storage_service(mocker: MockerFixture, app: FastAPI) -> None: # NOTE: Mock to ensure container IP agrees with host IP when testing mocker.patch( - "simcore_sdk.node_ports_common._filemanager._get_https_link_if_storage_secure", + "simcore_sdk.node_ports_common._filemanager_utils._get_https_link_if_storage_secure", replace_storage_endpoint(storage_host, int(storage_port)), ) diff --git a/services/efs-guardian/requirements/_base.txt b/services/efs-guardian/requirements/_base.txt index 0b41961a9cfe..0fba0025cdb8 100644 --- a/services/efs-guardian/requirements/_base.txt +++ b/services/efs-guardian/requirements/_base.txt @@ -454,6 +454,8 @@ psutil==6.1.0 # -r requirements/../../../packages/service-library/requirements/_base.in psycopg2-binary==2.9.10 # via sqlalchemy +pycryptodome==3.21.0 + # via stream-zip pydantic==2.10.2 # via # -c requirements/../../../packages/aws-library/requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt @@ -730,6 +732,10 @@ starlette==0.41.2 # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../requirements/constraints.txt # fastapi +stream-zip==0.0.83 + # via + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/service-library/requirements/_base.in tenacity==9.0.0 # via # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in diff --git a/services/invitations/requirements/_base.txt b/services/invitations/requirements/_base.txt index 2522f21fbe5f..e1dd53adc35d 100644 --- a/services/invitations/requirements/_base.txt +++ b/services/invitations/requirements/_base.txt @@ -284,6 +284,8 @@ psutil==6.1.0 # via -r requirements/../../../packages/service-library/requirements/_base.in pycparser==2.22 # via cffi +pycryptodome==3.21.0 + # via stream-zip pydantic==2.10.2 # via # -c requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt @@ -423,6 +425,8 @@ starlette==0.41.3 # -c requirements/../../../requirements/constraints.txt # fastapi # prometheus-fastapi-instrumentator +stream-zip==0.0.83 + # via -r requirements/../../../packages/service-library/requirements/_base.in tenacity==9.0.0 # via -r requirements/../../../packages/service-library/requirements/_base.in toolz==1.0.0 diff --git a/services/payments/requirements/_base.txt b/services/payments/requirements/_base.txt index e2a3b2a31f55..46195ba19aaa 100644 --- a/services/payments/requirements/_base.txt +++ b/services/payments/requirements/_base.txt @@ -358,6 +358,8 @@ pyasn1==0.6.1 # rsa pycparser==2.22 # via cffi +pycryptodome==3.21.0 + # via stream-zip pydantic==2.10.2 # via # -c requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt @@ -557,6 +559,8 
@@ starlette==0.41.3 # -c requirements/../../../requirements/constraints.txt # fastapi # prometheus-fastapi-instrumentator +stream-zip==0.0.83 + # via -r requirements/../../../packages/service-library/requirements/_base.in tenacity==9.0.0 # via -r requirements/../../../packages/service-library/requirements/_base.in toolz==1.0.0 diff --git a/services/resource-usage-tracker/requirements/_base.txt b/services/resource-usage-tracker/requirements/_base.txt index dda85299ef45..429f3e3d53e5 100644 --- a/services/resource-usage-tracker/requirements/_base.txt +++ b/services/resource-usage-tracker/requirements/_base.txt @@ -482,6 +482,8 @@ psutil==6.0.0 # -r requirements/../../../packages/service-library/requirements/_base.in psycopg2-binary==2.9.9 # via sqlalchemy +pycryptodome==3.21.0 + # via stream-zip pydantic==2.10.2 # via # -c requirements/../../../packages/aws-library/requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt @@ -777,6 +779,10 @@ starlette==0.41.2 # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../requirements/constraints.txt # fastapi +stream-zip==0.0.83 + # via + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/service-library/requirements/_base.in tenacity==8.5.0 # via # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in diff --git a/services/storage/requirements/_base.txt b/services/storage/requirements/_base.txt index 3b6a798add8e..f947f8c7ba63 100644 --- a/services/storage/requirements/_base.txt +++ b/services/storage/requirements/_base.txt @@ -512,6 +512,8 @@ psutil==6.1.1 # -r requirements/../../../packages/service-library/requirements/_base.in psycopg2-binary==2.9.10 # via sqlalchemy +pycryptodome==3.21.0 + # via stream-zip pydantic==2.10.6 # via # -c requirements/../../../packages/aws-library/requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt @@ -798,6 +800,10 @@ starlette==0.45.3 # -c requirements/../../../requirements/constraints.txt # fastapi # prometheus-fastapi-instrumentator +stream-zip==0.0.83 + # via + # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/service-library/requirements/_base.in tenacity==9.0.0 # via # -r requirements/../../../packages/aws-library/requirements/../../../packages/service-library/requirements/_base.in diff --git a/services/web/server/requirements/_base.txt b/services/web/server/requirements/_base.txt index 562b0eab736b..90a7f9e5259b 100644 --- a/services/web/server/requirements/_base.txt +++ b/services/web/server/requirements/_base.txt @@ -543,6 +543,8 @@ pycountry==23.12.11 # via -r requirements/_base.in pycparser==2.21 # via cffi +pycryptodome==3.21.0 + # via stream-zip pydantic==2.10.2 # via # -c requirements/../../../../packages/common-library/requirements/../../../requirements/constraints.txt @@ -786,6 +788,10 @@ sqlalchemy==1.4.47 # -r requirements/../../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/_base.in # aiopg # alembic +stream-zip==0.0.83 + # via + # -r requirements/../../../../packages/service-library/requirements/_base.in + # -r requirements/../../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in 
swagger-ui-py==23.9.23 # via -r requirements/_base.in tenacity==8.5.0 diff --git a/tests/swarm-deploy/requirements/_test.txt b/tests/swarm-deploy/requirements/_test.txt index 43a246d3b3b4..e379ac942b95 100644 --- a/tests/swarm-deploy/requirements/_test.txt +++ b/tests/swarm-deploy/requirements/_test.txt @@ -375,6 +375,8 @@ psycopg2-binary==2.9.10 # via # aiopg # sqlalchemy +pycryptodome==3.21.0 + # via stream-zip pydantic==2.10.6 # via # -c requirements/../../../packages/models-library/requirements/../../../packages/common-library/requirements/../../../requirements/constraints.txt @@ -657,6 +659,10 @@ sqlalchemy==1.4.54 # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/_base.in # aiopg # alembic +stream-zip==0.0.83 + # via + # -r requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in tenacity==9.0.0 # via # -r requirements/../../../packages/postgres-database/requirements/_migration.txt