Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

✨Streaming utils for zipping and reading/wiring to S3 #7186

Merged
Merged
Changes from 1 commit
Commits
Show all changes
74 commits
Select commit Hold shift + click to select a range
0054d6a
added stream-zip
Feb 7, 2025
ff96d46
added utils for stream zipping
Feb 7, 2025
fe6c34d
rename
Feb 7, 2025
bd1df7c
added minimal progress support
Feb 7, 2025
7b7aae0
rename
Feb 7, 2025
5f72a43
fixed types
Feb 7, 2025
aace76a
refactor
Feb 7, 2025
388b81a
refactor
Feb 7, 2025
357273a
added S3 streaming and integration test
Feb 7, 2025
973423e
refactor
Feb 7, 2025
b2a66f5
Merge remote-tracking branch 'upstream/master' into pr-osparc-stream-…
Feb 7, 2025
c129672
removed debug print
Feb 7, 2025
e32f265
refactor to use size instead of items count as progress
Feb 7, 2025
de490ad
using faster file hash checking
Feb 7, 2025
706934d
Merge remote-tracking branch 'upstream/master' into pr-osparc-stream-…
Feb 7, 2025
c672212
refactor progress on zip
Feb 7, 2025
cee5e9c
remove unused
Feb 7, 2025
6d3eb72
remove unused
Feb 7, 2025
3873976
remove outdated
Feb 7, 2025
706ee4b
reshuffled imports
Feb 7, 2025
a7e0867
fixed more broken imports
Feb 7, 2025
4775e55
reverted delted import
Feb 7, 2025
e0a5407
remove unused error
Feb 7, 2025
23515e3
revert number
Feb 7, 2025
b546f10
fixed broken import
Feb 7, 2025
d26289d
typing
Feb 10, 2025
f2c5923
fixeed tests
Feb 10, 2025
c2eea57
typing and imports
Feb 10, 2025
dc3b63a
fixed broken test
Feb 10, 2025
7eac64d
Merge remote-tracking branch 'upstream/master' into pr-osparc-stream-…
Feb 11, 2025
1fabe65
added missing
Feb 11, 2025
b245e63
rename module
Feb 11, 2025
19b5ef5
added FileLikeFileStreamReader
Feb 11, 2025
a5c9060
Merge remote-tracking branch 'upstream/master' into pr-osparc-stream-…
Feb 11, 2025
6fb389d
repalced with simpler implementation
Feb 11, 2025
c6cc5e0
rename
Feb 11, 2025
e7eee8b
refactor imports
Feb 11, 2025
2045c08
refactor
Feb 11, 2025
1298895
refactor
Feb 11, 2025
d17e450
added readme
Feb 11, 2025
d14d8fd
refacto fixture
Feb 11, 2025
0497f86
extended tests
Feb 11, 2025
6261c65
extended tests
Feb 12, 2025
cd9d443
renaming
Feb 12, 2025
0777442
Merge remote-tracking branch 'upstream/master' into pr-osparc-stream-…
Feb 12, 2025
e042a6c
fixed broken mocks
Feb 12, 2025
eea3fba
Merge remote-tracking branch 'upstream/master' into pr-osparc-stream-…
Feb 13, 2025
ed8280f
rename
Feb 13, 2025
f170791
rename
Feb 13, 2025
17c0ac5
rename module
Feb 13, 2025
c024827
refactor interface
Feb 13, 2025
a844a0c
refactor progress
Feb 13, 2025
a277e12
refactor placement of FileLikeFileStreamReader
Feb 13, 2025
5a1a8e7
rename
Feb 13, 2025
ec87ba0
update
Feb 13, 2025
b835ae0
rename and move around parts
Feb 13, 2025
70afc40
renamed modules
Feb 13, 2025
0addf04
renames
Feb 13, 2025
cb479f3
moved imports to more appropriate places
Feb 13, 2025
881d2a1
Merge remote-tracking branch 'upstream/master' into pr-osparc-stream-…
Feb 13, 2025
e37972c
refactor
Feb 13, 2025
0f11526
renamed
Feb 13, 2025
b23d1f1
renamed to bytes_iter
Feb 13, 2025
b29bff2
renaming paths
Feb 13, 2025
d6ca255
renamed
Feb 13, 2025
a169f73
refactor
Feb 13, 2025
2c2d2db
added missing type
Feb 14, 2025
526ed0a
Merge remote-tracking branch 'upstream/master' into pr-osparc-stream-…
Feb 14, 2025
0ec7e23
rename fixture
Feb 14, 2025
9d48624
Merge remote-tracking branch 'upstream/master' into pr-osparc-stream-…
Feb 14, 2025
3aa766b
Merge remote-tracking branch 'upstream/master' into pr-osparc-stream-…
Feb 17, 2025
9cf8f1b
rename
Feb 17, 2025
d2f928a
removed todo in test
Feb 17, 2025
25372c7
Merge branch 'master' into pr-osparc-stream-zipping-of-s3-content
GitHK Feb 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
refactor
  • Loading branch information
Andrei Neagu committed Feb 7, 2025
commit 973423ec3251f9d4b6f0b9f569cbe8c1e16f464d
30 changes: 19 additions & 11 deletions packages/aws-library/tests/test_s3_client.py
Original file line number Diff line number Diff line change
@@ -11,6 +11,7 @@
import json
import logging
import random
import time
from collections import defaultdict
from collections.abc import AsyncIterator, Awaitable, Callable, Iterator
from dataclasses import dataclass
@@ -1446,11 +1447,10 @@ async def test_upload_object_from_file_stream(
@pytest.fixture
def files_stored_locally(
create_file_of_size: Callable[[ByteSize], Path],
file_size: ByteSize,
local_count: int,
) -> Iterator[set[Path]]:
files = {
create_file_of_size(TypeAdapter(ByteSize).validate_python("10Mib"))
for _ in range(10)
}
files = {create_file_of_size(file_size) for _ in range(local_count)}

yield files

@@ -1461,13 +1461,12 @@ def files_stored_locally(
@pytest.fixture
async def files_stored_in_s3(
create_file_of_size: Callable[[ByteSize], Path],
file_size: ByteSize,
remote_count: int,
s3_client: S3Client,
with_s3_bucket: S3BucketName,
) -> AsyncIterator[set[Path]]:
files = {
create_file_of_size(TypeAdapter(ByteSize).validate_python("10Mib"))
for _ in range(10)
}
files = {create_file_of_size(file_size) for _ in range(remote_count)}
for file in files:
await s3_client.upload_file(
Filename=f"{file}",
@@ -1524,6 +1523,14 @@ async def archive_s3_object_key(
await simcore_s3_api.delete_object(bucket=with_s3_bucket, object_key=s3_object_key)


@pytest.mark.parametrize(
"file_size, local_count, remote_count",
[
pytest.param(
TypeAdapter(ByteSize).validate_python("10Mib"), 10, 10, id="small"
),
],
)
async def test_workflow_compress_s3_objects_and_local_files_in_a_single_archive_then_upload_to_s3(
mocked_s3_server_envs: EnvVarsDict,
files_stored_locally: set[Path],
@@ -1554,7 +1561,6 @@ async def test_workflow_compress_s3_objects_and_local_files_in_a_single_archive_
)

for s3_object_key in _get_s3_object_keys(files_stored_in_s3):
print(f"will upload {s3_object_key=}")
archive_file_entries.append(
(
s3_object_key,
@@ -1573,6 +1579,7 @@ async def test_workflow_compress_s3_objects_and_local_files_in_a_single_archive_
progress_report_cb=mocked_progress_bar_cb,
description="root_bar",
) as root:
started = time.time()
await simcore_s3_api.upload_object_from_file_stream(
with_s3_bucket,
archive_s3_object_key,
@@ -1582,9 +1589,11 @@ async def test_workflow_compress_s3_objects_and_local_files_in_a_single_archive_
chunk_size=MIN_MULTIPART_UPLOAD_CHUNK_SIZE,
),
)
duration = time.time() - started
print(f"Zip created on S3 in {duration:.2f} seconds")

# 2. download zip archive form S3

print(f"downloading {archive_download_path}")
await s3_client.download_file(
with_s3_bucket, archive_s3_object_key, f"{archive_download_path}"
)
@@ -1593,7 +1602,6 @@ async def test_workflow_compress_s3_objects_and_local_files_in_a_single_archive_
await unarchive_dir(archive_download_path, extracted_archive_path)

# 4. compare

print("comparing files")
all_files_in_zip = get_files_info_from_itrable(
files_stored_locally
Original file line number Diff line number Diff line change
@@ -4,6 +4,7 @@

import aiofiles
from servicelib.file_utils import create_sha256_checksum
from servicelib.utils import limited_gather

_FilesInfo: TypeAlias = dict[str, Path]

@@ -30,5 +31,10 @@ def get_files_info_from_itrable(items: Iterable[Path]) -> _FilesInfo:
async def assert_same_contents(file_info1: _FilesInfo, file_info2: _FilesInfo) -> None:
assert set(file_info1.keys()) == set(file_info2.keys())

for file_name in file_info1:
await assert_same_file_content(file_info1[file_name], file_info2[file_name])
await limited_gather(
*(
assert_same_file_content(file_info1[file_name], file_info2[file_name])
for file_name in file_info1
),
limit=10,
)