From f3778882bb7c0be9009ebed96a0ebe921ae2aa76 Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Sun, 6 Aug 2023 22:04:46 +0530 Subject: [PATCH 01/98] set blob storage base directory to ~/.syft/data/ --- packages/syft/src/syft/node/node.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/packages/syft/src/syft/node/node.py b/packages/syft/src/syft/node/node.py index 5b87aa8ca91..c197d2a625a 100644 --- a/packages/syft/src/syft/node/node.py +++ b/packages/syft/src/syft/node/node.py @@ -82,6 +82,7 @@ from ..service.user.user_service import UserService from ..service.user.user_stash import UserStash from ..store.blob_storage import BlobStorageConfig +from ..store.blob_storage.on_disk import OnDiskBlobStorageClientConfig from ..store.blob_storage.on_disk import OnDiskBlobStorageConfig from ..store.dict_document_store import DictStoreConfig from ..store.document_store import StoreConfig @@ -93,6 +94,7 @@ from ..types.uid import UID from ..util.experimental_flags import flags from ..util.telemetry import instrument +from ..util.util import get_root_data_path from ..util.util import random_name from ..util.util import str_to_bool from ..util.util import thread_ident @@ -299,7 +301,14 @@ def __init__( NodeRegistry.set_node_for(self.id, self) def init_blob_storage(self, config: Optional[BlobStorageConfig] = None) -> None: - config_ = OnDiskBlobStorageConfig() if config is None else config + # set path to ~/.syft/node_uid/ + if config is None: + root_directory = get_root_data_path() + base_directory = root_directory / f"{self.id}" + client_config = OnDiskBlobStorageClientConfig(base_directory=base_directory) + config_ = OnDiskBlobStorageConfig(client_config=client_config) + else: + config_ = config self.blob_storage_client = config_.client_type(config=config_.client_config) def init_queue_manager(self, queue_config: Optional[QueueConfig]): From 103d9ccc4afe559b318e2b1b643c26c135d0ad2d Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Sun, 6 Aug 
2023 22:10:55 +0530 Subject: [PATCH 02/98] create base directory if not exists during OnDisk client initialization --- packages/syft/src/syft/store/blob_storage/on_disk.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/packages/syft/src/syft/store/blob_storage/on_disk.py b/packages/syft/src/syft/store/blob_storage/on_disk.py index 68ced67f0d0..28ddf6649f7 100644 --- a/packages/syft/src/syft/store/blob_storage/on_disk.py +++ b/packages/syft/src/syft/store/blob_storage/on_disk.py @@ -1,4 +1,5 @@ # stdlib +import os from pathlib import Path from tempfile import gettempdir from typing import Any @@ -75,6 +76,7 @@ class OnDiskBlobStorageClient(BlobStorageClient): def __init__(self, **data: Any): super().__init__(**data) + os.makedirs(self.config.base_directory, exist_ok=True) self._connection = OnDiskBlobStorageConnection(self.config.base_directory) def __enter__(self) -> BlobStorageConnection: From 6b1f6c56d527ca47166dfaf1d6ef3ed872467db5 Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Mon, 7 Aug 2023 10:11:45 +0530 Subject: [PATCH 03/98] add seaweedfs to docker compose use leveldb instead of redis --- packages/grid/docker-compose.dev.yml | 18 ++++++++--------- packages/grid/docker-compose.yml | 29 ++++++++++++++-------------- packages/grid/seaweedfs/filer.toml | 14 ++++++++++---- 3 files changed, 33 insertions(+), 28 deletions(-) diff --git a/packages/grid/docker-compose.dev.yml b/packages/grid/docker-compose.dev.yml index 046720b6b79..39a508e2eeb 100644 --- a/packages/grid/docker-compose.dev.yml +++ b/packages/grid/docker-compose.dev.yml @@ -71,12 +71,12 @@ services: ports: - "4000" - # seaweedfs: - # profiles: - # - blob-storage - # # volumes: - # # - ./data/seaweedfs:/data - # ports: - # - "9333" # admin web port - # - "8888" # filer web port - # # - "8333" # S3 API port + seaweedfs: + profiles: + - blob-storage + # volumes: + # - ./data/seaweedfs:/data + ports: + - "9333" # admin web port + - "8888" # filer web port + - "8333" # S3 API port diff --git 
a/packages/grid/docker-compose.yml b/packages/grid/docker-compose.yml index 2309749dfe8..1e33cd394af 100644 --- a/packages/grid/docker-compose.yml +++ b/packages/grid/docker-compose.yml @@ -212,20 +212,19 @@ services: - NETWORK_NAME=omnet - STACK_API_KEY=$STACK_API_KEY - # seaweedfs: - # profiles: - # - blob-storage - # depends_on: - # - proxy - # - redis - # image: "${DOCKER_IMAGE_SEAWEEDFS?Variable not set}" - # environment: - # - S3_VOLUME_SIZE_MB=${S3_VOLUME_SIZE_MB:-1024} - # command: "server -s3 -s3.config=/etc/s3config.json -master.volumeSizeLimitMB=${S3_VOLUME_SIZE_MB}" - # volumes: - # - seaweedfs-data:/data - # - ./seaweedfs/s3config.json:/etc/s3config.json - # - ./seaweedfs/filer.toml:/etc/seaweedfs/filer.toml + seaweedfs: + profiles: + - blob-storage + depends_on: + - proxy + image: "${DOCKER_IMAGE_SEAWEEDFS?Variable not set}" + environment: + - S3_VOLUME_SIZE_MB=${S3_VOLUME_SIZE_MB:-1024} + command: "server -s3 -s3.config=/etc/s3config.json -master.volumeSizeLimitMB=${S3_VOLUME_SIZE_MB}" + volumes: + - seaweedfs-data:/data/blob + - ./seaweedfs/s3config.json:/etc/s3config.json + - ./seaweedfs/filer.toml:/etc/seaweedfs/filer.toml mongo: image: mongo:latest @@ -262,7 +261,7 @@ volumes: tailscale-data: headscale-data: # app-redis-data: - # seaweedfs-data: + seaweedfs-data: mongo-data: networks: diff --git a/packages/grid/seaweedfs/filer.toml b/packages/grid/seaweedfs/filer.toml index 00f62836f9c..dd69566768f 100644 --- a/packages/grid/seaweedfs/filer.toml +++ b/packages/grid/seaweedfs/filer.toml @@ -1,5 +1,11 @@ -[redis] +# [redis] +# enabled = true +# address = "redis:6379" +# password = "" +# database = 15 + +[leveldb2] +# local on disk, mostly for simple single-machine setup, fairly scalable +# faster than previous leveldb, recommended. 
enabled = true -address = "redis:6379" -password = "" -database = 15 +dir = "./filerldb2" \ No newline at end of file From cc8a445cbbcbeb7be2d475aff8af97d228b06917 Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Mon, 7 Aug 2023 13:46:49 +0530 Subject: [PATCH 04/98] - add boto3 to setup.cfg - add connect method to BlobStorageConnection - implement connect method for OnDisk storage connection - define SeaweedFS client config - implement SeaweedFSClient using boto3 - implement write method in SeaweedFSBlobDeposit - implement read, allocate and write methods in SeaweedFSConnection - add service method to mark multipart upload complete - add method to mark multipart upload complete in SeaweedFSConnection. Co-authored-by: Kien Dang Co-authored-by: Khoa Nguyen --- packages/syft/setup.cfg | 1 + packages/syft/src/syft/node/node.py | 1 - .../src/syft/service/blob_storage/service.py | 25 ++- .../src/syft/store/blob_storage/__init__.py | 25 ++- .../src/syft/store/blob_storage/on_disk.py | 18 +- .../src/syft/store/blob_storage/seaweedfs.py | 156 +++++++++++++++++- packages/syft/src/syft/types/blob_storage.py | 2 +- 7 files changed, 204 insertions(+), 24 deletions(-) diff --git a/packages/syft/setup.cfg b/packages/syft/setup.cfg index 7f508c7af63..c67124137df 100644 --- a/packages/syft/setup.cfg +++ b/packages/syft/setup.cfg @@ -27,6 +27,7 @@ package_dir = syft = bcrypt==4.0.1 + boto3==1.28.20 forbiddenfruit==0.1.4 gevent==22.10.2 gipc==1.5.0 diff --git a/packages/syft/src/syft/node/node.py b/packages/syft/src/syft/node/node.py index c197d2a625a..388e8f81660 100644 --- a/packages/syft/src/syft/node/node.py +++ b/packages/syft/src/syft/node/node.py @@ -301,7 +301,6 @@ def __init__( NodeRegistry.set_node_for(self.id, self) def init_blob_storage(self, config: Optional[BlobStorageConfig] = None) -> None: - # set path to ~/.syft/node_uid/ if config is None: root_directory = get_root_data_path() base_directory = root_directory / f"{self.id}" diff --git 
a/packages/syft/src/syft/service/blob_storage/service.py b/packages/syft/src/syft/service/blob_storage/service.py index 542b4068f1f..1a5644a6dfa 100644 --- a/packages/syft/src/syft/service/blob_storage/service.py +++ b/packages/syft/src/syft/service/blob_storage/service.py @@ -54,7 +54,7 @@ def read( ) -> Union[BlobRetrieval, SyftError]: result = self.stash.get_by_uid(context.credentials, uid=uid) if result.is_ok(): - with context.node.blob_storage_client as conn: + with context.node.blob_storage_client.connect() as conn: return conn.read(result.ok().location) return SyftError(message=result.err()) @@ -62,7 +62,7 @@ def read( def allocate( self, context: AuthedServiceContext, obj: CreateBlobStorageEntry ) -> Union[BlobDeposit, SyftError]: - with context.node.blob_storage_client as conn: + with context.node.blob_storage_client.connect() as conn: secure_location = conn.allocate(obj) blob_storage_entry = BlobStorageEntry( @@ -101,5 +101,26 @@ def write_to_disk( except Exception as e: return SyftError(message=f"Failed to write object to disk: {e}") + @service_method(path="blob_storage.mark_write_complete", name="make_write_complete") + def mark_write_complete( + self, + context: AuthedServiceContext, + uid: UID, + etags: List, + ) -> Union[SyftError, SyftSuccess]: + result = self.stash.get_by_uid( + credentials=context.credentials, + uid=uid, + ) + if result.is_err(): + return SyftError(message=f"{result.err()}") + + obj: Optional[BlobStorageEntry] = result.ok() + + with context.node.blob_storage_client.connect() as conn: + result = conn.complete_multipart_upload(obj, etags) + + return result + TYPE_TO_SERVICE[BlobStorageEntry] = BlobStorageEntry diff --git a/packages/syft/src/syft/store/blob_storage/__init__.py b/packages/syft/src/syft/store/blob_storage/__init__.py index a6de9989978..d8d9bb6aefb 100644 --- a/packages/syft/src/syft/store/blob_storage/__init__.py +++ b/packages/syft/src/syft/store/blob_storage/__init__.py @@ -47,6 +47,8 @@ # third party from pydantic 
import BaseModel +import requests +from typing_extensions import Self # relative from ...serde.deserialize import _deserialize as deserialize @@ -78,7 +80,7 @@ class SyftObjectRetrieval(BlobRetrieval): syft_object: bytes - def read(self) -> SyftObject: + def read(self) -> Union[SyftObject, SyftError]: return deserialize(self.syft_object, from_bytes=True) @@ -89,8 +91,14 @@ class BlobRetrievalByURL(BlobRetrieval): url: str - def read(self) -> SyftObject: - pass + def read(self) -> Union[SyftObject, SyftError]: + response = requests.get(self.url) + + try: + response.raise_for_status() + return deserialize(response.content, from_bytes=True) + except requests.HTTPError as e: + return SyftError(message=f"Failed to retrieve with Error: {e}") @serializable() @@ -110,6 +118,12 @@ class BlobStorageClientConfig(BaseModel): class BlobStorageConnection: + def __enter__(self) -> Self: + raise NotImplementedError + + def __exit__(self, *exc) -> None: + raise NotImplementedError + def read(self, fp: SecureFilePathLocation) -> BlobRetrieval: raise NotImplementedError @@ -124,10 +138,7 @@ def write(self, obj: BlobStorageEntry) -> BlobDeposit: class BlobStorageClient(SyftBaseModel): config: BlobStorageClientConfig - def __enter__(self) -> BlobStorageConnection: - raise NotImplementedError - - def __exit__(self, *exc) -> None: + def connect(self) -> BlobStorageConnection: raise NotImplementedError diff --git a/packages/syft/src/syft/store/blob_storage/on_disk.py b/packages/syft/src/syft/store/blob_storage/on_disk.py index 28ddf6649f7..c71ea2ff40a 100644 --- a/packages/syft/src/syft/store/blob_storage/on_disk.py +++ b/packages/syft/src/syft/store/blob_storage/on_disk.py @@ -7,7 +7,7 @@ from typing import Union # third party -from pydantic import PrivateAttr +from typing_extensions import Self # relative from . 
import BlobDeposit @@ -50,6 +50,12 @@ class OnDiskBlobStorageConnection(BlobStorageConnection): def __init__(self, base_directory: Path) -> None: self._base_directory = base_directory + def __enter__(self) -> Self: + return self + + def __exit__(self, *exc) -> None: + pass + def read(self, fp: SecureFilePathLocation) -> BlobRetrieval: return SyftObjectRetrieval( syft_object=(self._base_directory / fp.path).read_bytes() @@ -72,20 +78,16 @@ class OnDiskBlobStorageClientConfig(BlobStorageClientConfig): @serializable() class OnDiskBlobStorageClient(BlobStorageClient): config: OnDiskBlobStorageClientConfig - _connection: OnDiskBlobStorageConnection = PrivateAttr() def __init__(self, **data: Any): super().__init__(**data) os.makedirs(self.config.base_directory, exist_ok=True) - self._connection = OnDiskBlobStorageConnection(self.config.base_directory) - def __enter__(self) -> BlobStorageConnection: - return self._connection - - def __exit__(self, *exc) -> None: - pass + def connect(self) -> BlobStorageConnection: + return OnDiskBlobStorageConnection(self.config.base_directory) class OnDiskBlobStorageConfig(BlobStorageConfig): client_type: Type[BlobStorageClient] = OnDiskBlobStorageClient client_config: OnDiskBlobStorageClientConfig = OnDiskBlobStorageClientConfig() + client_config: OnDiskBlobStorageClientConfig = OnDiskBlobStorageClientConfig() diff --git a/packages/syft/src/syft/store/blob_storage/seaweedfs.py b/packages/syft/src/syft/store/blob_storage/seaweedfs.py index 386ea7ff68f..6ed4682fd93 100644 --- a/packages/syft/src/syft/store/blob_storage/seaweedfs.py +++ b/packages/syft/src/syft/store/blob_storage/seaweedfs.py @@ -1,16 +1,47 @@ # stdlib +from io import BytesIO +import math +from typing import List from typing import Union +# third party +import boto3 +from botocore.client import BaseClient as S3BaseClient +from botocore.client import ClientError as BotoClientError +from botocore.client import Config +import requests +from typing_extensions import Self + # 
relative from . import BlobDeposit +from . import BlobRetrieval +from . import BlobRetrievalByURL from . import BlobStorageClient from . import BlobStorageClientConfig from . import BlobStorageConfig from . import BlobStorageConnection from ...serde.serializable import serializable from ...service.response import SyftError +from ...service.response import SyftException from ...service.response import SyftSuccess +from ...types.blob_storage import BlobStorageEntry +from ...types.blob_storage import CreateBlobStorageEntry +from ...types.blob_storage import SecureFilePathLocation +from ...types.grid_url import GridURL from ...types.syft_object import SYFT_OBJECT_VERSION_1 +from ...types.uid import UID + +READ_EXPIRATION_TIME = 1800 # seconds +WRITE_EXPIRATION_TIME = 900 # seconds +DEFAULT_CHUNK_SIZE = 1024 # GB + + +# def _byte_chunks(bytes: BytesIO, size: int) -> Generator[bytes]: +# while True: +# try: +# yield bytes.read(size) +# except BlockingIOError: +# return @serializable() @@ -18,24 +49,139 @@ class SeaweedFSBlobDeposit(BlobDeposit): __canonical_name__ = "SeaweedFSBlobDeposit" __version__ = SYFT_OBJECT_VERSION_1 + urls: list[str] + def write(self, data: bytes) -> Union[SyftSuccess, SyftError]: - pass + # relative + from ...client.api import APIRegistry + + etags = [] + part_no = 1 + + try: + for byte_chunk, url in zip( + BytesIO(data).read(DEFAULT_CHUNK_SIZE), self.urls + ): + response = requests.put(url=url, data=byte_chunk) + response.raise_for_status() + etag = response.headers["ETag"] + etags.append({"ETag": etag, "PartNumber": part_no}) + part_no += 1 + except requests.HTTPError as e: + return SyftError(message=f"{e}") + + api = APIRegistry.api_for( + node_uid=self.syft_node_location, + user_verify_key=self.syft_client_verify_key, + ) + return api.services.blob_storage.mark_write_complete( + etags=etags, uid=self.blob_storage_entry_id + ) @serializable() class SeaweedFSClientConfig(BlobStorageClientConfig): - pass + host: str + port: int + access_key: 
str + secret_key: str + region: str + bucket_name: str + + @property + def endpoint_url(self) -> str: + grid_url = GridURL(host_or_ip=self.host, port=self.port) + return grid_url.url @serializable() class SeaweedFSClient(BlobStorageClient): config: SeaweedFSClientConfig - def __enter__(self) -> BlobStorageConnection: - pass + def connect(self) -> BlobStorageConnection: + return SeaweedFSConnection( + client=boto3.client( + "s3", + endpoint_url=self.config.endpoint_url, + aws_access_key_id=self.config.access_key, + aws_secret_access_key=self.config.secret_key, + config=Config(signature_version="s3v4"), + region_name=self.config.region, + ), + bucket_name=self.config.bucket_name, + ) + + +@serializable() +class SeaweedFSConnection(BlobStorageConnection): + client: S3BaseClient + bucket_name: str + + def __init__(self, client: S3BaseClient, bucket_name: str): + self.client = client + + def __enter__(self) -> Self: + return self def __exit__(self, *exc) -> None: - pass + self.client.close() + + def read(self, fp: SecureFilePathLocation) -> BlobRetrieval: + try: + url = self.client.generate_presigned_url( + ClientMethod="get_object", + Params={"Bucket": self.bucket_name, "Key": fp.path}, + ExpiresIn=READ_EXPIRATION_TIME, + ) + return BlobRetrievalByURL(url=url) + except BotoClientError as e: + raise SyftException(e) + + def allocate(self, obj: CreateBlobStorageEntry) -> SecureFilePathLocation: + try: + result = self.create_multipart_upload( + Bucket=self.bucket_name, + Key=str(obj.id), + ) + upload_id = UID(result["UploadId"]) + return SecureFilePathLocation(id=upload_id, path=str(obj.id)) + except BotoClientError as e: + raise SyftException(e) + + def write(self, obj: BlobStorageEntry) -> BlobDeposit: + total_parts = math.ceil(obj.file_size / DEFAULT_CHUNK_SIZE) + urls = [] + for part_no in range(total_parts): + # Creating presigned urls + signed_url = self.client.generate_presigned_url( + ClientMethod="upload_part", + Params={ + "Bucket": self.bucket_name, + "Key": 
obj.location.path, + "UploadId": obj.location.id.value, + "PartNumber": part_no + 1, + }, + ExpiresIn=WRITE_EXPIRATION_TIME, + ) + urls.append(signed_url) + + return SeaweedFSBlobDeposit(urls=urls) + + def complete_multipart_upload( + self, + blob_entry: BlobStorageEntry, + etags: List, + ) -> Union[SyftError, SyftSuccess]: + try: + self.client.complete_multipart_upload( + Bucket=self.bucket_name, + Key=blob_entry.location.path, + MultipartUpload={"Parts": etags}, + UploadId=blob_entry.location.id.to_string(), + ) + return SyftSuccess("Successfully saved file.") + except BotoClientError as e: + return SyftError(f"{e}") class SeaweedFSConfig(BlobStorageConfig): diff --git a/packages/syft/src/syft/types/blob_storage.py b/packages/syft/src/syft/types/blob_storage.py index 376b5c26a27..ea0debd6328 100644 --- a/packages/syft/src/syft/types/blob_storage.py +++ b/packages/syft/src/syft/types/blob_storage.py @@ -39,7 +39,7 @@ class BlobStorageEntry(SyftObject): mimetype: str = "bytes" file_size: int uploaded_by: SyftVerifyKey - create_at: DateTime = DateTime.now() + created_at: DateTime = DateTime.now() @serializable() From fc200a0d4f56a4a492b436ffaf71524b1968a462 Mon Sep 17 00:00:00 2001 From: Kien Dang Date: Mon, 7 Aug 2023 17:51:01 +0800 Subject: [PATCH 05/98] Minor fixes/cleanup --- .../src/syft/store/blob_storage/seaweedfs.py | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/packages/syft/src/syft/store/blob_storage/seaweedfs.py b/packages/syft/src/syft/store/blob_storage/seaweedfs.py index 6ed4682fd93..ff032e0fea8 100644 --- a/packages/syft/src/syft/store/blob_storage/seaweedfs.py +++ b/packages/syft/src/syft/store/blob_storage/seaweedfs.py @@ -1,6 +1,7 @@ # stdlib from io import BytesIO import math +from typing import Generator from typing import List from typing import Union @@ -36,12 +37,12 @@ DEFAULT_CHUNK_SIZE = 1024 # GB -# def _byte_chunks(bytes: BytesIO, size: int) -> Generator[bytes]: -# while True: -# try: -# yield 
bytes.read(size) -# except BlockingIOError: -# return +def _byte_chunks(bytes: BytesIO, size: int) -> Generator[bytes]: + while True: + try: + yield bytes.read(size) + except BlockingIOError: + return @serializable() @@ -56,19 +57,17 @@ def write(self, data: bytes) -> Union[SyftSuccess, SyftError]: from ...client.api import APIRegistry etags = [] - part_no = 1 try: - for byte_chunk, url in zip( - BytesIO(data).read(DEFAULT_CHUNK_SIZE), self.urls + for part_no, (byte_chunk, url) in enumerate( + zip(_byte_chunks(BytesIO(data), DEFAULT_CHUNK_SIZE), self.urls), start=1 ): response = requests.put(url=url, data=byte_chunk) response.raise_for_status() etag = response.headers["ETag"] etags.append({"ETag": etag, "PartNumber": part_no}) - part_no += 1 except requests.HTTPError as e: - return SyftError(message=f"{e}") + return SyftError(message=str(e)) api = APIRegistry.api_for( node_uid=self.syft_node_location, @@ -139,31 +138,32 @@ def read(self, fp: SecureFilePathLocation) -> BlobRetrieval: def allocate(self, obj: CreateBlobStorageEntry) -> SecureFilePathLocation: try: + obj_id = str(obj.id) result = self.create_multipart_upload( Bucket=self.bucket_name, - Key=str(obj.id), + Key=obj_id, ) upload_id = UID(result["UploadId"]) - return SecureFilePathLocation(id=upload_id, path=str(obj.id)) + return SecureFilePathLocation(id=upload_id, path=obj_id) except BotoClientError as e: raise SyftException(e) def write(self, obj: BlobStorageEntry) -> BlobDeposit: total_parts = math.ceil(obj.file_size / DEFAULT_CHUNK_SIZE) - urls = [] - for part_no in range(total_parts): - # Creating presigned urls - signed_url = self.client.generate_presigned_url( + + urls = [ + self.client.generate_presigned_url( ClientMethod="upload_part", Params={ "Bucket": self.bucket_name, "Key": obj.location.path, "UploadId": obj.location.id.value, - "PartNumber": part_no + 1, + "PartNumber": i + 1, }, ExpiresIn=WRITE_EXPIRATION_TIME, ) - urls.append(signed_url) + for i in range(total_parts) + ] return 
SeaweedFSBlobDeposit(urls=urls) @@ -181,7 +181,7 @@ def complete_multipart_upload( ) return SyftSuccess("Successfully saved file.") except BotoClientError as e: - return SyftError(f"{e}") + return SyftError(message=str(e)) class SeaweedFSConfig(BlobStorageConfig): From febe8ad5ba565b8dae0fdef74fafcc14902dc49c Mon Sep 17 00:00:00 2001 From: Kien Dang Date: Mon, 7 Aug 2023 17:59:42 +0800 Subject: [PATCH 06/98] Also catch ConnectionError in addition to HTTPError --- packages/syft/src/syft/store/blob_storage/__init__.py | 5 ++--- packages/syft/src/syft/store/blob_storage/seaweedfs.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/packages/syft/src/syft/store/blob_storage/__init__.py b/packages/syft/src/syft/store/blob_storage/__init__.py index d8d9bb6aefb..8f9c07a5246 100644 --- a/packages/syft/src/syft/store/blob_storage/__init__.py +++ b/packages/syft/src/syft/store/blob_storage/__init__.py @@ -92,12 +92,11 @@ class BlobRetrievalByURL(BlobRetrieval): url: str def read(self) -> Union[SyftObject, SyftError]: - response = requests.get(self.url) - try: + response = requests.get(self.url) response.raise_for_status() return deserialize(response.content, from_bytes=True) - except requests.HTTPError as e: + except requests.RequestException as e: return SyftError(message=f"Failed to retrieve with Error: {e}") diff --git a/packages/syft/src/syft/store/blob_storage/seaweedfs.py b/packages/syft/src/syft/store/blob_storage/seaweedfs.py index ff032e0fea8..8911e7e742c 100644 --- a/packages/syft/src/syft/store/blob_storage/seaweedfs.py +++ b/packages/syft/src/syft/store/blob_storage/seaweedfs.py @@ -66,7 +66,7 @@ def write(self, data: bytes) -> Union[SyftSuccess, SyftError]: response.raise_for_status() etag = response.headers["ETag"] etags.append({"ETag": etag, "PartNumber": part_no}) - except requests.HTTPError as e: + except requests.RequestException as e: return SyftError(message=str(e)) api = APIRegistry.api_for( From 
2d34ee28cf2e1414163bab9d60053ea33e670917 Mon Sep 17 00:00:00 2001 From: Kien Dang Date: Mon, 7 Aug 2023 18:04:54 +0800 Subject: [PATCH 07/98] Specify request timeout --- packages/syft/src/syft/store/blob_storage/__init__.py | 3 ++- packages/syft/src/syft/store/blob_storage/seaweedfs.py | 5 ++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/packages/syft/src/syft/store/blob_storage/__init__.py b/packages/syft/src/syft/store/blob_storage/__init__.py index 8f9c07a5246..0a513f61478 100644 --- a/packages/syft/src/syft/store/blob_storage/__init__.py +++ b/packages/syft/src/syft/store/blob_storage/__init__.py @@ -62,6 +62,7 @@ from ...types.syft_object import SYFT_OBJECT_VERSION_1 from ...types.syft_object import SyftObject from ...types.uid import UID +from ...util.constants import DEFAULT_TIMEOUT @serializable() @@ -93,7 +94,7 @@ class BlobRetrievalByURL(BlobRetrieval): def read(self) -> Union[SyftObject, SyftError]: try: - response = requests.get(self.url) + response = requests.get(self.url, timeout=DEFAULT_TIMEOUT) response.raise_for_status() return deserialize(response.content, from_bytes=True) except requests.RequestException as e: diff --git a/packages/syft/src/syft/store/blob_storage/seaweedfs.py b/packages/syft/src/syft/store/blob_storage/seaweedfs.py index 8911e7e742c..54e5857ab2e 100644 --- a/packages/syft/src/syft/store/blob_storage/seaweedfs.py +++ b/packages/syft/src/syft/store/blob_storage/seaweedfs.py @@ -31,6 +31,7 @@ from ...types.grid_url import GridURL from ...types.syft_object import SYFT_OBJECT_VERSION_1 from ...types.uid import UID +from ...util.constants import DEFAULT_TIMEOUT READ_EXPIRATION_TIME = 1800 # seconds WRITE_EXPIRATION_TIME = 900 # seconds @@ -62,7 +63,9 @@ def write(self, data: bytes) -> Union[SyftSuccess, SyftError]: for part_no, (byte_chunk, url) in enumerate( zip(_byte_chunks(BytesIO(data), DEFAULT_CHUNK_SIZE), self.urls), start=1 ): - response = requests.put(url=url, data=byte_chunk) + response = requests.put( + 
url=url, data=byte_chunk, timeout=DEFAULT_TIMEOUT + ) response.raise_for_status() etag = response.headers["ETag"] etags.append({"ETag": etag, "PartNumber": part_no}) From bfe7e9b1031c0d5affb38066722c6b09a67c8f46 Mon Sep 17 00:00:00 2001 From: Kien Dang Date: Mon, 7 Aug 2023 18:22:57 +0800 Subject: [PATCH 08/98] Use Path.mkdir --- packages/syft/src/syft/store/blob_storage/on_disk.py | 3 +-- packages/syft/src/syft/util/util.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/packages/syft/src/syft/store/blob_storage/on_disk.py b/packages/syft/src/syft/store/blob_storage/on_disk.py index c71ea2ff40a..c4977b66ae6 100644 --- a/packages/syft/src/syft/store/blob_storage/on_disk.py +++ b/packages/syft/src/syft/store/blob_storage/on_disk.py @@ -1,5 +1,4 @@ # stdlib -import os from pathlib import Path from tempfile import gettempdir from typing import Any @@ -81,7 +80,7 @@ class OnDiskBlobStorageClient(BlobStorageClient): def __init__(self, **data: Any): super().__init__(**data) - os.makedirs(self.config.base_directory, exist_ok=True) + self.config.base_directory.mkdir(exist_ok=True) def connect(self) -> BlobStorageConnection: return OnDiskBlobStorageConnection(self.config.base_directory) diff --git a/packages/syft/src/syft/util/util.py b/packages/syft/src/syft/util/util.py index 9aa9498fba7..983b9e29e89 100644 --- a/packages/syft/src/syft/util/util.py +++ b/packages/syft/src/syft/util/util.py @@ -192,8 +192,8 @@ def get_root_data_path() -> Path: # on Windows the directory is: C:/Users/$USER/.syft/data data_dir = Path.home() / ".syft" / "data" + data_dir.mkdir(exist_ok=True) - os.makedirs(data_dir, exist_ok=True) return data_dir From d9d6d3327aa30213b47452b4dc85ff3ddcfbec30 Mon Sep 17 00:00:00 2001 From: Kien Dang Date: Mon, 7 Aug 2023 18:47:10 +0800 Subject: [PATCH 09/98] Create parent directories of .syft/data if needed --- packages/syft/src/syft/util/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/packages/syft/src/syft/util/util.py b/packages/syft/src/syft/util/util.py index 983b9e29e89..8b2eaaff9ce 100644 --- a/packages/syft/src/syft/util/util.py +++ b/packages/syft/src/syft/util/util.py @@ -192,7 +192,7 @@ def get_root_data_path() -> Path: # on Windows the directory is: C:/Users/$USER/.syft/data data_dir = Path.home() / ".syft" / "data" - data_dir.mkdir(exist_ok=True) + data_dir.mkdir(parents=True, exist_ok=True) return data_dir From 47eca43e1c724fa13aa55e51b9f2a16db4739f11 Mon Sep 17 00:00:00 2001 From: Kien Dang Date: Mon, 7 Aug 2023 19:00:52 +0800 Subject: [PATCH 10/98] Fix util.download_file not related to seaweed but this is right below the code I was working on --- packages/syft/src/syft/util/util.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/packages/syft/src/syft/util/util.py b/packages/syft/src/syft/util/util.py index 8b2eaaff9ce..a18bc85bb94 100644 --- a/packages/syft/src/syft/util/util.py +++ b/packages/syft/src/syft/util/util.py @@ -198,16 +198,15 @@ def get_root_data_path() -> Path: def download_file(url: str, full_path: Union[str, Path]) -> Optional[Path]: - if not os.path.exists(full_path): + full_path = Path(full_path) + if not full_path.exists(): r = requests.get(url, allow_redirects=True, verify=verify_tls()) # nosec - if r.status_code < 199 or 299 < r.status_code: + if not r.ok: print(f"Got {r.status_code} trying to download {url}") return None - path = os.path.dirname(full_path) - os.makedirs(path, exist_ok=True) - with open(full_path, "wb") as f: - f.write(r.content) - return Path(full_path) + full_path.parent.mkdir(parents=True, exist_ok=True) + full_path.write_bytes(r.content) + return full_path def verify_tls() -> bool: From cceb8bba8abe72fb408e63b30a02e2d732e1972c Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Tue, 8 Aug 2023 14:33:13 +0530 Subject: [PATCH 11/98] - create seaweed client config in grid/node.py - create and pass blob storage config to Domain and Gateway in grid/node.py - 
Define BlobDepositType typing - fix typo in name for mark_write_complete service - temporary convert s3 url host to localhost/blob - fix size calculation of obj in CreateBlobStorageEntry - create a separate secure location class for SeaweedFS Client Co-authored-by: Khoa Nguyen Co-authored-by: Kien Dang Co-authored-by: Peter Chung <96817160+PeterChung241@users.noreply.github.com> --- packages/grid/backend/grid/core/node.py | 16 +++++++ packages/syft/src/syft/client/api.py | 1 - .../src/syft/service/blob_storage/service.py | 9 ++-- .../src/syft/store/blob_storage/on_disk.py | 1 - .../src/syft/store/blob_storage/seaweedfs.py | 43 ++++++++++++------- packages/syft/src/syft/types/blob_storage.py | 14 +++++- 6 files changed, 61 insertions(+), 23 deletions(-) diff --git a/packages/grid/backend/grid/core/node.py b/packages/grid/backend/grid/core/node.py index ccfa0bdf506..d164debc14a 100644 --- a/packages/grid/backend/grid/core/node.py +++ b/packages/grid/backend/grid/core/node.py @@ -5,6 +5,9 @@ from syft.node.node import get_node_name from syft.node.node import get_node_side_type from syft.node.node import get_node_type +from syft.node.node import get_node_uid_env +from syft.store.blob_storage.seaweedfs import SeaweedFSClientConfig +from syft.store.blob_storage.seaweedfs import SeaweedFSConfig from syft.store.mongo_client import MongoStoreClientConfig from syft.store.mongo_document_store import MongoStoreConfig from syft.store.sqlite_document_store import SQLiteStoreClientConfig @@ -32,6 +35,17 @@ node_side_type = get_node_side_type() enable_warnings = get_enable_warnings() +seaweed_client_config = SeaweedFSClientConfig( + host=settings.S3_ENDPOINT, + port=settings.S3_PORT, + access_key=settings.S3_ROOT_USER, + secret_key=settings.S3_ROOT_PWD, + region=settings.S3_REGION, + bucket_name=get_node_uid_env(), +) + +blob_storage_config = SeaweedFSConfig(client_config=seaweed_client_config) + if node_type == "gateway" or node_type == "network": worker = Gateway( @@ -40,6 +54,7 
@@ action_store_config=sql_store_config, document_store_config=mongo_store_config, enable_warnings=enable_warnings, + blob_storage_config=blob_storage_config, ) else: worker = Domain( @@ -48,4 +63,5 @@ action_store_config=sql_store_config, document_store_config=mongo_store_config, enable_warnings=enable_warnings, + blob_storage_config=blob_storage_config, ) diff --git a/packages/syft/src/syft/client/api.py b/packages/syft/src/syft/client/api.py index 4f441989373..1405ed1a2f5 100644 --- a/packages/syft/src/syft/client/api.py +++ b/packages/syft/src/syft/client/api.py @@ -372,7 +372,6 @@ def debox_signed_syftapicall_response( if not signed_result.is_valid: return SyftError(message="The result signature is invalid") # type: ignore - return signed_result.message.data diff --git a/packages/syft/src/syft/service/blob_storage/service.py b/packages/syft/src/syft/service/blob_storage/service.py index 1a5644a6dfa..e32383eb768 100644 --- a/packages/syft/src/syft/service/blob_storage/service.py +++ b/packages/syft/src/syft/service/blob_storage/service.py @@ -6,8 +6,9 @@ # relative from ...serde.serializable import serializable -from ...store.blob_storage import BlobDeposit from ...store.blob_storage import BlobRetrieval +from ...store.blob_storage.on_disk import OnDiskBlobDeposit +from ...store.blob_storage.seaweedfs import SeaweedFSBlobDeposit from ...store.document_store import DocumentStore from ...types.blob_storage import BlobStorageEntry from ...types.blob_storage import CreateBlobStorageEntry @@ -20,6 +21,8 @@ from ..service import service_method from .stash import BlobStorageStash +BlobDepositType = Union[OnDiskBlobDeposit, SeaweedFSBlobDeposit] + @serializable() class BlobStorageService(AbstractService): @@ -61,7 +64,7 @@ def read( @service_method(path="blob_storage.allocate", name="allocate") def allocate( self, context: AuthedServiceContext, obj: CreateBlobStorageEntry - ) -> Union[BlobDeposit, SyftError]: + ) -> Union[BlobDepositType, SyftError]: with 
context.node.blob_storage_client.connect() as conn: secure_location = conn.allocate(obj) @@ -101,7 +104,7 @@ def write_to_disk( except Exception as e: return SyftError(message=f"Failed to write object to disk: {e}") - @service_method(path="blob_storage.mark_write_complete", name="make_write_complete") + @service_method(path="blob_storage.mark_write_complete", name="mark_write_complete") def mark_write_complete( self, context: AuthedServiceContext, diff --git a/packages/syft/src/syft/store/blob_storage/on_disk.py b/packages/syft/src/syft/store/blob_storage/on_disk.py index c4977b66ae6..da8f5324eaa 100644 --- a/packages/syft/src/syft/store/blob_storage/on_disk.py +++ b/packages/syft/src/syft/store/blob_storage/on_disk.py @@ -89,4 +89,3 @@ def connect(self) -> BlobStorageConnection: class OnDiskBlobStorageConfig(BlobStorageConfig): client_type: Type[BlobStorageClient] = OnDiskBlobStorageClient client_config: OnDiskBlobStorageClientConfig = OnDiskBlobStorageClientConfig() - client_config: OnDiskBlobStorageClientConfig = OnDiskBlobStorageClientConfig() diff --git a/packages/syft/src/syft/store/blob_storage/seaweedfs.py b/packages/syft/src/syft/store/blob_storage/seaweedfs.py index 54e5857ab2e..ffcceeef4f6 100644 --- a/packages/syft/src/syft/store/blob_storage/seaweedfs.py +++ b/packages/syft/src/syft/store/blob_storage/seaweedfs.py @@ -3,6 +3,7 @@ import math from typing import Generator from typing import List +from typing import Type from typing import Union # third party @@ -27,18 +28,18 @@ from ...service.response import SyftSuccess from ...types.blob_storage import BlobStorageEntry from ...types.blob_storage import CreateBlobStorageEntry +from ...types.blob_storage import SeaweedSecureFilePathLocation from ...types.blob_storage import SecureFilePathLocation from ...types.grid_url import GridURL from ...types.syft_object import SYFT_OBJECT_VERSION_1 -from ...types.uid import UID from ...util.constants import DEFAULT_TIMEOUT READ_EXPIRATION_TIME = 1800 # seconds 
WRITE_EXPIRATION_TIME = 900 # seconds -DEFAULT_CHUNK_SIZE = 1024 # GB +DEFAULT_CHUNK_SIZE = 1024**2 # 1 GB -def _byte_chunks(bytes: BytesIO, size: int) -> Generator[bytes]: +def _byte_chunks(bytes: BytesIO, size: int) -> Generator[bytes, None, None]: while True: try: yield bytes.read(size) @@ -57,12 +58,22 @@ def write(self, data: bytes) -> Union[SyftSuccess, SyftError]: # relative from ...client.api import APIRegistry + api = APIRegistry.api_for( + node_uid=self.syft_node_location, + user_verify_key=self.syft_client_verify_key, + ) + etags = [] try: for part_no, (byte_chunk, url) in enumerate( - zip(_byte_chunks(BytesIO(data), DEFAULT_CHUNK_SIZE), self.urls), start=1 + zip(_byte_chunks(BytesIO(data), DEFAULT_CHUNK_SIZE), self.urls), + start=1, ): + # TODO: 🟡 Do this in a much clean way + url = ( + str(api.connection.url) + "/blob" + url.split("seaweedfs:8333")[-1] + ) response = requests.put( url=url, data=byte_chunk, timeout=DEFAULT_TIMEOUT ) @@ -72,10 +83,6 @@ def write(self, data: bytes) -> Union[SyftSuccess, SyftError]: except requests.RequestException as e: return SyftError(message=str(e)) - api = APIRegistry.api_for( - node_uid=self.syft_node_location, - user_verify_key=self.syft_client_verify_key, - ) return api.services.blob_storage.mark_write_complete( etags=etags, uid=self.blob_storage_entry_id ) @@ -121,6 +128,7 @@ class SeaweedFSConnection(BlobStorageConnection): def __init__(self, client: S3BaseClient, bucket_name: str): self.client = client + self.bucket_name = bucket_name def __enter__(self) -> Self: return self @@ -135,6 +143,9 @@ def read(self, fp: SecureFilePathLocation) -> BlobRetrieval: Params={"Bucket": self.bucket_name, "Key": fp.path}, ExpiresIn=READ_EXPIRATION_TIME, ) + + # TODO: 🟡 Do this in a much clean way + url = "http://localhost:8081" + "/blob" + url.split("seaweedfs:8333")[-1] return BlobRetrievalByURL(url=url) except BotoClientError as e: raise SyftException(e) @@ -142,12 +153,12 @@ def read(self, fp: SecureFilePathLocation) -> 
BlobRetrieval: def allocate(self, obj: CreateBlobStorageEntry) -> SecureFilePathLocation: try: obj_id = str(obj.id) - result = self.create_multipart_upload( + result = self.client.create_multipart_upload( Bucket=self.bucket_name, Key=obj_id, ) - upload_id = UID(result["UploadId"]) - return SecureFilePathLocation(id=upload_id, path=obj_id) + upload_id = result["UploadId"] + return SeaweedSecureFilePathLocation(upload_id=upload_id, path=obj_id) except BotoClientError as e: raise SyftException(e) @@ -160,7 +171,7 @@ def write(self, obj: BlobStorageEntry) -> BlobDeposit: Params={ "Bucket": self.bucket_name, "Key": obj.location.path, - "UploadId": obj.location.id.value, + "UploadId": obj.location.upload_id, "PartNumber": i + 1, }, ExpiresIn=WRITE_EXPIRATION_TIME, @@ -168,7 +179,7 @@ def write(self, obj: BlobStorageEntry) -> BlobDeposit: for i in range(total_parts) ] - return SeaweedFSBlobDeposit(urls=urls) + return SeaweedFSBlobDeposit(blob_storage_entry_id=obj.id, urls=urls) def complete_multipart_upload( self, @@ -180,13 +191,13 @@ def complete_multipart_upload( Bucket=self.bucket_name, Key=blob_entry.location.path, MultipartUpload={"Parts": etags}, - UploadId=blob_entry.location.id.to_string(), + UploadId=blob_entry.location.upload_id, ) - return SyftSuccess("Successfully saved file.") + return SyftSuccess(message="Successfully saved file.") except BotoClientError as e: return SyftError(message=str(e)) class SeaweedFSConfig(BlobStorageConfig): - client_type = SeaweedFSClient + client_type: Type[BlobStorageClient] = SeaweedFSClient client_config: SeaweedFSClientConfig diff --git a/packages/syft/src/syft/types/blob_storage.py b/packages/syft/src/syft/types/blob_storage.py index ea0debd6328..e250aeab104 100644 --- a/packages/syft/src/syft/types/blob_storage.py +++ b/packages/syft/src/syft/types/blob_storage.py @@ -11,6 +11,7 @@ # relative from ..node.credentials import SyftVerifyKey +from ..serde import serialize from ..serde.serializable import serializable from 
..service.response import SyftException from .datetime import DateTime @@ -28,13 +29,21 @@ class SecureFilePathLocation(SyftObject): path: str +@serializable() +class SeaweedSecureFilePathLocation(SecureFilePathLocation): + __canonical_name__ = "SeaweedSecureFilePathLocation" + __version__ = SYFT_OBJECT_VERSION_1 + + upload_id: str + + @serializable() class BlobStorageEntry(SyftObject): __canonical_name__ = "BlobStorageEntry" __version__ = SYFT_OBJECT_VERSION_1 id: UID - location: SecureFilePathLocation + location: Union[SecureFilePathLocation, SeaweedSecureFilePathLocation] type_: Optional[Type[SyftObject]] mimetype: str = "bytes" file_size: int @@ -54,7 +63,8 @@ class CreateBlobStorageEntry(SyftObject): @classmethod def from_obj(cls, obj: SyftObject) -> Self: - return cls(file_size=sys.getsizeof(obj), type_=type(obj)) + file_size = sys.getsizeof(serialize._serialize(obj=obj, to_bytes=True)) + return cls(file_size=file_size, type_=type(obj)) @classmethod def from_path(cls, fp: Union[str, Path], mimetype: Optional[str] = None) -> Self: From af856f4a5214eb630f990bc931a02f953fc52ba7 Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Tue, 8 Aug 2023 17:08:49 +0530 Subject: [PATCH 12/98] - add route for Blob Store in Routes - add method to SyftClient that maps a given path to blob route - save pre-signed urls as GridUrl objects - convert pre-signed urls to /blob route on client side during read and write --- packages/syft/src/syft/client/client.py | 5 +++ .../src/syft/service/blob_storage/service.py | 5 ++- .../src/syft/store/blob_storage/__init__.py | 15 ++++++-- .../src/syft/store/blob_storage/seaweedfs.py | 34 ++++++++----------- 4 files changed, 36 insertions(+), 23 deletions(-) diff --git a/packages/syft/src/syft/client/client.py b/packages/syft/src/syft/client/client.py index cf647d80db6..d86d90796ea 100644 --- a/packages/syft/src/syft/client/client.py +++ b/packages/syft/src/syft/client/client.py @@ -122,6 +122,7 @@ class Routes(Enum): ROUTE_LOGIN = 
f"{API_PATH}/login" ROUTE_REGISTER = f"{API_PATH}/register" ROUTE_API_CALL = f"{API_PATH}/api_call" + ROUTE_BLOB_STORE = "/blob" @serializable(attrs=["proxy_target_uid", "url"]) @@ -148,6 +149,10 @@ def get_cache_key(self) -> str: def api_url(self) -> GridURL: return self.url.with_path(self.routes.ROUTE_API_CALL.value) + def to_blob_route(self, path: str) -> GridURL: + _path = self.routes.ROUTE_BLOB_STORE.value + path + return self.url.with_path(_path) + @property def session(self) -> Session: if self.session_cache is None: diff --git a/packages/syft/src/syft/service/blob_storage/service.py b/packages/syft/src/syft/service/blob_storage/service.py index e32383eb768..2ab7c99acf8 100644 --- a/packages/syft/src/syft/service/blob_storage/service.py +++ b/packages/syft/src/syft/service/blob_storage/service.py @@ -104,7 +104,10 @@ def write_to_disk( except Exception as e: return SyftError(message=f"Failed to write object to disk: {e}") - @service_method(path="blob_storage.mark_write_complete", name="mark_write_complete") + @service_method( + path="blob_storage.mark_write_complete", + name="mark_write_complete", + ) def mark_write_complete( self, context: AuthedServiceContext, diff --git a/packages/syft/src/syft/store/blob_storage/__init__.py b/packages/syft/src/syft/store/blob_storage/__init__.py index 0a513f61478..578f184ec04 100644 --- a/packages/syft/src/syft/store/blob_storage/__init__.py +++ b/packages/syft/src/syft/store/blob_storage/__init__.py @@ -59,6 +59,7 @@ from ...types.blob_storage import BlobStorageEntry from ...types.blob_storage import CreateBlobStorageEntry from ...types.blob_storage import SecureFilePathLocation +from ...types.grid_url import GridURL from ...types.syft_object import SYFT_OBJECT_VERSION_1 from ...types.syft_object import SyftObject from ...types.uid import UID @@ -70,7 +71,7 @@ class BlobRetrieval(SyftObject): __canonical_name__ = "BlobRetrieval" __version__ = SYFT_OBJECT_VERSION_1 - def read(self) -> SyftObject: + def read(self) -> 
Union[SyftObject, SyftError]: pass @@ -90,11 +91,19 @@ class BlobRetrievalByURL(BlobRetrieval): __canonical_name__ = "BlobRetrievalByURL" __version__ = SYFT_OBJECT_VERSION_1 - url: str + url: GridURL def read(self) -> Union[SyftObject, SyftError]: + # relative + from ...client.api import APIRegistry + + api = APIRegistry.api_for( + node_uid=self.syft_node_location, + user_verify_key=self.syft_client_verify_key, + ) + blob_url = api.connection.to_blob_route(self.url.url_path) try: - response = requests.get(self.url, timeout=DEFAULT_TIMEOUT) + response = requests.get(str(blob_url), timeout=DEFAULT_TIMEOUT) response.raise_for_status() return deserialize(response.content, from_bytes=True) except requests.RequestException as e: diff --git a/packages/syft/src/syft/store/blob_storage/seaweedfs.py b/packages/syft/src/syft/store/blob_storage/seaweedfs.py index ffcceeef4f6..c3af9b3d793 100644 --- a/packages/syft/src/syft/store/blob_storage/seaweedfs.py +++ b/packages/syft/src/syft/store/blob_storage/seaweedfs.py @@ -52,7 +52,7 @@ class SeaweedFSBlobDeposit(BlobDeposit): __canonical_name__ = "SeaweedFSBlobDeposit" __version__ = SYFT_OBJECT_VERSION_1 - urls: list[str] + urls: List[GridURL] def write(self, data: bytes) -> Union[SyftSuccess, SyftError]: # relative @@ -70,12 +70,9 @@ def write(self, data: bytes) -> Union[SyftSuccess, SyftError]: zip(_byte_chunks(BytesIO(data), DEFAULT_CHUNK_SIZE), self.urls), start=1, ): - # TODO: 🟡 Do this in a much clean way - url = ( - str(api.connection.url) + "/blob" + url.split("seaweedfs:8333")[-1] - ) + blob_url = api.connection.to_blob_route(url.url_path) response = requests.put( - url=url, data=byte_chunk, timeout=DEFAULT_TIMEOUT + url=str(blob_url), data=byte_chunk, timeout=DEFAULT_TIMEOUT ) response.raise_for_status() etag = response.headers["ETag"] @@ -143,10 +140,7 @@ def read(self, fp: SecureFilePathLocation) -> BlobRetrieval: Params={"Bucket": self.bucket_name, "Key": fp.path}, ExpiresIn=READ_EXPIRATION_TIME, ) - - # TODO: 🟡 Do 
this in a much clean way - url = "http://localhost:8081" + "/blob" + url.split("seaweedfs:8333")[-1] - return BlobRetrievalByURL(url=url) + return BlobRetrievalByURL(url=GridURL.from_url(url)) except BotoClientError as e: raise SyftException(e) @@ -166,15 +160,17 @@ def write(self, obj: BlobStorageEntry) -> BlobDeposit: total_parts = math.ceil(obj.file_size / DEFAULT_CHUNK_SIZE) urls = [ - self.client.generate_presigned_url( - ClientMethod="upload_part", - Params={ - "Bucket": self.bucket_name, - "Key": obj.location.path, - "UploadId": obj.location.upload_id, - "PartNumber": i + 1, - }, - ExpiresIn=WRITE_EXPIRATION_TIME, + GridURL.from_url( + self.client.generate_presigned_url( + ClientMethod="upload_part", + Params={ + "Bucket": self.bucket_name, + "Key": obj.location.path, + "UploadId": obj.location.upload_id, + "PartNumber": i + 1, + }, + ExpiresIn=WRITE_EXPIRATION_TIME, + ) ) for i in range(total_parts) ] From 8a9d6c586b7e00b4aec95d77ba068d9fcf49c9b1 Mon Sep 17 00:00:00 2001 From: Kien Dang Date: Tue, 8 Aug 2023 22:24:04 +0800 Subject: [PATCH 13/98] Fix GridURL typing and a minor bug copy -> __copy__ --- packages/syft/src/syft/types/grid_url.py | 25 +++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/packages/syft/src/syft/types/grid_url.py b/packages/syft/src/syft/types/grid_url.py index 9ec6eeddd3a..ec84bab463d 100644 --- a/packages/syft/src/syft/types/grid_url.py +++ b/packages/syft/src/syft/types/grid_url.py @@ -11,6 +11,7 @@ # third party import requests +from typing_extensions import Self # relative from ..serde.serializable import serializable @@ -19,8 +20,8 @@ @serializable(attrs=["protocol", "host_or_ip", "port", "path", "query"]) class GridURL: - @staticmethod - def from_url(url: Union[str, GridURL]) -> GridURL: + @classmethod + def from_url(cls, url: Union[str, GridURL]) -> Self: if isinstance(url, GridURL): return url try: @@ -36,7 +37,7 @@ def from_url(url: Union[str, GridURL]) -> GridURL: host_or_ip = 
host_or_ip_parts[0] if parts.scheme == "https": port = 443 - return GridURL( + return cls( host_or_ip=host_or_ip, path=parts.path, port=port, @@ -77,12 +78,12 @@ def __init__( self.protocol = protocol self.query = query - def with_path(self, path: str) -> GridURL: + def with_path(self, path: str) -> Self: dupe = copy.copy(self) dupe.path = path return dupe - def as_container_host(self, container_host: Optional[str] = None) -> GridURL: + def as_container_host(self, container_host: Optional[str] = None) -> Self: if self.host_or_ip not in [ "localhost", "host.docker.internal", @@ -105,7 +106,7 @@ def as_container_host(self, container_host: Optional[str] = None) -> GridURL: # convert it back for non container clients hostname = "localhost" - return GridURL( + return self.__class__( protocol=self.protocol, host_or_ip=hostname, port=self.port, @@ -139,7 +140,7 @@ def base_url_no_port(self) -> str: def url_path(self) -> str: return f"{self.path}{self.query_string}" - def to_tls(self) -> GridURL: + def to_tls(self) -> Self: if self.protocol == "https": return self @@ -150,7 +151,9 @@ def to_tls(self) -> GridURL: new_base_url = r.url if new_base_url.endswith("/"): new_base_url = new_base_url[0:-1] - return GridURL.from_url(url=f"{new_base_url}{self.path}{self.query_string}") + return self.__class__.from_url( + url=f"{new_base_url}{self.path}{self.query_string}" + ) def __repr__(self) -> str: return f"<{type(self).__name__} {self.url}>" @@ -161,9 +164,9 @@ def __str__(self) -> str: def __hash__(self) -> int: return hash(self.__str__()) - def copy(self) -> GridURL: - return GridURL.from_url(self.url) + def __copy__(self) -> Self: + return self.__class__.from_url(self.url) - def set_port(self, port: int) -> GridURL: + def set_port(self, port: int) -> Self: self.port = port return self From 795d3b97dd0e7d75e718b1d8f1ed7385d3109a69 Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Wed, 9 Aug 2023 14:17:09 +0530 Subject: [PATCH 14/98] - pass s3 username and password via hagrid 
as cli arguments - pass s3 username and password to s3config.json via jinja template - fix usage of templates when running hagrid in EDITABLE_MODE --- packages/grid/seaweedfs/s3config.json | 4 ++-- packages/hagrid/hagrid/cli.py | 28 +++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/packages/grid/seaweedfs/s3config.json b/packages/grid/seaweedfs/s3config.json index 49416a081fe..aca589cdf2e 100644 --- a/packages/grid/seaweedfs/s3config.json +++ b/packages/grid/seaweedfs/s3config.json @@ -4,8 +4,8 @@ "name": "admin", "credentials": [ { - "accessKey": "admin", - "secretKey": "admin" + "accessKey": "{{ S3_ROOT_USER }}", + "secretKey": "{{ S3_ROOT_PWD }}" } ], "actions": ["Admin", "Read", "List", "Tagging", "Write"] diff --git a/packages/hagrid/hagrid/cli.py b/packages/hagrid/hagrid/cli.py index b269b4497b2..174e8b100ea 100644 --- a/packages/hagrid/hagrid/cli.py +++ b/packages/hagrid/hagrid/cli.py @@ -79,6 +79,7 @@ from .lib import gitpod_url from .lib import hagrid_root from .lib import is_gitpod +from .lib import manifest_template_path from .lib import name_tag from .lib import save_vm_details_as_json from .lib import update_repo @@ -443,6 +444,20 @@ def clean(location: str) -> None: is_flag=True, help="Launch a low side node type else a high side node type", ) +@click.option( + "--set-s3-username", + default=None, + required=False, + type=str, + help="Set root username for s3 blob storage", +) +@click.option( + "--set-s3-password", + default=None, + required=False, + type=str, + help="Set root password for s3 blob storage", +) def launch(args: TypeTuple[str], **kwargs: Any) -> None: verb = get_launch_verb() try: @@ -1236,6 +1251,10 @@ def create_launch_cmd( parsed_kwargs["use_blob_storage"] = not bool(kwargs["no_blob_storage"]) + if parsed_kwargs["use_blob_storage"]: + parsed_kwargs["set_s3_username"] = kwargs["set_s3_username"] + parsed_kwargs["set_s3_password"] = kwargs["set_s3_password"] + parsed_kwargs["node_count"] = (
int(kwargs["node_count"]) if "node_count" in kwargs else 1 ) @@ -1334,6 +1353,9 @@ def create_launch_cmd( parsed_kwargs["enable_signup"] = str_to_bool(cast(str, kwargs["enable_signup"])) + if parsed_kwargs["template"] is None and EDITABLE_MODE: + parsed_kwargs["template"] = str(manifest_template_path()) + # Override template tag with user input tag if ( parsed_kwargs["tag"] is not None @@ -2189,6 +2211,12 @@ def create_launch_docker_cmd( if "set_root_email" in kwargs and kwargs["set_root_email"] is not None: envs["DEFAULT_ROOT_EMAIL"] = kwargs["set_root_email"] + if "set_s3_username" in kwargs and kwargs["set_s3_username"] is not None: + envs["S3_ROOT_USER"] = kwargs["set_s3_username"] + + if "set_s3_password" in kwargs and kwargs["set_s3_password"] is not None: + envs["S3_ROOT_PWD"] = kwargs["set_s3_password"] + if "release" in kwargs: envs["RELEASE"] = kwargs["release"] From b0727d266759fe1ebdd3dffc89904f9d0226221f Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Wed, 9 Aug 2023 18:59:17 +0530 Subject: [PATCH 15/98] revert to use static s3config.json --- packages/grid/docker-compose.yml | 5 ++++- packages/grid/seaweedfs/s3config.json | 4 ++-- packages/hagrid/hagrid/cli.py | 4 ---- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/packages/grid/docker-compose.yml b/packages/grid/docker-compose.yml index 1e33cd394af..de3d2f45021 100644 --- a/packages/grid/docker-compose.yml +++ b/packages/grid/docker-compose.yml @@ -220,7 +220,10 @@ services: image: "${DOCKER_IMAGE_SEAWEEDFS?Variable not set}" environment: - S3_VOLUME_SIZE_MB=${S3_VOLUME_SIZE_MB:-1024} - command: "server -s3 -s3.config=/etc/s3config.json -master.volumeSizeLimitMB=${S3_VOLUME_SIZE_MB}" + - S3_ROOT_USER=${S3_ROOT_USER:-admin} + - S3_ROOT_PWD=${S3_ROOT_PWD:-admin} + command: + - "server -s3 -s3.config=/etc/s3config.json -s3.port=S3_PORT -master.volumeSizeLimitMB=${S3_VOLUME_SIZE_MB}" volumes: - seaweedfs-data:/data/blob - ./seaweedfs/s3config.json:/etc/s3config.json diff --git 
a/packages/grid/seaweedfs/s3config.json b/packages/grid/seaweedfs/s3config.json index aca589cdf2e..49416a081fe 100644 --- a/packages/grid/seaweedfs/s3config.json +++ b/packages/grid/seaweedfs/s3config.json @@ -4,8 +4,8 @@ "name": "admin", "credentials": [ { - "accessKey": "{{ S3_ROOT_USER }}", - "secretKey": "{{ S3_ROOT_PWD }}" + "accessKey": "admin", + "secretKey": "admin" } ], "actions": ["Admin", "Read", "List", "Tagging", "Write"] diff --git a/packages/hagrid/hagrid/cli.py b/packages/hagrid/hagrid/cli.py index 174e8b100ea..ce6815fa2c1 100644 --- a/packages/hagrid/hagrid/cli.py +++ b/packages/hagrid/hagrid/cli.py @@ -79,7 +79,6 @@ from .lib import gitpod_url from .lib import hagrid_root from .lib import is_gitpod -from .lib import manifest_template_path from .lib import name_tag from .lib import save_vm_details_as_json from .lib import update_repo @@ -1353,9 +1352,6 @@ def create_launch_cmd( parsed_kwargs["enable_signup"] = str_to_bool(cast(str, kwargs["enable_signup"])) - if parsed_kwargs["template"] is None and EDITABLE_MODE: - parsed_kwargs["template"] = str(manifest_template_path()) - # Override template tag with user input tag if ( parsed_kwargs["tag"] is not None From 7800b265ab72a9b3c30dff6d4730645eed8a5b76 Mon Sep 17 00:00:00 2001 From: Kien Dang Date: Thu, 10 Aug 2023 17:00:05 +0800 Subject: [PATCH 16/98] Initial work on action object integration Co-authored-by: Peter Chung Co-authored-by: Khoa Nguyen Co-authored-by: Shubham Gupta --- .../src/syft/service/action/action_object.py | 167 ++++++++++++------ .../syft/src/syft/service/action/numpy.py | 4 +- .../src/syft/service/blob_storage/service.py | 11 ++ packages/syft/src/syft/types/blob_storage.py | 18 ++ 4 files changed, 146 insertions(+), 54 deletions(-) diff --git a/packages/syft/src/syft/service/action/action_object.py b/packages/syft/src/syft/service/action/action_object.py index a531cd0e3b9..3c7b6ae5082 100644 --- a/packages/syft/src/syft/service/action/action_object.py +++ 
b/packages/syft/src/syft/service/action/action_object.py @@ -30,6 +30,8 @@ from ...serde.serializable import serializable from ...service.response import SyftError from ...store.linked_obj import LinkedObject +from ...types.blob_storage import BlobStorageEntry +from ...types.blob_storage import CreateBlobStorageEntry from ...types.syft_object import SYFT_OBJECT_VERSION_1 from ...types.syft_object import SyftBaseObject from ...types.syft_object import SyftObject @@ -295,8 +297,9 @@ def convert_to_pointers( if args is not None: for arg in args: if not isinstance(arg, ActionObject): - arg = ActionObject.from_obj(arg) + arg = ActionObject.from_obj(syft_action_data_cache=arg) arg.syft_node_uid = node_uid + arg.save() arg = api.services.action.set(arg) # arg = action_obj.send( # client @@ -306,8 +309,9 @@ def convert_to_pointers( if kwargs is not None: for k, arg in kwargs.items(): if not isinstance(arg, ActionObject): - arg = ActionObject.from_obj(arg) + arg = ActionObject.from_obj(syft_action_data_cache=arg) arg.syft_node_uid = node_uid + arg.save() arg = api.services.action.set(arg) # arg = action_obj.send(client) @@ -410,6 +414,8 @@ def debox_args_and_kwargs(args: Any, kwargs: Any) -> Tuple[Any, Any]: "_repr_debug_", "as_empty", "get", + "save", + "__set_syft_action_data_cache", ] @@ -420,8 +426,8 @@ class ActionObject(SyftObject): __version__ = SYFT_OBJECT_VERSION_1 __attr_searchable__: List[str] = [] - syft_action_data: Optional[Any] = None - # syft_action_proxy_reference: Optional[LinkedObject] = None + syft_action_data_cache: Optional[Any] = None + syft_blob_storage_entry_id: Optional[UID] = None syft_pointer_type: ClassVar[Type[ActionObjectPointer]] # Help with calculating history hash for code verification @@ -436,29 +442,52 @@ class ActionObject(SyftObject): _syft_post_hooks__: Dict[str, List] = {} syft_twin_type: TwinMode = TwinMode.NONE syft_passthrough_attrs = BASE_PASSTHROUGH_ATTRS + syft_action_data_type: Optional[Any] + 
syft_action_data_repr_:Optional[str] + syft_action_data_str_: Optional[str] # syft_dont_wrap_attrs = ["shape"] - # @property - # def syft_action_proxy(self) -> Optional[BlobStorageEntry]: - # return ( - # self.syft_action_proxy_reference.resolve - # if self.syft_action_proxy_reference is not None - # else None - # ) + @property + def syft_action_data(self) -> Any: + # relative + from ...client.api import APIRegistry - # @property - # def syft_action_data(self) -> Any: - # # relative - # from ...client.api import APIRegistry + if self.syft_action_data_cache is None: + api = APIRegistry.api_for( + node_uid=self.node_uid, + user_verify_key=self.syft_client_verify_key, + ) + blob_retrieval_object = api.services.blob_storage.read( + uid=self.syft_blob_storage_entry_id + ) + self.syft_action_data_cache = blob_retrieval_object.read() + return self.syft_action_data_cache - # api = APIRegistry.api_for( - # node_uid=self.node_uid, - # user_verify_key=self.syft_client_verify_key, - # ) - # syft_object_resource = api.services.blob_storage.read( - # uid=self.syft_action_proxy_reference.id - # ) - # return syft_object_resource.read() + def __set_syft_action_data_cache(self, data: Any) -> None: + # relative + from ...client.api import APIRegistry + + api = APIRegistry.api_for( + node_uid=self.node_uid, + user_verify_key=self.syft_client_verify_key, + ) + if api is not None: + storage_entry = CreateBlobStorageEntry.from_obj(data) + blob_deposit_object = api.services.blob_storage.allocate(storage_entry) + blob_deposit_object.write(data) + self.syft_blob_storage_entry_id = storage_entry.id + + self.syft_action_data_cache = data + self.syft_action_data_type = type(data) + + self.syft_action_data_repr_ = ( + data._repr_markdown_() + if hasattr(data, "_repr_markdown_") + else data.__repr__() + ) + self.syft_action_data_str_ = str(data) + + syft_action_data = syft_action_data.setter(__set_syft_action_data_cache) @property def is_pointer(self) -> bool: @@ -474,6 +503,46 @@ def 
make_id(cls, v: Optional[UID]) -> UID: """Generate or reuse an UID""" return Action.make_id(v) + class Config: + validate_assignment = True + + @pydantic.root_validator() + def __check_action_data(cls, values: dict) -> dict: + v = values.get("syft_action_data_cache") + values["syft_action_data_type"] = type(v) + + values["syft_action_data_repr_"] = ( + v._repr_markdown_() + if hasattr(v, "_repr_markdown_") + else v.__repr__() + ) + values["syft_action_data_str_"] = str(v) + return values + + + # @pydantic.validator("syft_action_data_cache", pre=True, always=True) + # def check_action_data(cls, v: Optional[Any], values: Dict) -> Any: + # """Generate or reuse an UID""" + # values["syft_action_data_type"] = type(v) + + # values["syft_action_data_type"] = ( + # v._repr_markdown_() + # if hasattr(v, "_repr_markdown_") + # else v.__repr__() + # ) + # values["syft_action_data_type"] = str(v) + # return v + + + # action_obj = ActionObject.from_obj(syft_action_data_cache=np.array[1, 2, 3]) + # api.set + # action_obj.save() + + def save(self) -> None: + data = self.syft_action_data + self.__set_syft_action_data_cache(data) + self.syft_action_data_cache = None + @property def is_mock(self): return self.syft_twin_type == TwinMode.MOCK @@ -486,17 +555,17 @@ def is_real(self): def is_twin(self): return self.syft_twin_type != TwinMode.NONE - @pydantic.validator("syft_action_data", pre=True, always=True) - def check_action_data( - cls, v: ActionObject.syft_pointer_type - ) -> ActionObject.syft_pointer_type: - if cls == AnyActionObject or isinstance( - v, (cls.syft_internal_type, ActionDataEmpty) - ): - return v - raise SyftException( - f"Must init {cls} with {cls.syft_internal_type} not {type(v)}" - ) + # @pydantic.validator("syft_action_data", pre=True, always=True) + # def check_action_data( + # cls, v: ActionObject.syft_pointer_type + # ) -> ActionObject.syft_pointer_type: + # if cls == AnyActionObject or isinstance( + # v, (cls.syft_internal_type, ActionDataEmpty) + # ): + # 
return v + # raise SyftException( + # f"Must init {cls} with {cls.syft_internal_type} not {type(v)}" + # ) def syft_point_to(self, node_uid: UID) -> "ActionObject": """Set the syft_node_uid, used in the post hooks""" @@ -716,7 +785,7 @@ def syft_make_action_with_self( def syft_get_path(self) -> str: """Get the type path of the underlying object""" if isinstance(self, AnyActionObject) and self.syft_internal_type: - return f"{type(self.syft_action_data).__name__}" # avoids AnyActionObject errors + return f"{self.syft_action_data_type.__name__}" # avoids AnyActionObject errors return f"{type(self).__name__}" def syft_remote_method( @@ -743,6 +812,7 @@ def wrapper( def send(self, client: SyftClient) -> Self: """Send the object to a Syft Client""" + self.save() res = client.api.services.action.set(self) res.syft_node_location = client.id res.syft_client_verify_key = client.verify_key @@ -795,19 +865,19 @@ def from_obj( syft_lineage_id: Optional[LineageID] Which LineageID to use for the ActionObject. 
Optional """ - if id and syft_lineage_id and id != syft_lineage_id.id: + if id is not None and syft_lineage_id is not None and id != syft_lineage_id.id: raise ValueError("UID and LineageID should match") action_type = action_type_for_object(syft_action_data) - action_object = action_type(syft_action_data=syft_action_data) + action_object = action_type(syft_action_data_cache=syft_action_data) - if id: + if id is not None: action_object.id = id - if syft_lineage_id: + if syft_lineage_id is not None: action_object.id = syft_lineage_id.id action_object.syft_history_hash = syft_lineage_id.syft_history_hash - elif id: + elif id is not None: action_object.syft_history_hash = hash(id) return action_object @@ -842,14 +912,13 @@ def empty( empty = ActionDataEmpty(syft_internal_type=syft_internal_type) res = ActionObject.from_obj( - syft_action_data=empty, id=id, syft_lineage_id=syft_lineage_id + id=id, syft_lineage_id=syft_lineage_id, syft_action_data_cache=empty ) res.__dict__["syft_internal_type"] = syft_internal_type return res def delete_data(self): - empty = ActionDataEmpty(syft_internal_type=self.syft_internal_type) - self.syft_action_data = empty + empty = ActionDataEmpty(syft_internal_type=self.syft_internal_type, syft_action_data_cache=empty) def __post_init__(self) -> None: """Add pre/post hooks.""" @@ -966,8 +1035,7 @@ def _syft_output_action_object( syft_twin_type = TwinMode.NONE if context.result_twin_type is not None: syft_twin_type = context.result_twin_type - result = constructor(syft_action_data=result, syft_twin_type=syft_twin_type) - + result = constructor(syft_twin_type=syft_twin_type, syft_action_data_cache=result) return result def _syft_passthrough_attrs(self) -> List[str]: @@ -1240,13 +1308,8 @@ def _repr_markdown_(self) -> str: res = "TwinPointer(Real)" elif not self.is_twin: res = "Pointer" - child_repr = ( - self.syft_action_data._repr_markdown_() - if hasattr(self.syft_action_data, "_repr_markdown_") - else self.syft_action_data.__repr__() - ) - 
return f"```python\n{res}\n```\n{child_repr}" + return f"```python\n{res}\n```\n{self.syft_action_data_repr_}" def __repr__(self) -> str: if self.is_mock: @@ -1255,7 +1318,7 @@ def __repr__(self) -> str: res = "TwinPointer(Real)" if not self.is_twin: res = "Pointer" - return f"{res}:\n{str(self.syft_action_data)}" + return f"{res}:\n{self.syft_action_data_str_}" def __call__(self, *args: Any, **kwds: Any) -> Any: return self.__call__(*args, **kwds) diff --git a/packages/syft/src/syft/service/action/numpy.py b/packages/syft/src/syft/service/action/numpy.py index c5b6e044e39..17a90268ed3 100644 --- a/packages/syft/src/syft/service/action/numpy.py +++ b/packages/syft/src/syft/service/action/numpy.py @@ -72,12 +72,12 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): result = getattr(ufunc, method)(*inputs, **kwargs) if type(result) is tuple: return tuple( - NumpyArrayObject(syft_action_data=x, dtype=x.dtype, shape=x.shape) + NumpyArrayObject(_syft_action_data=x, dtype=x.dtype, shape=x.shape) for x in result ) else: return NumpyArrayObject( - syft_action_data=result, dtype=result.dtype, shape=result.shape + _syft_action_data=result, dtype=result.dtype, shape=result.shape ) diff --git a/packages/syft/src/syft/service/blob_storage/service.py b/packages/syft/src/syft/service/blob_storage/service.py index 2ab7c99acf8..9ff26491dc4 100644 --- a/packages/syft/src/syft/service/blob_storage/service.py +++ b/packages/syft/src/syft/service/blob_storage/service.py @@ -11,6 +11,7 @@ from ...store.blob_storage.seaweedfs import SeaweedFSBlobDeposit from ...store.document_store import DocumentStore from ...types.blob_storage import BlobStorageEntry +from ...types.blob_storage import BlobStorageMetadata from ...types.blob_storage import CreateBlobStorageEntry from ...types.uid import UID from ..context import AuthedServiceContext @@ -51,6 +52,16 @@ def get_blob_storage_entry_by_uid( return result.ok() return SyftError(message=result.err()) + 
@service_method(path="blob_storage.get_metadata", name="get_metadata") + def get_blob_storage_metadata_by_uid( + self, context: AuthedServiceContext, uid: UID + ) -> Union[BlobStorageEntry, SyftError]: + result = self.stash.get_by_uid(context.credentials, uid=uid) + if result.is_ok(): + blob_storage_entry = result.ok() + return blob_storage_entry.to(BlobStorageMetadata) + return SyftError(message=result.err()) + @service_method(path="blob_storage.read", name="read") def read( self, context: AuthedServiceContext, uid: UID diff --git a/packages/syft/src/syft/types/blob_storage.py b/packages/syft/src/syft/types/blob_storage.py index e250aeab104..80836a86d9c 100644 --- a/packages/syft/src/syft/types/blob_storage.py +++ b/packages/syft/src/syft/types/blob_storage.py @@ -18,6 +18,8 @@ from .syft_object import SYFT_OBJECT_VERSION_1 from .syft_object import SyftObject from .uid import UID +from ..types.transforms import keep +from ..types.transforms import transform @serializable() @@ -51,6 +53,16 @@ class BlobStorageEntry(SyftObject): created_at: DateTime = DateTime.now() +@serializable() +class BlobStorageMetadata(SyftObject): + __canonical_name__ = "BlobStorageMetadata" + __version__ = SYFT_OBJECT_VERSION_1 + + type_: Optional[Type[SyftObject]] + mimetype: str = "bytes" + file_size: int + + @serializable() class CreateBlobStorageEntry(SyftObject): __canonical_name__ = "CreateBlobStorageEntry" @@ -85,3 +97,9 @@ def from_path(cls, fp: Union[str, Path], mimetype: Optional[str] = None) -> Self ) return cls(mimetype=mimetype, file_size=path.stat().st_size) + + + +@transform(BlobStorageEntry, BlobStorageMetadata) +def storage_entry_to_metadata(): + return [keep(["id", "type_", "mimetype", "file_size"])] From ca75c0cc2bfe95b04a0a77f76cb5382869c63456 Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Thu, 10 Aug 2023 18:08:10 +0530 Subject: [PATCH 17/98] update seaweed container command to configure s3 credentials dynamically --- packages/grid/docker-compose.yml | 6 +++--- 1 
file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/grid/docker-compose.yml b/packages/grid/docker-compose.yml index de3d2f45021..81a1281fbf9 100644 --- a/packages/grid/docker-compose.yml +++ b/packages/grid/docker-compose.yml @@ -222,13 +222,13 @@ services: - S3_VOLUME_SIZE_MB=${S3_VOLUME_SIZE_MB:-1024} - S3_ROOT_USER=${S3_ROOT_USER:-admin} - S3_ROOT_PWD=${S3_ROOT_PWD:-admin} + - S3_PORT=${S3_PORT:-8888} + entrypoint: ["/bin/sh", "-c"] command: - - "server -s3 -s3.config=/etc/s3config.json -s3.port=S3_PORT -master.volumeSizeLimitMB=${S3_VOLUME_SIZE_MB}" + - sleep 30 && echo 's3.configure -access_key ${S3_ROOT_USER} -secret_key ${S3_ROOT_PWD} -user iam -actions Read,Write,List,Tagging,Admin -apply' | weed shell & weed server -s3 -s3.port=${S3_PORT} -master.volumeSizeLimitMB=${S3_VOLUME_SIZE_MB} volumes: - seaweedfs-data:/data/blob - - ./seaweedfs/s3config.json:/etc/s3config.json - ./seaweedfs/filer.toml:/etc/seaweedfs/filer.toml - mongo: image: mongo:latest restart: always From deb3cec5515c7f35a46019d4051a89e8a749df30 Mon Sep 17 00:00:00 2001 From: khoaguin Date: Thu, 10 Aug 2023 19:54:26 +0700 Subject: [PATCH 18/98] fix some linting issues --- .../syft/src/syft/service/action/action_object.py | 15 ++++++--------- packages/syft/src/syft/types/blob_storage.py | 5 ++--- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/packages/syft/src/syft/service/action/action_object.py b/packages/syft/src/syft/service/action/action_object.py index 3c7b6ae5082..d12dcf6fc19 100644 --- a/packages/syft/src/syft/service/action/action_object.py +++ b/packages/syft/src/syft/service/action/action_object.py @@ -30,7 +30,6 @@ from ...serde.serializable import serializable from ...service.response import SyftError from ...store.linked_obj import LinkedObject -from ...types.blob_storage import BlobStorageEntry from ...types.blob_storage import CreateBlobStorageEntry from ...types.syft_object import SYFT_OBJECT_VERSION_1 from ...types.syft_object import 
SyftBaseObject @@ -443,7 +442,7 @@ class ActionObject(SyftObject): syft_twin_type: TwinMode = TwinMode.NONE syft_passthrough_attrs = BASE_PASSTHROUGH_ATTRS syft_action_data_type: Optional[Any] - syft_action_data_repr_:Optional[str] + syft_action_data_repr_: Optional[str] syft_action_data_str_: Optional[str] # syft_dont_wrap_attrs = ["shape"] @@ -512,14 +511,11 @@ def __check_action_data(cls, values: dict) -> dict: values["syft_action_data_type"] = type(v) values["syft_action_data_repr_"] = ( - v._repr_markdown_() - if hasattr(v, "_repr_markdown_") - else v.__repr__() + v._repr_markdown_() if hasattr(v, "_repr_markdown_") else v.__repr__() ) values["syft_action_data_str_"] = str(v) return values - # @pydantic.validator("syft_action_data_cache", pre=True, always=True) # def check_action_data(cls, v: Optional[Any], values: Dict) -> Any: # """Generate or reuse an UID""" @@ -533,7 +529,6 @@ def __check_action_data(cls, values: dict) -> dict: # values["syft_action_data_type"] = str(v) # return v - # action_obj = ActionObject.from_obj(syft_action_data_cache=np.array[1, 2, 3]) # api.set # action_obj.save() @@ -918,7 +913,7 @@ def empty( return res def delete_data(self): - empty = ActionDataEmpty(syft_internal_type=self.syft_internal_type, syft_action_data_cache=empty) + ActionDataEmpty(syft_internal_type=self.syft_internal_type) def __post_init__(self) -> None: """Add pre/post hooks.""" @@ -1035,7 +1030,9 @@ def _syft_output_action_object( syft_twin_type = TwinMode.NONE if context.result_twin_type is not None: syft_twin_type = context.result_twin_type - result = constructor(syft_twin_type=syft_twin_type, syft_action_data_cache=result) + result = constructor( + syft_twin_type=syft_twin_type, syft_action_data_cache=result + ) return result def _syft_passthrough_attrs(self) -> List[str]: diff --git a/packages/syft/src/syft/types/blob_storage.py b/packages/syft/src/syft/types/blob_storage.py index 80836a86d9c..c4759b6af75 100644 --- 
a/packages/syft/src/syft/types/blob_storage.py +++ b/packages/syft/src/syft/types/blob_storage.py @@ -14,12 +14,12 @@ from ..serde import serialize from ..serde.serializable import serializable from ..service.response import SyftException +from ..types.transforms import keep +from ..types.transforms import transform from .datetime import DateTime from .syft_object import SYFT_OBJECT_VERSION_1 from .syft_object import SyftObject from .uid import UID -from ..types.transforms import keep -from ..types.transforms import transform @serializable() @@ -99,7 +99,6 @@ def from_path(cls, fp: Union[str, Path], mimetype: Optional[str] = None) -> Self return cls(mimetype=mimetype, file_size=path.stat().st_size) - @transform(BlobStorageEntry, BlobStorageMetadata) def storage_entry_to_metadata(): return [keep(["id", "type_", "mimetype", "file_size"])] From 035919336e1c534b4fe5a101106a5c15386b926f Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Thu, 10 Aug 2023 18:46:42 +0530 Subject: [PATCH 19/98] - revert delete_data logic --- packages/syft/src/syft/service/action/action_object.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/syft/src/syft/service/action/action_object.py b/packages/syft/src/syft/service/action/action_object.py index d12dcf6fc19..4d22e154302 100644 --- a/packages/syft/src/syft/service/action/action_object.py +++ b/packages/syft/src/syft/service/action/action_object.py @@ -913,7 +913,8 @@ def empty( return res def delete_data(self): - ActionDataEmpty(syft_internal_type=self.syft_internal_type) + empty = ActionDataEmpty(syft_internal_type=self.syft_internal_type) + self.syft_action_data = empty def __post_init__(self) -> None: """Add pre/post hooks.""" From e38d7fa9b86f070afe17e73bdf202db710eaed88 Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Fri, 11 Aug 2023 12:57:33 +0530 Subject: [PATCH 20/98] - rename __set_syft_action_data_cache to _set_syft_action_data - serialize data to bytes before saving it to seaweed in 
_set_syft_action_data - change typing for type_ field in BlobStorageEntry and CreateBlobStorageEntry --- .../src/syft/service/action/action_object.py | 17 +++++++++-------- packages/syft/src/syft/types/blob_storage.py | 4 ++-- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/packages/syft/src/syft/service/action/action_object.py b/packages/syft/src/syft/service/action/action_object.py index 4d22e154302..9737778e359 100644 --- a/packages/syft/src/syft/service/action/action_object.py +++ b/packages/syft/src/syft/service/action/action_object.py @@ -28,6 +28,7 @@ from ...client.api import SyftAPI from ...client.client import SyftClient from ...serde.serializable import serializable +from ...serde.serialize import _serialize as serialize from ...service.response import SyftError from ...store.linked_obj import LinkedObject from ...types.blob_storage import CreateBlobStorageEntry @@ -414,7 +415,7 @@ def debox_args_and_kwargs(args: Any, kwargs: Any) -> Tuple[Any, Any]: "as_empty", "get", "save", - "__set_syft_action_data_cache", + "_set_syft_action_data", ] @@ -462,18 +463,18 @@ def syft_action_data(self) -> Any: self.syft_action_data_cache = blob_retrieval_object.read() return self.syft_action_data_cache - def __set_syft_action_data_cache(self, data: Any) -> None: + def _set_syft_action_data(self, data: Any) -> None: # relative from ...client.api import APIRegistry api = APIRegistry.api_for( - node_uid=self.node_uid, + node_uid=self.syft_node_location, user_verify_key=self.syft_client_verify_key, ) if api is not None: storage_entry = CreateBlobStorageEntry.from_obj(data) blob_deposit_object = api.services.blob_storage.allocate(storage_entry) - blob_deposit_object.write(data) + blob_deposit_object.write(serialize(data, to_bytes=True)) self.syft_blob_storage_entry_id = storage_entry.id self.syft_action_data_cache = data @@ -486,7 +487,7 @@ def __set_syft_action_data_cache(self, data: Any) -> None: ) self.syft_action_data_str_ = str(data) - syft_action_data = 
syft_action_data.setter(__set_syft_action_data_cache) + syft_action_data = syft_action_data.setter(_set_syft_action_data) @property def is_pointer(self) -> bool: @@ -535,7 +536,7 @@ def __check_action_data(cls, values: dict) -> dict: def save(self) -> None: data = self.syft_action_data - self.__set_syft_action_data_cache(data) + self._set_syft_action_data(data) self.syft_action_data_cache = None @property @@ -807,10 +808,10 @@ def wrapper( def send(self, client: SyftClient) -> Self: """Send the object to a Syft Client""" + self.syft_node_location = client.id + self.syft_client_verify_key = client.verify_key self.save() res = client.api.services.action.set(self) - res.syft_node_location = client.id - res.syft_client_verify_key = client.verify_key return res def get_from(self, client: SyftClient) -> Any: diff --git a/packages/syft/src/syft/types/blob_storage.py b/packages/syft/src/syft/types/blob_storage.py index c4759b6af75..09cc44d79bc 100644 --- a/packages/syft/src/syft/types/blob_storage.py +++ b/packages/syft/src/syft/types/blob_storage.py @@ -46,7 +46,7 @@ class BlobStorageEntry(SyftObject): id: UID location: Union[SecureFilePathLocation, SeaweedSecureFilePathLocation] - type_: Optional[Type[SyftObject]] + type_: Optional[Type] mimetype: str = "bytes" file_size: int uploaded_by: SyftVerifyKey @@ -69,7 +69,7 @@ class CreateBlobStorageEntry(SyftObject): __version__ = SYFT_OBJECT_VERSION_1 id: UID - type_: Optional[Type[SyftObject]] + type_: Optional[Type] mimetype: str = "bytes" file_size: int From 12ae1b01d4d989e9d402ca55d6f709a63a0cb362 Mon Sep 17 00:00:00 2001 From: Kien Dang Date: Fri, 11 Aug 2023 17:11:31 +0800 Subject: [PATCH 21/98] Updates Co-authored-by: Shubham Gupta --- packages/syft/src/syft/node/routes.py | 2 ++ .../src/syft/service/action/action_object.py | 21 ++++++++++++++++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/packages/syft/src/syft/node/routes.py b/packages/syft/src/syft/node/routes.py index 8bbe0df8e6d..9d720414ad4 
100644 --- a/packages/syft/src/syft/node/routes.py +++ b/packages/syft/src/syft/node/routes.py @@ -89,7 +89,9 @@ def syft_new_api(request: Request, verify_key: str) -> Response: return handle_syft_new_api(user_verify_key) def handle_new_api_call(data: bytes) -> Response: + print("Reached the endpoint...." * 10) obj_msg = deserialize(blob=data, from_bytes=True) + print("HErere deserialized blob store" * 10) result = worker.handle_api_call(api_call=obj_msg) return Response( serialize(result, to_bytes=True), diff --git a/packages/syft/src/syft/service/action/action_object.py b/packages/syft/src/syft/service/action/action_object.py index 9737778e359..0dff98ba664 100644 --- a/packages/syft/src/syft/service/action/action_object.py +++ b/packages/syft/src/syft/service/action/action_object.py @@ -416,6 +416,7 @@ def debox_args_and_kwargs(args: Any, kwargs: Any) -> Tuple[Any, Any]: "get", "save", "_set_syft_action_data", + "syft_action_data", ] @@ -454,9 +455,13 @@ def syft_action_data(self) -> Any: if self.syft_action_data_cache is None: api = APIRegistry.api_for( - node_uid=self.node_uid, + node_uid=self.syft_node_location, user_verify_key=self.syft_client_verify_key, ) + # service = context.node.get_service(BlobStorageService) + # action_object.with_node_context(context) + # action_object.node_context = context + # action_object.clear_context() blob_retrieval_object = api.services.blob_storage.read( uid=self.syft_blob_storage_entry_id ) @@ -475,7 +480,7 @@ def _set_syft_action_data(self, data: Any) -> None: storage_entry = CreateBlobStorageEntry.from_obj(data) blob_deposit_object = api.services.blob_storage.allocate(storage_entry) blob_deposit_object.write(serialize(data, to_bytes=True)) - self.syft_blob_storage_entry_id = storage_entry.id + self.syft_blob_storage_entry_id = blob_deposit_object.blob_storage_entry_id self.syft_action_data_cache = data self.syft_action_data_type = type(data) @@ -1077,6 +1082,9 @@ def _syft_attr_propagate_ids(self, context, name: str, 
result: Any) -> Any: result.syft_node_location = context.syft_node_location result.syft_client_verify_key = context.syft_client_verify_key + # Propogate Syft blob storage entry id + result.syft_blob_storage_entry_id = context.obj.syft_blob_storage_entry_id + # Propagate Result ID if context.result_id is not None: result.id = context.result_id @@ -1105,7 +1113,14 @@ def _syft_wrap_attribute_for_bool_on_nonbools(self, name: str) -> Any: context, _, _ = self._syft_run_pre_hooks__(context, name, (), {}) # no input needs to propagate - result = self._syft_run_post_hooks__(context, name, bool(self.syft_action_data)) + result = self._syft_run_post_hooks__( + context, + name, + any( + x is not None + for x in (self.syft_blob_storage_entry_id, self.syft_action_data_cache) + ), + ) result = self._syft_attr_propagate_ids(context, name, result) def __wrapper__bool__() -> bool: From 3ea478be8e98f707dd338120dda9cbafec893863 Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Fri, 11 Aug 2023 16:28:19 +0530 Subject: [PATCH 22/98] hide std output of the s3 config credentials --- packages/grid/docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/grid/docker-compose.yml b/packages/grid/docker-compose.yml index 81a1281fbf9..926530950a7 100644 --- a/packages/grid/docker-compose.yml +++ b/packages/grid/docker-compose.yml @@ -225,7 +225,7 @@ services: - S3_PORT=${S3_PORT:-8888} entrypoint: ["/bin/sh", "-c"] command: - - sleep 30 && echo 's3.configure -access_key ${S3_ROOT_USER} -secret_key ${S3_ROOT_PWD} -user iam -actions Read,Write,List,Tagging,Admin -apply' | weed shell & weed server -s3 -s3.port=${S3_PORT} -master.volumeSizeLimitMB=${S3_VOLUME_SIZE_MB} + - sleep 30 && echo 's3.configure -access_key ${S3_ROOT_USER} -secret_key ${S3_ROOT_PWD} -user iam -actions Read,Write,List,Tagging,Admin -apply' | weed shell > /dev/null 2>&1 & weed server -s3 -s3.port=${S3_PORT} -master.volumeSizeLimitMB=${S3_VOLUME_SIZE_MB} volumes: - 
seaweedfs-data:/data/blob - ./seaweedfs/filer.toml:/etc/seaweedfs/filer.toml From 55b46d00344ca8753003e333f96ddc4b1f1c2f62 Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Fri, 11 Aug 2023 19:03:06 +0530 Subject: [PATCH 23/98] create a start script for seaweedfs and move the entrypoint to it add seaweedfs start script to manifest --- packages/grid/docker-compose.yml | 5 +++-- packages/grid/seaweedfs/start.sh | 6 ++++++ packages/hagrid/hagrid/manifest_template.yml | 2 +- 3 files changed, 10 insertions(+), 3 deletions(-) create mode 100644 packages/grid/seaweedfs/start.sh diff --git a/packages/grid/docker-compose.yml b/packages/grid/docker-compose.yml index 926530950a7..106ab372dd4 100644 --- a/packages/grid/docker-compose.yml +++ b/packages/grid/docker-compose.yml @@ -223,12 +223,13 @@ services: - S3_ROOT_USER=${S3_ROOT_USER:-admin} - S3_ROOT_PWD=${S3_ROOT_PWD:-admin} - S3_PORT=${S3_PORT:-8888} - entrypoint: ["/bin/sh", "-c"] + entrypoint: ["/bin/sh"] command: - - sleep 30 && echo 's3.configure -access_key ${S3_ROOT_USER} -secret_key ${S3_ROOT_PWD} -user iam -actions Read,Write,List,Tagging,Admin -apply' | weed shell > /dev/null 2>&1 & weed server -s3 -s3.port=${S3_PORT} -master.volumeSizeLimitMB=${S3_VOLUME_SIZE_MB} + - "/etc/seaweedfs/start.sh" volumes: - seaweedfs-data:/data/blob - ./seaweedfs/filer.toml:/etc/seaweedfs/filer.toml + - ./seaweedfs/start.sh:/etc/seaweedfs/start.sh mongo: image: mongo:latest restart: always diff --git a/packages/grid/seaweedfs/start.sh b/packages/grid/seaweedfs/start.sh new file mode 100644 index 00000000000..d6dc34f535d --- /dev/null +++ b/packages/grid/seaweedfs/start.sh @@ -0,0 +1,6 @@ +#! 
/usr/bin/env bash + +sleep 30 && +echo "s3.configure -access_key ${S3_ROOT_USER} -secret_key ${S3_ROOT_PWD} -user iam -actions Read,Write,List,Tagging,Admin -apply" \ +| weed shell > /dev/null 2>&1 \ +& weed server -s3 -s3.port=${S3_PORT} -master.volumeSizeLimitMB=${S3_VOLUME_SIZE_MB} \ No newline at end of file diff --git a/packages/hagrid/hagrid/manifest_template.yml b/packages/hagrid/hagrid/manifest_template.yml index be97056c5c6..cf5d7e1408d 100644 --- a/packages/hagrid/hagrid/manifest_template.yml +++ b/packages/hagrid/hagrid/manifest_template.yml @@ -12,7 +12,7 @@ files: - rabbitmq/rabbitmq.conf - redis/redis.conf - seaweedfs/filer.toml - - seaweedfs/s3config.json + - seaweedfs/start.sh - vpn/config.yaml - default.env docker: From 8388fbdcd4e7baa58bacd9610ee22e0d72cff54e Mon Sep 17 00:00:00 2001 From: Peter Chung Date: Mon, 14 Aug 2023 17:17:33 +1000 Subject: [PATCH 24/98] Added delete method for service and on_disk --- .../src/syft/service/blob_storage/service.py | 17 +++++++++++++++++ .../syft/src/syft/store/blob_storage/on_disk.py | 7 +++++++ 2 files changed, 24 insertions(+) diff --git a/packages/syft/src/syft/service/blob_storage/service.py b/packages/syft/src/syft/service/blob_storage/service.py index 9ff26491dc4..c3a8c24fe89 100644 --- a/packages/syft/src/syft/service/blob_storage/service.py +++ b/packages/syft/src/syft/service/blob_storage/service.py @@ -139,5 +139,22 @@ def mark_write_complete( return result + @service_method(path="blob_storage.delete", name="delete") + def delete( + self, context: AuthedServiceContext, uid: UID + ) -> Union[SyftSuccess, SyftError]: + result = self.stash.get_by_uid(context.credentials, uid=uid) + if result.is_ok(): + with context.node.blob_storage_client.connect() as conn: + file_unlinked = conn.delete(result.ok().location) + blob_storage_entry_deleted = self.stash.delete_by_uid( + context.credentials, uid=uid + ) + if file_unlinked and blob_storage_entry_deleted: + return SyftSuccess(message="File successfully 
deleted.") + else: + return SyftError(message="File deletion failed.") + return SyftError(message=result.err()) + TYPE_TO_SERVICE[BlobStorageEntry] = BlobStorageEntry diff --git a/packages/syft/src/syft/store/blob_storage/on_disk.py b/packages/syft/src/syft/store/blob_storage/on_disk.py index da8f5324eaa..a1d8084f883 100644 --- a/packages/syft/src/syft/store/blob_storage/on_disk.py +++ b/packages/syft/src/syft/store/blob_storage/on_disk.py @@ -68,6 +68,13 @@ def allocate(self, obj: CreateBlobStorageEntry) -> SecureFilePathLocation: def write(self, obj: BlobStorageEntry) -> BlobDeposit: return OnDiskBlobDeposit(blob_storage_entry_id=obj.id) + def delete(self, fp: SecureFilePathLocation) -> bool: + try: + (self._base_directory / fp.path).unlink() + return True + except FileNotFoundError: + return False + @serializable() class OnDiskBlobStorageClientConfig(BlobStorageClientConfig): From 2d74146514e3f7ab5b612450d9f350648d0feeec Mon Sep 17 00:00:00 2001 From: Peter Chung Date: Mon, 14 Aug 2023 17:50:18 +1000 Subject: [PATCH 25/98] added delete method to seaweedfs --- .../syft/src/syft/store/blob_storage/seaweedfs.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/packages/syft/src/syft/store/blob_storage/seaweedfs.py b/packages/syft/src/syft/store/blob_storage/seaweedfs.py index c3af9b3d793..2062743d12e 100644 --- a/packages/syft/src/syft/store/blob_storage/seaweedfs.py +++ b/packages/syft/src/syft/store/blob_storage/seaweedfs.py @@ -193,6 +193,21 @@ def complete_multipart_upload( except BotoClientError as e: return SyftError(message=str(e)) + def delete( + self, + fp: SecureFilePathLocation, + ) -> Union[SyftError, SyftSuccess]: + # try: + result = self.client.delete_object( + Bucket=self.bucket_name, + Key=fp.path, + ) + print(result) + return result + # return SyftSuccess(message="Successfully deleted file.") + # except BotoClientError as e: + # return SyftError(message=str(e)) + class SeaweedFSConfig(BlobStorageConfig): client_type: 
Type[BlobStorageClient] = SeaweedFSClient From 9d0b1c81b904a2a457706407344f346488ca8705 Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Mon, 14 Aug 2023 15:47:43 +0530 Subject: [PATCH 26/98] remove print statements add syft_action_data and _set_syft_action_data to pass through attrs remove unused validator --- packages/syft/src/syft/node/routes.py | 2 -- .../src/syft/service/action/action_object.py | 22 +++++-------------- .../syft/src/syft/service/action/numpy.py | 6 ++--- 3 files changed, 8 insertions(+), 22 deletions(-) diff --git a/packages/syft/src/syft/node/routes.py b/packages/syft/src/syft/node/routes.py index 9d720414ad4..8bbe0df8e6d 100644 --- a/packages/syft/src/syft/node/routes.py +++ b/packages/syft/src/syft/node/routes.py @@ -89,9 +89,7 @@ def syft_new_api(request: Request, verify_key: str) -> Response: return handle_syft_new_api(user_verify_key) def handle_new_api_call(data: bytes) -> Response: - print("Reached the endpoint...." * 10) obj_msg = deserialize(blob=data, from_bytes=True) - print("HErere deserialized blob store" * 10) result = worker.handle_api_call(api_call=obj_msg) return Response( serialize(result, to_bytes=True), diff --git a/packages/syft/src/syft/service/action/action_object.py b/packages/syft/src/syft/service/action/action_object.py index 0dff98ba664..c4978ff6380 100644 --- a/packages/syft/src/syft/service/action/action_object.py +++ b/packages/syft/src/syft/service/action/action_object.py @@ -181,6 +181,8 @@ class ActionObjectPointer: "get_from", # syft "get", # syft "delete_data", # syft + "_set_syft_action_data", # syft + "syft_action_data", # syft ] dont_wrap_output_attrs = [ "__repr__", @@ -462,6 +464,9 @@ def syft_action_data(self) -> Any: # action_object.with_node_context(context) # action_object.node_context = context # action_object.clear_context() + # + # + blob_retrieval_object = api.services.blob_storage.read( uid=self.syft_blob_storage_entry_id ) @@ -522,23 +527,6 @@ def __check_action_data(cls, values: dict) -> 
dict: values["syft_action_data_str_"] = str(v) return values - # @pydantic.validator("syft_action_data_cache", pre=True, always=True) - # def check_action_data(cls, v: Optional[Any], values: Dict) -> Any: - # """Generate or reuse an UID""" - # values["syft_action_data_type"] = type(v) - - # values["syft_action_data_type"] = ( - # v._repr_markdown_() - # if hasattr(v, "_repr_markdown_") - # else v.__repr__() - # ) - # values["syft_action_data_type"] = str(v) - # return v - - # action_obj = ActionObject.from_obj(syft_action_data_cache=np.array[1, 2, 3]) - # api.set - # action_obj.save() - def save(self) -> None: data = self.syft_action_data self._set_syft_action_data(data) diff --git a/packages/syft/src/syft/service/action/numpy.py b/packages/syft/src/syft/service/action/numpy.py index 17a90268ed3..58eb58e3617 100644 --- a/packages/syft/src/syft/service/action/numpy.py +++ b/packages/syft/src/syft/service/action/numpy.py @@ -39,7 +39,7 @@ def numpy_like_eq(left: Any, right: Any) -> bool: # 🔵 TODO 7: Map TPActionObjects and their 3rd Party types like numpy type to these # classes for bi-directional lookup. 
-@serializable() +@serializable(without=["syft_action_data_cache"]) class NumpyArrayObject(ActionObject, np.lib.mixins.NDArrayOperatorsMixin): __canonical_name__ = "NumpyArrayObject" __version__ = SYFT_OBJECT_VERSION_1 @@ -72,12 +72,12 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): result = getattr(ufunc, method)(*inputs, **kwargs) if type(result) is tuple: return tuple( - NumpyArrayObject(_syft_action_data=x, dtype=x.dtype, shape=x.shape) + NumpyArrayObject(syft_action_data_cache=x, dtype=x.dtype, shape=x.shape) for x in result ) else: return NumpyArrayObject( - _syft_action_data=result, dtype=result.dtype, shape=result.shape + syft_action_data_cache=result, dtype=result.dtype, shape=result.shape ) From 8f873a27115d65fb18e5dc06f3edd21920578d5c Mon Sep 17 00:00:00 2001 From: Peter Chung Date: Mon, 14 Aug 2023 23:51:27 +1000 Subject: [PATCH 27/98] Added delete method for SeaweedFSConnection --- .../src/syft/store/blob_storage/seaweedfs.py | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/packages/syft/src/syft/store/blob_storage/seaweedfs.py b/packages/syft/src/syft/store/blob_storage/seaweedfs.py index 2062743d12e..8ce903c3e41 100644 --- a/packages/syft/src/syft/store/blob_storage/seaweedfs.py +++ b/packages/syft/src/syft/store/blob_storage/seaweedfs.py @@ -196,17 +196,15 @@ def complete_multipart_upload( def delete( self, fp: SecureFilePathLocation, - ) -> Union[SyftError, SyftSuccess]: - # try: - result = self.client.delete_object( - Bucket=self.bucket_name, - Key=fp.path, - ) - print(result) - return result - # return SyftSuccess(message="Successfully deleted file.") - # except BotoClientError as e: - # return SyftError(message=str(e)) + ) -> bool: + try: + self.client.delete_object( + Bucket=self.bucket_name, + Key=fp.path, + ) + return True + except BotoClientError as e: + return False class SeaweedFSConfig(BlobStorageConfig): From 7925ba98c5a79b51fb320a3a757ba6646175f1e3 Mon Sep 17 00:00:00 2001 From: 
Shubham Gupta Date: Tue, 15 Aug 2023 13:09:52 +0530 Subject: [PATCH 28/98] - replace checking type from syft_action_data to self.syft_action_data_type - replace checking bool type on syft_action_data to self.syft_has_bool_attr --- .../src/syft/service/action/action_object.py | 22 ++++++++++--------- .../syft/src/syft/service/action/numpy.py | 4 ++-- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/packages/syft/src/syft/service/action/action_object.py b/packages/syft/src/syft/service/action/action_object.py index c4978ff6380..c023aca2c08 100644 --- a/packages/syft/src/syft/service/action/action_object.py +++ b/packages/syft/src/syft/service/action/action_object.py @@ -448,6 +448,7 @@ class ActionObject(SyftObject): syft_action_data_type: Optional[Any] syft_action_data_repr_: Optional[str] syft_action_data_str_: Optional[str] + syft_has_bool_attr: Optional[bool] # syft_dont_wrap_attrs = ["shape"] @property @@ -496,6 +497,7 @@ def _set_syft_action_data(self, data: Any) -> None: else data.__repr__() ) self.syft_action_data_str_ = str(data) + self.syft_has_bool_attr = hasattr(data, "__bool__") syft_action_data = syft_action_data.setter(_set_syft_action_data) @@ -940,8 +942,8 @@ def __post_init__(self) -> None: if side_effect not in self._syft_post_hooks__[HOOK_ALWAYS]: self._syft_post_hooks__[HOOK_ALWAYS].append(side_effect) - if isinstance(self.syft_action_data, ActionObject): - raise Exception("Nested ActionObjects", self.syft_action_data) + if isinstance(self.syft_action_data_type, ActionObject): + raise Exception("Nested ActionObjects", self.syft_action_data_repr_) self.syft_history_hash = hash(self.id) @@ -1086,7 +1088,7 @@ def _syft_wrap_attribute_for_bool_on_nonbools(self, name: str) -> Any: "[_wrap_attribute_for_bool_on_nonbools] Use this only for the __bool__ operator" ) - if hasattr(self.syft_action_data, "__bool__"): + if self.syft_has_bool_attr: raise RuntimeError( "[_wrap_attribute_for_bool_on_nonbools] self.syft_action_data already 
implements the bool operator" ) @@ -1150,7 +1152,7 @@ def fake_func(*args: Any, **kwargs: Any) -> Any: debug(f"[__getattribute__] Handling method {name} ") if ( - isinstance(self.syft_action_data, ActionDataEmpty) + isinstance(self.syft_action_data_type, ActionDataEmpty) and name not in action_data_empty_must_run ): original_func = fake_func @@ -1219,9 +1221,9 @@ def _syft_setattr(self, name, value): def fake_func(*args: Any, **kwargs: Any) -> Any: return ActionDataEmpty(syft_internal_type=self.syft_internal_type) - if isinstance(self.syft_action_data, ActionDataEmpty) or has_action_data_empty( - args=args, kwargs=kwargs - ): + if isinstance( + self.syft_action_data_type, ActionDataEmpty + ) or has_action_data_empty(args=args, kwargs=kwargs): local_func = fake_func else: local_func = getattr(self.syft_action_data, op_name) @@ -1271,7 +1273,7 @@ def __getattribute__(self, name: str) -> Any: context_self = self._syft_get_attr_context(name) # Handle bool operator on nonbools - if name == "__bool__" and not hasattr(self.syft_action_data, "__bool__"): + if name == "__bool__" and not self.syft_has_bool_attr: return self._syft_wrap_attribute_for_bool_on_nonbools(name) # Handle Properties @@ -1472,7 +1474,7 @@ def __rrshift__(self, other: Any) -> Any: return self._syft_output_action_object(self.__rrshift__(other)) -@serializable() +@serializable(without=["syft_action_data_cache"]) class AnyActionObject(ActionObject): __canonical_name__ = "AnyActionObject" __version__ = SYFT_OBJECT_VERSION_1 @@ -1503,7 +1505,7 @@ def debug_original_func(name: str, func: Callable) -> None: def is_action_data_empty(obj: Any) -> bool: return isinstance(obj, AnyActionObject) and isinstance( - obj.syft_action_data, ActionDataEmpty + obj.syft_action_data_type, ActionDataEmpty ) diff --git a/packages/syft/src/syft/service/action/numpy.py b/packages/syft/src/syft/service/action/numpy.py index 58eb58e3617..767e95b613e 100644 --- a/packages/syft/src/syft/service/action/numpy.py +++ 
b/packages/syft/src/syft/service/action/numpy.py @@ -81,7 +81,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): ) -@serializable() +@serializable(without=["syft_action_data_cache"]) class NumpyScalarObject(ActionObject, np.lib.mixins.NDArrayOperatorsMixin): __canonical_name__ = "NumpyScalarObject" __version__ = SYFT_OBJECT_VERSION_1 @@ -94,7 +94,7 @@ def __float__(self) -> float: return float(self.syft_action_data) -@serializable() +@serializable(without=["syft_action_data_cache"]) class NumpyBoolObject(ActionObject, np.lib.mixins.NDArrayOperatorsMixin): __canonical_name__ = "NumpyBoolObject" __version__ = SYFT_OBJECT_VERSION_1 From 21e4f5c7c97d2dc23d65dacd1129f2ca1cc5412d Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Wed, 16 Aug 2023 15:52:47 +0530 Subject: [PATCH 29/98] - add AuthNodeContextRegistry class - use auth node context in syft action data is API is done - update blob api to use node context if api is not None - save result object on execute in action service --- packages/grid/docker-compose.dev.yml | 2 + packages/syft/src/syft/node/node.py | 29 ++++++++++++ .../src/syft/service/action/action_object.py | 46 +++++++++++++------ .../src/syft/service/action/action_service.py | 10 ++-- .../src/syft/service/blob_storage/service.py | 1 + .../src/syft/store/blob_storage/__init__.py | 5 +- .../src/syft/store/blob_storage/seaweedfs.py | 23 ++++++++-- 7 files changed, 92 insertions(+), 24 deletions(-) diff --git a/packages/grid/docker-compose.dev.yml b/packages/grid/docker-compose.dev.yml index 39a508e2eeb..3faef69b269 100644 --- a/packages/grid/docker-compose.dev.yml +++ b/packages/grid/docker-compose.dev.yml @@ -46,6 +46,8 @@ services: - ${RELATIVE_PATH}./data/package-cache:/root/.cache environment: - DEV_MODE=True + stdin_open: true + tty: true # backend_stream: # volumes: diff --git a/packages/syft/src/syft/node/node.py b/packages/syft/src/syft/node/node.py index 388e8f81660..139a0b68258 100644 --- a/packages/syft/src/syft/node/node.py +++ 
b/packages/syft/src/syft/node/node.py @@ -3,6 +3,7 @@ # stdlib import binascii +from collections import OrderedDict import contextlib from datetime import datetime from functools import partial @@ -16,6 +17,7 @@ from typing import Dict from typing import List from typing import Optional +from typing import Tuple from typing import Type from typing import Union import uuid @@ -181,6 +183,32 @@ def get_venv_packages() -> str: default_root_password = get_default_root_password() +class AuthNodeContextRegistry: + __node_context_registry__: Dict[Tuple, Node] = OrderedDict() + + @classmethod + def set_node_context( + cls, + node_uid: Union[UID, str], + context: NodeServiceContext, + user_verify_key: Union[SyftVerifyKey, str], + ): + if isinstance(node_uid, str): + node_uid = UID.from_string(node_uid) + + if isinstance(user_verify_key, str): + user_verify_key = SyftVerifyKey.from_string(user_verify_key) + + key = (node_uid, user_verify_key) + + cls.__node_context_registry__[key] = context + + @classmethod + def get_auth_context(cls) -> AuthedServiceContext: + if len(cls.__node_context_registry__) > 0: + return list(cls.__node_context_registry__.values())[0] + + @instrument class Node(AbstractNode): signing_key: Optional[SyftSigningKey] @@ -433,6 +461,7 @@ def __repr__(self) -> str: def post_init(self) -> None: context = AuthedServiceContext(node=self, credentials=self.verify_key) + AuthNodeContextRegistry.set_node_context(self.id, context, self.verify_key) if UserCodeService in self.services: user_code_service = self.get_service(UserCodeService) diff --git a/packages/syft/src/syft/service/action/action_object.py b/packages/syft/src/syft/service/action/action_object.py index c023aca2c08..261e496fc61 100644 --- a/packages/syft/src/syft/service/action/action_object.py +++ b/packages/syft/src/syft/service/action/action_object.py @@ -38,6 +38,7 @@ from ...types.uid import LineageID from ...types.uid import UID from ...util.logger import debug +from ..blob_storage.service import 
BlobStorageService from ..response import SyftException from .action_data_empty import ActionDataEmpty from .action_permissions import ActionPermission @@ -455,38 +456,53 @@ class ActionObject(SyftObject): def syft_action_data(self) -> Any: # relative from ...client.api import APIRegistry + from ...node.node import AuthNodeContextRegistry if self.syft_action_data_cache is None: api = APIRegistry.api_for( node_uid=self.syft_node_location, user_verify_key=self.syft_client_verify_key, ) - # service = context.node.get_service(BlobStorageService) - # action_object.with_node_context(context) - # action_object.node_context = context - # action_object.clear_context() - # - # - - blob_retrieval_object = api.services.blob_storage.read( - uid=self.syft_blob_storage_entry_id - ) - self.syft_action_data_cache = blob_retrieval_object.read() + + if api is not None: + blob_retrieval_object = api.services.blob_storage.read( + uid=self.syft_blob_storage_entry_id + ) + self.syft_action_data_cache = blob_retrieval_object.read() + else: + node_context = AuthNodeContextRegistry.get_auth_context() + blob_service: BlobStorageService = node_context.node.get_service( + BlobStorageService + ) + blob_retrieval_object = blob_service.read( + node_context, self.syft_blob_storage_entry_id + ) + self.syft_action_data_cache = blob_retrieval_object.read() + return self.syft_action_data_cache def _set_syft_action_data(self, data: Any) -> None: # relative from ...client.api import APIRegistry + from ...node.node import AuthNodeContextRegistry + + storage_entry = CreateBlobStorageEntry.from_obj(data) api = APIRegistry.api_for( node_uid=self.syft_node_location, user_verify_key=self.syft_client_verify_key, ) if api is not None: - storage_entry = CreateBlobStorageEntry.from_obj(data) blob_deposit_object = api.services.blob_storage.allocate(storage_entry) - blob_deposit_object.write(serialize(data, to_bytes=True)) - self.syft_blob_storage_entry_id = blob_deposit_object.blob_storage_entry_id + else: + 
node_context = AuthNodeContextRegistry.get_auth_context() + blob_service: BlobStorageService = node_context.node.get_service( + BlobStorageService + ) + blob_deposit_object = blob_service.allocate(node_context, storage_entry) + + blob_deposit_object.write(serialize(data, to_bytes=True)) + self.syft_blob_storage_entry_id = blob_deposit_object.blob_storage_entry_id self.syft_action_data_cache = data self.syft_action_data_type = type(data) @@ -903,7 +919,7 @@ def empty( empty = ActionDataEmpty(syft_internal_type=syft_internal_type) res = ActionObject.from_obj( - id=id, syft_lineage_id=syft_lineage_id, syft_action_data_cache=empty + id=id, syft_lineage_id=syft_lineage_id, syft_action_data=empty ) res.__dict__["syft_internal_type"] = syft_internal_type return res diff --git a/packages/syft/src/syft/service/action/action_service.py b/packages/syft/src/syft/service/action/action_service.py index 1819b4d0ce4..92b364b35e1 100644 --- a/packages/syft/src/syft/service/action/action_service.py +++ b/packages/syft/src/syft/service/action/action_service.py @@ -53,8 +53,9 @@ def np_array(self, context: AuthedServiceContext, data: Any) -> Any: if not isinstance(data, np.ndarray): data = np.array(data) np_obj = NumpyArrayObject( - syft_action_data=data, dtype=data.dtype, shape=data.shape + dtype=data.dtype, shape=data.shape, syft_action_data_cache=data ) + np_obj.save() np_pointer = self.set(context, np_obj) return np_pointer @@ -207,6 +208,7 @@ def _user_code_execute( except Exception as e: return Err(f"_user_code_execute failed. 
{e}") + result_action_object.save() set_result = self.store.set( uid=result_id, credentials=context.credentials, @@ -409,7 +411,7 @@ def execute( ) resolved_self = resolved_self.ok() if action.op == "__call__" and isinstance( - resolved_self.syft_action_data, Plan + resolved_self.syft_action_data_type, Plan ): result_action_object = self.execute_plan( plan=resolved_self.syft_action_data, @@ -439,7 +441,7 @@ def execute( has_result_read_permission = self.has_read_permission_for_action_result( context, action ) - + result_action_object.save() set_result = self.store.set( uid=action.result_id, credentials=context.credentials, @@ -666,7 +668,7 @@ def execute_object( def wrap_result(result_id: UID, result: Any) -> ActionObject: # 🟡 TODO 11: Figure out how we want to store action object results action_type = action_type_for_type(result) - result_action_object = action_type(id=result_id, syft_action_data=result) + result_action_object = action_type(id=result_id, syft_action_data_cache=result) return result_action_object diff --git a/packages/syft/src/syft/service/blob_storage/service.py b/packages/syft/src/syft/service/blob_storage/service.py index 9ff26491dc4..8522d87fd8b 100644 --- a/packages/syft/src/syft/service/blob_storage/service.py +++ b/packages/syft/src/syft/service/blob_storage/service.py @@ -80,6 +80,7 @@ def allocate( secure_location = conn.allocate(obj) blob_storage_entry = BlobStorageEntry( + id=obj.id, location=secure_location, type_=obj.type_, mimetype=obj.mimetype, diff --git a/packages/syft/src/syft/store/blob_storage/__init__.py b/packages/syft/src/syft/store/blob_storage/__init__.py index 578f184ec04..d4077495042 100644 --- a/packages/syft/src/syft/store/blob_storage/__init__.py +++ b/packages/syft/src/syft/store/blob_storage/__init__.py @@ -101,7 +101,10 @@ def read(self) -> Union[SyftObject, SyftError]: node_uid=self.syft_node_location, user_verify_key=self.syft_client_verify_key, ) - blob_url = api.connection.to_blob_route(self.url.url_path) + if 
api is not None: + blob_url = api.connection.to_blob_route(self.url.url_path) + else: + blob_url = self.url try: response = requests.get(str(blob_url), timeout=DEFAULT_TIMEOUT) response.raise_for_status() diff --git a/packages/syft/src/syft/store/blob_storage/seaweedfs.py b/packages/syft/src/syft/store/blob_storage/seaweedfs.py index c3af9b3d793..3a7f3b9feef 100644 --- a/packages/syft/src/syft/store/blob_storage/seaweedfs.py +++ b/packages/syft/src/syft/store/blob_storage/seaweedfs.py @@ -57,12 +57,16 @@ class SeaweedFSBlobDeposit(BlobDeposit): def write(self, data: bytes) -> Union[SyftSuccess, SyftError]: # relative from ...client.api import APIRegistry + from ...node.node import AuthNodeContextRegistry + from ...service.blob_storage.service import BlobStorageService api = APIRegistry.api_for( node_uid=self.syft_node_location, user_verify_key=self.syft_client_verify_key, ) + node_context = AuthNodeContextRegistry.get_auth_context() + etags = [] try: @@ -70,7 +74,10 @@ def write(self, data: bytes) -> Union[SyftSuccess, SyftError]: zip(_byte_chunks(BytesIO(data), DEFAULT_CHUNK_SIZE), self.urls), start=1, ): - blob_url = api.connection.to_blob_route(url.url_path) + if api is not None: + blob_url = api.connection.to_blob_route(url.url_path) + else: + blob_url = url response = requests.put( url=str(blob_url), data=byte_chunk, timeout=DEFAULT_TIMEOUT ) @@ -80,9 +87,17 @@ def write(self, data: bytes) -> Union[SyftSuccess, SyftError]: except requests.RequestException as e: return SyftError(message=str(e)) - return api.services.blob_storage.mark_write_complete( - etags=etags, uid=self.blob_storage_entry_id - ) + if api is not None: + return api.services.blob_storage.mark_write_complete( + etags=etags, uid=self.blob_storage_entry_id + ) + else: + mark_as_complete = node_context.node.get_service_method( + BlobStorageService.mark_write_complete + ) + return mark_as_complete( + node_context, etags=etags, uid=self.blob_storage_entry_id + ) @serializable() From 
38375af382ea8072d5fe8c7aff22aafcd0b61c29 Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Wed, 16 Aug 2023 22:46:13 +0530 Subject: [PATCH 30/98] integrate blob upload save method with Twin Object --- packages/syft/src/syft/client/domain_client.py | 8 +++++++- packages/syft/src/syft/types/twin_object.py | 16 +++++++++++++--- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/packages/syft/src/syft/client/domain_client.py b/packages/syft/src/syft/client/domain_client.py index c8d0be36a89..c0f3819d6af 100644 --- a/packages/syft/src/syft/client/domain_client.py +++ b/packages/syft/src/syft/client/domain_client.py @@ -75,9 +75,15 @@ def upload_dataset(self, dataset: CreateDataset) -> Union[SyftSuccess, SyftError for asset in tqdm(dataset.asset_list): print(f"Uploading: {asset.name}") try: - twin = TwinObject(private_obj=asset.data, mock_obj=asset.mock) + twin = TwinObject( + private_obj=asset.data, + mock_obj=asset.mock, + syft_node_location=self.id, + syft_client_verify_key=self.verify_key, + ) except Exception as e: return SyftError(message=f"Failed to create twin. 
{e}") + twin.save() response = self.api.services.action.set(twin) if isinstance(response, SyftError): print(f"Failed to upload asset\n: {asset}") diff --git a/packages/syft/src/syft/types/twin_object.py b/packages/syft/src/syft/types/twin_object.py index b95eff90555..9fe9cb2f853 100644 --- a/packages/syft/src/syft/types/twin_object.py +++ b/packages/syft/src/syft/types/twin_object.py @@ -23,7 +23,7 @@ def to_action_object(obj: Any) -> ActionObject: return obj if type(obj) in action_types: - return action_types[type(obj)](syft_action_data=obj) + return action_types[type(obj)](syft_action_data_cache=obj) raise Exception(f"{type(obj)} not in action_types") @@ -35,9 +35,9 @@ class TwinObject(SyftObject): __attr_searchable__ = [] id: UID - private_obj: ActionObject + private_obj: Optional[ActionObject] = None private_obj_id: UID = None # type: ignore - mock_obj: ActionObject + mock_obj: Optional[ActionObject] mock_obj_id: UID = None # type: ignore @pydantic.validator("private_obj", pre=True, always=True) @@ -71,3 +71,13 @@ def mock(self) -> ActionObject: mock.syft_twin_type = TwinMode.MOCK mock.id = twin_id return mock + + def save(self): + # Set node location and verify key + self.private_obj.syft_node_location = self.syft_node_location + self.private_obj.syft_client_verify_key = self.syft_client_verify_key + self.mock_obj.syft_node_location = self.syft_node_location + self.mock_obj.syft_client_verify_key = self.syft_client_verify_key + + self.private_obj.save() + self.mock_obj.save() From 104d24a832c17972c53c32095c4df2ff7a8e81cc Mon Sep 17 00:00:00 2001 From: Peter Chung Date: Thu, 17 Aug 2023 16:04:58 +1000 Subject: [PATCH 31/98] Added delete to BlobStorageConnection --- packages/syft/src/syft/store/blob_storage/__init__.py | 3 +++ packages/syft/src/syft/store/blob_storage/seaweedfs.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/packages/syft/src/syft/store/blob_storage/__init__.py b/packages/syft/src/syft/store/blob_storage/__init__.py index 
578f184ec04..57b86d836ab 100644 --- a/packages/syft/src/syft/store/blob_storage/__init__.py +++ b/packages/syft/src/syft/store/blob_storage/__init__.py @@ -142,6 +142,9 @@ def allocate(self, obj: CreateBlobStorageEntry) -> SecureFilePathLocation: def write(self, obj: BlobStorageEntry) -> BlobDeposit: raise NotImplementedError + def delete(self, fp: SecureFilePathLocation) -> bool: + raise NotImplementedError + @serializable() class BlobStorageClient(SyftBaseModel): diff --git a/packages/syft/src/syft/store/blob_storage/seaweedfs.py b/packages/syft/src/syft/store/blob_storage/seaweedfs.py index 8ce903c3e41..1feb96c6b3e 100644 --- a/packages/syft/src/syft/store/blob_storage/seaweedfs.py +++ b/packages/syft/src/syft/store/blob_storage/seaweedfs.py @@ -203,7 +203,7 @@ def delete( Key=fp.path, ) return True - except BotoClientError as e: + except BotoClientError: return False From ea5e1b1707eea29a516807900c560da8e949b22e Mon Sep 17 00:00:00 2001 From: Peter Chung Date: Thu, 17 Aug 2023 18:18:22 +1000 Subject: [PATCH 32/98] updated delete methods to return more specific errors --- .../src/syft/service/blob_storage/service.py | 17 +++++++++++------ .../syft/src/syft/store/blob_storage/on_disk.py | 8 ++++---- .../src/syft/store/blob_storage/seaweedfs.py | 13 +++++-------- 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/packages/syft/src/syft/service/blob_storage/service.py b/packages/syft/src/syft/service/blob_storage/service.py index c3a8c24fe89..131d791d5a4 100644 --- a/packages/syft/src/syft/service/blob_storage/service.py +++ b/packages/syft/src/syft/service/blob_storage/service.py @@ -145,15 +145,20 @@ def delete( ) -> Union[SyftSuccess, SyftError]: result = self.stash.get_by_uid(context.credentials, uid=uid) if result.is_ok(): - with context.node.blob_storage_client.connect() as conn: - file_unlinked = conn.delete(result.ok().location) + try: + with context.node.blob_storage_client.connect() as conn: + file_unlinked_result = 
conn.delete(result.ok().location) + except Exception as e: + return SyftError(message=f"Failed to delete file: {e}") + + if isinstance(file_unlinked_result, SyftError): + return file_unlinked_result blob_storage_entry_deleted = self.stash.delete_by_uid( context.credentials, uid=uid ) - if file_unlinked and blob_storage_entry_deleted: - return SyftSuccess(message="File successfully deleted.") - else: - return SyftError(message="File deletion failed.") + if blob_storage_entry_deleted.is_ok(): + return file_unlinked_result + return SyftError(message=result.err()) diff --git a/packages/syft/src/syft/store/blob_storage/on_disk.py b/packages/syft/src/syft/store/blob_storage/on_disk.py index a1d8084f883..89fbc96527e 100644 --- a/packages/syft/src/syft/store/blob_storage/on_disk.py +++ b/packages/syft/src/syft/store/blob_storage/on_disk.py @@ -68,12 +68,12 @@ def allocate(self, obj: CreateBlobStorageEntry) -> SecureFilePathLocation: def write(self, obj: BlobStorageEntry) -> BlobDeposit: return OnDiskBlobDeposit(blob_storage_entry_id=obj.id) - def delete(self, fp: SecureFilePathLocation) -> bool: + def delete(self, fp: SecureFilePathLocation) -> Union[SyftSuccess, SyftError]: try: (self._base_directory / fp.path).unlink() - return True - except FileNotFoundError: - return False + return SyftSuccess(message="Successfully deleted file.") + except FileNotFoundError as e: + return SyftError(message=f"Failed to delete file: {e}") @serializable() diff --git a/packages/syft/src/syft/store/blob_storage/seaweedfs.py b/packages/syft/src/syft/store/blob_storage/seaweedfs.py index 1feb96c6b3e..a2ca828da3e 100644 --- a/packages/syft/src/syft/store/blob_storage/seaweedfs.py +++ b/packages/syft/src/syft/store/blob_storage/seaweedfs.py @@ -196,15 +196,12 @@ def complete_multipart_upload( def delete( self, fp: SecureFilePathLocation, - ) -> bool: + ) -> Union[SyftSuccess, SyftError]: try: - self.client.delete_object( - Bucket=self.bucket_name, - Key=fp.path, - ) - return True - except 
BotoClientError: - return False + self.client.delete_object(Bucket=self.bucket_name, Key=fp.path) + return SyftSuccess(message="Successfully deleted file.") + except BotoClientError as e: + return SyftError(message=str(e)) class SeaweedFSConfig(BlobStorageConfig): From d44eae0a5b082ff6f5f5bcadc2fbf0e237970144 Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Thu, 17 Aug 2023 16:46:33 +0530 Subject: [PATCH 33/98] create a method to get from context or api update syft_action_data and _set_syft_action_data to use from_context_or_api --- .../syft/src/syft/client/domain_client.py | 2 +- packages/syft/src/syft/node/node.py | 2 +- .../src/syft/service/action/action_object.py | 52 +++++-------------- packages/syft/src/syft/service/service.py | 47 +++++++++++++++++ .../src/syft/store/blob_storage/seaweedfs.py | 25 ++++----- packages/syft/src/syft/types/twin_object.py | 4 +- 6 files changed, 75 insertions(+), 57 deletions(-) diff --git a/packages/syft/src/syft/client/domain_client.py b/packages/syft/src/syft/client/domain_client.py index c0f3819d6af..a309a40e840 100644 --- a/packages/syft/src/syft/client/domain_client.py +++ b/packages/syft/src/syft/client/domain_client.py @@ -81,9 +81,9 @@ def upload_dataset(self, dataset: CreateDataset) -> Union[SyftSuccess, SyftError syft_node_location=self.id, syft_client_verify_key=self.verify_key, ) + twin.save() except Exception as e: return SyftError(message=f"Failed to create twin. 
{e}") - twin.save() response = self.api.services.action.set(twin) if isinstance(response, SyftError): print(f"Failed to upload asset\n: {asset}") diff --git a/packages/syft/src/syft/node/node.py b/packages/syft/src/syft/node/node.py index 139a0b68258..72b7ca6b19e 100644 --- a/packages/syft/src/syft/node/node.py +++ b/packages/syft/src/syft/node/node.py @@ -204,7 +204,7 @@ def set_node_context( cls.__node_context_registry__[key] = context @classmethod - def get_auth_context(cls) -> AuthedServiceContext: + def get_auth_context(cls) -> Optional[AuthedServiceContext]: if len(cls.__node_context_registry__) > 0: return list(cls.__node_context_registry__.values())[0] diff --git a/packages/syft/src/syft/service/action/action_object.py b/packages/syft/src/syft/service/action/action_object.py index 31085d44abc..3511ea509a3 100644 --- a/packages/syft/src/syft/service/action/action_object.py +++ b/packages/syft/src/syft/service/action/action_object.py @@ -38,8 +38,8 @@ from ...types.uid import LineageID from ...types.uid import UID from ...util.logger import debug -from ..blob_storage.service import BlobStorageService from ..response import SyftException +from ..service import from_api_or_context from .action_data_empty import ActionDataEmpty from .action_permissions import ActionPermission from .action_types import action_type_for_object @@ -454,53 +454,29 @@ class ActionObject(SyftObject): @property def syft_action_data(self) -> Any: - # relative - from ...client.api import APIRegistry - from ...node.node import AuthNodeContextRegistry - if self.syft_action_data_cache is None: - api = APIRegistry.api_for( - node_uid=self.syft_node_location, - user_verify_key=self.syft_client_verify_key, + blob_storage_read_method = from_api_or_context( + func_or_path="blob_storage.read", + syft_node_location=self.syft_node_location, + syft_client_verify_key=self.syft_client_verify_key, ) - if api is not None: - blob_retrieval_object = api.services.blob_storage.read( - 
uid=self.syft_blob_storage_entry_id - ) - self.syft_action_data_cache = blob_retrieval_object.read() - else: - node_context = AuthNodeContextRegistry.get_auth_context() - blob_service: BlobStorageService = node_context.node.get_service( - BlobStorageService - ) - blob_retrieval_object = blob_service.read( - node_context, self.syft_blob_storage_entry_id - ) - self.syft_action_data_cache = blob_retrieval_object.read() + blob_retrieval_object = blob_storage_read_method( + uid=self.syft_blob_storage_entry_id + ) + self.syft_action_data_cache = blob_retrieval_object.read() return self.syft_action_data_cache def _set_syft_action_data(self, data: Any) -> None: - # relative - from ...client.api import APIRegistry - from ...node.node import AuthNodeContextRegistry - storage_entry = CreateBlobStorageEntry.from_obj(data) - api = APIRegistry.api_for( - node_uid=self.syft_node_location, - user_verify_key=self.syft_client_verify_key, + allocate_method = from_api_or_context( + func_or_path="blob_storage.allocate", + syft_node_location=self.syft_node_location, + syft_client_verify_key=self.syft_client_verify_key, ) - if api is not None: - blob_deposit_object = api.services.blob_storage.allocate(storage_entry) - else: - node_context = AuthNodeContextRegistry.get_auth_context() - blob_service: BlobStorageService = node_context.node.get_service( - BlobStorageService - ) - blob_deposit_object = blob_service.allocate(node_context, storage_entry) - + blob_deposit_object = allocate_method(storage_entry) blob_deposit_object.write(serialize(data, to_bytes=True)) self.syft_blob_storage_entry_id = blob_deposit_object.blob_storage_entry_id diff --git a/packages/syft/src/syft/service/service.py b/packages/syft/src/syft/service/service.py index e7533a71823..f348d8441cd 100644 --- a/packages/syft/src/syft/service/service.py +++ b/packages/syft/src/syft/service/service.py @@ -1,6 +1,7 @@ # stdlib from collections import defaultdict from copy import deepcopy +from functools import partial import 
inspect from inspect import Parameter from typing import Any @@ -407,3 +408,49 @@ def get_transform( version_to = type_to.__version__ mapping_string = f"{klass_from}_{version_from}_x_{klass_to}_{version_to}" return cls.__object_transform_registry__[mapping_string] + + +def from_api_or_context( + func_or_path: str, + syft_node_location: Optional[UID] = None, + syft_client_verify_key: Optional[SyftVerifyKey] = None, + role: Optional[ServiceRole] = None, +): + # relative + from ..client.api import APIRegistry + from ..node.node import AuthNodeContextRegistry + + if callable(func_or_path): + func_or_path = func_or_path.__qualname__ + + node_context = AuthNodeContextRegistry.get_auth_context() + + if syft_node_location and syft_client_verify_key: + api = APIRegistry.api_for( + node_uid=syft_node_location, + user_verify_key=syft_client_verify_key, + ) + if api is not None: + service_method = api.services + for path in func_or_path.split("."): + service_method = getattr(service_method, path) + return service_method + elif node_context: + user_config_registry = UserServiceConfigRegistry.from_role( + node_context.role, + ) + if func_or_path not in user_config_registry: + if ServiceConfigRegistry.path_exists(func_or_path): + return SyftError( + message=f"As a `{role}` you have has no access to: {func_or_path}" + ) + else: + return SyftError( + message=f"API call not in registered services: {func_or_path}" + ) + + _private_api_path = user_config_registry.private_path_for(func_or_path) + service_method = node_context.node.get_service_method( + _private_api_path, + ) + return partial(service_method, node_context) diff --git a/packages/syft/src/syft/store/blob_storage/seaweedfs.py b/packages/syft/src/syft/store/blob_storage/seaweedfs.py index 3a7f3b9feef..c58680ba9a3 100644 --- a/packages/syft/src/syft/store/blob_storage/seaweedfs.py +++ b/packages/syft/src/syft/store/blob_storage/seaweedfs.py @@ -26,6 +26,7 @@ from ...service.response import SyftError from ...service.response 
import SyftException from ...service.response import SyftSuccess +from ...service.service import from_api_or_context from ...types.blob_storage import BlobStorageEntry from ...types.blob_storage import CreateBlobStorageEntry from ...types.blob_storage import SeaweedSecureFilePathLocation @@ -57,16 +58,12 @@ class SeaweedFSBlobDeposit(BlobDeposit): def write(self, data: bytes) -> Union[SyftSuccess, SyftError]: # relative from ...client.api import APIRegistry - from ...node.node import AuthNodeContextRegistry - from ...service.blob_storage.service import BlobStorageService api = APIRegistry.api_for( node_uid=self.syft_node_location, user_verify_key=self.syft_client_verify_key, ) - node_context = AuthNodeContextRegistry.get_auth_context() - etags = [] try: @@ -87,17 +84,15 @@ def write(self, data: bytes) -> Union[SyftSuccess, SyftError]: except requests.RequestException as e: return SyftError(message=str(e)) - if api is not None: - return api.services.blob_storage.mark_write_complete( - etags=etags, uid=self.blob_storage_entry_id - ) - else: - mark_as_complete = node_context.node.get_service_method( - BlobStorageService.mark_write_complete - ) - return mark_as_complete( - node_context, etags=etags, uid=self.blob_storage_entry_id - ) + mark_write_complete_method = from_api_or_context( + func_or_path="blob_storage.mark_write_complete", + syft_node_location=self.syft_node_location, + syft_client_verify_key=self.syft_client_verify_key, + ) + return mark_write_complete_method( + etags=etags, + uid=self.blob_storage_entry_id, + ) @serializable() diff --git a/packages/syft/src/syft/types/twin_object.py b/packages/syft/src/syft/types/twin_object.py index 9fe9cb2f853..b66583ff810 100644 --- a/packages/syft/src/syft/types/twin_object.py +++ b/packages/syft/src/syft/types/twin_object.py @@ -35,9 +35,9 @@ class TwinObject(SyftObject): __attr_searchable__ = [] id: UID - private_obj: Optional[ActionObject] = None + private_obj: ActionObject private_obj_id: UID = None # type: ignore 
- mock_obj: Optional[ActionObject] + mock_obj: ActionObject mock_obj_id: UID = None # type: ignore @pydantic.validator("private_obj", pre=True, always=True) From bc37ff19ea012e9e72ecaf750221f569d19ccf5d Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Thu, 17 Aug 2023 18:22:53 +0530 Subject: [PATCH 34/98] move AuthNodeContextRegistry.set_node_context to api call method fix Action Object creation in convert_to_pointers add __check_action_data to BASE_PASSTHROUGH_ATTRS --- packages/syft/src/syft/node/node.py | 2 +- .../src/syft/service/action/action_object.py | 17 +++++++++++------ packages/syft/src/syft/service/service.py | 9 ++++----- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/packages/syft/src/syft/node/node.py b/packages/syft/src/syft/node/node.py index 72b7ca6b19e..c78ef47b2ed 100644 --- a/packages/syft/src/syft/node/node.py +++ b/packages/syft/src/syft/node/node.py @@ -461,7 +461,6 @@ def __repr__(self) -> str: def post_init(self) -> None: context = AuthedServiceContext(node=self, credentials=self.verify_key) - AuthNodeContextRegistry.set_node_context(self.id, context, self.verify_key) if UserCodeService in self.services: user_code_service = self.get_service(UserCodeService) @@ -755,6 +754,7 @@ def handle_api_call_with_unsigned_result( context = AuthedServiceContext( node=self, credentials=credentials, role=role ) + AuthNodeContextRegistry.set_node_context(self.id, context, self.verify_key) user_config_registry = UserServiceConfigRegistry.from_role(role) diff --git a/packages/syft/src/syft/service/action/action_object.py b/packages/syft/src/syft/service/action/action_object.py index 3511ea509a3..df63da9c5a6 100644 --- a/packages/syft/src/syft/service/action/action_object.py +++ b/packages/syft/src/syft/service/action/action_object.py @@ -300,7 +300,7 @@ def convert_to_pointers( if args is not None: for arg in args: if not isinstance(arg, ActionObject): - arg = ActionObject.from_obj(syft_action_data_cache=arg) + arg = 
ActionObject.from_obj(syft_action_data=arg) arg.syft_node_uid = node_uid arg.save() arg = api.services.action.set(arg) @@ -312,7 +312,7 @@ def convert_to_pointers( if kwargs is not None: for k, arg in kwargs.items(): if not isinstance(arg, ActionObject): - arg = ActionObject.from_obj(syft_action_data_cache=arg) + arg = ActionObject.from_obj(syft_action_data=arg) arg.syft_node_uid = node_uid arg.save() arg = api.services.action.set(arg) @@ -420,6 +420,7 @@ def debox_args_and_kwargs(args: Any, kwargs: Any) -> Tuple[Any, Any]: "save", "_set_syft_action_data", "syft_action_data", + "__check_action_data", ] @@ -476,9 +477,10 @@ def _set_syft_action_data(self, data: Any) -> None: syft_node_location=self.syft_node_location, syft_client_verify_key=self.syft_client_verify_key, ) - blob_deposit_object = allocate_method(storage_entry) - blob_deposit_object.write(serialize(data, to_bytes=True)) - self.syft_blob_storage_entry_id = blob_deposit_object.blob_storage_entry_id + if allocate_method is not None: + blob_deposit_object = allocate_method(storage_entry) + blob_deposit_object.write(serialize(data, to_bytes=True)) + self.syft_blob_storage_entry_id = blob_deposit_object.blob_storage_entry_id self.syft_action_data_cache = data self.syft_action_data_type = type(data) @@ -1020,7 +1022,10 @@ def _syft_output_action_object( if context.result_twin_type is not None: syft_twin_type = context.result_twin_type result = constructor( - syft_twin_type=syft_twin_type, syft_action_data_cache=result + syft_twin_type=syft_twin_type, + syft_action_data_cache=result, + syft_node_location=self.syft_node_location, + syft_client_verify_key=self.syft_client_verify_key, ) return result diff --git a/packages/syft/src/syft/service/service.py b/packages/syft/src/syft/service/service.py index f348d8441cd..cc00de0156d 100644 --- a/packages/syft/src/syft/service/service.py +++ b/packages/syft/src/syft/service/service.py @@ -414,7 +414,6 @@ def from_api_or_context( func_or_path: str, syft_node_location: 
Optional[UID] = None, syft_client_verify_key: Optional[SyftVerifyKey] = None, - role: Optional[ServiceRole] = None, ): # relative from ..client.api import APIRegistry @@ -423,8 +422,6 @@ def from_api_or_context( if callable(func_or_path): func_or_path = func_or_path.__qualname__ - node_context = AuthNodeContextRegistry.get_auth_context() - if syft_node_location and syft_client_verify_key: api = APIRegistry.api_for( node_uid=syft_node_location, @@ -435,14 +432,16 @@ def from_api_or_context( for path in func_or_path.split("."): service_method = getattr(service_method, path) return service_method - elif node_context: + + node_context = AuthNodeContextRegistry.get_auth_context() + if node_context is not None: user_config_registry = UserServiceConfigRegistry.from_role( node_context.role, ) if func_or_path not in user_config_registry: if ServiceConfigRegistry.path_exists(func_or_path): return SyftError( - message=f"As a `{role}` you have has no access to: {func_or_path}" + message=f"As a `{node_context.role}` you have has no access to: {func_or_path}" ) else: return SyftError( From 4c6c04f013c6a33735567284ccf88c31857c91c2 Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Thu, 17 Aug 2023 18:27:16 +0530 Subject: [PATCH 35/98] fix on disk write --- .../syft/src/syft/store/blob_storage/on_disk.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/packages/syft/src/syft/store/blob_storage/on_disk.py b/packages/syft/src/syft/store/blob_storage/on_disk.py index da8f5324eaa..4dc1ffca6bd 100644 --- a/packages/syft/src/syft/store/blob_storage/on_disk.py +++ b/packages/syft/src/syft/store/blob_storage/on_disk.py @@ -32,15 +32,14 @@ class OnDiskBlobDeposit(BlobDeposit): def write(self, data: bytes) -> Union[SyftSuccess, SyftError]: # relative - from ...client.api import APIRegistry + from ...service.service import from_api_or_context - api = APIRegistry.api_for( - node_uid=self.syft_node_location, - user_verify_key=self.syft_client_verify_key, - ) - 
return api.services.blob_storage.write_to_disk( - data=data, uid=self.blob_storage_entry_id + write_to_disk_method = from_api_or_context( + func_or_path="blob_storage.write_to_disk", + syft_node_location=self.syft_node_location, + syft_client_verify_key=self.syft_client_verify_key, ) + return write_to_disk_method(data=data, uid=self.blob_storage_entry_id) class OnDiskBlobStorageConnection(BlobStorageConnection): From 1d60c7c4cae5601e541f8eeb56dcad256d0e26a0 Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Fri, 18 Aug 2023 12:16:26 +0530 Subject: [PATCH 36/98] skip serde for syft_action_data_cache in Pandas Action Object --- packages/syft/src/syft/service/action/pandas.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/syft/src/syft/service/action/pandas.py b/packages/syft/src/syft/service/action/pandas.py index cac27b94f12..c36f909561c 100644 --- a/packages/syft/src/syft/service/action/pandas.py +++ b/packages/syft/src/syft/service/action/pandas.py @@ -16,7 +16,7 @@ from .action_types import action_types -@serializable() +@serializable(without=["syft_action_data_cache"]) class PandasDataFrameObject(ActionObject): __canonical_name__ = "PandasDataframeObject" __version__ = SYFT_OBJECT_VERSION_1 @@ -42,7 +42,7 @@ def syft_is_property(self, obj: Any, method: str) -> bool: return super().syft_is_property(obj, method) -@serializable() +@serializable(without=["syft_action_data_cache"]) class PandasSeriesObject(ActionObject): __canonical_name__ = "PandasSeriesObject" __version__ = SYFT_OBJECT_VERSION_1 From 682d4e22f1f7c5004c1f03d13555811f85158416 Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Sun, 20 Aug 2023 21:13:31 +0530 Subject: [PATCH 37/98] - add a created time DateTime field to ActionObject - rename .save method to _save_to_blob_store - conditionally fetch data from blob store in case of syft_action_data - set syft_action_data to ActionDataEmpty post saving data to blob store --- .../src/syft/service/action/action_object.py | 70 
+++++++++++-------- .../src/syft/service/action/action_service.py | 17 ++++- .../syft/src/syft/service/action/numpy.py | 6 +- .../syft/src/syft/service/action/pandas.py | 4 +- packages/syft/src/syft/types/twin_object.py | 10 ++- 5 files changed, 62 insertions(+), 45 deletions(-) diff --git a/packages/syft/src/syft/service/action/action_object.py b/packages/syft/src/syft/service/action/action_object.py index df63da9c5a6..e1de0d2fc3c 100644 --- a/packages/syft/src/syft/service/action/action_object.py +++ b/packages/syft/src/syft/service/action/action_object.py @@ -10,7 +10,6 @@ from typing import Callable from typing import ClassVar from typing import Dict -from typing import KeysView from typing import List from typing import Optional from typing import Tuple @@ -32,6 +31,7 @@ from ...service.response import SyftError from ...store.linked_obj import LinkedObject from ...types.blob_storage import CreateBlobStorageEntry +from ...types.datetime import DateTime from ...types.syft_object import SYFT_OBJECT_VERSION_1 from ...types.syft_object import SyftBaseObject from ...types.syft_object import SyftObject @@ -302,7 +302,7 @@ def convert_to_pointers( if not isinstance(arg, ActionObject): arg = ActionObject.from_obj(syft_action_data=arg) arg.syft_node_uid = node_uid - arg.save() + arg._save_to_blob_store() arg = api.services.action.set(arg) # arg = action_obj.send( # client @@ -314,7 +314,7 @@ def convert_to_pointers( if not isinstance(arg, ActionObject): arg = ActionObject.from_obj(syft_action_data=arg) arg.syft_node_uid = node_uid - arg.save() + arg._save_to_blob_store() arg = api.services.action.set(arg) # arg = action_obj.send(client) @@ -417,7 +417,7 @@ def debox_args_and_kwargs(args: Any, kwargs: Any) -> Tuple[Any, Any]: "_repr_debug_", "as_empty", "get", - "save", + "_save_to_blob_store", "_set_syft_action_data", "syft_action_data", "__check_action_data", @@ -447,15 +447,17 @@ class ActionObject(SyftObject): _syft_post_hooks__: Dict[str, List] = {} syft_twin_type: 
TwinMode = TwinMode.NONE syft_passthrough_attrs = BASE_PASSTHROUGH_ATTRS - syft_action_data_type: Optional[Any] + syft_action_data_type: Optional[Type] syft_action_data_repr_: Optional[str] syft_action_data_str_: Optional[str] syft_has_bool_attr: Optional[bool] + syft_resolve_data: Optional[bool] + syft_created_at: Optional[DateTime] # syft_dont_wrap_attrs = ["shape"] @property def syft_action_data(self) -> Any: - if self.syft_action_data_cache is None: + if self.syft_blob_storage_entry_id and self.syft_created_at: blob_storage_read_method = from_api_or_context( func_or_path="blob_storage.read", syft_node_location=self.syft_node_location, @@ -470,28 +472,32 @@ def syft_action_data(self) -> Any: return self.syft_action_data_cache def _set_syft_action_data(self, data: Any) -> None: - storage_entry = CreateBlobStorageEntry.from_obj(data) + if not isinstance(data, ActionDataEmpty): + storage_entry = CreateBlobStorageEntry.from_obj(data) - allocate_method = from_api_or_context( - func_or_path="blob_storage.allocate", - syft_node_location=self.syft_node_location, - syft_client_verify_key=self.syft_client_verify_key, - ) - if allocate_method is not None: - blob_deposit_object = allocate_method(storage_entry) - blob_deposit_object.write(serialize(data, to_bytes=True)) - self.syft_blob_storage_entry_id = blob_deposit_object.blob_storage_entry_id + allocate_method = from_api_or_context( + func_or_path="blob_storage.allocate", + syft_node_location=self.syft_node_location, + syft_client_verify_key=self.syft_client_verify_key, + ) + if allocate_method is not None: + blob_deposit_object = allocate_method(storage_entry) + blob_deposit_object.write(serialize(data, to_bytes=True)) + self.syft_blob_storage_entry_id = ( + blob_deposit_object.blob_storage_entry_id + ) - self.syft_action_data_cache = data - self.syft_action_data_type = type(data) + self.syft_action_data_type = type(data) - self.syft_action_data_repr_ = ( - data._repr_markdown_() - if hasattr(data, "_repr_markdown_") - 
else data.__repr__() - ) - self.syft_action_data_str_ = str(data) - self.syft_has_bool_attr = hasattr(data, "__bool__") + self.syft_action_data_repr_ = ( + data._repr_markdown_() + if hasattr(data, "_repr_markdown_") + else data.__repr__() + ) + self.syft_action_data_str_ = str(data) + self.syft_has_bool_attr = hasattr(data, "__bool__") + + self.syft_action_data_cache = data syft_action_data = syft_action_data.setter(_set_syft_action_data) @@ -523,10 +529,10 @@ def __check_action_data(cls, values: dict) -> dict: values["syft_action_data_str_"] = str(v) return values - def save(self) -> None: + def _save_to_blob_store(self) -> None: data = self.syft_action_data self._set_syft_action_data(data) - self.syft_action_data_cache = None + self.syft_action_data_cache = self.as_empty() @property def is_mock(self): @@ -633,6 +639,8 @@ def _syft_try_to_save_to_store(self, obj) -> None: # relative from ...client.api import APIRegistry + obj._save_to_blob_store() + action = Action( path="", op="", @@ -799,7 +807,7 @@ def send(self, client: SyftClient) -> Self: """Send the object to a Syft Client""" self.syft_node_location = client.id self.syft_client_verify_key = client.verify_key - self.save() + self._save_to_blob_store() res = client.api.services.action.set(self) return res @@ -1294,8 +1302,8 @@ def __setattr__(self, name: str, value: Any) -> Any: context_self = self.syft_action_data # type: ignore return context_self.__setattr__(name, value) - def keys(self) -> KeysView[str]: - return self.syft_action_data.keys() # type: ignore + # def keys(self) -> KeysView[str]: + # return self.syft_action_data.keys() # type: ignore ###### __DUNDER_MIFFLIN__ @@ -1471,7 +1479,7 @@ def __rrshift__(self, other: Any) -> Any: return self._syft_output_action_object(self.__rrshift__(other)) -@serializable(without=["syft_action_data_cache"]) +@serializable() class AnyActionObject(ActionObject): __canonical_name__ = "AnyActionObject" __version__ = SYFT_OBJECT_VERSION_1 diff --git 
a/packages/syft/src/syft/service/action/action_service.py b/packages/syft/src/syft/service/action/action_service.py index 92b364b35e1..c5256fa6c0c 100644 --- a/packages/syft/src/syft/service/action/action_service.py +++ b/packages/syft/src/syft/service/action/action_service.py @@ -13,6 +13,7 @@ # relative from ...serde.serializable import serializable +from ...types.datetime import DateTime from ...types.twin_object import TwinObject from ...types.uid import UID from ..code.user_code import UserCode @@ -55,7 +56,7 @@ def np_array(self, context: AuthedServiceContext, data: Any) -> Any: np_obj = NumpyArrayObject( dtype=data.dtype, shape=data.shape, syft_action_data_cache=data ) - np_obj.save() + np_obj._save_to_blob_store() np_pointer = self.set(context, np_obj) return np_pointer @@ -72,6 +73,13 @@ def set( ) -> Result[ActionObject, str]: """Save an object to the action store""" # 🟡 TODO 9: Create some kind of type checking / protocol for SyftSerializable + + if isinstance(action_object, ActionObject): + action_object.syft_created_at = DateTime.now() + else: + action_object.private_obj.syft_created_at = DateTime.now() + action_object.mock_obj.syft_created_at = DateTime.now() + result = self.store.set( uid=action_object.id, credentials=context.credentials, @@ -208,7 +216,7 @@ def _user_code_execute( except Exception as e: return Err(f"_user_code_execute failed. 
{e}") - result_action_object.save() + result_action_object._save_to_blob_store() set_result = self.store.set( uid=result_id, credentials=context.credentials, @@ -394,8 +402,10 @@ def execute( # relative from .plan import Plan + has_value_mutated = True if action.action_type == ActionType.CREATEOBJECT: result_action_object = Ok(action.create_object) + has_value_mutated = False elif action.action_type == ActionType.FUNCTION: result_action_object = self.call_function(context, action) else: @@ -441,7 +451,8 @@ def execute( has_result_read_permission = self.has_read_permission_for_action_result( context, action ) - result_action_object.save() + if has_value_mutated: + result_action_object._save_to_blob_store() set_result = self.store.set( uid=action.result_id, credentials=context.credentials, diff --git a/packages/syft/src/syft/service/action/numpy.py b/packages/syft/src/syft/service/action/numpy.py index 767e95b613e..1f3e706d9f3 100644 --- a/packages/syft/src/syft/service/action/numpy.py +++ b/packages/syft/src/syft/service/action/numpy.py @@ -39,7 +39,7 @@ def numpy_like_eq(left: Any, right: Any) -> bool: # 🔵 TODO 7: Map TPActionObjects and their 3rd Party types like numpy type to these # classes for bi-directional lookup. 
-@serializable(without=["syft_action_data_cache"]) +@serializable() class NumpyArrayObject(ActionObject, np.lib.mixins.NDArrayOperatorsMixin): __canonical_name__ = "NumpyArrayObject" __version__ = SYFT_OBJECT_VERSION_1 @@ -81,7 +81,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): ) -@serializable(without=["syft_action_data_cache"]) +@serializable() class NumpyScalarObject(ActionObject, np.lib.mixins.NDArrayOperatorsMixin): __canonical_name__ = "NumpyScalarObject" __version__ = SYFT_OBJECT_VERSION_1 @@ -94,7 +94,7 @@ def __float__(self) -> float: return float(self.syft_action_data) -@serializable(without=["syft_action_data_cache"]) +@serializable() class NumpyBoolObject(ActionObject, np.lib.mixins.NDArrayOperatorsMixin): __canonical_name__ = "NumpyBoolObject" __version__ = SYFT_OBJECT_VERSION_1 diff --git a/packages/syft/src/syft/service/action/pandas.py b/packages/syft/src/syft/service/action/pandas.py index c36f909561c..cac27b94f12 100644 --- a/packages/syft/src/syft/service/action/pandas.py +++ b/packages/syft/src/syft/service/action/pandas.py @@ -16,7 +16,7 @@ from .action_types import action_types -@serializable(without=["syft_action_data_cache"]) +@serializable() class PandasDataFrameObject(ActionObject): __canonical_name__ = "PandasDataframeObject" __version__ = SYFT_OBJECT_VERSION_1 @@ -42,7 +42,7 @@ def syft_is_property(self, obj: Any, method: str) -> bool: return super().syft_is_property(obj, method) -@serializable(without=["syft_action_data_cache"]) +@serializable() class PandasSeriesObject(ActionObject): __canonical_name__ = "PandasSeriesObject" __version__ = SYFT_OBJECT_VERSION_1 diff --git a/packages/syft/src/syft/types/twin_object.py b/packages/syft/src/syft/types/twin_object.py index b66583ff810..df5fd1c6484 100644 --- a/packages/syft/src/syft/types/twin_object.py +++ b/packages/syft/src/syft/types/twin_object.py @@ -72,12 +72,10 @@ def mock(self) -> ActionObject: mock.id = twin_id return mock - def save(self): + def 
_save_to_store(self): # Set node location and verify key + # Only save private data to blob store + # Mock data is still attached to Twin object. self.private_obj.syft_node_location = self.syft_node_location self.private_obj.syft_client_verify_key = self.syft_client_verify_key - self.mock_obj.syft_node_location = self.syft_node_location - self.mock_obj.syft_client_verify_key = self.syft_client_verify_key - - self.private_obj.save() - self.mock_obj.save() + self.private_obj._save_to_blob_store() From 4ec1b0a29f74322e3bc081c11b20d6b88d160418 Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Sun, 20 Aug 2023 21:27:36 +0530 Subject: [PATCH 38/98] skip syft_action_data_cache check if value is ActionEmptyData --- .../syft/src/syft/service/action/action_object.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/packages/syft/src/syft/service/action/action_object.py b/packages/syft/src/syft/service/action/action_object.py index e1de0d2fc3c..3e767b326ba 100644 --- a/packages/syft/src/syft/service/action/action_object.py +++ b/packages/syft/src/syft/service/action/action_object.py @@ -521,12 +521,14 @@ class Config: @pydantic.root_validator() def __check_action_data(cls, values: dict) -> dict: v = values.get("syft_action_data_cache") - values["syft_action_data_type"] = type(v) + if not isinstance(v, ActionDataEmpty): + values["syft_action_data_type"] = type(v) - values["syft_action_data_repr_"] = ( - v._repr_markdown_() if hasattr(v, "_repr_markdown_") else v.__repr__() - ) - values["syft_action_data_str_"] = str(v) + values["syft_action_data_repr_"] = ( + v._repr_markdown_() if hasattr(v, "_repr_markdown_") else v.__repr__() + ) + values["syft_action_data_str_"] = str(v) + values["syft_has_bool_attr"] = hasattr(v, "__bool__") return values def _save_to_blob_store(self) -> None: From 70486984cd1aa269d82cfc15cd196c77e9922915 Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Mon, 21 Aug 2023 12:23:37 +0530 Subject: [PATCH 39/98] - Added a method 
to return ActionEmptyData object at ActionObject level - propogate repr, str, type and bool information for Action Objects Co-authored-by: Kien Dang Co-authored-by: Peter Chung --- .../src/syft/service/action/action_object.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/packages/syft/src/syft/service/action/action_object.py b/packages/syft/src/syft/service/action/action_object.py index 3e767b326ba..24373b9e2cb 100644 --- a/packages/syft/src/syft/service/action/action_object.py +++ b/packages/syft/src/syft/service/action/action_object.py @@ -421,6 +421,7 @@ def debox_args_and_kwargs(args: Any, kwargs: Any) -> Tuple[Any, Any]: "_set_syft_action_data", "syft_action_data", "__check_action_data", + "as_empty_data", ] @@ -534,7 +535,7 @@ def __check_action_data(cls, values: dict) -> dict: def _save_to_blob_store(self) -> None: data = self.syft_action_data self._set_syft_action_data(data) - self.syft_action_data_cache = self.as_empty() + self.syft_action_data_cache = self.as_empty_data() @property def is_mock(self): @@ -888,6 +889,9 @@ def remove_trace_hook(cls): return True # self._syft_pre_hooks__[HOOK_ALWAYS].pop(trace_action_side_effct, None) + def as_empty_data(self) -> ActionDataEmpty: + return ActionDataEmpty(syft_internal_type=self.syft_internal_type) + @staticmethod def empty( syft_internal_type: Type[Any] = NoneType, @@ -1080,7 +1084,16 @@ def _syft_attr_propagate_ids(self, context, name: str, result: Any) -> Any: result.syft_client_verify_key = context.syft_client_verify_key # Propogate Syft blob storage entry id - result.syft_blob_storage_entry_id = context.obj.syft_blob_storage_entry_id + object_attrs = [ + "syft_blob_storage_entry_id", + "syft_action_data_repr_", + "syft_action_data_str_", + "syft_action_data_type", + "syft_has_bool_attr", + ] + for attr_name in object_attrs: + attr_value = getattr(context.obj, attr_name, None) + setattr(result, attr_name, attr_value) # Propagate Result ID if context.result_id is not 
None: From 276b30ffc4373644e60ec25d548594fd6e4c5697 Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Mon, 21 Aug 2023 13:05:43 +0530 Subject: [PATCH 40/98] - rename .save in twin object to _save_to_blob_store - replace action.save method with action.save in requests.py - pass syft client verify key and node location to from_obj constructor Co-authored-by: Kien Dang --- .../syft/src/syft/client/domain_client.py | 2 +- .../src/syft/service/action/action_object.py | 21 ++++++++++++++-- .../src/syft/service/action/action_service.py | 25 ++++--------------- .../syft/src/syft/service/request/request.py | 23 ++++++++++++----- packages/syft/src/syft/types/twin_object.py | 2 +- 5 files changed, 43 insertions(+), 30 deletions(-) diff --git a/packages/syft/src/syft/client/domain_client.py b/packages/syft/src/syft/client/domain_client.py index a309a40e840..ca3dda482a1 100644 --- a/packages/syft/src/syft/client/domain_client.py +++ b/packages/syft/src/syft/client/domain_client.py @@ -81,7 +81,7 @@ def upload_dataset(self, dataset: CreateDataset) -> Union[SyftSuccess, SyftError syft_node_location=self.id, syft_client_verify_key=self.verify_key, ) - twin.save() + twin._save_to_blob_store() except Exception as e: return SyftError(message=f"Failed to create twin. 
{e}") response = self.api.services.action.set(twin) diff --git a/packages/syft/src/syft/service/action/action_object.py b/packages/syft/src/syft/service/action/action_object.py index 24373b9e2cb..1c5d4fe4879 100644 --- a/packages/syft/src/syft/service/action/action_object.py +++ b/packages/syft/src/syft/service/action/action_object.py @@ -26,6 +26,7 @@ # relative from ...client.api import SyftAPI from ...client.client import SyftClient +from ...node.credentials import SyftVerifyKey from ...serde.serializable import serializable from ...serde.serialize import _serialize as serialize from ...service.response import SyftError @@ -300,7 +301,11 @@ def convert_to_pointers( if args is not None: for arg in args: if not isinstance(arg, ActionObject): - arg = ActionObject.from_obj(syft_action_data=arg) + arg = ActionObject.from_obj( + syft_action_data=arg, + syft_client_verify_key=api.syft_client_verify_key, + syft_node_location=api.syft_node_location, + ) arg.syft_node_uid = node_uid arg._save_to_blob_store() arg = api.services.action.set(arg) @@ -312,7 +317,11 @@ def convert_to_pointers( if kwargs is not None: for k, arg in kwargs.items(): if not isinstance(arg, ActionObject): - arg = ActionObject.from_obj(syft_action_data=arg) + arg = ActionObject.from_obj( + syft_action_data=arg, + syft_client_verify_key=api.syft_client_verify_key, + syft_node_location=api.syft_node_location, + ) arg.syft_node_uid = node_uid arg._save_to_blob_store() arg = api.services.action.set(arg) @@ -850,6 +859,8 @@ def from_obj( syft_action_data: Any, id: Optional[UID] = None, syft_lineage_id: Optional[LineageID] = None, + syft_client_verify_key: Optional[SyftVerifyKey] = None, + syft_node_location: Optional[UID] = None, ) -> ActionObject: """Create an ActionObject from an existing object. 
@@ -870,6 +881,12 @@ def from_obj( if id is not None: action_object.id = id + if syft_client_verify_key is not None: + action_object.syft_client_verify_key = syft_client_verify_key + + if syft_node_location is not None: + action_object.syft_node_location = syft_node_location + if syft_lineage_id is not None: action_object.id = syft_lineage_id.id action_object.syft_history_hash = syft_lineage_id.syft_history_hash diff --git a/packages/syft/src/syft/service/action/action_service.py b/packages/syft/src/syft/service/action/action_service.py index c5256fa6c0c..889bc8da8f0 100644 --- a/packages/syft/src/syft/service/action/action_service.py +++ b/packages/syft/src/syft/service/action/action_service.py @@ -92,23 +92,6 @@ def set( return Ok(action_object) return result.err() - @service_method(path="action.save", name="save") - def save( - self, - context: AuthedServiceContext, - action_object: Union[ActionObject, TwinObject], - ) -> Result[SyftSuccess, str]: - """Save an object to the action store""" - # 🟡 TODO 9: Create some kind of type checking / protocol for SyftSerializable - result = self.store.set( - uid=action_object.id, - credentials=context.credentials, - syft_object=action_object, - ) - if result.is_ok(): - return Ok(SyftSuccess(message=f"{type(action_object)} saved")) - return result.err() - @service_method(path="action.get", name="get", roles=GUEST_ROLE_LEVEL) def get( self, @@ -402,10 +385,12 @@ def execute( # relative from .plan import Plan - has_value_mutated = True + data_uploaded_to_blob_store = True if action.action_type == ActionType.CREATEOBJECT: result_action_object = Ok(action.create_object) - has_value_mutated = False + data_uploaded_to_blob_store = ( + action.create_object.blob_storage_entry_id is not None + ) elif action.action_type == ActionType.FUNCTION: result_action_object = self.call_function(context, action) else: @@ -451,7 +436,7 @@ def execute( has_result_read_permission = self.has_read_permission_for_action_result( context, action ) - if 
has_value_mutated: + if not data_uploaded_to_blob_store: result_action_object._save_to_blob_store() set_result = self.store.set( uid=action.result_id, diff --git a/packages/syft/src/syft/service/request/request.py b/packages/syft/src/syft/service/request/request.py index 949087d05d9..b93852de0fd 100644 --- a/packages/syft/src/syft/service/request/request.py +++ b/packages/syft/src/syft/service/request/request.py @@ -443,15 +443,26 @@ def accept_by_depositing_result(self, result: Any, force: bool = False): message="Already approved, if you want to force updating the result use force=True" ) action_obj_id = state.output_history[0].outputs[0] - action_object = ActionObject.from_obj(result, id=action_obj_id) - result = api.services.action.save(action_object) - if not result: + action_object = ActionObject.from_obj( + result, + id=action_obj_id, + syft_client_verify_key=api.syft_client_verify_key, + syft_node_location=api.syft_node_location, + ) + action_object._save_to_blob_store() + result = api.services.action.set(action_object) + if isinstance(result, SyftError): return result return SyftSuccess(message="Request submitted for updating result.") else: - action_object = ActionObject.from_obj(result) - result = api.services.action.save(action_object) - if not result: + action_object = ActionObject.from_obj( + result, + syft_client_verify_key=api.syft_client_verify_key, + syft_node_location=api.syft_node_location, + ) + action_object._save_to_blob_store() + result = api.services.action.set(action_object) + if isinstance(result, SyftError): return result ctx = AuthedServiceContext(credentials=api.signing_key.verify_key) diff --git a/packages/syft/src/syft/types/twin_object.py b/packages/syft/src/syft/types/twin_object.py index df5fd1c6484..0e07180199b 100644 --- a/packages/syft/src/syft/types/twin_object.py +++ b/packages/syft/src/syft/types/twin_object.py @@ -72,7 +72,7 @@ def mock(self) -> ActionObject: mock.id = twin_id return mock - def _save_to_store(self): + def 
_save_to_blob_store(self): # Set node location and verify key # Only save private data to blob store # Mock data is still attached to Twin object. From 0f1efdf334d65753c7007f91c0aa34f399d5b3cb Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Mon, 21 Aug 2023 14:03:29 +0530 Subject: [PATCH 41/98] handle action object repr in _repr_debug --- packages/syft/src/syft/types/syft_object.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/packages/syft/src/syft/types/syft_object.py b/packages/syft/src/syft/types/syft_object.py index 45f814e7f91..68d2758fffa 100644 --- a/packages/syft/src/syft/types/syft_object.py +++ b/packages/syft/src/syft/types/syft_object.py @@ -250,6 +250,8 @@ def _repr_debug_(self) -> str: value = getattr(self, attr, "") value_type = full_name_with_qualname(type(attr)) value_type = value_type.replace("builtins.", "") + if hasattr(value, "syft_action_data_str_"): + value = value.syft_action_data_str_ value = f'"{value}"' if isinstance(value, str) else value _repr_str += f" {attr}: {value_type} = {value}\n" return _repr_str From 98ec4a3bf31ce9169facd0e6ec92af05b52454b0 Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Mon, 21 Aug 2023 16:23:36 +0530 Subject: [PATCH 42/98] add a notebook to demo dataset --- .../Experimental/Shubham/00-load-data.ipynb | 755 ++++++++++++++++++ 1 file changed, 755 insertions(+) create mode 100644 notebooks/Experimental/Shubham/00-load-data.ipynb diff --git a/notebooks/Experimental/Shubham/00-load-data.ipynb b/notebooks/Experimental/Shubham/00-load-data.ipynb new file mode 100644 index 00000000000..79e34654aea --- /dev/null +++ b/notebooks/Experimental/Shubham/00-load-data.ipynb @@ -0,0 +1,755 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Loading data into Syft Domain Server as a Data Owner\n", + "\n", + "Welcome to Syft! 
This tutorial consists of 4 Jupyter notebooks that covers the basics of Syft which includes\n", + "* [Uploading a private dataset as a Data Owner](./00-load-data.ipynb)\n", + "* [Submitting code to run analysis on the private dataset as a Data Scientist](./01-submit-code.ipynb)\n", + "* [Reviewing and approving the code as a Data Owner](02-review-code-and-approve.ipynb)\n", + "* [Downloading/Retrieving the results of the code execution as a Data Scientist](03-data-scientist-download-result.ipynb)\n", + "\n", + "In Syft, a **Data Owner** provides datasets which they would like to make available for study by an outside party they may or may not fully trust has good intentions. Meanwhile, **Data Scientists** are end users who desire to perform computations or answer a specific question using one or more Data Owners' datasets." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Install Syft & Import packages" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "SYFT_VERSION = \">=0.8.2.b0,<0.9\"\n", + "package_string = f'\"syft{SYFT_VERSION}\"'\n", + "# %pip install {package_string} -f https://whls.blob.core.windows.net/unstable/index.html -q" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import syft as sy\n", + "sy.requires(SYFT_VERSION)\n", + "from syft import autocache\n", + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Launch a Syft Domain Server" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# # Launch a fresh domain server named \"test-domain-1\" in dev mode on the local machine\n", + "# node = sy.orchestra.launch(name=\"test-domain-1\", port=\"auto\", dev_mode=True, reset=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# log into the node with default root credentials\n", + "domain_client = sy.login(url=\"http://localhost:8081\", email=\"info@openmined.org\", password=\"changethis\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# List the available API\n", + "domain_client.api" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Data Subjects\n", + "\n", + "Think of Data Subjects as individuals/organizations/institutions owning a dataset that you can pool together privately in Syft.\n", + "\n", + "For this notebook, we'll create a sample dataset that includes trade information of various commodities for different countries." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Check for existing Data Subjects\n", + "data_subjects = domain_client.data_subject_registry.get_all()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "data_subjects" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "assert len(data_subjects) == 0" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Add Data subjects" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "country = sy.DataSubject(name=\"Country\", aliases=[\"country_code\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "canada = sy.DataSubject(name=\"Canada\", aliases=[\"country_code:ca\"])\n", + "germany = sy.DataSubject(name=\"Germany\", aliases=[\"country_code:de\"])\n", + "spain = sy.DataSubject(name=\"Spain\", 
aliases=[\"country_code:es\"])\n", + "france = sy.DataSubject(name=\"France\", aliases=[\"country_code:fr\"])\n", + "japan = sy.DataSubject(name=\"Japan\", aliases=[\"country_code:jp\"])\n", + "uk = sy.DataSubject(name=\"United Kingdom\", aliases=[\"country_code:uk\"])\n", + "usa = sy.DataSubject(name=\"United States of America\", aliases=[\"country_code:us\"])\n", + "australia = sy.DataSubject(name=\"Australia\", aliases=[\"country_code:au\"])\n", + "india = sy.DataSubject(name=\"India\", aliases=[\"country_code:in\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "country.add_member(canada)\n", + "country.add_member(germany)\n", + "country.add_member(spain)\n", + "country.add_member(france)\n", + "country.add_member(japan)\n", + "country.add_member(uk)\n", + "country.add_member(usa)\n", + "country.add_member(australia)\n", + "country.add_member(india)\n", + "\n", + "country.members" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Adds the data subject and all its members to the registry\n", + "response = domain_client.data_subject_registry.add_data_subject(country)\n", + "response" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "assert response" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Lets look at the data subjects added to the data\n", + "data_subjects = domain_client.data_subject_registry.get_all()\n", + "data_subjects" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "assert len(data_subjects) == 10" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Prepare the dataset" + ] + }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "For simplicity, we'll be working with Canada's trade dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "canada_dataset_url = \"https://github.com/OpenMined/datasets/blob/main/trade_flow/ca%20-%20feb%202021.csv?raw=True\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "df = pd.read_csv(autocache(canada_dataset_url))\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In Syft, every dataset has two variants - **Mock** and **Private**.\n", + "\n", + "* **Mock** dataset is a mock/dummy version of the private data that can be accessed & read by the data scientists.\n", + "* **Private** dataset is the actual data that will never be accessed by the data scientist.\n", + "\n", + "To keep things simple, we sample different data points as Mock & Private. But in reality you would want to generate a random dataset for the Mock variant. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# private data samples\n", + "ca_data = df[0:10]\n", + "ca_data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Mock data samples\n", + "mock_ca_data = df[10:20]\n", + "mock_ca_data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create a Syft Dataset\n", + "\n", + "In Syft, `Dataset` is a collection of Assets. For example, `Dataset` can be a \"Lung Cancer Dataset\", and `Assets` will be train, test & validation splits for this dataset." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "dataset = sy.Dataset(name=\"Canada Trade Value 1\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "dataset.set_description(\"Canada Trade Data\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "dataset.add_citation(\"Person, place or thing\")\n", + "dataset.add_url(\"https://github.com/OpenMined/datasets/tree/main/trade_flow\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "dataset.add_contributor(name=\"Andrew Trask\", \n", + " email=\"andrew@openmined.org\",\n", + " note=\"Andrew runs this domain and prepared the dataset metadata.\")\n", + "\n", + "dataset.add_contributor(name=\"Madhava Jay\", \n", + " email=\"madhava@openmined.org\",\n", + " note=\"Madhava tweaked the description to add the URL because Andrew forgot.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "dataset.contributors" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "assert len(dataset.contributors) == 2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Add Assets to the Syft Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "ctf = sy.Asset(name=\"canada_trade_flow\")\n", + "ctf.set_description(\"Canada trade flow represents export & import of different commodities to other countries\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + 
"ctf.add_contributor(name=\"Andrew Trask\", \n", + " email=\"andrew@openmined.org\",\n", + " note=\"Andrew runs this domain and prepared the asset.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# This is where we add the private data (pandas df/numpy array) to the `Asset`\n", + "ctf.set_obj(ca_data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# We must set the shape of this private data\n", + "ctf.set_shape(ca_data.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# We assign the data subject for whom this data belongs to, in this\n", + "ctf.add_data_subject(canada)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Optionally, if we don't want to add any Mock dataset\n", + "ctf.no_mock()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# We must add this Asset to our Dataset\n", + "dataset.add_asset(ctf)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# In case we want to remove a dataset & its associated assets\n", + "dataset.remove_asset(name=ctf.name)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Let's assign the Mock data to the Asset by calling `set_mock` method\n", + "ctf.set_mock(mock_ca_data, mock_is_real=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Let's add our Asset back into our \"Canada Trade Value\" Dataset\n", + "dataset.add_asset(ctf)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, 
+ "source": [ + "### Upload Syft Dataset to Domain Server" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "domain_client.upload_dataset(dataset)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# We can list all the datasets on the Domain Server by invoking the following\n", + "datasets = domain_client.datasets.get_all()\n", + "datasets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "assert len(datasets) == 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "datasets" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Reading the Syft Dataset from Domain Server\n", + "\n", + "Following the logical hierarchy of `Dataset`, `Asset`, and its variant, we can read the data as follows" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "domain_client.datasets[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Reading the mock dataset\n", + "mock = domain_client.datasets[0].assets[0].mock" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mock" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "assert mock_ca_data.equals(mock)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "action_id = domain_client.datasets[0].assets[0].action_id" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "private_data = 
domain_client.api.services.action.get(action_id)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "private_data.syft_action_data_cache" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "private_data.syft_action_data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Reading the real dataset\n", + "# NOTE: Private data can be accessed by the Data Owners, but NOT the Data Scientists\n", + "real = domain_client.datasets[2].assets[0].data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "assert ca_data.equals(real)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create a new Data Scientist account on the Domain Server\n", + "\n", + "Signup is disabled by default.\n", + "An Admin/DO can enable it by `domain_client.settings.allow_guest_signup(enable=True)`\n", + "\n", + "Refer to notebook [07-domain-register-control-flow](./07-domain-register-control-flow.ipynb) for more information." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "domain_client.register(name=\"Jane Doe\", email=\"jane@caltech.edu\", password=\"abc123\", password_verify=\"abc123\", institution=\"Caltech\", website=\"https://www.caltech.edu/\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Cleanup local domain server\n", + "if node.node_type.value == \"python\":\n", + " node.land()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": true + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From 9012a795e7b48cd104608b97e3724403a56f0ef0 Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Mon, 21 Aug 2023 17:48:17 +0530 Subject: [PATCH 43/98] add missing syft node location and client verify key during Action Object creation --- .../syft/src/syft/service/action/action_object.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/packages/syft/src/syft/service/action/action_object.py b/packages/syft/src/syft/service/action/action_object.py index 1c5d4fe4879..5c66ea4ccb8 100644 --- a/packages/syft/src/syft/service/action/action_object.py +++ b/packages/syft/src/syft/service/action/action_object.py @@ -309,9 +309,6 @@ def convert_to_pointers( arg.syft_node_uid = node_uid 
arg._save_to_blob_store() arg = api.services.action.set(arg) - # arg = action_obj.send( - # client - # ) # make sure this doesn't break things later on in send_method_action arg_list.append(arg) if kwargs is not None: @@ -325,7 +322,6 @@ def convert_to_pointers( arg.syft_node_uid = node_uid arg._save_to_blob_store() arg = api.services.action.set(arg) - # arg = action_obj.send(client) kwarg_dict[k] = arg @@ -651,6 +647,9 @@ def _syft_try_to_save_to_store(self, obj) -> None: # relative from ...client.api import APIRegistry + if obj.syft_node_location is None: + obj.syft_node_location = obj.syft_node_uid + obj._save_to_blob_store() action = Action( @@ -691,7 +690,13 @@ def _syft_prepare_obj_uid(self, obj) -> LineageID: # We got a raw object. We need to create the ActionObject from scratch and save it in the store. obj_id = Action.make_id(None) lin_obj_id = Action.make_result_id(obj_id) - act_obj = ActionObject.from_obj(obj, id=obj_id, syft_lineage_id=lin_obj_id) + act_obj = ActionObject.from_obj( + obj, + id=obj_id, + syft_lineage_id=lin_obj_id, + syft_client_verify_key=self.syft_client_verify_key, + syft_node_location=self.syft_node_location, + ) self._syft_try_to_save_to_store(act_obj) From a9993bb305e5e356558a2dc756186c6f44fd9314 Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Tue, 22 Aug 2023 11:18:04 +0530 Subject: [PATCH 44/98] - change blob service write, read APIs role access to GUEST_ROLE - handle obj not exists in blob storage service --- .../src/syft/service/action/action_object.py | 2 ++ .../src/syft/service/blob_storage/service.py | 35 ++++++++++++++++--- 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/packages/syft/src/syft/service/action/action_object.py b/packages/syft/src/syft/service/action/action_object.py index 5c66ea4ccb8..1d6ff97ad6a 100644 --- a/packages/syft/src/syft/service/action/action_object.py +++ b/packages/syft/src/syft/service/action/action_object.py @@ -473,6 +473,8 @@ def syft_action_data(self) -> Any: 
blob_retrieval_object = blob_storage_read_method( uid=self.syft_blob_storage_entry_id ) + if isinstance(blob_retrieval_object, SyftError): + return blob_retrieval_object self.syft_action_data_cache = blob_retrieval_object.read() return self.syft_action_data_cache diff --git a/packages/syft/src/syft/service/blob_storage/service.py b/packages/syft/src/syft/service/blob_storage/service.py index f5f19f1d84e..ad2cd7e2ba5 100644 --- a/packages/syft/src/syft/service/blob_storage/service.py +++ b/packages/syft/src/syft/service/blob_storage/service.py @@ -20,6 +20,7 @@ from ..service import AbstractService from ..service import TYPE_TO_SERVICE from ..service import service_method +from ..user.user_roles import GUEST_ROLE_LEVEL from .stash import BlobStorageStash BlobDepositType = Union[OnDiskBlobDeposit, SeaweedFSBlobDeposit] @@ -62,17 +63,29 @@ def get_blob_storage_metadata_by_uid( return blob_storage_entry.to(BlobStorageMetadata) return SyftError(message=result.err()) - @service_method(path="blob_storage.read", name="read") + @service_method( + path="blob_storage.read", + name="read", + roles=GUEST_ROLE_LEVEL, + ) def read( self, context: AuthedServiceContext, uid: UID ) -> Union[BlobRetrieval, SyftError]: result = self.stash.get_by_uid(context.credentials, uid=uid) if result.is_ok(): + obj = result.ok() + if obj is None: + return SyftError(message=f"No blob storage entry exists for uid: {uid}") + with context.node.blob_storage_client.connect() as conn: - return conn.read(result.ok().location) + return conn.read(obj.location) return SyftError(message=result.err()) - @service_method(path="blob_storage.allocate", name="allocate") + @service_method( + path="blob_storage.allocate", + name="allocate", + roles=GUEST_ROLE_LEVEL, + ) def allocate( self, context: AuthedServiceContext, obj: CreateBlobStorageEntry ) -> Union[BlobDepositType, SyftError]: @@ -94,7 +107,11 @@ def allocate( return SyftError(message=f"{result.err()}") return blob_deposit - 
@service_method(path="blob_storage.write_to_disk", name="write_to_disk") + @service_method( + path="blob_storage.write_to_disk", + name="write_to_disk", + roles=GUEST_ROLE_LEVEL, + ) def write_to_disk( self, context: AuthedServiceContext, uid: UID, data: bytes ) -> Union[SyftSuccess, SyftError]: @@ -119,6 +136,7 @@ def write_to_disk( @service_method( path="blob_storage.mark_write_complete", name="mark_write_complete", + roles=GUEST_ROLE_LEVEL, ) def mark_write_complete( self, @@ -135,6 +153,9 @@ def mark_write_complete( obj: Optional[BlobStorageEntry] = result.ok() + if obj is None: + return SyftError(message=f"No blob storage entry exists for uid: {uid}") + with context.node.blob_storage_client.connect() as conn: result = conn.complete_multipart_upload(obj, etags) @@ -146,9 +167,13 @@ def delete( ) -> Union[SyftSuccess, SyftError]: result = self.stash.get_by_uid(context.credentials, uid=uid) if result.is_ok(): + obj = result.ok() + + if obj is None: + return SyftError(message=f"No blob storage entry exists for uid: {uid}") try: with context.node.blob_storage_client.connect() as conn: - file_unlinked_result = conn.delete(result.ok().location) + file_unlinked_result = conn.delete(obj.location) except Exception as e: return SyftError(message=f"Failed to delete file: {e}") From 733183a496a1316037bf97bd4f1830c07bcf385d Mon Sep 17 00:00:00 2001 From: Kien Dang Date: Tue, 22 Aug 2023 16:47:52 +0800 Subject: [PATCH 45/98] Create blob storage entry from file Co-authored-by: Shubham Gupta --- .../syft/service/action/action_data_empty.py | 26 ++++++++++ .../src/syft/service/action/action_object.py | 50 ++++++++++++++++++- .../src/syft/store/blob_storage/on_disk.py | 5 +- .../src/syft/store/blob_storage/seaweedfs.py | 10 ++-- packages/syft/src/syft/types/blob_storage.py | 10 +++- 5 files changed, 91 insertions(+), 10 deletions(-) diff --git a/packages/syft/src/syft/service/action/action_data_empty.py b/packages/syft/src/syft/service/action/action_data_empty.py index 
f45a784c396..6eb6e44a2a0 100644 --- a/packages/syft/src/syft/service/action/action_data_empty.py +++ b/packages/syft/src/syft/service/action/action_data_empty.py @@ -2,8 +2,13 @@ from __future__ import annotations # stdlib +from pathlib import Path from typing import Optional from typing import Type +from typing import Union + +# third party +import pydantic # relative from ...serde.serializable import serializable @@ -25,3 +30,24 @@ def __repr__(self) -> str: def __str__(self) -> str: return f"{type(self).__name__} UID: {self.id} <{self.syft_internal_type}>" + + +@serializable() +class ActionFileData(SyftObject): + __canonical_name__ = "ActionFileData" + __version__ = SYFT_OBJECT_VERSION_1 + + filepath: Path + + @pydantic.validator("filepath", pre=True) + def __validate_file_path(cls, v: Union[str, Path]) -> Path: + if isinstance(v, str): + v = Path(v) + + if v.exists() and v.is_file(): + return v + + raise ValueError(f"Not a valid path to file. {v}") + + def as_buffer(self): + return open(self.filepath, "rb") diff --git a/packages/syft/src/syft/service/action/action_object.py b/packages/syft/src/syft/service/action/action_object.py index 1d6ff97ad6a..8cedbc02c8b 100644 --- a/packages/syft/src/syft/service/action/action_object.py +++ b/packages/syft/src/syft/service/action/action_object.py @@ -4,6 +4,7 @@ # stdlib from enum import Enum import inspect +from io import BytesIO import traceback import types from typing import Any @@ -42,6 +43,7 @@ from ..response import SyftException from ..service import from_api_or_context from .action_data_empty import ActionDataEmpty +from .action_data_empty import ActionFileData from .action_permissions import ActionPermission from .action_types import action_type_for_object from .action_types import action_type_for_type @@ -481,7 +483,10 @@ def syft_action_data(self) -> Any: def _set_syft_action_data(self, data: Any) -> None: if not isinstance(data, ActionDataEmpty): - storage_entry = CreateBlobStorageEntry.from_obj(data) + if 
isinstance(data, ActionFileData): + storage_entry = CreateBlobStorageEntry.from_path(data.filepath) + else: + storage_entry = CreateBlobStorageEntry.from_obj(data) allocate_method = from_api_or_context( func_or_path="blob_storage.allocate", @@ -490,7 +495,13 @@ def _set_syft_action_data(self, data: Any) -> None: ) if allocate_method is not None: blob_deposit_object = allocate_method(storage_entry) - blob_deposit_object.write(serialize(data, to_bytes=True)) + + if isinstance(data, ActionFileData): + buffer = data.as_buffer() + else: + buffer = BytesIO(serialize(data, to_bytes=True)) + + blob_deposit_object.write(buffer) self.syft_blob_storage_entry_id = ( blob_deposit_object.blob_storage_entry_id ) @@ -861,6 +872,41 @@ def as_empty(self): id = id.id return ActionObject.empty(self.syft_internal_type, id, self.syft_lineage_id) + @staticmethod + def from_file( + filepath: str, + id: Optional[UID] = None, + syft_lineage_id: Optional[LineageID] = None, + syft_client_verify_key: Optional[SyftVerifyKey] = None, + syft_node_location: Optional[UID] = None, + ): + """Create an Action Object from a file.""" + + if id is not None and syft_lineage_id is not None and id != syft_lineage_id.id: + raise ValueError("UID and LineageID should match") + + syft_action_data = ActionFileData(filepath=filepath) + action_type = action_type_for_object(syft_action_data) + + action_object = action_type(syft_action_data_cache=syft_action_data) + + if id is not None: + action_object.id = id + + if syft_client_verify_key is not None: + action_object.syft_client_verify_key = syft_client_verify_key + + if syft_node_location is not None: + action_object.syft_node_location = syft_node_location + + if syft_lineage_id is not None: + action_object.id = syft_lineage_id.id + action_object.syft_history_hash = syft_lineage_id.syft_history_hash + elif id is not None: + action_object.syft_history_hash = hash(id) + + return action_object + @staticmethod def from_obj( syft_action_data: Any, diff --git 
a/packages/syft/src/syft/store/blob_storage/on_disk.py b/packages/syft/src/syft/store/blob_storage/on_disk.py index 45dd38e8df7..0106a611c39 100644 --- a/packages/syft/src/syft/store/blob_storage/on_disk.py +++ b/packages/syft/src/syft/store/blob_storage/on_disk.py @@ -1,4 +1,5 @@ # stdlib +from io import BytesIO from pathlib import Path from tempfile import gettempdir from typing import Any @@ -30,7 +31,7 @@ class OnDiskBlobDeposit(BlobDeposit): __canonical_name__ = "OnDiskBlobDeposit" __version__ = SYFT_OBJECT_VERSION_1 - def write(self, data: bytes) -> Union[SyftSuccess, SyftError]: + def write(self, data: BytesIO) -> Union[SyftSuccess, SyftError]: # relative from ...service.service import from_api_or_context @@ -39,7 +40,7 @@ def write(self, data: bytes) -> Union[SyftSuccess, SyftError]: syft_node_location=self.syft_node_location, syft_client_verify_key=self.syft_client_verify_key, ) - return write_to_disk_method(data=data, uid=self.blob_storage_entry_id) + return write_to_disk_method(data=data.read(), uid=self.blob_storage_entry_id) class OnDiskBlobStorageConnection(BlobStorageConnection): diff --git a/packages/syft/src/syft/store/blob_storage/seaweedfs.py b/packages/syft/src/syft/store/blob_storage/seaweedfs.py index e82c0252747..9d3e7304a5c 100644 --- a/packages/syft/src/syft/store/blob_storage/seaweedfs.py +++ b/packages/syft/src/syft/store/blob_storage/seaweedfs.py @@ -55,7 +55,7 @@ class SeaweedFSBlobDeposit(BlobDeposit): urls: List[GridURL] - def write(self, data: bytes) -> Union[SyftSuccess, SyftError]: + def write(self, data: BytesIO) -> Union[SyftSuccess, SyftError]: # relative from ...client.api import APIRegistry @@ -68,7 +68,7 @@ def write(self, data: bytes) -> Union[SyftSuccess, SyftError]: try: for part_no, (byte_chunk, url) in enumerate( - zip(_byte_chunks(BytesIO(data), DEFAULT_CHUNK_SIZE), self.urls), + zip(_byte_chunks(data, DEFAULT_CHUNK_SIZE), self.urls), start=1, ): if api is not None: @@ -156,13 +156,13 @@ def read(self, fp: 
SecureFilePathLocation) -> BlobRetrieval: def allocate(self, obj: CreateBlobStorageEntry) -> SecureFilePathLocation: try: - obj_id = str(obj.id) + file_name = obj.file_name result = self.client.create_multipart_upload( Bucket=self.bucket_name, - Key=obj_id, + Key=file_name, ) upload_id = result["UploadId"] - return SeaweedSecureFilePathLocation(upload_id=upload_id, path=obj_id) + return SeaweedSecureFilePathLocation(upload_id=upload_id, path=file_name) except BotoClientError as e: raise SyftException(e) diff --git a/packages/syft/src/syft/types/blob_storage.py b/packages/syft/src/syft/types/blob_storage.py index 09cc44d79bc..73e5a39a77f 100644 --- a/packages/syft/src/syft/types/blob_storage.py +++ b/packages/syft/src/syft/types/blob_storage.py @@ -2,6 +2,7 @@ import mimetypes from pathlib import Path import sys +from typing import List from typing import Optional from typing import Type from typing import Union @@ -72,6 +73,7 @@ class CreateBlobStorageEntry(SyftObject): type_: Optional[Type] mimetype: str = "bytes" file_size: int + extensions: List[str] @classmethod def from_obj(cls, obj: SyftObject) -> Self: @@ -96,7 +98,13 @@ def from_path(cls, fp: Union[str, Path], mimetype: Optional[str] = None) -> Self "Please specify mimetype manually `from_path(..., mimetype = ...)`." 
) - return cls(mimetype=mimetype, file_size=path.stat().st_size) + return cls( + mimetype=mimetype, file_size=path.stat().st_size, extensions=path.suffixes + ) + + @property + def file_name(self) -> str: + return str(self.id) + "".join(self.extensions) @transform(BlobStorageEntry, BlobStorageMetadata) From 54e766f6a9eded5d984d0181f13c87209bd372e3 Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Tue, 22 Aug 2023 21:26:50 +0530 Subject: [PATCH 46/98] - implement a classmethod to return node context registry via userverifykey - add a method to BaseSyftObject to set node uid and syft client verify key - fix passing of client verify key and node uid via SyftAPI object - set syft verify key and node location on Action objs at service level - add blob storage permission change to ActionStoreChange - add method for adding and removing permission at BaseStash - add abstract methods for adding and removing permission to StorePartition --- packages/syft/src/syft/node/node.py | 13 +++-- .../src/syft/service/action/action_object.py | 20 +++++--- .../src/syft/service/action/action_service.py | 39 ++++++++++++-- .../syft/src/syft/service/request/request.py | 51 ++++++++++++++----- packages/syft/src/syft/service/service.py | 27 ++++++---- .../syft/src/syft/store/document_store.py | 21 ++++++-- .../src/syft/store/mongo_document_store.py | 17 ++++++- packages/syft/src/syft/types/blob_storage.py | 2 +- packages/syft/src/syft/types/syft_object.py | 4 ++ packages/syft/src/syft/types/twin_object.py | 13 +++-- 10 files changed, 159 insertions(+), 48 deletions(-) diff --git a/packages/syft/src/syft/node/node.py b/packages/syft/src/syft/node/node.py index c78ef47b2ed..a663c642118 100644 --- a/packages/syft/src/syft/node/node.py +++ b/packages/syft/src/syft/node/node.py @@ -203,10 +203,13 @@ def set_node_context( cls.__node_context_registry__[key] = context - @classmethod - def get_auth_context(cls) -> Optional[AuthedServiceContext]: - if len(cls.__node_context_registry__) > 0: - return 
list(cls.__node_context_registry__.values())[0] + def auth_context_for_user( + cls, + node_uid: UID, + user_verify_key: SyftVerifyKey, + ) -> Optional[AuthedServiceContext]: + key = (node_uid, user_verify_key) + return cls.__node_context_registry__.get(key) @instrument @@ -754,7 +757,7 @@ def handle_api_call_with_unsigned_result( context = AuthedServiceContext( node=self, credentials=credentials, role=role ) - AuthNodeContextRegistry.set_node_context(self.id, context, self.verify_key) + AuthNodeContextRegistry.set_node_context(self.id, context, credentials) user_config_registry = UserServiceConfigRegistry.from_role(role) diff --git a/packages/syft/src/syft/service/action/action_object.py b/packages/syft/src/syft/service/action/action_object.py index 8cedbc02c8b..51f0689c4f3 100644 --- a/packages/syft/src/syft/service/action/action_object.py +++ b/packages/syft/src/syft/service/action/action_object.py @@ -305,8 +305,8 @@ def convert_to_pointers( if not isinstance(arg, ActionObject): arg = ActionObject.from_obj( syft_action_data=arg, - syft_client_verify_key=api.syft_client_verify_key, - syft_node_location=api.syft_node_location, + syft_client_verify_key=api.signing_key.verify_key, + syft_node_location=api.node_uid, ) arg.syft_node_uid = node_uid arg._save_to_blob_store() @@ -318,8 +318,8 @@ def convert_to_pointers( if not isinstance(arg, ActionObject): arg = ActionObject.from_obj( syft_action_data=arg, - syft_client_verify_key=api.syft_client_verify_key, - syft_node_location=api.syft_node_location, + syft_client_verify_key=api.signing_key.verify_key, + syft_node_location=api.node_uid, ) arg.syft_node_uid = node_uid arg._save_to_blob_store() @@ -429,6 +429,7 @@ def debox_args_and_kwargs(args: Any, kwargs: Any) -> Tuple[Any, Any]: "syft_action_data", "__check_action_data", "as_empty_data", + "_set_obj_location_", ] @@ -496,12 +497,17 @@ def _set_syft_action_data(self, data: Any) -> None: if allocate_method is not None: blob_deposit_object = 
allocate_method(storage_entry) + if isinstance(blob_deposit_object, SyftError): + return blob_deposit_object + if isinstance(data, ActionFileData): buffer = data.as_buffer() else: buffer = BytesIO(serialize(data, to_bytes=True)) - blob_deposit_object.write(buffer) + result = blob_deposit_object.write(buffer) + if isinstance(result, SyftError): + return result self.syft_blob_storage_entry_id = ( blob_deposit_object.blob_storage_entry_id ) @@ -552,7 +558,9 @@ def __check_action_data(cls, values: dict) -> dict: def _save_to_blob_store(self) -> None: data = self.syft_action_data - self._set_syft_action_data(data) + result = self._set_syft_action_data(data) + if isinstance(result, SyftError): + return result self.syft_action_data_cache = self.as_empty_data() @property diff --git a/packages/syft/src/syft/service/action/action_service.py b/packages/syft/src/syft/service/action/action_service.py index 889bc8da8f0..e8f4bc1d189 100644 --- a/packages/syft/src/syft/service/action/action_service.py +++ b/packages/syft/src/syft/service/action/action_service.py @@ -54,9 +54,16 @@ def np_array(self, context: AuthedServiceContext, data: Any) -> Any: if not isinstance(data, np.ndarray): data = np.array(data) np_obj = NumpyArrayObject( - dtype=data.dtype, shape=data.shape, syft_action_data_cache=data + dtype=data.dtype, + shape=data.shape, + syft_action_data_cache=data, + syft_node_location=context.node.id, + syft_client_verify_key=context.credentials, ) - np_obj._save_to_blob_store() + blob_store_result = np_obj._save_to_blob_store() + if isinstance(blob_store_result, SyftError): + return blob_store_result + np_pointer = self.set(context, np_obj) return np_pointer @@ -114,7 +121,11 @@ def _get( uid=uid, credentials=context.credentials, has_permission=has_permission ) if result.is_ok(): - obj = result.ok() + obj: Union[TwinObject, ActionObject] = result.ok() + obj._set_obj_location_( + context.node.id, + context.credentials, + ) if isinstance(obj, TwinObject): if twin_mode == 
TwinMode.PRIVATE: obj = obj.private @@ -140,6 +151,11 @@ def get_pointer( uid=uid, credentials=context.credentials, node_uid=context.node.id ) if result.is_ok(): + obj = result.ok() + obj._set_obj_location_( + context.node.id, + context.credentials, + ) return Ok(result.ok()) return Err(result.err()) @@ -199,7 +215,14 @@ def _user_code_execute( except Exception as e: return Err(f"_user_code_execute failed. {e}") - result_action_object._save_to_blob_store() + result_action_object._set_obj_location_( + context.node.id, + context.credentials, + ) + blob_store_result = result_action_object._save_to_blob_store() + if isinstance(blob_store_result, SyftError): + return blob_store_result + set_result = self.store.set( uid=result_id, credentials=context.credentials, @@ -437,7 +460,13 @@ def execute( context, action ) if not data_uploaded_to_blob_store: - result_action_object._save_to_blob_store() + result_action_object._set_obj_location_( + context.node.id, + context.credentials, + ) + blob_store_result = result_action_object._save_to_blob_store() + if isinstance(blob_store_result, SyftError): + return blob_store_result set_result = self.store.set( uid=action.result_id, credentials=context.credentials, diff --git a/packages/syft/src/syft/service/request/request.py b/packages/syft/src/syft/service/request/request.py index d6e046b1361..1385c7dc9c8 100644 --- a/packages/syft/src/syft/service/request/request.py +++ b/packages/syft/src/syft/service/request/request.py @@ -41,6 +41,7 @@ from ..action.action_service import ActionService from ..action.action_store import ActionObjectPermission from ..action.action_store import ActionPermission +from ..blob_storage.service import BlobStorageService from ..code.user_code import UserCode from ..code.user_code import UserCodeStatus from ..context import AuthedServiceContext @@ -97,29 +98,51 @@ def _run( self, context: ChangeContext, apply: bool ) -> Result[SyftSuccess, SyftError]: try: - action_service = 
context.node.get_service(ActionService) + action_service: ActionService = context.node.get_service(ActionService) + blob_storage_service = context.node.get_service(BlobStorageService) action_store = action_service.store # can we ever have a lineage ID in the store? obj_uid = self.linked_obj.object_uid obj_uid = obj_uid.id if isinstance(obj_uid, LineageID) else obj_uid + action_obj = action_store.get( + uid=obj_uid, + credentials=context.approving_user_credentials, + ) + + if action_obj.is_err(): + return Err(SyftError(message=f"{action_obj.err()}")) + + action_obj = action_obj.ok() + owner_permission = ActionObjectPermission( uid=obj_uid, credentials=context.approving_user_credentials, permission=self.apply_permission_type, ) if action_store.has_permission(permission=owner_permission): - requesting_permission = ActionObjectPermission( - uid=obj_uid, + requesting_permission_action_obj = ActionObjectPermission( + uid=action_obj.id, + credentials=context.requesting_user_credentials, + permission=self.apply_permission_type, + ) + requesting_permission_blob_obj = ActionObjectPermission( + uid=action_obj.syft_blob_storage_entry_id, credentials=context.requesting_user_credentials, permission=self.apply_permission_type, ) if apply: - action_store.add_permission(requesting_permission) + action_store.add_permission(requesting_permission_action_obj) + blob_storage_service.stash.add_permission( + requesting_permission_blob_obj + ) else: - if action_store.has_permission(requesting_permission): - action_store.remove_permission(requesting_permission) + if action_store.has_permission(requesting_permission_action_obj): + action_store.remove_permission(requesting_permission_action_obj) + blob_storage_service.stash.remove_permission( + requesting_permission_blob_obj + ) else: return Err( SyftError( @@ -445,10 +468,12 @@ def accept_by_depositing_result(self, result: Any, force: bool = False): action_object = ActionObject.from_obj( result, id=action_obj_id, - 
syft_client_verify_key=api.syft_client_verify_key, - syft_node_location=api.syft_node_location, + syft_client_verify_key=api.signing_key.verify_key, + syft_node_location=api.node_uid, ) - action_object._save_to_blob_store() + blob_store_result = action_object._save_to_blob_store() + if isinstance(blob_store_result, SyftError): + return blob_store_result result = api.services.action.set(action_object) if isinstance(result, SyftError): return result @@ -456,10 +481,12 @@ def accept_by_depositing_result(self, result: Any, force: bool = False): else: action_object = ActionObject.from_obj( result, - syft_client_verify_key=api.syft_client_verify_key, - syft_node_location=api.syft_node_location, + syft_client_verify_key=api.signing_key.verify_key, + syft_node_location=api.node_uid, ) - action_object._save_to_blob_store() + blob_store_result = action_object._save_to_blob_store() + if isinstance(blob_store_result, SyftError): + return blob_store_result result = api.services.action.set(action_object) if isinstance(result, SyftError): return result diff --git a/packages/syft/src/syft/service/service.py b/packages/syft/src/syft/service/service.py index cc00de0156d..974fe2d02b6 100644 --- a/packages/syft/src/syft/service/service.py +++ b/packages/syft/src/syft/service/service.py @@ -422,18 +422,23 @@ def from_api_or_context( if callable(func_or_path): func_or_path = func_or_path.__qualname__ - if syft_node_location and syft_client_verify_key: - api = APIRegistry.api_for( - node_uid=syft_node_location, - user_verify_key=syft_client_verify_key, - ) - if api is not None: - service_method = api.services - for path in func_or_path.split("."): - service_method = getattr(service_method, path) - return service_method + if not (syft_node_location and syft_client_verify_key): + return None - node_context = AuthNodeContextRegistry.get_auth_context() + api = APIRegistry.api_for( + node_uid=syft_node_location, + user_verify_key=syft_client_verify_key, + ) + if api is not None: + service_method = api.services +
for path in func_or_path.split("."): + service_method = getattr(service_method, path) + return service_method + + node_context = AuthNodeContextRegistry.auth_context_for_user( + node_uid=syft_node_location, + user_verify_key=syft_client_verify_key, + ) if node_context is not None: user_config_registry = UserServiceConfigRegistry.from_role( node_context.role, diff --git a/packages/syft/src/syft/store/document_store.py b/packages/syft/src/syft/store/document_store.py index d5018ca9e9e..43d028560ad 100644 --- a/packages/syft/src/syft/store/document_store.py +++ b/packages/syft/src/syft/store/document_store.py @@ -485,6 +485,15 @@ def _delete(self, qk: QueryKey) -> Result[SyftSuccess, Err]: def _all(self) -> Result[List[BaseStash.object_type], str]: raise NotImplementedError + def add_permission(self, permission: ActionObjectPermission) -> None: + raise NotImplementedError + + def add_permissions(self, permissions: List[ActionObjectPermission]) -> None: + raise NotImplementedError + + def remove_permission(self, permission: ActionObjectPermission) -> None: + raise NotImplementedError + @instrument @serializable() @@ -543,6 +552,15 @@ def get_all( ) -> Result[List[BaseStash.object_type], str]: return self.partition.all(credentials, order_by, has_permission) + def add_permissions(self, permissions: List[ActionObjectPermission]) -> None: + self.partition.add_permissions(permissions) + + def add_permission(self, permission: ActionObjectPermission) -> None: + self.partition.add_permission(permission) + + def remove_permission(self, permission: ActionObjectPermission) -> None: + self.partition.remove_permission(permission) + def __len__(self) -> int: return len(self.partition) @@ -679,9 +697,6 @@ def get_by_uid( qks = QueryKeys(qks=[UIDPartitionKey.with_obj(uid)]) return self.query_one(credentials=credentials, qks=qks) - def add_permissions(self, permissions: List[ActionObjectPermission]) -> None: - self.partition.add_permissions(permissions) - def set( self, credentials: 
SyftVerifyKey, diff --git a/packages/syft/src/syft/store/mongo_document_store.py b/packages/syft/src/syft/store/mongo_document_store.py index cd1f0e0aa42..b28a374b6b0 100644 --- a/packages/syft/src/syft/store/mongo_document_store.py +++ b/packages/syft/src/syft/store/mongo_document_store.py @@ -30,6 +30,7 @@ from ..types.transforms import TransformContext from ..types.transforms import transform from ..types.transforms import transform_method +from ..types.uid import UID from .document_store import DocumentStore from .document_store import PartitionKey from .document_store import QueryKey @@ -349,7 +350,21 @@ def has_permission(self, permission: ActionObjectPermission) -> bool: return True def add_permissions(self, permissions: List[ActionObjectPermission]) -> None: - # TODO: implemenet + # TODO: Implement this + pass + + def take_ownership( + self, uid: UID, credentials: SyftVerifyKey + ) -> Result[SyftSuccess, str]: + # TODO: Implement this + pass + + def remove_permission(self, permission: ActionObjectPermission): + # TODO: Implement this + pass + + def add_permission(self, permission: ActionObjectPermission) -> None: + # TODO: Implement this pass def _all( diff --git a/packages/syft/src/syft/types/blob_storage.py b/packages/syft/src/syft/types/blob_storage.py index 73e5a39a77f..5f783d3b68e 100644 --- a/packages/syft/src/syft/types/blob_storage.py +++ b/packages/syft/src/syft/types/blob_storage.py @@ -73,7 +73,7 @@ class CreateBlobStorageEntry(SyftObject): type_: Optional[Type] mimetype: str = "bytes" file_size: int - extensions: List[str] + extensions: List[str] = [] @classmethod def from_obj(cls, obj: SyftObject) -> Self: diff --git a/packages/syft/src/syft/types/syft_object.py b/packages/syft/src/syft/types/syft_object.py index 68d2758fffa..74fe7ee5f2e 100644 --- a/packages/syft/src/syft/types/syft_object.py +++ b/packages/syft/src/syft/types/syft_object.py @@ -90,6 +90,10 @@ class Config: syft_node_location: Optional[UID] syft_client_verify_key: 
Optional[SyftVerifyKey] + def _set_obj_location_(self, node_uid, credentials): + self.syft_node_location = node_uid + self.syft_client_verify_key = credentials + class Context(SyftBaseObject): pass diff --git a/packages/syft/src/syft/types/twin_object.py b/packages/syft/src/syft/types/twin_object.py index 0e07180199b..8097b446cb5 100644 --- a/packages/syft/src/syft/types/twin_object.py +++ b/packages/syft/src/syft/types/twin_object.py @@ -74,8 +74,13 @@ def mock(self) -> ActionObject: def _save_to_blob_store(self): # Set node location and verify key - # Only save private data to blob store - # Mock data is still attached to Twin object. - self.private_obj.syft_node_location = self.syft_node_location - self.private_obj.syft_client_verify_key = self.syft_client_verify_key + self.private_obj._set_obj_location_( + self.syft_node_location, + self.syft_client_verify_key, + ) + self.mock_obj._set_obj_location_( + self.syft_node_location, + self.syft_client_verify_key, + ) self.private_obj._save_to_blob_store() + self.mock_obj._save_to_blob_store() From 7a16be1716255e72083b7af18fdd6ba298b0ba91 Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Tue, 22 Aug 2023 23:00:51 +0530 Subject: [PATCH 47/98] - replace use of action_store.set to action_service.set - pass has permission flag via context to action set method - add extra_kwargs dict to Auth Node Context - add classmethod decorator to auth_context_for_user - comment saving mock to blob store --- .../src/syft/external/oblv/oblv_service.py | 14 ++------ packages/syft/src/syft/node/node.py | 1 + .../src/syft/service/action/action_service.py | 33 ++++++++----------- packages/syft/src/syft/service/context.py | 2 ++ .../syft/service/enclave/enclave_service.py | 14 ++------ packages/syft/src/syft/types/twin_object.py | 10 +++--- 6 files changed, 27 insertions(+), 47 deletions(-) diff --git a/packages/syft/src/syft/external/oblv/oblv_service.py b/packages/syft/src/syft/external/oblv/oblv_service.py index b185c53006b..c024118e04b 
100644 --- a/packages/syft/src/syft/external/oblv/oblv_service.py +++ b/packages/syft/src/syft/external/oblv/oblv_service.py @@ -391,12 +391,8 @@ def send_user_code_inputs_to_enclave( dict_object = ActionObject.from_obj({}) dict_object.id = user_code_id dict_object[str(context.credentials)] = inputs - action_service.store.set( - uid=user_code_id, - credentials=context.node.verify_key, - syft_object=dict_object, - has_result_read_permission=True, - ) + context.extra_kwargs = {"has_result_read_permission": True} + action_service.set(context, dict_object) else: res = action_service.store.get( @@ -405,11 +401,7 @@ def send_user_code_inputs_to_enclave( if res.is_ok(): dict_object = res.ok() dict_object[str(context.credentials)] = inputs - action_service.store.set( - uid=user_code_id, - credentials=context.node.verify_key, - syft_object=dict_object, - ) + action_service.set(context, dict_object) else: return res diff --git a/packages/syft/src/syft/node/node.py b/packages/syft/src/syft/node/node.py index a663c642118..24143d58568 100644 --- a/packages/syft/src/syft/node/node.py +++ b/packages/syft/src/syft/node/node.py @@ -203,6 +203,7 @@ def set_node_context( cls.__node_context_registry__[key] = context + @classmethod def auth_context_for_user( cls, node_uid: UID, diff --git a/packages/syft/src/syft/service/action/action_service.py b/packages/syft/src/syft/service/action/action_service.py index e8f4bc1d189..ec473edba1b 100644 --- a/packages/syft/src/syft/service/action/action_service.py +++ b/packages/syft/src/syft/service/action/action_service.py @@ -223,12 +223,10 @@ def _user_code_execute( if isinstance(blob_store_result, SyftError): return blob_store_result - set_result = self.store.set( - uid=result_id, - credentials=context.credentials, - syft_object=result_action_object, - has_result_read_permission=True, - ) + # pass permission information to the action store as extra kwargs + context.extra_kwargs = {"has_result_read_permission": True} + + set_result = 
self.set(context, result_action_object) if set_result.is_err(): return set_result.err() @@ -241,7 +239,7 @@ def _user_code_execute( ] ) - return Ok(result_action_object) + return set_result def execute_plan( self, plan, context: AuthedServiceContext, plan_kwargs: Dict[str, ActionObject] @@ -467,24 +465,19 @@ def execute( blob_store_result = result_action_object._save_to_blob_store() if isinstance(blob_store_result, SyftError): return blob_store_result - set_result = self.store.set( - uid=action.result_id, - credentials=context.credentials, - syft_object=result_action_object, - has_result_read_permission=has_result_read_permission, - ) + + # pass permission information to the action store as extra kwargs + context.extra_kwargs = { + "has_result_read_permission": has_result_read_permission + } + + set_result = self.set(context, result_action_object) if set_result.is_err(): return Err( f"Failed executing action {action}, set result is an error: {set_result.err()}" ) - if isinstance(result_action_object, TwinObject): - result_action_object = result_action_object.mock - # we patch this on the object, because this is the thing we are getting back - result_action_object.id = action.result_id - result_action_object.syft_point_to(context.node.id) - - return Ok(result_action_object) + return set_result def has_read_permission_for_action_result( self, context: AuthedServiceContext, action: Action diff --git a/packages/syft/src/syft/service/context.py b/packages/syft/src/syft/service/context.py index 2b9135e4f3a..6e7e29f7110 100644 --- a/packages/syft/src/syft/service/context.py +++ b/packages/syft/src/syft/service/context.py @@ -1,4 +1,5 @@ # stdlib +from typing import Dict from typing import List from typing import Optional @@ -32,6 +33,7 @@ class AuthedServiceContext(NodeServiceContext): credentials: SyftVerifyKey role: ServiceRole = ServiceRole.NONE + extra_kwargs: Dict = {} def capabilities(self) -> List[ServiceRoleCapability]: return ROLE_TO_CAPABILITIES.get(self.role, 
[]) diff --git a/packages/syft/src/syft/service/enclave/enclave_service.py b/packages/syft/src/syft/service/enclave/enclave_service.py index df931ccaea4..6f3d8cab281 100644 --- a/packages/syft/src/syft/service/enclave/enclave_service.py +++ b/packages/syft/src/syft/service/enclave/enclave_service.py @@ -77,14 +77,10 @@ def send_user_code_inputs_to_enclave( dict_object = ActionObject.from_obj({}) dict_object.id = user_code_id dict_object[str(context.credentials)] = inputs + context.extra_kwargs = {"has_result_read_permission": True} # TODO: Instead of using the action store, modify to # use the action service directly to store objects - action_service.store.set( - uid=user_code_id, - credentials=context.node.verify_key, - syft_object=dict_object, - has_result_read_permission=True, - ) + action_service.set(context, dict_object) else: res = action_service.store.get( @@ -93,11 +89,7 @@ def send_user_code_inputs_to_enclave( if res.is_ok(): dict_object = res.ok() dict_object[str(context.credentials)] = inputs - action_service.store.set( - uid=user_code_id, - credentials=context.node.verify_key, - syft_object=dict_object, - ) + action_service.set(context, dict_object) else: return SyftError( message=f"Error while fetching the object on Enclave: {res.err()}" diff --git a/packages/syft/src/syft/types/twin_object.py b/packages/syft/src/syft/types/twin_object.py index 8097b446cb5..02e77ceffce 100644 --- a/packages/syft/src/syft/types/twin_object.py +++ b/packages/syft/src/syft/types/twin_object.py @@ -78,9 +78,9 @@ def _save_to_blob_store(self): self.syft_node_location, self.syft_client_verify_key, ) - self.mock_obj._set_obj_location_( - self.syft_node_location, - self.syft_client_verify_key, - ) + # self.mock_obj._set_obj_location_( + # self.syft_node_location, + # self.syft_client_verify_key, + # ) self.private_obj._save_to_blob_store() - self.mock_obj._save_to_blob_store() + # self.mock_obj._save_to_blob_store() From 318abaa0c7b422d57bcda252bb74fcc20659ada6 Mon Sep 17 
00:00:00 2001 From: Shubham Gupta Date: Tue, 22 Aug 2023 23:07:31 +0530 Subject: [PATCH 48/98] pass has_result_read_permission from context to action store --- packages/syft/src/syft/service/action/action_service.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/packages/syft/src/syft/service/action/action_service.py b/packages/syft/src/syft/service/action/action_service.py index ec473edba1b..810aa8bcd86 100644 --- a/packages/syft/src/syft/service/action/action_service.py +++ b/packages/syft/src/syft/service/action/action_service.py @@ -87,10 +87,15 @@ def set( action_object.private_obj.syft_created_at = DateTime.now() action_object.mock_obj.syft_created_at = DateTime.now() + has_result_read_permission = context.extra_kwargs.get( + "has_result_read_permission", False + ) + result = self.store.set( uid=action_object.id, credentials=context.credentials, syft_object=action_object, + has_result_read_permission=has_result_read_permission, ) if result.is_ok(): if isinstance(action_object, TwinObject): From d70193a732d171b8bcf10e92cb8660ee12cb8d1a Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Wed, 23 Aug 2023 12:37:12 +0530 Subject: [PATCH 49/98] - check if blob storage permission before removing during undo request - implement has_permission method to document store and base stash --- notebooks/api/0.8/09-blob-storage.ipynb | 52 +++++++++++++++++-- .../src/syft/service/action/action_store.py | 3 +- .../syft/src/syft/service/request/request.py | 3 ++ .../syft/service/request/request_service.py | 2 +- .../syft/src/syft/store/document_store.py | 6 +++ .../syft/src/syft/store/kv_document_store.py | 3 +- 6 files changed, 59 insertions(+), 10 deletions(-) diff --git a/notebooks/api/0.8/09-blob-storage.ipynb b/notebooks/api/0.8/09-blob-storage.ipynb index 2a3cfbc78be..e83f03a6413 100644 --- a/notebooks/api/0.8/09-blob-storage.ipynb +++ b/notebooks/api/0.8/09-blob-storage.ipynb @@ -18,6 +18,7 @@ "outputs": [], "source": [ "import syft as sy\n", + "import io\n", 
"sy.requires(SYFT_VERSION)\n", "from syft import autocache" ] @@ -49,7 +50,8 @@ "from syft.client.client import SyftClient\n", "from syft.store.blob_storage import BlobDeposit\n", "from syft.types.blob_storage import CreateBlobStorageEntry\n", - "from syft.types.syft_object import SyftObject" + "from syft.types.syft_object import SyftObject\n", + "from byte" ] }, { @@ -80,8 +82,17 @@ "outputs": [], "source": [ "# write/deposit object\n", - "blob_deposit = allocate_object(domain_client, user_object)\n", - "write_result = blob_deposit.write(sy.serialize(user_object, to_bytes=True))\n", + "blob_deposit = allocate_object(domain_client, user_object)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data = io.BytesIO(sy.serialize(user_object, to_bytes=True))\n", + "write_result = blob_deposit.write(data)\n", "write_result" ] }, @@ -96,11 +107,29 @@ "user_object_read = blob_retrieval.read()\n", "user_object_read" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# # Cleanup local domain server\n", + "if node.node_type.value == \"python\":\n", + " node.land()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "syft08", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -114,7 +143,20 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.3" + "version": "3.9.7" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false } }, "nbformat": 4, diff --git a/packages/syft/src/syft/service/action/action_store.py 
b/packages/syft/src/syft/service/action/action_store.py index f9006abd3f2..ef7dd308fc3 100644 --- a/packages/syft/src/syft/service/action/action_store.py +++ b/packages/syft/src/syft/service/action/action_store.py @@ -226,9 +226,8 @@ def remove_permission(self, permission: ActionObjectPermission): self.permissions[permission.uid] = permissions def add_permissions(self, permissions: List[ActionObjectPermission]) -> None: - results = [] for permission in permissions: - results.append(self.add_permission(permission)) + self.add_permission(permission) @serializable() diff --git a/packages/syft/src/syft/service/request/request.py b/packages/syft/src/syft/service/request/request.py index 1385c7dc9c8..2d403f62604 100644 --- a/packages/syft/src/syft/service/request/request.py +++ b/packages/syft/src/syft/service/request/request.py @@ -140,6 +140,9 @@ def _run( else: if action_store.has_permission(requesting_permission_action_obj): action_store.remove_permission(requesting_permission_action_obj) + if blob_storage_service.stash.has_permission( + requesting_permission_blob_obj + ): blob_storage_service.stash.remove_permission( requesting_permission_blob_obj ) diff --git a/packages/syft/src/syft/service/request/request_service.py b/packages/syft/src/syft/service/request/request_service.py index 49ea8980ec3..2022fb7da30 100644 --- a/packages/syft/src/syft/service/request/request_service.py +++ b/packages/syft/src/syft/service/request/request_service.py @@ -236,7 +236,7 @@ def undo( if result.is_err(): return SyftError( - f"Failed to undo Request: <{uid}> with error: {result.err()}" + message=f"Failed to undo Request: <{uid}> with error: {result.err()}" ) link = LinkedObject.with_context(request, context=context) diff --git a/packages/syft/src/syft/store/document_store.py b/packages/syft/src/syft/store/document_store.py index 43d028560ad..98ca622c5bb 100644 --- a/packages/syft/src/syft/store/document_store.py +++ b/packages/syft/src/syft/store/document_store.py @@ -494,6 +494,9 
@@ def add_permissions(self, permissions: List[ActionObjectPermission]) -> None: def remove_permission(self, permission: ActionObjectPermission) -> None: raise NotImplementedError + def has_permission(self, permission: ActionObjectPermission) -> bool: + raise NotImplementedError + @instrument @serializable() @@ -561,6 +564,9 @@ def add_permission(self, permission: ActionObjectPermission) -> None: def remove_permission(self, permission: ActionObjectPermission) -> None: self.partition.remove_permission(permission) + def has_permission(self, permission: ActionObjectPermission) -> bool: + return self.partition.has_permission(permission=permission) + def __len__(self) -> int: return len(self.partition) diff --git a/packages/syft/src/syft/store/kv_document_store.py b/packages/syft/src/syft/store/kv_document_store.py index 8ba859c9a48..754abe6fcce 100644 --- a/packages/syft/src/syft/store/kv_document_store.py +++ b/packages/syft/src/syft/store/kv_document_store.py @@ -253,9 +253,8 @@ def remove_permission(self, permission: ActionObjectPermission): self.permissions[permission.uid] = permissions def add_permissions(self, permissions: List[ActionObjectPermission]) -> None: - results = [] for permission in permissions: - results.append(self.add_permission(permission)) + self.add_permission(permission) def has_permission(self, permission: ActionObjectPermission) -> bool: if not isinstance(permission.permission, ActionPermission): From 1264758f8bed6c703b7361f5a3cac9760167484f Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Wed, 23 Aug 2023 12:45:27 +0530 Subject: [PATCH 50/98] fix import in blob storage notebook --- notebooks/api/0.8/09-blob-storage.ipynb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/notebooks/api/0.8/09-blob-storage.ipynb b/notebooks/api/0.8/09-blob-storage.ipynb index e83f03a6413..d6a9abcba89 100644 --- a/notebooks/api/0.8/09-blob-storage.ipynb +++ b/notebooks/api/0.8/09-blob-storage.ipynb @@ -50,8 +50,7 @@ "from syft.client.client 
import SyftClient\n", "from syft.store.blob_storage import BlobDeposit\n", "from syft.types.blob_storage import CreateBlobStorageEntry\n", - "from syft.types.syft_object import SyftObject\n", - "from byte" + "from syft.types.syft_object import SyftObject" ] }, { From fb405346254bcb96d1f958df7be29d3aa9490c44 Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Thu, 24 Aug 2023 00:40:40 +0530 Subject: [PATCH 51/98] fix issues of blob storage with enclave service fix issues with execute code handle error at blob storage add missing permissions for blob storage --- .../src/syft/external/oblv/oblv_service.py | 12 ++++---- packages/syft/src/syft/node/node.py | 2 +- .../src/syft/service/action/action_object.py | 29 +++++++++++-------- .../src/syft/service/action/action_service.py | 26 +++++++++++++++++ .../syft/src/syft/service/action/pandas.py | 3 +- .../src/syft/service/blob_storage/service.py | 3 ++ .../syft/service/code/user_code_service.py | 8 ++++- packages/syft/src/syft/service/context.py | 8 +++++ .../syft/service/enclave/enclave_service.py | 24 +++++++++++---- .../syft/src/syft/service/policy/policy.py | 17 ++++++++--- .../syft/src/syft/service/request/request.py | 4 +-- .../syft/service/request/request_service.py | 5 +++- .../src/syft/store/blob_storage/__init__.py | 4 ++- .../src/syft/store/blob_storage/on_disk.py | 13 ++++++--- .../src/syft/store/blob_storage/seaweedfs.py | 8 +++-- 15 files changed, 124 insertions(+), 42 deletions(-) diff --git a/packages/syft/src/syft/external/oblv/oblv_service.py b/packages/syft/src/syft/external/oblv/oblv_service.py index c024118e04b..38d380be88a 100644 --- a/packages/syft/src/syft/external/oblv/oblv_service.py +++ b/packages/syft/src/syft/external/oblv/oblv_service.py @@ -387,21 +387,21 @@ def send_user_code_inputs_to_enclave( user_code.status = res.ok() user_code_service.update_code_state(context=context, code_item=user_code) + root_context = context.as_root_context() + if not action_service.exists(context=context, 
obj_id=user_code_id): dict_object = ActionObject.from_obj({}) dict_object.id = user_code_id dict_object[str(context.credentials)] = inputs - context.extra_kwargs = {"has_result_read_permission": True} - action_service.set(context, dict_object) + root_context.extra_kwargs = {"has_result_read_permission": True} + action_service.set(root_context, dict_object) else: - res = action_service.store.get( - uid=user_code_id, credentials=context.node.verify_key - ) + res = action_service.get(uid=user_code_id, context=context) if res.is_ok(): dict_object = res.ok() dict_object[str(context.credentials)] = inputs - action_service.set(context, dict_object) + action_service.set(root_context, dict_object) else: return res diff --git a/packages/syft/src/syft/node/node.py b/packages/syft/src/syft/node/node.py index 24143d58568..549a45231e5 100644 --- a/packages/syft/src/syft/node/node.py +++ b/packages/syft/src/syft/node/node.py @@ -199,7 +199,7 @@ def set_node_context( if isinstance(user_verify_key, str): user_verify_key = SyftVerifyKey.from_string(user_verify_key) - key = (node_uid, user_verify_key) + key = "-".join(str(x) for x in (node_uid, user_verify_key)) cls.__node_context_registry__[key] = context diff --git a/packages/syft/src/syft/service/action/action_object.py b/packages/syft/src/syft/service/action/action_object.py index 51f0689c4f3..6f058aba9c1 100644 --- a/packages/syft/src/syft/service/action/action_object.py +++ b/packages/syft/src/syft/service/action/action_object.py @@ -467,18 +467,21 @@ class ActionObject(SyftObject): @property def syft_action_data(self) -> Any: if self.syft_blob_storage_entry_id and self.syft_created_at: - blob_storage_read_method = from_api_or_context( - func_or_path="blob_storage.read", - syft_node_location=self.syft_node_location, - syft_client_verify_key=self.syft_client_verify_key, - ) + # If ActionDataEmpty then try to fetch it from store. 
+ if isinstance(self.syft_action_data_cache, ActionDataEmpty): + blob_storage_read_method = from_api_or_context( + func_or_path="blob_storage.read", + syft_node_location=self.syft_node_location, + syft_client_verify_key=self.syft_client_verify_key, + ) - blob_retrieval_object = blob_storage_read_method( - uid=self.syft_blob_storage_entry_id - ) - if isinstance(blob_retrieval_object, SyftError): - return blob_retrieval_object - self.syft_action_data_cache = blob_retrieval_object.read() + if blob_storage_read_method is not None: + blob_retrieval_object = blob_storage_read_method( + uid=self.syft_blob_storage_entry_id + ) + if isinstance(blob_retrieval_object, SyftError): + return blob_retrieval_object + self.syft_action_data_cache = blob_retrieval_object.read() return self.syft_action_data_cache @@ -556,8 +559,10 @@ def __check_action_data(cls, values: dict) -> dict: values["syft_has_bool_attr"] = hasattr(v, "__bool__") return values - def _save_to_blob_store(self) -> None: + def _save_to_blob_store(self) -> Optional[SyftError]: data = self.syft_action_data + if isinstance(data, SyftError): + return data result = self._set_syft_action_data(data) if isinstance(result, SyftError): return result diff --git a/packages/syft/src/syft/service/action/action_service.py b/packages/syft/src/syft/service/action/action_service.py index 810aa8bcd86..dcaaf0517bb 100644 --- a/packages/syft/src/syft/service/action/action_service.py +++ b/packages/syft/src/syft/service/action/action_service.py @@ -16,6 +16,7 @@ from ...types.datetime import DateTime from ...types.twin_object import TwinObject from ...types.uid import UID +from ..blob_storage.service import BlobStorageService from ..code.user_code import UserCode from ..code.user_code import execute_byte_code from ..context import AuthedServiceContext @@ -178,6 +179,17 @@ def _user_code_execute( if filtered_kwargs.is_err(): return filtered_kwargs filtered_kwargs = filtered_kwargs.ok() + + expected_input_kwargs = set() + for _inp_kwarg 
in code_item.input_policy.inputs.values(): + expected_input_kwargs.update(_inp_kwarg.keys()) + permitted_input_kwargs = list(filtered_kwargs.keys()) + not_approved_kwargs = set(expected_input_kwargs) - set(permitted_input_kwargs) + if len(not_approved_kwargs) > 0: + return Err( + f"Input arguments: {not_approved_kwargs} to the function are not approved yet." + ) + has_twin_inputs = False real_kwargs = {} @@ -236,6 +248,10 @@ def _user_code_execute( if set_result.is_err(): return set_result.err() + blob_storage_service: BlobStorageService = context.node.get_service( + BlobStorageService + ) + if len(code_item.output_policy.output_readers) > 0: self.store.add_permissions( [ @@ -243,6 +259,16 @@ def _user_code_execute( for x in code_item.output_policy.output_readers ] ) + blob_storage_service.stash.add_permissions( + [ + ActionObjectPermission( + result_action_object.syft_blob_storage_entry_id, + ActionPermission.READ, + x, + ) + for x in code_item.output_policy.output_readers + ] + ) return set_result diff --git a/packages/syft/src/syft/service/action/pandas.py b/packages/syft/src/syft/service/action/pandas.py index cac27b94f12..cd669ff1425 100644 --- a/packages/syft/src/syft/service/action/pandas.py +++ b/packages/syft/src/syft/service/action/pandas.py @@ -1,7 +1,6 @@ # stdlib from typing import Any from typing import ClassVar -from typing import Optional from typing import Type # third party @@ -50,7 +49,7 @@ class PandasSeriesObject(ActionObject): syft_internal_type = Series syft_passthrough_attrs = BASE_PASSTHROUGH_ATTRS - name: Optional[str] = None + # name: Optional[str] = None # syft_dont_wrap_attrs = ["shape"] def __getattribute__(self, name: str) -> Any: diff --git a/packages/syft/src/syft/service/blob_storage/service.py b/packages/syft/src/syft/service/blob_storage/service.py index ad2cd7e2ba5..383f5723654 100644 --- a/packages/syft/src/syft/service/blob_storage/service.py +++ b/packages/syft/src/syft/service/blob_storage/service.py @@ -92,6 +92,9 @@ def 
allocate( with context.node.blob_storage_client.connect() as conn: secure_location = conn.allocate(obj) + if isinstance(secure_location, SyftError): + return secure_location + blob_storage_entry = BlobStorageEntry( id=obj.id, location=secure_location, diff --git a/packages/syft/src/syft/service/code/user_code_service.py b/packages/syft/src/syft/service/code/user_code_service.py index 3d9fb656134..d776b5020c5 100644 --- a/packages/syft/src/syft/service/code/user_code_service.py +++ b/packages/syft/src/syft/service/code/user_code_service.py @@ -189,7 +189,13 @@ def get_results( connection=connection, credentials=context.node.signing_key, ) - return enclave_client.code.get_results(code.id) + outputs = enclave_client.code.get_results(code.id) + if isinstance(outputs, list): + for output in outputs: + output.syft_action_data + else: + outputs.syft_action_data + return outputs # if the current node is the enclave else: diff --git a/packages/syft/src/syft/service/context.py b/packages/syft/src/syft/service/context.py index 6e7e29f7110..5f8613ad3d0 100644 --- a/packages/syft/src/syft/service/context.py +++ b/packages/syft/src/syft/service/context.py @@ -38,6 +38,14 @@ class AuthedServiceContext(NodeServiceContext): def capabilities(self) -> List[ServiceRoleCapability]: return ROLE_TO_CAPABILITIES.get(self.role, []) + def with_credentials(self, credentials: SyftVerifyKey, role: ServiceRole): + return AuthedServiceContext(credentials=credentials, role=role, node=self.node) + + def as_root_context(self): + return AuthedServiceContext( + credentials=self.node.verify_key, role=ServiceRole.ADMIN, node=self.node + ) + class UnauthedServiceContext(NodeServiceContext): __canonical_name__ = "UnauthedServiceContext" diff --git a/packages/syft/src/syft/service/enclave/enclave_service.py b/packages/syft/src/syft/service/enclave/enclave_service.py index 6f3d8cab281..bc4d289b5e9 100644 --- a/packages/syft/src/syft/service/enclave/enclave_service.py +++ 
b/packages/syft/src/syft/service/enclave/enclave_service.py @@ -10,6 +10,7 @@ from ...service.response import SyftSuccess from ...service.user.user_roles import GUEST_ROLE_LEVEL from ...store.document_store import DocumentStore +from ...types.twin_object import TwinObject from ...types.uid import UID from ..action.action_object import ActionObject from ..code.user_code_service import UserCode @@ -73,23 +74,22 @@ def send_user_code_inputs_to_enclave( if isinstance(user_code_update, SyftError): return user_code_update + root_context = context.as_root_context() if not action_service.exists(context=context, obj_id=user_code_id): dict_object = ActionObject.from_obj({}) dict_object.id = user_code_id dict_object[str(context.credentials)] = inputs - context.extra_kwargs = {"has_result_read_permission": True} + root_context.extra_kwargs = {"has_result_read_permission": True} # TODO: Instead of using the action store, modify to # use the action service directly to store objects - action_service.set(context, dict_object) + action_service.set(root_context, dict_object) else: - res = action_service.store.get( - uid=user_code_id, credentials=context.node.verify_key - ) + res = action_service.get(uid=user_code_id, context=root_context) if res.is_ok(): dict_object = res.ok() dict_object[str(context.credentials)] = inputs - action_service.set(context, dict_object) + action_service.set(root_context, dict_object) else: return SyftError( message=f"Error while fetching the object on Enclave: {res.err()}" @@ -155,6 +155,18 @@ def propagate_inputs_to_enclave(user_code: UserCode, context: ChangeContext): if isinstance(inputs, SyftError): return inputs + # Save inputs to blob store + for var_name, var_value in inputs.items(): + if isinstance(var_value, (ActionObject, TwinObject)): + # Set the obj location to enclave + var_value._set_obj_location_( + enclave_client.api.node_uid, + enclave_client.verify_key, + ) + var_value._save_to_blob_store() + + inputs[var_name] = var_value + # send data 
of the current node to enclave res = send_method( user_code_id=user_code.id, diff --git a/packages/syft/src/syft/service/policy/policy.py b/packages/syft/src/syft/service/policy/policy.py index 0fdd2ba1ebd..8a1c5a7f284 100644 --- a/packages/syft/src/syft/service/policy/policy.py +++ b/packages/syft/src/syft/service/policy/policy.py @@ -194,14 +194,23 @@ def _inputs_for_context(self, context: ChangeContext): user_node_view = NodeIdentity.from_change_context(context) inputs = self.inputs[user_node_view] + root_context = AuthedServiceContext( + node=context.node, credentials=context.approving_user_credentials + ).as_root_context() + action_service = context.node.get_service("actionservice") for var_name, uid in inputs.items(): - action_object = action_service.store.get( - uid=uid, credentials=user_node_view.verify_key - ) + action_object = action_service.get(uid=uid, context=root_context) if action_object.is_err(): return SyftError(message=action_object.err()) - inputs[var_name] = action_object.ok() + action_object_value = action_object.ok() + # resolve syft action data from blob store + if isinstance(action_object_value, TwinObject): + action_object_value.private_obj.syft_action_data + action_object_value.mock_obj.syft_action_data + elif isinstance(action_object_value, ActionObject): + action_object_value.syft_action_data + inputs[var_name] = action_object_value return inputs diff --git a/packages/syft/src/syft/service/request/request.py b/packages/syft/src/syft/service/request/request.py index 2d403f62604..28a40c648c5 100644 --- a/packages/syft/src/syft/service/request/request.py +++ b/packages/syft/src/syft/service/request/request.py @@ -644,7 +644,7 @@ def _run( try: obj = self.linked_obj.resolve_with_context(context) if obj.is_err(): - return SyftError(message=obj.err()) + return Err(SyftError(message=obj.err())) obj = obj.ok() if apply: obj = self.mutate(obj, value=self.value) @@ -833,7 +833,7 @@ def _run( return Err(valid) obj = 
self.linked_obj.resolve_with_context(context) if obj.is_err(): - return SyftError(message=obj.err()) + return Err(SyftError(message=obj.err())) obj = obj.ok() if apply: res = self.mutate(obj, context, undo=False) diff --git a/packages/syft/src/syft/service/request/request_service.py b/packages/syft/src/syft/service/request/request_service.py index 2022fb7da30..c8df9106291 100644 --- a/packages/syft/src/syft/service/request/request_service.py +++ b/packages/syft/src/syft/service/request/request_service.py @@ -215,7 +215,10 @@ def apply( ) send_notification(context=context, notification=notification) - return result.value + # TODO: check whereever we're return SyftError encapsulate it in Result. + if hasattr(result, "value"): + return result.value + return result return request.value @service_method(path="request.undo", name="undo") diff --git a/packages/syft/src/syft/store/blob_storage/__init__.py b/packages/syft/src/syft/store/blob_storage/__init__.py index cc1e714d7eb..3b19d70eba9 100644 --- a/packages/syft/src/syft/store/blob_storage/__init__.py +++ b/packages/syft/src/syft/store/blob_storage/__init__.py @@ -139,7 +139,9 @@ def __exit__(self, *exc) -> None: def read(self, fp: SecureFilePathLocation) -> BlobRetrieval: raise NotImplementedError - def allocate(self, obj: CreateBlobStorageEntry) -> SecureFilePathLocation: + def allocate( + self, obj: CreateBlobStorageEntry + ) -> Union[SecureFilePathLocation, SyftError]: raise NotImplementedError def write(self, obj: BlobStorageEntry) -> BlobDeposit: diff --git a/packages/syft/src/syft/store/blob_storage/on_disk.py b/packages/syft/src/syft/store/blob_storage/on_disk.py index 0106a611c39..ead24885893 100644 --- a/packages/syft/src/syft/store/blob_storage/on_disk.py +++ b/packages/syft/src/syft/store/blob_storage/on_disk.py @@ -60,10 +60,15 @@ def read(self, fp: SecureFilePathLocation) -> BlobRetrieval: syft_object=(self._base_directory / fp.path).read_bytes() ) - def allocate(self, obj: CreateBlobStorageEntry) -> 
SecureFilePathLocation: - return SecureFilePathLocation( - path=str((self._base_directory / str(obj.id)).absolute()) - ) + def allocate( + self, obj: CreateBlobStorageEntry + ) -> Union[SecureFilePathLocation, SyftError]: + try: + return SecureFilePathLocation( + path=str((self._base_directory / str(obj.id)).absolute()) + ) + except Exception as e: + return SyftError(message=f"Failed to allocate: {e}") def write(self, obj: BlobStorageEntry) -> BlobDeposit: return OnDiskBlobDeposit(blob_storage_entry_id=obj.id) diff --git a/packages/syft/src/syft/store/blob_storage/seaweedfs.py b/packages/syft/src/syft/store/blob_storage/seaweedfs.py index 9d3e7304a5c..91214bf686e 100644 --- a/packages/syft/src/syft/store/blob_storage/seaweedfs.py +++ b/packages/syft/src/syft/store/blob_storage/seaweedfs.py @@ -154,7 +154,9 @@ def read(self, fp: SecureFilePathLocation) -> BlobRetrieval: except BotoClientError as e: raise SyftException(e) - def allocate(self, obj: CreateBlobStorageEntry) -> SecureFilePathLocation: + def allocate( + self, obj: CreateBlobStorageEntry + ) -> Union[SecureFilePathLocation, SyftError]: try: file_name = obj.file_name result = self.client.create_multipart_upload( @@ -164,7 +166,9 @@ def allocate(self, obj: CreateBlobStorageEntry) -> SecureFilePathLocation: upload_id = result["UploadId"] return SeaweedSecureFilePathLocation(upload_id=upload_id, path=file_name) except BotoClientError as e: - raise SyftException(e) + return SyftError( + message=f"Failed to allocate space for {obj} with error: {e}" + ) def write(self, obj: BlobStorageEntry) -> BlobDeposit: total_parts = math.ceil(obj.file_size / DEFAULT_CHUNK_SIZE) From f72209949e5e1f17ba3b6627ea84a564e0a78d19 Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Thu, 24 Aug 2023 13:39:31 +0530 Subject: [PATCH 52/98] update syft action data to handle file type uploads to blob store --- .../src/syft/service/blob_storage/service.py | 4 ++-- .../src/syft/store/blob_storage/__init__.py | 16 +++++++++++++++- 
.../syft/src/syft/store/blob_storage/on_disk.py | 10 +++++++--- .../src/syft/store/blob_storage/seaweedfs.py | 9 +++++++-- packages/syft/src/syft/types/blob_storage.py | 17 ++++++++++++++++- 5 files changed, 47 insertions(+), 9 deletions(-) diff --git a/packages/syft/src/syft/service/blob_storage/service.py b/packages/syft/src/syft/service/blob_storage/service.py index 383f5723654..1858c1bc17a 100644 --- a/packages/syft/src/syft/service/blob_storage/service.py +++ b/packages/syft/src/syft/service/blob_storage/service.py @@ -73,12 +73,12 @@ def read( ) -> Union[BlobRetrieval, SyftError]: result = self.stash.get_by_uid(context.credentials, uid=uid) if result.is_ok(): - obj = result.ok() + obj: BlobStorageEntry = result.ok() if obj is None: return SyftError(message=f"No blob storage entry exists for uid: {uid}") with context.node.blob_storage_client.connect() as conn: - return conn.read(obj.location) + return conn.read(obj.location, obj.type_) return SyftError(message=result.err()) @service_method( diff --git a/packages/syft/src/syft/store/blob_storage/__init__.py b/packages/syft/src/syft/store/blob_storage/__init__.py index 3b19d70eba9..854bcebf401 100644 --- a/packages/syft/src/syft/store/blob_storage/__init__.py +++ b/packages/syft/src/syft/store/blob_storage/__init__.py @@ -42,8 +42,10 @@ # stdlib +from typing import Optional from typing import Type from typing import Union +from urllib.request import urlretrieve # third party from pydantic import BaseModel @@ -56,6 +58,8 @@ from ...service.response import SyftError from ...service.response import SyftSuccess from ...types.base import SyftBaseModel +from ...types.blob_storage import BlobFile +from ...types.blob_storage import BlobFileType from ...types.blob_storage import BlobStorageEntry from ...types.blob_storage import CreateBlobStorageEntry from ...types.blob_storage import SecureFilePathLocation @@ -71,6 +75,9 @@ class BlobRetrieval(SyftObject): __canonical_name__ = "BlobRetrieval" __version__ = 
SYFT_OBJECT_VERSION_1 + type_: Optional[Type] + file_name: str + def read(self) -> Union[SyftObject, SyftError]: pass @@ -83,6 +90,10 @@ class SyftObjectRetrieval(BlobRetrieval): syft_object: bytes def read(self) -> Union[SyftObject, SyftError]: + if self.type_ is BlobFileType: + with open(self.file_name, "wb") as fp: + fp.write(self.syft_object) + return BlobFile(file_name=self.file_name) return deserialize(self.syft_object, from_bytes=True) @@ -106,6 +117,9 @@ def read(self) -> Union[SyftObject, SyftError]: else: blob_url = self.url try: + if self.type_ is BlobFileType: + urlretrieve(str(blob_url), filename=self.file_name) + return BlobFile(file_name=self.file_name) response = requests.get(str(blob_url), timeout=DEFAULT_TIMEOUT) response.raise_for_status() return deserialize(response.content, from_bytes=True) @@ -136,7 +150,7 @@ def __enter__(self) -> Self: def __exit__(self, *exc) -> None: raise NotImplementedError - def read(self, fp: SecureFilePathLocation) -> BlobRetrieval: + def read(self, fp: SecureFilePathLocation, type_: Optional[Type]) -> BlobRetrieval: raise NotImplementedError def allocate( diff --git a/packages/syft/src/syft/store/blob_storage/on_disk.py b/packages/syft/src/syft/store/blob_storage/on_disk.py index ead24885893..72e2d668e6f 100644 --- a/packages/syft/src/syft/store/blob_storage/on_disk.py +++ b/packages/syft/src/syft/store/blob_storage/on_disk.py @@ -3,6 +3,7 @@ from pathlib import Path from tempfile import gettempdir from typing import Any +from typing import Optional from typing import Type from typing import Union @@ -55,9 +56,12 @@ def __enter__(self) -> Self: def __exit__(self, *exc) -> None: pass - def read(self, fp: SecureFilePathLocation) -> BlobRetrieval: + def read(self, fp: SecureFilePathLocation, type_: Optional[Type]) -> BlobRetrieval: + file_path = self._base_directory / fp.path return SyftObjectRetrieval( - syft_object=(self._base_directory / fp.path).read_bytes() + syft_object=file_path.read_bytes(), + 
file_name=file_path.name, + type_=type_, ) def allocate( @@ -65,7 +69,7 @@ def allocate( ) -> Union[SecureFilePathLocation, SyftError]: try: return SecureFilePathLocation( - path=str((self._base_directory / str(obj.id)).absolute()) + path=str((self._base_directory / obj.file_name).absolute()) ) except Exception as e: return SyftError(message=f"Failed to allocate: {e}") diff --git a/packages/syft/src/syft/store/blob_storage/seaweedfs.py b/packages/syft/src/syft/store/blob_storage/seaweedfs.py index 91214bf686e..c73ff71a580 100644 --- a/packages/syft/src/syft/store/blob_storage/seaweedfs.py +++ b/packages/syft/src/syft/store/blob_storage/seaweedfs.py @@ -1,8 +1,10 @@ # stdlib from io import BytesIO import math +from pathlib import Path from typing import Generator from typing import List +from typing import Optional from typing import Type from typing import Union @@ -143,14 +145,17 @@ def __enter__(self) -> Self: def __exit__(self, *exc) -> None: self.client.close() - def read(self, fp: SecureFilePathLocation) -> BlobRetrieval: + def read(self, fp: SecureFilePathLocation, type_: Optional[Type]) -> BlobRetrieval: try: url = self.client.generate_presigned_url( ClientMethod="get_object", Params={"Bucket": self.bucket_name, "Key": fp.path}, ExpiresIn=READ_EXPIRATION_TIME, ) - return BlobRetrievalByURL(url=GridURL.from_url(url)) + + return BlobRetrievalByURL( + url=GridURL.from_url(url), file_name=Path(fp.path).name, type_=type_ + ) except BotoClientError as e: raise SyftException(e) diff --git a/packages/syft/src/syft/types/blob_storage.py b/packages/syft/src/syft/types/blob_storage.py index 5f783d3b68e..71f2c40aa5c 100644 --- a/packages/syft/src/syft/types/blob_storage.py +++ b/packages/syft/src/syft/types/blob_storage.py @@ -23,6 +23,18 @@ from .uid import UID +@serializable() +class BlobFile(SyftObject): + __canonical_name__ = "BlobFile" + __version__ = SYFT_OBJECT_VERSION_1 + + file_name: str + + +class BlobFileType(type): + pass + + @serializable() class 
SecureFilePathLocation(SyftObject): __canonical_name__ = "SecureFilePathLocation" @@ -99,7 +111,10 @@ def from_path(cls, fp: Union[str, Path], mimetype: Optional[str] = None) -> Self ) return cls( - mimetype=mimetype, file_size=path.stat().st_size, extensions=path.suffixes + mimetype=mimetype, + file_size=path.stat().st_size, + extensions=path.suffixes, + type_=BlobFileType, ) @property From 8ef9d712f6e4cf126dd275ecd6519d45fc857eea Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Thu, 24 Aug 2023 16:50:21 +0530 Subject: [PATCH 53/98] pass blob storage config to worker settings reset blob storage if node is reset --- packages/syft/src/syft/node/node.py | 15 +++++++++++++++ packages/syft/src/syft/node/worker_settings.py | 2 ++ packages/syft/src/syft/service/queue/queue.py | 1 + 3 files changed, 18 insertions(+) diff --git a/packages/syft/src/syft/node/node.py b/packages/syft/src/syft/node/node.py index 549a45231e5..776955d264c 100644 --- a/packages/syft/src/syft/node/node.py +++ b/packages/syft/src/syft/node/node.py @@ -383,6 +383,7 @@ def named( raise Exception(f"Invalid UID: {name_hash_string} for name: {name}") uid = UID(name_hash_string) key = SyftSigningKey(signing_key=SigningKey(name_hash)) + blob_storage_config = None if reset: store_config = SQLiteStoreClientConfig() store_config.filename = f"{uid}.sqlite" @@ -406,6 +407,18 @@ def named( if os.path.exists(store_config.file_path): os.unlink(store_config.file_path) + # Reset blob storage + root_directory = get_root_data_path() + base_directory = root_directory / f"{uid}" + for file in base_directory.iterdir(): + file.unlink() + blob_client_config = OnDiskBlobStorageClientConfig( + base_directory=base_directory + ) + blob_storage_config = OnDiskBlobStorageConfig( + client_config=blob_client_config + ) + return cls( name=name, id=uid, @@ -416,6 +429,7 @@ def named( node_type=node_type, node_side_type=node_side_type, enable_warnings=enable_warnings, + blob_storage_config=blob_storage_config, ) def is_root(self, 
credentials: SyftVerifyKey) -> bool: @@ -876,6 +890,7 @@ def task_runner( signing_key=worker_settings.signing_key, document_store_config=worker_settings.document_store_config, action_store_config=worker_settings.action_store_config, + blob_storage_config=worker_settings.blob_store_config, is_subprocess=True, ) try: diff --git a/packages/syft/src/syft/node/worker_settings.py b/packages/syft/src/syft/node/worker_settings.py index b0d88ce92a6..b01bba776a7 100644 --- a/packages/syft/src/syft/node/worker_settings.py +++ b/packages/syft/src/syft/node/worker_settings.py @@ -28,6 +28,7 @@ class WorkerSettings(SyftObject): signing_key: SyftSigningKey document_store_config: StoreConfig action_store_config: StoreConfig + blob_store_config: StoreConfig @staticmethod def from_node(node: AbstractNode) -> Self: @@ -39,4 +40,5 @@ def from_node(node: AbstractNode) -> Self: document_store_config=node.document_store_config, action_store_config=node.action_store_config, node_side_type=node.node_side_type.value, + blob_store_config=node.blob_store_config, ) diff --git a/packages/syft/src/syft/service/queue/queue.py b/packages/syft/src/syft/service/queue/queue.py index e22062bf90c..e171db8ca41 100644 --- a/packages/syft/src/syft/service/queue/queue.py +++ b/packages/syft/src/syft/service/queue/queue.py @@ -77,6 +77,7 @@ def handle_message(message: bytes): signing_key=worker_settings.signing_key, document_store_config=worker_settings.document_store_config, action_store_config=worker_settings.action_store_config, + blob_storage_config=worker_settings.blob_store_config, is_subprocess=True, ) From ff7e3762ee4b48618fcf61c1ae2fd28d1f15d0e3 Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Thu, 24 Aug 2023 17:15:21 +0530 Subject: [PATCH 54/98] fix key generation in AuthNodeContextRegistry --- packages/syft/src/syft/node/node.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/packages/syft/src/syft/node/node.py b/packages/syft/src/syft/node/node.py index 
776955d264c..1ccea965cd2 100644 --- a/packages/syft/src/syft/node/node.py +++ b/packages/syft/src/syft/node/node.py @@ -199,17 +199,21 @@ def set_node_context( if isinstance(user_verify_key, str): user_verify_key = SyftVerifyKey.from_string(user_verify_key) - key = "-".join(str(x) for x in (node_uid, user_verify_key)) + key = cls._get_key(node_uid=node_uid, user_verify_key=user_verify_key) cls.__node_context_registry__[key] = context + @staticmethod + def _get_key(node_uid: UID, user_verify_key: SyftVerifyKey) -> str: + return "-".join(str(x) for x in (node_uid, user_verify_key)) + @classmethod def auth_context_for_user( cls, node_uid: UID, user_verify_key: SyftVerifyKey, ) -> Optional[AuthedServiceContext]: - key = (node_uid, user_verify_key) + key = cls._get_key(node_uid=node_uid, user_verify_key=user_verify_key) return cls.__node_context_registry__.get(key) @@ -478,7 +482,12 @@ def __repr__(self) -> str: return f"{type(self).__name__}: {self.name} - {self.id} - {self.node_type}{service_string}" def post_init(self) -> None: - context = AuthedServiceContext(node=self, credentials=self.verify_key) + context = AuthedServiceContext( + node=self, credentials=self.verify_key, role=ServiceRole.ADMIN + ) + AuthNodeContextRegistry.set_node_context( + node_uid=self.id, user_verify_key=self.verify_key, context=context + ) if UserCodeService in self.services: user_code_service = self.get_service(UserCodeService) From b6f2ff5a305355efe9531e185b0ef268a73caee0 Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Thu, 24 Aug 2023 17:36:54 +0530 Subject: [PATCH 55/98] add serde for blob storage config fix type for blob storage config in worker settings --- packages/syft/src/syft/node/node.py | 1 + packages/syft/src/syft/node/worker_settings.py | 6 +++++- packages/syft/src/syft/service/network/network_service.py | 1 + packages/syft/src/syft/store/blob_storage/__init__.py | 1 + packages/syft/src/syft/store/blob_storage/on_disk.py | 1 + 
packages/syft/src/syft/store/blob_storage/seaweedfs.py | 1 + 6 files changed, 10 insertions(+), 1 deletion(-) diff --git a/packages/syft/src/syft/node/node.py b/packages/syft/src/syft/node/node.py index 1ccea965cd2..7066f0ac1b3 100644 --- a/packages/syft/src/syft/node/node.py +++ b/packages/syft/src/syft/node/node.py @@ -344,6 +344,7 @@ def init_blob_storage(self, config: Optional[BlobStorageConfig] = None) -> None: config_ = OnDiskBlobStorageConfig(client_config=client_config) else: config_ = config + self.blob_store_config = config_ self.blob_storage_client = config_.client_type(config=config_.client_config) def init_queue_manager(self, queue_config: Optional[QueueConfig]): diff --git a/packages/syft/src/syft/node/worker_settings.py b/packages/syft/src/syft/node/worker_settings.py index b01bba776a7..6996fb411ee 100644 --- a/packages/syft/src/syft/node/worker_settings.py +++ b/packages/syft/src/syft/node/worker_settings.py @@ -1,6 +1,9 @@ # future from __future__ import annotations +# stdlib +from typing import Optional + # third party from typing_extensions import Self @@ -10,6 +13,7 @@ from ..abstract_node import NodeType from ..node.credentials import SyftSigningKey from ..serde.serializable import serializable +from ..store.blob_storage import BlobStorageConfig from ..store.document_store import StoreConfig from ..types.syft_object import SYFT_OBJECT_VERSION_1 from ..types.syft_object import SyftObject @@ -28,7 +32,7 @@ class WorkerSettings(SyftObject): signing_key: SyftSigningKey document_store_config: StoreConfig action_store_config: StoreConfig - blob_store_config: StoreConfig + blob_store_config: Optional[BlobStorageConfig] @staticmethod def from_node(node: AbstractNode) -> Self: diff --git a/packages/syft/src/syft/service/network/network_service.py b/packages/syft/src/syft/service/network/network_service.py index aa9a4b62c9c..ee8fe02843b 100644 --- a/packages/syft/src/syft/service/network/network_service.py +++ 
b/packages/syft/src/syft/service/network/network_service.py @@ -503,6 +503,7 @@ def http_connection_to_node_route() -> List[Callable]: def get_python_node_route(context: TransformContext) -> TransformContext: context.output["id"] = context.obj.node.id + print("Store config....", context.obj.node.blob_store_config) context.output["worker_settings"] = WorkerSettings.from_node(context.obj.node) context.output["proxy_target_uid"] = context.obj.proxy_target_uid return context diff --git a/packages/syft/src/syft/store/blob_storage/__init__.py b/packages/syft/src/syft/store/blob_storage/__init__.py index 854bcebf401..99fde7b1678 100644 --- a/packages/syft/src/syft/store/blob_storage/__init__.py +++ b/packages/syft/src/syft/store/blob_storage/__init__.py @@ -173,6 +173,7 @@ def connect(self) -> BlobStorageConnection: raise NotImplementedError +@serializable() class BlobStorageConfig(SyftBaseModel): client_type: Type[BlobStorageClient] client_config: BlobStorageClientConfig diff --git a/packages/syft/src/syft/store/blob_storage/on_disk.py b/packages/syft/src/syft/store/blob_storage/on_disk.py index 72e2d668e6f..81f990ca6e8 100644 --- a/packages/syft/src/syft/store/blob_storage/on_disk.py +++ b/packages/syft/src/syft/store/blob_storage/on_disk.py @@ -102,6 +102,7 @@ def connect(self) -> BlobStorageConnection: return OnDiskBlobStorageConnection(self.config.base_directory) +@serializable() class OnDiskBlobStorageConfig(BlobStorageConfig): client_type: Type[BlobStorageClient] = OnDiskBlobStorageClient client_config: OnDiskBlobStorageClientConfig = OnDiskBlobStorageClientConfig() diff --git a/packages/syft/src/syft/store/blob_storage/seaweedfs.py b/packages/syft/src/syft/store/blob_storage/seaweedfs.py index c73ff71a580..39b8c161bfe 100644 --- a/packages/syft/src/syft/store/blob_storage/seaweedfs.py +++ b/packages/syft/src/syft/store/blob_storage/seaweedfs.py @@ -223,6 +223,7 @@ def delete( return SyftError(message=str(e)) +@serializable() class 
SeaweedFSConfig(BlobStorageConfig): client_type: Type[BlobStorageClient] = SeaweedFSClient client_config: SeaweedFSClientConfig From 23fa9359cbd8b1f49de0b8914cca8ef5a468d358 Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Thu, 24 Aug 2023 17:43:40 +0530 Subject: [PATCH 56/98] unlink blob directory only if it exists --- packages/syft/src/syft/node/node.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/syft/src/syft/node/node.py b/packages/syft/src/syft/node/node.py index 7066f0ac1b3..5029f1d12ea 100644 --- a/packages/syft/src/syft/node/node.py +++ b/packages/syft/src/syft/node/node.py @@ -415,8 +415,9 @@ def named( # Reset blob storage root_directory = get_root_data_path() base_directory = root_directory / f"{uid}" - for file in base_directory.iterdir(): - file.unlink() + if base_directory.exists(): + for file in base_directory.iterdir(): + file.unlink() blob_client_config = OnDiskBlobStorageClientConfig( base_directory=base_directory ) From 76a75243f6218d86e8c23d352896734cd566d9a3 Mon Sep 17 00:00:00 2001 From: Kien Dang Date: Fri, 25 Aug 2023 13:31:26 +0800 Subject: [PATCH 57/98] CI --- packages/syft/src/syft/service/code/user_code_service.py | 4 ++-- packages/syft/src/syft/service/policy/policy.py | 6 +++--- packages/syft/src/syft/store/blob_storage/__init__.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/syft/src/syft/service/code/user_code_service.py b/packages/syft/src/syft/service/code/user_code_service.py index d776b5020c5..e4a9eba500b 100644 --- a/packages/syft/src/syft/service/code/user_code_service.py +++ b/packages/syft/src/syft/service/code/user_code_service.py @@ -192,9 +192,9 @@ def get_results( outputs = enclave_client.code.get_results(code.id) if isinstance(outputs, list): for output in outputs: - output.syft_action_data + output.syft_action_data # noqa: B018 else: - outputs.syft_action_data + outputs.syft_action_data # noqa: B018 return outputs # if the current node is the enclave 
diff --git a/packages/syft/src/syft/service/policy/policy.py b/packages/syft/src/syft/service/policy/policy.py index 8a1c5a7f284..af2be77fd5f 100644 --- a/packages/syft/src/syft/service/policy/policy.py +++ b/packages/syft/src/syft/service/policy/policy.py @@ -206,10 +206,10 @@ def _inputs_for_context(self, context: ChangeContext): action_object_value = action_object.ok() # resolve syft action data from blob store if isinstance(action_object_value, TwinObject): - action_object_value.private_obj.syft_action_data - action_object_value.mock_obj.syft_action_data + action_object_value.private_obj.syft_action_data # noqa: B018 + action_object_value.mock_obj.syft_action_data # noqa: B018 elif isinstance(action_object_value, ActionObject): - action_object_value.syft_action_data + action_object_value.syft_action_data # noqa: B018 inputs[var_name] = action_object_value return inputs diff --git a/packages/syft/src/syft/store/blob_storage/__init__.py b/packages/syft/src/syft/store/blob_storage/__init__.py index 99fde7b1678..561fdd4a6b5 100644 --- a/packages/syft/src/syft/store/blob_storage/__init__.py +++ b/packages/syft/src/syft/store/blob_storage/__init__.py @@ -118,7 +118,7 @@ def read(self) -> Union[SyftObject, SyftError]: blob_url = self.url try: if self.type_ is BlobFileType: - urlretrieve(str(blob_url), filename=self.file_name) + urlretrieve(str(blob_url), filename=self.file_name) # nosec return BlobFile(file_name=self.file_name) response = requests.get(str(blob_url), timeout=DEFAULT_TIMEOUT) response.raise_for_status() From e8b2e9ffe02302c67621f557659a69be45545754 Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Sun, 27 Aug 2023 20:19:20 +0530 Subject: [PATCH 58/98] - fetch api for root and guest client in node.py - fix repr for action objects - fix filter for args and kwargs during Plan execute --- packages/syft/src/syft/node/node.py | 10 ++++-- .../src/syft/service/action/action_object.py | 33 ++++++++++++++----- .../src/syft/service/action/action_service.py | 12 
+++---- 3 files changed, 39 insertions(+), 16 deletions(-) diff --git a/packages/syft/src/syft/node/node.py b/packages/syft/src/syft/node/node.py index 5029f1d12ea..b4584bf6a6f 100644 --- a/packages/syft/src/syft/node/node.py +++ b/packages/syft/src/syft/node/node.py @@ -450,7 +450,9 @@ def root_client(self): client_type = connection.get_client_type() if isinstance(client_type, SyftError): return client_type - return client_type(connection=connection, credentials=self.signing_key) + root_client = client_type(connection=connection, credentials=self.signing_key) + root_client.api.refresh_api_callback() + return root_client @property def guest_client(self): @@ -471,7 +473,11 @@ def get_guest_client(self, verbose: bool = True): if isinstance(client_type, SyftError): return client_type - return client_type(connection=connection, credentials=SyftSigningKey.generate()) + guest_client = client_type( + connection=connection, credentials=SyftSigningKey.generate() + ) + guest_client.api.refresh_api_callback() + return guest_client def __repr__(self) -> str: service_string = "" diff --git a/packages/syft/src/syft/service/action/action_object.py b/packages/syft/src/syft/service/action/action_object.py index 6f058aba9c1..e6d72fc69cc 100644 --- a/packages/syft/src/syft/service/action/action_object.py +++ b/packages/syft/src/syft/service/action/action_object.py @@ -676,7 +676,13 @@ def _syft_try_to_save_to_store(self, obj) -> None: if obj.syft_node_location is None: obj.syft_node_location = obj.syft_node_uid - obj._save_to_blob_store() + api = None + if TraceResult._client is not None: + api = TraceResult._client.api + + if api is not None: + obj._set_node_location(api.node_uid, api.signing_key.verify_key) + obj._save_to_blob_store() action = Action( path="", @@ -689,12 +695,11 @@ def _syft_try_to_save_to_store(self, obj) -> None: create_object=obj, ) - if TraceResult._client is not None: - api = TraceResult._client.api + if api is not None: TraceResult.result += [action] else: 
api = APIRegistry.api_for( - node_uid=self.syft_node_uid, + node_uid=self.syft_node_location, user_verify_key=self.syft_client_verify_key, ) api.services.action.execute(action) @@ -848,8 +853,7 @@ def wrapper( def send(self, client: SyftClient) -> Self: """Send the object to a Syft Client""" - self.syft_node_location = client.id - self.syft_client_verify_key = client.verify_key + self._set_obj_location_(client.id, client.verify_key) self._save_to_blob_store() res = client.api.services.action.set(self) return res @@ -1416,7 +1420,16 @@ def _repr_markdown_(self) -> str: elif not self.is_twin: res = "Pointer" - return f"```python\n{res}\n```\n{self.syft_action_data_repr_}" + if isinstance(self.syft_action_data_cache, ActionDataEmpty): + data_repr_ = self.syft_action_data_repr_ + else: + data_repr_ = ( + self.syft_action_data_cache._repr_markdown_() + if hasattr(self.syft_action_data_cache, "_repr_markdown_") + else self.syft_action_data_cache.__repr__() + ) + + return f"```python\n{res}\n```\n{data_repr_}" def __repr__(self) -> str: if self.is_mock: @@ -1425,7 +1438,11 @@ def __repr__(self) -> str: res = "TwinPointer(Real)" if not self.is_twin: res = "Pointer" - return f"{res}:\n{self.syft_action_data_str_}" + if isinstance(self.syft_action_data_cache, ActionDataEmpty): + data_repr_ = self.syft_action_data_repr_ + else: + data_repr_ = self.syft_action_data_cache.__repr__() + return f"{res}:\n{data_repr_}" def __call__(self, *args: Any, **kwds: Any) -> Any: return self.__call__(*args, **kwds) diff --git a/packages/syft/src/syft/service/action/action_service.py b/packages/syft/src/syft/service/action/action_service.py index dcaaf0517bb..6341291bd0c 100644 --- a/packages/syft/src/syft/service/action/action_service.py +++ b/packages/syft/src/syft/service/action/action_service.py @@ -437,11 +437,11 @@ def execute( # relative from .plan import Plan - data_uploaded_to_blob_store = True + data_uploaded_to_blob_store = False if action.action_type == ActionType.CREATEOBJECT: 
result_action_object = Ok(action.create_object) data_uploaded_to_blob_store = ( - action.create_object.blob_storage_entry_id is not None + action.create_object.syft_blob_storage_entry_id is not None ) elif action.action_type == ActionType.FUNCTION: result_action_object = self.call_function(context, action) @@ -671,15 +671,15 @@ def execute_object( result_action_object = wrap_result(action.result_id, result) elif twin_mode == TwinMode.NONE and has_twin_inputs: # self isn't a twin but one of the inputs is - private_args = filter_twin_args(args, twin_mode=twin_mode) - private_kwargs = filter_twin_kwargs(kwargs, twin_mode=twin_mode) + private_args = filter_twin_args(args, twin_mode=TwinMode.PRIVATE) + private_kwargs = filter_twin_kwargs(kwargs, twin_mode=TwinMode.PRIVATE) private_result = target_method(*private_args, **private_kwargs) result_action_object_private = wrap_result( action.result_id, private_result ) - mock_args = filter_twin_args(args, twin_mode=twin_mode) - mock_kwargs = filter_twin_kwargs(kwargs, twin_mode=twin_mode) + mock_args = filter_twin_args(args, twin_mode=TwinMode.MOCK) + mock_kwargs = filter_twin_kwargs(kwargs, twin_mode=TwinMode.MOCK) mock_result = target_method(*mock_args, **mock_kwargs) result_action_object_mock = wrap_result(action.result_id, mock_result) From 82d8bb1d2e85e0b26e0e1f9a47147f89fc53a3ff Mon Sep 17 00:00:00 2001 From: Koen van der Veen Date: Tue, 29 Aug 2023 17:58:43 +0200 Subject: [PATCH 59/98] fix part of the tests --- .../src/syft/service/action/action_object.py | 120 ++++++++++++++---- .../src/syft/service/action/action_service.py | 40 +++--- packages/syft/src/syft/service/action/plan.py | 6 + .../syft/src/syft/service/request/request.py | 16 ++- .../src/syft/service/user/user_service.py | 7 +- packages/syft/src/syft/types/twin_object.py | 2 +- packages/syft/tests/syft/eager_test.py | 8 +- .../syft/service/action/action_object_test.py | 4 - 8 files changed, 143 insertions(+), 60 deletions(-) diff --git 
a/packages/syft/src/syft/service/action/action_object.py b/packages/syft/src/syft/service/action/action_object.py index e6d72fc69cc..0ef1059ab0d 100644 --- a/packages/syft/src/syft/service/action/action_object.py +++ b/packages/syft/src/syft/service/action/action_object.py @@ -68,6 +68,10 @@ class ActionType(Enum): CREATEOBJECT = 16 +def repr_cls(c): + return f"{c.__module__}.{c.__name__}" + + @serializable() class Action(SyftObject): """Serializable Action object. @@ -267,7 +271,8 @@ def make_action_side_effect( action_type=context.action_type, ) context.action = action - except Exception: + except Exception as e: + raise e print(f"make_action_side_effect failed with {traceback.format_exc()}") return Err(f"make_action_side_effect failed with {traceback.format_exc()}") return Ok((context, args, kwargs)) @@ -276,6 +281,7 @@ def make_action_side_effect( class TraceResult: result = [] _client = None + is_tracing = False @classmethod def reset(cls): @@ -309,7 +315,11 @@ def convert_to_pointers( syft_node_location=api.node_uid, ) arg.syft_node_uid = node_uid - arg._save_to_blob_store() + r = arg._save_to_blob_store() + if isinstance(r, SyftError): + print(r) + else: + print("Set") arg = api.services.action.set(arg) arg_list.append(arg) @@ -322,7 +332,9 @@ def convert_to_pointers( syft_node_location=api.node_uid, ) arg.syft_node_uid = node_uid - arg._save_to_blob_store() + r = arg._save_to_blob_store() + if isinstance(r, SyftError): + print(r) arg = api.services.action.set(arg) kwarg_dict[k] = arg @@ -430,6 +442,7 @@ def debox_args_and_kwargs(args: Any, kwargs: Any) -> Tuple[Any, Any]: "__check_action_data", "as_empty_data", "_set_obj_location_", + "syft_action_data_cache", ] @@ -466,7 +479,11 @@ class ActionObject(SyftObject): @property def syft_action_data(self) -> Any: - if self.syft_blob_storage_entry_id and self.syft_created_at: + if ( + self.syft_blob_storage_entry_id + and self.syft_created_at + and not TraceResult.is_tracing + ): # If ActionDataEmpty then try to 
fetch it from store. if isinstance(self.syft_action_data_cache, ActionDataEmpty): blob_storage_read_method = from_api_or_context( @@ -480,6 +497,10 @@ def syft_action_data(self) -> Any: uid=self.syft_blob_storage_entry_id ) if isinstance(blob_retrieval_object, SyftError): + print( + "Detached actionobject, object exists but is not linked to data in the blob storage", + blob_retrieval_object, + ) return blob_retrieval_object self.syft_action_data_cache = blob_retrieval_object.read() @@ -487,6 +508,9 @@ def syft_action_data(self) -> Any: def _set_syft_action_data(self, data: Any) -> None: if not isinstance(data, ActionDataEmpty): + print( + f"setting action data {data} to node with id", self.syft_node_location + ) if isinstance(data, ActionFileData): storage_entry = CreateBlobStorageEntry.from_path(data.filepath) else: @@ -517,13 +541,18 @@ def _set_syft_action_data(self, data: Any) -> None: self.syft_action_data_type = type(data) - self.syft_action_data_repr_ = ( - data._repr_markdown_() - if hasattr(data, "_repr_markdown_") - else data.__repr__() - ) + if inspect.isclass(data): + self.syft_action_data_repr_ = repr_cls(data) + else: + self.syft_action_data_repr_ = ( + data._repr_markdown_() + if hasattr(data, "_repr_markdown_") + else data.__repr__() + ) self.syft_action_data_str_ = str(data) self.syft_has_bool_attr = hasattr(data, "__bool__") + else: + print("skipping writing actionobject to store, passed data was empty") self.syft_action_data_cache = data @@ -552,9 +581,14 @@ def __check_action_data(cls, values: dict) -> dict: if not isinstance(v, ActionDataEmpty): values["syft_action_data_type"] = type(v) - values["syft_action_data_repr_"] = ( - v._repr_markdown_() if hasattr(v, "_repr_markdown_") else v.__repr__() - ) + if inspect.isclass(v): + values["syft_action_data_repr_"] = repr_cls(v) + else: + values["syft_action_data_repr_"] = ( + v._repr_markdown_() + if hasattr(v, "_repr_markdown_") + else v.__repr__() + ) values["syft_action_data_str_"] = str(v) 
values["syft_has_bool_attr"] = hasattr(v, "__bool__") return values @@ -563,10 +597,14 @@ def _save_to_blob_store(self) -> Optional[SyftError]: data = self.syft_action_data if isinstance(data, SyftError): return data + if isinstance(data, ActionDataEmpty): + print(f"cannot store empty object {self.id}") + return data result = self._set_syft_action_data(data) if isinstance(result, SyftError): return result - self.syft_action_data_cache = self.as_empty_data() + if not TraceResult.is_tracing: + self.syft_action_data_cache = self.as_empty_data() @property def is_mock(self): @@ -658,6 +696,10 @@ def request(self, client): return client.api.services.request.submit(submit_request) def _syft_try_to_save_to_store(self, obj) -> None: + # print("trying to save to store", obj) + # print(self.syft_node_uid) + # print(self.syft_client_verify_key) + # print(obj.syft_node_uid) if self.syft_node_uid is None or self.syft_client_verify_key is None: return elif obj.syft_node_uid is not None: @@ -681,8 +723,14 @@ def _syft_try_to_save_to_store(self, obj) -> None: api = TraceResult._client.api if api is not None: - obj._set_node_location(api.node_uid, api.signing_key.verify_key) - obj._save_to_blob_store() + obj._set_obj_location_(api.node_uid, api.signing_key.verify_key) + res = obj._save_to_blob_store() + if isinstance(res, SyftError): + print(f"failed saving {obj} to blob storage, error: {res}") + else: + print("succesfully saved", obj) + else: + print("Did not save, api not found", obj) action = Action( path="", @@ -702,7 +750,9 @@ def _syft_try_to_save_to_store(self, obj) -> None: node_uid=self.syft_node_location, user_verify_key=self.syft_client_verify_key, ) - api.services.action.execute(action) + res = api.services.action.execute(action) + if isinstance(res, SyftError): + print("Failed to to store (arg) to store", res) def _syft_prepare_obj_uid(self, obj) -> LineageID: # We got the UID @@ -856,6 +906,8 @@ def send(self, client: SyftClient) -> Self: 
self._set_obj_location_(client.id, client.verify_key) self._save_to_blob_store() res = client.api.services.action.set(self) + if isinstance(res, ActionObject): + self.syft_created_at = res.syft_created_at return res def get_from(self, client: SyftClient) -> Any: @@ -1053,7 +1105,7 @@ def _syft_run_pre_hooks__( if result.is_ok(): context, result_args, result_kwargs = result.ok() else: - debug(f"Pre-hook failed with {result.err()}") + print(f"Pre-hook failed with {result.err()}") if name not in self._syft_dont_wrap_attrs(): if HOOK_ALWAYS in self._syft_pre_hooks__: for hook in self._syft_pre_hooks__[HOOK_ALWAYS]: @@ -1062,7 +1114,7 @@ def _syft_run_pre_hooks__( context, result_args, result_kwargs = result.ok() else: msg = result.err().replace("\\n", "\n") - debug(f"Pre-hook failed with {msg}") + print(f"Pre-hook failed with {msg}") if self.is_pointer: if name not in self._syft_dont_wrap_attrs(): @@ -1073,7 +1125,7 @@ def _syft_run_pre_hooks__( context, result_args, result_kwargs = result.ok() else: msg = result.err().replace("\\n", "\n") - debug(f"Pre-hook failed with {msg}") + print(f"Pre-hook failed with {msg}") return context, result_args, result_kwargs @@ -1318,6 +1370,7 @@ def wrapper(_self: Any, *args: Any, **kwargs: Any): except Exception: debug("name", name, "has no signature") + # third party return wrapper def _syft_setattr(self, name, value): @@ -1388,7 +1441,8 @@ def __getattribute__(self, name: str) -> Any: return self._syft_wrap_attribute_for_properties(name) # Handle anything else - return self._syft_wrap_attribute_for_methods(name) + res = self._syft_wrap_attribute_for_methods(name) + return res def __setattr__(self, name: str, value: Any) -> Any: defined_on_self = name in self.__dict__ or name in self.__private_attributes__ @@ -1423,11 +1477,14 @@ def _repr_markdown_(self) -> str: if isinstance(self.syft_action_data_cache, ActionDataEmpty): data_repr_ = self.syft_action_data_repr_ else: - data_repr_ = ( - self.syft_action_data_cache._repr_markdown_() 
- if hasattr(self.syft_action_data_cache, "_repr_markdown_") - else self.syft_action_data_cache.__repr__() - ) + if inspect.isclass(self.syft_action_data_cache): + data_repr_ = repr_cls(self.syft_action_data_cache) + else: + data_repr_ = ( + self.syft_action_data_cache._repr_markdown_() + if hasattr(self.syft_action_data_cache, "_repr_markdown_") + else self.syft_action_data_cache.__repr__() + ) return f"```python\n{res}\n```\n{data_repr_}" @@ -1441,14 +1498,20 @@ def __repr__(self) -> str: if isinstance(self.syft_action_data_cache, ActionDataEmpty): data_repr_ = self.syft_action_data_repr_ else: - data_repr_ = self.syft_action_data_cache.__repr__() + if inspect.isclass(self.syft_action_data_cache): + data_repr_ = repr_cls(self.syft_action_data_cache) + else: + data_repr_ = self.syft_action_data_cache.__repr__() return f"{res}:\n{data_repr_}" def __call__(self, *args: Any, **kwds: Any) -> Any: return self.__call__(*args, **kwds) def __str__(self) -> str: - return self.__str__() + if not inspect.isclass: + return self.__str__() + else: + return self.syft_action_data_str_ def __len__(self) -> int: return self.__len__() @@ -1480,6 +1543,9 @@ def __matmul__(self, other: Any) -> Any: def __eq__(self, other: Any) -> Any: return self._syft_output_action_object(self.__eq__(other)) + def __ne__(self, other: Any) -> Any: + return self._syft_output_action_object(self.__ne__(other)) + def __lt__(self, other: Any) -> Any: return self._syft_output_action_object(self.__lt__(other)) diff --git a/packages/syft/src/syft/service/action/action_service.py b/packages/syft/src/syft/service/action/action_service.py index 6341291bd0c..2d665a839b9 100644 --- a/packages/syft/src/syft/service/action/action_service.py +++ b/packages/syft/src/syft/service/action/action_service.py @@ -295,7 +295,7 @@ def execute_plan( for plan_action in plan.actions: action_res = self.execute(context, plan_action) - if action_res.is_err(): + if isinstance(action_res, SyftError): return action_res result_id = 
plan.outputs[0].id return self._get(context, result_id, TwinMode.MOCK, has_permission=True) @@ -437,12 +437,9 @@ def execute( # relative from .plan import Plan - data_uploaded_to_blob_store = False if action.action_type == ActionType.CREATEOBJECT: result_action_object = Ok(action.create_object) - data_uploaded_to_blob_store = ( - action.create_object.syft_blob_storage_entry_id is not None - ) + # print(action.create_object, "already in blob storage") elif action.action_type == ActionType.FUNCTION: result_action_object = self.call_function(context, action) else: @@ -457,9 +454,7 @@ def execute( f"Failed executing action {action}, could not resolve self: {resolved_self.err()}" ) resolved_self = resolved_self.ok() - if action.op == "__call__" and isinstance( - resolved_self.syft_action_data_type, Plan - ): + if action.op == "__call__" and resolved_self.syft_action_data_type == Plan: result_action_object = self.execute_plan( plan=resolved_self.syft_action_data, context=context, @@ -488,14 +483,18 @@ def execute( has_result_read_permission = self.has_read_permission_for_action_result( context, action ) - if not data_uploaded_to_blob_store: - result_action_object._set_obj_location_( - context.node.id, - context.credentials, - ) - blob_store_result = result_action_object._save_to_blob_store() - if isinstance(blob_store_result, SyftError): - return blob_store_result + + result_action_object._set_obj_location_( + context.node.id, + context.credentials, + ) + + blob_store_result = result_action_object._save_to_blob_store() + if isinstance(blob_store_result, SyftError): + print("error storing", result_action_object) + return blob_store_result + else: + print("stored", result_action_object) # pass permission information to the action store as extra kwargs context.extra_kwargs = { @@ -645,12 +644,13 @@ def execute_object( twin_mode: TwinMode = TwinMode.NONE, ) -> Result[Ok[Union[TwinObject, ActionObject]], Err[str]]: unboxed_resolved_self = resolved_self.syft_action_data - 
args, has_arg_twins = resolve_action_args(action, context, service) + _args, has_arg_twins = resolve_action_args(action, context, service) + kwargs, has_kwargs_twins = resolve_action_kwargs(action, context, service) - if args.is_err(): - return args + if _args.is_err(): + return _args else: - args = args.ok() + args = _args.ok() if kwargs.is_err(): return kwargs else: diff --git a/packages/syft/src/syft/service/action/plan.py b/packages/syft/src/syft/service/action/plan.py index e1add8949d5..298f34693bc 100644 --- a/packages/syft/src/syft/service/action/plan.py +++ b/packages/syft/src/syft/service/action/plan.py @@ -58,6 +58,7 @@ def __call__(self, *args, **kwargs): def planify(func): TraceResult.reset() ActionObject.add_trace_hook() + TraceResult.is_tracing = True worker = Worker.named(name="plan_building", reset=True, processes=0) client = worker.root_client TraceResult._client = client @@ -69,7 +70,12 @@ def planify(func): actions = TraceResult.result TraceResult.reset() code = inspect.getsource(func) + for a in actions: + if a.create_object is not None: + # warmup cache + a.create_object.syft_action_data # noqa: B018 plan = Plan(inputs=plan_kwargs, actions=actions, outputs=outputs, code=code) + TraceResult.is_tracing = False return ActionObject.from_obj(plan) diff --git a/packages/syft/src/syft/service/request/request.py b/packages/syft/src/syft/service/request/request.py index 28a40c648c5..36919026a5d 100644 --- a/packages/syft/src/syft/service/request/request.py +++ b/packages/syft/src/syft/service/request/request.py @@ -30,6 +30,7 @@ from ...types.transforms import add_node_uid_for_key from ...types.transforms import generate_id from ...types.transforms import transform +from ...types.twin_object import TwinObject from ...types.uid import LineageID from ...types.uid import UID from ...util import options @@ -122,13 +123,22 @@ def _run( permission=self.apply_permission_type, ) if action_store.has_permission(permission=owner_permission): + id_action = ( + 
action_obj.id + if not isinstance(action_obj.id, LineageID) + else action_obj.id.id + ) requesting_permission_action_obj = ActionObjectPermission( - uid=action_obj.id, + uid=id_action, credentials=context.requesting_user_credentials, permission=self.apply_permission_type, ) + if isinstance(action_obj, TwinObject): + uid_blob = action_obj.private.syft_blob_storage_entry_id + else: + uid_blob = action_obj.syft_blob_storage_entry_id requesting_permission_blob_obj = ActionObjectPermission( - uid=action_obj.syft_blob_storage_entry_id, + uid=uid_blob, credentials=context.requesting_user_credentials, permission=self.apply_permission_type, ) @@ -154,7 +164,7 @@ def _run( ) return Ok(SyftSuccess(message=f"{type(self)} Success")) except Exception as e: - print(f"failed to apply {type(self)}") + print(f"failed to apply {type(self)}", e) return Err(SyftError(message=str(e))) def apply(self, context: ChangeContext) -> Result[SyftSuccess, SyftError]: diff --git a/packages/syft/src/syft/service/user/user_service.py b/packages/syft/src/syft/service/user/user_service.py index b214313614d..dc4b4b5ebaa 100644 --- a/packages/syft/src/syft/service/user/user_service.py +++ b/packages/syft/src/syft/service/user/user_service.py @@ -210,7 +210,12 @@ def get_current_user( SyftError(message="User not found!") return SyftError(message=str(result.err())) - @service_method(path="user.update", name="update", roles=GUEST_ROLE_LEVEL) + @service_method( + path="user.update", + name="update", + roles=GUEST_ROLE_LEVEL, + autosplat=["user_update"], + ) def update( self, context: AuthedServiceContext, uid: UID, user_update: UserUpdate ) -> Union[UserView, SyftError]: diff --git a/packages/syft/src/syft/types/twin_object.py b/packages/syft/src/syft/types/twin_object.py index 02e77ceffce..2cd47a3dd32 100644 --- a/packages/syft/src/syft/types/twin_object.py +++ b/packages/syft/src/syft/types/twin_object.py @@ -82,5 +82,5 @@ def _save_to_blob_store(self): # self.syft_node_location, # 
self.syft_client_verify_key, # ) - self.private_obj._save_to_blob_store() + return self.private_obj._save_to_blob_store() # self.mock_obj._save_to_blob_store() diff --git a/packages/syft/tests/syft/eager_test.py b/packages/syft/tests/syft/eager_test.py index 6ccf9fb826d..6870e444811 100644 --- a/packages/syft/tests/syft/eager_test.py +++ b/packages/syft/tests/syft/eager_test.py @@ -30,7 +30,7 @@ def test_eager_permissions(worker, guest_client): assert all(res_root == [3, 3, 3, 3, 3, 3]) -def test_plan(worker, guest_client): +def test_plan(worker): root_domain_client = worker.root_client guest_client = worker.guest_client @@ -65,9 +65,9 @@ def my_plan(x=np.array([[2, 2, 2], [2, 2, 2]])): # noqa: B008 res_ptr.request(guest_client) # root approves result - root_domain_client.api.services.request[0].approve_with_client(root_domain_client) + root_domain_client.api.services.request[-1].approve_with_client(root_domain_client) - assert res_ptr.get() == 729 + assert res_ptr.get_from(guest_client) == 729 def test_plan_with_function_call(worker, guest_client): @@ -112,7 +112,7 @@ def my_plan(x=np.array([1, 2, 3, 4, 5, 6])): # noqa: B008 res_ptr = plan_ptr(x=pointer) assert all( - root_domain_client.api.services.action.get(res_ptr.id) + root_domain_client.api.services.action.get(res_ptr.id).syft_action_data == np.array([2, 3, 4, 5, 6, 7]) ) diff --git a/packages/syft/tests/syft/service/action/action_object_test.py b/packages/syft/tests/syft/service/action/action_object_test.py index b0d0b9faf3c..ed3600025f1 100644 --- a/packages/syft/tests/syft/service/action/action_object_test.py +++ b/packages/syft/tests/syft/service/action/action_object_test.py @@ -334,10 +334,6 @@ def test_actionobject_syft_point_to(): (True, "__and__", [False], {}, False), ((1, 1, 3), "count", [1], {}, 2), ([1, 2, 1], "count", [1], {}, 2), - ([1, 2, 3], "append", [4], {}, [1, 2, 3, 4]), - ({"a": 1, "b": 2}, "update", [{"c": 3}], {}, {"a": 1, "b": 2, "c": 3}), - ({1, 2, 3}, "add", [5], {}, {1, 2, 3, 5}), - 
({1, 2, 3}, "clear", [], {}, {}), (complex(1, 2), "conjugate", [], {}, complex(1, -2)), ], ) From ca11ce71381046969ddfecc51486c3285e9b4d8a Mon Sep 17 00:00:00 2001 From: Koen van der Veen Date: Tue, 29 Aug 2023 23:29:29 +0200 Subject: [PATCH 60/98] fix more tests --- packages/syft/src/syft/service/action/action_object.py | 2 +- packages/syft/src/syft/service/dataset/dataset.py | 2 +- packages/syft/tests/syft/service/action/action_object_test.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/syft/src/syft/service/action/action_object.py b/packages/syft/src/syft/service/action/action_object.py index 0ef1059ab0d..da421ef46c4 100644 --- a/packages/syft/src/syft/service/action/action_object.py +++ b/packages/syft/src/syft/service/action/action_object.py @@ -551,6 +551,7 @@ def _set_syft_action_data(self, data: Any) -> None: ) self.syft_action_data_str_ = str(data) self.syft_has_bool_attr = hasattr(data, "__bool__") + print("setting has bool", self.syft_has_bool_attr, self) else: print("skipping writing actionobject to store, passed data was empty") @@ -1228,7 +1229,6 @@ def _syft_attr_propagate_ids(self, context, name: str, result: Any) -> Any: "syft_action_data_repr_", "syft_action_data_str_", "syft_action_data_type", - "syft_has_bool_attr", ] for attr_name in object_attrs: attr_value = getattr(context.obj, attr_name, None) diff --git a/packages/syft/src/syft/service/dataset/dataset.py b/packages/syft/src/syft/service/dataset/dataset.py index 1ea2866c929..8650a37a667 100644 --- a/packages/syft/src/syft/service/dataset/dataset.py +++ b/packages/syft/src/syft/service/dataset/dataset.py @@ -361,7 +361,7 @@ def no_mock(self) -> None: # relative from ..action.action_object import ActionObject - self.mock = ActionObject.empty() + self.set_mock(ActionObject.empty(), False) def set_shape(self, shape: Tuple) -> None: self.shape = shape diff --git a/packages/syft/tests/syft/service/action/action_object_test.py 
b/packages/syft/tests/syft/service/action/action_object_test.py index ed3600025f1..07fd17ab4e2 100644 --- a/packages/syft/tests/syft/service/action/action_object_test.py +++ b/packages/syft/tests/syft/service/action/action_object_test.py @@ -552,7 +552,7 @@ def test_actionobject_syft_get_attr_context(): assert obj._syft_get_attr_context("capitalize") is orig_obj assert obj._syft_get_attr_context("__add__") is orig_obj - assert obj._syft_get_attr_context("syft_action_data") is obj + assert obj._syft_get_attr_context("syft_action_data") is obj.syft_action_data @pytest.mark.parametrize( From 0643a26f89c5131bf1432c0b02091c32c3b9f837 Mon Sep 17 00:00:00 2001 From: Koen van der Veen Date: Wed, 30 Aug 2023 00:06:36 +0200 Subject: [PATCH 61/98] more tests --- packages/syft/src/syft/service/action/action_object.py | 4 +++- packages/syft/tests/syft/code_verification_test.py | 10 ++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/packages/syft/src/syft/service/action/action_object.py b/packages/syft/src/syft/service/action/action_object.py index da421ef46c4..38d08459bf9 100644 --- a/packages/syft/src/syft/service/action/action_object.py +++ b/packages/syft/src/syft/service/action/action_object.py @@ -592,6 +592,8 @@ def __check_action_data(cls, values: dict) -> dict: ) values["syft_action_data_str_"] = str(v) values["syft_has_bool_attr"] = hasattr(v, "__bool__") + else: + values["syft_action_data_type"] = ActionDataEmpty return values def _save_to_blob_store(self) -> Optional[SyftError]: @@ -1311,7 +1313,7 @@ def fake_func(*args: Any, **kwargs: Any) -> Any: debug(f"[__getattribute__] Handling method {name} ") if ( - isinstance(self.syft_action_data_type, ActionDataEmpty) + self.syft_action_data_type == ActionDataEmpty and name not in action_data_empty_must_run ): original_func = fake_func diff --git a/packages/syft/tests/syft/code_verification_test.py b/packages/syft/tests/syft/code_verification_test.py index c7182f118cf..c3a6a509fab 100644 --- 
a/packages/syft/tests/syft/code_verification_test.py +++ b/packages/syft/tests/syft/code_verification_test.py @@ -11,13 +11,13 @@ @pytest.fixture def data1() -> ActionObject: """Returns an Action Object with a NumPy dataset with values between -1 and 1""" - return NumpyArrayObject(syft_action_data=2 * np.random.rand(10, 10) - 1) + return NumpyArrayObject.from_obj(2 * np.random.rand(10, 10) - 1) @pytest.fixture def data2() -> ActionObject: """Returns an Action Object with a NumPy dataset with values between -1 and 1""" - return NumpyArrayObject(syft_action_data=2 * np.random.rand(10, 10) - 1) + return NumpyArrayObject.from_obj(2 * np.random.rand(10, 10) - 1) @pytest.fixture @@ -29,7 +29,7 @@ def empty1(data1) -> ActionObject: @pytest.fixture def empty2(data1) -> ActionObject: """Returns an Empty Action Object corresponding to data2""" - return NumpyArrayObject(syft_action_data=ActionDataEmpty(), id=data2.id) + return NumpyArrayObject.from_obj(ActionDataEmpty(), id=data2.id) def test_add_private(data1: ActionObject, data2: ActionObject) -> None: @@ -73,9 +73,7 @@ def test_kwargs(data1: ActionObject) -> None: def test_trace_single_op(data1: ActionObject) -> None: """Test that we can recreate the correct history hash using TraceMode""" result1 = data1.std() - trace_result = NumpyArrayObject( - syft_action_data=ActionDataEmpty(), id=data1.id - ).std() + trace_result = NumpyArrayObject.from_obj(ActionDataEmpty(), id=data1.id).std() assert result1.syft_history_hash == trace_result.syft_history_hash From 02f5dcc18ccc631b8225ab80b377579c6666ae75 Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Wed, 30 Aug 2023 11:18:31 +0530 Subject: [PATCH 62/98] fix type checking in is_action_data_empty function --- packages/syft/src/syft/service/action/action_object.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/syft/src/syft/service/action/action_object.py b/packages/syft/src/syft/service/action/action_object.py index 38d08459bf9..b1d9578f491 100644 
--- a/packages/syft/src/syft/service/action/action_object.py +++ b/packages/syft/src/syft/service/action/action_object.py @@ -1692,8 +1692,8 @@ def debug_original_func(name: str, func: Callable) -> None: def is_action_data_empty(obj: Any) -> bool: - return isinstance(obj, AnyActionObject) and isinstance( - obj.syft_action_data_type, ActionDataEmpty + return isinstance(obj, AnyActionObject) and ( + obj.syft_action_data_type == ActionDataEmpty ) From c85d528feb65c45e1b5364b41a97cc35019e8ce2 Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Wed, 30 Aug 2023 11:37:19 +0530 Subject: [PATCH 63/98] clean up prints --- .../src/syft/service/action/action_object.py | 28 +++++-------------- .../src/syft/service/action/action_service.py | 3 -- 2 files changed, 7 insertions(+), 24 deletions(-) diff --git a/packages/syft/src/syft/service/action/action_object.py b/packages/syft/src/syft/service/action/action_object.py index b1d9578f491..dd06f2c6bda 100644 --- a/packages/syft/src/syft/service/action/action_object.py +++ b/packages/syft/src/syft/service/action/action_object.py @@ -317,9 +317,7 @@ def convert_to_pointers( arg.syft_node_uid = node_uid r = arg._save_to_blob_store() if isinstance(r, SyftError): - print(r) - else: - print("Set") + print(r.message) arg = api.services.action.set(arg) arg_list.append(arg) @@ -334,7 +332,7 @@ def convert_to_pointers( arg.syft_node_uid = node_uid r = arg._save_to_blob_store() if isinstance(r, SyftError): - print(r) + print(r.message) arg = api.services.action.set(arg) kwarg_dict[k] = arg @@ -498,7 +496,7 @@ def syft_action_data(self) -> Any: ) if isinstance(blob_retrieval_object, SyftError): print( - "Detached actionobject, object exists but is not linked to data in the blob storage", + "Detached action object, object exists but is not linked to data in the blob storage", blob_retrieval_object, ) return blob_retrieval_object @@ -508,9 +506,6 @@ def syft_action_data(self) -> Any: def _set_syft_action_data(self, data: Any) -> None: if not 
isinstance(data, ActionDataEmpty): - print( - f"setting action data {data} to node with id", self.syft_node_location - ) if isinstance(data, ActionFileData): storage_entry = CreateBlobStorageEntry.from_path(data.filepath) else: @@ -551,9 +546,8 @@ def _set_syft_action_data(self, data: Any) -> None: ) self.syft_action_data_str_ = str(data) self.syft_has_bool_attr = hasattr(data, "__bool__") - print("setting has bool", self.syft_has_bool_attr, self) else: - print("skipping writing actionobject to store, passed data was empty") + debug("skipping writing action object to store, passed data was empty.") self.syft_action_data_cache = data @@ -699,10 +693,6 @@ def request(self, client): return client.api.services.request.submit(submit_request) def _syft_try_to_save_to_store(self, obj) -> None: - # print("trying to save to store", obj) - # print(self.syft_node_uid) - # print(self.syft_client_verify_key) - # print(obj.syft_node_uid) if self.syft_node_uid is None or self.syft_client_verify_key is None: return elif obj.syft_node_uid is not None: @@ -730,10 +720,6 @@ def _syft_try_to_save_to_store(self, obj) -> None: res = obj._save_to_blob_store() if isinstance(res, SyftError): print(f"failed saving {obj} to blob storage, error: {res}") - else: - print("succesfully saved", obj) - else: - print("Did not save, api not found", obj) action = Action( path="", @@ -1108,7 +1094,7 @@ def _syft_run_pre_hooks__( if result.is_ok(): context, result_args, result_kwargs = result.ok() else: - print(f"Pre-hook failed with {result.err()}") + debug(f"Pre-hook failed with {result.err()}") if name not in self._syft_dont_wrap_attrs(): if HOOK_ALWAYS in self._syft_pre_hooks__: for hook in self._syft_pre_hooks__[HOOK_ALWAYS]: @@ -1117,7 +1103,7 @@ def _syft_run_pre_hooks__( context, result_args, result_kwargs = result.ok() else: msg = result.err().replace("\\n", "\n") - print(f"Pre-hook failed with {msg}") + debug(f"Pre-hook failed with {msg}") if self.is_pointer: if name not in 
self._syft_dont_wrap_attrs(): @@ -1128,7 +1114,7 @@ def _syft_run_pre_hooks__( context, result_args, result_kwargs = result.ok() else: msg = result.err().replace("\\n", "\n") - print(f"Pre-hook failed with {msg}") + debug(f"Pre-hook failed with {msg}") return context, result_args, result_kwargs diff --git a/packages/syft/src/syft/service/action/action_service.py b/packages/syft/src/syft/service/action/action_service.py index 2d665a839b9..d9bc717cf92 100644 --- a/packages/syft/src/syft/service/action/action_service.py +++ b/packages/syft/src/syft/service/action/action_service.py @@ -491,10 +491,7 @@ def execute( blob_store_result = result_action_object._save_to_blob_store() if isinstance(blob_store_result, SyftError): - print("error storing", result_action_object) return blob_store_result - else: - print("stored", result_action_object) # pass permission information to the action store as extra kwargs context.extra_kwargs = { From 15177fab897d9b016fa5f4cfcd025f58077590c7 Mon Sep 17 00:00:00 2001 From: Kien Dang Date: Wed, 30 Aug 2023 14:46:22 +0800 Subject: [PATCH 64/98] Fix notebook --- .../data-owner/02-account-management.ipynb | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/notebooks/tutorials/data-owner/02-account-management.ipynb b/notebooks/tutorials/data-owner/02-account-management.ipynb index 0e5708baea5..8b2426fba72 100644 --- a/notebooks/tutorials/data-owner/02-account-management.ipynb +++ b/notebooks/tutorials/data-owner/02-account-management.ipynb @@ -178,7 +178,7 @@ "id": "a7eb3bff", "metadata": {}, "source": [ - "Lets update the user we just created, and change the role using the `user.update` service method" + "Lets update the user we just created, and change the role using the `users.update` service method" ] }, { @@ -198,9 +198,7 @@ "metadata": {}, "outputs": [], "source": [ - "updated_user = client.users.update(new_user.id, \n", - " UserUpdate(role=ServiceRole.DATA_SCIENTIST, password=\"123\")\n", - ")" + 
"updated_user = client.users.update(uid=new_user.id, role=ServiceRole.DATA_SCIENTIST, password=\"123\")" ] }, { @@ -231,6 +229,16 @@ "ds_client = node.login(email=\"newuser@openmined.org\", password=\"123\")" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "75cc6719", + "metadata": {}, + "outputs": [], + "source": [ + "ds_client" + ] + }, { "cell_type": "code", "execution_count": null, @@ -254,7 +262,7 @@ "id": "82d0802d", "metadata": {}, "source": [ - "Lastly, we can delete users using the `user.delete` service method" + "Lastly, we can delete users using the `users.delete` service method" ] }, { @@ -419,14 +427,6 @@ "source": [ "client.users" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b83dd65a", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -445,7 +445,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.8" + "version": "3.11.3" }, "toc": { "base_numbering": 1, From 11fa438f6150c37d6a0c8491a9679caf4d06adfb Mon Sep 17 00:00:00 2001 From: Kien Dang Date: Wed, 30 Aug 2023 15:00:38 +0800 Subject: [PATCH 65/98] Fix user_test Update services.user.update to use auto_splat syntax --- packages/syft/src/syft/service/user/user.py | 13 +++++----- packages/syft/tests/syft/users/user_test.py | 27 ++++++++------------- 2 files changed, 17 insertions(+), 23 deletions(-) diff --git a/packages/syft/src/syft/service/user/user.py b/packages/syft/src/syft/service/user/user.py index 55f8ef64fbf..003b550773a 100644 --- a/packages/syft/src/syft/service/user/user.py +++ b/packages/syft/src/syft/service/user/user.py @@ -190,7 +190,8 @@ def _set_password(self, new_password: str) -> Union[SyftError, SyftSuccess]: if api is None: return SyftError(message=f"You must login to {self.node_uid}") api.services.user.update( - uid=self.id, user_update=UserUpdate(password=new_password) + uid=self.id, + password=new_password, ) return SyftSuccess( message=f"Successfully updated 
password for " @@ -221,11 +222,11 @@ def set_email(self, email: str) -> Union[SyftSuccess, SyftError]: return SyftError(message=f"You must login to {self.node_uid}") try: - user_update = UserUpdate(email=email) - except ValidationError as e: # noqa: F841 + UserUpdate(email=email) + except ValidationError: return SyftError(message="{email} is not a valid email address.") - result = api.services.user.update(uid=self.id, user_update=user_update) + result = api.services.user.update(uid=self.id, email=email) if isinstance(result, SyftError): return result @@ -250,13 +251,13 @@ def update( ) if api is None: return SyftError(message=f"You must login to {self.node_uid}") - user_update = UserUpdate( + result = api.services.user.update( + uid=self.id, name=name, institution=institution, website=website, role=role, ) - result = api.services.user.update(uid=self.id, user_update=user_update) if isinstance(result, SyftError): return result diff --git a/packages/syft/tests/syft/users/user_test.py b/packages/syft/tests/syft/users/user_test.py index 5729c643166..54d1ebe03b4 100644 --- a/packages/syft/tests/syft/users/user_test.py +++ b/packages/syft/tests/syft/users/user_test.py @@ -12,7 +12,6 @@ from syft.service.context import AuthedServiceContext from syft.service.user.user import ServiceRole from syft.service.user.user import UserCreate -from syft.service.user.user import UserUpdate from syft.service.user.user import UserView GUEST_ROLES = [ServiceRole.GUEST] @@ -42,9 +41,7 @@ def get_mock_client(root_client, role): name=name, email=mail, password=password, password_verify=password ) user_id = [u for u in get_users(worker) if u.email == mail][0].id - assert worker.root_client.api.services.user.update( - user_id, UserUpdate(user_id=user_id, role=role) - ) + assert worker.root_client.api.services.user.update(user_id, role=role) client.login(email=mail, password=password) client._fetch_api(client.credentials) # hacky, but useful for testing: patch user id and role on client @@ 
-164,14 +161,14 @@ def test_user_update_roles(do_client, guest_client, ds_client, root_client, work clients = [get_mock_client(root_client, role) for role in DO_ROLES] for c in clients: assert worker.root_client.api.services.user.update( - c.user_id, UserUpdate(role=ServiceRole.ADMIN) + c.user_id, role=ServiceRole.ADMIN ) # DOs can update the roles of lower roles clients = [get_mock_client(root_client, role) for role in DS_ROLES] for c in clients: assert do_client.api.services.user.update( - c.user_id, UserUpdate(role=ServiceRole.DATA_SCIENTIST) + c.user_id, role=ServiceRole.DATA_SCIENTIST ) clients = [get_mock_client(root_client, role) for role in ADMIN_ROLES] @@ -179,9 +176,7 @@ def test_user_update_roles(do_client, guest_client, ds_client, root_client, work # DOs cannot update roles to greater than / equal to own role for c in clients: for target_role in [ServiceRole.DATA_OWNER, ServiceRole.ADMIN]: - assert not do_client.api.services.user.update( - c.user_id, UserUpdate(role=target_role) - ) + assert not do_client.api.services.user.update(c.user_id, role=target_role) # DOs cannot downgrade higher roles to lower levels clients = [ @@ -192,23 +187,21 @@ def test_user_update_roles(do_client, guest_client, ds_client, root_client, work for target_role in DO_ROLES: if target_role < c.role: assert not do_client.api.services.user.update( - c.user_id, UserUpdate(role=target_role) + c.user_id, role=target_role ) # DSs cannot update any roles clients = [get_mock_client(root_client, role) for role in ADMIN_ROLES] for c in clients: for target_role in ADMIN_ROLES: - assert not ds_client.api.services.user.update( - c.user_id, UserUpdate(role=target_role) - ) + assert not ds_client.api.services.user.update(c.user_id, role=target_role) # Guests cannot update any roles clients = [get_mock_client(root_client, role) for role in ADMIN_ROLES] for c in clients: for target_role in ADMIN_ROLES: assert not guest_client.api.services.user.update( - c.user_id, UserUpdate(role=target_role) + 
c.user_id, role=target_role ) @@ -220,16 +213,16 @@ def test_user_update(root_client): for target_client in target_clients: if executing_client.role != ServiceRole.ADMIN: assert not executing_client.api.services.user.update( - target_client.user_id, UserUpdate(name="abc") + target_client.user_id, name="abc" ) else: assert executing_client.api.services.user.update( - target_client.user_id, UserUpdate(name="abc") + target_client.user_id, name="abc" ) # you can update yourself assert executing_client.api.services.user.update( - executing_client.user_id, UserUpdate(name=Faker().name()) + executing_client.user_id, name=Faker().name() ) From 5455090d8f48977a4a41f4eb5f2132b6020c97b4 Mon Sep 17 00:00:00 2001 From: Kien Dang Date: Wed, 30 Aug 2023 15:48:57 +0800 Subject: [PATCH 66/98] Revert services.user.update autosplat --- notebooks/tutorials/data-owner/02-account-management.ipynb | 2 +- packages/syft/src/syft/service/user/user_service.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/notebooks/tutorials/data-owner/02-account-management.ipynb b/notebooks/tutorials/data-owner/02-account-management.ipynb index 8b2426fba72..b6a677cc880 100644 --- a/notebooks/tutorials/data-owner/02-account-management.ipynb +++ b/notebooks/tutorials/data-owner/02-account-management.ipynb @@ -198,7 +198,7 @@ "metadata": {}, "outputs": [], "source": [ - "updated_user = client.users.update(uid=new_user.id, role=ServiceRole.DATA_SCIENTIST, password=\"123\")" + "updated_user = client.users.update(new_user.id, UserUpdate(role=ServiceRole.DATA_SCIENTIST, password=\"123\"))" ] }, { diff --git a/packages/syft/src/syft/service/user/user_service.py b/packages/syft/src/syft/service/user/user_service.py index dc4b4b5ebaa..f488147bc61 100644 --- a/packages/syft/src/syft/service/user/user_service.py +++ b/packages/syft/src/syft/service/user/user_service.py @@ -214,7 +214,6 @@ def get_current_user( path="user.update", name="update", roles=GUEST_ROLE_LEVEL, - autosplat=["user_update"], ) 
def update( self, context: AuthedServiceContext, uid: UID, user_update: UserUpdate From 59646ccdfeb560d001434afbe6eef0ee1f8b052d Mon Sep 17 00:00:00 2001 From: Kien Dang Date: Wed, 30 Aug 2023 15:58:09 +0800 Subject: [PATCH 67/98] EmailStr is str --- packages/syft/src/syft/service/user/user.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/syft/src/syft/service/user/user.py b/packages/syft/src/syft/service/user/user.py index 55f8ef64fbf..9a67bd84f1a 100644 --- a/packages/syft/src/syft/service/user/user.py +++ b/packages/syft/src/syft/service/user/user.py @@ -112,11 +112,11 @@ class UserUpdate(PartialSyftObject): __version__ = SYFT_OBJECT_VERSION_1 @pydantic.validator("email", pre=True) - def make_email(cls, v: EmailStr) -> Optional[EmailStr]: - return EmailStr(v) if isinstance(v, str) else v + def make_email(cls, v: Any) -> Optional[EmailStr]: + return EmailStr(v) if isinstance(v, str) and not isinstance(v, EmailStr) else v @pydantic.validator("role", pre=True) - def str_to_role(cls, v: Union[str, ServiceRole]) -> Optional[ServiceRole]: + def str_to_role(cls, v: Any) -> Optional[ServiceRole]: if isinstance(v, str) and hasattr(ServiceRole, v.upper()): return getattr(ServiceRole, v.upper()) return v From 692d2ed6fb828936eb1d3451ad0382b313c7aea0 Mon Sep 17 00:00:00 2001 From: Kien Dang Date: Wed, 30 Aug 2023 16:11:32 +0800 Subject: [PATCH 68/98] Remove unused definition --- packages/syft/src/syft/types/syft_metaclass.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/packages/syft/src/syft/types/syft_metaclass.py b/packages/syft/src/syft/types/syft_metaclass.py index 874c5e9df2b..762213b7ba4 100644 --- a/packages/syft/src/syft/types/syft_metaclass.py +++ b/packages/syft/src/syft/types/syft_metaclass.py @@ -7,10 +7,8 @@ from typing import Any from typing import Dict from typing import Generator -from typing import T from typing import Tuple from typing import Type -from typing import Union # third party from pydantic.fields 
import UndefinedType @@ -30,9 +28,6 @@ class Empty: pass -EmptyType = Union[T, Empty] - - class PartialModelMetaclass(ModelMetaclass): def __new__( meta: Type["PartialModelMetaclass"], *args: Any, **kwargs: Any From c818383a6a08150f446f2bd37f81a8a26a183f1e Mon Sep 17 00:00:00 2001 From: Kien Dang Date: Wed, 30 Aug 2023 16:14:51 +0800 Subject: [PATCH 69/98] More correct typing --- packages/syft/src/syft/service/user/user.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/syft/src/syft/service/user/user.py b/packages/syft/src/syft/service/user/user.py index 9a67bd84f1a..6355475280c 100644 --- a/packages/syft/src/syft/service/user/user.py +++ b/packages/syft/src/syft/service/user/user.py @@ -112,11 +112,11 @@ class UserUpdate(PartialSyftObject): __version__ = SYFT_OBJECT_VERSION_1 @pydantic.validator("email", pre=True) - def make_email(cls, v: Any) -> Optional[EmailStr]: + def make_email(cls, v: Any) -> Any: return EmailStr(v) if isinstance(v, str) and not isinstance(v, EmailStr) else v @pydantic.validator("role", pre=True) - def str_to_role(cls, v: Any) -> Optional[ServiceRole]: + def str_to_role(cls, v: Any) -> Any: if isinstance(v, str) and hasattr(ServiceRole, v.upper()): return getattr(ServiceRole, v.upper()) return v From 4250785706523746d3ab2f3b9bbd396bcf4be9cd Mon Sep 17 00:00:00 2001 From: Kien Dang Date: Wed, 30 Aug 2023 18:23:19 +0800 Subject: [PATCH 70/98] Attempt to fix RecursionError --- packages/syft/src/syft/service/action/numpy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/syft/src/syft/service/action/numpy.py b/packages/syft/src/syft/service/action/numpy.py index 1f3e706d9f3..8521581048a 100644 --- a/packages/syft/src/syft/service/action/numpy.py +++ b/packages/syft/src/syft/service/action/numpy.py @@ -58,8 +58,8 @@ class NumpyArrayObject(ActionObject, np.lib.mixins.NDArrayOperatorsMixin): # ) # return self == other - def __bool__(self) -> bool: - return bool(self.all()) + # def 
__bool__(self) -> bool: + # return bool(self.all()) def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): inputs = tuple( From 6214d06be034289a881d11ed7dc2e60825254390 Mon Sep 17 00:00:00 2001 From: teo Date: Wed, 30 Aug 2023 14:12:26 +0300 Subject: [PATCH 71/98] fix numpy tests --- packages/syft/src/syft/service/action/action_object.py | 10 +++++++--- packages/syft/tests/syft/serde/numpy_functions_test.py | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/packages/syft/src/syft/service/action/action_object.py b/packages/syft/src/syft/service/action/action_object.py index dd06f2c6bda..828d87eeec9 100644 --- a/packages/syft/src/syft/service/action/action_object.py +++ b/packages/syft/src/syft/service/action/action_object.py @@ -1292,15 +1292,16 @@ def _syft_wrap_attribute_for_properties(self, name: str) -> Any: def _syft_wrap_attribute_for_methods(self, name: str) -> Any: """Handle `__getattribute__` for methods.""" - + print(name, self.syft_action_data_type, self.id) # check for other types that aren't methods, functions etc def fake_func(*args: Any, **kwargs: Any) -> Any: + print("WHYYY???") return ActionDataEmpty(syft_internal_type=self.syft_internal_type) debug(f"[__getattribute__] Handling method {name} ") if ( - self.syft_action_data_type == ActionDataEmpty - and name not in action_data_empty_must_run + # self.syft_action_data_type == ActionDataEmpty + name not in action_data_empty_must_run and False ): original_func = fake_func else: @@ -1601,9 +1602,11 @@ def __rshift__(self, other: Any) -> Any: return self._syft_output_action_object(self.__rshift__(other)) def __iter__(self): + print("yep...", self.__iter__()) return self._syft_output_action_object(self.__iter__()) def __next__(self): + print("oh no...", self.__next__()) return self._syft_output_action_object(self.__next__()) # r ops @@ -1684,6 +1687,7 @@ def is_action_data_empty(obj: Any) -> bool: def has_action_data_empty(args: Any, kwargs: Any) -> bool: + print(args, kwargs) for 
a in args: if is_action_data_empty(a): return True diff --git a/packages/syft/tests/syft/serde/numpy_functions_test.py b/packages/syft/tests/syft/serde/numpy_functions_test.py index c312ac33f89..122b0739fae 100644 --- a/packages/syft/tests/syft/serde/numpy_functions_test.py +++ b/packages/syft/tests/syft/serde/numpy_functions_test.py @@ -93,5 +93,5 @@ def test_numpy_functions(func, func_arguments, request): else: original_result = eval(f"np.{func}({func_arguments})") - assert result == original_result + assert np.all(result == original_result) assert isinstance(result, ActionObject) From ff99866e47af9a4999e7fa6b66562959639de0a8 Mon Sep 17 00:00:00 2001 From: Kien Dang Date: Wed, 30 Aug 2023 19:17:13 +0800 Subject: [PATCH 72/98] Revert --- packages/syft/src/syft/service/action/numpy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/syft/src/syft/service/action/numpy.py b/packages/syft/src/syft/service/action/numpy.py index 8521581048a..1f3e706d9f3 100644 --- a/packages/syft/src/syft/service/action/numpy.py +++ b/packages/syft/src/syft/service/action/numpy.py @@ -58,8 +58,8 @@ class NumpyArrayObject(ActionObject, np.lib.mixins.NDArrayOperatorsMixin): # ) # return self == other - # def __bool__(self) -> bool: - # return bool(self.all()) + def __bool__(self) -> bool: + return bool(self.all()) def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): inputs = tuple( From 2a7787f9392832bbc273a09e4d1c5700c7d8ac6f Mon Sep 17 00:00:00 2001 From: Koen van der Veen Date: Wed, 30 Aug 2023 13:35:41 +0200 Subject: [PATCH 73/98] fix some tests --- .../src/syft/service/action/action_object.py | 51 ++++++++++--------- .../src/syft/service/action/action_service.py | 2 +- .../syft/src/syft/service/action/numpy.py | 3 -- 3 files changed, 28 insertions(+), 28 deletions(-) diff --git a/packages/syft/src/syft/service/action/action_object.py b/packages/syft/src/syft/service/action/action_object.py index dd06f2c6bda..deadeeddc1e 100644 --- 
a/packages/syft/src/syft/service/action/action_object.py +++ b/packages/syft/src/syft/service/action/action_object.py @@ -441,6 +441,7 @@ def debox_args_and_kwargs(args: Any, kwargs: Any) -> Tuple[Any, Any]: "as_empty_data", "_set_obj_location_", "syft_action_data_cache", + "reload_cache", ] @@ -482,28 +483,32 @@ def syft_action_data(self) -> Any: and self.syft_created_at and not TraceResult.is_tracing ): - # If ActionDataEmpty then try to fetch it from store. - if isinstance(self.syft_action_data_cache, ActionDataEmpty): - blob_storage_read_method = from_api_or_context( - func_or_path="blob_storage.read", - syft_node_location=self.syft_node_location, - syft_client_verify_key=self.syft_client_verify_key, - ) - - if blob_storage_read_method is not None: - blob_retrieval_object = blob_storage_read_method( - uid=self.syft_blob_storage_entry_id - ) - if isinstance(blob_retrieval_object, SyftError): - print( - "Detached action object, object exists but is not linked to data in the blob storage", - blob_retrieval_object, - ) - return blob_retrieval_object - self.syft_action_data_cache = blob_retrieval_object.read() + self.reload_cache() return self.syft_action_data_cache + def reload_cache(self): + # If ActionDataEmpty then try to fetch it from store. 
+ if isinstance(self.syft_action_data_cache, ActionDataEmpty): + blob_storage_read_method = from_api_or_context( + func_or_path="blob_storage.read", + syft_node_location=self.syft_node_location, + syft_client_verify_key=self.syft_client_verify_key, + ) + + if blob_storage_read_method is not None: + blob_retrieval_object = blob_storage_read_method( + uid=self.syft_blob_storage_entry_id + ) + if isinstance(blob_retrieval_object, SyftError): + print( + "Detached action object, object exists but is not linked to data in the blob storage", + blob_retrieval_object, + ) + return blob_retrieval_object + self.syft_action_data_cache = blob_retrieval_object.read() + self.syft_action_data_type = type(self.syft_action_data) + def _set_syft_action_data(self, data: Any) -> None: if not isinstance(data, ActionDataEmpty): if isinstance(data, ActionFileData): @@ -573,9 +578,9 @@ class Config: @pydantic.root_validator() def __check_action_data(cls, values: dict) -> dict: v = values.get("syft_action_data_cache") - if not isinstance(v, ActionDataEmpty): + if "syft_action_data_type" not in values: values["syft_action_data_type"] = type(v) - + if not isinstance(v, ActionDataEmpty): if inspect.isclass(v): values["syft_action_data_repr_"] = repr_cls(v) else: @@ -586,8 +591,6 @@ def __check_action_data(cls, values: dict) -> dict: ) values["syft_action_data_str_"] = str(v) values["syft_has_bool_attr"] = hasattr(v, "__bool__") - else: - values["syft_action_data_type"] = ActionDataEmpty return values def _save_to_blob_store(self) -> Optional[SyftError]: @@ -741,7 +744,7 @@ def _syft_try_to_save_to_store(self, obj) -> None: ) res = api.services.action.execute(action) if isinstance(res, SyftError): - print("Failed to to store (arg) to store", res) + print(f"Failed to to store (arg) {obj} to store, {res}") def _syft_prepare_obj_uid(self, obj) -> LineageID: # We got the UID diff --git a/packages/syft/src/syft/service/action/action_service.py 
b/packages/syft/src/syft/service/action/action_service.py index d9bc717cf92..700aa74f916 100644 --- a/packages/syft/src/syft/service/action/action_service.py +++ b/packages/syft/src/syft/service/action/action_service.py @@ -343,7 +343,7 @@ def set_attribute( # depending on permisisons? public_args = filter_twin_args(args, twin_mode=TwinMode.MOCK) public_val = public_args[0] - setattr(resolved_self.mock, name, public_val) + setattr(resolved_self.mock.syft_action_data, name, public_val) return Ok( TwinObject( id=action.result_id, diff --git a/packages/syft/src/syft/service/action/numpy.py b/packages/syft/src/syft/service/action/numpy.py index 1f3e706d9f3..3c19aa61bc2 100644 --- a/packages/syft/src/syft/service/action/numpy.py +++ b/packages/syft/src/syft/service/action/numpy.py @@ -58,9 +58,6 @@ class NumpyArrayObject(ActionObject, np.lib.mixins.NDArrayOperatorsMixin): # ) # return self == other - def __bool__(self) -> bool: - return bool(self.all()) - def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): inputs = tuple( np.array(x.syft_action_data, dtype=x.dtype) From d20604b4014b32bbafe2dfc0efe371a31df01447 Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Wed, 30 Aug 2023 17:53:24 +0530 Subject: [PATCH 74/98] revert user changes --- .../src/syft/service/action/action_object.py | 7 +++-- packages/syft/src/syft/service/user/user.py | 11 ++++---- packages/syft/tests/syft/users/user_test.py | 27 ++++++++++++------- 3 files changed, 25 insertions(+), 20 deletions(-) diff --git a/packages/syft/src/syft/service/action/action_object.py b/packages/syft/src/syft/service/action/action_object.py index 3337d555456..b861830ad68 100644 --- a/packages/syft/src/syft/service/action/action_object.py +++ b/packages/syft/src/syft/service/action/action_object.py @@ -1295,16 +1295,15 @@ def _syft_wrap_attribute_for_properties(self, name: str) -> Any: def _syft_wrap_attribute_for_methods(self, name: str) -> Any: """Handle `__getattribute__` for methods.""" - print(name, 
self.syft_action_data_type, self.id) + # check for other types that aren't methods, functions etc def fake_func(*args: Any, **kwargs: Any) -> Any: - print("WHYYY???") return ActionDataEmpty(syft_internal_type=self.syft_internal_type) debug(f"[__getattribute__] Handling method {name} ") if ( - # self.syft_action_data_type == ActionDataEmpty - name not in action_data_empty_must_run and False + self.syft_action_data_type == ActionDataEmpty + and name not in action_data_empty_must_run ): original_func = fake_func else: diff --git a/packages/syft/src/syft/service/user/user.py b/packages/syft/src/syft/service/user/user.py index d881c0f93ea..1189c69eb16 100644 --- a/packages/syft/src/syft/service/user/user.py +++ b/packages/syft/src/syft/service/user/user.py @@ -190,8 +190,7 @@ def _set_password(self, new_password: str) -> Union[SyftError, SyftSuccess]: if api is None: return SyftError(message=f"You must login to {self.node_uid}") api.services.user.update( - uid=self.id, - password=new_password, + uid=self.id, user_update=UserUpdate(password=new_password) ) return SyftSuccess( message=f"Successfully updated password for " @@ -222,11 +221,11 @@ def set_email(self, email: str) -> Union[SyftSuccess, SyftError]: return SyftError(message=f"You must login to {self.node_uid}") try: - UserUpdate(email=email) + user_update = UserUpdate(email=email) except ValidationError: return SyftError(message="{email} is not a valid email address.") - result = api.services.user.update(uid=self.id, email=email) + result = api.services.user.update(uid=self.id, user_update=user_update) if isinstance(result, SyftError): return result @@ -251,13 +250,13 @@ def update( ) if api is None: return SyftError(message=f"You must login to {self.node_uid}") - result = api.services.user.update( - uid=self.id, + user_update = UserUpdate( name=name, institution=institution, website=website, role=role, ) + result = api.services.user.update(uid=self.id, user_update=user_update) if isinstance(result, SyftError): 
return result diff --git a/packages/syft/tests/syft/users/user_test.py b/packages/syft/tests/syft/users/user_test.py index 54d1ebe03b4..5729c643166 100644 --- a/packages/syft/tests/syft/users/user_test.py +++ b/packages/syft/tests/syft/users/user_test.py @@ -12,6 +12,7 @@ from syft.service.context import AuthedServiceContext from syft.service.user.user import ServiceRole from syft.service.user.user import UserCreate +from syft.service.user.user import UserUpdate from syft.service.user.user import UserView GUEST_ROLES = [ServiceRole.GUEST] @@ -41,7 +42,9 @@ def get_mock_client(root_client, role): name=name, email=mail, password=password, password_verify=password ) user_id = [u for u in get_users(worker) if u.email == mail][0].id - assert worker.root_client.api.services.user.update(user_id, role=role) + assert worker.root_client.api.services.user.update( + user_id, UserUpdate(user_id=user_id, role=role) + ) client.login(email=mail, password=password) client._fetch_api(client.credentials) # hacky, but useful for testing: patch user id and role on client @@ -161,14 +164,14 @@ def test_user_update_roles(do_client, guest_client, ds_client, root_client, work clients = [get_mock_client(root_client, role) for role in DO_ROLES] for c in clients: assert worker.root_client.api.services.user.update( - c.user_id, role=ServiceRole.ADMIN + c.user_id, UserUpdate(role=ServiceRole.ADMIN) ) # DOs can update the roles of lower roles clients = [get_mock_client(root_client, role) for role in DS_ROLES] for c in clients: assert do_client.api.services.user.update( - c.user_id, role=ServiceRole.DATA_SCIENTIST + c.user_id, UserUpdate(role=ServiceRole.DATA_SCIENTIST) ) clients = [get_mock_client(root_client, role) for role in ADMIN_ROLES] @@ -176,7 +179,9 @@ def test_user_update_roles(do_client, guest_client, ds_client, root_client, work # DOs cannot update roles to greater than / equal to own role for c in clients: for target_role in [ServiceRole.DATA_OWNER, ServiceRole.ADMIN]: - assert not 
do_client.api.services.user.update(c.user_id, role=target_role) + assert not do_client.api.services.user.update( + c.user_id, UserUpdate(role=target_role) + ) # DOs cannot downgrade higher roles to lower levels clients = [ @@ -187,21 +192,23 @@ def test_user_update_roles(do_client, guest_client, ds_client, root_client, work for target_role in DO_ROLES: if target_role < c.role: assert not do_client.api.services.user.update( - c.user_id, role=target_role + c.user_id, UserUpdate(role=target_role) ) # DSs cannot update any roles clients = [get_mock_client(root_client, role) for role in ADMIN_ROLES] for c in clients: for target_role in ADMIN_ROLES: - assert not ds_client.api.services.user.update(c.user_id, role=target_role) + assert not ds_client.api.services.user.update( + c.user_id, UserUpdate(role=target_role) + ) # Guests cannot update any roles clients = [get_mock_client(root_client, role) for role in ADMIN_ROLES] for c in clients: for target_role in ADMIN_ROLES: assert not guest_client.api.services.user.update( - c.user_id, role=target_role + c.user_id, UserUpdate(role=target_role) ) @@ -213,16 +220,16 @@ def test_user_update(root_client): for target_client in target_clients: if executing_client.role != ServiceRole.ADMIN: assert not executing_client.api.services.user.update( - target_client.user_id, name="abc" + target_client.user_id, UserUpdate(name="abc") ) else: assert executing_client.api.services.user.update( - target_client.user_id, name="abc" + target_client.user_id, UserUpdate(name="abc") ) # you can update yourself assert executing_client.api.services.user.update( - executing_client.user_id, name=Faker().name() + executing_client.user_id, UserUpdate(name=Faker().name()) ) From 11a7457847be37dffcec8ea35fa3ec33d95870a0 Mon Sep 17 00:00:00 2001 From: Koen van der Veen Date: Wed, 30 Aug 2023 14:37:13 +0200 Subject: [PATCH 75/98] fix action object tests --- packages/syft/src/syft/service/action/action_object.py | 10 ++++------ 
.../tests/syft/service/action/action_object_test.py | 4 ++-- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/packages/syft/src/syft/service/action/action_object.py b/packages/syft/src/syft/service/action/action_object.py index 3337d555456..b3d90e70b1d 100644 --- a/packages/syft/src/syft/service/action/action_object.py +++ b/packages/syft/src/syft/service/action/action_object.py @@ -578,7 +578,7 @@ class Config: @pydantic.root_validator() def __check_action_data(cls, values: dict) -> dict: v = values.get("syft_action_data_cache") - if "syft_action_data_type" not in values: + if values.get("syft_action_data_type", None) is None: values["syft_action_data_type"] = type(v) if not isinstance(v, ActionDataEmpty): if inspect.isclass(v): @@ -1295,7 +1295,7 @@ def _syft_wrap_attribute_for_properties(self, name: str) -> Any: def _syft_wrap_attribute_for_methods(self, name: str) -> Any: """Handle `__getattribute__` for methods.""" - print(name, self.syft_action_data_type, self.id) + # check for other types that aren't methods, functions etc def fake_func(*args: Any, **kwargs: Any) -> Any: print("WHYYY???") @@ -1304,7 +1304,8 @@ def fake_func(*args: Any, **kwargs: Any) -> Any: debug(f"[__getattribute__] Handling method {name} ") if ( # self.syft_action_data_type == ActionDataEmpty - name not in action_data_empty_must_run and False + name not in action_data_empty_must_run + and False ): original_func = fake_func else: @@ -1605,11 +1606,9 @@ def __rshift__(self, other: Any) -> Any: return self._syft_output_action_object(self.__rshift__(other)) def __iter__(self): - print("yep...", self.__iter__()) return self._syft_output_action_object(self.__iter__()) def __next__(self): - print("oh no...", self.__next__()) return self._syft_output_action_object(self.__next__()) # r ops @@ -1690,7 +1689,6 @@ def is_action_data_empty(obj: Any) -> bool: def has_action_data_empty(args: Any, kwargs: Any) -> bool: - print(args, kwargs) for a in args: if is_action_data_empty(a): return 
True diff --git a/packages/syft/tests/syft/service/action/action_object_test.py b/packages/syft/tests/syft/service/action/action_object_test.py index 07fd17ab4e2..131b93d70da 100644 --- a/packages/syft/tests/syft/service/action/action_object_test.py +++ b/packages/syft/tests/syft/service/action/action_object_test.py @@ -1005,7 +1005,7 @@ def test_actionobject_syft_getattr_pandas(worker): obj = ActionObject.from_obj(orig_obj) - assert obj.columns == orig_obj.columns + assert (obj.columns == orig_obj.columns).all() obj.columns = ["a", "b", "c"] - assert obj.columns == ["a", "b", "c"] + assert (obj.columns == ["a", "b", "c"]).all() From 9f781f08b38e1ebdcbd171998c04949a3f5533cf Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Wed, 30 Aug 2023 18:07:21 +0530 Subject: [PATCH 76/98] set dtype of action object if set to None --- packages/syft/src/syft/service/action/action_object.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/syft/src/syft/service/action/action_object.py b/packages/syft/src/syft/service/action/action_object.py index b861830ad68..6c980e35929 100644 --- a/packages/syft/src/syft/service/action/action_object.py +++ b/packages/syft/src/syft/service/action/action_object.py @@ -578,7 +578,8 @@ class Config: @pydantic.root_validator() def __check_action_data(cls, values: dict) -> dict: v = values.get("syft_action_data_cache") - if "syft_action_data_type" not in values: + obj_dtype = values.get("syft_action_data_type") + if obj_dtype is None: values["syft_action_data_type"] = type(v) if not isinstance(v, ActionDataEmpty): if inspect.isclass(v): From 6877142c79611e82f6c4fd4848f4f2c1ce209363 Mon Sep 17 00:00:00 2001 From: Koen van der Veen Date: Wed, 30 Aug 2023 14:49:07 +0200 Subject: [PATCH 77/98] worker tests --- packages/syft/tests/syft/worker_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/syft/tests/syft/worker_test.py b/packages/syft/tests/syft/worker_test.py index 
c71f060b731..03772951d71 100644 --- a/packages/syft/tests/syft/worker_test.py +++ b/packages/syft/tests/syft/worker_test.py @@ -92,7 +92,7 @@ def test_action_store() -> None: assert set_result.is_ok() test_object_result = action_store.get(uid=uid, credentials=test_signing_key) assert test_object_result.is_ok() - assert test_object == test_object_result.ok() + assert (test_object == test_object_result.ok()).all() test_verift_key_2 = SyftVerifyKey.from_string(test_verify_key_string_2) test_object_result_fail = action_store.get(uid=uid, credentials=test_verift_key_2) @@ -203,7 +203,7 @@ def test_action_object_hooks() -> None: def pre_add(context: Any, *args: Any, **kwargs: Any) -> Any: # double it new_value = args[0] - new_value.syft_action_data = new_value.syft_action_data * 2 + new_value.syft_action_data_cache = new_value.syft_action_data_cache * 2 return Ok((context, (new_value,), kwargs)) def post_add(context: Any, name: str, new_result: Any) -> Any: From d643b6865bb42adc7a61d8d8fb87f8ef66dbd1ed Mon Sep 17 00:00:00 2001 From: Kien Dang Date: Thu, 31 Aug 2023 13:30:26 +0800 Subject: [PATCH 78/98] nit: issubclass instead of exact type comparison --- packages/syft/src/syft/service/action/action_object.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/syft/src/syft/service/action/action_object.py b/packages/syft/src/syft/service/action/action_object.py index e5abbadedab..509c1d37a43 100644 --- a/packages/syft/src/syft/service/action/action_object.py +++ b/packages/syft/src/syft/service/action/action_object.py @@ -1302,7 +1302,7 @@ def fake_func(*args: Any, **kwargs: Any) -> Any: debug(f"[__getattribute__] Handling method {name} ") if ( - self.syft_action_data_type == ActionDataEmpty + issubclass(self.syft_action_data_type, ActionDataEmpty) and name not in action_data_empty_must_run ): original_func = fake_func @@ -1681,8 +1681,8 @@ def debug_original_func(name: str, func: Callable) -> None: def is_action_data_empty(obj: Any) -> bool: 
- return isinstance(obj, AnyActionObject) and ( - obj.syft_action_data_type == ActionDataEmpty + return isinstance(obj, AnyActionObject) and issubclass( + obj.syft_action_data_type, ActionDataEmpty ) From 1000e918b5d0e0877114f0c2965d892b1442a9fd Mon Sep 17 00:00:00 2001 From: Kien Dang Date: Thu, 31 Aug 2023 13:31:19 +0800 Subject: [PATCH 79/98] Delete debug notebook --- .../Experimental/Shubham/00-load-data.ipynb | 755 ------------------ 1 file changed, 755 deletions(-) delete mode 100644 notebooks/Experimental/Shubham/00-load-data.ipynb diff --git a/notebooks/Experimental/Shubham/00-load-data.ipynb b/notebooks/Experimental/Shubham/00-load-data.ipynb deleted file mode 100644 index 79e34654aea..00000000000 --- a/notebooks/Experimental/Shubham/00-load-data.ipynb +++ /dev/null @@ -1,755 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Loading data into Syft Domain Server as a Data Owner\n", - "\n", - "Welcome to Syft! This tutorial consists of 4 Jupyter notebooks that covers the basics of Syft which includes\n", - "* [Uploading a private dataset as a Data Owner](./00-load-data.ipynb)\n", - "* [Submitting code to run analysis on the private dataset as a Data Scientist](./01-submit-code.ipynb)\n", - "* [Reviewing and approving the code as a Data Owner](02-review-code-and-approve.ipynb)\n", - "* [Downloading/Retrieving the results of the code execution as a Data Scientist](03-data-scientist-download-result.ipynb)\n", - "\n", - "In Syft, a **Data Owner** provides datasets which they would like to make available for study by an outside party they may or may not fully trust has good intentions. Meanwhile, **Data Scientists** are end users who desire to perform computations or answer a specific question using one or more Data Owners' datasets." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Install Syft & Import packages" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "SYFT_VERSION = \">=0.8.2.b0,<0.9\"\n", - "package_string = f'\"syft{SYFT_VERSION}\"'\n", - "# %pip install {package_string} -f https://whls.blob.core.windows.net/unstable/index.html -q" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import syft as sy\n", - "sy.requires(SYFT_VERSION)\n", - "from syft import autocache\n", - "import pandas as pd" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Launch a Syft Domain Server" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# # Launch a fresh domain server named \"test-domain-1\" in dev mode on the local machine\n", - "# node = sy.orchestra.launch(name=\"test-domain-1\", port=\"auto\", dev_mode=True, reset=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# log into the node with default root credentials\n", - "domain_client = sy.login(url=\"http://localhost:8081\", email=\"info@openmined.org\", password=\"changethis\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# List the available API\n", - "domain_client.api" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Data Subjects\n", - "\n", - "Think of Data Subjects as individuals/organizations/institutions owning a dataset that you can pool together privately in Syft.\n", - "\n", - "For this notebook, we'll create a sample dataset that includes trade information of various commodities for different countries." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Check for existing Data Subjects\n", - "data_subjects = domain_client.data_subject_registry.get_all()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "data_subjects" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "assert len(data_subjects) == 0" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Add Data subjects" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "country = sy.DataSubject(name=\"Country\", aliases=[\"country_code\"])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "canada = sy.DataSubject(name=\"Canada\", aliases=[\"country_code:ca\"])\n", - "germany = sy.DataSubject(name=\"Germany\", aliases=[\"country_code:de\"])\n", - "spain = sy.DataSubject(name=\"Spain\", aliases=[\"country_code:es\"])\n", - "france = sy.DataSubject(name=\"France\", aliases=[\"country_code:fr\"])\n", - "japan = sy.DataSubject(name=\"Japan\", aliases=[\"country_code:jp\"])\n", - "uk = sy.DataSubject(name=\"United Kingdom\", aliases=[\"country_code:uk\"])\n", - "usa = sy.DataSubject(name=\"United States of America\", aliases=[\"country_code:us\"])\n", - "australia = sy.DataSubject(name=\"Australia\", aliases=[\"country_code:au\"])\n", - "india = sy.DataSubject(name=\"India\", aliases=[\"country_code:in\"])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "country.add_member(canada)\n", - "country.add_member(germany)\n", - "country.add_member(spain)\n", - "country.add_member(france)\n", - "country.add_member(japan)\n", 
- "country.add_member(uk)\n", - "country.add_member(usa)\n", - "country.add_member(australia)\n", - "country.add_member(india)\n", - "\n", - "country.members" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Adds the data subject and all its members to the registry\n", - "response = domain_client.data_subject_registry.add_data_subject(country)\n", - "response" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "assert response" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Lets look at the data subjects added to the data\n", - "data_subjects = domain_client.data_subject_registry.get_all()\n", - "data_subjects" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "assert len(data_subjects) == 10" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Prepare the dataset" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For simplicity, we'll be working with Canada's trade dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "canada_dataset_url = \"https://github.com/OpenMined/datasets/blob/main/trade_flow/ca%20-%20feb%202021.csv?raw=True\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "df = pd.read_csv(autocache(canada_dataset_url))\n", - "df" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In Syft, every dataset has two variants - **Mock** and **Private**.\n", - "\n", - "* **Mock** dataset is a mock/dummy version of the private data that can be accessed & read by the data scientists.\n", - "* 
**Private** dataset is the actual data that will never be accessed by the data scientist.\n", - "\n", - "To keep things simple, we sample different data points as Mock & Private. But in reality you would want to generate a random dataset for the Mock variant. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# private data samples\n", - "ca_data = df[0:10]\n", - "ca_data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Mock data samples\n", - "mock_ca_data = df[10:20]\n", - "mock_ca_data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create a Syft Dataset\n", - "\n", - "In Syft, `Dataset` is a collection of Assets. For example, `Dataset` can be a \"Lung Cancer Dataset\", and `Assets` will be train, test & validation splits for this dataset." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "dataset = sy.Dataset(name=\"Canada Trade Value 1\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "dataset.set_description(\"Canada Trade Data\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "dataset.add_citation(\"Person, place or thing\")\n", - "dataset.add_url(\"https://github.com/OpenMined/datasets/tree/main/trade_flow\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "dataset.add_contributor(name=\"Andrew Trask\", \n", - " email=\"andrew@openmined.org\",\n", - " note=\"Andrew runs this domain and prepared the dataset metadata.\")\n", - "\n", - "dataset.add_contributor(name=\"Madhava Jay\", \n", - " email=\"madhava@openmined.org\",\n", - " 
note=\"Madhava tweaked the description to add the URL because Andrew forgot.\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "dataset.contributors" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "assert len(dataset.contributors) == 2" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Add Assets to the Syft Dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "ctf = sy.Asset(name=\"canada_trade_flow\")\n", - "ctf.set_description(\"Canada trade flow represents export & import of different commodities to other countries\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "ctf.add_contributor(name=\"Andrew Trask\", \n", - " email=\"andrew@openmined.org\",\n", - " note=\"Andrew runs this domain and prepared the asset.\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# This is where we add the private data (pandas df/numpy array) to the `Asset`\n", - "ctf.set_obj(ca_data)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# We must set the shape of this private data\n", - "ctf.set_shape(ca_data.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# We assign the data subject for whom this data belongs to, in this\n", - "ctf.add_data_subject(canada)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Optionally, if we don't want to add any Mock dataset\n", - "ctf.no_mock()" - ] - }, - { - 
"cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# We must add this Asset to our Dataset\n", - "dataset.add_asset(ctf)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# In case we want to remove a dataset & its associated assets\n", - "dataset.remove_asset(name=ctf.name)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Let's assign the Mock data to the Asset by calling `set_mock` method\n", - "ctf.set_mock(mock_ca_data, mock_is_real=False)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Let's add our Asset back into our \"Canada Trade Value\" Dataset\n", - "dataset.add_asset(ctf)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Upload Syft Dataset to Domain Server" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "domain_client.upload_dataset(dataset)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# We can list all the datasets on the Domain Server by invoking the following\n", - "datasets = domain_client.datasets.get_all()\n", - "datasets" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "assert len(datasets) == 1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "datasets" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Reading the Syft Dataset from Domain Server\n", - "\n", - "Following the logical hierarchy of `Dataset`, `Asset`, and its variant, we can read the data as follows" - ] - }, - { - 
"cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "domain_client.datasets[0]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Reading the mock dataset\n", - "mock = domain_client.datasets[0].assets[0].mock" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mock" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "assert mock_ca_data.equals(mock)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "action_id = domain_client.datasets[0].assets[0].action_id" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "private_data = domain_client.api.services.action.get(action_id)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "private_data.syft_action_data_cache" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "private_data.syft_action_data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Reading the real dataset\n", - "# NOTE: Private data can be accessed by the Data Owners, but NOT the Data Scientists\n", - "real = domain_client.datasets[2].assets[0].data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "assert ca_data.equals(real)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create a new Data Scientist account on the Domain Server\n", - "\n", - "Signup is disabled by default.\n", - "An Admin/DO can enable it by 
`domain_client.settings.allow_guest_signup(enable=True)`\n", - "\n", - "Refer to notebook [07-domain-register-control-flow](./07-domain-register-control-flow.ipynb) for more information." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "domain_client.register(name=\"Jane Doe\", email=\"jane@caltech.edu\", password=\"abc123\", password_verify=\"abc123\", institution=\"Caltech\", website=\"https://www.caltech.edu/\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Cleanup local domain server\n", - "if node.node_type.value == \"python\":\n", - " node.land()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.7" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": true - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} From 534a6de4c1e6456b4e4f81e9e29ebae799ac6058 Mon Sep 17 00:00:00 2001 From: Madhava Jay Date: Thu, 31 Aug 2023 16:30:20 +1000 Subject: [PATCH 80/98] Pinned seaweedfs version - Enabled dev mode ports --- packages/grid/default.env | 3 ++- packages/grid/docker-compose.pull.yml | 4 ++-- packages/grid/docker-compose.test.yml | 14 +++++++------- packages/grid/docker-compose.yml | 3 ++- .../podman-kube/podman-syft-kube-config.yaml | 3 ++- 5 files changed, 15 insertions(+), 12 deletions(-) diff --git a/packages/grid/default.env 
b/packages/grid/default.env index 676520c1569..69575d79f46 100644 --- a/packages/grid/default.env +++ b/packages/grid/default.env @@ -30,7 +30,8 @@ DOCKER_IMAGE_TRAEFIK=traefik TRAEFIK_VERSION=v2.10 REDIS_VERSION=6.2 RABBITMQ_VERSION=3 -DOCKER_IMAGE_SEAWEEDFS=chrislusf/seaweedfs:latest +SEAWEEDFS_VERSION=3.55 +DOCKER_IMAGE_SEAWEEDFS=chrislusf/seaweedfs VERSION=latest VERSION_HASH=unknown STACK_API_KEY="" diff --git a/packages/grid/docker-compose.pull.yml b/packages/grid/docker-compose.pull.yml index f69e67ed20c..a7d2cf9de22 100644 --- a/packages/grid/docker-compose.pull.yml +++ b/packages/grid/docker-compose.pull.yml @@ -6,8 +6,8 @@ services: # queue: # image: rabbitmq:${RABBITMQ_VERSION?Variable not Set}${RABBITMQ_MANAGEMENT:-} - # seaweedfs: - # image: "${DOCKER_IMAGE_SEAWEEDFS?Variable not set}" + seaweedfs: + image: "${DOCKER_IMAGE_SEAWEEDFS?Variable not set}:${SEAWEEDFS_VERSION}" # docker-host: # image: qoomon/docker-host diff --git a/packages/grid/docker-compose.test.yml b/packages/grid/docker-compose.test.yml index 9397c9af8f8..92716eaa469 100644 --- a/packages/grid/docker-compose.test.yml +++ b/packages/grid/docker-compose.test.yml @@ -27,13 +27,13 @@ services: ports: - "4000" - # seaweedfs: - # profiles: - # - blob-storage - # ports: - # - "9333" # admin - # - "8888" # filer - # - "8333" # S3 + seaweedfs: + profiles: + - blob-storage + ports: + - "9333" # admin + - "8888" # filer + - "8333" # S3 backend: environment: diff --git a/packages/grid/docker-compose.yml b/packages/grid/docker-compose.yml index 106ab372dd4..abfdd5aab89 100644 --- a/packages/grid/docker-compose.yml +++ b/packages/grid/docker-compose.yml @@ -217,7 +217,7 @@ services: - blob-storage depends_on: - proxy - image: "${DOCKER_IMAGE_SEAWEEDFS?Variable not set}" + image: "${DOCKER_IMAGE_SEAWEEDFS?Variable not set}:${SEAWEEDFS_VERSION}" environment: - S3_VOLUME_SIZE_MB=${S3_VOLUME_SIZE_MB:-1024} - S3_ROOT_USER=${S3_ROOT_USER:-admin} @@ -230,6 +230,7 @@ services: - seaweedfs-data:/data/blob - 
./seaweedfs/filer.toml:/etc/seaweedfs/filer.toml - ./seaweedfs/start.sh:/etc/seaweedfs/start.sh + mongo: image: mongo:latest restart: always diff --git a/packages/grid/podman/podman-kube/podman-syft-kube-config.yaml b/packages/grid/podman/podman-kube/podman-syft-kube-config.yaml index 81d108795be..1684f1c3dfe 100644 --- a/packages/grid/podman/podman-kube/podman-syft-kube-config.yaml +++ b/packages/grid/podman/podman-kube/podman-syft-kube-config.yaml @@ -34,7 +34,8 @@ data: TRAEFIK_VERSION: v2.8.1 REDIS_VERSION: 6.2 RABBITMQ_VERSION: 3 - DOCKER_IMAGE_SEAWEEDFS: chrislusf/seaweedfs:latest + SEAWEEDFS_VERSION: 3.55 + DOCKER_IMAGE_SEAWEEDFS: chrislusf/seaweedfs:3.55 VERSION: 0.8.2-beta.6 VERSION_HASH: unknown STACK_API_KEY: "" From 7d7e7c8d05c623b0cdbad6a5e522384bdb566bda Mon Sep 17 00:00:00 2001 From: Kien Dang Date: Thu, 31 Aug 2023 15:16:03 +0800 Subject: [PATCH 81/98] Resource safety --- .../syft/src/syft/service/action/action_data_empty.py | 3 --- packages/syft/src/syft/service/action/action_object.py | 8 +++++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/packages/syft/src/syft/service/action/action_data_empty.py b/packages/syft/src/syft/service/action/action_data_empty.py index 6eb6e44a2a0..45a186ebb39 100644 --- a/packages/syft/src/syft/service/action/action_data_empty.py +++ b/packages/syft/src/syft/service/action/action_data_empty.py @@ -48,6 +48,3 @@ def __validate_file_path(cls, v: Union[str, Path]) -> Path: return v raise ValueError(f"Not a valid path to file. 
{v}") - - def as_buffer(self): - return open(self.filepath, "rb") diff --git a/packages/syft/src/syft/service/action/action_object.py b/packages/syft/src/syft/service/action/action_object.py index 509c1d37a43..5758186c80d 100644 --- a/packages/syft/src/syft/service/action/action_object.py +++ b/packages/syft/src/syft/service/action/action_object.py @@ -528,11 +528,13 @@ def _set_syft_action_data(self, data: Any) -> None: return blob_deposit_object if isinstance(data, ActionFileData): - buffer = data.as_buffer() + with open(data.filepath, "rb") as f: + result = blob_deposit_object.write(f) else: - buffer = BytesIO(serialize(data, to_bytes=True)) + result = blob_deposit_object.write( + BytesIO(serialize(data, to_bytes=True)) + ) - result = blob_deposit_object.write(buffer) if isinstance(result, SyftError): return result self.syft_blob_storage_entry_id = ( From 1aec101e1c51154b3e79ed316cae6364c4745458 Mon Sep 17 00:00:00 2001 From: Kien Dang Date: Thu, 31 Aug 2023 15:16:26 +0800 Subject: [PATCH 82/98] Add example uploading retrieving file with blob storage --- notebooks/api/0.8/09-blob-storage.ipynb | 129 +++++++++++++++++++++++- 1 file changed, 124 insertions(+), 5 deletions(-) diff --git a/notebooks/api/0.8/09-blob-storage.ipynb b/notebooks/api/0.8/09-blob-storage.ipynb index d6a9abcba89..02574449435 100644 --- a/notebooks/api/0.8/09-blob-storage.ipynb +++ b/notebooks/api/0.8/09-blob-storage.ipynb @@ -41,6 +41,13 @@ "domain_client = node.login(email=\"info@openmined.org\", password=\"changethis\")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Upload and retrieve SyftObject with blob storage (low level API)" + ] + }, { "cell_type": "code", "execution_count": null, @@ -107,15 +114,89 @@ "user_object_read" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Upload and retrieve files with blob storage (low level API)" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - 
"# # Cleanup local domain server\n", - "if node.node_type.value == \"python\":\n", - " node.land()" + "from pathlib import Path" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def allocate_file(client: SyftClient, path: Path) -> BlobDeposit:\n", + " create_blob_storage_entry = CreateBlobStorageEntry.from_path(path)\n", + " return client.api.services.blob_storage.allocate(create_blob_storage_entry)\n", + "\n", + "\n", + "def upload_file(client: SyftClient, path: Path) -> sy.UID:\n", + " blob_deposit = allocate_file(client, path)\n", + " with open(path, \"rb\") as f:\n", + " blob_deposit.write(f)\n", + " return blob_deposit.blob_storage_entry_id\n", + "\n", + "\n", + "def retrieve_file(client, blob_storage_entry_id: sy.UID) -> Path:\n", + " blob_retrieval = client.api.services.blob_storage.read(blob_storage_entry_id)\n", + " file = blob_retrieval.read()\n", + " return Path(file.file_name)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data_url = \"https://github.com/OpenMined/datasets/blob/main/trade_flow/ca%20-%20feb%202021.csv?raw=True\"\n", + "data_file = autocache(data_url, \"csv\")\n", + "data_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "uploaded_file_storage_id = upload_file(domain_client, data_file)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "retrieved_file = retrieve_file(domain_client, uploaded_file_storage_id)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Original file" ] }, { @@ -123,7 +204,45 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "pd.read_csv(data_file, 
nrows=5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Retrieved file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pd.read_csv(retrieved_file, nrows=5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "retrieved_file.unlink()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Cleanup local domain server\n", + "if node.node_type.value == \"python\":\n", + " node.land()" + ] } ], "metadata": { @@ -142,7 +261,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.7" + "version": "3.11.3" }, "toc": { "base_numbering": 1, From f4b00f8f958cf0ee39f9dcf24894f0ba58ce7d0b Mon Sep 17 00:00:00 2001 From: Kien Dang Date: Thu, 31 Aug 2023 15:33:34 +0800 Subject: [PATCH 83/98] Delete BlobStorageEntry after notebook runs --- notebooks/api/0.8/09-blob-storage.ipynb | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/notebooks/api/0.8/09-blob-storage.ipynb b/notebooks/api/0.8/09-blob-storage.ipynb index 02574449435..3d76d4988d5 100644 --- a/notebooks/api/0.8/09-blob-storage.ipynb +++ b/notebooks/api/0.8/09-blob-storage.ipynb @@ -114,6 +114,16 @@ "user_object_read" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# delete object in blob storage\n", + "domain_client.api.services.blob_storage.delete(blob_deposit.blob_storage_entry_id)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -233,6 +243,16 @@ "retrieved_file.unlink()" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# delete file from blob storage\n", + "domain_client.api.services.blob_storage.delete(uploaded_file_storage_id)" + ] + }, { "cell_type": "code", "execution_count": null, From 
fea3bd7dab3910693da7a87f254bd27d8910b745 Mon Sep 17 00:00:00 2001 From: Kien Dang Date: Thu, 31 Aug 2023 15:34:25 +0800 Subject: [PATCH 84/98] Also delete permission when deleting BlobStorageEntry --- packages/syft/src/syft/service/blob_storage/service.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/syft/src/syft/service/blob_storage/service.py b/packages/syft/src/syft/service/blob_storage/service.py index 1858c1bc17a..30bf7fbee98 100644 --- a/packages/syft/src/syft/service/blob_storage/service.py +++ b/packages/syft/src/syft/service/blob_storage/service.py @@ -10,6 +10,7 @@ from ...store.blob_storage.on_disk import OnDiskBlobDeposit from ...store.blob_storage.seaweedfs import SeaweedFSBlobDeposit from ...store.document_store import DocumentStore +from ...store.document_store import UIDPartitionKey from ...types.blob_storage import BlobStorageEntry from ...types.blob_storage import BlobStorageMetadata from ...types.blob_storage import CreateBlobStorageEntry @@ -182,8 +183,8 @@ def delete( if isinstance(file_unlinked_result, SyftError): return file_unlinked_result - blob_storage_entry_deleted = self.stash.delete_by_uid( - context.credentials, uid=uid + blob_storage_entry_deleted = self.stash.delete( + context.credentials, UIDPartitionKey.with_obj(uid), has_permission=True ) if blob_storage_entry_deleted.is_ok(): return file_unlinked_result From f3af219b45af226342b76fabb0b847de4a0f7abb Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Thu, 31 Aug 2023 19:37:55 +0530 Subject: [PATCH 85/98] add Action Object creation from file --- notebooks/api/0.8/09-blob-storage.ipynb | 70 ++++++++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) diff --git a/notebooks/api/0.8/09-blob-storage.ipynb b/notebooks/api/0.8/09-blob-storage.ipynb index 3d76d4988d5..c34f3a635fb 100644 --- a/notebooks/api/0.8/09-blob-storage.ipynb +++ b/notebooks/api/0.8/09-blob-storage.ipynb @@ -253,6 +253,74 @@ 
"domain_client.api.services.blob_storage.delete(uploaded_file_storage_id)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## From file using Action Object (Partial Functional)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "canada_dataset_url = \"https://github.com/OpenMined/datasets/blob/main/trade_flow/ca%20-%20feb%202021.csv?raw=True\"\n", + "data_url = \"https://github.com/OpenMined/datasets/blob/main/trade_flow/ca%20-%20feb%202021.csv?raw=True\"\n", + "data_file = autocache(data_url, \"csv\")\n", + "data_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# creating an action object from file\n", + "action_object = sy.ActionObject.from_file(filepath=str(data_file))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data_ptr = action_object.send(domain_client)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@sy.syft_function_single_use(trade_data=data_ptr)\n", + "def sum_trade_value_mil(trade_data):\n", + " import pandas as pd\n", + " from opendp.mod import enable_features\n", + " enable_features('contrib')\n", + " from opendp.measurements import make_base_laplace\n", + " aggregate = 0.\n", + " base_lap = make_base_laplace(scale=5.)\n", + " noise = base_lap(aggregate)\n", + "\n", + " df = pd.read_csv(data_ptr.syft_action_data.file_name, low_memory=False)\n", + " total = df[\"Trade Value (US$)\"].sum()\n", + " return (float(total / 1_000_000), float(noise))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sum_trade_value_mil(trade_data=data_ptr)" + ] + }, { "cell_type": "code", "execution_count": null, @@ -281,7 +349,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - 
"version": "3.11.3" + "version": "3.9.7" }, "toc": { "base_numbering": 1, From ba6cc83a52d6f2a4488ccb8f58fe2d5d6c4d5946 Mon Sep 17 00:00:00 2001 From: Kien Dang Date: Fri, 1 Sep 2023 00:33:10 +0800 Subject: [PATCH 86/98] Rename from_file -> from_path to be more in line with other methods --- notebooks/api/0.8/09-blob-storage.ipynb | 9 ++++----- .../src/syft/service/action/action_data_empty.py | 4 ++-- .../syft/src/syft/service/action/action_object.py | 13 ++++++++----- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/notebooks/api/0.8/09-blob-storage.ipynb b/notebooks/api/0.8/09-blob-storage.ipynb index c34f3a635fb..195e0007f19 100644 --- a/notebooks/api/0.8/09-blob-storage.ipynb +++ b/notebooks/api/0.8/09-blob-storage.ipynb @@ -170,8 +170,8 @@ "metadata": {}, "outputs": [], "source": [ - "data_url = \"https://github.com/OpenMined/datasets/blob/main/trade_flow/ca%20-%20feb%202021.csv?raw=True\"\n", - "data_file = autocache(data_url, \"csv\")\n", + "canada_dataset_url = \"https://github.com/OpenMined/datasets/blob/main/trade_flow/ca%20-%20feb%202021.csv?raw=True\"\n", + "data_file = autocache(canada_dataset_url, \"csv\")\n", "data_file" ] }, @@ -267,8 +267,7 @@ "outputs": [], "source": [ "canada_dataset_url = \"https://github.com/OpenMined/datasets/blob/main/trade_flow/ca%20-%20feb%202021.csv?raw=True\"\n", - "data_url = \"https://github.com/OpenMined/datasets/blob/main/trade_flow/ca%20-%20feb%202021.csv?raw=True\"\n", - "data_file = autocache(data_url, \"csv\")\n", + "data_file = autocache(canada_dataset_url, \"csv\")\n", "data_file" ] }, @@ -279,7 +278,7 @@ "outputs": [], "source": [ "# creating an action object from file\n", - "action_object = sy.ActionObject.from_file(filepath=str(data_file))" + "action_object = sy.ActionObject.from_path(path=data_file)" ] }, { diff --git a/packages/syft/src/syft/service/action/action_data_empty.py b/packages/syft/src/syft/service/action/action_data_empty.py index 45a186ebb39..d1e5ae44381 100644 --- 
a/packages/syft/src/syft/service/action/action_data_empty.py +++ b/packages/syft/src/syft/service/action/action_data_empty.py @@ -37,9 +37,9 @@ class ActionFileData(SyftObject): __canonical_name__ = "ActionFileData" __version__ = SYFT_OBJECT_VERSION_1 - filepath: Path + path: Path - @pydantic.validator("filepath", pre=True) + @pydantic.validator("path", pre=True) def __validate_file_path(cls, v: Union[str, Path]) -> Path: if isinstance(v, str): v = Path(v) diff --git a/packages/syft/src/syft/service/action/action_object.py b/packages/syft/src/syft/service/action/action_object.py index 5758186c80d..6342e1baf3d 100644 --- a/packages/syft/src/syft/service/action/action_object.py +++ b/packages/syft/src/syft/service/action/action_object.py @@ -5,6 +5,7 @@ from enum import Enum import inspect from io import BytesIO +from pathlib import Path import traceback import types from typing import Any @@ -512,7 +513,7 @@ def reload_cache(self): def _set_syft_action_data(self, data: Any) -> None: if not isinstance(data, ActionDataEmpty): if isinstance(data, ActionFileData): - storage_entry = CreateBlobStorageEntry.from_path(data.filepath) + storage_entry = CreateBlobStorageEntry.from_path(data.path) else: storage_entry = CreateBlobStorageEntry.from_obj(data) @@ -528,7 +529,7 @@ def _set_syft_action_data(self, data: Any) -> None: return blob_deposit_object if isinstance(data, ActionFileData): - with open(data.filepath, "rb") as f: + with open(data.path, "rb") as f: result = blob_deposit_object.write(f) else: result = blob_deposit_object.write( @@ -936,8 +937,8 @@ def as_empty(self): return ActionObject.empty(self.syft_internal_type, id, self.syft_lineage_id) @staticmethod - def from_file( - filepath: str, + def from_path( + path: Union[str, Path], id: Optional[UID] = None, syft_lineage_id: Optional[LineageID] = None, syft_client_verify_key: Optional[SyftVerifyKey] = None, @@ -948,7 +949,9 @@ def from_file( if id is not None and syft_lineage_id is not None and id != 
syft_lineage_id.id: raise ValueError("UID and LineageID should match") - syft_action_data = ActionFileData(filepath=filepath) + syft_action_data = ActionFileData( + path=path if isinstance(path, Path) else Path(path) + ) action_type = action_type_for_object(syft_action_data) action_object = action_type(syft_action_data_cache=syft_action_data) From d8def31341b7a6be11c8d748e14e7a9b69fec3e3 Mon Sep 17 00:00:00 2001 From: Kien Dang Date: Fri, 1 Sep 2023 00:34:17 +0800 Subject: [PATCH 87/98] Delete file downloaded and file stored in blob storage after running notebook --- notebooks/api/0.8/09-blob-storage.ipynb | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/notebooks/api/0.8/09-blob-storage.ipynb b/notebooks/api/0.8/09-blob-storage.ipynb index 195e0007f19..9cc80cc6d46 100644 --- a/notebooks/api/0.8/09-blob-storage.ipynb +++ b/notebooks/api/0.8/09-blob-storage.ipynb @@ -320,6 +320,21 @@ "sum_trade_value_mil(trade_data=data_ptr)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "## clean up\n", + "## delete downloaded file\n", + "import os\n", + "os.unlink(data_ptr.syft_action_data.file_name)\n", + "\n", + "## delete file from blob storage\n", + "domain_client.api.services.blob_storage.delete(action_object.syft_blob_storage_entry_id)" + ] + }, { "cell_type": "code", "execution_count": null, From c54abf76bf442abafcf5559e3ad89ac1ea403370 Mon Sep 17 00:00:00 2001 From: Madhava Jay Date: Fri, 1 Sep 2023 15:00:12 +1000 Subject: [PATCH 88/98] dev mode True has to trigger local template - if any environment variables of docker compose changes occur the template must be local to the source - changed docker volume from _ to - naming - fixed left over action.save command in notebook - added blob storage path print for debugging in database --- notebooks/api/0.8/05-custom-policy.ipynb | 2 +- packages/hagrid/hagrid/cli.py | 8 ++--- packages/syft/src/syft/types/blob_storage.py | 3 ++ tox.ini | 35 
++++++++++---------- 4 files changed, 26 insertions(+), 22 deletions(-) diff --git a/notebooks/api/0.8/05-custom-policy.ipynb b/notebooks/api/0.8/05-custom-policy.ipynb index e837464fa34..64866b96f5e 100644 --- a/notebooks/api/0.8/05-custom-policy.ipynb +++ b/notebooks/api/0.8/05-custom-policy.ipynb @@ -185,7 +185,7 @@ "source": [ "x = np.array([1,2,3])\n", "x_pointer = sy.ActionObject.from_obj(x)\n", - "domain_client.api.services.action.save(x_pointer)" + "domain_client.api.services.action.set(x_pointer)" ] }, { diff --git a/packages/hagrid/hagrid/cli.py b/packages/hagrid/hagrid/cli.py index 91ddcc13155..f21fb13a818 100644 --- a/packages/hagrid/hagrid/cli.py +++ b/packages/hagrid/hagrid/cli.py @@ -1317,11 +1317,11 @@ def create_launch_cmd( else: parsed_kwargs["image_name"] = "default" - if "tag" in kwargs and kwargs["tag"] is not None and kwargs["tag"] != "": - parsed_kwargs["tag"] = kwargs["tag"] + if parsed_kwargs["dev"] is True: + parsed_kwargs["tag"] = "local" else: - if parsed_kwargs["dev"] is True: - parsed_kwargs["tag"] = "local" + if "tag" in kwargs and kwargs["tag"] is not None and kwargs["tag"] != "": + parsed_kwargs["tag"] = kwargs["tag"] else: parsed_kwargs["tag"] = "latest" diff --git a/packages/syft/src/syft/types/blob_storage.py b/packages/syft/src/syft/types/blob_storage.py index 71f2c40aa5c..8ef880dd6d4 100644 --- a/packages/syft/src/syft/types/blob_storage.py +++ b/packages/syft/src/syft/types/blob_storage.py @@ -43,6 +43,9 @@ class SecureFilePathLocation(SyftObject): id: UID path: str + def __repr__(self) -> str: + return f"{self.path}" + @serializable() class SeaweedSecureFilePathLocation(SecureFilePathLocation): diff --git a/tox.ini b/tox.ini index 9b2eaee3479..eadc3ddcc9c 100644 --- a/tox.ini +++ b/tox.ini @@ -202,8 +202,9 @@ commands = ; reset volumes and create nodes bash -c "echo Starting Nodes; date" bash -c "docker rm -f $(docker ps -a -q) || true" - bash -c "docker volume rm test_domain_1_mongo-data --force || true" - bash -c "docker 
volume rm test_domain_1_credentials-data --force || true" + bash -c "docker volume rm test-domain-1_mongo-data --force || true" + bash -c "docker volume rm test-domain-1_credentials-data --force || true" + bash -c "docker volume rm test-domain-1_seaweedfs-data --force || true" bash -c 'HAGRID_ART=$HAGRID_ART hagrid launch test_domain_1 domain to docker:9081 $HAGRID_FLAGS --enable-signup --no-health-checks --verbose --no-warnings' @@ -262,21 +263,20 @@ commands = ; reset volumes and create nodes bash -c "echo Starting Nodes; date" bash -c "docker rm -f $(docker ps -a -q) || true" - bash -c "docker volume rm test_domain_1_mongo-data --force || true" - bash -c "docker volume rm test_domain_1_credentials-data --force || true" - bash -c "docker volume rm test_domain_2_mongo-data --force || true" - bash -c "docker volume rm test_domain_2_credentials-data --force || true" - bash -c "docker volume rm test_gateway_1_mongo-data --force || true" - bash -c "docker volume rm test_gateway_1_credentials-data --force || true" - bash -c "docker volume rm test_domain_1_seaweedfs-data --force || true" - bash -c "docker volume rm test_domain_2_seaweedfs-data --force || true" - bash -c "docker volume rm test_domain_1_app-redis-data --force || true" - bash -c "docker volume rm test_domain_2_app-redis-data --force || true" - bash -c "docker volume rm test_gateway_1_app-redis-data --force || true" - bash -c "docker volume rm test_domain_1_tailscale-data --force || true" - bash -c "docker volume rm test_domain_2_tailscale-data --force || true" - bash -c "docker volume rm test_gateway_1_tailscale-data --force || true" - bash -c "docker volume rm test_gateway_1_headscale-data --force || true" + bash -c "docker volume rm test-domain-1_mongo-data --force || true" + bash -c "docker volume rm test-domain-1_credentials-data --force || true" + bash -c "docker volume rm test-domain-1_seaweedfs-data --force || true" + bash -c "docker volume rm test-domain-2_mongo-data --force || true" + bash -c 
"docker volume rm test-domain-2_credentials-data --force || true" + bash -c "docker volume rm test-domain-2_seaweedfs-data --force || true" + bash -c "docker volume rm test-gateway-1_mongo-data --force || true" + bash -c "docker volume rm test-gateway-1_credentials-data --force || true" + bash -c "docker volume rm test-gateway-1_seaweedfs-data --force || true" + + bash -c "docker volume rm test-domain-1_tailscale-data --force || true" + bash -c "docker volume rm test-domain-2_tailscale-data --force || true" + bash -c "docker volume rm test-gateway-1_tailscale-data --force || true" + bash -c "docker volume rm test-gateway-1_headscale-data --force || true" bash -c 'HAGRID_ART=$HAGRID_ART hagrid launch test_gateway_1 network to docker:9081 $HAGRID_FLAGS --no-health-checks --verbose --no-warnings' bash -c 'HAGRID_ART=$HAGRID_ART hagrid launch test_domain_1 domain to docker:9082 $HAGRID_FLAGS --no-health-checks --enable-signup --verbose --no-warnings' @@ -465,6 +465,7 @@ commands = # Volume cleanup bash -c "docker volume rm test-domain-1_mongo-data --force || true" bash -c "docker volume rm test-domain-1_credentials-data --force || true" + bash -c "docker volume rm test-domain-1_seaweedfs-data --force || true" bash -c "echo Running with ORCHESTRA_DEPLOYMENT_TYPE=$ORCHESTRA_DEPLOYMENT_TYPE DEV_MODE=$DEV_MODE TEST_NOTEBOOK_PATHS=$TEST_NOTEBOOK_PATHS; date" bash -c "for subfolder in $(echo ${TEST_NOTEBOOK_PATHS} | tr ',' ' ');\ From edab08e1a70217a142dda4611a5a0df16aac3620 Mon Sep 17 00:00:00 2001 From: Shubham Gupta Date: Fri, 1 Sep 2023 12:52:28 +0530 Subject: [PATCH 89/98] fix referece for action object in accept_by_depositing_result --- packages/syft/src/syft/service/request/request.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/packages/syft/src/syft/service/request/request.py b/packages/syft/src/syft/service/request/request.py index 36919026a5d..ea9d17f6f9e 100644 --- a/packages/syft/src/syft/service/request/request.py +++ 
b/packages/syft/src/syft/service/request/request.py @@ -498,14 +498,14 @@ def accept_by_depositing_result(self, result: Any, force: bool = False): syft_node_location=api.node_uid, ) blob_store_result = action_object._save_to_blob_store() - if isinstance(result, SyftError): - return result + if isinstance(blob_store_result, SyftError): + return blob_store_result result = api.services.action.set(action_object) if isinstance(result, SyftError): return result ctx = AuthedServiceContext(credentials=api.signing_key.verify_key) - state.apply_output(context=ctx, outputs=action_object) + state.apply_output(context=ctx, outputs=result) policy_state_mutation = ObjectMutation( linked_obj=change.linked_obj, attr_name="output_policy", @@ -513,9 +513,7 @@ def accept_by_depositing_result(self, result: Any, force: bool = False): value=state, ) - action_object_link = LinkedObject.from_obj( - action_object, node_uid=self.node_uid - ) + action_object_link = LinkedObject.from_obj(result, node_uid=self.node_uid) permission_change = ActionStoreChange( linked_obj=action_object_link, apply_permission_type=ActionPermission.READ, From b63227e2fc3eba727cdd5993d831bccc4a1ac939 Mon Sep 17 00:00:00 2001 From: teo Date: Fri, 1 Sep 2023 11:28:47 +0300 Subject: [PATCH 90/98] used syft_action_data in output policy func --- notebooks/api/0.8/05-custom-policy.ipynb | 37 ++++++++----------- .../syft/service/code/user_code_service.py | 8 +++- .../syft/src/syft/service/request/request.py | 4 ++ 3 files changed, 26 insertions(+), 23 deletions(-) diff --git a/notebooks/api/0.8/05-custom-policy.ipynb b/notebooks/api/0.8/05-custom-policy.ipynb index 64866b96f5e..36412c6c9e1 100644 --- a/notebooks/api/0.8/05-custom-policy.ipynb +++ b/notebooks/api/0.8/05-custom-policy.ipynb @@ -159,7 +159,7 @@ "print(policy.init_kwargs)\n", "a_obj = sy.ActionObject.from_obj({'y': [1,2,3]})\n", "x = policy.apply_output(None, a_obj)\n", - "x" + "x['y']" ] }, { @@ -185,6 +185,16 @@ "source": [ "x = np.array([1,2,3])\n", 
"x_pointer = sy.ActionObject.from_obj(x)\n", + "x_pointer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e82409e4", + "metadata": {}, + "outputs": [], + "source": [ "domain_client.api.services.action.set(x_pointer)" ] }, @@ -205,21 +215,6 @@ " return {\"y\": x+1}" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "44565122-4ff4-4169-8e0a-db3b86bf53e1", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "@sy.syft_function(input_policy=sy.ExactMatch(x=x_pointer),\n", - " output_policy=sy.SingleExecutionExactOutput())\n", - "def train_mlp(x):\n", - " return x" - ] - }, { "cell_type": "code", "execution_count": null, @@ -293,19 +288,17 @@ "outputs": [], "source": [ "res_ptr = domain_client.code.func(x=x_pointer)\n", - "res = res_ptr.get()\n", - "res" + "res_ptr" ] }, { "cell_type": "code", "execution_count": null, - "id": "b8c74835", - "metadata": { - "tags": [] - }, + "id": "31e706e4", + "metadata": {}, "outputs": [], "source": [ + "res = res_ptr.get()\n", "res" ] }, diff --git a/packages/syft/src/syft/service/code/user_code_service.py b/packages/syft/src/syft/service/code/user_code_service.py index e4a9eba500b..cf8862c6a5d 100644 --- a/packages/syft/src/syft/service/code/user_code_service.py +++ b/packages/syft/src/syft/service/code/user_code_service.py @@ -240,6 +240,10 @@ def call( return SyftError("Output policy not approved", code) # Check if the OutputPolicy is valid + # stdlib + import sys + + print(output_policy.valid, file=sys.stderr) if not (is_valid := output_policy.valid): if len(output_policy.output_history) > 0: result = resolve_outputs( @@ -255,12 +259,14 @@ def call( Union[ActionObject, TwinObject], str ] = action_service._user_code_execute(context, code, kwarg2id) + print(output_result, file=sys.stderr) if output_result.is_err(): return SyftError(message=output_result.err()) result = output_result.ok() # Apply Output Policy to the results and update the OutputPolicyState - 
output_policy.apply_output(context=context, outputs=result) + output_policy.apply_output(context=context, outputs=result.syft_action_data) + print(output_policy, file=sys.stderr) code.output_policy = output_policy if not ( update_success := self.update_code_state( diff --git a/packages/syft/src/syft/service/request/request.py b/packages/syft/src/syft/service/request/request.py index ea9d17f6f9e..facc386ce2f 100644 --- a/packages/syft/src/syft/service/request/request.py +++ b/packages/syft/src/syft/service/request/request.py @@ -497,6 +497,10 @@ def accept_by_depositing_result(self, result: Any, force: bool = False): syft_client_verify_key=api.signing_key.verify_key, syft_node_location=api.node_uid, ) + # stdlib + import sys + + print(action_object.syft_action_data, file=sys.stderr) blob_store_result = action_object._save_to_blob_store() if isinstance(blob_store_result, SyftError): return blob_store_result From d40a957d64754f84d18d3a51df60104dc785230e Mon Sep 17 00:00:00 2001 From: teo Date: Fri, 1 Sep 2023 11:30:41 +0300 Subject: [PATCH 91/98] removed debug prints --- packages/syft/src/syft/service/code/user_code_service.py | 6 ------ packages/syft/src/syft/service/request/request.py | 4 ---- 2 files changed, 10 deletions(-) diff --git a/packages/syft/src/syft/service/code/user_code_service.py b/packages/syft/src/syft/service/code/user_code_service.py index cf8862c6a5d..8babbc71a4b 100644 --- a/packages/syft/src/syft/service/code/user_code_service.py +++ b/packages/syft/src/syft/service/code/user_code_service.py @@ -240,10 +240,6 @@ def call( return SyftError("Output policy not approved", code) # Check if the OutputPolicy is valid - # stdlib - import sys - - print(output_policy.valid, file=sys.stderr) if not (is_valid := output_policy.valid): if len(output_policy.output_history) > 0: result = resolve_outputs( @@ -259,14 +255,12 @@ def call( Union[ActionObject, TwinObject], str ] = action_service._user_code_execute(context, code, kwarg2id) - print(output_result, 
file=sys.stderr) if output_result.is_err(): return SyftError(message=output_result.err()) result = output_result.ok() # Apply Output Policy to the results and update the OutputPolicyState output_policy.apply_output(context=context, outputs=result.syft_action_data) - print(output_policy, file=sys.stderr) code.output_policy = output_policy if not ( update_success := self.update_code_state( diff --git a/packages/syft/src/syft/service/request/request.py b/packages/syft/src/syft/service/request/request.py index facc386ce2f..ea9d17f6f9e 100644 --- a/packages/syft/src/syft/service/request/request.py +++ b/packages/syft/src/syft/service/request/request.py @@ -497,10 +497,6 @@ def accept_by_depositing_result(self, result: Any, force: bool = False): syft_client_verify_key=api.signing_key.verify_key, syft_node_location=api.node_uid, ) - # stdlib - import sys - - print(action_object.syft_action_data, file=sys.stderr) blob_store_result = action_object._save_to_blob_store() if isinstance(blob_store_result, SyftError): return blob_store_result From abcae284c2b38a2a32f69133578e5d7cd03a3036 Mon Sep 17 00:00:00 2001 From: teo Date: Fri, 1 Sep 2023 11:54:28 +0300 Subject: [PATCH 92/98] revert change --- packages/syft/src/syft/service/code/user_code_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/syft/src/syft/service/code/user_code_service.py b/packages/syft/src/syft/service/code/user_code_service.py index 8babbc71a4b..e4a9eba500b 100644 --- a/packages/syft/src/syft/service/code/user_code_service.py +++ b/packages/syft/src/syft/service/code/user_code_service.py @@ -260,7 +260,7 @@ def call( result = output_result.ok() # Apply Output Policy to the results and update the OutputPolicyState - output_policy.apply_output(context=context, outputs=result.syft_action_data) + output_policy.apply_output(context=context, outputs=result) code.output_policy = output_policy if not ( update_success := self.update_code_state( From 
68ce5493082a32c668d8982545ee3e81a683abca Mon Sep 17 00:00:00 2001 From: teo Date: Fri, 1 Sep 2023 11:57:04 +0300 Subject: [PATCH 93/98] changed example policy --- notebooks/Experimental/Shubham/test.ipynb | 1896 +++++++++++++++++++++ notebooks/api/0.8/05-custom-policy.ipynb | 2 + 2 files changed, 1898 insertions(+) create mode 100644 notebooks/Experimental/Shubham/test.ipynb diff --git a/notebooks/Experimental/Shubham/test.ipynb b/notebooks/Experimental/Shubham/test.ipynb new file mode 100644 index 00000000000..6f675c45457 --- /dev/null +++ b/notebooks/Experimental/Shubham/test.ipynb @@ -0,0 +1,1896 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import syft as sy\n", + "import logging\n", + "\n", + "logging.basicConfig(level=logging.DEBUG)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "node = sy.orchestra.launch(name='test', dev_mode=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "node" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Logged into as \n" + ] + }, + { + "data": { + "text/html": [ + "
SyftWarning: You are using a default password. Please change the password using `[your_client].me.set_password([new_password])`.

" + ], + "text/plain": [ + "SyftWarning: You are using a default password. Please change the password using `[your_client].me.set_password([new_password])`." + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "domain_client = node.login(email=\"info@openmined.org\", password=\"changethis\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + "
\n", + " \"Logo\"\n",\n", + "

Welcome to test

\n", + "
\n", + " Node Type: Domain
Node Side Type: High Side
Syft Version: 0.8.2-beta.26
\n", + "
\n", + "
\n", + " ⓘ \n", + " This domain is run by the library PySyft to learn more about how it works visit\n", + " github.com/OpenMined/PySyft.\n", + "
\n", + "

Commands to Get Started

\n", + " \n", + "
    \n", + " \n", + "
  • <your_client>.projects - list projects
  • \n", + "
  • <your_client>.requests - list requests
  • \n", + "
  • <your_client>.users - list users
  • \n", + "
  • \n", + " <your_client>.requests.submit? - display function signature\n", + "
  • \n", + "
\n", + " \n", + "

\n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "domain_client" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from syft.service.action.action_object import Action\n", + "from syft.service.action.action_object import ActionObject\n", + "\n", + "def helper_make_action_obj(orig_obj):\n", + " obj_id = Action.make_id(None)\n", + " lin_obj_id = Action.make_result_id(obj_id)\n", + "\n", + " return ActionObject.from_obj(orig_obj, id=obj_id, syft_lineage_id=lin_obj_id)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "_ipython_canary_method_should_not_exist_ None abc\n", + "_ipython_display_ None abc\n", + "_ipython_canary_method_should_not_exist_ None abc\n", + "_repr_mimebundle_ None abc\n", + "_ipython_canary_method_should_not_exist_ None abc\n", + "_repr_html_ None abc\n", + "_ipython_canary_method_should_not_exist_ None abc\n", + "_ipython_canary_method_should_not_exist_ None abc\n", + "_repr_svg_ None abc\n", + "_ipython_canary_method_should_not_exist_ None abc\n", + "_repr_png_ None abc\n", + "_ipython_canary_method_should_not_exist_ None abc\n", + "_repr_pdf_ None abc\n", + "_ipython_canary_method_should_not_exist_ None abc\n", + "_repr_jpeg_ None abc\n", + "_ipython_canary_method_should_not_exist_ None abc\n", + "_repr_latex_ None abc\n", + "_ipython_canary_method_should_not_exist_ None abc\n", + "_repr_json_ None abc\n", + "_ipython_canary_method_should_not_exist_ None abc\n", + "_repr_javascript_ None abc\n" + ] + }, + { + "data": { + "text/markdown": [ + "```python\n", + "Pointer\n", + "```\n", + "'abc'" + ], + "text/plain": [ + "Pointer:\n", + "'abc'" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "abc_obj = 
helper_make_action_obj(\"abc\")\n", + "abc_obj" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "domain_client.api.services.action" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "BEFORE SET: \n", + "AFTER SET: \n", + "_ipython_canary_method_should_not_exist_ abc\n", + "_ipython_display_ abc\n", + "_ipython_canary_method_should_not_exist_ abc\n", + "_repr_mimebundle_ abc\n", + "_ipython_canary_method_should_not_exist_ abc\n", + "_repr_html_ abc\n", + "_ipython_canary_method_should_not_exist_ abc\n", + "_ipython_canary_method_should_not_exist_ abc\n", + "_repr_svg_ abc\n", + "_ipython_canary_method_should_not_exist_ abc\n", + "_repr_png_ abc\n", + "_ipython_canary_method_should_not_exist_ abc\n", + "_repr_pdf_ abc\n", + "_ipython_canary_method_should_not_exist_ abc\n", + "_repr_jpeg_ abc\n", + "_ipython_canary_method_should_not_exist_ abc\n", + "_repr_latex_ abc\n", + "_ipython_canary_method_should_not_exist_ abc\n", + "_repr_json_ abc\n", + "_ipython_canary_method_should_not_exist_ abc\n", + "_repr_javascript_ abc\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "TYPE AT SET: \n" + ] + }, + { + "data": { + "text/markdown": [ + "```python\n", + "Pointer\n", + "```\n", + "'abc'" + ], + "text/plain": [ + "Pointer:\n", + "'abc'" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ptr = abc_obj.send(domain_client)\n", + "ptr" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "data": { + "text/plain": [ + "'abc'" + ] + }, + "execution_count": 9, + "metadata": 
{}, + "output_type": "execute_result" + } + ], + "source": [ + "ptr.get()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "str" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "abc_obj.syft_action_data_type" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "str" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ser = sy.serialize(abc_obj, to_bytes=True)\n", + "new_obj = sy.deserialize(ser, from_bytes=True)\n", + "new_obj.syft_action_data_type" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# abc_obj.syft_blob_storage_entry_id, abc_obj.syft_created_at" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "abc_obj.id" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "_ipython_canary_method_should_not_exist_ abc\n", + "_ipython_display_ abc\n", + "_ipython_canary_method_should_not_exist_ abc\n", + "_repr_mimebundle_ abc\n", + "_ipython_canary_method_should_not_exist_ abc\n", + "_repr_html_ abc\n", + "_ipython_canary_method_should_not_exist_ abc\n", + "_ipython_canary_method_should_not_exist_ abc\n", + "_repr_svg_ abc\n", + "_ipython_canary_method_should_not_exist_ abc\n", + "_repr_png_ abc\n", + "_ipython_canary_method_should_not_exist_ abc\n", + "_repr_pdf_ abc\n", + "_ipython_canary_method_should_not_exist_ abc\n", 
+ "_repr_jpeg_ abc\n", + "_ipython_canary_method_should_not_exist_ abc\n", + "_repr_latex_ abc\n", + "_ipython_canary_method_should_not_exist_ abc\n", + "_repr_json_ abc\n", + "_ipython_canary_method_should_not_exist_ abc\n", + "_repr_javascript_ abc\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "data": { + "text/markdown": [ + "```python\n", + "Pointer\n", + "```\n", + "'abc'" + ], + "text/plain": [ + "Pointer:\n", + "'abc'" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "remote_obj = domain_client.api.services.action.get(abc_obj.id)\n", + "remote_obj" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "str" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "remote_obj.syft_action_data_type" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# remote_obj.syft_created_at" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'abc'" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "remote_obj.syft_action_data" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "syft.service.action.action_object.AnyActionObject" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(remote_obj)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "__eq__ abc\n", + "('abc',) {}\n", + "_ipython_canary_method_should_not_exist_ True\n", + "_ipython_display_ True\n", + 
"_ipython_canary_method_should_not_exist_ True\n", + "_repr_mimebundle_ True\n", + "_ipython_canary_method_should_not_exist_ True\n", + "_repr_html_ True\n", + "_ipython_canary_method_should_not_exist_ True\n", + "_ipython_canary_method_should_not_exist_ True\n", + "_repr_svg_ True\n", + "_ipython_canary_method_should_not_exist_ True\n", + "_repr_png_ True\n", + "_ipython_canary_method_should_not_exist_ True\n", + "_repr_pdf_ True\n", + "_ipython_canary_method_should_not_exist_ True\n", + "_repr_jpeg_ True\n", + "_ipython_canary_method_should_not_exist_ True\n", + "_repr_latex_ True\n", + "_ipython_canary_method_should_not_exist_ True\n", + "_repr_json_ True\n", + "_ipython_canary_method_should_not_exist_ True\n", + "_repr_javascript_ True\n" + ] + }, + { + "data": { + "text/markdown": [ + "```python\n", + "Pointer\n", + "```\n", + "True" + ], + "text/plain": [ + "Pointer:\n", + "True" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "remote_obj == \"abc\"" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "__ne__ abc\n", + "('abc',) {}\n" + ] + }, + { + "data": { + "text/plain": [ + "syft.service.action.action_object.AnyActionObject" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(remote_obj != \"abc\")" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "__contains__ abc\n", + "('z',) {}\n", + "__bool__ False\n", + "() {}\n" + ] + }, + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "'z' in remote_obj" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + 
"text/plain": [ + "'a'" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "next(\"abc\".__iter__())" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "__iter__ abc\n", + "() {}\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "remote_obj.__iter__().syft_action_data_cache" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "str_iterator" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(\"abc\".__iter__())" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "str" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "remote_obj.syft_action_data_type" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "__iter__ abc\n", + "() {}\n", + "yep... abc\n", + "__iter__ abc\n", + "() {}\n", + "__next__ \n", + "() {}\n", + "oh no... abc\n", + "__next__ \n", + "() {}\n", + "abc\n", + "__next__ \n", + "() {}\n", + "oh no... abc\n", + "__next__ \n", + "() {}\n" + ] + } + ], + "source": [ + "for letter in remote_obj:\n", + " print(letter)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "__iter__ abc\n", + "() {}\n", + "yep... abc\n", + "__iter__ abc\n", + "() {}\n", + "__next__ \n", + "() {}\n", + "oh no... 
abc\n", + "__next__ \n", + "() {}\n", + "__eq__ b\n", + "('z',) {}\n", + "__bool__ False\n", + "() {}\n", + "__next__ \n", + "() {}\n", + "oh no... abc\n", + "__next__ \n", + "() {}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "<>:1: SyntaxWarning: \"is\" with a literal. Did you mean \"==\"?\n", + "<>:1: SyntaxWarning: \"is\" with a literal. Did you mean \"==\"?\n", + ":1: SyntaxWarning: \"is\" with a literal. Did you mean \"==\"?\n", + " any('z' is e or 'z' == e for e in remote_obj)\n" + ] + }, + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "any('z' is e or 'z' == e for e in remote_obj)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "__eq__ abc\n", + "('DAWDAWDA',) {}\n", + "_ipython_canary_method_should_not_exist_ False\n", + "_ipython_display_ False\n", + "_ipython_canary_method_should_not_exist_ False\n", + "_repr_mimebundle_ False\n", + "_ipython_canary_method_should_not_exist_ False\n", + "_repr_html_ False\n", + "_ipython_canary_method_should_not_exist_ False\n", + "_ipython_canary_method_should_not_exist_ False\n", + "_repr_svg_ False\n", + "_ipython_canary_method_should_not_exist_ False\n", + "_repr_png_ False\n", + "_ipython_canary_method_should_not_exist_ False\n", + "_repr_pdf_ False\n", + "_ipython_canary_method_should_not_exist_ False\n", + "_repr_jpeg_ False\n", + "_ipython_canary_method_should_not_exist_ False\n", + "_repr_latex_ False\n", + "_ipython_canary_method_should_not_exist_ False\n", + "_repr_json_ False\n", + "_ipython_canary_method_should_not_exist_ False\n", + "_repr_javascript_ False\n" + ] + }, + { + "data": { + "text/markdown": [ + "```python\n", + "Pointer\n", + "```\n", + "False" + ], + "text/plain": [ + "Pointer:\n", + "False" + ] + }, + "execution_count": 28, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "result = abc_obj == \"DAWDAWDA\"\n", + "result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "__eq__ abc\n", + "('DAWDAWDA',) {}\n", + "_ipython_canary_method_should_not_exist_ False\n", + "_ipython_display_ False\n", + "_ipython_canary_method_should_not_exist_ False\n", + "_repr_mimebundle_ False\n", + "_ipython_canary_method_should_not_exist_ False\n", + "_repr_html_ False\n", + "_ipython_canary_method_should_not_exist_ False\n", + "_ipython_canary_method_should_not_exist_ False\n", + "_repr_svg_ False\n", + "_ipython_canary_method_should_not_exist_ False\n", + "_repr_png_ False\n", + "_ipython_canary_method_should_not_exist_ False\n", + "_repr_pdf_ False\n", + "_ipython_canary_method_should_not_exist_ False\n", + "_repr_jpeg_ False\n", + "_ipython_canary_method_should_not_exist_ False\n", + "_repr_latex_ False\n", + "_ipython_canary_method_should_not_exist_ False\n", + "_repr_json_ False\n", + "_ipython_canary_method_should_not_exist_ False\n", + "_repr_javascript_ False\n" + ] + }, + { + "data": { + "text/markdown": [ + "```python\n", + "Pointer\n", + "```\n", + "False" + ], + "text/plain": [ + "Pointer:\n", + "False" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "abc_obj == \"DAWDAWDA\"" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from uuid import uuid4\n", + "\n", + "\n", + "def random_hash() -> str:\n", + " return uuid4().hex\n", + "\n", + "\n", + "def data():\n", + " return np.array([1, 2, 3])\n", + "\n", + "\n", + "def mock():\n", + " return np.array([1, 1, 1])\n", + "\n", + "\n", + "def make_asset_without_mock():\n", + " 
return {\n", + " \"name\": random_hash(),\n", + " \"data\": data(),\n", + " }\n", + " \n", + "def make_asset_with_mock():\n", + " return {**make_asset_without_mock(), \"mock\": mock()}\n", + "\n", + "\n", + "def make_asset_with_empty_mock():\n", + " return {**make_asset_without_mock(), \"mock\": ActionObject.empty()}" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [], + "source": [ + "asset_with_mock = make_asset_with_mock()\n", + "asset_with_empty_mock = make_asset_with_empty_mock()" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [], + "source": [ + "from syft.service.dataset.dataset import CreateAsset as Asset\n", + "from syft.service.dataset.dataset import CreateDataset as Dataset\n", + "asset = Asset(**asset_with_mock, mock_is_real=True)\n", + "assert asset.mock_is_real\n", + "\n", + "asset.set_mock(mock_data=ActionObject.empty(), mock_is_real=False)\n", + "assert not asset.mock_is_real\n", + "\n", + "# with pytest.raises(ValidationError):\n", + "asset.mock_is_real = True" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "asset.mock_is_real" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [], + "source": [ + "worker = sy.Worker.named(name='test')" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 0%| | 0/1 [00:00\n", + " 0%| | 0/1 [00:00\n", + " \n", + "/* cyrillic-ext */\n", + "@font-face {\n", + " font-family: 'Open Sans';\n", + " font-style: normal;\n", + " font-weight: 300 800;\n", + " font-stretch: 100%;\n", + " src: url(https://fonts.gstatic.com/s/opensans/v35/memvYaGs126MiZpBA-UvWbX2vVnXBbObj2OVTSKmu0SC55K5gw.woff2) 
format('woff2');\n", + " unicode-range: U+0460-052F, U+1C80-1C88, U+20B4, U+2DE0-2DFF, U+A640-A69F, U+FE2E-FE2F;\n", + "}\n", + "/* cyrillic */\n", + "@font-face {\n", + " font-family: 'Open Sans';\n", + " font-style: normal;\n", + " font-weight: 300 800;\n", + " font-stretch: 100%;\n", + " src: url(https://fonts.gstatic.com/s/opensans/v35/memvYaGs126MiZpBA-UvWbX2vVnXBbObj2OVTSumu0SC55K5gw.woff2) format('woff2');\n", + " unicode-range: U+0301, U+0400-045F, U+0490-0491, U+04B0-04B1, U+2116;\n", + "}\n", + "/* greek-ext */\n", + "@font-face {\n", + " font-family: 'Open Sans';\n", + " font-style: normal;\n", + " font-weight: 300 800;\n", + " font-stretch: 100%;\n", + " src: url(https://fonts.gstatic.com/s/opensans/v35/memvYaGs126MiZpBA-UvWbX2vVnXBbObj2OVTSOmu0SC55K5gw.woff2) format('woff2');\n", + " unicode-range: U+1F00-1FFF;\n", + "}\n", + "/* greek */\n", + "@font-face {\n", + " font-family: 'Open Sans';\n", + " font-style: normal;\n", + " font-weight: 300 800;\n", + " font-stretch: 100%;\n", + " src: url(https://fonts.gstatic.com/s/opensans/v35/memvYaGs126MiZpBA-UvWbX2vVnXBbObj2OVTSymu0SC55K5gw.woff2) format('woff2');\n", + " unicode-range: U+0370-03FF;\n", + "}\n", + "/* hebrew */\n", + "@font-face {\n", + " font-family: 'Open Sans';\n", + " font-style: normal;\n", + " font-weight: 300 800;\n", + " font-stretch: 100%;\n", + " src: url(https://fonts.gstatic.com/s/opensans/v35/memvYaGs126MiZpBA-UvWbX2vVnXBbObj2OVTS2mu0SC55K5gw.woff2) format('woff2');\n", + " unicode-range: U+0590-05FF, U+200C-2010, U+20AA, U+25CC, U+FB1D-FB4F;\n", + "}\n", + "/* vietnamese */\n", + "@font-face {\n", + " font-family: 'Open Sans';\n", + " font-style: normal;\n", + " font-weight: 300 800;\n", + " font-stretch: 100%;\n", + " src: url(https://fonts.gstatic.com/s/opensans/v35/memvYaGs126MiZpBA-UvWbX2vVnXBbObj2OVTSCmu0SC55K5gw.woff2) format('woff2');\n", + " unicode-range: U+0102-0103, U+0110-0111, U+0128-0129, U+0168-0169, U+01A0-01A1, U+01AF-01B0, U+0300-0301, U+0303-0304, U+0308-0309, 
U+0323, U+0329, U+1EA0-1EF9, U+20AB;\n", + "}\n", + "/* latin-ext */\n", + "@font-face {\n", + " font-family: 'Open Sans';\n", + " font-style: normal;\n", + " font-weight: 300 800;\n", + " font-stretch: 100%;\n", + " src: url(https://fonts.gstatic.com/s/opensans/v35/memvYaGs126MiZpBA-UvWbX2vVnXBbObj2OVTSGmu0SC55K5gw.woff2) format('woff2');\n", + " unicode-range: U+0100-02AF, U+0304, U+0308, U+0329, U+1E00-1E9F, U+1EF2-1EFF, U+2020, U+20A0-20AB, U+20AD-20CF, U+2113, U+2C60-2C7F, U+A720-A7FF;\n", + "}\n", + "/* latin */\n", + "@font-face {\n", + " font-family: 'Open Sans';\n", + " font-style: normal;\n", + " font-weight: 300 800;\n", + " font-stretch: 100%;\n", + " src: url(https://fonts.gstatic.com/s/opensans/v35/memvYaGs126MiZpBA-UvWbX2vVnXBbObj2OVTS-mu0SC55I.woff2) format('woff2');\n", + " unicode-range: U+0000-00FF, U+0131, U+0152-0153, U+02BB-02BC, U+02C6, U+02DA, U+02DC, U+0304, U+0308, U+0329, U+2000-206F, U+2074, U+20AC, U+2122, U+2191, U+2193, U+2212, U+2215, U+FEFF, U+FFFD;\n", + "}\n", + "\n", + " .syft-dataset {color: #464158;}\n", + " .syft-dataset h3,\n", + " .syft-dataset p\n", + " {font-family: 'Open Sans';}\n", + " .itables {font-family: 'Consolas', monospace, sans-serif;}\n", + " \n", + "
\n", + "

f143e2a87e8c4837b124c2f6cba284ad

\n", + "

\n", + "

Uploaded by:Jane Doe

\n", + "

Created on: None

\n", + "

URL:\n", + " None

\n", + "

Contributors:\n", + " to see full details call dataset.contributors

\n", + " \n", + "\n", + "\n", + "\n", + "
\n", + "
\n", + "
\n", + "

CreateAsset Tupledict

\n", + "
\n", + "\n", + "
\n", + "
\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + " \n", + "
\n", + " \n", + "
\n", + "\n", + "

0

\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
\n", + " \n", + "\n", + " " + ], + "text/markdown": [ + "Syft Dataset: f143e2a87e8c4837b124c2f6cba284ad\n", + "\n", + "Assets:\n", + "\n", + "\t938da4acc8364a0f8632f14c34feb437: \n", + "\n", + "Description: \n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "syft.service.dataset.dataset.CreateDataset" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "ename": "IndexError", + "evalue": "list index out of range", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[51], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m guest_dataset \u001b[39m=\u001b[39m guest_datasets[\u001b[39m0\u001b[39;49m]\n", + "\u001b[0;31mIndexError\u001b[0m: list index out of range" + ] + } + ], + "source": [ + "guest_dataset = guest_datasets[0]" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "syft_3.10", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/api/0.8/05-custom-policy.ipynb b/notebooks/api/0.8/05-custom-policy.ipynb index 36412c6c9e1..8b798d2a768 100644 --- a/notebooks/api/0.8/05-custom-policy.ipynb +++ b/notebooks/api/0.8/05-custom-policy.ipynb @@ -87,6 +87,8 @@ " return self.state[\"counts\"]\n", " \n", " def apply_output(self, context, outputs):\n", + " if hasattr(outputs, \"syft_action_data\"):\n", + " outputs = outputs.syft_action_data\n", " output_dict = {}\n", " if 
self.state[\"counts\"] < self.n_calls:\n", " for output_arg in self.downloadable_output_args:\n", From 851f84279c1944eb30d6daf713c910ad5514df93 Mon Sep 17 00:00:00 2001 From: Kien Dang Date: Fri, 1 Sep 2023 19:36:00 +0800 Subject: [PATCH 94/98] Remove unused methods --- packages/syft/src/syft/service/action/action_object.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/packages/syft/src/syft/service/action/action_object.py b/packages/syft/src/syft/service/action/action_object.py index 6342e1baf3d..f695cb7c929 100644 --- a/packages/syft/src/syft/service/action/action_object.py +++ b/packages/syft/src/syft/service/action/action_object.py @@ -559,8 +559,6 @@ def _set_syft_action_data(self, data: Any) -> None: self.syft_action_data_cache = data - syft_action_data = syft_action_data.setter(_set_syft_action_data) - @property def is_pointer(self) -> bool: return self.syft_node_uid is not None @@ -1052,10 +1050,6 @@ def empty( res.__dict__["syft_internal_type"] = syft_internal_type return res - def delete_data(self): - empty = ActionDataEmpty(syft_internal_type=self.syft_internal_type) - self.syft_action_data = empty - def __post_init__(self) -> None: """Add pre/post hooks.""" if HOOK_ALWAYS not in self._syft_pre_hooks__: From 6442b7a8b696e1fca82a2eb3093e04e672bf2f62 Mon Sep 17 00:00:00 2001 From: Kien Dang Date: Fri, 1 Sep 2023 19:47:26 +0800 Subject: [PATCH 95/98] Rename methods --- .../syft/src/syft/client/domain_client.py | 2 +- .../src/syft/service/action/action_object.py | 42 +++++++++---------- .../src/syft/service/action/action_service.py | 6 +-- .../syft/service/enclave/enclave_service.py | 2 +- .../syft/src/syft/service/request/request.py | 4 +- packages/syft/src/syft/types/twin_object.py | 6 +-- 6 files changed, 31 insertions(+), 31 deletions(-) diff --git a/packages/syft/src/syft/client/domain_client.py b/packages/syft/src/syft/client/domain_client.py index ca3dda482a1..7adaeaa8b39 100644 --- a/packages/syft/src/syft/client/domain_client.py +++ 
b/packages/syft/src/syft/client/domain_client.py @@ -81,7 +81,7 @@ def upload_dataset(self, dataset: CreateDataset) -> Union[SyftSuccess, SyftError syft_node_location=self.id, syft_client_verify_key=self.verify_key, ) - twin._save_to_blob_store() + twin._save_to_blob_storage() except Exception as e: return SyftError(message=f"Failed to create twin. {e}") response = self.api.services.action.set(twin) diff --git a/packages/syft/src/syft/service/action/action_object.py b/packages/syft/src/syft/service/action/action_object.py index f695cb7c929..fd66b0057e4 100644 --- a/packages/syft/src/syft/service/action/action_object.py +++ b/packages/syft/src/syft/service/action/action_object.py @@ -190,7 +190,7 @@ class ActionObjectPointer: "get_from", # syft "get", # syft "delete_data", # syft - "_set_syft_action_data", # syft + "_save_to_blob_storage_", # syft "syft_action_data", # syft ] dont_wrap_output_attrs = [ @@ -316,7 +316,7 @@ def convert_to_pointers( syft_node_location=api.node_uid, ) arg.syft_node_uid = node_uid - r = arg._save_to_blob_store() + r = arg._save_to_blob_storage() if isinstance(r, SyftError): print(r.message) arg = api.services.action.set(arg) @@ -331,7 +331,7 @@ def convert_to_pointers( syft_node_location=api.node_uid, ) arg.syft_node_uid = node_uid - r = arg._save_to_blob_store() + r = arg._save_to_blob_storage() if isinstance(r, SyftError): print(r.message) arg = api.services.action.set(arg) @@ -435,8 +435,8 @@ def debox_args_and_kwargs(args: Any, kwargs: Any) -> Tuple[Any, Any]: "_repr_debug_", "as_empty", "get", - "_save_to_blob_store", - "_set_syft_action_data", + "_save_to_blob_storage", + "_save_to_blob_storage_", "syft_action_data", "__check_action_data", "as_empty_data", @@ -510,7 +510,7 @@ def reload_cache(self): self.syft_action_data_cache = blob_retrieval_object.read() self.syft_action_data_type = type(self.syft_action_data) - def _set_syft_action_data(self, data: Any) -> None: + def _save_to_blob_storage_(self, data: Any) -> None: if not 
isinstance(data, ActionDataEmpty): if isinstance(data, ActionFileData): storage_entry = CreateBlobStorageEntry.from_path(data.path) @@ -559,6 +559,19 @@ def _set_syft_action_data(self, data: Any) -> None: self.syft_action_data_cache = data + def _save_to_blob_storage(self) -> Optional[SyftError]: + data = self.syft_action_data + if isinstance(data, SyftError): + return data + if isinstance(data, ActionDataEmpty): + print(f"cannot store empty object {self.id}") + return data + result = self._save_to_blob_storage_(data) + if isinstance(result, SyftError): + return result + if not TraceResult.is_tracing: + self.syft_action_data_cache = self.as_empty_data() + @property def is_pointer(self) -> bool: return self.syft_node_uid is not None @@ -594,19 +607,6 @@ def __check_action_data(cls, values: dict) -> dict: values["syft_has_bool_attr"] = hasattr(v, "__bool__") return values - def _save_to_blob_store(self) -> Optional[SyftError]: - data = self.syft_action_data - if isinstance(data, SyftError): - return data - if isinstance(data, ActionDataEmpty): - print(f"cannot store empty object {self.id}") - return data - result = self._set_syft_action_data(data) - if isinstance(result, SyftError): - return result - if not TraceResult.is_tracing: - self.syft_action_data_cache = self.as_empty_data() - @property def is_mock(self): return self.syft_twin_type == TwinMode.MOCK @@ -721,7 +721,7 @@ def _syft_try_to_save_to_store(self, obj) -> None: if api is not None: obj._set_obj_location_(api.node_uid, api.signing_key.verify_key) - res = obj._save_to_blob_store() + res = obj._save_to_blob_storage() if isinstance(res, SyftError): print(f"failed saving {obj} to blob storage, error: {res}") @@ -897,7 +897,7 @@ def wrapper( def send(self, client: SyftClient) -> Self: """Send the object to a Syft Client""" self._set_obj_location_(client.id, client.verify_key) - self._save_to_blob_store() + self._save_to_blob_storage() res = client.api.services.action.set(self) if isinstance(res, 
ActionObject): self.syft_created_at = res.syft_created_at diff --git a/packages/syft/src/syft/service/action/action_service.py b/packages/syft/src/syft/service/action/action_service.py index 700aa74f916..149a896e5fd 100644 --- a/packages/syft/src/syft/service/action/action_service.py +++ b/packages/syft/src/syft/service/action/action_service.py @@ -61,7 +61,7 @@ def np_array(self, context: AuthedServiceContext, data: Any) -> Any: syft_node_location=context.node.id, syft_client_verify_key=context.credentials, ) - blob_store_result = np_obj._save_to_blob_store() + blob_store_result = np_obj._save_to_blob_storage() if isinstance(blob_store_result, SyftError): return blob_store_result @@ -236,7 +236,7 @@ def _user_code_execute( context.node.id, context.credentials, ) - blob_store_result = result_action_object._save_to_blob_store() + blob_store_result = result_action_object._save_to_blob_storage() if isinstance(blob_store_result, SyftError): return blob_store_result @@ -489,7 +489,7 @@ def execute( context.credentials, ) - blob_store_result = result_action_object._save_to_blob_store() + blob_store_result = result_action_object._save_to_blob_storage() if isinstance(blob_store_result, SyftError): return blob_store_result diff --git a/packages/syft/src/syft/service/enclave/enclave_service.py b/packages/syft/src/syft/service/enclave/enclave_service.py index bc4d289b5e9..c2050b9bf80 100644 --- a/packages/syft/src/syft/service/enclave/enclave_service.py +++ b/packages/syft/src/syft/service/enclave/enclave_service.py @@ -163,7 +163,7 @@ def propagate_inputs_to_enclave(user_code: UserCode, context: ChangeContext): enclave_client.api.node_uid, enclave_client.verify_key, ) - var_value._save_to_blob_store() + var_value._save_to_blob_storage() inputs[var_name] = var_value diff --git a/packages/syft/src/syft/service/request/request.py b/packages/syft/src/syft/service/request/request.py index ea9d17f6f9e..2c9b5741fd1 100644 --- a/packages/syft/src/syft/service/request/request.py +++ 
b/packages/syft/src/syft/service/request/request.py @@ -484,7 +484,7 @@ def accept_by_depositing_result(self, result: Any, force: bool = False): syft_client_verify_key=api.signing_key.verify_key, syft_node_location=api.node_uid, ) - blob_store_result = action_object._save_to_blob_store() + blob_store_result = action_object._save_to_blob_storage() if isinstance(blob_store_result, SyftError): return blob_store_result result = api.services.action.set(action_object) @@ -497,7 +497,7 @@ def accept_by_depositing_result(self, result: Any, force: bool = False): syft_client_verify_key=api.signing_key.verify_key, syft_node_location=api.node_uid, ) - blob_store_result = action_object._save_to_blob_store() + blob_store_result = action_object._save_to_blob_storage() if isinstance(blob_store_result, SyftError): return blob_store_result result = api.services.action.set(action_object) diff --git a/packages/syft/src/syft/types/twin_object.py b/packages/syft/src/syft/types/twin_object.py index 2cd47a3dd32..5529daaff53 100644 --- a/packages/syft/src/syft/types/twin_object.py +++ b/packages/syft/src/syft/types/twin_object.py @@ -72,7 +72,7 @@ def mock(self) -> ActionObject: mock.id = twin_id return mock - def _save_to_blob_store(self): + def _save_to_blob_storage(self): # Set node location and verify key self.private_obj._set_obj_location_( self.syft_node_location, @@ -82,5 +82,5 @@ def _save_to_blob_store(self): # self.syft_node_location, # self.syft_client_verify_key, # ) - return self.private_obj._save_to_blob_store() - # self.mock_obj._save_to_blob_store() + return self.private_obj._save_to_blob_storage() + # self.mock_obj._save_to_blob_storage() From 9483cf8cea8318efcab02add541418d4cf7976a8 Mon Sep 17 00:00:00 2001 From: Kien Dang Date: Fri, 1 Sep 2023 19:54:38 +0800 Subject: [PATCH 96/98] Delete debug notebook --- notebooks/Experimental/Shubham/test.ipynb | 1896 --------------------- 1 file changed, 1896 deletions(-) delete mode 100644 notebooks/Experimental/Shubham/test.ipynb 
diff --git a/notebooks/Experimental/Shubham/test.ipynb b/notebooks/Experimental/Shubham/test.ipynb deleted file mode 100644 index 6f675c45457..00000000000 --- a/notebooks/Experimental/Shubham/test.ipynb +++ /dev/null @@ -1,1896 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import syft as sy\n", - "import logging\n", - "\n", - "logging.basicConfig(level=logging.DEBUG)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "node = sy.orchestra.launch(name='test', dev_mode=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "node" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Logged into as \n" - ] - }, - { - "data": { - "text/html": [ - "
SyftWarning: You are using a default password. Please change the password using `[your_client].me.set_password([new_password])`.

" - ], - "text/plain": [ - "SyftWarning: You are using a default password. Please change the password using `[your_client].me.set_password([new_password])`." - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "domain_client = node.login(email=\"info@openmined.org\", password=\"changethis\")" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - "
\n", - " \"Logo\"\n",\n", - "

Welcome to test

\n", - "
\n", - " Node Type: Domain
Node Side Type: High Side
Syft Version: 0.8.2-beta.26
\n", - "
\n", - "
\n", - " ⓘ \n", - " This domain is run by the library PySyft to learn more about how it works visit\n", - " github.com/OpenMined/PySyft.\n", - "
\n", - "

Commands to Get Started

\n", - " \n", - "
    \n", - " \n", - "
  • <your_client>.projects - list projects
  • \n", - "
  • <your_client>.requests - list requests
  • \n", - "
  • <your_client>.users - list users
  • \n", - "
  • \n", - " <your_client>.requests.submit? - display function signature\n", - "
  • \n", - "
\n", - " \n", - "

\n", - " " - ], - "text/plain": [ - "" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "domain_client" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "from syft.service.action.action_object import Action\n", - "from syft.service.action.action_object import ActionObject\n", - "\n", - "def helper_make_action_obj(orig_obj):\n", - " obj_id = Action.make_id(None)\n", - " lin_obj_id = Action.make_result_id(obj_id)\n", - "\n", - " return ActionObject.from_obj(orig_obj, id=obj_id, syft_lineage_id=lin_obj_id)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "_ipython_canary_method_should_not_exist_ None abc\n", - "_ipython_display_ None abc\n", - "_ipython_canary_method_should_not_exist_ None abc\n", - "_repr_mimebundle_ None abc\n", - "_ipython_canary_method_should_not_exist_ None abc\n", - "_repr_html_ None abc\n", - "_ipython_canary_method_should_not_exist_ None abc\n", - "_ipython_canary_method_should_not_exist_ None abc\n", - "_repr_svg_ None abc\n", - "_ipython_canary_method_should_not_exist_ None abc\n", - "_repr_png_ None abc\n", - "_ipython_canary_method_should_not_exist_ None abc\n", - "_repr_pdf_ None abc\n", - "_ipython_canary_method_should_not_exist_ None abc\n", - "_repr_jpeg_ None abc\n", - "_ipython_canary_method_should_not_exist_ None abc\n", - "_repr_latex_ None abc\n", - "_ipython_canary_method_should_not_exist_ None abc\n", - "_repr_json_ None abc\n", - "_ipython_canary_method_should_not_exist_ None abc\n", - "_repr_javascript_ None abc\n" - ] - }, - { - "data": { - "text/markdown": [ - "```python\n", - "Pointer\n", - "```\n", - "'abc'" - ], - "text/plain": [ - "Pointer:\n", - "'abc'" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "abc_obj = 
helper_make_action_obj(\"abc\")\n", - "abc_obj" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "domain_client.api.services.action" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "BEFORE SET: \n", - "AFTER SET: \n", - "_ipython_canary_method_should_not_exist_ abc\n", - "_ipython_display_ abc\n", - "_ipython_canary_method_should_not_exist_ abc\n", - "_repr_mimebundle_ abc\n", - "_ipython_canary_method_should_not_exist_ abc\n", - "_repr_html_ abc\n", - "_ipython_canary_method_should_not_exist_ abc\n", - "_ipython_canary_method_should_not_exist_ abc\n", - "_repr_svg_ abc\n", - "_ipython_canary_method_should_not_exist_ abc\n", - "_repr_png_ abc\n", - "_ipython_canary_method_should_not_exist_ abc\n", - "_repr_pdf_ abc\n", - "_ipython_canary_method_should_not_exist_ abc\n", - "_repr_jpeg_ abc\n", - "_ipython_canary_method_should_not_exist_ abc\n", - "_repr_latex_ abc\n", - "_ipython_canary_method_should_not_exist_ abc\n", - "_repr_json_ abc\n", - "_ipython_canary_method_should_not_exist_ abc\n", - "_repr_javascript_ abc\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "TYPE AT SET: \n" - ] - }, - { - "data": { - "text/markdown": [ - "```python\n", - "Pointer\n", - "```\n", - "'abc'" - ], - "text/plain": [ - "Pointer:\n", - "'abc'" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ptr = abc_obj.send(domain_client)\n", - "ptr" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "data": { - "text/plain": [ - "'abc'" - ] - }, - "execution_count": 9, - "metadata": 
{}, - "output_type": "execute_result" - } - ], - "source": [ - "ptr.get()" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "str" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "abc_obj.syft_action_data_type" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "str" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ser = sy.serialize(abc_obj, to_bytes=True)\n", - "new_obj = sy.deserialize(ser, from_bytes=True)\n", - "new_obj.syft_action_data_type" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "# abc_obj.syft_blob_storage_entry_id, abc_obj.syft_created_at" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "abc_obj.id" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "_ipython_canary_method_should_not_exist_ abc\n", - "_ipython_display_ abc\n", - "_ipython_canary_method_should_not_exist_ abc\n", - "_repr_mimebundle_ abc\n", - "_ipython_canary_method_should_not_exist_ abc\n", - "_repr_html_ abc\n", - "_ipython_canary_method_should_not_exist_ abc\n", - "_ipython_canary_method_should_not_exist_ abc\n", - "_repr_svg_ abc\n", - "_ipython_canary_method_should_not_exist_ abc\n", - "_repr_png_ abc\n", - "_ipython_canary_method_should_not_exist_ abc\n", - "_repr_pdf_ abc\n", - "_ipython_canary_method_should_not_exist_ abc\n", 
- "_repr_jpeg_ abc\n", - "_ipython_canary_method_should_not_exist_ abc\n", - "_repr_latex_ abc\n", - "_ipython_canary_method_should_not_exist_ abc\n", - "_repr_json_ abc\n", - "_ipython_canary_method_should_not_exist_ abc\n", - "_repr_javascript_ abc\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "data": { - "text/markdown": [ - "```python\n", - "Pointer\n", - "```\n", - "'abc'" - ], - "text/plain": [ - "Pointer:\n", - "'abc'" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "remote_obj = domain_client.api.services.action.get(abc_obj.id)\n", - "remote_obj" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "str" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "remote_obj.syft_action_data_type" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "# remote_obj.syft_created_at" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'abc'" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "remote_obj.syft_action_data" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "syft.service.action.action_object.AnyActionObject" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "type(remote_obj)" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "__eq__ abc\n", - "('abc',) {}\n", - "_ipython_canary_method_should_not_exist_ True\n", - "_ipython_display_ True\n", - 
"_ipython_canary_method_should_not_exist_ True\n", - "_repr_mimebundle_ True\n", - "_ipython_canary_method_should_not_exist_ True\n", - "_repr_html_ True\n", - "_ipython_canary_method_should_not_exist_ True\n", - "_ipython_canary_method_should_not_exist_ True\n", - "_repr_svg_ True\n", - "_ipython_canary_method_should_not_exist_ True\n", - "_repr_png_ True\n", - "_ipython_canary_method_should_not_exist_ True\n", - "_repr_pdf_ True\n", - "_ipython_canary_method_should_not_exist_ True\n", - "_repr_jpeg_ True\n", - "_ipython_canary_method_should_not_exist_ True\n", - "_repr_latex_ True\n", - "_ipython_canary_method_should_not_exist_ True\n", - "_repr_json_ True\n", - "_ipython_canary_method_should_not_exist_ True\n", - "_repr_javascript_ True\n" - ] - }, - { - "data": { - "text/markdown": [ - "```python\n", - "Pointer\n", - "```\n", - "True" - ], - "text/plain": [ - "Pointer:\n", - "True" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "remote_obj == \"abc\"" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "__ne__ abc\n", - "('abc',) {}\n" - ] - }, - { - "data": { - "text/plain": [ - "syft.service.action.action_object.AnyActionObject" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "type(remote_obj != \"abc\")" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "__contains__ abc\n", - "('z',) {}\n", - "__bool__ False\n", - "() {}\n" - ] - }, - { - "data": { - "text/plain": [ - "False" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "'z' in remote_obj" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "data": { - 
"text/plain": [ - "'a'" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "next(\"abc\".__iter__())" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "__iter__ abc\n", - "() {}\n" - ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "remote_obj.__iter__().syft_action_data_cache" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "str_iterator" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "type(\"abc\".__iter__())" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "str" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "remote_obj.syft_action_data_type" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "__iter__ abc\n", - "() {}\n", - "yep... abc\n", - "__iter__ abc\n", - "() {}\n", - "__next__ \n", - "() {}\n", - "oh no... abc\n", - "__next__ \n", - "() {}\n", - "abc\n", - "__next__ \n", - "() {}\n", - "oh no... abc\n", - "__next__ \n", - "() {}\n" - ] - } - ], - "source": [ - "for letter in remote_obj:\n", - " print(letter)" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "__iter__ abc\n", - "() {}\n", - "yep... abc\n", - "__iter__ abc\n", - "() {}\n", - "__next__ \n", - "() {}\n", - "oh no... 
abc\n", - "__next__ \n", - "() {}\n", - "__eq__ b\n", - "('z',) {}\n", - "__bool__ False\n", - "() {}\n", - "__next__ \n", - "() {}\n", - "oh no... abc\n", - "__next__ \n", - "() {}\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "<>:1: SyntaxWarning: \"is\" with a literal. Did you mean \"==\"?\n", - "<>:1: SyntaxWarning: \"is\" with a literal. Did you mean \"==\"?\n", - ":1: SyntaxWarning: \"is\" with a literal. Did you mean \"==\"?\n", - " any('z' is e or 'z' == e for e in remote_obj)\n" - ] - }, - { - "data": { - "text/plain": [ - "False" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "any('z' is e or 'z' == e for e in remote_obj)" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "__eq__ abc\n", - "('DAWDAWDA',) {}\n", - "_ipython_canary_method_should_not_exist_ False\n", - "_ipython_display_ False\n", - "_ipython_canary_method_should_not_exist_ False\n", - "_repr_mimebundle_ False\n", - "_ipython_canary_method_should_not_exist_ False\n", - "_repr_html_ False\n", - "_ipython_canary_method_should_not_exist_ False\n", - "_ipython_canary_method_should_not_exist_ False\n", - "_repr_svg_ False\n", - "_ipython_canary_method_should_not_exist_ False\n", - "_repr_png_ False\n", - "_ipython_canary_method_should_not_exist_ False\n", - "_repr_pdf_ False\n", - "_ipython_canary_method_should_not_exist_ False\n", - "_repr_jpeg_ False\n", - "_ipython_canary_method_should_not_exist_ False\n", - "_repr_latex_ False\n", - "_ipython_canary_method_should_not_exist_ False\n", - "_repr_json_ False\n", - "_ipython_canary_method_should_not_exist_ False\n", - "_repr_javascript_ False\n" - ] - }, - { - "data": { - "text/markdown": [ - "```python\n", - "Pointer\n", - "```\n", - "False" - ], - "text/plain": [ - "Pointer:\n", - "False" - ] - }, - "execution_count": 28, - "metadata": {}, - 
"output_type": "execute_result" - } - ], - "source": [ - "result = abc_obj == \"DAWDAWDA\"\n", - "result" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "__eq__ abc\n", - "('DAWDAWDA',) {}\n", - "_ipython_canary_method_should_not_exist_ False\n", - "_ipython_display_ False\n", - "_ipython_canary_method_should_not_exist_ False\n", - "_repr_mimebundle_ False\n", - "_ipython_canary_method_should_not_exist_ False\n", - "_repr_html_ False\n", - "_ipython_canary_method_should_not_exist_ False\n", - "_ipython_canary_method_should_not_exist_ False\n", - "_repr_svg_ False\n", - "_ipython_canary_method_should_not_exist_ False\n", - "_repr_png_ False\n", - "_ipython_canary_method_should_not_exist_ False\n", - "_repr_pdf_ False\n", - "_ipython_canary_method_should_not_exist_ False\n", - "_repr_jpeg_ False\n", - "_ipython_canary_method_should_not_exist_ False\n", - "_repr_latex_ False\n", - "_ipython_canary_method_should_not_exist_ False\n", - "_repr_json_ False\n", - "_ipython_canary_method_should_not_exist_ False\n", - "_repr_javascript_ False\n" - ] - }, - { - "data": { - "text/markdown": [ - "```python\n", - "Pointer\n", - "```\n", - "False" - ], - "text/plain": [ - "Pointer:\n", - "False" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "abc_obj == \"DAWDAWDA\"" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "from uuid import uuid4\n", - "\n", - "\n", - "def random_hash() -> str:\n", - " return uuid4().hex\n", - "\n", - "\n", - "def data():\n", - " return np.array([1, 2, 3])\n", - "\n", - "\n", - "def mock():\n", - " return np.array([1, 1, 1])\n", - "\n", - "\n", - "def make_asset_without_mock():\n", - " 
return {\n", - " \"name\": random_hash(),\n", - " \"data\": data(),\n", - " }\n", - " \n", - "def make_asset_with_mock():\n", - " return {**make_asset_without_mock(), \"mock\": mock()}\n", - "\n", - "\n", - "def make_asset_with_empty_mock():\n", - " return {**make_asset_without_mock(), \"mock\": ActionObject.empty()}" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "metadata": {}, - "outputs": [], - "source": [ - "asset_with_mock = make_asset_with_mock()\n", - "asset_with_empty_mock = make_asset_with_empty_mock()" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "metadata": {}, - "outputs": [], - "source": [ - "from syft.service.dataset.dataset import CreateAsset as Asset\n", - "from syft.service.dataset.dataset import CreateDataset as Dataset\n", - "asset = Asset(**asset_with_mock, mock_is_real=True)\n", - "assert asset.mock_is_real\n", - "\n", - "asset.set_mock(mock_data=ActionObject.empty(), mock_is_real=False)\n", - "assert not asset.mock_is_real\n", - "\n", - "# with pytest.raises(ValidationError):\n", - "asset.mock_is_real = True" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 40, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "asset.mock_is_real" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "metadata": {}, - "outputs": [], - "source": [ - "worker = sy.Worker.named(name='test')" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 0%| | 0/1 [00:00\n", - " 0%| | 0/1 [00:00\n", - " \n", - "/* cyrillic-ext */\n", - "@font-face {\n", - " font-family: 'Open Sans';\n", - " font-style: normal;\n", - " font-weight: 300 800;\n", - " font-stretch: 100%;\n", - " src: url(https://fonts.gstatic.com/s/opensans/v35/memvYaGs126MiZpBA-UvWbX2vVnXBbObj2OVTSKmu0SC55K5gw.woff2) 
format('woff2');\n", - " unicode-range: U+0460-052F, U+1C80-1C88, U+20B4, U+2DE0-2DFF, U+A640-A69F, U+FE2E-FE2F;\n", - "}\n", - "/* cyrillic */\n", - "@font-face {\n", - " font-family: 'Open Sans';\n", - " font-style: normal;\n", - " font-weight: 300 800;\n", - " font-stretch: 100%;\n", - " src: url(https://fonts.gstatic.com/s/opensans/v35/memvYaGs126MiZpBA-UvWbX2vVnXBbObj2OVTSumu0SC55K5gw.woff2) format('woff2');\n", - " unicode-range: U+0301, U+0400-045F, U+0490-0491, U+04B0-04B1, U+2116;\n", - "}\n", - "/* greek-ext */\n", - "@font-face {\n", - " font-family: 'Open Sans';\n", - " font-style: normal;\n", - " font-weight: 300 800;\n", - " font-stretch: 100%;\n", - " src: url(https://fonts.gstatic.com/s/opensans/v35/memvYaGs126MiZpBA-UvWbX2vVnXBbObj2OVTSOmu0SC55K5gw.woff2) format('woff2');\n", - " unicode-range: U+1F00-1FFF;\n", - "}\n", - "/* greek */\n", - "@font-face {\n", - " font-family: 'Open Sans';\n", - " font-style: normal;\n", - " font-weight: 300 800;\n", - " font-stretch: 100%;\n", - " src: url(https://fonts.gstatic.com/s/opensans/v35/memvYaGs126MiZpBA-UvWbX2vVnXBbObj2OVTSymu0SC55K5gw.woff2) format('woff2');\n", - " unicode-range: U+0370-03FF;\n", - "}\n", - "/* hebrew */\n", - "@font-face {\n", - " font-family: 'Open Sans';\n", - " font-style: normal;\n", - " font-weight: 300 800;\n", - " font-stretch: 100%;\n", - " src: url(https://fonts.gstatic.com/s/opensans/v35/memvYaGs126MiZpBA-UvWbX2vVnXBbObj2OVTS2mu0SC55K5gw.woff2) format('woff2');\n", - " unicode-range: U+0590-05FF, U+200C-2010, U+20AA, U+25CC, U+FB1D-FB4F;\n", - "}\n", - "/* vietnamese */\n", - "@font-face {\n", - " font-family: 'Open Sans';\n", - " font-style: normal;\n", - " font-weight: 300 800;\n", - " font-stretch: 100%;\n", - " src: url(https://fonts.gstatic.com/s/opensans/v35/memvYaGs126MiZpBA-UvWbX2vVnXBbObj2OVTSCmu0SC55K5gw.woff2) format('woff2');\n", - " unicode-range: U+0102-0103, U+0110-0111, U+0128-0129, U+0168-0169, U+01A0-01A1, U+01AF-01B0, U+0300-0301, U+0303-0304, U+0308-0309, 
U+0323, U+0329, U+1EA0-1EF9, U+20AB;\n", - "}\n", - "/* latin-ext */\n", - "@font-face {\n", - " font-family: 'Open Sans';\n", - " font-style: normal;\n", - " font-weight: 300 800;\n", - " font-stretch: 100%;\n", - " src: url(https://fonts.gstatic.com/s/opensans/v35/memvYaGs126MiZpBA-UvWbX2vVnXBbObj2OVTSGmu0SC55K5gw.woff2) format('woff2');\n", - " unicode-range: U+0100-02AF, U+0304, U+0308, U+0329, U+1E00-1E9F, U+1EF2-1EFF, U+2020, U+20A0-20AB, U+20AD-20CF, U+2113, U+2C60-2C7F, U+A720-A7FF;\n", - "}\n", - "/* latin */\n", - "@font-face {\n", - " font-family: 'Open Sans';\n", - " font-style: normal;\n", - " font-weight: 300 800;\n", - " font-stretch: 100%;\n", - " src: url(https://fonts.gstatic.com/s/opensans/v35/memvYaGs126MiZpBA-UvWbX2vVnXBbObj2OVTS-mu0SC55I.woff2) format('woff2');\n", - " unicode-range: U+0000-00FF, U+0131, U+0152-0153, U+02BB-02BC, U+02C6, U+02DA, U+02DC, U+0304, U+0308, U+0329, U+2000-206F, U+2074, U+20AC, U+2122, U+2191, U+2193, U+2212, U+2215, U+FEFF, U+FFFD;\n", - "}\n", - "\n", - " .syft-dataset {color: #464158;}\n", - " .syft-dataset h3,\n", - " .syft-dataset p\n", - " {font-family: 'Open Sans';}\n", - " .itables {font-family: 'Consolas', monospace, sans-serif;}\n", - " \n", - "
\n", - "

f143e2a87e8c4837b124c2f6cba284ad

\n", - "

\n", - "

Uploaded by:Jane Doe

\n", - "

Created on: None

\n", - "

URL:\n", - " None

\n", - "

Contributors:\n", - " to see full details call dataset.contributors

\n", - " \n", - "\n", - "\n", - "\n", - "
\n", - "
\n", - "
\n", - "

CreateAsset Tupledict

\n", - "
\n", - "\n", - "
\n", - "
\n", - "
\n", - "
\n", - "
\n", - " \n", - "
\n", - " \n", - "
\n", - " \n", - "
\n", - "\n", - "

0

\n", - "
\n", - "
\n", - " \n", - "
\n", - "
\n", - " \n", - "
\n", - "
\n", - "
\n", - " \n", - "\n", - " " - ], - "text/markdown": [ - "Syft Dataset: f143e2a87e8c4837b124c2f6cba284ad\n", - "\n", - "Assets:\n", - "\n", - "\t938da4acc8364a0f8632f14c34feb437: \n", - "\n", - "Description: \n", - "\n", - "\n", - "\n" - ], - "text/plain": [ - "syft.service.dataset.dataset.CreateDataset" - ] - }, - "execution_count": 50, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dataset" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "metadata": {}, - "outputs": [ - { - "ename": "IndexError", - "evalue": "list index out of range", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[51], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m guest_dataset \u001b[39m=\u001b[39m guest_datasets[\u001b[39m0\u001b[39;49m]\n", - "\u001b[0;31mIndexError\u001b[0m: list index out of range" - ] - } - ], - "source": [ - "guest_dataset = guest_datasets[0]" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "syft_3.10", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.11" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} From 115ca6e9c979329d9ff15d9644ef6f8bb26d552f Mon Sep 17 00:00:00 2001 From: Kien Dang Date: Fri, 1 Sep 2023 20:27:54 +0800 Subject: [PATCH 97/98] Return SyftError when attempting to save ActionDataEmpty --- packages/syft/src/syft/service/action/action_object.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/packages/syft/src/syft/service/action/action_object.py b/packages/syft/src/syft/service/action/action_object.py index 
fd66b0057e4..a49bcedd780 100644 --- a/packages/syft/src/syft/service/action/action_object.py +++ b/packages/syft/src/syft/service/action/action_object.py @@ -564,8 +564,7 @@ def _save_to_blob_storage(self) -> Optional[SyftError]: if isinstance(data, SyftError): return data if isinstance(data, ActionDataEmpty): - print(f"cannot store empty object {self.id}") - return data + return SyftError(f"cannot store empty object {self.id}") result = self._save_to_blob_storage_(data) if isinstance(result, SyftError): return result From 0fed142fc9c8d956c86c97f8421197cc010a5190 Mon Sep 17 00:00:00 2001 From: Madhava Jay Date: Mon, 4 Sep 2023 17:42:27 +1000 Subject: [PATCH 98/98] Updated opendp code in blob storage notebook --- notebooks/api/0.8/09-blob-storage.ipynb | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/notebooks/api/0.8/09-blob-storage.ipynb b/notebooks/api/0.8/09-blob-storage.ipynb index 9cc80cc6d46..22902475a83 100644 --- a/notebooks/api/0.8/09-blob-storage.ipynb +++ b/notebooks/api/0.8/09-blob-storage.ipynb @@ -299,11 +299,16 @@ "@sy.syft_function_single_use(trade_data=data_ptr)\n", "def sum_trade_value_mil(trade_data):\n", " import pandas as pd\n", - " from opendp.mod import enable_features\n", - " enable_features('contrib')\n", - " from opendp.measurements import make_base_laplace\n", + " import opendp.prelude as dp\n", + " dp.enable_features(\"contrib\")\n", + " from opendp.measurements import make_laplace\n", " aggregate = 0.\n", - " base_lap = make_base_laplace(scale=5.)\n", + " base_lap = dp.m.make_base_laplace(\n", + " dp.atom_domain(T=float),\n", + " dp.absolute_distance(T=float),\n", + " scale=10.\n", + " )\n", + " \n", " noise = base_lap(aggregate)\n", "\n", " df = pd.read_csv(data_ptr.syft_action_data.file_name, low_memory=False)\n",