diff --git a/encord/client.py b/encord/client.py
index da4c990d3..ffba6cb85 100644
--- a/encord/client.py
+++ b/encord/client.py
@@ -80,6 +80,7 @@
     DatasetAccessSettings,
     DatasetData,
     DatasetDataLongPolling,
+    DatasetLinkItems,
     DatasetUser,
     DatasetUserRole,
     DatasetUsers,
@@ -526,6 +527,13 @@ def upload_image(
         else:
             raise encord.exceptions.EncordException("Image upload failed.")
 
+    def link_items(self, item_uuids: List[uuid.UUID]) -> List[DataRow]:
+        return self._querier.basic_setter(
+            DatasetLinkItems,
+            uid=self._config.resource_id,
+            payload={"item_uuids": [str(item_uuid) for item_uuid in item_uuids]},
+        )
+
     def delete_image_group(self, data_hash: str):
         """
         This function is documented in :meth:`encord.dataset.Dataset.delete_image_group`.
diff --git a/encord/dataset.py b/encord/dataset.py
index 6a1cd565e..6d97428b5 100644
--- a/encord/dataset.py
+++ b/encord/dataset.py
@@ -1,6 +1,7 @@
 from datetime import datetime
 from pathlib import Path
 from typing import Dict, Iterable, List, Optional, TextIO, Union
+from uuid import UUID
 
 from encord.client import EncordClientDataset
 from encord.constants.enums import DataType
@@ -47,6 +48,10 @@ def description(self) -> str:
     def storage_location(self) -> StorageLocation:
         return self._dataset_instance.storage_location
 
+    @property
+    def backing_folder_uuid(self) -> Optional[UUID]:
+        return self._dataset_instance.backing_folder_uuid
+
     @property
     def data_rows(self) -> List[DataRow]:
         """
@@ -238,6 +243,9 @@ def upload_image(
         """
         return self._client.upload_image(file_path, title, cloud_upload_settings)
 
+    def link_items(self, item_uuids: List[UUID]) -> List[DataRow]:
+        return self._client.link_items(item_uuids)
+
     def delete_image_group(self, data_hash: str):
         """
         Delete an image group in Encord storage.
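A sketch of how the new `link_items` call might be used from the high-level `Dataset` wrapper. The key path, dataset hash, and item UUID below are placeholders, and the client setup follows the usual SDK pattern:

```python
from pathlib import Path
from uuid import UUID

from encord import EncordUserClient

# Placeholder credentials and identifiers -- substitute your own.
private_key = Path("/path/to/ssh-private-key").read_text()
user_client = EncordUserClient.create_with_ssh_private_key(private_key)
dataset = user_client.get_dataset("<dataset_hash>")

# Link existing storage items into the dataset; each newly linked
# item comes back as a DataRow.
data_rows = dataset.link_items([UUID("00000000-0000-0000-0000-000000000000")])
for row in data_rows:
    print(row.uid, row.title)
```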
diff --git a/encord/orm/dataset.py b/encord/orm/dataset.py
index 929b70ad6..63f5e0d1d 100644
--- a/encord/orm/dataset.py
+++ b/encord/orm/dataset.py
@@ -202,6 +202,7 @@ def __init__(
         images_data: Optional[List[dict]],
         signed_url: Optional[str],
         is_optimised_image_group: Optional[bool],
+        backing_item_uuid: Optional[UUID],
     ):
         parsed_images = None
         if images_data is not None:
@@ -227,6 +228,7 @@
                 "images_data": parsed_images,
                 "signed_url": signed_url,
                 "is_optimised_image_group": is_optimised_image_group,
+                "backing_item_uuid": backing_item_uuid,
                 "_dirty_fields": [],
             }
         )
@@ -406,6 +408,13 @@ def is_image_sequence(self) -> Optional[bool]:
         """
         return self["is_optimised_image_group"]
 
+    @property
+    def backing_item_uuid(self) -> UUID:
+        backing_item_uuid: Optional[UUID] = self.get("backing_item_uuid")
+        if not backing_item_uuid:
+            raise NotImplementedError("Storage API is not yet implemented by the service")
+        return backing_item_uuid
+
     def refetch_data(
         self,
         *,
@@ -467,6 +476,8 @@ def save(self) -> None:
     @classmethod
     def from_dict(cls, json_dict: Dict) -> DataRow:
         data_type = DataType.from_upper_case_string(json_dict["data_type"])
+        backing_item_uuid_value = json_dict.get("backing_item_uuid")
+        backing_item_uuid = UUID(backing_item_uuid_value) if backing_item_uuid_value else None
 
         return DataRow(
             uid=json_dict["data_hash"],
@@ -486,6 +497,7 @@ def from_dict(cls, json_dict: Dict) -> DataRow:
             duration=json_dict["duration"],
             signed_url=json_dict.get("signed_url"),
             is_optimised_image_group=json_dict.get("is_optimised_image_group"),
+            backing_item_uuid=backing_item_uuid,
             images_data=json_dict.get("images_data"),
         )
 
@@ -546,6 +558,7 @@ class DatasetInfo:
     type: int
     created_at: datetime
     last_edited_at: datetime
+    backing_folder_uuid: Optional[UUID] = None
 
 
 class Dataset(dict, Formatter):
@@ -556,6 +569,7 @@ def __init__(
         data_rows: List[DataRow],
         dataset_hash: str,
         description: Optional[str] = None,
+        backing_folder_uuid: Optional[UUID] = None,
     ):
         """
         DEPRECATED - prefer using the :class:`encord.dataset.Dataset` class instead.
@@ -575,6 +589,7 @@
                 "description": description,
                 "dataset_type": storage_location,
                 "data_rows": data_rows,
+                "backing_folder_uuid": backing_folder_uuid,
             }
         )
 
@@ -614,13 +629,24 @@ def data_rows(self) -> List[DataRow]:
     def data_rows(self, value: List[DataRow]) -> None:
         self["data_rows"] = value
 
+    @property
+    def backing_folder_uuid(self) -> Optional[UUID]:
+        return self["backing_folder_uuid"]
+
+    @backing_folder_uuid.setter
+    def backing_folder_uuid(self, value: Optional[UUID]) -> None:
+        self["backing_folder_uuid"] = value
+
     @classmethod
     def from_dict(cls, json_dict: Dict) -> Dataset:
+        backing_folder_uuid_value = json_dict.get("backing_folder_uuid")
+
         return Dataset(
             title=json_dict["title"],
             description=json_dict["description"],
             storage_location=json_dict["dataset_type"],
             dataset_hash=json_dict["dataset_hash"],
+            backing_folder_uuid=UUID(backing_folder_uuid_value) if backing_folder_uuid_value else None,
             data_rows=DataRow.from_dict_list(json_dict.get("data_rows", [])),
         )
 
@@ -629,10 +655,16 @@ def from_dict(cls, json_dict: Dict) -> Dataset:
 class DatasetDataInfo(Formatter):
     data_hash: str
     title: str
+    backing_item_uuid: Optional[UUID]
 
     @classmethod
     def from_dict(cls, json_dict: Dict) -> DatasetDataInfo:
-        return DatasetDataInfo(json_dict["data_hash"], json_dict["title"])
+        backing_item_uuid_value = json_dict.get("backing_item_uuid")
+        return DatasetDataInfo(
+            json_dict["data_hash"],
+            json_dict["title"],
+            UUID(backing_item_uuid_value) if backing_item_uuid_value else None,
+        )
 
 
 @dataclasses.dataclass(frozen=True)
@@ -679,6 +711,7 @@ def __init__(
         storage_location: int,
         dataset_hash: str,
         user_hash: str,
+        backing_folder_uuid: Optional[UUID],
     ):
         """
         This class has dict-style accessors for backwards compatibility.
@@ -696,6 +729,7 @@
                 "type": storage_location,
                 "dataset_hash": dataset_hash,
                 "user_hash": user_hash,
+                "backing_folder_uuid": backing_folder_uuid,
             }
         )
 
@@ -731,13 +765,23 @@ def user_hash(self) -> str:
     def user_hash(self, value: str) -> None:
         self["user_hash"] = value
 
+    @property
+    def backing_folder_uuid(self) -> Optional[UUID]:
+        return self["backing_folder_uuid"]
+
+    @backing_folder_uuid.setter
+    def backing_folder_uuid(self, value: Optional[UUID]) -> None:
+        self["backing_folder_uuid"] = value
+
     @classmethod
     def from_dict(cls, json_dict: Dict) -> CreateDatasetResponse:
+        backing_folder_uuid_value = json_dict.get("backing_folder_uuid")
         return CreateDatasetResponse(
             title=json_dict["title"],
             storage_location=json_dict["type"],
             dataset_hash=json_dict["dataset_hash"],
             user_hash=json_dict["user_hash"],
+            backing_folder_uuid=UUID(backing_folder_uuid_value) if backing_folder_uuid_value else None,
         )
 
@@ -835,16 +879,11 @@ class SignedDicomsURL(base_orm.BaseListORM):
 class Video(base_orm.BaseORM):
     """A video object with supporting information."""
 
-    DB_FIELDS = OrderedDict(
-        [
-            ("data_hash", str),
-            ("title", str),
-            ("file_link", str),
-        ]
-    )
+    DB_FIELDS = OrderedDict([("data_hash", str), ("title", str), ("file_link", str), ("backing_item_uuid", UUID)])
 
     NON_UPDATABLE_FIELDS = {
         "data_hash",
+        "backing_item_uuid",
     }
 
@@ -1015,15 +1054,14 @@ class DatasetDataLongPolling(Formatter):
     def from_dict(cls, json_dict: Dict) -> DatasetDataLongPolling:
         return DatasetDataLongPolling(
             status=LongPollingStatus(json_dict["status"]),
-            data_hashes_with_titles=[
-                DatasetDataInfo(
-                    data_hash=x["data_hash"],
-                    title=x["title"],
-                )
-                for x in json_dict["data_hashes_with_titles"]
-            ],
+            data_hashes_with_titles=[DatasetDataInfo.from_dict(x) for x in json_dict["data_hashes_with_titles"]],
             errors=json_dict["errors"],
             units_pending_count=json_dict["units_pending_count"],
             units_done_count=json_dict["units_done_count"],
             units_error_count=json_dict["units_error_count"],
         )
+
+
+@dataclasses.dataclass(frozen=True)
+class DatasetLinkItems:
+    pass
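As implemented above, `DataRow.backing_item_uuid` raises `NotImplementedError` whenever the service has not populated the field, so callers that must work against older servers may want to guard the access. A minimal sketch, assuming `dataset` is an `encord.dataset.Dataset` instance:

```python
# backing_item_uuid raises NotImplementedError when the server returned
# no value for the field, so guard the access defensively.
for data_row in dataset.data_rows:
    try:
        item_uuid = data_row.backing_item_uuid
        print(f"{data_row.uid} is backed by storage item {item_uuid}")
    except NotImplementedError:
        print(f"{data_row.uid} has no backing storage item yet")
```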
diff --git a/encord/user_client.py b/encord/user_client.py
index 1f7b4bc29..73c41e359 100644
--- a/encord/user_client.py
+++ b/encord/user_client.py
@@ -155,6 +155,7 @@ def create_dataset(
         dataset_title: str,
         dataset_type: StorageLocation,
         dataset_description: Optional[str] = None,
+        create_backing_folder: bool = True,
     ) -> CreateDatasetResponse:
         """
         Args:
@@ -170,6 +171,7 @@
         dataset = {
             "title": dataset_title,
             "type": dataset_type,
+            "create_backing_folder": create_backing_folder,
         }
 
         if dataset_description:
@@ -232,7 +234,13 @@
         """
         properties_filter = self.__validate_filter(locals())
         # a hack to be able to share validation code without too much c&p
-        data = self.querier.get_multiple(DatasetWithUserRole, payload={"filter": properties_filter})
+        data = self.querier.get_multiple(
+            DatasetWithUserRole,
+            payload={
+                "filter": properties_filter,
+                "enable_storage_api": True,
+            },
+        )
 
         def convert_dates(dataset):
             dataset["created_at"] = datetime_parser.isoparse(dataset["created_at"])
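The new `create_backing_folder` flag defaults to `True`, so existing callers keep the old behaviour. A sketch of opting out (the title is a placeholder, and `user_client` is assumed to be an authenticated `EncordUserClient`):

```python
from encord.orm.dataset import StorageLocation

response = user_client.create_dataset(
    dataset_title="Example dataset",           # placeholder
    dataset_type=StorageLocation.CORD_STORAGE,
    create_backing_folder=False,               # skip creating a backing folder
)
# backing_folder_uuid is Optional[UUID]; expect None when no folder was created.
print(response.backing_folder_uuid)
```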
"460505dd-89ea-485a-b4ea-417558a26889", + "backing_folder_uuid": "434df998-3aac-423d-bc29-1af33040e583", "user_hash": "yiA5JxmLEGSoEcJAuxr3AJdDDXE2", } @@ -14,6 +15,9 @@ def test_create_dataset_response_conversions(): create_dataset_response = CreateDatasetResponse.from_dict(DATASET_RESPONSE_JSON) + assert isinstance(create_dataset_response["backing_folder_uuid"], uuid.UUID) + create_dataset_response["backing_folder_uuid"] = str(create_dataset_response["backing_folder_uuid"]) + assert create_dataset_response == DATASET_RESPONSE_JSON diff --git a/tests/orm/test_dataset.py b/tests/orm/test_dataset.py index 5b082406c..2920cc631 100644 --- a/tests/orm/test_dataset.py +++ b/tests/orm/test_dataset.py @@ -14,9 +14,11 @@ "title": "CVAT imported baking dataset", "description": "Suiting description", "dataset_type": "CORD_STORAGE", + "backing_folder_uuid": None, "data_rows": [ { "data_hash": "87fb7247-794b-4dad-b378-4e574723c05e", + "backing_item_uuid": None, "data_title": "image-group-12dca", "created_at": "2022-01-05 18:51:05", "last_edited_at": "2022-01-05 19:23:56",