Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dynamic datasets minimal feature #452

Merged
merged 5 commits into from
Oct 30, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions encord/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@
DatasetAccessSettings,
DatasetData,
DatasetDataLongPolling,
DatasetLinkItems,
DatasetUser,
DatasetUserRole,
DatasetUsers,
Expand Down Expand Up @@ -526,6 +527,13 @@ def upload_image(
else:
raise encord.exceptions.EncordException("Image upload failed.")

def link_items(self, item_uuids: List[uuid.UUID]) -> List[DataRow]:
    """
    This function is documented in :meth:`encord.dataset.Dataset.link_items`.
    """
    # The server expects the item UUIDs serialised as strings.
    serialised_uuids = [str(item_uuid) for item_uuid in item_uuids]
    payload = {"item_uuids": serialised_uuids}
    return self._querier.basic_setter(DatasetLinkItems, uid=self._config.resource_id, payload=payload)
alexey-cord-tech marked this conversation as resolved.
Show resolved Hide resolved

def delete_image_group(self, data_hash: str):
"""
This function is documented in :meth:`encord.dataset.Dataset.delete_image_group`.
Expand Down
8 changes: 8 additions & 0 deletions encord/dataset.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from datetime import datetime
from pathlib import Path
from typing import Dict, Iterable, List, Optional, TextIO, Union
from uuid import UUID

from encord.client import EncordClientDataset
from encord.constants.enums import DataType
Expand Down Expand Up @@ -47,6 +48,10 @@ def description(self) -> str:
def storage_location(self) -> StorageLocation:
return self._dataset_instance.storage_location

@property
def backing_folder_uuid(self) -> Optional[UUID]:
    """The ``backing_folder_uuid`` reported by the underlying dataset instance, or ``None`` if absent."""
    folder_uuid: Optional[UUID] = self._dataset_instance.backing_folder_uuid
    return folder_uuid

@property
def data_rows(self) -> List[DataRow]:
"""
Expand Down Expand Up @@ -238,6 +243,9 @@ def upload_image(
"""
return self._client.upload_image(file_path, title, cloud_upload_settings)

def link_items(self, item_uuids: List[UUID]) -> List[DataRow]:
return self._client.link_items(item_uuids)

def delete_image_group(self, data_hash: str):
"""
Delete an image group in Encord storage.
Expand Down
68 changes: 53 additions & 15 deletions encord/orm/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ def __init__(
images_data: Optional[List[dict]],
signed_url: Optional[str],
is_optimised_image_group: Optional[bool],
backing_item_uuid: Optional[UUID],
):
parsed_images = None
if images_data is not None:
Expand All @@ -227,6 +228,7 @@ def __init__(
"images_data": parsed_images,
"signed_url": signed_url,
"is_optimised_image_group": is_optimised_image_group,
"backing_item_uuid": backing_item_uuid,
"_dirty_fields": [],
}
)
Expand Down Expand Up @@ -406,6 +408,13 @@ def is_image_sequence(self) -> Optional[bool]:
"""
return self["is_optimised_image_group"]

@property
def backing_item_uuid(self) -> UUID:
    """UUID of the storage item backing this data row.

    Raises:
        NotImplementedError: if the server response carried no
            ``backing_item_uuid`` (the Storage API is not available yet).
    """
    item_uuid: Optional[UUID] = self.get("backing_item_uuid")
    if not item_uuid:
        raise NotImplementedError("Storage API is not yet implemented by the service")
    return item_uuid
alexey-cord-tech marked this conversation as resolved.
Show resolved Hide resolved

def refetch_data(
self,
*,
Expand Down Expand Up @@ -467,6 +476,8 @@ def save(self) -> None:
@classmethod
def from_dict(cls, json_dict: Dict) -> DataRow:
data_type = DataType.from_upper_case_string(json_dict["data_type"])
backing_item_uuid_value = json_dict.get("backing_item_uuid")
backing_item_uuid = UUID(backing_item_uuid_value) if backing_item_uuid_value is not None else None

return DataRow(
alexey-cord-tech marked this conversation as resolved.
Show resolved Hide resolved
uid=json_dict["data_hash"],
Expand All @@ -486,6 +497,7 @@ def from_dict(cls, json_dict: Dict) -> DataRow:
duration=json_dict["duration"],
signed_url=json_dict.get("signed_url"),
is_optimised_image_group=json_dict.get("is_optimised_image_group"),
backing_item_uuid=backing_item_uuid,
images_data=json_dict.get("images_data"),
)

Expand Down Expand Up @@ -546,6 +558,7 @@ class DatasetInfo:
type: int
created_at: datetime
last_edited_at: datetime
backing_folder_uuid: Optional[UUID] = None


class Dataset(dict, Formatter):
Expand All @@ -556,6 +569,7 @@ def __init__(
data_rows: List[DataRow],
dataset_hash: str,
description: Optional[str] = None,
backing_folder_uuid: Optional[UUID] = None,
):
"""
DEPRECATED - prefer using the :class:`encord.dataset.Dataset` class instead.
Expand All @@ -575,6 +589,7 @@ def __init__(
"description": description,
"dataset_type": storage_location,
"data_rows": data_rows,
"backing_folder_uuid": backing_folder_uuid,
}
)

Expand Down Expand Up @@ -614,13 +629,24 @@ def data_rows(self) -> List[DataRow]:
def data_rows(self, value: List[DataRow]) -> None:
self["data_rows"] = value

@property
def backing_folder_uuid(self) -> Optional[UUID]:
    """Backing folder UUID stored on this dataset, or ``None`` when not set."""
    folder_uuid: Optional[UUID] = self["backing_folder_uuid"]
    return folder_uuid

@backing_folder_uuid.setter
def backing_folder_uuid(self, value: Optional[UUID]) -> None:
    """Set (or clear, with ``None``) the backing folder UUID."""
    self["backing_folder_uuid"] = value

@classmethod
def from_dict(cls, json_dict: Dict) -> Dataset:
    """Build a :class:`Dataset` from a server JSON payload.

    ``backing_folder_uuid`` is optional in the payload; missing or falsy
    values map to ``None``, otherwise the string is parsed into a UUID.
    """
    raw_folder_uuid = json_dict.get("backing_folder_uuid")
    parsed_folder_uuid = UUID(raw_folder_uuid) if raw_folder_uuid else None

    return Dataset(
        title=json_dict["title"],
        description=json_dict["description"],
        storage_location=json_dict["dataset_type"],
        dataset_hash=json_dict["dataset_hash"],
        backing_folder_uuid=parsed_folder_uuid,
        data_rows=DataRow.from_dict_list(json_dict.get("data_rows", [])),
    )
alexey-cord-tech marked this conversation as resolved.
Show resolved Hide resolved
alexey-cord-tech marked this conversation as resolved.
Show resolved Hide resolved

Expand All @@ -629,10 +655,16 @@ def from_dict(cls, json_dict: Dict) -> Dataset:
class DatasetDataInfo(Formatter):
data_hash: str
title: str
backing_item_uuid: Optional[UUID]

@classmethod
def from_dict(cls, json_dict: Dict) -> DatasetDataInfo:
    """Build a :class:`DatasetDataInfo` from a server JSON payload.

    ``backing_item_uuid`` may be absent or falsy, in which case ``None``
    is stored; otherwise the string is parsed into a UUID.
    """
    raw_item_uuid = json_dict.get("backing_item_uuid")
    parsed_item_uuid = UUID(raw_item_uuid) if raw_item_uuid else None
    return DatasetDataInfo(json_dict["data_hash"], json_dict["title"], parsed_item_uuid)
alexey-cord-tech marked this conversation as resolved.
Show resolved Hide resolved


@dataclasses.dataclass(frozen=True)
Expand Down Expand Up @@ -679,6 +711,7 @@ def __init__(
storage_location: int,
dataset_hash: str,
user_hash: str,
backing_folder_uuid: Optional[UUID],
):
"""
This class has dict-style accessors for backwards compatibility.
Expand All @@ -696,6 +729,7 @@ def __init__(
"type": storage_location,
"dataset_hash": dataset_hash,
"user_hash": user_hash,
"backing_folder_uuid": backing_folder_uuid,
}
)

Expand Down Expand Up @@ -731,13 +765,23 @@ def user_hash(self) -> str:
def user_hash(self, value: str) -> None:
self["user_hash"] = value

@property
def backing_folder_uuid(self) -> Optional[UUID]:
    """Backing folder UUID returned by the dataset-creation call, or ``None``."""
    folder_uuid: Optional[UUID] = self["backing_folder_uuid"]
    return folder_uuid

@backing_folder_uuid.setter
def backing_folder_uuid(self, value: Optional[UUID]) -> None:
    """Set (or clear, with ``None``) the backing folder UUID."""
    self["backing_folder_uuid"] = value
alexey-cord-tech marked this conversation as resolved.
Show resolved Hide resolved

@classmethod
def from_dict(cls, json_dict: Dict) -> CreateDatasetResponse:
    """Build a :class:`CreateDatasetResponse` from a server JSON payload.

    ``backing_folder_uuid`` is optional; missing or falsy values map to
    ``None``, otherwise the string is parsed into a UUID.
    """
    raw_folder_uuid = json_dict.get("backing_folder_uuid")
    parsed_folder_uuid = UUID(raw_folder_uuid) if raw_folder_uuid else None
    return CreateDatasetResponse(
        title=json_dict["title"],
        storage_location=json_dict["type"],
        dataset_hash=json_dict["dataset_hash"],
        user_hash=json_dict["user_hash"],
        backing_folder_uuid=parsed_folder_uuid,
    )

alexey-cord-tech marked this conversation as resolved.
Show resolved Hide resolved

Expand Down Expand Up @@ -835,16 +879,11 @@ class SignedDicomsURL(base_orm.BaseListORM):
class Video(base_orm.BaseORM):
"""A video object with supporting information."""

DB_FIELDS = OrderedDict(
[
("data_hash", str),
("title", str),
("file_link", str),
]
)
DB_FIELDS = OrderedDict([("data_hash", str), ("title", str), ("file_link", str), ("backing_item_uuid", UUID)])

NON_UPDATABLE_FIELDS = {
"data_hash",
"backing_item_uuid",
}


Expand Down Expand Up @@ -1015,15 +1054,14 @@ class DatasetDataLongPolling(Formatter):
def from_dict(cls, json_dict: Dict) -> DatasetDataLongPolling:
return DatasetDataLongPolling(
status=LongPollingStatus(json_dict["status"]),
data_hashes_with_titles=[
DatasetDataInfo(
data_hash=x["data_hash"],
title=x["title"],
)
for x in json_dict["data_hashes_with_titles"]
],
data_hashes_with_titles=[DatasetDataInfo.from_dict(x) for x in json_dict["data_hashes_with_titles"]],
errors=json_dict["errors"],
units_pending_count=json_dict["units_pending_count"],
units_done_count=json_dict["units_done_count"],
units_error_count=json_dict["units_error_count"],
)
alexey-cord-tech marked this conversation as resolved.
Show resolved Hide resolved


@dataclasses.dataclass(frozen=True)
class DatasetLinkItems:
    """Marker ORM type for the 'link items' endpoint.

    Carries no fields of its own; it is passed as the ORM-type argument to
    the querier's ``basic_setter`` in ``EncordClientDataset.link_items`` to
    identify the resource being written.
    """

    pass
alexey-cord-tech marked this conversation as resolved.
Show resolved Hide resolved
alexey-cord-tech marked this conversation as resolved.
Show resolved Hide resolved
2 changes: 2 additions & 0 deletions encord/user_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ def create_dataset(
dataset_title: str,
dataset_type: StorageLocation,
dataset_description: Optional[str] = None,
create_backing_folder: bool = True,
) -> CreateDatasetResponse:
"""
Args:
Expand All @@ -170,6 +171,7 @@ def create_dataset(
dataset = {
"title": dataset_title,
"type": dataset_type,
"create_backing_folder": create_backing_folder,
}
alexey-cord-tech marked this conversation as resolved.
Show resolved Hide resolved

if dataset_description:
Expand Down
6 changes: 5 additions & 1 deletion tests/orm/test_create_dataset_response.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,23 @@
"""Unit tests for the dataset class"""

import uuid

from encord.orm.dataset import CreateDatasetResponse, StorageLocation

# Canonical server payload for a freshly created dataset. Note that
# backing_folder_uuid arrives from the service as a hex string, not a UUID.
DATASET_RESPONSE_JSON = {
    "title": "CVAT imported baking dataset",
    "type": 0,
    "dataset_hash": "460505dd-89ea-485a-b4ea-417558a26889",
    "backing_folder_uuid": "434df998-3aac-423d-bc29-1af33040e583",
    "user_hash": "yiA5JxmLEGSoEcJAuxr3AJdDDXE2",
}


def test_create_dataset_response_conversions():
    """from_dict parses backing_folder_uuid into a UUID; stringifying it back restores the original payload."""
    response = CreateDatasetResponse.from_dict(DATASET_RESPONSE_JSON)

    parsed_uuid = response["backing_folder_uuid"]
    assert isinstance(parsed_uuid, uuid.UUID)
    response["backing_folder_uuid"] = str(parsed_uuid)

    assert response == DATASET_RESPONSE_JSON


Expand Down
2 changes: 2 additions & 0 deletions tests/orm/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@
"title": "CVAT imported baking dataset",
"description": "Suiting description",
"dataset_type": "CORD_STORAGE",
"backing_folder_uuid": None,
"data_rows": [
{
"data_hash": "87fb7247-794b-4dad-b378-4e574723c05e",
"backing_item_uuid": None,
"data_title": "image-group-12dca",
"created_at": "2022-01-05 18:51:05",
"last_edited_at": "2022-01-05 19:23:56",
Expand Down
Loading