Skip to content

Commit

Permalink
Datasets list API v2 - SDK (#796)
Browse files Browse the repository at this point in the history
  • Loading branch information
adam-encord authored Dec 5, 2024
1 parent f05cc9e commit 0473b8f
Show file tree
Hide file tree
Showing 6 changed files with 92 additions and 38 deletions.
12 changes: 12 additions & 0 deletions encord/common/time_parser.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import contextlib
from datetime import datetime
from functools import lru_cache
from typing import Optional, Union

from dateutil import parser

Expand Down Expand Up @@ -29,3 +30,14 @@ def parse_datetime(time_string: str) -> datetime:
# As a last resort, employ fuzzy parsing, which is most expensive,
# but parses the most obscure timestamp formats
return parser.parse(time_string, fuzzy=True)


def parse_datetime_optional(_datetime: Optional[Union[str, datetime]]) -> Optional[datetime]:
    """Normalise an optional timestamp argument to ``Optional[datetime]``.

    Args:
        _datetime: ``None``, an existing ``datetime`` instance, or a timestamp
            string to be handed to ``parse_datetime``.

    Returns:
        ``None`` when given ``None``, the same object when given a
        ``datetime``, otherwise the result of ``parse_datetime`` on the string.

    Raises:
        ValueError: If the argument is none of the supported types.
    """
    # None and datetime instances need no conversion and are passed through as-is.
    if _datetime is None or isinstance(_datetime, datetime):
        return _datetime

    if isinstance(_datetime, str):
        return parse_datetime(_datetime)

    raise ValueError(f"parse_datetime_optional {type(_datetime)=} not supported")
42 changes: 41 additions & 1 deletion encord/orm/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import dataclasses
from collections import OrderedDict
from datetime import datetime
from enum import Enum, IntEnum
from enum import Enum, IntEnum, auto
from types import MappingProxyType
from typing import Any, Dict, List, Optional
from uuid import UUID
Expand All @@ -14,6 +14,7 @@
from encord.constants.enums import DataType
from encord.exceptions import EncordException
from encord.orm import base_orm
from encord.orm.analytics import CamelStrEnum
from encord.orm.base_dto import BaseDTO
from encord.orm.formatter import Formatter
from encord.utilities.common import _get_dict_without_none_keys
Expand All @@ -24,6 +25,18 @@ class DatasetUserRole(IntEnum):
USER = 1


class DatasetUserRoleV2(CamelStrEnum):
    # String-valued counterpart of the integer DatasetUserRole enum.
    # Member values are produced by auto() through CamelStrEnum
    # NOTE(review): presumably camelCase strings derived from the member
    # names - confirm against CamelStrEnum in encord.orm.analytics.
    ADMIN = auto()
    USER = auto()


def dataset_user_role_str_enum_to_int_enum(str_enum: DatasetUserRoleV2) -> DatasetUserRole:
    """Translate a string-based ``DatasetUserRoleV2`` into the integer ``DatasetUserRole``.

    Args:
        str_enum: The V2 role to convert.

    Returns:
        The ``DatasetUserRole`` member with the same name.

    Raises:
        KeyError: If ``str_enum`` is not one of the mapped V2 roles.
    """
    v2_to_int_role = {
        DatasetUserRoleV2.ADMIN: DatasetUserRole.ADMIN,
        DatasetUserRoleV2.USER: DatasetUserRole.USER,
    }
    return v2_to_int_role[str_enum]


class DatasetUser(BaseDTO):
user_email: str
user_role: DatasetUserRole
Expand Down Expand Up @@ -1068,3 +1081,30 @@ class CreateDatasetPayload(BaseDTO):
class CreateDatasetResponseV2(BaseDTO):
    # Response DTO carrying the identifiers of a newly created dataset.
    # NOTE(review): the endpoint that returns this is not visible here.
    dataset_uuid: UUID
    backing_folder_uuid: Optional[UUID] = None  # a 'not None' indicates a legacy "mirror" dataset was created


class DatasetsWithUserRolesListParams(BaseDTO):
    """Filter parameters for listing datasets together with the caller's role.

    Every filter is optional and defaults to ``None``, so callers only need to
    pass the filters they actually use. This matches how the other DTOs in
    this module declare their optional fields.
    """

    # Title filters.
    # NOTE(review): presumably `_eq` is an exact match and `_like` a pattern
    # match - confirm against the backend.
    title_eq: Optional[str] = None
    title_like: Optional[str] = None
    # Description filters, same `_eq` / `_like` convention as the title.
    description_eq: Optional[str] = None
    description_like: Optional[str] = None
    # Bounds on the dataset creation timestamp.
    created_before: Optional[datetime] = None
    created_after: Optional[datetime] = None
    # Bounds on the dataset last-edited timestamp.
    edited_before: Optional[datetime] = None
    edited_after: Optional[datetime] = None


class DatasetWithUserRole(BaseDTO):
    # One dataset entry paired with the role the requesting user holds on it.
    dataset_uuid: UUID
    title: str
    description: str
    created_at: datetime
    last_edited_at: datetime
    user_role: DatasetUserRoleV2  # string-based role enum (V2), not the legacy IntEnum

    storage_location: Optional[StorageLocation] = None  # legacy field: you can have data from mixed locations now
    backing_folder_uuid: Optional[UUID] = None  # if set, this indicates a legacy 'mirror' dataset


class DatasetsWithUserRolesListResponse(BaseDTO):
    # Response envelope for the dataset listing: the entries live under `result`.
    result: List[DatasetWithUserRole]
11 changes: 0 additions & 11 deletions encord/orm/dataset_with_user_role.py

This file was deleted.

53 changes: 33 additions & 20 deletions encord/user_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
from encord.client_metadata_schema import get_client_metadata_schema, set_client_metadata_schema_from_dict
from encord.collection import Collection
from encord.common.deprecated import deprecated
from encord.common.time_parser import parse_datetime
from encord.common.time_parser import parse_datetime, parse_datetime_optional
from encord.configs import BearerConfig, SshConfig, UserConfig, get_env_ssh_key
from encord.constants.string_constants import TYPE_DATASET, TYPE_ONTOLOGY, TYPE_PROJECT
from encord.dataset import Dataset
Expand Down Expand Up @@ -61,13 +61,14 @@
CreateDatasetResponseV2,
DatasetAccessSettings,
DatasetInfo,
DatasetUserRole,
DatasetsWithUserRolesListParams,
DatasetsWithUserRolesListResponse,
DicomDeidentifyTask,
Images,
StorageLocation,
dataset_user_role_str_enum_to_int_enum,
)
from encord.orm.dataset import Dataset as OrmDataset
from encord.orm.dataset_with_user_role import DatasetWithUserRole
from encord.orm.deidentification import (
DicomDeIdGetResultLongPollingStatus,
DicomDeIdGetResultParams,
Expand Down Expand Up @@ -96,8 +97,7 @@
)
from encord.orm.project import Project as OrmProject
from encord.orm.project_with_user_role import ProjectWithUserRole
from encord.orm.storage import CreateStorageFolderPayload, ListFoldersParams, ListItemsParams, StorageItemType
from encord.orm.storage import StorageFolder as OrmStorageFolder
from encord.orm.storage import ListFoldersParams, ListItemsParams, StorageItemType
from encord.project import Project
from encord.storage import FoldersSortBy, StorageFolder, StorageItem
from encord.utilities.client_utilities import (
Expand Down Expand Up @@ -303,24 +303,37 @@ def get_datasets(
Returns:
list of (role, dataset) pairs for datasets matching filter conditions.
"""
properties_filter = self.__validate_filter(locals())
# a hack to be able to share validation code without too much c&p
data = self._querier.get_multiple(
DatasetWithUserRole,
payload={
"filter": properties_filter,
"enable_storage_api": True,
},
)

def convert_dates(dataset):
dataset["created_at"] = parse_datetime(dataset["created_at"])
dataset["last_edited_at"] = parse_datetime(dataset["last_edited_at"])
return dataset
res = self._api_client.get(
"/datasets/list",
params=DatasetsWithUserRolesListParams(
title_eq=title_eq,
title_like=title_like,
description_eq=desc_eq,
description_like=desc_like,
created_before=parse_datetime_optional(created_before),
created_after=parse_datetime_optional(created_after),
edited_before=parse_datetime_optional(edited_before),
edited_after=parse_datetime_optional(edited_after),
),
result_type=DatasetsWithUserRolesListResponse,
)

return [
{"dataset": DatasetInfo(**convert_dates(d.dataset)), "user_role": DatasetUserRole(d.user_role)}
for d in data
{
"dataset": DatasetInfo(
dataset_hash=str(x.dataset_uuid),
user_hash="field withdrawn for compliance reasons",
title=x.title,
description=x.description,
type=int(x.storage_location or 0),
created_at=x.created_at,
last_edited_at=x.last_edited_at,
backing_folder_uuid=x.backing_folder_uuid,
),
"user_role": dataset_user_role_str_enum_to_int_enum(x.user_role),
}
for x in res.result
]

@staticmethod
Expand Down
6 changes: 3 additions & 3 deletions encord/workflow/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from dataclasses import dataclass, field
from datetime import datetime
from typing import Iterable, Optional, Sequence, Tuple, Type, TypeVar
from typing import Iterable, List, Optional, Sequence, Tuple, Type, TypeVar
from uuid import UUID

from encord.http.bundle import Bundle, bundled_operation
Expand Down Expand Up @@ -49,7 +49,7 @@ class WorkflowAction(BaseDTO):
@dataclass
class BundledWorkflowActionPayload:
stage_uuid: UUID
actions: list[WorkflowAction]
actions: List[WorkflowAction]

def add(self, other: BundledWorkflowActionPayload) -> BundledWorkflowActionPayload:
assert self.stage_uuid == other.stage_uuid, "It's only possible to bundle actions for one stage at a time"
Expand All @@ -65,7 +65,7 @@ class WorkflowReviewAction(BaseDTO):
class BundledReviewActionPayload:
stage_uuid: UUID
task_uuid: UUID
actions: list[WorkflowReviewAction]
actions: List[WorkflowReviewAction]

def add(self, other: BundledReviewActionPayload) -> BundledReviewActionPayload:
assert self.stage_uuid == other.stage_uuid, "It's only possible to bundle actions for one stage at a time"
Expand Down
6 changes: 3 additions & 3 deletions tests/test_user_client_auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def make_side_effects(project_response: Optional[MagicMock] = None):
def side_effects(*args, **kwargs):
if args[0].path_url.startswith("/public/user"):
request_type = json.loads(args[0].body)["query_type"]
if request_type == "datasetwithuserrole":
if request_type == "projectwithuserrole":
mock_response = MagicMock()
mock_response.status_code = 200
mock_response.json.return_value = {"status": 200, "response": {}}
Expand Down Expand Up @@ -172,7 +172,7 @@ def test_v1_public_user_resource_when_initialised_with_ssh_key(mock_send, bearer
mock_send.side_effect = make_side_effects()

user_client = EncordUserClient.create_with_ssh_private_key(ssh_private_key=PRIVATE_KEY_PEM)
user_client.get_datasets()
user_client.get_projects()

assert mock_send.call_count == 1
for mock_call in mock_send.call_args_list:
Expand All @@ -188,7 +188,7 @@ def test_v1_public_user_resource_when_initialised_with_bearer_auth(mock_send, be
mock_send.side_effect = make_side_effects()

user_client = EncordUserClient.create_with_bearer_token(bearer_token)
user_client.get_datasets()
user_client.get_projects()

assert mock_send.call_count == 1
for mock_call in mock_send.call_args_list:
Expand Down

0 comments on commit 0473b8f

Please sign in to comment.