Skip to content

Commit

Permalink
Data upload cancel (#736)
Browse files Browse the repository at this point in the history
  • Loading branch information
adam-encord authored Nov 20, 2024
1 parent 363a95b commit bd26a09
Show file tree
Hide file tree
Showing 5 changed files with 79 additions and 7 deletions.
21 changes: 17 additions & 4 deletions encord/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,11 @@ def __add_data_to_dataset_get_result(
polling_elapsed_seconds = ceil(time.perf_counter() - polling_start_timestamp)
polling_available_seconds = max(0, timeout_seconds - polling_elapsed_seconds)

if polling_available_seconds == 0 or res.status in [LongPollingStatus.DONE, LongPollingStatus.ERROR]:
if polling_available_seconds == 0 or res.status in [
LongPollingStatus.DONE,
LongPollingStatus.ERROR,
LongPollingStatus.CANCELLED,
]:
return res

failed_requests_count = 0
Expand Down Expand Up @@ -781,11 +785,20 @@ def add_private_data_to_dataset_get_result(
polling_elapsed_seconds = ceil(time.perf_counter() - polling_start_timestamp)
polling_available_seconds = max(0, timeout_seconds - polling_elapsed_seconds)

if polling_available_seconds == 0 or res.status in [LongPollingStatus.DONE, LongPollingStatus.ERROR]:
if (polling_available_seconds == 0) or (
res.status
in [
LongPollingStatus.DONE,
LongPollingStatus.ERROR,
LongPollingStatus.CANCELLED,
]
):
return res

files_finished_count = res.units_done_count + res.units_error_count
files_total_count = res.units_pending_count + res.units_done_count + res.units_error_count
files_finished_count = res.units_done_count + res.units_error_count + res.units_cancelled_count
files_total_count = (
res.units_pending_count + res.units_done_count + res.units_error_count + res.units_cancelled_count
)

if files_finished_count != files_total_count:
logger.info(f"Processed {files_finished_count}/{files_total_count} files")
Expand Down
21 changes: 21 additions & 0 deletions encord/orm/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -978,6 +978,24 @@ class LongPollingStatus(str, Enum):
Information about errors is available in the `units_error_count: int` and `errors: List[str]` attributes.
"""

CANCELLED = "CANCELLED"
"""
Job was cancelled explicitly by the user through the Encord UI or via the Encord SDK using the
`add_data_to_folder_job_cancel` method.
In the context of this status:
- The job may have been partially processed, but it was explicitly interrupted before completion
by a user action.
- Cancellation can occur either manually through the Encord UI or programmatically using the SDK
method `add_data_to_folder_job_cancel`.
- Once a job is cancelled, no further processing will occur, and any processed data before the
cancellation will be available.
- The presence of cancelled data units (`units_cancelled_count`) indicates that some data upload
units were interrupted and cancelled before completion.
- If `ignore_errors` was set to `True`, the job may continue despite errors, and cancellation will
only apply to the unprocessed units.
"""


class DataUnitError(BaseDTO):
"""
Expand Down Expand Up @@ -1026,6 +1044,9 @@ class DatasetDataLongPolling(BaseDTO):
units_error_count: int
"""Number of upload job units that have error status."""

units_cancelled_count: int
"""Number of upload job units that have been cancelled."""


@dataclasses.dataclass(frozen=True)
class DatasetLinkItems:
Expand Down
7 changes: 7 additions & 0 deletions encord/orm/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,9 @@ class UploadLongPollingState(BaseDTO):
units_error_count: int
"""Number of upload job units that have error status."""

units_cancelled_count: int
"""Number of upload job units that have been cancelled."""

unit_errors: List[DataUnitError]
"""Structured list of per-item upload errors. See :class:`DataUnitError` for more details."""

Expand Down Expand Up @@ -663,3 +666,7 @@ class StorageItemsMigratePayload(BaseDTO):
to_integration_hash: Optional[UUID] = None
validate_access: bool = False
skip_missing: bool = False


class AddDataToFolderJobCancelResponse(BaseDTO):
units_cancelled_count: int
36 changes: 33 additions & 3 deletions encord/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -1234,11 +1234,20 @@ def _add_data_to_folder_get_result(
polling_elapsed_seconds = ceil(time.perf_counter() - polling_start_timestamp)
polling_available_seconds = max(0, timeout_seconds - polling_elapsed_seconds)

if polling_available_seconds == 0 or res.status in [LongPollingStatus.DONE, LongPollingStatus.ERROR]:
if (polling_available_seconds == 0) or (
res.status
in [
LongPollingStatus.DONE,
LongPollingStatus.ERROR,
LongPollingStatus.CANCELLED,
]
):
return res

files_finished_count = res.units_done_count + res.units_error_count
files_total_count = res.units_pending_count + res.units_done_count + res.units_error_count
files_finished_count = res.units_done_count + res.units_error_count + res.units_cancelled_count
files_total_count = (
res.units_pending_count + res.units_done_count + res.units_error_count + res.units_cancelled_count
)

if files_finished_count != files_total_count:
logger.info(f"Processed {files_finished_count}/{files_total_count} files")
Expand All @@ -1254,6 +1263,27 @@ def _add_data_to_folder_get_result(

time.sleep(LONG_POLLING_SLEEP_ON_FAILURE_SECONDS)

def add_data_to_folder_job_cancel(
self,
upload_job_id: UUID,
) -> orm_storage.AddDataToFolderJobCancelResponse:
"""
Cancels a data upload in progress job, associated with this folder.
Args:
upload_job_id (UUID): The unique identifier for the upload job.
Returns:
AddDataToFolderJobCancelResponse: A response indicating the result of the cancelled job.
"""

return self._api_client.post(
f"storage/folders/{self.uuid}/data-upload-jobs/{upload_job_id}/cancel",
params=None,
payload=None,
result_type=orm_storage.AddDataToFolderJobCancelResponse,
)

@staticmethod
def _get_folder(api_client: ApiClient, folder_uuid: UUID) -> "StorageFolder":
orm_folder = api_client.get(
Expand Down
1 change: 1 addition & 0 deletions tests/orm/test_base_dto.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ def test_complex_model_deserialization():
"units_pending_count": 5,
"units_done_count": 5,
"units_error_count": 2,
"units_cancelled_count": 0,
"data_hashes_with_titles": [
{"data_hash": "abc", "title": "dummy title", "backing_item_uuid": str(backing_item_uuid)}
],
Expand Down

0 comments on commit bd26a09

Please sign in to comment.