diff --git a/encord/client.py b/encord/client.py
index f402e9789..dfbdb288a 100644
--- a/encord/client.py
+++ b/encord/client.py
@@ -346,7 +346,11 @@ def __add_data_to_dataset_get_result(
                 polling_elapsed_seconds = ceil(time.perf_counter() - polling_start_timestamp)
                 polling_available_seconds = max(0, timeout_seconds - polling_elapsed_seconds)
 
-                if polling_available_seconds == 0 or res.status in [LongPollingStatus.DONE, LongPollingStatus.ERROR]:
+                if polling_available_seconds == 0 or res.status in [
+                    LongPollingStatus.DONE,
+                    LongPollingStatus.ERROR,
+                    LongPollingStatus.CANCELLED,
+                ]:
                     return res
 
                 failed_requests_count = 0
@@ -781,11 +785,20 @@ def add_private_data_to_dataset_get_result(
                 polling_elapsed_seconds = ceil(time.perf_counter() - polling_start_timestamp)
                 polling_available_seconds = max(0, timeout_seconds - polling_elapsed_seconds)
 
-                if polling_available_seconds == 0 or res.status in [LongPollingStatus.DONE, LongPollingStatus.ERROR]:
+                if (polling_available_seconds == 0) or (
+                    res.status
+                    in [
+                        LongPollingStatus.DONE,
+                        LongPollingStatus.ERROR,
+                        LongPollingStatus.CANCELLED,
+                    ]
+                ):
                     return res
 
-                files_finished_count = res.units_done_count + res.units_error_count
-                files_total_count = res.units_pending_count + res.units_done_count + res.units_error_count
+                files_finished_count = res.units_done_count + res.units_error_count + res.units_cancelled_count
+                files_total_count = (
+                    res.units_pending_count + res.units_done_count + res.units_error_count + res.units_cancelled_count
+                )
 
                 if files_finished_count != files_total_count:
                     logger.info(f"Processed {files_finished_count}/{files_total_count} files")
diff --git a/encord/orm/dataset.py b/encord/orm/dataset.py
index 866d53a88..69eb3f98f 100644
--- a/encord/orm/dataset.py
+++ b/encord/orm/dataset.py
@@ -978,6 +978,22 @@ class LongPollingStatus(str, Enum):
     Information about errors is available in the `units_error_count: int` and `errors: List[str]` attributes.
     """
 
+    CANCELLED = "CANCELLED"
+    """
+    Job was cancelled explicitly by the user, either through the Encord UI or via the SDK method
+    `add_data_to_folder_job_cancel`.
+
+    In the context of this status:
+    - The job may have been partially processed, but it was explicitly interrupted before
+      completion by a user action.
+    - Once a job is cancelled, no further processing occurs; any data processed before the
+      cancellation remains available.
+    - The presence of cancelled data units (`units_cancelled_count`) indicates that some data
+      upload units were interrupted and cancelled before completion.
+    - If `ignore_errors` was set to `True`, the job may have continued despite errors; in that
+      case, cancellation applies only to the units that were not yet processed.
+    """
+
 
 class DataUnitError(BaseDTO):
     """
@@ -1026,6 +1042,9 @@ class DatasetDataLongPolling(BaseDTO):
     units_error_count: int
     """Number of upload job units that have error status."""
 
+    units_cancelled_count: int
+    """Number of upload job units that have been cancelled."""
+
 
 @dataclasses.dataclass(frozen=True)
 class DatasetLinkItems:
diff --git a/encord/orm/storage.py b/encord/orm/storage.py
index ed03fc5f5..b2b01ef21 100644
--- a/encord/orm/storage.py
+++ b/encord/orm/storage.py
@@ -153,6 +153,9 @@ class UploadLongPollingState(BaseDTO):
     units_error_count: int
     """Number of upload job units that have error status."""
 
+    units_cancelled_count: int
+    """Number of upload job units that have been cancelled."""
+
     unit_errors: List[DataUnitError]
     """Structured list of per-item upload errors. See :class:`DataUnitError` for more details."""
 
@@ -663,3 +666,7 @@ class StorageItemsMigratePayload(BaseDTO):
     to_integration_hash: Optional[UUID] = None
     validate_access: bool = False
     skip_missing: bool = False
+
+
+class AddDataToFolderJobCancelResponse(BaseDTO):
+    units_cancelled_count: int
diff --git a/encord/storage.py b/encord/storage.py
index edfeeb8ee..83ebbb693 100644
--- a/encord/storage.py
+++ b/encord/storage.py
@@ -1234,11 +1234,20 @@ def _add_data_to_folder_get_result(
                 polling_elapsed_seconds = ceil(time.perf_counter() - polling_start_timestamp)
                 polling_available_seconds = max(0, timeout_seconds - polling_elapsed_seconds)
 
-                if polling_available_seconds == 0 or res.status in [LongPollingStatus.DONE, LongPollingStatus.ERROR]:
+                if (polling_available_seconds == 0) or (
+                    res.status
+                    in [
+                        LongPollingStatus.DONE,
+                        LongPollingStatus.ERROR,
+                        LongPollingStatus.CANCELLED,
+                    ]
+                ):
                     return res
 
-                files_finished_count = res.units_done_count + res.units_error_count
-                files_total_count = res.units_pending_count + res.units_done_count + res.units_error_count
+                files_finished_count = res.units_done_count + res.units_error_count + res.units_cancelled_count
+                files_total_count = (
+                    res.units_pending_count + res.units_done_count + res.units_error_count + res.units_cancelled_count
+                )
 
                 if files_finished_count != files_total_count:
                     logger.info(f"Processed {files_finished_count}/{files_total_count} files")
@@ -1254,6 +1263,27 @@
 
                 time.sleep(LONG_POLLING_SLEEP_ON_FAILURE_SECONDS)
 
+    def add_data_to_folder_job_cancel(
+        self,
+        upload_job_id: UUID,
+    ) -> orm_storage.AddDataToFolderJobCancelResponse:
+        """
+        Cancels an in-progress data upload job associated with this folder.
+
+        Args:
+            upload_job_id (UUID): The unique identifier for the upload job.
+
+        Returns:
+            AddDataToFolderJobCancelResponse: A response containing the number of upload units that were cancelled.
+        """
+
+        return self._api_client.post(
+            f"storage/folders/{self.uuid}/data-upload-jobs/{upload_job_id}/cancel",
+            params=None,
+            payload=None,
+            result_type=orm_storage.AddDataToFolderJobCancelResponse,
+        )
+
     @staticmethod
     def _get_folder(api_client: ApiClient, folder_uuid: UUID) -> "StorageFolder":
         orm_folder = api_client.get(
diff --git a/tests/orm/test_base_dto.py b/tests/orm/test_base_dto.py
index 4e5072574..b536dd3d6 100644
--- a/tests/orm/test_base_dto.py
+++ b/tests/orm/test_base_dto.py
@@ -74,6 +74,7 @@ def test_complex_model_deserialization():
         "units_pending_count": 5,
         "units_done_count": 5,
         "units_error_count": 2,
+        "units_cancelled_count": 0,
         "data_hashes_with_titles": [
             {"data_hash": "abc", "title": "dummy title", "backing_item_uuid": str(backing_item_uuid)}
         ],