From 72ba5897c64e312c769306cf7216f9e89331f816 Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Tue, 15 Mar 2022 00:51:20 -0400 Subject: [PATCH 01/34] first implementation prefer header for execute job --- CHANGES.rst | 4 + docs/source/processes.rst | 185 ++++++++++++------ weaver/processes/builtin/__init__.py | 2 + weaver/processes/builtin/file2string_array.py | 5 + .../processes/builtin/file_index_selector.py | 2 +- weaver/processes/builtin/jsonarray2netcdf.py | 2 +- weaver/processes/builtin/metalink2netcdf.py | 2 +- weaver/processes/execution.py | 63 ++++-- weaver/processes/utils.py | 17 +- weaver/wps/service.py | 2 +- weaver/wps_restapi/swagger_definitions.py | 25 ++- 11 files changed, 229 insertions(+), 80 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 856825c72..b064bdf40 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -12,6 +12,10 @@ Changes Changes: -------- +- Support ``Prefer`` header with ``wait`` or ``respond-async`` directives to select ``Job`` execution mode either + as synchronous or asynchronous task, according to supported ``jobControlOptions`` of the relevant ``Process`` being + executed (resolves `#247 `_). +- Increase minor version of all ``builtin`` processes that will now be executable in wither (a)synchronous modes. - Improve conformance for returned status codes and error messages when requesting results for an unfinished, failed, or dismissed ``Job``. - Adjust conformance item references to correspond with `OGC API - Processes: Part 2` renamed from `Transactions` to diff --git a/docs/source/processes.rst b/docs/source/processes.rst index d93c5f3ff..052efbef1 100644 --- a/docs/source/processes.rst +++ b/docs/source/processes.rst @@ -424,80 +424,151 @@ This section will first describe the basics of this request format, and after go and parametrization of various input/output combinations. Let's employ the following example of JSON body sent to the :term:`Job` execution to better illustrate the requirements. -.. 
code-block:: json +.. table:: + :class: code-table + :align: center + + +-----------------------------------------------+-----------------------------------------------+ + | .. code-block:: json | .. code-block:: json | + | :caption: Job Execution Payload as Listing | :caption: Job Execution Payload as Mapping | + | | | + | { | { | + | "mode": "async", | "mode": "async", | + | "response": "document", | "response": "document", | + | "inputs": [ | "inputs": { | + | { | "input-file": { | + | "id": "input-file", | "href": "` request. As soon as any task worker becomes -available, it will pick any leftover queued :term:`Job` to execute it. - -The second field is ``response``. At the time being, `Weaver` only supports ``document`` value. This parameter is -present only for compatibility with other :term:`ADES` implementation, but does not actually affects `Weaver`'s -response. - -Following are the ``inputs`` definition. This is the most important section of the request body. It defines which -parameters to forward to the referenced :term:`Process` to be executed. All ``id`` elements in this :term:`Job` request +The ``inputs`` definition is the most important section of the request body. It is also the only one that is completely +required when submitting the execution request, even for a no-input process (an empty mapping is needed in such case). +It defines which parameters +to forward to the referenced :term:`Process` to be executed. All ``id`` elements in this :term:`Job` request body must correspond to valid ``inputs`` from the definition returned by :ref:`DescribeProcess ` response. Obviously, all formatting requirements (i.e.: proper file :term:`MIME-types`), data types (e.g.: ``int``, ``string``, etc.) and validations rules (e.g.: ``minOccurs``, ``AllowedValues``, etc.) must also be fulfilled. When providing files as input, multiple protocols are supported. See later section :ref:`File Reference Types` for details. 
-Finally, the ``outputs`` section defines, for each ``id`` corresponding to the :term:`Process` definition, how to -report the produced outputs from a successful :term:`Job` completion. Again, `Weaver` only implement the -``reference`` result for the time being as this is the most common variation. In this case, the produced file is -stored locally and exposed externally with returned reference URL. The other (unimplemented) mode ``value`` would -return the contents directly in the response instead of the URL. +The ``outputs`` section defines, for each ``id`` corresponding to the :term:`Process` definition, how to +report the produced outputs from a successful :term:`Job` completion. For the time being, `Weaver` only implements the +``reference`` result as this is the most common variation. In this case, the produced file is +stored locally and exposed externally with returned reference URL. The other mode ``value`` returns the contents +directly in the response instead of the URL. -.. note:: - Other parameters can be added to the request to provide further functionalities. Above fields are the minimum - requirements to request a :term:`Job`. Please refer to the |exec-api|_ definition for all applicable features. +.. fixme:: + Transmission mode ``value`` not implemented. Only ``reference`` is supported. + https://github.com/crim-ca/weaver/issues/377 + +.. fixme:: + The ``response`` field is only supported with the ``document`` value. This parameter is present only for + compatibility with other :term:`ADES` implementation, but does not actually affect `Weaver`'s response. + + Response representation mode ``raw`` to be implemented. + https://github.com/crim-ca/weaver/issues/376 + +.. fixme:: + When ``outputs`` section is omitted, it simply means that the :term:`Process` to be executed should return all + outputs it offers in the created :ref:`Job Results `. 
Because no representation mode is specified + for individual outputs, `Weaver` automatically selects ``reference`` as it makes all outputs more easily accessible + with distinct URL afterwards. If the ``outputs`` section is specified, but one of the outputs defined in the + :ref:`Process Description ` is not specified, that output should be omitted from the produced + results. For the time being, because only ``reference`` representation is offered for produced output files, this + filtering is not implemented as it offers no additional advantage than accessing files directly with their distinct + URLs. This could be added later if `Multipart` raw data representation is required. + Please |submit-issue|_ to request this feature if it is relevant for your use-cases. + + Filtering not implemented (everything always available). + https://github.com/crim-ca/weaver/issues/380 -.. note:: - Since most of the time, returned files are not human readable or are simply too large to be displayed, the - ``transmissionMode: value`` is rarely employed. Also, it is to be noted that outputs representing ``LiteralData`` - (which is even more uncommon) would automatically be represented as ``value`` without explicitly requesting it, - as there would not be any file to return. If this poses problem or you encounter a valid use-case where ``value`` - would be useful for your needs, please |submit-issue|_ to request the feature. .. |exec-api| replace:: OpenAPI Execute .. _exec-api: `exec-req`_ +.. _proc_exec_mode: + +Execution Mode +~~~~~~~~~~~~~~~~~~~~~ + +.. todo:: Prefer Header details + +.. warning:: + It is important to remember that the ``Prefer`` header is indeed a *preference*. If `Weaver` deems it cannot + allocate a worker to execute the task `synchronously` within a reasonable delay, it can enforce the `asynchronous` + execution. 
The `asynchronous` mode is also *prioritized* for running longer :term:`Job` submitted over the task + queue, as this allows `Weaver` to offer better availability for all requests submitted by its users. + The `synchronous` mode should be reserved only for very quick and relatively low computation intensive executions. + +The ``mode`` field displayed in the body is another method to tell whether to run the :term:`Process` in a blocking +(``sync``) or non-blocking (``async``) manner. Note that support is limited for mode ``sync`` as this use case is often +more cumbersome than ``async`` execution. Effectively, ``sync`` mode requires to have a task worker executor available +to run the :term:`Job` (otherwise it fails immediately due to lack of processing resource), and the requester must wait +for the *whole* execution to complete to obtain the result. Given that :term:`Process` could take a very long time to +complete, it is not practical to execute them in this manner and potentially have to wait hours to retrieve outputs. +Instead, the preferred and default approach is to request an ``async`` :term:`Job` execution. When doing so, `Weaver` +will add this to a task queue for processing, and will immediately return a :term:`Job` identifier and location where +the user can probe for its status, using :ref:`Monitoring ` request. As soon as any task worker becomes +available, it will pick any leftover queued :term:`Job` to execute it. + +.. note:: + The ``mode`` field is an older methodology that precedes the official :term:`OGC API - Processes` method using + the ``Prefer`` header. It is recommended to employ the ``Prefer`` header that ensures higher interoperability with + other services using the same standard. The ``mode`` field is deprecated and preserved only for backward + compatibility purpose. 
+ +When requesting a `synchronous` execution, and provided a worker was available to pick and complete the task before +the maximum ``wait`` time was reached, the final status will be directly returned. Therefore, the contents obtained this +way will be identical to any following :ref:`Job Status ` request. If no worker is available, or if +the worker that picked the :term:`Job` cannot complete it in time (either because it takes too long to execute or had +to wait on resources for too long), the :term:`Job` execution will automatically switch to `asynchronous` mode. + +The distinction between an `asynchronous` or `synchronous` response when executing a :term:`Job` can be +observed in multiple ways. The easiest is with the HTTP status code of the response, 200 being for +a :term:`Job` *entirely completed* synchronously, and 201 for a created :term:`Job` that should be +:ref:`monitored ` asynchronously. Another method is to observe the ``"status"`` value. If the +status is ``accepted`` or ``running``, it means the operation is guaranteed to be `asynchronous`, since `synchronous` +always returns a final status (``succeeded`` or ``failed``). Note that a final status is possible in both modes, so +seeing one of those values does not *guarantee* it was executed `synchronously`, but the complete :term:`Job` status +can be immediately requested with the :ref:`GetStatus ` request. In general, a `synchronous` response +will be much more verbose than an `asynchronous` one, since in the latter case details are not yet all available. Finally, it is possible +to extract the ``Preference-Applied`` response header which will clearly indicate if the submitted ``Prefer`` header +was respected (because it could be with available worker resources) or not. 
In general, this means that if +the :term:`Job` submission request was not provided with ``Prefer: wait=X`` **AND** replied with the +same ``Preference-Applied`` value, it is safe to assume `Weaver` decided to queue the :term:`Job` for `asynchronous` +execution. That :term:`Job` could be executed immediately, or at a later time, according to worker availability. + +.. _proc_exec_steps: + Execution Steps ~~~~~~~~~~~~~~~~~~~~~ @@ -508,7 +579,7 @@ parametrization details, etc.), followed by ``running`` when effectively reachin :term:`Application Package` operation. This status will remain as such until the operation completes, either with ``succeeded`` or ``failed`` status. -At any moment during ``async`` execution, the :term:`Job` status can be requested using |status-req|_. Note that +At any moment during `asynchronous` execution, the :term:`Job` status can be requested using |status-req|_. Note that depending on the timing at which the user executes this request and the availability of task workers, it could be possible that the :term:`Job` be already in ``running`` state, or even ``failed`` in case of early problem detected. 
diff --git a/weaver/processes/builtin/__init__.py b/weaver/processes/builtin/__init__.py index aadc737b6..7d1f5843c 100644 --- a/weaver/processes/builtin/__init__.py +++ b/weaver/processes/builtin/__init__.py @@ -14,6 +14,7 @@ from weaver.database import get_db from weaver.datatype import Process from weaver.exceptions import PackageExecutionError, PackageNotFound, ProcessNotAccessible, ProcessNotFound +from weaver.execute import ExecuteControlOption from weaver.processes.constants import CWL_REQUIREMENT_APP_BUILTIN from weaver.processes.types import ProcessType from weaver.processes.wps_package import PACKAGE_EXTENSIONS, get_process_definition @@ -145,6 +146,7 @@ def register_builtin_processes(container): processDescriptionURL=process_url, processEndpointWPS1=get_wps_url(container), executeEndpoint="/".join([process_url, "jobs"]), + jobControlOptions=ExecuteControlOption.values(), visibility=Visibility.PUBLIC, )) diff --git a/weaver/processes/builtin/file2string_array.py b/weaver/processes/builtin/file2string_array.py index 280c11451..9f9eca4ef 100644 --- a/weaver/processes/builtin/file2string_array.py +++ b/weaver/processes/builtin/file2string_array.py @@ -17,6 +17,11 @@ LOGGER.addHandler(logging.StreamHandler(sys.stdout)) LOGGER.setLevel(logging.INFO) +# process details +__version__ = "1.1" +__title__ = "File to String-Array" +__abstract__ = __doc__ # NOTE: '__doc__' is fetched directly, this is mostly to be informative + OUTPUT_CWL_JSON = "cwl.output.json" diff --git a/weaver/processes/builtin/file_index_selector.py b/weaver/processes/builtin/file_index_selector.py index 9f4f79865..7dac13540 100644 --- a/weaver/processes/builtin/file_index_selector.py +++ b/weaver/processes/builtin/file_index_selector.py @@ -26,7 +26,7 @@ LOGGER.setLevel(logging.INFO) # process details -__version__ = "1.0" +__version__ = "1.1" __title__ = "File Index Selector" __abstract__ = __doc__ # NOTE: '__doc__' is fetched directly, this is mostly to be informative diff --git 
a/weaver/processes/builtin/jsonarray2netcdf.py b/weaver/processes/builtin/jsonarray2netcdf.py index 0cdee5d49..d6a675349 100644 --- a/weaver/processes/builtin/jsonarray2netcdf.py +++ b/weaver/processes/builtin/jsonarray2netcdf.py @@ -28,7 +28,7 @@ LOGGER.setLevel(logging.INFO) # process details -__version__ = "1.1" +__version__ = "1.2" __title__ = "JSON array to NetCDF" __abstract__ = __doc__ # NOTE: '__doc__' is fetched directly, this is mostly to be informative diff --git a/weaver/processes/builtin/metalink2netcdf.py b/weaver/processes/builtin/metalink2netcdf.py index 45c85ca43..c5a8007e8 100644 --- a/weaver/processes/builtin/metalink2netcdf.py +++ b/weaver/processes/builtin/metalink2netcdf.py @@ -27,7 +27,7 @@ LOGGER.setLevel(logging.INFO) # process details -__version__ = "1.0" +__version__ = "1.1" __title__ = "Metalink to NetCDF" __abstract__ = __doc__ # NOTE: '__doc__' is fetched directly, this is mostly to be informative diff --git a/weaver/processes/execution.py b/weaver/processes/execution.py index 94c1909ed..3273fb476 100644 --- a/weaver/processes/execution.py +++ b/weaver/processes/execution.py @@ -12,7 +12,7 @@ from weaver.database import get_db from weaver.datatype import Process, Service -from weaver.execute import ExecuteMode, ExecuteResponse, ExecuteTransmissionMode +from weaver.execute import ExecuteControlOption, ExecuteMode, ExecuteResponse, ExecuteTransmissionMode from weaver.formats import AcceptLanguage, ContentType from weaver.notify import encrypt_email, notify_job_complete from weaver.owsexceptions import OWSNoApplicableCode @@ -21,8 +21,18 @@ from weaver.processes.convert import get_field, ows2json_output_data from weaver.processes.types import ProcessType from weaver.status import JOB_STATUS_CATEGORIES, Status, StatusCategory, map_status -from weaver.store.base import StoreJobs -from weaver.utils import get_any_id, get_any_value, get_registry, get_settings, now, raise_on_xml_exception, wait_secs +from weaver.store.base import StoreJobs, 
StoreProcesses +from weaver.utils import ( + get_any_id, + get_any_value, + get_header, + get_registry, + get_settings, + now, + parse_prefer_header_execute_mode, + raise_on_xml_exception, + wait_secs +) from weaver.visibility import Visibility from weaver.wps.utils import ( check_wps_status, @@ -483,7 +493,7 @@ def submit_job(request, reference, tags=None): headers = dict(request.headers) settings = get_settings(request) return submit_job_handler(json_body, settings, service_url, provider_id, process_id, is_workflow, is_local, - visibility, language=lang, auth=headers, tags=tags, user=user, context=context) + visibility, language=lang, headers=headers, tags=tags, user=user, context=context) # FIXME: this should not be necessary if schema validators correctly implement OneOf(values) @@ -516,7 +526,7 @@ def submit_job_handler(payload, # type: JSON is_local=True, # type: bool visibility=None, # type: Optional[AnyVisibility] language=None, # type: Optional[str] - auth=None, # type: Optional[HeaderCookiesType] + headers=None, # type: Optional[HeaderCookiesType] tags=None, # type: Optional[List[str]] user=None, # type: Optional[int] context=None, # type: Optional[str] @@ -533,37 +543,58 @@ def submit_job_handler(payload, # type: JSON # TODO: remove when all parameter variations are supported # FIXME: - # - support 'sync' and 'Prefer' header variants (https://github.com/crim-ca/weaver/issues/247) # - support 'response: raw' (https://github.com/crim-ca/weaver/issues/376) # - allow omitting 'outputs' (https://github.com/crim-ca/weaver/issues/375) _validate_job_parameters(json_body) + db = get_db(settings) + headers = headers or {} + if is_local: + proc_store = db.get_store(StoreProcesses) + process = proc_store.fetch_by_id(process_id) + job_ctl_opts = process.jobControlOptions + else: + job_ctl_opts = ExecuteControlOption.values() + mode, wait, applied = parse_prefer_header_execute_mode(headers, job_ctl_opts) + get_header("prefer", headers, pop=True) is_execute_async = 
ExecuteMode.get(json_body["mode"]) != ExecuteMode.SYNC # convert auto to async notification_email = json_body.get("notification_email") encrypted_email = encrypt_email(notification_email, settings) if notification_email else None - store = get_db(settings).get_store(StoreJobs) + store = db.get_store(StoreJobs) # type: StoreJobs job = store.save_job(task_id=Status.ACCEPTED, process=process_id, service=provider_id, inputs=json_body.get("inputs"), is_local=is_local, is_workflow=is_workflow, access=visibility, user_id=user, execute_async=is_execute_async, custom_tags=tags, notification_email=encrypted_email, accept_language=language, context=context) job.save_log(logger=LOGGER, message="Job task submitted for execution.", status=Status.ACCEPTED, progress=0) job = store.update_job(job) - result = execute_process.delay(job_id=job.id, wps_url=clean_ows_url(service_url), headers=auth) - LOGGER.debug("Celery pending task [%s] for job [%s].", result.id, job.id) - - # local/provider process location location_base = "/providers/{provider_id}".format(provider_id=provider_id) if provider_id else "" - location = "{base_url}{location_base}/processes/{process_id}/jobs/{job_id}".format( + location_url = "{base_url}{location_base}/processes/{process_id}/jobs/{job_id}".format( base_url=get_wps_restapi_base_url(settings), location_base=location_base, process_id=process_id, - job_id=job.id) - body_data = { + job_id=job.id + ) + + result = execute_process.delay(job_id=job.id, wps_url=clean_ows_url(service_url), headers=headers) + LOGGER.debug("Celery pending task [%s] for job [%s].", result.id, job.id) + if mode == ExecuteMode.SYNC and wait: + LOGGER.debug("Celery task requested as sync if it completes before (wait=%ss)", wait) + result.wait(timeout=wait) + if result.ready(): + job = store.fetch_by_id(job.id) + body = job.json(container=settings, self_link="status") + body["location"] = location_url + return body + else: + LOGGER.debug("Celery task requested as sync took too long to 
complete (wait=%ss). Continue in async.", wait) + + LOGGER.debug("Celery task submitted to run async.", wait) + body = { "jobID": job.id, "processID": job.process, "providerID": provider_id, # dropped by validator if not applicable "status": map_status(Status.ACCEPTED), - "location": location + "location": location_url } - return body_data + return body diff --git a/weaver/processes/utils.py b/weaver/processes/utils.py index 900a33558..f40175025 100644 --- a/weaver/processes/utils.py +++ b/weaver/processes/utils.py @@ -40,6 +40,7 @@ log_unhandled_exceptions ) from weaver.processes.types import ProcessType +from weaver.status import JOB_STATUS_CATEGORIES, StatusCategory, map_status from weaver.store.base import StoreProcesses, StoreServices from weaver.utils import get_sane_name, get_settings, get_url_without_query from weaver.visibility import Visibility @@ -103,13 +104,25 @@ def get_process(process_id=None, request=None, settings=None, store=None): def get_job_submission_response(body): - # type: (JSON) -> HTTPCreated + # type: (JSON) -> Union[HTTPOk, HTTPCreated] """ - Generates the successful response from contents returned by job submission process. + Generates the successful response from contents returned by :term:`Job` submission process. + + If :term:`Job` already finished processing within requested ``Prefer: wait=X`` seconds delay (and if allowed by + the :term:`Process` ``jobControlOptions``), return the successful status immediately instead of created status. + + Otherwise, return the status monitoring location of the created :term:`Job` to be monitored asynchronously. .. 
seealso:: :func:`weaver.processes.execution.submit_job` + :func:`weaver.processes.execution.submit_job_handler` """ + status = map_status(body.get("status")) + if status in JOB_STATUS_CATEGORIES[StatusCategory.FINISHED]: + body["description"] = sd.CompletedJobResponse.description + body = sd.CompletedJobStatusSchema().deserialize(body) + return HTTPOk(location=body["location"], json=body) + body["description"] = sd.CreatedLaunchJobResponse.description body = sd.CreatedJobStatusSchema().deserialize(body) return HTTPCreated(location=body["location"], json=body) diff --git a/weaver/wps/service.py b/weaver/wps/service.py index 5edb73f4e..c9fa00683 100644 --- a/weaver/wps/service.py +++ b/weaver/wps/service.py @@ -238,7 +238,7 @@ def _submit_job(self, wps_request): data = wps2json_job_payload(wps_request, wps_process) body = submit_job_handler(data, self.settings, proc.processEndpointWPS1, process_id=pid, is_local=True, is_workflow=is_workflow, visibility=Visibility.PUBLIC, - language=wps_request.language, tags=tags, auth=dict(req.headers), context=ctx) + language=wps_request.language, tags=tags, headers=dict(req.headers), context=ctx) # if Accept was JSON, provide response content as is # if anything else (even */*), return as XML diff --git a/weaver/wps_restapi/swagger_definitions.py b/weaver/wps_restapi/swagger_definitions.py index 19f3de6a5..ef00c16e8 100644 --- a/weaver/wps_restapi/swagger_definitions.py +++ b/weaver/wps_restapi/swagger_definitions.py @@ -3010,7 +3010,14 @@ class ExecuteInputOutputs(ExtendedMappingSchema): class Execute(ExecuteInputOutputs): - mode = JobExecuteModeEnum() + mode = JobExecuteModeEnum( + missing=drop, + description=( + "Desired execution mode specified directly. This is intended for backward compatibility support. " + "To obtain more control over execution mode selection, employ the official Prefer header instead " + "(see for more details: https://pavics-weaver.readthedocs.io/en/latest/processes.html#execution-mode)." 
+ ) + ) notification_email = ExtendedSchemaNode( String(), missing=drop, @@ -4299,6 +4306,20 @@ class CreatedLaunchJobResponse(ExtendedMappingSchema): body = CreatedJobStatusSchema() +class CompletedJobLocationHeader(ResponseHeaders): + Location = URL(description="Status location of the completed job execution.") + + +class CompletedJobStatusSchema(DescriptionSchema, JobStatusInfo): + pass + + +class CompletedJobResponse(ExtendedMappingSchema): + description = "Job submitted and completed execution synchronously." + header = CompletedJobLocationHeader() + body = CompletedJobStatusSchema() + + class OkDeleteProcessJobResponse(ExtendedMappingSchema): header = ResponseHeaders() body = DismissedJobSchema() @@ -4695,11 +4716,13 @@ class GoneVaultFileDownloadResponse(ExtendedMappingSchema): "501": NotImplementedPostProviderResponse(), } post_provider_process_job_responses = { + "200": CompletedJobResponse(description="success"), "201": CreatedLaunchJobResponse(description="success"), "403": ForbiddenProviderAccessResponseSchema(), "500": InternalServerErrorResponseSchema(), } post_process_jobs_responses = { + "200": CompletedJobResponse(description="success"), "201": CreatedLaunchJobResponse(description="success"), "403": ForbiddenProviderAccessResponseSchema(), "500": InternalServerErrorResponseSchema(), From 6de71bc825c2a04a9d78e6688dd12d58f41527d1 Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Tue, 15 Mar 2022 14:47:12 -0400 Subject: [PATCH 02/34] support Prefer header properly (resolves #247) + allow omitting outputs in execute (fix #375) --- CHANGES.rst | 18 ++++- config/weaver.ini.example | 16 ++++ docs/source/configuration.rst | 60 +++++++++++++-- docs/source/faq.rst | 2 +- docs/source/processes.rst | 89 +++++++++++++++-------- tests/functional/test_builtin.py | 81 +++++++++++++++++++-- tests/utils.py | 14 +++- weaver/processes/execution.py | 54 +++++++++----- weaver/processes/utils.py | 11 +-- weaver/utils.py | 19 ++++- weaver/wps/service.py | 10 
++- weaver/wps_restapi/processes/processes.py | 4 +- weaver/wps_restapi/providers/providers.py | 4 +- weaver/wps_restapi/quotation/quotes.py | 18 ++++- weaver/wps_restapi/swagger_definitions.py | 44 ++++++++--- 15 files changed, 350 insertions(+), 94 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index b064bdf40..d1fd75fa3 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -13,9 +13,11 @@ Changes Changes: -------- - Support ``Prefer`` header with ``wait`` or ``respond-async`` directives to select ``Job`` execution mode either - as synchronous or asynchronous task, according to supported ``jobControlOptions`` of the relevant ``Process`` being - executed (resolves `#247 `_). + as *synchronous* or *asynchronous* task, according to supported ``jobControlOptions`` of the relevant ``Process`` + being executed (resolves `#247 `_). - Increase minor version of all ``builtin`` processes that will now be executable in wither (a)synchronous modes. +- Add ``weaver.exec_sync_max_wait`` and ``weaver.quote_sync_max_wait`` settings allowing custom definition for the + maximum duration that can be specified to wait for a `synchronous` response from task workers. - Improve conformance for returned status codes and error messages when requesting results for an unfinished, failed, or dismissed ``Job``. - Adjust conformance item references to correspond with `OGC API - Processes: Part 2` renamed from `Transactions` to @@ -24,9 +26,17 @@ Changes: (resolves `#180 `_). - Improve ``Process`` undeployment to consider running ``Job`` to block its removal while in use. +Important Note +-------------- +- In order to support *synchronous* execution, setting ``RESULT_BACKEND`` **MUST** be specified in + the ``weaver.ini`` configuration file. + See `Weaver INI Configuration Example `_ + in section ``[celery]`` for more details. + Fixes: ------ -- No change. +- Fix ``outputs`` permitted to be completely omitted from the execution request + (resolves `#375 `_). .. 
_changes_4.14.0: @@ -327,7 +337,7 @@ Fixes: - Fix parsing of inputs for `OpenSearch` parameters lookup that was assuming inputs were always provided as listing definition, not considering possible mapping definition. - Fix incorrect documentation section ``Package as External Execution Unit Reference`` where content was omitted - and incorrectly anchored as following ``process-esgf-cwt`` section. + and incorrectly anchored as following ``ESGF-CWT`` section. .. _changes_4.4.0: diff --git a/config/weaver.ini.example b/config/weaver.ini.example index ee216224b..625e668f6 100644 --- a/config/weaver.ini.example +++ b/config/weaver.ini.example @@ -67,6 +67,13 @@ weaver.ssl_verify = true # see 'requests_options.yml.example' weaver.request_options = +# --- Weaver Execution settings --- + +# maximum wait time allowed for Prefer header to run Job/Quote synchronously +# over this limit, they will automatically fallback to asynchronous execution/estimation +weaver.exec_sync_max_wait = 20 +weaver.quote_sync_max_wait = 20 + # --- Weaver CWL settings --- # NOTE: [experimental] # enforce provided effective user/group identifiers for Application Package execution @@ -147,6 +154,15 @@ weaver.vault_dir = /tmp/vault [celery] #USE_CELERYCONFIG = True BROKER_URL = mongodb://mongodb:27017/celery +# Result backend is required for SYNC execution. +# Using only the backend type matching the broker URL will automatically resolve to use its database location. +# For an alternative result location, provide the full backend directly rather than using "mongodb_backend_settings". +# This setting is not correctly parsed (dict) by "pyramid_celery", and separate [celery:mongodb_backend_settings] is +# also not found. If more configuration is required, consider using a "celeryconfig" (and "USE_CELERYCONFIG = True"). +# That configuration should be placed at the root of weaver since "pyramid_celery" doesn't support custom locations. 
+# https://github.com/sontek/pyramid_celery/pull/89 +RESULT_BACKEND = mongodb +# RESULT_BACKEND = mongodb://mongodb:27017/celery ### # wsgi server configuration diff --git a/docs/source/configuration.rst b/docs/source/configuration.rst index 868da7742..5b7449e08 100644 --- a/docs/source/configuration.rst +++ b/docs/source/configuration.rst @@ -213,14 +213,36 @@ they are optional and which default value or operation is applied in each situat completion if an email was provided in the :ref:`Execute ` request body (see also: :ref:`Email Notification`). +.. versionadded:: 4.15.0 + +- | ``weaver.exec_sync_max_wait`` + | (default: ``20``, :class:`int`, seconds) + | + | Defines the maximum duration allowed for running a :term:`Job` execution in `synchronous` mode. + | + | See :ref:`proc_exec_mode` for more details on the feature and how to employ it. + | Ensure `Celery`_ worker is configured as specified below. + +.. versionadded:: 4.15.0 + +- | ``weaver.quote_sync_max_wait`` + | (default: ``20``, :class:`int`, seconds) + | + | Defines the maximum duration allowed for running a :term:`Quote` estimation in `synchronous` mode. + | + | See :ref:`proc_exec_mode` for more details on the feature and how to employ it. + | Ensure `Celery`_ worker is configured as specified below. .. note:: Since `Weaver` employs `Celery`_ as task queue manager and `MongoDB`_ as backend, relevant settings for the - |celery-config|_ and the |celery-mongo|_ should be referred to. Processing of task jobs and results reporting + |celery-config|_ and the |celery-mongo|_ should be employed. Processing of task jobs and results reporting is accomplished according to the specific implementation of these services. Therefore, all applicable settings and extensions should be available for custom server configuration and scaling as needed. +.. warning:: + In order to support `synchronous` execution, the ``RESULT_BACKEND`` setting **MUST** be defined. + .. |celery-config| replace:: configuration of Celery .. 
_celery-config: https://docs.celeryproject.org/en/latest/userguide/configuration.html#configuration .. |celery-mongo| replace:: configuration of MongoDB Backend @@ -344,16 +366,42 @@ simply set setting ``weaver.wps_processes_file`` as *undefined* (i.e.: nothing a Configuration of Request Options ======================================= -.. todo:: complete docs +.. versionadded:: 1.8.0 -:term:`Request Options` +It is possible to define :term:`Request Options` that consist of additional arguments that will be passed down to +:func:`weaver.utils.request_extra`, which essentially calls a traditional request using :mod:`requests` module, but +with extended handling capabilities such as caching, retrying, and file reference support. The specific parameters +that are passed down for individual requests depend on whether a match based on URL (optionally with regex rules) and +method definitions can be found in the :term:`Request Options` file. This file should be provided using +the ``weaver.request_options`` configuration setting. Using this definition, it is possible to provide specific +requests handling options, such as extended timeout, authentication arguments, SSL certification verification setting, +etc. on a per-request basis, leaving other requests unaffected and generally more secure. -``weaver.ssl_verify`` +.. seealso:: + File `request_options.yml.example`_ provides more details and sample :term:`YAML` format of the expected contents + for :term:`Request Options` feature. +.. seealso:: + Please refer to :func:`weaver.utils.request_extra` documentation directly for supported parameters and capabilities. -.. versionadded:: 1.8.0 -`request_options.yml.example`_ +- | ``weaver.request_options = `` + | (default: ``None``) + | + | Path of the :term:`Request Options` definitions to employ. + + +- | ``weaver.ssl_verify = true|false`` + | (default: ``true``) + | + | Toggle the SSL certificate verification across all requests. + +.. 
warning:: + It is **NOT** recommended to disable SSL verification across all requests for security reasons + (avoid man-in-the-middle attacks). This is crucial for requests that involve any form of authentication, secured + access or personal user data references. This should be employed only for quickly resolving issues during + development. Consider fixing SSL certificates on problematic servers, or disable the verification on a per-request + basis using :term:`Request Options` for acceptable cases. Starting the Application diff --git a/docs/source/faq.rst b/docs/source/faq.rst index ff3208cf7..02c1fed96 100644 --- a/docs/source/faq.rst +++ b/docs/source/faq.rst @@ -46,7 +46,7 @@ Please refer to below references for more details. .. seealso:: - - Supported :term:`Application Package` definitions in :ref:`process-wps-rest` deployment. + - Supported :term:`Application Package` definitions in :ref:`proc_wps_rest` deployment. - :ref:`Deploy ` request. diff --git a/docs/source/processes.rst b/docs/source/processes.rst index 052efbef1..3ef6b5258 100644 --- a/docs/source/processes.rst +++ b/docs/source/processes.rst @@ -30,6 +30,8 @@ Each one of them are accessible through the same API interface, but they have di and :ref:`Execute ` request payloads for diverse set of applications. +.. _proc_builtin: + Builtin ------- @@ -55,6 +57,8 @@ As of the latest release, following `builtin` processes are available: All `builtin` processes are marked with :py:data:`weaver.processes.constants.CWL_REQUIREMENT_APP_BUILTIN` in the :term:`CWL` ``hints`` section and are all defined in :py:mod:`weaver.processes.builtin`. +.. _proc_wps_12: + WPS-1/2 ------- @@ -99,7 +103,7 @@ Please refer to :ref:`Configuration of WPS Processes` section for more details o .. seealso:: - `Remote Provider`_ -.. _process-wps-rest: +.. _proc_wps_rest: WPS-REST -------- @@ -189,7 +193,7 @@ Where the referenced file hosted at ``"https://remote-file-server.com/my-package "<...>": "<...>" -.. 
_process-esgf-cwt: +.. _proc_esgf_cwt: ESGF-CWT ---------- @@ -263,7 +267,7 @@ be indicated in the logs with the appropriate step and message where the error o :ref:`proc_workflow_ops` provides more details on each of the internal operations accomplished by individual step :term:`Process` chained in a :term:`Workflow`. -.. _process-remote-provider: +.. _proc_remote_provider: Remote Provider -------------------- @@ -317,7 +321,7 @@ An example body of the `register provider`_ request could be as follows: } -Then, processes of this registered :ref:`process-remote-provider` will be accessible. For example, if the referenced +Then, processes of this registered :ref:`proc_remote_provider` will be accessible. For example, if the referenced service by the above URL add a WPS process identified by ``my-process``, its JSON description would be obtained with following request (`DescribeProviderProcess`_): @@ -466,8 +470,20 @@ and parametrization of various input/output combinations. Let's employ the follo Other parameters can be added to the request to provide further functionalities. Above fields are the minimum requirements to request a :term:`Job`. Please refer to the |exec-api|_ definition for all applicable features. -Basic Details -~~~~~~~~~~~~~~~~~ +.. seealso:: + - :ref:`proc_exec_body` and :ref:`proc_exec_mode` details applicable for `Weaver` specifically. + - `OGC API - Processes, Process Outputs `_ + for more general details on ``transmissionMode`` parameter. + - `OGC API - Processes, Execution Mode `_ + for more general details on the execution negotiation (formerly with ``mode`` parameter) and more recently + with ``Prefer`` header. + - `OGC API - Processes, Response `_ + for a complete listing of available ``response`` formats considering all other parameters. + +.. _proc_exec_body: + +Execution Body +~~~~~~~~~~~~~~~~~~ The ``inputs`` definition is the most important section of the request body. 
It is also the only one that is completely required when submitting the execution request, even for a no-input process (an empty mapping is needed in such case). @@ -485,6 +501,20 @@ report the produced outputs from a successful :term:`Job` completion. For the ti stored locally and exposed externally with returned reference URL. The other mode ``value`` returns the contents directly in the response instead of the URL. +When ``outputs`` section is omitted, it simply means that the :term:`Process` to be executed should return all +outputs it offers in the created :ref:`Job Results `. In such case, because no representation mode +is specified for individual outputs, `Weaver` automatically selects ``reference`` as it makes all outputs more easily +accessible with distinct URLs afterwards. If the ``outputs`` section is specified, but one of the outputs defined +in the :ref:`Process Description ` is not specified, that output should be omitted from the produced +results. For the time being, because only ``reference`` representation is offered for produced output files, this +filtering is not implemented as it offers no additional advantage for files accessed directly with their distinct URLs. +This could be added later if ``Multipart`` raw data representation is required. +Please |submit-issue|_ to request this feature if it is relevant for your use-cases. + +.. fixme:: + Filtering not implemented (everything always available). + https://github.com/crim-ca/weaver/issues/380 + .. fixme:: Transmission mode ``value`` not implemented. Only ``reference`` is supported. https://github.com/crim-ca/weaver/issues/377 @@ -496,21 +526,6 @@ directly in the response instead of the URL. Response representation mode ``raw`` to be implemented. https://github.com/crim-ca/weaver/issues/376 -.. fixme:: - When ``outputs`` section is omitted, it simply means that the :term:`Process` to be executed should return all - outputs it offers in the created :ref:`Job Results `. 
Because no representation modes is specified - for individual outputs, `Weaver` automatically selects ``reference`` as it makes all outputs more easily accessible - with distinct URL afterwards. If the ``outputs`` section is specified, but that one of the outputs defined in the - :ref:`Process Description ` is not specified, that output should be omitted from the produced - results. For the time being, because only ``reference`` representation is offered for produced output files, this - filtering is not implemented as it offers no additional advantage that accessing files directly with their distinct - URLs. This could be added later if `Multipart` raw data representation is required. - Please |submit-issue|_ to request this feature if it is relevant for your use-cases. - - Filtering not implemented (everything always available). - https://github.com/crim-ca/weaver/issues/380 - - .. |exec-api| replace:: OpenAPI Execute .. _exec-api: `exec-req`_ @@ -520,14 +535,28 @@ directly in the response instead of the URL. Execution Mode ~~~~~~~~~~~~~~~~~~~~~ -.. todo:: Prefer Header details +In order to select how to execute a :term:`Process`, either `synchronously` or `asynchronously`, the ``Prefer`` header +should be specified. If omitted, `Weaver` defaults to `asynchronous` execution. To execute `asynchronously` explicitly, +``Prefer: respond-async`` should be used. Otherwise, the `synchronous` execution can be requested +with ``Prefer: wait=X`` where ``X`` is the duration in seconds to wait for a response. If no worker becomes available +within that time, or if this value is greater than ``weaver.exec_sync_max_wait``, the :term:`Job` will resume +`asynchronously` and the response will be returned. Furthermore, `synchronous` and `asynchronous` execution of +a :term:`Process` can only be requested for corresponding ``jobControlOptions`` it reports as supported in +its :ref:`Process Description `. 
It is important to provide the ``jobControlOptions`` parameter with +applicable modes when :ref:`Deploying a Process ` to allow it to run as desired. By default, `Weaver` +will assume that deployed processes are only `asynchronous` to handle longer operations. + +.. versionchanged:: 4.15.0 + By default, every :ref:`proc_builtin` :term:`Process` can accept both modes. + All previously deployed processes will only allow `asynchronous` execution, as only this one was supported. + This should be reported in their ``jobControlOptions``. .. warning:: It is important to remember that the ``Prefer`` header is indeed a *preference*. If `Weaver` deems it cannot allocate a worker to execute the task `synchronously` within a reasonable delay, it can enforce the `asynchronous` execution. The `asynchronous` mode is also *prioritized* for running longer :term:`Job` submitted over the task queue, as this allows `Weaver` to offer better availability for all requests submitted by its users. - The `synchronous` mode should be reserved only for very quick and relatively low computation intensive executions. + The `synchronous` mode should be reserved only for very quick and relatively low computation intensive operations. The ``mode`` field displayed in the body is another method to tell whether to run the :term:`Process` in a blocking (``sync``) or non-blocking (``async``) manner. Note that support is limited for mode ``sync`` as this use case is often @@ -536,14 +565,14 @@ to run the :term:`Job` (otherwise it fails immediately due to lack of processing for the *whole* execution to complete to obtain the result. Given that :term:`Process` could take a very long time to complete, it is not practical to execute them in this manner and potentially have to wait hours to retrieve outputs. Instead, the preferred and default approach is to request an ``async`` :term:`Job` execution. 
When doing so, `Weaver` -will add this to a task queue for processing, and will immediately return a :term:`Job` identifier and location where -the user can probe for its status, using :ref:`Monitoring ` request. As soon as any task worker becomes -available, it will pick any leftover queued :term:`Job` to execute it. +will add this to a task queue for processing, and will immediately return a :term:`Job` identifier and ``Location`` +where the user can probe for its status, using :ref:`Monitoring ` request. As soon as any task worker +becomes available, it will pick any leftover queued :term:`Job` to execute it. .. note:: The ``mode`` field is an older methodology that precedes the official :term:`OGC API - Processes` method using - the ``Prefer`` header. It is recommended to employ the ``Prefer`` header that ensures higher interoperability with - other services using the same standard. The ``mode`` field is deprecated and preserved only for backward + the ``Prefer`` header. It is recommended to employ the ``Prefer`` header that ensures higher interoperability + with other services using the same standard. The ``mode`` field is deprecated and preserved only for backward compatibility purpose. When requesting a `synchronous` execution, and provided a worker was available to pick and complete the task before @@ -742,7 +771,7 @@ combinations. 
| |ADES| | - `WPS-1/2`_ | |file_scheme| | Convert to |http_scheme| [#file2http]_ | | | - `ESGF-CWT`_ +---------------+-------------------------------------------+ | | - `WPS-REST`_ (remote) [#wps3]_ | |http_scheme| | Nothing (unmodified) | -| | - :ref:`process-remote-provider` +---------------+-------------------------------------------+ +| | - :ref:`proc_remote_provider` +---------------+-------------------------------------------+ | | | |s3_scheme| | Fetch and convert to |http_scheme| [#s3]_ | | | +---------------+-------------------------------------------+ | | | |vault_ref| | Convert to |http_scheme| [#vault2http]_ | @@ -766,7 +795,7 @@ combinations. | |HYBRID| | - `WPS-1/2`_ | |file_scheme| | Convert to |http_scheme| [#file2http]_ | | | - `ESGF-CWT`_ +---------------+-------------------------------------------+ | | - `WPS-REST`_ (remote) [#wps3]_ | |http_scheme| | Nothing (unmodified) | -| | - :ref:`process-remote-provider` +---------------+-------------------------------------------+ +| | - :ref:`proc_remote_provider` +---------------+-------------------------------------------+ | | | |s3_scheme| | Fetch and convert to |http_scheme| [#s3]_ | | | *Note*: |HYBRID| assumes |ADES| role +---------------+-------------------------------------------+ | | (remote processes) | |vault_ref| | Convert to |http_scheme| [#vault2http]_ | diff --git a/tests/functional/test_builtin.py b/tests/functional/test_builtin.py index be249f3f5..d8961ba7c 100644 --- a/tests/functional/test_builtin.py +++ b/tests/functional/test_builtin.py @@ -11,6 +11,7 @@ from weaver.execute import ExecuteControlOption, ExecuteMode, ExecuteResponse, ExecuteTransmissionMode from weaver.formats import ContentType from weaver.processes.builtin import register_builtin_processes +from weaver.status import Status if TYPE_CHECKING: from weaver.typedefs import JSON @@ -58,7 +59,7 @@ def test_jsonarray2netcdf_describe_old_schema(self): assert isinstance(body["process"]["outputs"][0]["formats"], list) assert 
len(body["process"]["outputs"][0]["formats"]) == 1 assert body["process"]["outputs"][0]["formats"][0]["mediaType"] == ContentType.APP_NETCDF - assert body["jobControlOptions"] == [ExecuteControlOption.ASYNC] + assert body["jobControlOptions"] == [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC] assert body["outputTransmission"] == [ExecuteTransmissionMode.REFERENCE] def test_jsonarray2netcdf_describe_ogc_schema(self): @@ -82,10 +83,10 @@ def test_jsonarray2netcdf_describe_ogc_schema(self): assert isinstance(body["outputs"]["output"]["formats"], list) assert len(body["outputs"]["output"]["formats"]) == 1 assert body["outputs"]["output"]["formats"][0]["mediaType"] == ContentType.APP_NETCDF - assert body["jobControlOptions"] == [ExecuteControlOption.ASYNC] + assert body["jobControlOptions"] == [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC] assert body["outputTransmission"] == [ExecuteTransmissionMode.REFERENCE] - def test_jsonarray2netcdf_execute(self): + def test_jsonarray2netcdf_execute_async(self): dirname = tempfile.gettempdir() nc_data = "Hello NetCDF!" with contextlib.ExitStack() as stack_exec: @@ -112,9 +113,77 @@ def test_jsonarray2netcdf_execute(self): assert resp.status_code == 201, "Error: {}".format(resp.json) assert resp.content_type in ContentType.APP_JSON + # following details not available yet in async, but are in sync + assert "created" not in resp.json + assert "finished" not in resp.json + assert "duration" not in resp.json + assert "progress" not in resp.json + job_url = resp.json["location"] results = self.monitor_job(job_url) + output_url = job_url + "/outputs" + resp = self.app.get(output_url, headers=self.json_headers) + assert resp.status_code == 200, "Error job outputs:\n{}".format(resp.json) + outputs = resp.json + + self.validate_results(results, outputs, nc_data) + + def test_jsonarray2netcdf_execute_sync(self): + dirname = tempfile.gettempdir() + nc_data = "Hello NetCDF!" 
+ with contextlib.ExitStack() as stack_exec: + tmp_ncdf = tempfile.NamedTemporaryFile(dir=dirname, mode="w", suffix=".nc") + tmp_json = tempfile.NamedTemporaryFile(dir=dirname, mode="w", suffix=".json") + tmp_ncdf = stack_exec.enter_context(tmp_ncdf) # noqa + tmp_json = stack_exec.enter_context(tmp_json) # noqa + tmp_ncdf.write(nc_data) + tmp_ncdf.seek(0) + tmp_json.write(json.dumps(["file://{}".format(os.path.join(dirname, tmp_ncdf.name))])) + tmp_json.seek(0) + data = { + "inputs": [{"id": "input", "href": os.path.join(dirname, tmp_json.name)}], + "outputs": [{"id": "output", "transmissionMode": ExecuteTransmissionMode.REFERENCE}], + } + headers = {"Prefer": "wait=10"} + headers.update(self.json_headers) + + for mock_exec in mocked_execute_celery(): + stack_exec.enter_context(mock_exec) + path = "/processes/jsonarray2netcdf/jobs" + resp = mocked_sub_requests(self.app, "post_json", path, + data=data, headers=headers, only_local=True) + + assert resp.status_code == 200, "Error: {}".format(resp.json) + assert resp.content_type in ContentType.APP_JSON + + # since sync, all status details are already available! 
+ assert resp.json["status"] == Status.SUCCEEDED + assert "Location" in resp.headers + # validate indeed sync + assert resp.headers["Preference-Applied"] == headers["Prefer"] + # following details not available yet in async, but are in sync + assert isinstance(resp.json["created"], str) and resp.json["created"] + assert isinstance(resp.json["finished"], str) and resp.json["finished"] + assert isinstance(resp.json["duration"], str) and resp.json["duration"] + assert isinstance(resp.json["progress"], int) and resp.json["progress"] == 100 + + job_url = resp.headers["Location"] + out_url = f"{job_url}/results" + resp = self.app.get(out_url, headers=self.json_headers) + assert resp.status_code == 200 + assert resp.content_type == ContentType.APP_JSON + results = resp.json + + output_url = job_url + "/outputs" + resp = self.app.get(output_url, headers=self.json_headers) + assert resp.status_code == 200, "Error job outputs:\n{}".format(resp.json) + outputs = resp.json + + self.validate_results(results, outputs, nc_data) + + def validate_results(self, results, outputs, data): + # first validate format of OGC-API results assert "output" in results, "Expected result ID 'output' in response body" assert isinstance(results["output"], dict), "Container of result ID 'output' should be a dict" @@ -134,13 +203,9 @@ def test_jsonarray2netcdf_execute(self): assert os.path.split(nc_real_path)[-1] == os.path.split(nc_path)[-1] assert os.path.isfile(nc_real_path) with open(nc_real_path, "r") as f: - assert f.read() == nc_data + assert f.read() == data # if everything was valid for results, validate equivalent but differently formatted outputs response - output_url = job_url + "/outputs" - resp = self.app.get(output_url, headers=self.json_headers) - assert resp.status_code == 200, "Error job outputs:\n{}".format(resp.json) - outputs = resp.json assert outputs["outputs"][0]["id"] == "output" nc_path = outputs["outputs"][0]["href"] assert isinstance(nc_path, str) and len(nc_path) diff 
--git a/tests/utils.py b/tests/utils.py index 1ed286c0e..d05521307 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -20,6 +20,7 @@ from typing import TYPE_CHECKING # Note: do NOT import 'boto3' here otherwise 'moto' will not be able to mock it effectively +from celery.exceptions import TimeoutError as CeleryTaskTimeoutError import colander import mock import moto @@ -216,8 +217,10 @@ def setup_config_with_celery(config): settings = config.get_settings() # override celery loader to specify configuration directly instead of ini file + celery_mongodb_url = "mongodb://{}:{}/celery".format(settings.get("mongodb.host"), settings.get("mongodb.port")) celery_settings = { - "CELERY_BROKER_URL": "mongodb://{}:{}/celery".format(settings.get("mongodb.host"), settings.get("mongodb.port")) + "broker_url": celery_mongodb_url, + "result_backend": celery_mongodb_url # for sync exec } pyramid_celery.loaders.INILoader.read_configuration = mock.MagicMock(return_value=celery_settings) config.include("pyramid_celery") @@ -865,6 +868,15 @@ class MockTask(object): def id(self): return self._id + # since delay is mocked and blocks to execute, assume sync is complete at this point + # all following methods return what would be returned normally in sync mode + + def wait(*_, **__): + raise CeleryTaskTimeoutError + + def ready(*_, **__): + return True + task = MockTask() def mock_execute_task(*args, **kwargs): diff --git a/weaver/processes/execution.py b/weaver/processes/execution.py index 3273fb476..bd449b781 100644 --- a/weaver/processes/execution.py +++ b/weaver/processes/execution.py @@ -5,6 +5,7 @@ import colander from celery.utils.log import get_task_logger +from celery.exceptions import TimeoutError as CeleryTaskTimeoutError from owslib.util import clean_ows_url from owslib.wps import ComplexDataInput from pyramid.httpexceptions import HTTPBadRequest, HTTPNotAcceptable, HTTPNotImplemented @@ -23,6 +24,7 @@ from weaver.status import JOB_STATUS_CATEGORIES, Status, StatusCategory, 
map_status from weaver.store.base import StoreJobs, StoreProcesses from weaver.utils import ( + as_int, get_any_id, get_any_value, get_header, @@ -440,7 +442,7 @@ def map_locations(job, settings): def submit_job(request, reference, tags=None): - # type: (Request, Union[Service, Process], Optional[List[str]]) -> JSON + # type: (Request, Union[Service, Process], Optional[List[str]]) -> Tuple[JSON, HeadersType] """ Generates the job submission from details retrieved in the request. @@ -496,17 +498,18 @@ def submit_job(request, reference, tags=None): visibility, language=lang, headers=headers, tags=tags, user=user, context=context) -# FIXME: this should not be necessary if schema validators correctly implement OneOf(values) def _validate_job_parameters(json_body): # type: (JSON) -> None """ - Tests supported parameters not automatically validated by colander deserialize. + Tests supported parameters not automatically validated by colander deserialize since they are optional. """ - if json_body["mode"] not in [ExecuteMode.ASYNC, ExecuteMode.AUTO]: - raise HTTPNotImplemented(detail="Execution mode '{}' not supported.".format(json_body["mode"])) + exec_mode = json_body.get("mode") + if exec_mode not in [None, ExecuteMode.ASYNC, ExecuteMode.AUTO]: + raise HTTPNotImplemented(detail=f"Execution mode '{exec_mode}' not supported.") - if json_body["response"] != ExecuteResponse.DOCUMENT: - raise HTTPNotImplemented(detail="Execution response type '{}' not supported.".format(json_body["response"])) + resp_mode = json_body.get("response") + if resp_mode not in [None, ExecuteResponse.DOCUMENT]: + raise HTTPNotImplemented(detail=f"Execution response type '{resp_mode}' not supported.") outputs = json_body.get("outputs", []) if isinstance(outputs, dict): @@ -514,7 +517,7 @@ def _validate_job_parameters(json_body): for job_output in outputs: mode = job_output["transmissionMode"] if mode not in ExecuteTransmissionMode.values(): - raise HTTPNotImplemented(detail="Execute transmissionMode 
'{}' not supported.".format(mode)) + raise HTTPNotImplemented(detail=f"Execute transmissionMode '{mode}' not supported.") def submit_job_handler(payload, # type: JSON @@ -530,7 +533,7 @@ def submit_job_handler(payload, # type: JSON tags=None, # type: Optional[List[str]] user=None, # type: Optional[int] context=None, # type: Optional[str] - ): # type: (...) -> JSON + ): # type: (...) -> Tuple[JSON, HeadersType] """ Submits the job to the Celery worker with provided parameters. @@ -554,10 +557,16 @@ def submit_job_handler(payload, # type: JSON job_ctl_opts = process.jobControlOptions else: job_ctl_opts = ExecuteControlOption.values() - mode, wait, applied = parse_prefer_header_execute_mode(headers, job_ctl_opts) + max_wait = as_int(settings.get("weaver.exec_sync_max_wait"), default=20) + mode, wait, applied = parse_prefer_header_execute_mode(headers, job_ctl_opts, max_wait) get_header("prefer", headers, pop=True) + if not applied: # whatever returned is a default, consider 'mode' in body as alternative + is_execute_async = ExecuteMode.get(json_body.get("mode")) != ExecuteMode.SYNC # convert auto to async + else: + # as per https://datatracker.ietf.org/doc/html/rfc7240#section-2 + # Prefer header not resolve as valid still proces + is_execute_async = mode != ExecuteMode.SYNC - is_execute_async = ExecuteMode.get(json_body["mode"]) != ExecuteMode.SYNC # convert auto to async notification_email = json_body.get("notification_email") encrypted_email = encrypt_email(notification_email, settings) if notification_email else None @@ -575,21 +584,32 @@ def submit_job_handler(payload, # type: JSON process_id=process_id, job_id=job.id ) + resp_headers = {"Location": location_url} + resp_headers.update(applied) result = execute_process.delay(job_id=job.id, wps_url=clean_ows_url(service_url), headers=headers) LOGGER.debug("Celery pending task [%s] for job [%s].", result.id, job.id) - if mode == ExecuteMode.SYNC and wait: + if not is_execute_async: LOGGER.debug("Celery task 
requested as sync if it completes before (wait=%ss)", wait) - result.wait(timeout=wait) + try: + result.wait(timeout=wait) + except CeleryTaskTimeoutError: + pass if result.ready(): job = store.fetch_by_id(job.id) body = job.json(container=settings, self_link="status") body["location"] = location_url - return body + return body, resp_headers else: LOGGER.debug("Celery task requested as sync took too long to complete (wait=%ss). Continue in async.", wait) - - LOGGER.debug("Celery task submitted to run async.", wait) + # sync not respected, therefore must drop it + # since both could be provided as alternative preferences, drop only async with limited subset + prefer = get_header("Preference-Applied", headers, pop=True) + _, _, async_applied = parse_prefer_header_execute_mode({"Prefer": prefer}, [ExecuteMode.ASYNC]) + if async_applied: + resp_headers.update(async_applied) + + LOGGER.debug("Celery task submitted to run async.") body = { "jobID": job.id, "processID": job.process, @@ -597,4 +617,4 @@ def submit_job_handler(payload, # type: JSON "status": map_status(Status.ACCEPTED), "location": location_url } - return body + return body, resp_headers diff --git a/weaver/processes/utils.py b/weaver/processes/utils.py index f40175025..d3cfe58b9 100644 --- a/weaver/processes/utils.py +++ b/weaver/processes/utils.py @@ -42,7 +42,7 @@ from weaver.processes.types import ProcessType from weaver.status import JOB_STATUS_CATEGORIES, StatusCategory, map_status from weaver.store.base import StoreProcesses, StoreServices -from weaver.utils import get_sane_name, get_settings, get_url_without_query +from weaver.utils import get_header, get_sane_name, get_settings, get_url_without_query from weaver.visibility import Visibility from weaver.wps.utils import get_wps_client from weaver.wps_restapi import swagger_definitions as sd @@ -103,8 +103,8 @@ def get_process(process_id=None, request=None, settings=None, store=None): raise HTTPBadRequest("Invalid schema:\n[{0!r}].".format(ex)) -def 
get_job_submission_response(body): - # type: (JSON) -> Union[HTTPOk, HTTPCreated] +def get_job_submission_response(body, headers): + # type: (JSON, AnyHeadersContainer) -> Union[HTTPOk, HTTPCreated] """ Generates the successful response from contents returned by :term:`Job` submission process. @@ -118,14 +118,15 @@ def get_job_submission_response(body): :func:`weaver.processes.execution.submit_job_handler` """ status = map_status(body.get("status")) + location = get_header("location", headers) if status in JOB_STATUS_CATEGORIES[StatusCategory.FINISHED]: body["description"] = sd.CompletedJobResponse.description body = sd.CompletedJobStatusSchema().deserialize(body) - return HTTPOk(location=body["location"], json=body) + return HTTPOk(location=location, json=body, headers=headers) body["description"] = sd.CreatedLaunchJobResponse.description body = sd.CreatedJobStatusSchema().deserialize(body) - return HTTPCreated(location=body["location"], json=body) + return HTTPCreated(location=location, json=body, headers=headers) def map_progress(progress, range_min, range_max): diff --git a/weaver/utils.py b/weaver/utils.py index fee3624a0..4334311fa 100644 --- a/weaver/utils.py +++ b/weaver/utils.py @@ -387,12 +387,17 @@ def parse_prefer_header_execute_mode( This defines all conditions how to handle ``Prefer`` against applicable :term:`Process` description. - :rfc:`7240#section-4.1` HTTP Prefer header ``respond-async`` + .. seealso:: + If ``Prefer`` format is valid, but server decides it cannot be respected, it can be transparently ignored + (:rfc:`7240#section-2`). The server must respond with ``Preference-Applied`` indicating preserved preferences + it decided to respect. + :param header_container: Request headers to retrieve preference, if any available. :param supported_modes: Execute modes that are permitted for the operation that received the ``Prefer`` header. Resolved mode will respect this constrain following specification requirements of :term:`OGC API - Processes`. 
:param wait_max: - Maximum wait time enforced by the server. If requested wait time is greater, 'wait' preference will not be + Maximum wait time enforced by the server. If requested wait time is greater, ``wait`` preference will not be applied and will fallback to asynchronous response. :return: Tuple of resolved execution mode, wait time if specified, and header of applied preferences if possible. @@ -498,6 +503,18 @@ def is_uuid(maybe_uuid): return re.match(UUID_PATTERN, str(maybe_uuid)) is not None +def as_int(value, default): + # type: (Any, int) -> int + """ + Ensures a value is converted to :class:`int`. + """ + try: + return int(value) + except Exception: + pass + return default + + def parse_extra_options(option_str, sep=","): # type: (str, str) -> Dict[str, Optional[str]] """ diff --git a/weaver/wps/service.py b/weaver/wps/service.py index c9fa00683..cdcd81e1a 100644 --- a/weaver/wps/service.py +++ b/weaver/wps/service.py @@ -236,9 +236,11 @@ def _submit_job(self, wps_request): is_workflow = proc.type == ProcessType.WORKFLOW tags = req.args.get("tags", "").split(",") + ["xml", "wps-{}".format(wps_request.version)] data = wps2json_job_payload(wps_request, wps_process) - body = submit_job_handler(data, self.settings, proc.processEndpointWPS1, - process_id=pid, is_local=True, is_workflow=is_workflow, visibility=Visibility.PUBLIC, - language=wps_request.language, tags=tags, headers=dict(req.headers), context=ctx) + body, headers = submit_job_handler( + data, self.settings, proc.processEndpointWPS1, + process_id=pid, is_local=True, is_workflow=is_workflow, visibility=Visibility.PUBLIC, + language=wps_request.language, tags=tags, headers=dict(req.headers), context=ctx + ) # if Accept was JSON, provide response content as is # if anything else (even */*), return as XML @@ -247,7 +249,7 @@ def _submit_job(self, wps_request): # way to provide explicitly Accept header. Even our Wps1Process as Workflow step depends on this behaviour. 
accept_type = get_header("Accept", req.headers) if accept_type == ContentType.APP_JSON: - resp = get_job_submission_response(body) + resp = get_job_submission_response(body, headers) setattr(resp, "_update_status", lambda *_, **__: None) # patch to avoid pywps server raising return resp diff --git a/weaver/wps_restapi/processes/processes.py b/weaver/wps_restapi/processes/processes.py index 954afcb08..bdaec0e14 100644 --- a/weaver/wps_restapi/processes/processes.py +++ b/weaver/wps_restapi/processes/processes.py @@ -274,5 +274,5 @@ def submit_local_job(request): Execution location and method is according to deployed Application Package. """ process = get_process(request=request) - body = submit_job(request, process, tags=["wps-rest"]) - return get_job_submission_response(body) + body, headers = submit_job(request, process, tags=["wps-rest"]) + return get_job_submission_response(body, headers) diff --git a/weaver/wps_restapi/providers/providers.py b/weaver/wps_restapi/providers/providers.py index a4be46d9f..256524967 100644 --- a/weaver/wps_restapi/providers/providers.py +++ b/weaver/wps_restapi/providers/providers.py @@ -215,5 +215,5 @@ def submit_provider_job(request): store = get_db(request).get_store(StoreServices) provider_id = request.matchdict.get("provider_id") service = store.fetch_by_name(provider_id) - body = submit_job(request, service, tags=["wps-rest"]) - return get_job_submission_response(body) + body, headers = submit_job(request, service, tags=["wps-rest"]) + return get_job_submission_response(body, headers) diff --git a/weaver/wps_restapi/quotation/quotes.py b/weaver/wps_restapi/quotation/quotes.py index 1dd630216..033abfb77 100644 --- a/weaver/wps_restapi/quotation/quotes.py +++ b/weaver/wps_restapi/quotation/quotes.py @@ -2,6 +2,7 @@ from typing import TYPE_CHECKING import colander +from celery.exceptions import TimeoutError as CeleryTaskTimeoutError from pyramid.httpexceptions import HTTPAccepted, HTTPBadRequest, HTTPCreated, HTTPNotFound, HTTPOk 
from weaver.config import WeaverFeature, get_weaver_configuration @@ -15,7 +16,7 @@ from weaver.quotation.estimation import process_quote_estimator from weaver.sort import Sort from weaver.store.base import StoreBills, StoreProcesses, StoreQuotes -from weaver.utils import get_settings, parse_prefer_header_execute_mode +from weaver.utils import as_int, get_header, get_settings, parse_prefer_header_execute_mode from weaver.wps_restapi import swagger_definitions as sd from weaver.wps_restapi.processes.processes import submit_local_job @@ -81,13 +82,17 @@ def request_quote(request): } quote = Quote(**quote_info) quote = quote_store.save_quote(quote) - mode, wait, applied = parse_prefer_header_execute_mode(request.headers, process.jobControlOptions) + max_wait = as_int(settings.get("weaver.quote_sync_max_wait"), default=20) + mode, wait, applied = parse_prefer_header_execute_mode(request.headers, process.jobControlOptions, max_wait) result = process_quote_estimator.delay(quote.id) LOGGER.debug("Celery pending task [%s] for quote [%s].", result.id, quote.id) if mode == ExecuteMode.SYNC and wait: LOGGER.debug("Celery task requested as sync if it completes before (wait=%ss)", wait) - result.wait(timeout=wait) + try: + result.wait(timeout=wait) + except CeleryTaskTimeoutError: + pass if result.ready(): quote = quote_store.fetch_by_id(quote.id) data = quote.json() @@ -95,6 +100,13 @@ def request_quote(request): data.update({"links": quote.links(settings)}) data = sd.CreatedQuoteResponse().deserialize(data) return HTTPCreated(json=data) + else: + LOGGER.debug("Celery task requested as sync took too long to complete (wait=%ss). 
Continue in async.", wait) + # sync not respected, therefore must drop it + # since both could be provided as alternative preferences, drop only async with limited subset + prefer = get_header("Preference-Applied", applied, pop=True) + _, _, async_applied = parse_prefer_header_execute_mode({"Prefer": prefer}, [ExecuteMode.ASYNC]) + applied = async_applied data = quote.partial() data.update({"description": sd.AcceptedQuoteResponse.description}) diff --git a/weaver/wps_restapi/swagger_definitions.py b/weaver/wps_restapi/swagger_definitions.py index ef00c16e8..a0c89e43a 100644 --- a/weaver/wps_restapi/swagger_definitions.py +++ b/weaver/wps_restapi/swagger_definitions.py @@ -3000,30 +3000,41 @@ class ExecuteInputOutputs(ExtendedMappingSchema): inputs = ExecuteInputValues(default={}, description="Values submitted for execution.") outputs = ExecuteOutputSpec( # FIXME: add documentation reference link OGC/Weaver for further details. - description="Defines which outputs to be obtained from the execution (filtered or all), " - "as well as the reporting method for each output according to 'transmissionMode', " - "the 'response' type, and the execution 'mode' provided.", - # FIXME: allow omitting 'outputs' (https://github.com/crim-ca/weaver/issues/375) - # maybe this is good enough, but should have a proper test for it - # default={} + description=( + "Defines which outputs to be obtained from the execution (filtered or all), " + "as well as the reporting method for each output according to 'transmissionMode', " + "the 'response' type, and the execution 'mode' provided " + "(see for more details: https://pavics-weaver.readthedocs.io/en/latest/processes.html#execution-body)." + ), + default={} ) class Execute(ExecuteInputOutputs): mode = JobExecuteModeEnum( missing=drop, + default=ExecuteMode.AUTO, + deprecated=True, description=( "Desired execution mode specified directly. This is intended for backward compatibility support. 
" "To obtain more control over execution mode selection, employ the official Prefer header instead " "(see for more details: https://pavics-weaver.readthedocs.io/en/latest/processes.html#execution-mode)." ) ) + response = JobResponseOptionsEnum( + missing=drop, + default=ExecuteResponse.DOCUMENT, + description=( + "Indicates the desired representation format of the response. " + "(see for more details: https://pavics-weaver.readthedocs.io/en/latest/processes.html#execution-body)." + ) + ) notification_email = ExtendedSchemaNode( String(), missing=drop, validator=Email(), - description="Optionally send a notification email when the job is done.") - response = JobResponseOptionsEnum() + description="Optionally send a notification email when the job is done." + ) class QuoteStatusSchema(ExtendedSchemaNode): @@ -4296,8 +4307,20 @@ class NotImplementedPostProviderResponse(ExtendedMappingSchema): description = "Provider registration not supported using specified definition." +class PreferenceAppliedHeader(ExtendedSchemaNode): + description = "Applied preferences from submitted 'Prefer' header after validation." 
+ name = "Preference-Applied" + schema_type = String + example = "wait=10s, respond-async" + + +class LocationHeader(URL): + name = "Location" + + class CreatedJobLocationHeader(ResponseHeaders): - Location = URL(description="Status monitoring location of the job execution.") + location = LocationHeader(description="Status monitoring location of the job execution.") + prefer_applied = PreferenceAppliedHeader(missing=drop) class CreatedLaunchJobResponse(ExtendedMappingSchema): @@ -4307,7 +4330,8 @@ class CreatedLaunchJobResponse(ExtendedMappingSchema): class CompletedJobLocationHeader(ResponseHeaders): - Location = URL(description="Status location of the completed job execution.") + location = LocationHeader(description="Status location of the completed job execution.") + prefer_applied = PreferenceAppliedHeader(missing=drop) class CompletedJobStatusSchema(DescriptionSchema, JobStatusInfo): From 161cf9ccfd307fa33d7c7d14d414d903ae871808 Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Tue, 15 Mar 2022 15:47:52 -0400 Subject: [PATCH 03/34] adjust conformance prefer return none (relates to #414) --- .../json/opensearch_describe_process.json | 2 +- weaver/wps_restapi/api.py | 23 +++++++++++++------ 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/tests/opensearch/json/opensearch_describe_process.json b/tests/opensearch/json/opensearch_describe_process.json index 20709b6cd..aa1f4df5c 100644 --- a/tests/opensearch/json/opensearch_describe_process.json +++ b/tests/opensearch/json/opensearch_describe_process.json @@ -135,7 +135,7 @@ ], "version": "1.0.0", "jobControlOptions": [ - "async" + "async-execute" ], "outputTransmission": [ "reference" diff --git a/weaver/wps_restapi/api.py b/weaver/wps_restapi/api.py index 9c454dc01..97db3c9cd 100644 --- a/weaver/wps_restapi/api.py +++ b/weaver/wps_restapi/api.py @@ -321,23 +321,33 @@ def api_conformance(request): # noqa: F811 # ogcapi_proc_core + "/per/core/process-execute-input-inline-bbox", 
ogcapi_proc_core + "/per/core/process-execute-sync-job", ogcapi_proc_core + "/per/core/limit-response", + # ogcapi_proc_core + "/per/core/limit-default-minimum-maximum", ogcapi_proc_core + "/per/core/prev", ogcapi_proc_core + "/per/job-list/limit-response", ogcapi_proc_core + "/per/job-list/prev", # ogcapi_proc_core + "/rec/core/access-control-expose-headers", ogcapi_proc_core + "/rec/core/api-definition-oas", ogcapi_proc_core + "/rec/core/cross-origin", + ogcapi_proc_core + "/rec/core/content-length", # ogcapi_proc_core + "/rec/core/html", + # ogcapi_proc_core + "/rec/core/http-head", ogcapi_proc_core + "/rec/core/job-status", + ogcapi_proc_core + "/rec/core/job-results-async-many-json-prefer-none", + # FIXME: https://github.com/crim-ca/weaver/issues/414 + # ogcapi_proc_core + "/rec/core/job-results-async-many-json-prefer-minimal", + # ogcapi_proc_core + "/rec/core/job-results-async-many-json-prefer-representation", + # ogcapi_proc_core + "/per/core/job-results-async-many-other-formats", + ogcapi_proc_core + "/rec/core/process-execute-sync-many-json-prefer-none", + # ogcapi_proc_core + "/rec/core/process-execute-sync-many-json-prefer-minimal", + # ogcapi_proc_core + "/rec/core/process-execute-sync-many-json-prefer-representation", # ogcapi_proc_core + "/rec/core/link-header", ogcapi_proc_core + "/rec/core/ogc-process-description", # FIXME: error details (for all below: https://github.com/crim-ca/weaver/issues/320) # ogcapi_proc_core + "/rec/core/problem-details", - # FIXME: https://github.com/crim-ca/weaver/issues/247 (Prefer header) - # ogcapi_proc_core + "/rec/core/process-execute-handle-prefer", - # ogcapi_proc_core + "/rec/core/process-execute-honor-prefer", - # ogcapi_proc_core + "/rec/core/process-execute-mode-auto", - # ogcapi_proc_core + "/rec/core/process-execute-preference-applied", + ogcapi_proc_core + "/rec/core/process-execute-handle-prefer", + ogcapi_proc_core + "/rec/core/process-execute-honor-prefer", + ogcapi_proc_core + 
"/rec/core/process-execute-mode-auto", + ogcapi_proc_core + "/rec/core/process-execute-preference-applied", ogcapi_proc_core + "/rec/core/process-execute-sync-document-ref", ogcapi_proc_core + "/rec/core/next-1", ogcapi_proc_core + "/rec/core/next-2", @@ -382,8 +392,7 @@ def api_conformance(request): # noqa: F811 ogcapi_proc_core + "/req/core/process", ogcapi_proc_core + "/req/core/process-success", ogcapi_proc_core + "/req/core/process-exception/no-such-process", - # FIXME: https://github.com/crim-ca/weaver/issues/247 (Prefer header) - # ogcapi_proc_core + "/req/core/process-execute-auto-execution-mode", + ogcapi_proc_core + "/req/core/process-execute-auto-execution-mode", ogcapi_proc_core + "/req/core/process-execute-default-execution-mode", ogcapi_proc_core + "/req/core/process-execute-default-outputs", ogcapi_proc_core + "/req/core/process-execute-input-array", From 55208721b6d4db4b4e6c9b59ff1dbec1eb385881 Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Tue, 15 Mar 2022 18:07:03 -0400 Subject: [PATCH 04/34] remove references to fix #247 in CLI, always using async mode with monitoring to obtain sync status --- weaver/cli.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/weaver/cli.py b/weaver/cli.py index 350f8d231..d8017ab1d 100644 --- a/weaver/cli.py +++ b/weaver/cli.py @@ -548,10 +548,6 @@ def _update_files(self, inputs, url=None): auth_headers = {sd.XAuthVaultFileHeader.name: multi_tokens} return update_inputs, auth_headers - # FIXME: support sync (https://github.com/crim-ca/weaver/issues/247) - # :param execute_async: - # Execute the process asynchronously (user must call :meth:`monitor` themselves, - # or synchronously where monitoring is done automatically until completion before returning. def execute(self, process_id, # type: str inputs=None, # type: Optional[Union[str, JSON]] @@ -577,6 +573,12 @@ def execute(self, .. seealso:: :ref:`proc_op_execute` + .. 
note:: + Execution requests are always accomplished asynchronously. To obtain the final :term:`Job` status as if + they were executed synchronously, provide the :paramref:`monitor` argument. This offers more flexibility + over servers that could decide to ignore sync/async preferences, and avoids closing/timeout connection + errors that could occur for long running processes, since status is pooled iteratively rather than waiting. + :param process_id: Identifier of the process to execute. :param inputs: Literal :term:`JSON` or :term:`YAML` contents of the inputs submitted and inserted into the execution body, @@ -605,8 +607,7 @@ def execute(self, return result values, auth_headers = result data = { - # NOTE: since sync is not yet properly implemented in Weaver, simulate with monitoring after if requested - # FIXME: support 'sync' (https://github.com/crim-ca/weaver/issues/247) + # NOTE: Backward compatibility for servers that only know ``mode`` and don't handle ``Prefer`` header. "mode": ExecuteMode.ASYNC, "inputs": values, # FIXME: support 'response: raw' (https://github.com/crim-ca/weaver/issues/376) @@ -627,7 +628,7 @@ def execute(self, LOGGER.info("Executing [%s] with inputs:\n%s", process_id, OutputFormat.convert(values, OutputFormat.JSON_STR)) path = f"{base}/processes/{process_id}/execution" # use OGC-API compliant endpoint (not '/jobs') - headers = {} + headers = {"Prefer": "respond-async"} # for more recent servers, OGC-API compliant async request headers.update(self._headers) headers.update(auth_headers) resp = request_extra("POST", path, json=data, headers=headers, settings=self._settings) @@ -1216,11 +1217,6 @@ def make_parser(): Example: ``-I message='Hello Weaver' -I value:int=1234`` """) ) - # FIXME: support sync (https://github.com/crim-ca/weaver/issues/247) - # op_execute.add_argument( - # "-A", "--async", dest="execute_async", - # help="" - # ) op_execute.add_argument( "-M", "--monitor", dest="monitor", action="store_true", help="Automatically 
perform the monitoring operation following job submission to retrieve final results. " From bed33da67497793a1170da7072d45b4038b648c9 Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Tue, 15 Mar 2022 18:10:31 -0400 Subject: [PATCH 05/34] remove reference to fix #375 in CLI, outputs can be omitted but not filtered (#380) --- weaver/cli.py | 1 - 1 file changed, 1 deletion(-) diff --git a/weaver/cli.py b/weaver/cli.py index d8017ab1d..9d834c060 100644 --- a/weaver/cli.py +++ b/weaver/cli.py @@ -616,7 +616,6 @@ def execute(self, # FIXME: allow 'transmissionMode: value/reference' selection (https://github.com/crim-ca/weaver/issues/377) "outputs": {} } - # FIXME: since (https://github.com/crim-ca/weaver/issues/375) not implemented, auto-populate all the outputs result = self.describe(process_id, url=base) if not result.success: return OperationResult(False, "Could not obtain process description for execution.", From 42640cd8cc8bc035583d8b44c179982c481529ab Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Tue, 15 Mar 2022 21:30:41 -0400 Subject: [PATCH 06/34] fix test to consider job exec params now optional to allow alternatives --- tests/wps_restapi/test_processes.py | 58 ++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 17 deletions(-) diff --git a/tests/wps_restapi/test_processes.py b/tests/wps_restapi/test_processes.py index 19aff4641..dea84bd86 100644 --- a/tests/wps_restapi/test_processes.py +++ b/tests/wps_restapi/test_processes.py @@ -1004,28 +1004,52 @@ def test_execute_process_no_json_body(self): assert resp.content_type == ContentType.APP_JSON def test_execute_process_missing_required_params(self): + """ + Validate execution against missing parameters. + + .. versionchanged:: 4.15.0 + Multiple parameters are not **required** anymore because the alternative with ``Prefer`` header + for :term:`OGC API - Processes` compliance is permitted. 
When the values are specified though, + they should still be validated to provide relevant error details to the user. + """ execute_data = self.get_process_execute_template(fully_qualified_name(self)) # remove components for testing different cases - execute_data_tests = [deepcopy(execute_data) for _ in range(7)] - execute_data_tests[0].pop("outputs") - execute_data_tests[1].pop("mode") - execute_data_tests[2].pop("response") - execute_data_tests[3]["mode"] = "random" - execute_data_tests[4]["response"] = "random" - execute_data_tests[5]["inputs"] = [{"test_input": "test_value"}] # noqa # bad format on purpose - execute_data_tests[6]["outputs"] = [{"id": "test_output", "transmissionMode": "random"}] + execute_data_tests = [[True, deepcopy(execute_data)] for _ in range(7)] + execute_data_tests[0][0] = False + execute_data_tests[0][1].pop("outputs") + execute_data_tests[1][0] = False + execute_data_tests[1][1].pop("mode") + execute_data_tests[2][0] = False + execute_data_tests[2][1].pop("response") + execute_data_tests[3][1]["mode"] = "random" + execute_data_tests[4][1]["response"] = "random" + execute_data_tests[5][1]["inputs"] = [{"test_input": "test_value"}] # noqa # bad format on purpose + execute_data_tests[6][1]["outputs"] = [{"id": "test_output", "transmissionMode": "random"}] + + def no_op(*_, **__): + return Status.SUCCEEDED path = "/processes/{}/jobs".format(self.process_public.identifier) - for i, exec_data in enumerate(execute_data_tests): - data_json = json.dumps(exec_data, indent=2) - with stopit.ThreadingTimeout(3) as timeout: # timeout to kill execution if schema validation did not raise - resp = self.app.post_json(path, params=exec_data, headers=self.json_headers, expect_errors=True) - msg = "Failed with test variation '{}' with status '{}' using:\n{}" - assert resp.status_code in [400, 422], msg.format(i, resp.status_code, data_json) - assert resp.content_type == ContentType.APP_JSON, msg.format(i, resp.content_type) - msg = "Killed test '{}'
request taking too long using:\n{}".format(i, data_json) - assert timeout.state == timeout.EXECUTED, msg + with contextlib.ExitStack() as stack_exec: + for mock_exec in mocked_execute_celery(func_execute_task=no_op): + stack_exec.enter_context(mock_exec) + for i, (is_invalid, exec_data) in enumerate(execute_data_tests): + data_json = json.dumps(exec_data, indent=2) + try: + # timeout to kill execution if schema validation did not raise + with stopit.ThreadingTimeout(3) as timeout: + resp = self.app.post_json(path, params=exec_data, headers=self.json_headers, expect_errors=True) + msg = "Failed with test variation '{}' with status '{}' using:\n{}" + code = [400, 422] if is_invalid else [201] + assert resp.status_code in code, msg.format(i, resp.status_code, data_json) + assert resp.content_type == ContentType.APP_JSON, msg.format(i, resp.content_type) + except stopit.TimeoutException: + # if required, not normal to have passed validation + # if optional, valid since omitting field does not raise missing field in schema + if is_invalid: + msg = "Killed test '{}' request taking too long using:\n{}".format(i, data_json) + assert timeout.state == timeout.EXECUTED, msg def test_execute_process_dont_cast_one_of(self): """ From 637adcab7296f22ef0ae3e14cfab1c1a78055ca9 Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Tue, 15 Mar 2022 21:36:06 -0400 Subject: [PATCH 07/34] fix lint --- docs/_static/custom.css | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/_static/custom.css b/docs/_static/custom.css index 6cd44e584..f649bbc24 100644 --- a/docs/_static/custom.css +++ b/docs/_static/custom.css @@ -1,4 +1,3 @@ - /* override readthedocs theme to enforce using full-screen width for content */ .wy-nav-content { max-width: none; From 7b938fc164db8334d6aae62dd0bbae0a25d51cee Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Tue, 15 Mar 2022 22:41:21 -0400 Subject: [PATCH 08/34] celery beat to worker docker command for task result cleaup --- 
.dockerignore | 5 +++++ .gitignore | 5 +++++ CHANGES.rst | 2 ++ docker/Dockerfile-worker | 4 ++-- weaver/processes/execution.py | 5 +++-- weaver/typedefs.py | 3 +++ 6 files changed, 20 insertions(+), 4 deletions(-) diff --git a/.dockerignore b/.dockerignore index d6268db1b..71a7d2156 100644 --- a/.dockerignore +++ b/.dockerignore @@ -18,6 +18,11 @@ env package-lock.json node_modules +## Celery +celeryconfig* +celery-config* +celerybeat-schedule.* + ## Python / Extensions etc. *~ *.mo diff --git a/.gitignore b/.gitignore index 44f8811fc..b92754eec 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,11 @@ node_modules ## Docker #Dockerfile +## Celery +celeryconfig* +celery-config* +celerybeat-schedule.* + ## Python / Extensions etc. *~ *.mo diff --git a/CHANGES.rst b/CHANGES.rst index d1fd75fa3..bf9609332 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -18,6 +18,8 @@ Changes: - Increase minor version of all ``builtin`` processes that will now be executable in wither (a)synchronous modes. - Add ``weaver.exec_sync_max_wait`` and ``weaver.quote_sync_max_wait`` settings allowing custom definition for the maximum duration that can be specified to wait for a `synchronous` response from task workers. +- Add ``-B`` (``celery beat``) option to Docker command of ``weaver-worker`` to run scheduled task in parallel + to ``celery worker`` in order to periodically cleanup task results introduced by *synchronous* execution. - Improve conformance for returned status codes and error messages when requesting results for an unfinished, failed, or dismissed ``Job``. 
- Adjust conformance item references to correspond with `OGC API - Processes: Part 2` renamed from `Transactions` to diff --git a/docker/Dockerfile-worker b/docker/Dockerfile-worker index 16f0c4c4e..bb32aa319 100644 --- a/docker/Dockerfile-worker +++ b/docker/Dockerfile-worker @@ -13,9 +13,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ && apt update \ # NOTE: # Only install CLI package, 'docker-ce' and 'containerd.io' not required as they should be provided by host. - # Docker sibliing execution is expected. See 'docker/docker-compose.yml.example' for details. + # Docker sibling execution is expected. See 'docker/docker-compose.yml.example' for details. && apt install --no-install-recommends docker-ce-cli \ && rm -rf /var/lib/apt/lists/* # run app -CMD celery worker -E -A pyramid_celery.celery_app --ini "${APP_CONFIG_DIR}/weaver.ini" +CMD celery worker -B -E -A pyramid_celery.celery_app --ini "${APP_CONFIG_DIR}/weaver.ini" diff --git a/weaver/processes/execution.py b/weaver/processes/execution.py index bd449b781..e089f8ac5 100644 --- a/weaver/processes/execution.py +++ b/weaver/processes/execution.py @@ -60,7 +60,7 @@ from weaver.datatype import Job from weaver.processes.convert import OWS_Input_Type, ProcessOWS from weaver.status import StatusType - from weaver.typedefs import HeadersType, HeaderCookiesType, JSON, SettingsType + from weaver.typedefs import CeleryResult, HeadersType, HeaderCookiesType, JSON, SettingsType from weaver.visibility import AnyVisibility @@ -587,7 +587,8 @@ def submit_job_handler(payload, # type: JSON resp_headers = {"Location": location_url} resp_headers.update(applied) - result = execute_process.delay(job_id=job.id, wps_url=clean_ows_url(service_url), headers=headers) + wps_url = clean_ows_url(service_url) + result = execute_process.delay(job_id=job.id, wps_url=wps_url, headers=headers) # type: CeleryResult LOGGER.debug("Celery pending task [%s] for job [%s].", result.id, job.id) if not is_execute_async: 
LOGGER.debug("Celery task requested as sync if it completes before (wait=%ss)", wait) diff --git a/weaver/typedefs.py b/weaver/typedefs.py index b9d8fd00a..25293eadf 100644 --- a/weaver/typedefs.py +++ b/weaver/typedefs.py @@ -24,6 +24,7 @@ FileSystemPathType = str from celery.app import Celery + from celery.result import AsyncResult, EagerResult, GroupResult, ResultSet from owslib.wps import BoundingBoxDataInput, ComplexDataInput, Process as ProcessOWS, WPSExecution from pyramid.httpexceptions import HTTPSuccessful, HTTPRedirection from pyramid.registry import Registry @@ -298,3 +299,5 @@ def __call__(self, message: str, progress: Number, status: AnyStatusType, *args: "inputs": JobInputs, "outputs": JobOutputs, }) + + CeleryResult = Union[AsyncResult, EagerResult, GroupResult, ResultSet] From b59949f817b1fcc625c53445b52a066412a86cc4 Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Tue, 15 Mar 2022 22:48:29 -0400 Subject: [PATCH 09/34] ignore linkcheck celery docs down (relates to https://github.com/celery/celery/issues/7351) --- docs/source/conf.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/source/conf.py b/docs/source/conf.py index 339995ed6..fa436c798 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -423,6 +423,8 @@ def doc_redirect_include(file_path): # dummy values r"http[s]*://localhost.*/", r"http[s]*://example.com.*", + # ignore celery docs having problem (https://github.com/celery/celery/issues/7351) + "https://docs.celeryproject.org/", "https://mouflon.dkrz.de/", # following have sporadic downtimes "https://esgf-data.dkrz.de/", From ff2f94c3de77f1fbec6210139a10a7a3a1c883b3 Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Tue, 15 Mar 2022 23:11:40 -0400 Subject: [PATCH 10/34] fix lint --- tests/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/utils.py b/tests/utils.py index d05521307..1ee27b7dc 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -871,10 +871,10 @@ 
def id(self): # since delay is mocked and blocks to execute, assume sync is complete at this point # all following methods return what would be returned normally in sync mode - def wait(*_, **__): + def wait(self, *_, **__): raise CeleryTaskTimeoutError - def ready(*_, **__): + def ready(self, *_, **__): return True task = MockTask() From 9d56a4e59ac41909f73c9bcedd137dd06aba0d49 Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Wed, 16 Mar 2022 11:13:07 -0400 Subject: [PATCH 11/34] ensure jobControlOptions and outputTransmission are reported sorted --- weaver/datatype.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/weaver/datatype.py b/weaver/datatype.py index ea701700d..54da9d201 100644 --- a/weaver/datatype.py +++ b/weaver/datatype.py @@ -1805,7 +1805,7 @@ def jobControlOptions(self): # noqa: N802 jco = [opt for opt in jco if opt is not None] if len(jco) == 0: jco.append(ExecuteControlOption.ASYNC) - self["jobControlOptions"] = jco + self["jobControlOptions"] = list(sorted(jco)) return dict.__getitem__(self, "jobControlOptions") @property @@ -1818,7 +1818,7 @@ def outputTransmission(self): # noqa: N802 out = [mode for mode in out if mode is not None] if len(out) == 0: out.append(ExecuteTransmissionMode.REFERENCE) - self["outputTransmission"] = out + self["outputTransmission"] = list(sorted(out)) return dict.__getitem__(self, "outputTransmission") @property From 25fdd0e86fc19efead9aad581d4f3bdf8a1248c0 Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Wed, 16 Mar 2022 11:16:07 -0400 Subject: [PATCH 12/34] replace all outputTransmission 'reference' by 'value' to correctly report default Weaver behaviour --- CHANGES.rst | 2 + .../AggregateESGF/execute.json | 2 +- .../CatalogSearch/execute.json | 2 +- .../CatalogSearch/package.cwl | 2 +- .../execute.json | 2 +- .../DockerCopyImages/execute.json | 2 +- .../DockerCopyNestedOutDir/execute.json | 2 +- .../DockerStageImages/execute.json | 2 +- 
.../Finch_IceDays/execute.json | 2 +- .../SubsetESGF/execute.json | 2 +- .../SubsetNASAESGF/execute.json | 2 +- .../WorkflowChainCopy/execute.json | 2 +- .../WorkflowChainStrings/execute.json | 2 +- .../WorkflowESGF/execute.json | 2 +- .../WorkflowFileToSubsetCRIM/execute.json | 2 +- .../WorkflowStageCopyImages/execute.json | 2 +- .../WorkflowSubsetIceDays/execute.json | 2 +- .../execute.json | 2 +- .../execute.json | 2 +- .../WorkflowSubsetPicker/execute.json | 2 +- tests/functional/test_builtin.py | 8 ++-- tests/functional/test_docker_app.py | 4 +- tests/functional/test_quoting.py | 2 +- tests/functional/test_wps_package.py | 14 +++---- tests/functional/test_wps_provider.py | 4 +- tests/opensearch/json/opensearch_deploy.json | 2 +- .../json/opensearch_describe_process.json | 2 +- tests/opensearch/json/opensearch_execute.json | 4 +- tests/opensearch/json/opensearch_process.json | 4 +- .../resources/test_describe_process_wps3.json | 2 +- tests/wps_restapi/test_jobs.py | 2 +- tests/wps_restapi/test_processes.py | 2 +- tests/wps_restapi/test_providers.py | 4 +- weaver/datatype.py | 6 +-- weaver/processes/convert.py | 2 +- weaver/processes/wps3_process.py | 2 +- .../examples/providers_processes_details.json | 40 +++++++++---------- weaver/wps_restapi/swagger_definitions.py | 8 ++-- 38 files changed, 77 insertions(+), 75 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index bf9609332..7577d240f 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -39,6 +39,8 @@ Fixes: ------ - Fix ``outputs`` permitted to be completely omitted from the execution request (resolves `#375 `_). +- Fix all instances of ``outputTransmission`` reported as ``reference`` although `Weaver` behaves with the ``value`` + method, which is to return values and file references in content body, instead of HTTP header ``Link`` references. .. 
_changes_4.14.0: diff --git a/tests/functional/application-packages/AggregateESGF/execute.json b/tests/functional/application-packages/AggregateESGF/execute.json index 925a870a8..35037c126 100644 --- a/tests/functional/application-packages/AggregateESGF/execute.json +++ b/tests/functional/application-packages/AggregateESGF/execute.json @@ -22,7 +22,7 @@ "outputs": [ { "id": "output", - "transmissionMode": "reference" + "transmissionMode": "value" } ] } diff --git a/tests/functional/application-packages/CatalogSearch/execute.json b/tests/functional/application-packages/CatalogSearch/execute.json index 0a0949de0..48322f9c9 100644 --- a/tests/functional/application-packages/CatalogSearch/execute.json +++ b/tests/functional/application-packages/CatalogSearch/execute.json @@ -10,7 +10,7 @@ "outputs": [ { "id": "output", - "transmissionMode": "reference" + "transmissionMode": "value" } ] } diff --git a/tests/functional/application-packages/CatalogSearch/package.cwl b/tests/functional/application-packages/CatalogSearch/package.cwl index b0b995099..c6ae0f45c 100644 --- a/tests/functional/application-packages/CatalogSearch/package.cwl +++ b/tests/functional/application-packages/CatalogSearch/package.cwl @@ -39,7 +39,7 @@ "async-execute" ], "outputTransmission": [ - "reference" + "value" ] }, "executionUnit": [ diff --git a/tests/functional/application-packages/ColibriFlyingpigeon_SubsetBbox/execute.json b/tests/functional/application-packages/ColibriFlyingpigeon_SubsetBbox/execute.json index 0247ea813..4f8d82642 100644 --- a/tests/functional/application-packages/ColibriFlyingpigeon_SubsetBbox/execute.json +++ b/tests/functional/application-packages/ColibriFlyingpigeon_SubsetBbox/execute.json @@ -26,7 +26,7 @@ "outputs": [ { "id": "output", - "transmissionMode": "reference" + "transmissionMode": "value" } ] } diff --git a/tests/functional/application-packages/DockerCopyImages/execute.json b/tests/functional/application-packages/DockerCopyImages/execute.json index 
6d0c771e2..5a2f3884e 100644 --- a/tests/functional/application-packages/DockerCopyImages/execute.json +++ b/tests/functional/application-packages/DockerCopyImages/execute.json @@ -10,7 +10,7 @@ "outputs": [ { "id": "output_files", - "transmissionMode": "reference" + "transmissionMode": "value" } ] } diff --git a/tests/functional/application-packages/DockerCopyNestedOutDir/execute.json b/tests/functional/application-packages/DockerCopyNestedOutDir/execute.json index 94e0479b9..61c6e2e5d 100644 --- a/tests/functional/application-packages/DockerCopyNestedOutDir/execute.json +++ b/tests/functional/application-packages/DockerCopyNestedOutDir/execute.json @@ -10,7 +10,7 @@ "outputs": [ { "id": "output_files", - "transmissionMode": "reference" + "transmissionMode": "value" } ] } diff --git a/tests/functional/application-packages/DockerStageImages/execute.json b/tests/functional/application-packages/DockerStageImages/execute.json index fb3d6d8bb..350c5705a 100644 --- a/tests/functional/application-packages/DockerStageImages/execute.json +++ b/tests/functional/application-packages/DockerStageImages/execute.json @@ -10,7 +10,7 @@ "outputs": [ { "id": "staging_output", - "transmissionMode": "reference" + "transmissionMode": "value" } ] } diff --git a/tests/functional/application-packages/Finch_IceDays/execute.json b/tests/functional/application-packages/Finch_IceDays/execute.json index 0870dd611..dda3863eb 100644 --- a/tests/functional/application-packages/Finch_IceDays/execute.json +++ b/tests/functional/application-packages/Finch_IceDays/execute.json @@ -14,7 +14,7 @@ "outputs": [ { "id": "output_netcdf", - "transmissionMode": "reference" + "transmissionMode": "value" } ] } diff --git a/tests/functional/application-packages/SubsetESGF/execute.json b/tests/functional/application-packages/SubsetESGF/execute.json index 66353dfb9..6d0db78f9 100644 --- a/tests/functional/application-packages/SubsetESGF/execute.json +++ 
b/tests/functional/application-packages/SubsetESGF/execute.json @@ -54,7 +54,7 @@ "outputs": [ { "id": "output", - "transmissionMode": "reference" + "transmissionMode": "value" } ] } diff --git a/tests/functional/application-packages/SubsetNASAESGF/execute.json b/tests/functional/application-packages/SubsetNASAESGF/execute.json index d57906574..e07a28830 100644 --- a/tests/functional/application-packages/SubsetNASAESGF/execute.json +++ b/tests/functional/application-packages/SubsetNASAESGF/execute.json @@ -50,7 +50,7 @@ "outputs": [ { "id": "output", - "transmissionMode": "reference" + "transmissionMode": "value" } ] } diff --git a/tests/functional/application-packages/WorkflowChainCopy/execute.json b/tests/functional/application-packages/WorkflowChainCopy/execute.json index 26428dd91..963ea522b 100644 --- a/tests/functional/application-packages/WorkflowChainCopy/execute.json +++ b/tests/functional/application-packages/WorkflowChainCopy/execute.json @@ -10,7 +10,7 @@ "outputs": [ { "id": "output", - "transmissionMode": "reference" + "transmissionMode": "value" } ] } diff --git a/tests/functional/application-packages/WorkflowChainStrings/execute.json b/tests/functional/application-packages/WorkflowChainStrings/execute.json index 69426ca4d..0a5e91f78 100644 --- a/tests/functional/application-packages/WorkflowChainStrings/execute.json +++ b/tests/functional/application-packages/WorkflowChainStrings/execute.json @@ -4,7 +4,7 @@ }, "outputs": { "output": { - "transmissionMode": "reference" + "transmissionMode": "value" } } } diff --git a/tests/functional/application-packages/WorkflowESGF/execute.json b/tests/functional/application-packages/WorkflowESGF/execute.json index 31c52ee18..6253c1dbf 100644 --- a/tests/functional/application-packages/WorkflowESGF/execute.json +++ b/tests/functional/application-packages/WorkflowESGF/execute.json @@ -18,7 +18,7 @@ "outputs": [ { "id": "output_netcdf", - "transmissionMode": "reference" + "transmissionMode": "value" } ] } diff --git 
a/tests/functional/application-packages/WorkflowFileToSubsetCRIM/execute.json b/tests/functional/application-packages/WorkflowFileToSubsetCRIM/execute.json index 46ce4760a..10e76a0f1 100644 --- a/tests/functional/application-packages/WorkflowFileToSubsetCRIM/execute.json +++ b/tests/functional/application-packages/WorkflowFileToSubsetCRIM/execute.json @@ -26,7 +26,7 @@ "outputs": [ { "id": "output", - "transmissionMode": "reference" + "transmissionMode": "value" } ] } diff --git a/tests/functional/application-packages/WorkflowStageCopyImages/execute.json b/tests/functional/application-packages/WorkflowStageCopyImages/execute.json index b499bd81b..79aaea34f 100644 --- a/tests/functional/application-packages/WorkflowStageCopyImages/execute.json +++ b/tests/functional/application-packages/WorkflowStageCopyImages/execute.json @@ -10,7 +10,7 @@ "outputs": [ { "id": "output", - "transmissionMode": "reference" + "transmissionMode": "value" } ] } diff --git a/tests/functional/application-packages/WorkflowSubsetIceDays/execute.json b/tests/functional/application-packages/WorkflowSubsetIceDays/execute.json index d1a64db22..01778a692 100644 --- a/tests/functional/application-packages/WorkflowSubsetIceDays/execute.json +++ b/tests/functional/application-packages/WorkflowSubsetIceDays/execute.json @@ -30,7 +30,7 @@ "outputs": [ { "id": "output", - "transmissionMode": "reference" + "transmissionMode": "value" } ] } diff --git a/tests/functional/application-packages/WorkflowSubsetLLNL_SubsetCRIM/execute.json b/tests/functional/application-packages/WorkflowSubsetLLNL_SubsetCRIM/execute.json index ac83ebba4..e5c6fddc9 100644 --- a/tests/functional/application-packages/WorkflowSubsetLLNL_SubsetCRIM/execute.json +++ b/tests/functional/application-packages/WorkflowSubsetLLNL_SubsetCRIM/execute.json @@ -51,7 +51,7 @@ "outputs": [ { "id": "output", - "transmissionMode": "reference" + "transmissionMode": "value" } ] } diff --git 
a/tests/functional/application-packages/WorkflowSubsetNASAESGF_SubsetCRIM/execute.json b/tests/functional/application-packages/WorkflowSubsetNASAESGF_SubsetCRIM/execute.json index 7e2b3a2cf..4af0849c4 100644 --- a/tests/functional/application-packages/WorkflowSubsetNASAESGF_SubsetCRIM/execute.json +++ b/tests/functional/application-packages/WorkflowSubsetNASAESGF_SubsetCRIM/execute.json @@ -46,7 +46,7 @@ "outputs": [ { "id": "output", - "transmissionMode": "reference" + "transmissionMode": "value" } ] } diff --git a/tests/functional/application-packages/WorkflowSubsetPicker/execute.json b/tests/functional/application-packages/WorkflowSubsetPicker/execute.json index d98858ab8..b22e58274 100644 --- a/tests/functional/application-packages/WorkflowSubsetPicker/execute.json +++ b/tests/functional/application-packages/WorkflowSubsetPicker/execute.json @@ -30,7 +30,7 @@ "outputs": [ { "id": "output", - "transmissionMode": "reference" + "transmissionMode": "value" } ] } diff --git a/tests/functional/test_builtin.py b/tests/functional/test_builtin.py index d8961ba7c..7c3bbc299 100644 --- a/tests/functional/test_builtin.py +++ b/tests/functional/test_builtin.py @@ -60,7 +60,7 @@ def test_jsonarray2netcdf_describe_old_schema(self): assert len(body["process"]["outputs"][0]["formats"]) == 1 assert body["process"]["outputs"][0]["formats"][0]["mediaType"] == ContentType.APP_NETCDF assert body["jobControlOptions"] == [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC] - assert body["outputTransmission"] == [ExecuteTransmissionMode.REFERENCE] + assert body["outputTransmission"] == [ExecuteTransmissionMode.VALUE] def test_jsonarray2netcdf_describe_ogc_schema(self): resp = self.app.get("/processes/jsonarray2netcdf", headers=self.json_headers) @@ -84,7 +84,7 @@ def test_jsonarray2netcdf_describe_ogc_schema(self): assert len(body["outputs"]["output"]["formats"]) == 1 assert body["outputs"]["output"]["formats"][0]["mediaType"] == ContentType.APP_NETCDF assert 
body["jobControlOptions"] == [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC] - assert body["outputTransmission"] == [ExecuteTransmissionMode.REFERENCE] + assert body["outputTransmission"] == [ExecuteTransmissionMode.VALUE] def test_jsonarray2netcdf_execute_async(self): dirname = tempfile.gettempdir() @@ -102,7 +102,7 @@ def test_jsonarray2netcdf_execute_async(self): "mode": ExecuteMode.ASYNC, "response": ExecuteResponse.DOCUMENT, "inputs": [{"id": "input", "href": os.path.join(dirname, tmp_json.name)}], - "outputs": [{"id": "output", "transmissionMode": ExecuteTransmissionMode.REFERENCE}], + "outputs": [{"id": "output", "transmissionMode": ExecuteTransmissionMode.VALUE}], } for mock_exec in mocked_execute_celery(): @@ -143,7 +143,7 @@ def test_jsonarray2netcdf_execute_sync(self): tmp_json.seek(0) data = { "inputs": [{"id": "input", "href": os.path.join(dirname, tmp_json.name)}], - "outputs": [{"id": "output", "transmissionMode": ExecuteTransmissionMode.REFERENCE}], + "outputs": [{"id": "output", "transmissionMode": ExecuteTransmissionMode.VALUE}], } headers = {"Prefer": "wait=10"} headers.update(self.json_headers) diff --git a/tests/functional/test_docker_app.py b/tests/functional/test_docker_app.py index 9e3a066fa..1195f478d 100644 --- a/tests/functional/test_docker_app.py +++ b/tests/functional/test_docker_app.py @@ -148,7 +148,7 @@ def test_execute_wps_rest_resp_json(self): {"id": "file", "href": tmp_file.name}, ], "outputs": [ - {"id": self.out_key, "transmissionMode": ExecuteTransmissionMode.REFERENCE}, + {"id": self.out_key, "transmissionMode": ExecuteTransmissionMode.VALUE}, ] } for mock_exec in mocked_execute_celery(): @@ -335,7 +335,7 @@ def test_execute_docker_embedded_python_script(self): {"id": "cost", "value": cost} ], "outputs": [ - {"id": "quote", "transmissionMode": ExecuteTransmissionMode.REFERENCE}, + {"id": "quote", "transmissionMode": ExecuteTransmissionMode.VALUE}, ] } resp = mocked_sub_requests(self.app, "POST", path, json=body, 
headers=self.json_headers, only_local=True) diff --git a/tests/functional/test_quoting.py b/tests/functional/test_quoting.py index 280e19fef..558e0a62e 100644 --- a/tests/functional/test_quoting.py +++ b/tests/functional/test_quoting.py @@ -84,7 +84,7 @@ def test_quote_atomic_process(self, mocked_estimate): }, "outputs": { "output": { - "transmissionMode": ExecuteTransmissionMode.REFERENCE + "transmissionMode": ExecuteTransmissionMode.VALUE } } } diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index e95d980f1..326d17735 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -639,7 +639,7 @@ def test_execute_file_type_io_format_references(self): data.update({ "mode": ExecuteMode.ASYNC, "response": ExecuteResponse.DOCUMENT, - "outputs": {"output": {"transmissionMode": ExecuteTransmissionMode.REFERENCE}} + "outputs": {"output": {"transmissionMode": ExecuteTransmissionMode.VALUE}} }) with contextlib.ExitStack() as stack_exec: for mock_exec in mocked_execute_celery(): @@ -1414,7 +1414,7 @@ def test_execute_job_with_accept_languages(self): "mode": ExecuteMode.ASYNC, "response": ExecuteResponse.DOCUMENT, "inputs": [{"id": "message", "value": "test"}], - "outputs": [{"id": "output", "transmissionMode": ExecuteTransmissionMode.REFERENCE}] + "outputs": [{"id": "output", "transmissionMode": ExecuteTransmissionMode.VALUE}] } headers = deepcopy(self.json_headers) @@ -1770,7 +1770,7 @@ def test_execute_job_with_context_output_dir(self): "mode": ExecuteMode.ASYNC, "response": ExecuteResponse.DOCUMENT, "inputs": [{"id": "message", "value": "test"}], - "outputs": [{"id": "output", "transmissionMode": ExecuteTransmissionMode.REFERENCE}] + "outputs": [{"id": "output", "transmissionMode": ExecuteTransmissionMode.VALUE}] } headers = deepcopy(self.json_headers) @@ -1846,7 +1846,7 @@ def test_execute_job_with_custom_file_name(self): "mode": ExecuteMode.ASYNC, "response": ExecuteResponse.DOCUMENT, "inputs": 
[{"id": "input_file", "href": tmp_http}], - "outputs": [{"id": "output", "transmissionMode": ExecuteTransmissionMode.REFERENCE}] + "outputs": [{"id": "output", "transmissionMode": ExecuteTransmissionMode.VALUE}] } resp = mocked_sub_requests(self.app, "post_json", proc_url, timeout=5, data=exec_body, headers=headers, only_local=True) @@ -1892,7 +1892,7 @@ def test_dismiss_job(self): "mode": ExecuteMode.ASYNC, "response": ExecuteResponse.DOCUMENT, "inputs": [{"id": "delay", "value": 1}], - "outputs": [{"id": "output", "transmissionMode": ExecuteTransmissionMode.REFERENCE}] + "outputs": [{"id": "output", "transmissionMode": ExecuteTransmissionMode.VALUE}] } with contextlib.ExitStack() as stack_exec: @@ -2459,8 +2459,8 @@ def test_execute_application_package_process_with_bucket(self): {"id": "input_with_s3", "href": test_bucket_ref}, ], "outputs": [ - {"id": "output_from_http", "transmissionMode": ExecuteTransmissionMode.REFERENCE}, - {"id": "output_from_s3", "transmissionMode": ExecuteTransmissionMode.REFERENCE}, + {"id": "output_from_http", "transmissionMode": ExecuteTransmissionMode.VALUE}, + {"id": "output_from_s3", "transmissionMode": ExecuteTransmissionMode.VALUE}, ] } with contextlib.ExitStack() as stack_exec: diff --git a/tests/functional/test_wps_provider.py b/tests/functional/test_wps_provider.py index c03e1ca9f..ff72bb585 100644 --- a/tests/functional/test_wps_provider.py +++ b/tests/functional/test_wps_provider.py @@ -187,7 +187,7 @@ def test_register_describe_execute_ncdump(self, mock_responses): assert "GetCapabilities" in links["service-desc"] assert ExecuteControlOption.ASYNC in body["jobControlOptions"] - assert ExecuteTransmissionMode.REFERENCE in body["outputTransmission"] + assert ExecuteTransmissionMode.VALUE in body["outputTransmission"] # validate execution submission # (don't actually execute because server is mocked, only validate parsing of I/O and job creation) @@ -198,7 +198,7 @@ def test_register_describe_execute_ncdump(self, 
mock_responses): "mode": ExecuteMode.ASYNC, "response": ExecuteResponse.DOCUMENT, "inputs": [{"id": "dataset", "href": exec_file}], - "outputs": [{"id": "output", "transmissionMode": ExecuteTransmissionMode.REFERENCE}] + "outputs": [{"id": "output", "transmissionMode": ExecuteTransmissionMode.VALUE}] } status_url = resources.TEST_REMOTE_SERVER_URL + "/status.xml" output_url = resources.TEST_REMOTE_SERVER_URL + "/output.txt" diff --git a/tests/opensearch/json/opensearch_deploy.json b/tests/opensearch/json/opensearch_deploy.json index 34a902d5b..423f9698e 100644 --- a/tests/opensearch/json/opensearch_deploy.json +++ b/tests/opensearch/json/opensearch_deploy.json @@ -79,7 +79,7 @@ "async-execute" ], "outputTransmission": [ - "reference" + "value" ] }, "executionUnit": [ diff --git a/tests/opensearch/json/opensearch_describe_process.json b/tests/opensearch/json/opensearch_describe_process.json index aa1f4df5c..b1788da6a 100644 --- a/tests/opensearch/json/opensearch_describe_process.json +++ b/tests/opensearch/json/opensearch_describe_process.json @@ -138,7 +138,7 @@ "async-execute" ], "outputTransmission": [ - "reference" + "value" ] } } diff --git a/tests/opensearch/json/opensearch_execute.json b/tests/opensearch/json/opensearch_execute.json index 5033245a8..825b81b1f 100644 --- a/tests/opensearch/json/opensearch_execute.json +++ b/tests/opensearch/json/opensearch_execute.json @@ -22,7 +22,7 @@ "outputs": [ { "id": "output", - "transmissionMode": "reference" + "transmissionMode": "value" } ] -} \ No newline at end of file +} diff --git a/tests/opensearch/json/opensearch_process.json b/tests/opensearch/json/opensearch_process.json index a35af044d..2af09286d 100644 --- a/tests/opensearch/json/opensearch_process.json +++ b/tests/opensearch/json/opensearch_process.json @@ -87,7 +87,7 @@ ], "keywords": [], "outputTransmission": [ - "reference" + "value" ], "outputs": [ { @@ -236,7 +236,7 @@ ], "keywords": [], "outputTransmission": [ - "reference" + "value" ], "outputs": [ 
{ diff --git a/tests/resources/test_describe_process_wps3.json b/tests/resources/test_describe_process_wps3.json index 695715821..778fd2d2f 100644 --- a/tests/resources/test_describe_process_wps3.json +++ b/tests/resources/test_describe_process_wps3.json @@ -31,7 +31,7 @@ "executeEndpoint": "https://remote-server.com/processes/test-remote-process-wps3/jobs" }, "outputTransmission": [ - "reference" + "value" ], "jobControlOptions": [ "async-execute" diff --git a/tests/wps_restapi/test_jobs.py b/tests/wps_restapi/test_jobs.py index 997eca80a..1c9f7263f 100644 --- a/tests/wps_restapi/test_jobs.py +++ b/tests/wps_restapi/test_jobs.py @@ -537,7 +537,7 @@ def test_get_jobs_by_encrypted_email(self): email = "some.test@crim.ca" body = { "inputs": [{"id": "test_input", "data": "test"}], - "outputs": [{"id": "test_output", "transmissionMode": ExecuteTransmissionMode.REFERENCE}], + "outputs": [{"id": "test_output", "transmissionMode": ExecuteTransmissionMode.VALUE}], "mode": ExecuteMode.ASYNC, "response": ExecuteResponse.DOCUMENT, "notification_email": email diff --git a/tests/wps_restapi/test_processes.py b/tests/wps_restapi/test_processes.py index dea84bd86..96c694d41 100644 --- a/tests/wps_restapi/test_processes.py +++ b/tests/wps_restapi/test_processes.py @@ -136,7 +136,7 @@ def get_process_execute_template(test_input="not-specified"): ], "outputs": [ {"id": "test_output", - "transmissionMode": ExecuteTransmissionMode.REFERENCE} + "transmissionMode": ExecuteTransmissionMode.VALUE} ], "mode": ExecuteMode.ASYNC, "response": ExecuteResponse.DOCUMENT, diff --git a/tests/wps_restapi/test_providers.py b/tests/wps_restapi/test_providers.py index 6a72f9fcc..a61fb2580 100644 --- a/tests/wps_restapi/test_providers.py +++ b/tests/wps_restapi/test_providers.py @@ -346,7 +346,7 @@ def test_get_provider_process_description_old_schema(self): assert len(body["jobControlOptions"]) == 1 assert ExecuteControlOption.ASYNC in body["jobControlOptions"] assert len(body["outputTransmission"]) == 
1 - assert ExecuteTransmissionMode.REFERENCE in body["outputTransmission"] + assert ExecuteTransmissionMode.VALUE in body["outputTransmission"] assert "inputs" in process and isinstance(process["inputs"], list) assert all(isinstance(p_io, dict) and "id" in p_io for p_io in process["inputs"]) assert "outputs" in process and isinstance(process["outputs"], list) @@ -377,7 +377,7 @@ def test_get_provider_process_description_ogc_schema(self): assert len(process["jobControlOptions"]) == 1 assert ExecuteControlOption.ASYNC in process["jobControlOptions"] assert len(process["outputTransmission"]) == 1 - assert ExecuteTransmissionMode.REFERENCE in process["outputTransmission"] + assert ExecuteTransmissionMode.VALUE in process["outputTransmission"] assert "inputs" in process and isinstance(process["inputs"], dict) assert all(isinstance(p_io, str) and isinstance(process["inputs"][p_io], dict) for p_io in process["inputs"]) assert all("id" not in process["inputs"][p_io] for p_io in process["inputs"]) diff --git a/weaver/datatype.py b/weaver/datatype.py index 54da9d201..b7c5fe7c0 100644 --- a/weaver/datatype.py +++ b/weaver/datatype.py @@ -1811,13 +1811,13 @@ def jobControlOptions(self): # noqa: N802 @property def outputTransmission(self): # noqa: N802 # type: () -> List[AnyExecuteTransmissionMode] - out = self.setdefault("outputTransmission", [ExecuteTransmissionMode.REFERENCE]) + out = self.setdefault("outputTransmission", [ExecuteTransmissionMode.VALUE]) if not isinstance(out, list): # eg: None, bw-compat - out = [ExecuteTransmissionMode.REFERENCE] + out = [ExecuteTransmissionMode.VALUE] out = [ExecuteTransmissionMode.get(mode) for mode in out] out = [mode for mode in out if mode is not None] if len(out) == 0: - out.append(ExecuteTransmissionMode.REFERENCE) + out.append(ExecuteTransmissionMode.VALUE) self["outputTransmission"] = list(sorted(out)) return dict.__getitem__(self, "outputTransmission") diff --git a/weaver/processes/convert.py b/weaver/processes/convert.py index 
d84314be5..66f9950b1 100644 --- a/weaver/processes/convert.py +++ b/weaver/processes/convert.py @@ -1611,7 +1611,7 @@ def wps2json_job_payload(wps_request, wps_process): else: data_output = wps_request.outputs[oid] if as_ref: - data_output["transmissionMode"] = ExecuteTransmissionMode.REFERENCE + data_output["transmissionMode"] = ExecuteTransmissionMode.VALUE else: data_output["transmissionMode"] = ExecuteTransmissionMode.VALUE data_output["id"] = oid diff --git a/weaver/processes/wps3_process.py b/weaver/processes/wps3_process.py index 8383f0044..23e0deb1d 100644 --- a/weaver/processes/wps3_process.py +++ b/weaver/processes/wps3_process.py @@ -241,7 +241,7 @@ def prepare(self): def format_outputs(self, workflow_outputs): # type: (JobOutputs) -> JobOutputs for output in workflow_outputs: - output.update({"transmissionMode": ExecuteTransmissionMode.REFERENCE}) + output.update({"transmissionMode": ExecuteTransmissionMode.VALUE}) return workflow_outputs def dispatch(self, process_inputs, process_outputs): diff --git a/weaver/wps_restapi/examples/providers_processes_details.json b/weaver/wps_restapi/examples/providers_processes_details.json index 6a12ce45d..e60b59483 100644 --- a/weaver/wps_restapi/examples/providers_processes_details.json +++ b/weaver/wps_restapi/examples/providers_processes_details.json @@ -11,7 +11,7 @@ "async-execute" ], "outputTransmission": [ - "reference" + "value" ], "processDescriptionURL": "http://localhost:4002/processes/ColibriFlyingpigeon_SubsetBbox", "processEndpointWPS1": "http://localhost:4002/ows/wps", @@ -28,7 +28,7 @@ "async-execute" ], "outputTransmission": [ - "reference" + "value" ], "processDescriptionURL": "http://localhost:4002/processes/OutardeFlyingpigeon_SubsetBbox", "processEndpointWPS1": "http://localhost:4002/ows/wps", @@ -47,7 +47,7 @@ "async-execute" ], "outputTransmission": [ - "reference" + "value" ], "processDescriptionURL": "http://localhost:4002/processes/Staging_S2L1C", "processEndpointWPS1": 
"http://localhost:4002/ows/wps", @@ -66,7 +66,7 @@ "async-execute" ], "outputTransmission": [ - "reference" + "value" ], "processDescriptionURL": "http://localhost:4002/processes/Staging_S2L1C-mock-docker", "processEndpointWPS1": "http://localhost:4002/ows/wps", @@ -85,7 +85,7 @@ "async-execute" ], "outputTransmission": [ - "reference" + "value" ], "processDescriptionURL": "http://localhost:4002/processes/WaterExtent_S2-mock-docker", "processEndpointWPS1": "http://localhost:4002/ows/wps", @@ -104,7 +104,7 @@ "async-execute" ], "outputTransmission": [ - "reference" + "value" ], "processDescriptionURL": "http://localhost:4002/processes/WorkflowWaterExtent", "processEndpointWPS1": "http://localhost:4002/ows/wps", @@ -123,7 +123,7 @@ "async-execute" ], "outputTransmission": [ - "reference" + "value" ], "processDescriptionURL": "http://localhost:4002/processes/WorkflowWaterExtent-mock", "processEndpointWPS1": "http://localhost:4002/ows/wps", @@ -159,7 +159,7 @@ "async-execute" ], "outputTransmission": [ - "reference" + "value" ], "processDescriptionURL": "http://localhost:4002/processes/anti-spoofing", "processEndpointWPS1": "http://localhost:4002/ows/wps", @@ -178,7 +178,7 @@ "async-execute" ], "outputTransmission": [ - "reference" + "value" ], "processDescriptionURL": "http://localhost:4002/processes/docker-demo-cat", "processEndpointWPS1": "http://localhost:4002/ows/wps", @@ -195,7 +195,7 @@ "async-execute" ], "outputTransmission": [ - "reference" + "value" ], "processDescriptionURL": "http://localhost:4002/processes/docker-python-script", "processEndpointWPS1": "http://localhost:4002/ows/wps", @@ -212,7 +212,7 @@ "async-execute" ], "outputTransmission": [ - "reference" + "value" ], "processDescriptionURL": "http://localhost:4002/processes/file2string_array", "processEndpointWPS1": "http://localhost:4002/ows/wps", @@ -239,7 +239,7 @@ "async-execute" ], "outputTransmission": [ - "reference" + "value" ], "processDescriptionURL": 
"http://localhost:4002/processes/image-utils", "processEndpointWPS1": "http://localhost:4002/ows/wps", @@ -258,7 +258,7 @@ "async-execute" ], "outputTransmission": [ - "reference" + "value" ], "processDescriptionURL": "http://localhost:4002/processes/jsonarray2netcdf", "processEndpointWPS1": "http://localhost:4002/ows/wps", @@ -313,7 +313,7 @@ "async-execute" ], "outputTransmission": [ - "reference" + "value" ], "processDescriptionURL": "http://localhost:4002/processes/las2tif", "processEndpointWPS1": "http://localhost:4002/ows/wps", @@ -332,7 +332,7 @@ "async-execute" ], "outputTransmission": [ - "reference" + "value" ], "processDescriptionURL": "http://localhost:4002/processes/metalink2netcdf", "processEndpointWPS1": "http://localhost:4002/ows/wps", @@ -352,7 +352,7 @@ "async-execute" ], "outputTransmission": [ - "reference" + "value" ], "processDescriptionURL": "http://localhost:4002/processes/python-script", "processEndpointWPS1": "http://localhost:4002/ows/wps", @@ -370,7 +370,7 @@ "async-execute" ], "outputTransmission": [ - "reference" + "value" ], "processDescriptionURL": "http://localhost:4002/processes/sleep", "processEndpointWPS1": "http://localhost:4002/ows/wps", @@ -397,7 +397,7 @@ "async-execute" ], "outputTransmission": [ - "reference" + "value" ], "processDescriptionURL": "http://localhost:4002/processes/test_blurring", "processEndpointWPS1": "http://localhost:4002/ows/wps", @@ -424,7 +424,7 @@ "async-execute" ], "outputTransmission": [ - "reference" + "value" ], "processDescriptionURL": "http://localhost:4002/processes/test_generation", "processEndpointWPS1": "http://localhost:4002/ows/wps", @@ -443,7 +443,7 @@ "async-execute" ], "outputTransmission": [ - "reference" + "value" ], "processDescriptionURL": "http://localhost:4002/processes/test_workflow", "processEndpointWPS1": "http://localhost:4002/ows/wps", diff --git a/weaver/wps_restapi/swagger_definitions.py b/weaver/wps_restapi/swagger_definitions.py index a0c89e43a..86649a0a6 100644 --- 
a/weaver/wps_restapi/swagger_definitions.py +++ b/weaver/wps_restapi/swagger_definitions.py @@ -1411,8 +1411,8 @@ class JobResponseOptionsEnum(ExtendedSchemaNode): class TransmissionModeEnum(ExtendedSchemaNode): schema_type = String title = "TransmissionMode" - default = ExecuteTransmissionMode.REFERENCE - example = ExecuteTransmissionMode.REFERENCE + default = ExecuteTransmissionMode.VALUE + example = ExecuteTransmissionMode.VALUE validator = OneOf(ExecuteTransmissionMode.values()) @@ -2509,8 +2509,8 @@ class ExceptionReportType(ExtendedMappingSchema): class ProcessControl(ExtendedMappingSchema): jobControlOptions = JobControlOptionsList(missing=[ExecuteControlOption.ASYNC], default=[ExecuteControlOption.ASYNC]) - outputTransmission = TransmissionModeList(missing=[ExecuteTransmissionMode.REFERENCE], - default=[ExecuteTransmissionMode.REFERENCE]) + outputTransmission = TransmissionModeList(missing=[ExecuteTransmissionMode.VALUE], + default=[ExecuteTransmissionMode.VALUE]) class ProcessLocations(ExtendedMappingSchema): From b51bbfeff766394fd5bfc7fd65051d486d4286b0 Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Thu, 17 Mar 2022 00:59:41 -0400 Subject: [PATCH 13/34] implement Link header creation for transmissionMode=reference (relates to #377) --- CHANGES.rst | 35 +++-- tests/functional/test_builtin.py | 4 +- tests/wps_restapi/test_jobs.py | 86 +++++++++++ weaver/cli.py | 43 +++++- weaver/datatype.py | 30 ++-- weaver/processes/convert.py | 46 +++++- weaver/store/base.py | 14 +- weaver/store/mongodb.py | 5 +- weaver/typedefs.py | 12 ++ weaver/wps_restapi/colander_extras.py | 49 +++++-- weaver/wps_restapi/constants.py | 12 +- weaver/wps_restapi/jobs/jobs.py | 166 +++++++++++++++++----- weaver/wps_restapi/swagger_definitions.py | 61 ++++++-- 13 files changed, 470 insertions(+), 93 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 7577d240f..eafd68f9f 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -10,6 +10,16 @@ Changes `Unreleased `_ (latest) 
======================================================================== +Important: +---------- +- In order to support *synchronous* execution, setting ``RESULT_BACKEND`` **MUST** be specified in + the ``weaver.ini`` configuration file. + See `Weaver INI Configuration Example `_ + in section ``[celery]`` for more details. +- With resolution and added support of ``transmissionMode`` handling according to `OGC API - Processes` specification, + requests that were submitted with ``reference`` outputs will produce results in a different format than previously + since this parameter was ignored and always returned ``value`` representation. + Changes: -------- - Support ``Prefer`` header with ``wait`` or ``respond-async`` directives to select ``Job`` execution mode either @@ -20,6 +30,17 @@ Changes: maximum duration that can be specified to wait for a `synchronous` response from task workers. - Add ``-B`` (``celery beat``) option to Docker command of ``weaver-worker`` to run scheduled task in parallel to ``celery worker`` in order to periodically cleanup task results introduced by *synchronous* execution. +- Add support of ``transmissionMode`` handling as ``reference`` to generate HTTP ``Link`` references for results + requested this way (resolves `#377 `_). +- Updated every ``Process`` to report that they support ``outputTransmission`` both as ``reference`` and ``value``, + since handling of results is accomplished by `Weaver` itself, regardless of the application being executed. +- Add `CLI` option ``-R/--ref/--reference`` for ``execute`` operation allowing to request corresponding ``outputs`` + by ID to be returned using the ``transmissionMode: reference`` method, producing HTTP ``Link`` headers for those + entries rather than inserting values in the response content body. +- Add requested ``outputs`` into response of ``GET /jobs/{jobId}/inputs`` to obtain submitted ``Job`` definitions.
+- Add query parameter ``schema`` for ``GET /jobs/{jobId}/inputs`` (and corresponding endpoints under ``/processes`` + and ``/providers``) allowing to retrieve submitted input values and requested outputs with either ``OGC``/``OLD`` + formats. - Improve conformance for returned status codes and error messages when requesting results for an unfinished, failed, or dismissed ``Job``. - Adjust conformance item references to correspond with `OGC API - Processes: Part 2` renamed from `Transactions` to @@ -28,19 +49,15 @@ Changes: (resolves `#180 `_). - Improve ``Process`` undeployment to consider running ``Job`` to block its removal while in use. -Important Note --------------- -- In order to support *synchronous* execution, setting ``RESULT_BACKEND`` **MUST** be specified in - the ``weaver.ini`` configuration file. - See `Weaver INI Configuration Example `_ - in section ``[celery]`` for more details. - Fixes: ------ - Fix ``outputs`` permitted to be completely omitted from the execution request (resolves `#375 `_). -- Fix all instances of ``outputTransmission`` reported as ``reference`` although `Weaver` behaves with the ``value`` - method, which is to return values and file references in content body, instead of HTTP header ``Link`` references. +- Fix ``outputs`` permitted as explicit empty mapping or list as equivalent to omitting them, defining by default + that all ``outputs`` should be returned with ``transmissionMode: value`` for ``Job`` execution. +- Fix all instances of ``outputTransmission`` reported as ``reference`` in ``Process`` descriptions, although `Weaver` + behaved with the ``value`` method, which is to return values and file references in content body, instead of + HTTP ``Link`` header references. .. 
_changes_4.14.0: diff --git a/tests/functional/test_builtin.py b/tests/functional/test_builtin.py index 7c3bbc299..4230a9485 100644 --- a/tests/functional/test_builtin.py +++ b/tests/functional/test_builtin.py @@ -60,7 +60,7 @@ def test_jsonarray2netcdf_describe_old_schema(self): assert len(body["process"]["outputs"][0]["formats"]) == 1 assert body["process"]["outputs"][0]["formats"][0]["mediaType"] == ContentType.APP_NETCDF assert body["jobControlOptions"] == [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC] - assert body["outputTransmission"] == [ExecuteTransmissionMode.VALUE] + assert body["outputTransmission"] == [ExecuteTransmissionMode.REFERENCE, ExecuteTransmissionMode.VALUE] def test_jsonarray2netcdf_describe_ogc_schema(self): resp = self.app.get("/processes/jsonarray2netcdf", headers=self.json_headers) @@ -84,7 +84,7 @@ def test_jsonarray2netcdf_describe_ogc_schema(self): assert len(body["outputs"]["output"]["formats"]) == 1 assert body["outputs"]["output"]["formats"][0]["mediaType"] == ContentType.APP_NETCDF assert body["jobControlOptions"] == [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC] - assert body["outputTransmission"] == [ExecuteTransmissionMode.VALUE] + assert body["outputTransmission"] == [ExecuteTransmissionMode.REFERENCE, ExecuteTransmissionMode.VALUE] def test_jsonarray2netcdf_execute_async(self): dirname = tempfile.gettempdir() diff --git a/tests/wps_restapi/test_jobs.py b/tests/wps_restapi/test_jobs.py index 1c9f7263f..01beb7aac 100644 --- a/tests/wps_restapi/test_jobs.py +++ b/tests/wps_restapi/test_jobs.py @@ -1,3 +1,4 @@ +import colander import contextlib import datetime import json @@ -1366,3 +1367,88 @@ def test_job_results_errors(self): assert resp.json["cause"] == cause assert resp.json["type"].endswith(error_type) # ignore http full reference, not always there assert "links" in resp.json + + def test_jobs_inputs_outputs_validations(self): + """ + Ensure that inputs/outputs submitted or returned can be represented and 
validated across various formats. + """ + default_trans_mode = {"transmissionMode": ExecuteTransmissionMode.VALUE} + + job_none = sd.Execute().deserialize({}) + assert job_none == { + "inputs": {}, + "outputs": {}, + "mode": ExecuteMode.AUTO, + "response": ExecuteResponse.DOCUMENT + } + + job_in_none = sd.Execute().deserialize({"outputs": {"random": default_trans_mode}}) + assert job_in_none == { + "inputs": {}, + "outputs": {"random": default_trans_mode}, + "mode": ExecuteMode.AUTO, + "response": ExecuteResponse.DOCUMENT + } + + job_in_empty_dict = sd.Execute().deserialize({"inputs": {}, "outputs": {"random": default_trans_mode}}) + assert job_in_empty_dict == { + "inputs": {}, + "outputs": {"random": default_trans_mode}, + "mode": ExecuteMode.AUTO, + "response": ExecuteResponse.DOCUMENT + } + + job_in_empty_list = sd.Execute().deserialize({"inputs": [], "outputs": {"random": default_trans_mode}}) + assert job_in_empty_list == { + "inputs": [], + "outputs": {"random": default_trans_mode}, + "mode": ExecuteMode.AUTO, + "response": ExecuteResponse.DOCUMENT + } + + job_out_none = sd.Execute().deserialize({"inputs": {"random": "ok"}}) + assert job_out_none == { + "inputs": {"random": "ok"}, + "outputs": {}, + "mode": ExecuteMode.AUTO, + "response": ExecuteResponse.DOCUMENT + } + + job_out_empty_dict = sd.Execute().deserialize({"inputs": {"random": "ok"}, "outputs": {}}) + assert job_out_empty_dict == { + "inputs": {"random": "ok"}, + "outputs": {}, + "mode": ExecuteMode.AUTO, + "response": ExecuteResponse.DOCUMENT + } + + job_out_empty_list = sd.Execute().deserialize({"inputs": {"random": "ok"}, "outputs": []}) + assert job_out_empty_list == { + "inputs": {"random": "ok"}, + "outputs": [], + "mode": ExecuteMode.AUTO, + "response": ExecuteResponse.DOCUMENT + } + + job_out_defined = sd.Execute().deserialize({ + "inputs": {"random": "ok"}, + "outputs": {"random": {"transmissionMode": ExecuteTransmissionMode.REFERENCE}} + }) + assert job_out_defined == { + "inputs": 
{"random": "ok"}, + "outputs": {"random": {"transmissionMode": ExecuteTransmissionMode.REFERENCE}}, + "mode": ExecuteMode.AUTO, + "response": ExecuteResponse.DOCUMENT + } + + with self.assertRaises(colander.Invalid): + sd.Execute().deserialize({"inputs": "value"}) + + with self.assertRaises(colander.Invalid): + sd.Execute().deserialize({"outputs": "value"}) + + with self.assertRaises(colander.Invalid): + sd.Execute().deserialize({"outputs": {"random": "value"}}) + + with self.assertRaises(colander.Invalid): + sd.Execute().deserialize({"outputs": {"random": {"transmissionMode": "bad"}}}) diff --git a/weaver/cli.py b/weaver/cli.py index 9d834c060..ce170ac48 100644 --- a/weaver/cli.py +++ b/weaver/cli.py @@ -42,7 +42,7 @@ from weaver.wps_restapi import swagger_definitions as sd if TYPE_CHECKING: - from typing import Any, Dict, Optional, Tuple, Union + from typing import Any, Dict, Iterable, Optional, Tuple, Union from requests import Response @@ -558,6 +558,7 @@ def execute(self, show_links=True, # type: bool show_headers=False, # type: bool output_format=None, # type: Optional[AnyOutputFormat] + output_refs=None, # type: Optional[Iterable[str]] ): # type: (...) -> OperationResult """ Execute a :term:`Job` for the specified :term:`Process` with provided inputs. @@ -594,6 +595,12 @@ def execute(self, :param show_links: Indicate if ``links`` section should be preserved in returned result body. :param show_headers: Indicate if response headers should be returned in result output. :param output_format: Select an alternate output representation of the result body contents. + :param output_refs: + Indicates which outputs by ID to be returned as HTTP Link header reference instead of body content value. + With reference transmission mode, outputs that contain literal data will be linked by ``text/plain`` file + containing the data. Outputs that refer to a file reference will simply contain that URL reference as link.
+ With value transmission mode (default behavior when outputs are not specified in this list), outputs are + returned as direct values (literal or href) within the response content body. :returns: Results of the operation. """ if isinstance(inputs, list) and all(isinstance(item, list) for item in inputs): @@ -612,8 +619,7 @@ def execute(self, "inputs": values, # FIXME: support 'response: raw' (https://github.com/crim-ca/weaver/issues/376) "response": ExecuteResponse.DOCUMENT, - # FIXME: allow omitting 'outputs' (https://github.com/crim-ca/weaver/issues/375) - # FIXME: allow 'transmissionMode: value/reference' selection (https://github.com/crim-ca/weaver/issues/377) + # FIXME: allow filtering 'outputs' (https://github.com/crim-ca/weaver/issues/380) "outputs": {} } result = self.describe(process_id, url=base) @@ -621,9 +627,11 @@ def execute(self, return OperationResult(False, "Could not obtain process description for execution.", body=result.body, headers=result.headers, code=result.code, text=result.text) outputs = result.body.get("outputs") + output_refs = set(output_refs or []) for output_id in outputs: # use 'value' to have all outputs reported in body as 'value/href' rather than 'Link' headers - data["outputs"][output_id] = {"transmissionMode": ExecuteTransmissionMode.VALUE} + out_mode = ExecuteTransmissionMode.REFERENCE if output_id in output_refs else ExecuteTransmissionMode.VALUE + data["outputs"][output_id] = {"transmissionMode": out_mode} LOGGER.info("Executing [%s] with inputs:\n%s", process_id, OutputFormat.convert(values, OutputFormat.JSON_STR)) path = f"{base}/processes/{process_id}/execution" # use OGC-API compliant endpoint (not '/jobs') @@ -841,6 +849,8 @@ def results(self, resp = request_extra("GET", result_url, headers=self._headers, settings=self._settings) res_out = self._parse_result(resp, output_format=output_format, show_links=show_links, show_headers=show_headers) + + # FIXME: consider results that were requested with 
transmissionMode="reference" (for check+download) outputs = res_out.body if not res_out.success or not isinstance(res_out.body, dict): return OperationResult(False, "Could not retrieve any output results from job.", outputs) @@ -1216,6 +1226,29 @@ def make_parser(): Example: ``-I message='Hello Weaver' -I value:int=1234`` """) ) + # FIXME: allow filtering 'outputs' (https://github.com/crim-ca/weaver/issues/380) + # Only specified ones are returned, if none specified, return all. + # op_execute.add_argument( + # "-O", "--output", + op_execute.add_argument( + "-R", "--ref", "--reference", metavar="REFERENCE", dest="output_refs", nargs=1, action="append", + help=inspect.cleandoc(""" + Indicates which outputs by ID to be returned as HTTP Link header reference instead of body content value. + This defines the output transmission mode when submitting the execution request. + + With reference transmission mode, + outputs that contain literal data will be linked by ``text/plain`` file containing the data. + Outputs that refer to a file reference will simply contain that URL reference as link. + + With value transmission mode (default behavior when outputs are not specified in this list), outputs are + returned as direct values (literal or href) within the response content body. + + When requesting any output to be returned by reference, option ``-H/--headers`` should be considered as + well to return the provided ``Link`` headers for these outputs on the command line. + + Example: ``-R output-one -R output-two`` + """) + ) op_execute.add_argument( "-M", "--monitor", dest="monitor", action="store_true", help="Automatically perform the monitoring operation following job submission to retrieve final results. " @@ -1288,7 +1321,7 @@ def make_parser(): op_results = WeaverArgumentParser( "results", description=( - "Obtain the output results description of a job. " + "Obtain the output results from a job successfully executed. 
" "This operation can also download them from the remote server if requested." ), formatter_class=ParagraphFormatter, diff --git a/weaver/datatype.py b/weaver/datatype.py index b7c5fe7c0..c3827b0c5 100644 --- a/weaver/datatype.py +++ b/weaver/datatype.py @@ -67,6 +67,8 @@ AnyProcess, AnySettingsContainer, AnyUUID, + ExecutionInputs, + ExecutionOutputs, Number, CWL, JSON, @@ -756,19 +758,29 @@ def type(self): return "provider" def _get_inputs(self): - # type: () -> List[Optional[Dict[str, JSON]]] + # type: () -> Optional[ExecutionInputs] if self.get("inputs") is None: - self["inputs"] = list() + return {} return dict.__getitem__(self, "inputs") def _set_inputs(self, inputs): - # type: (List[Optional[Dict[str, JSON]]]) -> None - if not isinstance(inputs, list): - raise TypeError(f"Type 'list' is required for '{self.__name__}.inputs'") + # type: (Optional[ExecutionInputs]) -> None self["inputs"] = inputs # allows to correctly update list by ref using 'job.inputs.extend()' - inputs = property(_get_inputs, _set_inputs) + inputs = property(_get_inputs, _set_inputs, doc="Input values and reference submitted for execution.") + + def _get_outputs(self): + # type: () -> Optional[ExecutionOutputs] + if self.get("outputs") is None: + return {} + return dict.__getitem__(self, "outputs") + + def _set_outputs(self, outputs): + # type: (Optional[ExecutionOutputs]) -> None + self["outputs"] = outputs + + outputs = property(_get_outputs, _set_outputs, doc="Output transmission modes submitted for execution.") @property def user_id(self): @@ -969,7 +981,7 @@ def _set_results(self, results): self["results"] = results # allows to correctly update list by ref using 'job.results.extend()' - results = property(_get_results, _set_results) + results = property(_get_results, _set_results, doc="Output values and references that resulted from execution.") def _get_exceptions(self): # type: () -> List[Union[str, Dict[str, str]]] @@ -1811,13 +1823,13 @@ def jobControlOptions(self): # noqa: N802 
@property def outputTransmission(self): # noqa: N802 # type: () -> List[AnyExecuteTransmissionMode] - out = self.setdefault("outputTransmission", [ExecuteTransmissionMode.VALUE]) + out = self.setdefault("outputTransmission", ExecuteTransmissionMode.values()) if not isinstance(out, list): # eg: None, bw-compat out = [ExecuteTransmissionMode.VALUE] out = [ExecuteTransmissionMode.get(mode) for mode in out] out = [mode for mode in out if mode is not None] if len(out) == 0: - out.append(ExecuteTransmissionMode.VALUE) + out.extend(ExecuteTransmissionMode.values()) self["outputTransmission"] = list(sorted(out)) return dict.__getitem__(self, "outputTransmission") diff --git a/weaver/processes/convert.py b/weaver/processes/convert.py index 66f9950b1..6f2e6e6f5 100644 --- a/weaver/processes/convert.py +++ b/weaver/processes/convert.py @@ -82,9 +82,11 @@ CWL_Output_Type, ExecutionInputs, ExecutionInputsList, + ExecutionOutputs, JobValueFile, JSON ) + from weaver.wps_restapi.constants import JobInputsOutputsSchemaType # typing shortcuts # pylint: disable=C0103,invalid-name @@ -1018,7 +1020,7 @@ def _get_file_input(input_data): def convert_input_values_schema(inputs, schema): - # type: (ExecutionInputs, ProcessSchemaType) -> ExecutionInputs + # type: (ExecutionInputs, JobInputsOutputsSchemaType) -> ExecutionInputs """ Convert execution input values between equivalent formats. @@ -1026,6 +1028,8 @@ def convert_input_values_schema(inputs, schema): :param schema: Desired schema. :return: Converted inputs. 
""" + if isinstance(schema, str): + schema = schema.upper() if ( (schema == ProcessSchema.OGC and isinstance(inputs, dict)) or (schema == ProcessSchema.OLD and isinstance(inputs, list)) @@ -1078,6 +1082,46 @@ def convert_input_values_schema(inputs, schema): raise NotImplementedError(f"Unknown conversion format of input values for schema: [{schema}]") +def convert_output_params_schema(outputs, schema): + # type: (ExecutionOutputs, JobInputsOutputsSchemaType) -> ExecutionOutputs + """ + Convert execution output parameters between equivalent formats. + + .. warning:: + These outputs are not *values* (i.e.: *results*), but *submitted* :term:`Job` outputs for return definitions. + Contents are transferred as-is without any consideration of ``value`` or ``href`` fields. + + :param outputs: Outputs to convert. + :param schema: Desired schema. + :return: Converted outputs. + """ + if isinstance(schema, str): + schema = schema.upper() + if ( + (schema == ProcessSchema.OGC and isinstance(outputs, dict)) or + (schema == ProcessSchema.OLD and isinstance(outputs, list)) + ): + return outputs + if ( + (schema == ProcessSchema.OGC and not isinstance(outputs, list)) or + (schema == ProcessSchema.OLD and not isinstance(outputs, dict)) + ): + name = fully_qualified_name(outputs) + raise ValueError(f"Unknown conversion method to schema [{schema}] for outputs of type [{name}]: {outputs}") + if schema == ProcessSchema.OGC: + out_dict = {} + for out in outputs: + out_id = get_any_id(out, pop=True) + out_dict[out_id] = out + return out_dict + if schema == ProcessSchema.OLD: + out_list = [{"id": out} for out in outputs] + for out in out_list: + out.update(outputs[out["id"]]) + return out_list + raise NotImplementedError(f"Unknown conversion format of outputs definitions for schema: [{schema}]") + + def repr2json_input_values(inputs): # type: (List[str]) -> ExecutionInputsList """ diff --git a/weaver/store/base.py b/weaver/store/base.py index 49f96a621..5634172e2 100644 --- 
a/weaver/store/base.py +++ b/weaver/store/base.py @@ -3,13 +3,20 @@ if TYPE_CHECKING: import datetime - from typing import Any, Dict, List, Optional, Tuple, Union + from typing import Dict, List, Optional, Tuple, Union from pyramid.request import Request from pywps import Process as ProcessWPS from weaver.datatype import Bill, Job, Process, Quote, Service, VaultFile - from weaver.typedefs import AnyUUID, DatetimeIntervalType, SettingsType, TypedDict + from weaver.typedefs import ( + AnyUUID, + ExecutionInputs, + ExecutionOutputs, + DatetimeIntervalType, + SettingsType, + TypedDict + ) JobGroupCategory = TypedDict("JobGroupCategory", {"category": Dict[str, Optional[str]], "count": int, "jobs": List[Job]}) @@ -113,7 +120,8 @@ def save_job(self, task_id, # type: str process, # type: str service=None, # type: Optional[str] - inputs=None, # type: Optional[List[Any]] + inputs=None, # type: Optional[ExecutionInputs] + outputs=None, # type: Optional[ExecutionOutputs] is_workflow=False, # type: bool is_local=False, # type: bool execute_async=True, # type: bool diff --git a/weaver/store/mongodb.py b/weaver/store/mongodb.py index 903ad2c0a..052c41398 100644 --- a/weaver/store/mongodb.py +++ b/weaver/store/mongodb.py @@ -54,7 +54,7 @@ from weaver.processes.types import AnyProcessType from weaver.store.base import DatetimeIntervalType, JobGroupCategory, JobSearchResult - from weaver.typedefs import AnyProcess, AnyProcessClass, AnyUUID, AnyValueType + from weaver.typedefs import AnyProcess, AnyProcessClass, AnyUUID, AnyValueType, ExecutionInputs, ExecutionOutputs from weaver.visibility import AnyVisibility MongodbValue = Union[AnyValueType, datetime.datetime] @@ -572,7 +572,8 @@ def save_job(self, task_id, # type: AnyUUID process, # type: str service=None, # type: Optional[str] - inputs=None, # type: Optional[List[Any]] + inputs=None, # type: Optional[ExecutionInputs] + outputs=None, # type: Optional[ExecutionOutputs] is_workflow=False, # type: bool is_local=False, # type: bool 
execute_async=True, # type: bool diff --git a/weaver/typedefs.py b/weaver/typedefs.py index 25293eadf..49d25862e 100644 --- a/weaver/typedefs.py +++ b/weaver/typedefs.py @@ -42,6 +42,7 @@ from webtest.response import TestResponse from werkzeug.wrappers import Request as WerkzeugRequest + from weaver.execute import AnyExecuteTransmissionMode from weaver.processes.wps_process_base import WpsProcessInterface from weaver.datatype import Process from weaver.status import AnyStatusType @@ -291,6 +292,17 @@ def __call__(self, message: str, progress: Number, status: AnyStatusType, *args: ExecutionInputsList = List[JobValueItem] # when schema='weaver.processes.constants.ProcessSchema.OLD' ExecutionInputs = Union[ExecutionInputsList, ExecutionInputsMap] + ExecutionOutputObject = TypedDict("ExecutionOutputObject", { + "transmissionMode": str + }, total=False) + ExecutionOutputItem = TypedDict("ExecutionOutputItem", { + "id": str, + "transmissionMode": str + }, total=False) + ExecutionOutputsList = List[ExecutionOutputItem] + ExecutionOutputsMap = Dict[str, ExecutionOutputObject] + ExecutionOutputs = Union[ExecutionOutputsList, ExecutionOutputsMap] + # reference employed as 'JobMonitorReference' by 'WPS1Process' JobExecution = TypedDict("JobExecution", {"execution": WPSExecution}) diff --git a/weaver/wps_restapi/colander_extras.py b/weaver/wps_restapi/colander_extras.py index 7306e72fb..301f96760 100644 --- a/weaver/wps_restapi/colander_extras.py +++ b/weaver/wps_restapi/colander_extras.py @@ -1152,17 +1152,53 @@ def _validate_nodes(self): ExtendedSchemaBase._validate(node) +class StrictMappingSchema(ExtendedMappingSchema): + """ + Object schema that will ``raise`` any unknown field not represented by children schema. + + This is equivalent to `OpenAPI` object mapping with ``additionalProperties: false``. + This type is useful for defining a dictionary that matches *exactly* a specific set of values and children schema. 
+ + .. note:: + When doing schema deserialization to validate it, unknown keys would normally be removed without this class + (default behaviour is to ``ignore`` them). With this schema, content under an unknown key fails validation. + + .. seealso:: + :class:`PermissiveMappingSchema` + """ + def __init__(self, *args, **kwargs): + kwargs["unknown"] = "raise" + super(StrictMappingSchema, self).__init__(*args, **kwargs) + # sub-type mapping itself must also have 'raise' such that its own 'deserialize' copies the fields over + self.typ.unknown = "raise" + + +class EmptyMappingSchema(StrictMappingSchema): + """ + Mapping that guarantees it is completely empty for validation during deserialization. + + Any children added to this schema are removed automatically. + """ + def __init__(self, *args, **kwargs): + super(EmptyMappingSchema, self).__init__(*args, **kwargs) + self.children = [] + + class PermissiveMappingSchema(ExtendedMappingSchema): """ - Object schema that will allow *any unknown* field to remain present in the resulting deserialization. + Object schema that will ``preserve`` any unknown field to remain present in the resulting deserialization. This type is useful for defining a dictionary where some field names are not known in advance, or when more optional keys that don't need to all be exhaustively provided in the schema are acceptable. - When doing schema deserialization to validate it, unknown keys would normally be removed without this class - (default behaviour is to ``ignore`` them). With this schema, content under an unknown key is ``preserved`` - as it was received without any validation. Other fields that are explicitly specified with sub-schema nodes - will still be validated as per usual behaviour. + .. note:: + When doing schema deserialization to validate it, unknown keys would normally be removed without this class + (default behaviour is to ``ignore`` them). 
With this schema, content under an unknown key using ``preserve`` + are passed down without any validation. Other fields that are explicitly specified with sub-schema nodes + will still be validated as per usual behaviour. + + .. seealso:: + :class:`StrictMappingSchema` Example:: @@ -1785,9 +1821,6 @@ class NotKeywordSchema(KeywordMapper): Corresponds to the ``not`` specifier of `OpenAPI` specification. - This is equivalent to `OpenAPI` object mapping with ``additionalProperties: false``, but is more explicit in - the definition of invalid or conflicting field names with explicit definitions during deserialization. - Example:: class RequiredItem(ExtendedMappingSchema): diff --git a/weaver/wps_restapi/constants.py b/weaver/wps_restapi/constants.py index 7c99527f3..71089599a 100644 --- a/weaver/wps_restapi/constants.py +++ b/weaver/wps_restapi/constants.py @@ -3,7 +3,7 @@ from weaver.base import Constants -class JobOutputsSchema(Constants): +class JobInputsOutputsSchema(Constants): """ Schema selector to represent a :term:`Job` output results. 
""" @@ -16,9 +16,9 @@ class JobOutputsSchema(Constants): if TYPE_CHECKING: from weaver.typedefs import Literal - JobOutputsSchemaType = Literal[ - JobOutputsSchema.OGC_STRICT, - JobOutputsSchema.OLD_STRICT, - JobOutputsSchema.OGC, - JobOutputsSchema.OLD + JobInputsOutputsSchemaType = Literal[ + JobInputsOutputsSchema.OGC_STRICT, + JobInputsOutputsSchema.OLD_STRICT, + JobInputsOutputsSchema.OGC, + JobInputsOutputsSchema.OLD ] diff --git a/weaver/wps_restapi/jobs/jobs.py b/weaver/wps_restapi/jobs/jobs.py index d9392c935..c92767317 100644 --- a/weaver/wps_restapi/jobs/jobs.py +++ b/weaver/wps_restapi/jobs/jobs.py @@ -31,16 +31,22 @@ ServiceNotFound, log_unhandled_exceptions ) +from weaver.execute import ExecuteTransmissionMode from weaver.formats import ContentType, OutputFormat, get_format, repr_json from weaver.owsexceptions import OWSNoApplicableCode, OWSNotFound -from weaver.processes.convert import any2wps_literal_datatype +from weaver.processes.convert import ( + any2wps_literal_datatype, + convert_input_values_schema, + convert_output_params_schema, + get_field +) from weaver.status import JOB_STATUS_CATEGORIES, Status, StatusCategory, map_status from weaver.store.base import StoreJobs, StoreProcesses, StoreServices from weaver.utils import get_any_id, get_any_value, get_path_kvp, get_settings, get_weaver_url, is_uuid from weaver.visibility import Visibility from weaver.wps.utils import get_wps_output_dir, get_wps_output_url from weaver.wps_restapi import swagger_definitions as sd -from weaver.wps_restapi.constants import JobOutputsSchema +from weaver.wps_restapi.constants import JobInputsOutputsSchema from weaver.wps_restapi.providers.utils import forbid_local_only from weaver.wps_restapi.swagger_definitions import datetime_interval_parser @@ -49,8 +55,8 @@ from pyramid.httpexceptions import HTTPException - from weaver.typedefs import AnySettingsContainer, AnyValueType, JSON - from weaver.wps_restapi.constants import JobOutputsSchemaType + from weaver.typedefs 
import AnySettingsContainer, AnyUUID, AnyValueType, HeadersTupleType, JSON, SettingsType + from weaver.wps_restapi.constants import JobInputsOutputsSchemaType LOGGER = get_task_logger(__name__) @@ -214,8 +220,66 @@ def get_job_list_links(job_total, filters, request): return links -def get_results(job, container, value_key=None, schema=JobOutputsSchema.OLD): - # type: (Job, AnySettingsContainer, Optional[str], JobOutputsSchemaType) -> Union[List[JSON], JSON] +def get_schema_query(schema, strict=True): + # type: (Optional[JobInputsOutputsSchemaType], bool) -> Optional[JobInputsOutputsSchemaType] + if not schema: + return None + # unescape query (eg: "OGC+strict" becomes "OGC strict" from URL parsing) + schema_checked = str(schema).replace(" ", "+").lower() + if JobInputsOutputsSchema.get(schema_checked) is None: + raise HTTPBadRequest(json={ + "type": "InvalidParameterValue", + "detail": "Query parameter 'schema' value is invalid.", + "status": HTTPBadRequest.code, + "locator": "query", + "value": str(schema), + }) + if not strict: + return schema_checked.split("+")[0] + return schema_checked + + +def make_result_link(result_id, result, job_id, settings): + # type: (str, Union[JSON, List[JSON]], AnyUUID, SettingsType) -> List[str] + """ + Convert a result definition as ``value`` into the corresponding ``reference`` for output transmission. + + .. seealso:: + :rfc:`8288`: HTTP ``Link`` header specification. 
+ """ + values = result if isinstance(result, list) else [result] + suffixes = list(f".{idx}" for idx in range(len(values))) if isinstance(result, list) else [""] + wps_url = get_wps_output_url(settings).strip("/") + links = [] + for suffix, value in zip(suffixes, values): + key = get_any_value(result, key=True) + if key != "href": + # literal data to be converted to link + # plain text file must be created containing the raw literal data + typ = ContentType.TEXT_PLAIN # as per '/rec/core/process-execute-sync-document-ref' + enc = "UTF-8" + out = get_wps_output_dir(settings) + val = get_any_value(value, data=True, file=False) + loc = os.path.join(job_id, result_id + suffix + ".txt") + url = f"{wps_url}/{loc}" + path = os.path.join(out, loc) + with open(path, mode="w", encoding=enc) as out_file: + out_file.write(val) + else: + fmt = get_field(result, "format", default={"mediaType": ContentType.TEXT_PLAIN}) + typ = get_field(fmt, "mime_type", search_variations=True, default=ContentType.TEXT_PLAIN) + enc = get_field(fmt, "encoding", search_variations=True, default=None) + url = get_any_value(value, data=False, file=True) # should already include full path + links.append(f"<{url}>; rel=\"{result_id}{suffix}\"; type={typ}; charset={enc}") + return links + + +def get_results(job, # type: Job + container, # type: AnySettingsContainer + value_key=None, # type: Optional[str] + schema=JobInputsOutputsSchema.OLD, # type: JobInputsOutputsSchemaType + link_references=False, # type: bool + ): # type: (...) -> Tuple[Union[List[JSON], JSON], HeadersTupleType] """ Obtains the job results with extended full WPS output URL as applicable and according to configuration settings. @@ -225,23 +289,33 @@ def get_results(job, container, value_key=None, schema=JobOutputsSchema.OLD): If not specified, the returned values will have the appropriate ``data``/``href`` key according to the content. Otherwise, all values will have the specified key. 
:param schema: - Selects which schema to employ for representing the output results. - :returns: list of all outputs each with minimally an ID and value under the requested key. - """ - wps_url = get_wps_output_url(container) + Selects which schema to employ for representing the output results (listing or mapping). + :param link_references: + If enabled, an output that was requested by reference instead of value will be returned as ``Link`` reference. + :returns: + Tuple with: + - List or mapping of all outputs each with minimally an ID and value under the requested key. + - List of ``Link`` headers for reference outputs when requested. Empty otherwise. + """ + settings = get_settings(container) + wps_url = get_wps_output_url(settings) if not wps_url.endswith("/"): wps_url = wps_url + "/" - schema = JobOutputsSchema.get(str(schema).lower(), default=JobOutputsSchema.OLD) + schema = JobInputsOutputsSchema.get(str(schema).lower(), default=JobInputsOutputsSchema.OLD) strict = schema.endswith("+strict") schema = schema.split("+")[0] - ogc_api = schema == JobOutputsSchema.OGC + ogc_api = schema == JobInputsOutputsSchema.OGC outputs = {} if ogc_api else [] fmt_key = "mediaType" if ogc_api else "mimeType" + out_ref = convert_output_params_schema(job.outputs, JobInputsOutputsSchema.OGC) if link_references else {} + references = {} for result in job.results: rtype = "data" if any(k in result for k in ["data", "value"]) else "href" value = get_any_value(result) - out_id = get_any_id(result) out_key = rtype + out_id = get_any_id(result) + out_mode = out_ref.get(out_id, {}).get("transmissionMode") + as_ref = link_references and out_mode == ExecuteTransmissionMode.REFERENCE if rtype == "href": # fix paths relative to instance endpoint, but leave explicit links as is (eg: S3 bucket, remote HTTP, etc.) 
if value.startswith("/"): @@ -258,11 +332,11 @@ def get_results(job, container, value_key=None, schema=JobOutputsSchema.OLD): result["mimeType"] = get_format(value, default=ContentType.TEXT_PLAIN).mime_type if ogc_api or not strict: output["type"] = result["mimeType"] - if not ogc_api or not strict: + if not ogc_api or not strict or as_ref: output["format"] = {fmt_key: result["mimeType"]} - for field in ["encoding", "schema"]: - if field in result: - output["format"][field] = result[field] + for field in ["encoding", "schema"]: + if field in result: + output["format"][field] = result[field] elif rtype != "href": # literal data # FIXME: BoundingBox not implemented (https://github.com/crim-ca/weaver/issues/51) @@ -272,20 +346,29 @@ def get_results(job, container, value_key=None, schema=JobOutputsSchema.OLD): else: output["dataType"] = dtype - if ogc_api: - if out_id in outputs: - output_list = outputs[out_id] + if ogc_api or as_ref: + mapping = references if as_ref else outputs + if out_id in mapping: + output_list = mapping[out_id] if not isinstance(output_list, list): output_list = [output_list] output_list.append(output) - outputs[out_id] = output_list + mapping[out_id] = output_list else: - outputs[out_id] = output + mapping[out_id] = output else: # if ordered insert supported by python version, insert ID first output = dict([("id", out_id)] + list(output.items())) # noqa outputs.append(output) - return outputs + + # needed to collect and aggregate outputs of same ID first in case of array + # convert any requested link references using indices if needed + headers = [] + for out_id, output in references.items(): + res_links = make_result_link(out_id, output, job.id, settings) + headers.extend([("Link", link) for link in res_links]) + + return outputs, headers def validate_service_process(request): @@ -654,13 +737,19 @@ def cancel_job_batch(request): def get_job_inputs(request): # type: (Request) -> HTTPException """ - Retrieve the inputs of a job. 
+ Retrieve the inputs values and outputs definitions of a job. """ job = get_job(request) - inputs = {"inputs": job.inputs} - inputs.update({"links": job.links(request, self_link="inputs")}) - inputs = sd.JobInputsBody().deserialize(inputs) - return HTTPOk(json=inputs) + schema = get_schema_query(request.params.get("schema"), strict=False) + job_inputs = job.inputs + job_outputs = job.outputs + if schema: + job_inputs = convert_input_values_schema(job_inputs, schema) + job_outputs = convert_output_params_schema(job_outputs, schema) + body = {"inputs": job_inputs, "outputs": job_outputs} + body.update({"links": job.links(request, self_link="inputs")}) + body = sd.JobInputsBody().deserialize(body) + return HTTPOk(json=body) @sd.provider_outputs_service.get(tags=[sd.TAG_JOBS, sd.TAG_RESULTS, sd.TAG_PROCESSES], renderer=OutputFormat.JSON, @@ -673,13 +762,14 @@ def get_job_inputs(request): def get_job_outputs(request): # type: (Request) -> HTTPException """ - Retrieve the outputs of a job. + Retrieve the output values resulting from a job execution. """ job = get_job(request) raise_job_dismissed(job, request) raise_job_bad_status(job, request) - schema = request.params.get("schema") - outputs = {"outputs": get_results(job, request, schema=str(schema).replace(" ", "+"))} # unescape query + schema = get_schema_query(request.params.get("schema")) + results, _ = get_results(job, request, schema=schema, link_references=False) + outputs = {"outputs": results} outputs.update({"links": job.links(request, self_link="outputs")}) outputs = sd.JobOutputsBody().deserialize(outputs) return HTTPOk(json=outputs) @@ -706,10 +796,16 @@ def get_job_results(request): "code": "ResultsNotReady", "description": "Job status is '{}'. 
Results are not yet available.".format(job_status) }) - results = get_results(job, request, value_key="value", schema=JobOutputsSchema.OGC) - # note: cannot add links in this case because variable OutputID keys are directly at the root - results = sd.Result().deserialize(results) - return HTTPOk(json=results) + + results, refs = get_results(job, request, value_key="value", + schema=JobInputsOutputsSchema.OGC, link_references=True) + # note: + # Cannot add "links" field in response body because variable Output ID keys are directly at the root + # Possible conflict with an output that would be named "links". + + if results: # avoid error if all by reference + results = sd.Result().deserialize(results) + return HTTPOk(json=results, headers=refs) @sd.provider_exceptions_service.get(tags=[sd.TAG_JOBS, sd.TAG_EXCEPTIONS, sd.TAG_PROVIDERS], diff --git a/weaver/wps_restapi/swagger_definitions.py b/weaver/wps_restapi/swagger_definitions.py index 86649a0a6..0290642e5 100644 --- a/weaver/wps_restapi/swagger_definitions.py +++ b/weaver/wps_restapi/swagger_definitions.py @@ -50,6 +50,7 @@ from weaver.wps_restapi.colander_extras import ( AllOfKeywordSchema, AnyOfKeywordSchema, + EmptyMappingSchema, ExtendedBoolean as Boolean, ExtendedFloat as Float, ExtendedInteger as Integer, @@ -66,7 +67,7 @@ StringRange, XMLObject ) -from weaver.wps_restapi.constants import JobOutputsSchema +from weaver.wps_restapi.constants import JobInputsOutputsSchema from weaver.wps_restapi.patches import ServiceOnlyExplicitGetHead as Service # warning: don't use 'cornice.Service' if TYPE_CHECKING: @@ -2324,21 +2325,43 @@ class ProviderInputsEndpoint(ProviderPath, ProcessPath, JobPath): header = RequestHeaders() +class JobInputsOutputsQuery(ExtendedMappingSchema): + schema = ExtendedSchemaNode( + String(), + title="JobInputsOutputsQuerySchema", + example=JobInputsOutputsSchema.OGC, + default=JobInputsOutputsSchema.OLD, + validator=OneOfCaseInsensitive(JobInputsOutputsSchema.values()), + summary="Selects 
the schema employed for representation of submitted job inputs and outputs.", + description=( + "Selects the schema employed for representing job inputs and outputs that were submitted for execution. " + f"When '{JobInputsOutputsSchema.OLD}' is employed, listing of object with IDs is returned. " + f"When '{JobInputsOutputsSchema.OGC}' is employed, mapping of object definitions is returned. " + "If no schema is requested, the original formats from submission are employed, which could be a mix of " + "both representations. Providing a schema forces their corresponding conversion as applicable." + ) + ) + + class JobInputsEndpoint(JobPath): header = RequestHeaders() + querystring = JobInputsOutputsQuery() class JobOutputQuery(ExtendedMappingSchema): schema = ExtendedSchemaNode( - String(), example=JobOutputsSchema.OGC, default=JobOutputsSchema.OLD, - validator=OneOfCaseInsensitive(JobOutputsSchema.values()), + String(), + title="JobOutputResultsSchema", + example=JobInputsOutputsSchema.OGC, + default=JobInputsOutputsSchema.OLD, + validator=OneOfCaseInsensitive(JobInputsOutputsSchema.values()), summary="Selects the schema employed for representation of job outputs.", description=( "Selects the schema employed for representation of job outputs for providing file Content-Type details. " - f"When '{JobOutputsSchema.OLD}' is employed, 'format.mimeType' is used and 'type' is reported as well. " - f"When '{JobOutputsSchema.OGC}' is employed, 'format.mediaType' is used and 'type' is reported as well. " + f"When '{JobInputsOutputsSchema.OLD}' is employed, 'format.mimeType' is used and 'type' is reported as well. " + f"When '{JobInputsOutputsSchema.OGC}' is employed, 'format.mediaType' is used and 'type' is reported as well. " "When the '+strict' value is added, only the 'format' or 'type' will be represented according to the " - f"reference standard ({JobOutputsSchema.OGC}, {JobOutputsSchema.OLD}) representation." 
+ f"reference standard ({JobInputsOutputsSchema.OGC}, {JobInputsOutputsSchema.OLD}) representation." ) ) @@ -2453,9 +2476,16 @@ class ExecuteOutputSpecList(ExtendedSequenceSchema): output = ExecuteOutputItem() -class ExecuteOutputSpecMap(ExtendedMappingSchema): - input_id = ExecuteOutputDefinition(variable="{input-id}", title="ExecuteOutputSpecMap", - description="Desired output reporting method.") +class ExecuteOutputMapAdditionalProperties(ExtendedMappingSchema): + output_id = ExecuteOutputDefinition(variable="{output-id}", title="ExecuteOutputSpecMap", + description="Desired output reporting method.") + + +class ExecuteOutputSpecMap(AnyOfKeywordSchema): + _any_of = [ + ExecuteOutputMapAdditionalProperties(), # normal {"": {...}} + EmptyMappingSchema(), # allows explicitly provided {} + ] class ExecuteOutputSpec(OneOfKeywordSchema): @@ -2966,12 +2996,19 @@ class ExecuteInputData(OneOfKeywordSchema): # items: # $ref: "inlineOrRefData.yaml" # -class ExecuteInputMapValues(ExtendedMappingSchema): +class ExecuteInputMapAdditionalProperties(ExtendedMappingSchema): schema_ref = f"{OGC_API_SCHEMA_URL}/{OGC_API_SCHEMA_VERSION}/core/openapi/schemas/execute.yaml" input_id = ExecuteInputData(variable="{input-id}", title="ExecuteInputValue", description="Received mapping input value definition during job submission.") +class ExecuteInputMapValues(AnyOfKeywordSchema): + _any_of = [ + ExecuteInputMapAdditionalProperties(), # normal {"": {...}} + EmptyMappingSchema(), # allows explicitly provided {} + ] + + class ExecuteInputValues(OneOfKeywordSchema): _one_of = [ # OLD format: {"inputs": [{"id": "", "value": }, ...]} @@ -2999,7 +3036,6 @@ class ExecuteInputOutputs(ExtendedMappingSchema): # - 'tests.wps_restapi.test_colander_extras.test_oneof_variable_dict_or_list' inputs = ExecuteInputValues(default={}, description="Values submitted for execution.") outputs = ExecuteOutputSpec( - # FIXME: add documentation reference link OGC/Weaver for further details. 
description=( "Defines which outputs to be obtained from the execution (filtered or all), " "as well as the reporting method for each output according to 'transmissionMode', " @@ -3683,8 +3719,7 @@ class Result(ExtendedMappingSchema): ) -class JobInputsBody(ExtendedMappingSchema): - inputs = ExecuteInputValues() +class JobInputsBody(ExecuteInputOutputs): links = LinkList(missing=drop) From 12138b48f5058a15d8ea40487858ace12bdf75bd Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Thu, 17 Mar 2022 01:19:58 -0400 Subject: [PATCH 14/34] http 204 no content for by-ref only results + add 204 to OpenAPI response schema --- weaver/wps_restapi/jobs/jobs.py | 4 +++- weaver/wps_restapi/swagger_definitions.py | 21 +++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/weaver/wps_restapi/jobs/jobs.py b/weaver/wps_restapi/jobs/jobs.py index c92767317..7e7d87a21 100644 --- a/weaver/wps_restapi/jobs/jobs.py +++ b/weaver/wps_restapi/jobs/jobs.py @@ -8,6 +8,7 @@ from colander import Invalid from pyramid.httpexceptions import ( HTTPBadRequest, + HTTPNoContent, HTTPNotFound, HTTPOk, HTTPPermanentRedirect, @@ -805,7 +806,8 @@ def get_job_results(request): if results: # avoid error if all by reference results = sd.Result().deserialize(results) - return HTTPOk(json=results, headers=refs) + HTTPOk(json=results, headers=refs) + return HTTPNoContent(headers=refs) @sd.provider_exceptions_service.get(tags=[sd.TAG_JOBS, sd.TAG_EXCEPTIONS, sd.TAG_PROVIDERS], diff --git a/weaver/wps_restapi/swagger_definitions.py b/weaver/wps_restapi/swagger_definitions.py index 0290642e5..dae9068b5 100644 --- a/weaver/wps_restapi/swagger_definitions.py +++ b/weaver/wps_restapi/swagger_definitions.py @@ -564,6 +564,11 @@ class LinkLanguage(ExtendedMappingSchema): hreflang = Language(missing=drop, description="Language of the content located at the link.") +class LinkHeader(ExtendedSchemaNode): + schema_type = String + example = "; rel=\"relation\"; type=text/plain" + + 
class MetadataBase(ExtendedMappingSchema): title = ExtendedSchemaNode(String(), missing=drop) @@ -4457,6 +4462,21 @@ class OkGetJobResultsResponse(ExtendedMappingSchema): body = Result() +class NoContentJobResultsHeaders(NoContent): + content_length = ContentLengthHeader(example="0") + link = LinkHeader(description=( + "Link to a result requested by reference output transmission. " + "Link relation indicates the result ID. " + "Additional parameters indicate expected content-type of the resource. " + "Literal data requested by reference are returned with contents dumped to plain text file." + )) + + +class NoContentJobResultsResponse(ExtendedMappingSchema): + header = NoContentJobResultsHeaders() + body = NoContent(default="") + + class CreatedQuoteExecuteResponse(ExtendedMappingSchema): header = ResponseHeaders() body = CreatedQuotedJobStatusSchema() @@ -4876,6 +4896,7 @@ class GoneVaultFileDownloadResponse(ExtendedMappingSchema): "value": EXAMPLES["job_results.json"], } }), + "204": NoContentJobResultsResponse(description="success"), "400": InvalidJobResponseSchema(), "404": NotFoundJobResponseSchema(), "410": GoneJobResponseSchema(), From 895d25dfd5c1e6b7c35a62cb5c6eb85017c41bad Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Thu, 17 Mar 2022 01:32:37 -0400 Subject: [PATCH 15/34] add notes regarding order of reported jobControlOptions --- tests/test_datatype.py | 1 + weaver/datatype.py | 26 +++++++++++++++++++++++--- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/tests/test_datatype.py b/tests/test_datatype.py index 6ca56767e..c0721c41c 100644 --- a/tests/test_datatype.py +++ b/tests/test_datatype.py @@ -59,6 +59,7 @@ def test_process_job_control_options_resolution(): proc = Process(id="test-{}".format(uuid.uuid4()), package={}, jobControlOptions=[ExecuteControlOption.SYNC]) assert proc.jobControlOptions == [ExecuteControlOption.SYNC] + # See ordering note in 'jobControlOptions' property proc = 
Process(id="test-{}".format(uuid.uuid4()), package={}, jobControlOptions=[ExecuteControlOption.SYNC, ExecuteControlOption.ASYNC]) assert proc.jobControlOptions == [ExecuteControlOption.SYNC, ExecuteControlOption.ASYNC] diff --git a/weaver/datatype.py b/weaver/datatype.py index 54da9d201..17cf82692 100644 --- a/weaver/datatype.py +++ b/weaver/datatype.py @@ -1798,14 +1798,34 @@ def outputs(self): @property def jobControlOptions(self): # noqa: N802 # type: () -> List[AnyExecuteControlOption] - jco = self.setdefault("jobControlOptions", [ExecuteControlOption.ASYNC]) + """ + Control options that indicate which :term:`Job` execution modes are supported by the :term:`Process`. + + .. note:: + + There are no official mentions about the ordering of ``jobControlOptions``. + Nevertheless, it is often expected that the first item can be considered the default mode when none is + requested explicitly (at execution time). With the definition of execution mode through the ``Prefer`` + header, `Weaver` has the option to decide if it wants to honor this header, according to available + resources and :term:`Job` duration. + + For this reason, ``async`` is placed first by default when nothing was defined during deployment, + since it is the preferred mode in `Weaver`. If deployment included items though, they are preserved as is. + This allows to re-deploy a :term:`Process` to a remote non-`Weaver` :term:`ADES` preserving the original + :term:`Process` definition. + + .. 
seealso:: + Discussion about expected ordering of ``jobControlOptions``: + https://github.com/opengeospatial/ogcapi-processes/issues/171#issuecomment-836819528 + """ + jco = self.setdefault("jobControlOptions", [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC]) if not isinstance(jco, list): # eg: None, bw-compat - jco = [ExecuteControlOption.ASYNC] + jco = [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC] jco = [ExecuteControlOption.get(opt) for opt in jco] jco = [opt for opt in jco if opt is not None] if len(jco) == 0: jco.append(ExecuteControlOption.ASYNC) - self["jobControlOptions"] = list(sorted(jco)) + self["jobControlOptions"] = jco # no alpha order important! return dict.__getitem__(self, "jobControlOptions") @property From 34fc11983fd6197bd444721c905ebbc4f4545d27 Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Thu, 17 Mar 2022 01:49:03 -0400 Subject: [PATCH 16/34] adjust test for new handling of jobControlOptions --- tests/test_datatype.py | 12 +++++++----- weaver/datatype.py | 7 ++++--- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/tests/test_datatype.py b/tests/test_datatype.py index c0721c41c..552827c91 100644 --- a/tests/test_datatype.py +++ b/tests/test_datatype.py @@ -47,14 +47,16 @@ def _replace_specials(value): def test_process_job_control_options_resolution(): - # invalid or matching default mode should be corrected to default async list - for test_process in [ + # invalid or matching default mode should be corrected to default modes list + for i, test_process in enumerate([ Process(id="test-{}".format(uuid.uuid4()), package={}, jobControlOptions=None), Process(id="test-{}".format(uuid.uuid4()), package={}, jobControlOptions=[None]), Process(id="test-{}".format(uuid.uuid4()), package={}, jobControlOptions=[]), - Process(id="test-{}".format(uuid.uuid4()), package={}, jobControlOptions=[ExecuteControlOption.ASYNC]), - ]: - assert test_process.jobControlOptions == [ExecuteControlOption.ASYNC] + ]): + assert 
test_process.jobControlOptions == [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC], f"Test {i}" + # explicitly provided modes are used as is, especially if partial (allow disabling some modes) + proc = Process(id="test-{}".format(uuid.uuid4()), package={}, jobControlOptions=[ExecuteControlOption.ASYNC]) + assert proc.jobControlOptions == [ExecuteControlOption.ASYNC] # other valid definitions should be preserved as is proc = Process(id="test-{}".format(uuid.uuid4()), package={}, jobControlOptions=[ExecuteControlOption.SYNC]) diff --git a/weaver/datatype.py b/weaver/datatype.py index 17cf82692..d80751262 100644 --- a/weaver/datatype.py +++ b/weaver/datatype.py @@ -1818,13 +1818,14 @@ def jobControlOptions(self): # noqa: N802 Discussion about expected ordering of ``jobControlOptions``: https://github.com/opengeospatial/ogcapi-processes/issues/171#issuecomment-836819528 """ - jco = self.setdefault("jobControlOptions", [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC]) + jco_default = [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC] + jco = self.setdefault("jobControlOptions", jco_default) if not isinstance(jco, list): # eg: None, bw-compat - jco = [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC] + jco = jco_default jco = [ExecuteControlOption.get(opt) for opt in jco] jco = [opt for opt in jco if opt is not None] if len(jco) == 0: - jco.append(ExecuteControlOption.ASYNC) + jco = jco_default self["jobControlOptions"] = jco # no alpha order important! 
return dict.__getitem__(self, "jobControlOptions") From b70cb9c512648e5105d02cdd6441d6999c97096e Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Thu, 17 Mar 2022 16:12:46 -0400 Subject: [PATCH 17/34] revert weaver default jobControlOption async only - if sync needed, must be provided at deploy time --- weaver/datatype.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/weaver/datatype.py b/weaver/datatype.py index d80751262..0cbf4c01d 100644 --- a/weaver/datatype.py +++ b/weaver/datatype.py @@ -1818,7 +1818,8 @@ def jobControlOptions(self): # noqa: N802 Discussion about expected ordering of ``jobControlOptions``: https://github.com/opengeospatial/ogcapi-processes/issues/171#issuecomment-836819528 """ - jco_default = [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC] + # Weaver's default async only, must override explicitly during deploy if sync is needed + jco_default = [ExecuteControlOption.ASYNC] jco = self.setdefault("jobControlOptions", jco_default) if not isinstance(jco, list): # eg: None, bw-compat jco = jco_default From 0e9119add35ff5459f985150c8cccdda72edee50 Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Thu, 17 Mar 2022 16:13:58 -0400 Subject: [PATCH 18/34] revert test for supported jobControlOptions --- tests/test_datatype.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_datatype.py b/tests/test_datatype.py index 552827c91..ee6eb42de 100644 --- a/tests/test_datatype.py +++ b/tests/test_datatype.py @@ -53,7 +53,7 @@ def test_process_job_control_options_resolution(): Process(id="test-{}".format(uuid.uuid4()), package={}, jobControlOptions=[None]), Process(id="test-{}".format(uuid.uuid4()), package={}, jobControlOptions=[]), ]): - assert test_process.jobControlOptions == [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC], f"Test {i}" + assert test_process.jobControlOptions == [ExecuteControlOption.ASYNC], f"Test {i}" # explicitly provided modes are used as is, 
especially if partial (allow disabling some modes) proc = Process(id="test-{}".format(uuid.uuid4()), package={}, jobControlOptions=[ExecuteControlOption.ASYNC]) assert proc.jobControlOptions == [ExecuteControlOption.ASYNC] From 170c6fdaab6d25ab475f990734e7a6cbd936c70e Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Mon, 21 Mar 2022 12:06:18 -0400 Subject: [PATCH 19/34] ignore except too broad for int check --- weaver/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/weaver/utils.py b/weaver/utils.py index 4334311fa..176abda26 100644 --- a/weaver/utils.py +++ b/weaver/utils.py @@ -510,7 +510,7 @@ def as_int(value, default): """ try: return int(value) - except Exception: + except Exception: # noqa: W0703 # nosec: B110 pass return default From 2ba06794c69c3ec6b3ccbd84b4e07f8fb9535a55 Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Tue, 22 Mar 2022 12:35:20 -0400 Subject: [PATCH 20/34] fix doc indents --- docs/source/processes.rst | 40 +++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/docs/source/processes.rst b/docs/source/processes.rst index 3ef6b5258..8d85a0428 100644 --- a/docs/source/processes.rst +++ b/docs/source/processes.rst @@ -436,26 +436,26 @@ and parametrization of various input/output combinations. Let's employ the follo | .. code-block:: json | .. 
code-block:: json | | :caption: Job Execution Payload as Listing | :caption: Job Execution Payload as Mapping | | | | - | { | { | - | "mode": "async", | "mode": "async", | - | "response": "document", | "response": "document", | - | "inputs": [ | "inputs": { | - | { | "input-file": { | - | "id": "input-file", | "href": " Date: Tue, 22 Mar 2022 14:32:00 -0400 Subject: [PATCH 21/34] quote requests typing --- weaver/wps_restapi/quotation/quotes.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/weaver/wps_restapi/quotation/quotes.py b/weaver/wps_restapi/quotation/quotes.py index 033abfb77..0811bf531 100644 --- a/weaver/wps_restapi/quotation/quotes.py +++ b/weaver/wps_restapi/quotation/quotes.py @@ -23,6 +23,8 @@ if TYPE_CHECKING: from weaver.datatype import Process + from weaver.typedefs import AnyResponseType, PyramidRequest + LOGGER = logging.getLogger(__name__) @@ -30,6 +32,7 @@ schema=sd.PostProcessQuoteRequestEndpoint(), response_schemas=sd.post_quotes_responses) @log_unhandled_exceptions(logger=LOGGER, message=sd.InternalServerErrorResponseSchema.description) def request_quote(request): + # type: (PyramidRequest) -> AnyResponseType """ Request a quotation for a process. """ @@ -121,6 +124,7 @@ def request_quote(request): schema=sd.QuotesEndpoint(), response_schemas=sd.get_quote_list_responses) @log_unhandled_exceptions(logger=LOGGER, message=sd.InternalServerErrorResponseSchema.description) def get_quote_list(request): + # type: (PyramidRequest) -> AnyResponseType """ Get list of quotes IDs. """ @@ -149,6 +153,7 @@ def get_quote_list(request): schema=sd.QuoteEndpoint(), response_schemas=sd.get_quote_responses) @log_unhandled_exceptions(logger=LOGGER, message=sd.InternalServerErrorResponseSchema.description) def get_quote_info(request): + # type: (PyramidRequest) -> AnyResponseType """ Get quote information. 
""" @@ -167,6 +172,7 @@ def get_quote_info(request): schema=sd.PostQuote(), response_schemas=sd.post_quote_responses) @log_unhandled_exceptions(logger=LOGGER, message=sd.InternalServerErrorResponseSchema.description) def execute_quote(request): + # type: (PyramidRequest) -> AnyResponseType """ Execute a quoted process. """ From bf99d10364ca2609438dba300c604f33b0ce96a0 Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Tue, 22 Mar 2022 14:47:40 -0400 Subject: [PATCH 22/34] add ref execute schema not required inputs --- weaver/wps_restapi/swagger_definitions.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/weaver/wps_restapi/swagger_definitions.py b/weaver/wps_restapi/swagger_definitions.py index a0c89e43a..d9961046d 100644 --- a/weaver/wps_restapi/swagger_definitions.py +++ b/weaver/wps_restapi/swagger_definitions.py @@ -2997,6 +2997,9 @@ class ExecuteInputOutputs(ExtendedMappingSchema): # - 'tests.wps_restapi.test_providers.WpsRestApiProcessesTest.test_execute_process_no_error_not_required_params' # - 'tests.wps_restapi.test_providers.WpsRestApiProcessesTest.test_get_provider_process_no_inputs' # - 'tests.wps_restapi.test_colander_extras.test_oneof_variable_dict_or_list' + # + # OGC 'execute.yaml' also does not enforce any required item. + schema_ref = f"{OGC_API_SCHEMA_URL}/{OGC_API_SCHEMA_VERSION}/core/openapi/schemas/execute.yaml" inputs = ExecuteInputValues(default={}, description="Values submitted for execution.") outputs = ExecuteOutputSpec( # FIXME: add documentation reference link OGC/Weaver for further details. 
From c81a99c65987b7eff189b4bd80d7c7860bea76db Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Tue, 22 Mar 2022 14:54:11 -0400 Subject: [PATCH 23/34] fix lint --- weaver/cli.py | 10 +++++----- weaver/wps_restapi/swagger_definitions.py | 6 ++++-- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/weaver/cli.py b/weaver/cli.py index 535d9659f..192fc756a 100644 --- a/weaver/cli.py +++ b/weaver/cli.py @@ -1236,16 +1236,16 @@ def make_parser(): Indicates which outputs by ID to be returned as HTTP Link header reference instead of body content value. This defines the output transmission mode when submitting the execution request. - With reference transmission mode, + With reference transmission mode, outputs that contain literal data will be linked by ``text/plain`` file containing the data. Outputs that refer to a file reference will simply contain that URL reference as link. - With value transmission mode (default behavior when outputs are not specified in this list), outputs are + With value transmission mode (default behavior when outputs are not specified in this list), outputs are returned as direct values (literal or href) within the response content body. - - When requesting any output to be returned by reference, option ``-H/--headers`` should be considered as + + When requesting any output to be returned by reference, option ``-H/--headers`` should be considered as well to return the provided ``Link`` headers for these outputs on the command line. 
- + Example: ``-R output-one -R output-two`` """) ) diff --git a/weaver/wps_restapi/swagger_definitions.py b/weaver/wps_restapi/swagger_definitions.py index 829ac90f7..3c9e98792 100644 --- a/weaver/wps_restapi/swagger_definitions.py +++ b/weaver/wps_restapi/swagger_definitions.py @@ -2363,8 +2363,10 @@ class JobOutputQuery(ExtendedMappingSchema): summary="Selects the schema employed for representation of job outputs.", description=( "Selects the schema employed for representation of job outputs for providing file Content-Type details. " - f"When '{JobInputsOutputsSchema.OLD}' is employed, 'format.mimeType' is used and 'type' is reported as well. " - f"When '{JobInputsOutputsSchema.OGC}' is employed, 'format.mediaType' is used and 'type' is reported as well. " + f"When '{JobInputsOutputsSchema.OLD}' is employed, " + "'format.mimeType' is used and 'type' is reported as well. " + f"When '{JobInputsOutputsSchema.OGC}' is employed, " + "'format.mediaType' is used and 'type' is reported as well. " "When the '+strict' value is added, only the 'format' or 'type' will be represented according to the " f"reference standard ({JobInputsOutputsSchema.OGC}, {JobInputsOutputsSchema.OLD}) representation." 
) From fd0e83e42ff0e624bdcba0878c9cc5b40e81a342 Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Tue, 22 Mar 2022 23:49:15 -0400 Subject: [PATCH 24/34] adjust sync result returned directly + support response=raw for single outputs or multi-by-ref (relates to #376) --- CHANGES.rst | 4 + tests/functional/test_builtin.py | 296 ++++++--- tests/functional/utils.py | 2 +- weaver/datatype.py | 46 +- weaver/execute.py | 11 + weaver/processes/execution.py | 64 +- weaver/processes/utils.py | 26 - weaver/store/base.py | 2 + weaver/store/mongodb.py | 3 + weaver/typedefs.py | 8 +- weaver/wps/service.py | 9 +- weaver/wps_restapi/api.py | 12 +- weaver/wps_restapi/jobs/jobs.py | 596 +----------------- weaver/wps_restapi/jobs/utils.py | 696 ++++++++++++++++++++++ weaver/wps_restapi/processes/processes.py | 5 +- weaver/wps_restapi/providers/providers.py | 7 +- weaver/wps_restapi/swagger_definitions.py | 14 +- 17 files changed, 1070 insertions(+), 731 deletions(-) create mode 100644 weaver/wps_restapi/jobs/utils.py diff --git a/CHANGES.rst b/CHANGES.rst index fd447dc9a..40d9da52a 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -34,6 +34,10 @@ Changes: requested this way (resolves `#377 `_). - Updated every ``Process`` to report that they support ``outputTransmission`` both as ``reference`` and ``value``, since handling of results is accomplished by `Weaver` itself, regardless of the application being executed. +- Add partial support of ``response=raw`` parameter for execution request submission in order to handle results to + be returned according to the specified ``outputTransmission`` by ``reference`` or ``value``. + Multipart contents for multi-output results are not yet supported + (relates to `#376 `_).
- Add `CLI` option ``-R/--ref/--reference`` for ``execute`` operation allowing to request corresponding ``outputs`` by ID to be returned using the ``transmissionMode: reference`` method, producing HTTP ``Link`` headers for those entries rather than inserting values in the response content body. diff --git a/tests/functional/test_builtin.py b/tests/functional/test_builtin.py index 4230a9485..1ee5a69b1 100644 --- a/tests/functional/test_builtin.py +++ b/tests/functional/test_builtin.py @@ -86,30 +86,80 @@ def test_jsonarray2netcdf_describe_ogc_schema(self): assert body["jobControlOptions"] == [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC] assert body["outputTransmission"] == [ExecuteTransmissionMode.REFERENCE, ExecuteTransmissionMode.VALUE] - def test_jsonarray2netcdf_execute_async(self): + def setup_inputs(self, stack): dirname = tempfile.gettempdir() nc_data = "Hello NetCDF!" + tmp_ncdf = tempfile.NamedTemporaryFile(dir=dirname, mode="w", suffix=".nc") + tmp_json = tempfile.NamedTemporaryFile(dir=dirname, mode="w", suffix=".json") + tmp_ncdf = stack.enter_context(tmp_ncdf) # noqa + tmp_json = stack.enter_context(tmp_json) # noqa + tmp_ncdf.write(nc_data) + tmp_ncdf.seek(0) + tmp_json.write(json.dumps(["file://{}".format(os.path.join(dirname, tmp_ncdf.name))])) + tmp_json.seek(0) + body = {"inputs": [{"id": "input", "href": os.path.join(dirname, tmp_json.name)}]} + return body, nc_data + + def validate_results(self, results, outputs, data, links): + # first validate format of OGC-API results + if results is not None: + assert isinstance(results, dict) + assert "output" in results, "Expected result ID 'output' in response body" + assert isinstance(results["output"], dict), "Container of result ID 'output' should be a dict" + assert "href" in results["output"] + assert "format" in results["output"] + fmt = results["output"]["format"] # type: JSON + assert isinstance(fmt, dict), "Result format should be provided with content details" + assert "mediaType" in 
fmt + assert isinstance(fmt["mediaType"], str), "Result format Content-Type should be a single string definition" + assert fmt["mediaType"] == ContentType.APP_NETCDF, "Result 'output' format expected to be NetCDF file" + nc_href = results["output"]["href"] + assert isinstance(nc_href, str) and len(nc_href) + elif links: + assert isinstance(links, list) and len(links) == 1 and isinstance(links[0], tuple) + assert "rel=\"output\"" in links[0][1] + assert f"type={ContentType.APP_NETCDF}" in links[0][1] + nc_link = links[0][1].split(" ")[0] + assert nc_link.startswith("<") and nc_link.startswith(">") + nc_href = nc_link[1:-1] + else: + nc_href = None + + settings = get_settings_from_testapp(self.app) + wps_path = settings.get("weaver.wps_output_path") + wps_dir = settings.get("weaver.wps_output_dir") + wps_out = "{}{}".format(settings.get("weaver.url"), wps_path) + + # validate results if applicable + if nc_href is not None: + nc_real_path = nc_href.replace(wps_out, wps_dir) + assert nc_href.startswith(wps_out) + assert os.path.split(nc_real_path)[-1] == os.path.split(nc_href)[-1] + assert os.path.isfile(nc_real_path) + with open(nc_real_path, "r") as f: + assert f.read() == data + + # if everything was valid for results, validate equivalent but differently formatted outputs response + assert outputs["outputs"][0]["id"] == "output" + nc_href = outputs["outputs"][0]["href"] + assert isinstance(nc_href, str) and len(nc_href) + assert nc_href.startswith(wps_out) + nc_real_path = nc_href.replace(wps_out, wps_dir) + assert os.path.split(nc_real_path)[-1] == os.path.split(nc_href)[-1] + + def test_jsonarray2netcdf_execute_async(self): with contextlib.ExitStack() as stack_exec: - tmp_ncdf = tempfile.NamedTemporaryFile(dir=dirname, mode="w", suffix=".nc") - tmp_json = tempfile.NamedTemporaryFile(dir=dirname, mode="w", suffix=".json") - tmp_ncdf = stack_exec.enter_context(tmp_ncdf) # noqa - tmp_json = stack_exec.enter_context(tmp_json) # noqa - tmp_ncdf.write(nc_data) - 
tmp_ncdf.seek(0) - tmp_json.write(json.dumps(["file://{}".format(os.path.join(dirname, tmp_ncdf.name))])) - tmp_json.seek(0) - data = { + body, nc_data = self.setup_inputs(stack_exec) + body.update({ "mode": ExecuteMode.ASYNC, "response": ExecuteResponse.DOCUMENT, - "inputs": [{"id": "input", "href": os.path.join(dirname, tmp_json.name)}], "outputs": [{"id": "output", "transmissionMode": ExecuteTransmissionMode.VALUE}], - } - + }) for mock_exec in mocked_execute_celery(): stack_exec.enter_context(mock_exec) path = "/processes/jsonarray2netcdf/jobs" resp = mocked_sub_requests(self.app, "post_json", path, - data=data, headers=self.json_headers, only_local=True) + data=body, headers=self.json_headers, only_local=True) assert resp.status_code == 201, "Error: {}".format(resp.json) assert resp.content_type in ContentType.APP_JSON @@ -127,48 +177,180 @@ def test_jsonarray2netcdf_execute_async(self): assert resp.status_code == 200, "Error job outputs:\n{}".format(resp.json) outputs = resp.json - self.validate_results(results, outputs, nc_data) + self.validate_results(results, outputs, nc_data, None) - def test_jsonarray2netcdf_execute_sync(self): - dirname = tempfile.gettempdir() - nc_data = "Hello NetCDF!" + def test_jsonarray2netcdf_execute_async_output_by_reference_dontcare_response_document(self): + """ + Jobs submitted with ``response=document`` are not impacted by ``transmissionMode``. + + The results schema should always be returned when document is requested. + + .. 
seealso:: + https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-document + """ with contextlib.ExitStack() as stack_exec: - tmp_ncdf = tempfile.NamedTemporaryFile(dir=dirname, mode="w", suffix=".nc") - tmp_json = tempfile.NamedTemporaryFile(dir=dirname, mode="w", suffix=".json") - tmp_ncdf = stack_exec.enter_context(tmp_ncdf) # noqa - tmp_json = stack_exec.enter_context(tmp_json) # noqa - tmp_ncdf.write(nc_data) - tmp_ncdf.seek(0) - tmp_json.write(json.dumps(["file://{}".format(os.path.join(dirname, tmp_ncdf.name))])) - tmp_json.seek(0) - data = { - "inputs": [{"id": "input", "href": os.path.join(dirname, tmp_json.name)}], - "outputs": [{"id": "output", "transmissionMode": ExecuteTransmissionMode.VALUE}], - } - headers = {"Prefer": "wait=10"} - headers.update(self.json_headers) + body, nc_data = self.setup_inputs(stack_exec) + body.update({ + "response": ExecuteResponse.DOCUMENT, # by value/reference don't care because of this + "outputs": [{"id": "output", "transmissionMode": ExecuteTransmissionMode.REFERENCE}], + }) + for mock_exec in mocked_execute_celery(): + stack_exec.enter_context(mock_exec) + path = "/processes/jsonarray2netcdf/jobs" + resp = mocked_sub_requests(self.app, "post_json", path, + data=body, headers=self.json_headers, only_local=True) + + assert resp.status_code == 201, "Error: {}".format(resp.json) + assert resp.content_type in ContentType.APP_JSON + job_url = resp.json["location"] + self.monitor_job(job_url, return_status=True) # don't fetch results automatically + + resp = self.app.get("{}/results".format(job_url), headers=self.json_headers) + assert resp.status_code == 200, "Error: {}".format(resp.text) + assert resp.content_type == ContentType.APP_JSON + result_links = [hdr for hdr in resp.headers if hdr[0].lower() == "link"] + assert len(result_links) == 0 + results = resp.json + + # even though results are requested by Link reference, + # Weaver still offers them with document on outputs endpoint + output_url = 
job_url + "/outputs" + resp = self.app.get(output_url, headers=self.json_headers) + assert resp.status_code == 200, "Error job outputs:\n{}".format(resp.text) + outputs = resp.json + + self.validate_results(results, outputs, nc_data, result_links) + + def test_jsonarray2netcdf_execute_async_output_by_value_response_raw(self): + """ + Jobs submitted with ``response=raw`` and single output as ``transmissionMode=value`` must return its raw data. + + .. seealso:: + https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-value-one + """ + with contextlib.ExitStack() as stack_exec: + body, nc_data = self.setup_inputs(stack_exec) + body.update({ + "response": ExecuteResponse.RAW, # by value/reference important here + # NOTE: quantity of outputs important as well + # since single output, content-type is directly that output (otherwise should be multipart) + "outputs": [{"id": "output", "transmissionMode": ExecuteTransmissionMode.VALUE}], # data dump + }) + for mock_exec in mocked_execute_celery(): + stack_exec.enter_context(mock_exec) + path = "/processes/jsonarray2netcdf/jobs" + resp = mocked_sub_requests(self.app, "post_json", path, + data=body, headers=self.json_headers, only_local=True) + + assert resp.status_code == 201, "Error: {}".format(resp.text) + assert resp.content_type in ContentType.APP_JSON + job_url = resp.json["location"] + self.monitor_job(job_url, return_status=True) # don't fetch results automatically + + resp = self.app.get("{}/results".format(job_url), headers=self.json_headers) + assert resp.status_code < 400, "Error: {}".format(resp.text) + assert resp.status_code == 200, "Body should contain literal raw data dump" + assert resp.content_type in ContentType.APP_NETCDF, "raw result by value should be directly the content-type" + assert resp.text == nc_data, "raw result by value should be directly the data content" + assert resp.headers + result_links = [hdr for hdr in resp.headers if hdr[0].lower() == "link"] + assert 
len(result_links) == 0 + + # even though results are requested by raw data, + # Weaver still offers them with document on outputs endpoint + output_url = job_url + "/outputs" + resp = self.app.get(output_url, headers=self.json_headers) + assert resp.status_code == 200, "Error job outputs:\n{}".format(resp.text) + outputs = resp.json + self.validate_results(None, outputs, nc_data, result_links) + + def test_jsonarray2netcdf_execute_async_output_by_reference_response_raw(self): + """ + Jobs submitted with ``response=raw`` and single output as ``transmissionMode=reference`` must return a link. + + Contents should be empty, and the reference should be provided with HTTP ``Link`` header. + + .. seealso:: + https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-ref + """ + with contextlib.ExitStack() as stack_exec: + body, nc_data = self.setup_inputs(stack_exec) + body.update({ + "response": ExecuteResponse.RAW, # by value/reference important here + "outputs": [{"id": "output", "transmissionMode": ExecuteTransmissionMode.REFERENCE}], # Link header + }) for mock_exec in mocked_execute_celery(): stack_exec.enter_context(mock_exec) path = "/processes/jsonarray2netcdf/jobs" resp = mocked_sub_requests(self.app, "post_json", path, - data=data, headers=headers, only_local=True) + data=body, headers=self.json_headers, only_local=True) + + assert resp.status_code == 201, "Error: {}".format(resp.json) + assert resp.content_type in ContentType.APP_JSON + job_url = resp.json["location"] + self.monitor_job(job_url, return_status=True) # don't fetch results automatically + + resp = self.app.get("{}/results".format(job_url), headers=self.json_headers) + assert resp.status_code < 400, "Error: {}".format(resp.json) + assert resp.status_code == 204, "Body should be empty since all outputs requested by reference (Link header)" + assert resp.content_type is None + assert resp.headers + result_links = [hdr for hdr in resp.headers if hdr[0].lower() == "link"] + + # even
though results are requested by Link reference, + # Weaver still offers them with document on outputs endpoint + output_url = job_url + "/outputs" + resp = self.app.get(output_url, headers=self.json_headers) + assert resp.status_code == 200, "Error job outputs:\n{}".format(resp.json) + outputs = resp.json + + self.validate_results(None, outputs, nc_data, result_links) + + def test_jsonarray2netcdf_execute_sync(self): + """ + Job submitted with ``mode=sync`` or ``Prefer`` header for sync should respond directly with the results schema. + + .. seealso:: + https://docs.ogc.org/is/18-062r2/18-062r2.html#sc_execute_response + """ + with contextlib.ExitStack() as stack_exec: + body, nc_data = self.setup_inputs(stack_exec) + body.update({ + "response": ExecuteResponse.DOCUMENT, + "outputs": [{"id": "output", "transmissionMode": ExecuteTransmissionMode.VALUE}] + }) + for mock_exec in mocked_execute_celery(): + stack_exec.enter_context(mock_exec) + headers = {"Prefer": "wait=10"} + headers.update(self.json_headers) + path = "/processes/jsonarray2netcdf/jobs" + resp = mocked_sub_requests(self.app, "post_json", path, + data=body, headers=headers, only_local=True) assert resp.status_code == 200, "Error: {}".format(resp.json) assert resp.content_type in ContentType.APP_JSON - # since sync, all status details are already available! 
- assert resp.json["status"] == Status.SUCCEEDED + # since sync, results are directly available instead of job status + # even if results are returned directly (instead of status), + # status location link is available for reference as needed assert "Location" in resp.headers - # validate indeed sync + # validate sync was indeed applied (in normal situation, not considering mock test that runs in sync) assert resp.headers["Preference-Applied"] == headers["Prefer"] - # following details not available yet in async, but are in sync - assert isinstance(resp.json["created"], str) and resp.json["created"] - assert isinstance(resp.json["finished"], str) and resp.json["finished"] - assert isinstance(resp.json["duration"], str) and resp.json["duration"] - assert isinstance(resp.json["progress"], int) and resp.json["progress"] == 100 + # following details should not be available since results are returned in sync instead of async job status + for field in ["status", "created", "finished", "duration", "progress"]: + assert field not in resp.json + # validate that job can still be found and its metadata are defined although executed in sync job_url = resp.headers["Location"] + resp = self.app.get(job_url, headers=self.json_headers) + assert resp.status_code == 200 + assert resp.content_type == ContentType.APP_JSON + for field in ["status", "created", "finished", "duration", "progress"]: + assert field in resp.json + assert resp.json["status"] == Status.SUCCEEDED + assert resp.json["progress"] == 100 + out_url = f"{job_url}/results" resp = self.app.get(out_url, headers=self.json_headers) assert resp.status_code == 200 @@ -180,34 +362,4 @@ def test_jsonarray2netcdf_execute_sync(self): assert resp.status_code == 200, "Error job outputs:\n{}".format(resp.json) outputs = resp.json - self.validate_results(results, outputs, nc_data) - - def validate_results(self, results, outputs, data): - - # first validate format of OGC-API results - assert "output" in results, "Expected result ID 
'output' in response body" - assert isinstance(results["output"], dict), "Container of result ID 'output' should be a dict" - assert "href" in results["output"] - assert "format" in results["output"] - fmt = results["output"]["format"] # type: JSON - assert isinstance(fmt, dict), "Result format should be provided with content details" - assert "mediaType" in fmt - assert isinstance(fmt["mediaType"], str), "Result format Content-Type should be a single string definition" - assert fmt["mediaType"] == ContentType.APP_NETCDF, "Result 'output' format expected to be NetCDF file" - nc_path = results["output"]["href"] - assert isinstance(nc_path, str) and len(nc_path) - settings = get_settings_from_testapp(self.app) - wps_out = "{}{}".format(settings.get("weaver.url"), settings.get("weaver.wps_output_path")) - nc_real_path = nc_path.replace(wps_out, settings.get("weaver.wps_output_dir")) - assert nc_path.startswith(wps_out) - assert os.path.split(nc_real_path)[-1] == os.path.split(nc_path)[-1] - assert os.path.isfile(nc_real_path) - with open(nc_real_path, "r") as f: - assert f.read() == data - - # if everything was valid for results, validate equivalent but differently formatted outputs response - assert outputs["outputs"][0]["id"] == "output" - nc_path = outputs["outputs"][0]["href"] - assert isinstance(nc_path, str) and len(nc_path) - assert nc_path.startswith(wps_out) - assert os.path.split(nc_real_path)[-1] == os.path.split(nc_path)[-1] + self.validate_results(results, outputs, nc_data, None) diff --git a/tests/functional/utils.py b/tests/functional/utils.py index c807b69cb..6b459fde4 100644 --- a/tests/functional/utils.py +++ b/tests/functional/utils.py @@ -284,7 +284,7 @@ def check_job_status(_resp, running=False): if return_status or expect_failed: return resp.json resp = self.app.get("{}/results".format(status_url), headers=self.json_headers) - assert resp.status_code == 200, "Error job info:\n{}".format(resp.json) + assert resp.status_code == 200, "Error job 
info:\n{}".format(resp.text) return resp.json def get_outputs(self, status_url): diff --git a/weaver/datatype.py b/weaver/datatype.py index d07ea3c7b..add6a9210 100644 --- a/weaver/datatype.py +++ b/weaver/datatype.py @@ -30,7 +30,7 @@ from weaver import xml_util from weaver.exceptions import ProcessInstanceError, ServiceParsingError -from weaver.execute import ExecuteControlOption, ExecuteMode, ExecuteTransmissionMode +from weaver.execute import ExecuteControlOption, ExecuteMode, ExecuteResponse, ExecuteTransmissionMode from weaver.formats import AcceptLanguage, ContentType, repr_json from weaver.processes.constants import ProcessSchema from weaver.processes.convert import get_field, null, ows2json, wps2json_io @@ -58,7 +58,7 @@ from owslib.wps import WebProcessingService - from weaver.execute import AnyExecuteControlOption, AnyExecuteTransmissionMode + from weaver.execute import AnyExecuteControlOption, AnyExecuteMode, AnyExecuteResponse, AnyExecuteTransmissionMode from weaver.processes.constants import ProcessSchemaType from weaver.processes.types import AnyProcessType from weaver.quotation.status import AnyQuoteStatus @@ -837,6 +837,21 @@ def status_location(self, location_url): raise TypeError(f"Type 'str' is required for '{self.__name__}.status_location'") self["status_location"] = location_url + def status_url(self, container=None): + # type: (Optional[AnySettingsContainer]) -> str + """ + Obtain the resolved endpoint where the :term:`Job` status information can be obtained. 
+ """ + settings = get_settings(container) + location_base = "/providers/{provider_id}".format(provider_id=self.service) if self.service else "" + location_url = "{base_url}{location_base}/processes/{process_id}/jobs/{job_id}".format( + base_url=get_wps_restapi_base_url(settings), + location_base=location_base, + process_id=self.process, + job_id=self.id + ) + return location_url + @property def notification_email(self): # type: () -> Optional[str] @@ -873,18 +888,39 @@ def execute_sync(self): @property def execution_mode(self): - # type: () -> ExecuteMode + # type: () -> AnyExecuteMode return ExecuteMode.get(self.get("execution_mode"), ExecuteMode.ASYNC) @execution_mode.setter def execution_mode(self, mode): - # type: (Union[ExecuteMode, str]) -> None + # type: (Union[AnyExecuteMode, str]) -> None exec_mode = ExecuteMode.get(mode) if exec_mode not in ExecuteMode: modes = list(ExecuteMode.values()) raise ValueError(f"Invalid value for '{self.__name__}.execution_mode'. Must be one of {modes}") self["execution_mode"] = mode + @property + def execution_response(self): + # type: () -> AnyExecuteResponse + out = self.setdefault("execution_response", ExecuteResponse.DOCUMENT) + if out not in ExecuteResponse.values(): + out = ExecuteResponse.DOCUMENT + self["execution_response"] = out + return out + + @execution_response.setter + def execution_response(self, response): + # type: (Optional[Union[AnyExecuteResponse, str]]) -> None + if response is None: + exec_resp = ExecuteResponse.DOCUMENT + else: + exec_resp = ExecuteResponse.get(response) + if exec_resp not in ExecuteResponse: + resp = list(ExecuteResponse.values()) + raise ValueError(f"Invalid value for '{self.__name__}.execution_response'. 
Must be one of {resp}") + self["execution_response"] = exec_resp + @property def is_local(self): # type: () -> bool @@ -1216,10 +1252,12 @@ def params(self): "service": self.service, "process": self.process, "inputs": self.inputs, + "outputs": self.outputs, "user_id": self.user_id, "status": self.status, "status_message": self.status_message, "status_location": self.status_location, + "execution_response": self.execution_response, "execution_mode": self.execution_mode, "is_workflow": self.is_workflow, "created": self.created, diff --git a/weaver/execute.py b/weaver/execute.py index 986640de8..a51787447 100644 --- a/weaver/execute.py +++ b/weaver/execute.py @@ -2,6 +2,9 @@ from weaver.base import Constants +if TYPE_CHECKING: + from typing import List + class ExecuteMode(Constants): AUTO = "auto" @@ -13,6 +16,14 @@ class ExecuteControlOption(Constants): ASYNC = "async-execute" SYNC = "sync-execute" + @classmethod + def values(cls): + # type: () -> List[AnyExecuteControlOption] + """ + Return default control options in specific order according to preferred modes for execution by `Weaver`. 
+ """ + return [ExecuteControlOption.ASYNC, ExecuteControlOption.SYNC] + class ExecuteResponse(Constants): RAW = "raw" diff --git a/weaver/processes/execution.py b/weaver/processes/execution.py index e089f8ac5..f8b34a4cf 100644 --- a/weaver/processes/execution.py +++ b/weaver/processes/execution.py @@ -8,12 +8,12 @@ from celery.exceptions import TimeoutError as CeleryTaskTimeoutError from owslib.util import clean_ows_url from owslib.wps import ComplexDataInput -from pyramid.httpexceptions import HTTPBadRequest, HTTPNotAcceptable, HTTPNotImplemented +from pyramid.httpexceptions import HTTPBadRequest, HTTPNotAcceptable from pyramid_celery import celery_app as app from weaver.database import get_db from weaver.datatype import Process, Service -from weaver.execute import ExecuteControlOption, ExecuteMode, ExecuteResponse, ExecuteTransmissionMode +from weaver.execute import ExecuteControlOption, ExecuteMode from weaver.formats import AcceptLanguage, ContentType from weaver.notify import encrypt_email, notify_job_complete from weaver.owsexceptions import OWSNoApplicableCode @@ -46,7 +46,7 @@ load_pywps_config ) from weaver.wps_restapi import swagger_definitions as sd -from weaver.wps_restapi.utils import get_wps_restapi_base_url +from weaver.wps_restapi.jobs.utils import get_job_results_response, get_job_submission_response LOGGER = logging.getLogger(__name__) if TYPE_CHECKING: @@ -60,7 +60,7 @@ from weaver.datatype import Job from weaver.processes.convert import OWS_Input_Type, ProcessOWS from weaver.status import StatusType - from weaver.typedefs import CeleryResult, HeadersType, HeaderCookiesType, JSON, SettingsType + from weaver.typedefs import AnyResponseType, CeleryResult, HeadersType, HeaderCookiesType, JSON, SettingsType from weaver.visibility import AnyVisibility @@ -442,7 +442,7 @@ def map_locations(job, settings): def submit_job(request, reference, tags=None): - # type: (Request, Union[Service, Process], Optional[List[str]]) -> Tuple[JSON, HeadersType] + # 
type: (Request, Union[Service, Process], Optional[List[str]]) -> AnyResponseType """ Generates the job submission from details retrieved in the request. @@ -498,28 +498,6 @@ def submit_job(request, reference, tags=None): visibility, language=lang, headers=headers, tags=tags, user=user, context=context) -def _validate_job_parameters(json_body): - # type: (JSON) -> None - """ - Tests supported parameters not automatically validated by colander deserialize since they are optional. - """ - exec_mode = json_body.get("mode") - if exec_mode not in [None, ExecuteMode.ASYNC, ExecuteMode.AUTO]: - raise HTTPNotImplemented(detail=f"Execution mode '{exec_mode}' not supported.") - - resp_mode = json_body.get("response") - if resp_mode not in [None, ExecuteResponse.DOCUMENT]: - raise HTTPNotImplemented(detail=f"Execution response type '{resp_mode}' not supported.") - - outputs = json_body.get("outputs", []) - if isinstance(outputs, dict): - outputs = [dict(id=out, **keys) for out, keys in outputs.items()] - for job_output in outputs: - mode = job_output["transmissionMode"] - if mode not in ExecuteTransmissionMode.values(): - raise HTTPNotImplemented(detail=f"Execute transmissionMode '{mode}' not supported.") - - def submit_job_handler(payload, # type: JSON settings, # type: SettingsType service_url, # type: str @@ -533,7 +511,7 @@ def submit_job_handler(payload, # type: JSON tags=None, # type: Optional[List[str]] user=None, # type: Optional[int] context=None, # type: Optional[str] - ): # type: (...) -> Tuple[JSON, HeadersType] + ): # type: (...) -> AnyResponseType """ Submits the job to the Celery worker with provided parameters. 
@@ -544,11 +522,6 @@ def submit_job_handler(payload, # type: JSON except colander.Invalid as ex: raise HTTPBadRequest("Invalid schema: [{}]".format(str(ex))) - # TODO: remove when all parameter variations are supported - # FIXME: - # - support 'response: raw' (https://github.com/crim-ca/weaver/issues/376) - # - allow omitting 'outputs' (https://github.com/crim-ca/weaver/issues/375) - _validate_job_parameters(json_body) db = get_db(settings) headers = headers or {} if is_local: @@ -566,6 +539,7 @@ def submit_job_handler(payload, # type: JSON # as per https://datatracker.ietf.org/doc/html/rfc7240#section-2 # Prefer header not resolve as valid still proces is_execute_async = mode != ExecuteMode.SYNC + exec_resp = json_body.get("response") notification_email = json_body.get("notification_email") encrypted_email = encrypt_email(notification_email, settings) if notification_email else None @@ -573,17 +547,12 @@ def submit_job_handler(payload, # type: JSON store = db.get_store(StoreJobs) # type: StoreJobs job = store.save_job(task_id=Status.ACCEPTED, process=process_id, service=provider_id, inputs=json_body.get("inputs"), is_local=is_local, is_workflow=is_workflow, - access=visibility, user_id=user, execute_async=is_execute_async, custom_tags=tags, - notification_email=encrypted_email, accept_language=language, context=context) + access=visibility, user_id=user, context=context, + execute_async=is_execute_async, execute_response=exec_resp, + custom_tags=tags, notification_email=encrypted_email, accept_language=language) job.save_log(logger=LOGGER, message="Job task submitted for execution.", status=Status.ACCEPTED, progress=0) job = store.update_job(job) - location_base = "/providers/{provider_id}".format(provider_id=provider_id) if provider_id else "" - location_url = "{base_url}{location_base}/processes/{process_id}/jobs/{job_id}".format( - base_url=get_wps_restapi_base_url(settings), - location_base=location_base, - process_id=process_id, - job_id=job.id - ) + 
location_url = job.status_url(settings) resp_headers = {"Location": location_url} resp_headers.update(applied) @@ -598,9 +567,15 @@ def submit_job_handler(payload, # type: JSON pass if result.ready(): job = store.fetch_by_id(job.id) + # when sync is successful, it must return the results direct instead of status info + # see: https://docs.ogc.org/is/18-062r2/18-062r2.html#sc_execute_response + if job.status == Status.SUCCEEDED: + return get_job_results_response(job, settings, headers=resp_headers) + # otherwise return the error status body = job.json(container=settings, self_link="status") body["location"] = location_url - return body, resp_headers + resp = get_job_submission_response(body, resp_headers, error=True) + return resp else: LOGGER.debug("Celery task requested as sync took too long to complete (wait=%ss). Continue in async.", wait) # sync not respected, therefore must drop it @@ -618,4 +593,5 @@ def submit_job_handler(payload, # type: JSON "status": map_status(Status.ACCEPTED), "location": location_url } - return body, resp_headers + resp = get_job_submission_response(body, resp_headers) + return resp diff --git a/weaver/processes/utils.py b/weaver/processes/utils.py index d3cfe58b9..fdd536640 100644 --- a/weaver/processes/utils.py +++ b/weaver/processes/utils.py @@ -103,32 +103,6 @@ def get_process(process_id=None, request=None, settings=None, store=None): raise HTTPBadRequest("Invalid schema:\n[{0!r}].".format(ex)) -def get_job_submission_response(body, headers): - # type: (JSON, AnyHeadersContainer) -> Union[HTTPOk, HTTPCreated] - """ - Generates the successful response from contents returned by :term:`Job` submission process. - - If :term:`Job` already finished processing within requested ``Prefer: wait=X`` seconds delay (and if allowed by - the :term:`Process` ``jobControlOptions``), return the successful status immediately instead of created status. 
- - Otherwise, return the status monitoring location of the created :term:`Job` to be monitored asynchronously. - - .. seealso:: - :func:`weaver.processes.execution.submit_job` - :func:`weaver.processes.execution.submit_job_handler` - """ - status = map_status(body.get("status")) - location = get_header("location", headers) - if status in JOB_STATUS_CATEGORIES[StatusCategory.FINISHED]: - body["description"] = sd.CompletedJobResponse.description - body = sd.CompletedJobStatusSchema().deserialize(body) - return HTTPOk(location=location, json=body, headers=headers) - - body["description"] = sd.CreatedLaunchJobResponse.description - body = sd.CreatedJobStatusSchema().deserialize(body) - return HTTPCreated(location=location, json=body, headers=headers) - - def map_progress(progress, range_min, range_max): # type: (Number, Number, Number) -> Number """ diff --git a/weaver/store/base.py b/weaver/store/base.py index 5634172e2..c2cc1f423 100644 --- a/weaver/store/base.py +++ b/weaver/store/base.py @@ -9,6 +9,7 @@ from pywps import Process as ProcessWPS from weaver.datatype import Bill, Job, Process, Quote, Service, VaultFile + from weaver.execute import AnyExecuteResponse from weaver.typedefs import ( AnyUUID, ExecutionInputs, @@ -125,6 +126,7 @@ def save_job(self, is_workflow=False, # type: bool is_local=False, # type: bool execute_async=True, # type: bool + execute_response=None, # type: Optional[AnyExecuteResponse] custom_tags=None, # type: Optional[List[str]] user_id=None, # type: Optional[int] access=None, # type: Optional[str] diff --git a/weaver/store/mongodb.py b/weaver/store/mongodb.py index 052c41398..fe8e8dd29 100644 --- a/weaver/store/mongodb.py +++ b/weaver/store/mongodb.py @@ -52,6 +52,7 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union from pymongo.collection import Collection + from weaver.execute import AnyExecuteResponse from weaver.processes.types import AnyProcessType from weaver.store.base import DatetimeIntervalType, 
JobGroupCategory, JobSearchResult from weaver.typedefs import AnyProcess, AnyProcessClass, AnyUUID, AnyValueType, ExecutionInputs, ExecutionOutputs @@ -577,6 +578,7 @@ def save_job(self, is_workflow=False, # type: bool is_local=False, # type: bool execute_async=True, # type: bool + execute_response=None, # type: Optional[AnyExecuteResponse] custom_tags=None, # type: Optional[List[str]] user_id=None, # type: Optional[int] access=None, # type: Optional[str] @@ -610,6 +612,7 @@ def save_job(self, "inputs": inputs, "status": map_status(Status.ACCEPTED), "execute_async": execute_async, + "execution_response": execute_response, "is_workflow": is_workflow, "is_local": is_local, "created": created if created else now(), diff --git a/weaver/typedefs.py b/weaver/typedefs.py index 49d25862e..5f3cf3421 100644 --- a/weaver/typedefs.py +++ b/weaver/typedefs.py @@ -42,7 +42,6 @@ from webtest.response import TestResponse from werkzeug.wrappers import Request as WerkzeugRequest - from weaver.execute import AnyExecuteTransmissionMode from weaver.processes.wps_process_base import WpsProcessInterface from weaver.datatype import Process from weaver.status import AnyStatusType @@ -302,6 +301,13 @@ def __call__(self, message: str, progress: Number, status: AnyStatusType, *args: ExecutionOutputsList = List[ExecutionOutputItem] ExecutionOutputsMap = Dict[str, ExecutionOutputObject] ExecutionOutputs = Union[ExecutionOutputsList, ExecutionOutputsMap] + ExecutionResultObject = TypedDict("ExecutionResultObject", { + "value": Optional[AnyValueType], + "href": Optional[str], + "type": Optional[str], + }, total=False) + ExecutionResultArray = List[ExecutionResultObject] + ExecutionResults = Dict[str, Union[ExecutionResultObject, ExecutionResultArray]] # reference employed as 'JobMonitorReference' by 'WPS1Process' JobExecution = TypedDict("JobExecution", {"execution": WPSExecution}) diff --git a/weaver/wps/service.py b/weaver/wps/service.py index cdcd81e1a..42b726820 100644 --- 
a/weaver/wps/service.py +++ b/weaver/wps/service.py @@ -21,7 +21,7 @@ from weaver.processes.convert import wps2json_job_payload from weaver.processes.execution import submit_job_handler from weaver.processes.types import ProcessType -from weaver.processes.utils import get_job_submission_response, get_process +from weaver.processes.utils import get_process from weaver.store.base import StoreProcesses from weaver.utils import get_header, get_registry, get_settings, get_weaver_url from weaver.visibility import Visibility @@ -34,6 +34,7 @@ load_pywps_config ) from weaver.wps_restapi import swagger_definitions as sd +from weaver.wps_restapi.jobs.utils import get_job_submission_response LOGGER = logging.getLogger(__name__) if TYPE_CHECKING: @@ -236,7 +237,7 @@ def _submit_job(self, wps_request): is_workflow = proc.type == ProcessType.WORKFLOW tags = req.args.get("tags", "").split(",") + ["xml", "wps-{}".format(wps_request.version)] data = wps2json_job_payload(wps_request, wps_process) - body, headers = submit_job_handler( + resp = submit_job_handler( data, self.settings, proc.processEndpointWPS1, process_id=pid, is_local=True, is_workflow=is_workflow, visibility=Visibility.PUBLIC, language=wps_request.language, tags=tags, headers=dict(req.headers), context=ctx @@ -249,11 +250,11 @@ def _submit_job(self, wps_request): # way to provide explicitly Accept header. Even our Wps1Process as Workflow step depends on this behaviour. 
accept_type = get_header("Accept", req.headers) if accept_type == ContentType.APP_JSON: - resp = get_job_submission_response(body, headers) + resp = get_job_submission_response(resp.body, resp.headers) setattr(resp, "_update_status", lambda *_, **__: None) # patch to avoid pywps server raising return resp - return body + return resp.body @handle_known_exceptions def prepare_process_for_execution(self, identifier): diff --git a/weaver/wps_restapi/api.py b/weaver/wps_restapi/api.py index 97db3c9cd..f06dfa35c 100644 --- a/weaver/wps_restapi/api.py +++ b/weaver/wps_restapi/api.py @@ -380,11 +380,12 @@ def api_conformance(request): # noqa: F811 ogcapi_proc_core + "/req/core/job-results-failed", ogcapi_proc_core + "/req/core/job-results", ogcapi_proc_core + "/req/core/job-results-async-document", + # FIXME: support raw multipart (https://github.com/crim-ca/weaver/issues/376) # ogcapi_proc_core + "/req/core/job-results-async-raw-mixed-multi", - # ogcapi_proc_core + "/req/core/job-results-async-raw-ref", + ogcapi_proc_core + "/req/core/job-results-async-raw-ref", # ogcapi_proc_core + "/req/core/job-results-async-raw-value-multi", - # ogcapi_proc_core + "/req/core/job-results-async-raw-value-one", - # ogcapi_proc_core + "/req/core/job-results-success-sync", + ogcapi_proc_core + "/req/core/job-results-async-raw-value-one", + ogcapi_proc_core + "/req/core/job-results-success-sync", ogcapi_proc_core + "/req/core/job-success", ogcapi_proc_core + "/req/core/landingpage-op", ogcapi_proc_core + "/req/core/landingpage-success", @@ -409,9 +410,10 @@ def api_conformance(request): # noqa: F811 ogcapi_proc_core + "/req/core/process-execute-success-async", ogcapi_proc_core + "/req/core/process-execute-sync-document", # ogcapi_proc_core + "/req/core/process-execute-sync-raw-mixed-multi", - # ogcapi_proc_core + "/req/core/process-execute-sync-raw-ref", + ogcapi_proc_core + "/req/core/process-execute-sync-raw-ref", + # FIXME: support raw multipart 
(https://github.com/crim-ca/weaver/issues/376) # ogcapi_proc_core + "/req/core/process-execute-sync-raw-value-multi", - # ogcapi_proc_core + "/req/core/process-execute-sync-raw-value-one", + ogcapi_proc_core + "/req/core/process-execute-sync-raw-value-one", ogcapi_proc_core + "/req/core/pl-limit-definition", ogcapi_proc_core + "/req/core/pl-limit-response", ogcapi_proc_core + "/req/core/process-list", diff --git a/weaver/wps_restapi/jobs/jobs.py b/weaver/wps_restapi/jobs/jobs.py index 7e7d87a21..9111a0825 100644 --- a/weaver/wps_restapi/jobs/jobs.py +++ b/weaver/wps_restapi/jobs/jobs.py @@ -1,434 +1,39 @@ -import math -import os -import shutil -from copy import deepcopy from typing import TYPE_CHECKING from celery.utils.log import get_task_logger from colander import Invalid -from pyramid.httpexceptions import ( - HTTPBadRequest, - HTTPNoContent, - HTTPNotFound, - HTTPOk, - HTTPPermanentRedirect, - HTTPUnauthorized, - HTTPUnprocessableEntity -) -from pyramid.request import Request -from pyramid_celery import celery_app +from pyramid.httpexceptions import HTTPBadRequest, HTTPOk, HTTPPermanentRedirect, HTTPUnprocessableEntity from notify import encrypt_email from weaver.database import get_db from weaver.datatype import Job -from weaver.exceptions import ( - InvalidIdentifierValue, - JobGone, - JobInvalidParameter, - JobNotFound, - ProcessNotAccessible, - ProcessNotFound, - ServiceNotAccessible, - ServiceNotFound, - log_unhandled_exceptions -) -from weaver.execute import ExecuteTransmissionMode -from weaver.formats import ContentType, OutputFormat, get_format, repr_json -from weaver.owsexceptions import OWSNoApplicableCode, OWSNotFound -from weaver.processes.convert import ( - any2wps_literal_datatype, - convert_input_values_schema, - convert_output_params_schema, - get_field -) -from weaver.status import JOB_STATUS_CATEGORIES, Status, StatusCategory, map_status -from weaver.store.base import StoreJobs, StoreProcesses, StoreServices -from weaver.utils import 
get_any_id, get_any_value, get_path_kvp, get_settings, get_weaver_url, is_uuid -from weaver.visibility import Visibility -from weaver.wps.utils import get_wps_output_dir, get_wps_output_url +from weaver.exceptions import JobNotFound, log_unhandled_exceptions +from weaver.formats import OutputFormat, repr_json +from weaver.processes.convert import convert_input_values_schema, convert_output_params_schema +from weaver.store.base import StoreJobs +from weaver.utils import get_settings from weaver.wps_restapi import swagger_definitions as sd -from weaver.wps_restapi.constants import JobInputsOutputsSchema -from weaver.wps_restapi.providers.utils import forbid_local_only +from weaver.wps_restapi.jobs.utils import ( + dismiss_job_task, + get_job, + get_job_list_links, + get_job_results_response, + get_results, + get_schema_query, + raise_job_bad_status, + raise_job_dismissed, + validate_service_process +) from weaver.wps_restapi.swagger_definitions import datetime_interval_parser if TYPE_CHECKING: - from typing import Dict, Iterable, List, Optional, Tuple, Union + from typing import Iterable, List - from pyramid.httpexceptions import HTTPException - - from weaver.typedefs import AnySettingsContainer, AnyUUID, AnyValueType, HeadersTupleType, JSON, SettingsType - from weaver.wps_restapi.constants import JobInputsOutputsSchemaType + from weaver.typedefs import JSON, AnyResponseType, PyramidRequest LOGGER = get_task_logger(__name__) -def get_job(request): - # type: (Request) -> Job - """ - Obtain a job from request parameters. - - :returns: Job information if found. - :raise HTTPNotFound: with JSON body details on missing/non-matching job, process, provider IDs. 
- """ - job_id = request.matchdict.get("job_id") - try: - if not is_uuid(job_id): - raise JobInvalidParameter - store = get_db(request).get_store(StoreJobs) - job = store.fetch_by_id(job_id) - except (JobInvalidParameter, JobNotFound) as exc: - exception = type(exc) - if exception is JobInvalidParameter: - desc = "Invalid job reference is not a valid UUID." - else: - desc = "Could not find job with specified reference." - title = "NoSuchJob" - raise exception( - # new format: https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_job-exception-no-such-job - json={ - "title": title, - "type": "http://www.opengis.net/def/exceptions/ogcapi-processes-1/1.0/no-such-job", - "detail": desc, - "status": exception.code, - "cause": str(job_id) - }, - code=title, locator="JobID", description=desc # old format - ) - - provider_id = request.matchdict.get("provider_id", job.service) - process_id = request.matchdict.get("process_id", job.process) - if provider_id: - forbid_local_only(request) - - if job.service != provider_id: - title = "NoSuchProvider" - desc = "Could not find job reference corresponding to specified provider reference." - raise OWSNotFound( - # new format: https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_job-exception-no-such-job - json={ - "title": title, - "type": "http://www.opengis.net/def/exceptions/ogcapi-processes-1/1.0/no-such-job", - "detail": desc, - "status": OWSNotFound.code, - "cause": str(process_id) - }, - code=title, locator="provider", description=desc # old format - ) - if job.process != process_id: - title = "NoSuchProcess" - desc = "Could not find job reference corresponding to specified process reference." 
- raise OWSNotFound( - # new format: https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_job-exception-no-such-job - # note: although 'no-such-process' error, return 'no-such-job' because process could exist, only mismatches - json={ - "title": title, - "type": "http://www.opengis.net/def/exceptions/ogcapi-processes-1/1.0/no-such-job", - "detail": desc, - "status": OWSNotFound.code, - "cause": str(process_id) - }, - code=title, locator="process", description=desc # old format - ) - return job - - -def get_job_list_links(job_total, filters, request): - # type: (int, Dict[str, AnyValueType], Request) -> List[JSON] - """ - Obtains a list of all relevant links for the corresponding job listing defined by query parameter filters. - - :raises IndexError: if the paging values are out of bounds compared to available total :term:`Job` matching search. - """ - base_url = get_weaver_url(request) - - # reapply queries that must be given to obtain the same result in case of subsequent requests (sort, limits, etc.) 
- kvp_params = {param: value for param, value in request.params.items() if param != "page"} - # patch datetime that have some extra character manipulation (reapply '+' auto-converted to ' ' by params parser) - if "datetime" in kvp_params: - kvp_params["datetime"] = kvp_params["datetime"].replace(" ", "+") - alt_kvp = deepcopy(kvp_params) - - # request job uses general endpoint, obtain the full path if any service/process was given as alternate location - if request.path.startswith(sd.jobs_service.path): - job_path = base_url + sd.jobs_service.path - alt_path = None - parent_url = None - # cannot generate full path apply for 'service' by itself - if filters["process"] and filters["service"]: - alt_path = base_url + sd.provider_jobs_service.path.format( - provider_id=filters["service"], process_id=filters["process"] - ) - parent_url = alt_path.rsplit("/", 1)[0] - elif filters["process"]: - alt_path = base_url + sd.process_jobs_service.path.format(process_id=filters["process"]) - parent_url = alt_path.rsplit("/", 1)[0] - for param in ["service", "provider", "process"]: - alt_kvp.pop(param, None) - # path is whichever specific service/process endpoint, jobs are pre-filtered by them - # transform sub-endpoints into matching query parameters and use generic path as alternate location - else: - job_path = base_url + request.path - alt_path = base_url + sd.jobs_service.path - alt_kvp["process"] = filters["process"] - if filters["service"]: - alt_kvp["provider"] = filters["service"] - parent_url = job_path.rsplit("/", 1)[0] - - cur_page = filters["page"] - per_page = filters["limit"] - max_page = max(math.ceil(job_total / per_page) - 1, 0) - if cur_page < 0 or cur_page > max_page: - raise IndexError(f"Page index {cur_page} is out of range from [0,{max_page}].") - - alt_links = [] - if alt_path: - alt_links = [{ - "href": get_path_kvp(alt_path, page=cur_page, **alt_kvp), "rel": "alternate", - "type": ContentType.APP_JSON, "title": "Alternate endpoint with equivalent set of 
filtered jobs." - }] - - links = alt_links + [ - {"href": job_path, "rel": "collection", - "type": ContentType.APP_JSON, "title": "Complete job listing (no filtering queries applied)."}, - {"href": base_url + sd.jobs_service.path, "rel": "search", - "type": ContentType.APP_JSON, "title": "Generic query endpoint to search for jobs."}, - {"href": job_path + "?detail=false", "rel": "preview", - "type": ContentType.APP_JSON, "title": "Job listing summary (UUID and count only)."}, - {"href": job_path, "rel": "http://www.opengis.net/def/rel/ogc/1.0/job-list", - "type": ContentType.APP_JSON, "title": "List of registered jobs."}, - {"href": get_path_kvp(job_path, page=cur_page, **kvp_params), "rel": "current", - "type": ContentType.APP_JSON, "title": "Current page of job query listing."}, - {"href": get_path_kvp(job_path, page=0, **kvp_params), "rel": "first", - "type": ContentType.APP_JSON, "title": "First page of job query listing."}, - {"href": get_path_kvp(job_path, page=max_page, **kvp_params), "rel": "last", - "type": ContentType.APP_JSON, "title": "Last page of job query listing."}, - ] - if cur_page > 0: - links.append({ - "href": get_path_kvp(job_path, page=cur_page - 1, **kvp_params), "rel": "prev", - "type": ContentType.APP_JSON, "title": "Previous page of job query listing." - }) - if cur_page < max_page: - links.append({ - "href": get_path_kvp(job_path, page=cur_page + 1, **kvp_params), "rel": "next", - "type": ContentType.APP_JSON, "title": "Next page of job query listing." - }) - if parent_url: - links.append({ - "href": parent_url, "rel": "up", - "type": ContentType.APP_JSON, "title": "Parent collection for which listed jobs apply." 
- }) - return links - - -def get_schema_query(schema, strict=True): - # type: (Optional[JobInputsOutputsSchemaType], bool) -> Optional[JobInputsOutputsSchemaType] - if not schema: - return None - # unescape query (eg: "OGC+strict" becomes "OGC string" from URL parsing) - schema_checked = str(schema).replace(" ", "+").lower() - if JobInputsOutputsSchema.get(schema_checked) is None: - raise HTTPBadRequest(json={ - "type": "InvalidParameterValue", - "detail": "Query parameter 'schema' value is invalid.", - "status": HTTPBadRequest.code, - "locator": "query", - "value": str(schema), - }) - if not strict: - return schema_checked.split("+")[0] - return schema_checked - - -def make_result_link(result_id, result, job_id, settings): - # type: (str, Union[JSON, List[JSON]], AnyUUID, SettingsType) -> List[str] - """ - Convert a result definition as ``value`` into the corresponding ``reference`` for output transmission. - - .. seealso:: - :rfc:`8288`: HTTP ``Link`` header specification. - """ - values = result if isinstance(result, list) else [result] - suffixes = list(f".{idx}" for idx in range(len(values))) if isinstance(result, list) else [""] - wps_url = get_wps_output_url(settings).strip("/") - links = [] - for suffix, value in zip(suffixes, values): - key = get_any_value(result, key=True) - if key != "href": - # literal data to be converted to link - # plain text file must be created containing the raw literal data - typ = ContentType.TEXT_PLAIN # as per '/rec/core/process-execute-sync-document-ref' - enc = "UTF-8" - out = get_wps_output_dir(settings) - val = get_any_value(value, data=True, file=False) - loc = os.path.join(job_id, result_id + suffix + ".txt") - url = f"{wps_url}/{loc}" - path = os.path.join(out, loc) - with open(path, mode="w", encoding=enc) as out_file: - out_file.write(val) - else: - fmt = get_field(result, "format", default={"mediaType": ContentType.TEXT_PLAIN}) - typ = get_field(fmt, "mime_type", search_variations=True, 
default=ContentType.TEXT_PLAIN) - enc = get_field(fmt, "encoding", search_variations=True, default=None) - url = get_any_value(value, data=False, file=True) # should already include full path - links.append(f"<{url}>; rel=\"{result_id}{suffix}\"; type={typ}; charset={enc}") - return links - - -def get_results(job, # type: Job - container, # type: AnySettingsContainer - value_key=None, # type: Optional[str] - schema=JobInputsOutputsSchema.OLD, # type: JobInputsOutputsSchemaType - link_references=False, # type: bool - ): # type: (...) -> Tuple[Union[List[JSON], JSON], HeadersTupleType] - """ - Obtains the job results with extended full WPS output URL as applicable and according to configuration settings. - - :param job: job from which to retrieve results. - :param container: any container giving access to instance settings (to resolve reference output location). - :param value_key: - If not specified, the returned values will have the appropriate ``data``/``href`` key according to the content. - Otherwise, all values will have the specified key. - :param schema: - Selects which schema to employ for representing the output results (listing or mapping). - :param link_references: - If enabled, an output that was requested by reference instead of value will be returned as ``Link`` reference. - :returns: - Tuple with: - - List or mapping of all outputs each with minimally an ID and value under the requested key. - - List of ``Link`` headers for reference outputs when requested. Empty otherwise. 
- """ - settings = get_settings(container) - wps_url = get_wps_output_url(settings) - if not wps_url.endswith("/"): - wps_url = wps_url + "/" - schema = JobInputsOutputsSchema.get(str(schema).lower(), default=JobInputsOutputsSchema.OLD) - strict = schema.endswith("+strict") - schema = schema.split("+")[0] - ogc_api = schema == JobInputsOutputsSchema.OGC - outputs = {} if ogc_api else [] - fmt_key = "mediaType" if ogc_api else "mimeType" - out_ref = convert_output_params_schema(job.outputs, JobInputsOutputsSchema.OGC) if link_references else {} - references = {} - for result in job.results: - rtype = "data" if any(k in result for k in ["data", "value"]) else "href" - value = get_any_value(result) - out_key = rtype - out_id = get_any_id(result) - out_mode = out_ref.get(out_id, {}).get("transmissionMode") - as_ref = link_references and out_mode == ExecuteTransmissionMode.REFERENCE - if rtype == "href": - # fix paths relative to instance endpoint, but leave explicit links as is (eg: S3 bucket, remote HTTP, etc.) 
- if value.startswith("/"): - value = str(value).lstrip("/") - if "://" not in value: - value = wps_url + value - elif ogc_api: - out_key = "value" - elif value_key: - out_key = value_key - output = {out_key: value} - if rtype == "href": # required for the rest to be there, other fields optional - if "mimeType" not in result: - result["mimeType"] = get_format(value, default=ContentType.TEXT_PLAIN).mime_type - if ogc_api or not strict: - output["type"] = result["mimeType"] - if not ogc_api or not strict or as_ref: - output["format"] = {fmt_key: result["mimeType"]} - for field in ["encoding", "schema"]: - if field in result: - output["format"][field] = result[field] - elif rtype != "href": - # literal data - # FIXME: BoundingBox not implemented (https://github.com/crim-ca/weaver/issues/51) - dtype = result.get("dataType", any2wps_literal_datatype(value, is_value=True) or "string") - if ogc_api: - output["dataType"] = {"name": dtype} - else: - output["dataType"] = dtype - - if ogc_api or as_ref: - mapping = references if as_ref else outputs - if out_id in mapping: - output_list = mapping[out_id] - if not isinstance(output_list, list): - output_list = [output_list] - output_list.append(output) - mapping[out_id] = output_list - else: - mapping[out_id] = output - else: - # if ordered insert supported by python version, insert ID first - output = dict([("id", out_id)] + list(output.items())) # noqa - outputs.append(output) - - # needed to collect and aggregate outputs of same ID first in case of array - # convert any requested link references using indices if needed - headers = [] - for out_id, output in references.items(): - res_links = make_result_link(out_id, output, job.id, settings) - headers.extend([("Link", link) for link in res_links]) - - return outputs, headers - - -def validate_service_process(request): - # type: (Request) -> Tuple[Optional[str], Optional[str]] - """ - Verifies that service or process specified by path or query will raise the appropriate error 
if applicable. - """ - service_name = ( - request.matchdict.get("provider_id", None) or - request.params.get("provider", None) or - request.params.get("service", None) # backward compatibility - ) - process_name = ( - request.matchdict.get("process_id", None) or - request.params.get("process", None) or - request.params.get("processID", None) # OGC-API conformance - ) - item_test = None - item_type = None - - try: - service = None - if service_name: - forbid_local_only(request) - item_type = "Service" - item_test = service_name - store = get_db(request).get_store(StoreServices) - service = store.fetch_by_name(service_name, visibility=Visibility.PUBLIC) - if process_name: - item_type = "Process" - item_test = process_name - # local process - if not service: - store = get_db(request).get_store(StoreProcesses) - store.fetch_by_id(process_name, visibility=Visibility.PUBLIC) - # remote process - else: - processes = service.processes(request) - if process_name not in [p.id for p in processes]: - raise ProcessNotFound - except (ServiceNotFound, ProcessNotFound): - raise HTTPNotFound(json={ - "code": "NoSuch{}".format(item_type), - "description": "{} of id '{}' cannot be found.".format(item_type, item_test) - }) - except (ServiceNotAccessible, ProcessNotAccessible): - raise HTTPUnauthorized(json={ - "code": "Unauthorized{}".format(item_type), - "description": "{} of id '{}' is not accessible.".format(item_type, item_test) - }) - except InvalidIdentifierValue as ex: - raise HTTPBadRequest(json={ - "code": InvalidIdentifierValue.__name__, - "description": str(ex) - }) - - return service_name, process_name - - @sd.provider_jobs_service.get(tags=[sd.TAG_JOBS, sd.TAG_PROVIDERS], renderer=OutputFormat.JSON, schema=sd.GetProviderJobsEndpoint(), response_schemas=sd.get_prov_all_jobs_responses) @sd.process_jobs_service.get(tags=[sd.TAG_PROCESSES, sd.TAG_JOBS], renderer=OutputFormat.JSON, @@ -437,7 +42,7 @@ def validate_service_process(request): schema=sd.GetJobsEndpoint(), 
response_schemas=sd.get_all_jobs_responses) @log_unhandled_exceptions(logger=LOGGER, message=sd.InternalServerErrorResponseSchema.description) def get_queried_jobs(request): - # type: (Request) -> HTTPOk + # type: (PyramidRequest) -> HTTPOk """ Retrieve the list of jobs which can be filtered, sorted, paged and categorized using query parameters. """ @@ -526,7 +131,7 @@ def _job_list(jobs): # type: (Iterable[Job]) -> List[JSON] schema=sd.JobEndpoint(), response_schemas=sd.get_single_job_status_responses) @log_unhandled_exceptions(logger=LOGGER, message=sd.InternalServerErrorResponseSchema.description) def get_job_status(request): - # type: (Request) -> HTTPOk + # type: (PyramidRequest) -> HTTPOk """ Retrieve the status of a job. """ @@ -535,133 +140,6 @@ def get_job_status(request): return HTTPOk(json=job_status) -def raise_job_bad_status(job, container=None): - # type: (Job, Optional[AnySettingsContainer]) -> None - """ - Raise the appropriate message for :term:`Job` not ready or unable to retrieve output results due to status. 
- """ - if job.status != Status.SUCCEEDED: - links = job.links(container=container) - if job.status == Status.FAILED: - err_code = None - err_info = None - err_known_modules = [ - "pywps.exceptions", - "owslib.wps", - "weaver.exceptions", - "weaver.owsexceptions", - ] - # try to infer the cause, fallback to generic error otherwise - for error in job.exceptions: - try: - if isinstance(error, dict): - err_code = error.get("Code") - err_info = error.get("Text") - elif isinstance(error, str) and any(error.startswith(mod) for mod in err_known_modules): - err_code, err_info = error.split(":", 1) - err_code = err_code.split(".")[-1].strip() - err_info = err_info.strip() - except Exception: - err_code = None - if err_code: - break - if not err_code: # default - err_code = OWSNoApplicableCode.code - err_info = "unknown" - # /req/core/job-results-failed - raise HTTPBadRequest(json={ - "title": "JobResultsFailed", - "type": err_code, - "detail": "Job results not available because execution failed.", - "status": HTTPBadRequest.code, - "cause": err_info, - "links": links - }) - - # /req/core/job-results-exception/results-not-ready - raise HTTPBadRequest(json={ - "title": "JobResultsNotReady", - "type": "http://www.opengis.net/def/exceptions/ogcapi-processes-1/1.0/result-not-ready", - "detail": "Job is not ready to obtain results.", - "status": HTTPBadRequest.code, - "cause": {"status": job.status}, - "links": links - }) - - -def raise_job_dismissed(job, container=None): - # type: (Job, Optional[AnySettingsContainer]) -> None - """ - Raise the appropriate messages for dismissed :term:`Job` status. 
- """ - if job.status == Status.DISMISSED: - # provide the job status links since it is still available for reference - settings = get_settings(container) - job_links = job.links(settings) - job_links = [link for link in job_links if link["rel"] in ["status", "alternate", "collection", "up"]] - raise JobGone( - json={ - "title": "JobDismissed", - "type": "JobDismissed", - "status": JobGone.code, - "detail": "Job was dismissed and artifacts have been removed.", - "cause": {"status": job.status}, - "value": str(job.id), - "links": job_links - } - ) - - -def dismiss_job_task(job, container): - # type: (Job, AnySettingsContainer) -> Job - """ - Cancels any pending or running :mod:`Celery` task and removes completed job artifacts. - - .. note:: - The :term:`Job` object itself is not deleted, only its artifacts. - Therefore, its inputs, outputs, logs, exceptions, etc. are still available in the database, - but corresponding files that would be exposed by ``weaver.wps_output`` configurations are removed. - - :param job: Job to cancel or cleanup. - :param container: Application settings. - :return: Updated and dismissed job. - """ - raise_job_dismissed(job, container) - if job.status in JOB_STATUS_CATEGORIES[StatusCategory.RUNNING]: - # signal to stop celery task. Up to it to terminate remote if any. 
- LOGGER.debug("Job [%s] dismiss operation: Canceling task [%s]", job.id, job.task_id) - celery_app.control.revoke(job.task_id, terminate=True) - - wps_out_dir = get_wps_output_dir(container) - job_out_dir = os.path.join(wps_out_dir, str(job.id)) - job_out_log = os.path.join(wps_out_dir, str(job.id) + ".log") - job_out_xml = os.path.join(wps_out_dir, str(job.id) + ".xml") - if os.path.isdir(job_out_dir): - LOGGER.debug("Job [%s] dismiss operation: Removing output results.", job.id) - shutil.rmtree(job_out_dir, onerror=lambda func, path, _exc: LOGGER.warning( - "Job [%s] dismiss operation: Failed to delete [%s] due to [%s]", job.id, job_out_dir, _exc - )) - if os.path.isfile(job_out_log): - LOGGER.debug("Job [%s] dismiss operation: Removing output logs.", job.id) - try: - os.remove(job_out_log) - except OSError as exc: - LOGGER.warning("Job [%s] dismiss operation: Failed to delete [%s] due to [%s]", job.id, job_out_log, exc) - if os.path.isfile(job_out_xml): - LOGGER.debug("Job [%s] dismiss operation: Removing output WPS status.", job.id) - try: - os.remove(job_out_xml) - except OSError as exc: - LOGGER.warning("Job [%s] dismiss operation: Failed to delete [%s] due to [%s]", job.id, job_out_xml, exc) - - LOGGER.debug("Job [%s] dismiss operation: Updating job status.") - store = get_db(container).get_store(StoreJobs) - job.status_message = "Job {}.".format(Status.DISMISSED) - job.status = map_status(Status.DISMISSED) - job = store.update_job(job) - return job - - @sd.provider_job_service.delete(tags=[sd.TAG_JOBS, sd.TAG_DISMISS, sd.TAG_PROVIDERS], renderer=OutputFormat.JSON, schema=sd.ProviderJobEndpoint(), response_schemas=sd.delete_prov_job_responses) @sd.process_job_service.delete(tags=[sd.TAG_JOBS, sd.TAG_DISMISS, sd.TAG_PROCESSES], renderer=OutputFormat.JSON, @@ -670,6 +148,7 @@ def dismiss_job_task(job, container): schema=sd.JobEndpoint(), response_schemas=sd.delete_job_responses) @log_unhandled_exceptions(logger=LOGGER, 
message=sd.InternalServerErrorResponseSchema.description) def cancel_job(request): + # type: (PyramidRequest) -> AnyResponseType """ Dismiss a planned or running job execution, or remove result artifacts of a completed job. @@ -695,6 +174,7 @@ def cancel_job(request): schema=sd.DeleteJobsEndpoint(), response_schemas=sd.delete_jobs_responses) @log_unhandled_exceptions(logger=LOGGER, message=sd.InternalServerErrorResponseSchema.description) def cancel_job_batch(request): + # type: (PyramidRequest) -> AnyResponseType """ Dismiss operation for multiple jobs. @@ -736,7 +216,7 @@ def cancel_job_batch(request): schema=sd.JobInputsEndpoint(), response_schemas=sd.get_job_inputs_responses) @log_unhandled_exceptions(logger=LOGGER, message=sd.InternalServerErrorResponseSchema.description) def get_job_inputs(request): - # type: (Request) -> HTTPException + # type: (PyramidRequest) -> AnyResponseType """ Retrieve the inputs values and outputs definitions of a job. """ @@ -761,7 +241,7 @@ def get_job_inputs(request): schema=sd.JobOutputsEndpoint(), response_schemas=sd.get_job_outputs_responses) @log_unhandled_exceptions(logger=LOGGER, message=sd.InternalServerErrorResponseSchema.description) def get_job_outputs(request): - # type: (Request) -> HTTPException + # type: (PyramidRequest) -> AnyResponseType """ Retrieve the output values resulting from a job execution. """ @@ -784,30 +264,13 @@ def get_job_outputs(request): schema=sd.JobResultsEndpoint(), response_schemas=sd.get_job_results_responses) @log_unhandled_exceptions(logger=LOGGER, message=sd.InternalServerErrorResponseSchema.description) def get_job_results(request): - # type: (Request) -> HTTPException + # type: (PyramidRequest) -> AnyResponseType """ Retrieve the results of a job. 
""" job = get_job(request) - raise_job_dismissed(job, request) - raise_job_bad_status(job, request) - job_status = map_status(job.status) - if job_status in JOB_STATUS_CATEGORIES[StatusCategory.RUNNING]: - raise HTTPNotFound(json={ - "code": "ResultsNotReady", - "description": "Job status is '{}'. Results are not yet available.".format(job_status) - }) - - results, refs = get_results(job, request, value_key="value", - schema=JobInputsOutputsSchema.OGC, link_references=True) - # note: - # Cannot add "links" field in response body because variable Output ID keys are directly at the root - # Possible conflict with an output that would be named "links". - - if results: # avoid error if all by reference - results = sd.Result().deserialize(results) - HTTPOk(json=results, headers=refs) - return HTTPNoContent(headers=refs) + resp = get_job_results_response(job, request) + return resp @sd.provider_exceptions_service.get(tags=[sd.TAG_JOBS, sd.TAG_EXCEPTIONS, sd.TAG_PROVIDERS], @@ -819,6 +282,7 @@ def get_job_results(request): schema=sd.ProcessExceptionsEndpoint(), response_schemas=sd.get_exceptions_responses) @log_unhandled_exceptions(logger=LOGGER, message=sd.InternalServerErrorResponseSchema.description) def get_job_exceptions(request): + # type: (PyramidRequest) -> AnyResponseType """ Retrieve the exceptions of a job. """ @@ -836,6 +300,7 @@ def get_job_exceptions(request): schema=sd.ProcessLogsEndpoint(), response_schemas=sd.get_logs_responses) @log_unhandled_exceptions(logger=LOGGER, message=sd.InternalServerErrorResponseSchema.description) def get_job_logs(request): + # type: (PyramidRequest) -> AnyResponseType """ Retrieve the logs of a job. 
""" @@ -856,6 +321,7 @@ def get_job_logs(request): response_schemas=sd.get_result_redirect_responses) @log_unhandled_exceptions(logger=LOGGER, message=sd.InternalServerErrorResponseSchema.description) def redirect_job_result(request): + # type: (PyramidRequest) -> AnyResponseType """ Deprecated job result endpoint that is now returned by corresponding outputs path with added links. """ diff --git a/weaver/wps_restapi/jobs/utils.py b/weaver/wps_restapi/jobs/utils.py new file mode 100644 index 000000000..eb56cec8f --- /dev/null +++ b/weaver/wps_restapi/jobs/utils.py @@ -0,0 +1,696 @@ +import math +import os +import shutil +from copy import deepcopy +from typing import TYPE_CHECKING + +from celery.utils.log import get_task_logger +from pyramid.httpexceptions import ( + HTTPBadRequest, + HTTPCreated, + HTTPNoContent, + HTTPNotFound, + HTTPNotImplemented, + HTTPOk, + HTTPUnauthorized +) +from pyramid.response import FileResponse +from pyramid_celery import celery_app + +from weaver.database import get_db +from weaver.datatype import Job +from weaver.exceptions import ( + InvalidIdentifierValue, + JobGone, + JobInvalidParameter, + JobNotFound, + ProcessNotAccessible, + ProcessNotFound, + ServiceNotAccessible, + ServiceNotFound +) +from weaver.execute import ExecuteResponse, ExecuteTransmissionMode +from weaver.formats import ContentType, get_format +from weaver.owsexceptions import OWSNoApplicableCode, OWSNotFound +from weaver.processes.convert import any2wps_literal_datatype, convert_output_params_schema, get_field +from weaver.status import JOB_STATUS_CATEGORIES, Status, StatusCategory, map_status +from weaver.store.base import StoreJobs, StoreProcesses, StoreServices +from weaver.utils import ( + get_any_id, + get_any_value, + get_file_headers, + get_header, + get_path_kvp, + get_settings, + get_weaver_url, + is_uuid +) +from weaver.visibility import Visibility +from weaver.wps.utils import get_wps_output_dir, get_wps_output_url, map_wps_output_location +from 
weaver.wps_restapi import swagger_definitions as sd +from weaver.wps_restapi.constants import JobInputsOutputsSchema +from weaver.wps_restapi.providers.utils import forbid_local_only + +if TYPE_CHECKING: + from typing import Dict, List, Optional, Tuple, Union + + from weaver.typedefs import ( + AnyHeadersContainer, + AnyRequestType, + AnyResponseType, + AnySettingsContainer, + AnyUUID, + AnyValueType, + ExecutionResultArray, + ExecutionResultObject, + ExecutionResults, + HeadersTupleType, + JSON, + PyramidRequest, + SettingsType + ) + from weaver.wps_restapi.constants import JobInputsOutputsSchemaType + +LOGGER = get_task_logger(__name__) + + +def get_job(request): + # type: (PyramidRequest) -> Job + """ + Obtain a job from request parameters. + + :returns: Job information if found. + :raise HTTPNotFound: with JSON body details on missing/non-matching job, process, provider IDs. + """ + job_id = request.matchdict.get("job_id") + try: + if not is_uuid(job_id): + raise JobInvalidParameter + store = get_db(request).get_store(StoreJobs) + job = store.fetch_by_id(job_id) + except (JobInvalidParameter, JobNotFound) as exc: + exception = type(exc) + if exception is JobInvalidParameter: + desc = "Invalid job reference is not a valid UUID." + else: + desc = "Could not find job with specified reference." 
+ title = "NoSuchJob" + raise exception( + # new format: https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_job-exception-no-such-job + json={ + "title": title, + "type": "http://www.opengis.net/def/exceptions/ogcapi-processes-1/1.0/no-such-job", + "detail": desc, + "status": exception.code, + "cause": str(job_id) + }, + code=title, locator="JobID", description=desc # old format + ) + + provider_id = request.matchdict.get("provider_id", job.service) + process_id = request.matchdict.get("process_id", job.process) + if provider_id: + forbid_local_only(request) + + if job.service != provider_id: + title = "NoSuchProvider" + desc = "Could not find job reference corresponding to specified provider reference." + raise OWSNotFound( + # new format: https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_job-exception-no-such-job + json={ + "title": title, + "type": "http://www.opengis.net/def/exceptions/ogcapi-processes-1/1.0/no-such-job", + "detail": desc, + "status": OWSNotFound.code, + "cause": str(process_id) + }, + code=title, locator="provider", description=desc # old format + ) + if job.process != process_id: + title = "NoSuchProcess" + desc = "Could not find job reference corresponding to specified process reference." + raise OWSNotFound( + # new format: https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_job-exception-no-such-job + # note: although 'no-such-process' error, return 'no-such-job' because process could exist, only mismatches + json={ + "title": title, + "type": "http://www.opengis.net/def/exceptions/ogcapi-processes-1/1.0/no-such-job", + "detail": desc, + "status": OWSNotFound.code, + "cause": str(process_id) + }, + code=title, locator="process", description=desc # old format + ) + return job + + +def get_job_list_links(job_total, filters, request): + # type: (int, Dict[str, AnyValueType], AnyRequestType) -> List[JSON] + """ + Obtains a list of all relevant links for the corresponding job listing defined by query parameter filters. 
+ + :raises IndexError: if the paging values are out of bounds compared to available total :term:`Job` matching search. + """ + base_url = get_weaver_url(request) + + # reapply queries that must be given to obtain the same result in case of subsequent requests (sort, limits, etc.) + kvp_params = {param: value for param, value in request.params.items() if param != "page"} + # patch datetime that have some extra character manipulation (reapply '+' auto-converted to ' ' by params parser) + if "datetime" in kvp_params: + kvp_params["datetime"] = kvp_params["datetime"].replace(" ", "+") + alt_kvp = deepcopy(kvp_params) + + # request job uses general endpoint, obtain the full path if any service/process was given as alternate location + if request.path.startswith(sd.jobs_service.path): + job_path = base_url + sd.jobs_service.path + alt_path = None + parent_url = None + # cannot generate full path apply for 'service' by itself + if filters["process"] and filters["service"]: + alt_path = base_url + sd.provider_jobs_service.path.format( + provider_id=filters["service"], process_id=filters["process"] + ) + parent_url = alt_path.rsplit("/", 1)[0] + elif filters["process"]: + alt_path = base_url + sd.process_jobs_service.path.format(process_id=filters["process"]) + parent_url = alt_path.rsplit("/", 1)[0] + for param in ["service", "provider", "process"]: + alt_kvp.pop(param, None) + # path is whichever specific service/process endpoint, jobs are pre-filtered by them + # transform sub-endpoints into matching query parameters and use generic path as alternate location + else: + job_path = base_url + request.path + alt_path = base_url + sd.jobs_service.path + alt_kvp["process"] = filters["process"] + if filters["service"]: + alt_kvp["provider"] = filters["service"] + parent_url = job_path.rsplit("/", 1)[0] + + cur_page = filters["page"] + per_page = filters["limit"] + max_page = max(math.ceil(job_total / per_page) - 1, 0) + if cur_page < 0 or cur_page > max_page: + raise 
IndexError(f"Page index {cur_page} is out of range from [0,{max_page}].") + + alt_links = [] + if alt_path: + alt_links = [{ + "href": get_path_kvp(alt_path, page=cur_page, **alt_kvp), "rel": "alternate", + "type": ContentType.APP_JSON, "title": "Alternate endpoint with equivalent set of filtered jobs." + }] + + links = alt_links + [ + {"href": job_path, "rel": "collection", + "type": ContentType.APP_JSON, "title": "Complete job listing (no filtering queries applied)."}, + {"href": base_url + sd.jobs_service.path, "rel": "search", + "type": ContentType.APP_JSON, "title": "Generic query endpoint to search for jobs."}, + {"href": job_path + "?detail=false", "rel": "preview", + "type": ContentType.APP_JSON, "title": "Job listing summary (UUID and count only)."}, + {"href": job_path, "rel": "http://www.opengis.net/def/rel/ogc/1.0/job-list", + "type": ContentType.APP_JSON, "title": "List of registered jobs."}, + {"href": get_path_kvp(job_path, page=cur_page, **kvp_params), "rel": "current", + "type": ContentType.APP_JSON, "title": "Current page of job query listing."}, + {"href": get_path_kvp(job_path, page=0, **kvp_params), "rel": "first", + "type": ContentType.APP_JSON, "title": "First page of job query listing."}, + {"href": get_path_kvp(job_path, page=max_page, **kvp_params), "rel": "last", + "type": ContentType.APP_JSON, "title": "Last page of job query listing."}, + ] + if cur_page > 0: + links.append({ + "href": get_path_kvp(job_path, page=cur_page - 1, **kvp_params), "rel": "prev", + "type": ContentType.APP_JSON, "title": "Previous page of job query listing." + }) + if cur_page < max_page: + links.append({ + "href": get_path_kvp(job_path, page=cur_page + 1, **kvp_params), "rel": "next", + "type": ContentType.APP_JSON, "title": "Next page of job query listing." + }) + if parent_url: + links.append({ + "href": parent_url, "rel": "up", + "type": ContentType.APP_JSON, "title": "Parent collection for which listed jobs apply." 
+ }) + return links + + +def get_schema_query(schema, strict=True): + # type: (Optional[JobInputsOutputsSchemaType], bool) -> Optional[JobInputsOutputsSchemaType] + if not schema: + return None + # unescape query (eg: "OGC+strict" becomes "OGC string" from URL parsing) + schema_checked = str(schema).replace(" ", "+").lower() + if JobInputsOutputsSchema.get(schema_checked) is None: + raise HTTPBadRequest(json={ + "type": "InvalidParameterValue", + "detail": "Query parameter 'schema' value is invalid.", + "status": HTTPBadRequest.code, + "locator": "query", + "value": str(schema), + }) + if not strict: + return schema_checked.split("+")[0] + return schema_checked + + +def make_result_link(result_id, result, job_id, settings): + # type: (str, Union[ExecutionResultObject, ExecutionResultArray], AnyUUID, SettingsType) -> List[str] + """ + Convert a result definition as ``value`` into the corresponding ``reference`` for output transmission. + + .. seealso:: + :rfc:`8288`: HTTP ``Link`` header specification. 
+ """ + values = result if isinstance(result, list) else [result] + suffixes = list(f".{idx}" for idx in range(len(values))) if isinstance(result, list) else [""] + wps_url = get_wps_output_url(settings).strip("/") + links = [] + for suffix, value in zip(suffixes, values): + key = get_any_value(result, key=True) + if key != "href": + # literal data to be converted to link + # plain text file must be created containing the raw literal data + typ = ContentType.TEXT_PLAIN # as per '/rec/core/process-execute-sync-document-ref' + enc = "UTF-8" + out = get_wps_output_dir(settings) + val = get_any_value(value, data=True, file=False) + loc = os.path.join(job_id, result_id + suffix + ".txt") + url = f"{wps_url}/{loc}" + path = os.path.join(out, loc) + with open(path, mode="w", encoding=enc) as out_file: + out_file.write(val) + else: + fmt = get_field(result, "format", default={"mediaType": ContentType.TEXT_PLAIN}) + typ = get_field(fmt, "mime_type", search_variations=True, default=ContentType.TEXT_PLAIN) + enc = get_field(fmt, "encoding", search_variations=True, default=None) + url = get_any_value(value, data=False, file=True) # should already include full path + links.append(f"<{url}>; rel=\"{result_id}{suffix}\"; type={typ}; charset={enc}") + return links + + +def get_results(job, # type: Job + container, # type: AnySettingsContainer + value_key=None, # type: Optional[str] + schema=JobInputsOutputsSchema.OLD, # type: JobInputsOutputsSchemaType + link_references=False, # type: bool + ): # type: (...) -> Tuple[ExecutionResults, HeadersTupleType] + """ + Obtains the job results with extended full WPS output URL as applicable and according to configuration settings. + + :param job: job from which to retrieve results. + :param container: any container giving access to instance settings (to resolve reference output location). + :param value_key: + If not specified, the returned values will have the appropriate ``data``/``href`` key according to the content. 
+ Otherwise, all values will have the specified key. + :param schema: + Selects which schema to employ for representing the output results (listing or mapping). + :param link_references: + If enabled, an output that was requested by reference instead of value will be returned as ``Link`` reference. + :returns: + Tuple with: + - List or mapping of all outputs each with minimally an ID and value under the requested key. + - List of ``Link`` headers for reference outputs when requested. Empty otherwise. + """ + settings = get_settings(container) + wps_url = get_wps_output_url(settings) + if not wps_url.endswith("/"): + wps_url = wps_url + "/" + schema = JobInputsOutputsSchema.get(str(schema).lower(), default=JobInputsOutputsSchema.OLD) + strict = schema.endswith("+strict") + schema = schema.split("+")[0] + ogc_api = schema == JobInputsOutputsSchema.OGC + outputs = {} if ogc_api else [] + fmt_key = "mediaType" if ogc_api else "mimeType" + out_ref = convert_output_params_schema(job.outputs, JobInputsOutputsSchema.OGC) if link_references else {} + references = {} + for result in job.results: + rtype = "data" if any(k in result for k in ["data", "value"]) else "href" + value = get_any_value(result) + out_key = rtype + out_id = get_any_id(result) + out_mode = out_ref.get(out_id, {}).get("transmissionMode") + as_ref = link_references and out_mode == ExecuteTransmissionMode.REFERENCE + if rtype == "href": + # fix paths relative to instance endpoint, but leave explicit links as is (eg: S3 bucket, remote HTTP, etc.) 
+ if value.startswith("/"): + value = str(value).lstrip("/") + if "://" not in value: + value = wps_url + value + elif ogc_api: + out_key = "value" + elif value_key: + out_key = value_key + output = {out_key: value} + if rtype == "href": # required for the rest to be there, other fields optional + if "mimeType" not in result: + result["mimeType"] = get_format(value, default=ContentType.TEXT_PLAIN).mime_type + if ogc_api or not strict: + output["type"] = result["mimeType"] + if not ogc_api or not strict or as_ref: + output["format"] = {fmt_key: result["mimeType"]} + for field in ["encoding", "schema"]: + if field in result: + output["format"][field] = result[field] + elif rtype != "href": + # literal data + # FIXME: BoundingBox not implemented (https://github.com/crim-ca/weaver/issues/51) + dtype = result.get("dataType", any2wps_literal_datatype(value, is_value=True) or "string") + if ogc_api: + output["dataType"] = {"name": dtype} + else: + output["dataType"] = dtype + + if ogc_api or as_ref: + mapping = references if as_ref else outputs + if out_id in mapping: + output_list = mapping[out_id] + if not isinstance(output_list, list): + output_list = [output_list] + output_list.append(output) + mapping[out_id] = output_list + else: + mapping[out_id] = output + else: + # if ordered insert supported by python version, insert ID first + output = dict([("id", out_id)] + list(output.items())) # noqa + outputs.append(output) + + # needed to collect and aggregate outputs of same ID first in case of array + # convert any requested link references using indices if needed + headers = [] + for out_id, output in references.items(): + res_links = make_result_link(out_id, output, job.id, settings) + headers.extend([("Link", link) for link in res_links]) + + return outputs, headers + + +def get_job_results_response(job, container, headers=None): + # type: (Job, AnySettingsContainer, Optional[AnyHeadersContainer]) -> AnyResponseType + """ + Generates the :term:`OGC` compliant 
:term:`Job` results response according to submitted execution parameters. + + Parameters that impact the format of the response are: + - Amount of outputs to be returned. + - Parameter ``response: raw|document`` + - Parameter ``transmissionMode: value|reference`` per output if ``response: raw``. + + .. seealso:: + More details available for each combination: + - https://docs.ogc.org/is/18-062r2/18-062r2.html#sc_execute_response + - https://docs.ogc.org/is/18-062r2/18-062r2.html#_response_7 + + :param job: Job for which to generate the results response. + :param container: Application settings. + :param headers: Additional headers to provide in the response. + """ + raise_job_dismissed(job, container) + raise_job_bad_status(job, container) + job_status = map_status(job.status) + if job_status in JOB_STATUS_CATEGORIES[StatusCategory.RUNNING]: + raise HTTPNotFound(json={ + "code": "ResultsNotReady", + "title": "JobResultsNotReady", + "type": "http://www.opengis.net/def/exceptions/ogcapi-processes-1/1.0/result-not-ready", + "detail": "Job is not ready to obtain results.", + "status": HTTPNotFound.code, + "cause": {"status": job.status}, + }) + + # Document ignores values/references + # See: + # - https://docs.ogc.org/is/18-062r2/18-062r2.html#_response_7 (/req/core/job-results-async-document) + # - https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-document + is_raw = job.execution_response == ExecuteResponse.RAW + results, refs = get_results(job, container, value_key="value", + schema=JobInputsOutputsSchema.OGC, + link_references=is_raw) # type: Union[ExecutionResults, HeadersTupleType] + headers = headers or {} + if "location" not in headers: + headers["Location"] = job.status_url(container) + + if not is_raw: + # note: + # Cannot add "links" field in response body because variable Output ID keys are directly at the root + # Possible conflict with an output that would be named "links". 
+ results = sd.Result().deserialize(results) + return HTTPOk(json=results, headers=headers) + + if not results: # avoid schema validation error if all by reference + # Status code 204 for empty body + # see: + # - https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-ref + refs.extend(headers.items()) + return HTTPNoContent(headers=refs) + + # raw response can be only data value, only link or a mix of them + if results: + # https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-value-one + out_info = list(results.items())[0][-1] + out_type = get_any_value(out_info, key=True) + out_data = get_any_value(out_info) + + # FIXME: https://github.com/crim-ca/weaver/issues/376 + # implement multipart, both for multi-output IDs and array-output under same ID + if len(results) > 1 or (isinstance(out_data, list) and len(out_data) > 1): + # https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-value-multi + raise HTTPNotImplemented(json={ + "code": "NotImplemented", + "type": "NotImplemented", + "detail": "Multipart results with 'transmissionMode=value' and 'response=raw' not implemented.", + }) + + # single value only + out_data = out_data[0] if isinstance(out_data, list) else out_data + if out_type == "href": + out_path = map_wps_output_location(out_data, container, exists=True, url=False) + out_type = out_info.get("type") # noqa + out_headers = get_file_headers(out_path, download_headers=True, content_headers=True, content_type=out_type) + resp = FileResponse(out_path) + resp.headers.update(out_headers) + resp.headers.update(headers) + else: + resp = HTTPOk(body=out_data, charset="UTF-8", content_type=ContentType.TEXT_PLAIN, headers=headers) + else: + resp = HTTPOk(headers=headers) + if refs: + # https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-ref + # https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-raw-mixed-multi + 
resp.headerlist.extend(refs) + return resp + + +def get_job_submission_response(body, headers, error=False): + # type: (JSON, AnyHeadersContainer, bool) -> Union[HTTPOk, HTTPCreated] + """ + Generates the successful response from contents returned by :term:`Job` submission process. + + If :term:`Job` already finished processing within requested ``Prefer: wait=X`` seconds delay (and if allowed by + the :term:`Process` ``jobControlOptions``), return the successful status immediately instead of created status. + + Otherwise, return the status monitoring location of the created :term:`Job` to be monitored asynchronously. + + .. seealso:: + :func:`weaver.processes.execution.submit_job` + :func:`weaver.processes.execution.submit_job_handler` + """ + status = map_status(body.get("status")) + location = get_header("location", headers) + if status in JOB_STATUS_CATEGORIES[StatusCategory.FINISHED]: + if error: + http_class = HTTPBadRequest + http_desc = sd.FailedSyncJobResponse.description + else: + http_class = HTTPOk + http_desc = sd.CompletedJobResponse.description + body = sd.CompletedJobStatusSchema().deserialize(body) + + body["description"] = http_desc + return http_class(location=location, json=body, headers=headers) + + body["description"] = sd.CreatedLaunchJobResponse.description + body = sd.CreatedJobStatusSchema().deserialize(body) + return HTTPCreated(location=location, json=body, headers=headers) + + +def validate_service_process(request): + # type: (PyramidRequest) -> Tuple[Optional[str], Optional[str]] + """ + Verifies that service or process specified by path or query will raise the appropriate error if applicable. 
+ """ + service_name = ( + request.matchdict.get("provider_id", None) or + request.params.get("provider", None) or + request.params.get("service", None) # backward compatibility + ) + process_name = ( + request.matchdict.get("process_id", None) or + request.params.get("process", None) or + request.params.get("processID", None) # OGC-API conformance + ) + item_test = None + item_type = None + + try: + service = None + if service_name: + forbid_local_only(request) + item_type = "Service" + item_test = service_name + store = get_db(request).get_store(StoreServices) + service = store.fetch_by_name(service_name, visibility=Visibility.PUBLIC) + if process_name: + item_type = "Process" + item_test = process_name + # local process + if not service: + store = get_db(request).get_store(StoreProcesses) + store.fetch_by_id(process_name, visibility=Visibility.PUBLIC) + # remote process + else: + processes = service.processes(request) + if process_name not in [p.id for p in processes]: + raise ProcessNotFound + except (ServiceNotFound, ProcessNotFound): + raise HTTPNotFound(json={ + "code": "NoSuch{}".format(item_type), + "description": "{} of id '{}' cannot be found.".format(item_type, item_test) + }) + except (ServiceNotAccessible, ProcessNotAccessible): + raise HTTPUnauthorized(json={ + "code": "Unauthorized{}".format(item_type), + "description": "{} of id '{}' is not accessible.".format(item_type, item_test) + }) + except InvalidIdentifierValue as ex: + raise HTTPBadRequest(json={ + "code": InvalidIdentifierValue.__name__, + "description": str(ex) + }) + + return service_name, process_name + + +def raise_job_bad_status(job, container=None): + # type: (Job, Optional[AnySettingsContainer]) -> None + """ + Raise the appropriate message for :term:`Job` not ready or unable to retrieve output results due to status. 
+ """ + if job.status != Status.SUCCEEDED: + links = job.links(container=container) + if job.status == Status.FAILED: + err_code = None + err_info = None + err_known_modules = [ + "pywps.exceptions", + "owslib.wps", + "weaver.exceptions", + "weaver.owsexceptions", + ] + # try to infer the cause, fallback to generic error otherwise + for error in job.exceptions: + try: + if isinstance(error, dict): + err_code = error.get("Code") + err_info = error.get("Text") + elif isinstance(error, str) and any(error.startswith(mod) for mod in err_known_modules): + err_code, err_info = error.split(":", 1) + err_code = err_code.split(".")[-1].strip() + err_info = err_info.strip() + except Exception: + err_code = None + if err_code: + break + if not err_code: # default + err_code = OWSNoApplicableCode.code + err_info = "unknown" + # /req/core/job-results-failed + raise HTTPBadRequest(json={ + "title": "JobResultsFailed", + "type": err_code, + "detail": "Job results not available because execution failed.", + "status": HTTPBadRequest.code, + "cause": err_info, + "links": links + }) + + # /req/core/job-results-exception/results-not-ready + raise HTTPNotFound(json={ + "title": "JobResultsNotReady", + "type": "http://www.opengis.net/def/exceptions/ogcapi-processes-1/1.0/result-not-ready", + "detail": "Job is not ready to obtain results.", + "status": HTTPNotFound.code, + "cause": {"status": job.status}, + "links": links + }) + + +def raise_job_dismissed(job, container=None): + # type: (Job, Optional[AnySettingsContainer]) -> None + """ + Raise the appropriate messages for dismissed :term:`Job` status. 
+ """ + if job.status == Status.DISMISSED: + # provide the job status links since it is still available for reference + settings = get_settings(container) + job_links = job.links(settings) + job_links = [link for link in job_links if link["rel"] in ["status", "alternate", "collection", "up"]] + raise JobGone( + json={ + "title": "JobDismissed", + "type": "JobDismissed", + "status": JobGone.code, + "detail": "Job was dismissed and artifacts have been removed.", + "cause": {"status": job.status}, + "value": str(job.id), + "links": job_links + } + ) + + +def dismiss_job_task(job, container): + # type: (Job, AnySettingsContainer) -> Job + """ + Cancels any pending or running :mod:`Celery` task and removes completed job artifacts. + + .. note:: + The :term:`Job` object itself is not deleted, only its artifacts. + Therefore, its inputs, outputs, logs, exceptions, etc. are still available in the database, + but corresponding files that would be exposed by ``weaver.wps_output`` configurations are removed. + + :param job: Job to cancel or cleanup. + :param container: Application settings. + :return: Updated and dismissed job. + """ + raise_job_dismissed(job, container) + if job.status in JOB_STATUS_CATEGORIES[StatusCategory.RUNNING]: + # signal to stop celery task. Up to it to terminate remote if any. 
+ LOGGER.debug("Job [%s] dismiss operation: Canceling task [%s]", job.id, job.task_id) + celery_app.control.revoke(job.task_id, terminate=True) + + wps_out_dir = get_wps_output_dir(container) + job_out_dir = os.path.join(wps_out_dir, str(job.id)) + job_out_log = os.path.join(wps_out_dir, str(job.id) + ".log") + job_out_xml = os.path.join(wps_out_dir, str(job.id) + ".xml") + if os.path.isdir(job_out_dir): + LOGGER.debug("Job [%s] dismiss operation: Removing output results.", job.id) + shutil.rmtree(job_out_dir, onerror=lambda func, path, _exc: LOGGER.warning( + "Job [%s] dismiss operation: Failed to delete [%s] due to [%s]", job.id, job_out_dir, _exc + )) + if os.path.isfile(job_out_log): + LOGGER.debug("Job [%s] dismiss operation: Removing output logs.", job.id) + try: + os.remove(job_out_log) + except OSError as exc: + LOGGER.warning("Job [%s] dismiss operation: Failed to delete [%s] due to [%s]", job.id, job_out_log, exc) + if os.path.isfile(job_out_xml): + LOGGER.debug("Job [%s] dismiss operation: Removing output WPS status.", job.id) + try: + os.remove(job_out_xml) + except OSError as exc: + LOGGER.warning("Job [%s] dismiss operation: Failed to delete [%s] due to [%s]", job.id, job_out_xml, exc) + + LOGGER.debug("Job [%s] dismiss operation: Updating job status.", job.id) + store = get_db(container).get_store(StoreJobs) + job.status_message = "Job {}.".format(Status.DISMISSED) + job.status = map_status(Status.DISMISSED) + job = store.update_job(job) + return job diff --git a/weaver/wps_restapi/processes/processes.py b/weaver/wps_restapi/processes/processes.py index bdaec0e14..0f4659c43 100644 --- a/weaver/wps_restapi/processes/processes.py +++ b/weaver/wps_restapi/processes/processes.py @@ -18,7 +18,7 @@ from weaver.formats import OutputFormat, repr_json from weaver.processes import opensearch from weaver.processes.execution import submit_job -from weaver.processes.utils import deploy_process_from_payload, get_job_submission_response, get_process +from 
weaver.processes.utils import deploy_process_from_payload, get_process from weaver.status import Status from weaver.store.base import StoreJobs, StoreProcesses from weaver.utils import fully_qualified_name, get_any_id @@ -274,5 +274,4 @@ def submit_local_job(request): Execution location and method is according to deployed Application Package. """ process = get_process(request=request) - body, headers = submit_job(request, process, tags=["wps-rest"]) - return get_job_submission_response(body, headers) + return submit_job(request, process, tags=["wps-rest"]) diff --git a/weaver/wps_restapi/providers/providers.py b/weaver/wps_restapi/providers/providers.py index 256524967..ef8579a80 100644 --- a/weaver/wps_restapi/providers/providers.py +++ b/weaver/wps_restapi/providers/providers.py @@ -17,8 +17,6 @@ from weaver.exceptions import ServiceNotFound, ServiceParsingError, log_unhandled_exceptions from weaver.formats import OutputFormat from weaver.owsexceptions import OWSMissingParameterValue, OWSNotImplemented -from weaver.processes.execution import submit_job -from weaver.processes.utils import get_job_submission_response from weaver.store.base import StoreServices from weaver.utils import get_any_id, get_settings from weaver.wps.utils import get_wps_client @@ -212,8 +210,9 @@ def submit_provider_job(request): """ Execute a remote provider process. 
""" + from weaver.processes.execution import submit_job # isort:skip # noqa: E402 # pylint: disable=C0413 + store = get_db(request).get_store(StoreServices) provider_id = request.matchdict.get("provider_id") service = store.fetch_by_name(provider_id) - body, headers = submit_job(request, service, tags=["wps-rest"]) - return get_job_submission_response(body, headers) + return submit_job(request, service, tags=["wps-rest"]) diff --git a/weaver/wps_restapi/swagger_definitions.py b/weaver/wps_restapi/swagger_definitions.py index 3c9e98792..4152f7372 100644 --- a/weaver/wps_restapi/swagger_definitions.py +++ b/weaver/wps_restapi/swagger_definitions.py @@ -3065,7 +3065,8 @@ class Execute(ExecuteInputOutputs): "Desired execution mode specified directly. This is intended for backward compatibility support. " "To obtain more control over execution mode selection, employ the official Prefer header instead " "(see for more details: https://pavics-weaver.readthedocs.io/en/latest/processes.html#execution-mode)." - ) + ), + validator=OneOf(ExecuteMode.values()) ) response = JobResponseOptionsEnum( missing=drop, @@ -3073,7 +3074,8 @@ class Execute(ExecuteInputOutputs): description=( "Indicates the desired representation format of the response. " "(see for more details: https://pavics-weaver.readthedocs.io/en/latest/processes.html#execution-body)." - ) + ), + validator=OneOf(ExecuteResponse.values()) ) notification_email = ExtendedSchemaNode( String(), @@ -4389,6 +4391,10 @@ class CompletedJobResponse(ExtendedMappingSchema): body = CompletedJobStatusSchema() +class FailedSyncJobResponse(CompletedJobResponse): + description = "Job submitted and failed synchronous execution. See server logs for more details." 
+ + class OkDeleteProcessJobResponse(ExtendedMappingSchema): header = ResponseHeaders() body = DismissedJobSchema() @@ -4802,12 +4808,16 @@ class GoneVaultFileDownloadResponse(ExtendedMappingSchema): post_provider_process_job_responses = { "200": CompletedJobResponse(description="success"), "201": CreatedLaunchJobResponse(description="success"), + "204": NoContentJobResultsResponse(description="success"), + "400": FailedSyncJobResponse(), "403": ForbiddenProviderAccessResponseSchema(), "500": InternalServerErrorResponseSchema(), } post_process_jobs_responses = { "200": CompletedJobResponse(description="success"), "201": CreatedLaunchJobResponse(description="success"), + "204": NoContentJobResultsResponse(description="success"), + "400": FailedSyncJobResponse(), "403": ForbiddenProviderAccessResponseSchema(), "500": InternalServerErrorResponseSchema(), } From 68ba237d57e1bfddd0f96419d745728f68a6a7a3 Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Wed, 23 Mar 2022 00:40:26 -0400 Subject: [PATCH 25/34] update docs with new response=raw, mode=sync and transmissionMode=value/reference supports (relates to #376, #377) --- docs/source/processes.rst | 85 +++++++++++++++++++++++++------------- docs/source/references.rst | 12 +++++- 2 files changed, 66 insertions(+), 31 deletions(-) diff --git a/docs/source/processes.rst b/docs/source/processes.rst index 8d85a0428..5253b51d9 100644 --- a/docs/source/processes.rst +++ b/docs/source/processes.rst @@ -477,9 +477,12 @@ and parametrization of various input/output combinations. Let's employ the follo - `OGC API - Processes, Execution Mode `_ for more general details on the execution negotiation (formerly with ``mode`` parameter) and more recently with ``Prefer`` header. - - `OGC API - Processes, Response `_ + - |ogc-exec-sync-responses|_ and |ogc-exec-async-responses|_ for a complete listing of available ``response`` formats considering all other parameters. +.. |exec-api| replace:: OpenAPI Execute +.. 
_exec-api: `exec-req`_ + .. _proc_exec_body: Execution Body @@ -512,23 +515,11 @@ This could be added later if ``Multipart`` raw data representation is required. Please |submit-issue|_ to request this feature if it is relevant for your use-cases. .. fixme:: - Filtering not implemented (everything always available). + Filtering of ``outputs`` not implemented (everything always available). https://github.com/crim-ca/weaver/issues/380 -.. fixme:: - Transmission mode ``value`` not implemented. Only ``reference`` is supported. - https://github.com/crim-ca/weaver/issues/377 - -.. fixme:: - The ``response`` field is only supported with the ``document`` value. This parameter is present only for - compatibility with other :term:`ADES` implementation, but does not actually affects `Weaver`'s response. - - Response representation mode ``raw`` to be implemented. - https://github.com/crim-ca/weaver/issues/376 - -.. |exec-api| replace:: OpenAPI Execute -.. _exec-api: `exec-req`_ - +Other parameters presented in the above examples, namely ``mode`` and ``response`` are further detailed in +the following :ref:`proc_exec_mode` section. .. _proc_exec_mode: @@ -584,17 +575,36 @@ to wait on resources for too long), the :term:`Job` execution will automatically The distinction between an `asynchronous` or `synchronous` response when executing a :term:`Job` can be observed in multiple ways. The easiest is with the HTTP status code of the response, 200 being for a :term:`Job` *entirely completed* synchronously, and 201 for a created :term:`Job` that should be -:ref:`monitored ` asynchronously. Another method is to observe the ``"status"`` value. If the -status is ``accepted`` or ``running``, it means the operation is guaranteed to be `asynchronous`, since `synchronous` -always return a final status (``succeeded`` or ``failed``). 
Note that a final status is possible in both modes, so -seing one of those values does not *guarantee* it was executed `synchronously`, but the complete :term:`Job` status -can be immediately requested with the :ref:`GetStatus ` request. In general, a `synchronous` response -will be much more verbose than an `asynchronous` one, since details are not yet all available. Finally, it is possible -to extract the ``Preference-Applied`` response header which will clearly indicate if the submitted ``Prefer`` header -was respected (because it could be with available worker resources) or not. In general, this means that if -the :term:`Job` submission request was not provided with ``Prefer: wait=X`` **AND** replied with the -same ``Preference-Applied`` value, it is safe to assume `Weaver` decided to queue the :term:`Job` for `asynchronous` -execution. That :term:`Job` could be executed immediately, or at a later time, according to worker availability. +:ref:`monitored ` asynchronously. Another method is to observe the ``"status"`` value. +Effectively, a :term:`Job` that is executed `asynchronously` will return status information contents, while +a `synchronous` :term:`Job` will return the results directly, along a ``Location`` header referring to the +equivalent contents returned by :ref:`GetStatus ` as in the case of `asynchronous` :term:`Job`. +It is also possible to extract the ``Preference-Applied`` response header which will clearly indicate if the +submitted ``Prefer`` header was respected (because it could be with available worker resources) or not. +In general, this means that if the :term:`Job` submission request was not provided with ``Prefer: wait=X`` **AND** +replied with the same ``Preference-Applied`` value, it is safe to assume `Weaver` decided to queue the :term:`Job` +for `asynchronous` execution. That :term:`Job` could be executed immediately, or at a later time, according to +worker availability. 
+ +It is also possible that a ``failed`` :term:`Job`, even when `synchronous`, will respond with equivalent contents +to the status location instead of results. This is because it is impossible for `Weaver` to return +the result(s) as outputs would not be generated by the incomplete :term:`Job`. + +Finally, the ``response`` parameter defines how to return the results produced by the :term:`Process`. +When ``response=document``, regardless of ``mode=async`` or ``mode=sync``, and regardless of requested +outputs ``transmissionMode=value`` or ``transmissionMode=reference``, the results will be returned in +a :term:`JSON` format containing either literal values or URL references to produced files. If ``mode=async``, +this results *document* is obtained with |result-req|_ request, while ``mode=sync`` returns it directly. +When ``response=raw``, the specific contents (type and quantity), HTTP ``Link`` headers or a mix of those components +depends both on the number of available :term:`Process` outputs, which ones were requested, and how they were +requested (i.e.: ``transmissionMode``). It is also possible that further content negotiation gets involved +according to the ``Accept`` header and available ``Content-Type`` of the outputs if multiple formats are supported +by the :term:`Process`. For more details regarding those combinations, the official +|ogc-exec-sync-responses|_ and |ogc-exec-async-responses|_ should be employed as reference. + +For any of the previous combinations, it is always possible to obtain :term:`Job` outputs, along with logs, exceptions +and other details using the :ref:`proc_op_result` endpoints. + .. _proc_exec_steps: @@ -1233,10 +1243,10 @@ format is employed according to the chosen location. .. 
_proc_op_result: -Obtaining output results, logs or errors +Obtaining results, outputs, logs or errors --------------------------------------------------------------------- -In the case of successful :term:`Job` execution, the outputs can be retrieved with |result-req|_ request to list +In the case of successful :term:`Job` execution, the *outputs* can be retrieved with |output-req|_ request to list each corresponding output ``id`` with the generated file reference URL. Keep in mind that the purpose of those URLs are only to fetch the results (not persistent storage), and could therefore be purged after some reasonable amount of time. The format should be similar to the following example, with minor variations according to :ref:`Configuration` @@ -1253,6 +1263,15 @@ parameters for the base :term:`WPS` output location: ] } +For the :term:`OGC` compliant endpoint, the |result-req| request can be employed instead. +In the event of a :term:`Job` executed with ``response=document``, the contents will be very similar. +On the other hand, a :term:`Job` submitted with ``response=raw`` can produce many alternative variations according +to :term:`OGC` requirements. For this reason, the *outputs* endpoint will always provide all data and file references +in the response body as :term:`Job`, no matter the original ``response`` format. The *outputs* endpoint can also +receive additional query parameters, such as ``schema``, to return contents formatted similarly to *results*, but +enforcing a :term:`JSON` body as if ``response=document`` was specified during submission of the :term:`Process` +execution. + In situations where the :term:`Job` resulted into ``failed`` status, the |except-req|_ can be use to retrieve the potential cause of failure, by capturing any raised exception. Below is an example of such exception details. @@ -1288,6 +1307,14 @@ Note again that the more the :term:`Process` is verbose, the more tracking will .. 
literalinclude:: ../../weaver/wps_restapi/examples/job_logs.json :language: json + +.. note:: + All endpoints to retrieve any of the above information about a :term:`Job` can either be requested directly + (i.e.: ``/jobs/{jobID}/...``) or with equivalent :term:`Provider` and/or :term:`Process` prefixed endpoints, + if the requested :term:`Job` did refer to those :term:`Provider` and/or :term:`Process`. + A *local* :term:`Process` would have its :term:`Job` references as ``/processes/{processId}/jobs/{jobID}/...`` + while a :ref:`proc_remote_provider` will use ``/provider/{providerName}/processes/{processId}/jobs/{jobID}/...``. + .. _vault: Uploading File to the Vault diff --git a/docs/source/references.rst b/docs/source/references.rst index 418d0557c..bb473deb5 100644 --- a/docs/source/references.rst +++ b/docs/source/references.rst @@ -56,6 +56,10 @@ .. _ogc-home: `ogc`_ .. |ogc-proc-api| replace:: OGC API - Processes .. _ogc-proc-api: https://github.com/opengeospatial/ogcapi-processes +.. |ogc-exec-sync-responses| replace:: OGC API - Processes, Responses (sync) +.. _ogc-exec-sync-responses: https://docs.ogc.org/is/18-062r2/18-062r2.html#sc_execute_response +.. |ogc-exec-async-responses| replace:: OGC API - Processes, Responses (async) +.. _ogc-exec-async-responses: https://docs.ogc.org/is/18-062r2/18-062r2.html#response_7 .. |pywps| replace:: PyWPS .. _pywps: https://github.com/geopython/pywps/ .. |pywps-status| replace:: Progress and Status Report @@ -135,8 +139,12 @@ .. _status-req-name: `status-req`_ .. |status-req| replace:: ``GET {WEAVER_URL}/processes/{id}/jobs/{id}`` (GetStatus) .. _status-req: https://pavics-weaver.readthedocs.io/en/latest/api.html#tag/Status%2Fpaths%2F~1processes~1{process_id}~1jobs~1{job_id}%2Fget -.. |result-req| replace:: ``GET {WEAVER_URL}/processes/{id}/jobs/{id}/result`` (GetResult) -.. 
_result-req: https://pavics-weaver.readthedocs.io/en/latest/api.html#tag/Results%2Fpaths%2F~1processes~1%7Bprocess_id%7D~1jobs~1%7Bjob_id%7D~1result%2Fget +.. |inputs-req| replace:: ``GET {WEAVER_URL}/jobs/{id}/inputs`` (Inputs) +.. _input-req: https://pavics-weaver.readthedocs.io/en/latest/api.html#tag/inputs/paths/~1jobs~1{job_id}~1inputs/get +.. |output-req| replace:: ``GET {WEAVER_URL}/jobs/{id}/outputs`` (Outputs) +.. _output-req: https://pavics-weaver.readthedocs.io/en/latest/api.html#tag/outputs/paths/~1jobs~1{job_id}~1outputs/get +.. |result-req| replace:: ``GET {WEAVER_URL}/jobs/{id}/results`` (Results) +.. _result-req: https://pavics-weaver.readthedocs.io/en/latest/api.html#tag/Results/paths/~1jobs~1{job_id}~1results/get .. |update-token-req| replace:: Update Token .. _update-token-req: https://pavics-weaver.readthedocs.io/en/latest/api.html#tag/UpdateToken/paths/~1processes~1{process_id}/put .. |vault-upload-req| replace:: Vault File Upload (POST) From b58fb4a12fa11b3d19bffb33ef09bc02d39fa173 Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Wed, 23 Mar 2022 00:43:40 -0400 Subject: [PATCH 26/34] fix imports linting --- tests/wps_restapi/test_jobs.py | 2 +- weaver/processes/utils.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/wps_restapi/test_jobs.py b/tests/wps_restapi/test_jobs.py index 01beb7aac..7edbf80c8 100644 --- a/tests/wps_restapi/test_jobs.py +++ b/tests/wps_restapi/test_jobs.py @@ -1,4 +1,3 @@ -import colander import contextlib import datetime import json @@ -12,6 +11,7 @@ from distutils.version import LooseVersion from typing import TYPE_CHECKING +import colander import mock import pyramid.testing import pytest diff --git a/weaver/processes/utils.py b/weaver/processes/utils.py index fdd536640..42194115b 100644 --- a/weaver/processes/utils.py +++ b/weaver/processes/utils.py @@ -40,9 +40,8 @@ log_unhandled_exceptions ) from weaver.processes.types import ProcessType -from weaver.status import 
JOB_STATUS_CATEGORIES, StatusCategory, map_status from weaver.store.base import StoreProcesses, StoreServices -from weaver.utils import get_header, get_sane_name, get_settings, get_url_without_query +from weaver.utils import get_sane_name, get_settings, get_url_without_query from weaver.visibility import Visibility from weaver.wps.utils import get_wps_client from weaver.wps_restapi import swagger_definitions as sd From dfc4f76e4014ce3e07f306c44329f6e0168c7e6a Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Wed, 23 Mar 2022 00:57:15 -0400 Subject: [PATCH 27/34] fix tests handling dispatched WPS request monitoring --- weaver/wps/service.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/weaver/wps/service.py b/weaver/wps/service.py index 42b726820..4a7627f11 100644 --- a/weaver/wps/service.py +++ b/weaver/wps/service.py @@ -242,6 +242,9 @@ def _submit_job(self, wps_request): process_id=pid, is_local=True, is_workflow=is_workflow, visibility=Visibility.PUBLIC, language=wps_request.language, tags=tags, headers=dict(req.headers), context=ctx ) + # enforced JSON results with submitted data that includes 'response=document' + # use 'json_body' to work with any 'response' implementation + body = resp.json_body # if Accept was JSON, provide response content as is # if anything else (even */*), return as XML @@ -250,11 +253,11 @@ def _submit_job(self, wps_request): # way to provide explicitly Accept header. Even our Wps1Process as Workflow step depends on this behaviour. 
accept_type = get_header("Accept", req.headers) if accept_type == ContentType.APP_JSON: - resp = get_job_submission_response(resp.body, resp.headers) + resp = get_job_submission_response(body, resp.headers) setattr(resp, "_update_status", lambda *_, **__: None) # patch to avoid pywps server raising return resp - return resp.body + return body @handle_known_exceptions def prepare_process_for_execution(self, identifier): From d7330fb8d1a9875cbe895be8e652f95a7234c576 Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Wed, 23 Mar 2022 00:59:03 -0400 Subject: [PATCH 28/34] fix dismiss mock --- tests/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/utils.py b/tests/utils.py index 1ee27b7dc..62ee033dc 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -907,7 +907,7 @@ def mocked_dismiss_process(): mock_celery_app = mock.MagicMock() mock_celery_app.control = mock.MagicMock() mock_celery_app.control.revoke = mock.MagicMock() - mock_celery_revoke = mock.patch("weaver.wps_restapi.jobs.jobs.celery_app", return_value=mock_celery_app) + mock_celery_revoke = mock.patch("weaver.wps_restapi.jobs.utils.celery_app", return_value=mock_celery_app) try: with mock_celery_revoke: From 50ff933259c167306bfe7122ce834cdec767e683 Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Wed, 23 Mar 2022 12:57:54 -0400 Subject: [PATCH 29/34] fix test for response=raw/transmissionMode=reference + document job inputs endpoint --- docs/source/processes.rst | 4 ++++ tests/functional/test_builtin.py | 2 +- tests/functional/test_quoting.py | 3 ++- weaver/processes/execution.py | 4 ++-- weaver/store/mongodb.py | 1 + weaver/wps_restapi/jobs/utils.py | 9 ++++++--- 6 files changed, 16 insertions(+), 7 deletions(-) diff --git a/docs/source/processes.rst b/docs/source/processes.rst index 5253b51d9..3f59ce924 100644 --- a/docs/source/processes.rst +++ b/docs/source/processes.rst @@ -1272,6 +1272,10 @@ receive additional query parameters, such as 
``schema``, to return contents form enforcing a :term:`JSON` body as if ``response=document`` was specified during submission of the :term:`Process` execution. +In order to better understand the parameters that were submitted during :term:`Job` creation, the |inputs-req|_ +can be employed. This will return both the data and reference inputs that were submitted, as well as +the *requested outputs* to retrieve any relevant ``transmissionMode`` definition. + In situations where the :term:`Job` resulted into ``failed`` status, the |except-req|_ can be use to retrieve the potential cause of failure, by capturing any raised exception. Below is an example of such exception details. diff --git a/tests/functional/test_builtin.py b/tests/functional/test_builtin.py index 1ee5a69b1..df0ecd9b1 100644 --- a/tests/functional/test_builtin.py +++ b/tests/functional/test_builtin.py @@ -296,7 +296,7 @@ def test_jsonarray2netcdf_execute_async_output_by_reference_response_raw(self): assert resp.status_code == 204, "Body should be empty since all outputs requested by reference (Link header)" assert resp.content_type is None assert resp.headers - result_links = [hdr for hdr in resp.headers if hdr[0].lower() == "link"] + result_links = [hdr for hdr in resp.headers if hdr[0] == "Link"] # even though results are requested by Link reference, # Weaver still offers them with document on outputs endpoint diff --git a/tests/functional/test_quoting.py b/tests/functional/test_quoting.py index 558e0a62e..1fecbf993 100644 --- a/tests/functional/test_quoting.py +++ b/tests/functional/test_quoting.py @@ -67,7 +67,8 @@ def deploy_test_processes(cls): def test_quote_bad_inputs(self): path = sd.process_quotes_service.path.format(process_id="Echo") - resp = mocked_sub_requests(self.app, "POST", path, json={}, headers=self.json_headers, only_local=True) + data = {"inputs": [1, 2, 3]} + resp = mocked_sub_requests(self.app, "POST", path, json=data, headers=self.json_headers, only_local=True) assert 
resp.status_code == 400 @mock.patch("weaver.quotation.estimation.estimate_process_quote", side_effect=mocked_estimate_process_quote) diff --git a/weaver/processes/execution.py b/weaver/processes/execution.py index f8b34a4cf..cd2fcc4f7 100644 --- a/weaver/processes/execution.py +++ b/weaver/processes/execution.py @@ -546,8 +546,8 @@ def submit_job_handler(payload, # type: JSON store = db.get_store(StoreJobs) # type: StoreJobs job = store.save_job(task_id=Status.ACCEPTED, process=process_id, service=provider_id, - inputs=json_body.get("inputs"), is_local=is_local, is_workflow=is_workflow, - access=visibility, user_id=user, context=context, + inputs=json_body.get("inputs"), outputs=json_body.get("outputs"), + is_local=is_local, is_workflow=is_workflow, access=visibility, user_id=user, context=context, execute_async=is_execute_async, execute_response=exec_resp, custom_tags=tags, notification_email=encrypted_email, accept_language=language) job.save_log(logger=LOGGER, message="Job task submitted for execution.", status=Status.ACCEPTED, progress=0) diff --git a/weaver/store/mongodb.py b/weaver/store/mongodb.py index fe8e8dd29..448cd9cdb 100644 --- a/weaver/store/mongodb.py +++ b/weaver/store/mongodb.py @@ -610,6 +610,7 @@ def save_job(self, "service": service, # provider identifier (WPS service) "process": process, # process identifier (WPS request) "inputs": inputs, + "outputs": outputs, "status": map_status(Status.ACCEPTED), "execute_async": execute_async, "execution_response": execute_response, diff --git a/weaver/wps_restapi/jobs/utils.py b/weaver/wps_restapi/jobs/utils.py index eb56cec8f..ccb0eaebf 100644 --- a/weaver/wps_restapi/jobs/utils.py +++ b/weaver/wps_restapi/jobs/utils.py @@ -283,7 +283,10 @@ def make_result_link(result_id, result, job_id, settings): typ = get_field(fmt, "mime_type", search_variations=True, default=ContentType.TEXT_PLAIN) enc = get_field(fmt, "encoding", search_variations=True, default=None) url = get_any_value(value, data=False, 
file=True) # should already include full path - links.append(f"<{url}>; rel=\"{result_id}{suffix}\"; type={typ}; charset={enc}") + if fmt == ContentType.TEXT_PLAIN and not enc: # only if text, otherwise binary content could differ + enc = "UTF-8" # default both omit/empty + encoding = f"; charset={enc}" if enc else "" + links.append(f"<{url}>; rel=\"{result_id}{suffix}\"; type={typ}{encoding}") return links @@ -416,13 +419,13 @@ def get_job_results_response(job, container, headers=None): "cause": {"status": job.status}, }) - # Document ignores values/references + # when 'response=document', ignore 'transmissionMode=value|reference', respect it when 'response=raw' # See: # - https://docs.ogc.org/is/18-062r2/18-062r2.html#_response_7 (/req/core/job-results-async-document) # - https://docs.ogc.org/is/18-062r2/18-062r2.html#req_core_process-execute-sync-document is_raw = job.execution_response == ExecuteResponse.RAW results, refs = get_results(job, container, value_key="value", - schema=JobInputsOutputsSchema.OGC, + schema=JobInputsOutputsSchema.OGC, # not strict to provide more format details link_references=is_raw) # type: Union[ExecutionResults, HeadersTupleType] headers = headers or {} if "location" not in headers: From 4d9b43a266c7bebcf9fc4e93db0cdbd897f0d275 Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Wed, 23 Mar 2022 13:22:49 -0400 Subject: [PATCH 30/34] fix doc refs --- docs/source/processes.rst | 6 +++--- docs/source/references.rst | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/source/processes.rst b/docs/source/processes.rst index 3f59ce924..8de5bd82c 100644 --- a/docs/source/processes.rst +++ b/docs/source/processes.rst @@ -594,7 +594,7 @@ Finally, the ``response`` parameter defines how to return the results produced b When ``response=document``, regardless of ``mode=async`` or ``mode=sync``, and regardless of requested outputs ``transmissionMode=value`` or ``transmissionMode=reference``, the results 
will be returned in a :term:`JSON` format containing either literal values or URL references to produced files. If ``mode=async``, -this results *document* is obtained with |result-req|_ request, while ``mode=sync`` returns it directly. +this results *document* is obtained with |results-req|_ request, while ``mode=sync`` returns it directly. When ``response=raw``, the specific contents (type and quantity), HTTP ``Link`` headers or a mix of those components depends both on the number of available :term:`Process` outputs, which ones were requested, and how they were requested (i.e.: ``transmissionMode``). It is also possible that further content negotiation gets involved @@ -1246,7 +1246,7 @@ format is employed according to the chosen location. Obtaining results, outputs, logs or errors --------------------------------------------------------------------- -In the case of successful :term:`Job` execution, the *outputs* can be retrieved with |output-req|_ request to list +In the case of successful :term:`Job` execution, the *outputs* can be retrieved with |outputs-req|_ request to list each corresponding output ``id`` with the generated file reference URL. Keep in mind that the purpose of those URLs are only to fetch the results (not persistent storage), and could therefore be purged after some reasonable amount of time. The format should be similar to the following example, with minor variations according to :ref:`Configuration` @@ -1263,7 +1263,7 @@ parameters for the base :term:`WPS` output location: ] } -For the :term:`OGC` compliant endpoint, the |result-req| request can be employed instead. +For the :term:`OGC` compliant endpoint, the |results-req| request can be employed instead. In the event of a :term:`Job` executed with ``response=document``, the contents will be very similar. On the other hand, a :term:`Job` submitted with ``response=raw`` can produce many alternative variations according to :term:`OGC` requirements. 
For this reason, the *outputs* endpoint will always provide all data and file references diff --git a/docs/source/references.rst b/docs/source/references.rst index bb473deb5..8a9f1da2a 100644 --- a/docs/source/references.rst +++ b/docs/source/references.rst @@ -140,11 +140,11 @@ .. |status-req| replace:: ``GET {WEAVER_URL}/processes/{id}/jobs/{id}`` (GetStatus) .. _status-req: https://pavics-weaver.readthedocs.io/en/latest/api.html#tag/Status%2Fpaths%2F~1processes~1{process_id}~1jobs~1{job_id}%2Fget .. |inputs-req| replace:: ``GET {WEAVER_URL}/jobs/{id}/inputs`` (Inputs) -.. _input-req: https://pavics-weaver.readthedocs.io/en/latest/api.html#tag/inputs/paths/~1jobs~1{job_id}~1inputs/get -.. |output-req| replace:: ``GET {WEAVER_URL}/jobs/{id}/outputs`` (Outputs) -.. _output-req: https://pavics-weaver.readthedocs.io/en/latest/api.html#tag/outputs/paths/~1jobs~1{job_id}~1outputs/get -.. |result-req| replace:: ``GET {WEAVER_URL}/jobs/{id}/results`` (Results) -.. _result-req: https://pavics-weaver.readthedocs.io/en/latest/api.html#tag/Results/paths/~1jobs~1{job_id}~1results/get +.. _inputs-req: https://pavics-weaver.readthedocs.io/en/latest/api.html#tag/inputs/paths/~1jobs~1{job_id}~1inputs/get +.. |outputs-req| replace:: ``GET {WEAVER_URL}/jobs/{id}/outputs`` (Outputs) +.. _outputs-req: https://pavics-weaver.readthedocs.io/en/latest/api.html#tag/outputs/paths/~1jobs~1{job_id}~1outputs/get +.. |results-req| replace:: ``GET {WEAVER_URL}/jobs/{id}/results`` (Results) +.. _results-req: https://pavics-weaver.readthedocs.io/en/latest/api.html#tag/Results/paths/~1jobs~1{job_id}~1results/get .. |update-token-req| replace:: Update Token .. _update-token-req: https://pavics-weaver.readthedocs.io/en/latest/api.html#tag/UpdateToken/paths/~1processes~1{process_id}/put .. 
|vault-upload-req| replace:: Vault File Upload (POST) From b1a0883e8b47d2ff09f0786a62db9ae1645b47b1 Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Wed, 23 Mar 2022 18:52:15 -0400 Subject: [PATCH 31/34] add CLI proper support of --ref for transmissionMode=reference and --download option of those file links --- tests/functional/test_cli.py | 101 ++++++++++++++++++ weaver/cli.py | 195 +++++++++++++++++++++++++---------- weaver/formats.py | 5 +- 3 files changed, 247 insertions(+), 54 deletions(-) diff --git a/tests/functional/test_cli.py b/tests/functional/test_cli.py index 2ee00ff1e..55ed61f10 100644 --- a/tests/functional/test_cli.py +++ b/tests/functional/test_cli.py @@ -744,6 +744,107 @@ def test_execute_auto_monitor(self): assert any(f"\"status\": \"{Status.SUCCEEDED}\"" in line for line in lines) assert any("\"rel\": \"http://www.opengis.net/def/rel/ogc/1.0/results\"" in line for line in lines) + def test_execute_result_by_reference(self): + """ + Validate option to obtain outputs by reference returned with ``Link`` header. + + Result obtained is validated both with API outputs and extended auto-download outputs. + """ + proc = self.test_process["Echo"] + with contextlib.ExitStack() as stack_exec: + out_tmp = stack_exec.enter_context(tempfile.TemporaryDirectory()) + stack_exec.enter_context(mocked_wps_output(self.settings)) + for mock_exec_proc in mocked_execute_celery(): + stack_exec.enter_context(mock_exec_proc) + + msg = "TEST MESSAGE!" 
+ lines = mocked_sub_requests( + self.app, run_command, + [ + # "weaver", + "execute", + "-u", self.url, + "-p", proc, + "-I", f"message='{msg}'", + "-R", "output", + "-M", + "-T", 10, + "-W", 1, + "-F", OutputFormat.YAML, + ], + trim=False, + entrypoint=weaver_cli, + only_local=True, + ) + assert "jobID: " in lines[0] # don't care value, self-handled + assert any(f"status: {Status.SUCCEEDED}" in line for line in lines) + + job_id = lines[0].split(":")[-1].strip() + lines = mocked_sub_requests( + self.app, run_command, + [ + # "weaver", + "results", + "-u", self.url, + "-j", job_id, + "-H", # must display header to get 'Link' + "-F", OutputFormat.YAML, + ], + trim=False, + entrypoint=weaver_cli, + only_local=True, + ) + sep = lines.index("---") + headers = lines[:sep] + content = lines[sep+1:-1] # ignore final newline + assert len(headers) and any("Link:" in hdr for hdr in headers) + assert content == ["null"], "When no download involved, body should be the original no-content results." + + lines = mocked_sub_requests( + self.app, run_command, + [ + # "weaver", + "results", + "-u", self.url, + "-j", job_id, + "-H", # must display header to get 'Link' + "-F", OutputFormat.YAML, + "-D", + "-O", out_tmp + ], + trim=False, + entrypoint=weaver_cli, + only_local=True, + ) + sep = lines.index("---") + headers = lines[:sep] + content = lines[sep+1:] + + assert len(content), "Content should have been populated from download to provide downloaded file paths." + link = None + for header in headers: + if "Link:" in header: + link = header.split(":", 1)[-1].strip() + break + assert link + link = link.split(";")[0].strip("<>") + path = map_wps_output_location(link, self.settings, url=False) + assert os.path.isfile(path), "Original file results should exist in job output dir." 
+ + # path should be in contents as well, pre-resolved within download dir (not same as job output dir) + assert len([line for line in content if "path:" in line]) == 1 + path = None + for line in content: + if "path:" in line: + path = line.split(":", 1)[-1].strip() + break + assert path + assert path.startswith(out_tmp) + assert os.path.isfile(path) + with open(path, "r") as file: + data = file.read() + assert msg in data # technically, output is log of echoed input message, so not exactly equal + def test_execute_help_details(self): """ Verify that formatting of the execute operation help provides multiple paragraphs with more details. diff --git a/weaver/cli.py b/weaver/cli.py index 192fc756a..81a758fc0 100644 --- a/weaver/cli.py +++ b/weaver/cli.py @@ -11,6 +11,7 @@ from urllib.parse import urlparse import yaml +from webob.headers import ResponseHeaders from yaml.scanner import ScannerError from weaver import __meta__ @@ -35,6 +36,7 @@ get_file_headers, load_file, null, + parse_kvp, request_extra, setup_loggers ) @@ -42,16 +44,17 @@ from weaver.wps_restapi import swagger_definitions as sd if TYPE_CHECKING: - from typing import Any, Dict, Iterable, Optional, Tuple, Union + from typing import Any, Dict, Iterable, List, Optional, Tuple, Union from requests import Response # avoid failing sphinx-argparse documentation # https://github.com/ashb/sphinx-argparse/issues/7 try: - from weaver.typedefs import CWL, JSON, ExecutionInputsMap, HeadersType + from weaver.typedefs import AnyHeadersContainer, CWL, JSON, ExecutionInputsMap, ExecutionResults, HeadersType except ImportError: - CWL = JSON = ExecutionInputsMap = HeadersType = Any # avoid linter issue + # avoid linter issue + AnyHeadersContainer = CWL = JSON = ExecutionInputsMap = ExecutionResults = HeadersType = Any try: from weaver.formats import AnyOutputFormat from weaver.processes.constants import ProcessSchemaType @@ -80,7 +83,7 @@ class OperationResult(AutoBase): """ success = False # type: Optional[bool] 
message = "" # type: Optional[str] - headers = {} # type: Optional[HeadersType] + headers = {} # type: Optional[AnyHeadersContainer] body = {} # type: Optional[Union[JSON, str]] code = None # type: Optional[int] @@ -88,7 +91,7 @@ def __init__(self, success=None, # type: Optional[bool] message=None, # type: Optional[str] body=None, # type: Optional[Union[str, JSON]] - headers=None, # type: Optional[HeadersType] + headers=None, # type: Optional[AnyHeadersContainer] text=None, # type: Optional[str] code=None, # type: Optional[int] **kwargs, # type: Any @@ -96,7 +99,7 @@ def __init__(self, super(OperationResult, self).__init__(**kwargs) self.success = success self.message = message - self.headers = headers + self.headers = ResponseHeaders(headers) if headers is not None else None self.body = body self.text = text self.code = code @@ -123,6 +126,28 @@ def text(self, text): # type: (str) -> None self["text"] = text + def links(self, header_names=None): + # type: (Optional[List[str]]) -> ResponseHeaders + """ + Obtain HTTP headers sorted in the result that corresponds to any link reference. + + :param header_names: + Limit link names to be considered. + By default, considered headers are ``Link``, ``Content-Location`` and ``Location``. 
+ """ + if not self.headers: + return ResponseHeaders([]) + if not isinstance(self.headers, ResponseHeaders): + self.headers = ResponseHeaders(self.headers) + if not header_names: + header_names = ["Link", "Content-Location", "Location"] + header_names = [hdr.lower() for hdr in header_names] + link_headers = ResponseHeaders() + for hdr_n, hdr_v in self.headers.items(): + if hdr_n.lower() in header_names: + link_headers.add(hdr_n, hdr_v) + return link_headers + class WeaverClient(object): """ @@ -164,7 +189,7 @@ def _parse_url(url): return parsed_url.rsplit("/", 1)[0] if parsed_url.endswith("/") else parsed_url @staticmethod - def _parse_result(response, # type: Response + def _parse_result(response, # type: Union[Response, OperationResult] body=None, # type: Optional[JSON] # override response body message=None, # type: Optional[str] # override message/description in contents success=None, # type: Optional[bool] # override resolved success @@ -173,33 +198,45 @@ def _parse_result(response, # type: Response nested_links=None, # type: Optional[str] output_format=None, # type: Optional[AnyOutputFormat] ): # type: (...) 
-> OperationResult - hdr = dict(response.headers) + # multi-header of same name, for example to support many Link + headers = ResponseHeaders(response.headers) + code = getattr(response, "status_code", None) or getattr(response, "code", None) _success = False try: - body = body or response.json() - if not show_links: - if nested_links: - nested = body.get(nested_links, []) - if isinstance(nested, list): - for item in nested: - item.pop("links", None) - body.pop("links", None) - msg = message or body.get("description", body.get("message", "undefined")) - if response.status_code >= 400: - if not msg: + msg = None + ctype = headers.get("Content-Type") + content = getattr(response, "content", None) or getattr(response, "body", None) + if not body and content and ctype and ContentType.APP_JSON in ctype and hasattr(response, "json"): + body = response.json() + if isinstance(body, dict): + if not show_links: + if nested_links: + nested = body.get(nested_links, []) + if isinstance(nested, list): + for item in nested: + item.pop("links", None) + body.pop("links", None) + msg = message or body.get("description", body.get("message", "undefined")) + if code >= 400: + if not msg and isinstance(body, dict): msg = body.get("error", body.get("exception", "unknown")) else: _success = True + if not msg: + msg = "undefined" text = OutputFormat.convert(body, output_format or OutputFormat.JSON_STR, item_root="result") - except Exception: # noqa - text = body = response.text + except Exception as exc: # noqa msg = "Could not parse body." 
+ text = body = response.text + LOGGER.warning(msg, exc_info=exc) if show_headers: - s_hdr = OutputFormat.convert({"Headers": hdr}, OutputFormat.YAML) - text = f"{s_hdr}---\n{text}" + # convert potential multi-equal-key headers into a JSON/YAML serializable format + hdr_l = [{hdr_name: hdr_val} for hdr_name, hdr_val in sorted(headers.items())] + hdr_s = OutputFormat.convert({"Headers": hdr_l}, OutputFormat.YAML) + text = f"{hdr_s}---\n{text}" if success is not None: _success = success - return OperationResult(_success, msg, body, hdr, text=text, code=response.status_code) + return OperationResult(_success, msg, body, headers, text=text, code=code) @staticmethod def _parse_deploy_body(body, process_id): @@ -617,7 +654,6 @@ def execute(self, # NOTE: Backward compatibility for servers that only know ``mode`` and don't handle ``Prefer`` header. "mode": ExecuteMode.ASYNC, "inputs": values, - # FIXME: support 'response: raw' (https://github.com/crim-ca/weaver/issues/376) "response": ExecuteResponse.DOCUMENT, # FIXME: allow filtering 'outputs' (https://github.com/crim-ca/weaver/issues/380) "outputs": {} @@ -629,8 +665,16 @@ def execute(self, outputs = result.body.get("outputs") output_refs = set(output_refs or []) for output_id in outputs: - # use 'value' to have all outputs reported in body as 'value/href' rather than 'Link' headers - out_mode = ExecuteTransmissionMode.REFERENCE if output_id in output_refs else ExecuteTransmissionMode.VALUE + if output_id in output_refs: + # If any 'reference' is requested explicitly, must switch to 'response=raw' + # since 'response=document' ignores 'transmissionMode' definitions. + data["response"] = ExecuteResponse.RAW + # Use 'value' to have all outputs reported in body as 'value/href' rather than 'Link' headers. 
+ out_mode = ExecuteTransmissionMode.REFERENCE + else: + # make sure to set value to outputs not requested as reference in case another one needs reference + # mode doesn't matter if no output by reference requested since 'response=document' would be used + out_mode = ExecuteTransmissionMode.VALUE data["outputs"][output_id] = {"transmissionMode": out_mode} LOGGER.info("Executing [%s] with inputs:\n%s", process_id, OutputFormat.convert(values, OutputFormat.JSON_STR)) @@ -817,6 +861,65 @@ def monitor(self, once = False return OperationResult(False, f"Monitoring timeout reached ({timeout}s). Job did not complete in time.") + def _download_references(self, outputs, out_links, out_dir, job_id): + # type: (ExecutionResults, AnyHeadersContainer, str, str) -> ExecutionResults + """ + Download file references from results response contents and link headers. + + Downloaded files extend the results contents with ``path`` and ``source`` fields to indicate where the + retrieved files have been saved and where they came from. When files are found by HTTP header links, they + are added to the output contents to generate a combined representation in the operation result. 
+ """ + if not isinstance(outputs, dict): + # default if links-only needed later (insert as content for printed output) + outputs = {} # type: ExecutionResults + + # download file results + if not (any("href" in value for value in outputs.values()) or len(out_links)): + return OperationResult(False, "Outputs were found but none are downloadable (only raw values?).", outputs) + if not out_dir: + out_dir = os.path.join(os.path.realpath(os.path.curdir), job_id) + os.makedirs(out_dir, exist_ok=True) + LOGGER.info("Will store job [%s] output results in [%s]", job_id, out_dir) + + # download outputs from body content + LOGGER.debug("%s outputs in results content.", "Processing" if len(outputs) else "No") + for output, value in outputs.items(): + is_list = True + if not isinstance(value, list): + value = [value] + is_list = False + for i, item in enumerate(value): + if "href" in item: + file_path = fetch_file(item["href"], out_dir, link=False) + if is_list: + outputs[output][i]["path"] = file_path + outputs[output][i]["source"] = "body" + else: + outputs[output]["path"] = file_path + outputs[output]["source"] = "body" + + # download links from headers + LOGGER.debug("%s outputs in results link headers.", "Processing" if len(out_links) else "No") + for _, link_header in ResponseHeaders(out_links).items(): + link, params = link_header.split(";", 1) + href = link.strip("<>") + params = parse_kvp(params, multi_value_sep=None, accumulate_keys=False) + ctype = (params.get("type") or [None])[0] + rel = params["rel"][0].split(".") + output = rel[0] + is_array = len(rel) > 1 and str.isnumeric(rel[1]) + file_path = fetch_file(href, out_dir, link=False) + value = {"href": href, "type": ctype, "path": file_path, "source": "link"} + if output in outputs: + if isinstance(outputs[output], dict): # in case 'rel=". 
OperationResult @@ -1231,7 +1320,7 @@ def make_parser(): # op_execute.add_argument( # "-O", "--output", op_execute.add_argument( - "-R", "--ref", "--reference", metavar="REFERENCE", dest="output_refs", nargs=1, action="append", + "-R", "--ref", "--reference", metavar="REFERENCE", dest="output_refs", action="append", help=inspect.cleandoc(""" Indicates which outputs by ID to be returned as HTTP Link header reference instead of body content value. This defines the output transmission mode when submitting the execution request. diff --git a/weaver/formats.py b/weaver/formats.py index 969e511e8..e99bc0e7b 100644 --- a/weaver/formats.py +++ b/weaver/formats.py @@ -165,7 +165,10 @@ def convert(cls, data, to, item_root="item"): xml = xml.strip() return xml if fmt in [OutputFormat.YML, OutputFormat.YAML]: - return yaml.safe_dump(data, indent=2, sort_keys=False) + yml = yaml.safe_dump(data, indent=2, sort_keys=False, width=float("inf")) + if yml.endswith("\n...\n"): # added when data is single literal or None instead of list/object + yml = yml[:-4] + return yml return data From c7295aedbe707f2634ab57911a00d1ce376949e9 Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Wed, 23 Mar 2022 19:10:18 -0400 Subject: [PATCH 32/34] fix msg when listing results directly --- weaver/cli.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/weaver/cli.py b/weaver/cli.py index 81a758fc0..4029a2336 100644 --- a/weaver/cli.py +++ b/weaver/cli.py @@ -216,14 +216,13 @@ def _parse_result(response, # type: Union[Response, OperationResult] for item in nested: item.pop("links", None) body.pop("links", None) - msg = message or body.get("description", body.get("message", "undefined")) + msg = body.get("description", body.get("message", "undefined")) if code >= 400: if not msg and isinstance(body, dict): msg = body.get("error", body.get("exception", "unknown")) else: _success = True - if not msg: - msg = "undefined" + msg = message or getattr(response, "message", 
None) or msg or "undefined" text = OutputFormat.convert(body, output_format or OutputFormat.JSON_STR, item_root="result") except Exception as exc: # noqa msg = "Could not parse body." From 7860833237978a4a55ebb22651d7a04e49231696 Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Wed, 23 Mar 2022 19:21:01 -0400 Subject: [PATCH 33/34] fix broken doc link --- docs/source/references.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/references.rst b/docs/source/references.rst index 8a9f1da2a..60341070c 100644 --- a/docs/source/references.rst +++ b/docs/source/references.rst @@ -59,7 +59,7 @@ .. |ogc-exec-sync-responses| replace:: OGC API - Processes, Responses (sync) .. _ogc-exec-sync-responses: https://docs.ogc.org/is/18-062r2/18-062r2.html#sc_execute_response .. |ogc-exec-async-responses| replace:: OGC API - Processes, Responses (async) -.. _ogc-exec-async-responses: https://docs.ogc.org/is/18-062r2/18-062r2.html#response_7 +.. _ogc-exec-async-responses: https://docs.ogc.org/is/18-062r2/18-062r2.html#_response_7 .. |pywps| replace:: PyWPS .. _pywps: https://github.com/geopython/pywps/ .. 
|pywps-status| replace:: Progress and Status Report From 0b50dc1d491e4ec5257523337d3dc21b45a292e4 Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Wed, 23 Mar 2022 19:38:23 -0400 Subject: [PATCH 34/34] fix imports --- tests/utils.py | 2 +- weaver/processes/execution.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/utils.py b/tests/utils.py index 62ee033dc..57d8216bb 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -20,13 +20,13 @@ from typing import TYPE_CHECKING # Note: do NOT import 'boto3' here otherwise 'moto' will not be able to mock it effectively -from celery.exceptions import TimeoutError as CeleryTaskTimeoutError import colander import mock import moto import pkg_resources import pyramid_celery import responses +from celery.exceptions import TimeoutError as CeleryTaskTimeoutError from owslib.wps import Languages, WebProcessingService from pyramid import testing from pyramid.config import Configurator diff --git a/weaver/processes/execution.py b/weaver/processes/execution.py index cd2fcc4f7..ee4f877b3 100644 --- a/weaver/processes/execution.py +++ b/weaver/processes/execution.py @@ -4,8 +4,8 @@ from typing import TYPE_CHECKING import colander -from celery.utils.log import get_task_logger from celery.exceptions import TimeoutError as CeleryTaskTimeoutError +from celery.utils.log import get_task_logger from owslib.util import clean_ows_url from owslib.wps import ComplexDataInput from pyramid.httpexceptions import HTTPBadRequest, HTTPNotAcceptable