Skip to content

Commit

Permalink
[wip] workaround fix for CWL type [null, enum, array-enum] without ex…
Browse files Browse the repository at this point in the history
…plicit name (relates to common-workflow-language/cwltool#1908)
  • Loading branch information
fmigneault committed Sep 19, 2023
1 parent 55a2026 commit 04baaa0
Show file tree
Hide file tree
Showing 5 changed files with 375 additions and 7 deletions.
4 changes: 4 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ Fixes:
Links will only be listed within the returned ``processSummary`` to respect the `OGC API - Processes` schema.
- Fix `CLI` not removing embedded ``links`` in ``processSummary`` from ``deploy`` operation response
when ``-nL``/``--no-links`` option is specified.
- Fix `CWL` definitions combining nested ``enum`` types as ``["null", <enum>, {type: array, items: <enum>}]`` without an
explicit ``name`` or ``SchemaDefRequirement``, which caused ``schema_salad`` resolution to fail under ``cwltool``.
A temporary ``name`` is injected for the moment as a workaround to let the `CWL` engine pass schema validation (relates
to `common-workflow-language/cwltool#1908 <https://github.com/common-workflow-language/cwltool/issues/1908>`_).

.. _changes_4.31.0:

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,293 @@
processDescription:
process:
id: Finch_EnsembleGridPointWetdays
# inputs:
# lat:
# schema:
# oneOf:
# - type: string
# - type: array
# items: string
# lon:
# schema:
# oneOf:
# - type: string
# - type: array
# items: string
# start_date:
# schema:
# required: false
# type: string
# end_date:
# schema:
# required: false
# type: string
# ensemble_percentiles:
# schema:
# required: false
# type: string
# default: 10,50,90
# average:
# schema:
# required: false
# type: boolean
# default: false
# dataset:
# schema:
# required: false
# type: string
# enum:
# - humidex-daily
# - candcs-u5
# - candcs-u6
# - bccaqv2
# default: candcs-u5
# scenario:
# schema:
# required: false
# oneOf:
# - type: string
# enum:
# - ssp126
# - rcp85
# - rcp45
# - rcp26
# - ssp585
# - ssp245
# - type: array
# items:
# type: string
# enum:
# - ssp126
# - rcp85
# - rcp45
# - rcp26
# - ssp585
# - ssp245
# models:
# schema:
# required: false
# default: all
# oneOf:
# - type: string
# enum:
# - KACE-1-0-G
# - CCSM4
# - MIROC5
# - EC-Earth3-Veg
# - TaiESM1
# - GFDL-ESM4
# - GFDL-CM3
# - CanESM5
# - HadGEM3-GC31-LL
# - INM-CM4-8
# - IPSL-CM5A-MR
# - EC-Earth3
# - GFDL-ESM2G
# - humidex_models
# - GFDL-ESM2M
# - MIROC-ESM
# - CSIRO-Mk3-6-0
# - MPI-ESM-LR
# - NorESM1-M
# - CNRM-CM5
# - all
# - GISS-E2-1-G
# - 24models
# - MPI-ESM1-2-HR
# - CNRM-ESM2-1
# - CNRM-CM6-1
# - CanESM2
# - FGOALS-g3
# - NorESM1-ME
# - IPSL-CM6A-LR
# - CMCC-ESM2
# - pcic12
# - EC-Earth3-Veg-LR
# - ACCESS-ESM1-5
# - MRI-CGCM3
# - MIROC-ESM-CHEM
# - NorESM2-MM
# - bcc-csm1-1-m
# - BNU-ESM
# - UKESM1-0-LL
# - CESM1-CAM5
# - MIROC-ES2L
# - MRI-ESM2-0
# - HadGEM2-ES
# - MIROC6
# - MPI-ESM-MR
# - INM-CM5-0
# - bcc-csm1-1
# - BCC-CSM2-MR
# - ACCESS-CM2
# - NorESM2-LM
# - IPSL-CM5A-LR
# - FGOALS-g2
# - HadGEM2-AO
# - 26models
# - MPI-ESM1-2-LR
# - KIOST-ESM
# - type: array
# items:
# type: string
# enum:
# - KACE-1-0-G
# - CCSM4
# - MIROC5
# - EC-Earth3-Veg
# - TaiESM1
# - GFDL-ESM4
# - GFDL-CM3
# - CanESM5
# - HadGEM3-GC31-LL
# - INM-CM4-8
# - IPSL-CM5A-MR
# - EC-Earth3
# - GFDL-ESM2G
# - humidex_models
# - GFDL-ESM2M
# - MIROC-ESM
# - CSIRO-Mk3-6-0
# - MPI-ESM-LR
# - NorESM1-M
# - CNRM-CM5
# - all
# - GISS-E2-1-G
# - 24models
# - MPI-ESM1-2-HR
# - CNRM-ESM2-1
# - CNRM-CM6-1
# - CanESM2
# - FGOALS-g3
# - NorESM1-ME
# - IPSL-CM6A-LR
# - CMCC-ESM2
# - pcic12
# - EC-Earth3-Veg-LR
# - ACCESS-ESM1-5
# - MRI-CGCM3
# - MIROC-ESM-CHEM
# - NorESM2-MM
# - bcc-csm1-1-m
# - BNU-ESM
# - UKESM1-0-LL
# - CESM1-CAM5
# - MIROC-ES2L
# - MRI-ESM2-0
# - HadGEM2-ES
# - MIROC6
# - MPI-ESM-MR
# - INM-CM5-0
# - bcc-csm1-1
# - BCC-CSM2-MR
# - ACCESS-CM2
# - NorESM2-LM
# - IPSL-CM5A-LR
# - FGOALS-g2
# - HadGEM2-AO
# - 26models
# - MPI-ESM1-2-LR
# - KIOST-ESM
# thresh:
# schema:
# required: false
# type: string
# default: "1.0 mm/day"
# freq:
# schema:
# required: false
# default: YS
# type: string
# enum:
# - YS
# - QS-DEC
# - AS-JUL
# - MS
# op:
# schema:
# type: string
# required: false
# default: '>='
# enum:
# - '>='
# - '>'
# - gt
# - ge
# month:
# schema:
# required: false
# oneOf:
# - type: integer
# enum: [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 ]
# - type: array
# items:
# type: integer
# enum: [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 ]
# season:
# schema:
# required: false
# type: string
# enum:
# - SON
# - MAM
# - JJA
# - DJF
# check_missing:
# schema:
# required: false
# type: string
# enum:
# - pct
# - at_least_n
# - wmo
# - skip
# - from_context
# - any
# missing_options:
# schema:
# required: false
# type: string
# contentMediaType: application/json
# cf_compliance:
# schema:
# required: false
# default: warn
# type: string
# enum:
# - raise
# - log
# - warn
# data_validation:
# schema:
# required: false
# default: raise
# type: string
# enum:
# - raise
# - log
# - warn
# output_name:
# schema:
# required: false
# type: string
# output_format:
# schema:
# required: false
# default: netcdf
# type: string
# enum:
# - csv
# - netcdf
# csv_precision:
# schema:
# required: false
# type: integer
# outputs:
# output:
# schema:
# type: string
# contentMediaType: text/plain
jobControlOptions:
- async-execute
outputTransmission:
Expand Down
33 changes: 28 additions & 5 deletions weaver/processes/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,14 +122,14 @@
AnySettingsContainer,
AnyValueType,
CWL,
CWL_Input_Type,
CWL_IO_ComplexType,
CWL_IO_DataType,
CWL_IO_EnumSymbols,
CWL_IO_FileValue,
CWL_IO_LiteralType,
CWL_IO_Type,
CWL_IO_Value,
CWL_Output_Type,
CWL_SchemaNames,
ExecutionInputs,
ExecutionInputsList,
ExecutionInputsMap,
Expand Down Expand Up @@ -176,7 +176,6 @@
"supported_formats": NotRequired[List[JSON_Format]],
}, total=False)
JSON_IO_ListOrMap = Union[List[JSON], Dict[str, Union[JSON, str]]]
CWL_IO_Type = Union[CWL_Input_Type, CWL_Output_Type]
PKG_IO_Type = Union[JSON_IO_Type, WPS_IO_Type]
ANY_IO_Type = Union[CWL_IO_Type, JSON_IO_Type, WPS_IO_Type, OWS_IO_Type]
ANY_Format_Type = Union[Dict[str, Optional[str]], Format]
Expand Down Expand Up @@ -1175,6 +1174,28 @@ def get_cwl_io_type_name(io_type):
return io_type


def resolve_cwl_io_type_schema(io_info, cwl_schema_names=None):
    # type: (CWL_IO_Type, Optional[CWL_SchemaNames]) -> CWL_IO_Type
    """
    Reverse :term:`CWL` schema references by name back to their full :term:`CWL` I/O definition.

    Two forms of reference are expanded, checked in this order:

    1. ``{"type": "array", "items": "<schema-name>"}`` has its ``items`` replaced by the referenced definition.
    2. ``{"type": "<schema-name>"}`` has its ``type`` replaced by the referenced definition.

    Any other input (non-mapping definition, missing or empty schema names, unknown reference) is returned as is.

    .. seealso::
        - :meth:`weaver.processes.wps_package.WpsPackage.make_inputs`
        - :meth:`weaver.processes.wps_package.WpsPackage.update_cwl_schema_names`

    :param io_info: :term:`CWL` I/O definition that could contain a schema reference by name.
    :param cwl_schema_names:
        Mapping of schema names to schema objects that expose their definition through a ``_props`` attribute.
    :return:
        Shallow copy of the definition with the reference expanded when one was matched,
        otherwise the very same ``io_info`` object that was provided.
    """
    if not isinstance(io_info, dict) or not cwl_schema_names:
        return io_info
    io_type = io_info.get("type")
    io_item = io_info.get("items")
    if io_type == "array" and isinstance(io_item, str) and io_item in cwl_schema_names:
        ref_field, ref_name = "items", io_item
    elif isinstance(io_type, str) and io_type in cwl_schema_names:
        ref_field, ref_name = "type", io_type
    else:
        return io_info
    # work on a copy to avoid undoing the CWL tool parsing/resolution held by the caller's definition
    resolved = io_info.copy()
    # NOTE(review): '_props' is a private attribute of the schema objects (presumably 'schema_salad' Avro schemas);
    #   confirm whether a public accessor is available before relying on it long-term.
    resolved[ref_field] = cwl_schema_names[ref_name]._props
    return resolved


@dataclass
class CWLIODefinition(object):
"""
Expand Down Expand Up @@ -1272,8 +1293,8 @@ def __iter__(self):
"""


def get_cwl_io_type(io_info, strict=True):
# type: (CWL_IO_Type, bool) -> CWLIODefinition
def get_cwl_io_type(io_info, strict=True, cwl_schema_names=None):
# type: (CWL_IO_Type, bool, Optional[CWL_SchemaNames]) -> CWLIODefinition
"""
Obtains the basic type of the CWL input and identity if it is optional.
Expand All @@ -1290,6 +1311,7 @@ def get_cwl_io_type(io_info, strict=True):
:param io_info: :term:`CWL` definition to parse.
:param strict: Indicates if only pure :term:`CWL` definition is allowed, or allow implicit data-type conversions.
:param cwl_schema_names: Mapping of CWL type schema references to resolve in long form if used in a definition.
:return: tuple of guessed base type and flag indicating if it can be null (optional input).
"""
io_type = get_cwl_io_type_name(io_info["type"])
Expand All @@ -1316,6 +1338,7 @@ def get_cwl_io_type(io_info, strict=True):
io_base_type = None
for i, typ in enumerate(io_type, start=int(is_null)):
typ = get_cwl_io_type_name(typ)
typ = resolve_cwl_io_type_schema(typ, cwl_schema_names)
io_name = io_info["name"]
sub_type = {"type": typ, "name": f"{io_name}[{i}]"} # type: CWL_IO_Type
array_io_def = parse_cwl_array_type(sub_type, strict=strict)
Expand Down
Loading

0 comments on commit 04baaa0

Please sign in to comment.