From e2f1e272d0821039dee4e5e079167ba1f815993c Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 13 Oct 2023 11:50:30 -0400 Subject: [PATCH] fix schema references and properties in rendered JSON schema/contents --- CHANGES.rst | 10 +- tests/wps_restapi/test_colander_extras.py | 157 +++++++++++++++++++++- weaver/wps_restapi/api.py | 26 ++-- weaver/wps_restapi/colander_extras.py | 88 +++++++++--- weaver/wps_restapi/swagger_definitions.py | 40 ++++-- 5 files changed, 271 insertions(+), 50 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index fbb4e19e5..583fc2665 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -12,11 +12,17 @@ Changes Changes: -------- -- No change. +- Add schema validation and reference to the `API` landing page, with additional parameters to respect `OGC` schema. +- Add multiple `JSON` schema references for schema classes that are represented by corresponding `OGC` definitions. Fixes: ------ -- No change. +- Fix auto-insertion of ``$schema`` and ``$id`` URI references into `JSON` schema and their data content representation. + When in `OpenAPI` context, schemas now correctly report their ``$id`` as the reference schema they represent (usually + from external `OGC` schema references), and ``$schema`` as the `JSON` meta-schema. When representing `JSON` data + contents validated against a `JSON` schema, the ``$schema`` property is used instead to refer to that schema. + All auto-insertions of these references can be enabled or disabled with options depending on what is more sensible + for presenting results from various `API` responses. .. _changes_4.33.0: diff --git a/tests/wps_restapi/test_colander_extras.py b/tests/wps_restapi/test_colander_extras.py index 4d9551e40..fbcdd2427 100644 --- a/tests/wps_restapi/test_colander_extras.py +++ b/tests/wps_restapi/test_colander_extras.py @@ -13,7 +13,7 @@ from weaver.wps_restapi import colander_extras as ce, swagger_definitions as sd if TYPE_CHECKING: - from typing import List, Tuple, Type, Union + from typing import List, Optional, Tuple, Type, Union from weaver.typedefs import JSON @@ -946,3 +946,158 @@ def test_media_type_pattern(): pass else: pytest.fail(f"Expected valid format from [{test_schema.__name__}] with: '{test_value}'") + + +@pytest.mark.parametrize( + [ + "schema", + "schema_include", + "schema_include_deserialize", + "schema_include_convert_type", + "schema_expected_deserialize", + "schema_expected_convert_type", + "schema_meta", + "schema_meta_include", + "schema_meta_include_convert_type", + "schema_meta_expected_convert_type", + ], + [ + ( + "https://schema.com/item", True, True, True, "https://schema.com/item", "https://schema.com/item", + "https://json-schema.com#", True, True, "https://json-schema.com#", + ), + ( + "https://schema.com/item", False, True, True, None, None, + "https://json-schema.com#", True, True, "https://json-schema.com#", + ), + ( + "https://schema.com/item", True, True, True, "https://schema.com/item", "https://schema.com/item", + "https://json-schema.com#", False, True, None, + ), + ( + "https://schema.com/item", False, True, True, None, None, + "https://json-schema.com#", False, True, None, + ), + ( + None, True, True, True, None, None, + "https://json-schema.com#", True, True, "https://json-schema.com#", + ), + ( + "https://schema.com/item", True, True, True, "https://schema.com/item", "https://schema.com/item", + None, True, True, None, + ), + ( + "https://schema.com/item", True, False, True, None, "https://schema.com/item", + "https://json-schema.com#", True, True, "https://json-schema.com#", + ), + ( + "https://schema.com/item", True, True, False, "https://schema.com/item", None, + "https://json-schema.com#", True, False, None, + ), + ( + None, True, True, True, None, None, + None, True, True, None, + ), + ( + # even when provided by attribute/argument, invalid URIs are ignored + # this is to avoid injecting them and generate invalid JSON schema/data instances + "--not-an-uri!", True, True, True, None, None, + "://not_an_uri", True, True, None, + ), + ] +) +def test_schema_ref_resolution( + schema, # type: Optional[str] + schema_include, # type: bool + schema_include_deserialize, # type: bool + schema_include_convert_type, # type: bool + schema_expected_deserialize, # type: Optional[str] + schema_expected_convert_type, # type: Optional[str] + schema_meta, # type: Optional[str] + schema_meta_include, # type: bool + schema_meta_include_convert_type, # type: bool + schema_meta_expected_convert_type, # type: Optional[str] +): # type: (...) -> None + class MapByAttribute(ce.ExtendedMappingSchema): + title = "Item" + child = ce.ExtendedSchemaNode(ce.ExtendedString()) + + # do the same as if _schema, _schema_meta, etc. were set at the same place as 'title' + # but use 'setattr' here to allow combinations with/without them being set as needed + for schema_field, schema_value in [ + ("_schema", schema), + ("_schema_include", schema_include), + ("_schema_include_deserialize", schema_include_deserialize), + ("_schema_include_convert_type", schema_include_convert_type), + ("_schema_meta", schema_meta), + ("_schema_meta_include", schema_meta_include), + ("_schema_meta_include_convert_type", schema_meta_include_convert_type), + ]: + setattr(MapByAttribute, schema_field, schema_value) + + class MapByArgument(ce.ExtendedMappingSchema): + title = "Item" + child = ce.ExtendedSchemaNode(ce.ExtendedString()) + + # must be valid for both mappings above + test_data = {"child": "test"} + + for schema_node in [ + MapByAttribute(), + MapByArgument( + schema=schema, + schema_include=schema_include, + schema_include_deserialize=schema_include_deserialize, + schema_include_convert_type=schema_include_convert_type, + schema_meta=schema_meta, + schema_meta_include=schema_meta_include, + schema_meta_include_convert_type=schema_meta_include_convert_type, + ), + ]: + dispatcher = ce.OAS3TypeConversionDispatcher() + schema_json = dispatcher(schema_node) + schema_data = schema_node.deserialize(test_data) + + err_msg = f"Failed using {schema_node}" + assert "$id" not in schema_data, err_msg + if schema_expected_convert_type: + assert "$id" in schema_json, err_msg + assert schema_json["$id"] == schema_expected_convert_type, err_msg + else: + assert "$id" not in schema_json, err_msg + if schema_expected_deserialize: + assert "$schema" in schema_data, err_msg + assert schema_data["$schema"] == schema_expected_deserialize, err_msg + else: + assert "$schema" not in schema_data, err_msg + if schema_meta_expected_convert_type: + assert "$schema" in schema_json, err_msg + assert schema_json["$schema"] == schema_meta_expected_convert_type, err_msg + else: + assert "$schema" not in schema_json, err_msg + + # check that the remaining of the resolution is as expected + schema_json.pop("$id", None) + schema_data.pop("$id", None) + schema_json.pop("$schema", None) + schema_data.pop("$schema", None) + assert schema_data == test_data, err_msg + assert schema_json == { + "type": "object", + "title": "Item", + "required": ["child"], + "properties": { + "child": {"type": "string", "title": "child"} + }, + "additionalProperties": {} + }, err_msg + + +def test_schema_ref_resolution_include(): + schema_node = sd.ProviderSummarySchema() + assert schema_node._schema_meta_include is True, "Cannot run test without pre-condition" + assert schema_node._schema, "Cannot run test without pre-condition" + + def_handler = ce.OAS3DefinitionHandler + schema_json = ce.OAS3BodyParameterConverter().convert(schema_node, def_handler) + print(schema_json) diff --git a/weaver/wps_restapi/api.py b/weaver/wps_restapi/api.py index aed8b26cd..070b21e3a 100644 --- a/weaver/wps_restapi/api.py +++ b/weaver/wps_restapi/api.py @@ -519,17 +519,21 @@ def api_frontpage_body(settings): "rel": "wps-schema-2", "type": ContentType.TEXT_XML, "title": "WPS 2.0 XML validation schemas entrypoint."}, ]) - return { - "message": "Weaver Information", - "configuration": weaver_config, - "description": __meta__.__description__, - "parameters": [ - {"name": "api", "enabled": weaver_api, "url": weaver_api_url, "api": weaver_api_oas_ui}, - {"name": "vault", "enabled": weaver_vault}, - {"name": "wps", "enabled": weaver_wps, "url": weaver_wps_url}, - ], - "links": weaver_links, - } + body = sd.FrontpageSchema().deserialize( + { + "message": "Weaver Information", + "configuration": weaver_config, + "description": __meta__.__description__, + "attribution": __meta__.__author__, + "parameters": [ + {"name": "api", "enabled": weaver_api, "url": weaver_api_url, "api": weaver_api_oas_ui}, + {"name": "vault", "enabled": weaver_vault}, + {"name": "wps", "enabled": weaver_wps, "url": weaver_wps_url}, + ], + "links": weaver_links, + } + ) + return body @sd.api_versions_service.get(tags=[sd.TAG_API], renderer=OutputFormat.JSON, diff --git a/weaver/wps_restapi/colander_extras.py b/weaver/wps_restapi/colander_extras.py index 1dc201d6d..a2a488432 100644 --- a/weaver/wps_restapi/colander_extras.py +++ b/weaver/wps_restapi/colander_extras.py @@ -123,10 +123,12 @@ NO_DOUBLE_SLASH_PATTERN = r"(?!.*//.*$)" URL_REGEX = colander.URL_REGEX.replace(r"://)?", rf"://)?{NO_DOUBLE_SLASH_PATTERN}") URL = colander.Regex(URL_REGEX, msg=colander._("Must be a URL"), flags=re.IGNORECASE) -URI_REGEX = colander.URI_REGEX.replace(r"://", r"://(?!//)") -FILE_URI = colander.Regex(URI_REGEX, msg=colander._("Must be a file:// URI scheme"), flags=re.IGNORECASE) +FILE_URL_REGEX = colander.URI_REGEX.replace(r"://", r"://(?!//)") +FILE_URI = colander.Regex(FILE_URL_REGEX, msg=colander._("Must be a file:// URI scheme"), flags=re.IGNORECASE) +URI_REGEX = rf"{colander.URL_REGEX[:-1]}(?:#?|[#?]\S+)$" +URI = colander.Regex(URI_REGEX, msg=colander._("Must be a URI"), flags=re.IGNORECASE) STRING_FORMATTERS.update({ - "uri": {"converter": BaseStringTypeConverter, "validator": URL}, + "uri": {"converter": BaseStringTypeConverter, "validator": URI}, "url": {"converter": BaseStringTypeConverter, "validator": URL}, "file": {"converter": BaseStringTypeConverter, "validator": FILE_URI}, }) @@ -1180,31 +1182,52 @@ class SchemaRefMappingSchema(ExtendedNodeInterface, ExtendedSchemaBase): """ Mapping schema that supports auto-insertion of JSON-schema references provided in the definition. - When the :class:`colander.MappingSchema` defines ``_schema = ""`` with a valid URL, - all validations will automatically insert the corresponding ``$schema`` or ``$id`` field with this URL to - the deserialized :term:`OpenAPI` schema using :class:`SchemaRefConverter`, and to the deserialized :term:`JSON` - content, respectively. When injecting the ``$id`` reference into the :term:`JSON` object, the ``$schema`` will - instead refer to the ``schema_meta`` attribute that default to the :term:`JSON` meta-schema. + Schema references are resolved under two distinct contexts: + + 1. When generating the :term:`JSON` schema representation of the current schema node, for :term:`OpenAPI` + representation, the ``_schema`` attribute will indicate the ``$id`` value that identifies this schema, + while the ``_schema_meta`` will provide the ``$schema`` property that refers to the :term:`JSON` meta-schema + used by default to define it. + + 2. When deserializing :term:`JSON` data that should be validated against the current schema node, the generated + :term:`JSON` data will include the ``$schema`` property using the ``_schema`` attribute. In this case, + the ``$id`` is omitted as that :term:`JSON` represents an instance of the schema, but not its identity. Alternatively, the parameters ``schema`` and ``schema_meta`` can be passed as keyword arguments when instantiating - the schema node. The references injection can be disabled with ``schema_meta_include`` and ``schema_include``. + the schema node. The references injection in the :term:`JSON` schema and data can be disabled with parameters + ``schema_include`` and ``schema_meta_include``, or the corresponding class attributes. Furthermore, options + ``schema_include_deserialize``, ``schema_include_convert_type`` and ``schema_meta_include_convert_type`` can be + used to control individually each schema inclusion during either the type conversion context (:term:`JSON` schema) + or the deserialization context (:term:`JSON` data validation). """ _extension = "_ext_schema_ref" - _ext_schema_options = ["_schema_meta", "_schema_meta_include", "_schema", "_schema_include"] + _ext_schema_options = [ + "_schema_meta", + "_schema_meta_include", + "_schema_meta_include_convert_type", + "_schema", + "_schema_include", + "_schema_include_deserialize", + "_schema_include_convert_type", + ] _ext_schema_fields = ["_id", "_schema"] # typings and attributes to help IDEs flag that the field is available/overridable _schema_meta = Draft7Validator.META_SCHEMA["$schema"] # type: str - _schema_meta_include = False # type: bool - _schema = None # type: str - _schema_include = True # type: bool + _schema_meta_include = True # type: bool + _schema_meta_include_convert_type = True # type: bool + + _schema = None # type: str + _schema_include = True # type: bool + _schema_include_deserialize = True # type: bool + _schema_include_convert_type = True # type: bool def __init__(self, *args, **kwargs): for schema_key in self._schema_options: schema_field = schema_key[1:] - schema_value = kwargs.pop(schema_field, None) - if schema_value not in ["", None]: + schema_value = kwargs.pop(schema_field, object) + if schema_value is not object: setattr(self, schema_key, schema_value) super(SchemaRefMappingSchema, self).__init__(*args, **kwargs) setattr(self, SchemaRefMappingSchema._extension, True) @@ -1219,7 +1242,7 @@ def __init__(self, *args, **kwargs): @staticmethod def _is_schema_ref(schema_ref): # type: (Any) -> bool - return isinstance(schema_ref, str) and URL.match_object.match(schema_ref) + return isinstance(schema_ref, str) and URI.match_object.match(schema_ref) @property def _schema_options(self): @@ -1231,6 +1254,9 @@ def _schema_fields(self): def _schema_deserialize(self, cstruct, schema_meta, schema_id): # type: (OpenAPISchema, Optional[str], Optional[str]) -> OpenAPISchema + """ + Applies the relevant schema references and properties depending on :term:`JSON` schema/data conversion context. + """ if not isinstance(cstruct, dict): return cstruct if not getattr(self, SchemaRefMappingSchema._extension, False): @@ -1255,20 +1281,38 @@ def _schema_deserialize(self, cstruct, schema_meta, schema_id): return schema_result def _deserialize_impl(self, cstruct): # pylint: disable=W0222,signature-differs + """ + Converts the data using validation against the :term:`JSON` schema definition. + """ + # meta-schema always disabled in this context since irrelevant + # refer to the "id" of the parent schema representing this data using "$schema" + # this is not "official" JSON requirement, but very common in practice + schema_id = None + schema_id_include = getattr(self, "_schema_include", False) + schema_id_include_deserialize = getattr(self, "_schema_include_deserialize", False) + if schema_id_include and schema_id_include_deserialize: + schema_id = getattr(self, "_schema", None) + if schema_id: + return self._schema_deserialize(cstruct, schema_id, None) + return cstruct + + def convert_type(self, cstruct): # pylint: disable=W0222,signature-differs + """ + Converts the node to obtain the :term:`JSON` schema definition. + """ schema_id = schema_meta = None schema_id_include = getattr(self, "_schema_include", False) + schema_id_include_convert_type = getattr(self, "_schema_include_convert_type", False) schema_meta_include = getattr(self, "_schema_meta_include", False) - if schema_meta_include: - schema_meta = getattr(self, "_schema_meta", None) - if schema_id_include: + schema_meta_include_convert_type = getattr(self, "_schema_meta_include_convert_type", False) + if schema_id_include and schema_id_include_convert_type: schema_id = getattr(self, "_schema", None) + if schema_meta_include and schema_meta_include_convert_type: + schema_meta = getattr(self, "_schema_meta", None) if schema_id or schema_meta: return self._schema_deserialize(cstruct, schema_meta, schema_id) return cstruct - def convert_type(self, cstruct): # pylint: disable=W0222,signature-differs - return SchemaRefMappingSchema._deserialize_impl(self, cstruct) - @staticmethod @abstractmethod def schema_type(): diff --git a/weaver/wps_restapi/swagger_definitions.py b/weaver/wps_restapi/swagger_definitions.py index 40bbd4e28..56f81988b 100644 --- a/weaver/wps_restapi/swagger_definitions.py +++ b/weaver/wps_restapi/swagger_definitions.py @@ -92,6 +92,7 @@ StrictMappingSchema, StringOneOf, StringRange, + URI, XMLObject ) from weaver.wps_restapi.constants import ConformanceCategory @@ -158,11 +159,16 @@ OGC_API_SCHEMA_EXT_WORKFLOW = f"{OGC_API_SCHEMA_BASE}/openapi/schemas/processes-workflows" # official/published references -OGC_API_PROC_PART1 = "https://schemas.opengis.net/ogcapi/processes/part1/1.0" -OGC_API_PROC_PART1_SCHEMAS = f"{OGC_API_PROC_PART1}/openapi/schemas" -OGC_API_PROC_PART1_RESPONSES = f"{OGC_API_PROC_PART1}/openapi/responses" -OGC_API_PROC_PART1_PARAMETERS = f"{OGC_API_PROC_PART1}/openapi/parameters" -OGC_API_PROC_PART1_EXAMPLES = f"{OGC_API_PROC_PART1}/examples" +OGC_API_SCHEMAS_URL = "https://schemas.opengis.net" +OGC_API_COMMON_PART1_BASE = f"{OGC_API_SCHEMAS_URL}/ogcapi/common/part1/1.0" +OGC_API_COMMON_PART1_SCHEMAS = f"{OGC_API_COMMON_PART1_BASE}/openapi/schemas" +OGC_API_PROC_PART1_BASE = f"{OGC_API_SCHEMAS_URL}/ogcapi/processes/part1/1.0" +OGC_API_PROC_PART1_SCHEMAS = f"{OGC_API_PROC_PART1_BASE}/openapi/schemas" +OGC_API_PROC_PART1_RESPONSES = f"{OGC_API_PROC_PART1_BASE}/openapi/responses" +OGC_API_PROC_PART1_PARAMETERS = f"{OGC_API_PROC_PART1_BASE}/openapi/parameters" +OGC_API_PROC_PART1_EXAMPLES = f"{OGC_API_PROC_PART1_BASE}/examples" +OGC_WPS_1_BASE = f"{OGC_API_SCHEMAS_URL}/wps/1.0.0" +OGC_WPS_2_BASE = f"{OGC_API_SCHEMAS_URL}/wps/2.0" WEAVER_SCHEMA_VERSION = "master" WEAVER_SCHEMA_URL = f"https://raw.githubusercontent.com/crim-ca/weaver/{WEAVER_SCHEMA_VERSION}/weaver/schemas" @@ -749,7 +755,8 @@ class LinkBase(LinkLanguage, MetadataBase): class Link(LinkRelationship, LinkBase): - pass + _schema = f"{OGC_API_COMMON_PART1_SCHEMAS}/link.json" + _schema_include_deserialize = False # only in OpenAPI otherwise too verbose class MetadataValue(NotKeywordSchema, ValueLanguage, MetadataBase): @@ -825,7 +832,7 @@ class Format(ExtendedMappingSchema): """ Used to respect ``mediaType`` field as suggested per `OGC-API`. """ - _schema_include = False # exclude "$id" added on each sub-deserialize (too verbose, only for reference) + _schema_include_deserialize = False # only in OpenAPI otherwise too verbose _schema = f"{OGC_API_PROC_PART1_SCHEMAS}/format.yaml" mediaType = MediaType(default=ContentType.TEXT_PLAIN, example=ContentType.APP_JSON) @@ -5085,8 +5092,14 @@ class FrontpageParameters(ExtendedSequenceSchema): class FrontpageSchema(LandingPage, DescriptionSchema): + _schema = f"{OGC_API_COMMON_PART1_SCHEMAS}/landingPage.json" + _sort_first = ["title", "configuration", "message", "description", "attribution"] + _sort_after = ["parameters", "links"] + + title = ExtendedSchemaNode(String(), default="Weaver", example="Weaver") message = ExtendedSchemaNode(String(), default="Weaver Information", example="Weaver Information") configuration = ExtendedSchemaNode(String(), default="default", example="default") + attribution = ExtendedSchemaNode(String(), description="Short representation of the API maintainers.") parameters = FrontpageParameters() @@ -5119,6 +5132,7 @@ class ConformanceList(ExtendedSequenceSchema): class ConformanceSchema(ExtendedMappingSchema): + _schema = f"{OGC_API_COMMON_PART1_SCHEMAS}/confClasses.json" conformsTo = ConformanceList() @@ -5750,7 +5764,7 @@ class ErrorDetail(ExtendedMappingSchema): class OWSErrorCode(ExtendedSchemaNode): schema_type = String example = "InvalidParameterValue" - description = "OWS error code." + description = "OWS error code or URI reference that identifies the problem type." class OWSExceptionResponse(ExtendedMappingSchema): @@ -5773,16 +5787,16 @@ class ErrorCause(OneOfKeywordSchema): class ErrorJsonResponseBodySchema(ExtendedMappingSchema): - _schema = f"{OGC_API_PROC_PART1_SCHEMAS}/exception.yaml" + _schema = f"{OGC_API_COMMON_PART1_SCHEMAS}/exception.json" description = "JSON schema for exceptions based on RFC 7807" - type = OWSErrorCode() + type = OWSErrorCode() # only this is required title = ExtendedSchemaNode(String(), description="Short description of the error.", missing=drop) detail = ExtendedSchemaNode(String(), description="Detail about the error cause.", missing=drop) - status = ExtendedSchemaNode(Integer(), description="Error status code.", example=500) + status = ExtendedSchemaNode(Integer(), description="Error status code.", example=500, missing=drop) cause = ErrorCause(missing=drop) value = ErrorCause(missing=drop) error = ErrorDetail(missing=drop) - instance = ExtendedSchemaNode(String(), missing=drop) + instance = ExtendedSchemaNode(String(), validator=URI, missing=drop) exception = OWSExceptionResponse(missing=drop) @@ -7011,11 +7025,9 @@ def validate_node_schema(schema_node, cstruct): schema_node.deserialize(cstruct) schema_file = schema_node._schema.replace(WEAVER_SCHEMA_URL, WEAVER_SCHEMA_DIR) schema_path = [] - schema_ref = "" if "#" in schema_file: schema_file, schema_ref = schema_file.split("#", 1) schema_path = [ref for ref in schema_ref.split("/") if ref] - schema_ref = f"#{schema_ref}" schema_base = schema = load_file(schema_file) if schema_path: for part in schema_path: