Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature: WP3 Process metadata validation #44

Merged
merged 16 commits into from
Feb 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -169,3 +169,7 @@ poetry.toml

# LSP config files
pyrightconfig.json

# vscode
.vscode/
pytest.ini
19 changes: 17 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,23 @@ focussing on a specific API aspect to test or verify
```
- **WP3 Validation of process metadata** (lead implementation partner: EODC)
- Main location: [`src/openeo_test_suite/tests/processes/metadata`](./src/openeo_test_suite/tests/processes/metadata)
- TODO: [Open-EO/openeo-test-suite#19](https://github.com/Open-EO/openeo-test-suite/issues/19)
- Defines tests to validate openEO process metadata against specs
defined in the [openeo-processes](https://github.com/Open-EO/openeo-processes) project
- Functional tests concern actual values and behavior of processes (like parameters and return values),
failures in these tests should be looked into and fixed.
- Non-functional tests concern descriptions and other metadata of processes that have no impact on the actual behavior of the process,
failures in these tests should be taken as warnings, but don't necessarily need to be fixed. These can be skipped by adding
'-m "not optional"' to the pytest command.
- Usage example for running these tests against a desired openEO backend URL:
```bash
pytest src/openeo_test_suite/tests/processes/metadata \
--html=reports/process-metadata.html \
--openeo-backend-url=openeo.example \
--tb=no \
-vv
```
It is recommended to run these tests with the `--tb=no` option, to avoid excessive traceback output of no substance,
and the `-vv` option to get more detailed output.
GeraldIr marked this conversation as resolved.
Show resolved Hide resolved
- **WP4 General openEO API compliance validation** (lead implementation partner: EODC)
- TODO: [Open-EO/openeo-test-suite#20](https://github.com/Open-EO/openeo-test-suite/issues/20)
- **WP5 Individual process testing** (lead implementation partner: M. Mohr)
Expand Down Expand Up @@ -305,7 +321,6 @@ Some general guidelines:
extend existing tests or add new tests at `src/openeo_test_suite/tests/collections`.
- Validation of process metadata:
add new tests to `src/openeo_test_suite/tests/processes/metadata`.
- TODO: [Open-EO/openeo-test-suite#19](https://github.com/Open-EO/openeo-test-suite/issues/19)
- General openEO API compliance validation:
- TODO: [Open-EO/openeo-test-suite#20](https://github.com/Open-EO/openeo-test-suite/issues/20)
- Individual process testing:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def test_get_process_match(self, process_registry):
assert process_data.process_id == "add"
assert process_data.level == "L1"
assert process_data.experimental is False
assert process_data.path.name == "add.json5"
assert process_data.path.name == "add.json"
assert len(process_data.tests)

def test_get_process_no_match(self, process_registry):
Expand All @@ -37,7 +37,7 @@ def test_get_all_processes_add(self, process_registry):

assert add.level == "L1"
assert add.experimental is False
assert add.path.name == "add.json5"
assert add.path.name == "add.json"
assert len(add.tests)

add00 = {"arguments": {"x": 0, "y": 0}, "returns": 0}
Expand All @@ -50,7 +50,7 @@ def test_get_all_processes_divide(self, process_registry):

assert divide.level == "L1"
assert divide.experimental is False
assert divide.path.name == "divide.json5"
assert divide.path.name == "divide.json"
assert len(divide.tests)

divide0 = {
Expand Down
42 changes: 29 additions & 13 deletions src/openeo_test_suite/lib/process_registry.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import itertools
import json
import logging
import os
from dataclasses import dataclass
Expand All @@ -16,6 +18,7 @@ class ProcessData:
"""Process data, including profile level and list of tests"""

process_id: str
spec: dict
level: str
tests: List[dict] # TODO: also make dataclass for each test?
experimental: bool
Expand All @@ -32,22 +35,20 @@ def __init__(self, root: Optional[Path] = None):
"""
:param root: Root directory of the tests folder in openeo-processes project
"""
self._root = Path(
root
# TODO: eliminate need for this env var?
or os.environ.get("OPENEO_TEST_SUITE_PROCESSES_TEST_ROOT")
or self._guess_root()
)

self._root = Path(root or self._guess_root())
self._root_json5 = Path(self._root.joinpath("tests"))

# Lazy load cache
self._processes: Union[None, List[ProcessData]] = None

def _guess_root(self):
# TODO: avoid need for guessing and properly include assets in (installed) package
project_root = Path(openeo_test_suite.__file__).parents[2]
candidates = [
project_root / "assets/processes/tests",
Path("./assets/processes/tests"),
Path("./openeo-test-suite/assets/processes/tests"),
project_root / "assets/processes",
Path("./assets/processes"),
Path("./openeo-test-suite/assets/processes"),
]
for candidate in candidates:
if candidate.exists() and candidate.is_dir():
Expand All @@ -62,18 +63,33 @@ def _load(self) -> Iterator[ProcessData]:
if not self._root.is_dir():
raise ValueError(f"Invalid process test root directory: {self._root}")
_log.info(f"Loading process definitions from {self._root}")
for path in self._root.glob("*.json5"):

process_paths = itertools.chain(
self._root.glob("*.json"), self._root.glob("proposals/*.json")
)
for path in process_paths:
test_metadata_path = self._root_json5 / f"{path.stem}.json5"
try:
with path.open() as f:
data = json5.load(f)
data = json.load(f)
if data["id"] != path.stem:
raise ValueError(
f"Process id mismatch between id {data['id']!r} and filename {path.name!r}"
)
# Metadata is stored in sibling json file
GeraldIr marked this conversation as resolved.
Show resolved Hide resolved
if test_metadata_path.exists():
with test_metadata_path.open() as f:
metadata = json5.load(f)
else:
metadata = {}

yield ProcessData(
process_id=data["id"],
level=data.get("level"),
tests=data.get("tests", []),
spec=data,
level=metadata.get(
"level", "L4"
), # default to L4 is intended for processes without a specific level
tests=metadata.get("tests", []),
experimental=data.get("experimental", False),
soxofaan marked this conversation as resolved.
Show resolved Hide resolved
path=path,
)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
from typing import List
import pytest
from openeo_test_suite.lib.process_registry import ProcessData
from openeo_test_suite.lib.process_selection import get_selected_processes
import logging


@pytest.fixture(scope="module")
def api_processes(connection) -> List[dict]:
    """Module-scoped fixture: the process listing advertised by the backend under test."""
    listing = connection.list_processes()
    return listing


def _get_test_id(val):
    """pytest id helper: label ProcessData parameters with their process id (None otherwise)."""
    if not isinstance(val, ProcessData):
        return None
    return val.process_id


@pytest.mark.parametrize(
    "expected_process",
    list(get_selected_processes()),
    ids=_get_test_id,
)
def test_process_metadata_functional(api_processes, expected_process, skipper):
    """
    Validate the functional metadata of each selected process against the
    backend's process listing: parameters (names, optionality, schemas),
    return schema, and the experimental/deprecated flags.

    Processes the backend does not support are skipped via ``skipper``.
    These are the functional parts of the process metadata, i.e. the parts
    that influence actual process behavior.
    """

    skipper.skip_if_unsupported_process(expected_process.process_id)

    # After the skip above the process is expected to be listed by the backend;
    # an unexpected absence surfaces as an IndexError (test failure).
    actual_process = [
        process
        for process in api_processes
        if process["id"] == expected_process.process_id
    ][0]

    expected_parameters = expected_process.spec.get("parameters", [])
    actual_parameters = actual_process["parameters"]

    # Fewer actual than expected parameters: only acceptable when every
    # missing (trailing) expected parameter has a default value.
    if len(expected_parameters) > len(actual_parameters):
        # logging.warn() is a deprecated alias; use logging.warning()
        logging.warning(
            f"Process {expected_process.process_id} has {len(expected_parameters)} expected parameters, but only {len(actual_parameters)} actual parameters"
        )
        for expected_parameter in expected_parameters[len(actual_parameters) :]:
            assert (
                "default" in expected_parameter
            ), f"Missing Parameter {expected_parameter} has no default value. Cannot remove required parameters."

    # More actual than expected parameters: only acceptable when every
    # additional (trailing) actual parameter has a default value.
    if len(expected_parameters) < len(actual_parameters):
        logging.warning(
            f"Process {expected_process.process_id} has {len(expected_parameters)} expected parameters, but {len(actual_parameters)} actual parameters"
        )
        for actual_parameter in actual_parameters[len(expected_parameters) :]:
            assert (
                "default" in actual_parameter
            ), f"Additional Parameter {actual_parameter} has no default value."

    # Compare the overlapping (positional) parameters pairwise.
    # NOTE(review): zip truncates to the shorter list; the surplus tail is
    # only checked for defaults above, not for schema equality.
    for expected_parameter, actual_parameter in zip(
        expected_parameters, actual_parameters
    ):
        # Parameter names must match positionally
        assert expected_parameter["name"] == actual_parameter["name"]
        # Optionality must match (absent "optional" means required)
        assert expected_parameter.get("optional", False) == actual_parameter.get(
            "optional", False
        )
        # Parameter schemas must match exactly
        assert expected_parameter["schema"] == actual_parameter["schema"]

    # Return schema must match exactly
    expected_return_type = expected_process.spec.get("returns", {})
    actual_return_type = actual_process["returns"]
    assert expected_return_type["schema"] == actual_return_type["schema"]

    # experimental/deprecated flags: the backend must set them when the spec
    # does, but may additionally flag processes on its own.
    if expected_process.spec.get("experimental", False):
        assert actual_process.get("experimental", False)

    if expected_process.spec.get("deprecated", False):
        assert actual_process.get("deprecated", False)

@pytest.mark.optional
@pytest.mark.parametrize(
    "expected_process",
    list(get_selected_processes()),
    ids=_get_test_id,
)
def test_process_metadata_non_functional(api_processes, expected_process, skipper):
    """
    Validate the non-functional metadata of each selected process against the
    backend's process listing: categories, descriptions, summary and links.

    Failures here have no impact on actual process behavior and should be
    treated as warnings; the test is marked ``optional`` so it can be
    deselected with ``-m "not optional"``.
    """

    skipper.skip_if_unsupported_process(expected_process.process_id)

    # After the skip above the process is expected to be listed by the backend;
    # an unexpected absence surfaces as an IndexError (test failure).
    actual_process = [
        process
        for process in api_processes
        if process["id"] == expected_process.process_id
    ][0]

    # Categories must match exactly
    assert expected_process.spec.get("categories", []) == actual_process["categories"]

    # Process description must match exactly
    assert expected_process.spec.get("description", "") == actual_process["description"]

    # Summary must match exactly
    assert expected_process.spec.get("summary", "") == actual_process["summary"]

    # Parameter descriptions must match pairwise (and counts must agree)
    expected_parameters = expected_process.spec.get("parameters", [])
    actual_parameters = actual_process["parameters"]

    assert len(expected_parameters) == len(actual_parameters)

    for expected_parameter, actual_parameter in zip(
        expected_parameters, actual_parameters
    ):
        assert expected_parameter.get("description", "") == actual_parameter.get(
            "description", ""
        )

    # Return value description must match
    expected_return_type = expected_process.spec.get("returns", {})
    actual_return_type = actual_process["returns"]
    assert expected_return_type.get("description", "") == actual_return_type.get(
        "description", ""
    )

    # Compare links, ordered by href. Use sorted() copies instead of in-place
    # .sort(): the original mutated the list stored inside
    # expected_process.spec, leaking reordered state across parametrized runs.
    expected_links = sorted(
        expected_process.spec.get("links", []), key=lambda x: x.get("href", "")
    )
    actual_links = sorted(
        actual_process.get("links", []), key=lambda x: x.get("href", "")
    )

    # NOTE(review): zip silently ignores surplus links on either side —
    # confirm whether the link counts should also be asserted equal.
    for expected_link, actual_link in zip(expected_links, actual_links):
        assert expected_link.get("href", "") == actual_link.get("href", "")
        assert expected_link.get("rel", "") == actual_link.get("rel", "")
        assert expected_link.get("title", "") == actual_link.get("title", "")
Loading