Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature: WP3 Process metadata validation #44

Merged
merged 16 commits into from
Feb 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -169,3 +169,7 @@ poetry.toml

# LSP config files
pyrightconfig.json

# vscode
.vscode/
pytest.ini
19 changes: 17 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,23 @@ focussing on a specific API aspect to test or verify
```
- **WP3 Validation of process metadata** (lead implementation partner: EODC)
- Main location: [`src/openeo_test_suite/tests/processes/metadata`](./src/openeo_test_suite/tests/processes/metadata)
- TODO: [Open-EO/openeo-test-suite#19](https://github.com/Open-EO/openeo-test-suite/issues/19)
- Defines tests to validate openEO process metadata against specs
defined in the [openeo-processes](https://github.com/Open-EO/openeo-processes) project
- Functional tests concern actual values and behavior of processes (like parameters and return values),
failures in these tests should be looked into and fixed.
- Non-functional tests concern descriptions and other metadata of processes that have no impact on the actual behavior of the process,
failures in these tests should be taken as warnings, but don't necessarily need to be fixed. These can be skipped by adding
'-m "not optional"' to the pytest command.
- Usage example for running these tests against a desired openEO backend URL:
```bash
pytest src/openeo_test_suite/tests/processes/metadata \
--html=reports/process-metadata.html \
--openeo-backend-url=openeo.example \
--tb=no \
-vv
```
It is recommended to run these tests with the `--tb=no` option, to avoid excessive traceback output of no substance,
and the `-vv` option to get more detailed output.
GeraldIr marked this conversation as resolved.
Show resolved Hide resolved
- **WP4 General openEO API compliance validation** (lead implementation partner: EODC)
- TODO: [Open-EO/openeo-test-suite#20](https://github.com/Open-EO/openeo-test-suite/issues/20)
- **WP5 Individual process testing** (lead implementation partner: M. Mohr)
Expand Down Expand Up @@ -305,7 +321,6 @@ Some general guidelines:
extend existing tests or add new tests at `src/openeo_test_suite/tests/collections`.
- Validation of process metadata:
add new tests to `src/openeo_test_suite/tests/processes/metadata`.
- TODO: [Open-EO/openeo-test-suite#19](https://github.com/Open-EO/openeo-test-suite/issues/19)
- General openEO API compliance validation:
- TODO: [Open-EO/openeo-test-suite#20](https://github.com/Open-EO/openeo-test-suite/issues/20)
- Individual process testing:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def test_get_process_match(self, process_registry):
assert process_data.process_id == "add"
assert process_data.level == "L1"
assert process_data.experimental is False
assert process_data.path.name == "add.json5"
assert process_data.path.name == "add.json"
assert len(process_data.tests)

def test_get_process_no_match(self, process_registry):
Expand All @@ -37,7 +37,7 @@ def test_get_all_processes_add(self, process_registry):

assert add.level == "L1"
assert add.experimental is False
assert add.path.name == "add.json5"
assert add.path.name == "add.json"
assert len(add.tests)

add00 = {"arguments": {"x": 0, "y": 0}, "returns": 0}
Expand All @@ -50,7 +50,7 @@ def test_get_all_processes_divide(self, process_registry):

assert divide.level == "L1"
assert divide.experimental is False
assert divide.path.name == "divide.json5"
assert divide.path.name == "divide.json"
assert len(divide.tests)

divide0 = {
Expand Down
42 changes: 29 additions & 13 deletions src/openeo_test_suite/lib/process_registry.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import itertools
import json
import logging
import os
from dataclasses import dataclass
Expand All @@ -16,6 +18,7 @@ class ProcessData:
"""Process data, including profile level and list of tests"""

process_id: str
spec: dict
level: str
tests: List[dict] # TODO: also make dataclass for each test?
experimental: bool
Expand All @@ -32,22 +35,20 @@ def __init__(self, root: Optional[Path] = None):
"""
:param root: Root directory of the tests folder in openeo-processes project
"""
self._root = Path(
root
# TODO: eliminate need for this env var?
or os.environ.get("OPENEO_TEST_SUITE_PROCESSES_TEST_ROOT")
or self._guess_root()
)

self._root = Path(root or self._guess_root())
self._root_json5 = Path(self._root.joinpath("tests"))

# Lazy load cache
self._processes: Union[None, List[ProcessData]] = None

def _guess_root(self):
# TODO: avoid need for guessing and properly include assets in (installed) package
project_root = Path(openeo_test_suite.__file__).parents[2]
candidates = [
project_root / "assets/processes/tests",
Path("./assets/processes/tests"),
Path("./openeo-test-suite/assets/processes/tests"),
project_root / "assets/processes",
Path("./assets/processes"),
Path("./openeo-test-suite/assets/processes"),
]
for candidate in candidates:
if candidate.exists() and candidate.is_dir():
Expand All @@ -62,18 +63,33 @@ def _load(self) -> Iterator[ProcessData]:
if not self._root.is_dir():
raise ValueError(f"Invalid process test root directory: {self._root}")
_log.info(f"Loading process definitions from {self._root}")
for path in self._root.glob("*.json5"):

process_paths = itertools.chain(
self._root.glob("*.json"), self._root.glob("proposals/*.json")
)
for path in process_paths:
test_metadata_path = self._root_json5 / f"{path.stem}.json5"
try:
with path.open() as f:
data = json5.load(f)
data = json.load(f)
if data["id"] != path.stem:
raise ValueError(
f"Process id mismatch between id {data['id']!r} and filename {path.name!r}"
)
# Metadata is stored in sibling json file
GeraldIr marked this conversation as resolved.
Show resolved Hide resolved
if test_metadata_path.exists():
with test_metadata_path.open() as f:
metadata = json5.load(f)
else:
metadata = {}

yield ProcessData(
process_id=data["id"],
level=data.get("level"),
tests=data.get("tests", []),
spec=data,
level=metadata.get(
"level", "L4"
), # default to L4 is intended for processes without a specific level
tests=metadata.get("tests", []),
experimental=data.get("experimental", False),
soxofaan marked this conversation as resolved.
Show resolved Hide resolved
path=path,
)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
from typing import List
import pytest
from openeo_test_suite.lib.process_registry import ProcessData
from openeo_test_suite.lib.process_selection import get_selected_processes
import logging


@pytest.fixture(scope="module")
def api_processes(connection) -> List[dict]:
    """Module-scoped fixture: the process listing advertised by the backend under test."""
    listing = connection.list_processes()
    return listing


def _get_test_id(val):
    """pytest id helper: label ProcessData parameters with their process id (None otherwise)."""
    if not isinstance(val, ProcessData):
        return None
    return val.process_id


@pytest.mark.parametrize(
    "expected_process",
    list(get_selected_processes()),
    ids=_get_test_id,
)
def test_process_metadata_functional(api_processes, expected_process, skipper):
    """
    Validate the functional metadata of each selected process against the
    backend's process listing: parameters (names, optionality, schemas),
    return schema, and the experimental/deprecated flags.

    Processes the backend does not support are skipped via ``skipper``.
    These are the functional parts of the process metadata, i.e. the parts
    that influence actual process behavior.
    """

    skipper.skip_if_unsupported_process(expected_process.process_id)

    # After the skip above the process is expected to be listed by the backend;
    # an unexpected absence surfaces as an IndexError (test failure).
    actual_process = [
        process
        for process in api_processes
        if process["id"] == expected_process.process_id
    ][0]

    expected_parameters = expected_process.spec.get("parameters", [])
    actual_parameters = actual_process["parameters"]

    # Fewer actual than expected parameters: only acceptable when every
    # missing (trailing) expected parameter has a default value.
    if len(expected_parameters) > len(actual_parameters):
        # logging.warn() is a deprecated alias; use logging.warning()
        logging.warning(
            f"Process {expected_process.process_id} has {len(expected_parameters)} expected parameters, but only {len(actual_parameters)} actual parameters"
        )
        for expected_parameter in expected_parameters[len(actual_parameters) :]:
            assert (
                "default" in expected_parameter
            ), f"Missing Parameter {expected_parameter} has no default value. Cannot remove required parameters."

    # More actual than expected parameters: only acceptable when every
    # additional (trailing) actual parameter has a default value.
    if len(expected_parameters) < len(actual_parameters):
        logging.warning(
            f"Process {expected_process.process_id} has {len(expected_parameters)} expected parameters, but {len(actual_parameters)} actual parameters"
        )
        for actual_parameter in actual_parameters[len(expected_parameters) :]:
            assert (
                "default" in actual_parameter
            ), f"Additional Parameter {actual_parameter} has no default value."

    # Compare the overlapping (positional) parameters pairwise.
    # NOTE(review): zip truncates to the shorter list; the surplus tail is
    # only checked for defaults above, not for schema equality.
    for expected_parameter, actual_parameter in zip(
        expected_parameters, actual_parameters
    ):
        # Parameter names must match positionally
        assert expected_parameter["name"] == actual_parameter["name"]
        # Optionality must match (absent "optional" means required)
        assert expected_parameter.get("optional", False) == actual_parameter.get(
            "optional", False
        )
        # Parameter schemas must match exactly
        assert expected_parameter["schema"] == actual_parameter["schema"]

    # Return schema must match exactly
    expected_return_type = expected_process.spec.get("returns", {})
    actual_return_type = actual_process["returns"]
    assert expected_return_type["schema"] == actual_return_type["schema"]

    # experimental/deprecated flags: the backend must set them when the spec
    # does, but may additionally flag processes on its own.
    if expected_process.spec.get("experimental", False):
        assert actual_process.get("experimental", False)

    if expected_process.spec.get("deprecated", False):
        assert actual_process.get("deprecated", False)

@pytest.mark.optional
@pytest.mark.parametrize(
    "expected_process",
    list(get_selected_processes()),
    ids=_get_test_id,
)
def test_process_metadata_non_functional(api_processes, expected_process, skipper):
    """
    Validate the non-functional metadata of each selected process against the
    backend's process listing: categories, descriptions, summary and links.

    Failures here have no impact on actual process behavior and should be
    treated as warnings; the test is marked ``optional`` so it can be
    deselected with ``-m "not optional"``.
    """

    skipper.skip_if_unsupported_process(expected_process.process_id)

    # After the skip above the process is expected to be listed by the backend;
    # an unexpected absence surfaces as an IndexError (test failure).
    actual_process = [
        process
        for process in api_processes
        if process["id"] == expected_process.process_id
    ][0]

    # Categories must match exactly
    assert expected_process.spec.get("categories", []) == actual_process["categories"]

    # Process description must match exactly
    assert expected_process.spec.get("description", "") == actual_process["description"]

    # Summary must match exactly
    assert expected_process.spec.get("summary", "") == actual_process["summary"]

    # Parameter descriptions must match pairwise (and counts must agree)
    expected_parameters = expected_process.spec.get("parameters", [])
    actual_parameters = actual_process["parameters"]

    assert len(expected_parameters) == len(actual_parameters)

    for expected_parameter, actual_parameter in zip(
        expected_parameters, actual_parameters
    ):
        assert expected_parameter.get("description", "") == actual_parameter.get(
            "description", ""
        )

    # Return value description must match
    expected_return_type = expected_process.spec.get("returns", {})
    actual_return_type = actual_process["returns"]
    assert expected_return_type.get("description", "") == actual_return_type.get(
        "description", ""
    )

    # Compare links, ordered by href. Use sorted() copies instead of in-place
    # .sort(): the original mutated the list stored inside
    # expected_process.spec, leaking reordered state across parametrized runs.
    expected_links = sorted(
        expected_process.spec.get("links", []), key=lambda x: x.get("href", "")
    )
    actual_links = sorted(
        actual_process.get("links", []), key=lambda x: x.get("href", "")
    )

    # NOTE(review): zip silently ignores surplus links on either side —
    # confirm whether the link counts should also be asserted equal.
    for expected_link, actual_link in zip(expected_links, actual_links):
        assert expected_link.get("href", "") == actual_link.get("href", "")
        assert expected_link.get("rel", "") == actual_link.get("rel", "")
        assert expected_link.get("title", "") == actual_link.get("title", "")
Loading