Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better reuse of process selection logic #37

Merged
merged 7 commits into from
Jan 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/internal-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ jobs:
steps:
- name: Clone repo
uses: actions/checkout@v2
with:
submodules: recursive
- name: Set up python
uses: actions/setup-python@v4
with:
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/pytest-collect.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ jobs:
steps:
- name: Clone repo
uses: actions/checkout@v2
with:
submodules: recursive
- name: Set up python
uses: actions/setup-python@v4
with:
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ venv/
ENV/
env.bak/
venv.bak/
venv-*/

# Spyder project settings
.spyderproject
Expand Down
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,12 @@ process selection options:
- Example: `--process-levels=L1,L2`.
- A level does not imply other levels, so each desired level must be specified explicitly.
For example, L2 does **not** include L1 automatically.
- `--experimental`: Enables tests for experimental processes.
By default experimental processes will be skipped.

If neither `--processes` nor `--process-levels` is specified, all processes are considered.
If both are specified, the union of both will be considered.

- `--experimental`: By default, experimental processes (or experimental process tests) are ignored.
Enabling this option will consider experimental processes and tests.


### Runner for individual process testing
Expand Down
Empty file.
122 changes: 122 additions & 0 deletions src/openeo_test_suite/lib/internal-tests/test_process_registry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
import pytest

from openeo_test_suite.lib.process_registry import ProcessRegistry


class TestProcessRegistry:
    """Checks for process discovery and filtering in ``ProcessRegistry``."""

    # Some example processes for some levels
    PROCESS_EXAMPLES_L1 = ["add", "divide", "apply_dimension", "reduce_dimension"]
    PROCESS_EXAMPLES_L2 = ["aggregate_temporal", "if"]
    PROCESS_EXAMPLES_L3 = ["apply_neighborhood", "merge_cubes"]
    PROCESS_EXAMPLES_EXPERIMENTAL = ["apply_polygon"]

    @pytest.fixture(scope="class")
    def process_registry(self) -> ProcessRegistry:
        """One registry instance shared by all tests of this class."""
        registry = ProcessRegistry()
        return registry

    def test_get_all_processes_basic(self, process_registry):
        """The registry yields at least one process."""
        assert len(list(process_registry.get_all_processes())) > 0

    def test_get_all_processes_add(self, process_registry):
        """Exactly one "add" process exists, with the expected metadata."""
        # Tuple-unpacking fails unless there is exactly one match.
        (add,) = (
            candidate
            for candidate in process_registry.get_all_processes()
            if candidate.process_id == "add"
        )

        assert add.level == "L1"
        assert add.experimental is False
        assert add.path.name == "add.json5"
        assert len(add.tests)

        expected_test = {"arguments": {"x": 0, "y": 0}, "returns": 0}
        assert expected_test in add.tests

    def test_get_all_processes_divide(self, process_registry):
        """Exactly one "divide" process exists, with the expected metadata."""
        # Tuple-unpacking fails unless there is exactly one match.
        (divide,) = (
            candidate
            for candidate in process_registry.get_all_processes()
            if candidate.process_id == "divide"
        )

        assert divide.level == "L1"
        assert divide.experimental is False
        assert divide.path.name == "divide.json5"
        assert len(divide.tests)

        expected_test = {
            "arguments": {"x": 1, "y": 0},
            "returns": float("inf"),
            "throws": "DivisionByZero",
        }
        assert expected_test in divide.tests

    def test_get_processes_filtered_default(self, process_registry):
        """Without filters: all non-experimental processes are returned."""
        selected = [p.process_id for p in process_registry.get_processes_filtered()]
        assert len(selected) > 100

        must_be_present = [
            *self.PROCESS_EXAMPLES_L1,
            *self.PROCESS_EXAMPLES_L2,
            *self.PROCESS_EXAMPLES_L3,
        ]
        for pid in must_be_present:
            assert pid in selected
        for pid in self.PROCESS_EXAMPLES_EXPERIMENTAL:
            assert pid not in selected

    def test_get_processes_filtered_with_process_ids(self, process_registry):
        """Filtering on process ids only returns exactly those processes."""
        matches = process_registry.get_processes_filtered(
            process_ids=["add", "divide"]
        )
        selected = [p.process_id for p in matches]
        assert sorted(selected) == ["add", "divide"]

    def test_get_processes_filtered_with_process_levels(self, process_registry):
        """Filtering on levels only returns processes of the listed levels."""
        level_1 = [
            p.process_id
            for p in process_registry.get_processes_filtered(process_levels=["L1"])
        ]
        level_2_and_3 = [
            p.process_id
            for p in process_registry.get_processes_filtered(
                process_levels=["L2", "L3"]
            )
        ]

        for pid in self.PROCESS_EXAMPLES_L1:
            assert pid in level_1
            assert pid not in level_2_and_3
        for pid in self.PROCESS_EXAMPLES_L2:
            assert pid not in level_1
            assert pid in level_2_and_3
        for pid in self.PROCESS_EXAMPLES_L3:
            assert pid not in level_1
            assert pid in level_2_and_3
        for pid in self.PROCESS_EXAMPLES_EXPERIMENTAL:
            assert pid not in level_1
            assert pid not in level_2_and_3

    def test_get_processes_filtered_with_process_ids_and_levels(self, process_registry):
        """Ids and levels combined select the union of both filters."""
        matches = process_registry.get_processes_filtered(
            process_ids=["min", "max"], process_levels=["L2"]
        )
        selected = [p.process_id for p in matches]

        for pid in ["min", "max"] + self.PROCESS_EXAMPLES_L2:
            assert pid in selected

        must_be_absent = [
            *self.PROCESS_EXAMPLES_L1,
            *self.PROCESS_EXAMPLES_L3,
            *self.PROCESS_EXAMPLES_EXPERIMENTAL,
        ]
        for pid in must_be_absent:
            assert pid not in selected

    def test_get_processes_filtered_with_experimental(self, process_registry):
        """With ``experimental=True`` an experimental L3 process is included."""
        matches = process_registry.get_processes_filtered(
            process_ids=["min", "max"], process_levels=["L3"], experimental=True
        )
        selected = [p.process_id for p in matches]

        for pid in ["min", "max"] + self.PROCESS_EXAMPLES_L3:
            assert pid in selected
        for pid in self.PROCESS_EXAMPLES_EXPERIMENTAL:
            assert pid in selected
20 changes: 20 additions & 0 deletions src/openeo_test_suite/lib/internal-tests/test_process_selection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from openeo_test_suite.lib.process_selection import csv_to_list


def test_csv_to_list():
    """csv_to_list returns the stripped, non-empty items of a comma separated string."""
    # Calling without any argument gives an empty list.
    assert csv_to_list() == []

    cases = [
        ("", []),
        (" ", []),
        (" , ", []),
        ("foo", ["foo"]),
        ("foo,bar,baz", ["foo", "bar", "baz"]),
        (",foo,bar,baz,", ["foo", "bar", "baz"]),
        (" ,foo , bar, baz , ", ["foo", "bar", "baz"]),
        (" ,foo ,,, bar, , baz , ", ["foo", "bar", "baz"]),
    ]
    for text, expected in cases:
        assert csv_to_list(text) == expected


def test_csv_to_list_none_on_empty():
    """With ``none_on_empty=True``, inputs without any item give ``None``."""
    assert csv_to_list(none_on_empty=True) is None
    for blank in ("", " ", " , "):
        assert csv_to_list(blank, none_on_empty=True) is None
44 changes: 44 additions & 0 deletions src/openeo_test_suite/lib/internal-tests/test_skipping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import openeo
import pytest
from openeo import DataCube

from openeo_test_suite.lib.skipping import extract_processes_from_process_graph


def test_extract_processes_from_process_graph_basic():
    """A flat process graph with a single "add" node yields just {"add"}."""
    add_node = {"process_id": "add", "arguments": {"x": 3, "y": 5}, "result": True}
    found = extract_processes_from_process_graph({"add35": add_node})
    assert found == {"add"}


@pytest.fixture
def s2_cube() -> openeo.DataCube:
    """Data cube loading bands B02 and B03 of collection "S2", built without a connection."""
    load_options = {
        "collection_id": "S2",
        "bands": ["B02", "B03"],
        "connection": None,
        "fetch_metadata": False,
    }
    return openeo.DataCube.load_collection(**load_options)


def test_extract_processes_from_process_graph_cube_simple(s2_cube):
    """A cube that only loads a collection uses just "load_collection"."""
    found = extract_processes_from_process_graph(s2_cube)
    assert found == {"load_collection"}


def test_extract_processes_from_process_graph_cube_reduce_temporal(s2_cube):
    """Processes used in a reducer callback ("mean") are extracted as well."""
    expected = {"load_collection", "reduce_dimension", "mean"}
    reduced = s2_cube.reduce_temporal("mean")
    assert extract_processes_from_process_graph(reduced) == expected


def test_extract_processes_from_process_graph_cube_reduce_bands(s2_cube):
    """Band math on two bands yields the arithmetic and array access processes."""
    b2 = s2_cube.band("B02")
    b3 = s2_cube.band("B03")

    difference = b3 - b2
    total = b3 + b2
    ratio = difference / total

    expected = {
        "load_collection",
        "reduce_dimension",
        "array_element",
        "subtract",
        "add",
        "divide",
    }
    assert extract_processes_from_process_graph(ratio) == expected
112 changes: 112 additions & 0 deletions src/openeo_test_suite/lib/process_registry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import logging
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Iterable, Iterator, List, Optional, Union

import json5

import openeo_test_suite

_log = logging.getLogger(__name__)


@dataclass(frozen=True)
class ProcessData:
    """Process data, including profile level and list of tests"""

    # Identifier of the process (e.g. "add")
    process_id: str
    # Profile level (e.g. "L1"); None when the definition has no "level" field
    level: Optional[str]
    # Test cases as loaded from the "tests" field of the definition
    tests: List[dict]  # TODO: also make dataclass for each test?
    # Whether the definition is flagged as experimental
    experimental: bool
    # File the definition was loaded from
    path: Path


class ProcessRegistry:
    """
    Registry of processes, metadata (level, experimental flag)
    and related tests defined in openeo-processes project
    """

    def __init__(self, root: Optional[Path] = None):
        """
        :param root: Root directory of the tests folder in openeo-processes project.
            When not given, env var ``OPENEO_TEST_SUITE_PROCESSES_TEST_ROOT`` is used,
            and if that is not set either, some candidate locations are probed.
        """
        self._root = Path(
            root
            # TODO: eliminate need for this env var?
            or os.environ.get("OPENEO_TEST_SUITE_PROCESSES_TEST_ROOT")
            or self._guess_root()
        )
        # Lazy load cache
        self._processes: Union[None, List["ProcessData"]] = None

    def _guess_root(self) -> Path:
        """
        Return the first candidate directory that exists.

        :raises ValueError: when none of the candidates is an existing directory
        """
        # TODO: avoid need for guessing and properly include assets in (installed) package
        project_root = Path(openeo_test_suite.__file__).parents[2]
        candidates = [
            project_root / "assets/processes/tests",
            Path("./assets/processes/tests"),
            Path("./openeo-test-suite/assets/processes/tests"),
        ]
        for candidate in candidates:
            if candidate.exists() and candidate.is_dir():
                return candidate
        raise ValueError(
            f"Could not find valid processes test root directory (tried {candidates})"
        )

    def _load(self) -> Iterator["ProcessData"]:
        """
        Collect all processes from the ``*.json5`` files in the root directory.

        Files that fail to load (or whose "id" does not match the file name)
        are logged and skipped.

        :raises ValueError: when the root is not a directory
        """
        # TODO: cache or preload this in __init__? Or even reuse across instances?
        if not self._root.is_dir():
            raise ValueError(f"Invalid process test root directory: {self._root}")
        _log.info("Loading process definitions from %s", self._root)
        # Sorted, because glob order depends on the file system
        # and a stable order keeps test collection reproducible.
        for path in sorted(self._root.glob("*.json5")):
            try:
                with path.open(encoding="utf-8") as f:
                    data = json5.load(f)
                # Explicit check instead of `assert`, which is stripped under `python -O`
                if data["id"] != path.stem:
                    raise ValueError(
                        f"Process id {data['id']!r} does not match file name {path.name!r}"
                    )
                process_data = ProcessData(
                    process_id=data["id"],
                    level=data.get("level"),
                    tests=data.get("tests", []),
                    experimental=data.get("experimental", False),
                    path=path,
                )
            except Exception as e:
                # TODO: good idea to skip broken definitions? Why not just fail hard?
                _log.error("Failed to load process data from %s: %r", path, e)
            else:
                # Yield outside of the `try` so that exceptions raised at the
                # yield point are not mistaken for load failures.
                yield process_data

    def get_all_processes(self) -> Iterable["ProcessData"]:
        """Iterate over all processes (loaded on first use and cached afterwards)."""
        if self._processes is None:
            self._processes = list(self._load())
        return iter(self._processes)

    def get_processes_filtered(
        self,
        process_ids: Optional[List[str]] = None,
        process_levels: Optional[List[str]] = None,
        experimental: bool = False,
    ) -> Iterable["ProcessData"]:
        """
        Collect processes matching with additional filtering:

        A process is selected when its id is in ``process_ids``
        or its level is in ``process_levels`` (union of both allow lists).
        When both allow lists are empty/None, all processes are selected.
        Experimental processes are always dropped unless ``experimental`` is set.

        :param process_ids: allow list of process ids
        :param process_levels: allow list of process levels
        :param experimental: allow experimental processes or not?
        """
        # No id or level allow lists: no filtering
        allow_all = not process_ids and not process_levels

        for process_data in self.get_all_processes():
            if process_data.experimental and not experimental:
                continue

            if allow_all:
                yield process_data
            elif process_ids and process_data.process_id in process_ids:
                yield process_data
            elif process_levels and process_data.level in process_levels:
                yield process_data
Loading
Loading