Changes from all commits
37 commits
f6520d1
add option for using new multiscales convention in optimized conversion
d-v-b Nov 20, 2025
686cb51
remove OOP converter and use plain functions
d-v-b Nov 21, 2025
58b578d
use pydantic models for fingerprinting sentinel2 product
d-v-b Nov 21, 2025
5bea14b
fix multiscales to use da.coarsen and propagate encoding
d-v-b Nov 21, 2025
407f167
ensure that dtype is preserved after resampling
d-v-b Nov 24, 2025
9170fd6
add new multiscales JSON example
d-v-b Nov 24, 2025
0b8243f
add mypy pydantic plugin
d-v-b Nov 24, 2025
7ea938a
lint
d-v-b Nov 24, 2025
8ad6365
add s1 and s2 demo data to tests, and don't test against remote urls
d-v-b Nov 24, 2025
c5f9e09
fix e2e tests
d-v-b Nov 24, 2025
3dd13fb
remove network test workflow from CI
d-v-b Nov 24, 2025
baeb5ec
remove extra type definition and update tests
d-v-b Nov 24, 2025
959a419
remove explicit zarr groups in favor of dynamic test fixtures
d-v-b Nov 24, 2025
7b08f9a
docstrings
d-v-b Nov 24, 2025
e02d2e0
Enhance CRS initialization and update S2 optimization commands
emmanuelmathot Nov 26, 2025
f4e9de8
Refactor code formatting for clarity in S2 optimization functions
emmanuelmathot Nov 26, 2025
cd93e34
fix failing / warning tests
d-v-b Nov 27, 2025
4d7be8f
add strict JSON schema equality check to e2e tests
d-v-b Nov 27, 2025
94e9040
support both flavors of multiscale metadata
d-v-b Nov 28, 2025
5ed7f02
dont manage return codes in cli functions
d-v-b Nov 28, 2025
b4752d6
add s2 optimized test
d-v-b Nov 28, 2025
c3739a3
add optimized geozarr example hierarchies
d-v-b Nov 28, 2025
3b83c68
format JSON documents
d-v-b Nov 28, 2025
9c6a85b
mid-debug of e2e tests
d-v-b Nov 28, 2025
b0f1e11
WIP e2e fixes
d-v-b Dec 1, 2025
45dd9a4
make cf standard name validator become a pass-through when no interne…
d-v-b Dec 1, 2025
dc0e561
update example schemas
d-v-b Dec 1, 2025
2a6f00f
narrow type to just tuples in types.py
d-v-b Dec 1, 2025
b8cc1df
refactor consolidation
d-v-b Dec 1, 2025
6db66f2
use consolidated=False in conversion
d-v-b Dec 1, 2025
a231d20
update tests
d-v-b Dec 1, 2025
c924c89
lint
d-v-b Dec 1, 2025
9cba2e2
add both multiscales types to output
d-v-b Dec 1, 2025
2a5ca55
update comments in tests
d-v-b Dec 1, 2025
fde389b
refactor multiscale metadata and design attributes class to handle bo…
d-v-b Dec 2, 2025
a04cdbe
use explicit validation for multiscale datasets
d-v-b Dec 2, 2025
8dec0e2
allow extra for multiscales
d-v-b Dec 3, 2025
142 changes: 84 additions & 58 deletions src/eopf_geozarr/data_api/geozarr/common.py
@@ -1,19 +1,21 @@
"""Common utilities for GeoZarr data API."""

from __future__ import annotations

import io
import urllib
import urllib.request
from dataclasses import dataclass
from typing import Annotated, Any, Mapping, Self, TypeGuard, TypeVar
from typing import Annotated, Any, Mapping, NotRequired, Self, TypeGuard, TypeVar
from urllib.error import URLError

from cf_xarray.utils import parse_cf_standard_name_table
from pydantic import AfterValidator, BaseModel, Field, model_validator
from pydantic.experimental.missing_sentinel import MISSING
from typing_extensions import Final, Literal, Protocol, runtime_checkable
from typing_extensions import Final, Literal, Protocol, TypedDict, runtime_checkable

from eopf_geozarr.data_api.geozarr.multiscales import tms, zcm
from eopf_geozarr.data_api.geozarr.projjson import ProjJSON
from eopf_geozarr.data_api.geozarr.types import ResamplingMethod


@dataclass(frozen=True)
@@ -220,57 +222,6 @@ def array_dimensions(self) -> tuple[str, ...]: ...
attributes: BaseDataArrayAttrs


class TileMatrixLimit(BaseModel):
""""""

tileMatrix: str
minTileCol: int
minTileRow: int
maxTileCol: int
maxTileRow: int


class TileMatrix(BaseModel):
id: str
scaleDenominator: float
cellSize: float
pointOfOrigin: tuple[float, float]
tileWidth: int
tileHeight: int
matrixWidth: int
matrixHeight: int


class TileMatrixSet(BaseModel):
id: str
title: str | None = None
crs: str | None = None
supportedCRS: str | None = None
orderedAxes: tuple[str, str] | None = None
tileMatrices: tuple[TileMatrix, ...]


class TMSMultiscales(BaseModel, extra="allow"):
"""
Multiscale metadata for a GeoZarr dataset based on the OGC TileMatrixSet standard

Attributes
----------
tile_matrix_set : str
The tile matrix set identifier for the multiscale dataset.
resampling_method : ResamplingMethod
The name of the resampling method for the multiscale dataset.
tile_matrix_set_limits : dict[str, TileMatrixSetLimits] | None, optional
The tile matrix set limits for the multiscale dataset.
"""

tile_matrix_set: TileMatrixSet
resampling_method: ResamplingMethod
# TODO: ensure that the keys match tile_matrix_set.tileMatrices[$index].id
# TODO: ensure that the keys match the tileMatrix attribute
tile_matrix_limits: dict[str, TileMatrixLimit] | None = None


class DatasetAttrs(BaseModel, extra="allow"):
"""
Attributes for a GeoZarr dataset.
@@ -295,24 +246,99 @@ def check_grid_mapping(model: TDataSetLike) -> TDataSetLike:
"""
if model.members is not None:
for name, member in model.members.items():
if member.attributes.grid_mapping not in model.members:
if (
hasattr(member.attributes, "grid_mapping")
and isinstance(member.attributes.grid_mapping, str)
and member.attributes.grid_mapping not in model.members
):
msg = f"Grid mapping variable '{member.attributes.grid_mapping}' declared by {name} was not found in dataset members"
raise ValueError(msg)
return model


class MultiscaleGroupAttrs(BaseModel, extra="allow"):
class MultiscaleMeta(BaseModel):
"""
Attributes for Multiscale GeoZarr dataset. Can be a mix of TMS multiscale
or ZCM multiscale metadata
"""

layout: tuple[zcm.ScaleLevel, ...] | MISSING = MISSING
resampling_method: str | MISSING = MISSING
tile_matrix_set: tms.TileMatrixSet | MISSING = MISSING
tile_matrix_limits: dict[str, tms.TileMatrixLimit] | MISSING = MISSING

@model_validator(mode="after")
def valid_zcm(self) -> Self:
"""
Ensure that the ZCM metadata, if present, is valid
"""
if self.layout is not MISSING:
zcm.Multiscales(**self.model_dump())

return self

@model_validator(mode="after")
def valid_tms(self) -> Self:
"""
Ensure that the TMS metadata, if present, is valid
"""
if self.tile_matrix_set is not MISSING:
tms.Multiscales(**self.model_dump())

return self


class MultiscaleGroupAttrs(BaseModel):
"""
Attributes for Multiscale GeoZarr dataset.

A Multiscale dataset is a collection of Dataet
A Multiscale dataset is a collection of Dataset

Attributes
----------
multiscales: MultiscaleAttrs
"""

multiscales: TMSMultiscales
zarr_conventions_version: Literal["0.1.0"] | MISSING = MISSING
zarr_conventions: zcm.MultiscaleConventions | MISSING = MISSING
multiscales: MultiscaleMeta

_zcm_multiscales: zcm.Multiscales | None = None
_tms_multiscales: tms.Multiscales | None = None

@model_validator(mode="after")
def valid_zcm_and_tms(self) -> Self:
"""
Ensure that the ZCM metadata, if present, is valid, and that TMS metadata, if present,
is valid, and that at least one of the two is present.
"""
if self.zarr_conventions is not MISSING:
self._zcm_multiscales = zcm.Multiscales(**self.multiscales.model_dump())
if self.multiscales.tile_matrix_limits is not MISSING:
self._tms_multiscales = tms.Multiscales(
tile_matrix_limits=self.multiscales.tile_matrix_limits,
resampling_method=self.multiscales.resampling_method, # type: ignore[arg-type]
tile_matrix_set=self.multiscales.tile_matrix_set,
)
if self._tms_multiscales is None and self._zcm_multiscales is None:
raise ValueError(
"Either ZCM multiscales or TMS multiscales must be present"
)
return self

@property
def multiscale_meta(self) -> MultiscaleMetaDict:
out: MultiscaleMetaDict = {}
if self._tms_multiscales is not None:
out["tms"] = self._tms_multiscales
if self._zcm_multiscales is not None:
out["zcm"] = self._zcm_multiscales
return out


class MultiscaleMetaDict(TypedDict):
tms: NotRequired[tms.Multiscales]
zcm: NotRequired[zcm.Multiscales]


def is_none(data: object) -> TypeGuard[None]:
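A quick reviewer sketch of how the dual-flavor attributes model above is meant to behave (illustration only, not code from this PR; the asset names `r10m`/`r20m` and the scale values are made up): an attributes dict carrying only the zarr-conventions ("ZCM") flavor should validate, and the parsed model is then reachable through `multiscale_meta`.

```python
from eopf_geozarr.data_api.geozarr.common import MultiscaleGroupAttrs
from eopf_geozarr.data_api.geozarr.multiscales.zcm import MULTISCALE_CONVENTION

# Group attributes carrying only the zarr-conventions ("ZCM") multiscales flavor.
attrs = MultiscaleGroupAttrs.model_validate(
    {
        "zarr_conventions_version": "0.1.0",
        "zarr_conventions": MULTISCALE_CONVENTION,
        "multiscales": {
            "layout": [
                {"asset": "r10m", "transform": {"scale": [10.0, 10.0]}},
                {
                    "asset": "r20m",
                    "derived_from": "r10m",
                    "transform": {"scale": [20.0, 20.0]},
                    "resampling_method": "average",
                },
            ],
            "resampling_method": "average",
        },
    }
)

# Only the ZCM flavor was supplied, so only the "zcm" key should be populated;
# the TMS branch of `valid_zcm_and_tms` is skipped because `tile_matrix_limits`
# was never set.
meta = attrs.multiscale_meta
assert "zcm" in meta and "tms" not in meta
```

The symmetric TMS-only input would instead go through the `tile_matrix_limits` branch of `valid_zcm_and_tms` and populate `meta["tms"]`.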
Empty file.
56 changes: 56 additions & 0 deletions src/eopf_geozarr/data_api/geozarr/multiscales/tms.py
@@ -0,0 +1,56 @@
from __future__ import annotations

from pydantic import BaseModel

from eopf_geozarr.data_api.geozarr.types import ResamplingMethod


class TileMatrix(BaseModel):
id: str
scaleDenominator: float
cellSize: float
pointOfOrigin: tuple[float, float]
tileWidth: int
tileHeight: int
matrixWidth: int
matrixHeight: int


class TileMatrixSet(BaseModel):
id: str
title: str | None = None
crs: str | None = None
supportedCRS: str | None = None
orderedAxes: tuple[str, str] | None = None
tileMatrices: tuple[TileMatrix, ...]


class TileMatrixLimit(BaseModel):
""""""

tileMatrix: str
minTileCol: int
minTileRow: int
maxTileCol: int
maxTileRow: int


class Multiscales(BaseModel, extra="allow"):
"""
Multiscale metadata for a GeoZarr dataset based on the OGC TileMatrixSet standard

Attributes
----------
tile_matrix_set : str
The tile matrix set identifier for the multiscale dataset.
resampling_method : ResamplingMethod
The name of the resampling method for the multiscale dataset.
tile_matrix_set_limits : dict[str, TileMatrixSetLimits] | None, optional
The tile matrix set limits for the multiscale dataset.
"""

tile_matrix_set: TileMatrixSet
resampling_method: ResamplingMethod
# TODO: ensure that the keys match tile_matrix_set.tileMatrices[$index].id
# TODO: ensure that the keys match the tileMatrix attribute
tile_matrix_limits: dict[str, TileMatrixLimit] | None = None
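For the TMS flavor, a minimal construction sketch against the models in this new module (the `WebMercatorQuad` level-0 numbers are the standard OGC values; `"nearest"` is assumed here to be one of the allowed `ResamplingMethod` literals from `types.py`):

```python
from eopf_geozarr.data_api.geozarr.multiscales.tms import (
    Multiscales,
    TileMatrix,
    TileMatrixLimit,
    TileMatrixSet,
)

# Level 0 of the OGC WebMercatorQuad tile matrix set.
level_0 = TileMatrix(
    id="0",
    scaleDenominator=559082264.028717,
    cellSize=156543.033928041,
    pointOfOrigin=(-20037508.342789244, 20037508.342789244),
    tileWidth=256,
    tileHeight=256,
    matrixWidth=1,
    matrixHeight=1,
)

tms_multiscales = Multiscales(
    tile_matrix_set=TileMatrixSet(
        id="WebMercatorQuad",
        crs="http://www.opengis.net/def/crs/EPSG/0/3857",
        tileMatrices=(level_0,),
    ),
    resampling_method="nearest",
    # Keys are expected to match the TileMatrix ids (see the TODOs above).
    tile_matrix_limits={
        "0": TileMatrixLimit(
            tileMatrix="0", minTileCol=0, minTileRow=0, maxTileCol=0, maxTileRow=0
        )
    },
)
```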
93 changes: 93 additions & 0 deletions src/eopf_geozarr/data_api/geozarr/multiscales/zcm.py
@@ -0,0 +1,93 @@
from __future__ import annotations

from typing import Literal, NotRequired

from pydantic import BaseModel
from pydantic.experimental.missing_sentinel import MISSING
from typing_extensions import TypedDict

ConventionID = Literal["d35379db-88df-4056-af3a-620245f8e347"]


class MultiscaleConvention(TypedDict):
version: Literal["0.1.0"]
schema: Literal[
"https://raw.githubusercontent.com/zarr-conventions/multiscales/refs/tags/v0.1.0/schema.json"
]
name: Literal["multiscales"]
description: Literal["Multiscale layout of zarr datasets"]
spec: Literal[
"https://github.com/zarr-conventions/multiscales/blob/v0.1.0/README.md"
]


MultiscaleConventions = TypedDict( # type: ignore[misc]
"MultiscaleConventions",
{"d35379db-88df-4056-af3a-620245f8e347": MultiscaleConvention},
closed=False,
)

MULTISCALE_CONVENTION: MultiscaleConventions = { # type: ignore[typeddict-unknown-key]
"d35379db-88df-4056-af3a-620245f8e347": {
"version": "0.1.0",
"schema": "https://raw.githubusercontent.com/zarr-conventions/multiscales/refs/tags/v0.1.0/schema.json",
"name": "multiscales",
"description": "Multiscale layout of zarr datasets",
"spec": "https://github.com/zarr-conventions/multiscales/blob/v0.1.0/README.md",
}
}


class ConventionAttributes(BaseModel):
zarr_conventions_version: Literal["0.1.0"]
zarr_conventions: MultiscaleConventions

model_config = {"extra": "allow"}


class TransformJSON(TypedDict):
scale: NotRequired[tuple[float, ...]]
translation: NotRequired[tuple[float, ...]]


class Transform(BaseModel):
scale: tuple[float, ...] | MISSING = MISSING
translation: tuple[float, ...] | MISSING = MISSING


class ScaleLevelJSON(TypedDict):
asset: str
derived_from: NotRequired[str]
transform: TransformJSON
resampling_method: NotRequired[str]


class ScaleLevel(BaseModel):
asset: str
derived_from: str | MISSING = MISSING
transform: Transform | MISSING = MISSING
resampling_method: str | MISSING = MISSING

model_config = {"extra": "allow"}


class MultiscalesJSON(TypedDict):
layout: tuple[ScaleLevelJSON, ...]
resampling_method: NotRequired[str]


class Multiscales(BaseModel):
layout: tuple[ScaleLevel, ...]
resampling_method: str | MISSING = MISSING

model_config = {"extra": "allow"}

class MultiscalesAttrsJSON(TypedDict):
zarr_conventions_version: Literal["0.1.0"]
zarr_conventions: MultiscaleConventions
multiscales: Multiscales


class MultiscalesAttrs(ConventionAttributes):
multiscales: Multiscales
model_config = {"extra": "allow"}
23 changes: 19 additions & 4 deletions src/eopf_geozarr/data_api/geozarr/v3.py
@@ -2,14 +2,15 @@

from __future__ import annotations

from typing import Any, Self
from typing import Self

from pydantic import model_validator
from pydantic_zarr.v3 import ArraySpec, GroupSpec

from eopf_geozarr.data_api.geozarr.common import (
BaseDataArrayAttrs,
DatasetAttrs,
GridMappingAttrs,
MultiscaleGroupAttrs,
check_grid_mapping,
check_valid_coordinates,
@@ -37,7 +38,21 @@ def array_dimensions(self) -> tuple[str, ...]:
return self.dimension_names


class Dataset(GroupSpec[DatasetAttrs, GroupSpec[Any, Any] | DataArray]):
class GridMappingVariable(ArraySpec[GridMappingAttrs]):
"""
A Zarr array that represents a GeoZarr grid mapping variable.

The attributes of this array are defined in `GridMappingAttrs`.

References
----------
https://cfconventions.org/Data/cf-conventions/cf-conventions-1.12/cf-conventions.html#grid-mappings-and-projections
"""

...


class Dataset(GroupSpec[DatasetAttrs, DataArray | GridMappingVariable]):
"""
A GeoZarr Dataset.
"""
@@ -58,11 +73,11 @@ def check_valid_coordinates(self) -> Self:
return check_valid_coordinates(self)

@model_validator(mode="after")
def validate_grid_mapping(self) -> Self:
def check_grid_mapping(self) -> Self:
return check_grid_mapping(self)


class MultiscaleGroup(GroupSpec[MultiscaleGroupAttrs, DataArray | GroupSpec[Any, Any]]):
class MultiscaleGroup(GroupSpec[MultiscaleGroupAttrs, Dataset]):
"""
A GeoZarr Multiscale Group.
"""