Skip to content

Sqlalchemy compositional #248

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 58 commits into from
Feb 21, 2022
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
58 commits
Select commit Hold shift + click to select a range
c1b7bb0
Split Driver
nickeopti Jan 31, 2022
d587759
Create TerracottaDriver
nickeopti Jan 31, 2022
959ecbf
Move functionality up into Driver
nickeopti Jan 31, 2022
e4fff5d
Split Driver
nickeopti Jan 31, 2022
c936905
Rename accordingly to Driver refactor
nickeopti Jan 31, 2022
230d081
Update tests according to Driver refactor
nickeopti Jan 31, 2022
8447a12
Merge branch 'sqlalchemy' into sqlalchemy-compositional
nickeopti Jan 31, 2022
19b84dc
Remove leftover debugging prints
nickeopti Jan 31, 2022
5161aec
move most logic from raster driver to raster.py module
dionhaefner Jan 31, 2022
9477bf7
go straight to :walrus: jail
dionhaefner Jan 31, 2022
56256ac
... and to py3.6 jail
dionhaefner Jan 31, 2022
afd865a
Add test for key standardization
nickeopti Jan 31, 2022
ac19a83
Merge branch 'sqlalchemy-compositional' of https://github.com/DHI-GRA…
nickeopti Jan 31, 2022
6e5b95a
Test raster retrieval with all resampling methods
nickeopti Jan 31, 2022
37cb0d0
Add test for raster.get_raster_tile
nickeopti Jan 31, 2022
76185e3
Test unknown resampling method
nickeopti Jan 31, 2022
0d1096f
Test raster.get_metadata with large_raster_threshold exceeded
nickeopti Jan 31, 2022
551ae7c
bump coverage
dionhaefner Feb 1, 2022
a2ab041
resolve merge conflicts
dionhaefner Feb 1, 2022
8d7ad06
replace type ignore with assertion
dionhaefner Feb 1, 2022
33373e1
:lipstick:
dionhaefner Feb 1, 2022
36eaf1d
Rename driver files and make key standardization a method
nickeopti Feb 1, 2022
046720d
Remember the new/renamed files!
nickeopti Feb 1, 2022
bd45e00
Use underscores in meta_store and raster_store
nickeopti Feb 1, 2022
d5d1b09
Also standardize the where/keys for get_datasets()
nickeopti Feb 1, 2022
3fdd90a
Rename to squeeze
nickeopti Feb 1, 2022
84a5219
Improve repr
nickeopti Feb 1, 2022
1086d52
Rename to GeoTiffRasterStore
nickeopti Feb 1, 2022
65cd29a
Rename to RelationalMetaStore
nickeopti Feb 1, 2022
fed5a66
Don't use too implicit hacks
nickeopti Feb 1, 2022
ab6449f
Update test to new repr
nickeopti Feb 1, 2022
9ad93b4
Merge branch 'sqlalchemy-compositional' of https://github.com/DHI-GRA…
nickeopti Feb 1, 2022
4ab4bdd
Rename filepath to handle
nickeopti Feb 1, 2022
f89052e
Don't print anything
nickeopti Feb 1, 2022
ca15c4a
Rename *_stores
nickeopti Feb 1, 2022
401728a
Re-rename keys to where
nickeopti Feb 1, 2022
62be08d
Check for missing dataset in get_metadata, not in squeeze
nickeopti Feb 4, 2022
236f677
Define keystype explicitly
nickeopti Feb 4, 2022
c8d93ee
Make keys standardization type check
nickeopti Feb 4, 2022
f523ebe
Improve descriptiveness of metadata reload comment
nickeopti Feb 4, 2022
0cce1b7
Re-rename handle to path
nickeopti Feb 4, 2022
8aad626
update docstrings
mrpgraae Feb 20, 2022
0c1c94c
pin pytest<7.0
mrpgraae Feb 20, 2022
06a6d1a
do not assemble rio env in driver
dionhaefner Feb 21, 2022
da9f20f
Update filename in module docstring
mrpgraae Feb 21, 2022
891185a
docstring polish :memo:
mrpgraae Feb 21, 2022
dc835a6
Improve reprs and satisfy flake8
nickeopti Feb 21, 2022
7e75ff4
Improve normalised path from sqlite metastores and update relevant docs
nickeopti Feb 21, 2022
69b7876
Update filenames in first line of files to reflect their actual filen…
nickeopti Feb 21, 2022
06d6d49
Always stringify url_or_path
nickeopti Feb 21, 2022
722e7df
Rename *Driver classes to *MetaStore
nickeopti Feb 21, 2022
41a26e7
Remove references to rasters in meta stores's documentation
nickeopti Feb 21, 2022
3b65c8f
Simplify docstrings in internal base_classes.py
nickeopti Feb 21, 2022
5a9969e
Fix bug (on Windows paths) in sqlite metastore _normalize_path
nickeopti Feb 21, 2022
d8b1ea2
Specify arguments to MetaStore.insert
nickeopti Feb 21, 2022
cd0fe1d
Specify path in meta stores to be of type str
nickeopti Feb 21, 2022
1556dce
Use SQLAlchemy dialect+driver terminology
nickeopti Feb 21, 2022
b2ebcba
fix API docs
dionhaefner Feb 21, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,7 @@ jobs:

- name: Initialize mypy
run: |
mypy . > /dev/null || true
mypy --install-types --non-interactive
mypy --install-types --non-interactive . || true

- name: Run tests
run: |
Expand Down
16 changes: 11 additions & 5 deletions terracotta/drivers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,14 @@
import urllib.parse as urlparse
from pathlib import Path

from terracotta.drivers.base import Driver
from terracotta.drivers.base import MetaStore
from terracotta.drivers.driver import TerracottaDriver
from terracotta.drivers.raster_base import RasterDriver

URLOrPathType = Union[str, Path]


def load_driver(provider: str) -> Type[Driver]:
def load_driver(provider: str) -> Type[MetaStore]:
if provider == 'sqlite-remote':
from terracotta.drivers.sqlite_remote import RemoteSQLiteDriver
return RemoteSQLiteDriver
Expand Down Expand Up @@ -42,10 +44,10 @@ def auto_detect_provider(url_or_path: Union[str, Path]) -> str:
return 'sqlite'


_DRIVER_CACHE: Dict[Tuple[URLOrPathType, str, int], Driver] = {}
_DRIVER_CACHE: Dict[Tuple[URLOrPathType, str, int], TerracottaDriver] = {}


def get_driver(url_or_path: URLOrPathType, provider: str = None) -> Driver:
def get_driver(url_or_path: URLOrPathType, provider: str = None) -> TerracottaDriver:
"""Retrieve Terracotta driver instance for the given path.

This function always returns the same instance for identical inputs.
Expand Down Expand Up @@ -85,6 +87,10 @@ def get_driver(url_or_path: URLOrPathType, provider: str = None) -> Driver:
cache_key = (normalized_path, provider, os.getpid())

if cache_key not in _DRIVER_CACHE:
_DRIVER_CACHE[cache_key] = DriverClass(url_or_path)
driver = TerracottaDriver(
metastore=DriverClass(url_or_path),
rasterstore=RasterDriver()
)
_DRIVER_CACHE[cache_key] = driver

return _DRIVER_CACHE[cache_key]
93 changes: 49 additions & 44 deletions terracotta/drivers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@
import functools
from abc import ABC, abstractmethod
from collections import OrderedDict
from typing import (Any, Callable, Dict, List, Mapping, Sequence, Tuple,
TypeVar, Union)
from typing import (Any, Callable, Dict, List, Mapping, Optional, Sequence,
Tuple, TypeVar, Union, cast)

KeysType = Mapping[str, str]
MultiValueKeysType = Mapping[str, Union[str, List[str]]]
Number = TypeVar('Number', int, float)
T = TypeVar('T')

Expand All @@ -22,14 +24,15 @@ def requires_connection(
return functools.partial(requires_connection, verify=verify)

@functools.wraps(fun)
def inner(self: Driver, *args: Any, **kwargs: Any) -> T:
def inner(self: MetaStore, *args: Any, **kwargs: Any) -> T:
assert fun is not None
with self.connect(verify=verify):
# Apparently mypy thinks fun might still be None, hence the ignore:
return fun(self, *args, **kwargs) # type: ignore
return fun(self, *args, **kwargs)

return inner


class Driver(ABC):
class MetaStore(ABC):
"""Abstract base class for all Terracotta data backends.

Defines a common interface for all drivers.
Expand Down Expand Up @@ -105,14 +108,14 @@ def get_keys(self) -> OrderedDict:
pass

@abstractmethod
def get_datasets(self, where: Mapping[str, Union[str, List[str]]] = None,
def get_datasets(self, where: MultiValueKeysType = None,
page: int = 0, limit: int = None) -> Dict[Tuple[str, ...], Any]:
# Get all known dataset key combinations matching the given constraints,
# and a handle to retrieve the data (driver dependent)
pass

@abstractmethod
def get_metadata(self, keys: Union[Sequence[str], Mapping[str, str]]) -> Dict[str, Any]:
def get_metadata(self, keys: KeysType) -> Optional[Dict[str, Any]]:
"""Return all stored metadata for given keys.

Arguments:
Expand All @@ -136,19 +139,50 @@ def get_metadata(self, keys: Union[Sequence[str], Mapping[str, str]]) -> Dict[st
"""
pass

@abstractmethod
def insert(self, keys: KeysType,
handle: Any, **kwargs: Any) -> None:
"""Register a new dataset. Used to populate metadata database.

Arguments:

keys: Keys of the dataset. Can either be given as a sequence of key values, or
as a mapping ``{key_name: key_value}``.
handle: Handle to access dataset (driver dependent).

"""
pass

@abstractmethod
def delete(self, keys: KeysType) -> None:
"""Remove a dataset from the metadata database.

Arguments:

keys: Keys of the dataset. Can either be given as a sequence of key values, or
as a mapping ``{key_name: key_value}``.

"""
pass

def __repr__(self) -> str:
return f'{self.__class__.__name__}(\'{self.path}\')'


class RasterStore(ABC):

@abstractmethod
# TODO: add accurate signature if mypy ever supports conditional return types
def get_raster_tile(self, keys: Union[Sequence[str], Mapping[str, str]], *,
def get_raster_tile(self, handle: str, *,
tile_bounds: Sequence[float] = None,
tile_size: Sequence[int] = (256, 256),
preserve_values: bool = False,
asynchronous: bool = False) -> Any:
"""Load a raster tile with given keys and bounds.
"""Load a raster tile with given handle and bounds.

Arguments:

keys: Keys of the requested dataset. Can either be given as a sequence of key values,
or as a mapping ``{key_name: key_value}``.
handle: Handle of the requested dataset.
tile_bounds: Physical bounds of the tile to read, in Web Mercator projection (EPSG3857).
Reads the whole dataset if not given.
tile_size: Shape of the output array to return. Must be two-dimensional.
Expand All @@ -168,39 +202,10 @@ def get_raster_tile(self, keys: Union[Sequence[str], Mapping[str, str]], *,
"""
pass

@staticmethod
@abstractmethod
def compute_metadata(data: Any, *,
def compute_metadata(self, handle: str, *,
extra_metadata: Any = None,
**kwargs: Any) -> Dict[str, Any]:
use_chunks: bool = None,
max_shape: Sequence[int] = None) -> Dict[str, Any]:
# Compute metadata for a given input file (driver dependent)
pass

@abstractmethod
def insert(self, keys: Union[Sequence[str], Mapping[str, str]],
handle: Any, **kwargs: Any) -> None:
"""Register a new dataset. Used to populate metadata database.

Arguments:

keys: Keys of the dataset. Can either be given as a sequence of key values, or
as a mapping ``{key_name: key_value}``.
handle: Handle to access dataset (driver dependent).

"""
pass

@abstractmethod
def delete(self, keys: Union[Sequence[str], Mapping[str, str]]) -> None:
"""Remove a dataset from the metadata database.

Arguments:

keys: Keys of the dataset. Can either be given as a sequence of key values, or
as a mapping ``{key_name: key_value}``.

"""
pass

def __repr__(self) -> str:
return f'{self.__class__.__name__}(\'{self.path}\')'
177 changes: 177 additions & 0 deletions terracotta/drivers/driver.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
from typing import (Any, Callable, Collection, Dict, Mapping,
Sequence, Tuple, TypeVar, Union, cast)
import contextlib
import functools
from collections import OrderedDict

import terracotta
from terracotta import exceptions
from terracotta.drivers.base import (KeysType, MetaStore, MultiValueKeysType,
RasterStore, requires_connection)

# Keys as accepted by the public TerracottaDriver methods: either a sequence of
# key values (paired with the driver's key names by standardize_keys) or a
# mapping {key_name: key_value}.
ExtendedKeysType = Union[Sequence[str], KeysType]
# Generic type variable: element type in only_element, return type of the
# function wrapped by standardize_keys.
T = TypeVar('T')


def only_element(iterable: Collection[T]) -> T:
    """Return the single element of a collection.

    Arguments:

        iterable: Collection expected to hold exactly one element.

    Raises:

        exceptions.DatasetNotFoundError: If the collection is empty.
        AssertionError: If the collection holds more than one element.

    """
    if not iterable:
        raise exceptions.DatasetNotFoundError('No dataset found')

    # Raised explicitly instead of via an ``assert`` statement, so the check
    # survives ``python -O`` and never silently returns an arbitrary element.
    if len(iterable) != 1:
        raise AssertionError(
            f'Expected exactly one dataset, got {len(iterable)}'
        )

    return next(iter(iterable))


def standardize_keys(
    fun: Callable[..., T] = None, *,
    requires_all_keys: bool = False
) -> Union[Callable[..., T], functools.partial]:
    """Decorator that normalizes the ``keys`` argument of a method to a dict.

    Can be applied bare (``@standardize_keys``) or with options
    (``@standardize_keys(requires_all_keys=True)``).

    The wrapped method receives ``keys`` as a new ``dict``:

    - a mapping is copied,
    - a sequence is paired, in order, with ``self.key_names``,
    - ``None`` becomes an empty dict.

    Arguments:

        fun: Method to wrap. It is called as ``fun(self, keys, *args, **kwargs)``,
            where ``self`` must expose ``key_names``.
        requires_all_keys: If true, ``keys`` must be given and must contain exactly
            as many entries as ``self.key_names``.

    Raises (from the wrapped method):

        exceptions.InvalidKeyError: If ``keys`` is not a mapping, a sequence or ``None``;
            if the number of keys is wrong while ``requires_all_keys`` is set; or if
            a key name is not contained in ``self.key_names``.

    """
    if fun is None:
        # called with options only; return a decorator awaiting the function
        return functools.partial(standardize_keys, requires_all_keys=requires_all_keys)

    @functools.wraps(fun)
    def inner(
        self: "TerracottaDriver",
        keys: "ExtendedKeysType" = None,
        *args: Any, **kwargs: Any
    ) -> T:
        # narrows the Optional for type checkers, same as in requires_connection
        assert fun is not None

        # key_names is a property delegating to the meta store; read it once
        key_names = self.key_names

        # Validate the type first, so that an unsized object results in an
        # InvalidKeyError below instead of a TypeError from len().
        # NOTE(review): a bare str is also a Sequence and is therefore split
        # into single characters here -- confirm callers never pass one.
        if isinstance(keys, Mapping):
            standardized = dict(keys.items())
        elif isinstance(keys, Sequence):
            standardized = dict(zip(key_names, keys))
        elif keys is None:
            standardized = {}
        else:
            raise exceptions.InvalidKeyError(
                'Encountered unknown key type, expected Mapping or Sequence'
            )

        # The count is taken from the original argument, because zip() above
        # silently drops surplus sequence entries.
        if requires_all_keys and (keys is None or len(keys) != len(key_names)):
            raise exceptions.InvalidKeyError(
                f'Got wrong number of keys (available keys: {key_names})'
            )

        unknown_keys = set(standardized) - set(key_names)
        if unknown_keys:
            raise exceptions.InvalidKeyError(
                f'Encountered unrecognized keys {unknown_keys} (available keys: {key_names})'
            )

        return fun(self, standardized, *args, **kwargs)

    return inner


class TerracottaDriver:
    """Driver composed of a meta store and a raster store.

    Metadata and dataset bookkeeping is delegated to ``metastore``; reading
    raster data and computing metadata is delegated to ``rasterstore``.
    Methods taking ``keys`` accept either a sequence of key values or a
    mapping ``{key_name: key_value}``.
    """

    def __init__(self, metastore: MetaStore, rasterstore: RasterStore) -> None:
        """Store both backends and read the lazy loading limit from the settings.

        Arguments:

            metastore: Backend used for keys, datasets and metadata.
            rasterstore: Backend used to read tiles and compute metadata.

        """
        self.metastore = metastore
        self.rasterstore = rasterstore

        settings = terracotta.get_settings()
        # passed as max_shape when metadata is computed lazily in get_metadata
        self.LAZY_LOADING_MAX_SHAPE: Tuple[int, int] = settings.LAZY_LOADING_MAX_SHAPE

    @property
    def db_version(self) -> str:
        """Database version as reported by the meta store."""
        return self.metastore.db_version

    @property
    def key_names(self) -> Tuple[str, ...]:
        """Names of all keys as reported by the meta store."""
        return self.metastore.key_names

    def create(self, keys: Sequence[str], *,
               key_descriptions: Mapping[str, str] = None) -> None:
        """Forward to the meta store's ``create`` with the given key names
        and optional key descriptions.
        """
        self.metastore.create(keys=keys, key_descriptions=key_descriptions)

    def connect(self, verify: bool = True) -> contextlib.AbstractContextManager:
        """Return the meta store's connection context manager."""
        return self.metastore.connect(verify=verify)

    @requires_connection
    def get_keys(self) -> OrderedDict:
        """Return the keys as provided by the meta store."""
        return self.metastore.get_keys()

    @requires_connection
    @standardize_keys
    def get_datasets(self, keys: MultiValueKeysType = None,
                     page: int = 0, limit: int = None) -> Dict[Tuple[str, ...], Any]:
        """Return the datasets matching the given constraints.

        Arguments:

            keys: Constraints on the key values; passed to the meta store as ``where``.
                All datasets are eligible if not given.
            page: Forwarded to the meta store.
            limit: Forwarded to the meta store.

        Returns:

            Mapping from key value tuples to dataset handles.

        """
        return self.metastore.get_datasets(
            where=keys,
            page=page,
            limit=limit
        )

    @requires_connection
    @standardize_keys(requires_all_keys=True)
    def get_metadata(self, keys: ExtendedKeysType) -> Dict[str, Any]:
        """Return the metadata of a dataset, computing and storing it if missing.

        Arguments:

            keys: Keys of the dataset; all keys must be given.

        Raises:

            exceptions.InvalidKeyError: If ``keys`` is invalid or incomplete.
            exceptions.DatasetNotFoundError: If metadata is missing and no dataset
                matches ``keys``.

        """
        keys = cast(KeysType, keys)

        metadata = self.metastore.get_metadata(keys)

        if metadata is None:
            # metadata is not computed yet, trigger lazy loading
            handle = only_element(self.get_datasets(keys).values())
            metadata = self.compute_metadata(handle, max_shape=self.LAZY_LOADING_MAX_SHAPE)
            self.insert(keys, handle, metadata=metadata)

            # this is necessary to make the lazy loading tests pass...
            metadata = self.metastore.get_metadata(keys)
            assert metadata is not None

        return metadata

    @requires_connection
    @standardize_keys(requires_all_keys=True)
    def insert(
        self, keys: ExtendedKeysType,
        handle: Any, *,
        override_path: str = None,
        metadata: Mapping[str, Any] = None,
        skip_metadata: bool = False,
        **kwargs: Any
    ) -> None:
        """Register a dataset in the meta store.

        Arguments:

            keys: Keys of the dataset; all keys must be given.
            handle: Handle to access the dataset. Also used to compute metadata.
            override_path: If given (and non-empty), stored instead of ``handle``.
            metadata: Metadata to store. Computed from ``handle`` if not given,
                unless ``skip_metadata`` is set.
            skip_metadata: If true, do not compute missing metadata.
            **kwargs: Forwarded to the meta store's ``insert``.

        """
        keys = cast(KeysType, keys)

        if metadata is None and not skip_metadata:
            metadata = self.compute_metadata(handle)

        self.metastore.insert(
            keys=keys,
            handle=override_path or handle,
            metadata=metadata,
            **kwargs
        )

    @requires_connection
    @standardize_keys(requires_all_keys=True)
    def delete(self, keys: ExtendedKeysType) -> None:
        """Remove a dataset from the meta store.

        Arguments:

            keys: Keys of the dataset; all keys must be given.

        """
        keys = cast(KeysType, keys)

        self.metastore.delete(keys)

    # Validate keys like get_metadata / insert / delete do, so that incomplete
    # keys raise InvalidKeyError rather than tripping only_element's
    # single-match check when several datasets match.
    @standardize_keys(requires_all_keys=True)
    def get_raster_tile(self, keys: ExtendedKeysType, *,
                        tile_bounds: Sequence[float] = None,
                        tile_size: Sequence[int] = (256, 256),
                        preserve_values: bool = False,
                        asynchronous: bool = False) -> Any:
        """Load a raster tile of the dataset identified by ``keys``.

        The dataset handle is looked up through ``get_datasets``; all remaining
        arguments are forwarded unchanged to the raster store.

        Arguments:

            keys: Keys of the requested dataset; all keys must be given.
            tile_bounds: Forwarded to the raster store.
            tile_size: Forwarded to the raster store.
            preserve_values: Forwarded to the raster store.
            asynchronous: Forwarded to the raster store.

        Raises:

            exceptions.InvalidKeyError: If ``keys`` is invalid or incomplete.
            exceptions.DatasetNotFoundError: If no dataset matches ``keys``.

        """
        handle = only_element(self.get_datasets(keys).values())

        return self.rasterstore.get_raster_tile(
            handle=handle,
            tile_bounds=tile_bounds,
            tile_size=tile_size,
            preserve_values=preserve_values,
            asynchronous=asynchronous,
        )

    def compute_metadata(self, handle: str, *,
                         extra_metadata: Any = None,
                         use_chunks: bool = None,
                         max_shape: Sequence[int] = None) -> Dict[str, Any]:
        """Compute metadata for a dataset handle via the raster store.

        All arguments are forwarded unchanged to the raster store's
        ``compute_metadata``.
        """
        return self.rasterstore.compute_metadata(
            handle=handle,
            extra_metadata=extra_metadata,
            use_chunks=use_chunks,
            max_shape=max_shape,
        )

    def __repr__(self) -> str:
        """Return the representation of the underlying meta store."""
        return self.metastore.__repr__()
Loading