Merge pull request #240 from DHI-GRAS/sqlalchemy
Sqlalchemy
nickeopti authored Feb 21, 2022
2 parents: 16d6077 + 2f7f9bb, commit 7011a23
Showing 26 changed files with 2,095 additions and 2,018 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/test.yml
@@ -72,8 +72,7 @@ jobs:
- name: Initialize mypy
run: |
mypy . > /dev/null || true
mypy --install-types --non-interactive
mypy --install-types --non-interactive . || true
- name: Run tests
run: |
39 changes: 18 additions & 21 deletions docs/api.rst
@@ -15,30 +15,27 @@ Get a driver instance

.. autofunction:: terracotta.get_driver

SQLite driver
-------------
TerracottaDriver
----------------

.. autoclass:: terracotta.drivers.sqlite.SQLiteDriver
.. autoclass:: terracotta.drivers.TerracottaDriver
:members:
:undoc-members:
:special-members: __init__
:inherited-members:

Remote SQLite driver
--------------------

.. autoclass:: terracotta.drivers.sqlite_remote.RemoteSQLiteDriver
:members:
:undoc-members:
:special-members: __init__
:inherited-members:
:exclude-members: delete, insert, create
Supported metadata stores
-------------------------

MySQL driver
------------
SQLite metadata store
+++++++++++++++++++++

.. autoclass:: terracotta.drivers.mysql.MySQLDriver
:members:
:undoc-members:
:special-members: __init__
:inherited-members:
.. autoclass:: terracotta.drivers.sqlite_meta_store.SQLiteMetaStore

Remote SQLite metadata store
++++++++++++++++++++++++++++

.. autoclass:: terracotta.drivers.sqlite_remote_meta_store.RemoteSQLiteMetaStore

MySQL metadata store
++++++++++++++++++++

.. autoclass:: terracotta.drivers.mysql_meta_store.MySQLMetaStore
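
The rewritten API docs reflect the core change of this PR: the public driver is now a single TerracottaDriver that composes a metadata store (SQLite, remote SQLite, or MySQL) with a raster store. A minimal usage sketch, assuming a local SQLite database at the hypothetical path tc.sqlite; the expected repr is taken from the updated get_driver docstring further down in this diff:

```python
# Minimal sketch, not part of the diff. 'tc.sqlite' is a placeholder path.
import terracotta as tc

driver = tc.get_driver('tc.sqlite', provider='sqlite')
print(driver)
# Per the updated docstring, this prints something like:
# TerracottaDriver(
#     meta_store=SQLiteDriver('/home/terracotta/tc.sqlite'),
#     raster_store=GeoTiffRasterStore()
# )
```
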
2 changes: 2 additions & 0 deletions setup.py
@@ -35,6 +35,7 @@
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
'Framework :: Flask',
'Operating System :: Microsoft :: Windows :: Windows 10',
'Operating System :: MacOS :: MacOS X',
@@ -72,6 +73,7 @@
'shapely',
'rasterio>=1.0',
'shapely',
'sqlalchemy',
'toml',
'tqdm'
],
37 changes: 23 additions & 14 deletions terracotta/cog.py
@@ -25,7 +25,7 @@ def validate(src_path: str, strict: bool = True) -> bool:
def check_raster_file(src_path: str) -> ValidationInfo: # pragma: no cover
"""
Implementation from
https://github.com/cogeotiff/rio-cogeo/blob/0f00a6ee1eff602014fbc88178a069bd9f4a10da/rio_cogeo/cogeo.py
https://github.com/cogeotiff/rio-cogeo/blob/a07d914e2d898878417638bbc089179f01eb5b28/rio_cogeo/cogeo.py#L385
This function is the rasterio equivalent of
https://svn.osgeo.org/gdal/trunk/gdal/swig/python/samples/validate_cloud_optimized_geotiff.py
@@ -44,15 +44,13 @@ def check_raster_file(src_path: str) -> ValidationInfo: # pragma: no cover
errors.append('The file is not a GeoTIFF')
return errors, warnings, details

filelist = [os.path.basename(f) for f in src.files]
src_bname = os.path.basename(src_path)
if len(filelist) > 1 and src_bname + '.ovr' in filelist:
if any(os.path.splitext(x)[-1] == '.ovr' for x in src.files):
errors.append(
'Overviews found in external .ovr file. They should be internal'
)

overviews = src.overviews(1)
if src.width >= 512 or src.height >= 512:
if src.width > 512 and src.height > 512:
if not src.is_tiled:
errors.append(
'The file is greater than 512xH or 512xW, but is not tiled'
@@ -65,16 +63,28 @@ def check_raster_file(src_path: str) -> ValidationInfo: # pragma: no cover
)

ifd_offset = int(src.get_tag_item('IFD_OFFSET', 'TIFF', bidx=1))
ifd_offsets = [ifd_offset]
# Starting from GDAL 3.1, GeoTIFF and COG have ghost headers
# e.g:
# """
# GDAL_STRUCTURAL_METADATA_SIZE=000140 bytes
# LAYOUT=IFDS_BEFORE_DATA
# BLOCK_ORDER=ROW_MAJOR
# BLOCK_LEADER=SIZE_AS_UINT4
# BLOCK_TRAILER=LAST_4_BYTES_REPEATED
# KNOWN_INCOMPATIBLE_EDITION=NO
# """
#
# This header should be < 200bytes
if ifd_offset > 300:
errors.append(
f'The offset of the main IFD should be < 300. It is {ifd_offset} instead'
)

ifd_offsets = [ifd_offset]
details['ifd_offsets'] = {}
details['ifd_offsets']['main'] = ifd_offset

if not overviews == sorted(overviews):
if overviews and overviews != sorted(overviews):
errors.append('Overviews should be sorted')

for ix, dec in enumerate(overviews):
@@ -111,23 +121,22 @@ def check_raster_file(src_path: str) -> ValidationInfo: # pragma: no cover
)
)

block_offset = int(src.get_tag_item('BLOCK_OFFSET_0_0', 'TIFF', bidx=1))
if not block_offset:
errors.append('Missing BLOCK_OFFSET_0_0')
block_offset = src.get_tag_item('BLOCK_OFFSET_0_0', 'TIFF', bidx=1)

data_offset = int(block_offset) if block_offset else 0
data_offsets = [data_offset]
details['data_offsets'] = {}
details['data_offsets']['main'] = data_offset

for ix, dec in enumerate(overviews):
data_offset = int(
src.get_tag_item('BLOCK_OFFSET_0_0', 'TIFF', bidx=1, ovr=ix)
block_offset = src.get_tag_item(
'BLOCK_OFFSET_0_0', 'TIFF', bidx=1, ovr=ix
)
data_offset = int(block_offset) if block_offset else 0
data_offsets.append(data_offset)
details['data_offsets']['overview_{}'.format(ix)] = data_offset

if data_offsets[-1] < ifd_offsets[-1]:
if data_offsets[-1] != 0 and data_offsets[-1] < ifd_offsets[-1]:
if len(overviews) > 0:
errors.append(
'The offset of the first block of the smallest overview '
Expand Down Expand Up @@ -156,7 +165,7 @@ def check_raster_file(src_path: str) -> ValidationInfo: # pragma: no cover

for ix, dec in enumerate(overviews):
with rasterio.open(src_path, OVERVIEW_LEVEL=ix) as ovr_dst:
if ovr_dst.width >= 512 or ovr_dst.height >= 512:
if ovr_dst.width > 512 and ovr_dst.height > 512:
if not ovr_dst.is_tiled:
errors.append('Overview of index {} is not tiled'.format(ix))

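The cog.py changes sync Terracotta's validator with newer upstream rio-cogeo logic: external .ovr files are detected by extension, the tiling check uses a strict "> 512" threshold, the main IFD offset check tolerates GDAL 3.1 ghost headers, and a missing BLOCK_OFFSET_0_0 tag (sparse files) no longer counts as an error. A hedged usage sketch of the public entry point whose docstring is touched at the top of this file's diff; the file path is a placeholder:

```python
# Usage sketch, not part of the diff; 'raster.tif' is a placeholder path.
# validate(src_path, strict=True) -> bool is the signature shown in the
# first hunk header of this file.
from terracotta import cog

if not cog.validate('raster.tif'):
    print('raster.tif is not a valid cloud-optimized GeoTIFF')
```
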
55 changes: 35 additions & 20 deletions terracotta/drivers/__init__.py
@@ -3,33 +3,36 @@
Define an interface to retrieve Terracotta drivers.
"""

import os
from typing import Union, Tuple, Dict, Type
import urllib.parse as urlparse
from pathlib import Path

from terracotta.drivers.base import Driver
from terracotta.drivers.base_classes import MetaStore
from terracotta.drivers.terracotta_driver import TerracottaDriver
from terracotta.drivers.geotiff_raster_store import GeoTiffRasterStore

URLOrPathType = Union[str, Path]


def load_driver(provider: str) -> Type[Driver]:
def load_driver(provider: str) -> Type[MetaStore]:
if provider == 'sqlite-remote':
from terracotta.drivers.sqlite_remote import RemoteSQLiteDriver
return RemoteSQLiteDriver
from terracotta.drivers.sqlite_remote_meta_store import RemoteSQLiteMetaStore
return RemoteSQLiteMetaStore

if provider == 'mysql':
from terracotta.drivers.mysql import MySQLDriver
return MySQLDriver
from terracotta.drivers.mysql_meta_store import MySQLMetaStore
return MySQLMetaStore

if provider == 'sqlite':
from terracotta.drivers.sqlite import SQLiteDriver
return SQLiteDriver
from terracotta.drivers.sqlite_meta_store import SQLiteMetaStore
return SQLiteMetaStore

raise ValueError(f'Unknown database provider {provider}')


def auto_detect_provider(url_or_path: Union[str, Path]) -> str:
parsed_path = urlparse.urlparse(str(url_or_path))
def auto_detect_provider(url_or_path: str) -> str:
parsed_path = urlparse.urlparse(url_or_path)

scheme = parsed_path.scheme
if scheme == 's3':
@@ -41,10 +44,10 @@ def auto_detect_provider(url_or_path: Union[str, Path]) -> str:
return 'sqlite'


_DRIVER_CACHE: Dict[Tuple[URLOrPathType, str], Driver] = {}
_DRIVER_CACHE: Dict[Tuple[URLOrPathType, str, int], TerracottaDriver] = {}


def get_driver(url_or_path: URLOrPathType, provider: str = None) -> Driver:
def get_driver(url_or_path: URLOrPathType, provider: str = None) -> TerracottaDriver:
"""Retrieve Terracotta driver instance for the given path.
This function always returns the same instance for identical inputs.
@@ -65,25 +68,37 @@ def get_driver(url_or_path: URLOrPathType, provider: str = None) -> Driver:
>>> import terracotta as tc
>>> tc.get_driver('tc.sqlite')
SQLiteDriver('/home/terracotta/tc.sqlite')
TerracottaDriver(
meta_store=SQLiteDriver('/home/terracotta/tc.sqlite'),
raster_store=GeoTiffRasterStore()
)
>>> tc.get_driver('mysql://root@localhost/tc')
MySQLDriver('mysql://root@localhost:3306/tc')
TerracottaDriver(
meta_store=MySQLDriver('mysql+pymysql://localhost:3306/tc'),
raster_store=GeoTiffRasterStore()
)
>>> # pass provider if path is given in a non-standard way
>>> tc.get_driver('root@localhost/tc', provider='mysql')
MySQLDriver('mysql://root@localhost:3306/tc')
TerracottaDriver(
meta_store=MySQLDriver('mysql+pymysql://localhost:3306/tc'),
raster_store=GeoTiffRasterStore()
)
"""
url_or_path = str(url_or_path)

if provider is None: # try and auto-detect
provider = auto_detect_provider(url_or_path)

if isinstance(url_or_path, Path) or provider == 'sqlite':
url_or_path = str(Path(url_or_path).resolve())

DriverClass = load_driver(provider)
normalized_path = DriverClass._normalize_path(url_or_path)
cache_key = (normalized_path, provider)
cache_key = (normalized_path, provider, os.getpid())

if cache_key not in _DRIVER_CACHE:
_DRIVER_CACHE[cache_key] = DriverClass(url_or_path)
driver = TerracottaDriver(
meta_store=DriverClass(url_or_path),
raster_store=GeoTiffRasterStore()
)
_DRIVER_CACHE[cache_key] = driver

return _DRIVER_CACHE[cache_key]
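
get_driver now assembles a TerracottaDriver from the selected metadata store plus a GeoTiffRasterStore, and the cache key includes os.getpid(), so each worker process builds its own instance while repeated calls within one process keep returning the same object. A small sketch of that contract; the path is a placeholder:

```python
# Sketch of the caching contract, not part of the diff; 'tc.sqlite' is a
# placeholder path.
import terracotta as tc

d1 = tc.get_driver('tc.sqlite')
d2 = tc.get_driver('tc.sqlite')
assert d1 is d2  # same path, provider, and process -> same cached TerracottaDriver
```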