Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into stac-ext-models
Browse files Browse the repository at this point in the history
  • Loading branch information
fmigneault committed Nov 28, 2023
2 parents 943ef28 + 6e885d9 commit f37f065
Show file tree
Hide file tree
Showing 10 changed files with 112 additions and 61 deletions.
13 changes: 12 additions & 1 deletion CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,18 @@
## [Unreleased](https://github.com/crim-ca/stac-populator) (latest)

<!-- insert list items of new changes here -->
* Fix datacube extension creation to match schema.

## [0.4.0](https://github.com/crim-ca/stac-populator/tree/0.4.0) (2023-11-27)


* Replace the logic that resolves and loads the implementation-specific configuration file of a populator to avoid depending on
how the populator is invoked (`python <impl-module.py>` vs `stac-populator run <impl>`).
* Fix configuration file of populator implementation not found when package is installed.
* Allow a populator implementation to override the desired configuration file.
* Add missing CLI `default="full"` mode for `CMIP6_UofT` populator implementation.
* Fix Docker entrypoint to use `stac-populator` to make call to the CLI more convenient.
* Add `get_logger` function to avoid repeated configuration across modules.
* Make sure that each implementation and module employs its own logger.

## [0.3.0](https://github.com/crim-ca/stac-populator/tree/0.3.0) (2023-11-16)

Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ MAKEFILE_NAME := $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST))
-include Makefile.config
APP_ROOT := $(abspath $(lastword $(MAKEFILE_NAME))/..)
APP_NAME := STACpopulator
APP_VERSION ?= 0.3.0
APP_VERSION ?= 0.4.0

DOCKER_COMPOSE_FILES := -f "$(APP_ROOT)/docker/docker-compose.yml"
DOCKER_TAG := ghcr.io/crim-ca/stac-populator:$(APP_VERSION)
Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# STAC Catalog Populator

![Latest Version](https://img.shields.io/badge/latest%20version-0.3.0-blue?logo=github)
![Commits Since Latest](https://img.shields.io/github/commits-since/crim-ca/stac-populator/0.3.0.svg?logo=github)
![Latest Version](https://img.shields.io/badge/latest%20version-0.4.0-blue?logo=github)
![Commits Since Latest](https://img.shields.io/github/commits-since/crim-ca/stac-populator/0.4.0.svg?logo=github)
![GitHub License](https://img.shields.io/github/license/crim-ca/stac-populator)

This repository contains a framework [STACpopulator](STACpopulator)
Expand Down Expand Up @@ -61,7 +61,7 @@ You can also employ the pre-built Docker, which can be called as follows,
where `[command]` corresponds to any of the above example operations.

```shell
docker run -ti ghcr.io/crim-ca/stac-populator:0.3.0 [command]
docker run -ti ghcr.io/crim-ca/stac-populator:0.4.0 [command]
```

*Note*: <br>
Expand Down
2 changes: 1 addition & 1 deletion STACpopulator/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.3.0"
__version__ = "0.4.0"
27 changes: 22 additions & 5 deletions STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import argparse
import json
from typing import Any, MutableMapping, NoReturn, Optional
import os
from typing import Any, MutableMapping, NoReturn, Optional, Union

from requests.sessions import Session
from pystac.extensions.datacube import DatacubeExtension
Expand All @@ -12,7 +13,9 @@
from STACpopulator.input import GenericLoader, ErrorLoader, THREDDSLoader
from STACpopulator.models import GeoJSONPolygon
from STACpopulator.populator_base import STACpopulatorBase
from STACpopulator.stac_utils import LOGGER
from STACpopulator.stac_utils import get_logger

LOGGER = get_logger(__name__)


class CMIP6populator(STACpopulatorBase):
Expand All @@ -25,13 +28,21 @@ def __init__(
data_loader: GenericLoader,
update: Optional[bool] = False,
session: Optional[Session] = None,
config_file: Optional[Union[os.PathLike[str], str]] = None,
) -> None:
"""Constructor
:param stac_host: URL to the STAC API
:type stac_host: str
:param data_loader: loader to iterate over ingestion data.
"""
super().__init__(stac_host, data_loader, update=update, session=session)
super().__init__(
stac_host,
data_loader,
update=update,
session=session,
config_file=config_file,
)

def create_stac_item(self, item_name: str, item_data: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
"""Creates the STAC item.
Expand Down Expand Up @@ -77,8 +88,14 @@ def make_parser() -> argparse.ArgumentParser:
parser.add_argument("stac_host", type=str, help="STAC API address")
parser.add_argument("href", type=str, help="URL to a THREDDS catalog or a NCML XML with CMIP6 metadata.")
parser.add_argument("--update", action="store_true", help="Update collection and its items")
parser.add_argument("--mode", choices=["full", "single"],
parser.add_argument("--mode", choices=["full", "single"], default="full",
help="Operation mode, processing the full dataset or only the single reference.")
parser.add_argument(
"--config", type=str, help=(
"Override configuration file for the populator. "
"By default, uses the adjacent configuration to the implementation class."
)
)
add_request_options(parser)
return parser

Expand All @@ -94,7 +111,7 @@ def runner(ns: argparse.Namespace) -> Optional[int] | NoReturn:
# To be implemented
data_loader = ErrorLoader()

c = CMIP6populator(ns.stac_host, data_loader, update=ns.update, session=session)
c = CMIP6populator(ns.stac_host, data_loader, update=ns.update, session=session, config_file=ns.config)
c.ingest()


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
from STACpopulator.input import STACDirectoryLoader
from STACpopulator.models import GeoJSONPolygon
from STACpopulator.populator_base import STACpopulatorBase
from STACpopulator.stac_utils import LOGGER
from STACpopulator.stac_utils import get_logger

LOGGER = get_logger(__name__)


class DirectoryPopulator(STACpopulatorBase):
Expand Down
46 changes: 32 additions & 14 deletions STACpopulator/populator_base.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import functools
import logging
import inspect
import os
from abc import ABC, abstractmethod
from datetime import datetime
from typing import Any, Optional, Type
from typing import Any, MutableMapping, Optional, Type, Union

import pystac
from colorlog import ColoredFormatter
from requests.sessions import Session

from STACpopulator.api_requests import (
Expand All @@ -15,16 +15,10 @@
)
from STACpopulator.input import GenericLoader
from STACpopulator.models import AnyGeometry
from STACpopulator.stac_utils import load_collection_configuration, url_validate
from STACpopulator.stac_utils import get_logger, load_config, url_validate

LOGGER = logging.getLogger(__name__)
LOGFORMAT = " %(log_color)s%(levelname)s:%(reset)s %(blue)s[%(name)-30s]%(reset)s %(message)s"
formatter = ColoredFormatter(LOGFORMAT)
stream = logging.StreamHandler()
stream.setFormatter(formatter)
LOGGER.addHandler(stream)
LOGGER.setLevel(logging.INFO)
LOGGER.propagate = False

LOGGER = get_logger(__name__)


class STACpopulatorBase(ABC):
Expand All @@ -34,6 +28,7 @@ def __init__(
data_loader: GenericLoader,
update: Optional[bool] = False,
session: Optional[Session] = None,
config_file: Optional[Union[os.PathLike[str], str]] = "collection_config.yml",
) -> None:
"""Constructor
Expand All @@ -45,7 +40,8 @@ def __init__(
"""

super().__init__()
self._collection_info = None
self._collection_config_path = config_file
self._collection_info: MutableMapping[str, Any] = None
self._session = session
self.load_config()

Expand All @@ -58,7 +54,29 @@ def __init__(
self.create_stac_collection()

def load_config(self):
self._collection_info = load_collection_configuration()
"""
Reads details of the STAC Collection to be created from a configuration file.
Once called, the collection information attribute should be set with relevant mapping attributes.
"""
# use explicit override, or default to local definition
if not self._collection_config_path or not os.path.isfile(self._collection_config_path):
impl_path = inspect.getfile(self.__class__)
impl_dir = os.path.dirname(impl_path)
impl_cfg = os.path.join(impl_dir, "collection_config.yml")
self._collection_config_path = impl_cfg

LOGGER.info("Using populator collection configuration file: [%s]", self._collection_config_path)
collection_info = load_config(self._collection_config_path)

req_definitions = ["title", "id", "description", "keywords", "license"]
for req in req_definitions:
if req not in collection_info.keys():
mgs = f"'{req}' is required in the configuration file [{self._collection_config_path}]"
LOGGER.error(mgs)
raise RuntimeError(mgs)

self._collection_info = collection_info

@property
def collection_name(self) -> str:
Expand Down
61 changes: 31 additions & 30 deletions STACpopulator/stac_utils.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,30 @@
import logging
import os
import re
import sys
from enum import Enum
from typing import Any, Literal, MutableMapping, Type
from typing import Any, Literal, MutableMapping, Type, Union

import numpy as np
import pystac
import yaml
from colorlog import ColoredFormatter

LOGGER = logging.getLogger(__name__)
LOG_FORMAT = " %(log_color)s%(levelname)s:%(reset)s %(blue)s[%(name)-30s]%(reset)s %(message)s"
formatter = ColoredFormatter(LOG_FORMAT)
stream = logging.StreamHandler()
stream.setFormatter(formatter)
LOGGER.addHandler(stream)
LOGGER.setLevel(logging.INFO)
LOGGER.propagate = False

def get_logger(
    name: str,
    log_fmt: str = " %(log_color)s%(levelname)s:%(reset)s %(blue)s[%(name)-30s]%(reset)s %(message)s",
) -> logging.Logger:
    """Return the logger registered under ``name``, configured for colored stream output.

    The logger level is set to ``INFO`` and propagation to ancestor loggers is disabled,
    so records are emitted only through the handlers attached to this logger.

    Calling this function more than once with the same ``name`` is safe: the colored
    stream handler is attached only the first time, which avoids duplicated log lines.

    :param name: name of the logger to retrieve, typically ``__name__`` of the calling module.
    :param log_fmt: format string handed to the ``ColoredFormatter`` of the stream handler.
    :return: the configured logger.
    """
    logger = logging.getLogger(name)

    # ``logging.getLogger`` returns the same object for a given name, so blindly adding a
    # handler on every call would make each record print once per call made so far.
    already_configured = any(
        isinstance(handler.formatter, ColoredFormatter) for handler in logger.handlers
    )
    if not already_configured:
        stream = logging.StreamHandler()
        stream.setFormatter(ColoredFormatter(log_fmt))
        logger.addHandler(stream)

    logger.setLevel(logging.INFO)
    logger.propagate = False
    return logger


LOGGER = get_logger(__name__)


def url_validate(target: str) -> bool:
Expand All @@ -44,31 +51,25 @@ def url_validate(target: str) -> bool:
return True if re.match(url_regex, target) else False


def load_collection_configuration() -> MutableMapping[str, Any]:
"""Reads details of the STAC Collection to be created from a configuration file. the
code expects a "collection_config.yml" file to be present in the app directory.
def load_config(
config_file: Union[os.PathLike[str], str],
) -> MutableMapping[str, Any]:
"""Reads a generic YAML or JSON configuration file.
:raises RuntimeError: If the configuration file is not present
:raises RuntimeError: If required values are not present in the configuration file
:return: A python dictionary describing the details of the Collection
:raises OSError: If the configuration file is not present
:raises ValueError: If the configuration file is not correctly formatted.
:return: A python dictionary describing a generic configuration.
:rtype: MutableMapping[str, Any]
"""
collection_info_filename = "collection_config.yml"
app_directory = os.path.dirname(sys.argv[0])

if not os.path.exists(os.path.join(app_directory, collection_info_filename)):
raise RuntimeError(f"Missing {collection_info_filename} file for this implementation")

with open(os.path.join(app_directory, collection_info_filename)) as f:
collection_info = yaml.load(f, yaml.Loader)
if not os.path.isfile(config_file):
raise OSError(f"Missing configuration file does not exist: [{config_file}]")

req_definitions = ["title", "id", "description", "keywords", "license"]
for req in req_definitions:
if req not in collection_info.keys():
LOGGER.error(f"'{req}' is required in the configuration file")
raise RuntimeError(f"'{req}' is required in the configuration file")
with open(config_file) as f:
config_info = yaml.load(f, yaml.Loader)

return collection_info
if not isinstance(config_info, dict) or not config_info:
raise ValueError(f"Invalid configuration file does not define a mapping: [{config_file}]")
return config_info


def collection2literal(collection, property="label") -> "Type[Literal]":
Expand Down
5 changes: 2 additions & 3 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ LABEL description.short="STAC Populator"
LABEL description.long="Utility to populate STAC Catalog, Collections and Items from various dataset/catalog sources."
LABEL maintainer="Francis Charette-Migneault <[email protected]>"
LABEL vendor="CRIM"
LABEL version="0.3.0"
LABEL version="0.4.0"

# setup paths
ENV APP_DIR=/opt/local/src/stac-populator
Expand All @@ -28,5 +28,4 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
RUN groupadd -r stac && useradd -r -g stac stac
USER stac

# FIXME: use common CLI
CMD ["bash"]
ENTRYPOINT ["stac-populator"]
7 changes: 5 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,12 @@ exclude = [
"tests*",
]

[tool.setuptools.package-data]
STACpopulator = ["**/collection_config.yml"]

[project]
name = "STACpopulator"
version = "0.3.0"
version = "0.4.0"
description = "Utility to populate STAC Catalog, Collections and Items from various dataset/catalog sources."
requires-python = ">=3.10"
dependencies = [
Expand Down Expand Up @@ -116,7 +119,7 @@ directory = "reports/coverage/html"
output = "reports/coverage.xml"

[tool.bumpversion]
current_version = "0.3.0"
current_version = "0.4.0"
commit = true
commit_args = "--no-verify"
tag = true
Expand Down

0 comments on commit f37f065

Please sign in to comment.