Skip to content

Commit

Permalink
Merge pull request #42 from crim-ca/cli-config-loc
Browse files Browse the repository at this point in the history
  • Loading branch information
fmigneault authored Nov 28, 2023
2 parents 6aa01ef + 4bf10e8 commit ab03f0b
Show file tree
Hide file tree
Showing 7 changed files with 100 additions and 58 deletions.
10 changes: 8 additions & 2 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,14 @@

## [Unreleased](https://github.com/crim-ca/stac-populator) (latest)

<!-- insert list items of new changes here -->
* Fix datacube extension creation to match schema.
* Replace the logic that resolves and loads the configuration file of a specific populator implementation, so that it
  no longer depends on how the populator is invoked (`python <impl-module.py>` vs `stac-populator run <impl>`).
* Fix the configuration file of a populator implementation not being found when the package is installed.
* Allow a populator implementation to override the desired configuration file.
* Add missing CLI `default="full"` mode for `CMIP6_UofT` populator implementation.
* Fix Docker entrypoint to use `stac-populator`, making calls to the CLI more convenient.
* Add `get_logger` function to avoid repeated configuration across modules.
* Make sure that each implementation and module uses its own logger.

## [0.3.0](https://github.com/crim-ca/stac-populator/tree/0.3.0) (2023-11-16)

Expand Down
30 changes: 22 additions & 8 deletions STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import argparse
import json
import os
from datetime import datetime
from typing import Any, List, Literal, MutableMapping, NoReturn, Optional
from typing import Any, List, Literal, MutableMapping, NoReturn, Optional, Union

import pydantic_core
import pyessv
Expand All @@ -14,7 +15,9 @@
from STACpopulator.input import GenericLoader, ErrorLoader, THREDDSLoader
from STACpopulator.models import GeoJSONPolygon, STACItemProperties
from STACpopulator.populator_base import STACpopulatorBase
from STACpopulator.stac_utils import LOGGER, STAC_item_from_metadata, collection2literal
from STACpopulator.stac_utils import get_logger, STAC_item_from_metadata, collection2literal

LOGGER = get_logger(__name__)

# CMIP6 controlled vocabulary (CV)
CV = pyessv.WCRP.CMIP6
Expand Down Expand Up @@ -105,15 +108,20 @@ def __init__(
data_loader: GenericLoader,
update: Optional[bool] = False,
session: Optional[Session] = None,
config_file: Optional[Union[os.PathLike[str], str]] = None,
) -> None:
"""Constructor
:param stac_host: URL to the STAC API
:type stac_host: str
:param thredds_catalog_url: the URL to the THREDDS catalog to ingest
:type thredds_catalog_url: str
:param data_loader: loader to iterate over ingestion data.
"""
super().__init__(stac_host, data_loader, update=update, session=session)
super().__init__(
stac_host,
data_loader,
update=update,
session=session,
config_file=config_file,
)

@staticmethod
def make_cmip6_item_id(attrs: MutableMapping[str, Any]) -> str:
Expand Down Expand Up @@ -171,8 +179,14 @@ def make_parser() -> argparse.ArgumentParser:
parser.add_argument("stac_host", type=str, help="STAC API address")
parser.add_argument("thredds_catalog_URL", type=str, help="URL to the CMIP6 THREDDS catalog")
parser.add_argument("--update", action="store_true", help="Update collection and its items")
parser.add_argument("--mode", choices=["full", "single"],
parser.add_argument("--mode", choices=["full", "single"], default="full",
help="Operation mode, processing the full dataset or only the single reference.")
parser.add_argument(
"--config", type=str, help=(
"Override configuration file for the populator. "
"By default, uses the adjacent configuration to the implementation class."
)
)
add_request_options(parser)
return parser

Expand All @@ -188,7 +202,7 @@ def runner(ns: argparse.Namespace) -> Optional[int] | NoReturn:
# To be implemented
data_loader = ErrorLoader()

c = CMIP6populator(ns.stac_host, data_loader, update=ns.update, session=session)
c = CMIP6populator(ns.stac_host, data_loader, update=ns.update, session=session, config_file=ns.config)
c.ingest()


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
from STACpopulator.input import STACDirectoryLoader
from STACpopulator.models import GeoJSONPolygon, STACItemProperties
from STACpopulator.populator_base import STACpopulatorBase
from STACpopulator.stac_utils import LOGGER
from STACpopulator.stac_utils import get_logger

LOGGER = get_logger(__name__)


class DirectoryPopulator(STACpopulatorBase):
Expand Down
46 changes: 32 additions & 14 deletions STACpopulator/populator_base.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import functools
import logging
import inspect
import os
from abc import ABC, abstractmethod
from datetime import datetime
from typing import Any, Optional
from typing import Any, MutableMapping, Optional, Union

import pystac
from colorlog import ColoredFormatter
from requests.sessions import Session

from STACpopulator.api_requests import (
Expand All @@ -14,16 +14,10 @@
stac_host_reachable,
)
from STACpopulator.input import GenericLoader
from STACpopulator.stac_utils import load_collection_configuration, url_validate
from STACpopulator.stac_utils import get_logger, load_config, url_validate

LOGGER = logging.getLogger(__name__)
LOGFORMAT = " %(log_color)s%(levelname)s:%(reset)s %(blue)s[%(name)-30s]%(reset)s %(message)s"
formatter = ColoredFormatter(LOGFORMAT)
stream = logging.StreamHandler()
stream.setFormatter(formatter)
LOGGER.addHandler(stream)
LOGGER.setLevel(logging.INFO)
LOGGER.propagate = False

LOGGER = get_logger(__name__)


class STACpopulatorBase(ABC):
Expand All @@ -33,6 +27,7 @@ def __init__(
data_loader: GenericLoader,
update: Optional[bool] = False,
session: Optional[Session] = None,
config_file: Optional[Union[os.PathLike[str], str]] = "collection_config.yml",
) -> None:
"""Constructor
Expand All @@ -44,7 +39,8 @@ def __init__(
"""

super().__init__()
self._collection_info = None
self._collection_config_path = config_file
self._collection_info: MutableMapping[str, Any] = None
self._session = session
self.load_config()

Expand All @@ -57,7 +53,29 @@ def __init__(
self.create_stac_collection()

def load_config(self):
    """
    Reads details of the STAC Collection to be created from a configuration file.

    The file given to the constructor is used when it points to an existing file. Otherwise, the
    ``collection_config.yml`` located next to the module that defines the populator class is used.
    Once called, the collection information attribute is set with the loaded mapping.

    :raises RuntimeError: If a required definition is missing from the configuration file.
    :raises OSError: Propagated from the configuration loader if the resolved file does not exist.
    :raises ValueError: Propagated from the configuration loader if the file does not define a mapping.
    """
    # use explicit override, or default to local definition
    if not self._collection_config_path or not os.path.isfile(self._collection_config_path):
        # resolve relative to the file of the concrete class, not to the caller or working directory
        impl_path = inspect.getfile(self.__class__)
        impl_dir = os.path.dirname(impl_path)
        self._collection_config_path = os.path.join(impl_dir, "collection_config.yml")

    LOGGER.info("Using populator collection configuration file: [%s]", self._collection_config_path)
    collection_info = load_config(self._collection_config_path)

    req_definitions = ["title", "id", "description", "keywords", "license"]
    for req in req_definitions:
        if req not in collection_info:
            msg = f"'{req}' is required in the configuration file [{self._collection_config_path}]"
            LOGGER.error(msg)
            raise RuntimeError(msg)

    # only publish the configuration once it has been fully validated
    self._collection_info = collection_info

@property
def collection_name(self) -> str:
Expand Down
62 changes: 31 additions & 31 deletions STACpopulator/stac_utils.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
import datetime
import json
import logging
import os
import re
import sys
from typing import Any, Literal, MutableMapping
from typing import Any, Literal, MutableMapping, Union

import numpy as np
import pystac
Expand All @@ -13,14 +11,22 @@

from STACpopulator.models import STACItem

LOGGER = logging.getLogger(__name__)
LOG_FORMAT = " %(log_color)s%(levelname)s:%(reset)s %(blue)s[%(name)-30s]%(reset)s %(message)s"
formatter = ColoredFormatter(LOG_FORMAT)
stream = logging.StreamHandler()
stream.setFormatter(formatter)
LOGGER.addHandler(stream)
LOGGER.setLevel(logging.INFO)
LOGGER.propagate = False

def get_logger(
    name: str,
    log_fmt: str = " %(log_color)s%(levelname)s:%(reset)s %(blue)s[%(name)-30s]%(reset)s %(message)s",
) -> logging.Logger:
    """Return the logger registered under ``name``, set up for colored console output.

    The logger level is set to ``INFO`` and propagation to ancestor loggers is disabled.
    A stream handler using ``log_fmt`` is attached only when the logger has no handler yet,
    so calling this function several times with the same name does not duplicate output.

    :param name: Name of the logger to retrieve, typically ``__name__`` of the calling module.
    :param log_fmt: Format string passed to the colored formatter of the stream handler.
    :return: The configured logger.
    """
    logger = logging.getLogger(name)
    # ``logging.getLogger`` hands back the same object for a given name; adding a new handler on
    # every call would make each record be emitted once per call that was ever made.
    if not logger.handlers:
        stream = logging.StreamHandler()
        stream.setFormatter(ColoredFormatter(log_fmt))
        logger.addHandler(stream)
    logger.setLevel(logging.INFO)
    logger.propagate = False
    return logger


LOGGER = get_logger(__name__)


def url_validate(target: str) -> bool:
Expand All @@ -47,31 +53,25 @@ def url_validate(target: str) -> bool:
return True if re.match(url_regex, target) else False


def load_config(
    config_file: Union[os.PathLike[str], str],
) -> MutableMapping[str, Any]:
    """Reads a generic YAML or JSON configuration file.

    The file content is parsed with the YAML loader and must define a non-empty mapping.

    :param config_file: Path to the configuration file to load.
    :raises OSError: If the configuration file is not present.
    :raises ValueError: If the configuration file is empty or does not define a mapping.
    :return: A python dictionary describing a generic configuration.
    :rtype: MutableMapping[str, Any]
    """
    if not os.path.isfile(config_file):
        raise OSError(f"Missing configuration file does not exist: [{config_file}]")

    # Decode explicitly as UTF-8 so that parsing does not depend on the locale of the host.
    with open(config_file, encoding="utf-8") as f:
        # NOTE(review): ``yaml.Loader`` can construct arbitrary Python objects from tagged nodes.
        # The path may come from a user-supplied override; consider ``yaml.safe_load`` if
        # configuration files never rely on Python-specific tags.
        config_info = yaml.load(f, yaml.Loader)

    # An empty document loads as ``None`` and a top-level list or scalar is not usable as a configuration.
    if not isinstance(config_info, dict) or not config_info:
        raise ValueError(f"Invalid configuration file does not define a mapping: [{config_file}]")
    return config_info


def collection2literal(collection, property="label"):
Expand Down
3 changes: 1 addition & 2 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,4 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
RUN groupadd -r stac && useradd -r -g stac stac
USER stac

# FIXME: use common CLI
CMD ["bash"]
ENTRYPOINT ["stac-populator"]
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ exclude = [
"tests*",
]

[tool.setuptools.package-data]
STACpopulator = ["**/collection_config.yml"]

[project]
name = "STACpopulator"
version = "0.3.0"
Expand Down

0 comments on commit ab03f0b

Please sign in to comment.