From 050aa01ad53cae5b48b22c4dc5cada971b0cfd90 Mon Sep 17 00:00:00 2001 From: jonavellecuerdo Date: Wed, 10 Jan 2024 13:03:49 -0500 Subject: [PATCH] Encapsulate configuration functions in a class Why these changes are being introduced: * Streamline logging configuration and environment variable management How this addresses that need: * Create Config class * Clearly identify required vs. optional env vars * Update dependent modules to use Config * Replace os.getenv() calls with Config. * Update unit tests in test_config Side effects of this change: * None --- harvester/cli.py | 11 +++--- harvester/config.py | 81 ++++++++++++++++++++++++++++++-------------- harvester/oai.py | 6 ++-- tests/conftest.py | 8 +++++ tests/test_config.py | 67 +++++++++++++++++++++++++++--------- 5 files changed, 125 insertions(+), 48 deletions(-) diff --git a/harvester/cli.py b/harvester/cli.py index cd949de..65a95ee 100644 --- a/harvester/cli.py +++ b/harvester/cli.py @@ -10,11 +10,13 @@ import click from sickle.oaiexceptions import NoRecordsMatch -from harvester.config import configure_logger, configure_sentry +from harvester.config import Config from harvester.oai import OAIClient, write_records, write_sets logger = logging.getLogger(__name__) +CONFIG = Config() + @click.group() @click.option( @@ -40,10 +42,9 @@ def main(ctx: click.Context, host: str, output_file: str, verbose: bool) -> None ctx.obj["START_TIME"] = perf_counter() ctx.obj["HOST"] = host ctx.obj["OUTPUT_FILE"] = output_file - - root_logger = logging.getLogger() - logger.info(configure_logger(root_logger, verbose)) - logger.info(configure_sentry()) + logger.info(CONFIG.configure_logger(verbose)) + logger.info(CONFIG.configure_sentry()) + CONFIG.check_required_env_vars() @main.command() diff --git a/harvester/config.py b/harvester/config.py index 589bb62..1af6a1f 100644 --- a/harvester/config.py +++ b/harvester/config.py @@ -3,6 +3,7 @@ """harvester.config module.""" import logging import os +from typing import Any import sentry_sdk @@ -12,30 +13,60 @@ MAX_ALLOWED_ERRORS = 10 -def configure_logger(logger: logging.Logger, verbose: bool) -> str: - if verbose: - logging.basicConfig( - format="%(asctime)s %(levelname)s %(name)s.%(funcName)s() line %(lineno)d: " - "%(message)s" - ) - logger.setLevel(logging.DEBUG) +class Config: + REQUIRED_ENV_VARS = ("WORKSPACE", "SENTRY_DSN") + OPTIONAL_ENV_VARS = ("RECORD_SKIP_LIST", "STATUS_UPDATE_INTERVAL") + + def __init__(self, logger: logging.Logger | None = None): + """Set root logger as default when creating class instance.""" + if logger is None: + self.logger = logging.getLogger() + else: + self.logger = logger + + def check_required_env_vars(self) -> None: + """Method to raise exception if required env vars not set.""" + missing_vars = [var for var in self.REQUIRED_ENV_VARS if not os.getenv(var)] + if missing_vars: + message = f"Missing required environment variables: {', '.join(missing_vars)}" + raise OSError(message) + + def configure_logger(self, verbose: bool) -> str: for handler in logging.root.handlers: - handler.addFilter(logging.Filter("harvester")) - else: - logging.basicConfig( - format=("%(asctime)s %(levelname)s %(name)s.%(funcName)s(): %(message)s") + handler.filters.clear() + if verbose: + logging.basicConfig( + format="%(asctime)s %(levelname)s %(name)s.%(funcName)s() " + "line %(lineno)d: %(message)s" + ) + self.logger.setLevel(logging.DEBUG) + for handler in logging.root.handlers: + handler.addFilter(logging.Filter("harvester")) + else: + logging.basicConfig( + format=("%(asctime)s %(levelname)s %(name)s.%(funcName)s(): %(message)s") + ) + self.logger.setLevel(logging.INFO) + return ( + f"Logger '{self.logger.name}' configured with level=" + f"{logging.getLevelName(self.logger.getEffectiveLevel())}" ) - logger.setLevel(logging.INFO) - return ( - f"Logger '{logger.name}' configured with level=" - f"{logging.getLevelName(logger.getEffectiveLevel())}" - ) - - -def configure_sentry() -> str: - env = os.getenv("WORKSPACE") - sentry_dsn = os.getenv("SENTRY_DSN") - if sentry_dsn and sentry_dsn.lower() != "none": - sentry_sdk.init(sentry_dsn, environment=env) - return f"Sentry DSN found, exceptions will be sent to Sentry with env={env}" - return "No Sentry DSN found, exceptions will not be sent to Sentry" + + def configure_sentry(self) -> str: + sentry_dsn = self.SENTRY_DSN + if sentry_dsn and sentry_dsn.lower() != "none": + sentry_sdk.init(sentry_dsn, environment=self.WORKSPACE) + return ( + "Sentry DSN found, exceptions will be sent to Sentry with env=" + f"{self.WORKSPACE}" + ) + return "No Sentry DSN found, exceptions will not be sent to Sentry" + + def __getattr__(self, name: str) -> Any: # noqa: ANN401 + """Provide dot notation access to configurations and env vars on this class.""" + if name in self.REQUIRED_ENV_VARS or name in self.OPTIONAL_ENV_VARS: + if name == "STATUS_UPDATE_INTERVAL": + return os.getenv(name, "1000") + return os.getenv(name) + message = f"'{name}' not a valid configuration variable" + raise AttributeError(message) diff --git a/harvester/oai.py b/harvester/oai.py index d4ccf32..9627c65 100644 --- a/harvester/oai.py +++ b/harvester/oai.py @@ -4,7 +4,6 @@ import json import logging -import os from collections.abc import Iterator from typing import Any, Literal @@ -19,12 +18,15 @@ MAX_ALLOWED_ERRORS, MAX_RETRIES, RETRY_STATUS_CODES, + Config, ) from harvester.exceptions import MaxAllowedErrorsReached from harvester.utils import send_sentry_message logger = logging.getLogger(__name__) +CONFIG = Config() + class OAIClient: def __init__( @@ -157,7 +159,7 @@ def write_records(records: Iterator, filepath: str) -> int: for record in records: file.write(" ".encode() + record.raw.encode() + "\n".encode()) count += 1 - if count % int(os.getenv("STATUS_UPDATE_INTERVAL", "1000")) == 0: + if count % int(CONFIG.STATUS_UPDATE_INTERVAL) == 0: logger.info( "Status update: %s records written to output file so far!", count, diff --git a/tests/conftest.py b/tests/conftest.py index b9e56ba..29ff60c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,12 +3,20 @@ import pytest from click.testing import CliRunner +from harvester.config import Config + @pytest.fixture(autouse=True) def _test_env(monkeypatch): + monkeypatch.setenv("SENTRY_DSN", "None") monkeypatch.setenv("WORKSPACE", "test") +@pytest.fixture +def config(): + return Config() + + @pytest.fixture def runner(): return CliRunner() diff --git a/tests/test_config.py b/tests/test_config.py index 874fa5c..e6e7060 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,35 +1,70 @@ import logging -from harvester.config import configure_logger, configure_sentry +import pytest +from harvester.config import Config -def test_configure_logger_not_verbose(): - logger = logging.getLogger(__name__) - result = configure_logger(logger, verbose=False) - assert logger.getEffectiveLevel() == int(logging.INFO) - assert result == "Logger 'tests.test_config' configured with level=INFO" + +def test_configure_logger_defaults_to_root_logger(): + config = Config(logger=None) + assert config.logger.name == "root" -def test_configure_logger_verbose(caplog): +def test_configure_logger_accepts_specific_logger(): logger = logging.getLogger(__name__) - result = configure_logger(logger, verbose=True) - assert logger.getEffectiveLevel() == int(logging.DEBUG) - assert result == "Logger 'tests.test_config' configured with level=DEBUG" + config = Config(logger=logger) + assert config.logger.name == "tests.test_config" + + +def test_configure_logger_not_verbose(config): + result = config.configure_logger(verbose=False) + assert config.logger.getEffectiveLevel() == int(logging.INFO) + assert result == "Logger 'root' configured with level=INFO" -def test_configure_sentry_no_env_variable(monkeypatch): +def test_configure_logger_verbose(config): + result = config.configure_logger(verbose=True) + assert config.logger.getEffectiveLevel() == int(logging.DEBUG) + assert result == "Logger 'root' configured with level=DEBUG" + + +def test_configure_sentry_no_env_variable(config, monkeypatch): + config.configure_logger(verbose=False) monkeypatch.delenv("SENTRY_DSN", raising=False) - result = configure_sentry() + result = config.configure_sentry() assert result == "No Sentry DSN found, exceptions will not be sent to Sentry" -def test_configure_sentry_env_variable_is_none(monkeypatch): +def test_configure_sentry_env_variable_is_none(config, monkeypatch): monkeypatch.setenv("SENTRY_DSN", "None") - result = configure_sentry() + result = config.configure_sentry() assert result == "No Sentry DSN found, exceptions will not be sent to Sentry" -def test_configure_sentry_env_variable_is_dsn(monkeypatch): +def test_configure_sentry_env_variable_is_dsn(config, monkeypatch): monkeypatch.setenv("SENTRY_DSN", "https://1234567890@00000.ingest.sentry.io/123456") - result = configure_sentry() + result = config.configure_sentry() assert result == "Sentry DSN found, exceptions will be sent to Sentry with env=test" + + +def test_config_check_required_env_vars_success(config): + config.check_required_env_vars() + + +def test_config_check_required_env_vars_error(config, monkeypatch): + monkeypatch.delenv("SENTRY_DSN") + with pytest.raises( + OSError, match="Missing required environment variables: SENTRY_DSN" + ): + config.check_required_env_vars() + + +def test_config_env_var_access_success(config): + assert config.STATUS_UPDATE_INTERVAL == "1000" + + +def test_config_env_var_access_error(config): + with pytest.raises( + AttributeError, match="'DOES_NOT_EXIST' not a valid configuration variable" + ): + _ = config.DOES_NOT_EXIST