diff --git a/.github/CONTRIBUTING.rst b/.github/CONTRIBUTING.rst index dd725fdf..29e8495a 100644 --- a/.github/CONTRIBUTING.rst +++ b/.github/CONTRIBUTING.rst @@ -139,7 +139,7 @@ Before you submit a pull request, please follow these guidelines: * `numpydoc`_ * `reStructuredText (ReST)`_ -5. The pull request should work for Python 3.7 as well as raise test coverage. +5. The pull request should work for Python 3.8+ as well as raise test coverage. Pull requests are also checked for documentation build status and for `PEP8`_ compliance. The build statuses and build errors for pull requests can be found at: diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index fd9ce15b..7b722b41 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -13,10 +13,14 @@ jobs: matrix: tox-env: [black] steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 + - name: Cancel previous runs + uses: styfle/cancel-workflow-action@0.9.1 with: - python-version: 3.7 + access_token: ${{ github.token }} + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: "3.8" - name: Install tox run: pip install tox - name: Run linting suite @@ -28,19 +32,19 @@ jobs: strategy: matrix: include: - - python-version: 3.7 - tox-env: py37 - allowed_to_fail: false - - python-version: 3.8 + - python-version: "3.8" tox-env: py38 allowed_to_fail: false - - python-version: 3.9 + - python-version: "3.9" tox-env: py39 allowed_to_fail: false + - python-version: "3.10" + tox-env: py310 + allowed_to_fail: false steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - name: Install tox @@ -57,16 +61,19 @@ jobs: matrix: python-version: [3.8] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Setup conda with Python ${{ matrix.python-version }} uses: s-weigand/setup-conda@v1 with: update-conda: true python-version: ${{ matrix.python-version }} conda-channels: conda-forge, defaults + - name: Install mamba + run: | + conda install -c conda-forge mamba - name: Conda env configuration run: | - conda env create -f environment.yml + mamba env create -f environment.yml source activate clisops pip install -e ".[dev]" - name: Test with conda diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 96ee5cbc..9b375770 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,58 +3,57 @@ default_language_version: repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v3.4.0 + rev: v4.3.0 hooks: - id: trailing-whitespace - language_version: python3 exclude: setup.cfg - id: end-of-file-fixer - language_version: python3 exclude: setup.cfg - id: check-yaml - language_version: python3 - id: debug-statements - language_version: python3 -- repo: https://github.com/ambv/black - rev: 21.4b2 +- repo: https://github.com/psf/black + rev: 22.3.0 hooks: - id: black - language_version: python3 - args: ["--target-version", "py36"] + args: ["--target-version", "py38"] - repo: https://github.com/pycqa/flake8 - rev: 3.9.1 + rev: 4.0.1 hooks: - id: flake8 - language_version: python3 args: ['--config=setup.cfg'] -#- repo: https://github.com/pre-commit/mirrors-autopep8 -# rev: v1.4.4 -# hooks: -# - id: autopep8 -# args: ['--global-config=setup.cfg','--in-place'] -- repo: https://github.com/timothycrosley/isort - rev: 5.8.0 +- repo: https://github.com/PyCQA/isort 
+ rev: 5.10.1 hooks: - id: isort - language_version: python3 args: ['--profile', 'black'] + exclude: clisops/core/__init__.py #- repo: https://github.com/pycqa/pydocstyle -# rev: 5.0.2 +# rev: 6.1.1 # hooks: # - id: pydocstyle -# args: ["--conventions=numpy"] +# args: ["--convention=numpy"] - repo: https://github.com/asottile/pyupgrade - rev: v2.14.0 + rev: v2.34.0 hooks: - id: pyupgrade - language_version: python3 + args: ['--py38-plus'] - repo: meta hooks: - id: check-hooks-apply - id: check-useless-excludes - repo: https://github.com/kynan/nbstripout - rev: 0.4.0 + rev: 0.5.0 hooks: - id: nbstripout - language_version: python3 files: ".ipynb" + +ci: + autofix_commit_msg: | + [pre-commit.ci] auto fixes from pre-commit.com hooks + for more information, see https://pre-commit.ci + autofix_prs: true + autoupdate_branch: '' + autoupdate_commit_msg: '[pre-commit.ci] pre-commit autoupdate' + autoupdate_schedule: weekly + skip: [] + submodules: false diff --git a/.readthedocs.yml b/.readthedocs.yml index cefa3f5f..a05caf46 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -8,28 +8,21 @@ version: 2 # Build documentation in the docs/ directory with Sphinx sphinx: configuration: docs/conf.py - -# Build documentation with MkDocs -#mkdocs: -# configuration: mkdocs.yml + fail_on_warning: true # Optionally build your docs in additional formats such as PDF and ePub -formats: all +formats: + - pdf # Optionally set the version of Python and requirements required to build your docs -#python: -# version: 3.7 python: - version: 3.7 install: - method: pip path: . extra_requirements: - docs - -# conda: -# environment: docs/environment.yml - build: - image: stable + os: ubuntu-20.04 + tools: + python: "3.8" diff --git a/AUTHORS.rst b/AUTHORS.rst index 4750a7c6..bdcd7a7c 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -20,3 +20,4 @@ Contributors * Pascal Bourgault bourgault.pascal@ouranos.ca `@aulemahal `_ * David Huard huard.david@ouranos.ca `@huard `_ +* Martin Schupfner schupfner@dkrz.de `@sol1105 `_ diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index cbe663db..598d5e55 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -85,7 +85,7 @@ Ready to contribute? Here's how to set up ``clisops`` for local development. tests, including testing other Python versions with tox: $ flake8 clisops tests - $ black --target-version py36 clisops tests + $ black --target-version py38 clisops tests $ python setup.py test # (or pytest) $ tox @@ -106,6 +106,35 @@ Ready to contribute? Here's how to set up ``clisops`` for local development. #. Submit a pull request through the GitHub website. + + +Logging +------- + +``clisops`` uses the `loguru `_ library as its primary logging engine. To integrate this logging into other processes, use the loguru logger: + +.. code-block:: python + from loguru import logger + logger.warning("This is a warning message!") + +The mechanism for enabling log reporting in scripts/notebooks using ``loguru`` is as follows: + +.. code-block:: python + import sys + from loguru import logger + + logger.enable("clisops") + LEVEL = "INFO || DEBUG || WARNING || etc." + logger.add(sys.stdout, level=LEVEL) # for logging to stdout + # or + logger.add("my_log_file.log", level=LEVEL, enqueue=True) # for logging to a file + +For convenience, a preset logger configuration can be enabled via ``clisops.enable_logging()``. + +.. 
code-block:: python + from clisops import enable_logging + + enable_logging() + Pull Request Guidelines ----------------------- @@ -116,6 +145,6 @@ Before you submit a pull request, check that it meets these guidelines: #. If the pull request adds functionality, the docs should be updated. Put your new functionality into a function with a docstring, and add the feature to the list in README.md. -#. The pull request should work for Python 3.7, 3.8, and 3.9. Check +#. The pull request should work for Python 3.8, 3.9, and 3.10. Check https://github.com/roocs/clisops/actions and make sure that the tests pass for all supported Python versions. diff --git a/HISTORY.rst b/HISTORY.rst index 105c8d06..219fbad5 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -1,12 +1,63 @@ Version History =============== +v0.9.2 (unreleased) +------------------- + +Breaking Changes +^^^^^^^^^^^^^^^^ +* Support has been dropped for Python 3.7 and extended to Python 3.10. Python 3.7 is no longer tested in GitHub actions. + +v0.9.1 (2022-05-12) +------------------- + +Bug Fixes +^^^^^^^^^ +* Fix inconsistent bounds in metadata after subset operation (#224). + +Other Changes +^^^^^^^^^^^^^ +* Use ``roocs-utils`` 0.6.2 to avoid test failure (#226). +* Removed unneeded testing dep from environment.yml (#223). +* Merged pre-commit autoupdate (#227). + +v0.9.0 (2022-04-13) +------------------- + +New Features +^^^^^^^^^^^^ +* ``clisops.ops.average.average_time`` and ``clisops.core.average.average_time`` added (#211), allowing averaging over time frequencies of day, month and year. +* New function ``create_time_bounds`` in ``clisops.utils.time_utils``, to generate time bounds for temporally averaged datasets. + +* ``clisops`` now uses the `loguru `_ library as its primary logging engine (#216). + The mechanism for enabling log reporting in scripts/notebooks using ``loguru`` is as follows: + +.. code-block:: python + + import sys + from loguru import logger + + logger.enable("clisops") + LEVEL = "INFO || DEBUG || WARNING || etc." + logger.add(sys.stdout, level=LEVEL) # for logging to stdout + # or + logger.add("my_log_file.log", level=LEVEL, enqueue=True) # for logging to a file + +Other Changes +^^^^^^^^^^^^^ +* Pandas now pinned below version 1.4.0. +* Pre-commit configuration updated with code style conventions (black, pyupgrade) set to Python 3.7+ (#219). +* ``loguru`` is now an install dependency, with ``pytest-loguru`` as a development-only dependency. +* Added function to convert the longitude axis between different longitude frames (e.g. [-180, 180] and [0, 360]) (#217, #218). + v0.8.0 (2022-01-13) ------------------- New Features ^^^^^^^^^^^^ * ``clisops.core.average.average_shape`` copies the global and variable attributes from the input data to the results. +* ``clisops.ops.average.average_time`` and ``clisops.core.average.average_time`` added, allowing averaging over time frequencies of day, month and year. +* New function ``create_time_bounds`` in ``clisops.utils.time_utils``, to generate time bounds for temporally averaged datasets. Bug fixes ^^^^^^^^^ @@ -21,7 +72,6 @@ Other Changes * Minimum pygeos version set to 0.9. * Replace ``cascaded_union`` by ``unary_union`` to anticipate a `shapely` deprecation. - v0.7.0 (2021-10-26) ------------------- @@ -43,7 +93,6 @@ Other Changes ^^^^^^^^^^^^^ * Python 3.6 no longer tested in GitHub actions. 
- v0.6.5 (2021-06-10) ------------------- @@ -87,7 +136,6 @@ Other Changes ^^^^^^^^^^^^^ * Error message improved to include longitude bounds of the dataset when the bounds requested in ``ops.subset.subset`` are not within range and rolling could not be completed. - v0.6.2 (2021-03-22) ------------------- @@ -103,17 +151,18 @@ New Features v0.6.1 (2021-02-23) ------------------- + Bug Fixes ^^^^^^^^^ * Add ``cf-xarray`` as dependency. This is a dependency of ``roocs-utils``>=0.2.1 so is not a breaking change. * Remove ``python-dateutil``, ``fiona`` and ``geojson`` as dependencies, no longer needed. - v0.6.0 (2021-02-22) ------------------- + Breaking Changes ^^^^^^^^^^^^^^^^ -* New dev dependency: ``GitPython``==3.1.12 +* New dev dependency: ``GitPython``\ ==3.1.12 * ``roocs-utils``>=0.2.1 required. New Features @@ -138,7 +187,6 @@ Other Changes * Added functionality to ``core.subset.create_mask`` so it can accept ``GeoDataFrames`` with non-integer indexes. * ``clisops.utils.file_namers`` adjusted to allow values to be overwritten and extras to be added to the end before the file extension. - v0.5.1 (2021-01-11) ------------------- @@ -154,7 +202,7 @@ Other Changes v0.5.0 (2020-12-17) ------------------- +------------------- Breaking Changes ^^^^^^^^^^^^^^^^ @@ -171,7 +219,7 @@ Other Changes v0.4.0 (2020-11-10) ------------------ +------------------- Adding new features, updating doc strings and documentation and inclusion of static type support. @@ -207,7 +255,6 @@ Bug Fixes * Nudging time values to nearest available in dataset to fix a bug where subsetting failed when the exact date did not exist in the dataset. - Other Changes ^^^^^^^^^^^^^ @@ -218,7 +265,6 @@ Other Changes * md files changed to rst. * tests now use ``mini-esgf-data`` by default. - v0.3.1 (2020-08-04) ------------------- @@ -226,9 +272,8 @@ Other Changes ^^^^^^^^^^^^^ * Add missing ``rtree`` dependency to ensure correct spatial indexing. - v0.3.0 (2020-07-23) ------------------- +------------------- Other Changes ^^^^^^^^^^^^^ @@ -244,7 +289,7 @@ Other Changes v0.2.0 (2020-06-19) ------------------- +------------------- New Features ^^^^^^^^^^^^^ @@ -260,6 +305,6 @@ Other Changes v0.1.0 (2020-04-22) ------------------- +------------------- * First release. diff --git a/README.rst b/README.rst index 3c102c90..5112eebc 100644 --- a/README.rst +++ b/README.rst @@ -1,8 +1,6 @@ - clisops - climate simulation operations ======================================= - .. image:: https://img.shields.io/pypi/v/clisops.svg :target: https://pypi.python.org/pypi/clisops :alt: Pypi @@ -15,7 +13,6 @@ clisops - climate simulation operations :target: https://clisops.readthedocs.io/en/latest/?badge=latest :alt: Documentation - The ``clisops`` package (pronounced "clie-sops") provides a python library for running *data-reduction* operations on `Xarray `_ data sets or files that can be interpreted by Xarray. These basic operations (subsetting, averaging and @@ -30,7 +27,6 @@ the results. ``clisops`` can be used stand-alone to read individual, or groups of, NetCDF files directly. - * Free software: BSD * Documentation: https://clisops.readthedocs.io. @@ -45,15 +41,13 @@ The package provides the following operations: * regrid Credits -======= +------- This package was created with ``Cookiecutter`` and the ``audreyr/cookiecutter-pypackage`` project template. - * Cookiecutter: https://github.com/audreyr/cookiecutter * cookiecutter-pypackage: https://github.com/audreyr/cookiecutter-pypackage - .. 
image:: https://img.shields.io/badge/code%20style-black-000000.svg :target: https://github.com/python/black :alt: Python Black diff --git a/clisops/__init__.py b/clisops/__init__.py index 4432bf83..f2d6d2d2 100644 --- a/clisops/__init__.py +++ b/clisops/__init__.py @@ -1,21 +1,35 @@ -# -*- coding: utf-8 -*- """Top-level package for clisops.""" - -import logging.config import os +import warnings +from loguru import logger from roocs_utils.config import get_config -import clisops - from .__version__ import __author__, __email__, __version__ +from .utils.common import enable_logging + + +def showwarning(message, *args, **kwargs): + """Inject warnings from `warnings.warn` into `loguru`.""" + logger.warning(message) + showwarning_(message, *args, **kwargs) + + +showwarning_ = warnings.showwarning +warnings.showwarning = showwarning + +# Disable logging for clisops and remove the logger that is instantiated on import +logger.disable("clisops") +logger.remove() + + +# Workaround for roocs_utils to not re-import clisops +class Package: + __file__ = __file__ # noqa -logging.config.fileConfig( - os.path.join(os.path.dirname(__file__), "etc", "logging.conf"), - disable_existing_loggers=False, -) -CONFIG = get_config(clisops) +package = Package() +CONFIG = get_config(package) # Set the memory limit for each dask chunk chunk_memory_limit = CONFIG["clisops:read"].get("chunk_memory_limit", None) diff --git a/clisops/__version__.py b/clisops/__version__.py index ec28529d..9efa15fb 100644 --- a/clisops/__version__.py +++ b/clisops/__version__.py @@ -1,8 +1,7 @@ -# -*- coding: utf-8 -*- # This information is located in its own file so that it can be loaded # without importing the main package when its dependencies are not installed. # See: https://packaging.python.org/guides/single-sourcing-package-version __author__ = "Elle Smith" __email__ = "eleanor.smith@stfc.ac.uk" -__version__ = "0.8.0" +__version__ = "0.9.1" diff --git a/clisops/core/__init__.py b/clisops/core/__init__.py index 75479579..6800c6cc 100644 --- a/clisops/core/__init__.py +++ b/clisops/core/__init__.py @@ -1,4 +1,3 @@ -from .regrid import Grid, Weights, regrid from .subset import ( create_mask, subset_bbox, @@ -10,3 +9,5 @@ subset_time_by_components, subset_time_by_values, ) + +from .regrid import Grid, Weights, regrid diff --git a/clisops/core/average.py b/clisops/core/average.py index eea5d3a2..ee64e6ee 100644 --- a/clisops/core/average.py +++ b/clisops/core/average.py @@ -3,15 +3,25 @@ from pathlib import Path from typing import Sequence, Tuple, Union +import cf_xarray import geopandas as gpd import numpy as np import xarray as xr from roocs_utils.exceptions import InvalidParameterValue -from roocs_utils.xarray_utils.xarray_utils import get_coord_type, known_coord_types +from roocs_utils.xarray_utils.xarray_utils import ( + get_coord_by_type, + get_coord_type, + known_coord_types, +) + +from clisops.utils.time_utils import create_time_bounds from .subset import shape_bbox_indexer -__all__ = ["average_over_dims", "average_shape"] +__all__ = ["average_over_dims", "average_shape", "average_time"] + +# see https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects +freqs = {"day": "1D", "month": "1MS", "year": "1AS"} def average_shape( @@ -218,3 +228,69 @@ def average_over_dims( if isinstance(ds, xr.Dataset): return xr.merge((ds_averaged_over_dims, untouched_ds)) return ds_averaged_over_dims + + +def average_time( + ds: Union[xr.DataArray, xr.Dataset], + freq: str, +) -> Union[xr.DataArray, xr.Dataset]: + 
""" + Average a DataArray or Dataset over the time frequency specified. + + Parameters + ---------- + ds : Union[xr.DataArray, xr.Dataset] + Input values. + freq: str + The frequency to average over. One of "month", "year". + + Returns + ------- + Union[xr.DataArray, xr.Dataset] + New Dataset or DataArray object averaged over the indicated time frequency. + + Examples + -------- + >>> from clisops.core.average import average_time # doctest: +SKIP + >>> pr = xr.open_dataset(path_to_pr_file).pr # doctest: +SKIP + ... + # Average data array over each month + >>> prAvg = average_time(pr, freq='month') # doctest: +SKIP + """ + + if not freq: + raise InvalidParameterValue( + "At least one frequency for averaging must be provided" + ) + + if freq not in list(freqs.keys()): + raise InvalidParameterValue( + f"Time frequency for averaging must be one of {list(freqs.keys())}." + ) + + # check time coordinate exists and get name + # For roocs_utils 0.5.0 + t = get_coord_by_type(ds, "time", ignore_aux_coords=False) + if t is None: + raise Exception("Time dimension could not be found") + # For latest roocs_utils (master) + # try: + # t = get_coord_by_type(ds, "time", ignore_aux_coords=False) + # except KeyError: + # raise Exception("Time dimension could not be found") + + # resample and average over time + ds_t_avg = ds.resample(indexer={t.name: freqs[freq]}).mean( + dim=t.name, skipna=True, keep_attrs=True + ) + + # generate time_bounds depending on frequency + time_bounds = create_time_bounds(ds_t_avg, freq) + + # get name of bounds dimension for time + bnds = ds.cf.get_bounds_dim_name("time") + + # add time bounds to dataset + ds_t_avg = ds_t_avg.assign({"time_bnds": ((t.name, bnds), np.asarray(time_bounds))}) + + return ds_t_avg diff --git a/clisops/core/regrid.py b/clisops/core/regrid.py index 56e78d32..e99b7966 100644 --- a/clisops/core/regrid.py +++ b/clisops/core/regrid.py @@ -1,16 +1,15 @@ """Regrid module.""" +import functools import warnings from pathlib import Path from typing import Tuple, Union -import functools -from pkg_resources import parse_version import cf_xarray as cfxr import numpy as np import roocs_grids import scipy import xarray as xr - +from pkg_resources import parse_version XESMF_MINIMUM_VERSION = "0.6.0" @@ -288,21 +287,21 @@ def __str__(self): def __repr__(self): info = ( - "clisops {}\n".format(self.__str__()) + f"clisops {self.__str__()}\n" + ( - "Lat x Lon: {} x {}\n".format(self.nlat, self.nlon) + f"Lat x Lon: {self.nlat} x {self.nlon}\n" if self.type != "irregular" else "" ) - + "Gridcells: {}\n".format(self.ncells) - + "Format: {}\n".format(self.format) - + "Type: {}\n".format(self.type) - + "Extent: {}\n".format(self.extent) - + "Source: {}\n".format(self.source) + + f"Gridcells: {self.ncells}\n" + + f"Format: {self.format}\n" + + f"Type: {self.type}\n" + + f"Extent: {self.extent}\n" + + f"Source: {self.source}\n" + "Bounds? 
{}\n".format( self.lat_bnds is not None and self.lon_bnds is not None ) - + "Permanent Mask: {}".format(self.mask) + + f"Permanent Mask: {self.mask}" ) return info diff --git a/clisops/core/subset.py b/clisops/core/subset.py index 0a5790f2..225b61f6 100644 --- a/clisops/core/subset.py +++ b/clisops/core/subset.py @@ -10,7 +10,7 @@ import geopandas as gpd import numpy as np import xarray -from pandas.api.types import is_integer_dtype +from pandas.api.types import is_integer_dtype # noqa from pyproj import Geod from pyproj.crs import CRS from pyproj.exceptions import CRSError @@ -259,11 +259,11 @@ def func_checker(*args, **kwargs): f"Input longitude bounds ({kwargs[lon]}) cross the 0 degree meridian but" " dataset longitudes are all positive." ) - if np.all((ds_lon <= 0) | (np.isnan(ds_lon))) and np.any(kwargs[lon] > 0): + if np.all((ds_lon <= 0) | (np.isnan(ds_lon))) and np.any(kwargs[lon] > 180): if isinstance(kwargs[lon], float): kwargs[lon] -= 360 else: - kwargs[lon][kwargs[lon] < 0] -= 360 + kwargs[lon][kwargs[lon] <= 180] -= 360 return func(*args, **kwargs) @@ -648,11 +648,11 @@ def _curvilinear_grid_exterior_polygon(ds, mode="bbox"): from shapely.ops import unary_union def round_up(x, decimal=1): - f = 10 ** decimal + f = 10**decimal return math.ceil(x * f) / f def round_down(x, decimal=1): - f = 10 ** decimal + f = 10**decimal return math.floor(x * f) / f if mode == "bbox": diff --git a/clisops/etc/logging.conf b/clisops/etc/logging.conf deleted file mode 100644 index 3a0d6ceb..00000000 --- a/clisops/etc/logging.conf +++ /dev/null @@ -1,28 +0,0 @@ -[loggers] -keys=root,simpleExample - -[handlers] -keys=consoleHandler - -[formatters] -keys=simpleFormatter - -[logger_root] -level=INFO -handlers=consoleHandler - -[logger_simpleExample] -level=DEBUG -handlers=consoleHandler -qualname=simpleExample -propagate=0 - -[handler_consoleHandler] -class=StreamHandler -level=INFO -formatter=simpleFormatter -args=(sys.stdout,) - -[formatter_simpleFormatter] -format=%(asctime)s - %(name)s - %(levelname)s - %(message)s -datefmt= diff --git a/clisops/ops/average.py b/clisops/ops/average.py index bc644384..26586bc0 100644 --- a/clisops/ops/average.py +++ b/clisops/ops/average.py @@ -2,26 +2,17 @@ from typing import List, Optional, Tuple, Union import xarray as xr +from roocs_utils.exceptions import InvalidParameterValue from roocs_utils.parameter.dimension_parameter import DimensionParameter -from roocs_utils.xarray_utils.xarray_utils import ( - convert_coord_to_axis, - get_coord_type, - known_coord_types, - open_xr_dataset, -) - -from clisops import logging, utils +from roocs_utils.xarray_utils.xarray_utils import convert_coord_to_axis + from clisops.core import average from clisops.ops.base_operation import Operation -from clisops.utils.common import expand_wildcards from clisops.utils.file_namers import get_file_namer -from clisops.utils.output_utils import get_output, get_time_slices -__all__ = [ - "average_over_dims", -] +# from clisops.utils.output_utils import get_output, get_time_slices -LOGGER = logging.getLogger(__file__) +__all__ = ["average_over_dims", "average_time"] class Average(Operation): @@ -97,3 +88,75 @@ def average_over_dims( """ op = Average(**locals()) return op.process() + + +class AverageTime(Operation): + def _resolve_params(self, **params): + freq = params.get("freq", None) + + if not freq: + raise InvalidParameterValue( + "At least one frequency for averaging must be provided" + ) + + if freq not in list(average.freqs.keys()): + raise InvalidParameterValue( + f"Time 
frequency for averaging must be one of {list(average.freqs.keys())}." + ) + + self.params = {"freq": freq} + + def _get_file_namer(self): + extra = f"_avg-{self.params.get('freq')}" + namer = get_file_namer(self._file_namer)(extra=extra) + + return namer + + def _calculate(self): + avg_ds = average.average_time( + self.ds, + self.params.get("freq", None), + ) + + return avg_ds + + +def average_time( + ds, + freq: str, + output_dir: Optional[Union[str, Path]] = None, + output_type="netcdf", + split_method="time:auto", + file_namer="standard", +) -> List[Union[xr.Dataset, str]]: + """Average a dataset over the time frequency specified. + + Parameters + ---------- + ds: Union[xr.Dataset, str] + freq: str + The frequency to average over. One of "day", "month", "year". + output_dir: Optional[Union[str, Path]] = None + output_type: {"netcdf", "nc", "zarr", "xarray"} + split_method: {"time:auto"} + file_namer: {"standard", "simple"} + + Returns + ------- + List[Union[xr.Dataset, str]] + A list of the outputs in the format selected; str corresponds to file paths if the + output format selected is a file. + + Examples + -------- + | ds: xarray Dataset or "cmip5.output1.MOHC.HadGEM2-ES.rcp85.mon.atmos.Amon.r1i1p1.latest.tas" + | freq: "month" + | output_dir: "/cache/wps/procs/req0111" + | output_type: "netcdf" + | split_method: "time:auto" + | file_namer: "standard" + + """ + op = AverageTime(**locals()) + return op.process() diff --git a/clisops/ops/base_operation.py b/clisops/ops/base_operation.py index f5bbb0b2..b2230328 100644 --- a/clisops/ops/base_operation.py +++ b/clisops/ops/base_operation.py @@ -1,17 +1,15 @@ from pathlib import Path import xarray as xr +from loguru import logger from roocs_utils.xarray_utils.xarray_utils import get_main_variable, open_xr_dataset -from clisops import logging, utils from clisops.utils.common import expand_wildcards from clisops.utils.file_namers import get_file_namer from clisops.utils.output_utils import get_output, get_time_slices -LOGGER = logging.getLogger(__file__) - -class Operation(object): +class Operation: """ Base class for all Operations. """ @@ -73,13 +71,15 @@ def _remove_redundant_fill_values(self, ds): """ Get coordinate variables and remove fill values added by xarray (CF conventions say that coordinate variables cannot have missing values). Get bounds variables and remove fill values added by xarray. + + See issue: https://github.com/roocs/clisops/issues/224 """ if isinstance(ds, xr.Dataset): main_var = get_main_variable(ds) for coord_id in ds[main_var].coords: # remove fill value from coordinate variables - if ds.coords[coord_id].dims == (coord_id,): - ds[coord_id].encoding["_FillValue"] = None + # if ds.coords[coord_id].dims == (coord_id,): + ds[coord_id].encoding["_FillValue"] = None # remove fill value from bounds variables if they exist try: bnd = ds.cf.get_bounds(coord_id).name @@ -88,6 +88,29 @@ def _remove_redundant_fill_values(self, ds): continue return ds + def _remove_redundant_coordinates_from_bounds(self, ds): + """ + This method removes redundant coordinates from bounds, for example: + + double time_bnds(time, bnds) ; + time_bnds:coordinates = "height" ; + + Programs like cdo will complain about this: + + Warning (cdf_set_var): Inconsistent variable definition for time_bnds! 
+ + See issue: https://github.com/roocs/clisops/issues/224 + """ + if isinstance(ds, xr.Dataset): + main_var = get_main_variable(ds) + for coord_id in ds[main_var].coords: + try: + bnd = ds.cf.get_bounds(coord_id).name + ds[bnd].encoding["coordinates"] = None + except KeyError: + continue + return ds + def process(self): """ Main processing method used by all sub-classes. @@ -110,6 +133,8 @@ def process(self): # remove fill values from lat/lon/time if required processed_ds = self._remove_redundant_fill_values(processed_ds) + # remove redundant coordinates from bounds + processed_ds = self._remove_redundant_coordinates_from_bounds(processed_ds) # Work out how many outputs should be created based on the size # of the array. Manage this as a list of time slices. @@ -127,7 +152,7 @@ def process(self): else: result_ds = processed_ds.sel(time=slice(tslice[0], tslice[1])) - LOGGER.info(f"Processing {self.__class__.__name__} for times: {tslice}") + logger.info(f"Processing {self.__class__.__name__} for times: {tslice}") # Get the output (file or xarray Dataset) # When this is a file: xarray will read all the data and write the file diff --git a/clisops/ops/regrid.py b/clisops/ops/regrid.py index ba9386c7..cfd55a74 100644 --- a/clisops/ops/regrid.py +++ b/clisops/ops/regrid.py @@ -2,8 +2,8 @@ from typing import List, Optional, Tuple, Union import xarray as xr +from loguru import logger -from clisops import logging from clisops.core import Grid, Weights from clisops.core import regrid as core_regrid from clisops.ops.base_operation import Operation @@ -14,8 +14,6 @@ "regrid", ] -LOGGER = logging.getLogger(__file__) - supported_regridding_methods = ["conservative", "patch", "nearest_s2d", "bilinear"] @@ -62,7 +60,7 @@ def _resolve_params(self, **params): "Please choose one of %s." % ", ".join(supported_regridding_methods) ) - LOGGER.debug( + logger.debug( f"Input parameters: method: {method}, grid: {grid}, adaptive_masking: {adaptive_masking_threshold}" ) @@ -87,7 +85,7 @@ def _resolve_params(self, **params): # which specifies a default filename (which has but not much to do with the filename we would give the weight file). # Better option might be to have the Weights class extend the Regridder class or to define # a __str__() method for the Weights class. 
- LOGGER.debug( + logger.debug( "Resolved parameters: grid_in: {}, grid_out: {}, regridder: {}".format( self.params.get("grid_in").__str__(), self.params.get("grid_out").__str__(), diff --git a/clisops/ops/subset.py b/clisops/ops/subset.py index b6504a35..a92fccce 100644 --- a/clisops/ops/subset.py +++ b/clisops/ops/subset.py @@ -2,14 +2,13 @@ from typing import Dict, List, Optional, Tuple, Union import xarray as xr +from loguru import logger from roocs_utils.parameter import parameterise from roocs_utils.parameter.area_parameter import AreaParameter from roocs_utils.parameter.level_parameter import LevelParameter from roocs_utils.parameter.time_components_parameter import TimeComponentsParameter from roocs_utils.parameter.time_parameter import TimeParameter -from roocs_utils.xarray_utils.xarray_utils import open_xr_dataset -from clisops import logging from clisops.core import ( subset_bbox, subset_level, @@ -18,18 +17,14 @@ subset_time_by_components, subset_time_by_values, ) -from clisops.core.subset import assign_bounds, get_lat, get_lon +from clisops.core.subset import assign_bounds, get_lat, get_lon # noqa from clisops.ops.base_operation import Operation from clisops.utils.common import expand_wildcards -from clisops.utils.dataset_utils import check_lon_alignment +from clisops.utils.dataset_utils import cf_convert_between_lon_frames from clisops.utils.file_namers import get_file_namer from clisops.utils.output_utils import get_output, get_time_slices -__all__ = [ - "Subset", -] - -LOGGER = logging.getLogger(__file__) +__all__ = ["Subset", "subset"] class Subset(Operation): @@ -40,7 +35,7 @@ def _resolve_params(self, **params): level = params.get("level", None) time_comps = params.get("time_components", None) - LOGGER.debug( + logger.debug( f"Mapping parameters: time: {time}, area: {area}, " f"level: {level}, time_components: {time_comps}." ) @@ -87,9 +82,12 @@ def _calculate(self): ) # subset with space and optionally time and level - LOGGER.debug(f"subset_bbox with parameters: {self.params}") + logger.debug(f"subset_bbox with parameters: {self.params}") # bounds are always ascending, so if lon is descending rolling will not work. - ds = check_lon_alignment(self.ds, self.params.get("lon_bnds")) + ds, lb, ub = cf_convert_between_lon_frames( + self.ds, self.params.get("lon_bnds") + ) + self.params["lon_bnds"] = (lb, ub) try: kwargs = {} valid_args = [ @@ -109,8 +107,9 @@ def _calculate(self): lon_min, lon_max = lon.values.min(), lon.values.max() raise Exception( f"The requested longitude subset {self.params.get('lon_bnds')} is not within the longitude bounds " - f"of this dataset and the data could not be converted to this longitude frame successfully. " - f"Please re-run your request with longitudes within the bounds of the dataset: ({lon_min:.2f}, {lon_max:.2f})" + "of this dataset and the data could not be converted to this longitude frame successfully. 
" + "Please re-run your request with longitudes within the bounds of the dataset: " + f"({lon_min:.2f}, {lon_max:.2f})" ) else: kwargs = {} @@ -120,7 +119,7 @@ def _calculate(self): # Subset over time interval if requested if any(kwargs.values()): - LOGGER.debug(f"subset_time with parameters: {kwargs}") + logger.debug(f"subset_time with parameters: {kwargs}") result = subset_time(self.ds, **kwargs) # Subset a series of time values if requested elif self.params.get("time_values"): @@ -146,18 +145,18 @@ def _calculate(self): ) self.params["first_level"], self.params["last_level"] = last, first - LOGGER.debug(f"subset_level with parameters: {kwargs}") + logger.debug(f"subset_level with parameters: {kwargs}") result = subset_level(result, **kwargs) elif self.params.get("level_values", None): kwargs = {"level_values": self.params["level_values"]} - LOGGER.debug(f"subset_level_by_values with parameters: {kwargs}") + logger.debug(f"subset_level_by_values with parameters: {kwargs}") result = subset_level_by_values(result, **kwargs) # Now apply time components if specified time_comps = self.params.get("time_components") if time_comps: - LOGGER.debug(f"subset_by_time_components with parameters: {time_comps}") + logger.debug(f"subset_by_time_components with parameters: {time_comps}") result = subset_time_by_components(result, time_components=time_comps) return result @@ -190,26 +189,14 @@ def subset( split_method="time:auto", file_namer="standard", ) -> List[Union[xr.Dataset, str]]: - """ + """Subset operation. Parameters ---------- ds: Union[xr.Dataset, str] time: Optional[Union[str, Tuple[str, str], TimeParameter]] = None, - area: Optional[ - Union[ - str, - Tuple[ - Union[int, float, str], - Union[int, float, str], - Union[int, float, str], - Union[int, float, str], - ], - AreaParameter - ] - ] = None, - level: Optional[Union[str, Tuple[Union[int, float, str], Union[int, float, str]], - LevelParameter] = None, + area: str or AreaParameter or Tuple[Union[int, float, str], Union[int, float, str], Union[int, float, str], Union[int, float, str]], optional + level: Optional[Union[str, Tuple[Union[int, float, str], Union[int, float, str]], LevelParameter] = None, time_components: Optional[Union[str, Dict, TimeComponentsParameter]] = None, output_dir: Optional[Union[str, Path]] = None output_type: {"netcdf", "nc", "zarr", "xarray"} @@ -237,9 +224,9 @@ def subset( Note ---- - If you request a selection range (such as level, latitude or longitude) that specifies the lower - and upper bounds in the opposite direction to the actual coordinate values then clisops.ops.subset - will detect this issue and reverse your selection before returning the data subset. + If you request a selection range (such as level, latitude or longitude) that specifies the lower + and upper bounds in the opposite direction to the actual coordinate values then clisops.ops.subset + will detect this issue and reverse your selection before returning the data subset. 
""" op = Subset(**locals()) return op.process() diff --git a/clisops/utils/common.py b/clisops/utils/common.py index 62efc6fb..a8c1c8a3 100644 --- a/clisops/utils/common.py +++ b/clisops/utils/common.py @@ -1,7 +1,8 @@ +import sys from pathlib import Path -from typing import Union +from typing import List, Union -from roocs_utils.parameter import parameterise +from loguru import logger def expand_wildcards(paths: Union[str, Path]) -> list: @@ -9,3 +10,36 @@ def expand_wildcards(paths: Union[str, Path]) -> list: path = Path(paths).expanduser() parts = path.parts[1:] if path.is_absolute() else path.parts return [f for f in Path(path.root).glob(str(Path("").joinpath(*parts)))] + + +def _logging_examples() -> None: + """Testing module""" + logger.trace("0") + logger.debug("1") + logger.info("2") + logger.success("2.5") + logger.warning("3") + logger.error("4") + logger.critical("5") + + +def enable_logging() -> List[int]: + logger.enable("clisops") + + config = dict( + handlers=[ + dict( + sink=sys.stdout, + format="{time:YYYY-MM-DD HH:mm:ss.SSS Z UTC}" + " | {level} | {name}:{function}:{line}" + " | {message}", + level="INFO", + ), + dict( + sink=sys.stderr, + format="{time:YYYY-MM-DD HH:mm:ss.SSS Z UTC} | {level} | {name}:{function}:{line} | {message}", + level="WARNING", + ), + ] + ) + return logger.configure(**config) diff --git a/clisops/utils/dataset_utils.py b/clisops/utils/dataset_utils.py index 73ce0075..baf27659 100644 --- a/clisops/utils/dataset_utils.py +++ b/clisops/utils/dataset_utils.py @@ -1,22 +1,25 @@ import math +import warnings +from typing import Optional +import cf_xarray as cfxr import cftime import numpy as np +import xarray as xr +from roocs_utils.exceptions import InvalidParameterValue from roocs_utils.utils.time_utils import str_to_AnyCalendarDateTime from roocs_utils.xarray_utils.xarray_utils import get_coord_by_type -from clisops import logging - -LOGGER = logging.getLogger(__file__) - def calculate_offset(lon, first_element_value): - """ - Calculate the number of elements to roll the dataset by in order to have - longitude from within requested bounds. + """Calculate the number of elements to roll the dataset by in order to have longitude from within requested bounds. - :param lon: longitude coordinate of xarray dataset. - :param first_element_value: the value of the first element of the longitude array to roll to. + Parameters + ---------- + lon + Longitude coordinate of xarray dataset. + first_element_value + The value of the first element of the longitude array to roll to. """ # get resolution of data res = lon.values[1] - lon.values[0] @@ -33,6 +36,228 @@ def calculate_offset(lon, first_element_value): return offset +def _crosses_0_meridian(lon_c: xr.DataArray): + """Determine whether grid extents over the 0-meridian. + + Assumes approximate constant width of grid cells. + + Parameters + ---------- + lon_c: xr.DataArray + Longitude coordinate variable in the longitude frame [-180, 180]. + + Returns + ------- + bool + True for a dataset crossing the 0-meridian, False else. 
+ """ + if not isinstance(lon_c, xr.DataArray): + raise InvalidParameterValue("Input needs to be of type xarray.DataArray.") + + # Not crossing the 0-meridian if all values are positive or negative + lon_n = lon_c.where(lon_c <= 0, 0) + lon_p = lon_c.where(lon_c >= 0, 0) + if lon_n.all() or lon_p.all(): + return False + + # Determine min/max lon values + xc_min = float(lon_c.min()) + xc_max = float(lon_c.max()) + + # Determine resolution in zonal direction + if lon_c.ndim == 1: + xc_inc = (xc_max - xc_min) / (lon_c.sizes[lon_c.dims[0]] - 1) + else: + xc_inc = (xc_max - xc_min) / (lon_c.sizes[lon_c.dims[1]] - 1) + + # Generate a histogram with bins for sections along a latitudinal circle, + # width of the bins/sections dependent on the resolution in x-direction + atol = 2.0 * xc_inc + extent_hist = np.histogram( + lon_c, + bins=np.arange(xc_min - xc_inc, xc_max + atol, atol), + ) + + # If the counts for all bins are greater than zero, the grid is considered crossing the 0-meridian + if np.all(extent_hist[0]): + return True + else: + return False + + +def _convert_interval_between_lon_frames(low, high): + """Convert a longitude interval to another longitude frame, returns Tuple of two floats.""" + diff = high - low + if low < 0 and high > 0: + raise ValueError( + "Cannot convert longitude interval if it includes the 0°- or 180°-meridian." + ) + elif low < 0: + return tuple(sorted((low + 360.0, low + 360.0 + diff))) + elif low < 180 and high > 180: + raise ValueError( + "Cannot convert longitude interval if it includes the 0°- or 180°-meridian." + ) + elif high > 180: + return tuple(sorted((high - 360.0 - diff, high - 360.0))) + else: + return float(low), float(high) + + +def cf_convert_between_lon_frames(ds_in, lon_interval): + """Convert ds or lon_interval (whichever deems appropriate) to the other longitude frame, if the longitude frames do not match. + + If ds and lon_interval are defined on different longitude frames ([-180, 180] and [0, 360]), + this function will convert one of the input parameters to the other longitude frame, preferably + the lon_interval. + Adjusts shifted longitude frames [0-x, 360-x] in the dataset to one of the two standard longitude + frames, dependent on the specified lon_interval. + In case of curvilinear grids featuring an additional 1D x-coordinate of the projection, + this projection x-coordinate will not get converted. + + Parameters + ---------- + ds_in: xarray.Dataset or xarray.DataArray + xarray data object with defined longitude dimension. + lon_interval: tuple or list + length-2-tuple or -list of floats or integers denoting the bounds of the longitude interval. + + Returns + ------- + Tuple(ds, lon_low, lon_high) + The xarray.Dataset and the bounds of the longitude interval, potentially adjusted in terms + of their defined longitude frame. + """ + # Collect input specs + if isinstance(ds_in, (xr.Dataset, xr.DataArray)): + lon = detect_coordinate(ds_in, "longitude") + lat = detect_coordinate(ds_in, "latitude") + lon_bnds = detect_bounds(ds_in, lon) + # lat_bnds = detect_bounds(ds_in, lat) + # Do not consider bounds in gridtype detection (yet fails due to open_mfdataset bug that adds + # time dimension to bounds - todo) + gridtype = detect_gridtype( + ds_in, lon=lon, lat=lat + ) # lat_bnds=lat_bnds, lon_bnds = lon_bnds) + ds = ds_in.copy() + else: + raise InvalidParameterValue( + "This function requires an xarray.DataArray or xarray.Dataset as input." 
+ ) + low, high = lon_interval + lon_min, lon_max = ds.coords[lon].values.min(), ds.coords[lon].values.max() + atol = 0.5 + + # Check longitude + # For longitude frames other than [-180, 180] and [0, 360] in the dataset the following assumptions + # are being made: + # - fixpoint is the 0-meridian + # - the lon_interval is either defined in the longitude frame [-180, 180] or [0, 360] + if lon_max - lon_min > 360 + atol or lon_min < -360 - atol or lon_max > 360 + atol: + raise ValueError( + "The longitude coordinate values have to lie within the interval " + "[-360, 360] degrees and not exceed an extent of 360 degrees." + ) + + # Conversion between longitude frames if required + if (lon_min <= low or np.isclose(low, lon_min, atol=atol)) and ( + lon_max >= high or np.isclose(high, lon_max, atol=atol) + ): + return ds, low, high + + # Conversion: longitude is a singleton dimension + elif (ds[lon].ndim == 1 and ds.sizes[ds[lon].dims[0]] == 1) or ( + ds[lon].ndim > 1 and ds.sizes[ds[lon].dims[1]] == 1 + ): + if low < 0 and lon_min > 0: + ds[lon] = ds[lon].where(ds[lon] <= 180, ds[lon] - 360.0) + if lon_bnds: + ds[lon_bnds] = ds[lon_bnds].where( + ds[lon_bnds] > 180, ds[lon_bnds] - 360.0 + ) + elif low > 0 and lon_min < 0: + ds[lon] = ds[lon].where(ds[lon] >= 0, ds[lon] + 360.0) + if lon_bnds: + ds[lon_bnds] = ds[lon_bnds].where( + ds[lon_bnds] >= 0, ds[lon_bnds] + 360.0 + ) + return ds, low, high + + # Conversion: 1D or 2D longitude coordinate variable + else: + # regional [0 ... 180] + if lon_min >= 0 and lon_max <= 180: + return ds, low, high + + # shifted frame beyond -180, eg. [-300, 60] + elif lon_min < -180 - atol: + if low < 0: + ds[lon] = ds[lon].where(ds[lon] > -180, ds[lon] + 360.0) + if lon_bnds: + ds[lon_bnds] = ds[lon_bnds].where( + ds[lon_bnds] > -180, ds[lon_bnds] + 360.0 + ) + elif low >= 0: + ds[lon] = ds[lon].where(ds[lon] >= 0, ds[lon] + 360.0) + if lon_bnds: + ds[lon_bnds] = ds[lon_bnds].where( + ds[lon_bnds] >= 0, ds[lon_bnds] + 360.0 + ) + + # shifted frame beyond 0, eg. [-60, 300] + elif lon_min < -atol and lon_max > 180 + atol: + if low < 0: + ds[lon] = ds[lon].where(ds[lon] <= 180, ds[lon] - 360.0) + if lon_bnds: + ds[lon_bnds] = ds[lon_bnds].where( + ds[lon_bnds] <= 180, ds[lon_bnds] - 360.0 + ) + elif low >= 0: + ds[lon] = ds[lon].where(ds[lon] >= 0, ds[lon] + 360.0) + if lon_bnds: + ds[lon_bnds] = ds[lon_bnds].where( + ds[lon_bnds] >= 0, ds[lon_bnds] + 360.0 + ) + + # [-180 ... 180] + elif lon_min < 0: + # interval includes 180°-meridian: convert dataset to [0, 360] + if low < 180 and high > 180: + ds[lon] = ds[lon].where(ds[lon] >= 0, ds[lon] + 360.0) + if lon_bnds: + ds[lon_bnds] = ds[lon_bnds].where( + ds[lon_bnds] >= 0, ds[lon_bnds] + 360.0 + ) + # interval does not include 180°-meridian: convert interval to [-180,180] + else: + if low >= 0: + low, high = _convert_interval_between_lon_frames(low, high) + return ds, low, high + + # [0 ... 
360] + else: + # interval positive, return unchanged + if low >= 0: + return ds, low, high + # interval includes 0°-meridian: convert dataset to [-180, 180] + elif high > 0: + ds[lon] = ds[lon].where(ds[lon] <= 180, ds[lon] - 360.0) + if lon_bnds: + ds[lon_bnds] = ds[lon_bnds].where( + ds[lon_bnds] <= 180, ds[lon_bnds] - 360.0 + ) + # interval negative + else: + low, high = _convert_interval_between_lon_frames(low, high) + return ds, low, high + + # 1D coordinate variable: Sort, since order might no longer be ascending / descending + if gridtype == "regular_lat_lon": + ds = ds.sortby(lon) + + return ds, low, high + + def check_lon_alignment(ds, lon_bnds): """ Check whether the longitude subset requested is within the bounds of the dataset. If not try to roll the dataset so @@ -89,20 +314,27 @@ def check_lon_alignment(ds, lon_bnds): def adjust_date_to_calendar(da, date, direction="backwards"): - """ - Check that the date specified exists in the calendar type of the dataset. If not, - change the date a day at a time (up to a maximum of 5 times) to find a date that does exist. + """Check that the date specified exists in the calendar type of the dataset. + If not present, changes the date a day at a time (up to a maximum of 5 times) to find a date that does exist. The direction to change the date by is indicated by 'direction'. - :param da: xarray.Dataset or xarray.DataArray - :param date: The date to check, as a string. - :param direction: The direction to move in days to find a date that does exist. - 'backwards' means the search will go backwards in time until an existing date is found. - 'forwards' means the search will go forwards in time. - The default is 'backwards'. + Parameters + ---------- + da: xarray.Dataset or xarray.DataArray + The data to examine. + date: str + The date to check. + direction: str + The direction to move in days to find a date that does exist. + 'backwards' means the search will go backwards in time until an existing date is found. + 'forwards' means the search will go forwards in time. + The default is 'backwards'. - :return: (str) The next possible existing date in the calendar of the dataset. + Returns + ------- + str + The next possible existing date in the calendar of the dataset. """ # turn date into AnyCalendarDateTime object d = str_to_AnyCalendarDateTime(date) @@ -135,3 +367,141 @@ def adjust_date_to_calendar(da, date, direction="backwards"): raise ValueError( f"Could not find an existing date near {date} in the calendar: {cal}" ) + + +def detect_coordinate(ds, coord_type): + """Use cf_xarray to obtain the variable name of the requested coordinate. + + Parameters + ---------- + ds: xarray.Dataset, xarray.DataArray + Dataset the coordinate variable name shall be obtained from. + coord_type: str + Coordinate type understood by cf-xarray, eg. 'lat', 'lon', ... + + Raises + ------ + AttributeError + Raised if the requested coordinate cannot be identified. + + Returns + ------- + str + Coordinate variable name. + """ + error_msg = f"A {coord_type} coordinate cannot be identified in the dataset." + + # Make use of cf-xarray accessor + try: + coord = ds.cf[coord_type] + # coord = get_coord_by_type(ds, coord_type, ignore_aux_coords=False) + except KeyError: + raise KeyError(error_msg) + + # Return the name of the coordinate variable + try: + return coord.name + except AttributeError: + raise AttributeError(error_msg) + + +def detect_bounds(ds, coordinate) -> Optional[str]: + """Use cf_xarray to obtain the variable name of the requested coordinates bounds. 
+ + Parameters + ---------- + ds: xarray.Dataset, xarray.DataArray + Dataset the coordinate bounds variable name shall be obtained from. + coordinate : str + Name of the coordinate variable to determine the bounds from. + + Returns + ------- + str or None + Returns the variable name of the requested coordinate bounds. + Returns None if the variable has no bounds or if they cannot be identified. + """ + try: + return ds.cf.bounds[coordinate][0] + except (KeyError, AttributeError): + warnings.warn( + "For coordinate variable '%s' no bounds can be identified." % coordinate + ) + return + + +def detect_gridtype(ds, lon, lat, lon_bnds=None, lat_bnds=None): + """Detect type of the grid as one of "regular_lat_lon", "curvilinear", "unstructured". + + Assumes the grid description / structure follows the CF conventions. + """ + # 1D coordinate variables + if ds[lat].ndim == 1 and ds[lon].ndim == 1: + lat_1D = ds[lat].dims[0] + lon_1D = ds[lon].dims[0] + if not lat_bnds or not lon_bnds: + if lat_1D == lon_1D: + return "unstructured" + else: + return "regular_lat_lon" + else: + # unstructured: bounds [ncells, nvertices] + if ( + lat_1D == lon_1D + and all([ds[bnds].ndim == 2 for bnds in [lon_bnds, lat_bnds]]) + and all( + [ + ds.dims[dim] > 2 + for dim in [ + ds[lon_bnds].dims[-1], + ds[lat_bnds].dims[-1], + ] + ] + ) + ): + return "unstructured" + # rectilinear: bounds [nlat/nlon, 2] + elif ( + all([ds[bnds].ndim == 2 for bnds in [lon_bnds, lat_bnds]]) + and ds.dims[ds.cf.get_bounds_dim_name(lon)] == 2 + ): + return "regular_lat_lon" + else: + raise Exception("The grid type is not supported.") + + # 2D coordinate variables + elif ds[lat].ndim == 2 and ds[lon].ndim == 2: + # Test for curvilinear or restructure lat/lon coordinate variables + # todo: Check if regular_lat_lon despite 2D + # - requires additional function checking + # lat[:,i]==lat[:,j] for all i,j + # lon[i,:]==lon[j,:] for all i,j + # - and if that is the case to extract lat/lon and *_bnds + # lat[:]=lat[:,j], lon[:]=lon[j,:] + # lat_bnds[:, 2]=[min(lat_bnds[:,j, :]), max(lat_bnds[:,j, :])] + # lon_bnds similar + if not ds[lat].shape == ds[lon].shape: + raise InvalidParameterValue( + "The horizontal coordinate variables have differing shapes." + ) + else: + if not lat_bnds or not lon_bnds: + return "curvilinear" + else: + # Shape of curvilinear bounds either [nlat, nlon, 4] or [nlat+1, nlon+1] + if list(ds[lat].shape) + [4] == list(ds[lat_bnds].shape) and list( + ds[lon].shape + ) + [4] == list(ds[lon_bnds].shape): + return "curvilinear" + elif [si + 1 for si in ds[lat].shape] == list(ds[lat_bnds].shape) and [ + si + 1 for si in ds[lon].shape + ] == list(ds[lon_bnds].shape): + return "curvilinear" + else: + raise Exception("The grid type is not supported.") + + # >2D coordinate variables, or coordinate variables of different dimensionality + else: + raise InvalidParameterValue( + "The horizontal coordinate variables have more than 2 dimensions." 
+ ) diff --git a/clisops/utils/file_namers.py b/clisops/utils/file_namers.py index b2bf8c0d..db0e4ae2 100644 --- a/clisops/utils/file_namers.py +++ b/clisops/utils/file_namers.py @@ -12,7 +12,7 @@ def get_file_namer(name): return namers.get(name, StandardFileNamer) -class _BaseFileNamer(object): +class _BaseFileNamer: """File namer base class""" def __init__(self, replace=None, extra=""): diff --git a/clisops/utils/output_utils.py b/clisops/utils/output_utils.py index 1e5c78b1..bf73d833 100644 --- a/clisops/utils/output_utils.py +++ b/clisops/utils/output_utils.py @@ -10,12 +10,11 @@ import dask import pandas as pd import xarray as xr +from loguru import logger from roocs_utils.utils.common import parse_size from roocs_utils.xarray_utils import xarray_utils as xu -from clisops import CONFIG, chunk_memory_limit, logging - -LOGGER = logging.getLogger(__file__) +from clisops import CONFIG, chunk_memory_limit SUPPORTED_FORMATS = { "netcdf": {"method": "to_netcdf", "extension": "nc"}, @@ -200,11 +199,11 @@ def get_output(ds, output_type, output_dir, namer): the output format and chunking """ format_writer = get_format_writer(output_type) - LOGGER.info(f"format_writer={format_writer}, output_type={output_type}") + logger.info(f"format_writer={format_writer}, output_type={output_type}") # If there is no writer for this output type, just return the `ds` object if not format_writer: - LOGGER.info(f"Returning output as {type(ds)}") + logger.info(f"Returning output as {type(ds)}") return ds # Use the file namer to get the required file name @@ -235,7 +234,7 @@ def get_output(ds, output_type, output_dir, namer): tmp_dir = tempfile.TemporaryDirectory(dir=staging_dir) fname = os.path.basename(output_path) target_path = os.path.join(tmp_dir.name, fname) - LOGGER.info(f"Writing to temporary path: {target_path}") + logger.info(f"Writing to temporary path: {target_path}") else: target_path = output_path @@ -257,5 +256,5 @@ def get_output(ds, output_type, output_dir, namer): time.sleep(3) tmp_dir.cleanup() - LOGGER.info(f"Wrote output file: {output_path}") + logger.info(f"Wrote output file: {output_path}") return output_path diff --git a/clisops/utils/time_utils.py b/clisops/utils/time_utils.py new file mode 100644 index 00000000..ae037967 --- /dev/null +++ b/clisops/utils/time_utils.py @@ -0,0 +1,39 @@ +def create_time_bounds(ds, freq): + """ + Generate time bounds for datasets that have been temporally averaged. + Averaging frequencies supported are yearly, monthly and daily. 
+ """ + # get datetime class + dt_cls = ds.time.values[0].__class__ + + if freq == "month": + time_bounds = [ + [ + dt_cls(tm.year, tm.month, tm.day), + dt_cls(tm.year, tm.month, tm.daysinmonth), + ] + for tm in ds.time.values + ] + + elif freq == "year": + # get number of days in december for calendar + dec_days = dt_cls(2000, 12, 1).daysinmonth + # generate time bounds + time_bounds = [ + [dt_cls(tm.year, 1, 1), dt_cls(tm.year, 12, dec_days)] + for tm in ds.time.values + ] + + elif freq == "day": + time_bounds = [ + [ + dt_cls(tm.year, tm.month, tm.day, 0, 0, 0), + dt_cls(tm.year, tm.month, tm.day, 23, 59, 59), + ] + for tm in ds.time.values + ] + + else: + raise Exception("Time frequency not supported for creation of time bounds.") + + return time_bounds diff --git a/clisops/utils/tutorial.py b/clisops/utils/tutorial.py index d37d3eb0..7e78280b 100644 --- a/clisops/utils/tutorial.py +++ b/clisops/utils/tutorial.py @@ -1,7 +1,6 @@ """Testing and tutorial utilities module.""" # Most of this code copied and adapted from xarray, xclim, and raven import hashlib -import logging import re from pathlib import Path from typing import List, Optional, Sequence, Union @@ -10,12 +9,12 @@ from urllib.request import urlretrieve import requests +from loguru import logger from xarray import Dataset from xarray import open_dataset as _open_dataset _default_cache_dir = Path.home() / ".clisops_testing_data" -LOGGER = logging.getLogger(__file__) __all__ = ["get_file", "open_dataset", "query_folder"] @@ -36,7 +35,7 @@ def _get( ) -> Path: cache_dir = cache_dir.absolute() local_file = cache_dir / branch / fullname - md5name = fullname.with_suffix("{}.md5".format(suffix)) + md5name = fullname.with_suffix(f"{suffix}.md5") md5file = cache_dir / branch / md5name if not local_file.is_file(): @@ -45,12 +44,12 @@ def _get( local_file.parent.mkdir(parents=True, exist_ok=True) url = "/".join((github_url, "raw", branch, fullname.as_posix())) - LOGGER.info("Fetching remote file: %s" % fullname.as_posix()) + logger.info("Fetching remote file: %s" % fullname.as_posix()) urlretrieve(url, local_file) try: url = "/".join((github_url, "raw", branch, md5name.as_posix())) - LOGGER.info("Fetching remote file md5: %s" % md5name.as_posix()) + logger.info("Fetching remote file md5: %s" % md5name.as_posix()) urlretrieve(url, md5file) except HTTPError as e: msg = f"{md5name.as_posix()} not found. Aborting file retrieval." @@ -69,7 +68,7 @@ def _get( """ raise OSError(msg) except OSError as e: - LOGGER.error(e) + logger.error(e) return local_file @@ -220,7 +219,7 @@ def open_dataset( return ds except OSError: msg = "OPeNDAP file not read. Verify that service is available." - LOGGER.error(msg) + logger.error(msg) raise local_file = _get( diff --git a/docs/api.rst b/docs/api.rst index b84003b4..435cc11b 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -11,7 +11,7 @@ Core subset functionality :show-inheritance: Core average functionality -========================= +========================== .. automodule:: clisops.core.average :members: @@ -29,8 +29,8 @@ Subset operation :show-inheritance: -Average operation -================= +Average operations +================== .. automodule:: clisops.ops.average :noindex: @@ -70,10 +70,20 @@ File Namers Dataset Utilities -================ +================= .. automodule:: clisops.utils.dataset_utils :noindex: :members: :undoc-members: :show-inheritance: + + +Time Utilities +============== + +.. 
automodule:: clisops.utils.time_utils + :noindex: + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/conf.py b/docs/conf.py index 72d8953a..1845c872 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- # # clisops documentation build configuration file, created by # sphinx-quickstart on Fri Jun 9 13:47:02 2017. @@ -23,9 +22,12 @@ import os import sys -sys.path.insert(0, os.path.abspath("..")) +import xarray + +xarray.DataArray.__module__ = "xarray" +xarray.Dataset.__module__ = "xarray" -import sphinx_rtd_theme +sys.path.insert(0, os.path.abspath("..")) # -- General configuration --------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. @@ -45,11 +47,15 @@ "IPython.sphinxext.ipython_console_highlighting", ] +autosectionlabel_prefix_document = True +autosectionlabel_maxdepth = 2 + napoleon_numpy_docstring = True napoleon_use_rtype = False napoleon_use_param = False napoleon_use_ivar = True +nbsphinx_allow_errors = True nbsphinx_execute = "auto" nbsphinx_timeout = 300 @@ -60,7 +66,8 @@ # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] -source_suffix = [".rst", ".ipynb"] +source_suffix = [".rst"] +# note: do not add .ipynb when nbspinx is enabled, otherwise you get the "missing title" error # The master toctree document. master_doc = "index" @@ -75,7 +82,7 @@ # the built documents. # # The short X.Y version. -version = "0.8.0" +version = "0.9.1" # The full version, including alpha/beta/rc tags. release = version @@ -84,7 +91,7 @@ # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +language = "en" # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. 
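The new ``clisops.utils.time_utils.create_time_bounds`` helper documented above derives per-step time bounds from an already averaged time axis. A minimal sketch of its use, assuming a toy monthly dataset built with ``cftime`` (the toy data is not part of this changeset)::

    import cftime
    import xarray as xr

    from clisops.utils.time_utils import create_time_bounds

    # Mimic a monthly-averaged time axis: one timestamp per month, pinned to day 1.
    times = [cftime.DatetimeGregorian(2000, month, 1) for month in range(1, 13)]
    ds = xr.Dataset(coords={"time": ("time", times)})

    # For freq="month" each bound pair spans from the timestamp's day to the last
    # day of that month (daysinmonth), built with the same cftime class as ds.time.
    bounds = create_time_bounds(ds, freq="month")
    print(bounds[0])  # e.g. [2000-01-01 00:00:00, 2000-01-31 00:00:00]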
diff --git a/docs/notebooks b/docs/notebooks index f411073e..8f9a5b2e 120000 --- a/docs/notebooks +++ b/docs/notebooks @@ -1 +1 @@ -../notebooks +../notebooks \ No newline at end of file diff --git a/environment.yml b/environment.yml index 327b1b97..59d6fddd 100644 --- a/environment.yml +++ b/environment.yml @@ -3,23 +3,23 @@ channels: - conda-forge - defaults dependencies: - - python=3.8 + - python>=3.8 - pip - - numpy>=1.16 - - xarray>=0.15 - - pandas>=1.0.3 + - bottleneck>=1.3.1,<1.4 + - cf_xarray>=0.7.0 - cftime>=1.4.1 + - dask>=2.6.0 + - geopandas>=0.7 + - loguru>=0.5.3 - netCDF4>=1.4 + - numpy>=1.16,<1.22 + - pandas>=1.0.3,<1.4 - poppler>=0.67 - - shapely>=1.6 - - geopandas>=0.7 - - xesmf>=0.6.2 - - dask>=2.6.0 - - bottleneck>=1.3.1,<1.4 + - pygeos>=0.9 - pyproj>=2.5 + - pooch - requests>=2.0 - - roocs-utils>=0.5.0 - - cf_xarray>=0.5.1 - - pygeos>=0.9 -# - pip: -# - roocs-utils @ git+https://github.com/roocs/roocs-utils.git@master#egg=roocs-utils + - roocs-utils>=0.6.2,<0.7 + - shapely>=1.6 + - xarray>=0.15 + - xesmf>=0.6.2 diff --git a/notebooks/average_over_dims.ipynb b/notebooks/average_over_dims.ipynb index 37221623..3e052143 100644 --- a/notebooks/average_over_dims.ipynb +++ b/notebooks/average_over_dims.ipynb @@ -1,5 +1,16 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Averaging over dimensions of the dataset\n", + "\n", + "The average over dimensions operation makes use of `clisops.core.average` to process the datasets and to set the output type and the output file names.\n", + "\n", + "It is possible to average over none or any number of time, longitude, latitude or level dimensions in the dataset." + ] + }, { "cell_type": "code", "execution_count": null, @@ -26,18 +37,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Averaging over dimensions of the dataset\n", - "\n", - "The average over dimensions operation makes use of `clisops.core.average` to process the datasets and to set the output type and the output file names.\n", - "\n", - "It is possible to average over none or any number of time, longitude, latitude or level dimensions in the dataset." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Parameters\n", + "## Parameters\n", "\n", "Parameters taken by the `average_over_dims` are below:\n", "\n", @@ -83,7 +83,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Average over one dimension" + "## Average over one dimension" ] }, { @@ -108,7 +108,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Average over two dimensions\n", + "## Average over two dimensions\n", "\n", "Averaging over two dimensions is just as simple as averaging over one. The dimensions to be averaged over should be passed in as a sequence." ] @@ -135,7 +135,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Allowed dimensions\n", + "## Allowed dimensions\n", "\n", "It is only possible to average over longtiude, latitude, level and time. If a different dimension is provided to average over an error will be raised." 
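The behaviour described in that notebook cell can be sketched as follows; the toy dataset and its ``height`` dimension are illustrative assumptions only, and the exact error message may differ slightly::

    import numpy as np
    import xarray as xr
    from roocs_utils.exceptions import InvalidParameterValue

    from clisops.ops.average import average_over_dims

    # Toy dataset with an unsupported "height" dimension alongside lat/lon.
    ds = xr.Dataset(
        {"tas": (("height", "lat", "lon"), np.zeros((2, 3, 4)))},
        coords={
            "height": [2.0, 10.0],
            "lat": [-10.0, 0.0, 10.0],
            "lon": [0.0, 90.0, 180.0, 270.0],
        },
    )

    try:
        average_over_dims(ds, dims=["height"], output_type="xarray")
    except InvalidParameterValue as err:
        # Only 'time', 'level', 'latitude' and 'longitude' are accepted.
        print(err)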
] @@ -161,7 +161,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Dimensions not found" + "## Dimensions not found" ] }, { @@ -223,7 +223,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# No dimensions supplied" + "## No dimensions supplied" ] }, { @@ -253,7 +253,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# An example of averaging over level" + "## An example of averaging over level" ] }, { @@ -287,7 +287,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -301,7 +301,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.6" + "version": "3.9.12" } }, "nbformat": 4, diff --git a/notebooks/core_subset.ipynb b/notebooks/core_subset.ipynb index 1aa4e65d..bfa79d53 100644 --- a/notebooks/core_subset.ipynb +++ b/notebooks/core_subset.ipynb @@ -1682,7 +1682,7 @@ "metadata": { "keep_output": true, "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -1696,7 +1696,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.6" + "version": "3.9.12" } }, "nbformat": 4, diff --git a/notebooks/subset.ipynb b/notebooks/subset.ipynb index 56db9c40..86523c75 100644 --- a/notebooks/subset.ipynb +++ b/notebooks/subset.ipynb @@ -1,5 +1,14 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Subsetting\n", + "\n", + "The subset operation makes use of `clisops.core.subset` to process the datasets and to set the output type and the output file names." + ] + }, { "cell_type": "code", "execution_count": null, @@ -32,15 +41,6 @@ " os.remove(\"./output_001.nc\")" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Subsetting\n", - "\n", - "The subset operation makes use of `clisops.core.subset` to process the datasets and to set the output type and the output file names." 
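A minimal sketch of that call pattern, mirroring the curvilinear-grid test added further below; ``CMIP6_TOS_ONE_TIME_STEP`` stands in for any dataset path or ``xarray`` object::

    from clisops.ops.subset import subset

    # CMIP6_TOS_ONE_TIME_STEP: a sample dataset path (see tests/_common.py).
    # area is (lon_min, lat_min, lon_max, lat_max); output_type="xarray" returns
    # the subsetted objects instead of writing netCDF files.
    result = subset(
        ds=CMIP6_TOS_ONE_TIME_STEP,
        area=(-70, -45, 120, 45),
        output_type="xarray",
    )
    subset_ds = result[0]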
- ] - }, { "cell_type": "code", "execution_count": null, @@ -57,8 +57,7 @@ "source": [ "The `subset` process takes several parameters:\n", "\n", - "Parameters\n", - "----------\n", + "## Subsetting Parameters\n", "\n", " ds: Union[xr.Dataset, str, Path]\n", " time: Optional[Union[str, TimeParameter]]\n", @@ -330,6 +329,7 @@ } ], "metadata": { + "celltoolbar": "Edit Metadata", "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", @@ -345,7 +345,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.10" + "version": "3.9.12" } }, "nbformat": 4, diff --git a/requirements.txt b/requirements.txt index 60f25198..f853ca10 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,19 +1,21 @@ -numpy>=1.16 +numpy>=1.16,<1.22 xarray>=0.15 -pandas>=1.0.3 +pandas>=1.0.3,<1.4 cftime>=1.4.1 netCDF4>=1.4 shapely>=1.6 geopandas>=0.7 dask[complete]>=2.6 pyproj>=2.5 -requests>=2.0 -roocs-utils>=0.3.0 -cf-xarray>=0.6.3 scipy>=1.5.4 roocs-grids @ git+https://github.com/roocs/roocs-grids.git@main#egg=roocs-grids # Optional (if using regridding) -# pangeo-xesmf>=0.6.0 +# pangeo-xesmf>=0.6.3 +pooch +cf-xarray>=0.7.0 +#cf-xarray @ git+https://github.com/xarray-contrib/cf-xarray/@main#egg=cf-xarray bottleneck>=1.3.1 -roocs-utils>=0.5.0 +requests>=2.0 +roocs-utils>=0.6.2,<0.7 # roocs-utils @ git+https://github.com/roocs/roocs-utils.git@master#egg=roocs-utils +loguru>=0.5.3 diff --git a/requirements_dev.txt b/requirements_dev.txt index a1567ef7..cb837475 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -9,9 +9,10 @@ Sphinx sphinx-rtd-theme twine pytest +pytest-loguru pytest-runner pre-commit>=2.9.0 -black>=20.8b1 +black>=21.12b0 nbsphinx nbconvert ipython diff --git a/setup.cfg b/setup.cfg index c8b92f72..41d0ffde 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.8.0 +current_version = 0.9.1 commit = True tag = True @@ -43,7 +43,7 @@ test = pytest [tool:pytest] collect_ignore = ["setup.py"] -addopts = --verbose tests +addopts = --verbose filterwarnings = ignore::UserWarning markers = diff --git a/setup.py b/setup.py index 8522b06e..e472430e 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,4 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*- """The setup script.""" import os @@ -13,7 +12,7 @@ _long_description = open(os.path.join(here, "README.rst")).read() about = dict() -with open(os.path.join(here, "clisops", "__version__.py"), "r") as f: +with open(os.path.join(here, "clisops", "__version__.py")) as f: exec(f.read(), about) requirements = [line.strip() for line in open("requirements.txt")] @@ -30,6 +29,7 @@ "nbsphinx", "pandoc", "ipython", + "ipython_genutils", "ipykernel", "jupyter_client", "matplotlib", @@ -58,9 +58,9 @@ "Operating System :: POSIX :: Linux", "Programming Language :: Python", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", "Topic :: Security", "Topic :: Internet", "Topic :: Scientific/Engineering", @@ -70,7 +70,7 @@ ], description="clisops - climate simulation operations.", license=__license__, - python_requires=">=3.7.0", + python_requires=">=3.8.0", install_requires=requirements, long_description=_long_description, long_description_content_type="text/x-rst", diff --git a/tests/_common.py b/tests/_common.py index a51a2303..db6c01a3 100644 --- a/tests/_common.py +++ b/tests/_common.py @@ -1,8 +1,10 @@ import os 
import tempfile from pathlib import Path +from typing import Optional import xarray as xr +from _pytest.logging import LogCaptureFixture # noqa from jinja2 import Template ROOCS_CFG = Path(tempfile.gettempdir(), "roocs.ini").as_posix() @@ -16,6 +18,32 @@ ).as_posix() +class ContextLogger: + """Helper function for safe logging management in pytests""" + + def __init__(self, caplog: Optional[LogCaptureFixture] = False): + from loguru import logger + + self.logger = logger + self.using_caplog = False + if caplog: + self.using_caplog = True + + def __enter__(self): + self.logger.enable("clisops") + return self.logger + + def __exit__(self, exc_type, exc_val, exc_tb): + """If test is supplying caplog, pytest will manage teardown.""" + + self.logger.disable("clisops") + if not self.using_caplog: + try: + self.logger.remove() + except ValueError: + pass + + def assert_vars_equal(var_id, *ds_list, extras=None): """Extract variable/DataArray `var_id` from each Dataset in the `ds_list`. Check they are all the same by comparing the arrays and common attributes. @@ -145,6 +173,17 @@ def cmip6_archive_base(): "master/test_data/badc/cmip6/data/CMIP6/ScenarioMIP/MIROC/MIROC6/ssp119/r1i1p1f1/Amon/ta/gn/files/d20190807/ta_Amon_MIROC6_ssp119_r1i1p1f1_gn_201501-202412.nc", ).as_posix() +CMIP6_TASMIN = Path( + MINI_ESGF_CACHE_DIR, + "master/test_data/badc/cmip6/data/CMIP6/CMIP/MPI-M/MPI-ESM1-2-HR/historical/r1i1p1f1/Amon/tasmin/gn/v20190710/tasmin_Amon_MPI-ESM1-2-HR_historical_r1i1p1f1_gn_201001-201412.nc", +).as_posix() + +# Dataset with julian calender +CMIP6_JULIAN = Path( + MINI_ESGF_CACHE_DIR, + "master/test_data/badc/cmip6/data/CMIP6/CMIP/CCCR-IITM/IITM-ESM/1pctCO2/r1i1p1f1/Omon/tos/gn/v20191204/tos_Omon_IITM-ESM_1pctCO2_r1i1p1f1_gn_193001-193412.nc", +).as_posix() + C3S_CORDEX_AFR_TAS = Path( MINI_ESGF_CACHE_DIR, "master/test_data/pool/data/CORDEX/data/cordex/output/AFR-22/GERICS/MPI-M-MPI-ESM-LR/historical/r1i1p1/GERICS-REMO2015/v1/day/tas/v20201015/*.nc", @@ -175,13 +214,11 @@ def cmip6_archive_base(): "c3s-cmip5/output1/BCC/bcc-csm1-1-m/historical/mon/ocean/Omon/r1i1p1/tos/v20120709/*.nc", ).as_posix() - CMIP6_TOS = Path( MINI_ESGF_CACHE_DIR, "master/test_data/badc/cmip6/data/CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/historical/r1i1p1f1/Omon/tos/gn/v20190710/tos_Omon_MPI-ESM1-2-LR_historical_r1i1p1f1_gn_185001-186912.nc", ).as_posix() - # test daatsets used for regridding tests - one time step, full lat/lon # cmip6 atmosphere grid # x-res = 2.0 @@ -202,3 +239,35 @@ def cmip6_archive_base(): MINI_ESGF_CACHE_DIR, "master/test_data/badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES/rcp85/day/land/day/r1i1p1/latest/mrsos/mrsos_day_HadGEM2-ES_rcp85_r1i1p1_20051201.nc", ).as_posix() + +# CMIP6 dataset with weird range in its longitude coordinate (-300, 60) +CMIP6_GFDL_EXTENT = Path( + MINI_ESGF_CACHE_DIR, + "master/test_data/badc/cmip6/data/CMIP6/CMIP/NOAA-GFDL/GFDL-CM4/historical/r1i1p1f1/Omon/sos/gn/v20180701/sos_Omon_GFDL-CM4_historical_r1i1p1f1_gn_185001.nc", +).as_posix() + +# CMIP6 2nd dataset with weird range in its longitude coordinate (-280, 80) +CMIP6_IITM_EXTENT = Path( + MINI_ESGF_CACHE_DIR, + "master/test_data/badc/cmip6/data/CMIP6/CMIP/CCCR-IITM/IITM-ESM/1pctCO2/r1i1p1f1/Omon/tos/gn/v20191204/tos_Omon_IITM-ESM_1pctCO2_r1i1p1f1_gn_193001.nc", +).as_posix() + +CMIP6_OCE_HALO_CNRM = Path( + MINI_ESGF_CACHE_DIR, + "master/test_data/badc/cmip6/data/CMIP6/CMIP/CNRM-CERFACS/CNRM-CM6-1-HR/historical/r1i1p1f2/Omon/tos/gn/v20191021/tos_Omon_CNRM-CM6-1-HR_historical_r1i1p1f2_gn_185001.nc", +).as_posix() + 
+CMIP6_UNSTR_FESOM_LR = Path( + MINI_ESGF_CACHE_DIR, + "master/test_data/badc/cmip6/data/CMIP6/CMIP/AWI/AWI-ESM-1-1-LR/historical/r1i1p1f1/Omon/tos/gn/v20200212/tos_Omon_AWI-ESM-1-1-LR_historical_r1i1p1f1_gn_185001.nc", +).as_posix() + +CMIP6_UNSTR_ICON_A = Path( + MINI_ESGF_CACHE_DIR, + "master/test_data/badc/cmip6/data/CMIP6/CMIP/MPI-M/ICON-ESM-LR/historical/r1i1p1f1/Amon/tas/gn/v20210215/tas_Amon_ICON-ESM-LR_historical_r1i1p1f1_gn_185001.nc", +).as_posix() + +CORDEX_TAS_ONE_TIMESTEP = Path( + MINI_ESGF_CACHE_DIR, + "master/test_data/pool/data/CORDEX/data/cordex/output/EUR-22/GERICS/MPI-M-MPI-ESM-LR/rcp85/r1i1p1/GERICS-REMO2015/v1/mon/tas/v20191029/tas_EUR-22_MPI-M-MPI-ESM-LR_rcp85_r1i1p1_GERICS-REMO2015_v1_mon_202101.nc", +).as_posix() diff --git a/tests/conftest.py b/tests/conftest.py index 2f616795..d8488de8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,6 +4,7 @@ import pandas as pd import pytest import xarray as xr +from _pytest.logging import caplog as _caplog # noqa from git import Repo from clisops.utils import get_file diff --git a/tests/core/test_average.py b/tests/core/test_average.py index 790d541f..535d5f30 100644 --- a/tests/core/test_average.py +++ b/tests/core/test_average.py @@ -6,10 +6,12 @@ import xarray as xr from pkg_resources import parse_version from roocs_utils.exceptions import InvalidParameterValue +from roocs_utils.xarray_utils import xarray_utils as xu from clisops.core import average from clisops.utils import get_file +from .._common import CMIP5_RH, CMIP6_SICONC_DAY from .._common import XCLIM_TESTS_DATA as TESTS_DATA try: @@ -173,3 +175,54 @@ def test_average_wrong_format(self): str(exc.value) == "Dimensions for averaging must be one of ['time', 'level', 'latitude', 'longitude']" ) + + +class TestAverageTime: + month_ds = CMIP5_RH + day_ds = CMIP6_SICONC_DAY + + def test_average_month(self): + ds = xu.open_xr_dataset(self.day_ds) + assert ds.time.shape == (60225,) + + avg_ds = average.average_time(ds, freq="month") + assert avg_ds.time.shape == (1980,) + + def test_average_year(self): + ds = xu.open_xr_dataset(self.month_ds) + assert ds.time.shape == (1752,) + + avg_ds = average.average_time(ds, freq="year") + assert avg_ds.time.shape == (147,) + + def test_no_freq(self): + ds = xu.open_xr_dataset(self.month_ds) + + with pytest.raises(InvalidParameterValue) as exc: + average.average_time(ds, freq=None) + assert str(exc.value) == "At least one frequency for averaging must be provided" + + def test_incorrect_freq(self): + ds = xu.open_xr_dataset(self.month_ds) + + with pytest.raises(InvalidParameterValue) as exc: + average.average_time(ds, freq="wrong") + assert ( + str(exc.value) + == "Time frequency for averaging must be one of ['day', 'month', 'year']." 
+ ) + + def test_freq_wrong_format(self): + ds = xu.open_xr_dataset(self.month_ds) + + with pytest.raises(InvalidParameterValue) as exc: + average.average_time(ds, freq=0) + assert str(exc.value) == "At least one frequency for averaging must be provided" + + def test_no_time(self): + ds = xu.open_xr_dataset(self.month_ds) + ds = ds.drop_dims("time") + + with pytest.raises(Exception) as exc: + average.average_time(ds, freq="year") + assert str(exc.value) == "Time dimension could not be found" diff --git a/tests/core/test_subset.py b/tests/core/test_subset.py index 44d08542..9947aff5 100644 --- a/tests/core/test_subset.py +++ b/tests/core/test_subset.py @@ -618,6 +618,7 @@ def test_time(self): np.testing.assert_array_equal(out.time.max().dt.day, 15) def test_raise(self): + # 1st case da = xr.open_dataset(self.nc_poslons).tas with pytest.raises(ValueError): subset.subset_bbox( @@ -628,10 +629,24 @@ def test_raise(self): end_date="2055", ) + # 2nd case da = xr.open_dataset(self.nc_2dlonlat).tasmax.drop_vars(names=["lon", "lat"]) with pytest.raises(Exception): subset.subset_bbox(da, lon_bnds=self.lon, lat_bnds=self.lat) + # 3rd case + ds = xr.Dataset( + data_vars={"var": (("lat", "lon"), np.ones((5, 10)))}, + coords={ + "lat": ("lat", np.zeros(5)), + "lon": ("lon", np.arange(-10, 0, 1)), + }, + ) + ds["lat"].attrs["standard_name"] = "latitude" + ds["lon"].attrs["standard_name"] = "longitude" + with pytest.raises(ValueError): + subset.subset_bbox(ds, lon_bnds=(-0.1, 1.0)) + def test_warnings(self): da = xr.open_dataset(self.nc_poslons).tas da = da.assign_coords(lon=(da.lon - 360)) diff --git a/tests/ops/test_average.py b/tests/ops/test_average.py index 964c7a26..4175fbe7 100644 --- a/tests/ops/test_average.py +++ b/tests/ops/test_average.py @@ -8,9 +8,9 @@ import clisops from clisops import CONFIG -from clisops.ops.average import average_over_dims +from clisops.ops.average import average_over_dims, average_time -from .._common import CMIP5_TAS +from .._common import C3S_CORDEX_EUR_ZG500, CMIP5_TAS, CMIP6_SICONC_DAY def _check_output_nc(result, fname="output_001.nc"): @@ -18,7 +18,7 @@ def _check_output_nc(result, fname="output_001.nc"): def _load_ds(fpath): - return xr.open_mfdataset(fpath) + return xr.open_mfdataset(fpath, use_cftime=True) def test_average_basic_data_array(cmip5_tas_file): @@ -217,3 +217,139 @@ def test_aux_variables(): ) assert "do_i_get_written" in result[0].variables + + +def test_average_over_years(): + ds = _load_ds(CMIP5_TAS) # monthly dataset + + # check initial dataset + assert ds.time.shape == (3530,) + assert ds.time.values[0].isoformat() == "2005-12-16T00:00:00" + assert ds.time.values[-1].isoformat() == "2299-12-16T00:00:00" + + result = average_time( + CMIP5_TAS, + freq="year", + output_type="xarray", + ) + + time_length = ds.time.values[-1].year - ds.time.values[0].year + 1 + assert result[0].time.shape == (time_length,) # get number of years + assert result[0].time.values[0].isoformat() == "2005-01-01T00:00:00" + assert result[0].time.values[-1].isoformat() == "2299-01-01T00:00:00" + + # test time bounds + assert [t.isoformat() for t in result[0].time_bnds.values[0]] == [ + "2005-01-01T00:00:00", + "2005-12-30T00:00:00", + ] + assert [t.isoformat() for t in result[0].time_bnds.values[-1]] == [ + "2299-01-01T00:00:00", + "2299-12-30T00:00:00", + ] + + +def test_average_over_months(): + ds = _load_ds(CMIP6_SICONC_DAY) # monthly dataset + + # check initial dataset + assert ds.time.shape == (60225,) + assert ds.time.values[0].isoformat() == "1850-01-01T12:00:00" + 
assert ds.time.values[-1].isoformat() == "2014-12-31T12:00:00" + + # average over time + result = average_time( + CMIP6_SICONC_DAY, + freq="month", + output_type="xarray", + ) + + time_length = ( + ds.time.values[-1].year - ds.time.values[0].year + 1 + ) * 12 # get number of months + + assert result[0].time.shape == (time_length,) + assert result[0].time.values[0].isoformat() == "1850-01-01T00:00:00" + assert result[0].time.values[-1].isoformat() == "2014-12-01T00:00:00" + + # test time bounds + assert [t.isoformat() for t in result[0].time_bnds.values[0]] == [ + "1850-01-01T00:00:00", + "1850-01-31T00:00:00", + ] + assert [t.isoformat() for t in result[0].time_bnds.values[-1]] == [ + "2014-12-01T00:00:00", + "2014-12-31T00:00:00", + ] + + +def test_average_time_no_freq(): + with pytest.raises(InvalidParameterValue) as exc: + # average over time + average_time( + CMIP6_SICONC_DAY, + freq=None, + output_type="xarray", + ) + assert str(exc.value) == "At least one frequency for averaging must be provided" + + +def test_average_time_incorrect_freq(): + with pytest.raises(InvalidParameterValue) as exc: + # average over time + average_time( + CMIP6_SICONC_DAY, + freq="week", + output_type="xarray", + ) + assert ( + str(exc.value) + == "Time frequency for averaging must be one of ['day', 'month', 'year']." + ) + + +def test_average_time_file_name(tmpdir): + result = average_time( + CMIP5_TAS, + freq="year", + output_type="nc", + output_dir=tmpdir, + ) + + _check_output_nc( + result, fname="tas_mon_HadGEM2-ES_rcp85_r1i1p1_20050101-22990101_avg-year.nc" + ) + + +def test_average_time_cordex(): + ds = _load_ds(C3S_CORDEX_EUR_ZG500) + + # check initial dataset + assert ds.time.shape == (3653,) + assert ds.time.values[0].isoformat() == "2071-01-01T12:00:00" + assert ds.time.values[-1].isoformat() == "2080-12-31T12:00:00" + + # average over time + result = average_time( + C3S_CORDEX_EUR_ZG500, + freq="month", + output_type="xarray", + ) + + time_length = ( + ds.time.values[-1].year - ds.time.values[0].year + 1 + ) * 12 # get number of months + + assert result[0].time.shape == (time_length,) + assert result[0].time.values[0].isoformat() == "2071-01-01T00:00:00" + assert result[0].time.values[-1].isoformat() == "2080-12-01T00:00:00" + + # test time bounds + assert [t.isoformat() for t in result[0].time_bnds.values[0]] == [ + "2071-01-01T00:00:00", + "2071-01-31T00:00:00", + ] + assert [t.isoformat() for t in result[0].time_bnds.values[-1]] == [ + "2080-12-01T00:00:00", + "2080-12-31T00:00:00", + ] diff --git a/tests/ops/test_regrid.py b/tests/ops/test_regrid.py index b107ca38..d9409264 100644 --- a/tests/ops/test_regrid.py +++ b/tests/ops/test_regrid.py @@ -13,7 +13,6 @@ from .._common import CMIP5_MRSOS_ONE_TIME_STEP - XESMF_IMPORT_MSG = "xESMF >= 0.6.0 is needed for regridding functionalities." 
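For reference, the new ``average_time`` entry point exercised by the tests above follows the same calling convention as the other ops; a sketch, with a placeholder path standing in for any supported input::

    from clisops.ops.average import average_time

    # freq must be one of "day", "month" or "year"; with output_type="xarray"
    # a list of averaged Dataset objects is returned rather than netCDF files.
    result = average_time(
        "/path/to/tas_Amon_example.nc",  # placeholder: path(s) or an xarray Dataset
        freq="year",
        output_type="xarray",
    )
    annual_means = result[0]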
diff --git a/tests/ops/test_subset.py b/tests/ops/test_subset.py index ed746bf0..8b794153 100644 --- a/tests/ops/test_subset.py +++ b/tests/ops/test_subset.py @@ -30,6 +30,7 @@ CMIP6_SICONC, CMIP6_SICONC_DAY, CMIP6_TA, + CMIP6_TASMIN, CMIP6_TOS, CMIP6_TOS_ONE_TIME_STEP, _check_output_nc, @@ -1211,27 +1212,19 @@ def test_reverse_lon_curvilinear(self, load_esgf_test_data): np.testing.assert_array_equal(result[0].tos, result_rev[0].tos) def test_reverse_lon_cross_meridian_curvilinear(self, load_esgf_test_data): - # can't roll because ds has a curvilinear grid - with pytest.raises(Exception) as exc: - subset( - ds=CMIP6_TOS_ONE_TIME_STEP, - area=(-70, -45, 240, 45), - output_type="xarray", - ) - - # can't roll because ds has a curvilinear grid - with pytest.raises(Exception) as exc_rev: - subset( - ds=CMIP6_TOS_ONE_TIME_STEP, - area=(240, -45, -70, 45), - output_type="xarray", - ) + result = subset( + ds=CMIP6_TOS_ONE_TIME_STEP, + area=(-70, -45, 120, 45), + output_type="xarray", + ) - assert ( - str(exc.value) - == "The requested longitude subset (-70.0, 240.0) is not within the longitude bounds of this dataset and the data could not be converted to this longitude frame successfully. Please re-run your request with longitudes within the bounds of the dataset: (0.01, 360.00)" + result_rev = subset( + ds=CMIP6_TOS_ONE_TIME_STEP, + area=(120, -45, -70, 45), + output_type="xarray", ) - assert str(exc.value) == str(exc_rev.value) + + np.testing.assert_array_equal(result[0].tos, result_rev[0].tos) def test_reverse_lat_and_lon_curvilinear(self, load_esgf_test_data): result = subset( @@ -1612,8 +1605,10 @@ def test_subset_nc_no_fill_value(cmip5_tas_file, tmpdir): ds.to_netcdf(f"{tmpdir}/test_fill_values.nc") ds = _load_ds(f"{tmpdir}/test_fill_values.nc") + # assert np.isnan(float(ds.time.encoding.get("_FillValue"))) assert np.isnan(float(ds.lat.encoding.get("_FillValue"))) assert np.isnan(float(ds.lon.encoding.get("_FillValue"))) + assert np.isnan(float(ds.height.encoding.get("_FillValue"))) assert np.isnan(float(ds.lat_bnds.encoding.get("_FillValue"))) assert np.isnan(float(ds.lon_bnds.encoding.get("_FillValue"))) @@ -1621,9 +1616,61 @@ def test_subset_nc_no_fill_value(cmip5_tas_file, tmpdir): # check that there is no fill value in encoding for coordinate variables and bounds res = _load_ds(result) + assert "_FillValue" not in res.time.encoding + assert "_FillValue" not in res.lat.encoding + assert "_FillValue" not in res.lon.encoding + assert "_FillValue" not in res.height.encoding + + assert "_FillValue" not in res.lat_bnds.encoding + assert "_FillValue" not in res.lon_bnds.encoding + assert "_FillValue" not in res.time_bnds.encoding + + +def test_subset_cmip5_nc_consistent_bounds(cmip5_tas_file, tmpdir): + """Tests clisops subset function with a time subset and check the metadata""" + result = subset( + ds=CMIP5_TAS, + time=time_interval("2005-01-01T00:00:00", "2020-12-30T00:00:00"), + output_dir=tmpdir, + output_type="nc", + file_namer="simple", + ) + res = _load_ds(result) + # check fill value in bounds + assert "_FillValue" not in res.lat_bnds.encoding + assert "_FillValue" not in res.lon_bnds.encoding + assert "_FillValue" not in res.time_bnds.encoding + # check fill value in coordinates + assert "_FillValue" not in res.time.encoding assert "_FillValue" not in res.lat.encoding assert "_FillValue" not in res.lon.encoding + assert "_FillValue" not in res.height.encoding + # check coordinates in bounds + assert "coordinates" not in res.lat_bnds.encoding + assert "coordinates" not in 
res.lon_bnds.encoding + assert "coordinates" not in res.time_bnds.encoding + +def test_subset_cmip6_nc_consistent_bounds(cmip5_tas_file, tmpdir): + """Tests clisops subset function with a time subset and check the metadata""" + result = subset( + ds=CMIP6_TASMIN, + time=time_interval("2010-01-01T00:00:00", "2010-12-31T00:00:00"), + output_dir=tmpdir, + output_type="nc", + file_namer="simple", + ) + res = _load_ds(result) + # check fill value in bounds assert "_FillValue" not in res.lat_bnds.encoding assert "_FillValue" not in res.lon_bnds.encoding assert "_FillValue" not in res.time_bnds.encoding + # check fill value in coordinates + assert "_FillValue" not in res.time.encoding + assert "_FillValue" not in res.lat.encoding + assert "_FillValue" not in res.lon.encoding + assert "_FillValue" not in res.height.encoding + # check coordinates in bounds + assert "coordinates" not in res.lat_bnds.encoding + assert "coordinates" not in res.lon_bnds.encoding + assert "coordinates" not in res.time_bnds.encoding diff --git a/tests/test_file_namers.py b/tests/test_file_namers.py index fc2560fc..2c36700d 100644 --- a/tests/test_file_namers.py +++ b/tests/test_file_namers.py @@ -58,7 +58,7 @@ def test_SimpleFileNamer_with_chunking(load_esgf_test_data, tmpdir): def test_StandardFileNamer_no_project_match(): s = get_file_namer("standard")() - class Thing(object): + class Thing: pass mock_ds = Thing() diff --git a/tests/test_logging.py b/tests/test_logging.py new file mode 100644 index 00000000..8b57a2d7 --- /dev/null +++ b/tests/test_logging.py @@ -0,0 +1,63 @@ +import sys + +import pytest + +from clisops.utils.common import _logging_examples, enable_logging # noqa +from tests._common import ContextLogger + + +class TestLoggingFuncs: + @pytest.mark.xfail( + reason="pytest-loguru does not implement logging levels for caplog yet." 
+ ) + def test_logging_configuration(self, caplog): + with ContextLogger(caplog): + caplog.set_level("WARNING", logger="clisops") + + _logging_examples() # noqa + + assert ("clisops.utils.common", 10, "1") not in caplog.record_tuples + assert ("clisops.utils.common", 40, "4") in caplog.record_tuples + + def test_disabled_enabled_logging(self, capsys): + with ContextLogger() as _logger: + + _logger.disable("clisops") + + # CLISOPS disabled + _logger.add(sys.stderr, level="WARNING") + _logger.add(sys.stdout, level="INFO") + + _logging_examples() # noqa + + captured = capsys.readouterr() + assert "WARNING" not in captured.err + assert "INFO" not in captured.out + + # re-enable CLISOPS logging + _logger.enable("clisops") + + _logging_examples() # noqa + + captured = capsys.readouterr() + assert "INFO" not in captured.err + assert "WARNING" in captured.err + assert "INFO" in captured.out + + def test_logging_enabler(self, capsys): + with ContextLogger(): + + _logging_examples() # noqa + + captured = capsys.readouterr() + assert "WARNING" not in captured.err + assert "INFO" not in captured.out + + enable_logging() + + _logging_examples() # noqa + + captured = capsys.readouterr() + assert "INFO" not in captured.err + assert "WARNING" in captured.err + assert "INFO" in captured.out diff --git a/tests/test_output_utils.py b/tests/test_output_utils.py index eff990ca..d77c1b98 100644 --- a/tests/test_output_utils.py +++ b/tests/test_output_utils.py @@ -1,12 +1,11 @@ import os +import sys import tempfile -from glob import glob from pathlib import Path -from unittest import mock import xarray as xr -from clisops import CONFIG, logging +from clisops import CONFIG from clisops.utils.common import expand_wildcards from clisops.utils.file_namers import get_file_namer from clisops.utils.output_utils import ( @@ -15,9 +14,7 @@ get_output, get_time_slices, ) -from tests._common import CMIP5_TAS, CMIP6_TOS - -LOGGER = logging.getLogger(__file__) +from tests._common import CMIP5_TAS, CMIP6_TOS, ContextLogger def _open(coll): @@ -90,30 +87,37 @@ def test_get_time_slices_multiple_slices(load_esgf_test_data): assert resp[1] == second -def test_tmp_dir_created_with_staging_dir(): - # copy part of function that creates tmp dir to check that it is created - CONFIG["clisops:write"]["output_staging_dir"] = "tests/" - staging_dir = CONFIG["clisops:write"].get("output_staging_dir", "") +def test_tmp_dir_created_with_staging_dir(tmpdir): + with ContextLogger() as _logger: + _logger.add(sys.stdout, level="INFO") - output_path = "./output_001.nc" + staging = Path(tmpdir).joinpath("tests") + staging.mkdir(exist_ok=True) - if os.path.isdir(staging_dir): - tmp_dir = tempfile.TemporaryDirectory(dir=staging_dir) - fname = os.path.basename(output_path) - target_path = os.path.join(tmp_dir.name, fname) - LOGGER.info(f"Writing to temporary path: {target_path}") - else: - target_path = output_path + # copy part of function that creates tmp dir to check that it is created + CONFIG["clisops:write"]["output_staging_dir"] = staging + staging_dir = CONFIG["clisops:write"].get("output_staging_dir", "") + + output_path = "./output_001.nc" - assert target_path != "output_001.nc" - assert len(glob("tests/tmp*")) == 1 - assert "tests/tmp" in glob("tests/tmp*")[0] + if os.path.isdir(staging_dir): + tmp_dir = tempfile.TemporaryDirectory(dir=staging_dir) + fname = os.path.basename(output_path) + target_path = os.path.join(tmp_dir.name, fname) + _logger.info(f"Writing to temporary path: {target_path}") + else: + target_path = output_path - # 
delete the temporary directory - tmp_dir.cleanup() + assert target_path != "output_001.nc" + temp_test_folders = [f for f in staging.glob("tmp*")] + assert len(temp_test_folders) == 1 + assert "tests/tmp" in temp_test_folders[0].as_posix() def test_tmp_dir_not_created_with_no_staging_dir(): + # with ContextLogger() as _logger: + # _logger.add(sys.stdout, level="INFO") + # copy part of function that creates tmp dir to check that it is not created when no staging dir CONFIG["clisops:write"]["output_staging_dir"] = "" staging_dir = CONFIG["clisops:write"].get("output_staging_dir", "") @@ -124,7 +128,6 @@ def test_tmp_dir_not_created_with_no_staging_dir(): tmp_dir = tempfile.TemporaryDirectory(dir=staging_dir) fname = os.path.basename(output_path) target_path = os.path.join(tmp_dir.name, fname) - LOGGER.info(f"Writing to temporary path: {target_path}") else: target_path = output_path @@ -132,48 +135,59 @@ def test_tmp_dir_not_created_with_no_staging_dir(): def test_no_staging_dir(caplog): + with ContextLogger(caplog) as _logger: + _logger.add(sys.stdout, level="INFO") + caplog.set_level("INFO", logger="clisops") - CONFIG["clisops:write"]["output_staging_dir"] = "" - ds = _open(CMIP5_TAS) - output_path = get_output( - ds, output_type="nc", output_dir=".", namer=get_file_namer("simple")() - ) + CONFIG["clisops:write"]["output_staging_dir"] = "" + ds = _open(CMIP5_TAS) + output_path = get_output( + ds, output_type="nc", output_dir=".", namer=get_file_namer("simple")() + ) - assert "Writing to temporary path: " not in caplog.text - assert output_path == "output_001.nc" + assert "Writing to temporary path: " not in caplog.text + assert output_path == "output_001.nc" - os.remove("output_001.nc") + os.remove("output_001.nc") def test_invalid_staging_dir(caplog): - # check stagin dir not used with invalid directory - CONFIG["clisops:write"]["output_staging_dir"] = "test/not/real/dir/" + with ContextLogger(caplog) as _logger: + _logger.add(sys.stdout, level="INFO") + caplog.set_level("INFO", logger="clisops") - ds = _open(CMIP5_TAS) - output_path = get_output( - ds, output_type="nc", output_dir=".", namer=get_file_namer("simple")() - ) - assert "Writing to temporary path: " not in caplog.text + # check staging dir not used with invalid directory + CONFIG["clisops:write"]["output_staging_dir"] = "test/not/real/dir/" - assert output_path == "output_001.nc" + ds = _open(CMIP5_TAS) + output_path = get_output( + ds, output_type="nc", output_dir=".", namer=get_file_namer("simple")() + ) + assert "Writing to temporary path: " not in caplog.text + assert output_path == "output_001.nc" - os.remove("output_001.nc") + os.remove("output_001.nc") -def test_staging_dir_used(caplog): - # check staging dir used when valid directory - CONFIG["clisops:write"]["output_staging_dir"] = "tests/" +def test_staging_dir_used(caplog, tmpdir): + with ContextLogger(caplog) as _logger: + _logger.add(sys.stdout, level="INFO") + caplog.set_level("INFO", logger="clisops") - ds = _open(CMIP5_TAS) + # check staging dir used when valid directory + staging = Path(tmpdir).joinpath("tests") + staging.mkdir(exist_ok=True) + CONFIG["clisops:write"]["output_staging_dir"] = str(staging) + ds = _open(CMIP5_TAS) - output_path = get_output( - ds, output_type="nc", output_dir=".", namer=get_file_namer("simple")() - ) + output_path = get_output( + ds, output_type="nc", output_dir=".", namer=get_file_namer("simple")() + ) - assert "Writing to temporary path: tests/" in caplog.text - assert output_path == "output_001.nc" + assert f"Writing to 
temporary path: {staging}" in caplog.text + assert output_path == "output_001.nc" - os.remove("output_001.nc") + Path("output_001.nc").unlink() def test_final_output_path_staging_dir(): @@ -198,15 +212,19 @@ def test_final_output_path_no_staging_dir(): os.remove("output_001.nc") -def test_tmp_dir_deleted(): - # check temporary directory under stagin dir gets deleted after data has bee staged - CONFIG["clisops:write"]["output_staging_dir"] = "tests/" +def test_tmp_dir_deleted(tmpdir): + # check temporary directory under staging dir gets deleted after data has bee staged + staging = Path(tmpdir).joinpath("tests") + staging.mkdir(exist_ok=True) + CONFIG["clisops:write"]["output_staging_dir"] = staging + + # CONFIG["clisops:write"]["output_staging_dir"] = "tests/" ds = _open(CMIP5_TAS) get_output(ds, output_type="nc", output_dir=".", namer=get_file_namer("simple")()) # check that no tmpdir directories exist - assert glob("tests/tmp*") == [] + assert len([f for f in staging.glob("tmp*")]) == 0 os.remove("output_001.nc") diff --git a/tests/test_roll.py b/tests/test_roll.py index c5360d8f..45228000 100644 --- a/tests/test_roll.py +++ b/tests/test_roll.py @@ -44,8 +44,8 @@ def test_roll_lon_minus_180(load_esgf_test_data): ds, lon = setup_test() # check longitude is 0 to 360 initially - assert isclose(lon.values.min(), 0, abs_tol=10 ** 2) - assert isclose(lon.values.max(), 360, abs_tol=10 ** 2) + assert isclose(lon.values.min(), 0, abs_tol=10**2) + assert isclose(lon.values.max(), 360, abs_tol=10**2) # roll longitude by -180 ds = ds.roll(shifts={f"{lon.name}": -180}, roll_coords=True) @@ -159,8 +159,8 @@ def test_convert_lon_coords(tmpdir, load_esgf_test_data): ds.coords[lon.name] = (ds.coords[lon.name] + 180) % 360 - 180 ds = ds.sortby(ds[lon.name]) - assert isclose(ds.lon.values.min(), -180, abs_tol=10 ** 2) - assert isclose(ds.lon.values.max(), 180, abs_tol=10 ** 2) + assert isclose(ds.lon.values.min(), -180, abs_tol=10**2) + assert isclose(ds.lon.values.max(), 180, abs_tol=10**2) result = subset( ds=ds, @@ -191,8 +191,8 @@ def test_roll_convert_lon_coords(load_esgf_test_data): ds_roll.coords[lon.name] = ds_roll.coords[lon.name] - 180 - assert isclose(ds_roll.lon.values.min(), -180, abs_tol=10 ** 2) - assert isclose(ds_roll.lon.values.max(), 180, abs_tol=10 ** 2) + assert isclose(ds_roll.lon.values.min(), -180, abs_tol=10**2) + assert isclose(ds_roll.lon.values.max(), 180, abs_tol=10**2) result = subset( ds=ds_roll, @@ -253,8 +253,8 @@ def test_compare_methods(load_esgf_test_data): ds_roll.coords[lon.name] = ds_roll.coords[lon.name] - 180 - assert isclose(ds_roll.lon.values.min(), -180, abs_tol=10 ** 2) - assert isclose(ds_roll.lon.values.max(), 180, abs_tol=10 ** 2) + assert isclose(ds_roll.lon.values.min(), -180, abs_tol=10**2) + assert isclose(ds_roll.lon.values.max(), 180, abs_tol=10**2) result1 = subset( ds=ds_roll, @@ -270,8 +270,8 @@ def test_compare_methods(load_esgf_test_data): ds.coords[lon.name] = (ds.coords[lon.name] + 180) % 360 - 180 ds = ds.sortby(ds[lon.name]) - assert isclose(ds.lon.values.min(), -180, abs_tol=10 ** 2) - assert isclose(ds.lon.values.max(), 180, abs_tol=10 ** 2) + assert isclose(ds.lon.values.min(), -180, abs_tol=10**2) + assert isclose(ds.lon.values.max(), 180, abs_tol=10**2) result2 = subset( ds=ds, diff --git a/tests/test_utils/test_dataset_utils.py b/tests/test_utils/test_dataset_utils.py index 47ed1ed2..a9ed9e2c 100644 --- a/tests/test_utils/test_dataset_utils.py +++ b/tests/test_utils/test_dataset_utils.py @@ -1,16 +1,29 @@ +import cf_xarray as cfxr +import 
numpy as np import pytest import xarray as xr -from clisops.utils.dataset_utils import adjust_date_to_calendar +import clisops.utils.dataset_utils as clidu -from .._common import CMIP6_SICONC +from .._common import ( + C3S_CORDEX_AFR_TAS, + C3S_CORDEX_ANT_SFC_WIND, + CMIP6_GFDL_EXTENT, + CMIP6_IITM_EXTENT, + CMIP6_OCE_HALO_CNRM, + CMIP6_SICONC, + CMIP6_TAS_ONE_TIME_STEP, + CMIP6_TOS_ONE_TIME_STEP, + CMIP6_UNSTR_ICON_A, + CORDEX_TAS_ONE_TIMESTEP, +) def test_add_day(): da = xr.open_dataset(CMIP6_SICONC, use_cftime=True) date = "2012-02-29T00:00:00" - new_date = adjust_date_to_calendar(da, date, "forwards") + new_date = clidu.adjust_date_to_calendar(da, date, "forwards") assert new_date == "2012-03-01T00:00:00" @@ -19,7 +32,7 @@ def test_sub_day(): da = xr.open_dataset(CMIP6_SICONC, use_cftime=True) date = "2012-02-30T00:00:00" - new_date = adjust_date_to_calendar(da, date, "backwards") + new_date = clidu.adjust_date_to_calendar(da, date, "backwards") assert new_date == "2012-02-28T00:00:00" @@ -29,7 +42,7 @@ def test_invalid_day(): date = "2012-02-29T00:00:00" with pytest.raises(Exception) as exc: - adjust_date_to_calendar(da, date, "odd") + clidu.adjust_date_to_calendar(da, date, "odd") assert ( str(exc.value) == "Invalid value for direction: odd. This should be either 'backwards' to indicate subtracting a day or 'forwards' for adding a day." @@ -41,7 +54,251 @@ def test_date_out_of_expected_range(): date = "2012-00-01T00:00:00" with pytest.raises(Exception) as exc: - adjust_date_to_calendar(da, date, "forwards") + clidu.adjust_date_to_calendar(da, date, "forwards") assert ( str(exc.value) == "Invalid input 0 for month. Expected value between 1 and 12." ) + + +def test_detect_coordinate_and_bounds(): + "Test detect_bounds and detect_coordinate functions." 
+ ds_a = xr.open_mfdataset(C3S_CORDEX_AFR_TAS, use_cftime=True, combine="by_coords") + ds_b = xr.open_mfdataset( + C3S_CORDEX_ANT_SFC_WIND, use_cftime=True, combine="by_coords" + ) + ds_c = xr.open_dataset(CMIP6_UNSTR_ICON_A) + ds_d = xr.open_dataset(CMIP6_OCE_HALO_CNRM) + + # check lat, lon are found + lat_a = clidu.detect_coordinate(ds_a, "latitude") + lon_a = clidu.detect_coordinate(ds_a, "longitude") + lat_b = clidu.detect_coordinate(ds_b, "latitude") + lon_b = clidu.detect_coordinate(ds_b, "longitude") + lat_c = clidu.detect_coordinate(ds_c, "latitude") + lon_c = clidu.detect_coordinate(ds_c, "longitude") + lat_d = clidu.detect_coordinate(ds_d, "latitude") + lon_d = clidu.detect_coordinate(ds_d, "longitude") + + # assert the correct variables have been found + assert lat_a == "lat" + assert lon_a == "lon" + assert lat_b == "lat" + assert lon_b == "lon" + assert lat_c == "latitude" + assert lon_c == "longitude" + assert lat_d == "lat" + assert lon_d == "lon" + + # assert detected bounds + assert clidu.detect_bounds(ds_a, lat_a) == "lat_vertices" + assert clidu.detect_bounds(ds_a, lon_a) == "lon_vertices" + assert clidu.detect_bounds(ds_b, lat_b) is None + assert clidu.detect_bounds(ds_b, lon_b) is None + assert clidu.detect_bounds(ds_c, lat_c) == "latitude_bnds" + assert clidu.detect_bounds(ds_c, lon_c) == "longitude_bnds" + assert clidu.detect_bounds(ds_d, lat_d) == "lat_bnds" + assert clidu.detect_bounds(ds_d, lon_d) == "lon_bnds" + + # test that latitude and longitude are still found when they are data variables + # reset coords sets lat and lon as data variables + ds_a = ds_a.reset_coords([lat_a, lon_a]) + ds_b = ds_b.reset_coords([lat_b, lon_b]) + ds_c = ds_c.reset_coords([lat_c, lon_c]) + ds_d = ds_d.reset_coords([lat_d, lon_d]) + assert lat_a == clidu.detect_coordinate(ds_a, "latitude") + assert lon_a == clidu.detect_coordinate(ds_a, "longitude") + assert lat_b == clidu.detect_coordinate(ds_b, "latitude") + assert lon_b == clidu.detect_coordinate(ds_b, "longitude") + assert lat_c == clidu.detect_coordinate(ds_c, "latitude") + assert lon_c == clidu.detect_coordinate(ds_c, "longitude") + assert lat_d == clidu.detect_coordinate(ds_d, "latitude") + assert lon_d == clidu.detect_coordinate(ds_d, "longitude") + + +def test_detect_gridtype(): + "Test the function detect_gridtype" + ds_a = xr.open_dataset(CMIP6_UNSTR_ICON_A, use_cftime=True) + ds_b = xr.open_dataset(CMIP6_TOS_ONE_TIME_STEP, use_cftime=True) + ds_c = xr.open_dataset(CMIP6_TAS_ONE_TIME_STEP, use_cftime=True) + assert ( + clidu.detect_gridtype( + ds_a, + lat="latitude", + lon="longitude", + lat_bnds="latitude_bnds", + lon_bnds="longitude_bnds", + ) + == "unstructured" + ) + assert ( + clidu.detect_gridtype( + ds_b, + lat="latitude", + lon="longitude", + lat_bnds="vertices_latitude", + lon_bnds="vertices_longitude", + ) + == "curvilinear" + ) + assert ( + clidu.detect_gridtype( + ds_c, lat="lat", lon="lon", lat_bnds="lat_bnds", lon_bnds="lon_bnds" + ) + == "regular_lat_lon" + ) + + +def test_crosses_0_meridian(): + "Test the _crosses_0_meridian function" + # Case 1 - longitude crossing 180° meridian + lon = np.arange(160.0, 200.0, 1.0) + + # convert to [-180, 180], min and max now suggest 0-meridian crossing + lon = np.where(lon > 180, lon - 360, lon) + + da = xr.DataArray(dims=["x"], coords={"x": lon}) + assert not clidu._crosses_0_meridian(da["x"]) + + # Case 2 - regional dataset ranging [315 .. 
66] but for whatever reason not defined on + # [-180, 180] longitude frame + ds = xr.open_dataset(CORDEX_TAS_ONE_TIMESTEP) + assert np.isclose(ds["lon"].min(), 0, atol=0.1) + assert np.isclose(ds["lon"].max(), 360, atol=0.1) + + # Convert to -180, 180 frame and confirm crossing 0-meridian + ds, ll, lu = clidu.cf_convert_between_lon_frames(ds, (-180, 180)) + assert np.isclose(ds["lon"].min(), -45.4, atol=0.1) + assert np.isclose(ds["lon"].max(), 66.1, atol=0.1) + assert clidu._crosses_0_meridian(ds["lon"]) + + +def test_convert_interval_between_lon_frames(): + "Test the helper function _convert_interval_between_lon_frames" + # Convert from 0,360 to -180,180 longitude frame and vice versa + assert clidu._convert_interval_between_lon_frames(20, 60) == (20, 60) + assert clidu._convert_interval_between_lon_frames(190, 200) == (-170, -160) + assert clidu._convert_interval_between_lon_frames(-20, -90) == (270, 340) + + # Exception when crossing 0°- or 180°-meridian + with pytest.raises( + Exception, + match="Cannot convert longitude interval if it includes the 0°- or 180°-meridian.", + ): + clidu._convert_interval_between_lon_frames(170, 300) + with pytest.raises( + Exception, + match="Cannot convert longitude interval if it includes the 0°- or 180°-meridian.", + ): + clidu._convert_interval_between_lon_frames(-30, 10) + + +def test_convert_lon_frame_bounds(): + "Test the function cf_convert_between_lon_frames" + # Load tutorial dataset defined on [200,330] + ds = xr.tutorial.open_dataset("air_temperature") + assert ds["lon"].min() == 200.0 + assert ds["lon"].max() == 330.0 + + # Create bounds + dsb = ds.cf.add_bounds("lon") + + # Convert to other lon frame + conv, ll, lu = clidu.cf_convert_between_lon_frames(dsb, (-180, 180)) + + assert conv["lon"].values[0] == -160.0 + assert conv["lon"].values[-1] == -30.0 + + # Check bounds are containing the respective cell centers + assert np.all(conv["lon"].values[:] > conv["lon_bounds"].values[0, :]) + assert np.all(conv["lon"].values[:] < conv["lon_bounds"].values[1, :]) + + # Convert only lon_interval + conv, ll, lu = clidu.cf_convert_between_lon_frames(dsb, (-180, -10)) + + assert conv["lon"].min() == 200.0 + assert conv["lon"].max() == 330.0 + assert ll == 180.0 + assert lu == 350.0 + + +def test_convert_lon_frame_shifted_bounds(): + ds = xr.open_dataset(CMIP6_GFDL_EXTENT, use_cftime=True) + + # confirm shifted frame + assert np.isclose(ds["lon"].min(), -300.0, atol=0.5) + assert np.isclose(ds["lon"].max(), 60.0, atol=0.5) + + # convert to [-180, 180] + ds_a, ll, lu = clidu.cf_convert_between_lon_frames(ds, (-180, 180)) + assert (ll, lu) == (-180, 180) + assert np.isclose(ds_a["lon"].min(), -180.0, atol=0.5) + assert np.isclose(ds_a["lon"].max(), 180.0, atol=0.5) + assert np.isclose(ds_a["lon_bnds"].min(), -180.0, atol=0.5) + assert np.isclose(ds_a["lon_bnds"].max(), 180.0, atol=0.5) + + # convert to [0, 360] + ds_b, ll, lu = clidu.cf_convert_between_lon_frames(ds, (0, 360)) + assert (ll, lu) == (0, 360) + assert np.isclose(ds_b["lon"].min(), 0.0, atol=0.5) + assert np.isclose(ds_b["lon"].max(), 360.0, atol=0.5) + assert np.isclose(ds_b["lon_bnds"].min(), 0.0, atol=0.5) + assert np.isclose(ds_b["lon_bnds"].max(), 360.0, atol=0.5) + + # convert intermediate result to [0, 360] + ds_c, ll, lu = clidu.cf_convert_between_lon_frames(ds_a, (0, 360)) + assert (ll, lu) == (0, 360) + assert np.isclose(ds_c["lon"].min(), 0.0, atol=0.5) + assert np.isclose(ds_c["lon"].max(), 360.0, atol=0.5) + assert np.isclose(ds_c["lon_bnds"].min(), 0.0, atol=0.5) + assert 
np.isclose(ds_c["lon_bnds"].max(), 360.0, atol=0.5) + + # convert intermediate result to [-180, 180] + ds_d, ll, lu = clidu.cf_convert_between_lon_frames(ds_a, (-180, 180)) + assert (ll, lu) == (-180, 180) + assert np.isclose(ds_d["lon"].min(), -180.0, atol=0.5) + assert np.isclose(ds_d["lon"].max(), 180.0, atol=0.5) + assert np.isclose(ds_d["lon_bnds"].min(), -180.0, atol=0.5) + assert np.isclose(ds_d["lon_bnds"].max(), 180.0, atol=0.5) + + # assert projection coordinate sorted + assert np.all(ds_d["x"].values[1:] - ds_d["x"].values[:-1] > 0.0) + assert np.all(ds_c["x"].values[1:] - ds_c["x"].values[:-1] > 0.0) + + +def test_convert_lon_frame_shifted_no_bounds(): + ds = xr.open_dataset(CMIP6_IITM_EXTENT, use_cftime=True) + + # confirm shifted frame + assert np.isclose(ds["longitude"].min(), -280.0, atol=1.0) + assert np.isclose(ds["longitude"].max(), 80.0, atol=1.0) + + # convert to [-180, 180] + ds_a, ll, lu = clidu.cf_convert_between_lon_frames(ds, (-180, 180)) + assert (ll, lu) == (-180, 180) + assert np.isclose(ds_a["longitude"].min(), -180.0, atol=1.0) + assert np.isclose(ds_a["longitude"].max(), 180.0, atol=1.0) + + # convert to [0, 360] + ds_b, ll, lu = clidu.cf_convert_between_lon_frames(ds, (0, 360)) + assert (ll, lu) == (0, 360) + assert np.isclose(ds_b["longitude"].min(), 0.0, atol=1.0) + assert np.isclose(ds_b["longitude"].max(), 360.0, atol=1.0) + + # convert intermediate result to [0, 360] + ds_c, ll, lu = clidu.cf_convert_between_lon_frames(ds_a, (0, 360)) + assert (ll, lu) == (0, 360) + assert np.isclose(ds_c["longitude"].min(), 0.0, atol=1.0) + assert np.isclose(ds_c["longitude"].max(), 360.0, atol=1.0) + + # convert intermediate result to [-180, 180] + ds_d, ll, lu = clidu.cf_convert_between_lon_frames(ds_a, (-180, 180)) + assert (ll, lu) == (-180, 180) + assert np.isclose(ds_d["longitude"].min(), -180.0, atol=1.0) + assert np.isclose(ds_d["longitude"].max(), 180.0, atol=1.0) + + # assert projection coordinate sorted + assert np.all(ds_d["x"].values[1:] - ds_d["x"].values[:-1] > 0.0) + assert np.all(ds_c["x"].values[1:] - ds_c["x"].values[:-1] > 0.0) + + +# todo: add a few more tests of cf_convert_lon_frame using xe.util functions to create regional and global datasets diff --git a/tox.ini b/tox.ini index 978108eb..d0c88553 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py{37,38,39}, black, docs +envlist = py{38,39,310}, black, docs requires = pip >= 21.0 opts = -v @@ -16,7 +16,7 @@ deps = black commands = flake8 clisops tests - black --check --target-version py37 clisops tests --exclude tests/mini-esgf-data + black --check --target-version py38 clisops tests --exclude tests/mini-esgf-data [testenv:docs] extras = docs @@ -28,10 +28,11 @@ whitelist_externals = [testenv] setenv = + PYTEST_ADDOPTS = "--color=yes" PYTHONPATH = {toxinidir} GDAL_VERSION = 3.0.0 COV_CORE_SOURCE= -passenv = CI TRAVIS TRAVIS_* PROJ_DIR LD_LIBRARY_PATH GDAL_VERSION GDAL_DATA PATH +passenv = CI PROJ_DIR LD_LIBRARY_PATH GDAL_VERSION GDAL_DATA PATH extras = dev install_command = python -m pip install --no-user {opts} {packages} download = True
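The longitude-frame conversions tested above boil down to simple modular arithmetic; a self-contained sketch of the mapping used throughout the roll and subset tests (independent of clisops)::

    import numpy as np

    lon_0_360 = np.array([0.0, 90.0, 180.0, 270.0, 359.0])

    # [0, 360) -> [-180, 180): the same expression as in the roll/convert tests.
    lon_pm180 = (lon_0_360 + 180.0) % 360.0 - 180.0   # -> [0, 90, -180, -90, -1]

    # [-180, 180) -> [0, 360)
    back = lon_pm180 % 360.0

    assert np.allclose(back, lon_0_360)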