diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..3bf1101 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,11 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for all configuration options: +# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates + +version: 2 +updates: + - package-ecosystem: "pip" # See documentation for possible values + directory: "/" # Location of package manifests + schedule: + interval: "monthly" diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..03c2c1c --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,56 @@ +name: Tests + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + + # Allow job to be triggered manually. + workflow_dispatch: + +# Cancel in-progress jobs when pushing to the same branch. 
+concurrency: + cancel-in-progress: true + group: ${{ github.workflow }}-${{ github.ref }} + +jobs: + + tests: + runs-on: ${{ matrix.os }} + + strategy: + fail-fast: false + matrix: + os: [ "ubuntu-latest" ] + python-version: [ "3.7", "3.8", "3.9", "3.10", "3.11" ] + + name: Python ${{ matrix.python-version }} on OS ${{ matrix.os }} + steps: + + - name: Acquire sources + uses: actions/checkout@v3 + + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + architecture: x64 + cache: 'pip' + cache-dependency-path: | + requirements-docs.txt + requirements-release.txt + requirements-tests.txt + setup.py + + - name: Run tests + run: make test + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v3 + with: + files: ./coverage.xml + flags: unittests + env_vars: OS,PYTHON + name: codecov-umbrella + fail_ci_if_error: false diff --git a/.gitignore b/.gitignore index 8e8d8ae..c9fc2b4 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,5 @@ /tmp /dist /doc/_build +.coverage* +coverage.xml diff --git a/CHANGES.rst b/CHANGES.rst index a5b106e..7396fa9 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -4,6 +4,14 @@ phenodata changelog development =========== +- Fix ``nearest-station`` with ``--format=json`` +- Fix filtering by ``int64``-type identifiers, see GH-7 +- Fix SQL filtering with DuckDB +- Tests: Add software tests +- Improve documentation +- CI: Add GHA configuration to invoke software tests +- CI/Tests: Fix installation on Python 3.7 to 3.9 +- Dependencies: Switch from ``appdirs`` to ``platformdirs`` 2020-12-29 0.11.0 ================= diff --git a/Makefile b/Makefile index 4a6ee9c..c58c300 100644 --- a/Makefile +++ b/Makefile @@ -1,43 +1,67 @@ +# ============= +# Configuration +# ============= + +$(eval venv := .venv) +$(eval pip := $(venv)/bin/pip) +$(eval python := $(venv)/bin/python) +$(eval pytest := $(venv)/bin/pytest) +$(eval bumpversion := $(venv)/bin/bumpversion) +$(eval twine := 
$(venv)/bin/twine) + + # ============ # Main targets # ============ +# Run software tests. +.PHONY: test +test: install-package install-tests + $(pytest) + # Release this piece of software # Synopsis: # make release bump=minor (major,minor,patch) -release: bumpversion push sdist pypi-upload +release: bumpversion push build pypi-upload # Build the documentation docs-html: install-doctools - $(eval venvpath := ".venv_project") touch doc/index.rst - export SPHINXBUILD="`pwd`/$(venvpath)/bin/sphinx-build"; cd doc; make html + export SPHINXBUILD="`pwd`/$(venv)/bin/sphinx-build"; cd doc; make html # =============== # Utility targets # =============== bumpversion: install-releasetools - $(eval venvpath := ".venv_project") - @$(venvpath)/bin/bumpversion $(bump) + @$(bumpversion) $(bump) push: git push && git push --tags -sdist: - $(eval venvpath := ".venv_project") - @$(venvpath)/bin/python setup.py sdist +build: + @$(python) -m build pypi-upload: install-releasetools - $(eval venvpath := ".venv_project") - @$(venvpath)/bin/twine upload --skip-existing dist/*.tar.gz + @$(twine) upload --skip-existing dist/*.tar.gz + + +# ================= +# Installer targets +# ================= + +install-package: + @test -e $(python) || python3 -m venv $(venv) + $(pip) install --prefer-binary --editable=.[test,develop,release,sql] install-doctools: - $(eval venvpath := ".venv_project") - @test -e $(venvpath)/bin/python || `command -v virtualenv` --python=`command -v python` $(venvpath) - @$(venvpath)/bin/pip install --quiet --requirement requirements-docs.txt --upgrade + @test -e $(python) || python3 -m venv $(venv) + $(pip) install --requirement requirements-docs.txt --upgrade install-releasetools: - $(eval venvpath := ".venv_project") - @test -e $(venvpath)/bin/python || `command -v virtualenv` --python=`command -v python` $(venvpath) - @$(venvpath)/bin/pip install --quiet --requirement requirements-release.txt --upgrade + @test -e $(python) || python3 -m venv $(venv) + $(pip) install 
--requirement requirements-release.txt --upgrade + +install-tests: + @test -e $(python) || python3 -m venv $(venv) + $(pip) install --requirement requirements-tests.txt --upgrade diff --git a/README.rst b/README.rst index 81b4b94..4e9826e 100644 --- a/README.rst +++ b/README.rst @@ -1,40 +1,55 @@ -.. image:: https://img.shields.io/badge/Python-2.7,%203.7,%203.8,%203.9-green.svg +.. image:: https://github.com/earthobservations/phenodata/workflows/Tests/badge.svg + :target: https://github.com/earthobservations/phenodata/actions?workflow=Tests + +.. image:: https://codecov.io/gh/earthobservations/phenodata/branch/main/graph/badge.svg + :target: https://codecov.io/gh/earthobservations/phenodata + :alt: Test suite code coverage + +.. image:: https://img.shields.io/pypi/pyversions/phenodata.svg :target: https://pypi.org/project/phenodata/ .. image:: https://img.shields.io/pypi/v/phenodata.svg :target: https://pypi.org/project/phenodata/ -.. image:: https://img.shields.io/github/tag/earthobservations/phenodata.svg - :target: https://github.com/earthobservations/phenodata +.. image:: https://img.shields.io/pypi/status/phenodata.svg + :target: https://pypi.org/project/phenodata/ -.. image:: https://img.shields.io/pypi/dm/phenodata.svg +.. image:: https://img.shields.io/pypi/l/phenodata.svg :target: https://pypi.org/project/phenodata/ +.. image:: https://static.pepy.tech/badge/phenodata/month + :target: https://pepy.tech/project/phenodata + | -################################################# -phenodata - phenology data acquisition for humans -################################################# +######### +phenodata +######### + +*Phenology data acquisition for humans.* ***** About ***** -phenodata is a data acquisition and manipulation toolkit for open access phenology data. -It is written in Python. 
-Currently, it implements data wrappers for acquiring phenology observation data published -on the DWD Climate Data Center (CDC) FTP server operated by »Deutscher Wetterdienst« (DWD). +phenodata is a data acquisition and manipulation toolkit for open access +phenology data. It is written in Python. + +Currently, it implements data wrappers for acquiring phenology observation +data published on the DWD Climate Data Center (CDC) FTP server operated by +»Deutscher Wetterdienst« (DWD). -Under the hood, it uses the fine Pandas_ data analysis library for data mangling, amongst others. +Under the hood, it uses the `pandas`_ data analysis library for data mangling, +amongst others. -.. _Pandas: https://pandas.pydata.org/ **************** Acknowledgements **************** -Thanks to the many observers, »Deutscher Wetterdienst«, + +Thanks to the many observers of »Deutscher Wetterdienst« (DWD), the »Global Phenological Monitoring programme« and all people working behind the scenes for their commitment in recording the observations and for making the excellent datasets available to the community. You know who you are. @@ -46,7 +61,8 @@ Getting started Introduction ============ -For most acquisition tasks, you must choose from one of two different datasets: `annual-reporters`_ and `immediate-reporters`_. +For most acquisition tasks, you must choose from one of two different datasets: +`annual-reporters`_ and `immediate-reporters`_. To improve data acquisition performance, also consider applying the ``--filename=`` parameter for file name filtering. @@ -55,19 +71,17 @@ Example: When using ``--filename=Hasel,Schneegloeckchen``, only file names conta ``Hasel`` or ``Schneegloeckchen`` will be retrieved, thus minimizing the required effort to acquire all files. -.. _annual-reporters: https://www.dwd.de/DE/klimaumwelt/klimaueberwachung/phaenologie/daten_deutschland/jahresmelder/jahresmelder_node.html -.. 
_immediate-reporters: https://www.dwd.de/DE/klimaumwelt/klimaueberwachung/phaenologie/daten_deutschland/sofortmelder/sofortmelder_node.html Install ======= + If you know your way around Python, installing this software is really easy:: - pip install phenodata --upgrade + pip install 'phenodata[sql]' --upgrade -Please refer to the `virtualenv`_ page about further recommendations how to install and use this software. - -.. _virtualenv: https://github.com/earthobservations/phenodata/blob/main/doc/virtualenv.rst +Please refer to the `virtualenv`_ page about further recommendations how to +install and use this software. Usage @@ -81,12 +95,12 @@ Usage phenodata list-phases --source=dwd [--format=csv] phenodata list-stations --source=dwd --dataset=immediate [--all] [--filter=berlin] [--sort=Stationsname] [--format=csv] phenodata nearest-station --source=dwd --dataset=immediate --latitude=52.520007 --longitude=13.404954 [--format=csv] - phenodata nearest-stations --source=dwd --dataset=immediate [--all] --latitude=52.520007 --longitude=13.404954 [--limit=10] [--format=csv] + phenodata nearest-stations --source=dwd --dataset=immediate --latitude=52.520007 --longitude=13.404954 [--all] [--limit=10] [--format=csv] phenodata list-quality-levels --source=dwd [--format=csv] phenodata list-quality-bytes --source=dwd [--format=csv] phenodata list-filenames --source=dwd --dataset=immediate --partition=recent [--filename=Hasel,Schneegloeckchen] [--year=2017] phenodata list-urls --source=dwd --dataset=immediate --partition=recent [--filename=Hasel,Schneegloeckchen] [--year=2017] - phenodata (observations|forecast) --source=dwd --dataset=immediate --partition=recent [--filename=Hasel,Schneegloeckchen] [--station-id=164,717] [--species-id=113,127] [--phase-id=5] [--quality-level=10] [--quality-byte=1,2,3] [--station=berlin,brandenburg] [--species=hazel,snowdrop] [--species-preset=mellifera-de-primary] [--phase=flowering] [--quality=ROUTKLI] [--year=2017] [--humanize] [--show-ids] 
[--language=german] [--long-station] [--sort=Datum] [--format=csv] [--verbose] + phenodata (observations|forecast) --source=dwd --dataset=immediate --partition=recent [--filename=Hasel,Schneegloeckchen] [--station-id=7521,7532] [--species-id=113,127] [--phase-id=5] [--quality-level=10] [--quality-byte=1,2,3] [--station=berlin,brandenburg] [--species=hazel,snowdrop] [--species-preset=mellifera-de-primary] [--phase=flowering] [--quality=ROUTKLI] [--year=2017] [--humanize] [--show-ids] [--language=german] [--long-station] [--sort=Datum] [--format=csv] [--verbose] phenodata drop-cache --source=dwd phenodata --version phenodata (-h | --help) @@ -202,19 +216,19 @@ Observations of hazel and snowdrop (dito), but for station ids 164 and 717 only: phenodata observations \ --source=dwd --dataset=annual --partition=recent \ - --filename=Hasel,Schneegloeckchen --station-id=164,717 + --filename=Hasel,Schneegloeckchen --station-id=7521,7532 All observations for station ids 164 and 717 in years 2016 and 2017:: phenodata observations \ --source=dwd --dataset=annual --partition=recent \ - --station-id=164,717 --year=2016,2017 + --station-id=7521,7532 --year=2020,2021 All observations for station ids 164 and 717 and species ids 113 and 127:: phenodata observations \ --source=dwd --dataset=annual --partition=recent \ - --station-id=164,717 --species-id=113,127 + --station-id=7521,7532 --species-id=113,127 All invalid observations:: @@ -230,7 +244,7 @@ using grouping and by computing the "mean" value of the "Jultag" column:: phenodata forecast \ --source=dwd --dataset=annual --partition=recent \ --filename=Hasel,Schneegloeckchen,Apfel,Birne \ - --station-id=12132,10961 --format=string + --station-id=7521,7532 --format=string @@ -251,7 +265,6 @@ output texts in the German language if possible:: --station-id=12132 \ --humanize --language=german - Forecasting =========== @@ -300,32 +313,32 @@ Query observations by using textual representation of "station" information:: 
--station=berlin,brandenburg \ --humanize --sort=Datum -Observations near Munich for species "hazel" or "snowdrop" in 2018:: +Observations near Munich for species "hazel" or "snowdrop" in 2022:: phenodata observations \ --source=dwd --dataset=annual --partition=recent \ --station=münchen \ --species=hazel,snowdrop \ - --year=2018 \ + --year=2022 \ --humanize --sort=Datum -Observations for any "flowering" events in 2017 and 2018 around Munich:: +Observations for any "flowering" events in 2021 and 2022 around Munich:: phenodata observations \ --source=dwd --dataset=annual --partition=recent \ --station=münchen \ --phase=flowering \ - --year=2017,2018 \ + --year=2021,2022 \ --humanize --sort=Datum -Same observations but with "ROUTKLI" quality:: +Same observations but with ``ROUTKLI`` quality:: phenodata observations \ --source=dwd --dataset=annual --partition=recent \ --station=münchen \ --phase=flowering \ - --quality=ROUTKLI \ - --year=2017 \ + --quality='nicht beanstandet' \ + --year=2021 \ --humanize --sort=Datum Investigate some "flowering" observations near Munich which have seen corrections last year:: @@ -335,7 +348,7 @@ Investigate some "flowering" observations near Munich which have seen correction --station=münchen \ --phase=flowering \ --quality=korrigiert \ - --year=2017 \ + --year=2022 \ --humanize --sort=Datum @@ -350,7 +363,7 @@ Sort by species and date. --station=thüringen,bayern \ --species=Hasel,Schneeglöckchen,Sal-Weide,Löwenzahn,Süßkirsche,Apfel,Winterraps,Robinie,Winter-Linde,Heidekraut \ --phase-id=5 \ - --year=2015,2016,2017 \ + --year=2021,2022,2023 \ --humanize --language=german \ --sort=Spezies,Datum @@ -362,7 +375,7 @@ Sort by date. --source=dwd --dataset=annual --partition=recent \ --station=köln \ --phase="beginning of flowering" \ - --year=2015,2016,2017 \ + --year=2021,2022,2023 \ --humanize --language=german \ --sort=Datum \ --species-preset=mellifera-de-primary @@ -373,28 +386,29 @@ Sort by date. `presets.json `__. 
+ ******************* Project information ******************* -About -===== -The "phenodata" program is released under the GNU AGPL license. -Its source code lives on `GitHub `_ and -the Python package is published to `PyPI `_. -You might also want to have a look at the `documentation `_. - -The software has been tested on Python 2.7. +Resources +========= +- `Source code `_ +- `Documentation `_ +- `Python Package Index (PyPI) `_ +Contributions +============= If you'd like to contribute you're most welcome! Spend some time taking a look around, locate a bug, design issue or spelling mistake and then send us a pull request or create an issue. Thanks in advance for your efforts, we really appreciate any help or feedback. -Development +Discussions =========== -Discussions around the development of ``phenodata`` and its applications are taking place at: +Discussions around the development of ``phenodata`` and its applications are +taking place at the Hiveeyes forum: - https://community.hiveeyes.org/t/phanologischer-kalender-fur-trachtpflanzen/664 - https://community.hiveeyes.org/t/phenodata-ein-datenbezug-und-manipulations-toolkit-fur-open-access-phanologiedaten/2892 @@ -403,16 +417,19 @@ Discussions around the development of ``phenodata`` and its applications are tak - https://community.hiveeyes.org/t/phanologie-und-imkerliche-eingriffe-bei-den-bienen/705 - https://community.hiveeyes.org/t/phenological-calendar-for-france/800 +Development +=========== +In order to setup a development environment on your workstation, please head +over to the `development sandbox`_ documentation. When you see the software +tests succeed, you should be ready to start hacking. Code license ============ -Licensed under the GNU AGPL license. See LICENSE_ file for details. - -.. _LICENSE: https://github.com/earthobservations/phenodata/blob/main/LICENSE +The project is licensed under the terms of the GNU AGPL license, see `LICENSE`_. 
Data license ============ -The DWD has information about their re-use policy in German and English. +The DWD has information about their data re-use policy in German and English. Please refer to the respective Disclaimer (`de `__, `en `__) @@ -426,3 +443,11 @@ Disclaimer The project and its authors are not affiliated with DWD, USA-NPN or any other data provider in any way. It is a sole project from the community for making data more accessible in the spirit of open data. + + +.. _annual-reporters: https://www.dwd.de/DE/klimaumwelt/klimaueberwachung/phaenologie/daten_deutschland/jahresmelder/jahresmelder_node.html +.. _development sandbox: doc/development.rst +.. _immediate-reporters: https://www.dwd.de/DE/klimaumwelt/klimaueberwachung/phaenologie/daten_deutschland/sofortmelder/sofortmelder_node.html +.. _LICENSE: https://github.com/earthobservations/phenodata/blob/main/LICENSE +.. _pandas: https://pandas.pydata.org/ +.. _virtualenv: https://github.com/earthobservations/phenodata/blob/main/doc/virtualenv.rst diff --git a/TODO.rst b/doc/backlog.rst similarity index 80% rename from TODO.rst rename to doc/backlog.rst index c32bcdc..2e512d2 100644 --- a/TODO.rst +++ b/doc/backlog.rst @@ -1,10 +1,32 @@ -############## -phenodata todo -############## +################# +phenodata backlog +################# -****** -Prio 1 -****** + +*********** +Iteration 1 +*********** + +Bugs +==== +- [o] Croaks when acquiring "forecast" data with "--humanize" and "--show-ids" options + https://github.com/earthobservations/phenodata/issues/6 + +Features +======== +- [o] Switch from FTP to HTTP +- [o] Docs: Add remark about outdated ``--year`` values in README + +Infrastructure +============== +- [o] Provide Docker images +- [o] Migrate to ``pyproject.toml``, with all the bells + and whistles like ``poe check`` + + +*********** +Iteration 2 +*********** - [x] Introduce parameter "--format", which can be "tabulate:psql", "json", "xml", "vcf" - [x] There are still spaces around, e.g. 
"phenodata list-phases --source=dwd --format=csv" - [x] Filter by quality indicators @@ -17,18 +39,19 @@ Prio 1 - [x] Implement text-searching in stations, species, phases and quality information - [x] Implement wishlist re. preselected groups of species as "mellifera" flavours -****** -Prio 2 -****** + +*********** +Iteration 3 +*********** - [x] Suppress or move resource acquisition log messages to DEBUG log level and replace by progress indicator - [x] Use "appdirs" module for computing cache storage location - [x] Add command "phenodata drop-cache" - [x] "Jultag" auch bei "--humanize" nicht unterdrücken wegen https://community.hiveeyes.org/t/phanologischer-kalender/664/45 -****** -Prio 3 -****** +*********** +Iteration 4 +*********** - [o] Render like https://www.zamg.ac.at/zamgWeb/pict/phaenospiegel/archive/pheno_overview_Austria_web_1_2016.png - [o] Display effective criteria just before performing the work - [o] Output "phenodata info" as DataFrame diff --git a/doc/conf.py b/doc/conf.py index 63dbf80..7e4b18b 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -20,7 +20,7 @@ # -- Project information ----------------------------------------------------- project = u'phenodata' -copyright = u'2018, Andreas Motl' +copyright = u'2018-2023, earthobservations' author = u'Andreas Motl' # The short X.Y version @@ -61,7 +61,7 @@ # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +language = "en" # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. @@ -88,7 +88,7 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". 
-html_static_path = ['_static'] +# html_static_path = ['_static'] # Custom sidebar templates, must be a dictionary that maps document names # to template names. diff --git a/doc/development.rst b/doc/development.rst new file mode 100644 index 0000000..1967a2d --- /dev/null +++ b/doc/development.rst @@ -0,0 +1,26 @@ +########### +Development +########### + + +******* +Sandbox +******* + +Acquire sources, create Python virtualenv, install package and dependencies, +and run software tests:: + + git clone https://github.com/earthobservations/phenodata + cd phenodata + make test + + +***** +Tests +***** + +In order to run tests individually, enter the virtualenv, and invoke ``pytest`` +directly, like:: + + source .venv/bin/activate + pytest -k sql diff --git a/doc/index.rst b/doc/index.rst index d031c39..f20b250 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -9,13 +9,16 @@ phenodata - phenology data acquisition for humans .. toctree:: :maxdepth: 2 :caption: Contents: + :glob: dwd usa-npn ipg-gpm - links + research virtualenv + * + Indices and tables ================== diff --git a/doc/virtualenv.rst b/doc/virtualenv.rst index 1404f07..289f196 100644 --- a/doc/virtualenv.rst +++ b/doc/virtualenv.rst @@ -4,33 +4,24 @@ Python virtualenv About ===== -virtualenv_ is a tool to create isolated Python environments. + +`virtualenv`_ is a tool to create isolated Python environments. We recommend it for installing the software and its dependencies independently of your Python distribution. - Install ======= -Create Python virtualenv:: - - # Either use Python 2.7 ... - virtualenv --no-site-packages --python=python2 .venv27 +Create a Python `virtualenv`_:: - # ... or Python 3.6 - virtualenv --no-site-packages --python=python3 .venv36 + python3 -m venv .venv Install:: # Activate virtualenv - source .venv27/bin/activate - - # or - source .venv36/bin/activate + source .venv/bin/activate # Install Python package - pip install phenodata - + pip install phenodata[sql] .. 
_virtualenv: https://virtualenv.pypa.io/ - diff --git a/phenodata/command.py b/phenodata/command.py index 14cf000..a9d2ab7 100644 --- a/phenodata/command.py +++ b/phenodata/command.py @@ -26,7 +26,7 @@ def run(): phenodata list-phases --source=dwd [--format=csv] phenodata list-stations --source=dwd --dataset=immediate [--all] [--filter=berlin] [--sort=Stationsname] [--format=csv] phenodata nearest-station --source=dwd --dataset=immediate --latitude=52.520007 --longitude=13.404954 [--format=csv] - phenodata nearest-stations --source=dwd --dataset=immediate [--all] --latitude=52.520007 --longitude=13.404954 [--limit=10] [--format=csv] + phenodata nearest-stations --source=dwd --dataset=immediate --latitude=52.520007 --longitude=13.404954 [--all] [--limit=10] [--format=csv] phenodata list-quality-levels --source=dwd [--format=csv] phenodata list-quality-bytes --source=dwd [--format=csv] phenodata list-filenames --source=dwd --dataset=immediate --partition=recent [--filename=Hasel,Schneegloeckchen] [--year=2017] @@ -184,7 +184,7 @@ def run(): # Query results if data is not None and options["sql"]: import duckdb - data = duckdb.query(data, "data", options["sql"]).df() + data = duckdb.query(query=options["sql"], alias="data").df() # Format and output results if data is not None: @@ -215,7 +215,9 @@ def run(): output = data.to_csv(encoding='utf-8', index=showindex) elif output_format == 'json': - output = data.to_json(orient='table', date_format='iso') + if showindex: + data = data.reset_index() + output = data.to_json(orient='records', date_format='iso') elif output_format == 'string': output = data.to_string() diff --git a/phenodata/dwd/cdc.py b/phenodata/dwd/cdc.py index d98bc0c..d8c8d2c 100644 --- a/phenodata/dwd/cdc.py +++ b/phenodata/dwd/cdc.py @@ -26,7 +26,7 @@ class DwdCdcClient(object): def get_dataframe(self, url=None, path=None, index_column=None, coerce_int=False): """ - Read single CSV file from FTP url and convert to Pandas DataFrame object. 
+ Read single CSV file from FTP url and convert to pandas DataFrame object. Obtains either a full ``url`` parameter or a ``path`` parameter for addressing the remote resource. If the ``path`` parameter is given, @@ -89,7 +89,7 @@ def read_csv(self, url): def csv_to_dataframe(self, stream, index_column=None, coerce_int=False): """ - Read CSV data from stream into Pandas DataFrame object. + Read CSV data from stream into pandas DataFrame object. Optionally obtains ``index_column`` parameter. Use this to set the index of designated index column. @@ -102,7 +102,7 @@ def csv_to_dataframe(self, stream, index_column=None, coerce_int=False): #if not stream or stream.len == 0: # return - # Read CSV into Pandas DataFrame + # Read CSV into pandas DataFrame. # https://pandas.pydata.org/pandas-docs/stable/io.html df = pd.read_csv( stream, engine='c', encoding='utf-8', diff --git a/phenodata/dwd/pheno.py b/phenodata/dwd/pheno.py index f4ffcc7..7605514 100644 --- a/phenodata/dwd/pheno.py +++ b/phenodata/dwd/pheno.py @@ -122,23 +122,13 @@ def get_stations(self, filter=None, all=False): def nearest_station(self, latitude, longitude, all=False): """ - Select most current stations datasets. - - Stolen from https://github.com/marians/dwd-weather + Select closest station. """ - closest = None - closest_distance = 99999999999 - for index, station in self.get_stations(all=all).iterrows(): - d = haversine_distance((longitude, latitude), - (station["geograph.Laenge"], station["geograph.Breite"])) - if d < closest_distance: - closest = station - closest_distance = d - return closest.to_frame() + return self.nearest_stations(latitude, longitude, all=all).head(1) def nearest_stations(self, latitude, longitude, all=False, limit=10): """ - Select most current stations datasets. + Select closest stations. 
Stolen from https://github.com/marians/dwd-weather """ @@ -265,8 +255,8 @@ def get_forecast(self, options, forecast_year=None, humanize=False): def query(self, partition=None, files=None): """ - The FTP/Pandas workhorse, converges data from multiple observation data - CSV files on upstream CDC FTP server into a single Pandas DataFrame object. + The FTP/pandas workhorse, converges data from multiple observation data + CSV files on upstream CDC FTP server into a single pandas DataFrame object. - Obtains ``partition`` parameter which can be either ``annual`` or ``immediate``. - Obtains optional ``files`` parameter which will be applied @@ -374,7 +364,7 @@ def flux(self, results, criteria=None): 'phase-id': 'Phase_id', } - # Lowlevel filtering based on IDs + # Low-level filtering based on IDs # For each designated field, add ``.isin`` criteria to "boolean index" expression expression = True for key, field in list(isin_map.items()): @@ -382,7 +372,8 @@ def flux(self, results, criteria=None): continue reference = results[field] if key in criteria and criteria[key]: - expression &= reference.isin(criteria[key]) + values = map(int, criteria[key]) + expression &= reference.isin(values) # Apply filter expression to DataFrame if type(expression) is not bool: diff --git a/phenodata/ftp.py b/phenodata/ftp.py index d8119e7..fe14d3c 100644 --- a/phenodata/ftp.py +++ b/phenodata/ftp.py @@ -6,7 +6,7 @@ import sys import arrow import shutil -import appdirs +import platformdirs import logging import requests_ftp import dogpile.cache @@ -20,7 +20,7 @@ class CacheManager(object): def __init__(self): # Path to cache directory, system agnostic - self.cache_path = os.path.join(appdirs.user_cache_dir(appname='phenodata', appauthor=False), 'dwd-ftp') + self.cache_path = os.path.join(platformdirs.user_cache_dir(appname='phenodata', appauthor=False), 'dwd-ftp') if sys.version_info.major >= 3: self.cache_path = os.path.join(self.cache_path, 'py{}'.format(sys.version_info.major)) if not 
os.path.exists(self.cache_path): diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..aa446b3 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,14 @@ +[pytest] + +minversion = 2.0 +addopts = -rA --verbosity=3 --cov=phenodata --cov-report=term-missing --cov-report=xml +testpaths = + phenodata + tests + +log_level = INFO +log_format = %(asctime)-15s.%(msecs)03d [%(name)-35s] %(levelname)-8s: %(message)s +log_date_format = %Y-%m-%dT%H:%M:%S + +log_cli = true +log_cli_level = INFO diff --git a/requirements-docs.txt b/requirements-docs.txt index 9057a2a..d5f1ee5 100644 --- a/requirements-docs.txt +++ b/requirements-docs.txt @@ -1,2 +1,2 @@ -Sphinx==1.7.1 -sphinx_rtd_theme==0.2.5b2 +Sphinx<7 +sphinx_rtd_theme<2 diff --git a/requirements-release.txt b/requirements-release.txt index 2a065e6..17275d8 100644 --- a/requirements-release.txt +++ b/requirements-release.txt @@ -1,3 +1,4 @@ -bumpversion==0.5.3 -twine==1.11.0 -keyring==11.1.0 +build +bump2version>=1,<2 +keyring>=20,<24 +twine>=3,<5 diff --git a/requirements-tests.txt b/requirements-tests.txt new file mode 100644 index 0000000..42bf522 --- /dev/null +++ b/requirements-tests.txt @@ -0,0 +1,4 @@ +datadiff>=2.0,<3 +marko<2 +pytest>=6.1.0,<8 +pytest-cov<5 diff --git a/setup.py b/setup.py index e365040..fcf14a4 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,4 @@ import os -import sys from setuptools import setup, find_packages here = os.path.abspath(os.path.dirname(__file__)) @@ -7,17 +6,17 @@ CHANGES = open(os.path.join(here, 'CHANGES.rst')).read() requires = [ - 'pandas>=0.23.4', - 'requests>=2.18.4', - 'requests-ftp>=0.3.1', - 'docopt>=0.6.2', + 'arrow>=0.12.1,<0.18', # Verified to work on 0.17.0. 'attrs>=17.4.0', - 'tabulate>=0.8.2', - 'dogpile.cache>=0.6.5', - 'arrow>=0.12.1', - 'tqdm>=4.19.7', - 'appdirs>=1.4.3', + 'docopt>=0.6.2', + 'dogpile.cache>=0.6.5,<1', # Verified to work on 1.1.1. 'future', + 'pandas>=0.23.4,<1.3', # Verified to work on 1.2.0. 
+ 'platformdirs<4', + 'requests>=2.18.4,<3', + 'requests-ftp>=0.3.1,<4', # Verified to work on 0.3.1. + 'tabulate>=0.8.2,<0.9', # Verified to work on 0.8.7. + 'tqdm>=4,<5', ] test_requires = [ @@ -27,11 +26,15 @@ version='0.11.0', description='phenodata is a data acquisition and manipulation toolkit for open access phenology data', long_description=README, - license="AGPL 3", + license="AGPL 3, EUPL 1.2", classifiers=[ "Programming Language :: Python", - "Programming Language :: Python :: 2", "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Development Status :: 4 - Beta", "Environment :: Console", "Environment :: Web Environment", @@ -40,6 +43,7 @@ "Intended Audience :: End Users/Desktop", "Intended Audience :: Information Technology", "Intended Audience :: Science/Research", + "License :: OSI Approved :: European Union Public Licence 1.2 (EUPL 1.2)", "License :: OSI Approved :: GNU Affero General Public License v3", "Natural Language :: English", "Natural Language :: German", @@ -76,7 +80,7 @@ install_requires=requires, tests_require=test_requires, extras_require={ - 'sql': ['duckdb'] + 'sql': ['duckdb>=0.3,<0.7'] }, dependency_links=[ ], diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 0000000..721189e --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,26 @@ +import re + +import pytest + +from tests.util import run_command + + +def test_cli_info(capsys): + """ + CLI test: Verify `phenodata info` works. + """ + run_command("phenodata info") + + out, err = capsys.readouterr() + assert "phenodata is a data acquisition and manipulation toolkit" in out + + +def test_cli_version(capsys): + """ + CLI test: Verify `phenodata info` works. 
+ """ + with pytest.raises(SystemExit): + run_command("phenodata --version") + + out, err = capsys.readouterr() + assert re.match("phenodata \d+\.\d+\.\d+.*", out) diff --git a/tests/test_forecast.py b/tests/test_forecast.py new file mode 100644 index 0000000..fc6d2d7 --- /dev/null +++ b/tests/test_forecast.py @@ -0,0 +1,62 @@ +import json + +from datadiff.tools import assert_equal + +from tests.util import run_command + + + + +def test_cli_forecast_immediate_recent(capsys): + """ + CLI test: Verify the `forecast` subcommand works. + """ + run_command("phenodata forecast --source=dwd --dataset=immediate --partition=recent --filename=Hasel --station-id=7521,7532 --humanize --format=json") + + out, err = capsys.readouterr() + response = json.loads(out) + + first = { + "Jahr": 2023, + "Datum": "2023-02-26", + "Tag": 57, + "Spezies": "common hazel", + "Phase": "beginning of flowering", + "Station": "Norder-Hever-Koog, Schleswig-Holstein" + } + assert_equal(response[0], first) + + +def test_cli_forecast_annual_recent(capsys): + """ + CLI test: Verify the `forecast` subcommand works, also select German. + + Event sequence for each species + ------------------------------- + Forecast of all events at station "Berlin-Dahlem". + Use all species of the "primary group" (dito). + Sort by species and date, ascending. 
+ + """ + run_command(""" + phenodata forecast \ + --source=dwd --dataset=annual --partition=recent \ + --filename=Apfel \ + --station-id=12132 \ + --humanize --language=german \ + --sort=Spezies,Datum \ + --format=json + """) + + out, err = capsys.readouterr() + response = json.loads(out) + + first = { + "Jahr": 2023, + "Datum": "2023-04-04", + "Tag": 94, + "Spezies": "Apfel, frühe Reife", + "Phase": "Austrieb Beginn", + "Station": "Berlin-Dahlem, Berlin" + } + assert_equal(response[0], first) diff --git a/tests/test_metadata.py b/tests/test_metadata.py new file mode 100644 index 0000000..9dbde41 --- /dev/null +++ b/tests/test_metadata.py @@ -0,0 +1,175 @@ +import json +import sys + +import pytest +from datadiff.tools import assert_equal + +from tests.util import run_command + + +def test_cli_list_species(capsys): + """ + CLI test: Verify the `list-species` subcommand works. + """ + run_command("phenodata list-species --source=dwd --format=json") + + out, err = capsys.readouterr() + response = json.loads(out) + + first = { + "Objekt_ID": 25, + "Objekt": "Rüben", + "Objekt_englisch": "beet", + "Objekt_latein": "Beta vulgaris" + } + assert_equal(response[0], first) + + +def test_cli_list_phases(capsys): + """ + CLI test: Verify the `list-phases` subcommand works. + """ + run_command("phenodata list-phases --source=dwd --format=json") + + out, err = capsys.readouterr() + response = json.loads(out) + + first = { + "Phase_ID": 1, + "Phase": "Ergrünen Beginn", + "Phase_englisch": "beginning of turning green" + } + assert_equal(response[0], first) + + +def test_cli_list_quality_levels(capsys): + """ + CLI test: Verify the `list-quality-levels` subcommand works. 
+ """ + run_command("phenodata list-quality-levels --source=dwd --format=json") + + out, err = capsys.readouterr() + response = json.loads(out) + + reference = [ + { + "Qualitaetsniveau": 1, + "Beschreibung": "nur formale Prüfung beim Entschlüsseln und Laden" + }, + { + "Qualitaetsniveau": 7, + "Beschreibung": "in ROUTINE geprüft, aber keine Korrekturen (z.B. RR_UN vor Korrektur)" + }, + { + "Qualitaetsniveau": 10, + "Beschreibung": "in ROUTINE geprüft, routinemäßige Korrektur beendet" + } + ] + assert_equal(response, reference) + + +@pytest.mark.skipif(sys.platform == "linux", reason="Charset encoding weirdness!") +def test_cli_list_quality_bytes(capsys): + """ + CLI test: Verify the `list-quality-bytes` subcommand works. + """ + run_command("phenodata list-quality-bytes --source=dwd --format=json") + + out, err = capsys.readouterr() + response = json.loads(out) + + reference = [ + { + "Qualiaetsbyte": 0, + "Beschreibung": "Feldwert ungeprüft" + }, + { + "Qualiaetsbyte": 1, + "Beschreibung": "Feldwert nicht beanstandet" + }, + { + "Qualiaetsbyte": 2, + "Beschreibung": "Feldwert korrigiert" + }, + { + "Qualiaetsbyte": 3, + "Beschreibung": "Feldwert trotz Beanstandung bestätigt" + }, + { + "Qualiaetsbyte": 5, + "Beschreibung": "Feldwert zweifelhaft" + }, + { + "Qualiaetsbyte": 7, + "Beschreibung": "ungültiges Eintrittsdatum, z.B. 31. April, wird automatisch" + }, + { + "Qualiaetsbyte": 8, + "Beschreibung": "Feldwert falsch" + } + ] + assert_equal(response, reference) + + +def test_cli_list_filenames_immediate_recent(capsys): + """ + CLI test: Verify the `list-filenames` subcommand works. 
+ """ + run_command("phenodata list-filenames --source=dwd --dataset=immediate --partition=recent") + + out, err = capsys.readouterr() + response = out.splitlines() + + assert_equal(response[0], "PH_Sofortmelder_Landwirtschaft_Kulturpflanze_Dauergruenland_akt.txt") + assert_equal(response[-1], "PH_Sofortmelder_Wildwachsende_Pflanze_Wiesen-Fuchsschwanz_akt.txt") + + +def test_cli_list_filenames_immediate_historical(capsys): + """ + CLI test: Verify the `list-filenames` subcommand works. + """ + run_command("phenodata list-filenames --source=dwd --dataset=immediate --partition=historical") + + out, err = capsys.readouterr() + response = out.splitlines() + + assert_equal(response[0], "PH_Sofortmelder_Landwirtschaft_Kulturpflanze_Dauergruenland_1979_2021_hist.txt") + assert_equal(response[-1], "PH_Sofortmelder_Wildwachsende_Pflanze_Wiesen-Fuchsschwanz_1979_2021_hist.txt") + + +def test_cli_list_filenames_annual_recent(capsys): + """ + CLI test: Verify the `list-filenames` subcommand works. + """ + run_command("phenodata list-filenames --source=dwd --dataset=annual --partition=recent") + + out, err = capsys.readouterr() + response = out.splitlines() + + assert_equal(response[0], "PH_Jahresmelder_Landwirtschaft_Kulturpflanze_Dauergruenland_akt.txt") + assert_equal(response[-1], "PH_Jahresmelder_Wildwachsende_Pflanze_Zweigriffliger_Weissdorn_akt.txt") + + +def test_cli_list_filenames_annual_historical(capsys): + """ + CLI test: Verify the `list-filenames` subcommand works. + """ + run_command("phenodata list-filenames --source=dwd --dataset=annual --partition=historical") + + out, err = capsys.readouterr() + response = out.splitlines() + + assert_equal(response[0], "PH_Jahresmelder_Landwirtschaft_Kulturpflanze_Dauergruenland_1936_2021_hist.txt") + assert_equal(response[-1], "PH_Jahresmelder_Wildwachsende_Pflanze_Zweigriffliger_Weissdorn_1936_2021_hist.txt") + + +def test_cli_list_urls_immediate_recent(capsys): + """ + CLI test: Verify the `list-urls` subcommand works. 
+ """ + run_command("phenodata list-urls --source=dwd --dataset=immediate --partition=recent") + + out, err = capsys.readouterr() + response = out.splitlines() + + assert response[0].startswith("ftp://opendata.dwd.de/climate_environment/CDC/observations_germany/phenology/immediate_reporters/crops/recent") diff --git a/tests/test_observations.py b/tests/test_observations.py new file mode 100644 index 0000000..e861352 --- /dev/null +++ b/tests/test_observations.py @@ -0,0 +1,245 @@ +import json + +import marko +from datadiff.tools import assert_equal + +from tests.util import run_command + + + + +def test_cli_observations_immediate_recent_filter_station_id(capsys): + """ + CLI test: Verify the `observations` subcommand works. + """ + run_command("phenodata observations --source=dwd --dataset=immediate --partition=recent --filename=Hasel --station-id=19475 --humanize --show-ids --format=json") + + out, err = capsys.readouterr() + response = json.loads(out) + + first = { + "Jahr": 2021, + "Datum": "2021-02-11", + "Tag": 42, + "Spezies": "common hazel [113]", + "Phase": "beginning of flowering [5]", + "Station": "Angermünde (Ph), Brandenburg [19475]", + "QS-Level": "ROUTKLI validated [7]", + "QS-Byte": "Feldwert nicht beanstandet [1]" + } + assert_equal(response[0], first) + + +def test_cli_observations_immediate_recent_filter_station_name(capsys): + """ + CLI test: Verify the `observations` subcommand works. 
+ """ + run_command("phenodata observations --source=dwd --dataset=annual --partition=recent --filename=Hasel --station=berlin,brandenburg --humanize --sort=Datum --format=json") + + out, err = capsys.readouterr() + response = json.loads(out) + + first = { + "Jahr": 2021, + "Datum": "2021-02-01", + "Tag": 32, + "Spezies": "common hazel", + "Phase": "beginning of flowering", + "Station": "Wall, Brandenburg", + "QS-Level": "ROUTKLI validated and corrected", + "QS-Byte": "Feldwert nicht beanstandet" + } + assert_equal(response[0], first) + + +def test_cli_observations_immediate_historical(capsys): + """ + CLI test: Verify the `observations` subcommand works. + """ + run_command("phenodata observations --source=dwd --dataset=immediate --partition=historical --filename=Hasel --station=berlin,brandenburg --humanize --sort=Datum --format=json") + + out, err = capsys.readouterr() + response = json.loads(out) + + first = { + "Jahr": 2007, + "Datum": "2007-01-12", + "Tag": 12, + "Spezies": "common hazel", + "Phase": "beginning of flowering", + "Station": "Prenzlau, Brandenburg", + "QS-Level": "ROUTKLI validated", + "QS-Byte": "Feldwert nicht beanstandet" + } + assert_equal(response[0], first) + + +def test_cli_observations_annual_recent(capsys): + """ + CLI test: Verify the `observations` subcommand works. + """ + run_command("phenodata observations --source=dwd --dataset=annual --partition=recent --filename=Hasel --station=berlin,brandenburg --humanize --sort=Datum --format=json") + + out, err = capsys.readouterr() + response = json.loads(out) + + first = { + "Jahr": 2021, + "Datum": "2021-02-01", + "Tag": 32, + "Spezies": "common hazel", + "Phase": "beginning of flowering", + "Station": "Wall, Brandenburg", + "QS-Level": "ROUTKLI validated and corrected", + "QS-Byte": "Feldwert nicht beanstandet" + } + assert_equal(response[0], first) + + +def test_cli_observations_annual_historical(capsys): + """ + CLI test: Verify the `observations` subcommand works. 
+ """ + run_command("phenodata observations --source=dwd --dataset=annual --partition=historical --filename=Hasel --station=berlin,brandenburg --humanize --sort=Datum --format=json") + + out, err = capsys.readouterr() + response = json.loads(out) + + first = { + "Jahr": 1936, + "Datum": "1936-03-10", + "Tag": 70, + "Spezies": "common hazel", + "Phase": "beginning of flowering", + "Station": "Berlin-Dahlem, Berlin", + "QS-Level": "Load time checks", + "QS-Byte": "Feldwert nicht beanstandet" + } + assert_equal(response[0], first) + + +def test_cli_observations_filter_year(capsys): + """ + CLI test: Verify the `observations` subcommand works, with filtering by year. + """ + run_command("phenodata observations --source=dwd --dataset=immediate --partition=recent --filename=Hasel --station-id=7521,7532 --year=2020,2021 --humanize --show-ids --format=json") + + out, err = capsys.readouterr() + response = json.loads(out) + + first = { + "Jahr": 2021, + "Datum": "2021-02-24", + "Tag": 55, + "Spezies": "common hazel [113]", + "Phase": "beginning of flowering [5]", + "Station": "Norder-Hever-Koog, Schleswig-Holstein [7532]", + "QS-Level": "ROUTKLI validated [7]", + "QS-Byte": "Feldwert nicht beanstandet [1]" + } + assert_equal(response[0], first) + + +def test_cli_observations_filter_species_id(capsys): + """ + CLI test: Verify the `observations` subcommand works, with filtering by species-id. 
+ """ + run_command("phenodata observations --source=dwd --dataset=immediate --partition=recent --filename=Hasel --station-id=7521,7532 --species-id=113,127 --humanize --show-ids --format=json") + + out, err = capsys.readouterr() + response = json.loads(out) + + first = { + "Jahr": 2021, + "Datum": "2021-02-24", + "Tag": 55, + "Spezies": "common hazel [113]", + "Phase": "beginning of flowering [5]", + "Station": "Norder-Hever-Koog, Schleswig-Holstein [7532]", + "QS-Level": "ROUTKLI validated [7]", + "QS-Byte": "Feldwert nicht beanstandet [1]" + } + assert_equal(response[0], first) + + +def test_cli_observations_filter_invalid_readings(capsys): + """ + CLI test: Verify the `observations` subcommand works, with filtering by quality-byte. + """ + run_command("phenodata observations --source=dwd --dataset=immediate --partition=recent --filename=Hasel --quality-byte=5,6,7,8 --humanize --show-ids --format=json") + + out, err = capsys.readouterr() + response = json.loads(out) + + first = { + "Jahr": 2021, + "Datum": "2021-02-20", + "Tag": 51, + "Spezies": "common hazel [113]", + "Phase": "beginning of flowering [5]", + "Station": "Kirchdorf b. Sulingen, Niedersachsen [7857]", + "QS-Level": "ROUTKLI validated [7]", + "QS-Byte": "Feldwert zweifelhaft [5]" + } + assert_equal(response[0], first) + + +def test_cli_observations_filter_sql(capsys): + """ + CLI test: Verify the `observations` subcommand works, with filtering by quality-byte. 
+ """ + run_command(""" + phenodata observations \ + --source=dwd --dataset=annual --partition=recent \ + --filename=Hasel \ + --year=2022 \ + --species-preset=mellifera-de-primary --phase="beginning of flowering" \ + --humanize --language=german \ + --sql="SELECT * FROM data WHERE Station LIKE '%Berlin%' ORDER BY Datum" \ + --format=json + """) + + out, err = capsys.readouterr() + response = json.loads(out) + + first = { + "Jahr": 2022, + "Datum": "2022-01-11", + "Tag": 11, + "Spezies": "Hasel", + "Phase": "Blüte Beginn", + "Station": "Berlin-Marienfelde, Berlin", + "QS-Level": "ROUTKLI geprüft und korrigiert", + "QS-Byte": "Feldwert nicht beanstandet" + } + assert_equal(response[0], first) + + +def test_cli_observations_format_csv(capsys): + """ + CLI test: Verify the `observations` subcommand works with CSV output. + """ + run_command("phenodata observations --source=dwd --dataset=immediate --partition=recent --filename=Hasel --station-id=19475 --humanize --show-ids --format=csv") + + out, err = capsys.readouterr() + assert out.startswith(""" +Jahr,Datum,Tag,Spezies,Phase,Station,QS-Level,QS-Byte +2021,2021-02-11,42,common hazel [113],beginning of flowering [5],"Angermünde (Ph), Brandenburg [19475]",ROUTKLI validated [7],Feldwert nicht beanstandet [1] +2022,2022-01-28,28,common hazel [113],beginning of flowering [5],"Angermünde (Ph), Brandenburg [19475]",Load time checks [1],Feldwert nicht beanstandet [1] + """.strip()) + + +def test_cli_observations_format_tabular(capsys): + """ + CLI test: Verify the `observations` subcommand works with tabular output. + + `tabular:pipe` actually yields a Markdown table, so let's validate it using a Markdown parser. + """ + run_command("phenodata observations --source=dwd --dataset=immediate --partition=recent --filename=Hasel --station-id=19475 --humanize --show-ids --format=tabular:pipe") + + out, err = capsys.readouterr() + + html = marko.convert(out) + + assert html.startswith("

| Jahr | Datum | Tag | Spezies") + assert html.endswith("| Feldwert nicht beanstandet [1] |

\n") diff --git a/tests/test_stations.py b/tests/test_stations.py new file mode 100644 index 0000000..738cc7c --- /dev/null +++ b/tests/test_stations.py @@ -0,0 +1,95 @@ +import json + +from datadiff.tools import assert_equal + +from tests.util import run_command + + +def test_cli_stations(capsys): + """ + CLI test: Verify the `list-stations` subcommand works. + """ + run_command("phenodata list-stations --source=dwd --dataset=immediate --format=json") + + out, err = capsys.readouterr() + response = json.loads(out) + + first = { + "Stations_id": 662, + "Stationsname": "Braunschweig", + "geograph.Breite": 52.2915, + "geograph.Laenge": 10.4464, + "Stationshoehe": 81, + "Naturraumgruppe_Code": 62, + "Naturraumgruppe": "Weser-Aller-Flachland", + "Naturraum_Code": 6230, + "Naturraum": "Burgdorf-Peiner Geestplatten", + "Datum Stationsaufloesung": None, + "Bundesland": "Niedersachsen" + } + assert_equal(response[0], first) + + +nearest_station = { + "Stations_id": 12365, + "Stationsname": "Wansdorf", + "Distanz": 25167.5671969595, + "geograph.Breite": 52.65, + "geograph.Laenge": 13.1, + "Stationshoehe": 35, + "Naturraumgruppe_Code": 78, + "Naturraumgruppe": "Luchland", + "Naturraum_Code": 7820, + "Naturraum": "Bellin und Glin", + "Datum Stationsaufloesung": None, + "Bundesland": "Brandenburg" +} + + +def test_cli_nearest_stations(capsys): + """ + CLI test: Verify the `nearest-stations` subcommand works. + """ + run_command("phenodata nearest-stations --source=dwd --dataset=immediate --latitude=52.520007 --longitude=13.404954 --format=json") + + out, err = capsys.readouterr() + response = json.loads(out) + + assert_equal(response[0], nearest_station) + + +def test_cli_nearest_station(capsys): + """ + CLI test: Verify the `nearest-station` subcommand works. 
+ """ + run_command("phenodata nearest-station --source=dwd --dataset=immediate --latitude=52.520007 --longitude=13.404954 --format=json") + + out, err = capsys.readouterr() + response = json.loads(out) + + assert_equal(response[0], nearest_station) + + +def test_cli_stations_filter_string(capsys): + """ + CLI test: Verify the `list-stations` subcommand works, with filtering by string. + """ + run_command("phenodata list-stations --source=dwd --dataset=annual --filter='Fränkische Alb' --format=json") + + out, err = capsys.readouterr() + response = json.loads(out) + + first = { + "Stations_id": 2895, + "Stationsname": "Lauterhofen-Trautmannshofen", + "geograph.Breite": 49.3442, + "geograph.Laenge": 11.5664, + "Stationshoehe": 585, + "Naturraumgruppe_Code": 8, + "Naturraumgruppe": "Fränkische Alb (Frankenalb)", + "Naturraum_Code": 810, + "Naturraum": "Mittlere Frankenalb", + "Datum Stationsaufloesung": None, + "Bundesland": "Bayern" + } + assert_equal(response[0], first) diff --git a/tests/util.py b/tests/util.py new file mode 100644 index 0000000..5542528 --- /dev/null +++ b/tests/util.py @@ -0,0 +1,9 @@ +import shlex +import sys + +from phenodata.command import run + + +def run_command(command: str): + sys.argv = shlex.split(command.strip()) + run()