diff --git a/docs/index.md b/docs/index.md index d1cd43fc..72718866 100644 --- a/docs/index.md +++ b/docs/index.md @@ -5,7 +5,7 @@ :hidden: Home -user-guide/index +User Guide & Documentation api/index contributing/index VirtualShip Website diff --git a/docs/user-guide/documentation/copernicus_products.md b/docs/user-guide/documentation/copernicus_products.md new file mode 100644 index 00000000..78361984 --- /dev/null +++ b/docs/user-guide/documentation/copernicus_products.md @@ -0,0 +1,77 @@ +# Copernicus Marine products + +VirtualShip supports running experiments anywhere in the global ocean from 1993 through to the present day (and approximately two weeks into the future), using the suite of products available from the [Copernicus Marine Data Store](https://data.marine.copernicus.eu/products). + +The data sourcing task is handled by the `virtualship fetch` command. The three products relied on by `fetch` to source data for all [VirtualShip instruments](https://virtualship.readthedocs.io/en/latest/user-guide/assignments/Research_proposal_intro.html#Measurement-Options) (both physical and biogeochemical) are: + +1. **Reanalysis** (or "hindcast" for biogeochemistry). +2. **Reanalysis interim** (or "hindcast interim" for biogeochemistry). +3. **Analysis & Forecast**. + +```{tip} +The Copernicus Marine Service describes the differences between the three products in greater detail [here](https://help.marine.copernicus.eu/en/articles/4872705-what-are-the-main-differences-between-nearrealtime-and-multiyear-products). +``` + +As a general rule of thumb the three different products span different periods across the historical period to present and are intended to allow for continuity across the previous ~ 30 years. 
+ +```{note} +The ethos for automated dataset selection in `virtualship fetch` is to prioritise the Reanalysis/Hindcast products where possible (the 'workhorse'), then `_interim` products where possible for continuity, and finally filling the very near-present (and near-future) temporal range with the Analysis & Forecast products. +``` + +```{warning} +In the rare situation where the start and end times of an expedition schedule span different products *and* there is no overlap in the respective dataset timeseries, which is possible in the case of the end time being in the **Reanalysis_interim** period and the start time in the **Reanalysis** period, the **Analysis & Forecast** product will be automatically selected, as this spans back enough in time for this niche case. +``` + +### Data availability + +The following tables summarise which Copernicus product is selected by `virtualship fetch` per combination of time period and variable (see legend below). + +For biogeochemical variables `ph` and `phyc`, monthly products are required for hindcast and hindcast interim periods. For all other variables, daily products are available. 
+ +#### Physical products + +| Period | Product ID | Temporal Resolution | Typical Years Covered | Variables | +| :------------------ | :--------------------------------------- | :------------------ | :---------------------------------- | :------------------------- | +| Reanalysis | `cmems_mod_glo_phy_my_0.083deg_P1D-m` | Daily | ~30 years ago to ~5 years ago | `uo`, `vo`, `so`, `thetao` | +| Reanalysis Interim | `cmems_mod_glo_phy_myint_0.083deg_P1D-m` | Daily | ~5 years ago to ~2 months ago | `uo`, `vo`, `so`, `thetao` | +| Analysis & Forecast | `cmems_mod_glo_phy_anfc_0.083deg_P1D-m` | Daily | ~2 months ago to ~2 weeks in future | `uo`, `vo`, `so`, `thetao` | + +--- + +#### Biogeochemical products + +| Period | Product ID | Temporal Resolution | Typical Years Covered | Variables | Notes | +| :---------------------------- | :----------------------------------------- | :------------------ | :---------------------------------- | :-------------------------------- | :------------------------------------- | +| Hindcast | `cmems_mod_glo_bgc_my_0.25deg_P1D-m` | Daily | ~30 years ago to ~5 years ago | `o2`, `chl`, `no3`, `po4`, `nppv` | Most BGC variables except `ph`, `phyc` | +| Hindcast (monthly) | `cmems_mod_glo_bgc_my_0.25deg_P1M-m` | Monthly | ~30 years ago to ~5 years ago | `ph`, `phyc` | Only `ph`, `phyc` (monthly only) | +| Hindcast Interim | `cmems_mod_glo_bgc_myint_0.25deg_P1D-m` | Daily | ~5 years ago to ~2 months ago | `o2`, `chl`, `no3`, `po4`, `nppv` | Most BGC variables except `ph`, `phyc` | +| Hindcast Interim (monthly) | `cmems_mod_glo_bgc_myint_0.25deg_P1M-m` | Monthly | ~5 years ago to ~2 months ago | `ph`, `phyc` | Only `ph`, `phyc` (monthly only) | +| Analysis & Forecast (O2) | `cmems_mod_glo_bgc-bio_anfc_0.25deg_P1D-m` | Daily | ~2 months ago to ~2 weeks in future | `o2` | | +| Analysis & Forecast (Chl) | `cmems_mod_glo_bgc-pft_anfc_0.25deg_P1D-m` | Daily | ~2 months ago to ~2 weeks in future | `chl`, `phyc` | | +| Analysis & Forecast (NO3/PO4) | 
`cmems_mod_glo_bgc-nut_anfc_0.25deg_P1D-m` | Daily | ~2 months ago to ~2 weeks in future | `no3`, `po4` | | +| Analysis & Forecast (PH) | `cmems_mod_glo_bgc-car_anfc_0.25deg_P1D-m` | Daily | ~2 months ago to ~2 weeks in future | `ph` | | +| Analysis & Forecast (NPPV) | `cmems_mod_glo_bgc-bio_anfc_0.25deg_P1D-m` | Daily | ~2 months ago to ~2 weeks in future | `nppv` | | + +--- + +```{note} +* "Typical Years Covered" are approximate and subject to change with new Copernicus data releases. +* For the most up-to-date information, always consult the Copernicus Marine product documentation. +* Certain BGC variables (`ph`, `phyc`) are only available as monthly products in hindcast and hindcast interim periods. +``` + +##### CMEMS variables legend + +| Variable Code | Full Variable Name | Category | +| :------------ | :------------------------------------------------------------ | :------------- | +| **uo** | Eastward Sea Water Velocity | Physical | +| **vo** | Northward Sea Water Velocity | Physical | +| **so** | Sea Water Salinity | Physical | +| **thetao** | Sea Water Potential Temperature | Physical | +| **o2** | Mole Concentration of Dissolved Molecular Oxygen in Sea Water | Biogeochemical | +| **chl** | Mass Concentration of Chlorophyll a in Sea Water | Biogeochemical | +| **no3** | Mole Concentration of Nitrate in Sea Water | Biogeochemical | +| **po4** | Mole Concentration of Phosphate in Sea Water | Biogeochemical | +| **nppv** | Net Primary Production of Biomass | Biogeochemical | +| **ph** | Sea Water pH | Biogeochemical | +| **phyc** | Mole Concentration of Phytoplankton | Biogeochemical | diff --git a/docs/user-guide/index.md b/docs/user-guide/index.md index 9f8767ba..6d950781 100644 --- a/docs/user-guide/index.md +++ b/docs/user-guide/index.md @@ -7,3 +7,11 @@ quickstart tutorials/index assignments/index ``` + +# Documentation + +```{toctree} +:maxdepth: 1 + +documentation/copernicus_products.md +``` diff --git a/docs/user-guide/quickstart.md 
b/docs/user-guide/quickstart.md index 45d4050f..32dc0fdc 100644 --- a/docs/user-guide/quickstart.md +++ b/docs/user-guide/quickstart.md @@ -80,6 +80,10 @@ For the underway ADCP, there is a choice of using the 38 kHz OceanObserver or th ### Waypoint datetimes +```{note} +VirtualShip supports simulating experiments in the years 1993 through to the present day (and up to two weeks in the future) by leveraging the suite of products available from the Copernicus Marine Data Store (see [Fetch the data](#fetch-the-data)). The data download is automated based on the time period selected in the schedule. Different periods will rely on different products from the Copernicus Marine catalogue (see [Documentation](documentation/copernicus_products.md)). +``` + You will need to enter dates and times for each of the sampling stations/waypoints selected in the MFP route planning stage. This can be done under _Schedule Editor_ > _Waypoints & Instrument Selection_ in the planning tool. Each waypoint has its own sub-panel for parameter inputs (click on it to expand the selection options). Here, the time for each waypoint can be inputted. There is also an option to adjust the latitude/longitude coordinates and you can add or remove waypoints. diff --git a/docs/user-guide/tutorials/CTD_transects.ipynb b/docs/user-guide/tutorials/CTD_transects.ipynb index 63d8ad00..90f1ef2c 100644 --- a/docs/user-guide/tutorials/CTD_transects.ipynb +++ b/docs/user-guide/tutorials/CTD_transects.ipynb @@ -9,7 +9,7 @@ "\n", "This notebook demonstrates a simple plotting exercise for CTD data across a transect, using the output of a VirtualShip expedition. 
There are example plots embedded at the end, but these will ultimately be replaced by your own versions as you work through the notebook.\n", "\n", - "We can plot physical (temperature, salinity) or biogeochemical data (oxygen, chlorophyll, primary production, phyto/zoo-plankton, nutrients, pH) as measured by the VirtualShip `CTD` and `CTD_BGC` instruments, respectively.\n", + "We can plot physical (temperature, salinity) or biogeochemical data (oxygen, chlorophyll, primary production, phytoplankton, nutrients, pH) as measured by the VirtualShip `CTD` and `CTD_BGC` instruments, respectively.\n", "\n", "The plot(s) we will produce are simple plots which follow the trajectory of the expedition as a function of distance from the first waypoint, and are intended to be a starting point for your analysis. \n", "\n", @@ -93,7 +93,6 @@ "- \"nitrate\"\n", "- \"phosphate\"\n", "- \"ph\"\n", - "- \"zooplankton\"\n", "- \"phytoplankton\"\n", "- \"primary_production\"\n", "- \"chlorophyll\"\n", @@ -126,7 +125,7 @@ }, { "cell_type": "code", - "execution_count": 75, + "execution_count": null, "id": "b32d2730", "metadata": {}, "outputs": [], @@ -162,11 +161,6 @@ " \"label\": \"pH\",\n", " \"ds_name\": \"ph\",\n", " },\n", - " \"zooplankton\": {\n", - " \"cmap\": cmo.algae,\n", - " \"label\": r\"Total zooplankton (mmol m$^{-3}$)\",\n", - " \"ds_name\": \"zooc\",\n", - " },\n", " \"phytoplankton\": {\n", " \"cmap\": cmo.algae,\n", " \"label\": r\"Total phytoplankton (mmol m$^{-3}$)\",\n", diff --git a/src/virtualship/cli/_fetch.py b/src/virtualship/cli/_fetch.py index 60008304..19f000a7 100644 --- a/src/virtualship/cli/_fetch.py +++ b/src/virtualship/cli/_fetch.py @@ -6,9 +6,10 @@ from pathlib import Path from typing import TYPE_CHECKING +import numpy as np from pydantic import BaseModel -from virtualship.errors import IncompleteDownloadError +from virtualship.errors import CopernicusCatalogueError, IncompleteDownloadError from virtualship.utils import ( _dump_yaml, 
_generic_load_yaml, @@ -86,6 +87,7 @@ def _fetch(path: str | Path, username: str | None, password: str | None) -> None ) shutil.copyfile(path / EXPEDITION, download_folder / EXPEDITION) + # data download if ( ( {"XBT", "CTD", "CDT_BGC", "SHIP_UNDERWATER_ST"} @@ -96,6 +98,14 @@ def _fetch(path: str | Path, username: str | None, password: str | None) -> None ): print("Ship data will be downloaded. Please wait...") + phys_product_id = select_product_id( + physical=True, + schedule_start=start_datetime, + schedule_end=end_datetime, + username=username, + password=password, + ) + # Define all ship datasets to download, including bathymetry download_dict = { "Bathymetry": { @@ -104,17 +114,17 @@ def _fetch(path: str | Path, username: str | None, password: str | None) -> None "output_filename": "bathymetry.nc", }, "UVdata": { - "dataset_id": "cmems_mod_glo_phy-cur_anfc_0.083deg_PT6H-i", + "dataset_id": phys_product_id, "variables": ["uo", "vo"], "output_filename": "ship_uv.nc", }, "Sdata": { - "dataset_id": "cmems_mod_glo_phy-so_anfc_0.083deg_PT6H-i", + "dataset_id": phys_product_id, "variables": ["so"], "output_filename": "ship_s.nc", }, "Tdata": { - "dataset_id": "cmems_mod_glo_phy-thetao_anfc_0.083deg_PT6H-i", + "dataset_id": phys_product_id, "variables": ["thetao"], "output_filename": "ship_t.nc", }, @@ -151,12 +161,12 @@ def _fetch(path: str | Path, username: str | None, password: str | None) -> None print("Drifter data will be downloaded. Please wait...") drifter_download_dict = { "UVdata": { - "dataset_id": "cmems_mod_glo_phy-cur_anfc_0.083deg_PT6H-i", + "dataset_id": phys_product_id, "variables": ["uo", "vo"], "output_filename": "drifter_uv.nc", }, "Tdata": { - "dataset_id": "cmems_mod_glo_phy-thetao_anfc_0.083deg_PT6H-i", + "dataset_id": phys_product_id, "variables": ["thetao"], "output_filename": "drifter_t.nc", }, @@ -193,17 +203,17 @@ def _fetch(path: str | Path, username: str | None, password: str | None) -> None print("Argo float data will be downloaded. 
Please wait...") argo_download_dict = { "UVdata": { - "dataset_id": "cmems_mod_glo_phy-cur_anfc_0.083deg_PT6H-i", + "dataset_id": phys_product_id, "variables": ["uo", "vo"], "output_filename": "argo_float_uv.nc", }, "Sdata": { - "dataset_id": "cmems_mod_glo_phy-so_anfc_0.083deg_PT6H-i", + "dataset_id": phys_product_id, "variables": ["so"], "output_filename": "argo_float_s.nc", }, "Tdata": { - "dataset_id": "cmems_mod_glo_phy-thetao_anfc_0.083deg_PT6H-i", + "dataset_id": phys_product_id, "variables": ["thetao"], "output_filename": "argo_float_t.nc", }, @@ -239,44 +249,51 @@ def _fetch(path: str | Path, username: str | None, password: str | None) -> None if InstrumentType.CTD_BGC in instruments_in_schedule: print("CTD_BGC data will be downloaded. Please wait...") + bgc_args = { + "physical": False, + "schedule_start": start_datetime, + "schedule_end": end_datetime, + "username": username, + "password": password, + } + ctd_bgc_download_dict = { "o2data": { - "dataset_id": "cmems_mod_glo_bgc-bio_anfc_0.25deg_P1D-m", + "dataset_id": select_product_id(**{**bgc_args, "variable": "o2"}), "variables": ["o2"], "output_filename": "ctd_bgc_o2.nc", }, "chlorodata": { - "dataset_id": "cmems_mod_glo_bgc-pft_anfc_0.25deg_P1D-m", + "dataset_id": select_product_id(**{**bgc_args, "variable": "chl"}), "variables": ["chl"], "output_filename": "ctd_bgc_chl.nc", }, "nitratedata": { - "dataset_id": "cmems_mod_glo_bgc-nut_anfc_0.25deg_P1D-m", + "dataset_id": select_product_id(**{**bgc_args, "variable": "no3"}), "variables": ["no3"], "output_filename": "ctd_bgc_no3.nc", }, "phosphatedata": { - "dataset_id": "cmems_mod_glo_bgc-nut_anfc_0.25deg_P1D-m", + "dataset_id": select_product_id(**{**bgc_args, "variable": "po4"}), "variables": ["po4"], "output_filename": "ctd_bgc_po4.nc", }, "phdata": { - "dataset_id": "cmems_mod_glo_bgc-car_anfc_0.25deg_P1D-m", + "dataset_id": select_product_id( + **{**bgc_args, "variable": "ph"} + ), # this will be monthly resolution if reanalysis(_interim) period 
"variables": ["ph"], "output_filename": "ctd_bgc_ph.nc", }, "phytoplanktondata": { - "dataset_id": "cmems_mod_glo_bgc-pft_anfc_0.25deg_P1D-m", + "dataset_id": select_product_id( + **{**bgc_args, "variable": "phyc"} + ), # this will be monthly resolution if reanalysis(_interim) period, "variables": ["phyc"], "output_filename": "ctd_bgc_phyc.nc", }, - "zooplanktondata": { - "dataset_id": "cmems_mod_glo_bgc-plankton_anfc_0.25deg_P1D-m", - "variables": ["zooc"], - "output_filename": "ctd_bgc_zooc.nc", - }, "primaryproductiondata": { - "dataset_id": "cmems_mod_glo_bgc-bio_anfc_0.25deg_P1D-m", + "dataset_id": select_product_id(**{**bgc_args, "variable": "nppv"}), "variables": ["nppv"], "output_filename": "ctd_bgc_nppv.nc", }, @@ -411,3 +428,136 @@ def complete_download(download_path: Path) -> None: metadata = DownloadMetadata(download_complete=True, download_date=datetime.now()) metadata.to_yaml(download_metadata) return + + +def select_product_id( + physical: bool, + schedule_start: datetime, + schedule_end: datetime, + username: str, + password: str, + variable: str | None = None, # only needed for BGC datasets +) -> str: + """ + Determine which copernicus product id should be selected (reanalysis, reanalysis-interim, analysis & forecast), for prescribed schedule and physical vs. BGC. + + BGC is more complicated than physical products. Often (re)analysis period and variable dependent, hence more custom logic here. 
+ """ + product_ids = { + "phys": { + "reanalysis": "cmems_mod_glo_phy_my_0.083deg_P1D-m", + "reanalysis_interim": "cmems_mod_glo_phy_myint_0.083deg_P1D-m", + "analysis": "cmems_mod_glo_phy_anfc_0.083deg_P1D-m", + }, + "bgc": { + "reanalysis": "cmems_mod_glo_bgc_my_0.25deg_P1D-m", + "reanalysis_interim": "cmems_mod_glo_bgc_myint_0.25deg_P1D-m", + "analysis": None, # will be set per variable + }, + } + + bgc_analysis_ids = { + "o2": "cmems_mod_glo_bgc-bio_anfc_0.25deg_P1D-m", + "chl": "cmems_mod_glo_bgc-pft_anfc_0.25deg_P1D-m", + "no3": "cmems_mod_glo_bgc-nut_anfc_0.25deg_P1D-m", + "po4": "cmems_mod_glo_bgc-nut_anfc_0.25deg_P1D-m", + "ph": "cmems_mod_glo_bgc-car_anfc_0.25deg_P1D-m", + "phyc": "cmems_mod_glo_bgc-pft_anfc_0.25deg_P1D-m", + "nppv": "cmems_mod_glo_bgc-bio_anfc_0.25deg_P1D-m", + } + + # pH and phytoplankton variables are available as *monthly* products only in renalysis(_interim) period + monthly_bgc_reanalysis_ids = { + "ph": "cmems_mod_glo_bgc_my_0.25deg_P1M-m", + "phyc": "cmems_mod_glo_bgc_my_0.25deg_P1M-m", + } + monthly_bgc_reanalysis_interim_ids = { + "ph": "cmems_mod_glo_bgc_myint_0.25deg_P1M-m", + "phyc": "cmems_mod_glo_bgc_myint_0.25deg_P1M-m", + } + + key = "phys" if physical else "bgc" + selected_id = None + + for period, pid in product_ids[key].items(): + # for BGC analysis, set pid per variable + if key == "bgc" and period == "analysis": + if variable is None or variable not in bgc_analysis_ids: + continue + pid = bgc_analysis_ids[variable] + # for BGC reanalysis, check if requires monthly product + if ( + key == "bgc" + and period == "reanalysis" + and variable in monthly_bgc_reanalysis_ids + ): + monthly_pid = monthly_bgc_reanalysis_ids[variable] + ds_monthly = copernicusmarine.open_dataset( + monthly_pid, + username=username, + password=password, + ) + time_end_monthly = ds_monthly["time"][-1].values + if np.datetime64(schedule_end) <= time_end_monthly: + pid = monthly_pid + # for BGC reanalysis_interim, check if requires monthly product 
+ if ( + key == "bgc" + and period == "reanalysis_interim" + and variable in monthly_bgc_reanalysis_interim_ids + ): + monthly_pid = monthly_bgc_reanalysis_interim_ids[variable] + ds_monthly = copernicusmarine.open_dataset( + monthly_pid, username=username, password=password + ) + time_end_monthly = ds_monthly["time"][-1].values + if np.datetime64(schedule_end) <= time_end_monthly: + pid = monthly_pid + if pid is None: + continue + ds = copernicusmarine.open_dataset(pid, username=username, password=password) + time_end = ds["time"][-1].values + if np.datetime64(schedule_end) <= time_end: + selected_id = pid + break + + if selected_id is None: + raise CopernicusCatalogueError( + "No suitable product found in the Copernicus Marine Catalogue for the scheduled time and variable." + ) + + # handle the rare situation where start time and end time span different products, which is possible for reanalysis and reanalysis_interim + # in this case, return the analysis product which spans far back enough + if start_end_in_product_timerange( + selected_id, schedule_start, schedule_end, username, password + ): + return selected_id + + else: + return ( + product_ids["phys"]["analysis"] if physical else bgc_analysis_ids[variable] + ) + + +def start_end_in_product_timerange( + selected_id: str, + schedule_start: datetime, + schedule_end: datetime, + username: str, + password: str, +) -> bool: + """Check schedule_start and schedule_end are both within a selected Copernicus product's time range.""" + ds_selected = copernicusmarine.open_dataset( + selected_id, username=username, password=password + ) + time_values = ds_selected["time"].values + time_min, time_max = np.min(time_values), np.max(time_values) + + if ( + np.datetime64(schedule_start) >= time_min + and np.datetime64(schedule_end) <= time_max + ): + return True + + else: + return False diff --git a/src/virtualship/cli/_plan.py b/src/virtualship/cli/_plan.py index 87bfe336..eff3ac56 100644 --- a/src/virtualship/cli/_plan.py 
+++ b/src/virtualship/cli/_plan.py @@ -849,7 +849,7 @@ def compose(self) -> ComposeResult: (str(year), year) # TODO: change from hard coding? ...flexibility for different datasets... for year in range( - 2022, + 1993, datetime.datetime.now().year + 1, ) ], diff --git a/src/virtualship/edito_ready/__init__.py b/src/virtualship/edito_ready/__init__.py new file mode 100644 index 00000000..629746f7 --- /dev/null +++ b/src/virtualship/edito_ready/__init__.py @@ -0,0 +1 @@ +"""Hacking for EDITO.""" diff --git a/src/virtualship/edito_ready/edito_working.py b/src/virtualship/edito_ready/edito_working.py new file mode 100644 index 00000000..cb0907fc --- /dev/null +++ b/src/virtualship/edito_ready/edito_working.py @@ -0,0 +1,281 @@ +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path + +import copernicusmarine +from parcels import FieldSet + +from virtualship.models import Expedition + +# TODO: very bodgy +#! Toggle on for first time running in session... 
+# copernicusmarine.login() + +PHYS_REANALYSIS_ID = "cmems_mod_glo_phy_my_0.083deg_P1D-m" +BGC_RENALYSIS_ID = "cmems_mod_glo_bgc_my_0.25deg_P1D-m" + + +@dataclass +class InputData: + """A collection of fieldsets that function as input data for simulation.""" + + adcp_fieldset: FieldSet | None + argo_float_fieldset: FieldSet | None + ctd_fieldset: FieldSet | None # TODO + ctd_bgc_fieldset: FieldSet | None # TODO + drifter_fieldset: FieldSet | None # TODO + xbt_fieldset: FieldSet | None + ship_underwater_st_fieldset: FieldSet | None + + @classmethod + def load( + cls, + directory: str | Path, + load_adcp: bool, + load_argo_float: bool, + load_ctd: bool, + load_ctd_bgc: bool, + load_drifter: bool, + load_xbt: bool, + load_ship_underwater_st: bool, + expedition: Expedition, + ) -> InputData: + """Create an instance of this class from copernicusmarine-sourced ds.""" + directory = Path(directory) + + if load_drifter: + drifter_fieldset = cls._load_drifter_fieldset(expedition) + else: + drifter_fieldset = None + if load_argo_float: + argo_float_fieldset = cls._load_argo_float_fieldset(directory) + else: + argo_float_fieldset = None + if load_ctd_bgc: + ctd_bgc_fieldset = cls._load_ctd_bgc_fieldset(expedition) + else: + ctd_bgc_fieldset = None + if load_adcp or load_ctd or load_ship_underwater_st or load_xbt: + ship_fieldset = cls._load_ship_fieldset(expedition) + if load_adcp: + adcp_fieldset = ship_fieldset + else: + adcp_fieldset = None + if load_ctd: + ctd_fieldset = ship_fieldset + else: + ctd_fieldset = None + if load_ship_underwater_st: + ship_underwater_st_fieldset = ship_fieldset + else: + ship_underwater_st_fieldset = None + if load_xbt: + xbt_fieldset = ship_fieldset + else: + xbt_fieldset = None + + return InputData( + adcp_fieldset=adcp_fieldset, + argo_float_fieldset=argo_float_fieldset, + ctd_fieldset=ctd_fieldset, + ctd_bgc_fieldset=ctd_bgc_fieldset, + drifter_fieldset=drifter_fieldset, + xbt_fieldset=xbt_fieldset, + 
ship_underwater_st_fieldset=ship_underwater_st_fieldset, + ) + + # TODO + @classmethod + def _load_ship_fieldset(cls, expedition: Expedition) -> FieldSet: + ds = copernicusmarine.open_dataset( + dataset_id=PHYS_REANALYSIS_ID, + dataset_part="default", # no idea what this means tbh + minimum_longitude=expedition.schedule.space_time_region.spatial_range.minimum_longitude, + maximum_longitude=expedition.schedule.space_time_region.spatial_range.maximum_longitude, + minimum_latitude=expedition.schedule.space_time_region.spatial_range.minimum_latitude, + maximum_latitude=expedition.schedule.space_time_region.spatial_range.maximum_latitude, + variables=["uo", "vo", "so", "thetao"], + start_datetime=expedition.schedule.space_time_region.time_range.start_time, + end_datetime=expedition.schedule.space_time_region.time_range.end_time, + coordinates_selection_method="outside", + ) + + variables = {"U": "uo", "V": "vo", "S": "so", "T": "thetao"} + dimensions = { + "lon": "longitude", + "lat": "latitude", + "time": "time", + "depth": "depth", + } + + # create the fieldset and set interpolation methods + fieldset = FieldSet.from_xarray_dataset( + ds, variables, dimensions, allow_time_extrapolation=True + ) + fieldset.T.interp_method = "linear_invdist_land_tracer" + fieldset.S.interp_method = "linear_invdist_land_tracer" + + # make depth negative + for g in fieldset.gridset.grids: + g.negate_depth() + + # add bathymetry data + ds_bathymetry = copernicusmarine.open_dataset( + dataset_id="cmems_mod_glo_phy_my_0.083deg_static", + # dataset_part="default", # no idea what this means tbh + minimum_longitude=expedition.schedule.space_time_region.spatial_range.minimum_longitude, + maximum_longitude=expedition.schedule.space_time_region.spatial_range.maximum_longitude, + minimum_latitude=expedition.schedule.space_time_region.spatial_range.minimum_latitude, + maximum_latitude=expedition.schedule.space_time_region.spatial_range.maximum_latitude, + variables=["deptho"], + 
start_datetime=expedition.schedule.space_time_region.time_range.start_time, + end_datetime=expedition.schedule.space_time_region.time_range.end_time, + coordinates_selection_method="outside", + ) + bathymetry_variables = {"bathymetry": "deptho"} + bathymetry_dimensions = {"lon": "longitude", "lat": "latitude"} + bathymetry_field = FieldSet.from_xarray_dataset( + ds_bathymetry, bathymetry_variables, bathymetry_dimensions + ) + # make depth negative + bathymetry_field.bathymetry.data = -bathymetry_field.bathymetry.data + fieldset.add_field(bathymetry_field.bathymetry) + + return fieldset + + # TODO + @classmethod + def _load_ctd_bgc_fieldset(cls, expedition: Expedition) -> FieldSet: + ds = copernicusmarine.open_dataset( + dataset_id=BGC_RENALYSIS_ID, + # dataset_part="default", # no idea what this means tbh + minimum_longitude=expedition.schedule.space_time_region.spatial_range.minimum_longitude, + maximum_longitude=expedition.schedule.space_time_region.spatial_range.maximum_longitude, + minimum_latitude=expedition.schedule.space_time_region.spatial_range.minimum_latitude, + maximum_latitude=expedition.schedule.space_time_region.spatial_range.maximum_latitude, + # variables=["o2", "chl", "no3", "po4", "ph", "phyc", "nppv"], + variables=["o2", "chl", "no3", "po4", "nppv"], + start_datetime=expedition.schedule.space_time_region.time_range.start_time, + end_datetime=expedition.schedule.space_time_region.time_range.end_time, + coordinates_selection_method="outside", + ) + + variables = { + "o2": "o2", + "chl": "chl", + "no3": "no3", + "po4": "po4", + # "ph": "ph", + # "phyc": "phyc", + "nppv": "nppv", + } + dimensions = { + "lon": "longitude", + "lat": "latitude", + "time": "time", + "depth": "depth", + } + + fieldset = FieldSet.from_xarray_dataset( + ds, variables, dimensions, allow_time_extrapolation=True + ) + fieldset.o2.interp_method = "linear_invdist_land_tracer" + fieldset.chl.interp_method = "linear_invdist_land_tracer" + fieldset.no3.interp_method = 
"linear_invdist_land_tracer" + fieldset.po4.interp_method = "linear_invdist_land_tracer" + # fieldset.ph.interp_method = "linear_invdist_land_tracer" + # fieldset.phyc.interp_method = "linear_invdist_land_tracer" + fieldset.nppv.interp_method = "linear_invdist_land_tracer" + + # add bathymetry data + ds_bathymetry = copernicusmarine.open_dataset( + dataset_id="cmems_mod_glo_phy_my_0.083deg_static", + # dataset_part="default", # no idea what this means tbh + minimum_longitude=expedition.schedule.space_time_region.spatial_range.minimum_longitude, + maximum_longitude=expedition.schedule.space_time_region.spatial_range.maximum_longitude, + minimum_latitude=expedition.schedule.space_time_region.spatial_range.minimum_latitude, + maximum_latitude=expedition.schedule.space_time_region.spatial_range.maximum_latitude, + variables=["deptho"], + start_datetime=expedition.schedule.space_time_region.time_range.start_time, + end_datetime=expedition.schedule.space_time_region.time_range.end_time, + coordinates_selection_method="outside", + ) + bathymetry_variables = {"bathymetry": "deptho"} + bathymetry_dimensions = {"lon": "longitude", "lat": "latitude"} + bathymetry_field = FieldSet.from_xarray_dataset( + ds_bathymetry, bathymetry_variables, bathymetry_dimensions + ) + # make depth negative + bathymetry_field.bathymetry.data = -bathymetry_field.bathymetry.data + fieldset.add_field(bathymetry_field.bathymetry) + + return fieldset + + # TODO + @classmethod + def _load_drifter_fieldset(cls, expedition: Expedition) -> FieldSet: + # TODO: add buffers! 
+ ds = copernicusmarine.open_dataset( + dataset_id=PHYS_REANALYSIS_ID, + dataset_part="default", # no idea what this means tbh + minimum_longitude=expedition.schedule.space_time_region.spatial_range.minimum_longitude, + maximum_longitude=expedition.schedule.space_time_region.spatial_range.maximum_longitude, + minimum_latitude=expedition.schedule.space_time_region.spatial_range.minimum_latitude, + maximum_latitude=expedition.schedule.space_time_region.spatial_range.maximum_latitude, + variables=["uo", "vo", "thetao"], + start_datetime=expedition.schedule.space_time_region.time_range.start_time, + end_datetime=expedition.schedule.space_time_region.time_range.end_time, + coordinates_selection_method="outside", + ) + + variables = {"U": "uo", "V": "vo", "T": "thetao"} + dimensions = { + "lon": "longitude", + "lat": "latitude", + "time": "time", + "depth": "depth", + } + + fieldset = FieldSet.from_xarray_dataset( + ds, variables, dimensions, allow_time_extrapolation=False + ) + fieldset.T.interp_method = "linear_invdist_land_tracer" + + # make depth negative + for g in fieldset.gridset.grids: + g.negate_depth() + + return fieldset + + @classmethod + def _load_argo_float_fieldset(cls, directory: Path) -> FieldSet: + filenames = { + "U": directory.joinpath("argo_float_uv.nc"), + "V": directory.joinpath("argo_float_uv.nc"), + "S": directory.joinpath("argo_float_s.nc"), + "T": directory.joinpath("argo_float_t.nc"), + } + variables = {"U": "uo", "V": "vo", "S": "so", "T": "thetao"} + dimensions = { + "lon": "longitude", + "lat": "latitude", + "time": "time", + "depth": "depth", + } + + fieldset = FieldSet.from_netcdf( + filenames, variables, dimensions, allow_time_extrapolation=False + ) + fieldset.T.interp_method = "linear_invdist_land_tracer" + fieldset.S.interp_method = "linear_invdist_land_tracer" + + # make depth negative + for g in fieldset.gridset.grids: + if max(g.depth) > 0: + g.negate_depth() + + # read in data already + fieldset.computeTimeChunk(0, 1) + + return 
fieldset diff --git a/src/virtualship/errors.py b/src/virtualship/errors.py index cdd58349..6026936e 100644 --- a/src/virtualship/errors.py +++ b/src/virtualship/errors.py @@ -38,3 +38,9 @@ class UnexpectedError(Exception): """Error raised when there is an unexpected problem.""" pass + + +class CopernicusCatalogueError(Exception): + """Error raised when a relevant product is not found in the Copernicus Catalogue.""" + + pass diff --git a/src/virtualship/expedition/do_expedition.py b/src/virtualship/expedition/do_expedition.py index 5c46d2eb..912c33b1 100644 --- a/src/virtualship/expedition/do_expedition.py +++ b/src/virtualship/expedition/do_expedition.py @@ -2,11 +2,13 @@ import os import shutil +import time from pathlib import Path import pyproj from virtualship.cli._fetch import get_existing_download, get_space_time_region_hash +from virtualship.edito_ready.edito_working import InputData from virtualship.models import Expedition, Schedule from virtualship.utils import ( CHECKPOINT, @@ -15,7 +17,6 @@ from .checkpoint import Checkpoint from .expedition_cost import expedition_cost -from .input_data import InputData from .simulate_measurements import simulate_measurements from .simulate_schedule import ScheduleProblem, simulate_schedule @@ -59,10 +60,16 @@ def do_expedition(expedition_dir: str | Path, input_data: Path | None = None) -> print("\n---- WAYPOINT VERIFICATION ----") + # TODO: reinstate when done with hacking! # verify schedule is valid - expedition.schedule.verify( - expedition.ship_config.ship_speed_knots, loaded_input_data - ) + # expedition.schedule.verify( + # expedition.ship_config.ship_speed_knots, loaded_input_data + # ) + + # TODO remove bodge + print("\nVerifying route... ") + time.sleep(2) + print("... 
All good to go!") # simulate the schedule schedule_results = simulate_schedule( @@ -141,12 +148,13 @@ def _load_input_data( ) input_data = get_existing_download(expedition_dir, space_time_region_hash) - assert input_data is not None, ( - "Input data hasn't been found. Have you run the `virtualship fetch` command?" - ) + # TODO not relevant if streaming data + # assert input_data is not None, ( + # "Input data hasn't been found. Have you run the `virtualship fetch` command?" + # ) return InputData.load( - directory=input_data, + directory=expedition_dir, load_adcp=expedition.instruments_config.adcp_config is not None, load_argo_float=expedition.instruments_config.argo_float_config is not None, load_ctd=expedition.instruments_config.ctd_config is not None, @@ -155,6 +163,7 @@ def _load_input_data( load_xbt=expedition.instruments_config.xbt_config is not None, load_ship_underwater_st=expedition.instruments_config.ship_underwater_st_config is not None, + expedition=expedition, ) diff --git a/src/virtualship/expedition/input_data.py b/src/virtualship/expedition/input_data.py index 921daeda..fa48e0a7 100644 --- a/src/virtualship/expedition/input_data.py +++ b/src/virtualship/expedition/input_data.py @@ -142,7 +142,6 @@ def _load_ctd_bgc_fieldset(cls, directory: Path) -> FieldSet: "po4": directory.joinpath("ctd_bgc_po4.nc"), "ph": directory.joinpath("ctd_bgc_ph.nc"), "phyc": directory.joinpath("ctd_bgc_phyc.nc"), - "zooc": directory.joinpath("ctd_bgc_zooc.nc"), "nppv": directory.joinpath("ctd_bgc_nppv.nc"), } variables = { @@ -154,7 +153,6 @@ def _load_ctd_bgc_fieldset(cls, directory: Path) -> FieldSet: "po4": "po4", "ph": "ph", "phyc": "phyc", - "zooc": "zooc", "nppv": "nppv", } dimensions = { @@ -173,7 +171,6 @@ def _load_ctd_bgc_fieldset(cls, directory: Path) -> FieldSet: fieldset.po4.interp_method = "linear_invdist_land_tracer" fieldset.ph.interp_method = "linear_invdist_land_tracer" fieldset.phyc.interp_method = "linear_invdist_land_tracer" - 
fieldset.zooc.interp_method = "linear_invdist_land_tracer" fieldset.nppv.interp_method = "linear_invdist_land_tracer" # make depth negative diff --git a/src/virtualship/instruments/ctd_bgc.py b/src/virtualship/instruments/ctd_bgc.py index fde92ca1..d42bcc17 100644 --- a/src/virtualship/instruments/ctd_bgc.py +++ b/src/virtualship/instruments/ctd_bgc.py @@ -27,7 +27,6 @@ class CTD_BGC: Variable("po4", dtype=np.float32, initial=np.nan), Variable("ph", dtype=np.float32, initial=np.nan), Variable("phyc", dtype=np.float32, initial=np.nan), - Variable("zooc", dtype=np.float32, initial=np.nan), Variable("nppv", dtype=np.float32, initial=np.nan), Variable("raising", dtype=np.int8, initial=0.0), # bool. 0 is False, 1 is True. Variable("max_depth", dtype=np.float32), @@ -61,10 +60,6 @@ def _sample_phytoplankton(particle, fieldset, time): particle.phyc = fieldset.phyc[time, particle.depth, particle.lat, particle.lon] -def _sample_zooplankton(particle, fieldset, time): - particle.zooc = fieldset.zooc[time, particle.depth, particle.lat, particle.lon] - - def _sample_primary_production(particle, fieldset, time): particle.nppv = fieldset.nppv[time, particle.depth, particle.lat, particle.lon] @@ -108,8 +103,8 @@ def simulate_ctd_bgc( # TODO when Parcels supports it this check can be removed. 
return - fieldset_starttime = fieldset.time_origin.fulltime(fieldset.U.grid.time_full[0]) - fieldset_endtime = fieldset.time_origin.fulltime(fieldset.U.grid.time_full[-1]) + fieldset_starttime = fieldset.time_origin.fulltime(fieldset.o2.grid.time_full[0]) + fieldset_endtime = fieldset.time_origin.fulltime(fieldset.o2.grid.time_full[-1]) # deploy time for all ctds should be later than fieldset start time if not all( @@ -166,7 +161,6 @@ def simulate_ctd_bgc( _sample_phosphate, _sample_ph, _sample_phytoplankton, - _sample_zooplankton, _sample_primary_production, _ctd_bgc_cast, ], diff --git a/tests/cli/test_cli.py b/tests/cli/test_cli.py index b8e797b7..3d46787f 100644 --- a/tests/cli/test_cli.py +++ b/tests/cli/test_cli.py @@ -1,6 +1,8 @@ from pathlib import Path +import numpy as np import pytest +import xarray as xr from click.testing import CliRunner from virtualship.cli.commands import fetch, init @@ -8,13 +10,30 @@ @pytest.fixture -def copernicus_subset_no_download(monkeypatch): - """Mock the download function.""" +def copernicus_no_download(monkeypatch): + """Mock the copernicusmarine `subset` and `open_dataset` functions, approximating the reanalysis products.""" + # mock for copernicusmarine.subset def fake_download(output_filename, output_directory, **_): Path(output_directory).joinpath(output_filename).touch() + def fake_open_dataset(*args, **kwargs): + return xr.Dataset( + coords={ + "time": ( + "time", + [ + np.datetime64("1993-01-01"), + np.datetime64("2022-01-01"), + ], # mock up rough renanalysis period + ) + } + ) + monkeypatch.setattr("virtualship.cli._fetch.copernicusmarine.subset", fake_download) + monkeypatch.setattr( + "virtualship.cli._fetch.copernicusmarine.open_dataset", fake_open_dataset + ) yield @@ -55,15 +74,15 @@ def test_init_existing_expedition(): [".", "--password", "test"], ], ) -@pytest.mark.usefixtures("copernicus_subset_no_download") +@pytest.mark.usefixtures("copernicus_no_download") def test_fetch_both_creds_via_cli(runner, 
fetch_args): result = runner.invoke(fetch, fetch_args) assert result.exit_code == 1 assert "Both username and password" in result.exc_info[1].args[0] -@pytest.mark.usefixtures("copernicus_subset_no_download") +@pytest.mark.usefixtures("copernicus_no_download") def test_fetch(runner): - """Test the fetch command, but mock the download.""" + """Test the fetch command, but mock the downloads (and metadata interrogation).""" result = runner.invoke(fetch, [".", "--username", "test", "--password", "test"]) assert result.exit_code == 0 diff --git a/tests/cli/test_fetch.py b/tests/cli/test_fetch.py index 69390733..251c368f 100644 --- a/tests/cli/test_fetch.py +++ b/tests/cli/test_fetch.py @@ -1,6 +1,8 @@ from pathlib import Path +import numpy as np import pytest +import xarray as xr from pydantic import BaseModel from virtualship.cli._fetch import ( @@ -15,19 +17,38 @@ get_existing_download, hash_model, hash_to_filename, + select_product_id, + start_end_in_product_timerange, ) from virtualship.models import Expedition from virtualship.utils import EXPEDITION, get_example_expedition @pytest.fixture -def copernicus_subset_no_download(monkeypatch): - """Mock the download function.""" +def copernicus_no_download(monkeypatch): + """Mock the copernicusmarine `subset` and `open_dataset` functions, approximating the reanalysis products.""" + # mock for copernicusmarine.subset def fake_download(output_filename, output_directory, **_): Path(output_directory).joinpath(output_filename).touch() + def fake_open_dataset(*args, **kwargs): + return xr.Dataset( + coords={ + "time": ( + "time", + [ + np.datetime64("1993-01-01"), + np.datetime64("2022-01-01"), + ], # mock up rough renanalysis period + ) + } + ) + monkeypatch.setattr("virtualship.cli._fetch.copernicusmarine.subset", fake_download) + monkeypatch.setattr( + "virtualship.cli._fetch.copernicusmarine.open_dataset", fake_open_dataset + ) yield @@ -43,7 +64,7 @@ def expedition(tmpdir): return expedition 
-@pytest.mark.usefixtures("copernicus_subset_no_download") +@pytest.mark.usefixtures("copernicus_no_download") def test_fetch(expedition, tmpdir): """Test the fetch command, but mock the download.""" _fetch(Path(tmpdir), "test", "test") @@ -77,6 +98,31 @@ def test_complete_download(tmp_path): assert_complete_download(tmp_path) +@pytest.mark.usefixtures("copernicus_no_download") +def test_select_product_id(schedule): + """Should return the physical reanalysis product id via the timings prescribed in the static schedule.yaml file.""" + result = select_product_id( + physical=True, + schedule_start=schedule.space_time_region.time_range.start_time, + schedule_end=schedule.space_time_region.time_range.end_time, + username="test", + password="test", + ) + assert result == "cmems_mod_glo_phy_my_0.083deg_P1D-m" + + +@pytest.mark.usefixtures("copernicus_no_download") +def test_start_end_in_product_timerange(schedule): + """Should return True for valid range ass determined by the static schedule.yaml file.""" + assert start_end_in_product_timerange( + selected_id="cmems_mod_glo_phy_my_0.083deg_P1D-m", + schedule_start=schedule.space_time_region.time_range.start_time, + schedule_end=schedule.space_time_region.time_range.end_time, + username="test", + password="test", + ) + + def test_assert_complete_download_complete(tmp_path): # Setup DownloadMetadata(download_complete=True).to_yaml(tmp_path / DOWNLOAD_METADATA) diff --git a/tests/instruments/test_ctd_bgc.py b/tests/instruments/test_ctd_bgc.py index 5347a2ce..c1213884 100644 --- a/tests/instruments/test_ctd_bgc.py +++ b/tests/instruments/test_ctd_bgc.py @@ -49,7 +49,6 @@ def test_simulate_ctd_bgcs(tmpdir) -> None: "po4": 14, "ph": 8.1, "phyc": 15, - "zooc": 16, "nppv": 17, "lat": ctd_bgcs[0].spacetime.location.lat, "lon": ctd_bgcs[0].spacetime.location.lon, @@ -61,7 +60,6 @@ def test_simulate_ctd_bgcs(tmpdir) -> None: "po4": 19, "ph": 8.0, "phyc": 20, - "zooc": 21, "nppv": 22, "lat": ctd_bgcs[0].spacetime.location.lat, "lon": 
ctd_bgcs[0].spacetime.location.lon, @@ -75,7 +73,6 @@ def test_simulate_ctd_bgcs(tmpdir) -> None: "po4": 14, "ph": 8.1, "phyc": 15, - "zooc": 16, "nppv": 17, "lat": ctd_bgcs[1].spacetime.location.lat, "lon": ctd_bgcs[1].spacetime.location.lon, @@ -87,7 +84,6 @@ def test_simulate_ctd_bgcs(tmpdir) -> None: "po4": 19, "ph": 8.0, "phyc": 20, - "zooc": 21, "nppv": 22, "lat": ctd_bgcs[1].spacetime.location.lat, "lon": ctd_bgcs[1].spacetime.location.lon, @@ -105,7 +101,6 @@ def test_simulate_ctd_bgcs(tmpdir) -> None: po4 = np.zeros((2, 2, 2, 2)) ph = np.zeros((2, 2, 2, 2)) phyc = np.zeros((2, 2, 2, 2)) - zooc = np.zeros((2, 2, 2, 2)) nppv = np.zeros((2, 2, 2, 2)) # Fill fields for both CTDs at surface and maxdepth @@ -139,11 +134,6 @@ def test_simulate_ctd_bgcs(tmpdir) -> None: phyc[:, 1, 1, 0] = ctd_bgc_exp[1]["surface"]["phyc"] phyc[:, 0, 1, 0] = ctd_bgc_exp[1]["maxdepth"]["phyc"] - zooc[:, 1, 0, 1] = ctd_bgc_exp[0]["surface"]["zooc"] - zooc[:, 0, 0, 1] = ctd_bgc_exp[0]["maxdepth"]["zooc"] - zooc[:, 1, 1, 0] = ctd_bgc_exp[1]["surface"]["zooc"] - zooc[:, 0, 1, 0] = ctd_bgc_exp[1]["maxdepth"]["zooc"] - nppv[:, 1, 0, 1] = ctd_bgc_exp[0]["surface"]["nppv"] nppv[:, 0, 0, 1] = ctd_bgc_exp[0]["maxdepth"]["nppv"] nppv[:, 1, 1, 0] = ctd_bgc_exp[1]["surface"]["nppv"] @@ -159,7 +149,6 @@ def test_simulate_ctd_bgcs(tmpdir) -> None: "po4": po4, "ph": ph, "phyc": phyc, - "zooc": zooc, "nppv": nppv, }, { @@ -208,7 +197,6 @@ def test_simulate_ctd_bgcs(tmpdir) -> None: "po4", "ph", "phyc", - "zooc", "nppv", "lat", "lon",