diff --git a/src/virtualship/cli/_fetch.py b/src/virtualship/cli/_fetch.py index 4eb0d2d3..340edfd9 100644 --- a/src/virtualship/cli/_fetch.py +++ b/src/virtualship/cli/_fetch.py @@ -1,20 +1,242 @@ from __future__ import annotations import hashlib -from datetime import datetime +import shutil +from datetime import datetime, timedelta from pathlib import Path from typing import TYPE_CHECKING from pydantic import BaseModel -from virtualship.utils import _dump_yaml, _generic_load_yaml +from virtualship.utils import ( + _dump_yaml, + _generic_load_yaml, + _get_schedule, + _get_ship_config, +) if TYPE_CHECKING: from virtualship.expedition.space_time_region import SpaceTimeRegion +import click +import copernicusmarine +from copernicusmarine.core_functions.credentials_utils import InvalidUsernameOrPassword + +import virtualship.cli._creds as creds +from virtualship.utils import SCHEDULE + DOWNLOAD_METADATA = "download_metadata.yaml" +def _fetch(path: str | Path, username: str | None, password: str | None) -> None: + """ + Download input data for an expedition. + + Entrypoint for the tool to download data based on space-time region provided in the + schedule file. Data is downloaded from Copernicus Marine, credentials for which can be + obtained via registration: https://data.marine.copernicus.eu/register . Credentials can + be provided on prompt, via command line arguments, or via a YAML config file. Run + `virtualship fetch` on an expedition for more info. + """ + if sum([username is None, password is None]) == 1: + raise ValueError("Both username and password must be provided when using CLI.") + + path = Path(path) + + data_folder = path / "data" + data_folder.mkdir(exist_ok=True) + + schedule = _get_schedule(path) + ship_config = _get_ship_config(path) + + schedule.verify( + ship_config.ship_speed_knots, + input_data=None, + check_space_time_region=True, + ignore_missing_fieldsets=True, + ) + + space_time_region_hash = get_space_time_region_hash(schedule.space_time_region) + + existing_download = get_existing_download(data_folder, space_time_region_hash) + if existing_download is not None: + click.echo( + f"Data download for space-time region already completed ('{existing_download}')." + ) + return + + creds_path = path / creds.CREDENTIALS_FILE + username, password = creds.get_credentials_flow(username, password, creds_path) + + # Extract space_time_region details from the schedule + spatial_range = schedule.space_time_region.spatial_range + time_range = schedule.space_time_region.time_range + start_datetime = time_range.start_time + end_datetime = time_range.end_time + instruments_in_schedule = schedule.get_instruments() + + # Create download folder and set download metadata + download_folder = data_folder / hash_to_filename(space_time_region_hash) + download_folder.mkdir() + DownloadMetadata(download_complete=False).to_yaml( + download_folder / DOWNLOAD_METADATA + ) + shutil.copyfile(path / SCHEDULE, download_folder / SCHEDULE) + + if ( + ( + {"XBT", "CTD", "SHIP_UNDERWATER_ST"} + & set(instrument.name for instrument in instruments_in_schedule) + ) + or hasattr(ship_config, "ship_underwater_st_config") + or hasattr(ship_config, "adcp_config") + ): + print("Ship data will be downloaded") + + # Define all ship datasets to download, including bathymetry + download_dict = { + "Bathymetry": { + "dataset_id": "cmems_mod_glo_phy_my_0.083deg_static", + "variables": ["deptho"], + "output_filename": "bathymetry.nc", + }, + "UVdata": { + "dataset_id": "cmems_mod_glo_phy-cur_anfc_0.083deg_PT6H-i", + "variables": ["uo", "vo"], + "output_filename": "ship_uv.nc", + }, + "Sdata": { + "dataset_id": "cmems_mod_glo_phy-so_anfc_0.083deg_PT6H-i", + "variables": ["so"], + "output_filename": "ship_s.nc", + }, + "Tdata": { + "dataset_id": "cmems_mod_glo_phy-thetao_anfc_0.083deg_PT6H-i", + "variables": ["thetao"], + "output_filename": "ship_t.nc", + }, + } + + # Iterate over all datasets and download each based on space_time_region + try: + for dataset in download_dict.values(): + copernicusmarine.subset( + dataset_id=dataset["dataset_id"], + variables=dataset["variables"], + minimum_longitude=spatial_range.minimum_longitude, + maximum_longitude=spatial_range.maximum_longitude, + minimum_latitude=spatial_range.minimum_latitude, + maximum_latitude=spatial_range.maximum_latitude, + start_datetime=start_datetime, + end_datetime=end_datetime, + minimum_depth=abs(spatial_range.minimum_depth), + maximum_depth=abs(spatial_range.maximum_depth), + output_filename=dataset["output_filename"], + output_directory=download_folder, + username=username, + password=password, + overwrite=True, + coordinates_selection_method="outside", + ) + except InvalidUsernameOrPassword as e: + shutil.rmtree(download_folder) + raise e + + complete_download(download_folder) + click.echo("Ship data download based on space-time region completed.") + + if "DRIFTER" in instruments_in_schedule: + print("Drifter data will be downloaded") + drifter_download_dict = { + "UVdata": { + "dataset_id": "cmems_mod_glo_phy-cur_anfc_0.083deg_PT6H-i", + "variables": ["uo", "vo"], + "output_filename": "drifter_uv.nc", + }, + "Tdata": { + "dataset_id": "cmems_mod_glo_phy-thetao_anfc_0.083deg_PT6H-i", + "variables": ["thetao"], + "output_filename": "drifter_t.nc", + }, + } + + # Iterate over all datasets and download each based on space_time_region + try: + for dataset in drifter_download_dict.values(): + copernicusmarine.subset( + dataset_id=dataset["dataset_id"], + variables=dataset["variables"], + minimum_longitude=spatial_range.minimum_longitude - 3.0, + maximum_longitude=spatial_range.maximum_longitude + 3.0, + minimum_latitude=spatial_range.minimum_latitude - 3.0, + maximum_latitude=spatial_range.maximum_latitude + 3.0, + start_datetime=start_datetime, + end_datetime=end_datetime + timedelta(days=21), + minimum_depth=abs(1), + maximum_depth=abs(1), + output_filename=dataset["output_filename"], + output_directory=download_folder, + username=username, + password=password, + overwrite=True, + coordinates_selection_method="outside", + ) + except InvalidUsernameOrPassword as e: + shutil.rmtree(download_folder) + raise e + + complete_download(download_folder) + click.echo("Drifter data download based on space-time region completed.") + + if "ARGO_FLOAT" in instruments_in_schedule: + print("Argo float data will be downloaded") + argo_download_dict = { + "UVdata": { + "dataset_id": "cmems_mod_glo_phy-cur_anfc_0.083deg_PT6H-i", + "variables": ["uo", "vo"], + "output_filename": "argo_float_uv.nc", + }, + "Sdata": { + "dataset_id": "cmems_mod_glo_phy-so_anfc_0.083deg_PT6H-i", + "variables": ["so"], + "output_filename": "argo_float_s.nc", + }, + "Tdata": { + "dataset_id": "cmems_mod_glo_phy-thetao_anfc_0.083deg_PT6H-i", + "variables": ["thetao"], + "output_filename": "argo_float_t.nc", + }, + } + + # Iterate over all datasets and download each based on space_time_region + try: + for dataset in argo_download_dict.values(): + copernicusmarine.subset( + dataset_id=dataset["dataset_id"], + variables=dataset["variables"], + minimum_longitude=spatial_range.minimum_longitude - 3.0, + maximum_longitude=spatial_range.maximum_longitude + 3.0, + minimum_latitude=spatial_range.minimum_latitude - 3.0, + maximum_latitude=spatial_range.maximum_latitude + 3.0, + start_datetime=start_datetime, + end_datetime=end_datetime + timedelta(days=21), + minimum_depth=abs(1), + maximum_depth=abs(spatial_range.maximum_depth), + output_filename=dataset["output_filename"], + output_directory=download_folder, + username=username, + password=password, + overwrite=True, + coordinates_selection_method="outside", + ) + except InvalidUsernameOrPassword as e: + shutil.rmtree(download_folder) + raise e + + complete_download(download_folder) + click.echo("Argo_float data download based on space-time region completed.") + + def _hash(s: str, *, length: int) -> str: """Create a hash of a string.""" assert length % 2 == 0, "Length must be even." diff --git a/src/virtualship/cli/commands.py b/src/virtualship/cli/commands.py index 777015bc..706dc79a 100644 --- a/src/virtualship/cli/commands.py +++ b/src/virtualship/cli/commands.py @@ -1,30 +1,13 @@ -import shutil -from datetime import timedelta from pathlib import Path import click -import copernicusmarine -from copernicusmarine.core_functions.credentials_utils import InvalidUsernameOrPassword -import virtualship.cli._creds as creds from virtualship import utils -from virtualship.cli._fetch import ( - DOWNLOAD_METADATA, - DownloadMetadata, - complete_download, - get_existing_download, - get_space_time_region_hash, - hash_to_filename, -) -from virtualship.expedition.do_expedition import ( - _get_schedule, - _get_ship_config, - do_expedition, -) +from virtualship.cli._fetch import _fetch +from virtualship.expedition.do_expedition import do_expedition from virtualship.utils import ( SCHEDULE, SHIP_CONFIG, - get_instruments_in_schedule, mfp_to_yaml, ) @@ -38,7 +21,9 @@ "--from-mfp", type=str, default=None, - help='Partially initialise a project from an exported xlsx or csv file from NIOZ\' Marine Facilities Planning tool (specifically the "Export Coordinates > DD" option). User edits are required after initialisation.', + help="Partially initialise a project from an exported xlsx or csv file from NIOZ' " + 'Marine Facilities Planning tool (specifically the "Export Coordinates > DD" option). ' + "User edits are required after initialisation.", ) def init(path, from_mfp): """ @@ -108,198 +93,7 @@ def fetch(path: str | Path, username: str | None, password: str | None) -> None: be provided on prompt, via command line arguments, or via a YAML config file. Run `virtualship fetch` on a expedition for more info. """ - if sum([username is None, password is None]) == 1: - raise ValueError("Both username and password must be provided when using CLI.") - - path = Path(path) - - data_folder = path / "data" - data_folder.mkdir(exist_ok=True) - - schedule = _get_schedule(path) - ship_config = _get_ship_config(path) - - if schedule.space_time_region is None: - raise ValueError( - "space_time_region not found in schedule, please define it to fetch the data." - ) - - space_time_region_hash = get_space_time_region_hash(schedule.space_time_region) - - existing_download = get_existing_download(data_folder, space_time_region_hash) - if existing_download is not None: - click.echo( - f"Data download for space-time region already completed ('{existing_download}')." - ) - return - - creds_path = path / creds.CREDENTIALS_FILE - username, password = creds.get_credentials_flow(username, password, creds_path) - - # Extract space_time_region details from the schedule - spatial_range = schedule.space_time_region.spatial_range - time_range = schedule.space_time_region.time_range - start_datetime = time_range.start_time - end_datetime = time_range.end_time - instruments_in_schedule = get_instruments_in_schedule(schedule) - - # Create download folder and set download metadata - download_folder = data_folder / hash_to_filename(space_time_region_hash) - download_folder.mkdir() - DownloadMetadata(download_complete=False).to_yaml( - download_folder / DOWNLOAD_METADATA - ) - shutil.copyfile(path / SCHEDULE, download_folder / SCHEDULE) - - if ( - (set(["XBT", "CTD", "SHIP_UNDERWATER_ST"]) & set(instruments_in_schedule)) - or hasattr(ship_config, "ship_underwater_st_config") - or hasattr(ship_config, "adcp_config") - ): - print("Ship data will be downloaded") - - # Define all ship datasets to download, including bathymetry - download_dict = { - "Bathymetry": { - "dataset_id": "cmems_mod_glo_phy_my_0.083deg_static", - "variables": ["deptho"], - "output_filename": "bathymetry.nc", - }, - "UVdata": { - "dataset_id": "cmems_mod_glo_phy-cur_anfc_0.083deg_PT6H-i", - "variables": ["uo", "vo"], - "output_filename": "ship_uv.nc", - }, - "Sdata": { - "dataset_id": "cmems_mod_glo_phy-so_anfc_0.083deg_PT6H-i", - "variables": ["so"], - "output_filename": "ship_s.nc", - }, - "Tdata": { - "dataset_id": "cmems_mod_glo_phy-thetao_anfc_0.083deg_PT6H-i", - "variables": ["thetao"], - "output_filename": "ship_t.nc", - }, - } - - # Iterate over all datasets and download each based on space_time_region - try: - for dataset in download_dict.values(): - copernicusmarine.subset( - dataset_id=dataset["dataset_id"], - variables=dataset["variables"], - minimum_longitude=spatial_range.minimum_longitude, - maximum_longitude=spatial_range.maximum_longitude, - minimum_latitude=spatial_range.minimum_latitude, - maximum_latitude=spatial_range.maximum_latitude, - start_datetime=start_datetime, - end_datetime=end_datetime, - minimum_depth=abs(spatial_range.minimum_depth), - maximum_depth=abs(spatial_range.maximum_depth), - output_filename=dataset["output_filename"], - output_directory=download_folder, - username=username, - password=password, - overwrite=True, - coordinates_selection_method="outside", - ) - except InvalidUsernameOrPassword as e: - shutil.rmtree(download_folder) - raise e - - complete_download(download_folder) - click.echo("Ship data download based on space-time region completed.") - - if "DRIFTER" in instruments_in_schedule: - print("Drifter data will be downloaded") - drifter_download_dict = { - "UVdata": { - "dataset_id": "cmems_mod_glo_phy-cur_anfc_0.083deg_PT6H-i", - "variables": ["uo", "vo"], - "output_filename": "drifter_uv.nc", - }, - "Tdata": { - "dataset_id": "cmems_mod_glo_phy-thetao_anfc_0.083deg_PT6H-i", - "variables": ["thetao"], - "output_filename": "drifter_t.nc", - }, - } - - # Iterate over all datasets and download each based on space_time_region - try: - for dataset in drifter_download_dict.values(): - copernicusmarine.subset( - dataset_id=dataset["dataset_id"], - variables=dataset["variables"], - minimum_longitude=spatial_range.minimum_longitude - 3.0, - maximum_longitude=spatial_range.maximum_longitude + 3.0, - minimum_latitude=spatial_range.minimum_latitude - 3.0, - maximum_latitude=spatial_range.maximum_latitude + 3.0, - start_datetime=start_datetime, - end_datetime=end_datetime + timedelta(days=21), - minimum_depth=abs(1), - maximum_depth=abs(1), - output_filename=dataset["output_filename"], - output_directory=download_folder, - username=username, - password=password, - overwrite=True, - coordinates_selection_method="outside", - ) - except InvalidUsernameOrPassword as e: - shutil.rmtree(download_folder) - raise e - - complete_download(download_folder) - click.echo("Drifter data download based on space-time region completed.") - - if "ARGO_FLOAT" in instruments_in_schedule: - print("Argo float data will be downloaded") - argo_download_dict = { - "UVdata": { - "dataset_id": "cmems_mod_glo_phy-cur_anfc_0.083deg_PT6H-i", - "variables": ["uo", "vo"], - "output_filename": "argo_float_uv.nc", - }, - "Sdata": { - "dataset_id": "cmems_mod_glo_phy-so_anfc_0.083deg_PT6H-i", - "variables": ["so"], - "output_filename": "argo_float_s.nc", - }, - "Tdata": { - "dataset_id": "cmems_mod_glo_phy-thetao_anfc_0.083deg_PT6H-i", - "variables": ["thetao"], - "output_filename": "argo_float_t.nc", - }, - } - - # Iterate over all datasets and download each based on space_time_region - try: - for dataset in argo_download_dict.values(): - copernicusmarine.subset( - dataset_id=dataset["dataset_id"], - variables=dataset["variables"], - minimum_longitude=spatial_range.minimum_longitude - 3.0, - maximum_longitude=spatial_range.maximum_longitude + 3.0, - minimum_latitude=spatial_range.minimum_latitude - 3.0, - maximum_latitude=spatial_range.maximum_latitude + 3.0, - start_datetime=start_datetime, - end_datetime=end_datetime + timedelta(days=21), - minimum_depth=abs(1), - maximum_depth=abs(spatial_range.maximum_depth), - output_filename=dataset["output_filename"], - output_directory=download_folder, - username=username, - password=password, - overwrite=True, - coordinates_selection_method="outside", - ) - except InvalidUsernameOrPassword as e: - shutil.rmtree(download_folder) - raise e - - complete_download(download_folder) - click.echo("Argo_float data download based on space-time region completed.") + _fetch(path, username, password) @click.command() diff --git a/src/virtualship/expedition/__init__.py b/src/virtualship/expedition/__init__.py index 051ef50d..9137f7b1 100644 --- a/src/virtualship/expedition/__init__.py +++ b/src/virtualship/expedition/__init__.py @@ -1,6 +1,7 @@ """Everything for simulating an expedition.""" from .do_expedition import do_expedition +from .input_data import InputData from .instrument_type import InstrumentType from .schedule import Schedule from .ship_config import ( @@ -19,6 +20,7 @@ "ArgoFloatConfig", "CTDConfig", "DrifterConfig", + "InputData", "InstrumentType", "Schedule", "ShipConfig", diff --git a/src/virtualship/expedition/checkpoint.py b/src/virtualship/expedition/checkpoint.py index 27a85657..75dd2356 100644 --- a/src/virtualship/expedition/checkpoint.py +++ b/src/virtualship/expedition/checkpoint.py @@ -49,3 +49,31 @@ def from_yaml(cls, file_path: str | Path) -> Checkpoint: with open(file_path) as file: data = yaml.safe_load(file) return Checkpoint(**data) + + def verify(self, schedule: Schedule) -> None: + """ + Verify that the given schedule matches the checkpoint's past schedule. + + This method checks if the waypoints in the given schedule match the waypoints + in the checkpoint's past schedule up to the length of the past schedule. + If there's a mismatch, it raises a CheckpointError. + + :param schedule: The schedule to verify against the checkpoint. + :type schedule: Schedule + :raises CheckpointError: If the past waypoints in the given schedule + have been changed compared to the checkpoint. + :return: None + """ + if ( + not schedule.waypoints[: len(self.past_schedule.waypoints)] + == self.past_schedule.waypoints + ): + raise CheckpointError( + "Past waypoints in schedule have been changed! Restore past schedule and only change future waypoints." + ) + + +class CheckpointError(RuntimeError): + """An error in the checkpoint.""" + + pass diff --git a/src/virtualship/expedition/do_expedition.py b/src/virtualship/expedition/do_expedition.py index b199bb4a..97a66f7f 100644 --- a/src/virtualship/expedition/do_expedition.py +++ b/src/virtualship/expedition/do_expedition.py @@ -9,9 +9,8 @@ from virtualship.cli._fetch import get_existing_download, get_space_time_region_hash from virtualship.utils import ( CHECKPOINT, - SCHEDULE, - SHIP_CONFIG, - get_instruments_in_schedule, + _get_schedule, + _get_ship_config, ) from .checkpoint import Checkpoint @@ -21,7 +20,9 @@ from .ship_config import ShipConfig from .simulate_measurements import simulate_measurements from .simulate_schedule import ScheduleProblem, simulate_schedule -from .verify_schedule import verify_schedule + +# projection used to sail between waypoints +projection = pyproj.Geod(ellps="WGS84") def do_expedition(expedition_dir: str | Path, input_data: Path | None = None) -> None: @@ -29,7 +30,7 @@ def do_expedition(expedition_dir: str | Path, input_data: Path | None = None) -> Perform an expedition, providing terminal feedback and file output. :param expedition_dir: The base directory for the expedition. - :param input_data: Input data folder folder (override used for testing). + :param input_data: Input data folder (override used for testing). """ if isinstance(expedition_dir, str): expedition_dir = Path(expedition_dir) @@ -37,21 +38,8 @@ def do_expedition(expedition_dir: str | Path, input_data: Path | None = None) -> ship_config = _get_ship_config(expedition_dir) schedule = _get_schedule(expedition_dir) - # remove instrument configurations that are not in schedule - instruments_in_schedule = get_instruments_in_schedule(schedule) - - for instrument in [ - "ARGO_FLOAT", - "DRIFTER", - "XBT", - "CTD", - ]: # TODO make instrument names consistent capitals or lowercase throughout codebase - if ( - hasattr(ship_config, instrument.lower() + "_config") - and instrument not in instruments_in_schedule - ): - print(f"{instrument} configuration provided but not in schedule.") - setattr(ship_config, instrument.lower() + "_config", None) + # Verify ship_config file is consistent with schedule + ship_config.verify(schedule) # load last checkpoint checkpoint = _load_checkpoint(expedition_dir) @@ -59,17 +47,7 @@ def do_expedition(expedition_dir: str | Path, input_data: Path | None = None) -> checkpoint = Checkpoint(past_schedule=Schedule(waypoints=[])) # verify that schedule and checkpoint match - if ( - not schedule.waypoints[: len(checkpoint.past_schedule.waypoints)] - == checkpoint.past_schedule.waypoints - ): - print( - "Past waypoints in schedule have been changed! Restore past schedule and only change future waypoints." - ) - return - - # projection used to sail between waypoints - projection = pyproj.Geod(ellps="WGS84") + checkpoint.verify(schedule) # load fieldsets input_data = _load_input_data( @@ -79,8 +57,8 @@ def do_expedition(expedition_dir: str | Path, input_data: Path | None = None) -> input_data=input_data, ) - # verify schedule makes sense - verify_schedule(projection, ship_config, schedule, input_data) + # verify schedule is valid + schedule.verify(ship_config.ship_speed_knots, input_data) # simulate the schedule schedule_results = simulate_schedule( @@ -129,16 +107,6 @@ def do_expedition(expedition_dir: str | Path, input_data: Path | None = None) -> print("Your measurements can be found in the results directory.") -def _get_ship_config(expedition_dir: Path) -> ShipConfig | None: - file_path = expedition_dir.joinpath(SHIP_CONFIG) - try: - return ShipConfig.from_yaml(file_path) - except FileNotFoundError as e: - raise FileNotFoundError( - f'Ship config not found. Save it to "{file_path}".' - ) from e - - def _load_input_data( expedition_dir: Path, schedule: Schedule, @@ -174,24 +142,6 @@ def _load_input_data( ) -def _get_schedule(expedition_dir: Path) -> Schedule: - """Load Schedule object from yaml config file in `expedition_dir`.""" - file_path = expedition_dir.joinpath(SCHEDULE) - try: - return Schedule.from_yaml(file_path) - except FileNotFoundError as e: - raise FileNotFoundError(f'Schedule not found. Save it to "{file_path}".') from e - - -def _get_ship_config(expedition_dir: Path) -> Schedule: - """Load Schedule object from yaml config file in `expedition_dir`.""" - file_path = expedition_dir.joinpath(SHIP_CONFIG) - try: - return ShipConfig.from_yaml(file_path) - except FileNotFoundError as e: - raise FileNotFoundError(f'Config not found. Save it to "{file_path}".') from e - - def _load_checkpoint(expedition_dir: Path) -> Checkpoint | None: file_path = expedition_dir.joinpath(CHECKPOINT) try: diff --git a/src/virtualship/expedition/schedule.py b/src/virtualship/expedition/schedule.py index 5e41e00e..6c3bbfd1 100644 --- a/src/virtualship/expedition/schedule.py +++ b/src/virtualship/expedition/schedule.py @@ -2,14 +2,22 @@ from __future__ import annotations +import itertools +from datetime import timedelta from pathlib import Path import pydantic +import pyproj import yaml +from parcels import FieldSet +from .input_data import InputData +from .instrument_type import InstrumentType from .space_time_region import SpaceTimeRegion from .waypoint import Waypoint +projection: pyproj.Geod = pyproj.Geod(ellps="WGS84") + class Schedule(pydantic.BaseModel): """Schedule of the virtual ship.""" @@ -44,3 +52,167 @@ def from_yaml(cls, file_path: str | Path) -> Schedule: with open(file_path) as file: data = yaml.safe_load(file) return Schedule(**data) + + def get_instruments(self) -> set[InstrumentType]: + """ + Retrieve a set of unique instruments used in the schedule. + + This method iterates through all waypoints in the schedule and collects + the instruments associated with each waypoint. It returns a set of unique + instruments, either as objects or as names. + + :raises CheckpointError: If the past waypoints in the given schedule + have been changed compared to the checkpoint. + :return: set: A set of unique instruments used in the schedule. + + """ + instruments_in_schedule = [] + for waypoint in self.waypoints: + if waypoint.instrument: + for instrument in waypoint.instrument: + if instrument: + instruments_in_schedule.append(instrument) + return set(instruments_in_schedule) + + def verify( + self, + ship_speed: float, + input_data: InputData | None, + *, + check_space_time_region: bool = False, + ignore_missing_fieldsets: bool = False, + ) -> None: + """ + Verify the feasibility and correctness of the schedule's waypoints. + + This method checks various conditions to ensure the schedule is valid: + 1. At least one waypoint is provided. + 2. The first waypoint has a specified time. + 3. Waypoint times are in ascending order. + 4. All waypoints are in water (not on land). + 5. The ship can arrive on time at each waypoint given its speed. + + :param ship_speed: The ship's speed in knots. + :param input_data: An InputData object containing fieldsets used to check if waypoints are on water. + :param check_space_time_region: whether to check for missing space_time_region. + :param ignore_missing_fieldsets: whether to ignore warning for missing field sets. + :raises PlanningError: If any of the verification checks fail, indicating infeasible or incorrect waypoints. + :raises NotImplementedError: If an instrument in the schedule is not implemented. + :return: None. The method doesn't return a value but raises exceptions if verification fails. + """ + if check_space_time_region and self.space_time_region is None: + raise ScheduleError( + "space_time_region not found in schedule, please define it to fetch the data." + ) + + if len(self.waypoints) == 0: + raise ScheduleError("At least one waypoint must be provided.") + + # check first waypoint has a time + if self.waypoints[0].time is None: + raise ScheduleError("First waypoint must have a specified time.") + + # check waypoint times are in ascending order + timed_waypoints = [wp for wp in self.waypoints if wp.time is not None] + if not all( + [next.time >= cur.time for cur, next in itertools.pairwise(timed_waypoints)] + ): + raise ScheduleError( + "Each waypoint should be timed after all previous waypoints" + ) + + # check if all waypoints are in water + # this is done by picking an arbitrary provided fieldset and checking if UV is not zero + + print("Verifying all waypoints are on water..") + + # get all available fieldsets + available_fieldsets = [] + if input_data is not None: + fieldsets = [ + input_data.adcp_fieldset, + input_data.argo_float_fieldset, + input_data.ctd_fieldset, + input_data.drifter_fieldset, + input_data.ship_underwater_st_fieldset, + ] + for fs in fieldsets: + if fs is not None: + available_fieldsets.append(fs) + + # check if there are any fieldsets, else it's an error + if len(available_fieldsets) == 0: + if not ignore_missing_fieldsets: + print( + "Cannot verify because no fieldsets have been loaded. This is probably " + "because you are not using any instruments in your schedule. This is not a problem, " + "but carefully check your waypoint locations manually." + ) + + else: + # pick any + fieldset = available_fieldsets[0] + # get waypoints with 0 UV + land_waypoints = [ + (wp_i, wp) + for wp_i, wp in enumerate(self.waypoints) + if _is_on_land_zero_uv(fieldset, wp) + ] + # raise an error if there are any + if len(land_waypoints) > 0: + raise ScheduleError( + f"The following waypoints are on land: {['#' + str(wp_i) + ' ' + str(wp) for (wp_i, wp) in land_waypoints]}" + ) + print("Good, all waypoints are on water.") + + # check that ship will arrive on time at each waypoint (in case no unexpected event happen) + time = self.waypoints[0].time + for wp_i, (wp, wp_next) in enumerate( + zip(self.waypoints, self.waypoints[1:], strict=False) + ): + if wp.instrument is InstrumentType.CTD: + time += timedelta(minutes=20) + + geodinv: tuple[float, float, float] = projection.inv( + wp.location.lon, + wp.location.lat, + wp_next.location.lon, + wp_next.location.lat, + ) + distance = geodinv[2] + + time_to_reach = timedelta(seconds=distance / ship_speed * 3600 / 1852) + arrival_time = time + time_to_reach + + if wp_next.time is None: + time = arrival_time + elif arrival_time > wp_next.time: + raise ScheduleError( + f"Waypoint planning is not valid: would arrive too late at waypoint number {wp_i + 2}. " + f"location: {wp_next.location} time: {wp_next.time} instrument: {wp_next.instrument}" + ) + else: + time = wp_next.time + + +class ScheduleError(RuntimeError): + """An error in the schedule.""" + + pass + + +def _is_on_land_zero_uv(fieldset: FieldSet, waypoint: Waypoint) -> bool: + """ + Check if waypoint is on land by assuming zero velocity means land. + + :param fieldset: The fieldset to sample the velocity from. + :param waypoint: The waypoint to check. + :returns: If the waypoint is on land. + """ + return fieldset.UV.eval( + 0, + fieldset.gridset.grids[0].depth[0], + waypoint.location.lat, + waypoint.location.lon, + applyConversion=False, + ) == (0.0, 0.0) diff --git a/src/virtualship/expedition/ship_config.py b/src/virtualship/expedition/ship_config.py index 8969f1c4..b411443f 100644 --- a/src/virtualship/expedition/ship_config.py +++ b/src/virtualship/expedition/ship_config.py @@ -10,6 +10,9 @@ from virtualship.utils import _validate_numeric_mins_to_timedelta +from .instrument_type import InstrumentType +from .schedule import Schedule + class ArgoFloatConfig(pydantic.BaseModel): """Configuration for argos floats.""" @@ -188,3 +191,86 @@ def from_yaml(cls, file_path: str | Path) -> ShipConfig: with open(file_path) as file: data = yaml.safe_load(file) return ShipConfig(**data) + + def verify(self, schedule: Schedule) -> None: + """ + Verify the ship configuration against the provided schedule. + + This function performs two main tasks: + 1. Removes instrument configurations that are not present in the schedule. + 2. Verifies that all instruments in the schedule have corresponding configurations. + + Parameters + ---------- + schedule : Schedule + The schedule object containing the planned instruments and waypoints. + + Returns + ------- + None + + Raises + ------ + ConfigError + If an instrument in the schedule does not have a corresponding configuration. + + Notes + ----- + - Prints a message if a configuration is provided for an instrument not in the schedule. + - Sets the configuration to None for instruments not in the schedule. + - Raises a ConfigError for each instrument in the schedule that lacks a configuration. + + """ + instruments_in_schedule = schedule.get_instruments() + + for instrument in [ + "ARGO_FLOAT", + "DRIFTER", + "XBT", + "CTD", + ]: # TODO make instrument names consistent capitals or lowercase throughout codebase + if hasattr(self, instrument.lower() + "_config") and not any( + instrument == schedule_instrument.name + for schedule_instrument in instruments_in_schedule + ): + print(f"{instrument} configuration provided but not in schedule.") + setattr(self, instrument.lower() + "_config", None) + + # verify instruments in schedule have configuration + for instrument in instruments_in_schedule: + try: + InstrumentType(instrument) + except ValueError as e: + raise NotImplementedError("Instrument not supported.") from e + + if instrument == InstrumentType.ARGO_FLOAT and ( + not hasattr(self, "argo_float_config") or self.argo_float_config is None + ): + raise ConfigError( + "Planning has a waypoint with Argo float instrument, but configuration does not configure Argo floats." + ) + if instrument == InstrumentType.CTD and ( + not hasattr(self, "ctd_config") or self.ctd_config is None + ): + raise ConfigError( + "Planning has a waypoint with CTD instrument, but configuration does not configure CTDs." + ) + if instrument == InstrumentType.DRIFTER and ( + not hasattr(self, "drifter_config") or self.drifter_config is None + ): + raise ConfigError( + "Planning has a waypoint with drifter instrument, but configuration does not configure drifters." + ) + + if instrument == InstrumentType.XBT and ( + not hasattr(self, "xbt_config") or self.xbt_config is None + ): + raise ConfigError( + "Planning has a waypoint with XBT instrument, but configuration does not configure XBT." + ) + + +class ConfigError(RuntimeError): + """An error in the config.""" + + pass diff --git a/src/virtualship/expedition/verify_schedule.py b/src/virtualship/expedition/verify_schedule.py deleted file mode 100644 index d3b01011..00000000 --- a/src/virtualship/expedition/verify_schedule.py +++ /dev/null @@ -1,164 +0,0 @@ -"""verify_schedule function and supporting classes.""" - -import itertools -from datetime import timedelta - -import pyproj -from parcels import FieldSet - -from .input_data import InputData -from .instrument_type import InstrumentType -from .schedule import Schedule -from .ship_config import ShipConfig -from .waypoint import Waypoint - - -def verify_schedule( - projection: pyproj.Geod, - ship_config: ShipConfig, - schedule: Schedule, - input_data: InputData, -) -> None: - """ - Verify waypoints are ordered by time, first waypoint has a start time, and that schedule is feasible in terms of time if no unexpected events happen. - - :param projection: projection used to sail between waypoints. - :param ship_config: The cruise ship_configuration. - :param schedule: The schedule to verify. - :param input_data: Fieldsets that can be used to check for zero UV condition (is waypoint on land). - :raises PlanningError: If waypoints are not feasible or incorrect. - :raises NotImplementedError: If instrument is in schedule that is not implememented. - """ - if len(schedule.waypoints) == 0: - raise PlanningError("At least one waypoint must be provided.") - - # check first waypoint has a time - if schedule.waypoints[0].time is None: - raise PlanningError("First waypoint must have a specified time.") - - # check waypoint times are in ascending order - timed_waypoints = [wp for wp in schedule.waypoints if wp.time is not None] - if not all( - [next.time >= cur.time for cur, next in itertools.pairwise(timed_waypoints)] - ): - raise PlanningError( - "Each waypoint should be timed after all previous waypoints" - ) - - # check if all waypoints are in water - # this is done by picking an arbitrary provided fieldset and checking if UV is not zero - - print("Verifying all waypoints are on water..") - - # get all available fieldsets - available_fieldsets = [ - fs - for fs in [ - input_data.adcp_fieldset, - input_data.argo_float_fieldset, - input_data.ctd_fieldset, - input_data.drifter_fieldset, - input_data.ship_underwater_st_fieldset, - ] - if fs is not None - ] - - # check if there are any fieldsets, else its an error - if len(available_fieldsets) == 0: - print( - "Cannot verify because no fieldsets have been loaded. This is probably because you are not using any instruments in your schedule. This is not a problem, but carefully check your waypoint locations manually.." - ) - - else: - # pick any - fieldset = available_fieldsets[0] - # get waypoints with 0 UV - land_waypoints = [ - (wp_i, wp) - for wp_i, wp in enumerate(schedule.waypoints) - if _is_on_land_zero_uv(fieldset, wp) - ] - # raise an error if there are any - if len(land_waypoints) > 0: - raise PlanningError( - f"The following waypoints are on land: {['#' + str(wp_i) + ' ' + str(wp) for (wp_i, wp) in land_waypoints]}" - ) - print("Good, all waypoints are on water.") - - # check that ship will arrive on time at each waypoint (in case no unexpected event happen) - time = schedule.waypoints[0].time - for wp_i, (wp, wp_next) in enumerate( - zip(schedule.waypoints, schedule.waypoints[1:], strict=False) - ): - if wp.instrument is InstrumentType.CTD: - time += timedelta(minutes=20) - - geodinv: tuple[float, float, float] = projection.inv( - wp.location.lon, wp.location.lat, wp_next.location.lon, wp_next.location.lat - ) - distance = geodinv[2] - - time_to_reach = timedelta( - seconds=distance / ship_config.ship_speed_knots * 3600 / 1852 - ) - arrival_time = time + time_to_reach - - if wp_next.time is None: - time = arrival_time - elif arrival_time > wp_next.time: - raise PlanningError( - f"Waypoint planning is not valid: would arrive too late at a waypoint number {wp_i + 1}. location: {wp_next.location} time: {wp_next.time} instrument: {wp_next.instrument}" - ) - else: - time = wp_next.time - - # verify instruments in schedule have configuration - for wp in schedule.waypoints: - if wp.instrument is not None: - for instrument in ( - wp.instrument if isinstance(wp.instrument, list) else [wp.instrument] - ): - if instrument not in InstrumentType: - raise NotImplementedError("Instrument not supported.") - - if ( - instrument == InstrumentType.ARGO_FLOAT - and ship_config.argo_float_config is None - ): - raise PlanningError( - "Planning has waypoint with Argo float instrument, but configuration does not configure Argo floats." - ) - if instrument == InstrumentType.CTD and ship_config.ctd_config is None: - raise PlanningError( - "Planning has waypoint with CTD instrument, but configuration does not configure CTDs." - ) - if ( - instrument == InstrumentType.DRIFTER - and ship_config.drifter_config is None - ): - raise PlanningError( - "Planning has waypoint with drifter instrument, but configuration does not configure drifters." - ) - - -class PlanningError(RuntimeError): - """An error in the schedule.""" - - pass - - -def _is_on_land_zero_uv(fieldset: FieldSet, waypoint: Waypoint) -> bool: - """ - Check if waypoint is on land by assuming zero velocity means land. - - :param fieldset: The fieldset to sample the velocity from. - :param waypoint: The waypoint to check. - :returns: If the waypoint is on land. - """ - return fieldset.UV.eval( - 0, - fieldset.gridset.grids[0].depth[0], - waypoint.location.lat, - waypoint.location.lon, - applyConversion=False, - ) == (0.0, 0.0) diff --git a/src/virtualship/utils.py b/src/virtualship/utils.py index a3ad133d..c798a3dd 100644 --- a/src/virtualship/utils.py +++ b/src/virtualship/utils.py @@ -1,9 +1,16 @@ +from __future__ import annotations + import os import warnings from datetime import timedelta from functools import lru_cache from importlib.resources import files -from typing import TextIO +from pathlib import Path +from typing import TYPE_CHECKING, TextIO + +if TYPE_CHECKING: + from virtualship.expedition.schedule import Schedule + from virtualship.expedition.ship_config import ShipConfig import pandas as pd import yaml @@ -219,11 +226,24 @@ def _validate_numeric_mins_to_timedelta(value: int | float | timedelta) -> timed return timedelta(minutes=value) -def get_instruments_in_schedule(schedule): - instruments_in_schedule = [] - for waypoint in schedule.waypoints: - if waypoint.instrument: - for instrument in waypoint.instrument: - if instrument: - instruments_in_schedule.append(instrument.name) - return instruments_in_schedule +def _get_schedule(expedition_dir: Path) -> Schedule: + """Load Schedule object from yaml config file in `expedition_dir`.""" + from virtualship.expedition.schedule import Schedule + + file_path = expedition_dir.joinpath(SCHEDULE) + try: + return Schedule.from_yaml(file_path) + except FileNotFoundError as e: + raise FileNotFoundError(f'Schedule not found. Save it to "{file_path}".') from e + + +def _get_ship_config(expedition_dir: Path) -> ShipConfig: + from virtualship.expedition.ship_config import ShipConfig + + file_path = expedition_dir.joinpath(SHIP_CONFIG) + try: + return ShipConfig.from_yaml(file_path) + except FileNotFoundError as e: + raise FileNotFoundError( + f'Ship config not found. Save it to "{file_path}".' + ) from e diff --git a/tests/cli/test_cli.py b/tests/cli/test_cli.py index 05a5fc72..015c3267 100644 --- a/tests/cli/test_cli.py +++ b/tests/cli/test_cli.py @@ -14,9 +14,7 @@ def copernicus_subset_no_download(monkeypatch): def fake_download(output_filename, output_directory, **_): Path(output_directory).joinpath(output_filename).touch() - monkeypatch.setattr( - "virtualship.cli.commands.copernicusmarine.subset", fake_download - ) + monkeypatch.setattr("virtualship.cli._fetch.copernicusmarine.subset", fake_download) yield diff --git a/tests/cli/test_fetch.py b/tests/cli/test_fetch.py index 53801e5b..3102ca8b 100644 --- a/tests/cli/test_fetch.py +++ b/tests/cli/test_fetch.py @@ -7,6 +7,7 @@ DOWNLOAD_METADATA, DownloadMetadata, IncompleteDownloadError, + _fetch, assert_complete_download, complete_download, create_hash, @@ -15,6 +16,50 @@ hash_model, hash_to_filename, ) +from virtualship.expedition.schedule import Schedule +from virtualship.expedition.ship_config import ShipConfig +from virtualship.utils import get_example_config, get_example_schedule + + +@pytest.fixture +def copernicus_subset_no_download(monkeypatch): + """Mock the download function.""" + + def fake_download(output_filename, output_directory, **_): + Path(output_directory).joinpath(output_filename).touch() + + monkeypatch.setattr("virtualship.cli._fetch.copernicusmarine.subset", fake_download) + yield + + +@pytest.fixture +def schedule(tmpdir): + out_path = tmpdir.join("schedule.yaml") + + with open(out_path, "w") as file: + file.write(get_example_schedule()) + + schedule = Schedule.from_yaml(out_path) + + return schedule + + +@pytest.fixture +def ship_config(tmpdir): + out_path = tmpdir.join("ship_config.yaml") + + with open(out_path, "w") as file: + file.write(get_example_config()) + + ship_config = ShipConfig.from_yaml(out_path) + + return ship_config + + +@pytest.mark.usefixtures("copernicus_subset_no_download") +def test_fetch(schedule, ship_config, tmpdir): + """Test the fetch command, but mock the download.""" + _fetch(Path(tmpdir), "test", "test") def test_create_hash(): diff --git a/tests/expedition/test_schedule.py b/tests/expedition/test_schedule.py index 33ffc74d..50c35d2f 100644 --- a/tests/expedition/test_schedule.py +++ b/tests/expedition/test_schedule.py @@ -1,10 +1,21 @@ from datetime import datetime, timedelta +from pathlib import Path + +import pyproj +import pytest from virtualship import Location -from virtualship.expedition import Schedule, Waypoint +from virtualship.expedition import Waypoint +from virtualship.expedition.do_expedition import _load_input_data +from virtualship.expedition.schedule import Schedule, ScheduleError +from virtualship.utils import _get_ship_config + +projection = pyproj.Geod(ellps="WGS84") +expedition_dir = Path("expedition_dir") -def test_schedule(tmpdir) -> None: + +def test_import_export_schedule(tmpdir) -> None: out_path = tmpdir.join("schedule.yaml") # arbitrary time for testing @@ -24,3 +35,127 @@ def test_schedule(tmpdir) -> None: schedule2 = Schedule.from_yaml(out_path) assert schedule == schedule2 + + +def test_verify_schedule() -> None: + schedule = Schedule( + waypoints=[ + Waypoint(location=Location(0, 0), time=datetime(2022, 1, 1, 1, 0, 0)), + Waypoint(location=Location(1, 0), time=datetime(2022, 1, 2, 1, 0, 0)), + ] + ) + + ship_config = _get_ship_config(expedition_dir) + + schedule.verify(ship_config.ship_speed_knots, None) + + +def test_get_instruments() -> None: + schedule = Schedule( + waypoints=[ + Waypoint(location=Location(0, 0), instrument=["CTD"]), + Waypoint(location=Location(1, 0), instrument=["XBT", "ARGO_FLOAT"]), + Waypoint(location=Location(1, 0), instrument=["CTD"]), + ] + ) + + assert set(instrument.name for instrument in schedule.get_instruments()) == { + "CTD", + "XBT", + "ARGO_FLOAT", + } + + +@pytest.mark.parametrize( + "schedule,check_space_time_region,error,match", + [ + pytest.param( + Schedule(waypoints=[]), + False, + ScheduleError, + "At least one waypoint must be provided.", + id="NoWaypoints", + ), + pytest.param( + Schedule( + waypoints=[ + Waypoint(location=Location(0, 0)), + Waypoint( + location=Location(1, 0), time=datetime(2022, 1, 1, 1, 0, 0) + ), + ] + ), + False, + ScheduleError, + "First waypoint must have a specified time.", + id="FirstWaypointHasTime", + ), + pytest.param( + Schedule( + waypoints=[ + Waypoint( + location=Location(0, 0), time=datetime(2022, 1, 2, 1, 0, 0) + ), + Waypoint(location=Location(0, 0)), + Waypoint( + location=Location(1, 0), time=datetime(2022, 1, 1, 1, 0, 0) + ), + ] + ), + False, + ScheduleError, + "Each waypoint should be timed after all previous waypoints", + id="SequentialWaypoints", + ), + pytest.param( + Schedule( + waypoints=[ + Waypoint( + location=Location(0, 0), time=datetime(2022, 1, 1, 1, 0, 0) + ), + Waypoint( + location=Location(1, 0), time=datetime(2022, 1, 1, 1, 1, 0) + ), + ] + ), + False, + ScheduleError, + "Waypoint planning is not valid: would arrive too late at waypoint number 2...", + id="NotEnoughTime", + ), + pytest.param( + Schedule( + waypoints=[ + Waypoint( + location=Location(0, 0), time=datetime(2022, 1, 1, 1, 0, 0) + ), + Waypoint( + location=Location(1, 0), time=datetime(2022, 1, 2, 1, 1, 0) + ), + ] + ), + True, + ScheduleError, + "space_time_region not found in schedule, please define it to fetch the data.", + id="NoSpaceTimeRegion", + ), + ], +) +def test_verify_schedule_errors( + schedule: Schedule, check_space_time_region: bool, error, match +) -> None: + ship_config = _get_ship_config(expedition_dir) + + input_data = _load_input_data( + expedition_dir, + schedule, + ship_config, + input_data=Path("expedition_dir/input_data"), + ) + + with pytest.raises(error, match=match): + schedule.verify( + ship_config.ship_speed_knots, + input_data, + check_space_time_region=check_space_time_region, + ) diff --git a/tests/expedition/test_ship_config.py b/tests/expedition/test_ship_config.py new file mode 100644 index 00000000..44bfd524 --- /dev/null +++ b/tests/expedition/test_ship_config.py @@ -0,0 +1,114 @@ +from pathlib import Path + +import pytest + +from virtualship.expedition.schedule import Schedule +from virtualship.expedition.ship_config import ConfigError, ShipConfig +from virtualship.utils import get_example_config, get_example_schedule + +expedition_dir = Path("expedition_dir") + + +@pytest.fixture +def schedule(tmp_file): + with open(tmp_file, "w") as file: + file.write(get_example_schedule()) + return Schedule.from_yaml(tmp_file) + + +@pytest.fixture +def schedule_no_xbt(schedule): + for waypoint in schedule.waypoints: + if waypoint.instrument and any( + instrument.name == "XBT" for instrument in waypoint.instrument + ): + waypoint.instrument = [ + instrument + for instrument in waypoint.instrument + if instrument.name != "XBT" + ] + + return schedule + + +@pytest.fixture +def ship_config(tmp_file): + with open(tmp_file, "w") as file: + file.write(get_example_config()) + return ShipConfig.from_yaml(tmp_file) + + +@pytest.fixture +def ship_config_no_xbt(ship_config): + delattr(ship_config, "xbt_config") + return ship_config + + +@pytest.fixture +def ship_config_no_ctd(ship_config): + delattr(ship_config, "ctd_config") + return ship_config + + +@pytest.fixture +def ship_config_no_argo_float(ship_config): + delattr(ship_config, "argo_float_config") + return ship_config + + +@pytest.fixture +def ship_config_no_drifter(ship_config): + delattr(ship_config, "drifter_config") + return ship_config + + +def test_import_export_ship_config(ship_config, tmp_file) -> None: + ship_config.to_yaml(tmp_file) + ship_config_2 = ShipConfig.from_yaml(tmp_file) + assert ship_config == ship_config_2 + + +def test_verify_ship_config(ship_config, schedule) -> None: + ship_config.verify(schedule) + + +def test_verify_ship_config_no_instrument(ship_config, schedule_no_xbt) -> None: + ship_config.verify(schedule_no_xbt) + + +@pytest.mark.parametrize( + "ship_config_fixture,error,match", + [ + pytest.param( + "ship_config_no_xbt", + ConfigError, + "Planning has a waypoint with XBT instrument, but configuration does not configure XBT.", + id="ShipConfigNoXBT", + ), + pytest.param( + "ship_config_no_ctd", + ConfigError, + "Planning has a waypoint with CTD instrument, but configuration does not configure CTD.", + id="ShipConfigNoCTD", + ), + pytest.param( + "ship_config_no_argo_float", + ConfigError, + "Planning has a waypoint with Argo float instrument, but configuration does not configure Argo floats.", + id="ShipConfigNoARGO_FLOAT", + ), + pytest.param( + "ship_config_no_drifter", + ConfigError, + "Planning has a waypoint with drifter instrument, but configuration does not configure drifters.", + id="ShipConfigNoDRIFTER", + ), + ], +) +def test_verify_ship_config_errors( + request, schedule, ship_config_fixture, error, match +) -> None: + ship_config = request.getfixturevalue(ship_config_fixture) + + with pytest.raises(error, match=match): + ship_config.verify(schedule)