diff --git a/environment.yml b/environment.yml
index e5f6ea5b..42319ad6 100644
--- a/environment.yml
+++ b/environment.yml
@@ -12,6 +12,7 @@ dependencies:
   - pip
   - pyyaml
   - copernicusmarine >= 2
+  - openpyxl

   # linting
   - pre-commit
diff --git a/src/virtualship/cli/commands.py b/src/virtualship/cli/commands.py
index db4309b0..34ac1c21 100644
--- a/src/virtualship/cli/commands.py
+++ b/src/virtualship/cli/commands.py
@@ -16,7 +16,7 @@
     hash_to_filename,
 )
 from virtualship.expedition.do_expedition import _get_schedule, do_expedition
-from virtualship.utils import SCHEDULE, SHIP_CONFIG
+from virtualship.utils import SCHEDULE, SHIP_CONFIG, mfp_to_yaml


 @click.command()
@@ -24,8 +24,18 @@
     "path",
     type=click.Path(exists=False, file_okay=False, dir_okay=True),
 )
-def init(path):
-    """Initialize a directory for a new expedition, with an example schedule and ship config files."""
+@click.option(
+    "--from-mfp",
+    type=str,
+    default=None,
+    help='Partially initialise a project from an xlsx file exported from NIOZ\'s Marine Facilities Planning tool (specifically the "Export Coordinates > DD" option). User edits are required after initialisation.',
+)
+def init(path, from_mfp):
+    """
+    Initialize a directory for a new expedition, with an example schedule and ship config files.
+
+    If --from-mfp is provided, the schedule is generated from the MFP export instead.
+    """
     path = Path(path)
     path.mkdir(exist_ok=True)

@@ -43,7 +53,19 @@ def init(path):
     )

     config.write_text(utils.get_example_config())
-    schedule.write_text(utils.get_example_schedule())
+    if from_mfp:
+        mfp_file = Path(from_mfp)
+        # Generate schedule.yaml from the MFP file
+        click.echo(f"Generating schedule from {mfp_file}...")
+        mfp_to_yaml(mfp_file, schedule)
+        click.echo(
+            "\n⚠️ The generated schedule does not contain time values. "
+            "\nPlease edit 'schedule.yaml' and manually add the necessary time values."
+            "\n🕒 Expected time format: 'YYYY-MM-DD HH:MM:SS' (e.g., '2023-10-20 01:00:00').\n"
+        )
+    else:
+        # Create a default example schedule
+        schedule.write_text(utils.get_example_schedule())

     click.echo(f"Created '{config.name}' and '{schedule.name}' at {path}.")

diff --git a/src/virtualship/expedition/instrument_type.py b/src/virtualship/expedition/instrument_type.py
index 556d8464..82360c7b 100644
--- a/src/virtualship/expedition/instrument_type.py
+++ b/src/virtualship/expedition/instrument_type.py
@@ -9,3 +9,4 @@ class InstrumentType(Enum):
     CTD = "CTD"
     DRIFTER = "DRIFTER"
     ARGO_FLOAT = "ARGO_FLOAT"
+    XBT = "XBT"
diff --git a/src/virtualship/expedition/space_time_region.py b/src/virtualship/expedition/space_time_region.py
index 37aaee08..22008805 100644
--- a/src/virtualship/expedition/space_time_region.py
+++ b/src/virtualship/expedition/space_time_region.py
@@ -42,13 +42,17 @@ def _check_lon_lat_domain(self) -> Self:


 class TimeRange(BaseModel):
     """Defines the temporal boundaries for a space-time region."""

-    start_time: datetime
-    end_time: datetime
+    #! TODO: Remove the `| None` for `start_time` and `end_time`, and have the MFP functionality not use pydantic (with testing to avoid codebase drift)
+    start_time: datetime | None = None
+    end_time: datetime | None = None

     @model_validator(mode="after")
     def _check_time_range(self) -> Self:
-        if not self.start_time < self.end_time:
-            raise ValueError("start_time must be before end_time")
+        if (
+            self.start_time and self.end_time
+        ):  #! TODO: remove this check once `start_time` and `end_time` are required
+            if not self.start_time < self.end_time:
+                raise ValueError("start_time must be before end_time")
         return self
diff --git a/src/virtualship/expedition/waypoint.py b/src/virtualship/expedition/waypoint.py
index 85e99181..018ccecb 100644
--- a/src/virtualship/expedition/waypoint.py
+++ b/src/virtualship/expedition/waypoint.py
@@ -1,16 +1,23 @@
 """Waypoint class."""

-from dataclasses import dataclass
 from datetime import datetime

+from pydantic import BaseModel, field_serializer
+
 from ..location import Location
 from .instrument_type import InstrumentType


-@dataclass
-class Waypoint:
+class Waypoint(BaseModel):
     """A Waypoint to sail to with an optional time and an optional instrument."""

     location: Location
     time: datetime | None = None
     instrument: InstrumentType | list[InstrumentType] | None = None
+
+    @field_serializer("instrument")
+    def serialize_instrument(self, instrument):
+        """Ensure InstrumentType is serialized as a string (or list of strings)."""
+        if isinstance(instrument, list):
+            return [inst.value for inst in instrument]
+        return instrument.value if instrument else None
diff --git a/src/virtualship/utils.py b/src/virtualship/utils.py
index 95d47d31..43608b60 100644
--- a/src/virtualship/utils.py
+++ b/src/virtualship/utils.py
@@ -2,6 +2,7 @@
 from importlib.resources import files
 from typing import TextIO

+import pandas as pd
 import yaml
 from pydantic import BaseModel

@@ -37,3 +38,116 @@
 def _generic_load_yaml(data: str, model: BaseModel) -> BaseModel:
     """Load a yaml string into a pydantic model."""
     return model.model_validate(yaml.safe_load(data))
+
+
+def mfp_to_yaml(excel_file_path: str, yaml_output_path: str):  # noqa: D417
+    """
+    Generate a schedule YAML file with spatial and temporal information from an MFP Excel export.
+
+    Parameters
+    ----------
+    - excel_file_path (str): Path to the Excel file containing coordinate and instrument data.
+    - yaml_output_path (str): Path where the generated schedule YAML file is written.
+
+    The function:
+    1. Reads instrument and location data from the Excel file.
+    2. Determines the maximum depth based on the instruments present.
+    3. Builds waypoints and a space-time region from the station coordinates.
+    4. Writes the resulting schedule to the YAML output path.
+    """
+    # Importing Schedule and related models from expedition module
+    from virtualship.expedition.instrument_type import InstrumentType
+    from virtualship.expedition.schedule import Schedule
+    from virtualship.expedition.space_time_region import (
+        SpaceTimeRegion,
+        SpatialRange,
+        TimeRange,
+    )
+    from virtualship.expedition.waypoint import Location, Waypoint
+
+    # Expected column headers
+    expected_columns = {"Station Type", "Name", "Latitude", "Longitude", "Instrument"}
+
+    # Read data from Excel
+    coordinates_data = pd.read_excel(excel_file_path)
+
+    # Check if the headers match the expected ones
+    actual_columns = set(coordinates_data.columns)
+
+    missing_columns = expected_columns - actual_columns
+    if missing_columns:
+        raise ValueError(
+            f"Error: Found columns {list(actual_columns)}, but expected columns {list(expected_columns)}. "
+            "Are you sure that you're using the correct export from MFP?"
+        )
+
+    extra_columns = actual_columns - expected_columns
+    if extra_columns:
+        print(
+            f"Warning: Found additional unexpected columns {list(extra_columns)}. "
+            "Manually added columns have no effect. "
+            "If the MFP export format changed, please submit an issue: "
+            "https://github.com/OceanParcels/virtualship/issues."
+        )
+
+    # Keep only the expected columns to ensure strict conformity with the MFP format
+    coordinates_data = coordinates_data[list(expected_columns)]
+
+    # Drop rows with missing values
+    coordinates_data = coordinates_data.dropna()
+
+    # Maximum sampling depth (in meters) for each instrument
+    instrument_max_depths = {
+        "XBT": 2000,
+        "CTD": 5000,
+        "DRIFTER": 1,
+        "ARGO_FLOAT": 2000,
+    }
+
+    unique_instruments = set()
+
+    for instrument_list in coordinates_data["Instrument"]:
+        instruments = instrument_list.split(", ")
+        unique_instruments |= set(instruments)
+
+    # Determine the maximum depth based on the unique instruments
+    maximum_depth = max(
+        instrument_max_depths.get(instrument, 0) for instrument in unique_instruments
+    )
+
+    spatial_range = SpatialRange(
+        minimum_longitude=coordinates_data["Longitude"].min(),
+        maximum_longitude=coordinates_data["Longitude"].max(),
+        minimum_latitude=coordinates_data["Latitude"].min(),
+        maximum_latitude=coordinates_data["Latitude"].max(),
+        minimum_depth=0,
+        maximum_depth=maximum_depth,
+    )
+
+    # Create space-time region object
+    space_time_region = SpaceTimeRegion(
+        spatial_range=spatial_range,
+        time_range=TimeRange(),
+    )
+
+    # Generate waypoints
+    waypoints = []
+    for _, row in coordinates_data.iterrows():
+        instruments = [
+            InstrumentType(instrument) for instrument in row["Instrument"].split(", ")
+        ]
+        waypoints.append(
+            Waypoint(
+                instrument=instruments,
+                location=Location(latitude=row["Latitude"], longitude=row["Longitude"]),
+            )
+        )
+
+    # Create Schedule object
+    schedule = Schedule(
+        waypoints=waypoints,
+        space_time_region=space_time_region,
+    )
+
+    # Save to YAML file
+    schedule.to_yaml(yaml_output_path)
diff --git a/tests/expedition/test_schedule.py b/tests/expedition/test_schedule.py
index fd1ed959..33ffc74d 100644
--- a/tests/expedition/test_schedule.py
+++ b/tests/expedition/test_schedule.py
@@ -12,9 +12,11 @@ def test_schedule(tmpdir) -> None:

     schedule = Schedule(
         waypoints=[
-            Waypoint(Location(0, 0), time=base_time, instrument=None),
+            Waypoint(location=Location(0, 0), time=base_time, instrument=None),
             Waypoint(
-                Location(1, 1), time=base_time + timedelta(hours=1), instrument=None
+                location=Location(1, 1),
+                time=base_time + timedelta(hours=1),
+                instrument=None,
             ),
         ]
     )
diff --git a/tests/expedition/test_simulate_schedule.py b/tests/expedition/test_simulate_schedule.py
index 8f92c678..01544c42 100644
--- a/tests/expedition/test_simulate_schedule.py
+++ b/tests/expedition/test_simulate_schedule.py
@@ -20,8 +20,8 @@ def test_simulate_schedule_feasible() -> None:
     ship_config.ship_speed_meter_per_second = 5.14
     schedule = Schedule(
         waypoints=[
-            Waypoint(Location(0, 0), base_time),
-            Waypoint(Location(0.01, 0), base_time + timedelta(days=1)),
+            Waypoint(location=Location(0, 0), time=base_time),
+            Waypoint(location=Location(0.01, 0), time=base_time + timedelta(days=1)),
         ]
     )

@@ -38,8 +38,8 @@ def test_simulate_schedule_too_far() -> None:
     ship_config = ShipConfig.from_yaml("expedition_dir/ship_config.yaml")
     schedule = Schedule(
         waypoints=[
-            Waypoint(Location(0, 0), base_time),
-            Waypoint(Location(1.0, 0), base_time + timedelta(minutes=1)),
+            Waypoint(location=Location(0, 0), time=base_time),
+            Waypoint(location=Location(1.0, 0), time=base_time + timedelta(minutes=1)),
         ]
     )

diff --git a/tests/test_mfp_to_yaml.py b/tests/test_mfp_to_yaml.py
new file mode 100644
index 00000000..e6446947
--- /dev/null
+++ b/tests/test_mfp_to_yaml.py
@@ -0,0 +1,102 @@
+from unittest.mock import patch
+
+import pandas as pd
+import pytest
+
+from virtualship.expedition.instrument_type import InstrumentType
+from virtualship.expedition.schedule import Schedule
+from virtualship.utils import mfp_to_yaml
+
+# Sample correct MFP data
+VALID_MFP_DATA = pd.DataFrame(
+    {
+        "Station Type": ["A", "B", "C"],
+        "Name": ["Station1", "Station2", "Station3"],
+        "Latitude": [30, 31, 32],
+        "Longitude": [-44, -45, -46],
+        "Instrument": ["CTD, DRIFTER", "ARGO_FLOAT", "XBT, CTD, DRIFTER"],
+    }
+)
+
+# Missing required columns
+MISSING_HEADERS_DATA = pd.DataFrame(
+    {"Station Type": ["A"], "Name": ["Station1"], "Latitude": [10.5]}
+)
+
+# Extra unexpected columns
+EXTRA_HEADERS_DATA = VALID_MFP_DATA.copy()
+EXTRA_HEADERS_DATA["Unexpected Column"] = ["Extra1", "Extra2", "Extra3"]
+
+
+@patch("pandas.read_excel", return_value=VALID_MFP_DATA)
+def test_mfp_to_yaml_success(mock_read_excel, tmp_path):
+    """Test that mfp_to_yaml correctly processes a valid MFP Excel file."""
+    yaml_output_path = tmp_path / "schedule.yaml"
+
+    # Run function (No need to mock open() for YAML, real file is created)
+    mfp_to_yaml("mock_file.xlsx", yaml_output_path)
+
+    # Ensure the YAML file was written
+    assert yaml_output_path.exists()
+
+    # Load YAML and validate contents
+    data = Schedule.from_yaml(yaml_output_path)
+
+    assert len(data.waypoints) == 3
+    assert data.waypoints[0].instrument == [InstrumentType.CTD, InstrumentType.DRIFTER]
+    assert data.waypoints[1].instrument == [InstrumentType.ARGO_FLOAT]
+    assert data.waypoints[2].instrument == [
+        InstrumentType.XBT,
+        InstrumentType.CTD,
+        InstrumentType.DRIFTER,
+    ]
+
+
+@patch("pandas.read_excel", return_value=MISSING_HEADERS_DATA)
+def test_mfp_to_yaml_missing_headers(mock_read_excel, tmp_path):
+    """Test that mfp_to_yaml raises an error when required columns are missing."""
+    yaml_output_path = tmp_path / "schedule.yaml"
+
+    with pytest.raises(
+        ValueError, match="Error: Found columns .* but expected columns .*"
+    ):
+        mfp_to_yaml("mock_file.xlsx", yaml_output_path)
+
+
+@patch("pandas.read_excel", return_value=EXTRA_HEADERS_DATA)
+@patch("builtins.print")  # Capture printed warnings
+def test_mfp_to_yaml_extra_headers(mock_print, mock_read_excel, tmp_path):
+    """Test that mfp_to_yaml prints a warning when extra columns are found."""
+    yaml_output_path = tmp_path / "schedule.yaml"
+
+    # Run function
+    mfp_to_yaml("mock_file.xlsx", yaml_output_path)
+
+    # Ensure a warning message was printed
+    mock_print.assert_any_call(
+        "Warning: Found additional unexpected columns ['Unexpected Column']. "
+        "Manually added columns have no effect. "
+        "If the MFP export format changed, please submit an issue: "
+        "https://github.com/OceanParcels/virtualship/issues."
+    )
+
+
+@patch("pandas.read_excel", return_value=VALID_MFP_DATA)
+def test_mfp_to_yaml_instrument_conversion(mock_read_excel, tmp_path):
+    """Test that instruments are correctly converted into InstrumentType enums."""
+    yaml_output_path = tmp_path / "schedule.yaml"
+
+    # Run function
+    mfp_to_yaml("mock_file.xlsx", yaml_output_path)
+
+    # Load the generated YAML
+    data = Schedule.from_yaml(yaml_output_path)
+
+    assert isinstance(data.waypoints[0].instrument, list)
+    assert data.waypoints[0].instrument == [InstrumentType.CTD, InstrumentType.DRIFTER]
+    assert data.waypoints[1].instrument == [InstrumentType.ARGO_FLOAT]
+    assert data.waypoints[2].instrument == [
+        InstrumentType.XBT,
+        InstrumentType.CTD,
+        InstrumentType.DRIFTER,
+    ]
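
Usage sketch (illustrative only; the expedition directory and export file names below are hypothetical and not part of this patch):

    # From a shell, the new option would be invoked roughly as:
    #   virtualship init my_expedition --from-mfp CoordinatesExport_DD.xlsx
    # The same conversion can also be driven from Python via the helper the CLI calls:
    from virtualship.utils import mfp_to_yaml

    # Convert a hypothetical MFP "Export Coordinates > DD" xlsx export into a schedule;
    # time values are left empty and must be added to schedule.yaml by hand afterwards.
    mfp_to_yaml("CoordinatesExport_DD.xlsx", "schedule.yaml")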