Skip to content
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
5c5bf9a
draft script for converting MFP CSV to YAML schedule
iuryt Jan 28, 2025
40c7ff9
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 28, 2025
366dede
add openpyxl
iuryt Feb 3, 2025
d046444
add mfp_to_yaml function
iuryt Feb 3, 2025
e3199fa
add new command to init to accept mfp file as input
iuryt Feb 3, 2025
d9fe46a
delete files from scripts/
iuryt Feb 3, 2025
7dc9bd7
deleted scripts files
iuryt Feb 3, 2025
a79433c
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 3, 2025
11332f8
export the schedule body instead of saving file
iuryt Feb 13, 2025
ad54992
change name of cli param and adapt for new mfp_to_yaml function
iuryt Feb 13, 2025
66adb18
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 13, 2025
2672afa
add warning message for time entry on yaml
iuryt Feb 13, 2025
a370641
change to pydantic and change name of variables
iuryt Feb 13, 2025
b87d944
add XBT
iuryt Feb 13, 2025
eba08b8
accept nonetype time
iuryt Feb 13, 2025
c0a52ac
change to Waypoint to BaseModel and add field_serializer for instrume…
iuryt Feb 13, 2025
526d2af
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 13, 2025
c51043d
remove restriction for version
iuryt Feb 14, 2025
f3daaa7
add checking for columns from excel file
iuryt Feb 14, 2025
4c59420
add unit tests
iuryt Feb 14, 2025
b67b15d
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 14, 2025
6f63cd4
Add update comments and var naming
VeckoTheGecko Feb 14, 2025
222df85
Remove buffering from mfp conversion
VeckoTheGecko Feb 14, 2025
c94567b
update references to Waypoint
VeckoTheGecko Feb 14, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ dependencies:
- pip
- pyyaml
- copernicusmarine >= 2
- openpyxl

# linting
- pre-commit
Expand Down
31 changes: 27 additions & 4 deletions src/virtualship/cli/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,26 @@
hash_to_filename,
)
from virtualship.expedition.do_expedition import _get_schedule, do_expedition
from virtualship.utils import SCHEDULE, SHIP_CONFIG
from virtualship.utils import SCHEDULE, SHIP_CONFIG, mfp_to_yaml


@click.command()
@click.argument(
"path",
type=click.Path(exists=False, file_okay=False, dir_okay=True),
)
def init(path):
"""Initialize a directory for a new expedition, with an example schedule and ship config files."""
@click.option(
"--mfp-file",
type=str,
default=None,
help='Partially initialise a project from an exported xlsx or csv file from NIOZ\' Marine Facilities Planning tool (specifically the "Export Coordinates > DD" option). User edits are required after initialisation.',
)
def init(path, mfp_file):
"""
Initialize a directory for a new expedition, with an example schedule and ship config files.

If --mfp-file is provided, it will generate the schedule from the MPF file instead.
"""
path = Path(path)
path.mkdir(exist_ok=True)

Expand All @@ -43,7 +53,20 @@ def init(path):
)

config.write_text(utils.get_example_config())
schedule.write_text(utils.get_example_schedule())

if mfp_file:
# Generate schedule.yaml from the MPF file
click.echo(f"Generating schedule from {mfp_file}...")
schedule_body = mfp_to_yaml(mfp_file, schedule)
click.echo(
"\n⚠️ The generated schedule does not contain time values. "
"\nPlease edit 'schedule.yaml' and manually add the necessary time values."
"\n🕒 Expected time format: 'YYYY-MM-DD HH:MM:SS' (e.g., '2023-10-20 01:00:00').\n"
)
else:
# Create a default example schedule
# schedule_body = utils.get_example_schedule()
schedule.write_text(utils.get_example_schedule())

click.echo(f"Created '{config.name}' and '{schedule.name}' at {path}.")

Expand Down
1 change: 1 addition & 0 deletions src/virtualship/expedition/instrument_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ class InstrumentType(Enum):
CTD = "CTD"
DRIFTER = "DRIFTER"
ARGO_FLOAT = "ARGO_FLOAT"
XBT = "XBT"
9 changes: 5 additions & 4 deletions src/virtualship/expedition/space_time_region.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,14 @@ def _check_lon_lat_domain(self) -> Self:
class TimeRange(BaseModel):
    """Defines the temporal boundaries for a space-time region.

    Both bounds are optional so that partially initialised schedules
    (e.g. generated from an MFP export, where the user fills in times
    afterwards) can still be constructed and serialized.
    """

    # Optional so a schedule can be created before times are known.
    start_time: datetime | None = None
    end_time: datetime | None = None

    @model_validator(mode="after")
    def _check_time_range(self) -> Self:
        # Only validate ordering when both bounds are present; a partially
        # specified range is deliberately allowed (see class docstring).
        if self.start_time is not None and self.end_time is not None:
            if not self.start_time < self.end_time:
                raise ValueError("start_time must be before end_time")
        return self
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@VeckoTheGecko @ammedd
While using pydantic, I had to change this to accept a `None` time, but this means that when we do `schedule.from_yaml()` for fetch it will not raise any error, right? What should we do?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. I thought that pydantic would have a way of disabling validation during the initialisation of the object, but looking further at the documentation, it seems that isn't possible. Long-term it would be good to have start_time and end_time not be None, but perhaps that's something for a future PR.



Expand Down
18 changes: 15 additions & 3 deletions src/virtualship/expedition/waypoint.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,28 @@
"""Waypoint class."""

from dataclasses import dataclass
from datetime import datetime

from pydantic import BaseModel, field_serializer

from ..location import Location
from .instrument_type import InstrumentType


@dataclass
class Waypoint:
class Waypoint(BaseModel):
    """A Waypoint to sail to with an optional time and an optional instrument."""

    location: Location
    time: datetime | None = None
    instrument: InstrumentType | list[InstrumentType] | None = None

    @field_serializer("instrument")
    def serialize_instrument(self, instrument):
        """Ensure InstrumentType is serialized as a string (or list of strings)."""
        if instrument is None:
            return None
        if isinstance(instrument, list):
            return [entry.value for entry in instrument]
        return instrument.value

    @field_serializer("time")
    def serialize_time(self, time):
        """Ensure datetime is formatted properly in YAML."""
        if not time:
            return None
        return time.strftime("%Y-%m-%d %H:%M:%S")

This comment was marked as outdated.

Copy link
Collaborator

@VeckoTheGecko VeckoTheGecko Feb 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i see why this was done now - that InstrumentType serialization wasn't happening as expected. Ok, let's keep this (though still not sure about the time serialization as that looks to be ok? reverting that part for the timebeing, but can introduce in future PR)

131 changes: 131 additions & 0 deletions src/virtualship/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from importlib.resources import files
from typing import TextIO

import pandas as pd
import yaml
from pydantic import BaseModel

Expand Down Expand Up @@ -37,3 +38,133 @@ def _dump_yaml(model: BaseModel, stream: TextIO) -> str | None:
def _generic_load_yaml(data: str, model: BaseModel) -> BaseModel:
    """Load a yaml string into a pydantic model.

    Parameters
    ----------
    - data (str): YAML document as a string.
    - model (BaseModel): Pydantic model class used to validate the parsed data.

    Returns a validated instance of ``model``.
    """
    # safe_load refuses arbitrary Python-object tags, so untrusted YAML
    # cannot execute code during parsing.
    return model.model_validate(yaml.safe_load(data))


def mfp_to_yaml(excel_file_path: str, yaml_output_path: str):
    """
    Generate a YAML schedule with spatial and temporal information from an MFP Excel export.

    Parameters
    ----------
    - excel_file_path (str): Path to the Excel file containing coordinate and instrument data.
    - yaml_output_path (str): Path the generated schedule YAML file is written to.

    The function:
    1. Reads instrument and location data from the Excel file.
    2. Determines the maximum depth and buffer based on the instruments present.
    3. Computes a spatial range around the waypoints, widened by the buffer.
    4. Writes the resulting schedule to ``yaml_output_path``.

    Raises
    ------
    ValueError
        If the Excel file does not contain the expected MFP columns.
    """
    # Imported locally to avoid a circular import between utils and expedition.
    from virtualship.expedition.instrument_type import InstrumentType
    from virtualship.expedition.schedule import Schedule
    from virtualship.expedition.space_time_region import (
        SpaceTimeRegion,
        SpatialRange,
        TimeRange,
    )
    from virtualship.expedition.waypoint import Location, Waypoint

    # Column headers expected in an MFP "Export Coordinates > DD" file.
    expected_columns = {"Station Type", "Name", "Latitude", "Longitude", "Instrument"}

    # Read data from Excel
    coordinates_data = pd.read_excel(excel_file_path)
    actual_columns = set(coordinates_data.columns)

    # Fail fast if any required column is missing.
    missing_columns = expected_columns - actual_columns
    if missing_columns:
        raise ValueError(
            f"Error: Found columns {list(actual_columns)}, but expected columns {list(expected_columns)}. "
            "Are you sure that you're using the correct export from MFP?"
        )

    # Extra columns are harmless but may indicate an upstream format change.
    extra_columns = actual_columns - expected_columns
    if extra_columns:
        print(
            f"Warning: Found additional unexpected columns {list(extra_columns)}. "
            "Manually added columns have no effect. "
            "If the MFP export format changed, please submit an issue: "
            "https://github.com/OceanParcels/virtualship/issues."
        )

    # Keep only the expected columns and drop incomplete rows.
    coordinates_data = coordinates_data[list(expected_columns)]
    coordinates_data = coordinates_data.dropna()

    # Maximum depth (in meters) and buffer (in degrees) for each instrument.
    instrument_properties = {
        "XBT": {"maximum_depth": 2000, "buffer": 1},
        "CTD": {"maximum_depth": 5000, "buffer": 1},
        "DRIFTER": {"maximum_depth": 1, "buffer": 5},
        "ARGO_FLOAT": {"maximum_depth": 2000, "buffer": 5},
    }

    # Collect the set of unique instruments used across all waypoints.
    # Cells contain comma-separated lists, e.g. "CTD, DRIFTER".
    unique_instruments = set()
    for instrument_list in coordinates_data["Instrument"]:
        unique_instruments |= set(instrument_list.split(", "))

    # Depth range and buffer are driven by the most demanding instrument;
    # default=0 keeps an empty instrument set from raising ValueError.
    minimum_depth = 0
    maximum_depth = max(
        (
            instrument_properties.get(inst, {"maximum_depth": 0})["maximum_depth"]
            for inst in unique_instruments
        ),
        default=0,
    )
    buffer = max(
        (
            instrument_properties.get(inst, {"buffer": 0})["buffer"]
            for inst in unique_instruments
        ),
        default=0,
    )

    # Spatial range around the waypoints, widened by the instrument buffer.
    spatial_range = SpatialRange(
        minimum_longitude=coordinates_data["Longitude"].min() - buffer,
        maximum_longitude=coordinates_data["Longitude"].max() + buffer,
        minimum_latitude=coordinates_data["Latitude"].min() - buffer,
        maximum_latitude=coordinates_data["Latitude"].max() + buffer,
        minimum_depth=minimum_depth,
        maximum_depth=maximum_depth,
    )

    # Times are intentionally left unset; the user fills them in afterwards.
    space_time_region = SpaceTimeRegion(
        spatial_range=spatial_range,
        time_range=TimeRange(),
    )

    # One waypoint per row; instruments are stored as a list of enums.
    waypoints = [
        Waypoint(
            instrument=[
                InstrumentType(instrument)
                for instrument in row["Instrument"].split(", ")
            ],
            location=Location(latitude=row["Latitude"], longitude=row["Longitude"]),
        )
        for _, row in coordinates_data.iterrows()
    ]

    schedule = Schedule(
        waypoints=waypoints,
        space_time_region=space_time_region,
    )

    # Save to YAML file
    schedule.to_yaml(yaml_output_path)
102 changes: 102 additions & 0 deletions tests/test_mfp_to_yaml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
from unittest.mock import patch

import pandas as pd
import pytest

from virtualship.expedition.instrument_type import InstrumentType
from virtualship.expedition.schedule import Schedule
from virtualship.utils import mfp_to_yaml

# Sample correct MFP data: all five required columns, one row per station,
# with comma-separated instrument lists as produced by the MFP export.
VALID_MFP_DATA = pd.DataFrame(
    {
        "Station Type": ["A", "B", "C"],
        "Name": ["Station1", "Station2", "Station3"],
        "Latitude": [30, 31, 32],
        "Longitude": [-44, -45, -46],
        "Instrument": ["CTD, DRIFTER", "ARGO_FLOAT", "XBT, CTD, DRIFTER"],
    }
)

# Missing required columns ("Longitude" and "Instrument" are absent).
MISSING_HEADERS_DATA = pd.DataFrame(
    {"Station Type": ["A"], "Name": ["Station1"], "Latitude": [10.5]}
)

# Extra unexpected columns on top of an otherwise valid export.
EXTRA_HEADERS_DATA = VALID_MFP_DATA.copy()
EXTRA_HEADERS_DATA["Unexpected Column"] = ["Extra1", "Extra2", "Extra3"]


@patch("pandas.read_excel", return_value=VALID_MFP_DATA)
def test_mfp_to_yaml_success(mock_read_excel, tmp_path):
    """Test that mfp_to_yaml correctly processes a valid MFP Excel file."""
    output_path = tmp_path / "schedule.yaml"

    # pandas.read_excel is patched, so the xlsx filename is never opened.
    mfp_to_yaml("mock_file.xlsx", output_path)

    # The schedule file must have been written to disk.
    assert output_path.exists()

    # Round-trip through the Schedule model and verify the waypoints.
    schedule = Schedule.from_yaml(output_path)

    expected_instruments = [
        [InstrumentType.CTD, InstrumentType.DRIFTER],
        [InstrumentType.ARGO_FLOAT],
        [InstrumentType.XBT, InstrumentType.CTD, InstrumentType.DRIFTER],
    ]
    assert len(schedule.waypoints) == 3
    for waypoint, instruments in zip(schedule.waypoints, expected_instruments):
        assert waypoint.instrument == instruments


@patch("pandas.read_excel", return_value=MISSING_HEADERS_DATA)
def test_mfp_to_yaml_missing_headers(mock_read_excel, tmp_path):
    """Test that mfp_to_yaml raises an error when required columns are missing."""
    output_path = tmp_path / "schedule.yaml"

    # The column-validation error message is part of the public behavior.
    expected_message = "Error: Found columns .* but expected columns .*"
    with pytest.raises(ValueError, match=expected_message):
        mfp_to_yaml("mock_file.xlsx", output_path)


@patch("pandas.read_excel", return_value=EXTRA_HEADERS_DATA)
@patch("builtins.print")  # Capture printed warnings
def test_mfp_to_yaml_extra_headers(mock_print, mock_read_excel, tmp_path):
    """Test that mfp_to_yaml prints a warning when extra columns are found."""
    output_path = tmp_path / "schedule.yaml"

    mfp_to_yaml("mock_file.xlsx", output_path)

    # The exact warning text must have been printed at least once.
    expected_warning = (
        "Warning: Found additional unexpected columns ['Unexpected Column']. "
        "Manually added columns have no effect. "
        "If the MFP export format changed, please submit an issue: "
        "https://github.com/OceanParcels/virtualship/issues."
    )
    mock_print.assert_any_call(expected_warning)


@patch("pandas.read_excel", return_value=VALID_MFP_DATA)
def test_mfp_to_yaml_instrument_conversion(mock_read_excel, tmp_path):
    """Test that instruments are correctly converted into InstrumentType enums."""
    output_path = tmp_path / "schedule.yaml"

    mfp_to_yaml("mock_file.xlsx", output_path)

    # Reload the generated YAML through the Schedule model.
    schedule = Schedule.from_yaml(output_path)

    first = schedule.waypoints[0]
    second = schedule.waypoints[1]
    third = schedule.waypoints[2]
    assert isinstance(first.instrument, list)
    assert first.instrument == [InstrumentType.CTD, InstrumentType.DRIFTER]
    assert second.instrument == [InstrumentType.ARGO_FLOAT]
    assert third.instrument == [
        InstrumentType.XBT,
        InstrumentType.CTD,
        InstrumentType.DRIFTER,
    ]
Loading