Merged
Changes from all commits
24 commits
5c5bf9a
draft script for converting MFP CSV to YAML schedule
iuryt Jan 28, 2025
40c7ff9
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 28, 2025
366dede
add openpyxl
iuryt Feb 3, 2025
d046444
add mfp_to_yaml function
iuryt Feb 3, 2025
e3199fa
add new command to init to accept mfp file as input
iuryt Feb 3, 2025
d9fe46a
delete files from scripts/
iuryt Feb 3, 2025
7dc9bd7
deleted scripts files
iuryt Feb 3, 2025
a79433c
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 3, 2025
11332f8
export the schedule body instead of saving file
iuryt Feb 13, 2025
ad54992
change name of cli param and adapt for new mfp_to_yaml function
iuryt Feb 13, 2025
66adb18
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 13, 2025
2672afa
add warning message for time entry on yaml
iuryt Feb 13, 2025
a370641
change to pydantic and change name of variables
iuryt Feb 13, 2025
b87d944
add XBT
iuryt Feb 13, 2025
eba08b8
accept nonetype time
iuryt Feb 13, 2025
c0a52ac
change to Waypoint to BaseModel and add field_serializer for instrume…
iuryt Feb 13, 2025
526d2af
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 13, 2025
c51043d
remove restriction for version
iuryt Feb 14, 2025
f3daaa7
add checking for columns from excel file
iuryt Feb 14, 2025
4c59420
add unit tests
iuryt Feb 14, 2025
b67b15d
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 14, 2025
6f63cd4
Add update comments and var naming
VeckoTheGecko Feb 14, 2025
222df85
Remove buffering from mfp conversion
VeckoTheGecko Feb 14, 2025
c94567b
update references to Waypoint
VeckoTheGecko Feb 14, 2025
1 change: 1 addition & 0 deletions environment.yml
@@ -12,6 +12,7 @@ dependencies:
- pip
- pyyaml
- copernicusmarine >= 2
- openpyxl

# linting
- pre-commit
31 changes: 27 additions & 4 deletions src/virtualship/cli/commands.py
@@ -16,16 +16,26 @@
hash_to_filename,
)
from virtualship.expedition.do_expedition import _get_schedule, do_expedition
from virtualship.utils import SCHEDULE, SHIP_CONFIG
from virtualship.utils import SCHEDULE, SHIP_CONFIG, mfp_to_yaml


@click.command()
@click.argument(
"path",
type=click.Path(exists=False, file_okay=False, dir_okay=True),
)
def init(path):
"""Initialize a directory for a new expedition, with an example schedule and ship config files."""
@click.option(
"--from-mfp",
type=str,
default=None,
help='Partially initialise a project from an xlsx or csv file exported from NIOZ\'s Marine Facilities Planning tool (specifically the "Export Coordinates > DD" option). User edits are required after initialisation.',
)
def init(path, from_mfp):
"""
Initialize a directory for a new expedition, with an example schedule and ship config files.

If --from-mfp is provided, the schedule will be generated from the MFP file instead.
"""
path = Path(path)
path.mkdir(exist_ok=True)

@@ -43,7 +53,20 @@ def init(path):
)

config.write_text(utils.get_example_config())
schedule.write_text(utils.get_example_schedule())
if from_mfp:
mfp_file = Path(from_mfp)
# Generate schedule.yaml from the MFP file
click.echo(f"Generating schedule from {mfp_file}...")
mfp_to_yaml(mfp_file, schedule)
click.echo(
"\n⚠️ The generated schedule does not contain time values. "
"\nPlease edit 'schedule.yaml' and manually add the necessary time values."
"\n🕒 Expected time format: 'YYYY-MM-DD HH:MM:SS' (e.g., '2023-10-20 01:00:00').\n"
)
else:
# Create a default example schedule
schedule.write_text(utils.get_example_schedule())

click.echo(f"Created '{config.name}' and '{schedule.name}' at {path}.")

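For reference, a minimal sketch of exercising the new option via click's test runner. The directory and file names are illustrative, the xlsx must be a real "Export Coordinates > DD" export, and the shell form in the comment assumes the package's `virtualship` console entry point:

from click.testing import CliRunner

from virtualship.cli.commands import init

runner = CliRunner()
# Roughly equivalent to: virtualship init my_expedition --from-mfp cruise_coordinates.xlsx
result = runner.invoke(init, ["my_expedition", "--from-mfp", "cruise_coordinates.xlsx"])
print(result.output)  # time-entry warning followed by the "Created ..." message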
1 change: 1 addition & 0 deletions src/virtualship/expedition/instrument_type.py
@@ -9,3 +9,4 @@ class InstrumentType(Enum):
CTD = "CTD"
DRIFTER = "DRIFTER"
ARGO_FLOAT = "ARGO_FLOAT"
XBT = "XBT"
12 changes: 8 additions & 4 deletions src/virtualship/expedition/space_time_region.py
@@ -42,13 +42,17 @@ def _check_lon_lat_domain(self) -> Self:
class TimeRange(BaseModel):
"""Defines the temporal boundaries for a space-time region."""

start_time: datetime
end_time: datetime
#! TODO: Remove the `| None` for `start_time` and `end_time`, and have the MFP functionality not use pydantic (with testing to avoid codebase drift)
start_time: datetime | None = None
end_time: datetime | None = None

@model_validator(mode="after")
def _check_time_range(self) -> Self:
if not self.start_time < self.end_time:
raise ValueError("start_time must be before end_time")
if (
self.start_time and self.end_time
): #! TODO: remove this check once `start_time` and `end_time` are required
if not self.start_time < self.end_time:
raise ValueError("start_time must be before end_time")
return self


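A quick sketch of how the relaxed TimeRange now behaves (times are illustrative):

from datetime import datetime

from virtualship.expedition.space_time_region import TimeRange

TimeRange()  # now valid: MFP-generated schedules leave both times as None
TimeRange(
    start_time=datetime(2023, 10, 20, 1, 0),
    end_time=datetime(2023, 10, 21, 1, 0),
)  # when both are given, start_time must still precede end_time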
13 changes: 10 additions & 3 deletions src/virtualship/expedition/waypoint.py
@@ -1,16 +1,23 @@
"""Waypoint class."""

from dataclasses import dataclass
from datetime import datetime

from pydantic import BaseModel, field_serializer

from ..location import Location
from .instrument_type import InstrumentType


@dataclass
class Waypoint:
class Waypoint(BaseModel):
"""A Waypoint to sail to with an optional time and an optional instrument."""

location: Location
time: datetime | None = None
instrument: InstrumentType | list[InstrumentType] | None = None

@field_serializer("instrument")
def serialize_instrument(self, instrument):
"""Ensure InstrumentType is serialized as a string (or list of strings)."""
if isinstance(instrument, list):
return [inst.value for inst in instrument]
return instrument.value if instrument else None
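
A serialization sketch for the new field_serializer (coordinates are illustrative): with pydantic's model_dump, the instrument enums come out as plain strings, which keeps the YAML output readable.

from virtualship.expedition.instrument_type import InstrumentType
from virtualship.expedition.waypoint import Location, Waypoint

wp = Waypoint(
    location=Location(latitude=64.0, longitude=-30.0),
    instrument=[InstrumentType.CTD, InstrumentType.DRIFTER],
)
print(wp.model_dump())  # instrument is dumped as ['CTD', 'DRIFTER'], time as None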
114 changes: 114 additions & 0 deletions src/virtualship/utils.py
@@ -2,6 +2,7 @@
from importlib.resources import files
from typing import TextIO

import pandas as pd
import yaml
from pydantic import BaseModel

@@ -37,3 +38,116 @@ def _dump_yaml(model: BaseModel, stream: TextIO) -> str | None:
def _generic_load_yaml(data: str, model: BaseModel) -> BaseModel:
"""Load a yaml string into a pydantic model."""
return model.model_validate(yaml.safe_load(data))


def mfp_to_yaml(excel_file_path: str, yaml_output_path: str): # noqa: D417
"""
Generates a YAML file with spatial and temporal information based on instrument data from MFP excel file.

Parameters
----------
- excel_file_path (str): Path to the Excel file containing coordinate and instrument data.

The function:
1. Reads instrument and location data from the Excel file.
2. Determines the maximum depth and buffer based on the instruments present.
3. Ensures longitude and latitude values remain valid after applying buffer adjustments.
4. returns the yaml information.

"""
# Importing Schedule and related models from expedition module
from virtualship.expedition.instrument_type import InstrumentType
from virtualship.expedition.schedule import Schedule
from virtualship.expedition.space_time_region import (
SpaceTimeRegion,
SpatialRange,
TimeRange,
)
from virtualship.expedition.waypoint import Location, Waypoint

# Expected column headers
expected_columns = {"Station Type", "Name", "Latitude", "Longitude", "Instrument"}

# Read data from Excel
coordinates_data = pd.read_excel(excel_file_path)

# Check if the headers match the expected ones
actual_columns = set(coordinates_data.columns)

missing_columns = expected_columns - actual_columns
if missing_columns:
raise ValueError(
f"Error: Found columns {list(actual_columns)}, but expected columns {list(expected_columns)}. "
"Are you sure that you're using the correct export from MFP?"
)

extra_columns = actual_columns - expected_columns
if extra_columns:
print(
f"Warning: Found additional unexpected columns {list(extra_columns)}. "
"Manually added columns have no effect. "
"If the MFP export format changed, please submit an issue: "
"https://github.com/OceanParcels/virtualship/issues."
)

# Keep only the expected columns so that any extras cannot affect the output
coordinates_data = coordinates_data[list(expected_columns)]

# Drop rows with missing values
coordinates_data = coordinates_data.dropna()

# maximum depth (in meters) for each instrument
instrument_max_depths = {
"XBT": 2000,
"CTD": 5000,
"DRIFTER": 1,
"ARGO_FLOAT": 2000,
}

unique_instruments = set()

for instrument_list in coordinates_data["Instrument"]:
instruments = instrument_list.split(", ")
unique_instruments |= set(instruments)

# Determine the maximum depth based on the unique instruments
maximum_depth = max(
instrument_max_depths.get(instrument, 0) for instrument in unique_instruments
)

spatial_range = SpatialRange(
minimum_longitude=coordinates_data["Longitude"].min(),
maximum_longitude=coordinates_data["Longitude"].max(),
minimum_latitude=coordinates_data["Latitude"].min(),
maximum_latitude=coordinates_data["Latitude"].max(),
minimum_depth=0,
maximum_depth=maximum_depth,
)

# Create space-time region object
space_time_region = SpaceTimeRegion(
spatial_range=spatial_range,
time_range=TimeRange(),
)

# Generate waypoints
waypoints = []
for _, row in coordinates_data.iterrows():
instruments = [
InstrumentType(instrument) for instrument in row["Instrument"].split(", ")
]
waypoints.append(
Waypoint(
instrument=instruments,
location=Location(latitude=row["Latitude"], longitude=row["Longitude"]),
)
)

# Create Schedule object
schedule = Schedule(
waypoints=waypoints,
space_time_region=space_time_region,
)

# Save to YAML file
schedule.to_yaml(yaml_output_path)
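
Outside the CLI, the converter can also be called directly. A usage sketch, assuming cruise_coordinates.xlsx is a valid MFP export with the expected columns:

from virtualship.expedition.schedule import Schedule
from virtualship.utils import mfp_to_yaml

mfp_to_yaml("cruise_coordinates.xlsx", "schedule.yaml")
schedule = Schedule.from_yaml("schedule.yaml")
print(len(schedule.waypoints))  # one waypoint per non-empty row in the export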
6 changes: 4 additions & 2 deletions tests/expedition/test_schedule.py
@@ -12,9 +12,11 @@ def test_schedule(tmpdir) -> None:

schedule = Schedule(
waypoints=[
Waypoint(Location(0, 0), time=base_time, instrument=None),
Waypoint(location=Location(0, 0), time=base_time, instrument=None),
Waypoint(
Location(1, 1), time=base_time + timedelta(hours=1), instrument=None
location=Location(1, 1),
time=base_time + timedelta(hours=1),
instrument=None,
),
]
)
8 changes: 4 additions & 4 deletions tests/expedition/test_simulate_schedule.py
@@ -20,8 +20,8 @@ def test_simulate_schedule_feasible() -> None:
ship_config.ship_speed_meter_per_second = 5.14
schedule = Schedule(
waypoints=[
Waypoint(Location(0, 0), base_time),
Waypoint(Location(0.01, 0), base_time + timedelta(days=1)),
Waypoint(location=Location(0, 0), time=base_time),
Waypoint(location=Location(0.01, 0), time=base_time + timedelta(days=1)),
]
)

@@ -38,8 +38,8 @@ def test_simulate_schedule_too_far() -> None:
ship_config = ShipConfig.from_yaml("expedition_dir/ship_config.yaml")
schedule = Schedule(
waypoints=[
Waypoint(Location(0, 0), base_time),
Waypoint(Location(1.0, 0), base_time + timedelta(minutes=1)),
Waypoint(location=Location(0, 0), time=base_time),
Waypoint(location=Location(1.0, 0), time=base_time + timedelta(minutes=1)),
]
)

102 changes: 102 additions & 0 deletions tests/test_mfp_to_yaml.py
@@ -0,0 +1,102 @@
from unittest.mock import patch

import pandas as pd
import pytest

from virtualship.expedition.instrument_type import InstrumentType
from virtualship.expedition.schedule import Schedule
from virtualship.utils import mfp_to_yaml

# Sample correct MFP data
VALID_MFP_DATA = pd.DataFrame(
{
"Station Type": ["A", "B", "C"],
"Name": ["Station1", "Station2", "Station3"],
"Latitude": [30, 31, 32],
"Longitude": [-44, -45, -46],
"Instrument": ["CTD, DRIFTER", "ARGO_FLOAT", "XBT, CTD, DRIFTER"],
}
)

# Missing required columns
MISSING_HEADERS_DATA = pd.DataFrame(
{"Station Type": ["A"], "Name": ["Station1"], "Latitude": [10.5]}
)

# Extra unexpected columns
EXTRA_HEADERS_DATA = VALID_MFP_DATA.copy()
EXTRA_HEADERS_DATA["Unexpected Column"] = ["Extra1", "Extra2", "Extra3"]


@patch("pandas.read_excel", return_value=VALID_MFP_DATA)
def test_mfp_to_yaml_success(mock_read_excel, tmp_path):
"""Test that mfp_to_yaml correctly processes a valid MFP Excel file."""
yaml_output_path = tmp_path / "schedule.yaml"

# Run function (No need to mock open() for YAML, real file is created)
mfp_to_yaml("mock_file.xlsx", yaml_output_path)

# Ensure the YAML file was written
assert yaml_output_path.exists()

# Load YAML and validate contents
data = Schedule.from_yaml(yaml_output_path)

assert len(data.waypoints) == 3
assert data.waypoints[0].instrument == [InstrumentType.CTD, InstrumentType.DRIFTER]
assert data.waypoints[1].instrument == [InstrumentType.ARGO_FLOAT]
assert data.waypoints[2].instrument == [
InstrumentType.XBT,
InstrumentType.CTD,
InstrumentType.DRIFTER,
]


@patch("pandas.read_excel", return_value=MISSING_HEADERS_DATA)
def test_mfp_to_yaml_missing_headers(mock_read_excel, tmp_path):
"""Test that mfp_to_yaml raises an error when required columns are missing."""
yaml_output_path = tmp_path / "schedule.yaml"

with pytest.raises(
ValueError, match="Error: Found columns .* but expected columns .*"
):
mfp_to_yaml("mock_file.xlsx", yaml_output_path)


@patch("pandas.read_excel", return_value=EXTRA_HEADERS_DATA)
@patch("builtins.print") # Capture printed warnings
def test_mfp_to_yaml_extra_headers(mock_print, mock_read_excel, tmp_path):
"""Test that mfp_to_yaml prints a warning when extra columns are found."""
yaml_output_path = tmp_path / "schedule.yaml"

# Run function
mfp_to_yaml("mock_file.xlsx", yaml_output_path)

# Ensure a warning message was printed
mock_print.assert_any_call(
"Warning: Found additional unexpected columns ['Unexpected Column']. "
"Manually added columns have no effect. "
"If the MFP export format changed, please submit an issue: "
"https://github.com/OceanParcels/virtualship/issues."
)


@patch("pandas.read_excel", return_value=VALID_MFP_DATA)
def test_mfp_to_yaml_instrument_conversion(mock_read_excel, tmp_path):
"""Test that instruments are correctly converted into InstrumentType enums."""
yaml_output_path = tmp_path / "schedule.yaml"

# Run function
mfp_to_yaml("mock_file.xlsx", yaml_output_path)

# Load the generated YAML
data = Schedule.from_yaml(yaml_output_path)

assert isinstance(data.waypoints[0].instrument, list)
assert data.waypoints[0].instrument == [InstrumentType.CTD, InstrumentType.DRIFTER]
assert data.waypoints[1].instrument == [InstrumentType.ARGO_FLOAT]
assert data.waypoints[2].instrument == [
InstrumentType.XBT,
InstrumentType.CTD,
InstrumentType.DRIFTER,
]