Skip to content

Commit 0357ac4

Browse files
iurytpre-commit-ci[bot]VeckoTheGecko
authored
Converting MFP CSV to YAML schedule (#111)
* draft script for converting MFP CSV to YAML schedule * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add openpyxl * add mfp_to_yaml function * add new command to init to accept mfp file as input * delete files from scripts/ * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * export the schedule body instead of saving file * change name of cli param and adapt for new mfp_to_yaml function * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add warning message for time entry on yaml * change to pydantic and change name of variables * add XBT * accept nonetype time * change to Waypoint to BaseModel and add field_serializer for instrument and time * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * remove restriction for version * add checking for columns from excel file * add unit tests * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add update comments and var naming * Remove buffering from mfp conversion * update references to Waypoint --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Vecko <[email protected]>
1 parent 96437cb commit 0357ac4

File tree

8 files changed

+270
-17
lines changed

8 files changed

+270
-17
lines changed

environment.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ dependencies:
1212
- pip
1313
- pyyaml
1414
- copernicusmarine >= 2
15+
- openpyxl
1516

1617
# linting
1718
- pre-commit

src/virtualship/cli/commands.py

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,26 @@
1616
hash_to_filename,
1717
)
1818
from virtualship.expedition.do_expedition import _get_schedule, do_expedition
19-
from virtualship.utils import SCHEDULE, SHIP_CONFIG
19+
from virtualship.utils import SCHEDULE, SHIP_CONFIG, mfp_to_yaml
2020

2121

2222
@click.command()
2323
@click.argument(
2424
"path",
2525
type=click.Path(exists=False, file_okay=False, dir_okay=True),
2626
)
27-
def init(path):
28-
"""Initialize a directory for a new expedition, with an example schedule and ship config files."""
27+
@click.option(
28+
"--from-mfp",
29+
type=str,
30+
default=None,
31+
help='Partially initialise a project from an exported xlsx or csv file from NIOZ\' Marine Facilities Planning tool (specifically the "Export Coordinates > DD" option). User edits are required after initialisation.',
32+
)
33+
def init(path, from_mfp):
34+
"""
35+
Initialize a directory for a new expedition, with an example schedule and ship config files.
36+
37+
If --from-mfp is provided, it will generate the schedule from the MFP file instead.
38+
"""
2939
path = Path(path)
3040
path.mkdir(exist_ok=True)
3141

@@ -43,7 +53,20 @@ def init(path):
4353
)
4454

4555
config.write_text(utils.get_example_config())
46-
schedule.write_text(utils.get_example_schedule())
56+
if from_mfp:
57+
mfp_file = Path(from_mfp)
58+
# Generate schedule.yaml from the MFP file
59+
click.echo(f"Generating schedule from {mfp_file}...")
60+
mfp_to_yaml(mfp_file, schedule)
61+
click.echo(
62+
"\n⚠️ The generated schedule does not contain time values. "
63+
"\nPlease edit 'schedule.yaml' and manually add the necessary time values."
64+
"\n🕒 Expected time format: 'YYYY-MM-DD HH:MM:SS' (e.g., '2023-10-20 01:00:00').\n"
65+
)
66+
else:
67+
# Create a default example schedule
68+
# schedule_body = utils.get_example_schedule()
69+
schedule.write_text(utils.get_example_schedule())
4770

4871
click.echo(f"Created '{config.name}' and '{schedule.name}' at {path}.")
4972

src/virtualship/expedition/space_time_region.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,13 +42,17 @@ def _check_lon_lat_domain(self) -> Self:
4242
class TimeRange(BaseModel):
4343
"""Defines the temporal boundaries for a space-time region."""
4444

45-
start_time: datetime
46-
end_time: datetime
45+
#! TODO: Remove the `| None` for `start_time` and `end_time`, and have the MFP functionality not use pydantic (with testing to avoid codebase drift)
46+
start_time: datetime | None = None
47+
end_time: datetime | None = None
4748

4849
@model_validator(mode="after")
4950
def _check_time_range(self) -> Self:
50-
if not self.start_time < self.end_time:
51-
raise ValueError("start_time must be before end_time")
51+
if (
52+
self.start_time and self.end_time
53+
): #! TODO: remove this check once `start_time` and `end_time` are required
54+
if not self.start_time < self.end_time:
55+
raise ValueError("start_time must be before end_time")
5256
return self
5357

5458

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,23 @@
11
"""Waypoint class."""
22

3-
from dataclasses import dataclass
43
from datetime import datetime
54

5+
from pydantic import BaseModel, field_serializer
6+
67
from ..location import Location
78
from .instrument_type import InstrumentType
89

910

10-
@dataclass
11-
class Waypoint:
11+
class Waypoint(BaseModel):
    """
    A point on the route: a location plus an optional time and optional instrument(s).

    ``instrument`` may be a single ``InstrumentType``, a list of them, or ``None``.
    """

    location: Location
    time: datetime | None = None
    instrument: InstrumentType | list[InstrumentType] | None = None

    @field_serializer("instrument")
    def serialize_instrument(self, instrument):
        """Serialize instrument enum member(s) to their plain values."""
        # Single member (or nothing at all): emit its value, or None when unset.
        if not isinstance(instrument, list):
            return instrument.value if instrument else None
        # A list is serialized element-wise; an empty list stays an empty list.
        return [member.value for member in instrument]

src/virtualship/utils.py

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from importlib.resources import files
33
from typing import TextIO
44

5+
import pandas as pd
56
import yaml
67
from pydantic import BaseModel
78

@@ -37,3 +38,116 @@ def _dump_yaml(model: BaseModel, stream: TextIO) -> str | None:
3738
def _generic_load_yaml(data: str, model: BaseModel) -> BaseModel:
3839
"""Load a yaml string into a pydantic model."""
3940
return model.model_validate(yaml.safe_load(data))
41+
42+
43+
def mfp_to_yaml(excel_file_path: str, yaml_output_path: str):
    """
    Convert an MFP coordinate export into a schedule YAML file.

    Parameters
    ----------
    excel_file_path : str
        Path to the MFP export holding station coordinates and instruments.
        Files whose name ends in ``.csv`` are read with ``pandas.read_csv``;
        everything else is read with ``pandas.read_excel``.
    yaml_output_path : str
        Path the generated schedule is written to.

    Raises
    ------
    ValueError
        If any expected column is missing from the export, if no row has a
        value in every expected column, or if an instrument name is not a
        valid ``InstrumentType``.

    Notes
    -----
    The function:

    1. Reads the export and checks its column headers.
    2. Drops rows with a missing value in any expected column.
    3. Derives the spatial range from the min/max of the coordinates and the
       maximum depth from the instruments that occur in the export.
    4. Builds one waypoint per remaining row (without a time) and writes the
       resulting schedule to ``yaml_output_path``.

    The time range of the generated schedule is left empty.

    """
    # Imported inside the function rather than at module level.
    # NOTE(review): presumably this avoids a circular import with the expedition package — confirm.
    from virtualship.expedition.instrument_type import InstrumentType
    from virtualship.expedition.schedule import Schedule
    from virtualship.expedition.space_time_region import (
        SpaceTimeRegion,
        SpatialRange,
        TimeRange,
    )
    from virtualship.expedition.waypoint import Location, Waypoint

    # Expected column headers, in a fixed order so messages are deterministic.
    expected_columns = ["Station Type", "Name", "Latitude", "Longitude", "Instrument"]

    # The CLI advertises both xlsx and csv exports; read_excel cannot parse csv.
    # NOTE(review): assumes the csv export is comma-separated with the same
    # headers as the Excel export — confirm against a real MFP csv export.
    if str(excel_file_path).lower().endswith(".csv"):
        coordinates_data = pd.read_csv(excel_file_path)
    else:
        coordinates_data = pd.read_excel(excel_file_path)

    # Check if the headers match the expected ones
    actual_columns = list(coordinates_data.columns)

    missing_columns = [col for col in expected_columns if col not in actual_columns]
    if missing_columns:
        raise ValueError(
            f"Error: Found columns {actual_columns}, but expected columns {expected_columns}. "
            "Are you sure that you're using the correct export from MFP?"
        )

    extra_columns = [col for col in actual_columns if col not in expected_columns]
    if extra_columns:
        print(
            f"Warning: Found additional unexpected columns {extra_columns}. "
            "Manually added columns have no effect. "
            "If the MFP export format changed, please submit an issue: "
            "https://github.com/OceanParcels/virtualship/issues."
        )

    # Keep only the expected columns and drop rows with a missing value in any of them.
    coordinates_data = coordinates_data[expected_columns].dropna()
    if coordinates_data.empty:
        # Without this guard the min/max calls below fail with an unhelpful message.
        raise ValueError(
            "Error: The MFP export contains no rows with a value in every expected column "
            f"{expected_columns}."
        )

    # maximum depth (in meters) for each instrument
    instrument_max_depths = {
        "XBT": 2000,
        "CTD": 5000,
        "DRIFTER": 1,
        "ARGO_FLOAT": 2000,
    }

    # Parse each Instrument cell once; reused for the depth and the waypoints.
    instruments_per_row = [
        _split_instruments(cell) for cell in coordinates_data["Instrument"]
    ]
    unique_instruments = set().union(*instruments_per_row)

    # Determine the maximum depth based on the unique instruments
    # (instruments without a known depth contribute 0).
    maximum_depth = max(
        (instrument_max_depths.get(name, 0) for name in unique_instruments),
        default=0,
    )

    spatial_range = SpatialRange(
        minimum_longitude=coordinates_data["Longitude"].min(),
        maximum_longitude=coordinates_data["Longitude"].max(),
        minimum_latitude=coordinates_data["Latitude"].min(),
        maximum_latitude=coordinates_data["Latitude"].max(),
        minimum_depth=0,
        maximum_depth=maximum_depth,
    )

    # Create space-time region object; the time range is left unset for the user to fill in.
    space_time_region = SpaceTimeRegion(
        spatial_range=spatial_range,
        time_range=TimeRange(),
    )

    # Generate waypoints (InstrumentType raises ValueError for unknown instrument names)
    waypoints = [
        Waypoint(
            instrument=[InstrumentType(name) for name in names],
            location=Location(latitude=latitude, longitude=longitude),
        )
        for names, latitude, longitude in zip(
            instruments_per_row,
            coordinates_data["Latitude"],
            coordinates_data["Longitude"],
        )
    ]

    # Create Schedule object
    schedule = Schedule(
        waypoints=waypoints,
        space_time_region=space_time_region,
    )

    # Save to YAML file
    schedule.to_yaml(yaml_output_path)


def _split_instruments(cell: str) -> list[str]:
    """Split one "Instrument" cell such as ``"CTD, DRIFTER"`` into stripped, non-empty names."""
    # Split on the bare comma and strip, so "CTD,DRIFTER" and "CTD ,  DRIFTER" also parse.
    return [name.strip() for name in str(cell).split(",") if name.strip()]

tests/expedition/test_schedule.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,11 @@ def test_schedule(tmpdir) -> None:
1212

1313
schedule = Schedule(
1414
waypoints=[
15-
Waypoint(Location(0, 0), time=base_time, instrument=None),
15+
Waypoint(location=Location(0, 0), time=base_time, instrument=None),
1616
Waypoint(
17-
Location(1, 1), time=base_time + timedelta(hours=1), instrument=None
17+
location=Location(1, 1),
18+
time=base_time + timedelta(hours=1),
19+
instrument=None,
1820
),
1921
]
2022
)

tests/expedition/test_simulate_schedule.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ def test_simulate_schedule_feasible() -> None:
2020
ship_config.ship_speed_meter_per_second = 5.14
2121
schedule = Schedule(
2222
waypoints=[
23-
Waypoint(Location(0, 0), base_time),
24-
Waypoint(Location(0.01, 0), base_time + timedelta(days=1)),
23+
Waypoint(location=Location(0, 0), time=base_time),
24+
Waypoint(location=Location(0.01, 0), time=base_time + timedelta(days=1)),
2525
]
2626
)
2727

@@ -38,8 +38,8 @@ def test_simulate_schedule_too_far() -> None:
3838
ship_config = ShipConfig.from_yaml("expedition_dir/ship_config.yaml")
3939
schedule = Schedule(
4040
waypoints=[
41-
Waypoint(Location(0, 0), base_time),
42-
Waypoint(Location(1.0, 0), base_time + timedelta(minutes=1)),
41+
Waypoint(location=Location(0, 0), time=base_time),
42+
Waypoint(location=Location(1.0, 0), time=base_time + timedelta(minutes=1)),
4343
]
4444
)
4545

tests/test_mfp_to_yaml.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
from unittest.mock import patch
2+
3+
import pandas as pd
4+
import pytest
5+
6+
from virtualship.expedition.instrument_type import InstrumentType
7+
from virtualship.expedition.schedule import Schedule
8+
from virtualship.utils import mfp_to_yaml
9+
10+
# Well-formed MFP export: one tuple per station, columns named below.
_VALID_STATIONS = [
    ("A", "Station1", 30, -44, "CTD, DRIFTER"),
    ("B", "Station2", 31, -45, "ARGO_FLOAT"),
    ("C", "Station3", 32, -46, "XBT, CTD, DRIFTER"),
]
VALID_MFP_DATA = pd.DataFrame(
    _VALID_STATIONS,
    columns=["Station Type", "Name", "Latitude", "Longitude", "Instrument"],
)

# Export lacking the "Longitude" and "Instrument" columns.
MISSING_HEADERS_DATA = pd.DataFrame(
    [("A", "Station1", 10.5)],
    columns=["Station Type", "Name", "Latitude"],
)

# Valid export with one additional, unexpected column appended.
EXTRA_HEADERS_DATA = VALID_MFP_DATA.assign(
    **{"Unexpected Column": ["Extra1", "Extra2", "Extra3"]}
)
29+
30+
31+
@patch("pandas.read_excel", return_value=VALID_MFP_DATA)
def test_mfp_to_yaml_success(mock_read_excel, tmp_path):
    """A valid MFP export yields a schedule file with one waypoint per station."""
    target = tmp_path / "schedule.yaml"

    # read_excel is patched, so the input path is never opened; the YAML file is really written.
    mfp_to_yaml("mock_file.xlsx", target)
    assert target.exists()

    # Round-trip through Schedule to validate the written contents.
    schedule = Schedule.from_yaml(target)
    expected_instruments = [
        [InstrumentType.CTD, InstrumentType.DRIFTER],
        [InstrumentType.ARGO_FLOAT],
        [InstrumentType.XBT, InstrumentType.CTD, InstrumentType.DRIFTER],
    ]

    assert len(schedule.waypoints) == 3
    for waypoint, instruments in zip(schedule.waypoints, expected_instruments):
        assert waypoint.instrument == instruments
53+
54+
55+
@patch("pandas.read_excel", return_value=MISSING_HEADERS_DATA)
def test_mfp_to_yaml_missing_headers(mock_read_excel, tmp_path):
    """An export that lacks required columns is rejected with a ValueError."""
    target = tmp_path / "schedule.yaml"
    expected_message = "Error: Found columns .* but expected columns .*"

    with pytest.raises(ValueError, match=expected_message):
        mfp_to_yaml("mock_file.xlsx", target)
64+
65+
66+
@patch("pandas.read_excel", return_value=EXTRA_HEADERS_DATA)
@patch("builtins.print")  # intercept the printed warning
def test_mfp_to_yaml_extra_headers(mock_print, mock_read_excel, tmp_path):
    """Unexpected extra columns trigger a printed warning but do not abort the conversion."""
    target = tmp_path / "schedule.yaml"
    expected_warning = (
        "Warning: Found additional unexpected columns ['Unexpected Column']. "
        "Manually added columns have no effect. "
        "If the MFP export format changed, please submit an issue: "
        "https://github.com/OceanParcels/virtualship/issues."
    )

    mfp_to_yaml("mock_file.xlsx", target)

    # The warning must appear among the print calls made during the conversion.
    mock_print.assert_any_call(expected_warning)
82+
83+
84+
@patch("pandas.read_excel", return_value=VALID_MFP_DATA)
def test_mfp_to_yaml_instrument_conversion(mock_read_excel, tmp_path):
    """Instrument cells are turned into lists of InstrumentType members."""
    target = tmp_path / "schedule.yaml"
    mfp_to_yaml("mock_file.xlsx", target)

    # Inspect the schedule as it is read back from the generated YAML.
    first, second, third = Schedule.from_yaml(target).waypoints[:3]

    assert isinstance(first.instrument, list)
    assert first.instrument == [InstrumentType.CTD, InstrumentType.DRIFTER]
    assert second.instrument == [InstrumentType.ARGO_FLOAT]
    assert third.instrument == [
        InstrumentType.XBT,
        InstrumentType.CTD,
        InstrumentType.DRIFTER,
    ]

0 commit comments

Comments
 (0)