1 change: 1 addition & 0 deletions .gitignore
@@ -191,3 +191,4 @@ poetry.toml
pyrightconfig.json

# End of https://www.toptal.com/developers/gitignore/api/python
.DS_Store
3 changes: 3 additions & 0 deletions .gitmodules
@@ -0,0 +1,3 @@
[submodule "ECCOv4-py"]
path = ECCOv4-py
url = [email protected]:ECCO-GROUP/ECCOv4-py.git
1 change: 1 addition & 0 deletions ECCOv4-py
Submodule ECCOv4-py added at 83f04e
174 changes: 174 additions & 0 deletions emu_utilities/budget.py
@@ -0,0 +1,174 @@
from __future__ import annotations

import re
import struct
from datetime import datetime
from pathlib import Path
from typing import TYPE_CHECKING

import numpy as np
import xarray as xr
from dateutil.relativedelta import relativedelta

from .emu_utilities import EMU

if TYPE_CHECKING:
from numpy.typing import NDArray

__all__ = ["load_budget"]


class EMUBudget(EMU):
"""Handles loading and processing of EMU budget data."""

def __init__(self, run_directory: str) -> None:
super().__init__(run_directory)
self.validate_tool("budg")
self.budget_file = self.find_budget_file()
self.time = self._build_time_axis()

def find_budget_file(self) -> Path:
files = sorted(self.directory.glob("output/emu_budg.sum_*"))
if not files:
raise FileNotFoundError("No emu_budg.sum_* file found.")
return files[0]

def _build_time_axis(self) -> NDArray[np.object_]:
"""Construct the time axis as an array of datetime objects (monthly, starting January 1992)."""
with open(self.budget_file, "rb") as f:
f.seek(4) # Skip ibud
nmonths = struct.unpack(">l", f.read(4))[0]

base_date = datetime(1992, 1, 15)
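# Mid-month stamps: 1992-01-15, 1992-02-15, ... (one per month, nmonths total)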
return np.array([base_date + relativedelta(months=i) for i in range(nmonths)], dtype="O")

def _parse_budget_info(self, info_path: Path) -> dict:
"""Parse the budg.info file and return metadata with separate variable name and units."""
meta = {}

if not info_path.exists():
print(f"Warning: budg.info not found at {info_path}")
return meta

with open(info_path, "r") as f:
for line in f:
line = line.strip()

if "budget sampled from" in line.lower():
meta["source_path"] = line.split("from")[-1].strip()

elif "budget is monthly" in line.lower():
meta["temporal_resolution"] = "monthly"

elif re.search(r"budget variable\s*:", line, re.IGNORECASE):
match = re.search(r"budget variable\s*:\s*(.+)", line, re.IGNORECASE)
if match:
full = match.group(1).strip()
# Split into variable name and units
var_match = re.match(r"(.+?)\s*\((.+)\)", full)
if var_match:
meta["budget_name"] = var_match.group(1).strip()
meta["budget_units"] = var_match.group(2).strip() + " / s"
else:
meta["budget_name"] = full
meta["budget_units"] = "unknown"

elif "pert_i" in line and "pert_j" in line and "pert_k" in line:
match = re.search(r"pert_i,\s*pert_j,\s*pert_k\s*=\s*(\d+)\s+(\d+)\s+(\d+)", line)
if match:
meta["i"] = int(match.group(1))
meta["j"] = int(match.group(2))
meta["k"] = int(match.group(3))

elif "budget model grid location" in line.lower():
meta["location_str"] = line.split("-->")[-1].strip()

elif "iobjf =" in line:
match = re.search(r"iobjf\s*=\s*(\d+)", line)
if match:
meta["iobjf (multiplier)"] = int(match.group(1))

elif "ifunc =" in line:
match = re.search(r"ifunc\s*=\s*(\d+)", line)
if match:
meta["ifunc"] = int(match.group(1))

return meta

def load_budget_data(self) -> tuple[list[str], NDArray[np.float32], int]:
"""Read the EMU budget binary file; return variable names, a 2D array [nvar, ntime], and the budget ID."""
with open(self.budget_file, "rb") as f:
ibud = struct.unpack(">l", f.read(4))[0]
if not (1 <= ibud <= 5):
raise ValueError(f"Invalid budget ID: {ibud}")
ibud -= 1

nmonths = struct.unpack(">l", f.read(4))[0]
emubudg_name = []
emubudg = np.zeros((0, nmonths), dtype=np.float32)

while True:
fvar_bytes = f.read(12)
if len(fvar_bytes) < 12:
break
fvar = fvar_bytes.decode("utf-8").strip()
emubudg_name.append(fvar)

fdum = np.array(struct.unpack(f">{nmonths}f", f.read(4 * nmonths)), dtype=np.float32)
emubudg = np.vstack((emubudg, fdum))

return emubudg_name, emubudg, ibud

def make_dataset(self) -> xr.Dataset:
names, emubudg, ibud = self.load_budget_data()
nmonths = len(self.time)
nvar = len(names)

lhs = emubudg[1, :]
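# rhs sums the remaining records; they are split below by name into advection ("adv"),
# mixing ("mix"), and surface forcing (everything else).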
rhs = emubudg[2, :].copy()
for i in range(3, nvar):
rhs += emubudg[i, :]

# adv
adv = np.zeros(nmonths, dtype=np.float32)
for i in range(nvar):
if "adv" in names[i]:
adv += emubudg[i, :]

# mix
mix = np.zeros(nmonths, dtype=np.float32)
for i in range(nvar):
if "mix" in names[i]:
mix += emubudg[i, :]

# frc
frc = np.zeros(nmonths, dtype=np.float32)
for i in range(nvar):
# Anything that is not a tendency term (dt/lhs), advection, or mixing counts as surface forcing
if all(k not in names[i] for k in ("adv", "mix", "lhs", "dt")):
frc += emubudg[i, :]

# Build dataset
data_vars = {
"lhs": (["time"], lhs),
"rhs": (["time"], rhs),
"advection": (["time"], adv),
"mixing": (["time"], mix),
"surface_forcing": (["time"], frc),
}

coords = {"time": self.time}
ds = self.create_base_dataset(data_vars, coords)

info_files = list(self.directory.glob("output/budg.info"))

if info_files:
meta = self._parse_budget_info(info_files[0])
ds.attrs.update(meta)
else:
print("Warning: No budg.info file found.")

return ds


def load_budget(run_directory: str) -> xr.Dataset:
"""Load EMU budget data as an xarray Dataset."""
emu = EMUBudget(run_directory)
return emu.make_dataset()
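
For reference, a minimal usage sketch of the new loader (the run directory path below is hypothetical):

from emu_utilities.budget import load_budget

ds = load_budget("/path/to/emu_budg_run")  # hypothetical EMU budget run directory
# In a closed budget the tendency (lhs) should roughly equal the sum of the terms (rhs)
print(ds.attrs.get("budget_name", "budget"), "max |lhs - rhs| =", float(abs(ds["lhs"] - ds["rhs"]).max()))
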
70 changes: 13 additions & 57 deletions emu_utilities/tracer.py
@@ -4,9 +4,13 @@

import numpy as np
import xarray as xr
import sys

from .emu_utilities import EMU, find_time_from_file_names
from .resample import llc_compact_to_tiles
# import the ECCOv4 py library
sys.path.insert(0,'../ECCOv4-py')
import ecco_v4_py as ecco

from .emu_utilities import EMU

if TYPE_CHECKING:
from numpy import datetime64
@@ -40,43 +44,6 @@ def __init__(self, run_directory: str, mean: bool) -> None:
raise ValueError(f"Expected EMU tool 'trcr', but got '{self.tool}' from directory: {self.run_name}")
self.mean = mean

def find_time(self) -> NDArray[datetime64]:
"""Extract timestamps from tracer file names.

Returns:
Array of datetime64 objects representing available timestamps.
"""
pattern = "output/ptracer_mon_mean.*.data" if self.mean else "output/ptracer_mon_snap.*.data"
return np.array(find_time_from_file_names(self.directory, pattern))

def load_data(self, trcr_files) -> NDArray[np.float32]:
"""Load tracer data from binary files and apply appropriate scaling.

Applies vertical and horizontal area weights to correctly represent
the tracer concentration in each cell.

Args:
trcr_files: List of tracer data files to load.

Returns:
Structured array of tracer data with applied weights.
"""
data = np.full(
(self.time.size, self.nr, self.ntiles, self.ny // self.ntiles, self.nx), np.nan, dtype=np.float32
)

for i, trcr_file in enumerate(trcr_files):
with open(trcr_file, "rb") as f:
full_data = np.fromfile(f, dtype=">f4").astype(np.float32)
data[i] = llc_compact_to_tiles(full_data.reshape((self.nr, self.ny, self.nx)))
for k in range(self.nr):
# Apply vertical and horizontal area weights to correctly represent tracer concentration
data[i, k, :, :, :] = (
data[i, k, :, :, :] * self._coordinate_factory.drf[k] * self._coordinate_factory.hfacc[k, :, :, :]
)

return data

def make_dataset(self) -> xr.Dataset:
"""Create an xarray Dataset from tracer data.

@@ -87,25 +54,14 @@ def make_dataset(self) -> xr.Dataset:
Dataset containing tracer data with appropriate coordinates and metadata.
"""
if self.mean:
trcr_files = list(self.directory.glob("output/ptracer_mon_mean.*.data"))
ds = ecco.load_ecco_vars_from_mds(self.directory / "output",
mds_grid_dir=self.directory / "temp",
mds_files="ptracer_mon_mean")
else:
trcr_files = list(self.directory.glob("output/ptracer_mon_snap.*.data"))
time_unsorted = self.find_time()
sort_idx = np.argsort(time_unsorted)
self.time = time_unsorted[sort_idx]
trcr_files = [trcr_files[i] for i in sort_idx]

data = self.load_data(trcr_files)

coords = self._coordinate_factory.create_tile_coordinates(include_z=True, include_time=True, times=self.time)
data_vars = {
"tracer": (["time", "k", "tile", "j", "i"], data),
}
ds = self.create_base_dataset(data_vars, coords)

# Apply ocean mask to exclude land areas
mask = self._coordinate_factory.create_mask(include_z=True)
ds = ds.where(mask > 0)
ds = ecco.load_ecco_vars_from_mds(self.directory / "output",
mds_grid_dir=self.directory / "temp",
mds_files="ptracer_mon_snap")
ds = ds.rename({"TRAC01":"tracer"})

# Calculate depth-integrated tracer values
ds["tracer_depth_integrated"] = (ds["tracer"] * mask).sum(dim="k", min_count=1)