diff --git a/.gitignore b/.gitignore
index b9e16f4..f506c67 100644
--- a/.gitignore
+++ b/.gitignore
@@ -191,3 +191,4 @@ poetry.toml
 pyrightconfig.json
 
 # End of https://www.toptal.com/developers/gitignore/api/python
+.DS_Store
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..160b587
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "ECCOv4-py"]
+	path = ECCOv4-py
+	url = git@github.com:ECCO-GROUP/ECCOv4-py.git
diff --git a/ECCOv4-py b/ECCOv4-py
new file mode 160000
index 0000000..83f04e0
--- /dev/null
+++ b/ECCOv4-py
@@ -0,0 +1 @@
+Subproject commit 83f04e03004ba5a15d0b42321fcfc1e56944fe3f
diff --git a/emu_utilities/budget.py b/emu_utilities/budget.py
new file mode 100644
index 0000000..d060e58
--- /dev/null
+++ b/emu_utilities/budget.py
@@ -0,0 +1,174 @@
+from __future__ import annotations
+
+import struct
+import numpy as np
+import xarray as xr
+from pathlib import Path
+from datetime import datetime
+from dateutil.relativedelta import relativedelta
+from typing import TYPE_CHECKING
+from .emu_utilities import EMU
+import re
+if TYPE_CHECKING:
+    from numpy.typing import NDArray
+
+__all__ = ["load_budget"]
+
+
+class EMUBudget(EMU):
+    """Handles loading and processing of EMU budget data."""
+
+    def __init__(self, run_directory: str) -> None:
+        super().__init__(run_directory)
+        self.validate_tool("budg")
+        self.budget_file = self.find_budget_file()
+        self.time = self._build_time_axis()
+
+    def find_budget_file(self) -> Path:
+        files = sorted(self.directory.glob("output/emu_budg.sum_*"))
+        if not files:
+            raise FileNotFoundError("No emu_budg.sum_* file found.")
+        return files[0]
+
+    def _build_time_axis(self) -> NDArray[datetime]:
+        """Construct time axis as datetime array (monthly, starts at Jan 1992)."""
+        with open(self.budget_file, "rb") as f:
+            f.seek(4)  # Skip ibud
+            nmonths = struct.unpack(">l", f.read(4))[0]
+
+        base_date = datetime(1992, 1, 15)
+        return np.array([base_date + relativedelta(months=i) for i in range(nmonths)], dtype="O")
+
+    def _parse_budget_info(self, info_path: Path) -> dict:
+        """Parse the budg.info file and return metadata with separate variable name and units."""
+        meta = {}
+
+        if not info_path.exists():
+            print(f"Warning: budg.info not found at {info_path}")
+            return meta
+
+        with open(info_path, "r") as f:
+            for line in f:
+                line = line.strip()
+
+                if "budget sampled from" in line.lower():
+                    meta["source_path"] = line.split("from")[-1].strip()
+
+                elif "budget is monthly" in line.lower():
+                    meta["temporal_resolution"] = "monthly"
+
+                elif re.search(r"budget variable\s*:", line, re.IGNORECASE):
+                    match = re.search(r"budget variable\s*:\s*(.+)", line, re.IGNORECASE)
+                    if match:
+                        full = match.group(1).strip()
+                        # Split into variable name and units
+                        var_match = re.match(r"(.+?)\s*\((.+)\)", full)
+                        if var_match:
+                            meta["budget_name"] = var_match.group(1).strip()
+                            meta["budget_units"] = var_match.group(2).strip() + " / s"
+                        else:
+                            meta["budget_name"] = full
+                            meta["budget_units"] = "unknown"
+
+                elif "pert_i" in line and "pert_j" in line and "pert_k" in line:
+                    match = re.search(r"pert_i,\s*pert_j,\s*pert_k\s*=\s*(\d+)\s+(\d+)\s+(\d+)", line)
+                    if match:
+                        meta["i"] = int(match.group(1))
+                        meta["j"] = int(match.group(2))
+                        meta["k"] = int(match.group(3))
+
+                elif "budget model grid location" in line.lower():
+                    meta["location_str"] = line.split("-->")[-1].strip()
+
+                elif "iobjf =" in line:
+                    match = re.search(r"iobjf\s*=\s*(\d+)", line)
+                    if match:
+                        meta["iobjf (multiplier)"] = int(match.group(1))
+
+                elif "ifunc =" in line:
+                    match = re.search(r"ifunc\s*=\s*(\d+)", line)
+                    if match:
+                        meta["ifunc"] = int(match.group(1))
+
+        return meta
+
+    def load_budget_data(self) -> tuple[list[str], NDArray[np.float32], int]:
+        """Read EMU budget binary file; return variable names, a 2D array [nvar, ntime], and the zero-based budget ID."""
+        with open(self.budget_file, "rb") as f:
+            ibud = struct.unpack(">l", f.read(4))[0]
+            if not (1 <= ibud <= 5):
+                raise ValueError(f"Invalid budget ID: {ibud}")
+            ibud -= 1
+
+            nmonths = struct.unpack(">l", f.read(4))[0]
+            emubudg_name = []
+            emubudg = np.zeros((0, nmonths), dtype=np.float32)
+
+            while True:
+                fvar_bytes = f.read(12)
+                if len(fvar_bytes) < 12:
+                    break
+                fvar = fvar_bytes.decode("utf-8").strip()
+                emubudg_name.append(fvar)
+
+                fdum = np.array(struct.unpack(f">{nmonths}f", f.read(4 * nmonths)), dtype=np.float32)
+                emubudg = np.vstack((emubudg, fdum))
+
+        return emubudg_name, emubudg, ibud
+
+    def make_dataset(self) -> xr.Dataset:
+        names, emubudg, ibud = self.load_budget_data()
+        nmonths = len(self.time)
+        nvar = len(names)
+
+        lhs = emubudg[1, :]
+        rhs = emubudg[2, :].copy()
+        for i in range(3, nvar):
+            rhs += emubudg[i, :]
+
+        # adv
+        adv = np.zeros(nmonths, dtype=np.float32)
+        for i in range(nvar):
+            if "adv" in names[i]:
+                adv += emubudg[i, :]
+
+        # mix
+        mix = np.zeros(nmonths, dtype=np.float32)
+        for i in range(nvar):
+            if "mix" in names[i]:
+                mix += emubudg[i, :]
+
+        # frc
+        frc = np.zeros(nmonths, dtype=np.float32)
+        for i in range(nvar):
+            if names[i] not in ["dt", "lhs"]:
+                if all(k not in names[i] for k in ["adv", "mix", "lhs", "dt"]):
+                    frc += emubudg[i, :]
+
+        # Build dataset
+        data_vars = {
+            "lhs": (["time"], lhs),
+            "rhs": (["time"], rhs),
+            "advection": (["time"], adv),
+            "mixing": (["time"], mix),
+            "surface_forcing": (["time"], frc),
+        }
+
+        coords = {"time": self.time}
+        ds = self.create_base_dataset(data_vars, coords)
+
+        info_files = list(self.directory.glob("output/budg.info"))
+
+        if info_files:
+            meta = self._parse_budget_info(info_files[0])
+            ds.attrs.update(meta)
+        else:
+            print("Warning: No budg.info file found.")
+
+        return ds
+
+
+def load_budget(run_directory: str) -> xr.Dataset:
+    """Load EMU budget data as an xarray Dataset."""
+    emu = EMUBudget(run_directory)
+    return emu.make_dataset()
diff --git a/emu_utilities/tracer.py b/emu_utilities/tracer.py
index ac542d9..0bd823c 100644
--- a/emu_utilities/tracer.py
+++ b/emu_utilities/tracer.py
@@ -4,9 +4,13 @@
 import numpy as np
 import xarray as xr
+import sys
 
-from .emu_utilities import EMU, find_time_from_file_names
-from .resample import llc_compact_to_tiles
+# import the ECCOv4 py library
+sys.path.insert(0, '../ECCOv4-py')
+import ecco_v4_py as ecco
+
+from .emu_utilities import EMU
 
 if TYPE_CHECKING:
     from numpy import datetime64
 
@@ -40,43 +44,6 @@ def __init__(self, run_directory: str, mean: bool) -> None:
             raise ValueError(f"Expected EMU tool 'trcr', but got '{self.tool}' from directory: {self.run_name}")
         self.mean = mean
 
-    def find_time(self) -> NDArray[datetime64]:
-        """Extract timestamps from tracer file names.
-
-        Returns:
-            Array of datetime64 objects representing available timestamps.
-        """
-        pattern = "output/ptracer_mon_mean.*.data" if self.mean else "output/ptracer_mon_snap.*.data"
-        return np.array(find_time_from_file_names(self.directory, pattern))
-
-    def load_data(self, trcr_files) -> NDArray[np.float32]:
-        """Load tracer data from binary files and apply appropriate scaling.
-
-        Applies vertical and horizontal area weights to correctly represent
-        the tracer concentration in each cell.
-
-        Args:
-            trcr_files: List of tracer data files to load.
-
-        Returns:
-            Structured array of tracer data with applied weights.
-        """
-        data = np.full(
-            (self.time.size, self.nr, self.ntiles, self.ny // self.ntiles, self.nx), np.nan, dtype=np.float32
-        )
-
-        for i, trcr_file in enumerate(trcr_files):
-            with open(trcr_file, "rb") as f:
-                full_data = np.fromfile(f, dtype=">f4").astype(np.float32)
-                data[i] = llc_compact_to_tiles(full_data.reshape((self.nr, self.ny, self.nx)))
-            for k in range(self.nr):
-                # Apply vertical and horizontal area weights to correctly represent tracer concentration
-                data[i, k, :, :, :] = (
-                    data[i, k, :, :, :] * self._coordinate_factory.drf[k] * self._coordinate_factory.hfacc[k, :, :, :]
-                )
-
-        return data
-
     def make_dataset(self) -> xr.Dataset:
         """Create an xarray Dataset from tracer data.
 
@@ -87,25 +54,14 @@ def make_dataset(self) -> xr.Dataset:
             Dataset containing tracer data with appropriate coordinates and metadata.
         """
         if self.mean:
-            trcr_files = list(self.directory.glob("output/ptracer_mon_mean.*.data"))
+            ds = ecco.load_ecco_vars_from_mds(self.directory / "output",
+                                              mds_grid_dir=self.directory / "temp",
+                                              mds_files="ptracer_mon_mean")
         else:
-            trcr_files = list(self.directory.glob("output/ptracer_mon_snap.*.data"))
-            time_unsorted = self.find_time()
-            sort_idx = np.argsort(time_unsorted)
-            self.time = time_unsorted[sort_idx]
-            trcr_files = [trcr_files[i] for i in sort_idx]
-
-        data = self.load_data(trcr_files)
-
-        coords = self._coordinate_factory.create_tile_coordinates(include_z=True, include_time=True, times=self.time)
-        data_vars = {
-            "tracer": (["time", "k", "tile", "j", "i"], data),
-        }
-        ds = self.create_base_dataset(data_vars, coords)
-
-        # Apply ocean mask to exclude land areas
-        mask = self._coordinate_factory.create_mask(include_z=True)
-        ds = ds.where(mask > 0)
+            ds = ecco.load_ecco_vars_from_mds(self.directory / "output",
+                                              mds_grid_dir=self.directory / "temp",
+                                              mds_files="ptracer_mon_snap")
+        ds = ds.rename({"TRAC01": "tracer"})
 
         # Calculate depth-integrated tracer values
         ds["tracer_depth_integrated"] = (ds["tracer"] * mask).sum(dim="k", min_count=1)
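
For reviewers, a minimal usage sketch of the new load_budget entry point added in emu_utilities/budget.py. The run directory path is a hypothetical placeholder; the variable names match the data_vars built in EMUBudget.make_dataset above.

    from emu_utilities.budget import load_budget

    # "/path/to/emu_budg_run" is a placeholder for an EMU budget-tool run directory
    ds = load_budget("/path/to/emu_budg_run")

    # One monthly time series per budget term; lhs - rhs should be near zero if the budget closes
    residual = ds["lhs"] - ds["rhs"]
    print(ds[["advection", "mixing", "surface_forcing"]])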