diff --git a/changelog.md b/changelog.md index 8beb442..e22d40f 100644 --- a/changelog.md +++ b/changelog.md @@ -5,7 +5,6 @@ * New Quality Filter to remove Broadband data during certain time periods due to sensitivity issues * New Quality Filter to compare against an empirical GMPE (Atkinson 2022) to remove significant outliers * Add ability to generate a report to compare NZGMDB versions -* Automatic report creation * Add GMC skipped reasons * Update to use the new NZCVM 2.09 basins * TPVZ calculation fix @@ -22,6 +21,7 @@ * Change processing to use remove response instead of remove sensitivity * Updated CMT solutions for domain regions * Increased date range to end of 2025 +* Vs30 and Z1.0 / 2.5 for missing station metadata ## Version 4.3 - July 25 **2000-01-01 to 2024-12-31** * Sensitivity Fix (previously always taking first value not for actual datetime expected) diff --git a/nzgmdb/data_processing/process_observed.py b/nzgmdb/data_processing/process_observed.py index f1bdfa8..e0fc776 100644 --- a/nzgmdb/data_processing/process_observed.py +++ b/nzgmdb/data_processing/process_observed.py @@ -9,12 +9,9 @@ import numpy as np import pandas as pd from obspy import read_inventory -from obspy.clients.fdsn import Client as FDSN_Client -from obspy.core.inventory import Inventory import qcore.timeseries as ts from nzgmdb.data_processing import waveform_manipulation -from nzgmdb.management import config as cfg from nzgmdb.management import custom_errors, file_structure from nzgmdb.mseed_management import reading @@ -25,7 +22,6 @@ def process_single_mseed( fmax_df: pd.DataFrame | None = None, bypass_df: pd.DataFrame | None = None, xml_dir: Path | None = None, - inventory: Inventory | None = None, ): """ Process a single mseed file and save the processed data to a txt file @@ -46,8 +42,6 @@ def process_single_mseed( The bypass records containing custom fmin, fmax values xml_dir : Path, optional The directory containing the station xml files for inventory information - 
inventory : Inventory, optional - The inventory information for the mseed file Returns ------- @@ -247,11 +241,6 @@ def process_mseeds_to_txt( ) bypass_df = None if bypass_records_ffp is None else pd.read_csv(bypass_records_ffp) - config = cfg.Config() - channel_codes = config.get_value("channel_codes") - client = FDSN_Client("GEONET") - inventory = client.get_stations(channel=channel_codes, level="response") - # Use multiprocessing to process the mseed files with multiprocessing.Pool(processes=n_procs) as pool: skipped_records = pool.map( @@ -261,7 +250,6 @@ def process_mseeds_to_txt( fmax_df=fmax_df, bypass_df=bypass_df, xml_dir=xml_dir, - inventory=inventory, ), mseed_files, ) diff --git a/nzgmdb/data_retrieval/sites.py b/nzgmdb/data_retrieval/sites.py index 113b966..e525a09 100644 --- a/nzgmdb/data_retrieval/sites.py +++ b/nzgmdb/data_retrieval/sites.py @@ -6,14 +6,179 @@ from pathlib import Path import fiona +import numpy as np import pandas as pd +import rasterio from obspy.clients.fdsn import Client as FDSN_Client +from pyproj import Transformer +from scipy.spatial import cKDTree from nzgmdb.data_retrieval import tect_domain from nzgmdb.management import config as cfg from nzgmdb.management.data_registry import NZGMDB_DATA from qcore import point_in_polygon -from velocity_modelling import registry +from velocity_modelling import registry, threshold + + +def fill_gaps_with_nearest( + coords: np.ndarray, + values: np.ndarray, + invalid_mask: np.ndarray | None = None, + k: int = 8, +) -> np.ndarray: + """ + Fill NaN or invalid values using nearest-neighbour averaging. + + Parameters + ---------- + coords : (N, 2) array_like + Coordinates of the points (e.g., [x, y] or [lon, lat]). + values : (N,) array_like + Values at the points, with NaN for invalid/missing values. + invalid_mask : (N,) array_like, optional + Boolean mask indicating invalid points. If None, NaNs in `values` are used. 
+ k : int, default=8 + Number of nearest neighbors to consider for averaging. + + Returns + ------- + ndarray + Values with NaNs filled using nearest-neighbour averaging. + """ + + coords = np.asarray(coords) + values = np.asarray(values).astype(float) + + # ---- Enforce correct shapes ---- + if values.ndim == 2 and values.shape[1] == 1: + values = values.ravel() + + if coords.ndim != 2 or coords.shape[1] != 2: + raise ValueError("coords must be of shape (N, 2)") + + if invalid_mask is None: + invalid_mask = np.isnan(values) + else: + invalid_mask = np.asarray(invalid_mask) + if invalid_mask.ndim == 2 and invalid_mask.shape[1] == 1: + invalid_mask = invalid_mask.ravel() + + valid_mask = ~invalid_mask + + if not valid_mask.any(): + return np.full_like(values, np.nan) + + # ---- Build KDTree ---- + tree = cKDTree(coords[valid_mask]) + valid_values = values[valid_mask] + + # ---- Fill invalid points ---- + for idx in np.where(invalid_mask)[0]: + coord = coords[idx] + kk = min(k, len(valid_values)) + _, nn = tree.query(coord, k=kk) + values[idx] = np.nanmean(valid_values[nn]) + + return values + + +def sample_points_from_geotiff( + file_path: Path, + latlon_points: np.ndarray, + band: int = 1, +) -> np.ndarray: + """ + Sample a GeoTIFF raster at given latitude/longitude points. + + Parameters + ---------- + file_path : Path + Path to the GeoTIFF file. + latlon_points : (N, 2) array_like + Input points as [lat, lon] in EPSG:4326. + band : int, default=1 + Raster band to sample (1-based index). + + Returns + ------- + ndarray + Sampled raster values. NaN where points fall outside the raster + or where raster contains nodata. 
+ """ + + # ---- Normalize inputs ---- + file_path = Path(file_path) + latlon_points = np.asarray(latlon_points, dtype=float) + + lat = latlon_points[:, 0] + lon = latlon_points[:, 1] + + # Prepare output (NaN by default) + samples = np.full(lat.shape, np.nan, dtype=float) + + # ---- Open raster ---- + with rasterio.open(file_path) as ds: + + if ds.crs is None: + raise ValueError("Raster CRS is undefined.") + + # CRS of input coordinates (WGS84 lat/lon) + input_crs = rasterio.crs.CRS.from_epsg(4326) + + # ---- Transform coordinates if needed ---- + if ds.crs == input_crs: + # Raster already in lat/lon + x = lon + y = lat + else: + # Transform lat/lon → raster CRS + transformer = Transformer.from_crs( + input_crs, + ds.crs, + always_xy=True, + ) + x, y = transformer.transform(lon, lat) + + # ---- Determine which points lie inside raster bounds ---- + bounds = ds.bounds + inside = ( + (x >= bounds.left) + & (x <= bounds.right) + & (y >= bounds.bottom) + & (y <= bounds.top) + ) + + if not np.any(inside): + return samples.reshape(-1, 1) + + # ---- Sample raster at valid points ---- + coords = list(zip(x[inside], y[inside])) + + raw_values = np.array( + [v[0] for v in ds.sample(coords, indexes=band)], + dtype=float, + ) + + # ---- Handle nodata ---- + if ds.nodata is not None: + raw_values[raw_values == ds.nodata] = np.nan + + # ---- Apply scale and offset if defined ---- + scale = 1.0 + offset = 0.0 + + if ds.scales is not None: + scale = ds.scales[band - 1] + + if ds.offsets is not None: + offset = ds.offsets[band - 1] + + values = raw_values * scale + offset + + # ---- Insert values back into output ---- + samples[inside] = values + + return samples.reshape(-1, 1) def create_site_table_response() -> pd.DataFrame: @@ -51,7 +216,26 @@ def create_site_table_response() -> pd.DataFrame: station_info, columns=["net", "sta", "lat", "lon", "elev", "creation_date", "end_date"], ) - sta_df = sta_df.drop_duplicates(["net", "sta"]).reset_index(drop=True) + sta_df = 
sta_df.drop_duplicates(["net", "sta"]) + + bbox = config.get_value("bbox") # [min_lon, min_lat, max_lon, max_lat] + min_lon, min_lat, max_lon, max_lat = bbox + + # Ensure lat/lon are present and within latitude bounds + mask_lat = ( + sta_df["lat"].notna() + & sta_df["lon"].notna() + & (sta_df["lat"] >= min_lat) + & (sta_df["lat"] <= max_lat) + ) + + # Handle antimeridian crossing: if min_lon > max_lon use OR + if min_lon <= max_lon: + mask_lon = (sta_df["lon"] >= min_lon) & (sta_df["lon"] <= max_lon) + else: + mask_lon = (sta_df["lon"] >= min_lon) | (sta_df["lon"] <= max_lon) + + sta_df = sta_df.loc[mask_lat & mask_lon] # Get the Geonet metadata summary information geo_meta_summary_df = pd.read_csv( @@ -62,8 +246,6 @@ def create_site_table_response() -> pd.DataFrame: geo_meta_summary_df = geo_meta_summary_df.rename( columns={ "Name": "sta", - "Lat": "lat", - "Long": "lon", "NZS1170SiteClass": "site_class", "Vs30_median": "Vs30", "Sigmaln_Vs30": "Vs30_std", @@ -82,12 +264,14 @@ def create_site_table_response() -> pd.DataFrame: ) merged_df = geo_meta_summary_df.merge( - sta_df[["net", "elev", "sta", "creation_date", "end_date"]], + sta_df[["net", "sta", "lat", "lon", "elev", "creation_date", "end_date"]], on="sta", - how="left", + how="outer", ) - # Fill Elevation NaN values from sta_df + # Fill Lat, Lon, Elevation NaN values from sta_df merged_df["elev"] = merged_df["Elevation"].combine_first(merged_df["elev"]) + merged_df["lat"] = merged_df["Lat"].combine_first(merged_df["lat"]) + merged_df["lon"] = merged_df["Long"].combine_first(merged_df["lon"]) # Specify the required files for fiona NZGMDB_DATA.fetch("nt_domains_kiran.shp") NZGMDB_DATA.fetch("nt_domains_kiran.dbf") @@ -101,6 +285,71 @@ def create_site_table_response() -> pd.DataFrame: # Rename the domain column tect_merged_df = tect_merged_df.rename(columns={"domain_no": "site_domain_no"}) + # Only compute thresholds for stations where Z1.0 is missing + mask_missing_z1 = tect_merged_df["Z1.0"].isna() + if 
mask_missing_z1.any(): + # Prepare stations DataFrame for only missing rows, indexed by station code + stations = tect_merged_df.loc[mask_missing_z1, ["sta", "lon", "lat"]].set_index( + "sta" + )[["lon", "lat"]] + try: + nzcvm_version = config.get_value("nzcvm_version") + thresholds = threshold.compute_station_thresholds( + stations, model_version=nzcvm_version + ) + # Merge computed thresholds back (computed columns will be suffixed) + tect_merged_df = tect_merged_df.merge( + thresholds[["Z1.0(km)", "Z2.5(km)", "sigma"]], + left_on="sta", + right_index=True, + how="left", + ) + + # Add in the computed values where missing + tect_merged_df["Z1.0"] = tect_merged_df["Z1.0"].combine_first( + tect_merged_df.get("Z1.0(km)") * 1000.0 + ) + tect_merged_df["Z2.5"] = tect_merged_df["Z2.5"].combine_first( + tect_merged_df.get("Z2.5(km)") * 1000.0 + ) + tect_merged_df["Z1.0_std"] = tect_merged_df["Z1.0_std"].combine_first( + tect_merged_df.get("sigma") + ) + tect_merged_df["Z2.5_std"] = tect_merged_df["Z2.5_std"].combine_first( + tect_merged_df.get("sigma") + ) + + # Set extra ref / quality fields + tect_merged_df.loc[ + mask_missing_z1, ["Z1.0_ref", "Z2.5_ref", "Q_Z1.0", "Q_Z2.5"] + ] = ["NZCVM (2026)", "NZCVM (2026)", "Q3", "Q3"] + + # Get the file path to the combined MVN GeoTIFF + NZGMDB_DATA.fetch("combined_mvn_wgs84.tif") + file_path = Path(NZGMDB_DATA.abspath) / "combined_mvn_wgs84.tif" + + # Compute Vs30 for missing values + points = tect_merged_df.loc[mask_missing_z1, ["lat", "lon"]].to_numpy() + vs30_values = sample_points_from_geotiff(file_path, points).ravel() + + # Fill missing gaps in Vs30 using nearest-neighbour averaging + coords = np.column_stack([points[:, 1], points[:, 0]]) + vs30_values_filled = fill_gaps_with_nearest(coords, vs30_values) + vs30_values_filled_rounded = np.round(vs30_values_filled) + + # Update Vs30 and related fields + tect_merged_df.loc[mask_missing_z1, "Vs30"] = vs30_values_filled_rounded + + # Ensure reference and quality fields are set 
for Vs30 where filled + vs30_mask = mask_missing_z1 & ~tect_merged_df["Vs30"].isna() + tect_merged_df.loc[vs30_mask, "Vs30_Ref"] = "Foster et al. (2019)" + tect_merged_df.loc[vs30_mask, "Q_Vs30"] = "Q3" + + except (FileNotFoundError, ValueError, RuntimeError): + raise UserWarning( + "Could not compute thresholds for missing Z1.0 values, check correct setup for NZCVM" + ) + # Select specific columns site_df = tect_merged_df[ [ diff --git a/nzgmdb/management/data_registry.py b/nzgmdb/management/data_registry.py index 56703a4..6abc76a 100644 --- a/nzgmdb/management/data_registry.py +++ b/nzgmdb/management/data_registry.py @@ -5,6 +5,7 @@ REGISTRY = { "hik_kerm_fault_300km_wgs84_poslon.txt": "sha256:1a199978b6c9c608f8473539b639a8825c1091167da3d14b07c7268528320e03", "Geonet_Metadata_Summary_v1.4.csv": "sha256:7884422c3fcae0810c02948ba1a3bd39ba5793ba28189e90d730541be1c207c0", + "combined_mvn_wgs84.tif": "sha256:b0aed1d441a3c441d784a5dd9016314ee74df164dcafee6e233211a43cbfba0f", "puy_slab2_dep_02.26.18.xyz": "sha256:9ebe4feab4ee3b80e3fe403f2d873f94e4d7f06d937d721cc9e154ecee83e3c0", "reyners_relocations.csv": "sha256:7795c60dae67af14eb590b0d919fa850e2f2fe2fb3beb077cbac14d27eb8faf5", "focal_mech_tectonic_domain_v1.csv": "sha256:1f1e0c4b7f9ca1b87fb2ca4883e587f330fe82b5bbf9ebb6eb8f4d12aa2e1936", @@ -40,6 +41,7 @@ URLS = { "focal_mech_tectonic_domain_v1.csv": "https://www.dropbox.com/scl/fi/zseg304cbjmti7gg5tdyv/focal_mech_tectonic_domain_v1.csv?rlkey=kfb9ttvnv9yi9zftixw6kmz4v&st=4j9pgpgj&dl=1", "Geonet_Metadata_Summary_v1.4.csv": "https://www.dropbox.com/scl/fi/iev7qmoqqzvc5quhf8mk8/Geonet-Metadata-Summary_v1.4.csv?rlkey=7twwwck5iy5zao7lwao6xodvm&st=6m3elzuu&dl=1", + "combined_mvn_wgs84.tif": "https://www.dropbox.com/scl/fi/lzqijoivcg4wzybj06rsh/combined_mvn_wgs84.tif?rlkey=3g4milzk41c2lcsdggvy2ieql&st=re0izlz4&dl=1", "GeoNet_CMT_solutions_20201129_PreferredNodalPlane_v1.csv": 
"https://www.dropbox.com/scl/fi/fq28jx8jlbozj0d1x5tnq/GeoNet_CMT_solutions_20201129_PreferredNodalPlane_v1.csv?rlkey=30xj6n7ara0vz4t8kg4pz8w5s&st=63x7nr3j&dl=1", "hik_kerm_fault_300km_wgs84_poslon.txt": "https://www.dropbox.com/scl/fi/ig3ajufpv4xg2qjfxxuup/hik_kerm_fault_300km_wgs84_poslon.txt?rlkey=9jajfkq2elrzwzzgh6px17k8e&st=6ham2oox&dl=1", "Mw_rrup.txt": "https://www.dropbox.com/scl/fi/e3o9v9ze9e4955xxtrl14/Mw_rrup.txt?rlkey=c663zntx7gaeyxt04i97r62nu&st=6ri3c620&dl=1", diff --git a/requirements.txt b/requirements.txt index e987d2c..86e8edb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,6 +13,7 @@ gmprocess h5py pooch matplotlib +rasterio oq_wrapper>=2025.12.3, qcore-utils>=2025.12.1, source_modelling>=2025.12.1, diff --git a/tests/test_sites.py b/tests/test_sites.py new file mode 100644 index 0000000..b175eb0 --- /dev/null +++ b/tests/test_sites.py @@ -0,0 +1,355 @@ +from collections.abc import Iterator +from pathlib import Path +from types import TracebackType +from typing import Any, Self + +import numpy as np +import pandas as pd +import pytest +from rasterio.io import MemoryFile +from rasterio.transform import from_origin + +from nzgmdb.data_retrieval import sites + + +class _DummyFionaCollection: + """ + Minimal context manager that mimics a Fiona Collection. + + Parameters + ---------- + shapes : list + Iterable of shape-like records to yield when iterated. + + Returns + ------- + _DummyFionaCollection + Context manager instance that can be iterated over. 
+ """ + + def __init__(self, shapes: list[Any]) -> None: # noqa: D107 + self._shapes = shapes + + def __enter__(self) -> Self: # noqa: D105 + return self + + def __exit__( # noqa: D105 + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + tb: TracebackType | None, + ) -> bool: + return False + + def __iter__(self) -> Iterator[Any]: # noqa: D105 + return iter(self._shapes) + + +def _make_test_geotiff( + width: int = 10, + height: int = 10, + nodata: float = -9999.0, +) -> MemoryFile: + """ + Create an in-memory single-band GeoTIFF for testing. + + Parameters + ---------- + width : int, default=10 + Raster width in pixels. + height : int, default=10 + Raster height in pixels. + nodata : float, default=-9999.0 + NoData value written to the dataset (and to one pixel in the raster). + + Returns + ------- + memfile : rasterio.io.MemoryFile + In-memory GeoTIFF containing a simple gradient field with a NoData pixel. + """ + data = np.arange(width * height, dtype=np.float32).reshape(height, width) + transform = from_origin(0.0, 10.0, 1.0, 1.0) + data[0, 0] = nodata + + memfile = MemoryFile() + with memfile.open( + driver="GTiff", + height=height, + width=width, + count=1, + dtype=data.dtype, + crs="EPSG:4326", + transform=transform, + nodata=nodata, + ) as ds: + ds.write(data, 1) + + return memfile + + +def test_sample_points_from_geotiff_inside_outside_and_nodata() -> None: + """ + Test that GeoTIFF sampling returns finite values for in-bounds points, and + returns NaN for out-of-bounds or NoData pixels. 
def test_fill_gaps_with_nearest_fills_nans_and_preserves_finite() -> None:
    """
    Test that `fill_gaps_with_nearest` replaces NaN entries with finite
    values while leaving already-finite entries untouched.

    Returns
    -------
    None
    """
    unit_square = np.array(
        [[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]],
        dtype=float,
    )
    samples = np.array([10.0, 20.0, np.nan, 40.0], dtype=float)

    result = sites.fill_gaps_with_nearest(unit_square, samples, k=3)

    # Shape preserved and the gap filled with a finite value
    assert result.shape == samples.shape
    assert np.isfinite(result[2])
    # Finite inputs must pass through unchanged
    for pos, expected in ((0, 10.0), (1, 20.0), (3, 40.0)):
        assert result[pos] == expected
population logic using monkeypatches. + + Verifies expected output columns exist and that Vs30 and Z1.0 / Z2.5 values + (and their reference/quality fields) are only set when data are available. + + Parameters + ---------- + monkeypatch : pytest.MonkeyPatch + Pytest fixture used to patch external dependencies. + tmp_path : pathlib.Path + Pytest fixture providing a temporary directory for test artifacts. + points : numpy.ndarray, shape (N, 2) + Input points as [lat, lon] used to construct a minimal metadata table. + + Returns + ------- + None + """ + + class _Cfg: + def get_value(self, key: str): + if key == "channel_codes": + return "HHZ" + if key == "bbox": + return [0.0, -90.0, 360.0, 90.0] + if key == "nzcvm_version": + return "test" + return None + + monkeypatch.setattr(sites.cfg, "Config", _Cfg) + + class _DummyInv(list): + pass + + class _DummyClient: + def __init__(self, *_args, **_kwargs): + pass + + def get_stations(self, *args, **kwargs): + return _DummyInv() + + monkeypatch.setattr(sites, "FDSN_Client", _DummyClient) + + monkeypatch.setattr( + sites.fiona, + "open", + lambda *_a, **_k: _DummyFionaCollection([]), + ) + + # Do NOT monkeypatch NZGMDB_DATA.abspath (read-only property). + # Instead, patch fetch() to return a temp file path for the tif and anything else requested. 
+ combined_tif = tmp_path / "combined_mvn_wgs84.tif" + combined_tif.touch() + + def _fetch(name: str, *args, **kwargs): + p = tmp_path / name + p.parent.mkdir(parents=True, exist_ok=True) + p.touch(exist_ok=True) + return str(p) + + monkeypatch.setattr(sites.NZGMDB_DATA, "fetch", _fetch) + + geo = pd.DataFrame( + { + "Name": [f"S{i}" for i in range(len(points))], + "Lat": points[:, 0], + "Long": points[:, 1], + "Elevation": np.zeros(len(points)), + "Vs30_median": [np.nan] * len(points), + "Sigmaln_Vs30": [np.nan] * len(points), + "T_median": [np.nan] * len(points), + "sigmaln_T": [np.nan] * len(points), + "Q_T": [None] * len(points), + "D_T": [None] * len(points), + "T_Ref": [None] * len(points), + "Z1.0_median": [np.nan] * len(points), + "sigmaln_Z1.0": [np.nan] * len(points), + "Z1.0_Ref": [None] * len(points), + "Z2.5_median": [np.nan] * len(points), + "sigmaln_Z2.5": [np.nan] * len(points), + "Z2.5_Ref": [None] * len(points), + "NZS1170SiteClass": [None] * len(points), + } + ) + monkeypatch.setattr(pd, "read_csv", lambda *_a, **_k: geo) + + def _find_domain_from_shapes(df: pd.DataFrame, _shapes: list) -> pd.DataFrame: + out = df.copy() + out["domain_no"] = 1 + return out + + monkeypatch.setattr( + sites.tect_domain, "find_domain_from_shapes", _find_domain_from_shapes + ) + + def _compute_station_thresholds( + stations: pd.DataFrame, model_version: str = None + ) -> pd.DataFrame: + n = len(stations) + return pd.DataFrame( + { + "Z1.0(km)": np.full(n, 0.5), + "Z2.5(km)": np.full(n, 2.0), + "sigma": np.full(n, 0.25), + }, + index=stations.index, + ) + + monkeypatch.setattr( + sites.threshold, "compute_station_thresholds", _compute_station_thresholds + ) + + def _sample_points_from_geotiff( + file_path: str, + latlon_points: np.ndarray, + band: int = 1, + ) -> np.ndarray: + n = len(latlon_points) + vals = np.linspace(100.0, 500.0, n).astype(float) + if n >= 2: + vals[0] = np.nan + return vals.reshape(-1, 1) + + monkeypatch.setattr( + sites, 
"sample_points_from_geotiff", _sample_points_from_geotiff + ) + + def _fill_gaps_with_nearest( + coords: np.ndarray, + values: np.ndarray, + invalid_mask: np.ndarray = None, + k: int = 8, + ) -> np.ndarray: + values = np.asarray(values, dtype=float).copy() + values[np.isnan(values)] = 250.0 + return values + + monkeypatch.setattr(sites, "fill_gaps_with_nearest", _fill_gaps_with_nearest) + + site_df = sites.create_site_table_response() + + for col in [ + "sta", + "lat", + "lon", + "Vs30", + "Q_Vs30", + "Vs30_Ref", + "Z1.0", + "Z2.5", + "Z1.0_ref", + "Z2.5_ref", + "Q_Z1.0", + "Q_Z2.5", + ]: + assert col in site_df.columns + + assert site_df["Z1.0"].notna().any() + assert site_df["Z2.5"].notna().any() + assert (site_df["Z1.0_ref"].dropna() == "NZCVM (2026)").all() + assert (site_df["Z2.5_ref"].dropna() == "NZCVM (2026)").all() + + vs30_non_nan = site_df["Vs30"].notna() + assert (site_df.loc[vs30_non_nan, "Vs30_Ref"] == "Foster et al. (2019)").all() + assert (site_df.loc[vs30_non_nan, "Q_Vs30"] == "Q3").all() + assert site_df.loc[~vs30_non_nan, "Vs30_Ref"].isna().all() + assert site_df.loc[~vs30_non_nan, "Q_Vs30"].isna().all()