Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions changelog.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# NZGMDB Changelog

## Version 4.4 - September 25 **2000-01-01 to 2024-12-31**
* Adding Broadband data (HH) to the database
## Version 4.4 - Feb 26 **2000-01-01 to 2025-12-31**
* Adding Broadband data (HH, BH) to the database
* New Quality Filter to remove Broadband data during certain time periods due to sensitivity issues
* New Quality Filter to compare against an empirical GMPE (Atkinson 2022) to remove significant outliers
* Add ability to generate a report to compare NZGMDB versions
Expand All @@ -18,6 +18,10 @@
* Adjust magnitude filters: full ≥ 2.5; quality-filtered ≥ 3.5.
* Added Hyp_depth to geometry table
* Adjusted is_ground_level logic by adding in FDSN Inventory data
* Add XML inventory information per station
* Change processing to use remove response instead of remove sensitivity
* Updated CMT solutions for domain regions
* Increased date range to end of 2025

## Version 4.3 - July 25 **2000-01-01 to 2024-12-31**
* Sensitivity Fix (previously always taking first value not for actual datetime expected)
Expand Down
25 changes: 24 additions & 1 deletion nzgmdb/calculation/snr.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

import numpy as np
import pandas as pd
from obspy import read_inventory
from pandas.errors import EmptyDataError

from IM import im_calculation, snr_calculation
Expand All @@ -23,6 +24,7 @@ def compute_snr_for_single_mseed(
output_dir: Path,
ko_directory: Path,
common_frequency_vector: np.ndarray = im_calculation.DEFAULT_FREQUENCIES,
xml_dir: Path = None,
):
"""
Compute the SNR for a single mseed file
Expand All @@ -39,6 +41,9 @@ def compute_snr_for_single_mseed(
Path to the directory containing the Ko matrices
common_frequency_vector : np.ndarray, optional
Common frequency vector to extract for SNR and FAS, by default None
xml_dir : Path, optional
Path to the directory containing the StationXML files, by default None
If None, will try to extract inventory from FDSN

Returns
-------
Expand All @@ -58,9 +63,25 @@ def compute_snr_for_single_mseed(
# Get the event_id
event_id = file_structure.get_event_id_from_mseed(mseed_file)

if xml_dir is None:
inventory = None
else:
# Load the inventory information
inventory_file = xml_dir / f"NZ.{station}.xml"
if not inventory_file.is_file():
inventory = None
else:
inventory = read_inventory(inventory_file)

# Read mseed information
try:
waveform = reading.create_waveform_from_mseed(mseed_file, pre_process=True, apply_taper=False, apply_zero_padding=False)
waveform = reading.create_waveform_from_mseed(
mseed_file,
pre_process=True,
apply_taper=False,
apply_zero_padding=False,
inventory=inventory,
)
except custom_errors.InventoryNotFoundError:
skipped_record_dict = {
"record_id": mseed_file.stem,
Expand Down Expand Up @@ -238,6 +259,7 @@ def compute_snr_for_mseed_data(
snr_fas_output_dir.mkdir(parents=True, exist_ok=True)
batch_dir = meta_output_dir / "snr_batch_files"
batch_dir.mkdir(parents=True, exist_ok=True)
xml_dir = file_structure.get_stationxml_dir(data_dir)

config = cfg.Config()
# Creating the common frequency vector if not provided
Expand Down Expand Up @@ -295,6 +317,7 @@ def compute_snr_for_mseed_data(
output_dir=snr_fas_output_dir,
ko_directory=ko_directory,
common_frequency_vector=common_frequency_vector,
xml_dir=xml_dir,
),
batch,
)
Expand Down
18 changes: 17 additions & 1 deletion nzgmdb/data_processing/process_observed.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import numpy as np
import pandas as pd
from obspy import read_inventory

import qcore.timeseries as ts
from nzgmdb.data_processing import waveform_manipulation
Expand All @@ -20,6 +21,7 @@ def process_single_mseed(
gmc_df: pd.DataFrame | None = None,
fmax_df: pd.DataFrame | None = None,
bypass_df: pd.DataFrame | None = None,
xml_dir: Path | None = None,
):
"""
Process a single mseed file and save the processed data to a txt file
Expand All @@ -38,6 +40,8 @@ def process_single_mseed(
The Fmax values
bypass_df : pd.DataFrame, optional
The bypass records containing custom fmin, fmax values
xml_dir : Path, optional
The directory containing the station xml files for inventory information

Returns
-------
Expand Down Expand Up @@ -65,9 +69,19 @@ def process_single_mseed(
skipped_record = pd.DataFrame([skipped_record_dict])
return skipped_record

if xml_dir is None:
inventory = None
else:
# Load the inventory information
inventory_file = xml_dir / f"NZ.{station}.xml"
if not inventory_file.is_file():
inventory = None
else:
inventory = read_inventory(inventory_file)

# Perform initial pre-processing
try:
mseed = waveform_manipulation.initial_preprocessing(mseed)
mseed = waveform_manipulation.initial_preprocessing(mseed, inventory=inventory)
except custom_errors.InventoryNotFoundError:
skipped_record_dict = {
"record_id": mseed_stem,
Expand Down Expand Up @@ -217,6 +231,7 @@ def process_mseeds_to_txt(
"""
# Get the raw waveform mseed files
waveform_dir = file_structure.get_waveform_dir(main_dir)
xml_dir = file_structure.get_stationxml_dir(main_dir)
mseed_files = waveform_dir.rglob("*.mseed")

# Load the GMC, Fmax and bypass records
Expand All @@ -237,6 +252,7 @@ def process_mseeds_to_txt(
gmc_df=gmc_df,
fmax_df=fmax_df,
bypass_df=bypass_df,
xml_dir=xml_dir,
),
mseed_files,
)
Expand Down
2 changes: 1 addition & 1 deletion nzgmdb/data_processing/quality_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -552,7 +552,7 @@ def filter_troublesome_sensitivity(
sensitivity_ignore = pd.read_csv(NZGMDB_DATA.fetch("sensitivity_ignore.csv"))

# Ensure datetime columns are in datetime format
catalogue["datetime"] = pd.to_datetime(catalogue["datetime"])
catalogue["datetime"] = pd.to_datetime(catalogue["datetime"], format="ISO8601")
sensitivity_ignore["start_date"] = pd.to_datetime(sensitivity_ignore["start_date"])
sensitivity_ignore["end_date"] = pd.to_datetime(sensitivity_ignore["end_date"])

Expand Down
36 changes: 22 additions & 14 deletions nzgmdb/data_processing/waveform_manipulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""

import numpy as np
from obspy import Inventory
from obspy.clients.fdsn import Client as FDSN_Client
from obspy.clients.fdsn.header import FDSNNoDataException
from obspy.core.stream import Stream
Expand All @@ -13,7 +14,10 @@


def initial_preprocessing(
mseed: Stream, apply_taper: bool = True, apply_zero_padding: bool = True
mseed: Stream,
apply_taper: bool = True,
apply_zero_padding: bool = True,
inventory: Inventory = None,
):
"""
Basic pre-processing of the waveform data
Expand All @@ -33,6 +37,8 @@ def initial_preprocessing(
Whether to apply the tapering, by default True
apply_zero_padding : bool, optional
Whether to apply zero padding, by default True
inventory : Inventory, optional
        The inventory object to use for sensitivity removal, by default None (will try to extract from FDSN if not provided)

Returns
-------
Expand Down Expand Up @@ -73,21 +79,23 @@ def initial_preprocessing(
location = mseed[0].stats.location
channel = mseed[0].stats.channel

# Get Station Information from geonet clients
# Fetching here instead of passing the inventory object as searching for the station, network, and channel
# information takes a long time as it's implemented in a for loop
try:
client_NZ = FDSN_Client("GEONET")
inv = client_NZ.get_stations(
level="response", network="NZ", station=station, location=location
)
except FDSNNoDataException:
raise custom_errors.InventoryNotFoundError(
f"No inventory information found for station {station} with location {location}"
)
if inventory is not None:
try:
client_NZ = FDSN_Client("GEONET")
inv = client_NZ.get_stations(
level="response", network="NZ", station=station, location=location
)
except FDSNNoDataException:
raise custom_errors.InventoryNotFoundError(
f"No inventory information found for station {station} with location {location}"
)
else:
inv = inventory

try:
mseed = mseed.remove_sensitivity(inventory=inv)
# Ensure we get the correct output type for strong motion vs broadband
output_type = "ACC" if channel[:2] in ["HN", "BN"] else "VEL"
mseed = mseed.remove_response(inventory=inv, output=output_type)
except ValueError:
raise custom_errors.SensitivityRemovalError(
f"Failed to remove sensitivity for station {station} with location {location}"
Expand Down
47 changes: 47 additions & 0 deletions nzgmdb/data_retrieval/inventory_xml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"""
Fetches inventory data from the obspy FDSN client and saves it as StationXML files.
"""

import datetime
from pathlib import Path

from obspy.clients.fdsn import Client as FDSN_Client

from nzgmdb.management import file_structure


def fetch_and_save_inventory(
    main_dir: Path,
    stations: list[str],
    starttime: str = "2000-01-01",
    endtime: str | None = None,
):
    """
    Fetches inventory data from the obspy FDSN client and saves it as StationXML files.

    One response-level StationXML file is written per station, named
    ``NZ.<station>.xml`` inside the directory returned by
    ``file_structure.get_stationxml_dir``.

    Parameters
    ----------
    main_dir : Path
        The main directory where the StationXML files will be saved.
    stations : list[str]
        A list of station codes to fetch the inventory data for.
    starttime : str, optional
        The start time for the inventory data, by default "2000-01-01".
    endtime : str, optional
        The end time for the inventory data, by default None, which resolves
        to the current date at call time.
    """
    # Resolve the default here rather than in the signature: a default of
    # datetime.now() in the signature is evaluated only ONCE at import time,
    # so a long-running process would keep using a stale "current" date.
    if endtime is None:
        endtime = datetime.datetime.now().strftime("%Y-%m-%d")

    client = FDSN_Client("GEONET")

    xml_dir = file_structure.get_stationxml_dir(main_dir)
    xml_dir.mkdir(parents=True, exist_ok=True)

    for sta in stations:
        # level="response" is required so the saved XML can later be used
        # for instrument response removal (remove_response).
        inv = client.get_stations(
            network="NZ",
            station=sta,
            starttime=starttime,
            endtime=endtime,
            level="response",
        )
        fname = xml_dir / f"NZ.{sta}.xml"
        inv.write(fname, format="STATIONXML")
11 changes: 8 additions & 3 deletions nzgmdb/data_retrieval/sites.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,11 @@ def create_site_table_response() -> pd.DataFrame:
station.end_date,
]
)
sta_df = pd.DataFrame(station_info, columns=["net", "sta", "lat", "lon", "elev", "creation_date", "end_date"])
sta_df = sta_df.drop_duplicates().reset_index(drop=True)
sta_df = pd.DataFrame(
station_info,
columns=["net", "sta", "lat", "lon", "elev", "creation_date", "end_date"],
)
sta_df = sta_df.drop_duplicates(["net", "sta"]).reset_index(drop=True)

# Get the Geonet metadata summary information
geo_meta_summary_df = pd.read_csv(
Expand Down Expand Up @@ -77,7 +80,9 @@ def create_site_table_response() -> pd.DataFrame:
)

merged_df = geo_meta_summary_df.merge(
sta_df[["net", "elev", "sta"]], on="sta", how="left"
sta_df[["net", "elev", "sta", "creation_date", "end_date"]],
on="sta",
how="left",
)
# Specify the required files for fiona
NZGMDB_DATA.fetch("TectonicDomains_Feb2021_8_NZTM.shp")
Expand Down
10 changes: 8 additions & 2 deletions nzgmdb/data_retrieval/waveform_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from pandas.errors import EmptyDataError

from nzgmdb.data_processing import filtering
from nzgmdb.data_retrieval import inventory_xml
from nzgmdb.management import config as cfg
from nzgmdb.management import custom_errors, file_structure
from nzgmdb.mseed_management import creation
Expand Down Expand Up @@ -77,7 +78,6 @@ def get_inital_stream(
channel_codes,
start_time,
end_time,
attach_response=True,
)
break
except FDSNNoDataException:
Expand Down Expand Up @@ -976,7 +976,7 @@ def extract_waveforms(
station_extraction_table["evid_sta"] = (
station_extraction_table["evid"] + "_" + station_extraction_table["sta"]
)
# Mkae a evid_sta column in the only_record_ids
# Make a evid_sta column in the only_record_ids
only_record_ids["evid_sta"] = (
only_record_ids["record_id"].str.split("_").str[0]
+ "_"
Expand Down Expand Up @@ -1087,6 +1087,12 @@ def extract_waveforms(
batch_dir / f"multi_trace_issues_{batch_index}.csv", index=False
)

# Grab all the station xmls and write them as outputs
unique_sites = station_extraction_table["sta"].unique()
print(f"Fetching station XML metadata for {len(unique_sites)} unique sites")
inventory_xml.fetch_and_save_inventory(main_dir, unique_sites)
print("Station XML metadata fetching complete.")

# Combine all the event and sta_mag dataframes
sta_mag_dfs = []
skipped_records_dfs = []
Expand Down
17 changes: 17 additions & 0 deletions nzgmdb/management/file_structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,3 +281,20 @@ def get_gmc_dir(main_dir: Path) -> Path:
The directory where GMC results are stored.
"""
return main_dir / "gmc"


def get_stationxml_dir(main_dir: Path) -> Path:
    """
    Return the directory used to store StationXML files.

    Parameters
    ----------
    main_dir : Path
        The main directory of the NZGMDB results.

    Returns
    -------
    Path
        The directory where StationXML files are stored.
    """
    stationxml_dir = main_dir / "stationxml"
    return stationxml_dir
9 changes: 8 additions & 1 deletion nzgmdb/mseed_management/reading.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import mseedlib
import numpy as np
from obspy import Inventory
from obspy.core import Stream, Trace, UTCDateTime

from nzgmdb.data_processing import waveform_manipulation
Expand Down Expand Up @@ -73,6 +74,7 @@ def create_waveform_from_mseed(
pre_process: bool = False,
apply_taper: bool = False,
apply_zero_padding: bool = False,
inventory: Inventory | None = None,
):
"""
Create a waveform object from a mseed file
Expand All @@ -89,6 +91,8 @@ def create_waveform_from_mseed(
Whether to apply a taper to the data, by default False (Only used when pre_process is True)
apply_zero_padding : bool (optional)
Whether to apply zero padding to the data, by default False (Only used when pre_process is True)
inventory : Inventory, optional
        The inventory object to use for sensitivity removal, by default None (will try to extract from FDSN if not provided)

Returns
-------
Expand All @@ -115,7 +119,10 @@ def create_waveform_from_mseed(
# Process the data if needed
if pre_process:
mseed = waveform_manipulation.initial_preprocessing(
mseed, apply_taper=apply_taper, apply_zero_padding=apply_zero_padding
mseed,
apply_taper=apply_taper,
apply_zero_padding=apply_zero_padding,
inventory=inventory,
)

# Stack the data
Expand Down
Loading
Loading