Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
0ae5704
rch test
joelridden Sep 8, 2025
535e85c
test run
joelridden Sep 8, 2025
ad5a09c
cpus
joelridden Sep 8, 2025
138dfab
script mention
joelridden Sep 8, 2025
5d8db96
exclude node 4
joelridden Sep 9, 2025
2ad14ab
test bash
joelridden Sep 17, 2025
cca9b10
Merge branch 'nzcvm_data_update' into rch
joelridden Sep 17, 2025
e61951d
pickle fix
joelridden Sep 17, 2025
0410d2e
pickle fix
joelridden Sep 17, 2025
b447186
standard
joelridden Sep 19, 2025
c374097
adjust pickled objects
joelridden Oct 5, 2025
fe7be58
testing fixes
joelridden Oct 5, 2025
7e0b70e
rename client
joelridden Oct 5, 2025
26cce3c
geonet fork
joelridden Oct 6, 2025
948bff1
attempt mp sites
joelridden Oct 6, 2025
63687a5
inventory changes
joelridden Oct 7, 2025
6d3ef68
processing fix
joelridden Oct 7, 2025
308b709
distances fix
joelridden Oct 7, 2025
50b715f
big core test rch
joelridden Oct 7, 2025
6cfac04
rch fix
joelridden Oct 8, 2025
2642639
adjust cores
joelridden Oct 8, 2025
a4a56c1
add rch machine
joelridden Oct 8, 2025
53e0aff
test new event cat method
joelridden Oct 8, 2025
a33d86e
extraction improvement
joelridden Oct 9, 2025
6f29f9c
client change
joelridden Oct 9, 2025
e4baab8
rename parameter
joelridden Oct 9, 2025
bd73862
decrease extraction
joelridden Oct 9, 2025
efe9a2b
decrease extraction
joelridden Oct 9, 2025
2799bd6
5000 rows
joelridden Oct 9, 2025
8c241d3
Merge branch '4p4' into rch
joelridden Dec 4, 2025
0e9a11e
quality db single copy
joelridden Dec 4, 2025
638a042
Wait for too many requests
joelridden Dec 5, 2025
b5b0163
2 min
joelridden Dec 5, 2025
6487333
32 limit
joelridden Dec 5, 2025
bca491a
self review
joelridden Dec 7, 2025
5119d7e
default batch size
joelridden Dec 7, 2025
9a170ca
config and BH added
joelridden Dec 7, 2025
36cbfed
type hint fix
joelridden Dec 14, 2025
511616d
fix requirements
joelridden Dec 14, 2025
3296323
add type checking
joelridden Jan 9, 2026
6f96e7d
Merge remote-tracking branch 'origin/rch' into type_check
joelridden Jan 9, 2026
569cac6
change numpy version
joelridden Jan 9, 2026
00d391c
install ty
joelridden Jan 9, 2026
78037d6
adjust type check
joelridden Jan 9, 2026
ffe5299
install dependencies
joelridden Jan 9, 2026
0fad64e
ensure same env
joelridden Jan 9, 2026
4358ff3
revert to uv
joelridden Jan 11, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions .github/workflows/types.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# GitHub Actions workflow: static type checking with `ty` on every pull request.
# The scrape this was recovered from had all YAML indentation stripped; the
# structure below is the conventional restoration of the 23-line file shown
# in the diff header (@@ -0,0 +1,23 @@).
name: Type Check
on: [pull_request]
jobs:
  typecheck:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Setup Python
        uses: actions/setup-python@v5

      # uv manages the virtualenv and lockfile; enable-cache speeds up repeat runs.
      - name: Install uv
        uses: astral-sh/setup-uv@v5
        with:
          enable-cache: true

      # Install the project with all optional extras and dev dependencies so the
      # type checker sees the same environment the tests use.
      - name: Install project with types
        run: uv sync --all-extras --dev

      # setup.py and the vendored ccldpy.py are excluded from checking.
      - name: Run type checking with ty
        run: uv run ty check --exclude setup.py --exclude nzgmdb/CCLD/ccldpy.py
3 changes: 2 additions & 1 deletion nzgmdb/calculation/distances.py
Original file line number Diff line number Diff line change
Expand Up @@ -1044,7 +1044,8 @@ def calc_distances(main_dir: Path, n_procs: int = 1):

# Get the station information
client_NZ = FDSN_Client("GEONET")
inventory = client_NZ.get_stations()
channel_codes = config.get_value("channel_codes")
inventory = client_NZ.get_stations(channel=channel_codes, level="station")
station_info = []
for network in inventory:
for station in network:
Expand Down
20 changes: 19 additions & 1 deletion nzgmdb/calculation/snr.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@

import numpy as np
import pandas as pd
from obspy.clients.fdsn import Client as FDSN_Client
from obspy.core.inventory import Inventory
from pandas.errors import EmptyDataError

from IM import im_calculation, snr_calculation
Expand All @@ -23,6 +25,7 @@ def compute_snr_for_single_mseed(
output_dir: Path,
ko_directory: Path,
common_frequency_vector: np.ndarray = im_calculation.DEFAULT_FREQUENCIES,
inventory: Inventory | None = None,
):
"""
Compute the SNR for a single mseed file
Expand All @@ -39,6 +42,9 @@ def compute_snr_for_single_mseed(
Path to the directory containing the Ko matrices
common_frequency_vector : np.ndarray, optional
Common frequency vector to extract for SNR and FAS, by default None
inventory : Inventory, optional
The inventory information for the mseed file, by default None
(Only used to improve performance when reading the mseed file)

Returns
-------
Expand All @@ -60,7 +66,13 @@ def compute_snr_for_single_mseed(

# Read mseed information
try:
waveform = reading.create_waveform_from_mseed(mseed_file, pre_process=True, apply_taper=False, apply_zero_padding=False)
waveform = reading.create_waveform_from_mseed(
mseed_file,
pre_process=True,
apply_taper=False,
apply_zero_padding=False,
inventory=inventory,
)
except custom_errors.InventoryNotFoundError:
skipped_record_dict = {
"record_id": mseed_file.stem,
Expand Down Expand Up @@ -267,6 +279,11 @@ def compute_snr_for_mseed_data(
# Load the phase arrival table
phase_table = pd.read_csv(phase_table_path)

# Load the inventory
client = FDSN_Client("GEONET")
channel_codes = config.get_value("channel_codes")
inventory = client.get_stations(channel=channel_codes, level="response")

# Load the bypass records if provided
if bypass_records_ffp is not None:
bypass_records = pd.read_csv(bypass_records_ffp)
Expand Down Expand Up @@ -295,6 +312,7 @@ def compute_snr_for_mseed_data(
output_dir=snr_fas_output_dir,
ko_directory=ko_directory,
common_frequency_vector=common_frequency_vector,
inventory=inventory,
),
batch,
)
Expand Down
5 changes: 1 addition & 4 deletions nzgmdb/config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,7 @@ priority_phase_list:
- Pn
- Pg
- Pb
channel_codes:
- HN?
- BN?
- HH?
channel_codes: "HN?,BN?,HH?,BH?"
percentage_gap_allowed: 0.1
is_large_overlap: 0.5
# Mseed Variables
Expand Down
16 changes: 14 additions & 2 deletions nzgmdb/config/machine_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ mantle:
upload: 28
hypocentre:
geonet: 44
extraction: 44
extraction: 32
tec_domain: 44
phase_table: 44
snr: 18
Expand All @@ -33,4 +33,16 @@ hypocentre:
process: 44
im: 12
distances: 44
upload: 44
upload: 44
rch:
geonet: 128
extraction: 32
tec_domain: 128
phase_table: 128
snr: 64
fmax: 128
gmc: 64
process: 128
im: 64
distances: 128
upload: 128
2 changes: 1 addition & 1 deletion nzgmdb/data_processing/merge_flatfiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ def add_ground_level(
"""
# Find the station location information with the inventory lat, lon and elev
config = cfg.Config()
channel_codes = ",".join(config.get_value("channel_codes"))
channel_codes = config.get_value("channel_codes")
client_NZ = FDSN_Client("GEONET")
inventory = client_NZ.get_stations(channel=channel_codes, level="response")
station_info = [
Expand Down
14 changes: 13 additions & 1 deletion nzgmdb/data_processing/process_observed.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,12 @@

import numpy as np
import pandas as pd
from obspy.clients.fdsn import Client as FDSN_Client
from obspy.core.inventory import Inventory

import qcore.timeseries as ts
from nzgmdb.data_processing import waveform_manipulation
from nzgmdb.management import config as cfg
from nzgmdb.management import custom_errors, file_structure
from nzgmdb.mseed_management import reading

Expand All @@ -20,6 +23,7 @@ def process_single_mseed(
gmc_df: pd.DataFrame | None = None,
fmax_df: pd.DataFrame | None = None,
bypass_df: pd.DataFrame | None = None,
inventory: Inventory | None = None,
):
"""
Process a single mseed file and save the processed data to a txt file
Expand All @@ -38,6 +42,8 @@ def process_single_mseed(
The Fmax values
bypass_df : pd.DataFrame, optional
The bypass records containing custom fmin, fmax values
inventory : Inventory, optional
The inventory information for the mseed file

Returns
-------
Expand Down Expand Up @@ -67,7 +73,7 @@ def process_single_mseed(

# Perform initial pre-processing
try:
mseed = waveform_manipulation.initial_preprocessing(mseed)
mseed = waveform_manipulation.initial_preprocessing(mseed, inventory=inventory)
except custom_errors.InventoryNotFoundError:
skipped_record_dict = {
"record_id": mseed_stem,
Expand Down Expand Up @@ -229,6 +235,11 @@ def process_mseeds_to_txt(
)
bypass_df = None if bypass_records_ffp is None else pd.read_csv(bypass_records_ffp)

config = cfg.Config()
channel_codes = config.get_value("channel_codes")
client = FDSN_Client("GEONET")
inventory = client.get_stations(channel=channel_codes, level="response")

# Use multiprocessing to process the mseed files
with multiprocessing.Pool(processes=n_procs) as pool:
skipped_records = pool.map(
Expand All @@ -237,6 +248,7 @@ def process_mseeds_to_txt(
gmc_df=gmc_df,
fmax_df=fmax_df,
bypass_df=bypass_df,
inventory=inventory,
),
mseed_files,
)
Expand Down
35 changes: 17 additions & 18 deletions nzgmdb/data_processing/quality_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -760,6 +760,7 @@ def filter_duplicate_channels(
2. HN channels (Strong motion, high frequency)
3. BN channels (Strong motion, lower frequency)
4. HH channels (Broadband, high frequency)
5. BH channels (Broadband, lower frequency)

If multiple records have the same priority, the first one encountered is kept.
All other duplicates are removed and returned in the skipped records.
Expand All @@ -785,15 +786,15 @@ def filter_duplicate_channels(
catalogue["bypass"] = catalogue["record_id"].isin(bypass_records)

# Step 3: Define priority levels
priority = {"HN": 1, "BN": 2, "HH": 3}
catalogue["chan_priority"] = catalogue["chan"].map(priority).fillna(4)
priority = {"HN": 1, "BN": 2, "HH": 3, "BH": 4}
catalogue["chan_priority"] = catalogue["chan"].map(priority).fillna(5)
# Step 4: Override priority for bypass records
catalogue.loc[catalogue["bypass"], "chan_priority"] = 0

# Step 5: Sort by priority and select top-priority row per group
catalog_sorted = catalogue.sort_values(by=["evid_sta", "chan_priority"])
# Remove records with priority 4 (not HN, BN, HH)
catalog_sorted = catalog_sorted[catalog_sorted["chan_priority"] < 4]
# Remove records with priority 4 (not HN, BN, HH, BH)
catalog_sorted = catalog_sorted[catalog_sorted["chan_priority"] < 5]
best_dups = catalog_sorted.groupby("evid_sta", as_index=False).nth(0)

# Step 6: Identify which records to drop (the non-best ones)
Expand Down Expand Up @@ -872,35 +873,33 @@ def apply_all_filters(
fmin_max = fmin_max if fmin_max is not None else config.get_value("fmin_max")
mag_min = min_mag if min_mag is not None else config.get_value("quality_min_mag")

catalogue_copy = catalogue.copy()

# Filter by magnitude
skipped_records_mag = filter_mag(catalogue.copy(), mag_min)
skipped_records_mag = filter_mag(catalogue_copy, mag_min)

# Find ground level locations
skipped_records_ground = filter_ground_level_locations(
catalogue.copy(), bypass_records
catalogue_copy, bypass_records
)

# Find has score mean
skipped_records_has_score = filter_has_score_mean(catalogue.copy(), bypass_records)
skipped_records_has_score = filter_has_score_mean(catalogue_copy, bypass_records)

# Find score mean
skipped_records_score = filter_score_mean(
catalogue.copy(), score_min, bypass_records
)
skipped_records_score = filter_score_mean(catalogue_copy, score_min, bypass_records)

# Find multi mean
skipped_records_multi = filter_multi_mean(
catalogue.copy(), multi_max, bypass_records
)
skipped_records_multi = filter_multi_mean(catalogue_copy, multi_max, bypass_records)

# Find fmax
skipped_records_fmax = filter_fmax(catalogue.copy(), fmax_min, bypass_records)
skipped_records_fmax = filter_fmax(catalogue_copy, fmax_min, bypass_records)

# Find fmin
skipped_records_fmin = filter_fmin(catalogue.copy(), fmin_max, bypass_records)
skipped_records_fmin = filter_fmin(catalogue_copy, fmin_max, bypass_records)

# Find missing station information
skipped_records_sta = filter_missing_sta_info(catalogue.copy(), bypass_records)
skipped_records_sta = filter_missing_sta_info(catalogue_copy, bypass_records)

# Find clipped records
skipped_records_clipped = apply_clipNet_filter(clipped_records_ffp, bypass_records)
Expand All @@ -910,12 +909,12 @@ def apply_all_filters(

# Find troublesome sensitivity records
skipped_records_sensitivity = filter_troublesome_sensitivity(
catalogue.copy(), bypass_records
catalogue_copy, bypass_records
)

# Find empirical predictions
skipped_records_empirical = filter_empirical_predictions(
catalogue.copy(), bypass_records
catalogue_copy, bypass_records
)

# Combine all the skipped records
Expand Down
41 changes: 26 additions & 15 deletions nzgmdb/data_processing/waveform_manipulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import numpy as np
from obspy.clients.fdsn import Client as FDSN_Client
from obspy.clients.fdsn.header import FDSNNoDataException
from obspy.core.inventory import Inventory
from obspy.core.stream import Stream
from scipy import integrate, signal

Expand All @@ -13,7 +14,10 @@


def initial_preprocessing(
mseed: Stream, apply_taper: bool = True, apply_zero_padding: bool = True
mseed: Stream,
apply_taper: bool = True,
apply_zero_padding: bool = True,
inventory: Inventory = None,
):
"""
Basic pre-processing of the waveform data
Expand All @@ -33,6 +37,8 @@ def initial_preprocessing(
Whether to apply the tapering, by default True
apply_zero_padding : bool, optional
Whether to apply zero padding, by default True
inventory : Inventory, optional
The inventory object containing the response information, by default None

Returns
-------
Expand Down Expand Up @@ -73,29 +79,34 @@ def initial_preprocessing(
location = mseed[0].stats.location
channel = mseed[0].stats.channel

# Get Station Information from geonet clients
# Fetching here instead of passing the inventory object as searching for the station, network, and channel
# information takes a long time as it's implemented in a for loop
try:
client_NZ = FDSN_Client("GEONET")
inv = client_NZ.get_stations(
level="response", network="NZ", station=station, location=location
)
except FDSNNoDataException:
raise custom_errors.InventoryNotFoundError(
f"No inventory information found for station {station} with location {location}"
)
if inventory is not None:
# Select only the required station and location from the inventory
inv_selected = inventory.select(station=station, location=location)
if len(inv_selected) == 0:
raise custom_errors.InventoryNotFoundError(
f"No inventory information found for station {station} with location {location}"
)
else:
try:
client_NZ = FDSN_Client("GEONET")
inv_selected = client_NZ.get_stations(
level="response", network="NZ", station=station, location=location
)
except FDSNNoDataException:
raise custom_errors.InventoryNotFoundError(
f"No inventory information found for station {station} with location {location}"
)

try:
mseed = mseed.remove_sensitivity(inventory=inv)
mseed = mseed.remove_sensitivity(inventory=inv_selected)
except ValueError:
raise custom_errors.SensitivityRemovalError(
f"Failed to remove sensitivity for station {station} with location {location}"
)

# Rotate
try:
mseed.rotate("->ZNE", inventory=inv)
mseed.rotate("->ZNE", inventory=inv_selected)
except (
Exception # noqa: BLE001
): # Due to obspy raising an Exception instead of a specific error
Expand Down
Loading
Loading