Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions changelog.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# NZGMDB Changelog

## Version 4.4 - September 25 **2000-01-01 to 2024-12-31**
* Adding Broadband data (HH) to the database
## Version 4.4 - Feb 26 **2000-01-01 to 2025-12-31**
* Adding Broadband data (HH, BH) to the database
* New Quality Filter to remove Broadband data during certain time periods due to sensitivity issues
* New Quality Filter to compare against an empirical GMPE (Atkinson 2022) to remove significant outliers
* Add ability to generate a report to compare NZGMDB versions
Expand All @@ -18,6 +18,10 @@
* Adjust magnitude filters: full ≥ 2.5; quality-filtered ≥ 3.5.
* Added Hyp_depth to geometry table
* Adjusted is_ground_level logic by adding in FDSN Inventory data
* Add XML inventory information per station
* Change processing to use remove response instead of remove sensitivity
* Updated CMT solutions for domain regions
* Increased date range to end of 2025

## Version 4.3 - July 25 **2000-01-01 to 2024-12-31**
* Sensitivity Fix (previously always taking first value not for actual datetime expected)
Expand Down
14 changes: 14 additions & 0 deletions nzgmdb/calculation/snr.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

import numpy as np
import pandas as pd
from obspy import read_inventory
from obspy.clients.fdsn import Client as FDSN_Client
from obspy.core.inventory import Inventory
from pandas.errors import EmptyDataError
Expand All @@ -25,6 +26,7 @@ def compute_snr_for_single_mseed(
output_dir: Path,
ko_directory: Path,
common_frequency_vector: np.ndarray = im_calculation.DEFAULT_FREQUENCIES,
xml_dir: Path = None,
inventory: Inventory | None = None,
):
"""
Expand All @@ -42,6 +44,9 @@ def compute_snr_for_single_mseed(
Path to the directory containing the Ko matrices
common_frequency_vector : np.ndarray, optional
Common frequency vector to extract for SNR and FAS, by default None
xml_dir : Path, optional
Path to the directory containing the StationXML files, by default None
If None, will try to extract inventory from FDSN
inventory : Inventory, optional
The inventory information for the mseed file, by default None
(Only used to improve performance when reading the mseed file)
Expand All @@ -64,6 +69,13 @@ def compute_snr_for_single_mseed(
# Get the event_id
event_id = file_structure.get_event_id_from_mseed(mseed_file)

inventory = None
if xml_dir:
# Load the inventory information
inventory_file = xml_dir / f"NZ.{station}.xml"
if inventory_file.is_file():
inventory = read_inventory(inventory_file)

# Read mseed information
try:
waveform = reading.create_waveform_from_mseed(
Expand Down Expand Up @@ -250,6 +262,7 @@ def compute_snr_for_mseed_data(
snr_fas_output_dir.mkdir(parents=True, exist_ok=True)
batch_dir = meta_output_dir / "snr_batch_files"
batch_dir.mkdir(parents=True, exist_ok=True)
xml_dir = file_structure.get_stationxml_dir(data_dir)

config = cfg.Config()
# Creating the common frequency vector if not provided
Expand Down Expand Up @@ -312,6 +325,7 @@ def compute_snr_for_mseed_data(
output_dir=snr_fas_output_dir,
ko_directory=ko_directory,
common_frequency_vector=common_frequency_vector,
xml_dir=xml_dir,
inventory=inventory,
),
batch,
Expand Down
13 changes: 13 additions & 0 deletions nzgmdb/data_processing/process_observed.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import numpy as np
import pandas as pd
from obspy import read_inventory
from obspy.clients.fdsn import Client as FDSN_Client
from obspy.core.inventory import Inventory

Expand All @@ -23,6 +24,7 @@ def process_single_mseed(
gmc_df: pd.DataFrame | None = None,
fmax_df: pd.DataFrame | None = None,
bypass_df: pd.DataFrame | None = None,
xml_dir: Path | None = None,
inventory: Inventory | None = None,
):
"""
Expand All @@ -42,6 +44,8 @@ def process_single_mseed(
The Fmax values
bypass_df : pd.DataFrame, optional
The bypass records containing custom fmin, fmax values
xml_dir : Path, optional
The directory containing the station xml files for inventory information
inventory : Inventory, optional
The inventory information for the mseed file

Expand Down Expand Up @@ -71,6 +75,13 @@ def process_single_mseed(
skipped_record = pd.DataFrame([skipped_record_dict])
return skipped_record

inventory = None
if xml_dir:
# Load the inventory information
inventory_file = xml_dir / f"NZ.{station}.xml"
if inventory_file.is_file():
inventory = read_inventory(inventory_file)

# Perform initial pre-processing
try:
mseed = waveform_manipulation.initial_preprocessing(mseed, inventory=inventory)
Expand Down Expand Up @@ -223,6 +234,7 @@ def process_mseeds_to_txt(
"""
# Get the raw waveform mseed files
waveform_dir = file_structure.get_waveform_dir(main_dir)
xml_dir = file_structure.get_stationxml_dir(main_dir)
mseed_files = waveform_dir.rglob("*.mseed")

# Load the GMC, Fmax and bypass records
Expand All @@ -248,6 +260,7 @@ def process_mseeds_to_txt(
gmc_df=gmc_df,
fmax_df=fmax_df,
bypass_df=bypass_df,
xml_dir=xml_dir,
inventory=inventory,
),
mseed_files,
Expand Down
2 changes: 1 addition & 1 deletion nzgmdb/data_processing/quality_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -552,7 +552,7 @@ def filter_troublesome_sensitivity(
sensitivity_ignore = pd.read_csv(NZGMDB_DATA.fetch("sensitivity_ignore.csv"))

# Ensure datetime columns are in datetime format
catalogue["datetime"] = pd.to_datetime(catalogue["datetime"])
catalogue["datetime"] = pd.to_datetime(catalogue["datetime"], format="ISO8601")
sensitivity_ignore["start_date"] = pd.to_datetime(sensitivity_ignore["start_date"])
sensitivity_ignore["end_date"] = pd.to_datetime(sensitivity_ignore["end_date"])

Expand Down
22 changes: 9 additions & 13 deletions nzgmdb/data_processing/waveform_manipulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
"""

import numpy as np
from obspy import Inventory
from obspy.clients.fdsn import Client as FDSN_Client
from obspy.clients.fdsn.header import FDSNNoDataException
from obspy.core.inventory import Inventory
from obspy.core.stream import Stream
from scipy import integrate, signal

Expand Down Expand Up @@ -38,7 +38,7 @@ def initial_preprocessing(
apply_zero_padding : bool, optional
Whether to apply zero padding, by default True
inventory : Inventory, optional
The inventory object containing the response information, by default None
The inventory object to use for sensitivity removal, by default None (Will try extract from FDSN if not provided)

Returns
-------
Expand Down Expand Up @@ -79,17 +79,11 @@ def initial_preprocessing(
location = mseed[0].stats.location
channel = mseed[0].stats.channel

if inventory is not None:
# Select only the required station and location from the inventory
inv_selected = inventory.select(station=station, location=location)
if len(inv_selected) == 0:
raise custom_errors.InventoryNotFoundError(
f"No inventory information found for station {station} with location {location}"
)
else:
inv = inventory
if inv is None:
try:
client_NZ = FDSN_Client("GEONET")
inv_selected = client_NZ.get_stations(
inv = client_NZ.get_stations(
level="response", network="NZ", station=station, location=location
)
except FDSNNoDataException:
Expand All @@ -98,15 +92,17 @@ def initial_preprocessing(
)

try:
mseed = mseed.remove_sensitivity(inventory=inv_selected)
# Ensure we get the correct output type for strong motion vs broadband
output_type = "ACC" if channel[:2] in ["HN", "BN"] else "VEL"
mseed = mseed.remove_response(inventory=inv, output=output_type)
except ValueError:
raise custom_errors.SensitivityRemovalError(
f"Failed to remove sensitivity for station {station} with location {location}"
)

# Rotate
try:
mseed.rotate("->ZNE", inventory=inv_selected)
mseed.rotate("->ZNE", inventory=inv)
except (
Exception # noqa: BLE001
): # Due to obspy raising an Exception instead of a specific error
Expand Down
57 changes: 57 additions & 0 deletions nzgmdb/data_retrieval/inventory_xml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""
Fetches inventory data from the obspy FDSN client and saves it as StationXML files.
"""

import datetime
from pathlib import Path

from obspy.clients.fdsn import Client as FDSN_Client
from obspy.clients.fdsn.header import FDSNNoDataException

from nzgmdb.management import file_structure


def fetch_and_save_inventory(
main_dir: Path,
stations: list[str],
starttime: str = "2000-01-01",
endtime: str = datetime.datetime.strftime(datetime.datetime.now(), "%Y-%m-%d"),
):
"""
Fetches inventory data from the obspy FDSN client and saves it as StationXML files.

Parameters
----------
main_dir : Path
The main directory where the StationXML files will be saved.
stations : list[str]
A list of station codes to fetch the inventory data for.
starttime : str, optional
The start time for the inventory data, by default "2000-01-01".
endtime : str, optional
The end time for the inventory data, by default the current date.
"""
client = FDSN_Client("GEONET")

xml_dir = file_structure.get_stationxml_dir(main_dir)
xml_dir.mkdir(parents=True, exist_ok=True)

all_stations = ",".join(stations)

try:
inv = client.get_stations(
network="NZ",
station=all_stations,
starttime=starttime,
endtime=endtime,
level="response",
)
for sta in stations:
sel = inv.select(station=sta)
if not sel.networks:
print(f"Warning: No inventory data found for station {sta}. Skipping.")
continue
fname = xml_dir / f"NZ.{sta}.xml"
sel.write(fname, format="STATIONXML")
except FDSNNoDataException:
print("No inventory data found for the specified stations and time range.")
6 changes: 4 additions & 2 deletions nzgmdb/data_retrieval/sites.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def create_site_table_response() -> pd.DataFrame:
station_info,
columns=["net", "sta", "lat", "lon", "elev", "creation_date", "end_date"],
)
sta_df = sta_df.drop_duplicates().reset_index(drop=True)
sta_df = sta_df.drop_duplicates(["net", "sta"]).reset_index(drop=True)

# Get the Geonet metadata summary information
geo_meta_summary_df = pd.read_csv(
Expand Down Expand Up @@ -82,7 +82,9 @@ def create_site_table_response() -> pd.DataFrame:
)

merged_df = geo_meta_summary_df.merge(
sta_df[["net", "elev", "sta"]], on="sta", how="left"
sta_df[["net", "elev", "sta", "creation_date", "end_date"]],
on="sta",
how="left",
)
# Specify the required files for fiona
NZGMDB_DATA.fetch("TectonicDomains_Feb2021_8_NZTM.shp")
Expand Down
8 changes: 7 additions & 1 deletion nzgmdb/data_retrieval/waveform_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from pandas.errors import EmptyDataError

from nzgmdb.data_processing import filtering
from nzgmdb.data_retrieval import inventory_xml
from nzgmdb.management import config as cfg
from nzgmdb.management import custom_errors, file_structure
from nzgmdb.mseed_management import creation
Expand Down Expand Up @@ -78,7 +79,6 @@ def get_inital_stream(
channel_codes,
start_time,
end_time,
attach_response=True,
)
break
except FDSNTooManyRequestsException:
Expand Down Expand Up @@ -1106,6 +1106,12 @@ def extract_waveforms(
batch_dir / f"multi_trace_issues_{batch_index}.csv", index=False
)

# Grab all the station xmls and write them as outputs
unique_sites = station_extraction_table["sta"].unique()
print(f"Fetching station XML metadata for {len(unique_sites)} unique sites")
inventory_xml.fetch_and_save_inventory(main_dir, unique_sites)
print("Station XML metadata fetching complete.")

# Combine all the event and sta_mag dataframes
sta_mag_dfs = []
skipped_records_dfs = []
Expand Down
17 changes: 17 additions & 0 deletions nzgmdb/management/file_structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,3 +281,20 @@ def get_gmc_dir(main_dir: Path) -> Path:
The directory where GMC results are stored.
"""
return main_dir / "gmc"


def get_stationxml_dir(main_dir: Path) -> Path:
"""
Get the directory for storing StationXML files.

Parameters
----------
main_dir : Path
The main directory of the NZGMDB results.

Returns
-------
Path
The directory where StationXML files are stored.
"""
return main_dir / "stationxml"
7 changes: 3 additions & 4 deletions nzgmdb/mseed_management/reading.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def create_waveform_from_mseed(
pre_process: bool = False,
apply_taper: bool = False,
apply_zero_padding: bool = False,
inventory: Inventory = None,
inventory: Inventory | None = None,
):
"""
Create a waveform object from a mseed file
Expand All @@ -91,9 +91,8 @@ def create_waveform_from_mseed(
Whether to apply a taper to the data, by default False (Only used when pre_process is True)
apply_zero_padding : bool (optional)
Whether to apply zero padding to the data, by default False (Only used when pre_process is True)
inventory : Inventory (optional)
The inventory information for the mseed file, by default None
(Only used to improve performance when pre_process is True)
inventory : Inventory, optional
The inventory object to use for sensitivity removal, by default None (Will try extract from FDSN if not provided)

Returns
-------
Expand Down
Loading
Loading