Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions changelog.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# NZGMDB Changelog

## Version 4.4 - September 25 **2000-01-01 to 2024-12-31**
* Adding Broadband data (HH) to the database
## Version 4.4 - Feb 26 **2000-01-01 to 2025-12-31**
* Adding Broadband data (HH, BH) to the database
* New Quality Filter to remove Broadband data during certain time periods due to sensitivity issues
* New Quality Filter to compare against an empirical GMPE (Atkinson 2022) to remove significant outliers
* Add ability to generate a report to compare NZGMDB versions
Expand All @@ -18,6 +18,10 @@
* Adjust magnitude filters: full ≥ 2.5; quality-filtered ≥ 3.5.
* Added Hyp_depth to geometry table
* Adjusted is_ground_level logic by adding in FDSN Inventory data
* Add XML inventory information per station
* Change processing to use remove response instead of remove sensitivity
* Updated CMT solutions for domain regions
* Increased date range to end of 2025

## Version 4.3 - July 25 **2000-01-01 to 2024-12-31**
* Sensitivity Fix (previously always taking first value not for actual datetime expected)
Expand Down
25 changes: 24 additions & 1 deletion nzgmdb/calculation/snr.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

import numpy as np
import pandas as pd
from obspy import read_inventory
from pandas.errors import EmptyDataError

from IM import im_calculation, snr_calculation
Expand All @@ -23,6 +24,7 @@ def compute_snr_for_single_mseed(
output_dir: Path,
ko_directory: Path,
common_frequency_vector: np.ndarray = im_calculation.DEFAULT_FREQUENCIES,
xml_dir: Path = None,
):
"""
Compute the SNR for a single mseed file
Expand All @@ -39,6 +41,9 @@ def compute_snr_for_single_mseed(
Path to the directory containing the Ko matrices
common_frequency_vector : np.ndarray, optional
Common frequency vector to extract for SNR and FAS, by default None
xml_dir : Path, optional
Path to the directory containing the StationXML files, by default None
If None, will try to extract inventory from FDSN

Returns
-------
Expand All @@ -58,9 +63,25 @@ def compute_snr_for_single_mseed(
# Get the event_id
event_id = file_structure.get_event_id_from_mseed(mseed_file)

if xml_dir is None:
inventory = None
else:
# Load the inventory information
inventory_file = xml_dir / f"NZ.{station}.xml"
if not inventory_file.is_file():
inventory = None
else:
inventory = read_inventory(inventory_file)

# Read mseed information
try:
waveform = reading.create_waveform_from_mseed(mseed_file, pre_process=True, apply_taper=False, apply_zero_padding=False)
waveform = reading.create_waveform_from_mseed(
mseed_file,
pre_process=True,
apply_taper=False,
apply_zero_padding=False,
inventory=inventory,
)
except custom_errors.InventoryNotFoundError:
skipped_record_dict = {
"record_id": mseed_file.stem,
Expand Down Expand Up @@ -238,6 +259,7 @@ def compute_snr_for_mseed_data(
snr_fas_output_dir.mkdir(parents=True, exist_ok=True)
batch_dir = meta_output_dir / "snr_batch_files"
batch_dir.mkdir(parents=True, exist_ok=True)
xml_dir = file_structure.get_stationxml_dir(data_dir)

config = cfg.Config()
# Creating the common frequency vector if not provided
Expand Down Expand Up @@ -295,6 +317,7 @@ def compute_snr_for_mseed_data(
output_dir=snr_fas_output_dir,
ko_directory=ko_directory,
common_frequency_vector=common_frequency_vector,
xml_dir=xml_dir,
),
batch,
)
Expand Down
18 changes: 17 additions & 1 deletion nzgmdb/data_processing/process_observed.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import numpy as np
import pandas as pd
from obspy import read_inventory

import qcore.timeseries as ts
from nzgmdb.data_processing import waveform_manipulation
Expand All @@ -20,6 +21,7 @@ def process_single_mseed(
gmc_df: pd.DataFrame | None = None,
fmax_df: pd.DataFrame | None = None,
bypass_df: pd.DataFrame | None = None,
xml_dir: Path | None = None,
):
"""
Process a single mseed file and save the processed data to a txt file
Expand All @@ -38,6 +40,8 @@ def process_single_mseed(
The Fmax values
bypass_df : pd.DataFrame, optional
The bypass records containing custom fmin, fmax values
xml_dir : Path, optional
The directory containing the station xml files for inventory information

Returns
-------
Expand Down Expand Up @@ -65,9 +69,19 @@ def process_single_mseed(
skipped_record = pd.DataFrame([skipped_record_dict])
return skipped_record

if xml_dir is None:
inventory = None
else:
# Load the inventory information
inventory_file = xml_dir / f"NZ.{station}.xml"
if not inventory_file.is_file():
inventory = None
else:
inventory = read_inventory(inventory_file)

# Perform initial pre-processing
try:
mseed = waveform_manipulation.initial_preprocessing(mseed)
mseed = waveform_manipulation.initial_preprocessing(mseed, inventory=inventory)
except custom_errors.InventoryNotFoundError:
skipped_record_dict = {
"record_id": mseed_stem,
Expand Down Expand Up @@ -217,6 +231,7 @@ def process_mseeds_to_txt(
"""
# Get the raw waveform mseed files
waveform_dir = file_structure.get_waveform_dir(main_dir)
xml_dir = file_structure.get_stationxml_dir(main_dir)
mseed_files = waveform_dir.rglob("*.mseed")

# Load the GMC, Fmax and bypass records
Expand All @@ -237,6 +252,7 @@ def process_mseeds_to_txt(
gmc_df=gmc_df,
fmax_df=fmax_df,
bypass_df=bypass_df,
xml_dir=xml_dir,
),
mseed_files,
)
Expand Down
2 changes: 1 addition & 1 deletion nzgmdb/data_processing/quality_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -552,7 +552,7 @@ def filter_troublesome_sensitivity(
sensitivity_ignore = pd.read_csv(NZGMDB_DATA.fetch("sensitivity_ignore.csv"))

# Ensure datetime columns are in datetime format
catalogue["datetime"] = pd.to_datetime(catalogue["datetime"])
catalogue["datetime"] = pd.to_datetime(catalogue["datetime"], format="ISO8601")
sensitivity_ignore["start_date"] = pd.to_datetime(sensitivity_ignore["start_date"])
sensitivity_ignore["end_date"] = pd.to_datetime(sensitivity_ignore["end_date"])

Expand Down
36 changes: 22 additions & 14 deletions nzgmdb/data_processing/waveform_manipulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""

import numpy as np
from obspy import Inventory
from obspy.clients.fdsn import Client as FDSN_Client
from obspy.clients.fdsn.header import FDSNNoDataException
from obspy.core.stream import Stream
Expand All @@ -13,7 +14,10 @@


def initial_preprocessing(
mseed: Stream, apply_taper: bool = True, apply_zero_padding: bool = True
mseed: Stream,
apply_taper: bool = True,
apply_zero_padding: bool = True,
inventory: Inventory = None,
):
"""
Basic pre-processing of the waveform data
Expand All @@ -33,6 +37,8 @@ def initial_preprocessing(
Whether to apply the tapering, by default True
apply_zero_padding : bool, optional
Whether to apply zero padding, by default True
inventory : Inventory, optional
        The inventory object to use for sensitivity removal, by default None (will try to extract from FDSN if not provided)

Returns
-------
Expand Down Expand Up @@ -73,21 +79,23 @@ def initial_preprocessing(
location = mseed[0].stats.location
channel = mseed[0].stats.channel

# Get Station Information from geonet clients
# Fetching here instead of passing the inventory object as searching for the station, network, and channel
# information takes a long time as it's implemented in a for loop
try:
client_NZ = FDSN_Client("GEONET")
inv = client_NZ.get_stations(
level="response", network="NZ", station=station, location=location
)
except FDSNNoDataException:
raise custom_errors.InventoryNotFoundError(
f"No inventory information found for station {station} with location {location}"
)
if inventory is not None:
try:
client_NZ = FDSN_Client("GEONET")
inv = client_NZ.get_stations(
level="response", network="NZ", station=station, location=location
)
except FDSNNoDataException:
raise custom_errors.InventoryNotFoundError(
f"No inventory information found for station {station} with location {location}"
)
else:
inv = inventory

try:
mseed = mseed.remove_sensitivity(inventory=inv)
# Ensure we get the correct output type for strong motion vs broadband
output_type = "ACC" if channel[:2] in ["HN", "BN"] else "VEL"
mseed = mseed.remove_response(inventory=inv, output=output_type)
except ValueError:
raise custom_errors.SensitivityRemovalError(
f"Failed to remove sensitivity for station {station} with location {location}"
Expand Down
47 changes: 47 additions & 0 deletions nzgmdb/data_retrieval/inventory_xml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"""
Fetches inventory data from the obspy FDSN client and saves it as StationXML files.
"""

import datetime
from pathlib import Path

from obspy.clients.fdsn import Client as FDSN_Client

from nzgmdb.management import file_structure


def fetch_and_save_inventory(
    main_dir: Path,
    stations: list[str],
    starttime: str = "2000-01-01",
    endtime: str | None = None,
):
    """
    Fetches inventory data from the obspy FDSN client and saves it as StationXML files.

    One response-level StationXML file is written per station, named
    ``NZ.<station>.xml`` inside the directory returned by
    ``file_structure.get_stationxml_dir``.

    Parameters
    ----------
    main_dir : Path
        The main directory where the StationXML files will be saved.
    stations : list[str]
        A list of station codes to fetch the inventory data for.
    starttime : str, optional
        The start time for the inventory data, by default "2000-01-01".
    endtime : str, optional
        The end time for the inventory data, by default None, which resolves
        to the current date at call time.
    """
    # Resolve the default here rather than in the signature: a default of
    # datetime.now() in the signature is evaluated only ONCE at import time,
    # so a long-running process would keep using a stale "current" date.
    if endtime is None:
        endtime = datetime.datetime.now().strftime("%Y-%m-%d")

    client = FDSN_Client("GEONET")

    xml_dir = file_structure.get_stationxml_dir(main_dir)
    xml_dir.mkdir(parents=True, exist_ok=True)

    for sta in stations:
        # level="response" is required so the saved XML can later be used
        # for instrument response removal (remove_response).
        inv = client.get_stations(
            network="NZ",
            station=sta,
            starttime=starttime,
            endtime=endtime,
            level="response",
        )
        fname = xml_dir / f"NZ.{sta}.xml"
        inv.write(fname, format="STATIONXML")
11 changes: 8 additions & 3 deletions nzgmdb/data_retrieval/sites.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,11 @@ def create_site_table_response() -> pd.DataFrame:
station.end_date,
]
)
sta_df = pd.DataFrame(station_info, columns=["net", "sta", "lat", "lon", "elev", "creation_date", "end_date"])
sta_df = sta_df.drop_duplicates().reset_index(drop=True)
sta_df = pd.DataFrame(
station_info,
columns=["net", "sta", "lat", "lon", "elev", "creation_date", "end_date"],
)
sta_df = sta_df.drop_duplicates(["net", "sta"]).reset_index(drop=True)

# Get the Geonet metadata summary information
geo_meta_summary_df = pd.read_csv(
Expand Down Expand Up @@ -77,7 +80,9 @@ def create_site_table_response() -> pd.DataFrame:
)

merged_df = geo_meta_summary_df.merge(
sta_df[["net", "elev", "sta"]], on="sta", how="left"
sta_df[["net", "elev", "sta", "creation_date", "end_date"]],
on="sta",
how="left",
)
# Specify the required files for fiona
NZGMDB_DATA.fetch("TectonicDomains_Feb2021_8_NZTM.shp")
Expand Down
10 changes: 8 additions & 2 deletions nzgmdb/data_retrieval/waveform_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from pandas.errors import EmptyDataError

from nzgmdb.data_processing import filtering
from nzgmdb.data_retrieval import inventory_xml
from nzgmdb.management import config as cfg
from nzgmdb.management import custom_errors, file_structure
from nzgmdb.mseed_management import creation
Expand Down Expand Up @@ -77,7 +78,6 @@ def get_inital_stream(
channel_codes,
start_time,
end_time,
attach_response=True,
)
break
except FDSNNoDataException:
Expand Down Expand Up @@ -976,7 +976,7 @@ def extract_waveforms(
station_extraction_table["evid_sta"] = (
station_extraction_table["evid"] + "_" + station_extraction_table["sta"]
)
# Mkae a evid_sta column in the only_record_ids
# Make a evid_sta column in the only_record_ids
only_record_ids["evid_sta"] = (
only_record_ids["record_id"].str.split("_").str[0]
+ "_"
Expand Down Expand Up @@ -1087,6 +1087,12 @@ def extract_waveforms(
batch_dir / f"multi_trace_issues_{batch_index}.csv", index=False
)

# Grab all the station xmls and write them as outputs
unique_sites = station_extraction_table["sta"].unique()
print(f"Fetching station XML metadata for {len(unique_sites)} unique sites")
inventory_xml.fetch_and_save_inventory(main_dir, unique_sites)
print("Station XML metadata fetching complete.")

# Combine all the event and sta_mag dataframes
sta_mag_dfs = []
skipped_records_dfs = []
Expand Down
17 changes: 17 additions & 0 deletions nzgmdb/management/file_structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,3 +281,20 @@ def get_gmc_dir(main_dir: Path) -> Path:
The directory where GMC results are stored.
"""
return main_dir / "gmc"


def get_stationxml_dir(main_dir: Path) -> Path:
    """
    Return the directory used to store StationXML files.

    Parameters
    ----------
    main_dir : Path
        The main directory of the NZGMDB results.

    Returns
    -------
    Path
        The directory where StationXML files are stored.
    """
    stationxml_dir = main_dir / "stationxml"
    return stationxml_dir
9 changes: 8 additions & 1 deletion nzgmdb/mseed_management/reading.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import mseedlib
import numpy as np
from obspy import Inventory
from obspy.core import Stream, Trace, UTCDateTime

from nzgmdb.data_processing import waveform_manipulation
Expand Down Expand Up @@ -73,6 +74,7 @@ def create_waveform_from_mseed(
pre_process: bool = False,
apply_taper: bool = False,
apply_zero_padding: bool = False,
inventory: Inventory | None = None,
):
"""
Create a waveform object from a mseed file
Expand All @@ -89,6 +91,8 @@ def create_waveform_from_mseed(
Whether to apply a taper to the data, by default False (Only used when pre_process is True)
apply_zero_padding : bool (optional)
Whether to apply zero padding to the data, by default False (Only used when pre_process is True)
inventory : Inventory, optional
        The inventory object to use for sensitivity removal, by default None (will try to extract from FDSN if not provided)

Returns
-------
Expand All @@ -115,7 +119,10 @@ def create_waveform_from_mseed(
# Process the data if needed
if pre_process:
mseed = waveform_manipulation.initial_preprocessing(
mseed, apply_taper=apply_taper, apply_zero_padding=apply_zero_padding
mseed,
apply_taper=apply_taper,
apply_zero_padding=apply_zero_padding,
inventory=inventory,
)

# Stack the data
Expand Down
Loading
Loading