diff --git a/changelog.md b/changelog.md index c8ddb58..8beb442 100644 --- a/changelog.md +++ b/changelog.md @@ -1,7 +1,7 @@ # NZGMDB Changelog -## Version 4.4 - September 25 **2000-01-01 to 2024-12-31** -* Adding Broadband data (HH) to the database +## Version 4.4 - Feb 26 **2000-01-01 to 2025-12-31** +* Adding Broadband data (HH, BH) to the database * New Quality Filter to remove Broadband data during certain time periods due to sensitivity issues * New Quality Filter to compare against an empirical GMPE (Atkinson 2022) to remove significant outliers * Add ability to generate a report to compare NZGMDB versions @@ -18,6 +18,10 @@ * Adjust magnitude filters: full ≄ 2.5; quality-filtered ≄ 3.5. * Added Hyp_depth to geometry table * Adjusted is_ground_level logic by adding in FDSN Inventory data +* Add XML inventory information per station +* Change processing to use remove response instead of remove sensitivity +* Updated CMT solutions for domain regions +* Increased date range to end of 2025 ## Version 4.3 - July 25 **2000-01-01 to 2024-12-31** * Sensitivity Fix (previously always taking first value not for actual datetime expected) diff --git a/nzgmdb/calculation/snr.py b/nzgmdb/calculation/snr.py index 81c5cc0..faea1ee 100644 --- a/nzgmdb/calculation/snr.py +++ b/nzgmdb/calculation/snr.py @@ -9,8 +9,7 @@ import numpy as np import pandas as pd -from obspy.clients.fdsn import Client as FDSN_Client -from obspy.core.inventory import Inventory +from obspy import read_inventory from pandas.errors import EmptyDataError from IM import im_calculation, snr_calculation @@ -25,7 +24,7 @@ def compute_snr_for_single_mseed( output_dir: Path, ko_directory: Path, common_frequency_vector: np.ndarray = im_calculation.DEFAULT_FREQUENCIES, - inventory: Inventory | None = None, + xml_dir: Path | None = None, ): """ Compute the SNR for a single mseed file @@ -42,9 +41,9 @@ def compute_snr_for_single_mseed( Path to the directory containing the Ko matrices common_frequency_vector : 
np.ndarray, optional Common frequency vector to extract for SNR and FAS, by default None - inventory : Inventory, optional - The inventory information for the mseed file, by default None - (Only used to improve performance when reading the mseed file) + xml_dir : Path, optional + Path to the directory containing the StationXML files, by default None + If None, will try to extract inventory from FDSN Returns ------- @@ -64,6 +63,13 @@ def compute_snr_for_single_mseed( # Get the event_id event_id = file_structure.get_event_id_from_mseed(mseed_file) + inventory = None + if xml_dir: + # Load the inventory information + inventory_file = xml_dir / f"NZ.{station}.xml" + if inventory_file.is_file(): + inventory = read_inventory(inventory_file) + # Read mseed information try: waveform = reading.create_waveform_from_mseed( @@ -250,6 +256,7 @@ def compute_snr_for_mseed_data( snr_fas_output_dir.mkdir(parents=True, exist_ok=True) batch_dir = meta_output_dir / "snr_batch_files" batch_dir.mkdir(parents=True, exist_ok=True) + xml_dir = file_structure.get_stationxml_dir(data_dir) config = cfg.Config() # Creating the common frequency vector if not provided @@ -279,11 +286,6 @@ def compute_snr_for_mseed_data( # Load the phase arrival table phase_table = pd.read_csv(phase_table_path) - # Load the inventory - client = FDSN_Client("GEONET") - channel_codes = config.get_value("channel_codes") - inventory = client.get_stations(channel=channel_codes, level="response") - # Load the bypass records if provided if bypass_records_ffp is not None: bypass_records = pd.read_csv(bypass_records_ffp) @@ -312,7 +314,7 @@ def compute_snr_for_mseed_data( output_dir=snr_fas_output_dir, ko_directory=ko_directory, common_frequency_vector=common_frequency_vector, - inventory=inventory, + xml_dir=xml_dir, ), batch, ) diff --git a/nzgmdb/data_processing/process_observed.py b/nzgmdb/data_processing/process_observed.py index d99a1c3..f1bdfa8 100644 --- a/nzgmdb/data_processing/process_observed.py +++ 
b/nzgmdb/data_processing/process_observed.py @@ -8,6 +8,7 @@ import numpy as np import pandas as pd +from obspy import read_inventory from obspy.clients.fdsn import Client as FDSN_Client from obspy.core.inventory import Inventory @@ -23,6 +24,7 @@ def process_single_mseed( gmc_df: pd.DataFrame | None = None, fmax_df: pd.DataFrame | None = None, bypass_df: pd.DataFrame | None = None, + xml_dir: Path | None = None, inventory: Inventory | None = None, ): """ @@ -42,6 +44,8 @@ def process_single_mseed( The Fmax values bypass_df : pd.DataFrame, optional The bypass records containing custom fmin, fmax values + xml_dir : Path, optional + The directory containing the station xml files for inventory information inventory : Inventory, optional The inventory information for the mseed file @@ -71,6 +75,13 @@ def process_single_mseed( skipped_record = pd.DataFrame([skipped_record_dict]) return skipped_record + # Prefer the per-station StationXML; otherwise keep the caller-supplied inventory + if xml_dir: + # Load the inventory information + inventory_file = xml_dir / f"NZ.{station}.xml" + if inventory_file.is_file(): + inventory = read_inventory(inventory_file) + # Perform initial pre-processing try: mseed = waveform_manipulation.initial_preprocessing(mseed, inventory=inventory) @@ -223,6 +234,7 @@ def process_mseeds_to_txt( """ # Get the raw waveform mseed files waveform_dir = file_structure.get_waveform_dir(main_dir) + xml_dir = file_structure.get_stationxml_dir(main_dir) mseed_files = waveform_dir.rglob("*.mseed") # Load the GMC, Fmax and bypass records @@ -248,6 +260,7 @@ def process_mseeds_to_txt( gmc_df=gmc_df, fmax_df=fmax_df, bypass_df=bypass_df, + xml_dir=xml_dir, inventory=inventory, ), mseed_files, diff --git a/nzgmdb/data_processing/quality_db.py b/nzgmdb/data_processing/quality_db.py index 4e61efd..fdfbc3b 100644 --- a/nzgmdb/data_processing/quality_db.py +++ b/nzgmdb/data_processing/quality_db.py @@ -552,7 +552,7 @@ def filter_troublesome_sensitivity( sensitivity_ignore = pd.read_csv(NZGMDB_DATA.fetch("sensitivity_ignore.csv")) 
# Ensure datetime columns are in datetime format - catalogue["datetime"] = pd.to_datetime(catalogue["datetime"]) + catalogue["datetime"] = pd.to_datetime(catalogue["datetime"], format="ISO8601") sensitivity_ignore["start_date"] = pd.to_datetime(sensitivity_ignore["start_date"]) sensitivity_ignore["end_date"] = pd.to_datetime(sensitivity_ignore["end_date"]) diff --git a/nzgmdb/data_processing/waveform_manipulation.py b/nzgmdb/data_processing/waveform_manipulation.py index 32c2e5d..9c06568 100644 --- a/nzgmdb/data_processing/waveform_manipulation.py +++ b/nzgmdb/data_processing/waveform_manipulation.py @@ -3,9 +3,9 @@ """ import numpy as np +from obspy import Inventory from obspy.clients.fdsn import Client as FDSN_Client from obspy.clients.fdsn.header import FDSNNoDataException -from obspy.core.inventory import Inventory from obspy.core.stream import Stream from scipy import integrate, signal @@ -38,7 +38,7 @@ def initial_preprocessing( apply_zero_padding : bool, optional Whether to apply zero padding, by default True inventory : Inventory, optional - The inventory object containing the response information, by default None + The inventory object to use for sensitivity removal, by default None (Will try to extract from FDSN if not provided) Returns ------- @@ -79,17 +79,11 @@ def initial_preprocessing( location = mseed[0].stats.location channel = mseed[0].stats.channel - if inventory is not None: - # Select only the required station and location from the inventory - inv_selected = inventory.select(station=station, location=location) - if len(inv_selected) == 0: - raise custom_errors.InventoryNotFoundError( - f"No inventory information found for station {station} with location {location}" - ) - else: + inv = inventory + if inv is None: try: client_NZ = FDSN_Client("GEONET") - inv_selected = client_NZ.get_stations( + inv = client_NZ.get_stations( level="response", network="NZ", station=station, location=location ) except FDSNNoDataException: @@ -98,7 +92,9 @@ def 
initial_preprocessing( ) try: - mseed = mseed.remove_sensitivity(inventory=inv_selected) + # Ensure we get the correct output type for strong motion vs broadband + output_type = "ACC" if channel[:2] in ["HN", "BN"] else "VEL" + mseed = mseed.remove_response(inventory=inv, output=output_type) except ValueError: raise custom_errors.SensitivityRemovalError( f"Failed to remove sensitivity for station {station} with location {location}" @@ -106,7 +102,7 @@ def initial_preprocessing( # Rotate try: - mseed.rotate("->ZNE", inventory=inv_selected) + mseed.rotate("->ZNE", inventory=inv) except ( Exception # noqa: BLE001 ): # Due to obspy raising an Exception instead of a specific error diff --git a/nzgmdb/data_retrieval/inventory_xml.py b/nzgmdb/data_retrieval/inventory_xml.py new file mode 100644 index 0000000..824786c --- /dev/null +++ b/nzgmdb/data_retrieval/inventory_xml.py @@ -0,0 +1,57 @@ +""" +Fetches inventory data from the obspy FDSN client and saves it as StationXML files. +""" + +import datetime +from pathlib import Path + +from obspy.clients.fdsn import Client as FDSN_Client +from obspy.clients.fdsn.header import FDSNNoDataException + +from nzgmdb.management import file_structure + + +def fetch_and_save_inventory( + main_dir: Path, + stations: list[str], + starttime: str = "2000-01-01", + endtime: str = datetime.datetime.strftime(datetime.datetime.now(), "%Y-%m-%d"), +): + """ + Fetches inventory data from the obspy FDSN client and saves it as StationXML files. + + Parameters + ---------- + main_dir : Path + The main directory where the StationXML files will be saved. + stations : list[str] + A list of station codes to fetch the inventory data for. + starttime : str, optional + The start time for the inventory data, by default "2000-01-01". + endtime : str, optional + The end time for the inventory data, by default the current date. 
+ """ + client = FDSN_Client("GEONET") + + xml_dir = file_structure.get_stationxml_dir(main_dir) + xml_dir.mkdir(parents=True, exist_ok=True) + + all_stations = ",".join(stations) + + try: + inv = client.get_stations( + network="NZ", + station=all_stations, + starttime=starttime, + endtime=endtime, + level="response", + ) + for sta in stations: + sel = inv.select(station=sta) + if not sel.networks: + print(f"Warning: No inventory data found for station {sta}. Skipping.") + continue + fname = xml_dir / f"NZ.{sta}.xml" + sel.write(fname, format="STATIONXML") + except FDSNNoDataException: + print("No inventory data found for the specified stations and time range.") diff --git a/nzgmdb/data_retrieval/sites.py b/nzgmdb/data_retrieval/sites.py index 4c68d96..dc03963 100644 --- a/nzgmdb/data_retrieval/sites.py +++ b/nzgmdb/data_retrieval/sites.py @@ -51,7 +51,7 @@ def create_site_table_response() -> pd.DataFrame: station_info, columns=["net", "sta", "lat", "lon", "elev", "creation_date", "end_date"], ) - sta_df = sta_df.drop_duplicates().reset_index(drop=True) + sta_df = sta_df.drop_duplicates(["net", "sta"]).reset_index(drop=True) # Get the Geonet metadata summary information geo_meta_summary_df = pd.read_csv( @@ -82,7 +82,9 @@ def create_site_table_response() -> pd.DataFrame: ) merged_df = geo_meta_summary_df.merge( - sta_df[["net", "elev", "sta"]], on="sta", how="left" + sta_df[["net", "elev", "sta", "creation_date", "end_date"]], + on="sta", + how="left", ) # Specify the required files for fiona NZGMDB_DATA.fetch("TectonicDomains_Feb2021_8_NZTM.shp") diff --git a/nzgmdb/data_retrieval/waveform_extraction.py b/nzgmdb/data_retrieval/waveform_extraction.py index 5c31507..ced5892 100644 --- a/nzgmdb/data_retrieval/waveform_extraction.py +++ b/nzgmdb/data_retrieval/waveform_extraction.py @@ -25,6 +25,7 @@ from pandas.errors import EmptyDataError from nzgmdb.data_processing import filtering +from nzgmdb.data_retrieval import inventory_xml from nzgmdb.management import 
config as cfg from nzgmdb.management import custom_errors, file_structure from nzgmdb.mseed_management import creation @@ -78,7 +79,6 @@ def get_inital_stream( channel_codes, start_time, end_time, - attach_response=True, ) break except FDSNTooManyRequestsException: @@ -1106,6 +1106,12 @@ def extract_waveforms( batch_dir / f"multi_trace_issues_{batch_index}.csv", index=False ) + # Grab all the station xmls and write them as outputs + unique_sites = station_extraction_table["sta"].unique() + print(f"Fetching station XML metadata for {len(unique_sites)} unique sites") + inventory_xml.fetch_and_save_inventory(main_dir, unique_sites) + print("Station XML metadata fetching complete.") + # Combine all the event and sta_mag dataframes sta_mag_dfs = [] skipped_records_dfs = [] diff --git a/nzgmdb/management/file_structure.py b/nzgmdb/management/file_structure.py index ea40b78..d5a85df 100644 --- a/nzgmdb/management/file_structure.py +++ b/nzgmdb/management/file_structure.py @@ -281,3 +281,20 @@ def get_gmc_dir(main_dir: Path) -> Path: The directory where GMC results are stored. """ return main_dir / "gmc" + + +def get_stationxml_dir(main_dir: Path) -> Path: + """ + Get the directory for storing StationXML files. + + Parameters + ---------- + main_dir : Path + The main directory of the NZGMDB results. + + Returns + ------- + Path + The directory where StationXML files are stored. 
+ """ + return main_dir / "stationxml" diff --git a/nzgmdb/mseed_management/reading.py b/nzgmdb/mseed_management/reading.py index 2ecaae9..91c1699 100644 --- a/nzgmdb/mseed_management/reading.py +++ b/nzgmdb/mseed_management/reading.py @@ -74,7 +74,7 @@ def create_waveform_from_mseed( pre_process: bool = False, apply_taper: bool = False, apply_zero_padding: bool = False, - inventory: Inventory = None, + inventory: Inventory | None = None, ): """ Create a waveform object from a mseed file @@ -91,9 +91,8 @@ def create_waveform_from_mseed( Whether to apply a taper to the data, by default False (Only used when pre_process is True) apply_zero_padding : bool (optional) Whether to apply zero padding to the data, by default False (Only used when pre_process is True) - inventory : Inventory (optional) - The inventory information for the mseed file, by default None - (Only used to improve performance when pre_process is True) + inventory : Inventory, optional + The inventory object to use for sensitivity removal, by default None (Will try to extract from FDSN if not provided) Returns ------- diff --git a/nzgmdb/phase_arrival/run_phasenet.py b/nzgmdb/phase_arrival/run_phasenet.py index 5f48eef..a41d9d6 100644 --- a/nzgmdb/phase_arrival/run_phasenet.py +++ b/nzgmdb/phase_arrival/run_phasenet.py @@ -9,7 +9,7 @@ import mseedlib import numpy as np import pandas as pd -from obspy import Stream, Trace, UTCDateTime +from obspy import Inventory, Stream, Trace, UTCDateTime from obspy.clients.fdsn import Client as FDSN_Client from obspy.clients.fdsn.header import FDSNNoDataException @@ -110,7 +110,12 @@ def run_phase_net( return p_wave_ix, s_wave_ix -def process_mseed(mseed_file: Path, h5_ffp: Path, bypass_row: pd.Series = None): +def process_mseed( + mseed_file: Path, + h5_ffp: Path, + bypass_row: pd.Series = None, + inventory: Inventory = None, +): """ Process an mseed file and return the phase arrival data. 
@@ -122,6 +127,8 @@ def process_mseed(mseed_file: Path, h5_ffp: Path, bypass_row: pd.Series = None): Path to the HDF5 file to save the probability series. bypass_row : pd.Series, optional A row from the bypass file with known p and s wave datetimes, by default None + inventory : Inventory, optional + The inventory object to use for sensitivity removal, by default None (Will try extract from FDSN if not provided) Returns ------- @@ -187,33 +194,26 @@ def process_mseed(mseed_file: Path, h5_ffp: Path, bypass_row: pd.Series = None): station = mseed[0].stats.station location = mseed[0].stats.location - # Get Station Information from geonet clients - # Fetching here instead of passing the inventory object as searching for the station, network, and channel - # information takes a long time as it's implemented in a for loop - try: - client_NZ = FDSN_Client("GEONET") - inv = client_NZ.get_stations( - level="response", network="NZ", station=station, location=location - ) - except FDSNNoDataException: - skipped_record = pd.DataFrame( - { - "record_id": [mseed_file.stem], - "reason": ["Failed to find Inventory information"], - } - ) - create_empty_h5_file(h5_ffp, mseed_file.stem) - return None, skipped_record - - # Add the response (Same for all channels) - # this is done so that the sensitivity can be removed otherwise it tries to find the exact same channel - # which can fail when including the inventory information - response = next(cha.response for sta in inv.networks[0] for cha in sta.channels) - for tr in mseed: - tr.stats.response = response + if inventory is None: + try: + client_NZ = FDSN_Client("GEONET") + inv = client_NZ.get_stations( + level="response", network="NZ", station=station, location=location + ) + except FDSNNoDataException: + skipped_record = pd.DataFrame( + { + "record_id": [mseed_file.stem], + "reason": ["Failed to find Inventory information"], + } + ) + create_empty_h5_file(h5_ffp, mseed_file.stem) + return None, skipped_record + else: + inv = 
inventory try: - mseed = mseed.remove_sensitivity() + mseed = mseed.remove_response(inventory=inv, output="ACC") except ValueError: skipped_record = pd.DataFrame( { @@ -301,9 +301,7 @@ def process_mseed(mseed_file: Path, h5_ffp: Path, bypass_row: pd.Series = None): ) -def run_phasenet( - mseed_files_ffp: Path, output_dir: Path, bypass_ffp: Path | None = None -): +def run_phasenet(mseed_files_ffp: Path, output_dir: Path, bypass_ffp: Path = None): """ Run PhaseNet on the mseed files. diff --git a/nzgmdb/scripts/upload_to_dropbox.py b/nzgmdb/scripts/upload_to_dropbox.py index af1865b..c1aac9a 100644 --- a/nzgmdb/scripts/upload_to_dropbox.py +++ b/nzgmdb/scripts/upload_to_dropbox.py @@ -177,6 +177,14 @@ def main( snr_files = list(snr_fas_dir.rglob("*.csv")) snr_fas_zip = zip_files(snr_files, output_dir, f"snr_fas_{version}") + # 6) Zip XML inventory files if they exist + stationxml_dir = file_structure.get_stationxml_dir(input_dir) + if stationxml_dir.exists(): + xml_files = list(stationxml_dir.rglob("*.xml")) + xml_zip = zip_files(xml_files, output_dir, f"stationxml_{version}") + # Upload XML zip to Dropbox + upload_zip_to_dropbox(xml_zip, dropbox_version_dir) + # Upload everything to Dropbox failed_files = [] failed_files.append(upload_zip_to_dropbox(flatfiles_zip, dropbox_version_dir)) @@ -328,6 +336,7 @@ def download_dropbox_archive( ignore_waveforms: Annotated[bool, typer.Option()] = False, ignore_snrfas: Annotated[bool, typer.Option()] = False, ignore_quality: Annotated[bool, typer.Option()] = False, + ignore_xml: Annotated[bool, typer.Option()] = False, ) -> None: """ Download the NZGMDB archive from Dropbox. @@ -348,6 +357,8 @@ def download_dropbox_archive( Whether to ignore downloading SNR FAS files. ignore_quality : bool Whether to ignore downloading quality flatfiles. + ignore_xml : bool + Whether to ignore downloading station XML files. 
""" dropbox_version_dir = f"{DROPBOX_PATH}/{version}" @@ -357,6 +368,7 @@ def download_dropbox_archive( snr_fas_zip = f"snr_fas_{version}.zip" quality_zip = f"quality_flatfiles_{version}.zip" skipped_zip = f"skipped_{version}.zip" + xml_zip = f"stationxml_{version}.zip" # Local paths for the zip files outputs @@ -364,6 +376,7 @@ def download_dropbox_archive( snr_fas_dir = file_structure.get_snr_fas_dir(output_dir) waveform_dir = file_structure.get_waveform_dir(output_dir) quality_dir = file_structure.get_quality_db_dir(output_dir) + xml_dir = file_structure.get_stationxml_dir(output_dir) zip_dir = output_dir / "zips" zip_dir.mkdir(exist_ok=True) @@ -426,6 +439,13 @@ def download_dropbox_archive( (dropbox_zip_path, zip_dir / wf_zip, local_extract_path) ) + # Gather stationxml zip if exists + if not ignore_xml: + xml_dir.mkdir(exist_ok=True) + zips_to_download.append( + (f"{dropbox_version_dir}/{xml_zip}", zip_dir / xml_zip, xml_dir) + ) + # Ensure there is something to download assert len(zips_to_download) > 0, "No zips to download." diff --git a/wiki/Calculate-SNR.md b/wiki/Calculate-SNR.md index 2afc1ed..5b6290d 100644 --- a/wiki/Calculate-SNR.md +++ b/wiki/Calculate-SNR.md @@ -50,7 +50,7 @@ Raw MSEED files undergo the following preprocessing steps: 1. **Demean and detrend** - Remove offset and linear trends 2. **Taper** - Apply 5% cosine taper to both ends 3. **Zero padding** - Add 5 seconds of zeros at start and end -4. **Remove instrument response** - Apply sensitivity correction using station inventory +4. **Remove instrument response** - Remove instrument response using station metadata 5. **Rotate components** - Rotate horizontal components to North-East-Vertical (NEZ) 6. 
**Gravity normalisation** - Divide acceleration data by the acceleration due to gravity (9.81 m/s²) diff --git a/wiki/Process-Records.md b/wiki/Process-Records.md index a613db8..7796e51 100644 --- a/wiki/Process-Records.md +++ b/wiki/Process-Records.md @@ -75,7 +75,7 @@ Each MSEED file undergoes a comprehensive processing workflow: 2. **Demean and Detrend** - Remove offset and linear trends 3. **Taper Application** - Apply 5% Tukey taper to both ends 4. **Zero Padding** - Add 5 seconds of zeros at start and end -5. **Inventory Response Removal** - Remove instrument sensitivity using station metadata +5. **Inventory Response Removal** - Remove instrument response using station metadata 6. **Component Rotation** - Rotate horizontal components to North-East-Vertical (NEZ) 7. **Gravity normalisation** - Divide acceleration data by the acceleration due to gravity (9.81 m/s²) diff --git a/wiki/Upload-Dropbox.md b/wiki/Upload-Dropbox.md index 4613084..5b9adcc 100644 --- a/wiki/Upload-Dropbox.md +++ b/wiki/Upload-Dropbox.md @@ -100,6 +100,11 @@ The process creates several thematic ZIP archives: - Contains quality-filtered flatfiles from the quality_db directory - Only created if quality database processing was enabled +**6. 
Station XML Inventory Archive (`stationxml_{version}.zip`)** *(if present)* +- Contains station XML inventory files +- Only created if station XML inventories were generated +- Recursively includes all `*.xml` files from the stationxml directory + ### šŸ”¹ Waveform Packaging **Hierarchical Waveform Archives:** @@ -147,6 +152,7 @@ dropbox:/QuakeCoRE/Public/NZGMDB/{version}/ ā”œā”€ā”€ skipped_{version}.zip # Failed records documentation ā”œā”€ā”€ snr_fas_{version}.zip # SNR and FAS data ā”œā”€ā”€ quality_flatfiles_{version}.zip # Quality-filtered database (optional) +ā”œā”€ā”€ stationxml_{version}.zip # Station XML inventories (optional) └── waveforms/ ā”œā”€ā”€ {year1}.zip # Year-level waveform archive ā”œā”€ā”€ {year2}.zip # Year-level waveform archive @@ -169,6 +175,7 @@ All created ZIP files are stored locally in `{input_directory}/zips/` before upl ā”œā”€ā”€ skipped_{version}.zip ā”œā”€ā”€ snr_fas_{version}.zip ā”œā”€ā”€ quality_flatfiles_{version}.zip +ā”œā”€ā”€ stationxml_{version}.zip ā”œā”€ā”€ failed_files.txt # (if any uploads failed) └── waveforms/ ā”œā”€ā”€ {year}.zip diff --git a/wiki/Waveform-Extraction.md b/wiki/Waveform-Extraction.md index c9ca00c..8564bd1 100644 --- a/wiki/Waveform-Extraction.md +++ b/wiki/Waveform-Extraction.md @@ -1,6 +1,7 @@ # ∿ Waveform Extraction This step in the NZGMDB pipeline downloads waveform data for each station and event listed in the station extraction table. It manages missing data issues, and outputs the results in a structured format for further analysis in MSEED files. +This step also extracts the Station XML inventory information for each station. 
--- @@ -64,7 +65,7 @@ Below is an example with a waveform to illustrate the window: Waveforms are downloaded using the FDSN Client with specific constraints: #### **Channel Selection** -- **Channel Selection:** `channel_codes: [HN?, BN?, HH?]` from configuration, where HN and BN are Strong Motion channels and HH is Broadband +- **Channel Selection:** `channel_codes: [HN?, BN?, HH?, BH?]` from configuration, where HN and BN are Strong Motion channels and HH and BH are Broadband - **Three-component data:** Horizontal (N-S, E-W) and vertical components #### **Error Handling** @@ -210,7 +211,13 @@ For each station-event pair, magnitude information is extracted: ## šŸ“¦ Outputs -As part of the waveform extraction process, several output files are generated in the `flatfiles/` directory as well as the mseed files in the `waveforms/` directory: +As part of the waveform extraction process, several output files are generated in the `flatfiles/` directory as well as the mseed files in the `waveforms/` directory and the station xml inventories in the `stationxml/` directory. + +### šŸ”¹ Station XML Inventory Files +- **Location:** `stationxml/` subdirectory +- **Format:** StationXML (`.xml`) +- **Naming convention:** `{network}.{station}.xml` +- **Content:** Station metadata including sensor response information ### šŸ”¹ Station Magnitude Table (`station_magnitude_table.csv`)