74 changes: 44 additions & 30 deletions seismic/ASDFdatabase/FederatedASDFDataSet.py
@@ -24,14 +24,15 @@
import click

class FederatedASDFDataSet():
def __init__(self, asdf_source, force_reindex=False, logger=None,
def __init__(self, asdf_source, fast=True, force_reindex=False, logger=None,
single_item_read_limit_in_mb=1024,
single_threaded_access=True):
"""
Initializer for FederatedASDFDataSet.

:param asdf_source: Path to a text file containing a list of ASDF files. \
Entries can be commented out with '#'
:param fast: enables in-memory optimizations for faster queries
:param force_reindex: Force reindex even if a preexisting db file is found
:param logger: logger instance
:param single_item_read_limit_in_mb: buffer size for ObsPy reads
Expand All @@ -41,25 +42,23 @@ def __init__(self, asdf_source, force_reindex=False, logger=None,
"""
self.logger = logger
self.asdf_source = asdf_source
self._unique_coordinates = None
self._earth_radius = 6371 # km

# Instantiate implementation class
self.fds = _FederatedASDFDataSetImpl(asdf_source, force_reindex=force_reindex, logger=logger,
self.fds = _FederatedASDFDataSetImpl(asdf_source, fast=fast,
force_reindex=force_reindex, logger=logger,
single_item_read_limit_in_mb=single_item_read_limit_in_mb,
single_threaded_access=single_threaded_access)

# Populate coordinates
self._unique_coordinates = defaultdict(list)

rtps_dict = defaultdict()
for ds_dict in self.fds.asdf_station_coordinates:
for key in list(ds_dict.keys()):
self._unique_coordinates[key] = [ds_dict[key][0], ds_dict[key][1]]

lon, lat, _ = ds_dict[key]
rtps_dict[key] = [self._earth_radius,
np.radians(90 - ds_dict[key][1]),
np.radians(ds_dict[key][0])]
np.radians(90 - lat),
np.radians(lon)]
# end for
# end for
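The rtps_dict entries above are spherical triplets [radius, colatitude, longitude] in radians, a layout suited to Cartesian conversion for nearest-neighbour lookups. A minimal self-contained sketch of that conversion, assuming a kd-tree is used downstream (the helper name rtp_to_xyz and the cKDTree usage are illustrative, not part of this diff):

import numpy as np
from scipy.spatial import cKDTree

def rtp_to_xyz(r, theta, phi):
    # Spherical (radius, colatitude theta, longitude phi; radians) -> Cartesian
    return [r * np.sin(theta) * np.cos(phi),
            r * np.sin(theta) * np.sin(phi),
            r * np.cos(theta)]

# One station at lon=140 E, lat=-30, mirroring the rtps_dict entries above:
# [earth_radius, radians(90 - lat), radians(lon)]
xyz = np.array([rtp_to_xyz(6371, np.radians(90 - (-30)), np.radians(140))])
tree = cKDTree(xyz)  # nearest neighbours via tree.query(query_xyz, k=nn)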

@@ -80,8 +79,7 @@ def unique_coordinates(self):

:return: dictionary containing [lon, lat] coordinates indexed by 'net.sta'
"""
return self._unique_coordinates

return self.fds._unique_coordinates
# end func

def corrections_enabled(self):
@@ -124,7 +122,7 @@ def get_closest_stations(self, lon, lat, nn=1):

# end func
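A hedged usage sketch of the coordinate property and the nearest-station query above; the source file name and station key are illustrative placeholders, and the exact return value of get_closest_stations follows the implementation:

from seismic.ASDFdatabase.FederatedASDFDataSet import FederatedASDFDataSet

# 'asdf_files.txt' and 'AU.ARMA' are hypothetical
ds = FederatedASDFDataSet('asdf_files.txt')
lon, lat = ds.unique_coordinates['AU.ARMA']        # [lon, lat] keyed by 'net.sta'
nearest = ds.get_closest_stations(lon, lat, nn=5)  # five closest stations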

def get_global_time_range(self, network, station=None, location=None, channel=None):
def get_recording_timespan(self, network, station=None, location=None, channel=None):
"""
:param network: network code
:param station: station code
@@ -134,19 +132,18 @@ def get_global_time_range(self, network, station=None, location=None, channel=None):
min is set to 2100-01-01T00:00:00.000000Z and max is set to 1900-01-01T00:00:00.000000Z
"""

return self.fds.get_global_time_range(network, station=station, location=location, channel=channel)

return self.fds.get_recording_timespan(network, station=station, location=location, channel=channel)
# end func
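Since the renamed get_recording_timespan returns the sentinel pair noted above (min later than max) when nothing is found, callers can detect an empty result by comparing the two values. A sketch, assuming a (min, max) pair as the docstring fragment suggests and reusing the ds instance from the earlier example:

min_st, max_et = ds.get_recording_timespan('AU', station='ARMA')
if min_st < max_et:
    print('recordings span {} to {}'.format(min_st, max_et))
else:
    print('no data found')  # sentinel values: min_st later than max_et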

def get_nslc_coverage(self):
def get_all_recording_timespans(self):
"""
Get a structured numpy array with named columns
'net', 'sta', 'loc', 'cha', 'min_st', 'max_et'
representing the contents of the database
@return: structured numpy array as described above
"""

results = self.fds.get_nslc_coverage()
results = self.fds.get_all_recording_timespans()
return results
# end func
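Because the result is a structured array with named columns, standard numpy boolean masking applies. For instance, to pull out all vertical-component rows (reusing ds from above; the channel code is illustrative):

spans = ds.get_all_recording_timespans()
bhz = spans[spans['cha'] == 'BHZ']  # rows for BHZ channels only
for row in bhz:
    print(row['net'], row['sta'], row['min_st'], row['max_et'])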

Expand Down Expand Up @@ -255,8 +252,7 @@ def get_inventory(self, network=None, station=None):
return inv
# end func

def find_gaps(self, network=None, station=None, location=None,
channel=None, start_date_ts=None, end_date_ts=None,
def find_gaps(self, network=None, station=None, location=None, channel=None, starttime=None, endtime=None,
min_gap_length=86400):
"""
This function returns gaps in data as a numpy array with columns: net, sta, loc, cha, start_timestamp,
@@ -265,27 +261,39 @@
@param station: station code
@param location: location code
@param channel: channel code
@param start_date_ts: start timestamp
@param end_date_ts: end timestamp
@param min_gap_length: minimum length of gap; smaller gaps in data are ignored
@param starttime: start timestamp
@param endtime: end timestamp
@param min_gap_length: minimum length of gap in seconds; smaller gaps in data are ignored
@return:
"""
return self.fds.find_gaps(network, station, location, channel, start_date_ts, end_date_ts, min_gap_length)
return self.fds.find_gaps(network, station, location, channel, starttime, endtime, min_gap_length)
# end func
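A sketch of the renamed keyword arguments, listing gaps longer than a day over an illustrative five-year window (row layout as per the docstring above; reuses ds from the earlier example):

from obspy import UTCDateTime

gaps = ds.find_gaps(network='AU',
                    starttime=UTCDateTime('2010-01-01').timestamp,
                    endtime=UTCDateTime('2015-01-01').timestamp,
                    min_gap_length=86400)
for net, sta, loc, cha, gap_st, gap_et in gaps:
    print('{}.{}.{}.{}: {:.1f} day gap'.format(net, sta, loc, cha,
                                               (gap_et - gap_st) / 86400.))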

def get_coverage(self, network=None):
def get_recording_duration(self, network=None, station=None, location=None, channel=None,
starttime=None, endtime=None, cumulative=False):
"""
Generates coverage for the entire data holdings for a selected network.
@param network: network code
@return: Numpy record array with columns: net, sta, loc, cha,
start_timestamp, end_timestamp
Fetches total recording duration in seconds. Note that 'duration_seconds' in the output excludes data-gaps.

@param network: network code
@param station: station code
@param location: location code
@param channel: channel code
@param starttime: start timestamp
@param endtime: end timestamp
@param cumulative: if True, returns cumulative recording durations; otherwise blocks of start- and end-times
@return: Numpy record array with columns:
net, sta, loc, cha, block_st, block_et, if cumulative=False;
otherwise:
net, sta, loc, cha, min_st, max_et, duration_seconds
"""

rows = self.fds.get_coverage(network=network)
rows = self.fds.get_recording_duration(network=network, station=station, location=location, channel=channel,
starttime=starttime, endtime=endtime, cumulative=cumulative)
return rows
# end func
# end class


CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help'])
@click.command(context_settings=CONTEXT_SETTINGS)
@click.argument('asdf-source', required=True,
@@ -312,16 +320,22 @@ def process(asdf_source, force_reindex, generate_summary):
with open(ofn, 'w') as fh:
fh.write('# net, sta, loc, cha, lon, lat, min_starttime, max_endtime, duration_months\n')

rows = ds.get_coverage()
rows = ds.get_recording_duration(cumulative=True)
for row in rows:
net, sta, loc, cha, lon, lat, min_st, max_et = row
duration_months = (max_et - min_st) / (86400 * 30)
net, sta, loc, cha, min_st, max_et, duration_seconds = row
duration_months = duration_seconds / (86400 * 30)

lon, lat = ds.unique_coordinates['{}.{}'.format(net, sta)]
line = '{},{},{},{},{:3.4f},{:3.4f},{},{},{:5.3f}\n'.\
format(net, sta, loc, cha, lon, lat,
UTCDateTime(min_st).strftime('%Y-%m-%dT%H:%M:%S'),
UTCDateTime(max_et).strftime('%Y-%m-%dT%H:%M:%S'),
duration_months)

if(duration_seconds > (max_et - min_st)):
logger.warning('Potential overlapping data found: {}'.format(line.strip()))
# end if

fh.write(line)
# end for
# end with
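The overlap warning above rests on a simple invariant: duration_seconds excludes gaps, so it can never legitimately exceed the wall-clock span max_et - min_st; if it does, some data blocks must overlap. A toy illustration:

# Blocks 00:00-24:00 and 12:00-48:00 overlap by 12 h:
# summed duration = 24 h + 36 h = 60 h, yet the span is only 48 h
span_seconds = 48 * 3600.0
summed_duration_seconds = (24 + 36) * 3600.0
assert summed_duration_seconds > span_seconds  # would trigger the warning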