24
24
import click
25
25
26
26
class FederatedASDFDataSet ():
27
- def __init__ (self , asdf_source , force_reindex = False , logger = None ,
27
+ def __init__ (self , asdf_source , fast = True , force_reindex = False , logger = None ,
28
28
single_item_read_limit_in_mb = 1024 ,
29
29
single_threaded_access = True ):
30
30
"""
31
31
Initializer for FederatedASDFDataSet.
32
32
33
33
:param asdf_source: Path to a text file containing a list of ASDF files. \
34
34
Entries can be commented out with '#'
35
+ :param fast: enables in-memory optimizations for faster queries
35
36
:param force_reindex: Force reindex even if a preexisting db file is found
36
37
:param logger: logger instance
37
38
:param single_item_read_limit_in_mb: buffer size for Obspy reads
@@ -41,25 +42,23 @@ def __init__(self, asdf_source, force_reindex=False, logger=None,
41
42
"""
42
43
self .logger = logger
43
44
self .asdf_source = asdf_source
44
- self ._unique_coordinates = None
45
45
self ._earth_radius = 6371 # km
46
46
47
47
# Instantiate implementation class
48
- self .fds = _FederatedASDFDataSetImpl (asdf_source , force_reindex = force_reindex , logger = logger ,
48
+ self .fds = _FederatedASDFDataSetImpl (asdf_source , fast = fast ,
49
+ force_reindex = force_reindex , logger = logger ,
49
50
single_item_read_limit_in_mb = single_item_read_limit_in_mb ,
50
51
single_threaded_access = single_threaded_access )
51
52
52
53
# Populate coordinates
53
- self ._unique_coordinates = defaultdict (list )
54
-
55
54
rtps_dict = defaultdict ()
56
55
for ds_dict in self .fds .asdf_station_coordinates :
57
56
for key in list (ds_dict .keys ()):
58
- self ._unique_coordinates [key ] = [ds_dict [key ][0 ], ds_dict [key ][1 ]]
59
57
58
+ lon , lat , _ = ds_dict [key ]
60
59
rtps_dict [key ] = [self ._earth_radius ,
61
- np .radians (90 - ds_dict [ key ][ 1 ] ),
62
- np .radians (ds_dict [ key ][ 0 ] )]
60
+ np .radians (90 - lat ),
61
+ np .radians (lon )]
63
62
# end for
64
63
# end for
65
64
@@ -80,8 +79,7 @@ def unique_coordinates(self):
80
79
81
80
:return: dictionary containing [lon, lat] coordinates indexed by 'net.sta'
82
81
"""
83
- return self ._unique_coordinates
84
-
82
+ return self .fds ._unique_coordinates
85
83
# end func
86
84
87
85
def corrections_enabled (self ):
@@ -124,7 +122,7 @@ def get_closest_stations(self, lon, lat, nn=1):
124
122
125
123
# end func
126
124
127
- def get_global_time_range (self , network , station = None , location = None , channel = None ):
125
+ def get_recording_timespan (self , network , station = None , location = None , channel = None ):
128
126
"""
129
127
:param network: network code
130
128
:param station: station code
@@ -134,19 +132,18 @@ def get_global_time_range(self, network, station=None, location=None, channel=No
134
132
min is set to 2100-01-01T00:00:00.000000Z and max is set to 1900-01-01T00:00:00.000000Z
135
133
"""
136
134
137
- return self .fds .get_global_time_range (network , station = station , location = location , channel = channel )
138
-
135
+ return self .fds .get_recording_timespan (network , station = station , location = location , channel = channel )
139
136
# end func
140
137
141
- def get_nslc_coverage (self ):
138
+ def get_all_recording_timespans (self ):
142
139
"""
143
140
Get a structured numpy array with named columns
144
141
'net', 'sta', 'loc', 'cha', 'min_st', 'max_et'
145
142
representing contents of the database
146
143
@return:
147
144
"""
148
145
149
- results = self .fds .get_nslc_coverage ()
146
+ results = self .fds .get_all_recording_timespans ()
150
147
return results
151
148
# end func
152
149
@@ -255,8 +252,7 @@ def get_inventory(self, network=None, station=None):
255
252
return inv
256
253
# end func
257
254
258
- def find_gaps (self , network = None , station = None , location = None ,
259
- channel = None , start_date_ts = None , end_date_ts = None ,
255
+ def find_gaps (self , network = None , station = None , location = None , channel = None , starttime = None , endtime = None ,
260
256
min_gap_length = 86400 ):
261
257
"""
262
258
This function returns gaps in data as a numpy array with columns: net, sta, loc, cha, start_timestamp,
@@ -265,27 +261,39 @@ def find_gaps(self, network=None, station=None, location=None,
265
261
@param station: station code
266
262
@param location: location code
267
263
@param channel: channel code
268
- @param start_date_ts : start timestamp
269
- @param end_date_ts : end timestamp
270
- @param min_gap_length: minimum length of gap; smaller gaps in data are ignored
264
+ @param starttime : start timestamp
265
+ @param endtime : end timestamp
266
+ @param min_gap_length: minimum length of gap in seconds; smaller gaps in data are ignored
271
267
@return:
272
268
"""
273
- return self .fds .find_gaps (network , station , location , channel , start_date_ts , end_date_ts , min_gap_length )
269
+ return self .fds .find_gaps (network , station , location , channel , starttime , endtime , min_gap_length )
274
270
# end func
275
271
276
- def get_coverage (self , network = None ):
272
+ def get_recording_duration (self , network = None , station = None , location = None , channel = None ,
273
+ starttime = None , endtime = None , cumulative = False ):
277
274
"""
278
- Generates coverage for the entire data holdings for a selected network.
279
- @param network: network code
280
- @return: Numpy record array with columns: net, sta, loc, cha,
281
- start_timestamp, end_timestamp
275
+ Fetches total recording duration in seconds. Note that 'duration_seconds' in the output excludes data gaps
276
+
277
+ @param network:
278
+ @param station:
279
+ @param location:
280
+ @param channel:
281
+ @param starttime:
282
+ @param endtime:
283
+ @param cumulative: if True, returns cumulative recording times; otherwise returns blocks of start- and end-times
284
+ @return: Numpy record array with columns, if cumulative=False:
285
+ net, sta, loc, cha, block_st, block_et
286
+ , otherwise:
287
+ net, sta, loc, cha, lon, lat, min_st, max_et, duration_seconds
282
288
"""
283
289
284
- rows = self .fds .get_coverage (network = network )
290
+ rows = self .fds .get_recording_duration (network = network , station = station , location = location , channel = channel ,
291
+ starttime = starttime , endtime = endtime , cumulative = cumulative )
285
292
return rows
286
293
# end func
287
294
# end class
288
295
296
+
289
297
CONTEXT_SETTINGS = dict (help_option_names = ['-h' , '--help' ])
290
298
@click .command (context_settings = CONTEXT_SETTINGS )
291
299
@click .argument ('asdf-source' , required = True ,
@@ -312,16 +320,22 @@ def process(asdf_source, force_reindex, generate_summary):
312
320
with open (ofn , 'w' ) as fh :
313
321
fh .write ('# net, sta, loc, cha, lon, lat, min_starttime, max_endtime, duration_months\n ' )
314
322
315
- rows = ds .get_coverage ( )
323
+ rows = ds .get_recording_duration ( cumulative = True )
316
324
for row in rows :
317
- net , sta , loc , cha , lon , lat , min_st , max_et = row
318
- duration_months = ( max_et - min_st ) / (86400 * 30 )
325
+ net , sta , loc , cha , min_st , max_et , duration_seconds = row
326
+ duration_months = duration_seconds / (86400 * 30 )
319
327
328
+ lon , lat = ds .unique_coordinates ['{}.{}' .format (net , sta )]
320
329
line = '{},{},{},{},{:3.4f},{:3.4f},{},{},{:5.3f}\n ' .\
321
330
format (net , sta , loc , cha , lon , lat ,
322
331
UTCDateTime (min_st ).strftime ('%Y-%m-%dT%H:%M:%S' ),
323
332
UTCDateTime (max_et ).strftime ('%Y-%m-%dT%H:%M:%S' ),
324
333
duration_months )
334
+
335
+ if (duration_seconds > (max_et - min_st )):
336
+ logger .warn ('Potential overlapping data found: {}' .format (line .strip ()))
337
+ # end if
338
+
325
339
fh .write (line )
326
340
# end for
327
341
# end with
0 commit comments