14
14
import obspyh5
15
15
from obspyh5 import dataset2trace , is_obspyh5 , trace2group
16
16
from os .path import splitext
17
-
17
+ from typing import DefaultDict
18
18
from seismic .units_utils import KM_PER_DEG
19
19
from rf .rfstream import rfstats , obj2stats
20
20
from collections import defaultdict
21
- from pandas import DataFrame
22
21
# pylint: disable=invalid-name
23
22
24
23
34
33
35
34
def safe_iter_event_data (events , inventory , get_waveforms , use_rfstats = True , phase = 'P' ,
36
35
request_window = None , pad = 10 , pbar = None ,
37
- status :DataFrame = None , ** kwargs ):
36
+ status : DefaultDict [ str , int ] = None , log = None , ** kwargs ):
38
37
"""
39
38
Return iterator yielding three component streams per station and event.
40
39
@@ -48,7 +47,8 @@ def safe_iter_event_data(events, inventory, get_waveforms, use_rfstats=True, pha
48
47
:param request_window: requested time window around the onset of the phase
49
48
:param float pad: padding in seconds around request window
50
49
:param pbar: tqdm_ instance for displaying a progressbar
51
- :param status: an empty pandas DataFrame for retrieving statistics
50
+ :param status: a defaultdict(int) keyed by seed id; incremented once for each stream yielded
51
+ :param log: a Python logger (e.g. ``logging.Logger``); status rows are written with ``log.info``
52
52
:param kwargs: all other kwargs are passed to `~rf.rfstream.rfstats()`
53
53
54
54
:return: three component streams with raw data
@@ -82,19 +82,18 @@ def _get_stations(inventory):
82
82
pbar .total = len (events ) * len (stations )
83
83
# end if
84
84
85
+ fmt = "{:<15} {:<30} {:>9} {:>9} {:>7} {:>5} {:>15}"
86
+ if (log is not None ):
87
+ log .info (fmt .format ("seed_id" , "origin_time" , "lon" , "lat" , "depth" , "mag" , "status" )+ '\n ' ,
88
+ extra = {'simple' : True })
89
+ # end if
85
90
for i , (event , seedid ) in enumerate (itertools .product (events , stations )):
86
91
if pbar is not None : pbar .update (1 )
87
92
origin = (event .preferred_origin () or event .origins [0 ])
88
93
magnitude = (event .preferred_magnitude () or event .magnitudes [0 ])
89
94
origin_time , elon , elat , edepth , eMw = origin ['time' ], origin ['longitude' ], \
90
95
origin ['latitude' ], origin ['depth' ], magnitude .mag
91
- row_items = [seedid , origin_time , elon , elat , edepth / 1e3 , eMw ]
92
-
93
- # initialize status data-frame
94
- if (i == 0 and status is not None ):
95
- cols = ['seed_id' , 'origin_time' , 'lon' , 'lat' , 'depth' , 'magnitude' , 'status' ]
96
- for col in cols : status [col ] = None
97
- # end if
96
+ row_items = [seedid , origin_time .strftime ('%Y-%m-%dT%H:%M:%S.%f' ), elon , elat , edepth / 1e3 , eMw ]
98
97
99
98
try :
100
99
# exclude datetime from call to get_coordinates to ensure incorrect
@@ -103,7 +102,7 @@ def _get_stations(inventory):
103
102
args = (seedid [:- 1 ] + stations [seedid ], None )
104
103
coords = inventory .get_coordinates (* args )
105
104
except Exception : # station not available at that time
106
- if (status is not None ): status . loc [ i ] = [ * row_items , 'Invalid inventory' ]
105
+ if (log is not None ): log . info ( fmt . format ( * row_items , 'Invalid inventory' ), extra = { 'simple' : True })
107
106
continue
108
107
# end try
109
108
@@ -115,11 +114,11 @@ def _get_stations(inventory):
115
114
from warnings import warn
116
115
warn ('Error "%s" in rfstats call for event %s, station %s.'
117
116
% (exception , event .resource_id , seedid ))
118
- if ( status is not None ): status . loc [ i ] = [ * row_items , 'Invalid rfstats' ]
117
+ if ( log is not None ): log . info ( fmt . format ( * row_items , 'Invalid rfstats' ), extra = { 'simple' : True })
119
118
continue
120
119
# end try
121
120
if not stats :
122
- if ( status is not None ): status . loc [ i ] = [ * row_items , 'Invalid rfstats' ]
121
+ if ( log is not None ): log . info ( fmt . format ( * row_items , 'Invalid rfstats' ), extra = { 'simple' : True })
123
122
continue
124
123
# end if
125
124
# end if
@@ -146,11 +145,11 @@ def _get_stations(inventory):
146
145
stream .merge ()
147
146
148
147
if (len (stream ) == 0 ):
149
- if ( status is not None ): status . loc [ i ] = [ * row_items , 'No data' ]
148
+ if ( log is not None ): log . info ( fmt . format ( * row_items , 'No data' ), extra = { 'simple' : True })
150
149
continue
151
150
# end if
152
151
except Exception : # no data available
153
- if ( status is not None ): status . loc [ i ] = [ * row_items , 'Bad data' ]
152
+ if ( log is not None ): log . info ( fmt . format ( * row_items , 'No data' ), extra = { 'simple' : True })
154
153
continue
155
154
# end try
156
155
@@ -178,7 +177,7 @@ def _get_stations(inventory):
178
177
warn ('Need 3 component seismograms. %d components '
179
178
'detected for event %s, station %s.'
180
179
% (len (stream ), event .resource_id , seedid ))
181
- if ( status is not None ): status . loc [ i ] = [ * row_items , 'Missing components' ]
180
+ if ( log is not None ): log . info ( fmt . format ( * row_items , 'Missing components' ), extra = { 'simple' : True })
182
181
continue
183
182
# end if
184
183
@@ -200,7 +199,7 @@ def has_masked_values(data_stream):
200
199
from warnings import warn
201
200
warn ('Gaps or overlaps detected for event %s, station %s.'
202
201
% (event .resource_id , seedid ))
203
- if ( status is not None ): status . loc [ i ] = [ * row_items , 'Patchy data' ]
202
+ if ( log is not None ): log . info ( fmt . format ( * row_items , 'Patchy data' ), extra = { 'simple' : True })
204
203
continue
205
204
else :
206
205
for tr in stream : tr .data = np .array (tr .data )
@@ -211,7 +210,8 @@ def has_masked_values(data_stream):
211
210
tr .stats .update (stats )
212
211
# end for
213
212
214
- if (status is not None ): status .loc [i ] = [* row_items , 'Good data' ]
213
+ if (log is not None ): log .info (fmt .format (* row_items , 'Good data' ), extra = {'simple' : True })
214
+ if (status is not None ): status [seedid ] += 1
215
215
yield RFStream (stream )
216
216
# end for
217
217
# end func
0 commit comments