Commit 217d66c

Minor updates to reporting
1 parent b62aa49 commit 217d66c

5 files changed: +44 -37 lines changed


seismic/extract_event_traces.py

Lines changed: 10 additions & 13 deletions
```diff
@@ -288,14 +288,14 @@ def extract_data(recording_timespan_getter, waveform_getter,
 
     stream_count = 0
     sta_stream = Stream()
-    status = DataFrame()
+    status = defaultdict(int)
+    log.info('Data extraction stats:\n')
     for s in safe_iter_event_data(curr_cat, curr_inv, waveform_getter,
                                   use_rfstats=rfstats_map[wave],
                                   phase=phase_map[wave],
                                   tt_model=tt_model, pbar=None,
                                   request_window=request_window,
-                                  pad=pad,
-                                  status=status):
+                                  pad=pad, status=status, log=log):
         # Write traces to output file in append mode so that arbitrarily large file
         # can be processed. If the file already exists, then existing streams will
         # be overwritten rather than duplicated.
@@ -350,27 +350,24 @@ def extract_data(recording_timespan_getter, waveform_getter,
             if(len(sta_stream)):
                 write_h5_event_stream(event_trace_datafile, sta_stream, index=h5_index, mode='a')
             else:
-                t = Trace(data=np.array([]),
+                t = Trace(data=np.array([0]),
                           header={'network': net, 'station': sta,
                                   'location': loc, 'channel': 'XXX',
                                   'wave_type': wave,
                                   'station_longitude': sta_lon,
                                   'station_latitude': sta_lat,
-                                  'event_time': UTCDateTime.now()})
+                                  'event_time': UTCDateTime(0)})
                 write_h5_event_stream(event_trace_datafile, Stream([t]), index=h5_index, mode='a')
             # end if
         # end if
         comm.Barrier()
     # end for
 
-    if(len(status)):
-        status.index += 1
-        log.info('Data extraction stats:\n{}\n'.format(status.to_string()))
-    # end if
-    log.info('Summary: good data found for {}/{} events.'.format\
-             (np.sum(np.array(status['status']=='Good data')) if(len(status)) else 0,
-              len(status)))
-    if(len(curr_cat) != len(status)): log.warning('All events may not have been processed..')
+    log.info('\nSummary:\n', extra={'simple': True})
+    for k, v in status.items():
+        log.info('{}: good data found for {}/{} events.'.format \
+                 (k, v, len(curr_cat)), extra={'simple': True})
+    # end for
 
     warn_str = \
         " No {} traces found for {}! Added a null trace.".format(DESCS[wave], nsl)
```

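The pandas DataFrame that previously accumulated one status row per (event, station) pair is replaced by a `defaultdict(int)` keyed on seed ID, so only per-station counts of good events are retained. A minimal sketch of how the new tally behaves (the seed IDs, catalogue stand-in, and outcomes below are invented for illustration):

```python
from collections import defaultdict

status = defaultdict(int)         # seed_id -> number of events with good data
curr_cat = ['ev1', 'ev2', 'ev3']  # stand-in for the event catalogue

# Pretend extraction outcomes: (seed_id, data_was_good)
outcomes = [('AU.ARMA..BH', True), ('AU.ARMA..BH', True), ('AU.CMSA..BH', False)]
for seedid, good in outcomes:
    if good:
        status[seedid] += 1  # mirrors status[seedid] += 1 in safe_iter_event_data

# Mirrors the new summary loop in extract_data
for k, v in status.items():
    print('{}: good data found for {}/{} events.'.format(k, v, len(curr_cat)))
```

Unlike the old DataFrame, the defaultdict needs no up-front column initialisation and its footprint does not grow with catalogue size; per-row detail now goes straight to the log instead.
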
seismic/misc.py

Lines changed: 11 additions & 1 deletion
```diff
@@ -23,12 +23,22 @@ def setup_logger(name, log_file=None, level=logging.INFO, propagate=False):
     """
     Function to setup a logger; adapted from stackoverflow
     """
+    class ConditionalFormatter(logging.Formatter):
+        def format(self, record):
+            if hasattr(record, 'simple') and record.simple:
+                return record.getMessage()
+            else:
+                return logging.Formatter.format(self, record)
+            # end if
+        # end func
+    # end class
+
     handler = None
     if(log_file):
         handler = logging.FileHandler(log_file, mode='w')
     # end if
 
-    formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
+    formatter = ConditionalFormatter('%(asctime)s %(levelname)s %(message)s')
 
     logger = logging.getLogger(name+log_file if log_file else '')
     logger.setLevel(level)
```
seismic/rf_station_orientations.py

Lines changed: 2 additions & 2 deletions
```diff
@@ -120,8 +120,8 @@ def analyze_station_orientations(ned, curation_opts=DEFAULT_CURATION_OPTS,
     logger = logging.getLogger(__name__ + ':' + full_code)
     logger.setLevel(logging.INFO)
 
-    # check if ned has valid data
-    if not np.any(np.array([len(stream[0].data) for _, _, stream in ned])):
+    # check if ned has valid data. Note that null traces are length 1
+    if not np.any(np.array([(len(stream[0].data) > 1) for _, _, stream in ned])):
         return results
     # end if
 
```

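Because extract_event_traces.py now writes null placeholder traces containing a single zero sample rather than an empty array, "has valid data" must mean strictly more than one sample. A toy version of the check under that assumption (the two traces and the tuple stand-in for `ned` are fabricated):

```python
import numpy as np
from obspy import Trace

null_tr = Trace(data=np.array([0]))         # placeholder written when a station has no data
real_tr = Trace(data=np.random.randn(500))  # a trace with actual samples

# Stand-in for the (station, event, stream) triples that ned yields
ned = [('STA1', 'ev1', [null_tr]), ('STA2', 'ev1', [real_tr])]

has_valid = np.any(np.array([(len(stream[0].data) > 1) for _, _, stream in ned]))
print(has_valid)  # True: STA2's stream holds more than the single null sample
```
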
seismic/stream_io.py

Lines changed: 19 additions & 19 deletions
```diff
@@ -14,11 +14,10 @@
 import obspyh5
 from obspyh5 import dataset2trace, is_obspyh5, trace2group
 from os.path import splitext
-
+from typing import DefaultDict
 from seismic.units_utils import KM_PER_DEG
 from rf.rfstream import rfstats, obj2stats
 from collections import defaultdict
-from pandas import DataFrame
 # pylint: disable=invalid-name
 
 
@@ -34,7 +33,7 @@
 
 def safe_iter_event_data(events, inventory, get_waveforms, use_rfstats=True, phase='P',
                          request_window=None, pad=10, pbar=None,
-                         status:DataFrame=None, **kwargs):
+                         status: DefaultDict[str, int] = None, log=None, **kwargs):
     """
     Return iterator yielding three component streams per station and event.
 
@@ -48,7 +47,8 @@ def safe_iter_event_data(events, inventory, get_waveforms, use_rfstats=True, pha
     :param request_window: requested time window around the onset of the phase
     :param float pad: padding in seconds around request window
     :param pbar: tqdm_ instance for displaying a progressbar
-    :param status: an empty pandas DataFrame for retrieving statistics
+    :param status: a defaultdict for retrieving statistics
+    :param log: a python logging instance
     :param kwargs: all other kwargs are passed to `~rf.rfstream.rfstats()`
 
     :return: three component streams with raw data
@@ -82,19 +82,18 @@ def _get_stations(inventory):
         pbar.total = len(events) * len(stations)
     # end if
 
+    fmt = "{:<15} {:<30} {:>9} {:>9} {:>7} {:>5} {:>15}"
+    if(log is not None):
+        log.info(fmt.format("seed_id", "origin_time", "lon", "lat", "depth", "mag", "status")+'\n',
+                 extra={'simple': True})
+    # end if
     for i, (event, seedid) in enumerate(itertools.product(events, stations)):
         if pbar is not None: pbar.update(1)
         origin = (event.preferred_origin() or event.origins[0])
         magnitude = (event.preferred_magnitude() or event.magnitudes[0])
         origin_time, elon, elat, edepth, eMw = origin['time'], origin['longitude'], \
                                                origin['latitude'], origin['depth'], magnitude.mag
-        row_items = [seedid, origin_time, elon, elat, edepth/1e3, eMw]
-
-        # initialize status data-frame
-        if (i == 0 and status is not None):
-            cols = ['seed_id', 'origin_time', 'lon', 'lat', 'depth', 'magnitude', 'status']
-            for col in cols: status[col] = None
-        # end if
+        row_items = [seedid, origin_time.strftime('%Y-%m-%dT%H:%M:%S.%f'), elon, elat, edepth/1e3, eMw]
 
         try:
             # exclude datetime from call to get_coordinates to ensure incorrect
@@ -103,7 +102,7 @@ def _get_stations(inventory):
             args = (seedid[:-1] + stations[seedid], None)
             coords = inventory.get_coordinates(*args)
         except Exception: # station not available at that time
-            if(status is not None): status.loc[i] = [*row_items, 'Invalid inventory']
+            if(log is not None): log.info(fmt.format(*row_items, 'Invalid inventory'), extra={'simple': True})
             continue
         # end try
 
@@ -115,11 +114,11 @@ def _get_stations(inventory):
             from warnings import warn
             warn('Error "%s" in rfstats call for event %s, station %s.'
                  % (exception, event.resource_id, seedid))
-            if(status is not None): status.loc[i] = [*row_items, 'Invalid rfstats']
+            if (log is not None): log.info(fmt.format(*row_items, 'Invalid rfstats'), extra={'simple': True})
             continue
         # end try
         if not stats:
-            if(status is not None): status.loc[i] = [*row_items, 'Invalid rfstats']
+            if (log is not None): log.info(fmt.format(*row_items, 'Invalid rfstats'), extra={'simple': True})
             continue
         # end if
     # end if
@@ -146,11 +145,11 @@ def _get_stations(inventory):
             stream.merge()
 
             if(len(stream) == 0):
-                if(status is not None): status.loc[i] = [*row_items, 'No data']
+                if (log is not None): log.info(fmt.format(*row_items, 'No data'), extra={'simple': True})
                 continue
             # end if
         except Exception: # no data available
-            if(status is not None): status.loc[i] = [*row_items, 'Bad data']
+            if (log is not None): log.info(fmt.format(*row_items, 'No data'), extra={'simple': True})
             continue
         # end try
 
@@ -178,7 +177,7 @@ def _get_stations(inventory):
             warn('Need 3 component seismograms. %d components '
                  'detected for event %s, station %s.'
                  % (len(stream), event.resource_id, seedid))
-            if(status is not None): status.loc[i] = [*row_items, 'Missing components']
+            if (log is not None): log.info(fmt.format(*row_items, 'Missing components'), extra={'simple': True})
             continue
         # end if
 
@@ -200,7 +199,7 @@ def has_masked_values(data_stream):
             from warnings import warn
             warn('Gaps or overlaps detected for event %s, station %s.'
                  % (event.resource_id, seedid))
-            if(status is not None): status.loc[i] = [*row_items, 'Patchy data']
+            if (log is not None): log.info(fmt.format(*row_items, 'Patchy data'), extra={'simple': True})
             continue
         else:
             for tr in stream: tr.data = np.array(tr.data)
@@ -211,7 +210,8 @@ def has_masked_values(data_stream):
             tr.stats.update(stats)
         # end for
 
-        if(status is not None): status.loc[i] = [*row_items, 'Good data']
+        if (log is not None): log.info(fmt.format(*row_items, 'Good data'), extra={'simple': True})
+        if(status is not None): status[seedid] += 1
         yield RFStream(stream)
     # end for
 # end func
```

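Per-row status reporting now renders through a fixed-width format string instead of accumulating a DataFrame, so each outcome is logged as soon as it is known. A quick illustration of the column layout produced by the new `fmt` string (all values below are invented):

```python
fmt = "{:<15} {:<30} {:>9} {:>9} {:>7} {:>5} {:>15}"

print(fmt.format("seed_id", "origin_time", "lon", "lat", "depth", "mag", "status"))
print(fmt.format("AU.ARMA..BH", "2007-01-13T04:23:21.160000",
                 154.52, 46.24, 10.0, 7.4, "Good data"))
print(fmt.format("AU.CMSA..BH", "2007-01-13T04:23:21.160000",
                 154.52, 46.24, 10.0, 7.4, "No data"))
```

Streaming rows to the logger keeps memory flat for arbitrarily large catalogues, and formatting `origin_time` with `'%Y-%m-%dT%H:%M:%S.%f'` gives it a fixed width so the columns stay aligned.
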
seismic/swp_station_orientations.py

Lines changed: 2 additions & 2 deletions
```diff
@@ -723,8 +723,8 @@ def dump_swp_data(r1phi, r1cc, r2phi, r2cc, e_array):
     logger = logging.getLogger(__name__ + ':' + full_code)
     logger.setLevel(logging.INFO)
 
-    # check if ned has valid data
-    if not np.any(np.array([len(stream[0].data) for _, _, stream in ned])):
+    # check if ned has valid data. Note that null traces are length 1
+    if not np.any(np.array([(len(stream[0].data) > 1) for _, _, stream in ned])):
         return results
     # end if
 
```
