Skip to content

Commit 8fd7d6f

Browse files
authored
Xcorr updates (#267)
* Added more robust subset-stacking
* Miscellaneous changes
* Improved data-coverage function in FederatedASDFDataSet
* Improved handling of location codes in cross-correlation routines
1 parent a22a2cf commit 8fd7d6f

16 files changed

+136
-87
lines changed

seismic/ASDFdatabase/FederatedASDFDataSet.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -138,13 +138,15 @@ def get_global_time_range(self, network, station=None, location=None, channel=No
138138

139139
# end func
140140

141-
def get_nslc_list(self):
141+
def get_nslc_coverage(self):
142142
"""
143-
Get a list of all net, sta, loc, cha combinations featured in the database
143+
Get a structured numpy array with named columns
144+
'net', 'sta', 'loc', 'cha', 'min_st', 'max_et'
145+
representing contents of the database
144146
@return:
145147
"""
146148

147-
results = self.fds.get_nslc_list()
149+
results = self.fds.get_nslc_coverage()
148150
return results
149151
# end if
150152

seismic/ASDFdatabase/_FederatedASDFDataSetImpl.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -490,11 +490,17 @@ def get_global_time_range(self, network, station=None, location=None, channel=No
490490
return min, max
491491
# end func
492492

493-
def get_nslc_list(self):
494-
query = "select net, sta, loc, cha from nslc"
493+
def get_nslc_coverage(self):
494+
query = "select net, sta, loc, cha, st, et from nslc"
495495
rows = self.conn.execute(query).fetchall()
496496

497-
return rows
497+
fields = {'names': ['net', 'sta', 'loc', 'cha', 'min_st', 'max_et'],
498+
'formats': ['U10', 'U10', 'U10', 'U10', 'f8', 'f8']}
499+
result = np.zeros(len(rows), dtype=fields)
500+
501+
for i, row in enumerate(rows): result[i] = row
502+
503+
return result
498504
# end if
499505

500506
def get_stations(self, starttime, endtime, network=None, station=None, location=None, channel=None):

seismic/ASDFdatabase/cwb2asdf/cwb2asdf.py

Lines changed: 30 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@
2828
from ordered_set import OrderedSet as set
2929
from tqdm import tqdm
3030
from seismic.misc import split_list
31-
from seismic.misc import recursive_glob
3231
from seismic.ASDFdatabase.utils import cleanse_inventory
32+
from seismic.misc import recursive_glob
3333

3434
def make_ASDF_tag(tr, tag):
3535
# def make_ASDF_tag(ri, tag):
@@ -45,7 +45,7 @@ def make_ASDF_tag(tr, tag):
4545
# end func
4646

4747
CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help'])
48-
BUFFER_LENGTH = 1000
48+
BUFFER_LENGTH = 2000
4949

5050
@click.command(context_settings=CONTEXT_SETTINGS)
5151
@click.argument('input-folder', required=True,
@@ -60,7 +60,7 @@ def make_ASDF_tag(tr, tag):
6060
@click.option('--min-length-sec', type=int, default=None, help="Minimum length in seconds")
6161
@click.option('--merge-threshold', type=int, default=None, help="Merge traces if the number of traces fetched for an "
6262
"interval exceeds this threshold")
63-
@click.option('--ntraces-per-file', type=int, default=3600, help="Maximum number of traces per file; if exceeded, the "
63+
@click.option('--ntraces-per-file', type=int, default=600, help="Maximum number of traces per file; if exceeded, the "
6464
"file is ignored.")
6565
@click.option('--dry-run', default=False, is_flag=True, show_default=True,
6666
help="Dry run only reports stations that were not found in the stationXML files, for "
@@ -75,6 +75,7 @@ def process(input_folder, inventory_folder, output_file_name, file_pattern,
7575
OUTPUT_FILE_NAME: Name of output ASDF file \n
7676
"""
7777

78+
inventory_added = defaultdict(bool)
7879
def _read_inventories(inventory_folder):
7980
inv_files = recursive_glob(inventory_folder, '*.xml')
8081

@@ -103,8 +104,11 @@ def _write(ds, ostream, inventory_dict, netsta_set):
103104
# end try
104105

105106
for item in netsta_set:
107+
if(inventory_added[item]): continue
108+
106109
try:
107110
ds.add_stationxml(inventory_dict[item])
111+
inventory_added[item] = True
108112
except Exception as e:
109113
print(e)
110114
print('Failed to append inventory:')
@@ -129,10 +133,7 @@ def _write(ds, ostream, inventory_dict, netsta_set):
129133
inv = _read_inventories(inventory_folder)
130134

131135
# generate a list of files
132-
paths = [i for i in os.listdir(input_folder) if os.path.isfile(os.path.join(input_folder, i))]
133-
expr = re.compile(fnmatch.translate(file_pattern), re.IGNORECASE)
134-
files = [os.path.join(input_folder, j) for j in paths if re.match(expr, j)]
135-
136+
files = recursive_glob(input_folder, file_pattern)
136137
files = np.array(files)
137138
random.Random(nproc).shuffle(files)
138139
#print(files); exit(0)
@@ -143,21 +144,30 @@ def _write(ds, ostream, inventory_dict, netsta_set):
143144
stationlist = []
144145
filtered_files = []
145146
for file in tqdm(files, desc='Reading trace headers: '):
146-
#_, _, net, sta, _ = file.split('.')
147-
#tokens = os.path.basename(file).split('.')
148-
#net, sta = tokens[0], tokens[1]
147+
net = sta = None
149148

150-
st = []
151-
try:
152-
st = read(file, headonly=True)
153-
except Exception as e:
154-
print(e)
155-
continue
156-
# end try
157-
if(len(st) == 0): continue
149+
if(True):
150+
try:
151+
fn = os.path.basename(file)
152+
net, sta = fn.split('.')[:2]
153+
except:
154+
continue
155+
# end try
156+
#tokens = os.path.basename(file).split('.')
157+
#net, sta = tokens[0], tokens[1]
158+
else:
159+
st = []
160+
try:
161+
st = read(file, headonly=True)
162+
except Exception as e:
163+
print(e)
164+
continue
165+
# end try
166+
if(len(st) == 0): continue
158167

159-
net = st[0].meta.network
160-
sta = st[0].meta.station
168+
net = st[0].meta.network
169+
sta = st[0].meta.station
170+
# end if
161171

162172
ustations.add('%s.%s' % (net, sta))
163173
networklist.append(net)

seismic/ASDFdatabase/viewer/FederatedASDFViewer.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -482,9 +482,9 @@ def addWidget(emitter):
482482
# populate net, sta, loc, cha dict
483483
self.nslc_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
484484

485-
nslc_list = self.fds.get_nslc_list()
486-
for row in nslc_list:
487-
net, sta, loc, cha = row
485+
nslc_coverage = self.fds.get_nslc_coverage()
486+
for row in nslc_coverage:
487+
net, sta, loc, cha, _, _ = row
488488
self.nslc_dict[net][sta][loc].append(cha)
489489
# end for
490490

seismic/misc_p.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,11 @@ def __init__(self, output_folder, restart_mode=False):
3131

3232
if(self.restart_mode):
3333
if(os.path.exists(self.proc_fn)):
34-
self.prev_progress = int(open(self.proc_fn).read())
34+
try:
35+
self.prev_progress = int(open(self.proc_fn).read())
36+
except:
37+
pass
38+
# end try
3539
# end if
3640
# end if
3741
# end func
@@ -41,12 +45,13 @@ def increment(self):
4145
if(self.restart_mode and (self.prev_progress > 0) and (self.progress < self.prev_progress)):
4246
return False
4347
else:
44-
tmpfn = self.proc_fn + '.tmp'
45-
f = open(tmpfn, 'w+')
46-
f.write(str(self.progress))
47-
f.close()
48-
os.rename(tmpfn, self.proc_fn)
49-
48+
try:
49+
f = open(self.proc_fn, 'w+')
50+
f.write(str(self.progress))
51+
f.close()
52+
except:
53+
pass
54+
# end try
5055
return True
5156
# end if
5257
# end func

seismic/xcorqc/correlator.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,6 @@ def cull_pairs(pairs, keep_list_fn):
193193

194194
startTime = UTCDateTime(start_time)
195195
endTime = UTCDateTime(end_time)
196-
stationsCache = defaultdict(list)
197196
for pair in proc_stations[rank]:
198197
netsta1, netsta2 = pair
199198

@@ -212,7 +211,9 @@ def get_loccha(cha1, cha2):
212211
cha1 and cha2 -- e.g. ['.SHZ', '00.BHZ'], ['01.HHZ']
213212
"""
214213
result = [[], []]
215-
for chidx, (netsta, cha, ds) in enumerate(zip((netsta1, netsta2), (cha1, cha2), (ds1, ds2))):
214+
for chidx, (netsta, cha, ds) in enumerate(zip((netsta1, netsta2),
215+
(cha1, cha2),
216+
(ds1, ds2))):
216217
if('*' in cha1):
217218
cha = cha.replace('*', '.*') # hack to capture simple regex comparisons
218219
# end if
@@ -225,12 +226,14 @@ def get_loccha(cha1, cha2):
225226

226227
net, sta = netsta.split('.')
227228

228-
if((start_time, end_time, net, sta) in stationsCache):
229-
stations = stationsCache[(start_time, end_time, net, sta)]
230-
else:
231-
stations = ds.fds.get_stations(start_time, end_time, net, sta)
232-
stationsCache[(start_time, end_time, net, sta)] = stations
233-
# end if
229+
# find a list of entries where network and station names match and
230+
# start- and end-times overlap with data coverage. Note that this is
231+
# an approximate estimate and an actual cross-correlation may not be
232+
# computed due to gaps in data
233+
stations = ds.nslc_coverage[(ds.nslc_coverage['net'] == net) & \
234+
(ds.nslc_coverage['sta'] == sta) & \
235+
(ds.nslc_coverage['max_et'] >= startTime.timestamp) & \
236+
(ds.nslc_coverage['min_st'] <= endTime.timestamp)]
234237

235238
loc_pref = location_preferences_dict[netsta]
236239
ulocs = set()

seismic/xcorqc/subset_stacker.py

Lines changed: 28 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ def __init__(self):
6161
self.DIST_MINMAX = [d['DIST_MIN'], d['DIST_MAX']]
6262
self.EMAG_MINMAX = [d['EMAG_MIN'], d['EMAG_MAX']]
6363
self.AZ_TOL = d['AZ_TOL']
64+
self.param_dict = d
6465

6566
self.gc = None
6667
if(self.rank == 0):
@@ -91,6 +92,24 @@ def stack(self, spooled_matrix:SpooledMatrix,
9192
as defined in the manuscript
9293
"""
9394

95+
def circular_select(angles, min_angle, max_angle):
96+
# Normalize angles to [0, 360)
97+
angles = np.mod(angles, 360);
98+
min_angle = np.mod(min_angle, 360);
99+
max_angle = np.mod(max_angle, 360);
100+
101+
result = None
102+
if min_angle <= max_angle:
103+
# Linear range (e.g., 10-20)
104+
result = (angles >= min_angle) & (angles <= max_angle)
105+
else:
106+
# Circular wraparound range (e.g., 350-10)
107+
result = (angles >= min_angle) | (angles <= max_angle)
108+
# end if
109+
110+
return result
111+
# end func
112+
94113
def get_affected_indices(source_eids, pat, swat, swet):
95114
"""
96115
Finds indices of CC windows affected by P and SW energy
@@ -173,20 +192,15 @@ def get_affected_indices(source_eids, pat, swat, swet):
173192
eids2 = (edistdeg2 >= self.DIST_MINMAX[0]) & (edistdeg2 <= self.DIST_MINMAX[1])
174193

175194
# find event indices within azimuth of stations 1 and 2
176-
eids1_inside_az = eids1 & ((eaz1 >= (baz - self.AZ_TOL)) & (eaz1 <= (baz + self.AZ_TOL)))
177-
eids2_inside_az = eids2 & ((eaz2 >= (az - self.AZ_TOL)) & (eaz2 <= (az + self.AZ_TOL)))
178-
195+
eids1_inside_az = eids1 & circular_select(eaz1, (baz - self.AZ_TOL), (baz + self.AZ_TOL))
196+
eids2_inside_az = eids2 & circular_select(eaz2, (az - self.AZ_TOL), (az + self.AZ_TOL))
179197
eids_inside_az = eids1_inside_az | eids2_inside_az
180198

181199
# find event indices outside azimuth of both stations
182200
eids_outside_az = ~(eids_inside_az)
183201

184202
if(True):
185203
# sanity check
186-
test = (eids1 & ((eaz1 < (baz - self.AZ_TOL)) | (eaz1 > (baz + self.AZ_TOL)))) & \
187-
(eids2 & ((eaz2 < (az - self.AZ_TOL)) | (eaz2 > (az + self.AZ_TOL))))
188-
189-
assert np.alltrue(eids_outside_az == test)
190204
assert len(set(np.where(eids1_inside_az | eids2_inside_az)[0]).intersection( \
191205
set(np.where(eids_outside_az)[0]))) == 0
192206
# end if
@@ -236,18 +250,18 @@ def get_affected_indices(source_eids, pat, swat, swet):
236250
wc_XeiUXec = np.sum(idsXeiUXec)
237251
wc_Xeo = np.sum(idsXeo)
238252

239-
mean /= float(wc)
240-
mean_Xei /= float(wc_Xei)
241-
mean_Xec /= float(wc_Xec)
242-
mean_XeiUXec /= float(wc_XeiUXec)
243-
mean_Xeo /= float(wc_Xeo)
253+
if(wc > 0): mean /= float(wc)
254+
if(wc_Xei > 0): mean_Xei /= float(wc_Xei)
255+
if(wc_Xec > 0): mean_Xec /= float(wc_Xec)
256+
if(wc_XeiUXec > 0): mean_XeiUXec /= float(wc_XeiUXec)
257+
if(wc_Xeo > 0): mean_Xeo /= float(wc_Xeo)
244258

245-
#"""
259+
"""
246260
np.savez('stack3outputs.npz', xcf=mean,
247261
xcf1=mean_Xei, xcf2=mean_Xec, xcf3=mean_XeiUXec,
248262
xcf4=mean_Xeo, idsXei=idsXei, idsXec=idsXec,
249263
idsXeiUXec=idsXeiUXec, idsXeo=idsXeo)
250-
#"""
264+
"""
251265

252266
return mean, mean_Xei, mean_Xec, mean_XeiUXec, mean_Xeo, \
253267
wc, wc_Xei, wc_Xec, wc_XeiUXec, wc_Xeo

seismic/xcorqc/utils.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ def __init__(self, asdf_file_name, netsta_list='*'):
2020
self._earth_radius = 6371 # km
2121

2222
self.fds = FederatedASDFDataSet(asdf_file_name)
23+
self.nslc_coverage = self.fds.get_nslc_coverage()
24+
2325
# Gather station metadata
2426
netsta_list_subset = set(netsta_list.split(' ')) if netsta_list != '*' else netsta_list
2527
self.netsta_list = []
@@ -179,6 +181,7 @@ def read_subset_stacker_config()->dict:
179181
"EMAG_MAX",
180182
"AZ_TOL"]
181183
fn = os.path.join(os.getcwd(), 'subset_stack.conf')
184+
d = {}
182185
try:
183186
d = read_key_value_pairs(fn, keys, strict=True)
184187
for k in keys[1:]:
@@ -400,7 +403,7 @@ def from_nc(cls, nc_file):
400403
"""
401404
try:
402405
ds = ncDataset(nc_file)
403-
xcorr = np.array(ds.variables['xcorr'])
406+
xcorr = np.array(ds.variables['X'])
404407
shp = xcorr.shape
405408
ncols = 0
406409

0 commit comments

Comments
 (0)