Skip to content

Commit cb2e703

Browse files
pannaraleprayush
authored andcommitted
Handling vetoes and timeslides in pygrb mini followups (gwastro#4941)
* Use veto and segments files in pycbc_pygrb_minifollowups; enforce using the correct timeslide when following triggers/injections * Added note about pycbc_pygrb_minifollowups in pycbc_pygrb_page_tables * Updated construct_trials and generalized extract_basic_trig_properties to extract_trig_properties * Edited var name to something more meaningful * More accurate docstring * Aesthetics and missing import * Simplified pycbc_pygrb_minifollowups and fixed a sign! * Removed empty line * f-string and bare except
1 parent 17c3dd8 commit cb2e703

File tree

3 files changed

+115
-66
lines changed

3 files changed

+115
-66
lines changed

Diff for: bin/pygrb/pycbc_pygrb_minifollowups

+75-26
Original file line numberDiff line numberDiff line change
@@ -56,11 +56,23 @@ def add_wiki_row(outfile, cols):
5656

5757

5858
def make_timeseries_plot(workflow, trig_file, snr_type, central_time,
59-
shift_time, out_dir, ifo=None, tags=None):
59+
out_dir, ifo=None, seg_files=None,
60+
veto_file=None, tags=None):
6061
"""Adds a node for a timeseries of PyGRB results to the workflow"""
6162

6263
tags = [] if tags is None else tags
6364

65+
# Ensure that zero-lag data is used in these follow-up plots and store
66+
# the slide-id option originally provided
67+
orig_slide_id = None
68+
if workflow.cp.has_option('pygrb_plot_snr_timeseries','slide-id'):
69+
orig_slide_id = workflow.cp.get('pygrb_plot_snr_timeseries','slide-id')
70+
workflow.cp.add_options_to_section(
71+
'pygrb_plot_snr_timeseries',
72+
[('slide-id', '0')],
73+
True
74+
)
75+
6476
# Initialize job node with its tags
6577
grb_name = workflow.cp.get('workflow', 'trigger-name')
6678
extra_tags = ['GRB'+grb_name]
@@ -71,28 +83,42 @@ def make_timeseries_plot(workflow, trig_file, snr_type, central_time,
7183
ifos=workflow.ifos, out_dir=out_dir,
7284
tags=tags+extra_tags).create_node()
7385
node.add_input_opt('--trig-file', trig_file)
86+
# Include the onsource trial if this is a follow up on the onsource
87+
if 'loudest_onsource_event' in tags:
88+
node.add_opt('--onsource')
89+
# Pass the segments files and veto file
90+
if seg_files:
91+
node.add_input_list_opt('--seg-files', seg_files)
92+
if veto_file:
93+
node.add_input_opt('--veto-file', veto_file)
7494
node.new_output_file_opt(workflow.analysis_time, '.png',
7595
'--output-file', tags=extra_tags)
7696
# Quantity to be displayed on the y-axis of the plot
7797
node.add_opt('--y-variable', snr_type)
7898
if ifo is not None:
7999
node.add_opt('--ifo', ifo)
80-
reset_central_time = shift_time - central_time
81-
# Horizontal axis range the = prevents errors with negative times
82-
x_lims = str(-5.+reset_central_time)+','+str(reset_central_time+5.)
83-
node.add_opt('--x-lims='+x_lims)
100+
node.add_opt('--x-lims=-5.,5.')
84101
# Plot title
85102
if ifo is not None:
86103
title_str = f"'{ifo} SNR at {central_time:.3f} (s)'"
87104
else:
88-
title_str = "'%s SNR at %.3f (s)'" % (snr_type.capitalize(),
89-
central_time)
105+
title_str = f"'{snr_type.capitalize()} SNR at {central_time:.3f} (s)'"
90106
node.add_opt('--trigger-time', central_time)
91107
node.add_opt('--plot-title', title_str)
92108

93109
# Add job node to workflow
94110
workflow += node
95111

112+
# Revert the config parser back to how it was given
113+
if orig_slide_id:
114+
workflow.cp.add_options_to_section(
115+
'pygrb_plot_snr_timeseries',
116+
[('slide-id', orig_slide_id)],
117+
True
118+
)
119+
else:
120+
workflow.cp.remove_option('pygrb_plot_snr_timeseries','slide-id')
121+
96122
return node.output_files
97123

98124

@@ -107,6 +133,11 @@ parser.add_argument('--followups-file',
107133
help="HDF file with the triggers/injections to follow up")
108134
parser.add_argument('--wiki-file',
109135
help="Name of file to save wiki-formatted table in")
136+
parser.add_argument("-a", "--seg-files", nargs="+", action="store",
137+
default=[], help="The location of the buffer, " +
138+
"onsource and offsource txt segment files.")
139+
parser.add_argument("-V", "--veto-file", action="store",
140+
help="The location of the xml veto file.")
110141
wf.add_workflow_command_line_group(parser)
111142
wf.add_workflow_settings_cli(parser, include_subdax_opts=True)
112143
ppu.pygrb_add_bestnr_cut_opt(parser)
@@ -142,50 +173,68 @@ trig_file = resolve_url_to_file(os.path.abspath(args.trig_file))
142173
ifos = ppu.extract_ifos(os.path.abspath(args.trig_file))
143174
num_ifos = len(ifos)
144175

176+
# File instance of the veto file
177+
veto_file = args.veto_file
178+
start_rundir = os.getcwd()
179+
if veto_file:
180+
veto_file = os.path.join(start_rundir, args.veto_file)
181+
veto_file = wf.resolve_url_to_file(veto_file)
182+
183+
# Convert the segments files to a FileList
184+
seg_files = wf.FileList([
185+
wf.resolve_url_to_file(os.path.join(start_rundir, f))
186+
for f in args.seg_files
187+
])
188+
145189
# (Loudest) off/on-source events are on time-slid data so the
146190
# try will succeed, as it finds the time shift columns.
147191
is_injection_followup = True
148192
try:
149193
time_shift = fp[ifos[0]+' time shift (s)'][0]
150194
is_injection_followup = False
151-
except:
195+
except KeyError:
152196
pass
153197

154-
155198
# Loop over triggers/injections to be followed up
156199
for num_event in range(num_events):
157200
files = FileList([])
158-
logging.info('Processing event: %s', num_event)
201+
logging.info('Processing event: %s', num_event+1)
159202
gps_time = fp['GPS time'][num_event]
160203
gps_time = gps_time.astype(float)
204+
tags = args.tags + [str(num_event+1)]
161205
if wiki_file:
162206
row = []
163207
for key in fp.keys():
164208
row.append(fp[key][num_event])
165209
add_wiki_row(wiki_file, row)
166-
# Handle off/on-source loudest triggers follow-up (which are on slid data)
167-
if not is_injection_followup:
168-
for ifo in ifos:
169-
time_shift = fp[ifo+' time shift (s)'][num_event]
170-
ifo_time = gps_time - time_shift
171-
files += make_timeseries_plot(workflow, trig_file,
172-
'single', gps_time, ifo_time,
173-
args.output_dir, ifo=ifo,
174-
tags=args.tags + [str(num_event)])
175-
files += mini.make_qscan_plot(workflow, ifo, ifo_time,
176-
args.output_dir,
177-
tags=args.tags + [str(num_event)])
178210
# Handle injections (which are on unslid data)
179-
else:
211+
if is_injection_followup:
180212
for snr_type in ['reweighted', 'coherent']:
181213
files += make_timeseries_plot(workflow, trig_file,
182-
snr_type, gps_time, gps_time,
214+
snr_type, gps_time,
183215
args.output_dir, ifo=None,
184-
tags=args.tags + [str(num_event)])
216+
seg_files=seg_files,
217+
veto_file=veto_file,
218+
tags=tags)
185219
for ifo in ifos:
186220
files += mini.make_qscan_plot(workflow, ifo, gps_time,
187221
args.output_dir,
188-
tags=args.tags + [str(num_event)])
222+
tags=tags)
223+
# Handle off/on-source loudest triggers follow-up (which may be on slid
224+
# data in the case of the off-source)
225+
else:
226+
for i_ifo, ifo in enumerate(ifos):
227+
time_shift = fp[ifo+' time shift (s)'][num_event]
228+
ifo_time = gps_time + time_shift
229+
files += make_timeseries_plot(workflow, trig_file,
230+
'single', ifo_time,
231+
args.output_dir, ifo=ifo,
232+
seg_files=seg_files,
233+
veto_file=veto_file,
234+
tags=tags)
235+
files += mini.make_qscan_plot(workflow, ifo, ifo_time,
236+
args.output_dir,
237+
tags=tags)
189238

190239
layouts += list(layout.grouper(files, 2))
191240

Diff for: bin/pygrb/pycbc_pygrb_page_tables

+1-1
Original file line numberDiff line numberDiff line change
@@ -395,7 +395,7 @@ if lofft_outfile:
395395
d.append(bestnr)
396396
td.append(d)
397397

398-
# th: table header
398+
# th: table header [pycbc_pygrb_minifollowups looks for 'Slide Num']
399399
th = ['Trial', 'Slide Num', 'p-value', 'GPS time',
400400
'Rec. m1', 'Rec. m2', 'Rec. Mc', 'Rec. spin1z', 'Rec. spin2z',
401401
'Rec. RA', 'Rec. Dec', 'SNR', 'Chi^2', 'Null SNR']

Diff for: pycbc/results/pygrb_postprocessing_utils.py

+39-39
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
Module to generate PyGRB figures: scatter plots and timeseries.
2424
"""
2525

26-
import os
2726
import logging
2827
import argparse
2928
import copy
@@ -33,6 +32,7 @@
3332
from scipy import stats
3433
import ligo.segments as segments
3534
from pycbc.events.coherent import reweightedsnr_cut
35+
from pycbc.events import veto
3636
from pycbc import add_common_pycbc_options
3737
from pycbc.io.hdf import HFile
3838

@@ -242,7 +242,7 @@ def _extract_vetoes(veto_file, ifos, offsource):
242242
vetoes[ifo] = segments.segmentlist([offsource]) - clean_segs[ifo]
243243
vetoes.coalesce()
244244
for ifo in ifos:
245-
for v in vetoes[ifo]:
245+
for v in vetoes[ifo]:
246246
v_span = v[1] - v[0]
247247
logging.info("%ds of data vetoed at GPS time %d",
248248
v_span, v[0])
@@ -269,7 +269,7 @@ def _slide_vetoes(vetoes, slide_dict_or_list, slide_id, ifos):
269269

270270

271271
# =============================================================================
272-
# Recursive function to reach all datasets in an HDF file handle
272+
# Recursive function to reach all datasets in an HDF file handle
273273
# =============================================================================
274274
def _dataset_iterator(g, prefix=''):
275275
"""Reach all datasets in an HDF file handle"""
@@ -368,9 +368,6 @@ def load_data(input_file, ifos, rw_snr_threshold=None, data_tag=None,
368368
return trigs_dict
369369

370370

371-
372-
373-
374371
# =============================================================================
375372
# Detector utils:
376373
# * Function to calculate the antenna response F+^2 + Fx^2
@@ -460,38 +457,35 @@ def sort_trigs(trial_dict, trigs, slide_dict, seg_dict):
460457
# =============================================================================
461458
# Extract trigger properties and store them as dictionaries
462459
# =============================================================================
463-
def extract_basic_trig_properties(trial_dict, trigs, slide_dict, seg_dict,
464-
opts):
465-
"""Extract and store as dictionaries time, SNR, and BestNR of
466-
time-slid triggers"""
460+
def extract_trig_properties(trial_dict, trigs, slide_dict, seg_dict, keys):
461+
"""Extract and store as dictionaries specific keys of time-slid
462+
triggers (trigs) compatibly with the trials dictionary (trial_dict)"""
467463

468464
# Sort the triggers into each slide
469465
sorted_trigs = sort_trigs(trial_dict, trigs, slide_dict, seg_dict)
470-
logger.info("Triggers sorted.")
466+
n_surviving_trigs = sum([len(i) for i in sorted_trigs.values()])
467+
msg = f"{n_surviving_trigs} triggers found within the trials dictionary "
468+
msg += "and sorted."
469+
logging.info(msg)
470+
471+
found_trigs = {}
472+
for key in keys:
473+
found_trigs[key] = {}
471474

472-
# Build the 3 dictionaries
473-
trig_time = {}
474-
trig_snr = {}
475-
trig_bestnr = {}
476475
for slide_id in slide_dict:
477476
slide_trigs = sorted_trigs[slide_id]
478477
indices = numpy.nonzero(
479478
numpy.isin(trigs['network/event_id'], slide_trigs))[0]
480-
if slide_trigs:
481-
trig_time[slide_id] = trigs['network/end_time_gc'][
482-
indices]
483-
trig_snr[slide_id] = trigs['network/coherent_snr'][
484-
indices]
485-
else:
486-
trig_time[slide_id] = numpy.asarray([])
487-
trig_snr[slide_id] = numpy.asarray([])
488-
trig_bestnr[slide_id] = reweightedsnr_cut(
489-
trigs['network/reweighted_snr'][indices],
490-
opts.newsnr_threshold)
479+
for key in keys:
480+
if slide_trigs:
481+
found_trigs[key][slide_id] = get_coinc_snr(trigs)[indices] \
482+
if key == 'network/coincident_snr' else trigs[key][indices]
483+
else:
484+
found_trigs[key][slide_id] = numpy.asarray([])
491485

492-
logger.info("Time, SNR, and BestNR of triggers extracted.")
486+
logging.info("Triggers information extracted.")
493487

494-
return trig_time, trig_snr, trig_bestnr
488+
return found_trigs
495489

496490

497491
# =============================================================================
@@ -582,9 +576,11 @@ def load_segment_dict(hdf_file_path):
582576
# =============================================================================
583577
# Construct the trials from the timeslides, segments, and vetoes
584578
# =============================================================================
585-
def construct_trials(seg_files, seg_dict, ifos, slide_dict, vetoes):
586-
"""Constructs trials from triggers, timeslides, segments and vetoes"""
579+
def construct_trials(seg_files, seg_dict, ifos, slide_dict, veto_file,
580+
hide_onsource=True):
581+
"""Constructs trials from segments, timeslides, and vetoes"""
587582

583+
logging.info("Constructing trials.")
588584
trial_dict = {}
589585

590586
# Get segments
@@ -593,19 +589,23 @@ def construct_trials(seg_files, seg_dict, ifos, slide_dict, vetoes):
593589
# Separate segments
594590
trial_time = abs(segs['on'])
595591

592+
# Determine the veto segments
593+
vetoes = _extract_vetoes(veto_file, ifos, segs['off'])
594+
595+
# Slide vetoes over trials: this can only *reduce* the analysis time
596596
for slide_id in slide_dict:
597-
# These can only *reduce* the analysis time
598597
curr_seg_list = seg_dict[slide_id]
599598

600-
# Construct the buffer segment list
599+
# Fill in the buffer segment list if the onsource data must be hidden
601600
seg_buffer = segments.segmentlist()
602-
for ifo in ifos:
603-
slide_offset = slide_dict[slide_id][ifo]
604-
seg_buffer.append(segments.segment(segs['buffer'][0] -
605-
slide_offset,
606-
segs['buffer'][1] -
607-
slide_offset))
608-
seg_buffer.coalesce()
601+
if hide_onsource:
602+
for ifo in ifos:
603+
slide_offset = slide_dict[slide_id][ifo]
604+
seg_buffer.append(segments.segment(segs['buffer'][0] -
605+
slide_offset,
606+
segs['buffer'][1] -
607+
slide_offset))
608+
seg_buffer.coalesce()
609609

610610
# Construct the ifo-indexed dictionary of slid veteoes
611611
slid_vetoes = _slide_vetoes(vetoes, slide_dict, slide_id, ifos)

0 commit comments

Comments
 (0)