From 161bcea4a608a370fb06bd6e6d81b77ce0d03638 Mon Sep 17 00:00:00 2001 From: Francesco Pannarale Date: Tue, 19 Nov 2024 09:47:30 -0700 Subject: [PATCH] Handling vetoes and timeslides in pygrb mini followups (#4941) * Use veto and segments files in pycbc_pygrb_minifollowups; enforce using the correct timeslide when following triggers/injections * Added note about pycbc_pygrb_minifollowups in pycbc_pygrb_page_tables * Updated construct_trials and generalized extract_basic_trig_properties to extract_trig_properties * Edited var name to something more meaningful * More accurate docstring * Aesthetics and missing import * Simplified pycbc_pygrb_minifollowups and fixed a sign! * Removed empty line * f-string and bare except --- bin/pygrb/pycbc_pygrb_minifollowups | 101 +++++++++++++++----- bin/pygrb/pycbc_pygrb_page_tables | 2 +- pycbc/results/pygrb_postprocessing_utils.py | 78 +++++++-------- 3 files changed, 115 insertions(+), 66 deletions(-) diff --git a/bin/pygrb/pycbc_pygrb_minifollowups b/bin/pygrb/pycbc_pygrb_minifollowups index 78c2b4c97e5..0e172f2cb79 100644 --- a/bin/pygrb/pycbc_pygrb_minifollowups +++ b/bin/pygrb/pycbc_pygrb_minifollowups @@ -56,11 +56,23 @@ def add_wiki_row(outfile, cols): def make_timeseries_plot(workflow, trig_file, snr_type, central_time, - shift_time, out_dir, ifo=None, tags=None): + out_dir, ifo=None, seg_files=None, + veto_file=None, tags=None): """Adds a node for a timeseries of PyGRB results to the workflow""" tags = [] if tags is None else tags + # Ensure that zero-lag data is used in these follow-up plots and store + # the slide-id option originally provided + orig_slide_id = None + if workflow.cp.has_option('pygrb_plot_snr_timeseries','slide-id'): + orig_slide_id = workflow.cp.get('pygrb_plot_snr_timeseries','slide-id') + workflow.cp.add_options_to_section( + 'pygrb_plot_snr_timeseries', + [('slide-id', '0')], + True + ) + # Initialize job node with its tags grb_name = workflow.cp.get('workflow', 'trigger-name') extra_tags = ['GRB'+grb_name] @@ -71,28 +83,42 @@ def make_timeseries_plot(workflow, trig_file, snr_type, central_time, ifos=workflow.ifos, out_dir=out_dir, tags=tags+extra_tags).create_node() node.add_input_opt('--trig-file', trig_file) + # Include the onsource trial if this is a follow up on the onsource + if 'loudest_onsource_event' in tags: + node.add_opt('--onsource') + # Pass the segments files and veto file + if seg_files: + node.add_input_list_opt('--seg-files', seg_files) + if veto_file: + node.add_input_opt('--veto-file', veto_file) node.new_output_file_opt(workflow.analysis_time, '.png', '--output-file', tags=extra_tags) # Quantity to be displayed on the y-axis of the plot node.add_opt('--y-variable', snr_type) if ifo is not None: node.add_opt('--ifo', ifo) - reset_central_time = shift_time - central_time - # Horizontal axis range the = prevents errors with negative times - x_lims = str(-5.+reset_central_time)+','+str(reset_central_time+5.) - node.add_opt('--x-lims='+x_lims) + node.add_opt('--x-lims=-5.,5.') # Plot title if ifo is not None: title_str = f"'{ifo} SNR at {central_time:.3f} (s)'" else: - title_str = "'%s SNR at %.3f (s)'" % (snr_type.capitalize(), - central_time) + title_str = f"'{snr_type.capitalize()} SNR at {central_time:.3f} (s)'" node.add_opt('--trigger-time', central_time) node.add_opt('--plot-title', title_str) # Add job node to workflow workflow += node + # Revert the config parser back to how it was given + if orig_slide_id: + workflow.cp.add_options_to_section( + 'pygrb_plot_snr_timeseries', + [('slide-id', orig_slide_id)], + True + ) + else: + workflow.cp.remove_option('pygrb_plot_snr_timeseries','slide-id') + return node.output_files @@ -107,6 +133,11 @@ parser.add_argument('--followups-file', help="HDF file with the triggers/injections to follow up") parser.add_argument('--wiki-file', help="Name of file to save wiki-formatted table in") +parser.add_argument("-a", "--seg-files", nargs="+", action="store", + default=[], help="The location of the buffer, " + + "onsource and offsource txt segment files.") +parser.add_argument("-V", "--veto-file", action="store", + help="The location of the xml veto file.") wf.add_workflow_command_line_group(parser) wf.add_workflow_settings_cli(parser, include_subdax_opts=True) ppu.pygrb_add_bestnr_cut_opt(parser) @@ -142,50 +173,68 @@ trig_file = resolve_url_to_file(os.path.abspath(args.trig_file)) ifos = ppu.extract_ifos(os.path.abspath(args.trig_file)) num_ifos = len(ifos) +# File instance of the veto file +veto_file = args.veto_file +start_rundir = os.getcwd() +if veto_file: + veto_file = os.path.join(start_rundir, args.veto_file) + veto_file = wf.resolve_url_to_file(veto_file) + +# Convert the segments files to a FileList +seg_files = wf.FileList([ + wf.resolve_url_to_file(os.path.join(start_rundir, f)) + for f in args.seg_files +]) + # (Loudest) off/on-source events are on time-slid data so the # try will succeed, as it finds the time shift columns. is_injection_followup = True try: time_shift = fp[ifos[0]+' time shift (s)'][0] is_injection_followup = False -except: +except KeyError: pass - # Loop over triggers/injections to be followed up for num_event in range(num_events): files = FileList([]) - logging.info('Processing event: %s', num_event) + logging.info('Processing event: %s', num_event+1) gps_time = fp['GPS time'][num_event] gps_time = gps_time.astype(float) + tags = args.tags + [str(num_event+1)] if wiki_file: row = [] for key in fp.keys(): row.append(fp[key][num_event]) add_wiki_row(wiki_file, row) - # Handle off/on-source loudest triggers follow-up (which are on slid data) - if not is_injection_followup: - for ifo in ifos: - time_shift = fp[ifo+' time shift (s)'][num_event] - ifo_time = gps_time - time_shift - files += make_timeseries_plot(workflow, trig_file, - 'single', gps_time, ifo_time, - args.output_dir, ifo=ifo, - tags=args.tags + [str(num_event)]) - files += mini.make_qscan_plot(workflow, ifo, ifo_time, - args.output_dir, - tags=args.tags + [str(num_event)]) # Handle injections (which are on unslid data) - else: + if is_injection_followup: for snr_type in ['reweighted', 'coherent']: files += make_timeseries_plot(workflow, trig_file, - snr_type, gps_time, gps_time, + snr_type, gps_time, args.output_dir, ifo=None, - tags=args.tags + [str(num_event)]) + seg_files=seg_files, + veto_file=veto_file, + tags=tags) for ifo in ifos: files += mini.make_qscan_plot(workflow, ifo, gps_time, args.output_dir, - tags=args.tags + [str(num_event)]) + tags=tags) + # Handle off/on-source loudest triggers follow-up (which may be on slid + # data in the case of the off-source) + else: + for i_ifo, ifo in enumerate(ifos): + time_shift = fp[ifo+' time shift (s)'][num_event] + ifo_time = gps_time + time_shift + files += make_timeseries_plot(workflow, trig_file, + 'single', ifo_time, + args.output_dir, ifo=ifo, + seg_files=seg_files, + veto_file=veto_file, + tags=tags) + files += mini.make_qscan_plot(workflow, ifo, ifo_time, + args.output_dir, + tags=tags) layouts += list(layout.grouper(files, 2)) diff --git a/bin/pygrb/pycbc_pygrb_page_tables b/bin/pygrb/pycbc_pygrb_page_tables index fd7ed4ed9bc..6d53fc14efa 100755 --- a/bin/pygrb/pycbc_pygrb_page_tables +++ b/bin/pygrb/pycbc_pygrb_page_tables @@ -395,7 +395,7 @@ if lofft_outfile: d.append(bestnr) td.append(d) - # th: table header + # th: table header [pycbc_pygrb_minifollowups looks for 'Slide Num'] th = ['Trial', 'Slide Num', 'p-value', 'GPS time', 'Rec. m1', 'Rec. m2', 'Rec. Mc', 'Rec. spin1z', 'Rec. spin2z', 'Rec. RA', 'Rec. Dec', 'SNR', 'Chi^2', 'Null SNR'] diff --git a/pycbc/results/pygrb_postprocessing_utils.py b/pycbc/results/pygrb_postprocessing_utils.py index b29bd1d1ddd..a6da15f26b4 100644 --- a/pycbc/results/pygrb_postprocessing_utils.py +++ b/pycbc/results/pygrb_postprocessing_utils.py @@ -23,7 +23,6 @@ Module to generate PyGRB figures: scatter plots and timeseries. """ -import os import logging import argparse import copy @@ -33,6 +32,7 @@ from scipy import stats import ligo.segments as segments from pycbc.events.coherent import reweightedsnr_cut +from pycbc.events import veto from pycbc import add_common_pycbc_options from pycbc.io.hdf import HFile @@ -242,7 +242,7 @@ def _extract_vetoes(veto_file, ifos, offsource): vetoes[ifo] = segments.segmentlist([offsource]) - clean_segs[ifo] vetoes.coalesce() for ifo in ifos: - for v in vetoes[ifo]: + for v in vetoes[ifo]: v_span = v[1] - v[0] logging.info("%ds of data vetoed at GPS time %d", v_span, v[0]) @@ -269,7 +269,7 @@ def _slide_vetoes(vetoes, slide_dict_or_list, slide_id, ifos): # ============================================================================= -# Recursive function to reach all datasets in an HDF file handle +# Recursive function to reach all datasets in an HDF file handle # ============================================================================= def _dataset_iterator(g, prefix=''): """Reach all datasets in an HDF file handle""" @@ -368,9 +368,6 @@ def load_data(input_file, ifos, rw_snr_threshold=None, data_tag=None, return trigs_dict - - - # ============================================================================= # Detector utils: # * Function to calculate the antenna response F+^2 + Fx^2 @@ -460,38 +457,35 @@ def sort_trigs(trial_dict, trigs, slide_dict, seg_dict): # ============================================================================= # Extract trigger properties and store them as dictionaries # ============================================================================= -def extract_basic_trig_properties(trial_dict, trigs, slide_dict, seg_dict, - opts): - """Extract and store as dictionaries time, SNR, and BestNR of - time-slid triggers""" +def extract_trig_properties(trial_dict, trigs, slide_dict, seg_dict, keys): + """Extract and store as dictionaries specific keys of time-slid + triggers (trigs) compatibly with the trials dictionary (trial_dict)""" # Sort the triggers into each slide sorted_trigs = sort_trigs(trial_dict, trigs, slide_dict, seg_dict) - logger.info("Triggers sorted.") + n_surviving_trigs = sum([len(i) for i in sorted_trigs.values()]) + msg = f"{n_surviving_trigs} triggers found within the trials dictionary " + msg += "and sorted." + logging.info(msg) + + found_trigs = {} + for key in keys: + found_trigs[key] = {} - # Build the 3 dictionaries - trig_time = {} - trig_snr = {} - trig_bestnr = {} for slide_id in slide_dict: slide_trigs = sorted_trigs[slide_id] indices = numpy.nonzero( numpy.isin(trigs['network/event_id'], slide_trigs))[0] - if slide_trigs: - trig_time[slide_id] = trigs['network/end_time_gc'][ - indices] - trig_snr[slide_id] = trigs['network/coherent_snr'][ - indices] - else: - trig_time[slide_id] = numpy.asarray([]) - trig_snr[slide_id] = numpy.asarray([]) - trig_bestnr[slide_id] = reweightedsnr_cut( - trigs['network/reweighted_snr'][indices], - opts.newsnr_threshold) + for key in keys: + if slide_trigs: + found_trigs[key][slide_id] = get_coinc_snr(trigs)[indices] \ + if key == 'network/coincident_snr' else trigs[key][indices] + else: + found_trigs[key][slide_id] = numpy.asarray([]) - logger.info("Time, SNR, and BestNR of triggers extracted.") + logging.info("Triggers information extracted.") - return trig_time, trig_snr, trig_bestnr + return found_trigs # ============================================================================= @@ -582,9 +576,11 @@ def load_segment_dict(hdf_file_path): # ============================================================================= # Construct the trials from the timeslides, segments, and vetoes # ============================================================================= -def construct_trials(seg_files, seg_dict, ifos, slide_dict, vetoes): - """Constructs trials from triggers, timeslides, segments and vetoes""" +def construct_trials(seg_files, seg_dict, ifos, slide_dict, veto_file, + hide_onsource=True): + """Constructs trials from segments, timeslides, and vetoes""" + logging.info("Constructing trials.") trial_dict = {} # Get segments @@ -593,19 +589,23 @@ def construct_trials(seg_files, seg_dict, ifos, slide_dict, vetoes): # Separate segments trial_time = abs(segs['on']) + # Determine the veto segments + vetoes = _extract_vetoes(veto_file, ifos, segs['off']) + + # Slide vetoes over trials: this can only *reduce* the analysis time for slide_id in slide_dict: - # These can only *reduce* the analysis time curr_seg_list = seg_dict[slide_id] - # Construct the buffer segment list + # Fill in the buffer segment list if the onsource data must be hidden seg_buffer = segments.segmentlist() - for ifo in ifos: - slide_offset = slide_dict[slide_id][ifo] - seg_buffer.append(segments.segment(segs['buffer'][0] - - slide_offset, - segs['buffer'][1] - - slide_offset)) - seg_buffer.coalesce() + if hide_onsource: + for ifo in ifos: + slide_offset = slide_dict[slide_id][ifo] + seg_buffer.append(segments.segment(segs['buffer'][0] - + slide_offset, + segs['buffer'][1] - + slide_offset)) + seg_buffer.coalesce() # Construct the ifo-indexed dictionary of slid veteoes slid_vetoes = _slide_vetoes(vetoes, slide_dict, slide_id, ifos)