diff --git a/analysis/scripts/config.sh b/analysis/scripts/config.sh
new file mode 100644
index 0000000000..9c92d293e8
--- /dev/null
+++ b/analysis/scripts/config.sh
@@ -0,0 +1,25 @@
+# Shared job configuration. Source it from a job script as:
+#   source <config_dir>/config.sh "$@"
+#
+# Positional arguments (the calling script's own arguments):
+#   $1  xml     XML run list
+#   $2  output  base directory for the produced files
+#   $3          directory containing this file (used only by the caller)
+#   $4  start   position of the first <run> entry to process
+#   $5  end     position of the last <run> entry to process
+#   $6  cutset  cut set handed to neutrinoFilterGoldenSample
+#   $7  mode    processing mode (read by run_neutrino_filter_xml.sh only)
+xml=$1
+output=$2
+start=$4
+end=$5
+cutset=$6
+mode=$7
+
+export ALIBUILD_WORK_DIR=/afs/cern.ch/user/s/schuetha/work/public/data_work_flow/sw
+source /cvmfs/sndlhc.cern.ch/SNDLHC-2025/Jan30/setUp.sh
+# Quote the substitution so the generated environment script reaches eval
+# unmodified (no word splitting or globbing).
+eval "$(alienv -a slc9_x86-64 load --no-refresh sndsw/master-local1)"
+echo "Finished setting up SNDSW env"
+export EOSSHIP=root://eosuser.cern.ch
diff --git a/analysis/scripts/count_the_run.py b/analysis/scripts/count_the_run.py
new file mode 100644
index 0000000000..7b01f945e5
--- /dev/null
+++ b/analysis/scripts/count_the_run.py
@@ -0,0 +1,85 @@
+import argparse
+import xml.etree.ElementTree as ET
+
+
+def count_runs(xml_file, min_events, outfile):
+    """Count the runs in an XML run list and write per-job run ranges.
+
+    Every <run> element that has a <run_number> child and an integer
+    <n_events> child (a missing <n_events> counts as 0) is counted. The
+    counted runs are split into consecutive, non-overlapping ranges of at
+    most ``min_events`` runs, written to ``outfile`` as one
+    "first<TAB>last" line per range (1-based, inclusive).
+
+    Args:
+        xml_file: Path to the XML run list.
+        min_events: Number of runs per range, and also the minimum n_events
+            a run needs to be reported as selected. Values <= 0 put all
+            runs into a single range.
+        outfile: Path of the text file to write; it is overwritten.
+
+    Returns:
+        Number of counted runs with n_events >= min_events.
+    """
+    tree = ET.parse(xml_file)
+    root = tree.getroot()
+
+    runs_selected = []
+    n = 0
+
+    # Find all <run> elements anywhere under the root
+    for run in root.findall(".//run"):
+        run_number = run.findtext("run_number")
+        n_events_text = run.findtext("n_events", "0")
+
+        # Skip if missing run_number
+        if run_number is None:
+            continue
+
+        try:
+            n_events = int(n_events_text)
+        except ValueError:
+            # Bad number in XML - skip this run
+            continue
+
+        n += 1
+
+        if n_events >= min_events:
+            runs_selected.append(run_number)
+
+    # NOTE(review): skipped entries are not counted, so the ranges line up
+    # with positions in the XML only when no <run> entry is skipped; confirm
+    # against the scripts that consume the ranges.
+    # A non-positive size cannot be used as a range step, so fall back to
+    # a single range that covers every counted run.
+    chunk = min_events if min_events > 0 else max(n, 1)
+
+    with open(outfile, "w") as f:
+        for first in range(1, n + 1, chunk):
+            last = min(first + chunk - 1, n)
+            f.write(f"{first}\t{last}\n")
+
+    return len(runs_selected)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Select runs from an XML runlist based on minimum number of events."
+    )
+    parser.add_argument(
+        "-xml", "--xml_file", required=True,
+        help="Path to the XML runlist file."
+    )
+    parser.add_argument(
+        "-n", "--min-events", dest="min_events", type=int, default=0,
+        help="Number of runs per job; also the minimum n_events of a selected run."
+    )
+    parser.add_argument(
+        "-o", "--output", default="se.txt",
+        help="Output text file with one 'first<TAB>last' run range per line (default: se.txt)."
+    )
+
+    args = parser.parse_args()
+
+    n_selected = count_runs(args.xml_file, args.min_events, args.output)
+    print(f"Total runs selected: {n_selected}")
diff --git a/analysis/scripts/run_neutrino_filter_xml.sh b/analysis/scripts/run_neutrino_filter_xml.sh
new file mode 100755
index 0000000000..1846b154b7
--- /dev/null
+++ b/analysis/scripts/run_neutrino_filter_xml.sh
@@ -0,0 +1,122 @@
+#!/bin/bash
+# Run one stage of the neutrino filter chain for a range of runs taken from
+# an XML run list.
+#
+# Usage:
+#   run_neutrino_filter_xml.sh <xml> <output> <config_dir> <start> <end> <cutset> <mode>
+#
+# <mode> is STAGE1, RECO or STAGE2. RECO reads the STAGE1 files and STAGE2
+# reads the STAGE1 and RECO files below <output>/<run_number>/. The other
+# arguments are read by <config_dir>/config.sh.
+set -o errexit -o pipefail -o noclobber
+source "$3/config.sh" "$@"
+set -o nounset
+
+# Print the geometry file path for run number $1, as returned by
+# snd::analysis_tools::GetGeoPath for $SNDSW_ROOT/analysis/tools/geo_paths.csv.
+# Only the last line of the ROOT output is kept.
+get_geo_file() {
+    root -l -b -q -e '.L sndGeometryGetter.cxx+' -e "std::string csv=std::string(gSystem->Getenv(\"SNDSW_ROOT\"))+\"/analysis/tools/geo_paths.csv\"; \
+    std::cout << snd::analysis_tools::GetGeoPath(csv, $1) << std::endl;" | tail -n 1
+}
+
+echo "Reading the XML file: $xml"
+# Pre-check: under errexit a failing xmllint (unreadable or malformed XML)
+# stops the job here rather than in the middle of the loop.
+n_runs=$(xmllint --xpath 'count(/runlist/runs/run)' "${xml}")
+echo "Run list contains ${n_runs} runs"
+
+mode_list=(STAGE1 RECO STAGE2)
+
+# Compare against each entry exactly; a regex match on the joined list would
+# also accept substrings such as "STAGE" and an empty mode.
+mode_ok=false
+for known_mode in "${mode_list[@]}"
+do
+    if [[ "$known_mode" == "$mode" ]]
+    then
+        mode_ok=true
+        break
+    fi
+done
+
+if [[ "$mode_ok" != true ]]
+then
+    echo "Mode $mode not available. It must be one of ${mode_list[*]}" >&2
+    echo "Exitting." >&2
+    exit 1
+fi
+
+# Iterate through every <run> entry from position $start to $end
+for i in $(seq "$start" "$end")
+do
+    run_number=$(xmllint --xpath "string(/runlist/runs/run[$i]/run_number)" "${xml}")
+    if [[ -z "$run_number" ]]
+    then
+        echo "No run at position $i in ${xml}. Skipping." >&2
+        continue
+    fi
+    n_files=$(xmllint --xpath "string(/runlist/runs/run[$i]/n_files)" "${xml}")
+    n_files=$((n_files-1)) # to make the seq 0 based
+    path=$(xmllint --xpath "string(/runlist/runs/run[$i]/path)" "${xml}")
+    run_dir="${output}/${run_number}"
+
+    case "$mode" in
+    STAGE1)
+        for j in $(seq 0 "$n_files")
+        do
+            out_file="filtered_MC_00${run_number}_${j}.root"
+            if [ -f "${run_dir}/${mode}/${out_file}" ]
+            then
+                echo "File ${run_dir}/${mode}/${out_file} exists. Skipping."
+                continue
+            fi
+            js=$(printf "%03d" "$j")
+            neutrinoFilterGoldenSample "${path}/sndsw_raw-0${js}.root" "${out_file}" "${cutset}"
+            mkdir -p "${run_dir}/${mode}/"
+            xrdcp -f "./${out_file}" "${run_dir}/${mode}/"
+            rm -f "./${out_file}"
+        done
+        ;;
+
+    RECO)
+        geo_file=$(get_geo_file "$run_number")
+        for j in $(seq 0 "$n_files")
+        do
+            out_file="filtered_MC_00${run_number}_${j}__muonReco.root"
+            if [ -f "${run_dir}/${mode}/${out_file}" ]
+            then
+                echo "File ${run_dir}/${mode}/${out_file} exists. Skipping."
+                continue
+            fi
+            python3 "${SNDSW_ROOT}/shipLHC/run_muonRecoSND.py" -f "${run_dir}/STAGE1/filtered_MC_00${run_number}_${j}.root" -g "${geo_file}" -c passing_mu_DS -sc 1 -s ./ -hf linearSlopeIntercept -o
+            mkdir -p "${run_dir}/${mode}/"
+            xrdcp -f "./${out_file}" "${run_dir}/${mode}/"
+            rm -f "./${out_file}"
+        done
+        ;;
+
+    STAGE2)
+        geo_file=$(get_geo_file "$run_number")
+        for j in $(seq 0 "$n_files")
+        do
+            out_file="filtered_MC_00${run_number}_${j}_stage2_noscifi2.root"
+            if [ -f "${run_dir}/${mode}/${out_file}" ]
+            then
+                echo "File ${run_dir}/${mode}/${out_file} exists. Skipping."
+                continue
+            fi
+            python3 "${SNDSW_ROOT}/analysis/neutrinoFilterGoldenSample_stage2.py" -f "${run_dir}/STAGE1/filtered_MC_00${run_number}_${j}.root" -t "${run_dir}/RECO/filtered_MC_00${run_number}_${j}__muonReco.root" -o "${out_file}" -g "${geo_file}"
+            mkdir -p "${run_dir}/${mode}/"
+            xrdcp -f "./${out_file}" "${run_dir}/${mode}/"
+            rm -f "./${out_file}"
+        done
+        ;;
+
+    *)
+        echo "Mode $mode not recognized. Exitting." >&2
+        exit 1
+        ;;
+    esac
+    echo "Finished processing run number $run_number"
+done
diff --git a/analysis/scripts/run_neutrino_xml_all_stage.sh b/analysis/scripts/run_neutrino_xml_all_stage.sh
new file mode 100755
index 0000000000..23e82d8260
--- /dev/null
+++ b/analysis/scripts/run_neutrino_xml_all_stage.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+# Run the stage-1 filter, the muon reconstruction and the stage-2 filter in
+# the working directory for a range of runs taken from an XML run list, then
+# copy the produced files to <output>/<run_number>/.
+#
+# Usage:
+#   run_neutrino_xml_all_stage.sh <xml> <output> <config_dir> <start> <end> <cutset>
+#
+# The arguments are read by <config_dir>/config.sh.
+set -o errexit -o pipefail -o noclobber
+source "$3/config.sh" "$@"
+set -o nounset
+# Let the output glob below expand to nothing when no file was produced.
+shopt -s nullglob
+
+echo "Reading the XML file: $xml"
+# Pre-check: under errexit a failing xmllint (unreadable or malformed XML)
+# stops the job here rather than in the middle of the loop.
+n_runs=$(xmllint --xpath 'count(/runlist/runs/run)' "${xml}")
+echo "Run list contains ${n_runs} runs"
+
+# Iterate through every <run> entry from position $start to $end
+for i in $(seq "$start" "$end")
+do
+    run_number=$(xmllint --xpath "string(/runlist/runs/run[$i]/run_number)" "${xml}")
+    if [[ -z "$run_number" ]]
+    then
+        echo "No run at position $i in ${xml}. Skipping." >&2
+        continue
+    fi
+    n_files=$(xmllint --xpath "string(/runlist/runs/run[$i]/n_files)" "${xml}")
+    n_files=$((n_files-1)) # to make the seq 0 based
+    path=$(xmllint --xpath "string(/runlist/runs/run[$i]/path)" "${xml}")
+    geo_file=$( root -l -b -q -e '.L sndGeometryGetter.cxx+' -e "std::string csv=std::string(gSystem->Getenv(\"SNDSW_ROOT\"))+\"/analysis/tools/geo_paths.csv\"; \
+    std::cout << snd::analysis_tools::GetGeoPath(csv, ${run_number}) << std::endl;" | tail -n 1 )
+
+    for j in $(seq 0 "$n_files")
+    do
+        js=$(printf "%03d" "$j")
+        stage1_file="filtered_MC_00${run_number}_${j}.root"
+        reco_file="filtered_MC_00${run_number}_${j}__muonReco.root"
+        stage2_file="filtered_MC_00${run_number}_${j}_stage2_noscifi2.root"
+        neutrinoFilterGoldenSample "${path}/sndsw_raw-0${js}.root" "${stage1_file}" "${cutset}"
+        python3 "${SNDSW_ROOT}/shipLHC/run_muonRecoSND.py" -f "${stage1_file}" -g "${geo_file}" -c passing_mu_DS -sc 1 -s ./ -hf linearSlopeIntercept -o
+        # The stage-1 file was written to the working directory above, so it
+        # is read from there (no per-run subdirectory exists locally).
+        python3 "${SNDSW_ROOT}/analysis/neutrinoFilterGoldenSample_stage2.py" -f "${stage1_file}" -t "${reco_file}" -o "${stage2_file}" -g "${geo_file}"
+    done
+
+    mkdir -p "${output}/${run_number}/"
+    # This pattern also matches the *__muonReco.root and
+    # *_stage2_noscifi2.root files, so one copy and one cleanup cover all
+    # three outputs.
+    produced=( ./filtered_MC_00"${run_number}"_*.root )
+    if (( ${#produced[@]} > 0 ))
+    then
+        xrdcp -f "${produced[@]}" "${output}/${run_number}/"
+        rm -f "${produced[@]}"
+    fi
+    echo "Finished processing run number $run_number"
+done