-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathlumi_list_from_das.py
executable file
·119 lines (94 loc) · 4.99 KB
/
lumi_list_from_das.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#!/usr/bin/env python
"""
Script to create lumilist of lumisections for MC or data dataset
It is designed to only work on *one* sample (& its ext, if it exists),
not multiple:
e.g.:
OK: /QCD_Pt_300to470_TuneCP5_13TeV_pythia8/RunIIFall17MiniAODv2-PU2017_12Apr2018_94X_mc2017_realistic*/MINIAODSIM
Not OK: /QCD_Pt_*to*_TuneCP5_13TeV_pythia8/RunIIFall17MiniAODv2-PU2017_12Apr2018_94X_mc2017_realistic*/MINIAODSIM
Requires you to have a valid voms proxy, as it calls DAS.
"""
from __future__ import print_function
import sys,os
import argparse
import FWCore.PythonUtilities.LumiList as LumiList
from Utilities.General.cmssw_das_client import get_data
sys.path.append(os.environ["CMSSW_BASE"]+"/src/UHH2/scripts/crab")
from DasQuery import autocomplete_Datasets,check_voms
def get_mc_lumi_list(inputDataset="/QCD_Pt_300to470_TuneCP5_13TeV_pythia8/RunIIFall17MiniAODv2-PU2017_12Apr2018_94X_mc2017_realistic*/MINIAODSIM"):
    """Get the LumiList object(s) for dataset(s) matching `inputDataset`

    inputDataset:
        if a str, will ask DAS to autocomplete (can contain wildcards)
        if a list/tuple/set[str], will iterate over each entry as given,
        without asking DAS to autocomplete. This is because the user might
        have cached the dataset names before calling this function, and we
        don't want to call DAS more than necessary.

    returns: a dict with one entry per dataset, with the DAS dataset string
        as key and a LumiList as value. If the DAS reply for a dataset cannot
        be parsed, a message is printed and the LumiList stored for it holds
        only whatever was accumulated before the failure (possibly nothing).

    raises RuntimeError if no valid voms proxy
    raises TypeError if inputDataset incorrect type
    """
    if not check_voms():
        raise RuntimeError("Missing voms proxy")

    if isinstance(inputDataset, str):
        inputDatasets = autocomplete_Datasets([inputDataset])
    elif isinstance(inputDataset, (list, set, tuple)):
        # Already-resolved dataset names: use them directly (no DAS lookup).
        inputDatasets = inputDataset
    else:
        raise TypeError('get_mc_lumi_list: `inputDataset` expects str or list/tuple/set[str]')

    result = {}
    for dataset in inputDatasets:
        print(dataset)
        json_dict = get_data(host='https://cmsweb.cern.ch', query="run lumi file dataset="+dataset, idx=0, limit=0, threshold=300)
        lumi_list = LumiList.LumiList()
        try:
            file_infos = json_dict['data']
            n_files = len(file_infos)
            # Report progress roughly every 10% of the entries. Clamp to 1 so
            # that datasets with fewer than 10 entries do not make the modulo
            # below divide by zero.
            printout = max(1, n_files // 10)
            for i, file_info in enumerate(file_infos):
                if i % printout == 0:
                    print("{}% done...".format(100 * i / n_files))
                # NOTE(review): assumes each entry carries its lumis under
                # ['lumi'][0]['number'] and its run under
                # ['run'][0]['run_number'] - confirm against the DAS reply.
                ls = file_info['lumi'][0]['number']
                run = file_info['run'][0]['run_number']
                lumi_list += LumiList.LumiList(runsAndLumis={run: ls})
        except Exception as e:
            # Deliberately best-effort: report the problem and carry on with
            # the remaining datasets instead of aborting the whole query.
            print('Did not find lumis for', dataset)
            print(e)
        result[dataset] = lumi_list
    return result
def write_lumi_list(inputDataset="/QCD_Pt_1000to1400_TuneCP5_13TeV_pythia8/RunIIFall17MiniAODv2-PU2017_12Apr2018_94X_mc2017_realistic_v14*/MINIAODSIM", filename="test.json"):
    """Write the lumilist(s) of the dataset(s) matching `inputDataset` as JSON

    A single match is written to `filename`. With two matches (assumed to be
    a nominal sample plus its ext sample), the one whose name contains '_ext'
    goes to `filename` with '_ext' inserted before the file extension, and
    the other one goes to `filename` itself.

    raises ValueError if the pattern matches no dataset, or more than 2
        datasets (2 are allowed to cover the nominal+ext case)
    """
    matches = autocomplete_Datasets([inputDataset])
    n_matches = len(matches)

    if n_matches > 2:
        # Show the user everything that matched before bailing out
        for match in matches:
            print(match)
        raise ValueError("The given inputDataset DAS string corresponds to more than two samples. This is a bit unusual. "
                         "Since this script can only handle up to 2 datasets, please use a more specific dataset pattern.")
    if n_matches == 0:
        raise ValueError("No matching datasets for the dataset pattern")

    results = get_mc_lumi_list(inputDataset)
    dataset_names = list(results.keys())

    if len(results) == 1:
        only_name = dataset_names[0]
        results[only_name].writeJSON(fileName=filename)
    elif len(results) > 1:
        # Two results are taken to be nominal + ext. The dict may list the
        # ext sample first, so look at the first name to decide which output
        # file belongs to which sample.
        first_name, second_name = dataset_names[0], dataset_names[1]
        stem, suffix = os.path.splitext(filename)
        ext_filename = stem + "_ext" + suffix

        if '_ext' in first_name:
            targets = (ext_filename, filename)
        else:
            targets = (filename, ext_filename)

        for name, target in zip((first_name, second_name), targets):
            print("Saved", name, "to", target)
            results[name].writeJSON(fileName=target)
if __name__ == "__main__":
    # Command-line entry point: the module docstring doubles as the help
    # text, kept verbatim by the raw-description formatter.
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__
    )
    cli.add_argument("dataset", help="Dataset name to get lumilist")
    cli.add_argument("output", help="Output JSON filename")
    options = cli.parse_args()

    write_lumi_list(inputDataset=options.dataset, filename=options.output)