-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathremote_nano.py
executable file
·394 lines (349 loc) · 15.2 KB
/
remote_nano.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
#!/bin/env python
#__________initial imports__________
import binConfig #s
import checkEnvironment #s
from datetime import datetime #s
import optparse,os,time,pickle,subprocess,shutil,sys,getpass,re #s
import logging #s
import ROOT #s
from condor_submit import checkAndRenewVomsProxy #d
from collections import OrderedDict #d
from commands import getoutput #d
# Module-wide logger shared by every function in this script.
log = logging.getLogger('remote')
log.setLevel(logging.DEBUG)

# Message layout: "<LEVEL> - <logger name> (<HH:MM:SS>): <message>".
formatter = logging.Formatter('%(levelname)s - %(name)s (%(asctime)s): %(message)s','%H:%M:%S')

# Console handler: emits everything from DEBUG upwards using the layout above.
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
ch.setFormatter(formatter)
log.addHandler(ch)
#__________end initial imports_______
#__________store generators__________
# Generator tag (as it appears in a dataset name) -> short code appended to
# the sample name.  Insertion order matters: parse_name() tries the keys in
# this order and stops at the first one found, so the more specific tags
# ('herwig6', 'pythia8', ...) are listed before their generic prefixes.
generators = OrderedDict([
    ('madgraph', 'MG'),
    ('powheg', 'PH'),
    ('herwig6', 'HW'),
    ('herwigpp', 'HP'),
    ('herwig', 'HW'),
    ('sherpa', 'SP'),
    ('amcatnlo', 'AM'),
    ('alpgen', 'AG'),
    ('calchep', 'CA'),
    ('comphep', 'CO'),
    ('lpair', 'LP'),
    ('pythia6', 'P6'),
    ('pythia8', 'P8'),
    ('pythia', 'PY'),
    ('gg2ww', 'GG'),
    ('gg2zz', 'GG'),
    ('gg2vv', 'GG'),
    ('JHUGen', 'JG'),
    ('blackmax', 'BM'),
    ('unknown', ''),
])
#__________end store generators__________

# Generators used for hadronization on top of another generator; their tags
# are stripped from the sample name once the main generator has been found.
showers = ['pythia8', 'pythia6', 'pythia', 'herwigpp']

# Tags meant to be removed from the sample name (case insensitive).
# The code in parse_name() that consumed this list is commented out.
blacklist = [
    '13tev',
    'madspin',
    'FXFX',
    'MLM',
    'NNPDF30',
    'TuneCUEP8M1',
    'TuneCUETP8M1',
    'TuneCUETP8M2T4',
]
#__________establish sample name__________
def parse_name(dataset, options):
    """Derive the short sample name from a dataset path.

    The dataset path has the form ``/<part1>/<part2>/<part3>``:

    * part1 holds the physics name plus extra tags (cme, tune, generator, ...),
    * part2 holds campaign / run-era information,
    * part3 is the data tier; the sample is treated as MC when it contains
      ``"SIM"`` and as data otherwise.

    The first generator tag from ``generators`` found in part1 (preceded by
    ``_`` or ``-``, case insensitive) is removed, together with any shower
    tags listed in ``showers``.

    Returned name:

    * MC:   ``<part1>_TeV_[ext<N>_]<generator code>_MC``
    * data: ``<part1>_<run era>_<nano version>`` where both pieces are the
      text before the first ``_`` of the first two ``-``-separated fields of
      part2.

    Args:
        dataset: dataset path as described above.
        options: parsed command-line options; not used here, kept so that
            existing callers keep working.

    Raises:
        IndexError: if the path has fewer than three components, or a data
            path whose second component contains no ``-``.
    """
    dataset_split = dataset.split('/')
    ds_pt1 = dataset_split[1]
    ds_pt2 = dataset_split[2]
    is_mc = "SIM" in dataset_split[3]

    ds_pt3m = None
    if not is_mc:
        # Full run-era label for data, e.g. "<era>_<nano version>".
        era_fields = ds_pt2.split("-")
        runera = era_fields[0].split("_")[0]
        datenano = era_fields[1].split("_")[0]
        ds_pt3m = runera + "_" + datenano

    _generator = 'unknown'
    for generator in generators:
        # subn() returns (new_string, number_of_substitutions); the leading
        # (?i) makes the pattern case insensitive.
        ds_pt1, n = re.subn(r'(?i)[_-]' + generator, '', ds_pt1)
        if n > 0:
            _generator = generator
            for shower in showers:
                ds_pt1 = re.sub(r'(?i)[_-]' + shower, '', ds_pt1)
            break

    if not is_mc:
        return ds_pt1 + "_" + ds_pt3m

    # Extension samples carry an "ext<N>" tag in the second path component.
    # Raw string: '\d' inside a plain literal is an invalid escape sequence.
    match = re.search(r'ext\d+', ds_pt2)
    if match:
        return ds_pt1 + "_" + "TeV_" + match.group() + "_" + generators[_generator] + "_MC"
    return ds_pt1 + "_" + "TeV_" + generators[_generator] + "_MC"
#__________end establish sample name__________
def bins(file_list, bin_size):
    """Group file names into consecutive bins limited by accumulated size.

    Args:
        file_list: mapping of file name -> size; iterated in its own order.
        bin_size: size threshold, in the same units as the mapping values.

    Returns:
        A list of lists of file names.  Files are appended to the current
        group; the group is closed as soon as its accumulated size exceeds
        ``bin_size``, or would exceed it after adding half the size of the
        file just appended.  A trailing, still-open group is emitted when the
        last file is reached.
    """
    groups = []
    pending = []
    accumulated = 0
    last_index = len(file_list) - 1
    for index, name in enumerate(file_list):
        size = file_list[name]
        pending.append(name)
        accumulated += size
        over_limit = accumulated > bin_size
        nearly_full = (accumulated + size * 0.5) > bin_size
        if over_limit or nearly_full:
            groups.append(pending)
            pending = []
            accumulated = 0
        elif index == last_index:
            groups.append(pending)
    return groups
#__________be able to read the files via xrootd__________
def getDatasetFileList(sample):
    """Query DAS for the files of a dataset and return their xrootd URLs.

    Runs ``dasgoclient`` through the shell and expects one
    ``<file name> <file size>`` pair per output line.

    Args:
        sample: dataset path passed to the ``dataset=`` selector of the query.

    Returns:
        dict mapping ``root://cmsxrootd.fnal.gov//<file name>`` to the file
        size as an ``int``.

    Raises:
        ValueError: if a non-blank output line does not consist of exactly
            two fields, if the size is not an integer, or if the query
            produced no files at all (typically an error message from
            ``dasgoclient`` or an empty reply).
    """
    command = 'dasgoclient --query="file dataset=%s | grep file.name, file.size"' % (sample)
    output = getoutput(command)
    fileList = {}
    for line in output.split(os.linesep):
        fields = line.split()
        if not fields:
            # Blank lines carry no file information.
            continue
        if len(fields) != 2:
            raise ValueError("Unexpected dasgoclient output for dataset %s: %r" % (sample, line))
        name, size = fields
        fileList['root://cmsxrootd.fnal.gov//' + name] = int(size)
    if not fileList:
        raise ValueError("dasgoclient returned no files for dataset %s" % sample)
    return fileList
#__________end be able to read the files via xrootd__________
#__________begin list of samples__________
def getFilesfromFile(cfgFile, options):
    """Read a sample list file and return the binned input files per sample.

    Each non-empty line that does not start with ``#`` (leading whitespace is
    ignored) is taken as a dataset path.  Its files are looked up with
    getDatasetFileList() and grouped with bins().

    Args:
        cfgFile: path of the text file listing one dataset per line.
        options: parsed command-line options, forwarded to parse_name().

    Returns:
        dict mapping the sample name from parse_name() to a list of file
        groups (each group is a list of xrootd URLs, one group per job).
    """
    # NOTE(review): the original comment called this "size in bytes 3GB", but
    # 6400 is compared directly with the sizes reported by DAS; if those are
    # bytes, practically every file ends up in its own bin - confirm intent.
    bin_size = 6400
    sampleList = {}
    with open(cfgFile, 'r') as cfg:
        for line in cfg:
            sample = line.strip()
            if not sample or sample.startswith("#"):
                # Skip blank lines and commented-out samples.
                continue
            file_lists = bins(getDatasetFileList(sample), bin_size)
            sampleList[parse_name(sample, options)] = file_lists
    return sampleList
#__________end list of samples__________
#__________setting config files and proper flags__________
def makeExe(options,inputfiles,outputfile,sample,year):
    """Write the per-job shell script ``run_<outputfile without .root>.sh``.

    The script is written to the current working directory.  It unpacks
    ``exe.tar.gz``, switches ``isData`` / ``CalculatePUSystematics`` in
    ``<configdir>/Run_info.in`` with ``sed``, runs ``./Analyzer`` on the
    input files, copies the output with ``xrdcp`` and finally removes the
    unpacked files.

    Args:
        options: parsed options; ``configdir``, ``outputFolder`` and ``CR``
            are read.
        inputfiles: non-empty list of input file URLs for this job.  The job
            is treated as data when the first entry contains ``"Run20"``.
        outputfile: name of the ROOT file produced by the job.
        sample: sample name, used as sub-folder of ``options.outputFolder``.
        year: value passed to the Analyzer ``-y`` flag.
    """
    from string import Template
    # NOTE(review): the template starts with a newline, so "#!/bin/bash -e"
    # ends up on the second line of the generated file; kept as is so the
    # generated script stays byte-identical.
    # A trailing backslash inside this (non-raw) literal joins the line with
    # the next one; "\\s" produces a literal "\s" for sed.
    exe="""
#!/bin/bash -e
sleep $[ ( $RANDOM % 30 ) ]
date
cd ${_CONDOR_SCRATCH_DIR}
tar -xvzf exe.tar.gz
ls
isData=$ISDATA
echo $isData
if [ $isData == "true" ]
then
echo "switch data to true"
sed -r -i -e 's/(isData\\s+)(0|false)/isData true/' -e 's/(CalculatePUS[a-z]+\\s+)(1|true)/CalculatePUSystematics false/' \
$CONFIGDIR/Run_info.in
else
echo "switch data to false"
sed -r -i -e 's/(isData\\s+)(1|true)/isData false/' -e 's/(CalculatePUS[a-z]+\\s+)(0|false)/CalculatePUSystematics true/' \
$CONFIGDIR/Run_info.in
fi
./Analyzer -in $INPUTFILES -out $OUPUTFILE -y $YEAR -C $CONFIGDIR $CONTOLLREGION
xrdcp -sf $_CONDOR_SCRATCH_DIR/$OUPUTFILE $OUTPUTFOLDER$SAMPLE/$OUPUTFILE
"""
    # Clean-up of everything that was shipped in the tarball.
    for fileDir in binConfig.cpFiles:
        exe+="rm -r "+fileDir+" \n"
    exe+="rm -r $CONFIGDIR \n"
    exe+="rm -r *.root \n"
    exe+="rm -r *.tar.gz \n"

    CR = "-CR" if options.CR else ""
    # Data files are recognised by the "Run20" tag in the first input file.
    isdata = "Run20" in inputfiles[0]
    substitutions = dict(
        CONFIGDIR=options.configdir,
        INPUTFILES=" ".join(inputfiles),
        OUPUTFILE=outputfile,
        OUTPUTFOLDER=options.outputFolder,
        SAMPLE=sample,
        CONTOLLREGION=CR,
        ISDATA="true" if isdata else "false",
        YEAR=year,
    )
    # safe_substitute leaves shell variables that are not in the mapping
    # ($RANDOM, $isData, ${_CONDOR_SCRATCH_DIR}, ...) untouched.
    exe = Template(exe).safe_substitute(substitutions)
    script_name = "run_" + outputfile.replace(".root", "") + ".sh"
    with open(script_name, "w") as exeFile:
        exeFile.write(exe)
#__________end setting config files and proper flags__________
def main():
    """Prepare and submit one Condor cluster per sample listed in CONFIG_FILE.

    Steps:

    1. Parse the command line (exactly one positional CONFIG_FILE).
    2. Ask for the grid passphrase; when one is entered the VOMS proxy is
       renewed with checkAndRenewVomsProxy().
    3. Check the CMSSW environment with checkEnvironment.checkEnvironment().
    4. Extract the run year (first "201x" occurrence) from the CONFIG_FILE
       path.  This is validated before the output folder is touched.
    5. Create the output folder on the EOS mount (removing an existing one
       only with --force).
    6. Resolve and bin the input files of every sample.
    7. Copy ``binConfig.cpFiles`` and the config directory into
       ``<Tag>/exe`` and pack them into ``exe.tar.gz``.
    8. For every sample create ``<Tag>/<sample>``, write ``wrapper.sh``, the
       per-job run scripts and ``condor.jdl``, then call ``condor_submit``.
    9. Write ``submitted_samples.txt`` in the start directory.

    Exit status: 3 when an output/sample path already exists without --force
    or when the run year cannot be determined; the errno of the
    EnvironmentError (or 1 if it carries none) when the environment check
    fails.

    The options --local, --debug and --filesFromACCRE are accepted but not
    read anywhere in this file.
    """
    date_time = datetime.now()
    usage = '%prog [options] CONFIG_FILE'
    parser = optparse.OptionParser( usage = usage )
    parser.add_option( '-C', '--configdir', default = "PartDet", metavar = 'DIRECTORY',
                       help = 'Define the config directory. [default = %default]')
    parser.add_option( '-c', '--CR', action = 'store_true', default = False,
                       help = 'Run with the CR flag. [default = %default]')
    parser.add_option( '-o', '--outputFolder', default = "root://cmseos.fnal.gov//store/user/%s/REPLACEBYTAG/"%(getpass.getuser()), metavar = 'DIRECTORY',
                       help = 'Define path for the output files [default = %default]')
    parser.add_option( '-l', '--local',action = 'store_true', default = False,
                       help = 'run localy over the files [default = %default]')
    parser.add_option( '-f', '--force',action = 'store_true', default = False,
                       help = 'Force the output folder to be overwritten. [default = %default]')
    parser.add_option( '--debug', metavar = 'LEVEL', default = 'INFO',
                       help= 'Set the debug level. Allowed values: ERROR, WARNING, INFO, DEBUG. [default = %default]' )
    parser.add_option( '--filesFromACCRE', action = 'store_true', default = False,
                       help= 'Use the files from ACCRE [default = %default]' )
    parser.add_option( '-t', '--Tag', default = "run_%s_%s_%s_%s_%s"%(date_time.year,
                                                                        date_time.month,
                                                                        date_time.day,
                                                                        date_time.hour,
                                                                        date_time.minute,
                                                                        ), metavar = 'DIRECTORY',
                       help = 'Define a Tag for the output directory. [default = %default]' )
    ( options, args ) = parser.parse_args()
    if len( args ) != 1:
        parser.error( 'Exactly one CONFIG_FILE required!' )
    options.outputFolder=options.outputFolder.replace("REPLACEBYTAG",options.Tag)
    cfgFile = args[ 0 ]

    print("You may enter your grid password here. Do not enter anything to use the available proxy.")
    passphrase = getpass.getpass()
    if passphrase != "":
        checkAndRenewVomsProxy(passphrase=passphrase)

    log.info("Welcome to the wonders of color!")
    try:
        cmssw_version, cmssw_base, scram_arch = checkEnvironment.checkEnvironment()
    except EnvironmentError as err:
        log.error( err )
        log.info( 'Exiting...' )
        # errno may be None; sys.exit(None) would report success.
        sys.exit( err.errno or 1 )

    # The run year is taken from the sample list path.  Checked before any
    # folder is removed or created so a bad file name has no side effects.
    runyearidx = cfgFile.find("201")
    if runyearidx == -1:
        log.error("The run year is not specified. Make sure it is included in your sample list filename (e.g. 2016MCSamples.txt)")
        sys.exit(3)
    runyear = cfgFile[runyearidx:runyearidx+4]

    # Local mount point corresponding to the xrootd output URL.
    eos_path = options.outputFolder.replace("root://cmseos.fnal.gov/","/eos/uscms/")
    if os.path.exists(eos_path):
        if not options.force:
            log.error("The outpath "+options.outputFolder+" already exists pick a new one or use --force")
            sys.exit(3)
        shutil.rmtree(eos_path)
    os.makedirs(eos_path)

    sampleList=getFilesfromFile(cfgFile,options)

    # Collect everything the jobs need in <Tag>/exe and pack it.
    thisdir=os.getcwd()
    exepath=os.path.join(options.Tag,"exe")
    if os.path.exists(exepath):
        shutil.rmtree(exepath)
    os.makedirs(exepath)
    anadir=binConfig.PathtoExecutable.replace("/uscms/home/","/uscms_data/d3/").replace("nobackup/","")
    for fileDir in binConfig.cpFiles:
        source_path = os.path.join(anadir,fileDir)
        target_path = os.path.join(exepath,fileDir)
        if os.path.isdir(source_path):
            shutil.copytree(source_path,target_path)
        else:
            shutil.copy(source_path,target_path)
    shutil.copytree(os.path.join(anadir,options.configdir),os.path.join(exepath,options.configdir))
    os.chdir(exepath)
    command="tar czf exe.tar.gz *"
    if subprocess.call(command, shell=True) != 0:
        log.error("Creating exe.tar.gz failed (%s)"%command)
    os.chdir(thisdir)
    pathtozip=os.path.join(os.path.abspath(exepath),"exe.tar.gz")

    n_jobs = sum(len(file_groups) for file_groups in sampleList.values())
    print("There will be %d jobs in total"%n_jobs) #total number of jobs

    # The wrapper sets up CMSSW on the worker node and runs the job script
    # passed as argument; it is identical for every sample.
    wrapper="""#!/bin/bash -e
export SCRAM_ARCH=slc7_amd64_gcc700
ls -lrth
source /cvmfs/cms.cern.ch/cmsset_default.sh
eval `scramv1 project CMSSW CMSSW_10_2_18`
cd CMSSW_10_2_18
ls -lrth
eval `scramv1 runtime -sh`
cp ../$@ run.sh
chmod u+x run.sh
./run.sh
"""
    for sample in sampleList:
        os.chdir(thisdir)
        sample_dir = os.path.join(options.Tag,sample)
        if os.path.exists(sample_dir):
            if not options.force:
                log.error("The samplepath "+sample_dir+" already exists use the --force")
                sys.exit(3)
            shutil.rmtree(sample_dir)
        os.makedirs(sample_dir)
        os.chdir(sample_dir)

        # Every run script of this sample is shipped with each job.
        run_scripts = [os.path.join(os.getcwd(),"run_%s_%d.sh"%(sample,i))
                       for i,binned in enumerate(sampleList[sample])]
        condor_jdl="executable = "+os.path.join(os.getcwd(),"wrapper.sh")+"\n"
        condor_jdl+="""
universe = vanilla
Error = err.$(Process)_$(Cluster)
Output = out.$(Process)_$(Cluster)
Log = condor_$(Cluster).log
transfer_input_files = %s
should_transfer_files = YES
when_to_transfer_output = ON_EXIT
request_memory = 0.5 GB
Notification = NEVER
x509userproxy = $ENV(X509_USER_PROXY)
"""%(", ".join([pathtozip]+run_scripts))
        with open("wrapper.sh","w") as f:
            f.write(wrapper)
        # One "arguments/queue" pair, i.e. one job, per file group.
        for i,binned in enumerate(sampleList[sample]):
            makeExe(options,binned,"%s_%d.root"%(sample,i),sample,runyear)
            condor_jdl+="arguments = %s \n"%("run_%s_%d.sh"%(sample,i))
            condor_jdl+="queue\n"
            condor_jdl+="\n"
        with open("condor.jdl","w") as f:
            f.write(condor_jdl)
        log.info("Submitting sample %s"%sample)
        command="condor_submit condor.jdl"
        log.debug(command)
        if subprocess.call(command, shell=True) != 0:
            log.error("condor_submit failed for sample %s"%sample)
    os.chdir(thisdir)

    # Record what was submitted; the closing log message points to
    # ./add_root_files.py as the next step.
    with open("submitted_samples.txt","w") as legacy_file:
        legacy_file.write("outFolder:%s\n"%eos_path)
        for sample in sampleList:
            legacy_file.write("%s\n"%(sample))
    log.info("Thanks for zapping in, bye bye")
    log.info("The out files will be in "+options.outputFolder)
    log.info("Check the status with condor_q %s"%(getpass.getuser()))
    log.info("When finished run ./add_root_files.py")


if __name__ == '__main__':
    main()