Skip to content

Commit 37445f8

Browse files
committed
ported HEPPY_PR2_7_2_X from CMS
1 parent d6e5d4e commit 37445f8

16 files changed

+215
-147
lines changed

BuildFile.xml

-21
This file was deleted.

analyzers/SimpleTreeProducer.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ def process(self, event):
1717
self.tree.fill('test_variable', event.input.var1)
1818
self.tree.tree.Fill()
1919

20-
def write(self):
20+
def write(self, setup):
2121
self.rootfile.Write()
2222
self.rootfile.Close()
2323

framework/__init__.py

-1
Original file line number | Diff line number | Diff line change
@@ -1 +0,0 @@
1-
#Automatically created by SCRAM

framework/analyzer.py

+7 -3
Original file line number | Diff line number | Diff line change
@@ -32,8 +32,12 @@ def __init__(self, cfg_ana, cfg_comp, looperName ):
3232
self.cfg_ana = cfg_ana
3333
self.cfg_comp = cfg_comp
3434
self.looperName = looperName
35-
self.dirName = '/'.join( [self.looperName, self.name] )
36-
os.mkdir( self.dirName )
35+
if hasattr(cfg_ana,"nosubdir") and cfg_ana.nosubdir:
36+
self.dirName = self.looperName
37+
else:
38+
self.dirName = '/'.join( [self.looperName, self.name] )
39+
os.mkdir( self.dirName )
40+
3741

3842
# this is the main logger corresponding to the looper.
3943
# each analyzer could also declare its own logger
@@ -62,7 +66,7 @@ def process(self, event ):
6266
print self.cfg_ana.name
6367

6468

65-
def write(self):
69+
def write(self, setup):
6670
"""Called by Looper.write, for all analyzers.
6771
Just overload it if you have histograms to write."""
6872
self.counters.write( self.dirName )

framework/chain_test.py

+20 -8
Original file line number | Diff line number | Diff line change
@@ -3,39 +3,51 @@
33
import shutil
44

55
from heppy.framework.chain import Chain
6+
from heppy.utils.testtree import create_tree
7+
8+
testfname = 'test_tree.root'
69

710
class ChainTestCase(unittest.TestCase):
811

912
def setUp(self):
10-
self.file = '../../test/test_tree.root'
11-
self.chain = Chain(self.file, 'test_tree')
13+
self.chain = Chain(testfname, 'test_tree')
1214

1315
def test_file(self):
14-
self.assertTrue(os.path.isfile(self.file))
16+
'''Test that the test file exists'''
17+
self.assertTrue(os.path.isfile(testfname))
1518

16-
def test_guess_name(self):
19+
def test_wrong_filename(self):
1720
self.assertRaises(ValueError,
18-
Chain, 'self.file')
21+
Chain, 'non_existing_file.root')
22+
23+
def test_guess_treename(self):
24+
chain = Chain(testfname)
25+
self.assertEqual(len(self.chain), 100)
1926

2027
def test_load_1(self):
28+
'''Test that the chain has the correct number of entries'''
2129
self.assertEqual(len(self.chain), 100)
2230

2331
def test_load_2(self):
24-
tmpfile = self.file.replace('test_tree', 'test_tree_2_tmp')
25-
shutil.copyfile(self.file, tmpfile)
26-
chain = Chain(self.file.replace('.root', '*.root'), 'test_tree')
32+
'''Test chaining of two files.'''
33+
tmpfile = testfname.replace('test_tree', 'test_tree_2_tmp')
34+
shutil.copyfile(testfname, tmpfile)
35+
chain = Chain(testfname.replace('.root', '*.root'), 'test_tree')
2736
self.assertEqual(len(chain), 200)
2837
os.remove(tmpfile)
2938

3039
def test_iterate(self):
40+
'''Test iteration'''
3141
for ev in self.chain:
3242
pass
3343
self.assertTrue(True)
3444

3545
def test_get(self):
46+
'''Test direct event access'''
3647
event = self.chain[2]
3748
self.assertEqual(event.var1, 2.)
3849

3950

4051
if __name__ == '__main__':
52+
create_tree(testfname)
4153
unittest.main()

framework/config.py

+5 -37
Original file line number | Diff line number | Diff line change
@@ -135,10 +135,11 @@ def __init__(self, name, files, tree_name=None, triggers=None, **kwargs):
135135
self.dataset_entries = 0
136136
self.isData = False
137137
self.isMC = False
138+
self.isEmbed = False
138139

139140
class DataComponent( Component ):
140141

141-
def __init__(self, name, files, intLumi, triggers, json=None):
142+
def __init__(self, name, files, intLumi=None, triggers=[], json=None):
142143
super(DataComponent, self).__init__(name, files, triggers)
143144
self.isData = True
144145
self.intLumi = intLumi
@@ -154,10 +155,9 @@ def getWeight( self, intLumi = None):
154155

155156

156157
class MCComponent( Component ):
157-
def __init__(self, name, files, triggers, xSection,
158-
nGenEvents,
159-
# vertexWeight,tauEffWeight, muEffWeight,
160-
effCorrFactor, **kwargs ):
158+
def __init__(self, name, files, triggers=[], xSection=1,
159+
nGenEvents=None,
160+
effCorrFactor=None, **kwargs ):
161161
super( MCComponent, self).__init__( name = name,
162162
files = files,
163163
triggers = triggers, **kwargs )
@@ -195,35 +195,3 @@ def __str__(self):
195195
return '\n'.join([comp, sequence, services])
196196

197197

198-
if __name__ == '__main__':
199-
200-
from heppy.framework.chain import Chain as Events
201-
from heppy.analyzers.Printer import Printer
202-
203-
class Ana1(object):
204-
pass
205-
ana1 = Analyzer(
206-
Ana1,
207-
toto = '1',
208-
tata = 'a'
209-
)
210-
ana2 = Analyzer(
211-
Printer,
212-
'instance1'
213-
)
214-
sequence = Sequence( [ana1, ana2] )
215-
216-
DYJets = MCComponent(
217-
name = 'DYJets',
218-
files ='blah_mc.root',
219-
xSection = 3048.,
220-
nGenEvents = 34915945,
221-
triggers = ['HLT_MC'],
222-
vertexWeight = 1.,
223-
effCorrFactor = 1 )
224-
selectedComponents = [DYJets]
225-
sequence = [ana1, ana2]
226-
config = Config( components = selectedComponents,
227-
sequence = sequence,
228-
events_class = Events )
229-
print config

framework/config_test.py

+1
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,7 @@ class Ana1(object):
4545
from heppy.framework.chain import Chain as Events
4646
config = Config( components = [comp1],
4747
sequence = [ana1],
48+
services = [],
4849
events_class = Events )
4950

5051

framework/eventsalbers.py

-6
This file was deleted.

framework/looper.py

+67 -22
Original file line number | Diff line number | Diff line change
@@ -4,12 +4,11 @@
44
import os
55
import sys
66
import imp
7-
import copy
87
import logging
98
import pprint
10-
from platform import platform
9+
from math import ceil
1110
from event import Event
12-
11+
import timeit
1312

1413
class Setup(object):
1514
'''The Looper creates a Setup object to hold information relevant during
@@ -32,7 +31,7 @@ def __init__(self, config, services):
3231
services: dictionary of services indexed by service name.
3332
The service name has the form classObject_instanceLabel
3433
as in this example:
35-
heppy.framework.services.tfile.TFileService_myhists
34+
<base_heppy_path>.framework.services.tfile.TFileService_myhists
3635
To find out about the service name of a given service,
3736
load your configuration file in python, and print the service.
3837
'''
@@ -51,7 +50,7 @@ class Looper(object):
5150
def __init__( self, name,
5251
config,
5352
nEvents=None,
54-
firstEvent=0, nPrint=0 ):
53+
firstEvent=0, nPrint=0, timeReport=False ):
5554
"""Handles the processing of an event sample.
5655
An Analyzer is built for each Config.Analyzer present
5756
in sequence. The Looper can then be used to process an event,
@@ -79,20 +78,33 @@ def __init__( self, name,
7978
self.nEvents = nEvents
8079
self.firstEvent = firstEvent
8180
self.nPrint = int(nPrint)
81+
self.timeReport = [ {'time':0.0,'events':0} for a in self.analyzers ] if timeReport else False
8282
tree_name = None
8383
if( hasattr(self.cfg_comp, 'tree_name') ):
8484
tree_name = self.cfg_comp.tree_name
8585
if len(self.cfg_comp.files)==0:
8686
errmsg = 'please provide at least an input file in the files attribute of this component\n' + str(self.cfg_comp)
8787
raise ValueError( errmsg )
8888
self.events = config.events_class(self.cfg_comp.files, tree_name)
89+
if hasattr(self.cfg_comp, 'fineSplit'):
90+
fineSplitIndex, fineSplitFactor = self.cfg_comp.fineSplit
91+
if fineSplitFactor > 1:
92+
if len(self.cfg_comp.files) != 1:
93+
raise RuntimeError, "Any component with fineSplit > 1 is supposed to have just a single file, while %s has %s" % (self.cfg_comp.name, self.cfg_comp.files)
94+
totevents = min(len(self.events),int(nEvents)) if (nEvents and int(nEvents) not in [-1,0]) else len(self.events)
95+
self.nEvents = int(ceil(totevents/float(fineSplitFactor)))
96+
self.firstEvent = firstEvent + fineSplitIndex * self.nEvents
97+
#print "For component %s will process %d events starting from the %d one" % (self.cfg_comp.name, self.nEvents, self.firstEvent)
8998
# self.event is set in self.process
9099
self.event = None
91100
services = dict()
92101
for cfg_serv in config.services:
93102
service = self._build(cfg_serv)
94103
services[cfg_serv.name] = service
95-
self.setup = Setup( copy.deepcopy(config), services)
104+
# would like to provide a copy of the config to the setup,
105+
# so that analyzers cannot modify the config of other analyzers.
106+
# but cannot copy the autofill config.
107+
self.setup = Setup(config, services)
96108

97109
def _build(self, cfg):
98110
theClass = cfg.class_object
@@ -102,14 +114,16 @@ def _build(self, cfg):
102114
def _prepareOutput(self, name):
103115
index = 0
104116
tmpname = name
105-
while True:
117+
while True and index < 2000:
106118
try:
107119
# print 'mkdir', self.name
108120
os.mkdir( tmpname )
109121
break
110122
except OSError:
111123
index += 1
112124
tmpname = '%s_%d' % (name, index)
125+
if index == 2000:
126+
raise ValueError( "More than 2000 output folder with same name or 2000 attempts failed, please clean-up, change name or check permissions")
113127
return tmpname
114128

115129

@@ -141,8 +155,18 @@ def loop(self):
141155
# if iEv == nEvents:
142156
# break
143157
if iEv%100 ==0:
144-
print 'event', iEv
158+
# print 'event', iEv
159+
if not hasattr(self,'start_time'):
160+
print 'event', iEv
161+
self.start_time = timeit.default_timer()
162+
self.start_time_event = iEv
163+
else:
164+
print 'event %d (%.1f ev/s)' % (iEv, (iEv-self.start_time_event)/float(timeit.default_timer() - self.start_time))
165+
145166
self.process( iEv )
167+
if iEv<self.nPrint:
168+
print self.event
169+
146170
except UserWarning:
147171
print 'Stopped loop following a UserWarning exception'
148172
for analyzer in self.analyzers:
@@ -162,10 +186,16 @@ def process(self, iEv ):
162186
"""
163187
self.event = Event(iEv, self.events[iEv], self.setup)
164188
self.iEvent = iEv
165-
for analyzer in self.analyzers:
189+
for i,analyzer in enumerate(self.analyzers):
166190
if not analyzer.beginLoopCalled:
167191
analyzer.beginLoop()
168-
if analyzer.process( self.event ) == False:
192+
start = timeit.default_timer()
193+
ret = analyzer.process( self.event )
194+
if self.timeReport:
195+
self.timeReport[i]['events'] += 1
196+
if self.timeReport[i]['events'] > 0:
197+
self.timeReport[i]['time'] += timeit.default_timer() - start
198+
if ret == False:
169199
return (False, analyzer.name)
170200
if iEv<self.nPrint:
171201
self.logger.info( self.event.__str__() )
@@ -177,8 +207,17 @@ def write(self):
177207
See Analyzer.Write for more information.
178208
"""
179209
for analyzer in self.analyzers:
180-
analyzer.write()
210+
analyzer.write(self.setup)
181211
self.setup.close()
212+
213+
if self.timeReport:
214+
allev = max([x['events'] for x in self.timeReport])
215+
print "\n ---- TimeReport (all times in ms; first evt is skipped) ---- "
216+
print "%9s %9s %9s %9s %s" % ("processed","all evts","time/proc", " time/all", "analyer")
217+
print "%9s %9s %9s %9s %s" % ("---------","--------","---------", "---------", "-------------")
218+
for ana,rep in zip(self.analyzers,self.timeReport):
219+
print "%9d %9d %10.2f %10.2f %s" % ( rep['events'], allev, 1000*rep['time']/(rep['events']-1) if rep['events']>1 else 0, 1000*rep['time']/(allev-1) if allev > 1 else 0, ana.name)
220+
print ""
182221
pass
183222

184223

@@ -187,16 +226,22 @@ def write(self):
187226
import pickle
188227
import sys
189228
import os
190-
191-
cfgFileName = sys.argv[1]
192-
pckfile = open( cfgFileName, 'r' )
193-
config = pickle.load( pckfile )
194-
comp = config.components[0]
195-
events_class = config.events_class
196-
looper = Looper( 'Loop', comp,
197-
config.sequence,
198-
config.services,
199-
events_class,
200-
nPrint = 5)
229+
if len(sys.argv) == 2 :
230+
cfgFileName = sys.argv[1]
231+
pckfile = open( cfgFileName, 'r' )
232+
config = pickle.load( pckfile )
233+
comp = config.components[0]
234+
events_class = config.events_class
235+
elif len(sys.argv) == 3 :
236+
cfgFileName = sys.argv[1]
237+
file = open( cfgFileName, 'r' )
238+
cfg = imp.load_source( 'cfg', cfgFileName, file)
239+
compFileName = sys.argv[2]
240+
pckfile = open( compFileName, 'r' )
241+
comp = pickle.load( pckfile )
242+
cfg.config.components=[comp]
243+
events_class = cfg.config.events_class
244+
245+
looper = Looper( 'Loop', cfg.config,nPrint = 5)
201246
looper.loop()
202247
looper.write()

init.sh

+1
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@ export PYTHONPATH=$PWD/..:$PYTHONPATH
66
# set up executable directory
77
cd $HEPPY/bin
88
ln -sf ../scripts/*.py .
9+
ln -sf ../scripts/heppy .
910
chmod +x *
1011
cd .. > /dev/null
1112

0 commit comments

Comments
 (0)