import itertools

import numpy as np

from event_model import track, hit


class validator_event(object):
    """A SOA datastructure for events"""

    def __init__(self, sensor_Zs, sensor_hitStarts, sensor_hitNums,
                 hit_IDs, hit_Xs, hit_Ys, hit_Zs, hits, mcp_to_hits=None):
        self.sensor_Zs = sensor_Zs
        self.sensor_hitStarts = sensor_hitStarts
        self.sensor_hitNums = sensor_hitNums
        self.hit_IDs = hit_IDs
        self.hit_Xs, self.hit_Ys, self.hit_Zs = hit_Xs, hit_Ys, hit_Zs
        self.hits = hits
        self.hits_by_id = {h.id: h for h in self.hits}
        self.mcp_to_hits = mcp_to_hits
        self.hit_to_mcp = None
        self.particles = None
        if self.mcp_to_hits is not None:
            self.particles = list(self.mcp_to_hits.keys())
            self.hit_to_mcp = {h: [] for h in self.hits}
            for mcp, mhits in mcp_to_hits.items():
                for h in mhits:
                    self.hit_to_mcp[h].append(mcp)

    def get_hit(self, hit_id):
        return self.hits_by_id[hit_id]


class MCParticle(object):
    """Store information about a Monte-Carlo simulation particle"""

    def __init__(self, pkey, pid, p, pt, eta, phi, velohits):
        """Construct a new particle from
        its numeric key (arbitrary integer used in the input file),
        its pID (integer providing information on the particle type),
        its p, pt, eta and phi parameters,
        its associated velopixel hits."""
        self.pkey = pkey
        self.pid = pid
        self.velohits = velohits
        self.p = p
        self.pt = pt
        self.eta = eta
        self.phi = phi
        # flags - set them directly after initializing the object
        self.islong = False
        self.isdown = False
        self.isvelo = False
        self.isut = False
        self.strangelong = False
        self.strangedown = False
        self.fromb = False
        self.fromd = False
        self.over5 = abs(self.p) > 5000.

    def __str__(self):
        s = "MCParticle %d:\n" % self.pkey
        s += "\tpid:\t%d" % (self.pid)
        s += "\tp:\t%g" % (self.p)
        s += "\tpt:\t%g" % (self.pt)
        s += "\teta:\t%g" % (self.eta)
        s += "\tphi:\t%g\n" % (self.phi)
        s += "\tislong:\t%r" % (self.islong)
        s += "\tisdown:\t%r" % (self.isdown)
        s += "\tisvelo:\t%r" % (self.isvelo)
        s += "\tisut:\t%r\n" % (self.isut)
        s += "\tstrangelong:\t%r" % (self.strangelong)
        s += "\tstrangedown:\t%r" % (self.strangedown)
        s += "\tfromb:\t%r" % (self.fromb)
        s += "\tfromd:\t%r" % (self.fromd)
        return s

    def __repr__(self):
        return "%s(%r)" % (self.__class__, self.__dict__)


def parse_json_data(json_data):
    json_event = json_data["event"]
    hits = []
    for x, y, z, hid in zip(json_event["hit_x"], json_event["hit_y"],
                            json_event["hit_z"], json_event["hit_id"]):
        hits.append(hit(x, y, z, hid))
    mcp_to_hits = {}
    if json_data["montecarlo"]:
        description = json_data["montecarlo"]["description"]
        particles = json_data["montecarlo"]["particles"]
        hdict = {h.id: h for h in hits}
        d = {description[i]: i for i in range(len(description))}
        for p in particles:
            trackhits = [hdict[hid] for hid in p[d["mcp_hits"]]]
            mcp = MCParticle(p[d["mcp_key"]], p[d["mcp_id"]], p[d["mcp_p"]],
                             p[d["mcp_pt"]], p[d["mcp_eta"]], p[d["mcp_phi"]],
                             trackhits)
            mcp.islong, mcp.isdown, mcp.isvelo, mcp.isut = \
                p[d["mcp_islong"]], p[d["mcp_isdown"]], p[d["mcp_isvelo"]], p[d["mcp_isut"]]
            mcp.strangelong, mcp.strangedown, mcp.fromb, mcp.fromd = \
                p[d["mcp_strangelong"]], p[d["mcp_strangedown"]], p[d["mcp_fromb"]], p[d["mcp_fromd"]]
            mcp_to_hits[mcp] = trackhits
    return validator_event(json_event["sensor_module_z"],
                           json_event["sensor_hits_starting_index"],
                           json_event["sensor_number_of_hits"],
                           json_event["hit_id"], json_event["hit_x"],
                           json_event["hit_y"], json_event["hit_z"],
                           hits, mcp_to_hits)


class Efficiency(object):
    """Accumulate reconstruction, clone, purity and hit-efficiency counters
    over events for one category of particles (identified by label)."""

    def __init__(self, t2p, p2t, particles, event, label):
        self.label = label
        self.n_particles = 0
        self.n_reco = 0
        self.n_pure = 0
        self.n_clones = 0
        self.n_events = 0
        self.n_heff = 0
        self.n_hits = 0
        self.recoeff = []
        self.purity = []
        self.hiteff = []
        self.recoeffT = 0.0
        self.purityT = 0.0
        self.hiteffT = 0.0
        self.avg_recoeff = 0.0
        self.avg_purity = 0.0
        self.avg_hiteff = 0.0
        self.add_event(t2p, p2t, particles, event)

    def add_event(self, t2p, p2t, particles, event):
        self.n_events += 1
        self.n_particles += len(particles)
        self.n_reco += len(reconstructed(p2t))
        # note: this appends the running (cumulative) efficiency, not the
        # per-event one, so avg_recoeff averages running ratios
        self.recoeff.append(1. * self.n_reco / self.n_particles)
        self.n_clones += sum([len(t) - 1 for t in list(clones(t2p).values())])
        hit_eff = hit_efficinecy(t2p, event.hit_to_mcp, event.mcp_to_hits)
        purities = [pp[0] for _, pp in t2p.items() if pp[1] is not None]
        self.n_pure += np.sum(purities)
        self.n_heff += np.sum(list(hit_eff.values()))
        self.n_hits += len(hit_eff)
        if len(hit_eff) > 0:
            self.hiteff.append(np.mean(list(hit_eff.values())))
        if len(purities) > 0:
            self.purity.append(np.mean(purities))
        if len(self.recoeff) > 0:
            self.avg_recoeff = 100. * np.mean(self.recoeff)
        if len(self.purity) > 0:
            self.avg_purity = 100. * np.mean(self.purity)
        if len(self.hiteff) > 0:
            self.avg_hiteff = 100. * np.mean(self.hiteff)
        if self.n_particles > 0:
            self.recoeffT = 100. * self.n_reco / self.n_particles
        if self.n_reco > 0:
            self.purityT = 100. * self.n_pure / (self.n_reco + self.n_clones)

    def __str__(self):
        clone_percentage = 0
        if self.n_reco > 0:
            clone_percentage = 100. * self.n_clones / self.n_reco
        hit_eff = 0
        if self.n_hits > 0:
            hit_eff = 100. * self.n_heff / self.n_hits
        s = ("%18s : %8d from %8d (%5.1f%%, %5.1f%%) %8d clones (%6.2f%%), "
             "purity: (%6.2f%%, %6.2f%%), hitEff: (%6.2f%%, %6.2f%%)"
             % (self.label, self.n_reco, self.n_particles, self.recoeffT,
                self.avg_recoeff, self.n_clones, clone_percentage,
                self.purityT, self.avg_purity, self.avg_hiteff, hit_eff))
        return s

    def __repr__(self):
        return "%s(%r)" % (self.__class__, self.__dict__)


def update_efficiencies(eff, event, tracks, weights, label, cond):
    """Update (or create) the Efficiency counter for the subset of
    event.particles that satisfies the predicate cond."""
    filtered = [(ip, p) for ip, p in enumerate(event.particles) if cond(p)]
    if len(filtered) == 0:
        return eff
    pidx_filtered, particles_filtered = zip(*filtered)
    weights_filtered = weights[:, np.array(list(pidx_filtered))]
    t2p, p2t = hit_purity(tracks, particles_filtered, weights_filtered)
    if eff is None:
        eff = Efficiency(t2p, p2t, particles_filtered, event, label)
    else:
        eff.add_event(t2p, p2t, particles_filtered, event)
    return eff


def comp_weights(tracks, event):
    """
    Compute w(t,p):
    the fraction of the hits of a track t that were contributed by a
    particle p.

    Keyword arguments:
    tracks -- a list of reconstructed tracks
    event -- an instance of event_model.Event holding all information related to this event
    """
    w = np.zeros((len(tracks), len(event.particles)))
    for i, j in itertools.product(range(len(tracks)), range(len(event.particles))):
        trackhits = tracks[i].hits
        nhits = len(trackhits)
        particle = event.particles[j]
        nhits_from_p = len([h for h in trackhits if event.hit_to_mcp[h].count(particle) > 0])
        w[i, j] = float(nhits_from_p) / nhits
    return w
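

# Worked example (sketch): if a track has 4 hits and 3 of them were left by
# particle p, then w(t, p) = 3/4 = 0.75, which clears the 0.7 association
# threshold applied in hit_purity below; with only 2 of 4 hits from p,
# w(t, p) = 0.5 and the track would not be associated with p.
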
def hit_purity(tracks, particles, weights):
    """
    Construct purity and reconstruction tables.

    This function generates two dicts. The first stores the purity for every
    track. A track that has no particle associated (max(w(t,p)) <= 0.7) still
    stores the maximum particle weight but has no particle associated with it.
    The second dict stores, for each particle, the track for which that
    particle has the maximum weight. If none of its weights exceeds 0.7, the
    particle is said to be not reconstructed.

    Keyword arguments:
    tracks -- a list of reconstructed tracks
    particles -- a list of Monte-Carlo particles
    weights -- the w(t,p) table calculated with comp_weights
    """
    # initialize hit purities for tracks t.
    t2p = {t: (0.0, None) for t in tracks}
    # initialize reconstruction table for particles p.
    p2t = {p: (0.0, None) for p in particles}
    for i in range(len(tracks)):
        # for each track get all particle weights and determine the max
        wtp, nwtp = np.max(weights[i, :]), np.argmax(weights[i, :])
        if wtp > 0.7:
            t2p[tracks[i]] = (wtp, particles[nwtp])
        else:
            # store the weight anyway but don't associate a particle since
            # this track has no particle associated (i.e. it is a ghost)
            t2p[tracks[i]] = (wtp, None)
    for i in range(len(particles)):
        wtp, nwtp = np.max(weights[:, i]), np.argmax(weights[:, i])
        if wtp > 0.7:
            p2t[particles[i]] = (wtp, tracks[nwtp])
        else:
            p2t[particles[i]] = (wtp, None)
    return t2p, p2t
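

# Worked example (sketch): for a weights row [0.9, 0.1] the track is
# associated with the first particle with purity 0.9; for a row [0.5, 0.5]
# no weight exceeds 0.7, so the track keeps purity 0.5 and counts as a ghost.
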
def hit_efficinecy(t2p, hit_to_mcp, mcp_to_hits):
    """
    Hit efficiency for associated tracks.

    Calculate the hit efficiency for pairs of tracks and their associated
    particle (if any exists).

    Keyword arguments:
    t2p -- hit purity table as calculated by hit_purity()
    hit_to_mcp -- hit to MC particles dictionary from the event data structure
    mcp_to_hits -- MC particle to hits dictionary from the event data structure
    """
    hit_eff = {}
    for track, (_, particle) in t2p.items():
        if particle is None:
            continue
        # for each track that is associated to a particle:
        # number of hits from the particle on the track
        hits_p_on_t = sum([hit_to_mcp[h].count(particle) for h in track.hits])
        # hits from p on t / total number of hits from p
        hit_eff[(track, particle)] = float(hits_p_on_t) / len(mcp_to_hits[particle])
    return hit_eff
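

# Worked example (sketch): if a particle left 5 hits in the detector and 4 of
# them appear on its associated track, the hit efficiency for that
# (track, particle) pair is 4/5 = 0.8.
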
def reconstructed(p2t):
    "Return the tracks associated with reconstructed particles"
    return [t for _, (_, t) in p2t.items() if t is not None]


def clones(t2p):
    "Return a dictionary of particles that have clones (multiple track associations)"
    p2t = {}
    for track, (_, particle) in t2p.items():
        if particle is not None:
            # for each associated track
            if particle not in p2t:
                # if this is the first time we see this particle, initialize
                p2t[particle] = []
            # add this track to the list of tracks associated with this particle
            p2t[particle].append(track)
    # if more than one track is associated with a particle, it is a clone.
    return {p: t for p, t in p2t.items() if len(t) > 1}
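

# Worked example (sketch): if tracks t1 and t2 are both associated with the
# same particle p, clones() returns {p: [t1, t2]} and one of the two tracks
# is counted as a clone (len(t) - 1 = 1 in Efficiency.add_event).
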
def ghosts(t2p):
    "Return ghosts, i.e. the list of tracks with no particle associated"
    return [t for t, pp in t2p.items() if pp[1] is None]


def ghost_rate(t2p):
    "Return the fraction of unassociated (fake) tracks and the number of ghosts"
    ntracks = len(t2p.keys())
    nghosts = len(ghosts(t2p))
    return float(nghosts) / ntracks, nghosts
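

# Worked example (sketch): 2 ghosts among 40 tracks gives
# ghost_rate(t2p) == (0.05, 2).
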
def validate_print(events_json_data, tracks_list):
    tracking_data = []
    for event, tracks in zip(events_json_data, tracks_list):
        tracking_data.append((parse_json_data(event), tracks))
    n_tracks = 0
    avg_ghost_rate = 0.0
    n_allghosts = 0
    eff_velo = None
    eff_long = None
    eff_long5 = None
    eff_long_strange = None
    eff_long_strange5 = None
    eff_long_fromb = None
    eff_long_fromb5 = None
    for event, tracks in tracking_data:
        n_tracks += len(tracks)
        weights = comp_weights(tracks, event)
        t2p, _ = hit_purity(tracks, event.particles, weights)
        grate, nghosts = ghost_rate(t2p)
        n_allghosts += nghosts
        avg_ghost_rate += grate
        eff_velo = update_efficiencies(
            eff_velo, event, tracks, weights, 'velo',
            lambda p: p.isvelo and (abs(p.pid) != 11))
        eff_long = update_efficiencies(
            eff_long, event, tracks, weights, 'long',
            lambda p: p.islong and (abs(p.pid) != 11))
        eff_long5 = update_efficiencies(
            eff_long5, event, tracks, weights, 'long>5GeV',
            lambda p: p.islong and p.over5 and (abs(p.pid) != 11))
        eff_long_strange = update_efficiencies(
            eff_long_strange, event, tracks, weights, 'long_strange',
            lambda p: p.islong and p.strangelong and (abs(p.pid) != 11))
        eff_long_strange5 = update_efficiencies(
            eff_long_strange5, event, tracks, weights, 'long_strange>5GeV',
            lambda p: p.islong and p.over5 and p.strangelong and (abs(p.pid) != 11))
        eff_long_fromb = update_efficiencies(
            eff_long_fromb, event, tracks, weights, 'long_fromb',
            lambda p: p.islong and p.fromb and (abs(p.pid) != 11))
        eff_long_fromb5 = update_efficiencies(
            eff_long_fromb5, event, tracks, weights, 'long_fromb>5GeV',
            lambda p: p.islong and p.over5 and p.fromb and (abs(p.pid) != 11))
    nevents = len(tracking_data)
    print("%d tracks including %8d ghosts (%5.1f%%). Event average %5.1f%%"
          % (n_tracks, n_allghosts, 100. * n_allghosts / n_tracks,
             100. * avg_ghost_rate / nevents))
    print(eff_velo)
    print(eff_long)
    print(eff_long5)
    print(eff_long_strange)
    print(eff_long_strange5)
    print(eff_long_fromb)
    print(eff_long_fromb5)


def validate(events_json_data, tracks_list, particle_type="long>5GeV"):
'''Returns just the Efficiency object of the particle_type requested.
particle_type can be one of {'velo', 'long', 'long>5GeV', 'long_strange',
'long_strange>5GeV', 'long_fromb', 'long_fromb>5GeV'}.
'''
tracking_data = []
for event, tracks in zip(events_json_data, tracks_list):
tracking_data.append((parse_json_data(event), tracks))
particle_lambda = {
'velo': lambda p: p.isvelo and (abs(p.pid) != 11),
'long': lambda p: p.islong and (abs(p.pid) != 11),
'long>5GeV': lambda p: p.islong and p.over5 and (abs(p.pid) != 11),
'long_strange': lambda p: p.islong and p.strangelong and (abs(p.pid) != 11),
'long_strange>5GeV': lambda p: p.islong and p.over5 and p.strangelong and (abs(p.pid) != 11),
'long_fromb': lambda p: p.islong and p.fromb and (abs(p.pid) != 11),
'long_fromb>5GeV': lambda p: p.islong and p.over5 and p.fromb and (abs(p.pid) != 11)
}
eff = None
for event, tracks in tracking_data:
weights = comp_weights(tracks, event)
eff = update_efficiencies(eff, event, tracks, weights, particle_type,
particle_lambda[particle_type])
return eff
def validate_efficiency(events_json_data, tracks_list, particle_type="long>5GeV"):
'''Returns just the Reconstruction Efficiency of the particle_type requested,
as a value in [0,1].
'''
return validate(events_json_data, tracks_list, particle_type).recoeffT / 100.0
def validate_clone_fraction(events_json_data, tracks_list, particle_type="long>5GeV"):
'''Returns just the Clone Fraction of the particle_type requested,
as a value in [0,1].
'''
eff = validate(events_json_data, tracks_list, particle_type)
return eff.n_clones / eff.n_reco
def validate_ghost_fraction(events_json_data, tracks_list):
    '''Return just the ghost fraction over all events,
    as a value in [0, 1].
    '''
    tracking_data = []
    for event, tracks in zip(events_json_data, tracks_list):
        tracking_data.append((parse_json_data(event), tracks))
    n_tracks = 0
    n_allghosts = 0
    for event, tracks in tracking_data:
        n_tracks += len(tracks)
        weights = comp_weights(tracks, event)
        t2p, _ = hit_purity(tracks, event.particles, weights)
        _, nghosts = ghost_rate(t2p)
        n_allghosts += nghosts
    return n_allghosts / n_tracks
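

# Minimal usage sketch, not part of the original file. It wires the toy event
# from _toy_json_event into the validation entry points above. It assumes that
# event_model.track(hits) stores its hit list in a .hits attribute (as
# comp_weights requires) and that track objects are hashable (as hit_purity
# requires when it uses them as dict keys).
if __name__ == "__main__":
    json_data = _toy_json_event()
    toy_event = parse_json_data(json_data)
    # one "perfect" track containing every hit of the single toy particle
    toy_tracks = [track(list(toy_event.hits))]
    validate_print([json_data], [toy_tracks])
    print("long>5GeV reconstruction efficiency: %.2f"
          % validate_efficiency([json_data], [toy_tracks], "long>5GeV"))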