Skip to content

Commit

Permalink
Add pending changes
Browse files Browse the repository at this point in the history
  • Loading branch information
qgp committed Sep 6, 2024
1 parent 0307576 commit dc363a8
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 23 deletions.
8 changes: 7 additions & 1 deletion machine_learning_hep/analysis/analyzer_jets.py
Original file line number Diff line number Diff line change
Expand Up @@ -838,6 +838,7 @@ def _analyze(self, method = 'sidesub'):
self.logger.debug("Final histogram: %s, jet pT %g to %g",
var, range_ptjet[0], range_ptjet[1])
# self.logger.debug(print_histogram(hproj_sel))
ROOT.gStyle.SetOptStat(0)
self._save_hist(
hproj_sel,
f'uf/h_{var}_{method}_unfolded_{mcordata}_' +
Expand Down Expand Up @@ -966,11 +967,16 @@ def estimate_feeddown(self):
h_effkine_gen = self._build_effkine(
rfile.Get(f'h_effkine_fd_gen_nocuts_{var}'),
rfile.Get(f'h_effkine_fd_gen_cut_{var}'))
self._save_hist(h_effkine_gen, f'fd/h_effkine-ptjet-{var}_fd_gen.png', 'text')
h_effkine_det = self._build_effkine(
rfile.Get(f'h_effkine_fd_det_nocuts_{var}'),
rfile.Get(f'h_effkine_fd_det_cut_{var}'))
self._save_hist(h_effkine_det, f'fd/h_effkine-ptjet-{var}_fd_det.png', 'text')
h_response = rfile.Get(f'h_response_fd_{var}')
h_response.Print('v')
self._save_hist(project_hist(h_response, [0, 3], {}), f'fd/h_response_ptjet_{var}.png')
self._save_hist(project_hist(h_response, [1, 4], {}), f'fd/h_response_pthf_{var}.png')
self._save_hist(project_hist(h_response, [2, 5], {}), f'fd/h_response_shape_{var}.png')
h_response.Print('all')
print(f'fd folding for {var=}')
h_response_norm = norm_response(h_response, 3)
h3_fd_gen.Multiply(h_effkine_gen)
Expand Down
48 changes: 29 additions & 19 deletions machine_learning_hep/processer_jet.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,11 +131,17 @@ def _calculate_variables(self, df, verify=False): # pylint: disable=invalid-name
df['hfPx'] = df.fPt * np.cos(df.fPhi)
df['hfPy'] = df.fPt * np.sin(df.fPhi)
df['hfPz'] = df.fPt * np.sinh(df.fEta)
df['piPx'] = df.jetPx - df.hfPx
df['piPy'] = df.jetPy - df.hfPy
df['piPz'] = df.jetPz - df.hfPz
df['zpar_num'] = df.jetPx * df.hfPx + df.jetPy * df.hfPy + df.jetPz * df.hfPz
df['zpar_den'] = df.jetPx * df.jetPx + df.jetPy * df.jetPy + df.jetPz * df.jetPz
df['zpar'] = df.zpar_num / df.zpar_den
df[df['zpar'] == 1.]['zpar'] = .99999 # move 1 to last bin
df[df['zpar'] >= 1.]['zpar'] = .99999 # move 1 to last bin
df['nsub21'] = df.fNSub2 / df.fNSub1
df['E_D'] = np.sqrt(1.86**2 + df.hfPx**2 + df.hfPy**2 + df.hfPz**2)
df['E_pi'] = np.sqrt(.139**2 + df.piPx**2 + df.piPy**2 + df.piPz**2)
df['M_D_pi'] = np.sqrt((df.E_D + df.E_pi)**2 - df.jetPx**2 - df.jetPy**2 - df.jetPz**2)

self.logger.debug('zg')
df['zg_array'] = np.array(.5 - abs(df.fPtSubLeading / (df.fPtLeading + df.fPtSubLeading) - .5))
Expand Down Expand Up @@ -223,6 +229,8 @@ def process_histomass_single(self, index):
dfquery(df, 'idx_match >= 0', inplace=True)

self._calculate_variables(df)
# FIXME: suppress D*, move to DB
df = df[(abs(df.M_D_pi - 2.01) > .01) & (df.fJetNConstituents == 2)]

for obs, spec in self.cfg('observables', {}).items():
self.logger.debug('preparing histograms for %s', obs)
Expand Down Expand Up @@ -272,7 +280,8 @@ def process_efficiency_single(self, index):
h_response_fd = {var:
create_hist(
f'h_response_fd_{var}',
f";p_{{T}}^{{jet}} (GeV/#it{{c}});{var};p_{{T}}^{{jet}} (GeV/#it{{c}});{var};p_{{T}} (GeV/#it{{c}})",
f";p_{{T}}^{{jet}} (GeV/#it{{c}});p_{{T}}^{{HF}} (GeV/#it{{c}});{var};" +
f"p_{{T}}^{{jet}} (GeV/#it{{c}});p_{{T}}^{{HF}} (GeV/#it{{c}});{var}",
self.binarrays_ptjet['det'][var], self.binarrays_obs['det']['fPt'], self.binarrays_obs['det'][var],
self.binarrays_ptjet['gen'][var], self.binarrays_obs['gen']['fPt'], self.binarrays_obs['gen'][var])
for var in self.cfg('observables', []) if not '-' in var}
Expand Down Expand Up @@ -371,7 +380,8 @@ def process_efficiency_single(self, index):

if cat in dfmatch and dfmatch[cat] is not None:
self._prepare_response(dfmatch[cat], h_effkine, h_response, cat, var)
self._prepare_response_fd(dfmatch[cat], h_effkine_fd, h_response_fd, var)
if cat == 'np':
self._prepare_response_fd(dfmatch[cat], h_effkine_fd, h_response_fd, var)
f = self.cfg('frac_mcana', .2)
_, df_mccorr = self.split_df(dfmatch[cat], f if f < 1. else 0.)
self._prepare_response(df_mccorr, h_effkine_frac, h_response_frac, cat, var)
Expand Down Expand Up @@ -419,24 +429,24 @@ def _prepare_response_fd(self, dfi, h_effkine, h_response, var):
axis_var_gen = get_axis(h_response[var], 5)

df = dfi
fill_hist(h_response[var], df[['fJetPt_gen', 'fPt_gen', f'{var}_gen', 'fJetPt_gen', 'fPt_gen', f'{var}_gen']])

# TODO: the first cut should be taken care of by under-/overflow bins, check their usage in analyzer
df = df.loc[(df.fJetPt >= axis_ptjet_det.GetXmin()) & (df.fJetPt < axis_ptjet_det.GetXmax()) &
(df.fPt >= axis_pthf_det.GetXmin()) & (df.fPt < axis_pthf_det.GetXmax()) &
(df[var] >= axis_var_det.GetXmin()) & (df[var] < axis_var_det.GetXmax())]
# df = df.loc[(df.fJetPt >= axis_ptjet_det.GetXmin()) & (df.fJetPt < axis_ptjet_det.GetXmax()) &
# (df.fPt >= axis_pthf_det.GetXmin()) & (df.fPt < axis_pthf_det.GetXmax()) &
# (df[var] >= axis_var_det.GetXmin()) & (df[var] < axis_var_det.GetXmax())]
fill_hist(h_effkine[('det', 'nocuts', var)], df[['fJetPt', 'fPt', var]])
df = df.loc[(df.fJetPt_gen >= axis_ptjet_gen.GetXmin()) & (df.fJetPt_gen < axis_ptjet_gen.GetXmax()) &
(df.fPt_gen >= axis_pthf_gen.GetXmin()) & (df.fPt_gen < axis_pthf_gen.GetXmax()) &
(df[f'{var}_gen'] >= axis_var_gen.GetXmin()) & (df[f'{var}_gen'] < axis_var_gen.GetXmax())]
# df = df.loc[(df.fJetPt_gen >= axis_ptjet_gen.GetXmin()) & (df.fJetPt_gen < axis_ptjet_gen.GetXmax()) &
# (df.fPt_gen >= axis_pthf_gen.GetXmin()) & (df.fPt_gen < axis_pthf_gen.GetXmax()) &
# (df[f'{var}_gen'] >= axis_var_gen.GetXmin()) & (df[f'{var}_gen'] < axis_var_gen.GetXmax())]
fill_hist(h_effkine[('det', 'cut', var)], df[['fJetPt', 'fPt', var]])

fill_hist(h_response[var], df[['fJetPt_gen', 'fPt_gen', f'{var}_gen', 'fJetPt_gen', 'fPt_gen', f'{var}_gen']])

df = dfi
df = df.loc[(df.fJetPt_gen >= axis_ptjet_gen.GetXmin()) & (df.fJetPt_gen < axis_ptjet_gen.GetXmax()) &
(df.fPt_gen >= axis_pthf_gen.GetXmin()) & (df.fPt_gen < axis_pthf_gen.GetXmax()) &
(df[f'{var}_gen'] >= axis_var_gen.GetXmin()) & (df[f'{var}_gen'] < axis_var_gen.GetXmax())]
fill_hist(h_effkine[('gen', 'nocuts', var)], df[['fJetPt_gen', 'fPt', f'{var}_gen']])
df = df.loc[(df.fJetPt >= axis_ptjet_det.GetXmin()) & (df.fJetPt < axis_ptjet_det.GetXmax()) &
(df.fPt >= axis_pthf_det.GetXmin()) & (df.fPt < axis_pthf_det.GetXmax()) &
(df[f'{var}'] >= axis_var_det.GetXmin()) & (df[f'{var}'] < axis_var_det.GetXmax())]
fill_hist(h_effkine[('gen', 'cut', var)], df[['fJetPt_gen', 'fPt', f'{var}_gen']])
# df = df.loc[(df.fJetPt_gen >= axis_ptjet_gen.GetXmin()) & (df.fJetPt_gen < axis_ptjet_gen.GetXmax()) &
# (df.fPt_gen >= axis_pthf_gen.GetXmin()) & (df.fPt_gen < axis_pthf_gen.GetXmax()) &
# (df[f'{var}_gen'] >= axis_var_gen.GetXmin()) & (df[f'{var}_gen'] < axis_var_gen.GetXmax())]
fill_hist(h_effkine[('gen', 'nocuts', var)], df[['fJetPt_gen', 'fPt_gen', f'{var}_gen']])
# df = df.loc[(df.fJetPt >= axis_ptjet_det.GetXmin()) & (df.fJetPt < axis_ptjet_det.GetXmax()) &
# (df.fPt >= axis_pthf_det.GetXmin()) & (df.fPt < axis_pthf_det.GetXmax()) &
# (df[f'{var}'] >= axis_var_det.GetXmin()) & (df[f'{var}'] < axis_var_det.GetXmax())]
fill_hist(h_effkine[('gen', 'cut', var)], df[['fJetPt_gen', 'fPt_gen', f'{var}_gen']])
10 changes: 7 additions & 3 deletions machine_learning_hep/utils/hist.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ def norm_response(response, dim_out):
set_bin_val(response_norm, bin_out + bin_in, get_bin_val(response, bin_out + bin_in) / norm)
set_bin_err(response_norm, bin_out + bin_in, get_bin_err(response, bin_out + bin_in) / norm)
total += get_bin_val(response_norm, bin_out + bin_in)
print(f'distributed {bin_in=} to {total=} counts')
# print(f'distributed {bin_in=} to {total=} counts')
return response_norm


Expand All @@ -320,13 +320,17 @@ def fold_hist(hist, response):
total = 0.
for bin_out in itertools.product(*(range(1, get_nbins(hfold, i) + 1) for i in range(get_dim(hfold)))):
total += get_bin_val(response, bin_out + bin_in)
print(f'redistributed {bin_in=} to {total=} counts')
# print(f'redistributed {bin_in=} to {total=} counts')

for bin_out in itertools.product(*(range(1, get_nbins(hfold, i) + 1) for i in range(get_dim(hfold)))):
val = 0.
err = 0.
for bin_in in itertools.product(*(range(1, get_nbins(hist, i) + 1) for i in range(get_dim(hist)))):
print(f'{bin_out=} collecting {bin_in=} with weight {get_bin_val(response, bin_out + bin_in)}')
# if bin_in == bin_out:
# val = get_bin_val(hist, bin_in)
# err = get_bin_err(hist, bin_in)**2
# continue
# print(f'{bin_out=} collecting {bin_in=} with weight {get_bin_val(response, bin_out + bin_in)}')
val += get_bin_val(hist, bin_in) * get_bin_val(response, bin_out + bin_in)
err += get_bin_err(hist, bin_in)**2 * get_bin_val(response, bin_out + bin_in)**2
set_bin_val(hfold, bin_out, val)
Expand Down

0 comments on commit dc363a8

Please sign in to comment.