From bce4779f1d9bbe396ef8f5e44c028c2a900ec96b Mon Sep 17 00:00:00 2001 From: Jochen Klein Date: Thu, 5 Sep 2024 13:39:27 +0200 Subject: [PATCH 1/5] Fix feeddown folding --- .../analysis/analyzer_jets.py | 17 ++++++----- machine_learning_hep/processer_jet.py | 2 +- machine_learning_hep/utils/hist.py | 28 ++++++++++++++----- 3 files changed, 32 insertions(+), 15 deletions(-) diff --git a/machine_learning_hep/analysis/analyzer_jets.py b/machine_learning_hep/analysis/analyzer_jets.py index 803b3f151d..3639957d88 100644 --- a/machine_learning_hep/analysis/analyzer_jets.py +++ b/machine_learning_hep/analysis/analyzer_jets.py @@ -232,7 +232,7 @@ def calculate_efficiencies(self): # gen-level efficiency for feeddown estimation h_eff_gen = h_genmatch[cat].Clone() h_eff_gen.Divide(h_gen[cat]) - self._save_hist(h_eff_gen, f'eff/h_effgen_{cat}.png') + self._save_hist(h_eff_gen, f'eff/h_effgen_{cat}.png', 'text') self.hcandeff_gen[cat] = h_eff_gen # matching loss @@ -962,7 +962,6 @@ def estimate_feeddown(self): f';p_{{T}}^{{jet}} (GeV/#it{{c}});p_{{T}}^{{HF}} (GeV/#it{{c}});{var}', bins_ptjet, self.bins_candpt, bins_obs[var]) fill_hist_fast(h3_fd_gen_orig, df[['pt_jet', 'pt_cand', f'{colname}']]) - self._save_hist(project_hist(h3_fd_gen_orig, [0, 2], {}), f'fd/h_ptjet-{var}_feeddown_gen_noeffscaling.png') # new method h3_fd_gen = h3_fd_gen_orig.Clone() @@ -970,10 +969,11 @@ def estimate_feeddown(self): self._save_hist(project_hist(h3_fd_gen, [0, 2], {}), f'fd/h_ptjet-{var}_fdnew_gen.png') # apply np efficiency for ipt in range(get_nbins(h3_fd_gen, 1)): - eff_np = self.hcandeff_gen['np'].GetBinContent(ipt+1) - for iptjet, ishape in itertools.product( - range(get_nbins(h3_fd_gen, 0)), range(get_nbins(h3_fd_gen, 2))): - scale_bin(h3_fd_gen, eff_np, iptjet+1, ipt+1, ishape+1) + for iptjet in range(get_nbins(h3_fd_gen, 0)): + eff_np = self.hcandeff_gen['np'].GetBinContent(iptjet + 1, ipt + 1) + print(f'scaling with {eff_np=} for {iptjet=}, {ipt=}', flush=True) + for ishape in range(get_nbins(h3_fd_gen, 2)): + scale_bin(h3_fd_gen, eff_np, iptjet+1, ipt+1, ishape+1) self._save_hist(project_hist(h3_fd_gen, [0, 2], {}), f'fd/h_ptjet-{var}_fdnew_gen_geneff.png') # 3d folding incl. kinematic efficiencies @@ -985,6 +985,8 @@ def estimate_feeddown(self): rfile.Get(f'h_effkine_fd_det_nocuts_{var}'), rfile.Get(f'h_effkine_fd_det_cut_{var}')) h_response = rfile.Get(f'h_response_fd_{var}') + h_response.Print('v') + print(f'fd folding for {var=}') h_response_norm = norm_response(h_response, 3) h3_fd_gen.Multiply(h_effkine_gen) self._save_hist(project_hist(h3_fd_gen, [0, 2], {}), f'fd/h_ptjet-{var}_fdnew_gen_genkine.png') @@ -1002,14 +1004,15 @@ def estimate_feeddown(self): for iptjet, ishape in itertools.product( range(get_nbins(h3_fd_det, 0)), range(get_nbins(h3_fd_det, 2))): scale_bin(h3_fd_det, 1./eff_pr, iptjet+1, ipt+1, ishape+1) - self._save_hist(project_hist(h3_fd_det, [0, 2], {}), f'fd/h_ptjet-{var}_fdnew_det_deteff.png') # project to 2d (ptjet-shape) h_fd_det = project_hist(h3_fd_det, [0, 2], {}) + self._save_hist(h_fd_det, f'fd/h_ptjet-{var}_fdnew_det_deteff.png') # old method h3_fd_gen = h3_fd_gen_orig.Clone() ensure_sumw2(h3_fd_gen) + self._save_hist(project_hist(h3_fd_gen, [0, 2], {}), f'fd/h_ptjet-{var}_feeddown_gen_noeffscaling.png') for ipt in range(get_nbins(h3_fd_gen, 1)): eff_pr = self.hcandeff['pr'].GetBinContent(ipt+1) eff_np = self.hcandeff['np'].GetBinContent(ipt+1) diff --git a/machine_learning_hep/processer_jet.py b/machine_learning_hep/processer_jet.py index f50422e15a..6f583b2439 100644 --- a/machine_learning_hep/processer_jet.py +++ b/machine_learning_hep/processer_jet.py @@ -438,7 +438,7 @@ def _prepare_response_fd(self, dfi, h_effkine, h_response, var): (df[f'{var}_gen'] >= axis_var_gen.GetXmin()) & (df[f'{var}_gen'] < axis_var_gen.GetXmax())] fill_hist(h_effkine[('det', 'cut', var)], df[['fJetPt', 'fPt', var]]) - fill_hist(h_response[var], df[['fJetPt', 'fPt', f'{var}', 'fJetPt_gen', 'fPt_gen', f'{var}_gen']]) + fill_hist(h_response[var], df[['fJetPt_gen', 'fPt_gen', f'{var}_gen', 'fJetPt_gen', 'fPt_gen', f'{var}_gen']]) df = dfi df = df.loc[(df.fJetPt_gen >= axis_ptjet_gen.GetXmin()) & (df.fJetPt_gen < axis_ptjet_gen.GetXmax()) & diff --git a/machine_learning_hep/utils/hist.py b/machine_learning_hep/utils/hist.py index 1bbe330320..0a74260d4c 100644 --- a/machine_learning_hep/utils/hist.py +++ b/machine_learning_hep/utils/hist.py @@ -293,13 +293,19 @@ def norm_response(response, dim_out): for bin_in in itertools.product(*(range(1, get_nbins(response_norm, iaxis) + 1) for iaxis in range(dim_out, get_dim(response_norm)))): for iaxis, val in enumerate(bin_in, dim_out): - get_axis(response_norm, iaxis).SetRange(val, val) - norm = response_norm.Projection(0).Integral() + get_axis(response, iaxis).SetRange(val, val) + # norm = response.Projection(0).Integral() + norm = 0. + for bin_out in itertools.product(*(range(1, get_nbins(response, i) + 1) for i in range(dim_out))): + norm += get_bin_val(response, bin_out + bin_in) if np.isclose(norm, 0.): continue - for bin_out in itertools.product(*(range(1, get_nbins(response_norm, i)+1) for i in range(dim_out))): - set_bin_val(response_norm, bin_out + bin_in, get_bin_val(response_norm, bin_out + bin_in) / norm) - set_bin_err(response_norm, bin_out + bin_in, get_bin_err(response_norm, bin_out + bin_in) / norm) + total = 0. + for bin_out in itertools.product(*(range(1, get_nbins(response_norm, i) + 1) for i in range(dim_out))): + set_bin_val(response_norm, bin_out + bin_in, get_bin_val(response, bin_out + bin_in) / norm) + set_bin_err(response_norm, bin_out + bin_in, get_bin_err(response, bin_out + bin_in) / norm) + total += get_bin_val(response_norm, bin_out + bin_in) + print(f'distributed {bin_in=} to {total=} counts') return response_norm @@ -309,10 +315,18 @@ def fold_hist(hist, response): dim_out = get_dim(response) - get_dim(hist) axes_spec = list(np.array(get_axis(response, i).GetXbins(), 'd') for i in range(dim_out)) hfold = create_hist('test', 'test', *axes_spec) - for bin_out in itertools.product(*(range(1, get_nbins(hfold, i)+1) for i in range(get_dim(hfold)))): + # TODO: setup axes + for bin_in in itertools.product(*(range(1, get_nbins(hist, i) + 1) for i in range(get_dim(hist)))): + total = 0. + for bin_out in itertools.product(*(range(1, get_nbins(hfold, i) + 1) for i in range(get_dim(hfold)))): + total += get_bin_val(response, bin_out + bin_in) + print(f'redistributed {bin_in=} to {total=} counts') + + for bin_out in itertools.product(*(range(1, get_nbins(hfold, i) + 1) for i in range(get_dim(hfold)))): val = 0. err = 0. - for bin_in in itertools.product(*(range(1, get_nbins(hist, i)+1) for i in range(get_dim(hist)))): + for bin_in in itertools.product(*(range(1, get_nbins(hist, i) + 1) for i in range(get_dim(hist)))): + print(f'{bin_out=} collecting {bin_in=} with weight {get_bin_val(response, bin_out + bin_in)}') val += get_bin_val(hist, bin_in) * get_bin_val(response, bin_out + bin_in) err += get_bin_err(hist, bin_in)**2 * get_bin_val(response, bin_out + bin_in)**2 set_bin_val(hfold, bin_out, val) From cd9e57d3e4da73a91b4e4724f5314dfde9b8ef17 Mon Sep 17 00:00:00 2001 From: Jochen Klein Date: Fri, 6 Sep 2024 22:28:04 +0200 Subject: [PATCH 2/5] Add pending changes --- .../analysis/analyzer_jets.py | 8 +++- machine_learning_hep/processer_jet.py | 46 +++++++++++-------- machine_learning_hep/utils/hist.py | 10 ++-- 3 files changed, 42 insertions(+), 22 deletions(-) diff --git a/machine_learning_hep/analysis/analyzer_jets.py b/machine_learning_hep/analysis/analyzer_jets.py index 3639957d88..a3e16eefe9 100644 --- a/machine_learning_hep/analysis/analyzer_jets.py +++ b/machine_learning_hep/analysis/analyzer_jets.py @@ -854,6 +854,7 @@ def _analyze(self, method = 'sidesub'): self.logger.debug("Final histogram: %s, jet pT %g to %g", var, range_ptjet[0], range_ptjet[1]) # self.logger.debug(print_histogram(hproj_sel)) + ROOT.gStyle.SetOptStat(0) self._save_hist( hproj_sel, f'uf/h_{var}_{method}_unfolded_{mcordata}_' + @@ -981,11 +982,16 @@ def estimate_feeddown(self): h_effkine_gen = self._build_effkine( rfile.Get(f'h_effkine_fd_gen_nocuts_{var}'), rfile.Get(f'h_effkine_fd_gen_cut_{var}')) + self._save_hist(h_effkine_gen, f'fd/h_effkine-ptjet-{var}_fd_gen.png', 'text') h_effkine_det = self._build_effkine( rfile.Get(f'h_effkine_fd_det_nocuts_{var}'), rfile.Get(f'h_effkine_fd_det_cut_{var}')) + self._save_hist(h_effkine_det, f'fd/h_effkine-ptjet-{var}_fd_det.png', 'text') h_response = rfile.Get(f'h_response_fd_{var}') - h_response.Print('v') + self._save_hist(project_hist(h_response, [0, 3], {}), f'fd/h_response_ptjet_{var}.png') + self._save_hist(project_hist(h_response, [1, 4], {}), f'fd/h_response_pthf_{var}.png') + self._save_hist(project_hist(h_response, [2, 5], {}), f'fd/h_response_shape_{var}.png') + h_response.Print('all') print(f'fd folding for {var=}') h_response_norm = norm_response(h_response, 3) h3_fd_gen.Multiply(h_effkine_gen) diff --git a/machine_learning_hep/processer_jet.py b/machine_learning_hep/processer_jet.py index 6f583b2439..1b3e06d226 100644 --- a/machine_learning_hep/processer_jet.py +++ b/machine_learning_hep/processer_jet.py @@ -131,11 +131,17 @@ def _calculate_variables(self, df, verify=False): # pylint: disable=invalid-name df['hfPx'] = df.fPt * np.cos(df.fPhi) df['hfPy'] = df.fPt * np.sin(df.fPhi) df['hfPz'] = df.fPt * np.sinh(df.fEta) + df['piPx'] = df.jetPx - df.hfPx + df['piPy'] = df.jetPy - df.hfPy + df['piPz'] = df.jetPz - df.hfPz df['zpar_num'] = df.jetPx * df.hfPx + df.jetPy * df.hfPy + df.jetPz * df.hfPz df['zpar_den'] = df.jetPx * df.jetPx + df.jetPy * df.jetPy + df.jetPz * df.jetPz df['zpar'] = df.zpar_num / df.zpar_den df[df['zpar'] >= 1.]['zpar'] = .999 # move 1 to last bin df['nsub21'] = df.fNSub2 / df.fNSub1 + df['E_D'] = np.sqrt(1.86**2 + df.hfPx**2 + df.hfPy**2 + df.hfPz**2) + df['E_pi'] = np.sqrt(.139**2 + df.piPx**2 + df.piPy**2 + df.piPz**2) + df['M_D_pi'] = np.sqrt((df.E_D + df.E_pi)**2 - df.jetPx**2 - df.jetPy**2 - df.jetPz**2) self.logger.debug('zg') df['zg_array'] = np.array(.5 - abs(df.fPtSubLeading / (df.fPtLeading + df.fPtSubLeading) - .5)) @@ -232,6 +238,8 @@ def process_histomass_single(self, index): dfquery(df, 'idx_match >= 0', inplace=True) self._calculate_variables(df) + # FIXME: suppress D*, move to DB + df = df[(abs(df.M_D_pi - 2.01) > .01) & (df.fJetNConstituents == 2)] for obs, spec in self.cfg('observables', {}).items(): self.logger.debug('preparing histograms for %s', obs) @@ -281,7 +289,8 @@ def process_efficiency_single(self, index): h_response_fd = {var: create_hist( f'h_response_fd_{var}', - f";p_{{T}}^{{jet}} (GeV/#it{{c}});{var};p_{{T}}^{{jet}} (GeV/#it{{c}});{var};p_{{T}} (GeV/#it{{c}})", + f";p_{{T}}^{{jet}} (GeV/#it{{c}});p_{{T}}^{{HF}} (GeV/#it{{c}});{var};" + + f"p_{{T}}^{{jet}} (GeV/#it{{c}});p_{{T}}^{{HF}} (GeV/#it{{c}});{var}", self.binarrays_ptjet['det'][var], self.binarrays_obs['det']['fPt'], self.binarrays_obs['det'][var], self.binarrays_ptjet['gen'][var], self.binarrays_obs['gen']['fPt'], self.binarrays_obs['gen'][var]) for var in self.cfg('observables', []) if not '-' in var} @@ -380,7 +389,8 @@ def process_efficiency_single(self, index): if cat in dfmatch and dfmatch[cat] is not None: self._prepare_response(dfmatch[cat], h_effkine, h_response, cat, var) - self._prepare_response_fd(dfmatch[cat], h_effkine_fd, h_response_fd, var) + if cat == 'np': + self._prepare_response_fd(dfmatch[cat], h_effkine_fd, h_response_fd, var) f = self.cfg('frac_mcana', .2) _, df_mccorr = self.split_df(dfmatch[cat], f if f < 1. else 0.) self._prepare_response(df_mccorr, h_effkine_frac, h_response_frac, cat, var) @@ -428,24 +438,24 @@ def _prepare_response_fd(self, dfi, h_effkine, h_response, var): axis_var_gen = get_axis(h_response[var], 5) df = dfi + fill_hist(h_response[var], df[['fJetPt_gen', 'fPt_gen', f'{var}_gen', 'fJetPt_gen', 'fPt_gen', f'{var}_gen']]) + # TODO: the first cut should be taken care of by under-/overflow bins, check their usage in analyzer - df = df.loc[(df.fJetPt >= axis_ptjet_det.GetXmin()) & (df.fJetPt < axis_ptjet_det.GetXmax()) & - (df.fPt >= axis_pthf_det.GetXmin()) & (df.fPt < axis_pthf_det.GetXmax()) & - (df[var] >= axis_var_det.GetXmin()) & (df[var] < axis_var_det.GetXmax())] + # df = df.loc[(df.fJetPt >= axis_ptjet_det.GetXmin()) & (df.fJetPt < axis_ptjet_det.GetXmax()) & + # (df.fPt >= axis_pthf_det.GetXmin()) & (df.fPt < axis_pthf_det.GetXmax()) & + # (df[var] >= axis_var_det.GetXmin()) & (df[var] < axis_var_det.GetXmax())] fill_hist(h_effkine[('det', 'nocuts', var)], df[['fJetPt', 'fPt', var]]) - df = df.loc[(df.fJetPt_gen >= axis_ptjet_gen.GetXmin()) & (df.fJetPt_gen < axis_ptjet_gen.GetXmax()) & - (df.fPt_gen >= axis_pthf_gen.GetXmin()) & (df.fPt_gen < axis_pthf_gen.GetXmax()) & - (df[f'{var}_gen'] >= axis_var_gen.GetXmin()) & (df[f'{var}_gen'] < axis_var_gen.GetXmax())] + # df = df.loc[(df.fJetPt_gen >= axis_ptjet_gen.GetXmin()) & (df.fJetPt_gen < axis_ptjet_gen.GetXmax()) & + # (df.fPt_gen >= axis_pthf_gen.GetXmin()) & (df.fPt_gen < axis_pthf_gen.GetXmax()) & + # (df[f'{var}_gen'] >= axis_var_gen.GetXmin()) & (df[f'{var}_gen'] < axis_var_gen.GetXmax())] fill_hist(h_effkine[('det', 'cut', var)], df[['fJetPt', 'fPt', var]]) - fill_hist(h_response[var], df[['fJetPt_gen', 'fPt_gen', f'{var}_gen', 'fJetPt_gen', 'fPt_gen', f'{var}_gen']]) - df = dfi - df = df.loc[(df.fJetPt_gen >= axis_ptjet_gen.GetXmin()) & (df.fJetPt_gen < axis_ptjet_gen.GetXmax()) & - (df.fPt_gen >= axis_pthf_gen.GetXmin()) & (df.fPt_gen < axis_pthf_gen.GetXmax()) & - (df[f'{var}_gen'] >= axis_var_gen.GetXmin()) & (df[f'{var}_gen'] < axis_var_gen.GetXmax())] - fill_hist(h_effkine[('gen', 'nocuts', var)], df[['fJetPt_gen', 'fPt', f'{var}_gen']]) - df = df.loc[(df.fJetPt >= axis_ptjet_det.GetXmin()) & (df.fJetPt < axis_ptjet_det.GetXmax()) & - (df.fPt >= axis_pthf_det.GetXmin()) & (df.fPt < axis_pthf_det.GetXmax()) & - (df[f'{var}'] >= axis_var_det.GetXmin()) & (df[f'{var}'] < axis_var_det.GetXmax())] - fill_hist(h_effkine[('gen', 'cut', var)], df[['fJetPt_gen', 'fPt', f'{var}_gen']]) + # df = df.loc[(df.fJetPt_gen >= axis_ptjet_gen.GetXmin()) & (df.fJetPt_gen < axis_ptjet_gen.GetXmax()) & + # (df.fPt_gen >= axis_pthf_gen.GetXmin()) & (df.fPt_gen < axis_pthf_gen.GetXmax()) & + # (df[f'{var}_gen'] >= axis_var_gen.GetXmin()) & (df[f'{var}_gen'] < axis_var_gen.GetXmax())] + fill_hist(h_effkine[('gen', 'nocuts', var)], df[['fJetPt_gen', 'fPt_gen', f'{var}_gen']]) + # df = df.loc[(df.fJetPt >= axis_ptjet_det.GetXmin()) & (df.fJetPt < axis_ptjet_det.GetXmax()) & + # (df.fPt >= axis_pthf_det.GetXmin()) & (df.fPt < axis_pthf_det.GetXmax()) & + # (df[f'{var}'] >= axis_var_det.GetXmin()) & (df[f'{var}'] < axis_var_det.GetXmax())] + fill_hist(h_effkine[('gen', 'cut', var)], df[['fJetPt_gen', 'fPt_gen', f'{var}_gen']]) diff --git a/machine_learning_hep/utils/hist.py b/machine_learning_hep/utils/hist.py index 0a74260d4c..402b6ac25b 100644 --- a/machine_learning_hep/utils/hist.py +++ b/machine_learning_hep/utils/hist.py @@ -305,7 +305,7 @@ def norm_response(response, dim_out): set_bin_val(response_norm, bin_out + bin_in, get_bin_val(response, bin_out + bin_in) / norm) set_bin_err(response_norm, bin_out + bin_in, get_bin_err(response, bin_out + bin_in) / norm) total += get_bin_val(response_norm, bin_out + bin_in) - print(f'distributed {bin_in=} to {total=} counts') + # print(f'distributed {bin_in=} to {total=} counts') return response_norm @@ -320,13 +320,17 @@ def fold_hist(hist, response): total = 0. for bin_out in itertools.product(*(range(1, get_nbins(hfold, i) + 1) for i in range(get_dim(hfold)))): total += get_bin_val(response, bin_out + bin_in) - print(f'redistributed {bin_in=} to {total=} counts') + # print(f'redistributed {bin_in=} to {total=} counts') for bin_out in itertools.product(*(range(1, get_nbins(hfold, i) + 1) for i in range(get_dim(hfold)))): val = 0. err = 0. for bin_in in itertools.product(*(range(1, get_nbins(hist, i) + 1) for i in range(get_dim(hist)))): - print(f'{bin_out=} collecting {bin_in=} with weight {get_bin_val(response, bin_out + bin_in)}') + # if bin_in == bin_out: + # val = get_bin_val(hist, bin_in) + # err = get_bin_err(hist, bin_in)**2 + # continue + # print(f'{bin_out=} collecting {bin_in=} with weight {get_bin_val(response, bin_out + bin_in)}') val += get_bin_val(hist, bin_in) * get_bin_val(response, bin_out + bin_in) err += get_bin_err(hist, bin_in)**2 * get_bin_val(response, bin_out + bin_in)**2 set_bin_val(hfold, bin_out, val) From 4f421771d3c792f13d7912de7308663d4a1dac65 Mon Sep 17 00:00:00 2001 From: Jochen Klein Date: Sat, 7 Sep 2024 21:14:31 +0200 Subject: [PATCH 3/5] Suppress D* --- .../database_ml_parameters_D0Jet_pp.yml | 32 +++++++++++++------ machine_learning_hep/processer_jet.py | 2 +- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/machine_learning_hep/data/data_run3/database_ml_parameters_D0Jet_pp.yml b/machine_learning_hep/data/data_run3/database_ml_parameters_D0Jet_pp.yml index f81792f477..b210883aee 100644 --- a/machine_learning_hep/data/data_run3/database_ml_parameters_D0Jet_pp.yml +++ b/machine_learning_hep/data/data_run3/database_ml_parameters_D0Jet_pp.yml @@ -446,8 +446,8 @@ D0Jet_pp: components: sig: fn: 'Gaussian::peak(m[1.,5.], mean[1.85,1.89], sigma_g1[.01,.08])' - bkg: - fn: 'Gaussian::wide(m, mean, sigma_wide[.05,1.])' + wide: + fn: 'Gaussian::wide(m, mean, expr("n*sigma_g1", n[1.,5.], sigma_g1))' model: fn: 'SUM::sig(frac_wide[0.,.3]*wide, peak)' - level: mcrefl @@ -530,49 +530,49 @@ D0Jet_pp: - level: mc ptrange: [1., 3.] range: [1.69, 2.04] - fix_params: ['frac_l', 'mean_l', 'mean_r', 'sigma_l', 'sigma_r', 'frac_wide', 'sigma_g1', 'sigma_wide'] + fix_params: ['frac_l', 'mean_l', 'mean_r', 'sigma_l', 'sigma_r', 'frac_wide', 'sigma_g1', 'n'] components: model: fn: 'SUM::sigrefl(frac_refl[0.,1.]*refl, sig)' - level: mc ptrange: [3., 4.] range: [1.68, 2.06] - fix_params: ['frac_l', 'mean_l', 'mean_r', 'sigma_l', 'sigma_r', 'frac_wide', 'sigma_g1', 'sigma_wide'] + fix_params: ['frac_l', 'mean_l', 'mean_r', 'sigma_l', 'sigma_r', 'frac_wide', 'sigma_g1', 'n'] components: model: fn: 'SUM::sigrefl(frac_refl[0.,1.]*refl, sig)' - level: mc ptrange: [4., 5.] range: [1.64, 2.08] - fix_params: ['frac_l', 'mean_l', 'mean_r', 'sigma_l', 'sigma_r', 'frac_wide', 'sigma_g1', 'sigma_wide'] + fix_params: ['frac_l', 'mean_l', 'mean_r', 'sigma_l', 'sigma_r', 'frac_wide', 'sigma_g1', 'n'] components: model: fn: 'SUM::sigrefl(frac_refl[0.,1.]*refl, sig)' - level: mc ptrange: [5., 6.] range: [1.64, 2.10] - fix_params: ['frac_l', 'mean_l', 'mean_r', 'sigma_l', 'sigma_r', 'frac_wide', 'sigma_g1', 'sigma_wide'] + fix_params: ['frac_l', 'mean_l', 'mean_r', 'sigma_l', 'sigma_r', 'frac_wide', 'sigma_g1', 'n'] components: model: fn: 'SUM::sigrefl(frac_refl[0.,1.]*refl, sig)' - level: mc ptrange: [6., 8.] range: [1.60, 2.14] - fix_params: ['frac_l', 'mean_l', 'mean_r', 'sigma_l', 'sigma_r', 'frac_wide', 'sigma_g1', 'sigma_wide'] + fix_params: ['frac_l', 'mean_l', 'mean_r', 'sigma_l', 'sigma_r', 'frac_wide', 'sigma_g1', 'n'] components: model: fn: 'SUM::sigrefl(frac_refl[0.,1.]*refl, sig)' - level: mc ptrange: [8., 12.] range: [1.52, 2.30] - fix_params: ['frac_l', 'mean_l', 'mean_r', 'sigma_l', 'sigma_r', 'frac_wide', 'sigma_g1', 'sigma_wide'] + fix_params: ['frac_l', 'mean_l', 'mean_r', 'sigma_l', 'sigma_r', 'frac_wide', 'sigma_g1', 'n'] components: model: fn: 'SUM::sigrefl(frac_refl[0.,1.]*refl, sig)' - level: mc ptrange: [12., 48.] range: [1.40, 2.40] - fix_params: ['frac_l', 'mean_l', 'mean_r', 'sigma_l', 'sigma_r', 'frac_wide', 'sigma_g1', 'sigma_wide'] + fix_params: ['frac_l', 'mean_l', 'mean_r', 'sigma_l', 'sigma_r', 'frac_wide', 'sigma_g1', 'n'] components: model: fn: 'SUM::sigrefl(frac_refl[0.,1.]*refl, sig)' @@ -803,7 +803,19 @@ D0Jet_pp: # Additional cuts applied before mass histogram is filled use_cuts: True # systematics - cuts: ["mlBkgScore < 0.02", "mlBkgScore < 0.02", "mlBkgScore < 0.02", "mlBkgScore < 0.05", "mlBkgScore < 0.06", "mlBkgScore < 0.08", "mlBkgScore < 0.08", "mlBkgScore < 0.10", "mlBkgScore < 0.10", "mlBkgScore < 0.20", "mlBkgScore < 0.25", "mlBkgScore < 0.30"] # (sel_an_binmin bins) systematics FIXME: Update for new model. + cuts: + - "mlBkgScore < 0.02" + - "mlBkgScore < 0.02" + - "mlBkgScore < 0.02" + - "mlBkgScore < 0.05" + - "mlBkgScore < 0.06" + - "mlBkgScore < 0.08" + - "mlBkgScore < 0.08" + - "mlBkgScore < 0.10" + - "mlBkgScore < 0.10" + - "mlBkgScore < 0.20" + - "mlBkgScore < 0.25" + - "mlBkgScore < 0.30" systematics: # used in machine_learning_hep/analysis/systematics.py probvariation: diff --git a/machine_learning_hep/processer_jet.py b/machine_learning_hep/processer_jet.py index 1b3e06d226..6d82a0d816 100644 --- a/machine_learning_hep/processer_jet.py +++ b/machine_learning_hep/processer_jet.py @@ -239,7 +239,7 @@ def process_histomass_single(self, index): self._calculate_variables(df) # FIXME: suppress D*, move to DB - df = df[(abs(df.M_D_pi - 2.01) > .01) & (df.fJetNConstituents == 2)] + df = df[(abs(df.M_D_pi - 2.01) > .01) | (df.fJetNConstituents > 2)] for obs, spec in self.cfg('observables', {}).items(): self.logger.debug('preparing histograms for %s', obs) From a51b54f985b6f2ea3291e5ef1d8c1544a73805f7 Mon Sep 17 00:00:00 2001 From: Jochen Klein Date: Thu, 12 Sep 2024 13:34:49 +0200 Subject: [PATCH 4/5] Update fdsub --- .../analysis/analyzer_jets.py | 7 +++++ .../database_ml_parameters_D0Jet_pp.yml | 1 + machine_learning_hep/processer_jet.py | 26 +++++++++---------- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/machine_learning_hep/analysis/analyzer_jets.py b/machine_learning_hep/analysis/analyzer_jets.py index a3e16eefe9..cfecd43a7a 100644 --- a/machine_learning_hep/analysis/analyzer_jets.py +++ b/machine_learning_hep/analysis/analyzer_jets.py @@ -1060,6 +1060,13 @@ def estimate_feeddown(self): hfeeddown_det.Divide(h_effkine_det) self._save_hist(hfeeddown_det, f'fd/h_ptjet-{var}_feeddown_det_kineeffscaled.png') + c = TCanvas() + c.cd() + hc = project_hist(h_fd_det, [1], {0: (3, 3)}).DrawCopy() + hc.SetLineColor(ROOT.kViolet) + hc = project_hist(hfeeddown_det, [1], {0: (3, 3)}).DrawCopy('same') + self._save_canvas(c, f'fd/h_{var}_fdcmp.png') + if self.cfg('fd_folding_method') == '3d': self.logger.info('using 3d folding for feeddown estimation for %s', var) hfeeddown_det = h_fd_det diff --git a/machine_learning_hep/data/data_run3/database_ml_parameters_D0Jet_pp.yml b/machine_learning_hep/data/data_run3/database_ml_parameters_D0Jet_pp.yml index b210883aee..d2d9a3f14e 100644 --- a/machine_learning_hep/data/data_run3/database_ml_parameters_D0Jet_pp.yml +++ b/machine_learning_hep/data/data_run3/database_ml_parameters_D0Jet_pp.yml @@ -701,6 +701,7 @@ D0Jet_pp: frac_mcana: .2 # fraction of MC sample for the closure fd_root: '/data2/vkucera/powheg/trees_powheg_fd_central.root' # systematics fd_parquet: '/data2/jklein/powheg/trees_powheg_fd_central.parquet' # systematics + fd_folding_method: 3d # obsolete? proc_type: Jets # used diff --git a/machine_learning_hep/processer_jet.py b/machine_learning_hep/processer_jet.py index 6d82a0d816..209cb010b7 100644 --- a/machine_learning_hep/processer_jet.py +++ b/machine_learning_hep/processer_jet.py @@ -438,24 +438,24 @@ def _prepare_response_fd(self, dfi, h_effkine, h_response, var): axis_var_gen = get_axis(h_response[var], 5) df = dfi - fill_hist(h_response[var], df[['fJetPt_gen', 'fPt_gen', f'{var}_gen', 'fJetPt_gen', 'fPt_gen', f'{var}_gen']]) + fill_hist(h_response[var], df[['fJetPt', 'fPt', f'{var}', 'fJetPt_gen', 'fPt_gen', f'{var}_gen']]) # TODO: the first cut should be taken care of by under-/overflow bins, check their usage in analyzer - # df = df.loc[(df.fJetPt >= axis_ptjet_det.GetXmin()) & (df.fJetPt < axis_ptjet_det.GetXmax()) & - # (df.fPt >= axis_pthf_det.GetXmin()) & (df.fPt < axis_pthf_det.GetXmax()) & - # (df[var] >= axis_var_det.GetXmin()) & (df[var] < axis_var_det.GetXmax())] + df = df.loc[(df.fJetPt >= axis_ptjet_det.GetXmin()) & (df.fJetPt < axis_ptjet_det.GetXmax()) & + (df.fPt >= axis_pthf_det.GetXmin()) & (df.fPt < axis_pthf_det.GetXmax()) & + (df[var] >= axis_var_det.GetXmin()) & (df[var] < axis_var_det.GetXmax())] fill_hist(h_effkine[('det', 'nocuts', var)], df[['fJetPt', 'fPt', var]]) - # df = df.loc[(df.fJetPt_gen >= axis_ptjet_gen.GetXmin()) & (df.fJetPt_gen < axis_ptjet_gen.GetXmax()) & - # (df.fPt_gen >= axis_pthf_gen.GetXmin()) & (df.fPt_gen < axis_pthf_gen.GetXmax()) & - # (df[f'{var}_gen'] >= axis_var_gen.GetXmin()) & (df[f'{var}_gen'] < axis_var_gen.GetXmax())] + df = df.loc[(df.fJetPt_gen >= axis_ptjet_gen.GetXmin()) & (df.fJetPt_gen < axis_ptjet_gen.GetXmax()) & + (df.fPt_gen >= axis_pthf_gen.GetXmin()) & (df.fPt_gen < axis_pthf_gen.GetXmax()) & + (df[f'{var}_gen'] >= axis_var_gen.GetXmin()) & (df[f'{var}_gen'] < axis_var_gen.GetXmax())] fill_hist(h_effkine[('det', 'cut', var)], df[['fJetPt', 'fPt', var]]) df = dfi - # df = df.loc[(df.fJetPt_gen >= axis_ptjet_gen.GetXmin()) & (df.fJetPt_gen < axis_ptjet_gen.GetXmax()) & - # (df.fPt_gen >= axis_pthf_gen.GetXmin()) & (df.fPt_gen < axis_pthf_gen.GetXmax()) & - # (df[f'{var}_gen'] >= axis_var_gen.GetXmin()) & (df[f'{var}_gen'] < axis_var_gen.GetXmax())] + df = df.loc[(df.fJetPt_gen >= axis_ptjet_gen.GetXmin()) & (df.fJetPt_gen < axis_ptjet_gen.GetXmax()) & + (df.fPt_gen >= axis_pthf_gen.GetXmin()) & (df.fPt_gen < axis_pthf_gen.GetXmax()) & + (df[f'{var}_gen'] >= axis_var_gen.GetXmin()) & (df[f'{var}_gen'] < axis_var_gen.GetXmax())] fill_hist(h_effkine[('gen', 'nocuts', var)], df[['fJetPt_gen', 'fPt_gen', f'{var}_gen']]) - # df = df.loc[(df.fJetPt >= axis_ptjet_det.GetXmin()) & (df.fJetPt < axis_ptjet_det.GetXmax()) & - # (df.fPt >= axis_pthf_det.GetXmin()) & (df.fPt < axis_pthf_det.GetXmax()) & - # (df[f'{var}'] >= axis_var_det.GetXmin()) & (df[f'{var}'] < axis_var_det.GetXmax())] + df = df.loc[(df.fJetPt >= axis_ptjet_det.GetXmin()) & (df.fJetPt < axis_ptjet_det.GetXmax()) & + (df.fPt >= axis_pthf_det.GetXmin()) & (df.fPt < axis_pthf_det.GetXmax()) & + (df[f'{var}'] >= axis_var_det.GetXmin()) & (df[f'{var}'] < axis_var_det.GetXmax())] fill_hist(h_effkine[('gen', 'cut', var)], df[['fJetPt_gen', 'fPt_gen', f'{var}_gen']]) From 71888704716367e04fad5f509b9e01b61e3515e9 Mon Sep 17 00:00:00 2001 From: Jochen Klein Date: Thu, 12 Sep 2024 13:34:58 +0200 Subject: [PATCH 5/5] Add .envrc --- .envrc | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 .envrc diff --git a/.envrc b/.envrc new file mode 100644 index 0000000000..2e24311f54 --- /dev/null +++ b/.envrc @@ -0,0 +1,15 @@ +if [[ `hostname` = "alicecerno2" ]] && [[ -z ${ROOT_BASEDIR} ]]; then + + PYTHON_VERSION=3.10.14 + ROOT_VERSION=v6-32-04 + ROOUNFOLD_VERSION=2.0.1 + + PREFIX=/home/pyadmin/software_mlhep + layout python ${PREFIX}/install/pyenv/versions/${PYTHON_VERSION}/bin/python3 + path_add PYTHONPATH ${PREFIX}/install/root-${ROOT_VERSION}_py-${PYTHON_VERSION}/lib + PATH_add ${PREFIX}/install/root-${ROOT_VERSION}_py-${PYTHON_VERSION}/bin + path_add LD_LIBRARY_PATH ${PREFIX}/install/root-${ROOT_VERSION}_py-${PYTHON_VERSION}/lib + # path_add LD_LIBRARY_PATH ${PREFIX}/install/RooUnfold-${ROOUNFOLD_VERSION}_root-${ROOT_VERSION}_py-${PYTHON_VERSION}/lib + path_add LD_LIBRARY_PATH ${PREFIX}/build/RooUnfold-${ROOUNFOLD_VERSION}_root-${ROOT_VERSION}_py-${PYTHON_VERSION} + +fi