Skip to content

Problem with Frequentist Calculator in UpperLimit compu #175

@emixose98

Description

@emixose98

Hi, we have been working in computing Upper Limits on the Br of a decay B->4mu, by performing a test by making use of an exponential + double cristall ball hypothesis, where in the signal core only the Br is allowed to float. The Nback parameter and the alpha (single event sensitivity) are gaussian constrained and the argument of the exponential is fixed. I attach a plot of the sidebands to give some context. The alpha = 2e-10. The background distribution follows an exponential law and it can be seen in the attached documents.

We are making use of the Frequentist calculator which is included in hepstats but we have found an interesting behaviour. We are running 1000 pseudoexperiments per Br scan point, being 10 in total for the moment. It happens that the resulting distribution of the expected limit at 95% never reaches the 0.05 line, being practically constant through the full scan. We are wondering whether there was any report like this one before and whether there would be an easy way to tackle it...since we are a bit lost at the moment.

Many thanks for attending us with this issue!

PD: I also attach a copy of the script I am currently using.

CLs_B4mu_0.25GeV_0.40GeV_0ps.pdf
B4mu_Prompt-4.pdf

Script


import zfit, zfit_physics
import numpy as np

from hepstats.hypotests.parameters import POI
from hepstats.hypotests.calculators import FrequentistCalculator
from hepstats.hypotests.calculators import AsymptoticCalculator
from hepstats.hypotests import Discovery
from hepstats.hypotests import UpperLimit
from hepstats.hypotests.parameters import POIarray
from zfit.minimize import DefaultToyStrategy

import uncertainties as unc
import tensorflow as tf



def read_pickle(path):

    with open(path, 'rb') as file:

        pickle_file = pkl.load(file)

    return pickle_file


def SingleFit(label,model,Br_hypo, res_back, mc_long, alpha, action):

    ## (...)

    if action == 'Limit':
    
        obs = zfit.Space("B_M", limits=(4850, 6000))

        if model == 'Exp':
            
            lambda_long = zfit.Parameter('exponent_long_'+label, res_back['lambda']['value'], -0.1, 0.1, floating=False)
            N_back_long = zfit.Parameter('Nback_long_'+label, res_back['Yield']['value'],0,100)

            back_long = zfit.pdf.Exponential(lambda_long, obs=obs, name="Exponential_PDF")
            Back_long = back_long.create_extended(N_back_long)



        elif model == 'Argus':
            
            N_back_long = zfit.Parameter('yield_long_', res_back['Yield']['value'], 0, 35)
            m0_long = zfit.Parameter('m0_long_',res_back['m0']['value'],6000, floating=False)
            p_long = zfit.Parameter('p_long_', res_back['p']['value'],-2.0,1.0)
            c_long = zfit.Parameter('c_long_', res_back['c']['value'],-20,20)

            pdf_long = zfit_physics.pdf.Argus(m0=m0_long, p=p_long, c=c_long, obs=obs)
            
            Back_long = pdf_long.create_extended(N_back_long)

            m0_long_val = res_back['m0']['value']; m0_long_unc = res_back['m0']['error']


            p_long_val = res_back['p']['value']; p_long_unc = res_back['p']['error']

            c_long_val = res_back['c']['value']; c_long_unc = res_back['c']['error']


        # Signal -> Equal amount for Long and Down

        alpha_long = zfit.Parameter('SES_long_'+label, alpha.nominal_value, 1e-11, 1e-9)

        Br_Fit = zfit.Parameter("Br_sig_"+label, Br_hypo, -1e-11,8e-9)

        def N(signal_Br, signal_alpha):
            # Ensure that N_long is zero if Br is negative
            return signal_Br / signal_alpha


        N_long = zfit.ComposedParameter('N_long_'+label, N, params=[Br_Fit, alpha_long])
        

        ## Signal Long model

        mu_long = zfit.Parameter("mu_long_"+label,mc_long['mu']['value'], 5200, 5500, floating=False)
        sigma_long = zfit.Parameter("sigma_long_"+label, mc_long['sigma']['value'], 10,22, floating=False)
        alpha_r_long = zfit.Parameter("alpha_r_long_"+label, mc_long['alpha_r']['value'], mc_long['alpha_r']['value']-2,mc_long['alpha_r']['value']+2, floating=False)
        n_r_long = zfit.Parameter("n_r_long_"+label, 10, floating=False)
        alpha_l_long = zfit.Parameter("alpha_l_long_"+label, mc_long['alpha_l']['value'], mc_long['alpha_l']['value']-2, mc_long['alpha_l']['value']+2, floating=False)
        n_l_long = zfit.Parameter("n_l_long_"+label,5, floating = False)
        
        signal_long = zfit.pdf.DoubleCB(obs=obs, mu=mu_long, sigma = sigma_long,
                                alphal=alpha_l_long, alphar=alpha_r_long, nl=n_l_long, nr=n_r_long)

        Signal_long = signal_long.create_extended(N_long)

        # Create the combined PDFs for long and down samplers

        model_long = zfit.pdf.SumPDF([Back_long, Signal_long]) 

        ## Creating constraints

        constraints_long = {"params": [alpha_long],
                            "observation": [alpha.nominal_value],
                            "uncertainty": [alpha.std_dev]
                            }
        
        constraint_long = zfit.constraint.GaussianConstraint(**constraints_long)

        ## Long/Down samplers

        minimizer = zfit.minimize.Minuit(strategy=DefaultToyStrategy(), tol = 0.01,minimize_strategy=0, use_minuit_grad=True)

        data_long = model_long.create_sampler()

        data_long.resample()

        nll_long = zfit.loss.ExtendedUnbinnedNLL(model_long, data_long, constraints=constraint_long)

        ## Computing UpperLimit

        calculator = FrequentistCalculator(input=nll_long, minimizer=minimizer, ntoysnull=1000, ntoysalt=1000)

        bkg_only = POI(Br_Fit, 0)

        sig_yield= POIarray(Br_Fit, np.linspace(3e-10,5e-9,10))

        ul = UpperLimit(calculator=calculator, poinull=sig_yield, poialt=bkg_only, qtilde=True)

        plotlimit(ul, model, 'B4mu_0.25GeV_0.40GeV_0ps',CLs=False)
            
        limit = ul.upperlimit(alpha=0.05)

        print(limit)

def plotlimit(ul, model, label, CLs=True, ax=None):
    """plot pvalue scan for different values of a parameter of interest (observed, expected and +/- sigma bands)

    Args:
        ul: UpperLimit instance
        alpha (float, default=0.05): significance level
        CLs (bool, optional): if `True` uses pvalues as $$p_{cls}=p_{null}/p_{alt}=p_{clsb}/p_{clb}$$
            else as $$p_{clsb} = p_{null}$
        ax (matplotlib axis, optionnal)
    """
    if ax is None:
        ax = plt.gca()

    alpha = 0.05

    poivalues = ul.poinull.values
    pvalues = ul.pvalues(CLs=CLs)

    if CLs:
        cls_clr = "r"
        clsb_clr = "b"
    else:
        cls_clr = "b"
        clsb_clr = "r"

    color_1sigma = "deepskyblue"
    color_2sigma = "lightskyblue"

    """
    ax.plot(
        poivalues,
        pvalues["cls"],
        label="Observed CL$_{s}$",
        marker=".",
        color="k",
        markerfacecolor=cls_clr,
        markeredgecolor=cls_clr,
        linewidth=2.0,
        ms=11,
    )

    ax.plot(
        poivalues,
        pvalues["clsb"],
        label="Observed CL$_{s+b}$",
        marker=".",
        color="k",
        markerfacecolor=clsb_clr,
        markeredgecolor=clsb_clr,
        linewidth=2.0,
        ms=11,
        linestyle=":",
    )

    ax.plot(
        poivalues,
        pvalues["clb"],
        label="Observed CL$_{b}$",
        marker=".",
        color="k",
        markerfacecolor="k",
        markeredgecolor="k",
        linewidth=2.0,
        ms=11,
    )"""

    ax.plot(
        poivalues,
        pvalues["expected"],
        label="Expected CL$_{s}-$Median",
        color="k",
        linestyle="--",
        linewidth=1.5,
        ms=10,
    )

    ax.plot(
        [poivalues[0], poivalues[-1]],
        [alpha, alpha],
        color="r",
        linestyle="-",
        linewidth=1.5,
    )

    ax.fill_between(
        poivalues,
        pvalues["expected"],
        pvalues["expected_p1"],
        facecolor=color_1sigma,
        label="Expected CL$_{s} \\pm 1 \\sigma$",
        alpha=0.8,
    )

    ax.fill_between(
        poivalues,
        pvalues["expected"],
        pvalues["expected_m1"],
        facecolor=color_1sigma,
        alpha=0.8,
    )

    ax.fill_between(
        poivalues,
        pvalues["expected_p1"],
        pvalues["expected_p2"],
        facecolor=color_2sigma,
        label="Expected CL$_{s} \\pm 2 \\sigma$",
        alpha=0.8,
    )

    ax.fill_between(
        poivalues,
        pvalues["expected_m1"],
        pvalues["expected_m2"],
        facecolor=color_2sigma,
        alpha=0.8,
    )

    ax.set_ylim(-0.01, 1.1)
    ax.set_ylabel("p-value")
    ax.set_xlabel("Br", loc='center')
    ax.legend(loc="best", fontsize=20)

    plt.savefig('/home3/emilio.fernandez/BtoNmuon/Multimuons/Limits/{0}/CLs_{1}.pdf'.format(model, label))

    return ax

Code

import sys
sys.path.append('/home3/emilio.fernandez/BtoNmuon/Multimuons')

import pickle as pkl
import matplotlib.pyplot as plt

alpha_l = unc.ufloat(2.945e-10, 0.491e-10)

Reading fits' results

mode = 'Exp'

path = '/home3/emilio.fernandez/BtoNmuon/Multimuons/Fits/'

data_long = read_pickle(path+'Sidebands_{0}/Long/B4mu_Prompt.pkl'.format(mode))

mc_long = read_pickle(path+'MCResults/Long/B4mu_0.25GeV_0.40GeV_0ps_2018.pkl')

SingleFit('B4muK', '{0}'.format(mode),5e-10, data_long, mc_long, alpha_l, 'Limit')

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions