diff --git a/Data/Pantheon_old/lcparam_full.txt b/Data/Pantheon_old/lcparam_full.txt index 1aceb65..4a540d1 100644 --- a/Data/Pantheon_old/lcparam_full.txt +++ b/Data/Pantheon_old/lcparam_full.txt @@ -1028,21 +1028,21 @@ 1.800000000000000044e+00 1.799601000000000006e+00 2.620110000000000028e+01 1.893198849999999993e+02 6.227817900000000151e+01 2.259999999999999787e+00 2.259600999999999971e+00 2.687699999999999889e+01 1.891565860000000043e+02 6.230917399999999873e+01 1.913999999999999924e+00 1.914828999999999892e+00 2.616090000000000160e+01 3.444306600000000174e+01 -5.256643000000000399e+00 -1.300000000000000044e+00 1.301201999999999970e+00 2.552015000000000100e+01 0.000000000000000000e+00 0.000000000000000000e+00 -1.340000000000000080e+00 1.341202000000000005e+00 2.546519999999999939e+01 0.000000000000000000e+00 0.000000000000000000e+00 -1.020000000000000018e+00 1.021201999999999943e+00 2.488234999999999886e+01 0.000000000000000000e+00 0.000000000000000000e+00 -7.349999999999999867e-01 7.362020000000000231e-01 2.370605000000000118e+01 0.000000000000000000e+00 0.000000000000000000e+00 -1.120000000000000107e+00 1.121202000000000032e+00 2.506390000000000029e+01 0.000000000000000000e+00 0.000000000000000000e+00 -1.229999999999999982e+00 1.231201999999999908e+00 2.528614999999999924e+01 0.000000000000000000e+00 0.000000000000000000e+00 -1.229999999999999982e+00 1.231201999999999908e+00 2.549849999999999994e+01 0.000000000000000000e+00 0.000000000000000000e+00 -8.539999999999999813e-01 8.552020000000000177e-01 2.431279999999999930e+01 0.000000000000000000e+00 0.000000000000000000e+00 -1.370000000000000107e+00 1.371202000000000032e+00 2.551589999999999847e+01 0.000000000000000000e+00 0.000000000000000000e+00 -9.751999999999999558e-01 9.764019999999999921e-01 2.470609999999999928e+01 0.000000000000000000e+00 0.000000000000000000e+00 -9.699999999999999734e-01 9.712020000000000097e-01 2.511544999999999916e+01 0.000000000000000000e+00 0.000000000000000000e+00 
-7.399999999999999911e-01 7.412020000000000275e-01 2.384459999999999980e+01 0.000000000000000000e+00 0.000000000000000000e+00 -1.389999999999999902e+00 1.391202000000000050e+00 2.536305000000000121e+01 0.000000000000000000e+00 0.000000000000000000e+00 -1.304999999999999938e+00 1.306202000000000085e+00 2.526419999999999888e+01 0.000000000000000000e+00 0.000000000000000000e+00 -9.350000000000000533e-01 9.362019999999999786e-01 2.426660000000000039e+01 0.000000000000000000e+00 0.000000000000000000e+00 -1.014000000000000012e+00 1.015201999999999938e+00 2.485444999999999993e+01 0.000000000000000000e+00 0.000000000000000000e+00 -1.314999999999999947e+00 1.316202000000000094e+00 2.554234999999999900e+01 0.000000000000000000e+00 0.000000000000000000e+00 -1.092000000000000082e+00 1.093202000000000007e+00 2.463834999999999908e+01 0.000000000000000000e+00 0.000000000000000000e+00 +1.300000000000000044e+00 1.301201999999999970e+00 2.552015000000000100e+01 5.315630000000000166e+01 -2.777959999999999852e+01 +1.340000000000000080e+00 1.341202000000000005e+00 2.546519999999999939e+01 1.892880000000000109e+02 6.219140000000000157e+01 +1.020000000000000018e+00 1.021201999999999943e+00 2.488234999999999886e+01 1.893369999999999891e+02 6.222820000000000107e+01 +7.349999999999999867e-01 7.362020000000000231e-01 2.370605000000000118e+01 5.309308300000000003e+01 -2.774080000000000013e+01 +1.120000000000000107e+00 1.121202000000000032e+00 2.506390000000000029e+01 1.890579999999999927e+02 6.220210000000000150e+01 +1.229999999999999982e+00 1.231201999999999908e+00 2.528614999999999924e+01 1.890954999999999870e+02 6.230644399999999905e+01 +1.229999999999999982e+00 1.231201999999999908e+00 2.549849999999999994e+01 1.892359999999999900e+02 6.221479999999999677e+01 +8.539999999999999813e-01 8.552020000000000177e-01 2.431279999999999930e+01 1.891450000000000102e+02 6.226359999999999673e+01 +1.370000000000000107e+00 1.371202000000000032e+00 2.551589999999999847e+01 5.304180000000000206e+01 
-2.783050000000000068e+01 +9.751999999999999558e-01 9.764019999999999921e-01 2.470609999999999928e+01 5.310560000000000258e+01 -2.775080000000000169e+01 +9.699999999999999734e-01 9.712020000000000097e-01 2.511544999999999916e+01 1.895374999999999943e+02 6.231312199999999990e+01 +7.399999999999999911e-01 7.412020000000000275e-01 2.384459999999999980e+01 5.307562500000000227e+01 -2.773626399999999848e+01 +1.389999999999999902e+00 1.391202000000000050e+00 2.536305000000000121e+01 1.892249999999999943e+02 6.213969999999999771e+01 +1.304999999999999938e+00 1.306202000000000085e+00 2.526419999999999888e+01 5.310329999999999728e+01 -2.777159999999999940e+01 +9.350000000000000533e-01 9.362019999999999786e-01 2.426660000000000039e+01 1.893710000000000093e+02 6.219109999999999872e+01 +1.014000000000000012e+00 1.015201999999999938e+00 2.485444999999999993e+01 3.542530000000000001e+01 -3.364799999999999791e+00 +1.314999999999999947e+00 1.316202000000000094e+00 2.554234999999999900e+01 3.544369999999999976e+01 -3.382299999999999862e+00 +1.092000000000000082e+00 1.093202000000000007e+00 2.463834999999999908e+01 1.873568960000000061e+02 1.849053000000000058e+00 diff --git a/Notebooks/Validation_cosmo.ipynb b/Notebooks/Validation_cosmo.ipynb index 84c1b18..255e776 100644 --- a/Notebooks/Validation_cosmo.ipynb +++ b/Notebooks/Validation_cosmo.ipynb @@ -22,6 +22,62 @@ "import matplotlib.pyplot as plt" ] }, + { + "cell_type": "code", + "execution_count": 38, + "id": "6767992e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Frodo\n", + "Koekemoer\n", + "Patuxent\n", + "Rakke\n", + "SCP06C0\n" + ] + } + ], + "source": [ + "# To make the positions of the pantheon dataset \n", + "\n", + "\"\"\" More importantly I notice some of the supernovae no longer exist in the Pantheon+ compilation I don't know why !!\n", + "\"\"\"\n", + "\n", + "pant_old_names = np.loadtxt('../Data/Pantheon_old/lcparam_full_long.txt', dtype='str', usecols=0, 
skiprows=1)\n", + "pant_old = np.loadtxt('../Data/Pantheon_old/lcparam_full.txt', )\n", + "pant_new_names = np.loadtxt('../Data/Pantheon+/Pantheon+SH0ES.dat', skiprows=1, dtype='str', usecols=0 )\n", + "pant_new = np.loadtxt('../Data/Pantheon+/Pantheon+SH0ES.dat', skiprows=1, usecols=(26, 27))\n", + "# taken from github of Pantheon+\n", + "others = np.array([[53.093083,-27.74080], [189.095500, 62.306444], [189.537500, 62.313122], [53.075625, -27.736264], [187.356896, 1.849053]]) \n", + "\n", + "j = 0\n", + "for i in range(len(pant_old_names[1030:])):\n", + " if pant_old_names[1030+i] in pant_new_names:\n", + " \n", + " index_new = np.where(pant_new_names == pant_old_names[1030+i])\n", + " pant_old[1030+i, 3] = pant_new[index_new, 0]\n", + " pant_old[1030+i, 4] = pant_new[index_new, 1]\n", + " else:\n", + " # print('Not found')\n", + " print(pant_old_names[1030+i])\n", + " pant_old[1030+i, 3] = others[j, 0]\n", + " pant_old[1030+i, 4] = others[j, 1]\n", + " j += 1\n", + "\n", + "# np.savetxt('../Data/Pantheon_old/lcparam_full_2.txt', pant_old)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "5cc1d83f", + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": 3, @@ -482,7 +538,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.8" + "version": "3.8.11" } }, "nbformat": 4, diff --git a/marcia/GPconfig.py b/marcia/GPconfig.py index 11bca39..6229086 100644 --- a/marcia/GPconfig.py +++ b/marcia/GPconfig.py @@ -18,7 +18,7 @@ filename = 'GPconfig.ini' # self scale implies all the tasks have the same length scale paramter. 
-config['GENERAL'] = {'n_Tasks': n_Tasks, 'self_scale': True} +config['GENERAL'] = {'n_Tasks': n_Tasks, 'self_scale': True, 'ind_scatter': False} config['KERNEL'] = {'Interpolate': True, 'Method': 'cubic', 'n_points': 100 } @@ -39,6 +39,8 @@ config.set(Task, 'l_max', '10.0') config.set(Task, 'sigma_f_min', '0.001') config.set(Task, 'sigma_f_max', '10.0') + config.set(Task, 'sig_int_min', '0.001') + config.set(Task, 'sig_int_max', '5.0') # if necessary to include a intrinsic scatter or an offset to the covariance matrix config['INTRINSIC_SCATTER'] = {'sigma_int': True, 'offset': True} @@ -72,6 +74,9 @@ def __init__(self, filename): # Set the self_scale parameter self.self_scale = config.getboolean('GENERAL', 'self_scale') + # Set the ind_scatter parameter + self.ind_scatter = config.getboolean('GENERAL', 'ind_scatter') + # Set the interpolation parameters self.Interpolate = config.getboolean('KERNEL', 'Interpolate') self.Method = config.get('KERNEL', 'Method') @@ -103,7 +108,10 @@ def __init__(self, filename): l_max = config.getfloat(Task, 'l_max') sigma_f_min = config.getfloat(Task, 'sigma_f_min') sigma_f_max = config.getfloat(Task, 'sigma_f_max') + sig_int_min = config.getfloat(Task, 'sig_int_min') + sig_int_max = config.getfloat(Task, 'sig_int_max') + # Create the GPparams class - self.__dict__[Task] = {'model': model, 'nu': nu, 'l_min': l_min, 'l_max': l_max, 'sigma_f_min': sigma_f_min, 'sigma_f_max': sigma_f_max} + self.__dict__[Task] = {'model': model, 'nu': nu, 'l_min': l_min, 'l_max': l_max, 'sigma_f_min': sigma_f_min, 'sigma_f_max': sigma_f_max, 'sig_int_min': sig_int_min, 'sig_int_max': sig_int_max} diff --git a/marcia/database.py b/marcia/database.py index f75195b..06867a6 100644 --- a/marcia/database.py +++ b/marcia/database.py @@ -90,8 +90,10 @@ def get_data(self,choose): return self.get_GRB() elif choose == 'SNE': return self.get_SNE() - elif choose == 'QSA': - return self.get_QSA() + elif choose == 'QSO': + return self.get_QSO() + elif choose == 
'QSO_full': + return self.get_QSO_full() elif choose == 'Planck_TT': return self.get_CMB_planck_TT() elif choose == 'Planck_EE': @@ -245,8 +247,8 @@ def get_CMB_planck_TE(self): @load_data_once def get_data_pantheon_old(self): - datapath = os.path.join(__datapath__, 'Pantheon_old','lcparam_select.txt') - covpath = os.path.join(__datapath__, 'Pantheon_old','cov_select.txt') + datapath = os.path.join(__datapath__, 'Pantheon_old','lcparam_full.txt') + covpath = os.path.join(__datapath__, 'Pantheon_old','cov_full.txt') zcmb, zhel, mb, ra, dec = np.loadtxt(datapath).T cov = np.loadtxt(covpath) return zcmb, zhel, mb, cov, ra, dec diff --git a/marcia/likelihood.py b/marcia/likelihood.py index 7ef27ee..864c3a3 100644 --- a/marcia/likelihood.py +++ b/marcia/likelihood.py @@ -55,7 +55,7 @@ def chisq_BAO_alam(self,theta): return np.dot(dist- distance_theory, np.dot(np.linalg.inv(cov), dist - distance_theory)) - def chisq_Pantheon_plus(self,theta): + def chisq_Pantheon_plus(self,theta, residual = False): cmb_z, mb, covariance = self.db.get_pantheon_plus() helio_z = self.db.get_pantheon_plus(Zhel=True) distance_theory = self.theory.distance_modulus(theta, cmb_z, helio_z) @@ -66,9 +66,12 @@ def chisq_Pantheon_plus(self,theta): icov = np.linalg.inv(covariance) self.inv_covariance['Pantheon_plus'] = icov - return np.dot(delta, np.dot(icov, delta)) + if residual: + return delta, np.sqrt(np.diag(covariance)) + else: + return np.dot(delta, np.dot(icov, delta)) - def chisq_Pantheon_old(self,theta): + def chisq_Pantheon_old(self,theta, residual = False): cmb_z, mb, covariance = self.db.get_pantheon_old() helio_z = self.db.get_pantheon_old(Zhel=True) distance_theory = self.theory.distance_modulus(theta, cmb_z, helio_z) @@ -78,17 +81,22 @@ def chisq_Pantheon_old(self,theta): else: icov = np.linalg.inv(covariance) self.inv_covariance['Pantheon_old'] = icov - - return np.dot(delta, np.dot(icov, delta)) + if residual: + return delta, np.sqrt(np.diag(covariance)) + else: + return 
np.dot(delta, np.dot(icov, delta)) - def chisq_QSO_dm(self,theta): + def chisq_QSO(self,theta, residual = False): p = self.params(theta) #pdb.set_trace() z,dm,cov = self.db.get_QSO() - distance_theory = self.theory.distance_modulus(theta, z,z) + distance_theory = self.theory.distance_modulus(theta, z,z) - p.M_b delta = dm - distance_theory var = np.diag(cov) - return np.sum(delta**2./(var + p.qso_sigma**2)) + if residual: + return delta, np.sqrt(var) + else: + return np.sum(delta**2./(var + p.qso_sigma**2)) def chisq_QSO_full(self,theta): p = self.params(theta) @@ -147,6 +155,10 @@ def __init__(self, data, GP_filename=None, cosmo_filename=None): self.nmodel = self.nTasks self.self_scale = MTGP['self_scale'] self.scatter = MTGP['sigma_int'] + self.offset = MTGP['offset'] + + # extra paramter per dataset to be added to the theta + self.ind_scatter = MTGP['ind_scatter'] # To define the priors for the GP hyperparameters using the the GPconfig file self.priors = [] @@ -154,6 +166,9 @@ def __init__(self, data, GP_filename=None, cosmo_filename=None): i = i+1 # To start from 1 self.priors.append([MTGP[f'Task_{i}']['sigma_f_min'], MTGP[f'Task_{i}']['sigma_f_max']]) self.priors.append([MTGP[f'Task_{i}']['l_min'], MTGP[f'Task_{i}']['l_max']]) + if self.ind_scatter: + self.priors.append([MTGP[f'Task_{i}']['sig_int_min'], MTGP[f'Task_{i}']['sig_int_max']]) + self.priors = np.array(self.priors) @@ -184,44 +199,68 @@ def set_theta(self, theta): # and possibly with intrinsic scatter at the end [..., sigma_int] # Final form is [[sigma_f_1, l_1], [sigma_f_2, l_2], ...] 
- if self.self_scale == False and len(theta) != 2*self.nTasks and self.scatter == False: - raise ValueError('The number of hyperparameters does not match the number of tasks') - elif self.self_scale == False and len(theta) != 2*self.nTasks+1 and self.scatter == True: - pass + + if self.self_scale == False: + params_GP = [] + if len(theta)>=2*self.nTasks: + for i in range(self.nTasks): + params_GP.append([theta[2*i], theta[2*i+1]]) + else: + raise ValueError(f'The number of hyperparameters does not match the number of tasks, there must be atleast {2*self.nTasks} hyperparameters') + + elif self.self_scale == True: + params_GP =[] + if len(theta)>=self.nTasks+1: + for i in range(self.nTasks): + params_GP.append([theta[i], theta[self.nTasks]]) + else: + raise ValueError(f'The number of hyperparameters does not match the number of tasks, there must be atleast {self.nTasks+1} hyperparameters') + + + if self.ind_scatter == True and len(theta)>=self.nTasks+2: + params_scatter = theta[-self.nTasks:] else: - # rearrange the theta values into a list of lists - params = [] - for i in range(self.nTasks): - params.append([theta[2*i], theta[2*i+1]]) + params_scatter = [self.scatter]*self.nTasks - return params + return params_GP, params_scatter def logPrior(self, theta): # To set the priors for the GP hyperparameters for (lower, upper), value in zip(self.priors, theta): if not lower < value < upper: - # print(value, lower, upper) - # sys.exit() return -np.inf return 0.0 - def logLike(self,theta): - chi2 = self.chisq(theta) + def logLike(self,theta_GP, theta_scatter): + chi2 = self.chisq(theta_GP, theta_scatter) return -0.5*chi2 def logProb(self, theta): + lp = self.logPrior(theta) - theta = self.set_theta(theta) + theta_GP, theta_scatter = self.set_theta(theta) if not np.isfinite(lp): return -np.inf - return lp + self.logLike(theta) + return lp + self.logLike(theta_GP, theta_scatter) - def chisq(self,theta): + def chisq(self, theta_GP, theta_scatter): # We define the GP 
likelihood here chi2 = 0 + + # To set the scatter covariance matrix which is a diagonal matrix of size len of ntasks + scatter_matrix = self.offset*np.eye(len(self.D_covmat)) + scatter_diag = [] + for i in range(self.nTasks): + # create arrays of length of the data for each task + scatter_diag = np.append(scatter_diag, len(np.array(self.d.x[f'x{i}']))*[theta_scatter[i]**2.]) + + int_scatter_covmat = scatter_matrix + np.diag(scatter_diag) + # To call the GP class to get the covariance matrix - GP_covmat = self.GP_kernels.Cov_Mat(theta) - Total_covmat = GP_covmat + self.D_covmat + 1e-6*np.eye(len(self.D_covmat)) + GP_covmat = self.GP_kernels.Cov_Mat(theta_GP) + + # To calculate the total covariance matrix + Total_covmat = GP_covmat + self.D_covmat + int_scatter_covmat # To perform cholesky decomposition L_inv = np.linalg.inv(Total_covmat) @@ -235,6 +274,4 @@ def chisq(self,theta): y_inv = np.dot(L_inv, y) chi2 = np.dot(y, y_inv.T) + log_det - offset - return chi2 - - + return chi2 \ No newline at end of file diff --git a/marcia/sampler.py b/marcia/sampler.py index 51cf69a..b49b57f 100644 --- a/marcia/sampler.py +++ b/marcia/sampler.py @@ -1,6 +1,7 @@ import emcee import numpy as np from marcia import Likelihood as lk +# from marcia import Likelihood_GP as lkGP from getdist import plots, MCSamples import scipy.optimize as op from chainconsumer import ChainConsumer @@ -10,8 +11,10 @@ class Sampler: def __init__(self, model, parameters, data, initial_guess, prior_file=None, - max_n=100000, nwalkers=100, sampler_file='sampler.h5', converge=False,): + max_n=100000, nwalkers=100, sampler_file='sampler.h5', converge=False, prior_dist = 'uniform', burnin = 0.3, thin = 1): + self.data = data + self.model = model self.likelihood = lk(model, parameters, data, prior_file) self.ndim = len(self.likelihood.priors) self.nwalkers = nwalkers @@ -20,6 +23,12 @@ def __init__(self, model, parameters, data, initial_guess, prior_file=None, self.HDFBackend = 
emcee.backends.HDFBackend(sampler_file) self.converge = converge self.mle = {} + self.priors = np.array(self.likelihood.priors) + self.prior_dist = prior_dist + + # default burnin and thinning + self.burnin = burnin + self.thin = thin def MLE(self, verbose=True): if 'result' not in self.mle: @@ -32,9 +41,13 @@ def MLE(self, verbose=True): return self.mle['result'] - def sampler_pos(self): - mle = self.MLE() - pos = [mle + 1e-4 * np.random.randn(self.ndim) for _ in range(self.nwalkers)] + def sampler_pos(self, prior_dist='uniform'): + + if prior_dist == 'uniform': + pos = np.random.uniform(self.priors[:,0], self.priors[:,1], size = (self.nwalkers,self.ndim)) + elif prior_dist == 'best-fit': + mle = self.MLE() + pos = [mle + 1e-4 * np.random.randn(self.ndim) for _ in range(self.nwalkers)] return pos def sampler(self,reset=False): @@ -73,13 +86,13 @@ def sampler(self,reset=False): break old_tau = tau else: - sampler.run_mcmc(self.sampler_pos(), self.max_n, progress=True) + sampler.run_mcmc(self.sampler_pos(prior_dist = self.prior_dist), self.max_n, progress=True) else: if reset: print(f'Reseting sampling from iteration: {last_iteration}') self.HDFBackend.reset(self.nwalkers, self.ndim) return self.sampler() - print(f'Already completed {last_iteration} iterations') + print(f'Chain exists with {last_iteration} iterations: increase iterations to continue sampling') def get_burnin(self): try: @@ -88,25 +101,20 @@ def get_burnin(self): thin = int(0.5 * np.min(tau)) print(f'Burn-in: {burnin} and thin: {thin}') except: + print('Autocorrelation time could not be calculated, increase the number of iterations') - burnin = 100 - thin = 10 - print(f'Burn-in: {burnin} and thin: {thin}[DEFAULT VALUES]') + burnin = self.burnin + thin = self.thin + print(f'Imposing default Burn-in: {burnin} and thin: {thin} by hand') return burnin, thin - def get_chain(self, getdist=False,burnin=None,thin=None): + def get_chain(self, getdist=False): sampler = self.HDFBackend if sampler.iteration < 
self.max_n: print(f'Only {sampler.iteration} iterations completed') print(f'You should run the sampler to finsih the sampling of {self.max_n} iterations') - if (burnin is None) and (thin is None): - burnin, thin = self.get_burnin() - elif burnin is None: - burnin,_ = self.get_burnin() - elif thin is None: - _,thin = self.get_burning() - - samples = sampler.get_chain(discard=burnin, thin=thin, flat=True) + burnin, thin = self.get_burnin() + samples = sampler.get_chain(discard=int(burnin*self.max_n), thin=thin, flat=True) if getdist: lnprob = sampler.get_log_prob(discard=burnin, thin=thin, flat=True) lnprior = sampler.get_blobs(discard=burnin, thin=thin, flat=True) @@ -115,8 +123,8 @@ def get_chain(self, getdist=False,burnin=None,thin=None): samples = np.concatenate((lnprob[:, None], lnprior[:, None], samples), axis=1) return samples - def corner_plot(self, getdist=False,burnin=None,thin=None): - chains = self.get_chain(getdist,burnin,thin) + def corner_plot(self, getdist=False): + chains = self.get_chain() names = self.likelihood.theory.param.parameters labels = [p.replace('$', '') for p in self.likelihood.theory.labels] if getdist: @@ -137,3 +145,17 @@ def get_simple_stat(self): data[name]['mean'] = np.mean(samples[:,i]) data[name]['std'] = np.std(samples[:,i]) return data + + +# class GPSampler(Sampler): +# """ +# Class to sample the GP parameters that inherits from the Sampler class +# """ +# def __init__(self, GP_filename = None): +# super().__init__() +# data_list = self.data + +# if GP_filename is None: +# GP_filename = 'GPconfig.ini' + +# self.likelihood = lkGP(data = data_list, GP_filename = GP_filename) \ No newline at end of file