Skip to content

Commit

Permalink
Optimizations
Browse files Browse the repository at this point in the history
  • Loading branch information
Dan-RAI committed Jan 12, 2023
1 parent d17e6c1 commit 414a366
Show file tree
Hide file tree
Showing 3 changed files with 130 additions and 10 deletions.
18 changes: 12 additions & 6 deletions python/PascalX/genescorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ def load_genome(self,file,ccol=1,cid=0,csymb=5,cstx=2,cetx=3,cs=4,cb=None,chrSta
GEN = genome.genome()
GEN.load_genome(file,ccol,cid,csymb,cstx,cetx,cs,cb,chrStart,splitchr,NAgeneid,useNAgenes,header)

self._GENOME = GEN
self._GENEID = GEN._GENEID
self._GENESYMB = GEN._GENESYMB
self._GENEIDtoSYMB = GEN._GENEIDtoSYMB
Expand All @@ -142,7 +143,7 @@ def load_genome(self,file,ccol=1,cid=0,csymb=5,cstx=2,cetx=3,cs=4,cb=None,chrSta
self._SKIPPED = GEN._SKIPPED


def load_mapping(self,file,gcol=0,rcol=1,wcol=None,delimiter="\t",a1col=None,a2col=None,bcol=None,pfilter=1,header=False,joint=True):
def load_mapping(self,file,gcol=0,rcol=1,wcol=None,delimiter="\t",a1col=None,a2col=None,bcol=None,pfilter=1,header=False,joint=True,symbol=False):
"""
Loads a SNP to gene mapping
Expand All @@ -159,17 +160,22 @@ def load_mapping(self,file,gcol=0,rcol=1,wcol=None,delimiter="\t",a1col=None,a2c
pfilter(float): Only include rows with wcol < pfilter
header(bool): Header present
joint(bool): Use mapping SNPs and gene window based SNPs
symbol(bool): True: Gene id is a gene symbol; False: Gene id is an ensembl gene id
Note:
* A loaded mapping takes precedence over a loaded positional gene annotation
* The mapping data is stored statically (same for all class initializations)
"""
M = mapper()
M.load_mapping(file,gcol,rcol,wcol,a1col,a2col,bcol,delimiter,pfilter,header)
self._MAP = M._GENEIDtoSNP
self._iMAP = M._SNPtoGENEID
self._joint = joint
if symbol and self._GENOME is None:
print("For symbol==True a genome has to be loaded first (use .load_genome)")
else:
M = mapper(self._GENOME)
M.load_mapping(file,gcol,rcol,wcol,a1col,a2col,bcol,delimiter,pfilter,header,symbol)
self._MAP = M._GENEIDtoSNP
self._iMAP = M._SNPtoGENEID
self._joint = joint

def load_GWAS(self,file,rscol=0,pcol=1,bcol=None,a1col=None,a2col=None,delimiter=None,header=False,NAid='NA',log10p=False,cutoff=1e-300):
"""
Expand Down
15 changes: 12 additions & 3 deletions python/PascalX/mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,14 @@


class mapper:
def __init__(self):
def __init__(self,genome=None):
self._GENEIDtoSNP = {}
self._SNPtoGENEID = {}
self._GENEDATA = {}

def load_mapping(self,file,gcol=0,rcol=1,wcol=None,a1col=None,a2col=None,bcol=None,delimiter="\t",pfilter=1,header=False):

self._GENOME = genome

def load_mapping(self,file,gcol=0,rcol=1,wcol=None,a1col=None,a2col=None,bcol=None,delimiter="\t",pfilter=1,header=False,symbol=False):
"""
Loads a SNP to gene mapping
Expand All @@ -42,6 +44,7 @@ def load_mapping(self,file,gcol=0,rcol=1,wcol=None,a1col=None,a2col=None,bcol=No
delimiter(string): Character used to separate columns
header(bool): Header present
pfilter(float): Only include rows with wcol < pfilter
symbol(bool): Gene id are gene symbols (requires genome to be set on init)
"""
self._GENEIDtoSNP = {}
self._SNPtoGENEID = {}
Expand All @@ -67,6 +70,12 @@ def load_mapping(self,file,gcol=0,rcol=1,wcol=None,a1col=None,a2col=None,bcol=No

gid = line[gcol]

if symbol:
if gid in self._GENOME._GENESYMB:
gid = self._GENOME._GENESYMB[gid]
else:
continue

if gid not in self._GENEIDtoSNP:
self._GENEIDtoSNP[gid] = {} #[[],[],[],[],[]]

Expand Down
107 changes: 106 additions & 1 deletion python/wchissum.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -676,6 +676,55 @@ double oneminwchissum_m1nc0_satterthwaite_1000d(double* lambda, int N, double X)
return x.convert_to<double>();
}


extern "C"
double oneminwchissum_m1nc0_satterthwaite_auto(double* lambda, int N, double X) {

// Calc g,h
double sum1 = 0;
double sum2 = 0;
for(int i = 0; i < N; i++) {
sum1 += lambda[i];
sum2 += lambda[i]*lambda[i];
}

double g = sum2/sum1;
double h = sum1*sum1/sum2;

gamma_distribution<double> gamma(h/2,2*g);

double res = 1. - cdf(gamma,X);

if (res < 1e-15) {
float128 x = float128(X);

gamma_distribution<float128> gamma(h/2,2*g);
res = ( 1. - cdf(gamma,x) ).convert_to<double>();

if (res < 1e-32) {
number<cpp_bin_float<100>> x(X);
gamma_distribution<number<cpp_bin_float<100>>> gamma(h/2,2*g);
res = ( number<cpp_bin_float<100>>(1.) - cdf(gamma,x) ).convert_to<double>();

if (res < 1e-98) {
number<cpp_bin_float<200>> x(X);
gamma_distribution<number<cpp_bin_float<200>>> gamma(h/2,2*g);
res = ( number<cpp_bin_float<200>>(1.) - cdf(gamma,x) ).convert_to<double>();

if (res < 1e-195) {
number<cpp_bin_float<300>> x(X);
gamma_distribution<number<cpp_bin_float<300>>> gamma(h/2,2*g);
res = ( number<cpp_bin_float<300>>(1.) - cdf(gamma,x) ).convert_to<double>();
}
}
}
}

// Calc and return solution
return res;
}

/*
extern "C"
double oneminwchissum_m1nc0_satterthwaite_auto(double* lambda, int N, double X) {
double res = oneminwchissum_m1nc0_satterthwaite(lambda,N,X);
Expand All @@ -698,6 +747,7 @@ double oneminwchissum_m1nc0_satterthwaite_auto(double* lambda, int N, double X)
return res;
}
*/

/*
Pearson's approximation
Expand Down Expand Up @@ -771,6 +821,7 @@ double oneminwchissum_m1nc0_pearson_100d(double* lambda, int N, double X) {

double h = c2*c2*c2/(c3*c3);
double y = std::max(0.,(X-c1)*sqrt(h/c2)+h);

number<cpp_bin_float<100>> Y(y);

// Calc cdf
Expand Down Expand Up @@ -878,6 +929,60 @@ double oneminwchissum_m1nc0_pearson_1000d(double* lambda, int N, double X) {
return x.convert_to<double>();
}

extern "C"
double oneminwchissum_m1nc0_pearson_auto(double* lambda, int N, double X) {
double c1 = 0;
double c2 = 0;
double c3 = 0;

for(int i = 0; i < N; i++) {
c1 += lambda[i];
c2 += lambda[i]*lambda[i];
c3 += lambda[i]*lambda[i]*lambda[i];
}

double h = c2*c2*c2/(c3*c3);
double y = std::max(0.,(X-c1)*sqrt(h/c2)+h);

chi_squared chisq(h);

double res = 1. - cdf(chisq,y);

if (res < 1e-15) {
float128 Y(y);

chi_squared_distribution<float128> chisq(h);
res = ( 1. - cdf(chisq,Y) ).convert_to<double>();

if (res < 1e-32) {
number<cpp_bin_float<1000>> Y(y);
chi_squared_distribution<number<cpp_bin_float<100>>> chisq(h);

res = ( 1. - cdf(chisq,Y) ).convert_to<double>();

if (res < 1e-98) {
number<cpp_bin_float<200>> Y(y);
chi_squared_distribution<number<cpp_bin_float<200>>> chisq(h);

res = ( 1. - cdf(chisq,Y) ).convert_to<double>();

if (res < 1e-195) {
number<cpp_bin_float<300>> Y(y);
chi_squared_distribution<number<cpp_bin_float<300>>> chisq(h);

res = ( 1. - cdf(chisq,Y) ).convert_to<double>();
}
}
}
}

// Calc and return solution
return res;
}



/*
extern "C"
double oneminwchissum_m1nc0_pearson_auto(double* lambda, int N, double X) {
double res = oneminwchissum_m1nc0_pearson(lambda,N,X);
Expand All @@ -900,7 +1005,7 @@ double oneminwchissum_m1nc0_pearson_auto(double* lambda, int N, double X) {
return res;
}

*/
/*
Saddle-point approximation
(central case)
Expand Down

0 comments on commit 414a366

Please sign in to comment.