Skip to content

Commit

Permalink
Method option added to cmd interface
Browse files Browse the repository at this point in the history
  • Loading branch information
Dan-RAI committed Jan 16, 2023
1 parent 2e7ab3e commit adb3502
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 18 deletions.
11 changes: 6 additions & 5 deletions pascalx
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ parser_genescoring.add_argument("gwas",help="tab separated file with GWAS data (
parser_genescoring.add_argument("-sh","--skip_head",type=lambda x: (str(x).lower() == 'true'),default=True,help="first line is header [True|False], default=True")
parser_genescoring.add_argument("-cr","--col_rsid",type=int,default=0,help="column with rsids, default=0")
parser_genescoring.add_argument("-cp","--col_pval",type=int,default=1,help="column with p-values, default=1")

parser_genescoring.add_argument("-m","--method",type=str,default='saddle',help="method for gene scoring [saddle|auto|pearson|satterthwaite|ruben|davies], default=saddle")
parser_genescoring.add_argument("-mr","--rescore",type=bool,default=True,help="Rescore failed genes with backup method [True|False]")

# X-scoring
parser_xscoring = subparsers.add_parser('xscoring',description="Xscorer")
Expand Down Expand Up @@ -114,9 +115,9 @@ if __name__ == "__main__":
print("Starting genescoring")

if args.chr == 'all':
R = G.score_all(parallel=args.parallel,nobar=args.nobar)
R = G.score_all(parallel=args.parallel,nobar=args.nobar,method=args.method,autorescore=args.rescore)
else:
R = G.score_chr(chrs=list(ast.literal_eval(args.chr)),parallel=args.parallel,nobar=args.nobar)
R = G.score_chr(chrs=list(ast.literal_eval(args.chr)),parallel=args.parallel,nobar=args.nobar,method=args.method,autorescore=args.rescore)

if args.pathway is None:
G.save_scores(args.outfile+".tsv")
Expand Down Expand Up @@ -157,9 +158,9 @@ if __name__ == "__main__":
P = pathway.chi2rank(G)
M = P.load_modules(args.pathway,args.col_name,args.col_symb)
if args.chr == 'all':
R = P.score(M,parallel=args.parallel,nobar=args.nobar,genes_only=args.genes_only)
R = P.score(M,parallel=args.parallel,nobar=args.nobar,genes_only=args.genes_only,method=args.method,autorescore=args.rescore)
else:
R = P.score(M,parallel=args.parallel,nobar=args.nobar,chrs_only=args.chr,genes_only=args.genes_only)
R = P.score(M,parallel=args.parallel,nobar=args.nobar,chrs_only=args.chr,genes_only=args.genes_only,method=args.method,autorescore=args.rescore)

if args.genes_only:
G.save_scores(args.outfile+".tsv")
Expand Down
60 changes: 47 additions & 13 deletions python/PascalX/genescorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -899,7 +899,7 @@ def _scoremain(self,gene,unloadRef=False,method='saddle',mode='auto',reqacc=1e-1
if G[i] in self._GENEID:

with lock:
pbar.set_postfix_str(str(self._GENEID[G[i]][4]))
pbar.set_postfix_str(str(self._GENEID[G[i]][4]).ljust(15))


cr = self._GENEID[G[i]][0]
Expand Down Expand Up @@ -963,7 +963,7 @@ def _scoremain(self,gene,unloadRef=False,method='saddle',mode='auto',reqacc=1e-1


with lock:
pbar.set_postfix_str("done")
pbar.set_postfix_str("done".ljust(15))
pbar.close()

return RESULT,FAIL,TOTALFAIL
Expand All @@ -985,6 +985,21 @@ def score(self,gene,parallel=1,unloadRef=False,method='saddle',mode='auto',reqac
autorescore(bool): Automatically try to re-score failed genes via Pearson's algorithm
"""

# Check method
methods = ['auto','saddle','pearson','satterthwaite','ruben','davies']

if method not in methods:
print("No valid scoring method set. Available methods:",methods)
return None
else:
print("Scoring with method",method)

if type(autorescore) is str and autorescore not in methods:
print("No valid scoring method set for rescoring. Available methods:",methods)
return None


G = []

for i in range(0,len(gene)):
Expand All @@ -1004,12 +1019,13 @@ def score(self,gene,parallel=1,unloadRef=False,method='saddle',mode='auto',reqac
S = np.array_split(G,parallel)

result_objs = []
pool = mp.Pool(max(1,min(parallel,mp.cpu_count())))
pool = mp.Pool(max(1,min(len(S),mp.cpu_count())))

for i in range(0,len(S)):

result = pool.apply_async(self._scoremain, (S[i],True,method,mode,reqacc,intlimit,'',i,nobar,lock,keep_idx))
result_objs.append(result)

if len(S[i]) > 0:
result = pool.apply_async(self._scoremain, (S[i],True,method,mode,reqacc,intlimit,'',i,nobar,lock,keep_idx))
result_objs.append(result)

results = [result.get() for result in result_objs]

Expand All @@ -1031,9 +1047,21 @@ def score(self,gene,parallel=1,unloadRef=False,method='saddle',mode='auto',reqac
for X in R[0]:
self._SCORES[X[0]] = float(X[1])

if autorescore and len(R[1]) > 0:
print("Rescoreing failed genes")
R = self.rescore(R,method='pearson',mode='auto',reqacc=1e-100,intlimit=10000000,parallel=parallel,nobar=nobar,keep_idx=keep_idx)
if (len(R[1]) > 0 and
(
(type(autorescore)==bool and autorescore)
or autorescore in methods
)
):

if type(autorescore)==bool:
method = 'pearson'
else:
method = autorescore

print("Rescoreing failed genes with method",method)

R = self.rescore(R,method=method,mode='auto',reqacc=1e-100,intlimit=10000000,parallel=parallel,nobar=nobar,keep_idx=keep_idx)
if len(R[1])>0:
print(len(R[1]),"genes failed to be scored")

Expand Down Expand Up @@ -1091,14 +1119,15 @@ def rescore(self,RESULT,method='pearson',mode='auto',reqacc=1e-100,intlimit=1000
RES = [[],[],[]]
S = np.array_split(G,parallel)

pool = mp.Pool(max(1,min(parallel,mp.cpu_count())))
pool = mp.Pool(max(1,min(len(S),mp.cpu_count())))

result_objs = []

for i in range(0,len(S)):

result = pool.apply_async(self._scoremain, (S[i],True,method,mode,reqacc,intlimit,'',i,nobar,lock,keep_idx))
result_objs.append(result)
if len(S[i]) > 0:
result = pool.apply_async(self._scoremain, (S[i],True,method,mode,reqacc,intlimit,'',i,nobar,lock,keep_idx))
result_objs.append(result)

results = [result.get() for result in result_objs]

Expand All @@ -1116,6 +1145,9 @@ def rescore(self,RESULT,method='pearson',mode='auto',reqacc=1e-100,intlimit=1000
for X in RES[0]:
self._SCORES[X[0]] = float(X[1])

print(len(RES[0]),"genes successfully rescored.")
if len(RES[1]) > 0:
print(len(RES[1]),"genes still failed.")

RESULT[2].extend(RES[2])
RESULT[1].clear()
Expand Down Expand Up @@ -1158,7 +1190,9 @@ def score_chr(self,chrs,unloadRef=False,method='saddle',mode='auto',reqacc=1e-10
res = self.score(G,parallel,unloadRef,method,mode,reqacc,intlimit,nobar,autorescore,keep_idx)

toc = time.time()
print("[time]:",str(round(toc-tic,2))+"s;",round(len(G)/(toc-tic),2),"genes/s")

if res is not None:
print("[time]:",str(round(toc-tic,2))+"s;",round(len(G)/(toc-tic),2),"genes/s")

return res

Expand Down

0 comments on commit adb3502

Please sign in to comment.