forked from pfenninglab/halLiftover-postprocessing
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgetMaxScorePositionFromWig.py
74 lines (71 loc) · 3.71 KB
/
getMaxScorePositionFromWig.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import sys
import gzip
import argparse
from argparse import Namespace
import os
import pybedtools as bt
import shutil
from getMaxScorePositionFromBedgraph import getMaxScorePositionFromBedgraph
def parseArgument():
# Parse the input
parser=argparse.ArgumentParser(description=\
"Get the position with the max. score from a wig file for each region; if there is a tie, choose the center-most score")
parser.add_argument("--bedFileName", required=True,\
help='Bed file with the regions; should be sorted by chromosome, start, end')
parser.add_argument("--wigFileName", required=True,\
help='wig file name with the scores')
parser.add_argument("--chromSizesFileName", required=True,\
help='chrom sizes file name for the query genome')
parser.add_argument("--bigwigFileName", required=False,\
help='bigwig file with the scores that will be outputted as intermediate')
parser.add_argument("--bedgraphFileName", required=False,\
help='bedgraph file with the scores that will be outputted as intermediate; will be sorted by chromosome, start, end')
parser.add_argument("--gz", action="store_true", required=False,\
help='The input bed file is gzipped and the intermediate sorted bedgraph file will be gzipped')
parser.add_argument("--highestScoreLocationFileName", required=True,\
help='bed file where position with the highest score that is closest to the center will be written')
parser.add_argument("--codePath", required=False, default="",\
help='Path to UCSC Genome Browser utilities executables, should end with / if it is not an empty String')
parser.add_argument("--tempDir", required=False, default="/scratch",\
help='Temporary directory for pybedtools')
options = parser.parse_args()
return options
def getMaxScorePositionFromWig(options):
# Get the position with the max. score from a wig file for each region; if there is a tie, choose the center-most score
bigwigFileName = options.bigwigFileName
wigFileNameElements = options.wigFileName.strip().split(".")
wigFileNamePrefix = ".".join(wigFileNameElements[0:-1])
if not bigwigFileName:
# Create the name of the intermediate bigwig file
bigwigFileName = wigFileNamePrefix + ".bw"
os.system(" ".join([options.codePath + "wigToBigWig", options.wigFileName, options.chromSizesFileName, bigwigFileName]))
unsortedBedgraphFileName = wigFileNamePrefix + ".bedgraph"
os.system(" ".join([options.codePath + "bigWigToBedGraph", bigwigFileName, unsortedBedgraphFileName]))
bedgraphFileName = options.bedgraphFileName
if not bedgraphFileName:
# Create the name of the intermediate bedgraph file
bedgraphFileName = wigFileNamePrefix + "_sorted.bedgraph"
elif options.gz:
# Remove the gz from the end of the bedgraph file name
bedgraphFileName = bedgraphFileName[0:-3]
bt.BedTool(unsortedBedgraphFileName).sort().saveas(bedgraphFileName)
os.remove(unsortedBedgraphFileName)
if options.gz:
# gzip the bedgraph file
gzipBedgraphFileName = options.bedgraphFileName
if not gzipBedgraphFileName:
# Create the name of the intermediate gzipped bedgraph file
gzipBedgraphFileName = bedgraphFileName + ".gz"
with open(bedgraphFileName, "rb") as f_in, gzip.open(gzipBedgraphFileName, "wb") as f_out:
# Compress the bedgraph file
shutil.copyfileobj(f_in, f_out)
os.remove(bedgraphFileName)
bedgraphFileName = gzipBedgraphFileName
maxScorePositionOptions = Namespace(bedFileName=options.bedFileName, bedgraphFileName=bedgraphFileName, gz=options.gz, \
highestScoreLocationFileName=options.highestScoreLocationFileName)
getMaxScorePositionFromBedgraph(maxScorePositionOptions)
if __name__=="__main__":
options = parseArgument()
bt.helpers.set_tempdir(options.tempDir)
getMaxScorePositionFromWig(options)
bt.helpers.cleanup()