forked from pfenninglab/halLiftover-postprocessing
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmakeRunHalLiftoverSingleBedScript.py
62 lines (59 loc) · 3.38 KB
/
makeRunHalLiftoverSingleBedScript.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import sys
import argparse
def parseArgument(argv=None):
    """Parse the command-line options for the halLiftover script generator.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default), argparse reads the arguments from sys.argv[1:], which
            is the behavior of the original zero-argument call.

    Returns:
        An argparse.Namespace with the attributes bedFileName, querySpecies,
        speciesToLiftFileName, CactusFileName, halLiftoverPath,
        numInputFilePartsToRemoveForOutput, gz and scriptFileName.
    """
    parser = argparse.ArgumentParser(
        description="Make a script that will run halLiftover on a single file and map it to a list of species")
    parser.add_argument("--bedFileName", required=True,
                        help='bed file')
    parser.add_argument("--querySpecies", required=True,
                        help='Name of species that will be lifted')
    parser.add_argument("--speciesToLiftFileName", required=True,
                        help='File with target species, add a second column to specify a different output suffix than the species name')
    parser.add_argument("--CactusFileName", required=True,
                        help='Name of the Cactus file')
    # The value is a directory: the script generator appends "/halLiftover"
    # to it. The default is a user-specific install location, so most callers
    # will want to pass this option explicitly.
    parser.add_argument("--halLiftoverPath", required=False,
                        default="/home/ikaplow/RegulatoryElementEvolutionProject/src/hal/bin",
                        help='Path to the directory containing the halLiftover executable')
    parser.add_argument("--numInputFilePartsToRemoveForOutput", type=int, required=False, default=2,
                        help='Number of parts of the input file name to remove from the end when creating the output file name')
    parser.add_argument("--gz", action="store_true", required=False,
                        help='The input file is gzipped')
    parser.add_argument("--scriptFileName", required=True,
                        help='Name of the file where the script will be recorded')
    return parser.parse_args(argv)
def makeRunHalLiftoverSingleBedScript(options):
    """Write a script with one halLiftover command per target species.

    The species file is read line by line; the first tab-separated column is
    the target species name and the optional second column is the suffix used
    in that species' output file name (the species name is used when there is
    no second column). Blank lines are ignored. A species equal to
    options.querySpecies is skipped.

    Each output file is named "<prefix>_<suffix>.bed", where <prefix> is
    options.bedFileName with its last
    options.numInputFilePartsToRemoveForOutput dot-separated parts removed.

    Args:
        options: Object with the attributes speciesToLiftFileName,
            halLiftoverPath, scriptFileName, bedFileName,
            numInputFilePartsToRemoveForOutput, querySpecies, CactusFileName
            and gz (e.g. the namespace returned by the argument parser).

    Returns:
        None. The commands are written to options.scriptFileName, which is
        created or overwritten.
    """
    # Map each target species to its output-file suffix
    speciesToLiftDict = {}
    with open(options.speciesToLiftFileName) as speciesToLiftFile:
        for line in speciesToLiftFile:
            strippedLine = line.strip()
            if not strippedLine:
                # Blank line (e.g. trailing newline at the end of the file);
                # without this check an empty species name would be recorded
                # and a malformed command written for it
                continue
            lineElements = strippedLine.split("\t")
            if len(lineElements) > 1:
                # The output suffix for the species is different from the species name
                speciesToLiftDict[lineElements[0]] = lineElements[1]
            else:
                speciesToLiftDict[lineElements[0]] = lineElements[0]

    halLiftoverCmd = options.halLiftoverPath + "/halLiftover"

    # Build the output prefix by dropping trailing dot-separated name parts.
    # Removing 0 parts must keep the whole name; a plain [0:-0] slice would
    # be [0:0] and silently discard everything.
    bedFileNameElements = options.bedFileName.split(".")
    numPartsToRemove = options.numInputFilePartsToRemoveForOutput
    if numPartsToRemove:
        bedFileNameElements = bedFileNameElements[:-numPartsToRemove]
    outputFileNamePrefix = ".".join(bedFileNameElements)

    # The part of the command before the target species is the same for
    # every species, so build it once
    if options.gz:
        # The input bed file is gzipped: decompress it and feed it through stdin
        commandStart = ["zcat", options.bedFileName, "|", halLiftoverCmd, "--bedType 4",
                        options.CactusFileName, options.querySpecies, "stdin"]
    else:
        # The input bed file is not gzipped: pass it directly
        commandStart = [halLiftoverCmd, "--bedType 4", options.CactusFileName,
                        options.querySpecies, options.bedFileName]

    with open(options.scriptFileName, "w") as scriptFile:
        for species in speciesToLiftDict:
            if species == options.querySpecies:
                # The current species is the same as the species of the bed file, so skip it
                continue
            outputFileName = outputFileNamePrefix + "_" + speciesToLiftDict[species] + ".bed"
            scriptFile.write(" ".join(commandStart + [species, outputFileName]) + "\n")
if __name__ == "__main__":
    # Script entry point: parse the command line, then write the halLiftover script
    makeRunHalLiftoverSingleBedScript(parseArgument())