Skip to content

Commit

Permalink
Merge pull request #22 from yhoogstrate/ensembl_bed_conversion
Browse files Browse the repository at this point in the history
Added utility for GTF to BED conversion
  • Loading branch information
yhoogstrate committed Mar 11, 2016
2 parents 3fb94ac + a05234b commit 89217af
Show file tree
Hide file tree
Showing 10 changed files with 227 additions and 2 deletions.
4 changes: 4 additions & 0 deletions Changelog
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
2016-03-11 Youri Hoogstrate

* Version 2.11.5: Added utility to create appropriate BED files from GTF files

2016-03-11 Youri Hoogstrate

* Version 2.11.4: Reduces memory footprint for high number of samples
Expand Down
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,17 @@ In this case the alias of the BED-file, hg19, will later be used to link it to d

-a "hg18:somefile_hg18.bed" "hg19:somefile_hg19.bed"

#### Obtain BED file -> fuma-gencode-gtf-to-bed ####

Because obtaining such files turns out to be more difficult than expected, we have provided an extra utility named `fuma-gencode-gtf-to-bed`.
The user should start by downloading a GTF file from GENCODE (the only source the utility has been tested with so far). The user should then proceed by running the following command:

fuma-gencode-gtf-to-bed -o converted.bed input.gtf

The utility will use all annotations in the GTF file and will aggregate all exons per `transcript_id`, while it will use the `gene_id` as identifier in the BED file. The reason for this is that transcripts belonging to the same gene can be quite distant from each other (or homologues may share the same name, which happens); by annotating them per transcript, the long distance between such transcripts will not unnecessarily be marked as part of that gene. In case multiple transcripts from the same gene overlap each other, FuMa will treat them as the same gene as long as their identifier is the same, which is the case since the `gene_id` is being used for this.

This tool should work for all GTF files in which every entry has a proper, unique and correct definition of the `gene_id` and `transcript_id`.

#### -s ADD_SAMPLE ####
To provide FuMa a fusion gene detection experiment, it should be provided with the "-s" argument which should follow the following syntax:

Expand Down
78 changes: 78 additions & 0 deletions bin/fuma-gencode-gtf-to-bed
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#!/usr/bin/env python

import fuma,sys

from fuma.CLI import CLI_ensmble_gtf_to_bed_converter

## Coordinate conventions
##
## GTF
##  - start: 1-based
##  - end:   1-based
##
## BED
##  - start: 0-based
##  - end:   1-based


def get_gtf_attribute(attributes, key, line):
    """Extract the value of one attribute from the 9th column of a GTF record.

    @param attributes: the attribute column, e.g. 'gene_id "X"; transcript_id "Y";'
    @param key: attribute name to look up, e.g. 'gene_id'
    @param line: the complete GTF record, only used in the error message
    @return: the attribute value without surrounding spaces and double quotes
    @raise Exception: if the attribute is not present in the record
    """
    parts = attributes.split(key, 1)
    if len(parts) != 2:
        raise Exception("Error: no '" + key + "' attribute found in:\n" + line)

    # The value runs up to the next ';' and may be wrapped in double quotes
    return parts[1].split(";", 1)[0].strip(" ").strip('"')


def parse_gtf(handle):
    """Aggregate all records of a GTF file into one region per transcript.

    @param handle: iterable over the lines of a GTF file (e.g. an open file)
    @return: dict of transcript_id -> {chromosome: [min_pos, max_pos, inversed,
             strand, gene_id, chromosome]}, positions being 1-based as in GTF
    @raise Exception: if a record that extends a transcript is annotated on a
           different strand or in a different direction than the transcript
    """
    idx = {}

    for line in handle:
        line = line.strip()

        # Skip empty lines and '#' header / comment lines
        if not line or line[0] == "#":
            continue

        params = line.split("\t")
        chromosome = params[0]
        strand = params[6]

        gene_id = get_gtf_attribute(params[8], "gene_id", line)
        transcript_id = get_gtf_attribute(params[8], "transcript_id", line)

        start = int(params[3])
        end = int(params[4])
        inversed = (end < start)

        min_pos = min(start, end)
        max_pos = max(start, end)

        regions = idx.setdefault(transcript_id, {})
        if chromosome not in regions:
            # First record of this transcript on this chromosome
            regions[chromosome] = [min_pos, max_pos, inversed, strand, gene_id, chromosome]
            continue

        region = regions[chromosome]
        if min_pos < region[0] or max_pos > region[1]:
            # Only extend the region if inversion and strand are identical
            if inversed != region[2] or strand != region[3]:
                raise Exception("Error: transcript annotated in different directions:\n" + line)

            region[0] = min(region[0], min_pos)
            region[1] = max(region[1], max_pos)

    return idx


def to_bed_lines(idx):
    """Convert the per-transcript index of `parse_gtf` into BED lines.

    @param idx: index as returned by `parse_gtf`
    @return: sorted list of unique BED lines (chr, 0-based start, end, gene_id),
             tab-delimited and without trailing newline
    @raise Exception: if a transcript is not annotated on exactly one chromosome
    """
    lines = set()  # identical regions of different transcripts are reported once

    for t in sorted(idx):  # Sorted to make errors reproducible
        regions = idx[t]
        if len(regions) != 1:
            raise Exception("Error: " + t + " has either no annotated chromosomes or multiple")

        data = list(regions.values())[0]

        # GTF start is 1-based while BED start is 0-based; the end is identical
        # data[4] is the gene_id; append .split(".",1)[0] to get rid of suffixes of ensembl ID's
        lines.add("\t".join([data[5], str(data[0] - 1), str(data[1]), data[4]]))

    # Sorted is essential to get the same output and for functional testing
    return sorted(lines)


def write_bed_lines(lines, handle):
    """Write each BED line, terminated by a newline, to an open file handle."""
    for line in lines:
        handle.write(line + "\n")


if __name__ == "__main__":
    args = CLI_ensmble_gtf_to_bed_converter()

    # Parse the GTF file
    with open(args.genecode_gtf_file[0], "r") as fh:
        bed_lines = to_bed_lines(parse_gtf(fh))

    # Export to BED; '-' (the default) means stdout
    if args.output == "-":
        write_bed_lines(bed_lines, sys.stdout)
    else:
        with open(args.output, "w") as fh_out:
            write_bed_lines(bed_lines, fh_out)
22 changes: 22 additions & 0 deletions fuma/CLI.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ def show_formats():

def CLI(argv=None):
"""Command Line Interface
base command line interface of FuMa
"""
parser = argparse.ArgumentParser()

Expand Down Expand Up @@ -106,3 +108,23 @@ def CLI(argv=None):
else:
# Argumented parameters are used in the unit tests.
return parser.parse_args(argv)


def CLI_ensmble_gtf_to_bed_converter(argv=None):
    """Command line interface of the GTF to BED converter

    @param argv: list of command line arguments to parse; if None, the
                 arguments of the running process (sys.argv) are parsed.
                 Argumented parameters are used in the unit tests.
    @return: argparse.Namespace with the attributes `output` (filename, '-' for
             stdout) and `genecode_gtf_file` (list of exactly one filename,
             because of nargs=1)
    """

    parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter,epilog="For more info please visit:\n<https://github.com/yhoogstrate/fuma>")
    parser.add_argument('-V','--version', action='version', version=textwrap.dedent("%(prog)s "+fuma.__version__+"\n\nCopyright (C) 2013-"+str(datetime.datetime.now().year)+" Youri Hoogstrate.\n\nLicense GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\nThis is free software: you are free to change and redistribute it.\nThere is NO WARRANTY, to the extent permitted by law.\n"))

    parser.add_argument("-o","--output",help="output filename; '-' for stdout",default="-")
    parser.add_argument("genecode_gtf_file",nargs=1,help="Input GTF file, e.g. 'gencode_gtf_file.gtf' - not as .gz")

    # parse_args(None) falls back to sys.argv[1:], so no separate branch is needed
    return parser.parse_args(argv)
2 changes: 1 addition & 1 deletion fuma/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
<http://epydoc.sourceforge.net/manual-fields.html#fields-synonyms>
"""

__version_info__ = ('2', '11', '4')
__version_info__ = ('2', '11', '5')
__version__ = '.'.join(__version_info__) if (len(__version_info__) == 3) else '.'.join(__version_info__[0:3])+"-"+__version_info__[3]
__author__ = 'Youri Hoogstrate'
__homepage__ = 'https://github.com/yhoogstrate/fuma'
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
maintainer=fuma.__author__,
license=fuma.__license__,
url=fuma.__homepage__,
scripts=["bin/fuma","bin/defuse-clusters-to-CG",'bin/chimerascan-exclude-transcriptome-events',"bin/fusioncatcher-to-CG","bin/chimerascan-relative-bedpe-to-CG","bin/fuma-list-to-boolean-list"],
scripts=["bin/fuma","bin/defuse-clusters-to-CG",'bin/chimerascan-exclude-transcriptome-events',"bin/fusioncatcher-to-CG","bin/chimerascan-relative-bedpe-to-CG","bin/fuma-list-to-boolean-list","bin/fuma-gencode-gtf-to-bed"],
packages=['fuma'],
test_suite="tests",
install_requires=['HTSeq >= 0.6.1','numpy'],
Expand Down
10 changes: 10 additions & 0 deletions tests/data/gencode_hg19.subset.bed
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
chr1 110952 129173 ENSG00000238009.2
chr1 129080 133566 ENSG00000238009.2
chr1 29553 31097 ENSG00000243485.2
chr1 30266 31109 ENSG00000243485.2
chr1 30365 30503 ENSG00000243485.2
chr1 34553 36081 ENSG00000237613.2
chr1 35244 36073 ENSG00000237613.2
chr1 69090 70008 ENSG00000186092.4
chr1 89294 120932 ENSG00000238009.2
chr1 92229 129217 ENSG00000238009.2
27 changes: 27 additions & 0 deletions tests/data/gencode_hg19.subset.gtf
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
##description: test file
##provider: STAR Fusion wiki
chr1 HAVANA exon 29554 30039 . + . gene_id "ENSG00000243485.2"; transcript_id "ENST00000473358.1"; gene_type "lincRNA"; gene_status "NOVEL"; gene_name "MIR1302-11"; transcript_type "lincRNA"; transcript_status "KNOWN"; transcript_name "MIR1302-11-001"; exon_number 1; exon_id "ENSE00001947070.1"; level 2; tag "not_best_in_genome_evidence"; havana_gene "OTTHUMG00000000959.2"; havana_transcript "OTTHUMT00000002840.1";
chr1 HAVANA exon 30564 30667 . + . gene_id "ENSG00000243485.2"; transcript_id "ENST00000473358.1"; gene_type "lincRNA"; gene_status "NOVEL"; gene_name "MIR1302-11"; transcript_type "lincRNA"; transcript_status "KNOWN"; transcript_name "MIR1302-11-001"; exon_number 2; exon_id "ENSE00001922571.1"; level 2; tag "not_best_in_genome_evidence"; havana_gene "OTTHUMG00000000959.2"; havana_transcript "OTTHUMT00000002840.1";
chr1 HAVANA exon 30976 31097 . + . gene_id "ENSG00000243485.2"; transcript_id "ENST00000473358.1"; gene_type "lincRNA"; gene_status "NOVEL"; gene_name "MIR1302-11"; transcript_type "lincRNA"; transcript_status "KNOWN"; transcript_name "MIR1302-11-001"; exon_number 3; exon_id "ENSE00001827679.1"; level 2; tag "not_best_in_genome_evidence"; havana_gene "OTTHUMG00000000959.2"; havana_transcript "OTTHUMT00000002840.1";
chr1 HAVANA exon 30267 30667 . + . gene_id "ENSG00000243485.2"; transcript_id "ENST00000469289.1"; gene_type "lincRNA"; gene_status "NOVEL"; gene_name "MIR1302-11"; transcript_type "lincRNA"; transcript_status "KNOWN"; transcript_name "MIR1302-11-002"; exon_number 1; exon_id "ENSE00001841699.1"; level 2; tag "not_best_in_genome_evidence"; havana_gene "OTTHUMG00000000959.2"; havana_transcript "OTTHUMT00000002841.2";
chr1 HAVANA exon 30976 31109 . + . gene_id "ENSG00000243485.2"; transcript_id "ENST00000469289.1"; gene_type "lincRNA"; gene_status "NOVEL"; gene_name "MIR1302-11"; transcript_type "lincRNA"; transcript_status "KNOWN"; transcript_name "MIR1302-11-002"; exon_number 2; exon_id "ENSE00001890064.1"; level 2; tag "not_best_in_genome_evidence"; havana_gene "OTTHUMG00000000959.2"; havana_transcript "OTTHUMT00000002841.2";
chr1 ENSEMBL exon 30366 30503 . + . gene_id "ENSG00000243485.2"; transcript_id "ENST00000607096.1"; gene_type "lincRNA"; gene_status "NOVEL"; gene_name "MIR1302-11"; transcript_type "miRNA"; transcript_status "KNOWN"; transcript_name "MIR1302-11-201"; exon_number 1; exon_id "ENSE00003695741.1"; level 3; tag "basic"; havana_gene "OTTHUMG00000000959.2";
chr1 HAVANA exon 35721 36081 . - . gene_id "ENSG00000237613.2"; transcript_id "ENST00000417324.1"; gene_type "lincRNA"; gene_status "KNOWN"; gene_name "FAM138A"; transcript_type "lincRNA"; transcript_status "KNOWN"; transcript_name "FAM138A-001"; exon_number 1; exon_id "ENSE00001656588.1"; level 2; tag "basic"; havana_gene "OTTHUMG00000000960.1"; havana_transcript "OTTHUMT00000002842.1";
chr1 HAVANA exon 35277 35481 . - . gene_id "ENSG00000237613.2"; transcript_id "ENST00000417324.1"; gene_type "lincRNA"; gene_status "KNOWN"; gene_name "FAM138A"; transcript_type "lincRNA"; transcript_status "KNOWN"; transcript_name "FAM138A-001"; exon_number 2; exon_id "ENSE00001669267.1"; level 2; tag "basic"; havana_gene "OTTHUMG00000000960.1"; havana_transcript "OTTHUMT00000002842.1";
chr1 HAVANA exon 34554 35174 . - . gene_id "ENSG00000237613.2"; transcript_id "ENST00000417324.1"; gene_type "lincRNA"; gene_status "KNOWN"; gene_name "FAM138A"; transcript_type "lincRNA"; transcript_status "KNOWN"; transcript_name "FAM138A-001"; exon_number 3; exon_id "ENSE00001727627.1"; level 2; tag "basic"; havana_gene "OTTHUMG00000000960.1"; havana_transcript "OTTHUMT00000002842.1";
chr1 HAVANA exon 35721 36073 . - . gene_id "ENSG00000237613.2"; transcript_id "ENST00000461467.1"; gene_type "lincRNA"; gene_status "KNOWN"; gene_name "FAM138A"; transcript_type "lincRNA"; transcript_status "KNOWN"; transcript_name "FAM138A-002"; exon_number 1; exon_id "ENSE00001618781.2"; level 2; havana_gene "OTTHUMG00000000960.1"; havana_transcript "OTTHUMT00000002843.1";
chr1 HAVANA exon 35245 35481 . - . gene_id "ENSG00000237613.2"; transcript_id "ENST00000461467.1"; gene_type "lincRNA"; gene_status "KNOWN"; gene_name "FAM138A"; transcript_type "lincRNA"; transcript_status "KNOWN"; transcript_name "FAM138A-002"; exon_number 2; exon_id "ENSE00001874421.1"; level 2; havana_gene "OTTHUMG00000000960.1"; havana_transcript "OTTHUMT00000002843.1";
chr1 HAVANA exon 69091 70008 . + . gene_id "ENSG00000186092.4"; transcript_id "ENST00000335137.3"; gene_type "protein_coding"; gene_status "KNOWN"; gene_name "OR4F5"; transcript_type "protein_coding"; transcript_status "KNOWN"; transcript_name "OR4F5-001"; exon_number 1; exon_id "ENSE00002319515.1"; level 2; tag "basic"; tag "appris_principal"; tag "CCDS"; ccdsid "CCDS30547.1"; havana_gene "OTTHUMG00000001094.1"; havana_transcript "OTTHUMT00000003223.1";
chr1 HAVANA exon 120775 120932 . - . gene_id "ENSG00000238009.2"; transcript_id "ENST00000466430.1"; gene_type "lincRNA"; gene_status "NOVEL"; gene_name "RP11-34P13.7"; transcript_type "lincRNA"; transcript_status "KNOWN"; transcript_name "RP11-34P13.7-001"; exon_number 1; exon_id "ENSE00001606755.2"; level 2; tag "not_best_in_genome_evidence"; tag "basic"; havana_gene "OTTHUMG00000001096.2"; havana_transcript "OTTHUMT00000003225.1";
chr1 HAVANA exon 112700 112804 . - . gene_id "ENSG00000238009.2"; transcript_id "ENST00000466430.1"; gene_type "lincRNA"; gene_status "NOVEL"; gene_name "RP11-34P13.7"; transcript_type "lincRNA"; transcript_status "KNOWN"; transcript_name "RP11-34P13.7-001"; exon_number 2; exon_id "ENSE00001957285.1"; level 2; tag "not_best_in_genome_evidence"; tag "basic"; havana_gene "OTTHUMG00000001096.2"; havana_transcript "OTTHUMT00000003225.1";
chr1 HAVANA exon 92091 92240 . - . gene_id "ENSG00000238009.2"; transcript_id "ENST00000466430.1"; gene_type "lincRNA"; gene_status "NOVEL"; gene_name "RP11-34P13.7"; transcript_type "lincRNA"; transcript_status "KNOWN"; transcript_name "RP11-34P13.7-001"; exon_number 3; exon_id "ENSE00001944529.1"; level 2; tag "not_best_in_genome_evidence"; tag "basic"; havana_gene "OTTHUMG00000001096.2"; havana_transcript "OTTHUMT00000003225.1";
chr1 HAVANA exon 89295 91629 . - . gene_id "ENSG00000238009.2"; transcript_id "ENST00000466430.1"; gene_type "lincRNA"; gene_status "NOVEL"; gene_name "RP11-34P13.7"; transcript_type "lincRNA"; transcript_status "KNOWN"; transcript_name "RP11-34P13.7-001"; exon_number 4; exon_id "ENSE00001846804.1"; level 2; tag "not_best_in_genome_evidence"; tag "basic"; havana_gene "OTTHUMG00000001096.2"; havana_transcript "OTTHUMT00000003225.1";
chr1 HAVANA exon 129055 129217 . - . gene_id "ENSG00000238009.2"; transcript_id "ENST00000477740.1"; gene_type "lincRNA"; gene_status "NOVEL"; gene_name "RP11-34P13.7"; transcript_type "lincRNA"; transcript_status "KNOWN"; transcript_name "RP11-34P13.7-003"; exon_number 1; exon_id "ENSE00001919246.1"; level 2; tag "not_best_in_genome_evidence"; havana_gene "OTTHUMG00000001096.2"; havana_transcript "OTTHUMT00000003688.1";
chr1 HAVANA exon 120721 120932 . - . gene_id "ENSG00000238009.2"; transcript_id "ENST00000477740.1"; gene_type "lincRNA"; gene_status "NOVEL"; gene_name "RP11-34P13.7"; transcript_type "lincRNA"; transcript_status "KNOWN"; transcript_name "RP11-34P13.7-003"; exon_number 2; exon_id "ENSE00001171005.3"; level 2; tag "not_best_in_genome_evidence"; havana_gene "OTTHUMG00000001096.2"; havana_transcript "OTTHUMT00000003688.1";
chr1 HAVANA exon 112700 112804 . - . gene_id "ENSG00000238009.2"; transcript_id "ENST00000477740.1"; gene_type "lincRNA"; gene_status "NOVEL"; gene_name "RP11-34P13.7"; transcript_type "lincRNA"; transcript_status "KNOWN"; transcript_name "RP11-34P13.7-003"; exon_number 3; exon_id "ENSE00001957285.1"; level 2; tag "not_best_in_genome_evidence"; havana_gene "OTTHUMG00000001096.2"; havana_transcript "OTTHUMT00000003688.1";
chr1 HAVANA exon 92230 92240 . - . gene_id "ENSG00000238009.2"; transcript_id "ENST00000477740.1"; gene_type "lincRNA"; gene_status "NOVEL"; gene_name "RP11-34P13.7"; transcript_type "lincRNA"; transcript_status "KNOWN"; transcript_name "RP11-34P13.7-003"; exon_number 4; exon_id "ENSE00001896976.1"; level 2; tag "not_best_in_genome_evidence"; havana_gene "OTTHUMG00000001096.2"; havana_transcript "OTTHUMT00000003688.1";
chr1 HAVANA exon 129055 129173 . - . gene_id "ENSG00000238009.2"; transcript_id "ENST00000471248.1"; gene_type "lincRNA"; gene_status "NOVEL"; gene_name "RP11-34P13.7"; transcript_type "lincRNA"; transcript_status "KNOWN"; transcript_name "RP11-34P13.7-002"; exon_number 1; exon_id "ENSE00001934975.1"; level 2; tag "not_best_in_genome_evidence"; havana_gene "OTTHUMG00000001096.2"; havana_transcript "OTTHUMT00000003687.1";
chr1 HAVANA exon 112700 112804 . - . gene_id "ENSG00000238009.2"; transcript_id "ENST00000471248.1"; gene_type "lincRNA"; gene_status "NOVEL"; gene_name "RP11-34P13.7"; transcript_type "lincRNA"; transcript_status "KNOWN"; transcript_name "RP11-34P13.7-002"; exon_number 2; exon_id "ENSE00001957285.1"; level 2; tag "not_best_in_genome_evidence"; havana_gene "OTTHUMG00000001096.2"; havana_transcript "OTTHUMT00000003687.1";
chr1 HAVANA exon 110953 111357 . - . gene_id "ENSG00000238009.2"; transcript_id "ENST00000471248.1"; gene_type "lincRNA"; gene_status "NOVEL"; gene_name "RP11-34P13.7"; transcript_type "lincRNA"; transcript_status "KNOWN"; transcript_name "RP11-34P13.7-002"; exon_number 3; exon_id "ENSE00001879696.1"; level 2; tag "not_best_in_genome_evidence"; havana_gene "OTTHUMG00000001096.2"; havana_transcript "OTTHUMT00000003687.1";
chr1 HAVANA exon 133374 133566 . - . gene_id "ENSG00000238009.2"; transcript_id "ENST00000453576.2"; gene_type "lincRNA"; gene_status "NOVEL"; gene_name "RP11-34P13.7"; transcript_type "lincRNA"; transcript_status "KNOWN"; transcript_name "RP11-34P13.7-004"; exon_number 1; exon_id "ENSE00001737600.2"; level 2; havana_gene "OTTHUMG00000001096.2"; havana_transcript "OTTHUMT00000003689.1";
chr1 HAVANA exon 129081 129223 . - . gene_id "ENSG00000238009.2"; transcript_id "ENST00000453576.2"; gene_type "lincRNA"; gene_status "NOVEL"; gene_name "RP11-34P13.7"; transcript_type "lincRNA"; transcript_status "KNOWN"; transcript_name "RP11-34P13.7-004"; exon_number 2; exon_id "ENSE00001827073.1"; level 2; havana_gene "OTTHUMG00000001096.2"; havana_transcript "OTTHUMT00000003689.1";
10 changes: 10 additions & 0 deletions tests/data/gencode_hg19.subset.sorted.bed
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
chr1 29553 31097 ENSG00000243485.2
chr1 30266 31109 ENSG00000243485.2
chr1 30365 30503 ENSG00000243485.2
chr1 34553 36081 ENSG00000237613.2
chr1 35244 36073 ENSG00000237613.2
chr1 69090 70008 ENSG00000186092.4
chr1 89294 120932 ENSG00000238009.2
chr1 92229 129217 ENSG00000238009.2
chr1 110952 129173 ENSG00000238009.2
chr1 129080 133566 ENSG00000238009.2
63 changes: 63 additions & 0 deletions tests/test_GencodeGTF.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#!/usr/bin/env python

"""[License: GNU General Public License v3 (GPLv3)]
This file is part of FuMa.
FuMa is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
FuMa is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Documentation as defined by:
<http://epydoc.sourceforge.net/manual-fields.html#fields-synonyms>
"""

import unittest,logging,sys,os
logging.basicConfig(level=logging.INFO,format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",stream=sys.stdout)

from fuma.ParseBED import ParseBED

class TestParseBED(unittest.TestCase):
    """Functional tests of the `bin/fuma-gencode-gtf-to-bed` utility

    Each test runs the utility in a shell on a small GTF file and compares
    its standard output with a BED file in `tests/data`.
    """

    def run_and_compare(self, command, outputfile):
        """Run a shell command and assert its stdout equals a file's content

        @param command: shell command to execute (without PYTHONPATH prefix)
        @param outputfile: file containing the expected standard output
        """
        # ensure the fuma lib is accessible for testing (also without installation)
        command = "export PYTHONPATH=$PYTHONPATH\":fuma:../fuma\" ;\n\n" + command

        # Close the pipe and the validation file explicitly instead of leaking them
        pipe = os.popen(command)
        try:
            result = pipe.read()
        finally:
            pipe.close()

        with open(outputfile,"r") as fh:
            validation = fh.read()

        self.assertEqual(result, validation)

    def test_01(self):
        """The raw output must match the expected BED file"""
        inputfile = "tests/data/gencode_hg19.subset.gtf"
        outputfile = "tests/data/gencode_hg19.subset.bed"

        command = ("bin/fuma-gencode-gtf-to-bed\\\n"
                   " "+inputfile
                   )

        self.run_and_compare(command, outputfile)

    def test_02(self):
        """The output piped through `sort` must match the sorted BED file"""
        inputfile = "tests/data/gencode_hg19.subset.gtf"
        outputfile = "tests/data/gencode_hg19.subset.sorted.bed"

        command = ("bin/fuma-gencode-gtf-to-bed \\\n"
                   " "+inputfile+" | sort -k1,1V -k2,2g -k3,3g "
                   )

        self.run_and_compare(command, outputfile)

def main():
    """Run all unit tests defined in this module"""
    unittest.main()


if __name__ == '__main__':
    # Only run the tests when executed as a script, not on import
    main()

0 comments on commit 89217af

Please sign in to comment.