Skip to content

Commit

Permalink
Merge pull request #18 from galaxy-genome-annotation/gff_go
Browse files Browse the repository at this point in the history
Fix loading of GO terms from GFF
  • Loading branch information
abretaud committed May 4, 2021
2 parents 99208a9 + 5b77f27 commit 27f5edc
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 5 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@ $ chakin feature load_fasta \
## History
- 2.3.6
    - Fix loading of GO terms from GFF
- 2.3.5
    - Fix has_table() calls with recent sqlalchemy versions
Expand Down
4 changes: 4 additions & 0 deletions chado/feature/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,10 @@ def load_gff(self, gff, analysis_id, organism_id, landmark_type=None, re_protein
if lm.feature_id not in self._landmark_cache[lm.uniquename]:
self._landmark_cache[lm.uniquename].append(lm.feature_id)

# Preload GO terms
db = 'GO'
self.ci._preload_dbxref2cvterms(db)

examiner = GFF.GFFExaminer()
gff_handle = open(gff)
gff_limits = examiner.available_limits(gff_handle)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

setup(
name="chado",
version='2.3.5',
version='2.3.6',
description="Chado library",
author="Anthony Bretaudeau",
author_email="[email protected]",
Expand Down
2 changes: 1 addition & 1 deletion test-data/annot.gff
Original file line number Diff line number Diff line change
Expand Up @@ -169,4 +169,4 @@ scaffold00001 phytozome6 CDS 4061250 4061345 . + 0 Parent=PAC:18136225;PACid=181
scaffold00001 phytozome6 CDS 4061417 4061500 . + 0 Parent=PAC:18136225;PACid=18136225
scaffold00001 phytozome6 CDS 4061617 4061719 . + 0 Parent=PAC:18136225;PACid=18136225
scaffold00001 phytozome6 CDS 4061823 4061905 . + 2 Parent=PAC:18136225;PACid=18136225
scaffold00001 phytozome6 three_prime_UTR 4061906 4062210 . + . ID="some_special_cds";Parent=PAC:18136225;PACid=18136225;Derives_from=PAC:18136217;Ontology_term=GO:000001,GO:00002;Target=scaffold00001 120 320 -
scaffold00001 phytozome6 three_prime_UTR 4061906 4062210 . + . ID="some_special_cds";Parent=PAC:18136225;PACid=18136225;Derives_from=PAC:18136217;Ontology_term=GO:000001,GO:00002,GO:0006812;Target=scaffold00001 120 320 -
12 changes: 9 additions & 3 deletions test/gff_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,11 +272,13 @@ def test_load_gff(self):

terms = {cvt.cvterm.name: cvt.cvterm.dbxref.db_id for cvt in derivesfrom[0].feature_cvterm_collection}

assert len(terms) == 2, "gff>ontology_term loaded correctly"
assert len(terms) == 3, "gff>ontology_term loaded correctly"
assert '000001' in terms, "gff>ontology_term loaded correctly"
assert '00002' in terms, "gff>ontology_term loaded correctly"
assert 'cation transport' in terms, "gff>ontology_term loaded correctly"
assert terms['000001'] == dbs['GO'], "gff>ontology_term loaded correctly"
assert terms['00002'] == dbs['GO'], "gff>ontology_term loaded correctly"
assert terms['cation transport'] == dbs['GO'], "gff>ontology_term loaded correctly"

# Target location
assert len(derivesfrom[0].featureloc_collection) == 2, "gff>target loc ok"
Expand Down Expand Up @@ -618,11 +620,13 @@ def test_load_gff_twice(self):

terms = {cvt.cvterm.name: cvt.cvterm.dbxref.db_id for cvt in derivesfrom[0].feature_cvterm_collection}

assert len(terms) == 2, "gff>ontology_term loaded correctly"
assert len(terms) == 3, "gff>ontology_term loaded correctly"
assert '000001' in terms, "gff>ontology_term loaded correctly"
assert '00002' in terms, "gff>ontology_term loaded correctly"
assert 'cation transport' in terms, "gff>ontology_term loaded correctly"
assert terms['000001'] == dbs['GO'], "gff>ontology_term loaded correctly"
assert terms['00002'] == dbs['GO'], "gff>ontology_term loaded correctly"
assert terms['cation transport'] == dbs['GO'], "gff>ontology_term loaded correctly"

# Target location
assert len(derivesfrom[0].featureloc_collection) == 2, "gff>target loc ok"
Expand Down Expand Up @@ -1301,11 +1305,13 @@ def test_load_gff_addonly(self):

terms = {cvt.cvterm.name: cvt.cvterm.dbxref.db_id for cvt in derivesfrom[0].feature_cvterm_collection}

assert len(terms) == 2, "gff>ontology_term loaded correctly"
assert len(terms) == 3, "gff>ontology_term loaded correctly"
assert '000001' in terms, "gff>ontology_term loaded correctly"
assert '00002' in terms, "gff>ontology_term loaded correctly"
assert 'cation transport' in terms, "gff>ontology_term loaded correctly"
assert terms['000001'] == dbs['GO'], "gff>ontology_term loaded correctly"
assert terms['00002'] == dbs['GO'], "gff>ontology_term loaded correctly"
assert terms['cation transport'] == dbs['GO'], "gff>ontology_term loaded correctly"

# Target location
assert len(derivesfrom[0].featureloc_collection) == 2, "gff>target loc ok"
Expand Down

0 comments on commit 27f5edc

Please sign in to comment.