Skip to content

Commit

Permalink
Fix interproscan loader failing to load IPR by name
Browse files Browse the repository at this point in the history
  • Loading branch information
abretaud committed Nov 18, 2019
1 parent 7685251 commit 1dcbfd5
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 7 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ $ chakin feature load_fasta \
- 2.3.2
- Fix interproscan loader only loading the first result of XML v5
- Fix interproscan loader failing to load IPR by name
- 2.3.1
- Fix data loading in Tripal database
Expand Down
16 changes: 16 additions & 0 deletions chado/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def _reset_cache(self):
self._featured_dirty_rels = None
self._analysisfeature_cache = None
self._analysisprop_cache = None
self._interpro_cache = None

self.cache_existing = True

Expand Down Expand Up @@ -336,3 +337,18 @@ def _add_feat_cvterm_with_id(self, feat, cvterm_id, pub_id=None):
if feat not in self._featcvterm_cache:
self._featcvterm_cache[feat] = []
self._featcvterm_cache[feat].append(cvterm_id)

def _init_interpro_cache(self, force=False):
    """
    Build the in-memory lookup of INTERPRO accessions to cvterm ids.

    The lookup is stored in ``self._interpro_cache`` as a dict mapping
    ``dbxref.accession`` to ``cvterm.cvterm_id``.

    :param force: when true, discard an already-built cache and rebuild it;
                  when false, an existing cache is left untouched.
    """
    # Nothing to do if the cache was already built and no rebuild is requested
    if self._interpro_cache is not None and not force:
        return

    # Start from an empty mapping; it stays empty when existing rows
    # are not supposed to be preloaded.
    self._interpro_cache = {}
    if not self.cache_existing:
        return

    model = self.model

    # Accessions of dbxrefs belonging to the db named "INTERPRO",
    # paired with the id of the cvterm attached to each dbxref.
    rows = self.session.query(model.dbxref.accession, model.cvterm.cvterm_id) \
        .join(model.db, model.db.db_id == model.dbxref.db_id) \
        .filter(model.db.name == "INTERPRO") \
        .join(model.cvterm, model.dbxref.dbxref_id == model.cvterm.dbxref_id)

    self._interpro_cache = {row.accession: row.cvterm_id for row in rows}
21 changes: 14 additions & 7 deletions chado/load/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ def interpro(self, analysis_id, organism_id, input, parse_go=False, re_name=None
# Cache analysisfeature content for given analysis_id
self._init_analysisfeature_cache(analysis_id)
self._init_featcvterm_cache()
self._init_interpro_cache()

# Cache all existing cvterms from GO cv
db = 'GO'
Expand Down Expand Up @@ -501,13 +502,19 @@ def _load_ipr_terms(self, ipr_terms, feature_id, analysis_id, skip_missing):
# load the IPR terms that way, we need to just add them
# as we encounter them. If the term already exists
# we do not want to update it.
cvterm_id = self.ci.create_cvterm(ipr_term['ipr_name'], 'INTERPRO', 'INTERPRO', term_definition=ipr_term['ipr_desc'], accession=ipr_id)
if not cvterm_id:
if skip_missing:
warn('Could not find cvterm %s %s, skipping it', ipr_id, ipr_term['ipr_name'])
continue
else:
raise Exception('Could not find cvterm %s %s' % ipr_id, ipr_term['ipr_name'])

# Check using IPRnumber (in case ipr_name changed at some point in time)
if ipr_id in self._interpro_cache:
cvterm_id = self._interpro_cache[ipr_id]
else:
cvterm_id = self.ci.create_cvterm(ipr_term['ipr_name'], 'INTERPRO', 'INTERPRO', term_definition=ipr_term['ipr_desc'], accession=ipr_id)
if not cvterm_id:
if skip_missing:
warn('Could not find cvterm %s %s, skipping it', ipr_id, ipr_term['ipr_name'])
continue
else:
raise Exception('Could not find cvterm %s %s' % ipr_id, ipr_term['ipr_name'])
self._interpro_cache[ipr_id] = cvterm_id

# Insert IPR terms into the feature_cvterm table
# the default pub_id of 1 (NULL) is used. if the cvterm already exists then just skip adding it
Expand Down

0 comments on commit 1dcbfd5

Please sign in to comment.