forked from MatthiasLienhard/isotools
-
Notifications
You must be signed in to change notification settings - Fork 1
Open
Description
I encountered a KeyError when trying to add ORFs to an isotools.Transcriptome object using the add_orf_prediction method.
The full traceback is copied below.
I think the error is caused by lowercase base characters (a, t, c, g) in the genome FASTA file: the function calculates the Kozak score by looking up each base in the Kozak matrix, which has only uppercase characters as row names, so a lowercase base such as 'a' raises the KeyError.
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File [/data/CaiyuLab/pbisoseq-smk/nb/nb-env/lib/python3.12/site-packages/pandas/core/indexes/base.py:3812](http://localhost:8889/lab/tree/pbisoseq-smk/nb/pbisoseq-smk/nb/nb-env/lib/python3.12/site-packages/pandas/core/indexes/base.py#line=3811), in Index.get_loc(self, key)
3811 try:
-> 3812 return self._engine.get_loc(casted_key)
3813 except KeyError as err:
File pandas[/_libs/index.pyx:167](http://localhost:8889/_libs/index.pyx#line=166), in pandas._libs.index.IndexEngine.get_loc()
File pandas[/_libs/index.pyx:196](http://localhost:8889/_libs/index.pyx#line=195), in pandas._libs.index.IndexEngine.get_loc()
File pandas[/_libs/hashtable_class_helper.pxi:7088](http://localhost:8889/_libs/hashtable_class_helper.pxi#line=7087), in pandas._libs.hashtable.PyObjectHashTable.get_item()
File pandas[/_libs/hashtable_class_helper.pxi:7096](http://localhost:8889/_libs/hashtable_class_helper.pxi#line=7095), in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'a'
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
Cell In[8], line 1
----> 1 isoseq.add_orf_prediction(
2 "../ref/merged.strelka.variants.GT_AA.consensus.fasta"
3 )
File [/data/CaiyuLab/pbisoseq-smk/nb/nb-env/lib/python3.12/site-packages/isotools/_transcriptome_filter.py:139](http://localhost:8889/lab/tree/pbisoseq-smk/nb/pbisoseq-smk/nb/nb-env/lib/python3.12/site-packages/isotools/_transcriptome_filter.py#line=138), in add_orf_prediction(self, genome_fn, progress_bar, filter_transcripts, filter_ref_transcripts, min_len, max_5utr_len, min_kozak, prefer_annotated_init, kozak_matrix, fickett_score, hexamer_file)
137 if gene.chrom in genome_fh.references:
138 if filter_transcripts is not None:
--> 139 gene.add_orfs(
140 genome_fh,
141 reference=False,
142 prefer_annotated_init=prefer_annotated_init,
143 minlen=min_len,
144 min_kozak=min_kozak,
145 max_5utr_len=max_5utr_len,
146 tr_filter=filter_transcripts,
147 kozak_matrix=kozak_matrix,
148 get_fickett=fickett_score,
149 coding_hexamers=coding,
150 noncoding_hexamers=noncoding,
151 )
152 if filter_ref_transcripts is not None:
153 gene.add_orfs(
154 genome_fh,
155 reference=True,
(...) 164 noncoding_hexamers=noncoding,
165 )
File [/data/CaiyuLab/pbisoseq-smk/nb/nb-env/lib/python3.12/site-packages/isotools/gene.py:779](http://localhost:8889/lab/tree/pbisoseq-smk/nb/pbisoseq-smk/nb/nb-env/lib/python3.12/site-packages/isotools/gene.py#line=778), in Gene.add_orfs(self, genome_fh, tr_filter, reference, minlen, min_kozak, max_5utr_len, prefer_annotated_init, start_codons, stop_codons, kozak_matrix, get_fickett, coding_hexamers, noncoding_hexamers)
777 if kozak_matrix is not None:
778 if kozak is None:
--> 779 orf_dict["kozak"] = kozak_score(tr_seq, start, kozak_matrix)
780 else:
781 orf_dict["kozak"] = kozak
File [/data/CaiyuLab/pbisoseq-smk/nb/nb-env/lib/python3.12/site-packages/isotools/_utils.py:220](http://localhost:8889/lab/tree/pbisoseq-smk/nb/pbisoseq-smk/nb/nb-env/lib/python3.12/site-packages/isotools/_utils.py#line=219), in kozak_score(sequence, pos, pwm)
219 def kozak_score(sequence, pos, pwm=DEFAULT_KOZAK_PWM):
--> 220 return sum(
221 pwm.loc[sequence[pos + i], i]
222 for i in pwm.columns
223 if pos + i >= 0 and pos + i < len(sequence)
224 )
File [/data/CaiyuLab/pbisoseq-smk/nb/nb-env/lib/python3.12/site-packages/isotools/_utils.py:221](http://localhost:8889/lab/tree/pbisoseq-smk/nb/pbisoseq-smk/nb/nb-env/lib/python3.12/site-packages/isotools/_utils.py#line=220), in <genexpr>(.0)
219 def kozak_score(sequence, pos, pwm=DEFAULT_KOZAK_PWM):
220 return sum(
--> 221 pwm.loc[sequence[pos + i], i]
222 for i in pwm.columns
223 if pos + i >= 0 and pos + i < len(sequence)
224 )
File [/data/CaiyuLab/pbisoseq-smk/nb/nb-env/lib/python3.12/site-packages/pandas/core/indexing.py:1183](http://localhost:8889/lab/tree/pbisoseq-smk/nb/pbisoseq-smk/nb/nb-env/lib/python3.12/site-packages/pandas/core/indexing.py#line=1182), in _LocationIndexer.__getitem__(self, key)
1181 key = tuple(com.apply_if_callable(x, self.obj) for x in key)
1182 if self._is_scalar_access(key):
-> 1183 return self.obj._get_value(*key, takeable=self._takeable)
1184 return self._getitem_tuple(key)
1185 else:
1186 # we by definition only have the 0th axis
File [/data/CaiyuLab/pbisoseq-smk/nb/nb-env/lib/python3.12/site-packages/pandas/core/frame.py:4226](http://localhost:8889/lab/tree/pbisoseq-smk/nb/pbisoseq-smk/nb/nb-env/lib/python3.12/site-packages/pandas/core/frame.py#line=4225), in DataFrame._get_value(self, index, col, takeable)
4220 engine = self.index._engine
4222 if not isinstance(self.index, MultiIndex):
4223 # CategoricalIndex: Trying to use the engine fastpath may give incorrect
4224 # results if our categories are integers that dont match our codes
4225 # IntervalIndex: IntervalTree has no get_loc
-> 4226 row = self.index.get_loc(index)
4227 return series._values[row]
4229 # For MultiIndex going through engine effectively restricts us to
4230 # same-length tuples; see test_get_set_value_no_partial_indexing
File [/data/CaiyuLab/pbisoseq-smk/nb/nb-env/lib/python3.12/site-packages/pandas/core/indexes/base.py:3819](http://localhost:8889/lab/tree/pbisoseq-smk/nb/pbisoseq-smk/nb/nb-env/lib/python3.12/site-packages/pandas/core/indexes/base.py#line=3818), in Index.get_loc(self, key)
3814 if isinstance(casted_key, slice) or (
3815 isinstance(casted_key, abc.Iterable)
3816 and any(isinstance(x, slice) for x in casted_key)
3817 ):
3818 raise InvalidIndexError(key)
-> 3819 raise KeyError(key) from err
3820 except TypeError:
3821 # If we have a listlike key, _check_indexing_error will raise
3822 # InvalidIndexError. Otherwise we fall through and re-raise
3823 # the TypeError.
3824 self._check_indexing_error(key)
KeyError: 'a'
Metadata
Metadata
Assignees
Labels
No labels