Skip to content

Commit

Permalink
Update annotate.py to fix a TypeError with annotations.py
Browse files Browse the repository at this point in the history
Fixed error regarding unhashable SeqFeatures.

Error was as follows:

```bash
 warnings.warn(
Traceback (most recent call last):
  File "/projects/kr58/software/conda_envs/bactabolize_v1.0.1/bin/bactabolize", line 10, in <module>
    sys.exit(entry())
  File "/projects/kr58/software/conda_envs/bactabolize_v1.0.1/lib/python3.9/site-packages/bactabolize/main.py", line 23, in entry
    run_draft_model(config)
  File "/projects/kr58/software/conda_envs/bactabolize_v1.0.1/lib/python3.9/site-packages/bactabolize/main.py", line 49, in run_draft_model
    annotate.run(config.assembly_fp, config.assembly_genbank_fp)
  File "/projects/kr58/software/conda_envs/bactabolize_v1.0.1/lib/python3.9/site-packages/bactabolize/annotate.py", line 50, in run
    match_existing_orfs_updated_annotations(output_fp, assembly_genbank_fp)
  File "/projects/kr58/software/conda_envs/bactabolize_v1.0.1/lib/python3.9/site-packages/bactabolize/annotate.py", line 128, in match_existing_orfs_updated_annotations
    features_matched = discover_overlaps(positions, overlap_min)
  File "/projects/kr58/software/conda_envs/bactabolize_v1.0.1/lib/python3.9/site-packages/bactabolize/annotate.py", line 202, in discover_overlaps
    if (feature_new, feature_existing) in features_matched:
TypeError: unhashable type: 'SeqFeature'
```
  • Loading branch information
bananabenana authored Nov 6, 2024
1 parent 5db7f88 commit 5d24bec
Showing 1 changed file with 27 additions and 9 deletions.
36 changes: 27 additions & 9 deletions bactabolize/annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,12 +126,22 @@ def match_existing_orfs_updated_annotations(new_fp, existing_fp, overlap_min=0.8
# Find overlaps
positions = contig_positions_new[contig] + contig_positions_existing[contig]
features_matched = discover_overlaps(positions, overlap_min)
# Discover those not matched
features_matched_flat = set()
for features in features_matched:
features_matched_flat.update(features)
new_unmatched = set(features_new[contig]).difference(features_matched_flat)
existing_unmatched = set(features_existing[contig]).difference(features_matched_flat)

# Discover those not matched using location comparison
features_matched_new = [f[0] for f in features_matched]
features_matched_existing = [f[1] for f in features_matched]

# Find unmatched features by comparing locations
new_unmatched = []
for feature in features_new[contig]:
if not any(f.location == feature.location for f in features_matched_new):
new_unmatched.append(feature)

existing_unmatched = []
for feature in features_existing[contig]:
if not any(f.location == feature.location for f in features_matched_existing):
existing_unmatched.append(feature)

# For each matched pair, update the bounds, locus tag, product, and gene (if present) to match the existing feature
quals = ('locus_tag', 'product', 'gene')
features_updated = list()
Expand All @@ -141,6 +151,7 @@ def match_existing_orfs_updated_annotations(new_fp, existing_fp, overlap_min=0.8
continue
feature_new.qualifiers[qual] = feature_existing.qualifiers[qual]
features_updated.append(feature_new)

# Add re-annotated and existing ORFs that had no match
features_updated.extend(new_unmatched)
features_updated.extend(existing_unmatched)
Expand All @@ -152,6 +163,7 @@ def match_existing_orfs_updated_annotations(new_fp, existing_fp, overlap_min=0.8
print(f'\t{len(existing_unmatched)} existing features unmatched')
print(f'\t{len(new_unmatched)} re-annotated features unmatched')
print(f'\t{len(features_updated)} total features')

# Update new genbank with new feature set
update_genbank_annotations(new_fp, contig_features_updated)

Expand Down Expand Up @@ -179,7 +191,8 @@ def discover_overlaps(positions, overlap_min):
# pylint: disable=too-many-branches
in_new = list()
in_existing = list()
features_matched = set()
# Store matches in a list rather than a set: SeqFeature objects are unhashable
features_matched = []
for position in sorted(positions, key=lambda k: k['position']):
# Add features we're entering and remove those we're exiting
if position['type'] == 'start':
Expand All @@ -199,7 +212,12 @@ def discover_overlaps(positions, overlap_min):
for feature_existing in in_existing:
if feature_new.strand != feature_existing.strand:
continue
if (feature_new, feature_existing) in features_matched:
# Check if this pair is already matched by comparing locations
already_matched = any(
fn.location == feature_new.location and fe.location == feature_existing.location
for fn, fe in features_matched
)
if already_matched:
continue
# Get overlap
start = max(feature_new.location.start, feature_existing.location.start)
Expand All @@ -212,7 +230,7 @@ def discover_overlaps(positions, overlap_min):
# Update note to include overlap information
[note_new] = feature_new.qualifiers['note']
feature_new.qualifiers['note'][0] = f'{note_new};overlap:{overlap_new:.2f}'
features_matched.add((feature_new, feature_existing))
features_matched.append((feature_new, feature_existing))
return features_matched


Expand Down

0 comments on commit 5d24bec

Please sign in to comment.