Skip to content
4 changes: 2 additions & 2 deletions prymer/api/picking.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,13 +165,13 @@ def build_primer_pairs( # noqa: C901

# If the right primer isn't "to the right" of the left primer, move on
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These changes were from linting.

if rp.span.start < lp.span.start or lp.span.end > rp.span.end:
first_right_primer_idx = max(first_right_primer_idx, j+1)
first_right_primer_idx = max(first_right_primer_idx, j + 1)
continue

amp_span = PrimerPair.calculate_amplicon_span(lp, rp)

if amp_span.length < amplicon_sizes.min:
first_right_primer_idx = max(first_right_primer_idx, j+1)
first_right_primer_idx = max(first_right_primer_idx, j + 1)
continue

if amp_span.length > amplicon_sizes.max:
Expand Down
66 changes: 54 additions & 12 deletions prymer/offtarget/offtarget_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,9 @@ def __init__( # noqa: C901
max_gap_opens: int = 0,
max_gap_extends: int = -1,
max_amplicon_size: int = 1000,
min_amplicon_size: int = 1,
min_primer_pair_hits: int = 1,
allow_overlapping_hits: bool = False,
cache_results: bool = True,
threads: Optional[int] = None,
keep_spans: bool = True,
Expand All @@ -190,7 +192,8 @@ def __init__( # noqa: C901
and `max_primer_hits`.
2. Filtering of individual primers: `max_primer_hits`.
3. Checking of primer pairs: `max_primer_hits`, `min_primer_pair_hits`,
`max_primer_pair_hits`, and `max_amplicon_size`.
`max_primer_pair_hits`, `allow_overlapping_hits`, `min_amplicon_size, and
`max_amplicon_size`.

The `three_prime_region_length` parameter is used as the seed length for `bwa aln`.

Expand Down Expand Up @@ -225,6 +228,13 @@ def __init__( # noqa: C901
"long gaps". Must be greater than or equal to -1.
max_amplicon_size: the maximum amplicon size to consider amplifiable. Must be greater
than 0.
min_amplicon_size: the minimum amplicon size to consider amplifiable. Must be between 1
and `max_amplicon_size` inclusive. If `allow_overlapping_hits` is False, primer
pair hits that overlap each other will not be considered, even if they would create
an amplicon of at least `min_amplicon_size`. Default 1bp.
allow_overlapping_hits: if True, allow hits to the individual primers in a primer pair
to overlap with each other. If False, no overlap will be allowed, even if
`min_amplicon_size` would otherwise allow it. Default False.
cache_results: if True, cache results for faster re-querying
threads: the number of threads to use when invoking bwa
keep_spans: if True, [[OffTargetResult]] objects will be reported with amplicon spans
Expand All @@ -235,6 +245,8 @@ def __init__( # noqa: C901

Raises:
ValueError: If `max_amplicon_size` is not greater than 0.
ValueError: If `min_amplicon_size` is outside the range 1 to `max_amplicon_size`,
inclusive.
ValueError: If any of `max_primer_hits`, `max_primer_pair_hits`, or
`min_primer_pair_hits` are not greater than or equal to 0.
ValueError: If `three_prime_region_length` is not greater than or equal to 8.
Expand All @@ -247,6 +259,11 @@ def __init__( # noqa: C901
errors: list[str] = []
if max_amplicon_size < 1:
errors.append(f"'max_amplicon_size' must be greater than 0. Saw {max_amplicon_size}")
if min_amplicon_size < 1 or min_amplicon_size > max_amplicon_size:
errors.append(
f"'min_amplicon_size' must be between 1 and 'max_amplicon_size'={max_amplicon_size}"
f" inclusive. Saw {min_amplicon_size}"
)
if max_primer_hits < 0:
errors.append(
f"'max_primer_hits' must be greater than or equal to 0. Saw {max_primer_hits}"
Expand Down Expand Up @@ -310,6 +327,8 @@ def __init__( # noqa: C901
self._max_primer_pair_hits: int = max_primer_pair_hits
self._min_primer_pair_hits: int = min_primer_pair_hits
self._max_amplicon_size: int = max_amplicon_size
self._min_amplicon_size: int = min_amplicon_size
self._allow_overlapping_hits: bool = allow_overlapping_hits
self._cache_results: bool = cache_results
self._keep_spans: bool = keep_spans
self._keep_primer_spans: bool = keep_primer_spans
Expand Down Expand Up @@ -357,7 +376,11 @@ def check_all(self, primer_pairs: list[PrimerPair]) -> dict[PrimerPair, OffTarge

This method maps each primer to the specified reference with `bwa aln` to search for
off-target hits. Possible amplicons are identified from the pairwise combinations of these
alignments, up to the specified `max_amplicon_size`.
alignments.

If `allow_overlapping_hits=True`, amplicons in the size range `min_amplicon_size` to
`max_amplicon_size` will be returned. If `allow_overlapping_hits=False`, the size range will
still apply, but will exclude amplicons with overlapping hits.

Primer pairs are marked as passing if both of the following are true:
1. Each primer has no more than `max_primer_hits` alignments to the genome.
Expand Down Expand Up @@ -461,15 +484,19 @@ def _build_off_target_result(
self._to_amplicons(
positive_hits=left_positive_hits[refname],
negative_hits=right_negative_hits[refname],
min_len=self._min_amplicon_size,
max_len=self._max_amplicon_size,
allow_overlapping_hits=self._allow_overlapping_hits,
strand=Strand.POSITIVE,
)
)
amplicons.extend(
self._to_amplicons(
positive_hits=right_positive_hits[refname],
negative_hits=left_negative_hits[refname],
min_len=self._min_amplicon_size,
max_len=self._max_amplicon_size,
allow_overlapping_hits=self._allow_overlapping_hits,
strand=Strand.NEGATIVE,
)
)
Expand Down Expand Up @@ -546,14 +573,21 @@ def mappings_of(self, primers: list[PrimerType]) -> dict[str, BwaResult]:

@staticmethod
def _to_amplicons(
positive_hits: list[BwaHit], negative_hits: list[BwaHit], max_len: int, strand: Strand
positive_hits: list[BwaHit],
negative_hits: list[BwaHit],
max_len: int,
strand: Strand,
min_len: int = 1,
allow_overlapping_hits: bool = False,
) -> list[Span]:
"""Takes lists of positive strand hits and negative strand hits and constructs amplicon
mappings anywhere a positive strand hit and a negative strand hit occur where the end of
the negative strand hit is no more than `max_len` from the start of the positive strand
hit.
the negative strand hit is at least `min_len` and no more than `max_len` from the start of
the positive strand hit.

Primers may not overlap.
Primer hits may overlap if `min_len` is shorter than the length of the left and right
primers together and `allow_overlapping_hits` is True. If `allow_overlapping_hits` is False,
these will not be returned.

Args:
positive_hits: List of hits on the positive strand for one of the primers in the pair.
Expand All @@ -563,6 +597,12 @@ def _to_amplicons(
`positive_hits` are for the left primer and `negative_hits` are for the right
primer. Set to Strand.NEGATIVE if `positive_hits` are for the right primer and
`negative_hits` are for the left primer.
min_len: Minimum length of amplicons to consider. If `allow_overlapping_hits` is False,
primer pair hits that overlap each other will not be considered, even if they would
create an amplicon of at least `min_len`. Default 1bp.
allow_overlapping_hits: if True, allow hits to the individual primers in a primer pair
to overlap with each other. If False, no overlap will be allowed, even if
`min_amplicon_size` would otherwise allow it. Default False.

Raises:
ValueError: If any of the positive hits are not on the positive strand, or any of the
Expand Down Expand Up @@ -600,13 +640,15 @@ def _to_amplicons(
negative_hits_sorted[prev_negative_hit_index:],
start=prev_negative_hit_index,
):
# TODO: Consider allowing overlapping positive and negative hits.
if (
negative_hit.start > positive_hit.end
and negative_hit.end - positive_hit.start + 1 <= max_len
# Hit coordinates are 1-based end-inclusive.
amplicon_length: int = negative_hit.end - positive_hit.start + 1
if min_len <= amplicon_length <= max_len and (
allow_overlapping_hits or negative_hit.start > positive_hit.end
):
# If the negative hit starts to the right of the positive hit, and the amplicon
# length is <= max_len, add it to the list of amplicon hits to be returned.
# If overlapping hits are allowed and the amplicon length is in the correct
# size range, -OR- if overlapping hits are not allowed, the negative hit starts
# after the positive hit ends, and the amplicon length is less than or equal
# to the allowed maximum, add it to the list of amplicon hits to be returned.
amplicons.append(
Span(
refname=positive_hit.refname,
Expand Down
106 changes: 93 additions & 13 deletions tests/offtarget/test_offtarget.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ def _build_detector(
three_prime_region_length: int = 20,
max_mismatches_in_three_prime_region: int = 0,
max_mismatches: int = 0,
min_amplicon_size: int = 1,
max_amplicon_size: int = 250,
allow_overlapping_hits: bool = False,
cache_results: bool = True,
) -> OffTargetDetector:
"""Builds an `OffTargetDetector` with strict defaults"""
Expand All @@ -35,7 +37,9 @@ def _build_detector(
three_prime_region_length=three_prime_region_length,
max_mismatches_in_three_prime_region=max_mismatches_in_three_prime_region,
max_mismatches=max_mismatches,
min_amplicon_size=min_amplicon_size,
max_amplicon_size=max_amplicon_size,
allow_overlapping_hits=allow_overlapping_hits,
cache_results=cache_results,
keep_spans=True,
keep_primer_spans=True,
Expand Down Expand Up @@ -281,7 +285,75 @@ def test_to_amplicons(
) -> None:
with _build_detector(ref_fasta=ref_fasta, cache_results=cache_results) as detector:
actual = detector._to_amplicons(
positive_hits=[positive], negative_hits=[negative], max_len=250, strand=strand
positive_hits=[positive],
negative_hits=[negative],
min_len=200,
max_len=250,
strand=strand,
)
assert actual == expected, test_id


# Test that using the cache (or not) does not affect the results
# NB: BwaHit and Span coordinates are 1-based end-inclusive
#
# One mapping - Overlapping primers (1bp overlap)
# >>>>>>>>>>
# <<<<<<<<<<
# 19bp amplicon [1,19]
#
# One mapping - Overlapping primers (1bp amplicon)
# >>>>>>>>>>
# <<<<<<<<<<
# 1bp amplicon [10,10]
#
# No mappings - amplicon length would be 0.
# >>>>>>>>>>
# <<<<<<<<<<
@pytest.mark.parametrize("cache_results", [True, False])
@pytest.mark.parametrize(
"test_id, positive, negative, strand, expected",
[
(
"One mapping - Overlapping primers (1bp overlap)",
BwaHit.build("chr1", 1, False, "10M", 0),
BwaHit.build("chr1", 10, True, "10M", 0),
Strand.POSITIVE,
[Span(refname="chr1", start=1, end=19, strand=Strand.POSITIVE)],
),
(
"One mapping - Overlapping primers (1bp amplicon)",
BwaHit.build("chr1", 10, False, "10M", 0),
BwaHit.build("chr1", 1, True, "10M", 0),
Strand.POSITIVE,
[Span(refname="chr1", start=10, end=10, strand=Strand.POSITIVE)],
),
(
"No mappings",
BwaHit.build("chr1", 11, False, "10M", 0),
BwaHit.build("chr1", 1, True, "10M", 0),
Strand.POSITIVE,
[],
),
],
)
def test_to_amplicons_overlapping(
ref_fasta: Path,
test_id: str,
positive: BwaHit,
negative: BwaHit,
strand: Strand,
expected: list[Span],
cache_results: bool,
) -> None:
with _build_detector(ref_fasta=ref_fasta, cache_results=cache_results) as detector:
actual = detector._to_amplicons(
positive_hits=[positive],
negative_hits=[negative],
min_len=1,
max_len=250,
allow_overlapping_hits=True,
strand=strand,
)
assert actual == expected, test_id

Expand Down Expand Up @@ -322,7 +394,11 @@ def test_to_amplicons_value_error(
pytest.raises(ValueError, match=expected_error),
):
detector._to_amplicons(
positive_hits=[positive], negative_hits=[negative], max_len=250, strand=Strand.POSITIVE
positive_hits=[positive],
negative_hits=[negative],
min_len=200,
max_len=250,
strand=Strand.POSITIVE,
)


Expand Down Expand Up @@ -356,20 +432,22 @@ class CustomPrimer(Oligo):
@pytest.mark.parametrize(
(
"max_primer_hits,max_primer_pair_hits,min_primer_pair_hits,three_prime_region_length,"
"max_mismatches_in_three_prime_region,max_mismatches,max_amplicon_size,"
"max_mismatches_in_three_prime_region,max_mismatches,max_amplicon_size,min_amplicon_size,"
"max_gap_opens,max_gap_extends,expected_error"
),
[
(-1, 1, 1, 20, 0, 0, 1, 0, 0, "'max_primer_hits' must be greater than or equal to 0. Saw -1"), # noqa: E501
(1, -1, 1, 20, 0, 0, 1, 0, 0, "'max_primer_pair_hits' must be greater than or equal to 0. Saw -1"), # noqa: E501
(1, 1, -1, 20, 0, 0, 1, 0, 0, "'min_primer_pair_hits' must be greater than or equal to 0. Saw -1"), # noqa: E501
(1, 1, 1, 5, 0, 0, 1, 0, 0, "'three_prime_region_length' must be greater than or equal to 8. Saw 5"), # noqa: E501
(1, 1, 1, 20, -1, 0, 1, 0, 0, "'max_mismatches_in_three_prime_region' must be between 0 and 'three_prime_region_length'=20 inclusive. Saw -1"), # noqa: E501
(1, 1, 1, 20, 21, 0, 1, 0, 0, "'max_mismatches_in_three_prime_region' must be between 0 and 'three_prime_region_length'=20 inclusive. Saw 21"), # noqa: E501
(1, 1, 1, 20, 0, -1, 1, 0, 0, "'max_mismatches' must be greater than or equal to 0. Saw -1"), # noqa: E501
(1, 1, 1, 20, 0, 0, 0, 0, 0, "'max_amplicon_size' must be greater than 0. Saw 0"),
(1, 1, 1, 20, 0, 0, 1, -1, 0, "'max_gap_opens' must be greater than or equal to 0. Saw -1"),
(1, 1, 1, 20, 0, 5, 1, 0, -2, re.escape("'max_gap_extends' must be -1 (for unlimited extensions up to 'max_mismatches'=5) or greater than or equal to 0. Saw -2")), #noqa: E501
(-1, 1, 1, 20, 0, 0, 1, 1, 0, 0, "'max_primer_hits' must be greater than or equal to 0. Saw -1"), # noqa: E501
(1, -1, 1, 20, 0, 0, 1, 1, 0, 0, "'max_primer_pair_hits' must be greater than or equal to 0. Saw -1"), # noqa: E501
(1, 1, -1, 20, 0, 0, 1, 1, 0, 0, "'min_primer_pair_hits' must be greater than or equal to 0. Saw -1"), # noqa: E501
(1, 1, 1, 5, 0, 0, 1, 1, 0, 0, "'three_prime_region_length' must be greater than or equal to 8. Saw 5"), # noqa: E501
(1, 1, 1, 20, -1, 0, 1, 1, 0, 0, "'max_mismatches_in_three_prime_region' must be between 0 and 'three_prime_region_length'=20 inclusive. Saw -1"), # noqa: E501
(1, 1, 1, 20, 21, 0, 1, 1, 0, 0, "'max_mismatches_in_three_prime_region' must be between 0 and 'three_prime_region_length'=20 inclusive. Saw 21"), # noqa: E501
(1, 1, 1, 20, 0, -1, 1, 1, 0, 0, "'max_mismatches' must be greater than or equal to 0. Saw -1"), # noqa: E501
(1, 1, 1, 20, 0, 0, 0, 1, 0, 0, "'max_amplicon_size' must be greater than 0. Saw 0"),
(1, 1, 1, 20, 0, 0, 1, 1, -1, 0, "'max_gap_opens' must be greater than or equal to 0. Saw -1"), # noqa: E501
(1, 1, 1, 20, 0, 5, 1, 1, 0, -2, re.escape("'max_gap_extends' must be -1 (for unlimited extensions up to 'max_mismatches'=5) or greater than or equal to 0. Saw -2")), #noqa: E501
(1, 1, 1, 20, 0, 0, 10, 0, 0, 0, "'min_amplicon_size' must be between 1 and 'max_amplicon_size'=10 inclusive. Saw 0"), # noqa: E501
(1, 1, 1, 20, 0, 0, 10, 11, 0, 0, "'min_amplicon_size' must be between 1 and 'max_amplicon_size'=10 inclusive. Saw 11"), # noqa: E501
],
)
# fmt: on
Expand All @@ -382,6 +460,7 @@ def test_init(
max_mismatches_in_three_prime_region: int,
max_mismatches: int,
max_amplicon_size: int,
min_amplicon_size: int,
max_gap_opens: int,
max_gap_extends: int,
expected_error: str,
Expand All @@ -396,6 +475,7 @@ def test_init(
max_mismatches_in_three_prime_region=max_mismatches_in_three_prime_region,
max_mismatches=max_mismatches,
max_amplicon_size=max_amplicon_size,
min_amplicon_size=min_amplicon_size,
max_gap_opens=max_gap_opens,
max_gap_extends=max_gap_extends,
)
Loading