Skip to content

Commit

Permalink
Merge pull request #85 from kedhammar/ake-dev
Browse files Browse the repository at this point in the history
Misc. improvements, unit tests and mypy proofing
  • Loading branch information
kedhammar authored Jun 26, 2024
2 parents 519f74b + 53a2718 commit ec25bfe
Show file tree
Hide file tree
Showing 20 changed files with 765 additions and 145 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/anglerfish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.10.2]
python-version: [3.12]

steps:
# Checkout code and install miniconda + environment
Expand Down Expand Up @@ -37,10 +37,10 @@ jobs:
- shell: bash -l {0}
name: Run anglerfish with test data
run: |
anglerfish run -s test/samples.csv
anglerfish run -s testdata/samples.csv
# Run anglerfish explore
- shell: bash -l {0}
name: Run anglerfish explore
run: |
anglerfish explore -f test/BC18_P14351_1001.fastq.gz -o test/explore_output
anglerfish explore -f testdata/BC18_P14351_1001.fastq.gz -o explore_output
10 changes: 5 additions & 5 deletions .github/workflows/lint-code.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.10"
python-version: "3.12"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
Expand All @@ -34,7 +34,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.10"
python-version: "3.12"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
Expand All @@ -51,17 +51,17 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.10"
python-version: "3.12"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install mypy
# Start by installing type stubs
- name: mypy --> Install stubs
run: echo -e "y" | mypy --install-types **/*.py || exit 0
run: echo -e "y" | mypy --install-types . || exit 0
- name: mypy --> Static type checking
# Configured in pyproject.toml
run: mypy **/*.py
run: mypy .

# Use Prettier to check various file formats
prettier:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/pypi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ jobs:
- uses: actions/checkout@v4
name: Check out source-code repository

- name: Set up Python 3.10
- name: Set up Python 3.12
uses: actions/setup-python@v4
with:
python-version: 3.10.10
python-version: 3.12

- name: Install python dependencies
run: |
Expand Down
44 changes: 44 additions & 0 deletions .github/workflows/test-code.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
name: test-code
on: [push, pull_request]

# Cancel if a newer run is started
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true

jobs:
run_pytest:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.12]

steps:
# Check out code and install micromamba + environment
- uses: actions/checkout@v4
- uses: mamba-org/setup-micromamba@v1
with:
init-shell: bash
create-args: >-
python=${{ matrix.python-version }}
pip
environment-file: environment.yml
environment-name: anglerfish-dev

# Install Anglerfish
- shell: bash -l {0}
name: Install Anglerfish
run: |
python -m pip install .
# Install Pytest
- shell: bash -l {0}
name: Install Pytest
run: |
python -m pip install pytest
# Run Pytest
- shell: bash -l {0}
name: Run Pytest
run: |
pytest .
8 changes: 5 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
*.egg-info
*.pyc
*~
*.egg-info
.*_cache
.DS_Store
.benchmarks
.*_cache
node_modules
.ignoredir
.vscode
__pycache__
build
node_modules
13 changes: 8 additions & 5 deletions anglerfish/anglerfish.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import numpy as np
import pkg_resources

from .demux.adaptor import Adaptor
from .demux.demux import (
cluster_matches,
layout_matches,
Expand Down Expand Up @@ -86,14 +87,16 @@ def run_demux(args):
adaptor_set: set[tuple[str, str]] = set(adaptor_tuples)

# Create a dictionary with the adaptors as keys and an empty list as value
adaptors_sorted: dict[tuple[str, str], list] = dict([(i, []) for i in adaptor_set])
adaptors_sorted: dict[tuple[str, str], list[tuple[str, Adaptor, str]]] = dict(
[(i, []) for i in adaptor_set]
)

# Populate the dictionary values with sample-specific information
"""
adaptors_sorted = {
( adaptor_name, ont_barcode ) : [
(sample_name, adaptor, fastq),
(sample_name, adaptor, fastq),
( adaptor_name_str, ont_barcode_str ) : [
(sample_name_str, Adaptor, fastq_str),
(sample_name_str, Adaptor, fastq_str),
...
],
...
Expand Down Expand Up @@ -168,7 +171,7 @@ def run_demux(args):
**flips[args.force_rc],
)
flipped_i7, flipped_i5 = flips[args.force_rc].values()
elif args.lenient: # Try reverse complementing the I5 and/or i7 indices and choose the best match
elif args.lenient: # Try reverse complementing the i5 and/or i7 indices and choose the best match
flipped = {}
results = []
pool = multiprocessing.Pool(
Expand Down
74 changes: 52 additions & 22 deletions anglerfish/demux/adaptor.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,36 @@ class AdaptorPart:
"""This class is used for the i5 or i7 adaptor."""

def __init__(self, sequence_token: str, name: str, index_seq: str | None):
## Type declaration of attributes to be assigned upon instantiation
# Attributes from arguments
self.name: str
self.sequence_token: str
self.index_seq: str | None

# Index attributes
self.has_index: bool
self.len_index: int | None
self.len_before_index: int | None
self.len_after_index: int | None

# UMI attributes
self.has_umi: bool
self.len_umi: int | None
self.len_umi_before_index: int | None
self.len_umi_after_index: int | None

# Length attributes
self.len_total: int | None
self.len_constant: int

# Instantiation outsourced to private method
self._setup(sequence_token, name, index_seq)

def _setup(self, sequence_token: str, name: str, index_seq: str | None):
# Assign attributes from args
self.sequence_token: str = sequence_token
self.name: str = name
self.index_seq: str | None = index_seq
self.sequence_token = sequence_token
self.name = name
self.index_seq = index_seq

# Index bool and len
if has_match(INDEX_TOKEN, self.sequence_token):
Expand All @@ -76,6 +102,10 @@ def __init__(self, sequence_token: str, name: str, index_seq: str | None):
self.len_index = len(index_seq) if index_seq else None

else:
if self.index_seq is not None:
raise UserWarning(
"Index sequence specified, but no index token found in adaptor sequence."
)
self.has_index = False
self.len_index = 0

Expand All @@ -87,21 +117,13 @@ def __init__(self, sequence_token: str, name: str, index_seq: str | None):
)
elif len(umi_tokens) == 1:
self.has_umi = True
self.len_umi = int(
re.search(UMI_LENGTH_TOKEN, self.sequence_token).group(1)
)
umi_token_search = re.search(UMI_LENGTH_TOKEN, self.sequence_token)
assert isinstance(umi_token_search, re.Match)
self.len_umi = int(umi_token_search.group(1))
else:
self.has_umi = False
self.len_umi = 0

# Type declaration of attributes to be assigned
self.len_before_index: int | None
self.len_after_index: int | None
self.len_umi_before_index: int | None
self.len_umi_after_index: int | None
self.len_total: int | None
self.len_constant: int

# Lengths
if self.has_index and self.has_umi:
# Index and UMI
Expand Down Expand Up @@ -149,7 +171,12 @@ def __init__(self, sequence_token: str, name: str, index_seq: str | None):
self.len_before_index = None
self.len_after_index = None

self.len_total = len(self.get_mask(insert_Ns=True)) if self.index_seq else None
if (
self.has_index is True and self.index_seq is not None
) or self.has_index is False:
self.len_total = len(self.get_mask(insert_Ns=True))
else:
self.len_total = None
self.len_constant = len(self.get_mask(insert_Ns=False))

def get_mask(self, insert_Ns: bool = True) -> str:
Expand All @@ -165,11 +192,12 @@ def get_mask(self, insert_Ns: bool = True) -> str:
else 0
)

umi_mask_length = (
max(self.len_umi_after_index, self.len_umi_before_index)
if insert_Ns and self.has_umi
else 0
)
if insert_Ns and self.has_umi:
assert self.len_umi_before_index is not None
assert self.len_umi_after_index is not None
umi_mask_length = max(self.len_umi_after_index, self.len_umi_before_index)
else:
umi_mask_length = 0

# Test if the index is specified in the adaptor sequence when it shouldn't be
if (
Expand All @@ -189,7 +217,7 @@ def get_mask(self, insert_Ns: bool = True) -> str:
return self.sequence_token


def has_match(pattern: re.Pattern, query: str) -> bool:
def has_match(pattern: re.Pattern | str, query: str) -> bool:
"""General function to check if a string contains a pattern."""
match = re.search(pattern, query)
if match is None:
Expand All @@ -209,6 +237,8 @@ def validate_adaptors(adaptors_dict: dict):
f"Adaptor {adaptor_name} has an invalid sequence for {i}: {sequence_token}. Does not conform to the pattern {VALID_SEQUENCE_TOKEN_PATTERN}."
)

return True


def load_adaptors(raw: bool = False) -> list[Adaptor] | dict:
"""Fetch all adaptors.
Expand All @@ -226,7 +256,7 @@ def load_adaptors(raw: bool = False) -> list[Adaptor] | dict:
adaptors_dict = yaml.safe_load(f)

# Validate input
validate_adaptors(adaptors_dict)
assert validate_adaptors(adaptors_dict) is True

# Optionally, return raw dict
if raw:
Expand Down
Loading

0 comments on commit ec25bfe

Please sign in to comment.