Skip to content

Commit

Permalink
Cython implementations for to_allele_counts (#288)
Browse files Browse the repository at this point in the history
* cython implementations for to_allele_counts

* use optimisations for to_allele_counts

* regenerated model.c with to_allele_counts functions

* recythonize

* release note

* Use uint8_t for to_allele_counts and remove dtype argument.

* Remove use of dtype argument when calling to_allele_counts

* re-cythonize allel/opt/model.pyx

Co-authored-by: Alistair Miles <[email protected]>
  • Loading branch information
timothymillar and alimanfoo authored Aug 7, 2020
1 parent 821997d commit 5f2f73f
Show file tree
Hide file tree
Showing 5 changed files with 9,953 additions and 1,887 deletions.
34 changes: 21 additions & 13 deletions allel/model/ndarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from allel.opt.model import genotype_array_pack_diploid, genotype_array_unpack_diploid, \
genotype_array_count_alleles, genotype_array_count_alleles_masked, \
genotype_array_count_alleles_subpop, genotype_array_count_alleles_subpop_masked, \
genotype_array_to_allele_counts, genotype_array_to_allele_counts_masked, \
haplotype_array_count_alleles, haplotype_array_count_alleles_subpop, \
haplotype_array_map_alleles, allele_counts_array_map_alleles
from .generic import index_genotype_vector, compress_genotypes, \
Expand Down Expand Up @@ -883,7 +884,7 @@ def to_n_alt(self, fill=0, dtype='i1'):

return out

def to_allele_counts(self, max_allele=None, dtype='u1'):
def to_allele_counts(self, max_allele=None):
"""Transform genotype calls into allele counts per call.
Parameters
Expand Down Expand Up @@ -923,22 +924,29 @@ def to_allele_counts(self, max_allele=None, dtype='u1'):
# determine alleles to count
if max_allele is None:
max_allele = self.max()
alleles = list(range(max_allele + 1))

# set up output array
outshape = self.shape[:-1] + (len(alleles),)
out = np.zeros(outshape, dtype=dtype)
# use optimisations
values = memoryview_safe(self.values)
mask = memoryview_safe(self.mask).view(dtype='u1') if self.mask is not None else None

# handle genotype vector case as an array of one genotype
if values.ndim == 2:
is_vector = True
values = np.expand_dims(values, 1)
if mask is not None:
mask = np.expand_dims(mask, 1)
else:
is_vector = False

for allele in alleles:
# count alleles along ploidy dimension
allele_match = self.values == allele
if self.mask is not None:
allele_match &= ~self.mask[..., np.newaxis]
np.sum(allele_match, axis=-1, out=out[..., allele])
if mask is None:
out = genotype_array_to_allele_counts(values, max_allele)
else:
out = genotype_array_to_allele_counts_masked(values, mask, max_allele)

if self.ndim == 2:
if is_vector:
out = np.squeeze(out, axis=1)
out = GenotypeAlleleCountsVector(out)
elif self.ndim == 3:
else:
out = GenotypeAlleleCountsArray(out)

return out
Expand Down
Loading

0 comments on commit 5f2f73f

Please sign in to comment.