Skip to content

Commit

Permalink
Use unsigned integers for seqnames (#131)
Browse files: browse the repository at this point in the history
  • Loading branch information
jkanche authored Oct 23, 2024
1 parent b5d005d commit 9f6af69
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 10 deletions.
13 changes: 6 additions & 7 deletions src/genomicranges/GenomicRanges.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -223,15 +223,14 @@ def _sanitize_seqnames(self, seqnames, seqinfo):
seqnames = np.asarray([self._reverse_seqindex[x] for x in seqnames])

if len(seqnames) == 0:
seqnames = seqnames.astype(np.int8)
seqnames = seqnames.astype(np.uint8)
else:
num_uniq = np.max(seqnames)
if num_uniq < 2**8:
seqnames = seqnames.astype(np.int8)
elif num_uniq < 2**16:
seqnames = seqnames.astype(np.int16)
elif num_uniq < 2**32:
seqnames = seqnames.astype(np.int32)
_types = [np.uint8, np.uint16, np.uint32, np.uint64]
for _dtype in _types:
if num_uniq < np.iinfo(_dtype).max:
seqnames = seqnames.astype(_dtype)
break

return seqnames

Expand Down
6 changes: 3 additions & 3 deletions tests/test_gr_init_seqnames.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -18,20 +18,20 @@ def test_create_gr():
)

assert gr is not None
assert gr._seqnames.dtype == np.int8
assert gr._seqnames.dtype == np.uint8

gr16 = GenomicRanges(
seqnames=[f"chr{i}" for i in range(500)],
ranges=IRanges(start=range(0, 500), width=range(10, 510)),
)

assert gr16 is not None
assert gr16._seqnames.dtype == np.int16
assert gr16._seqnames.dtype == np.uint16

gr32 = GenomicRanges(
seqnames=[f"chr{i}" for i in range(2**16 + 1)],
ranges=IRanges(start=range(0, 2**16 + 1), width=range(10, 2**16 + 11)),
)

assert gr32 is not None
assert gr32._seqnames.dtype == np.int32
assert gr32._seqnames.dtype == np.uint32

0 comments on commit 9f6af69

Please sign in to comment.