1
1
import os
2
- from typing import Dict , Tuple , List , Optional
2
+ from typing import Dict , Tuple , Optional , Any
3
3
from . import maptide # type: ignore
4
4
5
5
6
+ BASES = ["A" , "C" , "G" , "T" , "DS" , "N" ]
7
+
8
+
6
9
def query (
7
10
bam : str ,
8
11
region : Optional [str ] = None ,
9
12
bai : Optional [str ] = None ,
10
13
mapping_quality : int = 0 ,
11
14
base_quality : int = 0 ,
12
- ) -> Dict [str , Dict [Tuple [int , int ], List [int ]]]:
15
+ annotated : bool = False ,
16
+ ) -> Dict [str , Dict [Tuple [int , int ], Any ]]:
13
17
"""Performs a pileup over a region, obtaining per-position base frequencies for the provided BAM file.
14
18
15
19
Parameters
@@ -24,6 +28,8 @@ def query(
24
28
Minimum mapping quality for a read to be included in the pileup (default: 0)
25
29
base_quality : int, optional
26
30
Minimum base quality for a base within a read to be included in the pileup (default: 0)
31
+ annotated : bool, optional
32
+ If True, return each position's frequencies annotated with their bases, as a `dict[str, int]`. If False, return the frequencies only, as a `list[int]` (default: False)
27
33
28
34
Returns
29
35
-------
@@ -34,9 +40,16 @@ def query(
34
40
if region :
35
41
if not bai and os .path .isfile (bam + ".bai" ):
36
42
bai = bam + ".bai"
37
- return maptide .query (bam , bai , region , mapping_quality , base_quality )
43
+ data = maptide .query (bam , bai , region , mapping_quality , base_quality )
38
44
else :
39
- return maptide .all (bam , mapping_quality , base_quality )
45
+ data = maptide .all (bam , mapping_quality , base_quality )
46
+
47
+ if annotated :
48
+ for _ , positions in data .items ():
49
+ for position , frequencies in positions .items ():
50
+ positions [position ] = dict (zip (BASES , frequencies ))
51
+
52
+ return data
40
53
41
54
42
55
def parse_region (region : str ) -> Tuple [str , int , int ]:
0 commit comments