@@ -429,6 +429,8 @@ def nodes_df(self):
429
429
"ancestors_span" : child_right - child_left ,
430
430
"child_left" : child_left , # FIXME add test for this
431
431
"child_right" : child_right , # FIXME add test for this
432
+ "child_left" : child_left , # FIXME add test for this
433
+ "child_right" : child_right , # FIXME add test for this
432
434
"is_sample" : is_sample ,
433
435
}
434
436
)
@@ -566,7 +568,7 @@ def calc_mutations_per_tree(self):
566
568
mutations_per_tree [unique_values ] = counts
567
569
return mutations_per_tree
568
570
569
- def compute_ancestor_spans_heatmap_data (self , win_x_size = 1_000_000 , win_y_size = 500 ):
571
+ def compute_ancestor_spans_heatmap_data (self , num_x_bins , num_y_bins ):
570
572
"""
571
573
Counts the nodes whose child_left-child_right span overlaps each genomic-time bin
572
574
"""
@@ -575,38 +577,35 @@ def compute_ancestor_spans_heatmap_data(self, win_x_size=1_000_000, win_y_size=5
575
577
nodes_left = nodes_df .child_left
576
578
nodes_right = nodes_df .child_right
577
579
nodes_time = nodes_df .time
578
- ancestors_span = nodes_df .ancestors_span
579
580
580
- num_x_wins = int (np .ceil (nodes_right .max () - nodes_left .min ()) / win_x_size )
581
- num_y_wins = int (np .ceil (nodes_time .max () / win_y_size ))
582
- heatmap_sums = np .zeros ((num_x_wins , num_y_wins ))
583
- heatmap_counts = np .zeros ((num_x_wins , num_y_wins ))
581
+ x_bins = np .linspace (nodes_left .min (), nodes_right .max (), num_x_bins + 1 )
582
+ y_bins = np .linspace (0 , nodes_time .max (), num_y_bins + 1 )
583
+ heatmap_counts = np .zeros ((num_x_bins , num_y_bins ))
584
+
585
+ x_starts = np .digitize (nodes_left , x_bins , right = True )
586
+ x_ends = np .digitize (nodes_right , x_bins , right = True )
587
+ y_starts = np .digitize (nodes_time , y_bins , right = True )
584
588
585
589
for u in range (len (nodes_left )):
586
- x_start = int (
587
- np .floor (nodes_left [u ] / win_x_size )
588
- ) # map the node span to the x-axis bins it overlaps
589
- x_end = int (np .floor (nodes_right [u ] / win_x_size ))
590
- y = max (0 , int (np .floor (nodes_time [u ] / win_y_size )) - 1 )
591
- heatmap_sums [x_start :x_end , y ] += min (ancestors_span [u ], win_x_size )
592
- heatmap_counts [x_start :x_end , y ] += 1
593
-
594
- avg_spans = heatmap_sums / heatmap_counts
595
- indices = np .indices ((num_x_wins , num_y_wins ))
596
- x_coords = indices [0 ] * win_x_size
597
- y_coords = indices [1 ] * win_y_size
590
+ x_start = max (0 , x_starts [u ] - 1 )
591
+ x_end = max (0 , x_ends [u ] - 1 )
592
+ y_bin = max (0 , y_starts [u ] - 1 )
593
+ heatmap_counts [x_start : x_end + 1 , y_bin ] += 1
594
+
595
+ x_coords = np .repeat (x_bins [:- 1 ], num_y_bins )
596
+ y_coords = np .tile (y_bins [:- 1 ], num_x_bins )
598
597
599
598
df = pd .DataFrame (
600
599
{
601
- "genomic_position " : x_coords .flatten (),
600
+ "position " : x_coords .flatten (),
602
601
"time" : y_coords .flatten (),
603
- "average_ancestor_span " : avg_spans .flatten (),
602
+ "overlapping_node_count " : heatmap_counts .flatten (),
604
603
}
605
604
)
606
605
return df .astype (
607
606
{
608
- "genomic_position " : "int" ,
607
+ "position " : "int" ,
609
608
"time" : "int" ,
610
- "average_ancestor_span " : "float64 " ,
609
+ "overlapping_node_count " : "int " ,
611
610
}
612
611
)
0 commit comments