@@ -556,7 +556,7 @@ def calc_mutations_per_tree(self):
556
556
mutations_per_tree [unique_values ] = counts
557
557
return mutations_per_tree
558
558
559
- def compute_ancestor_spans_heatmap_data (self , win_x_size = 1_000_000 , win_y_size = 500 ):
559
+ def compute_ancestor_spans_heatmap_data (self , num_x_bins , num_y_bins ):
560
560
"""
561
561
Counts, for each genomic-position/time bin, the nodes whose span overlaps it
562
562
"""
@@ -565,38 +565,35 @@ def compute_ancestor_spans_heatmap_data(self, win_x_size=1_000_000, win_y_size=5
565
565
nodes_left = nodes_df .child_left
566
566
nodes_right = nodes_df .child_right
567
567
nodes_time = nodes_df .time
568
- ancestors_span = nodes_df .ancestors_span
569
568
570
- num_x_wins = int (np .ceil (nodes_right .max () - nodes_left .min ()) / win_x_size )
571
- num_y_wins = int (np .ceil (nodes_time .max () / win_y_size ))
572
- heatmap_sums = np .zeros ((num_x_wins , num_y_wins ))
573
- heatmap_counts = np .zeros ((num_x_wins , num_y_wins ))
569
+ x_bins = np .linspace (nodes_left .min (), nodes_right .max (), num_x_bins + 1 )
570
+ y_bins = np .linspace (0 , nodes_time .max (), num_y_bins + 1 )
571
+ heatmap_counts = np .zeros ((num_x_bins , num_y_bins ))
572
+
573
+ x_starts = np .digitize (nodes_left , x_bins , right = True )
574
+ x_ends = np .digitize (nodes_right , x_bins , right = True )
575
+ y_starts = np .digitize (nodes_time , y_bins , right = True )
574
576
575
577
for u in range (len (nodes_left )):
576
- x_start = int (
577
- np .floor (nodes_left [u ] / win_x_size )
578
- ) # map the node span to the x-axis bins it overlaps
579
- x_end = int (np .floor (nodes_right [u ] / win_x_size ))
580
- y = max (0 , int (np .floor (nodes_time [u ] / win_y_size )) - 1 )
581
- heatmap_sums [x_start :x_end , y ] += min (ancestors_span [u ], win_x_size )
582
- heatmap_counts [x_start :x_end , y ] += 1
583
-
584
- avg_spans = heatmap_sums / heatmap_counts
585
- indices = np .indices ((num_x_wins , num_y_wins ))
586
- x_coords = indices [0 ] * win_x_size
587
- y_coords = indices [1 ] * win_y_size
578
+ x_start = max (0 , x_starts [u ] - 1 )
579
+ x_end = max (0 , x_ends [u ] - 1 )
580
+ y_bin = max (0 , y_starts [u ] - 1 )
581
+ heatmap_counts [x_start : x_end + 1 , y_bin ] += 1
582
+
583
+ x_coords = np .repeat (x_bins [:- 1 ], num_y_bins )
584
+ y_coords = np .tile (y_bins [:- 1 ], num_x_bins )
588
585
589
586
df = pd .DataFrame (
590
587
{
591
- "genomic_position " : x_coords .flatten (),
588
+ "position " : x_coords .flatten (),
592
589
"time" : y_coords .flatten (),
593
- "average_ancestor_span " : avg_spans .flatten (),
590
+ "overlapping_node_count " : heatmap_counts .flatten (),
594
591
}
595
592
)
596
593
return df .astype (
597
594
{
598
- "genomic_position " : "int" ,
595
+ "position " : "int" ,
599
596
"time" : "int" ,
600
- "average_ancestor_span " : "float64 " ,
597
+ "overlapping_node_count " : "int " ,
601
598
}
602
599
)
0 commit comments