@@ -419,6 +419,8 @@ def nodes_df(self):
419
419
"time" : ts .nodes_time ,
420
420
"num_mutations" : self .nodes_num_mutations ,
421
421
"ancestors_span" : child_right - child_left ,
422
+ "child_left" : child_left , # FIXME add test for this
423
+ "child_right" : child_right , # FIXME add test for this
422
424
"is_sample" : is_sample ,
423
425
}
424
426
)
@@ -427,6 +429,8 @@ def nodes_df(self):
427
429
"time" : "float64" ,
428
430
"num_mutations" : "int" ,
429
431
"ancestors_span" : "float64" ,
432
+ "child_left" : "float64" ,
433
+ "child_right" : "float64" ,
430
434
"is_sample" : "bool" ,
431
435
}
432
436
)
@@ -551,3 +555,53 @@ def calc_mutations_per_tree(self):
551
555
mutations_per_tree = np .zeros (self .ts .num_trees , dtype = np .int64 )
552
556
mutations_per_tree [unique_values ] = counts
553
557
return mutations_per_tree
558
+
559
def compute_ancestor_spans_heatmap_data(self, win_x_size=1_000_000, win_y_size=500):
    """
    Calculate the average ancestor span in each genomic-position/time window.

    Rows of ``self.nodes_df`` whose ``ancestors_span`` is ``-inf`` are
    ignored. Each remaining node is treated as covering the half-open
    interval ``[child_left, child_right)`` and contributes
    ``min(ancestors_span, win_x_size)`` to every genomic window that
    interval overlaps, in the time window containing the node's ``time``.

    :param win_x_size: Width of a genomic window; must be positive.
    :param win_y_size: Height of a time window; must be positive.
    :return: A DataFrame with one row per (genomic window, time window)
        pair and the columns ``genomic_position`` (window start, int),
        ``time`` (window start, int) and ``average_ancestor_span``
        (float64, NaN for windows that no node falls in). The frame is
        empty when no node passes the filter.
    :raises ValueError: If either window size is not positive.
    """
    if win_x_size <= 0 or win_y_size <= 0:
        raise ValueError("win_x_size and win_y_size must be positive")

    # Read the attribute once; it may be an expensive computed property.
    all_nodes = self.nodes_df
    nodes = all_nodes[all_nodes.ancestors_span != -np.inf]
    lefts = nodes.child_left.to_numpy(dtype=float)
    rights = nodes.child_right.to_numpy(dtype=float)
    times = nodes.time.to_numpy(dtype=float)
    spans = nodes.ancestors_span.to_numpy(dtype=float)

    if len(nodes) == 0:
        # Nothing to bin: max() of an empty column would be NaN.
        num_x_wins = 0
        num_y_wins = 0
    else:
        # Bins are indexed by absolute position, so the number of bins is
        # determined by the largest right coordinate, not by the extent
        # relative to the smallest left coordinate.
        num_x_wins = max(0, int(np.ceil(rights.max() / win_x_size)))
        # Always keep at least one time bin so that time 0 has a home.
        num_y_wins = max(1, int(np.ceil(times.max() / win_y_size)))

    heatmap_sums = np.zeros((num_x_wins, num_y_wins))
    heatmap_counts = np.zeros((num_x_wins, num_y_wins))

    if len(nodes) > 0:
        # First overlapped bin (inclusive) and last overlapped bin
        # (exclusive). Using ceil for the end keeps the bin that contains
        # child_right while excluding it when child_right sits exactly on
        # a window boundary.
        x_starts = np.maximum(np.floor(lefts / win_x_size).astype(int), 0)
        x_ends = np.ceil(rights / win_x_size).astype(int)
        # A time equal to the top edge of the last window belongs to the
        # last window rather than to a non-existent one past the end.
        y_bins = np.clip(
            np.floor(times / win_y_size).astype(int), 0, num_y_wins - 1
        )
        # Cap the contribution at the window size, as only part of a long
        # span can lie inside any single window.
        capped_spans = np.minimum(spans, win_x_size)

        for x_start, x_end, y, span in zip(x_starts, x_ends, y_bins, capped_spans):
            heatmap_sums[x_start:x_end, y] += span
            heatmap_counts[x_start:x_end, y] += 1

    # Empty windows stay NaN without triggering a divide-by-zero warning.
    avg_spans = np.divide(
        heatmap_sums,
        heatmap_counts,
        out=np.full_like(heatmap_sums, np.nan),
        where=heatmap_counts > 0,
    )

    # Row-major flattening of a (num_x_wins, num_y_wins) grid: the x
    # coordinate changes slowest, the y coordinate fastest.
    x_coords = np.repeat(np.arange(num_x_wins) * win_x_size, num_y_wins)
    y_coords = np.tile(np.arange(num_y_wins) * win_y_size, num_x_wins)

    df = pd.DataFrame(
        {
            "genomic_position": x_coords,
            "time": y_coords,
            "average_ancestor_span": avg_spans.flatten(),
        }
    )
    return df.astype(
        {
            "genomic_position": "int",
            "time": "int",
            "average_ancestor_span": "float64",
        }
    )
0 commit comments