Skip to content

Commit 138b09e

Browse files
committed
Added child_left, child_right columns to nodes_df and tests for these
Changed computation of ancestor-spans-heatmap data to iterate over nodes instead of bins.
1 parent 1b2cb4a commit 138b09e

File tree

3 files changed

+68
-1
lines changed

3 files changed

+68
-1
lines changed

model.py

+54
Original file line numberDiff line numberDiff line change
@@ -419,6 +419,8 @@ def nodes_df(self):
419419
"time": ts.nodes_time,
420420
"num_mutations": self.nodes_num_mutations,
421421
"ancestors_span": child_right - child_left,
422+
"child_left": child_left, # FIXME add test for this
423+
"child_right": child_right, # FIXME add test for this
422424
"is_sample": is_sample,
423425
}
424426
)
@@ -427,6 +429,8 @@ def nodes_df(self):
427429
"time": "float64",
428430
"num_mutations": "int",
429431
"ancestors_span": "float64",
432+
"child_left": "float64",
433+
"child_right": "float64",
430434
"is_sample": "bool",
431435
}
432436
)
@@ -551,3 +555,53 @@ def calc_mutations_per_tree(self):
551555
mutations_per_tree = np.zeros(self.ts.num_trees, dtype=np.int64)
552556
mutations_per_tree[unique_values] = counts
553557
return mutations_per_tree
558+
559+
def compute_ancestor_spans_heatmap_data(self, win_x_size=1_000_000, win_y_size=500):
    """
    Calculates the average ancestor span in a genomic-time window.

    Nodes whose ``ancestors_span`` is ``-inf`` are ignored. Every remaining
    node contributes ``min(ancestors_span, win_x_size)`` to each genomic
    window overlapped by the half-open interval ``[child_left, child_right)``,
    in the time window that contains the node's time.

    :param win_x_size: Width of a genomic (x-axis) window. Windows start at
        position 0 so that bin indices agree with the reported
        ``genomic_position`` values.
    :param win_y_size: Height of a time (y-axis) window. Windows start at
        time 0; a node sitting exactly on the upper edge of the last window
        is counted in that last window.
    :return: A DataFrame with one row per (genomic window, time window) pair
        and columns ``genomic_position`` (int, window start), ``time``
        (int, window start) and ``average_ancestor_span`` (float64, NaN for
        windows that no node falls into). The frame is empty when there
        are no usable nodes.
    """
    nodes_df = self.nodes_df[self.nodes_df.ancestors_span != -np.inf]
    # Plain arrays give positional access, so no index reset is required.
    nodes_left = nodes_df.child_left.to_numpy()
    nodes_right = nodes_df.child_right.to_numpy()
    nodes_time = nodes_df.time.to_numpy()
    ancestors_span = nodes_df.ancestors_span.to_numpy()

    if len(nodes_df) == 0:
        num_x_wins = num_y_wins = 0
    else:
        # Divide *before* rounding up; rounding the raw span first and then
        # truncating the quotient yields 0 windows for short sequences.
        num_x_wins = max(1, int(np.ceil(nodes_right.max() / win_x_size)))
        num_y_wins = max(1, int(np.ceil(nodes_time.max() / win_y_size)))
    heatmap_sums = np.zeros((num_x_wins, num_y_wins))
    heatmap_counts = np.zeros((num_x_wins, num_y_wins))

    # Map each node span to the x-axis bins it overlaps: the first bin is
    # the one containing child_left, the (exclusive) end bin is obtained by
    # rounding child_right up so a partially covered last bin is included.
    x_starts = np.floor(nodes_left / win_x_size).astype(int)
    x_ends = np.ceil(nodes_right / win_x_size).astype(int)
    # Clamp so that a time equal to the upper edge lands in the last bin.
    y_bins = np.clip(
        np.floor(nodes_time / win_y_size).astype(int), 0, num_y_wins - 1
    )
    # A single bin can never hold more than one window's worth of span.
    contributions = np.minimum(ancestors_span, win_x_size)

    for x_start, x_end, y, contribution in zip(
        x_starts, x_ends, y_bins, contributions
    ):
        heatmap_sums[x_start:x_end, y] += contribution
        heatmap_counts[x_start:x_end, y] += 1

    # Empty bins stay NaN without triggering a divide-by-zero warning.
    avg_spans = np.divide(
        heatmap_sums,
        heatmap_counts,
        out=np.full_like(heatmap_sums, np.nan),
        where=heatmap_counts > 0,
    )
    # Row-major flattening: x varies slowest, y varies fastest.
    x_coords = np.repeat(np.arange(num_x_wins) * win_x_size, num_y_wins)
    y_coords = np.tile(np.arange(num_y_wins) * win_y_size, num_x_wins)

    df = pd.DataFrame(
        {
            "genomic_position": x_coords,
            "time": y_coords,
            "average_ancestor_span": avg_spans.flatten(),
        }
    )
    return df.astype(
        {
            "genomic_position": "int",
            "time": "int",
            "average_ancestor_span": "float64",
        }
    )

pages/nodes.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -60,4 +60,13 @@ def page(tsm):
6060
pn.pane.Markdown("# Plot Options"),
6161
log_y_checkbox,
6262
)
63-
return pn.Column(main, hist_panel, plot_options)
63+
64+
anc_span_data = tsm.compute_ancestor_spans_heatmap_data()
65+
heatmap = hv.HeatMap(anc_span_data).opts(
66+
width=config.PLOT_WIDTH,
67+
height=config.PLOT_HEIGHT,
68+
tools=["hover"],
69+
colorbar=True,
70+
)
71+
72+
return pn.Column(main, hist_panel, heatmap, plot_options)

tests/test_data_model.py

+4
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,8 @@ def test_single_tree_example(self):
162162
nt.assert_array_equal(df.time, [0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 2.0])
163163
nt.assert_array_equal(df.num_mutations, [1, 1, 1, 1, 1, 1, 0])
164164
nt.assert_array_equal(df.ancestors_span, [10, 10, 10, 10, 10, 10, -np.inf])
165+
nt.assert_array_equal(df.child_left, [0, 0, 0, 0, 0, 0, np.inf])
166+
nt.assert_array_equal(df.child_right, [10, 10, 10, 10, 10, 10, 0])
165167
nt.assert_array_equal(df.is_sample, [1, 1, 1, 1, 0, 0, 0])
166168

167169
def test_multiple_tree_example(self):
@@ -172,6 +174,8 @@ def test_multiple_tree_example(self):
172174
nt.assert_array_equal(df.time, [0.0, 0.0, 0.0, 1.0, 2.0])
173175
nt.assert_array_equal(df.num_mutations, [0, 0, 0, 0, 0])
174176
nt.assert_array_equal(df.ancestors_span, [10, 10, 10, 10, -np.inf])
177+
nt.assert_array_equal(df.child_left, [0, 0, 0, 0, np.inf])
178+
nt.assert_array_equal(df.child_right, [10, 10, 10, 10, 0])
175179
nt.assert_array_equal(df.is_sample, [1, 1, 1, 0, 0])
176180

177181

0 commit comments

Comments
 (0)