Skip to content

Commit 1b4b7ea

Browse files
committed
use of np.digitize and implementation of slider for bins
Added child_left, child_right columns to nodes_df and tests for these.
1 parent 439f509 commit 1b4b7ea

File tree

2 files changed

+52
-30
lines changed

2 files changed

+52
-30
lines changed

model.py

+21-22
Original file line number | Diff line number | Diff line change
@@ -429,6 +429,8 @@ def nodes_df(self):
429429
"ancestors_span": child_right - child_left,
430430
"child_left": child_left, # FIXME add test for this
431431
"child_right": child_right, # FIXME add test for this
432+
"child_left": child_left, # FIXME add test for this
433+
"child_right": child_right, # FIXME add test for this
432434
"is_sample": is_sample,
433435
}
434436
)
@@ -566,7 +568,7 @@ def calc_mutations_per_tree(self):
566568
mutations_per_tree[unique_values] = counts
567569
return mutations_per_tree
568570

569-
def compute_ancestor_spans_heatmap_data(self, win_x_size=1_000_000, win_y_size=500):
571+
def compute_ancestor_spans_heatmap_data(self, num_x_bins, num_y_bins):
570572
"""
571573
Calculates the average ancestor span in a genomic-time window
572574
"""
@@ -575,38 +577,35 @@ def compute_ancestor_spans_heatmap_data(self, win_x_size=1_000_000, win_y_size=5
575577
nodes_left = nodes_df.child_left
576578
nodes_right = nodes_df.child_right
577579
nodes_time = nodes_df.time
578-
ancestors_span = nodes_df.ancestors_span
579580

580-
num_x_wins = int(np.ceil(nodes_right.max() - nodes_left.min()) / win_x_size)
581-
num_y_wins = int(np.ceil(nodes_time.max() / win_y_size))
582-
heatmap_sums = np.zeros((num_x_wins, num_y_wins))
583-
heatmap_counts = np.zeros((num_x_wins, num_y_wins))
581+
x_bins = np.linspace(nodes_left.min(), nodes_right.max(), num_x_bins + 1)
582+
y_bins = np.linspace(0, nodes_time.max(), num_y_bins + 1)
583+
heatmap_counts = np.zeros((num_x_bins, num_y_bins))
584+
585+
x_starts = np.digitize(nodes_left, x_bins, right=True)
586+
x_ends = np.digitize(nodes_right, x_bins, right=True)
587+
y_starts = np.digitize(nodes_time, y_bins, right=True)
584588

585589
for u in range(len(nodes_left)):
586-
x_start = int(
587-
np.floor(nodes_left[u] / win_x_size)
588-
) # map the node span to the x-axis bins it overlaps
589-
x_end = int(np.floor(nodes_right[u] / win_x_size))
590-
y = max(0, int(np.floor(nodes_time[u] / win_y_size)) - 1)
591-
heatmap_sums[x_start:x_end, y] += min(ancestors_span[u], win_x_size)
592-
heatmap_counts[x_start:x_end, y] += 1
593-
594-
avg_spans = heatmap_sums / heatmap_counts
595-
indices = np.indices((num_x_wins, num_y_wins))
596-
x_coords = indices[0] * win_x_size
597-
y_coords = indices[1] * win_y_size
590+
x_start = max(0, x_starts[u] - 1)
591+
x_end = max(0, x_ends[u] - 1)
592+
y_bin = max(0, y_starts[u] - 1)
593+
heatmap_counts[x_start : x_end + 1, y_bin] += 1
594+
595+
x_coords = np.repeat(x_bins[:-1], num_y_bins)
596+
y_coords = np.tile(y_bins[:-1], num_x_bins)
598597

599598
df = pd.DataFrame(
600599
{
601-
"genomic_position": x_coords.flatten(),
600+
"position": x_coords.flatten(),
602601
"time": y_coords.flatten(),
603-
"average_ancestor_span": avg_spans.flatten(),
602+
"overlapping_node_count": heatmap_counts.flatten(),
604603
}
605604
)
606605
return df.astype(
607606
{
608-
"genomic_position": "int",
607+
"position": "int",
609608
"time": "int",
610-
"average_ancestor_span": "float64",
609+
"overlapping_node_count": "int",
611610
}
612611
)

pages/nodes.py

+31-8
Original file line number | Diff line number | Diff line change
@@ -54,16 +54,39 @@ def make_node_hist_panel(tsm, log_y):
5454
)
5555

5656
plot_options = pn.Column(
57-
pn.pane.Markdown("# Plot Options"),
5857
log_y_checkbox,
5958
)
6059

61-
anc_span_data = tsm.compute_ancestor_spans_heatmap_data()
62-
heatmap = hv.HeatMap(anc_span_data).opts(
63-
width=config.PLOT_WIDTH,
64-
height=config.PLOT_HEIGHT,
65-
tools=["hover"],
66-
colorbar=True,
60+
def make_heatmap(num_x_bins, num_y_bins):
61+
anc_span_data = tsm.compute_ancestor_spans_heatmap_data(num_x_bins, num_y_bins)
62+
heatmap = hv.HeatMap(anc_span_data).opts(
63+
width=config.PLOT_WIDTH,
64+
height=config.PLOT_HEIGHT,
65+
tools=["hover"],
66+
colorbar=True,
67+
)
68+
return heatmap
69+
70+
max_x_bins = int(np.sqrt(df_nodes.child_right.max()))
71+
x_bin_slider = pn.widgets.IntSlider(
72+
name="genome bins",
73+
value=min(50, int((max_x_bins - 1) / 2)),
74+
start=1,
75+
end=max_x_bins,
76+
)
77+
max_y_bins = int(np.sqrt(df_nodes.time.max()))
78+
y_bin_slider = pn.widgets.IntSlider(
79+
name="time bins", value=min(50, int(max_y_bins / 2)), start=1, end=max_y_bins
6780
)
81+
hm_options = pn.Column(x_bin_slider, y_bin_slider)
6882

69-
return pn.Column(main, hist_panel, heatmap, plot_options)
83+
hm_panel = pn.bind(
84+
make_heatmap,
85+
num_x_bins=x_bin_slider,
86+
num_y_bins=y_bin_slider,
87+
)
88+
89+
return pn.Column(
90+
pn.Row(main, pn.Column(hist_panel, plot_options)),
91+
pn.Column(hm_panel, hm_options),
92+
)

0 commit comments

Comments
 (0)