Skip to content

Commit

Permalink
int slider removed, edge case fixed, bin count logged, actual node count on hover and plot titles added
Browse files Browse the repository at this point in the history

Added child_left, child_right columns to nodes_df and tests for these.
  • Loading branch information
savitakartik committed Sep 23, 2023
1 parent a1b8505 commit 5541b73
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 33 deletions.
48 changes: 25 additions & 23 deletions model.py
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,8 @@ def nodes_df(self):
"ancestors_span": child_right - child_left,
"child_left": child_left, # FIXME add test for this
"child_right": child_right, # FIXME add test for this
"child_left": child_left, # FIXME add test for this
"child_right": child_right, # FIXME add test for this
"is_sample": is_sample,
}
)
Expand Down Expand Up @@ -589,7 +591,7 @@ def calc_mutations_per_tree(self):
mutations_per_tree[unique_values] = counts
return mutations_per_tree

def compute_ancestor_spans_heatmap_data(self, win_x_size=1_000_000, win_y_size=500):
def compute_ancestor_spans_heatmap_data(self, num_x_bins, num_y_bins):
"""
Calculates the average ancestor span in a genomic-time window
"""
Expand All @@ -598,38 +600,38 @@ def compute_ancestor_spans_heatmap_data(self, win_x_size=1_000_000, win_y_size=5
nodes_left = nodes_df.child_left
nodes_right = nodes_df.child_right
nodes_time = nodes_df.time
ancestors_span = nodes_df.ancestors_span

num_x_wins = int(np.ceil(nodes_right.max() - nodes_left.min()) / win_x_size)
num_y_wins = int(np.ceil(nodes_time.max() / win_y_size))
heatmap_sums = np.zeros((num_x_wins, num_y_wins))
heatmap_counts = np.zeros((num_x_wins, num_y_wins))
x_bins = np.linspace(nodes_left.min(), nodes_right.max(), num_x_bins + 1)
y_bins = np.linspace(0, nodes_time.max(), num_y_bins + 1)
heatmap_counts = np.zeros((num_x_bins, num_y_bins))

for u in range(len(nodes_left)):
x_start = int(
np.floor(nodes_left[u] / win_x_size)
) # map the node span to the x-axis bins it overlaps
x_end = int(np.floor(nodes_right[u] / win_x_size))
y = max(0, int(np.floor(nodes_time[u] / win_y_size)) - 1)
heatmap_sums[x_start:x_end, y] += min(ancestors_span[u], win_x_size)
heatmap_counts[x_start:x_end, y] += 1

avg_spans = heatmap_sums / heatmap_counts
indices = np.indices((num_x_wins, num_y_wins))
x_coords = indices[0] * win_x_size
y_coords = indices[1] * win_y_size
x_starts = np.digitize(nodes_left, x_bins, right=True)
x_ends = np.digitize(nodes_right, x_bins, right=True)
y_starts = np.digitize(nodes_time, y_bins, right=True)

for u in range(len(nodes_left)):
x_start = max(0, x_starts[u] - 1)
x_end = max(0, x_ends[u] - 1)
y_bin = max(0, y_starts[u] - 1)
heatmap_counts[x_start : x_end + 1, y_bin] += 1

x_coords = np.repeat(x_bins[:-1], num_y_bins)
y_coords = np.tile(y_bins[:-1], num_x_bins)
overlapping_node_count = heatmap_counts.flatten()
overlapping_node_count[overlapping_node_count == 0] = 1
# FIXME - better way to avoid log 0 above?
df = pd.DataFrame(
{
"genomic_position": x_coords.flatten(),
"position": x_coords.flatten(),
"time": y_coords.flatten(),
"average_ancestor_span": avg_spans.flatten(),
"overlapping_node_count_log10": np.log10(overlapping_node_count),
"overlapping_node_count": overlapping_node_count,
}
)
return df.astype(
{
"genomic_position": "int",
"position": "int",
"time": "int",
"average_ancestor_span": "float64",
"overlapping_node_count": "int",
}
)
59 changes: 49 additions & 10 deletions pages/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import hvplot.pandas # noqa
import numpy as np
import panel as pn
from bokeh.models import HoverTool

import config
from plot_helpers import filter_points
Expand Down Expand Up @@ -40,8 +41,15 @@ def make_node_hist_panel(tsm, log_y):
points = df_nodes.hvplot.scatter(
x="ancestors_span",
y="time",
hover_cols=["ancestors_span", "time"],
).opts(width=config.PLOT_WIDTH, height=config.PLOT_HEIGHT)
hover_cols=["ancestors_span", "time"], # add node ID
).opts(
width=config.PLOT_WIDTH,
height=config.PLOT_HEIGHT,
title="Node span by time",
xlabel="width of genome spanned by node ancestors",
ylabel="node time",
axiswise=True,
)

range_stream = hv.streams.RangeXY(source=points)
streams = [range_stream]
Expand All @@ -54,16 +62,47 @@ def make_node_hist_panel(tsm, log_y):
)

plot_options = pn.Column(
pn.pane.Markdown("# Plot Options"),
log_y_checkbox,
)

anc_span_data = tsm.compute_ancestor_spans_heatmap_data()
heatmap = hv.HeatMap(anc_span_data).opts(
width=config.PLOT_WIDTH,
height=config.PLOT_HEIGHT,
tools=["hover"],
colorbar=True,
def make_heatmap(num_x_bins, num_y_bins):
anc_span_data = tsm.compute_ancestor_spans_heatmap_data(num_x_bins, num_y_bins)
tooltips = [
("position", "@position"),
("time", "@time"),
("overlapping_nodes", "@overlapping_node_count"),
]
hover = HoverTool(tooltips=tooltips)
heatmap = hv.HeatMap(anc_span_data).opts(
width=config.PLOT_WIDTH,
height=config.PLOT_HEIGHT,
tools=[hover],
colorbar=True,
title="Average ancestor length in time and genome bins",
axiswise=True,
)
return heatmap

max_x_bins = int(np.sqrt(df_nodes.child_right.max()))
x_bin_input = pn.widgets.IntInput(
name="genome bins",
value=min(50, max_x_bins),
start=1,
end=max_x_bins,
)
max_y_bins = int(np.sqrt(df_nodes.time.max()))
y_bin_input = pn.widgets.IntInput(
name="time bins", value=min(50, int(max_y_bins)), start=1, end=max_y_bins
)
hm_options = pn.Column(x_bin_input, y_bin_input)

return pn.Column(main, hist_panel, heatmap, plot_options)
hm_panel = pn.bind(
make_heatmap,
num_x_bins=x_bin_input,
num_y_bins=y_bin_input,
)

return pn.Column(
pn.Row(main, pn.Column(hist_panel, plot_options)),
pn.Column(hm_panel, hm_options),
)

0 comments on commit 5541b73

Please sign in to comment.