Skip to content

Commit 5e7b45f

Browse files
authored
Merge pull request #240 from benjeffery/drop-pops
Drop pop plot
2 parents 9a1eee9 + 0528928 commit 5e7b45f

File tree

3 files changed

+1
-217
lines changed

3 files changed

+1
-217
lines changed

tests/test_preprocess.py

-61
Original file line number | Diff line number | Diff line change
@@ -220,67 +220,6 @@ def test_multi_tree_with_polytomies_example(self):
220220
nt.assert_array_equal(t["max_internal_arity"], [3.0, 3.0])
221221

222222

223-
class TestMutationFrequencies:
224-
def example_ts(self):
225-
demography = msprime.Demography()
226-
demography.add_population(name="A", initial_size=10_000)
227-
demography.add_population(name="B", initial_size=5_000)
228-
demography.add_population(name="C", initial_size=1_000)
229-
demography.add_population_split(time=1000, derived=["A", "B"], ancestral="C")
230-
return msprime.sim_ancestry(
231-
samples={"A": 1, "B": 1},
232-
demography=demography,
233-
random_seed=12,
234-
sequence_length=10_000,
235-
)
236-
237-
def compute_mutation_counts(self, ts):
238-
pop_mutation_count = np.zeros((ts.num_populations, ts.num_mutations), dtype=int)
239-
for pop in ts.populations():
240-
for tree in ts.trees(tracked_samples=ts.samples(population=pop.id)):
241-
for mut in tree.mutations():
242-
count = tree.num_tracked_samples(mut.node)
243-
pop_mutation_count[pop.id, mut.id] = count
244-
return pop_mutation_count
245-
246-
def check_ts(self, ts):
247-
C1 = self.compute_mutation_counts(ts)
248-
C2 = preprocess.compute_population_mutation_counts(ts)
249-
nt.assert_array_equal(C1, C2)
250-
m = preprocess.mutations(ts)
251-
nt.assert_array_equal(m["pop_A_freq"], C1[0] / ts.num_samples)
252-
nt.assert_array_equal(m["pop_B_freq"], C1[1] / ts.num_samples)
253-
nt.assert_array_equal(m["pop_C_freq"], C1[2] / ts.num_samples)
254-
255-
def test_all_nodes(self):
256-
ts = self.example_ts()
257-
tables = ts.dump_tables()
258-
for u in range(ts.num_nodes - 1):
259-
site_id = tables.sites.add_row(u, "A")
260-
tables.mutations.add_row(site=site_id, node=u, derived_state="T")
261-
ts = tables.tree_sequence()
262-
self.check_ts(ts)
263-
264-
@pytest.mark.parametrize("seed", range(1, 7))
265-
def test_simulated_mutations(self, seed):
266-
ts = msprime.sim_mutations(self.example_ts(), rate=1e-6, random_seed=seed)
267-
assert ts.num_mutations > 0
268-
self.check_ts(ts)
269-
270-
def test_no_metadata_schema(self):
271-
ts = msprime.sim_mutations(self.example_ts(), rate=1e-6, random_seed=43)
272-
assert ts.num_mutations > 0
273-
tables = ts.dump_tables()
274-
tables.populations.metadata_schema = tskit.MetadataSchema(None)
275-
self.check_ts(tables.tree_sequence())
276-
277-
def test_no_populations(self):
278-
tables = single_tree_example_ts().dump_tables()
279-
tables.populations.add_row(b"{}")
280-
with pytest.raises(ValueError, match="must be assigned to populations"):
281-
preprocess.mutations(tables.tree_sequence())
282-
283-
284223
class TestNodeIsSample:
285224
def test_simple_example(self):
286225
ts = single_tree_example_ts()

tsbrowse/pages/mutations.py

+1-51
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,6 @@
77
from bokeh.models import HoverTool
88

99
from .. import config
10-
from ..plot_helpers import center_plot_title
1110
from ..plot_helpers import customise_ticks
1211
from ..plot_helpers import filter_points
1312
from ..plot_helpers import hover_points
@@ -130,70 +129,21 @@ def get_mut_data(x_range, y_range, index):
130129
mut_data = filtered_data.loc[index[0]]
131130
return mut_data
132131

133-
def update_pop_freq_plot(x_range, y_range, index):
134-
if not index:
135-
return hv.Bars([], "population", "frequency").opts(
136-
title="Population frequencies",
137-
default_tools=[],
138-
tools=["hover"],
139-
hooks=[center_plot_title],
140-
)
141-
142-
mut_data = get_mut_data(x_range, y_range, index)
143-
pops = [col for col in mut_data.index if "pop_" in col]
144-
145-
if pops:
146-
df = pd.DataFrame(
147-
{
148-
"population": [
149-
pop.replace("pop_", "").replace("_freq", "") for pop in pops
150-
],
151-
"frequency": [mut_data[col] for col in pops],
152-
}
153-
)
154-
df = df[df["frequency"] > 0]
155-
156-
bars = hv.Bars(df, "population", "frequency").opts(
157-
framewise=True,
158-
title=f"Mutation {mut_data['id']}",
159-
ylim=(0, max(df["frequency"]) * 1.1),
160-
xrotation=45,
161-
tools=["hover"],
162-
default_tools=[],
163-
yticks=3,
164-
yformatter="%.3f",
165-
hooks=[center_plot_title],
166-
)
167-
return bars
168-
else:
169-
return hv.Bars([], "population", "frequency").opts(
170-
title="Population frequencies",
171-
default_tools=[],
172-
tools=["hover"],
173-
hooks=[center_plot_title],
174-
)
175-
176132
def update_mut_info_table(x_range, y_range, index):
177133
if not index:
178134
float_panel.visible = False
179135
return hv.Table([], kdims=["mutation"], vdims=["value"])
180136
float_panel.visible = True
181137
mut_data = get_mut_data(x_range, y_range, index)
182-
pops = [col for col in mut_data.index if "pop_" in col]
183-
mut_data = mut_data.drop(pops)
184138
mut_data["time"] = mut_data["time"].round(2)
185139
if "log_time" in mut_data:
186140
mut_data["log_time"] = mut_data["log_time"].round(2)
187141
return hv.Table(mut_data.items(), kdims=["mutation"], vdims=["value"])
188142

189-
pop_data_dynamic = hv.DynamicMap(
190-
update_pop_freq_plot, streams=[range_stream, selection_stream]
191-
)
192-
pop_data_dynamic.opts(align=("center"))
193143
mut_info_table_dynamic = hv.DynamicMap(
194144
update_mut_info_table, streams=[range_stream, selection_stream]
195145
)
196-
tap_widgets_layout = (pop_data_dynamic + mut_info_table_dynamic).cols(1)
146+
tap_widgets_layout = mut_info_table_dynamic
197147
float_panel = pn.layout.FloatPanel(
198148
pn.Column(
199149
tap_widgets_layout,

tsbrowse/preprocess.py

-105
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
import dataclasses
2-
import json
32
import pathlib
43
import warnings
54

@@ -100,92 +99,6 @@ def alloc_tree_position(ts):
10099
)
101100

102101

103-
@jit.numba_jit()
104-
def _compute_population_mutation_counts(
105-
tree_pos,
106-
num_nodes,
107-
num_mutations,
108-
num_populations,
109-
edges_parent,
110-
edges_child,
111-
nodes_is_sample,
112-
nodes_population,
113-
mutations_position,
114-
mutations_node,
115-
mutations_parent,
116-
):
117-
num_pop_samples = np.zeros((num_nodes, num_populations), dtype=np.int32)
118-
119-
pop_mutation_count = np.zeros((num_populations, num_mutations), dtype=np.int32)
120-
parent = np.zeros(num_nodes, dtype=np.int32) - 1
121-
122-
for u in range(num_nodes):
123-
if nodes_is_sample[u]:
124-
num_pop_samples[u, nodes_population[u]] = 1
125-
126-
mut_id = 0
127-
while tree_pos.next():
128-
for j in range(tree_pos.out_range[0], tree_pos.out_range[1]):
129-
e = tree_pos.edge_removal_order[j]
130-
c = edges_child[e]
131-
p = edges_parent[e]
132-
parent[c] = -1
133-
u = p
134-
while u != -1:
135-
for k in range(num_populations):
136-
num_pop_samples[u, k] -= num_pop_samples[c, k]
137-
u = parent[u]
138-
139-
for j in range(tree_pos.in_range[0], tree_pos.in_range[1]):
140-
e = tree_pos.edge_insertion_order[j]
141-
p = edges_parent[e]
142-
c = edges_child[e]
143-
parent[c] = p
144-
u = p
145-
while u != -1:
146-
for k in range(num_populations):
147-
num_pop_samples[u, k] += num_pop_samples[c, k]
148-
u = parent[u]
149-
150-
left, right = tree_pos.interval
151-
while mut_id < num_mutations and mutations_position[mut_id] < right:
152-
assert mutations_position[mut_id] >= left
153-
mutation_node = mutations_node[mut_id]
154-
for pop in range(num_populations):
155-
pop_mutation_count[pop, mut_id] = num_pop_samples[mutation_node, pop]
156-
mut_id += 1
157-
158-
return pop_mutation_count
159-
160-
161-
def compute_population_mutation_counts(ts):
162-
"""
163-
Return a (num_populations, num_mutations) array that gives the frequency
164-
of each mutation in each of the populations in the specified tree sequence.
165-
"""
166-
logger.info(
167-
f"Computing mutation frequencies within {ts.num_populations} populations"
168-
)
169-
mutations_position = ts.sites_position[ts.mutations_site].astype(int)
170-
171-
if np.any(ts.nodes_population[ts.samples()] == -1):
172-
raise ValueError("Sample nodes must be assigned to populations")
173-
174-
return _compute_population_mutation_counts(
175-
alloc_tree_position(ts),
176-
ts.num_nodes,
177-
ts.num_mutations,
178-
ts.num_populations,
179-
ts.edges_parent,
180-
ts.edges_child,
181-
node_is_sample(ts),
182-
ts.nodes_population,
183-
mutations_position,
184-
ts.mutations_node,
185-
ts.mutations_parent,
186-
)
187-
188-
189102
@dataclasses.dataclass
190103
class MutationCounts:
191104
num_parents: np.ndarray
@@ -322,23 +235,6 @@ def mutations(ts):
322235
inherited_state[mutations_with_parent] = derived_state[parent]
323236
mutations_inherited_state = inherited_state
324237

325-
population_data = {}
326-
if ts.num_populations > 0:
327-
pop_mutation_count = compute_population_mutation_counts(ts)
328-
for pop in ts.populations():
329-
name = f"pop{pop.id}"
330-
if isinstance(pop.metadata, bytes):
331-
try:
332-
metadata_dict = json.loads(pop.metadata.decode("utf-8"))
333-
except (json.JSONDecodeError, UnicodeDecodeError):
334-
metadata_dict = {}
335-
else:
336-
metadata_dict = pop.metadata
337-
if "name" in metadata_dict:
338-
name = metadata_dict["name"]
339-
col_name = f"pop_{name}_freq"
340-
population_data[col_name] = pop_mutation_count[pop.id] / ts.num_samples
341-
342238
counts = compute_mutation_counts(ts)
343239
logger.info("Preprocessed mutations")
344240
return {
@@ -347,7 +243,6 @@ def mutations(ts):
347243
"num_descendants": counts.num_descendants,
348244
"num_inheritors": counts.num_inheritors,
349245
"num_parents": counts.num_parents,
350-
**population_data,
351246
}
352247

353248

0 commit comments

Comments
 (0)