Skip to content

Commit

Permalink
Add provenance to HTML output
Browse files Browse the repository at this point in the history
  • Loading branch information
benjeffery committed Sep 25, 2024
1 parent 860a9c5 commit 6c1724d
Show file tree
Hide file tree
Showing 3 changed files with 115 additions and 50 deletions.
3 changes: 3 additions & 0 deletions python/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@
- Edges now have an ``.interval`` attribute returning a ``tskit.Interval`` object.
(:user:`hyanwong`, :pr:`2531`)

- Add provenance information to the HTML notebook representation of a tree sequence.
(:user:`benjeffery`, :pr:`3001`)

--------------------
[0.5.8] - 2024-06-27
--------------------
Expand Down
13 changes: 12 additions & 1 deletion python/tests/test_highlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -2190,11 +2190,22 @@ def test_html_repr(self, ts):
html = ts._repr_html_()
# Parse to check valid
ElementTree.fromstring(html)
assert len(html) > 4300
assert len(html) > 5000
assert f"<tr><td>Trees</td><td>{ts.num_trees}</td></tr>" in html
assert f"<tr><td>Time Units</td><td>{ts.time_units}</td></tr>" in html
for table in ts.tables.table_name_map:
assert f"<td>{table.capitalize()}</td>" in html
if ts.num_provenances > 0:
assert (
f"<td>{json.loads(ts.provenance(0).record)['software']['name']}</td>"
in html
)

def test_bad_provenance(self, ts_fixture):
    """
    Append a provenance row whose timestamp and record are both the
    literal string "bad" and check that the HTML repr of the resulting
    tree sequence contains the "Could not parse provenance" notice.
    """
    table_collection = ts_fixture.dump_tables()
    table_collection.provenances.add_row("bad", "bad")
    rendered = table_collection.tree_sequence()._repr_html_()
    assert "Could not parse provenance" in rendered

@pytest.mark.parametrize("ts", get_example_tree_sequences())
def test_str(self, ts):
Expand Down
149 changes: 100 additions & 49 deletions python/tskit/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
Module responsible for various utility functions used in other modules.
"""
import dataclasses
import datetime
import html
import io
import itertools
import json
Expand Down Expand Up @@ -359,7 +361,7 @@ def obj_to_collapsed_html(d, name=None, open_depth=0):
</div>
"""
else:
return f"{name} {d}"
return f"{name} {html.escape(str(d))}"


def truncate_string_end(string, length):
Expand Down Expand Up @@ -491,21 +493,53 @@ def html_table(rows, *, header):
def tree_sequence_html(ts):
table_rows = "".join(
f"""
<tr>
<td>{name.capitalize()}</td>
<td>{table.num_rows}</td>
<td>{naturalsize(table.nbytes)}</td>
<td style="text-align: center;">
{'✅' if hasattr(table, "metadata") and len(table.metadata) > 0
else ''}
</td>
</tr>
"""
<tr>
<td>{name.capitalize()}</td>
<td>{table.num_rows}</td>
<td>{naturalsize(table.nbytes)}</td>
<td style="text-align: center;">
{'✅' if hasattr(table, "metadata") and len(table.metadata) > 0
else ''}
</td>
</tr>
"""
for name, table in ts.tables.table_name_map.items()
)

# Build one table row per provenance record, iterating the records in
# reverse order. Every value taken from a record is user-controlled text,
# so it is HTML-escaped before being interpolated into the markup; the
# collapsed "details" view is produced by obj_to_collapsed_html.
provenance_row_parts = []
for prov in reversed(ts.provenances()):
    try:
        timestamp = datetime.datetime.fromisoformat(prov.timestamp).strftime(
            "%A, %B %d, %Y at %I:%M:%S %p"
        )
        record = json.loads(prov.record)
        software = record.get("software", {})
        software_name = software.get("name", "Unknown")
        software_version = software.get("version", "Unknown")
        command = record.get("parameters", {}).get("command", "Unknown")
        details = obj_to_collapsed_html(record, None, 0)
        provenance_row_parts.append(
            f"""
            <tr>
                <td>{html.escape(timestamp)}</td>
                <td>{html.escape(str(software_name))}</td>
                <td>{html.escape(str(software_version))}</td>
                <td>{html.escape(str(command))}</td>
                <td>
                    <details>
                        <summary>Details</summary>
                        {details}
                    </details>
                </td>
            </tr>
            """
        )
    except Exception as e:
        # Deliberately broad: rendering a repr must never fail because a
        # single provenance record is malformed. The message is escaped
        # (it can echo the bad input) and wrapped in a full-width cell so
        # that the <tbody> only ever contains <tr> elements.
        message = html.escape(f"{e.__class__.__name__} {str(e)}")
        provenance_row_parts.append(
            f"""
            <tr>
                <td colspan="5">Could not parse provenance record: {message}</td>
            </tr>
            """
        )
provenance_rows = "".join(provenance_row_parts)

return f"""
<div>
<style>
<div>
<style>
.tskit-table thead tr th {{text-align: left;padding: 0.5em 0.5em;}}
.tskit-table tbody tr td {{padding: 0.5em 0.5em;}}
.tskit-table tbody tr td:first-of-type {{text-align: left;}}
Expand All @@ -514,47 +548,64 @@ def tree_sequence_html(ts):
.tskit-table-set-table {{margin: 12px 0 0 12px;}}
details {{display: inline-block;}}
summary {{cursor: pointer; outline: 0; display: list-item;}}
</style>
<div class="tskit-table-set">
</style>
<div class="tskit-table-set">
<div class="tskit-table-set-table">
<table class="tskit-table">
<thead>
<tr>
<th style="padding:0;line-height:21px;">
<img style="height: 32px;display: inline-block;padding: 3px 5px 3px 0;" src="https://raw.githubusercontent.com/tskit-dev/administrative/main/tskit_logo.svg"/>
<a target="_blank" href="https://tskit.dev/tskit/docs/latest/python-api.html#the-treesequence-class"> Tree Sequence </a>
</th>
</tr>
</thead>
<tbody>
<tr><td>Trees</td><td>{ts.num_trees}</td></tr>
<tr><td>Sequence Length</td><td>{ts.sequence_length}</td></tr>
<tr><td>Time Units</td><td>{ts.time_units}</td></tr>
<tr><td>Sample Nodes</td><td>{ts.num_samples}</td></tr>
<tr><td>Total Size</td><td>{naturalsize(ts.nbytes)}</td></tr>
<tr>
<td>Metadata</td><td style="text-align: left;">{obj_to_collapsed_html(ts.metadata, None, 1) if len(ts.tables.metadata_bytes) > 0 else "No Metadata"}</td></tr>
</tbody>
</table>
<table class="tskit-table">
<thead>
<tr>
<th style="padding:0;line-height:21px;">
<img style="height: 32px;display: inline-block;padding: 3px 5px 3px 0;" src="https://raw.githubusercontent.com/tskit-dev/administrative/main/tskit_logo.svg"/>
<a target="_blank" href="https://tskit.dev/tskit/docs/latest/python-api.html#the-treesequence-class"> Tree Sequence </a>
</th>
</tr>
</thead>
<tbody>
<tr><td>Trees</td><td>{ts.num_trees}</td></tr>
<tr><td>Sequence Length</td><td>{ts.sequence_length}</td></tr>
<tr><td>Time Units</td><td>{ts.time_units}</td></tr>
<tr><td>Sample Nodes</td><td>{ts.num_samples}</td></tr>
<tr><td>Total Size</td><td>{naturalsize(ts.nbytes)}</td></tr>
<tr>
<td>Metadata</td><td style="text-align: left;">{obj_to_collapsed_html(ts.metadata, None, 1) if len(ts.tables.metadata_bytes) > 0 else "No Metadata"}</td>
</tr>
</tbody>
</table>
</div>
<div class="tskit-table-set-table">
<table class="tskit-table">
<thead>
<tr>
<th style="line-height:21px;">Table</th>
<th>Rows</th>
<th>Size</th>
<th>Has Metadata</th>
</tr>
</thead>
<tbody>
{table_rows}
</tbody>
</table>
<table class="tskit-table">
<thead>
<tr>
<th style="line-height:21px;">Table</th>
<th>Rows</th>
<th>Size</th>
<th>Has Metadata</th>
</tr>
</thead>
<tbody>
{table_rows}
</tbody>
</table>
</div>
<div class="tskit-table-set-table">
<table class="tskit-table">
<thead>
<tr>
<th>Provenance Timestamp</th>
<th>Software Name</th>
<th>Version</th>
<th>Command</th>
<th>Full record</th>
</tr>
</thead>
<tbody>
{provenance_rows}
</tbody>
</table>
</div>
</div>
</div>
""" # noqa: B950
</div>
""" # noqa: B950


def tree_html(tree):
Expand Down

0 comments on commit 6c1724d

Please sign in to comment.