-
Notifications
You must be signed in to change notification settings - Fork 603
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(repr): add show_count
option to interactive repr
#10518
Open
NickCrews
wants to merge
2
commits into
ibis-project:main
Choose a base branch
from
NickCrews:interactive-count
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,7 @@ | |
import ibis.expr.datatypes as dt | ||
|
||
if TYPE_CHECKING: | ||
from ibis.config import Interactive | ||
from ibis.expr.types import Column, Expr, Scalar, Table | ||
|
||
|
||
|
@@ -261,64 +262,43 @@ def format_dtype(dtype, max_string: int) -> Text: | |
def to_rich( | ||
expr: Expr, | ||
*, | ||
max_rows: int | None = None, | ||
max_columns: int | None = None, | ||
max_length: int | None = None, | ||
max_string: int | None = None, | ||
max_depth: int | None = None, | ||
console_width: int | float | None = None, | ||
options: Interactive | None = None, | ||
) -> Pretty: | ||
"""Truncate, evaluate, and render an Ibis expression as a rich object.""" | ||
if options is None: | ||
options = ibis.options.repr.interactive | ||
from ibis.expr.types import Scalar | ||
|
||
if isinstance(expr, Scalar): | ||
return _to_rich_scalar( | ||
expr, max_length=max_length, max_string=max_string, max_depth=max_depth | ||
) | ||
return _to_rich_scalar(expr, options) | ||
else: | ||
return _to_rich_table( | ||
expr, | ||
max_rows=max_rows, | ||
max_columns=max_columns, | ||
max_length=max_length, | ||
max_string=max_string, | ||
max_depth=max_depth, | ||
console_width=console_width, | ||
) | ||
return _to_rich_table(expr, options, console_width=console_width) | ||
|
||
|
||
def _to_rich_scalar( | ||
expr: Scalar, | ||
*, | ||
max_length: int | None = None, | ||
max_string: int | None = None, | ||
max_depth: int | None = None, | ||
) -> Pretty: | ||
def _to_rich_scalar(expr: Scalar, options: Interactive) -> Pretty: | ||
value = format_values( | ||
expr.type(), | ||
[expr.to_pyarrow().as_py()], | ||
max_length=max_length or ibis.options.repr.interactive.max_length, | ||
max_string=max_string or ibis.options.repr.interactive.max_string, | ||
max_depth=max_depth or ibis.options.repr.interactive.max_depth, | ||
max_length=options.max_length, | ||
max_string=options.max_string, | ||
max_depth=options.max_depth, | ||
)[0] | ||
return Panel(value, expand=False, box=box.SQUARE) | ||
|
||
|
||
def _to_rich_table( | ||
tablish: Table | Column, | ||
*, | ||
max_rows: int | None = None, | ||
max_columns: int | None = None, | ||
max_length: int | None = None, | ||
max_string: int | None = None, | ||
max_depth: int | None = None, | ||
options: Interactive, | ||
console_width: int | float | None = None, | ||
) -> rich.table.Table: | ||
max_rows = max_rows or ibis.options.repr.interactive.max_rows | ||
max_columns = max_columns or ibis.options.repr.interactive.max_columns | ||
from ibis.expr import types as ir | ||
|
||
console_width = console_width or float("inf") | ||
max_string = max_string or ibis.options.repr.interactive.max_string | ||
show_types = ibis.options.repr.interactive.show_types | ||
max_rows = options.max_rows | ||
max_columns = options.max_columns | ||
max_string = options.max_string | ||
show_types = options.show_types | ||
|
||
table = tablish.as_table() | ||
orig_ncols = len(table.columns) | ||
|
@@ -360,9 +340,9 @@ def _to_rich_table( | |
formatted, min_width, max_width = format_column( | ||
dtype, | ||
result[name].to_pylist()[:max_rows], | ||
max_length=max_length, | ||
max_length=options.max_length, | ||
max_string=max_string, | ||
max_depth=max_depth, | ||
max_depth=options.max_depth, | ||
) | ||
dtype_str = format_dtype(dtype, max_string) | ||
if show_types and not isinstance(dtype, (dt.Struct, dt.Map, dt.Array)): | ||
|
@@ -433,7 +413,18 @@ def _to_rich_table( | |
if not next_flex_cols: | ||
break | ||
|
||
rich_table = rich.table.Table(padding=(0, 1, 0, 1)) | ||
if options.show_count: | ||
# use underscore to be friendly to i18n and python REPL | ||
nrows = f"{table.count().execute():_}" | ||
else: | ||
nrows = "…" | ||
if isinstance(tablish, ir.Table): | ||
dims = f"{orig_ncols:_} cols by {nrows} rows" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we make this follow the long-standing convention of "rows by columns"? |
||
else: | ||
dims = f"{nrows} rows" | ||
rich_table = rich.table.Table( | ||
title=dims, title_justify="left", padding=(0, 1, 0, 1) | ||
) | ||
|
||
# Configure the columns on the rich table. | ||
for name, dtype, _, max_width in col_info: | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this is a nice feature to have. I do not think it should default to true.
Interactive mode is supposed to be interactive, and something that can be computationally expensive isn't something we want to turn on by default.
The main failure case I can think of here is reading from CSVs in a blob store. With
show_count=False
, this can be done with a range request and only download the first N rows (ish), except if there's an order-by.show_count=True
would mean any operation would trigger a download of the entire file.And yes, that is one more reason not to use CSVs, but 🤷
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK, I am OK with defaulting to False. Let's just wait for Philip to chime in to verify, but I assume he is going to have the same opinion as you.
I really don't think it is worth the complexity/cleverness, but throwing it out there that we could make each relation have its own config, and that could be auto-determined based on if it's a CSV/parquet/physical table, local/remote, etc.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We are slowly merging into the same person. The transformation is nearly complete.
I mean, it's definitely a reliably cheap(er) operation on anything that isn't a CSV, but I agree this seems like too much complexity. I think a better move is your suggestion of persistent default configuration
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, let's leave the default to
False
, otherwise it's a huge performance footgun/bug magnet.