From f4b373aad9c373ef550b8311b1e0ce61587240b6 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Tue, 9 Sep 2025 20:50:09 -0400 Subject: [PATCH 01/13] draft basic repr for Session class --- icechunk-python/python/icechunk/_icechunk_python.pyi | 1 + icechunk-python/python/icechunk/session.py | 3 +++ icechunk-python/src/session.rs | 11 +++++++++++ 3 files changed, 15 insertions(+) diff --git a/icechunk-python/python/icechunk/_icechunk_python.pyi b/icechunk-python/python/icechunk/_icechunk_python.pyi index 3fc129ae9..fe8014c34 100644 --- a/icechunk-python/python/icechunk/_icechunk_python.pyi +++ b/icechunk-python/python/icechunk/_icechunk_python.pyi @@ -1501,6 +1501,7 @@ class PyRepository: ) -> str: ... class PySession: + def __repr__(self) -> str: ... @classmethod def from_bytes(cls, data: bytes) -> PySession: ... def __eq__(self, value: object) -> bool: ... diff --git a/icechunk-python/python/icechunk/session.py b/icechunk-python/python/icechunk/session.py index 1abe38304..e9866f3dd 100644 --- a/icechunk-python/python/icechunk/session.py +++ b/icechunk-python/python/icechunk/session.py @@ -22,6 +22,9 @@ def __init__(self, session: PySession): self._session = session self._allow_changes = False + def __repr__(self) -> str: + return self._session.__repr__() + def __eq__(self, value: object) -> bool: if not isinstance(value, Session): return False diff --git a/icechunk-python/src/session.rs b/icechunk-python/src/session.rs index ecf919a3f..f50953cbe 100644 --- a/icechunk-python/src/session.rs +++ b/icechunk-python/src/session.rs @@ -23,6 +23,17 @@ pub struct PySession(pub Arc>); /// Most functions in this class block, so they need to `allow_threads` so other /// python threads can make progress impl PySession { + fn __repr__(&self, py: Python<'_>) -> String { + let read_only = + py.allow_threads(move || self.0.blocking_read().read_only().to_string()); + + format!( + "\n\ + read_only: {}", + read_only + ) + } + #[classmethod] fn from_bytes( _cls: Bound<'_, PyType>, From 50265f84de12fe502f0568e2bad7c0bfa1d14278 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Tue, 9 Sep 2025 21:00:47 -0400 Subject: [PATCH 02/13] add snapshot_id --- icechunk-python/src/session.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/icechunk-python/src/session.rs b/icechunk-python/src/session.rs index f50953cbe..7cd8e5090 100644 --- a/icechunk-python/src/session.rs +++ b/icechunk-python/src/session.rs @@ -24,13 +24,16 @@ pub struct PySession(pub Arc>); /// python threads can make progress impl PySession { fn __repr__(&self, py: Python<'_>) -> String { - let read_only = + let read_only: String = py.allow_threads(move || self.0.blocking_read().read_only().to_string()); + let snapshot_id: String = + py.allow_threads(move || self.0.blocking_read().snapshot_id().to_string()); format!( "\n\ - read_only: {}", - read_only + read_only: {}\n\ + snapshot_id: {}", + read_only, snapshot_id, ) } From 81b768509b822aecefcf10242c919a4ac82075da Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Wed, 10 Sep 2025 13:02:31 -0400 Subject: [PATCH 03/13] only display branch for a writable session --- icechunk-python/src/session.rs | 37 +++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/icechunk-python/src/session.rs b/icechunk-python/src/session.rs index 7cd8e5090..80206b7c6 100644 --- a/icechunk-python/src/session.rs +++ b/icechunk-python/src/session.rs @@ -24,17 +24,36 @@ pub struct PySession(pub Arc>); /// python threads can make progress impl PySession { fn __repr__(&self, py: Python<'_>) -> String { - let read_only: String = - py.allow_threads(move || self.0.blocking_read().read_only().to_string()); - let snapshot_id: String = + // let mut contents = String::new(); + + let read_only = py.allow_threads(move || self.0.blocking_read().read_only()); + let snapshot_id = py.allow_threads(move || self.0.blocking_read().snapshot_id().to_string()); - format!( - "\n\ - read_only: {}\n\ - snapshot_id: {}", - read_only, snapshot_id, - ) + if read_only { + format!( + "\n\ + read_only: {}\n\ + snapshot_id: {}", + read_only, snapshot_id, + ) + } else { + let branch = py.allow_threads(move || { + self.0 + .blocking_read() + .branch() + .map(|b| b.to_string()) + .unwrap_or_else(|| "None".to_string()) + }); + + format!( + "\n\ + read_only: {}\n\ + snapshot_id: {}\n\ + branch: {}", + read_only, snapshot_id, branch, + ) + } } #[classmethod] From 0aca324fa64d220cfc26deadb0f6dacfbe6887d3 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Wed, 10 Sep 2025 13:21:58 -0400 Subject: [PATCH 04/13] move repr implementation into Display on Session instead --- icechunk-python/src/session.rs | 31 +------------------------------ icechunk/src/session.rs | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 30 deletions(-) diff --git a/icechunk-python/src/session.rs b/icechunk-python/src/session.rs index 80206b7c6..cb991e955 100644 --- a/icechunk-python/src/session.rs +++ b/icechunk-python/src/session.rs @@ -24,36 +24,7 @@ pub struct PySession(pub Arc>); /// python threads can make progress impl PySession { fn __repr__(&self, py: Python<'_>) -> String { - // let mut contents = String::new(); - - let read_only = py.allow_threads(move || self.0.blocking_read().read_only()); - let snapshot_id = - py.allow_threads(move || self.0.blocking_read().snapshot_id().to_string()); - - if read_only { - format!( - "\n\ - read_only: {}\n\ - snapshot_id: {}", - read_only, snapshot_id, - ) - } else { - let branch = py.allow_threads(move || { - self.0 - .blocking_read() - .branch() - .map(|b| b.to_string()) - .unwrap_or_else(|| "None".to_string()) - }); - - format!( - "\n\ - read_only: {}\n\ - snapshot_id: {}\n\ - branch: {}", - read_only, snapshot_id, branch, - ) - } + py.allow_threads(move || self.0.blocking_read().to_string()) } #[classmethod] diff --git a/icechunk/src/session.rs b/icechunk/src/session.rs index 797970f2c..316dfef03 100644 --- a/icechunk/src/session.rs +++ b/icechunk/src/session.rs @@ -6,6 +6,7 @@ use futures::{FutureExt, Stream, StreamExt, TryStreamExt, future::Either, stream use itertools::{Itertools as _, enumerate, repeat_n}; use regex::bytes::Regex; use serde::{Deserialize, Serialize}; +use std::fmt; use std::{ cmp::min, collections::{HashMap, HashSet}, @@ -221,6 +222,39 @@ pub struct Session { splits: HashMap, } +impl fmt::Display for Session { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // let mut contents = String::new(); + + //if self.read_only() { + write!( + f, + "\n\ + read_only: {}\n\ + snapshot_id: {}", + self.read_only(), + self.snapshot_id(), + ) + // } else { + // let branch = py.allow_threads(move || { + // self.0 + // .blocking_read() + // .branch() + // .map(|b| b.to_string()) + // .unwrap_or_else(|| "None".to_string()) + // }); + + // format!( + // "\n\ + // read_only: {}\n\ + // snapshot_id: {}\n\ + // branch: {}", + // read_only, snapshot_id, branch, + // ) + // } + } +} + impl Session { pub fn create_readonly_session( config: RepositoryConfig, From f94d5543f4b4d2fe0e2c23ef601eebb5f2c7f4bb Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Wed, 10 Sep 2025 13:29:30 -0400 Subject: [PATCH 05/13] display branch only for writable Session --- icechunk/src/session.rs | 54 ++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/icechunk/src/session.rs b/icechunk/src/session.rs index 316dfef03..2a67d5f1c 100644 --- a/icechunk/src/session.rs +++ b/icechunk/src/session.rs @@ -224,34 +224,32 @@ pub struct Session { impl fmt::Display for Session { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - // let mut contents = String::new(); - - //if self.read_only() { - write!( - f, - "\n\ - read_only: {}\n\ - snapshot_id: {}", - self.read_only(), - self.snapshot_id(), - ) - // } else { - // let branch = py.allow_threads(move || { - // self.0 - // .blocking_read() - // .branch() - // .map(|b| b.to_string()) - // .unwrap_or_else(|| "None".to_string()) - // }); - - // format!( - // "\n\ - // read_only: {}\n\ - // snapshot_id: {}\n\ - // branch: {}", - // read_only, snapshot_id, branch, - // ) - // } + if self.read_only() { + write!( + f, + "\n\ + read_only: {}\n\ + snapshot_id: {}", + self.read_only(), + self.snapshot_id(), + ) + } else { + let branch = self + .branch() + .map(|b| b.to_string()) + .unwrap_or_else(|| "None".to_string()); + + write!( + f, + "\n\ + read_only: {}\n\ + snapshot_id: {}\n\ + branch: {}", + self.read_only(), + self.snapshot_id(), + branch, + ) + } } } From 733d9d4dece34050cd5f7bb1eb7bae8546fe8a09 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Wed, 10 Sep 2025 14:05:50 -0400 Subject: [PATCH 06/13] add has_uncommitted_changes to repr of writable Session --- icechunk/src/session.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/icechunk/src/session.rs b/icechunk/src/session.rs index 2a67d5f1c..495859358 100644 --- a/icechunk/src/session.rs +++ b/icechunk/src/session.rs @@ -244,10 +244,12 @@ impl fmt::Display for Session { "\n\ read_only: {}\n\ snapshot_id: {}\n\ - branch: {}", + branch: {}\n\ + has_uncommitted_changes: {}", self.read_only(), self.snapshot_id(), branch, + self.has_uncommitted_changes(), ) } } From e0472ca98d53fdccb19180687d652181df82bd8f Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Wed, 10 Sep 2025 14:56:00 -0400 Subject: [PATCH 07/13] try generalizing using a macro --- icechunk/src/display.rs | 17 ++++++++++ icechunk/src/lib.rs | 1 + icechunk/src/session.rs | 75 ++++++++++++++++++++++++++--------------- 3 files changed, 65 insertions(+), 28 deletions(-) create mode 100644 icechunk/src/display.rs diff --git a/icechunk/src/display.rs b/icechunk/src/display.rs new file mode 100644 index 000000000..1cfa91b1f --- /dev/null +++ b/icechunk/src/display.rs @@ -0,0 +1,17 @@ +#[macro_export] +macro_rules! write_dataclass_repr { + // Add description of output here + + // Writes the class name + ($f:expr, $class_name:literal) => { + write!($f, "<{}>", $class_name) + }; // Writes the specified attributes + // ($f:expr, $class_name:literal, $($field_name:literal: $field_value:expr),+ $(,)?) => { + // write!($f, "<{}>\n{}", $class_name, + // [$((concat!($field_name, ": {}"), $field_value)),+] + // .iter() + // .map(|(fmt, val)| format!(fmt, val)) + // .collect::>() + // .join("\n")) + //}; +} diff --git a/icechunk/src/lib.rs b/icechunk/src/lib.rs index 31aef15f6..8a2809ca6 100644 --- a/icechunk/src/lib.rs +++ b/icechunk/src/lib.rs @@ -22,6 +22,7 @@ pub mod change_set; pub mod cli; pub mod config; pub mod conflicts; +pub mod display; pub mod error; pub mod format; pub mod inspect; diff --git a/icechunk/src/session.rs b/icechunk/src/session.rs index 495859358..fcc61eab9 100644 --- a/icechunk/src/session.rs +++ b/icechunk/src/session.rs @@ -46,6 +46,7 @@ use crate::{ repository::{RepositoryError, RepositoryErrorKind}, storage::{self, StorageErrorKind}, virtual_chunks::{VirtualChunkContainer, VirtualChunkResolver}, + write_dataclass_repr, }; #[derive(Debug, Error)] @@ -224,34 +225,52 @@ pub struct Session { impl fmt::Display for Session { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - if self.read_only() { - write!( - f, - "\n\ - read_only: {}\n\ - snapshot_id: {}", - self.read_only(), - self.snapshot_id(), - ) - } else { - let branch = self - .branch() - .map(|b| b.to_string()) - .unwrap_or_else(|| "None".to_string()); - - write!( - f, - "\n\ - read_only: {}\n\ - snapshot_id: {}\n\ - branch: {}\n\ - has_uncommitted_changes: {}", - self.read_only(), - self.snapshot_id(), - branch, - self.has_uncommitted_changes(), - ) - } + // if self.read_only() { + // write!( + // f, + // "\n\ + // read_only: {}\n\ + // snapshot_id: {}", + // self.read_only(), + // self.snapshot_id(), + // ) + // } else { + // let branch = self + // .branch() + // .map(|b| b.to_string()) + // .unwrap_or_else(|| "None".to_string()); + + // write!( + // f, + // "\n\ + // read_only: {}\n\ + // snapshot_id: {}\n\ + // branch: {}\n\ + // has_uncommitted_changes: {}", + // self.read_only(), + // self.snapshot_id(), + // branch, + // self.has_uncommitted_changes(), + // ) + // } + + // if self.read_only() { + // write_dataclass_repr!( + // f, + // "icechunk.Session" // "read_only": self.read_only(), + // // "snapshot_id": self.snapshot_id() + // ) + // } else { + // write_dataclass_repr!( + // f, + // "icechunk.Session" // "read_only": self.read_only(), + // // "snapshot_id": self.snapshot_id(), + // // "branch": self.branch().as_ref().unwrap_or(&"None".to_string()), + // // "has_uncommitted_changes": self.has_uncommitted_changes() + // ) + // } + + write_dataclass_repr!(f, "icechunk.Session") } } From 98949270d2ffdd13b2ba9357e9d5157aa2031ddb Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Wed, 10 Sep 2025 15:49:27 -0400 Subject: [PATCH 08/13] support attributes in macro --- icechunk/src/display.rs | 37 ++++++++++++++-------- icechunk/src/session.rs | 68 +++++++++++++---------------------------- 2 files changed, 46 insertions(+), 59 deletions(-) diff --git a/icechunk/src/display.rs b/icechunk/src/display.rs index 1cfa91b1f..2157d4d02 100644 --- a/icechunk/src/display.rs +++ b/icechunk/src/display.rs @@ -1,17 +1,28 @@ #[macro_export] macro_rules! write_dataclass_repr { - // Add description of output here + // Writes a python-like (non-executable) repr, given a class name and set of attributes. + // + // Result of: + // + // write_dataclass_repr!( + // f, + // "icechunk.Session", + // "read_only": self.read_only(), + // "snapshot_id": self.snapshot_id(), + // ) + // + // Looks like: + // + // + // read_only: true + // snapshot_id: 1CECHNKREP0F1RSTCMT0 - // Writes the class name - ($f:expr, $class_name:literal) => { - write!($f, "<{}>", $class_name) - }; // Writes the specified attributes - // ($f:expr, $class_name:literal, $($field_name:literal: $field_value:expr),+ $(,)?) => { - // write!($f, "<{}>\n{}", $class_name, - // [$((concat!($field_name, ": {}"), $field_value)),+] - // .iter() - // .map(|(fmt, val)| format!(fmt, val)) - // .collect::>() - // .join("\n")) - //}; + ($f:expr, $class_name:literal, $($field_name:literal: $field_value:expr),+ $(,)?) => { + write!( + $f, + "<{}>\n{}", + $class_name, + [$(format!("{}: {}", $field_name, $field_value)),+].join("\n"), + ) + }; } diff --git a/icechunk/src/session.rs b/icechunk/src/session.rs index fcc61eab9..466fed372 100644 --- a/icechunk/src/session.rs +++ b/icechunk/src/session.rs @@ -225,52 +225,28 @@ pub struct Session { impl fmt::Display for Session { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - // if self.read_only() { - // write!( - // f, - // "\n\ - // read_only: {}\n\ - // snapshot_id: {}", - // self.read_only(), - // self.snapshot_id(), - // ) - // } else { - // let branch = self - // .branch() - // .map(|b| b.to_string()) - // .unwrap_or_else(|| "None".to_string()); - - // write!( - // f, - // "\n\ - // read_only: {}\n\ - // snapshot_id: {}\n\ - // branch: {}\n\ - // has_uncommitted_changes: {}", - // self.read_only(), - // self.snapshot_id(), - // branch, - // self.has_uncommitted_changes(), - // ) - // } - - // if self.read_only() { - // write_dataclass_repr!( - // f, - // "icechunk.Session" // "read_only": self.read_only(), - // // "snapshot_id": self.snapshot_id() - // ) - // } else { - // write_dataclass_repr!( - // f, - // "icechunk.Session" // "read_only": self.read_only(), - // // "snapshot_id": self.snapshot_id(), - // // "branch": self.branch().as_ref().unwrap_or(&"None".to_string()), - // // "has_uncommitted_changes": self.has_uncommitted_changes() - // ) - // } - - write_dataclass_repr!(f, "icechunk.Session") + if self.read_only() { + write_dataclass_repr!( + f, + "icechunk.Session", + "read_only": self.read_only(), + "snapshot_id": self.snapshot_id(), + ) + } else { + let branch = self + .branch() + .map(|b| b.to_string()) + .unwrap_or_else(|| "None".to_string()); + + write_dataclass_repr!( + f, + "icechunk.Session", + "read_only": self.read_only(), + "snapshot_id": self.snapshot_id(), + "branch": branch, + "has_uncommitted_changes": self.has_uncommitted_changes(), + ) + } } } From 76f2171f81654a08901e02cee398e911431cc017 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Wed, 10 Sep 2025 18:21:46 -0400 Subject: [PATCH 09/13] replace macro with a function --- icechunk/src/display.rs | 28 +++++++++++++--------------- icechunk/src/session.rs | 32 +++++++++++++++++++------------- 2 files changed, 32 insertions(+), 28 deletions(-) diff --git a/icechunk/src/display.rs b/icechunk/src/display.rs index 2157d4d02..913146880 100644 --- a/icechunk/src/display.rs +++ b/icechunk/src/display.rs @@ -1,14 +1,14 @@ -#[macro_export] -macro_rules! write_dataclass_repr { - // Writes a python-like (non-executable) repr, given a class name and set of attributes. +pub fn dataclass_repr(class_name: &str, attributes: &[(&str, &str)]) -> String { + // Writes a python-like (non-executable) repr, given a class name and an (ordered) mapping of name, attribute pairs. // // Result of: // - // write_dataclass_repr!( - // f, + // dataclass_repr( // "icechunk.Session", - // "read_only": self.read_only(), - // "snapshot_id": self.snapshot_id(), + // &[ + // ("read_only", &self.read_only().to_string()), + // ("snapshot_id", &self.snapshot_id().to_string()), + // ] // ) // // Looks like: @@ -17,12 +17,10 @@ macro_rules! write_dataclass_repr { // read_only: true // snapshot_id: 1CECHNKREP0F1RSTCMT0 - ($f:expr, $class_name:literal, $($field_name:literal: $field_value:expr),+ $(,)?) => { - write!( - $f, - "<{}>\n{}", - $class_name, - [$(format!("{}: {}", $field_name, $field_value)),+].join("\n"), - ) - }; + let attrs = attributes + .iter() + .map(|(name, value)| format!("\n{}: {}", name, value)) + .collect::(); + + format!("<{}>{}", class_name, attrs) } diff --git a/icechunk/src/session.rs b/icechunk/src/session.rs index 466fed372..4278c0ade 100644 --- a/icechunk/src/session.rs +++ b/icechunk/src/session.rs @@ -26,6 +26,7 @@ use crate::{ change_set::{ArrayData, ChangeSet}, config::{ManifestSplitDim, ManifestSplitDimCondition, ManifestSplittingConfig}, conflicts::{Conflict, ConflictResolution, ConflictSolver}, + display::dataclass_repr, error::ICError, format::{ ByteRange, ChunkIndices, ChunkOffset, IcechunkFormatError, @@ -46,7 +47,6 @@ use crate::{ repository::{RepositoryError, RepositoryErrorKind}, storage::{self, StorageErrorKind}, virtual_chunks::{VirtualChunkContainer, VirtualChunkResolver}, - write_dataclass_repr, }; #[derive(Debug, Error)] @@ -225,12 +225,13 @@ pub struct Session { impl fmt::Display for Session { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - if self.read_only() { - write_dataclass_repr!( - f, + let repr = if self.read_only() { + dataclass_repr( "icechunk.Session", - "read_only": self.read_only(), - "snapshot_id": self.snapshot_id(), + &[ + ("read_only", &self.read_only().to_string()), + ("snapshot_id", &self.snapshot_id().to_string()), + ], ) } else { let branch = self @@ -238,15 +239,20 @@ impl fmt::Display for Session { .map(|b| b.to_string()) .unwrap_or_else(|| "None".to_string()); - write_dataclass_repr!( - f, + dataclass_repr( "icechunk.Session", - "read_only": self.read_only(), - "snapshot_id": self.snapshot_id(), - "branch": branch, - "has_uncommitted_changes": self.has_uncommitted_changes(), + &[ + ("read_only", &self.read_only().to_string()), + ("snapshot_id", &self.snapshot_id().to_string()), + ("branch", &branch), + ( + "has_uncommitted_changes", + &self.has_uncommitted_changes().to_string(), + ), + ], ) - } + }; + write!(f, "{}", repr) } } From e00a79ce9327746c0ee9270d1d042ea9a010a776 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Wed, 10 Sep 2025 19:20:01 -0400 Subject: [PATCH 10/13] add an example of an executable repr --- .../python/icechunk/_icechunk_python.pyi | 1 + icechunk-python/src/config.rs | 23 ++++++++---- icechunk/src/display.rs | 35 ++++++++++++++++++- 3 files changed, 51 insertions(+), 8 deletions(-) diff --git a/icechunk-python/python/icechunk/_icechunk_python.pyi b/icechunk-python/python/icechunk/_icechunk_python.pyi index fe8014c34..cb1ca9969 100644 --- a/icechunk-python/python/icechunk/_icechunk_python.pyi +++ b/icechunk-python/python/icechunk/_icechunk_python.pyi @@ -263,6 +263,7 @@ class CachingConfig: num_bytes_chunks: int | None The number of bytes of chunks to cache. """ + def __repr__(self) -> str: ... @property def num_snapshot_nodes(self) -> int | None: """ diff --git a/icechunk-python/src/config.rs b/icechunk-python/src/config.rs index 83dca1712..20da55777 100644 --- a/icechunk-python/src/config.rs +++ b/icechunk-python/src/config.rs @@ -1,5 +1,6 @@ use async_trait::async_trait; use chrono::{DateTime, Datelike, TimeDelta, Timelike, Utc}; +use icechunk::display::executable_dataclass_repr; use icechunk::storage::RetriesSettings; use itertools::Itertools; use pyo3::exceptions::PyValueError; @@ -694,13 +695,21 @@ impl PyCachingConfig { } pub fn __repr__(&self) -> String { - format!( - r#"CachingConfig(num_snapshot_nodes={snap}, num_chunk_refs={man}, num_transaction_changes={tx}, num_bytes_attributes={att}, num_bytes_chunks={chunks})"#, - snap = format_option_to_string(self.num_snapshot_nodes), - man = format_option_to_string(self.num_chunk_refs), - tx = format_option_to_string(self.num_transaction_changes), - att = format_option_to_string(self.num_bytes_attributes), - chunks = format_option_to_string(self.num_bytes_chunks), + executable_dataclass_repr( + "icechunk.CachingConfig", + &[ + ("num_snapshot_nodes", &format_option_to_string(self.num_snapshot_nodes)), + ("num_chunk_refs", &format_option_to_string(self.num_chunk_refs)), + ( + "num_transaction_changes", + &format_option_to_string(self.num_transaction_changes), + ), + ( + "num_bytes_attributes", + &format_option_to_string(self.num_bytes_attributes), + ), + ("num_bytes_chunks", &format_option_to_string(self.num_bytes_chunks)), + ], ) } } diff --git a/icechunk/src/display.rs b/icechunk/src/display.rs index 913146880..9792b2079 100644 --- a/icechunk/src/display.rs +++ b/icechunk/src/display.rs @@ -1,5 +1,5 @@ pub fn dataclass_repr(class_name: &str, attributes: &[(&str, &str)]) -> String { - // Writes a python-like (non-executable) repr, given a class name and an (ordered) mapping of name, attribute pairs. + // Writes a python-like (non-executable) multi-line repr, given a class name and an (ordered) mapping of name, attribute pairs. // // Result of: // @@ -24,3 +24,36 @@ pub fn dataclass_repr(class_name: &str, attributes: &[(&str, &str)]) -> String { format!("<{}>{}", class_name, attrs) } + +pub fn executable_dataclass_repr( + class_name: &str, + attributes: &[(&str, &str)], +) -> String { + // Writes a python-like (executable) multi-line repr, given a class name and an (ordered) mapping of name, attribute pairs. + // + // Result of: + // + // dataclass_repr( + // "icechunk.Config", + // &[ + // ("field1", &self.field1().to_string()), + // ("field2", &self.field2().to_string()), + // ] + // ) + // + // Looks like: + // + // icechunk.Config( + // field1=value, + // field2=value, + // ) + + let attrs = attributes + .iter() + .map(|(name, value)| format!("\n {}={}", name, value)) + .collect::(); + + format!("{}({}\n)", class_name, attrs) +} + +// TODO: consolidate these two functions? From cae6ca1219731c3bc7e13b7bc93df4dbd12e4020 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Thu, 11 Sep 2025 14:38:29 -0400 Subject: [PATCH 11/13] rename functions --- icechunk-python/src/config.rs | 4 ++-- icechunk/src/display.rs | 9 +++------ icechunk/src/session.rs | 6 +++--- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/icechunk-python/src/config.rs b/icechunk-python/src/config.rs index 20da55777..e37d0f080 100644 --- a/icechunk-python/src/config.rs +++ b/icechunk-python/src/config.rs @@ -1,6 +1,6 @@ use async_trait::async_trait; use chrono::{DateTime, Datelike, TimeDelta, Timelike, Utc}; -use icechunk::display::executable_dataclass_repr; +use icechunk::display::dataclass_repr; use icechunk::storage::RetriesSettings; use itertools::Itertools; use pyo3::exceptions::PyValueError; @@ -695,7 +695,7 @@ impl PyCachingConfig { } pub fn __repr__(&self) -> String { - executable_dataclass_repr( + dataclass_repr( "icechunk.CachingConfig", &[ ("num_snapshot_nodes", &format_option_to_string(self.num_snapshot_nodes)), diff --git a/icechunk/src/display.rs b/icechunk/src/display.rs index 9792b2079..27a107941 100644 --- a/icechunk/src/display.rs +++ b/icechunk/src/display.rs @@ -1,9 +1,9 @@ -pub fn dataclass_repr(class_name: &str, attributes: &[(&str, &str)]) -> String { +pub fn dataclass_str(class_name: &str, attributes: &[(&str, &str)]) -> String { // Writes a python-like (non-executable) multi-line repr, given a class name and an (ordered) mapping of name, attribute pairs. // // Result of: // - // dataclass_repr( + // dataclass_str( // "icechunk.Session", // &[ // ("read_only", &self.read_only().to_string()), @@ -25,10 +25,7 @@ pub fn dataclass_repr(class_name: &str, attributes: &[(&str, &str)]) -> String { format!("<{}>{}", class_name, attrs) } -pub fn executable_dataclass_repr( - class_name: &str, - attributes: &[(&str, &str)], -) -> String { +pub fn dataclass_repr(class_name: &str, attributes: &[(&str, &str)]) -> String { // Writes a python-like (executable) multi-line repr, given a class name and an (ordered) mapping of name, attribute pairs. // // Result of: diff --git a/icechunk/src/session.rs b/icechunk/src/session.rs index 4278c0ade..f1c6327eb 100644 --- a/icechunk/src/session.rs +++ b/icechunk/src/session.rs @@ -26,7 +26,7 @@ use crate::{ change_set::{ArrayData, ChangeSet}, config::{ManifestSplitDim, ManifestSplitDimCondition, ManifestSplittingConfig}, conflicts::{Conflict, ConflictResolution, ConflictSolver}, - display::dataclass_repr, + display::dataclass_str, error::ICError, format::{ ByteRange, ChunkIndices, ChunkOffset, IcechunkFormatError, @@ -226,7 +226,7 @@ pub struct Session { impl fmt::Display for Session { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let repr = if self.read_only() { - dataclass_repr( + dataclass_str( "icechunk.Session", &[ ("read_only", &self.read_only().to_string()), @@ -239,7 +239,7 @@ impl fmt::Display for Session { .map(|b| b.to_string()) .unwrap_or_else(|| "None".to_string()); - dataclass_repr( + dataclass_str( "icechunk.Session", &[ ("read_only", &self.read_only().to_string()), From c47da08631ad12268725f70b327eee4f531cdd11 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Thu, 11 Sep 2025 18:44:33 -0400 Subject: [PATCH 12/13] introduce the PyRepr trait --- icechunk-python/src/config.rs | 71 ++++++++++++++++++++++--------- icechunk-python/src/session.rs | 76 +++++++++++++++++++++++++++++++++- icechunk/src/display.rs | 8 ++++ icechunk/src/session.rs | 35 ---------------- 4 files changed, 133 insertions(+), 57 deletions(-) diff --git a/icechunk-python/src/config.rs b/icechunk-python/src/config.rs index e37d0f080..61c41d975 100644 --- a/icechunk-python/src/config.rs +++ b/icechunk-python/src/config.rs @@ -1,6 +1,6 @@ use async_trait::async_trait; use chrono::{DateTime, Datelike, TimeDelta, Timelike, Utc}; -use icechunk::display::dataclass_repr; +use icechunk::display::{PyRepr, dataclass_repr, dataclass_str}; use icechunk::storage::RetriesSettings; use itertools::Itertools; use pyo3::exceptions::PyValueError; @@ -668,8 +668,58 @@ pub struct PyCachingConfig { pub num_bytes_chunks: Option, } +impl PyRepr for PyCachingConfig { + fn __str__(&self) -> String { + dataclass_str( + "icechunk.CachingConfig", + &[ + ("num_snapshot_nodes", &format_option_to_string(self.num_snapshot_nodes)), + ("num_chunk_refs", &format_option_to_string(self.num_chunk_refs)), + ( + "num_transaction_changes", + &format_option_to_string(self.num_transaction_changes), + ), + ( + "num_bytes_attributes", + &format_option_to_string(self.num_bytes_attributes), + ), + ("num_bytes_chunks", &format_option_to_string(self.num_bytes_chunks)), + ], + ) + } + + fn __repr__(&self) -> String { + dataclass_repr( + "icechunk.CachingConfig", + &[ + ("num_snapshot_nodes", &format_option_to_string(self.num_snapshot_nodes)), + ("num_chunk_refs", &format_option_to_string(self.num_chunk_refs)), + ( + "num_transaction_changes", + &format_option_to_string(self.num_transaction_changes), + ), + ( + "num_bytes_attributes", + &format_option_to_string(self.num_bytes_attributes), + ), + ("num_bytes_chunks", &format_option_to_string(self.num_bytes_chunks)), + ], + ) + } +} + #[pymethods] impl PyCachingConfig { + fn __str__(&self) -> String { + // Only needed because #[pymethods] cannot be used on trait impl blocks + ::__str__(self) + } + + fn __repr__(&self) -> String { + // Only needed because #[pymethods] cannot be used on trait impl blocks + ::__repr__(self) + } + #[staticmethod] /// Create a default `CachingConfig` instance fn default() -> Self { @@ -693,25 +743,6 @@ impl PyCachingConfig { num_bytes_chunks, } } - - pub fn __repr__(&self) -> String { - dataclass_repr( - "icechunk.CachingConfig", - &[ - ("num_snapshot_nodes", &format_option_to_string(self.num_snapshot_nodes)), - ("num_chunk_refs", &format_option_to_string(self.num_chunk_refs)), - ( - "num_transaction_changes", - &format_option_to_string(self.num_transaction_changes), - ), - ( - "num_bytes_attributes", - &format_option_to_string(self.num_bytes_attributes), - ), - ("num_bytes_chunks", &format_option_to_string(self.num_bytes_chunks)), - ], - ) - } } impl From<&PyCachingConfig> for CachingConfig { diff --git a/icechunk-python/src/session.rs b/icechunk-python/src/session.rs index cb991e955..c4c46bca2 100644 --- a/icechunk-python/src/session.rs +++ b/icechunk-python/src/session.rs @@ -2,6 +2,7 @@ use std::{borrow::Cow, ops::Deref, sync::Arc}; use async_stream::try_stream; use futures::{StreamExt, TryStreamExt}; +use icechunk::display::{PyRepr, dataclass_str}; use icechunk::{Store, session::Session}; use pyo3::{prelude::*, types::PyType}; use tokio::sync::{Mutex, RwLock}; @@ -19,12 +20,83 @@ use crate::{ #[derive(Clone)] pub struct PySession(pub Arc>); +impl PyRepr for PySession { + fn __str__(&self) -> String { + // Use dataclass_str because Session is a non-executable class + let session = self.0.blocking_read(); + if session.read_only() { + dataclass_str( + "icechunk.Session", + &[ + ("read_only", &session.read_only().to_string()), + ("snapshot_id", &session.snapshot_id().to_string()), + ], + ) + } else { + let branch = session + .branch() + .map(|b| b.to_string()) + .unwrap_or_else(|| "None".to_string()); + + dataclass_str( + "icechunk.Session", + &[ + ("read_only", &session.read_only().to_string()), + ("snapshot_id", &session.snapshot_id().to_string()), + ("branch", &branch), + ( + "has_uncommitted_changes", + &session.has_uncommitted_changes().to_string(), + ), + ], + ) + } + } + + fn __repr__(&self) -> String { + let session = self.0.blocking_read(); + if session.read_only() { + dataclass_str( + "icechunk.Session", + &[ + ("read_only", &session.read_only().to_string()), + ("snapshot_id", &session.snapshot_id().to_string()), + ], + ) + } else { + let branch = session + .branch() + .map(|b| b.to_string()) + .unwrap_or_else(|| "None".to_string()); + + dataclass_str( + "icechunk.Session", + &[ + ("read_only", &session.read_only().to_string()), + ("snapshot_id", &session.snapshot_id().to_string()), + ("branch", &branch), + ( + "has_uncommitted_changes", + &session.has_uncommitted_changes().to_string(), + ), + ], + ) + } + } +} + #[pymethods] /// Most functions in this class block, so they need to `allow_threads` so other /// python threads can make progress impl PySession { - fn __repr__(&self, py: Python<'_>) -> String { - py.allow_threads(move || self.0.blocking_read().to_string()) + fn __str__(&self) -> String { + // Only needed because #[pymethods] cannot be used on trait impl blocks + ::__str__(self) + } + + fn __repr__(&self) -> String { + // Only needed because #[pymethods] cannot be used on trait impl blocks + ::__repr__(self) } #[classmethod] diff --git a/icechunk/src/display.rs b/icechunk/src/display.rs index 27a107941..2f5b6a248 100644 --- a/icechunk/src/display.rs +++ b/icechunk/src/display.rs @@ -1,3 +1,11 @@ +pub trait PyRepr { + fn __str__(&self) -> String; + + fn __repr__(&self) -> String; + + // TODO fn _repr_html_(&self) -> String; +} + pub fn dataclass_str(class_name: &str, attributes: &[(&str, &str)]) -> String { // Writes a python-like (non-executable) multi-line repr, given a class name and an (ordered) mapping of name, attribute pairs. // diff --git a/icechunk/src/session.rs b/icechunk/src/session.rs index f1c6327eb..797970f2c 100644 --- a/icechunk/src/session.rs +++ b/icechunk/src/session.rs @@ -6,7 +6,6 @@ use futures::{FutureExt, Stream, StreamExt, TryStreamExt, future::Either, stream use itertools::{Itertools as _, enumerate, repeat_n}; use regex::bytes::Regex; use serde::{Deserialize, Serialize}; -use std::fmt; use std::{ cmp::min, collections::{HashMap, HashSet}, @@ -26,7 +25,6 @@ use crate::{ change_set::{ArrayData, ChangeSet}, config::{ManifestSplitDim, ManifestSplitDimCondition, ManifestSplittingConfig}, conflicts::{Conflict, ConflictResolution, ConflictSolver}, - display::dataclass_str, error::ICError, format::{ ByteRange, ChunkIndices, ChunkOffset, IcechunkFormatError, @@ -223,39 +221,6 @@ pub struct Session { splits: HashMap, } -impl fmt::Display for Session { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let repr = if self.read_only() { - dataclass_str( - "icechunk.Session", - &[ - ("read_only", &self.read_only().to_string()), - ("snapshot_id", &self.snapshot_id().to_string()), - ], - ) - } else { - let branch = self - .branch() - .map(|b| b.to_string()) - .unwrap_or_else(|| "None".to_string()); - - dataclass_str( - "icechunk.Session", - &[ - ("read_only", &self.read_only().to_string()), - ("snapshot_id", &self.snapshot_id().to_string()), - ("branch", &branch), - ( - "has_uncommitted_changes", - &self.has_uncommitted_changes().to_string(), - ), - ], - ) - }; - write!(f, "{}", repr) - } -} - impl Session { pub fn create_readonly_session( config: RepositoryConfig, From 7381211880748ec5a356ba1b8f8fbba314696ac2 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Thu, 11 Sep 2025 18:48:28 -0400 Subject: [PATCH 13/13] rename display to repr --- icechunk-python/src/config.rs | 2 +- icechunk-python/src/session.rs | 2 +- icechunk/src/lib.rs | 2 +- icechunk/src/{display.rs => repr.rs} | 0 4 files changed, 3 insertions(+), 3 deletions(-) rename icechunk/src/{display.rs => repr.rs} (100%) diff --git a/icechunk-python/src/config.rs b/icechunk-python/src/config.rs index 61c41d975..3bd97f51c 100644 --- a/icechunk-python/src/config.rs +++ b/icechunk-python/src/config.rs @@ -1,6 +1,6 @@ use async_trait::async_trait; use chrono::{DateTime, Datelike, TimeDelta, Timelike, Utc}; -use icechunk::display::{PyRepr, dataclass_repr, dataclass_str}; +use icechunk::repr::{PyRepr, dataclass_repr, dataclass_str}; use icechunk::storage::RetriesSettings; use itertools::Itertools; use pyo3::exceptions::PyValueError; diff --git a/icechunk-python/src/session.rs b/icechunk-python/src/session.rs index c4c46bca2..f22e10204 100644 --- a/icechunk-python/src/session.rs +++ b/icechunk-python/src/session.rs @@ -2,7 +2,7 @@ use std::{borrow::Cow, ops::Deref, sync::Arc}; use async_stream::try_stream; use futures::{StreamExt, TryStreamExt}; -use icechunk::display::{PyRepr, dataclass_str}; +use icechunk::repr::{PyRepr, dataclass_str}; use icechunk::{Store, session::Session}; use pyo3::{prelude::*, types::PyType}; use tokio::sync::{Mutex, RwLock}; diff --git a/icechunk/src/lib.rs b/icechunk/src/lib.rs index 8a2809ca6..131dc3c12 100644 --- a/icechunk/src/lib.rs +++ b/icechunk/src/lib.rs @@ -22,13 +22,13 @@ pub mod change_set; pub mod cli; pub mod config; pub mod conflicts; -pub mod display; pub mod error; pub mod format; pub mod inspect; pub mod ops; pub mod refs; pub mod repository; +pub mod repr; pub mod session; pub mod storage; pub mod store; diff --git a/icechunk/src/display.rs b/icechunk/src/repr.rs similarity index 100% rename from icechunk/src/display.rs rename to icechunk/src/repr.rs