diff --git a/icechunk-python/python/icechunk/_icechunk_python.pyi b/icechunk-python/python/icechunk/_icechunk_python.pyi index 3fc129ae9..93f6ba15a 100644 --- a/icechunk-python/python/icechunk/_icechunk_python.pyi +++ b/icechunk-python/python/icechunk/_icechunk_python.pyi @@ -1499,6 +1499,10 @@ class PyRepository: async def inspect_snapshot_async( self, snapshot_id: str, *, pretty: bool = True ) -> str: ... + def inspect_manifest(self, manifest_id: str, *, pretty: bool = True) -> str: ... + async def inspect_manifest_async( + self, manifest_id: str, *, pretty: bool = True + ) -> str: ... class PySession: @classmethod diff --git a/icechunk-python/python/icechunk/repository.py b/icechunk-python/python/icechunk/repository.py index 58e9a9f62..4ec3462f1 100644 --- a/icechunk-python/python/icechunk/repository.py +++ b/icechunk-python/python/icechunk/repository.py @@ -1397,3 +1397,11 @@ async def inspect_snapshot_async( self, snapshot_id: str, *, pretty: bool = True ) -> str: return await self._repository.inspect_snapshot_async(snapshot_id, pretty=pretty) + + def inspect_manifest(self, manifest_id: str, *, pretty: bool = True) -> str: + return self._repository.inspect_manifest(manifest_id, pretty=pretty) + + async def inspect_manifest_async( + self, manifest_id: str, *, pretty: bool = True + ) -> str: + return await self._repository.inspect_manifest_async(manifest_id, pretty=pretty) diff --git a/icechunk-python/src/repository.rs b/icechunk-python/src/repository.rs index cd9841a5f..56fad89d1 100644 --- a/icechunk-python/src/repository.rs +++ b/icechunk-python/src/repository.rs @@ -13,11 +13,11 @@ use icechunk::{ Repository, config::Credentials, format::{ - SnapshotId, + ManifestId, SnapshotId, snapshot::{ManifestFileInfo, SnapshotInfo, SnapshotProperties}, transaction_log::Diff, }, - inspect::snapshot_json, + inspect::{manifest_json, snapshot_json}, ops::{ gc::{ExpiredRefAction, GCConfig, GCSummary, expire, garbage_collect}, manifests::rewrite_manifests, @@ -1660,6 +1660,42 @@ impl PyRepository { Ok(res) }) } + + #[pyo3(signature = (manifest_id, *, pretty = true))] + fn inspect_manifest(&self, manifest_id: String, pretty: bool) -> PyResult { + let result = pyo3_async_runtimes::tokio::get_runtime() + .block_on(async move { + let lock = self.0.read().await; + let manifest = ManifestId::try_from(manifest_id.as_str()) + .map_err(|e| RepositoryErrorKind::Other(e.to_string()))?; + let res = manifest_json(lock.asset_manager(), &manifest, pretty).await?; + Ok(res) + }) + .map_err(PyIcechunkStoreError::RepositoryError)?; + Ok(result) + } + + #[pyo3(signature = (manifest_id, *, pretty = true))] + fn inspect_manifest_async<'py>( + &self, + py: Python<'py>, + manifest_id: String, + pretty: bool, + ) -> PyResult> { + let repository = self.0.clone(); + pyo3_async_runtimes::tokio::future_into_py(py, async move { + let lock = repository.read().await; + let manifest = ManifestId::try_from(manifest_id.as_str()) + .map_err(|e| { + RepositoryError::from(RepositoryErrorKind::Other(e.to_string())) + }) + .map_err(PyIcechunkStoreError::RepositoryError)?; + let res = manifest_json(lock.asset_manager(), &manifest, pretty) + .await + .map_err(PyIcechunkStoreError::RepositoryError)?; + Ok(res) + }) + } } fn map_credentials( diff --git a/icechunk-python/tests/test_inspect.py b/icechunk-python/tests/test_inspect.py index 583ca11f3..9a9a994e8 100644 --- a/icechunk-python/tests/test_inspect.py +++ b/icechunk-python/tests/test_inspect.py @@ -33,3 +33,39 @@ async def test_inspect_snapshot_async() -> None: assert pretty["id"] == snap assert pretty_str != non_pretty_str assert pretty == non_pretty + + +def test_inspect_manifest() -> None: + repo = ic.Repository.open( + storage=ic.local_filesystem_storage("./tests/data/test-repo") + ) + snap = next(repo.ancestry(branch="main")).id + snap_info = json.loads(repo.inspect_snapshot(snap, pretty=True)) + man_id = snap_info["manifests"][0]["id"] + pretty_str = repo.inspect_manifest(man_id, pretty=True) + non_pretty_str = repo.inspect_manifest(man_id, pretty=False) + + pretty = json.loads(pretty_str) + non_pretty = json.loads(non_pretty_str) + + assert pretty["id"] == man_id + assert pretty_str != non_pretty_str + assert pretty == non_pretty + + +async def test_inspect_manifest_async() -> None: + repo = ic.Repository.open( + storage=ic.local_filesystem_storage("./tests/data/test-repo") + ) + snap = next(repo.ancestry(branch="main")).id + snap_info = json.loads(await repo.inspect_snapshot_async(snap, pretty=True)) + man_id = snap_info["manifests"][0]["id"] + pretty_str = await repo.inspect_manifest_async(man_id, pretty=True) + non_pretty_str = await repo.inspect_manifest_async(man_id, pretty=False) + + pretty = json.loads(pretty_str) + non_pretty = json.loads(non_pretty_str) + + assert pretty["id"] == man_id + assert pretty_str != non_pretty_str + assert pretty == non_pretty diff --git a/icechunk/src/format/manifest.rs b/icechunk/src/format/manifest.rs index f437a321f..3a00e1bc7 100644 --- a/icechunk/src/format/manifest.rs +++ b/icechunk/src/format/manifest.rs @@ -470,6 +470,15 @@ impl Manifest { array_manifest.refs().iter().map(|r| ref_to_payload(r)) }) } + + pub fn nodes(&self) -> impl Iterator + '_ { + self.root().arrays().iter().map(|array_manifest| array_manifest.node_id().into()) + } + + pub fn node_refs(&self, node: &NodeId) -> Option { + let manifest = self.root(); + lookup_node(manifest, node).map(|am| am.refs().len()) + } } fn lookup_node<'a>( diff --git a/icechunk/src/inspect.rs b/icechunk/src/inspect.rs index 1a0c4fb3f..1b3a857dc 100644 --- a/icechunk/src/inspect.rs +++ b/icechunk/src/inspect.rs @@ -5,8 +5,8 @@ use serde::{Deserialize, Serialize}; use crate::{ asset_manager::AssetManager, format::{ - SnapshotId, - manifest::ManifestRef, + IcechunkFormatError, ManifestId, SnapshotId, + manifest::{Manifest, ManifestRef}, snapshot::{ ManifestFileInfo, NodeData, NodeSnapshot, NodeType, SnapshotProperties, }, @@ -91,6 +91,65 @@ struct SnapshotInfoInspect { nodes: Vec, } +#[derive(Debug, Serialize, Deserialize)] +struct ArrayManifestInspect { + node_id: String, + num_chunk_refs: u64, +} + +#[derive(Debug, Serialize, Deserialize)] +struct ManifestInfoInspect { + id: String, + size_bytes: u64, + total_chunk_refs: u64, + arrays: Vec, +} + +impl TryFrom<&Manifest> for ManifestInfoInspect { + type Error = IcechunkFormatError; + + fn try_from(value: &Manifest) -> Result { + let arrays = value + .nodes() + .filter_map(|node_id| { + value.node_refs(&node_id).map(|num_chunk_refs| ArrayManifestInspect { + node_id: node_id.to_string(), + num_chunk_refs: num_chunk_refs as u64, + }) + }) + .collect(); + Ok(Self { + id: value.id().to_string(), + size_bytes: value.bytes().len() as u64, + total_chunk_refs: value.len() as u64, + arrays, + }) + } +} + +async fn inspect_manifest( + asset_manager: &AssetManager, + id: &ManifestId, +) -> RepositoryResult { + let manifest = asset_manager.fetch_manifest_unknown_size(id).await?; + Ok(manifest.as_ref().try_into()?) +} + +pub async fn manifest_json( + asset_manager: &AssetManager, + id: &ManifestId, + pretty: bool, +) -> RepositoryResult { + let info = inspect_manifest(asset_manager, id).await?; + let res = if pretty { + serde_json::to_string_pretty(&info) + } else { + serde_json::to_string(&info) + } + .map_err(|e| RepositoryErrorKind::Other(e.to_string()))?; + Ok(res) +} + async fn inspect_snapshot( asset_manager: &AssetManager, id: &SnapshotId, @@ -156,4 +215,30 @@ mod tests { Ok(()) } + + #[icechunk_macros::tokio_test] + async fn test_print_manifest() -> Result<(), Box> { + let st = Arc::new( + ObjectStorage::new_local_filesystem(&PathBuf::from( + "../icechunk-python/tests/data/split-repo", + )) + .await?, + ); + let repo = Repository::open(None, st, Default::default()).await?; + let snap = repo + .ancestry(&VersionInfo::BranchTipRef("main".to_string())) + .await? + .boxed() + .try_next() + .await? + .unwrap(); + + let manifest_id = &snap.manifests[0].id; + + let json = manifest_json(repo.asset_manager(), manifest_id, true).await?; + let info: ManifestInfoInspect = serde_json::from_str(json.as_str())?; + assert!(info.id == manifest_id.to_string()); + + Ok(()) + } }