Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion icechunk/src/format/snapshot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,7 @@ impl Snapshot {
message: String,
properties: Option<SnapshotProperties>,
mut manifest_files: Vec<ManifestFileInfo>,
flushed_at: Option<DateTime<Utc>>,
sorted_iter: I,
) -> IcechunkResult<Self>
where
Expand Down Expand Up @@ -349,7 +350,7 @@ impl Snapshot {

let message = builder.create_string(&message);
let parent_id = parent_id.map(|oid| generated::ObjectId12::new(&oid.0));
let flushed_at = Utc::now().timestamp_micros() as u64;
let flushed_at = flushed_at.unwrap_or_else(Utc::now).timestamp_micros() as u64;
let id = generated::ObjectId12::new(&id.unwrap_or_else(SnapshotId::random).0);

let nodes: Vec<_> = sorted_iter
Expand Down Expand Up @@ -387,6 +388,7 @@ impl Snapshot {
Self::INITIAL_COMMIT_MESSAGE.to_string(),
Some(properties),
Default::default(),
None,
nodes,
)
}
Expand Down Expand Up @@ -459,6 +461,7 @@ impl Snapshot {
new_child.message().clone(),
Some(new_child.metadata()?.clone()),
new_child.manifest_files().collect(),
Some(new_child.flushed_at()?),
new_child.iter(),
)
}
Expand Down Expand Up @@ -736,6 +739,7 @@ mod tests {
String::default(),
Default::default(),
manifests,
None,
nodes.into_iter().map(Ok::<NodeSnapshot, Infallible>),
)
.unwrap();
Expand Down
68 changes: 39 additions & 29 deletions icechunk/src/ops/gc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -344,9 +344,12 @@ async fn gc_transaction_logs(

#[derive(Debug, PartialEq, Eq, Clone)]
pub enum ExpireRefResult {
RefIsExpired,
NothingToDo,
Done { released_snapshots: HashSet<SnapshotId>, edited_snapshot: SnapshotId },
Done {
released_snapshots: HashSet<SnapshotId>,
edited_snapshot: SnapshotId,
ref_is_expired: bool,
},
}

/// Expire snapshots older than a threshold.
Expand Down Expand Up @@ -394,10 +397,10 @@ pub async fn expire_ref(

pin!(ancestry);

// If we point to an expired snapshot already, there is nothing to do
let mut ref_is_expired = false;
if let Some(Ok(info)) = ancestry.as_mut().peek().await {
if info.flushed_at < older_than {
return Ok(ExpireRefResult::RefIsExpired);
ref_is_expired = true;
}
}

Expand Down Expand Up @@ -434,15 +437,14 @@ pub async fn expire_ref(
// and, we only set a root as parent
assert!(root.parent_id().is_none());

assert!(editable_snap.flushed_at()? >= older_than);

// TODO: add properties to the snapshot that tell us it was history edited
let new_snapshot = Arc::new(root.adopt(&editable_snap)?);
asset_manager.write_snapshot(new_snapshot).await?;

Ok(ExpireRefResult::Done {
released_snapshots: released,
edited_snapshot: editable_snap.id().clone(),
ref_is_expired,
})
}

Expand Down Expand Up @@ -500,33 +502,41 @@ pub async fn expire(
})
.try_fold(ExpireResult::default(), |mut result, (r, ref_result)| async move {
match ref_result {
ExpireRefResult::Done { released_snapshots, edited_snapshot } => {
ExpireRefResult::Done {
released_snapshots,
edited_snapshot,
ref_is_expired,
} => {
result.released_snapshots.extend(released_snapshots.into_iter());
result.edited_snapshots.insert(edited_snapshot);
Ok(result)
}
ExpireRefResult::RefIsExpired => match &r {
Ref::Tag(name) => {
if expired_tags == ExpiredRefAction::Delete {
delete_tag(storage, storage_settings, name.as_str())
.await
.map_err(GCError::Ref)?;
result.deleted_refs.insert(r);
if ref_is_expired {
match &r {
Ref::Tag(name) => {
if expired_tags == ExpiredRefAction::Delete {
delete_tag(storage, storage_settings, name.as_str())
.await
.map_err(GCError::Ref)?;
result.deleted_refs.insert(r);
}
}
Ref::Branch(name) => {
if expired_branches == ExpiredRefAction::Delete
&& name != Ref::DEFAULT_BRANCH
{
delete_branch(
storage,
storage_settings,
name.as_str(),
)
.await
.map_err(GCError::Ref)?;
result.deleted_refs.insert(r);
}
}
}
Ok(result)
}
Ref::Branch(name) => {
if expired_branches == ExpiredRefAction::Delete
&& name != Ref::DEFAULT_BRANCH
{
delete_branch(storage, storage_settings, name.as_str())
.await
.map_err(GCError::Ref)?;
result.deleted_refs.insert(r);
}
Ok(result)
}
},
Ok(result)
}
ExpireRefResult::NothingToDo => Ok(result),
}
})
Expand Down
2 changes: 2 additions & 0 deletions icechunk/src/session.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1576,6 +1576,7 @@ async fn flush(
message.to_string(),
Some(properties),
flush_data.manifest_files.into_iter().collect(),
None,
all_nodes.into_iter().map(Ok::<_, Infallible>),
)?;

Expand Down Expand Up @@ -2080,6 +2081,7 @@ mod tests {
"message".to_string(),
None,
manifests,
None,
nodes.iter().cloned().map(Ok::<NodeSnapshot, Infallible>),
)?);
asset_manager.write_snapshot(Arc::clone(&snapshot)).await?;
Expand Down
102 changes: 80 additions & 22 deletions icechunk/tests/test_gc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -293,14 +293,12 @@ pub async fn test_expire_ref() -> Result<(), Box<dyn std::error::Error>> {
)
.await?
{
ExpireRefResult::RefIsExpired => {
panic!()
}
ExpireRefResult::NothingToDo => {
panic!()
}
ExpireRefResult::Done { released_snapshots, .. } => {
ExpireRefResult::Done { released_snapshots, ref_is_expired, .. } => {
assert_eq!(released_snapshots.len(), 4);
assert!(!ref_is_expired);
}
}

Expand Down Expand Up @@ -332,9 +330,9 @@ pub async fn test_expire_ref() -> Result<(), Box<dyn std::error::Error>> {
)
.await?
{
ExpireRefResult::RefIsExpired => panic!(),
ExpireRefResult::NothingToDo => panic!(),
ExpireRefResult::Done { released_snapshots, .. } => {
ExpireRefResult::Done { released_snapshots, ref_is_expired, .. } => {
assert!(!ref_is_expired);
assert_eq!(released_snapshots.len(), 4);
}
}
Expand Down Expand Up @@ -367,9 +365,9 @@ pub async fn test_expire_ref() -> Result<(), Box<dyn std::error::Error>> {
)
.await?
{
ExpireRefResult::RefIsExpired => panic!(),
ExpireRefResult::NothingToDo => panic!(),
ExpireRefResult::Done { released_snapshots, .. } => {
ExpireRefResult::Done { released_snapshots, ref_is_expired, .. } => {
assert!(!ref_is_expired);
assert_eq!(released_snapshots.len(), 5);
}
}
Expand Down Expand Up @@ -402,9 +400,9 @@ pub async fn test_expire_ref() -> Result<(), Box<dyn std::error::Error>> {
)
.await?
{
ExpireRefResult::RefIsExpired => panic!(),
ExpireRefResult::NothingToDo => panic!(),
ExpireRefResult::Done { released_snapshots, .. } => {
ExpireRefResult::Done { released_snapshots, ref_is_expired, .. } => {
assert!(!ref_is_expired);
assert_eq!(released_snapshots.len(), 5);
}
}
Expand Down Expand Up @@ -432,8 +430,8 @@ pub async fn test_expire_ref() -> Result<(), Box<dyn std::error::Error>> {
}

#[tokio::test]
pub async fn test_expire_ref_with_odd_timestamp() -> Result<(), Box<dyn std::error::Error>>
{
pub async fn test_expire_ref_with_odd_timestamps()
-> Result<(), Box<dyn std::error::Error>> {
let storage: Arc<dyn Storage + Send + Sync> = new_in_memory_storage().await?;
let storage_settings = storage.default_settings();
let repo = Repository::create(None, Arc::clone(&storage), HashMap::new()).await?;
Expand Down Expand Up @@ -489,15 +487,24 @@ pub async fn test_expire_ref_with_odd_timestamp() -> Result<(), Box<dyn std::err
)
.await?
{
ExpireRefResult::RefIsExpired => {}
ExpireRefResult::Done { ref_is_expired, .. } => {
assert!(ref_is_expired);
// create another repo to avoid caching issues
let repo =
Repository::open(None, Arc::clone(&storage), HashMap::new()).await?;
assert_eq!(
branch_commit_messages(&repo, "main").await,
Vec::from(["third", "Repository initialized"])
);
}
_ => panic!(),
}

// create another repo to avoid caching issues
let repo = Repository::open(None, Arc::clone(&storage), HashMap::new()).await?;
assert_eq!(
branch_commit_messages(&repo, "main").await,
Vec::from(["third", "second", "first", "Repository initialized"])
Vec::from(["third", "Repository initialized"])
);
Ok(())
}
Expand Down Expand Up @@ -552,11 +559,11 @@ pub async fn test_expire_and_garbage_collect() -> Result<(), Box<dyn std::error:
);
assert_eq!(
tag_commit_messages(&repo, "tag1").await,
Vec::from(["3", "2", "1", "Repository initialized"])
Vec::from(["3", "Repository initialized"])
);
assert_eq!(
tag_commit_messages(&repo, "tag2").await,
Vec::from(["5", "4", "2", "1", "Repository initialized"])
Vec::from(["5", "Repository initialized"])
);

let now = Utc::now();
Expand All @@ -575,10 +582,10 @@ pub async fn test_expire_and_garbage_collect() -> Result<(), Box<dyn std::error:
)
.await?;
// other expired snapshots are pointed by tags
assert_eq!(summary.snapshots_deleted, 2);
assert_eq!(summary.snapshots_deleted, 5);

// the non expired snapshots + the expired but pointed by tags snapshots
assert_eq!(storage.list_snapshots(&storage_settings).await?.count().await, 13);
// the non expired snapshots + the 2 expired but pointed by tags snapshots
assert_eq!(storage.list_snapshots(&storage_settings).await?.count().await, 10);

repo.delete_tag("tag1").await?;

Expand All @@ -592,8 +599,8 @@ pub async fn test_expire_and_garbage_collect() -> Result<(), Box<dyn std::error:
// other expired snapshots are pointed by tag2
assert_eq!(summary.snapshots_deleted, 1);

// the non expired snapshots + the expired but pointed by tags snapshots
assert_eq!(storage.list_snapshots(&storage_settings).await?.count().await, 12);
// the non expired snapshots + the 1 pointed by tag2 snapshots
assert_eq!(storage.list_snapshots(&storage_settings).await?.count().await, 9);

repo.delete_tag("tag2").await?;

Expand All @@ -605,10 +612,61 @@ pub async fn test_expire_and_garbage_collect() -> Result<(), Box<dyn std::error:
)
.await?;
// tag2 snapshosts are released now
assert_eq!(summary.snapshots_deleted, 4);
assert_eq!(summary.snapshots_deleted, 1);

// only the non expired snapshots left
assert_eq!(storage.list_snapshots(&storage_settings).await?.count().await, 8);

Ok(())
}

#[tokio::test]
/// In this test, we set up a repo as in the design document for expiration.
///
/// We then, expire old snapshots and garbage collect. We verify we end up
/// with what is expected according to the design document.
pub async fn test_expire_and_garbage_collect_deliting_expired_refs()
-> Result<(), Box<dyn std::error::Error>> {
let storage: Arc<dyn Storage + Send + Sync> = new_in_memory_storage().await?;
let storage_settings = storage.default_settings();
let mut repo = Repository::create(None, Arc::clone(&storage), HashMap::new()).await?;

let expire_older_than = make_design_doc_repo(&mut repo).await?;

let asset_manager = Arc::new(AssetManager::new_no_cache(
storage.clone(),
storage_settings.clone(),
1,
));

let result = expire(
storage.as_ref(),
&storage_settings,
asset_manager.clone(),
expire_older_than,
// This is different compared to the previous test
ExpiredRefAction::Delete,
ExpiredRefAction::Delete,
)
.await?;

assert_eq!(result.released_snapshots.len(), 7);
assert_eq!(result.deleted_refs.len(), 2);

let now = Utc::now();
let gc_config = GCConfig::clean_all(now, now, None);
let summary = garbage_collect(
storage.as_ref(),
&storage_settings,
asset_manager.clone(),
&gc_config,
)
.await?;

assert_eq!(summary.snapshots_deleted, 7);
assert_eq!(summary.transaction_logs_deleted, 7);

// only the non expired snapshots left
assert_eq!(storage.list_snapshots(&storage_settings).await?.count().await, 8);
Ok(())
}