Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 81 additions & 0 deletions .github/workflows/version-compat.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# Runs the version-compat-tests suite with two icechunk distributions
# installed side by side (importable as `icechunk` and `icechunk_v1`).
name: Version Compatibility Tests

on:
  push:
    branches:
      - main
  pull_request:
    types: [opened, reopened, synchronize, labeled]
  workflow_dispatch:
  schedule:
    # Run daily at 6am UTC to catch any issues with nightly wheels
    - cron: '0 6 * * *'

# A newer run for the same workflow + ref cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: read

env:
  PYTHON_VERSION: "3.12"

# Every `run:` step executes inside the compatibility test project.
defaults:
  run:
    working-directory: ./version-compat-tests

jobs:
  version-compat:
    runs-on: ubuntu-latest
    name: Test v1/v2 format compatibility

    steps:
      - uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v7
        with:
          python-version: ${{ env.PYTHON_VERSION }}
          # Don't cache - we want fresh nightly wheels each run
          enable-cache: false

      - name: Start wheel-rename proxy server
        run: |
          # Serve the nightly wheel index through a local proxy on port 8123.
          # NOTE(review): the -r rule presumably republishes icechunk releases
          # matching "<2" under the name icechunk_v1 - confirm against the
          # wheel-rename documentation.
          uvx --from "wheel-rename[server] @ git+https://github.com/earth-mover/rename-wheel" \
            wheel-rename serve \
            -u https://pypi.anaconda.org/scientific-python-nightly-wheels/simple \
            -r "icechunk=icechunk_v1:<2" \
            --port 8123 &
          # Remember the background PID so the final step can stop the server.
          echo $! > /tmp/proxy.pid

          # Wait (up to ~30s) for the server to start. Fail this step if it
          # never answers, instead of letting a later step fail confusingly.
          for _ in {1..30}; do
            if curl -s http://127.0.0.1:8123/simple/ > /dev/null 2>&1; then
              echo "wheel-rename proxy is up"
              exit 0
            fi
            sleep 1
          done
          echo "wheel-rename proxy did not start within 30 seconds" >&2
          exit 1

      - name: Install dependencies
        run: uv sync

      - name: Verify both versions are installed
        run: |
          uv run python -c "
          import icechunk
          import icechunk_v1
          print(f'icechunk v2: {icechunk.__version__}')
          print(f'icechunk v1: {icechunk_v1.__version__}')
          assert 'icechunk_v1' in icechunk_v1.__file__
          assert icechunk_v1 is not icechunk
          "

      - name: Run version compatibility tests
        run: uv run pytest -v --tb=short

      - name: Stop proxy server
        # Runs even when earlier steps failed so the background server is cleaned up.
        if: always()
        run: |
          if [ -f /tmp/proxy.pid ]; then
            kill "$(cat /tmp/proxy.pid)" 2>/dev/null || true
            rm /tmp/proxy.pid
          fi
124 changes: 115 additions & 9 deletions icechunk-python/tests/test_stateful_repo_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,12 @@
from collections.abc import Iterator
from dataclasses import dataclass
from functools import partial
from typing import Any, Self, cast
from typing import Any, Literal, Self, cast

import numpy as np
import pytest

import icechunk
from zarr.core.buffer import Buffer, default_buffer_prototype

pytest.importorskip("hypothesis")
Expand Down Expand Up @@ -120,6 +121,7 @@ def __init__(self, **kwargs: Any) -> None:

# a tag once created, can never be recreated even after expiration
self.created_tags: set[str] = set()
self.spec_version: Literal[1, 2] | None = None

def __repr__(self) -> str:
return textwrap.dedent(f"""
Expand Down Expand Up @@ -301,14 +303,12 @@ def __init__(self) -> None:
note("----------")
self.model = Model()
self.commit_times: list[datetime.datetime] = []
self.storage = None

@initialize(data=st.data(), target=branches)
def initialize(self, data: st.DataObject) -> str:
# FIXME: currently this test is IC2 only
spec_version = data.draw(
st.one_of(st.integers(min_value=2, max_value=2), st.none())
)
self.repo = Repository.create(in_memory_storage(), spec_version=spec_version)
self.storage = in_memory_storage()
self.repo = Repository.create(self.storage, spec_version=1)
self.session = self.repo.writable_session(DEFAULT_BRANCH)

snap = next(iter(self.repo.ancestry(branch=DEFAULT_BRANCH)))
Expand All @@ -321,6 +321,7 @@ def initialize(self, data: st.DataObject) -> str:
self.model.HEAD = HEAD
self.model.create_branch(DEFAULT_BRANCH, HEAD)
self.model.checkout_branch(DEFAULT_BRANCH)
self.model.spec_version = 1

# initialize with some data always
# TODO: always setting array metadata, since we cannot overwrite an existing group's zarr.json
Expand Down Expand Up @@ -348,6 +349,31 @@ def set_doc(self, path: str, value: Buffer) -> None:
with pytest.raises(IcechunkError, match="read-only store"):
self.sync_store.set(path, value)

@rule()
@precondition(
    lambda self: self.model.spec_version == 1 and len(self.model.commits) > 5
)
def upgrade_spec_version(self) -> None:
    """Upgrade the repository under test to spec version 2 and record it in the model.

    The rule only fires while the model still tracks a spec_version=1
    repository and holds more than 5 commits, so the 1 -> 2 transition is
    applied at most once per run.
    """
    # NOTE(review): upgrading a repository that is already at version 2 is
    # not modelled here; the precondition keeps this rule from being re-run
    # after a successful upgrade.
    icechunk.upgrade_icechunk_repository(self.repo)
    self.model.spec_version = 2

@rule()
def reopen_repository(self) -> None:
    """Open the repository again from the stored storage handle.

    Uncommitted changes are discarded by this, so the model is checked out
    again on the branch it was tracking, falling back to the default branch
    when the model no longer knows that branch.
    """
    assert self.storage is not None
    self.repo = Repository.open(self.storage)
    note(f"Reopened repository (spec_version={self.repo.spec_version})")

    # Pick the branch to resume on: the model's current branch if the model
    # still lists it, otherwise the default branch.
    if self.model.branch in self.model.branches:
        target = self.model.branch
    else:
        target = DEFAULT_BRANCH

    # Fresh session plus matching model checkout, both at the last committed state.
    self.session = self.repo.writable_session(target)
    self.model.checkout_branch(target)

@rule(message=st.text(max_size=MAX_TEXT_SIZE), target=commits)
@precondition(lambda self: self.model.changes_made)
def commit(self, message: str) -> str:
Expand All @@ -362,6 +388,63 @@ def commit(self, message: str) -> str:
self.commit_times.append(snapinfo.written_at)
return commit_id

@rule(message=st.text(max_size=MAX_TEXT_SIZE), target=commits)
@precondition(lambda self: self.model.changes_made)
def amend_commit(self, message: str) -> str:
"""Amend the last commit on the session's branch and mirror it in the model.

Amend requires spec_version >= 2. For spec_version=1, the call is expected
to raise ``IcechunkError`` and the model is left unchanged.

Args:
message: Commit message passed to ``self.session.amend``.

Returns:
The id of the amended commit, or, when the amend was rejected
(spec_version=1), the commit id the model currently records for the
branch. The returned value is added to the ``commits`` bundle.
"""
branch = self.session.branch
assert branch is not None
note(
f"Amending commit on branch {branch!r} (spec_version={self.model.spec_version})"
)

# Amend is only supported on spec_version >= 2
if self.model.spec_version == 1:
with pytest.raises(
IcechunkError,
match="repository version error.*requires.*version 2",
):
self.session.amend(message)
note("Amend correctly rejected for spec_version=1")
# Return existing HEAD since amend failed
return self.model.branches[branch]

# spec_version >= 2: amend should succeed
commit_id = self.session.amend(message)
# The newest ancestry entry of the branch must be the commit amend returned.
snapinfo = next(iter(self.repo.ancestry(branch=branch)))
assert snapinfo.id == commit_id
# Continue with a fresh writable session on the same branch.
self.session = self.repo.writable_session(branch)
note(f"Amended commit: {snapinfo!r}")

# For model: amend replaces the previous HEAD commit on this branch
old_head = self.model.branches[branch]
old_commit = self.model.commits.get(old_head)
# The amended commit takes over the parent of the commit it replaces.
parent_id = old_commit.parent_id if old_commit else None

# Only remove old commit from model if no other branch references it
# NOTE(review): only branches are inspected here; tags that point at
# old_head are not considered - confirm this is intended.
other_refs = [
b for b, c in self.model.branches.items() if c == old_head and b != branch
]
if not other_refs and old_head in self.model.commits:
del self.model.commits[old_head]
# Update commit times - remove old
if old_commit and old_commit.written_at in self.commit_times:
self.commit_times.remove(old_commit.written_at)

# Add new commit
# The model store is deep-copied so the recorded commit does not share
# state with the live model store.
self.model.commits[commit_id] = CommitModel.from_snapshot_and_store(
snapinfo, copy.deepcopy(self.model.store)
)
self.model.commits[commit_id].parent_id = parent_id
# Point the branch and HEAD at the amended commit and mark the model clean.
self.model.branches[branch] = commit_id
self.model.HEAD = commit_id
self.model.changes_made = False
self.commit_times.append(snapinfo.written_at)
return commit_id

@rule(ref=commits)
def checkout_commit(self, ref: str) -> None:
if ref not in self.model.commits:
Expand Down Expand Up @@ -503,7 +586,9 @@ def delete_branch(self, branch: str) -> None:
with pytest.raises(IcechunkError):
self.repo.delete_branch(branch)

@precondition(lambda self: bool(self.commit_times))
# TODO: v1 has bugs in expire_snapshots, only test for v2
# https://github.com/earth-mover/icechunk/issues/1520
@precondition(lambda self: bool(self.commit_times) and self.model.spec_version >= 2)
@rule(
data=st.data(),
delta=st.timedeltas(
Expand All @@ -524,23 +609,44 @@ def expire_snapshots(
note(
f"Expiring snapshots {older_than=!r}, ({commit_time=!r}, {delta=!r}), {delete_expired_branches=!r}, {delete_expired_tags=!r}"
)

# Track branches and tags before expiration
branches_before = set(self.repo.list_branches())
tags_before = set(self.repo.list_tags())

actual = self.repo.expire_snapshots(
older_than,
delete_expired_branches=delete_expired_branches,
delete_expired_tags=delete_expired_tags,
)
note(f"repo expired snaps={actual!r}")

# Track branches and tags after expiration
branches_after = set(self.repo.list_branches())
tags_after = set(self.repo.list_tags())
actual_deleted_branches = branches_before - branches_after
actual_deleted_tags = tags_before - tags_after

expected = self.model.expire_snapshots(
older_than,
delete_expired_branches=delete_expired_branches,
delete_expired_tags=delete_expired_tags,
)
note(f"from model: {expected}")
note(f"actual: {actual}")
note(f"actual_deleted_branches: {actual_deleted_branches}")
note(f"actual_deleted_tags: {actual_deleted_tags}")

assert self.initial_snapshot.id not in actual
assert actual == expected.expired_snapshots, (actual, expected)

for branch in expected.deleted_branches:
assert (
actual_deleted_branches == expected.deleted_branches
), f"deleted branches mismatch: actual={actual_deleted_branches}, expected={expected.deleted_branches}"
assert (
actual_deleted_tags == expected.deleted_tags
), f"deleted tags mismatch: actual={actual_deleted_tags}, expected={expected.deleted_tags}"

for branch in actual_deleted_branches:
self.maybe_checkout_branch(branch)

@precondition(lambda self: bool(self.commit_times))
Expand Down
17 changes: 17 additions & 0 deletions version-compat-tests/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Python bytecode caches and compiled extension modules
__pycache__/
*.py[cod]
*$py.class
*.so

# Virtual environment
.venv/

# pytest cache directory
.pytest_cache/

# Downloaded wheels
wheels/

# uv lockfile is not committed
# NOTE(review): presumably so each run resolves fresh nightly wheels - confirm
uv.lock
Loading
Loading