Skip to content
Draft
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Changed

- The Rust toolchain version has been updated to `nightly-2025-01-18`.
- The Rust toolchain version has been updated to `nightly-2025-02-01`.
[#103](https://github.com/pyodide/pyodide-build/pull/103)

### Fixed

- Fixed Pyodide venv `sys_platform` marker evaluation with pip >= 25.
[#108](https://github.com/pyodide/pyodide-build/pull/108)

- `pyodide-build` now respects `SOURCE_DATE_EPOCH` to enable reproducible
builds on a best-effort basis.
[#109](https://github.com/pyodide/pyodide-build/pull/109)

## [0.29.3] - 2025/02/04

### Added
Expand Down
111 changes: 97 additions & 14 deletions pyodide_build/recipe/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import shutil
import subprocess
import sys
import tarfile
import time
from collections.abc import Iterator
from datetime import datetime
from email.message import Message
Expand Down Expand Up @@ -46,9 +48,41 @@
from pyodide_build.recipe.spec import MetaConfig, _SourceSpec


def _get_source_epoch() -> int:
    """Return the timestamp (seconds since the Unix epoch) used to stamp build outputs.

    The value is read from the ``SOURCE_DATE_EPOCH`` environment variable and
    falls back to the current time when the variable is not set. The result is
    never smaller than 315532800 (1980-01-01 00:00:00 UTC), which is the
    zipfile limit.

    If ``SOURCE_DATE_EPOCH`` is set but cannot be parsed as an integer, the
    current time is returned instead.
    """
    zip_epoch_floor = 315532800  # 1980-01-01 00:00:00 UTC

    raw_value = os.environ.get("SOURCE_DATE_EPOCH")
    if raw_value is None:
        return max(zip_epoch_floor, int(time.time()))

    try:
        parsed = int(raw_value)
    except ValueError:
        # Unparsable value: behave as if "now" had been requested.
        return int(time.time())
    return max(zip_epoch_floor, parsed)


def _update_recursive_timestamp(path: Path, timestamp: int | None = None) -> None:
"""Update timestamps recursively for all directories and files. If
SOURCE_DATE_EPOCH is set, uses that, otherwise keeps original ones."""

if timestamp is None and "SOURCE_DATE_EPOCH" not in os.environ:
return

if timestamp is None:
timestamp = _get_source_epoch()

# Update directory, subdirectories, and files
os.utime(path, (timestamp, timestamp))
if path.is_dir():
for child in path.iterdir():
_update_recursive_timestamp(child, timestamp)


def _make_whlfile(
    *args: Any, owner: int | None = None, group: int | None = None, **kwargs: Any
) -> str:
    """Create a zip archive whose entries are stamped with the source epoch.

    Positional and keyword arguments are forwarded to the private
    ``shutil._make_zipfile``; ``owner`` and ``group`` are accepted but not
    forwarded. Every archive entry gets the UTC date/time derived from
    ``_get_source_epoch()``.

    ``shutil._make_zipfile`` is a private function, so its signature is
    inspected first: ``date_time`` is passed only if the function accepts it.
    Otherwise the archive is created normally and then rewritten so that each
    entry carries the desired date/time.

    Returns
    -------
    Whatever ``shutil._make_zipfile`` returns (the path of the created archive).
    """
    import inspect

    make_zipfile = shutil._make_zipfile  # type: ignore[attr-defined]
    # gmtime() ensures UTC
    date_time = time.gmtime(_get_source_epoch())[:6]

    try:
        accepts_date_time = "date_time" in inspect.signature(make_zipfile).parameters
    except (TypeError, ValueError):
        # Signature not introspectable: assume the parameter is unavailable.
        accepts_date_time = False

    if accepts_date_time:
        kwargs["date_time"] = date_time
        return make_zipfile(*args, **kwargs)

    archive = make_zipfile(*args, **kwargs)
    # Nothing to restamp if no file was written (e.g. a dry run).
    if archive and os.path.isfile(archive):
        _stamp_zip_entries(archive, date_time)
    return archive


def _stamp_zip_entries(archive: str, date_time: tuple[int, ...]) -> None:
    """Rewrite ``archive`` in place so that every entry carries ``date_time``."""
    import zipfile

    tmp_archive = f"{archive}.tmp"
    try:
        with zipfile.ZipFile(archive) as src, zipfile.ZipFile(tmp_archive, "w") as dst:
            for entry in src.infolist():
                stamped = zipfile.ZipInfo(entry.filename, date_time=date_time)
                # Keep compression method and permission bits of the original entry.
                stamped.compress_type = entry.compress_type
                stamped.external_attr = entry.external_attr
                stamped.create_system = entry.create_system
                dst.writestr(stamped, src.read(entry))
        os.replace(tmp_archive, archive)
    finally:
        # Only still present if rewriting failed before the replace.
        if os.path.exists(tmp_archive):
            os.remove(tmp_archive)


Expand Down Expand Up @@ -274,6 +308,23 @@ def ignore(path: str, names: list[str]) -> list[str]:

self.src_dist_dir.mkdir(parents=True, exist_ok=True)

def _reproducible_filter(
tarinfo: tarfile.TarInfo, path: str | Path | None = None
) -> tarfile.TarInfo:
"""Filter that preserves permissions but normalizes ownership and optionally
timestamps. This is similar to the "data" filter but injects SOURCE_DATE_EPOCH."""

# set timestamp from SOURCE_DATE_EPOCH if available
filetime = _get_source_epoch() if "SOURCE_DATE_EPOCH" in os.environ else None

tarinfo.uid = tarinfo.gid = 0
tarinfo.uname = tarinfo.gname = "root"

if filetime is not None:
tarinfo.mtime = filetime

return tarinfo

def _download_and_extract(self) -> None:
"""
Download the source from specified in the package metadata,
Expand Down Expand Up @@ -329,11 +380,12 @@ def _download_and_extract(self) -> None:
# is too large for the chown() call. This behavior can lead to "Permission denied" errors
# (missing x bit) or random strange `make` behavior (due to wrong mtime order) in the CI
# pipeline.
shutil.unpack_archive(
tarballpath,
self.build_dir,
filter=None if tarballpath.suffix == ".zip" else "data",
)
if tarballpath.suffix == ".zip":
shutil.unpack_archive(tarballpath, self.build_dir, filter=None)
else:
shutil.unpack_archive(
tarballpath, self.build_dir, filter=self._reproducible_filter
)

extract_dir_name = self.source_metadata.extract_dir
if extract_dir_name is None:
Expand Down Expand Up @@ -531,7 +583,7 @@ def _package_wheel(
) -> None:
"""Package a wheel

This unpacks the wheel, unvendors tests if necessary, runs and "build.post"
This unpacks the wheel, unvendors tests if necessary, and runs the "build.post"
script, and then repacks the wheel.

Parameters
Expand Down Expand Up @@ -593,7 +645,31 @@ def _package_wheel(
)
if nmoved:
with chdir(self.src_dist_dir):
shutil.make_archive(f"{self.name}-tests", "tar", test_dir)
filetime = _get_source_epoch()
shutil.make_archive(
f"{self.name}-tests",
format="tar",
root_dir="tests",
owner="root",
group="root",
)
if filetime is not None:
with tarfile.open(f"{self.name}-tests.tar", "r") as src:
with tarfile.open(
f"{self.name}-tests.new.tar", "w"
) as dst:
for member in src.getmembers():
member.mtime = filetime
if member.isfile():
dst.addfile(
member, src.extractfile(member)
)
else:
dst.addfile(member)
# replace original with timestamped version
os.replace(
f"{self.name}-tests.new.tar", f"{self.name}-tests.tar"
)
finally:
shutil.rmtree(test_dir, ignore_errors=True)

Expand Down Expand Up @@ -749,14 +825,20 @@ def unvendor_tests(
n_moved = 0
out_files = []
shutil.rmtree(test_install_prefix, ignore_errors=True)

filetime = _get_source_epoch() if "SOURCE_DATE_EPOCH" in os.environ else None

for root, _dirs, files in os.walk(install_prefix):
root_rel = Path(root).relative_to(install_prefix)
if root_rel.name == "__pycache__" or root_rel.name.endswith(".egg_info"):
continue
if root_rel.name in ["test", "tests"]:
# This is a test folder
(test_install_prefix / root_rel).parent.mkdir(exist_ok=True, parents=True)
shutil.move(install_prefix / root_rel, test_install_prefix / root_rel)
target = test_install_prefix / root_rel
target.parent.mkdir(exist_ok=True, parents=True)
shutil.move(install_prefix / root_rel, target)
if filetime is not None:
_update_recursive_timestamp(target, filetime)
n_moved += 1
continue
out_files.append(root)
Expand All @@ -768,11 +850,12 @@ def unvendor_tests(
):
if any(fnmatch.fnmatchcase(fpath, pat) for pat in retain_test_patterns):
continue
(test_install_prefix / root_rel).mkdir(exist_ok=True, parents=True)
shutil.move(
install_prefix / root_rel / fpath,
test_install_prefix / root_rel / fpath,
)
target_dir = test_install_prefix / root_rel
target_dir.mkdir(exist_ok=True, parents=True)
target = target_dir / fpath
shutil.move(install_prefix / root_rel / fpath, target)
if filetime is not None:
os.utime(target, (filetime, filetime))
n_moved += 1

return n_moved
Expand Down
114 changes: 114 additions & 0 deletions pyodide_build/tests/recipe/test_builder.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import os
import shutil
import subprocess
import tarfile
import time
from contextlib import contextmanager
from pathlib import Path
from typing import Self

Expand Down Expand Up @@ -351,3 +354,114 @@ def test_extract_tarballname():

for header, tarballname in zip(headers, tarballnames, strict=True):
assert _builder._extract_tarballname(url, header) == tarballname


# Some reproducibility tests. These are not exhaustive, but should catch
# some common issues for basics like timestamps and file contents. They
# test the behavior of the builder functions that are most likely to be
# affected by SOURCE_DATE_EPOCH.


from pyodide_build.recipe.builder import _get_source_epoch


@contextmanager
def source_date_epoch(value=None):
    """Temporarily set ``SOURCE_DATE_EPOCH`` for the duration of a ``with`` block.

    ``value`` is converted with ``str()`` before being stored; passing ``None``
    removes the variable instead. On exit the variable is restored to the value
    it had on entry, or removed if it was not set.
    """
    key = "SOURCE_DATE_EPOCH"
    previous = os.environ.get(key)
    try:
        if value is not None:
            os.environ[key] = str(value)
        else:
            os.environ.pop(key, None)
        yield
    finally:
        if previous is not None:
            os.environ[key] = previous
        else:
            os.environ.pop(key, None)


def test_get_source_epoch_reproducibility():
    """Check _get_source_epoch() for valid, invalid, too-small and unset values."""
    with source_date_epoch("1735689600"):  # 2025-01-01
        assert _get_source_epoch() == 1735689600

    with source_date_epoch("invalid"):
        # should fall back to current time; bracket the call so that the
        # assertion actually checks "now" rather than just "positive"
        before = int(time.time())
        fallback = _get_source_epoch()
        after = int(time.time())
        assert before <= fallback <= after

    with source_date_epoch("0"):
        assert (
            _get_source_epoch() == 315532800
        )  # should fall back to minimum ZIP timestamp

    with source_date_epoch(None):
        # unset variable: current time is used
        before = int(time.time())
        unset_value = _get_source_epoch()
        after = int(time.time())
        assert before <= unset_value <= after


def test_make_whlfile_reproducibility(monkeypatch, tmp_path):
    """Check that _make_whlfile hands the SOURCE_DATE_EPOCH date to _make_zipfile."""
    from pyodide_build.recipe.builder import _make_whlfile

    epoch = 1735689600  # 2025-01-01

    # NOTE(review): this stub accepts a ``date_time`` keyword; confirm that the
    # real (private) shutil._make_zipfile does too, otherwise this test only
    # validates the call against the stub.
    def fake_make_zipfile(
        base_name, base_dir, verbose=0, dry_run=0, logger=None, date_time=None
    ):
        # Expected value is the UTC broken-down time of the epoch.
        assert date_time == time.gmtime(epoch)[:6]

    monkeypatch.setattr(shutil, "_make_zipfile", fake_make_zipfile)

    with source_date_epoch(epoch):
        _make_whlfile("archive.whl", "base_dir", ["file1.py"], b"content")


def test_set_archive_time_reproducibility(tmp_path):
    """Test that a tar member stamped with _get_source_epoch() keeps that mtime."""
    import tarfile

    from pyodide_build.recipe.builder import _get_source_epoch

    # Create a test tarfile with a specific timestamp
    test_file = tmp_path / "test.txt"
    test_file.write_text("test content")
    test_epoch = 1735689600  # 2025-01-01
    archive_path = tmp_path / "archive.tar"

    with source_date_epoch(test_epoch):
        # Open the payload in a context manager so the handle is closed even
        # if adding the member fails.
        with tarfile.open(archive_path, "w") as tar, open(test_file, "rb") as payload:
            tarinfo = tar.gettarinfo(str(test_file))
            tarinfo.mtime = _get_source_epoch()
            tar.addfile(tarinfo, payload)

    # Now, verify this timestamp in the archive
    with tarfile.open(archive_path) as tar:
        info = tar.getmembers()[0]
        assert info.mtime == test_epoch


def test_reproducible_tar_filter(monkeypatch, tmp_path):
    """Check the attribute values a normalized tar member is expected to carry.

    NOTE(review): this test sets the attributes on a stand-in object itself and
    then asserts them; it does not call the builder's ``_reproducible_filter``.
    Consider invoking the real filter here.
    """
    test_epoch = 1735689600  # 2025-01-01

    class MockTarInfo:
        def __init__(self, name):
            self.name = name
            self.uid = self.gid = 1000
            self.uname = self.gname = None
            self.mtime = int(time.time())

    monkeypatch.setattr(tarfile, "TarInfo", MockTarInfo)
    monkeypatch.setattr(os.path, "getmtime", lambda *args: test_epoch)

    with source_date_epoch(test_epoch):
        # Build a member and apply the normalized values by hand
        member = tarfile.TarInfo("test.txt")
        member.uid = member.gid = 0
        member.uname = member.gname = "root"
        member.mtime = test_epoch

        ownership = (member.uid, member.gid, member.uname, member.gname)
        assert member.mtime == test_epoch
        assert ownership == (0, 0, "root", "root")