From 0556d85427a6dc237b3a67bd1e868769722e9305 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 20 Oct 2025 12:31:48 +0200 Subject: [PATCH 01/11] GH-47798: [CI][Packaging] Enable reproducible builds for Linux packages --- dev/tasks/linux-packages/.gitignore | 1 + .../apache-arrow/apt/debian-trixie/Dockerfile | 2 ++ dev/tasks/linux-packages/apt/build.sh | 18 ++++++++++++++++++ dev/tasks/linux-packages/apt/reprotest.patch | 13 +++++++++++++ dev/tasks/linux-packages/package-task.rb | 18 ++++++++++++++++++ 5 files changed, 52 insertions(+) create mode 100644 dev/tasks/linux-packages/apt/reprotest.patch diff --git a/dev/tasks/linux-packages/.gitignore b/dev/tasks/linux-packages/.gitignore index 138662a4ef4..39fa63bf8a0 100644 --- a/dev/tasks/linux-packages/.gitignore +++ b/dev/tasks/linux-packages/.gitignore @@ -19,6 +19,7 @@ /*/apt/build.sh /*/apt/build/ /*/apt/env.sh +/*/apt/reprotest.sh /*/apt/repositories/ /*/apt/tmp/ /*/yum/build.sh diff --git a/dev/tasks/linux-packages/apache-arrow/apt/debian-trixie/Dockerfile b/dev/tasks/linux-packages/apache-arrow/apt/debian-trixie/Dockerfile index 257d0056566..ad76820deda 100644 --- a/dev/tasks/linux-packages/apache-arrow/apt/debian-trixie/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow/apt/debian-trixie/Dockerfile @@ -43,6 +43,7 @@ RUN \ cmake \ debhelper \ devscripts \ + faketime \ gi-docgen \ git \ libboost-filesystem-dev \ @@ -77,6 +78,7 @@ RUN \ protobuf-compiler-grpc \ python3-dev \ python3-pip \ + python3-venv \ rapidjson-dev \ tzdata \ valac \ diff --git a/dev/tasks/linux-packages/apt/build.sh b/dev/tasks/linux-packages/apt/build.sh index bc4c61e6221..df33457787c 100755 --- a/dev/tasks/linux-packages/apt/build.sh +++ b/dev/tasks/linux-packages/apt/build.sh @@ -80,6 +80,21 @@ case "${VERSION}" in ${PACKAGE}-${VERSION} ;; esac + +if [ "${REPROTEST:-no}" = "yes" ]; then + # Prepare the reprotest environment by copying reprotest.sh and + # adding execution permissions. 
+ run cp /host/reprotest.sh . + run chmod +x reprotest.sh + # Pin reprotest to 0.7.30 in order to apply patch that removes unconditional call + # to setarch. See: https://salsa.debian.org/reproducible-builds/reprotest/-/issues/15 + run python3 -m venv /tmp/venv + run . /tmp/venv/bin/activate + run pip install reprotest==0.7.30 + # TODO: Programmatically find path to apply patch instead of hardcoding. + patch -p1 -f /tmp/venv/lib/python3.13/site-packages/reprotest/build.py -i ${SOURCE_DIR}/reprotest.patch +fi + run cd ${PACKAGE}-${VERSION}/ platform="${distribution}-${code_name}" if [ -d "/host/tmp/debian.${platform}-${architecture}" ]; then @@ -93,6 +108,9 @@ fi # DEB_BUILD_OPTIONS="${DEB_BUILD_OPTIONS} noopt" export DEB_BUILD_OPTIONS df -h +if [ "${REPROTEST:-no}" = "yes" ]; then + run reprotest --verbosity 2 --vary=-kernel,-fileordering,-domain_host,-build_path -s .. ./reprotest.sh **.deb +fi if [ "${DEBUG:-no}" = "yes" ]; then run debuild "${debuild_options[@]}" "${dpkg_buildpackage_options[@]}" else diff --git a/dev/tasks/linux-packages/apt/reprotest.patch b/dev/tasks/linux-packages/apt/reprotest.patch new file mode 100644 index 00000000000..422e75216a6 --- /dev/null +++ b/dev/tasks/linux-packages/apt/reprotest.patch @@ -0,0 +1,13 @@ +diff --git a/reprotest/build.py b/reprotest/build.py +index 52eff03..fa5140c 100644 +--- a/reprotest/build.py ++++ b/reprotest/build.py +@@ -302,7 +302,7 @@ def home(ctx, build, vary): + def kernel(ctx, build, vary): + _ = build + if not vary: +- _ = _.append_setup_exec_raw('SETARCH_ARCH=$(uname -m)') ++ return _ + else: + _ = _.append_setup_exec_raw('SETARCH_ARCH=$(for a in $(setarch --list); do setarch $a true && echo $a || true; done)') + # Perform realistic shuffling of architectures depending diff --git a/dev/tasks/linux-packages/package-task.rb b/dev/tasks/linux-packages/package-task.rb index 4096c89463e..5cfae80209f 100644 --- a/dev/tasks/linux-packages/package-task.rb +++ b/dev/tasks/linux-packages/package-task.rb @@ 
-150,6 +150,7 @@ def docker_run(os, architecture, console: false) end pass_through_env_names = [ "DEB_BUILD_OPTIONS", + "REPROTEST", "RPM_BUILD_NCPUS", ] pass_through_env_names.each do |name| @@ -343,6 +344,15 @@ def apt_build(console: false) ENV end + reprotest_sh = "#{apt_dir}/reprotest.sh" + rm_rf(reprotest_sh) + File.open(reprotest_sh, "wx") do |file| + file.puts(<<-BASH) +cd #{@package}-#{@deb_upstream_version} +debuild -us -uc + BASH + end + apt_targets.each do |target| cd(apt_dir) do distribution, version, architecture = split_target(target) @@ -362,6 +372,13 @@ def define_apt_task cp(source_build_sh, build_sh) end + source_reprotest_patch = "#{__dir__}/apt/reprotest.patch" + reprotest_patch = "#{apt_dir}/reprotest.patch" + file reprotest_patch => source_reprotest_patch do + cp(source_reprotest_patch, reprotest_patch) + end + + directory repositories_dir desc "Build deb packages" @@ -369,6 +386,7 @@ def define_apt_task build_dependencies = [ deb_archive_name, build_sh, + reprotest_patch, repositories_dir, ] else From 68a200da957c4910c703fe66c65ab7e73360323f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 20 Oct 2025 12:38:54 +0200 Subject: [PATCH 02/11] Set reproducible environment variable only on debian-trixie-amd64 temporarily and skip rat check on patch --- .github/workflows/package_linux.yml | 4 ++++ dev/release/rat_exclude_files.txt | 1 + .../linux-packages/apache-arrow/apt/reprotest.patch | 13 +++++++++++++ 3 files changed, 18 insertions(+) create mode 100644 dev/tasks/linux-packages/apache-arrow/apt/reprotest.patch diff --git a/.github/workflows/package_linux.yml b/.github/workflows/package_linux.yml index ba863894283..0ce78261892 100644 --- a/.github/workflows/package_linux.yml +++ b/.github/workflows/package_linux.yml @@ -227,6 +227,10 @@ jobs: dev/release/utils-watch-gh-workflow.sh \ ${GITHUB_REF_NAME} \ release_candidate.yml + - name: Set Reproducible Build Environment + if: ${{ matrix.id }} == 'debian-trixie-amd64' + 
run: | + echo "REPRODUCIBLE=yes" >> "${GITHUB_ENV}" - name: Build run: | pushd dev/tasks/linux-packages diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index 5212dc47e0b..abe3c029d27 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -54,6 +54,7 @@ dev/tasks/linux-packages/apache-arrow/debian/rules dev/tasks/linux-packages/apache-arrow/debian/shlibs.local dev/tasks/linux-packages/apache-arrow/debian/source/format dev/tasks/linux-packages/apache-arrow/debian/watch +dev/tasks/linux-packages/apt/reprotest.patch dev/tasks/requirements*.txt dev/tasks/conda-recipes/* docs/requirements.txt diff --git a/dev/tasks/linux-packages/apache-arrow/apt/reprotest.patch b/dev/tasks/linux-packages/apache-arrow/apt/reprotest.patch new file mode 100644 index 00000000000..422e75216a6 --- /dev/null +++ b/dev/tasks/linux-packages/apache-arrow/apt/reprotest.patch @@ -0,0 +1,13 @@ +diff --git a/reprotest/build.py b/reprotest/build.py +index 52eff03..fa5140c 100644 +--- a/reprotest/build.py ++++ b/reprotest/build.py +@@ -302,7 +302,7 @@ def home(ctx, build, vary): + def kernel(ctx, build, vary): + _ = build + if not vary: +- _ = _.append_setup_exec_raw('SETARCH_ARCH=$(uname -m)') ++ return _ + else: + _ = _.append_setup_exec_raw('SETARCH_ARCH=$(for a in $(setarch --list); do setarch $a true && echo $a || true; done)') + # Perform realistic shuffling of architectures depending From 56e744fec51a6bcd23a08442940fceda6be23ebe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 20 Oct 2025 12:43:45 +0200 Subject: [PATCH 03/11] Remove file pushed unintentionally --- .../linux-packages/apache-arrow/apt/reprotest.patch | 13 ------------- 1 file changed, 13 deletions(-) delete mode 100644 dev/tasks/linux-packages/apache-arrow/apt/reprotest.patch diff --git a/dev/tasks/linux-packages/apache-arrow/apt/reprotest.patch b/dev/tasks/linux-packages/apache-arrow/apt/reprotest.patch deleted file mode 100644 index 
422e75216a6..00000000000 --- a/dev/tasks/linux-packages/apache-arrow/apt/reprotest.patch +++ /dev/null @@ -1,13 +0,0 @@ -diff --git a/reprotest/build.py b/reprotest/build.py -index 52eff03..fa5140c 100644 ---- a/reprotest/build.py -+++ b/reprotest/build.py -@@ -302,7 +302,7 @@ def home(ctx, build, vary): - def kernel(ctx, build, vary): - _ = build - if not vary: -- _ = _.append_setup_exec_raw('SETARCH_ARCH=$(uname -m)') -+ return _ - else: - _ = _.append_setup_exec_raw('SETARCH_ARCH=$(for a in $(setarch --list); do setarch $a true && echo $a || true; done)') - # Perform realistic shuffling of architectures depending From 2c4d4f2dcf8563bce28d4566f542684492987da1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 20 Oct 2025 12:44:11 +0200 Subject: [PATCH 04/11] Temporarily remove some linux packages --- .github/workflows/package_linux.yml | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/.github/workflows/package_linux.yml b/.github/workflows/package_linux.yml index 0ce78261892..ce182095198 100644 --- a/.github/workflows/package_linux.yml +++ b/.github/workflows/package_linux.yml @@ -84,27 +84,8 @@ jobs: fail-fast: false matrix: id: - - almalinux-8-amd64 - - almalinux-8-arm64 - - almalinux-9-amd64 - - almalinux-9-arm64 - - almalinux-10-amd64 - - almalinux-10-arm64 - - amazon-linux-2023-amd64 - - amazon-linux-2023-arm64 - - centos-9-stream-amd64 - - centos-9-stream-arm64 - - centos-7-amd64 - - debian-bookworm-amd64 - - debian-bookworm-arm64 - debian-trixie-amd64 - debian-trixie-arm64 - - debian-forky-amd64 - - debian-forky-arm64 - - ubuntu-jammy-amd64 - - ubuntu-jammy-arm64 - - ubuntu-noble-amd64 - - ubuntu-noble-arm64 env: DOCKER_VOLUME_PREFIX: ".docker/" steps: From 1b7d1f67b875280ec21a1c19a6fa1ef660286884 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 20 Oct 2025 12:45:29 +0200 Subject: [PATCH 05/11] Add apt/reprotest.patch to .gitignore --- dev/tasks/linux-packages/.gitignore | 1 + 1 file 
changed, 1 insertion(+) diff --git a/dev/tasks/linux-packages/.gitignore b/dev/tasks/linux-packages/.gitignore index 39fa63bf8a0..6de4ae73bdf 100644 --- a/dev/tasks/linux-packages/.gitignore +++ b/dev/tasks/linux-packages/.gitignore @@ -19,6 +19,7 @@ /*/apt/build.sh /*/apt/build/ /*/apt/env.sh +/*/apt/reprotest.patch /*/apt/reprotest.sh /*/apt/repositories/ /*/apt/tmp/ From a320b028c95dca9c7bed9af4d4225cfdf099cb45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 20 Oct 2025 12:53:17 +0200 Subject: [PATCH 06/11] Maybe using the correct environment variable name would do something :) --- .github/workflows/package_linux.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/package_linux.yml b/.github/workflows/package_linux.yml index ce182095198..ee3af573e62 100644 --- a/.github/workflows/package_linux.yml +++ b/.github/workflows/package_linux.yml @@ -211,7 +211,7 @@ jobs: - name: Set Reproducible Build Environment if: ${{ matrix.id }} == 'debian-trixie-amd64' run: | - echo "REPRODUCIBLE=yes" >> "${GITHUB_ENV}" + echo "REPROTEST=yes" >> "${GITHUB_ENV}" - name: Build run: | pushd dev/tasks/linux-packages From af3b31e70942030d5fca0b9cd67bf5e0d6a6ea06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 20 Oct 2025 13:05:12 +0200 Subject: [PATCH 07/11] Commit missing SOURCE_DIR --- dev/tasks/linux-packages/apt/build.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dev/tasks/linux-packages/apt/build.sh b/dev/tasks/linux-packages/apt/build.sh index df33457787c..661cc75bcb2 100755 --- a/dev/tasks/linux-packages/apt/build.sh +++ b/dev/tasks/linux-packages/apt/build.sh @@ -22,6 +22,8 @@ LANG=C set -u +SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + run() { "$@" From 2bfdbdcd8f827ccd5642f31cee6aece215a6a0fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 20 Oct 2025 14:25:48 +0200 Subject: [PATCH 08/11] Double timeout as we have to build twice now 
and check architecture for reproducible builds --- .github/workflows/package_linux.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/package_linux.yml b/.github/workflows/package_linux.yml index ee3af573e62..5f9accf410e 100644 --- a/.github/workflows/package_linux.yml +++ b/.github/workflows/package_linux.yml @@ -79,7 +79,7 @@ jobs: needs.check-labels.outputs.force == 'true' || contains(fromJSON(needs.check-labels.outputs.ci-extra-labels || '[]'), 'CI: Extra') || contains(fromJSON(needs.check-labels.outputs.ci-extra-labels || '[]'), 'CI: Extra: Package: Linux') - timeout-minutes: 75 + timeout-minutes: 150 strategy: fail-fast: false matrix: @@ -209,7 +209,7 @@ jobs: ${GITHUB_REF_NAME} \ release_candidate.yml - name: Set Reproducible Build Environment - if: ${{ matrix.id }} == 'debian-trixie-amd64' + if: env.ARCHITECTURE == 'amd64' run: | echo "REPROTEST=yes" >> "${GITHUB_ENV}" - name: Build From 6c06f3ce1c5956adb8d2753a90479c21a5babd40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 20 Oct 2025 17:50:26 +0200 Subject: [PATCH 09/11] Add Python3-venv to apache-arrow-apt-source/apt/debian-trixie/Dockerfile even though it is wrong. 
Just testing purposes --- .github/workflows/package_linux.yml | 2 +- .../apache-arrow-apt-source/apt/debian-trixie/Dockerfile | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/package_linux.yml b/.github/workflows/package_linux.yml index 5f9accf410e..9c661d451dc 100644 --- a/.github/workflows/package_linux.yml +++ b/.github/workflows/package_linux.yml @@ -79,7 +79,7 @@ jobs: needs.check-labels.outputs.force == 'true' || contains(fromJSON(needs.check-labels.outputs.ci-extra-labels || '[]'), 'CI: Extra') || contains(fromJSON(needs.check-labels.outputs.ci-extra-labels || '[]'), 'CI: Extra: Package: Linux') - timeout-minutes: 150 + timeout-minutes: 240 strategy: fail-fast: false matrix: diff --git a/dev/tasks/linux-packages/apache-arrow-apt-source/apt/debian-trixie/Dockerfile b/dev/tasks/linux-packages/apache-arrow-apt-source/apt/debian-trixie/Dockerfile index 1cab2169f4a..ff8ab73cd19 100644 --- a/dev/tasks/linux-packages/apache-arrow-apt-source/apt/debian-trixie/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow-apt-source/apt/debian-trixie/Dockerfile @@ -37,5 +37,6 @@ RUN \ devscripts \ fakeroot \ gnupg \ - lsb-release && \ + lsb-release \ + python3-venv && \ apt clean From e555e28c045684640c5a491f40fa624b0a7ab67b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 21 Oct 2025 08:35:33 +0200 Subject: [PATCH 10/11] Add missing faketime to Dockerfile --- .../apache-arrow-apt-source/apt/debian-trixie/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/dev/tasks/linux-packages/apache-arrow-apt-source/apt/debian-trixie/Dockerfile b/dev/tasks/linux-packages/apache-arrow-apt-source/apt/debian-trixie/Dockerfile index ff8ab73cd19..5c1df172560 100644 --- a/dev/tasks/linux-packages/apache-arrow-apt-source/apt/debian-trixie/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow-apt-source/apt/debian-trixie/Dockerfile @@ -36,6 +36,7 @@ RUN \ debhelper \ devscripts \ fakeroot \ + faketime \ gnupg \ 
lsb-release \ python3-venv && \ From 268b41cf9bdcbc3b14065d6bca0a9d19b26dec14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 21 Oct 2025 15:21:12 +0200 Subject: [PATCH 11/11] Fix gpg homedir so we don't get differences with reproducible builds when HOME is modified --- dev/tasks/linux-packages/apache-arrow-apt-source/debian/rules | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dev/tasks/linux-packages/apache-arrow-apt-source/debian/rules b/dev/tasks/linux-packages/apache-arrow-apt-source/debian/rules index 1e3be48c315..2a3c14c558f 100755 --- a/dev/tasks/linux-packages/apache-arrow-apt-source/debian/rules +++ b/dev/tasks/linux-packages/apache-arrow-apt-source/debian/rules @@ -12,10 +12,12 @@ export DH_OPTIONS override_dh_auto_build: gpg \ --no-default-keyring \ + --homedir /tmp \ --keyring ./apache-arrow-apt-source.kbx \ --import KEYS gpg \ --no-default-keyring \ + --homedir /tmp \ --keyring ./apache-arrow-apt-source.kbx \ --armor \ --export > apache-arrow-apt-source.asc