diff --git a/.github/actions/download.sh b/.github/actions/download.sh
index 15ed6fe0aae..c07bb2dfc18 100755
--- a/.github/actions/download.sh
+++ b/.github/actions/download.sh
@@ -6,15 +6,59 @@
 # Functions:
 # - Download all referenced native and cross source files for packages.
 # - Download all referenced python wheels needed to build.
-# - Use the “download-all” target when a package has multiple (arch-specific) files.
+# - Use the "download-all" target when a package has multiple (arch-specific) files.
+# - Retry download if checksum fails (cached file may be outdated).
 
 set -euo pipefail
 
 # Report any error (with line and package context) and exit.
-trap 'echo "::error::Error on line ${LINENO} while processing ${current:-}"; exit 1' ERR
+trap 'echo "::error::Error on line ${LINENO} while processing ${current:-unknown}"; exit 1' ERR
 
 # Ensure required tooling is present.
 command -v make >/dev/null 2>&1 || { echo "::error::make is not installed"; exit 1; }
 
+# Function to download and verify with retry on checksum failure
+download_with_retry() {
+    local target_dir="$1"
+    local target_name="$2"
+    local result=0
+    local output=""
+
+    echo " -> ${target_dir}: ${target_name} then checksum"
+
+    # First attempt
+    set +e
+    output=$(make -C "${target_dir}" ${target_name} checksum 2>&1)
+    result=$?
+    set -e
+
+    # Display output
+    echo "$output"
+
+    if [ $result -eq 0 ]; then
+        return 0
+    fi
+
+    # Check if checksum failure occurred by looking for .wrong rename in output
+    # Use escaped dot to match literal .wrong extension
+    if echo "$output" | grep -q 'Renamed as .*\.wrong'; then
+        echo " -> Checksum failed due to outdated cached file, retrying download for ${target_dir}..."
+
+        # Retry download and checksum
+        set +e
+        make -C "${target_dir}" ${target_name} checksum
+        result=$?
+        set -e
+
+        if [ $result -eq 0 ]; then
+            return 0
+        fi
+    fi
+
+    # Both attempts failed or failure was not due to cached file
+    echo "::error::Download/checksum failed for ${target_dir}"
+    return 1
+}
+
 echo ""
 # 1) Download native / cross-compiled sources.
 if [ -z "${DOWNLOAD_PACKAGES:-}" ]; then
@@ -22,9 +66,7 @@ if [ -z "${DOWNLOAD_PACKAGES:-}" ]; then
 else
   echo "===> Downloading packages: ${DOWNLOAD_PACKAGES}"
   for current in ${DOWNLOAD_PACKAGES}; do
-    echo " → ${current}: download-all then checksum"
-    # download-all pulls down all sources; checksum verifies them.
-    make -C "${current}" download-all checksum
+    download_with_retry "${current}" "download-all"
   done
 fi
@@ -37,8 +79,8 @@ else
   echo "===> Downloading wheels: ${build_pkgs[*]}"
   for pkg in "${build_pkgs[@]}"; do
     current="spk/${pkg}"
-    echo " → ${current}: download-wheels"
-    # download-wheels grabs all needed .whl files.
+    echo " -> ${current}: download-wheels"
+    # Wheels don't have checksum verification, so no retry needed
     make -C "${current}" download-wheels
   done
 fi
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 6bbb6186095..0683d606a31 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -71,11 +71,11 @@ jobs:
   prepare:
     name: Prepare for Build
     runs-on: ubuntu-latest
-    # provide results to other jobs
     outputs:
       arch_packages: ${{ steps.dependencies.outputs.arch_packages }}
       noarch_packages: ${{ steps.dependencies.outputs.noarch_packages }}
       has_min_dsm72_packages: ${{ steps.dependencies.outputs.has_min_dsm72_packages }}
+      distrib_hash: ${{ steps.distrib-hash.outputs.hash }}
     steps:
       - name: Checkout repository
         uses: actions/checkout@v5
@@ -109,19 +109,62 @@ jobs:
           GH_DEPENDENCY_FOLDERS: ${{ steps.getchanges_push.outputs.dependency_folders }} ${{ steps.getchanges_pr.outputs.dependency_folders }}
           USER_SPK_TO_BUILD: ${{ github.event.inputs.package }}
 
-      - name: Cache downloaded files
-        uses: actions/cache@v4
+      - name: Restore distrib cache
+        id: cache-restore
+        uses: actions/cache/restore@v4
         with:
           path: distrib
-          # use run_id in key to cache within workflow only.
-          key: distrib-${{ github.run_id }}
+          # Use a placeholder key that will never match for the exact key,
+          # forcing GitHub to use restore-keys to find the best available cache
+          key: distrib-placeholder-${{ github.run_id }}
+          restore-keys: |
+            distrib-
 
       - name: Download source files
         run: ./.github/actions/download.sh
         env:
           DOWNLOAD_PACKAGES: ${{ steps.dependencies.outputs.download_packages }}
-          ARCH_PACKAGES: ${{ needs.prepare.outputs.arch_packages }}
-          NOARCH_PACKAGES: ${{ needs.prepare.outputs.noarch_packages }}
+          ARCH_PACKAGES: ${{ steps.dependencies.outputs.arch_packages }}
+          NOARCH_PACKAGES: ${{ steps.dependencies.outputs.noarch_packages }}
+
+      - name: Compute distrib hash
+        id: distrib-hash
+        run: |
+          # Compute a hash based on file paths and sizes (not content) for performance.
+          # This is fast even with thousands of files since we don't read file contents.
+          # Including file size ensures that if a corrupted file (.wrong) is replaced
+          # by a fresh download with different size, the hash will change.
+          # Format: "path size" per line, sorted for reproducibility, then hashed.
+          if [ -d "distrib" ] && [ -n "$(ls -A distrib 2>/dev/null)" ]; then
+            HASH=$(find distrib -type f -printf '%p %s\n' | sort | sha256sum | cut -d' ' -f1)
+          else
+            HASH="empty"
+          fi
+          echo "hash=$HASH" >> $GITHUB_OUTPUT
+          echo "Distrib cache hash: $HASH"
+
+      - name: Check if cache already exists
+        id: cache-check
+        run: |
+          # Check if a cache with this exact hash already exists to avoid duplication.
+          # We search for the exact key pattern to see if this content was already cached.
+          CACHE_KEY="distrib-${{ steps.distrib-hash.outputs.hash }}"
+          if gh cache list --key "$CACHE_KEY" --limit 100 --json key | jq -e --arg key "$CACHE_KEY" '.[] | select(.key == $key)' > /dev/null 2>&1; then
+            echo "Cache with key $CACHE_KEY already exists, skipping save"
+            echo "save_cache=false" >> $GITHUB_OUTPUT
+          else
+            echo "New cache content detected, will save with key: $CACHE_KEY"
+            echo "save_cache=true" >> $GITHUB_OUTPUT
+          fi
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Save distrib cache
+        if: steps.cache-check.outputs.save_cache == 'true'
+        uses: actions/cache/save@v4
+        with:
+          path: distrib
+          key: distrib-${{ steps.distrib-hash.outputs.hash }}
 
   set-defaults:
     name: Set Defaults
@@ -256,11 +299,15 @@ jobs:
         path: toolchain/*/work
         key: toolchain-${{ matrix.arch }}-v3-${{ hashFiles(format('toolchain/syno-{0}/digests',matrix.arch)) }}
 
-      - name: Use cache of downloaded files
-        uses: actions/cache@v4
+      - name: Restore distrib cache
+        uses: actions/cache/restore@v4
         with:
           path: distrib
-          key: distrib-${{ github.run_id }}
+          # Use a placeholder key that will never match for the exact key,
+          # forcing GitHub to use restore-keys to find the best available cache
+          key: distrib-placeholder-${{ github.run_id }}
+          restore-keys: |
+            distrib-
 
       - name: Build Package (based on changed files)
         # We don't want to stop the build on errors.
diff --git a/.github/workflows/distrib-cache-maintenance.yml b/.github/workflows/distrib-cache-maintenance.yml
new file mode 100644
index 00000000000..5beac46695a
--- /dev/null
+++ b/.github/workflows/distrib-cache-maintenance.yml
@@ -0,0 +1,236 @@
+name: Distrib Cache Maintenance
+
+on:
+  # Regular refresh to prevent GitHub's 7-day eviction policy
+  schedule:
+    - cron: '0 4 * * 1,4'  # Monday and Thursday at 4 AM
+  workflow_dispatch:
+    inputs:
+      max_age_days:
+        description: 'Maximum age of files to keep (days)'
+        required: false
+        default: '180'
+        type: string
+      dry_run:
+        description: 'Dry run mode (no deletions)'
+        required: false
+        default: false
+        type: boolean
+
+jobs:
+  maintain-distrib-cache:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v5
+
+      - name: Restore distrib cache
+        id: cache-restore
+        uses: actions/cache/restore@v4
+        with:
+          path: distrib
+          # Use a placeholder key that will never match for the exact key,
+          # forcing GitHub to use restore-keys to find the best available cache
+          key: distrib-placeholder-${{ github.run_id }}
+          restore-keys: |
+            distrib-
+
+      - name: Display cache status
+        run: |
+          if [ -n "${{ steps.cache-restore.outputs.cache-matched-key }}" ]; then
+            echo "Cache restored from: ${{ steps.cache-restore.outputs.cache-matched-key }}"
+          else
+            echo "No cache found or partial restore"
+          fi
+
+          echo ""
+          echo "Current distrib cache status:"
+          if [ -d "distrib" ]; then
+            FILE_COUNT=$(find distrib -type f | wc -l)
+            TOTAL_SIZE=$(du -sh distrib 2>/dev/null | cut -f1 || echo "0")
+            echo "  Files: $FILE_COUNT"
+            echo "  Size: $TOTAL_SIZE"
+          else
+            echo "  (empty)"
+            mkdir -p distrib
+          fi
+
+      - name: Clean old files from distrib
+        env:
+          MAX_AGE_DAYS: ${{ github.event.inputs.max_age_days || '180' }}
+          DRY_RUN: ${{ github.event.inputs.dry_run || 'false' }}
+        run: |
+          echo "Cleaning files older than $MAX_AGE_DAYS days"
+          echo ""
+
+          if [ ! -d "distrib" ] || [ -z "$(ls -A distrib 2>/dev/null)" ]; then
+            echo "Distrib cache is empty, nothing to clean"
+            exit 0
+          fi
+
+          # Remove corrupted/invalid files marked as .wrong by checksum verification
+          WRONG_FILES=$(find distrib -type f -name "*.wrong" 2>/dev/null || true)
+          if [ -n "$WRONG_FILES" ]; then
+            WRONG_COUNT=$(echo "$WRONG_FILES" | wc -l)
+            echo "Found $WRONG_COUNT corrupted file(s) marked as .wrong"
+            if [ "$DRY_RUN" = "true" ]; then
+              echo "[DRY RUN] Would delete $WRONG_COUNT .wrong file(s)"
+              echo "Sample files (first 5):"
+              echo "$WRONG_FILES" | head -5
+            else
+              echo "$WRONG_FILES" | xargs rm -f
+              echo "Deleted $WRONG_COUNT .wrong file(s)"
+            fi
+            echo ""
+          fi
+
+          # Find files not modified for more than MAX_AGE_DAYS
+          OLD_FILES=$(find distrib -type f -mtime +${MAX_AGE_DAYS} 2>/dev/null || true)
+
+          if [ -z "$OLD_FILES" ]; then
+            echo "No obsolete files found"
+            exit 0
+          fi
+
+          OLD_COUNT=$(echo "$OLD_FILES" | wc -l)
+          OLD_SIZE=$(echo "$OLD_FILES" | xargs du -ch 2>/dev/null | tail -1 | cut -f1 || echo "0")
+
+          echo "Obsolete files found: $OLD_COUNT ($OLD_SIZE)"
+          echo ""
+
+          if [ "$DRY_RUN" = "true" ]; then
+            echo "[DRY RUN] Files that would be deleted:"
+            echo "Sample files (first 10):"
+            echo "$OLD_FILES" | head -10
+            if [ "$OLD_COUNT" -gt 10 ]; then
+              echo "  ... and $((OLD_COUNT - 10)) more"
+            fi
+          else
+            echo "Deleting files..."
+            echo "$OLD_FILES" | xargs rm -f
+
+            # Clean empty directories
+            find distrib -type d -empty -delete 2>/dev/null || true
+
+            echo "$OLD_COUNT files deleted ($OLD_SIZE freed)"
+          fi
+
+      - name: Report final status
+        run: |
+          echo ""
+          echo "Final distrib cache status:"
+          if [ -d "distrib" ]; then
+            FILE_COUNT=$(find distrib -type f | wc -l)
+            TOTAL_SIZE=$(du -sh distrib 2>/dev/null | cut -f1 || echo "0")
+            TOTAL_BYTES=$(du -sb distrib 2>/dev/null | cut -f1 || echo "0")
+            echo "  Files: $FILE_COUNT"
+            echo "  Size: $TOTAL_SIZE"
+
+            echo ""
+            echo "Distribution by file type:"
+            find distrib -type f -name "*.tar.gz" | wc -l | xargs -I{} echo "  .tar.gz: {}"
+            find distrib -type f -name "*.tar.xz" | wc -l | xargs -I{} echo "  .tar.xz: {}"
+            find distrib -type f -name "*.tar.bz2" | wc -l | xargs -I{} echo "  .tar.bz2: {}"
+            find distrib -type f -name "*.zip" | wc -l | xargs -I{} echo "  .zip: {}"
+            find distrib -type f -name "*.whl" | wc -l | xargs -I{} echo "  .whl: {}"
+
+            echo ""
+            echo "File age distribution:"
+            find distrib -type f -mtime -7 | wc -l | xargs -I{} echo "  < 7 days: {}"
+            find distrib -type f -mtime +6 -mtime -30 | wc -l | xargs -I{} echo "  7-30 days: {}"
+            find distrib -type f -mtime +29 -mtime -90 | wc -l | xargs -I{} echo "  30-90 days: {}"
+            find distrib -type f -mtime +89 -mtime -180 | wc -l | xargs -I{} echo "  90-180 days: {}"
+            find distrib -type f -mtime +179 | wc -l | xargs -I{} echo "  > 180 days: {}"
+
+            # Alert if cache size exceeds 2GB (considering other active caches in the repository)
+            echo ""
+            LIMIT_BYTES=2147483648
+            if [ "$TOTAL_BYTES" -gt "$LIMIT_BYTES" ]; then
+              echo "WARNING: Distrib cache size exceeds 2GB. Consider reducing max_age_days parameter."
+            else
+              echo "Cache size is within acceptable limits."
+            fi
+          else
+            echo "  (empty)"
+          fi
+
+      - name: Compute distrib hash
+        id: distrib-hash
+        run: |
+          # Compute a hash based on file paths and sizes (not content) for performance.
+          # This is fast even with thousands of files since we don't read file contents.
+          # Format: "path size" per line, sorted for reproducibility, then hashed.
+          if [ -d "distrib" ] && [ -n "$(ls -A distrib 2>/dev/null)" ]; then
+            HASH=$(find distrib -type f -printf '%p %s\n' | sort | sha256sum | cut -d' ' -f1)
+          else
+            HASH="empty"
+          fi
+          echo "hash=$HASH" >> $GITHUB_OUTPUT
+          echo "Distrib cache hash: $HASH"
+
+      - name: Check if cache already exists
+        id: cache-check
+        run: |
+          # Check if a cache with this exact hash already exists to avoid duplication
+          CACHE_KEY="distrib-${{ steps.distrib-hash.outputs.hash }}"
+          if gh cache list --key "$CACHE_KEY" --limit 100 --json key | jq -e --arg key "$CACHE_KEY" '.[] | select(.key == $key)' > /dev/null 2>&1; then
+            echo "Cache with key $CACHE_KEY already exists, skipping save"
+            echo "save_cache=false" >> $GITHUB_OUTPUT
+          else
+            echo "New cache content detected, will save with key: $CACHE_KEY"
+            echo "save_cache=true" >> $GITHUB_OUTPUT
+          fi
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Save refreshed cache
+        if: steps.cache-check.outputs.save_cache == 'true'
+        uses: actions/cache/save@v4
+        with:
+          path: distrib
+          key: distrib-${{ steps.distrib-hash.outputs.hash }}
+
+  cleanup-stale-caches:
+    runs-on: ubuntu-latest
+    needs: maintain-distrib-cache
+    permissions:
+      actions: write
+    steps:
+      - name: Cleanup old GitHub cache entries
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          echo "Cleaning up old GitHub cache entries"
+
+          # List all distrib caches with available fields
+          # Note: Only targets caches with keys starting with "distrib-"
+          # Other caches (e.g., toolchain-*) are not affected
+          ALL_CACHES=$(gh cache list --key "distrib-" --limit 100 --json key 2>/dev/null || true)
+
+          if [ -z "$ALL_CACHES" ] || [ "$ALL_CACHES" = "[]" ]; then
+            echo "No caches found"
+            exit 0
+          fi
+
+          # Count total caches
+          CACHE_COUNT=$(echo "$ALL_CACHES" | jq 'length')
+          echo "Found $CACHE_COUNT distrib caches"
+
+          # Keep 10 most recent, delete the remaining
+          # Keeping more during transition period to accommodate both old and new cache formats.
+          if [ "$CACHE_COUNT" -le 10 ]; then
+            echo "10 or fewer caches exist, nothing to delete based on count"
+          else
+            CACHES_TO_DELETE=$(echo "$ALL_CACHES" | jq -r '.[10:] | .[].key' 2>/dev/null || true)
+
+            if [ -n "$CACHES_TO_DELETE" ]; then
+              echo "Deleting old caches (keeping 10 most recent):"
+              for key in $CACHES_TO_DELETE; do
+                echo "  Deleting: $key"
+                gh cache delete "$key" 2>/dev/null || true
+              done
+            fi
+          fi
+
+          echo ""
+          echo "Cleanup completed"
diff --git a/mk/spksrc.download.mk b/mk/spksrc.download.mk
index 791b9239a8b..172bbf575fb 100644
--- a/mk/spksrc.download.mk
+++ b/mk/spksrc.download.mk
@@ -163,9 +163,10 @@ download_target: $(PRE_DOWNLOAD_TARGET)
 			$(MSG) "  File $${localFile} already downloaded" ; \
 		else \
 			rm -f $${localFile}.part ; \
-			$(MSG) "  wget --secure-protocol=TLSv1_2 --timeout=30 --tries=3 --waitretry=15 --retry-connrefused --max-redirect=20 --content-disposition -nv -O $${localFile} -nc $${url}" ; \
+			$(MSG) "  wget --secure-protocol=TLSv1_2 --timeout=30 --tries=3 --waitretry=15 --retry-connrefused --max-redirect=20 --content-disposition --retry-on-http-error=429,500,502,503,504 -nv -O $${localFile} -nc $${url}" ; \
 			wget --secure-protocol=TLSv1_2 --timeout=30 --tries=3 --waitretry=15 \
 				--retry-connrefused --max-redirect=20 --content-disposition \
+				--retry-on-http-error=429,500,502,503,504 \
 				-nv -O $${localFile}.part -nc $${url} ; \
 			mv $${localFile}.part $${localFile} ; \
 		fi ; \
diff --git a/mk/spksrc.wheel.mk b/mk/spksrc.wheel.mk
index 3202dc42755..54c9e057cf2 100644
--- a/mk/spksrc.wheel.mk
+++ b/mk/spksrc.wheel.mk
@@ -43,7 +43,7 @@ include ../../mk/spksrc.wheel-install.mk
 ##
 
-ifneq ($(and $(WHEEL_NAME),$(or (WHEEL_VERISON),$(WHEEL_URL))),)
+ifneq ($(and $(WHEEL_NAME),$(or $(WHEEL_VERSION),$(WHEEL_URL))),)
 download-wheels: wheel_download
 wheel: wheel_install
 else