diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 9a639204c..fbd313a60 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -194,31 +194,43 @@ jobs: host-os: ubuntu-latest # Don't abort runners if a single one fails fail-fast: false - runs-on: ${{matrix.host-os}} + runs-on: ${{matrix.arch.host-os}} name: ${{matrix.build-type}} cross-build for ${{ matrix.arch.triple }} steps: - uses: actions/checkout@v2 - - name: Install cross-compile toolchain and QEMU + - name: "Install cross-compile toolchain and QEMU (ubuntu-20.04)" # Install the dependencies and clang 13. Earlier versions of clang don't # find the multilib things for this week's Ubuntu filesystem layout. + if: matrix.arch.host-os == 'ubuntu-20.04' run: | wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add - sudo add-apt-repository "deb http://apt.llvm.org/focal/ llvm-toolchain-focal-13 main" sudo apt update sudo apt install libstdc++-9-dev-${{ matrix.arch.name }}-cross qemu-user ninja-build clang-13 lld-13 sudo apt install ${{matrix.arch.extra-packages}} - # The default PowerPC qemu configuration uses the wrong page size. - # Wrap it in a script that fixes this. + - name: "Install cross-compile toolchain and QEMU (ubuntu-latest)" + if: matrix.arch.host-os == 'ubuntu-latest' + run: | + sudo apt update + sudo apt install libstdc++-12-dev-${{ matrix.arch.name }}-cross qemu-user ninja-build + sudo apt install ${{matrix.arch.extra-packages}} + - name: Reconfigure for PowerPC64LE + if: startsWith(matrix.arch.triple, 'powerpc64le') + # The default PowerPC qemu configuration uses the wrong page size. + # Wrap it in a script that fixes this. 
+ run: | sudo update-binfmts --disable qemu-ppc64le sudo sh -c 'echo ":qemu-ppc64le:M:0:\x7f\x45\x4c\x46\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x15\x00:\xff\xff\xff\xff\xff\xff\xff\xfc\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\x00:`pwd`/ppc64.sh:" > /proc/sys/fs/binfmt_misc/register' echo '#!/bin/sh' > ppc64.sh echo '/usr/bin/qemu-ppc64le -p 65536 $@' >> ppc64.sh chmod +x ppc64.sh - name: Configure + env: + SNMALLOC_CI_CLANG_VERSION: ${{ (matrix.arch.host-os == 'ubuntu-latest') && 14 || 13 }} + RTLD_NAME: ${{ matrix.arch.rtld }} + ARCH: ${{ matrix.arch.system-processor }} + TRIPLE: ${{ matrix.arch.triple}} run: > - RTLD_NAME=${{ matrix.arch.rtld }} - ARCH=${{ matrix.arch.system-processor }} - TRIPLE=${{ matrix.arch.triple}} cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{matrix.build-type}} @@ -232,12 +244,16 @@ jobs: - name: Build working-directory: ${{github.workspace}}/build run: NINJA_STATUS="%p [%f:%s/%t] %o/s, %es" ninja + # For debugging: verify that we've actually cross-compiled. 
+ - name: Run `file` for inspection + working-directory: ${{github.workspace}}/build + run: file func* # Run the tests, skipping the -malloc and perf- tests (perf doesn't make # sense in an emulator and the pass-through malloc is slightly flaky in # QEMU) - name: Test working-directory: ${{github.workspace}}/build - run: ctest --output-on-failure -E '(perf-.*)|(.*-malloc$)' --timeout 400 + run: ctest -j 2 --output-on-failure -E '(perf-.*)|(.*-malloc$)' --timeout 400 timeout-minutes: 30 windows: diff --git a/ci/Toolchain.cmake b/ci/Toolchain.cmake index 2b5613c47..5c21f9b9e 100644 --- a/ci/Toolchain.cmake +++ b/ci/Toolchain.cmake @@ -3,9 +3,9 @@ set(CMAKE_SYSTEM_PROCESSOR $ENV{ARCH}) set(triple $ENV{TRIPLE}) -set(CMAKE_C_COMPILER clang-13) +set(CMAKE_C_COMPILER clang-$ENV{SNMALLOC_CI_CLANG_VERSION}) set(CMAKE_C_COMPILER_TARGET ${triple}) -set(CMAKE_CXX_COMPILER clang++-13) +set(CMAKE_CXX_COMPILER clang++-$ENV{SNMALLOC_CI_CLANG_VERSION}) set(CMAKE_CXX_COMPILER_TARGET ${triple}) set(CROSS_LINKER_FLAGS "-fuse-ld=${SNMALLOC_LINKER_FLAVOUR} -Wl,--dynamic-linker=/usr/${triple}/lib/$ENV{RTLD_NAME},-rpath,/usr/${triple}/lib") diff --git a/src/snmalloc/ds/allocconfig.h b/src/snmalloc/ds/allocconfig.h index 51d5b415f..1089ccf7a 100644 --- a/src/snmalloc/ds/allocconfig.h +++ b/src/snmalloc/ds/allocconfig.h @@ -26,7 +26,26 @@ namespace snmalloc static constexpr size_t MIN_ALLOC_BITS = bits::ctz_const(MIN_ALLOC_SIZE); // Minimum slab size. +#if defined(SNMALLOC_QEMU_WORKAROUND) && defined(SNMALLOC_VA_BITS_64) + /* + * QEMU user-mode, up through and including v7.2.0-rc4, the latest tag at the + * time of this writing, does not use a tree of any sort to store its opinion + * of the address space, allocating an amount of memory linear in the size of + * any created map, not the number of pages actually used. This is + * exacerbated in and after qemu v6 (or, more specifically, d9c58585), which + * grew the proportionality constant. 
+ * In any case, for our CI jobs, then, use a larger minimum chunk size (that + * is, pagemap granularity) than by default to reduce the size of the + * pagemap. We can't raise this *too* much, lest we hit constexpr step + * limits in the sizeclasstable magic! 17 bits seems to be the sweet spot + * and means that any of our tests can run in a little under 2 GiB of RSS + * even on QEMU versions after v6. + */ + static constexpr size_t MIN_CHUNK_BITS = static_cast<size_t>(17); +#else static constexpr size_t MIN_CHUNK_BITS = static_cast<size_t>(14); +#endif static constexpr size_t MIN_CHUNK_SIZE = bits::one_at_bit(MIN_CHUNK_BITS); // Minimum number of objects on a slab @@ -37,7 +56,18 @@ namespace snmalloc #endif // Maximum size of an object that uses sizeclasses. +#if defined(SNMALLOC_QEMU_WORKAROUND) && defined(SNMALLOC_VA_BITS_64) + /* + * As a consequence of our significantly larger minimum chunk size, we need + * to raise the threshold for what constitutes a large object (which must + * be a multiple of the minimum chunk size). Extend the space of small + * objects up enough to match yet preserve the notion that there exist small + * objects larger than MIN_CHUNK_SIZE. + */ + static constexpr size_t MAX_SMALL_SIZECLASS_BITS = 19; +#else static constexpr size_t MAX_SMALL_SIZECLASS_BITS = 16; +#endif static constexpr size_t MAX_SMALL_SIZECLASS_SIZE = bits::one_at_bit(MAX_SMALL_SIZECLASS_BITS);