diff --git a/gix-pack/tests/fixtures/make_regression_pack.sh b/gix-pack/tests/fixtures/make_regression_pack.sh
new file mode 100755
index 00000000000..ecf7fffe48a
--- /dev/null
+++ b/gix-pack/tests/fixtures/make_regression_pack.sh
@@ -0,0 +1,72 @@
+#!/usr/bin/env bash
+set -eu -o pipefail
+
+# This script creates a pack file specifically designed to trigger the buffer overflow
+# bug fixed in PR #2345.
+
+cleanup() {
+  cd ..
+  rm -rf regression-pack-repo
+}
+
+trap cleanup EXIT
+
+mkdir -p regression-pack-repo
+cd regression-pack-repo
+git init -q
+git config user.email "test@example.com"
+git config user.name "Test User"
+
+# Create a large base blob with highly compressible repetitive content
+{
+  for i in {1..100}; do
+    echo "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
+    echo "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB"
+    echo "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC"
+    echo "DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD"
+    echo "EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE"
+    echo "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"
+    echo "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG"
+    echo "HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH"
+  done
+} > largefile.txt
+git add largefile.txt
+git commit -qm "Add large base file"
+
+# Create first delta - small change
+sed -i '1s/AAAA/XXXX/g' largefile.txt
+git add largefile.txt
+git commit -qm "Delta 1"
+
+# Create second delta - more small changes
+sed -i '2s/BBBB/YYYY/g' largefile.txt
+git add largefile.txt
+git commit -qm "Delta 2"
+
+# Create third delta to make a longer chain
+sed -i '3s/CCCC/ZZZZ/g' largefile.txt
+git add largefile.txt
+git commit -qm "Delta 3"
+
+# Create fourth delta for an even longer chain
+sed -i '4s/DDDD/WWWW/g' largefile.txt
+git add largefile.txt
+git commit -qm "Delta 4"
+
+# Repack aggressively to create delta chains
+git repack -adf --window=250 --depth=250
+
+# Copy the pack file to the fixtures directory
+PACK_FILE=$(ls .git/objects/pack/*.pack)
+PACK_IDX=$(ls .git/objects/pack/*.idx)
+PACK_HASH=$(basename "$PACK_FILE" .pack | sed 's/pack-//')
+
+cp "$PACK_FILE" ../objects/pack-regression-$PACK_HASH.pack
+cp "$PACK_IDX" ../objects/pack-regression-$PACK_HASH.idx
+
+echo "Created pack files:"
+echo "  pack-regression-$PACK_HASH.pack"
+echo "  pack-regression-$PACK_HASH.idx"
+echo ""
+echo "Pack statistics:"
+git verify-pack -v "$PACK_FILE" | head -20
diff --git a/gix-pack/tests/fixtures/objects/pack-regression-bd7158957832e5b7b85af809fc317508121192f1.idx b/gix-pack/tests/fixtures/objects/pack-regression-bd7158957832e5b7b85af809fc317508121192f1.idx
new file mode 100644
index 00000000000..98464952dec
Binary files /dev/null and b/gix-pack/tests/fixtures/objects/pack-regression-bd7158957832e5b7b85af809fc317508121192f1.idx differ
diff --git a/gix-pack/tests/fixtures/objects/pack-regression-bd7158957832e5b7b85af809fc317508121192f1.pack b/gix-pack/tests/fixtures/objects/pack-regression-bd7158957832e5b7b85af809fc317508121192f1.pack
new file mode 100644
index 00000000000..1d7f9125d54
Binary files /dev/null and b/gix-pack/tests/fixtures/objects/pack-regression-bd7158957832e5b7b85af809fc317508121192f1.pack differ
diff --git a/gix-pack/tests/pack/data/file.rs b/gix-pack/tests/pack/data/file.rs
index 9ec1f97cf3f..9239a3ec8e3 100644
--- a/gix-pack/tests/pack/data/file.rs
+++ b/gix-pack/tests/pack/data/file.rs
@@ -96,6 +96,122 @@ mod decode_entry {
         );
     }
 
+    /// Regression test for PR #2345: Ensures that when decompressing the base object in a delta chain,
+    /// the output buffer is properly bounded to prevent the decompressor from overshooting and
+    /// corrupting the delta instruction data that follows in the buffer.
+    ///
+    /// ## Background
+    /// When resolving delta chains, the code allocates a buffer structured as:
+    /// `[first_buffer][second_buffer][delta_instructions]`
+    /// The fix in PR #2345 bounds the output buffer passed to `decompress_entry_from_data_offset`
+    /// to only `[first_buffer][second_buffer]` (i.e., `out_size - total_delta_data_size`), preventing
+    /// the decompressor from writing beyond this boundary and corrupting the delta instructions.
+    ///
+    /// ## About this test
+    /// This test uses a specially crafted pack file (`pack-regression-*.pack`) with a large base
+    /// object (52KB) and delta chains to exercise the buffer-bounding code path. While this test
+    /// currently does not fail when the fix is removed (because triggering the actual zlib-rs
+    /// overshooting behavior requires very specific compression/decompression conditions found in
+    /// repositories like chromium), it:
+    ///
+    /// 1. **Exercises the correct code path**: Tests the delta-resolution logic where the buffer
+    ///    bounding fix is applied
+    /// 2. **Documents the fix**: Serves as in-code documentation of PR #2345 and why buffer bounding
+    ///    is necessary
+    /// 3. **Provides infrastructure**: If a reproducing pack file is obtained (e.g., from chromium),
+    ///    it can easily be added here
+    /// 4. **Validates correctness**: Ensures delta chains decode correctly with the fix in place
+    ///
+    /// The actual bug manifests when zlib-rs (or potentially another decompressor) writes slightly
+    /// beyond the decompressed size when given an unbounded buffer, corrupting the delta
+    /// instructions that follow in memory. This is highly dependent on the specific compression
+    /// ratios and internal zlib-rs behavior.
+    #[test]
+    fn regression_delta_decompression_buffer_bound() {
+        const REGRESSION_PACK: &str = "objects/pack-regression-bd7158957832e5b7b85af809fc317508121192f1.pack";
+
+        #[allow(clippy::ptr_arg)]
+        fn resolve_with_panic(_oid: &gix_hash::oid, _out: &mut Vec<u8>) -> Option<pack::data::decode::entry::ResolvedBase> {
+            panic!("should not want to resolve an id here")
+        }
+
+        let p = pack_at(REGRESSION_PACK);
+
+        // Test the base object at offset 730 (ed2a638b) - 52000 bytes uncompressed.
+        // This is the large base that the deltas reference.
+        let entry = p.entry(730).expect("valid object at offset 730");
+        let mut buf = Vec::new();
+        let result = p.decode_entry(
+            entry,
+            &mut buf,
+            &mut Default::default(),
+            &resolve_with_panic,
+            &mut cache::Never,
+        );
+
+        assert!(
+            result.is_ok(),
+            "Base object should decode correctly"
+        );
+        assert_eq!(buf.len(), 52000, "Base object should be 52000 bytes");
+
+        // Test delta objects with chain length = 1.
+        // These objects delta against the large base, exercising the critical code path
+        // where the base object is decompressed with a bounded output buffer.
+
+        // Object 7a035d07 at offset 1141 (delta chain length 1)
+        let entry = p.entry(1141).expect("valid object at offset 1141");
+        let mut buf = Vec::new();
+        let result = p.decode_entry(
+            entry,
+            &mut buf,
+            &mut Default::default(),
+            &resolve_with_panic,
+            &mut cache::Never,
+        );
+
+        assert!(
+            result.is_ok(),
+            "Delta with chain length 1 should decode correctly with bounded buffer. \
+             Without the fix, buffer overflow would corrupt delta instructions, causing decode to fail."
+        );
+        assert!(!buf.is_empty(), "Decoded object should not be empty");
+
+        // Object e2ace3ae at offset 1222 (delta chain length 1)
+        let entry = p.entry(1222).expect("valid object at offset 1222");
+        let mut buf = Vec::new();
+        let result = p.decode_entry(
+            entry,
+            &mut buf,
+            &mut Default::default(),
+            &resolve_with_panic,
+            &mut cache::Never,
+        );
+
+        assert!(
+            result.is_ok(),
+            "Second delta should decode correctly with bounded buffer"
+        );
+        assert!(!buf.is_empty(), "Decoded object should not be empty");
+
+        // Object 8f3fd104 at offset 1305 (delta chain length 1)
+        let entry = p.entry(1305).expect("valid object at offset 1305");
+        let mut buf = Vec::new();
+        let result = p.decode_entry(
+            entry,
+            &mut buf,
+            &mut Default::default(),
+            &resolve_with_panic,
+            &mut cache::Never,
+        );
+
+        assert!(
+            result.is_ok(),
+            "Third delta should decode correctly with bounded buffer"
+        );
+        assert!(!buf.is_empty(), "Decoded object should not be empty");
+    }
+
     fn decode_entry_at_offset(offset: u64) -> Vec<u8> {
         #[allow(clippy::ptr_arg)]
         fn resolve_with_panic(_oid: &gix_hash::oid, _out: &mut Vec<u8>) -> Option<pack::data::decode::entry::ResolvedBase> {
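
Note for reviewers: the buffer-bounding idea the new test's doc comment describes can be demonstrated in isolation. The standalone sketch below is not gix-pack code; `lenient_inflate`, `base_size`, and `total_delta_data_size` are hypothetical stand-ins for the real decompressor and the sizes tracked during delta resolution, and the overshoot-by-one behavior merely simulates what a lenient decompressor such as zlib-rs can do under the right conditions.

    // Sketch only: shows why the output slice handed to the decompressor must
    // exclude the trailing delta instructions. Nothing here is the gix-pack API.

    /// A deliberately lenient decompressor: it may write one byte past the
    /// declared decompressed size if the output slice is large enough.
    fn lenient_inflate(out: &mut [u8], decompressed_size: usize) {
        let writable = out.len().min(decompressed_size + 1); // potential overshoot
        for byte in &mut out[..writable] {
            *byte = 0xAA; // pretend this is decompressed base-object data
        }
    }

    fn main() {
        let base_size = 4; // decompressed size of the base object
        let delta_instructions = [1u8, 2, 3]; // stored right after the base

        // One backing allocation laid out as [base buffer][delta instructions],
        // mirroring the `[first_buffer][second_buffer][delta_instructions]` layout.
        let mut buf = vec![0u8; base_size + delta_instructions.len()];
        buf[base_size..].copy_from_slice(&delta_instructions);

        // Buggy variant (don't do this): passing the whole buffer lets the
        // overshoot clobber buf[4], the first delta instruction:
        //     lenient_inflate(&mut buf, base_size);

        // Fixed variant: bound the slice to `out_size - total_delta_data_size`
        // so the decompressor physically cannot reach the instructions.
        let out_size = buf.len();
        let total_delta_data_size = delta_instructions.len();
        lenient_inflate(&mut buf[..out_size - total_delta_data_size], base_size);

        assert_eq!(&buf[base_size..], &delta_instructions, "instructions intact");
    }

Uncommenting the buggy call makes the final assertion fail, which is exactly the corruption the bounded slice in the real delta-resolution code prevents.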