diff --git a/.github/workflows/rust-lint.yml b/.github/workflows/rust-lint.yml
index ed514af0..14b944e9 100644
--- a/.github/workflows/rust-lint.yml
+++ b/.github/workflows/rust-lint.yml
@@ -10,6 +10,13 @@ jobs:
       - uses: actions/checkout@v3
       - uses: dtolnay/rust-toolchain@stable
 
+      # Note: This is a workaround for an issue that recently started appearing in
+      # lint checks; I'm not yet sure whether it's caused by GitHub Actions updating
+      # something behind the scenes:
+      #   error: 'cargo-fmt' is not installed for the toolchain 'stable-x86_64-unknown-linux-gnu'
+      - name: Install rustfmt
+        run: rustup component add rustfmt clippy
+
       - name: Install tools
         run: |
           cargo install cargo-deny
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b7a4f75f..62d14e01 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,7 +15,39 @@ The format is based on Keep a Changelog and this project adheres to
 ### Migration
 - If there are breaking changes, put a short, actionable checklist here.
 
-## [0.14.0-alpha] - 2024-09-08
+---
+
+## [0.15.0-alpha] - 2025-09-25
+### Breaking
+- Default payload alignment increased from 16 bytes to 64 bytes to ensure
+  SIMD- and cacheline-safe zero-copy access across SSE/AVX/AVX-512 code
+  paths. Readers/writers compiled with `<= 0.14.x-alpha` that assume
+  16-byte alignment will not be able to parse 0.15.x stores correctly.
+
+### Added
+- Debug/test-only assertions (`debug_assert_aligned`,
+  `debug_assert_aligned_offset`) to validate both pointer- and
+  offset-level alignment invariants.
+
+### Changed
+- Updated documentation and examples to reflect the new 64-byte default
+  `PAYLOAD_ALIGNMENT` (still configurable in
+  `simd-r-drive-entry-handle/src/constants.rs`).
+- `EntryHandle::as_arrow_buffer` and `into_arrow_buffer` now check both
+  pointer and offset alignment when compiled in test or debug mode.
+
+### Migration
+- Stores created with 0.15.x are not backward-compatible with
+  0.14.x readers/writers due to the alignment change.
+- To migrate:
+  1. Read entries with your existing 0.14.x binary.
+  2. Rewrite them into a fresh 0.15.x store (which applies 64-byte
+     alignment), as sketched below.
+  3. Deploy upgraded readers before upgrading writers in multi-service
+     environments.
+
+---
+
+## [0.14.0-alpha] - 2025-09-08
 ### Breaking
 - Files written by 0.14.0-alpha use padded payload starts for fixed alignment.
   Older readers (<= 0.13.x-alpha) may misinterpret pre-pad bytes as part of the
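For step 2 of the migration, the rewrite loop can be scripted roughly as follows. This is a minimal sketch, not shipped tooling: `DataStore::open` and the `read` accessor are assumed API names, the key list must come from your application, and in practice the read side runs in a 0.14.x binary while the write side runs in a 0.15.x binary (a single process cannot link both crate versions, so this one-function form is for illustration only).

```rust
use simd_r_drive::DataStore;
use std::path::Path;

/// Re-write every known entry from an old store into a fresh store so the
/// new file gets 64-byte aligned payload starts. Illustrative only.
fn migrate(old: &Path, new: &Path, keys: &[Vec<u8>]) {
    let old_store = DataStore::open(old).expect("open existing 0.14.x store");
    let new_store = DataStore::open(new).expect("create fresh 0.15.x store");

    for key in keys {
        // Zero-copy read from the old store...
        if let Ok(Some(entry)) = old_store.read(key) {
            // ...appended to the new store at a 64-byte aligned offset.
            new_store.write(key, entry.as_slice()).expect("rewrite entry");
        }
    }
}
```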
diff --git a/Cargo.lock b/Cargo.lock
index c7d68453..de3d18f1 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1819,7 +1819,7 @@ dependencies = [
 
 [[package]]
 name = "simd-r-drive"
-version = "0.14.0-alpha"
+version = "0.15.0-alpha"
 dependencies = [
  "async-trait",
  "bincode",
@@ -1847,7 +1847,7 @@ dependencies = [
 
 [[package]]
 name = "simd-r-drive-entry-handle"
-version = "0.14.0-alpha"
+version = "0.15.0-alpha"
 dependencies = [
  "arrow",
  "crc32fast",
@@ -1856,7 +1856,7 @@ dependencies = [
 
 [[package]]
 name = "simd-r-drive-extensions"
-version = "0.14.0-alpha"
+version = "0.15.0-alpha"
 dependencies = [
  "bincode",
  "doc-comment",
@@ -1868,7 +1868,7 @@ dependencies = [
 
 [[package]]
 name = "simd-r-drive-muxio-service-definition"
-version = "0.14.0-alpha"
+version = "0.15.0-alpha"
 dependencies = [
  "bitcode",
  "muxio-rpc-service",
@@ -1876,7 +1876,7 @@ dependencies = [
 
 [[package]]
 name = "simd-r-drive-ws-client"
-version = "0.14.0-alpha"
+version = "0.15.0-alpha"
 dependencies = [
  "async-trait",
  "muxio-rpc-service",
@@ -1890,7 +1890,7 @@ dependencies = [
 
 [[package]]
 name = "simd-r-drive-ws-server"
-version = "0.14.0-alpha"
+version = "0.15.0-alpha"
 dependencies = [
  "clap",
  "indoc",
@@ -2532,3 +2532,19 @@ dependencies = [
  "quote",
  "syn",
 ]
+
+[[patch.unused]]
+name = "muxio-rpc-service"
+version = "0.10.0-alpha"
+
+[[patch.unused]]
+name = "muxio-rpc-service-caller"
+version = "0.10.0-alpha"
+
+[[patch.unused]]
+name = "muxio-tokio-rpc-client"
+version = "0.10.0-alpha"
+
+[[patch.unused]]
+name = "muxio-tokio-rpc-server"
+version = "0.10.0-alpha"
diff --git a/Cargo.toml b/Cargo.toml
index 5229a819..797b617c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [workspace.package]
 authors = ["Jeremy Harris "]
-version = "0.14.0-alpha"
+version = "0.15.0-alpha"
 edition = "2024"
 repository = "https://github.com/jzombie/rust-simd-r-drive"
 license = "Apache-2.0"
@@ -79,10 +79,10 @@ resolver = "2"
 
 [workspace.dependencies]
 # Intra-workspace crates
-simd-r-drive = { path = ".", version = "0.14.0-alpha" }
-simd-r-drive-entry-handle = { path = "./simd-r-drive-entry-handle", version = "0.14.0-alpha" }
-simd-r-drive-ws-client = { path = "./experiments/simd-r-drive-ws-client", version = "0.14.0-alpha" }
-simd-r-drive-muxio-service-definition = { path = "./experiments/simd-r-drive-muxio-service-definition", version = "0.14.0-alpha" }
+simd-r-drive = { path = ".", version = "0.15.0-alpha" }
+simd-r-drive-entry-handle = { path = "./simd-r-drive-entry-handle", version = "0.15.0-alpha" }
+simd-r-drive-ws-client = { path = "./experiments/simd-r-drive-ws-client", version = "0.15.0-alpha" }
+simd-r-drive-muxio-service-definition = { path = "./experiments/simd-r-drive-muxio-service-definition", version = "0.15.0-alpha" }
 muxio-tokio-rpc-client = "0.9.0-alpha"
 muxio-tokio-rpc-server = "0.9.0-alpha"
 muxio-rpc-service = "0.9.0-alpha"
diff --git a/README.md b/README.md
index e3d8caf6..2a33467a 100644
--- a/README.md
+++ b/README.md
@@ -4,6 +4,8 @@
 `SIMD R Drive` is a high-performance, thread-safe storage engine using a single-file storage container optimized for zero-copy binary access.
 
+Payloads are written at fixed 64-byte aligned boundaries, ensuring efficient zero-copy access and predictable performance for SIMD and cache-friendly workloads.
+
 Can be used as a command line interface (CLI) app, or as a library in another application. Continuously tested on Mac, Linux, and Windows.
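For orientation before the format details below, a minimal embedded (library) round trip looks roughly like this sketch; `DataStore::open` and `read` are assumed constructor/accessor names, and error handling is elided:

```rust
use simd_r_drive::DataStore;
use std::path::Path;

fn main() {
    // Single-file storage container; assumed to be created on first open.
    let store = DataStore::open(Path::new("data.bin")).expect("open store");

    // Append a payload under a binary key.
    store.write(b"greeting", b"hello world").expect("write");

    // Zero-copy read: the handle borrows the mmap rather than copying bytes.
    let entry = store.read(b"greeting").expect("read").expect("entry present");
    assert_eq!(entry.as_slice(), b"hello world");
}
```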
 
 [Documentation](https://docs.rs/simd-r-drive/latest/simd_r_drive/)
@@ -48,11 +50,13 @@ Additionally, `SIMD R Drive` is designed to handle datasets larger than availabl
 
 ## Fixed Payload Alignment (Zero-Copy Typed Slices)
 
-Every non-tombstone payload now starts at a fixed, power-of-two boundary (16 bytes by default, configurable). This guarantees that, when your payload length matches the element size, you can reinterpret bytes as typed slices (e.g., `&[u16]`, `&[u32]`, `&[u64]`, `&[u128]`) without copying.
+Every non-tombstone payload now begins on a fixed, power-of-two boundary (64 bytes by default). This matches the size of a typical CPU cacheline and ensures SIMD/vector loads (AVX, AVX-512, SVE, etc.) can operate at full speed without crossing cacheline boundaries.
+
+When your payload length matches the element size, you can safely reinterpret the bytes as typed slices (e.g., `&[u16]`, `&[u32]`, `&[u64]`, `&[u128]`) without copying.
 
-This change is transparent to the public API and works with all write modes, including streaming. The on-disk layout may include a few padding bytes per entry to maintain alignment. Tombstones are unaffected.
+The on-disk layout may include a few padding bytes per entry to maintain alignment. Tombstones are unaffected.
 
-Practical benefits include faster vectorized reads, simpler use of zero-copy helpers (e.g., casting libraries), and fewer fallback copies. If you need a stricter boundary for a target platform, adjust the [alignment constant](./src/storage_engine/constants.rs) and rebuild.
+Practical benefits include cache-friendly zero-copy reads, predictable SIMD performance, simpler use of casting libraries, and fewer fallback copies. If a different boundary is required for your hardware, adjust the [alignment constant](./simd-r-drive-entry-handle/src/constants.rs) and rebuild.
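To make the zero-copy claim concrete, here is a sketch of the typed-slice view using the same `bytemuck::try_cast_slice` the test suite relies on; the helper name is illustrative:

```rust
use bytemuck::try_cast_slice;

/// View a 64-byte-aligned payload as `&[u64]` without copying.
fn as_u64_slice(payload: &[u8]) -> &[u64] {
    // Succeeds because the payload base is 64-byte aligned (well above
    // align_of::<u64>()) and the length is a multiple of size_of::<u64>().
    try_cast_slice(payload).expect("aligned payload with length % 8 == 0")
}
```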
 
 ## Single-File Storage Container for Binary Data
 
@@ -103,6 +107,8 @@ Think of it as a self-contained binary filesystem—capable of storing and retri
 
+_Note: Illustration is conceptual and does not show the 64-byte aligned boundaries used in the actual on-disk format. In practice, every payload is padded to start on a fixed 64-byte boundary for cacheline and SIMD efficiency._
+
 Aligned entry (non-tombstone):
 
 | Offset Range | Field | Size (Bytes) | Description |
diff --git a/experiments/bindings/python-ws-client/Cargo.lock b/experiments/bindings/python-ws-client/Cargo.lock
index 2ff051d4..6f99a211 100644
--- a/experiments/bindings/python-ws-client/Cargo.lock
+++ b/experiments/bindings/python-ws-client/Cargo.lock
@@ -1048,7 +1048,7 @@ dependencies = [
 
 [[package]]
 name = "simd-r-drive"
-version = "0.14.0-alpha"
+version = "0.15.0-alpha"
 dependencies = [
  "async-trait",
  "clap",
@@ -1064,7 +1064,7 @@ dependencies = [
 
 [[package]]
 name = "simd-r-drive-entry-handle"
-version = "0.14.0-alpha"
+version = "0.15.0-alpha"
 dependencies = [
  "crc32fast",
  "memmap2",
@@ -1072,7 +1072,7 @@ dependencies = [
 
 [[package]]
 name = "simd-r-drive-muxio-service-definition"
-version = "0.14.0-alpha"
+version = "0.15.0-alpha"
 dependencies = [
  "bitcode",
  "muxio-rpc-service",
@@ -1080,7 +1080,7 @@ dependencies = [
 
 [[package]]
 name = "simd-r-drive-ws-client"
-version = "0.14.0-alpha"
+version = "0.15.0-alpha"
 dependencies = [
  "async-trait",
  "muxio-rpc-service",
diff --git a/experiments/bindings/python_(old_client)/pyproject.toml b/experiments/bindings/python_(old_client)/pyproject.toml
index 2ea15565..df5aef25 100644
--- a/experiments/bindings/python_(old_client)/pyproject.toml
+++ b/experiments/bindings/python_(old_client)/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "simd-r-drive-py"
-version = "0.14.0-alpha"
+version = "0.15.0-alpha"
 description = "SIMD-optimized append-only schema-less storage engine. Key-based binary storage in a single-file storage container."
 repository = "https://github.com/jzombie/rust-simd-r-drive"
 license = "Apache-2.0"
diff --git a/simd-r-drive-entry-handle/src/constants.rs b/simd-r-drive-entry-handle/src/constants.rs
index 62bf73ee..16b54a06 100644
--- a/simd-r-drive-entry-handle/src/constants.rs
+++ b/simd-r-drive-entry-handle/src/constants.rs
@@ -9,3 +9,10 @@ pub const CHECKSUM_RANGE: Range<usize> = 16..20;
 
 // Define checksum length explicitly since `CHECKSUM_RANGE.len()` isn't `const`
 pub const CHECKSUM_LEN: usize = CHECKSUM_RANGE.end - CHECKSUM_RANGE.start;
+
+/// Fixed alignment (power of two) for the start of every payload.
+/// 64 bytes matches cache-line size and SIMD-friendly alignment,
+/// improving the chances of staying zero-copy in vector kernels.
+/// Max pre-pad per entry is `PAYLOAD_ALIGNMENT - 1` bytes.
+pub const PAYLOAD_ALIGN_LOG2: u8 = 6; // 2^6 = 64
+pub const PAYLOAD_ALIGNMENT: u64 = 1 << PAYLOAD_ALIGN_LOG2;
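The pre-pad implied by these constants is the standard power-of-two align-up. The helper below is an illustrative sketch of that arithmetic, not code from this changeset:

```rust
const PAYLOAD_ALIGN_LOG2: u8 = 6; // 2^6 = 64
const PAYLOAD_ALIGNMENT: u64 = 1 << PAYLOAD_ALIGN_LOG2;

/// Round `offset` up to the next PAYLOAD_ALIGNMENT boundary.
/// Valid only because PAYLOAD_ALIGNMENT is a power of two.
fn align_up(offset: u64) -> u64 {
    (offset + PAYLOAD_ALIGNMENT - 1) & !(PAYLOAD_ALIGNMENT - 1)
}

fn main() {
    assert_eq!(align_up(0), 0);
    assert_eq!(align_up(1), 64);
    assert_eq!(align_up(64), 64);
    assert_eq!(align_up(65), 128);
    // Worst-case pre-pad is PAYLOAD_ALIGNMENT - 1 = 63 bytes.
    assert_eq!(align_up(65) - 65, 63);
}
```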
diff --git a/simd-r-drive-entry-handle/src/debug_assert_aligned.rs b/simd-r-drive-entry-handle/src/debug_assert_aligned.rs
new file mode 100644
index 00000000..f16b4ed5
--- /dev/null
+++ b/simd-r-drive-entry-handle/src/debug_assert_aligned.rs
@@ -0,0 +1,88 @@
+/// Debug-only pointer alignment assertion that is safe to export.
+///
+/// Why this style:
+/// - We need to re-export a symbol other crates can call, but we do not
+///   want benches or release builds to pull in debug-only deps or code.
+/// - Putting `#[cfg(...)]` on the function itself makes the symbol
+///   vanish in release/bench. Callers would then need their own cfg
+///   fences, which is brittle across crates.
+/// - By keeping the function always present and gating only its body,
+///   callers can invoke it unconditionally. In debug/test it asserts;
+///   in release/bench it compiles to a no-op.
+///
+/// Build behavior:
+/// - In debug/test, the inner block runs and uses `debug_assert!`.
+/// - In release/bench, the else block keeps the args "used" so the
+///   function is a true no-op (no codegen warnings, no panic paths).
+///
+/// Cost:
+/// - Inlining plus the cfg-ed body means zero runtime cost in release
+///   and bench profiles.
+///
+/// Usage:
+/// - Call anywhere you want a cheap alignment check in debug/test,
+///   including from other crates that depend on this one.
+#[inline]
+pub fn debug_assert_aligned(ptr: *const u8, align: usize) {
+    #[cfg(any(test, debug_assertions))]
+    {
+        debug_assert!(align.is_power_of_two());
+        debug_assert!(
+            (ptr as usize & (align - 1)) == 0,
+            "buffer base is not {}-byte aligned",
+            align
+        );
+    }
+
+    #[cfg(not(any(test, debug_assertions)))]
+    {
+        // Release/bench: no-op. Keep args used to avoid warnings.
+        let _ = ptr;
+        let _ = align;
+    }
+}
+
+/// Debug-only file-offset alignment assertion that is safe to export.
+///
+/// Same rationale as `debug_assert_aligned`: keep a stable symbol that
+/// callers can invoke without cfg fences, while ensuring zero cost in
+/// release/bench builds.
+///
+/// Why not a module-level cfg or `use`:
+/// - Some bench setups compile with `--all-features` and may still pull
+///   modules in ways that trip cfg-ed imports. Gating inside the body
+///   avoids those hazards and keeps the bench linker happy.
+///
+/// Behavior:
+/// - Debug/test: checks that `off` is a multiple of the configured
+///   `PAYLOAD_ALIGNMENT`.
+/// - Release/bench: no-op, arguments are marked used.
+///
+/// Notes:
+/// - This asserts the *derived start offset* of a payload, not the
+///   pointer. Use the pointer variant to assert the actual address you
+///   hand to consumers like Arrow.
+#[inline]
+pub fn debug_assert_aligned_offset(off: u64) {
+    #[cfg(any(test, debug_assertions))]
+    {
+        use crate::constants::PAYLOAD_ALIGNMENT;
+
+        debug_assert!(
+            PAYLOAD_ALIGNMENT.is_power_of_two(),
+            "PAYLOAD_ALIGNMENT must be a power of two"
+        );
+        debug_assert!(
+            off.is_multiple_of(PAYLOAD_ALIGNMENT),
+            "derived payload start not {}-byte aligned (got {})",
+            PAYLOAD_ALIGNMENT,
+            off
+        );
+    }
+
+    #[cfg(not(any(test, debug_assertions)))]
+    {
+        // Release/bench: no-op. Keep arg used to avoid warnings.
+        let _ = off;
+    }
+}
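Because both symbols stay exported in every build profile, downstream code can call them without cfg fences of its own. A hypothetical caller:

```rust
use simd_r_drive_entry_handle::{debug_assert_aligned, debug_assert_aligned_offset};

fn hand_off_payload(payload: &[u8], derived_file_offset: u64) {
    // Checked in debug/test builds; compiles to a no-op in release/bench.
    debug_assert_aligned(payload.as_ptr(), 64);
    debug_assert_aligned_offset(derived_file_offset);

    // ... pass `payload` to a SIMD kernel or an Arrow consumer ...
}
```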
diff --git a/simd-r-drive-entry-handle/src/entry_handle.rs b/simd-r-drive-entry-handle/src/entry_handle.rs
index 905faf49..1fdec85c 100644
--- a/simd-r-drive-entry-handle/src/entry_handle.rs
+++ b/simd-r-drive-entry-handle/src/entry_handle.rs
@@ -387,11 +387,20 @@ impl EntryHandle {
         use std::ptr::NonNull;
         use std::sync::Arc;
 
-        // Pointer to the start of the payload.
-        let ptr = NonNull::new(self.as_slice().as_ptr() as *mut u8).expect("non-null slice ptr");
+        let slice = self.as_slice();
+        #[cfg(any(test, debug_assertions))]
+        {
+            use crate::{
+                constants::PAYLOAD_ALIGNMENT, debug_assert_aligned, debug_assert_aligned_offset,
+            };
+            // Assert actual pointer alignment.
+            debug_assert_aligned(slice.as_ptr(), PAYLOAD_ALIGNMENT as usize);
+            // Assert derived file offset alignment.
+            debug_assert_aligned_offset(self.range.start as u64);
+        }
 
-        // Owner keeps the mmap alive for the Buffer's lifetime.
-        unsafe { Buffer::from_custom_allocation(ptr, self.size(), Arc::new(self.clone())) }
+        let ptr = NonNull::new(slice.as_ptr() as *mut u8).expect("non-null slice ptr");
+        unsafe { Buffer::from_custom_allocation(ptr, slice.len(), Arc::new(self.clone())) }
     }
 
     /// Convert this handle into an Arrow `Buffer` without copying.
@@ -418,11 +427,20 @@ impl EntryHandle {
         use std::ptr::NonNull;
         use std::sync::Arc;
 
-        let len: usize = self.size();
-        let ptr = NonNull::new(self.as_slice().as_ptr() as *mut u8).expect("non-null slice ptr");
+        let slice = self.as_slice();
+        #[cfg(any(test, debug_assertions))]
+        {
+            use crate::{
+                constants::PAYLOAD_ALIGNMENT, debug_assert_aligned, debug_assert_aligned_offset,
+            };
+            // Assert actual pointer alignment.
+            debug_assert_aligned(slice.as_ptr(), PAYLOAD_ALIGNMENT as usize);
+            // Assert derived file offset alignment.
+            debug_assert_aligned_offset(self.range.start as u64);
+        }
 
-        // Move self into the owner to avoid an extra Arc bump later.
-        unsafe { Buffer::from_custom_allocation(ptr, len, Arc::new(self)) }
+        let ptr = NonNull::new(slice.as_ptr() as *mut u8).expect("non-null slice ptr");
+        unsafe { Buffer::from_custom_allocation(ptr, slice.len(), Arc::new(self)) }
     }
 }
diff --git a/simd-r-drive-entry-handle/src/lib.rs b/simd-r-drive-entry-handle/src/lib.rs
index 674881f9..673af58a 100644
--- a/simd-r-drive-entry-handle/src/lib.rs
+++ b/simd-r-drive-entry-handle/src/lib.rs
@@ -5,3 +5,6 @@ pub use entry_handle::*;
 
 pub mod entry_metadata;
 pub use entry_metadata::*;
+
+pub mod debug_assert_aligned;
+pub use debug_assert_aligned::*;
diff --git a/src/storage_engine/constants.rs b/src/storage_engine/constants.rs
index b6af64f2..283fd4ba 100644
--- a/src/storage_engine/constants.rs
+++ b/src/storage_engine/constants.rs
@@ -5,8 +5,3 @@ pub const NULL_BYTE: [u8; 1] = [0];
 
 /// Stream copy chunk size.
 pub const WRITE_STREAM_BUFFER_SIZE: usize = 64 * 1024; // 64 KB
-
-/// Fixed alignment (power of two) for the start of every payload.
-/// 16 bytes covers u8/u16/u32/u64/u128 on mainstream targets.
-pub const PAYLOAD_ALIGN_LOG2: u8 = 4;
-pub const PAYLOAD_ALIGNMENT: u64 = 1 << PAYLOAD_ALIGN_LOG2;
diff --git a/src/storage_engine/data_store.rs b/src/storage_engine/data_store.rs
index 05000afc..a4d186e4 100644
--- a/src/storage_engine/data_store.rs
+++ b/src/storage_engine/data_store.rs
@@ -17,6 +17,9 @@ use std::sync::atomic::{AtomicU64, Ordering};
 use std::sync::{Arc, Mutex, RwLock, RwLockReadGuard};
 use tracing::{debug, info, warn};
 
+#[cfg(any(test, debug_assertions))]
+use simd_r_drive_entry_handle::debug_assert_aligned_offset;
+
 #[cfg(feature = "parallel")]
 use rayon::prelude::*;
 
@@ -344,6 +347,11 @@ impl DataStore {
             return None;
         }
 
+        #[cfg(any(test, debug_assertions))]
+        {
+            debug_assert_aligned_offset(entry_start as u64);
+        }
+
         Some(EntryHandle {
             mmap_arc: mmap_arc.clone(),
             range: entry_start..entry_end,
@@ -399,6 +407,11 @@ impl DataStore {
         {
             prev_tail
         } else {
+            #[cfg(any(test, debug_assertions))]
+            {
+                debug_assert_aligned_offset(derived_start);
+            }
+
             derived_start
         };
 
@@ -539,6 +552,11 @@ impl DataStore {
             return None;
         }
 
+        #[cfg(any(test, debug_assertions))]
+        {
+            debug_assert_aligned_offset(entry_start as u64);
+        }
+
         Some(EntryHandle {
             mmap_arc: mmap_arc.clone(),
             range: entry_start..entry_end,
@@ -1072,6 +1090,11 @@ impl DataStoreReader for DataStore {
             return Ok(None);
         }
 
+        #[cfg(any(test, debug_assertions))]
+        {
+            debug_assert_aligned_offset(entry_start as u64);
+        }
+
         Ok(Some(EntryHandle {
             mmap_arc,
             range: entry_start..entry_end,
diff --git a/src/utils/align_or_copy.rs b/src/utils/align_or_copy.rs
index 9192a6e7..24cc6c55 100644
--- a/src/utils/align_or_copy.rs
+++ b/src/utils/align_or_copy.rs
@@ -59,7 +59,7 @@ where
         Cow::Borrowed(aligned)
     } else {
         assert!(
-            bytes.len() % N == 0,
+            bytes.len().is_multiple_of(N),
             "Input length must be a multiple of element size"
         );
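Tying the `EntryHandle` and `DataStore` changes above together, a zero-copy Arrow hand-off might look like the following sketch. It assumes the crate's `arrow` integration is enabled and that reads go through a `read` accessor returning `Result<Option<EntryHandle>>`, as the `DataStoreReader` hunk above suggests; both are assumptions, not confirmed API:

```rust
use simd_r_drive::DataStore;

/// Borrow an entry's payload as an Arrow buffer without copying.
fn to_arrow(store: &DataStore, key: &[u8]) -> Option<arrow::buffer::Buffer> {
    let entry = store.read(key).ok()??;
    // In debug/test builds this also runs the pointer- and
    // offset-alignment assertions added in this changeset.
    Some(entry.as_arrow_buffer())
}
```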
diff --git a/tests/alignment_tests.rs b/tests/alignment_tests.rs
index 0b6cf353..4083dc7d 100644
--- a/tests/alignment_tests.rs
+++ b/tests/alignment_tests.rs
@@ -25,7 +25,7 @@ fn assert_payload_addr_aligned(bytes: &[u8]) {
     let ptr = bytes.as_ptr() as usize;
     let a = PAYLOAD_ALIGNMENT as usize;
     assert!(
-        ptr % a == 0,
+        ptr.is_multiple_of(a),
         "payload start address is not {}-byte aligned",
         a
     );
@@ -42,13 +42,13 @@ fn assert_can_view_as<T>(bytes: &[u8]) {
     );
     let ptr = bytes.as_ptr() as usize;
     assert!(
-        ptr % a_t == 0,
+        ptr.is_multiple_of(a_t),
         "payload addr {} is not aligned to T (align {})",
         ptr,
         a_t
     );
     assert!(
-        bytes.len() % size_of::<T>() == 0,
+        bytes.len().is_multiple_of(size_of::<T>()),
         "payload length {} is not a multiple of {}",
         bytes.len(),
         size_of::<T>()
@@ -67,47 +67,68 @@ fn assert_bytemuck_view_u128(bytes: &[u8]) {
 }
 
 #[cfg(target_arch = "x86_64")]
-fn assert_simd_16_byte_loadable(bytes: &[u8]) {
+fn assert_simd_64_byte_loadable(bytes: &[u8]) {
+    // Enforce 64B-aligned base for clean cacheline-friendly loads.
     assert!(
-        (bytes.as_ptr() as usize) % 16 == 0,
-        "SIMD pointer must be 16-byte aligned"
+        (bytes.as_ptr() as usize).is_multiple_of(64),
+        "SIMD pointer must be 64-byte aligned"
     );
-    let lanes = bytes.len() / 16;
+    // Process only the full 64B lanes; ignore any tail < 64B.
+    let lanes = bytes.len() / 64;
+    if lanes == 0 {
+        return;
+    }
     unsafe {
         for i in 0..lanes {
-            let p = bytes.as_ptr().add(i * 16) as *const __m128i;
-            let v = _mm_load_si128(p);
-            core::hint::black_box(v);
+            let base = bytes.as_ptr().add(i * 64);
+            let p0 = base.add(0) as *const __m128i;
+            let p1 = base.add(16) as *const __m128i;
+            let p2 = base.add(32) as *const __m128i;
+            let p3 = base.add(48) as *const __m128i;
+            let v0 = _mm_load_si128(p0);
+            let v1 = _mm_load_si128(p1);
+            let v2 = _mm_load_si128(p2);
+            let v3 = _mm_load_si128(p3);
+            core::hint::black_box((v0, v1, v2, v3));
         }
     }
 }
 
 #[cfg(target_arch = "aarch64")]
-fn assert_simd_16_byte_loadable(bytes: &[u8]) {
+fn assert_simd_64_byte_loadable(bytes: &[u8]) {
+    // Enforce 64B-aligned base for clean cacheline-friendly loads.
     assert!(
-        (bytes.as_ptr() as usize) % 16 == 0,
-        "SIMD pointer must be 16-byte aligned"
+        (bytes.as_ptr() as usize).is_multiple_of(64),
+        "SIMD pointer must be 64-byte aligned"
     );
-    let lanes = bytes.len() / 16;
+    // Process only the full 64B lanes; ignore any tail < 64B.
+    let lanes = bytes.len() / 64;
+    if lanes == 0 {
+        return;
+    }
     unsafe {
         for i in 0..lanes {
-            let p = bytes.as_ptr().add(i * 16);
-            let v0 = vld1q_u8(p);
-            core::hint::black_box(v0);
-            let p_vec = p as *const uint8x16_t;
-            let v1: uint8x16_t = core::ptr::read(p_vec);
-            core::hint::black_box(v1);
+            let base = bytes.as_ptr().add(i * 64);
+            let v0 = vld1q_u8(base.add(0));
+            let v1 = vld1q_u8(base.add(16));
+            let v2 = vld1q_u8(base.add(32));
+            let v3 = vld1q_u8(base.add(48));
+            // Also test an aligned typed read path.
+            let p0 = base.add(0) as *const uint8x16_t;
+            let r0: uint8x16_t = core::ptr::read(p0);
+            core::hint::black_box((v0, v1, v2, v3, r0));
         }
     }
 }
 
 #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
-fn assert_simd_16_byte_loadable(bytes: &[u8]) {
-    // Portable fallback: re-assert address and u128 view conditions.
+fn assert_simd_64_byte_loadable(bytes: &[u8]) {
+    // Portable fallback: enforce 64B alignment; if we have >= 64B,
+    // prove we could read at least one 16B lane safely.
     assert_payload_addr_aligned(bytes);
-    if bytes.len() >= 16 && bytes.len() % 16 == 0 {
+    if bytes.len() >= 64 {
         assert_can_view_as::<u128>(bytes);
-        assert_bytemuck_view_u128(bytes);
+        let _: &[u128] = try_cast_slice(bytes).expect("cast &[u8]->&[u128] failed");
     }
 }
@@ -160,10 +181,10 @@ fn byte_alignment_unaligned_then_overwrite_and_simd() {
     // Phase 2: delete one string (tombstone, no pre-pad).
     store.delete(b"k_s2").unwrap();
 
-    // Phase 3: overwrite with 16B-multiple payloads.
-    let s1_aligned = vec![0xA5u8; 2 * 16]; // 32 bytes
-    let s3_aligned = vec![0xB6u8; 3 * 16]; // 48 bytes
-    let u32_aligned = vec![0xCCu8; 16 * 4]; // 64 bytes
+    // Phase 3: overwrite with 64B-multiple payloads.
+    let s1_aligned = vec![0xA5u8; 64]; // 64 bytes
+    let s3_aligned = vec![0xB6u8; 2 * 64]; // 128 bytes
+    let u32_aligned = vec![0xCCu8; 64]; // 64 bytes
 
     store.write(b"k_s1", &s1_aligned).unwrap();
     store.write(b"k_s3", &s3_aligned).unwrap();
@@ -198,7 +219,7 @@ fn byte_alignment_unaligned_then_overwrite_and_simd() {
     assert_bytemuck_view_u64(e_u64_new.as_slice());
     assert_bytemuck_view_u128(e_u128_new.as_slice());
 
-    // SIMD loads or portable fallback.
+    // SIMD 64B lanes (or portable fallback).
     for bytes in [
         e_s1_new.as_slice(),
         e_s3_new.as_slice(),
@@ -206,8 +227,8 @@
         e_u64_new.as_slice(),
         e_u128_new.as_slice(),
     ] {
-        if bytes.len() >= 16 {
-            assert_simd_16_byte_loadable(bytes);
+        if bytes.len() >= 64 {
+            assert_simd_64_byte_loadable(bytes);
         }
     }
 
@@ -215,8 +236,8 @@
     for entry in store.iter_entries() {
         let bytes = entry.as_slice();
         assert_payload_addr_aligned(bytes);
-        if bytes.len() >= 16 {
-            assert_simd_16_byte_loadable(bytes);
+        if bytes.len() >= 64 {
+            assert_simd_64_byte_loadable(bytes);
         }
     }