From 85f5be2220d4dffbffe2700d3bf68b0daf78aca2 Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Wed, 2 Jul 2025 12:12:32 -0700 Subject: [PATCH] Make ZeroVec ULE-clean --- components/casemap/src/lib.rs | 2 ++ components/collections/src/lib.rs | 1 + components/datetime/src/lib.rs | 2 ++ components/decimal/src/lib.rs | 2 ++ components/locale/src/lib.rs | 2 ++ components/plurals/src/lib.rs | 1 + utils/zerovec/derive/src/varule.rs | 9 +++------ utils/zerovec/src/cow.rs | 10 ++++++++-- utils/zerovec/src/lib.rs | 1 + utils/zerovec/src/ule/encode.rs | 10 +++++++--- utils/zerovec/src/ule/mod.rs | 11 +++++++---- 11 files changed, 36 insertions(+), 15 deletions(-) diff --git a/components/casemap/src/lib.rs b/components/casemap/src/lib.rs index f0e43ad510d..218c4a151e8 100644 --- a/components/casemap/src/lib.rs +++ b/components/casemap/src/lib.rs @@ -2,6 +2,8 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). +#![feature(ptr_metadata)] + //! Case mapping for Unicode characters and strings. //! //! This module is published as its own crate ([`icu_casemap`](https://docs.rs/icu_casemap/latest/icu_casemap/)) diff --git a/components/collections/src/lib.rs b/components/collections/src/lib.rs index a20545d0887..12f8e74d3e5 100644 --- a/components/collections/src/lib.rs +++ b/components/collections/src/lib.rs @@ -32,6 +32,7 @@ ) )] #![warn(missing_docs)] +#![feature(ptr_metadata)] #[cfg(feature = "alloc")] extern crate alloc; diff --git a/components/datetime/src/lib.rs b/components/datetime/src/lib.rs index 09c9563a75c..1551c4337a6 100644 --- a/components/datetime/src/lib.rs +++ b/components/datetime/src/lib.rs @@ -2,6 +2,8 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). +#![feature(ptr_metadata)] + //! Localized formatting of dates, times, and time zones. //! //! 
This module is published as its own crate ([`icu_datetime`](https://docs.rs/icu_datetime/latest/icu_datetime/)) diff --git a/components/decimal/src/lib.rs b/components/decimal/src/lib.rs index 4046489fb49..d828dec4f2c 100644 --- a/components/decimal/src/lib.rs +++ b/components/decimal/src/lib.rs @@ -2,6 +2,8 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). +#![feature(ptr_metadata)] + //! Formatting basic decimal numbers. //! //! This module is published as its own crate ([`icu_decimal`](https://docs.rs/icu_decimal/latest/icu_decimal/)) diff --git a/components/locale/src/lib.rs b/components/locale/src/lib.rs index 7bfb526a199..5e73cf47801 100644 --- a/components/locale/src/lib.rs +++ b/components/locale/src/lib.rs @@ -2,6 +2,8 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). +#![feature(ptr_metadata)] + //! Canonicalization of locale identifiers based on [`CLDR`] data. //! //! This module is published as its own crate ([`icu_locale`](https://docs.rs/icu_locale/latest/icu_locale/)) diff --git a/components/plurals/src/lib.rs b/components/plurals/src/lib.rs index e6b831526f8..f987da4dc2b 100644 --- a/components/plurals/src/lib.rs +++ b/components/plurals/src/lib.rs @@ -73,6 +73,7 @@ ) )] #![warn(missing_docs)] +#![feature(ptr_metadata)] extern crate alloc; diff --git a/utils/zerovec/derive/src/varule.rs b/utils/zerovec/derive/src/varule.rs index 855e207dcdb..53c413038ee 100644 --- a/utils/zerovec/derive/src/varule.rs +++ b/utils/zerovec/derive/src/varule.rs @@ -117,12 +117,9 @@ pub fn derive_impl( // Safety: The invariants of this function allow us to assume bytes is valid, and // having at least #ule_size bytes is a validity constraint for the ULE type. 
let unsized_bytes = bytes.get_unchecked(#ule_size..); - let unsized_ref = <#unsized_field as zerovec::ule::VarULE>::from_bytes_unchecked(unsized_bytes); - // We should use the pointer metadata APIs here when they are stable: https://github.com/rust-lang/rust/issues/81513 - // For now we rely on all DST metadata being a usize to extract it via a fake slice pointer - let (_ptr, metadata): (usize, usize) = ::core::mem::transmute(unsized_ref); - let entire_struct_as_slice: *const [u8] = ::core::slice::from_raw_parts(bytes.as_ptr(), metadata); - &*(entire_struct_as_slice as *const Self) + let metadata = core::ptr::metadata(unsized_bytes); + + &*core::ptr::from_raw_parts::<Self>(bytes as *const [u8] as *const u8, metadata) } } } diff --git a/utils/zerovec/src/cow.rs b/utils/zerovec/src/cow.rs index fea149fc162..50923c10771 100644 --- a/utils/zerovec/src/cow.rs +++ b/utils/zerovec/src/cow.rs @@ -185,8 +185,14 @@ impl<'a, V: VarULE + ?Sized> VarZeroCow<'a, V> { /// Construct a new borrowed version of this #[cfg(feature = "alloc")] pub fn new_owned(val: Box<V>) -> Self { - let val = ManuallyDrop::new(val); - let buf: NonNull<[u8]> = val.as_bytes().into(); + let len = val.as_bytes().len(); + let raw_v = Box::into_raw(val) as *mut V; + // disallowed? + // let raw_u8: *mut u8 = raw_v as *mut [u8] as *mut u8; + let raw_u8: *mut u8 = raw_v as *mut () as *mut u8; + + let buf: NonNull<[u8]> = + unsafe { NonNull::new_unchecked(core::ptr::slice_from_raw_parts_mut(raw_u8, len)) }; let raw = RawVarZeroCow { // Invariants upheld: // 1 & 3: The bytes came from `val` so they're a valid value and byte slice diff --git a/utils/zerovec/src/lib.rs b/utils/zerovec/src/lib.rs index aac65cde966..468ad4d597f 100644 --- a/utils/zerovec/src/lib.rs +++ b/utils/zerovec/src/lib.rs @@ -2,6 +2,7 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). +#![feature(ptr_metadata)] //! 
Zero-copy vector abstractions for arbitrary types, backed by byte slices. //! //! `zerovec` enables a far wider range of types — beyond just `&[u8]` and `&str` — to participate in diff --git a/utils/zerovec/src/ule/encode.rs b/utils/zerovec/src/ule/encode.rs index db9bf1209d8..38bf3391bdd 100644 --- a/utils/zerovec/src/ule/encode.rs +++ b/utils/zerovec/src/ule/encode.rs @@ -97,14 +97,18 @@ pub fn encode_varule_to_box<S: EncodeAsVarULE<T> + ?Sized, T: VarULE + ?Sized>(x // zero-fill the vector to avoid uninitialized data UB let mut vec: Vec<u8> = vec![0; x.encode_var_ule_len()]; x.encode_var_ule_write(&mut vec); - let boxed = mem::ManuallyDrop::new(vec.into_boxed_slice()); + let boxed = vec.into_boxed_slice(); unsafe { // Safety: `ptr` is a box, and `T` is a VarULE which guarantees it has the same memory layout as `[u8]` // and can be recouped via from_bytes_unchecked() - let ptr: *mut T = T::from_bytes_unchecked(&boxed) as *const T as *mut T; + let ptr = T::from_bytes_unchecked(&boxed) as *const T; + let metadata = core::ptr::metadata(ptr); // Safety: we can construct an owned version since we have mem::forgotten the older owner - Box::from_raw(ptr) + Box::from_raw(core::ptr::from_raw_parts_mut( + Box::into_raw(boxed) as *mut [u8] as *mut u8, + metadata, + )) } } diff --git a/utils/zerovec/src/ule/mod.rs b/utils/zerovec/src/ule/mod.rs index 629fe62c6b3..07ab0bd8a3f 100644 --- a/utils/zerovec/src/ule/mod.rs +++ b/utils/zerovec/src/ule/mod.rs @@ -362,13 +362,16 @@ pub unsafe trait VarULE: 'static { use alloc::boxed::Box; use core::alloc::Layout; let bytesvec = self.as_bytes().to_owned().into_boxed_slice(); - let bytesvec = mem::ManuallyDrop::new(bytesvec); + unsafe { // Get the pointer representation - let ptr: *mut Self = Self::from_bytes_unchecked(&bytesvec) as *const Self as *mut Self; - assert_eq!(Layout::for_value(&*ptr), Layout::for_value(&**bytesvec)); + let ptr = Self::from_bytes_unchecked(&bytesvec) as *const Self; + let metadata = core::ptr::metadata(ptr); // Transmute the pointer 
to an owned pointer - Box::from_raw(ptr) + Box::from_raw(core::ptr::from_raw_parts_mut( + Box::into_raw(bytesvec) as *mut [u8] as *mut u8, + metadata, + )) } } }