diff --git a/.config/nextest.toml b/.config/nextest.toml new file mode 100644 index 00000000..58c4b56c --- /dev/null +++ b/.config/nextest.toml @@ -0,0 +1,2 @@ +[profile.default] +leak-timeout = "500ms" \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 1411adf7..336666ac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,13 +4,21 @@ Notable changes only. ## Unreleased -### Added +### Added - add new unique (non-shared) strings and byte vectors +- add `as_borrowed` +- add inline `const` constructors ### Changed -- refactor the backend to support unique +- refactor the backend to support unique references +- remove low-level `Raw` type +- improve deserialization for strings, bytes and paths, see [#38](https://github.com/polazarus/hipstr/pull/38) and follow-up work + +### Removed + +- dependency on `serde_bytes` ## [0.6.0] - 2024-10-08 @@ -19,7 +27,7 @@ Notable changes only. - implement `core::error:Error` for custom errors, rather than `std::error::Error` and bump msrv -### Fixed +### Fixed - fix doc issue [#28](https://github.com/polazarus/hipstr/issues/28) - fix MIRI check due to provenance loss @@ -137,6 +145,7 @@ Most of those addition are breaking because they shadows `str`'s methods. 
Initial release + [0.6.0]: https://github.com/polazarus/hipstr/compare/0.5.1...0.6.0 [0.5.1]: https://github.com/polazarus/hipstr/compare/0.5.0...0.5.1 [0.5.0]: https://github.com/polazarus/hipstr/compare/0.4.0...0.5.0 diff --git a/Cargo.toml b/Cargo.toml index 6e52c657..2b1cbae8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "hipstr" -version = "0.6.0" +version = "0.6.1" authors = ["Polazarus "] description = """Yet another string for Rust: zero-cost borrow and slicing, inline representation for small strings, (atomic) reference counting""" @@ -10,16 +10,16 @@ categories = ["memory-management", "data-structures"] license = "MIT OR Apache-2.0" edition = "2021" readme = "README.md" -rust-version = "1.81.0" +rust-version = "1.83.0" [package.metadata.docs.rs] all-features = true [features] default = ["std"] -std = ["serde/std", "serde_bytes/std"] +std = ["serde/std"] unstable = [] -serde = ["dep:serde", "dep:serde_bytes"] +serde = ["dep:serde"] bstr = ["dep:bstr"] [dev-dependencies] @@ -27,6 +27,7 @@ fastrand = "2.0.0" serde_test = "1.0.176" serde = { version = "1.0.100", default-features = false, features = ["derive", "alloc"] } serde_json = { version = "1.0.45", default-features = false, features = ["alloc"] } +divan = "0.1.15" [dependencies] sptr = "0.3.2" @@ -43,18 +44,21 @@ optional = true default-features = false features = ["alloc"] -[dependencies.serde_bytes] -version = "0.11.3" -optional = true -default-features = false -features = ["alloc"] - [target.'cfg(loom)'.dependencies] loom = "0.7" +[target.'cfg(not(loom))'.dev-dependencies] +arcstr = "1.2.0" +ecow = "0.2.3" +kstring = "2.0.2" + [lints.rust] unexpected_cfgs = { level = "warn", check-cfg = [ 'cfg(coverage_nightly)', 'cfg(docsrs)', 'cfg(loom)', ] } + +[[bench]] +name = "main" +harness = false diff --git a/README.md b/README.md index 66b9572d..2dd4f14f 100644 --- a/README.md +++ b/README.md @@ -129,24 +129,26 @@ Note: this crate leverages the "exposed provenance" semantics. 
`#[non_exhaustive]` -| Name | Thread-safe cheap-clone | Local cheap-clone | Inline | Cheap slice | Bytes | Borrow `'static` | Borrow any `'a` | Comment | -| -------------------------------------------------------------- | ----------------------- | ----------------- | ------ | ----------- | ------ | ---------------- | :-------------- | ------------------------------------------------------------------------------------------------------ | -| `hipstr` | 🟢 | 🟢 | 🟢 | 🟢 | 🟢 | 🟢 | 🟢 | obviously! | -| [`arcstr`](https://github.com/thomcc/arcstr) | 🟢\* | ❌ | ❌ | ❌\*\* | ❌ | 🟢 | ❌ | \*use a custom thin `Arc`, \*\*heavy slice (with dedicated substring type) | -| [`flexstr`](https://github.com/nu11ptr/flexstr) | 🟢\* | 🟢 | 🟢 | ❌ | ❌ | 🟢 | ❌ | \*use an `Arc` instead of an `Arc` (remove one level of indirection but use fat pointers) | -| [`imstr`](https://github.com/xfbs/imstr) | 🟢 | 🟢 | ❌ | 🟢 | ❌ | ❌ | ❌ | | -| [`faststr`](https://github.com/volo-rs/faststr) | 🟢 | ❌ | 🟢 | 🟢 | ❌ | 🟢 | ❌ | zero-doc with complex API | -| [`fast-str`](https://github.com/xxXyh1908/rust-fast-str) | 🟢 | ❌ | 🟢 | 🟢 | ❌ | 🟢 | ❌ | inline repr is opt-in | -| [`ecow`](https://github.com/typst/ecow) | 🟢\* | ❌ | 🟢 | ❌ | 🟢\*\* | 🟢 | ❌ | \*on two words only 🤤, \*\*even any `T` | -| [`cowstr`](https://git.pipapo.org/cehteh/cowstr.git) | 🟢 | ❌ | ❌ | ❌\* | ❌ | 🟢 | ❌\*\* | \*heavy slice, \*\*contrary to its name | -| [`compact_str`](https://github.com/parkmycar/compact_str) | ❌ | ❌ | 🟢 | ❌ | 🟢\* | ❌ | ❌ | \*opt-in via `smallvec` | -| [`inline_string`](https://github.com/fitzgen/inlinable_string) | ❌ | ❌ | 🟢 | ❌ | ❌ | ❌ | ❌ | | -| [`kstring`](https://docs.rs/kstring/latest/kstring/) | 🟢 | ❌ | 🟢 | ❌ | ❌ | 🟢 | ❌ | | -| [`smartstring`](https://github.com/bodil/smartstring) | ❌ | ❌ | 🟢 | ❌ | ❌ | ❌ | ❌ | | -| [`smallstr`](https://github.com/murarth/smallstr) | ❌ | ❌ | 🟢 | ❌ | ❌ | ❌ | ❌ | | -| [`smol_str`](https://github.com/rust-analyzer/smol_str) | ❌ | ❌ | 🟢\* | ❌ | ❌ | 🟢 | ❌ | \*but only inline string, here for 
reference | - -skipping specialized string types like [`tinystr`](https://github.com/unicode-org/icu4x) (ASCII-only, bounded), or `bstr`, or `bytestring`, or... +| Name | TS cheap-clone | Local cheap-clone | Inline | Cheap slice | Bytes | Borrow `'static` | Borrow any `'a` | Comment | +| -------------------------------------------------------------- | -------------- | ----------------- | ------ | ----------- | ----- | ---------------- | :-------------- | -------------------------------------------------------------------------------------------------- | +| `hipstr` | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | obviously! | +| [`arcstr`](https://github.com/thomcc/arcstr) | ✓\* | - | - | -\*\* | - | ✓ | - | \*use a custom thin `Arc`, \*\*heavy slice (with dedicated substring type) | +| [`flexstr`](https://github.com/nu11ptr/flexstr) | ✓\* | ✓ | ✓ | - | - | ✓ | - | \*use `(A)rc` instead of `(A)rc` (remove a level of indirection but use fat pointers) | +| [`imstr`](https://github.com/xfbs/imstr) | ✓ | ✓ | - | ✓ | - | - | - | | +| [`faststr`](https://github.com/volo-rs/faststr) | ✓ | - | ✓ | ✓ | - | ✓ | - | zero-doc with complex API | +| [`fast-str`](https://github.com/xxXyh1908/rust-fast-str) | ✓ | - | ✓ | ✓ | - | ✓ | - | inline repr is opt-in | +| [`ecow`](https://github.com/typst/ecow) | ✓\* | - | ✓ | - | ✓\*\* | ✓ | - | \*on two words only 🤤, \*\*even any `T` | +| [`cowstr`](https://git.pipapo.org/cehteh/cowstr.git) | ✓ | - | - | -\* | - | ✓ | -\*\* | \*heavy slice, \*\*contrary to its name | +| [`compact_str`](https://github.com/parkmycar/compact_str) | - | - | ✓ | - | ✓\* | - | - | \*opt-in via `smallvec` | +| [`inline_string`](https://github.com/fitzgen/inlinable_string) | - | - | ✓ | - | - | - | - | | +| [`kstring`](https://docs.rs/kstring/latest/kstring/) | ✓ | ✓ | ✓ | - | - | ✓ | ✓\* | safe mode, use boxed strings; \* with second type | +| [`smartstring`](https://github.com/bodil/smartstring) | - | - | ✓ | - | - | - | - | | +| [`smallstr`](https://github.com/murarth/smallstr) 
| - | - | ✓ | - | - | - | - | | +| [`smol_str`](https://github.com/rust-analyzer/smol_str) | - | - | ✓\* | - | - | ✓ | - | \*but only inline string, here for reference | + +skipping specialized string types like +[`tinystr`](https://github.com/unicode-org/icu4x) (ASCII-only, bounded), or +`bstr`, or `bytestring`, or... In short, `HipStr`, one string type to rule them all 😉 diff --git a/benches/main.rs b/benches/main.rs new file mode 100644 index 00000000..0ed90cfb --- /dev/null +++ b/benches/main.rs @@ -0,0 +1,76 @@ +use arcstr::ArcStr; +use divan::Bencher; +use ecow::EcoString; +use hipstr::{HipStr, UniqueHipStr}; +use kstring::KString; + +fn main() { + divan::main(); +} + +const S: &[u8] = &[42; 42]; +const S2: &str = unsafe { std::str::from_utf8_unchecked(S) }; + +#[divan::bench_group(sample_count = 10_000)] +mod from_slice { + use super::*; + + #[divan::bench(args = [0, 1, 16, 23, 32, 42])] + fn bench_hipstr_from_slice(n: usize) -> HipStr<'static> { + HipStr::from(&S2[0..n]) + } + + #[divan::bench(args = [0, 1, 16, 23, 32, 42])] + fn bench_unique_hipstr_from_slice(n: usize) -> UniqueHipStr<'static> { + UniqueHipStr::from(&S2[0..n]) + } + + #[divan::bench(args = [0, 1, 16, 23, 32, 42])] + fn bench_arcstr_from_slice(n: usize) -> ArcStr { + ArcStr::from(&S2[0..n]) + } + + #[divan::bench(args = [0, 1, 16, 23, 32, 42])] + fn bench_ecow_from_slice(n: usize) -> EcoString { + EcoString::from(&S2[0..n]) + } + + #[divan::bench(args = [0, 1, 16, 23, 32, 42])] + fn bench_kstring_from_slice(n: usize) -> KString { + KString::from_ref(&S2[0..n]) + } +} + +#[divan::bench_group(sample_count = 10_000)] +mod from_string { + use super::*; + + #[divan::bench(args = [0, 1, 16, 23, 32, 42])] + fn bench_hipstr_from_string(b: Bencher, n: usize) { + b.with_inputs(|| String::from(&S2[0..n])) + .bench_local_values(|s| HipStr::from(s)); + } + #[divan::bench(args = [0, 1, 16, 23, 32, 42])] + fn bench_uhipstr_from_string(b: Bencher, n: usize) { + b.with_inputs(|| String::from(&S2[0..n])) + 
.bench_local_values(|s| UniqueHipStr::from(s)); + } + + #[divan::bench(args = [0, 1, 16, 23, 32, 42])] + fn bench_arcstr_from_string(b: Bencher, n: usize) { + b.with_inputs(|| String::from(&S2[0..n])) + .bench_local_values(|s| ArcStr::from(s)); + } + + #[divan::bench(args = [0, 1, 16, 23, 32, 42])] + fn bench_ecow_from_string(b: Bencher, n: usize) { + b.with_inputs(|| String::from(&S2[0..n])) + .bench_local_values(|s| EcoString::from(s)); + } + + #[divan::bench(args = [0, 1, 16, 23, 32, 42])] + fn bench_kstring_from_string(b: Bencher, n: usize) { + b.with_inputs(|| String::from(&S2[0..n])) + .bench_local_values(|s| KString::from_string(s)); + } +} diff --git a/src/bytes.rs b/src/bytes.rs index 15440ed0..6359e919 100644 --- a/src/bytes.rs +++ b/src/bytes.rs @@ -1,22 +1,27 @@ //! Bytes. //! -//! This module provides the [`HipByt`] type as well as the associated helper and error types. +//! This module provides the [`HipByt`] type as well as the associated helper +//! and error types. use core::borrow::Borrow; use core::error::Error; use core::hash::Hash; -use core::mem::MaybeUninit; +use core::mem::{ManuallyDrop, MaybeUninit}; use core::ops::{Bound, Deref, DerefMut, Range, RangeBounds}; use core::ptr; -use super::raw::Raw; +use raw::borrowed::Borrowed; +use raw::{Inline, Split, SplitMut, Tag, Union}; + +use self::raw::try_range_of; +pub use self::raw::HipByt; use crate::alloc::fmt; use crate::alloc::vec::Vec; -use crate::raw::try_range_of; -use crate::{Arc, Backend}; +use crate::Backend; mod cmp; mod convert; +mod raw; #[cfg(feature = "serde")] pub mod serde; @@ -39,46 +44,6 @@ type Slice = ::bstr::BStr; #[cfg(not(feature = "bstr"))] type Slice = [u8]; -/// Smart bytes, i.e. cheaply clonable and sliceable byte string. 
-/// -/// # Examples -/// -/// You can create a `HipStr` from a [byte slice (&`[u8]`)][slice], an owned byte string -/// ([`Vec`], [`Box<[u8]>`][Box]), or a clone-on-write smart pointer -/// ([`Cow<[u8]>`][std::borrow::Cow]) with [`From`]: -/// -/// ``` -/// # use hipstr::HipByt; -/// let hello = HipByt::from(b"Hello".as_slice()); -/// ``` -/// -/// When possible, `HipStr::from` takes ownership of the underlying buffer: -/// -/// ``` -/// # use hipstr::HipByt; -/// let vec = Vec::from(b"World".as_slice()); -/// let world = HipByt::from(vec); -/// ``` -/// -/// To borrow a string slice, you can also use the no-copy constructor [`HipByt::borrowed`]: -/// -/// ``` -/// # use hipstr::HipByt; -/// let hello = HipByt::borrowed(b"Hello, world!"); -/// ``` -/// -/// # Representations -/// -/// `HipByt` has three possible internal representations: -/// -/// * borrow -/// * inline string -/// * shared heap allocated string -#[repr(transparent)] -pub struct HipByt<'borrow, B = Arc>(pub(crate) Raw<'borrow, B>) -where - B: Backend; - impl<'borrow, B> HipByt<'borrow, B> where B: Backend, @@ -103,7 +68,53 @@ where #[inline] #[must_use] pub const fn new() -> Self { - Self(Raw::empty()) + Self::inline_empty() + } + + /// Creates a new inline `HipByt` by copying the given slice. + /// The slice **must not** be too large to be inlined. + /// + /// # Panics + /// + /// It panics if the slice is too large. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// # use hipstr::HipByt; + /// let s = HipByt::inline(b"hello\0"); + /// assert_eq!(s, b"hello\0"); + /// ``` + #[must_use] + pub const fn inline(bytes: &[u8]) -> Self { + assert!(bytes.len() <= Self::inline_capacity(), "slice too large"); + + // SAFETY: length checked above + unsafe { Self::inline_unchecked(bytes) } + } + + /// Creates a new inline `HipByt` by copying the given slice. + /// Returns `None` if the given slice is too large to be inlined. 
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// # use hipstr::HipByt; + /// let s = HipByt::try_inline(b"hello\0").unwrap(); + /// assert_eq!(s, b"hello\0"); + /// ``` + #[must_use] + pub const fn try_inline(bytes: &[u8]) -> Option { + if bytes.len() <= Self::inline_capacity() { + // SAFETY: length checked above + Some(unsafe { Self::inline_unchecked(bytes) }) + } else { + None + } } /// Creates a new `HipByt` with the given capacity. @@ -126,8 +137,12 @@ where /// ``` #[inline] #[must_use] - pub fn with_capacity(cap: usize) -> Self { - Self(Raw::with_capacity(cap)) + pub fn with_capacity(capacity: usize) -> Self { + if capacity <= Self::inline_capacity() { + Self::inline_empty() + } else { + Self::from_vec(Vec::with_capacity(capacity)) + } } /// Creates a new `HipByt` from a byte slice. @@ -144,34 +159,55 @@ where /// assert_eq!(b.len(), 6); /// ``` #[must_use] + #[inline] pub const fn borrowed(bytes: &'borrow [u8]) -> Self { - Self(Raw::borrowed(bytes)) + Union { + borrowed: Borrowed::new(bytes), + } + .into_raw() } - /// Returns `true` if this `HipByt` uses the inline representation, `false` otherwise. + /// Returns the length of this `HipByt`. /// - /// # Examples + /// # Example /// /// Basic usage: /// /// ``` /// # use hipstr::HipByt; - /// let s = HipByt::borrowed(b"hello"); - /// assert!(!s.is_inline()); + /// let a = HipByt::borrowed(b"\xDE\xAD\xBE\xEF"); + /// assert_eq!(a.len(), 4); + /// ``` + #[inline] + #[must_use] + pub const fn len(&self) -> usize { + match self.split() { + Split::Inline(inline) => inline.len(), + Split::Allocated(heap) => heap.len(), + Split::Borrowed(borrowed) => borrowed.len(), + } + } + + /// Returns `true` if this `HipByt` has a length of zero, and `false` otherwise. 
/// - /// let s = HipByt::from(b"hello"); - /// assert!(s.is_inline()); /// - /// let s = HipByt::from(b"hello".repeat(10)); - /// assert!(!s.is_inline()); + /// Basic usage: + /// + /// ``` + /// # use hipstr::HipByt; + /// let a = HipByt::new(); + /// assert!(a.is_empty()); + /// + /// let b = HipByt::borrowed(b"ab"); + /// assert!(!b.is_empty()); /// ``` #[inline] #[must_use] - pub const fn is_inline(&self) -> bool { - self.0.is_inline() + pub const fn is_empty(&self) -> bool { + self.len() == 0 } - /// Returns `true` if this `HipByt` is a slice borrow, `false` otherwise. + /// Returns `true` if this `HipByt` uses the inline representation, `false` otherwise. /// /// # Examples /// @@ -180,21 +216,21 @@ where /// ``` /// # use hipstr::HipByt; /// let s = HipByt::borrowed(b"hello"); - /// assert!(s.is_borrowed()); + /// assert!(!s.is_inline()); /// /// let s = HipByt::from(b"hello"); - /// assert!(!s.is_borrowed()); + /// assert!(s.is_inline()); /// /// let s = HipByt::from(b"hello".repeat(10)); - /// assert!(!s.is_borrowed()); + /// assert!(!s.is_inline()); /// ``` #[inline] #[must_use] - pub const fn is_borrowed(&self) -> bool { - self.0.is_borrowed() + pub const fn is_inline(&self) -> bool { + matches!(self.tag(), Tag::Inline) } - /// Returns `true` if this `HipByt` is a shared heap-allocated byte sequence, `false` otherwise. + /// Returns `true` if this `HipByt` is a slice borrow, `false` otherwise. 
/// /// # Examples /// @@ -203,21 +239,22 @@ where /// ``` /// # use hipstr::HipByt; /// let s = HipByt::borrowed(b"hello"); - /// assert!(!s.is_allocated()); + /// assert!(s.is_borrowed()); /// /// let s = HipByt::from(b"hello"); - /// assert!(!s.is_allocated()); + /// assert!(!s.is_borrowed()); /// /// let s = HipByt::from(b"hello".repeat(10)); - /// assert!(s.is_allocated()); + /// assert!(!s.is_borrowed()); /// ``` #[inline] #[must_use] - pub const fn is_allocated(&self) -> bool { - self.0.is_allocated() + pub const fn is_borrowed(&self) -> bool { + matches!(self.tag(), Tag::Borrowed) } - /// Converts `self` into a borrowed slice if this `HipByt` is backed by a borrow. + /// Converts `self` into a borrowed slice if this `HipByt` is backed by a + /// borrow. /// /// # Errors /// @@ -235,82 +272,152 @@ where /// assert_eq!(c, Ok(SEQ)); /// assert!(std::ptr::eq(SEQ, c.unwrap())); /// ``` - #[inline] - pub fn into_borrowed(self) -> Result<&'borrow [u8], Self> { - self.0.into_borrowed().map_err(Self) + pub const fn into_borrowed(self) -> Result<&'borrow [u8], Self> { + match self.split() { + Split::Allocated(_) | Split::Inline(_) => Err(self), + Split::Borrowed(borrowed) => { + let result = borrowed.as_slice(); + core::mem::forget(self); // not needed + Ok(result) + } + } } - /// Returns the length of this `HipByt`. + /// Returns the borrowed slice if this `HipByt` is actually borrowed, `None` + /// otherwise. 
/// - /// # Example - /// - /// Basic usage: + /// # Examples /// /// ``` /// # use hipstr::HipByt; - /// let a = HipByt::borrowed(b"\xDE\xAD\xBE\xEF"); - /// assert_eq!(a.len(), 4); + /// static SEQ: &[u8] = &[1 ,2, 3]; + /// let s = HipByt::borrowed(SEQ); + /// let c: Option<&'static [u8]> = s.as_borrowed(); + /// assert_eq!(c, Some(SEQ)); + /// assert!(std::ptr::eq(SEQ, c.unwrap())); + /// + /// let s2 = HipByt::from(SEQ); + /// assert!(s2.as_borrowed().is_none()); /// ``` #[inline] #[must_use] - pub const fn len(&self) -> usize { - self.0.len() + pub const fn as_borrowed(&self) -> Option<&'borrow [u8]> { + match self.split() { + Split::Allocated(_) | Split::Inline(_) => None, + Split::Borrowed(borrowed) => Some(borrowed.as_slice()), + } } - /// Returns `true` if this `HipByt` has a length of zero, and `false` otherwise. + /// Returns `true` if this `HipByt` is a shared heap-allocated byte sequence, `false` otherwise. /// + /// # Examples /// /// Basic usage: /// /// ``` /// # use hipstr::HipByt; - /// let a = HipByt::new(); - /// assert!(a.is_empty()); + /// let s = HipByt::borrowed(b"hello"); + /// assert!(!s.is_allocated()); /// - /// let b = HipByt::borrowed(b"ab"); - /// assert!(!b.is_empty()); + /// let s = HipByt::from(b"hello"); + /// assert!(!s.is_allocated()); + /// + /// let s = HipByt::from(b"hello".repeat(10)); + /// assert!(s.is_allocated()); /// ``` #[inline] #[must_use] - pub const fn is_empty(&self) -> bool { - self.0.len() == 0 + pub const fn is_allocated(&self) -> bool { + matches!(self.tag(), Tag::Allocated) } - /// Extracts a slice of the entire `HipByt`. - /// - /// # Examples + /// Returns `true` if the representation is normalized. + #[inline] + #[must_use] + pub const fn is_normalized(&self) -> bool { + self.is_inline() || self.is_borrowed() || self.len() > Self::inline_capacity() + } + + /// Returns the maximal length for inline byte sequence. 
+ #[inline] + #[must_use] + pub const fn inline_capacity() -> usize { + Inline::capacity() + } + + /// Returns the total number of bytes the backend can hold. /// - /// Basic usage: + /// # Example /// /// ``` /// # use hipstr::HipByt; - /// let s = HipByt::from(b"foobar"); + /// let mut vec: Vec = Vec::with_capacity(42); + /// vec.extend(0..30); + /// let bytes = HipByt::from(vec); + /// assert_eq!(bytes.len(), 30); + /// assert_eq!(bytes.capacity(), 42); /// - /// assert_eq!(b"foobar", s.as_slice()); + /// let start = bytes.slice(0..29); + /// assert_eq!(bytes.capacity(), 42); // same backend, same capacity /// ``` #[inline] #[must_use] - pub const fn as_slice(&self) -> &[u8] { - self.0.as_slice() + pub fn capacity(&self) -> usize { + match self.split() { + Split::Inline(_) => Self::inline_capacity(), + Split::Borrowed(borrowed) => borrowed.len(), // provide something to simplify the API + Split::Allocated(allocated) => allocated.capacity(), + } } - /// Extracts a mutable slice of the entire `HipByt` if possible. + /// Converts `self` into a [`Vec`] without clone or allocation if possible. /// - /// # Examples + /// # Errors /// - /// Basic usage: + /// Returns `Err(self)` if it is impossible to take ownership of the vector + /// backing this `HipByt`. + #[inline] + #[allow(clippy::option_if_let_else)] + pub fn into_vec(self) -> Result, Self> { + let mut this = ManuallyDrop::new(self); + if let Some(allocated) = this.take_allocated() { + allocated + .try_into_vec() + .map_err(|allocated| Union { allocated }.into_raw()) + } else { + Err(ManuallyDrop::into_inner(this)) + } + } + + /// Makes the data owned, copying it if the data is actually borrowed. + /// + /// Returns a new `HipByt` consuming this one. 
+ /// + /// # Examples /// /// ``` /// # use hipstr::HipByt; - /// let mut s = HipByt::from(b"foo"); - /// let slice = s.as_mut_slice().unwrap(); - /// slice.copy_from_slice(b"bar"); - /// assert_eq!(b"bar", slice); + /// let v = vec![42; 42]; + /// let h = HipByt::borrowed(&v[..]); + /// // drop(v); // err, v is borrowed + /// let h = h.into_owned(); + /// drop(v); // ok + /// assert_eq!(h, [42; 42]); /// ``` #[inline] #[must_use] - pub fn as_mut_slice(&mut self) -> Option<&mut [u8]> { - self.0.as_mut_slice() + pub fn into_owned(self) -> HipByt<'static, B> { + let tag = self.tag(); + let old = self.union_move(); // self is not dropped! + + // SAFETY: tag representation + unsafe { + match tag { + Tag::Allocated => HipByt::from_allocated(old.allocated), + Tag::Borrowed => HipByt::from_slice(old.borrowed.as_slice()), + Tag::Inline => HipByt::from_inline(old.inline), + } + } } /// Extracts a mutable slice of the entire `HipByt` changing the @@ -331,9 +438,9 @@ where #[inline] #[doc(alias = "make_mut")] pub fn to_mut_slice(&mut self) -> &mut [u8] { - self.0.make_unique(); + self.make_unique(); // SAFETY: `make_unique` above ensures that it is uniquely owned - unsafe { self.0.as_mut_slice_unchecked() } + unsafe { self.as_mut_slice_unchecked() } } /// Extracts a slice as its own `HipByt`. @@ -380,8 +487,8 @@ where pub fn try_slice(&self, range: impl RangeBounds) -> Result> { let range = simplify_range(range, self.len()) .map_err(|(start, end, kind)| SliceError::new(kind, start, end, self))?; - let slice = unsafe { self.0.slice_unchecked(range) }; - Ok(Self(slice)) + let slice = unsafe { self.range_unchecked(range) }; + Ok(slice) } /// Extracts a slice as its own `HipByt`. @@ -403,7 +510,7 @@ where Bound::Included(&n) => n + 1, Bound::Unbounded => self.len(), }; - Self(unsafe { self.0.slice_unchecked(start..end) }) + unsafe { self.range_unchecked(start..end) } } /// Extracts a slice as its own `HipByt` based on the given subslice `&[u8]`. 
@@ -431,16 +538,6 @@ where result } - /// Extracts a slice as its own `HipByt` based on the given subslice `&[u8]`. - /// - /// # Safety - /// - /// The slice MUST be a part of this `HipByt` - #[must_use] - pub unsafe fn slice_ref_unchecked(&self, slice: &[u8]) -> Self { - Self(unsafe { self.0.slice_ref_unchecked(slice) }) - } - /// Returns a slice as it own `HipByt` based on the given subslice `&[u8]`. /// /// # Errors @@ -461,48 +558,8 @@ where #[must_use] pub fn try_slice_ref(&self, range: &[u8]) -> Option { let slice = range; - let range = try_range_of(self.0.as_slice(), slice)?; - let raw = unsafe { self.0.slice_unchecked(range) }; - Some(Self(raw)) - } - - /// Returns the maximal length for inline byte sequence. - #[inline] - #[must_use] - pub const fn inline_capacity() -> usize { - Raw::::inline_capacity() - } - - /// Returns the total number of bytes the backend can hold. - /// - /// # Example - /// - /// ``` - /// # use hipstr::HipByt; - /// let mut vec: Vec = Vec::with_capacity(42); - /// vec.extend(0..30); - /// let bytes = HipByt::from(vec); - /// assert_eq!(bytes.len(), 30); - /// assert_eq!(bytes.capacity(), 42); - /// - /// let start = bytes.slice(0..29); - /// assert_eq!(bytes.capacity(), 42); // same backend, same capacity - /// ``` - #[inline] - #[must_use] - pub fn capacity(&self) -> usize { - self.0.capacity() - } - - /// Converts `self` into a [`Vec`] without clone or allocation if possible. - /// - /// # Errors - /// - /// Returns `Err(self)` if it is impossible to take ownership of the vector - /// backing this `HipByt`. - #[inline] - pub fn into_vec(self) -> Result, Self> { - self.0.into_vec().map_err(Self) + let range = try_range_of(self.as_slice(), slice)?; + Some(unsafe { self.slice_unchecked(range) }) } /// Returns a mutable handle to the underlying [`Vec`]. 
@@ -533,7 +590,7 @@ where #[inline] #[must_use] pub fn mutate(&mut self) -> RefMut<'_, 'borrow, B> { - let owned = self.0.take_vec(); + let owned = self.take_vec(); #[cfg(feature = "bstr")] let owned = owned.into(); @@ -544,25 +601,6 @@ where } } - /// Shortens this `HipByt` to the specified length. - /// - /// If the new length is greater than the current length, this has no effect. - /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// # use hipstr::HipByt; - /// let mut a = HipByt::from(b"abc"); - /// a.truncate(1); - /// assert_eq!(a, b"a"); - /// ``` - #[inline] - pub fn truncate(&mut self, new_len: usize) { - self.0.truncate(new_len); - } - /// Truncates this `HipByt`, removing all contents. /// /// # Examples @@ -578,7 +616,7 @@ where /// ``` #[inline] pub fn clear(&mut self) { - self.0.truncate(0); + self.truncate(0); } /// Removes the last element from this `HipByt` and returns it, or [`None`] @@ -604,82 +642,196 @@ where } } - /// Appends all bytes of the slice to this `HipByt`. + /// Appends a byte to this `HipByt`. /// /// # Examples /// /// ``` /// # use hipstr::HipByt; /// let mut bytes = HipByt::from(b"abc"); - /// bytes.push_slice(b"123"); + /// bytes.push(b'1'); + /// bytes.push(b'2'); + /// bytes.push(b'3'); /// assert_eq!(bytes, b"abc123"); /// ``` #[inline] - #[doc(alias = "extend_from_slice")] - pub fn push_slice(&mut self, addition: &[u8]) { - self.0.push_slice(addition); + pub fn push(&mut self, value: u8) { + self.push_slice(&[value]); } - /// Appends a byte to this `HipByt`. + /// Appends all bytes of the slice to this `HipByt`. 
/// /// # Examples /// /// ``` /// # use hipstr::HipByt; /// let mut bytes = HipByt::from(b"abc"); - /// bytes.push(b'1'); - /// bytes.push(b'2'); - /// bytes.push(b'3'); + /// bytes.push_slice(b"123"); /// assert_eq!(bytes, b"abc123"); /// ``` #[inline] - pub fn push(&mut self, value: u8) { - self.0.push_slice(&[value]); + #[doc(alias = "extend_from_slice", alias = "append")] + pub fn push_slice(&mut self, addition: &[u8]) { + let new_len = self.len() + addition.len(); + + if self.is_allocated() { + // current allocation may be pushed into it directly? + + // SAFETY: repr checked above + let allocated = unsafe { &mut self.union_mut().allocated }; + + if allocated.is_unique() { + // SAFETY: uniqueness is checked above + unsafe { + allocated.push_slice_unchecked(addition); + } + return; + } + } + + if new_len <= Self::inline_capacity() { + if !self.is_inline() { + // make it inline first + // SAFETY: `new_len` is checked before, so current len <= INLINE_CAPACITY + *self = unsafe { Self::inline_unchecked(self.as_slice()) }; + } + + // SAFETY: `new_len` is checked above + unsafe { + self.union_mut().inline.push_slice_unchecked(addition); + } + return; + } + + // requires a new vector + let mut vec = Vec::with_capacity(new_len); + vec.extend_from_slice(self.as_slice()); + vec.extend_from_slice(addition); + + // SAFETY: vec's len (new_len) is checked above to be > INLINE_CAPACITY + *self = Self::from_vec(vec); } - /// Makes the data owned, copying it if the data is actually borrowed. + /// Creates a new `HipByt` by copying this one `n` times. /// - /// Returns a new `HipByt` consuming this one. + /// This function **will not allocate** if the new length is less than or + /// equal to the maximum inline capacity. + /// + /// # Panics + /// + /// This function will panic if the capacity would overflow. 
/// /// # Examples /// + /// Basic usage: + /// /// ``` /// # use hipstr::HipByt; - /// let v = vec![42; 42]; - /// let h = HipByt::borrowed(&v[..]); - /// // drop(v); // err, v is borrowed - /// let h = h.into_owned(); - /// drop(v); // ok - /// assert_eq!(h, [42; 42]); + /// assert_eq!(HipByt::from(&[1, 2]).repeat(3), HipByt::from(&[1, 2, 1, 2, 1, 2])); + /// ``` + /// + /// A panic upon overflow: + /// + /// ```should_panic + /// // this will panic at runtime + /// # use hipstr::HipByt; + /// HipByt::from(b"0123456789abcdef").repeat(usize::MAX); /// ``` #[must_use] - pub fn into_owned(self) -> HipByt<'static, B> { - HipByt(self.0.into_owned()) + pub fn repeat(&self, n: usize) -> Self { + if self.is_empty() || n == 1 { + return self.clone(); + } + + let src_len = self.len(); + let new_len = src_len.checked_mul(n).expect("capacity overflow"); + if new_len <= Self::inline_capacity() { + let mut inline = Inline::zeroed(new_len); + let src = self.as_slice().as_ptr(); + let mut dst = inline.as_mut_slice().as_mut_ptr(); + + // SAFETY: copy only `new_len` bytes with an + // upper bound of `INLINE_CAPACITY` checked above + unsafe { + // could be better from an algorithmic standpoint + // but no expected gain for at most 23 bytes on 64 bit platform + for _ in 0..n { + ptr::copy_nonoverlapping(src, dst, src_len); + dst = dst.add(src_len); + } + } + + Self::from_inline(inline) + } else { + let vec = self.as_slice().repeat(n); + Self::from_vec(vec) + } } - /// Shrinks the capacity of the vector as much as possible. + /// Returns the remaining spare capacity of the vector as a slice of + /// `MaybeUninit`. /// - /// The capacity will remain at least as large as the actual length of the - /// vector. + /// The returned slice can be used to fill the vector with data (e.g. by + /// reading from a file) before marking the data as initialized using the + /// [`set_len`] method. /// - /// No-op if the representation is not allocated. 
+ /// [`set_len`]: HipByt::set_len + #[inline] + pub fn spare_capacity_mut(&mut self) -> &mut [MaybeUninit] { + match self.split_mut() { + SplitMut::Borrowed(_) => &mut [], + SplitMut::Inline(inline) => inline.spare_capacity_mut(), + SplitMut::Allocated(allocated) => allocated.spare_capacity_mut(), + } + } + + /// Forces the length of the vector to `new_len`. /// - /// # Representation stability + /// Does not normalize! /// - /// The allocated representation may change to *inline* if the required - /// capacity is smaller thant the inline capacity. + /// # Safety + /// + /// * If the repr is inline, `new_len` must be less than or equal to `INLINE_CAPACITY`. + /// * If `new_len` is greater than the current length: + /// * The elements at `old_len..new_len` must be initialized. + /// * The vector should not be shared. + pub unsafe fn set_len(&mut self, new_len: usize) { + match self.split_mut() { + SplitMut::Borrowed(borrowed) => unsafe { + borrowed.set_len(new_len); + }, + SplitMut::Inline(inline) => unsafe { inline.set_len(new_len) }, + SplitMut::Allocated(allocated) => unsafe { allocated.set_len(new_len) }, + } + } + + /// Shortens this `HipByt` to the specified length. + /// + /// If the new length is greater than the current length, this has no effect. 
/// /// # Examples /// - /// ```rust + /// Basic usage: + /// + /// ``` /// # use hipstr::HipByt; - /// let mut s = HipByt::with_capacity(100); - /// s.push_slice(b"abc"); - /// s.shrink_to_fit(); - /// assert_eq!(s.capacity(), HipByt::inline_capacity()); + /// let mut a = HipByt::from(b"abc"); + /// a.truncate(1); + /// assert_eq!(a, b"a"); /// ``` - pub fn shrink_to_fit(&mut self) { - self.0.shrink_to(self.len()); + #[inline] + pub fn truncate(&mut self, new_len: usize) { + if new_len < self.len() { + if self.is_allocated() && new_len <= Self::inline_capacity() { + let new = + unsafe { Self::inline_unchecked(self.as_slice().get_unchecked(..new_len)) }; + *self = new; + } else { + // SAFETY: `new_len` is checked above + unsafe { self.set_len(new_len) } + } + } + debug_assert!(self.is_normalized()); } /// Shrinks the capacity of the vector with a lower bound. @@ -701,19 +853,45 @@ where /// let mut s = HipByt::with_capacity(100); /// s.shrink_to(4); /// assert_eq!(s.capacity(), HipByt::inline_capacity()); + /// assert!(s.is_inline()); /// ``` pub fn shrink_to(&mut self, min_capacity: usize) { - self.0.shrink_to(min_capacity); - } - - pub(crate) fn take_vec(&mut self) -> Vec { - self.0.take_vec() + if self.is_allocated() { + let min_capacity = min_capacity.max(self.len()); + + if min_capacity > Self::inline_capacity() { + let allocated = unsafe { &mut self.union_mut().allocated }; + allocated.shrink_to(min_capacity); + } else { + let new = unsafe { Self::inline_unchecked(self.as_slice()) }; + *self = new; + } + } } - #[cfg(test)] - #[inline] - pub(crate) const fn is_normalized(&self) -> bool { - self.0.is_normalized() + /// Shrinks the capacity of the vector as much as possible. + /// + /// The capacity will remain at least as large as the actual length of the + /// vector. + /// + /// No-op if the representation is not allocated. 
+ /// + /// # Representation stability + /// + /// The allocated representation may change to *inline* if the required + /// capacity is smaller thant the inline capacity. + /// + /// # Examples + /// + /// ```rust + /// # use hipstr::HipByt; + /// let mut s = HipByt::with_capacity(100); + /// s.push_slice(b"abc"); + /// s.shrink_to_fit(); + /// assert_eq!(s.capacity(), HipByt::inline_capacity()); + /// ``` + pub fn shrink_to_fit(&mut self) { + self.shrink_to(self.len()); } /// Returns a new `HipByt` containing a copy of this slice where each byte @@ -814,62 +992,6 @@ where self.to_mut_slice().make_ascii_uppercase(); } - /// Creates a new `HipByt` by copying this one `n` times. - /// - /// This function **will not allocate** if the new length is less than or - /// equal to the maximum inline capacity. - /// - /// # Panics - /// - /// This function will panic if the capacity would overflow. - /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// # use hipstr::HipByt; - /// assert_eq!(HipByt::from(&[1, 2]).repeat(3), HipByt::from(&[1, 2, 1, 2, 1, 2])); - /// ``` - /// - /// A panic upon overflow: - /// - /// ```should_panic - /// // this will panic at runtime - /// # use hipstr::HipByt; - /// HipByt::from(b"0123456789abcdef").repeat(usize::MAX); - /// ``` - #[inline] - #[must_use] - pub fn repeat(&self, n: usize) -> Self { - Self(self.0.repeat(n)) - } - - /// Returns the remaining spare capacity of the vector as a slice of - /// `MaybeUninit`. - /// - /// The returned slice can be used to fill the vector with data (e.g. by - /// reading from a file) before marking the data as initialized using the - /// [`set_len`] method. - /// - /// [`set_len`]: HipByt::set_len - pub fn spare_capacity_mut(&mut self) -> &mut [MaybeUninit] { - self.0.spare_capacity_mut() - } - - /// Forces the length of the vector to `new_len`. - /// - /// # Safety - /// - /// * If inline, `new_len` should be must be less than or equal to `INLINE_CAPACITY`. 
- /// * If borrowed, `new_len` must be less than or equal to the current length. - /// * If allocated and `new_len` is greater than the current length, the vector should not be shared. - /// * The elements at `old_len..new_len` must be initialized. - pub unsafe fn set_len(&mut self, new_len: usize) { - // SAFETY: precondition - unsafe { self.0.set_len(new_len) } - } - /// Concatenates some byte slices into a single `HipByt`. /// /// The related constructor [`HipByt::concat`] is more general but may be @@ -890,8 +1012,8 @@ where return Self::new(); } - let mut raw = Raw::with_capacity(new_len); - let dst = raw.spare_capacity_mut(); + let mut new = Self::with_capacity(new_len); + let dst = new.spare_capacity_mut(); let dst_ptr = dst.as_mut_ptr().cast(); let final_ptr = slices.iter().fold(dst_ptr, |dst_ptr, slice| { let len = slice.len(); @@ -910,12 +1032,12 @@ where new_len ); - unsafe { raw.set_len(new_len) }; + unsafe { new.set_len(new_len) }; // check end pointer - debug_assert_eq!(final_ptr.cast_const(), raw.as_slice().as_ptr_range().end); + debug_assert_eq!(final_ptr.cast_const(), new.as_slice().as_ptr_range().end); - Self(raw) + new } /// Concatenates some byte slices (or things than can be seen as byte slice) into a new `HipByt`. @@ -957,8 +1079,8 @@ where return Self::new(); } - let mut raw = Raw::with_capacity(new_len); - let dst = raw.spare_capacity_mut(); + let mut new = Self::with_capacity(new_len); + let dst = new.spare_capacity_mut(); let dst_ptr: *mut u8 = dst.as_mut_ptr().cast(); // compute the final pointer @@ -975,10 +1097,10 @@ where } }); - unsafe { raw.set_len(new_len) }; - debug_assert_eq!(final_ptr.cast_const(), raw.as_slice().as_ptr_range().end); + unsafe { new.set_len(new_len) }; + debug_assert_eq!(final_ptr.cast_const(), new.as_slice().as_ptr_range().end); - Self(raw) + new } /// Joins some byte slices with the given separator into a new `HipByt`, i.e. 
@@ -1014,8 +1136,8 @@ where return Self::new(); } - let mut raw = Raw::with_capacity(new_len); - let dst = raw.spare_capacity_mut(); + let mut new = Self::with_capacity(new_len); + let dst = new.spare_capacity_mut(); let dst_ptr: *mut u8 = dst.as_mut_ptr().cast(); // compute the final pointer @@ -1053,10 +1175,10 @@ where end_ptr }); - unsafe { raw.set_len(new_len) }; - debug_assert_eq!(final_ptr.cast_const(), raw.as_slice().as_ptr_range().end); + unsafe { new.set_len(new_len) }; + debug_assert_eq!(final_ptr.cast_const(), new.as_slice().as_ptr_range().end); - Self(raw) + new } /// Joins some byte slices (or things than can be seen as byte slice) with @@ -1108,8 +1230,8 @@ where let sep_len = sep.len(); let new_len = (segments - 1) * sep_len + segments_len; - let mut raw = Raw::with_capacity(new_len); - let dst = raw.spare_capacity_mut(); + let mut new = Self::with_capacity(new_len); + let dst = new.spare_capacity_mut(); let dst_ptr: *mut u8 = dst.as_mut_ptr().cast(); // computes the final pointer @@ -1145,10 +1267,10 @@ where }); } - unsafe { raw.set_len(new_len) }; - debug_assert_eq!(final_ptr.cast_const(), raw.as_slice().as_ptr_range().end); + unsafe { new.set_len(new_len) }; + debug_assert_eq!(final_ptr.cast_const(), new.as_slice().as_ptr_range().end); - Self(raw) + new } } @@ -1176,16 +1298,6 @@ where } } -impl Clone for HipByt<'_, B> -where - B: Backend, -{ - #[inline] - fn clone(&self) -> Self { - Self(self.0.clone()) - } -} - impl Default for HipByt<'_, B> where B: Backend, diff --git a/src/bytes/cmp.rs b/src/bytes/cmp.rs index c6d0c223..d4af452b 100644 --- a/src/bytes/cmp.rs +++ b/src/bytes/cmp.rs @@ -18,7 +18,7 @@ where { #[inline] fn eq(&self, other: &HipByt<'b1, B1>) -> bool { - self.0.eq(&other.0) + self.inherent_eq(other) } } diff --git a/src/bytes/convert.rs b/src/bytes/convert.rs index ee455f3d..38b00c90 100644 --- a/src/bytes/convert.rs +++ b/src/bytes/convert.rs @@ -4,7 +4,6 @@ use super::HipByt; use crate::alloc::borrow::Cow; use 
crate::alloc::boxed::Box; use crate::alloc::vec::Vec; -use crate::raw::Raw; use crate::Backend; impl AsRef<[u8]> for HipByt<'_, B> @@ -25,7 +24,7 @@ where { #[inline] fn from(value: &[u8]) -> Self { - Self(Raw::from_slice(value)) + Self::from_slice(value) } } @@ -35,7 +34,7 @@ where { #[inline] fn from(value: &[u8; N]) -> Self { - Self(Raw::from_slice(value)) + Self::from_slice(value) } } @@ -45,7 +44,7 @@ where { #[inline] fn from(value: Box<[u8]>) -> Self { - Self(Raw::normalized_from_vec(value.into_vec())) + Self::normalized_from_vec(value.into_vec()) } } @@ -55,7 +54,7 @@ where { #[inline] fn from(value: Vec) -> Self { - Self(Raw::normalized_from_vec(value)) + Self::normalized_from_vec(value) } } diff --git a/src/bytes/raw.rs b/src/bytes/raw.rs new file mode 100644 index 00000000..e6544b0f --- /dev/null +++ b/src/bytes/raw.rs @@ -0,0 +1,669 @@ +//! Raw representations of [`HipByt`]. +//! +//! Provides only the core features for the sequence of bytes. + +use core::hint::unreachable_unchecked; +use core::marker::PhantomData; +use core::mem::{align_of, forget, replace, size_of, transmute, ManuallyDrop, MaybeUninit}; +use core::num::NonZeroU8; +use core::ops::Range; + +use allocated::Allocated; +use borrowed::Borrowed; + +use crate::alloc::vec::Vec; +use crate::Backend; + +pub mod allocated; +pub mod borrowed; +pub mod inline; +#[cfg(test)] +mod tests; + +/// Width (in bits) of the tag +const TAG_BITS: u8 = 2; + +/// Mask to extract the tag bits +const MASK: u8 = (1 << TAG_BITS) - 1; + +/// Tag for the inline repr +const TAG_INLINE: u8 = 1; + +/// Tag for the borrowed repr +const TAG_BORROWED: u8 = 2; + +/// Tag for the allocated repr +const TAG_ALLOCATED: u8 = 3; + +/// Maximal byte capacity of an inline [`HipByt`]. +const INLINE_CAPACITY: usize = size_of::() - 1; + +/// Size of word minus a tagged byte. +const WORD_SIZE_M1: usize = size_of::() - 1; + +/// Alias type for `Inline` with set inline capacity +pub type Inline = inline::Inline; + +/// Smart bytes, i.e. 
cheaply clonable and sliceable byte string. +/// +/// # Examples +/// +/// You can create a `HipStr` from a [byte slice (&`[u8]`)][slice], an owned byte string +/// ([`Vec`], [`Box<[u8]>`][Box]), or a clone-on-write smart pointer +/// ([`Cow<[u8]>`][std::borrow::Cow]) with [`From`]: +/// +/// ``` +/// # use hipstr::HipByt; +/// let hello = HipByt::from(b"Hello".as_slice()); +/// ``` +/// +/// When possible, `HipStr::from` takes ownership of the underlying buffer: +/// +/// ``` +/// # use hipstr::HipByt; +/// let vec = Vec::from(b"World".as_slice()); +/// let world = HipByt::from(vec); +/// ``` +/// +/// To borrow a string slice, you can also use the no-copy constructor [`HipByt::borrowed`]: +/// +/// ``` +/// # use hipstr::HipByt; +/// let hello = HipByt::borrowed(b"Hello, world!"); +/// ``` +/// +/// # Representations +/// +/// `HipByt` has three possible internal representations: +/// +/// * borrow +/// * inline string +/// * shared heap allocated string +#[repr(C)] +pub struct HipByt<'borrow, B: Backend> { + pivot: Pivot, + _marker: PhantomData<&'borrow B>, +} + +#[derive(Clone, Copy)] +#[repr(C)] +pub(super) struct Pivot { + #[cfg(target_endian = "little")] + tag_byte: NonZeroU8, + #[cfg(target_endian = "little")] + _word_remainder: MaybeUninit<[u8; WORD_SIZE_M1]>, + #[cfg(target_endian = "little")] + _word1: MaybeUninit<*mut ()>, + + _word2: MaybeUninit<*mut ()>, + + #[cfg(target_endian = "big")] + _word1: MaybeUninit<*mut ()>, + #[cfg(target_endian = "big")] + _word_remainder: MaybeUninit<[u8; WORD_SIZE_M1]>, + #[cfg(target_endian = "big")] + tag_byte: NonZeroU8, +} + +unsafe impl Sync for HipByt<'_, B> {} +unsafe impl Send for HipByt<'_, B> {} + +/// Equivalent union representation. 
+/// +/// NOTE: Cannot be used directly to keep the niche for `Option>` +#[repr(C)] +pub union Union<'borrow, B: Backend> { + /// Inline representation + pub inline: Inline, + + /// Allocated and shared representation + pub allocated: Allocated, + + /// Borrowed slice representation + pub borrowed: Borrowed<'borrow>, + + /// Pivot representation with niche + pivot: Pivot, +} + +impl<'borrow, B: Backend> Union<'borrow, B> { + const ASSERTS: () = { + assert!(size_of::() == size_of::>()); + assert!(align_of::() == align_of::>()); + }; + + #[inline] + pub const fn into_raw(self) -> HipByt<'borrow, B> { + // statically checks the layout + let () = Self::ASSERTS; + + // SAFETY: same layout and same niche hopefully + let pivot = unsafe { self.pivot }; + HipByt { + pivot, + _marker: PhantomData, + } + } +} + +/// Repr tag. +/// +/// Cannot be used directly to keep the niche. +#[repr(u8)] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Tag { + Inline = TAG_INLINE, + Borrowed = TAG_BORROWED, + Allocated = TAG_ALLOCATED, +} + +/// Helper enum to split this raw byte string into its possible representation. +pub enum Split<'a, 'borrow, B: Backend> { + /// Inline representation + Inline(&'a Inline), + /// Allocated and shared representation + Allocated(&'a Allocated), + /// Borrowed slice representation + Borrowed(&'a Borrowed<'borrow>), +} + +/// Helper enum to split this raw byte string into its possible representation mutably. +pub enum SplitMut<'a, 'borrow, B: Backend> { + /// Inline representation + Inline(&'a mut Inline), + /// Allocated and shared representation + Allocated(&'a mut Allocated), + /// Borrowed slice representation + Borrowed(&'a mut Borrowed<'borrow>), +} + +impl<'borrow, B: Backend> HipByt<'borrow, B> { + /// Retrieves a reference on the union. 
+ #[inline] + pub(super) const fn union(&self) -> &Union<'borrow, B> { + let raw_ptr: *const _ = &self.pivot; + let union_ptr: *const Union<'borrow, B> = raw_ptr.cast(); + // SAFETY: same layout and same niche hopefully, same immutability + unsafe { &*union_ptr } + } + + /// Retrieves a mutable reference on the union. + #[inline] + pub(super) fn union_mut(&mut self) -> &mut Union<'borrow, B> { + let raw_ptr: *mut _ = &mut self.pivot; + let union_ptr: *mut Union<'borrow, B> = raw_ptr.cast(); + // SAFETY: same layout and same niche hopefully, same mutability + unsafe { &mut *union_ptr } + } + + /// Extracts the union without dropping the `HipByt`. + pub(super) fn union_move(self) -> Union<'borrow, B> { + // Do not drop free! + let this = ManuallyDrop::new(self); + Union { pivot: this.pivot } + } + + // basic constructors + + /// Creates a new `HipByt` from an allocated internal representation. + /// + /// To be normalized, the allocated length should be strictly greater than + /// `INLINE_CAPACITY`. + #[inline] + pub(super) const fn from_allocated(allocated: Allocated) -> Self { + Union { allocated }.into_raw() + } + + /// Creates a new `HipByt` from an inline representation. + #[inline] + pub(super) const fn from_inline(inline: Inline) -> Self { + Union { inline }.into_raw() + } + + /// Creates a new `HipByt` from a borrowed representation. + #[inline] + pub(super) const fn from_borrowed(borrowed: Borrowed<'borrow>) -> Self { + Union { borrowed }.into_raw() + } + + /// Retrieves the tag. + pub(super) const fn tag(&self) -> Tag { + match self.pivot.tag_byte.get() & MASK { + TAG_INLINE => Tag::Inline, + TAG_BORROWED => Tag::Borrowed, + TAG_ALLOCATED => Tag::Allocated, + // SAFETY: type invariant + _ => unsafe { unreachable_unchecked() }, + } + } + + /// Splits this raw into its possible representation. 
+ #[inline] + pub(super) const fn split(&self) -> Split<'_, 'borrow, B> { + let tag = self.tag(); + let union = self.union(); + match tag { + Tag::Inline => { + // SAFETY: representation checked + Split::Inline(unsafe { &union.inline }) + } + Tag::Borrowed => { + // SAFETY: representation checked + Split::Borrowed(unsafe { &union.borrowed }) + } + Tag::Allocated => { + // SAFETY: representation checked + Split::Allocated(unsafe { &union.allocated }) + } + } + } + + /// Splits this raw into its possible representation. + #[inline] + pub(super) fn split_mut(&mut self) -> SplitMut<'_, 'borrow, B> { + let tag = self.tag(); + let union = self.union_mut(); + match tag { + Tag::Inline => { + // SAFETY: representation checked + SplitMut::Inline(unsafe { &mut union.inline }) + } + Tag::Borrowed => { + // SAFETY: representation checked + SplitMut::Borrowed(unsafe { &mut union.borrowed }) + } + Tag::Allocated => { + // SAFETY: representation checked + SplitMut::Allocated(unsafe { &mut union.allocated }) + } + } + } + + /// Creates a new `HipByt` from a vector. + pub(super) fn from_vec(vec: Vec) -> Self { + let allocated = Allocated::new(vec); + Self::from_allocated(allocated) + } + + /// Creates a new empty inline `HipByt`. + #[inline] + pub(super) const fn inline_empty() -> Self { + const { Self::from_inline(Inline::empty()) } + } + + /// Creates a new `HipByt` from a short slice. + /// + /// # Safety + /// + /// The input slice's length MUST be at most `INLINE_CAPACITY`. + pub(super) const unsafe fn inline_unchecked(bytes: &[u8]) -> Self { + // SAFETY: see function precondition + let inline = unsafe { Inline::new_unchecked(bytes) }; + Self::from_inline(inline) + } + + // derived constructors + + /// Creates a new `HipByt` from a vector. + /// + /// Will normalize the representation depending on the size of the vector. 
+ #[inline] + pub(crate) fn normalized_from_vec(vec: Vec) -> Self { + let len = vec.len(); + if len <= INLINE_CAPACITY { + // SAFETY: length checked above + unsafe { Self::inline_unchecked(&vec) } + } else { + Self::from_vec(vec) + } + } + + /// Creates a new `HipByt` from a slice. + /// + /// Will normalize the representation depending on the size of the slice. + pub(crate) fn from_slice(bytes: &[u8]) -> Self { + let len = bytes.len(); + if len == 0 { + Self::inline_empty() + } else if len <= INLINE_CAPACITY { + // SAFETY: length checked above + unsafe { Self::inline_unchecked(bytes) } + } else { + Self::from_allocated(Allocated::from_slice(bytes)) + } + } + + /// Extracts a slice of the entire `HipByt`. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// # use hipstr::HipByt; + /// let s = HipByt::from(b"foobar"); + /// + /// assert_eq!(b"foobar", s.as_slice()); + /// ``` + #[inline] + #[must_use] + pub const fn as_slice(&self) -> &[u8] { + match self.split() { + Split::Inline(inline) => inline.as_slice(), + Split::Allocated(heap) => heap.as_slice(), + Split::Borrowed(borrowed) => borrowed.as_slice(), + } + } + + /// Returns a pointer to the start of the raw byte string. + #[inline] + #[must_use] + pub const fn as_ptr(&self) -> *const u8 { + match self.split() { + Split::Inline(inline) => inline.as_ptr(), + Split::Allocated(heap) => heap.as_ptr(), + Split::Borrowed(borrowed) => borrowed.as_ptr(), + } + } + + /// Slices the raw byte string. + /// + /// # Safety + /// + /// `range` must be a range `a..b` with `a <= b <= len`. + /// + /// Panics in debug build, UB in release. 
+ #[inline] + pub(super) unsafe fn range_unchecked(&self, range: Range) -> Self { + debug_assert!(range.start <= range.end); + debug_assert!(range.end <= self.len()); + + let result = match self.split() { + Split::Inline(inline) => { + // SAFETY: by `slice_unchecked` safety precondition and `split` + // range must be of a length <= self.len() <= `INLINE_CAPACITY` + unsafe { Self::inline_unchecked(&inline.as_slice()[range]) } + } + Split::Borrowed(borrowed) => Self::borrowed(&borrowed.as_slice()[range]), + Split::Allocated(allocated) => { + // normalize to inline if possible + if range.len() <= INLINE_CAPACITY { + // SAFETY: length is checked above + unsafe { Self::inline_unchecked(&allocated.as_slice()[range]) } + } else { + // SAFETY: length is checked above + unsafe { + let allocated = allocated.slice_unchecked(range); + Self::from_allocated(allocated) + } + } + } + }; + + debug_assert!(self.is_normalized()); + result + } + + /// Extracts a slice as its own `HipByt` based on the given subslice `&[u8]`. + /// + /// # Safety + /// + /// The slice MUST be a part of this `HipByt` + /// + /// # Panics + /// + /// When in debug build, panics if the slice is not a part of this `HipByt`. 
+ #[must_use] + pub unsafe fn slice_ref_unchecked(&self, slice: &[u8]) -> Self { + #[cfg(debug_assertions)] + { + let range = self.as_slice().as_ptr_range(); + let slice_range = slice.as_ptr_range(); + assert!(range.contains(&slice_range.start) || range.end == slice_range.start); + assert!(range.contains(&slice_range.end) || range.end == slice_range.end); + } + + let result = match self.split() { + Split::Inline(_) => { + // SAFETY: by the function precondition and the test above + // slice.len() <= self.len() <= INLINE_CAPACITY + unsafe { Self::inline_unchecked(slice) } + } + Split::Borrowed(_) => { + // SAFETY: by the function precondition and the type invariant + // slice must have at least the same dynamic lifetime + let sl: &'borrow [u8] = unsafe { transmute(slice) }; + Self::borrowed(sl) + } + Split::Allocated(allocated) => { + // normalize to inline if possible + if slice.len() <= INLINE_CAPACITY { + // SAFETY: length checked above + unsafe { Self::inline_unchecked(slice) } + } else { + // SAFETY: by the function precondition + let range = unsafe { range_of_unchecked(self.as_slice(), slice) }; + // SAFETY: length checked above + unsafe { + let allocated = allocated.slice_unchecked(range); + Self::from_allocated(allocated) + } + } + } + }; + + debug_assert!(self.is_normalized()); + result + } + + /// Extracts a mutable slice of the entire `HipByt` if possible. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// # use hipstr::HipByt; + /// let mut s = HipByt::from(b"foo"); + /// let slice = s.as_mut_slice().unwrap(); + /// slice.copy_from_slice(b"bar"); + /// assert_eq!(b"bar", slice); + /// ``` + #[inline] + #[must_use] + pub fn as_mut_slice(&mut self) -> Option<&mut [u8]> { + match self.split_mut() { + SplitMut::Inline(inline) => Some(inline.as_mut_slice()), + SplitMut::Allocated(allocated) => allocated.as_mut_slice(), + SplitMut::Borrowed(_) => None, + } + } + + /// Returns a mutable slice of the underlying string. 
+ /// + /// # Safety + /// + /// This `HipByt` should not be shared or borrowed. + #[inline] + pub(super) unsafe fn as_mut_slice_unchecked(&mut self) -> &mut [u8] { + match self.split_mut() { + SplitMut::Inline(inline) => inline.as_mut_slice(), + SplitMut::Allocated(allocated) => unsafe { allocated.as_mut_slice_unchecked() }, + SplitMut::Borrowed(_) => { + #[cfg(debug_assertions)] + { + panic!("mutable slice of borrowed string"); + } + #[cfg(not(debug_assertions))] + { + unsafe { unreachable_unchecked() } + } + } + } + } + + /// Takes a vector representation of this raw byte string. + /// + /// Will only allocate if needed. + #[inline] + pub(crate) fn take_vec(&mut self) -> Vec { + if self.is_allocated() { + // SAFETY: representation is checked, copy without ownership + let allocated = unsafe { self.union_mut().allocated }; + if let Ok(owned) = allocated.try_into_vec() { + // SAFETY: ownership is taken, replace with empty + // and forget old value (otherwise double drop!!) + forget(replace(self, Self::new())); + return owned; + } + } + let owned = Vec::from(self.as_slice()); + *self = Self::new(); + owned + } + + /// Takes the allocated representation if any, replacing it with an empty + /// byte string. + /// + /// # Errors + /// + /// Returns `None` if this byte string is not allocated. + #[inline] + pub(super) fn take_allocated(&mut self) -> Option> { + match self.split() { + Split::Allocated(&allocated) => { + // Takes a copy of allocated + + // replace `self` one by an empty raw + // forget the old value, we have `allocated` as a valid handle + forget(replace(self, Self::new())); + + Some(allocated) + } + _ => None, + } + } + + /// Makes the underlying data uniquely owned, copying if needed. 
+ #[inline] + pub(super) fn make_unique(&mut self) { + let tag = self.tag(); + match tag { + Tag::Inline => {} + Tag::Borrowed => { + let old = replace(self, Self::new()).union_move(); + + // SAFETY: representation is checked above + let borrowed = unsafe { old.borrowed }; + + *self = Self::from_slice(borrowed.as_slice()); + } + Tag::Allocated => { + // SAFETY: representation checked above + if unsafe { self.union().allocated }.is_unique() { + return; + } + + let old = replace(self, Self::new()); + + // SAFETY: representation checked above + let allocated = unsafe { old.union_move().allocated }; + + // SAFETY: by the type invariant + // allocated len must be > INLINE_CAPACITY + let new = Self::from_vec(allocated.as_slice().to_vec()); + + // manual decrement of the reference count + allocated.explicit_drop(); + + *self = new; + } + } + } + + /// Returns `true` it `self` is equal byte for byte to `other`. + #[inline(never)] + pub(crate) fn inherent_eq(&self, other: &HipByt) -> bool { + // use memcmp directly to squeeze one more comparison + extern "C" { + fn memcmp(a: *const u8, b: *const u8, size: usize) -> core::ffi::c_int; + } + + let len = self.len(); + if len != other.len() { + return false; + } + + let self_ptr = self.as_ptr(); + let other_ptr = other.as_ptr(); + if core::ptr::eq(self_ptr, other_ptr) { + return true; + } + + // use element size (just a remainder for now) + let size = len * size_of::(); + + // SAFETY: size checked above + unsafe { memcmp(self_ptr, other_ptr, size) == 0 } + } +} + +impl Drop for HipByt<'_, B> { + #[inline] + fn drop(&mut self) { + // Formally drops this `Raw` decreasing the ref count if needed + if let Some(allocated) = self.take_allocated() { + allocated.explicit_drop(); + } + } +} + +impl Clone for HipByt<'_, B> { + fn clone(&self) -> Self { + // Duplicates this `Raw` increasing the ref count if needed. 
+ match self.split() { + Split::Inline(&inline) => Self::from_inline(inline), + Split::Borrowed(&borrowed) => Self::from_borrowed(borrowed), + Split::Allocated(allocated) => { + let clone = allocated.explicit_clone(); + Self::from_allocated(clone) + } + } + } +} + +/// Computes the range in `whole` corresponding to the given `slice`. +/// +/// # Safety +/// +/// `slice` must be part of `whole`. +unsafe fn range_of_unchecked(whole: &[u8], slice: &[u8]) -> Range { + unsafe { + let offset = slice.as_ptr().offset_from(whole.as_ptr()); + let offset: usize = offset.try_into().unwrap_unchecked(); + offset..offset + slice.len() + } +} + +pub fn try_range_of(whole: &[u8], slice: &[u8]) -> Option> { + let len = whole.len(); + let Range { start, end } = whole.as_ptr_range(); + let slice_len = slice.len(); + let slice_start = slice.as_ptr(); + + // checks that slice_start in whole + if slice_start < start || slice_start > end { + return None; + } + + // SAFETY: `offset_from` requires both pointers to be in the same allocated object (+1). + // that is checked above: slice_ptr is in self + let offset = unsafe { slice_start.offset_from(start) }; + // SAFETY: offset is between 0 and slice_len included + let offset: usize = unsafe { offset.try_into().unwrap_unchecked() }; + if offset + slice_len > len { + None + } else { + Some(offset..offset + slice_len) + } +} diff --git a/src/raw/allocated.rs b/src/bytes/raw/allocated.rs similarity index 98% rename from src/raw/allocated.rs rename to src/bytes/raw/allocated.rs index 4bbc387f..f14590b1 100644 --- a/src/raw/allocated.rs +++ b/src/bytes/raw/allocated.rs @@ -184,6 +184,11 @@ impl Allocated { this } + /// Creates an allocated vector from a slice. + pub fn from_slice(slice: &[u8]) -> Self { + Self::new(slice.to_vec()) + } + /// Returns the length of this allocated string. 
#[inline] pub const fn len(&self) -> usize { @@ -275,7 +280,7 @@ impl Allocated { let owner = self.owner(); if owner.incr() == UpdateResult::Overflow { - Self::new(self.as_slice().to_vec()) + Self::from_slice(self.as_slice()) } else { *self } diff --git a/src/raw/borrowed.rs b/src/bytes/raw/borrowed.rs similarity index 100% rename from src/raw/borrowed.rs rename to src/bytes/raw/borrowed.rs diff --git a/src/raw/borrowed/tests.rs b/src/bytes/raw/borrowed/tests.rs similarity index 100% rename from src/raw/borrowed/tests.rs rename to src/bytes/raw/borrowed/tests.rs diff --git a/src/raw/inline.rs b/src/bytes/raw/inline.rs similarity index 90% rename from src/raw/inline.rs rename to src/bytes/raw/inline.rs index 0dbde2af..13dfa4bb 100644 --- a/src/raw/inline.rs +++ b/src/bytes/raw/inline.rs @@ -18,7 +18,7 @@ const SHIFT: u8 = TAG_BITS; const MAX_LEN: usize = 1 << (8 - SHIFT); #[derive(Clone, Copy, Debug)] -pub(super) struct TaggedLen(NonZeroU8); +struct TaggedLen(NonZeroU8); impl TaggedLen { #[allow(clippy::cast_possible_truncation)] @@ -55,9 +55,12 @@ impl TaggedLen { /// For little-endian platform, the shifted length is **before** the data. #[derive(Clone, Copy)] #[repr(C)] +#[cfg_attr(target_pointer_width = "64", repr(align(8)))] +#[cfg_attr(target_pointer_width = "32", repr(align(4)))] +#[cfg_attr(target_pointer_width = "16", repr(align(2)))] pub struct Inline { #[cfg(target_endian = "little")] - pub(super) len: TaggedLen, + len: TaggedLen, data: [MaybeUninit; INLINE_CAPACITY], @@ -102,12 +105,14 @@ impl Inline { /// Creates a new `Inline` string by copying a byte slice. 
#[inline] - #[allow(dead_code)] - pub fn new(sl: &[u8]) -> Self { - assert!(sl.len() <= INLINE_CAPACITY); - - // SAFETY: length check above - unsafe { Self::new_unchecked(sl) } + #[cfg(test)] + const fn new(sl: &[u8]) -> Option { + if sl.len() <= INLINE_CAPACITY { + // SAFETY: length check above + Some(unsafe { Self::new_unchecked(sl) }) + } else { + None + } } /// Creates a new `Inline` string by copying a short byte slice. @@ -115,9 +120,10 @@ impl Inline { /// # Safety /// /// The input slice's length MUST be at most `INLINE_CAPACITY`. - #[inline] - pub unsafe fn new_unchecked(sl: &[u8]) -> Self { + pub const unsafe fn new_unchecked(sl: &[u8]) -> Self { let len = sl.len(); + debug_assert!(len <= INLINE_CAPACITY); + let mut data = [MaybeUninit::uninit(); INLINE_CAPACITY]; // SAFETY: sl's length is a **function precondition** @@ -163,9 +169,7 @@ impl Inline { /// Returns a mutable view of this inline string. #[inline] - pub fn as_mut_slice(&mut self) -> &mut [u8] { - // XXX should add const: waiting for const_mut_refs - + pub const fn as_mut_slice(&mut self) -> &mut [u8] { debug_assert!(self.is_valid()); // HACK could be done without less unsafe: maybe_uninit_slice diff --git a/src/raw/inline/tests.rs b/src/bytes/raw/inline/tests.rs similarity index 58% rename from src/raw/inline/tests.rs rename to src/bytes/raw/inline/tests.rs index 3e55a669..2792705d 100644 --- a/src/raw/inline/tests.rs +++ b/src/bytes/raw/inline/tests.rs @@ -4,9 +4,18 @@ const N: usize = 23; type I = Inline; +#[test] +fn test_inline() { + let inline = I::new(b"abc").unwrap(); + assert_eq!(inline.as_slice(), b"abc"); + assert_eq!(inline.len(), 3); + + assert!(I::new(&b"*".repeat(N + 1)).is_none()); +} + #[test] fn test_clone() { - let a: I = Inline::new(b"abc"); + let a: I = Inline::new(b"abc").unwrap(); let b = a.clone(); assert_eq!(a.as_slice(), b.as_slice()); } diff --git a/src/raw/tests.rs b/src/bytes/raw/tests.rs similarity index 95% rename from src/raw/tests.rs rename to 
src/bytes/raw/tests.rs index 8626b378..4c43e639 100644 --- a/src/raw/tests.rs +++ b/src/bytes/raw/tests.rs @@ -1,7 +1,7 @@ use super::*; use crate::Rc; -type R = Raw<'static, Rc>; +type R = HipByt<'static, Rc>; #[test] fn test_niche() { diff --git a/src/bytes/serde.rs b/src/bytes/serde.rs index ec8d34c5..a8309816 100644 --- a/src/bytes/serde.rs +++ b/src/bytes/serde.rs @@ -1,10 +1,48 @@ +//! `serde` support for `HipByt`. +//! +//! This module provides support for serializing and deserializing `HipByt` +//! using [`serde`]. It is enabled by default when the `serde` feature is +//! enabled. +//! +//! # Examples +//! +//! ``` +//! use hipstr::HipByt; +//! +//! let s = HipByt::from(b"hello"); +//! let serialized = serde_json::to_string(&s).unwrap(); +//! assert_eq!(serialized, "[104,101,108,108,111]"); +//! +//! let deserialized: HipByt = serde_json::from_str(r#""hello""#).unwrap(); +//! assert_eq!(deserialized, s); +//! ``` +//! +//! # Notable aspects of the implementations +//! +//! Unlike the `Vec` generic implementation which treats data as a generic +//! sequence, `HipByt` uses the more efficient byte sequence specific API for +//! serialization (similar to the [`serde_bytes`] crate). Note that the actual +//! outcome of a serialization depends on the underlying format's support for +//! byte sequences. +//! +//! During deserialization, this implementation minimizes allocations by reusing +//! the deserializer's internal buffer if possible. +//! +//! [`serde_bytes`]: https://docs.rs/serde_bytes + +use core::marker::PhantomData; + +use serde::de::Visitor; use serde::{Deserialize, Serialize}; use super::HipByt; -use crate::alloc::borrow::Cow; +use crate::alloc::fmt; +use crate::alloc::string::String; use crate::alloc::vec::Vec; use crate::Backend; +const EXPECTING: &str = "a byte array"; + impl Serialize for HipByt<'_, B> where B: Backend, @@ -17,6 +55,59 @@ where } } +/// Deserializer's visitor for owned `HipByt`. 
+struct OwnedVisitor<'borrow, B: Backend>(PhantomData>); + +impl<'de, 'borrow, B: Backend> Visitor<'de> for OwnedVisitor<'borrow, B> { + type Value = HipByt<'borrow, B>; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str(EXPECTING) + } + + fn visit_bytes(self, v: &[u8]) -> Result + where + E: serde::de::Error, + { + Ok(HipByt::from(v)) + } + + fn visit_byte_buf(self, v: Vec) -> Result + where + E: serde::de::Error, + { + Ok(HipByt::from(v)) + } + + fn visit_str(self, v: &str) -> Result + where + E: serde::de::Error, + { + Ok(HipByt::from(v.as_bytes())) + } + + fn visit_string(self, v: String) -> Result + where + E: serde::de::Error, + { + Ok(HipByt::from(v.into_bytes())) + } + + fn visit_seq(self, mut seq: A) -> Result + where + A: serde::de::SeqAccess<'de>, + { + let len = core::cmp::min(seq.size_hint().unwrap_or(0), 4096); + let mut bytes = Vec::with_capacity(len); + + while let Some(b) = seq.next_element()? { + bytes.push(b); + } + + Ok(HipByt::from(bytes)) + } +} + impl<'de, B> Deserialize<'de> for HipByt<'_, B> where B: Backend, @@ -25,8 +116,74 @@ where where D: serde::Deserializer<'de>, { - let v: Vec = serde_bytes::deserialize(deserializer)?; - Ok(Self::from(v)) + deserializer.deserialize_bytes(OwnedVisitor(PhantomData)) + } +} + +/// Deserializer's visitor for borrowed `HipByt`. 
+struct BorrowedVisitor<'de, B: Backend>(PhantomData>); + +impl<'de, B: Backend> Visitor<'de> for BorrowedVisitor<'de, B> { + type Value = HipByt<'de, B>; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str(EXPECTING) + } + + fn visit_borrowed_bytes(self, v: &'de [u8]) -> Result + where + E: serde::de::Error, + { + Ok(HipByt::borrowed(v)) + } + + fn visit_bytes(self, v: &[u8]) -> Result + where + E: serde::de::Error, + { + Ok(HipByt::from(v)) + } + + fn visit_byte_buf(self, v: Vec) -> Result + where + E: serde::de::Error, + { + Ok(HipByt::from(v)) + } + + fn visit_borrowed_str(self, v: &'de str) -> Result + where + E: serde::de::Error, + { + Ok(HipByt::borrowed(v.as_bytes())) + } + + fn visit_str(self, v: &str) -> Result + where + E: serde::de::Error, + { + Ok(HipByt::from(v.as_bytes())) + } + + fn visit_string(self, v: String) -> Result + where + E: serde::de::Error, + { + Ok(HipByt::from(v.into_bytes())) + } + + fn visit_seq(self, mut seq: A) -> Result + where + A: serde::de::SeqAccess<'de>, + { + let len = core::cmp::min(seq.size_hint().unwrap_or(0), 4096); + let mut bytes = Vec::with_capacity(len); + + while let Some(b) = seq.next_element()? 
{ + bytes.push(b); + } + + Ok(HipByt::from(bytes)) } } @@ -62,8 +219,7 @@ where D: serde::Deserializer<'de>, B: Backend, { - let cow: Cow<'de, [u8]> = serde_bytes::Deserialize::deserialize(deserializer)?; - Ok(HipByt::from(cow)) + deserializer.deserialize_bytes(BorrowedVisitor(PhantomData)) } #[cfg(test)] diff --git a/src/bytes/serde/tests.rs b/src/bytes/serde/tests.rs index 97334820..a068d466 100644 --- a/src/bytes/serde/tests.rs +++ b/src/bytes/serde/tests.rs @@ -20,6 +20,9 @@ fn test_serde() { assert_de_tokens(empty, &[Token::ByteBuf(b"")]); assert_de_tokens(empty, &[Token::BorrowedBytes(b"")]); assert_de_tokens(empty, &[Token::Seq { len: Some(0) }, Token::SeqEnd]); + assert_de_tokens(empty, &[Token::Str("")]); + assert_de_tokens(empty, &[Token::String("")]); + assert_de_tokens(empty, &[Token::BorrowedStr("")]); let small = &HipByt::from(&[1, 2, 3]); assert_tokens(small, &[Token::Bytes(b"\x01\x02\x03")]); @@ -35,13 +38,16 @@ fn test_serde() { Token::SeqEnd, ], ); + assert_de_tokens(small, &[Token::Str("\x01\x02\x03")]); + assert_de_tokens(small, &[Token::String("\x01\x02\x03")]); + assert_de_tokens(small, &[Token::BorrowedStr("\x01\x02\x03")]); } #[test] fn test_de_error() { assert_de_tokens_error::( &[Token::Bool(true)], - "invalid type: boolean `true`, expected byte array", + "invalid type: boolean `true`, expected a byte array", ); } @@ -91,6 +97,38 @@ fn test_serde_borrow() { ], ); + assert_de_tokens( + &MyStruct { + field: HipByt::from(b"a"), + }, + &[ + Token::Struct { + name: "MyStruct", + len: 1, + }, + Token::Str("field"), + Token::ByteBuf(b"a"), + Token::StructEnd, + ], + ); + + assert_de_tokens( + &MyStruct { + field: HipByt::from(b"a"), + }, + &[ + Token::Struct { + name: "MyStruct", + len: 1, + }, + Token::Str("field"), + Token::Seq { len: None }, + Token::U8(b'a'), + Token::SeqEnd, + Token::StructEnd, + ], + ); + assert_de_tokens( &MyStruct { field: HipByt::from(b"a"), diff --git a/src/bytes/tests.rs b/src/bytes/tests.rs index 186fc085..6e3faa2d 
100644 --- a/src/bytes/tests.rs +++ b/src/bytes/tests.rs @@ -62,6 +62,28 @@ fn test_with_capacity() { assert_eq!(h.as_ptr(), p); } +#[test] +fn test_inline() { + let h = H::inline(ABC); + assert_eq!(h, ABC); + assert_eq!(h.len(), 3); + assert!(h.is_inline()); + assert!(!h.is_borrowed()); + assert!(!h.is_allocated()); +} + +#[test] +#[should_panic] +fn test_inline_panic() { + let _ = H::inline(MEDIUM); +} + +#[test] +fn test_try_inline() { + assert_eq!(H::try_inline(ABC), Some(H::from(ABC))); + assert_eq!(H::try_inline(MEDIUM), None); +} + #[test] #[cfg(feature = "std")] fn test_borrow_and_hash() { @@ -241,7 +263,9 @@ fn test_clone_drop() { fn test_into_borrowed() { // static let a = H::borrowed(ABC); - assert_eq!(a.into_borrowed(), Ok(ABC)); + let s = a.into_borrowed().unwrap(); + assert_eq!(s, ABC); + assert!(core::ptr::eq(s, ABC)); // inline let a = H::from(ABC); @@ -254,6 +278,23 @@ fn test_into_borrowed() { assert_eq!(a.into_borrowed(), Err(b)); } +#[test] +fn test_as_borrowed() { + // borrowed + let a = H::borrowed(ABC); + let b = a.as_borrowed().unwrap(); + assert_eq!(b, ABC); + assert!(core::ptr::eq(b, ABC)); + + // inline + let a = H::from(ABC); + assert_eq!(a.as_borrowed(), None); + + // heap + let a = H::from(MEDIUM); + assert_eq!(a.as_borrowed(), None); +} + #[test] fn test_as_mut_slice() { // static diff --git a/src/lib.rs b/src/lib.rs index e06f07bf..f9fdfe3f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -46,15 +46,17 @@ //! //! # Two Backends //! -//! The crate provides two backends: +//! The crate provides three backends: //! -//! - `ThreadSafe` (atomic reference counting), -//! - `Local` (reference counting). +//! - `Arc` (atomic reference counting), formerly `ThreadSafe` +//! - `Rc` (reference counting), formerly `Local` +//! - `Unique` (unique reference) //! -//! The crate root also provides some convenience type aliases: +//! The crate root also provides some convenience type aliases, typically for strings: //! -//! 
- `hipstr::HipByt` and `hipstr::HipStr` that set `B` to `ThreadSafe`, -//! - `hipstr::LocalHipByt` and `hipstr::LocalHipStr` that set `B` to `Local`. +//! - `hipstr::HipStr` with the `Arc` backend, +//! - `hipstr::LocalHipStr` with the `Rc` backend, +//! - `hipstr::UniqueHipStr` with the `Unique` backend. //! //! # Platform Support //! @@ -88,7 +90,6 @@ pub(crate) extern crate alloc; pub(crate) mod backend; pub mod bytes; pub(crate) mod macros; -mod raw; pub(crate) mod smart; pub mod string; diff --git a/src/os_string.rs b/src/os_string.rs index 148e2fe5..68efd744 100644 --- a/src/os_string.rs +++ b/src/os_string.rs @@ -17,7 +17,7 @@ mod convert; // OsStr(ing) implements Serialize/Deserialize only on Unix and Windows. thx @dsherret #[cfg(all(feature = "serde", any(unix, windows)))] -mod serde; +pub mod serde; #[cfg(test)] mod tests; @@ -244,6 +244,36 @@ where .map_err(Self) } + /// Returns the borrowed slice if this `HipOsStr` is actually borrowed, + /// `None` otherwise. + /// + /// # Examples + /// + /// ``` + /// # use hipstr::HipOsStr; + /// # use std::ffi::OsStr; + /// let abc: &'static OsStr = OsStr::new("abc"); + /// let s = HipOsStr::borrowed(abc); + /// let c: Option<&'static OsStr> = s.as_borrowed(); + /// assert_eq!(c, Some(abc)); + /// assert!(std::ptr::eq(abc, c.unwrap())); + /// + /// let s2 = HipOsStr::from(abc); + /// assert!(s2.as_borrowed().is_none()); + /// ``` + #[inline] + #[must_use] + pub const fn as_borrowed(&self) -> Option<&'borrow OsStr> { + match self.0.as_borrowed() { + Some(slice) => { + // SAFETY: type invariant + // transmute to be "const" + Some(unsafe { core::mem::transmute::<&[u8], &OsStr>(slice) }) + } + None => None, + } + } + /// Returns the length of this `HipOsStr`, in bytes, not [`char`]s or /// graphemes. In other words, it might not be what a human considers the /// length of the string. 
diff --git a/src/os_string/cmp.rs b/src/os_string/cmp.rs index b94292b8..409db993 100644 --- a/src/os_string/cmp.rs +++ b/src/os_string/cmp.rs @@ -21,7 +21,7 @@ where #[inline] fn eq(&self, other: &HipOsStr) -> bool { // compare the encoded bytes. - self.0.eq(&other.0) + self.0 == other.0 } } diff --git a/src/os_string/serde.rs b/src/os_string/serde.rs index 93454652..b1eed1b4 100644 --- a/src/os_string/serde.rs +++ b/src/os_string/serde.rs @@ -1,3 +1,27 @@ +//! `serde` support for `HipOsStr`. +//! +//! This module provides support for serializing and deserializing `HipOsStr` +//! using [`serde`]. It is enabled by default when the `serde` feature is +//! enabled and on supported platforms (`unix` and `windows`). +//! +//! # Examples +//! +//! ``` +//! use hipstr::HipStr; +//! +//! let s = HipStr::from("hello"); +//! let serialized = serde_json::to_string(&s).unwrap(); +//! assert_eq!(serialized, r#""hello""#); +//! +//! let deserialized: HipStr = serde_json::from_str(&serialized).unwrap(); +//! assert_eq!(deserialized, s); +//! ``` +//! +//! # Notable aspects of the implementation +//! +//! Due to the overall weirdness of `OsString` and its support in `serde`, no +//! attempt is made to improve on `OsString`'s standard `serde` implementation. 
+ use std::ffi::OsString; use serde::{Deserialize, Serialize}; diff --git a/src/os_string/tests.rs b/src/os_string/tests.rs index 3efa9b93..1b1c6dbf 100644 --- a/src/os_string/tests.rs +++ b/src/os_string/tests.rs @@ -185,8 +185,8 @@ fn test_clone() { } #[test] -fn test_into_static() { - // static +fn test_into_borrowed() { + // borrowed let a = HipOsStr::borrowed("abc"); assert_eq!(a.into_borrowed(), Ok("abc".as_ref())); @@ -201,6 +201,26 @@ fn test_into_static() { assert_eq!(a.into_borrowed(), Err(b)); } +#[test] +fn test_as_borrowed() { + let abc: &'static OsStr = OsStr::new("abc"); + let medium: &'static OsStr = OsStr::new("abcdefghijklmnopqrstuvwxyz"); + + // borrowed + let a = H::borrowed(abc); + let b = a.as_borrowed().unwrap(); + assert_eq!(b, abc); + assert!(core::ptr::eq(b, abc)); + + // inline + let a = H::from(abc); + assert_eq!(a.as_borrowed(), None); + + // heap + let a = H::from(medium); + assert_eq!(a.as_borrowed(), None); +} + #[test] fn test_into_bytes() { let s = HipOsStr::from("A".repeat(42)); diff --git a/src/path.rs b/src/path.rs index f11c5197..b68ab7eb 100644 --- a/src/path.rs +++ b/src/path.rs @@ -218,6 +218,37 @@ where self.0.into_borrowed().map(Path::new).map_err(Self) } + /// Returns the borrowed slice if this `HipPath` is actually borrowed, `None` + /// otherwise. 
+ /// + /// # Examples + /// + /// ``` + /// # use hipstr::HipPath; + /// # use std::path::Path; + /// let abc: &'static Path = Path::new("abc"); + /// let s = HipPath::borrowed(abc); + /// let c: Option<&'static Path> = s.as_borrowed(); + /// assert_eq!(c, Some(abc)); + /// assert!(std::ptr::eq(abc, c.unwrap())); + /// + /// let s2 = HipPath::from(abc); + /// assert!(s2.as_borrowed().is_none()); + /// ``` + #[inline] + #[must_use] + pub const fn as_borrowed(&self) -> Option<&'borrow Path> { + match self.0.as_borrowed() { + Some(slice) => { + // SAFETY: type invariant + // `transmute` used in order to be "const" + // `Path` is *transparent* + Some(unsafe { core::mem::transmute::<&OsStr, &Path>(slice) }) + } + None => None, + } + } + /// Converts a `HipPath` into a `HipOsStr`. /// /// It consumes the `HipPath` without copying the content diff --git a/src/path/serde.rs b/src/path/serde.rs index 026e2b6c..6eaf3aa2 100644 --- a/src/path/serde.rs +++ b/src/path/serde.rs @@ -1,10 +1,35 @@ -use std::path::PathBuf; +//! `serde` support for `HipPath`. +//! +//! This module provides support for serializing and deserializing `HipPath` +//! using [`serde`]. It is enabled by default when the `serde` feature is +//! enabled. +//! +//! # Examples +//! +//! ``` +//! use hipstr::HipPath; +//! +//! let s = HipPath::borrowed("/usr/bin"); +//! let serialized = serde_json::to_string(&s).unwrap(); +//! assert_eq!(serialized, r#""/usr/bin""#); +//! +//! let deserialized: HipPath = serde_json::from_str(&serialized).unwrap(); +//! assert_eq!(deserialized, s); +//! ``` +//! +//! # Notable aspects of the implementation +//! +//! During deserialization, this implementation minimizes allocations by reusing +//! the deserializer's internal buffer if possible. +//! +//! Unlike `PathBuf`'s `Deserialize`, this implementation declares transparently +//! that it's expecting a string. Indeed not reusing `HipStr`'s implementation +//! just does not make any sense. 
-use serde::{de, Deserialize, Serialize}; +use serde::{Deserialize, Serialize}; use super::HipPath; -use crate::alloc::borrow::Cow; -use crate::alloc::fmt; +use crate::string::HipStr; use crate::Backend; impl Serialize for HipPath<'_, B> @@ -29,39 +54,8 @@ where where D: serde::Deserializer<'de>, { - Ok(Self::from(PathBuf::deserialize(deserializer)?)) - } -} - -/// Minimal string cow visitor -struct CowVisitor; - -impl<'de> de::Visitor<'de> for CowVisitor { - type Value = Cow<'de, str>; - - fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - formatter.write_str("path string") - } - - fn visit_borrowed_str(self, v: &'de str) -> Result - where - E: de::Error, - { - Ok(Cow::Borrowed(v)) - } - - fn visit_str(self, v: &str) -> Result - where - E: de::Error, - { - Ok(Cow::Owned(v.to_string())) - } - - fn visit_string(self, v: String) -> Result - where - E: de::Error, - { - Ok(Cow::Owned(v)) + let s = HipStr::deserialize(deserializer)?; + Ok(Self::from(s)) } } @@ -96,8 +90,7 @@ where D: serde::Deserializer<'de>, B: Backend, { - let cow: Cow<'de, str> = deserializer.deserialize_str(CowVisitor)?; - Ok(HipPath::from(cow)) + crate::string::serde::borrow_deserialize(deserializer).map(HipPath::from) } #[cfg(test)] diff --git a/src/path/serde/tests.rs b/src/path/serde/tests.rs index 3ce4728b..8dc6bd42 100644 --- a/src/path/serde/tests.rs +++ b/src/path/serde/tests.rs @@ -19,7 +19,7 @@ fn test_serde() { fn test_serde_err() { assert_de_tokens_error::( &[Token::I32(0)], - "invalid type: integer `0`, expected path string", + "invalid type: integer `0`, expected a string", ); } @@ -94,6 +94,6 @@ fn test_serde_borrow_err() { Token::I32(0), Token::StructEnd, ], - "invalid type: integer `0`, expected path string", + "invalid type: integer `0`, expected a string", ); } diff --git a/src/path/tests.rs b/src/path/tests.rs index 6c954dc0..88c7def1 100644 --- a/src/path/tests.rs +++ b/src/path/tests.rs @@ -7,22 +7,24 @@ use crate::alloc::string::String; use 
crate::os_string::HipOsStr; use crate::HipPath; +type H<'borrow> = HipPath<'borrow>; + const INLINE_CAPACITY: usize = HipPath::inline_capacity(); #[test] fn test_deref() { - let h = HipPath::borrowed("test"); + let h = H::borrowed("test"); let _: &Path = &h; } #[test] fn test_new_default() { let empty_path: &Path = "".as_ref(); - let new = HipPath::new(); + let new = H::new(); assert_eq!(new, empty_path); assert!(new.0.is_empty()); - let new = HipPath::default(); + let new = H::default(); assert_eq!(new, empty_path); assert!(new.0.is_empty()); } @@ -30,8 +32,8 @@ fn test_new_default() { #[test] fn test_borrow_and_hash() { let mut set = HashSet::new(); - set.insert(HipPath::from("a")); - set.insert(HipPath::from("b")); + set.insert(H::from("a")); + set.insert(H::from("b")); assert!(set.contains::("a".as_ref())); assert!(!set.contains::("c".as_ref())); @@ -40,14 +42,14 @@ fn test_borrow_and_hash() { #[test] fn test_fmt() { let source: &OsStr = "Rust \u{1F980}".as_ref(); - let a = HipPath::borrowed(source); + let a = H::borrowed(source); assert_eq!(format!("{a:?}"), format!("{source:?}")); } #[test] fn test_from_string() { let s = "A".repeat(42); - let hs = HipPath::from(s.clone()); + let hs = H::from(s.clone()); assert!(!hs.is_borrowed()); assert!(!hs.is_inline()); assert!(hs.is_allocated()); @@ -58,7 +60,7 @@ fn test_from_string() { #[test] fn test_borrowed() { let s = "0123456789"; - let path = HipPath::borrowed(s); + let path = H::borrowed(s); assert!(path.is_borrowed()); assert!(!path.is_inline()); assert_eq!(path.0.len(), s.len()); @@ -71,7 +73,7 @@ fn test_from_static() { const fn is_static_type(_: &T) {} let s = "abcdefghijklmnopqrstuvwxyz"; - let path = HipPath::from_static(s); + let path = H::from_static(s); // compiler check is_static_type(&path); @@ -90,7 +92,7 @@ fn test_from_slice() { let s = core::str::from_utf8(V).unwrap(); for size in [0, 1, INLINE_CAPACITY, INLINE_CAPACITY + 1, 256, 1024] { - let path = HipPath::from(&s[..size]); + let path = 
H::from(&s[..size]); assert_eq!(size <= INLINE_CAPACITY, path.is_inline()); assert_eq!(size > INLINE_CAPACITY, path.is_allocated()); assert_eq!(path.0.len(), size); @@ -101,7 +103,7 @@ fn test_from_slice() { fn test_as_slice() { // static { - let a = HipPath::borrowed("abc"); + let a = H::borrowed("abc"); assert!(a.is_borrowed()); assert!(!a.is_inline()); assert!(!a.is_allocated()); @@ -109,7 +111,7 @@ fn test_as_slice() { } // inline { - let a = HipPath::from("abc"); + let a = H::from("abc"); assert!(!a.is_borrowed()); assert!(a.is_inline()); assert!(!a.is_allocated()); @@ -118,7 +120,7 @@ fn test_as_slice() { // allocated { let s = "A".repeat(42); - let a = HipPath::from(s.as_str()); + let a = H::from(s.as_str()); assert!(!a.is_borrowed()); assert!(!a.is_inline()); assert!(a.is_allocated()); @@ -131,7 +133,7 @@ fn test_clone() { // static { let s: &'static str = "abc"; - let a = HipPath::borrowed(s); + let a = H::borrowed(s); assert!(a.is_borrowed()); let b = a.clone(); drop(a); @@ -141,7 +143,7 @@ fn test_clone() { // inline { - let a = HipPath::from("abc"); + let a = H::from("abc"); assert!(a.is_inline()); let b = a.clone(); drop(a); @@ -152,7 +154,7 @@ fn test_clone() { { let s = "a".repeat(42); let p = s.as_ptr(); - let a = HipPath::from(s); + let a = H::from(s); assert!(a.is_allocated()); let b = a.clone(); drop(a); @@ -162,25 +164,45 @@ fn test_clone() { } #[test] -fn test_into_static() { - // static - let a = HipPath::borrowed("abc"); +fn test_into_borrowed() { + // borrowed + let a = H::borrowed("abc"); assert_eq!(a.into_borrowed(), Ok("abc".as_ref())); // inline - let a = HipPath::from("abc"); + let a = H::from("abc"); let b = a.clone(); assert_eq!(a.into_borrowed(), Err(b)); // heap - let a = HipPath::from("a".repeat(42).as_str()); + let a = H::from("a".repeat(42).as_str()); let b = a.clone(); assert_eq!(a.into_borrowed(), Err(b)); } +#[test] +fn test_as_borrowed() { + let abc: &'static Path = Path::new("abc"); + let medium: &'static Path = 
Path::new("abcdefghijklmnopqrstuvwxyz"); + + // borrowed + let a = H::borrowed(abc); + let b = a.as_borrowed().unwrap(); + assert_eq!(b, abc); + assert!(core::ptr::eq(b, abc)); + + // inline + let a = H::from(abc); + assert_eq!(a.as_borrowed(), None); + + // heap + let a = H::from(medium); + assert_eq!(a.as_borrowed(), None); +} + #[test] fn test_into_os_string() { - let h = HipPath::from("A".repeat(42)); + let h = H::from("A".repeat(42)); let os_string = h.into_os_string().unwrap(); assert_eq!(os_string.len(), 42); assert_eq!(os_string.as_encoded_bytes(), [b'A'; 42]); @@ -188,7 +210,7 @@ fn test_into_os_string() { #[test] fn test_into_path_buf() { - let h = HipPath::from("A".repeat(42)); + let h = H::from("A".repeat(42)); let path_buf = h.into_path_buf().unwrap(); assert_eq!(path_buf.as_os_str().len(), 42); assert_eq!(path_buf, Path::new(&"A".repeat(42))); @@ -196,7 +218,7 @@ fn test_into_path_buf() { #[test] fn test_into_str() { - let h = HipPath::from("A".repeat(42)); + let h = H::from("A".repeat(42)); let string = h.into_str().unwrap(); assert_eq!(string.len(), 42); assert_eq!(string, "A".repeat(42)); @@ -207,37 +229,37 @@ fn test_into_str() { use std::os::windows::ffi::OsStringExt; let shorts = [u16::from(b'a'), u16::from(b'b'), u16::from(b'c'), 0xD800]; let source = OsString::from_wide(&shorts); - let hp = HipPath::from(source); + let hp = H::from(source); let _ = hp.into_str().unwrap_err(); } #[cfg(unix)] { use std::os::unix::ffi::OsStrExt; let bytes = b"abc\x80"; - let hp = HipPath::from(OsStr::from_bytes(bytes)); + let hp = H::from(OsStr::from_bytes(bytes)); let _ = hp.into_str().unwrap_err(); } } #[test] fn test_capacity() { - let a = HipPath::borrowed("abc"); + let a = H::borrowed("abc"); assert_eq!(a.capacity(), a.0.len()); - let a = HipPath::from("abc"); - assert_eq!(a.capacity(), HipPath::inline_capacity()); + let a = H::from("abc"); + assert_eq!(a.capacity(), H::inline_capacity()); let mut v = String::with_capacity(42); for _ in 0..10 { 
v.push_str("abc"); } - let a = HipPath::from(v); + let a = H::from(v); assert_eq!(a.capacity(), 42); } #[test] fn test_mutate_debug() { - let mut a = HipPath::borrowed("abc"); + let mut a = H::borrowed("abc"); let debug = format!("{a:?}"); let r = a.mutate(); assert_eq!(format!("{r:?}"), debug); @@ -245,7 +267,7 @@ fn test_mutate_debug() { #[test] fn test_mutate_borrowed() { - let mut a = HipPath::borrowed("abc"); + let mut a = H::borrowed("abc"); assert!(a.is_borrowed()); { let mut r = a.mutate(); @@ -258,7 +280,7 @@ fn test_mutate_borrowed() { #[test] fn test_mutate_inline() { - let mut a = HipPath::from("abc"); + let mut a = H::from("abc"); assert!(a.is_inline()); a.mutate().push("def"); assert_eq!(a, Path::new("abc/def")); @@ -271,7 +293,7 @@ fn test_mutate_allocated() { let mut v = String::with_capacity(42); v.push_str("abcdefghijklmnopqrstuvwxyz"); let p = v.as_ptr(); - let mut a = HipPath::from(v); + let mut a = H::from(v); assert!(a.is_allocated()); a.mutate().push("0123456789"); assert!(a.is_allocated()); @@ -283,7 +305,7 @@ fn test_mutate_allocated() { // allocated, shared let mut v = String::with_capacity(42); v.push_str("abcdefghijklmnopqrstuvwxyz"); - let mut a = HipPath::from(v); + let mut a = H::from(v); assert!(a.is_allocated()); let b = a.clone(); a.mutate().push("0123456789"); @@ -297,7 +319,7 @@ fn test_mutate_allocated() { #[test] fn test_to_owned() { let b = "abc"; - let h = HipPath::from(b); + let h = H::from(b); assert!(h.is_inline()); let h = h.into_owned(); assert!(h.is_inline()); @@ -305,12 +327,12 @@ fn test_to_owned() { let r = "*".repeat(42); let v = r.clone(); - let a = HipPath::borrowed(&v[0..2]); + let a = H::borrowed(&v[0..2]); let a = a.into_owned(); drop(v); assert_eq!(a, Path::new(&r[0..2])); - let a = HipPath::from(&r[..]); + let a = H::from(&r[..]); drop(r); let p = a.0.as_ptr(); let a = a.into_owned(); @@ -320,15 +342,15 @@ fn test_to_owned() { #[test] fn test_shrink_to_fit() { let h = HipOsStr::with_capacity(INLINE_CAPACITY + 
1); - let mut h = HipPath::from(h); + let mut h = H::from(h); h.shrink_to_fit(); assert_eq!(h.capacity(), INLINE_CAPACITY); - let mut h = HipPath::from("abc"); + let mut h = H::from("abc"); h.shrink_to_fit(); assert_eq!(h.capacity(), INLINE_CAPACITY); - let mut h = HipPath::from_static("abc"); + let mut h = H::from_static("abc"); h.shrink_to_fit(); assert_eq!(h.capacity(), 3); } @@ -337,16 +359,16 @@ fn test_shrink_to_fit() { fn test_shrink_to() { let mut h = HipOsStr::with_capacity(INLINE_CAPACITY + 1); h.push("a"); - let mut h = HipPath::from(h); + let mut h = H::from(h); h.shrink_to(0); assert_eq!(h.capacity(), INLINE_CAPACITY); assert_eq!(h.as_os_str().len(), 1); - let mut h = HipPath::from("abc"); + let mut h = H::from("abc"); h.shrink_to(4); assert_eq!(h.capacity(), INLINE_CAPACITY); - let mut h = HipPath::from_static("abc"); + let mut h = H::from_static("abc"); assert_eq!(h.capacity(), 3); h.shrink_to(0); assert_eq!(h.capacity(), 3); diff --git a/src/raw.rs b/src/raw.rs deleted file mode 100644 index 8bb60809..00000000 --- a/src/raw.rs +++ /dev/null @@ -1,895 +0,0 @@ -//! Raw shared sequence of bytes, direct backing of [`HipByt`][super::HipByt]. -//! -//! Provides only the core features for the sequence of bytes. 
- -use core::hint::unreachable_unchecked; -use core::marker::PhantomData; -use core::mem::{align_of, forget, replace, size_of, transmute, ManuallyDrop, MaybeUninit}; -use core::num::NonZeroU8; -use core::ops::Range; -use core::ptr; - -use allocated::Allocated; -use borrowed::Borrowed; - -use crate::alloc::vec::Vec; -use crate::Backend; - -mod allocated; -mod borrowed; -mod inline; -#[cfg(test)] -mod tests; - -/// Width (in bits) of the tag -const TAG_BITS: u8 = 2; - -/// Mask to extract the tag bits -const MASK: u8 = (1 << TAG_BITS) - 1; - -/// Tag for the inline repr -const TAG_INLINE: u8 = 1; - -/// Tag for the borrowed repr -const TAG_BORROWED: u8 = 2; - -/// Tag for the allocated repr -const TAG_ALLOCATED: u8 = 3; - -/// Maximal byte capacity of an inline [`HipStr`](super::HipStr) or [`HipByt`](super::HipByt). -const INLINE_CAPACITY: usize = size_of::() - 1; - -/// Size of word minus a tagged byte. -const WORD_SIZE_M1: usize = size_of::() - 1; - -/// Alias type for `Inline` with set inline capacity -type Inline = inline::Inline; - -/// Raw byte sequence. -#[repr(C)] -pub struct Raw<'borrow, B: Backend> { - pivot: Pivot, - _marker: PhantomData<&'borrow B>, -} - -#[derive(Clone, Copy)] -#[repr(C)] -struct Pivot { - #[cfg(target_endian = "little")] - tag_byte: NonZeroU8, - #[cfg(target_endian = "little")] - _word_remainder: MaybeUninit<[u8; WORD_SIZE_M1]>, - #[cfg(target_endian = "little")] - _word1: MaybeUninit<*mut ()>, - - _word2: MaybeUninit<*mut ()>, - - #[cfg(target_endian = "big")] - _word1: MaybeUninit<*mut ()>, - #[cfg(target_endian = "big")] - _word_remainder: MaybeUninit<[u8; WORD_SIZE_M1]>, - #[cfg(target_endian = "big")] - tag_byte: NonZeroU8, -} - -unsafe impl Sync for Raw<'_, B> {} -unsafe impl Send for Raw<'_, B> {} - -/// Equivalent union representation. 
-/// -/// NOTE: Cannot be used directly to keep the niche for Option> -#[repr(C)] -union Union<'borrow, B: Backend> { - /// Inline representation - inline: Inline, - - /// Allocated and shared representation - allocated: Allocated, - - /// Borrowed slice representation - borrowed: Borrowed<'borrow>, - - /// Pivot representation with niche - pivot: Pivot, -} - -impl<'borrow, B: Backend> Union<'borrow, B> { - const ASSERTS: () = { - assert!(size_of::() == size_of::>()); - assert!(align_of::() == align_of::>()); - }; - - #[inline] - const fn into_raw(self) -> Raw<'borrow, B> { - // statically checks the layout - let () = Self::ASSERTS; - - // SAFETY: same layout and same niche hopefully - let pivot = unsafe { self.pivot }; - Raw { - pivot, - _marker: PhantomData, - } - } -} - -/// Repr tag. -/// -/// Cannot be used directly to keep the niche. -#[repr(u8)] -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -enum Tag { - Inline = TAG_INLINE, - Borrowed = TAG_BORROWED, - Allocated = TAG_ALLOCATED, -} - -/// Helper enum to split this raw byte string into its possible representation. -enum RawSplit<'a, 'borrow, B: Backend> { - /// Inline representation - Inline(&'a Inline), - /// Allocated and shared representation - Allocated(&'a Allocated), - /// Borrowed slice representation - Borrowed(&'a Borrowed<'borrow>), -} - -/// Helper enum to split this raw byte string into its possible representation mutably. -enum RawSplitMut<'a, 'borrow, B: Backend> { - /// Inline representation - Inline(&'a mut Inline), - /// Allocated and shared representation - Allocated(&'a mut Allocated), - /// Borrowed slice representation - Borrowed(&'a mut Borrowed<'borrow>), -} - -impl<'borrow, B: Backend> Raw<'borrow, B> { - /// Retrieves a reference on the union. 
- #[inline] - const fn union(&self) -> &Union<'borrow, B> { - let raw_ptr: *const _ = &self.pivot; - let union_ptr: *const Union<'borrow, B> = raw_ptr.cast(); - // SAFETY: same layout and same niche hopefully, same immutability - unsafe { &*union_ptr } - } - - /// Retrieves a mutable reference on the union. - #[inline] - fn union_mut(&mut self) -> &mut Union<'borrow, B> { - let raw_ptr: *mut _ = &mut self.pivot; - let union_ptr: *mut Union<'borrow, B> = raw_ptr.cast(); - // SAFETY: same layout and same niche hopefully, same mutability - unsafe { &mut *union_ptr } - } - - /// Extracts the union without dropping the `Raw`. - fn union_move(self) -> Union<'borrow, B> { - // Do not drop free! - let this = ManuallyDrop::new(self); - Union { pivot: this.pivot } - } - - // basic constructors - - /// Creates a new raw byte sequence from an allocated internal representation. - /// - /// The allocated length should be strictly greater than `INLINE_CAPACITY`. - #[inline] - const fn from_allocated(allocated: Allocated) -> Self { - Union { allocated }.into_raw() - } - - /// Creates a new Raw from an inline representation. - #[inline] - const fn from_inline(inline: Inline) -> Self { - Union { inline }.into_raw() - } - - /// Creates a new Raw from a borrowed representation. - #[inline] - const fn from_borrowed(borrowed: Borrowed<'borrow>) -> Self { - Union { borrowed }.into_raw() - } - - /// Retrieves the tag. - const fn tag(&self) -> Tag { - match self.pivot.tag_byte.get() & MASK { - TAG_INLINE => Tag::Inline, - TAG_BORROWED => Tag::Borrowed, - TAG_ALLOCATED => Tag::Allocated, - // SAFETY: type invariant - _ => unsafe { unreachable_unchecked() }, - } - } - - /// Splits this raw into its possible representation. 
- #[inline] - const fn split(&self) -> RawSplit<'_, 'borrow, B> { - let tag = self.tag(); - let union = self.union(); - match tag { - Tag::Inline => { - // SAFETY: representation checked - RawSplit::Inline(unsafe { &union.inline }) - } - Tag::Borrowed => { - // SAFETY: representation checked - RawSplit::Borrowed(unsafe { &union.borrowed }) - } - Tag::Allocated => { - // SAFETY: representation checked - RawSplit::Allocated(unsafe { &union.allocated }) - } - } - } - - /// Splits this raw into its possible representation. - #[inline] - fn split_mut(&mut self) -> RawSplitMut<'_, 'borrow, B> { - let tag = self.tag(); - let union = self.union_mut(); - match tag { - Tag::Inline => { - // SAFETY: representation checked - RawSplitMut::Inline(unsafe { &mut union.inline }) - } - Tag::Borrowed => { - // SAFETY: representation checked - RawSplitMut::Borrowed(unsafe { &mut union.borrowed }) - } - Tag::Allocated => { - // SAFETY: representation checked - RawSplitMut::Allocated(unsafe { &mut union.allocated }) - } - } - } - - /// Creates a new `Raw` from a vector. - /// - /// The vector's length should be strictly greater than `INLINE_CAPACITY`. - #[inline] - pub fn from_vec(vec: Vec) -> Self { - let allocated = Allocated::new(vec); - Self::from_allocated(allocated) - } - - /// Creates a new `Raw` from a short slice. - /// - /// # Safety - /// - /// The input slice's length MUST be at most `INLINE_CAPACITY`. - pub unsafe fn inline_unchecked(bytes: &[u8]) -> Self { - debug_assert!(bytes.len() <= INLINE_CAPACITY); - - // SAFETY: see function precondition - let inline = unsafe { Inline::new_unchecked(bytes) }; - - Self::from_inline(inline) - } - - /// Creates a new `Raw` from a static slice. - /// - /// # Representation - /// - /// For now, `borrowed` does not inline strings, i.e. switch to inline string if - /// possible: it cannot do it because [`Inline::new`] is not const. 
- #[inline] - pub const fn borrowed(bytes: &'borrow [u8]) -> Self { - Union { - borrowed: Borrowed::new(bytes), - } - .into_raw() - } - - // derived constructors - - /// Creates a new empty `Raw`. - #[inline] - pub const fn empty() -> Self { - Self::from_inline(Inline::empty()) - } - - /// Creates a new `Raw` from a vector. - /// - /// Will normalize the representation depending on the size of the vector. - #[inline] - pub fn normalized_from_vec(vec: Vec) -> Self { - let len = vec.len(); - if len <= INLINE_CAPACITY { - // SAFETY: length checked above - unsafe { Self::inline_unchecked(&vec) } - } else { - Self::from_vec(vec) - } - } - - /// Creates a new `Raw` from a slice. - /// - /// Will normalize the representation depending on the size of the slice. - #[inline] - pub fn from_slice(bytes: &[u8]) -> Self { - let len = bytes.len(); - if len <= INLINE_CAPACITY { - // SAFETY: length checked above - unsafe { Self::inline_unchecked(bytes) } - } else { - Self::from_vec(bytes.to_vec()) - } - } - - /// Creates a new `Raw` with the given capacity. - /// - /// **This representation may not be normalized.** - pub fn with_capacity(capacity: usize) -> Self { - if capacity <= INLINE_CAPACITY { - Self::from_inline(Inline::empty()) - } else { - Self::from_vec(Vec::with_capacity(capacity)) - } - } - - /// Returns `true` if the actual representation is an inline string. - #[inline] - pub const fn is_inline(&self) -> bool { - matches!(self.tag(), Tag::Inline) - } - - /// Returns `true` if the actual representation is a borrowed reference. - #[inline] - pub const fn is_borrowed(&self) -> bool { - matches!(self.tag(), Tag::Borrowed) - } - - /// Returns `true` if the actual representation is a heap-allocated string. - #[inline] - pub const fn is_allocated(&self) -> bool { - matches!(self.tag(), Tag::Allocated) - } - - /// Returns the borrowed bytes if it was actually borrowed. - /// - /// # Errors - /// - /// Return the raw byte string if the actual representation is not a borrow. 
- #[inline] - pub const fn into_borrowed(self) -> Result<&'borrow [u8], Self> { - match self.split() { - RawSplit::Allocated(_) | RawSplit::Inline(_) => Err(self), - RawSplit::Borrowed(borrowed) => { - let result = borrowed.as_slice(); - forget(self); - Ok(result) - } - } - } - - /// Returns the length of the raw byte string. - #[inline] - pub const fn len(&self) -> usize { - match self.split() { - RawSplit::Inline(inline) => inline.len(), - RawSplit::Allocated(heap) => heap.len(), - RawSplit::Borrowed(borrowed) => borrowed.len(), - } - } - - /// Returns the raw byte string as a byte slice. - #[inline] - pub const fn as_slice(&self) -> &[u8] { - match self.split() { - RawSplit::Inline(inline) => inline.as_slice(), - RawSplit::Allocated(heap) => heap.as_slice(), - RawSplit::Borrowed(borrowed) => borrowed.as_slice(), - } - } - - /// Returns a pointer to the start of the raw byte string. - #[inline] - pub const fn as_ptr(&self) -> *const u8 { - match self.split() { - RawSplit::Inline(inline) => inline.as_ptr(), - RawSplit::Allocated(heap) => heap.as_ptr(), - RawSplit::Borrowed(borrowed) => borrowed.as_ptr(), - } - } - - /// Slices the raw byte string. - /// - /// # Safety - /// - /// `range` must be a range `a..b` with `a <= b <= len`. - /// - /// Panics in debug build, UB in release. 
- #[inline] - pub unsafe fn slice_unchecked(&self, range: Range) -> Self { - debug_assert!(range.start <= range.end); - debug_assert!(range.end <= self.len()); - - let result = match self.split() { - RawSplit::Inline(inline) => { - // SAFETY: by `slice_unchecked` safety precondition and `split` - // range must be of a length <= self.len() <= `INLINE_CAPACITY` - unsafe { Self::inline_unchecked(&inline.as_slice()[range]) } - } - RawSplit::Borrowed(borrowed) => Self::borrowed(&borrowed.as_slice()[range]), - RawSplit::Allocated(allocated) => { - // normalize to inline if possible - if range.len() <= INLINE_CAPACITY { - // SAFETY: length is checked above - unsafe { Self::inline_unchecked(&allocated.as_slice()[range]) } - } else { - // SAFETY: length is checked above - unsafe { - let allocated = allocated.slice_unchecked(range); - Self::from_allocated(allocated) - } - } - } - }; - - debug_assert!(self.is_normalized()); - result - } - - /// Slices the raw byte string given a Rust slice. - /// - /// # Safety - /// - /// `slice` MUST be a part of the raw byte string. 
- pub unsafe fn slice_ref_unchecked(&self, slice: &[u8]) -> Self { - #[cfg(debug_assertions)] - { - let range = self.as_slice().as_ptr_range(); - let slice_range = slice.as_ptr_range(); - assert!(range.contains(&slice_range.start) || range.end == slice_range.start); - assert!(range.contains(&slice_range.end) || range.end == slice_range.end); - } - - let result = match self.split() { - RawSplit::Inline(_) => { - // SAFETY: by the function precondition and the test above - // slice.len() <= self.len() <= INLINE_CAPACITY - unsafe { Self::inline_unchecked(slice) } - } - RawSplit::Borrowed(_) => { - // SAFETY: by the function precondition and the type invariant - // slice must have at least the same dynamic lifetime - let sl: &'borrow [u8] = unsafe { transmute(slice) }; - Self::borrowed(sl) - } - RawSplit::Allocated(allocated) => { - // normalize to inline if possible - if slice.len() <= INLINE_CAPACITY { - // SAFETY: length checked above - unsafe { Self::inline_unchecked(slice) } - } else { - // SAFETY: by the function precondition - let range = unsafe { range_of_unchecked(self.as_slice(), slice) }; - // SAFETY: length checked above - unsafe { - let allocated = allocated.slice_unchecked(range); - Self::from_allocated(allocated) - } - } - } - }; - - debug_assert!(self.is_normalized()); - result - } - - /// Returns a mutable slice if this `Raw` is neither borrowed nor shared. - #[inline] - pub fn as_mut_slice(&mut self) -> Option<&mut [u8]> { - match self.split_mut() { - RawSplitMut::Inline(inline) => Some(inline.as_mut_slice()), - RawSplitMut::Allocated(allocated) => allocated.as_mut_slice(), - RawSplitMut::Borrowed(_) => None, - } - } - - /// Returns a mutable slice of the underlying string. - /// - /// # Safety - /// - /// This `Raw` should not be shared or borrowed. 
- #[inline] - pub unsafe fn as_mut_slice_unchecked(&mut self) -> &mut [u8] { - match self.split_mut() { - RawSplitMut::Inline(inline) => inline.as_mut_slice(), - RawSplitMut::Allocated(allocated) => unsafe { allocated.as_mut_slice_unchecked() }, - RawSplitMut::Borrowed(_) => { - #[cfg(debug_assertions)] - { - panic!("mutable slice of borrowed string"); - } - #[cfg(not(debug_assertions))] - { - unsafe { unreachable_unchecked() } - } - } - } - } - - /// Push a slice at the end of this raw byte string. - #[inline] - pub fn push_slice(&mut self, addition: &[u8]) { - let new_len = self.len() + addition.len(); - - if self.is_allocated() { - // current allocation may be pushed into it directly? - - // SAFETY: repr checked above - let allocated = unsafe { &mut self.union_mut().allocated }; - - if allocated.is_unique() { - // SAFETY: uniqueness is checked above - unsafe { - allocated.push_slice_unchecked(addition); - } - return; - } - } - - if new_len <= INLINE_CAPACITY { - if !self.is_inline() { - // make it inline first - // SAFETY: `new_len` is checked before, so current len <= INLINE_CAPACITY - *self = unsafe { Self::inline_unchecked(self.as_slice()) }; - } - - // SAFETY: `new_len` is checked above - unsafe { - self.union_mut().inline.push_slice_unchecked(addition); - } - return; - } - - // requires a new vector - let mut vec = Vec::with_capacity(new_len); - vec.extend_from_slice(self.as_slice()); - vec.extend_from_slice(addition); - - // SAFETY: vec's len (new_len) is checked above to be > INLINE_CAPACITY - *self = Self::from_vec(vec); - } - - /// Takes a vector representation of this raw byte string. - /// - /// Will only allocate if needed. 
- #[inline] - pub fn take_vec(&mut self) -> Vec { - if self.is_allocated() { - // SAFETY: representation is checked, copy without ownership - let allocated = unsafe { self.union_mut().allocated }; - if let Ok(owned) = allocated.try_into_vec() { - // SAFETY: ownership is taken, replace with empty - // and forget old value (otherwise double drop!!) - forget(replace(self, Self::empty())); - return owned; - } - } - let owned = Vec::from(self.as_slice()); - *self = Self::empty(); - owned - } - - /// Returns the inline capacity for this particular backend. - #[inline] - pub const fn inline_capacity() -> usize { - Inline::capacity() - } - - /// Returns the capacity. - /// - /// For simplicity's sake, if it's a borrowed byte string, it returns the length. - #[inline] - pub fn capacity(&self) -> usize { - match self.split() { - RawSplit::Inline(_) => Self::inline_capacity(), - RawSplit::Borrowed(borrowed) => borrowed.len(), // provide something to simplify the API - RawSplit::Allocated(allocated) => allocated.capacity(), - } - } - - /// Returns the underlying vector if any. - /// - /// # Errors - /// - /// Returns the byte string as-is if it is not allocated. - #[inline] - #[allow(clippy::option_if_let_else)] - pub fn into_vec(self) -> Result, Self> { - let mut this = ManuallyDrop::new(self); - if let Some(allocated) = this.take_allocated() { - allocated - .try_into_vec() - .map_err(|allocated| Union { allocated }.into_raw()) - } else { - Err(ManuallyDrop::into_inner(this)) - } - } - - /// Takes the allocated representation if any, - /// replacing it with an empty byte string. - /// - /// # Errors - /// - /// Returns `None` if this raw byte string is not allocated. 
- #[inline] - fn take_allocated(&mut self) -> Option> { - match self.split() { - RawSplit::Allocated(&allocated) => { - // Takes a copy of allocated - - // replace `self` one by an empty raw - // forget the old value, we have `allocated` as a valid handle - forget(replace(self, Self::empty())); - - Some(allocated) - } - _ => None, - } - } - - /// Makes the data owned, copying it if it's not already owned. - #[inline] - pub fn into_owned(self) -> Raw<'static, B> { - let tag = self.tag(); - let old = self.union_move(); // self is not dropped! - - // SAFETY: tag representation - unsafe { - match tag { - Tag::Allocated => Raw::from_allocated(old.allocated), - Tag::Borrowed => Raw::from_slice(old.borrowed.as_slice()), - Tag::Inline => Raw::from_inline(old.inline), - } - } - } - - /// Makes the underlying data uniquely owned, copying if needed. - #[inline] - pub fn make_unique(&mut self) { - let tag = self.tag(); - match tag { - Tag::Inline => {} - Tag::Borrowed => { - let old = replace(self, Self::empty()).union_move(); - - // SAFETY: representation is checked above - let borrowed = unsafe { old.borrowed }; - - *self = Self::from_slice(borrowed.as_slice()); - } - Tag::Allocated => { - // SAFETY: representation checked above - if unsafe { self.union().allocated }.is_unique() { - return; - } - - let old = replace(self, Self::empty()); - - // SAFETY: representation checked above - let allocated = unsafe { old.union_move().allocated }; - - // SAFETY: by the type invariant - // allocated len must be > INLINE_CAPACITY - let new = Self::from_vec(allocated.as_slice().to_vec()); - - // manual decrement of the reference count - allocated.explicit_drop(); - - *self = new; - } - } - } - - /// Returns `true` if the representation is normalized. - /// - /// For now, borrowed representation are not inlined. 
- #[inline] - pub const fn is_normalized(&self) -> bool { - self.is_inline() || self.is_borrowed() || self.len() > Self::inline_capacity() - } - - /// Returns `true` it `self` is equal byte for byte to `other`. - #[inline(never)] - pub fn eq(&self, other: &Raw) -> bool { - // use memcmp directly to squeeze one more comparison - extern "C" { - fn memcmp(a: *const u8, b: *const u8, size: usize) -> core::ffi::c_int; - } - - let len = self.len(); - if len != other.len() { - return false; - } - - let self_ptr = self.as_ptr(); - let other_ptr = other.as_ptr(); - if core::ptr::eq(self_ptr, other_ptr) { - return true; - } - - // use element size (just a remainder for now) - let size = len * size_of::(); - - // SAFETY: size checked above - unsafe { memcmp(self_ptr, other_ptr, size) == 0 } - } - - /// Creates a new raw byte string by repeating this one `n` times. - /// - /// # Panics - /// - /// Panics if the capacity would overflow. - pub fn repeat(&self, n: usize) -> Self { - if self.len() == 0 || n == 1 { - return self.clone(); - } - - let src_len = self.len(); - let new_len = src_len.checked_mul(n).expect("capacity overflow"); - if new_len <= INLINE_CAPACITY { - let mut inline = Inline::zeroed(new_len); - let src = self.as_slice().as_ptr(); - let mut dst = inline.as_mut_slice().as_mut_ptr(); - - // SAFETY: copy only `new_len` bytes with an - // upper bound of `INLINE_CAPACITY` checked above - unsafe { - // could be better from an algorithmic standpoint - // but no expected gain for at most 23 bytes on 64 bit platform - for _ in 0..n { - ptr::copy_nonoverlapping(src, dst, src_len); - dst = dst.add(src_len); - } - } - - Self::from_inline(inline) - } else { - let vec = self.as_slice().repeat(n); - Self::from_vec(vec) - } - } - - /// Returns the remaining spare capacity of the vector as a slice of - /// `MaybeUninit`. - /// - /// The returned slice can be used to fill the vector with data (e.g. 
by - /// reading from a file) before marking the data as initialized using the - /// [`set_len`] method. - /// - /// [`set_len`]: Raw::set_len - pub fn spare_capacity_mut(&mut self) -> &mut [MaybeUninit] { - match self.split_mut() { - RawSplitMut::Borrowed(_) => &mut [], - RawSplitMut::Inline(inline) => inline.spare_capacity_mut(), - RawSplitMut::Allocated(allocated) => allocated.spare_capacity_mut(), - } - } - - /// Forces the length of the vector to `new_len`. - /// - /// Does not normalize! - /// - /// # Safety - /// - /// * If the repr is inline, `new_len` should be must be less than or equal to `INLINE_CAPACITY`. - /// * If `new_len` is greater than the current length: - /// * The elements at `old_len..new_len` must be initialized. - /// * The vector should not be shared. - pub unsafe fn set_len(&mut self, new_len: usize) { - match self.split_mut() { - RawSplitMut::Borrowed(borrowed) => unsafe { - borrowed.set_len(new_len); - }, - RawSplitMut::Inline(inline) => unsafe { inline.set_len(new_len) }, - RawSplitMut::Allocated(allocated) => unsafe { allocated.set_len(new_len) }, - } - } - - /// Shortens and normalizes the vector keeping the first `new_len` elements. - /// - /// Do nothing is `new_len` is greater than the current length. - pub fn truncate(&mut self, new_len: usize) { - if new_len < self.len() { - if self.is_allocated() && new_len <= INLINE_CAPACITY { - let new = - unsafe { Self::inline_unchecked(self.as_slice().get_unchecked(..new_len)) }; - *self = new; - } else { - // SAFETY: `new_len` is checked above - unsafe { self.set_len(new_len) } - } - } - debug_assert!(self.is_normalized()); - } - - /// Shrinks the capacity of the vector with a lower bound. - /// - /// The capacity will remain at least as large as the given bound and the - /// given length. - /// - /// No-op if the representation is not allocated. 
- /// - /// # Representation stability - /// - /// The representation may change to inline if the required capacity is - /// smaller than the inline capacity. - pub fn shrink_to(&mut self, min_capacity: usize) { - if self.is_allocated() { - let min_capacity = min_capacity.max(self.len()); - - if min_capacity > INLINE_CAPACITY { - let allocated = unsafe { &mut self.union_mut().allocated }; - allocated.shrink_to(min_capacity); - } else { - let new = unsafe { Self::inline_unchecked(self.as_slice()) }; - *self = new; - } - } - } -} - -impl Drop for Raw<'_, B> { - #[inline] - fn drop(&mut self) { - // Formally drops this `Raw` decreasing the ref count if needed - if let Some(allocated) = self.take_allocated() { - allocated.explicit_drop(); - } - } -} - -impl Clone for Raw<'_, B> { - fn clone(&self) -> Self { - // Duplicates this `Raw` increasing the ref count if needed. - match self.split() { - RawSplit::Inline(&inline) => Self::from_inline(inline), - RawSplit::Borrowed(&borrowed) => Self::from_borrowed(borrowed), - RawSplit::Allocated(allocated) => { - let clone = allocated.explicit_clone(); - Self::from_allocated(clone) - } - } - } -} - -/// Computes the range in `whole` corresponding to the given `slice`. -/// -/// # Safety -/// -/// `slice` must be part of `whole`. -unsafe fn range_of_unchecked(whole: &[u8], slice: &[u8]) -> Range { - unsafe { - let offset = slice.as_ptr().offset_from(whole.as_ptr()); - let offset: usize = offset.try_into().unwrap_unchecked(); - offset..offset + slice.len() - } -} - -pub fn try_range_of(whole: &[u8], slice: &[u8]) -> Option> { - let len = whole.len(); - let Range { start, end } = whole.as_ptr_range(); - let slice_len = slice.len(); - let slice_start = slice.as_ptr(); - - // checks that slice_start in whole - if slice_start < start || slice_start > end { - return None; - } - - // SAFETY: `offset_from` requires both pointers to be in the same allocated object (+1). 
- // that is checked above: slice_ptr is in self - let offset = unsafe { slice_start.offset_from(start) }; - // SAFETY: offset is between 0 and slice_len included - let offset: usize = unsafe { offset.try_into().unwrap_unchecked() }; - if offset + slice_len > len { - None - } else { - Some(offset..offset + slice_len) - } -} diff --git a/src/string.rs b/src/string.rs index 5c4b3329..dbd35035 100644 --- a/src/string.rs +++ b/src/string.rs @@ -242,6 +242,34 @@ where .map_err(Self) } + /// Returns the borrowed string slice if this `HipStr` is actually borrowed, + /// `None` otherwise. + /// + /// # Examples + /// + /// ``` + /// # use hipstr::HipStr; + /// static S: &str = "abc"; + /// let s = HipStr::borrowed(S); + /// let c: Option<&'static str> = s.as_borrowed(); + /// assert_eq!(c, Some(S)); + /// assert!(std::ptr::eq(S, c.unwrap())); + /// + /// let s2 = HipStr::from(S); + /// assert!(s2.as_borrowed().is_none()); + /// ``` + #[inline] + #[must_use] + pub const fn as_borrowed(&self) -> Option<&'borrow str> { + match self.0.as_borrowed() { + Some(slice) => Some(unsafe { + // SAFETY: type invariant + core::str::from_utf8_unchecked(slice) + }), + None => None, + } + } + /// Returns the length of this `HipStr`, in bytes, not [`char`]s or /// graphemes. In other words, it might not be what a human considers the /// length of the string. diff --git a/src/string/serde.rs b/src/string/serde.rs index 41bc118f..8ffa1899 100644 --- a/src/string/serde.rs +++ b/src/string/serde.rs @@ -1,14 +1,46 @@ +//! `serde` support for `HipStr`. +//! +//! This module provides support for serializing and deserializing `HipStr` +//! using [`serde`]. It is enabled by default when the `serde` feature is +//! enabled. +//! +//! # Examples +//! +//! ``` +//! use hipstr::HipStr; +//! +//! let s = HipStr::from("hello"); +//! let serialized = serde_json::to_string(&s).unwrap(); +//! assert_eq!(serialized, r#""hello""#); +//! +//! 
let deserialized: HipStr = serde_json::from_str(&serialized).unwrap(); +//! assert_eq!(deserialized, s); +//! ``` +//! +//! # Notable aspects of the implementation +//! +//! During deserialization, this implementation minimizes allocations by +//! reusing the deserializer's internal buffer if possible. +//! +//! Like `String`'s serde implementation, this implementation supports +//! UTF-8 encoded byte sequence as input. + +#![allow(clippy::option_if_let_else)] + +use core::fmt; use core::marker::PhantomData; -use serde::de::{Error, Visitor}; +use serde::de::{Error, Unexpected, Visitor}; use serde::{de, Deserialize, Deserializer, Serialize}; use super::HipStr; -use crate::alloc::borrow::Cow; -use crate::alloc::fmt; -use crate::alloc::string::{String, ToString}; +use crate::alloc::string::String; +use crate::alloc::vec::Vec; +use crate::bytes::HipByt; use crate::Backend; +const EXPECTING: &str = "a string"; + impl Serialize for HipStr<'_, B> where B: Backend, @@ -22,15 +54,14 @@ where } } -struct HipStrVisitor<'b, B> { - data: PhantomData<&'b B>, -} +/// Deserializer's visitor for owned `HipStr`. 
+struct OwnedVisitor<'borrow, B: Backend>(PhantomData>); -impl<'a, 'b, B: Backend> Visitor<'a> for HipStrVisitor<'b, B> { +impl<'b, B: Backend> Visitor<'_> for OwnedVisitor<'b, B> { type Value = HipStr<'b, B>; fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - formatter.write_str("a string") + formatter.write_str(EXPECTING) } #[inline] @@ -48,6 +79,29 @@ impl<'a, 'b, B: Backend> Visitor<'a> for HipStrVisitor<'b, B> { { Ok(HipStr::from(v)) } + + fn visit_bytes(self, v: &[u8]) -> Result + where + E: Error, + { + match core::str::from_utf8(v) { + Ok(s) => Ok(HipStr::from(s)), + Err(_) => Err(Error::invalid_value(Unexpected::Bytes(v), &self)), + } + } + + fn visit_byte_buf(self, v: Vec) -> Result + where + E: Error, + { + match String::from_utf8(v) { + Ok(s) => Ok(HipStr::from(s)), + Err(e) => Err(Error::invalid_value( + Unexpected::Bytes(&e.into_bytes()), + &self, + )), + } + } } impl<'de, B> Deserialize<'de> for HipStr<'_, B> @@ -59,41 +113,72 @@ where where D: Deserializer<'de>, { - deserializer.deserialize_str(HipStrVisitor:: { - data: PhantomData::default(), - }) + deserializer.deserialize_str(OwnedVisitor(PhantomData)) } } -/// Minimal string cow visitor -struct CowVisitor; +/// Deserializer's visitor for borrowed `HipStr`. 
+struct BorrowedVisitor<'de, B: Backend>(PhantomData>); -impl<'de> de::Visitor<'de> for CowVisitor { - type Value = Cow<'de, str>; +impl<'de, B: Backend> Visitor<'de> for BorrowedVisitor<'de, B> { + type Value = HipStr<'de, B>; fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - formatter.write_str("a string") + formatter.write_str(EXPECTING) } fn visit_borrowed_str(self, v: &'de str) -> Result where E: de::Error, { - Ok(Cow::Borrowed(v)) + Ok(HipStr::borrowed(v)) } fn visit_str(self, v: &str) -> Result where E: de::Error, { - Ok(Cow::Owned(v.to_string())) + Ok(HipStr::from(v)) } fn visit_string(self, v: String) -> Result where E: de::Error, { - Ok(Cow::Owned(v)) + Ok(HipStr::from(v)) + } + + fn visit_borrowed_bytes(self, v: &'de [u8]) -> Result + where + E: Error, + { + match core::str::from_utf8(v) { + Ok(s) => Ok(HipStr::borrowed(s)), + Err(_) => Err(Error::invalid_value(Unexpected::Bytes(v), &self)), + } + } + + fn visit_bytes(self, v: &[u8]) -> Result + where + E: Error, + { + match core::str::from_utf8(v) { + Ok(s) => Ok(HipStr::from(s)), + Err(_) => Err(Error::invalid_value(Unexpected::Bytes(v), &self)), + } + } + + fn visit_byte_buf(self, v: Vec) -> Result + where + E: Error, + { + match String::from_utf8(v) { + Ok(s) => Ok(HipStr::from(s)), + Err(e) => Err(Error::invalid_value( + Unexpected::Bytes(&e.into_bytes()), + &self, + )), + } } } @@ -128,8 +213,7 @@ where D: serde::Deserializer<'de>, B: Backend, { - let cow: Cow<'de, str> = deserializer.deserialize_str(CowVisitor)?; - Ok(HipStr::from(cow)) + deserializer.deserialize_str(BorrowedVisitor(PhantomData)) } #[cfg(test)] diff --git a/src/string/serde/tests.rs b/src/string/serde/tests.rs index 9c9e00c2..0dea8386 100644 --- a/src/string/serde/tests.rs +++ b/src/string/serde/tests.rs @@ -16,11 +16,17 @@ fn test_serde() { assert_tokens(&empty_str, &[Token::Str("")]); assert_de_tokens(&empty_str, &[Token::BorrowedStr("")]); assert_de_tokens(&empty_str, &[Token::String("")]); + 
assert_de_tokens(&empty_str, &[Token::Bytes(b"")]); + assert_de_tokens(&empty_str, &[Token::BorrowedBytes(b"")]); + assert_de_tokens(&empty_str, &[Token::ByteBuf(b"")]); let small = HipStr::from("abc"); - assert_de_tokens(&small, &[Token::Str("abc")]); + assert_tokens(&small, &[Token::Str("abc")]); assert_de_tokens(&small, &[Token::BorrowedStr("abc")]); assert_de_tokens(&small, &[Token::String("abc")]); + assert_de_tokens(&small, &[Token::Bytes(b"abc")]); + assert_de_tokens(&small, &[Token::BorrowedBytes(b"abc")]); + assert_de_tokens(&small, &[Token::ByteBuf(b"abc")]); } #[test] @@ -29,6 +35,14 @@ fn test_serde_err() { &[Token::I32(0)], "invalid type: integer `0`, expected a string", ); + assert_de_tokens_error::( + &[Token::Bytes(b"\xFF")], + "invalid value: byte array, expected a string", + ); + assert_de_tokens_error::( + &[Token::ByteBuf(b"\xFF")], + "invalid value: byte array, expected a string", + ); } #[test] @@ -88,6 +102,51 @@ fn test_serde_borrow() { Token::StructEnd, ], ); + + assert_de_tokens( + &MyStruct { + field: HipStr::from("a"), + }, + &[ + Token::Struct { + name: "MyStruct", + len: 1, + }, + Token::Str("field"), + Token::BorrowedBytes(b"a"), + Token::StructEnd, + ], + ); + + assert_de_tokens( + &MyStruct { + field: HipStr::from("a"), + }, + &[ + Token::Struct { + name: "MyStruct", + len: 1, + }, + Token::Str("field"), + Token::Bytes(b"a"), + Token::StructEnd, + ], + ); + + assert_de_tokens( + &MyStruct { + field: HipStr::from("a"), + }, + &[ + Token::Struct { + name: "MyStruct", + len: 1, + }, + Token::String("field"), + Token::ByteBuf(b"a"), + Token::StructEnd, + ], + ); } #[test] @@ -104,4 +163,43 @@ fn test_serde_borrow_err() { ], "invalid type: integer `0`, expected a string", ); + + assert_de_tokens_error::( + &[ + Token::Struct { + name: "MyStruct", + len: 1, + }, + Token::Str("field"), + Token::Bytes(b"\xFF"), + Token::StructEnd, + ], + "invalid value: byte array, expected a string", + ); + + assert_de_tokens_error::( + &[ + Token::Struct 
{ + name: "MyStruct", + len: 1, + }, + Token::Str("field"), + Token::BorrowedBytes(b"\xFF"), + Token::StructEnd, + ], + "invalid value: byte array, expected a string", + ); + + assert_de_tokens_error::( + &[ + Token::Struct { + name: "MyStruct", + len: 1, + }, + Token::Str("field"), + Token::ByteBuf(b"\xFF"), + Token::StructEnd, + ], + "invalid value: byte array, expected a string", + ); } diff --git a/src/string/tests.rs b/src/string/tests.rs index de4bb9a8..73a220c4 100644 --- a/src/string/tests.rs +++ b/src/string/tests.rs @@ -219,9 +219,11 @@ fn test_clone() { #[test] fn test_into_borrowed() { - // static + // borrowed let a = H::borrowed(ABC); - assert_eq!(a.into_borrowed(), Ok(ABC)); + let s = a.into_borrowed().unwrap(); + assert_eq!(s, ABC); + assert!(core::ptr::eq(s, ABC)); // inline let a = H::from(ABC); @@ -234,6 +236,23 @@ fn test_into_borrowed() { assert_eq!(a.into_borrowed(), Err(b)); } +#[test] +fn test_as_borrowed() { + // borrowed + let a = H::borrowed(ABC); + let b = a.as_borrowed().unwrap(); + assert_eq!(b, ABC); + assert!(core::ptr::eq(b, ABC)); + + // inline + let a = H::from(ABC); + assert_eq!(a.as_borrowed(), None); + + // heap + let a = H::from(MEDIUM); + assert_eq!(a.as_borrowed(), None); +} + #[test] fn test_as_mut_str() { // static