From 321c4c7d241b78484d1374f64840378ceb82d2c1 Mon Sep 17 00:00:00 2001 From: Markus Reiter Date: Tue, 23 Jan 2024 02:21:47 +0100 Subject: [PATCH 1/2] Add `Vec::drain`. --- CHANGELOG.md | 1 + src/lib.rs | 2 + src/slice.rs | 38 +++++++ src/vec/drain.rs | 222 +++++++++++++++++++++++++++++++++++++ src/{vec.rs => vec/mod.rs} | 111 ++++++++++++++++++- 5 files changed, 373 insertions(+), 1 deletion(-) create mode 100644 src/slice.rs create mode 100644 src/vec/drain.rs rename src/{vec.rs => vec/mod.rs} (93%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 804cfb015b..34dd577825 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/). - Added `IntoIterator` implementation for `LinearMap` - Added `Deque::{get, get_mut, get_unchecked, get_unchecked_mut}`. - Added `serde::Serialize` and `serde::Deserialize` implementations to `HistoryBuffer`. +- Added `Vec::drain`. ### Changed diff --git a/src/lib.rs b/src/lib.rs index 9d51d40cc9..5bf5fa57c8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -96,6 +96,7 @@ pub use indexmap::{ pub use indexset::{FnvIndexSet, IndexSet, Iter as IndexSetIter}; pub use linear_map::LinearMap; pub use string::String; + pub use vec::{Vec, VecView}; #[macro_use] @@ -107,6 +108,7 @@ mod histbuf; mod indexmap; mod indexset; mod linear_map; +mod slice; pub mod storage; pub mod string; pub mod vec; diff --git a/src/slice.rs b/src/slice.rs new file mode 100644 index 0000000000..51d1a6b6a3 --- /dev/null +++ b/src/slice.rs @@ -0,0 +1,38 @@ +use core::ops; + +// FIXME: Remove when `slice_range` feature is stable. +#[track_caller] +#[must_use] +pub fn range(range: R, bounds: ops::RangeTo) -> ops::Range +where + R: ops::RangeBounds, +{ + let len = bounds.end; + + let start: ops::Bound<&usize> = range.start_bound(); + let start = match start { + ops::Bound::Included(&start) => start, + ops::Bound::Excluded(start) => start + .checked_add(1) + .unwrap_or_else(|| panic!("attempted to index slice from after maximum usize")), + ops::Bound::Unbounded => 0, + }; + + let end: ops::Bound<&usize> = range.end_bound(); + let end = match end { + ops::Bound::Included(end) => end + .checked_add(1) + .unwrap_or_else(|| panic!("attempted to index slice up to maximum usize")), + ops::Bound::Excluded(&end) => end, + ops::Bound::Unbounded => len, + }; + + if start > end { + panic!("slice index starts at {start} but ends at {end}"); + } + if end > len { + panic!("range end index {end} out of range for slice of length {len}"); + } + + ops::Range { start, end } +} diff --git a/src/vec/drain.rs b/src/vec/drain.rs new file mode 100644 index 0000000000..f501cfb188 --- /dev/null +++ b/src/vec/drain.rs @@ -0,0 +1,222 @@ +use core::{ + fmt, + iter::FusedIterator, + mem::{self, size_of}, + ptr::{self, NonNull}, + slice, +}; + +use super::VecView; + +/// A draining iterator for [`Vec`](super::Vec). +/// +/// This `struct` is created by [`Vec::drain`](super::Vec::drain). +/// See its documentation for more. +/// +/// # Example +/// +/// ``` +/// use heapless::{vec, Vec}; +/// +/// let mut v = Vec::<_, 4>::from_array([0, 1, 2]); +/// let iter: vec::Drain<'_, _> = v.drain(..); +/// ``` +pub struct Drain<'a, T: 'a> { + /// Index of tail to preserve + pub(super) tail_start: usize, + /// Length of tail + pub(super) tail_len: usize, + /// Current remaining range to remove + pub(super) iter: slice::Iter<'a, T>, + pub(super) vec: NonNull>, +} + +impl fmt::Debug for Drain<'_, T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_tuple("Drain").field(&self.iter.as_slice()).finish() + } +} + +impl<'a, T> Drain<'a, T> { + /// Returns the remaining items of this iterator as a slice. + /// + /// # Examples + /// + /// ``` + /// use heapless::{vec, Vec}; + /// + /// let mut vec = Vec::<_, 3>::from_array(['a', 'b', 'c']); + /// let mut drain = vec.drain(..); + /// assert_eq!(drain.as_slice(), &['a', 'b', 'c']); + /// let _ = drain.next().unwrap(); + /// assert_eq!(drain.as_slice(), &['b', 'c']); + /// ``` + #[must_use] + pub fn as_slice(&self) -> &[T] { + self.iter.as_slice() + } +} + +impl<'a, T> AsRef<[T]> for Drain<'a, T> { + fn as_ref(&self) -> &[T] { + self.as_slice() + } +} + +unsafe impl Sync for Drain<'_, T> {} +unsafe impl Send for Drain<'_, T> {} + +impl Iterator for Drain<'_, T> { + type Item = T; + + #[inline] + fn next(&mut self) -> Option { + self.iter + .next() + .map(|elt| unsafe { ptr::read(elt as *const _) }) + } + + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } +} + +impl DoubleEndedIterator for Drain<'_, T> { + #[inline] + fn next_back(&mut self) -> Option { + self.iter + .next_back() + .map(|elt| unsafe { ptr::read(elt as *const _) }) + } +} + +impl Drop for Drain<'_, T> { + fn drop(&mut self) { + /// Moves back the un-`Drain`ed elements to restore the original `Vec`. + struct DropGuard<'r, 'a, T>(&'r mut Drain<'a, T>); + + impl<'r, 'a, T> Drop for DropGuard<'r, 'a, T> { + fn drop(&mut self) { + if self.0.tail_len > 0 { + unsafe { + let source_vec = self.0.vec.as_mut(); + // memmove back untouched tail, update to new length + let start = source_vec.len(); + let tail = self.0.tail_start; + if tail != start { + let dst = source_vec.as_mut_ptr().add(start); + let src = source_vec.as_ptr().add(tail); + ptr::copy(src, dst, self.0.tail_len); + } + source_vec.set_len(start + self.0.tail_len); + } + } + } + } + + let iter = mem::take(&mut self.iter); + let drop_len = iter.len(); + + let mut vec = self.vec; + + if size_of::() == 0 { + // ZSTs have no identity, so we don't need to move them around, we only need to drop the correct amount. + // this can be achieved by manipulating the `Vec` length instead of moving values out from `iter`. + unsafe { + let vec = vec.as_mut(); + let old_len = vec.len(); + vec.set_len(old_len + drop_len + self.tail_len); + vec.truncate(old_len + self.tail_len); + } + + return; + } + + // ensure elements are moved back into their appropriate places, even when drop_in_place panics + let _guard = DropGuard(self); + + if drop_len == 0 { + return; + } + + // as_slice() must only be called when iter.len() is > 0 because + // it also gets touched by vec::Splice which may turn it into a dangling pointer + // which would make it and the vec pointer point to different allocations which would + // lead to invalid pointer arithmetic below. + let drop_ptr = iter.as_slice().as_ptr(); + + unsafe { + // drop_ptr comes from a slice::Iter which only gives us a &[T] but for drop_in_place + // a pointer with mutable provenance is necessary. Therefore we must reconstruct + // it from the original vec but also avoid creating a &mut to the front since that could + // invalidate raw pointers to it which some unsafe code might rely on. + let vec_ptr = vec.as_mut().as_mut_ptr(); + // FIXME: Replace with `sub_ptr` once stable. + let drop_offset = (drop_ptr as usize - vec_ptr as usize) / size_of::(); + let to_drop = ptr::slice_from_raw_parts_mut(vec_ptr.add(drop_offset), drop_len); + ptr::drop_in_place(to_drop); + } + } +} + +impl ExactSizeIterator for Drain<'_, T> {} + +impl FusedIterator for Drain<'_, T> {} + +#[cfg(test)] +mod tests { + use super::super::Vec; + + #[test] + fn drain_front() { + let mut vec = Vec::<_, 8>::from_array([1, 2, 3, 4]); + let mut it = vec.drain(..1); + assert_eq!(it.next(), Some(1)); + drop(it); + assert_eq!(vec, &[2, 3, 4]); + } + + #[test] + fn drain_middle() { + let mut vec = Vec::<_, 8>::from_array([1, 2, 3, 4]); + let mut it = vec.drain(1..3); + assert_eq!(it.next(), Some(2)); + assert_eq!(it.next(), Some(3)); + drop(it); + assert_eq!(vec, &[1, 4]); + } + + #[test] + fn drain_end() { + let mut vec = Vec::<_, 8>::from_array([1, 2, 3, 4]); + let mut it = vec.drain(3..); + assert_eq!(it.next(), Some(4)); + drop(it); + assert_eq!(vec, &[1, 2, 3]); + } + + #[test] + fn drain_drop_rest() { + droppable!(); + + let mut vec = Vec::<_, 8>::from_array([ + Droppable::new(), + Droppable::new(), + Droppable::new(), + Droppable::new(), + ]); + assert_eq!(Droppable::count(), 4); + + let mut iter = vec.drain(2..); + assert_eq!(iter.next().unwrap().0, 3); + drop(iter); + assert_eq!(Droppable::count(), 2); + + assert_eq!(vec.len(), 2); + assert_eq!(vec.remove(0).0, 1); + assert_eq!(Droppable::count(), 1); + + drop(vec); + assert_eq!(Droppable::count(), 0); + } +} diff --git a/src/vec.rs b/src/vec/mod.rs similarity index 93% rename from src/vec.rs rename to src/vec/mod.rs index 0378fc608b..3fbfc65226 100644 --- a/src/vec.rs +++ b/src/vec/mod.rs @@ -5,11 +5,16 @@ use core::{ cmp::Ordering, fmt, hash, mem::{self, ManuallyDrop, MaybeUninit}, - ops, ptr, slice, + ops::{self, Range, RangeBounds}, + ptr::{self, NonNull}, + slice, }; use crate::storage::{OwnedStorage, Storage, ViewStorage}; +mod drain; +pub use drain::Drain; + /// Base struct for [`Vec`] and [`VecView`], generic over the [`Storage`]. /// /// In most cases you should use [`Vec`] or [`VecView`] directly. Only use this @@ -246,6 +251,110 @@ impl Vec { pub fn as_mut_view(&mut self) -> &mut VecView { self } + + /// Removes the specified range from the vector in bulk, returning all + /// removed elements as an iterator. If the iterator is dropped before + /// being fully consumed, it drops the remaining removed elements. + /// + /// The returned iterator keeps a mutable borrow on the vector to optimize + /// its implementation. + /// + /// # Panics + /// + /// Panics if the starting point is greater than the end point or if + /// the end point is greater than the length of the vector. + /// + /// # Leaking + /// + /// If the returned iterator goes out of scope without being dropped (due to + /// [`mem::forget`], for example), the vector may have lost and leaked + /// elements arbitrarily, including elements outside the range. + /// + /// # Examples + /// + /// ``` + /// use heapless::Vec; + /// + /// let mut v = Vec::<_, 8>::from_array([1, 2, 3]); + /// let u: Vec<_, 8> = v.drain(1..).collect(); + /// assert_eq!(v, &[1]); + /// assert_eq!(u, &[2, 3]); + /// + /// // A full range clears the vector, like `clear()` does. + /// v.drain(..); + /// assert_eq!(v, &[]); + /// ``` + pub fn drain(&mut self, range: R) -> Drain<'_, T> + where + R: RangeBounds, + { + self.as_mut_view().drain(range) + } +} + +impl VecView { + /// Removes the specified range from the vector in bulk, returning all + /// removed elements as an iterator. If the iterator is dropped before + /// being fully consumed, it drops the remaining removed elements. + /// + /// The returned iterator keeps a mutable borrow on the vector to optimize + /// its implementation. + /// + /// # Panics + /// + /// Panics if the starting point is greater than the end point or if + /// the end point is greater than the length of the vector. + /// + /// # Leaking + /// + /// If the returned iterator goes out of scope without being dropped (due to + /// [`mem::forget`], for example), the vector may have lost and leaked + /// elements arbitrarily, including elements outside the range. + /// + /// # Examples + /// + /// ``` + /// use heapless::Vec; + /// + /// let mut v = Vec::<_, 8>::from_array([1, 2, 3]); + /// let u: Vec<_, 8> = v.drain(1..).collect(); + /// assert_eq!(v, &[1]); + /// assert_eq!(u, &[2, 3]); + /// + /// // A full range clears the vector, like `clear()` does. + /// v.drain(..); + /// assert_eq!(v, &[]); + /// ``` + pub fn drain(&mut self, range: R) -> Drain<'_, T> + where + R: RangeBounds, + { + // Memory Safety + // + // When the `Drain` is first created, it shortens the length of + // the source vector to make sure no uninitialized or moved-from elements + // are accessible at all if the `Drain`'s destructor never gets to run. + // + // `Drain` will `ptr::read` out the values to remove. + // When finished, remaining tail of the vec is copied back to cover + // the hole, and the vector length is restored to the new length. + // + let len = self.len(); + let Range { start, end } = crate::slice::range(range, ..len); + + unsafe { + // Set `self.vec` length's to `start`, to be safe in case `Drain` is leaked. + self.set_len(start); + let vec = NonNull::from(self); + let range_slice = slice::from_raw_parts(vec.as_ref().as_ptr().add(start), end - start); + Drain { + tail_start: end, + tail_len: len - end, + iter: range_slice.iter(), + vec, + } + } + } } impl VecInner { From 29d2156379297366161c68918aa8456a37e8a9f9 Mon Sep 17 00:00:00 2001 From: Markus Reiter Date: Tue, 23 Jan 2024 02:21:56 +0100 Subject: [PATCH 2/2] Add `String::drain`. --- CHANGELOG.md | 1 + src/string/drain.rs | 134 +++++++++++++++++++++++++++++++ src/{string.rs => string/mod.rs} | 69 +++++++++++++++- 3 files changed, 203 insertions(+), 1 deletion(-) create mode 100644 src/string/drain.rs rename src/{string.rs => string/mod.rs} (91%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 34dd577825..523a3ea353 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/). - Added `Deque::{get, get_mut, get_unchecked, get_unchecked_mut}`. - Added `serde::Serialize` and `serde::Deserialize` implementations to `HistoryBuffer`. - Added `Vec::drain`. +- Added `String::drain`. ### Changed diff --git a/src/string/drain.rs b/src/string/drain.rs new file mode 100644 index 0000000000..c547e5f4b2 --- /dev/null +++ b/src/string/drain.rs @@ -0,0 +1,134 @@ +use core::{fmt, iter::FusedIterator, str::Chars}; + +use super::String; + +/// A draining iterator for `String`. +/// +/// This struct is created by the [`drain`] method on [`String`]. See its +/// documentation for more. +/// +/// [`drain`]: String::drain +pub struct Drain<'a, const N: usize> { + /// Will be used as &'a mut String in the destructor + pub(super) string: *mut String, + /// Start of part to remove + pub(super) start: usize, + /// End of part to remove + pub(super) end: usize, + /// Current remaining range to remove + pub(super) iter: Chars<'a>, +} + +impl fmt::Debug for Drain<'_, N> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_tuple("Drain").field(&self.as_str()).finish() + } +} + +unsafe impl Sync for Drain<'_, N> {} +unsafe impl Send for Drain<'_, N> {} + +impl Drop for Drain<'_, N> { + fn drop(&mut self) { + unsafe { + // Use `Vec::drain`. “Reaffirm” the bounds checks to avoid + // panic code being inserted again. + let self_vec = (*self.string).as_mut_vec(); + if self.start <= self.end && self.end <= self_vec.len() { + self_vec.drain(self.start..self.end); + } + } + } +} + +impl<'a, const N: usize> Drain<'a, N> { + /// Returns the remaining (sub)string of this iterator as a slice. + /// + /// # Examples + /// + /// ``` + /// use heapless::String; + /// + /// let mut s = String::<8>::try_from("abc").unwrap(); + /// let mut drain = s.drain(..); + /// assert_eq!(drain.as_str(), "abc"); + /// let _ = drain.next().unwrap(); + /// assert_eq!(drain.as_str(), "bc"); + /// ``` + #[must_use] + pub fn as_str(&self) -> &str { + self.iter.as_str() + } +} + +impl AsRef for Drain<'_, N> { + fn as_ref(&self) -> &str { + self.as_str() + } +} + +impl AsRef<[u8]> for Drain<'_, N> { + fn as_ref(&self) -> &[u8] { + self.as_str().as_bytes() + } +} + +impl Iterator for Drain<'_, N> { + type Item = char; + + #[inline] + fn next(&mut self) -> Option { + self.iter.next() + } + + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } + + #[inline] + fn last(mut self) -> Option { + self.next_back() + } +} + +impl DoubleEndedIterator for Drain<'_, N> { + #[inline] + fn next_back(&mut self) -> Option { + self.iter.next_back() + } +} + +impl FusedIterator for Drain<'_, N> {} + +#[cfg(test)] +mod tests { + use super::String; + + #[test] + fn drain_front() { + let mut s = String::<8>::try_from("abcd").unwrap(); + let mut it = s.drain(..1); + assert_eq!(it.next(), Some('a')); + drop(it); + assert_eq!(s, "bcd"); + } + + #[test] + fn drain_middle() { + let mut s = String::<8>::try_from("abcd").unwrap(); + let mut it = s.drain(1..3); + assert_eq!(it.next(), Some('b')); + assert_eq!(it.next(), Some('c')); + drop(it); + assert_eq!(s, "ad"); + } + + #[test] + fn drain_end() { + let mut s = String::<8>::try_from("abcd").unwrap(); + let mut it = s.drain(3..); + assert_eq!(it.next(), Some('d')); + drop(it); + assert_eq!(s, "abc"); + } +} diff --git a/src/string.rs b/src/string/mod.rs similarity index 91% rename from src/string.rs rename to src/string/mod.rs index dce6394d2d..ceeac8455e 100644 --- a/src/string.rs +++ b/src/string/mod.rs @@ -5,12 +5,16 @@ use core::{ cmp::Ordering, fmt, fmt::{Arguments, Write}, - hash, iter, ops, + hash, iter, + ops::{self, Range, RangeBounds}, str::{self, Utf8Error}, }; use crate::Vec; +mod drain; +pub use drain::Drain; + /// A possible error value when converting a [`String`] from a UTF-16 byte slice. /// /// This type is the error type for the [`from_utf16`] method on [`String`]. @@ -456,6 +460,69 @@ impl String { pub fn clear(&mut self) { self.vec.clear() } + + /// Removes the specified range from the string in bulk, returning all + /// removed characters as an iterator. + /// + /// The returned iterator keeps a mutable borrow on the string to optimize + /// its implementation. + /// + /// # Panics + /// + /// Panics if the starting point or end point do not lie on a [`char`] + /// boundary, or if they're out of bounds. + /// + /// # Leaking + /// + /// If the returned iterator goes out of scope without being dropped (due to + /// [`core::mem::forget`], for example), the string may still contain a copy + /// of any drained characters, or may have lost characters arbitrarily, + /// including characters outside the range. + /// + /// # Examples + /// + /// ``` + /// use heapless::String; + /// + /// let mut s = String::<32>::try_from("α is alpha, β is beta").unwrap(); + /// let beta_offset = s.find('β').unwrap_or(s.len()); + /// + /// // Remove the range up until the β from the string + /// let t: String<32> = s.drain(..beta_offset).collect(); + /// assert_eq!(t, "α is alpha, "); + /// assert_eq!(s, "β is beta"); + /// + /// // A full range clears the string, like `clear()` does + /// s.drain(..); + /// assert_eq!(s, ""); + /// ``` + pub fn drain(&mut self, range: R) -> Drain<'_, N> + where + R: RangeBounds, + { + // Memory safety + // + // The `String` version of `Drain` does not have the memory safety issues + // of the `Vec` version. The data is just plain bytes. + // Because the range removal happens in `Drop`, if the `Drain` iterator is leaked, + // the removal will not happen. + let Range { start, end } = crate::slice::range(range, ..self.len()); + assert!(self.is_char_boundary(start)); + assert!(self.is_char_boundary(end)); + + // Take out two simultaneous borrows. The &mut String won't be accessed + // until iteration is over, in Drop. + let self_ptr = self as *mut _; + // SAFETY: `slice::range` and `is_char_boundary` do the appropriate bounds checks. + let chars_iter = unsafe { self.get_unchecked(start..end) }.chars(); + + Drain { + start, + end, + iter: chars_iter, + string: self_ptr, + } + } } impl Default for String {