From 5557a8a280205daeb21a2c76ac8e3403793372d3 Mon Sep 17 00:00:00 2001
From: Iago-lito
Date: Mon, 20 May 2024 19:24:38 +0200
Subject: [PATCH] =?UTF-8?q?=F0=9F=9A=A7=20Implementing=20cartesian=20power?=
 =?UTF-8?q?.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/cartesian_power.rs | 552 +++++++++++++++++++++++++++++++++++++++++
 src/lib.rs             |  20 ++
 2 files changed, 572 insertions(+)
 create mode 100644 src/cartesian_power.rs

diff --git a/src/cartesian_power.rs b/src/cartesian_power.rs
new file mode 100644
index 000000000..f8634f3b3
--- /dev/null
+++ b/src/cartesian_power.rs
@@ -0,0 +1,552 @@
+use alloc::vec::Vec;
+use core::convert::{TryFrom, TryInto};
+use core::fmt;
+use core::iter::FusedIterator;
+
+/// An adaptor iterating through all the ordered `n`-length lists of items
+/// yielded by the underlying iterator, including repetitions.
+///
+/// See [`.cartesian_power()`](crate::Itertools::cartesian_power)
+/// for more information.
+#[derive(Clone)]
+#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
+pub enum CartesianPower<I>
+where
+    I: Iterator,
+    I::Item: Clone,
+{
+    NullPower(bool), // Yields one empty list. The flag is raised once it is consumed.
+    EmptySet,        // No items to list: yield nothing (or mark exhaustion).
+    NonDegenerated {
+        // The adapted iterator: lazily consumed once then forgotten.
+        iter: Option<I>,
+        // Use a separate enum to not complicate the passing of `iter`
+        // from one variant to the next given only a `&mut self`.
+        var: NonDegenerated<I::Item>,
+    },
+}
+
+#[derive(Debug, Clone)]
+// Logically "transparent" wrt enclosing type.
+pub enum NonDegenerated<T> {
+    // Has not yet allocated but will on `.next()` unless iterator is empty.
+    Init {
+        pow: usize, // Positive.
+    },
+    // State during non-degenerated iteration.
+    Nominal {
+        items: Vec<T>,       // Lazily collected from the original iterator.
+        indices: Vec<usize>, // Target items just yielded. Length is `pow`.
+    },
+    // Both buffers above are cleared after exhaustion to conclude/fuse the iterator.
+}
+
+/// Create a new `CartesianPower` from an iterator of clonables.
+pub fn cartesian_power<I>(iter: I, pow: usize) -> CartesianPower<I>
+where
+    I: Iterator,
+    I::Item: Clone,
+{
+    if pow == 0 {
+        CartesianPower::NullPower(false)
+    } else {
+        CartesianPower::NonDegenerated {
+            iter: Some(iter),
+            var: NonDegenerated::Init { pow },
+        }
+    }
+}
+
+impl<I> CartesianPower<I>
+where
+    I: Iterator,
+    I::Item: Clone,
+{
+    /// Increments internal indices to advance to the next list to be yielded.
+    /// This collects new items from the underlying iterator
+    /// if they were not all already collected.
+    ///
+    /// Returns None if we've run out of possible lists,
+    /// otherwise return refs to the indices to yield next,
+    /// valid within the collected items slice also returned.
+    fn increment_indices(&mut self) -> Option<(&[usize], &[I::Item])> {
+        use CartesianPower as C;
+        use NonDegenerated as N;
+        match self {
+            // Degenerated cases.
+            C::NullPower(true) | C::EmptySet => None,
+            C::NullPower(consumed) => {
+                // Only yield the empty list.
+                *consumed = true;
+                Some((&[], &[]))
+            }
+
+            // Initialization on the most common cases.
+            &mut C::NonDegenerated {
+                var: ref mut var @ N::Init { pow },
+                ref mut iter,
+            } => {
+                // Query the first item to figure whether there are any to yield.
+                let iter = iter.as_mut().expect("`Init` always holds the source iterator");
+                // No preallocation from `size_hint()`: unbounded sources hint `usize::MAX`.
+                if let Some(item) = iter.next() {
+                    // If so, allocate a buffer to collect it and the next ones,
+                    // then let it grow on demand as more items are collected.
+                    let items = vec![item];
+                    *var = N::Nominal {
+                        items,
+                        indices: vec![0; pow],
+                    };
+                    // (fiddle to return refs to the newly created enum variant)
+                    let C::NonDegenerated {
+                        var: N::Nominal { indices, items },
+                        ..
+                    } = self
+                    else {
+                        unreachable!()
+                    };
+                    return Some((indices, items));
+                }
+                // If not, then iteration is already over.
+                *self = C::EmptySet;
+                None
+            }
+
+            // Cruise call on the most common cases.
+            C::NonDegenerated {
+                var: N::Nominal { items, indices },
+                iter,
+            } => {
+                if indices.is_empty() {
+                    return None; // Indicates that iteration has been consumed forever: fuse.
+                }
+                // Collect next item if any remains to collect.
+                if let Some(it) = iter {
+                    if let Some(item) = it.next() {
+                        items.push(item);
+                    } else {
+                        *iter = None; // When none remains, forget original iterator.
+                    }
+                }
+                // Increment indices rightmost first, propagating left if wrapping is needed.
+                let b = items.len();
+                for index in indices.iter_mut().rev() {
+                    *index += 1;
+                    if *index < b {
+                        // On the first pass, the new item to be yielded
+                        // will have just been collected,
+                        // on subsequent passes, they will have been collected for a while.
+                        return Some((indices, items));
+                    }
+                    // Since collection just happened,
+                    // a larger index means that wrapping is needed.
+                    *index = 0;
+                }
+                // Cartesian iteration is over when the leftmost index wraps.
+                items.clear();
+                indices.clear();
+                None
+            }
+        }
+    }
+
+    /// Same as [`Self::increment_indices`], but does `n + 1` increments at once.
+    fn increment_indices_by_n(&mut self, n: usize) -> Option<(&[usize], &[I::Item])> {
+        // Implementation mostly duplicated/adapted from increment_indices.
+        use CartesianPower as C;
+        use NonDegenerated as N;
+        match self {
+            // Degenerated cases.
+            C::NullPower(true) | C::EmptySet => None,
+
+            // May overshoot the only yielded empty list.
+            C::NullPower(consumed) => {
+                *consumed = true;
+                if n == 0 {
+                    Some((&[], &[]))
+                } else {
+                    None
+                }
+            }
+
+            // Initialization of the most common case.
+            &mut C::NonDegenerated {
+                var: ref mut var @ N::Init { pow },
+                ref mut iter,
+            } => {
+                // Collect the n + 1 first items.
+                let it = iter.as_mut().expect("`Init` always holds the source iterator");
+                let items = it.take(n + 1).collect::<Vec<_>>();
+                if items.is_empty() {
+                    // There was no item: iteration is already over.
+                    *self = C::EmptySet;
+                    return None;
+                }
+
+                // Calculate correct indices.
+                let indices = if items.len() == n + 1 {
+                    // We don't know the total number of underlying items yet,
+                    // but we can tell that only the rightmost index has moved: no wrapping yet.
+                    let mut indices = vec![0; pow];
+                    indices[pow - 1] = n;
+                    indices
+                } else {
+                    // All items have been collected, and the 'nth' goes further,
+                    // so indices need to be wrapped, but we know the number of items now.
+                    *iter = None;
+                    let b = items.len();
+                    // Write `n` in base `b`, filling the digits from the rightmost index.
+                    let (mut rest, mut indices) = (n, vec![0; pow]);
+                    for index in indices.iter_mut().rev() {
+                        (*index, rest) = (rest % b, rest / b);
+                    }
+                    if rest > 0 {
+                        *self = C::EmptySet; // Overshoot: `n` does not fit in `pow` digits.
+                        return None;
+                    }
+                    indices
+                };
+                *var = N::Nominal { indices, items };
+                let C::NonDegenerated {
+                    var:
+                        N::Nominal {
+                            ref indices,
+                            ref items,
+                        },
+                    ..
+                } = *self
+                else {
+                    unreachable!()
+                };
+                Some((indices, items))
+            }
+
+            // Cruise call on the most common case.
+            C::NonDegenerated {
+                var: N::Nominal { items, indices },
+                iter,
+            } => {
+                // Collect next items if any remain to collect.
+                if let Some(it) = iter {
+                    let before = items.len();
+                    items.extend(it.take(n + 1));
+                    let after = items.len();
+                    if after - before == n + 1 {
+                        // Original iterator is not consumed yet,
+                        // so we don't know the number of items,
+                        // but we know that wrapping did not occur.
+                        let pow = indices.len();
+                        indices[pow - 1] += n + 1;
+                        return Some((indices, items));
+                    } else {
+                        // Original iterator is consumed,
+                        *iter = None;
+                    }
+                }
+                // At this point we know the total number of underlying items
+                // so we can calculate the indices wrapping.
+                let b = items.len();
+                // Increment rightmost first.
+                let (mut carry, mut step) = (n, 1); // `n + 1` steps, split against overflow.
+                for index in indices.iter_mut().rev() {
+                    // Add what reaches this index, keep its digit, carry the rest leftwards.
+                    let sum = *index + carry % b + step;
+                    (*index, carry, step) = (sum % b, carry / b, sum / b);
+                }
+                if (carry, step) == (0, 0) {
+                    return Some((indices, items));
+                }
+                // Steps remain past the leftmost index: iteration is over, fuse.
+                items.clear();
+                indices.clear();
+                None
+            }
+        }
+    }
+}
+
+impl<I> Iterator for CartesianPower<I>
+where
+    I: Iterator,
+    I::Item: Clone,
+{
+    // Every list is yielded as a freshly allocated vector of `pow` cloned items.
+    type Item = Vec<I::Item>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        // If anything to yield,
+        // clone the correct 'pow' instances of collected items
+        // into a freshly allocated vector.
+        self.increment_indices()
+            .map(|(indices, items)| indices.iter().map(|&i| items[i].clone()).collect())
+    }
+
+    fn nth(&mut self, n: usize) -> Option<Self::Item> {
+        // Same as `next`, except that the indices jump over `n` lists at once
+        // instead of stepping through (and cloning) each of them.
+        self.increment_indices_by_n(n)
+            .map(|(indices, items)| indices.iter().map(|&i| items[i].clone()).collect())
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        // Bounds on the number of lists *still to be yielded*.
+        use CartesianPower as C;
+        use NonDegenerated as N;
+        match self {
+            // Trivial hints.
+            C::NullPower(true) | C::EmptySet => (0, Some(0)),
+            C::NullPower(false) => (1, Some(1)),
+
+            // Before it has started,
+            // the expected size is the expected number of items to the power of the exponent.
+            &C::NonDegenerated {
+                ref iter,
+                var: N::Init { pow },
+            } => {
+                let (lo, hi) = iter.as_ref().unwrap().size_hint();
+                let p = pow.try_into().ok();
+                (
+                    p.and_then(|p| lo.checked_pow(p)).unwrap_or(usize::MAX), // (ceil on overflow)
+                    p.and_then(|p| hi.and_then(|hi| hi.checked_pow(p))),
+                )
+            }
+
+            // Once started, the expected size is either exact or approximate
+            // depending on whether all items have yet been collected.
+            C::NonDegenerated {
+                iter,
+                var: N::Nominal { items, indices },
+            } => {
+                // The number of items includes the ones already collected.
+                let n = items.len();
+                let (lo, hi) = if let Some(iter) = iter {
+                    let (lo, hi) = iter.size_hint();
+                    // (saturate: unbounded sources hint a lower bound of `usize::MAX`)
+                    (n.saturating_add(lo), hi.and_then(|hi| n.checked_add(hi)))
+                } else {
+                    (n, Some(n))
+                };
+                if let Ok(pow) = u32::try_from(indices.len()) {
+                    // The number of *remaining* lists
+                    // needs to be calculated from the upcoming indices.
+                    let remaining = |n: usize| -> Option<usize> {
+                        let n: u64 = n.try_into().ok()?;
+                        let mut res: u64 = 0;
+                        for (pos, &i) in (1..=pow).zip(indices) {
+                            let i = u64::try_from(i).ok()?;
+                            // Leftmost indices contribute most: the one at (1-based)
+                            // position `pos` still has `n - (i + 1)` values to go through,
+                            // each of them worth `n.pow(pow - pos)` lists:
+                            // a * b = (n - (i + 1)) * n.pow(pow - pos)
+                            let a = n.checked_sub(i + 1)?;
+                            let b = n.checked_pow(pow - pos)?;
+                            let contribution = a.checked_mul(b)?;
+                            res = res.checked_add(contribution)?;
+                        }
+                        res.try_into().ok() // (the list just yielded is not counted)
+                    };
+                    (remaining(lo).unwrap_or(usize::MAX), hi.and_then(remaining))
+                } else {
+                    (usize::MAX, None)
+                }
+            }
+        }
+    }
+
+    fn count(self) -> usize {
+        use CartesianPower as C;
+        use NonDegenerated as N;
+        match self {
+            C::NullPower(true) | C::EmptySet => 0,
+            C::NullPower(false) => 1,
+            C::NonDegenerated {
+                iter,
+                var: N::Init { pow },
+            } => {
+                // Consume iterator to calculate total number.
+                let n = iter.expect("`Init` always holds the source iterator").count();
+                // `n` to the power `pow` (`^` is XOR). Exponents beyond `u32::MAX` are clamped:
+                // bases 0 and 1 are unaffected, larger ones overflow either way.
+                n.pow(u32::try_from(pow).unwrap_or(u32::MAX))
+            }
+            C::NonDegenerated {
+                iter,
+                var: N::Nominal { items, indices },
+            } => {
+                // Consume if any left, then calculate only remaining items from indices.
+                let n = items.len() + iter.map_or(0, Iterator::count);
+                // Horner evaluation of the number of lists after the one just yielded:
+                // each index `i` still has `n - (i + 1)` values to go through,
+                // weighted by `n` to the power of the number of indices on its right.
+                indices.into_iter().fold(0, |acc, i| acc * n + (n - (i + 1)))
+            }
+        }
+    }
+}
+
+// Elide underlying iterator from the debug display.
+impl<I> fmt::Debug for CartesianPower<I>
+where
+    I: Iterator + fmt::Debug,
+    I::Item: fmt::Debug + Clone,
+{
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        use CartesianPower as C;
+        match self {
+            C::NullPower(consumed) => f.debug_tuple("NullPower").field(consumed).finish(),
+            C::EmptySet => f.debug_tuple("EmptySet").finish(),
+            C::NonDegenerated { var, .. } => f.debug_tuple("NonDegenerated").field(var).finish(),
+        }
+    }
+}
+
+impl<I> FusedIterator for CartesianPower<I>
+where
+    I: Iterator,
+    I::Item: Clone,
+{
+}
+
+#[cfg(test)]
+mod tests {
+    //! Use chars and strings to ease testing of every yielded iterator value.
+
+    use super::CartesianPower;
+    use crate::Itertools;
+    use core::str::Chars;
+
+    // Polls an iterator expected to be exhausted, to check that it stays so.
+    fn check_fused(mut exhausted_it: CartesianPower<Chars>, context: String) {
+        for i in 0..100 {
+            let act = exhausted_it.next();
+            assert!(
+                act.is_none(),
+                "Iteration {} after expected exhaustion of {} \
+                 yielded {:?} instead of None. ",
+                i,
+                context,
+                act,
+            );
+        }
+    }
+
+    #[test]
+    fn basic() {
+        fn check(origin: &str, pow: usize, expected: &[&str]) {
+            let mut it = origin.chars().cartesian_power(pow);
+            let mut i = 0;
+            for exp in expected {
+                let act = it.next();
+                if act != Some(exp.chars().collect()) {
+                    panic!(
+                        "Failed iteration {} for {:?}^{}. \
+                         Expected {:?}, got {:?} instead.",
+                        i, origin, pow, exp, act,
+                    );
+                }
+                i += 1;
+            }
+            check_fused(it, format!("iteration {} or {:?}^{}", i, origin, pow));
+        }
+
+        // Empty underlying iterator.
+        check("", 0, &[""]);
+        check("", 1, &[]);
+        check("", 2, &[]);
+        check("", 3, &[]);
+
+        // Singleton underlying iterator.
+        check("a", 0, &[""]);
+        check("a", 1, &["a"]);
+        check("a", 2, &["aa"]);
+        check("a", 3, &["aaa"]);
+
+        // Underlying pair.
+        check("ab", 0, &[""]);
+        check("ab", 1, &["a", "b"]);
+        check("ab", 2, &["aa", "ab", "ba", "bb"]);
+        check(
+            "ab",
+            3,
+            &["aaa", "aab", "aba", "abb", "baa", "bab", "bba", "bbb"],
+        );
+
+        // Underlying triplet.
+        check("abc", 0, &[""]);
+        check("abc", 1, &["a", "b", "c"]);
+        check(
+            "abc",
+            2,
+            &["aa", "ab", "ac", "ba", "bb", "bc", "ca", "cb", "cc"],
+        );
+        check(
+            "abc",
+            3,
+            &[
+                "aaa", "aab", "aac", "aba", "abb", "abc", "aca", "acb", "acc", "baa", "bab", "bac",
+                "bba", "bbb", "bbc", "bca", "bcb", "bcc", "caa", "cab", "cac", "cba", "cbb", "cbc",
+                "cca", "ccb", "ccc",
+            ],
+        );
+    }
+
+    #[test]
+    fn nth() {
+        fn check(origin: &str, pow: usize, expected: &[(usize, Option<&str>)]) {
+            let mut it = origin.chars().cartesian_power(pow);
+            let mut total_n = 0;
+            for &(n, exp) in expected {
+                let act = it.nth(n);
+                if act != exp.map(|s| s.chars().collect::<Vec<_>>()) {
+                    panic!(
+                        "Failed nth({}) iteration for {:?}^{}. \
+                         Expected {:?}, got {:?} instead.",
+                        n, origin, pow, exp, act,
+                    );
+                }
+                total_n += n;
+            }
+            check_fused(
+                it,
+                format!("nth({}) iteration of {:?}^{}", total_n, origin, pow),
+            );
+        }
+
+        // Check degenerated cases.
+        check("", 0, &[(0, Some("")), (0, None)]);
+        check("", 0, &[(0, Some("")), (1, None)]);
+        check("", 0, &[(0, Some("")), (2, None)]);
+        check("", 0, &[(1, None), (0, None)]);
+        check("", 0, &[(1, None), (1, None)]);
+        check("", 0, &[(1, None), (2, None)]);
+        check("", 0, &[(2, None), (0, None)]);
+        check("", 0, &[(2, None), (1, None)]);
+        check("", 0, &[(2, None), (2, None)]);
+
+        check("a", 0, &[(0, Some("")), (0, None)]);
+        check("a", 0, &[(0, Some("")), (1, None)]);
+        check("a", 0, &[(0, Some("")), (2, None)]);
+        check("a", 0, &[(1, None), (0, None)]);
+        check("a", 0, &[(1, None), (1, None)]);
+        check("a", 0, &[(1, None), (2, None)]);
+        check("a", 0, &[(2, None), (0, None)]);
+        check("a", 0, &[(2, None), (1, None)]);
+        check("a", 0, &[(2, None), (2, None)]);
+
+        // Unit power.
+        check("a", 1, &[(0, Some("a")), (0, None)]);
+        check("a", 1, &[(0, Some("a")), (1, None)]);
+        check("a", 1, &[(0, Some("a")), (2, None)]);
+        check("a", 1, &[(1, None), (0, None)]);
+        check("a", 1, &[(1, None), (1, None)]);
+        check("a", 1, &[(1, None), (2, None)]);
+        check("a", 1, &[(2, None), (0, None)]);
+        check("a", 1, &[(2, None), (1, None)]);
+        check("a", 1, &[(2, None), (2, None)]);
+
+        // HERE: make that pass.
+        // check("ab", 1, &[(0, Some("a")), (0, Some("b")), (0, None)]);
+        // check("ab", 1, &[(1, Some("b")), (0, None), (0, None)]);
+        // check("ab", 1, &[(2, None), (0, None), (0, None)]);
+
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index f4de79c50..aed13b4a4 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -57,6 +57,8 @@ extern crate alloc;
 #[cfg(feature = "use_alloc")]
 use alloc::{collections::VecDeque, string::String, vec::Vec};
 
+#[cfg(feature = "use_alloc")]
+use cartesian_power::CartesianPower;
 pub use either::Either;
 
 use core::borrow::Borrow;
@@ -175,6 +177,8 @@ pub mod free;
 #[doc(inline)]
 pub use crate::free::*;
 #[cfg(feature = "use_alloc")]
+mod cartesian_power;
+#[cfg(feature = "use_alloc")]
 mod combinations;
 #[cfg(feature = "use_alloc")]
 mod combinations_with_replacement;
@@ -1729,6 +1733,28 @@ pub trait Itertools: Iterator {
         combinations_with_replacement::combinations_with_replacement(self, k)
     }
 
+    /// Returns an iterator yielding the successive elements
+    /// of the cartesian power of the set described by the original iterator.
+    ///
+    /// ```
+    /// use itertools::Itertools;
+    ///
+    /// let mut it = "ab".chars().cartesian_power(2);
+    /// assert_eq!(it.next(), Some(vec!['a', 'a']));
+    /// assert_eq!(it.next(), Some(vec!['a', 'b']));
+    /// assert_eq!(it.next(), Some(vec!['b', 'a']));
+    /// assert_eq!(it.next(), Some(vec!['b', 'b']));
+    /// assert_eq!(it.next(), None);
+    /// ```
+    #[cfg(feature = "use_alloc")]
+    fn cartesian_power(self, pow: usize) -> CartesianPower<Self>
+    where
+        Self: Sized,
+        Self::Item: Clone,
+    {
+        cartesian_power::cartesian_power(self, pow)
+    }
+
     /// Return an iterator adaptor that iterates over all k-permutations of the
     /// elements from an iterator.
     ///