From 23d83466bd83c20ef31132a7e2c42ff065768a93 Mon Sep 17 00:00:00 2001 From: Philippe-Cholet Date: Thu, 24 Aug 2023 11:47:11 +0200 Subject: [PATCH 1/5] `MergeJoinBy` alias of new `InternalMergeJoinBy` And the trait `OrderingOrBool` is now for wrapped functions rather than for `Ordering` and `bool`. NOTES: - The new struct `MergeFuncLR` is useful to avoid conflicting implementations of `OrderingOrBool for F` because the compiler thinks that `F` could implement both `FnMut(&L, &R) -> Ordering` and `FnMut(&L, &R) -> bool`. - The new trait `FuncLR` is useful to be able to infer the Output type `T` without having to add a fourth parameter to `MergeJoinBy`. From what I understand, this is needed because the `FnMut` trait is not fully stabilized yet. - `OrderingOrBool` has a new associated type `Out`, which is useful in `impl Iterator` because otherwise the compiler would complain about `T` be unconstrained. This successfully pass the tests. --- src/lib.rs | 1 - src/merge_join.rs | 77 +++++++++++++++++++++++++++++------------------ 2 files changed, 48 insertions(+), 30 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 7dc40370b..ec1bbca2a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1075,7 +1075,6 @@ pub trait Itertools: Iterator { where J: IntoIterator, F: FnMut(&Self::Item, &J::Item) -> T, - T: merge_join::OrderingOrBool, Self: Sized, { merge_join_by(self, other, cmp_fn) diff --git a/src/merge_join.rs b/src/merge_join.rs index 1769aaf3a..45560471f 100644 --- a/src/merge_join.rs +++ b/src/merge_join.rs @@ -2,6 +2,7 @@ use std::cmp::Ordering; use std::fmt; use std::iter::Fuse; use std::iter::{FusedIterator, Peekable}; +use std::marker::PhantomData; use either::Either; @@ -171,37 +172,55 @@ where I: IntoIterator, J: IntoIterator, F: FnMut(&I::Item, &J::Item) -> T, - T: OrderingOrBool, { - MergeJoinBy { + InternalMergeJoinBy { left: put_back(left.into_iter().fuse()), right: put_back(right.into_iter().fuse()), - cmp_fn, + cmp_fn: MergeFuncLR(cmp_fn, PhantomData), } } /// An iterator adaptor that merge-joins items from the two base iterators in ascending order. /// /// See [`.merge_join_by()`](crate::Itertools::merge_join_by) for more information. +pub type MergeJoinBy = InternalMergeJoinBy< + I, + J, + MergeFuncLR::Item, ::Item>>::T>, +>; + #[must_use = "iterator adaptors are lazy and do nothing unless consumed"] -pub struct MergeJoinBy { +pub struct InternalMergeJoinBy { left: PutBack>, right: PutBack>, cmp_fn: F, } +#[derive(Clone, Debug)] +pub struct MergeFuncLR(F, PhantomData); + +pub trait FuncLR { + type T; +} + +impl T> FuncLR for F { + type T = T; +} + pub trait OrderingOrBool { + type Out; type MergeResult; fn left(left: L) -> Self::MergeResult; fn right(right: R) -> Self::MergeResult; // "merge" never returns (Some(...), Some(...), ...) so Option> // is appealing but it is always followed by two put_backs, so we think the compiler is // smart enough to optimize it. Or we could move put_backs into "merge". - fn merge(self, left: L, right: R) -> (Option, Option, Self::MergeResult); + fn merge(&mut self, left: L, right: R) -> (Option, Option, Self::MergeResult); fn size_hint(left: SizeHint, right: SizeHint) -> SizeHint; } -impl OrderingOrBool for Ordering { +impl Ordering> OrderingOrBool for MergeFuncLR { + type Out = Ordering; type MergeResult = EitherOrBoth; fn left(left: L) -> Self::MergeResult { EitherOrBoth::Left(left) @@ -209,8 +228,8 @@ impl OrderingOrBool for Ordering { fn right(right: R) -> Self::MergeResult { EitherOrBoth::Right(right) } - fn merge(self, left: L, right: R) -> (Option, Option, Self::MergeResult) { - match self { + fn merge(&mut self, left: L, right: R) -> (Option, Option, Self::MergeResult) { + match self.0(&left, &right) { Ordering::Equal => (None, None, EitherOrBoth::Both(left, right)), Ordering::Less => (None, Some(right), EitherOrBoth::Left(left)), Ordering::Greater => (Some(left), None, EitherOrBoth::Right(right)), @@ -228,7 +247,8 @@ impl OrderingOrBool for Ordering { } } -impl OrderingOrBool for bool { +impl bool> OrderingOrBool for MergeFuncLR { + type Out = bool; type MergeResult = Either; fn left(left: L) -> Self::MergeResult { Either::Left(left) @@ -236,8 +256,8 @@ impl OrderingOrBool for bool { fn right(right: R) -> Self::MergeResult { Either::Right(right) } - fn merge(self, left: L, right: R) -> (Option, Option, Self::MergeResult) { - if self { + fn merge(&mut self, left: L, right: R) -> (Option, Option, Self::MergeResult) { + if self.0(&left, &right) { (None, Some(right), Either::Left(left)) } else { (Some(left), None, Either::Right(right)) @@ -249,7 +269,7 @@ impl OrderingOrBool for bool { } } -impl Clone for MergeJoinBy +impl Clone for InternalMergeJoinBy where I: Iterator, J: Iterator, @@ -260,32 +280,31 @@ where clone_fields!(left, right, cmp_fn); } -impl fmt::Debug for MergeJoinBy +impl fmt::Debug for InternalMergeJoinBy where I: Iterator + fmt::Debug, I::Item: fmt::Debug, J: Iterator + fmt::Debug, J::Item: fmt::Debug, { - debug_fmt_fields!(MergeJoinBy, left, right); + debug_fmt_fields!(InternalMergeJoinBy, left, right); } -impl Iterator for MergeJoinBy +impl Iterator for InternalMergeJoinBy where I: Iterator, J: Iterator, - F: FnMut(&I::Item, &J::Item) -> T, - T: OrderingOrBool, + F: OrderingOrBool, { - type Item = T::MergeResult; + type Item = F::MergeResult; fn next(&mut self) -> Option { match (self.left.next(), self.right.next()) { (None, None) => None, - (Some(left), None) => Some(T::left(left)), - (None, Some(right)) => Some(T::right(right)), + (Some(left), None) => Some(F::left(left)), + (None, Some(right)) => Some(F::right(right)), (Some(left), Some(right)) => { - let (left, right, next) = (self.cmp_fn)(&left, &right).merge(left, right); + let (left, right, next) = self.cmp_fn.merge(left, right); if let Some(left) = left { self.left.put_back(left); } @@ -298,7 +317,7 @@ where } fn size_hint(&self) -> SizeHint { - T::size_hint(self.left.size_hint(), self.right.size_hint()) + F::size_hint(self.left.size_hint(), self.right.size_hint()) } fn count(mut self) -> usize { @@ -310,7 +329,7 @@ where (None, Some(_right)) => break count + 1 + self.right.into_parts().1.count(), (Some(left), Some(right)) => { count += 1; - let (left, right, _) = (self.cmp_fn)(&left, &right).merge(left, right); + let (left, right, _) = self.cmp_fn.merge(left, right); if let Some(left) = left { self.left.put_back(left); } @@ -328,13 +347,13 @@ where match (self.left.next(), self.right.next()) { (None, None) => break previous_element, (Some(left), None) => { - break Some(T::left(self.left.into_parts().1.last().unwrap_or(left))) + break Some(F::left(self.left.into_parts().1.last().unwrap_or(left))) } (None, Some(right)) => { - break Some(T::right(self.right.into_parts().1.last().unwrap_or(right))) + break Some(F::right(self.right.into_parts().1.last().unwrap_or(right))) } (Some(left), Some(right)) => { - let (left, right, elem) = (self.cmp_fn)(&left, &right).merge(left, right); + let (left, right, elem) = self.cmp_fn.merge(left, right); if let Some(left) = left { self.left.put_back(left); } @@ -355,10 +374,10 @@ where n -= 1; match (self.left.next(), self.right.next()) { (None, None) => break None, - (Some(_left), None) => break self.left.nth(n).map(T::left), - (None, Some(_right)) => break self.right.nth(n).map(T::right), + (Some(_left), None) => break self.left.nth(n).map(F::left), + (None, Some(_right)) => break self.right.nth(n).map(F::right), (Some(left), Some(right)) => { - let (left, right, _) = (self.cmp_fn)(&left, &right).merge(left, right); + let (left, right, _) = self.cmp_fn.merge(left, right); if let Some(left) = left { self.left.put_back(left); } From 1d0d801abe5cbe2c15344a61cc8f9edaedd66574 Mon Sep 17 00:00:00 2001 From: Philippe-Cholet Date: Thu, 24 Aug 2023 12:02:52 +0200 Subject: [PATCH 2/5] Make `MergeBy` an alias of `InternalMergeJoinBy` Being now an alias, we can remove various implementations. `FusedIterator` will be inserted back soon. We don't need `MergePredicate` anymore because we use two new implementations of `OrderingOrBool`. --- src/merge_join.rs | 148 +++++++++++++++------------------------------- 1 file changed, 49 insertions(+), 99 deletions(-) diff --git a/src/merge_join.rs b/src/merge_join.rs index 45560471f..08c6dda67 100644 --- a/src/merge_join.rs +++ b/src/merge_join.rs @@ -1,7 +1,6 @@ use std::cmp::Ordering; use std::fmt; use std::iter::Fuse; -use std::iter::{FusedIterator, Peekable}; use std::marker::PhantomData; use either::Either; @@ -12,19 +11,9 @@ use crate::size_hint::{self, SizeHint}; #[cfg(doc)] use crate::Itertools; -pub trait MergePredicate { - fn merge_pred(&mut self, a: &T, b: &T) -> bool; -} - #[derive(Clone, Debug)] pub struct MergeLte; -impl MergePredicate for MergeLte { - fn merge_pred(&mut self, a: &T, b: &T) -> bool { - a <= b - } -} - /// An iterator adaptor that merges the two base iterators in ascending order. /// If both base iterators are sorted (ascending), the result is sorted. /// @@ -62,104 +51,21 @@ where /// Iterator element type is `I::Item`. /// /// See [`.merge_by()`](crate::Itertools::merge_by) for more information. -#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] -pub struct MergeBy -where - I: Iterator, - J: Iterator, -{ - a: Peekable, - b: Peekable, - fused: Option, - cmp: F, -} - -impl fmt::Debug for MergeBy -where - I: Iterator + fmt::Debug, - J: Iterator + fmt::Debug, - I::Item: fmt::Debug, -{ - debug_fmt_fields!(MergeBy, a, b); -} - -impl bool> MergePredicate for F { - fn merge_pred(&mut self, a: &T, b: &T) -> bool { - self(a, b) - } -} +pub type MergeBy = InternalMergeJoinBy; /// Create a `MergeBy` iterator. pub fn merge_by_new(a: I, b: J, cmp: F) -> MergeBy where I: IntoIterator, J: IntoIterator, - F: MergePredicate, { - MergeBy { - a: a.into_iter().peekable(), - b: b.into_iter().peekable(), - fused: None, - cmp, - } -} - -impl Clone for MergeBy -where - I: Iterator, - J: Iterator, - Peekable: Clone, - Peekable: Clone, - F: Clone, -{ - clone_fields!(a, b, fused, cmp); -} - -impl Iterator for MergeBy -where - I: Iterator, - J: Iterator, - F: MergePredicate, -{ - type Item = I::Item; - - fn next(&mut self) -> Option { - let less_than = match self.fused { - Some(lt) => lt, - None => match (self.a.peek(), self.b.peek()) { - (Some(a), Some(b)) => self.cmp.merge_pred(a, b), - (Some(_), None) => { - self.fused = Some(true); - true - } - (None, Some(_)) => { - self.fused = Some(false); - false - } - (None, None) => return None, - }, - }; - if less_than { - self.a.next() - } else { - self.b.next() - } - } - - fn size_hint(&self) -> (usize, Option) { - // Not ExactSizeIterator because size may be larger than usize - size_hint::add(self.a.size_hint(), self.b.size_hint()) + InternalMergeJoinBy { + left: put_back(a.into_iter().fuse()), + right: put_back(b.into_iter().fuse()), + cmp_fn: cmp, } } -impl FusedIterator for MergeBy -where - I: FusedIterator, - J: FusedIterator, - F: MergePredicate, -{ -} - /// Return an iterator adaptor that merge-joins items from the two base iterators in ascending order. /// /// [`IntoIterator`] enabled version of [`Itertools::merge_join_by`]. @@ -269,6 +175,50 @@ impl bool> OrderingOrBool for MergeFuncLR bool> OrderingOrBool for F { + type Out = bool; + type MergeResult = T; + fn left(left: T) -> Self::MergeResult { + left + } + fn right(right: T) -> Self::MergeResult { + right + } + fn merge(&mut self, left: T, right: T) -> (Option, Option, Self::MergeResult) { + if self(&left, &right) { + (None, Some(right), left) + } else { + (Some(left), None, right) + } + } + fn size_hint(left: SizeHint, right: SizeHint) -> SizeHint { + // Not ExactSizeIterator because size may be larger than usize + size_hint::add(left, right) + } +} + +impl OrderingOrBool for MergeLte { + type Out = bool; + type MergeResult = T; + fn left(left: T) -> Self::MergeResult { + left + } + fn right(right: T) -> Self::MergeResult { + right + } + fn merge(&mut self, left: T, right: T) -> (Option, Option, Self::MergeResult) { + if left <= right { + (None, Some(right), left) + } else { + (Some(left), None, right) + } + } + fn size_hint(left: SizeHint, right: SizeHint) -> SizeHint { + // Not ExactSizeIterator because size may be larger than usize + size_hint::add(left, right) + } +} + impl Clone for InternalMergeJoinBy where I: Iterator, From 535310b3594245956907976b0e22e2bfe7b9bb28 Mon Sep 17 00:00:00 2001 From: Philippe-Cholet Date: Thu, 24 Aug 2023 12:11:23 +0200 Subject: [PATCH 3/5] Implement `FusedIterator` for `InternalMergeJoinBy` `I` and `J` are fused by `InternalMergeJoinBy` so we don't need them to implement `FusedIterator`. --- src/merge_join.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/merge_join.rs b/src/merge_join.rs index 08c6dda67..6415b05c1 100644 --- a/src/merge_join.rs +++ b/src/merge_join.rs @@ -1,6 +1,6 @@ use std::cmp::Ordering; use std::fmt; -use std::iter::Fuse; +use std::iter::{Fuse, FusedIterator}; use std::marker::PhantomData; use either::Either; @@ -339,3 +339,11 @@ where } } } + +impl FusedIterator for InternalMergeJoinBy +where + I: Iterator, + J: Iterator, + F: OrderingOrBool, +{ +} From 2ce8986b17115c33b577b20d5de4cdcddb8de4e2 Mon Sep 17 00:00:00 2001 From: Philippe-Cholet Date: Thu, 31 Aug 2023 16:33:27 +0200 Subject: [PATCH 4/5] Unalias `MergeBy` --- src/merge_join.rs | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/src/merge_join.rs b/src/merge_join.rs index 6415b05c1..848a5363c 100644 --- a/src/merge_join.rs +++ b/src/merge_join.rs @@ -51,7 +51,12 @@ where /// Iterator element type is `I::Item`. /// /// See [`.merge_by()`](crate::Itertools::merge_by) for more information. -pub type MergeBy = InternalMergeJoinBy; +#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] +pub struct MergeBy { + left: PutBack>, + right: PutBack>, + cmp_fn: F, +} /// Create a `MergeBy` iterator. pub fn merge_by_new(a: I, b: J, cmp: F) -> MergeBy @@ -59,7 +64,7 @@ where I: IntoIterator, J: IntoIterator, { - InternalMergeJoinBy { + MergeBy { left: put_back(a.into_iter().fuse()), right: put_back(b.into_iter().fuse()), cmp_fn: cmp, @@ -79,7 +84,7 @@ where J: IntoIterator, F: FnMut(&I::Item, &J::Item) -> T, { - InternalMergeJoinBy { + MergeBy { left: put_back(left.into_iter().fuse()), right: put_back(right.into_iter().fuse()), cmp_fn: MergeFuncLR(cmp_fn, PhantomData), @@ -89,18 +94,8 @@ where /// An iterator adaptor that merge-joins items from the two base iterators in ascending order. /// /// See [`.merge_join_by()`](crate::Itertools::merge_join_by) for more information. -pub type MergeJoinBy = InternalMergeJoinBy< - I, - J, - MergeFuncLR::Item, ::Item>>::T>, ->; - -#[must_use = "iterator adaptors are lazy and do nothing unless consumed"] -pub struct InternalMergeJoinBy { - left: PutBack>, - right: PutBack>, - cmp_fn: F, -} +pub type MergeJoinBy = + MergeBy::Item, ::Item>>::T>>; #[derive(Clone, Debug)] pub struct MergeFuncLR(F, PhantomData); @@ -219,7 +214,7 @@ impl OrderingOrBool for MergeLte { } } -impl Clone for InternalMergeJoinBy +impl Clone for MergeBy where I: Iterator, J: Iterator, @@ -230,17 +225,17 @@ where clone_fields!(left, right, cmp_fn); } -impl fmt::Debug for InternalMergeJoinBy +impl fmt::Debug for MergeBy where I: Iterator + fmt::Debug, I::Item: fmt::Debug, J: Iterator + fmt::Debug, J::Item: fmt::Debug, { - debug_fmt_fields!(InternalMergeJoinBy, left, right); + debug_fmt_fields!(MergeBy, left, right); } -impl Iterator for InternalMergeJoinBy +impl Iterator for MergeBy where I: Iterator, J: Iterator, @@ -340,7 +335,7 @@ where } } -impl FusedIterator for InternalMergeJoinBy +impl FusedIterator for MergeBy where I: Iterator, J: Iterator, From 9e267b64261ad1203fa281ca234bcb073b02ffd6 Mon Sep 17 00:00:00 2001 From: Philippe-Cholet Date: Wed, 20 Sep 2023 16:50:42 +0200 Subject: [PATCH 5/5] Remove `OrderingOrBool::Out` Apparently not needed anymore. --- src/merge_join.rs | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/merge_join.rs b/src/merge_join.rs index 848a5363c..c83159186 100644 --- a/src/merge_join.rs +++ b/src/merge_join.rs @@ -109,7 +109,6 @@ impl T> FuncLR for F { } pub trait OrderingOrBool { - type Out; type MergeResult; fn left(left: L) -> Self::MergeResult; fn right(right: R) -> Self::MergeResult; @@ -121,7 +120,6 @@ pub trait OrderingOrBool { } impl Ordering> OrderingOrBool for MergeFuncLR { - type Out = Ordering; type MergeResult = EitherOrBoth; fn left(left: L) -> Self::MergeResult { EitherOrBoth::Left(left) @@ -149,7 +147,6 @@ impl Ordering> OrderingOrBool for MergeFuncLR bool> OrderingOrBool for MergeFuncLR { - type Out = bool; type MergeResult = Either; fn left(left: L) -> Self::MergeResult { Either::Left(left) @@ -171,7 +168,6 @@ impl bool> OrderingOrBool for MergeFuncLR bool> OrderingOrBool for F { - type Out = bool; type MergeResult = T; fn left(left: T) -> Self::MergeResult { left @@ -193,7 +189,6 @@ impl bool> OrderingOrBool for F { } impl OrderingOrBool for MergeLte { - type Out = bool; type MergeResult = T; fn left(left: T) -> Self::MergeResult { left @@ -235,11 +230,11 @@ where debug_fmt_fields!(MergeBy, left, right); } -impl Iterator for MergeBy +impl Iterator for MergeBy where I: Iterator, J: Iterator, - F: OrderingOrBool, + F: OrderingOrBool, { type Item = F::MergeResult; @@ -335,10 +330,10 @@ where } } -impl FusedIterator for MergeBy +impl FusedIterator for MergeBy where I: Iterator, J: Iterator, - F: OrderingOrBool, + F: OrderingOrBool, { }