From b7d639b6441c16f28f89d895424e55f002e35388 Mon Sep 17 00:00:00 2001 From: meooow25 Date: Sun, 1 Sep 2024 14:04:51 +0530 Subject: [PATCH] Build Set and Map more efficiently Use "Builder"s to implement some Set and Map construction functions. As a result, some have become good consumers in terms of list fusion, and all are now O(n) for non-decreasing input. Fusible Fusible O(n) for O(n) for before after before after Set.fromList No Yes Strict incr Non-decr Set.map - - Strict incr Non-decr Map.fromList No Yes Strict incr Non-decr Map.fromListWith Yes Yes Never Non-decr Map.fromListWithKey Yes Yes Never Non-decr Map.mapKeys - - Strict incr Non-decr Map.mapKeysWith - - Never Non-decr --- containers/src/Data/Map/Internal.hs | 119 +++++++++++---------- containers/src/Data/Map/Strict/Internal.hs | 104 +++++++++--------- containers/src/Data/Set/Internal.hs | 94 ++++++++-------- 3 files changed, 167 insertions(+), 150 deletions(-) diff --git a/containers/src/Data/Map/Internal.hs b/containers/src/Data/Map/Internal.hs index 2dc30a929..79831531a 100644 --- a/containers/src/Data/Map/Internal.hs +++ b/containers/src/Data/Map/Internal.hs @@ -366,6 +366,9 @@ module Data.Map.Internal ( , Identity(..) , Stack(..) , foldl'Stack + , MapBuilder(..) + , emptyB + , finishB -- Used by Map.Merge.Lazy , mapWhenMissing @@ -387,7 +390,6 @@ import Data.Semigroup (Semigroup((<>))) #endif import Control.Applicative (Const (..)) import Control.DeepSeq (NFData(rnf)) -import Data.Bits (shiftL, shiftR) import qualified Data.Foldable as Foldable import Data.Bifoldable import Utils.Containers.Internal.Prelude hiding @@ -3242,7 +3244,7 @@ mapAccumRWithKey f a (Bin sx kx x l r) = -- > mapKeys (\ _ -> 3) (fromList [(1,"b"), (2,"a"), (3,"d"), (4,"c")]) == singleton 3 "c" mapKeys :: Ord k2 => (k1->k2) -> Map k1 a -> Map k2 a -mapKeys f = fromList . foldrWithKey (\k x xs -> (f k, x) : xs) [] +mapKeys f = finishB . foldlWithKey' (\b kx x -> insertB (f kx) x b) emptyB #if __GLASGOW_HASKELL__ {-# INLINABLE mapKeys #-} #endif @@ -3261,7 +3263,8 @@ mapKeys f = fromList . foldrWithKey (\k x xs -> (f k, x) : xs) [] -- Also see the performance note on 'fromListWith'. mapKeysWith :: Ord k2 => (a -> a -> a) -> (k1->k2) -> Map k1 a -> Map k2 a -mapKeysWith c f = fromListWith c . foldrWithKey (\k x xs -> (f k, x) : xs) [] +mapKeysWith c f = + finishB . foldlWithKey' (\b kx x -> insertWithB c (f kx) x b) emptyB #if __GLASGOW_HASKELL__ {-# INLINABLE mapKeysWith #-} #endif @@ -3510,46 +3513,9 @@ instance (Ord k) => GHCExts.IsList (Map k v) where -- > fromList [(5,"a"), (3,"b"), (5, "c")] == fromList [(5,"c"), (3,"b")] -- > fromList [(5,"c"), (3,"b"), (5, "a")] == fromList [(5,"a"), (3,"b")] --- For some reason, when 'singleton' is used in fromList or in --- create, it is not inlined, so we inline it manually. fromList :: Ord k => [(k,a)] -> Map k a -fromList [] = Tip -fromList [(kx, x)] = Bin 1 kx x Tip Tip -fromList ((kx0, x0) : xs0) | not_ordered kx0 xs0 = fromList' (Bin 1 kx0 x0 Tip Tip) xs0 - | otherwise = go (1::Int) (Bin 1 kx0 x0 Tip Tip) xs0 - where - not_ordered _ [] = False - not_ordered kx ((ky,_) : _) = kx >= ky - {-# INLINE not_ordered #-} - - fromList' t0 xs = Foldable.foldl' ins t0 xs - where ins t (k,x) = insert k x t - - go !_ t [] = t - go _ t [(kx, x)] = insertMax kx x t - go s l xs@((kx, x) : xss) | not_ordered kx xss = fromList' l xs - | otherwise = case create s xss of - (r, ys, []) -> go (s `shiftL` 1) (link kx x l r) ys - (r, _, ys) -> fromList' (link kx x l r) ys - - -- The create is returning a triple (tree, xs, ys). Both xs and ys - -- represent not yet processed elements and only one of them can be nonempty. - -- If ys is nonempty, the keys in ys are not ordered with respect to tree - -- and must be inserted using fromList'. Otherwise the keys have been - -- ordered so far. - create !_ [] = (Tip, [], []) - create s xs@(xp : xss) - | s == 1 = case xp of (kx, x) | not_ordered kx xss -> (Bin 1 kx x Tip Tip, [], xss) - | otherwise -> (Bin 1 kx x Tip Tip, xss, []) - | otherwise = case create (s `shiftR` 1) xs of - res@(_, [], _) -> res - (l, [(ky, y)], zs) -> (insertMax ky y l, [], zs) - (l, ys@((ky, y):yss), _) | not_ordered ky yss -> (l, [], ys) - | otherwise -> case create (s `shiftR` 1) yss of - (r, zs, ws) -> (link ky y l r, zs, ws) -#if __GLASGOW_HASKELL__ -{-# INLINABLE fromList #-} -#endif +fromList xs = finishB (Foldable.foldl' (\b (kx, x) -> insertB kx x b) emptyB xs) +{-# INLINE fromList #-} -- INLINE for fusion -- | \(O(n \log n)\). Build a map from a list of key\/value pairs with a combining function. See also 'fromAscListWith'. -- @@ -3588,11 +3554,9 @@ fromList ((kx0, x0) : xs0) | not_ordered kx0 xs0 = fromList' (Bin 1 kx0 x0 Tip T -- > fromListWith (++) $ reverse $ map (\(k, v) -> (k, [v])) someListOfTuples fromListWith :: Ord k => (a -> a -> a) -> [(k,a)] -> Map k a -fromListWith f xs - = fromListWithKey (\_ x y -> f x y) xs -#if __GLASGOW_HASKELL__ -{-# INLINABLE fromListWith #-} -#endif +fromListWith f xs = + finishB (Foldable.foldl' (\b (kx, x) -> insertWithB f kx x b) emptyB xs) +{-# INLINE fromListWith #-} -- INLINE for fusion -- | \(O(n \log n)\). Build a map from a list of key\/value pairs with a combining function. See also 'fromAscListWithKey'. -- @@ -3603,13 +3567,9 @@ fromListWith f xs -- Also see the performance note on 'fromListWith'. fromListWithKey :: Ord k => (k -> a -> a -> a) -> [(k,a)] -> Map k a -fromListWithKey f xs - = Foldable.foldl' ins empty xs - where - ins t (k,x) = insertWithKey f k x t -#if __GLASGOW_HASKELL__ -{-# INLINABLE fromListWithKey #-} -#endif +fromListWithKey f xs = + finishB (Foldable.foldl' (\b (kx, x) -> insertWithB (f kx) kx x b) emptyB xs) +{-# INLINE fromListWithKey #-} -- INLINE for fusion -- | \(O(n)\). Convert the map to a list of key\/value pairs. Subject to list fusion. -- @@ -4004,6 +3964,57 @@ splitMember k0 m = case go k0 m of data StrictTriple a b c = StrictTriple !a !b !c +{-------------------------------------------------------------------- + MapBuilder +--------------------------------------------------------------------} + +-- See Note [SetBuilder] in Data.Set.Internal + +data MapBuilder k a + = BAsc !(Stack k a) + | BMap !(Map k a) + +-- Empty builder. +emptyB :: MapBuilder k a +emptyB = BAsc Nada + +-- Insert a key and value. Replaces the old value if one already exists for +-- the key. +insertB :: Ord k => k -> a -> MapBuilder k a -> MapBuilder k a +insertB !ky y b = case b of + BAsc stk -> case stk of + Push kx x l stk' -> case compare ky kx of + LT -> BMap (insert ky y (ascLinkAll stk)) + EQ -> BAsc (Push ky y l stk') + GT -> case l of + Tip -> BAsc (ascLinkTop stk' 1 (singleton kx x) ky y) + Bin{} -> BAsc (Push ky y Tip stk) + Nada -> BAsc (Push ky y Tip Nada) + BMap m -> BMap (insert ky y m) +{-# INLINE insertB #-} + +-- Insert a key and value. The new value is combined with the old value if one +-- already exists for the key. +insertWithB + :: Ord k => (a -> a -> a) -> k -> a -> MapBuilder k a -> MapBuilder k a +insertWithB f !ky y b = case b of + BAsc stk -> case stk of + Push kx x l stk' -> case compare ky kx of + LT -> BMap (insertWith f ky y (ascLinkAll stk)) + EQ -> BAsc (Push ky (f y x) l stk') + GT -> case l of + Tip -> BAsc (ascLinkTop stk' 1 (singleton kx x) ky y) + Bin{} -> BAsc (Push ky y Tip stk) + Nada -> BAsc (Push ky y Tip Nada) + BMap m -> BMap (insertWith f ky y m) +{-# INLINE insertWithB #-} + +-- Finalize the builder into a Map. +finishB :: MapBuilder k a -> Map k a +finishB (BAsc stk) = ascLinkAll stk +finishB (BMap m) = m +{-# INLINABLE finishB #-} + {-------------------------------------------------------------------- Utility functions that maintain the balance properties of the tree. All constructors assume that all values in [l] < [k] and all values diff --git a/containers/src/Data/Map/Strict/Internal.hs b/containers/src/Data/Map/Strict/Internal.hs index c93003e27..89eb83db5 100644 --- a/containers/src/Data/Map/Strict/Internal.hs +++ b/containers/src/Data/Map/Strict/Internal.hs @@ -336,6 +336,9 @@ import Data.Map.Internal , descLinkTop , descLinkAll , Stack (..) + , MapBuilder(..) + , emptyB + , finishB , (!) , (!?) , (\\) @@ -375,7 +378,6 @@ import Data.Map.Internal , foldrWithKey , foldrWithKey' , glue - , insertMax , intersection , isProperSubmapOf , isProperSubmapOfBy @@ -433,7 +435,6 @@ import qualified Data.Set.Internal as Set import qualified Data.Map.Internal as L import Utils.Containers.Internal.StrictPair -import Data.Bits (shiftL, shiftR) #ifdef __GLASGOW_HASKELL__ import Data.Coerce #endif @@ -1451,7 +1452,8 @@ mapAccumRWithKey f a (Bin sx kx x l r) = -- Also see the performance note on 'fromListWith'. mapKeysWith :: Ord k2 => (a -> a -> a) -> (k1->k2) -> Map k1 a -> Map k2 a -mapKeysWith c f = fromListWith c . foldrWithKey (\k x xs -> (f k, x) : xs) [] +mapKeysWith c f = + finishB . foldlWithKey' (\b kx x -> insertWithB c (f kx) x b) emptyB #if __GLASGOW_HASKELL__ {-# INLINABLE mapKeysWith #-} #endif @@ -1492,46 +1494,9 @@ fromArgSet (Set.Bin sz (Arg x v) l r) = v `seq` Bin sz x v (fromArgSet l) (fromA -- > fromList [(5,"a"), (3,"b"), (5, "c")] == fromList [(5,"c"), (3,"b")] -- > fromList [(5,"c"), (3,"b"), (5, "a")] == fromList [(5,"a"), (3,"b")] --- For some reason, when 'singleton' is used in fromList or in --- create, it is not inlined, so we inline it manually. fromList :: Ord k => [(k,a)] -> Map k a -fromList [] = Tip -fromList [(kx, x)] = x `seq` Bin 1 kx x Tip Tip -fromList ((kx0, x0) : xs0) | not_ordered kx0 xs0 = x0 `seq` fromList' (Bin 1 kx0 x0 Tip Tip) xs0 - | otherwise = x0 `seq` go (1::Int) (Bin 1 kx0 x0 Tip Tip) xs0 - where - not_ordered _ [] = False - not_ordered kx ((ky,_) : _) = kx >= ky - {-# INLINE not_ordered #-} - - fromList' t0 xs = Foldable.foldl' ins t0 xs - where ins t (k,x) = insert k x t - - go !_ t [] = t - go _ t [(kx, x)] = x `seq` insertMax kx x t - go s l xs@((kx, x) : xss) | not_ordered kx xss = fromList' l xs - | otherwise = case create s xss of - (r, ys, []) -> x `seq` go (s `shiftL` 1) (link kx x l r) ys - (r, _, ys) -> x `seq` fromList' (link kx x l r) ys - - -- The create is returning a triple (tree, xs, ys). Both xs and ys - -- represent not yet processed elements and only one of them can be nonempty. - -- If ys is nonempty, the keys in ys are not ordered with respect to tree - -- and must be inserted using fromList'. Otherwise the keys have been - -- ordered so far. - create !_ [] = (Tip, [], []) - create s xs@(xp : xss) - | s == 1 = case xp of (kx, x) | not_ordered kx xss -> x `seq` (Bin 1 kx x Tip Tip, [], xss) - | otherwise -> x `seq` (Bin 1 kx x Tip Tip, xss, []) - | otherwise = case create (s `shiftR` 1) xs of - res@(_, [], _) -> res - (l, [(ky, y)], zs) -> y `seq` (insertMax ky y l, [], zs) - (l, ys@((ky, y):yss), _) | not_ordered ky yss -> (l, [], ys) - | otherwise -> case create (s `shiftR` 1) yss of - (r, zs, ws) -> y `seq` (link ky y l r, zs, ws) -#if __GLASGOW_HASKELL__ -{-# INLINABLE fromList #-} -#endif +fromList xs = finishB (Foldable.foldl' (\b (kx, x) -> insertB kx x b) emptyB xs) +{-# INLINE fromList #-} -- INLINE for fusion -- | \(O(n \log n)\). Build a map from a list of key\/value pairs with a combining function. See also 'fromAscListWith'. -- @@ -1570,11 +1535,9 @@ fromList ((kx0, x0) : xs0) | not_ordered kx0 xs0 = x0 `seq` fromList' (Bin 1 kx0 -- > fromListWith (++) $ reverse $ map (\(k, v) -> (k, [v])) someListOfTuples fromListWith :: Ord k => (a -> a -> a) -> [(k,a)] -> Map k a -fromListWith f xs - = fromListWithKey (\_ x y -> f x y) xs -#if __GLASGOW_HASKELL__ -{-# INLINABLE fromListWith #-} -#endif +fromListWith f xs = + finishB (Foldable.foldl' (\b (kx, x) -> insertWithB f kx x b) emptyB xs) +{-# INLINE fromListWith #-} -- INLINE for fusion -- | \(O(n \log n)\). Build a map from a list of key\/value pairs with a combining function. See also 'fromAscListWithKey'. -- @@ -1585,13 +1548,9 @@ fromListWith f xs -- Also see the performance note on 'fromListWith'. fromListWithKey :: Ord k => (k -> a -> a -> a) -> [(k,a)] -> Map k a -fromListWithKey f xs - = Foldable.foldl' ins empty xs - where - ins t (k,x) = insertWithKey f k x t -#if __GLASGOW_HASKELL__ -{-# INLINABLE fromListWithKey #-} -#endif +fromListWithKey f xs = + finishB (Foldable.foldl' (\b (kx, x) -> insertWithB (f kx) kx x b) emptyB xs) +{-# INLINE fromListWithKey #-} -- INLINE for fusion {-------------------------------------------------------------------- Building trees from ascending/descending lists can be done in linear time. @@ -1756,3 +1715,40 @@ fromDistinctDescList = descLinkAll . Foldable.foldl' next Nada next (Push ky y Tip stk) (!kx, !x) = descLinkTop kx x 1 (singleton ky y) stk next stk (!ky, !y) = Push ky y Tip stk {-# INLINE fromDistinctDescList #-} -- INLINE for fusion + +{-------------------------------------------------------------------- + MapBuilder +--------------------------------------------------------------------} + +-- Insert a key and value. Replaces the old value if one already exists for +-- the key. Strict in the value. +insertB :: Ord k => k -> a -> MapBuilder k a -> MapBuilder k a +insertB !ky !y b = case b of + BAsc stk -> case stk of + Push kx x l stk' -> case compare ky kx of + LT -> BMap (insert ky y (ascLinkAll stk)) + EQ -> BAsc (Push ky y l stk') + GT -> case l of + Tip -> BAsc (ascLinkTop stk' 1 (singleton kx x) ky y) + Bin{} -> BAsc (Push ky y Tip stk) + Nada -> BAsc (Push ky y Tip Nada) + BMap m -> BMap (insert ky y m) +{-# INLINE insertB #-} + +-- Insert a key and value. The new value is combined with the old value if one +-- already exists for the key. Strict in the inserted value. +insertWithB + :: Ord k => (a -> a -> a) -> k -> a -> MapBuilder k a -> MapBuilder k a +insertWithB f !ky y b = case b of + BAsc stk -> case stk of + Push kx x l stk' -> case compare ky kx of + LT -> BMap (insertWith f ky y (ascLinkAll stk)) + EQ -> BAsc (push ky (f y x) l stk') + GT -> case l of + Tip -> BAsc (ascLinkTop stk' 1 (singleton kx x) ky y) + Bin{} -> BAsc (push ky y Tip stk) + Nada -> BAsc (push ky y Tip Nada) + BMap m -> BMap (insertWith f ky y m) + where + push kx !x = Push kx x +{-# INLINE insertWithB #-} diff --git a/containers/src/Data/Set/Internal.hs b/containers/src/Data/Set/Internal.hs index 805f5c361..a5fc2f79a 100644 --- a/containers/src/Data/Set/Internal.hs +++ b/containers/src/Data/Set/Internal.hs @@ -238,7 +238,6 @@ import Utils.Containers.Internal.Prelude hiding import Prelude () import Control.Applicative (Const(..)) import qualified Data.List as List -import Data.Bits (shiftL, shiftR) import Data.Semigroup (Semigroup(stimes)) import Data.List.NonEmpty (NonEmpty(..)) #if !(MIN_VERSION_base(4,11,0)) @@ -981,7 +980,7 @@ partition p0 t0 = toPair $ go p0 t0 -- for some @(x,y)@, @x \/= y && f x == f y@ map :: Ord b => (a->b) -> Set a -> Set b -map f = fromList . List.map f . toList +map f = finishB . foldl' (\b x -> insertB (f x) b) emptyB #if __GLASGOW_HASKELL__ {-# INLINABLE map #-} #endif @@ -1125,47 +1124,9 @@ foldlFB = foldl -- | \(O(n \log n)\). Create a set from a list of elements. -- -- If the elements are ordered, a linear-time implementation is used. - --- For some reason, when 'singleton' is used in fromList or in --- create, it is not inlined, so we inline it manually. fromList :: Ord a => [a] -> Set a -fromList [] = Tip -fromList [x] = Bin 1 x Tip Tip -fromList (x0 : xs0) | not_ordered x0 xs0 = fromList' (Bin 1 x0 Tip Tip) xs0 - | otherwise = go (1::Int) (Bin 1 x0 Tip Tip) xs0 - where - not_ordered _ [] = False - not_ordered x (y : _) = x >= y - {-# INLINE not_ordered #-} - - fromList' t0 xs = Foldable.foldl' ins t0 xs - where ins t x = insert x t - - go !_ t [] = t - go _ t [x] = insertMax x t - go s l xs@(x : xss) | not_ordered x xss = fromList' l xs - | otherwise = case create s xss of - (r, ys, []) -> go (s `shiftL` 1) (link x l r) ys - (r, _, ys) -> fromList' (link x l r) ys - - -- The create is returning a triple (tree, xs, ys). Both xs and ys - -- represent not yet processed elements and only one of them can be nonempty. - -- If ys is nonempty, the keys in ys are not ordered with respect to tree - -- and must be inserted using fromList'. Otherwise the keys have been - -- ordered so far. - create !_ [] = (Tip, [], []) - create s xs@(x : xss) - | s == 1 = if not_ordered x xss then (Bin 1 x Tip Tip, [], xss) - else (Bin 1 x Tip Tip, xss, []) - | otherwise = case create (s `shiftR` 1) xs of - res@(_, [], _) -> res - (l, [y], zs) -> (insertMax y l, [], zs) - (l, ys@(y:yss), _) | not_ordered y yss -> (l, [], ys) - | otherwise -> case create (s `shiftR` 1) yss of - (r, zs, ws) -> (link y l r, zs, ws) -#if __GLASGOW_HASKELL__ -{-# INLINABLE fromList #-} -#endif +fromList xs = finishB (Foldable.foldl' (flip insertB) emptyB xs) +{-# INLINE fromList #-} -- INLINE for fusion {-------------------------------------------------------------------- Building trees from ascending/descending lists can be done in linear time. @@ -1640,6 +1601,55 @@ spanAntitone p0 m = toPair (go p0 m) | p x = let u :*: v = go p r in link x l u :*: v | otherwise = let u :*: v = go p l in u :*: link x v r +{-------------------------------------------------------------------- + SetBuilder +--------------------------------------------------------------------} + +-- Note [SetBuilder] +-- ~~~~~~~~~~~~~~~~~ +-- SetBuilder serves as an accumulator for element-by-element construction of +-- a Set. It can be used in folds to construct sets. This plays nicely with list +-- fusion if the structure folded over is a list, as in fromList and friends. +-- +-- As long as the elements are sorted, insertB accumulates them in a Stack, +-- just as fromDistinctAscList does. On encountering an element out of order, +-- it builds a Set from the Stack and switches to using insert for all future +-- elements. This gives us O(n log n) worst case but O(n) if the elements are +-- already sorted. +-- +-- More complicated implementations are possible, such as repeatedly +-- accumulating runs of increasing elements in Stacks (not just once) and +-- union-ing with an accumulated Set, but this makes the worst case somewhat +-- slower (~10%). + +data SetBuilder a + = BAsc !(Stack a) + | BSet !(Set a) + +-- Empty builder. +emptyB :: SetBuilder a +emptyB = BAsc Nada + +-- Insert an element. Replaces the old element if an equal element already +-- exists. +insertB :: Ord a => a -> SetBuilder a -> SetBuilder a +insertB !y b = case b of + BAsc stk -> case stk of + Push x l stk' -> case compare y x of + LT -> BSet (insert y (ascLinkAll stk)) + EQ -> BAsc (Push y l stk') + GT -> case l of + Tip -> BAsc (ascLinkTop stk' 1 (singleton x) y) + Bin{} -> BAsc (Push y Tip stk) + Nada -> BAsc (Push y Tip Nada) + BSet m -> BSet (insert y m) +{-# INLINE insertB #-} + +-- Finalize the builder into a Set. +finishB :: SetBuilder a -> Set a +finishB (BAsc stk) = ascLinkAll stk +finishB (BSet s) = s +{-# INLINABLE finishB #-} {-------------------------------------------------------------------- Utility functions that maintain the balance properties of the tree.