From 49a0871035fa7272203c8c8dc936748387b81ed9 Mon Sep 17 00:00:00 2001 From: Huw Campbell Date: Thu, 11 Jan 2024 18:45:30 +1100 Subject: [PATCH 1/4] Simplify how calculation of rebalance is required. The current logic uses unsafe pointer equality to determine if a rebalance is required, but this case is niche, and we can instead pass this information upwards. This performs well in benchmarks. --- containers/src/Data/Map/Internal.hs | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/containers/src/Data/Map/Internal.hs b/containers/src/Data/Map/Internal.hs index 99531e3a1..5234a71f6 100644 --- a/containers/src/Data/Map/Internal.hs +++ b/containers/src/Data/Map/Internal.hs @@ -776,24 +776,24 @@ singleton k x = Bin 1 k x Tip Tip -- See Note: Type of local 'go' function -- See Note: Avoiding worker/wrapper insert :: Ord k => k -> a -> Map k a -> Map k a -insert kx0 = go kx0 kx0 +insert kx0 ax0 m0 = + case go kx0 ax0 m0 of (m :*: _) -> m where -- Unlike insertR, we only get sharing here -- when the inserted value is at the same address -- as the present value. We try anyway; this condition -- seems particularly likely to occur in 'union'. - go :: Ord k => k -> k -> a -> Map k a -> Map k a - go orig !_ x Tip = singleton (lazy orig) x - go orig !kx x t@(Bin sz ky y l r) = + go :: Ord k => k -> a -> Map k a -> StrictPair (Map k a) Bool + go !kx x Tip = singleton kx x :*: False + go !kx x (Bin sz ky y l r) = case compare kx ky of - LT | l' `ptrEq` l -> t - | otherwise -> balanceL ky y l' r - where !l' = go orig kx x l - GT | r' `ptrEq` r -> t - | otherwise -> balanceR ky y l r' - where !r' = go orig kx x r - EQ | x `ptrEq` y && (lazy orig `seq` (orig `ptrEq` ky)) -> t - | otherwise -> Bin sz (lazy orig) x l r + LT | found -> Bin sz ky y l' r :*: found + | otherwise -> balanceL ky y l' r :*: found + where !(l' :*: found) = go kx x l + GT | found -> Bin sz ky y l r' :*: found + | otherwise -> balanceR ky y l r' :*: found + where !(r' :*: found) = go kx x r + EQ -> Bin sz kx x l r :*: True #if __GLASGOW_HASKELL__ {-# INLINABLE insert #-} #else From e4b1ae76b85d28ea7234f7d5b57ad479339485da Mon Sep 17 00:00:00 2001 From: Huw Campbell Date: Thu, 11 Jan 2024 19:10:01 +1100 Subject: [PATCH 2/4] New benchmark --- containers-tests/benchmarks/Map.hs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/containers-tests/benchmarks/Map.hs b/containers-tests/benchmarks/Map.hs index b53a4914d..faa7b60b2 100644 --- a/containers-tests/benchmarks/Map.hs +++ b/containers-tests/benchmarks/Map.hs @@ -35,6 +35,7 @@ main = do , bench "alterF no rules lookup present" $ whnf (atLookupNoRules evens) m_even , bench "insert absent" $ whnf (ins elems_even) m_odd , bench "insert present" $ whnf (ins elems_even) m_even + , bench "insert alternate" $ whnf (ins elems_alts) m_even , bench "alterF insert absent" $ whnf (atIns elems_even) m_odd , bench "alterF insert present" $ whnf (atIns elems_even) m_even , bench "alterF no rules insert absent" $ whnf (atInsNoRules elems_even) m_odd @@ -100,6 +101,7 @@ main = do bound = 2^12 elems = zip keys values elems_even = zip evens evens + elems_alts = zip evens odds elems_odd = zip odds odds elems_rev = reverse elems keys = [1..bound] From d95188588cd8d3383648ea1cc468e108125301c3 Mon Sep 17 00:00:00 2001 From: Huw Campbell Date: Fri, 12 Jan 2024 10:00:59 +1100 Subject: [PATCH 3/4] Use explicit continuation for insertR instead of pointer equality. The pointer equality is used to check if we can not do anything when we retraverse back up the chain, but, if we turn it around and use an explicit continuation instead, we can just choose to not run it. --- containers-tests/benchmarks/Map.hs | 8 +++++-- containers/src/Data/Map/Internal.hs | 37 +++++++---------------------- 2 files changed, 15 insertions(+), 30 deletions(-) diff --git a/containers-tests/benchmarks/Map.hs b/containers-tests/benchmarks/Map.hs index faa7b60b2..bf2460019 100644 --- a/containers-tests/benchmarks/Map.hs +++ b/containers-tests/benchmarks/Map.hs @@ -20,8 +20,9 @@ main = do let m = M.fromAscList elems :: M.Map Int Int m_even = M.fromAscList elems_even :: M.Map Int Int m_odd = M.fromAscList elems_odd :: M.Map Int Int - evaluate $ rnf [m, m_even, m_odd] - evaluate $ rnf elems_rev + m_sparse = M.filter (\v -> v `mod` 15 == 0) m_even + evaluate $ rnf [m, m_even, m_odd, m_sparse] + evaluate $ rnf [elems_rev, elems_alts] defaultMain [ bench "lookup absent" $ whnf (lookup evens) m_odd , bench "lookup present" $ whnf (lookup evens) m_even @@ -85,6 +86,9 @@ main = do , bench "mapMaybeWithKey" $ whnf (M.mapMaybeWithKey (const maybeDel)) m , bench "lookupIndex" $ whnf (lookupIndex keys) m , bench "union" $ whnf (M.union m_even) m_odd + , bench "union_identical" $ whnf (M.union m_even) m_even + , bench "union_sparse" $ whnf (M.union m_even) m_sparse + , bench "union_into_sparse" $ whnf (M.union m_sparse) m_even , bench "difference" $ whnf (M.difference m) m_even , bench "intersection" $ whnf (M.intersection m) m_even , bench "split" $ whnf (M.split (bound `div` 2)) m diff --git a/containers/src/Data/Map/Internal.hs b/containers/src/Data/Map/Internal.hs index 5234a71f6..b3168ab6f 100644 --- a/containers/src/Data/Map/Internal.hs +++ b/containers/src/Data/Map/Internal.hs @@ -403,7 +403,7 @@ import Utils.Containers.Internal.BitUtil (wordSize) #endif #if __GLASGOW_HASKELL__ -import GHC.Exts (build, lazy) +import GHC.Exts (build) import Language.Haskell.TH.Syntax (Lift) -- See Note [ Template Haskell Dependencies ] import Language.Haskell.TH () @@ -805,39 +805,20 @@ lazy :: a -> a lazy a = a #endif --- [Note: Avoiding worker/wrapper] --- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- 'insert' has to go to great lengths to get pointer equality right and --- to prevent unnecessary allocation. The trouble is that GHC *really* wants --- to unbox the key and throw away the boxed one. This is bad for us, because --- we want to compare the pointer of the box we are given to the one already --- present if they compare EQ. It's also bad for us because it leads to the --- key being *reboxed* if it's actually stored in the map. Ugh! So we pass the --- 'go' function *two copies* of the key we're given. One of them we use for --- comparisons; the other we keep in our pocket. To prevent worker/wrapper from --- messing with the copy in our pocket, we sprinkle about calls to the magical --- function 'lazy'. This is all horrible, but it seems to work okay. - -- Insert a new key and value in the map if it is not already present. -- Used by `union`. - --- See Note: Type of local 'go' function --- See Note: Avoiding worker/wrapper insertR :: Ord k => k -> a -> Map k a -> Map k a -insertR kx0 = go kx0 kx0 +insertR k0 a0 m0 = go k0 a0 m0 id where - go :: Ord k => k -> k -> a -> Map k a -> Map k a - go orig !_ x Tip = singleton (lazy orig) x - go orig !kx x t@(Bin _ ky y l r) = + -- Use an explicit continuation which isn't executed if the + -- key is found. + go !kx x Tip k = k (singleton kx x) + go !kx x (Bin _ ky y l r) k = case compare kx ky of - LT | l' `ptrEq` l -> t - | otherwise -> balanceL ky y l' r - where !l' = go orig kx x l - GT | r' `ptrEq` r -> t - | otherwise -> balanceR ky y l r' - where !r' = go orig kx x r - EQ -> t + LT -> go kx x l (k . (\l' -> balanceL ky y l' r)) + GT -> go kx x r (k . (\r' -> balanceR ky y l r')) + EQ -> m0 #if __GLASGOW_HASKELL__ {-# INLINABLE insertR #-} #else From 2d6ca6b89df6726d7527a2352b996c5c2b2bd6c8 Mon Sep 17 00:00:00 2001 From: Huw Campbell Date: Fri, 12 Jan 2024 12:28:30 +1100 Subject: [PATCH 4/4] Use same trick for insertWith --- containers/src/Data/Map/Internal.hs | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/containers/src/Data/Map/Internal.hs b/containers/src/Data/Map/Internal.hs index b3168ab6f..48364c5c7 100644 --- a/containers/src/Data/Map/Internal.hs +++ b/containers/src/Data/Map/Internal.hs @@ -838,19 +838,19 @@ insertR k0 a0 m0 = go k0 a0 m0 id -- Also see the performance note on 'fromListWith'. insertWith :: Ord k => (a -> a -> a) -> k -> a -> Map k a -> Map k a -insertWith = go +insertWith f k0 a0 m0 = + case go k0 a0 m0 of (m :*: _) -> m where - -- We have no hope of making pointer equality tricks work - -- here, because lazy insertWith *always* changes the tree, - -- either adding a new entry or replacing an element with a - -- thunk. - go :: Ord k => (a -> a -> a) -> k -> a -> Map k a -> Map k a - go _ !kx x Tip = singleton kx x - go f !kx x (Bin sy ky y l r) = + go !kx x Tip = singleton kx x :*: False + go !kx x (Bin sy ky y l r) = case compare kx ky of - LT -> balanceL ky y (go f kx x l) r - GT -> balanceR ky y l (go f kx x r) - EQ -> Bin sy kx (f x y) l r + LT | found -> Bin sy ky y l' r :*: found + | otherwise -> balanceL ky y l' r :*: found + where !(l' :*: found) = go kx x l + GT | found -> Bin sy ky y l r' :*: found + | otherwise -> balanceR ky y l r' :*: found + where !(r' :*: found) = go kx x r + EQ -> Bin sy kx (f x y) l r :*: True #if __GLASGOW_HASKELL__ {-# INLINABLE insertWith #-}