Skip to content

Commit 458df41

Browse files
committed
fixup! Avoid per-byte loop in cstring{,Utf8} builders
Matthew Craven review fixes
1 parent 0a7d5c8 commit 458df41

File tree

4 files changed

+25
-33
lines changed

4 files changed

+25
-33
lines changed

Data/ByteString/Builder/Internal.hs

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
{-# LANGUAGE MagicHash #-}
2-
{-# LANGUAGE NoMonoLocalBinds #-}
3-
{-# LANGUAGE TypeFamilies #-}
41
{-# LANGUAGE Unsafe #-}
2+
{-# LANGUAGE TypeFamilies #-}
3+
{-# LANGUAGE TypeApplications #-}
4+
{-# LANGUAGE NoMonoLocalBinds #-}
55
{-# LANGUAGE ViewPatterns #-}
66

77
{-# OPTIONS_HADDOCK not-home #-}
@@ -979,11 +979,11 @@ byteStringInsert =
979979
-- Raw CString encoding
980980
------------------------------------------------------------------------------
981981

982-
-- | Builder for raw 'Addr#' pointers to null-terminated primitive ASCII
983-
-- strings that are free of embedded (overlong-encoded as the two-byte sequence
984-
-- @0xC0 0x80@) null characters.
982+
-- | Builder for raw pointers to static data of known length that will never be
983+
-- moved or freed. (This is used with the static buffers GHC uses to implement
984+
-- ASCII string literals that do not contain null characters.)
985985
--
986-
-- @since 0.11.5.0
986+
-- @since 0.13.0.0
987987
{-# INLINABLE asciiLiteralCopy #-}
988988
asciiLiteralCopy :: Ptr Word8 -> Int -> Builder
989989
asciiLiteralCopy = \ !ip !len -> builder $ \k br@(BufferRange op ope) ->
@@ -996,7 +996,7 @@ asciiLiteralCopy = \ !ip !len -> builder $ \k br@(BufferRange op ope) ->
996996
-- as @0xC0 0x80@. Other deviations from strict UTF-8 are tolerated, but the
997997
-- result is not well defined.
998998
--
999-
-- @since 0.11.5.0
999+
-- @since 0.13.0.0
10001000
{-# INLINABLE modUtf8LitCopy #-}
10011001
modUtf8LitCopy :: Ptr Word8 -> Int -> Builder
10021002
modUtf8LitCopy !ip !len
@@ -1034,14 +1034,15 @@ modUtf8_step !ip !len k (BufferRange op ope)
10341034
utf8_copyBytes :: Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO Int
10351035
utf8_copyBytes !ipe = \ ip op -> go 0 ip op
10361036
where
1037+
go :: Int -> Ptr Word8 -> Ptr Word8 -> IO Int
10371038
go !n !ip@((< ipe) -> True) !op = do
10381039
!ch <- peek ip
10391040
let !ip' = ip `plusPtr` 1
10401041
!op' = op `plusPtr` 1
10411042
if | ch /= 0xC0 -> do
10421043
poke op ch
10431044
let !cnt = ipe `minusPtr` ip'
1044-
!runend <- S.memchr ip' 0xC0 (fromIntegral cnt)
1045+
!runend <- S.memchr ip' 0xC0 (fromIntegral @Int cnt)
10451046
let !runlen | runend == nullPtr = cnt
10461047
| otherwise = runend `minusPtr` ip'
10471048
if (runlen == 0)

Data/ByteString/Builder/Prim.hs

Lines changed: 3 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,5 @@
1-
{-# LANGUAGE CPP #-}
21
{-# LANGUAGE Trustworthy #-}
32

4-
#include "bytestring-cpp-macros.h"
5-
63
{- | Copyright : (c) 2010-2011 Simon Meier
74
(c) 2010 Jasper van der Jeugt
85
License : BSD3-style (see LICENSE)
@@ -456,6 +453,7 @@ import Data.ByteString.Builder.Internal
456453

457454
import qualified Data.ByteString as S
458455
import qualified Data.ByteString.Internal as S
456+
import qualified Data.ByteString.Internal.Type as S
459457
import qualified Data.ByteString.Lazy.Internal as L
460458

461459
import Data.Char (ord)
@@ -466,9 +464,6 @@ import Data.ByteString.Builder.Prim.Binary
466464
import Data.ByteString.Builder.Prim.ASCII
467465

468466
import Foreign
469-
#if !MIN_VERSION_base(4,15,0)
470-
import Foreign.C (CSize(..), CString)
471-
#endif
472467
import Foreign.ForeignPtr.Unsafe (unsafeForeignPtrToPtr)
473468
import GHC.Exts
474469

@@ -667,7 +662,7 @@ primMapLazyByteStringBounded w =
667662
--
668663
-- @since 0.11.0.0
669664
cstring :: Addr# -> Builder
670-
cstring s = asciiLiteralCopy (Ptr s) (byteCountLiteral s)
665+
cstring s = asciiLiteralCopy (Ptr s) (S.byteCountLiteral s)
671666
{-# INLINE cstring #-}
672667

673668
-- | Builder for raw 'Addr#' pointers to null-terminated primitive UTF-8
@@ -676,23 +671,9 @@ cstring s = asciiLiteralCopy (Ptr s) (byteCountLiteral s)
676671
--
677672
-- @since 0.11.0.0
678673
cstringUtf8 :: Addr# -> Builder
679-
cstringUtf8 s = modUtf8LitCopy (Ptr s) (byteCountLiteral s)
674+
cstringUtf8 s = modUtf8LitCopy (Ptr s) (S.byteCountLiteral s)
680675
{-# INLINE cstringUtf8 #-}
681676

682-
-- | Byte count of null-terminated primitive literal string excluding the
683-
-- terminating null byte.
684-
byteCountLiteral :: Addr# -> Int
685-
byteCountLiteral addr# =
686-
#if HS_cstringLength_AND_FinalPtr_AVAILABLE
687-
I# (cstringLength# addr#)
688-
#else
689-
fromIntegral (pure_strlen (Ptr addr#))
690-
691-
foreign import ccall unsafe "string.h strlen" pure_strlen
692-
:: CString -> CSize
693-
#endif
694-
{-# INLINE byteCountLiteral #-}
695-
696677
------------------------------------------------------------------------------
697678
-- Char8 encoding
698679
------------------------------------------------------------------------------

Data/ByteString/Internal/Type.hs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ module Data.ByteString.Internal.Type (
4242
unpackChars, unpackAppendCharsLazy, unpackAppendCharsStrict,
4343
unsafePackAddress, unsafePackLenAddress,
4444
unsafePackLiteral, unsafePackLenLiteral,
45+
byteCountLiteral,
4546

4647
-- * Low level imperative construction
4748
empty,
@@ -475,6 +476,17 @@ unsafePackLenAddress len addr# = do
475476
#endif
476477
{-# INLINE unsafePackLenAddress #-}
477478

479+
-- | Byte count of null-terminated primitive literal string excluding the
480+
-- terminating null byte.
481+
byteCountLiteral :: Addr# -> Int
482+
byteCountLiteral addr# =
483+
#if HS_cstringLength_AND_FinalPtr_AVAILABLE
484+
I# (cstringLength# addr#)
485+
#else
486+
fromIntegral (c_strlen (Ptr addr#))
487+
#endif
488+
{-# INLINE byteCountLiteral #-}
489+
478490
-- | See 'unsafePackAddress'. This function has similar behavior. Prefer
479491
-- this function when the address in known to be an @Addr#@ literal. In
480492
-- that context, there is no need for the sequencing guarantees that 'IO'

bench/BenchAll.hs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -327,8 +327,6 @@ main = do
327327
, benchB'_ "ASCII String (12B)" $ asciiLit (Ptr "hello wurld!"#)
328328
, benchB' "ASCII String (64B, naive)" asciiStr fromString
329329
, benchB'_ "ASCII String (64B)" $ asciiLit asciiBuf
330-
, benchB'_ "AsciiLit" $ asciiLit asciiBuf
331-
, benchB'_ "Utf8Lit" $ utf8Lit utf8Buf
332330
, benchB'_ "strLit" $ string8 asciiStr
333331
, benchB'_ "stringUtf8" $ stringUtf8 utf8Str
334332
, benchB'_ "strLitInline" $ string8 "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"

0 commit comments

Comments
 (0)