Skip to content

Commit a6b5c82

Browse files
committed
Implemented TH splices for validated ByteString literals
thLiteral :: Quote m => String -> Code m ByteString thHexLiteral :: Quote m => String -> Code m ByteString The former rejects inputs with non-octet code points above 0xFF. The latter rejects odd-length inputs or inputs with characters other than non-hexadecimal digits.
1 parent 188512c commit a6b5c82

File tree

4 files changed

+109
-1
lines changed

4 files changed

+109
-1
lines changed

Changelog.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@
44
* [`Data.Data.dataTypeOf` for `StrictByteString` and `LazyByteString` now returns a `DataType` that uses `AlgRep` instead of `NoRep`.](https://github.com/haskell/bytestring/pull/614)
55
* This allows utilities like `syb:Data.Generics.Text.gread` to be meaningfully used at these types containing `ByteString`s.
66
* [`fromListN` in `instance IsList ByteString` truncates input list if it's longer than the size hint](https://github.com/haskell/bytestring/pull/672)
7+
* API additions
8+
* New TH splices: `Data.ByteString.thLiteral` and `Data.ByteString.thHexLiteral`
9+
* These validate input strings prior to generating corresponding
10+
compile-time literal ByteStrings.
711
<!--
812
* Bug fixes:
913
* API additions and behavior changes:

Data/ByteString.hs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@ module Data.ByteString (
6767
toStrict,
6868
fromFilePath,
6969
toFilePath,
70+
thLiteral,
71+
thHexLiteral,
7072

7173
-- * Basic interface
7274
cons,

Data/ByteString/Internal/Type.hs

Lines changed: 95 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ module Data.ByteString.Internal.Type (
4242
unpackChars, unpackAppendCharsLazy, unpackAppendCharsStrict,
4343
unsafePackAddress, unsafePackLenAddress,
4444
unsafePackLiteral, unsafePackLenLiteral,
45+
thLiteral, thHexLiteral,
4546

4647
-- * Low level imperative construction
4748
empty,
@@ -152,8 +153,9 @@ import Data.String (IsString(..))
152153

153154
import Control.Exception (assert, throw, Exception)
154155

155-
import Data.Bits ((.&.))
156+
import Data.Bits ((.|.), (.&.), complement, shiftL)
156157
import Data.Char (ord)
158+
import Data.Foldable (foldr')
157159
import Data.Word
158160

159161
import Data.Data (Data(..), mkConstr, mkDataType, Constr, DataType, Fixity(Prefix), constrIndex)
@@ -530,6 +532,98 @@ packUptoLenChars len cs0 =
530532
go !p (c:cs) = pokeFp p (c2w c) >> go (p `plusForeignPtr` 1) cs
531533
in go p0 cs0
532534

535+
data S2W = Octets {-# UNPACK #-} !Int [Word8]
536+
| Hichar {-# UNPACK #-} !Int {-# UNPACK #-} !Word
537+
538+
-- | Template Haskell splice to convert string constants to compile-time
539+
-- ByteString literals. Unlike the 'IsString' instance, the input string
540+
-- is validated to ensure that each character is a valid /octet/, i.e. is
541+
-- at most @0xFF@ (255).
542+
--
543+
-- Example:
544+
--
545+
-- > :set -XTemplateHaskell
546+
-- > ehloCmd :: ByteString
547+
-- > ehloCmd = $$(thLiteral "EHLO")
548+
--
549+
#if MIN_VERSION_template_haskell(2,17,0)
550+
liftTyped :: forall a m. (TH.Lift a, TH.Quote m) => a -> TH.Code m a
551+
liftTyped = TH.liftTyped
552+
553+
liftCode :: forall a m. m (TH.TExp a) -> TH.Code m a
554+
liftCode = TH.liftCode
555+
556+
thLiteral :: (MonadFail m, TH.Quote m) => String -> TH.Code m ByteString
557+
#else
558+
liftTyped :: forall a. TH.Lift a => a -> TH.Q (TH.TExp a)
559+
liftTyped = TH.unsafeTExpCoerce . TH.lift
560+
561+
liftCode :: forall a. TH.Q TH.Exp -> TH.Q (TH.TExp a)
562+
liftCode = TH.unsafeTExpCoerce
563+
564+
thLiteral :: String -> TH.Q (TH.TExp ByteString)
565+
#endif
566+
thLiteral "" = [||empty||]
567+
thLiteral s = case foldr' op (Octets 0 []) s of
568+
Octets !n ws -> liftTyped (unsafePackLenBytes n ws)
569+
Hichar !i !w -> liftCode $ fail $ "non-octet character '\\" ++
570+
show w ++ "' at offset: " ++ show i
571+
where
572+
op _ (Hichar !i !w) = Hichar (i + 1) w
573+
op (fromIntegral . fromEnum -> !w) (Octets !i ws)
574+
| w <= 0xff = Octets (i + 1) (fromIntegral w : ws)
575+
| otherwise = Hichar 0 w
576+
577+
data H2W = Hex {-# UNPACK #-} !Int [Word8]
578+
| Odd {-# UNPACK #-} !Int {-# UNPACK #-} !Word [Word8]
579+
| Bad {-# UNPACK #-} !Int {-# UNPACK #-} !Word
580+
581+
-- | Template Haskell splice to convert hex-encoded string constants to compile-time
582+
-- ByteString literals. The input string is validated to ensure that it consists of
583+
-- of an even number of valid hexadecimal digits (case insensitive).
584+
--
585+
-- Example:
586+
--
587+
-- > :set -XTemplateHaskell
588+
-- > ehloCmd :: ByteString
589+
-- > ehloCmd = $$(thLiteral "45484c4F")
590+
--
591+
#if MIN_VERSION_template_haskell(2,17,0)
592+
thHexLiteral :: (MonadFail m, TH.Quote m) => String -> TH.Code m ByteString
593+
#else
594+
thHexLiteral :: String -> TH.Q (TH.TExp ByteString)
595+
#endif
596+
thHexLiteral "" = [||empty||]
597+
thHexLiteral s =
598+
case foldr' op (Hex 0 []) s of
599+
(Hex n ws) -> liftTyped (unsafePackLenBytes n ws)
600+
(Odd i _ _) -> liftCode $ fail $ "Odd input length: " ++ show (1 + 2 * i)
601+
(Bad i w) -> liftCode $ fail $ "Non-hexadecimal character '\\" ++
602+
show w ++ "' at offset: " ++ show i
603+
where
604+
-- Convert char to decimal digit value if result in [0, 9].
605+
-- Otherwise, for hex digits, it remains to:
606+
-- - fold upper and lower case by masking 0x20,
607+
-- - subtract another 0x11 (0x41 total),
608+
-- - check that result in [0,5]
609+
-- - add 0xa
610+
--
611+
c2d :: Char -> Word
612+
c2d c = fromIntegral (fromEnum c) - 0x30
613+
614+
op (c2d -> d) acc
615+
| d <= 9 = case acc of
616+
Hex i ws -> Odd i d ws
617+
Odd i lo ws -> Hex (i+1) $ fromIntegral ((d `shiftL` 4 .|. lo)) : ws
618+
Bad i w -> Bad (i + 1) w
619+
| l <- (d .&. complement 0x20) - 0x11
620+
, l <= 5
621+
, x <- l + 0xa = case acc of
622+
Hex i ws -> Odd i (l + 0xa) ws
623+
Odd i lo ws -> Hex (i+ 1) $ fromIntegral (x `shiftL` 4 .|. lo) : ws
624+
Bad i w -> Bad (i + 1) w
625+
| otherwise = Bad 0 (d + 0x30)
626+
533627
-- Unpacking bytestrings into lists efficiently is a tradeoff: on the one hand
534628
-- we would like to write a tight loop that just blasts the list into memory, on
535629
-- the other hand we want it to be unpacked lazily so we don't end up with a

tests/Lift.hs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,14 @@ testSuite = testGroup "Lift"
3131
let bs = "\0\1\2\3\0\1\2\3" :: BS.ByteString in
3232
bs === $$(TH.liftTyped $ BS.pack [0,1,2,3,0,1,2,3])
3333
#endif
34+
35+
, testProperty "thLiteral" $
36+
let bs = "EHLO" :: BS.ByteString in
37+
bs === $$(BS.thLiteral "EHLO")
38+
39+
, testProperty "thHexLiteral" $
40+
let bs = "EHLO" :: BS.ByteString in
41+
bs === $$(BS.thHexLiteral "45484c4F")
3442
]
3543

3644
, testGroup "lazy"

0 commit comments

Comments
 (0)