diff --git a/elm-format.cabal b/elm-format.cabal index 0ebd71b44..0d385071e 100644 --- a/elm-format.cabal +++ b/elm-format.cabal @@ -49,7 +49,6 @@ library markdown exposed-modules: - Elm.Name ElmFormat ElmVersion -- exposed for tests @@ -76,7 +75,6 @@ library Parse.Pattern Parse.Type Reporting.Annotation - Reporting.Doc Reporting.Error.Syntax Reporting.Region Util.List @@ -93,6 +91,7 @@ library Cheapskate.Util CommandLine.Helpers Data.Index + Elm.Name ElmFormat.Execute ElmFormat.Filesystem ElmFormat.FileStore @@ -116,9 +115,13 @@ library Parse.Module Parse.Parse Parse.ParsecAdapter + Parse.Primitives Parse.Primitives.Internals - Parse.State + Parse.Primitives.Symbol + Parse.Primitives.Variable + Parse.Primitives.Whitespace Parse.Whitespace + Reporting.Doc Reporting.Render.Code Reporting.Report Reporting.Result diff --git a/parser/src/Parse/Expression.hs b/parser/src/Parse/Expression.hs index eacd4be99..9dbc23742 100644 --- a/parser/src/Parse/Expression.hs +++ b/parser/src/Parse/Expression.hs @@ -10,7 +10,6 @@ import Parse.Common import qualified Parse.Helpers as Help import qualified Parse.Literal as Literal import qualified Parse.Pattern as Pattern -import qualified Parse.State as State import qualified Parse.Type as Type import Parse.IParser import Parse.Whitespace diff --git a/parser/src/Parse/Helpers.hs b/parser/src/Parse/Helpers.hs index 4bdb4aea1..58d2c862b 100644 --- a/parser/src/Parse/Helpers.hs +++ b/parser/src/Parse/Helpers.hs @@ -5,16 +5,17 @@ import Prelude hiding (until) import Control.Monad (guard) import Data.Map.Strict hiding (foldl) import qualified Data.Maybe as Maybe -import Text.Parsec hiding (newline, spaces, State) -import Text.Parsec.Indent (indented, runIndent) import AST.V0_16 import qualified AST.Expression import qualified AST.Helpers as Help import qualified AST.Variable -import qualified Parse.State as State +import qualified Data.Text as Text +import Data.Text.Encoding (encodeUtf8) import Parse.Comments import Parse.IParser +import Parse.ParsecAdapter (string, (<|>), many, many1, choice, option, char, eof, lookAhead, notFollowedBy, anyWord8) +import Parse.Primitives (run, getPosition, try, oneOf) import Parse.Whitespace import qualified Reporting.Annotation as A import qualified Reporting.Error.Syntax as Syntax @@ -41,15 +42,11 @@ expecting = flip () -- SETUP -iParse :: IParser a -> String -> Either ParseError a -iParse = - iParseWithState "" State.init +iParse :: IParser a -> String -> Either Syntax.Error a +iParse parser input = + run parser (encodeUtf8 $ Text.pack input) -iParseWithState :: SourceName -> State.State -> IParser a -> String -> Either ParseError a -iParseWithState sourceName state aParser input = - runIndent sourceName $ runParserT aParser state sourceName input - -- VARIABLES @@ -95,7 +92,7 @@ makeVar :: IParser Char -> IParser String makeVar firstChar = do variable <- (:) <$> firstChar <*> many innerVarChar if variable `elem` reserveds - then fail (Syntax.keyword variable) + then fail () -- (Syntax.keyword variable) else return variable @@ -291,7 +288,7 @@ separated :: IParser sep -> IParser e -> IParser (Either e (R.Region, (e,Maybe S separated sep expr' = let subparser = - do start <- getMyPosition + do start <- getPosition t1 <- expr' arrow <- optionMaybe $ try ((,) <$> restOfLine <*> whitespace <* sep) case arrow of @@ -300,7 +297,7 @@ separated sep expr' = Just (eolT1, preArrow) -> do postArrow <- whitespace t2 <- separated sep expr' - end <- getMyPosition + end <- getPosition case t2 of Right (_, (t2',eolT2), ts, _) -> return $ \multiline -> Right @@ -350,7 +347,7 @@ constrainedSpacePrefix' parser constraint = spacing = do (n, comments) <- whitespace' - _ <- constraint (not n) Syntax.whitespace + _ <- constraint (not n) -- Syntax.whitespace indented return comments @@ -458,9 +455,11 @@ surround'' leftDelim rightDelim inner = -- HELPERS FOR EXPRESSIONS + +-- TODO: inline this getMyPosition :: IParser R.Position getMyPosition = - R.fromSourcePos <$> getPosition + getPosition addLocation :: IParser a -> IParser (A.Located a) @@ -471,15 +470,15 @@ addLocation expr = located :: IParser a -> IParser (R.Position, a, R.Position) located parser = - do start <- getMyPosition + do start <- getPosition value <- parser - end <- getMyPosition + end <- getPosition return (start, value, end) accessible :: IParser AST.Expression.Expr -> IParser AST.Expression.Expr accessible exprParser = - do start <- getMyPosition + do start <- getPosition annotatedRootExpr@(A.A _ _rootExpr) <- exprParser @@ -492,7 +491,7 @@ accessible exprParser = Just _ -> accessible $ do v <- lowVar - end <- getMyPosition + end <- getPosition return . A.at start end $ -- case rootExpr of -- AST.Expression.VarExpr (AST.Variable.VarRef name@(c:_)) diff --git a/parser/src/Parse/IParser.hs b/parser/src/Parse/IParser.hs index bffb2de9e..5e5cd80cc 100644 --- a/parser/src/Parse/IParser.hs +++ b/parser/src/Parse/IParser.hs @@ -1,6 +1,6 @@ module Parse.IParser where -import Control.Monad.State (State) -import qualified Parse.Primitives.Internals as I +import Parse.Primitives (Parser) -type IParser a = I.Parser a +-- TODO: inline this +type IParser a = Parser a diff --git a/parser/src/Parse/ParsecAdapter.hs b/parser/src/Parse/ParsecAdapter.hs index f01a24409..716f45a88 100644 --- a/parser/src/Parse/ParsecAdapter.hs +++ b/parser/src/Parse/ParsecAdapter.hs @@ -1,24 +1,35 @@ module Parse.ParsecAdapter -( string -, (<|>) -, many -, many1 -, choice -, option -) -where + ( string + , (<|>) + , many + , many1 + , choice + , option + , char + , eof + , lookAhead + , notFollowedBy + , anyWord8 + ) + where + +{-| This module implements parts of Parsec's API in terms of the new Elm 0.19 parser primitives +(`Parse.Primitives.Internals`). +Eventually the rest of the elm-format parsers should be rewritten to more closely match the +Elm 0.19 parser, and once that is done, this module can be removed. +-} import Data.Word (Word8) import qualified Data.ByteString as B -import qualified Data.ByteString.Char8 as C -import Data.Char (ord) +-- import qualified Data.ByteString.Char8 as C +-- import Data.Char (ord) import qualified Data.Text as T import Foreign.ForeignPtr (ForeignPtr) -import qualified Parse.Primitives.Internals as I -import qualified Reporting.Region as R +-- import qualified Reporting.Region as R import qualified Reporting.Error.Syntax as E import Data.Text.Encoding (encodeUtf8) -import Parse.Primitives.Internals (Parser(Parser), State(State), noError, oneOf) +import Parse.Primitives.Internals (Parser(Parser), State(State), unsafeIndex, noError, oneOf) +import Parse.Primitives (endOfFile) toWord8 :: String -> [Word8] @@ -28,61 +39,74 @@ toWord8 = B.unpack . encodeUtf8 . T.pack string :: String -> Parser String string str = Parser $ \(State fp offset terminal indent row col ctx) cok cerr _ _ -> - case eatString (toWord8 str) fp offset terminal row col of - Err err -> - cerr err + case eatWord8s (toWord8 str) fp offset terminal row col of + Err err -> + cerr err + + Ok newOffset newRow newCol -> + cok str (State fp newOffset terminal indent newRow newCol ctx) noError - Ok newOffset newRow newCol -> - let - !newState = State fp newOffset terminal indent newRow newCol ctx - in - cok str newState noError -data Result +data EatStringResult = Err E.ParseError | Ok Int Int Int -eatString :: [Word8] -> ForeignPtr Word8 -> Int -> Int -> Int -> Int -> Result -eatString str fp offset terminal row col = + +eatWord8s :: [Word8] -> ForeignPtr Word8 -> Int -> Int -> Int -> Int -> EatStringResult +eatWord8s str fp offset terminal row col = case str of [] -> Ok offset row col - 0x0A {- \n -} : _ -> error "eatString doesn't support matching '\\n'" + 0x0A {- \n -} : _ -> error "eatWord8s doesn't support matching '\\n'" h : t -> if offset >= terminal then Err noError - else if h == I.unsafeIndex fp offset then - eatString t fp (offset + 1) terminal row (col + 1) + else if h == unsafeIndex fp offset then + eatWord8s t fp (offset + 1) terminal row (col + 1) else Err noError + +char :: Char -> Parser Char +char c = + Parser $ \(State fp offset terminal indent row col ctx) cok cerr _ _ -> + case eatWord8s (toWord8 [c]) fp offset terminal row col of + Err err -> + cerr err + + Ok newOffset newRow newCol -> + cok c (State fp newOffset terminal indent newRow newCol ctx) noError + + (<|>) :: Parser a -> Parser a -> Parser a a <|> b = oneOf [ a, b ] many1 :: Parser a -> Parser [a] many1 (Parser parser) = - Parser $ \initialState cok cerr eok eerr -> + Parser $ \initialState cok cerr _ eerr -> let - parseFirst state err = + parseFirst state _err = parser state (\a newState e -> parseNext [a] newState e) (\ e -> cerr e) - (\a newState _ -> error "many1 succeeded with empty parser") + (\_ _ _ -> error "many1 succeeded with empty parser") (\ e -> eerr e) + -- help :: [a] -> State -> E.ParseError b parseNext acc state err = -- TODO: is the error passing correct? what do errors mean for parser success cases? parser state (\a newState e -> parseNext (a:acc) newState e) (\ e -> cerr e) - (\a newState _ -> error "many1 succeeded with empty parser") + (\_ _ _ -> error "many1 succeeded with empty parser") (\ _ -> cok (reverse acc) state err) in - parseFirst initialState noError + parseFirst initialState noError + many :: Parser a -> Parser [a] many (Parser parser) = - Parser $ \initialState cok cerr eok eerr -> + Parser $ \initialState cok cerr _ _ -> -- TODO: do we need to use eerr when the first term fails? let -- help :: [a] -> State -> E.ParseError b parseNext acc state err = @@ -90,14 +114,62 @@ many (Parser parser) = parser state (\a newState e -> parseNext (a:acc) newState e) (\ e -> cerr e) - (\a newState _ -> error "many succeeded with empty parser") + (\_ _ _ -> error "many succeeded with empty parser") (\ _ -> cok (reverse acc) state err) in - parseNext [] initialState noError + parseNext [] initialState noError + choice :: [Parser a] -> Parser a -choice = oneOf +choice = + oneOf + option :: a -> Parser a -> Parser a option a parser = - oneOf [parser, pure a] \ No newline at end of file + oneOf [parser, pure a] + + +eof :: Parser () +eof = + endOfFile + + +lookAhead :: Parser a -> Parser a +lookAhead (Parser parser) = + Parser $ \state cok cerr eok eer -> + let + cok' a _ e = + cok a state e + in + parser state cok' cerr eok eer + + +notFollowedBy :: Parser a -> Parser () +notFollowedBy (Parser parser) = + Parser $ \state _ _ eok eerr -> + parser state + (\_ _ _ -> eerr noError) + (\_ -> eok () state noError) + (\_ _ _ -> eerr noError) + (\_ -> eok () state noError) + + +anyWord8 :: Parser Word8 +anyWord8 = + Parser $ \(State fp offset terminal indent row col ctx) cok cerr _ eerr -> + if offset >= terminal then + eerr noError + else + case unsafeIndex fp offset of + 0x0A {- \n -} -> + cok 0x0A (State fp (offset + 1) terminal indent (row + 1) 1 ctx) noError + + 0x0D {- \r -} -> + cok 0x0D (State fp (offset + 1) terminal indent row col ctx) noError + + 0x09 {- \t -} -> + cerr (E.ParseError row col E.Tab) + + word -> + cok word (State fp (offset + 1) terminal indent row (col + 1) ctx) noError diff --git a/parser/src/Parse/Primitives/Symbol.hs b/parser/src/Parse/Primitives/Symbol.hs index 356d2f995..32e6f6d79 100644 --- a/parser/src/Parse/Primitives/Symbol.hs +++ b/parser/src/Parse/Primitives/Symbol.hs @@ -14,18 +14,18 @@ import Prelude hiding (length) import Control.Exception (assert) import qualified Data.ByteString.Internal as B import qualified Data.ByteString.Char8 as Char8 -import qualified Data.Char as Char -import qualified Data.IntSet as IntSet +-- import qualified Data.Char as Char +-- import qualified Data.IntSet as IntSet -- import qualified Data.Vector as Vector -import Foreign.ForeignPtr (ForeignPtr) -import GHC.Word (Word8) +-- import Foreign.ForeignPtr (ForeignPtr) +-- import GHC.Word (Word8) -import qualified Elm.Name as N +-- import qualified Elm.Name as N import Parse.Primitives.Internals (Parser(..), State(..), expect, noError) import qualified Parse.Primitives.Internals as I import qualified Parse.Primitives.Variable as Var import qualified Reporting.Error.Syntax as E -import Reporting.Error.Syntax (Theory(..), BadOp(..)) +import Reporting.Error.Syntax (Theory(..)) --, BadOp(..)) @@ -92,10 +92,10 @@ underscore = -- Vector.generate 128 (\i -> IntSet.member i binopCharSet) -{-# NOINLINE binopCharSet #-} -binopCharSet :: IntSet.IntSet -binopCharSet = - IntSet.fromList (map Char.ord "+-/*=.<>:&|^?%!") +-- {-# NOINLINE binopCharSet #-} +-- binopCharSet :: IntSet.IntSet +-- binopCharSet = +-- IntSet.fromList (map Char.ord "+-/*=.<>:&|^?%!") diff --git a/parser/src/Parse/State.hs b/parser/src/Parse/State.hs deleted file mode 100644 index 224140230..000000000 --- a/parser/src/Parse/State.hs +++ /dev/null @@ -1,40 +0,0 @@ -module Parse.State where - - -data State = State - { newline :: [Bool] - } - - -init :: State -init = - State - { newline = [False] - } - - -setNewline :: State -> State -setNewline state = - case newline state of - [] -> state - (_:rest) -> state { newline = (True:rest) } - - -pushNewlineContext :: State -> State -pushNewlineContext state = - state { newline = (False:(newline state)) } - - -popNewlineContext :: State -> State -popNewlineContext state = - case newline state of - [] -> state - (_:[]) -> state - (last:next:rest) -> state { newline = (last || next):rest } - - -sawNewline :: State -> Bool -sawNewline state = - case newline state of - [] -> False - (b:_) -> b diff --git a/parser/src/Parse/Whitespace.hs b/parser/src/Parse/Whitespace.hs index 13f15a6f5..dbd5b1f88 100644 --- a/parser/src/Parse/Whitespace.hs +++ b/parser/src/Parse/Whitespace.hs @@ -2,13 +2,16 @@ module Parse.Whitespace where import AST.V0_16 import qualified Cheapskate.Types as Markdown +import qualified Data.ByteString as ByteString import qualified Data.Char as Char +import qualified Data.Text as Text +import Data.Text.Encoding (decodeUtf8) import Parse.IParser import qualified Parse.Markdown as Markdown -import qualified Parse.State as State -import qualified Reporting.Error.Syntax as Syntax +-- import qualified Reporting.Error.Syntax as Syntax import Parse.Primitives (try) -import Parse.ParsecAdapter (string, (<|>), many, many1, choice, option) +import Parse.Primitives.Internals (Parser(..), State(..)) +import Parse.ParsecAdapter (string, (<|>), many, many1, choice, option, char, eof, lookAhead, notFollowedBy, anyWord8) -- import Text.Parsec hiding (newline, spaces, State) @@ -76,18 +79,24 @@ newline = simpleNewline :: IParser () simpleNewline = do _ <- try (string "\r\n") <|> string "\n" - updateState State.setNewline return () trackNewline :: IParser a -> IParser (a, Multiline) -trackNewline parser = - do - updateState State.pushNewlineContext - a <- parser - state <- getState - updateState State.popNewlineContext - return (a, if State.sawNewline state then SplitAll else JoinAll) +trackNewline (Parser parser) = + Parser $ \state@(State _ _ _ _ row _ _) cok _ eok err -> + let + cok' a newState@(State _ _ _ _ newRow _ _) e = + if newRow > row + then cok (a, SplitAll) newState e + else cok (a, JoinAll) newState e + + eok' a newState e = + -- Nothing was consumed, so there cannot have been a newline + eok (a, JoinAll) newState e + + in + parser state cok' err eok' err lineComment :: IParser Comment @@ -160,19 +169,19 @@ closeComment keepClosingPunc = uncurry (++) <$> anyUntil (choice - [ try ((\a b -> if keepClosingPunc then concat (a ++ [b]) else "") <$> many (string " ") <*> string "-}") "the end of a comment -}" + [ try ((\a b -> if keepClosingPunc then concat (a ++ [b]) else "") <$> many (string " ") <*> string "-}") -- "the end of a comment -}" , concat <$> sequence [ try (string "{-"), closeComment True, closeComment keepClosingPunc] ]) anyUntil :: IParser a -> IParser (String, a) anyUntil end = - go "" + go [] where next pre = do - nextChar <- anyChar - go (nextChar : pre) + nextByte <- anyWord8 + go (nextByte : pre) go pre = - ((,) (reverse pre) <$> end) <|> next pre + ((,) (Text.unpack . decodeUtf8 . ByteString.pack $ reverse pre) <$> end) <|> next pre