-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparsecss.hs
220 lines (169 loc) · 9.58 KB
/
parsecss.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
{-# LANGUAGE OverloadedStrings #-}
module Main where
import Control.Applicative (pure, (*>), (<$>), (<*), (<*>))
import Data.Char (digitToInt)
import Text.Parsec
import Text.Parsec.Text
type CssComment = String
data CssSelectorsGroup = Group CssSelector [CssSelector] deriving Show
data CssSelector = Selector CssSimpleSelectorSequence [(CssSelectorCombinator, CssSimpleSelectorSequence)] deriving Show
data CssSelectorCombinator = Descendant
| Child
| AdjacentSibling
| Sibling
deriving Show
data CssSimpleSelectorSequence = TypedSequence CssTypeSelector [CssSimpleSelector]
| UniversalSequence CssUniversalSelector [CssSimpleSelector]
| SimpleSequence CssSimpleSelector [CssSimpleSelector]
deriving Show
data CssTypeSelector = Type (Maybe CssNamespacePrefix) CssElementName deriving Show
data CssNamespacePrefix = Namespace CssIdentifier
| AllNamespaces
| NoNamespace
deriving Show
type CssElementName = CssIdentifier
data CssUniversalSelector = Universal (Maybe CssNamespacePrefix) deriving Show
data CssSimpleSelector = HashSelector CssHash
| ClassSelector CssClass
| AttribSelector CssAttrib
| PseudoSelector CssPseudo
| NegatedTypeSelector CssTypeSelector
| NegatedUniversalSelector CssUniversalSelector
| NegatedHashSelector CssHash
| NegatedClassSelector CssClass
| NegatedAttribSelector CssAttrib
| NegatedPseudoSelector CssPseudo
deriving Show
data CssHash = Hash CssName deriving Show
data CssClass = Class CssIdentifier deriving Show
data CssAttrib = Attrib (Maybe CssNamespacePrefix) CssIdentifier (Maybe (CssAttribMatcher, CssAttribValue)) deriving Show
data CssAttribMatcher = PrefixMatcher
| SuffixMatcher
| SubstringMatcher
| EqualMatcher
| IncludeMatcher
| HyphenMatcher
deriving Show
data CssAttribValue = IdentifierValue CssIdentifier
| StringValue CssString
deriving Show
data CssPseudoType = PseudoClassType | PseudoElementType deriving Show
data CssPseudo = Pseudo CssPseudoType CssIdentifier (Maybe CssExpression) deriving Show
data CssExpression = Expression CssExpressionTerm [CssExpressionTerm] deriving Show
data CssExpressionTerm = Plus
| Minus
| Dimension Double (Maybe CssIdentifier)
| String CssString
| Identifier CssIdentifier
deriving Show
data CssString = Quote String deriving Show
type CssIdentifier = String
type CssName = String
-- SEE http://www.w3.org/TR/css3-selectors/#lex
-- SEE http://hackage.haskell.org/package/parsec
-- SEE http://book.realworldhaskell.org/read/using-parsec.html
-- TODO reimplement cssString
cssSelectorGroup :: Parser CssSelectorsGroup
cssSelectorGroup = Group <$> cssSelector <*> many groupElement
where groupElement = (cssWhiteSpace *> char ',' <* cssWhiteSpace) *> cssSelector
cssSelector :: Parser CssSelector
cssSelector = Selector <$> simpleSelectorSequence <*> many combined
where simpleSelectorSequence = (try (TypedSequence <$> cssTypeSelector)
<|> try (UniversalSequence <$> cssUniversal)
<|> SimpleSequence <$> cssSimpleSelector)
<*> many cssSimpleSelector
combined = (,) <$> (combinator <* cssWhiteSpace) <*> simpleSelectorSequence
combinator = try (cssWhiteSpace *>
( char '+' *> pure AdjacentSibling
<|> char '>' *> pure Child
<|> char '~' *> pure Sibling))
<|> space *> pure Descendant <?> "selector combinator"
cssNamespacePrefix :: Parser (Maybe CssNamespacePrefix)
cssNamespacePrefix = try ((try ((Just . Namespace) <$> cssIdentifier) <|> (char '*' *> pure (Just AllNamespaces)) <|> pure (Just NoNamespace)) <* char '|') <|> pure Nothing
cssElementName :: Parser CssIdentifier
cssElementName = cssIdentifier
cssUniversal :: Parser CssUniversalSelector
cssUniversal = Universal <$> (cssNamespacePrefix <* char '*') <?> "universal selector"
cssTypeSelector :: Parser CssTypeSelector
cssTypeSelector = Type <$> cssNamespacePrefix <*> cssElementName <?> "type selector"
cssSimpleSelector :: Parser CssSimpleSelector
cssSimpleSelector = (try (string ":not") *> between (char '(') (char ')') (cssWhiteSpace *> negationArg <* cssWhiteSpace) <?> "negation")
<|> PseudoSelector <$> cssPseudo
<|> AttribSelector <$> cssAttrib
<|> ClassSelector <$> cssClass
<|> HashSelector <$> cssHash
where negationArg = NegatedUniversalSelector <$> cssUniversal
<|> NegatedHashSelector <$> cssHash
<|> NegatedClassSelector <$> cssClass
<|> NegatedAttribSelector <$> cssAttrib
<|> NegatedPseudoSelector <$> cssPseudo
<|> NegatedTypeSelector <$> cssTypeSelector
cssClass :: Parser CssClass
cssClass = Class <$> (char '.' *> cssIdentifier) <?> "class"
cssAttrib :: Parser CssAttrib
cssAttrib = between (char '[') (char ']') (Attrib <$> (cssWhiteSpace *> cssNamespacePrefix) <*> (cssIdentifier <* cssWhiteSpace) <*> optionMaybe attrib) <?> "attribute"
where attrib = (,) <$> matcher <*> value
matcher = try (string "~=") *> pure IncludeMatcher
<|> try (string "|=") *> pure HyphenMatcher
<|> try (string "^=") *> pure PrefixMatcher
<|> try (string "$=") *> pure SuffixMatcher
<|> try (string "*=") *> pure SubstringMatcher
<|> string "=" *> pure EqualMatcher
value = try (IdentifierValue <$> cssIdentifier) <|> (StringValue <$> cssString)
cssPseudo :: Parser CssPseudo
cssPseudo = Pseudo <$> pseudoColon <*> pseudoIdentifier <*> pseudoExpression <?> "pseudo"
where pseudoColon = (try (string "::") *> pure PseudoElementType) <|> (char ':' *> pure PseudoClassType)
pseudoIdentifier = cssIdentifier <* cssWhiteSpace
pseudoExpression = between (char '(') (char ')') (Just <$> cssExpression) <|> pure Nothing
cssExpression :: Parser CssExpression
cssExpression = Expression <$> cssExpressionTerm <*> try (many (many1 space *> cssExpressionTerm) <|> pure [])
cssExpressionTerm :: Parser CssExpressionTerm
cssExpressionTerm = char '+' *> pure Plus
<|> char '-' *> pure Minus
<|> try (Dimension <$> cssNumber <*> optionMaybe cssIdentifier)
<|> try (String <$> cssString)
<|> (Identifier <$> cssIdentifier)
cssIdentifier :: Parser CssIdentifier
cssIdentifier = (++) <$> (string "-" <|> pure "") <*> ((++) <$> cssNameStart <*> many cssNameChar) <?> "identifier"
cssNameStart :: Parser String
cssNameStart = (return <$> (char '_' <|> letter <|> cssNonAscii)) <|> cssEscape <?> "start of name character"
cssNonAscii :: Parser Char
cssNonAscii = satisfy (\c -> c >= '\o240' && c <= '\o4177777') <?> "non ascii character"
cssEscape :: Parser String
cssEscape = cssUnicode <|> ((:) <$> char '\\' <*> (return <$> noneOf ("\n\r\f" ++ cssHexDigits))) <?> "escape sequence"
cssHexDigits :: String
cssHexDigits = ['0'..'9'] ++ ['a'..'f'] ++ ['A'..'F']
cssUnicode :: Parser String
cssUnicode = (:) <$> char '\\' <*> (try (count 6 hexDigit) <|> ((++) <$> (many1 hexDigit <* unicodeEnd) <*> pure " ")) <?> "unicode escape sequence"
where unicodeEnd = try (string "\r\n") <|> string " " <|> string "\n" <|> string "\r" <|> string "\t" <|> string "\f" <?> "end of unicode escape sequence"
cssNameChar :: Parser Char
cssNameChar = oneOf "-_" <|> alphaNum <?> "name character"
cssName :: Parser CssName
cssName = many1 cssNameChar <?> "name"
cssHash :: Parser CssHash
cssHash = Hash <$> (string "#" *> cssName) <?> "hash selector"
cssString :: Parser CssString
cssString = Quote <$> (quotedString '\'' <|> quotedString '\"') <?> "string literal"
where quotedString q = between (char q) (char q) (concat <$> many (quotedStringPart q))
quotedStringPart q = try ((:) <$> char '\\' <*> cssNewLine) <|> cssEscape <|> (return <$> cssNonAscii) <|> (return <$> noneOf (q : "\n\r\f"))
cssNumber :: Parser Double
cssNumber = try ((+) <$> (numberParser <|> pure 0) <*> (char '.' *> fractionParser)) <|> numberParser <?> "number"
where numberParser = (fromInteger . foldl (\x d -> 10*x + toInteger (digitToInt d)) 0) <$> many1 digit
fractionParser = foldr (\d f -> (f + fromIntegral (digitToInt d))/10.0) 0.0 <$> many1 digit
-- | Parses one newline
-- > nl \n|\r\n|\r|\f
cssNewLine :: Parser String
cssNewLine = try $ string "\r\n" <|> string "\n" <|> string "\r" <|> string "\f"
-- | Parses whitespace
-- > w [ \t\r\n\f]*
cssWhiteSpace :: Parser String
cssWhiteSpace = many (oneOf " \t\r\n\f")
--parseCSS :: Text -> Either ParseError CssSelectorsGroup
--parseCSS input = parse cssSelectorGroup "culo" input
main :: IO ()
main = putStrLn "Error parsing input:"
--do c <- getContents
-- case parse cssHash "(stdin)" (T.pack c) of
-- Left e -> do putStrLn "Error parsing input:"
-- print e
-- Right r -> mapM_ print r