Skip to content

Commit

Permalink
Merge pull request #20 from carocad/symbols
Browse files Browse the repository at this point in the history
Symbols and macro keyword patterns
  • Loading branch information
carocad committed Oct 16, 2019
2 parents 3363f5d + b4ca5c6 commit a9e71b4
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 21 deletions.
2 changes: 1 addition & 1 deletion project.clj
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
(defproject carocad/parcera "0.3.0"
(defproject carocad/parcera "0.3.1"
:description "Grammar-based Clojure(script) parser"
:url "https://github.com/carocad/parcera"
:license {:name "LGPLv3"
Expand Down
43 changes: 24 additions & 19 deletions src/parcera/terminals.cljc
Original file line number Diff line number Diff line change
@@ -1,15 +1,28 @@
(ns parcera.terminals)
(ns parcera.terminals
"Clojure symbols, keywords, numbers and strings/regexes share quite a lot
of matching logic. This namespace aims to clearly identify those shared
pieces and reuse them among the different definitions to
avoid recurring issues.")

; todo: anchor ALL to the beginning of string
; todo: try to avoid lookahead
;; Clojure's reader is quite permissive so we follow the motto
;; "if it is not forbidden, it is allowed"
;; Body of a regex character class listing the characters that may not
;; appear in a name: whitespace, ( ) [ ] { }, double quote, @ ~ ^ ; `,
;; backslash, forward slash and comma.
(def not-allowed "\\s\\(\\)\\[\\]{}\"@~\\^;`\\\\\\/,")
;; Zero or more characters, none of which is in `not-allowed`.
(def allowed-characters (str "[^" not-allowed "]*"))
;; Negative lookahead: fails when the upcoming text is an optional sign
;; followed by at least one digit.
(def not-number "(?![+-]?\\d+)")
;; Positive lookahead: the match must be followed by whitespace, a double
;; quote, one of ( ) [ ] { }, a comma, or the end of the input.
(def symbol-end "(?=[\\s\"()\\[\\]{},]|$)")

(defn- name-pattern
  "Builds the regex source string for an optionally namespaced name.

  `restriction` is a string of extra character-class entries that are
  forbidden as the FIRST character of each segment, on top of `not-allowed`.

  The resulting pattern is made of three parts, in order:
    1. an optional namespace segment followed by a `/`
    2. either a lone `/` or a name segment
    3. the `symbol-end` lookahead"
  [restriction]
  (let [head           (str "[^" restriction not-allowed "]")
        segment        (str head allowed-characters)
        namespace-part (str "(" segment "\\/)?")
        name-part      (str "(\\/|(" segment "))")]
    (str namespace-part name-part symbol-end)))


;; A symbol must not look like a number (`not-number` lookahead) and its
;; segments may not start with `:`, `#` or `'`.
(def symbol-pattern (str not-number (name-pattern ":#\\'")))
;; A single `:` followed by a name whose segments may not start with `:`.
(def simple-keyword (str ":" (name-pattern ":")))
;; A double `::` followed by a name whose segments may not start with `:`.
(def macro-keyword (str "::" (name-pattern ":")))

;; Clojure's reader is quite permissive so we follow the motto "if it is not forbidden, it is allowed"
; todo: dont allow /
(def NAME "[^\\s\\(\\)\\[\\]{}\"@~\\^;`\\\\]+")
; todo: (?!\/) do i need that ?
;; symbols cannot start with a number, :, # nor '
; todo: no need for negative lookahead of chars
(def symbol-pattern (str "(?![:#\\',]|[+-]?\\d+)(" NAME "\\/)?(\\/|(" NAME "))"))

;; Tail of a floating point literal: an optional fraction (`.` plus digits),
;; an optional exponent (`e`/`E`, optional sign, digits) and an optional
;; trailing `M`. Every part is optional, so this can match the empty string.
;; NOTE(review): presumably appended to an integer-part pattern defined in
;; the lines collapsed out of this view - confirm.
(def double-suffix "(((\\.\\d*)?([eE][-+]?\\d+)?)M?)")
;; Tail of an integer literal. Alternatives, in order:
;;   0x / 0X followed by hex digits
;;   0 followed by octal digits (0-7)
;;   one or two digits (first one 1-9), `r`/`R`, then digits/word characters
;;   0 followed by any decimal digits
;; The whole alternation is optional and may be followed by `N`.
(def long-suffix "((0[xX]([\\dA-Fa-f]+)|0([0-7]+)|([1-9]\\d?)[rR]([\\d\\w]+)|0\\d+)?N?)")
Expand All @@ -21,19 +34,11 @@
; mentioned here: https://www.regular-expressions.info/unicode.html
; It's cooked by this generator: http://kourge.net/projects/regexp-unicode-block
; ticking all 'Combining Diacritical Marks' boxes *))
; todo: repeated pattern could be simplified
(def unicode-char "([^\\u0300-\\u036F\\u1DC0-\\u1DFF\\u20D0-\\u20FF][\\u0300-\\u036F\\u1DC0-\\u1DFF\\u20D0-\\u20FF]*)")
(def unicode-char "([^\\u0300-\\u036F\\u1DC0-\\u1DFF\\u20D0-\\u20FF])")
(def named-char "(newline|return|space|tab|formfeed|backspace)")
;; Body of a unicode character literal (the part after the backslash):
;; `u` followed by exactly four hexadecimal digits, e.g. u00AB or u03bc.
;; The digit class covers the full hex range 0-9, A-F and a-f; the previous
;; class (D-F / d-f) rejected any literal containing A, B, C, a, b or c.
(def unicode "(u[\\dA-Fa-f]{4})")
; todo: use word boundary to avoid lookahead
;; A character literal: a backslash followed by one of `unicode-char`,
;; `named-char` or `unicode` (tried in that order), with a negative
;; lookahead that rejects the match when word characters follow it.
(def character-pattern (str "\\\\(" unicode-char "|" named-char "|" unicode ")(?!\\w+)"))


; : is not allowed as first keyword character
; todo: no need for negative lookahead of symbol
(def simple-keyword (str ":(?!:)" symbol-pattern))
(def macro-keyword (str "::(?!:)" NAME))


;; A double-quoted string: an opening quote, a run of characters that are
;; neither a quote nor a backslash, then any number of
;; (backslash + one character + another such run), and a closing quote.
;; This lets escaped quotes (\") appear inside the string.
(def string-pattern "\"[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*\"")
;; A regex literal: `#` immediately followed by a string literal.
(def regex-pattern (str "#" string-pattern))
6 changes: 5 additions & 1 deletion test/parcera/test/core.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,11 @@
(as-> "{:hello ;2}
2}" input (and (is (valid? input))
(is (roundtrip input))
(is (clear input))))))
(is (clear input)))))
(testing "symbols"
(as-> "hello/world/" input (is (not (valid? input))))
(as-> ":hello/world/" input (is (not (valid? input))))
(as-> "::hello/world/" input (is (not (valid? input))))))


(deftest macros
Expand Down

0 comments on commit a9e71b4

Please sign in to comment.