diff --git a/.travis.yml b/.travis.yml index 04ed0b3..1d6d675 100644 --- a/.travis.yml +++ b/.travis.yml @@ -18,10 +18,10 @@ jobs: - lein trampoline test - nvm install 10.10 && nvm use 10.10 && lein trampoline cljsbuild test - # only run the benchmark is we are on master + # only run the benchmark if we are trying to merge to master # otherwise the build takes too long - stage: Benchmark - if: head_branch = master + if: branch = master script: - lein trampoline test :benchmark diff --git a/README.md b/README.md index a95fbef..083ef8d 100644 --- a/README.md +++ b/README.md @@ -32,20 +32,29 @@ full explanation of the options available for a parser please visit Instaparse w [:symbol "parcera.core"] [:whitespace " "] [:list - [:simple-keyword "require"] + [:simple-keyword ":require"] [:whitespace " "] [:vector [:symbol "instaparse.core"] [:whitespace " "] - [:simple-keyword "as"] + [:simple-keyword ":as"] [:whitespace " "] [:symbol "instaparse"]] [:whitespace " "] - [:vector [:symbol "clojure.data"] [:whitespace " "] [:simple-keyword "as"] [:whitespace " "] [:symbol "data"]] + [:vector [:symbol "clojure.data"] [:whitespace " "] [:simple-keyword ":as"] [:whitespace " "] [:symbol "data"]] [:whitespace " "] - [:vector [:symbol "clojure.string"] [:whitespace " "] [:simple-keyword "as"] [:whitespace " "] [:symbol "str"]]]]] + [:vector [:symbol "clojure.string"] [:whitespace " "] [:simple-keyword ":as"] [:whitespace " "] [:symbol "str"]]]]] ;; convert an AST back into a string (parcera/code [:symbol "ns"]) ;; "ns" ``` + +### notes +There are some restrictions as to how much a parser can do. In my experience, these restrictions +are related to some [semantic context-sensitivity](http://blog.reverberate.org/2013/09/ll-and-lr-in-context-why-parsing-tools.html) +which the Clojure reader has embedded into itself. In general I have found the following ones: + - `parcera` doesn't check that a map contains an even number of elements. 
This is especially difficult + to do since Clojure supports the discard macro `#_ form` which is a valid element but "doesn't count as one" + - `parcera` doesn't check if a map has repeated keys + - `parcera` doesn't check if a set has repeated elements diff --git a/project.clj b/project.clj index e05b4d0..e1516a9 100644 --- a/project.clj +++ b/project.clj @@ -1,4 +1,4 @@ -(defproject carocad/parcera "0.2.1" +(defproject carocad/parcera "0.3.0" :description "Grammar-based Clojure(script) parser" :url "https://github.com/carocad/parcera" :license {:name "LGPLv3" diff --git a/src/parcera/core.cljc b/src/parcera/core.cljc index adb51f3..88cca67 100644 --- a/src/parcera/core.cljc +++ b/src/parcera/core.cljc @@ -1,76 +1,80 @@ (ns parcera.core - (:require [instaparse.core :as instaparse]) + (:require [instaparse.core :as instaparse] + [instaparse.combinators-source :as combi] + [instaparse.cfg :as cfg] + [parcera.terminals :as terminal]) #?(:cljs (:import goog.string.StringBuffer))) -(def grammar +; todo: implement the advice from +; http://blog.reverberate.org/2013/09/ll-and-lr-in-context-why-parsing-tools.html +; https://www.loggly.com/blog/regexes-the-bad-better-best/ +; https://www.loggly.com/blog/five-invaluable-techniques-to-improve-regex-performance/ + +; todo: use the advice in https://medium.appbase.io/analyzing-20k-github-repositories-af76de21c3fc +; to check if the heuristics are accurate + +; NOTE: Through my experiments I found out that Instaparse will gladly take the +; first match as long as the grammar is not ambiguous. Therefore I replaced the +; unordered OR (|) with an ordered one (/). This of course implies a heuristic +; of knowing which grammar rules are expected to match more often. I use +; Clojure's core as a reference with the following code snippet +#_(let [core-content (slurp "https://raw.githubusercontent.com/clojure/clojure/master/src/clj/clojure/core.clj")] + (time (sort-by second > (frequencies (filter keyword? 
(flatten (clojure core-content :optimize :memory))))))) +#_(let [core-content (slurp "https://raw.githubusercontent.com/clojure/clojurescript/master/src/main/clojure/cljs/core.cljc")] + (time (sort-by second > (frequencies (filter keyword? (flatten (clojure core-content :optimize :memory))))))) +; todo: performance of [,\s]*;.*|[,\s]+ for whitespace +(def grammar-rules "code: form*; -
: whitespace ( literal - | symbol - | collection - | reader-macro - ) - whitespace; + : whitespace / literal / collection / reader-macro; - whitespace = #'[,\\s]*' + (* we treat comments the same way as commas *) + whitespace = #'([,\\s]*;.*)?([,\\s]+|$)'; - : &#'[\\(\\[{#]' ( list - | vector - | map - | set - ) - ; + (* for parsing purposes we dont consider a Set a collection since it starts + with # -> dispatch macro *) + : list / vector / map; list: <'('> form* <')'> ; vector: <'['> form* <']'> ; - map: map-namespace? <'{'> map-content <'}'> ; + map: <'{'> form* <'}'>; - map-namespace: <'#'> (keyword | auto-resolve); + (* a literal is basically anything that is not a collection, macro or whitespace *) + : ( symbol + / keyword + / string + / number + / character + ); - map-content: (form form)* + : simple-keyword / macro-keyword ; - set: <'#{'> form* <'}'> ; + : ( unquote + / metadata + / backtick + / quote + / dispatch + / unquote-splicing + / deref + / symbolic + ); - : - number - | string - | character - | keyword - | comment - | symbolic - ; + set: <'#{'> form* <'}'>; - symbolic: #'##(Inf|-Inf|NaN)' + namespaced-map: <'#'> ( keyword / auto-resolve ) map; - number: ( DOUBLE | RATIO | LONG ) !symbol (* remove ambiguity with symbols 1/5 - 1 -> number, / -> symbol, 5 -> number *); + auto-resolve: '::'; - character: <'\\\\'> ( SIMPLE-CHAR | UNICODE-CHAR ) !symbol (* remove ambiguity with symbols \backspace - \b -> character, ackspace -> symbol *); + metadata: (metadata-entry whitespace)+ ( symbol + / collection + / tag + / unquote + / unquote-splicing + ); - : - dispatch - | metadata - | deref - | quote - | backtick - | unquote - | unquote-splicing - ; - - : &'#' ( function | regex | var-quote | discard | tag | conditional | conditional-splicing); - - function: <'#'> list; - - metadata: <'^'> ( map | shorthand-metadata ) form; - - : ( symbol | string | keyword ); - - regex: <'#'> string; - - var-quote: <'#\\''> symbol; + metadata-entry: <'^'> ( map / symbol / 
string / keyword ); quote: <'\\''> form; @@ -82,67 +86,42 @@ deref: <'@'> form; - discard: <'#_'> form; - - tag: <#'#(?![_?])'> symbol form; - - conditional: <'#?'> list; - - conditional-splicing: <'#?@'> list; + : function + / regex + / set + / conditional + / conditional-splicing + / namespaced-map + / var-quote + / discard + / tag; - string : <'\"'> #'[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*' <'\"'>; + function: <'#('> form* <')'>; - symbol: !SYMBOL-HEAD name; - - : simple-keyword | macro-keyword ; - - auto-resolve: '::' ; - - simple-keyword: <':'> !':' name; - - macro-keyword: !':' name; + var-quote: <'#\\''> symbol; - comment: <';'> #'.*'; + discard: <'#_'> form; - (* - ;; symbols cannot start with number, :, # - ;; / is a valid symbol as long as it is not part of the name - ;; note: added ' as invalid first character due to ambiguity in #'hello - ;; -> [:tag [:symbol 'hello]] - ;; -> [:var-quote [:symbol hello]] - *) - SYMBOL-HEAD: number | ':' | '#' | '\\'' + tag: <#'#(?![_?])'> symbol whitespace? (literal / collection); - (* - ;; NOTE: several characters are not allowed according to clojure reference. - ;; https://clojure.org/reference/reader#_symbols - ;; EDN reader says otherwise https://github.com/edn-format/edn#symbols - ;; nil, true, false are actually symbols with special meaning ... not grammar rules - ;; on their own - VALID-CHARACTERS>: #'[^\\s\\(\\)\\[\\]{}\"@~\\^;`]+' - *) - : #'([^\\s\\(\\)\\[\\]{}\"@~,\\\\^;`]+\\/)?(\\/|([^\\s\\(\\)\\[\\]{}\"@~,\\\\^;`]+))(?!\\/)' + conditional: <'#?('> form* <')'>; - (* HIDDEN PARSERS ------------------------------------------------------ *) + conditional-splicing: <'#?@('> form* <')'>; - : #'[-+]?(\\d+(\\.\\d*)?([eE][-+]?\\d+)?)(M)?' 
+ symbolic: #'##(Inf|-Inf|NaN)'") - : #'[-+]?(\\d+)/(\\d+)'; - : #'[-+]?(?:(0)|([1-9]\\d*)|0[xX]([\\dA-Fa-f]+)|0([0-7]+)|([1-9]\\d?)[rR]([\\d\\w]+)|0\\d+)(N)?'; +(def grammar-terminals + {:character (combi/regexp terminal/character-pattern) + :string (combi/regexp terminal/string-pattern) + :symbol (combi/regexp terminal/symbol-pattern) + :number (combi/regexp terminal/number-pattern) + :macro-keyword (combi/regexp terminal/macro-keyword) + :simple-keyword (combi/regexp terminal/simple-keyword) + :regex (combi/regexp terminal/regex-pattern)}) - : #'u[\\dD-Fd-f]{4}'; - : - 'newline' - | 'return' - | 'space' - | 'tab' - | 'formfeed' - | 'backspace' - | #'[^\\u0300-\\u036F\\u1DC0-\\u1DFF\\u20D0-\\u20FF][\\u0300-\\u036F\\u1DC0-\\u1DFF\\u20D0-\\u20FF]*'; - (* This is supposed to be the JavaScript friendly version of #'\\P{M}\\p{M}*+' mentioned here: https://www.regular-expressions.info/unicode.html - It's cooked by this generator: http://kourge.net/projects/regexp-unicode-block, ticking all 'Combining Diacritical Marks' boxes *)") +(def grammar (merge (cfg/ebnf grammar-rules) grammar-terminals)) (def clojure @@ -157,7 +136,7 @@ For a description of all possible options, visit Instaparse's official documentation: https://github.com/Engelberg/instaparse#reference" - (instaparse/parser grammar)) + (instaparse/parser grammar :start :code)) (defn- code* @@ -180,14 +159,11 @@ (doseq [child (rest ast)] (code* child string-builder)) (. string-builder (append "]"))) - :map - (doseq [child (rest ast)] (code* child string-builder)) - - :map-namespace + :namespaced-map (do (. string-builder (append "#")) - (code* (second ast) string-builder)) + (doseq [child (rest ast)] (code* child string-builder))) - :map-content + :map (do (. string-builder (append "{")) (doseq [child (rest ast)] (code* child string-builder)) (. string-builder (append "}"))) @@ -197,42 +173,23 @@ (doseq [child (rest ast)] (code* child string-builder)) (. 
string-builder (append "}"))) - (:number :whitespace :symbolic :auto-resolve :symbol) + (:number :whitespace :symbolic :auto-resolve :symbol :simple-keyword + :macro-keyword :character :string :regex) (. string-builder (append (second ast))) - :string - (do (. string-builder (append "\"")) - (. string-builder (append (second ast))) - (. string-builder (append "\""))) - - :character - (do (. string-builder (append "\\")) - (. string-builder (append (second ast)))) - - :simple-keyword - (do (. string-builder (append ":")) - (. string-builder (append (second ast)))) - - :macro-keyword - (do (. string-builder (append "::")) - (. string-builder (append (second ast)))) - - :comment - (do (. string-builder (append ";")) - (. string-builder (append (second ast)))) - :metadata - (do (. string-builder (append "^")) - (doseq [child (rest ast)] (code* child string-builder))) + (do (doseq [child (rest (butlast ast))] (code* child string-builder)) + (code* (last ast) string-builder)) + + :metadata-entry + (doseq [child (rest ast)] + (. string-builder (append "^")) + (code* child string-builder)) :quote (do (. string-builder (append "'")) (doseq [child (rest ast)] (code* child string-builder))) - :regex - (do (. string-builder (append "#")) - (code* (second ast) string-builder)) - :var-quote (do (. string-builder (append "#'")) (code* (second ast) string-builder)) @@ -258,20 +215,23 @@ (doseq [child (rest ast)] (code* child string-builder))) :conditional - (do (. string-builder (append "#?")) - (code* (second ast) string-builder)) + (do (. string-builder (append "#?(")) + (doseq [child (rest ast)] (code* child string-builder)) + (. string-builder (append ")"))) :conditional-splicing - (do (. string-builder (append "#?@")) - (code* (second ast) string-builder)) + (do (. string-builder (append "#?@(")) + (doseq [child (rest ast)] (code* child string-builder)) + (. string-builder (append ")"))) :deref (do (. 
string-builder (append "@")) (doseq [child (rest ast)] (code* child string-builder))) :function - (do (. string-builder (append "#")) - (code* (second ast) string-builder)))) + (do (. string-builder (append "#(")) + (doseq [child (rest ast)] (code* child string-builder)) + (. string-builder (append ")"))))) (defn code @@ -290,10 +250,13 @@ (. string-builder (toString)))) ; Successful parse. -; Profile: {:create-node 1651, :push-full-listener 2, :push-stack 1651, :push-listener 1689, :push-result 273, :push-message 275} -; "Elapsed time: 141.452323 msecs" +; Profile: {:create-node 384, :push-full-listener 2, :push-stack 384, +; :push-listener 382, :push-result 227, :push-message 227 } +; "Elapsed time: 47.25084 msecs" #_(time (clojure (str '(ns parcera.core (:require [instaparse.core :as instaparse] [clojure.data :as data] [clojure.string :as str]))) :trace true)) + +#_(instaparse/disable-tracing!) diff --git a/src/parcera/terminals.cljc b/src/parcera/terminals.cljc new file mode 100644 index 0000000..c9788de --- /dev/null +++ b/src/parcera/terminals.cljc @@ -0,0 +1,39 @@ +(ns parcera.terminals) + +; todo: anchor ALL to the beginning of string +; todo: try to avoid lookahead + +;; Clojure's reader is quite permissive so we follow the motto "if it is not forbidden, it is allowed" +; todo: dont allow / +(def NAME "[^\\s\\(\\)\\[\\]{}\"@~\\^;`\\\\]+") +; todo: (?!\/) do i need that ? +;; symbols cannot start with a number, :, # nor ' +; todo: no need for negative lookahead of chars +(def symbol-pattern (str "(?![:#\\',]|[+-]?\\d+)(" NAME "\\/)?(\\/|(" NAME "))")) + +(def double-suffix "(((\\.\\d*)?([eE][-+]?\\d+)?)M?)") +(def long-suffix "((0[xX]([\\dA-Fa-f]+)|0([0-7]+)|([1-9]\\d?)[rR]([\\d\\w]+)|0\\d+)?N?)") +(def ratio-suffix "(\\/(\\d+))") +(def number-pattern (str "[+-]?\\d+(" long-suffix "|" double-suffix "|" ratio-suffix ")(?![\\.\\/])")) ; todo: word boundary ? 
+ + +; This is supposed to be the JavaScript friendly version of #'\P{M}\p{M}*+' +; mentioned here: https://www.regular-expressions.info/unicode.html +; It's cooked by this generator: http://kourge.net/projects/regexp-unicode-block +; ticking all 'Combining Diacritical Marks' boxes +; todo: repeated pattern could be simplified +(def unicode-char "([^\\u0300-\\u036F\\u1DC0-\\u1DFF\\u20D0-\\u20FF][\\u0300-\\u036F\\u1DC0-\\u1DFF\\u20D0-\\u20FF]*)") +(def named-char "(newline|return|space|tab|formfeed|backspace)") +(def unicode "(u[\\dA-Fa-f]{4})") ; \uXXXX escape: 'u' followed by exactly four hexadecimal digits (0-9, A-F, a-f) +; todo: use word boundary to avoid lookahead +(def character-pattern (str "\\\\(" unicode-char "|" named-char "|" unicode ")(?!\\w+)")) + + +; : is not allowed as first keyword character +; todo: no need for negative lookahead of symbol +(def simple-keyword (str ":(?!:)" symbol-pattern)) +(def macro-keyword (str "::(?!:)" NAME)) + + +(def string-pattern "\"[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*\"") +(def regex-pattern (str "#" string-pattern)) diff --git a/test/parcera/test/benchmark.clj b/test/parcera/test/benchmark.clj index fe6e42c..4bb385f 100644 --- a/test/parcera/test/benchmark.clj +++ b/test/parcera/test/benchmark.clj @@ -2,7 +2,8 @@ (:require [clojure.test :refer [deftest is testing]] [clojure.test.check :as tc] [criterium.core :as criterium] - [parcera.test.core :as pt])) + [parcera.test.core :as pt] + [parcera.core :as parcera])) (deftest ^:benchmark parsing (println "Benchmark: Time parsing Clojure values ⌛") @@ -15,3 +16,15 @@ (println "Benchmark: Round trip of Clojure values 🚀") (criterium/quick-bench (tc/quick-check 30 pt/symmetric) :os :runtime :verbose)) + + +;; execute last ... 
hopefully +(deftest ^:benchmark z-known-namespace + (newline) + (newline) + (println "Benchmark: Parsing parcera namespace with traces 👮") + (criterium/quick-bench (parcera/clojure (str '(ns parcera.core + (:require [instaparse.core :as instaparse] + [clojure.data :as data] + [clojure.string :as str])))) + :os :runtime :verbose)) diff --git a/test/parcera/test/core.cljc b/test/parcera/test/core.cljc index e5fd363..766d31b 100644 --- a/test/parcera/test/core.cljc +++ b/test/parcera/test/core.cljc @@ -8,31 +8,35 @@ [instaparse.core :as instaparse] #?(:cljs [parcera.slurp :refer [slurp]]))) + (defn- roundtrip "checks parcera can parse and write back the exact same input code" [input] (= input (parcera/code (parcera/clojure input)))) + (defn- valid? [input] (not (instaparse/failure? (parcera/clojure input)))) + (defn- clear [input] - (= 1 (count (instaparse/parses parcera/clojure input)))) + (= 1 (count (instaparse/parses parcera/clojure input :unhide :all)))) + (def validity "The grammar definition of parcera is valid for any clojure value. Meaning that for any clojure value, parcera can create an AST for it" (prop/for-all [input (gen/fmap pr-str gen/any)] - (= false (instaparse/failure? (parcera/clojure input))))) + (valid? input))) (def symmetric "The read <-> write process of parcera MUST be symmetrical. Meaning that the AST and the text representation are equivalent" (prop/for-all [input (gen/fmap pr-str gen/any)] - (= input (parcera/code (parcera/clojure input))))) + (roundtrip input))) (def unambiguous @@ -40,18 +44,36 @@ that any input should (but must not) only have 1 AST representation ... 
however I have found this is not always possible" (prop/for-all [input (gen/fmap pr-str gen/any)] - (= 1 (count (instaparse/parses parcera/clojure input))))) + (clear input))) + (deftest simple (testing "character literals" - (as-> "\\t" input (is (= input (parcera/code (parcera/clojure input))))) - (as-> "\\n" input (is (= input (parcera/code (parcera/clojure input))))) - (as-> "\\r" input (is (= input (parcera/code (parcera/clojure input))))) - (as-> "\\a" input (is (= input (parcera/code (parcera/clojure input))))) - (as-> "\\é" input (is (= input (parcera/code (parcera/clojure input))))) - (as-> "\\ö" input (is (= input (parcera/code (parcera/clojure input))))) - (as-> "\\ï" input (is (= input (parcera/code (parcera/clojure input))))) - (as-> "\\ϕ" input (is (= input (parcera/code (parcera/clojure input))))))) + (as-> "\\t" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))) + (as-> "\\n" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))) + (as-> "\\r" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))) + (as-> "\\a" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))) + (as-> "\\é" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))) + (as-> "\\ö" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))) + (as-> "\\ï" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))) + (as-> "\\ϕ" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))))) + (deftest data-structures (testing "grammar definitions" @@ -73,99 +95,218 @@ "high accuracy\n" (with-out-str (pprint/pprint result))))))) + (deftest unit-tests (testing "names" - (as-> "foo" input (is (and (valid? input) (roundtrip input) (clear input)))) - (as-> "foo-bar" input (is (and (valid? input) (roundtrip input) (clear input)))) - (as-> "foo->bar" input (is (and (valid? 
input) (roundtrip input) (clear input)))) - (as-> "->" input (is (and (valid? input) (roundtrip input) (clear input)))) - (as-> "->as" input (is (and (valid? input) (roundtrip input) (clear input)))) - (as-> "föl" input (is (and (valid? input) (roundtrip input) (clear input)))) - (as-> "Öl" input (is (and (valid? input) (roundtrip input) (clear input)))) - (as-> "ϕ" input (is (and (valid? input) (roundtrip input) (clear input)))) - (as-> "❤️" input (is (and (valid? input) (roundtrip input) (clear input)))))) + (as-> "foo" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))) + (as-> "foo-bar" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))) + (as-> "foo->bar" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))) + (as-> "->" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))) + (as-> "->as" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))) + (as-> "föl" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))) + (as-> "Öl" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))) + (as-> "ϕ" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))) + (as-> "❤️" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))))) + + +(deftest edge-cases + (testing "comments" + (as-> "{:hello ;2} + 2}" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))))) (deftest macros (testing "metadata" - (as-> "^String [a b 2]" input (is (and (valid? input) (roundtrip input) (clear input)))) - (as-> "^\"String\" [a b 2]" input (is (and (valid? input) (roundtrip input) (clear input)))) - (as-> "^:string [a b 2]" input (is (and (valid? input) (roundtrip input) (clear input)))) - (as-> "^{:a 1} [a b 2]" input (is (and (valid? input) (roundtrip input) (clear input)))) - (as-> "^:hello ^\"World\" ^{:a 1} [a b 2]" input (is (and (valid? 
input) (roundtrip input) (clear input))))) + (as-> "^String [a b 2]" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))) + (as-> "^\"String\" [a b 2]" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))) + (as-> "^:string [a b 2]" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))) + (as-> "^{:a 1} [a b 2]" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))) + (as-> "^:hello ^\"World\" ^{:a 1} [a b 2]" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input))))) (testing "discard" - (as-> "#_[a b 2]" input (is (and (valid? input) (roundtrip input) (clear input)))) - (as-> "#_(a b 2)" input (is (and (valid? input) (roundtrip input) (clear input)))) - (as-> "#_{:a 1}" input (is (and (valid? input) (roundtrip input) (clear input)))) - (as-> "#_macros" input (is (and (valid? input) (roundtrip input) (clear input))))) + (as-> "#_[a b 2]" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))) + (as-> "#_(a b 2)" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))) + (as-> "#_{:a 1}" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))) + (as-> "#_macros" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input))))) (testing "regex" - (as-> "#_\"[a b 2]\"" input (is (and (valid? input) (roundtrip input) (clear input))))) + (as-> "#_\"[a b 2]\"" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input))))) (testing "comments" - (as-> ";[a b 2]" input (is (and (valid? input) (roundtrip input) (clear input)))) - (as-> ";; \"[a b 2]\"" input (is (and (valid? input) (roundtrip input) (clear input))))) + (as-> ";[a b 2]" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))) + (as-> ";; \"[a b 2]\"" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))) + (as-> "2 ;[a b 2]" input (and (is (valid? 
input)) + (is (roundtrip input)) + (is (clear input)))) + (as-> " :hello ;; \"[a b 2]\"" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input))))) (testing "var quote" - (as-> "#'hello/world" input (is (and (valid? input) (roundtrip input) (clear input)))) - (as-> "#'/" input (is (and (valid? input) (roundtrip input) (clear input))))) + (as-> "#'hello/world" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))) + (as-> "#'/" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input))))) (testing "tag" - (as-> "#hello/world [1 a \"3\"]" input (is (and (valid? input) (roundtrip input) (clear input)))) - (as-> "#hello/world {1 \"3\"}" input (is (and (valid? input) (roundtrip input) (clear input))))) + (as-> "#hello/world [1 a \"3\"]" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))) + (as-> "#hello/world {1 \"3\"}" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input))))) (testing "keyword" - (as-> "::hello/world [1 a \"3\"]" input (is (and (valid? input) (roundtrip input) (clear input)))) - (as-> "::hello" input (is (and (valid? input) (roundtrip input) (clear input))))) + (as-> "::hello/world [1 a \"3\"]" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))) + (as-> "::hello" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input))))) (testing "quote" - (as-> "'hello/world" input (is (and (valid? input) (roundtrip input) (clear input)))) - (as-> "'hello" input (is (and (valid? input) (roundtrip input) (clear input)))) - (as-> "'/" input (is (and (valid? input) (roundtrip input) (clear input))))) + (as-> "'hello/world" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))) + (as-> "'hello" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))) + (as-> "'/" input (and (is (valid? 
input)) + (is (roundtrip input)) + (is (clear input))))) (testing "backtick" - (as-> "`hello/world" input (is (and (valid? input) (roundtrip input) (clear input)))) - (as-> "`hello" input (is (and (valid? input) (roundtrip input) (clear input)))) - (as-> "`/" input (is (and (valid? input) (roundtrip input) (clear input))))) + (as-> "`hello/world" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))) + (as-> "`hello" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))) + (as-> "`/" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input))))) (testing "unquote" - (as-> "~hello/world" input (is (and (valid? input) (roundtrip input) (clear input)))) - (as-> "~(hello 2 3)" input (is (and (valid? input) (roundtrip input) (clear input)))) - (as-> "~/" input (is (and (valid? input) (roundtrip input) (clear input))))) + (as-> "~hello/world" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))) + (as-> "~(hello 2 3)" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))) + (as-> "~/" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input))))) (testing "quote splicing" - (as-> "~@hello/world" input (is (and (valid? input) (roundtrip input) (clear input)))) - (as-> "~@(hello 2 b)" input (is (and (valid? input) (roundtrip input) (clear input))))) + (as-> "~@hello/world" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))) + (as-> "~@(hello 2 b)" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input))))) (testing "deref" - (as-> "@hello/world" input (is (and (valid? input) (roundtrip input) (clear input)))) - (as-> "@hello" input (is (and (valid? input) (roundtrip input) (clear input)))) - (as-> "@/" input (is (and (valid? input) (roundtrip input) (clear input))))) + (as-> "@hello/world" input (and (is (valid? 
input)) + (is (roundtrip input)) + (is (clear input)))) + (as-> "@hello" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))) + (as-> "@/" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input))))) (testing "anonymous function" - (as-> "#(= (str %1 %2 %&))" input (is (and (valid? input) (roundtrip input) (clear input))))) + (as-> "#(= (str %1 %2 %&))" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input))))) (testing "namespaced map" - (as-> "#::{:a 1 b 3}" input (is (and (valid? input) (roundtrip input) (clear input)))) - (as-> "#::hello{:a 1 b 3}" input (is (and (valid? input) (roundtrip input) (clear input))))) + (as-> "#::{:a 1 b 3}" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))) + (as-> "#::hello{:a 1 b 3}" input (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input))))) (testing "reader conditional" (as-> "#?(:clj Double/NaN :cljs js/NaN :default nil)" input - (is (and (valid? input) (roundtrip input) (clear input)))) + (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))) (as-> "[1 2 #?@(:clj [3 4] :cljs [5 6])]" input - (is (and (valid? input) (roundtrip input) (clear input)))))) + (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))))) (deftest bootstrap (testing "parcera should be able to parse itself" (let [input (slurp "./src/parcera/core.cljc")] - (is (and (valid? input) (roundtrip input) (clear input))))) + (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))) + (let [input (slurp "./src/parcera/slurp.cljc")] + (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input))))) (testing "parcera should be able to parse its own test suite" (let [input (slurp "./test/parcera/test/core.cljc")] - (is (and (valid? input) (roundtrip input) (clear input)))))) + (and (is (valid? 
input)) + (is (roundtrip input)) + (is (clear input)))) + (let [input (slurp "./test/parcera/test/benchmark.clj")] + (and (is (valid? input)) + (is (roundtrip input)) + (is (clear input)))))) (deftest clojure$cript