diff --git a/.gitignore b/.gitignore index 5bae9d9..0655512 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,12 @@ pom.xml.asc /.idea/ /nashorn_code_cache /.cljs_nashorn_repl +/build/ +/yarn-error.log +/node_modules/ +/out/ +/src/java/ +/src/javascript +/figwheel_server.log +package*.json +/.eastwood diff --git a/.travis.yml b/.travis.yml index 1d6d675..489935b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,16 +14,17 @@ jobs: include: - stage: Tests script: + - curl -O https://www.antlr.org/download/antlr-4.7.1-complete.jar + # generate java + - java -jar antlr-4.7.1-complete.jar -Xexact-output-dir -o src/java/parcera/antlr -package parcera.antlr -Dlanguage=Java -no-listener -no-visitor src/Clojure.g4 + # now we can actually proceed with clojure code - lein do clean, compile, check, eastwood - lein trampoline test - - nvm install 10.10 && nvm use 10.10 && lein trampoline cljsbuild test - - # only run the benchmark if we are trying to merge to master - # otherwise the build takes too long - - stage: Benchmark - if: branch = master - script: - lein trampoline test :benchmark + # todo - re-enable js + # generate javascript - todo + #- java -jar antlr-4.7.1-complete.jar -Xexact-output-dir -o src/javascript/parcera/antlr -package parcera.antlr -Dlanguage=JavaScript -no-listener -no-visitor src/Clojure.g4 + #- nvm install 10.10 && nvm use 10.10 && lein trampoline cljsbuild test - stage: Release if: tag IS present diff --git a/README.md b/README.md index 60ab704..e45ac6c 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![Build Status](https://travis-ci.com/carocad/parcera.svg?branch=master)](https://travis-ci.com/carocad/parcera) [![Clojars Project](https://img.shields.io/clojars/v/carocad/parcera.svg)](https://clojars.org/carocad/parcera) -Grammar-based Clojure(script) parser. +Grammar-based Clojure parser. Parcera can safely read any Clojure file without any code evaluation. @@ -19,42 +19,33 @@ full explanation of the options available for a parser please visit Instaparse w [instaparse.core :as instaparse])) ;;parse clojure code from a string -(parcera/clojure (str '(ns parcera.core - (:require [instaparse.core :as instaparse] - [clojure.data :as data] - [clojure.string :as str])))) +(parcera/ast (str '(ns parcera.core + (:require [instaparse.core :as instaparse] + [clojure.data :as data] + [clojure.string :as str])))) ;; => returns a data structure with the result from the parser -[:code - [:list - [:symbol "ns"] - [:whitespace " "] - [:symbol "parcera.core"] - [:whitespace " "] - [:list - [:simple-keyword ":require"] - [:whitespace " "] - [:vector - [:symbol "instaparse.core"] - [:whitespace " "] - [:simple-keyword ":as"] - [:whitespace " "] - [:symbol "instaparse"]] - [:whitespace " "] - [:vector [:symbol "clojure.data"] [:whitespace " "] [:simple-keyword ":as"] [:whitespace " "] [:symbol "data"]] - [:whitespace " "] - [:vector [:symbol "clojure.string"] [:whitespace " "] [:simple-keyword ":as"] [:whitespace " "] [:symbol "str"]]]]] +(:code + (:list + (:symbol "ns") + (:whitespace " ") + (:symbol "parcera.core") + (:whitespace " ") + (:list + (:simple_keyword "require") + (:whitespace " ") + (:vector + (:symbol "instaparse.core") + (:whitespace " ") + (:simple_keyword "as") + (:whitespace " ") + (:symbol "instaparse")) + (:whitespace " ") + (:vector (:symbol "clojure.data") (:whitespace " ") (:simple_keyword "as") (:whitespace " ") (:symbol "data")) + (:whitespace " ") + (:vector (:symbol "clojure.string") (:whitespace " ") (:simple_keyword "as") (:whitespace " ") (:symbol "str"))))) ;; convert an AST back into a string (parcera/code [:symbol "ns"]) ;; "ns" ``` - -### notes -There are some restrictions as to how much can a parser do. In my experience, these restrictions -are related to some [semantic context-sensitivity](http://blog.reverberate.org/2013/09/ll-and-lr-in-context-why-parsing-tools.html). -which the Clojure reader has embedded into itself. In general I have found the following ones: -- `parcera` doesnt check that a map contains an even number of elements. This is specially difficult - to do since Clojure supports the discard macro `#_ form` which is a valid element but "doesnt count as one" -- `parcera` doesnt check if a map has repeated keys -- `parcera` doesnt check if a set has repeated elements diff --git a/pom.xml b/pom.xml index e78ceff..e9ec8cf 100644 --- a/pom.xml +++ b/pom.xml @@ -3,9 +3,9 @@ carocad parcera jar - 0.3.1 + 0.4.0 parcera - Grammar-based Clojure(script) parser + Grammar-based Clojure parser https://github.com/carocad/parcera @@ -17,10 +17,10 @@ https://github.com/carocad/parcera scm:git:git://github.com/carocad/parcera.git scm:git:ssh://git@github.com/carocad/parcera.git - b4ca5c659e55f00781e37bee1dc6bb400460e307 + 4ff04f242eddc4791cfdf2df572f91890c202e6c - src + src/clojure test @@ -28,13 +28,37 @@ + + target + resources target target/classes - + + + org.codehaus.mojo + build-helper-maven-plugin + 1.7 + + + add-source + generate-sources + + add-source + + + + src/javascript + src/java + + + + + + @@ -68,14 +92,9 @@ 1.10.1 - instaparse - instaparse - 1.4.10 - - - org.clojure - clojurescript - 1.10.520 + org.antlr + antlr4-runtime + 4.7.1 provided diff --git a/project.clj b/project.clj index b65b02b..cc778c7 100644 --- a/project.clj +++ b/project.clj @@ -1,26 +1,23 @@ -(defproject carocad/parcera "0.3.1" - :description "Grammar-based Clojure(script) parser" +(defproject carocad/parcera "0.4.0" + :description "Grammar-based Clojure parser" :url "https://github.com/carocad/parcera" :license {:name "LGPLv3" :url "https://github.com/carocad/parcera/blob/master/LICENSE.md"} - :dependencies [[org.clojure/clojure "1.10.1"] - [instaparse/instaparse "1.4.10"]] - :profiles {:dev {:dependencies [[criterium/criterium "0.4.5"] ;; benchmark - [org.clojure/test.check "0.10.0"]] - :plugins [[jonase/eastwood "0.3.5"] - [lein-cljsbuild "1.1.7"]] - :cljsbuild {:builds - [{:id "dev" - :source-paths ["src" "test"] - :compiler {:main parcera.test-runner - :output-to "target/out/tests.js" - :target :nodejs - :optimizations :none}}] - :test-commands - {"test" ["node" "target/out/tests.js"]}}} - :provided {:dependencies [[org.clojure/clojurescript "1.10.520"]]}} + :source-paths ["src/clojure"] + :java-source-paths ["src/java"] + :dependencies [[org.clojure/clojure "1.10.1"]] + :profiles {:dev {:dependencies [[criterium/criterium "0.4.5"] ;; benchmark + [org.clojure/test.check "0.10.0"]] ;; generative testing + :plugins [[jonase/eastwood "0.3.5"]] ;; linter + :resource-paths ["target"] + :clean-targets ^{:protect false} ["target"]} + ;; java reloader + ;[lein-virgil "0.1.9"]] + :provided {:dependencies [[org.antlr/antlr4-runtime "4.7.1"]]}} + :test-selectors {:default (fn [m] (not (some #{:benchmark} (keys m)))) :benchmark :benchmark} + :deploy-repositories [["clojars" {:url "https://clojars.org/repo" :username :env/clojars_username :password :env/clojars_password diff --git a/scripts/figwheel.clj b/scripts/figwheel.clj new file mode 100644 index 0000000..41fd686 --- /dev/null +++ b/scripts/figwheel.clj @@ -0,0 +1,2 @@ +(require '[figwheel.main.api :as fig]) +(fig/start "dev") diff --git a/src/Clojure.g4 b/src/Clojure.g4 new file mode 100644 index 0000000..a271a7f --- /dev/null +++ b/src/Clojure.g4 @@ -0,0 +1,152 @@ + +grammar Clojure; + +/* + * NOTES to myself and to other developers: + * + * - You have to remember that the parser cannot check for semantics + * - You have to find the right balance of dividing enforcement between the + * grammar and your own code. + * + * The parser should only check the syntax. So the rule of thumb is that when + * in doubt you let the parser pass the content up to your program. Then, in + * your program, you check the semantics and make sure that the rule actually + * have a proper meaning + * + * https://tomassetti.me/antlr-mega-tutorial/#lexers-and-parser +*/ + +code: form*; + +form: whitespace | literal | collection | reader_macro; + +// sets and namespaced map are not considerd collection from grammar perspective +// since they start with # -> dispatch macro +collection: list | vector | map; + +list: '(' form* ')'; + +vector: '[' form* ']'; + +map: '{' form* '}'; + +literal: keyword | string | number | character | symbol; + +keyword: simple_keyword | macro_keyword; + +// making symbols, simple and macro keywords be based on NAME allows to +// conform them all in the same way (see `conform` function) +simple_keyword: ':' NAME; + +macro_keyword: '::' NAME; + +string: STRING; + +number: NUMBER; + +character: CHARACTER; + +symbol: NAME; + +reader_macro: ( unquote + | metadata + | backtick + | quote + | dispatch + | unquote_splicing + | deref + ); + +unquote: '~' form; + +metadata: (metadata_entry whitespace?)+ ( symbol + | collection + | tag + | unquote + | unquote_splicing + ); + +metadata_entry: '^' ( map | symbol | string | keyword ); + +backtick: '`' form; + +quote: '\'' form; + +unquote_splicing: '~@' form; + +deref: '@' form; + +dispatch: function + | regex + | set + | conditional + | conditional_splicing + | namespaced_map + | var_quote + | discard + | tag + | symbolic; + +function: '#(' form* ')'; + +regex: '#' STRING; + +set: '#{' form* '}'; + +namespaced_map: '#' ( keyword | auto_resolve) map; + +auto_resolve: '::'; + +var_quote: '#\'' symbol; + +discard: '#_' form; + +tag: '#' symbol whitespace? (literal | collection); + +conditional: '#?(' form* ')'; + +conditional_splicing: '#?@(' form* ')'; + +symbolic: '##' ('Inf' | '-Inf' | 'NaN'); + +// whitespace or comment +whitespace: WHITESPACE; + +NUMBER: [+-]? DIGIT+ (DOUBLE_SUFFIX | LONG_SUFFIX | RATIO_SUFFIX); + +STRING: '"' ~["\\]* ('\\' . ~["\\]*)* '"'; + +WHITESPACE: (SPACE | COMMENT)+; + +COMMENT: ';' ~[\r\n]*; + +SPACE: [\r\n\t\f, ]+; + +CHARACTER: '\\' (UNICODE_CHAR | NAMED_CHAR | UNICODE); + +NAME: NAME_HEAD NAME_BODY*; + +fragment UNICODE_CHAR: ~[\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF]; + +fragment NAMED_CHAR: 'newline' | 'return' | 'space' | 'tab' | 'formfeed' | 'backspace'; + +fragment UNICODE: 'u' [0-9d-fD-F] [0-9d-fD-F] [0-9d-fD-F] [0-9d-fD-F]; + +// re-allow :#' as valid characters inside the name itself +fragment NAME_BODY: NAME_HEAD | [:#'0-9]; + +// these is the set of characters that are allowed by all symbols and keywords +// however, this is more strict that necessary so that we can re-use it for both +fragment NAME_HEAD: ~[\r\n\t\f ()[\]{}"@~^;`\\,:#'0-9]; + +fragment DOUBLE_SUFFIX: ((('.' DIGIT*)? ([eE][-+]?DIGIT+)?) 'M'?); + +fragment LONG_SUFFIX: ('0'[xX]((DIGIT|[A-Fa-f])+) | + '0'([0-7]+) | + ([1-9]DIGIT?)[rR](DIGIT[a-zA-Z]+) | + '0'DIGIT+ + )?'N'?; + +fragment RATIO_SUFFIX: '/' DIGIT+; + +fragment DIGIT: [0-9]; diff --git a/src/clojure/parcera/antlr/java.clj b/src/clojure/parcera/antlr/java.clj new file mode 100644 index 0000000..525038d --- /dev/null +++ b/src/clojure/parcera/antlr/java.clj @@ -0,0 +1,99 @@ +(ns parcera.antlr.java + (:require [parcera.antlr.protocols :as antlr]) + (:import (parcera.antlr ClojureParser ClojureLexer) + (org.antlr.v4.runtime ParserRuleContext Token CommonTokenStream CharStreams ANTLRErrorListener Parser) + (org.antlr.v4.runtime.tree ErrorNodeImpl))) + +(set! *warn-on-reflection* true) + + +;; A custom Error Listener to avoid Antlr printing the errors on the terminal +;; by default. This is also useful to mimic Instaparse :total parse mechanism +;; such that if we get an error, we can report it as the result instead +(defrecord AntlrFailure [reports] + ANTLRErrorListener + ;; I am not sure how to use these methods. If you came here wondering why + ;; is this being printed, please open an issue so that we can all benefit + ;; from your findings ;) + (reportAmbiguity [this parser dfa start-index stop-index exact ambig-alts configs] + ;; TODO + (println "report ambiguity: " parser dfa start-index stop-index exact ambig-alts configs)) + (reportAttemptingFullContext [this parser dfa start-index stop-index conflicting-alts configs] + ;; TODO + (println "report attempting full context: " parser dfa start-index stop-index conflicting-alts configs)) + (reportContextSensitivity [this parser dfa start-index stop-index prediction configs] + ;; TODO + (println "report context sensitivity: " parser dfa start-index stop-index prediction configs)) + (syntaxError [this recognizer offending-symbol line char message error] + ;; recognizer is either clojureParser or clojureLexer + (let [report (merge {:row line + :column char + :message message + :type (if (instance? Parser recognizer) :parser :lexer)} + (when (instance? Parser recognizer) + {:symbol (str offending-symbol) + :stack (->> (.getRuleInvocationStack ^Parser recognizer) + (reverse) + (map keyword))}) + (when (some? error) + {:error error}))] + (vswap! reports conj report)))) + + +;; start and end are tokens not positions. +;; So '(hello/world)' has '(' 'hello/world' and ')' as tokens +(extend-type ParserRuleContext + antlr/ParserRule + (children [^ParserRuleContext this] (.-children this)) + (rule-index [^ParserRuleContext this] (.getRuleIndex this)) + antlr/LocationInfo + (span [^ParserRuleContext this] + (let [start (.getStart this) + stop (.getStop this)] + (cond + ;; happens when the parser rule is a single lexer rule + (= start stop) + {::start {:row (.getLine start) + :column (.getCharPositionInLine start)} + ::end {:row (.getLine start) + :column (.getStopIndex start)}} + + ;; no end found - happens on errors + (nil? stop) + {::start {:row (.getLine start) + :column (.getCharPositionInLine start)}} + + :else + {::start {:row (.getLine start) + :column (.getCharPositionInLine start)} + ::end {:row (.getLine stop) + :column (.getCharPositionInLine stop)}})))) + + +(extend-type ErrorNodeImpl + antlr/LocationInfo + (span [^ErrorNodeImpl this] + (let [token (.-symbol this)] + {::start {:row (.getLine token) + :column (.getCharPositionInLine token)}}))) + + +(extend-type ClojureParser + antlr/AntlrParser + (rules [^ClojureParser this] (into [] (map keyword) (.getRuleNames this))) + (tree [^ClojureParser this] (. this (code)))) + + +(defn parser + [input] + (let [listener (->AntlrFailure (volatile! ())) + chars (CharStreams/fromString input) + lexer (doto (new ClojureLexer chars) + (.removeErrorListeners) + (.addErrorListener listener)) + tokens (new CommonTokenStream lexer) + parser (doto (new ClojureParser tokens) + (.setBuildParseTree true) + (.removeErrorListeners) + (.addErrorListener listener))] + {:parser parser :errors {:parser listener}})) diff --git a/src/clojure/parcera/antlr/javascript.cljs b/src/clojure/parcera/antlr/javascript.cljs new file mode 100644 index 0000000..abb0afe --- /dev/null +++ b/src/clojure/parcera/antlr/javascript.cljs @@ -0,0 +1,50 @@ +(ns parcera.antlr.javascript + (:require [parcera.antlr.protocols :as antlr] + [antlr4 :refer [CharStreams CommonTokenStream]] + [parcera.antlr.ClojureLexer :refer [ClojureLexer]] + [parcera.antlr.ClojureParser :refer [ClojureParser]])) + +(set! *warn-on-infer* true) + + +#_(extend-type ParserRuleContext + antlr/ParserRule + (children [^ParserRuleContext this] (.-children this)) + (rule-index [^ParserRuleContext this] (.getRuleIndex this)) + (start [^ParserRuleContext this] (.getStart this)) + (end [^ParserRuleContext this] (.getStop this))) + + +#_(extend-type ErrorNodeImpl + antlr/ErrorNode + (token [^ErrorNodeImpl this] (.-symbol this))) + + +#_(extend-type Token + antlr/Token + (row [^Token this] (.getLine this)) + (column [^Token this] (.getCharPositionInLine this))) + + +#_(extend-type clojureParser + antlr/AntlrParser + (rules [^clojureParser this] (vec (.getRuleNames this))) + (tree [^clojureParser this] (. this (code)))) + + +(defn parser + [input] + {:parser input}) + +#_(defn parser + [input listener] + (let [chars (CharStreams/fromString input) + lexer (doto (new clojureLexer chars) + (.removeErrorListeners)) + ;; todo: how to handle lexer errors ? + ;(.addErrorListener listener)) + tokens (new CommonTokenStream lexer)] + (doto (new clojureParser tokens) + (.setBuildParseTree true) + (.removeErrorListeners) + (.addErrorListener listener)))) diff --git a/src/clojure/parcera/antlr/protocols.cljc b/src/clojure/parcera/antlr/protocols.cljc new file mode 100644 index 0000000..7acde2b --- /dev/null +++ b/src/clojure/parcera/antlr/protocols.cljc @@ -0,0 +1,21 @@ +(ns parcera.antlr.protocols + "These protocols are a cheat: I use them to be able to dispatch + to both Java and JavaScript parser implementations without the + common code having to know about it") + + +(defprotocol AntlrParser + (rules [this]) + (tree [this])) + + +(defprotocol ParserRule + (children [this]) + (rule-index [this])) + +(defprotocol LocationInfo + (span [this])) + + +(defprotocol ErrorNode + (token [this])) diff --git a/src/clojure/parcera/core.cljc b/src/clojure/parcera/core.cljc new file mode 100644 index 0000000..7e250f6 --- /dev/null +++ b/src/clojure/parcera/core.cljc @@ -0,0 +1,268 @@ +(ns parcera.core + (:require [parcera.antlr.protocols :as antlr] + #?(:clj [parcera.antlr.java :as platform])) + ; todo: re-enable once we have javscript support + ;:cljs [parcera.antlr.javascript :as platform])) + #?(:cljs (:import goog.string.StringBuffer))) + + +(def default-hidden {:tags #{:form :collection :literal :keyword :reader_macro :dispatch} + :literals #{"(" ")" "[" "]" "{" "}" "#{" "#" "^" "`" "'" "~" + "~@" "@" "#(" "#'" "#_" "#?(" "#?@(" "##" ":" "::"}}) + + +;; for some reason cljs doesnt accept escaping the / characters +(def name-pattern #?(:clj #"^([^\s\/]+\/)?(\/|[^\s\/]+)$" + :cljs #"^([^\s/]+/)?(/|[^\s/]+)$")) + + +(defn- failure + "Checks that `rule` conforms to additional rules which are too difficult + to represent with pure Antlr4 syntax" + [rule children metadata] + (case rule + (:symbol :simple_keyword :macro_keyword) + (when (nil? (re-find name-pattern (first children))) + (with-meta (list ::failure (cons rule children)) + (assoc-in metadata [::start :message] + (str "name cannot contain more than one /")))) + + :map + (let [forms (remove (comp #{:whitespace :discard} first) children)] + (when (odd? (count forms)) + (with-meta (list ::failure (cons rule children)) + (assoc-in metadata [::start :message] + "Map literal must contain an even number of forms")))) + + :set + (let [forms (remove (comp #{:whitespace :discard} first) children) + set-length (count forms) + unique-length (count (distinct forms))] + (when (not= set-length unique-length) + (with-meta (list ::failure (cons rule children)) + (assoc-in metadata [::start :message] + "Set literal contains duplicate forms")))) + + nil)) + + +(defn- hiccup + "transforms the tree `hiccup-like` ast data structure. + + Yields a lazy sequence to avoid expensive computation whenever + the user is not interested in the full content." + [tree rule-names hide-tags hide-literals] + (cond + (boolean (satisfies? antlr/ParserRule tree)) + (let [rule (get rule-names (antlr/rule-index tree)) + children (for [child (antlr/children tree) + :let [child (hiccup child rule-names hide-tags hide-literals)] + :when (not (nil? child))] + child) + ;; attach meta data ... ala instaparse + ast-meta (antlr/span tree) + ;; extra validation rules + fail (failure rule children ast-meta)] + ;; parcera hidden tags are always "or" statements, so just take the single children + (if (contains? hide-tags rule) + (first children) + (or fail (with-meta (cons rule children) ast-meta)))) + + (boolean (satisfies? antlr/ErrorNode tree)) + (with-meta (list ::failure (str tree)) + (antlr/span tree)) + + :else + (let [text (str tree)] + (if (contains? hide-literals text) nil text)))) + + +(defn- unhide + [options] + (case (:unhide options) + :all (dissoc default-hidden :literals :tags) + :content (dissoc default-hidden :literals) + :tags (dissoc default-hidden :tags) + default-hidden)) + + +(defn ast + "Clojure (antlr4) parser. It can be used as: + - `(parcera/ast input-string)` + -> returns a lazy AST representation of input-string + + The following options are accepted: + - `:unhide` can be one of `#{:tags :content :all}`. Defaults to `nil` + + NOTE: Antlr returns a fully parsed version of the provided input string + however this function returns a lazy sequence in order to expose + those through Clojure's immutable data structures" + [input & {:as options}] + (let [hidden (unhide options) + {:keys [parser errors]} (platform/parser input) + rule-names (antlr/rules parser) + tree (antlr/tree parser) + result (hiccup tree rule-names (:tags hidden) (:literals hidden)) + reports @(:reports (:parser errors))] + (vary-meta result assoc ::errors reports))) + + +(defn- code* + "internal function used to imperatively build up the code from the provided + AST as Clojure's str would be too slow" + [ast #?(:clj ^StringBuilder string-builder + :cljs ^StringBuffer string-builder)] + (case (first ast) + :code + (doseq [child (rest ast)] + (code* child string-builder)) + + :list + (do (. string-builder (append "(")) + (doseq [child (rest ast)] (code* child string-builder)) + (. string-builder (append ")"))) + + :vector + (do (. string-builder (append "[")) + (doseq [child (rest ast)] (code* child string-builder)) + (. string-builder (append "]"))) + + :namespaced_map + (do (. string-builder (append "#")) + (doseq [child (rest ast)] (code* child string-builder))) + + :map + (do (. string-builder (append "{")) + (doseq [child (rest ast)] (code* child string-builder)) + (. string-builder (append "}"))) + + :set + (do (. string-builder (append "#{")) + (doseq [child (rest ast)] (code* child string-builder)) + (. string-builder (append "}"))) + + (:number :whitespace :symbol :character :string) + (. string-builder (append (second ast))) + + :symbolic + (do (. string-builder (append "##")) + (. string-builder (append (second ast)))) + + :regex + (do (. string-builder (append "#")) + (. string-builder (append (second ast)))) + + :auto_resolve + (. string-builder (append "::")) + + :simple_keyword + (do (. string-builder (append ":")) + (. string-builder (append (second ast)))) + + :macro_keyword + (do (. string-builder (append "::")) + (. string-builder (append (second ast)))) + + :metadata + (do (doseq [child (rest (butlast ast))] (code* child string-builder)) + (code* (last ast) string-builder)) + + :metadata_entry + (doseq [child (rest ast)] + (. string-builder (append "^")) + (code* child string-builder)) + + :quote + (do (. string-builder (append "'")) + (doseq [child (rest ast)] (code* child string-builder))) + + :var_quote + (do (. string-builder (append "#'")) + (code* (second ast) string-builder)) + + :discard + (do (. string-builder (append "#_")) + (doseq [child (rest ast)] (code* child string-builder))) + + :tag + (do (. string-builder (append "#")) + (doseq [child (rest ast)] (code* child string-builder))) + + :backtick + (do (. string-builder (append "`")) + (doseq [child (rest ast)] (code* child string-builder))) + + :unquote + (do (. string-builder (append "~")) + (doseq [child (rest ast)] (code* child string-builder))) + + :unquote_splicing + (do (. string-builder (append "~@")) + (doseq [child (rest ast)] (code* child string-builder))) + + :conditional + (do (. string-builder (append "#?(")) + (doseq [child (rest ast)] (code* child string-builder)) + (. string-builder (append ")"))) + + :conditional_splicing + (do (. string-builder (append "#?@(")) + (doseq [child (rest ast)] (code* child string-builder)) + (. string-builder (append ")"))) + + :deref + (do (. string-builder (append "@")) + (doseq [child (rest ast)] (code* child string-builder))) + + :function + (do (. string-builder (append "#(")) + (doseq [child (rest ast)] (code* child string-builder)) + (. string-builder (append ")"))))) + + +(defn code + "Transforms your AST back into code + + ast: The nested sequence of [:keyword & content] which MUST follow the + same structure as the result of `(parcera/clojure input-string)` + + Returns a string representation of the provided AST + + In general (= input (parcera/code (parcera/clojure input)))" + [ast] + (let [string-builder #?(:clj (new StringBuilder) + :cljs (new StringBuffer))] + (code* ast string-builder) + (. string-builder (toString)))) + + +(defn failure? + "Checks if ast contains any `::failure` instances. + + NOTE: This function is potentially slow since it might have to check the + complete ast to be sure that there are no failures. + + Whenever possible, prefer to handle errors directly appearing in the ast" + [ast] + (or + ;; ast is root node + (not (empty? (::errors (meta ast)))) + ;; ast is child node + (and (seq? ast) (= ::failure (first ast))) + ;; ast is root node but "doesnt know" about the failure -> conformed + (some #{::failure} (filter keyword? (tree-seq seq? identity ast))))) + +#_(time (ast (str '(ns parcera.core + (:require [instaparse.core :as instaparse] + [clojure.data :as data] + [clojure.string :as str]))))) + +#_(time (ast "(ns parcera.core + (:require [instaparse.core :as #{:hello \"world\" :hello}] + [clojure.data :as data] + [clojure.string :as str])")) + +#_(filter :meta (map #(hash-map :item % :meta (meta %)) + (tree-seq seq? seq (ast " + (ns + parcera.core))")))) diff --git a/src/parcera/slurp.cljc b/src/clojure/parcera/slurp.cljc similarity index 100% rename from src/parcera/slurp.cljc rename to src/clojure/parcera/slurp.cljc diff --git a/src/parcera/core.cljc b/src/parcera/core.cljc deleted file mode 100644 index 88cca67..0000000 --- a/src/parcera/core.cljc +++ /dev/null @@ -1,262 +0,0 @@ -(ns parcera.core - (:require [instaparse.core :as instaparse] - [instaparse.combinators-source :as combi] - [instaparse.cfg :as cfg] - [parcera.terminals :as terminal]) - #?(:cljs (:import goog.string.StringBuffer))) - -; todo: implement advices from -; http://blog.reverberate.org/2013/09/ll-and-lr-in-context-why-parsing-tools.html -; https://www.loggly.com/blog/regexes-the-bad-better-best/ -; https://www.loggly.com/blog/five-invaluable-techniques-to-improve-regex-performance/ - -; todo: use advices in https://medium.appbase.io/analyzing-20k-github-repositories-af76de21c3fc -; to check if the heuristics are accurate - -; NOTE: Through my experiments I found out that Instaparse will gladly take the -; first match as long as the grammar is not ambiguous. Therefore I switched the -; unordered OR (|) with an ordered one (/). This of course implies an heuristic -; of knowing which grammar rules are expected to match more often. I use -; Clojure's core as a reference with the following code snippet -#_(let [core-content (slurp "https://raw.githubusercontent.com/clojure/clojure/master/src/clj/clojure/core.clj")] - (time (sort-by second > (frequencies (filter keyword? (flatten (clojure core-content :optimize :memory))))))) -#_(let [core-content (slurp "https://raw.githubusercontent.com/clojure/clojurescript/master/src/main/clojure/cljs/core.cljc")] - (time (sort-by second > (frequencies (filter keyword? (flatten (clojure core-content :optimize :memory))))))) -; todo: performance of [,\s]*;.*|[,\s]+ for whitespace -(def grammar-rules - "code: form*; - -
: whitespace / literal / collection / reader-macro; - - (* we treat comments the same way as commas *) - whitespace = #'([,\\s]*;.*)?([,\\s]+|$)'; - - (* for parsing purposes we dont consider a Set a collection since it starts - with # -> dispatch macro *) - : list / vector / map; - - list: <'('> form* <')'> ; - - vector: <'['> form* <']'> ; - - map: <'{'> form* <'}'>; - - (* a literal is basically anything that is not a collection, macro or whitespace *) - : ( symbol - / keyword - / string - / number - / character - ); - - : simple-keyword / macro-keyword ; - - : ( unquote - / metadata - / backtick - / quote - / dispatch - / unquote-splicing - / deref - / symbolic - ); - - set: <'#{'> form* <'}'>; - - namespaced-map: <'#'> ( keyword / auto-resolve ) map; - - auto-resolve: '::'; - - metadata: (metadata-entry whitespace)+ ( symbol - / collection - / tag - / unquote - / unquote-splicing - ); - - metadata-entry: <'^'> ( map / symbol / string / keyword ); - - quote: <'\\''> form; - - backtick: <'`'> form; - - unquote: <#'~(?!@)'> form; - - unquote-splicing: <'~@'> form; - - deref: <'@'> form; - - : function - / regex - / set - / conditional - / conditional-splicing - / namespaced-map - / var-quote - / discard - / tag; - - function: <'#('> form* <')'>; - - var-quote: <'#\\''> symbol; - - discard: <'#_'> form; - - tag: <#'#(?![_?])'> symbol whitespace? (literal / collection); - - conditional: <'#?('> form* <')'>; - - conditional-splicing: <'#?@('> form* <')'>; - - symbolic: #'##(Inf|-Inf|NaN)'") - - -(def grammar-terminals - {:character (combi/regexp terminal/character-pattern) - :string (combi/regexp terminal/string-pattern) - :symbol (combi/regexp terminal/symbol-pattern) - :number (combi/regexp terminal/number-pattern) - :macro-keyword (combi/regexp terminal/macro-keyword) - :simple-keyword (combi/regexp terminal/simple-keyword) - :regex (combi/regexp terminal/regex-pattern)}) - - -(def grammar (merge (cfg/ebnf grammar-rules) grammar-terminals)) - - -(def clojure - "Clojure (instaparse) parser. It can be used as: - - (parcera/clojure input-string) - -> returns an AST representation of input-string - - (instaparse/parse parcera/clojure input-string) - -> same as above but more explicit - - (instaparse/parses parcera/clojure input-string) - -> returns a sequence of possible AST representations in case of ambiguity - in input-string - - For a description of all possible options, visit Instaparse's official - documentation: https://github.com/Engelberg/instaparse#reference" - (instaparse/parser grammar :start :code)) - - -(defn- code* - "internal function used to imperatively build up the code from the provided - AST as Clojure's str would be too slow" - [ast #?(:clj ^StringBuilder string-builder - :cljs ^StringBuffer string-builder)] - (case (first ast) - :code - (doseq [child (rest ast)] - (code* child string-builder)) - - :list - (do (. string-builder (append "(")) - (doseq [child (rest ast)] (code* child string-builder)) - (. string-builder (append ")"))) - - :vector - (do (. string-builder (append "[")) - (doseq [child (rest ast)] (code* child string-builder)) - (. string-builder (append "]"))) - - :namespaced-map - (do (. string-builder (append "#")) - (doseq [child (rest ast)] (code* child string-builder))) - - :map - (do (. string-builder (append "{")) - (doseq [child (rest ast)] (code* child string-builder)) - (. string-builder (append "}"))) - - :set - (do (. string-builder (append "#{")) - (doseq [child (rest ast)] (code* child string-builder)) - (. string-builder (append "}"))) - - (:number :whitespace :symbolic :auto-resolve :symbol :simple-keyword - :macro-keyword :character :string :regex) - (. string-builder (append (second ast))) - - :metadata - (do (doseq [child (rest (butlast ast))] (code* child string-builder)) - (code* (last ast) string-builder)) - - :metadata-entry - (doseq [child (rest ast)] - (. string-builder (append "^")) - (code* child string-builder)) - - :quote - (do (. string-builder (append "'")) - (doseq [child (rest ast)] (code* child string-builder))) - - :var-quote - (do (. string-builder (append "#'")) - (code* (second ast) string-builder)) - - :discard - (do (. string-builder (append "#_")) - (doseq [child (rest ast)] (code* child string-builder))) - - :tag - (do (. string-builder (append "#")) - (doseq [child (rest ast)] (code* child string-builder))) - - :backtick - (do (. string-builder (append "`")) - (doseq [child (rest ast)] (code* child string-builder))) - - :unquote - (do (. string-builder (append "~")) - (doseq [child (rest ast)] (code* child string-builder))) - - :unquote-splicing - (do (. string-builder (append "~@")) - (doseq [child (rest ast)] (code* child string-builder))) - - :conditional - (do (. string-builder (append "#?(")) - (doseq [child (rest ast)] (code* child string-builder)) - (. string-builder (append ")"))) - - :conditional-splicing - (do (. string-builder (append "#?@(")) - (doseq [child (rest ast)] (code* child string-builder)) - (. string-builder (append ")"))) - - :deref - (do (. string-builder (append "@")) - (doseq [child (rest ast)] (code* child string-builder))) - - :function - (do (. string-builder (append "#(")) - (doseq [child (rest ast)] (code* child string-builder)) - (. string-builder (append ")"))))) - - -(defn code - "Transforms your AST back into code - - ast: The nested sequence of [:keyword & content] which MUST follow the - same structure as the result of `(parcera/clojure input-string)` - - Returns a string representation of the provided AST - - In general (= input (parcera/code (parcera/clojure input)))" - [ast] - (let [string-builder #?(:clj (new StringBuilder) - :cljs (new StringBuffer))] - (code* ast string-builder) - (. string-builder (toString)))) - -; Successful parse. -; Profile: {:create-node 384, :push-full-listener 2, :push-stack 384, -; :push-listener 382, :push-result 227, :push-message 227 } -; "Elapsed time: 47.25084 msecs" -#_(time (clojure (str '(ns parcera.core - (:require [instaparse.core :as instaparse] - [clojure.data :as data] - [clojure.string :as str]))) - :trace true)) - -#_(instaparse/disable-tracing!) diff --git a/src/parcera/terminals.cljc b/src/parcera/terminals.cljc deleted file mode 100644 index b9e3c69..0000000 --- a/src/parcera/terminals.cljc +++ /dev/null @@ -1,44 +0,0 @@ -(ns parcera.terminals - "Clojure symbols, keywords, numbers and string/regex share quite a lot - of matching logic. This namespace is aimed towards clearly identifying - those pieces and share them among the different definitions to - avoid recurring issues") - -;; Clojure's reader is quite permissive so we follow the motto -;; "if it is not forbidden, it is allowed" -(def not-allowed "\\s\\(\\)\\[\\]{}\"@~\\^;`\\\\\\/,") -(def allowed-characters (str "[^" not-allowed "]*")) -(def not-number "(?![+-]?\\d+)") -(def symbol-end "(?=[\\s\"()\\[\\]{},]|$)") - -(defn- name-pattern - [restriction] - (let [first-character (str "[^" restriction not-allowed "]")] - (str "(" first-character allowed-characters "\\/)?" - "(\\/|(" first-character allowed-characters "))" - symbol-end))) - - -(def symbol-pattern (str not-number (name-pattern ":#\\'"))) -(def simple-keyword (str ":" (name-pattern ":"))) -(def macro-keyword (str "::" (name-pattern ":"))) - - -(def double-suffix "(((\\.\\d*)?([eE][-+]?\\d+)?)M?)") -(def long-suffix "((0[xX]([\\dA-Fa-f]+)|0([0-7]+)|([1-9]\\d?)[rR]([\\d\\w]+)|0\\d+)?N?)") -(def ratio-suffix "(\\/(\\d+))") -(def number-pattern (str "[+-]?\\d+(" long-suffix "|" double-suffix "|" ratio-suffix ")(?![\\.\\/])")) ; todo: word boundary ? - - -; This is supposed to be the JavaScript friendly version of #'\P{M}\p{M}*+' -; mentioned here: https://www.regular-expressions.info/unicode.html -; It's cooked by this generator: http://kourge.net/projects/regexp-unicode-block -; ticking all 'Combining Diacritical Marks' boxes *)) -(def unicode-char "([^\\u0300-\\u036F\\u1DC0-\\u1DFF\\u20D0-\\u20FF])") -(def named-char "(newline|return|space|tab|formfeed|backspace)") -(def unicode "(u[\\dD-Fd-f]{4})") -(def character-pattern (str "\\\\(" unicode-char "|" named-char "|" unicode ")(?!\\w+)")) - - -(def string-pattern "\"[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*\"") -(def regex-pattern (str "#" string-pattern)) diff --git a/test/parcera/test/benchmark.clj b/test/parcera/test/benchmark.clj index 4bb385f..5595409 100644 --- a/test/parcera/test/benchmark.clj +++ b/test/parcera/test/benchmark.clj @@ -6,25 +6,27 @@ [parcera.core :as parcera])) (deftest ^:benchmark parsing - (println "Benchmark: Time parsing Clojure values ⌛") + (newline) + (newline) + (println "Benchmark: Parsing automatically generated values") (criterium/quick-bench (tc/quick-check 30 pt/validity) - :os :runtime :verbose)) - -(deftest ^:benchmark roundtrip + :os :runtime :verbose) (newline) (newline) - (println "Benchmark: Round trip of Clojure values 🚀") + (println "Benchmark: Round trip of automatically generated values") (criterium/quick-bench (tc/quick-check 30 pt/symmetric) :os :runtime :verbose)) -;; execute last ... hopefully -(deftest ^:benchmark z-known-namespace - (newline) - (newline) - (println "Benchmark: Parsing parcera namespace with traces 👮") - (criterium/quick-bench (parcera/clojure (str '(ns parcera.core - (:require [instaparse.core :as instaparse] - [clojure.data :as data] - [clojure.string :as str])))) - :os :runtime :verbose)) +(deftest ^:benchmark clojure.core-roundtrip + (let [core-content (slurp "https://raw.githubusercontent.com/clojure/clojure/master/src/clj/clojure/core.clj")] + (newline) + (newline) + (println "Benchmark: Parsing Clojure's core namespace 🚧") + (criterium/quick-bench (parcera/ast core-content :optimize :memory) + :os :runtime :verbose) + (newline) + (newline) + (println "Benchmark: Rountrip Clojure's core namespace 🚧") + (criterium/quick-bench (parcera/code (parcera/ast core-content :optimize :memory)) + :os :runtime :verbose))) diff --git a/test/parcera/test/core.cljc b/test/parcera/test/core.cljc index 0ae9324..079828c 100644 --- a/test/parcera/test/core.cljc +++ b/test/parcera/test/core.cljc @@ -5,24 +5,24 @@ [clojure.test.check.properties :as prop] [clojure.test.check :as tc] [parcera.core :as parcera] - [instaparse.core :as instaparse] #?(:cljs [parcera.slurp :refer [slurp]]))) (defn- roundtrip "checks parcera can parse and write back the exact same input code" [input] - (= input (parcera/code (parcera/clojure input)))) + (= input (parcera/code (parcera/ast input)))) (defn- valid? [input] - (not (instaparse/failure? (parcera/clojure input)))) + (not (parcera/failure? (parcera/ast input)))) -(defn- clear - [input] - (= 1 (count (instaparse/parses parcera/clojure input :unhide :all)))) +;; todo: is this even possible with antlr ? 🤔 +#_(defn- clear + [input] + (= 1 (count (instaparse/parses parcera/ast input :unhide :all)))) (def validity @@ -39,40 +39,40 @@ (roundtrip input))) -(def unambiguous - "The process of parsing clojure code yields consistent results. Meaning +#_(def unambiguous + "The process of parsing clojure code yields consistent results. Meaning that any input should (but must not) only have 1 AST representation ... however I have found this is not always possible" - (prop/for-all [input (gen/fmap pr-str gen/any)] - (clear input))) + (prop/for-all [input (gen/fmap pr-str gen/any)] + (clear input))) (deftest simple (testing "character literals" (as-> "\\t" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "\\n" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "\\r" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "\\a" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "\\é" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "\\ö" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "\\ï" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "\\ϕ" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))))) + (is (roundtrip input)))))) +;(is (clear input)))))) (deftest data-structures @@ -88,237 +88,240 @@ (str "read <-> write process yield different result. Failed at\n" (with-out-str (pprint/pprint result)))))) - (testing "very little ambiguity" - (let [result (tc/quick-check 200 unambiguous)] - (is (:pass? result) - (str "high ambiguity case found. Please check the grammar to ensure " - "high accuracy\n" - (with-out-str (pprint/pprint result))))))) + #_(testing "very little ambiguity" + (let [result (tc/quick-check 200 unambiguous)] + (is (:pass? result) + (str "high ambiguity case found. Please check the grammar to ensure " + "high accuracy\n" + (with-out-str (pprint/pprint result))))))) (deftest unit-tests (testing "names" (as-> "foo" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "foo-bar" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "foo->bar" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "->" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "->as" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "föl" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "Öl" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "ϕ" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "❤️" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))))) + (is (roundtrip input)))))) +;(is (clear input)))))) (deftest edge-cases (testing "comments" (as-> "{:hello ;2} 2}" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input))))) + (is (roundtrip input))))) + ;(is (clear input))))) (testing "symbols" (as-> "hello/world/" input (is (not (valid? input)))) (as-> ":hello/world/" input (is (not (valid? input)))) - (as-> "::hello/world/" input (is (not (valid? input)))))) + (as-> "::hello/world/" input (is (not (valid? input))))) + + (testing "strings" + (as-> "hello \"world" input (is (not (valid? input)))))) (deftest macros (testing "metadata" (as-> "^String [a b 2]" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "^\"String\" [a b 2]" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "^:string [a b 2]" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "^{:a 1} [a b 2]" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "^:hello ^\"World\" ^{:a 1} [a b 2]" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input))))) + (is (roundtrip input))))) + ;(is (clear input))))) (testing "discard" (as-> "#_[a b 2]" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "#_(a b 2)" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "#_{:a 1}" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "#_macros" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input))))) + (is (roundtrip input))))) + ;(is (clear input))))) (testing "regex" (as-> "#_\"[a b 2]\"" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input))))) + (is (roundtrip input))))) + ;(is (clear input))))) (testing "comments" (as-> ";[a b 2]" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> ";; \"[a b 2]\"" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "2 ;[a b 2]" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> " :hello ;; \"[a b 2]\"" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input))))) + (is (roundtrip input))))) + ;(is (clear input))))) (testing "var quote" (as-> "#'hello/world" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "#'/" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input))))) + (is (roundtrip input))))) + ;(is (clear input))))) (testing "tag" (as-> "#hello/world [1 a \"3\"]" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "#hello/world {1 \"3\"}" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input))))) + (is (roundtrip input))))) + ;(is (clear input))))) (testing "keyword" (as-> "::hello/world [1 a \"3\"]" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "::hello" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input))))) + (is (roundtrip input))))) + ;(is (clear input))))) (testing "quote" (as-> "'hello/world" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "'hello" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "'/" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input))))) + (is (roundtrip input))))) + ;(is (clear input))))) (testing "backtick" (as-> "`hello/world" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "`hello" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "`/" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input))))) + (is (roundtrip input))))) + ;(is (clear input))))) (testing "unquote" (as-> "~hello/world" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "~(hello 2 3)" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "~/" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input))))) + (is (roundtrip input))))) + ;(is (clear input))))) (testing "quote splicing" (as-> "~@hello/world" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "~@(hello 2 b)" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input))))) + (is (roundtrip input))))) + ;(is (clear input))))) (testing "deref" (as-> "@hello/world" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "@hello" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "@/" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input))))) + (is (roundtrip input))))) + ;(is (clear input))))) (testing "anonymous function" (as-> "#(= (str %1 %2 %&))" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input))))) + (is (roundtrip input))))) + ;(is (clear input))))) (testing "namespaced map" (as-> "#::{:a 1 b 3}" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "#::hello{:a 1 b 3}" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input))))) + (is (roundtrip input))))) + ;(is (clear input))))) (testing "reader conditional" (as-> "#?(:clj Double/NaN :cljs js/NaN :default nil)" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "[1 2 #?@(:clj [3 4] :cljs [5 6])]" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))))) + (is (roundtrip input)))))) +;(is (clear input)))))) (deftest bootstrap (testing "parcera should be able to parse itself" - (let [input (slurp "./src/parcera/core.cljc")] + (let [input (slurp "./src/clojure/parcera/core.cljc")] (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) - (let [input (slurp "./src/parcera/slurp.cljc")] + (is (roundtrip input)))) + ;(is (clear input)))) + (let [input (slurp "./src/clojure/parcera/slurp.cljc")] (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input))))) + (is (roundtrip input))))) + ;(is (clear input))))) (testing "parcera should be able to parse its own test suite" (let [input (slurp "./test/parcera/test/core.cljc")] (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (let [input (slurp "./test/parcera/test/benchmark.clj")] (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))))) + (is (roundtrip input)))))) +;(is (clear input)))))) (deftest clojure$cript (testing "parcera should be able to parse clojure core" (let [core-content (slurp "https://raw.githubusercontent.com/clojure/clojure/master/src/clj/clojure/core.clj")] - (time (is (= core-content (parcera/code (parcera/clojure core-content :optimize :memory))))))) + (time (is (= core-content (parcera/code (parcera/ast core-content :optimize :memory))))))) (testing "parcera should be able to parse clojurescript core" (let [core-content (slurp "https://raw.githubusercontent.com/clojure/clojurescript/master/src/main/clojure/cljs/core.cljc")] - (time (is (= core-content (parcera/code (parcera/clojure core-content :optimize :memory)))))))) + (time (is (= core-content (parcera/code (parcera/ast core-content :optimize :memory))))))))