From 0c912b21194386cad874a7239b2027f8f9d7b9a4 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 27 Oct 2019 16:31:12 +0100 Subject: [PATCH 001/128] first draft of clojure grammar --- resources/clojure.g4 | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 resources/clojure.g4 diff --git a/resources/clojure.g4 b/resources/clojure.g4 new file mode 100644 index 0000000..930fd04 --- /dev/null +++ b/resources/clojure.g4 @@ -0,0 +1,23 @@ + +grammar clojure; + +code: form*; + +form: list | symbol | string | whitespace; + +list: '(' form* ')'; + +symbol: (NAME '/')? NAME; + +string: STRING; + +whitespace: WHITESPACE; + +STRING : '"' ( ~'"' | '\\' '"' )* '"' ; + +NAME: ~[\r\n\t\f()[\]{}@~^;`\\/, ]; + +// whitespace or comment +WHITESPACE: SPACE+ | (SPACE* ';' SPACE); + +fragment SPACE: [\r\n\t\f ]; From d38ac7d9db62aceca164830645c3eff86058167e Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 27 Oct 2019 16:48:02 +0100 Subject: [PATCH 002/128] refactored fragments to be more restrictive --- resources/clojure.g4 | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/resources/clojure.g4 b/resources/clojure.g4 index 930fd04..c9b7480 100644 --- a/resources/clojure.g4 +++ b/resources/clojure.g4 @@ -7,7 +7,7 @@ form: list | symbol | string | whitespace; list: '(' form* ')'; -symbol: (NAME '/')? NAME; +symbol: (SYMBOL_NAME '/')? 
SYMBOL_NAME; string: STRING; @@ -15,9 +15,14 @@ whitespace: WHITESPACE; STRING : '"' ( ~'"' | '\\' '"' )* '"' ; -NAME: ~[\r\n\t\f()[\]{}@~^;`\\/, ]; +// re-allow :#' as valid characters in a name +SYMBOL_NAME: NAME (NAME | [:#'])+; // whitespace or comment WHITESPACE: SPACE+ | (SPACE* ';' SPACE); fragment SPACE: [\r\n\t\f ]; + +// these is the set of characters that are allowed by all symbols and keywords +// however, this is more strict that necessary so that we can re-use it for both +fragment NAME: ~[\r\n\t\f()[\]{}@~^;`\\/, :#']; From 6bacee4c0e11ad639123305478e32568ddfda08b Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 27 Oct 2019 16:48:18 +0100 Subject: [PATCH 003/128] fix: allow anything inside comments --- resources/clojure.g4 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resources/clojure.g4 b/resources/clojure.g4 index c9b7480..521d66c 100644 --- a/resources/clojure.g4 +++ b/resources/clojure.g4 @@ -19,7 +19,7 @@ STRING : '"' ( ~'"' | '\\' '"' )* '"' ; SYMBOL_NAME: NAME (NAME | [:#'])+; // whitespace or comment -WHITESPACE: SPACE+ | (SPACE* ';' SPACE); +WHITESPACE: SPACE+ | (SPACE* ';' ~[\r\n]* SPACE*); fragment SPACE: [\r\n\t\f ]; From 04316203762b1d311afeeafb6b32f8c13813124e Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 27 Oct 2019 17:03:31 +0100 Subject: [PATCH 004/128] refactored names to reuse more functionality --- resources/clojure.g4 | 22 +++++++++++++++------- src/parcera/terminals.cljc | 2 ++ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/resources/clojure.g4 b/resources/clojure.g4 index 521d66c..0cec6f6 100644 --- a/resources/clojure.g4 +++ b/resources/clojure.g4 @@ -3,26 +3,34 @@ grammar clojure; code: form*; -form: list | symbol | string | whitespace; +form: list | symbol | keyword | string | whitespace; list: '(' form* ')'; -symbol: (SYMBOL_NAME '/')? 
SYMBOL_NAME; +symbol: NAME; -string: STRING; +keyword: simple_keyword | macro_keyword; + +simple_keyword: ':' NAME; + +macro_keyword: '::' NAME; + +string: '"' ( ~'"' | '\\' '"' )* '"'; whitespace: WHITESPACE; -STRING : '"' ( ~'"' | '\\' '"' )* '"' ; +NAME: (SIMPLE_NAME '/')? SIMPLE_NAME; -// re-allow :#' as valid characters in a name -SYMBOL_NAME: NAME (NAME | [:#'])+; +SIMPLE_NAME: NAME_HEAD NAME_BODY+; // whitespace or comment WHITESPACE: SPACE+ | (SPACE* ';' ~[\r\n]* SPACE*); fragment SPACE: [\r\n\t\f ]; +// re-allow :#' as valid characters inside the name itself +fragment NAME_BODY: NAME_HEAD | [:#']; + // these is the set of characters that are allowed by all symbols and keywords // however, this is more strict that necessary so that we can re-use it for both -fragment NAME: ~[\r\n\t\f()[\]{}@~^;`\\/, :#']; +fragment NAME_HEAD: ~[\r\n\t\f()[\]{}@~^;`\\/, :#']; diff --git a/src/parcera/terminals.cljc b/src/parcera/terminals.cljc index b9e3c69..749d0ca 100644 --- a/src/parcera/terminals.cljc +++ b/src/parcera/terminals.cljc @@ -42,3 +42,5 @@ (def string-pattern "\"[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*\"") (def regex-pattern (str "#" string-pattern)) + +:h:elo/world From 64b1aa89ecaf21a53c33909bc4c9ffedfcf5c0b4 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 27 Oct 2019 17:37:26 +0100 Subject: [PATCH 005/128] cosmetic change --- resources/clojure.g4 | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/resources/clojure.g4 b/resources/clojure.g4 index 0cec6f6..df20545 100644 --- a/resources/clojure.g4 +++ b/resources/clojure.g4 @@ -24,7 +24,9 @@ NAME: (SIMPLE_NAME '/')? 
SIMPLE_NAME; SIMPLE_NAME: NAME_HEAD NAME_BODY+; // whitespace or comment -WHITESPACE: SPACE+ | (SPACE* ';' ~[\r\n]* SPACE*); +WHITESPACE: SPACE+ | (SPACE* COMMENT SPACE*); + +fragment COMMENT: ';' ~[\r\n]*; fragment SPACE: [\r\n\t\f ]; From 0e943bb9189f0b409068de743fbba2dfb90d3cae Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 27 Oct 2019 17:40:47 +0100 Subject: [PATCH 006/128] cosmetic change --- resources/clojure.g4 | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/resources/clojure.g4 b/resources/clojure.g4 index df20545..69f2741 100644 --- a/resources/clojure.g4 +++ b/resources/clojure.g4 @@ -17,22 +17,20 @@ macro_keyword: '::' NAME; string: '"' ( ~'"' | '\\' '"' )* '"'; -whitespace: WHITESPACE; +// whitespace or comment +whitespace: SPACE+ | (SPACE* COMMENT SPACE*); NAME: (SIMPLE_NAME '/')? SIMPLE_NAME; SIMPLE_NAME: NAME_HEAD NAME_BODY+; -// whitespace or comment -WHITESPACE: SPACE+ | (SPACE* COMMENT SPACE*); - -fragment COMMENT: ';' ~[\r\n]*; +COMMENT: ';' ~[\r\n]*; -fragment SPACE: [\r\n\t\f ]; +SPACE: [\r\n\t\f ]; // re-allow :#' as valid characters inside the name itself fragment NAME_BODY: NAME_HEAD | [:#']; // these is the set of characters that are allowed by all symbols and keywords // however, this is more strict that necessary so that we can re-use it for both -fragment NAME_HEAD: ~[\r\n\t\f()[\]{}@~^;`\\/, :#']; +fragment NAME_HEAD: ~[\r\n\t\f()[\]{}"@~^;`\\/, :#']; From d2bb9015648aef2b91c76d0c7d4ca99bf45e0afc Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 27 Oct 2019 17:48:20 +0100 Subject: [PATCH 007/128] fix: body can be empty fix: / is a valid symbol --- resources/clojure.g4 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/clojure.g4 b/resources/clojure.g4 index 69f2741..096eeaf 100644 --- a/resources/clojure.g4 +++ b/resources/clojure.g4 @@ -20,9 +20,9 @@ string: '"' ( ~'"' | '\\' '"' )* '"'; // whitespace or comment whitespace: SPACE+ | (SPACE* COMMENT SPACE*); -NAME: 
(SIMPLE_NAME '/')? SIMPLE_NAME; +NAME: (SIMPLE_NAME '/')? ('/' | SIMPLE_NAME ); -SIMPLE_NAME: NAME_HEAD NAME_BODY+; +SIMPLE_NAME: NAME_HEAD NAME_BODY*; COMMENT: ';' ~[\r\n]*; From 3dc11adb2290efe0dacd0f7521929ebde89c5b0f Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 27 Oct 2019 17:53:12 +0100 Subject: [PATCH 008/128] cosmetics --- resources/clojure.g4 | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/resources/clojure.g4 b/resources/clojure.g4 index 096eeaf..94aea49 100644 --- a/resources/clojure.g4 +++ b/resources/clojure.g4 @@ -3,10 +3,14 @@ grammar clojure; code: form*; -form: list | symbol | keyword | string | whitespace; +form: whitespace | literal | collection; + +collection: list; list: '(' form* ')'; +literal: symbol | keyword | string ;//| number | character; + symbol: NAME; keyword: simple_keyword | macro_keyword; From c3454c78486badf2e1cd6ce8f0d48f2d203e126a Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 27 Oct 2019 18:01:08 +0100 Subject: [PATCH 009/128] added basic number added collections --- resources/clojure.g4 | 14 +++++++++++--- src/parcera/terminals.cljc | 1 + 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/resources/clojure.g4 b/resources/clojure.g4 index 94aea49..d539aad 100644 --- a/resources/clojure.g4 +++ b/resources/clojure.g4 @@ -5,11 +5,17 @@ code: form*; form: whitespace | literal | collection; -collection: list; +collection: list | vector | map; -list: '(' form* ')'; +list: '(' form* ')'; -literal: symbol | keyword | string ;//| number | character; +vector: '[' form* ']'; + +map: '{' form* '}'; + +literal: number | symbol | keyword | string ;// | character; + +number: NUMBER; symbol: NAME; @@ -24,6 +30,8 @@ string: '"' ( ~'"' | '\\' '"' )* '"'; // whitespace or comment whitespace: SPACE+ | (SPACE* COMMENT SPACE*); +NUMBER: [0-9]+; + NAME: (SIMPLE_NAME '/')? 
('/' | SIMPLE_NAME ); SIMPLE_NAME: NAME_HEAD NAME_BODY*; diff --git a/src/parcera/terminals.cljc b/src/parcera/terminals.cljc index 749d0ca..5c3df5b 100644 --- a/src/parcera/terminals.cljc +++ b/src/parcera/terminals.cljc @@ -44,3 +44,4 @@ (def regex-pattern (str "#" string-pattern)) :h:elo/world +:0 From cb02fbc1320c0aa67e0bade55bd13e5298ba6cb2 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 27 Oct 2019 18:20:22 +0100 Subject: [PATCH 010/128] cosmetics --- resources/clojure.g4 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/resources/clojure.g4 b/resources/clojure.g4 index d539aad..61b4472 100644 --- a/resources/clojure.g4 +++ b/resources/clojure.g4 @@ -1,4 +1,6 @@ +// NOTE: Antlr solves ambiguity based on the order of the rules + grammar clojure; code: form*; From 6b4f26836f5cbcf5b142d2d3ac511481c98800e4 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 27 Oct 2019 18:40:40 +0100 Subject: [PATCH 011/128] number pattern added --- resources/clojure.g4 | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/resources/clojure.g4 b/resources/clojure.g4 index 61b4472..00cc919 100644 --- a/resources/clojure.g4 +++ b/resources/clojure.g4 @@ -32,7 +32,7 @@ string: '"' ( ~'"' | '\\' '"' )* '"'; // whitespace or comment whitespace: SPACE+ | (SPACE* COMMENT SPACE*); -NUMBER: [0-9]+; +NUMBER: [+-]? DIGIT+ (DOUBLE_SUFFIX | LONG_SUFFIX | RATIO_SUFFIX); NAME: (SIMPLE_NAME '/')? ('/' | SIMPLE_NAME ); @@ -48,3 +48,15 @@ fragment NAME_BODY: NAME_HEAD | [:#']; // these is the set of characters that are allowed by all symbols and keywords // however, this is more strict that necessary so that we can re-use it for both fragment NAME_HEAD: ~[\r\n\t\f()[\]{}"@~^;`\\/, :#']; + +fragment DOUBLE_SUFFIX: ((('.' DIGIT*)? ([eE][-+]?DIGIT+)?) 
'M'?); + +fragment LONG_SUFFIX: ('0'[xX]((DIGIT|[A-Fa-f])+) | + '0'([0-7]+) | + ([1-9]DIGIT?)[rR](DIGIT[a-zA-Z]+) | + '0'DIGIT+ + )?'N'?; + +fragment RATIO_SUFFIX: '/' DIGIT+; + +fragment DIGIT: [0-9]; From c42d072a8d43aeae2527f1d6fa118294d820e2c8 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 27 Oct 2019 18:52:14 +0100 Subject: [PATCH 012/128] fix: disallow numbers in head --- resources/clojure.g4 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/clojure.g4 b/resources/clojure.g4 index 00cc919..3f1fa13 100644 --- a/resources/clojure.g4 +++ b/resources/clojure.g4 @@ -43,11 +43,11 @@ COMMENT: ';' ~[\r\n]*; SPACE: [\r\n\t\f ]; // re-allow :#' as valid characters inside the name itself -fragment NAME_BODY: NAME_HEAD | [:#']; +fragment NAME_BODY: NAME_HEAD | [:#'0-9]; // these is the set of characters that are allowed by all symbols and keywords // however, this is more strict that necessary so that we can re-use it for both -fragment NAME_HEAD: ~[\r\n\t\f()[\]{}"@~^;`\\/, :#']; +fragment NAME_HEAD: ~[\r\n\t\f()[\]{}"@~^;`\\/, :#'0-9]; fragment DOUBLE_SUFFIX: ((('.' DIGIT*)? ([eE][-+]?DIGIT+)?) 'M'?); From 0b3eedebc7b6873938300929850d1e62cb8dc023 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 27 Oct 2019 19:04:28 +0100 Subject: [PATCH 013/128] character support added --- resources/clojure.g4 | 10 +++++++++- src/parcera/terminals.cljc | 3 --- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/resources/clojure.g4 b/resources/clojure.g4 index 3f1fa13..67300d6 100644 --- a/resources/clojure.g4 +++ b/resources/clojure.g4 @@ -15,10 +15,12 @@ vector: '[' form* ']'; map: '{' form* '}'; -literal: number | symbol | keyword | string ;// | character; +literal: number | character | symbol | keyword | string ; number: NUMBER; +character: '\\' (UNICODE | NAMED_CHAR | UNICODE_CHAR); + symbol: NAME; keyword: simple_keyword | macro_keyword; @@ -34,6 +36,12 @@ whitespace: SPACE+ | (SPACE* COMMENT SPACE*); NUMBER: [+-]? 
DIGIT+ (DOUBLE_SUFFIX | LONG_SUFFIX | RATIO_SUFFIX); +UNICODE_CHAR: ~[\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF]; + +NAMED_CHAR: 'newline' | 'return' | 'space' | 'tab' | 'formfeed' | 'backspace'; + +UNICODE: 'u' [0-9d-fD-F] [0-9d-fD-F] [0-9d-fD-F] [0-9d-fD-F]; + NAME: (SIMPLE_NAME '/')? ('/' | SIMPLE_NAME ); SIMPLE_NAME: NAME_HEAD NAME_BODY*; diff --git a/src/parcera/terminals.cljc b/src/parcera/terminals.cljc index 5c3df5b..b9e3c69 100644 --- a/src/parcera/terminals.cljc +++ b/src/parcera/terminals.cljc @@ -42,6 +42,3 @@ (def string-pattern "\"[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*\"") (def regex-pattern (str "#" string-pattern)) - -:h:elo/world -:0 From 28434b3ab25ebeb510112cdac0aaac692a4ff300 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 27 Oct 2019 19:18:41 +0100 Subject: [PATCH 014/128] added reader macro support --- resources/clojure.g4 | 67 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 64 insertions(+), 3 deletions(-) diff --git a/resources/clojure.g4 b/resources/clojure.g4 index 67300d6..7889b08 100644 --- a/resources/clojure.g4 +++ b/resources/clojure.g4 @@ -5,7 +5,7 @@ grammar clojure; code: form*; -form: whitespace | literal | collection; +form: whitespace | literal | collection | reader_macro; collection: list | vector | map; @@ -15,7 +15,7 @@ vector: '[' form* ']'; map: '{' form* '}'; -literal: number | character | symbol | keyword | string ; +literal: symbol | keyword | string | number | character; number: NUMBER; @@ -29,13 +29,74 @@ simple_keyword: ':' NAME; macro_keyword: '::' NAME; -string: '"' ( ~'"' | '\\' '"' )* '"'; +string: STRING; + +reader_macro: ( unquote + | metadata + | backtick + | quote + | dispatch + | unquote_splicing + | deref + | symbolic + ); + +unquote: '~' form; + +metadata: (metadata_entry whitespace)+ ( symbol + | collection + | tag + | unquote + | unquote_splicing + ); + +metadata_entry: '^' ( map | symbol | string | keyword ); + +quote: '\'' form; + +backtick: '`' form; + +unquote_splicing: '~@' form; + +deref: 
'@' form; + +dispatch: function + | regex + | set + | conditional + | conditional_splicing + | namespaced_map + | var_quote + | discard + | tag; + +function: '#(' form* ')'; + +regex: '#' STRING; + +set: '#{' form* '}'; + +namespaced_map: '#' ( keyword | '::' ) map; + +var_quote: '#\'' symbol; + +discard: '#_' form; + +tag: '#' symbol whitespace? (literal | collection); + +conditional: '#?(' form* ')'; + +conditional_splicing: '#?@(' form* ')'; + +symbolic: '##' ('Inf' | '-Inf' | 'NaN'); // whitespace or comment whitespace: SPACE+ | (SPACE* COMMENT SPACE*); NUMBER: [+-]? DIGIT+ (DOUBLE_SUFFIX | LONG_SUFFIX | RATIO_SUFFIX); +STRING: '"' ( ~'"' | '\\' '"' )* '"'; + UNICODE_CHAR: ~[\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF]; NAMED_CHAR: 'newline' | 'return' | 'space' | 'tab' | 'formfeed' | 'backspace'; From 39c88e8e2ea98cd478b3ddb27c813320c7ff0be0 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 27 Oct 2019 19:21:28 +0100 Subject: [PATCH 015/128] cosmetics --- resources/clojure.g4 | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/resources/clojure.g4 b/resources/clojure.g4 index 7889b08..7849e21 100644 --- a/resources/clojure.g4 +++ b/resources/clojure.g4 @@ -38,7 +38,6 @@ reader_macro: ( unquote | dispatch | unquote_splicing | deref - | symbolic ); unquote: '~' form; @@ -52,10 +51,10 @@ metadata: (metadata_entry whitespace)+ ( symbol metadata_entry: '^' ( map | symbol | string | keyword ); -quote: '\'' form; - backtick: '`' form; +quote: '\'' form; + unquote_splicing: '~@' form; deref: '@' form; @@ -68,7 +67,8 @@ dispatch: function | namespaced_map | var_quote | discard - | tag; + | tag + | symbolic; function: '#(' form* ')'; From e1828c6070d42b2391cf46a8993e0cf24d2d23af Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 27 Oct 2019 19:39:43 +0100 Subject: [PATCH 016/128] cosmetic changes --- resources/clojure.g4 | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/resources/clojure.g4 b/resources/clojure.g4 index 
7849e21..3868770 100644 --- a/resources/clojure.g4 +++ b/resources/clojure.g4 @@ -1,5 +1,8 @@ -// NOTE: Antlr solves ambiguity based on the order of the rules +// NOTE: Antlr solves ambiguity based on the order of the rules. Unfortunately +// it doesnt have any look ahead :( +// therefore it will make the "right" decision on a valid grammar but it will +// create a wrong AST on a wrong one grammar clojure; From bd04a66690e187b814408f3255bae6fbb0ca206f Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 27 Oct 2019 20:21:08 +0100 Subject: [PATCH 017/128] cosmetic changes --- resources/clojure.g4 | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/resources/clojure.g4 b/resources/clojure.g4 index 3868770..c32b78d 100644 --- a/resources/clojure.g4 +++ b/resources/clojure.g4 @@ -22,7 +22,7 @@ literal: symbol | keyword | string | number | character; number: NUMBER; -character: '\\' (UNICODE | NAMED_CHAR | UNICODE_CHAR); +character: '\\' CHARACTER; symbol: NAME; @@ -94,32 +94,34 @@ conditional_splicing: '#?@(' form* ')'; symbolic: '##' ('Inf' | '-Inf' | 'NaN'); // whitespace or comment -whitespace: SPACE+ | (SPACE* COMMENT SPACE*); +whitespace: (SPACE | COMMENT)+; NUMBER: [+-]? DIGIT+ (DOUBLE_SUFFIX | LONG_SUFFIX | RATIO_SUFFIX); STRING: '"' ( ~'"' | '\\' '"' )* '"'; -UNICODE_CHAR: ~[\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF]; - -NAMED_CHAR: 'newline' | 'return' | 'space' | 'tab' | 'formfeed' | 'backspace'; - -UNICODE: 'u' [0-9d-fD-F] [0-9d-fD-F] [0-9d-fD-F] [0-9d-fD-F]; +CHARACTER: UNICODE_CHAR | NAMED_CHAR | UNICODE; NAME: (SIMPLE_NAME '/')? 
('/' | SIMPLE_NAME ); -SIMPLE_NAME: NAME_HEAD NAME_BODY*; - COMMENT: ';' ~[\r\n]*; SPACE: [\r\n\t\f ]; +fragment SIMPLE_NAME: NAME_HEAD NAME_BODY*; + +fragment UNICODE_CHAR: ~[\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF]; + +fragment NAMED_CHAR: 'newline' | 'return' | 'space' | 'tab' | 'formfeed' | 'backspace'; + +fragment UNICODE: 'u' [0-9d-fD-F] [0-9d-fD-F] [0-9d-fD-F] [0-9d-fD-F]; + // re-allow :#' as valid characters inside the name itself fragment NAME_BODY: NAME_HEAD | [:#'0-9]; // these is the set of characters that are allowed by all symbols and keywords // however, this is more strict that necessary so that we can re-use it for both -fragment NAME_HEAD: ~[\r\n\t\f()[\]{}"@~^;`\\/, :#'0-9]; +fragment NAME_HEAD: ~[\r\n\t\f ()[\]{}"@~^;`\\/,:#'0-9]; fragment DOUBLE_SUFFIX: ((('.' DIGIT*)? ([eE][-+]?DIGIT+)?) 'M'?); From c9c09e286a1928b944c5ae6493952bd6193f48f0 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 27 Oct 2019 20:56:56 +0100 Subject: [PATCH 018/128] fix: invalid negative group --- src/parcera/terminals.cljc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parcera/terminals.cljc b/src/parcera/terminals.cljc index b9e3c69..343cd37 100644 --- a/src/parcera/terminals.cljc +++ b/src/parcera/terminals.cljc @@ -34,7 +34,7 @@ ; mentioned here: https://www.regular-expressions.info/unicode.html ; It's cooked by this generator: http://kourge.net/projects/regexp-unicode-block ; ticking all 'Combining Diacritical Marks' boxes *)) -(def unicode-char "([^\\u0300-\\u036F\\u1DC0-\\u1DFF\\u20D0-\\u20FF])") +(def unicode-char "([\\u0300-\\u036F\\u1DC0-\\u1DFF\\u20D0-\\u20FF])") (def named-char "(newline|return|space|tab|formfeed|backspace)") (def unicode "(u[\\dD-Fd-f]{4})") (def character-pattern (str "\\\\(" unicode-char "|" named-char "|" unicode ")(?!\\w+)")) From d2fab0261aa272e8d272806ed0795b8b2eae14a4 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 27 Oct 2019 21:02:31 +0100 Subject: [PATCH 019/128] fix: order of lexer rules to 
disambiguate whitespace --- resources/clojure.g4 | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/resources/clojure.g4 b/resources/clojure.g4 index c32b78d..4f918ec 100644 --- a/resources/clojure.g4 +++ b/resources/clojure.g4 @@ -22,7 +22,7 @@ literal: symbol | keyword | string | number | character; number: NUMBER; -character: '\\' CHARACTER; +character: '\\' (UNICODE_CHAR | NAMED_CHAR | UNICODE); symbol: NAME; @@ -100,21 +100,19 @@ NUMBER: [+-]? DIGIT+ (DOUBLE_SUFFIX | LONG_SUFFIX | RATIO_SUFFIX); STRING: '"' ( ~'"' | '\\' '"' )* '"'; -CHARACTER: UNICODE_CHAR | NAMED_CHAR | UNICODE; - -NAME: (SIMPLE_NAME '/')? ('/' | SIMPLE_NAME ); - COMMENT: ';' ~[\r\n]*; -SPACE: [\r\n\t\f ]; +UNICODE_CHAR: [\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF]; -fragment SIMPLE_NAME: NAME_HEAD NAME_BODY*; +NAMED_CHAR: 'newline' | 'return' | 'space' | 'tab' | 'formfeed' | 'backspace'; -fragment UNICODE_CHAR: ~[\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF]; +UNICODE: 'u' [0-9d-fD-F] [0-9d-fD-F] [0-9d-fD-F] [0-9d-fD-F]; -fragment NAMED_CHAR: 'newline' | 'return' | 'space' | 'tab' | 'formfeed' | 'backspace'; +SPACE: [\r\n\t\f, ]; -fragment UNICODE: 'u' [0-9d-fD-F] [0-9d-fD-F] [0-9d-fD-F] [0-9d-fD-F]; +NAME: (SIMPLE_NAME '/')? 
('/' | SIMPLE_NAME ); + +fragment SIMPLE_NAME: NAME_HEAD NAME_BODY*; // re-allow :#' as valid characters inside the name itself fragment NAME_BODY: NAME_HEAD | [:#'0-9]; From d5de1ccfbae6ca5e8eb6abcf6ed0483157082552 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 27 Oct 2019 21:05:44 +0100 Subject: [PATCH 020/128] fix: eat all whitespaces --- resources/clojure.g4 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resources/clojure.g4 b/resources/clojure.g4 index 4f918ec..e272677 100644 --- a/resources/clojure.g4 +++ b/resources/clojure.g4 @@ -108,7 +108,7 @@ NAMED_CHAR: 'newline' | 'return' | 'space' | 'tab' | 'formfeed' | 'backspace'; UNICODE: 'u' [0-9d-fD-F] [0-9d-fD-F] [0-9d-fD-F] [0-9d-fD-F]; -SPACE: [\r\n\t\f, ]; +SPACE: [\r\n\t\f, ]+; NAME: (SIMPLE_NAME '/')? ('/' | SIMPLE_NAME ); From abcabca926124290264f4b4496748af67cdb694d Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 27 Oct 2019 21:10:32 +0100 Subject: [PATCH 021/128] cosmetics --- resources/clojure.g4 | 1 + 1 file changed, 1 insertion(+) diff --git a/resources/clojure.g4 b/resources/clojure.g4 index e272677..ea78b84 100644 --- a/resources/clojure.g4 +++ b/resources/clojure.g4 @@ -3,6 +3,7 @@ // it doesnt have any look ahead :( // therefore it will make the "right" decision on a valid grammar but it will // create a wrong AST on a wrong one +// For example: 3.e -> invalid, but it parses as '3.' 
-> number, 'e' -> symbol grammar clojure; From 3a256b050f03726b5468253863cb743c310872a9 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 27 Oct 2019 21:34:24 +0100 Subject: [PATCH 022/128] fix: prefer keyword over symbol --- resources/clojure.g4 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/clojure.g4 b/resources/clojure.g4 index ea78b84..8680f9e 100644 --- a/resources/clojure.g4 +++ b/resources/clojure.g4 @@ -19,7 +19,7 @@ vector: '[' form* ']'; map: '{' form* '}'; -literal: symbol | keyword | string | number | character; +literal: keyword | string | number | character | symbol; number: NUMBER; @@ -80,7 +80,7 @@ regex: '#' STRING; set: '#{' form* '}'; -namespaced_map: '#' ( keyword | '::' ) map; +namespaced_map: '#' ( keyword | '::') map; var_quote: '#\'' symbol; From 68b18dd954e185e18d547b6f68e2caed276a9acb Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 27 Oct 2019 21:35:00 +0100 Subject: [PATCH 023/128] cosmetics --- resources/clojure.g4 | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/resources/clojure.g4 b/resources/clojure.g4 index 8680f9e..35bd1ab 100644 --- a/resources/clojure.g4 +++ b/resources/clojure.g4 @@ -21,12 +21,6 @@ map: '{' form* '}'; literal: keyword | string | number | character | symbol; -number: NUMBER; - -character: '\\' (UNICODE_CHAR | NAMED_CHAR | UNICODE); - -symbol: NAME; - keyword: simple_keyword | macro_keyword; simple_keyword: ':' NAME; @@ -35,6 +29,12 @@ macro_keyword: '::' NAME; string: STRING; +number: NUMBER; + +character: '\\' (UNICODE_CHAR | NAMED_CHAR | UNICODE); + +symbol: NAME; + reader_macro: ( unquote | metadata | backtick From bb2a8cce78c1c0b38d8715ba612e88a26540fb25 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 27 Oct 2019 21:48:52 +0100 Subject: [PATCH 024/128] fix: ambiguity on whitespace parser --- resources/clojure.g4 | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/resources/clojure.g4 b/resources/clojure.g4 
index 35bd1ab..c074134 100644 --- a/resources/clojure.g4 +++ b/resources/clojure.g4 @@ -95,20 +95,22 @@ conditional_splicing: '#?@(' form* ')'; symbolic: '##' ('Inf' | '-Inf' | 'NaN'); // whitespace or comment -whitespace: (SPACE | COMMENT)+; +whitespace: WHITESPACE; NUMBER: [+-]? DIGIT+ (DOUBLE_SUFFIX | LONG_SUFFIX | RATIO_SUFFIX); STRING: '"' ( ~'"' | '\\' '"' )* '"'; -COMMENT: ';' ~[\r\n]*; - UNICODE_CHAR: [\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF]; NAMED_CHAR: 'newline' | 'return' | 'space' | 'tab' | 'formfeed' | 'backspace'; UNICODE: 'u' [0-9d-fD-F] [0-9d-fD-F] [0-9d-fD-F] [0-9d-fD-F]; +WHITESPACE: (SPACE | COMMENT)+; + +COMMENT: ';' ~[\r\n]*; + SPACE: [\r\n\t\f, ]+; NAME: (SIMPLE_NAME '/')? ('/' | SIMPLE_NAME ); From fe11ada6b3a66c5a111aecf216b38fd3a453b8fb Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 27 Oct 2019 21:59:05 +0100 Subject: [PATCH 025/128] fix: make lexer understand character as a whole to avoid ambiguities with symbols fix: unicode char is a negation --- resources/clojure.g4 | 16 +++++++++------- src/parcera/terminals.cljc | 2 +- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/resources/clojure.g4 b/resources/clojure.g4 index c074134..fd6a40d 100644 --- a/resources/clojure.g4 +++ b/resources/clojure.g4 @@ -31,7 +31,7 @@ string: STRING; number: NUMBER; -character: '\\' (UNICODE_CHAR | NAMED_CHAR | UNICODE); +character: CHARACTER; symbol: NAME; @@ -101,20 +101,22 @@ NUMBER: [+-]? DIGIT+ (DOUBLE_SUFFIX | LONG_SUFFIX | RATIO_SUFFIX); STRING: '"' ( ~'"' | '\\' '"' )* '"'; -UNICODE_CHAR: [\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF]; - -NAMED_CHAR: 'newline' | 'return' | 'space' | 'tab' | 'formfeed' | 'backspace'; - -UNICODE: 'u' [0-9d-fD-F] [0-9d-fD-F] [0-9d-fD-F] [0-9d-fD-F]; - WHITESPACE: (SPACE | COMMENT)+; COMMENT: ';' ~[\r\n]*; SPACE: [\r\n\t\f, ]+; +CHARACTER: '\\' (UNICODE_CHAR | NAMED_CHAR | UNICODE); + NAME: (SIMPLE_NAME '/')? 
('/' | SIMPLE_NAME ); +fragment UNICODE_CHAR: ~[\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF]; + +fragment NAMED_CHAR: 'newline' | 'return' | 'space' | 'tab' | 'formfeed' | 'backspace'; + +fragment UNICODE: 'u' [0-9d-fD-F] [0-9d-fD-F] [0-9d-fD-F] [0-9d-fD-F]; + fragment SIMPLE_NAME: NAME_HEAD NAME_BODY*; // re-allow :#' as valid characters inside the name itself diff --git a/src/parcera/terminals.cljc b/src/parcera/terminals.cljc index 343cd37..b9e3c69 100644 --- a/src/parcera/terminals.cljc +++ b/src/parcera/terminals.cljc @@ -34,7 +34,7 @@ ; mentioned here: https://www.regular-expressions.info/unicode.html ; It's cooked by this generator: http://kourge.net/projects/regexp-unicode-block ; ticking all 'Combining Diacritical Marks' boxes *)) -(def unicode-char "([\\u0300-\\u036F\\u1DC0-\\u1DFF\\u20D0-\\u20FF])") +(def unicode-char "([^\\u0300-\\u036F\\u1DC0-\\u1DFF\\u20D0-\\u20FF])") (def named-char "(newline|return|space|tab|formfeed|backspace)") (def unicode "(u[\\dD-Fd-f]{4})") (def character-pattern (str "\\\\(" unicode-char "|" named-char "|" unicode ")(?!\\w+)")) From 608e0de1bcce826932931d9a315b2328c0d8f1eb Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 27 Oct 2019 22:30:11 +0100 Subject: [PATCH 026/128] fix: string lexer didnt handle escaped characters properly --- resources/clojure.g4 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resources/clojure.g4 b/resources/clojure.g4 index fd6a40d..dcfbb05 100644 --- a/resources/clojure.g4 +++ b/resources/clojure.g4 @@ -99,7 +99,7 @@ whitespace: WHITESPACE; NUMBER: [+-]? DIGIT+ (DOUBLE_SUFFIX | LONG_SUFFIX | RATIO_SUFFIX); -STRING: '"' ( ~'"' | '\\' '"' )* '"'; +STRING: '"' ~["\\]* ('\\' . 
~["\\]*)* '"'; WHITESPACE: (SPACE | COMMENT)+; From db2b15bb515408aa2e309f6039148a982d99d262 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 27 Oct 2019 22:45:50 +0100 Subject: [PATCH 027/128] fix: whitespace is optional between metadata entries --- resources/clojure.g4 | 12 ++++++------ src/parcera/core.cljc | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/resources/clojure.g4 b/resources/clojure.g4 index dcfbb05..d0b7b71 100644 --- a/resources/clojure.g4 +++ b/resources/clojure.g4 @@ -46,12 +46,12 @@ reader_macro: ( unquote unquote: '~' form; -metadata: (metadata_entry whitespace)+ ( symbol - | collection - | tag - | unquote - | unquote_splicing - ); +metadata: (metadata_entry whitespace?)+ ( symbol + | collection + | tag + | unquote + | unquote_splicing + ); metadata_entry: '^' ( map | symbol | string | keyword ); diff --git a/src/parcera/core.cljc b/src/parcera/core.cljc index 88cca67..83728b7 100644 --- a/src/parcera/core.cljc +++ b/src/parcera/core.cljc @@ -67,12 +67,12 @@ auto-resolve: '::'; - metadata: (metadata-entry whitespace)+ ( symbol - / collection - / tag - / unquote - / unquote-splicing - ); + metadata: (metadata-entry whitespace?)+ ( symbol + / collection + / tag + / unquote + / unquote-splicing + ); metadata-entry: <'^'> ( map / symbol / string / keyword ); From c2ee248b2d537afcfddc42c9b598126de50a9a91 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 27 Oct 2019 22:49:38 +0100 Subject: [PATCH 028/128] cosmetics --- resources/clojure.g4 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/resources/clojure.g4 b/resources/clojure.g4 index d0b7b71..7f1ea82 100644 --- a/resources/clojure.g4 +++ b/resources/clojure.g4 @@ -11,6 +11,8 @@ code: form*; form: whitespace | literal | collection | reader_macro; +// sets and namespaced map are not considerd collection from grammar perspective +// since they start with # -> dispatch macro collection: list | vector | map; list: '(' form* ')'; From 
a46397e52f35e12e55fd89d3190e57604934b439 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 28 Oct 2019 21:51:33 +0100 Subject: [PATCH 029/128] notes added --- resources/clojure.g4 | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/resources/clojure.g4 b/resources/clojure.g4 index 7f1ea82..89e90e8 100644 --- a/resources/clojure.g4 +++ b/resources/clojure.g4 @@ -1,12 +1,19 @@ -// NOTE: Antlr solves ambiguity based on the order of the rules. Unfortunately -// it doesnt have any look ahead :( -// therefore it will make the "right" decision on a valid grammar but it will -// create a wrong AST on a wrong one -// For example: 3.e -> invalid, but it parses as '3.' -> number, 'e' -> symbol - grammar clojure; +/* + * NOTES to myself and to other developers: + * + * - You have to remember that the parser cannot check for semantics + * - You have to find the right balance of dividing enforcement between the + * grammar and your own code. + * + * The parser should only check the syntax. So the rule of thumb is that when + * in doubt you let the parser pass the content up to your program. Then, in + * your program, you check the semantics and make sure that the rule actually + * have a proper meaning +*/ + code: form*; form: whitespace | literal | collection | reader_macro; From 61b6d0f1881a119825ba85c9362dd4f4491093e4 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 28 Oct 2019 21:51:54 +0100 Subject: [PATCH 030/128] link added --- resources/clojure.g4 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/resources/clojure.g4 b/resources/clojure.g4 index 89e90e8..06cd857 100644 --- a/resources/clojure.g4 +++ b/resources/clojure.g4 @@ -12,6 +12,8 @@ grammar clojure; * in doubt you let the parser pass the content up to your program. 
Then, in * your program, you check the semantics and make sure that the rule actually * have a proper meaning + * + * https://tomassetti.me/antlr-mega-tutorial/#lexers-and-parser */ code: form*; From 0676142aa1fee1159d0a7f3ff7c8dc187a3aaf7d Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Tue, 29 Oct 2019 01:04:44 +0100 Subject: [PATCH 031/128] antlr node.js visitor created --- .gitignore | 3 +++ index.js | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++ package.json | 11 ++++++++++ yarn.lock | 8 +++++++ 4 files changed, 83 insertions(+) create mode 100644 index.js create mode 100644 package.json create mode 100644 yarn.lock diff --git a/.gitignore b/.gitignore index 5bae9d9..a152311 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,6 @@ pom.xml.asc /.idea/ /nashorn_code_cache /.cljs_nashorn_repl +/build/ +/yarn-error.log +/node_modules/ diff --git a/index.js b/index.js new file mode 100644 index 0000000..bdee4f3 --- /dev/null +++ b/index.js @@ -0,0 +1,61 @@ +const antlr4 = require('antlr4/index') +const {clojureLexer} = require('./build/js/clojureLexer') +const {clojureParser} = require('./build/js/clojureParser') +const {clojureListener} = require('./build/js/clojureListener') + +/** + * Takes an AST tree; the result of a parser walk and returns + * an array with the same style as Instaparse + * + * @param {Object} ast + * @param {Array} ruleNames + * @return {Array} a hiccup-like array + */ +function treeSeq(ast, ruleNames) { + const result = [] + // a parser rule has childrens if it is a repetition (* or +) + if (ast.children !== undefined) { + for (const child of ast.children) { + const childResult = treeSeq(child, ruleNames) + // we are on a lexer match so we just add the value and move on + if (child.getPayload().tokenIndex !== undefined) { + result.push(childResult) + + // we are inside a parser rule; therefore we add the rule and + // its result to the global one + } else if (child.getPayload().ruleIndex !== undefined) { + const rule = 
ruleNames[child.ruleIndex] + result.push([rule].concat(childResult)) + } else { + throw new Error(`Unexpected ast node: ${child.toString()}`) + } + } + return result + + // the parser rule its not a repetition -> it matches directly + // therefore we just take the match + } else { + return ast.getText() + } +} + +const input = `(john :SHOUTS "hello" @michael pink/this will work)` +const chars = new antlr4.InputStream(input) +const lexer = new clojureLexer(chars) +const tokens = new antlr4.CommonTokenStream(lexer) +const parser = new clojureParser(tokens) +const ruleNames = parser.ruleNames +parser.buildParseTrees = true + +const treeBuilder = (ast) => treeSeq(ast, ruleNames) + +class listener extends clojureListener { + enterCode(result) { + console.log(JSON.stringify(treeBuilder(result), null, 2)) + } +} + +const tree = parser.code() +antlr4.tree.ParseTreeWalker.DEFAULT.walk(new listener(), tree) + +console.log(`DONE 💫`) diff --git a/package.json b/package.json new file mode 100644 index 0000000..6c3a2a9 --- /dev/null +++ b/package.json @@ -0,0 +1,11 @@ +{ + "name": "parcera", + "version": "1.0.0", + "main": "index.js", + "repository": "git@github.com:carocad/parcera.git", + "author": "Camilo Roca ", + "license": "MIT", + "dependencies": { + "antlr4": "^4.7.2" + } +} diff --git a/yarn.lock b/yarn.lock new file mode 100644 index 0000000..2fb0b32 --- /dev/null +++ b/yarn.lock @@ -0,0 +1,8 @@ +# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY. 
+# yarn lockfile v1 + + +antlr4@^4.7.2: + version "4.7.2" + resolved "https://registry.npmjs.org/antlr4/-/antlr4-4.7.2.tgz#9d0b5987bb63660de658055ee9149141b4d9b462" + integrity sha512-vZA1xYufXLe3LX+ja9rIVxjRmILb1x3k7KYZHltRbfJtXjJ1DlFIqt+CbPYmghx0EuzY9DajiDw+MdyEt1qAsQ== From daa52395de5bd7f5ec0b695e24dbd7b3f370f2d1 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Tue, 29 Oct 2019 01:20:39 +0100 Subject: [PATCH 032/128] moved rule matcher to the top to be able to get code rule name removed visitor since it is unnecessary --- index.js | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/index.js b/index.js index bdee4f3..3f6c851 100644 --- a/index.js +++ b/index.js @@ -15,17 +15,15 @@ function treeSeq(ast, ruleNames) { const result = [] // a parser rule has childrens if it is a repetition (* or +) if (ast.children !== undefined) { + // we are inside a parser rule; therefore we add the rule name to the result + result.push(ruleNames[ast.ruleIndex]) for (const child of ast.children) { const childResult = treeSeq(child, ruleNames) // we are on a lexer match so we just add the value and move on if (child.getPayload().tokenIndex !== undefined) { result.push(childResult) - - // we are inside a parser rule; therefore we add the rule and - // its result to the global one } else if (child.getPayload().ruleIndex !== undefined) { - const rule = ruleNames[child.ruleIndex] - result.push([rule].concat(childResult)) + result.push.apply(result, childResult) } else { throw new Error(`Unexpected ast node: ${child.toString()}`) } @@ -47,15 +45,8 @@ const parser = new clojureParser(tokens) const ruleNames = parser.ruleNames parser.buildParseTrees = true -const treeBuilder = (ast) => treeSeq(ast, ruleNames) - -class listener extends clojureListener { - enterCode(result) { - console.log(JSON.stringify(treeBuilder(result), null, 2)) - } -} - const tree = parser.code() -antlr4.tree.ParseTreeWalker.DEFAULT.walk(new listener(), tree) 
+console.log(JSON.stringify(treeSeq(tree, ruleNames), null, 2)) +//antlr4.tree.ParseTreeWalker.DEFAULT.walk(new listener(), tree) console.log(`DONE 💫`) From b066b2681d007dee69c1ffcb8c62ea5b75151f2a Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Tue, 29 Oct 2019 01:24:38 +0100 Subject: [PATCH 033/128] simplified treeSeq function --- index.js | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/index.js b/index.js index 3f6c851..1c4b31a 100644 --- a/index.js +++ b/index.js @@ -13,25 +13,14 @@ const {clojureListener} = require('./build/js/clojureListener') */ function treeSeq(ast, ruleNames) { const result = [] - // a parser rule has childrens if it is a repetition (* or +) + // parser rules always have childrens if (ast.children !== undefined) { // we are inside a parser rule; therefore we add the rule name to the result result.push(ruleNames[ast.ruleIndex]) - for (const child of ast.children) { - const childResult = treeSeq(child, ruleNames) - // we are on a lexer match so we just add the value and move on - if (child.getPayload().tokenIndex !== undefined) { - result.push(childResult) - } else if (child.getPayload().ruleIndex !== undefined) { - result.push.apply(result, childResult) - } else { - throw new Error(`Unexpected ast node: ${child.toString()}`) - } - } + result.push.apply(result, ast.children.map((child) => treeSeq(child, ruleNames))) return result - // the parser rule its not a repetition -> it matches directly - // therefore we just take the match + // lexer rules dont have childrens, so we just take the matched text } else { return ast.getText() } From 57716cd905fc6a877356c92940207d42b80ed6b3 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Tue, 29 Oct 2019 21:37:14 +0100 Subject: [PATCH 034/128] antlr runtime and tree visitor code added --- project.clj | 28 ++++++++-------- src/parcera/experimental.cljc | 61 +++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 13 deletions(-) create mode 100644 
src/parcera/experimental.cljc diff --git a/project.clj b/project.clj index b65b02b..7838b02 100644 --- a/project.clj +++ b/project.clj @@ -5,19 +5,21 @@ :url "https://github.com/carocad/parcera/blob/master/LICENSE.md"} :dependencies [[org.clojure/clojure "1.10.1"] [instaparse/instaparse "1.4.10"]] - :profiles {:dev {:dependencies [[criterium/criterium "0.4.5"] ;; benchmark - [org.clojure/test.check "0.10.0"]] - :plugins [[jonase/eastwood "0.3.5"] - [lein-cljsbuild "1.1.7"]] - :cljsbuild {:builds - [{:id "dev" - :source-paths ["src" "test"] - :compiler {:main parcera.test-runner - :output-to "target/out/tests.js" - :target :nodejs - :optimizations :none}}] - :test-commands - {"test" ["node" "target/out/tests.js"]}}} + :profiles {:dev {:dependencies [[criterium/criterium "0.4.5"] ;; benchmark + [org.clojure/test.check "0.10.0"] + [org.antlr/antlr4-runtime "4.7.1"]] + :plugins [[jonase/eastwood "0.3.5"] + [lein-cljsbuild "1.1.7"]] + :java-source-paths ["build/java"] + :cljsbuild {:builds + [{:id "dev" + :source-paths ["src" "test"] + :compiler {:main parcera.test-runner + :output-to "target/out/tests.js" + :target :nodejs + :optimizations :none}}] + :test-commands + {"test" ["node" "target/out/tests.js"]}}} :provided {:dependencies [[org.clojure/clojurescript "1.10.520"]]}} :test-selectors {:default (fn [m] (not (some #{:benchmark} (keys m)))) :benchmark :benchmark} diff --git a/src/parcera/experimental.cljc b/src/parcera/experimental.cljc new file mode 100644 index 0000000..ab66112 --- /dev/null +++ b/src/parcera/experimental.cljc @@ -0,0 +1,61 @@ +(ns parcera.experimental + (:import (parcera.antlr clojureParser clojureLexer clojureListener) + (java.util ArrayList) + (org.antlr.v4.runtime CharStreams CommonTokenStream ParserRuleContext))) + +; const input = `(john :SHOUTS "hello" @michael pink/this will work)` +;const chars = new antlr4.InputStream(input) +;const lexer = new clojureLexer(chars) +;const tokens = new antlr4.CommonTokenStream(lexer) +;const parser = new 
clojureParser(tokens) +;const ruleNames = parser.ruleNames +;parser.buildParseTrees = true +; +;const tree = parser.code() +;console.log(JSON.stringify(treeSeq(tree, ruleNames), null, 2)) +;//antlr4.tree.ParseTreeWalker.DEFAULT.walk(new listener(), tree) +; +;console.log(`DONE 💫`) + +; /** +; * Takes an AST tree; the result of a parser walk and returns +; * an array with the same style as Instaparse +; * +; * @param {Object} ast +; * @param {Array} ruleNames +; * @return {Array} a hiccup-like array +; */ +;function treeSeq(ast, ruleNames) { +; const result = [] +; // parser rules always have childrens +; if (ast.children !== undefined) { +; // we are inside a parser rule; therefore we add the rule name to the result +; result.push(ruleNames[ast.ruleIndex]) +; result.push.apply(result, ast.children.map((child) => treeSeq(child, ruleNames))) +; return result +; +; // lexer rules dont have childrens, so we just take the matched text +; } else { +; return ast.getText() +; } +;} + +(defn- hiccup + [ast rule-names] + (if (and (instance? ParserRuleContext ast) (not-empty (.-children ast))) + (cons (keyword (aget rule-names (.getRuleIndex ast))) + (for [child (.-children ast)] + (hiccup child rule-names))) + (.toString ast))) + + +(let [input "(john :SHOUTS \"hello\" @michael pink/this will work)" + chars (CharStreams/fromString input) + lexer (new clojureLexer chars) + tokens (new CommonTokenStream lexer) + parser (new clojureParser tokens) + rule-names (. parser (getRuleNames)) + _ (. parser (setBuildParseTree true)) + tree (. 
parser (code))] + (hiccup tree rule-names)) + From d593d57f4a4b1b4c9497cb8956b23ebe3235e2aa Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Tue, 29 Oct 2019 21:37:38 +0100 Subject: [PATCH 035/128] removed js code from java side --- src/parcera/experimental.cljc | 36 ----------------------------------- 1 file changed, 36 deletions(-) diff --git a/src/parcera/experimental.cljc b/src/parcera/experimental.cljc index ab66112..dfc786e 100644 --- a/src/parcera/experimental.cljc +++ b/src/parcera/experimental.cljc @@ -3,42 +3,6 @@ (java.util ArrayList) (org.antlr.v4.runtime CharStreams CommonTokenStream ParserRuleContext))) -; const input = `(john :SHOUTS "hello" @michael pink/this will work)` -;const chars = new antlr4.InputStream(input) -;const lexer = new clojureLexer(chars) -;const tokens = new antlr4.CommonTokenStream(lexer) -;const parser = new clojureParser(tokens) -;const ruleNames = parser.ruleNames -;parser.buildParseTrees = true -; -;const tree = parser.code() -;console.log(JSON.stringify(treeSeq(tree, ruleNames), null, 2)) -;//antlr4.tree.ParseTreeWalker.DEFAULT.walk(new listener(), tree) -; -;console.log(`DONE 💫`) - -; /** -; * Takes an AST tree; the result of a parser walk and returns -; * an array with the same style as Instaparse -; * -; * @param {Object} ast -; * @param {Array} ruleNames -; * @return {Array} a hiccup-like array -; */ -;function treeSeq(ast, ruleNames) { -; const result = [] -; // parser rules always have childrens -; if (ast.children !== undefined) { -; // we are inside a parser rule; therefore we add the rule name to the result -; result.push(ruleNames[ast.ruleIndex]) -; result.push.apply(result, ast.children.map((child) => treeSeq(child, ruleNames))) -; return result -; -; // lexer rules dont have childrens, so we just take the matched text -; } else { -; return ast.getText() -; } -;} (defn- hiccup [ast rule-names] From 2ace27f2d9ffa69479b28bb9fc50a24af0226791 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Tue, 29 Oct 2019 22:37:16 
+0100 Subject: [PATCH 036/128] make listo into vector to be hiccup compliant --- src/parcera/experimental.cljc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/parcera/experimental.cljc b/src/parcera/experimental.cljc index dfc786e..5d21a36 100644 --- a/src/parcera/experimental.cljc +++ b/src/parcera/experimental.cljc @@ -3,11 +3,11 @@ (java.util ArrayList) (org.antlr.v4.runtime CharStreams CommonTokenStream ParserRuleContext))) - +;; todo: add metadata to each node (defn- hiccup [ast rule-names] (if (and (instance? ParserRuleContext ast) (not-empty (.-children ast))) - (cons (keyword (aget rule-names (.getRuleIndex ast))) + (into [(keyword (aget rule-names (.getRuleIndex ast)))] (for [child (.-children ast)] (hiccup child rule-names))) (.toString ast))) From 8dac799c90cdc52787480d75c3cdd5463350fee9 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Tue, 29 Oct 2019 22:43:39 +0100 Subject: [PATCH 037/128] cosmetics --- src/parcera/experimental.cljc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/parcera/experimental.cljc b/src/parcera/experimental.cljc index 5d21a36..c8f1ced 100644 --- a/src/parcera/experimental.cljc +++ b/src/parcera/experimental.cljc @@ -4,13 +4,16 @@ (org.antlr.v4.runtime CharStreams CommonTokenStream ParserRuleContext))) ;; todo: add metadata to each node +;; todo: identify parsing errors in the tree (defn- hiccup [ast rule-names] - (if (and (instance? ParserRuleContext ast) (not-empty (.-children ast))) + (if (and (instance? ParserRuleContext ast) + ;; mainly for consistency with Js implementation + (not-empty (.-children ast))) (into [(keyword (aget rule-names (.getRuleIndex ast)))] (for [child (.-children ast)] (hiccup child rule-names))) - (.toString ast))) + (. 
ast (toString)))) (let [input "(john :SHOUTS \"hello\" @michael pink/this will work)" From 2ac54d472786794c83c1a314086376a838f40378 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Tue, 29 Oct 2019 22:57:19 +0100 Subject: [PATCH 038/128] attach metadata to parser rules --- src/parcera/experimental.cljc | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/src/parcera/experimental.cljc b/src/parcera/experimental.cljc index c8f1ced..b77f5b3 100644 --- a/src/parcera/experimental.cljc +++ b/src/parcera/experimental.cljc @@ -1,18 +1,29 @@ (ns parcera.experimental (:import (parcera.antlr clojureParser clojureLexer clojureListener) (java.util ArrayList) - (org.antlr.v4.runtime CharStreams CommonTokenStream ParserRuleContext))) + (org.antlr.v4.runtime CharStreams CommonTokenStream ParserRuleContext Token))) -;; todo: add metadata to each node ;; todo: identify parsing errors in the tree +(defn- info + "extract the match meta data information from the ast node" + [^ParserRuleContext ast] + (let [start (.getStart ast) + end (.getStop ast)] + {::start {:row (.getLine start) + :column (.getCharPositionInLine start)} + ::end {:row (.getLine end) + :column (.getCharPositionInLine end)}})) + (defn- hiccup [ast rule-names] (if (and (instance? ParserRuleContext ast) ;; mainly for consistency with Js implementation (not-empty (.-children ast))) - (into [(keyword (aget rule-names (.getRuleIndex ast)))] - (for [child (.-children ast)] - (hiccup child rule-names))) + (let [head [(keyword (aget rule-names (.getRuleIndex ast)))] + body (for [child (.-children ast)] + (hiccup child rule-names))] + ;; attach meta data ... ala instaparse + (with-meta (into head body) (info ast))) (. 
ast (toString)))) From 547594f715b4c916bec5cad18c96c74e26c955a5 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Tue, 29 Oct 2019 23:26:27 +0100 Subject: [PATCH 039/128] cosmetics --- src/parcera/experimental.cljc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/parcera/experimental.cljc b/src/parcera/experimental.cljc index b77f5b3..cbf7538 100644 --- a/src/parcera/experimental.cljc +++ b/src/parcera/experimental.cljc @@ -3,6 +3,11 @@ (java.util ArrayList) (org.antlr.v4.runtime CharStreams CommonTokenStream ParserRuleContext Token))) +;; antlr automatically prints errors to std out +;; line 1:14 token recognition error at: '\"hello @michael pink/this will work)' +;; line 1:50 extraneous input '' expecting {'(', ')', '[', '{', ':', '::', '~' + +;; todo: mute antlr default error listener ;; todo: identify parsing errors in the tree (defn- info "extract the match meta data information from the ast node" From bcad8cf10bdea4435294e6b010b80ee7a0e37634 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Fri, 1 Nov 2019 13:06:58 +0100 Subject: [PATCH 040/128] hiccup now returns a list instead of a vector for performance docs added --- src/parcera/experimental.cljc | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/src/parcera/experimental.cljc b/src/parcera/experimental.cljc index cbf7538..652f84a 100644 --- a/src/parcera/experimental.cljc +++ b/src/parcera/experimental.cljc @@ -20,25 +20,31 @@ :column (.getCharPositionInLine end)}})) (defn- hiccup + "transform the AST into a `hiccup-like` data structure. + + This function doesnt return a vectors because they are + 100 times slower for this use case compared to `cons`" [ast rule-names] (if (and (instance? 
ParserRuleContext ast) ;; mainly for consistency with Js implementation (not-empty (.-children ast))) - (let [head [(keyword (aget rule-names (.getRuleIndex ast)))] + (let [head (keyword (aget rule-names (.getRuleIndex ast))) body (for [child (.-children ast)] (hiccup child rule-names))] ;; attach meta data ... ala instaparse - (with-meta (into head body) (info ast))) + (with-meta (cons head body) (info ast))) (. ast (toString)))) -(let [input "(john :SHOUTS \"hello\" @michael pink/this will work)" - chars (CharStreams/fromString input) - lexer (new clojureLexer chars) - tokens (new CommonTokenStream lexer) - parser (new clojureParser tokens) - rule-names (. parser (getRuleNames)) - _ (. parser (setBuildParseTree true)) - tree (. parser (code))] - (hiccup tree rule-names)) +(defn parse + [input] + (let [chars (CharStreams/fromString input) + lexer (new clojureLexer chars) + tokens (new CommonTokenStream lexer) + parser (new clojureParser tokens) + rule-names (. parser (getRuleNames)) + _ (. parser (setBuildParseTree true)) + tree (. parser (code))] + (hiccup tree rule-names))) +;(time (parse (slurp "test/parcera/test/core.cljc"))) From 442021e99a20ca33270e84fe98b9190c75e15851 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Fri, 1 Nov 2019 18:30:05 +0100 Subject: [PATCH 041/128] hide tags and literals --- src/parcera/experimental.cljc | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/parcera/experimental.cljc b/src/parcera/experimental.cljc index 652f84a..690d15a 100644 --- a/src/parcera/experimental.cljc +++ b/src/parcera/experimental.cljc @@ -7,6 +7,11 @@ ;; line 1:14 token recognition error at: '\"hello @michael pink/this will work)' ;; line 1:50 extraneous input '' expecting {'(', ')', '[', '{', ':', '::', '~' +(def default-options {:hide {:tags #{:form :collection :literal :keyword :reader_macro :dispatch} + :literals #{"(" ")" "[" "]" "{" "}" "#{" "#" "^" + "`" "'" "~@" "@" "#(" "#'" "#_" "#?" 
"#?@" "##"}}}) + + ;; todo: mute antlr default error listener ;; todo: identify parsing errors in the tree (defn- info @@ -28,16 +33,20 @@ (if (and (instance? ParserRuleContext ast) ;; mainly for consistency with Js implementation (not-empty (.-children ast))) - (let [head (keyword (aget rule-names (.getRuleIndex ast))) - body (for [child (.-children ast)] - (hiccup child rule-names))] + (let [head (keyword (aget rule-names (.getRuleIndex ast))) + wrap-child (fn [child] (hiccup child rule-names))] ;; attach meta data ... ala instaparse - (with-meta (cons head body) (info ast))) - (. ast (toString)))) + (with-meta (if (contains? (:tags (:hide default-options)) head) + (mapcat wrap-child (.-children ast)) + (cons head (remove nil? (map wrap-child (.-children ast))))) + (info ast))) + (let [text (. ast (toString))] + (when (not (contains? (:literals (:hide default-options)) text)) + text)))) (defn parse - [input] + [input & options] (let [chars (CharStreams/fromString input) lexer (new clojureLexer chars) tokens (new CommonTokenStream lexer) @@ -47,4 +56,5 @@ tree (. 
parser (code))] (hiccup tree rule-names))) + ;(time (parse (slurp "test/parcera/test/core.cljc"))) From 9a5b0b06d7de2ee768f58baceb102a37e12d3878 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Fri, 1 Nov 2019 18:55:49 +0100 Subject: [PATCH 042/128] hide elements by default allow configurable hide --- src/parcera/experimental.cljc | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/src/parcera/experimental.cljc b/src/parcera/experimental.cljc index 690d15a..7811a35 100644 --- a/src/parcera/experimental.cljc +++ b/src/parcera/experimental.cljc @@ -7,9 +7,9 @@ ;; line 1:14 token recognition error at: '\"hello @michael pink/this will work)' ;; line 1:50 extraneous input '' expecting {'(', ')', '[', '{', ':', '::', '~' -(def default-options {:hide {:tags #{:form :collection :literal :keyword :reader_macro :dispatch} - :literals #{"(" ")" "[" "]" "{" "}" "#{" "#" "^" - "`" "'" "~@" "@" "#(" "#'" "#_" "#?" "#?@" "##"}}}) +(def default-hidden {:tags #{:form :collection :literal :keyword :reader_macro :dispatch} + :literals #{"(" ")" "[" "]" "{" "}" "#{" "#" "^" + "`" "'" "~@" "@" "#(" "#'" "#_" "#?" "#?@" "##"}}) ;; todo: mute antlr default error listener @@ -29,32 +29,39 @@ This function doesnt return a vectors because they are 100 times slower for this use case compared to `cons`" - [ast rule-names] + [ast rule-names hide-tags hide-literals] (if (and (instance? ParserRuleContext ast) ;; mainly for consistency with Js implementation (not-empty (.-children ast))) (let [head (keyword (aget rule-names (.getRuleIndex ast))) - wrap-child (fn [child] (hiccup child rule-names))] + wrap-child (fn [child] (hiccup child rule-names hide-tags hide-literals))] ;; attach meta data ... ala instaparse - (with-meta (if (contains? (:tags (:hide default-options)) head) + (with-meta (if (contains? hide-tags head) (mapcat wrap-child (.-children ast)) (cons head (remove nil? (map wrap-child (.-children ast))))) (info ast))) (let [text (. 
ast (toString))] - (when (not (contains? (:literals (:hide default-options)) text)) - text)))) + (if (contains? hide-literals text) nil text)))) +(defn- unhide + [options] + (case (:unhide options) + :all (dissoc default-hidden :literals :tags) + :content (dissoc default-hidden :literals) + :tags (dissoc default-hidden :tags) + default-hidden)) (defn parse - [input & options] - (let [chars (CharStreams/fromString input) + [input & {:as options}] + (let [hide (unhide options) + chars (CharStreams/fromString input) lexer (new clojureLexer chars) tokens (new CommonTokenStream lexer) parser (new clojureParser tokens) rule-names (. parser (getRuleNames)) _ (. parser (setBuildParseTree true)) tree (. parser (code))] - (hiccup tree rule-names))) + (hiccup tree rule-names (:tags hide) (:literals hide)))) ;(time (parse (slurp "test/parcera/test/core.cljc"))) From 0d08f639469262c6266dd360dc64442d63aa1f86 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Fri, 1 Nov 2019 18:56:25 +0100 Subject: [PATCH 043/128] cosmetics --- src/parcera/experimental.cljc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/parcera/experimental.cljc b/src/parcera/experimental.cljc index 7811a35..4d2968c 100644 --- a/src/parcera/experimental.cljc +++ b/src/parcera/experimental.cljc @@ -43,6 +43,7 @@ (let [text (. ast (toString))] (if (contains? 
hide-literals text) nil text)))) + (defn- unhide [options] (case (:unhide options) @@ -51,6 +52,7 @@ :tags (dissoc default-hidden :tags) default-hidden)) + (defn parse [input & {:as options}] (let [hide (unhide options) From 26ece5a7275efbf912a72c77ecddf6b6bfba7c29 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Fri, 1 Nov 2019 19:01:28 +0100 Subject: [PATCH 044/128] cosmetics --- src/parcera/experimental.cljc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/parcera/experimental.cljc b/src/parcera/experimental.cljc index 4d2968c..157b458 100644 --- a/src/parcera/experimental.cljc +++ b/src/parcera/experimental.cljc @@ -33,12 +33,12 @@ (if (and (instance? ParserRuleContext ast) ;; mainly for consistency with Js implementation (not-empty (.-children ast))) - (let [head (keyword (aget rule-names (.getRuleIndex ast))) - wrap-child (fn [child] (hiccup child rule-names hide-tags hide-literals))] + (let [rule (keyword (aget rule-names (.getRuleIndex ast))) + hiccup-child (fn [child] (hiccup child rule-names hide-tags hide-literals))] ;; attach meta data ... ala instaparse - (with-meta (if (contains? hide-tags head) - (mapcat wrap-child (.-children ast)) - (cons head (remove nil? (map wrap-child (.-children ast))))) + (with-meta (if (contains? hide-tags rule) + (mapcat hiccup-child (.-children ast)) + (cons rule (remove nil? (map hiccup-child (.-children ast))))) (info ast))) (let [text (. ast (toString))] (if (contains? 
hide-literals text) nil text)))) From 49e83a87b1befe45be94a578cd5a36e9810108f0 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Fri, 1 Nov 2019 19:01:40 +0100 Subject: [PATCH 045/128] cosmetics --- src/parcera/experimental.cljc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/parcera/experimental.cljc b/src/parcera/experimental.cljc index 157b458..ecb5fb0 100644 --- a/src/parcera/experimental.cljc +++ b/src/parcera/experimental.cljc @@ -24,6 +24,7 @@ ::end {:row (.getLine end) :column (.getCharPositionInLine end)}})) + (defn- hiccup "transform the AST into a `hiccup-like` data structure. From cd43f323cbadde59bf59f9965930f4baead03f81 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sat, 2 Nov 2019 17:37:13 +0100 Subject: [PATCH 046/128] error listener added to lexer and parser --- src/parcera/experimental.cljc | 43 +++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/src/parcera/experimental.cljc b/src/parcera/experimental.cljc index ecb5fb0..309826d 100644 --- a/src/parcera/experimental.cljc +++ b/src/parcera/experimental.cljc @@ -1,11 +1,27 @@ (ns parcera.experimental (:import (parcera.antlr clojureParser clojureLexer clojureListener) (java.util ArrayList) - (org.antlr.v4.runtime CharStreams CommonTokenStream ParserRuleContext Token))) + (org.antlr.v4.runtime CharStreams CommonTokenStream ParserRuleContext Token ANTLRErrorListener Parser))) -;; antlr automatically prints errors to std out -;; line 1:14 token recognition error at: '\"hello @michael pink/this will work)' -;; line 1:50 extraneous input '' expecting {'(', ')', '[', '{', ':', '::', '~' +;; A custom Error Listener to avoid Antlr printing the errors on the terminal +;; by default. 
This is also useful to mimic Instaparse :total parse mechanism +;; such that if we get an error, we can report it as the result instead +(defrecord ParseFailure [reports] + ANTLRErrorListener + (reportAmbiguity [this parser dfa start-index stop-index exact ambig-alts configs] + (println parser dfa start-index stop-index exact ambig-alts configs)) + (reportAttemptingFullContext [this parser dfa start-index stop-index conflicting-alts configs] + (println parser dfa start-index stop-index conflicting-alts configs)) + (reportContextSensitivity [this parser dfa start-index stop-index prediction configs] + (println parser dfa start-index stop-index prediction configs)) + (syntaxError [this recognizer offending-symbol line char message error] + (let [report {:symbol (str offending-symbol) + :row line + :column char + :recognizer (.toString recognizer) + :message message + :error (str error)}] + (vswap! reports conj report)))) (def default-hidden {:tags #{:form :collection :literal :keyword :reader_macro :dispatch} :literals #{"(" ")" "[" "]" "{" "}" "#{" "#" "^" @@ -56,15 +72,24 @@ (defn parse [input & {:as options}] - (let [hide (unhide options) + (let [hidden (unhide options) + listener (->ParseFailure (volatile! ())) chars (CharStreams/fromString input) - lexer (new clojureLexer chars) + lexer (doto (new clojureLexer chars) + (.removeErrorListeners) + (.addErrorListener listener)) tokens (new CommonTokenStream lexer) - parser (new clojureParser tokens) + parser (doto (new clojureParser tokens) + (.setBuildParseTree true) + (.removeErrorListeners) + (.addErrorListener listener)) rule-names (. parser (getRuleNames)) - _ (. parser (setBuildParseTree true)) tree (. parser (code))] - (hiccup tree rule-names (:tags hide) (:literals hide)))) + (if (and (not (empty? 
@(:reports listener))) + (:total options)) + (hiccup tree rule-names (:tags hidden) (:literals hidden)) + @(:reports listener)))) ;(time (parse (slurp "test/parcera/test/core.cljc"))) +;(time (parse "(hello @michael \"pink/this will work)")) From 14883f51274a5553c8ee0f05ebe2d3a0c84c0dee Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sat, 2 Nov 2019 17:58:24 +0100 Subject: [PATCH 047/128] comments added --- src/parcera/experimental.cljc | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/parcera/experimental.cljc b/src/parcera/experimental.cljc index 309826d..05ac574 100644 --- a/src/parcera/experimental.cljc +++ b/src/parcera/experimental.cljc @@ -8,12 +8,18 @@ ;; such that if we get an error, we can report it as the result instead (defrecord ParseFailure [reports] ANTLRErrorListener + ;; I am not sure how to use these methods. If you came here wondering why + ;; is this being printed, please open an issue so that we can all benefit + ;; from your findings ;) (reportAmbiguity [this parser dfa start-index stop-index exact ambig-alts configs] - (println parser dfa start-index stop-index exact ambig-alts configs)) + ;; TODO + (println "report ambiguity: " parser dfa start-index stop-index exact ambig-alts configs)) (reportAttemptingFullContext [this parser dfa start-index stop-index conflicting-alts configs] - (println parser dfa start-index stop-index conflicting-alts configs)) + ;; TODO + (println "report attempting full context: " parser dfa start-index stop-index conflicting-alts configs)) (reportContextSensitivity [this parser dfa start-index stop-index prediction configs] - (println parser dfa start-index stop-index prediction configs)) + ;; TODO + (println "report context sensitivity: " parser dfa start-index stop-index prediction configs)) (syntaxError [this recognizer offending-symbol line char message error] (let [report {:symbol (str offending-symbol) :row line @@ -85,11 +91,15 @@ (.addErrorListener listener)) 
rule-names (. parser (getRuleNames)) tree (. parser (code))] + ;(println @(:reports listener)) (if (and (not (empty? @(:reports listener))) (:total options)) (hiccup tree rule-names (:tags hidden) (:literals hidden)) @(:reports listener)))) +;(time (parse (slurp "test/parcera/test/core.cljc") :total true)) ;(time (parse (slurp "test/parcera/test/core.cljc"))) + +;(time (parse "(hello @michael \"pink/this will work)" :total true)) ;(time (parse "(hello @michael \"pink/this will work)")) From ad4e3a9a6512ee48865f2e7ab743f266ac2e07a0 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sat, 2 Nov 2019 18:09:10 +0100 Subject: [PATCH 048/128] cosmetics --- src/parcera/experimental.cljc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/parcera/experimental.cljc b/src/parcera/experimental.cljc index 05ac574..05ae9bc 100644 --- a/src/parcera/experimental.cljc +++ b/src/parcera/experimental.cljc @@ -92,8 +92,7 @@ rule-names (. parser (getRuleNames)) tree (. parser (code))] ;(println @(:reports listener)) - (if (and (not (empty? @(:reports listener))) - (:total options)) + (if (or (empty? 
@(:reports listener)) (:total options)) (hiccup tree rule-names (:tags hidden) (:literals hidden)) @(:reports listener)))) @@ -102,4 +101,4 @@ ;(time (parse (slurp "test/parcera/test/core.cljc"))) ;(time (parse "(hello @michael \"pink/this will work)" :total true)) -;(time (parse "(hello @michael \"pink/this will work)")) +;(time (parse "(hello @michael pink/this will work)")) From cd1db883b6819068248ef181d947dc1143de435a Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sat, 2 Nov 2019 18:16:17 +0100 Subject: [PATCH 049/128] recognizer remove comments added --- src/parcera/experimental.cljc | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/parcera/experimental.cljc b/src/parcera/experimental.cljc index 05ae9bc..d8916e5 100644 --- a/src/parcera/experimental.cljc +++ b/src/parcera/experimental.cljc @@ -21,12 +21,12 @@ ;; TODO (println "report context sensitivity: " parser dfa start-index stop-index prediction configs)) (syntaxError [this recognizer offending-symbol line char message error] - (let [report {:symbol (str offending-symbol) - :row line - :column char - :recognizer (.toString recognizer) - :message message - :error (str error)}] + ;; recognizer is either clojureParser or clojureLexer + (let [report {:symbol (str offending-symbol) + :row line + :column char + :message message + :error error}] (vswap! reports conj report)))) (def default-hidden {:tags #{:form :collection :literal :keyword :reader_macro :dispatch} @@ -82,8 +82,9 @@ listener (->ParseFailure (volatile! ())) chars (CharStreams/fromString input) lexer (doto (new clojureLexer chars) - (.removeErrorListeners) - (.addErrorListener listener)) + (.removeErrorListeners)) + ;; todo: how to handle lexer errors ? 
+ ;(.addErrorListener listener)) tokens (new CommonTokenStream lexer) parser (doto (new clojureParser tokens) (.setBuildParseTree true) @@ -101,4 +102,5 @@ ;(time (parse (slurp "test/parcera/test/core.cljc"))) ;(time (parse "(hello @michael \"pink/this will work)" :total true)) +;(time (parse "(hello @michael \"pink/this will work)")) ;(time (parse "(hello @michael pink/this will work)")) From 92b5a094bff35e69d32d50692221211578b5ab13 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sat, 2 Nov 2019 18:29:54 +0100 Subject: [PATCH 050/128] added reverse stack for error reporting --- src/parcera/experimental.cljc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/parcera/experimental.cljc b/src/parcera/experimental.cljc index d8916e5..ae45185 100644 --- a/src/parcera/experimental.cljc +++ b/src/parcera/experimental.cljc @@ -26,7 +26,9 @@ :row line :column char :message message - :error error}] + :error error + :stack (when (instance? Parser recognizer) + (map keyword (reverse (.getRuleInvocationStack ^Parser recognizer))))}] (vswap! reports conj report)))) (def default-hidden {:tags #{:form :collection :literal :keyword :reader_macro :dispatch} @@ -34,7 +36,6 @@ "`" "'" "~@" "@" "#(" "#'" "#_" "#?" 
"#?@" "##"}}) -;; todo: mute antlr default error listener ;; todo: identify parsing errors in the tree (defn- info "extract the match meta data information from the ast node" From f5fa3ed6f6cd7634d2df252f280c7d7fa1ae2f45 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sat, 2 Nov 2019 18:33:31 +0100 Subject: [PATCH 051/128] cosmetics --- src/parcera/experimental.cljc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/parcera/experimental.cljc b/src/parcera/experimental.cljc index ae45185..7d732db 100644 --- a/src/parcera/experimental.cljc +++ b/src/parcera/experimental.cljc @@ -3,6 +3,7 @@ (java.util ArrayList) (org.antlr.v4.runtime CharStreams CommonTokenStream ParserRuleContext Token ANTLRErrorListener Parser))) + ;; A custom Error Listener to avoid Antlr printing the errors on the terminal ;; by default. This is also useful to mimic Instaparse :total parse mechanism ;; such that if we get an error, we can report it as the result instead @@ -31,6 +32,7 @@ (map keyword (reverse (.getRuleInvocationStack ^Parser recognizer))))}] (vswap! reports conj report)))) + (def default-hidden {:tags #{:form :collection :literal :keyword :reader_macro :dispatch} :literals #{"(" ")" "[" "]" "{" "}" "#{" "#" "^" "`" "'" "~@" "@" "#(" "#'" "#_" "#?" 
"#?@" "##"}}) From 896f8a4e38cda6533824d54e66df646711340fd8 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sat, 2 Nov 2019 18:50:22 +0100 Subject: [PATCH 052/128] fix: dont include keys on nil --- src/parcera/experimental.cljc | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/parcera/experimental.cljc b/src/parcera/experimental.cljc index 7d732db..81b09c7 100644 --- a/src/parcera/experimental.cljc +++ b/src/parcera/experimental.cljc @@ -1,7 +1,8 @@ (ns parcera.experimental (:import (parcera.antlr clojureParser clojureLexer clojureListener) (java.util ArrayList) - (org.antlr.v4.runtime CharStreams CommonTokenStream ParserRuleContext Token ANTLRErrorListener Parser))) + (org.antlr.v4.runtime CharStreams CommonTokenStream ParserRuleContext + Token ANTLRErrorListener Parser))) ;; A custom Error Listener to avoid Antlr printing the errors on the terminal @@ -23,13 +24,16 @@ (println "report context sensitivity: " parser dfa start-index stop-index prediction configs)) (syntaxError [this recognizer offending-symbol line char message error] ;; recognizer is either clojureParser or clojureLexer - (let [report {:symbol (str offending-symbol) - :row line - :column char - :message message - :error error - :stack (when (instance? Parser recognizer) - (map keyword (reverse (.getRuleInvocationStack ^Parser recognizer))))}] + (let [report (merge {:row line + :column char + :message message} + (when (instance? Parser recognizer) + {:symbol (str offending-symbol) + :stack (->> (.getRuleInvocationStack ^Parser recognizer) + (reverse) + (map keyword))}) + (when (some? error) + {:error error}))] (vswap! 
reports conj report)))) From e62ad895977dc38b6bd98b13a23198604b5d88bf Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 3 Nov 2019 21:17:20 +0100 Subject: [PATCH 053/128] cosmetic changes --- src/parcera/experimental.cljc | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/parcera/experimental.cljc b/src/parcera/experimental.cljc index 81b09c7..d6960ff 100644 --- a/src/parcera/experimental.cljc +++ b/src/parcera/experimental.cljc @@ -58,19 +58,20 @@ "transform the AST into a `hiccup-like` data structure. This function doesnt return a vectors because they are - 100 times slower for this use case compared to `cons`" - [ast rule-names hide-tags hide-literals] - (if (and (instance? ParserRuleContext ast) - ;; mainly for consistency with Js implementation - (not-empty (.-children ast))) - (let [rule (keyword (aget rule-names (.getRuleIndex ast))) - hiccup-child (fn [child] (hiccup child rule-names hide-tags hide-literals))] + 100 times slower for this use case compared to `cons` cells" + [tree rule-names hide-tags hide-literals] + (if (instance? ParserRuleContext tree) + (let [rule (keyword (aget rule-names (.getRuleIndex tree))) + children-ast (for [child (.-children tree) + :let [child-ast (hiccup child rule-names hide-tags hide-literals)] + :when (not (nil? child-ast))] + child-ast) + ast (if (contains? hide-tags rule) + (apply concat children-ast) + (cons rule children-ast))] ;; attach meta data ... ala instaparse - (with-meta (if (contains? hide-tags rule) - (mapcat hiccup-child (.-children ast)) - (cons rule (remove nil? (map hiccup-child (.-children ast))))) - (info ast))) - (let [text (. ast (toString))] + (with-meta ast (info tree))) + (let [text (str tree)] (if (contains? 
hide-literals text) nil text)))) From 4d8b9e573aa0a07c034d5be7841ada921902288a Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 3 Nov 2019 21:56:12 +0100 Subject: [PATCH 054/128] fix: check for errors in the parsed tree fix: return ParseFailure without volatile --- src/parcera/experimental.cljc | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/parcera/experimental.cljc b/src/parcera/experimental.cljc index d6960ff..80c7131 100644 --- a/src/parcera/experimental.cljc +++ b/src/parcera/experimental.cljc @@ -2,7 +2,8 @@ (:import (parcera.antlr clojureParser clojureLexer clojureListener) (java.util ArrayList) (org.antlr.v4.runtime CharStreams CommonTokenStream ParserRuleContext - Token ANTLRErrorListener Parser))) + Token ANTLRErrorListener Parser) + (org.antlr.v4.runtime.tree ErrorNode))) ;; A custom Error Listener to avoid Antlr printing the errors on the terminal @@ -42,7 +43,6 @@ "`" "'" "~@" "@" "#(" "#'" "#_" "#?" "#?@" "##"}}) -;; todo: identify parsing errors in the tree (defn- info "extract the match meta data information from the ast node" [^ParserRuleContext ast] @@ -60,7 +60,8 @@ This function doesnt return a vectors because they are 100 times slower for this use case compared to `cons` cells" [tree rule-names hide-tags hide-literals] - (if (instance? ParserRuleContext tree) + (cond + (instance? ParserRuleContext tree) (let [rule (keyword (aget rule-names (.getRuleIndex tree))) children-ast (for [child (.-children tree) :let [child-ast (hiccup child rule-names hide-tags hide-literals)] @@ -71,6 +72,15 @@ (cons rule children-ast))] ;; attach meta data ... ala instaparse (with-meta ast (info tree))) + + (instance? ErrorNode tree) + (let [token (.-symbol tree) + ;; error metadata + info {::start {:row (.getLine token) + :column (.getCharPositionInLine token)}}] + (with-meta (list ::failure (str tree)) info)) + + :else (let [text (str tree)] (if (contains? 
hide-literals text) nil text)))) @@ -103,12 +113,14 @@ ;(println @(:reports listener)) (if (or (empty? @(:reports listener)) (:total options)) (hiccup tree rule-names (:tags hidden) (:literals hidden)) - @(:reports listener)))) + ;; hide the volatile to avoid exposing mutable memory ;) + (->ParseFailure @(:reports listener))))) ;(time (parse (slurp "test/parcera/test/core.cljc") :total true)) ;(time (parse (slurp "test/parcera/test/core.cljc"))) ;(time (parse "(hello @michael \"pink/this will work)" :total true)) +;(time (parse "(hello @michael pink/this will work)" :total true)) ;(time (parse "(hello @michael \"pink/this will work)")) ;(time (parse "(hello @michael pink/this will work)")) From 486de22f1413a507b14c791657e162ab45d7843c Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 3 Nov 2019 22:15:33 +0100 Subject: [PATCH 055/128] fix: wrap '::' in parser rule for consistency --- resources/clojure.g4 | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/resources/clojure.g4 b/resources/clojure.g4 index 06cd857..9872607 100644 --- a/resources/clojure.g4 +++ b/resources/clojure.g4 @@ -91,7 +91,9 @@ regex: '#' STRING; set: '#{' form* '}'; -namespaced_map: '#' ( keyword | '::') map; +namespaced_map: '#' ( keyword | auto_resolve) map; + +auto_resolve: '::'; var_quote: '#\'' symbol; From c384d8e2d68facb358ffc77354b9a32a6b8d4e5f Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 3 Nov 2019 22:33:02 +0100 Subject: [PATCH 056/128] fix: : and :: added to ignored literals --- src/parcera/experimental.cljc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/parcera/experimental.cljc b/src/parcera/experimental.cljc index 80c7131..71183d8 100644 --- a/src/parcera/experimental.cljc +++ b/src/parcera/experimental.cljc @@ -39,8 +39,8 @@ (def default-hidden {:tags #{:form :collection :literal :keyword :reader_macro :dispatch} - :literals #{"(" ")" "[" "]" "{" "}" "#{" "#" "^" - "`" "'" "~@" "@" "#(" "#'" "#_" "#?" 
"#?@" "##"}}) + :literals #{"(" ")" "[" "]" "{" "}" "#{" "#" "^" "`" "'" + "~@" "@" "#(" "#'" "#_" "#?" "#?@" "##" ":" "::"}}) (defn- info From 9165f4ed0df122d4e9f3169ff11f7269ec4f2ecc Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 3 Nov 2019 23:17:03 +0100 Subject: [PATCH 057/128] refactor: experimental into protocols and java implementation --- src/parcera/antlr/java.clj | 32 +++++++++++++++++++++++++++++++ src/parcera/antlr/protocols.cljc | 21 ++++++++++++++++++++ src/parcera/experimental.cljc | 33 ++++++++++++++++---------------- 3 files changed, 70 insertions(+), 16 deletions(-) create mode 100644 src/parcera/antlr/java.clj create mode 100644 src/parcera/antlr/protocols.cljc diff --git a/src/parcera/antlr/java.clj b/src/parcera/antlr/java.clj new file mode 100644 index 0000000..82a2c0f --- /dev/null +++ b/src/parcera/antlr/java.clj @@ -0,0 +1,32 @@ +(ns parcera.antlr.java + (:require [parcera.antlr.protocols :as antlr]) + (:import (parcera.antlr clojureParser) + (org.antlr.v4.runtime ParserRuleContext Token) + (org.antlr.v4.runtime.tree ErrorNodeImpl))) + +(set! *warn-on-reflection* true) + + +(extend-type ParserRuleContext + antlr/ParserRule + (children [^ParserRuleContext this] (.-children this)) + (rule-index [^ParserRuleContext this] (.getRuleIndex this)) + (start [^ParserRuleContext this] (.getStart this)) + (end [^ParserRuleContext this] (.getStop this))) + + +(extend-type ErrorNodeImpl + antlr/ErrorNode + (token [^ErrorNodeImpl this] (.-symbol this))) + + +(extend-type Token + antlr/Token + (row [^Token this] (.getLine this)) + (column [^Token this] (.getCharPositionInLine this))) + + +(extend-type clojureParser + antlr/AntlrParser + (rules [^clojureParser this] (vec (.getRuleNames this))) + (tree [^clojureParser this] (. 
this (code)))) diff --git a/src/parcera/antlr/protocols.cljc b/src/parcera/antlr/protocols.cljc new file mode 100644 index 0000000..6010d0d --- /dev/null +++ b/src/parcera/antlr/protocols.cljc @@ -0,0 +1,21 @@ +(ns parcera.antlr.protocols + (:import (org.antlr.v4.runtime ANTLRErrorListener Parser))) + + +(defprotocol AntlrParser + (rules [this]) + (tree [this])) + +(defprotocol ParserRule + (children [this]) + (rule-index [this]) + (start [this]) + (end [this])) + + +(defprotocol Token + (row [this]) + (column [this])) + +(defprotocol ErrorNode + (token [this])) diff --git a/src/parcera/experimental.cljc b/src/parcera/experimental.cljc index 71183d8..6abd4c1 100644 --- a/src/parcera/experimental.cljc +++ b/src/parcera/experimental.cljc @@ -1,4 +1,5 @@ (ns parcera.experimental + (:require [parcera.antlr.protocols :as antlr]) (:import (parcera.antlr clojureParser clojureLexer clojureListener) (java.util ArrayList) (org.antlr.v4.runtime CharStreams CommonTokenStream ParserRuleContext @@ -45,13 +46,13 @@ (defn- info "extract the match meta data information from the ast node" - [^ParserRuleContext ast] - (let [start (.getStart ast) - end (.getStop ast)] - {::start {:row (.getLine start) - :column (.getCharPositionInLine start)} - ::end {:row (.getLine end) - :column (.getCharPositionInLine end)}})) + [ast] + (let [start (antlr/start ast) + end (antlr/end ast)] + {::start {:row (antlr/row start) + :column (antlr/column start)} + ::end {:row (antlr/row end) + :column (antlr/column end)}})) (defn- hiccup @@ -61,9 +62,9 @@ 100 times slower for this use case compared to `cons` cells" [tree rule-names hide-tags hide-literals] (cond - (instance? ParserRuleContext tree) - (let [rule (keyword (aget rule-names (.getRuleIndex tree))) - children-ast (for [child (.-children tree) + (satisfies? 
antlr/ParserRule tree) + (let [rule (keyword (get rule-names (antlr/rule-index tree))) + children-ast (for [child (antlr/children tree) :let [child-ast (hiccup child rule-names hide-tags hide-literals)] :when (not (nil? child-ast))] child-ast) @@ -73,11 +74,11 @@ ;; attach meta data ... ala instaparse (with-meta ast (info tree))) - (instance? ErrorNode tree) - (let [token (.-symbol tree) + (satisfies? antlr/ErrorNode tree) + (let [token (antlr/token tree) ;; error metadata - info {::start {:row (.getLine token) - :column (.getCharPositionInLine token)}}] + info {::start {:row (antlr/row token) + :column (antlr/column token)}}] (with-meta (list ::failure (str tree)) info)) :else @@ -108,8 +109,8 @@ (.setBuildParseTree true) (.removeErrorListeners) (.addErrorListener listener)) - rule-names (. parser (getRuleNames)) - tree (. parser (code))] + rule-names (antlr/rules parser) + tree (antlr/tree parser)] ;(println @(:reports listener)) (if (or (empty? @(:reports listener)) (:total options)) (hiccup tree rule-names (:tags hidden) (:literals hidden)) From d8a65317a88bc6c6339597bd54feb311fee74b1a Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 3 Nov 2019 23:18:18 +0100 Subject: [PATCH 058/128] cosmetics --- src/parcera/antlr/protocols.cljc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/parcera/antlr/protocols.cljc b/src/parcera/antlr/protocols.cljc index 6010d0d..2a7eb32 100644 --- a/src/parcera/antlr/protocols.cljc +++ b/src/parcera/antlr/protocols.cljc @@ -1,4 +1,7 @@ (ns parcera.antlr.protocols + "These protocols are a cheat: I use them to be able to dispatch + to both Java and JavaScript parser implementations without the + common code having to know about it" (:import (org.antlr.v4.runtime ANTLRErrorListener Parser))) From 6e6e7ce3da3ecab55fd616a002f8f87fe1fcddbd Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 3 Nov 2019 23:23:59 +0100 Subject: [PATCH 059/128] moved parser to its own namespace --- src/parcera/antlr/java.clj | 17 
+++++++++++++++-- src/parcera/experimental.cljc | 15 +++------------ 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/src/parcera/antlr/java.clj b/src/parcera/antlr/java.clj index 82a2c0f..7244ffb 100644 --- a/src/parcera/antlr/java.clj +++ b/src/parcera/antlr/java.clj @@ -1,7 +1,7 @@ (ns parcera.antlr.java (:require [parcera.antlr.protocols :as antlr]) - (:import (parcera.antlr clojureParser) - (org.antlr.v4.runtime ParserRuleContext Token) + (:import (parcera.antlr clojureParser clojureLexer) + (org.antlr.v4.runtime ParserRuleContext Token CommonTokenStream CharStreams) (org.antlr.v4.runtime.tree ErrorNodeImpl))) (set! *warn-on-reflection* true) @@ -30,3 +30,16 @@ antlr/AntlrParser (rules [^clojureParser this] (vec (.getRuleNames this))) (tree [^clojureParser this] (. this (code)))) + +(defn parser + [input listener] + (let [chars (CharStreams/fromString input) + lexer (doto (new clojureLexer chars) + (.removeErrorListeners)) + ;; todo: how to handle lexer errors ? + ;(.addErrorListener listener)) + tokens (new CommonTokenStream lexer)] + (doto (new clojureParser tokens) + (.setBuildParseTree true) + (.removeErrorListeners) + (.addErrorListener listener)))) diff --git a/src/parcera/experimental.cljc b/src/parcera/experimental.cljc index 6abd4c1..aaad485 100644 --- a/src/parcera/experimental.cljc +++ b/src/parcera/experimental.cljc @@ -1,5 +1,6 @@ (ns parcera.experimental - (:require [parcera.antlr.protocols :as antlr]) + (:require [parcera.antlr.protocols :as antlr] + [parcera.antlr.java :as platform]) (:import (parcera.antlr clojureParser clojureLexer clojureListener) (java.util ArrayList) (org.antlr.v4.runtime CharStreams CommonTokenStream ParserRuleContext @@ -99,19 +100,9 @@ [input & {:as options}] (let [hidden (unhide options) listener (->ParseFailure (volatile! ())) - chars (CharStreams/fromString input) - lexer (doto (new clojureLexer chars) - (.removeErrorListeners)) - ;; todo: how to handle lexer errors ? 
- ;(.addErrorListener listener)) - tokens (new CommonTokenStream lexer) - parser (doto (new clojureParser tokens) - (.setBuildParseTree true) - (.removeErrorListeners) - (.addErrorListener listener)) + parser (platform/parser input listener) rule-names (antlr/rules parser) tree (antlr/tree parser)] - ;(println @(:reports listener)) (if (or (empty? @(:reports listener)) (:total options)) (hiccup tree rule-names (:tags hidden) (:literals hidden)) ;; hide the volatile to avoid exposing mutable memory ;) From 8429c8f123ae5c646e2486fccecb6cf9ab25fbf8 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 3 Nov 2019 23:25:13 +0100 Subject: [PATCH 060/128] removed unused imports --- src/parcera/experimental.cljc | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/parcera/experimental.cljc b/src/parcera/experimental.cljc index aaad485..639da90 100644 --- a/src/parcera/experimental.cljc +++ b/src/parcera/experimental.cljc @@ -1,11 +1,7 @@ (ns parcera.experimental (:require [parcera.antlr.protocols :as antlr] [parcera.antlr.java :as platform]) - (:import (parcera.antlr clojureParser clojureLexer clojureListener) - (java.util ArrayList) - (org.antlr.v4.runtime CharStreams CommonTokenStream ParserRuleContext - Token ANTLRErrorListener Parser) - (org.antlr.v4.runtime.tree ErrorNode))) + (:import (org.antlr.v4.runtime ANTLRErrorListener Parser))) ;; A custom Error Listener to avoid Antlr printing the errors on the terminal From 66d1408622e6cb25376fdb206880d3297e589ab2 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 4 Nov 2019 21:41:45 +0100 Subject: [PATCH 061/128] moved antlr4 runtime to provided dependencies added build js as a foreign lib --- project.clj | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/project.clj b/project.clj index 7838b02..753d7ed 100644 --- a/project.clj +++ b/project.clj @@ -6,21 +6,29 @@ :dependencies [[org.clojure/clojure "1.10.1"] [instaparse/instaparse "1.4.10"]] :profiles {:dev 
{:dependencies [[criterium/criterium "0.4.5"] ;; benchmark - [org.clojure/test.check "0.10.0"] - [org.antlr/antlr4-runtime "4.7.1"]] + [org.clojure/test.check "0.10.0"]] :plugins [[jonase/eastwood "0.3.5"] [lein-cljsbuild "1.1.7"]] :java-source-paths ["build/java"] + ;; todo: does this even work ? + :foreign-libs [{:file "build/js/parcera/antlr/clojureLexer.js" + :provides ["parcera.antlr.clojureLexer"] + :module-type :commonjs} + {:file "build/js/parcera/antlr/clojureParser.js" + :provides ["parcera.antlr.clojureParser"] + :module-type :commonjs}] :cljsbuild {:builds [{:id "dev" :source-paths ["src" "test"] :compiler {:main parcera.test-runner :output-to "target/out/tests.js" :target :nodejs + :infer-externs true :optimizations :none}}] :test-commands {"test" ["node" "target/out/tests.js"]}}} - :provided {:dependencies [[org.clojure/clojurescript "1.10.520"]]}} + :provided {:dependencies [[org.clojure/clojurescript "1.10.520"] + [org.antlr/antlr4-runtime "4.7.1"]]}} :test-selectors {:default (fn [m] (not (some #{:benchmark} (keys m)))) :benchmark :benchmark} :deploy-repositories [["clojars" {:url "https://clojars.org/repo" From e26552507829a33bd50034cf75fd45624aafbd7c Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 4 Nov 2019 21:42:11 +0100 Subject: [PATCH 062/128] mimic java implementation on js --- index.js | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/index.js b/index.js index 1c4b31a..df31056 100644 --- a/index.js +++ b/index.js @@ -1,7 +1,6 @@ const antlr4 = require('antlr4/index') -const {clojureLexer} = require('./build/js/clojureLexer') -const {clojureParser} = require('./build/js/clojureParser') -const {clojureListener} = require('./build/js/clojureListener') +const {clojureLexer} = require('./build/js/parcera/antlr/clojureLexer') +const {clojureParser} = require('./build/js/parcera/antlr/clojureParser') /** * Takes an AST tree; the result of a parser walk and returns @@ -16,6 +15,7 @@ function treeSeq(ast, ruleNames) 
{ // parser rules always have childrens if (ast.children !== undefined) { // we are inside a parser rule; therefore we add the rule name to the result + console.log(ast instanceof antlr4.ParserRuleContext) result.push(ruleNames[ast.ruleIndex]) result.push.apply(result, ast.children.map((child) => treeSeq(child, ruleNames))) return result @@ -27,12 +27,15 @@ function treeSeq(ast, ruleNames) { } const input = `(john :SHOUTS "hello" @michael pink/this will work)` -const chars = new antlr4.InputStream(input) +const chars = new antlr4.CharStreams.fromString(input) const lexer = new clojureLexer(chars) +lexer.removeErrorListeners() const tokens = new antlr4.CommonTokenStream(lexer) const parser = new clojureParser(tokens) const ruleNames = parser.ruleNames parser.buildParseTrees = true +parser.removeErrorListeners() +// parser.addErrorListener() const tree = parser.code() console.log(JSON.stringify(treeSeq(tree, ruleNames), null, 2)) From bbc8ac7a1b69333ef0a02b67e199fbbb366c73a5 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 4 Nov 2019 21:42:36 +0100 Subject: [PATCH 063/128] javascript namespace created to mirror java implementation --- src/parcera/antlr/javascript.cljs | 52 +++++++++++++++++++++++++++++++ src/parcera/experimental.cljc | 2 +- 2 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 src/parcera/antlr/javascript.cljs diff --git a/src/parcera/antlr/javascript.cljs b/src/parcera/antlr/javascript.cljs new file mode 100644 index 0000000..f8266a7 --- /dev/null +++ b/src/parcera/antlr/javascript.cljs @@ -0,0 +1,52 @@ +(ns parcera.antlr.javascript + ;; TODO: does this even works ? + ;; TODO: translate the index.js file to Clojurescript 😥 + ;; TODO: how do I get a Clojurescript repl ... I am blind without it + ;; am I suppose to code the whole thing and hope that it works by running + ;; the tests 🤔 ... 
I can feel the pain of other languages 😭 + (:require [parcera.antlr.protocols :as antlr] + [antlr4.index :as runtime :refer [ParserRuleContext]] + [parcera.antlr.clojureLexer :as clojureLexer] + [parcera.antlr.clojureParser :as clojureParser])) + + +(set! *warn-on-infer* true) + + +(extend-type ParserRuleContext + antlr/ParserRule + (children [^ParserRuleContext this] (.-children this)) + (rule-index [^ParserRuleContext this] (.getRuleIndex this)) + (start [^ParserRuleContext this] (.getStart this)) + (end [^ParserRuleContext this] (.getStop this))) + + +(extend-type ErrorNodeImpl + antlr/ErrorNode + (token [^ErrorNodeImpl this] (.-symbol this))) + + +(extend-type Token + antlr/Token + (row [^Token this] (.getLine this)) + (column [^Token this] (.getCharPositionInLine this))) + + +(extend-type clojureParser + antlr/AntlrParser + (rules [^clojureParser this] (vec (.getRuleNames this))) + (tree [^clojureParser this] (. this (code)))) + + +(defn parser + [input listener] + (let [chars (CharStreams/fromString input) + lexer (doto (new clojureLexer chars) + (.removeErrorListeners)) + ;; todo: how to handle lexer errors ? + ;(.addErrorListener listener)) + tokens (new CommonTokenStream lexer)] + (doto (new clojureParser tokens) + (.setBuildParseTree true) + (.removeErrorListeners) + (.addErrorListener listener)))) diff --git a/src/parcera/experimental.cljc b/src/parcera/experimental.cljc index 639da90..b158ad4 100644 --- a/src/parcera/experimental.cljc +++ b/src/parcera/experimental.cljc @@ -92,7 +92,7 @@ default-hidden)) -(defn parse +(defn clojure [input & {:as options}] (let [hidden (unhide options) listener (->ParseFailure (volatile! 
())) From 055ef4789111c72edcf590b5bb57cc4238059cb5 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 4 Nov 2019 21:43:12 +0100 Subject: [PATCH 064/128] ignore out dir --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index a152311..6110096 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,4 @@ pom.xml.asc /build/ /yarn-error.log /node_modules/ +/out/ From 0e3da457bebb60ba494da78311b98349573f2f2c Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 4 Nov 2019 21:51:08 +0100 Subject: [PATCH 065/128] moved code function to experimental namespace hide listener from parse function for the time being --- src/parcera/antlr/java.clj | 54 ++++++++--- src/parcera/experimental.cljc | 165 +++++++++++++++++++++++++--------- 2 files changed, 166 insertions(+), 53 deletions(-) diff --git a/src/parcera/antlr/java.clj b/src/parcera/antlr/java.clj index 7244ffb..ef2f8c9 100644 --- a/src/parcera/antlr/java.clj +++ b/src/parcera/antlr/java.clj @@ -1,12 +1,44 @@ (ns parcera.antlr.java (:require [parcera.antlr.protocols :as antlr]) (:import (parcera.antlr clojureParser clojureLexer) - (org.antlr.v4.runtime ParserRuleContext Token CommonTokenStream CharStreams) + (org.antlr.v4.runtime ParserRuleContext Token CommonTokenStream CharStreams ANTLRErrorListener Parser) (org.antlr.v4.runtime.tree ErrorNodeImpl))) (set! *warn-on-reflection* true) +;; A custom Error Listener to avoid Antlr printing the errors on the terminal +;; by default. This is also useful to mimic Instaparse :total parse mechanism +;; such that if we get an error, we can report it as the result instead +(defrecord ParseFailure [reports] + ANTLRErrorListener + ;; I am not sure how to use these methods. 
If you came here wondering why + ;; is this being printed, please open an issue so that we can all benefit + ;; from your findings ;) + (reportAmbiguity [this parser dfa start-index stop-index exact ambig-alts configs] + ;; TODO + (println "report ambiguity: " parser dfa start-index stop-index exact ambig-alts configs)) + (reportAttemptingFullContext [this parser dfa start-index stop-index conflicting-alts configs] + ;; TODO + (println "report attempting full context: " parser dfa start-index stop-index conflicting-alts configs)) + (reportContextSensitivity [this parser dfa start-index stop-index prediction configs] + ;; TODO + (println "report context sensitivity: " parser dfa start-index stop-index prediction configs)) + (syntaxError [this recognizer offending-symbol line char message error] + ;; recognizer is either clojureParser or clojureLexer + (let [report (merge {:row line + :column char + :message message} + (when (instance? Parser recognizer) + {:symbol (str offending-symbol) + :stack (->> (.getRuleInvocationStack ^Parser recognizer) + (reverse) + (map keyword))}) + (when (some? error) + {:error error}))] + (vswap! reports conj report)))) + + (extend-type ParserRuleContext antlr/ParserRule (children [^ParserRuleContext this] (.-children this)) @@ -32,14 +64,16 @@ (tree [^clojureParser this] (. this (code)))) (defn parser - [input listener] - (let [chars (CharStreams/fromString input) - lexer (doto (new clojureLexer chars) - (.removeErrorListeners)) + [input] + (let [chars (CharStreams/fromString input) + lexer (doto (new clojureLexer chars) + (.removeErrorListeners)) ;; todo: how to handle lexer errors ? ;(.addErrorListener listener)) - tokens (new CommonTokenStream lexer)] - (doto (new clojureParser tokens) - (.setBuildParseTree true) - (.removeErrorListeners) - (.addErrorListener listener)))) + tokens (new CommonTokenStream lexer) + listener (->ParseFailure (volatile! 
())) + parser (doto (new clojureParser tokens) + (.setBuildParseTree true) + (.removeErrorListeners) + (.addErrorListener listener))] + {:parser parser :listener listener})) diff --git a/src/parcera/experimental.cljc b/src/parcera/experimental.cljc index b158ad4..d910e4a 100644 --- a/src/parcera/experimental.cljc +++ b/src/parcera/experimental.cljc @@ -1,39 +1,8 @@ (ns parcera.experimental (:require [parcera.antlr.protocols :as antlr] [parcera.antlr.java :as platform]) - (:import (org.antlr.v4.runtime ANTLRErrorListener Parser))) - - -;; A custom Error Listener to avoid Antlr printing the errors on the terminal -;; by default. This is also useful to mimic Instaparse :total parse mechanism -;; such that if we get an error, we can report it as the result instead -(defrecord ParseFailure [reports] - ANTLRErrorListener - ;; I am not sure how to use these methods. If you came here wondering why - ;; is this being printed, please open an issue so that we can all benefit - ;; from your findings ;) - (reportAmbiguity [this parser dfa start-index stop-index exact ambig-alts configs] - ;; TODO - (println "report ambiguity: " parser dfa start-index stop-index exact ambig-alts configs)) - (reportAttemptingFullContext [this parser dfa start-index stop-index conflicting-alts configs] - ;; TODO - (println "report attempting full context: " parser dfa start-index stop-index conflicting-alts configs)) - (reportContextSensitivity [this parser dfa start-index stop-index prediction configs] - ;; TODO - (println "report context sensitivity: " parser dfa start-index stop-index prediction configs)) - (syntaxError [this recognizer offending-symbol line char message error] - ;; recognizer is either clojureParser or clojureLexer - (let [report (merge {:row line - :column char - :message message} - (when (instance? Parser recognizer) - {:symbol (str offending-symbol) - :stack (->> (.getRuleInvocationStack ^Parser recognizer) - (reverse) - (map keyword))}) - (when (some? 
error) - {:error error}))] - (vswap! reports conj report)))) + #?(:cljs (:import goog.string.StringBuffer))) + (def default-hidden {:tags #{:form :collection :literal :keyword :reader_macro :dispatch} @@ -95,20 +64,130 @@ (defn clojure [input & {:as options}] (let [hidden (unhide options) - listener (->ParseFailure (volatile! ())) - parser (platform/parser input listener) + {:keys [parser listener]} (platform/parser input) rule-names (antlr/rules parser) tree (antlr/tree parser)] (if (or (empty? @(:reports listener)) (:total options)) (hiccup tree rule-names (:tags hidden) (:literals hidden)) - ;; hide the volatile to avoid exposing mutable memory ;) - (->ParseFailure @(:reports listener))))) + @(:reports listener)))) ;; todo: expose a proper error record ? + + +(defn- code* + "internal function used to imperatively build up the code from the provided + AST as Clojure's str would be too slow" + [ast #?(:clj ^StringBuilder string-builder + :cljs ^StringBuffer string-builder)] + (case (first ast) + :code + (doseq [child (rest ast)] + (code* child string-builder)) + + :list + (do (. string-builder (append "(")) + (doseq [child (rest ast)] (code* child string-builder)) + (. string-builder (append ")"))) + + :vector + (do (. string-builder (append "[")) + (doseq [child (rest ast)] (code* child string-builder)) + (. string-builder (append "]"))) + + :namespaced-map + (do (. string-builder (append "#")) + (doseq [child (rest ast)] (code* child string-builder))) + + :map + (do (. string-builder (append "{")) + (doseq [child (rest ast)] (code* child string-builder)) + (. string-builder (append "}"))) + + :set + (do (. string-builder (append "#{")) + (doseq [child (rest ast)] (code* child string-builder)) + (. string-builder (append "}"))) + + (:number :whitespace :symbolic :auto-resolve :symbol :simple-keyword + :macro-keyword :character :string :regex) + (. 
string-builder (append (second ast))) + + :metadata + (do (doseq [child (rest (butlast ast))] (code* child string-builder)) + (code* (last ast) string-builder)) + + :metadata-entry + (doseq [child (rest ast)] + (. string-builder (append "^")) + (code* child string-builder)) + + :quote + (do (. string-builder (append "'")) + (doseq [child (rest ast)] (code* child string-builder))) + + :var-quote + (do (. string-builder (append "#'")) + (code* (second ast) string-builder)) + :discard + (do (. string-builder (append "#_")) + (doseq [child (rest ast)] (code* child string-builder))) -;(time (parse (slurp "test/parcera/test/core.cljc") :total true)) -;(time (parse (slurp "test/parcera/test/core.cljc"))) + :tag + (do (. string-builder (append "#")) + (doseq [child (rest ast)] (code* child string-builder))) -;(time (parse "(hello @michael \"pink/this will work)" :total true)) -;(time (parse "(hello @michael pink/this will work)" :total true)) -;(time (parse "(hello @michael \"pink/this will work)")) -;(time (parse "(hello @michael pink/this will work)")) + :backtick + (do (. string-builder (append "`")) + (doseq [child (rest ast)] (code* child string-builder))) + + :unquote + (do (. string-builder (append "~")) + (doseq [child (rest ast)] (code* child string-builder))) + + :unquote-splicing + (do (. string-builder (append "~@")) + (doseq [child (rest ast)] (code* child string-builder))) + + :conditional + (do (. string-builder (append "#?(")) + (doseq [child (rest ast)] (code* child string-builder)) + (. string-builder (append ")"))) + + :conditional-splicing + (do (. string-builder (append "#?@(")) + (doseq [child (rest ast)] (code* child string-builder)) + (. string-builder (append ")"))) + + :deref + (do (. string-builder (append "@")) + (doseq [child (rest ast)] (code* child string-builder))) + + :function + (do (. string-builder (append "#(")) + (doseq [child (rest ast)] (code* child string-builder)) + (. 
string-builder (append ")"))))) + + +(defn code + "Transforms your AST back into code + + ast: The nested sequence of [:keyword & content] which MUST follow the + same structure as the result of `(parcera/clojure input-string)` + + Returns a string representation of the provided AST + + In general (= input (parcera/code (parcera/clojure input)))" + [ast] + (let [string-builder #?(:clj (new StringBuilder) + :cljs (new StringBuffer))] + (code* ast string-builder) + (. string-builder (toString)))) + +; Successful parse. +; Profile: {:create-node 384, :push-full-listener 2, :push-stack 384, +; :push-listener 382, :push-result 227, :push-message 227 } +; "Elapsed time: 47.25084 msecs" +#_(time (clojure (str '(ns parcera.core + (:require [instaparse.core :as instaparse] + [clojure.data :as data] + [clojure.string :as str]))) + :trace true)) From 2bcbf70792e63c803b7d7b15aee15f7941e0d2fd Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 4 Nov 2019 21:57:35 +0100 Subject: [PATCH 066/128] fix: replaced - with _ fix: keywords now need to append : and :: --- src/parcera/experimental.cljc | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/src/parcera/experimental.cljc b/src/parcera/experimental.cljc index d910e4a..27e0ba1 100644 --- a/src/parcera/experimental.cljc +++ b/src/parcera/experimental.cljc @@ -92,7 +92,7 @@ (doseq [child (rest ast)] (code* child string-builder)) (. string-builder (append "]"))) - :namespaced-map + :namespaced_map (do (. string-builder (append "#")) (doseq [child (rest ast)] (code* child string-builder))) @@ -106,15 +106,25 @@ (doseq [child (rest ast)] (code* child string-builder)) (. string-builder (append "}"))) - (:number :whitespace :symbolic :auto-resolve :symbol :simple-keyword - :macro-keyword :character :string :regex) + (:number :whitespace :symbolic :symbol :character :string :regex) (. string-builder (append (second ast))) + :auto_resolve + (. 
string-builder (append "::")) + + :simple_keyword + (do (. string-builder (append ":")) + (. string-builder (append (second ast)))) + + :macro_keyword + (do (. string-builder (append "::")) + (. string-builder (append (second ast)))) + :metadata (do (doseq [child (rest (butlast ast))] (code* child string-builder)) (code* (last ast) string-builder)) - :metadata-entry + :metadata_entry (doseq [child (rest ast)] (. string-builder (append "^")) (code* child string-builder)) @@ -123,7 +133,7 @@ (do (. string-builder (append "'")) (doseq [child (rest ast)] (code* child string-builder))) - :var-quote + :var_quote (do (. string-builder (append "#'")) (code* (second ast) string-builder)) @@ -143,7 +153,7 @@ (do (. string-builder (append "~")) (doseq [child (rest ast)] (code* child string-builder))) - :unquote-splicing + :unquote_splicing (do (. string-builder (append "~@")) (doseq [child (rest ast)] (code* child string-builder))) @@ -152,7 +162,7 @@ (doseq [child (rest ast)] (code* child string-builder)) (. string-builder (append ")"))) - :conditional-splicing + :conditional_splicing (do (. string-builder (append "#?@(")) (doseq [child (rest ast)] (code* child string-builder)) (. string-builder (append ")"))) From 3a6c2659c714052faa3b0a2d1f987a640b8417b0 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 4 Nov 2019 22:01:27 +0100 Subject: [PATCH 067/128] cosmetics --- src/parcera/experimental.cljc | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/parcera/experimental.cljc b/src/parcera/experimental.cljc index 27e0ba1..44b0bee 100644 --- a/src/parcera/experimental.cljc +++ b/src/parcera/experimental.cljc @@ -4,7 +4,6 @@ #?(:cljs (:import goog.string.StringBuffer))) - (def default-hidden {:tags #{:form :collection :literal :keyword :reader_macro :dispatch} :literals #{"(" ")" "[" "]" "{" "}" "#{" "#" "^" "`" "'" "~@" "@" "#(" "#'" "#_" "#?" "#?@" "##" ":" "::"}}) @@ -62,6 +61,13 @@ (defn clojure + "Clojure (antlr4) parser. 
It can be used as: + - `(parcera/clojure input-string)` + -> returns an AST representation of input-string + + The following options are accepted: + - `:unhide` can be one of `#{:tags :content :all}`. Defaults to `nil` + - `:total` thruthy value to get a parse tree even on failures" [input & {:as options}] (let [hidden (unhide options) {:keys [parser listener]} (platform/parser input) From 249dae06e8325b6f3b8385b8cf6bbbd0cce565ff Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 4 Nov 2019 22:10:15 +0100 Subject: [PATCH 068/128] failure function added to platform for the time being --- src/parcera/antlr/java.clj | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/parcera/antlr/java.clj b/src/parcera/antlr/java.clj index ef2f8c9..3f041f4 100644 --- a/src/parcera/antlr/java.clj +++ b/src/parcera/antlr/java.clj @@ -63,6 +63,7 @@ (rules [^clojureParser this] (vec (.getRuleNames this))) (tree [^clojureParser this] (. this (code)))) + (defn parser [input] (let [chars (CharStreams/fromString input) @@ -77,3 +78,6 @@ (.removeErrorListeners) (.addErrorListener listener))] {:parser parser :listener listener})) + + +(defn failure? [obj] (instance? ParseFailure obj)) From 12c9ee66a930dfc3f340bcadc102c6de3b7a1f32 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 4 Nov 2019 22:11:56 +0100 Subject: [PATCH 069/128] mirro failure? 
in core experimental renamed to core removed terminals --- src/parcera/core.cljc | 233 ++++++++++++++-------------------- src/parcera/experimental.cljc | 209 ------------------------------ src/parcera/terminals.cljc | 44 ------- 3 files changed, 92 insertions(+), 394 deletions(-) delete mode 100644 src/parcera/experimental.cljc delete mode 100644 src/parcera/terminals.cljc diff --git a/src/parcera/core.cljc b/src/parcera/core.cljc index 83728b7..2cab18a 100644 --- a/src/parcera/core.cljc +++ b/src/parcera/core.cljc @@ -1,148 +1,87 @@ (ns parcera.core - (:require [instaparse.core :as instaparse] - [instaparse.combinators-source :as combi] - [instaparse.cfg :as cfg] - [parcera.terminals :as terminal]) + (:require [parcera.antlr.protocols :as antlr] + [parcera.antlr.java :as platform]) #?(:cljs (:import goog.string.StringBuffer))) -; todo: implement advices from -; http://blog.reverberate.org/2013/09/ll-and-lr-in-context-why-parsing-tools.html -; https://www.loggly.com/blog/regexes-the-bad-better-best/ -; https://www.loggly.com/blog/five-invaluable-techniques-to-improve-regex-performance/ -; todo: use advices in https://medium.appbase.io/analyzing-20k-github-repositories-af76de21c3fc -; to check if the heuristics are accurate +(def default-hidden {:tags #{:form :collection :literal :keyword :reader_macro :dispatch} + :literals #{"(" ")" "[" "]" "{" "}" "#{" "#" "^" "`" "'" + "~@" "@" "#(" "#'" "#_" "#?" "#?@" "##" ":" "::"}}) -; NOTE: Through my experiments I found out that Instaparse will gladly take the -; first match as long as the grammar is not ambiguous. Therefore I switched the -; unordered OR (|) with an ordered one (/). This of course implies an heuristic -; of knowing which grammar rules are expected to match more often. I use -; Clojure's core as a reference with the following code snippet -#_(let [core-content (slurp "https://raw.githubusercontent.com/clojure/clojure/master/src/clj/clojure/core.clj")] - (time (sort-by second > (frequencies (filter keyword? 
(flatten (clojure core-content :optimize :memory))))))) -#_(let [core-content (slurp "https://raw.githubusercontent.com/clojure/clojurescript/master/src/main/clojure/cljs/core.cljc")] - (time (sort-by second > (frequencies (filter keyword? (flatten (clojure core-content :optimize :memory))))))) -; todo: performance of [,\s]*;.*|[,\s]+ for whitespace -(def grammar-rules - "code: form*; - -
: whitespace / literal / collection / reader-macro; - - (* we treat comments the same way as commas *) - whitespace = #'([,\\s]*;.*)?([,\\s]+|$)'; - - (* for parsing purposes we dont consider a Set a collection since it starts - with # -> dispatch macro *) - : list / vector / map; - - list: <'('> form* <')'> ; - - vector: <'['> form* <']'> ; - - map: <'{'> form* <'}'>; - - (* a literal is basically anything that is not a collection, macro or whitespace *) - : ( symbol - / keyword - / string - / number - / character - ); - - : simple-keyword / macro-keyword ; - - : ( unquote - / metadata - / backtick - / quote - / dispatch - / unquote-splicing - / deref - / symbolic - ); - - set: <'#{'> form* <'}'>; - - namespaced-map: <'#'> ( keyword / auto-resolve ) map; - - auto-resolve: '::'; - - metadata: (metadata-entry whitespace?)+ ( symbol - / collection - / tag - / unquote - / unquote-splicing - ); - - metadata-entry: <'^'> ( map / symbol / string / keyword ); - - quote: <'\\''> form; - - backtick: <'`'> form; - - unquote: <#'~(?!@)'> form; - - unquote-splicing: <'~@'> form; - - deref: <'@'> form; - - : function - / regex - / set - / conditional - / conditional-splicing - / namespaced-map - / var-quote - / discard - / tag; - - function: <'#('> form* <')'>; - - var-quote: <'#\\''> symbol; - - discard: <'#_'> form; - - tag: <#'#(?![_?])'> symbol whitespace? 
(literal / collection); - - conditional: <'#?('> form* <')'>; - - conditional-splicing: <'#?@('> form* <')'>; - - symbolic: #'##(Inf|-Inf|NaN)'") - - -(def grammar-terminals - {:character (combi/regexp terminal/character-pattern) - :string (combi/regexp terminal/string-pattern) - :symbol (combi/regexp terminal/symbol-pattern) - :number (combi/regexp terminal/number-pattern) - :macro-keyword (combi/regexp terminal/macro-keyword) - :simple-keyword (combi/regexp terminal/simple-keyword) - :regex (combi/regexp terminal/regex-pattern)}) - - -(def grammar (merge (cfg/ebnf grammar-rules) grammar-terminals)) - - -(def clojure - "Clojure (instaparse) parser. It can be used as: - - (parcera/clojure input-string) +(defn- info + "extract the match meta data information from the ast node" + [ast] + (let [start (antlr/start ast) + end (antlr/end ast)] + {::start {:row (antlr/row start) + :column (antlr/column start)} + ::end {:row (antlr/row end) + :column (antlr/column end)}})) + + +(defn- hiccup + "transform the AST into a `hiccup-like` data structure. + + This function doesnt return a vectors because they are + 100 times slower for this use case compared to `cons` cells" + [tree rule-names hide-tags hide-literals] + (cond + (satisfies? antlr/ParserRule tree) + (let [rule (keyword (get rule-names (antlr/rule-index tree))) + children-ast (for [child (antlr/children tree) + :let [child-ast (hiccup child rule-names hide-tags hide-literals)] + :when (not (nil? child-ast))] + child-ast) + ast (if (contains? hide-tags rule) + (apply concat children-ast) + (cons rule children-ast))] + ;; attach meta data ... ala instaparse + (with-meta ast (info tree))) + + (satisfies? antlr/ErrorNode tree) + (let [token (antlr/token tree) + ;; error metadata + info {::start {:row (antlr/row token) + :column (antlr/column token)}}] + (with-meta (list ::failure (str tree)) info)) + + :else + (let [text (str tree)] + (if (contains? 
hide-literals text) nil text)))) + + +(defn- unhide + [options] + (case (:unhide options) + :all (dissoc default-hidden :literals :tags) + :content (dissoc default-hidden :literals) + :tags (dissoc default-hidden :tags) + default-hidden)) + + +(defn clojure + "Clojure (antlr4) parser. It can be used as: + - `(parcera/clojure input-string)` -> returns an AST representation of input-string - - (instaparse/parse parcera/clojure input-string) - -> same as above but more explicit - - (instaparse/parses parcera/clojure input-string) - -> returns a sequence of possible AST representations in case of ambiguity - in input-string - For a description of all possible options, visit Instaparse's official - documentation: https://github.com/Engelberg/instaparse#reference" - (instaparse/parser grammar :start :code)) + The following options are accepted: + - `:unhide` can be one of `#{:tags :content :all}`. Defaults to `nil` + - `:total` thruthy value to get a parse tree even on failures" + [input & {:as options}] + (let [hidden (unhide options) + {:keys [parser listener]} (platform/parser input) + rule-names (antlr/rules parser) + tree (antlr/tree parser)] + (if (or (empty? @(:reports listener)) (:total options)) + (hiccup tree rule-names (:tags hidden) (:literals hidden)) + @(:reports listener)))) +;; todo: expose a proper error record ? (defn- code* "internal function used to imperatively build up the code from the provided AST as Clojure's str would be too slow" - [ast #?(:clj ^StringBuilder string-builder + [ast #?(:clj ^StringBuilder string-builder :cljs ^StringBuffer string-builder)] (case (first ast) :code @@ -159,7 +98,7 @@ (doseq [child (rest ast)] (code* child string-builder)) (. string-builder (append "]"))) - :namespaced-map + :namespaced_map (do (. string-builder (append "#")) (doseq [child (rest ast)] (code* child string-builder))) @@ -173,15 +112,25 @@ (doseq [child (rest ast)] (code* child string-builder)) (. 
string-builder (append "}"))) - (:number :whitespace :symbolic :auto-resolve :symbol :simple-keyword - :macro-keyword :character :string :regex) + (:number :whitespace :symbolic :symbol :character :string :regex) (. string-builder (append (second ast))) + :auto_resolve + (. string-builder (append "::")) + + :simple_keyword + (do (. string-builder (append ":")) + (. string-builder (append (second ast)))) + + :macro_keyword + (do (. string-builder (append "::")) + (. string-builder (append (second ast)))) + :metadata (do (doseq [child (rest (butlast ast))] (code* child string-builder)) (code* (last ast) string-builder)) - :metadata-entry + :metadata_entry (doseq [child (rest ast)] (. string-builder (append "^")) (code* child string-builder)) @@ -190,7 +139,7 @@ (do (. string-builder (append "'")) (doseq [child (rest ast)] (code* child string-builder))) - :var-quote + :var_quote (do (. string-builder (append "#'")) (code* (second ast) string-builder)) @@ -210,7 +159,7 @@ (do (. string-builder (append "~")) (doseq [child (rest ast)] (code* child string-builder))) - :unquote-splicing + :unquote_splicing (do (. string-builder (append "~@")) (doseq [child (rest ast)] (code* child string-builder))) @@ -219,7 +168,7 @@ (doseq [child (rest ast)] (code* child string-builder)) (. string-builder (append ")"))) - :conditional-splicing + :conditional_splicing (do (. string-builder (append "#?@(")) (doseq [child (rest ast)] (code* child string-builder)) (. string-builder (append ")"))) @@ -259,4 +208,6 @@ [clojure.string :as str]))) :trace true)) -#_(instaparse/disable-tracing!) +;; this is just forwarding for the time +;; ideally we shouldnt need to do it but directly define it here +(defn failure? [obj] (platform/failure? 
obj)) diff --git a/src/parcera/experimental.cljc b/src/parcera/experimental.cljc deleted file mode 100644 index 44b0bee..0000000 --- a/src/parcera/experimental.cljc +++ /dev/null @@ -1,209 +0,0 @@ -(ns parcera.experimental - (:require [parcera.antlr.protocols :as antlr] - [parcera.antlr.java :as platform]) - #?(:cljs (:import goog.string.StringBuffer))) - - -(def default-hidden {:tags #{:form :collection :literal :keyword :reader_macro :dispatch} - :literals #{"(" ")" "[" "]" "{" "}" "#{" "#" "^" "`" "'" - "~@" "@" "#(" "#'" "#_" "#?" "#?@" "##" ":" "::"}}) - - -(defn- info - "extract the match meta data information from the ast node" - [ast] - (let [start (antlr/start ast) - end (antlr/end ast)] - {::start {:row (antlr/row start) - :column (antlr/column start)} - ::end {:row (antlr/row end) - :column (antlr/column end)}})) - - -(defn- hiccup - "transform the AST into a `hiccup-like` data structure. - - This function doesnt return a vectors because they are - 100 times slower for this use case compared to `cons` cells" - [tree rule-names hide-tags hide-literals] - (cond - (satisfies? antlr/ParserRule tree) - (let [rule (keyword (get rule-names (antlr/rule-index tree))) - children-ast (for [child (antlr/children tree) - :let [child-ast (hiccup child rule-names hide-tags hide-literals)] - :when (not (nil? child-ast))] - child-ast) - ast (if (contains? hide-tags rule) - (apply concat children-ast) - (cons rule children-ast))] - ;; attach meta data ... ala instaparse - (with-meta ast (info tree))) - - (satisfies? antlr/ErrorNode tree) - (let [token (antlr/token tree) - ;; error metadata - info {::start {:row (antlr/row token) - :column (antlr/column token)}}] - (with-meta (list ::failure (str tree)) info)) - - :else - (let [text (str tree)] - (if (contains? 
hide-literals text) nil text)))) - - -(defn- unhide - [options] - (case (:unhide options) - :all (dissoc default-hidden :literals :tags) - :content (dissoc default-hidden :literals) - :tags (dissoc default-hidden :tags) - default-hidden)) - - -(defn clojure - "Clojure (antlr4) parser. It can be used as: - - `(parcera/clojure input-string)` - -> returns an AST representation of input-string - - The following options are accepted: - - `:unhide` can be one of `#{:tags :content :all}`. Defaults to `nil` - - `:total` thruthy value to get a parse tree even on failures" - [input & {:as options}] - (let [hidden (unhide options) - {:keys [parser listener]} (platform/parser input) - rule-names (antlr/rules parser) - tree (antlr/tree parser)] - (if (or (empty? @(:reports listener)) (:total options)) - (hiccup tree rule-names (:tags hidden) (:literals hidden)) - @(:reports listener)))) ;; todo: expose a proper error record ? - - -(defn- code* - "internal function used to imperatively build up the code from the provided - AST as Clojure's str would be too slow" - [ast #?(:clj ^StringBuilder string-builder - :cljs ^StringBuffer string-builder)] - (case (first ast) - :code - (doseq [child (rest ast)] - (code* child string-builder)) - - :list - (do (. string-builder (append "(")) - (doseq [child (rest ast)] (code* child string-builder)) - (. string-builder (append ")"))) - - :vector - (do (. string-builder (append "[")) - (doseq [child (rest ast)] (code* child string-builder)) - (. string-builder (append "]"))) - - :namespaced_map - (do (. string-builder (append "#")) - (doseq [child (rest ast)] (code* child string-builder))) - - :map - (do (. string-builder (append "{")) - (doseq [child (rest ast)] (code* child string-builder)) - (. string-builder (append "}"))) - - :set - (do (. string-builder (append "#{")) - (doseq [child (rest ast)] (code* child string-builder)) - (. string-builder (append "}"))) - - (:number :whitespace :symbolic :symbol :character :string :regex) - (. 
string-builder (append (second ast))) - - :auto_resolve - (. string-builder (append "::")) - - :simple_keyword - (do (. string-builder (append ":")) - (. string-builder (append (second ast)))) - - :macro_keyword - (do (. string-builder (append "::")) - (. string-builder (append (second ast)))) - - :metadata - (do (doseq [child (rest (butlast ast))] (code* child string-builder)) - (code* (last ast) string-builder)) - - :metadata_entry - (doseq [child (rest ast)] - (. string-builder (append "^")) - (code* child string-builder)) - - :quote - (do (. string-builder (append "'")) - (doseq [child (rest ast)] (code* child string-builder))) - - :var_quote - (do (. string-builder (append "#'")) - (code* (second ast) string-builder)) - - :discard - (do (. string-builder (append "#_")) - (doseq [child (rest ast)] (code* child string-builder))) - - :tag - (do (. string-builder (append "#")) - (doseq [child (rest ast)] (code* child string-builder))) - - :backtick - (do (. string-builder (append "`")) - (doseq [child (rest ast)] (code* child string-builder))) - - :unquote - (do (. string-builder (append "~")) - (doseq [child (rest ast)] (code* child string-builder))) - - :unquote_splicing - (do (. string-builder (append "~@")) - (doseq [child (rest ast)] (code* child string-builder))) - - :conditional - (do (. string-builder (append "#?(")) - (doseq [child (rest ast)] (code* child string-builder)) - (. string-builder (append ")"))) - - :conditional_splicing - (do (. string-builder (append "#?@(")) - (doseq [child (rest ast)] (code* child string-builder)) - (. string-builder (append ")"))) - - :deref - (do (. string-builder (append "@")) - (doseq [child (rest ast)] (code* child string-builder))) - - :function - (do (. string-builder (append "#(")) - (doseq [child (rest ast)] (code* child string-builder)) - (. 
string-builder (append ")"))))) - - -(defn code - "Transforms your AST back into code - - ast: The nested sequence of [:keyword & content] which MUST follow the - same structure as the result of `(parcera/clojure input-string)` - - Returns a string representation of the provided AST - - In general (= input (parcera/code (parcera/clojure input)))" - [ast] - (let [string-builder #?(:clj (new StringBuilder) - :cljs (new StringBuffer))] - (code* ast string-builder) - (. string-builder (toString)))) - -; Successful parse. -; Profile: {:create-node 384, :push-full-listener 2, :push-stack 384, -; :push-listener 382, :push-result 227, :push-message 227 } -; "Elapsed time: 47.25084 msecs" -#_(time (clojure (str '(ns parcera.core - (:require [instaparse.core :as instaparse] - [clojure.data :as data] - [clojure.string :as str]))) - :trace true)) diff --git a/src/parcera/terminals.cljc b/src/parcera/terminals.cljc deleted file mode 100644 index b9e3c69..0000000 --- a/src/parcera/terminals.cljc +++ /dev/null @@ -1,44 +0,0 @@ -(ns parcera.terminals - "Clojure symbols, keywords, numbers and string/regex share quite a lot - of matching logic. This namespace is aimed towards clearly identifying - those pieces and share them among the different definitions to - avoid recurring issues") - -;; Clojure's reader is quite permissive so we follow the motto -;; "if it is not forbidden, it is allowed" -(def not-allowed "\\s\\(\\)\\[\\]{}\"@~\\^;`\\\\\\/,") -(def allowed-characters (str "[^" not-allowed "]*")) -(def not-number "(?![+-]?\\d+)") -(def symbol-end "(?=[\\s\"()\\[\\]{},]|$)") - -(defn- name-pattern - [restriction] - (let [first-character (str "[^" restriction not-allowed "]")] - (str "(" first-character allowed-characters "\\/)?" 
- "(\\/|(" first-character allowed-characters "))" - symbol-end))) - - -(def symbol-pattern (str not-number (name-pattern ":#\\'"))) -(def simple-keyword (str ":" (name-pattern ":"))) -(def macro-keyword (str "::" (name-pattern ":"))) - - -(def double-suffix "(((\\.\\d*)?([eE][-+]?\\d+)?)M?)") -(def long-suffix "((0[xX]([\\dA-Fa-f]+)|0([0-7]+)|([1-9]\\d?)[rR]([\\d\\w]+)|0\\d+)?N?)") -(def ratio-suffix "(\\/(\\d+))") -(def number-pattern (str "[+-]?\\d+(" long-suffix "|" double-suffix "|" ratio-suffix ")(?![\\.\\/])")) ; todo: word boundary ? - - -; This is supposed to be the JavaScript friendly version of #'\P{M}\p{M}*+' -; mentioned here: https://www.regular-expressions.info/unicode.html -; It's cooked by this generator: http://kourge.net/projects/regexp-unicode-block -; ticking all 'Combining Diacritical Marks' boxes *)) -(def unicode-char "([^\\u0300-\\u036F\\u1DC0-\\u1DFF\\u20D0-\\u20FF])") -(def named-char "(newline|return|space|tab|formfeed|backspace)") -(def unicode "(u[\\dD-Fd-f]{4})") -(def character-pattern (str "\\\\(" unicode-char "|" named-char "|" unicode ")(?!\\w+)")) - - -(def string-pattern "\"[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*\"") -(def regex-pattern (str "#" string-pattern)) From ca3c6e1ac05184d10bf1e17fd6cbee927b327dca Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 4 Nov 2019 22:16:55 +0100 Subject: [PATCH 070/128] commented out currently impossible tests --- test/parcera/test/core.cljc | 274 ++++++++++++++++++------------------ 1 file changed, 137 insertions(+), 137 deletions(-) diff --git a/test/parcera/test/core.cljc b/test/parcera/test/core.cljc index 0ae9324..0fd4fe2 100644 --- a/test/parcera/test/core.cljc +++ b/test/parcera/test/core.cljc @@ -5,7 +5,6 @@ [clojure.test.check.properties :as prop] [clojure.test.check :as tc] [parcera.core :as parcera] - [instaparse.core :as instaparse] #?(:cljs [parcera.slurp :refer [slurp]]))) @@ -17,12 +16,13 @@ (defn- valid? [input] - (not (instaparse/failure? 
(parcera/clojure input)))) + (not (parcera/failure? (parcera/clojure input)))) -(defn- clear - [input] - (= 1 (count (instaparse/parses parcera/clojure input :unhide :all)))) +;; todo: is this even possible with antlr ? 🤔 +#_(defn- clear + [input] + (= 1 (count (instaparse/parses parcera/clojure input :unhide :all)))) (def validity @@ -39,40 +39,40 @@ (roundtrip input))) -(def unambiguous - "The process of parsing clojure code yields consistent results. Meaning +#_(def unambiguous + "The process of parsing clojure code yields consistent results. Meaning that any input should (but must not) only have 1 AST representation ... however I have found this is not always possible" - (prop/for-all [input (gen/fmap pr-str gen/any)] - (clear input))) + (prop/for-all [input (gen/fmap pr-str gen/any)] + (clear input))) (deftest simple (testing "character literals" (as-> "\\t" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "\\n" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "\\r" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "\\a" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "\\é" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "\\ö" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "\\ï" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "\\ϕ" input (and (is (valid? 
input)) - (is (roundtrip input)) - (is (clear input)))))) + (is (roundtrip input)))))) +;(is (clear input)))))) (deftest data-structures @@ -88,51 +88,51 @@ (str "read <-> write process yield different result. Failed at\n" (with-out-str (pprint/pprint result)))))) - (testing "very little ambiguity" - (let [result (tc/quick-check 200 unambiguous)] - (is (:pass? result) - (str "high ambiguity case found. Please check the grammar to ensure " - "high accuracy\n" - (with-out-str (pprint/pprint result))))))) + #_(testing "very little ambiguity" + (let [result (tc/quick-check 200 unambiguous)] + (is (:pass? result) + (str "high ambiguity case found. Please check the grammar to ensure " + "high accuracy\n" + (with-out-str (pprint/pprint result))))))) (deftest unit-tests (testing "names" (as-> "foo" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "foo-bar" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "foo->bar" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "->" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "->as" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "föl" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "Öl" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "ϕ" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "❤️" input (and (is (valid? 
input)) - (is (roundtrip input)) - (is (clear input)))))) + (is (roundtrip input)))))) +;(is (clear input)))))) (deftest edge-cases (testing "comments" (as-> "{:hello ;2} 2}" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input))))) + (is (roundtrip input))))) + ;(is (clear input))))) (testing "symbols" (as-> "hello/world/" input (is (not (valid? input)))) (as-> ":hello/world/" input (is (not (valid? input)))) @@ -142,152 +142,152 @@ (deftest macros (testing "metadata" (as-> "^String [a b 2]" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "^\"String\" [a b 2]" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "^:string [a b 2]" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "^{:a 1} [a b 2]" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "^:hello ^\"World\" ^{:a 1} [a b 2]" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input))))) + (is (roundtrip input))))) + ;(is (clear input))))) (testing "discard" (as-> "#_[a b 2]" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "#_(a b 2)" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "#_{:a 1}" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "#_macros" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input))))) + (is (roundtrip input))))) + ;(is (clear input))))) (testing "regex" (as-> "#_\"[a b 2]\"" input (and (is (valid? 
input)) - (is (roundtrip input)) - (is (clear input))))) + (is (roundtrip input))))) + ;(is (clear input))))) (testing "comments" (as-> ";[a b 2]" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> ";; \"[a b 2]\"" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "2 ;[a b 2]" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> " :hello ;; \"[a b 2]\"" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input))))) + (is (roundtrip input))))) + ;(is (clear input))))) (testing "var quote" (as-> "#'hello/world" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "#'/" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input))))) + (is (roundtrip input))))) + ;(is (clear input))))) (testing "tag" (as-> "#hello/world [1 a \"3\"]" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "#hello/world {1 \"3\"}" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input))))) + (is (roundtrip input))))) + ;(is (clear input))))) (testing "keyword" (as-> "::hello/world [1 a \"3\"]" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "::hello" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input))))) + (is (roundtrip input))))) + ;(is (clear input))))) (testing "quote" (as-> "'hello/world" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "'hello" input (and (is (valid? 
input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "'/" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input))))) + (is (roundtrip input))))) + ;(is (clear input))))) (testing "backtick" (as-> "`hello/world" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "`hello" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "`/" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input))))) + (is (roundtrip input))))) + ;(is (clear input))))) (testing "unquote" (as-> "~hello/world" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "~(hello 2 3)" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "~/" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input))))) + (is (roundtrip input))))) + ;(is (clear input))))) (testing "quote splicing" (as-> "~@hello/world" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "~@(hello 2 b)" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input))))) + (is (roundtrip input))))) + ;(is (clear input))))) (testing "deref" (as-> "@hello/world" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "@hello" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "@/" input (and (is (valid? 
input)) - (is (roundtrip input)) - (is (clear input))))) + (is (roundtrip input))))) + ;(is (clear input))))) (testing "anonymous function" (as-> "#(= (str %1 %2 %&))" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input))))) + (is (roundtrip input))))) + ;(is (clear input))))) (testing "namespaced map" (as-> "#::{:a 1 b 3}" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "#::hello{:a 1 b 3}" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input))))) + (is (roundtrip input))))) + ;(is (clear input))))) (testing "reader conditional" (as-> "#?(:clj Double/NaN :cljs js/NaN :default nil)" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (as-> "[1 2 #?@(:clj [3 4] :cljs [5 6])]" input (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))))) + (is (roundtrip input)))))) +;(is (clear input)))))) (deftest bootstrap @@ -295,22 +295,22 @@ (testing "parcera should be able to parse itself" (let [input (slurp "./src/parcera/core.cljc")] (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (let [input (slurp "./src/parcera/slurp.cljc")] (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input))))) + (is (roundtrip input))))) + ;(is (clear input))))) (testing "parcera should be able to parse its own test suite" (let [input (slurp "./test/parcera/test/core.cljc")] (and (is (valid? input)) - (is (roundtrip input)) - (is (clear input)))) + (is (roundtrip input)))) + ;(is (clear input)))) (let [input (slurp "./test/parcera/test/benchmark.clj")] (and (is (valid? 
input)) - (is (roundtrip input)) - (is (clear input)))))) + (is (roundtrip input)))))) +;(is (clear input)))))) (deftest clojure$cript From b4e6c155e5c7c4faeddfd4b97cadd5eb5d92c951 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 4 Nov 2019 22:39:45 +0100 Subject: [PATCH 071/128] fix: missing roundtrip configuration for symbolic and regex values --- src/parcera/core.cljc | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/parcera/core.cljc b/src/parcera/core.cljc index 2cab18a..948e8c0 100644 --- a/src/parcera/core.cljc +++ b/src/parcera/core.cljc @@ -5,8 +5,8 @@ (def default-hidden {:tags #{:form :collection :literal :keyword :reader_macro :dispatch} - :literals #{"(" ")" "[" "]" "{" "}" "#{" "#" "^" "`" "'" - "~@" "@" "#(" "#'" "#_" "#?" "#?@" "##" ":" "::"}}) + :literals #{"(" ")" "[" "]" "{" "}" "#{" "#" "^" "`" "'" "~" + "~@" "@" "#(" "#'" "#_" "#?(" "#?@(" "##" ":" "::"}}) (defn- info "extract the match meta data information from the ast node" @@ -112,9 +112,17 @@ (doseq [child (rest ast)] (code* child string-builder)) (. string-builder (append "}"))) - (:number :whitespace :symbolic :symbol :character :string :regex) + (:number :whitespace :symbol :character :string) (. string-builder (append (second ast))) + :symbolic + (do (. string-builder (append "##")) + (. string-builder (append (second ast)))) + + :regex + (do (. string-builder (append "#")) + (. string-builder (append (second ast)))) + :auto_resolve (. 
string-builder (append "::")) From efe4869bf3df2d58a55d736c6456001df48a416e Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 4 Nov 2019 22:53:26 +0100 Subject: [PATCH 072/128] renamed grammar to Clojure --- resources/{clojure.g4 => Clojure.g4} | 4 ++-- src/parcera/antlr/java.clj | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) rename resources/{clojure.g4 => Clojure.g4} (97%) diff --git a/resources/clojure.g4 b/resources/Clojure.g4 similarity index 97% rename from resources/clojure.g4 rename to resources/Clojure.g4 index 9872607..039c79c 100644 --- a/resources/clojure.g4 +++ b/resources/Clojure.g4 @@ -1,5 +1,5 @@ -grammar clojure; +grammar Clojure; /* * NOTES to myself and to other developers: @@ -9,7 +9,7 @@ grammar clojure; * grammar and your own code. * * The parser should only check the syntax. So the rule of thumb is that when - * in doubt you let the parser pass the content up to your program. Then, in + * in doubt you let the parser pass the content up to your program. Then, in * your program, you check the semantics and make sure that the rule actually * have a proper meaning * diff --git a/src/parcera/antlr/java.clj b/src/parcera/antlr/java.clj index 3f041f4..faa921f 100644 --- a/src/parcera/antlr/java.clj +++ b/src/parcera/antlr/java.clj @@ -1,6 +1,6 @@ (ns parcera.antlr.java (:require [parcera.antlr.protocols :as antlr]) - (:import (parcera.antlr clojureParser clojureLexer) + (:import (parcera.antlr ClojureParser ClojureLexer) (org.antlr.v4.runtime ParserRuleContext Token CommonTokenStream CharStreams ANTLRErrorListener Parser) (org.antlr.v4.runtime.tree ErrorNodeImpl))) @@ -58,22 +58,22 @@ (column [^Token this] (.getCharPositionInLine this))) -(extend-type clojureParser +(extend-type ClojureParser antlr/AntlrParser - (rules [^clojureParser this] (vec (.getRuleNames this))) - (tree [^clojureParser this] (. this (code)))) + (rules [^ClojureParser this] (vec (.getRuleNames this))) + (tree [^ClojureParser this] (. 
this (code)))) (defn parser [input] (let [chars (CharStreams/fromString input) - lexer (doto (new clojureLexer chars) + lexer (doto (new ClojureLexer chars) (.removeErrorListeners)) ;; todo: how to handle lexer errors ? ;(.addErrorListener listener)) tokens (new CommonTokenStream lexer) listener (->ParseFailure (volatile! ())) - parser (doto (new clojureParser tokens) + parser (doto (new ClojureParser tokens) (.setBuildParseTree true) (.removeErrorListeners) (.addErrorListener listener))] From 5b36acc114739fbed1fa0910c47fdcf7e6c10f22 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 4 Nov 2019 23:27:06 +0100 Subject: [PATCH 073/128] moved files around for better organization --- .gitignore | 1 + .travis.yml | 1 + project.clj | 5 +++-- {resources => src}/Clojure.g4 | 0 src/{ => clojure}/parcera/antlr/java.clj | 0 src/{ => clojure}/parcera/antlr/javascript.cljs | 0 src/{ => clojure}/parcera/antlr/protocols.cljc | 0 src/{ => clojure}/parcera/core.cljc | 0 src/{ => clojure}/parcera/slurp.cljc | 0 9 files changed, 5 insertions(+), 2 deletions(-) rename {resources => src}/Clojure.g4 (100%) rename src/{ => clojure}/parcera/antlr/java.clj (100%) rename src/{ => clojure}/parcera/antlr/javascript.cljs (100%) rename src/{ => clojure}/parcera/antlr/protocols.cljc (100%) rename src/{ => clojure}/parcera/core.cljc (100%) rename src/{ => clojure}/parcera/slurp.cljc (100%) diff --git a/.gitignore b/.gitignore index 6110096..50c900d 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,4 @@ pom.xml.asc /yarn-error.log /node_modules/ /out/ +/src/java/ diff --git a/.travis.yml b/.travis.yml index 1d6d675..bd2f88e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,6 +14,7 @@ jobs: include: - stage: Tests script: + - curl -O https://www.antlr.org/download/antlr-4.7.1-complete.jar - lein do clean, compile, check, eastwood - lein trampoline test - nvm install 10.10 && nvm use 10.10 && lein trampoline cljsbuild test diff --git a/project.clj b/project.clj index 753d7ed..e66f70e 100644 
--- a/project.clj +++ b/project.clj @@ -1,15 +1,16 @@ -(defproject carocad/parcera "0.3.1" +(defproject carocad/parcera "0.4.0" :description "Grammar-based Clojure(script) parser" :url "https://github.com/carocad/parcera" :license {:name "LGPLv3" :url "https://github.com/carocad/parcera/blob/master/LICENSE.md"} :dependencies [[org.clojure/clojure "1.10.1"] [instaparse/instaparse "1.4.10"]] + :source-paths ["src/clojure"] + :java-source-paths ["src/java"] :profiles {:dev {:dependencies [[criterium/criterium "0.4.5"] ;; benchmark [org.clojure/test.check "0.10.0"]] :plugins [[jonase/eastwood "0.3.5"] [lein-cljsbuild "1.1.7"]] - :java-source-paths ["build/java"] ;; todo: does this even work ? :foreign-libs [{:file "build/js/parcera/antlr/clojureLexer.js" :provides ["parcera.antlr.clojureLexer"] diff --git a/resources/Clojure.g4 b/src/Clojure.g4 similarity index 100% rename from resources/Clojure.g4 rename to src/Clojure.g4 diff --git a/src/parcera/antlr/java.clj b/src/clojure/parcera/antlr/java.clj similarity index 100% rename from src/parcera/antlr/java.clj rename to src/clojure/parcera/antlr/java.clj diff --git a/src/parcera/antlr/javascript.cljs b/src/clojure/parcera/antlr/javascript.cljs similarity index 100% rename from src/parcera/antlr/javascript.cljs rename to src/clojure/parcera/antlr/javascript.cljs diff --git a/src/parcera/antlr/protocols.cljc b/src/clojure/parcera/antlr/protocols.cljc similarity index 100% rename from src/parcera/antlr/protocols.cljc rename to src/clojure/parcera/antlr/protocols.cljc diff --git a/src/parcera/core.cljc b/src/clojure/parcera/core.cljc similarity index 100% rename from src/parcera/core.cljc rename to src/clojure/parcera/core.cljc diff --git a/src/parcera/slurp.cljc b/src/clojure/parcera/slurp.cljc similarity index 100% rename from src/parcera/slurp.cljc rename to src/clojure/parcera/slurp.cljc From 1bbdf33456049816544a3d39a40eed67835908d8 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 4 Nov 2019 23:28:21 +0100 Subject: 
[PATCH 074/128] fix: path for bootstrap files --- test/parcera/test/core.cljc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/parcera/test/core.cljc b/test/parcera/test/core.cljc index 0fd4fe2..caa4f51 100644 --- a/test/parcera/test/core.cljc +++ b/test/parcera/test/core.cljc @@ -293,11 +293,11 @@ (deftest bootstrap (testing "parcera should be able to parse itself" - (let [input (slurp "./src/parcera/core.cljc")] + (let [input (slurp "./src/clojure/parcera/core.cljc")] (and (is (valid? input)) (is (roundtrip input)))) ;(is (clear input)))) - (let [input (slurp "./src/parcera/slurp.cljc")] + (let [input (slurp "./src/clojure/parcera/slurp.cljc")] (and (is (valid? input)) (is (roundtrip input))))) ;(is (clear input))))) From 88f3012f0f7b8e05d65fcbdd16ae63020c03f8fd Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 4 Nov 2019 23:30:46 +0100 Subject: [PATCH 075/128] fix: add antlr compilation to travis --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index bd2f88e..39dfc59 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,6 +15,7 @@ jobs: - stage: Tests script: - curl -O https://www.antlr.org/download/antlr-4.7.1-complete.jar + - java -jar antlr-4.7.1-complete.jar -Xexact-output-dir -o src/java/parcera/antlr -package parcera.antlr -Dlanguage=Java -no-listener -no-visitor src/Clojure.g4 - lein do clean, compile, check, eastwood - lein trampoline test - nvm install 10.10 && nvm use 10.10 && lein trampoline cljsbuild test From 9af0c31ad8e9f760d8e9b26f30514c006521f995 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 4 Nov 2019 23:48:18 +0100 Subject: [PATCH 076/128] added javascript compilation --- .gitignore | 1 + .travis.yml | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index 50c900d..77c96c5 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,4 @@ pom.xml.asc /node_modules/ /out/ /src/java/ +/src/javascript diff --git a/.travis.yml b/.travis.yml index 
39dfc59..75f69ff 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,7 +15,11 @@ jobs: - stage: Tests script: - curl -O https://www.antlr.org/download/antlr-4.7.1-complete.jar + # generate java - java -jar antlr-4.7.1-complete.jar -Xexact-output-dir -o src/java/parcera/antlr -package parcera.antlr -Dlanguage=Java -no-listener -no-visitor src/Clojure.g4 + # generate javascript + - java -jar antlr-4.7.1-complete.jar -Xexact-output-dir -o src/javascript/parcera/antlr -package parcera.antlr -Dlanguage=JavaScript -no-listener -no-visitor src/Clojure.g4 + # now we can actually proceed with clojure code - lein do clean, compile, check, eastwood - lein trampoline test - nvm install 10.10 && nvm use 10.10 && lein trampoline cljsbuild test From 8a98f8fef8eaa662f736e11235815fbd585e6003 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 4 Nov 2019 23:50:40 +0100 Subject: [PATCH 077/128] fix: incorrect path for js antlr fix: wrong case for generated code --- index.js | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/index.js b/index.js index df31056..ab2b0fe 100644 --- a/index.js +++ b/index.js @@ -1,6 +1,6 @@ const antlr4 = require('antlr4/index') -const {clojureLexer} = require('./build/js/parcera/antlr/clojureLexer') -const {clojureParser} = require('./build/js/parcera/antlr/clojureParser') +const {ClojureLexer} = require('./src/javascript/parcera/antlr/ClojureLexer') +const {ClojureParser} = require('./src/javascript/parcera/antlr/ClojureParser') /** * Takes an AST tree; the result of a parser walk and returns @@ -15,7 +15,6 @@ function treeSeq(ast, ruleNames) { // parser rules always have childrens if (ast.children !== undefined) { // we are inside a parser rule; therefore we add the rule name to the result - console.log(ast instanceof antlr4.ParserRuleContext) result.push(ruleNames[ast.ruleIndex]) result.push.apply(result, ast.children.map((child) => treeSeq(child, ruleNames))) return result @@ -28,10 +27,10 @@ function treeSeq(ast, ruleNames) { 
const input = `(john :SHOUTS "hello" @michael pink/this will work)` const chars = new antlr4.CharStreams.fromString(input) -const lexer = new clojureLexer(chars) +const lexer = new ClojureLexer(chars) lexer.removeErrorListeners() const tokens = new antlr4.CommonTokenStream(lexer) -const parser = new clojureParser(tokens) +const parser = new ClojureParser(tokens) const ruleNames = parser.ruleNames parser.buildParseTrees = true parser.removeErrorListeners() From e87c49c1983b4d1d577248a862d8d0ef2e926f9f Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Thu, 7 Nov 2019 22:44:19 +0100 Subject: [PATCH 078/128] lookahead and negative lookahead functionality added to handle edge cases example edge case added --- src/clojure/parcera/core.cljc | 54 ++++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 7 deletions(-) diff --git a/src/clojure/parcera/core.cljc b/src/clojure/parcera/core.cljc index 948e8c0..d584936 100644 --- a/src/clojure/parcera/core.cljc +++ b/src/clojure/parcera/core.cljc @@ -1,13 +1,43 @@ (ns parcera.core (:require [parcera.antlr.protocols :as antlr] - [parcera.antlr.java :as platform]) + [parcera.antlr.java :as platform] + [clojure.zip :as zip]) #?(:cljs (:import goog.string.StringBuffer))) +(defn- branches + "given a zipper loc returns all reachable branch nodes" + [loc] + (filter zip/branch? + (take-while (complement zip/end?) + (iterate zip/next loc)))) + + +(defn- lookahead + "given an AST yields a sequence of branches which match rule and are + followed by the ahead rules" + [ast rule ahead] ;; ahead -> #{:rule-names} + (let [zipper (zip/seq-zip ast)] + (for [branch (branches zipper) + :when (= rule (first (zip/node branch))) + :let [neighbour (zip/right branch)] + :when (some? 
neighbour) + :when (ahead (first (zip/node neighbour)))] + branch))) + + +(defn- negative-lookahead + "given an AST yields a sequence of branches which match rule and are + followed by the forbidden rules" + [ast rule forbidden] ;; ahead -> #{:rule-names} + (lookahead ast rule (complement forbidden))) + + (def default-hidden {:tags #{:form :collection :literal :keyword :reader_macro :dispatch} :literals #{"(" ")" "[" "]" "{" "}" "#{" "#" "^" "`" "'" "~" "~@" "@" "#(" "#'" "#_" "#?(" "#?@(" "##" ":" "::"}}) + (defn- info "extract the match meta data information from the ast node" [ast] @@ -206,6 +236,10 @@ (code* ast string-builder) (. string-builder (toString)))) +;; this is just forwarding for the time +;; ideally we shouldnt need to do it but directly define it here +(defn failure? [obj] (platform/failure? obj)) + ; Successful parse. ; Profile: {:create-node 384, :push-full-listener 2, :push-stack 384, ; :push-listener 382, :push-result 227, :push-message 227 } @@ -213,9 +247,15 @@ #_(time (clojure (str '(ns parcera.core (:require [instaparse.core :as instaparse] [clojure.data :as data] - [clojure.string :as str]))) - :trace true)) - -;; this is just forwarding for the time -;; ideally we shouldnt need to do it but directly define it here -(defn failure? [obj] (platform/failure? 
obj)) + [clojure.string :as str]))))) + +#_(let [input "hello/world/" + ast (time (clojure input)) + failures (negative-lookahead ast :symbol :symbol)] + (for [branch failures] + (let [neighbour (zip/right branch) + failure (zip/replace branch (list ::failure + (zip/node branch) + (zip/node neighbour))) + removal (zip/remove (zip/right failure))] + (zip/root removal)))) From 3e2dc2012f359d5386306578a841dcdc70ce1266 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Thu, 7 Nov 2019 23:14:48 +0100 Subject: [PATCH 079/128] faster branches with eduction --- src/clojure/parcera/core.cljc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/clojure/parcera/core.cljc b/src/clojure/parcera/core.cljc index d584936..cb9210b 100644 --- a/src/clojure/parcera/core.cljc +++ b/src/clojure/parcera/core.cljc @@ -8,9 +8,9 @@ (defn- branches "given a zipper loc returns all reachable branch nodes" [loc] - (filter zip/branch? - (take-while (complement zip/end?) - (iterate zip/next loc)))) + (eduction (take-while (complement zip/end?)) + (filter zip/branch?) + (iterate zip/next loc))) (defn- lookahead From b51f73520d915e6b5409bea5ac609826d3ccb416 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Thu, 7 Nov 2019 23:42:07 +0100 Subject: [PATCH 080/128] extra perfs checks added --- src/clojure/parcera/core.cljc | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/src/clojure/parcera/core.cljc b/src/clojure/parcera/core.cljc index cb9210b..d872b21 100644 --- a/src/clojure/parcera/core.cljc +++ b/src/clojure/parcera/core.cljc @@ -12,7 +12,8 @@ (filter zip/branch?) 
(iterate zip/next loc))) - +;; TODO: it would probably make more sense to do the lookahead directly on +;; hiccup (defn- lookahead "given an AST yields a sequence of branches which match rule and are followed by the ahead rules" @@ -251,7 +252,7 @@ #_(let [input "hello/world/" ast (time (clojure input)) - failures (negative-lookahead ast :symbol :symbol)] + failures (time (lookahead ast :symbol #{:symbol}))] (for [branch failures] (let [neighbour (zip/right branch) failure (zip/replace branch (list ::failure @@ -259,3 +260,25 @@ (zip/node neighbour))) removal (zip/remove (zip/right failure))] (zip/root removal)))) + + +#_(let [core-content (slurp "https://raw.githubusercontent.com/clojure/clojurescript/master/src/main/clojure/cljs/core.cljc")] + (time (last (lookahead (time (clojure core-content :optimize :memory)) + :symbol + #{:symbol})))) + +#_(let [core-content (slurp "https://raw.githubusercontent.com/clojure/clojurescript/master/src/main/clojure/cljs/core.cljc")] + (time (last (clojure core-content :optimize :memory)))) + +#_(let [core-content (slurp "https://raw.githubusercontent.com/clojure/clojurescript/master/src/main/clojure/cljs/core.cljc") + ast (time (clojure core-content :optimize :memory)) + zipper (zip/seq-zip ast)] + (time (last (for [branch (branches zipper) + :when (not true)] + branch)))) + + +#_(let [core-content (slurp "https://raw.githubusercontent.com/clojure/clojurescript/master/src/main/clojure/cljs/core.cljc") + ast (time (clojure core-content :optimize :memory))] + (time (for []))) + From 3eacbdeacd4d462543cea7f54fd4cb32cb6371be Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 10 Nov 2019 14:47:36 +0100 Subject: [PATCH 081/128] make keywords lexer rules --- src/Clojure.g4 | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/Clojure.g4 b/src/Clojure.g4 index 039c79c..184d677 100644 --- a/src/Clojure.g4 +++ b/src/Clojure.g4 @@ -34,9 +34,9 @@ literal: keyword | string | number | character | symbol; keyword: 
simple_keyword | macro_keyword; -simple_keyword: ':' NAME; +simple_keyword: SIMPLE_KEYWORD; -macro_keyword: '::' NAME; +macro_keyword: MACRO_KEYWORD; string: STRING; @@ -122,6 +122,10 @@ SPACE: [\r\n\t\f, ]+; CHARACTER: '\\' (UNICODE_CHAR | NAMED_CHAR | UNICODE); +MACRO_KEYWORD: '::' NAME; + +SIMPLE_KEYWORD: ':' NAME; + NAME: (SIMPLE_NAME '/')? ('/' | SIMPLE_NAME ); fragment UNICODE_CHAR: ~[\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF]; From 123180568cf2a6c9c93de4f849c7399063ae5988 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 10 Nov 2019 17:53:12 +0100 Subject: [PATCH 082/128] virgil added for automatic java compilation --- project.clj | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/project.clj b/project.clj index e66f70e..745cf83 100644 --- a/project.clj +++ b/project.clj @@ -7,27 +7,28 @@ [instaparse/instaparse "1.4.10"]] :source-paths ["src/clojure"] :java-source-paths ["src/java"] - :profiles {:dev {:dependencies [[criterium/criterium "0.4.5"] ;; benchmark - [org.clojure/test.check "0.10.0"]] - :plugins [[jonase/eastwood "0.3.5"] - [lein-cljsbuild "1.1.7"]] + :profiles {:dev {:dependencies [[criterium/criterium "0.4.5"] ;; benchmark + [org.clojure/test.check "0.10.0"]] + :plugins [[jonase/eastwood "0.3.5"] + [lein-cljsbuild "1.1.7"] + [lein-virgil "0.1.9"]] ;; todo: does this even work ? 
- :foreign-libs [{:file "build/js/parcera/antlr/clojureLexer.js" - :provides ["parcera.antlr.clojureLexer"] - :module-type :commonjs} - {:file "build/js/parcera/antlr/clojureParser.js" - :provides ["parcera.antlr.clojureParser"] - :module-type :commonjs}] - :cljsbuild {:builds - [{:id "dev" - :source-paths ["src" "test"] - :compiler {:main parcera.test-runner - :output-to "target/out/tests.js" - :target :nodejs - :infer-externs true - :optimizations :none}}] - :test-commands - {"test" ["node" "target/out/tests.js"]}}} + :foreign-libs [{:file "build/js/parcera/antlr/clojureLexer.js" + :provides ["parcera.antlr.clojureLexer"] + :module-type :commonjs} + {:file "build/js/parcera/antlr/clojureParser.js" + :provides ["parcera.antlr.clojureParser"] + :module-type :commonjs}] + :cljsbuild {:builds + [{:id "dev" + :source-paths ["src" "test"] + :compiler {:main parcera.test-runner + :output-to "target/out/tests.js" + :target :nodejs + :infer-externs true + :optimizations :none}}] + :test-commands + {"test" ["node" "target/out/tests.js"]}}} :provided {:dependencies [[org.clojure/clojurescript "1.10.520"] [org.antlr/antlr4-runtime "4.7.1"]]}} :test-selectors {:default (fn [m] (not (some #{:benchmark} (keys m)))) From 8084ae38986b8efa212a4887c90622c0f3866f29 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 10 Nov 2019 17:53:53 +0100 Subject: [PATCH 083/128] relax NAME lexer rule to later on validate it at runtime --- src/Clojure.g4 | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/Clojure.g4 b/src/Clojure.g4 index 184d677..4b45d05 100644 --- a/src/Clojure.g4 +++ b/src/Clojure.g4 @@ -126,7 +126,7 @@ MACRO_KEYWORD: '::' NAME; SIMPLE_KEYWORD: ':' NAME; -NAME: (SIMPLE_NAME '/')? 
('/' | SIMPLE_NAME ); +NAME: NAME_HEAD NAME_BODY*; fragment UNICODE_CHAR: ~[\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF]; @@ -134,14 +134,12 @@ fragment NAMED_CHAR: 'newline' | 'return' | 'space' | 'tab' | 'formfeed' | 'back fragment UNICODE: 'u' [0-9d-fD-F] [0-9d-fD-F] [0-9d-fD-F] [0-9d-fD-F]; -fragment SIMPLE_NAME: NAME_HEAD NAME_BODY*; - // re-allow :#' as valid characters inside the name itself fragment NAME_BODY: NAME_HEAD | [:#'0-9]; // these is the set of characters that are allowed by all symbols and keywords // however, this is more strict that necessary so that we can re-use it for both -fragment NAME_HEAD: ~[\r\n\t\f ()[\]{}"@~^;`\\/,:#'0-9]; +fragment NAME_HEAD: ~[\r\n\t\f ()[\]{}"@~^;`\\,:#'0-9]; fragment DOUBLE_SUFFIX: ((('.' DIGIT*)? ([eE][-+]?DIGIT+)?) 'M'?); From 02d5cb641cc0a2fd02f3c47e18ba1b5357b797a2 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 10 Nov 2019 17:54:27 +0100 Subject: [PATCH 084/128] type added to failure report --- src/clojure/parcera/antlr/java.clj | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/clojure/parcera/antlr/java.clj b/src/clojure/parcera/antlr/java.clj index faa921f..cfabe49 100644 --- a/src/clojure/parcera/antlr/java.clj +++ b/src/clojure/parcera/antlr/java.clj @@ -28,7 +28,8 @@ ;; recognizer is either clojureParser or clojureLexer (let [report (merge {:row line :column char - :message message} + :message message + :type :parser} ;; todo: lexer should also be allowed (when (instance? Parser recognizer) {:symbol (str offending-symbol) :stack (->> (.getRuleInvocationStack ^Parser recognizer) @@ -78,6 +79,3 @@ (.removeErrorListeners) (.addErrorListener listener))] {:parser parser :listener listener})) - - -(defn failure? [obj] (instance? 
ParseFailure obj)) From a2f2fbde576610893f2407a08db675cb7a436641 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 10 Nov 2019 17:56:41 +0100 Subject: [PATCH 085/128] refactored parcera to conform rules at runtime always attach failure to metadata --- src/clojure/parcera/core.cljc | 93 +++++++++++++++-------------------- 1 file changed, 39 insertions(+), 54 deletions(-) diff --git a/src/clojure/parcera/core.cljc b/src/clojure/parcera/core.cljc index d872b21..4820934 100644 --- a/src/clojure/parcera/core.cljc +++ b/src/clojure/parcera/core.cljc @@ -1,45 +1,15 @@ (ns parcera.core (:require [parcera.antlr.protocols :as antlr] - [parcera.antlr.java :as platform] - [clojure.zip :as zip]) + [parcera.antlr.java :as platform]) #?(:cljs (:import goog.string.StringBuffer))) -(defn- branches - "given a zipper loc returns all reachable branch nodes" - [loc] - (eduction (take-while (complement zip/end?)) - (filter zip/branch?) - (iterate zip/next loc))) - -;; TODO: it would probably make more sense to do the lookahead directly on -;; hiccup -(defn- lookahead - "given an AST yields a sequence of branches which match rule and are - followed by the ahead rules" - [ast rule ahead] ;; ahead -> #{:rule-names} - (let [zipper (zip/seq-zip ast)] - (for [branch (branches zipper) - :when (= rule (first (zip/node branch))) - :let [neighbour (zip/right branch)] - :when (some? 
neighbour) - :when (ahead (first (zip/node neighbour)))] - branch))) - - -(defn- negative-lookahead - "given an AST yields a sequence of branches which match rule and are - followed by the forbidden rules" - [ast rule forbidden] ;; ahead -> #{:rule-names} - (lookahead ast rule (complement forbidden))) - - (def default-hidden {:tags #{:form :collection :literal :keyword :reader_macro :dispatch} :literals #{"(" ")" "[" "]" "{" "}" "#{" "#" "^" "`" "'" "~" "~@" "@" "#(" "#'" "#_" "#?(" "#?@(" "##" ":" "::"}}) -(defn- info +(defn- meta-data "extract the match meta data information from the ast node" [ast] (let [start (antlr/start ast) @@ -50,24 +20,40 @@ :column (antlr/column end)}})) +(defn- conform + "Checks that `rule` conforms to additional rules which are too difficult + to represent with pure Antlr4 syntax" + [rule children metadata] + (case rule + :symbol (when (nil? (re-find #"^([^\s\/]+\/)?(\/|[^\s\/]+)$" (first children))) + (with-meta (list ::failure (cons rule children)) + metadata)) + + nil)) + + (defn- hiccup - "transform the AST into a `hiccup-like` data structure. + "transforms the tree `hiccup-like` ast data structure. - This function doesnt return a vectors because they are - 100 times slower for this use case compared to `cons` cells" + Yields a lazy sequence to avoid expensive computation whenever + the user is not interested in the full content." [tree rule-names hide-tags hide-literals] (cond (satisfies? antlr/ParserRule tree) - (let [rule (keyword (get rule-names (antlr/rule-index tree))) - children-ast (for [child (antlr/children tree) - :let [child-ast (hiccup child rule-names hide-tags hide-literals)] - :when (not (nil? child-ast))] - child-ast) - ast (if (contains? hide-tags rule) - (apply concat children-ast) - (cons rule children-ast))] - ;; attach meta data ... 
ala instaparse - (with-meta ast (info tree))) + (let [rule (keyword (get rule-names (antlr/rule-index tree))) + children (for [child (antlr/children tree) + :let [child (hiccup child rule-names hide-tags hide-literals)] + :when (not (nil? child))] + child) + ;; flatten out first children level in case of hidden tags + ast (if (contains? hide-tags rule) + (apply concat children) + (cons rule children)) + ;; attach meta data ... ala instaparse + ast-meta (meta-data tree) + conformed (conform rule children ast-meta)] + (with-meta (if (some? conformed) conformed ast) + ast-meta)) (satisfies? antlr/ErrorNode tree) (let [token (antlr/token tree) @@ -96,17 +82,16 @@ -> returns an AST representation of input-string The following options are accepted: - - `:unhide` can be one of `#{:tags :content :all}`. Defaults to `nil` - - `:total` thruthy value to get a parse tree even on failures" + - `:unhide` can be one of `#{:tags :content :all}`. Defaults to `nil`" [input & {:as options}] (let [hidden (unhide options) {:keys [parser listener]} (platform/parser input) rule-names (antlr/rules parser) - tree (antlr/tree parser)] - (if (or (empty? @(:reports listener)) (:total options)) - (hiccup tree rule-names (:tags hidden) (:literals hidden)) - @(:reports listener)))) -;; todo: expose a proper error record ? + tree (antlr/tree parser) + result (hiccup tree rule-names (:tags hidden) (:literals hidden)) + reports @(:reports listener)] + (with-meta result {::failure (not (empty? reports)) + ::reports reports}))) (defn- code* @@ -239,7 +224,8 @@ ;; this is just forwarding for the time ;; ideally we shouldnt need to do it but directly define it here -(defn failure? [obj] (platform/failure? obj)) +;; todo +#_(defn failure? [obj] (platform/failure? obj)) ; Successful parse. 
; Profile: {:create-node 384, :push-full-listener 2, :push-stack 384, @@ -281,4 +267,3 @@ #_(let [core-content (slurp "https://raw.githubusercontent.com/clojure/clojurescript/master/src/main/clojure/cljs/core.cljc") ast (time (clojure core-content :optimize :memory))] (time (for []))) - From c2b2d85e6693a1157a44366f7bc84506bd71a30c Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 10 Nov 2019 18:08:59 +0100 Subject: [PATCH 086/128] refactored failure? to handle all possible failure cases --- src/clojure/parcera/core.cljc | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/clojure/parcera/core.cljc b/src/clojure/parcera/core.cljc index 4820934..6f57bb0 100644 --- a/src/clojure/parcera/core.cljc +++ b/src/clojure/parcera/core.cljc @@ -224,8 +224,14 @@ ;; this is just forwarding for the time ;; ideally we shouldnt need to do it but directly define it here -;; todo -#_(defn failure? [obj] (platform/failure? obj)) +(defn failure? + [ast] + (or ;; ast is root node + (::failure (meta ast)) + ;; ast is child node + (and (seq? ast) (= ::failure (first ast))) + ;; ast is root node but "doesnt know" about the failure -> conformed + (some #{::failure} (filter keyword? (tree-seq seq? identity ast))))) ; Successful parse. ; Profile: {:create-node 384, :push-full-listener 2, :push-stack 384, From 4d4e4b2eafdcc1e9e014b7e710c5a0c5007d55f5 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 10 Nov 2019 18:11:22 +0100 Subject: [PATCH 087/128] cosmetic changes --- src/clojure/parcera/core.cljc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/clojure/parcera/core.cljc b/src/clojure/parcera/core.cljc index 6f57bb0..1f34108 100644 --- a/src/clojure/parcera/core.cljc +++ b/src/clojure/parcera/core.cljc @@ -90,8 +90,7 @@ tree (antlr/tree parser) result (hiccup tree rule-names (:tags hidden) (:literals hidden)) reports @(:reports listener)] - (with-meta result {::failure (not (empty? 
reports)) - ::reports reports}))) + (with-meta result {::errors reports}))) (defn- code* @@ -227,7 +226,7 @@ (defn failure? [ast] (or ;; ast is root node - (::failure (meta ast)) + (not (empty? (::errors (meta ast)))) ;; ast is child node (and (seq? ast) (= ::failure (first ast))) ;; ast is root node but "doesnt know" about the failure -> conformed From f216ada6b852ae8735068af3f90044bb258244bf Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 10 Nov 2019 18:14:13 +0100 Subject: [PATCH 088/128] renamed listener to parser errors for clarity --- src/clojure/parcera/antlr/java.clj | 2 +- src/clojure/parcera/core.cljc | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/clojure/parcera/antlr/java.clj b/src/clojure/parcera/antlr/java.clj index cfabe49..7f599b4 100644 --- a/src/clojure/parcera/antlr/java.clj +++ b/src/clojure/parcera/antlr/java.clj @@ -78,4 +78,4 @@ (.setBuildParseTree true) (.removeErrorListeners) (.addErrorListener listener))] - {:parser parser :listener listener})) + {:parser parser :errors {:parser listener}})) diff --git a/src/clojure/parcera/core.cljc b/src/clojure/parcera/core.cljc index 1f34108..debdfb4 100644 --- a/src/clojure/parcera/core.cljc +++ b/src/clojure/parcera/core.cljc @@ -85,11 +85,11 @@ - `:unhide` can be one of `#{:tags :content :all}`. 
Defaults to `nil`" [input & {:as options}] (let [hidden (unhide options) - {:keys [parser listener]} (platform/parser input) + {:keys [parser errors]} (platform/parser input) rule-names (antlr/rules parser) tree (antlr/tree parser) result (hiccup tree rule-names (:tags hidden) (:literals hidden)) - reports @(:reports listener)] + reports @(:reports (:parser errors))] (with-meta result {::errors reports}))) From 7d89ce3e87afe6ae7057f6fe078ad40e3451f7aa Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 10 Nov 2019 18:26:28 +0100 Subject: [PATCH 089/128] rollback: keywords treated as names --- src/Clojure.g4 | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/Clojure.g4 b/src/Clojure.g4 index 4b45d05..a271a7f 100644 --- a/src/Clojure.g4 +++ b/src/Clojure.g4 @@ -34,9 +34,11 @@ literal: keyword | string | number | character | symbol; keyword: simple_keyword | macro_keyword; -simple_keyword: SIMPLE_KEYWORD; +// making symbols, simple and macro keywords be based on NAME allows to +// conform them all in the same way (see `conform` function) +simple_keyword: ':' NAME; -macro_keyword: MACRO_KEYWORD; +macro_keyword: '::' NAME; string: STRING; @@ -122,10 +124,6 @@ SPACE: [\r\n\t\f, ]+; CHARACTER: '\\' (UNICODE_CHAR | NAMED_CHAR | UNICODE); -MACRO_KEYWORD: '::' NAME; - -SIMPLE_KEYWORD: ':' NAME; - NAME: NAME_HEAD NAME_BODY*; fragment UNICODE_CHAR: ~[\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF]; From 5920b1f7d25167433530777f009eba3ab00a1af8 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 10 Nov 2019 18:26:49 +0100 Subject: [PATCH 090/128] fix: missing conform for keywords --- src/clojure/parcera/core.cljc | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/clojure/parcera/core.cljc b/src/clojure/parcera/core.cljc index debdfb4..1ba0938 100644 --- a/src/clojure/parcera/core.cljc +++ b/src/clojure/parcera/core.cljc @@ -20,14 +20,18 @@ :column (antlr/column end)}})) +(def name-pattern #"^([^\s\/]+\/)?(\/|[^\s\/]+)$") 
+ + (defn- conform "Checks that `rule` conforms to additional rules which are too difficult to represent with pure Antlr4 syntax" [rule children metadata] (case rule - :symbol (when (nil? (re-find #"^([^\s\/]+\/)?(\/|[^\s\/]+)$" (first children))) - (with-meta (list ::failure (cons rule children)) - metadata)) + (:symbol :simple_keyword :macro_keyword) + (when (nil? (re-find name-pattern (first children))) + (with-meta (list ::failure (cons rule children)) + metadata)) nil)) From c6752da1a6c70a152cf5f084467f11f1ee32e196 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 10 Nov 2019 18:30:17 +0100 Subject: [PATCH 091/128] performance note --- src/clojure/parcera/core.cljc | 43 +++++++---------------------------- 1 file changed, 8 insertions(+), 35 deletions(-) diff --git a/src/clojure/parcera/core.cljc b/src/clojure/parcera/core.cljc index 1ba0938..b422768 100644 --- a/src/clojure/parcera/core.cljc +++ b/src/clojure/parcera/core.cljc @@ -225,11 +225,16 @@ (code* ast string-builder) (. string-builder (toString)))) -;; this is just forwarding for the time -;; ideally we shouldnt need to do it but directly define it here (defn failure? + "Checks if ast contains any `::failure` instances. + + NOTE: This function is potentially slow since there it has to check the + complete ast to be sure that there are no failures. + + Whenever possible, prefer to handle errors directly appearing in the ast" [ast] - (or ;; ast is root node + (or + ;; ast is root node (not (empty? (::errors (meta ast)))) ;; ast is child node (and (seq? 
ast) (= ::failure (first ast))) @@ -244,35 +249,3 @@ (:require [instaparse.core :as instaparse] [clojure.data :as data] [clojure.string :as str]))))) - -#_(let [input "hello/world/" - ast (time (clojure input)) - failures (time (lookahead ast :symbol #{:symbol}))] - (for [branch failures] - (let [neighbour (zip/right branch) - failure (zip/replace branch (list ::failure - (zip/node branch) - (zip/node neighbour))) - removal (zip/remove (zip/right failure))] - (zip/root removal)))) - - -#_(let [core-content (slurp "https://raw.githubusercontent.com/clojure/clojurescript/master/src/main/clojure/cljs/core.cljc")] - (time (last (lookahead (time (clojure core-content :optimize :memory)) - :symbol - #{:symbol})))) - -#_(let [core-content (slurp "https://raw.githubusercontent.com/clojure/clojurescript/master/src/main/clojure/cljs/core.cljc")] - (time (last (clojure core-content :optimize :memory)))) - -#_(let [core-content (slurp "https://raw.githubusercontent.com/clojure/clojurescript/master/src/main/clojure/cljs/core.cljc") - ast (time (clojure core-content :optimize :memory)) - zipper (zip/seq-zip ast)] - (time (last (for [branch (branches zipper) - :when (not true)] - branch)))) - - -#_(let [core-content (slurp "https://raw.githubusercontent.com/clojure/clojurescript/master/src/main/clojure/cljs/core.cljc") - ast (time (clojure core-content :optimize :memory))] - (time (for []))) From 0c992fbe74e8af87fec12c06f4d279ae17507e8f Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 10 Nov 2019 18:31:27 +0100 Subject: [PATCH 092/128] renamed clojure to ast for clarity --- src/clojure/parcera/core.cljc | 10 +++++----- test/parcera/test/benchmark.clj | 8 ++++---- test/parcera/test/core.cljc | 10 +++++----- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/clojure/parcera/core.cljc b/src/clojure/parcera/core.cljc index b422768..2ceb80b 100644 --- a/src/clojure/parcera/core.cljc +++ b/src/clojure/parcera/core.cljc @@ -80,7 +80,7 @@ default-hidden)) -(defn 
clojure +(defn ast "Clojure (antlr4) parser. It can be used as: - `(parcera/clojure input-string)` -> returns an AST representation of input-string @@ -245,7 +245,7 @@ ; Profile: {:create-node 384, :push-full-listener 2, :push-stack 384, ; :push-listener 382, :push-result 227, :push-message 227 } ; "Elapsed time: 47.25084 msecs" -#_(time (clojure (str '(ns parcera.core - (:require [instaparse.core :as instaparse] - [clojure.data :as data] - [clojure.string :as str]))))) +#_(time (ast (str '(ns parcera.core + (:require [instaparse.core :as instaparse] + [clojure.data :as data] + [clojure.string :as str]))))) diff --git a/test/parcera/test/benchmark.clj b/test/parcera/test/benchmark.clj index 4bb385f..d329e09 100644 --- a/test/parcera/test/benchmark.clj +++ b/test/parcera/test/benchmark.clj @@ -23,8 +23,8 @@ (newline) (newline) (println "Benchmark: Parsing parcera namespace with traces 👮") - (criterium/quick-bench (parcera/clojure (str '(ns parcera.core - (:require [instaparse.core :as instaparse] - [clojure.data :as data] - [clojure.string :as str])))) + (criterium/quick-bench (parcera/ast (str '(ns parcera.core + (:require [instaparse.core :as instaparse] + [clojure.data :as data] + [clojure.string :as str])))) :os :runtime :verbose)) diff --git a/test/parcera/test/core.cljc b/test/parcera/test/core.cljc index caa4f51..27601c6 100644 --- a/test/parcera/test/core.cljc +++ b/test/parcera/test/core.cljc @@ -11,18 +11,18 @@ (defn- roundtrip "checks parcera can parse and write back the exact same input code" [input] - (= input (parcera/code (parcera/clojure input)))) + (= input (parcera/code (parcera/ast input)))) (defn- valid? [input] - (not (parcera/failure? (parcera/clojure input)))) + (not (parcera/failure? (parcera/ast input)))) ;; todo: is this even possible with antlr ? 
🤔 #_(defn- clear [input] - (= 1 (count (instaparse/parses parcera/clojure input :unhide :all)))) + (= 1 (count (instaparse/parses parcera/ast input :unhide :all)))) (def validity @@ -317,8 +317,8 @@ (testing "parcera should be able to parse clojure core" (let [core-content (slurp "https://raw.githubusercontent.com/clojure/clojure/master/src/clj/clojure/core.clj")] - (time (is (= core-content (parcera/code (parcera/clojure core-content :optimize :memory))))))) + (time (is (= core-content (parcera/code (parcera/ast core-content :optimize :memory))))))) (testing "parcera should be able to parse clojurescript core" (let [core-content (slurp "https://raw.githubusercontent.com/clojure/clojurescript/master/src/main/clojure/cljs/core.cljc")] - (time (is (= core-content (parcera/code (parcera/clojure core-content :optimize :memory)))))))) + (time (is (= core-content (parcera/code (parcera/ast core-content :optimize :memory)))))))) From 4388634820eaf33ee32bdeba1056fb31447c0946 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 10 Nov 2019 18:33:52 +0100 Subject: [PATCH 093/128] todo added --- src/clojure/parcera/core.cljc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/clojure/parcera/core.cljc b/src/clojure/parcera/core.cljc index 2ceb80b..29368e9 100644 --- a/src/clojure/parcera/core.cljc +++ b/src/clojure/parcera/core.cljc @@ -3,6 +3,11 @@ [parcera.antlr.java :as platform]) #?(:cljs (:import goog.string.StringBuffer))) +;; TODO: it would be interesting to explore the idea of 'visitor' +;; for Clojure(script). 
Such that instead of computing the full AST +;; a developer could extend a multi-method with the rules that +;; it wants to handle and only those are called + (def default-hidden {:tags #{:form :collection :literal :keyword :reader_macro :dispatch} :literals #{"(" ")" "[" "]" "{" "}" "#{" "#" "^" "`" "'" "~" From 0948d5402185279955f82dea873e3646317b906a Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Sun, 10 Nov 2019 18:45:19 +0100 Subject: [PATCH 094/128] cosmetics --- project.clj | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/project.clj b/project.clj index 745cf83..890c5ac 100644 --- a/project.clj +++ b/project.clj @@ -11,7 +11,8 @@ [org.clojure/test.check "0.10.0"]] :plugins [[jonase/eastwood "0.3.5"] [lein-cljsbuild "1.1.7"] - [lein-virgil "0.1.9"]] + ;; reactivate for optimal workflow when changing the g4 file + #_[lein-virgil "0.1.9"]] ;; todo: does this even work ? :foreign-libs [{:file "build/js/parcera/antlr/clojureLexer.js" :provides ["parcera.antlr.clojureLexer"] From aafda43de4f0e19ac40f57170b42e7e26ec49fe3 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 11 Nov 2019 21:28:59 +0100 Subject: [PATCH 095/128] fix: remove wrong import on protocol file --- src/clojure/parcera/antlr/protocols.cljc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/clojure/parcera/antlr/protocols.cljc b/src/clojure/parcera/antlr/protocols.cljc index 2a7eb32..6d1c84f 100644 --- a/src/clojure/parcera/antlr/protocols.cljc +++ b/src/clojure/parcera/antlr/protocols.cljc @@ -1,8 +1,7 @@ (ns parcera.antlr.protocols "These protocols are a cheat: I use them to be able to dispatch to both Java and JavaScript parser implementations without the - common code having to know about it" - (:import (org.antlr.v4.runtime ANTLRErrorListener Parser))) + common code having to know about it") (defprotocol AntlrParser From 5c80e00e68bea7d5a902af5ed34500e9f2befaaf Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 11 Nov 2019 21:30:13 +0100 Subject: 
[PATCH 096/128] deactivated cljs-build in favour of figwheel-main --- .gitignore | 1 + dev.cljs.edn | 1 + project.clj | 57 +++++++++++++++++++++++++++++----------------------- 3 files changed, 34 insertions(+), 25 deletions(-) create mode 100644 dev.cljs.edn diff --git a/.gitignore b/.gitignore index 77c96c5..e424542 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,4 @@ pom.xml.asc /out/ /src/java/ /src/javascript +/figwheel_server.log diff --git a/dev.cljs.edn b/dev.cljs.edn new file mode 100644 index 0000000..8f028ef --- /dev/null +++ b/dev.cljs.edn @@ -0,0 +1 @@ +{:main parcera.core} diff --git a/project.clj b/project.clj index 890c5ac..ef451cf 100644 --- a/project.clj +++ b/project.clj @@ -3,35 +3,42 @@ :url "https://github.com/carocad/parcera" :license {:name "LGPLv3" :url "https://github.com/carocad/parcera/blob/master/LICENSE.md"} - :dependencies [[org.clojure/clojure "1.10.1"] - [instaparse/instaparse "1.4.10"]] + :dependencies [[org.clojure/clojure "1.10.1"]] + ;[instaparse/instaparse "1.4.10"]] :source-paths ["src/clojure"] :java-source-paths ["src/java"] - :profiles {:dev {:dependencies [[criterium/criterium "0.4.5"] ;; benchmark - [org.clojure/test.check "0.10.0"]] - :plugins [[jonase/eastwood "0.3.5"] - [lein-cljsbuild "1.1.7"] - ;; reactivate for optimal workflow when changing the g4 file - #_[lein-virgil "0.1.9"]] - ;; todo: does this even work ? 
- :foreign-libs [{:file "build/js/parcera/antlr/clojureLexer.js" - :provides ["parcera.antlr.clojureLexer"] - :module-type :commonjs} - {:file "build/js/parcera/antlr/clojureParser.js" - :provides ["parcera.antlr.clojureParser"] - :module-type :commonjs}] - :cljsbuild {:builds - [{:id "dev" - :source-paths ["src" "test"] - :compiler {:main parcera.test-runner - :output-to "target/out/tests.js" - :target :nodejs - :infer-externs true - :optimizations :none}}] - :test-commands - {"test" ["node" "target/out/tests.js"]}}} + :profiles {:dev {:dependencies [;; benchmark + [criterium/criterium "0.4.5"] + ;; generative testing + [org.clojure/test.check "0.10.0"] + ;; cljs repl + [com.bhauman/figwheel-main "0.2.3"]] + :plugins [;; linter + [jonase/eastwood "0.3.5"]]} + ;; java reloader + ;[lein-virgil "0.1.9"]] :provided {:dependencies [[org.clojure/clojurescript "1.10.520"] [org.antlr/antlr4-runtime "4.7.1"]]}} + ;:cljsbuild + #_{:builds + [{:id "dev" + :source-paths ["src/clojure" "src/javascript"] + :compiler {:main parcera.core + :target :nodejs + :output-to "target/dev/index.js" + :output-dir "target/dev/" + :infer-externs true + :optimizations :none}} + {:id "test" + :source-paths ["src/clojure" "test"] + :compiler {:main parcera.test-runner + :output-to "target/test/main.js" + :output-dir "target/test/" + :target :nodejs + :optimizations :none}}] + :test-commands + {"test" ["node" "target/test/main.js"]}} + :aliases {"fig" ["trampoline" "run" "-m" "figwheel.main"]} :test-selectors {:default (fn [m] (not (some #{:benchmark} (keys m)))) :benchmark :benchmark} :deploy-repositories [["clojars" {:url "https://clojars.org/repo" From ae4a745f49927528556554d740a6121baedc3a7f Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 11 Nov 2019 21:30:40 +0100 Subject: [PATCH 097/128] script added to bridge figwheel with cursive ide --- src/clojure/user.clj | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 src/clojure/user.clj diff --git a/src/clojure/user.clj 
b/src/clojure/user.clj new file mode 100644 index 0000000..41fd686 --- /dev/null +++ b/src/clojure/user.clj @@ -0,0 +1,2 @@ +(require '[figwheel.main.api :as fig]) +(fig/start "dev") From c5a3f97d694fd74dc35f6879a4722963416679e9 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 11 Nov 2019 21:31:01 +0100 Subject: [PATCH 098/128] fix: deactivate js to allow compilation --- src/clojure/parcera/antlr/javascript.cljs | 66 ++++++++++++----------- 1 file changed, 35 insertions(+), 31 deletions(-) diff --git a/src/clojure/parcera/antlr/javascript.cljs b/src/clojure/parcera/antlr/javascript.cljs index f8266a7..f32db80 100644 --- a/src/clojure/parcera/antlr/javascript.cljs +++ b/src/clojure/parcera/antlr/javascript.cljs @@ -5,48 +5,52 @@ ;; am I suppose to code the whole thing and hope that it works by running ;; the tests 🤔 ... I can feel the pain of other languages 😭 (:require [parcera.antlr.protocols :as antlr] - [antlr4.index :as runtime :refer [ParserRuleContext]] - [parcera.antlr.clojureLexer :as clojureLexer] - [parcera.antlr.clojureParser :as clojureParser])) + #_[antlr4.index :as runtime] + #_[parcera.antlr.clojureLexer :as clojureLexer] + #_[parcera.antlr.clojureParser :as clojureParser])) (set! 
*warn-on-infer* true) -(extend-type ParserRuleContext - antlr/ParserRule - (children [^ParserRuleContext this] (.-children this)) - (rule-index [^ParserRuleContext this] (.getRuleIndex this)) - (start [^ParserRuleContext this] (.getStart this)) - (end [^ParserRuleContext this] (.getStop this))) +#_(extend-type ParserRuleContext + antlr/ParserRule + (children [^ParserRuleContext this] (.-children this)) + (rule-index [^ParserRuleContext this] (.getRuleIndex this)) + (start [^ParserRuleContext this] (.getStart this)) + (end [^ParserRuleContext this] (.getStop this))) -(extend-type ErrorNodeImpl - antlr/ErrorNode - (token [^ErrorNodeImpl this] (.-symbol this))) +#_(extend-type ErrorNodeImpl + antlr/ErrorNode + (token [^ErrorNodeImpl this] (.-symbol this))) -(extend-type Token - antlr/Token - (row [^Token this] (.getLine this)) - (column [^Token this] (.getCharPositionInLine this))) +#_(extend-type Token + antlr/Token + (row [^Token this] (.getLine this)) + (column [^Token this] (.getCharPositionInLine this))) -(extend-type clojureParser - antlr/AntlrParser - (rules [^clojureParser this] (vec (.getRuleNames this))) - (tree [^clojureParser this] (. this (code)))) +#_(extend-type clojureParser + antlr/AntlrParser + (rules [^clojureParser this] (vec (.getRuleNames this))) + (tree [^clojureParser this] (. this (code)))) (defn parser - [input listener] - (let [chars (CharStreams/fromString input) - lexer (doto (new clojureLexer chars) - (.removeErrorListeners)) - ;; todo: how to handle lexer errors ? - ;(.addErrorListener listener)) - tokens (new CommonTokenStream lexer)] - (doto (new clojureParser tokens) - (.setBuildParseTree true) - (.removeErrorListeners) - (.addErrorListener listener)))) + [input] + {:parser input}) + +#_(defn parser + [input listener] + (let [chars (CharStreams/fromString input) + lexer (doto (new clojureLexer chars) + (.removeErrorListeners)) + ;; todo: how to handle lexer errors ? 
+ ;(.addErrorListener listener)) + tokens (new CommonTokenStream lexer)] + (doto (new clojureParser tokens) + (.setBuildParseTree true) + (.removeErrorListeners) + (.addErrorListener listener)))) From a67c1d8e91a6cb91f7632bbc825ed752d12c9385 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 11 Nov 2019 21:32:15 +0100 Subject: [PATCH 099/128] fix: dont import java code on cljs --- src/clojure/parcera/core.cljc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/clojure/parcera/core.cljc b/src/clojure/parcera/core.cljc index 29368e9..f7eba2e 100644 --- a/src/clojure/parcera/core.cljc +++ b/src/clojure/parcera/core.cljc @@ -1,6 +1,7 @@ (ns parcera.core (:require [parcera.antlr.protocols :as antlr] - [parcera.antlr.java :as platform]) + #?(:clj [parcera.antlr.java :as platform] + :cljs [parcera.antlr.javascript :as platform])) #?(:cljs (:import goog.string.StringBuffer))) ;; TODO: it would be interesting to explore the idea of 'visitor' From 4d32581fdc27bf9079aa8da0b35a43254196622c Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 11 Nov 2019 21:45:17 +0100 Subject: [PATCH 100/128] fix: add target to resource following figwheel advice removed dead code cosmetic changes --- project.clj | 37 +++++++++---------------------------- 1 file changed, 9 insertions(+), 28 deletions(-) diff --git a/project.clj b/project.clj index ef451cf..4c7287e 100644 --- a/project.clj +++ b/project.clj @@ -4,43 +4,24 @@ :license {:name "LGPLv3" :url "https://github.com/carocad/parcera/blob/master/LICENSE.md"} :dependencies [[org.clojure/clojure "1.10.1"]] - ;[instaparse/instaparse "1.4.10"]] :source-paths ["src/clojure"] :java-source-paths ["src/java"] - :profiles {:dev {:dependencies [;; benchmark - [criterium/criterium "0.4.5"] - ;; generative testing - [org.clojure/test.check "0.10.0"] - ;; cljs repl - [com.bhauman/figwheel-main "0.2.3"]] - :plugins [;; linter - [jonase/eastwood "0.3.5"]]} + + :profiles {:dev {:dependencies [[criterium/criterium "0.4.5"] ;; 
benchmark + [org.clojure/test.check "0.10.0"] ;; generative testing + [com.bhauman/figwheel-main "0.2.3"]] ;; cljs repl + :plugins [[jonase/eastwood "0.3.5"]] ;; linter + :resource-paths ["target"]} ;; java reloader ;[lein-virgil "0.1.9"]] :provided {:dependencies [[org.clojure/clojurescript "1.10.520"] [org.antlr/antlr4-runtime "4.7.1"]]}} - ;:cljsbuild - #_{:builds - [{:id "dev" - :source-paths ["src/clojure" "src/javascript"] - :compiler {:main parcera.core - :target :nodejs - :output-to "target/dev/index.js" - :output-dir "target/dev/" - :infer-externs true - :optimizations :none}} - {:id "test" - :source-paths ["src/clojure" "test"] - :compiler {:main parcera.test-runner - :output-to "target/test/main.js" - :output-dir "target/test/" - :target :nodejs - :optimizations :none}}] - :test-commands - {"test" ["node" "target/test/main.js"]}} + :aliases {"fig" ["trampoline" "run" "-m" "figwheel.main"]} + :test-selectors {:default (fn [m] (not (some #{:benchmark} (keys m)))) :benchmark :benchmark} + :deploy-repositories [["clojars" {:url "https://clojars.org/repo" :username :env/clojars_username :password :env/clojars_password From 9c01e98a2a278b631a84b51ac615e720a3d19697 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 11 Nov 2019 22:20:37 +0100 Subject: [PATCH 101/128] fix: downgrade figwheel version due to current bug --- project.clj | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/project.clj b/project.clj index 4c7287e..cdaff09 100644 --- a/project.clj +++ b/project.clj @@ -9,7 +9,9 @@ :profiles {:dev {:dependencies [[criterium/criterium "0.4.5"] ;; benchmark [org.clojure/test.check "0.10.0"] ;; generative testing - [com.bhauman/figwheel-main "0.2.3"]] ;; cljs repl + ;; todo: bump version to 0.2.4 + ;; https://github.com/bhauman/figwheel-main/issues/161 + [com.bhauman/figwheel-main "0.2.0"]] ;; cljs repl :plugins [[jonase/eastwood "0.3.5"]] ;; linter :resource-paths ["target"]} ;; java reloader From 
59d34105808ac2a5a9cefdbfd7e5701ef2619bc2 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 11 Nov 2019 22:34:00 +0100 Subject: [PATCH 102/128] fix: name pattern compatibility in cljs --- src/clojure/parcera/core.cljc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/clojure/parcera/core.cljc b/src/clojure/parcera/core.cljc index f7eba2e..0619548 100644 --- a/src/clojure/parcera/core.cljc +++ b/src/clojure/parcera/core.cljc @@ -26,7 +26,9 @@ :column (antlr/column end)}})) -(def name-pattern #"^([^\s\/]+\/)?(\/|[^\s\/]+)$") +;; for some reason cljs doesnt accept escaping the / characters +(def name-pattern #?(:clj #"^([^\s\/]+\/)?(\/|[^\s\/]+)$" + :cljs #"^([^\s/]+/)?(/|[^\s/]+)$")) (defn- conform From a65de3a61e3d035c335c6e1bdcae5a0f63547d6c Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 11 Nov 2019 22:38:22 +0100 Subject: [PATCH 103/128] fix: satisfies is macro in cljs which doesnt play nice with cond --- src/clojure/parcera/core.cljc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/clojure/parcera/core.cljc b/src/clojure/parcera/core.cljc index 0619548..72d61c3 100644 --- a/src/clojure/parcera/core.cljc +++ b/src/clojure/parcera/core.cljc @@ -51,7 +51,7 @@ the user is not interested in the full content." [tree rule-names hide-tags hide-literals] (cond - (satisfies? antlr/ParserRule tree) + (boolean (satisfies? antlr/ParserRule tree)) (let [rule (keyword (get rule-names (antlr/rule-index tree))) children (for [child (antlr/children tree) :let [child (hiccup child rule-names hide-tags hide-literals)] @@ -67,7 +67,7 @@ (with-meta (if (some? conformed) conformed ast) ast-meta)) - (satisfies? antlr/ErrorNode tree) + (boolean (satisfies? 
antlr/ErrorNode tree)) (let [token (antlr/token tree) ;; error metadata info {::start {:row (antlr/row token) From 539e447746409d751f92f48582be82f09ad52ed6 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 11 Nov 2019 22:52:58 +0100 Subject: [PATCH 104/128] fix: require antlr4 runtime in javascript fix: configure cljs to use nodejs target fix: automatically install js dependencies fix: ignore package autogenerated info --- .gitignore | 1 + dev.cljs.edn | 5 ++++- index.js | 2 +- package.json | 11 ----------- src/clojure/parcera/antlr/javascript.cljs | 2 +- 5 files changed, 7 insertions(+), 14 deletions(-) delete mode 100644 package.json diff --git a/.gitignore b/.gitignore index e424542..c6fc51f 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,4 @@ pom.xml.asc /src/java/ /src/javascript /figwheel_server.log +package*.json diff --git a/dev.cljs.edn b/dev.cljs.edn index 8f028ef..eca3bb1 100644 --- a/dev.cljs.edn +++ b/dev.cljs.edn @@ -1 +1,4 @@ -{:main parcera.core} +{:main parcera.core + :target :nodejs + :npm-deps {"antlr4" "^4.7.2"} + :install-deps true} diff --git a/index.js b/index.js index ab2b0fe..17da5c9 100644 --- a/index.js +++ b/index.js @@ -1,4 +1,4 @@ -const antlr4 = require('antlr4/index') +const antlr4 = require('antlr4') const {ClojureLexer} = require('./src/javascript/parcera/antlr/ClojureLexer') const {ClojureParser} = require('./src/javascript/parcera/antlr/ClojureParser') diff --git a/package.json b/package.json deleted file mode 100644 index 6c3a2a9..0000000 --- a/package.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "name": "parcera", - "version": "1.0.0", - "main": "index.js", - "repository": "git@github.com:carocad/parcera.git", - "author": "Camilo Roca ", - "license": "MIT", - "dependencies": { - "antlr4": "^4.7.2" - } -} diff --git a/src/clojure/parcera/antlr/javascript.cljs b/src/clojure/parcera/antlr/javascript.cljs index f32db80..5fab118 100644 --- a/src/clojure/parcera/antlr/javascript.cljs +++ 
b/src/clojure/parcera/antlr/javascript.cljs @@ -5,7 +5,7 @@ ;; am I suppose to code the whole thing and hope that it works by running ;; the tests 🤔 ... I can feel the pain of other languages 😭 (:require [parcera.antlr.protocols :as antlr] - #_[antlr4.index :as runtime] + [antlr4 :as runtime] #_[parcera.antlr.clojureLexer :as clojureLexer] #_[parcera.antlr.clojureParser :as clojureParser])) From 860e5ed5acbdbc954d57f08518e990beb9278e6e Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 11 Nov 2019 23:04:46 +0100 Subject: [PATCH 105/128] fix: prevent warning from leiningen --- project.clj | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/project.clj b/project.clj index cdaff09..c3d4a05 100644 --- a/project.clj +++ b/project.clj @@ -13,7 +13,8 @@ ;; https://github.com/bhauman/figwheel-main/issues/161 [com.bhauman/figwheel-main "0.2.0"]] ;; cljs repl :plugins [[jonase/eastwood "0.3.5"]] ;; linter - :resource-paths ["target"]} + :resource-paths ["target"] + :clean-targets ^{:protect false} ["target"]} ;; java reloader ;[lein-virgil "0.1.9"]] :provided {:dependencies [[org.clojure/clojurescript "1.10.520"] From 524f6109deac72dccbbe02160f645f8c11737ec5 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Wed, 13 Nov 2019 22:25:10 +0100 Subject: [PATCH 106/128] fix: renamed user.clj to figwheel to avoid creating a repl on start fix: moved figwheel script to another directory to avoid eastwood wrath added extra main according to figwheel main documentation --- .gitignore | 1 + dev.cljs.edn | 1 + project.clj | 1 + src/clojure/user.clj => scripts/figwheel.clj | 0 4 files changed, 3 insertions(+) rename src/clojure/user.clj => scripts/figwheel.clj (100%) diff --git a/.gitignore b/.gitignore index c6fc51f..0655512 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,4 @@ pom.xml.asc /src/javascript /figwheel_server.log package*.json +/.eastwood diff --git a/dev.cljs.edn b/dev.cljs.edn index eca3bb1..d0fb2a4 100644 --- a/dev.cljs.edn +++ b/dev.cljs.edn @@ -1,3 
+1,4 @@ +^{:extra-main-files {:tests {:main parcera.test-runner}}} {:main parcera.core :target :nodejs :npm-deps {"antlr4" "^4.7.2"} diff --git a/project.clj b/project.clj index c3d4a05..5987030 100644 --- a/project.clj +++ b/project.clj @@ -13,6 +13,7 @@ ;; https://github.com/bhauman/figwheel-main/issues/161 [com.bhauman/figwheel-main "0.2.0"]] ;; cljs repl :plugins [[jonase/eastwood "0.3.5"]] ;; linter + :source-paths ["src/clojure" "scripts"] :resource-paths ["target"] :clean-targets ^{:protect false} ["target"]} ;; java reloader diff --git a/src/clojure/user.clj b/scripts/figwheel.clj similarity index 100% rename from src/clojure/user.clj rename to scripts/figwheel.clj From 86e803b1579e3f198de02b71bf261b105d968b96 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Wed, 13 Nov 2019 22:26:01 +0100 Subject: [PATCH 107/128] removed lock file --- yarn.lock | 8 -------- 1 file changed, 8 deletions(-) delete mode 100644 yarn.lock diff --git a/yarn.lock b/yarn.lock deleted file mode 100644 index 2fb0b32..0000000 --- a/yarn.lock +++ /dev/null @@ -1,8 +0,0 @@ -# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY. 
-# yarn lockfile v1 - - -antlr4@^4.7.2: - version "4.7.2" - resolved "https://registry.npmjs.org/antlr4/-/antlr4-4.7.2.tgz#9d0b5987bb63660de658055ee9149141b4d9b462" - integrity sha512-vZA1xYufXLe3LX+ja9rIVxjRmILb1x3k7KYZHltRbfJtXjJ1DlFIqt+CbPYmghx0EuzY9DajiDw+MdyEt1qAsQ== From c09d77f117968c3607e1a369ca94177de7227fe6 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Thu, 14 Nov 2019 21:27:50 +0100 Subject: [PATCH 108/128] fix: avoid repeatedly creating keywords --- src/clojure/parcera/antlr/java.clj | 2 +- src/clojure/parcera/core.cljc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/clojure/parcera/antlr/java.clj b/src/clojure/parcera/antlr/java.clj index 7f599b4..70f5bda 100644 --- a/src/clojure/parcera/antlr/java.clj +++ b/src/clojure/parcera/antlr/java.clj @@ -61,7 +61,7 @@ (extend-type ClojureParser antlr/AntlrParser - (rules [^ClojureParser this] (vec (.getRuleNames this))) + (rules [^ClojureParser this] (into [] (map keyword) (.getRuleNames this))) (tree [^ClojureParser this] (. this (code)))) diff --git a/src/clojure/parcera/core.cljc b/src/clojure/parcera/core.cljc index 72d61c3..93d471b 100644 --- a/src/clojure/parcera/core.cljc +++ b/src/clojure/parcera/core.cljc @@ -52,7 +52,7 @@ [tree rule-names hide-tags hide-literals] (cond (boolean (satisfies? antlr/ParserRule tree)) - (let [rule (keyword (get rule-names (antlr/rule-index tree))) + (let [rule (get rule-names (antlr/rule-index tree)) children (for [child (antlr/children tree) :let [child (hiccup child rule-names hide-tags hide-literals)] :when (not (nil? 
child))] From e287d0bad72274caa2905cd17463a43ca098a174 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Thu, 14 Nov 2019 22:34:21 +0100 Subject: [PATCH 109/128] performance note added todo removed --- src/clojure/parcera/core.cljc | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/clojure/parcera/core.cljc b/src/clojure/parcera/core.cljc index 93d471b..0ef50a6 100644 --- a/src/clojure/parcera/core.cljc +++ b/src/clojure/parcera/core.cljc @@ -4,11 +4,6 @@ :cljs [parcera.antlr.javascript :as platform])) #?(:cljs (:import goog.string.StringBuffer))) -;; TODO: it would be interesting to explore the idea of 'visitor' -;; for Clojure(script). Such that instead of computing the full AST -;; a developer could extend a multi-method with the rules that -;; it wants to handle and only those are called - (def default-hidden {:tags #{:form :collection :literal :keyword :reader_macro :dispatch} :literals #{"(" ")" "[" "]" "{" "}" "#{" "#" "^" "`" "'" "~" @@ -90,11 +85,15 @@ (defn ast "Clojure (antlr4) parser. It can be used as: - - `(parcera/clojure input-string)` - -> returns an AST representation of input-string + - `(parcera/ast input-string)` + -> returns a lazy AST representation of input-string The following options are accepted: - - `:unhide` can be one of `#{:tags :content :all}`. Defaults to `nil`" + - `:unhide` can be one of `#{:tags :content :all}`. 
Defaults to `nil` + + NOTE: Antlr returns a fully parsed version of the provided input string + however this function returns a lazy sequence in order to expose + those through Clojure's immutable data structures" [input & {:as options}] (let [hidden (unhide options) {:keys [parser errors]} (platform/parser input) From c0764f526ca6e3f93fb00e5f24564e3c0ca56dbd Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Thu, 14 Nov 2019 22:47:02 +0100 Subject: [PATCH 110/128] cosmetics --- src/clojure/parcera/core.cljc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/clojure/parcera/core.cljc b/src/clojure/parcera/core.cljc index 0ef50a6..9de1905 100644 --- a/src/clojure/parcera/core.cljc +++ b/src/clojure/parcera/core.cljc @@ -235,7 +235,7 @@ (defn failure? "Checks if ast contains any `::failure` instances. - NOTE: This function is potentially slow since there it has to check the + NOTE: This function is potentially slow since it might have to check the complete ast to be sure that there are no failures. 
Whenever possible, prefer to handle errors directly appearing in the ast" From dd4ddd159cc00f5371633e6e651ffe697ca37199 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Fri, 15 Nov 2019 00:02:46 +0100 Subject: [PATCH 111/128] configure foreign-libs to include local js files --- dev.cljs.edn | 8 +++++++- project.clj | 8 +++----- src/clojure/parcera/antlr/javascript.cljs | 12 +++--------- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/dev.cljs.edn b/dev.cljs.edn index d0fb2a4..f913906 100644 --- a/dev.cljs.edn +++ b/dev.cljs.edn @@ -2,4 +2,10 @@ {:main parcera.core :target :nodejs :npm-deps {"antlr4" "^4.7.2"} - :install-deps true} + :install-deps true + :foreign-libs [{:file "src/javascript/parcera/antlr/ClojureLexer.js" + :module-type :es6 + :provides ["parcera.antlr.ClojureLexer"]} + {:file "src/javascript/parcera/antlr/ClojureParser.js" + :module-type :es6 + :provides ["parcera.antlr.ClojureParser"]}]} diff --git a/project.clj b/project.clj index 5987030..8316331 100644 --- a/project.clj +++ b/project.clj @@ -3,22 +3,20 @@ :url "https://github.com/carocad/parcera" :license {:name "LGPLv3" :url "https://github.com/carocad/parcera/blob/master/LICENSE.md"} - :dependencies [[org.clojure/clojure "1.10.1"]] - :source-paths ["src/clojure"] + :source-paths ["src/clojure" "src/javascript" "scripts"] :java-source-paths ["src/java"] - :profiles {:dev {:dependencies [[criterium/criterium "0.4.5"] ;; benchmark [org.clojure/test.check "0.10.0"] ;; generative testing ;; todo: bump version to 0.2.4 ;; https://github.com/bhauman/figwheel-main/issues/161 [com.bhauman/figwheel-main "0.2.0"]] ;; cljs repl :plugins [[jonase/eastwood "0.3.5"]] ;; linter - :source-paths ["src/clojure" "scripts"] :resource-paths ["target"] :clean-targets ^{:protect false} ["target"]} ;; java reloader ;[lein-virgil "0.1.9"]] - :provided {:dependencies [[org.clojure/clojurescript "1.10.520"] + :provided {:dependencies [[org.clojure/clojure "1.10.1"] + [org.clojure/clojurescript 
"1.10.520"] [org.antlr/antlr4-runtime "4.7.1"]]}} :aliases {"fig" ["trampoline" "run" "-m" "figwheel.main"]} diff --git a/src/clojure/parcera/antlr/javascript.cljs b/src/clojure/parcera/antlr/javascript.cljs index 5fab118..abb0afe 100644 --- a/src/clojure/parcera/antlr/javascript.cljs +++ b/src/clojure/parcera/antlr/javascript.cljs @@ -1,14 +1,8 @@ (ns parcera.antlr.javascript - ;; TODO: does this even works ? - ;; TODO: translate the index.js file to Clojurescript 😥 - ;; TODO: how do I get a Clojurescript repl ... I am blind without it - ;; am I suppose to code the whole thing and hope that it works by running - ;; the tests 🤔 ... I can feel the pain of other languages 😭 (:require [parcera.antlr.protocols :as antlr] - [antlr4 :as runtime] - #_[parcera.antlr.clojureLexer :as clojureLexer] - #_[parcera.antlr.clojureParser :as clojureParser])) - + [antlr4 :refer [CharStreams CommonTokenStream]] + [parcera.antlr.ClojureLexer :refer [ClojureLexer]] + [parcera.antlr.ClojureParser :refer [ClojureParser]])) (set! 
*warn-on-infer* true) From 23bde58e288ad56f5420f78033a2a9d909ee9734 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Fri, 15 Nov 2019 01:02:59 +0100 Subject: [PATCH 112/128] deactivate javascript --- .travis.yml | 7 ++++--- dev.cljs.edn | 1 - 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 75f69ff..a6742e7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,12 +17,13 @@ jobs: - curl -O https://www.antlr.org/download/antlr-4.7.1-complete.jar # generate java - java -jar antlr-4.7.1-complete.jar -Xexact-output-dir -o src/java/parcera/antlr -package parcera.antlr -Dlanguage=Java -no-listener -no-visitor src/Clojure.g4 - # generate javascript - - java -jar antlr-4.7.1-complete.jar -Xexact-output-dir -o src/javascript/parcera/antlr -package parcera.antlr -Dlanguage=JavaScript -no-listener -no-visitor src/Clojure.g4 + # generate javascript - todo + #- java -jar antlr-4.7.1-complete.jar -Xexact-output-dir -o src/javascript/parcera/antlr -package parcera.antlr -Dlanguage=JavaScript -no-listener -no-visitor src/Clojure.g4 # now we can actually proceed with clojure code - lein do clean, compile, check, eastwood - lein trampoline test - - nvm install 10.10 && nvm use 10.10 && lein trampoline cljsbuild test + # todo - re-enable js + #- nvm install 10.10 && nvm use 10.10 && lein trampoline cljsbuild test # only run the benchmark if we are trying to merge to master # otherwise the build takes too long diff --git a/dev.cljs.edn b/dev.cljs.edn index f913906..0fcebc6 100644 --- a/dev.cljs.edn +++ b/dev.cljs.edn @@ -1,4 +1,3 @@ -^{:extra-main-files {:tests {:main parcera.test-runner}}} {:main parcera.core :target :nodejs :npm-deps {"antlr4" "^4.7.2"} From cc1828b70bc27ff2778a54c87f07316ef6d4a86d Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Fri, 15 Nov 2019 01:10:47 +0100 Subject: [PATCH 113/128] measure execution time --- index.js | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/index.js b/index.js index 
17da5c9..fb4bfbd 100644 --- a/index.js +++ b/index.js @@ -1,4 +1,5 @@ const antlr4 = require('antlr4') +const fs = require('fs') const {ClojureLexer} = require('./src/javascript/parcera/antlr/ClojureLexer') const {ClojureParser} = require('./src/javascript/parcera/antlr/ClojureParser') @@ -25,7 +26,8 @@ function treeSeq(ast, ruleNames) { } } -const input = `(john :SHOUTS "hello" @michael pink/this will work)` +// const input = `(john :SHOUTS "hello" @michael pink/this will work)` +const input = fs.readFileSync('foo.text', {encoding: 'utf8'}) const chars = new antlr4.CharStreams.fromString(input) const lexer = new ClojureLexer(chars) lexer.removeErrorListeners() @@ -37,7 +39,9 @@ parser.removeErrorListeners() // parser.addErrorListener() const tree = parser.code() +console.time() console.log(JSON.stringify(treeSeq(tree, ruleNames), null, 2)) +console.timeEnd() //antlr4.tree.ParseTreeWalker.DEFAULT.walk(new listener(), tree) console.log(`DONE 💫`) From 2303af9c5b49ee55f996ec373725e9dd317743f5 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Fri, 15 Nov 2019 01:16:30 +0100 Subject: [PATCH 114/128] dont include scripts in source paths --- project.clj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project.clj b/project.clj index 8316331..e821612 100644 --- a/project.clj +++ b/project.clj @@ -3,7 +3,7 @@ :url "https://github.com/carocad/parcera" :license {:name "LGPLv3" :url "https://github.com/carocad/parcera/blob/master/LICENSE.md"} - :source-paths ["src/clojure" "src/javascript" "scripts"] + :source-paths ["src/clojure" "src/javascript"] :java-source-paths ["src/java"] :profiles {:dev {:dependencies [[criterium/criterium "0.4.5"] ;; benchmark [org.clojure/test.check "0.10.0"] ;; generative testing From 90a78fd13d7b20c86b0e865964a6866d6cfb8ab3 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Fri, 15 Nov 2019 01:25:07 +0100 Subject: [PATCH 115/128] moved dependencies up again --- project.clj | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) 
diff --git a/project.clj b/project.clj index e821612..67428af 100644 --- a/project.clj +++ b/project.clj @@ -5,6 +5,8 @@ :url "https://github.com/carocad/parcera/blob/master/LICENSE.md"} :source-paths ["src/clojure" "src/javascript"] :java-source-paths ["src/java"] + :dependencies [[org.clojure/clojure "1.10.1"] + [org.antlr/antlr4-runtime "4.7.1"]] :profiles {:dev {:dependencies [[criterium/criterium "0.4.5"] ;; benchmark [org.clojure/test.check "0.10.0"] ;; generative testing ;; todo: bump version to 0.2.4 @@ -15,9 +17,7 @@ :clean-targets ^{:protect false} ["target"]} ;; java reloader ;[lein-virgil "0.1.9"]] - :provided {:dependencies [[org.clojure/clojure "1.10.1"] - [org.clojure/clojurescript "1.10.520"] - [org.antlr/antlr4-runtime "4.7.1"]]}} + :provided {:dependencies [[org.clojure/clojurescript "1.10.520"]]}} :aliases {"fig" ["trampoline" "run" "-m" "figwheel.main"]} From ba9cd62531f260eecfed676151a7fa535810dbf8 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Fri, 15 Nov 2019 01:28:41 +0100 Subject: [PATCH 116/128] fix: missing code generation for benchmark --- .travis.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.travis.yml b/.travis.yml index a6742e7..8d53ae9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -30,6 +30,9 @@ jobs: - stage: Benchmark if: branch = master script: + - curl -O https://www.antlr.org/download/antlr-4.7.1-complete.jar + # generate java + - java -jar antlr-4.7.1-complete.jar -Xexact-output-dir -o src/java/parcera/antlr -package parcera.antlr -Dlanguage=Java -no-listener -no-visitor src/Clojure.g4 - lein trampoline test :benchmark - stage: Release From 9047c4ed49d8b51637bd1b879f96fd1d0b4ca7ca Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 18 Nov 2019 13:31:16 +0100 Subject: [PATCH 117/128] fix: remove javascript pieces made dependencies provided --- .travis.yml | 15 +++------------ dev.cljs.edn | 10 ---------- index.js | 47 ----------------------------------------------- project.clj | 12 +++--------- 4 files changed, 6 
insertions(+), 78 deletions(-) delete mode 100644 dev.cljs.edn delete mode 100644 index.js diff --git a/.travis.yml b/.travis.yml index 8d53ae9..489935b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,24 +17,15 @@ jobs: - curl -O https://www.antlr.org/download/antlr-4.7.1-complete.jar # generate java - java -jar antlr-4.7.1-complete.jar -Xexact-output-dir -o src/java/parcera/antlr -package parcera.antlr -Dlanguage=Java -no-listener -no-visitor src/Clojure.g4 - # generate javascript - todo - #- java -jar antlr-4.7.1-complete.jar -Xexact-output-dir -o src/javascript/parcera/antlr -package parcera.antlr -Dlanguage=JavaScript -no-listener -no-visitor src/Clojure.g4 # now we can actually proceed with clojure code - lein do clean, compile, check, eastwood - lein trampoline test + - lein trampoline test :benchmark # todo - re-enable js + # generate javascript - todo + #- java -jar antlr-4.7.1-complete.jar -Xexact-output-dir -o src/javascript/parcera/antlr -package parcera.antlr -Dlanguage=JavaScript -no-listener -no-visitor src/Clojure.g4 #- nvm install 10.10 && nvm use 10.10 && lein trampoline cljsbuild test - # only run the benchmark if we are trying to merge to master - # otherwise the build takes too long - - stage: Benchmark - if: branch = master - script: - - curl -O https://www.antlr.org/download/antlr-4.7.1-complete.jar - # generate java - - java -jar antlr-4.7.1-complete.jar -Xexact-output-dir -o src/java/parcera/antlr -package parcera.antlr -Dlanguage=Java -no-listener -no-visitor src/Clojure.g4 - - lein trampoline test :benchmark - - stage: Release if: tag IS present deploy: diff --git a/dev.cljs.edn b/dev.cljs.edn deleted file mode 100644 index 0fcebc6..0000000 --- a/dev.cljs.edn +++ /dev/null @@ -1,10 +0,0 @@ -{:main parcera.core - :target :nodejs - :npm-deps {"antlr4" "^4.7.2"} - :install-deps true - :foreign-libs [{:file "src/javascript/parcera/antlr/ClojureLexer.js" - :module-type :es6 - :provides ["parcera.antlr.ClojureLexer"]} - {:file 
"src/javascript/parcera/antlr/ClojureParser.js" - :module-type :es6 - :provides ["parcera.antlr.ClojureParser"]}]} diff --git a/index.js b/index.js deleted file mode 100644 index fb4bfbd..0000000 --- a/index.js +++ /dev/null @@ -1,47 +0,0 @@ -const antlr4 = require('antlr4') -const fs = require('fs') -const {ClojureLexer} = require('./src/javascript/parcera/antlr/ClojureLexer') -const {ClojureParser} = require('./src/javascript/parcera/antlr/ClojureParser') - -/** - * Takes an AST tree; the result of a parser walk and returns - * an array with the same style as Instaparse - * - * @param {Object} ast - * @param {Array} ruleNames - * @return {Array} a hiccup-like array - */ -function treeSeq(ast, ruleNames) { - const result = [] - // parser rules always have childrens - if (ast.children !== undefined) { - // we are inside a parser rule; therefore we add the rule name to the result - result.push(ruleNames[ast.ruleIndex]) - result.push.apply(result, ast.children.map((child) => treeSeq(child, ruleNames))) - return result - - // lexer rules dont have childrens, so we just take the matched text - } else { - return ast.getText() - } -} - -// const input = `(john :SHOUTS "hello" @michael pink/this will work)` -const input = fs.readFileSync('foo.text', {encoding: 'utf8'}) -const chars = new antlr4.CharStreams.fromString(input) -const lexer = new ClojureLexer(chars) -lexer.removeErrorListeners() -const tokens = new antlr4.CommonTokenStream(lexer) -const parser = new ClojureParser(tokens) -const ruleNames = parser.ruleNames -parser.buildParseTrees = true -parser.removeErrorListeners() -// parser.addErrorListener() - -const tree = parser.code() -console.time() -console.log(JSON.stringify(treeSeq(tree, ruleNames), null, 2)) -console.timeEnd() -//antlr4.tree.ParseTreeWalker.DEFAULT.walk(new listener(), tree) - -console.log(`DONE 💫`) diff --git a/project.clj b/project.clj index 67428af..dcc63d1 100644 --- a/project.clj +++ b/project.clj @@ -5,21 +5,15 @@ :url 
"https://github.com/carocad/parcera/blob/master/LICENSE.md"} :source-paths ["src/clojure" "src/javascript"] :java-source-paths ["src/java"] - :dependencies [[org.clojure/clojure "1.10.1"] - [org.antlr/antlr4-runtime "4.7.1"]] :profiles {:dev {:dependencies [[criterium/criterium "0.4.5"] ;; benchmark - [org.clojure/test.check "0.10.0"] ;; generative testing - ;; todo: bump version to 0.2.4 - ;; https://github.com/bhauman/figwheel-main/issues/161 - [com.bhauman/figwheel-main "0.2.0"]] ;; cljs repl + [org.clojure/test.check "0.10.0"]] ;; generative testing :plugins [[jonase/eastwood "0.3.5"]] ;; linter :resource-paths ["target"] :clean-targets ^{:protect false} ["target"]} ;; java reloader ;[lein-virgil "0.1.9"]] - :provided {:dependencies [[org.clojure/clojurescript "1.10.520"]]}} - - :aliases {"fig" ["trampoline" "run" "-m" "figwheel.main"]} + :provided {:dependencies [[org.clojure/clojure "1.10.1"] + [org.antlr/antlr4-runtime "4.7.1"]]}} :test-selectors {:default (fn [m] (not (some #{:benchmark} (keys m)))) :benchmark :benchmark} From b70b5063461d0f4c2a14a5ba751cad857c69e464 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 18 Nov 2019 13:53:16 +0100 Subject: [PATCH 118/128] refactored benchmark to put clojure core on the spot --- test/parcera/test/benchmark.clj | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/test/parcera/test/benchmark.clj b/test/parcera/test/benchmark.clj index d329e09..5595409 100644 --- a/test/parcera/test/benchmark.clj +++ b/test/parcera/test/benchmark.clj @@ -6,25 +6,27 @@ [parcera.core :as parcera])) (deftest ^:benchmark parsing - (println "Benchmark: Time parsing Clojure values ⌛") + (newline) + (newline) + (println "Benchmark: Parsing automatically generated values") (criterium/quick-bench (tc/quick-check 30 pt/validity) - :os :runtime :verbose)) - -(deftest ^:benchmark roundtrip + :os :runtime :verbose) (newline) (newline) - (println "Benchmark: Round trip of Clojure values 🚀") + 
(println "Benchmark: Round trip of automatically generated values") (criterium/quick-bench (tc/quick-check 30 pt/symmetric) :os :runtime :verbose)) -;; execute last ... hopefully -(deftest ^:benchmark z-known-namespace - (newline) - (newline) - (println "Benchmark: Parsing parcera namespace with traces 👮") - (criterium/quick-bench (parcera/ast (str '(ns parcera.core - (:require [instaparse.core :as instaparse] - [clojure.data :as data] - [clojure.string :as str])))) - :os :runtime :verbose)) +(deftest ^:benchmark clojure.core-roundtrip + (let [core-content (slurp "https://raw.githubusercontent.com/clojure/clojure/master/src/clj/clojure/core.clj")] + (newline) + (newline) + (println "Benchmark: Parsing Clojure's core namespace 🚧") + (criterium/quick-bench (parcera/ast core-content :optimize :memory) + :os :runtime :verbose) + (newline) + (newline) + (println "Benchmark: Rountrip Clojure's core namespace 🚧") + (criterium/quick-bench (parcera/code (parcera/ast core-content :optimize :memory)) + :os :runtime :verbose))) From 3d59b38ea76f642e3020a06bd8484283c550ba32 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 18 Nov 2019 14:59:28 +0100 Subject: [PATCH 119/128] fix: conform map literals fix: assoc error message on symbol error fix: hidden tags were dropping the children meta data --- src/clojure/parcera/core.cljc | 38 ++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/src/clojure/parcera/core.cljc b/src/clojure/parcera/core.cljc index 9de1905..f154c3f 100644 --- a/src/clojure/parcera/core.cljc +++ b/src/clojure/parcera/core.cljc @@ -34,7 +34,15 @@ (:symbol :simple_keyword :macro_keyword) (when (nil? (re-find name-pattern (first children))) (with-meta (list ::failure (cons rule children)) - metadata)) + (assoc-in metadata [::start :message] + (str "name cannot contain more than one /")))) + + (:map) + (let [forms (remove (comp #{:whitespace :discard} first) children)] + (when (odd? 
(count forms)) + (with-meta (list ::failure (cons rule children)) + (assoc-in metadata [::start :message] + "Map literal must contain an even number of forms")))) nil)) @@ -52,15 +60,20 @@ :let [child (hiccup child rule-names hide-tags hide-literals)] :when (not (nil? child))] child) - ;; flatten out first children level in case of hidden tags - ast (if (contains? hide-tags rule) - (apply concat children) - (cons rule children)) ;; attach meta data ... ala instaparse ast-meta (meta-data tree) + ;; extra validation rules conformed (conform rule children ast-meta)] - (with-meta (if (some? conformed) conformed ast) - ast-meta)) + ;; flatten out first children level in case of hidden tags + (if (contains? hide-tags rule) + (first children) + (or conformed (with-meta (cons rule children) + ast-meta)) + #_(clojure.pprint/pprint {:conformed conformed + :rule rule + :children children + :ast ast + :meta (or (meta conformed) ast-meta)}))) (boolean (satisfies? antlr/ErrorNode tree)) (let [token (antlr/token tree) @@ -232,6 +245,7 @@ (code* ast string-builder) (. string-builder (toString)))) + (defn failure? "Checks if ast contains any `::failure` instances. 
@@ -256,3 +270,13 @@ (:require [instaparse.core :as instaparse] [clojure.data :as data] [clojure.string :as str]))))) + +#_(time (ast "(ns parcera.core + (:require [instaparse.core :as #::{:hello \"world\" instaparse}] + [clojure.data :as data] + [clojure.string :as str]))")) + + + +;; TODO +;(ast "\"hello/world") From f06bbc3e5173dc23a4d02ba976b81f0238bc076b Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 18 Nov 2019 15:59:10 +0100 Subject: [PATCH 120/128] fix: check set literal elements --- src/clojure/parcera/core.cljc | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/clojure/parcera/core.cljc b/src/clojure/parcera/core.cljc index f154c3f..529d9df 100644 --- a/src/clojure/parcera/core.cljc +++ b/src/clojure/parcera/core.cljc @@ -37,13 +37,22 @@ (assoc-in metadata [::start :message] (str "name cannot contain more than one /")))) - (:map) + :map (let [forms (remove (comp #{:whitespace :discard} first) children)] (when (odd? (count forms)) (with-meta (list ::failure (cons rule children)) (assoc-in metadata [::start :message] "Map literal must contain an even number of forms")))) + :set + (let [forms (remove (comp #{:whitespace :discard} first) children) + set-length (count forms) + unique-length (count (distinct forms))] + (when (not= set-length unique-length) + (with-meta (list ::failure (cons rule children)) + (assoc-in metadata [::start :message] + "Set literal contains duplicate keys")))) + nil)) @@ -272,11 +281,8 @@ [clojure.string :as str]))))) #_(time (ast "(ns parcera.core - (:require [instaparse.core :as #::{:hello \"world\" instaparse}] + (:require [instaparse.core :as #{:hello \"world\" :hello}] [clojure.data :as data] [clojure.string :as str]))")) - - - ;; TODO ;(ast "\"hello/world") From d1bf4a27ef6de7e67a05b1b7e539eaba5a1279fd Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 18 Nov 2019 16:14:46 +0100 Subject: [PATCH 121/128] fix: dont throw an error on :code failure solved notes removed --- README.md | 
9 --------- src/clojure/parcera/core.cljc | 13 +++++++------ 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 60ab704..ad506ab 100644 --- a/README.md +++ b/README.md @@ -49,12 +49,3 @@ full explanation of the options available for a parser please visit Instaparse w (parcera/code [:symbol "ns"]) ;; "ns" ``` - -### notes -There are some restrictions as to how much can a parser do. In my experience, these restrictions -are related to some [semantic context-sensitivity](http://blog.reverberate.org/2013/09/ll-and-lr-in-context-why-parsing-tools.html). -which the Clojure reader has embedded into itself. In general I have found the following ones: -- `parcera` doesnt check that a map contains an even number of elements. This is specially difficult - to do since Clojure supports the discard macro `#_ form` which is a valid element but "doesnt count as one" -- `parcera` doesnt check if a map has repeated keys -- `parcera` doesnt check if a set has repeated elements diff --git a/src/clojure/parcera/core.cljc b/src/clojure/parcera/core.cljc index 529d9df..736129a 100644 --- a/src/clojure/parcera/core.cljc +++ b/src/clojure/parcera/core.cljc @@ -15,10 +15,11 @@ [ast] (let [start (antlr/start ast) end (antlr/end ast)] - {::start {:row (antlr/row start) - :column (antlr/column start)} - ::end {:row (antlr/row end) - :column (antlr/column end)}})) + (merge {::start {:row (antlr/row start) + :column (antlr/column start)}} + (when (some? 
end) + {::end {:row (antlr/row end) + :column (antlr/column end)}})))) ;; for some reason cljs doesnt accept escaping the / characters @@ -51,7 +52,7 @@ (when (not= set-length unique-length) (with-meta (list ::failure (cons rule children)) (assoc-in metadata [::start :message] - "Set literal contains duplicate keys")))) + "Set literal contains duplicate forms")))) nil)) @@ -123,7 +124,7 @@ tree (antlr/tree parser) result (hiccup tree rule-names (:tags hidden) (:literals hidden)) reports @(:reports (:parser errors))] - (with-meta result {::errors reports}))) + (vary-meta result assoc ::errors reports))) (defn- code* From 8491895e55a733a4e67f553679bde0fc7811c2d6 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 18 Nov 2019 16:21:48 +0100 Subject: [PATCH 122/128] fix: report lexer errors --- src/clojure/parcera/antlr/java.clj | 13 ++++++------- src/clojure/parcera/core.cljc | 2 -- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/clojure/parcera/antlr/java.clj b/src/clojure/parcera/antlr/java.clj index 70f5bda..7950d9d 100644 --- a/src/clojure/parcera/antlr/java.clj +++ b/src/clojure/parcera/antlr/java.clj @@ -10,7 +10,7 @@ ;; A custom Error Listener to avoid Antlr printing the errors on the terminal ;; by default. This is also useful to mimic Instaparse :total parse mechanism ;; such that if we get an error, we can report it as the result instead -(defrecord ParseFailure [reports] +(defrecord AntlrFailure [reports] ANTLRErrorListener ;; I am not sure how to use these methods. If you came here wondering why ;; is this being printed, please open an issue so that we can all benefit @@ -29,7 +29,7 @@ (let [report (merge {:row line :column char :message message - :type :parser} ;; todo: lexer should also be allowed + :type (if (instance? Parser recognizer) :parser :lexer)} (when (instance? 
Parser recognizer) {:symbol (str offending-symbol) :stack (->> (.getRuleInvocationStack ^Parser recognizer) @@ -67,13 +67,12 @@ (defn parser [input] - (let [chars (CharStreams/fromString input) + (let [listener (->AntlrFailure (volatile! ())) + chars (CharStreams/fromString input) lexer (doto (new ClojureLexer chars) - (.removeErrorListeners)) - ;; todo: how to handle lexer errors ? - ;(.addErrorListener listener)) + (.removeErrorListeners) + (.addErrorListener listener)) tokens (new CommonTokenStream lexer) - listener (->ParseFailure (volatile! ())) parser (doto (new ClojureParser tokens) (.setBuildParseTree true) (.removeErrorListeners) diff --git a/src/clojure/parcera/core.cljc b/src/clojure/parcera/core.cljc index 736129a..663eedb 100644 --- a/src/clojure/parcera/core.cljc +++ b/src/clojure/parcera/core.cljc @@ -285,5 +285,3 @@ (:require [instaparse.core :as #{:hello \"world\" :hello}] [clojure.data :as data] [clojure.string :as str]))")) -;; TODO -;(ast "\"hello/world") From 505cd31d7c4adc7884b04b26bb14a55a944da3f6 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 18 Nov 2019 17:42:01 +0100 Subject: [PATCH 123/128] fix: some rules dont have an end token renamed conform to failure for better readability --- src/clojure/parcera/core.cljc | 63 ++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 27 deletions(-) diff --git a/src/clojure/parcera/core.cljc b/src/clojure/parcera/core.cljc index 663eedb..037ec5e 100644 --- a/src/clojure/parcera/core.cljc +++ b/src/clojure/parcera/core.cljc @@ -9,17 +9,31 @@ :literals #{"(" ")" "[" "]" "{" "}" "#{" "#" "^" "`" "'" "~" "~@" "@" "#(" "#'" "#_" "#?(" "#?@(" "##" ":" "::"}}) - +;; start and end are tokens not positions. +;; So '(hello/world)' has '(' 'hello/world' and ')' as tokens (defn- meta-data "extract the match meta data information from the ast node" [ast] (let [start (antlr/start ast) end (antlr/end ast)] - (merge {::start {:row (antlr/row start) - :column (antlr/column start)}} - (when (some? 
end) - {::end {:row (antlr/row end) - :column (antlr/column end)}})))) + (cond + ;; happens when the parser rule is a single lexer rule + (= start end) + {::start {:row (antlr/row start) + :column (antlr/column start)} + ::end {:row (antlr/row start) + :column (.getStopIndex start)}} + + ;; no end found - happens on errors + (nil? end) + {::start {:row (antlr/row start) + :column (antlr/column start)}} + + :else + {::start {:row (antlr/row start) + :column (antlr/column start)} + ::end {:row (antlr/row end) + :column (antlr/column end)}}))) ;; for some reason cljs doesnt accept escaping the / characters @@ -27,7 +41,7 @@ :cljs #"^([^\s/]+/)?(/|[^\s/]+)$")) -(defn- conform +(defn- failure "Checks that `rule` conforms to additional rules which are too difficult to represent with pure Antlr4 syntax" [rule children metadata] @@ -65,25 +79,19 @@ [tree rule-names hide-tags hide-literals] (cond (boolean (satisfies? antlr/ParserRule tree)) - (let [rule (get rule-names (antlr/rule-index tree)) - children (for [child (antlr/children tree) - :let [child (hiccup child rule-names hide-tags hide-literals)] - :when (not (nil? child))] - child) + (let [rule (get rule-names (antlr/rule-index tree)) + children (for [child (antlr/children tree) + :let [child (hiccup child rule-names hide-tags hide-literals)] + :when (not (nil? child))] + child) ;; attach meta data ... ala instaparse - ast-meta (meta-data tree) + ast-meta (meta-data tree) ;; extra validation rules - conformed (conform rule children ast-meta)] - ;; flatten out first children level in case of hidden tags + fail (failure rule children ast-meta)] + ;; parcera hidden tags are always "or" statements, so just take the single children (if (contains? 
hide-tags rule) (first children) - (or conformed (with-meta (cons rule children) - ast-meta)) - #_(clojure.pprint/pprint {:conformed conformed - :rule rule - :children children - :ast ast - :meta (or (meta conformed) ast-meta)}))) + (or fail (with-meta (cons rule children) ast-meta)))) (boolean (satisfies? antlr/ErrorNode tree)) (let [token (antlr/token tree) @@ -272,10 +280,6 @@ ;; ast is root node but "doesnt know" about the failure -> conformed (some #{::failure} (filter keyword? (tree-seq seq? identity ast))))) -; Successful parse. -; Profile: {:create-node 384, :push-full-listener 2, :push-stack 384, -; :push-listener 382, :push-result 227, :push-message 227 } -; "Elapsed time: 47.25084 msecs" #_(time (ast (str '(ns parcera.core (:require [instaparse.core :as instaparse] [clojure.data :as data] @@ -284,4 +288,9 @@ #_(time (ast "(ns parcera.core (:require [instaparse.core :as #{:hello \"world\" :hello}] [clojure.data :as data] - [clojure.string :as str]))")) + [clojure.string :as str])")) + +#_(filter :meta (map #(hash-map :item % :meta (meta %)) + (tree-seq seq? seq (ast " + (ns + parcera.core))")))) From 460d11b731fb5ca33fc09c3652fbf315d822b06f Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 18 Nov 2019 17:42:10 +0100 Subject: [PATCH 124/128] edge case added --- test/parcera/test/core.cljc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/parcera/test/core.cljc b/test/parcera/test/core.cljc index 27601c6..079828c 100644 --- a/test/parcera/test/core.cljc +++ b/test/parcera/test/core.cljc @@ -136,7 +136,10 @@ (testing "symbols" (as-> "hello/world/" input (is (not (valid? input)))) (as-> ":hello/world/" input (is (not (valid? input)))) - (as-> "::hello/world/" input (is (not (valid? input)))))) + (as-> "::hello/world/" input (is (not (valid? input))))) + + (testing "strings" + (as-> "hello \"world" input (is (not (valid? 
input)))))) (deftest macros From 3e49ba636da08201cfcc0820bf096d7c633b7280 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 18 Nov 2019 18:11:41 +0100 Subject: [PATCH 125/128] fix: moved line/column logic to protocol implementation --- src/clojure/parcera/antlr/java.clj | 37 ++++++++++++++++++------ src/clojure/parcera/antlr/protocols.cljc | 10 +++---- src/clojure/parcera/core.cljc | 35 ++-------------------- 3 files changed, 35 insertions(+), 47 deletions(-) diff --git a/src/clojure/parcera/antlr/java.clj b/src/clojure/parcera/antlr/java.clj index 7950d9d..525038d 100644 --- a/src/clojure/parcera/antlr/java.clj +++ b/src/clojure/parcera/antlr/java.clj @@ -40,23 +40,42 @@ (vswap! reports conj report)))) +;; start and end are tokens not positions. +;; So '(hello/world)' has '(' 'hello/world' and ')' as tokens (extend-type ParserRuleContext antlr/ParserRule (children [^ParserRuleContext this] (.-children this)) (rule-index [^ParserRuleContext this] (.getRuleIndex this)) - (start [^ParserRuleContext this] (.getStart this)) - (end [^ParserRuleContext this] (.getStop this))) + antlr/LocationInfo + (span [^ParserRuleContext this] + (let [start (.getStart this) + stop (.getStop this)] + (cond + ;; happens when the parser rule is a single lexer rule + (= start stop) + {::start {:row (.getLine start) + :column (.getCharPositionInLine start)} + ::end {:row (.getLine start) + :column (.getStopIndex start)}} + ;; no end found - happens on errors + (nil? 
stop) + {::start {:row (.getLine start) + :column (.getCharPositionInLine start)}} -(extend-type ErrorNodeImpl - antlr/ErrorNode - (token [^ErrorNodeImpl this] (.-symbol this))) + :else + {::start {:row (.getLine start) + :column (.getCharPositionInLine start)} + ::end {:row (.getLine stop) + :column (.getCharPositionInLine stop)}})))) -(extend-type Token - antlr/Token - (row [^Token this] (.getLine this)) - (column [^Token this] (.getCharPositionInLine this))) +(extend-type ErrorNodeImpl + antlr/LocationInfo + (span [^ErrorNodeImpl this] + (let [token (.-symbol this)] + {::start {:row (.getLine token) + :column (.getCharPositionInLine token)}}))) (extend-type ClojureParser diff --git a/src/clojure/parcera/antlr/protocols.cljc b/src/clojure/parcera/antlr/protocols.cljc index 6d1c84f..7acde2b 100644 --- a/src/clojure/parcera/antlr/protocols.cljc +++ b/src/clojure/parcera/antlr/protocols.cljc @@ -8,16 +8,14 @@ (rules [this]) (tree [this])) + (defprotocol ParserRule (children [this]) - (rule-index [this]) - (start [this]) - (end [this])) + (rule-index [this])) +(defprotocol LocationInfo + (span [this])) -(defprotocol Token - (row [this]) - (column [this])) (defprotocol ErrorNode (token [this])) diff --git a/src/clojure/parcera/core.cljc b/src/clojure/parcera/core.cljc index 037ec5e..3d3ac55 100644 --- a/src/clojure/parcera/core.cljc +++ b/src/clojure/parcera/core.cljc @@ -9,32 +9,6 @@ :literals #{"(" ")" "[" "]" "{" "}" "#{" "#" "^" "`" "'" "~" "~@" "@" "#(" "#'" "#_" "#?(" "#?@(" "##" ":" "::"}}) -;; start and end are tokens not positions. 
-;; So '(hello/world)' has '(' 'hello/world' and ')' as tokens -(defn- meta-data - "extract the match meta data information from the ast node" - [ast] - (let [start (antlr/start ast) - end (antlr/end ast)] - (cond - ;; happens when the parser rule is a single lexer rule - (= start end) - {::start {:row (antlr/row start) - :column (antlr/column start)} - ::end {:row (antlr/row start) - :column (.getStopIndex start)}} - - ;; no end found - happens on errors - (nil? end) - {::start {:row (antlr/row start) - :column (antlr/column start)}} - - :else - {::start {:row (antlr/row start) - :column (antlr/column start)} - ::end {:row (antlr/row end) - :column (antlr/column end)}}))) - ;; for some reason cljs doesnt accept escaping the / characters (def name-pattern #?(:clj #"^([^\s\/]+\/)?(\/|[^\s\/]+)$" @@ -85,7 +59,7 @@ :when (not (nil? child))] child) ;; attach meta data ... ala instaparse - ast-meta (meta-data tree) + ast-meta (antlr/span tree) ;; extra validation rules fail (failure rule children ast-meta)] ;; parcera hidden tags are always "or" statements, so just take the single children @@ -94,11 +68,8 @@ (or fail (with-meta (cons rule children) ast-meta)))) (boolean (satisfies? 
antlr/ErrorNode tree)) - (let [token (antlr/token tree) - ;; error metadata - info {::start {:row (antlr/row token) - :column (antlr/column token)}}] - (with-meta (list ::failure (str tree)) info)) + (with-meta (list ::failure (str tree)) + (antlr/span tree)) :else (let [text (str tree)] From 6582dc47d3b7cf74b4f65646bffa2f08f75e0db5 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 18 Nov 2019 19:17:07 +0100 Subject: [PATCH 126/128] made clojure a fix dependency --- project.clj | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/project.clj b/project.clj index dcc63d1..654a82b 100644 --- a/project.clj +++ b/project.clj @@ -5,6 +5,7 @@ :url "https://github.com/carocad/parcera/blob/master/LICENSE.md"} :source-paths ["src/clojure" "src/javascript"] :java-source-paths ["src/java"] + :dependencies [[org.clojure/clojure "1.10.1"]] :profiles {:dev {:dependencies [[criterium/criterium "0.4.5"] ;; benchmark [org.clojure/test.check "0.10.0"]] ;; generative testing :plugins [[jonase/eastwood "0.3.5"]] ;; linter @@ -12,8 +13,7 @@ :clean-targets ^{:protect false} ["target"]} ;; java reloader ;[lein-virgil "0.1.9"]] - :provided {:dependencies [[org.clojure/clojure "1.10.1"] - [org.antlr/antlr4-runtime "4.7.1"]]}} + :provided {:dependencies [[org.antlr/antlr4-runtime "4.7.1"]]}} :test-selectors {:default (fn [m] (not (some #{:benchmark} (keys m)))) :benchmark :benchmark} From 4ff04f242eddc4791cfdf2df572f91890c202e6c Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Mon, 18 Nov 2019 19:31:29 +0100 Subject: [PATCH 127/128] README updated --- README.md | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index ad506ab..8db0743 100644 --- a/README.md +++ b/README.md @@ -19,31 +19,31 @@ full explanation of the options available for a parser please visit Instaparse w [instaparse.core :as instaparse])) ;;parse clojure code from a string -(parcera/clojure (str '(ns parcera.core - 
(:require [instaparse.core :as instaparse] - [clojure.data :as data] - [clojure.string :as str])))) +(parcera/ast (str '(ns parcera.core + (:require [instaparse.core :as instaparse] + [clojure.data :as data] + [clojure.string :as str])))) ;; => returns a data structure with the result from the parser -[:code - [:list - [:symbol "ns"] - [:whitespace " "] - [:symbol "parcera.core"] - [:whitespace " "] - [:list - [:simple-keyword ":require"] - [:whitespace " "] - [:vector - [:symbol "instaparse.core"] - [:whitespace " "] - [:simple-keyword ":as"] - [:whitespace " "] - [:symbol "instaparse"]] - [:whitespace " "] - [:vector [:symbol "clojure.data"] [:whitespace " "] [:simple-keyword ":as"] [:whitespace " "] [:symbol "data"]] - [:whitespace " "] - [:vector [:symbol "clojure.string"] [:whitespace " "] [:simple-keyword ":as"] [:whitespace " "] [:symbol "str"]]]]] +(:code + (:list + (:symbol "ns") + (:whitespace " ") + (:symbol "parcera.core") + (:whitespace " ") + (:list + (:simple_keyword "require") + (:whitespace " ") + (:vector + (:symbol "instaparse.core") + (:whitespace " ") + (:simple_keyword "as") + (:whitespace " ") + (:symbol "instaparse")) + (:whitespace " ") + (:vector (:symbol "clojure.data") (:whitespace " ") (:simple_keyword "as") (:whitespace " ") (:symbol "data")) + (:whitespace " ") + (:vector (:symbol "clojure.string") (:whitespace " ") (:simple_keyword "as") (:whitespace " ") (:symbol "str"))))) ;; convert an AST back into a string (parcera/code [:symbol "ns"]) From c798963bdd79be3748ec3345df4ebe7fc1894a23 Mon Sep 17 00:00:00 2001 From: Camilo Roca Date: Tue, 19 Nov 2019 22:54:20 +0100 Subject: [PATCH 128/128] remove javascript references --- README.md | 2 +- pom.xml | 45 +++++++++++++++++++++++++---------- project.clj | 4 ++-- src/clojure/parcera/core.cljc | 5 ++-- 4 files changed, 38 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 8db0743..e45ac6c 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![Build 
Status](https://travis-ci.com/carocad/parcera.svg?branch=master)](https://travis-ci.com/carocad/parcera) [![Clojars Project](https://img.shields.io/clojars/v/carocad/parcera.svg)](https://clojars.org/carocad/parcera) -Grammar-based Clojure(script) parser. +Grammar-based Clojure parser. Parcera can safely read any Clojure file without any code evaluation. diff --git a/pom.xml b/pom.xml index e78ceff..e9ec8cf 100644 --- a/pom.xml +++ b/pom.xml @@ -3,9 +3,9 @@ carocad parcera jar - 0.3.1 + 0.4.0 parcera - Grammar-based Clojure(script) parser + Grammar-based Clojure parser https://github.com/carocad/parcera @@ -17,10 +17,10 @@ https://github.com/carocad/parcera scm:git:git://github.com/carocad/parcera.git scm:git:ssh://git@github.com/carocad/parcera.git - b4ca5c659e55f00781e37bee1dc6bb400460e307 + 4ff04f242eddc4791cfdf2df572f91890c202e6c - src + src/clojure test @@ -28,13 +28,37 @@ + + target + resources target target/classes - + + + org.codehaus.mojo + build-helper-maven-plugin + 1.7 + + + add-source + generate-sources + + add-source + + + + src/javascript + src/java + + + + + + @@ -68,14 +92,9 @@ 1.10.1 - instaparse - instaparse - 1.4.10 - - - org.clojure - clojurescript - 1.10.520 + org.antlr + antlr4-runtime + 4.7.1 provided diff --git a/project.clj b/project.clj index 654a82b..cc778c7 100644 --- a/project.clj +++ b/project.clj @@ -1,9 +1,9 @@ (defproject carocad/parcera "0.4.0" - :description "Grammar-based Clojure(script) parser" + :description "Grammar-based Clojure parser" :url "https://github.com/carocad/parcera" :license {:name "LGPLv3" :url "https://github.com/carocad/parcera/blob/master/LICENSE.md"} - :source-paths ["src/clojure" "src/javascript"] + :source-paths ["src/clojure"] :java-source-paths ["src/java"] :dependencies [[org.clojure/clojure "1.10.1"]] :profiles {:dev {:dependencies [[criterium/criterium "0.4.5"] ;; benchmark diff --git a/src/clojure/parcera/core.cljc b/src/clojure/parcera/core.cljc index 3d3ac55..7e250f6 100644 --- 
a/src/clojure/parcera/core.cljc +++ b/src/clojure/parcera/core.cljc @@ -1,7 +1,8 @@ (ns parcera.core (:require [parcera.antlr.protocols :as antlr] - #?(:clj [parcera.antlr.java :as platform] - :cljs [parcera.antlr.javascript :as platform])) + #?(:clj [parcera.antlr.java :as platform])) + ; todo: re-enable once we have javascript support + ;:cljs [parcera.antlr.javascript :as platform])) #?(:cljs (:import goog.string.StringBuffer)))