From f989518142ec3ec1ced578f456637cba9fbc4ee9 Mon Sep 17 00:00:00 2001 From: Julian Viereck Date: Thu, 13 Feb 2020 19:28:22 -0500 Subject: [PATCH 1/4] Bump to v0.6.3 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 6b54086..0927f7e 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "regjsparser", - "version": "0.6.2", + "version": "0.6.3", "author": "'Julian Viereck' ", "license": "BSD-2-Clause", "main": "./parser", From 2a2f0902896a7712df410a9f2e5f8bdb3527a6f1 Mon Sep 17 00:00:00 2001 From: Julian Viereck Date: Sat, 7 Mar 2020 15:43:10 -0500 Subject: [PATCH 2/4] Allow closing brace and bracket inside of classes. Fixes #101 --- parser.js | 8 +++--- test/test-data-unicode.json | 52 +++++++++++++++++++++++++++++++++++-- test/test-data.json | 41 +++++++++++++++++++++++++++++ 3 files changed, 95 insertions(+), 6 deletions(-) diff --git a/parser.js b/parser.js index 68849f0..851fcbe 100644 --- a/parser.js +++ b/parser.js @@ -229,14 +229,14 @@ return createValue(kind, codePoint, pos - (value.length + fromOffset), pos); } - function createCharacter(matches) { + function createCharacter(matches, insideClass) { var _char = matches[0]; var first = _char.charCodeAt(0); if (hasUnicodeFlag) { - if (_char === '}') { + if (!insideClass && _char === '}') { bail("unescaped or unmatched closing brace"); } - if (_char === ']') { + if (!insideClass && _char === ']') { bail("unescaped or unmatched closing bracket"); } var second; @@ -1102,7 +1102,7 @@ var res; if (res = matchReg(/^[^\\\]-]/)) { - return createCharacter(res[0]); + return createCharacter(res[0], true); } else if (match('\\')) { res = parseClassEscape(); if (!res) { diff --git a/test/test-data-unicode.json b/test/test-data-unicode.json index 6b2e16b..916cafe 100644 --- a/test/test-data-unicode.json +++ b/test/test-data-unicode.json @@ -930,12 +930,60 @@ "type": "value", "kind": "singleEscape", "codePoint": 45, - "range": [1, 3], + 
"range": [ + 1, + 3 + ], "raw": "\\-" } ], "negative": false, - "range": [0, 4], + "range": [ + 0, + 4 + ], "raw": "[\\-]" + }, + "[}]": { + "type": "characterClass", + "body": [ + { + "type": "value", + "kind": "symbol", + "codePoint": 125, + "range": [ + 1, + 2 + ], + "raw": "}" + } + ], + "negative": false, + "range": [ + 0, + 3 + ], + "raw": "[}]" + }, + "[^}]": { + "type": "characterClass", + "body": [ + { + "type": "value", + "kind": "symbol", + "codePoint": 125, + "range": [ + 2, + 3 + ], + "raw": "}" + } + ], + "negative": true, + "range": [ + 0, + 4 + ], + "raw": "[^}]" } } diff --git a/test/test-data.json b/test/test-data.json index f1ad2f4..a81bc69 100644 --- a/test/test-data.json +++ b/test/test-data.json @@ -37472,5 +37472,46 @@ 2 ], "raw": "a." + }, + "}": { + "type": "value", + "kind": "symbol", + "codePoint": 125, + "range": [ + 0, + 1 + ], + "raw": "}" + }, + "]": { + "type": "value", + "kind": "symbol", + "codePoint": 93, + "range": [ + 0, + 1 + ], + "raw": "]" + }, + "[}]": { + "type": "characterClass", + "body": [ + { + "type": "value", + "kind": "symbol", + "codePoint": 125, + "range": [ + 1, + 2 + ], + "raw": "}" + } + ], + "negative": false, + "range": [ + 0, + 3 + ], + "raw": "[}]" } } From 23d58b47c557455358e2b3ffd4dc34a16bc279f9 Mon Sep 17 00:00:00 2001 From: Julian Viereck Date: Sun, 8 Mar 2020 12:50:14 -0400 Subject: [PATCH 3/4] Adding parsing for ExtendedAtom. Remove parser logic from createCharacter. 
--- parser.js | 30 ++++++++++++++++++------------ test/test-data-unicode.json | 4 ++-- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/parser.js b/parser.js index 851fcbe..b23ed9f 100644 --- a/parser.js +++ b/parser.js @@ -229,16 +229,10 @@ return createValue(kind, codePoint, pos - (value.length + fromOffset), pos); } - function createCharacter(matches, insideClass) { + function createCharacter(matches) { var _char = matches[0]; var first = _char.charCodeAt(0); if (hasUnicodeFlag) { - if (!insideClass && _char === '}') { - bail("unescaped or unmatched closing brace"); - } - if (!insideClass && _char === ']') { - bail("unescaped or unmatched closing bracket"); - } var second; if (_char.length === 1 && first >= 0xD800 && first <= 0xDBFF) { second = lookahead().charCodeAt(0); @@ -487,7 +481,7 @@ return anchor; } - var atom = parseAtom(); + var atom = parseAtomAndExtendedAtom(); if (!atom) { bail('Expected atom'); } @@ -611,7 +605,12 @@ return quantifier; } - function parseAtom() { + function parseAtomAndExtendedAtom() { + // Parsing Atom and ExtendedAtom together due to redundancy. + // ExtendedAtom is defined in Annex B of the ECMA-262 standard. + // + // SEE: https://www.ecma-international.org/ecma-262/10.0/index.html#prod-annexB-ExtendedPatternCharacter + // // Atom :: // PatternCharacter // . @@ -619,16 +618,23 @@ // CharacterClass // ( GroupSpecifier Disjunction ) // ( ? 
: Disjunction ) + // ExtendedAtom :: + // ExtendedPatternCharacter + // ExtendedPatternCharacter :: + // SourceCharacter but not one of ^$\.*+?()[| var res; // jviereck: allow ']', '}' here as well to be compatible with browser's // implementations: ']'.match(/]/); - // if (res = matchReg(/^[^^$\\.*+?()[\]{}|]/)) { - if (res = matchReg(/^[^^$\\.*+?(){[|]/)) { + if (res = matchReg(/^[^^$\\.*+?()[\]{}|]/)) { // PatternCharacter return createCharacter(res); } + else if (!hasUnicodeFlag && (res = matchReg(/^[^^$\\.*+?(){[|]/))) { + // ExtendedPatternCharacter + return createCharacter(res); + } else if (match('.')) { // . return createDot(); @@ -1102,7 +1108,7 @@ var res; if (res = matchReg(/^[^\\\]-]/)) { - return createCharacter(res[0], true); + return createCharacter(res[0]); } else if (match('\\')) { res = parseClassEscape(); if (!res) { diff --git a/test/test-data-unicode.json b/test/test-data-unicode.json index 916cafe..011cfed 100644 --- a/test/test-data-unicode.json +++ b/test/test-data-unicode.json @@ -914,13 +914,13 @@ "}": { "type": "error", "name": "SyntaxError", - "message": "unescaped or unmatched closing brace at position 1\n }\n ^", + "message": "Expected atom at position 0\n }\n ^", "input": "}" }, "]": { "type": "error", "name": "SyntaxError", - "message": "unescaped or unmatched closing bracket at position 1\n ]\n ^", + "message": "Expected atom at position 0\n ]\n ^", "input": "]" }, "[\\-]": { From e92926765beea0743fc111efb1b6288904b2f577 Mon Sep 17 00:00:00 2001 From: Julian Viereck Date: Tue, 10 Mar 2020 23:07:27 -0400 Subject: [PATCH 4/4] Optimize ExtendedPatternCharacter given previous if statement --- parser.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parser.js b/parser.js index b23ed9f..47e0029 100644 --- a/parser.js +++ b/parser.js @@ -631,7 +631,7 @@ // PatternCharacter return createCharacter(res); } - else if (!hasUnicodeFlag && (res = matchReg(/^[^^$\\.*+?(){[|]/))) { + else if (!hasUnicodeFlag && (res = 
matchReg(/^(?:]|})/))) { // ExtendedPatternCharacter return createCharacter(res); }