julien-duponchelle · polvalente · Oct 7, 2019 · Oct 7, 2019 · Oct 7, 2019 · Oct 8, 2019
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,7 +4,8 @@
 
 By https://github.com/polvalente
 
-- Fix 'ignoreInDelimiters' nesting
+- Fix 'ignoreInDelimiters' nesting (#20)
+- Refactor code to include new parsing engine (#21)
 
 ## 0.7.3
 

diff --git a/package.json b/package.json
@@ -4,7 +4,7 @@
     "license": "Apache-2.0",
     "displayName": "Rainbow End",
     "description": "This extension allows to identify keyword / end with colours.",
-    "version": "0.7.2",
+    "version": "0.8.0",
     "icon": "images/logo.png",
     "engines": {
         "vscode": "^1.29.0"
@@ -71,4 +71,4 @@
         "typescript": "^2.6.1",
         "vscode": "^1.1.26"
     }
-}
+}
diff --git a/src/extension.ts b/src/extension.ts
@@ -2,37 +2,30 @@
 
 import * as vscode from "vscode";
 import { languages } from "./languages";
+import { tokenize, loadRegexes, Token, TokenizeParams } from "./tokenizer";
+import { parse, deepDecorations } from "./parser";
 
-const deepDecorations = [
-  vscode.window.createTextEditorDecorationType({
-    color: { id: "rainbowend.deep1" }
-  }),
-  vscode.window.createTextEditorDecorationType({
-    color: { id: "rainbowend.deep2" }
-  }),
-  vscode.window.createTextEditorDecorationType({
-    color: { id: "rainbowend.deep3" }
-  })
-];
+export function activate(context: vscode.ExtensionContext) { // builds tokenizer params for every configured language, then refreshes decorations now and on editor/document changes
+  let regExps: {
+    [index: string]: TokenizeParams;
+  } = {};
 
-let timeout: NodeJS.Timer | null = null;
-let regExs: { [index: string]: RegExp } = {};
+  let timeout: NodeJS.Timer | null = null; // handle of the pending debounced refresh, shared by every trigger site below
 
-export function activate(context: vscode.ExtensionContext) {
   Object.keys(languages).forEach(language => {
-    regExs[language] = buildRegex(language);
+    regExps[language] = loadRegexes(languages[language]);
   });
 
   let activeEditor = vscode.window.activeTextEditor;
   if (activeEditor) {
-    triggerUpdateDecorations(activeEditor);
+    timeout = triggerUpdateDecorations(timeout, regExps); // keep the handle so a later trigger can cancel this refresh
   }
 
   vscode.window.onDidChangeActiveTextEditor(
     editor => {
       activeEditor = editor;
       if (activeEditor) {
-        triggerUpdateDecorations(activeEditor);
+        timeout = triggerUpdateDecorations(timeout, regExps); // keep the handle so a later trigger can cancel this refresh
       }
     },
     null,
@@ -42,192 +35,54 @@ export function activate(context: vscode.ExtensionContext) {
   vscode.workspace.onDidChangeTextDocument(
     event => {
       if (activeEditor && event.document === activeEditor.document) {
-        triggerUpdateDecorations(activeEditor);
+        timeout = triggerUpdateDecorations(timeout, regExps);
       }
     },
     null,
     context.subscriptions
   );
 }
 
-function triggerUpdateDecorations(activeEditor: vscode.TextEditor) {
-  if (timeout) {
-    clearTimeout(timeout);
+function triggerUpdateDecorations( // (re)schedules updateDecorations 250 ms out and returns the new timer handle
+  timeout: NodeJS.Timer | null, // timer from a previous call, or null when nothing is pending
+  regExps: { // tokenizer parameters indexed by string key; passed through to updateDecorations untouched
+    [index: string]: TokenizeParams;
   }
-  timeout = setTimeout(updateDecorations, 250);
-}
-
-function buildRegex(language: string) {
-  const languageConfiguration = languages[language];
-  let tokens: Array<string> = languageConfiguration["openTokens"];
-  tokens = tokens.concat(languageConfiguration["inlineOpenTokens"]);
-  tokens = tokens.concat(languageConfiguration["closeTokens"]);
-  tokens = tokens.concat(languageConfiguration["neutralTokens"]);
-  return RegExp("(\\b)(" + tokens.join("|") + ")(\\b)", "gm");
-}
-
-function ignoreInDelimiters(
-  token_pairs:
-    | Array<{
-        open: string;
-        close: string;
-      }>
-    | undefined,
-  text: string
 ) {
-  /* This function replaces text inside each token pair with spaces,
-	   so as to ignore the text between delimiters */
-  if (token_pairs) {
-    token_pairs.forEach(({ open: open_delim, close: close_delim }) => {
-      /* Only allow nesting if delimiters are different */
-      if (open_delim == close_delim) {
-        let regexp = RegExp(
-          `${open_delim}[^${close_delim}]*${close_delim}`,
-          "gm"
-        );
-        text = text.replace(regexp, match => {
-          return " ".repeat(match.length);
-        });
-      } else {
-        let openRegexp = RegExp(`${open_delim}`, "gm");
-        let closeRegexp = RegExp(`${close_delim}`, "gm");
-
-        let indices = [];
-
-        let match = openRegexp.exec(text);
-        if (match == null) {
-          return;
-        }
-
-        while (match != null) {
-          indices.push({ index: match.index, type: "open" });
-          match = openRegexp.exec(text);
-        }
-
-        match = closeRegexp.exec(text);
-        if (match == null) {
-          return;
-        }
-
-        while (match != null) {
-          indices.push({ index: match.index, type: "close" });
-          match = closeRegexp.exec(text);
-        }
-
-        /* Sort by index */
-        indices = indices.sort(({ index: a }, { index: b }) => a - b);
-
-        let ignore_env_counter = 0;
-        let first_index = indices[0].index;
-
-        let index: number;
-        let type: string;
-
-        /* This isn't so inefficient in that it is
-    	     O(indices.length), instead of O(text.length).
-	         Also, the list is already ordered, which is really helpful */
-        for ({ index, type } of indices) {
-          /* skip current token if trying to close when there is no open block
-           cannot just break because '\n' can be both a closing token and a
-           normal line end
-          */
-          if (type == "close" && ignore_env_counter == 0) {
-            continue;
-          }
-
-          /* if counter is zero, should begin an ignore block */
-          if (ignore_env_counter == 0) {
-            first_index = index;
-          }
-
-          if (type == "open") {
-            /* if it is an open token, always increment env counter */
-            ignore_env_counter++;
-          } else {
-            ignore_env_counter--;
-            /* if counter has reached zero after a closing token,
-             end ignore block */
-            let last_index = index;
-
-            /* Set ignore block slice as whitespace and keep the rest */
-            text =
-              text.slice(0, first_index) +
-              " ".repeat(last_index - first_index + 1) +
-              text.slice(last_index + 1);
-          }
-        }
-
-        if (ignore_env_counter != 0) {
-          /* Didn't close last block */
-          text =
-            text.slice(0, first_index) +
-            " ".repeat(text.length - first_index + 1);
-        }
-      }
-    });
+  if (timeout) { // cancel the previously scheduled update so rapid calls collapse into one
+    clearTimeout(timeout);
   }
-  return text;
+  return setTimeout(() => updateDecorations(regExps), 250); // the caller has to store this handle for the next call to be able to cancel it
 }
 
-function updateDecorations() {
+function updateDecorations(regExps: { [index: string]: TokenizeParams }) { // tokenizes the active editor's text, hands the tokens to parse(), then applies options[i] to deepDecorations[i]
   const activeEditor = vscode.window.activeTextEditor;
   if (!activeEditor) {
     return;
   }
-  const languageConfiguration = languages[activeEditor.document.languageId];
+  let lang = activeEditor.document.languageId; // used as the key into both languages and regExps
+  const languageConfiguration = languages[lang];
+
+  if (!languageConfiguration) { // no entry in languages for this document's language: leave the editor untouched
+    return;
+  }
 
   let text = activeEditor.document.getText();
   const options: vscode.DecorationOptions[][] = [];
-  deepDecorations.forEach(d => {
+  deepDecorations.forEach((d: any) => { // one empty bucket per decoration type; d itself is not used
     options.push([]);
   });
-  let match;
-  let deep = 0;
 
-  // if we are not case sensitive, then ensure the case of text matches then keyworkd matches
+  // if the language is not case sensitive, lowercase the text before tokenizing
   if (!languageConfiguration.caseSensitive) {
     text = text.toLowerCase();
   }
-  // substitute all ignore intervals with spaces
-  // this ensures commented code or
-  // keywords inside strings are ignored properly
-
-  // also, prepend a whitespace to allow matching the first character in document
-  // if needed
 
-  text =
-    " " + ignoreInDelimiters(languageConfiguration.ignoreInDelimiters, text);
-  while ((match = regExs[activeEditor.document.languageId].exec(text))) {
-    const startIndex = match.index + match[1].length - 1; // Decrement to compensate for added character
-    const startPos = activeEditor.document.positionAt(startIndex);
-    const endPos = activeEditor.document.positionAt(
-      startIndex + match[2].length
-    );
-    const decoration: vscode.DecorationOptions = {
-      range: new vscode.Range(startPos, endPos)
-    };
+  let tokens: Token[] = tokenize(text, regExps[lang]); // regExps[lang] holds the TokenizeParams for this language
 
-    if (languageConfiguration.closeTokens.indexOf(match[2]) > -1) {
-      if (deep > 0) {
-        deep -= 1;
-      }
-      options[deep % deepDecorations.length].push(decoration);
-    } else if (languageConfiguration.neutralTokens.indexOf(match[2]) > -1) {
-      if (deep > 0) {
-        options[(deep - 1) % deepDecorations.length].push(decoration);
-      }
-    } else if (languageConfiguration.openTokens.indexOf(match[2]) > -1) {
-      options[deep % deepDecorations.length].push(decoration);
-      deep += 1;
-    } else {
-      if (match[1].length === 0 || match[1].match("^[\\s\n]+$")) {
-        options[deep % deepDecorations.length].push(decoration);
-        deep += 1;
-      }
-    }
-  }
+  parse({ activeEditor, options, tokens }); // presumably fills the options buckets from the tokens - TODO confirm in ./parser
 
-  deepDecorations.forEach((deepDecoration, i) => {
+  deepDecorations.forEach((deepDecoration: any, i: number) => { // bucket i is applied with decoration type i
     activeEditor.setDecorations(deepDecoration, options[i]);
   });
 }