New point release
Signed-off-by: Mike Lischke <[email protected]>
mike-lischke committed Jan 20, 2024
1 parent 1d3aa47 commit 7fa4a9a
Showing 5 changed files with 233 additions and 21 deletions.
17 changes: 12 additions & 5 deletions ReadMe.md
@@ -16,6 +16,7 @@ This package is a fork of the official ANTLR4 JavaScript runtime and has been fu
- Numerous bug fixes and other changes.
- Smaller node package (no test specs or other unnecessary files).
- No differentiation between node and browser environments.
- InterpreterDataReader implementation.
- Includes the `antlr4ng-cli` tool to generate parser files compatible with this runtime. This tool uses a custom build of the ANTLR4 tool.

This package is a blend of the original JS implementation and antlr4ts, which is a TypeScript implementation of the ANTLR4 runtime, but was abandoned. It tries to keep the best of both worlds, while following the Java runtime as close as possible. It's a bit slower than the JS runtime, but faster than antlr4ts.
@@ -108,7 +109,7 @@ const result = visitor.visit(tree);

## Benchmarks

This runtime is monitored for performance regressions. The following table shows the results of the benchmarks run on last release:
This runtime is monitored for performance regressions. The following tables show the results of the benchmarks previously run on the JS runtime and on the last release of this one. Warm times were taken from 5 runs, with the 2 slowest stripped off and the remaining 3 averaged.

Pure JavaScript release (with type definitions):

@@ -123,10 +124,10 @@ Last release (pure TypeScript):

| Test | Cold Run | Warm Run|
| ---- | -------- | ------- |
| Query Collection| 4823 ms | 372 ms |
| Example File | 680 ms | 196 ms |
| Large Inserts | 15176 ms | 15115 ms |
| Total | 20738 ms | 15704 ms |
| Query Collection| 4724 ms | 337 ms |
| Example File | 672 ms | 192 ms |
| Large Inserts | 15144 ms | 15039 ms |
| Total | 20600 ms | 15592 ms |

The numbers are interesting. While the cold run for the query collection is almost 3 seconds faster with pure TS, the overall numbers in the warm state are worse. So it's not a pure JS vs. TS situation; something else must have additional influence, and this will be investigated. After all, the TypeScript code is ultimately transpiled to JS, so it's probably a matter of how effectively the TS code is translated to JS.

@@ -144,6 +145,12 @@ The example file is a copy of the largest test file in [this repository](https:/

## Release Notes

### 2.0.7

- Added an InterpreterDataReader implementation (copied from the vscode-antlr4 extension).
- Benchmark values listed here are now computed from 5 runs, instead of just one.


### 2.0.6

- Optimizations in HashMap and HashSet (from Peter van Gulik). This can have dramatic speed improvements, depending on the grammar. In the unit tests this shows mostly by a faster cold start.
2 changes: 1 addition & 1 deletion package.json
@@ -1,6 +1,6 @@
{
"name": "antlr4ng",
"version": "2.0.6",
"version": "2.0.7",
"type": "module",
"description": "Alternative JavaScript/TypeScript runtime for ANTLR4",
"main": "dist/index.cjs",
153 changes: 153 additions & 0 deletions src/misc/InterpreterDataReader.ts
@@ -0,0 +1,153 @@
/*
 * Copyright (c) The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

import { Vocabulary } from "../Vocabulary.js";
import { ATN } from "../atn/ATN.js";
import { ATNDeserializer } from "../atn/ATNDeserializer.js";

/** The data in an interpreter file. */
export interface IInterpreterData {
    atn: ATN;
    vocabulary: Vocabulary;
    ruleNames: string[];

    /** Only valid for lexer grammars. Lists the defined lexer channels. */
    channels?: string[];

    /** Only valid for lexer grammars. Lists the defined lexer modes. */
    modes?: string[];
}

export class InterpreterDataReader {
    /**
     * The structure of the data file is very simple. Everything is line based, with empty lines
     * separating the different parts. For lexers the layout is:
     *
     * token literal names:
     * ...
     *
     * token symbolic names:
     * ...
     *
     * rule names:
     * ...
     *
     * channel names:
     * ...
     *
     * mode names:
     * ...
     *
     * atn:
     * a single line with comma-separated int values, enclosed in a pair of square brackets.
     *
     * Data for a parser does not contain channel and mode names.
     */

    public static parseInterpreterData(source: string): IInterpreterData {
        const ruleNames: string[] = [];
        const channels: string[] = [];
        const modes: string[] = [];

        const literalNames: Array<string | null> = [];
        const symbolicNames: Array<string | null> = [];
        const lines = source.split("\n");
        let index = 0;
        let line = lines[index++];
        if (line !== "token literal names:") {
            throw new Error("Unexpected data entry");
        }

        do {
            line = lines[index++];
            if (line.length === 0) {
                break;
            }
            literalNames.push(line === "null" ? null : line);
        } while (true);

        line = lines[index++];
        if (line !== "token symbolic names:") {
            throw new Error("Unexpected data entry");
        }

        do {
            line = lines[index++];
            if (line.length === 0) {
                break;
            }
            symbolicNames.push(line === "null" ? null : line);
        } while (true);

        line = lines[index++];
        if (line !== "rule names:") {
            throw new Error("Unexpected data entry");
        }

        do {
            line = lines[index++];
            if (line.length === 0) {
                break;
            }
            ruleNames.push(line);
        } while (true);

        line = lines[index++];
        if (line === "channel names:") { // Additional lexer data.
            do {
                line = lines[index++];
                if (line.length === 0) {
                    break;
                }
                channels.push(line);
            } while (true);

            line = lines[index++];
            if (line !== "mode names:") {
                throw new Error("Unexpected data entry");
            }

            do {
                line = lines[index++];
                if (line.length === 0) {
                    break;
                }
                modes.push(line);
            } while (true);
        }

        line = lines[index++];
        if (line !== "atn:") {
            throw new Error("Unexpected data entry");
        }

        line = lines[index++];
        const elements = line.split(",");
        let value;

        const serializedATN: number[] = [];
        for (let i = 0; i < elements.length; ++i) {
            const element = elements[i];
            if (element.startsWith("[")) {
                value = Number(element.substring(1).trim());
            } else if (element.endsWith("]")) {
                value = Number(element.substring(0, element.length - 1).trim());
            } else {
                value = Number(element.trim());
            }
            serializedATN[i] = value;
        }

        const deserializer = new ATNDeserializer();

        return {
            atn: deserializer.deserialize(serializedATN),
            vocabulary: new Vocabulary(literalNames, symbolicNames, []),
            ruleNames,
            channels: channels.length > 0 ? channels : undefined,
            modes: modes.length > 0 ? modes : undefined,
        };
    }
}
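The sectioned, line-based format the reader expects can be illustrated with a standalone sketch of the same parsing approach. The `readSection` helper and the sample data below are illustrative assumptions, not part of the antlr4ng API; in the real reader, literal `null` entries additionally map to `null` token names:

```typescript
// Standalone sketch (not antlr4ng code): reads blank-line-separated,
// header-introduced sections like those in an ANTLR interpreter data file.
const readSection = (lines: string[], start: number, header: string): [string[], number] => {
    if (lines[start] !== header) {
        throw new Error(`Unexpected data entry: ${lines[start]}`);
    }
    const values: string[] = [];
    let index = start + 1;
    while (index < lines.length && lines[index].length > 0) {
        values.push(lines[index++]);
    }
    return [values, index + 1]; // Skip the separating empty line.
};

// Made-up sample data in the documented layout (parser variant, no channels/modes).
const sample = [
    "token literal names:",
    "null",
    "'+'",
    "",
    "token symbolic names:",
    "null",
    "PLUS",
    "",
    "rule names:",
    "expression",
    "",
].join("\n");

const lines = sample.split("\n");
const [literals, afterLiterals] = readSection(lines, 0, "token literal names:");
const [symbolic, afterSymbolic] = readSection(lines, afterLiterals, "token symbolic names:");
const [rules] = readSection(lines, afterSymbolic, "rule names:");
console.log(literals, symbolic, rules);
```

A mismatched header throws, mirroring the "Unexpected data entry" errors above.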
1 change: 1 addition & 0 deletions src/misc/index.ts
@@ -11,3 +11,4 @@ export * from "./HashSet.js";
export * from "./Interval.js";
export * from "./IntervalSet.js";
export * from "./ParseCancellationException.js";
export * from "./InterpreterDataReader.js";
81 changes: 66 additions & 15 deletions tests/benchmarks/run-benchmarks.ts
@@ -134,7 +134,14 @@ const splitterTest = () => {
assert(r4.delimiter === "$$");
};

const parseFiles = () => {
/**
* Parses a number of files and returns the time it took to parse them.
*
* @param logResults If true, the number of statements found in each file and the duration is logged.
*
* @returns The time it took to parse each file.
*/
const parseFiles = (logResults: boolean): number[] => {
const testFiles: ITestFile[] = [
// Large set of all possible query types in different combinations and versions.
{ name: "./data/statements.txt", initialDelimiter: "$$" },
@@ -147,11 +154,15 @@ { name: "./data/sakila-db/sakila-data.sql", initialDelimiter: ";" },
{ name: "./data/sakila-db/sakila-data.sql", initialDelimiter: ";" },
];

testFiles.forEach((entry) => {
const result: number[] = [];
testFiles.forEach((entry, index) => {
const sql = fs.readFileSync(path.join(path.dirname(__filename), entry.name), { encoding: "utf-8" });

const ranges = determineStatementRanges(sql, entry.initialDelimiter);
console.log(" Found " + ranges.length + " statements in " + entry.name + ".");

if (logResults) {
console.log(` Found ${ranges.length} statements in file ${index + 1} (${entry.name}).`);
}

const timestamp = performance.now();
ranges.forEach((range, index) => {
@@ -181,19 +192,33 @@ }
}
});

console.log(" Parsing all statements took: " + (performance.now() - timestamp) + " ms");
const duration = performance.now() - timestamp;
if (logResults) {
console.log(" Parsing all statements took: " + duration + " ms");
}

result.push(duration);
});

return result;
};

const parserRun = (index: number) => {
const parserRun = (showOutput: boolean): number[] => {
let result: number[] = [];
const timestamp = performance.now();
try {
parseFiles();
result = parseFiles(showOutput);
} catch (e) {
console.error(e);
} finally {
console.log(`Parse run ${index} took ${(performance.now() - timestamp)} ms`);
if (showOutput) {
console.log(`Overall parse run took ${(performance.now() - timestamp)} ms`);
}
}

result.push(performance.now() - timestamp);

return result;
};

console.log("\n\nStarting MySQL JS/TS benchmarks");
@@ -204,14 +229,40 @@ splitterTest();

console.log("Splitter tests took " + (performance.now() - timestamp) + " ms");

console.log("Running antlr4ng parser (cold) ...");
parserRun(0);
console.log("Running antlr4ng parser once (cold) ");
parserRun(true);

process.stdout.write("Running antlr4ng parser 5 times (warm) ");

const times: number[][] = [];

// Run the parser a few times to get a better average.
for (let i = 0; i < 5; ++i) {
times.push(parserRun(false));
process.stdout.write(".");
}
console.log();

// Transpose the result matrix.
const transposed: number[][] = [];
for (let i = 0; i < times[0].length; ++i) {
transposed.push([]);
for (const row of times) {
transposed[i].push(row[i]);
}
}

// Remove the 2 slowest runs in each row and compute the average of the remaining 3.
const averageTimes: number[] = [];
for (const row of transposed) {
const values = row.slice().sort((a, b) => a - b).slice(0, 3); // Numeric sort; the default comparator sorts lexicographically.
averageTimes.push(values.reduce((sum, time) => { return sum + time; }, 0) / values.length);
}

for (let i = 0; i < averageTimes.length - 1; ++i) {
console.log(` File ${i + 1} took ${averageTimes[i]} ms`);
}

console.log("Running antlr4ng parser (warm) ...");
parserRun(1);
//parserRun(2);
//parserRun(3);
//parserRun(4);
//parserRun(5);
console.log(`Overall parse run took ${averageTimes[averageTimes.length - 1]} ms`);

console.log("Done");
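The warm-run averaging used above (5 runs, drop the 2 slowest per measurement, average the remaining 3) can be sketched as a standalone helper. The sample run times are made up for illustration; note the explicit numeric comparator, since JavaScript's default `sort` compares elements as strings:

```typescript
// Standalone sketch of the benchmark averaging: per measurement column,
// drop the slowest runs and average the `keep` fastest.
const averageFastest = (runs: number[][], keep: number): number[] => {
    const columns = runs[0].length;
    const averages: number[] = [];
    for (let column = 0; column < columns; ++column) {
        const values = runs
            .map((run) => run[column])
            .sort((a, b) => a - b) // Numeric ascending sort.
            .slice(0, keep);
        averages.push(values.reduce((sum, time) => sum + time, 0) / values.length);
    }
    return averages;
};

// Five made-up runs with two measurements each (e.g. one file time, total time).
const runs = [
    [337, 15592],
    [340, 15610],
    [1200, 16800], // Outlier run; discarded as one of the 2 slowest.
    [335, 15580],
    [900, 16100],  // Also discarded.
];
console.log(averageFastest(runs, 3)); // Roughly [337.3, 15594].
```

Transposing the per-run results into per-measurement columns first, as the script above does, amounts to the same computation.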
