From 278ac1e7247b43dcde6abbbd62e4dea64fb10c47 Mon Sep 17 00:00:00 2001 From: "Jurgen J. Vinju" Date: Thu, 22 Jun 2023 16:38:45 +0200 Subject: [PATCH 1/6] scaffolding a little compiler for generating cached parsers --- .../grammar/storage/ModuleParserStorage.rsc | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 src/org/rascalmpl/library/lang/rascal/grammar/storage/ModuleParserStorage.rsc diff --git a/src/org/rascalmpl/library/lang/rascal/grammar/storage/ModuleParserStorage.rsc b/src/org/rascalmpl/library/lang/rascal/grammar/storage/ModuleParserStorage.rsc new file mode 100644 index 0000000000..95390e6c55 --- /dev/null +++ b/src/org/rascalmpl/library/lang/rascal/grammar/storage/ModuleParserStorage.rsc @@ -0,0 +1,46 @@ +@synopsis{Functionality for caching module parsers} +@description{ +The Rascal interpreter can take a lot of time while loading modules. +In particular in deployed situations (Eclipse and VScode plugins), the +time it takes to load the parser generator for generating the parsers +which are required for analyzing concrete syntax fragments is prohibitive (20s). +This means that the first syntax highlighting sometimes can only appear +more than 20s after loading an extension (VScode) or plugin (Eclipse). + +This "compiler" takes any number of Rascal modules and extracts a grammar +for each of them, in order to use the ((Library::ParseTree)) module's +function ((storeParsers)) on them respectively to store each parser +in a `.parsers` file. + +After that the Rascal interpreter has a special mode for using ((loadParsers)) +while importing a new module if a cache `.parsers` file is present next to +the respective `.rsc` file. +} +@benefits{ +* loading modules without having to first load and use a parser generator can be up to 1000 times faster. +} +@pitfalls{ +:::warning +This caching feature is _static_. There is no automated cache clearance. +If your grammars change, any saved `.parsers` files do not change with them. 
+It is advised that you programmatically execute this compiler at deployment time +to store the `.parsers` file _only_ in deployed `jar` files. That way, you can not +be bitten by a concrete syntax parser that is out of date at development time. +::: +} +@license{ + Copyright (c) 2009-2023 NWO-I CWI + All rights reserved. This program and the accompanying materials + are made available under the terms of the Eclipse Public License v1.0 + which accompanies this distribution, and is available at + http://www.eclipse.org/legal/epl-v10.html +} +@contributor{Jurgen J. Vinju - Jurgen.Vinju@cwi.nl - CWI} +@bootstrapParser +module lang::rascal::grammar::storage::ModuleParserStorage + +import lang::rascal::grammar::definition::Modules; +import lang::rascal::\syntax::Rascal; +import util::Reflective; + + From 9d39d8c49ec96e552b4373e3b16842454430cbe4 Mon Sep 17 00:00:00 2001 From: "Jurgen J. Vinju" Date: Thu, 22 Jun 2023 19:39:02 +0200 Subject: [PATCH 2/6] wrote mini compiler for generating cached parsers for parsing concrete syntax modules faster --- .../grammar/storage/ModuleParserStorage.rsc | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/src/org/rascalmpl/library/lang/rascal/grammar/storage/ModuleParserStorage.rsc b/src/org/rascalmpl/library/lang/rascal/grammar/storage/ModuleParserStorage.rsc index 95390e6c55..147f813bed 100644 --- a/src/org/rascalmpl/library/lang/rascal/grammar/storage/ModuleParserStorage.rsc +++ b/src/org/rascalmpl/library/lang/rascal/grammar/storage/ModuleParserStorage.rsc @@ -42,5 +42,39 @@ module lang::rascal::grammar::storage::ModuleParserStorage import lang::rascal::grammar::definition::Modules; import lang::rascal::\syntax::Rascal; import util::Reflective; +import util::FileSystem; +import Location; +import ParseTree; +import Grammar; +import IO; +@synopsis{For all modules in pcfg.srcs this will produce a `.parsers` stored parser capable of parsing concrete syntax fragment in said module.} +@description{ +Use ((loadParsers)) 
to retrieve the parsers stored by this function. In particular the +Rascal interpreter will use this instead of spinning up its own parser generator. +} +void storeParsersForModules(PathConfig pcfg) { + storeParsersForModules({*find(src, "rsc") | src <- pcfg.srcs, bprintln("Crawling ")}, pcfg); +} + +void storeParsersForModules(set[loc] moduleFiles, PathConfig pcfg) { + storeParsersForModules({parseModule(m) | m <- moduleFiles, bprintln("Loading ")}, pcfg); +} + +void storeParsersForModules(set[Module] modules, PathConfig pcfg) { + for (m <- modules) { + storeParserForModule("<m.header.name>", m@\loc, modules, pcfg); + } +} +void storeParserForModule(str main, loc file, set[Module] modules, PathConfig pcfg) { + // this has to be done from scratch due to different ways combining layout definitions + // with import and extend. Each main module has a different grammar because of this. + def = modules2definition(main, modules); + gr = fuse(def); + target = pcfg.bin + relativize(pcfg.srcs, file)[extension="parsers"].path; + println("Generating parser for
at "); + if (type[Tree] rt := type(sort("Tree"), gr.rules)) { + storeParsers(rt, target); + } +} \ No newline at end of file From 8bc2aad8870d39595d5ebb063fee5303684078d7 Mon Sep 17 00:00:00 2001 From: "Jurgen J. Vinju" Date: Thu, 22 Jun 2023 19:46:53 +0200 Subject: [PATCH 3/6] added docs --- .../grammar/storage/ModuleParserStorage.rsc | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/org/rascalmpl/library/lang/rascal/grammar/storage/ModuleParserStorage.rsc b/src/org/rascalmpl/library/lang/rascal/grammar/storage/ModuleParserStorage.rsc index 147f813bed..98dfd46b49 100644 --- a/src/org/rascalmpl/library/lang/rascal/grammar/storage/ModuleParserStorage.rsc +++ b/src/org/rascalmpl/library/lang/rascal/grammar/storage/ModuleParserStorage.rsc @@ -53,6 +53,17 @@ import IO; Use ((loadParsers)) to retrieve the parsers stored by this function. In particular the Rascal interpreter will use this instead of spinning up its own parser generator. } +@benefits{ +* the single pathConfig parameter makes it easy to wire this function into Maven scripts (see generate-sources maven plugin) +* time spent here generating parsers, once, does not have to be spent while running IDE plugins, many times. +} +@pitfalls{ +* this compiler has very weak error reporting. it just crashes with stacktraces in case of trouble. +* for large projects running this can take a few minutes; it is slower than importing the same modules in the interpreter. +* this compiler assumes the grammars are all correct and can be used to parse the concrete syntax fragments in each respective module. +* this compiler may have slight differences in semantics with the way the interpreter composes grammars for modules, since +it is implemented differently. However, no such issues are currently known. 
+} void storeParsersForModules(PathConfig pcfg) { storeParsersForModules({*find(src, "rsc") | src <- pcfg.srcs, bprintln("Crawling ")}, pcfg); } @@ -71,8 +82,13 @@ void storeParserForModule(str main, loc file, set[Module] modules, PathConfig pc // this has to be done from scratch due to different ways combining layout definitions // with import and extend. Each main module has a different grammar because of this. def = modules2definition(main, modules); + + // here the layout semantics comes really into action gr = fuse(def); + + // find a file in the target folder to write to target = pcfg.bin + relativize(pcfg.srcs, file)[extension="parsers"].path; + println("Generating parser for
at "); if (type[Tree] rt := type(sort("Tree"), gr.rules)) { storeParsers(rt, target); From 9ddbda8941d2cdcc58a5633b41ebdb9ffe35b7bf Mon Sep 17 00:00:00 2001 From: "Jurgen J. Vinju" Date: Thu, 22 Jun 2023 20:01:33 +0200 Subject: [PATCH 4/6] instrumented interpreter to reuse ModuleFile.parsers files for every ModuleFile.rsc instance that has concrete syntax in it --- .../rascalmpl/semantics/dynamic/Import.java | 51 ++++++++++++------- src/org/rascalmpl/uri/URIUtil.java | 28 ++++++++++ 2 files changed, 61 insertions(+), 18 deletions(-) diff --git a/src/org/rascalmpl/semantics/dynamic/Import.java b/src/org/rascalmpl/semantics/dynamic/Import.java index be14e4b613..c09f9c239d 100644 --- a/src/org/rascalmpl/semantics/dynamic/Import.java +++ b/src/org/rascalmpl/semantics/dynamic/Import.java @@ -86,6 +86,7 @@ import io.usethesource.vallang.IValue; import io.usethesource.vallang.IValueFactory; import io.usethesource.vallang.type.Type; +import io.usethesource.vallang.type.TypeFactory; public abstract class Import { @@ -394,7 +395,7 @@ private static void addImportToCurrentModule(ISourceLocation src, String name, I current.setSyntaxDefined(current.definesSyntax() || module.definesSyntax()); } - public static ITree parseModuleAndFragments(char[] data, ISourceLocation location, IEvaluator> eval){ + public static ITree parseModuleAndFragments(char[] data, ISourceLocation location, IEvaluator> eval) { eval.__setInterrupt(false); IActionExecutor actions = new NoActionExecutor(); @@ -454,24 +455,38 @@ public static ITree parseModuleAndFragments(char[] data, ISourceLocation locatio // parse the embedded concrete syntax fragments of the current module ITree result = tree; - if (!eval.getHeap().isBootstrapper() && (needBootstrapParser(data) || (env.definesSyntax() && containsBackTick(data, 0)))) { - RascalFunctionValueFactory vf = eval.getFunctionValueFactory(); - IFunction parsers = null; - - if (env.getBootstrap()) { - parsers = vf.bootstrapParsers(); - } - else { - IConstructor 
dummy = TreeAdapter.getType(tree); // I just need _any_ ok non-terminal - IMap syntaxDefinition = env.getSyntaxDefinition(); - IMap grammar = (IMap) eval.getParserGenerator().getGrammarFromModules(eval.getMonitor(),env.getName(), syntaxDefinition).get("rules"); - IConstructor reifiedType = vf.reifiedType(dummy, grammar); - parsers = vf.parsers(reifiedType, vf.bool(false), vf.bool(false), vf.bool(false), vf.set()); - } - - result = parseFragments(vf, eval.getMonitor(), parsers, tree, location, env); - } + try { + if (!eval.getHeap().isBootstrapper() && (needBootstrapParser(data) || (env.definesSyntax() && containsBackTick(data, 0)))) { + RascalFunctionValueFactory vf = eval.getFunctionValueFactory(); + URIResolverRegistry reg = URIResolverRegistry.getInstance(); + ISourceLocation parserCacheFile = URIUtil.changeExtension(env.getLocation(), "parsers"); + IFunction parsers = null; + + if (env.getBootstrap()) { + // no need to generate a parser for the Rascal language itself + parsers = vf.bootstrapParsers(); + } + else if (reg.exists(parserCacheFile)) { + // if we cached a ModuleFile.parsers file, we will use the parser from that (typically after deployment time) + parsers = vf.loadParsers(parserCacheFile, vf.bool(false),vf.bool(false),vf.bool(false), vf.set()); + } + else { + // otherwise we have to generate a fresh parser for this module now + IConstructor dummy = TreeAdapter.getType(tree); // I just need _any_ ok non-terminal + IMap syntaxDefinition = env.getSyntaxDefinition(); + IMap grammar = (IMap) eval.getParserGenerator().getGrammarFromModules(eval.getMonitor(),env.getName(), syntaxDefinition).get("rules"); + IConstructor reifiedType = vf.reifiedType(dummy, grammar); + parsers = vf.parsers(reifiedType, vf.bool(false), vf.bool(false), vf.bool(false), vf.set()); + } + + result = parseFragments(vf, eval.getMonitor(), parsers, tree, location, env); + } + } + catch (URISyntaxException | ClassNotFoundException | IOException e) { + eval.warning("reusing parsers 
failed during module import: " + e.getMessage(), env.getLocation()); + } + return result; } diff --git a/src/org/rascalmpl/uri/URIUtil.java b/src/org/rascalmpl/uri/URIUtil.java index e1503b592f..1b0d4ee556 100644 --- a/src/org/rascalmpl/uri/URIUtil.java +++ b/src/org/rascalmpl/uri/URIUtil.java @@ -433,4 +433,32 @@ public static ISourceLocation removeOffset(ISourceLocation prev) { public static ISourceLocation createFromURI(String value) throws URISyntaxException { return vf.sourceLocation(createFromEncoded(value)); } + public static ISourceLocation changeExtension(ISourceLocation location, String ext) throws URISyntaxException { + String path = location.getPath(); + boolean endsWithSlash = path.endsWith(URIUtil.URI_PATH_SEPARATOR); + if (endsWithSlash) { + path = path.substring(0, path.length() - 1); + } + + if (path.length() > 1) { + int slashIndex = path.lastIndexOf(URIUtil.URI_PATH_SEPARATOR); + int index = path.substring(slashIndex).lastIndexOf('.'); + + if (index == -1 && !ext.isEmpty()) { + path = path + (!ext.startsWith(".") ? "." : "") + ext; + } + else if (!ext.isEmpty()) { + path = path.substring(0, slashIndex + index) + (!ext.startsWith(".") ? "." : "") + ext; + } + else if (index != -1) { + path = path.substring(0, slashIndex + index); + } + + if (endsWithSlash) { + path = path + URIUtil.URI_PATH_SEPARATOR; + } + } + + return URIUtil.changePath(location, path); + } } From 5c06b14c7c07ff709bca3865a9079e62031e47f9 Mon Sep 17 00:00:00 2001 From: "Jurgen J. 
Vinju" Date: Fri, 23 Jun 2023 09:22:42 +0200 Subject: [PATCH 5/6] removed clone of extension replacement in URI --- .../result/SourceLocationResult.java | 25 +------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/src/org/rascalmpl/interpreter/result/SourceLocationResult.java b/src/org/rascalmpl/interpreter/result/SourceLocationResult.java index 44824a5390..d63f789678 100644 --- a/src/org/rascalmpl/interpreter/result/SourceLocationResult.java +++ b/src/org/rascalmpl/interpreter/result/SourceLocationResult.java @@ -509,31 +509,8 @@ else if (name.equals("extension")) { if (!replType.isString()) { throw new UnexpectedType(getTypeFactory().stringType(), replType, ctx.getCurrentAST()); } - String ext = newStringValue; - boolean endsWithSlash = path.endsWith(URIUtil.URI_PATH_SEPARATOR); - if (endsWithSlash) { - path = path.substring(0, path.length() - 1); - } - - if (path.length() > 1) { - int slashIndex = path.lastIndexOf(URIUtil.URI_PATH_SEPARATOR); - int index = path.substring(slashIndex).lastIndexOf('.'); - - if (index == -1 && !ext.isEmpty()) { - path = path + (!ext.startsWith(".") ? "." : "") + ext; - } - else if (!ext.isEmpty()) { - path = path.substring(0, slashIndex + index) + (!ext.startsWith(".") ? "." : "") + ext; - } - else if (index != -1) { - path = path.substring(0, slashIndex + index); - } - - if (endsWithSlash) { - path = path + URIUtil.URI_PATH_SEPARATOR; - } - } + path = URIUtil.changeExtension(loc, newStringValue).getPath(); uriPartChanged = true; } else if (name.equals("top")) { From b6c0b935b6c0f963d450e7bcbc97690c50f342e9 Mon Sep 17 00:00:00 2001 From: "Jurgen J. 
Vinju" Date: Fri, 23 Jun 2023 09:33:42 +0200 Subject: [PATCH 6/6] added docs on how and when to call the ModuleParserStorage --- .../grammar/storage/ModuleParserStorage.rsc | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/src/org/rascalmpl/library/lang/rascal/grammar/storage/ModuleParserStorage.rsc b/src/org/rascalmpl/library/lang/rascal/grammar/storage/ModuleParserStorage.rsc index 98dfd46b49..88a353d7c9 100644 --- a/src/org/rascalmpl/library/lang/rascal/grammar/storage/ModuleParserStorage.rsc +++ b/src/org/rascalmpl/library/lang/rascal/grammar/storage/ModuleParserStorage.rsc @@ -64,6 +64,44 @@ Rascal interpreter will use this instead of spinning up its own parser generator * this compiler may have slight differences in semantics with the way the interpreter composes grammars for modules, since it is implemented differently. However, no such issues are currently known. } +@examples{ +Typically you would call the generate-sources MOJO from the rascal-maven-plugin, in `pom.xml`, like so: + +```xml +<plugin> + <groupId>org.rascalmpl</groupId> + <artifactId>rascal-maven-plugin</artifactId> + <version>0.14.6</version> + <configuration> + <mainModule>YourMainModule</mainModule> + </configuration> + <executions> + <execution> + <id>it-compile</id> + <phase>generate-test-sources</phase> + <goals> + <goal>generate-sources</goal> + </goals> + </execution> + </executions> +</plugin> +``` + +And you'd write this module to make it work: + +```rascal +module YourMainModule + +import util::Reflective; +import lang::rascal::grammar::storage::ModuleParserStorage; + +int main(list[str] args) { + pcfg = getProjectPathConfig(|project://yourProject|); + storeParsersForModules(pcfg); + return 0; +} +``` +} void storeParsersForModules(PathConfig pcfg) { storeParsersForModules({*find(src, "rsc") | src <- pcfg.srcs, bprintln("Crawling ")}, pcfg); }