Skip to content

Commit

Permalink
feat!: Implement new core: new lexer/parser/visitor/plugin systems (#40)
Browse files Browse the repository at this point in the history
* feat(fs): introduce the fs plugin

* fix(fs): typo

* feat(fs): convert sandboxdir into a function

* fix(fs): fix return values

* feat(fs): finish up and add tests

* feat(git): introduce the git module

* feat!(core,http,ethereum): unify the parser

* feat(core)!: introduce new lexer/parser/visitor

This new core here eases the heavy work on the developer by simplifying the
API while keeping all the things we want.  At first, we used to use Chevrotain
with a method that didn't allow us to detect the
individual syntax errors of a custom statement we defined.  I worked around that
issue by casting black-magic spells over Chevrotain internals.  I hated that
solution, but at that time, it was the most practical one.

Now, here, I introduce the new core with its new lexer/parser/visitor that is
smart enough to cover most of our real-world needs.  The visitor is in perfect
shape.  The lexer is almost perfect, perhaps needs just a little bit of
polishing, but it is the one that is the simplest.  The parser is smart enough
to cover most of our real-world cases, but it also needs some treatment.  I'm
planning to have a ranking system over it.  Currently, it uses the number of
errors detected on a statement, and using that, it ranks its matches.  This
solution appears to be quite reliable in practice.

The way we use the API is quite simple ("Given I" parts can also be "Then I"):

	import {Plugin} from '@slangroom/core'

	const p = new Plugin();
	const cb = (ctx) => ctx.fail('example') // cb is called when statement is matched
	p.new('love asche', cb)
	// -> Given I love Asche

	p.new('open', 'read file contents', cb)
	// -> Given I open 'ident' and read file contents

	p.new('connect', 'send http request', cb)
	// -> Given I connect to 'ident' and send http request

	p.new(['base32'], 'convert to base64', cb)
	// -> Given I send base32 'ident' and convert to base64

	p.new('connect', ['object', 'proxy'], 'send http request', cb)
	// -> Given I connect to 'ident' and send object 'ident' and send proxy 'ident' and send http request

	export const myPlugin = p;

* feat(ethereum)!: port over to the new core and fix tests

* feat(http)!: port over to the new core and fix tests

* fix(core): relax phrase and params checks

* feat(wallet)!: port over to the new core

Currently, the tests fail for some reason.

* feat(core)!: forbid spaces in params

* fix(wallet)!: switch over to underscore params

* fix: wrong order of the bindings set /wor/ident/

* fix: the wallet params names

* lint: linting

* fix(core): fix parsing of params order

* test(ethereum): fix rule ignore

* fix(wallet): parameter names

---------

Co-authored-by: Puria Nafisi Azizi <[email protected]>
  • Loading branch information
denizenging and puria authored Nov 16, 2023
1 parent f1c354f commit 94ebaa1
Show file tree
Hide file tree
Showing 42 changed files with 3,034 additions and 1,523 deletions.
2 changes: 1 addition & 1 deletion pkg/core/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
export * from '@slangroom/core/lexicon';
export * from '@slangroom/core/lexer';
export * from '@slangroom/core/parser';
export * from '@slangroom/core/lexer';
export * from '@slangroom/core/visitor';
Expand Down
57 changes: 48 additions & 9 deletions pkg/core/src/lexer.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,49 @@
import { Lexicon } from '@slangroom/core';
import { Lexer } from '@slangroom/deps/chevrotain';

/**
* Lexes the given line.
*/
export const lex = (lexicon: Lexicon, line: string) => {
const lexer = new Lexer(lexicon.tokens);
return lexer.tokenize(line);
/**
 * A single lexeme of a statement line, along with its position in the line.
 *
 * The `start`/`end` indices are inclusive and refer to code points (the
 * lexer iterates `[...line]`), not UTF-16 code units.
 */
export class Token {
	/** Lowercased form of {@link raw}, for case-insensitive comparisons. */
	readonly name: string;

	/** Whether this token is a single-quoted identifier. */
	readonly isIdent: boolean;

	constructor(
		/** The lexeme exactly as it appears in the line. */
		readonly raw: string,
		/** Inclusive index of the token's first character. */
		readonly start: number,
		/** Inclusive index of the token's last character. */
		readonly end: number,
	) {
		this.name = raw.toLowerCase();
		this.isIdent = raw.startsWith("'");
	}
}

/**
 * Raised by the lexer when a single-quoted identifier is opened but never
 * closed before the end of the line.
 */
export class LexError extends Error {
	constructor(t: Token) {
		// Error(message) sets this.message for us.
		super(`unclosed single-quote at ${t.start},${t.end}: ${t.raw}`);
		this.name = 'LexError';
	}
}

/**
 * Splits a statement line into {@link Token}s.
 *
 * A token is either a single-quoted identifier (quotes kept in the raw
 * lexeme) or a maximal run of characters containing no blanks, tabs, or
 * single-quotes.  Iteration is over code points, so token positions are
 * code-point indices.
 *
 * @throws {LexError} when a single-quote is opened but never closed.
 */
export const lex = (line: string): Token[] => {
	const chars = [...line];
	const found: Token[] = [];
	let pos = 0;

	while (pos < chars.length) {
		// Skip the run of blanks and tabs between tokens.
		while (chars[pos] === ' ' || chars[pos] === '\t') ++pos;
		if (pos >= chars.length) break;

		const start = pos;
		if (chars[pos] === "'") {
			// Quoted identifier: consume up to and including the closing quote.
			let lexeme = "'";
			++pos;
			while (pos < chars.length && chars[pos] !== "'") lexeme += chars[pos++];
			// Ran off the end of the line without finding the closing quote.
			if (pos >= chars.length) throw new LexError(new Token(lexeme, start, chars.length - 1));
			lexeme += chars[pos++];
			found.push(new Token(lexeme, start, pos - 1));
		} else {
			// Bare word: consume until whitespace, a quote, or the end.
			let lexeme = '';
			while (
				pos < chars.length &&
				chars[pos] !== ' ' &&
				chars[pos] !== '\t' &&
				chars[pos] !== "'"
			)
				lexeme += chars[pos++];
			// The current character is a non-blank, non-quote char, so the
			// word is guaranteed non-empty here.
			found.push(new Token(lexeme, start, pos - 1));
		}
	}

	return found;
};
6 changes: 3 additions & 3 deletions pkg/core/src/lexicon.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ export class Lexicon {
name: 'Whitespace',
pattern: /\s+/,
group: Lexer.SKIPPED,
})
}),
);

this.#store.set(
Expand All @@ -25,15 +25,15 @@ export class Lexicon {
name: 'Comment',
pattern: /#[^\n\r]*/,
group: 'comments',
})
}),
);

this.#store.set(
'identifier',
createToken({
name: 'Identifier',
pattern: /'(?:[^\\']|\\(?:[bfnrtv'\\/]|u[0-9a-fA-F]{4}))*'/,
})
}),
);
}

Expand Down
241 changes: 138 additions & 103 deletions pkg/core/src/parser.ts
Original file line number Diff line number Diff line change
@@ -1,119 +1,154 @@
import { Lexicon } from '@slangroom/core';
import {
CstParser,
type IToken,
type CstNode,
type IOrAlt,
type ConsumeMethodOpts,
} from '@slangroom/deps/chevrotain';

export type StatementCst = CstNode & {
children: { [K in string]: [PhraseCst] };
};
import { PluginMap, Token, type PluginMapKey } from '@slangroom/core';

export class ParseError extends Error {
static wrong(have: Token, wantFirst: string, ...wantRest: string[]) {
const wants = [wantFirst, ...wantRest];
return new ParseError(
`"${have.raw}" between (${have.start}, ${have.end}) must be one of: ${wants.join(
', ',
)}`,
);
}

export type PhraseCst = CstNode & {
children: {
connect?: [IToken];
} & { open?: [IToken] } & { into?: [IToken] } & {
[K in string]: [IToken | PhraseCst];
};
};
static missing(wantFirst: string, ...wantRest: string[]) {
const wants = [wantFirst, ...wantRest];
return new ParseError(`missing token(s): ${wants.join(', ')}`);
}

export class Parser extends CstParser {
#phrases: IOrAlt<unknown>[];
#lexicon: Lexicon;

constructor(lexicon: Lexicon, parsers: ((this: Parser) => void)[]) {
super(lexicon.tokens, { maxLookahead: 1024 });
this.#lexicon = lexicon;
parsers = [...new Set(parsers)];
parsers.forEach((p) => p.apply(this));
this.#phrases = Object.entries(this).reduce((acc, [k, v]) => {
if (k.endsWith('Phrase') && typeof v === 'function')
acc.push({ ALT: () => this.SUBRULE(v) });
return acc;
}, [] as IOrAlt<unknown>[]);
this.performSelfAnalysis();
static extra(token: Token) {
return new ParseError(`extra token (${token.start}, ${token.end}): ${token.raw}`);
}

/**
* {@inheritDoc Lexicon.token}
* @internal
*/
#token(name: string) {
return this.#lexicon.token(name);
constructor(message: string) {
super(message);
this.name = 'ParseError';
}
}

tokenn(idx: number, name: string, opts?: ConsumeMethodOpts) {
this.consume(idx, this.#token(name), opts);
}
/**
 * The result of parsing a single statement line against the plugin map.
 */
export type Cst = {
	/** Which opening keyword the line used; absent when neither "given" nor "then" was found. */
	givenThen?: 'given' | 'then';
	/** Errors found in the statement head (the "Given/Then I" part), shared by all matches. */
	errors: ParseError[];
	/** The candidate plugin matches kept so far; matches with fewer per-match errors displace worse ones. */
	matches: Match[];
};

token(name: string, opts?: ConsumeMethodOpts) {
this.tokenn(0, name, opts);
}
/**
 * One candidate interpretation of a statement against a single plugin key.
 */
export type Match = {
	/** Parameter name → identifier bound by a "send <param> 'ident'" clause. */
	bindings: Map<string, string>;
	/** The plugin-map key this match was attempted against. */
	key: PluginMapKey;
	/** Errors collected while matching this particular key; used to rank matches. */
	err: ParseError[];
	/** Identifier of a trailing "and output into 'ident'" clause, if present. */
	into?: string;
} & (
	// "open" and "connect" are mutually exclusive; `never` on the other
	// member forbids a match from carrying both.
	| {
			/** Identifier of an "open 'ident'" clause. */
			open?: string;
			connect?: never;
	  }
	| {
			open?: never;
			/** Identifier of a "connect to 'ident'" clause. */
			connect?: string;
	  }
);

export const parse = (p: PluginMap, t: Token[]): Cst => {
const cst: Cst = {
matches: [],
errors: [],
};
let givenThen: 'given' | 'then' | undefined;

token1(name: string, opts?: ConsumeMethodOpts) {
this.tokenn(1, name, opts);
}
// Given or Then
if (t[0]?.name === 'given') givenThen = 'given';
else if (t[0]?.name === 'then') givenThen = 'then';
else if (t[0]) cst.errors.push(ParseError.wrong(t[0], 'given', 'then'));
else cst.errors.push(ParseError.missing('given', 'then'));

token2(name: string, opts?: ConsumeMethodOpts) {
this.tokenn(2, name, opts);
}
// TODO: should we allow "that" here ("Given that I")

token3(name: string, opts?: ConsumeMethodOpts) {
this.tokenn(3, name, opts);
// I
if (t[1]) {
if (t[1]?.raw !== 'I') cst.errors.push(ParseError.wrong(t[1], 'I'));
} else {
cst.errors.push(ParseError.missing('I'));
}

statement = this.RULE('statement', () => {
this.OR(this.#phrases);
p.forEach(([k]) => {
let i = 1;
const m: Match = { key: k, bindings: new Map(), err: [] };
const curErrLen = cst.matches[0]?.err.length;
const lemmeout = {};
const newErr = (have: undefined | Token, wantsFirst: string, ...wantsRest: string[]) => {
if (have) m.err.push(ParseError.wrong(have, wantsFirst, ...wantsRest));
else m.err.push(ParseError.missing(wantsFirst, ...wantsRest));
if (curErrLen !== undefined && m.err.length > curErrLen) throw lemmeout;
};
try {
// Open 'ident' and|Connect to 'ident' and
if (k.openconnect === 'open') {
if (t[++i]?.name !== 'open') newErr(t[i], 'open');
const ident = t[++i];
if (ident?.isIdent) m.open = ident.raw.slice(1, -1);
else newErr(ident, '<identifier>');
if (t[++i]?.name !== 'and') newErr(t[i], 'and');
} else if (k.openconnect === 'connect') {
if (t[++i]?.name !== 'connect') newErr(t[i], 'connect');
if (t[++i]?.name !== 'to') newErr(t[i], 'connect');
const ident = t[++i];
if (ident?.isIdent) m.connect = ident.raw.slice(1, -1);
else newErr(ident, '<identifier>');
if (t[++i]?.name !== 'and') newErr(t[i], 'and');
}

// Send $buzzword 'ident' And
// TODO: allow spaces in between params
const params = new Set(k.params);
k.params?.forEach(() => {
if (t[++i]?.name !== 'send') newErr(t[i], 'send');

const tokName = t[++i];
if (tokName && params.has(tokName.name)) {
params.delete(tokName.name);
} else {
const [first, ...rest] = [...params.values()] as [string, ...string[]];
newErr(t[i], first, ...rest);
}

const ident = t[++i];
if (ident?.isIdent) {
if (tokName) m.bindings.set(tokName.name, ident.raw.slice(1, -1));
} else {
newErr(ident, '<identifier>');
}
if (t[++i]?.name !== 'and') newErr(t[i], 'and');
});

// $buzzwords
k.phrase.split(' ').forEach((name) => t[++i]?.name !== name && newErr(t[i], name));

// Save Output Into 'ident'
const ident = t[t.length - 1];
if (t.length - i >= 5 && ident?.isIdent) {
for (++i; i < t.length - 4; ++i) m.err.push(ParseError.extra(t[i] as Token));
if (t[t.length - 4]?.name !== 'and') newErr(t[t.length - 4], 'and');
if (t[t.length - 3]?.name !== 'output') newErr(t[t.length - 3], 'output');
if (t[t.length - 2]?.name !== 'into') newErr(t[t.length - 2], 'into');
if (
t[t.length - 4]?.name === 'and' &&
t[t.length - 3]?.name === 'output' &&
t[t.length - 2]?.name === 'into'
)
m.into = ident.raw.slice(1, -1);
} else {
for (++i; i < t.length; ++i) m.err.push(ParseError.extra(t[i] as Token));
}

if (curErrLen !== undefined && m.err.length > curErrLen) throw lemmeout;
if (curErrLen !== undefined && m.err.length < curErrLen) cst.matches.length = 0;
cst.matches.push(m);
} catch (e) {
if (e !== lemmeout) throw e;
}
});

connect() {
this.tokenn(255, 'connect');
this.tokenn(255, 'to');
this.tokenn(255, 'identifier', { LABEL: 'connect' });
this.tokenn(255, 'and');
}

open() {
this.tokenn(255, 'open');
this.tokenn(255, 'identifier', { LABEL: 'open' });
this.tokenn(255, 'and');
}

into() {
this.tokenn(254, 'and');
this.tokenn(254, 'output');
this.tokenn(254, 'into');
this.tokenn(254, 'identifier', { LABEL: 'into' });
}

sendpassn(idx: number, parameter: string) {
this.or(idx, [
{ ALT: () => this.tokenn(idx, 'send', { LABEL: `sendpass${idx}` }) },
{ ALT: () => this.tokenn(idx, 'pass', { LABEL: `sendpass${idx}` }) },
]);
this.tokenn(idx, parameter, { LABEL: `sendpass${idx}.parameter` });
this.tokenn(idx, 'identifier', { LABEL: `sendpass${idx}.identifier` });
this.tokenn(idx, 'and', { LABEL: `sendpass${idx}.and` });
}

sendpass(parameter: string) {
this.sendpassn(0, parameter);
}

sendpass1(parameter: string) {
this.sendpassn(1, parameter);
}

sendpass2(parameter: string) {
this.sendpassn(2, parameter);
}
}

export const parse = (parser: Parser, tokens: IToken[]) => {
parser.input = tokens;
return {
cst: parser.statement(),
errors: parser.errors,
};
if (givenThen) cst.givenThen = givenThen;
return cst;
};
Loading

0 comments on commit 94ebaa1

Please sign in to comment.