Skip to content

Commit

Permalink
Merge pull request #62 from UCNot/chunk-lexers
Browse files Browse the repository at this point in the history
Input lexers
  • Loading branch information
surol committed Jul 19, 2023
2 parents 36b8ec9 + a622018 commit 7b57ae0
Show file tree
Hide file tree
Showing 29 changed files with 601 additions and 51 deletions.
1 change: 1 addition & 0 deletions src/compiler/deserialization/ucd-compiler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,7 @@ export namespace UcdCompiler {
| UccFeature<UcdCompiler.Any>
| readonly UccFeature<UcdCompiler.Any>[]
| undefined;
readonly embed?: EsSnippet | undefined;
readonly exportDefaults?: boolean | undefined;

createDeserializer?<T, TSchema extends UcSchema<T>>(
Expand Down
3 changes: 2 additions & 1 deletion src/compiler/deserialization/ucd-function.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ export class UcdFunction<out T = unknown, out TSchema extends UcSchema<T> = UcSc
}

exportFn(externalName: string, mode: UcDeserializer.Mode): EsFunction<UcdExportSignature.Args> {
const { opaqueUcrx, defaultEntities, defaultFormats, onMeta } = this.lib;
const { opaqueUcrx, defaultEntities, defaultFormats, onMeta, embed } = this.lib;
const stream = new EsSymbol('stream');
const options = (code: EsCode): void => {
code.multiLine(code => {
Expand All @@ -78,6 +78,7 @@ export class UcdFunction<out T = unknown, out TSchema extends UcSchema<T> = UcSc
'formats,',
'onMeta,',
opaqueUcrx ? esline`opaqueRx: ${opaqueUcrx.instantiate()},` : EsCode.none,
embed ? esline`embed: ${embed},` : EsCode.none,
)
.write('}');
});
Expand Down
5 changes: 5 additions & 0 deletions src/compiler/deserialization/ucd-lib.ts
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,10 @@ export class UcdLib<out TModels extends UcdModels = UcdModels> extends UcrxLib {
return this.#onMeta;
}

/**
 * Code snippet taken from the `embed` option this library was created with, or `undefined` when the option is
 * omitted.
 *
 * When present, `UcdFunction.exportFn()` emits it as the `embed` option of the exported deserializer.
 */
get embed(): EsSnippet | undefined {
  const { embed } = this.#options;

  return embed;
}

deserializerFor<T, TSchema extends UcSchema<T> = UcSchema<T>>(
schema: TSchema,
): UcdFunction<T, TSchema> {
Expand Down Expand Up @@ -148,6 +152,7 @@ export namespace UcdLib {
formats(this: void, exportNs?: EsNamespace): EsSnippet;
meta(this: void, exportNs?: EsNamespace): EsSnippet;
onMeta?: EsSnippet | undefined;
readonly embed?: EsSnippet | undefined;
readonly exportDefaults?: boolean | undefined;

createDeserializer?<T, TSchema extends UcSchema<T>>(
Expand Down
7 changes: 6 additions & 1 deletion src/compiler/deserialization/unknown.ucrx.class.ts
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,12 @@ export class UnknownUcrxClass extends UcrxClass {

#overrideRemainingMethods(): void {
for (const { member, declared } of this.members()) {
if (!declared && member instanceof UcrxMethod && member !== UcrxCore.raw) {
if (
!declared
&& member instanceof UcrxMethod
&& member !== UcrxCore.emb
&& member !== UcrxCore.raw
) {
this.#declareMethod(member);
}
}
Expand Down
5 changes: 5 additions & 0 deletions src/compiler/rx/ucrx-core.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ export type UcrxCore = {
readonly att: UcrxMethod<UcrxAttrSetterSignature.Args>;
readonly bol: UcrxSetter;
readonly big: UcrxSetter;
readonly emb: UcrxMethod<{ emit: EsArg; cx: EsArg }>;
readonly ent: UcrxEntitySetter;
readonly fmt: UcrxFormattedSetter;
readonly nls: UcrxMethod<{ cx: EsArg }>;
Expand All @@ -35,6 +36,10 @@ export const UcrxCore: UcrxCore = {
att: /*#__PURE__*/ new UcrxAttrSetter('att'),
bol: /*#__PURE__*/ new UcrxSetter('bol', { typeName: 'boolean', stub: UcrxCore$stub }),
big: /*#__PURE__*/ new UcrxSetter('big', { typeName: 'bigint', stub: UcrxCore$stub }),
emb: /*#__PURE__*/ new UcrxMethod('emb', {
args: { emit: {}, cx: {} },
stub: UcrxCore$stub,
}),
ent: /*#__PURE__*/ new UcrxEntitySetter('ent'),
fmt: /*#__PURE__*/ new UcrxFormattedSetter('fmt'),
nls: /*#__PURE__*/ new UcrxMethod<{ cx: EsArg }>('nls', {
Expand Down
48 changes: 44 additions & 4 deletions src/deserializer/async-ucd-reader.ts
Original file line number Diff line number Diff line change
@@ -1,20 +1,22 @@
import { Ucrx } from '../rx/ucrx.js';
import { UcDeserializer } from '../schema/uc-deserializer.js';
import { UcInputLexer } from '../syntax/uc-input-lexer.js';
import { UcToken } from '../syntax/uc-token.js';
import { ucdReadValue } from './impl/ucd-read-value.js';
import { UcrxHandle } from './impl/ucrx-handle.js';
import { UcdReader } from './ucd-reader.js';

export class AsyncUcdReader extends UcdReader {

readonly #reader: ReadableStreamDefaultReader<UcToken>;
#stream: ReadableStream<UcToken>;
#reader: ReadableStreamDefaultReader<UcToken>;

#current: UcToken | undefined;
readonly #prev: UcToken[] = [];
#hasNext = true;

constructor(stream: ReadableStream<UcToken>, options?: UcDeserializer.Options) {
constructor(stream: ReadableStream<UcToken>, options?: UcdReader.Options) {
super(options);
this.#stream = stream;
this.#reader = stream.getReader();
}

Expand All @@ -35,7 +37,21 @@ export class AsyncUcdReader extends UcdReader {
}

override async read(rx: Ucrx): Promise<void> {
await ucdReadValue(this, new UcrxHandle(this, rx, [{}]), rx => rx.end());
await ucdReadValue(this, new UcrxHandle(this, rx, [{}]), false);
}

/**
 * Reads a value from embedded input.
 *
 * Re-routes the underlying token stream through a {@link UcEmbedsStream}, so that the upcoming tokens are
 * processed by the lexer returned from `createLexer`, then reads a value from the resulting stream.
 *
 * @param rx - Receiver of the value read. Wrapped into a new `UcrxHandle`.
 * @param createLexer - Creates the lexer for embedded input. Accepts a function that emits tokens downstream.
 * @param single - Forwarded to `ucdReadValue()` as is.
 *
 * @returns Promise resolved when the value has been read.
 */
override async readEmbeds(
  rx: Ucrx,
  createLexer: (emit: (token: UcToken) => void) => UcInputLexer,
  single: boolean,
): Promise<void> {
  // Presumably drops the token that announced the embedded input - verify against `skip()`.
  this.skip();

  // The stream can not be piped while locked by the current reader.
  this.#reader.releaseLock();

  const embedded = this.#stream.pipeThrough(new UcEmbedsStream(createLexer));

  this.#stream = embedded;
  this.#reader = embedded.getReader();

  const handle = new UcrxHandle(this, rx, [{}]);

  await ucdReadValue(this, handle, single);
}

override async next(): Promise<UcToken | undefined> {
Expand Down Expand Up @@ -133,3 +149,27 @@ export class AsyncUcdReader extends UcdReader {
}

}

/**
 * Transform stream that routes leading tokens through an input lexer.
 *
 * Works in two phases:
 *
 * 1. Every non-numeric token is scanned by the lexer. Whatever the lexer emits is enqueued to the readable side.
 * 2. The first numeric token flushes the lexer and is dropped. Every token after it is enqueued unchanged.
 *
 * The lexer is flushed once more when the writable side is closed.
 */
export class UcEmbedsStream extends TransformStream<UcToken, UcToken> {

  /**
   * @param createLexer - Creates the lexer to scan leading tokens with. Accepts a function that enqueues the given
   * token to the readable side of this stream. Called once, when the stream starts.
   */
  constructor(createLexer: (emit: (token: UcToken) => void) => UcInputLexer) {
    let lexer: UcInputLexer;
    // Becomes `true` once the first numeric token has been received.
    let passThrough = false;

    super({
      start: controller => {
        lexer = createLexer(emitted => controller.enqueue(emitted));
      },
      transform: (token, controller) => {
        if (passThrough) {
          controller.enqueue(token);
        } else if (typeof token === 'number') {
          // Switch the mode only after a successful flush.
          lexer.flush();
          passThrough = true;
        } else {
          lexer.scan(token);
        }
      },
      flush: () => lexer.flush(),
    });
  }

}
35 changes: 25 additions & 10 deletions src/deserializer/impl/ucd-read-value.sync.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import {
UC_TOKEN_CLOSING_PARENTHESIS,
UC_TOKEN_COMMA,
UC_TOKEN_DOLLAR_SIGN,
UC_TOKEN_EMBED,
UC_TOKEN_EXCLAMATION_MARK,
UC_TOKEN_OPENING_PARENTHESIS,
UcToken,
Expand All @@ -33,10 +34,8 @@ import { UcrxHandle } from './ucrx-handle.js';
export function ucdReadValueSync(
reader: SyncUcdReader,
rx: UcrxHandle,
end?: (rx: UcrxHandle) => void, // Never set for the first item of the list, unless it is non-empty.
single: boolean,
): void {
const single = !end;

ucdSkipWhitespaceSync(reader);

const firstToken = reader.current();
Expand Down Expand Up @@ -89,6 +88,15 @@ export function ucdReadValueSync(
return;
}

hasValue = true;
} else if (firstToken === UC_TOKEN_EMBED) {
reader.readEmbeds(rx.rx, emit => rx.emb(emit), single);
ucdSkipWhitespaceSync(reader);

if (single) {
return;
}

hasValue = true;
}

Expand Down Expand Up @@ -125,17 +133,24 @@ export function ucdReadValueSync(
const bound = reader.current();

if (!bound) {
if (!single) {
rx.end();
}

// End of input.
return end?.(rx);
return;
}
if (bound === UC_TOKEN_CLOSING_PARENTHESIS) {
// Unbalanced closing parenthesis.
// Consume up to its position.
if (!hasValue) {
rx.decode(printUcTokens(trimUcTokensTail(reader.consumePrev())));
}
if (!single) {
rx.end();
}

return end?.(rx);
return;
}

if (bound === UC_TOKEN_COMMA) {
Expand Down Expand Up @@ -245,12 +260,12 @@ function ucdReadMetaAndValueSync(reader: SyncUcdReader, rx: UcrxHandle): void {

reader.skip(); // Skip opening parenthesis.

ucdReadValueSync(reader, rx.att(attributeName), rx => rx.end());
ucdReadValueSync(reader, rx.att(attributeName), false);

reader.skip(); // Skip closing parenthesis.

// Read single value following the attribute.
ucdReadValueSync(reader, rx);
ucdReadValueSync(reader, rx, true);
}

function ucdReadTokensSync(
Expand Down Expand Up @@ -355,7 +370,7 @@ function ucdReadItemsSync(
} else {
rx.nextItem();
}
ucdReadValueSync(reader, rx);
ucdReadValueSync(reader, rx, true);

if (reader.current() === UC_TOKEN_COMMA) {
// Skip comma and whitespace following it.
Expand All @@ -372,7 +387,7 @@ function ucdReadMapSync(reader: SyncUcdReader, rx: UcrxHandle, firstKey: string)

const entryRx = rx.firstEntry(firstKey);

ucdReadValueSync(reader, entryRx, rx => rx.end());
ucdReadValueSync(reader, entryRx, false);

const bound = reader.current();

Expand Down Expand Up @@ -421,7 +436,7 @@ function ucdReadEntriesSync(reader: SyncUcdReader, rx: UcrxHandle): void {

const entryRx = rx.nextEntry(key);

ucdReadValueSync(reader, entryRx, rx => rx.end());
ucdReadValueSync(reader, entryRx, false);

if (!reader.current()) {
// End of input.
Expand Down
35 changes: 25 additions & 10 deletions src/deserializer/impl/ucd-read-value.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import {
UC_TOKEN_CLOSING_PARENTHESIS,
UC_TOKEN_COMMA,
UC_TOKEN_DOLLAR_SIGN,
UC_TOKEN_EMBED,
UC_TOKEN_EXCLAMATION_MARK,
UC_TOKEN_OPENING_PARENTHESIS,
UcToken,
Expand All @@ -25,10 +26,8 @@ import { UcrxHandle } from './ucrx-handle.js';
export async function ucdReadValue(
reader: AsyncUcdReader,
rx: UcrxHandle,
end?: (rx: UcrxHandle) => void, // Never set for the first item of the list, unless it is non-empty.
single: boolean,
): Promise<void> {
const single = !end;

await ucdSkipWhitespace(reader);

const firstToken = reader.current();
Expand Down Expand Up @@ -81,6 +80,15 @@ export async function ucdReadValue(
return;
}

hasValue = true;
} else if (firstToken === UC_TOKEN_EMBED) {
await reader.readEmbeds(rx.rx, emit => rx.emb(emit), single);
await ucdSkipWhitespace(reader);

if (single) {
return;
}

hasValue = true;
}

Expand Down Expand Up @@ -117,17 +125,24 @@ export async function ucdReadValue(
const bound = reader.current();

if (!bound) {
if (!single) {
rx.end();
}

// End of input.
return end?.(rx);
return;
}
if (bound === UC_TOKEN_CLOSING_PARENTHESIS) {
// Unbalanced closing parenthesis.
// Consume up to its position.
if (!hasValue) {
rx.decode(printUcTokens(trimUcTokensTail(reader.consumePrev())));
}
if (!single) {
rx.end();
}

return end?.(rx);
return;
}

if (bound === UC_TOKEN_COMMA) {
Expand Down Expand Up @@ -237,12 +252,12 @@ async function ucdReadMetaAndValue(reader: AsyncUcdReader, rx: UcrxHandle): Prom

reader.skip(); // Skip opening parenthesis.

await ucdReadValue(reader, rx.att(attributeName), rx => rx.end());
await ucdReadValue(reader, rx.att(attributeName), false);

reader.skip(); // Skip closing parenthesis.

// Read single value following the attribute.
await ucdReadValue(reader, rx);
await ucdReadValue(reader, rx, true);
}

async function ucdReadTokens(
Expand Down Expand Up @@ -347,7 +362,7 @@ async function ucdReadItems(
} else {
rx.nextItem();
}
await ucdReadValue(reader, rx);
await ucdReadValue(reader, rx, true);

if (reader.current() === UC_TOKEN_COMMA) {
// Skip comma and whitespace following it.
Expand All @@ -364,7 +379,7 @@ async function ucdReadMap(reader: AsyncUcdReader, rx: UcrxHandle, firstKey: stri

const entryRx = rx.firstEntry(firstKey);

await ucdReadValue(reader, entryRx, rx => rx.end());
await ucdReadValue(reader, entryRx, false);

const bound = reader.current();

Expand Down Expand Up @@ -413,7 +428,7 @@ async function ucdReadEntries(reader: AsyncUcdReader, rx: UcrxHandle): Promise<v

const entryRx = rx.nextEntry(key);

await ucdReadValue(reader, entryRx, rx => rx.end());
await ucdReadValue(reader, entryRx, false);

if (!reader.current()) {
// End of input.
Expand Down
Loading

0 comments on commit 7b57ae0

Please sign in to comment.