Skip to content

Commit 04161ab

Browse files
authored
Merge pull request #312 from mike-lischke/unicode
Ported full Unicode support in serialized ATN from main ANTLR repo.
2 parents d933992 + 51f40ab commit 04161ab

File tree

1 file changed

+43
-17
lines changed

1 file changed

+43
-17
lines changed

src/atn/ATNDeserializer.ts

+43-17
Original file line number | Diff line number | Diff line change
@@ -83,19 +83,26 @@ export class ATNDeserializer {
8383
* {@link LexerAction} instances.
8484
*/
8585
private static readonly ADDED_LEXER_ACTIONS: UUID = UUID.fromString("AB35191A-1603-487E-B75A-479B831EAF6D");
86+
/**
87+
* This UUID indicates the serialized ATN contains two sets of
88+
* IntervalSets, where the second set's values are encoded as
89+
* 32-bit integers to support the full Unicode SMP range up to U+10FFFF.
90+
*/
91+
private static readonly ADDED_UNICODE_SMP: UUID = UUID.fromString("59627784-3BE5-417A-B9EB-8131A7286089");
8692
/**
8793
* This list contains all of the currently supported UUIDs, ordered by when
8894
* the feature first appeared in this branch.
8995
*/
9096
private static readonly SUPPORTED_UUIDS: UUID[] = [
9197
ATNDeserializer.BASE_SERIALIZED_UUID,
92-
ATNDeserializer.ADDED_LEXER_ACTIONS
98+
ATNDeserializer.ADDED_LEXER_ACTIONS,
99+
ATNDeserializer.ADDED_UNICODE_SMP
93100
];
94101

95102
/**
96103
* This is the current serialized UUID.
97104
*/
98-
private static readonly SERIALIZED_UUID: UUID = ATNDeserializer.ADDED_LEXER_ACTIONS;
105+
private static readonly SERIALIZED_UUID: UUID = ATNDeserializer.ADDED_UNICODE_SMP;
99106

100107
@NotNull
101108
private readonly deserializationOptions: ATNDeserializationOptions;
@@ -285,22 +292,12 @@ export class ATNDeserializer {
285292
// SETS
286293
//
287294
let sets: IntervalSet[] = [];
288-
let nsets: number = ATNDeserializer.toInt(data[p++]);
289-
for (let i = 0; i < nsets; i++) {
290-
let nintervals: number = ATNDeserializer.toInt(data[p]);
291-
p++;
292-
let set: IntervalSet = new IntervalSet();
293-
sets.push(set);
295+
p = this.readSets(data, p, sets, false);
294296

295-
let containsEof: boolean = ATNDeserializer.toInt(data[p++]) != 0;
296-
if (containsEof) {
297-
set.add(-1);
298-
}
299-
300-
for (let j = 0; j < nintervals; j++) {
301-
set.add(ATNDeserializer.toInt(data[p]), ATNDeserializer.toInt(data[p + 1]));
302-
p += 2;
303-
}
297+
// Next, if the ATN was serialized with the Unicode SMP feature,
298+
// deserialize sets with 32-bit arguments <= U+10FFFF.
299+
if (this.isFeatureSupported(ATNDeserializer.ADDED_UNICODE_SMP, uuid)) {
300+
p = this.readSets(data, p, sets, true);
304301
}
305302

306303
//
@@ -585,6 +582,35 @@ export class ATNDeserializer {
585582
return atn;
586583
}
587584

585+
/**
 * Reads a count-prefixed group of interval sets from `data`, beginning at
 * index `p`, and appends each decoded set to `sets`.
 *
 * Layout consumed, in order: the number of sets, then for every set its
 * interval count, a flag element (a nonzero flag adds -1 to the set), and
 * finally one (start, end) pair per interval.
 *
 * @param data The serialized data to read from.
 * @param p Index of the element holding the set count.
 * @param sets Destination list; decoded sets are pushed in reading order.
 * @param read32 When true, each interval endpoint is read through
 * `ATNDeserializer.toInt32` and spans two elements of `data`; otherwise
 * each endpoint is a single element read through `ATNDeserializer.toInt`.
 * @returns The index of the first element after the data consumed here.
 */
private readSets(data: Uint16Array, p: number, sets: IntervalSet[], read32: boolean): number {
	// Number of `data` elements occupied by one interval endpoint.
	const elementsPerEndpoint = read32 ? 2 : 1;
	const readEndpoint = (index: number): number =>
		read32 ? ATNDeserializer.toInt32(data, index) : ATNDeserializer.toInt(data[index]);

	let cursor = p;
	const setCount = ATNDeserializer.toInt(data[cursor]);
	cursor += 1;

	for (let setIndex = 0; setIndex < setCount; setIndex += 1) {
		const intervalCount = ATNDeserializer.toInt(data[cursor]);
		cursor += 1;

		const intervals = new IntervalSet();
		sets.push(intervals);

		// The flag element precedes the interval pairs of every set.
		const flag = ATNDeserializer.toInt(data[cursor]);
		cursor += 1;
		if (flag !== 0) {
			intervals.add(-1);
		}

		for (let pair = 0; pair < intervalCount; pair += 1) {
			const start = readEndpoint(cursor);
			const end = readEndpoint(cursor + elementsPerEndpoint);
			intervals.add(start, end);
			cursor += 2 * elementsPerEndpoint;
		}
	}

	return cursor;
}
613+
588614
/**
589615
* Analyze the {@link StarLoopEntryState} states in the specified ATN to set
590616
* the {@link StarLoopEntryState#precedenceRuleDecision} field to the

0 commit comments

Comments
 (0)