Skip to content

Commit

Permalink
feat: add support for ES2025 duplicate named capturing groups (#195)
Browse files Browse the repository at this point in the history
close #194
  • Loading branch information
ota-meshi authored Jun 28, 2024
1 parent f38e97a commit fb20f68
Show file tree
Hide file tree
Showing 19 changed files with 3,367 additions and 87 deletions.
13 changes: 12 additions & 1 deletion src/ast.ts
Original file line number Diff line number Diff line change
Expand Up @@ -428,10 +428,21 @@ export interface Character extends NodeBase {
* The backreference.
* E.g. `\1`, `\k<name>`
*/
export interface Backreference extends NodeBase {
export type Backreference = AmbiguousBackreference | UnambiguousBackreference
interface BaseBackreference extends NodeBase {
type: "Backreference"
parent: Alternative | Quantifier
ref: number | string
ambiguous: boolean
resolved: CapturingGroup | CapturingGroup[]
}
export interface AmbiguousBackreference extends BaseBackreference {
ref: string
ambiguous: true
resolved: CapturingGroup[]
}
export interface UnambiguousBackreference extends BaseBackreference {
ambiguous: false
resolved: CapturingGroup
}

Expand Down
3 changes: 2 additions & 1 deletion src/ecma-versions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@ export type EcmaVersion =
| 2022
| 2023
| 2024
export const latestEcmaVersion = 2024
| 2025
export const latestEcmaVersion = 2025
167 changes: 167 additions & 0 deletions src/group-specifiers.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
/**
* Holds information for all GroupSpecifiers included in the pattern.
*
* The validator drives an implementation of this interface while it walks a
* pattern: it notifies it about every Disjunction/Alternative it enters or
* leaves and asks it whether a group name may be declared at the current
* position.
*/
export interface GroupSpecifiers {
/**
* @returns true if there are no GroupSpecifiers included in the pattern.
*/
isEmpty: () => boolean
/**
* Forgets every recorded group name so the instance can be reused for
* another pattern.
*/
clear: () => void
/**
* Called when visiting the Disjunction.
* For ES2025, manage nesting with new Disjunction scopes.
*/
enterDisjunction: () => void
/**
* Called when visiting the Alternative.
* For ES2025, manage nesting with new Alternative scopes.
*
* `index` is the zero-based position of the alternative inside its
* enclosing Disjunction.
*/
enterAlternative: (index: number) => void
/**
* Called when leaving the Disjunction.
* Must be paired with a preceding `enterDisjunction` call.
*/
leaveDisjunction: () => unknown
/**
* Checks whether the given group name is within the pattern.
*/
hasInPattern: (name: string) => boolean
/**
* Checks whether the given group name is within the current scope.
* A name that is already in scope cannot be declared again.
*/
hasInScope: (name: string) => boolean
/**
* Adds the given group name to the current scope.
*/
addToScope: (name: string) => void
}

/**
 * Group-name registry for ECMAScript versions before 2025.
 *
 * The whole pattern forms one single scope here, so "in scope" and
 * "in pattern" mean the same thing and the disjunction/alternative hooks
 * have nothing to track.
 */
export class GroupSpecifiersAsES2018 implements GroupSpecifiers {
    private readonly groupName = new Set<string>()

    /**
     * @returns true while no group name has been registered.
     */
    public isEmpty(): boolean {
        return this.groupName.size === 0
    }

    /**
     * Drops every registered group name.
     */
    public clear(): void {
        this.groupName.clear()
    }

    /**
     * Registers the given group name.
     */
    public addToScope(name: string): void {
        this.groupName.add(name)
    }

    /**
     * @returns true if the name was registered anywhere in the pattern.
     */
    public hasInPattern(name: string): boolean {
        return this.groupName.has(name)
    }

    /**
     * Same answer as `hasInPattern`: there is only one scope.
     */
    public hasInScope(name: string): boolean {
        const declared = this.hasInPattern(name)
        return declared
    }

    // eslint-disable-next-line class-methods-use-this
    public enterDisjunction(): void {
        // No-op: disjunction scopes are not tracked before ES2025.
    }

    // eslint-disable-next-line class-methods-use-this
    public enterAlternative(): void {
        // No-op: alternative scopes are not tracked before ES2025.
    }

    // eslint-disable-next-line class-methods-use-this
    public leaveDisjunction(): void {
        // No-op: disjunction scopes are not tracked before ES2025.
    }
}

/**
 * Track disjunction structure to determine whether a duplicate
 * capture group name is allowed because it is in a separate branch.
 *
 * Every branch knows its parent branch and the "base" that identifies the
 * set of sibling branches (the alternatives of one disjunction) it belongs to.
 */
class BranchID {
    /** Parent disjunction branch, or `null` for the outermost branch. */
    public readonly parent: BranchID | null

    /** Identifies this set of sibling branches. */
    private readonly base: BranchID

    /**
     * @param parent The enclosing branch, or `null` for the outermost branch.
     * @param base The branch identifying the sibling set to join, or `null`
     * to start a new sibling set identified by this branch itself.
     */
    public constructor(parent: BranchID | null, base: BranchID | null) {
        this.parent = parent
        this.base = base ?? this
    }

    /**
     * A branch is separate from another branch if they or any of
     * their parents are siblings in a given disjunction.
     *
     * Implemented as a plain scan over every (ancestor-or-self of `this`,
     * ancestor-or-self of `other`) pair. The doubly recursive formulation
     * checks the same pairs but revisits them once per path through the
     * ancestor lattice, which is exponential in the nesting depth whenever
     * the answer is `false`.
     *
     * @param other The branch to compare with.
     * @returns true if the two branches can never both participate in a match.
     */
    public separatedFrom(other: BranchID): boolean {
        for (
            let mine: BranchID | null = this;
            mine !== null;
            mine = mine.parent
        ) {
            for (
                let theirs: BranchID | null = other;
                theirs !== null;
                theirs = theirs.parent
            ) {
                // Same sibling set but different members => distinct
                // alternatives of one disjunction.
                if (mine.base === theirs.base && mine !== theirs) {
                    return true
                }
            }
        }
        return false
    }

    /**
     * @returns A new branch nested inside this one, starting a new sibling set.
     */
    public child(): BranchID {
        return new BranchID(this, null)
    }

    /**
     * @returns A new branch with the same parent and sibling set as this one.
     */
    public sibling(): BranchID {
        return new BranchID(this.parent, this.base)
    }
}

/**
 * Group-name registry for ES2025 and later, where a group name may be
 * declared more than once as long as every declaration sits in a different
 * alternative of some disjunction.
 *
 * Each declared name is stored together with the branch it was declared in;
 * the current branch is updated by the disjunction/alternative hooks.
 */
export class GroupSpecifiersAsES2025 implements GroupSpecifiers {
    private branchID = new BranchID(null, null)

    private readonly groupNames = new Map<string, BranchID[]>()

    /**
     * @returns true while no group name has been registered.
     */
    public isEmpty(): boolean {
        return this.groupNames.size === 0
    }

    /**
     * Drops every registered name and restarts from a fresh root branch.
     */
    public clear(): void {
        this.groupNames.clear()
        this.branchID = new BranchID(null, null)
    }

    /**
     * Descends into a new branch nested in the current one.
     */
    public enterDisjunction(): void {
        const nested = this.branchID.child()
        this.branchID = nested
    }

    /**
     * Moves to a sibling branch for every alternative after the first;
     * the first alternative reuses the branch created on disjunction entry.
     */
    public enterAlternative(index: number): void {
        if (index !== 0) {
            this.branchID = this.branchID.sibling()
        }
    }

    /**
     * Returns to the branch that encloses the current disjunction.
     */
    public leaveDisjunction(): void {
        const enclosing = this.branchID.parent
        this.branchID = enclosing!
    }

    /**
     * @returns true if the name was declared anywhere in the pattern.
     */
    public hasInPattern(name: string): boolean {
        return this.groupNames.has(name)
    }

    /**
     * @returns true if some earlier declaration of the name lives in a
     * branch that is not separated from the current branch.
     */
    public hasInScope(name: string): boolean {
        const declaredIn = this.groupNames.get(name)
        if (declaredIn === undefined) {
            return false
        }
        return declaredIn.some(
            (branch) => !branch.separatedFrom(this.branchID),
        )
    }

    /**
     * Records that the name is declared in the current branch.
     */
    public addToScope(name: string): void {
        const declaredIn = this.groupNames.get(name)
        if (declaredIn === undefined) {
            this.groupNames.set(name, [this.branchID])
        } else {
            declaredIn.push(this.branchID)
        }
    }
}
23 changes: 17 additions & 6 deletions src/parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -148,12 +148,21 @@ class RegExpParserState {

for (const reference of this._backreferences) {
const ref = reference.ref
const group =
const groups =
typeof ref === "number"
? this._capturingGroups[ref - 1]
: this._capturingGroups.find((g) => g.name === ref)!
reference.resolved = group
group.references.push(reference)
? [this._capturingGroups[ref - 1]]
: this._capturingGroups.filter((g) => g.name === ref)
if (groups.length === 1) {
const group = groups[0]
reference.ambiguous = false
reference.resolved = group
} else {
reference.ambiguous = true
reference.resolved = groups
}
for (const group of groups) {
group.references.push(reference)
}
}
}

Expand Down Expand Up @@ -480,6 +489,7 @@ class RegExpParserState {
end,
raw: this.source.slice(start, end),
ref,
ambiguous: false,
resolved: DUMMY_CAPTURING_GROUP,
}
parent.elements.push(node)
Expand Down Expand Up @@ -747,14 +757,15 @@ export namespace RegExpParser {
strict?: boolean

/**
* ECMAScript version. Default is `2024`.
* ECMAScript version. Default is `2025`.
* - `2015` added `u` and `y` flags.
* - `2018` added `s` flag, Named Capturing Group, Lookbehind Assertion,
* and Unicode Property Escape.
* - `2019`, `2020`, and `2021` added more valid Unicode Property Escapes.
* - `2022` added `d` flag.
* - `2023` added more valid Unicode Property Escapes.
* - `2024` added `v` flag.
* - `2025` added duplicate named capturing groups.
*/
ecmaVersion?: EcmaVersion
}
Expand Down
27 changes: 20 additions & 7 deletions src/validator.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
import type { EcmaVersion } from "./ecma-versions"
import { latestEcmaVersion } from "./ecma-versions"
import type { GroupSpecifiers } from "./group-specifiers"
import {
GroupSpecifiersAsES2018,
GroupSpecifiersAsES2025,
} from "./group-specifiers"
import { Reader } from "./reader"
import { newRegExpSyntaxError } from "./regexp-syntax-error"
import {
Expand Down Expand Up @@ -231,14 +236,15 @@ export namespace RegExpValidator {
strict?: boolean

/**
* ECMAScript version. Default is `2024`.
* ECMAScript version. Default is `2025`.
* - `2015` added `u` and `y` flags.
* - `2018` added `s` flag, Named Capturing Group, Lookbehind Assertion,
* and Unicode Property Escape.
* - `2019`, `2020`, and `2021` added more valid Unicode Property Escapes.
* - `2022` added `d` flag.
* - `2023` added more valid Unicode Property Escapes.
* - `2024` added `v` flag.
* - `2025` added duplicate named capturing groups.
*/
ecmaVersion?: EcmaVersion

Expand Down Expand Up @@ -631,7 +637,7 @@ export class RegExpValidator {

private _numCapturingParens = 0

private _groupNames = new Set<string>()
private _groupSpecifiers: GroupSpecifiers

private _backreferenceNames = new Set<string>()

Expand All @@ -643,6 +649,10 @@ export class RegExpValidator {
*/
public constructor(options?: RegExpValidator.Options) {
this._options = options ?? {}
this._groupSpecifiers =
this.ecmaVersion >= 2025
? new GroupSpecifiersAsES2025()
: new GroupSpecifiersAsES2018()
}

/**
Expand Down Expand Up @@ -763,7 +773,7 @@ export class RegExpValidator {
if (
!this._nFlag &&
this.ecmaVersion >= 2018 &&
this._groupNames.size > 0
!this._groupSpecifiers.isEmpty()
) {
this._nFlag = true
this.rewind(start)
Expand Down Expand Up @@ -1301,7 +1311,7 @@ export class RegExpValidator {
private consumePattern(): void {
const start = this.index
this._numCapturingParens = this.countCapturingParens()
this._groupNames.clear()
this._groupSpecifiers.clear()
this._backreferenceNames.clear()

this.onPatternEnter(start)
Expand All @@ -1322,7 +1332,7 @@ export class RegExpValidator {
this.raise(`Unexpected character '${c}'`)
}
for (const name of this._backreferenceNames) {
if (!this._groupNames.has(name)) {
if (!this._groupSpecifiers.hasInPattern(name)) {
this.raise("Invalid named capture referenced")
}
}
Expand Down Expand Up @@ -1378,6 +1388,7 @@ export class RegExpValidator {
const start = this.index
let i = 0

this._groupSpecifiers.enterDisjunction()
this.onDisjunctionEnter(start)
do {
this.consumeAlternative(i++)
Expand All @@ -1390,6 +1401,7 @@ export class RegExpValidator {
this.raise("Lone quantifier brackets")
}
this.onDisjunctionLeave(start, this.index)
this._groupSpecifiers.leaveDisjunction()
}

/**
Expand All @@ -1403,6 +1415,7 @@ export class RegExpValidator {
private consumeAlternative(i: number): void {
const start = this.index

this._groupSpecifiers.enterAlternative(i)
this.onAlternativeEnter(start, i)
while (this.currentCodePoint !== -1 && this.consumeTerm()) {
// do nothing.
Expand Down Expand Up @@ -1846,8 +1859,8 @@ export class RegExpValidator {
private consumeGroupSpecifier(): boolean {
if (this.eat(QUESTION_MARK)) {
if (this.eatGroupName()) {
if (!this._groupNames.has(this._lastStrValue)) {
this._groupNames.add(this._lastStrValue)
if (!this._groupSpecifiers.hasInScope(this._lastStrValue)) {
this._groupSpecifiers.addToScope(this._lastStrValue)
return true
}
this.raise("Duplicate capture group name")
Expand Down
4 changes: 4 additions & 0 deletions test/fixtures/parser/literal/basic-valid-2015-u.json
Original file line number Diff line number Diff line change
Expand Up @@ -1690,6 +1690,7 @@
"end": 6,
"raw": "\\1",
"ref": 1,
"ambiguous": false,
"resolved": "♻️../0"
}
]
Expand Down Expand Up @@ -1741,6 +1742,7 @@
"end": 3,
"raw": "\\1",
"ref": 1,
"ambiguous": false,
"resolved": "♻️../1"
},
{
Expand Down Expand Up @@ -2104,6 +2106,7 @@
"end": 34,
"raw": "\\10",
"ref": 10,
"ambiguous": false,
"resolved": "♻️../9"
}
]
Expand Down Expand Up @@ -2465,6 +2468,7 @@
"end": 37,
"raw": "\\11",
"ref": 11,
"ambiguous": false,
"resolved": "♻️../10"
}
]
Expand Down
Loading

0 comments on commit fb20f68

Please sign in to comment.