Skip to content

Commit b2c91a6

Browse files
committed
Fix TerminalGenerator.cs
1 parent 374869e commit b2c91a6

File tree

4 files changed

+34
-53
lines changed

4 files changed

+34
-53
lines changed

Parsers/DotParser/DotTerminals.cs

Lines changed: 1 addition & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,8 @@ public sealed partial class DotTerminals
1515
[Regex(@"\d+")]
1616
public static partial Terminal Number();
1717

18-
[Regex(@"(//[^\n]*(\n|$)|\s)*")]
18+
[Regex(@"(//[^\n]*|\s)*")]
1919
public static partial Terminal Trivia();
20-
//public static Terminal Trivia() => _trivia;
2120

2221
private sealed record QuotedStringMatcher() : Terminal(Kind: "QuotedString")
2322
{
@@ -55,35 +54,4 @@ public override int TryMatch(string input, int startPos)
5554
}
5655

5756
private static readonly Terminal _quotedString = new QuotedStringMatcher();
58-
59-
//private sealed record TriviaMatcher() : Terminal(Kind: "Trivia")
60-
//{
61-
// public override int TryMatch(string input, int startPos)
62-
// {
63-
// var i = startPos;
64-
// for (; i < input.Length; i++)
65-
// {
66-
// var c = input[i];
67-
//
68-
// if (char.IsWhiteSpace(c))
69-
// continue;
70-
//
71-
// if (c == '/' && peek() == '/')
72-
// {
73-
// for (i += 2; i < input.Length && (c = input[i]) != '\n'; i++)
74-
// ;
75-
// i--;
76-
// }
77-
// else
78-
// return i - startPos;
79-
// }
80-
//
81-
// return i - startPos;
82-
// char peek() => i + 1 < input.Length ? input[i + 1] : '\0';
83-
// }
84-
//
85-
// public override string ToString() => @"Trivia";
86-
//}
87-
//
88-
//private static readonly Terminal _trivia = new TriviaMatcher();
8957
}

TerminalGenerator/TerminalGenerator.cs

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#nullable enable
1+
#nullable enable
22

33
using Diagnostics;
44
using Microsoft.CodeAnalysis;
@@ -175,18 +175,20 @@ private string GenerateDfaCode(DfaState start, int startIndent)
175175
}
176176

177177
sb.AppendLine($$"""
178-
int currentPos = startPos;
179-
int length = input.Length;
180-
int currentState = {{start.Id}};
181-
int lastAccept = -1;
178+
var currentPos = startPos;
179+
var length = input.Length;
180+
var currentState = {{start.Id}};
181+
var lastAccept = -1;
182182
{{(start.IsFinal ? "lastAccept = currentPos;" : "")}}
183-
""");
183+
//System.Diagnostics.Debugger.Launch();
184184
185-
sb.AppendLine("""
186-
while (currentPos < length)
185+
while (currentPos <= length)
187186
{
188-
char c = input[currentPos];
189-
bool transitionFound = false;
187+
var c = currentPos < length ? input[currentPos] : '\0';
188+
if (c == '\0')
189+
{
190+
}
191+
var transitionFound = false;
190192
switch (currentState)
191193
{
192194
""");
@@ -216,14 +218,15 @@ private string GenerateDfaCode(DfaState start, int startIndent)
216218
static string generateStateTransitions(DfaState state, IndentHelper indent)
217219
{
218220
var sb = new StringBuilder();
219-
foreach (var transition in state.Transitions)
221+
222+
foreach (var transition in state.Transitions)
220223
{
221224
var condition = generateTransitionCondition(transition.Condition);
222225
sb.AppendLine($$"""
223226
if ({{condition}})
224227
{
225228
currentState = {{transition.Target.Id}};
226-
currentPos++;
229+
{{(transition.Condition is RegexEndOfLine ? "" : "currentPos++;")}}
227230
{{(transition.Target.IsFinal ? "lastAccept = currentPos;" : "")}}
228231
transitionFound = true;
229232
continue;
@@ -237,24 +240,25 @@ static string generateTransitionCondition(RegexNode node)
237240
{
238241
return node switch
239242
{
240-
RegexChar rc => $"""c == '{EscapeChar(rc.Value)}'""",
241-
RegexAnyChar => "true",
242-
RangesCharClass rcc when rcc.ToString() == "[^[\\n]]" => "c != '\\n'",
243+
RegexEndOfLine x => $@"c == '\0' /* {x} */",
244+
RegexChar rc => $"""c == '{EscapeChar(rc.Value)}' /* {rc} */""",
245+
RegexAnyChar => "true /* RegexAnyChar */",
246+
NegatedCharClassGroup rcc when rcc.ToString() == $@"[^[\n]]" => $@"c is not '\n' and not '\0' /* {rcc} */",
243247
RangesCharClass rcc => GenerateRangeCondition(rcc),
244248
WordCharClass wcc => $"""{(wcc.Negated ? "!" : "")}(char.IsLetterOrDigit(c) || c == '_')""",
245249
DigitCharClass dcc => $"""{(dcc.Negated ? "!" : "")}char.IsDigit(c)""",
246250
WhitespaceCharClass scc => $"""{(scc.Negated ? "!" : "")}char.IsWhiteSpace(c)""",
247251
LetterCharClass lcc => $"""{(lcc.Negated ? "!" : "")}char.IsLetter(c)""",
248-
_ => "false"
252+
_ => $"false /* {node} ({node.GetType().Name}) */"
249253
};
250254
}
251255
}
252256
}
253257

254258
private static string GenerateRangeCondition(RangesCharClass rcc)
255259
{
256-
if (rcc.ToString() == "[^[\\n]]") // Специальный случай для [^\n]
257-
return rcc.Negated ? "c == '\\n'" : "c != '\\n'";
260+
if (rcc.ToString() == @"[^[\n]]") // Специальный случай для [^\n]
261+
return rcc.Negated ? @"c == '\n'" : @"c != '\n'";
258262

259263
var conditions = rcc.Ranges
260264
.Select(r => r.From == r.To

Tests/ParaserTests/Dot/DotTests.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ public void TriviaMatcherTest()
1111
{
1212
var matcher = DotTerminals.Trivia();
1313

14+
test(startPos: 0, expectedLen: 19, " /// Top to Bottom");
1415
test(startPos: 0, expectedLen: 18, " // Top to Bottom");
1516
test(startPos: 1, expectedLen: 24, "; // Top to Bottom\r\n n");
1617
test(startPos: 0, expectedLen: 20, " // Top to Bottom\r\n/ ");

Tests/RegexTests/RegexTests.cs

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -165,10 +165,18 @@ public void DfaMatching()
165165
{
166166
var testCases = new[]
167167
{
168-
(Start: 0, Pattern: @"(//[^\n]*(\n|$)|\s)*", Input: " // Top to Bottom", Expected: 18),
169-
(Start: 1, Pattern: @"(//[^\n]*(\n|$)|\s)*", Input: "; // Top to Bottom\r\n n", Expected: 24),
170168
//012345678901234567 8 901234567890
171169
// 10 20
170+
(Start: 0, Pattern: @"(//[^\n]*|\s)*", Input: " /// Top to Bottom", Expected: 19),
171+
(Start: 0, Pattern: @"(//[^\n]*|\s)*", Input: " // Top to Bottom", Expected: 18),
172+
(Start: 0, Pattern: @"(//[^\n]*|\s)*", Input: " // Top to Bottom", Expected: 18),
173+
(Start: 1, Pattern: @"(//[^\n]*|\s)*", Input: "; // Top to Bottom\r\n n", Expected: 24),
174+
(Start: 0, Pattern: @"(//[^\n]*|\s)*", Input: " // Top to Bottom\r\n/ ", Expected: 20),
175+
(Start: 0, Pattern: @"(//[^\n]*|\s)*", Input: " // Top to Bottom\r\n ", Expected: 21),
176+
(Start: 0, Pattern: @"(//[^\n]*|\s)*", Input: " // Top to Bottom\n ", Expected: 20),
177+
178+
(Start: 0, Pattern: @"(//[^\n]*(\n|$)|\s)*", Input: " // Top to Bottom", Expected: 18),
179+
(Start: 1, Pattern: @"(//[^\n]*(\n|$)|\s)*", Input: "; // Top to Bottom\r\n n", Expected: 24),
172180
(Start: 0, Pattern: @"(//[^\n]*(\n|$)|\s)*", Input: " // Top to Bottom\r\n/ ", Expected: 20),
173181
(Start: 0, Pattern: @"(//[^\n]*(\n|$)|\s)*", Input: " // Top to Bottom\r\n ", Expected: 21),
174182
(Start: 0, Pattern: @"(//[^\n]*(\n|$)|\s)*", Input: " // Top to Bottom\n ", Expected: 20),

0 commit comments

Comments
 (0)