// Load some auxiliary tools
#load "grammartools.fsx"
open CSCI374.GrammarTools
open CSCI374.ParserTypes

type Tokenizer(grammar: PRODUCTION [], verbose: bool) =
    let mutable inputState = []
    let mutable currentToken = INVALID

    // Access to the current token
    member this.CurrentToken = currentToken
    member this.NextToken() =
        let tkn, input = CSCI374.Lexer.token inputState
        inputState <- input
        currentToken <- tkn
        this

    member this.InputState
        with set(str) = inputState <- Seq.toList str
    member this.IsVerbose = verbose
    member this.PrintRule ruleIdx =
        printGrammarRule false grammar ruleIdx // print rule
    new (grammar) = Tokenizer(grammar, false)
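// A Tokenizer is constructed from a grammar, optionally with verbose tracing, and is fed an
// input string through the InputState setter, e.g. Tokenizer(grammar, InputState=inputString)
// as in the parser runs at the bottom of this script.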
/// This infix operator function provides verbose output while calling
/// a particular production rule
let (==>) (cnxt: Tokenizer) (prod: Tokenizer -> Tokenizer) =
    if cnxt.IsVerbose then
        printfn "Enter <%A> with token `%A`" prod cnxt.CurrentToken
    let nextcnxt = prod cnxt
    if cnxt.IsVerbose then
        printfn "Exit <%A> with token `%A`" prod cnxt.CurrentToken
    nextcnxt

/// This infix operator prints a production rule:
/// the call `cnxt @ 2` prints the second grammar rule
let (@) (cnxt: Tokenizer) ruleIdx =
    cnxt.PrintRule ruleIdx
    cnxt
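// Used together the two operators read like the grammar rule itself: a chain such as
// `cnxt @(1) ==> Match A ==> Match F` (see ProdS below) prints rule 1 and then threads
// the Tokenizer left to right through each step of the production body.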

let grammarfake = parseGrammarString """
S -> eaf | eUT
T -> e
U -> UcS | ae
"""
let grammar = parseGrammarString """
S -> eaf | eUT
T -> e
U -> aeV
V -> cSV | ε
"""
printfn "%A" grammar

// Show grammar rules
printGrammar grammar
let rec ProdS (cnxt: Tokenizer) =
    // Both S alternatives start with `e` (S -> eaf | eUT): if the current token is `E`,
    // consume it and use the next token to choose between rule 1 and rule 2
    if cnxt.CurrentToken = E then
        cnxt.NextToken() |> ignore
        if cnxt.CurrentToken = A then
            // 1: S → eaf
            cnxt @(1) ==> Match A ==> Match F
        else
            // 2: S → eUT
            cnxt @(2) ==> ProdU ==> ProdT
    else
        cnxt
/// The function for production T → e is straightforward: match the terminal `e`
and ProdT (cnxt: Tokenizer) =
    // 3: T -> e
    cnxt @(3) ==> Match E

and ProdU (cnxt: Tokenizer) =
    // 4: U → aeV
    cnxt @(4) ==> Match A ==> Match E ==> ProdV

and ProdV (cnxt: Tokenizer) =
    if cnxt.CurrentToken = C then
        // 5: V → cSV
        cnxt.NextToken() @(5) ==> ProdS ==> ProdV
    else
        // 6: V → ε
        cnxt @(6) ==> Match EPS

/// For each terminal symbol, compare it with the current token:
/// if they match, continue with the next token, otherwise report an error
and Match term cnxt =
    if cnxt.IsVerbose then printfn "Match %A with %A" term cnxt.CurrentToken
    //printf "The Term `%A` and the current Token `%A`" term cnxt.CurrentToken
    // if we matched the current token with a terminal symbol
    if term = cnxt.CurrentToken then
        cnxt.NextToken() // read next token
    else
        failwith (sprintf "Cannot match symbol `%A` with `%A`" term cnxt.CurrentToken)

/// Start parsing by calling the starting symbol function
let parser (cnxt: Tokenizer) : Tokenizer =
    // Read a token and pass it to the function for the S rule
    cnxt.NextToken() ==> ProdS

let inputString = "eaeceafceaeceafee"
inputString |> Seq.toList |> CSCI374.Lexer.tokenize |> printfn "%A"

Tokenizer(grammar, InputState=inputString) |> parser |> ignore

Tokenizer(grammar, true, InputState="eaeceafceaeceafee") |> parser |> ignore
Tokenizer(grammar, true, InputState="eaeceaeee") |> parser |> ignore
Tokenizer(grammar, true, InputState="eaeceaeeceafe") |> parser |> ignore
Tokenizer(grammar, true, InputState="eaeceafceaeeceafe") |> parser |> ignore
Tokenizer(grammar, true, InputState="eaeceafceaeee") |> parser |> ignore