Skip to content

Commit

Permalink
Merge pull request #10 from baioc/ll1
Browse files Browse the repository at this point in the history
LL(1) compilation and execution
  • Loading branch information
baioc committed Sep 18, 2021
2 parents b81160c + 0cef7f9 commit 5e10181
Show file tree
Hide file tree
Showing 7 changed files with 548 additions and 177 deletions.
457 changes: 310 additions & 147 deletions src/Client/Index.fs

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions src/Client/style.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/* Keep the contents of every table cell on a single line (no automatic wrapping). */
td {
white-space: nowrap;
}
1 change: 1 addition & 0 deletions src/Server/Server.fs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ let storage = Storage()

// Server-side API record; its field names match the `FormallySharp` record type.
// Each handler wraps one synchronous call in an `async` block.
let api =
// lexer generation is delegated to Lexer.make
{ generateLexer = fun spec -> async { return Lexer.make spec }
// parser generation is delegated to Parser.make
generateParser = fun grammar -> async { return Parser.make grammar }
// project persistence goes through the module-level `storage` value
saveProject = fun project -> async { return storage.SaveProject(project) }
loadProject = fun id -> async { return storage.GetProject(id) } }

Expand Down
23 changes: 11 additions & 12 deletions src/Shared/ContextFree.fs
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,11 @@ type Grammar<'Terminal, 'NonTerminal when 'Terminal: comparison and 'NonTerminal

[<RequireQualifiedAccess>]
module Grammar =
// NOTE(review): two function headers (`derivationsFrom`, `derivationsOf`) share the
// single body below, and `first` calls both names — confirm which name is current.
/// Finds the subset of derivations with a specific symbol at its head.
let derivationsFrom symbol grammar =
/// Finds the DIRECT derivations with a specific symbol at its head.
let derivationsOf symbol grammar =
// keeps the (head, body) rules of `grammar.Rules` whose head equals `symbol`
Set.filter (fun (head, body) -> head = symbol) grammar.Rules

/// Computes the FIRST set of a given symbol sequence in a grammar.
/// Computes the FIRST-set of a given symbol sequence in a grammar.
///
/// Epsilon is a terminal symbol represented by `None`.
let rec first symbols grammar =
Expand All @@ -62,8 +62,7 @@ module Grammar =
// (but with an altered grammar to avoid going infinite on cycles)
| NonTerminal n :: rest ->
let firstSet =
grammar
|> derivationsFrom n
derivationsOf n grammar
|> Seq.map
(fun (head, body) ->
let grammar =
Expand All @@ -80,8 +79,8 @@ module Grammar =
(Set.remove None firstSet)
(first rest grammar)

/// Computes the FOLLOW set of every non-terminal symbol in the grammar.
let followSets endmarker (grammar: Grammar<_, _>) =
/// Computes the FOLLOW-set of every non-terminal symbol in the grammar.
let followSets (grammar: Grammar<_, _>) endmarker =
// initially, FOLLOW(<startSymbol>) = { endmarker }
let mutable follows = System.Collections.Generic.Dictionary()
for symbol in grammar.NonTerminals do
Expand Down Expand Up @@ -161,7 +160,7 @@ module private Stack =
type DpdaTransition<'State, 'InputSymbol, 'StackSymbol
when 'State: comparison and 'InputSymbol: comparison and 'StackSymbol: comparison> =
// a single (next state, stack action) pair, not keyed by any input symbol
| EpsilonTransition of 'State * StackAction<'StackSymbol>
// NOTE(review): the next two cases differ only in name (singular / plural) and
// carry the same payload; `Parser.make` builds the plural one — confirm the
// singular spelling is stale.
| InputConsumingTransition of Map<'InputSymbol, ('State * StackAction<'StackSymbol>)>
// one (next state, stack action) pair per input symbol
| InputConsumingTransitions of Map<'InputSymbol, ('State * StackAction<'StackSymbol>)>

/// This type is defined such that building a non-deterministic PDA is impossible.
type private DpdaTransitionTable<'State, 'InputSymbol, 'StackSymbol
Expand All @@ -186,7 +185,7 @@ type Dpda<'State, 'InputSymbol, 'StackSymbol
(fun ((q, topOfStack), transition) ->
match transition with
| EpsilonTransition (q', action) -> set [ q; q' ]
| InputConsumingTransition options ->
| InputConsumingTransitions options ->
Map.toSeq options
|> Seq.map (fun (input, (q', action)) -> q')
|> Set.ofSeq
Expand All @@ -202,7 +201,7 @@ type Dpda<'State, 'InputSymbol, 'StackSymbol
(fun (_, transition) ->
match transition with
| EpsilonTransition _ -> Set.empty
| InputConsumingTransition options ->
| InputConsumingTransitions options ->
Map.toSeq options
|> Seq.map (fun (input, action) -> input)
|> Set.ofSeq)
Expand All @@ -219,7 +218,7 @@ type Dpda<'State, 'InputSymbol, 'StackSymbol
| EpsilonTransition (q', action) ->
symbolsInAction action
|> Set.add topOfStack
| InputConsumingTransition options ->
| InputConsumingTransitions options ->
Map.toSeq options
|> Seq.map (fun (input, (q', action)) -> symbolsInAction action)
|> Set.unionMany
Expand Down Expand Up @@ -253,7 +252,7 @@ type Dpda<'State, 'InputSymbol, 'StackSymbol
| None -> this.Dead, stack, Ok NoOp
| Some (EpsilonTransition (nextState, action)) ->
tryTransition stack (nextState, action)
| Some (InputConsumingTransition options) ->
| Some (InputConsumingTransitions options) ->
match Map.tryFind input options with
| None -> this.Dead, stack, Ok NoOp
| Some (nextState, action) ->
Expand Down
149 changes: 137 additions & 12 deletions src/Shared/Shared.fs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
namespace Shared

open System.Collections.Generic
open System.Text.RegularExpressions
open System.Runtime.CompilerServices

Expand Down Expand Up @@ -91,7 +92,7 @@ type TokenInstance =

/// Indicates a lexical error and keeps track of non-lexed input.
type LexicalError =
// NOTE(review): two opening field lines follow (`String` / `Irritant`) with the
// same type, and the lexer code builds errors with both spellings — confirm
// which field name is current.
{ String: char seq
// the characters that could not be lexed; the lexer fills this from `lexer.String`,
// optionally followed by the remaining inputs
{ Irritant: char seq
// filled from `lexer.Start` where the error is built
Position: uint }

/// Functions for creating and manipulating lexers.
Expand Down Expand Up @@ -194,7 +195,7 @@ module Lexer =
| Some lastToken -> yield Ok lastToken
| None -> ()
else
let error = { Position = lexer.Start; String = lexer.String }
let error = { Position = lexer.Start; Irritant = lexer.String }
yield Error error // aka "unexpected end of file ..."

// otherwise, apply transition logic and iterate down the input stream
Expand Down Expand Up @@ -223,7 +224,7 @@ module Lexer =
elif justDied && (not wasAccepting) then
// make an error containing all input from this point forward
yield Error { Position = lexer.Start
String = Seq.append lexer.String inputs }
Irritant = Seq.append lexer.String inputs }

else
// otherwise, keep going with the updated lexer
Expand All @@ -239,24 +240,147 @@ module Lexer =
/// Syntactical spec, where terminals are assumed to identify tokens.
type Grammar = Grammar<Identifier, Identifier>
/// Grammar symbol whose terminals and non-terminals are both named by an `Identifier`.
type Symbol = Symbol<Identifier, Identifier>
/// Parsing table as built by `Parser.make`: each (non-terminal, lookahead) cell
/// maps to the set of production bodies placed in it. `Parser.make` treats a
/// cell holding more than one body as an LL(1) conflict.
type SyntacticalAnalysisTable = Map<Identifier * Identifier, Set<list<Symbol>>>

// NOTE(review): `LL1State` is declared twice below with different case names
// (`Parsing` vs `Parse`); `Parser.make` uses `Parse`, so the first declaration
// looks stale — confirm.
/// Due to multiple stack actions in a single transition, we only need 3 states.
type LL1State = Dead | Parsing | Accept
// these help us handle the differences between a DPDA and an LL(1) parser
type LL1State = Parse | Accept | Dead
/// What to do with the current input token after a parser step: `Consume` it,
/// or `Keep` it (carrying the token) so that it is offered again on the next step.
type InputAction<'InputSymbol> = Consume | Keep of 'InputSymbol
/// Output of a successful parser step: the stack action paired with the input action.
type ParserAction<'InputSymbol, 'StackSymbol> =
StackAction<'StackSymbol> * InputAction<'InputSymbol>

/// Table-based LL(1) parser and its dynamic state.
/// Table-based LL(1) parser.
type Parser =
// the underlying pushdown automaton; its `Current` field holds the (state, stack) pair
{ Automaton: Dpda<LL1State, Identifier, Symbol>
// NOTE(review): the record is closed on the next line and again on the one after;
// `Parser.make` only sets `Automaton` and `AcceptsEmpty` — confirm `Initial` is stale.
Initial: LL1State * Stack<Symbol> }
// set by `Parser.make` to whether epsilon is in FIRST of the grammar's initial symbol;
// read by `Parser.accepts` when the token sequence is empty
AcceptsEmpty: bool }

// The parser is itself an automaton: it is viewed as a (state, stack) pair, steps
// on token instances and outputs either a parser action or `Error ()`.
interface IAutomaton<(LL1State * Stack<Symbol>), TokenInstance, Result<ParserAction<TokenInstance, Symbol>, unit>> with
// exposes the wrapped automaton's current (state, stack) pair
member this.View = this.Automaton.Current

// Steps the wrapped automaton on the token's kind and decides whether the
// token itself should be consumed or kept for the next step.
member this.Step input =
// only `input.Token` is fed to the underlying automaton
let output, automaton = Automaton.step input.Token this.Automaton
let state = Automaton.view automaton
// rebuild the Dpda record with the post-step (state, stack) pair
let automaton = { this.Automaton with Current = state }
let output =
match output with
| Error () -> Error ()
| Ok action ->
// input should NOT be consumed when a derivation is performed
// (`this.Automaton.Current` is still the pair from BEFORE this step, so a
// non-terminal on top of that stack means a production was just expanded)
match action, snd this.Automaton.Current with
| ReplaceTop _, NonTerminal _ :: _ -> Ok (action, Keep input)
| _ -> Ok (action, Consume)

// return the step output together with the updated parser, upcast to the interface
output, { this with Automaton = automaton } :> IAutomaton<_, _, _>

/// Functions for creating and manipulating LL(1) parsers.
module Parser =
/// Builds a new Parser according to the given syntactical specification.
let [<Literal>] private Endmarker = "$"

/// <summary>
/// Makes an LL(1) parser according to the given syntactical specification.
/// </summary>
///
/// <param name="grammar">
/// The grammar to compile; its rules, terminals and initial symbol are read.
/// </param>
///
/// <returns>
/// `Ok parser` when every cell of the parsing table holds at most one
/// production; otherwise `Error table`, where `table` is the whole parsing
/// table (as a `Map` keyed by non-terminal and lookahead) including the cells
/// with LL(1) conflicts.
/// </returns>
let make grammar =
    // FOLLOW-sets are computed with "$" as the end-of-input marker
    let follows = Grammar.followSets grammar Endmarker

    // finds all the table entries that must contain a given production rule
    let entriesForRule (head, body) =
        Grammar.first body grammar
        |> Seq.map
            (function
                // (head, x) for every x in FIRST(body)
                | Some lookahead ->
                    set [ (head, lookahead), body ]
                // if epsilon is in FIRST(body), (head, x) for every x in FOLLOW(head)
                | None ->
                    follows.[head]
                    |> Set.map (fun lookahead -> ((head, lookahead), body)))
        |> Set.unionMany

    // build the parsing table, with a set of productions at each cell
    let entries = grammar.Rules |> Seq.map entriesForRule |> Set.unionMany
    // the dictionary is mutated in place and never rebound, so no `mutable` is needed
    let table = Dictionary()
    for cell, rule in entries do
        if table.ContainsKey(cell) then
            table.[cell] <- Set.add rule table.[cell]
        else
            table.[cell] <- Set.singleton rule

    // the grammar is LL(1) iff no cell holds more than one production
    let isLL1 =
        table
        |> Seq.forall (fun (entry: KeyValuePair<_, _>) -> Set.count entry.Value <= 1)
    if not isLL1 then
        // hand the complete table back so the conflicts can be inspected
        table
        |> Seq.map (fun entry -> entry.Key, entry.Value)
        |> Map.ofSeq
        |> Error
    else
        // transitions are grouped by (state, top of stack), then keyed by input symbol
        let transitions = Dictionary()

        // registers one transition; a later registration for the same
        // (state, top of stack, input) triple replaces the earlier one
        let (|->) (state, input, topOfStack) (next, action) =
            if transitions.ContainsKey((state, topOfStack)) then
                transitions.[(state, topOfStack)] <-
                    Map.add input (next, action) transitions.[(state, topOfStack)]
            else
                transitions.[(state, topOfStack)] <-
                    Map.ofSeq [ input, (next, action) ]

        // for every terminal, there's a transition (Parse -> Parse) where,
        // if the top of the stack and the input symbol match, remove both
        for symbol in grammar.Terminals do
            (Parse, symbol, Terminal symbol) |-> (Parse, ReplaceTop [])

        // for non-terminals, we add a transition that does a derivation
        // on the stack based on the syntactical analysis table
        // NOTE: PDAs always step on input, so the lookahead is consumed
        for entry in table do
            let (symbol, lookahead), rules = entry.Key, entry.Value
            // every cell is created non-empty and the LL(1) check above caps it at
            // one production, so this picks the cell's only element
            let derivation = Set.minElement rules
            (Parse, lookahead, NonTerminal symbol) |-> (Parse, ReplaceTop derivation)

        // matching the endmarker as a terminal moves to the accept state
        do (Parse, Endmarker, Terminal Endmarker) |-> (Accept, ReplaceTop [])

        // freeze the dictionary into the immutable transition map
        let transitions =
            Map.ofSeq <| seq {
                for entry in transitions do
                    entry.Key, InputConsumingTransitions entry.Value
            }

        // the stack starts with the initial symbol on top of the endmarker
        let automaton =
            { Transitions = transitions
              Current = Parse, [ NonTerminal grammar.Initial; Terminal Endmarker ]
              Accepting = Set.singleton Accept
              Dead = Dead }

        // the empty input is accepted iff epsilon is in FIRST(initial symbol)
        let acceptsEmpty =
            Grammar.first [ NonTerminal grammar.Initial ] grammar
            |> Set.contains None

        Ok { Automaton = automaton; AcceptsEmpty = acceptsEmpty }

/// Tests whether a sequence of tokens is accepted by the given parser.
///
/// An empty sequence is answered directly from `parser.AcceptsEmpty`. Otherwise
/// an endmarker token is appended and the parser is stepped token by token; the
/// input is accepted when it runs out with the automaton in the `Accept` state,
/// and rejected as soon as a step returns `Error ()`.
let accepts parser tokens =
    if Seq.isEmpty tokens then
        parser.AcceptsEmpty
    else
        let rec loop currentState inputs =
            match Seq.tryHead inputs with
            // input exhausted: accept iff the automaton ended in `Accept`
            | None -> (fst <| Automaton.view currentState) = Accept
            | Some input ->
                match Automaton.step input currentState with
                | Error (), _ -> false
                // the step asked to keep the token: offer the same token again
                | Ok (_, Keep _), nextState -> loop nextState inputs
                // NOTE(review): each `Seq.tail` wraps the previous sequence, so long
                // inputs may be re-enumerated from the start on every step — confirm
                // whether token streams are long enough for this to matter.
                | Ok (_, Consume), nextState -> loop nextState (Seq.tail inputs)

        // terminate the input with an endmarker token (empty lexeme, position 0)
        let tokens =
            Seq.append
                tokens
                (Seq.singleton { Token = Endmarker; Lexeme = ""; Position = 0u })

        loop parser tokens

/// Lazily compute a sequence of derivations based on a stream of input tokens.
let parse parser tokens =
    // not implemented yet: always raises
    failwith "TODO: Parser.parse"


/// A formal language project.
Expand All @@ -271,6 +395,7 @@ type Project =
/// Fable.Remoting, everything transmitted needs to be a (public) value type.
type FormallySharp =
// builds a `Lexer` from a lexical specification
{ generateLexer: LexicalSpecification -> Async<Lexer>
// builds a `Parser` from a grammar; the `Error` case carries the
// `SyntacticalAnalysisTable` instead of a parser
generateParser: Grammar -> Async<Result<Parser, SyntacticalAnalysisTable>>
// takes a project to store and returns nothing
saveProject: Project -> Async<unit>
// returns the project for the given identifier
loadProject: Identifier -> Async<Project> }

Expand Down
8 changes: 4 additions & 4 deletions tests/Shared/ContextFree.Tests.fs
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ module Grammar =
testFirst [ NonTerminal "D" ] (set [ Some 'a'; Some 'b' ])

testCase "FOLLOW sets" <| fun _ ->
let follows = Grammar.followSets '$' notLL1
let follows = Grammar.followSets notLL1 '$'
let testFollow symbol expected =
Expect.equal (Map.find symbol follows) expected $"FOLLOW({symbol})"
testFollow "S" (set [ '$' ])
Expand All @@ -131,13 +131,13 @@ module Grammar =
testFollow "C" (set [ 'c'; 'a'; 'b'; '$' ])
testFollow "D" (set [ 'a'; 'b'; 'c'; '$' ])

testCase "Left recursion elimination" <| fun _ ->
ptestCase "Left recursion elimination" <| fun _ ->
Expect.equal
(Grammar.eliminateLeftRecursions grammarWithLeftRecursions)
grammarWithoutLeftRecursions
"Failed to eliminate left recursions"

testCase "Left-factoring" <| fun _ ->
ptestCase "Left-factoring" <| fun _ ->
Expect.equal
(Grammar.leftFactor grammarToLeftFactor)
grammarLeftFactored
Expand All @@ -152,7 +152,7 @@ module Dpda =
// functional DSL style
let map s = Map.ofSeq s
let (=>) a b = a, b
let (|->) a b = a => InputConsumingTransition (map b)
let (|->) a b = a => InputConsumingTransitions (map b)
let (?->) a b = a => EpsilonTransition b
let [<Literal>] Bottom = '$'

Expand Down
Loading

0 comments on commit 5e10181

Please sign in to comment.