Skip to content

Commit bcbfc22

Browse files
committed
wip
1 parent 6712ec0 commit bcbfc22

23 files changed

+1283
-401
lines changed

Steepfile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ target :lib do
77
check "lib/lrama/grammar/code/printer_code.rb"
88
check "lib/lrama/grammar/code.rb"
99
check "lib/lrama/grammar/counter.rb"
10+
check "lib/lrama/grammar/parser_state.rb"
1011
check "lib/lrama/grammar/percent_code.rb"
1112
check "lib/lrama/grammar/precedence.rb"
1213
check "lib/lrama/grammar/printer.rb"
@@ -15,6 +16,9 @@ target :lib do
1516
check "lib/lrama/lexer/token/char.rb"
1617
check "lib/lrama/lexer/token/ident.rb"
1718
check "lib/lrama/lexer/token/parameterizing.rb"
19+
check "lib/lrama/lexer/token/parser_state_pop.rb"
20+
check "lib/lrama/lexer/token/parser_state_push.rb"
21+
check "lib/lrama/lexer/token/parser_state_set.rb"
1822
check "lib/lrama/lexer/token/tag.rb"
1923
check "lib/lrama/lexer/token/user_code.rb"
2024
check "lib/lrama/lexer/location.rb"

lib/lrama/grammar.rb

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
require "lrama/grammar/code"
33
require "lrama/grammar/counter"
44
require "lrama/grammar/error_token"
5+
require "lrama/grammar/parser_state"
56
require "lrama/grammar/percent_code"
67
require "lrama/grammar/precedence"
78
require "lrama/grammar/printer"
@@ -16,7 +17,7 @@
1617
module Lrama
1718
# Grammar is the result of parsing an input grammar file
1819
class Grammar
19-
attr_reader :percent_codes, :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux
20+
attr_reader :percent_codes, :parser_states, :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux
2021
attr_accessor :union, :expect,
2122
:printers, :error_tokens,
2223
:lex_param, :parse_param, :initial_action,
@@ -31,6 +32,7 @@ def initialize(rule_counter)
3132
@percent_codes = []
3233
@printers = []
3334
@error_tokens = []
35+
@parser_states = []
3436
@symbols = []
3537
@types = []
3638
@rule_builders = []
@@ -58,6 +60,10 @@ def add_error_token(ident_or_tags:, token_code:, lineno:)
5860
@error_tokens << ErrorToken.new(ident_or_tags: ident_or_tags, token_code: token_code, lineno: lineno)
5961
end
6062

63+
# Records a parser-state declaration on the grammar.
#
# Wraps +state_id+ and +state_list+ in a ParserState and appends it to
# @parser_states (exposed through the parser_states reader).
def add_parser_state(state_id, state_list)
  parser_state = ParserState.new(state_id: state_id, state_list: state_list)
  @parser_states << parser_state
end
66+
6167
def add_term(id:, alias_name: nil, tag: nil, token_id: nil, replace: false)
6268
if token_id && (sym = @symbols.find {|s| s.token_id == token_id })
6369
if replace
@@ -186,7 +192,7 @@ def find_symbol_by_id(id)
186192
end
187193

188194
# Strict lookup: returns whatever find_symbol_by_id(id) finds, and raises
# (a bare `raise` with a String message) when that lookup returns a falsy value.
# This hunk changes the error message to interpolate id.s_value instead of
# id itself, so the method now expects +id+ to respond to #s_value.
def find_symbol_by_id!(id)
189-
find_symbol_by_id(id) || (raise "Symbol not found: #{id}")
195+
find_symbol_by_id(id) || (raise "Symbol not found: #{id.s_value}")
190196
end
191197

192198
def find_symbol_by_number!(number)

lib/lrama/grammar/parser_state.rb

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
module Lrama
  class Grammar
    # One parser-state declaration: a named state (+state_id+) together with
    # the list of values it can take (+state_list+). Every method below
    # renders a fragment of C source text derived from those names; the
    # object itself holds no other data.
    class ParserState
      attr_reader :state_id, :state_list

      # state_id::   object responding to #s_value, giving the state's name
      # state_list:: enumerable of objects responding to #s_value, one per
      #              enumerator of the generated C enum
      def initialize(state_id:, state_list:)
        @state_id = state_id
        @state_list = state_list
      end

      # C text declaring the enum, its typedef, a NULL-terminated table of
      # enumerator names, a lookup function over that table, and two macros
      # that map a value (or the current stack top) to its name.
      def enum_definition
        <<~ENUM
          enum #{enum_name}
          {
            #{enum_body}
          };
          typedef enum #{enum_name} #{enum_type};

          static const char *const #{enum_name_table_name}[] = {
            #{int_to_name.join(", ")}
          };

          YY_ATTRIBUTE_UNUSED
          static const char *
          #{enum_name}_name (#{enum_type} num)
          {
            return #{enum_name_table_name}[num];
          }

          # define #{state_name_macro}(value) #{enum_name}_name (value)
          # define #{current_state_name_macro} #{state_name_macro} (*#{stack_prefix}_p)
        ENUM
      end

      # Name of the C macro translating a state value into its name.
      def state_name_macro
        "YY_STATE_#{state_name.upcase}_NAME"
      end

      # Name of the C macro yielding the name of the value on top of the stack.
      def current_state_name_macro
        "YY_CURRENT_STATE_#{state_name.upcase}_NAME"
      end

      # C text defining the push / pop / set macros and the accessor macro
      # for the stack-top pointer.
      #
      # The push macro refuses to write past the YYINITDEPTH-sized array
      # emitted by #states_stacks: on a full stack it only emits a debug
      # message, mirroring how the pop macro treats an empty stack. The
      # +value+ macro argument is parenthesized where it is assigned so that
      # an arbitrary expression can be passed safely.
      def states_functions
        <<~FUNC
          # define YYPUSH_STATE_#{state_name.upcase}(value) \\
            do \\
              { \\
                YYDPRINTF ((stderr, "Push %s to #{state_name}\\n", #{state_name_macro} (value))); \\
                if (#{stack_prefix}_p - #{stack_prefix} < YYINITDEPTH - 1) \\
                  { \\
                    *++#{stack_prefix}_p = (value); \\
                  } \\
                else \\
                  { \\
                    YYDPRINTF ((stderr, "Try to push to full #{state_name} stack\\n")); \\
                  } \\
              } \\
            while (0)

          # define YYPOP_STATE_#{state_name.upcase}() \\
            do \\
              { \\
                YYDPRINTF ((stderr, "Pop #{state_name}\\n")); \\
                if (#{stack_prefix}_p != #{stack_prefix}) \\
                  { \\
                    #{stack_prefix}_p -= 1; \\
                  } \\
                else \\
                  { \\
                    YYDPRINTF ((stderr, "Try to pop empty #{state_name} stack\\n")); \\
                  } \\
              } \\
            while (0)

          # define YYSET_STATE_#{state_name.upcase}(value) \\
            do \\
              { \\
                YYDPRINTF ((stderr, "Set %s to #{state_name}\\n", #{state_name_macro} (value))); \\
                *#{stack_prefix}_p = (value); \\
              } \\
            while (0)

          # define YY_STATE_#{state_name.upcase} #{stack_prefix}_p
        FUNC
      end

      # C text declaring the backing array, the bottom pointer and the top
      # pointer of this state's stack.
      def states_stacks
        <<~STACKS
          /* The parser state stack (#{stack_prefix}): array, bottom, top. */
          int #{stack_prefix}_a[YYINITDEPTH];
          int *#{stack_prefix} = #{stack_prefix}_a;
          int *#{stack_prefix}_p = #{stack_prefix};
        STACKS
      end

      # The state's name, taken from state_id.
      def state_name
        state_id.s_value
      end

      # Tag of the generated C enum.
      def enum_name
        "yyparser_state_#{state_name}"
      end

      # Typedef name for the generated C enum.
      def enum_type
        "#{enum_name}_t"
      end

      # Enumerator names separated by a comma and a newline; the two-space
      # continuation lines up with the body indentation in #enum_definition.
      def enum_body
        state_list.map(&:s_value).join(",\n  ")
      end

      # Quoted enumerator names followed by a YY_NULLPTR terminator, in the
      # same order as the enum so the array index equals the enum value.
      def int_to_name
        state_list.map { |state| "\"#{state.s_value}\"" } << "YY_NULLPTR"
      end

      # Name of the C array holding the enumerator names.
      def enum_name_table_name
        "#{enum_name}_names"
      end

      # Common prefix of the stack's C identifiers (+_a+ array, bare bottom
      # pointer, +_p+ top pointer).
      def stack_prefix
        "yyparser_state_#{state_name}"
      end
    end
  end
end

lib/lrama/grammar/rule_builder.rb

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,12 @@ def process_rhs
117117
when Lrama::Lexer::Token::Parameterizing
118118
@parameterizing_rules = ParameterizingRules::Builder.new(token, @rule_counter, lhs, user_code, precedence_sym, line).build
119119
@replaced_rhs << token
120+
when Lrama::Lexer::Token::ParserStatePop
121+
process_parser_state_token(token, "parser_state_pop_", "YYPOP_STATE_#{token.s_value.upcase}();", i)
122+
when Lrama::Lexer::Token::ParserStatePush
123+
process_parser_state_token(token, "parser_state_push_", "YYPUSH_STATE_#{token.s_value.upcase}(#{token.state.s_value});", i)
124+
when Lrama::Lexer::Token::ParserStateSet
125+
process_parser_state_token(token, "parser_state_set_", "YYSET_STATE_#{token.s_value.upcase}(#{token.state.s_value});", i)
120126
when Lrama::Lexer::Token::UserCode
121127
prefix = token.referred ? "@" : "$@"
122128
new_token = Lrama::Lexer::Token::Ident.new(s_value: prefix + @midrule_action_counter.increment.to_s)
@@ -135,6 +141,20 @@ def process_rhs
135141
end
136142
end
137143

144+
# Replaces a parser-state token in the RHS with a freshly named identifier
# and registers a derived rule whose only content is +code+ as its action.
#
# token::  the parser-state token; its s_value and location are read
# prefix:: string prepended to the generated identifier's name
# code::   C statement used as the derived rule's user code
# position_in_original_rule_rhs:: forwarded to the derived RuleBuilder
#
# The generated name is prefix + token.s_value + the next value of the
# midrule action counter.
def process_parser_state_token(token, prefix, code, position_in_original_rule_rhs)
  derived_lhs = Lrama::Lexer::Token::Ident.new(
    s_value: prefix + token.s_value + @midrule_action_counter.increment.to_s
  )
  action = Lrama::Lexer::Token::UserCode.new(s_value: code, location: token.location)
  @replaced_rhs << derived_lhs

  derived_builder = RuleBuilder.new(
    @rule_counter,
    @midrule_action_counter,
    position_in_original_rule_rhs,
    skip_preprocess_references: true
  ).tap do |builder|
    builder.lhs = derived_lhs
    builder.user_code = action
    builder.complete_input
    builder.setup_rules
  end

  @rule_builders_for_derived_rules << derived_builder
end
157+
138158
def numberize_references
139159
# Bison n'th component is 1-origin
140160
(rhs + [user_code]).compact.each.with_index(1) do |token, i|

lib/lrama/lexer.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,10 @@ class Lexer
2828
%error-token
2929
%empty
3030
%code
31+
%parser-state
32+
%push
33+
%pop
34+
%set
3135
)
3236

3337
def initialize(text)

lib/lrama/lexer/token.rb

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,5 +40,8 @@ def last_column
4040
require 'lrama/lexer/token/char'
4141
require 'lrama/lexer/token/ident'
4242
require 'lrama/lexer/token/parameterizing'
43+
require 'lrama/lexer/token/parser_state_pop'
44+
require 'lrama/lexer/token/parser_state_push'
45+
require 'lrama/lexer/token/parser_state_set'
4346
require 'lrama/lexer/token/tag'
4447
require 'lrama/lexer/token/user_code'
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
module Lrama
  class Lexer
    class Token
      # Marker subclass of Token for a parser-state pop; it adds no
      # behaviour of its own and exists so callers can dispatch on its class.
      class ParserStatePop < Token; end
    end
  end
end
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
module Lrama
  class Lexer
    class Token
      # Token subclass for a parser-state push. On top of what Token
      # provides it carries a readable and writable +state+ attribute.
      class ParserStatePush < Token
        attr_reader :state
        attr_writer :state
      end
    end
  end
end
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
module Lrama
  class Lexer
    class Token
      # Token subclass for a parser-state set. On top of what Token
      # provides it carries a readable and writable +state+ attribute.
      class ParserStateSet < Token
        attr_reader :state
        attr_writer :state
      end
    end
  end
end

lib/lrama/output.rb

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,24 @@ def percent_code(name)
358358
end.join
359359
end
360360

361+
# Collects the enum_definition text of every parser state in the grammar,
# in declaration order, as an Array of Strings.
# NOTE(review): unlike percent_code this does not join the pieces — confirm
# the consuming template handles an Array.
def parser_states_enums
  @grammar.parser_states.map(&:enum_definition)
end
366+
367+
def parser_states_stacks
368+
@grammar.parser_states.map do |ps|
369+
ps.states_stacks
370+
end
371+
end
372+
373+
def parser_states_functions
374+
@grammar.parser_states.map do |ps|
375+
ps.states_functions
376+
end
377+
end
378+
361379
private
362380

363381
def eval_template(file, path)

0 commit comments

Comments
 (0)