-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathscanner.mll
152 lines (144 loc) · 3.94 KB
/
scanner.mll
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
{
(* Brings the token constructors used by the rules below (LPAREN, STRINGLIT,
   BEGINTERSTRING, ...) into scope. *)
open Parser
(* Kind of string chunk produced by the str rule:
   Static   - a whole string literal containing no interpolation;
   BegInter - the text before the first interpolation of a string;
   MidInter - the text between two interpolations;
   EndInter - the text after the last interpolation, up to the closing quote. *)
type str_typ = Static | BegInter | MidInter | EndInter
(* Global mutable lexer state shared by the tokenize and str rules. *)
(* forgive me *)
(* paren_count: parentheses opened (and not yet closed) while inside an
   interpolation. tokenize uses it to tell an ordinary closing paren from the
   one that ends the interpolated expression. *)
let paren_count = ref 0 (* allows for nested parens *)
(* inter_depth: number of interpolations currently open. The str rule
   increments it on every interpolation opener; tokenize decrements it when
   the closing paren of an interpolation is reached. *)
let inter_depth = ref 0 (* allows for nested strings *)
(* NOTE(review): paren_count is one counter shared by every interpolation
   depth. A string with its own interpolation that sits inside parentheses of
   an outer interpolation looks like it would be mis-counted - confirm, and
   consider a stack of counters if so. *)
(*
Design note: an alternative would be a dedicated rule for interpolated
expressions, but that would require copying the tokenize rule and updating
the copy whenever tokenize changes. With the counters above, additional
regexes can be added to the scanner as usual.
*)
}
(* Characters allowed in an identifier besides digits. The single quote is
   part of this class, so it may appear anywhere in a name, including as the
   first character. *)
let letter = ['A'-'Z' 'a'-'z' '_' '\'']
(* One decimal digit. *)
let digit = ['0'-'9']
(* Exponent suffix of a real literal: e or E, an optional sign, then one or
   more digits. *)
let exp = ('e' | 'E') ('+' | '-')? digit+
(* Main scanner rule: skips whitespace and comments and returns the next
   Parser token read from lexbuf.

   String interpolation is handled with the global counters inter_depth and
   paren_count: while inter_depth is positive, parentheses are counted, and a
   closing paren seen with paren_count at zero ends the interpolated
   expression and hands control back to the str rule.
   NOTE(review): paren_count is shared by all interpolation depths, so an
   interpolated string nested inside parentheses of another interpolation
   looks like it would be mis-counted - confirm.

   Raises Failure on an illegal character, on an out-of-range integer literal
   or character escape, and on anything the str and comment rules reject. *)
rule tokenize = parse
  | [' ' '\t' '\r' '\n'] { tokenize lexbuf }
  | "/*" { comment lexbuf }
  (* Line comment. The terminating newline is left to the whitespace case so
     that a comment on the last line of an input with no trailing newline is
     still accepted. *)
  | "//" [^ '\n']* { tokenize lexbuf }
  | '(' {
      (* Inside an interpolation, count the paren so that its matching close
         is not mistaken for the end of the interpolated expression. *)
      if !inter_depth > 0 then
        paren_count := !paren_count + 1;
      LPAREN
    }
  | ')' {
      if !inter_depth > 0 then
        if !paren_count = 0 then (
          (* This paren closes the interpolation: resume scanning the
             surrounding string text. *)
          inter_depth := !inter_depth - 1;
          let (styp, s) = str "" MidInter lexbuf in
          match styp with
          | MidInter -> MIDINTERSTRING(s)
          | EndInter -> ENDINTERSTRING(s)
          | Static | BegInter -> raise (Failure "heckin string" )
        ) else ( paren_count := !paren_count - 1; RPAREN )
      else
        RPAREN
    }
  | '{' { LBRACE }
  | '}' { RBRACE }
  | '[' { LBRACKET }
  | ']' { RBRACKET }
  | '+' { PLUS }
  | '-' { MINUS }
  | '*' { TIMES }
  | '/' { DIVIDE }
  | "==" { EQ }
  | "!=" { NEQ }
  | '<' { LT }
  | "<=" { LEQ }
  | ">" { GT }
  | ">=" { GEQ }
  | "and" { AND }
  | "or" { OR }
  | '|' { PIPE }
  | "int" { INT }
  | "real" { REAL }
  | "bool" { BOOL }
  | "char" { CHAR }
  | "set" { SET }
  | "map" { MAP }
  | "array" { ARRAY }
  | "string" { STRING }
  | "let" { LET }
  | "in" { IN }
  | "..." { ELLIPSE }
  | ',' { COMMA }
  | ':' { COLON }
  | ';' { SEMI }
  | ";;" { DSEMI }
  | "end" { END }
  | '=' { EQUAL }
  | "if" { IF }
  | "then" { THEN }
  | "else" { ELSE }
  | "while" { WHILE }
  | "for" { FOR }
  | "do" { DO }
  | '.' { DOT }
  | "get" { GET }
  | "at" { AT }
  | digit+ as lit {
      (* Report literals too large for int with a descriptive message. *)
      (try LITERAL(int_of_string lit)
       with Failure _ ->
         raise (Failure ("integer literal out of range: " ^ lit)))
    }
  | (digit+ exp | (digit+ '.' digit* | '.' digit+) exp?) as reallit
      { REALLIT(float_of_string reallit) }
  | "true" { BOOLLIT(true) }
  | "false" { BOOLLIT(false) }
  | "'" ([^ '\'' '\\'] as c) "'" { CHARLIT(c) }
  | "'\\n'" { CHARLIT('\n') }
  | "'\\t'" { CHARLIT('\t') }
  | "'\\''" { CHARLIT('\'') }
  | "'\\\"'" { CHARLIT('"') }
  (* Escaped backslash, quoted like every other character literal. *)
  | "'\\\\'" { CHARLIT('\\') }
  | "'\\" (digit+ as d) "'" {
      (* A digit run too long for int_of_string is treated as out of range. *)
      let value = try int_of_string d with Failure _ -> 256 in
      if value > 255 then
        raise (Failure "character escape must be 0-255")
      else
        CHARLIT(Char.chr value)
    }
  | letter (letter | digit)* as lit { ID(lit) }
  | '"' {
      (* Opening quote: the str rule returns either the whole literal or the
         text up to the first interpolation. *)
      let (styp, s) = str "" Static lexbuf in
      match styp with
      | Static -> STRINGLIT(s)
      | BegInter -> BEGINTERSTRING(s)
      | MidInter | EndInter -> raise (Failure "heckin string" )
    }
  | eof { EOF }
  | "\x2D\x3E" { ARROW } (* arrow op *)
  (* Anything else is not part of the language; name the offending character
     instead of failing with the generic empty-token error. *)
  | _ as c { raise (Failure ("illegal character " ^ Char.escaped c)) }
(* Scans string text, starting right after an opening double quote or right
   after the paren that closed an interpolation.

   old_str: text accumulated so far for the current chunk.
   typ:     Static when scanning began at an opening quote, MidInter when it
            resumed after an interpolation.

   Returns a pair (kind, text). At an interpolation opener the kind is
   BegInter (typ was Static) or MidInter (typ was MidInter) and inter_depth
   has been incremented. At the closing quote the kind is Static (typ was
   Static) or EndInter (typ was MidInter).

   Raises Failure on an illegal character (including a raw newline or an
   unknown escape), on a numeric escape above 255, and when the input ends
   before the string is closed. *)
and str old_str typ = parse
  | [^ '\n' '"' '\\']+ as c { str (old_str ^ c) typ lexbuf }
  | "\\n" { str (old_str ^ "\n") typ lexbuf }
  | "\\t" { str (old_str ^ "\t") typ lexbuf }
  | "\\\"" { str (old_str ^ "\"") typ lexbuf }
  | "\\'" { str (old_str ^ "\'") typ lexbuf }
  | "\\(" {
      (* Interpolation opener: hand the chunk read so far back to tokenize,
         which lexes the embedded expression. *)
      inter_depth := !inter_depth + 1;
      match typ with
      | Static -> (BegInter, old_str)
      | MidInter -> (MidInter, old_str)
      | BegInter | EndInter -> raise (Failure "heckin string" )
    }
  | "\\" (digit+ as d) {
      (* A digit run too long for int_of_string is treated as out of range. *)
      let value = try int_of_string d with Failure _ -> 256 in
      if value > 255 then
        raise (Failure "character escape must be 0-255")
      else
        str (old_str ^ String.make 1 (Char.chr value)) typ lexbuf
    }
  | "\\\\" { str (old_str ^ "\\" ) typ lexbuf }
  (* Backslash followed by a real newline contributes a newline. *)
  | "\\\n" { str (old_str ^ "\n") typ lexbuf }
  | '"' {
      match typ with
      | Static -> (Static, old_str)
      | MidInter -> (EndInter, old_str)
      | BegInter | EndInter -> raise (Failure "heckin string" )
    }
  (* End of input inside a string: report it explicitly rather than failing
     with the generic empty-token error. *)
  | eof { raise (Failure "unterminated string literal") }
  | _ as char { raise (Failure("illegal character " ^ Char.escaped char ^
                               " in string literal")) }
(* Skips the body of a block comment opened in the tokenize rule, then
   resumes tokenizing after the closing delimiter. Block comments do not
   nest: the first closing delimiter ends the comment.
   Raises Failure if the input ends before the comment is closed. *)
and comment = parse
  | "*/" { tokenize lexbuf }
  | eof { raise (Failure "unterminated comment") }
  | _ { comment lexbuf }