-
Notifications
You must be signed in to change notification settings - Fork 0
/
TokenStream.cpp
126 lines (102 loc) · 3.3 KB
/
TokenStream.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#include "TokenStream.hpp"
// a few variables that were useful as global only to this file.
static char charBuffer; //last char read;might not be part of current token.
static bool charBufferFull; //is there a char stored in the last char read?
// default constructor
TokenStream::TokenStream()
{
charBufferFull = false; //have not yet read a character. So nothing in buffer
_dfa = new int*[7]; // 7 dfa states (see below comments)
for (int state=0; state<7; state++) // each could "go to" based on any char
_dfa[state] = new int[256];
// STATE 0 --> (start)
// STATE 1 --> TEXT
// STATE 2 --> BG
// STATE 3 --> PIPE
// STATE 4 --> REDIR_IN
// STATE 5 --> REDIR_OUT
// STATE 6 --> EOL
// set up "go to" transitions. Assume anything is text from start state
// or existing text state ... next if's patch this as need be
for (int ch=0; ch<256; ch++)
{
_dfa[0][ch]=TEXT;
_dfa[TEXT][ch]=TEXT;
}
// when we see an &, |, <, >, or EOL, it has to be its own token
_dfa[0][(int)'&'] = 2;
_dfa[TEXT][(int)'&'] = -1;
_dfa[0][(int)'|'] = 3;
_dfa[TEXT][(int)'|'] = -1;
_dfa[0][(int)'<'] = 4;
_dfa[TEXT][(int)'<'] = -1;
_dfa[0][(int)'>'] = 5;
_dfa[TEXT][(int)'>'] = -1;
_dfa[0][(int)'\n'] = 6;
_dfa[TEXT][(int)'\n'] = -1;
// all transitins out of states 2 or higher are invalid
for (int state=2; state<7; state++)
for (int ch=0; ch<256; ch++)
_dfa[state][ch] = -1;
// no whitespace is part of a token.
for (int state=0; state<7;state++)
{
_dfa[state][(int)' '] = -1;
_dfa[state][(int)'\t'] = -1;
}
}
// get the next token, based on the dfa
Token
TokenStream::nextToken()
{
char ch; // next character in input stream.
// if the buffer is filled, then it is our next character ...
if (charBufferFull)
{
ch = charBuffer;
charBufferFull=false;
}
else // ... otherwise get the next character from the keyboard.
ch = cin.get(); // does not skip whitespace!
// skip all whitespace
while(ch==' ' || ch=='\t')
{
ch = cin.get();
}
// start building up token based on DFA
string tokVal;
int state = 0; // active state in DFA; at first , it is "start" state
int old_state=-1; // previous state in DFA (none at first)
// as long as our state s valid
while(state!=-1)
{
old_state=state; // update the old state
state=_dfa[state][(int)ch]; // follow transition to new state
// so long as new state is valid
if (state!=-1) //live state
{
tokVal.push_back(ch); // add character to end of token string
if (state==0 || state==1) // more chars possible
ch=cin.get();
else // dead end state
break;
}
}
// if we got out of above loop due to reading an extra character ...
if (state==-1)
{
// put the character in our buffer
charBufferFull=true;
charBuffer=ch;
// side note: could likley have used cin.unget() instead, but
// behavior of such can be a bit unreliable on differeng systems.
}
// now we're ready to fill in our information into a token.
Token t;
t.value()=tokVal;
if (state!=-1) // if we cut out without seeing an invalid character ...
t.type()=state; // state tells us the type.
else
t.type()=old_state; // otehrwise old_state tells us the type.
return t;
}