cprover
smt2_tokenizer.cpp
Go to the documentation of this file.
1
/*******************************************************************\
2
3
Module:
4
5
Author: Daniel Kroening, kroening@kroening.com
6
7
\*******************************************************************/
8
9
#include "
smt2_tokenizer.h
"
10
11
#include <istream>
12
13
bool
smt2_tokenizert::is_simple_symbol_character
(
char
ch)
14
{
15
// any non-empty sequence of letters, digits and the characters
16
// ~ ! @ $ % ^ & * _ - + = < > . ? /
17
// that does not start with a digit and is not a reserved word.
18
19
return
isalnum(ch) ||
20
ch==
'~'
|| ch==
'!'
|| ch==
'@'
|| ch==
'$'
|| ch==
'%'
||
21
ch==
'^'
|| ch==
'&'
|| ch==
'*'
|| ch==
'_'
|| ch==
'-'
||
22
ch==
'+'
|| ch==
'='
|| ch==
'<'
|| ch==
'>'
|| ch==
'.'
||
23
ch==
'?'
|| ch==
'/'
;
24
}
25
26
smt2_tokenizert::tokent
smt2_tokenizert::get_simple_symbol
()
27
{
28
// any non-empty sequence of letters, digits and the characters
29
// ~ ! @ $ % ^ & * _ - + = < > . ? /
30
// that does not start with a digit and is not a reserved word.
31
32
buffer
.clear();
33
34
char
ch;
35
while
(
in
->get(ch))
36
{
37
if
(
is_simple_symbol_character
(ch))
38
{
39
buffer
+=ch;
40
}
41
else
42
{
43
in
->unget();
// put back
44
quoted_symbol
=
false
;
45
return
SYMBOL;
46
}
47
}
48
49
// eof -- this is ok here
50
if
(
buffer
.empty())
51
return
END_OF_FILE;
52
else
53
{
54
quoted_symbol
=
false
;
55
return
SYMBOL;
56
}
57
}
58
59
smt2_tokenizert::tokent
smt2_tokenizert::get_decimal_numeral
()
60
{
61
// we accept any sequence of digits and dots
62
63
buffer
.clear();
64
65
char
ch;
66
while
(
in
->get(ch))
67
{
68
if
(isdigit(ch) || ch==
'.'
)
69
{
70
buffer
+=ch;
71
}
72
else
73
{
74
in
->unget();
// put back
75
return
NUMERAL;
76
}
77
}
78
79
// eof -- this is ok here
80
if
(
buffer
.empty())
81
return
END_OF_FILE;
82
else
83
return
NUMERAL;
84
}
85
86
smt2_tokenizert::tokent
smt2_tokenizert::get_bin_numeral
()
87
{
88
// we accept any sequence of '0' or '1'
89
90
buffer
.clear();
91
buffer
+=
'#'
;
92
buffer
+=
'b'
;
93
94
char
ch;
95
while
(
in
->get(ch))
96
{
97
if
(ch==
'0'
|| ch==
'1'
)
98
{
99
buffer
+=ch;
100
}
101
else
102
{
103
in
->unget();
// put back
104
return
NUMERAL;
105
}
106
}
107
108
// eof -- this is ok here
109
if
(
buffer
.empty())
110
return
END_OF_FILE;
111
else
112
return
NUMERAL;
113
}
114
115
smt2_tokenizert::tokent
smt2_tokenizert::get_hex_numeral
()
116
{
117
// we accept any sequence of '0'-'9', 'a'-'f', 'A'-'F'
118
119
buffer
.clear();
120
buffer
+=
'#'
;
121
buffer
+=
'x'
;
122
123
char
ch;
124
while
(
in
->get(ch))
125
{
126
if
(isxdigit(ch))
127
{
128
buffer
+=ch;
129
}
130
else
131
{
132
in
->unget();
// put back
133
return
NUMERAL;
134
}
135
}
136
137
// eof -- this is ok here
138
if
(
buffer
.empty())
139
return
END_OF_FILE;
140
else
141
return
NUMERAL;
142
}
143
144
smt2_tokenizert::tokent
smt2_tokenizert::get_quoted_symbol
()
145
{
146
// any sequence of printable ASCII characters (including space,
147
// tab, and line-breaking characters) except for the backslash
148
// character \, that starts and ends with | and does not otherwise
149
// contain |
150
151
buffer
.clear();
152
153
char
ch;
154
while
(
in
->get(ch))
155
{
156
if
(ch==
'|'
)
157
{
158
quoted_symbol
=
true
;
159
return
SYMBOL;
// done
160
}
161
162
buffer
+=ch;
163
164
if
(ch==
'\n'
)
165
line_no
++;
166
}
167
168
// Hmpf. Eof before end of quoted symbol. This is an error.
169
throw
error
(
"EOF within quoted symbol"
);
170
}
171
172
smt2_tokenizert::tokent
smt2_tokenizert::get_string_literal
()
173
{
174
buffer
.clear();
175
176
char
ch;
177
while
(
in
->get(ch))
178
{
179
if
(ch==
'"'
)
180
{
181
// quotes may be escaped by repeating
182
if
(
in
->get(ch))
183
{
184
if
(ch==
'"'
)
185
{
186
}
187
else
188
{
189
in
->unget();
190
return
STRING_LITERAL;
// done
191
}
192
}
193
else
194
return
STRING_LITERAL;
// done
195
}
196
buffer
+=ch;
197
}
198
199
// Hmpf. Eof before end of string literal. This is an error.
200
throw
error
(
"EOF within string literal"
);
201
}
202
203
smt2_tokenizert::tokent
smt2_tokenizert::next_token
()
204
{
205
if
(
peeked
)
206
peeked
=
false
;
207
else
208
get_token_from_stream
();
209
210
return
token
;
211
}
212
213
void
smt2_tokenizert::get_token_from_stream
()
214
{
215
char
ch;
216
217
while
(
in
->get(ch))
218
{
219
switch
(ch)
220
{
221
case
'\n'
:
222
line_no
++;
223
break
;
224
225
case
' '
:
226
case
'\r'
:
227
case
'\t'
:
228
case
static_cast<
char
>
(160):
// non-breaking space
229
// skip any whitespace
230
break
;
231
232
case
';'
:
// comment
233
// skip until newline
234
while
(
in
->get(ch))
235
{
236
if
(ch==
'\n'
)
237
{
238
line_no
++;
239
break
;
240
}
241
}
242
break
;
243
244
case
'('
:
245
// produce sub-expression
246
token
= OPEN;
247
return
;
248
249
case
')'
:
250
// done with sub-expression
251
token
= CLOSE;
252
return
;
253
254
case
'|'
:
// quoted symbol
255
token
=
get_quoted_symbol
();
256
return
;
257
258
case
'"'
:
// string literal
259
token
=
get_string_literal
();
260
return
;
261
262
case
':'
:
// keyword
263
token
=
get_simple_symbol
();
264
if
(
token
== SYMBOL)
265
{
266
token
= KEYWORD;
267
return
;
268
}
269
else
270
throw
error
(
"expecting symbol after colon"
);
271
272
case
'#'
:
273
if
(
in
->get(ch))
274
{
275
if
(ch==
'b'
)
276
{
277
token
=
get_bin_numeral
();
278
return
;
279
}
280
else
if
(ch==
'x'
)
281
{
282
token
=
get_hex_numeral
();
283
return
;
284
}
285
else
286
throw
error
(
"unknown numeral token"
);
287
}
288
else
289
throw
error
(
"unexpected EOF in numeral token"
);
290
break
;
291
292
default
:
// likely a simple symbol or a numeral
293
if
(isdigit(ch))
294
{
295
in
->unget();
296
token
=
get_decimal_numeral
();
297
return
;
298
}
299
else
if
(
is_simple_symbol_character
(ch))
300
{
301
in
->unget();
302
token
=
get_simple_symbol
();
303
return
;
304
}
305
else
306
{
307
// illegal character, error
308
throw
error
() <<
"unexpected character '"
<< ch <<
'\''
;
309
}
310
}
311
}
312
313
token
= END_OF_FILE;
314
}
smt2_tokenizert::get_quoted_symbol
tokent get_quoted_symbol()
Definition:
smt2_tokenizer.cpp:144
smt2_tokenizert::token
tokent token
Definition:
smt2_tokenizer.h:112
smt2_tokenizert::tokent
enum { NONE, END_OF_FILE, STRING_LITERAL, NUMERAL, SYMBOL, KEYWORD, OPEN, CLOSE } tokent
Definition:
smt2_tokenizer.h:68
smt2_tokenizert::get_string_literal
tokent get_string_literal()
Definition:
smt2_tokenizer.cpp:172
smt2_tokenizert::in
std::istream * in
Definition:
smt2_tokenizer.h:107
smt2_tokenizert::peeked
bool peeked
Definition:
smt2_tokenizer.h:111
smt2_tokenizert::error
smt2_errort error()
generate an error exception
Definition:
smt2_tokenizer.h:101
smt2_tokenizer.h
smt2_tokenizert::get_bin_numeral
tokent get_bin_numeral()
Definition:
smt2_tokenizer.cpp:86
smt2_tokenizert::line_no
unsigned line_no
Definition:
smt2_tokenizer.h:108
smt2_tokenizert::buffer
std::string buffer
Definition:
smt2_tokenizer.h:109
smt2_tokenizert::get_hex_numeral
tokent get_hex_numeral()
Definition:
smt2_tokenizer.cpp:115
smt2_tokenizert::get_simple_symbol
tokent get_simple_symbol()
Definition:
smt2_tokenizer.cpp:26
smt2_tokenizert::get_decimal_numeral
tokent get_decimal_numeral()
Definition:
smt2_tokenizer.cpp:59
smt2_tokenizert::next_token
tokent next_token()
Definition:
smt2_tokenizer.cpp:203
smt2_tokenizert::quoted_symbol
bool quoted_symbol
Definition:
smt2_tokenizer.h:110
smt2_tokenizert::is_simple_symbol_character
static bool is_simple_symbol_character(char)
Definition:
smt2_tokenizer.cpp:13
smt2_tokenizert::get_token_from_stream
void get_token_from_stream()
read a token from the input stream and store it in 'token'
Definition:
smt2_tokenizer.cpp:213
solvers
smt2
smt2_tokenizer.cpp
Generated by
1.8.20