cprover
smt2_tokenizer.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module:
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
9 #include "smt2_tokenizer.h"
10 
11 #include <istream>
12 
14 {
15  // any non-empty sequence of letters, digits and the characters
16  // ~ ! @ $ % ^ & * _ - + = < > . ? /
17  // that does not start with a digit and is not a reserved word.
18 
19  return isalnum(ch) ||
20  ch=='~' || ch=='!' || ch=='@' || ch=='$' || ch=='%' ||
21  ch=='^' || ch=='&' || ch=='*' || ch=='_' || ch=='-' ||
22  ch=='+' || ch=='=' || ch=='<' || ch=='>' || ch=='.' ||
23  ch=='?' || ch=='/';
24 }
25 
27 {
28  // any non-empty sequence of letters, digits and the characters
29  // ~ ! @ $ % ^ & * _ - + = < > . ? /
30  // that does not start with a digit and is not a reserved word.
31 
32  buffer.clear();
33 
34  char ch;
35  while(in->get(ch))
36  {
38  {
39  buffer+=ch;
40  }
41  else
42  {
43  in->unget(); // put back
44  quoted_symbol = false;
45  return SYMBOL;
46  }
47  }
48 
49  // eof -- this is ok here
50  if(buffer.empty())
51  return END_OF_FILE;
52  else
53  {
54  quoted_symbol = false;
55  return SYMBOL;
56  }
57 }
58 
60 {
61  // we accept any sequence of digits and dots
62 
63  buffer.clear();
64 
65  char ch;
66  while(in->get(ch))
67  {
68  if(isdigit(ch) || ch=='.')
69  {
70  buffer+=ch;
71  }
72  else
73  {
74  in->unget(); // put back
75  return NUMERAL;
76  }
77  }
78 
79  // eof -- this is ok here
80  if(buffer.empty())
81  return END_OF_FILE;
82  else
83  return NUMERAL;
84 }
85 
87 {
88  // we accept any sequence of '0' or '1'
89 
90  buffer.clear();
91  buffer+='#';
92  buffer+='b';
93 
94  char ch;
95  while(in->get(ch))
96  {
97  if(ch=='0' || ch=='1')
98  {
99  buffer+=ch;
100  }
101  else
102  {
103  in->unget(); // put back
104  return NUMERAL;
105  }
106  }
107 
108  // eof -- this is ok here
109  if(buffer.empty())
110  return END_OF_FILE;
111  else
112  return NUMERAL;
113 }
114 
116 {
117  // we accept any sequence of '0'-'9', 'a'-'f', 'A'-'F'
118 
119  buffer.clear();
120  buffer+='#';
121  buffer+='x';
122 
123  char ch;
124  while(in->get(ch))
125  {
126  if(isxdigit(ch))
127  {
128  buffer+=ch;
129  }
130  else
131  {
132  in->unget(); // put back
133  return NUMERAL;
134  }
135  }
136 
137  // eof -- this is ok here
138  if(buffer.empty())
139  return END_OF_FILE;
140  else
141  return NUMERAL;
142 }
143 
145 {
146  // any sequence of printable ASCII characters (including space,
147  // tab, and line-breaking characters) except for the backslash
148  // character \, that starts and ends with | and does not otherwise
149  // contain |
150 
151  buffer.clear();
152 
153  char ch;
154  while(in->get(ch))
155  {
156  if(ch=='|')
157  {
158  quoted_symbol = true;
159  return SYMBOL; // done
160  }
161 
162  buffer+=ch;
163 
164  if(ch=='\n')
165  line_no++;
166  }
167 
168  // Hmpf. Eof before end of quoted symbol. This is an error.
169  throw error("EOF within quoted symbol");
170 }
171 
173 {
174  buffer.clear();
175 
176  char ch;
177  while(in->get(ch))
178  {
179  if(ch=='"')
180  {
181  // quotes may be escaped by repeating
182  if(in->get(ch))
183  {
184  if(ch=='"')
185  {
186  }
187  else
188  {
189  in->unget();
190  return STRING_LITERAL; // done
191  }
192  }
193  else
194  return STRING_LITERAL; // done
195  }
196  buffer+=ch;
197  }
198 
199  // Hmpf. Eof before end of string literal. This is an error.
200  throw error("EOF within string literal");
201 }
202 
204 {
205  if(peeked)
206  peeked = false;
207  else
209 
210  return token;
211 }
212 
214 {
215  char ch;
216 
217  while(in->get(ch))
218  {
219  switch(ch)
220  {
221  case '\n':
222  line_no++;
223  break;
224 
225  case ' ':
226  case '\r':
227  case '\t':
228  case static_cast<char>(160): // non-breaking space
229  // skip any whitespace
230  break;
231 
232  case ';': // comment
233  // skip until newline
234  while(in->get(ch))
235  {
236  if(ch=='\n')
237  {
238  line_no++;
239  break;
240  }
241  }
242  break;
243 
244  case '(':
245  // produce sub-expression
246  token = OPEN;
247  return;
248 
249  case ')':
250  // done with sub-expression
251  token = CLOSE;
252  return;
253 
254  case '|': // quoted symbol
256  return;
257 
258  case '"': // string literal
260  return;
261 
262  case ':': // keyword
264  if(token == SYMBOL)
265  {
266  token = KEYWORD;
267  return;
268  }
269  else
270  throw error("expecting symbol after colon");
271 
272  case '#':
273  if(in->get(ch))
274  {
275  if(ch=='b')
276  {
278  return;
279  }
280  else if(ch=='x')
281  {
283  return;
284  }
285  else
286  throw error("unknown numeral token");
287  }
288  else
289  throw error("unexpected EOF in numeral token");
290  break;
291 
292  default: // likely a simple symbol or a numeral
293  if(isdigit(ch))
294  {
295  in->unget();
297  return;
298  }
299  else if(is_simple_symbol_character(ch))
300  {
301  in->unget();
303  return;
304  }
305  else
306  {
307  // illegal character, error
308  throw error() << "unexpected character '" << ch << '\'';
309  }
310  }
311  }
312 
313  token = END_OF_FILE;
314 }
smt2_tokenizert::get_quoted_symbol
tokent get_quoted_symbol()
Definition: smt2_tokenizer.cpp:144
smt2_tokenizert::token
tokent token
Definition: smt2_tokenizer.h:112
smt2_tokenizert::tokent
enum { NONE, END_OF_FILE, STRING_LITERAL, NUMERAL, SYMBOL, KEYWORD, OPEN, CLOSE } tokent
Definition: smt2_tokenizer.h:68
smt2_tokenizert::get_string_literal
tokent get_string_literal()
Definition: smt2_tokenizer.cpp:172
smt2_tokenizert::in
std::istream * in
Definition: smt2_tokenizer.h:107
smt2_tokenizert::peeked
bool peeked
Definition: smt2_tokenizer.h:111
smt2_tokenizert::error
smt2_errort error()
generate an error exception
Definition: smt2_tokenizer.h:101
smt2_tokenizer.h
smt2_tokenizert::get_bin_numeral
tokent get_bin_numeral()
Definition: smt2_tokenizer.cpp:86
smt2_tokenizert::line_no
unsigned line_no
Definition: smt2_tokenizer.h:108
smt2_tokenizert::buffer
std::string buffer
Definition: smt2_tokenizer.h:109
smt2_tokenizert::get_hex_numeral
tokent get_hex_numeral()
Definition: smt2_tokenizer.cpp:115
smt2_tokenizert::get_simple_symbol
tokent get_simple_symbol()
Definition: smt2_tokenizer.cpp:26
smt2_tokenizert::get_decimal_numeral
tokent get_decimal_numeral()
Definition: smt2_tokenizer.cpp:59
smt2_tokenizert::next_token
tokent next_token()
Definition: smt2_tokenizer.cpp:203
smt2_tokenizert::quoted_symbol
bool quoted_symbol
Definition: smt2_tokenizer.h:110
smt2_tokenizert::is_simple_symbol_character
static bool is_simple_symbol_character(char)
Definition: smt2_tokenizer.cpp:13
smt2_tokenizert::get_token_from_stream
void get_token_from_stream()
read a token from the input stream and store it in 'token'
Definition: smt2_tokenizer.cpp:213