cprover
format_strings.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Format String Parser
4 
5 Author: CM Wintersteiger
6 
7 \*******************************************************************/
8 
11 
12 #include "format_strings.h"
13 
14 #include <util/c_types.h>
15 #include <util/exception_utils.h>
16 #include <util/invariant.h>
17 #include <util/std_expr.h>
18 #include <util/std_types.h>
19 
20 #include <cctype>
21 
23  std::string::const_iterator &it,
24  format_tokent &curtok)
25 {
26  while(*it=='#' || *it=='0' ||
27  *it=='-' || *it==' ' || *it=='+')
28  {
29  switch(*it)
30  {
31  case '#':
32  curtok.flags.push_back(format_tokent::flag_typet::ALTERNATE); break;
33  case '0':
34  curtok.flags.push_back(format_tokent::flag_typet::ZERO_PAD); break;
35  case '-':
36  curtok.flags.push_back(format_tokent::flag_typet::LEFT_ADJUST); break;
37  case ' ':
38  curtok.flags.push_back(format_tokent::flag_typet::SIGNED_SPACE); break;
39  case '+':
40  curtok.flags.push_back(format_tokent::flag_typet::SIGN); break;
41  default:
43  std::string("unsupported format specifier flag: '") + *it + "'");
44  }
45  it++;
46  }
47 }
48 
50  std::string::const_iterator &it,
51  format_tokent &curtok)
52 {
53  if(*it=='*')
54  {
56  it++;
57  }
58 
59  std::string tmp;
60  for( ; isdigit(*it); it++) tmp+=*it;
61  curtok.field_width=string2integer(tmp);
62 }
63 
65  std::string::const_iterator &it,
66  format_tokent &curtok)
67 {
68  if(*it=='.')
69  {
70  it++;
71 
72  if(*it=='*')
73  {
75  it++;
76  }
77  else
78  {
79  std::string tmp;
80  for( ; isdigit(*it); it++) tmp+=*it;
81  curtok.precision=string2integer(tmp);
82  }
83  }
84 }
85 
87  std::string::const_iterator &it,
88  format_tokent &curtok)
89 {
90  if(*it=='h')
91  {
92  it++;
93  if(*it=='h')
94  it++;
96  }
97  else if(*it=='l')
98  {
99  it++;
100  if(*it=='l')
101  it++;
103  }
104  else if(*it=='L')
105  {
106  it++;
108  }
109  else if(*it=='j')
110  {
111  it++;
113  }
114  else if(*it=='t')
115  {
116  it++;
118  }
119 }
120 
122  const std::string &arg_string,
123  std::string::const_iterator &it,
124  format_tokent &curtok)
125 {
126  switch(*it)
127  {
128  case 'd':
129  case 'i':
132  break;
133  case 'o':
136  break;
137  case 'u':
140  break;
141  case 'x':
142  case 'X':
145  break;
146  case 'e':
147  case 'E': curtok.type=format_tokent::token_typet::FLOAT; break;
148  case 'f':
149  case 'F': curtok.type=format_tokent::token_typet::FLOAT; break;
150  case 'g':
151  case 'G': curtok.type=format_tokent::token_typet::FLOAT; break;
152  case 'a':
153  case 'A': curtok.type=format_tokent::token_typet::FLOAT; break;
154  case 'c': curtok.type=format_tokent::token_typet::CHAR; break;
155  case 's': curtok.type=format_tokent::token_typet::STRING; break;
156  case 'p': curtok.type=format_tokent::token_typet::POINTER; break;
157  case '%':
159  curtok.value="%";
160  break;
161  case '[': // pattern matching in, e.g., fscanf.
162  {
163  std::string tmp;
164  it++;
165  if(*it=='^') // if it's there, it must be first
166  {
167  tmp+='^'; it++;
168  if(*it==']') // if it's there, it must be here
169  {
170  tmp+=']'; it++;
171  }
172  }
173 
174  for( ; it!=arg_string.end() && *it!=']'; it++)
175  tmp+=*it;
176 
177  break;
178  }
179 
180  default:
182  std::string("unsupported format conversion specifier: '") + *it + "'");
183  }
184  it++;
185 }
186 
187 format_token_listt parse_format_string(const std::string &arg_string)
188 {
189  format_token_listt token_list;
190 
191  std::string::const_iterator it=arg_string.begin();
192 
193  while(it!=arg_string.end())
194  {
195  if(*it=='%')
196  {
197  token_list.push_back(format_tokent());
198  format_tokent &curtok=token_list.back();
199  it++;
200 
201  parse_flags(it, curtok);
202  parse_field_width(it, curtok);
203  parse_precision(it, curtok);
204  parse_length_modifier(it, curtok);
205  parse_conversion_specifier(arg_string, it, curtok);
206  }
207  else
208  {
209  if(token_list.empty() ||
210  token_list.back().type!=format_tokent::token_typet::TEXT)
211  token_list.push_back(format_tokent(format_tokent::token_typet::TEXT));
212 
213  std::string tmp;
214  for( ; it!=arg_string.end() && *it!='%'; it++)
215  tmp+=*it;
216 
217  INVARIANT(
218  !token_list.empty() &&
219  token_list.back().type == format_tokent::token_typet::TEXT,
220  "must already have a TEXT token at the back of the token list");
221 
222  token_list.back().value=tmp;
223  }
224  }
225 
226  return token_list;
227 }
228 
230 {
231  switch(token.type)
232  {
234  switch(token.length_modifier)
235  {
238  return signed_char_type();
239  else
240  return unsigned_char_type();
241 
244  return signed_short_int_type();
245  else
246  return unsigned_short_int_type();
247 
250  return signed_long_int_type();
251  else
252  return unsigned_long_int_type();
253 
256  return signed_long_long_int_type();
257  else
259 
265  return signed_int_type();
266  else
267  return unsigned_int_type();
268  }
269 
271  switch(token.length_modifier)
272  {
281  return float_type();
282  }
283 
285  switch(token.length_modifier)
286  {
295  return char_type();
296  }
297 
299  return pointer_type(void_type());
300 
302  switch(token.length_modifier)
303  {
305  return array_typet(wchar_t_type(), nil_exprt());
313  return array_typet(char_type(), nil_exprt());
314  }
315 
318  return {};
319  }
320 
321  UNREACHABLE;
322 }
UNREACHABLE
#define UNREACHABLE
This should be used to mark dead code.
Definition: invariant.h:504
exception_utils.h
parse_format_string
format_token_listt parse_format_string(const std::string &arg_string)
Definition: format_strings.cpp:187
parse_length_modifier
void parse_length_modifier(std::string::const_iterator &it, format_tokent &curtok)
Definition: format_strings.cpp:86
format_tokent::value
irep_idt value
Definition: format_strings.h:84
signed_long_long_int_type
signedbv_typet signed_long_long_int_type()
Definition: c_types.cpp:87
signed_char_type
signedbv_typet signed_char_type()
Definition: c_types.cpp:142
format_tokent::token_typet::STRING
@ STRING
format_tokent::representationt::UNSIGNED_OCT
@ UNSIGNED_OCT
long_double_type
floatbv_typet long_double_type()
Definition: c_types.cpp:201
unsupported_operation_exceptiont
Thrown when we encounter an instruction, parameters to an instruction etc.
Definition: exception_utils.h:144
string2integer
const mp_integer string2integer(const std::string &n, unsigned base)
Definition: mp_arith.cpp:57
invariant.h
parse_flags
void parse_flags(std::string::const_iterator &it, format_tokent &curtok)
Definition: format_strings.cpp:22
format_strings.h
Format String Parser.
format_tokent::precision
mp_integer precision
Definition: format_strings.h:81
format_tokent::length_modifierst::LEN_undef
@ LEN_undef
format_tokent::field_width
mp_integer field_width
Definition: format_strings.h:80
format_tokent::length_modifier
length_modifierst length_modifier
Definition: format_strings.h:82
unsigned_char_type
unsignedbv_typet unsigned_char_type()
Definition: c_types.cpp:135
void_type
empty_typet void_type()
Definition: c_types.cpp:253
format_tokent::length_modifierst::LEN_h
@ LEN_h
format_tokent::representationt::SIGNED_DEC
@ SIGNED_DEC
unsigned_short_int_type
unsignedbv_typet unsigned_short_int_type()
Definition: c_types.cpp:51
unsigned_long_long_int_type
unsignedbv_typet unsigned_long_long_int_type()
Definition: c_types.cpp:101
format_tokent::token_typet::POINTER
@ POINTER
format_tokent::representationt::UNSIGNED_DEC
@ UNSIGNED_DEC
unsigned_long_int_type
unsignedbv_typet unsigned_long_int_type()
Definition: c_types.cpp:94
parse_field_width
void parse_field_width(std::string::const_iterator &it, format_tokent &curtok)
Definition: format_strings.cpp:49
parse_conversion_specifier
void parse_conversion_specifier(const std::string &arg_string, std::string::const_iterator &it, format_tokent &curtok)
Definition: format_strings.cpp:121
format_token_listt
std::list< format_tokent > format_token_listt
Definition: format_strings.h:87
signed_int_type
signedbv_typet signed_int_type()
Definition: c_types.cpp:30
format_tokent::length_modifierst::LEN_hh
@ LEN_hh
format_tokent::token_typet::INT
@ INT
format_tokent::length_modifierst::LEN_t
@ LEN_t
format_tokent::flag_typet::ZERO_PAD
@ ZERO_PAD
nil_exprt
The NIL expression.
Definition: std_expr.h:3973
wchar_t_type
bitvector_typet wchar_t_type()
Definition: c_types.cpp:149
std_types.h
Pre-defined types.
signed_short_int_type
signedbv_typet signed_short_int_type()
Definition: c_types.cpp:37
float_type
floatbv_typet float_type()
Definition: c_types.cpp:185
format_tokent::flags
std::list< flag_typet > flags
Definition: format_strings.h:79
format_tokent::flag_typet::LEFT_ADJUST
@ LEFT_ADJUST
format_tokent::representationt::UNSIGNED_HEX
@ UNSIGNED_HEX
pointer_type
pointer_typet pointer_type(const typet &subtype)
Definition: c_types.cpp:243
unsigned_int_type
unsignedbv_typet unsigned_int_type()
Definition: c_types.cpp:44
format_tokent::length_modifierst::LEN_L
@ LEN_L
format_tokent::length_modifierst::LEN_j
@ LEN_j
format_tokent::flag_typet::ALTERNATE
@ ALTERNATE
optionalt
nonstd::optional< T > optionalt
Definition: optional.h:35
double_type
floatbv_typet double_type()
Definition: c_types.cpp:193
char_type
bitvector_typet char_type()
Definition: c_types.cpp:114
parse_precision
void parse_precision(std::string::const_iterator &it, format_tokent &curtok)
Definition: format_strings.cpp:64
format_tokent::flag_typet::SIGN
@ SIGN
format_tokent::token_typet::CHAR
@ CHAR
array_typet
Arrays with given size.
Definition: std_types.h:965
format_tokent::token_typet::UNKNOWN
@ UNKNOWN
format_tokent::length_modifierst::LEN_l
@ LEN_l
format_tokent::type
token_typet type
Definition: format_strings.h:78
format_tokent::representation
representationt representation
Definition: format_strings.h:83
get_type
optionalt< typet > get_type(const format_tokent &token)
Definition: format_strings.cpp:229
signed_long_int_type
signedbv_typet signed_long_int_type()
Definition: c_types.cpp:80
format_tokent
Definition: format_strings.h:22
format_tokent::token_typet::FLOAT
@ FLOAT
INVARIANT
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
Definition: invariant.h:424
format_tokent::token_typet::TEXT
@ TEXT
format_tokent::flag_typet::SIGNED_SPACE
@ SIGNED_SPACE
std_expr.h
API to expression classes.
c_types.h
format_tokent::flag_typet::ASTERISK
@ ASTERISK
format_tokent::length_modifierst::LEN_ll
@ LEN_ll