cprover
symex_dereference.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Symbolic Execution of ANSI-C
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
11 
12 #include "goto_symex.h"
13 
14 #include <util/arith_tools.h>
15 #include <util/byte_operators.h>
16 #include <util/c_types.h>
17 #include <util/exception_utils.h>
18 #include <util/expr_iterator.h>
19 #include <util/expr_util.h>
20 #include <util/invariant.h>
22 
24 
26 
41  const exprt &expr,
42  statet &state,
43  bool keep_array)
44 {
45  exprt result;
46 
47  if(expr.id()==ID_byte_extract_little_endian ||
48  expr.id()==ID_byte_extract_big_endian)
49  {
50  // address_of(byte_extract(op, offset, t)) is
51  // address_of(op) + offset with adjustments for arrays
52 
54 
55  // recursive call
56  result = address_arithmetic(be.op(), state, keep_array);
57 
58  if(be.op().type().id() == ID_array && result.id() == ID_address_of)
59  {
61 
62  // turn &a of type T[i][j] into &(a[0][0])
63  for(const typet *t = &(a.type().subtype());
64  t->id() == ID_array && expr.type() != *t;
65  t = &(t->subtype()))
67  }
68 
69  // do (expr.type() *)(((char *)op)+offset)
70  result=typecast_exprt(result, pointer_type(char_type()));
71 
72  // there could be further dereferencing in the offset
73  exprt offset=be.offset();
74  dereference_rec(offset, state, false);
75 
76  result=plus_exprt(result, offset);
77 
78  // treat &array as &array[0]
79  const typet &expr_type = expr.type();
80  typet dest_type_subtype;
81 
82  if(expr_type.id()==ID_array && !keep_array)
83  dest_type_subtype=expr_type.subtype();
84  else
85  dest_type_subtype=expr_type;
86 
87  result=typecast_exprt(result, pointer_type(dest_type_subtype));
88  }
89  else if(expr.id()==ID_index ||
90  expr.id()==ID_member)
91  {
93  ode.build(expr, ns);
94 
95  const byte_extract_exprt be(
96  byte_extract_id(), ode.root_object(), ode.offset(), expr.type());
97 
98  // recursive call
99  result = address_arithmetic(be, state, keep_array);
100 
101  do_simplify(result);
102  }
103  else if(expr.id()==ID_dereference)
104  {
105  // ANSI-C guarantees &*p == p no matter what p is,
106  // even if it's complete garbage
107  // just grab the pointer, but be wary of further dereferencing
108  // in the pointer itself
109  result=to_dereference_expr(expr).pointer();
110  dereference_rec(result, state, false);
111  }
112  else if(expr.id()==ID_if)
113  {
114  if_exprt if_expr=to_if_expr(expr);
115 
116  // the condition is not an address
117  dereference_rec(if_expr.cond(), state, false);
118 
119  // recursive call
120  if_expr.true_case() =
121  address_arithmetic(if_expr.true_case(), state, keep_array);
122  if_expr.false_case() =
123  address_arithmetic(if_expr.false_case(), state, keep_array);
124 
125  result=if_expr;
126  }
127  else if(expr.id()==ID_symbol ||
128  expr.id()==ID_string_constant ||
129  expr.id()==ID_label ||
130  expr.id()==ID_array)
131  {
132  // give up, just dereference
133  result=expr;
134  dereference_rec(result, state, false);
135 
136  // turn &array into &array[0]
137  if(result.type().id() == ID_array && !keep_array)
138  result=index_exprt(result, from_integer(0, index_type()));
139 
140  // handle field-sensitive SSA symbol
141  mp_integer offset=0;
142  if(expr.id()==ID_symbol &&
143  expr.get_bool(ID_C_SSA_symbol))
144  {
145  auto offset_opt = compute_pointer_offset(expr, ns);
146  PRECONDITION(offset_opt.has_value());
147  offset = *offset_opt;
148  }
149 
150  if(offset>0)
151  {
152  const byte_extract_exprt be(
153  byte_extract_id(),
154  to_ssa_expr(expr).get_l1_object(),
155  from_integer(offset, index_type()),
156  expr.type());
157 
158  result = address_arithmetic(be, state, keep_array);
159 
160  do_simplify(result);
161  }
162  else
163  result=address_of_exprt(result);
164  }
165  else if(expr.id() == ID_typecast)
166  {
167  const typecast_exprt &tc_expr = to_typecast_expr(expr);
168 
169  result = address_arithmetic(tc_expr.op(), state, keep_array);
170 
171  // treat &array as &array[0]
172  const typet &expr_type = expr.type();
173  typet dest_type_subtype;
174 
175  if(expr_type.id() == ID_array && !keep_array)
176  dest_type_subtype = expr_type.subtype();
177  else
178  dest_type_subtype = expr_type;
179 
180  result = typecast_exprt(result, pointer_type(dest_type_subtype));
181  }
182  else
184  "goto_symext::address_arithmetic does not handle " + expr.id_string());
185 
186  const typet &expr_type = expr.type();
187  INVARIANT(
188  (expr_type.id() == ID_array && !keep_array) ||
189  pointer_type(expr_type) == result.type(),
190  "either non-persistent array or pointer to result");
191 
192  return result;
193 }
194 
202 void goto_symext::dereference_rec(exprt &expr, statet &state, bool write)
203 {
204  if(expr.id()==ID_dereference)
205  {
206  bool expr_is_not_null = false;
207 
208  if(state.threads.size() == 1)
209  {
210  const irep_idt &expr_function = state.source.function_id;
211  if(!expr_function.empty())
212  {
213  const dereference_exprt to_check =
215 
216  expr_is_not_null = path_storage.safe_pointers.at(expr_function)
217  .is_safe_dereference(to_check, state.source.pc);
218  }
219  }
220 
221  exprt tmp1;
222  tmp1.swap(to_dereference_expr(expr).pointer());
223 
224  // first make sure there are no dereferences in there
225  dereference_rec(tmp1, state, false);
226 
227  // Depending on the nature of the pointer expression, the recursive deref
228  // operation might have introduced a construct such as
229  // (x == &o1 ? o1 : o2).field, in which case we should simplify to push the
230  // member operator inside the if, then apply field-sensitivity to yield
231  // (x == &o1 ? o1..field : o2..field). value_set_dereferencet can then
232  // apply the dereference operation to each of o1..field and o2..field
233  // independently, as it special-cases the ternary conditional operator.
234  // There may also be index operators in tmp1 which can now be resolved to
235  // constant array cell references, so we replace symbols with constants
236  // first, hoping for a transformation such as
237  // (x == &o1 ? o1 : o2)[idx] =>
238  // (x == &o1 ? o1 : o2)[2] =>
239  // (x == &o1 ? o1[[2]] : o2[[2]])
240  // Note we don't L2 rename non-constant symbols at this point, because the
241  // value-set works in terms of L1 names and we don't want to ask it to
242  // dereference an L2 pointer, which it would not have an entry for.
243 
244  tmp1 = state.rename<L1_WITH_CONSTANT_PROPAGATION>(tmp1, ns).get();
245 
246  do_simplify(tmp1);
247 
249  {
250  // make sure simplify has not re-introduced any dereferencing that
251  // had previously been cleaned away
252  INVARIANT(
253  !has_subexpr(tmp1, ID_dereference),
254  "simplify re-introduced dereferencing");
255  }
256 
257  tmp1 = state.field_sensitivity.apply(ns, state, std::move(tmp1), false);
258 
259  // we need to set up some elaborate call-backs
260  symex_dereference_statet symex_dereference_state(state, ns);
261 
263  ns,
264  state.symbol_table,
265  symex_dereference_state,
267  expr_is_not_null);
268 
269  // std::cout << "**** " << format(tmp1) << '\n';
270  exprt tmp2 = dereference.dereference(tmp1);
271  // std::cout << "**** " << format(tmp2) << '\n';
272 
273  expr.swap(tmp2);
274 
275  // this may yield a new auto-object
276  trigger_auto_object(expr, state);
277  }
278  else if(
279  expr.id() == ID_index && to_index_expr(expr).array().id() == ID_member &&
280  to_array_type(to_index_expr(expr).array().type()).size().is_zero())
281  {
282  // This is an expression of the form x.a[i],
283  // where a is a zero-sized array. This gets
284  // re-written into *(&x.a+i)
285 
286  index_exprt index_expr=to_index_expr(expr);
287 
288  address_of_exprt address_of_expr(index_expr.array());
289  address_of_expr.type()=pointer_type(expr.type());
290 
291  dereference_exprt tmp{plus_exprt{address_of_expr, index_expr.index()}};
293 
294  // recursive call
295  dereference_rec(tmp, state, write);
296 
297  expr.swap(tmp);
298  }
299  else if(expr.id()==ID_index &&
300  to_index_expr(expr).array().type().id()==ID_pointer)
301  {
302  // old stuff, will go away
303  UNREACHABLE;
304  }
305  else if(expr.id()==ID_address_of)
306  {
307  address_of_exprt &address_of_expr=to_address_of_expr(expr);
308 
309  exprt &object=address_of_expr.object();
310 
311  expr = address_arithmetic(
312  object,
313  state,
314  to_pointer_type(expr.type()).subtype().id() == ID_array);
315  }
316  else if(expr.id()==ID_typecast)
317  {
318  exprt &tc_op=to_typecast_expr(expr).op();
319 
320  // turn &array into &array[0] when casting to pointer-to-element-type
321  if(
322  tc_op.id() == ID_address_of &&
323  to_address_of_expr(tc_op).object().type().id() == ID_array &&
324  expr.type() ==
325  pointer_type(to_address_of_expr(tc_op).object().type().subtype()))
326  {
327  expr=
329  index_exprt(
330  to_address_of_expr(tc_op).object(),
331  from_integer(0, index_type())));
332 
333  dereference_rec(expr, state, write);
334  }
335  else
336  {
337  dereference_rec(tc_op, state, write);
338  }
339  }
340  else
341  {
342  Forall_operands(it, expr)
343  dereference_rec(*it, state, write);
344  }
345 }
346 
347 static exprt
348 apply_to_objects_in_dereference(exprt e, const std::function<exprt(exprt)> &f)
349 {
350  if(auto deref = expr_try_dynamic_cast<dereference_exprt>(e))
351  {
352  deref->op() = f(std::move(deref->op()));
353  return *deref;
354  }
355 
356  for(auto &sub : e.operands())
357  sub = apply_to_objects_in_dereference(std::move(sub), f);
358  return e;
359 }
360 
398 void goto_symext::dereference(exprt &expr, statet &state, bool write)
399 {
400  PRECONDITION(!state.call_stack().empty());
401 
402  // Symbols whose address is taken need to be renamed to level 1
403  // in order to distinguish addresses of local variables
404  // from different frames.
405  expr = apply_to_objects_in_dereference(std::move(expr), [&](exprt e) {
406  return state.field_sensitivity.apply(
407  ns, state, state.rename<L1>(std::move(e), ns).get(), false);
408  });
409 
410  // start the recursion!
411  dereference_rec(expr, state, write);
412  // dereferencing may introduce new symbol_exprt
413  // (like __CPROVER_memory)
414  expr = state.rename<L1>(std::move(expr), ns).get();
415 
416  // Dereferencing is likely to introduce new member-of-if constructs --
417  // for example, "x->field" may have become "(x == &o1 ? o1 : o2).field."
418  // Run expression simplification, which converts that to
419  // (x == &o1 ? o1.field : o2.field))
420  // before applying field sensitivity. Field sensitivity can then turn such
421  // field-of-symbol expressions into atomic SSA expressions instead of having
422  // to rewrite all of 'o1' otherwise.
423  // Even without field sensitivity this can be beneficial: for example,
424  // "(b ? s1 : s2).member := X" results in
425  // (b ? s1 : s2) := (b ? s1 : s2) with (member := X)
426  // and then
427  // s1 := b ? ((b ? s1 : s2) with (member := X)) : s1
428  // when all we need is
429  // s1 := s1 with (member := X) [and guard b]
430  // s2 := s2 with (member := X) [and guard !b]
431  do_simplify(expr);
432 
434  {
435  // make sure simplify has not re-introduced any dereferencing that
436  // had previously been cleaned away
437  INVARIANT(
438  !has_subexpr(expr, ID_dereference),
439  "simplify re-introduced dereferencing");
440  }
441 
442  expr = state.field_sensitivity.apply(ns, state, std::move(expr), write);
443 }
UNREACHABLE
#define UNREACHABLE
This should be used to mark dead code.
Definition: invariant.h:504
exception_utils.h
dstringt
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:37
pointer_offset_size.h
Pointer Logic.
goto_symext::dereference_rec
void dereference_rec(exprt &, statet &, bool write)
If expr is a dereference_exprt, replace it with explicit references to the objects it may point to.
Definition: symex_dereference.cpp:202
typet::subtype
const typet & subtype() const
Definition: type.h:47
has_subexpr
bool has_subexpr(const exprt &expr, const std::function< bool(const exprt &)> &pred)
returns true if the expression has a subexpression that satisfies pred
Definition: expr_util.cpp:139
Forall_operands
#define Forall_operands(it, expr)
Definition: expr.h:24
arith_tools.h
object_descriptor_exprt::build
void build(const exprt &expr, const namespacet &ns)
Given an expression expr, attempt to find the underlying object it represents by skipping over type c...
Definition: std_expr.cpp:141
address_of_exprt::object
exprt & object()
Definition: std_expr.h:2795
L1
@ L1
Definition: renamed.h:18
typet
The type of an expression, extends irept.
Definition: type.h:29
goto_symext::address_arithmetic
exprt address_arithmetic(const exprt &, statet &, bool keep_array)
Transforms an lvalue expression by replacing any dereference operations it contains with explicit ref...
Definition: symex_dereference.cpp:40
symex_targett::sourcet::pc
goto_programt::const_targett pc
Definition: symex_target.h:43
to_byte_extract_expr
const byte_extract_exprt & to_byte_extract_expr(const exprt &expr)
Definition: byte_operators.h:57
to_index_expr
const index_exprt & to_index_expr(const exprt &expr)
Cast an exprt to an index_exprt.
Definition: std_expr.h:1347
to_if_expr
const if_exprt & to_if_expr(const exprt &expr)
Cast an exprt to an if_exprt.
Definition: std_expr.h:3029
value_set_dereference.h
Pointer Dereferencing.
dereference_exprt
Operator to dereference a pointer.
Definition: std_expr.h:2888
goto_symext::path_storage
path_storaget & path_storage
Symbolic execution paths to be resumed later.
Definition: goto_symex.h:796
goto_symex_statet
Central data structure: state.
Definition: goto_symex_state.h:46
unsupported_operation_exceptiont
Thrown when we encounter an instruction, parameters to an instruction etc.
Definition: exception_utils.h:144
mp_integer
BigInt mp_integer
Definition: mp_arith.h:19
if_exprt
The trinary if-then-else operator.
Definition: std_expr.h:2964
object_descriptor_exprt
Split an expression into a base object and a (byte) offset.
Definition: std_expr.h:1832
invariant.h
plus_exprt
The plus expression Associativity is not specified.
Definition: std_expr.h:881
exprt
Base class for all expressions.
Definition: expr.h:53
goto_symex_statet::source
symex_targett::sourcet source
Definition: goto_symex_state.h:64
goto_symext::trigger_auto_object
void trigger_auto_object(const exprt &, statet &)
Definition: auto_objects.cpp:78
symex_dereference_state.h
Symbolic Execution of ANSI-C.
field_sensitivityt::apply
exprt apply(const namespacet &ns, goto_symex_statet &state, exprt expr, bool write) const
Turn an expression expr into a field-sensitive SSA expression.
Definition: field_sensitivity.cpp:21
index_type
bitvector_typet index_type()
Definition: c_types.cpp:16
if_exprt::false_case
exprt & false_case()
Definition: std_expr.h:3001
compute_pointer_offset
optionalt< mp_integer > compute_pointer_offset(const exprt &expr, const namespacet &ns)
Definition: pointer_offset_size.cpp:507
byte_extract_id
irep_idt byte_extract_id()
Definition: byte_operators.cpp:13
exprt::type
typet & type()
Return the type of the expression.
Definition: expr.h:81
irept::get_bool
bool get_bool(const irep_namet &name) const
Definition: irep.cpp:64
goto_symex_statet::threads
std::vector< threadt > threads
Definition: goto_symex_state.h:198
byte_operators.h
Expression classes for byte-level operators.
goto_symex_statet::call_stack
call_stackt & call_stack()
Definition: goto_symex_state.h:147
to_ssa_expr
const ssa_exprt & to_ssa_expr(const exprt &expr)
Cast a generic exprt to an ssa_exprt.
Definition: ssa_expr.h:148
goto_symext::dereference
virtual void dereference(exprt &, statet &, bool write)
Replace all dereference operations within expr with explicit references to the objects they may refer...
Definition: symex_dereference.cpp:398
to_address_of_expr
const address_of_exprt & to_address_of_expr(const exprt &expr)
Cast an exprt to an address_of_exprt.
Definition: std_expr.h:2823
goto_symex_statet::symbol_table
symbol_tablet symbol_table
contains symbols that are minted during symbolic execution, such as dynamically created objects etc.
Definition: goto_symex_state.h:69
byte_extract_exprt::op
exprt & op()
Definition: byte_operators.h:43
PRECONDITION
#define PRECONDITION(CONDITION)
Definition: invariant.h:464
dereference_exprt::pointer
exprt & pointer()
Definition: std_expr.h:2901
goto_symex.h
Symbolic Execution.
symex_dereference_statet
Callback object that goto_symext::dereference_rec provides to value_set_dereferencet to provide value...
Definition: symex_dereference_state.h:26
irept::id_string
const std::string & id_string() const
Definition: irep.h:421
index_exprt::index
exprt & index()
Definition: std_expr.h:1319
pointer_type
pointer_typet pointer_type(const typet &subtype)
Definition: c_types.cpp:243
index_exprt::array
exprt & array()
Definition: std_expr.h:1309
irept::swap
void swap(irept &irep)
Definition: irep.h:463
get_original_name
exprt get_original_name(exprt expr)
Undo all levels of renaming.
Definition: renaming_level.cpp:169
irept::id
const irep_idt & id() const
Definition: irep.h:418
dstringt::empty
bool empty() const
Definition: dstring.h:88
unary_exprt::op
const exprt & op() const
Definition: std_expr.h:281
to_pointer_type
const pointer_typet & to_pointer_type(const typet &type)
Cast a typet to a pointer_typet.
Definition: std_types.h:1526
object_descriptor_exprt::root_object
const exprt & root_object() const
Definition: std_expr.cpp:218
L1_WITH_CONSTANT_PROPAGATION
@ L1_WITH_CONSTANT_PROPAGATION
Definition: renamed.h:19
goto_symext::do_simplify
virtual void do_simplify(exprt &expr)
Definition: goto_symex.cpp:32
goto_symex_statet::field_sensitivity
field_sensitivityt field_sensitivity
Definition: goto_symex_state.h:124
char_type
bitvector_typet char_type()
Definition: c_types.cpp:114
byte_extract_exprt::offset
exprt & offset()
Definition: byte_operators.h:44
value_set_dereferencet
Wrapper for a function dereferencing pointer expressions using a value set.
Definition: value_set_dereference.h:27
goto_symext::ns
namespacet ns
Initialized just before symbolic execution begins, to point to both outer_symbol_table and the symbol...
Definition: goto_symex.h:256
expr_util.h
Deprecated expression utility functions.
to_dereference_expr
const dereference_exprt & to_dereference_expr(const exprt &expr)
Cast an exprt to a dereference_exprt.
Definition: std_expr.h:2944
apply_to_objects_in_dereference
static exprt apply_to_objects_in_dereference(exprt e, const std::function< exprt(exprt)> &f)
Definition: symex_dereference.cpp:348
expr_iterator.h
Forward depth-first search iterators These iterators' copy operations are expensive,...
if_exprt::true_case
exprt & true_case()
Definition: std_expr.h:2991
goto_symext::language_mode
irep_idt language_mode
language_mode: ID_java, ID_C or another language identifier if we know the source language in use,...
Definition: goto_symex.h:239
path_storaget::safe_pointers
std::unordered_map< irep_idt, local_safe_pointerst > safe_pointers
Map function identifiers to local_safe_pointerst instances.
Definition: path_storage.h:100
from_integer
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Definition: arith_tools.cpp:99
to_typecast_expr
const typecast_exprt & to_typecast_expr(const exprt &expr)
Cast an exprt to a typecast_exprt.
Definition: std_expr.h:2047
if_exprt::cond
exprt & cond()
Definition: std_expr.h:2981
goto_symext::symex_config
const symex_configt symex_config
The configuration to use for this symbolic execution.
Definition: goto_symex.h:183
byte_extract_exprt
Expression of type type extracted from some object op starting at position offset (given in number of...
Definition: byte_operators.h:29
to_array_type
const array_typet & to_array_type(const typet &type)
Cast a typet to an array_typet.
Definition: std_types.h:1011
goto_symex_statet::rename
NODISCARD renamedt< exprt, level > rename(exprt expr, const namespacet &ns)
Rewrites symbol expressions in exprt, applying a suffix to each symbol reflecting its most recent ver...
exprt::operands
operandst & operands()
Definition: expr.h:95
index_exprt
Array index operator.
Definition: std_expr.h:1293
address_of_exprt
Operator to return the address of an object.
Definition: std_expr.h:2786
exprt::add_source_location
source_locationt & add_source_location()
Definition: expr.h:259
symex_configt::run_validation_checks
bool run_validation_checks
Should the additional validation checks be run? If this flag is set the checks for renaming (both lev...
Definition: symex_config.h:44
typecast_exprt
Semantic type conversion.
Definition: std_expr.h:2013
symex_targett::sourcet::function_id
irep_idt function_id
Definition: symex_target.h:40
exprt::source_location
const source_locationt & source_location() const
Definition: expr.h:254
c_types.h
validation_modet::INVARIANT
@ INVARIANT
object_descriptor_exprt::offset
exprt & offset()
Definition: std_expr.h:1870