cprover
remove_instanceof.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Remove Instance-of Operators
4 
5 Author: Chris Smowton, chris.smowton@diffblue.com
6 
7 \*******************************************************************/
8 
11 
12 #include "remove_instanceof.h"
13 #include "java_utils.h"
14 
18 
20 
21 #include <util/arith_tools.h>
22 
23 #include <sstream>
24 
26 {
27 public:
36  {
37  }
38 
39  // Lower instanceof for a single function
40  bool lower_instanceof(const irep_idt &function_identifier, goto_programt &);
41 
42  // Lower instanceof for a single instruction
43  bool lower_instanceof(
44  const irep_idt &function_identifier,
45  goto_programt &,
47 
48 protected:
53 
54  bool lower_instanceof(
55  const irep_idt &function_identifier,
56  exprt &,
57  goto_programt &,
59 };
60 
71  const exprt &classid_field,
72  const irep_idt &target_type,
73  const class_hierarchyt &class_hierarchy)
74 {
75  std::vector<irep_idt> children =
76  class_hierarchy.get_children_trans(target_type);
77  children.push_back(target_type);
78  // Sort alphabetically to make order of generated disjuncts
79  // independent of class loading order
80  std::sort(
81  children.begin(), children.end(), [](const irep_idt &a, const irep_idt &b) {
82  return a.compare(b) < 0;
83  });
84 
85  exprt::operandst or_ops;
86  for(const auto &class_name : children)
87  {
88  constant_exprt class_expr(class_name, string_typet());
89  equal_exprt test(classid_field, class_expr);
90  or_ops.push_back(test);
91  }
92 
93  return disjunction(or_ops);
94 }
95 
105  const irep_idt &function_identifier,
106  exprt &expr,
107  goto_programt &goto_program,
108  goto_programt::targett this_inst)
109 {
110  if(expr.id()!=ID_java_instanceof)
111  {
112  bool changed = false;
113  Forall_operands(it, expr)
114  changed |=
115  lower_instanceof(function_identifier, *it, goto_program, this_inst);
116  return changed;
117  }
118 
119  INVARIANT(
120  expr.operands().size()==2,
121  "java_instanceof should have two operands");
122 
123  const exprt &check_ptr = to_binary_expr(expr).op0();
124  INVARIANT(
125  check_ptr.type().id()==ID_pointer,
126  "instanceof first operand should be a pointer");
127 
128  const exprt &target_arg = to_binary_expr(expr).op1();
129  INVARIANT(
130  target_arg.id()==ID_type,
131  "instanceof second operand should be a type");
132 
133  INVARIANT(
134  target_arg.type().id() == ID_struct_tag,
135  "instanceof second operand should have a simple type");
136 
137  const auto &target_type = to_struct_tag_type(target_arg.type());
138 
139  const auto underlying_type_and_dimension =
141 
142  bool target_type_is_reference_array =
143  underlying_type_and_dimension.second >= 1 &&
144  can_cast_type<java_reference_typet>(underlying_type_and_dimension.first);
145 
146  // If we're casting to a reference array type, check
147  // @class_identifier == "java::array[reference]" &&
148  // @array_dimension == target_array_dimension &&
149  // @array_element_type (subtype of) target_array_element_type
150  // Otherwise just check
151  // @class_identifier (subtype of) target_type
152 
153  // Exception: when the target type is Object, nothing needs checking; when
154  // it is Object[], Object[][] etc, then we check that
155  // @array_dimension >= target_array_dimension (because
156  // String[][] (subtype of) Object[] for example) and don't check the element
157  // type.
158 
159  // All tests are made directly against a pointer to the object whose type is
160  // queried. By operating directly on a pointer rather than using a temporary
161  // (x->@clsid == "A" rather than id = x->@clsid; id == "A") we enable symex's
162  // value-set filtering to discard no-longer-viable candidates for "x" (in the
163  // case where 'x' is a symbol, not a general expression like x->y->@clsid).
164  // This means there are more clean dereference operations (ones where there
165  // is no possibility of reading a null, invalid or incorrectly-typed object),
166  // and less pessimistic aliasing assumptions possibly causing goto-symex to
167  // explore in-fact-unreachable paths.
168 
169  // In all cases require the input pointer is not null for any cast to succeed:
170 
171  std::vector<exprt> test_conjuncts;
172  test_conjuncts.push_back(notequal_exprt(
173  check_ptr, null_pointer_exprt(to_pointer_type(check_ptr.type()))));
174 
175  auto jlo = to_struct_tag_type(java_lang_object_type().subtype());
176 
177  exprt object_class_identifier_field =
178  get_class_identifier_field(check_ptr, jlo, ns);
179 
180  if(target_type_is_reference_array)
181  {
182  const auto &underlying_type =
183  to_struct_tag_type(underlying_type_and_dimension.first.subtype());
184 
185  test_conjuncts.push_back(equal_exprt(
186  object_class_identifier_field,
188 
189  exprt object_array_dimension = get_array_dimension_field(check_ptr);
190  constant_exprt target_array_dimension = from_integer(
191  underlying_type_and_dimension.second, object_array_dimension.type());
192 
193  if(underlying_type == jlo)
194  {
195  test_conjuncts.push_back(binary_relation_exprt(
196  object_array_dimension, ID_ge, target_array_dimension));
197  }
198  else
199  {
200  test_conjuncts.push_back(
201  equal_exprt(object_array_dimension, target_array_dimension));
202  test_conjuncts.push_back(subtype_expr(
203  get_array_element_type_field(check_ptr),
204  underlying_type.get_identifier(),
205  class_hierarchy));
206  }
207  }
208  else if(target_type != jlo)
209  {
210  test_conjuncts.push_back(subtype_expr(
211  get_class_identifier_field(check_ptr, jlo, ns),
212  target_type.get_identifier(),
213  class_hierarchy));
214  }
215 
216  expr = conjunction(test_conjuncts);
217 
218  return true;
219 }
220 
221 static bool contains_instanceof(const exprt &e)
222 {
223  if(e.id() == ID_java_instanceof)
224  return true;
225 
226  for(const exprt &subexpr : e.operands())
227  {
228  if(contains_instanceof(subexpr))
229  return true;
230  }
231 
232  return false;
233 }
234 
243  const irep_idt &function_identifier,
244  goto_programt &goto_program,
245  goto_programt::targett target)
246 {
247  if(target->is_target() &&
248  (contains_instanceof(target->code) || contains_instanceof(target->guard)))
249  {
250  // If this is a branch target, add a skip beforehand so we can splice new
251  // GOTO programs before the target instruction without inserting into the
252  // wrong basic block.
253  goto_program.insert_before_swap(target);
254  target->turn_into_skip();
255  // Actually alter the now-moved instruction:
256  ++target;
257  }
258 
259  return lower_instanceof(
260  function_identifier, target->code, goto_program, target) |
262  function_identifier, target->guard, goto_program, target);
263 }
264 
272  const irep_idt &function_identifier,
273  goto_programt &goto_program)
274 {
275  bool changed=false;
276  for(goto_programt::instructionst::iterator target=
277  goto_program.instructions.begin();
278  target!=goto_program.instructions.end();
279  ++target)
280  {
281  changed =
282  lower_instanceof(function_identifier, goto_program, target) || changed;
283  }
284  if(!changed)
285  return false;
286  goto_program.update();
287  return true;
288 }
289 
300  const irep_idt &function_identifier,
301  goto_programt::targett target,
302  goto_programt &goto_program,
303  symbol_table_baset &symbol_table,
304  const class_hierarchyt &class_hierarchy,
305  message_handlert &message_handler)
306 {
307  remove_instanceoft rem(symbol_table, class_hierarchy, message_handler);
308  rem.lower_instanceof(function_identifier, goto_program, target);
309 }
310 
320  const irep_idt &function_identifier,
322  symbol_table_baset &symbol_table,
323  const class_hierarchyt &class_hierarchy,
324  message_handlert &message_handler)
325 {
326  remove_instanceoft rem(symbol_table, class_hierarchy, message_handler);
327  rem.lower_instanceof(function_identifier, function.body);
328 }
329 
338  goto_functionst &goto_functions,
339  symbol_table_baset &symbol_table,
340  const class_hierarchyt &class_hierarchy,
341  message_handlert &message_handler)
342 {
343  remove_instanceoft rem(symbol_table, class_hierarchy, message_handler);
344  bool changed=false;
345  for(auto &f : goto_functions.function_map)
346  changed = rem.lower_instanceof(f.first, f.second.body) || changed;
347  if(changed)
348  goto_functions.compute_location_numbers();
349 }
350 
360  goto_modelt &goto_model,
361  const class_hierarchyt &class_hierarchy,
362  message_handlert &message_handler)
363 {
365  goto_model.goto_functions,
366  goto_model.symbol_table,
367  class_hierarchy,
368  message_handler);
369 }
dstringt
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:37
can_cast_type< java_reference_typet >
bool can_cast_type< java_reference_typet >(const typet &type)
Definition: java_types.h:626
conjunction
exprt conjunction(const exprt::operandst &op)
1) generates a conjunction for two or more operands 2) for one operand, returns the operand 3) return...
Definition: std_expr.cpp:51
class_hierarchyt
Non-graph-based representation of the class hierarchy.
Definition: class_hierarchy.h:43
Forall_operands
#define Forall_operands(it, expr)
Definition: expr.h:24
arith_tools.h
get_array_dimension_field
exprt get_array_dimension_field(const exprt &pointer)
Definition: java_types.cpp:205
get_class_identifier_field
exprt get_class_identifier_field(const exprt &this_expr_in, const struct_tag_typet &suggested_type, const namespacet &ns)
Definition: class_identifier.cpp:56
goto_programt::update
void update()
Update all indices.
Definition: goto_program.cpp:541
remove_instanceoft::ns
namespacet ns
Definition: remove_instanceof.cpp:51
goto_functionst::compute_location_numbers
void compute_location_numbers()
Definition: goto_functions.cpp:18
exprt
Base class for all expressions.
Definition: expr.h:53
goto_modelt
Definition: goto_model.h:26
goto_convert.h
Program Transformation.
goto_functionst::function_map
function_mapt function_map
Definition: goto_functions.h:27
equal_exprt
Equality.
Definition: std_expr.h:1190
remove_instanceoft::class_hierarchy
const class_hierarchyt & class_hierarchy
Definition: remove_instanceof.cpp:50
notequal_exprt
Disequality.
Definition: std_expr.h:1248
remove_instanceof
void remove_instanceof(const irep_idt &function_identifier, goto_programt::targett target, goto_programt &goto_program, symbol_table_baset &symbol_table, const class_hierarchyt &class_hierarchy, message_handlert &message_handler)
Replace an instanceof in the expression or guard of the passed instruction of the given function body...
Definition: remove_instanceof.cpp:299
to_binary_expr
const binary_exprt & to_binary_expr(const exprt &expr)
Cast an exprt to a binary_exprt.
Definition: std_expr.h:678
namespacet
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:92
exprt::type
typet & type()
Return the type of the expression.
Definition: expr.h:81
disjunction
exprt disjunction(const exprt::operandst &op)
1) generates a disjunction for two or more operands 2) for one operand, returns the operand 3) return...
Definition: std_expr.cpp:29
null_pointer_exprt
The null pointer constant.
Definition: std_expr.h:3989
symbol_table_baset
The symbol table base class interface.
Definition: symbol_table_base.h:22
irept::id
const irep_idt & id() const
Definition: irep.h:418
message_handlert
Definition: message.h:28
to_struct_tag_type
const struct_tag_typet & to_struct_tag_type(const typet &type)
Cast a typet to a struct_tag_typet.
Definition: std_types.h:515
exprt::operandst
std::vector< exprt > operandst
Definition: expr.h:55
remove_instanceoft
Definition: remove_instanceof.cpp:26
to_pointer_type
const pointer_typet & to_pointer_type(const typet &type)
Cast a typet to a pointer_typet.
Definition: std_types.h:1526
java_lang_object_type
java_reference_typet java_lang_object_type()
Definition: java_types.cpp:99
remove_instanceoft::symbol_table
symbol_table_baset & symbol_table
Definition: remove_instanceof.cpp:49
remove_instanceof.h
Remove Instance-of Operators.
goto_functionst::goto_functiont
::goto_functiont goto_functiont
Definition: goto_functions.h:25
goto_programt::instructions
instructionst instructions
The list of instructions in the goto program.
Definition: goto_program.h:585
goto_functionst
A collection of goto functions.
Definition: goto_functions.h:23
class_hierarchy.h
Class Hierarchy.
goto_modelt::goto_functions
goto_functionst goto_functions
GOTO functions.
Definition: goto_model.h:33
remove_instanceoft::message_handler
message_handlert & message_handler
Definition: remove_instanceof.cpp:52
remove_instanceoft::remove_instanceoft
remove_instanceoft(symbol_table_baset &symbol_table, const class_hierarchyt &class_hierarchy, message_handlert &message_handler)
Definition: remove_instanceof.cpp:28
JAVA_REFERENCE_ARRAY_CLASSID
#define JAVA_REFERENCE_ARRAY_CLASSID
Definition: java_types.h:658
from_integer
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Definition: arith_tools.cpp:99
string_typet
String type.
Definition: std_types.h:1655
binary_relation_exprt
A base class for relations, i.e., binary predicates whose two operands have the same type.
Definition: std_expr.h:725
contains_instanceof
static bool contains_instanceof(const exprt &e)
Definition: remove_instanceof.cpp:221
subtype_expr
static exprt subtype_expr(const exprt &classid_field, const irep_idt &target_type, const class_hierarchyt &class_hierarchy)
Produce an expression of the form classid_field == "A" || classid_field == "B" || ....
Definition: remove_instanceof.cpp:70
goto_programt
A generic container class for the GOTO intermediate representation of one function.
Definition: goto_program.h:73
class_identifier.h
Extract class identifier.
java_array_dimension_and_element_type
std::pair< typet, std::size_t > java_array_dimension_and_element_type(const struct_tag_typet &type)
Returns the underlying element type and array dimensionality of Java struct type.
Definition: java_types.cpp:188
exprt::operands
operandst & operands()
Definition: expr.h:95
java_types.h
goto_programt::insert_before_swap
void insert_before_swap(targett target)
Insertion that preserves jumps to "target".
Definition: goto_program.h:606
get_array_element_type_field
exprt get_array_element_type_field(const exprt &pointer)
Definition: java_types.cpp:217
java_utils.h
constant_exprt
A constant literal expression.
Definition: std_expr.h:3906
binary_exprt::op1
exprt & op1()
Definition: expr.h:105
goto_modelt::symbol_table
symbol_tablet symbol_table
Symbol table.
Definition: goto_model.h:30
goto_programt::targett
instructionst::iterator targett
Definition: goto_program.h:579
class_hierarchyt::get_children_trans
idst get_children_trans(const irep_idt &id) const
Definition: class_hierarchy.h:68
binary_exprt::op0
exprt & op0()
Definition: expr.h:102
remove_instanceoft::lower_instanceof
bool lower_instanceof(const irep_idt &function_identifier, goto_programt &)
Replace every instanceof in the passed function body with an explicit class-identifier test.
Definition: remove_instanceof.cpp:271
validation_modet::INVARIANT
@ INVARIANT