cprover
remove_java_new.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Remove Java New Operators
4 
5 Author: Peter Schrammel
6 
7 \*******************************************************************/
8 
11 
12 #include "remove_java_new.h"
13 
16 
17 #include "java_utils.h"
18 #include <util/arith_tools.h>
19 #include <util/c_types.h>
20 #include <util/expr_cast.h>
21 #include <util/expr_initializer.h>
22 #include <util/message.h>
24 
25 class remove_java_newt : public messaget
26 {
27 public:
30  message_handlert &_message_handler)
31  : messaget(_message_handler), symbol_table(symbol_table), ns(symbol_table)
32  {
33  }
34 
35  // Lower java_new for a single function
36  bool lower_java_new(const irep_idt &function_identifier, goto_programt &);
37 
38  // Lower java_new for a single instruction
40  const irep_idt &function_identifier,
41  goto_programt &,
43 
44 protected:
47 
49  const irep_idt &function_identifier,
50  const exprt &lhs,
51  const side_effect_exprt &rhs,
52  goto_programt &,
54 
56  const irep_idt &function_identifier,
57  const exprt &lhs,
58  const side_effect_exprt &rhs,
59  goto_programt &,
61 };
62 
76  const irep_idt &function_identifier,
77  const exprt &lhs,
78  const side_effect_exprt &rhs,
79  goto_programt &dest,
81 {
82  PRECONDITION(!lhs.is_nil());
83  PRECONDITION(rhs.operands().empty());
84  PRECONDITION(rhs.type().id() == ID_pointer);
85  source_locationt location = rhs.source_location();
86  typet object_type = rhs.type().subtype();
87 
88  // build size expression
89  const auto object_size = size_of_expr(object_type, ns);
90  CHECK_RETURN(object_size.has_value());
91 
92  // we produce a malloc side-effect, which stays
93  side_effect_exprt malloc_expr(ID_allocate, rhs.type(), location);
94  malloc_expr.copy_to_operands(*object_size);
95  // could use true and get rid of the code below
96  malloc_expr.copy_to_operands(false_exprt());
97  *target =
98  goto_programt::make_assignment(code_assignt(lhs, malloc_expr), location);
99 
100  // zero-initialize the object
101  dereference_exprt deref(lhs, object_type);
102  auto zero_object = zero_initializer(object_type, location, ns);
103  CHECK_RETURN(zero_object.has_value());
105  to_struct_expr(*zero_object), ns, to_struct_tag_type(object_type));
106  return dest.insert_after(
107  target,
109  code_assignt(deref, *zero_object), location));
110 }
111 
125  const irep_idt &function_identifier,
126  const exprt &lhs,
127  const side_effect_exprt &rhs,
128  goto_programt &dest,
129  goto_programt::targett target)
130 {
131  // lower_java_new_array without lhs not implemented
132  PRECONDITION(!lhs.is_nil());
133  PRECONDITION(rhs.operands().size() >= 1); // one per dimension
134  PRECONDITION(rhs.type().id() == ID_pointer);
135 
136  source_locationt location = rhs.source_location();
137  struct_tag_typet object_type = to_struct_tag_type(rhs.type().subtype());
138  PRECONDITION(ns.follow(object_type).id() == ID_struct);
139 
140  // build size expression
141  const auto object_size = size_of_expr(object_type, ns);
142  CHECK_RETURN(object_size.has_value());
143 
144  // we produce a malloc side-effect, which stays
145  side_effect_exprt malloc_expr(ID_allocate, rhs.type(), location);
146  malloc_expr.copy_to_operands(*object_size);
147  // code use true and get rid of the code below
148  malloc_expr.copy_to_operands(false_exprt());
149 
150  *target =
151  goto_programt::make_assignment(code_assignt(lhs, malloc_expr), location);
152  goto_programt::targett next = std::next(target);
153 
154  const struct_typet &struct_type = to_struct_type(ns.follow(object_type));
155 
156  PRECONDITION(is_valid_java_array(struct_type));
157 
158  // Init base class:
159  dereference_exprt deref(lhs, object_type);
160  auto zero_object = zero_initializer(object_type, location, ns);
161  CHECK_RETURN(zero_object.has_value());
162  set_class_identifier(to_struct_expr(*zero_object), ns, object_type);
163  dest.insert_before(
164  next,
166  code_assignt(deref, *zero_object), location));
167 
168  // If it's a reference array we need to set the dimension and element type
169  // fields. Primitive array types don't have these fields; if the element type
170  // is a void pointer then the element type is statically unknown and the
171  // caller must set these up itself. This happens in array[reference].clone(),
172  // where the type info must be copied over from the input array)
173  const auto underlying_type_and_dimension =
175 
176  bool target_type_is_reference_array =
177  underlying_type_and_dimension.second >= 1 &&
178  can_cast_type<java_reference_typet>(underlying_type_and_dimension.first);
179 
180  if(target_type_is_reference_array)
181  {
182  exprt object_array_dimension = get_array_dimension_field(lhs);
183  dest.insert_before(
184  next,
186  object_array_dimension,
187  from_integer(
188  underlying_type_and_dimension.second, object_array_dimension.type()),
189  location)));
190 
191  exprt object_array_element_type = get_array_element_type_field(lhs);
192  dest.insert_before(
193  next,
195  object_array_element_type,
197  to_struct_tag_type(underlying_type_and_dimension.first.subtype())
198  .get_identifier(),
199  string_typet()),
200  location)));
201  }
202 
203  // if it's an array, we need to set the length field
204  member_exprt length(
205  deref,
206  struct_type.components()[1].get_name(),
207  struct_type.components()[1].type());
208  dest.insert_before(
209  next,
211  code_assignt(length, to_multi_ary_expr(rhs).op0()), location));
212 
213  // we also need to allocate space for the data
215  deref,
216  struct_type.components()[2].get_name(),
217  struct_type.components()[2].type());
218 
219  // Allocate a (struct realtype**) instead of a (void**) if possible.
220  const irept &given_element_type = object_type.find(ID_element_type);
221  typet allocate_data_type;
222  if(given_element_type.is_not_nil())
223  {
224  allocate_data_type =
225  pointer_type(static_cast<const typet &>(given_element_type));
226  }
227  else
228  allocate_data_type = data.type();
229 
230  side_effect_exprt data_java_new_expr(
231  ID_java_new_array_data, allocate_data_type, location);
232 
233  // The instruction may specify a (hopefully small) upper bound on the
234  // array size, in which case we allocate a fixed-length array that may
235  // be larger than the `length` member rather than use a true variable-
236  // length array, which produces a more complex formula in the current
237  // backend.
238  const irept size_bound = rhs.find(ID_length_upper_bound);
239  if(size_bound.is_nil())
240  data_java_new_expr.set(ID_size, to_multi_ary_expr(rhs).op0());
241  else
242  data_java_new_expr.set(ID_size, size_bound);
243 
244  // Must directly assign the new array to a temporary
245  // because goto-symex will notice `x=side_effect_exprt` but not
246  // `x=typecast_exprt(side_effect_exprt(...))`
247  symbol_exprt new_array_data_symbol = fresh_java_symbol(
248  data_java_new_expr.type(),
249  "tmp_new_data_array",
250  location,
251  function_identifier,
252  symbol_table)
253  .symbol_expr();
254  code_declt array_decl(new_array_data_symbol);
255  array_decl.add_source_location() = location;
256  dest.insert_before(next, goto_programt::make_decl(array_decl, location));
257  dest.insert_before(
258  next,
260  code_assignt(new_array_data_symbol, data_java_new_expr), location));
261 
262  exprt cast_java_new = new_array_data_symbol;
263  if(cast_java_new.type() != data.type())
264  cast_java_new = typecast_exprt(cast_java_new, data.type());
265  dest.insert_before(
266  next,
268  code_assignt(data, cast_java_new), location));
269 
270  // zero-initialize the data
271  if(!rhs.get_bool(ID_skip_initialize))
272  {
273  const auto zero_element =
274  zero_initializer(data.type().subtype(), location, ns);
275  CHECK_RETURN(zero_element.has_value());
276  codet array_set(ID_array_set);
277  array_set.copy_to_operands(new_array_data_symbol, *zero_element);
278  dest.insert_before(next, goto_programt::make_other(array_set, location));
279  }
280 
281  // multi-dimensional?
282 
283  if(rhs.operands().size() >= 2)
284  {
285  // produce
286  // for(int i=0; i<size; i++) tmp[i]=java_new(dim-1);
287  // This will be converted recursively.
288 
289  goto_programt tmp;
290 
291  symbol_exprt tmp_i =
293  length.type(), "tmp_index", location, function_identifier, symbol_table)
294  .symbol_expr();
295  code_declt decl(tmp_i);
296  decl.add_source_location() = location;
297  tmp.insert_before(
298  tmp.instructions.begin(), goto_programt::make_decl(decl, location));
299 
300  side_effect_exprt sub_java_new = rhs;
301  sub_java_new.operands().erase(sub_java_new.operands().begin());
302 
303  // we already know that rhs has pointer type
304  typet sub_type =
305  static_cast<const typet &>(rhs.type().subtype().find(ID_element_type));
306  CHECK_RETURN(sub_type.id() == ID_pointer);
307  sub_java_new.type() = sub_type;
308 
309  plus_exprt(tmp_i, from_integer(1, tmp_i.type()));
310  dereference_exprt deref_expr(
311  plus_exprt(data, tmp_i), data.type().subtype());
312 
313  code_blockt for_body;
314  symbol_exprt init_sym =
316  sub_type, "subarray_init", location, function_identifier, symbol_table)
317  .symbol_expr();
318  code_declt init_decl(init_sym);
319  init_decl.add_source_location() = location;
320  for_body.add(std::move(init_decl));
321 
322  code_assignt init_subarray(init_sym, sub_java_new);
323  for_body.add(std::move(init_subarray));
324  code_assignt assign_subarray(
325  deref_expr, typecast_exprt(init_sym, deref_expr.type()));
326  for_body.add(std::move(assign_subarray));
327 
328  const code_fort for_loop = code_fort::from_index_bounds(
329  from_integer(0, tmp_i.type()),
330  to_multi_ary_expr(rhs).op0(),
331  tmp_i,
332  std::move(for_body),
333  location);
334 
335  goto_convert(for_loop, symbol_table, tmp, get_message_handler(), ID_java);
336 
337  // lower new side effects recursively
338  lower_java_new(function_identifier, tmp);
339 
340  dest.destructive_insert(next, tmp);
341  }
342 
343  return std::prev(next);
344 }
345 
353  const irep_idt &function_identifier,
354  goto_programt &goto_program,
355  goto_programt::targett target)
356 {
357  if(!target->is_assign())
358  return target;
359 
360  if(as_const(*target).get_assign().rhs().id() == ID_side_effect)
361  {
362  // we make a copy, as we intend to destroy the assignment
363  // inside lower_java_new and lower_java_new_array
364  const auto code_assign = target->get_assign();
365 
366  const exprt &lhs = code_assign.lhs();
367  const exprt &rhs = code_assign.rhs();
368 
369  const auto &side_effect_expr = to_side_effect_expr(rhs);
370  const auto &statement = side_effect_expr.get_statement();
371 
372  if(statement == ID_java_new)
373  {
374  return lower_java_new(
375  function_identifier, lhs, side_effect_expr, goto_program, target);
376  }
377  else if(statement == ID_java_new_array)
378  {
379  return lower_java_new_array(
380  function_identifier, lhs, side_effect_expr, goto_program, target);
381  }
382  }
383 
384  return target;
385 }
386 
394  const irep_idt &function_identifier,
395  goto_programt &goto_program)
396 {
397  bool changed = false;
398  for(goto_programt::instructionst::iterator target =
399  goto_program.instructions.begin();
400  target != goto_program.instructions.end();
401  ++target)
402  {
403  goto_programt::targett new_target =
404  lower_java_new(function_identifier, goto_program, target);
405  changed = changed || new_target == target;
406  target = new_target;
407  }
408  if(!changed)
409  return false;
410  goto_program.update();
411  return true;
412 }
413 
423  const irep_idt &function_identifier,
424  goto_programt::targett target,
425  goto_programt &goto_program,
426  symbol_table_baset &symbol_table,
427  message_handlert &message_handler)
428 {
429  remove_java_newt rem(symbol_table, message_handler);
430  rem.lower_java_new(function_identifier, goto_program, target);
431 }
432 
441  const irep_idt &function_identifier,
443  symbol_table_baset &symbol_table,
444  message_handlert &message_handler)
445 {
446  remove_java_newt rem(symbol_table, message_handler);
447  rem.lower_java_new(function_identifier, function.body);
448 }
449 
457  goto_functionst &goto_functions,
458  symbol_table_baset &symbol_table,
459  message_handlert &message_handler)
460 {
461  remove_java_newt rem(symbol_table, message_handler);
462  bool changed = false;
463  for(auto &f : goto_functions.function_map)
464  changed = rem.lower_java_new(f.first, f.second.body) || changed;
465  if(changed)
466  goto_functions.compute_location_numbers();
467 }
468 
475 void remove_java_new(goto_modelt &goto_model, message_handlert &message_handler)
476 {
478  goto_model.goto_functions, goto_model.symbol_table, message_handler);
479 }
messaget
Class that provides messages with a built-in verbosity 'level'.
Definition: message.h:155
remove_java_newt::remove_java_newt
remove_java_newt(symbol_table_baset &symbol_table, message_handlert &_message_handler)
Definition: remove_java_new.cpp:28
exprt::copy_to_operands
void copy_to_operands(const exprt &expr)
Copy the given argument to the end of exprt's operands.
Definition: expr.h:150
tag_typet::get_identifier
const irep_idt & get_identifier() const
Definition: std_types.h:451
struct_union_typet::components
const componentst & components() const
Definition: std_types.h:142
dstringt
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:37
pointer_offset_size.h
Pointer Logic.
can_cast_type< java_reference_typet >
bool can_cast_type< java_reference_typet >(const typet &type)
Definition: java_types.h:626
code_blockt
A codet representing sequential composition of program statements.
Definition: std_code.h:170
typet::subtype
const typet & subtype() const
Definition: type.h:47
arith_tools.h
remove_java_newt::lower_java_new
bool lower_java_new(const irep_idt &function_identifier, goto_programt &)
Replace every java_new or java_new_array by a malloc side-effect and zero initialization.
Definition: remove_java_new.cpp:393
to_struct_type
const struct_typet & to_struct_type(const typet &type)
Cast a typet to a struct_typet.
Definition: std_types.h:303
get_array_dimension_field
exprt get_array_dimension_field(const exprt &pointer)
Definition: java_types.cpp:205
code_fort
codet representation of a for statement.
Definition: std_code.h:1020
CHECK_RETURN
#define CHECK_RETURN(CONDITION)
Definition: invariant.h:496
typet
The type of an expression, extends irept.
Definition: type.h:29
dereference_exprt
Operator to dereference a pointer.
Definition: std_expr.h:2888
data
Definition: kdev_t.h:24
irept::find
const irept & find(const irep_namet &name) const
Definition: irep.cpp:103
goto_programt::update
void update()
Update all indices.
Definition: goto_program.cpp:541
code_declt
A codet representing the declaration of a local variable.
Definition: std_code.h:402
plus_exprt
The plus expression Associativity is not specified.
Definition: std_expr.h:881
goto_functionst::compute_location_numbers
void compute_location_numbers()
Definition: goto_functions.cpp:18
is_valid_java_array
bool is_valid_java_array(const struct_typet &type)
Programmatic documentation of the structure of a Java array (of either primitives or references) type...
Definition: java_types.cpp:834
exprt
Base class for all expressions.
Definition: expr.h:53
goto_modelt
Definition: goto_model.h:26
struct_tag_typet
A struct tag type, i.e., struct_typet with an identifier.
Definition: std_types.h:490
to_side_effect_expr
side_effect_exprt & to_side_effect_expr(exprt &expr)
Definition: std_code.h:1931
goto_convert.h
Program Transformation.
goto_functionst::function_map
function_mapt function_map
Definition: goto_functions.h:27
symbol_exprt
Expression to hold a symbol (variable)
Definition: std_expr.h:82
goto_programt::make_decl
static instructiont make_decl(const symbol_exprt &symbol, const source_locationt &l=source_locationt::nil())
Definition: goto_program.h:920
remove_java_newt::symbol_table
symbol_table_baset & symbol_table
Definition: remove_java_new.cpp:45
goto_programt::make_assignment
static instructiont make_assignment(const code_assignt &_code, const source_locationt &l=source_locationt::nil())
Create an assignment instruction.
Definition: goto_program.h:1024
zero_initializer
optionalt< exprt > zero_initializer(const typet &type, const source_locationt &source_location, const namespacet &ns)
Create the equivalent of zero for type type.
Definition: expr_initializer.cpp:318
goto_convert
void goto_convert(const codet &code, symbol_table_baset &symbol_table, goto_programt &dest, message_handlert &message_handler, const irep_idt &mode)
Definition: goto_convert.cpp:1846
namespacet
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:92
exprt::type
typet & type()
Return the type of the expression.
Definition: expr.h:81
irept::get_bool
bool get_bool(const irep_namet &name) const
Definition: irep.cpp:64
irept::is_not_nil
bool is_not_nil() const
Definition: irep.h:402
goto_programt::insert_before
targett insert_before(const_targett target)
Insertion before the instruction pointed-to by the given instruction iterator target.
Definition: goto_program.h:639
remove_java_newt::ns
namespacet ns
Definition: remove_java_new.cpp:46
set_class_identifier
void set_class_identifier(struct_exprt &expr, const namespacet &ns, const struct_tag_typet &class_type)
If expr has its components filled in then sets the @class_identifier member of the struct.
Definition: class_identifier.cpp:82
expr_initializer.h
Expression Initialization.
as_const
const T & as_const(T &value)
Return a reference to the same object but ensures the type is const.
Definition: as_const.h:14
goto_programt::destructive_insert
void destructive_insert(const_targett target, goto_programt &p)
Inserts the given program p before target.
Definition: goto_program.h:677
PRECONDITION
#define PRECONDITION(CONDITION)
Definition: invariant.h:464
symbol_table_baset
The symbol table base class interface.
Definition: symbol_table_base.h:22
symbolt::symbol_expr
class symbol_exprt symbol_expr() const
Produces a symbol_exprt for a symbol.
Definition: symbol.cpp:122
pointer_type
pointer_typet pointer_type(const typet &subtype)
Definition: c_types.cpp:243
object_size
exprt object_size(const exprt &pointer)
Definition: pointer_predicates.cpp:32
irept::is_nil
bool is_nil() const
Definition: irep.h:398
irept::id
const irep_idt & id() const
Definition: irep.h:418
message_handlert
Definition: message.h:28
to_struct_tag_type
const struct_tag_typet & to_struct_tag_type(const typet &type)
Cast a typet to a struct_tag_typet.
Definition: std_types.h:515
code_blockt::add
void add(const codet &code)
Definition: std_code.h:208
false_exprt
The Boolean constant false.
Definition: std_expr.h:3964
remove_java_new
void remove_java_new(const irep_idt &function_identifier, goto_programt::targett target, goto_programt &goto_program, symbol_table_baset &symbol_table, message_handlert &message_handler)
Replace every java_new or java_new_array by a malloc side-effect and zero initialization.
Definition: remove_java_new.cpp:422
remove_java_newt::lower_java_new_array
goto_programt::targett lower_java_new_array(const irep_idt &function_identifier, const exprt &lhs, const side_effect_exprt &rhs, goto_programt &, goto_programt::targett)
Replaces the instruction lhs = new java_array_type by the following code: lhs = ALLOCATE(java_type) l...
Definition: remove_java_new.cpp:124
fresh_java_symbol
symbolt & fresh_java_symbol(const typet &type, const std::string &basename_prefix, const source_locationt &source_location, const irep_idt &function_name, symbol_table_baset &symbol_table)
Definition: java_utils.cpp:566
source_locationt
Definition: source_location.h:20
goto_functionst::goto_functiont
::goto_functiont goto_functiont
Definition: goto_functions.h:25
member_exprt
Extract member of struct or union.
Definition: std_expr.h:3405
goto_programt::instructions
instructionst instructions
The list of instructions in the goto program.
Definition: goto_program.h:585
goto_functionst
A collection of goto functions.
Definition: goto_functions.h:23
messaget::get_message_handler
message_handlert & get_message_handler()
Definition: message.h:184
struct_typet
Structure type, corresponds to C style structs.
Definition: std_types.h:226
goto_modelt::goto_functions
goto_functionst goto_functions
GOTO functions.
Definition: goto_model.h:33
namespace_baset::follow
const typet & follow(const typet &) const
Resolve type symbol to the type it points to.
Definition: namespace.cpp:51
irept::set
void set(const irep_namet &name, const irep_idt &value)
Definition: irep.h:442
from_integer
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Definition: arith_tools.cpp:99
goto_programt::make_other
static instructiont make_other(const codet &_code, const source_locationt &l=source_locationt::nil())
Definition: goto_program.h:913
string_typet
String type.
Definition: std_types.h:1655
expr_cast.h
Templated functions to cast to specific exprt-derived classes.
goto_programt
A generic container class for the GOTO intermediate representation of one function.
Definition: goto_program.h:73
class_identifier.h
Extract class identifier.
code_fort::from_index_bounds
static code_fort from_index_bounds(exprt start_index, exprt end_index, symbol_exprt loop_index, codet body, source_locationt location)
Produce a code_fort representing:
Definition: std_code.cpp:212
goto_programt::insert_after
targett insert_after(const_targett target)
Insertion after the instruction pointed-to by the given instruction iterator target.
Definition: goto_program.h:655
java_array_dimension_and_element_type
std::pair< typet, std::size_t > java_array_dimension_and_element_type(const struct_tag_typet &type)
Returns the underlying element type and array dimensionality of Java struct type.
Definition: java_types.cpp:188
size_of_expr
optionalt< exprt > size_of_expr(const typet &type, const namespacet &ns)
Definition: pointer_offset_size.cpp:285
irept
There are a large number of kinds of tree structured or tree-like data in CPROVER.
Definition: irep.h:394
remove_java_new.h
Remove Java New Operators.
exprt::operands
operandst & operands()
Definition: expr.h:95
exprt::add_source_location
source_locationt & add_source_location()
Definition: expr.h:259
typecast_exprt
Semantic type conversion.
Definition: std_expr.h:2013
get_array_element_type_field
exprt get_array_element_type_field(const exprt &pointer)
Definition: java_types.cpp:217
code_assignt
A codet representing an assignment in the program.
Definition: std_code.h:295
message.h
java_utils.h
constant_exprt
A constant literal expression.
Definition: std_expr.h:3906
to_multi_ary_expr
const multi_ary_exprt & to_multi_ary_expr(const exprt &expr)
Cast an exprt to a multi_ary_exprt.
Definition: std_expr.h:866
exprt::source_location
const source_locationt & source_location() const
Definition: expr.h:254
goto_modelt::symbol_table
symbol_tablet symbol_table
Symbol table.
Definition: goto_model.h:30
c_types.h
remove_java_newt
Definition: remove_java_new.cpp:26
goto_programt::targett
instructionst::iterator targett
Definition: goto_program.h:579
side_effect_exprt
An expression containing a side effect.
Definition: std_code.h:1866
to_struct_expr
const struct_exprt & to_struct_expr(const exprt &expr)
Cast an exprt to a struct_exprt.
Definition: std_expr.h:1656
codet
Data structure for representing an arbitrary statement in a program.
Definition: std_code.h:35