cprover
linker_script_merge.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Linker Script Merging
4 
5 Author: Kareem Khazem <karkhaz@karkhaz.com>, 2017
6 
7 \*******************************************************************/
8 
9 #include "linker_script_merge.h"
10 
11 #include <algorithm>
12 #include <fstream>
13 #include <iterator>
14 
15 #include <util/arith_tools.h>
16 #include <util/c_types.h>
17 #include <util/expr_initializer.h>
18 #include <util/magic.h>
19 #include <util/run.h>
20 #include <util/tempfile.h>
21 
22 #include <json/json_parser.h>
23 
25 
27 
29 {
30  if(!cmdline.isset('T'))
31  return 0;
32 
33  temporary_filet linker_def_outfile("goto-cc-linker-info", ".json");
34  std::list<irep_idt> linker_defined_symbols;
35  int fail = get_linker_script_data(
36  linker_defined_symbols,
38  elf_binary,
39  linker_def_outfile());
40  // ignore linker script parsing failures until the code is tested more widely
41  if(fail!=0)
42  return 0;
43 
44  jsont data;
45  fail=parse_json(linker_def_outfile(), get_message_handler(), data);
46  if(fail!=0)
47  {
48  error() << "Problem parsing linker script JSON data" << eom;
49  return fail;
50  }
51 
53  if(fail!=0)
54  {
55  error() << "Malformed linker-script JSON document" << eom;
56  data.output(error());
57  return fail;
58  }
59 
60  auto original_goto_model =
62 
63  if(!original_goto_model.has_value())
64  {
65  error() << "Unable to read goto binary for linker script merging" << eom;
66  return 1;
67  }
68 
69  fail=1;
70  linker_valuest linker_values;
71  const auto &pair =
72  original_goto_model->goto_functions.function_map.find(INITIALIZE_FUNCTION);
73  if(pair == original_goto_model->goto_functions.function_map.end())
74  {
75  error() << "No " << INITIALIZE_FUNCTION << " found in goto_functions"
76  << eom;
77  return fail;
78  }
79  fail = ls_data2instructions(
80  data,
81  cmdline.get_value('T'),
82  pair->second.body,
83  original_goto_model->symbol_table,
84  linker_values);
85  if(fail!=0)
86  {
87  error() << "Could not add linkerscript defs to " INITIALIZE_FUNCTION << eom;
88  return fail;
89  }
90 
91  fail=goto_and_object_mismatch(linker_defined_symbols, linker_values);
92  if(fail!=0)
93  return fail;
94 
95  // The keys of linker_values are exactly the elements of
96  // linker_defined_symbols, so iterate over linker_values from now on.
97 
98  fail = pointerize_linker_defined_symbols(*original_goto_model, linker_values);
99 
100  if(fail!=0)
101  {
102  error() << "Could not pointerize all linker-defined expressions" << eom;
103  return fail;
104  }
105 
106  fail = compiler.write_bin_object_file(goto_binary, *original_goto_model);
107 
108  if(fail!=0)
109  error() << "Could not write linkerscript-augmented binary" << eom;
110 
111  return fail;
112 }
113 
115  compilet &compiler,
116  const std::string &elf_binary,
117  const std::string &goto_binary,
118  const cmdlinet &cmdline,
119  message_handlert &message_handler)
120  : messaget(message_handler),
121  compiler(compiler),
122  elf_binary(elf_binary),
123  goto_binary(goto_binary),
124  cmdline(cmdline),
125  replacement_predicates(
127  "address of array's first member",
128  [](const exprt &expr) -> const symbol_exprt & {
129  return to_symbol_expr(
130  to_index_expr(to_address_of_expr(expr).object()).index());
131  },
132  [](const exprt &expr) {
133  return expr.id() == ID_address_of &&
134  expr.type().id() == ID_pointer &&
135 
136  to_address_of_expr(expr).object().id() == ID_index &&
137  to_address_of_expr(expr).object().type().id() ==
138  ID_unsignedbv &&
139 
140  to_index_expr(to_address_of_expr(expr).object())
141  .array()
142  .id() == ID_symbol &&
143  to_index_expr(to_address_of_expr(expr).object())
144  .array()
145  .type()
146  .id() == ID_array &&
147 
148  to_index_expr(to_address_of_expr(expr).object())
149  .index()
150  .id() == ID_constant &&
151  to_index_expr(to_address_of_expr(expr).object())
152  .index()
153  .type()
154  .id() == ID_signedbv;
155  }),
157  "address of array",
158  [](const exprt &expr) -> const symbol_exprt & {
159  return to_symbol_expr(to_address_of_expr(expr).object());
160  },
161  [](const exprt &expr) {
162  return expr.id() == ID_address_of &&
163  expr.type().id() == ID_pointer &&
164 
165  to_address_of_expr(expr).object().id() == ID_symbol &&
166  to_address_of_expr(expr).object().type().id() == ID_array;
167  }),
169  "address of struct",
170  [](const exprt &expr) -> const symbol_exprt & {
171  return to_symbol_expr(to_address_of_expr(expr).object());
172  },
173  [](const exprt &expr) {
174  return expr.id() == ID_address_of &&
175  expr.type().id() == ID_pointer &&
176 
177  to_address_of_expr(expr).object().id() == ID_symbol &&
178  (to_address_of_expr(expr).object().type().id() == ID_struct ||
179  to_address_of_expr(expr).object().type().id() ==
180  ID_struct_tag);
181  }),
183  "array variable",
184  [](const exprt &expr) -> const symbol_exprt & {
185  return to_symbol_expr(expr);
186  },
187  [](const exprt &expr) {
188  return expr.id() == ID_symbol && expr.type().id() == ID_array;
189  }),
191  "pointer (does not need pointerizing)",
192  [](const exprt &expr) -> const symbol_exprt & {
193  return to_symbol_expr(expr);
194  },
195  [](const exprt &expr) {
196  return expr.id() == ID_symbol && expr.type().id() == ID_pointer;
197  })})
198 {}
199 
201  goto_modelt &goto_model,
202  const linker_valuest &linker_values)
203 {
204  const namespacet ns(goto_model.symbol_table);
205 
206  int ret=0;
207  // First, pointerize the actual linker-defined symbols
208  for(const auto &pair : linker_values)
209  {
210  const auto maybe_symbol = goto_model.symbol_table.get_writeable(pair.first);
211  if(!maybe_symbol)
212  continue;
213  symbolt &entry=*maybe_symbol;
214  entry.type=pointer_type(char_type());
215  entry.is_extern=false;
216  entry.value=pair.second.second;
217  }
218 
219  // Next, find all occurrences of linker-defined symbols that are _values_
220  // of some symbol in the symbol table, and pointerize them too
221  for(const auto &pair : goto_model.symbol_table.symbols)
222  {
223  std::list<symbol_exprt> to_pointerize;
224  symbols_to_pointerize(linker_values, pair.second.value, to_pointerize);
225 
226  if(to_pointerize.empty())
227  continue;
228  debug() << "Pointerizing the symbol-table value of symbol " << pair.first
229  << eom;
230  int fail = pointerize_subexprs_of(
231  goto_model.symbol_table.get_writeable_ref(pair.first).value,
232  to_pointerize,
233  linker_values);
234  if(to_pointerize.empty() && fail==0)
235  continue;
236  ret=1;
237  for(const auto &sym : to_pointerize)
238  error() << " Could not pointerize '" << sym.get_identifier()
239  << "' in symbol table entry " << pair.first << ". Pretty:\n"
240  << sym.pretty() << "\n";
241  error() << eom;
242  }
243 
244  // Finally, pointerize all occurrences of linker-defined symbols in the
245  // goto program
246  for(auto &gf : goto_model.goto_functions.function_map)
247  {
248  goto_programt &program=gf.second.body;
250  {
251  for(exprt *insts : std::list<exprt *>({&(iit->code), &(iit->guard)}))
252  {
253  std::list<symbol_exprt> to_pointerize;
254  symbols_to_pointerize(linker_values, *insts, to_pointerize);
255  if(to_pointerize.empty())
256  continue;
257  debug() << "Pointerizing a program expression..." << eom;
258  int fail = pointerize_subexprs_of(*insts, to_pointerize, linker_values);
259  if(to_pointerize.empty() && fail==0)
260  continue;
261  ret=1;
262  for(const auto &sym : to_pointerize)
263  {
264  error() << " Could not pointerize '" << sym.get_identifier()
265  << "' in function " << gf.first << ". Pretty:\n"
266  << sym.pretty() << "\n";
267  error().source_location=iit->source_location;
268  }
269  error() << eom;
270  }
271  }
272  }
273  return ret;
274 }
275 
277  exprt &old_expr,
278  const linker_valuest &linker_values,
279  const symbol_exprt &old_symbol,
280  const irep_idt &ident,
281  const std::string &shape)
282 {
283  auto it=linker_values.find(ident);
284  if(it==linker_values.end())
285  {
286  error() << "Could not find a new expression for linker script-defined "
287  << "symbol '" << ident << "'" << eom;
288  return 1;
289  }
290  symbol_exprt new_expr=it->second.first;
291  new_expr.add_source_location()=old_symbol.source_location();
292  debug() << "Pointerizing linker-defined symbol '" << ident << "' of shape <"
293  << shape << ">." << eom;
294  old_expr=new_expr;
295  return 0;
296 }
297 
299  exprt &expr,
300  std::list<symbol_exprt> &to_pointerize,
301  const linker_valuest &linker_values)
302 {
303  int fail=0, tmp=0;
304  for(auto const &pair : linker_values)
305  for(auto const &pattern : replacement_predicates)
306  {
307  if(!pattern.match(expr))
308  continue;
309  // take a copy, expr will be changed below
310  const symbol_exprt inner_symbol=pattern.inner_symbol(expr);
311  if(pair.first!=inner_symbol.get_identifier())
312  continue;
313  tmp=replace_expr(expr, linker_values, inner_symbol, pair.first,
314  pattern.description());
315  fail=tmp?tmp:fail;
316  auto result=std::find(to_pointerize.begin(), to_pointerize.end(),
317  inner_symbol);
318  if(result==to_pointerize.end())
319  {
320  fail=1;
321  error() << "Too many removals of '" << inner_symbol.get_identifier()
322  << "'" << eom;
323  }
324  else
325  to_pointerize.erase(result);
326  // If we get here, we've just pointerized this expression. That expression
327  // will be a symbol possibly wrapped in some unary junk, but won't contain
328  // other symbols as subexpressions that might need to be pointerized. So
329  // it's safe to bail out here.
330  return fail;
331  }
332 
333  for(auto &op : expr.operands())
334  {
335  tmp = pointerize_subexprs_of(op, to_pointerize, linker_values);
336  fail=tmp?tmp:fail;
337  }
338  return fail;
339 }
340 
342  const linker_valuest &linker_values,
343  const exprt &expr,
344  std::list<symbol_exprt> &to_pointerize) const
345 {
346  for(const auto &op : expr.operands())
347  {
348  if(op.id()!=ID_symbol)
349  continue;
350  const symbol_exprt &sym_exp=to_symbol_expr(op);
351  const auto &pair=linker_values.find(sym_exp.get_identifier());
352  if(pair!=linker_values.end())
353  to_pointerize.push_back(sym_exp);
354  }
355  for(const auto &op : expr.operands())
356  symbols_to_pointerize(linker_values, op, to_pointerize);
357 }
358 
359 #if 0
360 The current implementation of this function is less precise than the
361  commented-out version below. To understand the difference between these
362  implementations, consider the following example:
363 
364 Suppose we have a section in the linker script, 100 bytes long, where the
365 address of the symbol sec_start is the start of the section (value 4096) and the
366 address of sec_end is the end of that section (value 4196).
367 
368 The current implementation synthesizes the goto-version of the following C:
369 
370  char __sec_array[100];
371  char *sec_start=(&__sec_array[0]);
372  char *sec_end=((&__sec_array[0])+100);
373  // Yes, it is 100 not 99. We're pointing to the end of the memory occupied
374  // by __sec_array, not the last element of __sec_array.
375 
376 This is imprecise for the following reason: the actual address of the array and
377 the pointers shall be some random CBMC-internal address, instead of being 4096
378 and 4196. The linker script, on the other hand, would have specified the exact
379 position of the section, and we even know what the actual values of sec_start
380 and sec_end are from the object file (these values are in the `addresses` list
381 of the `data` argument to this function). If the correctness of the code depends
382 on these actual values, then CBMCs model of the code is too imprecise to verify
383 this.
384 
385 The commented-out version of this function below synthesizes the following:
386 
387  char *sec_start=4096;
388  char *sec_end=4196;
389  __CPROVER_allocated_memory(4096, 100);
390 
391 This code has both the actual addresses of the start and end of the section and
392 tells CBMC that the intermediate region is valid. However, the allocated_memory
393 macro does not currently allocate an actual object at the address 4096, so
394 symbolic execution can fail. In particular, the 'allocated memory' is part of
395 __CPROVER_memory, which does not have a bounded size; this means that (for
396 example) calls to memcpy or memset fail, because the first and third arguments
397 do not have know n size. The commented-out implementation should be reinstated
398 once this limitation of __CPROVER_allocated_memory has been fixed.
399 
400 In either case, no other changes to the rest of the code (outside this function)
401 should be necessary. The rest of this file converts expressions containing the
402 linker-defined symbol into pointer types if they were not already, and this is
403 the right behaviour for both implementations.
404 #endif
406  jsont &data,
407  const std::string &linker_script,
408  goto_programt &gp,
409  symbol_tablet &symbol_table,
410  linker_valuest &linker_values)
411 #if 1
412 {
413  goto_programt::instructionst initialize_instructions=gp.instructions;
414  std::map<irep_idt, std::size_t> truncated_symbols;
415  for(auto &d : to_json_array(data["regions"]))
416  {
417  bool has_end=d["has-end-symbol"].is_true();
418 
419  std::ostringstream array_name;
420  array_name << CPROVER_PREFIX << "linkerscript-array_"
421  << d["start-symbol"].value;
422  if(has_end)
423  array_name << "-" << d["end-symbol"].value;
424 
425 
426  // Array symbol_exprt
427  mp_integer array_size = string2integer(d["size"].value);
428  if(array_size > MAX_FLATTENED_ARRAY_SIZE)
429  {
430  warning() << "Object section '" << d["section"].value << "' of size "
431  << array_size << " is too large to model. Truncating to "
432  << MAX_FLATTENED_ARRAY_SIZE << " bytes" << eom;
433  array_size=MAX_FLATTENED_ARRAY_SIZE;
434  if(!has_end)
435  truncated_symbols[d["size-symbol"].value]=MAX_FLATTENED_ARRAY_SIZE;
436  }
437 
438  constant_exprt array_size_expr=from_integer(array_size, size_type());
439  array_typet array_type(char_type(), array_size_expr);
440  symbol_exprt array_expr(array_name.str(), array_type);
441  source_locationt array_loc;
442 
443  array_loc.set_file(linker_script);
444  std::ostringstream array_comment;
445  array_comment << "Object section '" << d["section"].value << "' of size "
446  << array_size << " bytes";
447  array_loc.set_comment(array_comment.str());
448  array_expr.add_source_location()=array_loc;
449 
450  // Array start address
451  index_exprt zero_idx(array_expr, from_integer(0, size_type()));
452  address_of_exprt array_start(zero_idx);
453 
454  // Linker-defined symbol_exprt pointing to start address
455  symbol_exprt start_sym(d["start-symbol"].value, pointer_type(char_type()));
456  linker_values.emplace(
457  d["start-symbol"].value, std::make_pair(start_sym, array_start));
458 
459  // Since the value of the pointer will be a random CBMC address, write a
460  // note about the real address in the object file
461  auto it = std::find_if(
462  to_json_array(data["addresses"]).begin(),
463  to_json_array(data["addresses"]).end(),
464  [&d](const jsont &add) {
465  return add["sym"].value == d["start-symbol"].value;
466  });
467  if(it == to_json_array(data["addresses"]).end())
468  {
469  error() << "Start: Could not find address corresponding to symbol '"
470  << d["start-symbol"].value << "' (start of section)" << eom;
471  return 1;
472  }
473  source_locationt start_loc;
474  start_loc.set_file(linker_script);
475  std::ostringstream start_comment;
476  start_comment << "Pointer to beginning of object section '"
477  << d["section"].value << "'. Original address in object file"
478  << " is " << (*it)["val"].value;
479  start_loc.set_comment(start_comment.str());
480  start_sym.add_source_location()=start_loc;
481 
482  // Instruction for start-address pointer in __CPROVER_initialize
483  code_assignt start_assign(start_sym, array_start);
484  start_assign.add_source_location()=start_loc;
485  goto_programt::instructiont start_assign_i =
486  goto_programt::make_assignment(start_assign, start_loc);
487  initialize_instructions.push_front(start_assign_i);
488 
489  if(has_end) // Same for pointer to end of array
490  {
491  plus_exprt array_end(array_start, array_size_expr);
492 
493  symbol_exprt end_sym(d["end-symbol"].value, pointer_type(char_type()));
494  linker_values.emplace(
495  d["end-symbol"].value, std::make_pair(end_sym, array_end));
496 
497  auto entry = std::find_if(
498  to_json_array(data["addresses"]).begin(),
499  to_json_array(data["addresses"]).end(),
500  [&d](const jsont &add) {
501  return add["sym"].value == d["end-symbol"].value;
502  });
503  if(entry == to_json_array(data["addresses"]).end())
504  {
505  error() << "Could not find address corresponding to symbol '"
506  << d["end-symbol"].value << "' (end of section)" << eom;
507  return 1;
508  }
509  source_locationt end_loc;
510  end_loc.set_file(linker_script);
511  std::ostringstream end_comment;
512  end_comment << "Pointer to end of object section '" << d["section"].value
513  << "'. Original address in object file"
514  << " is " << (*entry)["val"].value;
515  end_loc.set_comment(end_comment.str());
516  end_sym.add_source_location()=end_loc;
517 
518  code_assignt end_assign(end_sym, array_end);
519  end_assign.add_source_location()=end_loc;
520  goto_programt::instructiont end_assign_i =
521  goto_programt::make_assignment(end_assign, end_loc);
522  initialize_instructions.push_front(end_assign_i);
523  }
524 
525  // Add the array to the symbol table. We don't add the pointer(s) to the
526  // symbol table because they were already there, but declared as extern and
527  // probably with a different type. We change the externness and type in
528  // pointerize_linker_defined_symbols.
529  symbolt array_sym;
530  array_sym.name=array_name.str();
531  array_sym.pretty_name=array_name.str();
532  array_sym.is_lvalue=array_sym.is_static_lifetime=true;
533  array_sym.type=array_type;
534  array_sym.location=array_loc;
535  symbol_table.add(array_sym);
536 
537  // Push the array initialization to the front now, so that it happens before
538  // the initialization of the symbols that point to it.
539  namespacet ns(symbol_table);
540  const auto zi = zero_initializer(array_type, array_loc, ns);
541  CHECK_RETURN(zi.has_value());
542  code_assignt array_assign(array_expr, *zi);
543  array_assign.add_source_location()=array_loc;
544  goto_programt::instructiont array_assign_i =
545  goto_programt::make_assignment(array_assign, array_loc);
546  initialize_instructions.push_front(array_assign_i);
547  }
548 
549  // We've added every linker-defined symbol that marks the start or end of a
550  // region. But there might be other linker-defined symbols with some random
551  // address. These will have been declared extern too, so we need to give them
552  // a value also. Here, we give them the actual value that they have in the
553  // object file, since we're not assigning any object to them.
554  for(const auto &d : to_json_array(data["addresses"]))
555  {
556  auto it=linker_values.find(irep_idt(d["sym"].value));
557  if(it!=linker_values.end())
558  // sym marks the start or end of a region; already dealt with.
559  continue;
560 
561  // Perhaps this is a size symbol for a section whose size we truncated
562  // earlier.
563  irep_idt symbol_value;
564  const auto &pair=truncated_symbols.find(d["sym"].value);
565  if(pair==truncated_symbols.end())
566  symbol_value=d["val"].value;
567  else
568  {
569  debug() << "Truncating the value of symbol " << d["sym"].value << " from "
570  << d["val"].value << " to " << MAX_FLATTENED_ARRAY_SIZE
571  << " because it corresponds to the size of a too-large section."
572  << eom;
574  }
575 
576  source_locationt loc;
577  loc.set_file(linker_script);
578  loc.set_comment("linker script-defined symbol: char *"+
579  d["sym"].value+"="+"(char *)"+id2string(symbol_value)+"u;");
580 
581  symbol_exprt lhs(d["sym"].value, pointer_type(char_type()));
582 
583  constant_exprt rhs(
585  string2integer(id2string(symbol_value)),
586  unsigned_int_type().get_width()),
588 
589  typecast_exprt rhs_tc(rhs, pointer_type(char_type()));
590 
591  linker_values.emplace(
592  irep_idt(d["sym"].value), std::make_pair(lhs, rhs_tc));
593 
594  code_assignt assign(lhs, rhs_tc);
595  assign.add_source_location()=loc;
596  goto_programt::instructiont assign_i =
597  goto_programt::make_assignment(assign, loc);
598  initialize_instructions.push_front(assign_i);
599  }
600  return 0;
601 }
602 #else
603 {
604  goto_programt::instructionst initialize_instructions=gp.instructions;
605  for(const auto &d : to_json_array(data["regions"]))
606  {
607  unsigned start=safe_string2unsigned(d["start"].value);
608  unsigned size=safe_string2unsigned(d["size"].value);
609  constant_exprt first=from_integer(start, size_type());
610  constant_exprt second=from_integer(size, size_type());
611  const code_typet void_t({}, empty_typet());
613  symbol_exprt(CPROVER_PREFIX "allocated_memory", void_t), {first, second});
614 
615  source_locationt loc;
616  loc.set_file(linker_script);
617  loc.set_comment("linker script-defined region:\n"+d["commt"].value+":\n"+
618  d["annot"].value);
619  f.add_source_location()=loc;
620 
622  i.make_function_call(f);
623  initialize_instructions.push_front(i);
624  }
625 
626  if(!symbol_table.has_symbol(CPROVER_PREFIX "allocated_memory"))
627  {
628  symbolt sym;
629  sym.name=CPROVER_PREFIX "allocated_memory";
630  sym.pretty_name=CPROVER_PREFIX "allocated_memory";
631  sym.is_lvalue=sym.is_static_lifetime=true;
632  const code_typet void_t({}, empty_typet());
633  sym.type=void_t;
634  symbol_table.add(sym);
635  }
636 
637  for(const auto &d : to_json_array(data["addresses"]))
638  {
639  source_locationt loc;
640  loc.set_file(linker_script);
641  loc.set_comment("linker script-defined symbol: char *"+
642  d["sym"].value+"="+"(char *)"+d["val"].value+"u;");
643 
644  symbol_exprt lhs(d["sym"].value, pointer_type(char_type()));
645 
646  constant_exprt rhs;
648  string2integer(d["val"].value), unsigned_int_type().get_width()));
649  rhs.type()=unsigned_int_type();
650 
651  exprt rhs_tc =
653 
654  linker_values.emplace(
655  irep_idt(d["sym"].value), std::make_pair(lhs, rhs_tc));
656 
657  code_assignt assign(lhs, rhs_tc);
658  assign.add_source_location()=loc;
660  assign_i.make_assignment(assign);
661  initialize_instructions.push_front(assign_i);
662  }
663  return 0;
664 }
665 #endif
666 
668  std::list<irep_idt> &linker_defined_symbols,
669  const symbol_tablet &symbol_table,
670  const std::string &out_file,
671  const std::string &def_out_file)
672 {
673  for(auto const &pair : symbol_table.symbols)
674  {
675  if(
676  pair.second.is_extern && pair.second.value.is_nil() &&
677  pair.second.name != CPROVER_PREFIX "memory")
678  {
679  linker_defined_symbols.push_back(pair.second.name);
680  }
681  }
682 
683  std::ostringstream linker_def_str;
684  std::copy(
685  linker_defined_symbols.begin(),
686  linker_defined_symbols.end(),
687  std::ostream_iterator<irep_idt>(linker_def_str, "\n"));
688  debug() << "Linker-defined symbols: [" << linker_def_str.str() << "]\n"
689  << eom;
690 
691  temporary_filet linker_def_infile("goto-cc-linker-defs", "");
692  std::ofstream linker_def_file(linker_def_infile());
693  linker_def_file << linker_def_str.str();
694  linker_def_file.close();
695 
696  std::vector<std::string> argv=
697  {
698  "ls_parse.py",
699  "--script", cmdline.get_value('T'),
700  "--object", out_file,
701  "--sym-file", linker_def_infile(),
702  "--out-file", def_out_file
703  };
704 
705  if(get_message_handler().get_verbosity() >= messaget::M_DEBUG)
706  argv.push_back("--very-verbose");
707  else if(get_message_handler().get_verbosity() > messaget::M_RESULT)
708  argv.push_back("--verbose");
709 
710  debug() << "RUN:";
711  for(std::size_t i=0; i<argv.size(); i++)
712  debug() << " " << argv[i];
713  debug() << eom;
714 
715  int rc = run(argv[0], argv, linker_def_infile(), def_out_file, "");
716  if(rc!=0)
717  warning() << "Problem parsing linker script" << eom;
718 
719  return rc;
720 }
721 
723  const std::list<irep_idt> &linker_defined_symbols,
724  const linker_valuest &linker_values)
725 {
726  int fail=0;
727  for(const auto &sym : linker_defined_symbols)
728  if(linker_values.find(sym)==linker_values.end())
729  {
730  fail=1;
731  error() << "Variable '" << sym << "' was declared extern but never given "
732  << "a value, even in a linker script" << eom;
733  }
734 
735  for(const auto &pair : linker_values)
736  {
737  auto it=std::find(linker_defined_symbols.begin(),
738  linker_defined_symbols.end(), pair.first);
739  if(it==linker_defined_symbols.end())
740  {
741  fail=1;
742  error() << "Linker script-defined symbol '" << pair.first << "' was "
743  << "either defined in the C source code, not declared extern in "
744  << "the C source code, or does not appear in the C source code"
745  << eom;
746  }
747  }
748  return fail;
749 }
750 
752 {
753  if(!data.is_object())
754  return true;
755 
756  const json_objectt &data_object = to_json_object(data);
757  return (
758  !(data_object.find("regions") != data_object.end() &&
759  data_object.find("addresses") != data_object.end() &&
760  data["regions"].is_array() && data["addresses"].is_array() &&
761  std::all_of(
762  to_json_array(data["addresses"]).begin(),
763  to_json_array(data["addresses"]).end(),
764  [](const jsont &j) {
765  if(!j.is_object())
766  return false;
767 
768  const json_objectt &address = to_json_object(j);
769  return address.find("val") != address.end() &&
770  address.find("sym") != address.end() &&
771  address["val"].is_number() && address["sym"].is_string();
772  }) &&
773  std::all_of(
774  to_json_array(data["regions"]).begin(),
775  to_json_array(data["regions"]).end(),
776  [](const jsont &j) {
777  if(!j.is_object())
778  return false;
779 
780  const json_objectt &region = to_json_object(j);
781  return region.find("start") != region.end() &&
782  region.find("size") != region.end() &&
783  region.find("annot") != region.end() &&
784  region.find("commt") != region.end() &&
785  region.find("start-symbol") != region.end() &&
786  region.find("has-end-symbol") != region.end() &&
787  region.find("section") != region.end() &&
788  region["start"].is_number() && region["size"].is_number() &&
789  region["annot"].is_string() &&
790  region["start-symbol"].is_string() &&
791  region["section"].is_string() && region["commt"].is_string() &&
792  ((region["has-end-symbol"].is_true() &&
793  region.find("end-symbol") != region.end() &&
794  region["end-symbol"].is_string()) ||
795  (region["has-end-symbol"].is_false() &&
796  region.find("size-symbol") != region.end() &&
797  region.find("end-symbol") == region.end() &&
798  region["size-symbol"].is_string()));
799  })));
800 }
messaget
Class that provides messages with a built-in verbosity 'level'.
Definition: message.h:155
Forall_goto_program_instructions
#define Forall_goto_program_instructions(it, program)
Definition: goto_program.h:1201
symbol_table_baset::has_symbol
bool has_symbol(const irep_idt &name) const
Check whether a symbol exists in the symbol table.
Definition: symbol_table_base.h:87
dstringt
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:37
array_name
std::string array_name(const namespacet &ns, const exprt &expr)
Definition: array_name.cpp:20
messaget::M_RESULT
@ M_RESULT
Definition: message.h:170
tempfile.h
typecast_exprt::conditional_cast
static exprt conditional_cast(const exprt &expr, const typet &type)
Definition: std_expr.h:2021
symbol_tablet
The symbol table.
Definition: symbol_table.h:20
source_locationt::set_comment
void set_comment(const irep_idt &comment)
Definition: source_location.h:141
linker_script_merget::linker_valuest
std::map< irep_idt, std::pair< symbol_exprt, exprt > > linker_valuest
Definition: linker_script_merge.h:79
linker_script_merge.h
Merge linker script-defined symbols into a goto-program.
arith_tools.h
goto_programt::instructiont::make_function_call
void make_function_call(const code_function_callt &_code)
Definition: goto_program.h:452
cmdlinet::isset
virtual bool isset(char option) const
Definition: cmdline.cpp:29
address_of_exprt::object
exprt & object()
Definition: std_expr.h:2795
linker_script_merget::get_linker_script_data
int get_linker_script_data(std::list< irep_idt > &linker_defined_symbols, const symbol_tablet &symbol_table, const std::string &out_file, const std::string &def_out_file)
Write linker script definitions to linker_data.
Definition: linker_script_merge.cpp:667
__CPROVER_allocated_memory
void __CPROVER_allocated_memory(__CPROVER_size_t address, __CPROVER_size_t extent)
CHECK_RETURN
#define CHECK_RETURN(CONDITION)
Definition: invariant.h:496
to_index_expr
const index_exprt & to_index_expr(const exprt &expr)
Cast an exprt to an index_exprt.
Definition: std_expr.h:1347
symbolt::type
typet type
Type of symbol.
Definition: symbol.h:31
goto_programt::instructionst
std::list< instructiont > instructionst
Definition: goto_program.h:577
data
Definition: kdev_t.h:24
linker_script_merget::add_linker_script_definitions
int add_linker_script_definitions()
Add values of linkerscript-defined symbols to the goto-binary.
Definition: linker_script_merge.cpp:28
mp_integer
BigInt mp_integer
Definition: mp_arith.h:19
json_objectt::find
iterator find(const std::string &key)
Definition: json.h:356
string2integer
const mp_integer string2integer(const std::string &n, unsigned base)
Definition: mp_arith.cpp:57
MAX_FLATTENED_ARRAY_SIZE
const std::size_t MAX_FLATTENED_ARRAY_SIZE
Definition: magic.h:11
file
Definition: kdev_t.h:19
plus_exprt
The plus expression Associativity is not specified.
Definition: std_expr.h:881
linker_script_merget::ls_data2instructions
int ls_data2instructions(jsont &data, const std::string &linker_script, goto_programt &gp, symbol_tablet &symbol_table, linker_valuest &linker_values)
Write a list of definitions derived from data into gp's instructions member.
Definition: linker_script_merge.cpp:405
run
int run(const std::string &what, const std::vector< std::string > &argv)
Definition: run.cpp:49
exprt
Base class for all expressions.
Definition: expr.h:53
linker_script_merget::replace_expr
int replace_expr(exprt &old_expr, const linker_valuest &linker_values, const symbol_exprt &old_symbol, const irep_idt &ident, const std::string &shape)
do the actual replacement of an expr with a new pointer expr
Definition: linker_script_merge.cpp:276
goto_modelt
Definition: goto_model.h:26
irep_idt
dstringt irep_idt
Definition: irep.h:32
to_string
std::string to_string(const string_not_contains_constraintt &expr)
Used for debug printing.
Definition: string_constraint.cpp:55
messaget::eom
static eomt eom
Definition: message.h:297
run.h
goto_functionst::function_map
function_mapt function_map
Definition: goto_functions.h:27
to_json_array
json_arrayt & to_json_array(jsont &json)
Definition: json.h:426
symbol_exprt
Expression to hold a symbol (variable)
Definition: std_expr.h:82
jsont
Definition: json.h:27
linker_script_merget::pointerize_subexprs_of
int pointerize_subexprs_of(exprt &expr, std::list< symbol_exprt > &to_pointerize, const linker_valuest &linker_values)
Definition: linker_script_merge.cpp:298
magic.h
Magic numbers used throughout the codebase.
goto_programt::make_assignment
static instructiont make_assignment(const code_assignt &_code, const source_locationt &l=source_locationt::nil())
Create an assignment instruction.
Definition: goto_program.h:1024
zero_initializer
optionalt< exprt > zero_initializer(const typet &type, const source_locationt &source_location, const namespacet &ns)
Create the equivalent of zero for type type.
Definition: expr_initializer.cpp:318
symbolt::pretty_name
irep_idt pretty_name
Language-specific display name.
Definition: symbol.h:52
linker_script_merget::goto_binary
const std::string & goto_binary
Definition: linker_script_merge.h:91
json_objectt
Definition: json.h:300
json_parser.h
replacement_predicatet
Patterns of expressions that should be replaced.
Definition: linker_script_merge.h:23
namespacet
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:92
linker_script_merget::linker_data_is_malformed
int linker_data_is_malformed(const jsont &data) const
Validate output of the scripts/ls_parse.py tool.
Definition: linker_script_merge.cpp:751
exprt::type
typet & type()
Return the type of the expression.
Definition: expr.h:81
code_function_callt
codet representation of a function call statement.
Definition: std_code.h:1183
cmdlinet
Definition: cmdline.h:21
expr_initializer.h
Expression Initialization.
messaget::result
mstreamt & result() const
Definition: message.h:409
messaget::error
mstreamt & error() const
Definition: message.h:399
empty_typet
The empty type.
Definition: std_types.h:46
integer2bvrep
irep_idt integer2bvrep(const mp_integer &src, std::size_t width)
convert an integer to bit-vector representation with given width This uses two's complement for negat...
Definition: arith_tools.cpp:379
linker_script_merget::linker_script_merget
linker_script_merget(compilet &, const std::string &elf_binary, const std::string &goto_binary, const cmdlinet &, message_handlert &)
Definition: linker_script_merge.cpp:114
symbol_table_baset::get_writeable_ref
symbolt & get_writeable_ref(const irep_idt &name)
Find a symbol in the symbol table for read-write access.
Definition: symbol_table_base.h:121
to_address_of_expr
const address_of_exprt & to_address_of_expr(const exprt &expr)
Cast an exprt to an address_of_exprt.
Definition: std_expr.h:2823
id2string
const std::string & id2string(const irep_idt &d)
Definition: irep.h:44
messaget::mstreamt::source_location
source_locationt source_location
Definition: message.h:247
symbol_exprt::get_identifier
const irep_idt & get_identifier() const
Definition: std_expr.h:111
source_locationt::set_file
void set_file(const irep_idt &file)
Definition: source_location.h:96
INITIALIZE_FUNCTION
#define INITIALIZE_FUNCTION
Definition: static_lifetime_init.h:23
cmdlinet::get_value
std::string get_value(char option) const
Definition: cmdline.cpp:47
linker_script_merget::cmdline
const cmdlinet & cmdline
Definition: linker_script_merge.h:92
linker_script_merget::symbols_to_pointerize
void symbols_to_pointerize(const linker_valuest &linker_values, const exprt &expr, std::list< symbol_exprt > &to_pointerize) const
fill to_pointerize with names of linker symbols appearing in expr
Definition: linker_script_merge.cpp:341
index_exprt::index
exprt & index()
Definition: std_expr.h:1319
pointer_type
pointer_typet pointer_type(const typet &subtype)
Definition: c_types.cpp:243
index_exprt::array
exprt & array()
Definition: std_expr.h:1309
unsigned_int_type
unsignedbv_typet unsigned_int_type()
Definition: c_types.cpp:44
to_symbol_expr
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Definition: std_expr.h:177
code_typet
Base type of functions.
Definition: std_types.h:736
linker_script_merget::elf_binary
const std::string & elf_binary
Definition: linker_script_merge.h:90
irept::id
const irep_idt & id() const
Definition: irep.h:418
message_handlert
Definition: message.h:28
read_goto_binary.h
Read Goto Programs.
safe_string2unsigned
unsigned safe_string2unsigned(const std::string &str, int base)
Definition: string2int.cpp:19
linker_script_merget::goto_and_object_mismatch
int goto_and_object_mismatch(const std::list< irep_idt > &linker_defined_symbols, const linker_valuest &linker_values)
one-to-one correspondence between extern & linker symbols
Definition: linker_script_merge.cpp:722
goto_programt::instructiont::make_assignment
void make_assignment()
Definition: goto_program.h:382
char_type
bitvector_typet char_type()
Definition: c_types.cpp:114
source_locationt
Definition: source_location.h:20
symbol_table_baset::add
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
Definition: symbol_table_base.cpp:18
symbol_tablet::get_writeable
virtual symbolt * get_writeable(const irep_idt &name) override
Find a symbol in the symbol table for read-write access.
Definition: symbol_table.h:96
goto_programt::instructions
instructionst instructions
The list of instructions in the goto program.
Definition: goto_program.h:585
symbolt::value
exprt value
Initial value of symbol.
Definition: symbol.h:34
messaget::get_message_handler
message_handlert & get_message_handler()
Definition: message.h:184
json_objectt::end
iterator end()
Definition: json.h:386
array_typet
Arrays with given size.
Definition: std_types.h:965
symbolt::is_extern
bool is_extern
Definition: symbol.h:66
goto_modelt::goto_functions
goto_functionst goto_functions
GOTO functions.
Definition: goto_model.h:33
to_json_object
json_objectt & to_json_object(jsont &json)
Definition: json.h:444
symbolt::location
source_locationt location
Source code location of definition of symbol.
Definition: symbol.h:37
symbolt
Symbol table entry.
Definition: symbol.h:28
from_integer
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Definition: arith_tools.cpp:99
symbol_table_baset::symbols
const symbolst & symbols
Read-only field, used to look up symbols given their names.
Definition: symbol_table_base.h:30
compilet::write_bin_object_file
bool write_bin_object_file(const std::string &, const goto_modelt &)
Writes the goto functions of src_goto_model to a binary format object file.
Definition: compile.cpp:564
CPROVER_PREFIX
#define CPROVER_PREFIX
Definition: cprover_prefix.h:14
symbolt::is_static_lifetime
bool is_static_lifetime
Definition: symbol.h:65
goto_programt
A generic container class for the GOTO intermediate representation of one function.
Definition: goto_program.h:73
read_goto_binary
static bool read_goto_binary(const std::string &filename, symbol_tablet &, goto_functionst &, message_handlert &)
Read a goto binary from a file, but do not update config.
Definition: read_goto_binary.cpp:59
linker_script_merget::replacement_predicates
std::list< replacement_predicatet > replacement_predicates
The "shapes" of expressions to be replaced by a pointer.
Definition: linker_script_merge.h:101
jsont::is_object
bool is_object() const
Definition: json.h:56
exprt::operands
operandst & operands()
Definition: expr.h:95
symbolt::is_lvalue
bool is_lvalue
Definition: symbol.h:66
messaget::debug
mstreamt & debug() const
Definition: message.h:429
messaget::M_DEBUG
@ M_DEBUG
Definition: message.h:171
linker_script_merget::compiler
compilet & compiler
Definition: linker_script_merge.h:89
index_exprt
Array index operator.
Definition: std_expr.h:1293
static_lifetime_init.h
address_of_exprt
Operator to return the address of an object.
Definition: std_expr.h:2786
exprt::add_source_location
source_locationt & add_source_location()
Definition: expr.h:259
compilet::goto_model
goto_modelt goto_model
Definition: compile.h:31
typecast_exprt
Semantic type conversion.
Definition: std_expr.h:2013
compilet
Definition: compile.h:27
size_type
unsignedbv_typet size_type()
Definition: c_types.cpp:58
code_assignt
A codet representing an assignment in the program.
Definition: std_code.h:295
constant_exprt
A constant literal expression.
Definition: std_expr.h:3906
messaget::warning
mstreamt & warning() const
Definition: message.h:404
linker_script_merget::pointerize_linker_defined_symbols
int pointerize_linker_defined_symbols(goto_modelt &, const linker_valuest &)
convert the type of linker script-defined symbols to char*
Definition: linker_script_merge.cpp:200
parse_json
bool parse_json(std::istream &in, const std::string &filename, message_handlert &message_handler, jsont &dest)
Definition: json_parser.cpp:16
goto_programt::instructiont
This class represents an instruction in the GOTO intermediate representation.
Definition: goto_program.h:179
constant_exprt::set_value
void set_value(const irep_idt &value)
Definition: std_expr.h:3919
exprt::source_location
const source_locationt & source_location() const
Definition: expr.h:254
goto_modelt::symbol_table
symbol_tablet symbol_table
Symbol table.
Definition: goto_model.h:30
c_types.h
symbolt::name
irep_idt name
The unique identifier.
Definition: symbol.h:40
temporary_filet
Definition: tempfile.h:24
jsont::value
std::string value
Definition: json.h:132