34 std::list<irep_idt> linker_defined_symbols;
36 linker_defined_symbols,
39 linker_def_outfile());
48 error() <<
"Problem parsing linker script JSON data" <<
eom;
55 error() <<
"Malformed linker-script JSON document" <<
eom;
60 auto original_goto_model =
63 if(!original_goto_model.has_value())
65 error() <<
"Unable to read goto binary for linker script merging" <<
eom;
73 if(pair == original_goto_model->goto_functions.function_map.end())
83 original_goto_model->symbol_table,
102 error() <<
"Could not pointerize all linker-defined expressions" <<
eom;
109 error() <<
"Could not write linkerscript-augmented binary" <<
eom;
116 const std::string &elf_binary,
117 const std::string &goto_binary,
122 elf_binary(elf_binary),
123 goto_binary(goto_binary),
125 replacement_predicates(
127 "address of array's first member",
132 [](
const exprt &expr) {
133 return expr.
id() == ID_address_of &&
134 expr.
type().
id() == ID_pointer &&
142 .
id() == ID_symbol &&
150 .
id() == ID_constant &&
154 .
id() == ID_signedbv;
161 [](
const exprt &expr) {
162 return expr.
id() == ID_address_of &&
163 expr.
type().
id() == ID_pointer &&
173 [](
const exprt &expr) {
174 return expr.
id() == ID_address_of &&
175 expr.
type().
id() == ID_pointer &&
187 [](
const exprt &expr) {
188 return expr.
id() == ID_symbol && expr.
type().
id() == ID_array;
191 "pointer (does not need pointerizing)",
195 [](
const exprt &expr) {
196 return expr.
id() == ID_symbol && expr.
type().
id() == ID_pointer;
208 for(
const auto &pair : linker_values)
216 entry.
value=pair.second.second;
223 std::list<symbol_exprt> to_pointerize;
226 if(to_pointerize.empty())
228 debug() <<
"Pointerizing the symbol-table value of symbol " << pair.first
234 if(to_pointerize.empty() && fail==0)
237 for(
const auto &sym : to_pointerize)
238 error() <<
" Could not pointerize '" << sym.get_identifier()
239 <<
"' in symbol table entry " << pair.first <<
". Pretty:\n"
240 << sym.pretty() <<
"\n";
251 for(
exprt *insts : std::list<exprt *>({&(iit->code), &(iit->guard)}))
253 std::list<symbol_exprt> to_pointerize;
255 if(to_pointerize.empty())
257 debug() <<
"Pointerizing a program expression..." <<
eom;
259 if(to_pointerize.empty() && fail==0)
262 for(
const auto &sym : to_pointerize)
264 error() <<
" Could not pointerize '" << sym.get_identifier()
265 <<
"' in function " << gf.first <<
". Pretty:\n"
266 << sym.pretty() <<
"\n";
281 const std::string &shape)
283 auto it=linker_values.find(ident);
284 if(it==linker_values.end())
286 error() <<
"Could not find a new expression for linker script-defined "
287 <<
"symbol '" << ident <<
"'" <<
eom;
292 debug() <<
"Pointerizing linker-defined symbol '" << ident <<
"' of shape <"
293 << shape <<
">." <<
eom;
300 std::list<symbol_exprt> &to_pointerize,
304 for(
auto const &pair : linker_values)
307 if(!pattern.match(expr))
310 const symbol_exprt inner_symbol=pattern.inner_symbol(expr);
313 tmp=
replace_expr(expr, linker_values, inner_symbol, pair.first,
314 pattern.description());
316 auto result=std::find(to_pointerize.begin(), to_pointerize.end(),
318 if(
result==to_pointerize.end())
325 to_pointerize.erase(
result);
344 std::list<symbol_exprt> &to_pointerize)
const
346 for(
const auto &op : expr.
operands())
348 if(op.id()!=ID_symbol)
352 if(pair!=linker_values.end())
353 to_pointerize.push_back(sym_exp);
355 for(
const auto &op : expr.
operands())
360 The current implementation of
this function is less precise than the
361 commented-out version below. To understand the difference between these
362 implementations, consider the following example:
364 Suppose we have a section in the linker script, 100 bytes long, where the
365 address of the symbol sec_start is the start of the section (value 4096) and the
366 address of sec_end is the end of that section (value 4196).
368 The current implementation synthesizes the
goto-version of the following C:
370 char __sec_array[100];
371 char *sec_start=(&__sec_array[0]);
372 char *sec_end=((&__sec_array[0])+100);
376 This is imprecise
for the following reason: the actual address of the array and
377 the pointers shall be some random CBMC-
internal address, instead of being 4096
378 and 4196. The linker script, on the other hand, would have specified the exact
379 position of the section, and we even know what the actual values of sec_start
380 and sec_end are from the
object file (these values are in the `addresses` list
381 of the `
data` argument to
this function). If the correctness of the code depends
382 on these actual values, then CBMCs model of the code is too imprecise to verify
385 The commented-out version of
this function below synthesizes the following:
387 char *sec_start=4096;
391 This code has both the actual addresses of the start and end of the section and
392 tells CBMC that the intermediate region is valid. However, the allocated_memory
393 macro does not currently allocate an actual
object at the address 4096, so
394 symbolic execution can fail. In particular, the
'allocated memory' is part of
395 __CPROVER_memory, which does not have a bounded size;
this means that (
for
396 example) calls to memcpy or memset fail, because the first and third arguments
397 do not have know n size. The commented-out implementation should be reinstated
400 In either
case, no other changes to the rest of the code (outside
this function)
401 should be necessary. The rest of
this file converts expressions containing the
402 linker-defined symbol into pointer types
if they were not already, and
this is
403 the right behaviour
for both implementations.
407 const std::string &linker_script,
414 std::map<irep_idt, std::size_t> truncated_symbols;
417 bool has_end=d[
"has-end-symbol"].is_true();
421 << d[
"start-symbol"].value;
430 warning() <<
"Object section '" << d[
"section"].value <<
"' of size "
431 << array_size <<
" is too large to model. Truncating to "
444 std::ostringstream array_comment;
445 array_comment <<
"Object section '" << d[
"section"].value <<
"' of size "
446 << array_size <<
" bytes";
456 linker_values.emplace(
457 d[
"start-symbol"].value, std::make_pair(start_sym, array_start));
461 auto it = std::find_if(
464 [&d](
const jsont &add) {
465 return add[
"sym"].
value == d[
"start-symbol"].value;
469 error() <<
"Start: Could not find address corresponding to symbol '"
470 << d[
"start-symbol"].value <<
"' (start of section)" << eom;
475 std::ostringstream start_comment;
476 start_comment <<
"Pointer to beginning of object section '"
477 << d[
"section"].value <<
"'. Original address in object file"
478 <<
" is " << (*it)[
"val"].value;
487 initialize_instructions.push_front(start_assign_i);
491 plus_exprt array_end(array_start, array_size_expr);
494 linker_values.emplace(
495 d[
"end-symbol"].value, std::make_pair(end_sym, array_end));
497 auto entry = std::find_if(
500 [&d](
const jsont &add) {
501 return add[
"sym"].
value == d[
"end-symbol"].value;
505 error() <<
"Could not find address corresponding to symbol '"
506 << d[
"end-symbol"].value <<
"' (end of section)" << eom;
511 std::ostringstream end_comment;
512 end_comment <<
"Pointer to end of object section '" << d[
"section"].value
513 <<
"'. Original address in object file"
514 <<
" is " << (*entry)[
"val"].value;
522 initialize_instructions.push_front(end_assign_i);
533 array_sym.
type=array_type;
535 symbol_table.
add(array_sym);
546 initialize_instructions.push_front(array_assign_i);
556 auto it=linker_values.find(
irep_idt(d[
"sym"].value));
557 if(it!=linker_values.end())
564 const auto &pair=truncated_symbols.find(d[
"sym"].value);
565 if(pair==truncated_symbols.end())
566 symbol_value=d[
"val"].value;
569 debug() <<
"Truncating the value of symbol " << d[
"sym"].value <<
" from "
571 <<
" because it corresponds to the size of a too-large section."
578 loc.
set_comment(
"linker script-defined symbol: char *"+
579 d[
"sym"].value+
"="+
"(char *)"+
id2string(symbol_value)+
"u;");
591 linker_values.emplace(
592 irep_idt(d[
"sym"].value), std::make_pair(lhs, rhs_tc));
598 initialize_instructions.push_front(assign_i);
617 loc.set_comment(
"linker script-defined region:\n"+d[
"commt"].value+
":\n"+
619 f.add_source_location()=loc;
623 initialize_instructions.push_front(i);
634 symbol_table.
add(sym);
641 loc.
set_comment(
"linker script-defined symbol: char *"+
642 d[
"sym"].value+
"="+
"(char *)"+d[
"val"].value+
"u;");
654 linker_values.emplace(
655 irep_idt(d[
"sym"].value), std::make_pair(lhs, rhs_tc));
658 assign.add_source_location()=loc;
661 initialize_instructions.push_front(assign_i);
668 std::list<irep_idt> &linker_defined_symbols,
670 const std::string &out_file,
671 const std::string &def_out_file)
673 for(
auto const &pair : symbol_table.
symbols)
676 pair.second.is_extern && pair.second.value.is_nil() &&
679 linker_defined_symbols.push_back(pair.second.name);
683 std::ostringstream linker_def_str;
685 linker_defined_symbols.begin(),
686 linker_defined_symbols.end(),
687 std::ostream_iterator<irep_idt>(linker_def_str,
"\n"));
688 debug() <<
"Linker-defined symbols: [" << linker_def_str.str() <<
"]\n"
692 std::ofstream linker_def_file(linker_def_infile());
693 linker_def_file << linker_def_str.str();
694 linker_def_file.close();
696 std::vector<std::string> argv=
700 "--object", out_file,
701 "--sym-file", linker_def_infile(),
702 "--out-file", def_out_file
706 argv.push_back(
"--very-verbose");
708 argv.push_back(
"--verbose");
711 for(std::size_t i=0; i<argv.size(); i++)
712 debug() <<
" " << argv[i];
715 int rc =
run(argv[0], argv, linker_def_infile(), def_out_file,
"");
717 warning() <<
"Problem parsing linker script" <<
eom;
723 const std::list<irep_idt> &linker_defined_symbols,
727 for(
const auto &sym : linker_defined_symbols)
728 if(linker_values.find(sym)==linker_values.end())
731 error() <<
"Variable '" << sym <<
"' was declared extern but never given "
732 <<
"a value, even in a linker script" <<
eom;
735 for(
const auto &pair : linker_values)
737 auto it=std::find(linker_defined_symbols.begin(),
738 linker_defined_symbols.end(), pair.first);
739 if(it==linker_defined_symbols.end())
742 error() <<
"Linker script-defined symbol '" << pair.first <<
"' was "
743 <<
"either defined in the C source code, not declared extern in "
744 <<
"the C source code, or does not appear in the C source code"
753 if(!
data.is_object())
758 !(data_object.
find(
"regions") != data_object.
end() &&
759 data_object.
find(
"addresses") != data_object.
end() &&
760 data[
"regions"].is_array() &&
data[
"addresses"].is_array() &&
769 return address.
find(
"val") != address.end() &&
770 address.find(
"sym") != address.end() &&
771 address[
"val"].is_number() && address[
"sym"].is_string();
781 return region.
find(
"start") != region.end() &&
782 region.find(
"size") != region.end() &&
783 region.find(
"annot") != region.end() &&
784 region.find(
"commt") != region.end() &&
785 region.find(
"start-symbol") != region.end() &&
786 region.find(
"has-end-symbol") != region.end() &&
787 region.find(
"section") != region.end() &&
788 region[
"start"].is_number() && region[
"size"].is_number() &&
789 region[
"annot"].is_string() &&
790 region[
"start-symbol"].is_string() &&
791 region[
"section"].is_string() && region[
"commt"].is_string() &&
792 ((region[
"has-end-symbol"].is_true() &&
793 region.find(
"end-symbol") != region.end() &&
794 region[
"end-symbol"].is_string()) ||
795 (region[
"has-end-symbol"].is_false() &&
796 region.find(
"size-symbol") != region.end() &&
797 region.find(
"end-symbol") == region.end() &&
798 region[
"size-symbol"].is_string()));