cprover
taint_analysis.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Taint Analysis
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
11 
12 #include "taint_analysis.h"
13 
14 #include <iostream>
15 #include <fstream>
16 
17 #include <util/invariant.h>
18 #include <util/json.h>
19 #include <util/prefix.h>
20 #include <util/simplify_expr.h>
21 #include <util/string_constant.h>
22 
24 
26 
27 #include "taint_parser.h"
28 
30 {
31 public:
32  explicit taint_analysist(message_handlert &message_handler)
33  : log(message_handler)
34  {
35  }
36 
37  bool operator()(
38  const std::string &taint_file_name,
39  const symbol_tablet &,
41  bool show_full,
42  const optionalt<std::string> &json_file_name);
43 
44 protected:
48 
49  void instrument(const namespacet &, goto_functionst &);
51 };
52 
54  const namespacet &ns,
55  goto_functionst &goto_functions)
56 {
57  for(auto &function : goto_functions.function_map)
58  instrument(ns, function.second);
59 }
60 
62  const namespacet &ns,
63  goto_functionst::goto_functiont &goto_function)
64 {
65  for(goto_programt::instructionst::iterator
66  it=goto_function.body.instructions.begin();
67  it!=goto_function.body.instructions.end();
68  it++)
69  {
70  const goto_programt::instructiont &instruction=*it;
71 
72  goto_programt insert_before, insert_after;
73 
74  if(instruction.is_function_call())
75  {
76  const code_function_callt &function_call =
77  instruction.get_function_call();
78  const exprt &function = function_call.function();
79 
80  if(function.id() == ID_symbol)
81  {
82  const irep_idt &identifier = to_symbol_expr(function).get_identifier();
83 
84  std::set<irep_idt> identifiers;
85 
86  identifiers.insert(identifier);
87 
88  irep_idt class_id = function.get(ID_C_class);
89  if(class_id.empty())
90  {
91  }
92  else
93  {
94  std::string suffix = std::string(
95  id2string(identifier), class_id.size(), std::string::npos);
96 
97  class_hierarchyt::idst parents =
99  for(const auto &p : parents)
100  identifiers.insert(id2string(p) + suffix);
101  }
102 
103  for(const auto &rule : taint.rules)
104  {
105  bool match = false;
106  for(const auto &i : identifiers)
107  {
108  if(
109  i == rule.function_identifier ||
110  has_prefix(
111  id2string(i),
112  "java::" + id2string(rule.function_identifier) + ":"))
113  {
114  match = true;
115  break;
116  }
117  }
118 
119  if(match)
120  {
121  log.debug() << "MATCH " << rule.id << " on " << identifier
122  << messaget::eom;
123 
124  exprt where = nil_exprt();
125 
126  const code_typet &code_type = to_code_type(function.type());
127 
128  bool have_this = !code_type.parameters().empty() &&
129  code_type.parameters().front().get_bool(ID_C_this);
130 
131  switch(rule.where)
132  {
134  {
136  ns.lookup(id2string(identifier) + "#return_value");
137  where = return_value_symbol.symbol_expr();
138  break;
139  }
140 
142  {
143  unsigned nr =
144  have_this ? rule.parameter_number : rule.parameter_number - 1;
145  if(function_call.arguments().size() > nr)
146  where = function_call.arguments()[nr];
147  break;
148  }
149 
151  if(have_this)
152  {
154  !function_call.arguments().empty(),
155  "`this` implies at least one argument in function call");
156  where = function_call.arguments()[0];
157  }
158  break;
159  }
160 
161  switch(rule.kind)
162  {
164  {
165  codet code_set_may{ID_set_may};
166  code_set_may.operands().resize(2);
167  code_set_may.op0() = where;
168  code_set_may.op1() =
169  address_of_exprt(string_constantt(rule.taint));
170  insert_after.add(goto_programt::make_other(
171  code_set_may, instruction.source_location));
172  break;
173  }
174 
176  {
177  binary_predicate_exprt get_may{
178  where,
179  ID_get_may,
180  address_of_exprt(string_constantt(rule.taint))};
182  insert_before.add(goto_programt::make_assertion(
183  not_exprt(get_may), instruction.source_location));
184  t->source_location.set_property_class(
185  "taint rule " + id2string(rule.id));
186  t->source_location.set_comment(rule.message);
187  break;
188  }
189 
191  {
192  codet code_clear_may{ID_clear_may};
193  code_clear_may.operands().resize(2);
194  code_clear_may.op0() = where;
195  code_clear_may.op1() =
196  address_of_exprt(string_constantt(rule.taint));
197  insert_after.add(goto_programt::make_other(
198  code_clear_may, instruction.source_location));
199  break;
200  }
201  }
202  }
203  }
204  }
205  }
206 
207  if(!insert_before.empty())
208  {
209  goto_function.body.insert_before_swap(it, insert_before);
210  // advance until we get back to the call
211  while(!it->is_function_call()) ++it;
212  }
213 
214  if(!insert_after.empty())
215  {
216  goto_function.body.destructive_insert(
217  std::next(it), insert_after);
218  }
219  }
220 }
221 
223  const std::string &taint_file_name,
224  const symbol_tablet &symbol_table,
225  goto_functionst &goto_functions,
226  bool show_full,
227  const optionalt<std::string> &json_file_name)
228 {
229  try
230  {
231  json_arrayt json_result;
232  bool use_json = json_file_name.has_value();
233 
234  log.status() << "Reading taint file '" << taint_file_name << "'"
235  << messaget::eom;
236 
237  if(taint_parser(taint_file_name, taint, log.get_message_handler()))
238  {
239  log.error() << "Failed to read taint definition file" << messaget::eom;
240  return true;
241  }
242 
243  log.status() << "Got " << taint.rules.size() << " taint definitions"
244  << messaget::eom;
245 
246  log.conditional_output(log.debug(), [this](messaget::mstreamt &mstream) {
247  taint.output(mstream);
248  mstream << messaget::eom;
249  });
250 
251  log.status() << "Instrumenting taint" << messaget::eom;
252 
253  class_hierarchy(symbol_table);
254 
255  const namespacet ns(symbol_table);
256  instrument(ns, goto_functions);
257  goto_functions.update();
258 
259  bool have_entry_point=
260  goto_functions.function_map.find(goto_functionst::entry_point())!=
261  goto_functions.function_map.end();
262 
263  // do we have an entry point?
264  if(have_entry_point)
265  {
266  log.status() << "Working from entry point" << messaget::eom;
267  }
268  else
269  {
270  log.status() << "No entry point found; "
271  << "we will consider the heads of all functions as reachable"
272  << messaget::eom;
273 
274  goto_programt end, gotos, calls;
275 
277 
278  forall_goto_functions(f_it, goto_functions)
279  if(f_it->second.body_available() &&
280  f_it->first!=goto_functionst::entry_point())
281  {
282  const symbolt &symbol = ns.lookup(f_it->first);
283  const code_function_callt call(symbol.symbol_expr());
286  calls.add(
290  }
291 
293  goto_functions.function_map[goto_functionst::entry_point()];
294 
295  goto_programt &body=entry.body;
296 
297  body.destructive_append(gotos);
298  body.destructive_append(calls);
299  body.destructive_append(end);
300 
301  goto_functions.update();
302  }
303 
304  log.status() << "Data-flow analysis" << messaget::eom;
305 
306  custom_bitvector_analysist custom_bitvector_analysis;
307  custom_bitvector_analysis(goto_functions, ns);
308 
309  if(show_full)
310  {
311  custom_bitvector_analysis.output(ns, goto_functions, std::cout);
312  return false;
313  }
314 
315  forall_goto_functions(f_it, goto_functions)
316  {
317  if(!f_it->second.body.has_assertion())
318  continue;
319 
320  const symbolt &symbol=ns.lookup(f_it->first);
321 
322  if(f_it->first=="__actual_thread_spawn")
323  continue;
324 
325  bool first=true;
326 
327  forall_goto_program_instructions(i_it, f_it->second.body)
328  {
329  if(!i_it->is_assert())
330  continue;
331 
333  i_it->get_condition()))
334  {
335  continue;
336  }
337 
338  if(custom_bitvector_analysis[i_it].has_values.is_false())
339  continue;
340 
341  exprt result =
342  custom_bitvector_analysis.eval(i_it->get_condition(), i_it);
343  if(simplify_expr(std::move(result), ns).is_true())
344  continue;
345 
346  if(first)
347  {
348  first=false;
349  if(!use_json)
350  std::cout << "\n"
351  << "******** Function " << symbol.display_name() << '\n';
352  }
353 
354  if(use_json)
355  {
357  {"bugClass",
358  json_stringt(i_it->source_location.get_property_class())},
359  {"file", json_stringt(i_it->source_location.get_file())},
360  {"line",
361  json_numbert(id2string(i_it->source_location.get_line()))}};
362  json_result.push_back(std::move(json));
363  }
364  else
365  {
366  std::cout << i_it->source_location;
367  if(!i_it->source_location.get_comment().empty())
368  std::cout << ": " << i_it->source_location.get_comment();
369 
370  if(!i_it->source_location.get_property_class().empty())
371  std::cout << " ("
372  << i_it->source_location.get_property_class() << ")";
373 
374  std::cout << '\n';
375  }
376  }
377  }
378 
379  if(use_json)
380  {
381  std::ofstream json_out(json_file_name.value());
382 
383  if(!json_out)
384  {
385  log.error() << "Failed to open json output '" << json_file_name.value()
386  << "'" << messaget::eom;
387  return true;
388  }
389 
390  log.status() << "Analysis result is written to '"
391  << json_file_name.value() << "'" << messaget::eom;
392 
393  json_out << json_result << '\n';
394  }
395 
396  return false;
397  }
398  catch(const char *error_msg)
399  {
400  log.error() << error_msg << messaget::eom;
401  return true;
402  }
403  catch(const std::string &error_msg)
404  {
405  log.error() << error_msg << messaget::eom;
406  return true;
407  }
408  catch(...)
409  {
410  log.error() << "Caught unexpected error in taint_analysist::operator()"
411  << messaget::eom;
412  return true;
413  }
414 }
415 
417  goto_modelt &goto_model,
418  const std::string &taint_file_name,
419  message_handlert &message_handler,
420  bool show_full,
421  const optionalt<std::string> &json_file_name)
422 {
423  taint_analysist taint_analysis(message_handler);
424  return taint_analysis(
425  taint_file_name,
426  goto_model.symbol_table,
427  goto_model.goto_functions,
428  show_full,
429  json_file_name);
430 }
messaget
Class that provides messages with a built-in verbosity 'level'.
Definition: message.h:155
dstringt
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:37
json_numbert
Definition: json.h:291
taint_parse_treet::rules
rulest rules
Definition: taint_parser.h:62
taint_analysist::log
messaget log
Definition: taint_analysis.cpp:45
symbol_tablet
The symbol table.
Definition: symbol_table.h:20
goto_programt::instructiont::source_location
source_locationt source_location
The location of the instruction in the source file.
Definition: goto_program.h:269
class_hierarchyt
Non-graph-based representation of the class hierarchy.
Definition: class_hierarchy.h:43
taint_analysist::class_hierarchy
class_hierarchyt class_hierarchy
Definition: taint_analysis.cpp:47
taint_parse_treet::rulet::RETURN_VALUE
@ RETURN_VALUE
Definition: taint_parser.h:29
symbolt::display_name
const irep_idt & display_name() const
Return language specific display name if present.
Definition: symbol.h:55
messaget::status
mstreamt & status() const
Definition: message.h:414
taint_analysist::instrument
void instrument(const namespacet &, goto_functionst &)
Definition: taint_analysis.cpp:53
custom_bitvector_analysis.h
Field-insensitive, location-sensitive bitvector analysis.
taint_parser.h
Taint Parser.
taint_parse_treet
Definition: taint_parser.h:23
taint_analysist
Definition: taint_analysis.cpp:30
prefix.h
taint_analysis.h
Taint Analysis.
invariant.h
goto_programt::make_end_function
static instructiont make_end_function(const source_locationt &l=source_locationt::nil())
Definition: goto_program.h:957
goto_programt::add
targett add(instructiont &&instruction)
Adds a given instruction at the end.
Definition: goto_program.h:686
string_constant.h
goto_programt::empty
bool empty() const
Is the program empty?
Definition: goto_program.h:762
exprt
Base class for all expressions.
Definition: expr.h:53
goto_modelt
Definition: goto_model.h:26
custom_bitvector_analysist::eval
exprt eval(const exprt &src, locationt loc)
Definition: custom_bitvector_analysis.h:159
bool_typet
The Boolean type.
Definition: std_types.h:37
messaget::eom
static eomt eom
Definition: message.h:297
goto_functionst::function_map
function_mapt function_map
Definition: goto_functions.h:27
string_constantt
Definition: string_constant.h:16
taint_parse_treet::rulet::SANITIZER
@ SANITIZER
Definition: taint_parser.h:28
goto_programt::make_goto
static instructiont make_goto(targett _target, const source_locationt &l=source_locationt::nil())
Definition: goto_program.h:998
taint_analysist::taint
taint_parse_treet taint
Definition: taint_analysis.cpp:46
json_arrayt
Definition: json.h:165
json_objectt
Definition: json.h:300
namespacet
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:92
goto_programt::make_function_call
static instructiont make_function_call(const code_function_callt &_code, const source_locationt &l=source_locationt::nil())
Create a function call instruction.
Definition: goto_program.h:1049
namespacet::lookup
bool lookup(const irep_idt &name, const symbolt *&symbol) const override
See documentation for namespace_baset::lookup().
Definition: namespace.cpp:140
code_function_callt
codet representation of a function call statement.
Definition: std_code.h:1183
to_code_type
const code_typet & to_code_type(const typet &type)
Cast a typet to a code_typet.
Definition: std_types.h:946
goto_programt::make_assertion
static instructiont make_assertion(const exprt &g, const source_locationt &l=source_locationt::nil())
Definition: goto_program.h:893
messaget::error
mstreamt & error() const
Definition: message.h:399
DATA_INVARIANT
#define DATA_INVARIANT(CONDITION, REASON)
This condition should be used to document that assumptions that are made on goto_functions,...
Definition: invariant.h:511
id2string
const std::string & id2string(const irep_idt &d)
Definition: irep.h:44
binary_predicate_exprt
A base class for expressions that are predicates, i.e., Boolean-typed, and that take exactly two argu...
Definition: std_expr.h:694
symbol_exprt::get_identifier
const irep_idt & get_identifier() const
Definition: std_expr.h:111
nil_exprt
The NIL expression.
Definition: std_expr.h:3973
json
static void json(json_objectT &result, const irep_idt &property_id, const property_infot &property_info)
Definition: properties.cpp:114
symbolt::symbol_expr
class symbol_exprt symbol_expr() const
Produces a symbol_exprt for a symbol.
Definition: symbol.cpp:122
goto_programt::instructiont::get_function_call
const code_function_callt & get_function_call() const
Get the function call for FUNCTION_CALL.
Definition: goto_program.h:241
taint_parse_treet::rulet::PARAMETER
@ PARAMETER
Definition: taint_parser.h:29
exprt::op1
exprt & op1()
Definition: expr.h:105
simplify_expr
exprt simplify_expr(exprt src, const namespacet &ns)
Definition: simplify_expr.cpp:2698
custom_bitvector_analysist
Definition: custom_bitvector_analysis.h:152
to_symbol_expr
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Definition: std_expr.h:177
code_typet
Base type of functions.
Definition: std_types.h:736
message_handlert
Definition: message.h:28
class_hierarchyt::idst
std::vector< irep_idt > idst
Definition: class_hierarchy.h:45
taint_analysist::operator()
bool operator()(const std::string &taint_file_name, const symbol_tablet &, goto_functionst &, bool show_full, const optionalt< std::string > &json_file_name)
Definition: taint_analysis.cpp:222
taint_parser
bool taint_parser(const std::string &file_name, taint_parse_treet &dest, message_handlert &message_handler)
Definition: taint_parser.cpp:20
dstringt::empty
bool empty() const
Definition: dstring.h:88
taint_parse_treet::rulet::SINK
@ SINK
Definition: taint_parser.h:28
code_typet::parameters
const parameterst & parameters() const
Definition: std_types.h:857
code_function_callt::arguments
argumentst & arguments()
Definition: std_code.h:1228
optionalt
nonstd::optional< T > optionalt
Definition: optional.h:35
goto_programt::destructive_append
void destructive_append(goto_programt &p)
Appends the given program p to *this. p is destroyed.
Definition: goto_program.h:669
taint_parse_treet::rulet::THIS
@ THIS
Definition: taint_parser.h:29
simplify_expr.h
side_effect_expr_nondett
A side_effect_exprt that returns a non-deterministically chosen value.
Definition: std_code.h:1945
goto_functionst::goto_functiont
::goto_functiont goto_functiont
Definition: goto_functions.h:25
goto_programt::instructions
instructionst instructions
The list of instructions in the goto program.
Definition: goto_program.h:585
goto_functionst
A collection of goto functions.
Definition: goto_functions.h:23
custom_bitvector_domaint::has_get_must_or_may
static bool has_get_must_or_may(const exprt &)
Definition: custom_bitvector_analysis.cpp:687
messaget::get_message_handler
message_handlert & get_message_handler()
Definition: message.h:184
class_hierarchy.h
Class Hierarchy.
goto_modelt::goto_functions
goto_functionst goto_functions
GOTO functions.
Definition: goto_model.h:33
symbolt::location
source_locationt location
Source code location of definition of symbol.
Definition: symbol.h:37
ai_baset::output
virtual void output(const namespacet &ns, const irep_idt &function_id, const goto_programt &goto_program, std::ostream &out) const
Output the abstract states for a single function.
Definition: ai.cpp:42
symbolt
Symbol table entry.
Definition: symbol.h:28
taint_analysis
bool taint_analysis(goto_modelt &goto_model, const std::string &taint_file_name, message_handlert &message_handler, bool show_full, const optionalt< std::string > &json_file_name)
Definition: taint_analysis.cpp:416
goto_programt::make_other
static instructiont make_other(const codet &_code, const source_locationt &l=source_locationt::nil())
Definition: goto_program.h:913
messaget::conditional_output
void conditional_output(mstreamt &mstream, const std::function< void(mstreamt &)> &output_generator) const
Generate output to message_stream using output_generator if the configured verbosity is at least as h...
Definition: message.cpp:138
goto_functionst::update
void update()
Definition: goto_functions.h:81
json.h
goto_programt
A generic container class for the GOTO intermediate representation of one function.
Definition: goto_program.h:73
return_value_symbol
symbol_exprt return_value_symbol(const irep_idt &identifier, const namespacet &ns)
produces the symbol that is used to store the return value of the function with the given identifier
Definition: remove_returns.cpp:415
forall_goto_functions
#define forall_goto_functions(it, functions)
Definition: goto_functions.h:122
messaget::mstreamt
Definition: message.h:224
has_prefix
bool has_prefix(const std::string &s, const std::string &prefix)
Definition: converter.cpp:13
exprt::operands
operandst & operands()
Definition: expr.h:95
messaget::debug
mstreamt & debug() const
Definition: message.h:429
goto_functionst::entry_point
static irep_idt entry_point()
Get the identifier of the entry point to a goto model.
Definition: goto_functions.h:90
class_hierarchyt::get_parents_trans
idst get_parents_trans(const irep_idt &id) const
Definition: class_hierarchy.h:76
address_of_exprt
Operator to return the address of an object.
Definition: std_expr.h:2786
true_exprt
The Boolean constant true.
Definition: std_expr.h:3955
goto_programt::instructiont::is_function_call
bool is_function_call() const
Definition: goto_program.h:461
is_true
bool is_true(const literalt &l)
Definition: literal.h:198
taint_analysist::taint_analysist
taint_analysist(message_handlert &message_handler)
Definition: taint_analysis.cpp:32
goto_programt::instructiont
This class represents an instruction in the GOTO intermediate representation.
Definition: goto_program.h:179
goto_modelt::symbol_table
symbol_tablet symbol_table
Symbol table.
Definition: goto_model.h:30
json_arrayt::push_back
jsont & push_back(const jsont &json)
Definition: json.h:212
goto_programt::targett
instructionst::iterator targett
Definition: goto_program.h:579
code_function_callt::function
exprt & function()
Definition: std_code.h:1218
forall_goto_program_instructions
#define forall_goto_program_instructions(it, program)
Definition: goto_program.h:1196
dstringt::size
size_t size() const
Definition: dstring.h:104
taint_parse_treet::rulet::SOURCE
@ SOURCE
Definition: taint_parser.h:28
json_stringt
Definition: json.h:270
codet
Data structure for representing an arbitrary statement in a program.
Definition: std_code.h:35
not_exprt
Boolean negation.
Definition: std_expr.h:2843