cprover
gdb_api.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: GDB Machine Interface API
4 
5 Author: Malte Mues <mail.mues@gmail.com>
6  Daniel Poetzl
7 
8 \*******************************************************************/
9 
14 
15 #include <cctype>
16 #include <cerrno>
17 #include <cstdio>
18 #include <cstring>
19 #include <regex>
20 
21 #include <iostream>
22 
23 #include "gdb_api.h"
24 
26 
27 #include <util/prefix.h>
28 #include <util/string2int.h>
29 #include <util/string_utils.h>
30 
31 #include <sys/wait.h>
32 
33 gdb_apit::gdb_apit(const std::vector<std::string> &args, const bool log)
34  : args(args), log(log), gdb_state(gdb_statet::NOT_CREATED)
35 {
36 }
37 
39 {
43 
45  return;
46 
47  write_to_gdb("-gdb-exit");
48  // we cannot use most_recent_line_has_tag() here as it checks the last line
49  // before the next `(gdb) \n` prompt in the output; however when gdb exits no
50  // next prompt is printed
52 
54 
55  fclose(command_stream);
56  fclose(response_stream);
57 
58  wait(NULL);
59 }
60 
61 size_t gdb_apit::query_malloc_size(const std::string &pointer_expr)
62 {
63  const auto maybe_address_string = get_value(pointer_expr);
64  CHECK_RETURN(maybe_address_string.has_value());
65 
66  if(allocated_memory.count(*maybe_address_string) == 0)
67  return 1;
68  else
69  return allocated_memory[*maybe_address_string];
70 }
71 
73 {
75 
76  command_log.clear();
77 
78  pid_t gdb_process;
79 
80  int pipe_input[2];
81  int pipe_output[2];
82 
83  if(pipe(pipe_input) == -1)
84  {
85  throw gdb_interaction_exceptiont("could not create pipe for stdin");
86  }
87 
88  if(pipe(pipe_output) == -1)
89  {
90  throw gdb_interaction_exceptiont("could not create pipe for stdout");
91  }
92 
93  gdb_process = fork();
94 
95  if(gdb_process == -1)
96  {
97  throw gdb_interaction_exceptiont("could not create gdb process");
98  }
99 
100  if(gdb_process == 0)
101  {
102  // child process
103  close(pipe_input[1]);
104  close(pipe_output[0]);
105 
106  dup2(pipe_input[0], STDIN_FILENO);
107  dup2(pipe_output[1], STDOUT_FILENO);
108  dup2(pipe_output[1], STDERR_FILENO);
109 
110  dprintf(pipe_output[1], "binary name: %s\n", args.front().c_str());
111 
112  std::vector<std::string> exec_cmd;
113  exec_cmd.reserve(args.size() + 3);
114  exec_cmd.push_back("gdb");
115  exec_cmd.push_back("--interpreter=mi");
116  exec_cmd.push_back("--args");
117  exec_cmd.insert(exec_cmd.end(), args.begin(), args.end());
118 
119  char **exec_cmd_ptr = static_cast<char **>(malloc(
120  sizeof(char *) * (exec_cmd.size() + 1)));
121  exec_cmd_ptr[exec_cmd.size()] = NULL;
122 
123  for(std::size_t i = 0; i < exec_cmd.size(); i++)
124  {
125  exec_cmd_ptr[i] = static_cast<char *>(malloc(
126  sizeof(char) * (exec_cmd[i].length() + 1)));
127  strcpy(exec_cmd_ptr[i], exec_cmd[i].c_str()); // NOLINT(runtime/printf)
128  }
129 
130  dprintf(pipe_output[1], "Loading gdb...\n");
131  execvp("gdb", exec_cmd_ptr);
132 
133  // Only reachable, if execvp failed
134  int errno_value = errno;
135  dprintf(pipe_output[1], "errno in child: %s\n", strerror(errno_value));
136  }
137  else
138  {
139  // parent process
141 
142  close(pipe_input[0]);
143  close(pipe_output[1]);
144 
145  // get stream for reading the gdb output
146  response_stream = fdopen(pipe_output[0], "r");
147 
148  // get stream for writing to gdb
149  command_stream = fdopen(pipe_input[1], "w");
150 
151  std::string line = read_most_recent_line();
152  CHECK_RETURN(
153  has_prefix(line, R"(~"done)") ||
154  has_prefix(line, R"(~"Reading)"));
155 
156  if(log)
157  {
158  // logs output to `gdb.txt` in the current directory, input is not logged
159  // hence we log it to `command_log`
160  write_to_gdb("-gdb-set logging on");
162  }
163 
164  write_to_gdb("-gdb-set max-value-size unlimited");
166  }
167 }
168 
169 void gdb_apit::write_to_gdb(const std::string &command)
170 {
171  PRECONDITION(!command.empty());
172  PRECONDITION(command.find('\n') == std::string::npos);
173 
174  std::string line(command);
175  line += '\n';
176 
177  if(log)
178  {
179  command_log.push_front(command);
180  }
181 
182  if(fputs(line.c_str(), command_stream) == EOF)
183  {
184  throw gdb_interaction_exceptiont("could not write a command to gdb");
185  }
186 
187  fflush(command_stream);
188 }
189 
191 {
192  PRECONDITION(log);
193  return command_log;
194 }
195 
197 {
198  std::string result;
199 
200  do
201  {
202  const size_t buf_size = 1024;
203  char buf[buf_size]; // NOLINT(runtime/arrays)
204 
205  const char *c = fgets(buf, buf_size, response_stream);
206 
207  if(c == NULL)
208  {
209  if(ferror(response_stream))
210  {
211  throw gdb_interaction_exceptiont("error reading from gdb");
212  }
213 
214  INVARIANT(
215  feof(response_stream),
216  "EOF must have been reached when the error indicator on the stream "
217  "is not set and fgets returned NULL");
218  INVARIANT(
219  result.empty() || result.back() != '\n',
220  "when EOF is reached then either no characters were read or the string"
221  " read does not end in a newline");
222 
223  return result;
224  }
225 
226  std::string chunk(buf);
227  INVARIANT(!chunk.empty(), "chunk cannot be empty when EOF was not reached");
228 
229  result += chunk;
230  } while(result.back() != '\n');
231 
232  return result;
233 }
234 
236 {
237  std::string line;
238  std::string output;
239 
240  do
241  {
242  output = line;
243  line = read_next_line();
244  } while(line != "(gdb) \n");
245 
246  return output;
247 }
248 
250 gdb_apit::get_most_recent_record(const std::string &tag, const bool must_exist)
251 {
252  std::string line = read_most_recent_line();
253  const bool b = has_prefix(line, tag);
254 
255  if(must_exist)
256  {
257  CHECK_RETURN(b);
258  }
259  else if(!b)
260  {
261  throw gdb_interaction_exceptiont("record does not exist");
262  }
263 
264  std::string record = strip_string(line.substr(line.find(',') + 1));
265 
266  return parse_gdb_output_record(record);
267 }
268 
269 bool gdb_apit::most_recent_line_has_tag(const std::string &tag)
270 {
271  const std::string line = read_most_recent_line();
272  return has_prefix(line, tag);
273 }
274 
275 void gdb_apit::run_gdb_from_core(const std::string &corefile)
276 {
278 
279  // there does not seem to be a gdb mi command to run from a core file
280  const std::string command = "core " + corefile;
281 
282  write_to_gdb(command);
284 
286 }
287 
289 {
290  // this is what the registers look like at the function call entry:
291  //
292  // reg. name hex. value dec. value
293  // 0: rax 0xffffffff 4294967295
294  // 1: rbx 0x20000000 536870912
295  // 2: rcx 0x591 1425
296  // 3: rdx 0x591 1425
297  // 4: rsi 0x1 1
298  // 5: rdi 0x591 1425
299  // ...
300  // rax will eventually contain the return value and
301  // rdi now stores the first (integer) argument
302  // in the machine interface they are referred to by numbers, hence:
303  write_to_gdb("-data-list-register-values d 5");
304  auto record = get_most_recent_record("^done", true);
305  auto allocated_size = safe_string2size_t(get_register_value(record));
306 
307  write_to_gdb("-exec-finish");
308  if(!most_recent_line_has_tag("*running"))
309  {
310  throw gdb_interaction_exceptiont("could not run program");
311  }
312  record = get_most_recent_record("*stopped");
313  auto frame_content = get_value_from_record(record, "frame");
314 
315  // the malloc breakpoint may be inside another malloc function
316  if(frame_content.find("func=\"malloc\"") != std::string::npos)
317  {
318  // so we need to finish the outer malloc as well
319  write_to_gdb("-exec-finish");
320  if(!most_recent_line_has_tag("*running"))
321  {
322  throw gdb_interaction_exceptiont("could not run program");
323  }
324  record = get_most_recent_record("*stopped");
325  }
326 
327  // now we can read the rax register to the the allocated memory address
328  write_to_gdb("-data-list-register-values x 0");
329  record = get_most_recent_record("^done", true);
330  allocated_memory[get_register_value(record)] = allocated_size;
331 }
332 
333 bool gdb_apit::run_gdb_to_breakpoint(const std::string &breakpoint)
334 {
336 
337  write_to_gdb("-break-insert " + malloc_name);
338  bool malloc_is_known = was_command_accepted();
339 
340  std::string command("-break-insert");
341  command += " " + breakpoint;
342 
343  write_to_gdb(command);
344  if(!was_command_accepted())
345  {
346  throw gdb_interaction_exceptiont("could not set breakpoint");
347  }
348 
349  write_to_gdb("-exec-run");
350 
351  if(!most_recent_line_has_tag("*running"))
352  {
353  throw gdb_interaction_exceptiont("could not run program");
354  }
355 
356  gdb_output_recordt record = get_most_recent_record("*stopped");
357 
358  // malloc function is known, i.e. present among the symbols
359  if(malloc_is_known)
360  {
361  // stop at every entry into malloc call
362  while(hit_malloc_breakpoint(record))
363  {
364  // and store the information about the allocated memory
366  write_to_gdb("-exec-continue");
367  if(!most_recent_line_has_tag("*running"))
368  {
369  throw gdb_interaction_exceptiont("could not run program");
370  }
371  record = get_most_recent_record("*stopped");
372  }
373 
374  write_to_gdb("-break-delete 1");
375  if(!was_command_accepted())
376  {
377  throw gdb_interaction_exceptiont("could not delete breakpoint at malloc");
378  }
379  }
380 
381  const auto it = record.find("reason");
382  CHECK_RETURN(it != record.end());
383 
384  const std::string &reason = it->second;
385 
386  if(reason == "breakpoint-hit")
387  {
389  return true;
390  }
391  else if(reason == "exited-normally")
392  {
393  return false;
394  }
395  else
396  {
398  "gdb stopped for unhandled reason `" + reason + "`");
399  }
400 
401  UNREACHABLE;
402 }
403 
404 std::string gdb_apit::eval_expr(const std::string &expr)
405 {
406  write_to_gdb("-var-create tmp * " + expr);
407 
408  if(!was_command_accepted())
409  {
411  "could not create variable for expression `" + expr + "`");
412  }
413 
414  write_to_gdb("-var-evaluate-expression tmp");
415  gdb_output_recordt record = get_most_recent_record("^done", true);
416 
417  write_to_gdb("-var-delete tmp");
419 
420  const auto it = record.find("value");
421  CHECK_RETURN(it != record.end());
422 
423  const std::string value = it->second;
424 
425  INVARIANT(
426  value.back() != '"' ||
427  (value.length() >= 2 && value[value.length() - 2] == '\\'),
428  "quotes should have been stripped off from value");
429  INVARIANT(value.back() != '\n', "value should not end in a newline");
430 
431  return value;
432 }
433 
435 {
437 
438  std::string value;
439  try
440  {
441  value = eval_expr(expr);
442  }
444  {
445  return pointer_valuet{};
446  }
447 
448  std::regex regex(
449  r_hex_addr + r_opt(' ' + r_id) + r_opt(' ' + r_or(r_char, r_string)));
450 
451  std::smatch result;
452  const bool b = regex_match(value, result, regex);
453  if(!b)
454  return pointer_valuet{};
455 
456  optionalt<std::string> opt_string;
457  const std::string string = result[4];
458 
459  if(!string.empty())
460  {
461  const std::size_t len = string.length();
462 
463  INVARIANT(
464  len >= 4,
465  "pointer-string should be: backslash, quotes, .., backslash, quotes");
466  INVARIANT(
467  string[0] == '\\',
468  "pointer-string should be: backslash, quotes, .., backslash, quotes");
469  INVARIANT(
470  string[1] == '"',
471  "pointer-string should be: backslash, quotes, .., backslash, quotes");
472  INVARIANT(
473  string[len - 2] == '\\',
474  "pointer-string should be: backslash, quotes, .., backslash, quotes");
475  INVARIANT(
476  string[len - 1] == '"',
477  "pointer-string should be: backslash, quotes, .., backslash, quotes");
478 
479  opt_string = string.substr(2, len - 4);
480  }
481 
482  return pointer_valuet(result[1], result[2], result[3], opt_string, true);
483 }
484 
486 {
488 
489  std::string value;
490  try
491  {
492  value = eval_expr(expr);
493  }
495  {
496  return {};
497  }
498 
499  // Get char value
500  {
501  // matches e.g. 99 'c' and extracts c
502  std::regex regex(R"([^ ]+ '([^']+)')");
503 
504  std::smatch result;
505  const bool b = regex_match(value, result, regex);
506 
507  if(b)
508  {
509  return std::string{result[1]};
510  }
511  }
512 
513  // return raw value
514  return value;
515 }
516 
519 {
520  PRECONDITION(s.back() != '\n');
521 
522  gdb_output_recordt result;
523 
524  std::size_t depth = 0;
525  std::string::size_type start = 0;
526 
527  const std::string::size_type n = s.length();
528 
529  for(std::string::size_type i = 0; i < n; i++)
530  {
531  const char c = s[i];
532 
533  if(c == '{' || c == '[')
534  {
535  depth++;
536  }
537  else if(c == '}' || c == ']')
538  {
539  depth--;
540  }
541 
542  if(depth == 0 && (c == ',' || i == n - 1))
543  {
544  const std::string item =
545  i == n - 1 ? s.substr(start) : s.substr(start, i - start);
546 
547  // Split on first `=`
548  std::string::size_type j = item.find('=');
549  CHECK_RETURN(j != std::string::npos);
550  CHECK_RETURN(j > 0);
551  CHECK_RETURN(j < s.length());
552 
553  const std::string key = strip_string(item.substr(0, j));
554  std::string value = strip_string(item.substr(j + 1));
555 
556  const char first = value.front();
557  const char last = value.back();
558 
559  INVARIANT(first == '"' || first == '{' || first == '[', "");
560  INVARIANT(first != '"' || last == '"', "");
561  INVARIANT(first != '{' || last == '}', "");
562  INVARIANT(first != '[' || last == ']', "");
563 
564  // Remove enclosing `"` for primitive values
565  if(first == '"')
566  {
567  value = value.substr(1, value.length() - 2);
568  }
569 
570  auto r = result.insert(std::make_pair(key, value));
571  CHECK_RETURN(r.second);
572 
573  start = i + 1;
574  }
575  }
576 
577  return result;
578 }
579 
581 {
582  return most_recent_line_has_tag("^done");
583 }
584 
586 {
587  bool was_accepted = was_command_accepted();
588  CHECK_RETURN(was_accepted);
589 }
590 
591 std::string gdb_apit::r_opt(const std::string &regex)
592 {
593  return R"((?:)" + regex + R"()?)";
594 }
595 
596 std::string
597 gdb_apit::r_or(const std::string &regex_left, const std::string &regex_right)
598 {
599  return R"((?:)" + regex_left + '|' + regex_right + R"())";
600 }
601 
603  const gdb_output_recordt &record,
604  const std::string &value_name)
605 {
606  const auto it = record.find(value_name);
607  CHECK_RETURN(it != record.end());
608  const auto value = it->second;
609 
610  INVARIANT(
611  value.back() != '"' ||
612  (value.length() >= 2 && value[value.length() - 2] == '\\'),
613  "quotes should have been stripped off from value");
614  INVARIANT(value.back() != '\n', "value should not end in a newline");
615 
616  return value;
617 }
618 
620 {
621  const auto it = stopped_record.find("reason");
622  CHECK_RETURN(it != stopped_record.end());
623 
624  if(it->second != "breakpoint-hit")
625  return false;
626 
627  return safe_string2size_t(get_value_from_record(stopped_record, "bkptno")) ==
628  1;
629 }
630 
632 {
633  // we expect the record of form:
634  // {[register-values]->[name=name_string, value=\"value_string\"],..}
635  auto record_value = get_value_from_record(record, "register-values");
636  std::string value_eq_quotes = "value=\"";
637  auto value_eq_quotes_size = value_eq_quotes.size();
638 
639  auto starting_pos = record_value.find(value_eq_quotes) + value_eq_quotes_size;
640  auto ending_pos = record_value.find('\"', starting_pos);
641  auto value_length = ending_pos - starting_pos;
642  return std::string{record_value, starting_pos, value_length};
643 }
UNREACHABLE
#define UNREACHABLE
This should be used to mark dead code.
Definition: invariant.h:504
gdb_apit::hit_malloc_breakpoint
bool hit_malloc_breakpoint(const gdb_output_recordt &stopped_record)
Check if the breakpoint we hit is inside a malloc.
Definition: gdb_api.cpp:619
gdb_apit::collect_malloc_calls
void collect_malloc_calls()
Intercepts the gdb-analysis at the malloc call-site to add the corresponding information into allocat...
Definition: gdb_api.cpp:288
gdb_apit::query_malloc_size
size_t query_malloc_size(const std::string &pointer_expr)
Get the exact allocated size for a pointer pointer_expr.
Definition: gdb_api.cpp:61
gdb_interaction_exceptiont
Definition: gdb_api.h:230
gdb_apit::pointer_valuet
Data associated with the value of a pointer, i.e.
Definition: gdb_api.h:78
gdb_apit::args
std::vector< std::string > args
Definition: gdb_api.h:144
CHECK_RETURN
#define CHECK_RETURN(CONDITION)
Definition: invariant.h:496
string_utils.h
gdb_apit::gdb_statet::NOT_CREATED
@ NOT_CREATED
gdb_apit::commandst
std::forward_list< std::string > commandst
Definition: gdb_api.h:33
gdb_apit::most_recent_line_has_tag
bool most_recent_line_has_tag(const std::string &tag)
Definition: gdb_api.cpp:269
gdb_apit::get_value
optionalt< std::string > get_value(const std::string &expr)
Get the memory address pointed to by the given pointer expression.
Definition: gdb_api.cpp:485
gdb_apit::gdb_state
gdb_statet gdb_state
Definition: gdb_api.h:159
irept::find
const irept & find(const irep_namet &name) const
Definition: irep.cpp:103
prefix.h
goto_model.h
Symbol Table + CFG.
gdb_apit::gdb_apit
gdb_apit(const std::vector< std::string > &args, const bool log=false)
Create a gdb_apit object.
Definition: gdb_api.cpp:33
gdb_apit::command_stream
FILE * command_stream
Definition: gdb_api.h:147
gdb_apit::eval_expr
std::string eval_expr(const std::string &expr)
Definition: gdb_api.cpp:404
gdb_apit::parse_gdb_output_record
static gdb_output_recordt parse_gdb_output_record(const std::string &s)
Definition: gdb_api.cpp:518
safe_string2size_t
std::size_t safe_string2size_t(const std::string &str, int base)
Definition: string2int.cpp:26
gdb_apit::command_log
commandst command_log
Definition: gdb_api.h:150
gdb_api.h
Low-level interface to gdb.
string2int.h
gdb_apit::gdb_statet::STOPPED
@ STOPPED
strip_string
std::string strip_string(const std::string &s)
Remove all whitespace characters from either end of a string.
Definition: string_utils.cpp:22
gdb_apit::r_hex_addr
const std::string r_hex_addr
Definition: gdb_api.h:211
gdb_apit::response_stream
FILE * response_stream
Definition: gdb_api.h:146
PRECONDITION
#define PRECONDITION(CONDITION)
Definition: invariant.h:464
gdb_apit::get_most_recent_record
gdb_output_recordt get_most_recent_record(const std::string &tag, const bool must_exist=false)
Definition: gdb_api.cpp:250
gdb_apit::write_to_gdb
void write_to_gdb(const std::string &command)
Definition: gdb_api.cpp:169
gdb_apit::~gdb_apit
~gdb_apit()
Terminate the gdb process and close open streams (for reading from and writing to gdb)
Definition: gdb_api.cpp:38
gdb_apit::read_next_line
std::string read_next_line()
Definition: gdb_api.cpp:196
gdb_apit::get_command_log
const commandst & get_command_log()
Return the vector of commands that have been written to gdb so far.
Definition: gdb_api.cpp:190
gdb_apit::malloc_name
const std::string malloc_name
Definition: gdb_api.h:226
gdb_apit::allocated_memory
std::map< std::string, size_t > allocated_memory
track the allocated size for each malloc call maps hexadecimal address to the number of bytes
Definition: gdb_api.h:163
gdb_apit::r_id
const std::string r_id
Definition: gdb_api.h:215
gdb_apit::get_register_value
std::string get_register_value(const gdb_output_recordt &record)
Parse the record produced by listing register value.
Definition: gdb_api.cpp:631
gdb_apit::log
const bool log
Definition: gdb_api.h:149
optionalt
nonstd::optional< T > optionalt
Definition: optional.h:35
gdb_apit::r_opt
static std::string r_opt(const std::string &regex)
Definition: gdb_api.cpp:591
gdb_apit::was_command_accepted
bool was_command_accepted()
Definition: gdb_api.cpp:580
gdb_apit::get_memory
pointer_valuet get_memory(const std::string &expr)
Get the value of a pointer associated with expr.
Definition: gdb_api.cpp:434
gdb_apit::get_value_from_record
std::string get_value_from_record(const gdb_output_recordt &record, const std::string &value_name)
Locate and return the value for a given name.
Definition: gdb_api.cpp:602
gdb_apit::create_gdb_process
void create_gdb_process()
Create a new gdb process for analysing the binary indicated by the first element in args
Definition: gdb_api.cpp:72
gdb_apit::r_string
const std::string r_string
Definition: gdb_api.h:223
gdb_apit::check_command_accepted
void check_command_accepted()
Definition: gdb_api.cpp:585
gdb_apit::gdb_statet
gdb_statet
Definition: gdb_api.h:153
gdb_apit::run_gdb_to_breakpoint
bool run_gdb_to_breakpoint(const std::string &breakpoint)
Run gdb to the given breakpoint.
Definition: gdb_api.cpp:333
gdb_apit::run_gdb_from_core
void run_gdb_from_core(const std::string &corefile)
Run gdb with the given core file.
Definition: gdb_api.cpp:275
gdb_apit::r_or
static std::string r_or(const std::string &regex_left, const std::string &regex_right)
Definition: gdb_api.cpp:597
has_prefix
bool has_prefix(const std::string &s, const std::string &prefix)
Definition: converter.cpp:13
r
static int8_t r
Definition: irep_hash.h:59
gdb_apit::r_char
const std::string r_char
Definition: gdb_api.h:219
size_type
unsignedbv_typet size_type()
Definition: c_types.cpp:58
gdb_apit::gdb_statet::CREATED
@ CREATED
validation_modet::INVARIANT
@ INVARIANT
gdb_apit::read_most_recent_line
std::string read_most_recent_line()
Definition: gdb_api.cpp:235
gdb_apit::gdb_output_recordt
std::map< std::string, std::string > gdb_output_recordt
Definition: gdb_api.h:165