cprover
string_utils.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module:
4 
5 Author: Daniel Poetzl
6 
7 \*******************************************************************/
8 
9 #include "string_utils.h"
10 #include "exception_utils.h"
11 #include "invariant.h"
12 
13 #include <algorithm>
14 #include <cassert>
15 #include <cctype>
16 #include <iomanip>
17 
/// Remove all whitespace characters from either end of a string.
/// \param s: the string to strip
/// \return a copy of `s` without leading and trailing whitespace; the empty
///   string if `s` is empty or consists only of whitespace
std::string strip_string(const std::string &s)
{
  // std::isspace has undefined behaviour when given a negative value other
  // than EOF, which plain `char` may hold (e.g. bytes of a UTF-8 sequence),
  // so go through unsigned char first.
  const auto is_space = [](char c) {
    return std::isspace(static_cast<unsigned char>(c)) != 0;
  };

  const auto first = std::find_if_not(s.begin(), s.end(), is_space);
  if(first == s.end())
    return "";

  // At least one non-whitespace character exists, so this search cannot
  // run off the front of the string.
  const auto last = std::find_if_not(s.rbegin(), s.rend(), is_space);

  // `last.base()` is one past the final non-whitespace character.
  return std::string(first, last.base());
}
39 
41  const std::string &s,
42  char delim,
43  std::vector<std::string> &result,
44  bool strip,
45  bool remove_empty)
46 {
47  PRECONDITION(result.empty());
48  // delim can't be a space character if using strip
49  PRECONDITION(!std::isspace(delim) || !strip);
50 
51  if(s.empty())
52  {
53  if(!remove_empty)
54  result.push_back("");
55  return;
56  }
57 
58  std::string::size_type n=s.length();
59  INVARIANT(n > 0, "Empty string case should already be handled");
60 
61  std::string::size_type start=0;
63 
64  for(i=0; i<n; i++)
65  {
66  if(s[i]==delim)
67  {
68  std::string new_s=s.substr(start, i-start);
69 
70  if(strip)
71  new_s=strip_string(new_s);
72 
73  if(!remove_empty || !new_s.empty())
74  result.push_back(new_s);
75 
76  start=i+1;
77  }
78  }
79 
80  std::string new_s=s.substr(start, n-start);
81 
82  if(strip)
83  new_s=strip_string(new_s);
84 
85  if(!remove_empty || !new_s.empty())
86  result.push_back(new_s);
87 
88  if(!remove_empty && result.empty())
89  result.push_back("");
90 }
91 
93  const std::string &s,
94  char delim,
95  std::string &left,
96  std::string &right,
97  bool strip)
98 {
99  // delim can't be a space character if using strip
100  PRECONDITION(!std::isspace(delim) || !strip);
101 
102  std::vector<std::string> result = split_string(s, delim, strip);
103 
104  if(result.size() != 2)
105  {
106  throw deserialization_exceptiont{"expected string '" + s +
107  "' to contain two substrings "
108  "delimited by " +
109  delim + " but has " +
110  std::to_string(result.size())};
111  }
112 
113  left=result[0];
114  right=result[1];
115 }
116 
117 std::vector<std::string> split_string(
118  const std::string &s,
119  char delim,
120  bool strip,
121  bool remove_empty)
122 {
123  std::vector<std::string> result;
124  split_string(s, delim, result, strip, remove_empty);
125  return result;
126 }
127 
/// Return the prefix of a string that precedes the last occurrence of a
/// delimiter character.
/// \param s: the string to trim
/// \param delim: the delimiter character to search for
/// \return everything before the last \p delim in \p s, or the empty string
///   if \p s does not contain \p delim
std::string trim_from_last_delimiter(
  const std::string &s,
  const char delim)
{
  const std::size_t index = s.find_last_of(delim);
  if(index == std::string::npos)
    return std::string();

  return s.substr(0, index);
}
138 
/// Generic string escaping: prefixes every backslash and double-quote
/// character with a backslash and copies all other characters unchanged.
/// \param s: the string to escape
/// \return the escaped copy of \p s
std::string escape(const std::string &s)
{
  std::string escaped;

  for(const char ch : s)
  {
    switch(ch)
    {
    case '\\':
    case '"':
      // these two characters need a leading backslash
      escaped += '\\';
      break;
    default:
      break;
    }

    escaped += ch;
  }

  return escaped;
}
153 
/// Replace non-alphanumeric characters with `_xx` escapes, where `xx` are two
/// lower-case hex digits of the character's byte value. An underscore is
/// written as a double underscore; alphanumeric characters are kept as-is.
/// \param to_escape: the string to escape
/// \return the escaped string
std::string escape_non_alnum(const std::string &to_escape)
{
  std::ostringstream out;

  // Number base and fill character only influence the integer insertions
  // below (the only ones that set a field width), so set them once up front.
  out << std::hex << std::setfill('0');

  for(const char ch : to_escape)
  {
    if(ch == '_')
    {
      out << "__";
      continue;
    }

    // `ch` may be negative for bytes of UTF-8 sequences encoding code points
    // above 127. Converting through `unsigned char` maps such values into the
    // 128-255 range, which is necessary to avoid undefined behaviour in
    // `isalnum`. Holding the value in an `int` makes the stream print it as a
    // number rather than as a character.
    const int byte_value{static_cast<unsigned char>(ch)};

    if(isalnum(byte_value))
      out << ch;
    else
      out << '_' << std::setw(2) << byte_value;
  }

  return out.str();
}
/// Return a copy of a string whose first character has been converted to
/// upper case.
/// \param str: the string to capitalize
/// \return \p str with its first character passed through `std::toupper`;
///   an empty input is returned unchanged
std::string capitalize(const std::string &str)
{
  if(str.empty())
    return str;

  std::string capitalized = str;
  // std::toupper has undefined behaviour for negative values other than EOF,
  // which plain `char` may hold, so convert through unsigned char.
  capitalized[0] = static_cast<char>(
    std::toupper(static_cast<unsigned char>(capitalized[0])));
  return capitalized;
}
184 
185 std::string wrap_line(
186  const std::string &line,
187  const std::size_t left_margin,
188  const std::size_t width)
189 {
190  return wrap_line(line.cbegin(), line.cend(), left_margin, width);
191 }
192 
193 std::string wrap_line(
194  std::string::const_iterator left,
195  std::string::const_iterator right,
196  const std::size_t left_margin,
197  const std::size_t width)
198 {
199  PRECONDITION(left_margin < width);
200 
201  const std::size_t column_width = width - left_margin;
202  const std::string margin(left_margin, ' ');
203 
204  auto distance = std::distance(left, right);
205  CHECK_RETURN(distance > 0);
206 
207  std::string result;
208 
209  if(static_cast<std::size_t>(distance) <= column_width)
210  {
211  result.append(margin);
212  result.append(left, right);
213 
214  return result;
215  }
216 
217  auto it_line_begin = left;
218 
219  do
220  {
221  // points to the first character past the current column
222  auto it = it_line_begin + column_width;
223 
224  auto rit_r = std::reverse_iterator<decltype(it)>(it) - 1;
225  auto rit_l = rit_r + column_width;
226 
227  auto rit_space = std::find(rit_r, rit_l, ' ');
228 
229  if(rit_space != rit_l)
230  {
231  auto it_space = rit_space.base() - 1;
232  CHECK_RETURN(*it_space == ' ');
233 
234  result.append(margin);
235  result.append(it_line_begin, it_space);
236  result.append("\n");
237 
238  it_line_begin = it_space + 1;
239  }
240  else
241  {
242  // we have not found a space, thus cannot wrap this line
243  result.clear();
244  result.append(left, right);
245 
246  return result;
247  }
248  } while(static_cast<std::size_t>(std::distance(it_line_begin, right)) >
249  column_width);
250 
251  result.append(margin);
252  result.append(it_line_begin, right);
253 
254  return result;
255 }
exception_utils.h
wrap_line
std::string wrap_line(const std::string &line, const std::size_t left_margin, const std::size_t width)
Wrap line at spaces to not extend past the right margin, and include given padding with spaces to the...
Definition: string_utils.cpp:185
CHECK_RETURN
#define CHECK_RETURN(CONDITION)
Definition: invariant.h:496
escape_non_alnum
std::string escape_non_alnum(const std::string &to_escape)
Replace non-alphanumeric characters with _xx escapes, where xx are hex digits.
Definition: string_utils.cpp:154
string_utils.h
deserialization_exceptiont
Thrown when failing to deserialize a value from some low level format, like JSON or raw bytes.
Definition: exception_utils.h:73
to_string
std::string to_string(const string_not_contains_constraintt &expr)
Used for debug printing.
Definition: string_constraint.cpp:55
trim_from_last_delimiter
std::string trim_from_last_delimiter(const std::string &s, const char delim)
Definition: string_utils.cpp:128
split_string
void split_string(const std::string &s, char delim, std::vector< std::string > &result, bool strip, bool remove_empty)
Definition: string_utils.cpp:40
strip_string
std::string strip_string(const std::string &s)
Remove all whitespace characters from either end of a string.
Definition: string_utils.cpp:22
PRECONDITION
#define PRECONDITION(CONDITION)
Definition: invariant.h:464
invariant.h
INVARIANT
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
Definition: invariant.h:424
size_type
unsignedbv_typet size_type()
Definition: c_types.cpp:58
escape
std::string escape(const std::string &s)
Generic escaping of strings; this is not meant to be a particular programming language.
Definition: string_utils.cpp:139
capitalize
std::string capitalize(const std::string &str)
Definition: string_utils.cpp:176