cprover
string_constraint_generator_code_points.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Generates string constraints for Java functions dealing with
4  code points
5 
6 Author: Romain Brenguier, romain.brenguier@diffblue.com
7 
8 \*******************************************************************/
9 
12 
14 
20 std::pair<exprt, string_constraintst>
22  const array_string_exprt &res,
23  const exprt &code_point)
24 {
25  string_constraintst constraints;
26  const typet &char_type = res.content().type().subtype();
27  const typet &type = code_point.type();
28  PRECONDITION(type.id() == ID_signedbv);
29 
30  // We add axioms:
31  // a1 : code_point<0x010000 => |res|=1
32  // a2 : code_point>=0x010000 => |res|=2
33  // a3 : code_point<0x010000 => res[0]=code_point
34  // a4 : code_point>=0x010000 => res[0]=0xD800+(code_point-0x10000)/0x0400
35  // a5 : code_point>=0x010000 => res[1]=0xDC00+(code_point-0x10000)/0x0400
36  // For more explenations about this conversion, see:
37  // https://en.wikipedia.org/wiki/UTF-16
38 
39  exprt hex010000 = from_integer(0x010000, type);
40  exprt hexD800 = from_integer(0xD800, type);
41  exprt hexDC00 = from_integer(0xDC00, type);
42  exprt hex0400 = from_integer(0x0400, type);
43 
44  binary_relation_exprt small(code_point, ID_lt, hex010000);
46  constraints.existential.push_back(a1);
47 
48  implies_exprt a2(
50  constraints.existential.push_back(a2);
51 
52  typecast_exprt code_point_as_char(code_point, char_type);
53  implies_exprt a3(small, equal_exprt(res[0], code_point_as_char));
54  constraints.existential.push_back(a3);
55 
56  plus_exprt first_char(
57  hexD800, div_exprt(minus_exprt(code_point, hex010000), hex0400));
58  implies_exprt a4(
59  not_exprt(small),
60  equal_exprt(res[0], typecast_exprt(first_char, char_type)));
61  constraints.existential.push_back(a4);
62 
63  plus_exprt second_char(hexDC00, mod_exprt(code_point, hex0400));
64  implies_exprt a5(
65  not_exprt(small),
66  equal_exprt(res[1], typecast_exprt(second_char, char_type)));
67  constraints.existential.push_back(a5);
68 
69  return {from_integer(0, get_return_code_type()), constraints};
70 }
71 
78 static exprt is_high_surrogate(const exprt &chr)
79 {
80  return and_exprt(
81  binary_relation_exprt(chr, ID_ge, from_integer(0xD800, chr.type())),
82  binary_relation_exprt(chr, ID_le, from_integer(0xDBFF, chr.type())));
83 }
84 
91 static exprt is_low_surrogate(const exprt &chr)
92 {
93  return and_exprt(
94  binary_relation_exprt(chr, ID_ge, from_integer(0xDC00, chr.type())),
95  binary_relation_exprt(chr, ID_le, from_integer(0xDFFF, chr.type())));
96 }
97 
107 exprt pair_value(exprt char1, exprt char2, typet return_type)
108 {
109  exprt hex010000 = from_integer(0x010000, return_type);
110  exprt hex0800 = from_integer(0x0800, return_type);
111  exprt hex0400 = from_integer(0x0400, return_type);
112  mult_exprt m1(mod_exprt(char1, hex0800), hex0400);
113  mod_exprt m2(char2, hex0400);
114  return plus_exprt(hex010000, plus_exprt(m1, m2));
115 }
116 
121 std::pair<exprt, string_constraintst>
124 {
125  string_constraintst constraints;
126  const typet &return_type = f.type();
127  PRECONDITION(return_type.id() == ID_signedbv);
128  PRECONDITION(f.arguments().size() == 2);
130  const exprt &pos = f.arguments()[1];
131 
132  const symbol_exprt result = fresh_symbol("char", return_type);
133  const exprt index1 = from_integer(1, str.length_type());
134  const exprt &char1 = str[pos];
135  const exprt &char2 = str[plus_exprt(pos, index1)];
136  const typecast_exprt char1_as_int(char1, return_type);
137  const typecast_exprt char2_as_int(char2, return_type);
138  const exprt pair = pair_value(char1_as_int, char2_as_int, return_type);
139  const exprt is_low = is_low_surrogate(str[plus_exprt(pos, index1)]);
140  const and_exprt return_pair(is_high_surrogate(str[pos]), is_low);
141 
142  constraints.existential.push_back(
143  implies_exprt(return_pair, equal_exprt(result, pair)));
144  constraints.existential.push_back(
145  implies_exprt(not_exprt(return_pair), equal_exprt(result, char1_as_int)));
146  return {result, constraints};
147 }
148 
153 std::pair<exprt, string_constraintst>
156 {
158  PRECONDITION(args.size() == 2);
159  typet return_type = f.type();
160  PRECONDITION(return_type.id() == ID_signedbv);
161  symbol_exprt result = fresh_symbol("char", return_type);
163  string_constraintst constraints;
164 
165  const exprt &char1 = str[minus_exprt(
166  args[1], from_integer(2, array_pool.get_or_create_length(str).type()))];
167  const exprt &char2 = str[minus_exprt(
168  args[1], from_integer(1, array_pool.get_or_create_length(str).type()))];
169  const typecast_exprt char1_as_int(char1, return_type);
170  const typecast_exprt char2_as_int(char2, return_type);
171 
172  const exprt pair = pair_value(char1_as_int, char2_as_int, return_type);
173  const and_exprt return_pair(
174  is_high_surrogate(char1), is_low_surrogate(char2));
175 
176  constraints.existential.push_back(
177  implies_exprt(return_pair, equal_exprt(result, pair)));
178  constraints.existential.push_back(
179  implies_exprt(not_exprt(return_pair), equal_exprt(result, char2_as_int)));
180  return {result, constraints};
181 }
182 
188 std::pair<exprt, string_constraintst>
191 {
192  PRECONDITION(f.arguments().size() == 3);
193  string_constraintst constraints;
194  const exprt &begin = f.arguments()[1];
195  const exprt &end = f.arguments()[2];
196  const typet &return_type = f.type();
197  const symbol_exprt result = fresh_symbol("code_point_count", return_type);
198  const minus_exprt length(end, begin);
199  const div_exprt minimum(length, from_integer(2, length.type()));
200  constraints.existential.push_back(
201  binary_relation_exprt(result, ID_le, length));
202  constraints.existential.push_back(
203  binary_relation_exprt(result, ID_ge, minimum));
204 
205  return {result, constraints};
206 }
207 
214 std::pair<exprt, string_constraintst>
217 {
218  PRECONDITION(f.arguments().size() == 3);
219  string_constraintst constraints;
220  const exprt &index = f.arguments()[1];
221  const exprt &offset = f.arguments()[2];
222  const typet &return_type = f.type();
223  const symbol_exprt result = fresh_symbol("offset_by_code_point", return_type);
224 
225  const exprt minimum = plus_exprt(index, offset);
226  const exprt maximum = plus_exprt(minimum, offset);
227  constraints.existential.push_back(
228  binary_relation_exprt(result, ID_le, maximum));
229  constraints.existential.push_back(
230  binary_relation_exprt(result, ID_ge, minimum));
231 
232  return {result, constraints};
233 }
function_application_exprt::arguments
argumentst & arguments()
Definition: mathematical_expr.h:221
array_string_exprt::length_type
const typet & length_type() const
Definition: string_expr.h:69
typet::subtype
const typet & subtype() const
Definition: type.h:47
string_constraint_generatort::add_axioms_for_code_point_count
std::pair< exprt, string_constraintst > add_axioms_for_code_point_count(const function_application_exprt &f)
Add axioms corresponding the String.codePointCount java function.
Definition: string_constraint_generator_code_points.cpp:189
minimum
exprt minimum(const exprt &a, const exprt &b)
Definition: string_constraint_generator_main.cpp:395
pos
literalt pos(literalt a)
Definition: literal.h:194
typet
The type of an expression, extends irept.
Definition: type.h:29
string_constraint_generatort::fresh_symbol
symbol_generatort fresh_symbol
Definition: string_constraint_generator.h:56
and_exprt
Boolean AND.
Definition: std_expr.h:2137
array_poolt::get_or_create_length
exprt get_or_create_length(const array_string_exprt &s)
Get the length of an array_string_exprt from the array_pool.
Definition: array_pool.cpp:24
get_string_expr
array_string_exprt get_string_expr(array_poolt &array_pool, const exprt &expr)
Fetch the string_exprt corresponding to the given refined_string_exprt.
Definition: array_pool.cpp:196
plus_exprt
The plus expression Associativity is not specified.
Definition: std_expr.h:881
is_low_surrogate
static exprt is_low_surrogate(const exprt &chr)
the output is true when the character is a low surrogate for UTF-16 encoding, see https://en....
Definition: string_constraint_generator_code_points.cpp:91
exprt
Base class for all expressions.
Definition: expr.h:53
string_constraintst
Collection of constraints of different types: existential formulas, universal formulas,...
Definition: string_constraint_generator.h:36
array_string_exprt::content
exprt & content()
Definition: string_expr.h:74
div_exprt
Division.
Definition: std_expr.h:1031
symbol_exprt
Expression to hold a symbol (variable)
Definition: std_expr.h:82
equal_exprt
Equality.
Definition: std_expr.h:1190
function_application_exprt::argumentst
exprt::operandst argumentst
Definition: mathematical_expr.h:193
string_constraint_generatort::array_pool
array_poolt array_pool
Definition: string_constraint_generator.h:58
string_constraint_generatort::add_axioms_for_offset_by_code_point
std::pair< exprt, string_constraintst > add_axioms_for_offset_by_code_point(const function_application_exprt &f)
Add axioms corresponding the String.offsetByCodePointCount java function.
Definition: string_constraint_generator_code_points.cpp:215
exprt::type
typet & type()
Return the type of the expression.
Definition: expr.h:81
maximum
exprt maximum(const exprt &a, const exprt &b)
Definition: string_constraint_generator_main.cpp:400
is_high_surrogate
static exprt is_high_surrogate(const exprt &chr)
the output is true when the character is a high surrogate for UTF-16 encoding, see https://en....
Definition: string_constraint_generator_code_points.cpp:78
PRECONDITION
#define PRECONDITION(CONDITION)
Definition: invariant.h:464
function_application_exprt
Application of (mathematical) function.
Definition: mathematical_expr.h:191
mult_exprt
Binary multiplication Associativity is not specified.
Definition: std_expr.h:986
irept::id
const irep_idt & id() const
Definition: irep.h:418
get_return_code_type
signedbv_typet get_return_code_type()
Definition: string_constraint_generator_main.cpp:180
minus_exprt
Binary minus.
Definition: std_expr.h:940
char_type
bitvector_typet char_type()
Definition: c_types.cpp:114
from_integer
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Definition: arith_tools.cpp:99
binary_relation_exprt
A base class for relations, i.e., binary predicates whose two operands have the same type.
Definition: std_expr.h:725
string_constraint_generatort::add_axioms_for_code_point_at
std::pair< exprt, string_constraintst > add_axioms_for_code_point_at(const function_application_exprt &f)
add axioms corresponding to the String.codePointAt java function
Definition: string_constraint_generator_code_points.cpp:122
implies_exprt
Boolean implication.
Definition: std_expr.h:2200
equal_to
equal_exprt equal_to(exprt lhs, exprt rhs)
Definition: string_expr.h:54
pair_value
exprt pair_value(exprt char1, exprt char2, typet return_type)
the output corresponds to the unicode character given by the pair of characters of inputs assuming it...
Definition: string_constraint_generator_code_points.cpp:107
typecast_exprt
Semantic type conversion.
Definition: std_expr.h:2013
string_constraint_generatort::add_axioms_for_code_point
std::pair< exprt, string_constraintst > add_axioms_for_code_point(const array_string_exprt &res, const exprt &code_point)
add axioms for the conversion of an integer representing a java code point to a utf-16 string
Definition: string_constraint_generator_code_points.cpp:21
string_constraint_generator.h
Generates string constraints to link results from string functions with their arguments.
string_constraint_generatort::add_axioms_for_code_point_before
std::pair< exprt, string_constraintst > add_axioms_for_code_point_before(const function_application_exprt &f)
add axioms corresponding to the String.codePointBefore java function
Definition: string_constraint_generator_code_points.cpp:154
array_string_exprt
Definition: string_expr.h:67
mod_exprt
Modulo.
Definition: std_expr.h:1100
string_constraintst::existential
std::vector< exprt > existential
Definition: string_constraint_generator.h:37
not_exprt
Boolean negation.
Definition: std_expr.h:2843