cprover
irep_serialization.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: binary irep conversions with hashing
4 
5 Author: CM Wintersteiger
6 
7 Date: May 2007
8 
9 \*******************************************************************/
10 
13 
14 #include "irep_serialization.h"
15 
16 #include <sstream>
17 #include <iostream>
18 
19 #include "exception_utils.h"
20 #include "string_hash.h"
21 
23  std::ostream &out,
24  const irept &irep)
25 {
26  write_string_ref(out, irep.id());
27 
28  forall_irep(it, irep.get_sub())
29  {
30  out.put('S');
31  reference_convert(*it, out);
32  }
33 
35  {
36  out.put('N');
37  write_string_ref(out, it->first);
38  reference_convert(it->second, out);
39  }
40 
41  out.put(0); // terminator
42 }
43 
45 {
46  std::size_t id=read_gb_word(in);
47 
48  if(
49  id >= ireps_container.ireps_on_read.size() ||
50  !ireps_container.ireps_on_read[id].first)
51  {
52  irept irep = read_irep(in);
53 
54  if(id >= ireps_container.ireps_on_read.size())
55  ireps_container.ireps_on_read.resize(1 + id * 2, {false, get_nil_irep()});
56 
57  // guard against self-referencing ireps
58  if(ireps_container.ireps_on_read[id].first)
59  throw deserialization_exceptiont("irep id read twice.");
60 
61  ireps_container.ireps_on_read[id] = {true, std::move(irep)};
62  }
63 
64  return ireps_container.ireps_on_read[id].second;
65 }
66 
68 {
69  irep_idt id = read_string_ref(in);
70  irept::subt sub;
71  irept::named_subt named_sub;
72 
73  while(in.peek()=='S')
74  {
75  in.get();
76  sub.push_back(reference_convert(in));
77  }
78 
79 #ifdef NAMED_SUB_IS_FORWARD_LIST
80  irept::named_subt::iterator before = named_sub.before_begin();
81 #endif
82  while(in.peek()=='N')
83  {
84  in.get();
85  irep_idt id = read_string_ref(in);
86 #ifdef NAMED_SUB_IS_FORWARD_LIST
87  named_sub.emplace_after(before, id, reference_convert(in));
88  ++before;
89 #else
90  named_sub.emplace(id, reference_convert(in));
91 #endif
92  }
93 
94  while(in.peek()=='C')
95  {
96  in.get();
97  irep_idt id = read_string_ref(in);
98 #ifdef NAMED_SUB_IS_FORWARD_LIST
99  named_sub.emplace_after(before, id, reference_convert(in));
100  ++before;
101 #else
102  named_sub.emplace(id, reference_convert(in));
103 #endif
104  }
105 
106  if(in.get()!=0)
107  {
108  throw deserialization_exceptiont("irep not terminated");
109  }
110 
111  return irept(std::move(id), std::move(named_sub), std::move(sub));
112 }
113 
118  const irept &irep,
119  std::ostream &out)
120 {
121  std::size_t h=ireps_container.irep_full_hash_container.number(irep);
122 
123  const auto res = ireps_container.ireps_on_write.insert(
124  {h, ireps_container.ireps_on_write.size()});
125 
126  write_gb_word(out, res.first->second);
127  if(res.second)
128  write_irep(out, irep);
129 }
130 
135 void write_gb_word(std::ostream &out, std::size_t u)
136 {
137 
138  while(true)
139  {
140  unsigned char value=u&0x7f;
141  u>>=7;
142 
143  if(u==0)
144  {
145  out.put(value);
146  break;
147  }
148 
149  out.put(value | 0x80);
150  }
151 }
152 
156 std::size_t irep_serializationt::read_gb_word(std::istream &in)
157 {
158  std::size_t res=0;
159 
160  unsigned shift_distance=0;
161 
162  while(in.good())
163  {
164  if(shift_distance >= sizeof(res) * 8)
165  throw deserialization_exceptiont("input number too large");
166 
167  unsigned char ch=static_cast<unsigned char>(in.get());
168  res|=(size_t(ch&0x7f))<<shift_distance;
169  shift_distance+=7;
170  if((ch&0x80)==0)
171  break;
172  }
173 
174  if(!in.good())
175  throw deserialization_exceptiont("unexpected end of input stream");
176 
177  return res;
178 }
179 
183 void write_gb_string(std::ostream &out, const std::string &s)
184 {
185  for(std::string::const_iterator it=s.begin();
186  it!=s.end();
187  ++it)
188  {
189  if(*it==0 || *it=='\\')
190  out.put('\\'); // escape specials
191  out << *it;
192  }
193 
194  out.put(0);
195 }
196 
201 {
202  char c;
203  size_t length=0;
204 
205  while((c=static_cast<char>(in.get()))!=0)
206  {
207  if(length>=read_buffer.size())
208  read_buffer.resize(read_buffer.size()*2, 0);
209 
210  if(c=='\\') // escaped chars
211  read_buffer[length]=static_cast<char>(in.get());
212  else
213  read_buffer[length]=c;
214 
215  length++;
216  }
217 
218  return irep_idt(std::string(read_buffer.data(), length));
219 }
220 
225  std::ostream &out,
226  const irep_idt &s)
227 {
228  size_t id=irep_id_hash()(s);
229  if(id>=ireps_container.string_map.size())
230  ireps_container.string_map.resize(id+1, false);
231 
233  write_gb_word(out, id);
234  else
235  {
236  ireps_container.string_map[id]=true;
237  write_gb_word(out, id);
238  write_gb_string(out, id2string(s));
239  }
240 }
241 
246 {
247  std::size_t id=read_gb_word(in);
248 
249  if(id>=ireps_container.string_rev_map.size())
250  ireps_container.string_rev_map.resize(1+id*2,
251  std::pair<bool, irep_idt>(false, irep_idt()));
252 
253  if(!ireps_container.string_rev_map[id].first)
254  {
255  irep_idt s=read_gb_string(in);
257  std::pair<bool, irep_idt>(true, s);
258  }
259 
260  return ireps_container.string_rev_map[id].second;
261 }
exception_utils.h
dstringt
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:37
irep_serialization.h
binary irep conversions with hashing
irep_serializationt::read_gb_string
irep_idt read_gb_string(std::istream &)
reads a string from the stream
Definition: irep_serialization.cpp:200
irep_serializationt::read_buffer
std::vector< char > read_buffer
Definition: irep_serialization.h:77
deserialization_exceptiont
Thrown when failing to deserialize a value from some low level format, like JSON or raw bytes.
Definition: exception_utils.h:73
irep_serializationt::ireps_containert::ireps_on_read
ireps_on_readt ireps_on_read
Definition: irep_serialization.h:35
irep_serializationt::reference_convert
const irept & reference_convert(std::istream &)
Definition: irep_serialization.cpp:44
irep_serializationt::ireps_containert::string_map
string_mapt string_map
Definition: irep_serialization.h:42
irep_serializationt::read_gb_word
static std::size_t read_gb_word(std::istream &)
Interpret a stream of byte as a 7-bit encoded unsigned number.
Definition: irep_serialization.cpp:156
sharing_treet< irept, std::map< irep_namet, irept > >::named_subt
typename dt::named_subt named_subt
Definition: irep.h:183
irep_idt
dstringt irep_idt
Definition: irep.h:32
irep_serializationt::read_irep
irept read_irep(std::istream &)
Definition: irep_serialization.cpp:67
id2string
const std::string & id2string(const irep_idt &d)
Definition: irep.h:44
irept::get_named_sub
named_subt & get_named_sub()
Definition: irep.h:479
write_gb_string
void write_gb_string(std::ostream &out, const std::string &s)
outputs the string and then a zero byte.
Definition: irep_serialization.cpp:183
irep_serializationt::ireps_containert::ireps_on_write
ireps_on_writet ireps_on_write
Definition: irep_serialization.h:39
string_hash.h
string hashing
irep_serializationt::write_irep
void write_irep(std::ostream &, const irept &irep)
Definition: irep_serialization.cpp:22
forall_named_irep
#define forall_named_irep(it, irep)
Definition: irep.h:70
write_gb_word
void write_gb_word(std::ostream &out, std::size_t u)
Write 7 bits of u each time, least-significant byte first, until we have zero.
Definition: irep_serialization.cpp:135
irept::id
const irep_idt & id() const
Definition: irep.h:418
irep_serializationt::ireps_containert::irep_full_hash_container
irep_full_hash_containert irep_full_hash_container
Definition: irep_serialization.h:37
sharing_treet< irept, std::map< irep_namet, irept > >::subt
typename dt::subt subt
Definition: irep.h:182
irep_id_hash
dstring_hash irep_id_hash
Definition: irep.h:35
forall_irep
#define forall_irep(it, irep)
Definition: irep.h:62
irep_hash_container_baset::number
std::size_t number(const irept &irep)
Definition: irep_hash_container.cpp:17
irep_serializationt::ireps_containert::string_rev_map
string_rev_mapt string_rev_map
Definition: irep_serialization.h:45
irept::get_sub
subt & get_sub()
Definition: irep.h:477
irep_serializationt::read_string_ref
irep_idt read_string_ref(std::istream &)
Read a string reference from the stream.
Definition: irep_serialization.cpp:245
irept
There are a large number of kinds of tree structured or tree-like data in CPROVER.
Definition: irep.h:394
get_nil_irep
const irept & get_nil_irep()
Definition: irep.cpp:26
irep_serializationt::ireps_container
ireps_containert & ireps_container
Definition: irep_serialization.h:76
irep_serializationt::write_string_ref
void write_string_ref(std::ostream &, const irep_idt &)
Output a string and maintain a reference to it.
Definition: irep_serialization.cpp:224