37 bool parse()
override;
71 error() <<
"invalid constant pool index (" << index <<
")" <<
eom;
106 void rbytecode(std::vector<instructiont> &);
118 for(std::size_t i=0; i<bytes; i++)
122 error() <<
"unexpected end of bytecode file" <<
eom;
129 template <
typename T>
133 std::is_unsigned<T>::value,
"T should be an unsigned integer");
134 const constexpr
size_t bytes =
sizeof(T);
136 for(
size_t i = 0; i < bytes; i++)
140 error() <<
"unexpected end of bytecode file" <<
eom;
146 return narrow_cast<T>(
result);
// Tag values that identify the kind of a constant pool entry.
// Each entry's tag is read as a single u1, and these names are used as the
// case labels when the entries are turned into expressions; a tag outside this
// list is reported as "unknown constant pool entry".
// NOTE(review): the numbers are presumably the tag values laid down by the JVM
// class-file format -- confirm against the specification before changing any.
// The list is not in numeric order, and 2, 13, 14 and 17 are not defined here.
152 #define CONSTANT_Class 7
153 #define CONSTANT_Fieldref 9
154 #define CONSTANT_Methodref 10
155 #define CONSTANT_InterfaceMethodref 11
156 #define CONSTANT_String 8
157 #define CONSTANT_Integer 3
158 #define CONSTANT_Float 4
159 #define CONSTANT_Long 5
160 #define CONSTANT_Double 6
161 #define CONSTANT_NameAndType 12
162 #define CONSTANT_Utf8 1
163 #define CONSTANT_MethodHandle 15
164 #define CONSTANT_MethodType 16
165 #define CONSTANT_InvokeDynamic 18
// Consecutive tag values 0..8 naming the kinds of verification type info.
// NOTE(review): presumably these are matched against the u1 tag read while
// parsing stack map frames (the code that reports "unknown verification type
// info encountered") -- confirm at the point of use.
167 #define VTYPE_INFO_TOP 0
168 #define VTYPE_INFO_INTEGER 1
169 #define VTYPE_INFO_FLOAT 2
170 #define VTYPE_INFO_LONG 3
171 #define VTYPE_INFO_DOUBLE 4
172 #define VTYPE_INFO_ITEM_NULL 5
173 #define VTYPE_INFO_UNINIT_THIS 6
174 #define VTYPE_INFO_OBJECT 7
175 #define VTYPE_INFO_UNINIT 8
284 "name_and_typeindex did not correspond to a name_and_type in the "
393 catch(
const std::string &
message)
// Bit masks for access-flag words. Every mask is an unsigned literal with a
// single bit set, so a flag is tested with `(flags & ACC_X) != 0`, as done for
// the inner-class access flags (ACC_PRIVATE, ACC_PUBLIC, ACC_PROTECTED,
// ACC_STATIC).
// Bits 0x0080 and 0x8000 have no name in this table.
// NOTE(review): a second ACC_* table appears further down the file and
// re-defines several of these names with the same values -- keep both in sync.
408 #define ACC_PUBLIC 0x0001u
409 #define ACC_PRIVATE 0x0002u
410 #define ACC_PROTECTED 0x0004u
411 #define ACC_STATIC 0x0008u
412 #define ACC_FINAL 0x0010u
413 #define ACC_SYNCHRONIZED 0x0020u
414 #define ACC_BRIDGE 0x0040u
415 #define ACC_NATIVE 0x0100u
416 #define ACC_INTERFACE 0x0200u
417 #define ACC_ABSTRACT 0x0400u
418 #define ACC_STRICT 0x0800u
419 #define ACC_SYNTHETIC 0x1000u
420 #define ACC_ANNOTATION 0x2000u
421 #define ACC_ENUM 0x4000u
423 #define UNUSED_u2(x) \
425 const u2 x = read<u2>(); \
434 const u4 magic = read<u4>();
436 const u2 major_version = read<u2>();
438 if(magic!=0xCAFEBABE)
446 error() <<
"unexpected major version" <<
eom;
454 const u2 access_flags = read<u2>();
455 const u2 this_class = read<u2>();
456 const u2 super_class = read<u2>();
483 const u2 attributes_count = read<u2>();
485 for(std::size_t j=0; j<attributes_count; j++)
519 if(field.signature.has_value())
541 for(
const auto ¶meter_annotations : method.parameter_annotations)
544 if(method.signature.has_value())
559 for(
const auto &var : method.local_variable_table)
561 if(var.signature.has_value())
581 if(src.
id()==ID_code)
589 else if(src.
id() == ID_struct_tag)
597 else if(src.
id()==ID_struct)
603 else if(src.
id()==ID_pointer)
610 const std::vector<annotationt> &annotations)
612 for(
const auto &annotation : annotations)
615 for(
const auto &element_value_pair : annotation.element_value_pairs)
625 if(
const auto &symbol_expr = expr_try_dynamic_cast<symbol_exprt>(value))
627 const irep_idt &value_id = symbol_expr->get_identifier();
630 else if(
const auto &array_expr = expr_try_dynamic_cast<array_exprt>(value))
643 const u2 constant_pool_count = read<u2>();
644 if(constant_pool_count==0)
646 error() <<
"invalid constant_pool_count" <<
eom;
655 it->tag = read<u1>();
660 it->ref1 = read<u2>();
668 it->ref1 = read<u2>();
669 it->ref2 = read<u2>();
674 it->ref1 = read<u2>();
679 it->number = read<u4>();
684 it->number = read<u8>();
688 error() <<
"invalid double entry" <<
eom;
697 const u2 bytes = read<u2>();
707 it->ref1 = read<u1>();
708 it->ref2 = read<u2>();
712 error() <<
"unknown constant pool entry (" << it->tag <<
")"
722 [&](constant_poolt::value_type &entry) {
727 const std::string &s = id2string(pool_entry(entry.ref1).s);
728 entry.expr = type_exprt(java_classname(s));
732 case CONSTANT_Fieldref:
734 const pool_entryt &nameandtype_entry = pool_entry(entry.ref2);
735 const pool_entryt &name_entry=pool_entry(nameandtype_entry.ref1);
736 const pool_entryt &class_entry = pool_entry(entry.ref1);
737 const pool_entryt &class_name_entry=pool_entry(class_entry.ref1);
738 typet type=type_entry(nameandtype_entry.ref2);
740 auto class_tag = java_classname(id2string(class_name_entry.s));
742 fieldref_exprt fieldref(type, name_entry.s, class_tag.get_identifier());
744 entry.expr = fieldref;
748 case CONSTANT_Methodref:
749 case CONSTANT_InterfaceMethodref:
751 const pool_entryt &nameandtype_entry = pool_entry(entry.ref2);
752 const pool_entryt &name_entry=pool_entry(nameandtype_entry.ref1);
753 const pool_entryt &class_entry = pool_entry(entry.ref1);
754 const pool_entryt &class_name_entry=pool_entry(class_entry.ref1);
755 typet type=type_entry(nameandtype_entry.ref2);
757 auto class_tag = java_classname(id2string(class_name_entry.s));
759 irep_idt mangled_method_name =
760 id2string(name_entry.s) +
":" +
761 id2string(pool_entry(nameandtype_entry.ref2).s);
763 irep_idt class_id = class_tag.get_identifier();
765 entry.expr = class_method_descriptor_exprt{
766 type, mangled_method_name, class_id, name_entry.s};
770 case CONSTANT_String:
773 entry.expr = java_string_literal_exprt{pool_entry(entry.ref1).s};
777 case CONSTANT_Integer:
778 entry.expr = from_integer(entry.number, java_int_type());
783 ieee_floatt value(ieee_float_spect::single_precision());
784 value.unpack(entry.number);
785 entry.expr = value.to_expr();
790 entry.expr = from_integer(entry.number, java_long_type());
793 case CONSTANT_Double:
795 ieee_floatt value(ieee_float_spect::double_precision());
796 value.unpack(entry.number);
797 entry.expr = value.to_expr();
801 case CONSTANT_NameAndType:
803 entry.expr.id(
"nameandtype");
807 case CONSTANT_MethodHandle:
809 entry.expr.id(
"methodhandle");
813 case CONSTANT_MethodType:
815 entry.expr.id(
"methodtype");
819 case CONSTANT_InvokeDynamic:
821 entry.expr.id(
"invokedynamic");
822 const pool_entryt &nameandtype_entry = pool_entry(entry.ref2);
823 typet type=type_entry(nameandtype_entry.ref2);
824 type.set(ID_java_lambda_method_handle_index, entry.ref1);
825 entry.expr.type() = type;
834 const u2 interfaces_count = read<u2>();
836 for(std::size_t i=0; i<interfaces_count; i++)
838 constant(read<u2>()).type().get(ID_C_base_name));
843 const u2 fields_count = read<u2>();
845 for(std::size_t i=0; i<fields_count; i++)
849 const u2 access_flags = read<u2>();
850 const u2 name_index = read<u2>();
851 const u2 descriptor_index = read<u2>();
852 const u2 attributes_count = read<u2>();
863 const auto flags = (field.
is_public ? 1 : 0) +
866 DATA_INVARIANT(flags<=1,
"at most one of public, protected, private");
868 for(std::size_t j=0; j<attributes_count; j++)
884 const u4 code_length = read<u4>();
887 size_t bytecode_index=0;
889 for(address=0; address<code_length; address++)
891 bool wide_instruction=
false;
892 u4 start_of_instruction=address;
894 u1 bytecode = read<u1>();
898 wide_instruction=
true;
900 bytecode = read<u1>();
907 std::string(
"Unexpected wide instruction: ") +
911 instructions.emplace_back();
914 instruction.
address=start_of_instruction;
943 const s1 c = read<u1>();
951 const s2 offset = read<u2>();
954 instruction.
args.push_back(
962 const s4 offset = read<u4>();
965 instruction.
args.push_back(
975 const u2 v = read<u2>();
981 const u1 v = read<u1>();
993 const u2 v = read<u2>();
995 const s2 c = read<u2>();
1001 const u1 v = read<u1>();
1003 const s1 c = read<u1>();
1011 const u2 c = read<u2>();
1013 const u1 b1 = read<u1>();
1015 const u1 b2 = read<u1>();
1023 u4 base_offset=address;
1026 while(((address + 1u) & 3u) != 0)
1033 const s4 default_value = read<u4>();
1036 instruction.
args.push_back(
1041 const u4 npairs = read<u4>();
1044 for(std::size_t i=0; i<npairs; i++)
1046 const s4 match = read<u4>();
1047 const s4 offset = read<u4>();
1048 instruction.
args.push_back(
1052 instruction.
args.push_back(
1061 size_t base_offset=address;
1064 while(((address + 1u) & 3u) != 0)
1071 const s4 default_value = read<u4>();
1072 instruction.
args.push_back(
1077 const s4 low_value = read<u4>();
1081 const s4 high_value = read<u4>();
1085 for(
s4 i=low_value; i<=high_value; i++)
1087 s4 offset = read<u4>();
1091 instruction.
args.push_back(
1100 const u2 c = read<u2>();
1102 const u1 dimensions = read<u1>();
1103 instruction.
args.push_back(
1120 case T_INT: t.
id(ID_int);
break;
1131 const s2 s = read<u2>();
1138 throw "unknown JVM bytecode instruction";
1143 if(address!=code_length)
1145 error() <<
"bytecode length mismatch" <<
eom;
1152 const u2 attribute_name_index = read<u2>();
1153 const u4 attribute_length = read<u4>();
1157 if(attribute_name ==
"Code")
1167 const u2 exception_table_length = read<u2>();
1174 for(std::size_t e = 0; e < exception_table_length; e++)
1176 const u2 start_pc = read<u2>();
1177 const u2 end_pc = read<u2>();
1183 "The start_pc must be less than the end_pc as this is the range the "
1184 "exception is active");
1186 const u2 handler_pc = read<u2>();
1187 const u2 catch_type = read<u2>();
1197 u2 attributes_count = read<u2>();
1199 for(std::size_t j=0; j<attributes_count; j++)
1212 if(!instruction.source_location.get_line().empty())
1213 line_number = instruction.source_location.get_line();
1214 else if(!line_number.
empty())
1215 instruction.source_location.set_line(line_number);
1216 instruction.source_location.set_function(
1221 const auto it = std::find_if(
1225 return !instruction.source_location.get_line().empty();
1230 else if(attribute_name==
"Signature")
1232 const u2 signature_index = read<u2>();
1235 else if(attribute_name==
"RuntimeInvisibleAnnotations" ||
1236 attribute_name==
"RuntimeVisibleAnnotations")
1241 attribute_name ==
"RuntimeInvisibleParameterAnnotations" ||
1242 attribute_name ==
"RuntimeVisibleParameterAnnotations")
1244 const u1 parameter_count = read<u1>();
1252 for(
u2 param_no = 0; param_no < parameter_count; ++param_no)
1255 else if(attribute_name ==
"Exceptions")
1265 const u2 attribute_name_index = read<u2>();
1266 const u4 attribute_length = read<u4>();
1270 if(attribute_name==
"Signature")
1272 const u2 signature_index = read<u2>();
1275 else if(attribute_name==
"RuntimeInvisibleAnnotations" ||
1276 attribute_name==
"RuntimeVisibleAnnotations")
1286 const u2 attribute_name_index = read<u2>();
1287 const u4 attribute_length = read<u4>();
1291 if(attribute_name==
"LineNumberTable")
1293 std::map<unsigned, std::reference_wrapper<instructiont>> instruction_map;
1295 instruction_map.emplace(instruction.address, instruction);
1297 const u2 line_number_table_length = read<u2>();
1299 for(std::size_t i=0; i<line_number_table_length; i++)
1301 const u2 start_pc = read<u2>();
1302 const u2 line_number = read<u2>();
1305 auto it = instruction_map.find(start_pc);
1307 if(it!=instruction_map.end())
1308 it->second.get().source_location.set_line(line_number);
1311 else if(attribute_name==
"LocalVariableTable")
1313 const u2 local_variable_table_length = read<u2>();
1317 for(std::size_t i=0; i<local_variable_table_length; i++)
1319 const u2 start_pc = read<u2>();
1320 const u2 length = read<u2>();
1321 const u2 name_index = read<u2>();
1322 const u2 descriptor_index = read<u2>();
1323 const u2 index = read<u2>();
1333 else if(attribute_name==
"LocalVariableTypeTable")
1337 else if(attribute_name==
"StackMapTable")
1339 const u2 stack_map_entries = read<u2>();
1343 for(
size_t i=0; i<stack_map_entries; i++)
1345 const u1 frame_type = read<u1>();
1352 else if(64<=frame_type && frame_type<=127)
1362 else if(frame_type==247)
1369 const u2 offset_delta = read<u2>();
1374 else if(248<=frame_type && frame_type<=250)
1379 const u2 offset_delta = read<u2>();
1382 else if(frame_type==251)
1388 const u2 offset_delta = read<u2>();
1391 else if(252<=frame_type && frame_type<=254)
1393 size_t new_locals = frame_type - 251;
1397 const u2 offset_delta = read<u2>();
1399 for(
size_t k=0; k<new_locals; k++)
1408 else if(frame_type==255)
1411 const u2 offset_delta = read<u2>();
1413 const u2 number_locals = read<u2>();
1415 for(
size_t k=0; k<(size_t) number_locals; k++)
1423 const u2 number_stack_items = read<u2>();
1425 for(
size_t k=0; k<(size_t) number_stack_items; k++)
1435 throw "error: unknown stack frame type encountered";
1445 const u1 tag = read<u1>();
1478 throw "error: unknown verification type info encountered";
1483 std::vector<annotationt> &annotations)
1485 const u2 num_annotations = read<u2>();
1487 for(
u2 number=0; number<num_annotations; number++)
1491 annotations.push_back(annotation);
1498 const u2 type_index = read<u2>();
1506 const u2 num_element_value_pairs = read<u2>();
1507 element_value_pairs.resize(num_element_value_pairs);
1509 for(
auto &element_value_pair : element_value_pairs)
1511 const u2 element_name_index = read<u2>();
1512 element_value_pair.element_name=
pool_entry(element_name_index).
s;
1525 const u1 tag = read<u1>();
1539 const u2 class_info_index = read<u2>();
1554 const u2 num_values = read<u2>();
1556 values.reserve(num_values);
1557 for(std::size_t i=0; i<num_values; i++)
1566 const u2 const_value_index = read<u2>();
1572 const u2 const_value_index = read<u2>();
1573 return constant(const_value_index);
1590 const u4 &attribute_length)
1593 std::string name = parsed_class.
name.
c_str();
1594 const u2 number_of_classes = read<u2>();
1595 const u4 number_of_bytes_to_be_read = number_of_classes * 8 + 2;
1597 number_of_bytes_to_be_read == attribute_length,
1598 "The number of bytes to be read for the InnerClasses attribute does not "
1599 "match the attribute length.");
1601 const auto pool_entry_lambda = [
this](
u2 index) ->
pool_entryt & {
1604 const auto remove_separator_char = [](std::string str,
char ch) {
1605 str.erase(std::remove(str.begin(), str.end(), ch), str.end());
1609 for(
int i = 0; i < number_of_classes; i++)
1611 const u2 inner_class_info_index = read<u2>();
1612 const u2 outer_class_info_index = read<u2>();
1613 const u2 inner_name_index = read<u2>();
1614 const u2 inner_class_access_flags = read<u2>();
1616 std::string inner_class_info_name =
1619 bool is_private = (inner_class_access_flags &
ACC_PRIVATE) != 0;
1620 bool is_public = (inner_class_access_flags &
ACC_PUBLIC) != 0;
1621 bool is_protected = (inner_class_access_flags &
ACC_PROTECTED) != 0;
1622 bool is_static = (inner_class_access_flags &
ACC_STATIC) != 0;
1627 bool is_inner_class = remove_separator_char(
id2string(parsed_class.
name),
'.') ==
1628 remove_separator_char(inner_class_info_name,
'/');
1634 if(inner_name_index == 0)
1637 parsed_class.
inner_name = pool_entry_lambda(inner_name_index).s;
1640 if(outer_class_info_index == 0)
1648 std::string outer_class_info_name =
1667 const u2 number_of_exceptions = read<u2>();
1669 std::vector<irep_idt> exceptions;
1670 for(
size_t i = 0; i < number_of_exceptions; i++)
1672 const u2 exception_index_table = read<u2>();
1675 exceptions.push_back(exception_name);
1684 const u2 attribute_name_index = read<u2>();
1685 const u4 attribute_length = read<u4>();
1689 if(attribute_name==
"SourceFile")
1691 const u2 sourcefile_index = read<u2>();
1695 size_t last_index = fqn.find_last_of(
'.');
1696 if(last_index==std::string::npos)
1700 std::string package_name=fqn.substr(0, last_index+1);
1701 std::replace(package_name.begin(), package_name.end(),
'.',
'/');
1702 const std::string &full_file_name=
1704 sourcefile_name=full_file_name;
1707 for(
auto &method : parsed_class.
methods)
1709 method.source_location.set_file(sourcefile_name);
1710 for(
auto &instruction : method.instructions)
1712 if(!instruction.source_location.get_line().empty())
1713 instruction.source_location.set_file(sourcefile_name);
1717 else if(attribute_name==
"Signature")
1719 const u2 signature_index = read<u2>();
1725 else if(attribute_name==
"RuntimeInvisibleAnnotations" ||
1726 attribute_name==
"RuntimeVisibleAnnotations")
1730 else if(attribute_name ==
"BootstrapMethods")
1736 "only one BootstrapMethods argument is allowed in a class file");
1742 else if(attribute_name ==
"InnerClasses")
1752 const u2 methods_count = read<u2>();
1754 for(std::size_t j=0; j<methods_count; j++)
// Second table of access-flag bit masks.
// ACC_PUBLIC, ACC_PRIVATE, ACC_PROTECTED, ACC_STATIC, ACC_FINAL, ACC_INTERFACE,
// ACC_ABSTRACT, ACC_SYNTHETIC, ACC_ANNOTATION and ACC_ENUM repeat the
// definitions made earlier in the file with identical values, so the
// re-definition is benign only as long as the two tables agree.
// Several masks share a bit, so the name alone does not identify the flag:
//   0x0080: ACC_VARARGS and ACC_TRANSIENT
//   0x0020: ACC_SUPER (same bit as ACC_SYNCHRONIZED in the earlier table)
//   0x0040: ACC_VOLATILE (same bit as ACC_BRIDGE in the earlier table)
// NOTE(review): presumably the meaning of a shared bit depends on whether the
// flags word belongs to a class, a field or a method -- confirm at each use.
1758 #define ACC_PUBLIC 0x0001u
1759 #define ACC_PRIVATE 0x0002u
1760 #define ACC_PROTECTED 0x0004u
1761 #define ACC_STATIC 0x0008u
1762 #define ACC_FINAL 0x0010u
1763 #define ACC_VARARGS 0x0080u
1764 #define ACC_SUPER 0x0020u
1765 #define ACC_VOLATILE 0x0040u
1766 #define ACC_TRANSIENT 0x0080u
1767 #define ACC_INTERFACE 0x0200u
1768 #define ACC_ABSTRACT 0x0400u
1769 #define ACC_SYNTHETIC 0x1000u
1770 #define ACC_ANNOTATION 0x2000u
1771 #define ACC_ENUM 0x4000u
1777 const u2 access_flags = read<u2>();
1778 const u2 name_index = read<u2>();
1779 const u2 descriptor_index = read<u2>();
1796 const auto flags = (method.
is_public ? 1 : 0) +
1799 DATA_INVARIANT(flags<=1,
"at most one of public, protected, private");
1800 const u2 attributes_count = read<u2>();
1802 for(std::size_t j=0; j<attributes_count; j++)
1807 std::istream &istream,
1810 bool skip_instructions)
1813 java_bytecode_parser.
in=&istream;
1816 bool parser_result=java_bytecode_parser.
parse();
1825 return std::move(java_bytecode_parser.
parse_tree);
1829 const std::string &
file,
1832 bool skip_instructions)
1842 in, class_name, message_handler, skip_instructions);
1850 const u2 local_variable_type_table_length = read<u2>();
1854 "Local variable type table cannot have more elements "
1855 "than the local variable table.");
1856 for(std::size_t i=0; i<local_variable_type_table_length; i++)
1858 const u2 start_pc = read<u2>();
1859 const u2 length = read<u2>();
1860 const u2 name_index = read<u2>();
1861 const u2 signature_index = read<u2>();
1862 const u2 index = read<u2>();
1868 if(lvar.index==index &&
1870 lvar.start_pc==start_pc &&
1871 lvar.length==length)
1880 "Entry in LocalVariableTypeTable must be present in LVT");
1893 switch(java_handle_kind)
1956 std::string descriptor = name_and_type.
get_descriptor(pool_entry_lambda);
1964 method_type, mangled_method_name, class_name, method_name};
1973 const u2 num_bootstrap_methods = read<u2>();
1974 for(
size_t bootstrap_method_index = 0;
1975 bootstrap_method_index < num_bootstrap_methods;
1976 ++bootstrap_method_index)
1978 const u2 bootstrap_methodhandle_ref = read<u2>();
1983 const u2 num_bootstrap_arguments = read<u2>();
1984 debug() <<
"INFO: parse BootstrapMethod handle " << num_bootstrap_arguments
1988 std::vector<u2> u2_values(num_bootstrap_arguments);
1989 for(
size_t i = 0; i < num_bootstrap_arguments; i++)
1990 u2_values[i] = read<u2>();
2022 if(num_bootstrap_arguments < 3)
2026 <<
"format of BootstrapMethods entry not recognized: too few arguments"
2031 u2 interface_type_index = u2_values[0];
2032 u2 method_handle_index = u2_values[1];
2033 u2 method_type_index = u2_values[2];
2039 bool recognized =
true;
2040 for(
size_t i = 3; i < num_bootstrap_arguments; i++)
2042 u2 skipped_argument = u2_values[i];
2048 debug() <<
"format of BootstrapMethods entry not recognized: extra "
2049 "arguments of wrong type"
2065 debug() <<
"format of BootstrapMethods entry not recognized: arguments "
2072 debug() <<
"INFO: parse lambda handle" <<
eom;
2078 debug() <<
"format of BootstrapMethods entry not recognized: method "
2079 "handle not recognised"
2089 debug() <<
"lambda function reference "
2091 .base_method_name())
2093 <<
"\n interface type is "
2095 <<
"\n method type is "
2106 size_t bootstrap_method_index)