#-------------------------------------------------------------------------------
# elftools: dwarf/structs.py
#
# Encapsulation of Construct structs for parsing DWARF, adjusted for correct
# endianness and word-size.
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
10 from ..construct import (
11 UBInt8, UBInt16, UBInt32, UBInt64, ULInt8, ULInt16, ULInt32, ULInt64,
12 SBInt8, SBInt16, SBInt32, SBInt64, SLInt8, SLInt16, SLInt32, SLInt64,
13 Adapter, Struct, ConstructError, If, RepeatUntil, Field, Rename, Enum,
14 Array, PrefixedArray, CString, Embed,
16 from ..common.construct_utils import RepeatUntilExcluding
class DWARFStructs(object):
    """ Exposes Construct structs suitable for parsing information from DWARF
        sections. Each compile unit in DWARF info can have its own structs
        object. Keep in mind that these structs have to be given a name (by
        calling them with a name) before being used for parsing (like other
        Construct structs). Those that should be used without a name are marked
        by (+).

        Accessible attributes (mostly as described in chapter 7 of the DWARF
        spec):

            Dwarf_[u]int{8,16,32,64}:
                Data chunks of the common sizes

            Dwarf_offset:
                32-bit or 64-bit word, depending on dwarf_format

            Dwarf_target_addr:
                32-bit or 64-bit word, depending on address size

            Dwarf_initial_length:
                "Initial length field" encoding

            Dwarf_{u,s}leb128:
                ULEB128 and SLEB128 variable-length encoding

            Dwarf_CU_header (+):
                Compilation unit header

            Dwarf_abbrev_declaration (+):
                Abbreviation table declaration - doesn't include the initial
                code, only the contents.

            Dwarf_dw_form (+):
                A dictionary mapping 'DW_FORM_*' keys into construct Structs
                that parse such forms. These Structs have already been given
                an (empty) name.

            Dwarf_lineprog_header (+):
                Line program header

            Dwarf_lineprog_file_entry (+):
                A single file entry in a line program header or instruction

            Dwarf_CIE_header (+):
                Call-frame CIE header

            Dwarf_FDE_header (+):
                Call-frame FDE header

        See also the documentation of public methods.
    """
    def __init__(self, little_endian, dwarf_format, address_size):
        """ little_endian:
                True if the file is little endian, False if big

            dwarf_format:
                DWARF Format: 32 or 64-bit (see spec section 7.4)

            address_size:
                Target machine address size, in bytes (4 or 8).

            An AssertionError is raised for any other dwarf_format or
            address_size value.
        """
        assert dwarf_format in (32, 64), (
            'dwarf_format must be 32 or 64, got %r' % (dwarf_format,))
        assert address_size in (4, 8), (
            'address_size must be 4 or 8, got %r' % (address_size,))
        self.little_endian = little_endian
        self.dwarf_format = dwarf_format
        self.address_size = address_size
        self._create_structs()

    def initial_length_field_size(self):
        """ Size of an initial length field, in bytes: 4 for the 32-bit DWARF
            format, otherwise 12 (the 32-bit escape word followed by the
            64-bit length, as parsed by Dwarf_initial_length).
        """
        return 4 if self.dwarf_format == 32 else 12

    def _create_structs(self):
        """ Bind the endianness-specific integer constructs to the Dwarf_*
            attributes and then build all the composite structs on top of
            them.
        """
        if self.little_endian:
            unsigned = (ULInt8, ULInt16, ULInt32, ULInt64)
            signed = (SLInt8, SLInt16, SLInt32, SLInt64)
        else:
            unsigned = (UBInt8, UBInt16, UBInt32, UBInt64)
            signed = (SBInt8, SBInt16, SBInt32, SBInt64)

        (self.Dwarf_uint8, self.Dwarf_uint16,
         self.Dwarf_uint32, self.Dwarf_uint64) = unsigned
        (self.Dwarf_int8, self.Dwarf_int16,
         self.Dwarf_int32, self.Dwarf_int64) = signed

        # Offsets follow the DWARF format; target addresses follow the
        # machine address size.
        self.Dwarf_offset = (
            self.Dwarf_uint32 if self.dwarf_format == 32
            else self.Dwarf_uint64)
        self.Dwarf_target_addr = (
            self.Dwarf_uint32 if self.address_size == 4
            else self.Dwarf_uint64)

        # Order matters: the headers below use Dwarf_initial_length and the
        # LEB128 constructs, so those are created first.
        self._create_initial_length()
        self._create_leb128()
        self._create_cu_header()
        self._create_abbrev_declaration()
        self._create_dw_form()
        self._create_lineprog_header()
        self._create_callframe_entry_headers()

    def _create_initial_length(self):
        """ Create Dwarf_initial_length: a callable taking a name and
            returning a construct that parses an initial length field.
        """
        def _InitialLength(name):
            # Adapts a Struct that parses forward a full initial length field.
            # Only if the first word is the continuation value, the second
            # word is parsed from the stream.
            #
            return _InitialLengthAdapter(
                Struct(name,
                    self.Dwarf_uint32('first'),
                    If(lambda ctx: ctx.first == 0xFFFFFFFF,
                        self.Dwarf_uint64('second'),
                        elsevalue=None)))
        self.Dwarf_initial_length = _InitialLength

    def _create_leb128(self):
        """ Expose the module-level LEB128 construct creators as attributes.
        """
        self.Dwarf_uleb128 = _ULEB128
        self.Dwarf_sleb128 = _SLEB128

    def _create_cu_header(self):
        """ Create Dwarf_CU_header: unit_length, version, debug_abbrev_offset
            and address_size, in that order.
        """
        self.Dwarf_CU_header = Struct('Dwarf_CU_header',
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_offset('debug_abbrev_offset'),
            self.Dwarf_uint8('address_size'))

    def _create_abbrev_declaration(self):
        """ Create Dwarf_abbrev_declaration: tag, children flag and the list
            of (name, form) attribute specs. The list stops at (and excludes)
            the DW_AT_null/DW_FORM_null pair.
        """
        # NOTE(review): the inner Struct's name ('attr_spec') was not legible
        # in the reviewed listing - confirm against the users of this struct.
        self.Dwarf_abbrev_declaration = Struct('Dwarf_abbrev_entry',
            Enum(self.Dwarf_uleb128('tag'), **ENUM_DW_TAG),
            Enum(self.Dwarf_uint8('children_flag'), **ENUM_DW_CHILDREN),
            RepeatUntilExcluding(
                lambda obj, ctx:
                    obj.name == 'DW_AT_null' and obj.form == 'DW_FORM_null',
                Struct('attr_spec',
                    Enum(self.Dwarf_uleb128('name'), **ENUM_DW_AT),
                    Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM))))

    def _create_dw_form(self):
        """ Create Dwarf_dw_form: a dict mapping 'DW_FORM_*' names to
            constructs (already named with '') that parse such forms.
        """
        self.Dwarf_dw_form = dict(
            DW_FORM_addr=self.Dwarf_target_addr(''),

            DW_FORM_block1=self._make_block_struct(self.Dwarf_uint8),
            DW_FORM_block2=self._make_block_struct(self.Dwarf_uint16),
            DW_FORM_block4=self._make_block_struct(self.Dwarf_uint32),
            DW_FORM_block=self._make_block_struct(self.Dwarf_uleb128),

            # All DW_FORM_data<n> forms are assumed to be unsigned
            DW_FORM_data1=self.Dwarf_uint8(''),
            DW_FORM_data2=self.Dwarf_uint16(''),
            DW_FORM_data4=self.Dwarf_uint32(''),
            DW_FORM_data8=self.Dwarf_uint64(''),
            DW_FORM_sdata=self.Dwarf_sleb128(''),
            DW_FORM_udata=self.Dwarf_uleb128(''),

            DW_FORM_string=CString(''),
            DW_FORM_strp=self.Dwarf_offset(''),
            DW_FORM_flag=self.Dwarf_uint8(''),

            DW_FORM_ref1=self.Dwarf_uint8(''),
            DW_FORM_ref2=self.Dwarf_uint16(''),
            DW_FORM_ref4=self.Dwarf_uint32(''),
            DW_FORM_ref8=self.Dwarf_uint64(''),
            DW_FORM_ref_udata=self.Dwarf_uleb128(''),
            DW_FORM_ref_addr=self.Dwarf_offset(''),

            DW_FORM_indirect=self.Dwarf_uleb128(''),
        )

    def _create_lineprog_header(self):
        """ Create Dwarf_lineprog_file_entry and Dwarf_lineprog_header.
        """
        # A file entry is terminated by a NULL byte, so we don't want to parse
        # past it. Therefore an If is used.
        # NOTE(review): the 'name' CString and the Embed(Struct('', ...))
        # wrapper were not legible in the reviewed listing; they are restored
        # from the ctx.name/obj.name lookups and the paren nesting - confirm.
        self.Dwarf_lineprog_file_entry = Struct('file_entry',
            CString('name'),
            If(lambda ctx: len(ctx.name) != 0,
                Embed(Struct('',
                    self.Dwarf_uleb128('dir_index'),
                    self.Dwarf_uleb128('mtime'),
                    self.Dwarf_uleb128('length')))))

        self.Dwarf_lineprog_header = Struct('Dwarf_lineprog_header',
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_offset('header_length'),
            self.Dwarf_uint8('minimum_instruction_length'),
            self.Dwarf_uint8('default_is_stmt'),
            self.Dwarf_int8('line_base'),
            self.Dwarf_uint8('line_range'),
            self.Dwarf_uint8('opcode_base'),
            # One length per standard opcode; opcodes are numbered from 1,
            # hence opcode_base - 1 entries.
            Array(lambda ctx: ctx['opcode_base'] - 1,
                  self.Dwarf_uint8('standard_opcode_lengths')),
            # Both lists end at (and exclude) an empty string / empty name.
            RepeatUntilExcluding(
                lambda obj, ctx: obj == b'',
                CString('include_directory')),
            RepeatUntilExcluding(
                lambda obj, ctx: len(obj.name) == 0,
                self.Dwarf_lineprog_file_entry),
            )

    def _create_callframe_entry_headers(self):
        """ Create Dwarf_CIE_header and Dwarf_FDE_header for call-frame
            information entries.
        """
        self.Dwarf_CIE_header = Struct('Dwarf_CIE_header',
            self.Dwarf_initial_length('length'),
            self.Dwarf_offset('CIE_id'),
            self.Dwarf_uint8('version'),
            CString('augmentation'),
            self.Dwarf_uleb128('code_alignment_factor'),
            self.Dwarf_sleb128('data_alignment_factor'),
            self.Dwarf_uleb128('return_address_register'))

        self.Dwarf_FDE_header = Struct('Dwarf_FDE_header',
            self.Dwarf_initial_length('length'),
            self.Dwarf_offset('CIE_pointer'),
            self.Dwarf_target_addr('initial_location'),
            self.Dwarf_target_addr('address_range'))

    def _make_block_struct(self, length_field):
        """ Create a struct for DW_FORM_block<size>: an array of bytes
            prefixed by its length, which is parsed with length_field (one of
            the Dwarf_* construct creators).
        """
        return PrefixedArray(
            subcon=self.Dwarf_uint8('elem'),
            length_field=length_field(''))
class _InitialLengthAdapter(Adapter):
    """ A standard Construct adapter that expects a sub-construct
        as a struct with one or two values (first, second).
    """
    def _decode(self, obj, context):
        """ Collapse the parsed (first, second) pair into a single length.

            Values of 'first' below 0xFFFFFF00 are the length itself. The
            escape value 0xFFFFFFFF means the length is in 'second'. Any other
            value raises ConstructError.
        """
        if obj.first < 0xFFFFFF00:
            return obj.first
        if obj.first == 0xFFFFFFFF:
            return obj.second
        raise ConstructError("Failed decoding initial length for %X" % (
            obj.first))
def _LEB128_reader():
    """ Read LEB128 variable-length data from the stream. The data is terminated
        by a byte with 0 in its highest bit.
    """
    # NOTE(review): the RepeatUntil(...)/Field(None, 1) wrapper lines were not
    # legible in the reviewed listing; restored from the ord(obj) predicate
    # (single-byte elements) and the module's imports - confirm.
    return RepeatUntil(
                lambda obj, ctx: ord(obj) < 0x80,
                Field(None, 1))
class _ULEB128Adapter(Adapter):
    """ An adapter for ULEB128, given a sequence of bytes in a sub-construct.
    """
    def _decode(self, obj, context):
        """ Combine the low 7 bits of every byte in obj into one unsigned
            integer. The first byte holds the least significant group, so the
            sequence is folded from the end.
        """
        value = 0
        for b in reversed(obj):
            value = (value << 7) + (ord(b) & 0x7F)
        return value
class _SLEB128Adapter(Adapter):
    """ An adapter for SLEB128, given a sequence of bytes in a sub-construct.
    """
    def _decode(self, obj, context):
        """ Combine the low 7 bits of every byte in obj into one integer (the
            first byte is least significant), then sign-extend when bit 6 of
            the last byte is set.
        """
        value = 0
        for b in reversed(obj):
            value = (value << 7) + (ord(b) & 0x7F)
        if ord(obj[-1]) & 0x40:
            # negative -> sign extend: set every bit above the 7*len(obj)
            # payload bits
            value |= - (1 << (7 * len(obj)))
        return value
def _ULEB128(name):
    """ A construct creator for ULEB128 encoding.

        Returns the ULEB128 adapter over the LEB128 byte reader, renamed to
        the given name.
    """
    return Rename(name, _ULEB128Adapter(_LEB128_reader()))
def _SLEB128(name):
    """ A construct creator for SLEB128 encoding.

        Returns the SLEB128 adapter over the LEB128 byte reader, renamed to
        the given name.
    """
    return Rename(name, _SLEB128Adapter(_LEB128_reader()))