1 #-------------------------------------------------------------------------------
2 # elftools: dwarf/callframe.py
4 # DWARF call frame information
6 # Eli Bendersky (eliben@gmail.com)
7 # This code is in the public domain
8 #-------------------------------------------------------------------------------
import copy
from collections import namedtuple

from ..common.utils import (struct_parse, dwarf_assert, preserve_stream_pos)
from ..common.py3compat import iterkeys
from .structs import DWARFStructs
from .constants import *
class CallFrameInfo(object):
    """ DWARF CFI (Call Frame Info)

        stream, size:
            A stream holding the .debug_frame section, and the size of the
            section in it.

        base_structs:
            The structs to be used as the base for parsing this section.
            Eventually, each entry gets its own structs based on the initial
            length field it starts with. The address_size, however, is taken
            from base_structs. This appears to be a limitation of the DWARFv3
            standard, fixed in v4 (where an address_size field exists for
            each CFI).
            Currently for base_structs the elfclass of the containing file is
            used, but more sophisticated methods are used by libdwarf and
            others, such as guessing which CU contains which FDEs (based on
            their address ranges) and taking the address_size from those CUs.
    """
    def __init__(self, stream, size, base_structs):
        self.stream = stream
        self.size = size
        self.base_structs = base_structs
        # Parsed lazily on the first get_entries() call
        self.entries = None

        # Map between an offset in the stream and the entry object found at
        # this offset. Useful for assigning CIE to FDEs according to the
        # CIE_pointer header field which contains a stream offset.
        self._entry_cache = {}

    def get_entries(self):
        """ Get a list of entries that constitute this CFI. The list consists
            of CIE or FDE objects, in the order of their appearance in the
            section. The list is parsed once and cached.
        """
        if self.entries is None:
            self.entries = self._parse_entries()
        return self.entries

    #-------------------------

    def _parse_entries(self):
        """ Walk the section from offset 0 up to self.size and return the
            list of parsed entries.
        """
        entries = []
        offset = 0
        while offset < self.size:
            entries.append(self._parse_entry_at(offset))
            # _parse_entry_at leaves the stream right after the entry
            offset = self.stream.tell()
        return entries

    def _parse_entry_at(self, offset):
        """ Parse an entry from self.stream starting with the given offset.
            Return the entry object. self.stream will point right after the
            entry.
        """
        if offset in self._entry_cache:
            return self._entry_cache[offset]

        # An initial length of 0xFFFFFFFF marks the 64-bit DWARF format
        entry_length = struct_parse(
            self.base_structs.Dwarf_uint32(''), self.stream, offset)
        dwarf_format = 64 if entry_length == 0xFFFFFFFF else 32

        entry_structs = DWARFStructs(
            little_endian=self.base_structs.little_endian,
            dwarf_format=dwarf_format,
            address_size=self.base_structs.address_size)

        # Read the next field to see whether this is a CIE or FDE
        CIE_id = struct_parse(
            entry_structs.Dwarf_offset(''), self.stream)

        is_CIE = (
            (dwarf_format == 32 and CIE_id == 0xFFFFFFFF) or
            CIE_id == 0xFFFFFFFFFFFFFFFF)

        if is_CIE:
            header_struct = entry_structs.Dwarf_CIE_header
        else:
            header_struct = entry_structs.Dwarf_FDE_header

        # Parse the header (re-reading from the entry's start offset), which
        # goes up to and including the return_address_register field
        header = struct_parse(
            header_struct, self.stream, offset)

        # For convenience, compute the end offset for this entry
        end_offset = (
            offset + header.length +
            entry_structs.initial_length_field_size())

        # At this point self.stream is at the start of the instruction list
        # for this entry
        instructions = self._parse_instructions(
            entry_structs, self.stream.tell(), end_offset)

        if is_CIE:
            self._entry_cache[offset] = CIE(
                header=header, instructions=instructions, offset=offset,
                structs=entry_structs)
        else:  # FDE
            # Parsing the referenced CIE moves the stream; restore the
            # position afterwards so sequential parsing can continue.
            with preserve_stream_pos(self.stream):
                cie = self._parse_entry_at(header['CIE_pointer'])
            self._entry_cache[offset] = FDE(
                header=header, instructions=instructions, offset=offset,
                structs=entry_structs, cie=cie)
        return self._entry_cache[offset]

    def _parse_instructions(self, structs, offset, end_offset):
        """ Parse a list of CFI instructions from self.stream, starting with
            the offset and until (not including) end_offset.
            Return a list of CallFrameInstruction objects.
        """
        instructions = []
        while offset < end_offset:
            opcode = struct_parse(structs.Dwarf_uint8(''), self.stream, offset)
            args = []

            primary = opcode & _PRIMARY_MASK
            primary_arg = opcode & _PRIMARY_ARG_MASK
            if primary == DW_CFA_advance_loc:
                args = [primary_arg]
            elif primary == DW_CFA_offset:
                args = [
                    primary_arg,
                    struct_parse(structs.Dwarf_uleb128(''), self.stream)]
            elif primary == DW_CFA_restore:
                args = [primary_arg]
            # primary == 0 and real opcode is extended
            elif opcode in (DW_CFA_nop, DW_CFA_remember_state,
                            DW_CFA_restore_state):
                args = []
            elif opcode == DW_CFA_set_loc:
                args = [
                    struct_parse(structs.Dwarf_target_addr(''), self.stream)]
            elif opcode == DW_CFA_advance_loc1:
                args = [struct_parse(structs.Dwarf_uint8(''), self.stream)]
            elif opcode == DW_CFA_advance_loc2:
                args = [struct_parse(structs.Dwarf_uint16(''), self.stream)]
            elif opcode == DW_CFA_advance_loc4:
                args = [struct_parse(structs.Dwarf_uint32(''), self.stream)]
            elif opcode in (DW_CFA_offset_extended, DW_CFA_register,
                            DW_CFA_def_cfa, DW_CFA_val_offset):
                args = [
                    struct_parse(structs.Dwarf_uleb128(''), self.stream),
                    struct_parse(structs.Dwarf_uleb128(''), self.stream)]
            elif opcode in (DW_CFA_restore_extended, DW_CFA_undefined,
                            DW_CFA_same_value, DW_CFA_def_cfa_register,
                            DW_CFA_def_cfa_offset):
                args = [struct_parse(structs.Dwarf_uleb128(''), self.stream)]
            elif opcode == DW_CFA_def_cfa_offset_sf:
                args = [struct_parse(structs.Dwarf_sleb128(''), self.stream)]
            elif opcode == DW_CFA_def_cfa_expression:
                args = [struct_parse(
                    structs.Dwarf_dw_form['DW_FORM_block'], self.stream)]
            elif opcode in (DW_CFA_expression, DW_CFA_val_expression):
                args = [
                    struct_parse(structs.Dwarf_uleb128(''), self.stream),
                    struct_parse(
                        structs.Dwarf_dw_form['DW_FORM_block'], self.stream)]
            elif opcode in (DW_CFA_offset_extended_sf,
                            DW_CFA_def_cfa_sf, DW_CFA_val_offset_sf):
                args = [
                    struct_parse(structs.Dwarf_uleb128(''), self.stream),
                    struct_parse(structs.Dwarf_sleb128(''), self.stream)]
            else:
                dwarf_assert(False, 'Unknown CFI opcode: 0x%x' % opcode)

            instructions.append(CallFrameInstruction(opcode=opcode, args=args))
            offset = self.stream.tell()
        return instructions
def instruction_name(opcode):
    """ Given an opcode, return the instruction name.

        Opcodes whose two high bits are zero are looked up by their full
        value; otherwise only the high bits identify the instruction (the
        low bits hold an embedded argument) and the lookup uses those.
        Raises KeyError if the value is not in _OPCODE_NAME_MAP.
    """
    primary = opcode & _PRIMARY_MASK
    if primary == 0:
        return _OPCODE_NAME_MAP[opcode]
    else:
        return _OPCODE_NAME_MAP[primary]
class CallFrameInstruction(object):
    """ An instruction in the CFI section. opcode is the instruction
        opcode, numeric - as it appears in the section. args is a list of
        arguments (including arguments embedded in the low bits of some
        instructions, when applicable), decoded from the stream.
    """
    def __init__(self, opcode, args):
        self.opcode = opcode
        self.args = args

    def __repr__(self):
        return '%s (0x%x): %s' % (
            instruction_name(self.opcode), self.opcode, self.args)
class CFIEntry(object):
    """ A common base class for CFI entries.
        Contains a header and a list of instructions (CallFrameInstruction).
        offset: the offset of this entry from the beginning of the section
        cie: for FDEs, a CIE pointer is required
    """
    def __init__(self, header, structs, instructions, offset, cie=None):
        self.header = header
        self.structs = structs
        self.instructions = instructions
        self.offset = offset
        self.cie = cie
        # Filled in lazily by get_decoded()
        self._decoded_table = None

    def get_decoded(self):
        """ Decode the CFI contained in this entry and return a
            DecodedCallFrameTable object representing it. See the
            documentation of that class to understand how to interpret the
            decoded table. The result is computed once and cached.
        """
        if self._decoded_table is None:
            self._decoded_table = self._decode_CFI_table()
        return self._decoded_table

    def __getitem__(self, name):
        """ Implement dict-like access to header entries
        """
        return self.header[name]

    def _decode_CFI_table(self):
        """ Decode the instructions contained in the given CFI entry and
            return a DecodedCallFrameTable.
        """
        if isinstance(self, CIE):
            # For a CIE, initialize cur_line to an "empty" line
            cie = self
            cur_line = dict(pc=0, cfa=None)
            reg_order = []
        else:  # FDE
            # For a FDE, we need to decode the attached CIE first, because
            # its decoded table is needed. Its "initial instructions"
            # describe a line that serves as the base (first) line in the
            # FDE's table.
            cie = self.cie
            cie_decoded_table = cie.get_decoded()
            last_line_in_CIE = copy.copy(cie_decoded_table.table[-1])
            # Work on a separate copy: last_line_in_CIE must stay untouched
            # so that DW_CFA_restore can recover the CIE's initial rules.
            cur_line = copy.copy(last_line_in_CIE)
            cur_line['pc'] = self['initial_location']
            reg_order = copy.copy(cie_decoded_table.reg_order)

        table = []

        # Keeps a stack for the use of DW_CFA_{remember|restore}_state
        # instructions.
        line_stack = []

        def _add_to_order(regnum):
            # Record each register once, in order of first appearance
            if regnum not in reg_order:
                reg_order.append(regnum)

        for instr in self.instructions:
            # Throughout this loop, cur_line is the current line. Some
            # instructions add it to the table, but most instructions just
            # update it without adding it to the table.

            name = instruction_name(instr.opcode)

            if name == 'DW_CFA_set_loc':
                table.append(copy.copy(cur_line))
                cur_line['pc'] = instr.args[0]
            elif name in ('DW_CFA_advance_loc1', 'DW_CFA_advance_loc2',
                          'DW_CFA_advance_loc4', 'DW_CFA_advance_loc'):
                table.append(copy.copy(cur_line))
                cur_line['pc'] += instr.args[0] * cie['code_alignment_factor']
            elif name == 'DW_CFA_def_cfa':
                cur_line['cfa'] = CFARule(
                    reg=instr.args[0],
                    offset=instr.args[1])
            elif name == 'DW_CFA_def_cfa_sf':
                # The signed, factored offset is scaled by the *data*
                # alignment factor (DWARFv3 section 6.4.2.2).
                cur_line['cfa'] = CFARule(
                    reg=instr.args[0],
                    offset=instr.args[1] * cie['data_alignment_factor'])
            elif name == 'DW_CFA_def_cfa_register':
                cur_line['cfa'] = CFARule(
                    reg=instr.args[0],
                    offset=cur_line['cfa'].offset)
            elif name == 'DW_CFA_def_cfa_offset':
                cur_line['cfa'] = CFARule(
                    reg=cur_line['cfa'].reg,
                    offset=instr.args[0])
            elif name == 'DW_CFA_def_cfa_offset_sf':
                cur_line['cfa'] = CFARule(
                    reg=cur_line['cfa'].reg,
                    offset=instr.args[0] * cie['data_alignment_factor'])
            elif name == 'DW_CFA_def_cfa_expression':
                cur_line['cfa'] = CFARule(expr=instr.args[0])
            elif name == 'DW_CFA_undefined':
                _add_to_order(instr.args[0])
                cur_line[instr.args[0]] = RegisterRule(RegisterRule.UNDEFINED)
            elif name == 'DW_CFA_same_value':
                _add_to_order(instr.args[0])
                cur_line[instr.args[0]] = RegisterRule(RegisterRule.SAME_VALUE)
            elif name in ('DW_CFA_offset', 'DW_CFA_offset_extended',
                          'DW_CFA_offset_extended_sf'):
                _add_to_order(instr.args[0])
                cur_line[instr.args[0]] = RegisterRule(
                    RegisterRule.OFFSET,
                    instr.args[1] * cie['data_alignment_factor'])
            elif name in ('DW_CFA_val_offset', 'DW_CFA_val_offset_sf'):
                _add_to_order(instr.args[0])
                cur_line[instr.args[0]] = RegisterRule(
                    RegisterRule.VAL_OFFSET,
                    instr.args[1] * cie['data_alignment_factor'])
            elif name == 'DW_CFA_register':
                _add_to_order(instr.args[0])
                cur_line[instr.args[0]] = RegisterRule(
                    RegisterRule.REGISTER,
                    instr.args[1])
            elif name == 'DW_CFA_expression':
                _add_to_order(instr.args[0])
                cur_line[instr.args[0]] = RegisterRule(
                    RegisterRule.EXPRESSION,
                    instr.args[1])
            elif name == 'DW_CFA_val_expression':
                _add_to_order(instr.args[0])
                cur_line[instr.args[0]] = RegisterRule(
                    RegisterRule.VAL_EXPRESSION,
                    instr.args[1])
            elif name in ('DW_CFA_restore', 'DW_CFA_restore_extended'):
                _add_to_order(instr.args[0])
                dwarf_assert(
                    isinstance(self, FDE),
                    '%s instruction must be in a FDE' % name)
                dwarf_assert(
                    instr.args[0] in last_line_in_CIE,
                    '%s: can not find register in CIE' % name)
                cur_line[instr.args[0]] = last_line_in_CIE[instr.args[0]]
            elif name == 'DW_CFA_remember_state':
                # Push a snapshot: cur_line keeps being mutated afterwards
                line_stack.append(copy.copy(cur_line))
            elif name == 'DW_CFA_restore_state':
                # Only the rules are restored; the location does not rewind
                pc = cur_line['pc']
                cur_line = line_stack.pop()
                cur_line['pc'] = pc

        # The current line is appended to the table after all instructions
        # have ended, in any case (even if there were no instructions).
        table.append(cur_line)
        return DecodedCallFrameTable(table=table, reg_order=reg_order)
# A CIE and FDE have exactly the same functionality, except that a FDE has
# a pointer to its CIE. The functionality was wholly encapsulated in CFIEntry,
# so the CIE and FDE classes exist separately for identification (instead
# of having an explicit "entry_type" field in CFIEntry).
#
class CIE(CFIEntry):
    pass


class FDE(CFIEntry):
    pass
class RegisterRule(object):
    """ Register rules are used to find registers in call frames. Each rule
        consists of a type (enumeration following DWARFv3 section 6.4.1)
        and an optional argument to augment the type.
    """
    UNDEFINED = 'UNDEFINED'
    SAME_VALUE = 'SAME_VALUE'
    OFFSET = 'OFFSET'
    VAL_OFFSET = 'VAL_OFFSET'
    REGISTER = 'REGISTER'
    EXPRESSION = 'EXPRESSION'
    VAL_EXPRESSION = 'VAL_EXPRESSION'
    ARCHITECTURAL = 'ARCHITECTURAL'

    def __init__(self, type, arg=None):
        self.type = type
        self.arg = arg

    def __repr__(self):
        return 'RegisterRule(%s, %s)' % (self.type, self.arg)
class CFARule(object):
    """ A CFA rule is used to compute the CFA for each location. It either
        consists of a register+offset, or a DWARF expression.
    """
    def __init__(self, reg=None, offset=None, expr=None):
        self.reg = reg
        self.offset = offset
        self.expr = expr

    def __repr__(self):
        return 'CFARule(reg=%s, offset=%s, expr=%s)' % (
            self.reg, self.offset, self.expr)
# Represents the decoded CFI for an entry, which is just a large table,
# according to DWARFv3 section 6.4.1
#
# DecodedCallFrameTable is a simple named tuple to group together the table
# and the register appearance order.
#
# table:
#
# A list of dicts that represent "lines" in the decoded table. Each line has
# some special dict entries: 'pc' for the location/program counter (LOC),
# and 'cfa' for the CFARule to locate the CFA on that line.
# The other entries are keyed by register numbers with RegisterRule values,
# and describe the rules for these registers.
#
# reg_order:
#
# A list of register numbers that are described in the table by the order of
# their appearance.
#
DecodedCallFrameTable = namedtuple(
    'DecodedCallFrameTable', ['table', 'reg_order'])
#---------------- PRIVATE ----------------#

# An opcode byte is split in two: the two high bits (the "primary" part) and
# the six low bits, which carry the embedded argument of primary opcodes.
_PRIMARY_MASK = 0xC0
_PRIMARY_ARG_MASK = 0x3F

# Maps DW_CFA_* values to their names. It is built by scanning globals(),
# which holds every name brought in from the constants module by `import *`.
# The keys are snapshotted into a list first because the loop variable itself
# becomes a new global while the loop runs.
_OPCODE_NAME_MAP = {}
for name in list(iterkeys(globals())):
    if not name.startswith('DW_CFA'):
        continue
    _OPCODE_NAME_MAP[globals()[name]] = name