1 #-------------------------------------------------------------------------------
2 # elftools: dwarf/descriptions.py
4 # Textual descriptions of the various values and enums of DWARF
6 # Eli Bendersky (eliben@gmail.com)
7 # This code is in the public domain
8 #-------------------------------------------------------------------------------
9 from collections import defaultdict
11 from .constants import *
12 from .dwarf_expr import GenericExprVisitor
14 from ..common.utils import preserve_stream_pos, dwarf_assert
15 from ..common.py3compat import bytes2str
16 from .callframe import instruction_name, CIE, FDE
def set_global_machine_arch(machine_arch):
    """ Set the machine architecture used by default when describing
        register names.

        machine_arch: name of the architecture; describe_reg_name has
        register tables for 'x86' and 'x64'.
    """
    # Without the global declaration the assignment would only bind a
    # function-local name and the module-level setting would never change.
    global _MACHINE_ARCH
    _MACHINE_ARCH = machine_arch
def describe_attr_value(attr, die, section_offset):
    """ Build the textual representation of the value of attribute attr,
        suitable for tools like readelf.

        The result is the form-specific rendering of the value, followed by
        a tab and the attribute-specific extra information.

        attr: the attribute to describe
        die: the DIE this attribute was extracted from
        section_offset: offset in the stream of the section the DIE belongs to
    """
    context = (attr, die, section_offset)

    # The basic rendering is selected by the attribute's form...
    render_value = _ATTR_DESCRIPTION_MAP[attr.form]
    value = render_value(*context)

    # ...while the trailing information is selected by the attribute's name.
    render_extra = _EXTRA_INFO_DESCRIPTION_MAP[attr.name]
    extra = render_extra(*context)

    return '\t'.join((str(value), extra))
def describe_CFI_instructions(entry):
    """ Given a CFI entry (CIE or FDE), return the textual description of its
        instructions, one instruction per line.

        entry: a CIE or FDE object; its .instructions are iterated and each
        instruction's opcode and args are rendered.

        NOTE(review): the spacing inside the format strings is kept exactly
        as found in this file; verify it against the expected readelf layout.
    """
    def _assert_FDE_instruction(instr):
        # Location-advancing instructions need a current pc, which only an
        # FDE provides.
        dwarf_assert(
            isinstance(entry, FDE),
            'Unexpected instruction "%s" for a CIE' % instr)

    def _full_reg_name(regnum):
        return 'r%s (%s)' % (regnum, describe_reg_name(regnum))

    if isinstance(entry, CIE):
        cie = entry
    else:
        # An FDE takes its alignment factors from its CIE.
        # NOTE(review): assumes FDE objects expose the CIE as .cie -- confirm.
        cie = entry.cie
        pc = entry['initial_location']

    s = ''
    for instr in entry.instructions:
        name = instruction_name(instr.opcode)

        if name in ('DW_CFA_offset',
                    'DW_CFA_offset_extended', 'DW_CFA_offset_extended_sf',
                    'DW_CFA_val_offset', 'DW_CFA_val_offset_sf'):
            s += ' %s: %s at cfa%+d\n' % (
                name, _full_reg_name(instr.args[0]),
                instr.args[1] * cie['data_alignment_factor'])
        elif name in ('DW_CFA_restore', 'DW_CFA_restore_extended',
                      'DW_CFA_undefined', 'DW_CFA_same_value',
                      'DW_CFA_def_cfa_register'):
            s += ' %s: %s\n' % (name, _full_reg_name(instr.args[0]))
        elif name == 'DW_CFA_register':
            # Terminated with a newline like every other instruction line.
            s += ' %s: %s in %s\n' % (
                name, _full_reg_name(instr.args[0]),
                _full_reg_name(instr.args[1]))
        elif name == 'DW_CFA_set_loc':
            # The argument is the new current location.
            pc = instr.args[0]
            s += ' %s: %08x\n' % (name, pc)
        elif name in ('DW_CFA_advance_loc1', 'DW_CFA_advance_loc2',
                      'DW_CFA_advance_loc4', 'DW_CFA_advance_loc'):
            _assert_FDE_instruction(instr)
            factored_offset = instr.args[0] * cie['code_alignment_factor']
            s += ' %s: %s to %08x\n' % (
                name, factored_offset, factored_offset + pc)
            # Keep tracking the location for the following instructions.
            pc += factored_offset
        elif name in ('DW_CFA_remember_state', 'DW_CFA_restore_state',
                      'DW_CFA_nop'):
            # Instructions without operands: only the name is shown.
            s += ' %s\n' % name
        elif name == 'DW_CFA_def_cfa':
            s += ' %s: %s ofs %s\n' % (
                name, _full_reg_name(instr.args[0]), instr.args[1])
        elif name == 'DW_CFA_def_cfa_sf':
            s += ' %s: %s ofs %s\n' % (
                name, _full_reg_name(instr.args[0]),
                instr.args[1] * cie['data_alignment_factor'])
        elif name == 'DW_CFA_def_cfa_offset':
            s += ' %s: %s\n' % (name, instr.args[0])
        elif name == 'DW_CFA_def_cfa_expression':
            expr_dumper = ExprDumper(entry.structs)
            expr_dumper.process_expr(instr.args[0])
            s += ' %s: (%s)\n' % (name, expr_dumper.get_str())
        elif name == 'DW_CFA_expression':
            expr_dumper = ExprDumper(entry.structs)
            expr_dumper.process_expr(instr.args[1])
            s += ' %s: %s (%s)\n' % (
                name, _full_reg_name(instr.args[0]), expr_dumper.get_str())
        else:
            s += ' %s: <??>\n' % name

    return s
def describe_CFI_register_rule(rule):
    """ Return a short textual description of a CFI register rule.

        rule: object with a .type naming the rule kind and an .arg holding
        its operand. The description starts with the text registered for
        the type in _DESCR_CFI_REGISTER_RULE_TYPE; offset rules append the
        signed offset and register rules append the register name.
    """
    s = _DESCR_CFI_REGISTER_RULE_TYPE[rule.type]
    if rule.type in ('OFFSET', 'VAL_OFFSET'):
        s += '%+d' % rule.arg
    elif rule.type == 'REGISTER':
        s += describe_reg_name(rule.arg)
    # The assembled description has to be handed back to the caller.
    return s
def describe_CFI_CFA_rule(rule):
    """ Describe a CFA rule as the name of its register immediately followed
        by its signed offset.

        rule: object providing the .reg and .offset attributes.
    """
    # NOTE(review): the line numbering embedded in this file skips between
    # the signature and the return statement; confirm that no other branch
    # of this function was lost.
    reg_name = describe_reg_name(rule.reg)
    return '%s%+d' % (reg_name, rule.offset)
def describe_DWARF_expr(expr, structs):
    """ Textual description of a DWARF expression encoded in 'expr'.
        structs should come from the entity encompassing the expression - it's
        needed to be able to parse it correctly.

        Returns the decoded expression wrapped in parentheses.
    """
    # Since this function can be called a lot, initializing a fresh new
    # ExprDumper per call is expensive. So a rudimentary caching scheme is in
    # place to create only one such dumper per instance of structs.
    # NOTE: the key is id(structs), which identifies an object only while
    # that object is alive.
    cache_key = id(structs)
    if cache_key not in _DWARF_EXPR_DUMPER_CACHE:
        _DWARF_EXPR_DUMPER_CACHE[cache_key] = ExprDumper(structs)

    dwarf_expr_dumper = _DWARF_EXPR_DUMPER_CACHE[cache_key]
    # The dumper is reused, so drop whatever a previous call accumulated.
    dwarf_expr_dumper.clear()
    dwarf_expr_dumper.process_expr(expr)
    return '(' + dwarf_expr_dumper.get_str() + ')'
def describe_reg_name(regnum, machine_arch=None):
    """ Provide a textual description for a register name, given its serial
        number. The number is expected to be valid.

        regnum: register number, used as an index into the register name
            table of the architecture
        machine_arch: 'x86' or 'x64'; when None, the architecture set with
            set_global_machine_arch is used

        Returns the register name, or '<none>' when there is no register
        table for the architecture.
    """
    if machine_arch is None:
        machine_arch = _MACHINE_ARCH

    if machine_arch == 'x86':
        return _REG_NAMES_x86[regnum]
    elif machine_arch == 'x64':
        return _REG_NAMES_x64[regnum]
    else:
        # Callers format or concatenate the result, so always return a
        # string; '<none>' is the placeholder the register tables use too.
        return '<none>'
163 #-------------------------------------------------------------------------------
# The machine architecture. Set globally via set_global_machine_arch.
# Defined here so that the name exists even before it has been set;
# describe_reg_name reads it when no architecture is passed explicitly.
_MACHINE_ARCH = None
def _describe_attr_ref(attr, die, section_offset):
    """ Describe a reference attribute: the attribute value is added to the
        offset of the DIE's CU and shown in hex between angle brackets.
    """
    target_offset = attr.value + die.cu.cu_offset
    return '<0x%x>' % target_offset
def _describe_attr_value_passthrough(attr, die, section_offset):
    """ Describe an attribute by handing back its value unchanged; the
        caller (describe_attr_value) converts it to a string.
    """
    return attr.value
def _describe_attr_hex(attr, die, section_offset):
    """ Describe an attribute value as a 0x-prefixed hexadecimal number.
    """
    number = attr.value
    return '0x%x' % number
def _describe_attr_hex_addr(attr, die, section_offset):
    """ Describe an attribute value as a hexadecimal number between angle
        brackets.
    """
    address = attr.value
    return '<0x%x>' % address
def _describe_attr_split_64bit(attr, die, section_offset):
    """ Describe a 64-bit attribute value as two 32-bit hexadecimal words,
        the low word first.
    """
    word_mask = 0xFFFFFFFF
    words = (attr.value & word_mask, (attr.value >> 32) & word_mask)
    return '0x%x 0x%x' % words
def _describe_attr_strp(attr, die, section_offset):
    """ Describe an indirect string attribute: the raw value is shown as the
        string's offset, followed by the string itself.
    """
    text = bytes2str(attr.value)
    return '(indirect string, offset: 0x%x): %s' % (attr.raw_value, text)
def _describe_attr_string(attr, die, section_offset):
    """ Describe a string attribute by converting its value with bytes2str.
    """
    text = bytes2str(attr.value)
    return text
def _describe_attr_debool(attr, die, section_offset):
    """ To be consistent with readelf, generate 1 for True flags, 0 for False
        flags.
    """
    if attr.value:
        return '1'
    return '0'
def _describe_attr_block(attr, die, section_offset):
    """ Describe a block attribute: the number of items in the block,
        followed by the items in hexadecimal, separated by spaces.
    """
    s = '%s byte block: ' % len(attr.value)
    s += ' '.join('%x' % item for item in attr.value)
    # The assembled description has to be handed back to the caller.
    return s
# Maps an attribute form name to the function describing values of that
# form. Forms that are not listed are described by passing the value through.
_ATTR_DESCRIPTION_MAP = defaultdict(
    lambda: _describe_attr_value_passthrough, # default_factory

    DW_FORM_ref1=_describe_attr_ref,
    DW_FORM_ref2=_describe_attr_ref,
    DW_FORM_ref4=_describe_attr_ref,
    DW_FORM_ref8=_describe_attr_split_64bit,
    DW_FORM_ref_udata=_describe_attr_ref,
    DW_FORM_ref_addr=_describe_attr_hex_addr,
    DW_FORM_data4=_describe_attr_hex,
    DW_FORM_data8=_describe_attr_split_64bit,
    DW_FORM_addr=_describe_attr_hex,
    DW_FORM_sec_offset=_describe_attr_hex,
    DW_FORM_flag=_describe_attr_debool,
    DW_FORM_data1=_describe_attr_value_passthrough,
    DW_FORM_data2=_describe_attr_value_passthrough,
    DW_FORM_sdata=_describe_attr_value_passthrough,
    DW_FORM_udata=_describe_attr_value_passthrough,
    DW_FORM_string=_describe_attr_string,
    DW_FORM_strp=_describe_attr_strp,
    DW_FORM_block1=_describe_attr_block,
    DW_FORM_block2=_describe_attr_block,
    DW_FORM_block4=_describe_attr_block,
    DW_FORM_block=_describe_attr_block,
)
# Descriptions of the DW_INL_* values; used for DW_AT_inline.
_DESCR_DW_INL = {
    DW_INL_not_inlined: '(not inlined)',
    DW_INL_inlined: '(inlined)',
    DW_INL_declared_not_inlined: '(declared as inline but ignored)',
    DW_INL_declared_inlined: '(declared as inline and inlined)',
}
# Descriptions of the DW_LANG_* values; used for DW_AT_language.
_DESCR_DW_LANG = {
    DW_LANG_C89: '(ANSI C)',
    DW_LANG_C: '(non-ANSI C)',
    DW_LANG_Ada83: '(Ada)',
    DW_LANG_C_plus_plus: '(C++)',
    DW_LANG_Cobol74: '(Cobol 74)',
    DW_LANG_Cobol85: '(Cobol 85)',
    DW_LANG_Fortran77: '(FORTRAN 77)',
    DW_LANG_Fortran90: '(Fortran 90)',
    DW_LANG_Pascal83: '(ANSI Pascal)',
    DW_LANG_Modula2: '(Modula 2)',
    DW_LANG_Java: '(Java)',
    DW_LANG_C99: '(ANSI C99)',
    DW_LANG_Ada95: '(ADA 95)',
    DW_LANG_Fortran95: '(Fortran 95)',
    DW_LANG_PLI: '(PLI)',
    DW_LANG_ObjC: '(Objective C)',
    DW_LANG_ObjC_plus_plus: '(Objective C++)',
    DW_LANG_UPC: '(Unified Parallel C)',
    DW_LANG_Python: '(Python)',
    DW_LANG_Mips_Assembler: '(MIPS assembler)',
    # Was '(nified Parallel C)': the leading 'U' had been dropped.
    DW_LANG_Upc: '(Unified Parallel C)',
    DW_LANG_HP_Bliss: '(HP Bliss)',
    DW_LANG_HP_Basic91: '(HP Basic 91)',
    DW_LANG_HP_Pascal91: '(HP Pascal 91)',
    DW_LANG_HP_IMacro: '(HP IMacro)',
    DW_LANG_HP_Assembler: '(HP assembler)',
}
# Descriptions of the DW_ATE_* values; used for DW_AT_encoding.
_DESCR_DW_ATE = {
    DW_ATE_void: '(void)',
    DW_ATE_address: '(machine address)',
    DW_ATE_boolean: '(boolean)',
    DW_ATE_complex_float: '(complex float)',
    DW_ATE_float: '(float)',
    DW_ATE_signed: '(signed)',
    DW_ATE_signed_char: '(signed char)',
    DW_ATE_unsigned: '(unsigned)',
    DW_ATE_unsigned_char: '(unsigned char)',
    DW_ATE_imaginary_float: '(imaginary float)',
    DW_ATE_decimal_float: '(decimal float)',
    DW_ATE_packed_decimal: '(packed_decimal)',
    DW_ATE_numeric_string: '(numeric_string)',
    DW_ATE_edited: '(edited)',
    DW_ATE_signed_fixed: '(signed_fixed)',
    DW_ATE_unsigned_fixed: '(unsigned_fixed)',
    DW_ATE_HP_float80: '(HP_float80)',
    DW_ATE_HP_complex_float80: '(HP_complex_float80)',
    DW_ATE_HP_float128: '(HP_float128)',
    DW_ATE_HP_complex_float128: '(HP_complex_float128)',
    DW_ATE_HP_floathpintel: '(HP_floathpintel)',
    DW_ATE_HP_imaginary_float80: '(HP_imaginary_float80)',
    DW_ATE_HP_imaginary_float128: '(HP_imaginary_float128)',
}
# Descriptions of the DW_ACCESS_* values; used for DW_AT_accessibility.
_DESCR_DW_ACCESS = {
    DW_ACCESS_public: '(public)',
    DW_ACCESS_protected: '(protected)',
    DW_ACCESS_private: '(private)',
}
# Descriptions of the DW_VIS_* values; used for DW_AT_visibility.
_DESCR_DW_VIS = {
    DW_VIS_local: '(local)',
    DW_VIS_exported: '(exported)',
    DW_VIS_qualified: '(qualified)',
}
# Descriptions of the DW_VIRTUALITY_* values; used for DW_AT_virtuality.
_DESCR_DW_VIRTUALITY = {
    DW_VIRTUALITY_none: '(none)',
    DW_VIRTUALITY_virtual: '(virtual)',
    DW_VIRTUALITY_pure_virtual: '(pure virtual)',
}
# Descriptions of the DW_ID_* values; used for DW_AT_identifier_case.
_DESCR_DW_ID_CASE = {
    DW_ID_case_sensitive: '(case_sensitive)',
    DW_ID_up_case: '(up_case)',
    DW_ID_down_case: '(down_case)',
    DW_ID_case_insensitive: '(case_insensitive)',
}
# Descriptions of the DW_CC_* values; used for DW_AT_calling_convention.
_DESCR_DW_CC = {
    DW_CC_normal: '(normal)',
    DW_CC_program: '(program)',
    DW_CC_nocall: '(nocall)',
}
# Descriptions of the DW_ORD_* values; used for DW_AT_ordering.
_DESCR_DW_ORD = {
    DW_ORD_row_major: '(row major)',
    DW_ORD_col_major: '(column major)',
}
# Prefix used by describe_CFI_register_rule for each register rule type.
# NOTE(review): only the VAL_EXPRESSION entry was present in this copy of the
# file. The other entries were restored after the abbreviations readelf uses
# in its interpreted frame dump (describe_CFI_register_rule looks up at least
# OFFSET, VAL_OFFSET and REGISTER) -- confirm them against the upstream file.
_DESCR_CFI_REGISTER_RULE_TYPE = dict(
    UNDEFINED='u',
    SAME_VALUE='s',
    OFFSET='c',
    VAL_OFFSET='v',
    REGISTER='',
    EXPRESSION='exp',
    VAL_EXPRESSION='vexp',
    ARCHITECTURAL='a',
)
def _make_extra_mapper(mapping, default, default_interpolate_value=False):
    """ Create a mapping function from attribute parameters to an extra
        value that should be displayed.

        mapping: dict from attribute values to their descriptions
        default: description used for values that are not in mapping
        default_interpolate_value: when true, default is a %-format string
            into which the attribute value is interpolated

        Returns a function taking (attr, die, section_offset).
    """
    def mapper(attr, die, section_offset):
        if attr.value in mapping:
            return mapping[attr.value]
        # The fallback is only built when it is actually needed, so a known
        # value never goes through the format string.
        if default_interpolate_value:
            return default % attr.value
        return default
    return mapper
def _make_extra_string(s=''):
    """ Create an extra function that just returns a constant string.

        s: the string the created function returns for every attribute.
    """
    def extra(attr, die, section_offset):
        return s
    return extra
# Cache of expression dumpers used by describe_DWARF_expr, keyed by the id()
# of the structs object each dumper is meant for.
_DWARF_EXPR_DUMPER_CACHE = {}
def _location_list_extra(attr, die, section_offset):
    """ Extra information for attributes holding a location: either a marker
        saying that the value refers to a location list, or the textual
        description of the location expression.
    """
    # According to section 2.6 of the DWARF spec v3, class loclistptr means
    # a location list, and class block means a location expression.
    # DWARF v4 encodes loclistptr with DW_FORM_sec_offset, so that form is a
    # location list as well; its value is an offset, not an expression block.
    if attr.form in ('DW_FORM_data4', 'DW_FORM_data8', 'DW_FORM_sec_offset'):
        return '(location list)'
    else:
        return describe_DWARF_expr(attr.value, die.cu.structs)
def _import_extra(attr, die, section_offset):
    """ Extra information for DW_AT_import: the abbreviation number and the
        tag of the DIE the attribute refers to, or '[unknown]' when no CU
        contains the referred offset.
    """
    # Imported here because DIE is not among the names imported at the top
    # of this module.
    # NOTE(review): assumes the DIE class lives in the sibling .die module
    # -- confirm.
    from .die import DIE

    # For DW_AT_import the value points to a DIE (that can be either in the
    # current DIE's CU or in another CU, depending on the FORM). The extra
    # information for it is the abbreviation number in this DIE and its tag.
    if attr.form == 'DW_FORM_ref_addr':
        # Absolute offset value
        ref_die_offset = section_offset + attr.value
    else:
        # Relative offset to the current DIE's CU
        ref_die_offset = attr.value + die.cu.cu_offset

    # Now find the CU this DIE belongs to (since we have to find its abbrev
    # table). This is done by linearly scanning through all CUs, looking for
    # one spanning an address space containing the referred DIE's offset.
    for cu in die.dwarfinfo.iter_CUs():
        if cu['unit_length'] + cu.cu_offset > ref_die_offset >= cu.cu_offset:
            # Once we have the CU, we can actually parse this DIE from the
            # stream, restoring the stream position afterwards.
            with preserve_stream_pos(die.stream):
                ref_die = DIE(cu, die.stream, ref_die_offset)
            return '[Abbrev Number: %s (%s)]' % (
                ref_die.abbrev_code, ref_die.tag)

    # No CU spans the offset. The caller concatenates the result to a
    # string, so a string has to be returned here as well.
    return '[unknown]'
# Maps an attribute name to the function producing the extra information
# displayed after the attribute's value. Attributes that are not listed get
# an empty string.
_EXTRA_INFO_DESCRIPTION_MAP = defaultdict(
    lambda: _make_extra_string(''), # default_factory

    DW_AT_inline=_make_extra_mapper(
        # The closing parenthesis was missing from this message.
        _DESCR_DW_INL, '(Unknown inline attribute value: %x)',
        default_interpolate_value=True),
    DW_AT_language=_make_extra_mapper(
        _DESCR_DW_LANG, '(Unknown: %x)', default_interpolate_value=True),
    DW_AT_encoding=_make_extra_mapper(_DESCR_DW_ATE, '(unknown type)'),
    DW_AT_accessibility=_make_extra_mapper(
        _DESCR_DW_ACCESS, '(unknown accessibility)'),
    DW_AT_visibility=_make_extra_mapper(
        _DESCR_DW_VIS, '(unknown visibility)'),
    DW_AT_virtuality=_make_extra_mapper(
        _DESCR_DW_VIRTUALITY, '(unknown virtuality)'),
    DW_AT_identifier_case=_make_extra_mapper(
        _DESCR_DW_ID_CASE, '(unknown case)'),
    DW_AT_calling_convention=_make_extra_mapper(
        _DESCR_DW_CC, '(unknown convention)'),
    DW_AT_ordering=_make_extra_mapper(
        _DESCR_DW_ORD, '(undefined)'),
    DW_AT_frame_base=_location_list_extra,
    DW_AT_location=_location_list_extra,
    DW_AT_string_length=_location_list_extra,
    DW_AT_return_addr=_location_list_extra,
    DW_AT_data_member_location=_location_list_extra,
    DW_AT_vtable_elem_location=_location_list_extra,
    DW_AT_segment=_location_list_extra,
    DW_AT_static_link=_location_list_extra,
    DW_AT_use_location=_location_list_extra,
    DW_AT_allocated=_location_list_extra,
    DW_AT_associated=_location_list_extra,
    DW_AT_data_location=_location_list_extra,
    DW_AT_stride=_location_list_extra,
    DW_AT_import=_import_extra,
)
# Register names for x86; describe_reg_name indexes this list with the
# register number.
# 8 in a line, for easier counting
_REG_NAMES_x86 = [
    'eax', 'ecx', 'edx', 'ebx', 'esp', 'ebp', 'esi', 'edi',
    'eip', 'eflags', '<none>', 'st0', 'st1', 'st2', 'st3', 'st4',
    'st5', 'st6', 'st7', '<none>', '<none>', 'xmm0', 'xmm1', 'xmm2',
    'xmm3', 'xmm4', 'xmm5', 'xmm6', 'xmm7', 'mm0', 'mm1', 'mm2',
    'mm3', 'mm4', 'mm5', 'mm6', 'mm7', 'fcw', 'fsw', 'mxcsr',
    'es', 'cs', 'ss', 'ds', 'fs', 'gs', '<none>', '<none>', 'tr', 'ldtr'
]
# Register names for x64; describe_reg_name indexes this list with the
# register number.
# 8 in a line, for easier counting
_REG_NAMES_x64 = [
    'rax', 'rdx', 'rcx', 'rbx', 'rsi', 'rdi', 'rbp', 'rsp',
    'r8', 'r9', 'r10', 'r11', 'r12', 'r13', 'r14', 'r15',
    'rip', 'xmm0', 'xmm1', 'xmm2', 'xmm3', 'xmm4', 'xmm5', 'xmm6',
    'xmm7', 'xmm8', 'xmm9', 'xmm10', 'xmm11', 'xmm12', 'xmm13', 'xmm14',
    'xmm15', 'st0', 'st1', 'st2', 'st3', 'st4', 'st5', 'st6',
    'st7', 'mm0', 'mm1', 'mm2', 'mm3', 'mm4', 'mm5', 'mm6',
    'mm7', 'rflags', 'es', 'cs', 'ss', 'ds', 'fs', 'gs',
    '<none>', '<none>', 'fs.base', 'gs.base', '<none>', '<none>', 'tr', 'ldtr',
    'mxcsr', 'fcw', 'fsw'
]
class ExprDumper(GenericExprVisitor):
    """ A concrete visitor for DWARF expressions that dumps a textual
        representation of the complete expression.

        Usage: after creation, call process_expr, and then get_str for a
        semicolon-delimited string representation of the decoded expression.
        Call clear before reusing the same dumper for another expression.
    """
    def __init__(self, structs):
        super(ExprDumper, self).__init__(structs)
        # Building the lookup sets only adds fixed names to fresh sets, so
        # doing it here is safe whatever the base class does.
        self._init_lookups()
        # Descriptions of the operations visited so far, in visiting order.
        self._str_parts = []

    def clear(self):
        """ Forget the descriptions accumulated so far.
        """
        self._str_parts = []

    def get_str(self):
        """ Return the descriptions accumulated so far, joined with '; '.
        """
        return '; '.join(self._str_parts)

    def _init_lookups(self):
        # Operations are grouped by how their arguments are displayed.
        self._ops_with_decimal_arg = set([
            'DW_OP_const1u', 'DW_OP_const1s', 'DW_OP_const2u', 'DW_OP_const2s',
            'DW_OP_const4u', 'DW_OP_const4s', 'DW_OP_constu', 'DW_OP_consts',
            'DW_OP_pick', 'DW_OP_plus_uconst', 'DW_OP_bra', 'DW_OP_skip',
            'DW_OP_fbreg', 'DW_OP_piece', 'DW_OP_deref_size',
            'DW_OP_xderef_size', 'DW_OP_regx',])

        for n in range(0, 32):
            self._ops_with_decimal_arg.add('DW_OP_breg%s' % n)

        self._ops_with_two_decimal_args = set([
            'DW_OP_const8u', 'DW_OP_const8s', 'DW_OP_bregx', 'DW_OP_bit_piece'])

        self._ops_with_hex_arg = set(
            ['DW_OP_addr', 'DW_OP_call2', 'DW_OP_call4', 'DW_OP_call_ref'])

    def _after_visit(self, opcode, opcode_name, args):
        self._str_parts.append(self._dump_to_string(opcode, opcode_name, args))

    def _dump_to_string(self, opcode, opcode_name, args):
        """ Return the textual description of a single operation, given its
            name and its decoded arguments.
        """
        if len(args) == 0:
            # Only the argument-less DW_OP_reg<n> operations carry the
            # register number in their name; DW_OP_regx takes it as an
            # argument and is handled further down.
            if opcode_name.startswith('DW_OP_reg'):
                regnum = int(opcode_name[9:])
                return '%s (%s)' % (
                    opcode_name,
                    describe_reg_name(regnum, _MACHINE_ARCH))
            else:
                return opcode_name
        elif opcode_name in self._ops_with_decimal_arg:
            if opcode_name.startswith('DW_OP_breg'):
                regnum = int(opcode_name[10:])
                return '%s (%s): %s' % (
                    opcode_name,
                    describe_reg_name(regnum, _MACHINE_ARCH),
                    args[0])
            elif opcode_name.endswith('regx'):
                # Of the operations in the decimal-argument set, only
                # DW_OP_regx ends with 'regx'; DW_OP_bregx is in the
                # two-argument set.
                return '%s: %s (%s)' % (
                    opcode_name,
                    args[0],
                    describe_reg_name(args[0], _MACHINE_ARCH))
            else:
                return '%s: %s' % (opcode_name, args[0])
        elif opcode_name in self._ops_with_hex_arg:
            return '%s: %x' % (opcode_name, args[0])
        elif opcode_name in self._ops_with_two_decimal_args:
            return '%s: %s %s' % (opcode_name, args[0], args[1])
        else:
            return '<unknown %s>' % opcode_name