From 1d24041adc4750c7dde85345dd6f20e1d3fe5aca Mon Sep 17 00:00:00 2001 From: Tom Tromey Date: Thu, 31 Jan 2013 17:32:34 +0000 Subject: [PATCH] * lib/dwarf.exp (namespace Dwarf): New. --- gdb/testsuite/ChangeLog | 4 + gdb/testsuite/lib/dwarf.exp | 728 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 732 insertions(+) diff --git a/gdb/testsuite/ChangeLog b/gdb/testsuite/ChangeLog index ba5fa5d..3b53031 100644 --- a/gdb/testsuite/ChangeLog +++ b/gdb/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2013-01-31 Tom Tromey + + * lib/dwarf.exp (namespace Dwarf): New. + 2013-01-29 Tom Tromey * gdb.dwarf2/fission-reread.exp: Add unload test. diff --git a/gdb/testsuite/lib/dwarf.exp b/gdb/testsuite/lib/dwarf.exp index 9028714..28e8e26 100644 --- a/gdb/testsuite/lib/dwarf.exp +++ b/gdb/testsuite/lib/dwarf.exp @@ -28,3 +28,731 @@ proc dwarf2_support {} { return 0 } + +# A DWARF assembler. +# +# All the variables in this namespace are private to the +# implementation. Also, any procedure whose name starts with "_" is +# private as well. Do not use these. +# +# Exported functions are documented at their definition. +# +# In addition to the hand-written functions documented below, this +# module automatically generates a function for each DWARF tag. For +# most tags, two forms are made: a full name, and one with the +# "DW_TAG_" prefix stripped. For example, you can use either +# 'DW_TAG_compile_unit' or 'compile_unit' interchangeably. +# +# There are two exceptions to this rule: DW_TAG_variable and +# DW_TAG_namespace. For these, the full name must always be used, +# as the short name conflicts with Tcl builtins. (Should future +# versions of Tcl or DWARF add more conflicts, this list will grow. +# If you want to be safe you should always use the full names.) +# +# Each tag procedure is defined like: +# +# proc DW_TAG_mumble {{attrs {}} {children {}}} { ... } +# +# ATTRS is an optional list of attributes. +# It is run through 'subst' in the caller's context before processing. +# +# Each attribute in the list has one of two forms: +# 1. { NAME VALUE } +# 2. { NAME VALUE FORM } +# +# In each case, NAME is the attribute's name. +# This can either be the full name, like 'DW_AT_name', or a shortened +# name, like 'name'. These are fully equivalent. +# +# If FORM is given, it should name a DW_FORM_ constant. +# This can either be the short form, like 'DW_FORM_addr', or a +# shortened version, like 'addr'. If the form is given, VALUE +# is its value; see below. In some cases, additional processing +# is done; for example, DW_FORM_strp manages the .debug_str +# section automatically. +# +# If FORM is 'SPECIAL_expr', then VALUE is treated as a location +# expression. The effective form is then DW_FORM_block, and VALUE +# is passed to the (internal) '_location' proc to be translated. +# This proc implements a miniature DW_OP_ assembler. +# +# If FORM is not given, it is guessed: +# * If VALUE starts with the "@" character, the rest of VALUE is +# looked up as a DWARF constant, and DW_FORM_sdata is used. For +# example, '@DW_LANG_c89' could be used. +# * If VALUE starts with the ":" character, then it is a label +# reference. The rest of VALUE is taken to be the name of a label, +# and DW_FORM_ref4 is used. See 'new_label' and 'define_label'. +# * Otherwise, VALUE is taken to be a string and DW_FORM_string is +# used. +# More form-guessing functionality may be added. +# +# CHILDREN is just Tcl code that can be used to define child DIEs. It +# is evaluated in the caller's context. +# +# Currently this code is missing nice support for CFA handling, and +# probably other things as well. + +namespace eval Dwarf { + # True if the module has been initialized. + variable _initialized 0 + + # Constants from dwarf2.h. + variable _constants + # DW_AT short names. + variable _AT + # DW_FORM short names. + variable _FORM + # DW_OP short names. + variable _OP + + # The current output file. + variable _output_file + + # Current CU count. + variable _cu_count + + # The current CU's base label. + variable _cu_label + + # The current CU's version. + variable _cu_version + + # The current CU's address size. + variable _cu_addr_size + # The current CU's offset size. + variable _cu_offset_size + + # Label generation number. + variable _label_num + + # The deferred output array. The index is the section name; the + # contents hold the data for that section. + variable _deferred_output + + # If empty, we should write directly to the output file. + # Otherwise, this is the name of a section to write to. + variable _defer + + # The next available abbrev number in the current CU's abbrev + # table. + variable _abbrev_num + + # The string table for this assembly. The key is the string; the + # value is the label for that string. + variable _strings + + proc _process_one_constant {name value} { + variable _constants + variable _AT + variable _FORM + variable _OP + + set _constants($name) $value + + if {![regexp "^DW_(\[A-Z\]+)_(\[A-Za-z0-9_\]+)$" $name \ + ignore prefix name2]} { + error "non-matching name: $name" + } + + if {$name2 == "lo_user" || $name2 == "hi_user"} { + return + } + + # We only try to shorten some very common things. + # FIXME: CFA? + switch -exact -- $prefix { + TAG { + # Create two procedures for the tag. These call + # _handle_DW_TAG with the full tag name baked in; this + # does all the actual work. + proc $name {{attrs {}} {children {}}} \ + "_handle_DW_TAG $name \$attrs \$children" + + # Filter out ones that are known to clash. + if {$name2 == "variable" || $name2 == "namespace"} { + set name2 "tag_$name2" + } + + if {[info commands $name2] != {}} { + error "duplicate proc name: from $name" + } + + proc $name2 {{attrs {}} {children {}}} \ + "_handle_DW_TAG $name \$attrs \$children" + } + + AT { + set _AT($name2) $name + } + + FORM { + set _FORM($name2) $name + } + + OP { + set _OP($name2) $name + } + + default { + return + } + } + } + + proc _read_constants {} { + global srcdir hex decimal + variable _constants + + # DWARF name-matching regexp. + set dwrx "DW_\[a-zA-Z0-9_\]+" + # Whitespace regexp. + set ws "\[ \t\]+" + + set fd [open [file join $srcdir .. .. include dwarf2.h]] + while {![eof $fd]} { + set line [gets $fd] + if {[regexp -- "^${ws}($dwrx)${ws}=${ws}($hex|$decimal),?$" \ + $line ignore name value ignore2]} { + _process_one_constant $name $value + } + } + close $fd + + set fd [open [file join $srcdir .. .. include dwarf2.def]] + while {![eof $fd]} { + set line [gets $fd] + if {[regexp -- \ + "^DW_\[A-Z_\]+${ws}\\(($dwrx),${ws}($hex|$decimal)\\)$" \ + $line ignore name value ignore2]} { + _process_one_constant $name $value + } + } + close $fd + + set _constants(SPECIAL_expr) $_constants(DW_FORM_block) + } + + proc _quote {string} { + # FIXME + return "\"${string}\\0\"" + } + + proc _handle_DW_FORM {form value} { + switch -exact -- $form { + DW_FORM_string { + _op .ascii [_quote $value] + } + + DW_FORM_flag_present { + # We don't need to emit anything. + } + + DW_FORM_data4 - + DW_FORM_ref4 { + _op .4byte $value + } + + DW_FORM_ref_addr { + variable _cu_offset_size + variable _cu_version + variable _cu_addr_size + + if {$_cu_version == 2} { + set size $_cu_addr_size + } else { + set size $_cu_offset_size + } + + _op .${size}byte $value + } + + DW_FORM_ref1 - + DW_FORM_flag - + DW_FORM_data1 { + _op .byte $value + } + + DW_FORM_sdata { + _op .sleb128 $value + } + + DW_FORM_ref_udata - + DW_FORM_udata { + _op .uleb128 $value + } + + DW_FORM_addr { + variable _cu_addr_size + + _op .${_cu_addr_size}byte $value + } + + DW_FORM_data2 - + DW_FORM_ref2 { + _op .2byte $value + } + + DW_FORM_data8 - + DW_FORM_ref8 - + DW_FORM_ref_sig8 { + _op .8byte $value + } + + DW_FORM_strp { + variable _strings + variable _cu_offset_size + + if {![info exists _strings($value)]} { + set _strings($value) [new_label strp] + _defer_output .debug_string { + define_label $_strings($value) + _op .ascii [_quote $value] + } + } + + _op .${_cu_offset_size}byte $_strings($value) "strp: $value" + } + + SPECIAL_expr { + set l1 [new_label "expr_start"] + set l2 [new_label "expr_end"] + _op .uleb128 "$l2 - $l1" "expression" + define_label $l1 + _location $value + define_label $l2 + } + + DW_FORM_block2 - + DW_FORM_block4 - + + DW_FORM_block - + DW_FORM_block1 - + + DW_FORM_ref2 - + DW_FORM_indirect - + DW_FORM_sec_offset - + DW_FORM_exprloc - + + DW_FORM_GNU_addr_index - + DW_FORM_GNU_str_index - + DW_FORM_GNU_ref_alt - + DW_FORM_GNU_strp_alt - + + default { + error "unhandled form $form" + } + } + } + + proc _guess_form {value varname} { + upvar $varname new_value + + switch -exact -- [string range $value 0 0] { + @ { + # Constant reference. + variable _constants + + set new_value $_constants([string range $value 1 end]) + # Just the simplest. + return DW_FORM_sdata + } + + : { + # Label reference. + variable _cu_label + + set new_value "[string range $value 1 end] - $_cu_label" + + return DW_FORM_ref4 + } + + default { + return DW_FORM_string + } + } + } + + # Map NAME to its canonical form. + proc _map_name {name ary} { + variable $ary + + if {[info exists ${ary}($name)]} { + set name [set ${ary}($name)] + } + + return $name + } + + proc _handle_DW_TAG {tag_name {attrs {}} {children {}}} { + variable _abbrev_num + variable _constants + + set has_children [expr {[string length $children] > 0}] + set my_abbrev [incr _abbrev_num] + + # We somewhat wastefully emit a new abbrev entry for each tag. + # There's no reason for this other than laziness. + _defer_output .debug_abbrev { + _op .uleb128 $my_abbrev "Abbrev start" + _op .uleb128 $_constants($tag_name) $tag_name + _op .byte $has_children "has_children" + } + + _op .uleb128 $my_abbrev "Abbrev ($tag_name)" + + foreach attr $attrs { + set attr_name [_map_name [lindex $attr 0] _AT] + set attr_value [uplevel 2 [list subst [lindex $attr 1]]] + if {[llength $attr] > 2} { + set attr_form [lindex $attr 2] + } else { + set attr_form [_guess_form $attr_value attr_value] + } + set attr_form [_map_name $attr_form _FORM] + + _handle_DW_FORM $attr_form $attr_value + + _defer_output .debug_abbrev { + _op .uleb128 $_constants($attr_name) $attr_name + _op .uleb128 $_constants($attr_form) $attr_form + } + } + + _defer_output .debug_abbrev { + # Terminator. + _op .byte 0x0 Terminator + _op .byte 0x0 Terminator + } + + if {$has_children} { + uplevel 2 $children + + # Terminate children. + _op .byte 0x0 "Terminate children" + } + } + + proc _emit {string} { + variable _output_file + variable _defer + variable _deferred_output + + if {$_defer == ""} { + puts $_output_file $string + } else { + append _deferred_output($_defer) ${string}\n + } + } + + proc _section {name} { + _emit " .section $name" + } + + proc _defer_output {section body} { + variable _defer + variable _deferred_output + + set old_defer $_defer + set _defer $section + + if {![info exists _deferred_output($_defer)]} { + set _deferred_output($_defer) "" + _section $section + } + + uplevel $body + + set _defer $old_defer + } + + proc _defer_to_string {body} { + variable _defer + variable _deferred_output + + set old_defer $_defer + set _defer temp + + set _deferred_output($_defer) "" + + uplevel $body + + set result $_deferred_output($_defer) + unset _deferred_output($_defer) + + set _defer $old_defer + return $result + } + + proc _write_deferred_output {} { + variable _output_file + variable _deferred_output + + foreach section [array names _deferred_output] { + # The data already has a newline. + puts -nonewline $_output_file $_deferred_output($section) + } + + # Save some memory. + unset _deferred_output + } + + proc _op {name value {comment ""}} { + set text " ${name} ${value}" + if {$comment != ""} { + # Try to make stuff line up nicely. + while {[string length $text] < 40} { + append text " " + } + append text "/* ${comment} */" + } + _emit $text + } + + proc _compute_label {name} { + return ".L${name}" + } + + # Return a name suitable for use as a label. If BASE_NAME is + # specified, it is incorporated into the label name; this is to + # make debugging the generated assembler easier. If BASE_NAME is + # not specified a generic default is used. This proc does not + # define the label; see 'define_label'. 'new_label' attempts to + # ensure that label names are unique. + proc new_label {{base_name label}} { + variable _label_num + + return [_compute_label ${base_name}[incr _label_num]] + } + + # Define a label named NAME. Ordinarily, NAME comes from a call + # to 'new_label', but this is not required. + proc define_label {name} { + _emit "${name}:" + } + + # Declare a global label. This is typically used to refer to + # labels defined in other files, for example a function defined in + # a .c file. + proc extern {args} { + foreach name $args { + _op .global $name + } + } + + # A higher-level interface to label handling. + # + # ARGS is a list of label descriptors. Each one is either a + # single element, or a list of two elements -- a name and some + # text. For each descriptor, 'new_label' is invoked. If the list + # form is used, the second element in the list is passed as an + # argument. The label name is used to define a variable in the + # enclosing scope; this can be used to refer to the label later. + # The label name is also used to define a new proc whose name is + # the label name plus a trailing ":". This proc takes a body as + # an argument and can be used to define the label at that point; + # then the body, if any, is evaluated in the caller's context. + # + # For example: + # + # declare_labels int_label + # something { ... $int_label } ;# refer to the label + # int_label: constant { ... } ;# define the label + proc declare_labels {args} { + foreach arg $args { + set name [lindex $arg 0] + set text [lindex $arg 1] + + upvar $name label_var + if {$text == ""} { + set label_var [new_label] + } else { + set label_var [new_label $text] + } + + proc ${name}: {args} [format { + define_label %s + uplevel $args + } $label_var] + } + } + + # This is a miniature assembler for location expressions. It is + # suitable for use in the attributes to a DIE. Its output is + # prefixed with "=" to make it automatically use DW_FORM_block. + # BODY is split by lines, and each line is taken to be a list. + # (FIXME should use 'info complete' here.) + # Each list's first element is the opcode, either short or long + # forms are accepted. + # FIXME argument handling + # FIXME move docs + proc _location {body} { + variable _constants + + foreach line [split $body \n] { + if {[lindex $line 0] == ""} { + continue + } + set opcode [_map_name [lindex $line 0] _OP] + _op .byte $_constants($opcode) $opcode + + switch -exact -- $opcode { + DW_OP_addr { + variable _cu_addr_size + + _op .${_cu_addr_size}byte [lindex $line 1] + } + + DW_OP_const1u - + DW_OP_const1s { + _op .byte [lindex $line 1] + } + + DW_OP_const2u - + DW_OP_const2s { + _op .2byte [lindex $line 1] + } + + DW_OP_const4u - + DW_OP_const4s { + _op .4byte [lindex $line 1] + } + + DW_OP_const8u - + DW_OP_const8s { + _op .8byte [lindex $line 1] + } + + DW_OP_constu { + _op .uleb128 [lindex $line 1] + } + DW_OP_consts { + _op .sleb128 [lindex $line 1] + } + + default { + if {[llength $line] > 1} { + error "Unimplemented: operands in location for $opcode" + } + } + } + } + } + + # Emit a DWARF CU. + # IS_64 is a boolean which is true if you want to emit 64-bit + # DWARF, and false for 32-bit DWARF. + # VERSION is the DWARF version number to emit. + # ADDR_SIZE is the size of addresses in bytes. + # BODY is Tcl code that emits the DIEs which make up the body of + # the CU. It is evaluated in the caller's context. + proc cu {is_64 version addr_size body} { + variable _cu_count + variable _abbrev_num + variable _cu_label + variable _cu_version + variable _cu_addr_size + variable _cu_offset_size + + set _cu_version $version + if {$is_64} { + set _cu_offset_size 8 + } else { + set _cu_offset_size 4 + } + set _cu_addr_size $addr_size + + _section .debug_info + + set cu_num [incr _cu_count] + set my_abbrevs [_compute_label "abbrev${cu_num}_begin"] + set _abbrev_num 1 + + set _cu_label [_compute_label "cu${cu_num}_begin"] + set start_label [_compute_label "cu${cu_num}_start"] + set end_label [_compute_label "cu${cu_num}_end"] + + define_label $_cu_label + if {$is_64} { + _op .4byte 0xffffffff + _op .8byte "$end_label - $start_label" + } else { + _op .4byte "$end_label - $start_label" + } + define_label $start_label + _op .2byte $version Version + _op .4byte $my_abbrevs Abbrevs + _op .byte $addr_size "Pointer size" + + _defer_output .debug_abbrev { + define_label $my_abbrevs + } + + uplevel $body + + _defer_output .debug_abbrev { + # Emit the terminator. + _op .byte 0x0 Terminator + _op .byte 0x0 Terminator + } + + define_label $end_label + } + + proc _empty_array {name} { + upvar $name the_array + + catch {unset the_array} + set the_array(_) {} + unset the_array(_) + } + + # The top-level interface to the DWARF assembler. + # FILENAME is the name of the file where the generated assembly + # code is written. + # BODY is Tcl code to emit the assembly. It is evaluated via + # "eval" -- not uplevel as you might expect, because it is + # important to run the body in the Dwarf namespace. + # + # A typical invocation is something like: + # Dwarf::assemble $file { + # cu 0 2 8 { + # compile_unit { + # ... + # } + # } + # cu 0 2 8 { + # ... + # } + # } + proc assemble {filename body} { + variable _initialized + variable _output_file + variable _deferred_output + variable _defer + variable _label_num + variable _strings + + if {!$_initialized} { + _read_constants + set _initialized 1 + } + + set _output_file [open $filename w] + set _cu_count 0 + _empty_array _deferred_output + set _defer "" + set _label_num 0 + _empty_array _strings + + # Not "uplevel" here, because we want to evaluate in this + # namespace. This is somewhat bad because it means we can't + # readily refer to outer variables. + eval $body + + _write_deferred_output + + catch {close $_output_file} + set _output_file {} + } +} -- 2.7.4