1 /* Copyright Dima Kogan <dima@secretsauce.net>
3 * This program is free software; you can redistribute it and/or modify it under
4 * the terms of version 2 of the GNU General Public License as published by the
5 * Free Software Foundation.
9 #include <elfutils/libdwfl.h>
16 #include "prototype.h"
21 #include "lens_enum.h"
29 //#define DUMP_PROTOTYPES
// Diagnostic helper. The first definition writes a line to stderr made of the
// calling function's name (__func__), the name of 'die' and the offset of
// 'die', with the caller's format string appended. The second definition
// expands to nothing, which silences all diagnostics.
// NOTE(review): the preprocessor conditional selecting between the two
// definitions, and the tail of the first definition, are not visible in this
// excerpt -- confirm which variant is active
32 #define complain( die, format, ... ) \
33 fprintf(stderr, "%s() die '%s' @ 0x%lx: " format "\n", \
34 __func__, dwarf_diename(die), dwarf_dieoffset(die), \
37 #define complain( die, format, ... )
40 // A map from DIE addresses (Dwarf_Off) to type structures (struct
41 // arg_type_info*). This is created and filled in at the start of each import,
42 // and deleted when the import is complete
// It is initialised and destroyed in import(); get_type() does the lookups and
// insertions
43 static struct dict type_hash;
// Forward declaration: get_array() and get_structure() call get_type() before
// its definition further down in this file
46 static bool get_type(struct arg_type_info** info, Dwarf_Die* type_die);
50 static bool _dump_dwarf_tree(Dwarf_Die* die, int indent)
53 fprintf(stderr, "%*sprocessing unit: 0x%02x/'%s'\n", indent*4, "",
54 dwarf_tag(die), dwarf_diename(die));
57 if (dwarf_child(die, &child) == 0) {
58 if (!_dump_dwarf_tree(&child, indent+1))
62 int res = dwarf_siblingof(die, die);
63 if (res == 0 ) continue; // sibling exists
64 if (res < 0 ) return false; // error
65 break; // no sibling exists
71 static bool dump_dwarf_tree(Dwarf_Die* die)
73 return _dump_dwarf_tree( die, 0 );
77 #ifdef DUMP_PROTOTYPES
// Debugging aid: writes a one-line description of the ltrace type 'info' to
// stderr, prefixed with the pointer value and indented by 4*indent columns.
// Pointer targets and struct fields are printed recursively, one level deeper.
// Array elements are not printed.
// NOTE(review): the conditions guarding the first two messages, most case
// labels and the return statements are not visible in this excerpt
78 static bool _dump_ltrace_tree(const struct arg_type_info* info, int indent)
// Printed in place of a subtree; presumably a recursion-depth cutoff -- the
// guarding condition is not visible
81 fprintf(stderr, "%*s%p ...\n", indent*4, "", (void*)info);
86 fprintf(stderr, "%*s%p NULL\n", indent*4, "", (void*)info);
92 fprintf(stderr, "%*s%p void\n", indent*4, "", (void*)info);
104 fprintf(stderr, "%*s%p base\n", indent*4, "", (void*)info);
108 fprintf(stderr, "%*s%p array. elements not printed\n", indent*4, "",
112 case ARGTYPE_POINTER:
113 fprintf(stderr, "%*s%p pointer to...\n", indent*4, "", (void*)info);
// Recurse into the pointed-to type
114 _dump_ltrace_tree( info->u.ptr_info.info, indent+1 );
118 fprintf(stderr, "%*s%p struct...\n", indent*4, "", (void*)info);
// A local struct type is declared here and used to view the raw storage of
// info->u.entries as an array of fields.
// NOTE(review): this layout presumably has to mirror the field record used by
// the type code that fills u.entries -- confirm
121 struct arg_type_info *info;
123 }* elements = (struct struct_field*)info->u.entries.data;
// Dump every field's type one level deeper
125 for(i=0; i<info->u.entries.size; i++)
126 _dump_ltrace_tree( elements[i].info, indent+1 );
130 fprintf(stderr, "%*s%p unknown type\n", indent*4, "", (void*)info);
// Debugging entry point: dumps the ltrace type 'info' to stderr starting at
// indentation level 0. Returns the result of _dump_ltrace_tree().
static bool dump_ltrace_tree(const struct arg_type_info* info)
{
    const int top_level_indent = 0;
    return _dump_ltrace_tree(info, top_level_indent);
}
144 // pulls a numerical value out of a particular attribute in a die. Returns true
145 // if successful. The result is returned in *result. Note that this is cast to
146 // (uint64_t), regardless of the actual type of the input
// NOTE(review): the declaration of 'u', the switch on 'form' with its case
// labels, and the return statements are not visible in this excerpt
147 static bool get_die_numeric(uint64_t* result,
148 Dwarf_Die *die, unsigned int attr_name)
150 Dwarf_Attribute attr ;
// Look the attribute up on the DIE; a NULL result is the "attribute not
// available" path
159 if (dwarf_attr(die, attr_name, &attr) == NULL)
// The attribute's form decides which libdw decoder is used below
162 unsigned int form = dwarf_whatform(&attr);
// PROCESS_NUMERIC(type) decodes the attribute with dwarf_form<type>() into
// u.<type>, treats a nonzero return as failure, and otherwise stores the
// decoded value into *result, cast to uint64_t
164 #define PROCESS_NUMERIC(type) \
165 if (dwarf_form ## type(&attr, &u.type) != 0) \
167 *result = (uint64_t)u.type; \
173 PROCESS_NUMERIC(addr);
180 PROCESS_NUMERIC(udata);
183 PROCESS_NUMERIC(sdata);
186 PROCESS_NUMERIC(flag);
// Reached for forms that none of the decoders above are applied to
189 complain(die, "Unknown numeric form %d for attr_name: %d", form, attr_name);
192 #undef PROCESS_NUMERIC
// Maps a DW_TAG_base_type DIE to an ltrace argument type, using the DIE's
// DW_AT_encoding and, for everything except void and the char encodings, its
// DW_AT_byte_size.
// NOTE(review): the declarations of 'encoding' and 'byte_size', the switch
// statements on the byte size and several return statements are not visible in
// this excerpt
195 static enum arg_type get_base_type(Dwarf_Die* die)
// NOTE(review): get_die_numeric() stores a full uint64_t through this pointer.
// The cast is only safe if 'encoding' is a 64-bit integer object -- confirm
// against its declaration
198 if( !get_die_numeric((uint64_t*)&encoding, die, DW_AT_encoding) )
201 if (encoding == DW_ATE_void )
204 if (encoding == DW_ATE_signed_char || encoding == DW_ATE_unsigned_char )
// All remaining encodings are disambiguated by the size of the type
208 if (!get_die_numeric(&byte_size, die, DW_AT_byte_size))
// Integers. Only DW_ATE_signed counts as signed, so DW_ATE_boolean is mapped
// to the unsigned variants
211 if (encoding == DW_ATE_signed ||
212 encoding == DW_ATE_unsigned ||
213 encoding == DW_ATE_boolean) {
215 bool is_signed = (encoding == DW_ATE_signed);
222 return is_signed ? ARGTYPE_SHORT : ARGTYPE_USHORT;
225 return is_signed ? ARGTYPE_INT : ARGTYPE_UINT;
228 return is_signed ? ARGTYPE_LONG : ARGTYPE_ULONG;
// Real floating-point types
236 if (encoding == DW_ATE_float) {
239 return ARGTYPE_FLOAT;
242 return ARGTYPE_DOUBLE;
245 // things like long doubles. ltrace has no support yet, so I just
// Complex floating-point types: a complex value of size 2*sizeof(float) is
// reported as ARGTYPE_FLOAT and one of size 2*sizeof(double) as ARGTYPE_DOUBLE
252 if (encoding == DW_ATE_complex_float) {
254 case 2*sizeof(float):
255 return ARGTYPE_FLOAT;
257 case 2*sizeof(double):
258 return ARGTYPE_DOUBLE;
261 // things like long doubles. ltrace has no support yet, so I just
268 // Unknown encoding. I just say void
269 complain(die, "Unknown base type. Returning 'void'");
273 static bool get_type_die(Dwarf_Die* type_die, Dwarf_Die* die)
275 Dwarf_Attribute attr;
277 dwarf_attr(die, DW_AT_type, &attr) != NULL &&
278 dwarf_formref_die(&attr, type_die) != NULL;
281 static size_t dwarf_die_hash(const void* x)
283 return *(const Dwarf_Off*)x;
285 static int dwarf_die_eq(const void* a, const void* b)
287 return *(const Dwarf_Off*)a == *(const Dwarf_Off*)b;
290 static bool get_enum(struct arg_type_info* enum_info, Dwarf_Die* parent)
292 enum_info->type = ARGTYPE_INT;
294 struct enum_lens *lens = calloc(1, sizeof(struct enum_lens));
296 complain(parent, "alloc error");
299 lens_init_enum(lens);
300 enum_info->lens = &lens->super;
303 if (dwarf_child(parent, &die) != 0) {
304 // empty enum. we're done
309 complain(&die, "enum element: 0x%02x/'%s'", dwarf_tag(&die),
310 dwarf_diename(&die));
312 if (dwarf_tag(&die) != DW_TAG_enumerator) {
313 complain(&die, "Enums can have ONLY DW_TAG_enumerator elements");
317 if (!dwarf_hasattr(&die, DW_AT_const_value)) {
318 complain(&die, "Enums MUST have DW_AT_const_value values");
322 const char* key = dwarf_diename(&die);
324 complain(&die, "Enums must have a DW_AT_name key");
327 const char* dupkey = strdup(key);
328 if (dupkey == NULL) {
329 complain(&die, "Couldn't duplicate enum key");
333 struct value* value = calloc( 1, sizeof(struct value));
335 complain(&die, "Couldn't alloc enum value");
339 value_init_detached(value, NULL, type_get_simple( ARGTYPE_INT ), 0);
341 if (!get_die_numeric(&enum_value, &die, DW_AT_const_value)) {
342 complain(&die, "Couldn't get enum value");
346 value_set_word(value, (long)enum_value);
348 if (lens_enum_add( lens, dupkey, 0, value, 0 )) {
349 complain(&die, "Couldn't add enum element");
353 int res = dwarf_siblingof(&die, &die);
354 if (res == 0) continue; /* sibling exists */
355 if (res < 0) return false; /* error */
356 break; /* no sibling exists */
362 static bool get_array(struct arg_type_info* array_info, Dwarf_Die* parent)
365 if (!get_type_die( &type_die, parent )) {
366 complain( parent, "Array has unknown type" );
370 struct arg_type_info* info;
371 if (!get_type( &info, &type_die )) {
372 complain( parent, "Couldn't figure out array's type" );
377 if (dwarf_child(parent, &subrange) != 0) {
379 "Array must have a DW_TAG_subrange_type child, but has none");
383 Dwarf_Die next_subrange;
384 if (dwarf_siblingof(&subrange, &next_subrange) <= 0) {
386 "Array must have exactly one DW_TAG_subrange_type child");
390 if (dwarf_hasattr(&subrange, DW_AT_lower_bound)) {
391 uint64_t lower_bound;
392 if (!get_die_numeric(&lower_bound, &subrange, DW_AT_lower_bound)) {
393 complain( parent, "Couldn't read lower bound");
397 if (lower_bound != 0) {
399 "Array subrange has a nonzero lower bound. Don't know what to do");
405 if (!dwarf_hasattr(&subrange, DW_AT_upper_bound)) {
406 // no upper bound is defined. This is probably a variable-width array,
407 // and I don't know how long it is. Let's say 0 to be safe
412 if (!get_die_numeric(&N, &subrange, DW_AT_upper_bound)) {
413 complain( parent, "Couldn't read upper bound");
419 // I'm not checking the subrange type. It should be some sort of integer,
420 // and I don't know what it would mean for it to be something else
422 struct value* value = calloc( 1, sizeof(struct value));
424 complain(&subrange, "Couldn't alloc length value");
427 value_init_detached(value, NULL, type_get_simple( ARGTYPE_INT ), 0);
428 value_set_word(value, N );
430 struct expr_node* length = calloc( 1, sizeof(struct expr_node));
431 if (length == NULL) {
432 complain(&subrange, "Couldn't alloc length expr");
435 expr_init_const(length, value);
437 type_init_array(array_info, info, 0, length, 0 );
442 static bool get_structure(struct arg_type_info* struct_info, Dwarf_Die* parent)
444 type_init_struct(struct_info);
447 if (dwarf_child(parent, &die) != 0) {
448 // no elements; we're done
453 complain(&die, "member: 0x%02x", dwarf_tag(&die));
455 if (dwarf_tag(&die) != DW_TAG_member) {
456 complain(&die, "Structure can have ONLY DW_TAG_member");
461 if (!get_type_die( &type_die, &die )) {
462 complain( &die, "Couldn't get type of element");
466 struct arg_type_info* member_info = NULL;
467 if (!get_type( &member_info, &type_die )) {
468 complain(&die, "Couldn't parse type from DWARF data");
471 type_struct_add( struct_info, member_info, 0 );
473 int res = dwarf_siblingof(&die, &die);
474 if (res == 0) continue; /* sibling exists */
475 if (res < 0) return false; /* error */
476 break; /* no sibling exists */
482 // Reads the type in the die into the given structure
483 // Returns true on success
// On success *info points at the ltrace type that describes type_die.
// Results are cached in type_hash, keyed by the DIE offset, so each DIE is
// converted at most once per import.
// NOTE(review): none of the dict_insert() calls below have their result
// checked. If dict_insert() can fail, a failure goes unnoticed here -- confirm
// NOTE(review): several lines of this function (closing braces, return
// statements, a few declarations) are not visible in this excerpt
484 static bool get_type(struct arg_type_info** info, Dwarf_Die* type_die)
486 Dwarf_Off die_offset = dwarf_dieoffset(type_die);
// Cache lookup: a hit means this DIE has been seen before
487 struct arg_type_info** found_type = dict_find(&type_hash, &die_offset );
488 if (found_type != NULL) {
490 complain(type_die, "Read pre-computed type: %p", *info);
496 switch (dwarf_tag(type_die)) {
497 case DW_TAG_base_type:
498 *info = type_get_simple( get_base_type( type_die ));
499 complain(type_die, "Storing base type: %p", *info);
500 dict_insert( &type_hash, &die_offset, info );
503 case DW_TAG_subroutine_type:
504 case DW_TAG_inlined_subroutine:
505 // function pointers are stored as void*. If ltrace tries to dereference
506 // these, it'll get a segfault
507 *info = type_get_simple( ARGTYPE_VOID );
508 complain(type_die, "Storing subroutine type: %p", *info);
509 dict_insert( &type_hash, &die_offset, info );
512 case DW_TAG_pointer_type:
514 if (!get_type_die(&next_die, type_die )) {
515 // the pointed-to type isn't defined, so I report a void*
// NOTE(review): what is stored here is the simple ARGTYPE_VOID type, not a
// pointer type -- confirm that this is the intended representation
516 *info = type_get_simple( ARGTYPE_VOID );
517 complain(type_die, "Storing void-pointer type: %p", *info);
518 dict_insert( &type_hash, &die_offset, info );
522 *info = calloc( 1, sizeof(struct arg_type_info));
524 complain(type_die, "alloc error");
527 type_init_pointer(*info, NULL, 0);
// The pointer type is cached before its target is resolved, presumably so that
// a type which refers back to this DIE finds the cache entry instead of
// recursing again. The target is then filled in place by the recursive call
529 complain(type_die, "Storing pointer type: %p", *info);
530 dict_insert( &type_hash, &die_offset, info );
531 return get_type( &(*info)->u.ptr_info.info, &next_die );
533 case DW_TAG_structure_type:
534 *info = calloc( 1, sizeof(struct arg_type_info));
536 complain(type_die, "alloc error");
// As with pointers, the entry is cached before the members are parsed
540 complain(type_die, "Storing struct type: %p", *info);
541 dict_insert( &type_hash, &die_offset, info );
542 return get_structure( *info, type_die );
545 case DW_TAG_typedef: ;
546 case DW_TAG_const_type: ;
547 case DW_TAG_volatile_type: ;
548 // Various tags are simply pass-through, so I just keep going
550 if (get_type_die(&next_die, type_die )) {
// NOTE(review): *info has not been assigned yet on this path, so this
// diagnostic prints whatever the caller left in it; get_array() passes an
// uninitialised pointer. Consider moving the message after the get_type() call
551 complain(type_die, "Storing const/typedef type: %p", *info);
552 res = get_type( info, &next_die );
554 // no type. Use 'void'. Normally I'd think this is bogus, but stdio
555 // typedefs something to void
556 *info = type_get_simple( ARGTYPE_VOID );
557 complain(type_die, "Storing void type: %p", *info);
// The pass-through DIE gets its own cache entry referring to the resolved type
560 dict_insert( &type_hash, &die_offset, info );
563 case DW_TAG_enumeration_type:
564 // We have an enumeration. This has type "int", but has a particular
565 // lens to handle the enum
566 *info = calloc( 1, sizeof(struct arg_type_info));
568 complain(type_die, "alloc error");
572 complain(type_die, "Storing enum int: %p", *info);
573 dict_insert( &type_hash, &die_offset, info );
574 return get_enum( *info, type_die );
576 case DW_TAG_array_type:
577 *info = calloc( 1, sizeof(struct arg_type_info));
579 complain(type_die, "alloc error");
583 complain(type_die, "Storing array: %p", *info);
584 dict_insert( &type_hash, &die_offset, info );
585 return get_array( *info, type_die );
// Unions are not modelled; they are reported as 'void'
587 case DW_TAG_union_type:
588 *info = type_get_simple( ARGTYPE_VOID );
589 complain(type_die, "Storing union-as-void type: %p", *info);
// Any other tag is reported as unknown
593 complain(type_die, "Unknown type tag 0x%x", dwarf_tag(type_die));
600 static bool get_prototype(struct prototype* proto, Dwarf_Die* subroutine)
602 // First, look at the return type. This is stored in a DW_AT_type tag in the
603 // subroutine DIE. If there is no such tag, this function returns void
604 Dwarf_Die return_type_die;
605 if (!get_type_die(&return_type_die, subroutine )) {
606 proto->return_info = type_get_simple( ARGTYPE_VOID );
607 proto->own_return_info = 0;
609 proto->return_info = calloc( 1, sizeof( struct arg_type_info ));
610 if (proto->return_info == NULL) {
611 complain(subroutine, "Couldn't alloc return type");
614 proto->own_return_info = 0;
616 if (!get_type( &proto->return_info, &return_type_die )) {
617 complain(subroutine, "Couldn't get return type");
623 // Now look at the arguments
625 if (dwarf_child(subroutine, &arg_die) != 0) {
626 // no args. We're done
631 if (dwarf_tag(&arg_die) != DW_TAG_formal_parameter )
632 goto next_prototype_argument;
634 complain(&arg_die, "arg: 0x%02x", dwarf_tag(&arg_die));
637 if (!get_type_die(&type_die, &arg_die )) {
638 complain(&arg_die, "Couldn't get the argument type die");
642 struct arg_type_info* arg_type_info = NULL;
643 if (!get_type( &arg_type_info, &type_die )) {
644 complain(&arg_die, "Couldn't parse arg type from DWARF data");
649 param_init_type(¶m, arg_type_info, 0);
650 if (prototype_push_param(proto, ¶m) <0) {
651 complain(&arg_die, "couldn't add argument to the prototype");
655 #ifdef DUMP_PROTOTYPES
656 fprintf(stderr, "Adding argument:\n");
657 dump_ltrace_tree(arg_type_info);
660 next_prototype_argument: ;
661 int res = dwarf_siblingof(&arg_die, &arg_die);
662 if (res == 0) continue; /* sibling exists */
663 if (res < 0) return false; /* error */
664 break; /* no sibling exists */
// Walks the children of a compile-unit DIE and adds a prototype to 'plib' for
// every DW_TAG_subprogram child that does not already have one and whose name
// matches at least one of the plt, static or export filters in 'options'.
// NOTE(review): the rest of the parameter list, the jump targets used for
// skipped functions and the return statements are not visible in this excerpt
670 static bool process_die_compileunit(struct protolib* plib, struct library* lib,
674 if (dwarf_child(parent, &die) != 0) {
675 // no child nodes, so nothing to do
680 if (dwarf_tag(&die) == DW_TAG_subprogram) {
// NOTE(review): no check that function_name is non-NULL is visible before it
// is passed to the lookup and filter calls below -- confirm that
// dwarf_diename() can't return NULL for these DIEs, or add a guard
681 const char* function_name = dwarf_diename(&die);
683 complain(&die, "subroutine_type: 0x%02x; function '%s'",
684 dwarf_tag(&die), function_name);
// An existing prototype for this name takes precedence over the DWARF one
686 struct prototype* proto =
687 protolib_lookup_prototype(plib, function_name, true );
690 complain(&die, "Prototype already exists. Skipping");
// Only build prototypes for functions that at least one filter asks for
694 if (!filter_matches_symbol(options.plt_filter, function_name, lib) &&
695 !filter_matches_symbol(options.static_filter, function_name, lib) &&
696 !filter_matches_symbol(options.export_filter, function_name, lib)) {
697 complain(&die, "Prototype not requested by any filter");
701 proto = malloc(sizeof(struct prototype));
703 complain(&die, "couldn't alloc prototype");
706 prototype_init( proto );
// NOTE(review): no release of 'proto' is visible on this failure path; it
// looks like the allocation above leaks when get_prototype() fails -- confirm
708 if (!get_prototype(proto, &die )) {
709 complain(&die, "couldn't get prototype");
713 protolib_add_prototype(plib, function_name, 0, proto);
// Advance to the next child of the compile unit
717 int res = dwarf_siblingof(&die, &die);
718 if (res == 0) continue; /* sibling exists */
719 if (res < 0) return false; /* error */
720 break; /* no sibling exists */
// Imports prototypes into 'plib' from every compilation unit that
// dwfl_nextcu() yields for 'dwfl'. type_hash is initialised before the loop,
// with Dwarf_Off keys and struct arg_type_info* values, and destroyed after it.
// NOTE(review): the statements that follow the two complain() calls are not
// visible in this excerpt. If they return early, type_hash is not destroyed on
// those paths -- confirm
726 static bool import(struct protolib* plib, struct library* lib, Dwfl* dwfl)
728 dict_init(&type_hash, sizeof(Dwarf_Off), sizeof(struct arg_type_info*),
729 dwarf_die_hash, dwarf_die_eq, NULL );
// Passing the previous DIE back into dwfl_nextcu() steps through the units;
// NULL starts the iteration
732 Dwarf_Die* die = NULL;
733 while ((die = dwfl_nextcu(dwfl, die, &bias)) != NULL) {
734 if (dwarf_tag(die) == DW_TAG_compile_unit) {
735 if (!process_die_compileunit(plib, lib, die)) {
736 complain(die, "Error reading compile unit");
741 complain(die, "DW_TAG_compile_unit expected");
747 dict_destroy( &type_hash, NULL, NULL, NULL );
// Public entry point (the only function here without 'static'): imports
// function prototypes for the library 'lib' from DWARF data and returns the
// result of import().
// NOTE(review): the rest of the parameter list and the conditions around the
// statements below are not visible in this excerpt
751 bool import_DWARF_prototypes(struct protolib* plib, struct library* lib,
// Obtain a protolib from the global prototype cache, keyed by the library's
// soname
755 plib = protolib_cache_default(&g_protocache, lib->soname, 0);
757 fprintf(stderr, "Error loading protolib %s: %s.\n",
758 lib->soname, strerror(errno));
// The actual work is done by import()
762 return import(plib, lib, dwfl);
766 - I handle static functions now. Should I? Those do not have DW_AT_external==1
768 - should process existing prototypes to make sure they match
770 - what do function pointers look like? I'm doing void*
774 - all my *allocs leak
776 - protolib_lookup_prototype should look for imports?