1 /* Copyright Dima Kogan <dima@secretsauce.net>
3 * This program is free software; you can redistribute it and/or modify it under
4 * the terms of version 2 of the GNU General Public License as published by the
5 * Free Software Foundation.
9 #include <elfutils/libdwfl.h>
16 #include "prototype.h"
21 #include "lens_enum.h"
29 //#define DUMP_PROTOTYPES
// complain(die, format, ...): diagnostic helper used throughout this file.
// The first definition prints the calling function's name, the DIE's name and
// the DIE's offset to stderr, followed by the caller's printf-style message and
// a newline. The second definition expands to nothing, so call sites compile
// away.
// NOTE(review): confirm which preprocessor symbol selects between the two
// definitions.
32 #define complain(die, format, ...) \
33 fprintf(stderr, "%s() die '%s' @ 0x%lx: " format "\n", \
34 __func__, dwarf_diename(die), dwarf_dieoffset(die), \
37 #define complain(die, format, ...)
// NEXT_SIBLING(die): loop-tail helper that advances `die` in place to its next
// sibling via dwarf_siblingof(). It is meant to be the last statement of a loop
// body inside a function returning bool: it `continue`s when a sibling was
// found, returns false from the enclosing function on error, and `break`s out
// of the loop when no sibling is left. It declares a local named `res`.
40 #define NEXT_SIBLING(die) \
41 int res = dwarf_siblingof(die, die); \
42 if (res == 0) continue; /* sibling exists */ \
43 if (res < 0) return false; /* error */ \
44 break /* no sibling exists */
// Forward declaration: get_type() is called by get_array() and get_structure(),
// both of which are defined before it.
46 static bool get_type(struct arg_type_info** info, Dwarf_Die* type_die, struct protolib* plib,
47 struct dict* type_dieoffset_hash);
// Debug helper: prints the tag and name of `die` to stderr, indented by
// `indent` levels (4 columns each). When the DIE has a child, recurses into it
// with indent+1.
51 static bool _dump_dwarf_tree(Dwarf_Die* die, int indent)
54 fprintf(stderr, "%*sprocessing unit: 0x%02x/'%s'\n", indent*4, "",
55 dwarf_tag(die), dwarf_diename(die));
58 if (dwarf_child(die, &child) == 0) {
59 if (!_dump_dwarf_tree(&child, indent+1))
// Debug entry point: dumps the DWARF tree rooted at `die`, starting at indent
// level 0.
69 static bool dump_dwarf_tree(Dwarf_Die* die)
71 return _dump_dwarf_tree(die, 0);
75 #ifdef DUMP_PROTOTYPES
// Debug helper (built only when DUMP_PROTOTYPES is defined): prints a one-line
// description of the ltrace type `info` to stderr, indented by `indent` levels
// (4 columns each). Pointer targets and struct members are printed recursively
// at indent+1; array element types are not printed.
76 static bool _dump_ltrace_tree(const struct arg_type_info* info, int indent)
79 fprintf(stderr, "%*s%p ...\n", indent*4, "", (void*)info);
84 fprintf(stderr, "%*s%p NULL\n", indent*4, "", (void*)info);
90 fprintf(stderr, "%*s%p void\n", indent*4, "", (void*)info);
102 fprintf(stderr, "%*s%p base\n", indent*4, "", (void*)info);
106 fprintf(stderr, "%*s%p array. elements not printed\n", indent*4, "",
110 case ARGTYPE_POINTER:
111 fprintf(stderr, "%*s%p pointer to...\n", indent*4, "", (void*)info);
112 _dump_ltrace_tree(info->u.ptr_info.info, indent+1);
116 fprintf(stderr, "%*s%p struct...\n", indent*4, "", (void*)info);
// Local view of the entries stored in info->u.entries.data.
119 struct arg_type_info *info;
121 }* elements = (struct struct_field*)info->u.entries.data;
123 for(i=0; i<info->u.entries.size; i++)
124 _dump_ltrace_tree(elements[i].info, indent+1);
128 fprintf(stderr, "%*s%p unknown type\n", indent*4, "", (void*)info);
// Debug entry point: dumps the ltrace type tree rooted at `info`, starting at
// indent level 0.
135 static bool dump_ltrace_tree(const struct arg_type_info* info)
137 return _dump_ltrace_tree(info, 0);
142 // pulls a numerical value out of a particular attribute in a die. Returns true
143 // if successful. The result is returned in *result. Note that this is cast to
144 // (uint64_t), regardless of the actual type of the input
//
// The attribute's form is queried with dwarf_whatform() and the value is read
// with the matching dwarf_form*() accessor (addr, udata, sdata or flag). An
// unrecognized form is reported through complain().
145 static bool get_die_numeric(uint64_t* result,
146 Dwarf_Die *die, unsigned int attr_name)
148 Dwarf_Attribute attr ;
157 if (dwarf_attr(die, attr_name, &attr) == NULL)
160 unsigned int form = dwarf_whatform(&attr);
// PROCESS_NUMERIC(type): read the attribute with dwarf_form<type>() into the
// matching member of `u`, then store it in *result as a uint64_t.
162 #define PROCESS_NUMERIC(type) \
163 if (dwarf_form ## type(&attr, &u.type) != 0) \
165 *result = (uint64_t)u.type; \
171 PROCESS_NUMERIC(addr);
178 PROCESS_NUMERIC(udata);
181 PROCESS_NUMERIC(sdata);
184 PROCESS_NUMERIC(flag);
187 complain(die, "Unknown numeric form %d for attr_name: %d", form, attr_name);
190 #undef PROCESS_NUMERIC
// Selects the ltrace integer type for an integer of `byte_size` bytes with the
// given signedness and stores it in *type. The visible assignments choose
// between ARGTYPE_CHAR (regardless of signedness), ARGTYPE_SHORT/USHORT,
// ARGTYPE_INT/UINT and ARGTYPE_LONG/ULONG. Callers treat a false return as
// "unknown integer base type".
193 static bool get_integer_base_type(enum arg_type* type, int byte_size, bool is_signed)
197 *type = ARGTYPE_CHAR;
201 *type = is_signed ? ARGTYPE_SHORT : ARGTYPE_USHORT;
205 *type = is_signed ? ARGTYPE_INT : ARGTYPE_UINT;
209 *type = is_signed ? ARGTYPE_LONG : ARGTYPE_ULONG;
// Maps a base-type DIE to an ltrace arg_type, using the DIE's DW_AT_encoding
// and DW_AT_byte_size attributes. Signed, unsigned and boolean encodings go
// through get_integer_base_type(); float and complex-float encodings map to
// ARGTYPE_FLOAT or ARGTYPE_DOUBLE. Per the messages below, unknown integer
// sizes and unknown encodings are reported as 'void'.
217 static enum arg_type get_base_type(Dwarf_Die* die)
// NOTE(review): &encoding is cast to uint64_t*; that is only safe if
// `encoding` is itself a 64-bit object -- confirm against its declaration.
220 if(!get_die_numeric((uint64_t*)&encoding, die, DW_AT_encoding))
223 if (encoding == DW_ATE_void)
226 if (encoding == DW_ATE_signed_char || encoding == DW_ATE_unsigned_char)
230 if (!get_die_numeric(&byte_size, die, DW_AT_byte_size))
233 if (encoding == DW_ATE_signed ||
234 encoding == DW_ATE_unsigned ||
235 encoding == DW_ATE_boolean) {
// Booleans and unsigned integers are both handled as unsigned.
237 bool is_signed = (encoding == DW_ATE_signed);
240 if(!get_integer_base_type(&type, (int)byte_size, is_signed)) {
241 complain(die, "Unknown integer base type. Using 'void'");
247 if (encoding == DW_ATE_float) {
250 return ARGTYPE_FLOAT;
253 return ARGTYPE_DOUBLE;
256 // things like long doubles. ltrace has no support yet, so I just
263 if (encoding == DW_ATE_complex_float) {
// A complex value is two floating-point components, hence the 2*sizeof().
265 case 2*sizeof(float):
266 return ARGTYPE_FLOAT;
268 case 2*sizeof(double):
269 return ARGTYPE_DOUBLE;
272 // things like long doubles. ltrace has no support yet, so I just
279 // Unknown encoding. I just say void
280 complain(die, "Unknown base type. Returning 'void'");
// Resolves the DIE referenced by the DW_AT_type attribute of `die` into
// *type_die. The expression below is true only if the attribute exists and the
// reference could be followed.
284 static bool get_type_die(Dwarf_Die* type_die, Dwarf_Die* die)
286 Dwarf_Attribute attr;
288 dwarf_attr(die, DW_AT_type, &attr) != NULL &&
289 dwarf_formref_die(&attr, type_die) != NULL;
// Hash callback for the DIE-offset dictionary set up in import(): the key is a
// Dwarf_Off, and its value is used directly as the hash.
292 static size_t dwarf_die_hash(const void* x)
294 return *(const Dwarf_Off*)x;
// Equality callback for the DIE-offset dictionary set up in import(): returns
// nonzero when the two Dwarf_Off keys are equal.
296 static int dwarf_die_eq(const void* a, const void* b)
298 return *(const Dwarf_Off*)a == *(const Dwarf_Off*)b;
// Fills in `enum_info` from the enumeration DIE `parent`.
// The underlying integer type is chosen from DW_AT_byte_size (always treated
// as signed), falling back to ARGTYPE_INT when the size is absent or unknown.
// An enum lens is allocated and attached to enum_info->lens, and enumerator
// DIEs under `parent` are added to it as (name, value) pairs. Each one must be
// a DW_TAG_enumerator with a DW_AT_const_value and a name.
301 static bool get_enum(struct arg_type_info* enum_info, Dwarf_Die* parent)
304 if (!get_die_numeric(&byte_size, parent, DW_AT_byte_size)) {
305 // No byte size given, assume 'int'
306 enum_info->type = ARGTYPE_INT;
308 if(!get_integer_base_type(&enum_info->type, (int)byte_size, true)) {
309 complain(parent, "Unknown integer base type. Using 'int'");
310 enum_info->type = ARGTYPE_INT;
314 struct enum_lens *lens = calloc(1, sizeof(struct enum_lens));
316 complain(parent, "alloc error");
319 lens_init_enum(lens);
320 enum_info->lens = &lens->super;
323 if (dwarf_child(parent, &die) != 0) {
324 // empty enum. we're done
329 complain(&die, "enum element: 0x%02x/'%s'", dwarf_tag(&die),
330 dwarf_diename(&die));
332 if (dwarf_tag(&die) != DW_TAG_enumerator) {
333 complain(&die, "Enums can have ONLY DW_TAG_enumerator elements");
337 if (!dwarf_hasattr(&die, DW_AT_const_value)) {
338 complain(&die, "Enums MUST have DW_AT_const_value values");
342 const char* key = dwarf_diename(&die);
344 complain(&die, "Enums must have a DW_AT_name key");
// The lens receives a private copy of the name, not the libdw string.
347 const char* dupkey = strdup(key);
348 if (dupkey == NULL) {
349 complain(&die, "Couldn't duplicate enum key");
353 struct value* value = calloc(1, sizeof(struct value));
355 complain(&die, "Couldn't alloc enum value");
359 value_init_detached(value, NULL, type_get_simple(enum_info->type), 0);
361 if (!get_die_numeric(&enum_value, &die, DW_AT_const_value)) {
362 complain(&die, "Couldn't get enum value");
366 value_set_word(value, (long)enum_value);
368 if (lens_enum_add(lens, dupkey, 0, value, 0)) {
369 complain(&die, "Couldn't add enum element");
// Fills in `array_info` from the array DIE `parent`.
// The element type is resolved through get_type(). The array length is derived
// from the DW_AT_upper_bound of the DW_TAG_subrange_type child and wrapped in a
// constant expression that is passed to type_init_array().
// Per the messages below, an array with no subrange child, with more than one
// child, or with a nonzero lower bound is rejected.
379 static bool get_array(struct arg_type_info* array_info, Dwarf_Die* parent, struct protolib* plib,
380 struct dict* type_dieoffset_hash)
383 if (!get_type_die(&type_die, parent)) {
384 complain(parent, "Array has unknown type");
388 struct arg_type_info* info;
389 if (!get_type(&info, &type_die, plib, type_dieoffset_hash)) {
390 complain(parent, "Couldn't figure out array's type");
395 if (dwarf_child(parent, &subrange) != 0) {
397 "Array must have a DW_TAG_subrange_type child, but has none");
401 Dwarf_Die next_subrange;
// dwarf_siblingof() <= 0 covers both "a second child exists" (0) and "error"
// (negative); only a positive result means the subrange is the sole child.
402 if (dwarf_siblingof(&subrange, &next_subrange) <= 0) {
404 "Array must have exactly one DW_TAG_subrange_type child");
408 if (dwarf_hasattr(&subrange, DW_AT_lower_bound)) {
409 uint64_t lower_bound;
410 if (!get_die_numeric(&lower_bound, &subrange, DW_AT_lower_bound)) {
411 complain(parent, "Couldn't read lower bound");
415 if (lower_bound != 0) {
417 "Array subrange has a nonzero lower bound. Don't know what to do");
423 if (!dwarf_hasattr(&subrange, DW_AT_upper_bound)) {
424 // no upper bound is defined. This is probably a variable-width array,
425 // and I don't know how long it is. Let's say 0 to be safe
430 if (!get_die_numeric(&N, &subrange, DW_AT_upper_bound)) {
431 complain(parent, "Couldn't read upper bound");
437 // I'm not checking the subrange type. It should be some sort of integer,
438 // and I don't know what it would mean for it to be something else
440 struct value* value = calloc(1, sizeof(struct value));
442 complain(&subrange, "Couldn't alloc length value");
445 value_init_detached(value, NULL, type_get_simple(ARGTYPE_INT), 0);
446 value_set_word(value, N);
448 struct expr_node* length = calloc(1, sizeof(struct expr_node));
449 if (length == NULL) {
450 complain(&subrange, "Couldn't alloc length expr");
453 expr_init_const(length, value);
455 type_init_array(array_info, info, 0, length, 0);
// Initializes `struct_info` as a struct type and appends fields taken from the
// members of the structure DIE `parent`. Each member must be a DW_TAG_member
// whose type can be resolved through get_type(). A structure DIE with no
// children needs no further work.
460 static bool get_structure(struct arg_type_info* struct_info, Dwarf_Die* parent, struct protolib* plib,
461 struct dict* type_dieoffset_hash)
463 type_init_struct(struct_info);
466 if (dwarf_child(parent, &die) != 0) {
467 // no elements; we're done
472 complain(&die, "member: 0x%02x", dwarf_tag(&die));
474 if (dwarf_tag(&die) != DW_TAG_member) {
475 complain(&die, "Structure can have ONLY DW_TAG_member");
480 if (!get_type_die(&type_die, &die)) {
481 complain(&die, "Couldn't get type of element");
485 struct arg_type_info* member_info = NULL;
486 if (!get_type(&member_info, &type_die, plib, type_dieoffset_hash)) {
487 complain(&die, "Couldn't parse type from DWARF data");
490 type_struct_add(struct_info, member_info, 0);
498 // Reads the type in the die into the given structure
499 // Returns true on success
//
// *info receives the resulting ltrace type. `type_dieoffset_hash` maps DIE
// offsets to types that were already built and is consulted first. A named
// type that protolib_lookup_type() already knows takes precedence over the
// DWARF description. For pointer, struct, enum and array types the freshly
// allocated type is inserted into the hash before its contents are parsed
// (presumably so that self-referential types terminate -- confirm).
// Subroutine and union types are represented as void.
500 static bool get_type(struct arg_type_info** info, Dwarf_Die* type_die, struct protolib* plib,
501 struct dict* type_dieoffset_hash)
503 Dwarf_Off die_offset = dwarf_dieoffset(type_die);
504 struct arg_type_info** found_type = dict_find(type_dieoffset_hash, &die_offset);
505 if (found_type != NULL) {
507 complain(type_die, "Read pre-computed type: %p", *info);
511 const char* type_name = dwarf_diename(type_die);
512 if (type_name != NULL) {
514 struct named_type* already_defined_type =
515 protolib_lookup_type(plib, type_name, true);
517 if (already_defined_type != NULL) {
519 "Type '%s' defined in a .conf file. Using that instead of DWARF",
521 *info = already_defined_type->info;
528 switch (dwarf_tag(type_die)) {
529 case DW_TAG_base_type:
530 *info = type_get_simple(get_base_type(type_die));
531 complain(type_die, "Storing base type: %p", *info);
532 dict_insert(type_dieoffset_hash, &die_offset, info);
535 case DW_TAG_subroutine_type:
536 case DW_TAG_inlined_subroutine:
537 // function pointers are stored as void*. If ltrace tries to dereference
538 // these, it'll get a segfault
539 *info = type_get_simple(ARGTYPE_VOID);
540 complain(type_die, "Storing subroutine type: %p", *info);
541 dict_insert(type_dieoffset_hash, &die_offset, info);
544 case DW_TAG_pointer_type:
546 if (!get_type_die(&next_die, type_die)) {
547 // the pointed-to type isn't defined, so I report a void*
548 *info = type_get_voidptr();
549 complain(type_die, "Storing void-pointer type: %p", *info);
550 dict_insert(type_dieoffset_hash, &die_offset, info);
554 *info = calloc(1, sizeof(struct arg_type_info));
556 complain(type_die, "alloc error");
// The pointee is left NULL here and filled in by the recursive call below.
559 type_init_pointer(*info, NULL, 0);
561 complain(type_die, "Storing pointer type: %p", *info);
562 dict_insert(type_dieoffset_hash, &die_offset, info);
563 return get_type(&(*info)->u.ptr_info.info, &next_die, plib, type_dieoffset_hash);
565 case DW_TAG_structure_type:
566 *info = calloc(1, sizeof(struct arg_type_info));
568 complain(type_die, "alloc error");
572 complain(type_die, "Storing struct type: %p", *info);
573 dict_insert(type_dieoffset_hash, &die_offset, info);
574 return get_structure(*info, type_die, plib, type_dieoffset_hash);
578 case DW_TAG_const_type:
579 case DW_TAG_volatile_type: {
580 // Various tags are simply pass-through, so I just keep going
582 if (get_type_die(&next_die, type_die)) {
583 complain(type_die, "Storing const/typedef type: %p", *info);
584 res = get_type(info, &next_die, plib, type_dieoffset_hash);
586 // no type. Use 'void'. Normally I'd think this is bogus, but stdio
587 // typedefs something to void
588 *info = type_get_simple(ARGTYPE_VOID);
589 complain(type_die, "Storing void type: %p", *info);
592 dict_insert(type_dieoffset_hash, &die_offset, info);
596 case DW_TAG_enumeration_type:
597 // We have an enumeration. This has type "int", but has a particular
598 // lens to handle the enum
599 *info = calloc(1, sizeof(struct arg_type_info));
601 complain(type_die, "alloc error");
605 complain(type_die, "Storing enum int: %p", *info);
606 dict_insert(type_dieoffset_hash, &die_offset, info);
607 return get_enum(*info, type_die);
609 case DW_TAG_array_type:
610 *info = calloc(1, sizeof(struct arg_type_info));
612 complain(type_die, "alloc error");
616 complain(type_die, "Storing array: %p", *info);
617 dict_insert(type_dieoffset_hash, &die_offset, info);
618 return get_array(*info, type_die, plib, type_dieoffset_hash);
620 case DW_TAG_union_type:
621 *info = type_get_simple(ARGTYPE_VOID);
622 complain(type_die, "Storing union-as-void type: %p", *info);
626 complain(type_die, "Unknown type tag 0x%x", dwarf_tag(type_die));
// Fills in `proto` from the subprogram DIE `subroutine`.
// The return type is read from the subroutine's DW_AT_type attribute (void when
// the attribute is absent). DW_TAG_formal_parameter children are then resolved
// through get_type() and pushed onto the prototype as parameters; children
// with any other tag are not treated as arguments.
633 static bool get_prototype(struct prototype* proto, Dwarf_Die* subroutine, struct protolib* plib,
634 struct dict* type_dieoffset_hash)
636 // First, look at the return type. This is stored in a DW_AT_type tag in the
637 // subroutine DIE. If there is no such tag, this function returns void
638 Dwarf_Die return_type_die;
639 if (!get_type_die(&return_type_die, subroutine)) {
640 proto->return_info = type_get_simple(ARGTYPE_VOID);
641 proto->own_return_info = 0;
// NOTE(review): get_type() assigns *info, so the buffer allocated here looks
// like it is overwritten (and leaked) by the call below -- confirm.
643 proto->return_info = calloc(1, sizeof(struct arg_type_info));
644 if (proto->return_info == NULL) {
645 complain(subroutine, "Couldn't alloc return type");
648 proto->own_return_info = 0;
650 if (!get_type(&proto->return_info, &return_type_die, plib, type_dieoffset_hash)) {
651 complain(subroutine, "Couldn't get return type");
657 // Now look at the arguments
659 if (dwarf_child(subroutine, &arg_die) != 0) {
660 // no args. We're done
665 if (dwarf_tag(&arg_die) == DW_TAG_formal_parameter) {
667 complain(&arg_die, "arg: 0x%02x", dwarf_tag(&arg_die));
670 if (!get_type_die(&type_die, &arg_die)) {
671 complain(&arg_die, "Couldn't get the argument type die");
675 struct arg_type_info* arg_type_info = NULL;
676 if (!get_type(&arg_type_info, &type_die, plib, type_dieoffset_hash)) {
677 complain(&arg_die, "Couldn't parse arg type from DWARF data");
// Wrap the resolved type in a param and append it to the prototype.
682 param_init_type(&param, arg_type_info, 0);
683 if (prototype_push_param(proto, &param) <0) {
684 complain(&arg_die, "couldn't add argument to the prototype");
688 #ifdef DUMP_PROTOTYPES
689 fprintf(stderr, "Adding argument:\n");
690 dump_ltrace_tree(arg_type_info);
694 NEXT_SIBLING(&arg_die);
// Imports the prototype of one subprogram DIE into `plib`.
// The function name is the DW_AT_linkage_name when present, otherwise the DIE
// name; a DIE with neither is not imported. Functions matched by none of the
// plt/static/export filters, and functions for which
// protolib_lookup_prototype() already finds a prototype, are skipped (see the
// messages below). Otherwise a prototype is allocated, filled in by
// get_prototype() and registered in `plib` under the function name.
700 static bool import_subprogram(struct protolib* plib, struct library* lib,
701 struct dict* type_dieoffset_hash,
704 // I use the linkage function name if there is one, otherwise the
706 const char* function_name = NULL;
707 Dwarf_Attribute attr;
708 if (dwarf_attr(die, DW_AT_linkage_name, &attr) != NULL)
709 function_name = dwarf_formstring(&attr);
710 if (function_name == NULL)
711 function_name = dwarf_diename(die);
712 if (function_name == NULL) {
713 complain(die, "Function has no name. Not importing");
717 if (!filter_matches_symbol(options.plt_filter, function_name, lib) &&
718 !filter_matches_symbol(options.static_filter, function_name, lib) &&
719 !filter_matches_symbol(options.export_filter, function_name, lib)) {
720 complain(die, "Prototype not requested by any filter");
724 complain(die, "subroutine_type: 0x%02x; function '%s'",
725 dwarf_tag(die), function_name);
727 struct prototype* proto =
728 protolib_lookup_prototype(plib, function_name, false);
731 complain(die, "Prototype already exists. Skipping");
735 proto = malloc(sizeof(struct prototype));
737 complain(die, "couldn't alloc prototype");
740 prototype_init(proto);
742 if (!get_prototype(proto, die, plib, type_dieoffset_hash)) {
743 complain(die, "couldn't get prototype");
747 protolib_add_prototype(plib, function_name, 0, proto);
// Looks for DW_TAG_subprogram DIEs among the children of the compile-unit DIE
// `parent` and hands each one to import_subprogram(). A compile unit without
// children needs no work.
751 static bool process_die_compileunit(struct protolib* plib, struct library* lib,
752 struct dict* type_dieoffset_hash,
756 if (dwarf_child(parent, &die) != 0) {
757 // no child nodes, so nothing to do
762 if (dwarf_tag(&die) == DW_TAG_subprogram)
763 if(!import_subprogram(plib, lib, type_dieoffset_hash, &die))
// Imports prototypes from every compile unit that dwfl_nextcu() yields for
// `dwfl`, via process_die_compileunit(). A top-level DIE that is not a
// DW_TAG_compile_unit is reported through complain().
772 static bool import(struct protolib* plib, struct library* lib, Dwfl* dwfl)
774 // A map from DIE addresses (Dwarf_Off) to type structures (struct
775 // arg_type_info*). This is created and filled in at the start of each
776 // import, and deleted when the import is complete
777 struct dict type_dieoffset_hash;
779 dict_init(&type_dieoffset_hash, sizeof(Dwarf_Off), sizeof(struct arg_type_info*),
780 dwarf_die_hash, dwarf_die_eq, NULL);
// Passing the previous DIE back to dwfl_nextcu() advances to the next compile
// unit; NULL starts the iteration.
785 Dwarf_Die* die = NULL;
786 while ((die = dwfl_nextcu(dwfl, die, &bias)) != NULL) {
787 if (dwarf_tag(die) == DW_TAG_compile_unit) {
788 if (!process_die_compileunit(plib, lib, &type_dieoffset_hash, die)) {
789 complain(die, "Error reading compile unit");
796 complain(die, "DW_TAG_compile_unit expected");
804 dict_destroy(&type_dieoffset_hash, NULL, NULL, NULL);
// Public entry point: imports DWARF-derived prototypes for `lib`, using the
// Dwfl handle stored in lib->dwfl.
// The protolib starts out as lib->protolib; one can also be obtained from
// protolib_cache_default() (presumably when lib->protolib is NULL -- confirm),
// with a failure reported on stderr. When import() succeeds, the protolib is
// stored back into lib->protolib.
808 bool import_DWARF_prototypes(struct library* lib)
810 struct protolib* plib = lib->protolib;
811 Dwfl* dwfl = lib->dwfl;
814 plib = protolib_cache_default(&g_protocache, lib->soname, 0);
816 fprintf(stderr, "Error loading protolib %s: %s.\n",
817 lib->soname, strerror(errno));
821 if (import(plib, lib, dwfl)) {
822 lib->protolib = plib;
829 - I handle static functions now. Should I? Those do not have DW_AT_external==1
831 - should process existing prototypes to make sure they match
833 - what do function pointers look like? I'm doing void*
837 - all my *allocs leak