initial prototypes-from-dwarf implementation
[platform/upstream/ltrace.git] / dwarf_prototypes.c
1 /* Most of this is Copyright Dima Kogan <dima@secretsauce.net>
2  *
3  * Pieces of this were taken from dwarf_prototypes.c in the dwarves project.
4  * Those are Copyright (C) 2008 Arnaldo Carvalho de Melo <acme@redhat.com>.
5  *
6  * This program is free software; you can redistribute it and/or modify it under
7  * the terms of version 2 of the GNU General Public License as published by the
8  * Free Software Foundation.
9  *
10  */
11 #include <stdio.h>
12 #include <elfutils/libdwfl.h>
13 #include <dwarf.h>
14 #include <stdlib.h>
15 #include <errno.h>
16 #include <string.h>
17
18 #include "config.h"
19 #include "prototype.h"
20 #include "type.h"
21 #include "param.h"
22 #include "dict.h"
23 #include "lens.h"
24 #include "lens_enum.h"
25 #include "value.h"
26 #include "expr.h"
27 #include "library.h"
28 #include "options.h"
29 #include "filter.h"
30
// Debug diagnostics for DIE processing. Change the "#if 0" below to "#if 1" to
// get a message on stderr naming the calling function, the DIE and the DIE's
// offset. When disabled, complain() is a no-op statement and its arguments are
// not evaluated.
#if 0
// The DIE offset is cast to unsigned long long so that the conversion
// specifier matches the argument no matter how wide Dwarf_Off is.
#define complain( die, format, ... )					\
	fprintf(stderr, "%s() die '%s' @ 0x%llx: " format "\n",		\
		__func__, dwarf_diename(die),				\
		(unsigned long long)dwarf_dieoffset(die),		\
		##__VA_ARGS__ )
#else
#define complain( die, format, ... ) do { } while (0)
#endif
39
// A map from DIE offsets (Dwarf_Off) to type structures (struct
// arg_type_info*). This is created at the start of each import, filled in as
// types are parsed, and destroyed when the import is complete
43 static struct dict type_hash;
44
45
46 static bool getType( struct arg_type_info** info, Dwarf_Die* type_die);
47
48
49 #if 0
50 static bool _dump_dwarf_tree(Dwarf_Die* die, int indent)
51 {
52     while(1)
53     {
54         printf("%*sprocessing unit: 0x%02x/'%s'\n", indent*4, "",
55                dwarf_tag(die), dwarf_diename(die) );
56
57         Dwarf_Die child;
58         if (dwarf_child(die, &child) == 0)
59         {
60                         if( !_dump_dwarf_tree(&child, indent+1) )
61                                 return false;
62         }
63
64         int res = dwarf_siblingof(die, die);
65         if( res == 0 ) continue;     // sibling exists
66         if( res < 0 )  return false; // error
67         break;                       // no sibling exists
68     }
69
70     return true;
71 }
72
73 static bool dump_dwarf_tree(Dwarf_Die* die)
74 {
75     return _dump_dwarf_tree( die, 0 );
76 }
77
78 static bool _dump_ltrace_tree( const struct arg_type_info* info, int indent )
79 {
80         if( indent > 7 )
81         {
82                 printf("%*s%p ...\n", indent*4, "", (void*)info);
83                 return true;
84         }
85
86         if( info == NULL )
87         {
88                 printf("%*s%p NULL\n", indent*4, "", (void*)info);
89                 return true;
90         }
91
92         switch(info->type)
93         {
94         case ARGTYPE_VOID:
95                 printf("%*s%p void\n", indent*4, "", (void*)info);
96                 break;
97
98         case ARGTYPE_INT:
99         case ARGTYPE_UINT:
100         case ARGTYPE_LONG:
101         case ARGTYPE_ULONG:
102         case ARGTYPE_CHAR:
103         case ARGTYPE_SHORT:
104         case ARGTYPE_USHORT:
105         case ARGTYPE_FLOAT:
106         case ARGTYPE_DOUBLE:
107                 printf("%*s%p base\n", indent*4, "", (void*)info);
108                 break;
109
110         case ARGTYPE_ARRAY:
111                 printf("%*s%p array. elements not printed\n", indent*4, "", (void*)info);
112                 break;
113
114         case ARGTYPE_POINTER:
115                 printf("%*s%p pointer to...\n", indent*4, "", (void*)info);
116                 _dump_ltrace_tree( info->u.ptr_info.info, indent+1 );
117                 break;
118
119         case ARGTYPE_STRUCT:
120                 printf("%*s%p struct...\n", indent*4, "", (void*)info);
121                 struct struct_field
122                 {
123                         struct arg_type_info *info;
124                         int own_info;
125                 }* elements = (struct struct_field*)info->u.entries.data;
126                 unsigned int i;
127                 for(i=0; i<info->u.entries.size; i++)
128                         _dump_ltrace_tree( elements[i].info, indent+1 );
129                 break;
130
131         default:
132                 printf("%*s%p unknown type\n", indent*4, "", (void*)info);
133                 return false;;
134         }
135
136         return true;
137 }
138
// Debug helper: dumps the given ltrace type tree with no initial indentation.
// Returns whatever _dump_ltrace_tree() returns.
static bool dump_ltrace_tree( const struct arg_type_info* info )
{
	const int top_level_indent = 0;

	return _dump_ltrace_tree(info, top_level_indent);
}
143 #endif
144
145
146
147 static uint64_t attr_numeric(Dwarf_Die *die, uint32_t name)
148 {
149         Dwarf_Attribute attr;
150         uint32_t form;
151
152         if (dwarf_attr(die, name, &attr) == NULL)
153                 return 0;
154
155         form = dwarf_whatform(&attr);
156
157         switch (form) {
158         case DW_FORM_addr: {
159                 Dwarf_Addr addr;
160                 if (dwarf_formaddr(&attr, &addr) == 0)
161                         return addr;
162         }
163                 break;
164         case DW_FORM_data1:
165         case DW_FORM_data2:
166         case DW_FORM_data4:
167         case DW_FORM_data8:
168         case DW_FORM_sdata:
169         case DW_FORM_udata: {
170                 Dwarf_Word value;
171                 if (dwarf_formudata(&attr, &value) == 0)
172                         return value;
173         }
174                 break;
175         case DW_FORM_flag:
176         case DW_FORM_flag_present: {
177                 bool value;
178                 if (dwarf_formflag(&attr, &value) == 0)
179                         return value;
180         }
181                 break;
182         default:
183                 complain(die, "DW_AT_<0x%x>=0x%x", name, form);
184                 break;
185         }
186
187         return 0;
188 }
189
190 static enum arg_type getBaseType( Dwarf_Die* die )
191 {
192         int encoding = attr_numeric(die, DW_AT_encoding);
193
194         if( encoding == DW_ATE_void )
195                 return ARGTYPE_VOID;
196
197         if( encoding == DW_ATE_signed_char || encoding == DW_ATE_unsigned_char )
198                 return ARGTYPE_CHAR;
199
200         if( encoding == DW_ATE_signed ||
201                 encoding == DW_ATE_unsigned )
202         {
203                 bool is_signed = (encoding == DW_ATE_signed);
204                 switch( attr_numeric(die, DW_AT_byte_size) )
205                 {
206                 case sizeof(char):
207                         return ARGTYPE_CHAR;
208
209                 case sizeof(short):
210                         return is_signed ? ARGTYPE_SHORT : ARGTYPE_USHORT;
211
212                 case sizeof(int):
213                         return is_signed ? ARGTYPE_INT : ARGTYPE_UINT;
214
215                 case sizeof(long):
216                         return is_signed ? ARGTYPE_LONG : ARGTYPE_ULONG;
217
218                 default:
219                         complain(die, "");
220                         exit(1);
221                 }
222         }
223
224         if( encoding == DW_ATE_float )
225         {
226                 switch( attr_numeric(die, DW_AT_byte_size) )
227                 {
228                 case sizeof(float):
229                         return ARGTYPE_FLOAT;
230
231                 case sizeof(double):
232                         return ARGTYPE_DOUBLE;
233
234                 default:
235                         complain(die, "");
236                         exit(1);
237                 }
238         }
239
240         complain(die, "");
241         exit(1);
242         return ARGTYPE_VOID;
243 }
244
245 static bool getTypeDie( Dwarf_Die* type_die, Dwarf_Die* die )
246 {
247         Dwarf_Attribute attr;
248         return
249                 dwarf_attr(die, DW_AT_type, &attr) != NULL &&
250                 dwarf_formref_die(&attr, type_die) != NULL;
251 }
252
253 static size_t dwarf_die_hash(const void* x)
254 {
255         return *(const Dwarf_Off*)x;
256 }
257 static int dwarf_die_eq(const void* a, const void* b)
258 {
259         return *(const Dwarf_Off*)a == *(const Dwarf_Off*)b;
260 }
261
262 static bool getEnum(struct arg_type_info* enum_info, Dwarf_Die* parent)
263 {
264         enum_info->type = ARGTYPE_INT;
265
266         struct enum_lens *lens = calloc(1, sizeof(struct enum_lens));
267         if (lens == NULL)
268         {
269                 complain(parent, "alloc error");
270                 return false;
271         }
272         lens_init_enum(lens);
273         enum_info->lens = &lens->super;
274
275         Dwarf_Die die;
276         if( dwarf_child(parent, &die) != 0 )
277         {
278                 // empty enum. we're done
279                 return true;
280         }
281
282         while(1) {
283                 complain(&die, "enum element: 0x%02x/'%s'", dwarf_tag(&die), dwarf_diename(&die) );
284
285                 if( dwarf_tag(&die) != DW_TAG_enumerator )
286                 {
287                         complain(&die, "Enums can have ONLY DW_TAG_enumerator elements");
288                         return false;
289                 }
290
291                 if( !dwarf_hasattr(&die, DW_AT_const_value) )
292                 {
293                         complain(&die, "Enums MUST have DW_AT_const_value values");
294                         return false;
295                 }
296
297                 const char* key = dwarf_diename(&die);
298                 if( key == NULL )
299                 {
300                         complain(&die, "Enums must have a DW_AT_name key");
301                         return false;
302                 }
303                 const char* dupkey = strdup(key);
304                 if( dupkey == NULL )
305                 {
306                         complain(&die, "Couldn't duplicate enum key");
307                         return false;
308                 }
309
310                 struct value* value = calloc( 1, sizeof(struct value) );
311                 if( value == NULL )
312                 {
313                         complain(&die, "Couldn't alloc enum value");
314                         return false;
315                 }
316
317                 value_init_detached(value, NULL, type_get_simple( ARGTYPE_INT ), 0);
318                 value_set_word(value, attr_numeric(&die, DW_AT_const_value) );
319
320                 if( lens_enum_add( lens, dupkey, 0, value, 0 ) )
321                 {
322                         complain(&die, "Couldn't add enum element");
323                         return false;
324                 }
325
326                 int res = dwarf_siblingof(&die, &die);
327                 if( res == 0 ) continue;     /* sibling exists    */
328                 if( res < 0 )  return false; /* error             */
329                 break;                       /* no sibling exists */
330         }
331
332         return true;
333 }
334
335 static bool getArray(struct arg_type_info* array_info, Dwarf_Die* parent)
336 {
337         Dwarf_Die type_die;
338         if( !getTypeDie( &type_die, parent ) )
339         {
340                 complain( parent, "Array has unknown type" );
341                 return false;
342         }
343
344         struct arg_type_info* info;
345         if( !getType( &info, &type_die ) )
346         {
347                 complain( parent, "Couldn't figure out array's type" );
348                 return false;
349         }
350
351         Dwarf_Die subrange;
352         if( dwarf_child(parent, &subrange) != 0 )
353         {
354                 complain( parent, "Array must have a DW_TAG_subrange_type child, but has none" );
355                 return false;
356         }
357
358         Dwarf_Die next_subrange;
359         if( dwarf_siblingof(&subrange, &next_subrange) <= 0 )
360         {
361                 complain( parent, "Array must have exactly one DW_TAG_subrange_type child" );
362                 return false;
363         }
364
365         if( !dwarf_hasattr(&subrange, DW_AT_upper_bound) )
366         {
367                 complain( parent, "Array subrange must have a DW_AT_upper_bound");
368                 return false;
369         }
370
371         if( dwarf_hasattr(&subrange, DW_AT_lower_bound) )
372         {
373                 if( attr_numeric(&subrange, DW_AT_lower_bound) != 0 )
374                 {
375                         complain( parent, "Array subrange has a nonzero lower bound. Don't know what to do");
376                         return false;
377                 }
378         }
379
380         // I'm not checking the subrange type. It should be some sort of integer,
381         // and I don't know what it would mean for it to be something else
382
383         struct value* value = calloc( 1, sizeof(struct value) );
384         if( value == NULL )
385         {
386                 complain(&subrange, "Couldn't alloc length value");
387                 return false;
388         }
389         value_init_detached(value, NULL, type_get_simple( ARGTYPE_INT ), 0);
390         value_set_word(value, attr_numeric(&subrange, DW_AT_upper_bound)+1 );
391
392         struct expr_node* length = calloc( 1, sizeof(struct expr_node) );
393         if( length == NULL )
394         {
395                 complain(&subrange, "Couldn't alloc length expr");
396                 return false;
397         }
398         expr_init_const(length, value);
399
400         type_init_array(array_info, info, 0, length, 0 );
401
402         return true;
403 }
404
405 static bool getStructure(struct arg_type_info* struct_info, Dwarf_Die* parent)
406 {
407         type_init_struct(struct_info);
408
409         Dwarf_Die die;
410         if( dwarf_child(parent, &die) != 0 )
411         {
412                 // no elements; we're done
413                 return true;
414         }
415
416         while(1) {
417                 fprintf(stderr, "member: 0x%02x/'%s'\n", dwarf_tag(&die), dwarf_diename(&die) );
418
419                 if( dwarf_tag(&die) != DW_TAG_member )
420                 {
421                         complain(&die, "Structure can have ONLY DW_TAG_member");
422                         return false;
423                 }
424
425                 Dwarf_Die type_die;
426                 if( !getTypeDie( &type_die, &die ) )
427                 {
428                         complain( &die, "Couldn't get type of element");
429                         return false;
430                 }
431
432                 struct arg_type_info* member_info = NULL;
433                 if( !getType( &member_info, &type_die ) )
434                 {
435                         complain(&die, "Couldn't parse type from DWARF data");
436                         return false;
437                 }
438                 type_struct_add( struct_info, member_info, 0 );
439
440                 int res = dwarf_siblingof(&die, &die);
441                 if( res == 0 ) continue;     /* sibling exists    */
442                 if( res < 0 )  return false; /* error             */
443                 break;                       /* no sibling exists */
444         }
445
446         return true;
447 }
448
449 // Reads the type in the die into the given structure
450 // Returns true on sucess
451 static bool getType( struct arg_type_info** info, Dwarf_Die* type_die)
452 {
453         Dwarf_Off die_offset = dwarf_dieoffset(type_die);
454         struct arg_type_info** found_type = dict_find(&type_hash, &die_offset );
455         if(found_type != NULL)
456         {
457                 *info = *found_type;
458                 complain(type_die, "Read pre-computed type: %p", *info);
459                 return true;
460         }
461
462         Dwarf_Die next_die;
463
464         switch( dwarf_tag(type_die) )
465         {
466         case DW_TAG_base_type:
467                 *info = type_get_simple( getBaseType( type_die ) );
468                 complain(type_die, "Storing base type: %p", *info);
469                 dict_insert( &type_hash, &die_offset, info );
470                 return true;
471
472         case DW_TAG_subroutine_type:
473         case DW_TAG_inlined_subroutine:
474                 // function pointers are stored as void*. If ltrace tries to dereference
475                 // these, it'll get a segfault
476                 *info = type_get_simple( ARGTYPE_VOID );
477                 complain(type_die, "Storing subroutine type: %p", *info);
478                 dict_insert( &type_hash, &die_offset, info );
479                 return true;
480
481         case DW_TAG_pointer_type:
482
483                 if( !getTypeDie(&next_die, type_die ) )
484                 {
485                         // the pointed-to type isn't defined, so I report a void*
486                         *info = type_get_simple( ARGTYPE_VOID );
487                         complain(type_die, "Storing void-pointer type: %p", *info);
488                         dict_insert( &type_hash, &die_offset, info );
489                         return true;
490                 }
491
492                 *info = calloc( 1, sizeof(struct arg_type_info) );
493                 if( *info == NULL )
494                 {
495                         complain(type_die, "alloc error");
496                         return false;
497                 }
498                 type_init_pointer(*info, NULL, 0);
499
500                 complain(type_die, "Storing pointer type: %p", *info);
501                 dict_insert( &type_hash, &die_offset, info );
502                 return getType( &(*info)->u.ptr_info.info, &next_die );
503
504         case DW_TAG_structure_type:
505                 *info = calloc( 1, sizeof(struct arg_type_info) );
506                 if( *info == NULL )
507                 {
508                         complain(type_die, "alloc error");
509                         return false;
510                 }
511
512                 complain(type_die, "Storing struct type: %p", *info);
513                 dict_insert( &type_hash, &die_offset, info );
514                 return getStructure( *info, type_die );
515
516
517         case DW_TAG_typedef: ;
518         case DW_TAG_const_type: ;
519         case DW_TAG_volatile_type: ;
520                 // Various tags are simply pass-through, so I just keep going
521                 bool res = true;
522                 if( getTypeDie(&next_die, type_die ) )
523                 {
524                         complain(type_die, "Storing const/typedef type: %p", *info);
525                         res = getType( info, &next_die );
526                 }
527                 else
528                 {
529                         // no type. Use 'void'. Normally I'd think this is bogus, but stdio
530                         // typedefs something to void
531                         *info = type_get_simple( ARGTYPE_VOID );
532                         complain(type_die, "Storing void type: %p", *info);
533                 }
534                 if( res )
535                         dict_insert( &type_hash, &die_offset, info );
536                 return res;
537
538         case DW_TAG_enumeration_type:
539                 // We have an enumeration. This has type "int", but has a particular
540                 // lens to handle the enum
541                 *info = calloc( 1, sizeof(struct arg_type_info) );
542                 if( *info == NULL )
543                 {
544                         complain(type_die, "alloc error");
545                         return false;
546                 }
547
548                 complain(type_die, "Storing enum int: %p", *info);
549                 dict_insert( &type_hash, &die_offset, info );
550                 return getEnum( *info, type_die );
551
552         case DW_TAG_array_type:
553                 *info = calloc( 1, sizeof(struct arg_type_info) );
554                 if( *info == NULL )
555                 {
556                         complain(type_die, "alloc error");
557                         return false;
558                 }
559
560                 complain(type_die, "Storing array: %p", *info);
561                 dict_insert( &type_hash, &die_offset, info );
562                 return getArray( *info, type_die );
563
564         default:
565                 complain(type_die, "Unknown type tag 0x%x", dwarf_tag(type_die));
566                 break;
567         }
568
569         return false;
570 }
571
572 static bool getPrototype(struct prototype* proto, Dwarf_Die* subroutine)
573 {
574         // First, look at the return type. This is stored in a DW_AT_type tag in the
575         // subroutine DIE. If there is no such tag, this function returns void
576         Dwarf_Die return_type_die;
577         if( !getTypeDie(&return_type_die, subroutine ) )
578         {
579                 proto->return_info = type_get_simple( ARGTYPE_VOID );
580                 proto->own_return_info = 0;
581         }
582         else
583         {
584                 proto->return_info = calloc( 1, sizeof( struct arg_type_info ) );
585                 if( proto->return_info == NULL )
586                 {
587                         complain(subroutine, "Couldn't alloc return type");
588                         return false;
589                 }
590                 proto->own_return_info = 0;
591
592                 if( !getType( &proto->return_info, &return_type_die ) )
593                 {
594                         complain(subroutine, "Couldn't get return type");
595                         return false;
596                 }
597         }
598
599
600         // Now look at the arguments
601         Dwarf_Die arg_die;
602         if( dwarf_child(subroutine, &arg_die) != 0 )
603         {
604                 // no args. We're done
605                 return true;
606         }
607
608         while(1) {
609                 if( dwarf_tag(&arg_die) != DW_TAG_formal_parameter )
610                         goto next_prototype_argument;
611
612                 complain(&arg_die, "arg: 0x%02x", dwarf_tag(&arg_die));
613
614                 Dwarf_Die type_die;
615                 if( !getTypeDie(&type_die, &arg_die ) )
616                 {
617                         complain(&arg_die, "Couldn't get the argument type die");
618                         return false;
619                 }
620
621                 struct arg_type_info* arg_type_info = NULL;
622                 if( !getType( &arg_type_info, &type_die ) )
623                 {
624                         complain(&arg_die, "Couldn't parse arg type from DWARF data");
625                         return false;
626                 }
627
628                 struct param param;
629                 param_init_type(&param, arg_type_info, 0);
630                 if( prototype_push_param(proto, &param) <0 )
631                 {
632                         complain(&arg_die, "couldn't add argument to the prototype");
633                         return false;
634                 }
635
636         next_prototype_argument: ;
637                 int res = dwarf_siblingof(&arg_die, &arg_die);
638                 if( res == 0 ) continue;     /* sibling exists    */
639                 if( res < 0 )  return false; /* error             */
640                 break;                       /* no sibling exists */
641         }
642
643         return true;
644 }
645
646 static bool process_die_compileunit(struct protolib* plib, struct library* lib, Dwarf_Die* parent)
647 {
648         Dwarf_Die die;
649         if( dwarf_child(parent, &die) != 0 )
650                 return false;
651
652         while(1)
653         {
654                 if( dwarf_tag(&die) == DW_TAG_subprogram )
655                 {
656                         const char* function_name = dwarf_diename(&die);
657
658                         complain(&die, "subroutine_type: 0x%02x; function '%s'", dwarf_tag(&die), function_name);
659
660                         struct prototype* proto =
661                                 protolib_lookup_prototype(plib, function_name, true );
662
663                         if( proto != NULL )
664                         {
665                                 complain(&die, "Prototype already exists. Skipping");
666                                 goto next_prototype;
667                         }
668
669                         if( !filter_matches_symbol(options.plt_filter,    function_name, lib) &&
670                                 !filter_matches_symbol(options.static_filter, function_name, lib) &&
671                                 !filter_matches_symbol(options.export_filter, function_name, lib) )
672                         {
673                                 complain(&die, "Prototype not requested by any filter");
674                                 goto next_prototype;
675                         }
676
677                         proto = malloc(sizeof(struct prototype));
678                         if( proto == NULL )
679                         {
680                                 complain(&die, "couldn't alloc prototype");
681                                 return false;
682                         }
683                         prototype_init( proto );
684
685                         if( !getPrototype(proto, &die ) )
686                         {
687                                 complain(&die, "couldn't get prototype");
688                                 return false;
689                         }
690
691                         protolib_add_prototype(plib, function_name, 0, proto);
692                 }
693
694                 next_prototype:;
695                 int res = dwarf_siblingof(&die, &die);
696                 if( res == 0 ) continue;     /* sibling exists    */
697                 if( res < 0 )  return false; /* error             */
698                 break;                       /* no sibling exists */
699         }
700
701         return true;
702 }
703
704 static bool import( struct protolib* plib, struct library* lib, Dwfl* dwfl )
705 {
706         dict_init(&type_hash, sizeof(Dwarf_Off), sizeof(struct arg_type_info*),
707                           dwarf_die_hash, dwarf_die_eq, NULL );
708
709         Dwarf_Addr bias;
710     Dwarf_Die* die = NULL;
711     while( (die = dwfl_nextcu(dwfl, die, &bias)) != NULL )
712     {
713         if( dwarf_tag(die) == DW_TAG_compile_unit )
714         {
715             if( !process_die_compileunit(plib, lib, die) )
716             {
717                 complain(die, "Error reading compile unit");
718                                 exit(1);
719                                 return false;
720             }
721         }
722         else
723         {
724             complain(die, "DW_TAG_compile_unit expected");
725                         exit(1);
726             return false;
727         }
728     }
729
730         dict_destroy( &type_hash, NULL, NULL, NULL );
731         return true;
732 }
733
734 bool import_DWARF_prototypes( struct protolib* plib, struct library* lib,
735                                                           Dwfl *dwfl )
736 {
737         if( plib == NULL )
738         {
739                 plib = protolib_cache_default(&g_protocache, lib->soname, 0);
740                 if (plib == NULL)
741                 {
742                         fprintf(stderr, "Error loading protolib %s: %s.\n",
743                                         lib->soname, strerror(errno));
744                 }
745         }
746
747         return import(plib, lib, dwfl);
748 }
749
750 /*
751 - I handle static functions now. Should I? Those do not have DW_AT_external==1
752
753 - should process existing prototypes to make sure they match
754
755 - what do function pointers look like? I'm doing void*
756
757 - unions
758
759 - all my *allocs leak
760
761 - protolib_lookup_prototype should look for imports?
762
763 */