do not error on warnings
[platform/upstream/ltrace.git] / ltrace-elf.c
1 /*
2  * This file is part of ltrace.
3  * Copyright (C) 2006,2010,2011,2012,2013 Petr Machata, Red Hat Inc.
4  * Copyright (C) 2010 Zachary T Welch, CodeSourcery
5  * Copyright (C) 2010 Joe Damato
6  * Copyright (C) 1997,1998,2001,2004,2007,2008,2009 Juan Cespedes
7  * Copyright (C) 2006 Olaf Hering, SUSE Linux GmbH
8  * Copyright (C) 2006 Eric Vaitl, Cisco Systems, Inc.
9  * Copyright (C) 2006 Paul Gilliam, IBM Corporation
10  * Copyright (C) 2006 Ian Wienand
11  *
12  * This program is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU General Public License as
14  * published by the Free Software Foundation; either version 2 of the
15  * License, or (at your option) any later version.
16  *
17  * This program is distributed in the hope that it will be useful, but
18  * WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * General Public License for more details.
21  *
22  * You should have received a copy of the GNU General Public License
23  * along with this program; if not, write to the Free Software
24  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25  * 02110-1301 USA
26  */
27
28 #include "config.h"
29
30 #include <assert.h>
31 #ifdef  __linux__
32 #include <endian.h>
33 #endif
34 #include <errno.h>
35 #include <fcntl.h>
36 #include <gelf.h>
37 #include <inttypes.h>
38 #include <search.h>
39 #include <stdbool.h>
40 #include <stdint.h>
41 #include <stdio.h>
42 #include <stdlib.h>
43 #include <string.h>
44 #include <strings.h>
45 #include <unistd.h>
46
47 #include "backend.h"
48 #include "filter.h"
49 #include "library.h"
50 #include "ltrace-elf.h"
51 #include "proc.h"
52 #include "debug.h"
53 #include "options.h"
54
55 #ifndef ARCH_HAVE_LTELF_DATA
/* Fallback definition, compiled only when ARCH_HAVE_LTELF_DATA is not
 * defined.  Both arguments are ignored and 0 is returned.  */
int
arch_elf_init(struct ltelf *lte, struct library *lib)
{
        (void)lte;
        (void)lib;
        return 0;
}
61
/* Fallback definition, compiled only when ARCH_HAVE_LTELF_DATA is not
 * defined.  The argument is ignored; there is nothing to release.  */
void
arch_elf_destroy(struct ltelf *lte)
{
        (void)lte;
}
66 #endif
67
68 #ifndef OS_HAVE_ADD_PLT_ENTRY
69 enum plt_status
70 os_elf_add_plt_entry(struct process *proc, struct ltelf *lte,
71                      const char *a_name, GElf_Rela *rela, size_t ndx,
72                      struct library_symbol **ret)
73 {
74         return PLT_DEFAULT;
75 }
76 #endif
77
78 #ifndef ARCH_HAVE_ADD_PLT_ENTRY
79 enum plt_status
80 arch_elf_add_plt_entry(struct process *proc, struct ltelf *lte,
81                        const char *a_name, GElf_Rela *rela, size_t ndx,
82                        struct library_symbol **ret)
83 {
84         return PLT_DEFAULT;
85 }
86 #endif
87
88 #ifndef OS_HAVE_ADD_FUNC_ENTRY
89 enum plt_status
90 os_elf_add_func_entry(struct process *proc, struct ltelf *lte,
91                       const GElf_Sym *sym,
92                       arch_addr_t addr, const char *name,
93                       struct library_symbol **ret)
94 {
95         if (GELF_ST_TYPE(sym->st_info) != STT_FUNC) {
96                 *ret = NULL;
97                 return PLT_OK;
98         } else {
99                 return PLT_DEFAULT;
100         }
101 }
102 #endif
103
104 #ifndef ARCH_HAVE_ADD_FUNC_ENTRY
105 enum plt_status
106 arch_elf_add_func_entry(struct process *proc, struct ltelf *lte,
107                         const GElf_Sym *sym,
108                         arch_addr_t addr, const char *name,
109                         struct library_symbol **ret)
110 {
111         return PLT_DEFAULT;
112 }
113 #endif
114
115 Elf_Data *
116 elf_loaddata(Elf_Scn *scn, GElf_Shdr *shdr)
117 {
118         Elf_Data *data = elf_getdata(scn, NULL);
119         if (data == NULL || elf_getdata(scn, data) != NULL
120             || data->d_off || data->d_size != shdr->sh_size)
121                 return NULL;
122         return data;
123 }
124
125 static int
126 elf_get_section_if(struct ltelf *lte, Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr,
127                    int (*predicate)(Elf_Scn *, GElf_Shdr *, void *data),
128                    void *data)
129 {
130         int i;
131         for (i = 1; i < lte->ehdr.e_shnum; ++i) {
132                 Elf_Scn *scn;
133                 GElf_Shdr shdr;
134
135                 scn = elf_getscn(lte->elf, i);
136                 if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL) {
137                         debug(1, "Couldn't read section or header.");
138                         return -1;
139                 }
140                 if (predicate(scn, &shdr, data)) {
141                         *tgt_sec = scn;
142                         *tgt_shdr = shdr;
143                         return 0;
144                 }
145         }
146
147         *tgt_sec = NULL;
148         return 0;
149 }
150
151 static int
152 inside_p(Elf_Scn *scn, GElf_Shdr *shdr, void *data)
153 {
154         GElf_Addr addr = *(GElf_Addr *)data;
155         return addr >= shdr->sh_addr
156                 && addr < shdr->sh_addr + shdr->sh_size;
157 }
158
159 int
160 elf_get_section_covering(struct ltelf *lte, GElf_Addr addr,
161                          Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr)
162 {
163         return elf_get_section_if(lte, tgt_sec, tgt_shdr,
164                                   &inside_p, &addr);
165 }
166
167 static int
168 type_p(Elf_Scn *scn, GElf_Shdr *shdr, void *data)
169 {
170         GElf_Word type = *(GElf_Word *)data;
171         return shdr->sh_type == type;
172 }
173
174 int
175 elf_get_section_type(struct ltelf *lte, GElf_Word type,
176                      Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr)
177 {
178         return elf_get_section_if(lte, tgt_sec, tgt_shdr,
179                                   &type_p, &type);
180 }
181
182 struct section_named_data {
183         struct ltelf *lte;
184         const char *name;
185 };
186
187 static int
188 name_p(Elf_Scn *scn, GElf_Shdr *shdr, void *d)
189 {
190         struct section_named_data *data = d;
191         const char *name = elf_strptr(data->lte->elf,
192                                       data->lte->ehdr.e_shstrndx,
193                                       shdr->sh_name);
194         return strcmp(name, data->name) == 0;
195 }
196
197 int
198 elf_get_section_named(struct ltelf *lte, const char *name,
199                      Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr)
200 {
201         struct section_named_data data = {
202                 .lte = lte,
203                 .name = name,
204         };
205         return elf_get_section_if(lte, tgt_sec, tgt_shdr,
206                                   &name_p, &data);
207 }
208
209 static struct elf_each_symbol_t
210 each_symbol_in(Elf_Data *symtab, const char *strtab, size_t count,
211                unsigned i,
212                enum callback_status (*cb)(GElf_Sym *symbol,
213                                           const char *name, void *data),
214                void *data)
215 {
216         for (; i < count; ++i) {
217                 GElf_Sym sym;
218                 if (gelf_getsym(symtab, i, &sym) == NULL)
219                         return (struct elf_each_symbol_t){ i, -2 };
220
221                 switch (cb(&sym, strtab + sym.st_name, data)) {
222                 case CBS_FAIL:
223                         return (struct elf_each_symbol_t){ i, -1 };
224                 case CBS_STOP:
225                         return (struct elf_each_symbol_t){ i + 1, 0 };
226                 case CBS_CONT:
227                         break;
228                 }
229         }
230
231         return (struct elf_each_symbol_t){ 0, 0 };
232 }
233
234 /* N.B.: gelf_getsym takes integer argument.  Since negative values
235  * are invalid as indices, we can use the extra bit to encode which
236  * symbol table we are looking into.  ltrace currently doesn't handle
237  * more than two symbol tables anyway, nor does it handle the xindex
238  * stuff.  */
239 struct elf_each_symbol_t
240 elf_each_symbol(struct ltelf *lte, unsigned start_after,
241                 enum callback_status (*cb)(GElf_Sym *symbol,
242                                            const char *name, void *data),
243                 void *data)
244 {
245         unsigned index = start_after == 0 ? 0 : start_after >> 1;
246
247         /* Go through static symbol table first.  */
248         if ((start_after & 0x1) == 0) {
249                 struct elf_each_symbol_t st
250                         = each_symbol_in(lte->symtab, lte->strtab,
251                                          lte->symtab_count, index, cb, data);
252
253                 /* If the iteration stopped prematurely, bail out.  */
254                 if (st.restart != 0)
255                         return ((struct elf_each_symbol_t)
256                                 { st.restart << 1, st.status });
257         }
258
259         struct elf_each_symbol_t st
260                 = each_symbol_in(lte->dynsym, lte->dynstr, lte->dynsym_count,
261                                  index, cb, data);
262         if (st.restart != 0)
263                 return ((struct elf_each_symbol_t)
264                         { st.restart << 1 | 0x1, st.status });
265
266         return (struct elf_each_symbol_t){ 0, 0 };
267 }
268
269 int
270 elf_can_read_next(Elf_Data *data, GElf_Xword offset, GElf_Xword size)
271 {
272         assert(data != NULL);
273         if (data->d_size < size || offset > data->d_size - size) {
274                 debug(1, "Not enough data to read %"PRId64"-byte value"
275                       " at offset %"PRId64".", size, offset);
276                 return 0;
277         }
278         return 1;
279 }
280
281 #define DEF_READER(NAME, SIZE)                                          \
282         int                                                             \
283         NAME(Elf_Data *data, GElf_Xword offset, uint##SIZE##_t *retp)   \
284         {                                                               \
285                 if (!elf_can_read_next(data, offset, SIZE / 8))         \
286                         return -1;                                      \
287                                                                         \
288                 if (data->d_buf == NULL) /* NODATA section */ {         \
289                         *retp = 0;                                      \
290                         return 0;                                       \
291                 }                                                       \
292                                                                         \
293                 union {                                                 \
294                         uint##SIZE##_t dst;                             \
295                         char buf[0];                                    \
296                 } u;                                                    \
297                 memcpy(u.buf, data->d_buf + offset, sizeof(u.dst));     \
298                 *retp = u.dst;                                          \
299                 return 0;                                               \
300         }
301
302 DEF_READER(elf_read_u8, 8)
303 DEF_READER(elf_read_u16, 16)
304 DEF_READER(elf_read_u32, 32)
305 DEF_READER(elf_read_u64, 64)
306
307 #undef DEF_READER
308
309 #define DEF_READER(NAME, SIZE)                                          \
310         int                                                             \
311         NAME(Elf_Data *data, GElf_Xword *offset, uint##SIZE##_t *retp)  \
312         {                                                               \
313                 int rc = elf_read_u##SIZE(data, *offset, retp);         \
314                 if (rc < 0)                                             \
315                         return rc;                                      \
316                 *offset += SIZE / 8;                                    \
317                 return 0;                                               \
318         }
319
320 DEF_READER(elf_read_next_u8, 8)
321 DEF_READER(elf_read_next_u16, 16)
322 DEF_READER(elf_read_next_u32, 32)
323 DEF_READER(elf_read_next_u64, 64)
324
325 #undef DEF_READER
326
327 int
328 elf_read_next_uleb128(Elf_Data *data, GElf_Xword *offset, uint64_t *retp)
329 {
330         uint64_t result = 0;
331         int shift = 0;
332         int size = 8 * sizeof result;
333
334         while (1) {
335                 uint8_t byte;
336                 if (elf_read_next_u8(data, offset, &byte) < 0)
337                         return -1;
338
339                 uint8_t payload = byte & 0x7f;
340                 result |= (uint64_t)payload << shift;
341                 shift += 7;
342                 if (shift > size && byte != 0x1)
343                         return -1;
344                 if ((byte & 0x80) == 0)
345                         break;
346         }
347
348         if (retp != NULL)
349                 *retp = result;
350         return 0;
351 }
352
353 int
354 elf_read_uleb128(Elf_Data *data, GElf_Xword offset, uint64_t *retp)
355 {
356         return elf_read_next_uleb128(data, &offset, retp);
357 }
358
359 int
360 ltelf_init(struct ltelf *lte, const char *filename)
361 {
362         memset(lte, 0, sizeof *lte);
363         lte->fd = open(filename, O_RDONLY);
364         if (lte->fd == -1) {
365                 fprintf(stderr, "Can't open %s: %s\n", filename,
366                         strerror(errno));
367                 return 1;
368         }
369
370         elf_version(EV_CURRENT);
371
372 #ifdef HAVE_ELF_C_READ_MMAP
373         lte->elf = elf_begin(lte->fd, ELF_C_READ_MMAP, NULL);
374 #else
375         lte->elf = elf_begin(lte->fd, ELF_C_READ, NULL);
376 #endif
377
378         if (lte->elf == NULL || elf_kind(lte->elf) != ELF_K_ELF) {
379                 fprintf(stderr, "\"%s\" is not an ELF file\n", filename);
380                 exit(EXIT_FAILURE);
381         }
382
383         if (gelf_getehdr(lte->elf, &lte->ehdr) == NULL) {
384                 fprintf(stderr, "can't read ELF header of \"%s\": %s\n",
385                         filename, elf_errmsg(-1));
386                 exit(EXIT_FAILURE);
387         }
388
389         if (lte->ehdr.e_type != ET_EXEC && lte->ehdr.e_type != ET_DYN) {
390                 fprintf(stderr, "\"%s\" is neither an ELF executable"
391                         " nor a shared library\n", filename);
392                 exit(EXIT_FAILURE);
393         }
394
395         if (1
396 #ifdef LT_ELF_MACHINE
397             && (lte->ehdr.e_ident[EI_CLASS] != LT_ELFCLASS
398                 || lte->ehdr.e_machine != LT_ELF_MACHINE)
399 #endif
400 #ifdef LT_ELF_MACHINE2
401             && (lte->ehdr.e_ident[EI_CLASS] != LT_ELFCLASS2
402                 || lte->ehdr.e_machine != LT_ELF_MACHINE2)
403 #endif
404 #ifdef LT_ELF_MACHINE3
405             && (lte->ehdr.e_ident[EI_CLASS] != LT_ELFCLASS3
406                 || lte->ehdr.e_machine != LT_ELF_MACHINE3)
407 #endif
408                 ) {
409                 fprintf(stderr,
410                         "\"%s\" is ELF from incompatible architecture\n",
411                         filename);
412                 exit(EXIT_FAILURE);
413         }
414
415         VECT_INIT(&lte->plt_relocs, GElf_Rela);
416
417         return 0;
418 }
419
420 void
421 ltelf_destroy(struct ltelf *lte)
422 {
423         debug(DEBUG_FUNCTION, "close_elf()");
424         elf_end(lte->elf);
425         close(lte->fd);
426         VECT_DESTROY(&lte->plt_relocs, GElf_Rela, NULL, NULL);
427 }
428
429 static void
430 read_symbol_table(struct ltelf *lte, const char *filename,
431                   Elf_Scn *scn, GElf_Shdr *shdr, const char *name,
432                   Elf_Data **datap, size_t *countp, const char **strsp)
433 {
434         *datap = elf_getdata(scn, NULL);
435         *countp = shdr->sh_size / shdr->sh_entsize;
436         if ((*datap == NULL || elf_getdata(scn, *datap) != NULL)
437             && options.static_filter != NULL) {
438                 fprintf(stderr, "Couldn't get data of section"
439                         " %s from \"%s\": %s\n",
440                         name, filename, elf_errmsg(-1));
441                 exit(EXIT_FAILURE);
442         }
443
444         scn = elf_getscn(lte->elf, shdr->sh_link);
445         GElf_Shdr shdr2;
446         if (scn == NULL || gelf_getshdr(scn, &shdr2) == NULL) {
447                 fprintf(stderr, "Couldn't get header of section"
448                         " #%d from \"%s\": %s\n",
449                         shdr->sh_link, filename, elf_errmsg(-1));
450                 exit(EXIT_FAILURE);
451         }
452
453         Elf_Data *data = elf_getdata(scn, NULL);
454         if (data == NULL || elf_getdata(scn, data) != NULL
455             || shdr2.sh_size != data->d_size || data->d_off) {
456                 fprintf(stderr, "Couldn't get data of section"
457                         " #%d from \"%s\": %s\n",
458                         shdr2.sh_link, filename, elf_errmsg(-1));
459                 exit(EXIT_FAILURE);
460         }
461
462         *strsp = data->d_buf;
463 }
464
465 static int
466 rel_to_rela(struct ltelf *lte, const GElf_Rel *rel, GElf_Rela *rela)
467 {
468         rela->r_offset = rel->r_offset;
469         rela->r_info = rel->r_info;
470
471         Elf_Scn *sec;
472         GElf_Shdr shdr;
473         if (elf_get_section_covering(lte, rel->r_offset, &sec, &shdr) < 0
474             || sec == NULL)
475                 return -1;
476
477         Elf_Data *data = elf_loaddata(sec, &shdr);
478         if (data == NULL)
479                 return -1;
480
481         GElf_Xword offset = rel->r_offset - shdr.sh_addr - data->d_off;
482         uint64_t value;
483         if (lte->ehdr.e_ident[EI_CLASS] == ELFCLASS32) {
484                 uint32_t tmp;
485                 if (elf_read_u32(data, offset, &tmp) < 0)
486                         return -1;
487                 value = tmp;
488         } else if (elf_read_u64(data, offset, &value) < 0) {
489                 return -1;
490         }
491
492         rela->r_addend = value;
493         return 0;
494 }
495
496 int
497 elf_read_relocs(struct ltelf *lte, Elf_Scn *scn, GElf_Shdr *shdr,
498                 struct vect *rela_vec)
499 {
500         if (vect_reserve_additional(rela_vec, lte->ehdr.e_shnum) < 0)
501                 return -1;
502
503         Elf_Data *relplt = elf_loaddata(scn, shdr);
504         if (relplt == NULL) {
505                 fprintf(stderr, "Couldn't load .rel*.plt data.\n");
506                 return -1;
507         }
508
509         if ((shdr->sh_size % shdr->sh_entsize) != 0) {
510                 fprintf(stderr, ".rel*.plt size (%" PRIx64 "d) not a multiple "
511                         "of its sh_entsize (%" PRIx64 "d).\n",
512                         shdr->sh_size, shdr->sh_entsize);
513                 return -1;
514         }
515
516         GElf_Xword relplt_count = shdr->sh_size / shdr->sh_entsize;
517         GElf_Xword i;
518         for (i = 0; i < relplt_count; ++i) {
519                 GElf_Rela rela;
520                 if (relplt->d_type == ELF_T_REL) {
521                         GElf_Rel rel;
522                         if (gelf_getrel(relplt, i, &rel) == NULL
523                             || rel_to_rela(lte, &rel, &rela) < 0)
524                                 return -1;
525
526                 } else if (gelf_getrela(relplt, i, &rela) == NULL) {
527                         return -1;
528                 }
529
530                 if (VECT_PUSHBACK(rela_vec, &rela) < 0)
531                         return -1;
532         }
533
534         return 0;
535 }
536
537 int
538 elf_load_dynamic_entry(struct ltelf *lte, int tag, GElf_Addr *valuep)
539 {
540         Elf_Scn *scn;
541         GElf_Shdr shdr;
542         if (elf_get_section_type(lte, SHT_DYNAMIC, &scn, &shdr) < 0
543             || scn == NULL) {
544         fail:
545                 fprintf(stderr, "Couldn't get SHT_DYNAMIC: %s\n",
546                         elf_errmsg(-1));
547                 return -1;
548         }
549
550         Elf_Data *data = elf_loaddata(scn, &shdr);
551         if (data == NULL)
552                 goto fail;
553
554         size_t j;
555         for (j = 0; j < shdr.sh_size / shdr.sh_entsize; ++j) {
556                 GElf_Dyn dyn;
557                 if (gelf_getdyn(data, j, &dyn) == NULL)
558                         goto fail;
559
560                 if(dyn.d_tag == tag) {
561                         *valuep = dyn.d_un.d_ptr;
562                         return 0;
563                 }
564         }
565
566         return -1;
567 }
568
569 static int
570 ltelf_read_elf(struct ltelf *lte, const char *filename)
571 {
572         int i;
573         GElf_Addr relplt_addr = 0;
574         GElf_Addr soname_offset = 0;
575         GElf_Xword relplt_size = 0;
576
577         debug(DEBUG_FUNCTION, "ltelf_read_elf(filename=%s)", filename);
578         debug(1, "Reading ELF from %s...", filename);
579
580         for (i = 1; i < lte->ehdr.e_shnum; ++i) {
581                 Elf_Scn *scn;
582                 GElf_Shdr shdr;
583                 const char *name;
584
585                 scn = elf_getscn(lte->elf, i);
586                 if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL) {
587                         fprintf(stderr, "Couldn't get section #%d from"
588                                 " \"%s\": %s\n", i, filename, elf_errmsg(-1));
589                         exit(EXIT_FAILURE);
590                 }
591
592                 name = elf_strptr(lte->elf, lte->ehdr.e_shstrndx, shdr.sh_name);
593                 if (name == NULL) {
594                         fprintf(stderr, "Couldn't get name of section #%d from"
595                                 " \"%s\": %s\n", i, filename, elf_errmsg(-1));
596                         exit(EXIT_FAILURE);
597                 }
598
599                 if (shdr.sh_type == SHT_SYMTAB) {
600                         read_symbol_table(lte, filename,
601                                           scn, &shdr, name, &lte->symtab,
602                                           &lte->symtab_count, &lte->strtab);
603
604                 } else if (shdr.sh_type == SHT_DYNSYM) {
605                         read_symbol_table(lte, filename,
606                                           scn, &shdr, name, &lte->dynsym,
607                                           &lte->dynsym_count, &lte->dynstr);
608
609                 } else if (shdr.sh_type == SHT_DYNAMIC) {
610                         Elf_Data *data;
611                         size_t j;
612
613                         lte->dyn_addr = shdr.sh_addr + lte->bias;
614                         lte->dyn_sz = shdr.sh_size;
615
616                         data = elf_getdata(scn, NULL);
617                         if (data == NULL || elf_getdata(scn, data) != NULL) {
618                                 fprintf(stderr, "Couldn't get .dynamic data"
619                                         " from \"%s\": %s\n",
620                                         filename, strerror(errno));
621                                 exit(EXIT_FAILURE);
622                         }
623
624                         for (j = 0; j < shdr.sh_size / shdr.sh_entsize; ++j) {
625                                 GElf_Dyn dyn;
626
627                                 if (gelf_getdyn(data, j, &dyn) == NULL) {
628                                         fprintf(stderr, "Couldn't get .dynamic"
629                                                 " data from \"%s\": %s\n",
630                                                 filename, strerror(errno));
631                                         exit(EXIT_FAILURE);
632                                 }
633                                 if (dyn.d_tag == DT_JMPREL)
634                                         relplt_addr = dyn.d_un.d_ptr;
635                                 else if (dyn.d_tag == DT_PLTRELSZ)
636                                         relplt_size = dyn.d_un.d_val;
637                                 else if (dyn.d_tag == DT_SONAME)
638                                         soname_offset = dyn.d_un.d_val;
639                         }
640                 } else if (shdr.sh_type == SHT_PROGBITS
641                            || shdr.sh_type == SHT_NOBITS) {
642                         if (strcmp(name, ".plt") == 0) {
643                                 lte->plt_addr = shdr.sh_addr;
644                                 lte->plt_size = shdr.sh_size;
645                                 lte->plt_data = elf_loaddata(scn, &shdr);
646                                 if (lte->plt_data == NULL)
647                                         fprintf(stderr,
648                                                 "Can't load .plt data\n");
649                                 lte->plt_flags = shdr.sh_flags;
650                         }
651 #ifdef ARCH_SUPPORTS_OPD
652                         else if (strcmp(name, ".opd") == 0) {
653                                 lte->opd_addr = (GElf_Addr *) (long) shdr.sh_addr;
654                                 lte->opd_size = shdr.sh_size;
655                                 lte->opd = elf_rawdata(scn, NULL);
656                         }
657 #endif
658                 }
659         }
660
661         if (lte->dynsym == NULL || lte->dynstr == NULL) {
662                 fprintf(stderr, "Couldn't find .dynsym or .dynstr in \"%s\"\n",
663                         filename);
664                 exit(EXIT_FAILURE);
665         }
666
667         if (!relplt_addr || !lte->plt_addr) {
668                 debug(1, "%s has no PLT relocations", filename);
669         } else if (relplt_size == 0) {
670                 debug(1, "%s has unknown PLT size", filename);
671         } else {
672                 for (i = 1; i < lte->ehdr.e_shnum; ++i) {
673                         Elf_Scn *scn;
674                         GElf_Shdr shdr;
675
676                         scn = elf_getscn(lte->elf, i);
677                         if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL) {
678                                 fprintf(stderr, "Couldn't get section header"
679                                         " from \"%s\": %s\n",
680                                         filename, elf_errmsg(-1));
681                                 exit(EXIT_FAILURE);
682                         }
683                         if (shdr.sh_addr == relplt_addr
684                             && shdr.sh_size == relplt_size) {
685                                 if (elf_read_relocs(lte, scn, &shdr,
686                                                     &lte->plt_relocs) < 0) {
687                                         fprintf(stderr, "Couldn't get .rel*.plt"
688                                                 " data from \"%s\": %s\n",
689                                                 filename, elf_errmsg(-1));
690                                         exit(EXIT_FAILURE);
691                                 }
692                                 break;
693                         }
694                 }
695
696                 if (i == lte->ehdr.e_shnum) {
697                         fprintf(stderr,
698                                 "Couldn't find .rel*.plt section in \"%s\"\n",
699                                 filename);
700                         exit(EXIT_FAILURE);
701                 }
702         }
703         debug(1, "%s %zd PLT relocations", filename,
704               vect_size(&lte->plt_relocs));
705
706         if (soname_offset != 0)
707                 lte->soname = lte->dynstr + soname_offset;
708
709         return 0;
710 }
711
712 #ifndef ARCH_HAVE_GET_SYMINFO
713 int
714 arch_get_sym_info(struct ltelf *lte, const char *filename,
715                   size_t sym_index, GElf_Rela *rela, GElf_Sym *sym)
716 {
717         return gelf_getsym(lte->dynsym,
718                            ELF64_R_SYM(rela->r_info), sym) != NULL ? 0 : -1;
719 }
720 #endif
721
722 int
723 default_elf_add_plt_entry(struct process *proc, struct ltelf *lte,
724                           const char *a_name, GElf_Rela *rela, size_t ndx,
725                           struct library_symbol **ret)
726 {
727         char *name = strdup(a_name);
728         if (name == NULL) {
729         fail_message:
730                 fprintf(stderr, "Couldn't create symbol for PLT entry: %s\n",
731                         strerror(errno));
732         fail:
733                 free(name);
734                 return -1;
735         }
736
737         GElf_Addr addr = arch_plt_sym_val(lte, ndx, rela);
738
739         struct library_symbol *libsym = malloc(sizeof(*libsym));
740         if (libsym == NULL)
741                 goto fail_message;
742
743         /* XXX The double cast should be removed when
744          * arch_addr_t becomes integral type.  */
745         arch_addr_t taddr = (arch_addr_t)
746                 (uintptr_t)(addr + lte->bias);
747
748         if (library_symbol_init(libsym, taddr, name, 1, LS_TOPLT_EXEC) < 0) {
749                 free(libsym);
750                 goto fail;
751         }
752
753         libsym->next = *ret;
754         *ret = libsym;
755         return 0;
756 }
757
758 int
759 elf_add_plt_entry(struct process *proc, struct ltelf *lte,
760                   const char *name, GElf_Rela *rela, size_t idx,
761                   struct library_symbol **ret)
762 {
763         enum plt_status plts
764                 = arch_elf_add_plt_entry(proc, lte, name, rela, idx, ret);
765
766         if (plts == PLT_DEFAULT)
767                 plts = os_elf_add_plt_entry(proc, lte, name, rela, idx, ret);
768
769         switch (plts) {
770         case PLT_DEFAULT:
771                 return default_elf_add_plt_entry(proc, lte, name,
772                                                  rela, idx, ret);
773         case PLT_FAIL:
774                 return -1;
775         case PLT_OK:
776                 return 0;
777         }
778
779         assert(! "Invalid return from X_elf_add_plt_entry!");
780         abort();
781 }
782
783 static void
784 mark_chain_latent(struct library_symbol *libsym)
785 {
786         for (; libsym != NULL; libsym = libsym->next) {
787                 debug(DEBUG_FUNCTION, "marking %s latent", libsym->name);
788                 libsym->latent = 1;
789         }
790 }
791
792 static void
793 filter_symbol_chain(struct filter *filter,
794                     struct library_symbol **libsymp, struct library *lib)
795 {
796         assert(libsymp != NULL);
797         struct library_symbol **ptr = libsymp;
798         while (*ptr != NULL) {
799                 if (filter_matches_symbol(filter, (*ptr)->name, lib)) {
800                         ptr = &(*ptr)->next;
801                 } else {
802                         struct library_symbol *sym = *ptr;
803                         *ptr = (*ptr)->next;
804                         library_symbol_destroy(sym);
805                         free(sym);
806                 }
807         }
808 }
809
810 static int
811 populate_plt(struct process *proc, const char *filename,
812              struct ltelf *lte, struct library *lib)
813 {
814         const bool latent_plts = options.export_filter != NULL;
815         const size_t count = vect_size(&lte->plt_relocs);
816
817         size_t i;
818         for (i = 0; i < count; ++i) {
819                 GElf_Rela *rela = VECT_ELEMENT(&lte->plt_relocs, GElf_Rela, i);
820                 GElf_Sym sym;
821
822                 switch (arch_get_sym_info(lte, filename, i, rela, &sym)) {
823                 default:
824                         fprintf(stderr,
825                                 "Couldn't get relocation for symbol #%zd"
826                                 " from \"%s\": %s\n",
827                                 i, filename, elf_errmsg(-1));
828                         /* Fall through.  */
829                 case 1:
830                         continue; /* Skip this entry.  */
831                 case 0:
832                         break;
833                 }
834
835                 char const *name = lte->dynstr + sym.st_name;
836                 int matched = filter_matches_symbol(options.plt_filter,
837                                                     name, lib);
838
839                 struct library_symbol *libsym = NULL;
840                 if (elf_add_plt_entry(proc, lte, name, rela, i, &libsym) < 0)
841                         return -1;
842
843                 /* If we didn't match the PLT entry, filter the chain
844                  * to only include the matching symbols (but include
845                  * all if we are adding latent symbols) to allow
846                  * backends to override the PLT symbol's name.  */
847
848                 if (! matched && ! latent_plts)
849                         filter_symbol_chain(options.plt_filter, &libsym, lib);
850
851                 if (libsym != NULL) {
852                         /* If we are adding those symbols just for
853                          * tracing exports, mark them all latent.  */
854                         if (! matched && latent_plts)
855                                 mark_chain_latent(libsym);
856                         library_add_symbol(lib, libsym);
857                 }
858         }
859         return 0;
860 }
861
862 void
863 delete_symbol_chain(struct library_symbol *libsym)
864 {
865         while (libsym != NULL) {
866                 struct library_symbol *tmp = libsym->next;
867                 library_symbol_destroy(libsym);
868                 free(libsym);
869                 libsym = tmp;
870         }
871 }
872
873 /* When -x rules result in request to trace several aliases, we only
874  * want to add such symbol once.  The only way that those symbols
875  * differ in is their name, e.g. in glibc you have __GI___libc_free,
876  * __cfree, __free, __libc_free, cfree and free all defined on the
877  * same address.  So instead we keep this unique symbol struct for
878  * each address, and replace name in libsym with a shorter variant if
879  * we find it.  */
880 struct unique_symbol {
881         arch_addr_t addr;
882         struct library_symbol *libsym;
883 };
884
885 static int
886 unique_symbol_cmp(const void *key, const void *val)
887 {
888         const struct unique_symbol *sym_key = key;
889         const struct unique_symbol *sym_val = val;
890         return sym_key->addr != sym_val->addr;
891 }
892
893 static enum callback_status
894 symbol_with_address(struct library_symbol *sym, void *addrptr)
895 {
896         return sym->enter_addr == *(arch_addr_t *)addrptr
897                 ? CBS_STOP : CBS_CONT;
898 }
899
900 static int
901 populate_this_symtab(struct process *proc, const char *filename,
902                      struct ltelf *lte, struct library *lib,
903                      Elf_Data *symtab, const char *strtab, size_t count,
904                      struct library_exported_names *names,
905                      bool only_exported_names)
906 {
907         /* Using sorted array would be arguably better, but this
908          * should be well enough for the number of symbols that we
909          * typically deal with.  */
910         size_t num_symbols = 0;
911         struct unique_symbol *symbols = malloc(sizeof(*symbols) * count);
912         if (symbols == NULL) {
913                 fprintf(stderr, "couldn't insert symbols for -x: %s\n",
914                         strerror(errno));
915                 return -1;
916         }
917
918         GElf_Word secflags[lte->ehdr.e_shnum];
919         size_t i;
920         for (i = 1; i < lte->ehdr.e_shnum; ++i) {
921                 Elf_Scn *scn = elf_getscn(lte->elf, i);
922                 GElf_Shdr shdr;
923                 if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL)
924                         secflags[i] = 0;
925                 else
926                         secflags[i] = shdr.sh_flags;
927         }
928
929         for (i = 0; i < count; ++i) {
930                 GElf_Sym sym;
931                 if (gelf_getsym(symtab, i, &sym) == NULL) {
932                         fprintf(stderr,
933                                 "couldn't get symbol #%zd from %s: %s\n",
934                                 i, filename, elf_errmsg(-1));
935                         continue;
936                 }
937
938                 if (sym.st_value == 0 || sym.st_shndx == STN_UNDEF
939                     /* Also ignore any special values besides direct
940                      * section references.  */
941                     || sym.st_shndx >= lte->ehdr.e_shnum)
942                         continue;
943
944                 /* Find symbol name and snip version.  */
945                 const char *orig_name = strtab + sym.st_name;
946                 const char *version = strchr(orig_name, '@');
947                 size_t len = version != NULL ? (assert(version > orig_name),
948                                                 (size_t)(version - orig_name))
949                         : strlen(orig_name);
950                 char name[len + 1];
951                 memcpy(name, orig_name, len);
952                 name[len] = 0;
953
954                 /* If we are interested in exports, store this name.  */
955                 if (names != NULL) {
956                         char *name_copy = strdup(name);
957                         if (name_copy == NULL ||
958                             library_exported_names_push(names,
959                                                         sym.st_value,
960                                                         name_copy, 1) != 0)
961                         {
962                                 fprintf(stderr, "Couldn't store symbol %s.  "
963                                         "Tracing may be incomplete.\n", name);
964                         }
965                 }
966
967                 /* If we're only dealing with the exported names list, there's
968                  * nothing left to do with this symbol */
969                 if (only_exported_names)
970                         continue;
971
972                 /* If the symbol is not matched, skip it.  We already
973                  * stored it to export list above.  */
974                 if (!filter_matches_symbol(options.static_filter, name, lib))
975                         continue;
976
977                 arch_addr_t addr = (arch_addr_t)
978                         (uintptr_t)(sym.st_value + lte->bias);
979                 arch_addr_t naddr;
980
981                 /* On arches that support OPD, the value of typical
982                  * function symbol will be a pointer to .opd, but some
983                  * will point directly to .text.  We don't want to
984                  * translate those.  */
985                 if (secflags[sym.st_shndx] & SHF_EXECINSTR) {
986                         naddr = addr;
987                 } else if (arch_translate_address(lte, addr, &naddr) < 0) {
988                         fprintf(stderr,
989                                 "couldn't translate address of %s@%s: %s\n",
990                                 name, lib->soname, strerror(errno));
991                         continue;
992                 }
993
994                 char *full_name = strdup(name);
995                 if (full_name == NULL) {
996                         fprintf(stderr, "couldn't copy name of %s@%s: %s\n",
997                                 name, lib->soname, strerror(errno));
998                         continue;
999                 }
1000
1001                 struct library_symbol *libsym = NULL;
1002                 enum plt_status plts
1003                         = arch_elf_add_func_entry(proc, lte, &sym,
1004                                                   naddr, full_name, &libsym);
1005                 if (plts == PLT_DEFAULT)
1006                         plts = os_elf_add_func_entry(proc, lte, &sym,
1007                                                      naddr, full_name, &libsym);
1008
1009                 switch (plts) {
1010                 case PLT_DEFAULT:;
1011                         /* Put the default symbol to the chain.  */
1012                         struct library_symbol *tmp = malloc(sizeof *tmp);
1013                         if (tmp == NULL
1014                             || library_symbol_init(tmp, naddr, full_name, 1,
1015                                                    LS_TOPLT_NONE) < 0) {
1016                                 free(tmp);
1017
1018                                 /* Either add the whole bunch, or none
1019                                  * of it.  Note that for PLT_FAIL we
1020                                  * don't do this--it's the callee's
1021                                  * job to clean up after itself before
1022                                  * it bails out.  */
1023                                 delete_symbol_chain(libsym);
1024                                 libsym = NULL;
1025
1026                 case PLT_FAIL:
1027                                 fprintf(stderr, "Couldn't add symbol %s@%s "
1028                                         "for tracing.\n", name, lib->soname);
1029
1030                                 break;
1031                         }
1032
1033                         full_name = NULL;
1034                         tmp->next = libsym;
1035                         libsym = tmp;
1036                         break;
1037
1038                 case PLT_OK:
1039                         break;
1040                 }
1041
1042                 free(full_name);
1043
1044                 struct library_symbol *tmp;
1045                 for (tmp = libsym; tmp != NULL; ) {
1046                         /* Look whether we already have a symbol for
1047                          * this address.  If not, add this one.  If
1048                          * yes, look if we should pick the new symbol
1049                          * name.  */
1050
1051                         struct unique_symbol key = { tmp->enter_addr, NULL };
1052                         struct unique_symbol *unique
1053                                 = lsearch(&key, symbols, &num_symbols,
1054                                           sizeof *symbols, &unique_symbol_cmp);
1055
1056                         if (unique->libsym == NULL) {
1057                                 unique->libsym = tmp;
1058                                 unique->addr = tmp->enter_addr;
1059                                 tmp = tmp->next;
1060                                 unique->libsym->next = NULL;
1061                         } else {
1062                                 if (strlen(tmp->name)
1063                                     < strlen(unique->libsym->name)) {
1064                                         library_symbol_set_name
1065                                                 (unique->libsym, tmp->name, 1);
1066                                         tmp->name = NULL;
1067                                 }
1068                                 struct library_symbol *next = tmp->next;
1069                                 library_symbol_destroy(tmp);
1070                                 free(tmp);
1071                                 tmp = next;
1072                         }
1073                 }
1074         }
1075
1076         /* If we're only dealing with the exported names list, there's nothing
1077          * left to do */
1078         if (only_exported_names)
1079                 return 0;
1080
1081
1082         /* Now we do the union of this set of unique symbols with
1083          * what's already in the library.  */
1084         for (i = 0; i < num_symbols; ++i) {
1085                 struct library_symbol *this_sym = symbols[i].libsym;
1086                 assert(this_sym != NULL);
1087                 struct library_symbol *other
1088                         = library_each_symbol(lib, NULL, symbol_with_address,
1089                                               &this_sym->enter_addr);
1090                 if (other != NULL) {
1091                         library_symbol_destroy(this_sym);
1092                         free(this_sym);
1093                         symbols[i].libsym = NULL;
1094                 }
1095         }
1096
1097         for (i = 0; i < num_symbols; ++i)
1098                 if (symbols[i].libsym != NULL)
1099                         library_add_symbol(lib, symbols[i].libsym);
1100
1101         free(symbols);
1102         return 0;
1103 }
1104
1105 static int
1106 populate_symtab(struct process *proc, const char *filename,
1107                 struct ltelf *lte, struct library *lib,
1108                 int symtabs, int exports)
1109 {
1110         int status;
1111         if (symtabs && lte->symtab != NULL && lte->strtab != NULL
1112             && (status = populate_this_symtab(proc, filename, lte, lib,
1113                                               lte->symtab, lte->strtab,
1114                                               lte->symtab_count, NULL,
1115                                               false)) < 0)
1116                 return status;
1117
1118         /* Check whether we want to trace symbols implemented by this
1119          * library (-l).  */
1120         struct library_exported_names *names = &lib->exported_names;
1121         lib->should_activate_latent = exports != 0;
1122
1123         bool only_exported_names = symtabs == 0 && exports == 0;
1124         return populate_this_symtab(proc, filename, lte, lib,
1125                                     lte->dynsym, lte->dynstr,
1126                                     lte->dynsym_count, names,
1127                                     only_exported_names);
1128 }
1129
1130 static int
1131 read_module(struct library *lib, struct process *proc,
1132             const char *filename, GElf_Addr bias, int main)
1133 {
1134         struct ltelf lte;
1135         if (ltelf_init(&lte, filename) < 0)
1136                 return -1;
1137
1138         /* XXX When we abstract ABI into a module, this should instead
1139          * become something like
1140          *
1141          *    proc->abi = arch_get_abi(lte.ehdr);
1142          *
1143          * The code in ltelf_init needs to be replaced by this logic.
1144          * Be warned that libltrace.c calls ltelf_init as well to
1145          * determine whether ABI is supported.  This is to get
1146          * reasonable error messages when trying to run 64-bit binary
1147          * with 32-bit ltrace.  It is desirable to preserve this.  */
1148         proc->e_machine = lte.ehdr.e_machine;
1149         proc->e_class = lte.ehdr.e_ident[EI_CLASS];
1150         get_arch_dep(proc);
1151
1152         /* Find out the base address.  For PIE main binaries we look
1153          * into auxv, otherwise we scan phdrs.  */
1154         if (main && lte.ehdr.e_type == ET_DYN) {
1155                 arch_addr_t entry;
1156                 if (process_get_entry(proc, &entry, NULL) < 0) {
1157                         fprintf(stderr, "Couldn't find entry of PIE %s\n",
1158                                 filename);
1159                 fail:
1160                         ltelf_destroy(&lte);
1161                         return -1;
1162                 }
1163                 /* XXX The double cast should be removed when
1164                  * arch_addr_t becomes integral type.  */
1165                 lte.entry_addr = (GElf_Addr)(uintptr_t)entry;
1166                 lte.bias = (GElf_Addr)(uintptr_t)entry - lte.ehdr.e_entry;
1167
1168         } else {
1169                 GElf_Phdr phdr;
1170                 size_t i;
1171                 for (i = 0; gelf_getphdr (lte.elf, i, &phdr) != NULL; ++i) {
1172                         if (phdr.p_type == PT_LOAD) {
1173                                 lte.base_addr = phdr.p_vaddr + bias;
1174                                 break;
1175                         }
1176                 }
1177
1178                 lte.bias = bias;
1179                 lte.entry_addr = lte.ehdr.e_entry + lte.bias;
1180
1181                 if (lte.base_addr == 0) {
1182                         fprintf(stderr,
1183                                 "Couldn't determine base address of %s\n",
1184                                 filename);
1185                         goto fail;
1186                 }
1187         }
1188
1189         if (ltelf_read_elf(&lte, filename) < 0)
1190                 goto fail;
1191
1192         if (arch_elf_init(&lte, lib) < 0) {
1193                 fprintf(stderr, "Backend initialization failed.\n");
1194                 goto fail;
1195         }
1196
1197         if (lib == NULL)
1198                 goto fail;
1199
1200         /* Note that we set soname and pathname as soon as they are
1201          * allocated, so in case of further errors, this get released
1202          * when LIB is released, which should happen in the caller
1203          * when we return error.  */
1204
1205         if (lib->pathname == NULL) {
1206                 char *pathname = strdup(filename);
1207                 if (pathname == NULL)
1208                         goto fail;
1209                 library_set_pathname(lib, pathname, 1);
1210         }
1211
1212         if (lte.soname != NULL) {
1213                 char *soname = strdup(lte.soname);
1214                 if (soname == NULL)
1215                         goto fail;
1216                 library_set_soname(lib, soname, 1);
1217         } else {
1218                 const char *soname = rindex(lib->pathname, '/');
1219                 if (soname != NULL)
1220                         soname += 1;
1221                 else
1222                         soname = lib->pathname;
1223                 library_set_soname(lib, soname, 0);
1224         }
1225
1226         /* XXX The double cast should be removed when
1227          * arch_addr_t becomes integral type.  */
1228         arch_addr_t entry = (arch_addr_t)(uintptr_t)lte.entry_addr;
1229         if (arch_translate_address(&lte, entry, &entry) < 0)
1230                 goto fail;
1231
1232         /* XXX The double cast should be removed when
1233          * arch_addr_t becomes integral type.  */
1234         lib->base = (arch_addr_t)(uintptr_t)lte.base_addr;
1235         lib->entry = entry;
1236         /* XXX The double cast should be removed when
1237          * arch_addr_t becomes integral type.  */
1238         lib->dyn_addr = (arch_addr_t)(uintptr_t)lte.dyn_addr;
1239
1240         /* There are several reasons that we need to inspect symbol tables or
1241          * populate PLT entries. The user may have requested corresponding
1242          * tracing features (respectively -x and -e), or they requested tracing
1243          * exported symbols (-l). We also do this to resolve symbol aliases
1244          *
1245          * In the case of -l, we need to keep even those PLT slots that are not
1246          * requested by -e (but we keep them latent). We also need to inspect
1247          * .dynsym to find what exports this library provide, to turn on
1248          * existing latent PLT entries. */
1249
1250         int plts = filter_matches_library(options.plt_filter, lib);
1251         if ((plts || options.export_filter != NULL)
1252             && populate_plt(proc, filename, &lte, lib) < 0)
1253                 goto fail;
1254
1255         int exports = filter_matches_library(options.export_filter, lib);
1256         int symtabs = filter_matches_library(options.static_filter, lib);
1257         if (populate_symtab(proc, filename, &lte, lib,
1258                             symtabs, exports) < 0)
1259                 goto fail;
1260
1261         arch_elf_destroy(&lte);
1262         ltelf_destroy(&lte);
1263         return 0;
1264 }
1265
1266 int
1267 ltelf_read_library(struct library *lib, struct process *proc,
1268                    const char *filename, GElf_Addr bias)
1269 {
1270         return read_module(lib, proc, filename, bias, 0);
1271 }
1272
1273
1274 struct library *
1275 ltelf_read_main_binary(struct process *proc, const char *path)
1276 {
1277         struct library *lib = malloc(sizeof(*lib));
1278         if (lib == NULL || library_init(lib, LT_LIBTYPE_MAIN) < 0) {
1279                 free(lib);
1280                 return NULL;
1281         }
1282         library_set_pathname(lib, path, 0);
1283
1284         /* There is a race between running the process and reading its
1285          * binary for internal consumption.  So open the binary from
1286          * the /proc filesystem.  XXX Note that there is similar race
1287          * for libraries, but there we don't have a nice answer like
1288          * that.  Presumably we could read the DSOs from the process
1289          * memory image, but that's not currently done.  */
1290         char *fname = pid2name(proc->pid);
1291         if (fname == NULL
1292             || read_module(lib, proc, fname, 0, 1) < 0) {
1293                 library_destroy(lib);
1294                 free(lib);
1295                 lib = NULL;
1296         }
1297
1298         free(fname);
1299         return lib;
1300 }