Move load_dynamic_entry from PPC backend to ltrace-elf.c/.h
[platform/upstream/ltrace.git] / ltrace-elf.c
1 /*
2  * This file is part of ltrace.
3  * Copyright (C) 2006,2010,2011,2012,2013 Petr Machata, Red Hat Inc.
4  * Copyright (C) 2010 Zachary T Welch, CodeSourcery
5  * Copyright (C) 2010 Joe Damato
6  * Copyright (C) 1997,1998,2001,2004,2007,2008,2009 Juan Cespedes
7  * Copyright (C) 2006 Olaf Hering, SUSE Linux GmbH
8  * Copyright (C) 2006 Eric Vaitl, Cisco Systems, Inc.
9  * Copyright (C) 2006 Paul Gilliam, IBM Corporation
10  * Copyright (C) 2006 Ian Wienand
11  *
12  * This program is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU General Public License as
14  * published by the Free Software Foundation; either version 2 of the
15  * License, or (at your option) any later version.
16  *
17  * This program is distributed in the hope that it will be useful, but
18  * WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * General Public License for more details.
21  *
22  * You should have received a copy of the GNU General Public License
23  * along with this program; if not, write to the Free Software
24  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25  * 02110-1301 USA
26  */
27
28 #include "config.h"
29
30 #include <assert.h>
31 #ifdef  __linux__
32 #include <endian.h>
33 #endif
34 #include <errno.h>
35 #include <fcntl.h>
36 #include <gelf.h>
37 #include <inttypes.h>
38 #include <search.h>
39 #include <stdbool.h>
40 #include <stdint.h>
41 #include <stdio.h>
42 #include <stdlib.h>
43 #include <string.h>
44 #include <strings.h>
45 #include <unistd.h>
46
47 #include "backend.h"
48 #include "filter.h"
49 #include "library.h"
50 #include "ltrace-elf.h"
51 #include "proc.h"
52 #include "debug.h"
53 #include "options.h"
54
55 #ifndef ARCH_HAVE_LTELF_DATA
/* Fallback for backends that do not define ARCH_HAVE_LTELF_DATA:
 * nothing is initialized and success (0) is always reported.  */
int
arch_elf_init(struct ltelf *lte, struct library *lib)
{
        (void)lte;
        (void)lib;
        return 0;
}
61
/* Fallback counterpart of arch_elf_init: there is nothing to
 * release, so this is a no-op.  */
void
arch_elf_destroy(struct ltelf *lte)
{
        (void)lte;
}
66 #endif
67
68 #ifndef OS_HAVE_ADD_PLT_ENTRY
69 enum plt_status
70 os_elf_add_plt_entry(struct process *proc, struct ltelf *lte,
71                      const char *a_name, GElf_Rela *rela, size_t ndx,
72                      struct library_symbol **ret)
73 {
74         return PLT_DEFAULT;
75 }
76 #endif
77
78 #ifndef ARCH_HAVE_ADD_PLT_ENTRY
79 enum plt_status
80 arch_elf_add_plt_entry(struct process *proc, struct ltelf *lte,
81                        const char *a_name, GElf_Rela *rela, size_t ndx,
82                        struct library_symbol **ret)
83 {
84         return PLT_DEFAULT;
85 }
86 #endif
87
88 #ifndef OS_HAVE_ADD_FUNC_ENTRY
89 enum plt_status
90 os_elf_add_func_entry(struct process *proc, struct ltelf *lte,
91                       const GElf_Sym *sym,
92                       arch_addr_t addr, const char *name,
93                       struct library_symbol **ret)
94 {
95         if (GELF_ST_TYPE(sym->st_info) != STT_FUNC) {
96                 *ret = NULL;
97                 return PLT_OK;
98         } else {
99                 return PLT_DEFAULT;
100         }
101 }
102 #endif
103
104 #ifndef ARCH_HAVE_ADD_FUNC_ENTRY
105 enum plt_status
106 arch_elf_add_func_entry(struct process *proc, struct ltelf *lte,
107                         const GElf_Sym *sym,
108                         arch_addr_t addr, const char *name,
109                         struct library_symbol **ret)
110 {
111         return PLT_DEFAULT;
112 }
113 #endif
114
115 Elf_Data *
116 elf_loaddata(Elf_Scn *scn, GElf_Shdr *shdr)
117 {
118         Elf_Data *data = elf_getdata(scn, NULL);
119         if (data == NULL || elf_getdata(scn, data) != NULL
120             || data->d_off || data->d_size != shdr->sh_size)
121                 return NULL;
122         return data;
123 }
124
125 static int
126 elf_get_section_if(struct ltelf *lte, Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr,
127                    int (*predicate)(Elf_Scn *, GElf_Shdr *, void *data),
128                    void *data)
129 {
130         int i;
131         for (i = 1; i < lte->ehdr.e_shnum; ++i) {
132                 Elf_Scn *scn;
133                 GElf_Shdr shdr;
134
135                 scn = elf_getscn(lte->elf, i);
136                 if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL) {
137                         debug(1, "Couldn't read section or header.");
138                         return -1;
139                 }
140                 if (predicate(scn, &shdr, data)) {
141                         *tgt_sec = scn;
142                         *tgt_shdr = shdr;
143                         return 0;
144                 }
145         }
146
147         *tgt_sec = NULL;
148         return 0;
149 }
150
151 static int
152 inside_p(Elf_Scn *scn, GElf_Shdr *shdr, void *data)
153 {
154         GElf_Addr addr = *(GElf_Addr *)data;
155         return addr >= shdr->sh_addr
156                 && addr < shdr->sh_addr + shdr->sh_size;
157 }
158
159 int
160 elf_get_section_covering(struct ltelf *lte, GElf_Addr addr,
161                          Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr)
162 {
163         return elf_get_section_if(lte, tgt_sec, tgt_shdr,
164                                   &inside_p, &addr);
165 }
166
167 static int
168 type_p(Elf_Scn *scn, GElf_Shdr *shdr, void *data)
169 {
170         GElf_Word type = *(GElf_Word *)data;
171         return shdr->sh_type == type;
172 }
173
174 int
175 elf_get_section_type(struct ltelf *lte, GElf_Word type,
176                      Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr)
177 {
178         return elf_get_section_if(lte, tgt_sec, tgt_shdr,
179                                   &type_p, &type);
180 }
181
/* Closure handed to name_p through the untyped DATA argument of
 * elf_get_section_if.  */
struct section_named_data {
        /* ELF file whose section-name string table is consulted.  */
        struct ltelf *lte;
        /* Section name being searched for.  */
        const char *name;
};
186
187 static int
188 name_p(Elf_Scn *scn, GElf_Shdr *shdr, void *d)
189 {
190         struct section_named_data *data = d;
191         const char *name = elf_strptr(data->lte->elf,
192                                       data->lte->ehdr.e_shstrndx,
193                                       shdr->sh_name);
194         return strcmp(name, data->name) == 0;
195 }
196
197 int
198 elf_get_section_named(struct ltelf *lte, const char *name,
199                      Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr)
200 {
201         struct section_named_data data = {
202                 .lte = lte,
203                 .name = name,
204         };
205         return elf_get_section_if(lte, tgt_sec, tgt_shdr,
206                                   &name_p, &data);
207 }
208
209 static struct elf_each_symbol_t
210 each_symbol_in(Elf_Data *symtab, const char *strtab, size_t count,
211                unsigned i,
212                enum callback_status (*cb)(GElf_Sym *symbol,
213                                           const char *name, void *data),
214                void *data)
215 {
216         for (; i < count; ++i) {
217                 GElf_Sym sym;
218                 if (gelf_getsym(symtab, i, &sym) == NULL)
219                         return (struct elf_each_symbol_t){ i, -2 };
220
221                 switch (cb(&sym, strtab + sym.st_name, data)) {
222                 case CBS_FAIL:
223                         return (struct elf_each_symbol_t){ i, -1 };
224                 case CBS_STOP:
225                         return (struct elf_each_symbol_t){ i + 1, 0 };
226                 case CBS_CONT:
227                         break;
228                 }
229         }
230
231         return (struct elf_each_symbol_t){ 0, 0 };
232 }
233
234 /* N.B.: gelf_getsym takes integer argument.  Since negative values
235  * are invalid as indices, we can use the extra bit to encode which
236  * symbol table we are looking into.  ltrace currently doesn't handle
237  * more than two symbol tables anyway, nor does it handle the xindex
238  * stuff.  */
239 struct elf_each_symbol_t
240 elf_each_symbol(struct ltelf *lte, unsigned start_after,
241                 enum callback_status (*cb)(GElf_Sym *symbol,
242                                            const char *name, void *data),
243                 void *data)
244 {
245         unsigned index = start_after == 0 ? 0 : start_after >> 1;
246
247         /* Go through static symbol table first.  */
248         if ((start_after & 0x1) == 0) {
249                 struct elf_each_symbol_t st
250                         = each_symbol_in(lte->symtab, lte->strtab,
251                                          lte->symtab_count, index, cb, data);
252
253                 /* If the iteration stopped prematurely, bail out.  */
254                 if (st.restart != 0)
255                         return ((struct elf_each_symbol_t)
256                                 { st.restart << 1, st.status });
257         }
258
259         struct elf_each_symbol_t st
260                 = each_symbol_in(lte->dynsym, lte->dynstr, lte->dynsym_count,
261                                  index, cb, data);
262         if (st.restart != 0)
263                 return ((struct elf_each_symbol_t)
264                         { st.restart << 1 | 0x1, st.status });
265
266         return (struct elf_each_symbol_t){ 0, 0 };
267 }
268
269 int
270 elf_can_read_next(Elf_Data *data, GElf_Xword offset, GElf_Xword size)
271 {
272         assert(data != NULL);
273         if (data->d_size < size || offset > data->d_size - size) {
274                 debug(1, "Not enough data to read %"PRId64"-byte value"
275                       " at offset %"PRId64".", size, offset);
276                 return 0;
277         }
278         return 1;
279 }
280
281 #define DEF_READER(NAME, SIZE)                                          \
282         int                                                             \
283         NAME(Elf_Data *data, GElf_Xword offset, uint##SIZE##_t *retp)   \
284         {                                                               \
285                 if (!elf_can_read_next(data, offset, SIZE / 8))         \
286                         return -1;                                      \
287                                                                         \
288                 if (data->d_buf == NULL) /* NODATA section */ {         \
289                         *retp = 0;                                      \
290                         return 0;                                       \
291                 }                                                       \
292                                                                         \
293                 union {                                                 \
294                         uint##SIZE##_t dst;                             \
295                         char buf[0];                                    \
296                 } u;                                                    \
297                 memcpy(u.buf, data->d_buf + offset, sizeof(u.dst));     \
298                 *retp = u.dst;                                          \
299                 return 0;                                               \
300         }
301
302 DEF_READER(elf_read_u8, 8)
303 DEF_READER(elf_read_u16, 16)
304 DEF_READER(elf_read_u32, 32)
305 DEF_READER(elf_read_u64, 64)
306
307 #undef DEF_READER
308
309 #define DEF_READER(NAME, SIZE)                                          \
310         int                                                             \
311         NAME(Elf_Data *data, GElf_Xword *offset, uint##SIZE##_t *retp)  \
312         {                                                               \
313                 int rc = elf_read_u##SIZE(data, *offset, retp);         \
314                 if (rc < 0)                                             \
315                         return rc;                                      \
316                 *offset += SIZE / 8;                                    \
317                 return 0;                                               \
318         }
319
320 DEF_READER(elf_read_next_u8, 8)
321 DEF_READER(elf_read_next_u16, 16)
322 DEF_READER(elf_read_next_u32, 32)
323 DEF_READER(elf_read_next_u64, 64)
324
325 #undef DEF_READER
326
327 int
328 elf_read_next_uleb128(Elf_Data *data, GElf_Xword *offset, uint64_t *retp)
329 {
330         uint64_t result = 0;
331         int shift = 0;
332         int size = 8 * sizeof result;
333
334         while (1) {
335                 uint8_t byte;
336                 if (elf_read_next_u8(data, offset, &byte) < 0)
337                         return -1;
338
339                 uint8_t payload = byte & 0x7f;
340                 result |= (uint64_t)payload << shift;
341                 shift += 7;
342                 if (shift > size && byte != 0x1)
343                         return -1;
344                 if ((byte & 0x80) == 0)
345                         break;
346         }
347
348         if (retp != NULL)
349                 *retp = result;
350         return 0;
351 }
352
353 int
354 elf_read_uleb128(Elf_Data *data, GElf_Xword offset, uint64_t *retp)
355 {
356         return elf_read_next_uleb128(data, &offset, retp);
357 }
358
/* Open FILENAME and prepare LTE for reading it as an ELF file.
 *
 * LTE is zeroed first.  Returns 1 if the file can't be opened and 0
 * on success.  Every other problem (not an ELF file, unreadable ELF
 * header, neither ET_EXEC nor ET_DYN, unsupported class/machine) is
 * reported on stderr and terminates the process.  */
int
ltelf_init(struct ltelf *lte, const char *filename)
{
        memset(lte, 0, sizeof *lte);
        lte->fd = open(filename, O_RDONLY);
        if (lte->fd == -1)
                return 1;

        elf_version(EV_CURRENT);

#ifdef HAVE_ELF_C_READ_MMAP
        lte->elf = elf_begin(lte->fd, ELF_C_READ_MMAP, NULL);
#else
        lte->elf = elf_begin(lte->fd, ELF_C_READ, NULL);
#endif

        if (lte->elf == NULL || elf_kind(lte->elf) != ELF_K_ELF) {
                fprintf(stderr, "\"%s\" is not an ELF file\n", filename);
                exit(EXIT_FAILURE);
        }

        if (gelf_getehdr(lte->elf, &lte->ehdr) == NULL) {
                fprintf(stderr, "can't read ELF header of \"%s\": %s\n",
                        filename, elf_errmsg(-1));
                exit(EXIT_FAILURE);
        }

        if (lte->ehdr.e_type != ET_EXEC && lte->ehdr.e_type != ET_DYN) {
                fprintf(stderr, "\"%s\" is neither an ELF executable"
                        " nor a shared library\n", filename);
                exit(EXIT_FAILURE);
        }

        /* Each configured (class, machine) pair contributes one
         * "doesn't match this pair" term.  The terms are and-ed
         * together, so the file is rejected only when it matches
         * none of the pairs.  */
        if (1
#ifdef LT_ELF_MACHINE
            && (lte->ehdr.e_ident[EI_CLASS] != LT_ELFCLASS
                || lte->ehdr.e_machine != LT_ELF_MACHINE)
#endif
#ifdef LT_ELF_MACHINE2
            && (lte->ehdr.e_ident[EI_CLASS] != LT_ELFCLASS2
                || lte->ehdr.e_machine != LT_ELF_MACHINE2)
#endif
#ifdef LT_ELF_MACHINE3
            && (lte->ehdr.e_ident[EI_CLASS] != LT_ELFCLASS3
                || lte->ehdr.e_machine != LT_ELF_MACHINE3)
#endif
                ) {
                fprintf(stderr,
                        "\"%s\" is ELF from incompatible architecture\n",
                        filename);
                exit(EXIT_FAILURE);
        }

        /* Start with an empty vector of PLT relocations.  */
        VECT_INIT(&lte->plt_relocs, GElf_Rela);

        return 0;
}
416
417 void
418 ltelf_destroy(struct ltelf *lte)
419 {
420         debug(DEBUG_FUNCTION, "close_elf()");
421         elf_end(lte->elf);
422         close(lte->fd);
423         VECT_DESTROY(&lte->plt_relocs, GElf_Rela, NULL, NULL);
424 }
425
426 static void
427 read_symbol_table(struct ltelf *lte, const char *filename,
428                   Elf_Scn *scn, GElf_Shdr *shdr, const char *name,
429                   Elf_Data **datap, size_t *countp, const char **strsp)
430 {
431         *datap = elf_getdata(scn, NULL);
432         *countp = shdr->sh_size / shdr->sh_entsize;
433         if ((*datap == NULL || elf_getdata(scn, *datap) != NULL)
434             && options.static_filter != NULL) {
435                 fprintf(stderr, "Couldn't get data of section"
436                         " %s from \"%s\": %s\n",
437                         name, filename, elf_errmsg(-1));
438                 exit(EXIT_FAILURE);
439         }
440
441         scn = elf_getscn(lte->elf, shdr->sh_link);
442         GElf_Shdr shdr2;
443         if (scn == NULL || gelf_getshdr(scn, &shdr2) == NULL) {
444                 fprintf(stderr, "Couldn't get header of section"
445                         " #%d from \"%s\": %s\n",
446                         shdr->sh_link, filename, elf_errmsg(-1));
447                 exit(EXIT_FAILURE);
448         }
449
450         Elf_Data *data = elf_getdata(scn, NULL);
451         if (data == NULL || elf_getdata(scn, data) != NULL
452             || shdr2.sh_size != data->d_size || data->d_off) {
453                 fprintf(stderr, "Couldn't get data of section"
454                         " #%d from \"%s\": %s\n",
455                         shdr2.sh_link, filename, elf_errmsg(-1));
456                 exit(EXIT_FAILURE);
457         }
458
459         *strsp = data->d_buf;
460 }
461
462 static int
463 rel_to_rela(struct ltelf *lte, const GElf_Rel *rel, GElf_Rela *rela)
464 {
465         rela->r_offset = rel->r_offset;
466         rela->r_info = rel->r_info;
467
468         Elf_Scn *sec;
469         GElf_Shdr shdr;
470         if (elf_get_section_covering(lte, rel->r_offset, &sec, &shdr) < 0
471             || sec == NULL)
472                 return -1;
473
474         Elf_Data *data = elf_loaddata(sec, &shdr);
475         if (data == NULL)
476                 return -1;
477
478         GElf_Xword offset = rel->r_offset - shdr.sh_addr - data->d_off;
479         uint64_t value;
480         if (lte->ehdr.e_ident[EI_CLASS] == ELFCLASS32) {
481                 uint32_t tmp;
482                 if (elf_read_u32(data, offset, &tmp) < 0)
483                         return -1;
484                 value = tmp;
485         } else if (elf_read_u64(data, offset, &value) < 0) {
486                 return -1;
487         }
488
489         rela->r_addend = value;
490         return 0;
491 }
492
493 int
494 elf_read_relocs(struct ltelf *lte, Elf_Scn *scn, GElf_Shdr *shdr,
495                 struct vect *rela_vec)
496 {
497         if (vect_reserve_additional(rela_vec, lte->ehdr.e_shnum) < 0)
498                 return -1;
499
500         Elf_Data *relplt = elf_loaddata(scn, shdr);
501         if (relplt == NULL) {
502                 fprintf(stderr, "Couldn't load .rel*.plt data.\n");
503                 return -1;
504         }
505
506         if ((shdr->sh_size % shdr->sh_entsize) != 0) {
507                 fprintf(stderr, ".rel*.plt size (%" PRIx64 "d) not a multiple "
508                         "of its sh_entsize (%" PRIx64 "d).\n",
509                         shdr->sh_size, shdr->sh_entsize);
510                 return -1;
511         }
512
513         GElf_Xword relplt_count = shdr->sh_size / shdr->sh_entsize;
514         GElf_Xword i;
515         for (i = 0; i < relplt_count; ++i) {
516                 GElf_Rela rela;
517                 if (relplt->d_type == ELF_T_REL) {
518                         GElf_Rel rel;
519                         if (gelf_getrel(relplt, i, &rel) == NULL
520                             || rel_to_rela(lte, &rel, &rela) < 0)
521                                 return -1;
522
523                 } else if (gelf_getrela(relplt, i, &rela) == NULL) {
524                         return -1;
525                 }
526
527                 if (VECT_PUSHBACK(rela_vec, &rela) < 0)
528                         return -1;
529         }
530
531         return 0;
532 }
533
534 int
535 elf_load_dynamic_entry(struct ltelf *lte, int tag, GElf_Addr *valuep)
536 {
537         Elf_Scn *scn;
538         GElf_Shdr shdr;
539         if (elf_get_section_type(lte, SHT_DYNAMIC, &scn, &shdr) < 0
540             || scn == NULL) {
541         fail:
542                 fprintf(stderr, "Couldn't get SHT_DYNAMIC: %s\n",
543                         elf_errmsg(-1));
544                 return -1;
545         }
546
547         Elf_Data *data = elf_loaddata(scn, &shdr);
548         if (data == NULL)
549                 goto fail;
550
551         size_t j;
552         for (j = 0; j < shdr.sh_size / shdr.sh_entsize; ++j) {
553                 GElf_Dyn dyn;
554                 if (gelf_getdyn(data, j, &dyn) == NULL)
555                         goto fail;
556
557                 if(dyn.d_tag == tag) {
558                         *valuep = dyn.d_un.d_ptr;
559                         return 0;
560                 }
561         }
562
563         return -1;
564 }
565
566 static int
567 ltelf_read_elf(struct ltelf *lte, const char *filename)
568 {
569         int i;
570         GElf_Addr relplt_addr = 0;
571         GElf_Addr soname_offset = 0;
572         GElf_Xword relplt_size = 0;
573
574         debug(DEBUG_FUNCTION, "ltelf_read_elf(filename=%s)", filename);
575         debug(1, "Reading ELF from %s...", filename);
576
577         for (i = 1; i < lte->ehdr.e_shnum; ++i) {
578                 Elf_Scn *scn;
579                 GElf_Shdr shdr;
580                 const char *name;
581
582                 scn = elf_getscn(lte->elf, i);
583                 if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL) {
584                         fprintf(stderr, "Couldn't get section #%d from"
585                                 " \"%s\": %s\n", i, filename, elf_errmsg(-1));
586                         exit(EXIT_FAILURE);
587                 }
588
589                 name = elf_strptr(lte->elf, lte->ehdr.e_shstrndx, shdr.sh_name);
590                 if (name == NULL) {
591                         fprintf(stderr, "Couldn't get name of section #%d from"
592                                 " \"%s\": %s\n", i, filename, elf_errmsg(-1));
593                         exit(EXIT_FAILURE);
594                 }
595
596                 if (shdr.sh_type == SHT_SYMTAB) {
597                         read_symbol_table(lte, filename,
598                                           scn, &shdr, name, &lte->symtab,
599                                           &lte->symtab_count, &lte->strtab);
600
601                 } else if (shdr.sh_type == SHT_DYNSYM) {
602                         read_symbol_table(lte, filename,
603                                           scn, &shdr, name, &lte->dynsym,
604                                           &lte->dynsym_count, &lte->dynstr);
605
606                 } else if (shdr.sh_type == SHT_DYNAMIC) {
607                         Elf_Data *data;
608                         size_t j;
609
610                         lte->dyn_addr = shdr.sh_addr + lte->bias;
611                         lte->dyn_sz = shdr.sh_size;
612
613                         data = elf_getdata(scn, NULL);
614                         if (data == NULL || elf_getdata(scn, data) != NULL) {
615                                 fprintf(stderr, "Couldn't get .dynamic data"
616                                         " from \"%s\": %s\n",
617                                         filename, strerror(errno));
618                                 exit(EXIT_FAILURE);
619                         }
620
621                         for (j = 0; j < shdr.sh_size / shdr.sh_entsize; ++j) {
622                                 GElf_Dyn dyn;
623
624                                 if (gelf_getdyn(data, j, &dyn) == NULL) {
625                                         fprintf(stderr, "Couldn't get .dynamic"
626                                                 " data from \"%s\": %s\n",
627                                                 filename, strerror(errno));
628                                         exit(EXIT_FAILURE);
629                                 }
630                                 if (dyn.d_tag == DT_JMPREL)
631                                         relplt_addr = dyn.d_un.d_ptr;
632                                 else if (dyn.d_tag == DT_PLTRELSZ)
633                                         relplt_size = dyn.d_un.d_val;
634                                 else if (dyn.d_tag == DT_SONAME)
635                                         soname_offset = dyn.d_un.d_val;
636                         }
637                 } else if (shdr.sh_type == SHT_PROGBITS
638                            || shdr.sh_type == SHT_NOBITS) {
639                         if (strcmp(name, ".plt") == 0) {
640                                 lte->plt_addr = shdr.sh_addr;
641                                 lte->plt_size = shdr.sh_size;
642                                 lte->plt_data = elf_loaddata(scn, &shdr);
643                                 if (lte->plt_data == NULL)
644                                         fprintf(stderr,
645                                                 "Can't load .plt data\n");
646                                 lte->plt_flags = shdr.sh_flags;
647                         }
648 #ifdef ARCH_SUPPORTS_OPD
649                         else if (strcmp(name, ".opd") == 0) {
650                                 lte->opd_addr = (GElf_Addr *) (long) shdr.sh_addr;
651                                 lte->opd_size = shdr.sh_size;
652                                 lte->opd = elf_rawdata(scn, NULL);
653                         }
654 #endif
655                 }
656         }
657
658         if (lte->dynsym == NULL || lte->dynstr == NULL) {
659                 fprintf(stderr, "Couldn't find .dynsym or .dynstr in \"%s\"\n",
660                         filename);
661                 exit(EXIT_FAILURE);
662         }
663
664         if (!relplt_addr || !lte->plt_addr) {
665                 debug(1, "%s has no PLT relocations", filename);
666         } else if (relplt_size == 0) {
667                 debug(1, "%s has unknown PLT size", filename);
668         } else {
669                 for (i = 1; i < lte->ehdr.e_shnum; ++i) {
670                         Elf_Scn *scn;
671                         GElf_Shdr shdr;
672
673                         scn = elf_getscn(lte->elf, i);
674                         if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL) {
675                                 fprintf(stderr, "Couldn't get section header"
676                                         " from \"%s\": %s\n",
677                                         filename, elf_errmsg(-1));
678                                 exit(EXIT_FAILURE);
679                         }
680                         if (shdr.sh_addr == relplt_addr
681                             && shdr.sh_size == relplt_size) {
682                                 if (elf_read_relocs(lte, scn, &shdr,
683                                                     &lte->plt_relocs) < 0) {
684                                         fprintf(stderr, "Couldn't get .rel*.plt"
685                                                 " data from \"%s\": %s\n",
686                                                 filename, elf_errmsg(-1));
687                                         exit(EXIT_FAILURE);
688                                 }
689                                 break;
690                         }
691                 }
692
693                 if (i == lte->ehdr.e_shnum) {
694                         fprintf(stderr,
695                                 "Couldn't find .rel*.plt section in \"%s\"\n",
696                                 filename);
697                         exit(EXIT_FAILURE);
698                 }
699         }
700         debug(1, "%s %zd PLT relocations", filename,
701               vect_size(&lte->plt_relocs));
702
703         if (soname_offset != 0)
704                 lte->soname = lte->dynstr + soname_offset;
705
706         return 0;
707 }
708
709 #ifndef ARCH_HAVE_GET_SYMINFO
710 int
711 arch_get_sym_info(struct ltelf *lte, const char *filename,
712                   size_t sym_index, GElf_Rela *rela, GElf_Sym *sym)
713 {
714         return gelf_getsym(lte->dynsym,
715                            ELF64_R_SYM(rela->r_info), sym) != NULL ? 0 : -1;
716 }
717 #endif
718
719 int
720 default_elf_add_plt_entry(struct process *proc, struct ltelf *lte,
721                           const char *a_name, GElf_Rela *rela, size_t ndx,
722                           struct library_symbol **ret)
723 {
724         char *name = strdup(a_name);
725         if (name == NULL) {
726         fail_message:
727                 fprintf(stderr, "Couldn't create symbol for PLT entry: %s\n",
728                         strerror(errno));
729         fail:
730                 free(name);
731                 return -1;
732         }
733
734         GElf_Addr addr = arch_plt_sym_val(lte, ndx, rela);
735
736         struct library_symbol *libsym = malloc(sizeof(*libsym));
737         if (libsym == NULL)
738                 goto fail_message;
739
740         /* XXX The double cast should be removed when
741          * arch_addr_t becomes integral type.  */
742         arch_addr_t taddr = (arch_addr_t)
743                 (uintptr_t)(addr + lte->bias);
744
745         if (library_symbol_init(libsym, taddr, name, 1, LS_TOPLT_EXEC) < 0) {
746                 free(libsym);
747                 goto fail;
748         }
749
750         libsym->next = *ret;
751         *ret = libsym;
752         return 0;
753 }
754
755 int
756 elf_add_plt_entry(struct process *proc, struct ltelf *lte,
757                   const char *name, GElf_Rela *rela, size_t idx,
758                   struct library_symbol **ret)
759 {
760         enum plt_status plts
761                 = arch_elf_add_plt_entry(proc, lte, name, rela, idx, ret);
762
763         if (plts == PLT_DEFAULT)
764                 plts = os_elf_add_plt_entry(proc, lte, name, rela, idx, ret);
765
766         switch (plts) {
767         case PLT_DEFAULT:
768                 return default_elf_add_plt_entry(proc, lte, name,
769                                                  rela, idx, ret);
770         case PLT_FAIL:
771                 return -1;
772         case PLT_OK:
773                 return 0;
774         }
775
776         assert(! "Invalid return from X_elf_add_plt_entry!");
777         abort();
778 }
779
780 static void
781 mark_chain_latent(struct library_symbol *libsym)
782 {
783         for (; libsym != NULL; libsym = libsym->next) {
784                 debug(DEBUG_FUNCTION, "marking %s latent", libsym->name);
785                 libsym->latent = 1;
786         }
787 }
788
789 static void
790 filter_symbol_chain(struct filter *filter,
791                     struct library_symbol **libsymp, struct library *lib)
792 {
793         assert(libsymp != NULL);
794         struct library_symbol **ptr = libsymp;
795         while (*ptr != NULL) {
796                 if (filter_matches_symbol(filter, (*ptr)->name, lib)) {
797                         ptr = &(*ptr)->next;
798                 } else {
799                         struct library_symbol *sym = *ptr;
800                         *ptr = (*ptr)->next;
801                         library_symbol_destroy(sym);
802                         free(sym);
803                 }
804         }
805 }
806
807 static int
808 populate_plt(struct process *proc, const char *filename,
809              struct ltelf *lte, struct library *lib)
810 {
811         const bool latent_plts = options.export_filter != NULL;
812         const size_t count = vect_size(&lte->plt_relocs);
813
814         size_t i;
815         for (i = 0; i < count; ++i) {
816                 GElf_Rela *rela = VECT_ELEMENT(&lte->plt_relocs, GElf_Rela, i);
817                 GElf_Sym sym;
818
819                 switch (arch_get_sym_info(lte, filename, i, rela, &sym)) {
820                 default:
821                         fprintf(stderr,
822                                 "Couldn't get relocation for symbol #%zd"
823                                 " from \"%s\": %s\n",
824                                 i, filename, elf_errmsg(-1));
825                         /* Fall through.  */
826                 case 1:
827                         continue; /* Skip this entry.  */
828                 case 0:
829                         break;
830                 }
831
832                 char const *name = lte->dynstr + sym.st_name;
833                 int matched = filter_matches_symbol(options.plt_filter,
834                                                     name, lib);
835
836                 struct library_symbol *libsym = NULL;
837                 if (elf_add_plt_entry(proc, lte, name, rela, i, &libsym) < 0)
838                         return -1;
839
840                 /* If we didn't match the PLT entry, filter the chain
841                  * to only include the matching symbols (but include
842                  * all if we are adding latent symbols) to allow
843                  * backends to override the PLT symbol's name.  */
844
845                 if (! matched && ! latent_plts)
846                         filter_symbol_chain(options.plt_filter, &libsym, lib);
847
848                 if (libsym != NULL) {
849                         /* If we are adding those symbols just for
850                          * tracing exports, mark them all latent.  */
851                         if (! matched && latent_plts)
852                                 mark_chain_latent(libsym);
853                         library_add_symbol(lib, libsym);
854                 }
855         }
856         return 0;
857 }
858
859 static void
860 delete_symbol_chain(struct library_symbol *libsym)
861 {
862         while (libsym != NULL) {
863                 struct library_symbol *tmp = libsym->next;
864                 library_symbol_destroy(libsym);
865                 free(libsym);
866                 libsym = tmp;
867         }
868 }
869
/* When -x rules result in a request to trace several aliases, we
 * only want to add such a symbol once.  The only thing in which
 * those symbols differ is their name, e.g. in glibc you have
 * __GI___libc_free, __cfree, __free, __libc_free, cfree and free all
 * defined at the same address.  So instead we keep this unique
 * symbol struct for each address, and replace the name in libsym
 * with a shorter variant if we find one.
 *
 * Note that the fields are initialized positionally elsewhere in
 * this file, so their order matters.  */
struct unique_symbol {
        /* Address that this record stands for; the lookup key.  */
        arch_addr_t addr;
        /* Symbol chosen to represent ADDR.  NULL in a record that was
         * just created by a lookup and not filled in yet.  */
        struct library_symbol *libsym;
};
881
882 static int
883 unique_symbol_cmp(const void *key, const void *val)
884 {
885         const struct unique_symbol *sym_key = key;
886         const struct unique_symbol *sym_val = val;
887         return sym_key->addr != sym_val->addr;
888 }
889
890 static enum callback_status
891 symbol_with_address(struct library_symbol *sym, void *addrptr)
892 {
893         return sym->enter_addr == *(arch_addr_t *)addrptr
894                 ? CBS_STOP : CBS_CONT;
895 }
896
897 static int
898 populate_this_symtab(struct process *proc, const char *filename,
899                      struct ltelf *lte, struct library *lib,
900                      Elf_Data *symtab, const char *strtab, size_t count,
901                      struct library_exported_name **names)
902 {
903         /* If a valid NAMES is passed, we pass in *NAMES a list of
904          * symbol names that this library exports.  */
905         if (names != NULL)
906                 *names = NULL;
907
908         /* Using sorted array would be arguably better, but this
909          * should be well enough for the number of symbols that we
910          * typically deal with.  */
911         size_t num_symbols = 0;
912         struct unique_symbol *symbols = malloc(sizeof(*symbols) * count);
913         if (symbols == NULL) {
914                 fprintf(stderr, "couldn't insert symbols for -x: %s\n",
915                         strerror(errno));
916                 return -1;
917         }
918
919         GElf_Word secflags[lte->ehdr.e_shnum];
920         size_t i;
921         for (i = 1; i < lte->ehdr.e_shnum; ++i) {
922                 Elf_Scn *scn = elf_getscn(lte->elf, i);
923                 GElf_Shdr shdr;
924                 if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL)
925                         secflags[i] = 0;
926                 else
927                         secflags[i] = shdr.sh_flags;
928         }
929
930         for (i = 0; i < count; ++i) {
931                 GElf_Sym sym;
932                 if (gelf_getsym(symtab, i, &sym) == NULL) {
933                         fprintf(stderr,
934                                 "couldn't get symbol #%zd from %s: %s\n",
935                                 i, filename, elf_errmsg(-1));
936                         continue;
937                 }
938
939                 if (sym.st_value == 0 || sym.st_shndx == STN_UNDEF
940                     /* Also ignore any special values besides direct
941                      * section references.  */
942                     || sym.st_shndx >= lte->ehdr.e_shnum)
943                         continue;
944
945                 /* Find symbol name and snip version.  */
946                 const char *orig_name = strtab + sym.st_name;
947                 const char *version = strchr(orig_name, '@');
948                 size_t len = version != NULL ? (assert(version > orig_name),
949                                                 (size_t)(version - orig_name))
950                         : strlen(orig_name);
951                 char name[len + 1];
952                 memcpy(name, orig_name, len);
953                 name[len] = 0;
954
955                 /* If we are interested in exports, store this name.  */
956                 if (names != NULL) {
957                         struct library_exported_name *export
958                                 = malloc(sizeof *export);
959                         char *name_copy = strdup(name);
960
961                         if (name_copy == NULL || export == NULL) {
962                                 free(name_copy);
963                                 free(export);
964                                 fprintf(stderr, "Couldn't store symbol %s.  "
965                                         "Tracing may be incomplete.\n", name);
966                         } else {
967                                 export->name = name_copy;
968                                 export->own_name = 1;
969                                 export->next = *names;
970                                 *names = export;
971                         }
972                 }
973
974                 /* If the symbol is not matched, skip it.  We already
975                  * stored it to export list above.  */
976                 if (!filter_matches_symbol(options.static_filter, name, lib))
977                         continue;
978
979                 arch_addr_t addr = (arch_addr_t)
980                         (uintptr_t)(sym.st_value + lte->bias);
981                 arch_addr_t naddr;
982
983                 /* On arches that support OPD, the value of typical
984                  * function symbol will be a pointer to .opd, but some
985                  * will point directly to .text.  We don't want to
986                  * translate those.  */
987                 if (secflags[sym.st_shndx] & SHF_EXECINSTR) {
988                         naddr = addr;
989                 } else if (arch_translate_address(lte, addr, &naddr) < 0) {
990                         fprintf(stderr,
991                                 "couldn't translate address of %s@%s: %s\n",
992                                 name, lib->soname, strerror(errno));
993                         continue;
994                 }
995
996                 char *full_name = strdup(name);
997                 if (full_name == NULL) {
998                         fprintf(stderr, "couldn't copy name of %s@%s: %s\n",
999                                 name, lib->soname, strerror(errno));
1000                         continue;
1001                 }
1002
1003                 struct library_symbol *libsym = NULL;
1004                 enum plt_status plts
1005                         = arch_elf_add_func_entry(proc, lte, &sym,
1006                                                   naddr, full_name, &libsym);
1007                 if (plts == PLT_DEFAULT)
1008                         plts = os_elf_add_func_entry(proc, lte, &sym,
1009                                                      naddr, full_name, &libsym);
1010
1011                 switch (plts) {
1012                 case PLT_DEFAULT:;
1013                         /* Put the default symbol to the chain.  */
1014                         struct library_symbol *tmp = malloc(sizeof *tmp);
1015                         if (tmp == NULL
1016                             || library_symbol_init(tmp, naddr, full_name, 1,
1017                                                    LS_TOPLT_NONE) < 0) {
1018                                 free(tmp);
1019
1020                                 /* Either add the whole bunch, or none
1021                                  * of it.  Note that for PLT_FAIL we
1022                                  * don't do this--it's the callee's
1023                                  * job to clean up after itself before
1024                                  * it bails out.  */
1025                                 delete_symbol_chain(libsym);
1026                                 libsym = NULL;
1027
1028                 case PLT_FAIL:
1029                                 fprintf(stderr, "Couldn't add symbol %s@%s "
1030                                         "for tracing.\n", name, lib->soname);
1031
1032                                 break;
1033                         }
1034
1035                         full_name = NULL;
1036                         tmp->next = libsym;
1037                         libsym = tmp;
1038                         break;
1039
1040                 case PLT_OK:
1041                         break;
1042                 }
1043
1044                 free(full_name);
1045
1046                 struct library_symbol *tmp;
1047                 for (tmp = libsym; tmp != NULL; ) {
1048                         /* Look whether we already have a symbol for
1049                          * this address.  If not, add this one.  If
1050                          * yes, look if we should pick the new symbol
1051                          * name.  */
1052
1053                         struct unique_symbol key = { tmp->enter_addr, NULL };
1054                         struct unique_symbol *unique
1055                                 = lsearch(&key, symbols, &num_symbols,
1056                                           sizeof *symbols, &unique_symbol_cmp);
1057
1058                         if (unique->libsym == NULL) {
1059                                 unique->libsym = tmp;
1060                                 unique->addr = tmp->enter_addr;
1061                                 tmp = tmp->next;
1062                                 unique->libsym->next = NULL;
1063                         } else {
1064                                 if (strlen(tmp->name)
1065                                     < strlen(unique->libsym->name)) {
1066                                         library_symbol_set_name
1067                                                 (unique->libsym, tmp->name, 1);
1068                                         tmp->name = NULL;
1069                                 }
1070                                 struct library_symbol *next = tmp->next;
1071                                 library_symbol_destroy(tmp);
1072                                 free(tmp);
1073                                 tmp = next;
1074                         }
1075                 }
1076         }
1077
1078         /* Now we do the union of this set of unique symbols with
1079          * what's already in the library.  */
1080         for (i = 0; i < num_symbols; ++i) {
1081                 struct library_symbol *this_sym = symbols[i].libsym;
1082                 assert(this_sym != NULL);
1083                 struct library_symbol *other
1084                         = library_each_symbol(lib, NULL, symbol_with_address,
1085                                               &this_sym->enter_addr);
1086                 if (other != NULL) {
1087                         library_symbol_destroy(this_sym);
1088                         free(this_sym);
1089                         symbols[i].libsym = NULL;
1090                 }
1091         }
1092
1093         for (i = 0; i < num_symbols; ++i)
1094                 if (symbols[i].libsym != NULL)
1095                         library_add_symbol(lib, symbols[i].libsym);
1096
1097         free(symbols);
1098         return 0;
1099 }
1100
1101 static int
1102 populate_symtab(struct process *proc, const char *filename,
1103                 struct ltelf *lte, struct library *lib,
1104                 int symtabs, int exports)
1105 {
1106         int status;
1107         if (symtabs && lte->symtab != NULL && lte->strtab != NULL
1108             && (status = populate_this_symtab(proc, filename, lte, lib,
1109                                               lte->symtab, lte->strtab,
1110                                               lte->symtab_count, NULL)) < 0)
1111                 return status;
1112
1113         /* Check whether we want to trace symbols implemented by this
1114          * library (-l).  */
1115         struct library_exported_name **names = NULL;
1116         if (exports) {
1117                 debug(DEBUG_FUNCTION, "-l matches %s", lib->soname);
1118                 names = &lib->exported_names;
1119         }
1120
1121         return populate_this_symtab(proc, filename, lte, lib,
1122                                     lte->dynsym, lte->dynstr,
1123                                     lte->dynsym_count, names);
1124 }
1125
1126 static int
1127 read_module(struct library *lib, struct process *proc,
1128             const char *filename, GElf_Addr bias, int main)
1129 {
1130         struct ltelf lte;
1131         if (ltelf_init(&lte, filename) < 0)
1132                 return -1;
1133
1134         /* XXX When we abstract ABI into a module, this should instead
1135          * become something like
1136          *
1137          *    proc->abi = arch_get_abi(lte.ehdr);
1138          *
1139          * The code in ltelf_init needs to be replaced by this logic.
1140          * Be warned that libltrace.c calls ltelf_init as well to
1141          * determine whether ABI is supported.  This is to get
1142          * reasonable error messages when trying to run 64-bit binary
1143          * with 32-bit ltrace.  It is desirable to preserve this.  */
1144         proc->e_machine = lte.ehdr.e_machine;
1145         proc->e_class = lte.ehdr.e_ident[EI_CLASS];
1146         get_arch_dep(proc);
1147
1148         /* Find out the base address.  For PIE main binaries we look
1149          * into auxv, otherwise we scan phdrs.  */
1150         if (main && lte.ehdr.e_type == ET_DYN) {
1151                 arch_addr_t entry;
1152                 if (process_get_entry(proc, &entry, NULL) < 0) {
1153                         fprintf(stderr, "Couldn't find entry of PIE %s\n",
1154                                 filename);
1155                 fail:
1156                         ltelf_destroy(&lte);
1157                         return -1;
1158                 }
1159                 /* XXX The double cast should be removed when
1160                  * arch_addr_t becomes integral type.  */
1161                 lte.entry_addr = (GElf_Addr)(uintptr_t)entry;
1162                 lte.bias = (GElf_Addr)(uintptr_t)entry - lte.ehdr.e_entry;
1163
1164         } else {
1165                 GElf_Phdr phdr;
1166                 size_t i;
1167                 for (i = 0; gelf_getphdr (lte.elf, i, &phdr) != NULL; ++i) {
1168                         if (phdr.p_type == PT_LOAD) {
1169                                 lte.base_addr = phdr.p_vaddr + bias;
1170                                 break;
1171                         }
1172                 }
1173
1174                 lte.bias = bias;
1175                 lte.entry_addr = lte.ehdr.e_entry + lte.bias;
1176
1177                 if (lte.base_addr == 0) {
1178                         fprintf(stderr,
1179                                 "Couldn't determine base address of %s\n",
1180                                 filename);
1181                         goto fail;
1182                 }
1183         }
1184
1185         if (ltelf_read_elf(&lte, filename) < 0)
1186                 goto fail;
1187
1188         if (arch_elf_init(&lte, lib) < 0) {
1189                 fprintf(stderr, "Backend initialization failed.\n");
1190                 goto fail;
1191         }
1192
1193         if (lib == NULL)
1194                 goto fail;
1195
1196         /* Note that we set soname and pathname as soon as they are
1197          * allocated, so in case of further errors, this get released
1198          * when LIB is released, which should happen in the caller
1199          * when we return error.  */
1200
1201         if (lib->pathname == NULL) {
1202                 char *pathname = strdup(filename);
1203                 if (pathname == NULL)
1204                         goto fail;
1205                 library_set_pathname(lib, pathname, 1);
1206         }
1207
1208         if (lte.soname != NULL) {
1209                 char *soname = strdup(lte.soname);
1210                 if (soname == NULL)
1211                         goto fail;
1212                 library_set_soname(lib, soname, 1);
1213         } else {
1214                 const char *soname = rindex(lib->pathname, '/');
1215                 if (soname != NULL)
1216                         soname += 1;
1217                 else
1218                         soname = lib->pathname;
1219                 library_set_soname(lib, soname, 0);
1220         }
1221
1222         /* XXX The double cast should be removed when
1223          * arch_addr_t becomes integral type.  */
1224         arch_addr_t entry = (arch_addr_t)(uintptr_t)lte.entry_addr;
1225         if (arch_translate_address(&lte, entry, &entry) < 0)
1226                 goto fail;
1227
1228         /* XXX The double cast should be removed when
1229          * arch_addr_t becomes integral type.  */
1230         lib->base = (arch_addr_t)(uintptr_t)lte.base_addr;
1231         lib->entry = entry;
1232         /* XXX The double cast should be removed when
1233          * arch_addr_t becomes integral type.  */
1234         lib->dyn_addr = (arch_addr_t)(uintptr_t)lte.dyn_addr;
1235
1236         /* There are two reasons that we need to inspect symbol tables
1237          * or populate PLT entries.  Either the user requested
1238          * corresponding tracing features (respectively -x and -e), or
1239          * they requested tracing exported symbols (-l).
1240          *
1241          * In the latter case we need to keep even those PLT slots
1242          * that are not requested by -e (but we keep them latent).  We
1243          * also need to inspect .dynsym to find what exports this
1244          * library provide, to turn on existing latent PLT
1245          * entries.  */
1246
1247         int plts = filter_matches_library(options.plt_filter, lib);
1248         if ((plts || options.export_filter != NULL)
1249             && populate_plt(proc, filename, &lte, lib) < 0)
1250                 goto fail;
1251
1252         int exports = filter_matches_library(options.export_filter, lib);
1253         int symtabs = filter_matches_library(options.static_filter, lib);
1254         if ((symtabs || exports)
1255             && populate_symtab(proc, filename, &lte, lib,
1256                                symtabs, exports) < 0)
1257                 goto fail;
1258
1259         arch_elf_destroy(&lte);
1260         ltelf_destroy(&lte);
1261         return 0;
1262 }
1263
1264 int
1265 ltelf_read_library(struct library *lib, struct process *proc,
1266                    const char *filename, GElf_Addr bias)
1267 {
1268         return read_module(lib, proc, filename, bias, 0);
1269 }
1270
1271
1272 struct library *
1273 ltelf_read_main_binary(struct process *proc, const char *path)
1274 {
1275         struct library *lib = malloc(sizeof(*lib));
1276         if (lib == NULL || library_init(lib, LT_LIBTYPE_MAIN) < 0) {
1277                 free(lib);
1278                 return NULL;
1279         }
1280         library_set_pathname(lib, path, 0);
1281
1282         /* There is a race between running the process and reading its
1283          * binary for internal consumption.  So open the binary from
1284          * the /proc filesystem.  XXX Note that there is similar race
1285          * for libraries, but there we don't have a nice answer like
1286          * that.  Presumably we could read the DSOs from the process
1287          * memory image, but that's not currently done.  */
1288         char *fname = pid2name(proc->pid);
1289         if (fname == NULL
1290             || read_module(lib, proc, fname, 0, 1) < 0) {
1291                 library_destroy(lib);
1292                 free(lib);
1293                 lib = NULL;
1294         }
1295
1296         free(fname);
1297         return lib;
1298 }