Imported Upstream version 0.7.91
[platform/upstream/ltrace.git] / ltrace-elf.c
1 /*
2  * This file is part of ltrace.
3  * Copyright (C) 2006,2010,2011,2012,2013 Petr Machata, Red Hat Inc.
4  * Copyright (C) 2010 Zachary T Welch, CodeSourcery
5  * Copyright (C) 2010 Joe Damato
6  * Copyright (C) 1997,1998,2001,2004,2007,2008,2009 Juan Cespedes
7  * Copyright (C) 2006 Olaf Hering, SUSE Linux GmbH
8  * Copyright (C) 2006 Eric Vaitl, Cisco Systems, Inc.
9  * Copyright (C) 2006 Paul Gilliam, IBM Corporation
10  * Copyright (C) 2006 Ian Wienand
11  *
12  * This program is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU General Public License as
14  * published by the Free Software Foundation; either version 2 of the
15  * License, or (at your option) any later version.
16  *
17  * This program is distributed in the hope that it will be useful, but
18  * WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * General Public License for more details.
21  *
22  * You should have received a copy of the GNU General Public License
23  * along with this program; if not, write to the Free Software
24  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25  * 02110-1301 USA
26  */
27
28 #include "config.h"
29
30 #include <assert.h>
31 #ifdef  __linux__
32 #include <endian.h>
33 #endif
34 #include <errno.h>
35 #include <fcntl.h>
36 #include <gelf.h>
37 #include <inttypes.h>
38 #include <search.h>
39 #include <stdbool.h>
40 #include <stdint.h>
41 #include <stdio.h>
42 #include <stdlib.h>
43 #include <string.h>
44 #include <strings.h>
45 #include <unistd.h>
46
47 #include "backend.h"
48 #include "filter.h"
49 #include "library.h"
50 #include "ltrace-elf.h"
51 #include "proc.h"
52 #include "debug.h"
53 #include "options.h"
54
#ifndef ARCH_HAVE_LTELF_DATA
/* Default definition compiled when ARCH_HAVE_LTELF_DATA is not
 * defined.  It ignores both arguments and always returns 0.  */
int
arch_elf_init(struct ltelf *lte, struct library *lib)
{
        return 0;
}

/* Default definition compiled when ARCH_HAVE_LTELF_DATA is not
 * defined.  It does nothing.  */
void
arch_elf_destroy(struct ltelf *lte)
{
}
#endif
67
#ifndef OS_HAVE_ADD_PLT_ENTRY
/* Default definition compiled when OS_HAVE_ADD_PLT_ENTRY is not
 * defined.  It ignores its arguments, leaves *RET untouched and
 * always returns PLT_DEFAULT.  */
enum plt_status
os_elf_add_plt_entry(struct process *proc, struct ltelf *lte,
                     const char *a_name, GElf_Rela *rela, size_t ndx,
                     struct library_symbol **ret)
{
        return PLT_DEFAULT;
}
#endif
77
#ifndef ARCH_HAVE_ADD_PLT_ENTRY
/* Default definition compiled when ARCH_HAVE_ADD_PLT_ENTRY is not
 * defined.  It ignores its arguments, leaves *RET untouched and
 * always returns PLT_DEFAULT.  */
enum plt_status
arch_elf_add_plt_entry(struct process *proc, struct ltelf *lte,
                       const char *a_name, GElf_Rela *rela, size_t ndx,
                       struct library_symbol **ret)
{
        return PLT_DEFAULT;
}
#endif
87
88 #ifndef OS_HAVE_ADD_FUNC_ENTRY
89 enum plt_status
90 os_elf_add_func_entry(struct process *proc, struct ltelf *lte,
91                       const GElf_Sym *sym,
92                       arch_addr_t addr, const char *name,
93                       struct library_symbol **ret)
94 {
95         if (GELF_ST_TYPE(sym->st_info) != STT_FUNC) {
96                 *ret = NULL;
97                 return PLT_OK;
98         } else {
99                 return PLT_DEFAULT;
100         }
101 }
102 #endif
103
#ifndef ARCH_HAVE_ADD_FUNC_ENTRY
/* Default definition compiled when ARCH_HAVE_ADD_FUNC_ENTRY is not
 * defined.  It ignores its arguments, leaves *RET untouched and
 * always returns PLT_DEFAULT.  */
enum plt_status
arch_elf_add_func_entry(struct process *proc, struct ltelf *lte,
                        const GElf_Sym *sym,
                        arch_addr_t addr, const char *name,
                        struct library_symbol **ret)
{
        return PLT_DEFAULT;
}
#endif
114
115 Elf_Data *
116 elf_loaddata(Elf_Scn *scn, GElf_Shdr *shdr)
117 {
118         Elf_Data *data = elf_getdata(scn, NULL);
119         if (data == NULL || elf_getdata(scn, data) != NULL
120             || data->d_off || data->d_size != shdr->sh_size)
121                 return NULL;
122         return data;
123 }
124
125 static int
126 elf_get_section_if(struct ltelf *lte, Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr,
127                    int (*predicate)(Elf_Scn *, GElf_Shdr *, void *data),
128                    void *data)
129 {
130         int i;
131         for (i = 1; i < lte->ehdr.e_shnum; ++i) {
132                 Elf_Scn *scn;
133                 GElf_Shdr shdr;
134
135                 scn = elf_getscn(lte->elf, i);
136                 if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL) {
137                         debug(1, "Couldn't read section or header.");
138                         return -1;
139                 }
140                 if (predicate(scn, &shdr, data)) {
141                         *tgt_sec = scn;
142                         *tgt_shdr = shdr;
143                         return 0;
144                 }
145         }
146
147         *tgt_sec = NULL;
148         return 0;
149 }
150
151 static int
152 inside_p(Elf_Scn *scn, GElf_Shdr *shdr, void *data)
153 {
154         GElf_Addr addr = *(GElf_Addr *)data;
155         return addr >= shdr->sh_addr
156                 && addr < shdr->sh_addr + shdr->sh_size;
157 }
158
159 int
160 elf_get_section_covering(struct ltelf *lte, GElf_Addr addr,
161                          Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr)
162 {
163         return elf_get_section_if(lte, tgt_sec, tgt_shdr,
164                                   &inside_p, &addr);
165 }
166
167 static int
168 type_p(Elf_Scn *scn, GElf_Shdr *shdr, void *data)
169 {
170         GElf_Word type = *(GElf_Word *)data;
171         return shdr->sh_type == type;
172 }
173
174 int
175 elf_get_section_type(struct ltelf *lte, GElf_Word type,
176                      Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr)
177 {
178         return elf_get_section_if(lte, tgt_sec, tgt_shdr,
179                                   &type_p, &type);
180 }
181
/* Argument bundle handed to name_p through the void *data parameter
 * of elf_get_section_if.  */
struct section_named_data {
        struct ltelf *lte;      /* ELF whose section names are examined.  */
        const char *name;       /* Section name being looked for.  */
};
186
187 static int
188 name_p(Elf_Scn *scn, GElf_Shdr *shdr, void *d)
189 {
190         struct section_named_data *data = d;
191         const char *name = elf_strptr(data->lte->elf,
192                                       data->lte->ehdr.e_shstrndx,
193                                       shdr->sh_name);
194         return strcmp(name, data->name) == 0;
195 }
196
197 int
198 elf_get_section_named(struct ltelf *lte, const char *name,
199                      Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr)
200 {
201         struct section_named_data data = {
202                 .lte = lte,
203                 .name = name,
204         };
205         return elf_get_section_if(lte, tgt_sec, tgt_shdr,
206                                   &name_p, &data);
207 }
208
209 static struct elf_each_symbol_t
210 each_symbol_in(Elf_Data *symtab, const char *strtab, size_t count,
211                unsigned i,
212                enum callback_status (*cb)(GElf_Sym *symbol,
213                                           const char *name, void *data),
214                void *data)
215 {
216         for (; i < count; ++i) {
217                 GElf_Sym sym;
218                 if (gelf_getsym(symtab, i, &sym) == NULL)
219                         return (struct elf_each_symbol_t){ i, -2 };
220
221                 switch (cb(&sym, strtab + sym.st_name, data)) {
222                 case CBS_FAIL:
223                         return (struct elf_each_symbol_t){ i, -1 };
224                 case CBS_STOP:
225                         return (struct elf_each_symbol_t){ i + 1, 0 };
226                 case CBS_CONT:
227                         break;
228                 }
229         }
230
231         return (struct elf_each_symbol_t){ 0, 0 };
232 }
233
234 /* N.B.: gelf_getsym takes integer argument.  Since negative values
235  * are invalid as indices, we can use the extra bit to encode which
236  * symbol table we are looking into.  ltrace currently doesn't handle
237  * more than two symbol tables anyway, nor does it handle the xindex
238  * stuff.  */
239 struct elf_each_symbol_t
240 elf_each_symbol(struct ltelf *lte, unsigned start_after,
241                 enum callback_status (*cb)(GElf_Sym *symbol,
242                                            const char *name, void *data),
243                 void *data)
244 {
245         unsigned index = start_after == 0 ? 0 : start_after >> 1;
246
247         /* Go through static symbol table first.  */
248         if ((start_after & 0x1) == 0) {
249                 struct elf_each_symbol_t st
250                         = each_symbol_in(lte->symtab, lte->strtab,
251                                          lte->symtab_count, index, cb, data);
252
253                 /* If the iteration stopped prematurely, bail out.  */
254                 if (st.restart != 0)
255                         return ((struct elf_each_symbol_t)
256                                 { st.restart << 1, st.status });
257         }
258
259         struct elf_each_symbol_t st
260                 = each_symbol_in(lte->dynsym, lte->dynstr, lte->dynsym_count,
261                                  index, cb, data);
262         if (st.restart != 0)
263                 return ((struct elf_each_symbol_t)
264                         { st.restart << 1 | 0x1, st.status });
265
266         return (struct elf_each_symbol_t){ 0, 0 };
267 }
268
269 int
270 elf_can_read_next(Elf_Data *data, GElf_Xword offset, GElf_Xword size)
271 {
272         assert(data != NULL);
273         if (data->d_size < size || offset > data->d_size - size) {
274                 debug(1, "Not enough data to read %"PRId64"-byte value"
275                       " at offset %"PRId64".", size, offset);
276                 return 0;
277         }
278         return 1;
279 }
280
281 #define DEF_READER(NAME, SIZE)                                          \
282         int                                                             \
283         NAME(Elf_Data *data, GElf_Xword offset, uint##SIZE##_t *retp)   \
284         {                                                               \
285                 if (!elf_can_read_next(data, offset, SIZE / 8))         \
286                         return -1;                                      \
287                                                                         \
288                 if (data->d_buf == NULL) /* NODATA section */ {         \
289                         *retp = 0;                                      \
290                         return 0;                                       \
291                 }                                                       \
292                                                                         \
293                 union {                                                 \
294                         uint##SIZE##_t dst;                             \
295                         char buf[0];                                    \
296                 } u;                                                    \
297                 memcpy(u.buf, data->d_buf + offset, sizeof(u.dst));     \
298                 *retp = u.dst;                                          \
299                 return 0;                                               \
300         }
301
302 DEF_READER(elf_read_u8, 8)
303 DEF_READER(elf_read_u16, 16)
304 DEF_READER(elf_read_u32, 32)
305 DEF_READER(elf_read_u64, 64)
306
307 #undef DEF_READER
308
309 #define DEF_READER(NAME, SIZE)                                          \
310         int                                                             \
311         NAME(Elf_Data *data, GElf_Xword *offset, uint##SIZE##_t *retp)  \
312         {                                                               \
313                 int rc = elf_read_u##SIZE(data, *offset, retp);         \
314                 if (rc < 0)                                             \
315                         return rc;                                      \
316                 *offset += SIZE / 8;                                    \
317                 return 0;                                               \
318         }
319
320 DEF_READER(elf_read_next_u8, 8)
321 DEF_READER(elf_read_next_u16, 16)
322 DEF_READER(elf_read_next_u32, 32)
323 DEF_READER(elf_read_next_u64, 64)
324
325 #undef DEF_READER
326
327 int
328 elf_read_next_uleb128(Elf_Data *data, GElf_Xword *offset, uint64_t *retp)
329 {
330         uint64_t result = 0;
331         int shift = 0;
332         int size = 8 * sizeof result;
333
334         while (1) {
335                 uint8_t byte;
336                 if (elf_read_next_u8(data, offset, &byte) < 0)
337                         return -1;
338
339                 uint8_t payload = byte & 0x7f;
340                 result |= (uint64_t)payload << shift;
341                 shift += 7;
342                 if (shift > size && byte != 0x1)
343                         return -1;
344                 if ((byte & 0x80) == 0)
345                         break;
346         }
347
348         if (retp != NULL)
349                 *retp = result;
350         return 0;
351 }
352
353 int
354 elf_read_uleb128(Elf_Data *data, GElf_Xword offset, uint64_t *retp)
355 {
356         return elf_read_next_uleb128(data, &offset, retp);
357 }
358
/* Initialize LTE for the ELF file FILENAME: zero the structure, open
 * the file read-only, create a libelf descriptor for it, read the
 * ELF header and initialize the plt_relocs vector.
 *
 * Returns 1 if the file can't be opened and 0 on success.  Every
 * other problem (not an ELF file, unreadable header, neither ET_EXEC
 * nor ET_DYN, incompatible architecture) is reported on stderr and
 * terminates the process with EXIT_FAILURE.  */
int
ltelf_init(struct ltelf *lte, const char *filename)
{
        memset(lte, 0, sizeof *lte);
        lte->fd = open(filename, O_RDONLY);
        if (lte->fd == -1)
                return 1;

        /* NOTE(review): the result of elf_version is not checked.  */
        elf_version(EV_CURRENT);

#ifdef HAVE_ELF_C_READ_MMAP
        lte->elf = elf_begin(lte->fd, ELF_C_READ_MMAP, NULL);
#else
        lte->elf = elf_begin(lte->fd, ELF_C_READ, NULL);
#endif

        if (lte->elf == NULL || elf_kind(lte->elf) != ELF_K_ELF) {
                fprintf(stderr, "\"%s\" is not an ELF file\n", filename);
                exit(EXIT_FAILURE);
        }

        if (gelf_getehdr(lte->elf, &lte->ehdr) == NULL) {
                fprintf(stderr, "can't read ELF header of \"%s\": %s\n",
                        filename, elf_errmsg(-1));
                exit(EXIT_FAILURE);
        }

        if (lte->ehdr.e_type != ET_EXEC && lte->ehdr.e_type != ET_DYN) {
                fprintf(stderr, "\"%s\" is neither an ELF executable"
                        " nor a shared library\n", filename);
                exit(EXIT_FAILURE);
        }

        /* Each LT_ELF_MACHINE* that is defined contributes one
         * "file is not this class/machine pair" clause.  The
         * conjunction is therefore true only when the file matches
         * none of the pairs.  When no LT_ELF_MACHINE* is defined,
         * the condition is the bare 1 and every file is
         * rejected.  */
        if (1
#ifdef LT_ELF_MACHINE
            && (lte->ehdr.e_ident[EI_CLASS] != LT_ELFCLASS
                || lte->ehdr.e_machine != LT_ELF_MACHINE)
#endif
#ifdef LT_ELF_MACHINE2
            && (lte->ehdr.e_ident[EI_CLASS] != LT_ELFCLASS2
                || lte->ehdr.e_machine != LT_ELF_MACHINE2)
#endif
#ifdef LT_ELF_MACHINE3
            && (lte->ehdr.e_ident[EI_CLASS] != LT_ELFCLASS3
                || lte->ehdr.e_machine != LT_ELF_MACHINE3)
#endif
                ) {
                fprintf(stderr,
                        "\"%s\" is ELF from incompatible architecture\n",
                        filename);
                exit(EXIT_FAILURE);
        }

        VECT_INIT(&lte->plt_relocs, GElf_Rela);

        return 0;
}
416
417 void
418 ltelf_destroy(struct ltelf *lte)
419 {
420         debug(DEBUG_FUNCTION, "close_elf()");
421         elf_end(lte->elf);
422         close(lte->fd);
423         VECT_DESTROY(&lte->plt_relocs, GElf_Rela, NULL, NULL);
424 }
425
426 static void
427 read_symbol_table(struct ltelf *lte, const char *filename,
428                   Elf_Scn *scn, GElf_Shdr *shdr, const char *name,
429                   Elf_Data **datap, size_t *countp, const char **strsp)
430 {
431         *datap = elf_getdata(scn, NULL);
432         *countp = shdr->sh_size / shdr->sh_entsize;
433         if ((*datap == NULL || elf_getdata(scn, *datap) != NULL)
434             && options.static_filter != NULL) {
435                 fprintf(stderr, "Couldn't get data of section"
436                         " %s from \"%s\": %s\n",
437                         name, filename, elf_errmsg(-1));
438                 exit(EXIT_FAILURE);
439         }
440
441         scn = elf_getscn(lte->elf, shdr->sh_link);
442         GElf_Shdr shdr2;
443         if (scn == NULL || gelf_getshdr(scn, &shdr2) == NULL) {
444                 fprintf(stderr, "Couldn't get header of section"
445                         " #%d from \"%s\": %s\n",
446                         shdr->sh_link, filename, elf_errmsg(-1));
447                 exit(EXIT_FAILURE);
448         }
449
450         Elf_Data *data = elf_getdata(scn, NULL);
451         if (data == NULL || elf_getdata(scn, data) != NULL
452             || shdr2.sh_size != data->d_size || data->d_off) {
453                 fprintf(stderr, "Couldn't get data of section"
454                         " #%d from \"%s\": %s\n",
455                         shdr2.sh_link, filename, elf_errmsg(-1));
456                 exit(EXIT_FAILURE);
457         }
458
459         *strsp = data->d_buf;
460 }
461
462 static int
463 rel_to_rela(struct ltelf *lte, const GElf_Rel *rel, GElf_Rela *rela)
464 {
465         rela->r_offset = rel->r_offset;
466         rela->r_info = rel->r_info;
467
468         Elf_Scn *sec;
469         GElf_Shdr shdr;
470         if (elf_get_section_covering(lte, rel->r_offset, &sec, &shdr) < 0
471             || sec == NULL)
472                 return -1;
473
474         Elf_Data *data = elf_loaddata(sec, &shdr);
475         if (data == NULL)
476                 return -1;
477
478         GElf_Xword offset = rel->r_offset - shdr.sh_addr - data->d_off;
479         uint64_t value;
480         if (lte->ehdr.e_ident[EI_CLASS] == ELFCLASS32) {
481                 uint32_t tmp;
482                 if (elf_read_u32(data, offset, &tmp) < 0)
483                         return -1;
484                 value = tmp;
485         } else if (elf_read_u64(data, offset, &value) < 0) {
486                 return -1;
487         }
488
489         rela->r_addend = value;
490         return 0;
491 }
492
493 int
494 elf_read_relocs(struct ltelf *lte, Elf_Scn *scn, GElf_Shdr *shdr,
495                 struct vect *rela_vec)
496 {
497         if (vect_reserve_additional(rela_vec, lte->ehdr.e_shnum) < 0)
498                 return -1;
499
500         Elf_Data *relplt = elf_loaddata(scn, shdr);
501         if (relplt == NULL) {
502                 fprintf(stderr, "Couldn't load .rel*.plt data.\n");
503                 return -1;
504         }
505
506         if ((shdr->sh_size % shdr->sh_entsize) != 0) {
507                 fprintf(stderr, ".rel*.plt size (%" PRIx64 "d) not a multiple "
508                         "of its sh_entsize (%" PRIx64 "d).\n",
509                         shdr->sh_size, shdr->sh_entsize);
510                 return -1;
511         }
512
513         GElf_Xword relplt_count = shdr->sh_size / shdr->sh_entsize;
514         GElf_Xword i;
515         for (i = 0; i < relplt_count; ++i) {
516                 GElf_Rela rela;
517                 if (relplt->d_type == ELF_T_REL) {
518                         GElf_Rel rel;
519                         if (gelf_getrel(relplt, i, &rel) == NULL
520                             || rel_to_rela(lte, &rel, &rela) < 0)
521                                 return -1;
522
523                 } else if (gelf_getrela(relplt, i, &rela) == NULL) {
524                         return -1;
525                 }
526
527                 if (VECT_PUSHBACK(rela_vec, &rela) < 0)
528                         return -1;
529         }
530
531         return 0;
532 }
533
/* Scan the sections of the ELF file behind LTE and fill LTE in:
 * static and dynamic symbol tables with their string tables, address
 * and size of .dynamic, the .plt section (and .opd when
 * ARCH_SUPPORTS_OPD is defined), the PLT relocations and the soname.
 *
 * Always returns 0.  Every failure is reported on stderr and
 * terminates the process with EXIT_FAILURE.  */
static int
ltelf_read_elf(struct ltelf *lte, const char *filename)
{
        int i;
        /* Values of DT_JMPREL, DT_SONAME and DT_PLTRELSZ; they stay
         * 0 if the tag isn't found.  */
        GElf_Addr relplt_addr = 0;
        GElf_Addr soname_offset = 0;
        GElf_Xword relplt_size = 0;

        debug(DEBUG_FUNCTION, "ltelf_read_elf(filename=%s)", filename);
        debug(1, "Reading ELF from %s...", filename);

        /* First pass: pick up the sections of interest by type (and
         * by name for .plt and .opd).  Section 0 is skipped.  */
        for (i = 1; i < lte->ehdr.e_shnum; ++i) {
                Elf_Scn *scn;
                GElf_Shdr shdr;
                const char *name;

                scn = elf_getscn(lte->elf, i);
                if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL) {
                        fprintf(stderr, "Couldn't get section #%d from"
                                " \"%s\": %s\n", i, filename, elf_errmsg(-1));
                        exit(EXIT_FAILURE);
                }

                name = elf_strptr(lte->elf, lte->ehdr.e_shstrndx, shdr.sh_name);
                if (name == NULL) {
                        fprintf(stderr, "Couldn't get name of section #%d from"
                                " \"%s\": %s\n", i, filename, elf_errmsg(-1));
                        exit(EXIT_FAILURE);
                }

                if (shdr.sh_type == SHT_SYMTAB) {
                        read_symbol_table(lte, filename,
                                          scn, &shdr, name, &lte->symtab,
                                          &lte->symtab_count, &lte->strtab);

                } else if (shdr.sh_type == SHT_DYNSYM) {
                        read_symbol_table(lte, filename,
                                          scn, &shdr, name, &lte->dynsym,
                                          &lte->dynsym_count, &lte->dynstr);

                } else if (shdr.sh_type == SHT_DYNAMIC) {
                        Elf_Data *data;
                        size_t j;

                        lte->dyn_addr = shdr.sh_addr + lte->bias;
                        lte->dyn_sz = shdr.sh_size;

                        /* NOTE(review): this and the next error
                         * message use strerror(errno), while the
                         * other libelf failures in this function use
                         * elf_errmsg(-1).  */
                        data = elf_getdata(scn, NULL);
                        if (data == NULL || elf_getdata(scn, data) != NULL) {
                                fprintf(stderr, "Couldn't get .dynamic data"
                                        " from \"%s\": %s\n",
                                        filename, strerror(errno));
                                exit(EXIT_FAILURE);
                        }

                        /* NOTE(review): sh_entsize is not checked for
                         * zero before the division.  */
                        for (j = 0; j < shdr.sh_size / shdr.sh_entsize; ++j) {
                                GElf_Dyn dyn;

                                if (gelf_getdyn(data, j, &dyn) == NULL) {
                                        fprintf(stderr, "Couldn't get .dynamic"
                                                " data from \"%s\": %s\n",
                                                filename, strerror(errno));
                                        exit(EXIT_FAILURE);
                                }
                                if (dyn.d_tag == DT_JMPREL)
                                        relplt_addr = dyn.d_un.d_ptr;
                                else if (dyn.d_tag == DT_PLTRELSZ)
                                        relplt_size = dyn.d_un.d_val;
                                else if (dyn.d_tag == DT_SONAME)
                                        soname_offset = dyn.d_un.d_val;
                        }
                } else if (shdr.sh_type == SHT_PROGBITS
                           || shdr.sh_type == SHT_NOBITS) {
                        if (strcmp(name, ".plt") == 0) {
                                lte->plt_addr = shdr.sh_addr;
                                lte->plt_size = shdr.sh_size;
                                /* Failing to load .plt data is only
                                 * reported; processing goes on with
                                 * plt_data set to NULL.  */
                                lte->plt_data = elf_loaddata(scn, &shdr);
                                if (lte->plt_data == NULL)
                                        fprintf(stderr,
                                                "Can't load .plt data\n");
                                lte->plt_flags = shdr.sh_flags;
                        }
#ifdef ARCH_SUPPORTS_OPD
                        else if (strcmp(name, ".opd") == 0) {
                                lte->opd_addr = (GElf_Addr *) (long) shdr.sh_addr;
                                lte->opd_size = shdr.sh_size;
                                lte->opd = elf_rawdata(scn, NULL);
                        }
#endif
                }
        }

        if (lte->dynsym == NULL || lte->dynstr == NULL) {
                fprintf(stderr, "Couldn't find .dynsym or .dynstr in \"%s\"\n",
                        filename);
                exit(EXIT_FAILURE);
        }

        if (!relplt_addr || !lte->plt_addr) {
                debug(1, "%s has no PLT relocations", filename);
        } else if (relplt_size == 0) {
                debug(1, "%s has unknown PLT size", filename);
        } else {
                /* Second pass: find the section whose address and
                 * size match DT_JMPREL and DT_PLTRELSZ, and read the
                 * PLT relocations from it.  */
                for (i = 1; i < lte->ehdr.e_shnum; ++i) {
                        Elf_Scn *scn;
                        GElf_Shdr shdr;

                        scn = elf_getscn(lte->elf, i);
                        if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL) {
                                fprintf(stderr, "Couldn't get section header"
                                        " from \"%s\": %s\n",
                                        filename, elf_errmsg(-1));
                                exit(EXIT_FAILURE);
                        }
                        if (shdr.sh_addr == relplt_addr
                            && shdr.sh_size == relplt_size) {
                                if (elf_read_relocs(lte, scn, &shdr,
                                                    &lte->plt_relocs) < 0) {
                                        fprintf(stderr, "Couldn't get .rel*.plt"
                                                " data from \"%s\": %s\n",
                                                filename, elf_errmsg(-1));
                                        exit(EXIT_FAILURE);
                                }
                                break;
                        }
                }

                /* The loop ran to the end without finding it.  */
                if (i == lte->ehdr.e_shnum) {
                        fprintf(stderr,
                                "Couldn't find .rel*.plt section in \"%s\"\n",
                                filename);
                        exit(EXIT_FAILURE);
                }
        }
        debug(1, "%s %zd PLT relocations", filename,
              vect_size(&lte->plt_relocs));

        /* NOTE(review): soname_offset is used as an offset into
         * dynstr without a bounds check.  */
        if (soname_offset != 0)
                lte->soname = lte->dynstr + soname_offset;

        return 0;
}
676
677 #ifndef ARCH_HAVE_GET_SYMINFO
678 int
679 arch_get_sym_info(struct ltelf *lte, const char *filename,
680                   size_t sym_index, GElf_Rela *rela, GElf_Sym *sym)
681 {
682         return gelf_getsym(lte->dynsym,
683                            ELF64_R_SYM(rela->r_info), sym) != NULL ? 0 : -1;
684 }
685 #endif
686
687 int
688 default_elf_add_plt_entry(struct process *proc, struct ltelf *lte,
689                           const char *a_name, GElf_Rela *rela, size_t ndx,
690                           struct library_symbol **ret)
691 {
692         char *name = strdup(a_name);
693         if (name == NULL) {
694         fail_message:
695                 fprintf(stderr, "Couldn't create symbol for PLT entry: %s\n",
696                         strerror(errno));
697         fail:
698                 free(name);
699                 return -1;
700         }
701
702         GElf_Addr addr = arch_plt_sym_val(lte, ndx, rela);
703
704         struct library_symbol *libsym = malloc(sizeof(*libsym));
705         if (libsym == NULL)
706                 goto fail_message;
707
708         /* XXX The double cast should be removed when
709          * arch_addr_t becomes integral type.  */
710         arch_addr_t taddr = (arch_addr_t)
711                 (uintptr_t)(addr + lte->bias);
712
713         if (library_symbol_init(libsym, taddr, name, 1, LS_TOPLT_EXEC) < 0) {
714                 free(libsym);
715                 goto fail;
716         }
717
718         libsym->next = *ret;
719         *ret = libsym;
720         return 0;
721 }
722
723 int
724 elf_add_plt_entry(struct process *proc, struct ltelf *lte,
725                   const char *name, GElf_Rela *rela, size_t idx,
726                   struct library_symbol **ret)
727 {
728         enum plt_status plts
729                 = arch_elf_add_plt_entry(proc, lte, name, rela, idx, ret);
730
731         if (plts == PLT_DEFAULT)
732                 plts = os_elf_add_plt_entry(proc, lte, name, rela, idx, ret);
733
734         switch (plts) {
735         case PLT_DEFAULT:
736                 return default_elf_add_plt_entry(proc, lte, name,
737                                                  rela, idx, ret);
738         case PLT_FAIL:
739                 return -1;
740         case PLT_OK:
741                 return 0;
742         }
743
744         assert(! "Invalid return from X_elf_add_plt_entry!");
745         abort();
746 }
747
748 static void
749 mark_chain_latent(struct library_symbol *libsym)
750 {
751         for (; libsym != NULL; libsym = libsym->next) {
752                 debug(DEBUG_FUNCTION, "marking %s latent", libsym->name);
753                 libsym->latent = 1;
754         }
755 }
756
757 static void
758 filter_symbol_chain(struct filter *filter,
759                     struct library_symbol **libsymp, struct library *lib)
760 {
761         assert(libsymp != NULL);
762         struct library_symbol **ptr = libsymp;
763         while (*ptr != NULL) {
764                 if (filter_matches_symbol(filter, (*ptr)->name, lib)) {
765                         ptr = &(*ptr)->next;
766                 } else {
767                         struct library_symbol *sym = *ptr;
768                         *ptr = (*ptr)->next;
769                         library_symbol_destroy(sym);
770                         free(sym);
771                 }
772         }
773 }
774
775 static int
776 populate_plt(struct process *proc, const char *filename,
777              struct ltelf *lte, struct library *lib)
778 {
779         const bool latent_plts = options.export_filter != NULL;
780         const size_t count = vect_size(&lte->plt_relocs);
781
782         size_t i;
783         for (i = 0; i < count; ++i) {
784                 GElf_Rela *rela = VECT_ELEMENT(&lte->plt_relocs, GElf_Rela, i);
785                 GElf_Sym sym;
786
787                 switch (arch_get_sym_info(lte, filename, i, rela, &sym)) {
788                 default:
789                         fprintf(stderr,
790                                 "Couldn't get relocation for symbol #%zd"
791                                 " from \"%s\": %s\n",
792                                 i, filename, elf_errmsg(-1));
793                         /* Fall through.  */
794                 case 1:
795                         continue; /* Skip this entry.  */
796                 case 0:
797                         break;
798                 }
799
800                 char const *name = lte->dynstr + sym.st_name;
801                 int matched = filter_matches_symbol(options.plt_filter,
802                                                     name, lib);
803
804                 struct library_symbol *libsym = NULL;
805                 if (elf_add_plt_entry(proc, lte, name, rela, i, &libsym) < 0)
806                         return -1;
807
808                 /* If we didn't match the PLT entry, filter the chain
809                  * to only include the matching symbols (but include
810                  * all if we are adding latent symbols) to allow
811                  * backends to override the PLT symbol's name.  */
812
813                 if (! matched && ! latent_plts)
814                         filter_symbol_chain(options.plt_filter, &libsym, lib);
815
816                 if (libsym != NULL) {
817                         /* If we are adding those symbols just for
818                          * tracing exports, mark them all latent.  */
819                         if (! matched && latent_plts)
820                                 mark_chain_latent(libsym);
821                         library_add_symbol(lib, libsym);
822                 }
823         }
824         return 0;
825 }
826
827 static void
828 delete_symbol_chain(struct library_symbol *libsym)
829 {
830         while (libsym != NULL) {
831                 struct library_symbol *tmp = libsym->next;
832                 library_symbol_destroy(libsym);
833                 free(libsym);
834                 libsym = tmp;
835         }
836 }
837
838 /* When -x rules result in request to trace several aliases, we only
839  * want to add such symbol once.  The only way that those symbols
840  * differ in is their name, e.g. in glibc you have __GI___libc_free,
841  * __cfree, __free, __libc_free, cfree and free all defined on the
842  * same address.  So instead we keep this unique symbol struct for
843  * each address, and replace name in libsym with a shorter variant if
844  * we find it.  */
845 struct unique_symbol {
846         arch_addr_t addr;
847         struct library_symbol *libsym;
848 };
849
850 static int
851 unique_symbol_cmp(const void *key, const void *val)
852 {
853         const struct unique_symbol *sym_key = key;
854         const struct unique_symbol *sym_val = val;
855         return sym_key->addr != sym_val->addr;
856 }
857
858 static enum callback_status
859 symbol_with_address(struct library_symbol *sym, void *addrptr)
860 {
861         return sym->enter_addr == *(arch_addr_t *)addrptr
862                 ? CBS_STOP : CBS_CONT;
863 }
864
865 static int
866 populate_this_symtab(struct process *proc, const char *filename,
867                      struct ltelf *lte, struct library *lib,
868                      Elf_Data *symtab, const char *strtab, size_t count,
869                      struct library_exported_name **names)
870 {
871         /* If a valid NAMES is passed, we pass in *NAMES a list of
872          * symbol names that this library exports.  */
873         if (names != NULL)
874                 *names = NULL;
875
876         /* Using sorted array would be arguably better, but this
877          * should be well enough for the number of symbols that we
878          * typically deal with.  */
879         size_t num_symbols = 0;
880         struct unique_symbol *symbols = malloc(sizeof(*symbols) * count);
881         if (symbols == NULL) {
882                 fprintf(stderr, "couldn't insert symbols for -x: %s\n",
883                         strerror(errno));
884                 return -1;
885         }
886
887         GElf_Word secflags[lte->ehdr.e_shnum];
888         size_t i;
889         for (i = 1; i < lte->ehdr.e_shnum; ++i) {
890                 Elf_Scn *scn = elf_getscn(lte->elf, i);
891                 GElf_Shdr shdr;
892                 if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL)
893                         secflags[i] = 0;
894                 else
895                         secflags[i] = shdr.sh_flags;
896         }
897
898         for (i = 0; i < count; ++i) {
899                 GElf_Sym sym;
900                 if (gelf_getsym(symtab, i, &sym) == NULL) {
901                         fprintf(stderr,
902                                 "couldn't get symbol #%zd from %s: %s\n",
903                                 i, filename, elf_errmsg(-1));
904                         continue;
905                 }
906
907                 if (sym.st_value == 0 || sym.st_shndx == STN_UNDEF
908                     /* Also ignore any special values besides direct
909                      * section references.  */
910                     || sym.st_shndx >= lte->ehdr.e_shnum)
911                         continue;
912
913                 /* Find symbol name and snip version.  */
914                 const char *orig_name = strtab + sym.st_name;
915                 const char *version = strchr(orig_name, '@');
916                 size_t len = version != NULL ? (assert(version > orig_name),
917                                                 (size_t)(version - orig_name))
918                         : strlen(orig_name);
919                 char name[len + 1];
920                 memcpy(name, orig_name, len);
921                 name[len] = 0;
922
923                 /* If we are interested in exports, store this name.  */
924                 if (names != NULL) {
925                         struct library_exported_name *export
926                                 = malloc(sizeof *export);
927                         char *name_copy = strdup(name);
928
929                         if (name_copy == NULL || export == NULL) {
930                                 free(name_copy);
931                                 free(export);
932                                 fprintf(stderr, "Couldn't store symbol %s.  "
933                                         "Tracing may be incomplete.\n", name);
934                         } else {
935                                 export->name = name_copy;
936                                 export->own_name = 1;
937                                 export->next = *names;
938                                 *names = export;
939                         }
940                 }
941
942                 /* If the symbol is not matched, skip it.  We already
943                  * stored it to export list above.  */
944                 if (!filter_matches_symbol(options.static_filter, name, lib))
945                         continue;
946
947                 arch_addr_t addr = (arch_addr_t)
948                         (uintptr_t)(sym.st_value + lte->bias);
949                 arch_addr_t naddr;
950
951                 /* On arches that support OPD, the value of typical
952                  * function symbol will be a pointer to .opd, but some
953                  * will point directly to .text.  We don't want to
954                  * translate those.  */
955                 if (secflags[sym.st_shndx] & SHF_EXECINSTR) {
956                         naddr = addr;
957                 } else if (arch_translate_address(lte, addr, &naddr) < 0) {
958                         fprintf(stderr,
959                                 "couldn't translate address of %s@%s: %s\n",
960                                 name, lib->soname, strerror(errno));
961                         continue;
962                 }
963
964                 char *full_name = strdup(name);
965                 if (full_name == NULL) {
966                         fprintf(stderr, "couldn't copy name of %s@%s: %s\n",
967                                 name, lib->soname, strerror(errno));
968                         continue;
969                 }
970
971                 struct library_symbol *libsym = NULL;
972                 enum plt_status plts
973                         = arch_elf_add_func_entry(proc, lte, &sym,
974                                                   naddr, full_name, &libsym);
975                 if (plts == PLT_DEFAULT)
976                         plts = os_elf_add_func_entry(proc, lte, &sym,
977                                                      naddr, full_name, &libsym);
978
979                 switch (plts) {
980                 case PLT_DEFAULT:;
981                         /* Put the default symbol to the chain.  */
982                         struct library_symbol *tmp = malloc(sizeof *tmp);
983                         if (tmp == NULL
984                             || library_symbol_init(tmp, naddr, full_name, 1,
985                                                    LS_TOPLT_NONE) < 0) {
986                                 free(tmp);
987
988                                 /* Either add the whole bunch, or none
989                                  * of it.  Note that for PLT_FAIL we
990                                  * don't do this--it's the callee's
991                                  * job to clean up after itself before
992                                  * it bails out.  */
993                                 delete_symbol_chain(libsym);
994                                 libsym = NULL;
995
996                 case PLT_FAIL:
997                                 fprintf(stderr, "Couldn't add symbol %s@%s "
998                                         "for tracing.\n", name, lib->soname);
999
1000                                 break;
1001                         }
1002
1003                         full_name = NULL;
1004                         tmp->next = libsym;
1005                         libsym = tmp;
1006                         break;
1007
1008                 case PLT_OK:
1009                         break;
1010                 }
1011
1012                 free(full_name);
1013
1014                 struct library_symbol *tmp;
1015                 for (tmp = libsym; tmp != NULL; ) {
1016                         /* Look whether we already have a symbol for
1017                          * this address.  If not, add this one.  If
1018                          * yes, look if we should pick the new symbol
1019                          * name.  */
1020
1021                         struct unique_symbol key = { tmp->enter_addr, NULL };
1022                         struct unique_symbol *unique
1023                                 = lsearch(&key, symbols, &num_symbols,
1024                                           sizeof *symbols, &unique_symbol_cmp);
1025
1026                         if (unique->libsym == NULL) {
1027                                 unique->libsym = tmp;
1028                                 unique->addr = tmp->enter_addr;
1029                                 tmp = tmp->next;
1030                                 unique->libsym->next = NULL;
1031                         } else {
1032                                 if (strlen(tmp->name)
1033                                     < strlen(unique->libsym->name)) {
1034                                         library_symbol_set_name
1035                                                 (unique->libsym, tmp->name, 1);
1036                                         tmp->name = NULL;
1037                                 }
1038                                 struct library_symbol *next = tmp->next;
1039                                 library_symbol_destroy(tmp);
1040                                 free(tmp);
1041                                 tmp = next;
1042                         }
1043                 }
1044         }
1045
1046         /* Now we do the union of this set of unique symbols with
1047          * what's already in the library.  */
1048         for (i = 0; i < num_symbols; ++i) {
1049                 struct library_symbol *this_sym = symbols[i].libsym;
1050                 assert(this_sym != NULL);
1051                 struct library_symbol *other
1052                         = library_each_symbol(lib, NULL, symbol_with_address,
1053                                               &this_sym->enter_addr);
1054                 if (other != NULL) {
1055                         library_symbol_destroy(this_sym);
1056                         free(this_sym);
1057                         symbols[i].libsym = NULL;
1058                 }
1059         }
1060
1061         for (i = 0; i < num_symbols; ++i)
1062                 if (symbols[i].libsym != NULL)
1063                         library_add_symbol(lib, symbols[i].libsym);
1064
1065         free(symbols);
1066         return 0;
1067 }
1068
1069 static int
1070 populate_symtab(struct process *proc, const char *filename,
1071                 struct ltelf *lte, struct library *lib,
1072                 int symtabs, int exports)
1073 {
1074         int status;
1075         if (symtabs && lte->symtab != NULL && lte->strtab != NULL
1076             && (status = populate_this_symtab(proc, filename, lte, lib,
1077                                               lte->symtab, lte->strtab,
1078                                               lte->symtab_count, NULL)) < 0)
1079                 return status;
1080
1081         /* Check whether we want to trace symbols implemented by this
1082          * library (-l).  */
1083         struct library_exported_name **names = NULL;
1084         if (exports) {
1085                 debug(DEBUG_FUNCTION, "-l matches %s", lib->soname);
1086                 names = &lib->exported_names;
1087         }
1088
1089         return populate_this_symtab(proc, filename, lte, lib,
1090                                     lte->dynsym, lte->dynstr,
1091                                     lte->dynsym_count, names);
1092 }
1093
1094 static int
1095 read_module(struct library *lib, struct process *proc,
1096             const char *filename, GElf_Addr bias, int main)
1097 {
1098         struct ltelf lte;
1099         if (ltelf_init(&lte, filename) < 0)
1100                 return -1;
1101
1102         /* XXX When we abstract ABI into a module, this should instead
1103          * become something like
1104          *
1105          *    proc->abi = arch_get_abi(lte.ehdr);
1106          *
1107          * The code in ltelf_init needs to be replaced by this logic.
1108          * Be warned that libltrace.c calls ltelf_init as well to
1109          * determine whether ABI is supported.  This is to get
1110          * reasonable error messages when trying to run 64-bit binary
1111          * with 32-bit ltrace.  It is desirable to preserve this.  */
1112         proc->e_machine = lte.ehdr.e_machine;
1113         proc->e_class = lte.ehdr.e_ident[EI_CLASS];
1114         get_arch_dep(proc);
1115
1116         /* Find out the base address.  For PIE main binaries we look
1117          * into auxv, otherwise we scan phdrs.  */
1118         if (main && lte.ehdr.e_type == ET_DYN) {
1119                 arch_addr_t entry;
1120                 if (process_get_entry(proc, &entry, NULL) < 0) {
1121                         fprintf(stderr, "Couldn't find entry of PIE %s\n",
1122                                 filename);
1123                 fail:
1124                         ltelf_destroy(&lte);
1125                         return -1;
1126                 }
1127                 /* XXX The double cast should be removed when
1128                  * arch_addr_t becomes integral type.  */
1129                 lte.entry_addr = (GElf_Addr)(uintptr_t)entry;
1130                 lte.bias = (GElf_Addr)(uintptr_t)entry - lte.ehdr.e_entry;
1131
1132         } else {
1133                 GElf_Phdr phdr;
1134                 size_t i;
1135                 for (i = 0; gelf_getphdr (lte.elf, i, &phdr) != NULL; ++i) {
1136                         if (phdr.p_type == PT_LOAD) {
1137                                 lte.base_addr = phdr.p_vaddr + bias;
1138                                 break;
1139                         }
1140                 }
1141
1142                 lte.bias = bias;
1143                 lte.entry_addr = lte.ehdr.e_entry + lte.bias;
1144
1145                 if (lte.base_addr == 0) {
1146                         fprintf(stderr,
1147                                 "Couldn't determine base address of %s\n",
1148                                 filename);
1149                         goto fail;
1150                 }
1151         }
1152
1153         if (ltelf_read_elf(&lte, filename) < 0)
1154                 goto fail;
1155
1156         if (arch_elf_init(&lte, lib) < 0) {
1157                 fprintf(stderr, "Backend initialization failed.\n");
1158                 goto fail;
1159         }
1160
1161         if (lib == NULL)
1162                 goto fail;
1163
1164         /* Note that we set soname and pathname as soon as they are
1165          * allocated, so in case of further errors, this get released
1166          * when LIB is released, which should happen in the caller
1167          * when we return error.  */
1168
1169         if (lib->pathname == NULL) {
1170                 char *pathname = strdup(filename);
1171                 if (pathname == NULL)
1172                         goto fail;
1173                 library_set_pathname(lib, pathname, 1);
1174         }
1175
1176         if (lte.soname != NULL) {
1177                 char *soname = strdup(lte.soname);
1178                 if (soname == NULL)
1179                         goto fail;
1180                 library_set_soname(lib, soname, 1);
1181         } else {
1182                 const char *soname = rindex(lib->pathname, '/');
1183                 if (soname != NULL)
1184                         soname += 1;
1185                 else
1186                         soname = lib->pathname;
1187                 library_set_soname(lib, soname, 0);
1188         }
1189
1190         /* XXX The double cast should be removed when
1191          * arch_addr_t becomes integral type.  */
1192         arch_addr_t entry = (arch_addr_t)(uintptr_t)lte.entry_addr;
1193         if (arch_translate_address(&lte, entry, &entry) < 0)
1194                 goto fail;
1195
1196         /* XXX The double cast should be removed when
1197          * arch_addr_t becomes integral type.  */
1198         lib->base = (arch_addr_t)(uintptr_t)lte.base_addr;
1199         lib->entry = entry;
1200         /* XXX The double cast should be removed when
1201          * arch_addr_t becomes integral type.  */
1202         lib->dyn_addr = (arch_addr_t)(uintptr_t)lte.dyn_addr;
1203
1204         /* There are two reasons that we need to inspect symbol tables
1205          * or populate PLT entries.  Either the user requested
1206          * corresponding tracing features (respectively -x and -e), or
1207          * they requested tracing exported symbols (-l).
1208          *
1209          * In the latter case we need to keep even those PLT slots
1210          * that are not requested by -e (but we keep them latent).  We
1211          * also need to inspect .dynsym to find what exports this
1212          * library provide, to turn on existing latent PLT
1213          * entries.  */
1214
1215         int plts = filter_matches_library(options.plt_filter, lib);
1216         if ((plts || options.export_filter != NULL)
1217             && populate_plt(proc, filename, &lte, lib) < 0)
1218                 goto fail;
1219
1220         int exports = filter_matches_library(options.export_filter, lib);
1221         int symtabs = filter_matches_library(options.static_filter, lib);
1222         if ((symtabs || exports)
1223             && populate_symtab(proc, filename, &lte, lib,
1224                                symtabs, exports) < 0)
1225                 goto fail;
1226
1227         arch_elf_destroy(&lte);
1228         ltelf_destroy(&lte);
1229         return 0;
1230 }
1231
1232 int
1233 ltelf_read_library(struct library *lib, struct process *proc,
1234                    const char *filename, GElf_Addr bias)
1235 {
1236         return read_module(lib, proc, filename, bias, 0);
1237 }
1238
1239
1240 struct library *
1241 ltelf_read_main_binary(struct process *proc, const char *path)
1242 {
1243         struct library *lib = malloc(sizeof(*lib));
1244         if (lib == NULL || library_init(lib, LT_LIBTYPE_MAIN) < 0) {
1245                 free(lib);
1246                 return NULL;
1247         }
1248         library_set_pathname(lib, path, 0);
1249
1250         /* There is a race between running the process and reading its
1251          * binary for internal consumption.  So open the binary from
1252          * the /proc filesystem.  XXX Note that there is similar race
1253          * for libraries, but there we don't have a nice answer like
1254          * that.  Presumably we could read the DSOs from the process
1255          * memory image, but that's not currently done.  */
1256         char *fname = pid2name(proc->pid);
1257         if (fname == NULL
1258             || read_module(lib, proc, fname, 0, 1) < 0) {
1259                 library_destroy(lib);
1260                 free(lib);
1261                 lib = NULL;
1262         }
1263
1264         free(fname);
1265         return lib;
1266 }