From 5c5c38e500fd8f20bb8d6c0177948dbfe86b6fcd Mon Sep 17 00:00:00 2001 From: Dima Kogan Date: Mon, 2 Jun 2014 02:01:57 -0700 Subject: [PATCH] We now use known prototypes for all aliased symbols (same address) Some libraries have multiple names for the same function. Prior to this patch, it was possible to define a prototype for a symbol, and not have ltrace use it because it saw a different symbol be called. libc is a common source of this. For instance (on my amd64 Debian box) it defines the nanosleep symbol as both 'nanosleep' and '__GI___nanosleep', at the same address. If a calling library calls '__GI___nanosleep', then an ltrace prototype for 'nanosleep' would not be used, even though it should apply to this call --- dwarf_prototypes.c | 7 +- library.c | 241 ++++++++++++++++++++++++++++++++++++++++++++++------- library.h | 58 +++++++++++-- ltrace-elf.c | 25 ++---- output.c | 39 ++++++++- proc.c | 52 +++++++++--- 6 files changed, 346 insertions(+), 76 deletions(-) diff --git a/dwarf_prototypes.c b/dwarf_prototypes.c index 6094658..b50c82e 100644 --- a/dwarf_prototypes.c +++ b/dwarf_prototypes.c @@ -910,12 +910,7 @@ static bool import_subprogram(struct protolib *plib, struct library *lib, { // I use the linkage function name if there is one, otherwise the // plain name - const char *function_name = NULL; - Dwarf_Attribute attr; - if (dwarf_attr(die, DW_AT_linkage_name, &attr) != NULL) - function_name = dwarf_formstring(&attr); - if (function_name == NULL) - function_name = dwarf_diename(die); + const char *function_name = dwarf_diename(die); if (function_name == NULL) { complain(die, "Function has no name. Not importing"); return true; diff --git a/library.c b/library.c index 3a22519..7ccd7da 100644 --- a/library.c +++ b/library.c @@ -29,8 +29,12 @@ #include "callback.h" #include "debug.h" #include "dict.h" +#include "vect.h" #include "backend.h" // for arch_library_symbol_init, arch_library_init +static void +library_exported_names_init(struct library_exported_names *names); + #ifndef OS_HAVE_LIBRARY_DATA int os_library_init(struct library *lib) @@ -292,7 +296,7 @@ private_library_init(struct library *lib, enum library_type type) lib->own_pathname = 0; lib->symbols = NULL; - lib->exported_names = NULL; + library_exported_names_init(&lib->exported_names); lib->type = type; #if defined(HAVE_LIBDW) @@ -316,18 +320,213 @@ library_init(struct library *lib, enum library_type type) return 0; } -static int -library_exported_name_clone(struct library_exported_name *retp, - struct library_exported_name *exnm) + + + + +static void _dtor_string(const char **tgt, void *data) +{ + free((char*)*tgt); +} +static int _clone_vect(struct vect **to, const struct vect **from, void *data) +{ + *to = malloc(sizeof(struct vect)); + if(*to == NULL) + return -1; + + return + VECT_CLONE(*to, *from, const char*, + dict_clone_string, + _dtor_string, + NULL); +} +static void _dtor_vect(const struct vect **tgt, void *data) +{ + VECT_DESTROY(*tgt, const char*, _dtor_string, NULL); + free(*tgt); +} + +static void +library_exported_names_init(struct library_exported_names *names) +{ + DICT_INIT(&names->names, + const char*, uint64_t, + dict_hash_string, dict_eq_string, NULL); + DICT_INIT(&names->addrs, + uint64_t, struct vect*, + dict_hash_uint64, dict_eq_uint64, NULL); +} + +static void +library_exported_names_destroy(struct library_exported_names *names) { - char *name = exnm->own_name ? strdup(exnm->name) : (char *)exnm->name; - if (name == NULL) + DICT_DESTROY(&names->names, + const char*, uint64_t, + _dtor_string, NULL, NULL); + DICT_DESTROY(&names->addrs, + uint64_t, struct vect*, + NULL, _dtor_vect, NULL); +} + +static int +library_exported_names_clone(struct library_exported_names *retp, + const struct library_exported_names *names) +{ + return + DICT_CLONE(&retp->names, &names->names, + const char*, uint64_t, + dict_clone_string, _dtor_string, + NULL, NULL, + NULL) || + DICT_CLONE(&retp->addrs, &names->addrs, + uint64_t, struct vect*, + NULL, NULL, + _clone_vect, _dtor_vect, + NULL); +} + +int library_exported_names_push(struct library_exported_names *names, + uint64_t addr, const char *name, + int own_name ) +{ + // first, take ownership of the name, if it's not yet ours + if(!own_name) + name = strdup(name); + if(name == NULL) return -1; - retp->name = name; - retp->own_name = exnm->own_name; + + // push to the name->addr map + int result = DICT_INSERT(&names->names, &name, &addr); + if(result == 1) { + // This symbol is already present in the table. This library has + // multiple copies of this symbol (probably corresponding to + // different symbol versions). I should handle this gracefully + // at some point, but for now I simply ignore later instances of + // any particular symbol + free(name); + return 0; + } + + if(result != 0) + return result; + + // push to the addr->names map + // I get the alias vector. If it doesn't yet exist, I make it + struct vect *aliases; + struct vect **paliases = DICT_FIND_REF(&names->addrs, + &addr, struct vect*); + + if (paliases == NULL) { + aliases = malloc(sizeof(struct vect)); + if(aliases == NULL) + return -1; + VECT_INIT(aliases, const char*); + result = DICT_INSERT(&names->addrs, &addr, &aliases); + if(result != 0) + return result; + } + else + aliases = *paliases; + + const char *namedup = strdup(name); + if(namedup == NULL) + return -1; + + result = vect_pushback(aliases, &namedup); + if(result != 0) + return result; + return 0; } +struct library_exported_names_each_context +{ + enum callback_status (*inner_cb)(const char *, void *); + void *data; + bool failure : 1; +}; +static enum callback_status +library_exported_names_each_cb(const char **key, uint64_t *value, void *data) +{ + struct library_exported_names_each_context *context = + (struct library_exported_names_each_context*)data; + enum callback_status status = context->inner_cb(*key, context->data); + if(status == CBS_FAIL) + context->failure = true; + return status; +} +bool library_exported_names_each(const struct library_exported_names *names, + enum callback_status (*cb)(const char *, + void *), + void *data) +{ + struct library_exported_names_each_context context = + {.inner_cb = cb, + .data = data, + .failure = false}; + DICT_EACH(&names->names, + const char*, uint64_t, + NULL, library_exported_names_each_cb, &context); + return !context.failure; +} + +struct library_exported_names_each_alias_context +{ + enum callback_status (*inner_cb)(const char *, void *); + const char *origname; + void *data; + bool failure : 1; +}; +static enum callback_status +library_exported_names_each_alias_cb(const char **name, void *data) +{ + struct library_exported_names_each_alias_context *context = + (struct library_exported_names_each_alias_context*)data; + + // I do not report the original name we were asked about. Otherwise, any + // time the caller asks for aliases of symbol "sym", I'll always report + // "sym" along with any actual aliases + if(strcmp(*name, context->origname) == 0) + return CBS_CONT; + + enum callback_status status = context->inner_cb(*name, context->data); + if(status == CBS_FAIL) + context->failure = true; + return status; +} + +bool library_exported_names_each_alias( + const struct library_exported_names *names, + const char *aliasname, + enum callback_status (*cb)(const char *, + void *), + void *data) +{ + // I have a symbol name. I look up its address, then get the list of + // aliased names + uint64_t *addr = DICT_FIND_REF(&names->names, + &aliasname, uint64_t); + if(addr == NULL) + return false; + + // OK. I have an address. Get the list of symbols at this address + struct vect **aliases = DICT_FIND_REF(&names->addrs, + addr, struct vect*); + if(aliases == NULL) + return false; + + struct library_exported_names_each_alias_context context = + {.inner_cb = cb, + .origname = aliasname, + .data = data, + .failure = false}; + VECT_EACH(*aliases, const char*, NULL, + library_exported_names_each_alias_cb, &context); +} + + + + int library_clone(struct library *retp, struct library *lib) { @@ -372,20 +571,9 @@ library_clone(struct library *retp, struct library *lib) } /* Clone exported names. */ - { - struct library_exported_name *it; - struct library_exported_name **nnamep = &retp->exported_names; - for (it = lib->exported_names; it != NULL; it = it->next) { - *nnamep = malloc(sizeof(**nnamep)); - if (*nnamep == NULL - || library_exported_name_clone(*nnamep, it) < 0) { - free(*nnamep); - goto fail; - } - nnamep = &(*nnamep)->next; - } - *nnamep = NULL; - } + if (library_exported_names_clone(&retp->exported_names, + &lib->exported_names) != 0) + goto fail; if (os_library_clone(retp, lib) < 0) goto fail; @@ -419,14 +607,7 @@ library_destroy(struct library *lib) } /* Release exported names. */ - struct library_exported_name *it; - for (it = lib->exported_names; it != NULL; ) { - struct library_exported_name *next = it->next; - if (it->own_name) - free((char *)it->name); - free(it); - it = next; - } + library_exported_names_destroy(&lib->exported_names); } void diff --git a/library.h b/library.h index 4d649e0..f8a5cf8 100644 --- a/library.h +++ b/library.h @@ -23,11 +23,13 @@ #define _LIBRARY_H_ #include +#include #if defined(HAVE_LIBDW) # include #endif +#include "dict.h" #include "callback.h" #include "forward.h" #include "sysdep.h" @@ -41,11 +43,21 @@ enum toplt { size_t arch_addr_hash(const arch_addr_t *addr); int arch_addr_eq(const arch_addr_t *addr1, const arch_addr_t *addr2); -/* For handling -l. */ -struct library_exported_name { - struct library_exported_name *next; - const char *name; - int own_name : 1; + +/* For handling -l and for handling library export aliases (different symbol + * name, same address) + * + * This structure needs to + * - store (addr, name) tuples + * - be searchable by addr (when populating) + * - be searchable by name (when looking for aliases) + * - be enumeratable (by activate_latent_in()) + */ +struct library_exported_names { + // I store the data in several structures to facilitate different types + // of access + struct dict names; // maps a name to an address + struct dict addrs; // maps an address to a vect of names }; struct library_symbol { @@ -158,8 +170,10 @@ struct library { /* List of names that this library implements, and that match * -l filter. Each time a new library is mapped, its list of * exports is examined, and corresponding PLT slots are - * enabled. */ - struct library_exported_name *exported_names; + * enabled. This data structure also keeps track of export + * addresses to find symbols with different names, but same + * addresses */ + struct library_exported_names exported_names; /* Prototype library associated with this library. */ struct protolib *protolib; @@ -241,4 +255,34 @@ int arch_translate_address(struct ltelf *lte, int arch_translate_address_dyn(struct process *proc, arch_addr_t addr, arch_addr_t *ret); + +/* Pushes a name/address tuple to the list of a library's exports. Returns 0 on + * success + */ +int library_exported_names_push(struct library_exported_names *names, + uint64_t addr, const char *name, + int own_name ); + +/* Iterates through the a library's export list. The callback is called for + * every symbol a library exports. Symbol aliases do not apply here. If multiple + * symbols are defined at the same address, each is reported here. Returns true + * on success. If the callback fails at any point, this returns false + */ +bool library_exported_names_each(const struct library_exported_names *names, + enum callback_status (*cb)(const char *, + void *), + void *data); + +/* Iterates through the a library's export list, reporting each symbol that is + * an alias of the given 'aliasname' symbol. This 'aliasname' symbol itself is + * NOT reported, so if this symbol is unique, the callback is not called at all. + * Returns true on success + */ +bool library_exported_names_each_alias( + const struct library_exported_names *names, + const char *aliasname, + enum callback_status (*cb)(const char *, + void *), + void *data); + #endif /* _LIBRARY_H_ */ diff --git a/ltrace-elf.c b/ltrace-elf.c index f638342..a6a5531 100644 --- a/ltrace-elf.c +++ b/ltrace-elf.c @@ -901,13 +901,8 @@ static int populate_this_symtab(struct process *proc, const char *filename, struct ltelf *lte, struct library *lib, Elf_Data *symtab, const char *strtab, size_t count, - struct library_exported_name **names) + struct library_exported_names *names) { - /* If a valid NAMES is passed, we pass in *NAMES a list of - * symbol names that this library exports. */ - if (names != NULL) - *names = NULL; - /* Using sorted array would be arguably better, but this * should be well enough for the number of symbols that we * typically deal with. */ @@ -957,20 +952,14 @@ populate_this_symtab(struct process *proc, const char *filename, /* If we are interested in exports, store this name. */ if (names != NULL) { - struct library_exported_name *export - = malloc(sizeof *export); char *name_copy = strdup(name); - - if (name_copy == NULL || export == NULL) { - free(name_copy); - free(export); + if (name_copy == NULL || + library_exported_names_push(names, + sym.st_value, + name_copy, 1) != 0) + { fprintf(stderr, "Couldn't store symbol %s. " "Tracing may be incomplete.\n", name); - } else { - export->name = name_copy; - export->own_name = 1; - export->next = *names; - *names = export; } } @@ -1115,7 +1104,7 @@ populate_symtab(struct process *proc, const char *filename, /* Check whether we want to trace symbols implemented by this * library (-l). */ - struct library_exported_name **names = NULL; + struct library_exported_names *names = NULL; if (exports) { debug(DEBUG_FUNCTION, "-l matches %s", lib->soname); names = &lib->exported_names; diff --git a/output.c b/output.c index 10faee2..2128816 100644 --- a/output.c +++ b/output.c @@ -197,6 +197,28 @@ snip_period(char *buf) } } +struct lookup_prototype_alias_context +{ + struct library *lib; + struct prototype *result; // output +}; +static enum callback_status +lookup_prototype_alias_cb(const char *name, void *data) +{ + struct lookup_prototype_alias_context *context = + (struct lookup_prototype_alias_context*)data; + + struct library *lib = context->lib; + + context->result = + protolib_lookup_prototype(lib->protolib, name, + lib->type != LT_LIBTYPE_SYSCALL); + if (context->result != NULL) + return CBS_STOP; + + return CBS_CONT; +} + static struct prototype * library_get_prototype(struct library *lib, const char *name) { @@ -235,8 +257,21 @@ library_get_prototype(struct library *lib, const char *name) if (lib->protolib == NULL) return NULL; - return protolib_lookup_prototype(lib->protolib, name, - lib->type != LT_LIBTYPE_SYSCALL); + struct prototype *result = + protolib_lookup_prototype(lib->protolib, name, + lib->type != LT_LIBTYPE_SYSCALL); + if (result != NULL) + return result; + + // prototype not found. Is it aliased? + struct lookup_prototype_alias_context context = {.lib = lib, + .result = NULL}; + library_exported_names_each_alias(&lib->exported_names, name, + lookup_prototype_alias_cb, + &context); + + // if found, the prototype is stored here, otherwise it's NULL + return context.result; } struct find_proto_data { diff --git a/proc.c b/proc.c index 5385510..6d6562f 100644 --- a/proc.c +++ b/proc.c @@ -867,23 +867,49 @@ proc_activate_delayed_symbol(struct process *proc, return breakpoint_for_symbol(libsym, proc); } + +struct activate_latent_in_context +{ + struct process *proc; + struct library *lib; +}; static enum callback_status -activate_latent_in(struct process *proc, struct library *lib, void *data) +activate_latent_in_cb(const char *name, void *data) { - struct library_exported_name *exported; - for (exported = data; exported != NULL; exported = exported->next) { - struct library_symbol *libsym = NULL; - while ((libsym = library_each_symbol(lib, libsym, - library_symbol_named_cb, - (void *)exported->name)) - != NULL) - if (libsym->latent - && proc_activate_latent_symbol(proc, libsym) < 0) - return CBS_FAIL; - } + const struct activate_latent_in_context *ctx = + (const struct activate_latent_in_context*)data; + + struct process *proc = ctx->proc; + struct library *lib = ctx->lib; + + struct library_symbol *libsym = NULL; + while ((libsym = library_each_symbol(lib, libsym, + library_symbol_named_cb, + (void *)name)) + != NULL) + if (libsym->latent + && proc_activate_latent_symbol(proc, libsym) < 0) + return CBS_FAIL; + return CBS_CONT; } +static enum callback_status +activate_latent_in(struct process *proc, struct library *lib, void *data) +{ + struct library_exported_names *exported_names = + (struct library_exported_names*)data; + + struct activate_latent_in_context context = {.proc = proc, + .lib = lib}; + if (library_exported_names_each(exported_names, + activate_latent_in_cb, + &context)) + return CBS_CONT; + else + return CBS_FAIL; +} + void proc_add_library(struct process *proc, struct library *lib) { @@ -969,7 +995,7 @@ proc_add_library(struct process *proc, struct library *lib) * library itself). */ struct library *lib2 = NULL; while ((lib2 = proc_each_library(proc, lib2, activate_latent_in, - lib->exported_names)) != NULL) + &lib->exported_names)) != NULL) fprintf(stderr, "Couldn't activate latent symbols for %s in %d: %s.\n", lib2->soname, proc->pid, strerror(errno)); -- 2.7.4