X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=src%2Fsymbolize.cc;h=98a754f7ca09918c2f1de648984b2ca1af169dad;hb=0a9f71036f5617ecd3549abff78a8d9a09c7e56e;hp=18bbccf307213d5257c51205e08ff37095608259;hpb=a0bf19d0dfe1af3540c09796b1d860e5e998bd60;p=platform%2Fupstream%2Fglog.git diff --git a/src/symbolize.cc b/src/symbolize.cc index 18bbccf..98a754f 100644 --- a/src/symbolize.cc +++ b/src/symbolize.cc @@ -45,11 +45,19 @@ // some functions which are not guaranteed to be so, such as memchr() // and memmove(). We assume they are async-signal-safe. // +// Additional header can be specified by the GLOG_BUILD_CONFIG_INCLUDE +// macro to add platform specific defines (e.g. OS_OPENBSD). + +#ifdef GLOG_BUILD_CONFIG_INCLUDE +#include GLOG_BUILD_CONFIG_INCLUDE +#endif // GLOG_BUILD_CONFIG_INCLUDE #include "utilities.h" #if defined(HAVE_SYMBOLIZE) +#include + #include #include "symbolize.h" @@ -74,6 +82,13 @@ void InstallSymbolizeCallback(SymbolizeCallback callback) { g_symbolize_callback = callback; } +static SymbolizeOpenObjectFileCallback g_symbolize_open_object_file_callback = + NULL; +void InstallSymbolizeOpenObjectFileCallback( + SymbolizeOpenObjectFileCallback callback) { + g_symbolize_open_object_file_callback = callback; +} + // This function wraps the Demangle function to provide an interface // where the input symbol is demangled in-place. // To keep stack consumption low, we would like this function to not @@ -95,11 +110,14 @@ _END_GOOGLE_NAMESPACE_ #if defined(__ELF__) #include +#if defined(OS_OPENBSD) +#include +#else #include +#endif #include #include #include -#include // For ElfW() macro. #include #include #include @@ -123,7 +141,7 @@ _START_GOOGLE_NAMESPACE_ // success, return the number of bytes read. Otherwise, return -1. static ssize_t ReadPersistent(const int fd, void *buf, const size_t count) { SAFE_ASSERT(fd >= 0); - SAFE_ASSERT(count >= 0 && count <= std::numeric_limits::max()); + SAFE_ASSERT(count <= std::numeric_limits::max()); char *buf0 = reinterpret_cast(buf); ssize_t num_bytes = 0; while (num_bytes < count) { @@ -309,48 +327,41 @@ FindSymbol(uint64_t pc, const int fd, char *out, int out_size, // both regular and dynamic symbol tables if necessary. On success, // write the symbol name to "out" and return true. Otherwise, return // false. -static bool GetSymbolFromObjectFile(const int fd, uint64_t pc, - char *out, int out_size, - uint64_t map_start_address) { +static bool GetSymbolFromObjectFile(const int fd, + uint64_t pc, + char* out, + int out_size, + uint64_t base_address) { // Read the ELF header. ElfW(Ehdr) elf_header; if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) { return false; } - uint64_t symbol_offset = 0; - if (elf_header.e_type == ET_DYN) { // DSO needs offset adjustment. - symbol_offset = map_start_address; - } - ElfW(Shdr) symtab, strtab; // Consult a regular symbol table first. - if (!GetSectionHeaderByType(fd, elf_header.e_shnum, elf_header.e_shoff, - SHT_SYMTAB, &symtab)) { - return false; - } - if (!ReadFromOffsetExact(fd, &strtab, sizeof(strtab), elf_header.e_shoff + - symtab.sh_link * sizeof(symtab))) { - return false; - } - if (FindSymbol(pc, fd, out, out_size, symbol_offset, - &strtab, &symtab)) { - return true; // Found the symbol in a regular symbol table. + if (GetSectionHeaderByType(fd, elf_header.e_shnum, elf_header.e_shoff, + SHT_SYMTAB, &symtab)) { + if (!ReadFromOffsetExact(fd, &strtab, sizeof(strtab), elf_header.e_shoff + + symtab.sh_link * sizeof(symtab))) { + return false; + } + if (FindSymbol(pc, fd, out, out_size, base_address, &strtab, &symtab)) { + return true; // Found the symbol in a regular symbol table. + } } // If the symbol is not found, then consult a dynamic symbol table. - if (!GetSectionHeaderByType(fd, elf_header.e_shnum, elf_header.e_shoff, - SHT_DYNSYM, &symtab)) { - return false; - } - if (!ReadFromOffsetExact(fd, &strtab, sizeof(strtab), elf_header.e_shoff + - symtab.sh_link * sizeof(symtab))) { - return false; - } - if (FindSymbol(pc, fd, out, out_size, symbol_offset, - &strtab, &symtab)) { - return true; // Found the symbol in a dynamic symbol table. + if (GetSectionHeaderByType(fd, elf_header.e_shnum, elf_header.e_shoff, + SHT_DYNSYM, &symtab)) { + if (!ReadFromOffsetExact(fd, &strtab, sizeof(strtab), elf_header.e_shoff + + symtab.sh_link * sizeof(symtab))) { + return false; + } + if (FindSymbol(pc, fd, out, out_size, base_address, &strtab, &symtab)) { + return true; // Found the symbol in a dynamic symbol table. + } } return false; @@ -481,16 +492,22 @@ static char *GetHex(const char *start, const char *end, uint64_t *hex) { return const_cast(p); } -// Search for the object file (from /proc/self/maps) that contains -// the specified pc. If found, open this file and return the file handle, -// and also set start_address to the start address of where this object -// file is mapped to in memory. Otherwise, return -1. +// Searches for the object file (from /proc/self/maps) that contains +// the specified pc. If found, sets |start_address| to the start address +// of where this object file is mapped in memory, sets the module base +// address into |base_address|, copies the object file name into +// |out_file_name|, and attempts to open the object file. If the object +// file is opened successfully, returns the file descriptor. Otherwise, +// returns -1. |out_file_name_size| is the size of the file name buffer +// (including the null-terminator). static ATTRIBUTE_NOINLINE int OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc, - uint64_t &start_address) { + uint64_t &start_address, + uint64_t &base_address, + char *out_file_name, + int out_file_name_size) { int object_fd; - // Open /proc/self/maps. int maps_fd; NO_INTR(maps_fd = open("/proc/self/maps", O_RDONLY)); FileDescriptor wrapped_maps_fd(maps_fd); @@ -498,11 +515,20 @@ OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc, return -1; } + int mem_fd; + NO_INTR(mem_fd = open("/proc/self/mem", O_RDONLY)); + FileDescriptor wrapped_mem_fd(mem_fd); + if (wrapped_mem_fd.get() < 0) { + return -1; + } + // Iterate over maps and look for the map containing the pc. Then // look into the symbol tables inside. char buf[1024]; // Big enough for line of sane /proc/self/maps + int num_maps = 0; LineReader reader(wrapped_maps_fd.get(), buf, sizeof(buf)); while (true) { + num_maps++; const char *cursor; const char *eol; if (!reader.ReadLine(&cursor, &eol)) { // EOF or malformed line. @@ -531,11 +557,6 @@ OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc, } ++cursor; // Skip ' '. - // Check start and end addresses. - if (!(start_address <= pc && pc < end_address)) { - continue; // We skip this map. PC isn't in this map. - } - // Read flags. Skip flags until we encounter a space or eol. const char * const flags_start = cursor; while (cursor < eol && *cursor != ' ') { @@ -546,20 +567,71 @@ OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc, return -1; // Malformed line. } - // Check flags. We are only interested in "r-x" maps. - if (memcmp(flags_start, "r-x", 3) != 0) { // Not a "r-x" map. + // Determine the base address by reading ELF headers in process memory. + ElfW(Ehdr) ehdr; + // Skip non-readable maps. + if (flags_start[0] == 'r' && + ReadFromOffsetExact(mem_fd, &ehdr, sizeof(ElfW(Ehdr)), start_address) && + memcmp(ehdr.e_ident, ELFMAG, SELFMAG) == 0) { + switch (ehdr.e_type) { + case ET_EXEC: + base_address = 0; + break; + case ET_DYN: + // Find the segment containing file offset 0. This will correspond + // to the ELF header that we just read. Normally this will have + // virtual address 0, but this is not guaranteed. We must subtract + // the virtual address from the address where the ELF header was + // mapped to get the base address. + // + // If we fail to find a segment for file offset 0, use the address + // of the ELF header as the base address. + base_address = start_address; + for (unsigned i = 0; i != ehdr.e_phnum; ++i) { + ElfW(Phdr) phdr; + if (ReadFromOffsetExact( + mem_fd, &phdr, sizeof(phdr), + start_address + ehdr.e_phoff + i * sizeof(phdr)) && + phdr.p_type == PT_LOAD && phdr.p_offset == 0) { + base_address = start_address - phdr.p_vaddr; + break; + } + } + break; + default: + // ET_REL or ET_CORE. These aren't directly executable, so they don't + // affect the base address. + break; + } + } + + // Check start and end addresses. + if (!(start_address <= pc && pc < end_address)) { + continue; // We skip this map. PC isn't in this map. + } + + // Check flags. We are only interested in "r*x" maps. + if (flags_start[0] != 'r' || flags_start[2] != 'x') { continue; // We skip this map. } ++cursor; // Skip ' '. - // Skip to file name. "cursor" now points to file offset. We need to - // skip at least three spaces for file offset, dev, and inode. + // Read file offset. + uint64_t file_offset; + cursor = GetHex(cursor, eol, &file_offset); + if (cursor == eol || *cursor != ' ') { + return -1; // Malformed line. + } + ++cursor; // Skip ' '. + + // Skip to file name. "cursor" now points to dev. We need to + // skip at least two spaces for dev and inode. int num_spaces = 0; while (cursor < eol) { if (*cursor == ' ') { ++num_spaces; - } else if (num_spaces >= 3) { - // The first non-space character after skipping three spaces + } else if (num_spaces >= 2) { + // The first non-space character after skipping two spaces // is the beginning of the file name. break; } @@ -572,12 +644,105 @@ OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc, // Finally, "cursor" now points to file name of our interest. NO_INTR(object_fd = open(cursor, O_RDONLY)); if (object_fd < 0) { + // Failed to open object file. Copy the object file name to + // |out_file_name|. + strncpy(out_file_name, cursor, out_file_name_size); + // Making sure |out_file_name| is always null-terminated. + out_file_name[out_file_name_size - 1] = '\0'; return -1; } return object_fd; } } +// POSIX doesn't define any async-signal safe function for converting +// an integer to ASCII. We'll have to define our own version. +// itoa_r() converts a (signed) integer to ASCII. It returns "buf", if the +// conversion was successful or NULL otherwise. It never writes more than "sz" +// bytes. Output will be truncated as needed, and a NUL character is always +// appended. +// NOTE: code from sandbox/linux/seccomp-bpf/demo.cc. +char *itoa_r(intptr_t i, char *buf, size_t sz, int base, size_t padding) { + // Make sure we can write at least one NUL byte. + size_t n = 1; + if (n > sz) + return NULL; + + if (base < 2 || base > 16) { + buf[0] = '\000'; + return NULL; + } + + char *start = buf; + + uintptr_t j = i; + + // Handle negative numbers (only for base 10). + if (i < 0 && base == 10) { + j = -i; + + // Make sure we can write the '-' character. + if (++n > sz) { + buf[0] = '\000'; + return NULL; + } + *start++ = '-'; + } + + // Loop until we have converted the entire number. Output at least one + // character (i.e. '0'). + char *ptr = start; + do { + // Make sure there is still enough space left in our output buffer. + if (++n > sz) { + buf[0] = '\000'; + return NULL; + } + + // Output the next digit. + *ptr++ = "0123456789abcdef"[j % base]; + j /= base; + + if (padding > 0) + padding--; + } while (j > 0 || padding > 0); + + // Terminate the output with a NUL character. + *ptr = '\000'; + + // Conversion to ASCII actually resulted in the digits being in reverse + // order. We can't easily generate them in forward order, as we can't tell + // the number of characters needed until we are done converting. + // So, now, we reverse the string (except for the possible "-" sign). + while (--ptr > start) { + char ch = *ptr; + *ptr = *start; + *start++ = ch; + } + return buf; +} + +// Safely appends string |source| to string |dest|. Never writes past the +// buffer size |dest_size| and guarantees that |dest| is null-terminated. +void SafeAppendString(const char* source, char* dest, int dest_size) { + int dest_string_length = strlen(dest); + SAFE_ASSERT(dest_string_length < dest_size); + dest += dest_string_length; + dest_size -= dest_string_length; + strncpy(dest, source, dest_size); + // Making sure |dest| is always null-terminated. + dest[dest_size - 1] = '\0'; +} + +// Converts a 64-bit value into a hex string, and safely appends it to |dest|. +// Never writes past the buffer size |dest_size| and guarantees that |dest| is +// null-terminated. +void SafeAppendHexNumber(uint64_t value, char* dest, int dest_size) { + // 64-bit numbers in hex can have up to 16 digits. + char buf[17] = {'\0'}; + SafeAppendString(itoa_r(value, buf, sizeof(buf), 16, 0), dest, dest_size); +} + // The implementation of our symbolization routine. If it // successfully finds the symbol containing "pc" and obtains the // symbol name, returns true and write the symbol name to "out". @@ -590,10 +755,40 @@ static ATTRIBUTE_NOINLINE bool SymbolizeAndDemangle(void *pc, char *out, int out_size) { uint64_t pc0 = reinterpret_cast(pc); uint64_t start_address = 0; + uint64_t base_address = 0; + int object_fd = -1; - int object_fd = OpenObjectFileContainingPcAndGetStartAddress(pc0, - start_address); - if (object_fd == -1) { + if (out_size < 1) { + return false; + } + out[0] = '\0'; + SafeAppendString("(", out, out_size); + + if (g_symbolize_open_object_file_callback) { + object_fd = g_symbolize_open_object_file_callback(pc0, start_address, + base_address, out + 1, + out_size - 1); + } else { + object_fd = OpenObjectFileContainingPcAndGetStartAddress(pc0, start_address, + base_address, + out + 1, + out_size - 1); + } + + // Check whether a file name was returned. + if (object_fd < 0) { + if (out[1]) { + // The object file containing PC was determined successfully however the + // object file was not opened successfully. This is still considered + // success because the object file name and offset are known and tools + // like asan_symbolize.py can be used for the symbolization. + out[out_size - 1] = '\0'; // Making sure |out| is always null-terminated. + SafeAppendString("+0x", out, out_size); + SafeAppendHexNumber(pc0 - base_address, out, out_size); + SafeAppendString(")", out, out_size); + return true; + } + // Failed to determine the object file containing PC. Bail out. return false; } FileDescriptor wrapped_object_fd(object_fd); @@ -615,7 +810,7 @@ static ATTRIBUTE_NOINLINE bool SymbolizeAndDemangle(void *pc, char *out, } } if (!GetSymbolFromObjectFile(wrapped_object_fd.get(), pc0, - out, out_size, start_address)) { + out, out_size, base_address)) { return false; } @@ -649,6 +844,67 @@ static ATTRIBUTE_NOINLINE bool SymbolizeAndDemangle(void *pc, char *out, _END_GOOGLE_NAMESPACE_ +#elif defined(OS_WINDOWS) + +#include +#pragma comment(lib, "DbgHelp") + +_START_GOOGLE_NAMESPACE_ + +class SymInitializer { +public: + HANDLE process = NULL; + bool ready = false; + SymInitializer() { + // Initialize the symbol handler. + // https://msdn.microsoft.com/en-us/library/windows/desktop/ms680344(v=vs.85).aspx + process = GetCurrentProcess(); + // Defer symbol loading. + // We do not request undecorated symbols with SYMOPT_UNDNAME + // because the mangling library calls UnDecorateSymbolName. + SymSetOptions(SYMOPT_DEFERRED_LOADS); + if (SymInitialize(process, NULL, true)) { + ready = true; + } + } + ~SymInitializer() { + SymCleanup(process); + // We do not need to close `HANDLE process` because it's a "pseudo handle." + } +private: + SymInitializer(const SymInitializer&); + SymInitializer& operator=(const SymInitializer&); +}; + +static ATTRIBUTE_NOINLINE bool SymbolizeAndDemangle(void *pc, char *out, + int out_size) { + const static SymInitializer symInitializer; + if (!symInitializer.ready) { + return false; + } + // Resolve symbol information from address. + // https://msdn.microsoft.com/en-us/library/windows/desktop/ms680578(v=vs.85).aspx + char buf[sizeof(SYMBOL_INFO) + MAX_SYM_NAME]; + SYMBOL_INFO *symbol = reinterpret_cast(buf); + symbol->SizeOfStruct = sizeof(SYMBOL_INFO); + symbol->MaxNameLen = MAX_SYM_NAME; + // We use the ANSI version to ensure the string type is always `char *`. + // This could break if a symbol has Unicode in it. + BOOL ret = SymFromAddr(symInitializer.process, + reinterpret_cast(pc), 0, symbol); + if (ret == 1 && static_cast(symbol->NameLen) < out_size) { + // `NameLen` does not include the null terminating character. + strncpy(out, symbol->Name, static_cast(symbol->NameLen) + 1); + out[static_cast(symbol->NameLen)] = '\0'; + // Symbolization succeeded. Now we try to demangle the symbol. + DemangleInplace(out, out_size); + return true; + } + return false; +} + +_END_GOOGLE_NAMESPACE_ + #else # error BUG: HAVE_SYMBOLIZE was wrongly set #endif