From: Milian Wolff Date: Wed, 4 Jun 2014 12:18:14 +0000 (+0200) Subject: Store backtrace information in a memory efficient tree structure. X-Git-Tag: submit/tizen/20180620.112952^2~503 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=c92e49000453af99fa509fcd30d4a6c9ea1d9332;p=sdk%2Ftools%2Fheaptrack.git Store backtrace information in a memory efficient tree structure. Instead of storing all individual traces that ever call any of the allocation functions, we now store a tree of instruction pointers. The tree is constructed from top-to-bottom and each node representing an instruction pointer gets associated with a unique index. This index is enough to reconstruct the backtrace from that point by iterating over the parent indices, which is now done in the evaluation program. A nice side effect is that the data file becomes smaller for bigger application evaluation runs, as common parts at the beginning of every allocation trace can be implicitly shared. --- diff --git a/malloctrace.cpp b/malloctrace.cpp index 7b157bb..aab7168 100644 --- a/malloctrace.cpp +++ b/malloctrace.cpp @@ -18,88 +18,24 @@ */ #include -#include -#include #include #include #include #include #include -#include -#include #include +#include -#include #include #include -#include #include -#define UNW_LOCAL_ONLY -#include +#include "tracetree.h" using namespace std; -struct Trace -{ - using ip_t = void*; - - static const int MAX_SIZE = 64; - - const ip_t* begin() const - { - return m_data; - } - ip_t* begin() - { - return m_data; - } - - const ip_t* end() const - { - return m_data + m_size; - } - ip_t* end() - { - return m_data + m_size; - } - - int size() const - { - return m_size; - } - void setSize(int size) - { - m_size = size; - } - - bool operator==(const Trace& o) const - { - return m_size == o.m_size && !memcmp(m_data, o.m_data, m_size * sizeof(ip_t)); - } -private: - int m_size = 0; - ip_t m_data[MAX_SIZE]; -}; - -namespace std { - template<> - struct hash - { - size_t operator() (const Trace& trace) const - { - size_t seed = 0; - for (auto ip : trace) { - boost::hash_combine(seed, ip); - } - boost::hash_combine(seed, trace.size()); - return seed; - } - }; -} - namespace { using malloc_t = void* (*) (size_t); @@ -123,14 +59,6 @@ dlopen_t real_dlopen = nullptr; // threadsafe stuff atomic moduleCacheDirty(true); -bool trace(Trace& trace) -{ - ///FIXME: handle skip value - int size = unw_backtrace(trace.begin(), Trace::MAX_SIZE); - trace.setSize(max(0, size)); - return size > 0; -} - struct HandleGuard { HandleGuard() @@ -178,7 +106,6 @@ struct Data Data() { modules.reserve(32); - traceCache.reserve(16384); allocationInfo.reserve(16384); string outputFileName = env("DUMP_MALLOC_TRACE_OUTPUT"); @@ -278,8 +205,8 @@ struct Data void handleMalloc(void* ptr, size_t size) { - Trace traceBuffer; - if (!trace(traceBuffer)) { + Trace trace; + if (!trace.fill()) { return; } @@ -287,20 +214,9 @@ struct Data if (moduleCacheDirty) { updateModuleCache(); } - auto it = traceCache.find(traceBuffer); - if (it == traceCache.end()) { - // cache trace - auto traceId = next_trace_id++; - it = traceCache.insert(it, {traceBuffer, traceId}); - // print trace - fprintf(out, "t %u ", traceId); - for (auto ip : traceBuffer) { - fprintf(out, "%p ", ip); - } - fputc('\n', out); - } - allocationInfo[ptr] = {size, it->second}; - fprintf(out, "+ %lu %u\n", size, it->second); + auto index = traceTree.index(trace, out); + allocationInfo[ptr] = {size, index}; + fprintf(out, "+ %lu %lu\n", size, index); } void handleFree(void* ptr) @@ -310,20 +226,18 @@ struct Data if (it == allocationInfo.end()) { return; } - fprintf(out, "- %lu %u\n", it->second.size, it->second.traceId); + fprintf(out, "- %lu %lu\n", it->second.size, it->second.traceIndex); allocationInfo.erase(it); } mutex m_mutex; - unsigned int next_thread_id = 0; - unsigned int next_trace_id = 0; vector modules; - unordered_map traceCache; + TraceTree traceTree; struct AllocationInfo { size_t size; - unsigned int traceId; + size_t traceIndex; }; unordered_map allocationInfo; FILE* out = nullptr; diff --git a/malloctrace_main.cpp b/malloctrace_main.cpp index 414f1f0..ede201f 100644 --- a/malloctrace_main.cpp +++ b/malloctrace_main.cpp @@ -159,11 +159,20 @@ struct Module bool isExe; }; -struct Trace +struct InstructionPointer { - vector backtrace; - size_t allocations = 0; - size_t leaked = 0; + uintptr_t instructionPointer; + size_t parentIndex; +}; + +struct Allocation +{ + // backtrace entry point + size_t ipIndex; + // number of allocations + size_t allocations; + // amount of bytes leaked + size_t leaked; }; struct AccumulatedTraceData @@ -171,30 +180,60 @@ struct AccumulatedTraceData AccumulatedTraceData() { modules.reserve(64); - traces.reserve(16384); + instructionPointers.reserve(65536); + // root node with invalid instruction pointer + instructionPointers.push_back({0, 0}); + allocations.reserve(16384); } - void printBacktrace(const Trace& trace, ostream& out) const + void printBacktrace(InstructionPointer ip, ostream& out) const { - for (auto ip : trace.backtrace) { - out << "0x" << hex << ip << dec; + while (ip.instructionPointer) { + out << "0x" << hex << ip.instructionPointer << dec; // find module for this instruction pointer - auto module = lower_bound(modules.begin(), modules.end(), ip, + auto module = lower_bound(modules.begin(), modules.end(), ip.instructionPointer, [] (const Module& module, const uintptr_t ip) -> bool { return module.addressEnd < ip; }); - if (module != modules.end() && module->addressStart <= ip && module->addressEnd >= ip) { - out << ' ' << module->resolveAddress(ip) + if (module != modules.end() + && module->addressStart <= ip.instructionPointer + && module->addressEnd >= ip.instructionPointer) + { + out << ' ' << module->resolveAddress(ip.instructionPointer) << ' ' << module->fileName; } else { out << " "; } out << endl; + + ip = instructionPointers[ip.parentIndex]; + }; + } + + Allocation& findAllocation(const size_t ipIndex) + { + auto it = lower_bound(allocations.begin(), allocations.end(), ipIndex, + [] (const Allocation& allocation, const size_t ipIndex) -> bool { + return allocation.ipIndex < ipIndex; + }); + if (it == allocations.end() || it->ipIndex != ipIndex) { + it = allocations.insert(it, {ipIndex, 0, 0}); + } + return *it; + } + + InstructionPointer findIp(const size_t ipIndex) const + { + if (ipIndex > instructionPointers.size()) { + return {}; + } else { + return instructionPointers[ipIndex]; } } vector modules; - vector traces; + vector instructionPointers; + vector allocations; map sizeHistogram; size_t totalAllocated = 0; @@ -225,7 +264,7 @@ int main(int argc, char** argv) string line; line.reserve(1024); stringstream lineIn(ios_base::in); - size_t nextTraceId = 0; + size_t nextIpId = 1; while (in.good()) { getline(in, line); if (line.empty()) { @@ -251,71 +290,60 @@ int main(int argc, char** argv) return 1; } data.modules.push_back({fileName, isExe, addressStart, addressEnd}); - } else if (mode == 't') { - Trace trace; - unsigned int id = 0; + } else if (mode == 'i') { + InstructionPointer ip{0, 0}; + size_t id = 0; lineIn >> id; if (lineIn.bad()) { cerr << "failed to parse line: " << line << endl; return 1; } - if (id != nextTraceId) { + if (id != nextIpId) { cerr << "inconsistent trace data: " << line << endl - << "expected trace with id: " << nextTraceId << endl; + << "expected id: " << nextIpId << endl; return 1; } lineIn << hex; - uintptr_t ip = 0; - while (lineIn >> ip) { - trace.backtrace.push_back(ip); - } + lineIn >> ip.instructionPointer; lineIn << dec; - data.traces.push_back(trace); - ++nextTraceId; + lineIn >> ip.parentIndex; + data.instructionPointers.push_back(ip); + ++nextIpId; } else if (mode == '+') { size_t size = 0; lineIn >> size; - unsigned int traceId = 0; - lineIn >> traceId; + size_t ipId = 0; + lineIn >> ipId; if (lineIn.bad()) { cerr << "failed to parse line: " << line << endl; return 1; } - if (traceId < data.traces.size()) { - auto& trace = data.traces[traceId]; - trace.leaked += size; - ++trace.allocations; - } else { - cerr << "failed to find trace of malloc at " << traceId << endl; - return 1; - } + auto& allocation = data.findAllocation(ipId); + allocation.leaked += size; + ++allocation.allocations; data.totalAllocated += size; - data.totalAllocations++; + ++data.totalAllocations; data.leaked += size; if (data.leaked > data.peak) { data.peak = data.leaked; } - data.sizeHistogram[size]++; + ++data.sizeHistogram[size]; } else if (mode == '-') { - /// TODO size_t size = 0; lineIn >> size; - unsigned int traceId = 0; - lineIn >> traceId; + size_t ipId = 0; + lineIn >> ipId; if (lineIn.bad()) { cerr << "failed to parse line: " << line << endl; return 1; } - if (traceId < data.traces.size()) { - auto& trace = data.traces[traceId]; - if (trace.leaked >= size) { - trace.leaked -= size; - } else { - cerr << "inconsistent allocation info, underflowed allocations of " << traceId << endl; - trace.leaked = 0; - } + auto& allocation = data.findAllocation(ipId); + if (!allocation.allocations || allocation.leaked < size) { + cerr << "inconsistent allocation info, underflowed allocations of " << ipId << endl; + allocation.leaked = 0; + allocation.allocations = 0; } else { - cerr << "failed to find trace for free at " << traceId << endl; + allocation.leaked -= size; } data.leaked -= size; } else { @@ -327,32 +355,32 @@ int main(int argc, char** argv) sort(data.modules.begin(), data.modules.end()); // sort by amount of allocations - sort(data.traces.begin(), data.traces.end(), [] (const Trace& l, const Trace &r) { + sort(data.allocations.begin(), data.allocations.end(), [] (const Allocation& l, const Allocation &r) { return l.allocations > r.allocations; }); cout << "TOP ALLOCATORS" << endl; - for (size_t i = 0; i < min(10lu, data.traces.size()); ++i) { - const auto& trace = data.traces[i]; - cout << trace.allocations << " allocations at:" << endl; - data.printBacktrace(trace, cout); + for (size_t i = 0; i < min(10lu, data.allocations.size()); ++i) { + const auto& allocation = data.allocations[i]; + cout << allocation.allocations << " allocations at:" << endl; + data.printBacktrace(data.findIp(allocation.ipIndex), cout); cout << endl; } cout << endl; // sort by amount of leaks - sort(data.traces.begin(), data.traces.end(), [] (const Trace& l, const Trace &r) { + sort(data.allocations.begin(), data.allocations.end(), [] (const Allocation& l, const Allocation &r) { return l.leaked < r.leaked; }); size_t totalLeakAllocations = 0; - for (const auto& trace : data.traces) { - if (!trace.leaked) { + for (const auto& allocation : data.allocations) { + if (!allocation.leaked) { continue; } - totalLeakAllocations += trace.allocations; + totalLeakAllocations += allocation.allocations; - cout << trace.leaked << " bytes leaked in " << trace.allocations << " allocations at:" << endl; - data.printBacktrace(trace, cout); + cout << allocation.leaked << " bytes leaked in " << allocation.allocations << " allocations at:" << endl; + data.printBacktrace(data.findIp(allocation.ipIndex), cout); cout << endl; } cout << data.leaked << " bytes leaked in total from " << totalLeakAllocations << " allocations" << endl; diff --git a/trace.h b/trace.h new file mode 100644 index 0000000..725ab7d --- /dev/null +++ b/trace.h @@ -0,0 +1,68 @@ +/* + * Copyright 2014 Milian Wolff + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Library General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef TRACE_H +#define TRACE_H + +#include + +#define UNW_LOCAL_ONLY +#include + +struct Trace +{ + using ip_t = void*; + + static const int MAX_SIZE = 64; + + const ip_t* begin() const + { + return m_data + m_skip; + } + + const ip_t* end() const + { + return begin() + m_size; + } + + ip_t operator[] (int i) const + { + return m_data[m_skip + i]; + } + + int size() const + { + return m_size; + } + + bool fill(int skip = 2) + { + int size = unw_backtrace(m_data, MAX_SIZE); + m_size = size > skip ? size - skip : 0; + m_skip = skip; + return m_size > 0; + } + +private: + int m_size = 0; + int m_skip = 0; + ip_t m_data[MAX_SIZE]; +}; + +#endif // TRACE_H diff --git a/tracetree.h b/tracetree.h new file mode 100644 index 0000000..14f31a7 --- /dev/null +++ b/tracetree.h @@ -0,0 +1,79 @@ +/* + * Copyright 2014 Milian Wolff + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Library General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef TRACETREE_H +#define TRACETREE_H + +#include +#include + +#include "trace.h" + +struct TraceEdge +{ + Trace::ip_t instructionPointer; + // index associated to the backtrace up to this instruction pointer + // the evaluation process can then reverse-map the index to the parent ip + // to rebuild the backtrace from the bottom-up + std::size_t index; + // Unsorted list of children, assumed to be small + std::vector children; +}; + +/** + * Top-down tree of backtrace instruction pointers. + * + * This is supposed to be a memory efficient storage of all instruction pointers + * ever encountered in any backtrace. + */ +class TraceTree +{ +public: + /** + * Index the backtrace and return the index of the last instruction pointer. + */ + std::size_t index(const Trace& trace, FILE* out) + { + size_t index = 0; + TraceEdge* parent = &m_root; + for (int i = trace.size() - 1; i >= 0; --i) { + const auto ip = trace[i]; + if (!ip) { + continue; + } + auto it = std::find_if(parent->children.begin(), parent->children.end(), [ip] (const TraceEdge& l) { + return l.instructionPointer == ip; + }); + if (it == parent->children.end()) { + index = m_index++; + it = parent->children.insert(it, {ip, index, {}}); + fprintf(out, "i %lu %p %lu\n", index, ip, parent->index); + } + index = it->index; + parent = &(*it); + } + return index; + } + +private: + TraceEdge m_root = {0, 0, {}}; + std::size_t m_index = 1; +}; + +#endif // TRACETREE_H