Bring back trace and ip cache to reduce generated filesize.
author Milian Wolff <mail@milianw.de>
Tue, 20 May 2014 13:08:14 +0000 (15:08 +0200)
committer Milian Wolff <mail@milianw.de>
Tue, 20 May 2014 13:08:14 +0000 (15:08 +0200)
This also speeds things up, as repeatedly printing large traces is
actually quite slow. Furthermore, we extend the tool to actually interpret the data.

malloctrace.cpp
malloctrace_main.cpp

index 5fb66ae..40f5941 100644 (file)
 
 using namespace std;
 
+using Trace = vector<unw_word_t>;
+
+namespace std { // hash support so a Trace (vector<unw_word_t>) can serve as an unordered_map key
+    template<> // NOTE(review): Trace aliases a std::vector of a libunwind integer typedef, not a program-defined type - confirm this specialization of std::hash is permitted
+    struct hash<Trace>
+    {
+        size_t operator() (const Trace& trace) const // returns one hash value computed from all entries of the trace
+        {
+            std::size_t seed = 0;
+            for (auto ip : trace) {
+                boost::hash_combine(seed, ip); // mix this entry into the running seed
+            }
+            return seed; // stays 0 when the trace is empty
+        }
+    };
+}
+
 namespace {
 
 using malloc_t = void* (*) (size_t);
@@ -61,8 +78,10 @@ valloc_t real_valloc = nullptr;
 aligned_alloc_t real_aligned_alloc = nullptr;
 dlopen_t real_dlopen = nullptr;
 
-atomic<unsigned int> next_thread_id;
+atomic<unsigned int> next_thread_id(0);
 atomic<unsigned int> next_module_id(1);
+atomic<unsigned int> next_ipCache_id(0);
+atomic<unsigned int> next_trace_id(0);
 
 struct ThreadData;
 
@@ -135,6 +154,9 @@ struct ThreadData
         in_handler = true;
         threadRegistry.addThread(this);
         modules.reserve(32);
+        ipCache.reserve(65536);
+        traceCache.reserve(16384);
+        allocationInfo.reserve(16384);
 
         string outputFileName = env("DUMP_MALLOC_TRACE_OUTPUT") + to_string(getpid()) + '.' + to_string(thread_id);
         out = fopen(outputFileName.c_str(), "wa");
@@ -215,7 +237,7 @@ struct ThreadData
         return 0;
     }
 
-    void trace(const int skip = 2)
+    unsigned int trace(const int skip = 2)
     {
         unw_context_t uc;
         unw_getcontext (&uc);
@@ -226,25 +248,45 @@ struct ThreadData
         // skip functions we are not interested in
         for (int i = 0; i < skip; ++i) {
             if (unw_step(&cursor) <= 0) {
-                return;
+                return 0;
             }
         }
 
-        while (unw_step(&cursor) > 0) {
+        traceBuffer.clear();
+        const size_t MAX_TRACE_SIZE = 64;
+        while (unw_step(&cursor) > 0 && traceBuffer.size() < MAX_TRACE_SIZE) {
             unw_word_t ip;
             unw_get_reg(&cursor, UNW_REG_IP, &ip);
 
-            // find module and offset from cache
+            auto it = ipCache.find(ip);
+            if (it == ipCache.end()) {
+                auto ipId = next_ipCache_id++;
+                // find module and offset from cache
+                auto module = lower_bound(modules.begin(), modules.end(), ip,
+                                          [] (const Module& module, const unw_word_t addr) -> bool {
+                                              return module.baseAddress + module.size < addr;
+                                          });
+                if (module != modules.end()) {
+                    fprintf(out, "i %lu %lu %lx\n", ipId, module->id, ip - module->baseAddress);
+                } else {
+                    fprintf(out, "i %lu 0 %lx\n", ipId, ip);
+                }
+                it = ipCache.insert(it, {ip, ipId});
+            }
+            traceBuffer.push_back(it->second);
+        }
 
-            auto module = lower_bound(modules.begin(), modules.end(), ip,
-                                      [] (const Module& module, const unw_word_t addr) -> bool {
-                                          return module.baseAddress + module.size < addr;
-                                      });
-            if (module != modules.end()) {
-                fprintf(out, "%lu %lx ", module->id, ip - module->baseAddress);
+        auto it = traceCache.find(traceBuffer);
+        if (it == traceCache.end()) {
+            auto traceId = next_trace_id++;
+            it = traceCache.insert(it, {traceBuffer, traceId});
+            fprintf(out, "t %lu ", traceId);
+            for (auto ipId : traceBuffer) {
+                fprintf(out, "%lu ", ipId);
             }
-            // TODO: handle failure
+            fputc('\n', out);
         }
+        return it->second;
     }
 
     void handleMalloc(void* ptr, size_t size)
@@ -253,20 +295,38 @@ struct ThreadData
             updateModuleCache();
         }
 
-        fprintf(out, "+ %lu %p ", size, ptr);
-        trace();
-        fputc('\n', out);
+        auto traceId = trace();
+        if (!traceId) {
+            return;
+        }
+
+        allocationInfo[ptr] = {size, traceId};
+        fprintf(out, "+ %lu %lu\n", size, traceId);
     }
 
     void handleFree(void* ptr)
     {
-        fprintf(out, "- %p\n", ptr);
+        auto it = allocationInfo.find(ptr);
+        if (it == allocationInfo.end()) {
+            return;
+        }
+        fprintf(out, "- %lu %lu\n", it->second.size, it->second.traceId);
+        allocationInfo.erase(it);
     }
 
     vector<Module> modules;
+    unordered_map<unw_word_t, unw_word_t> ipCache;
+    unordered_map<vector<unw_word_t>, unsigned int> traceCache;
+    struct AllocationInfo // bookkeeping stored per pointer by handleMalloc and read back by handleFree
+    {
+        size_t size; // allocation size as passed to handleMalloc
+        unsigned int traceId; // value returned by trace() for the allocating call
+    };
+    unordered_map<void*, AllocationInfo> allocationInfo;
     unsigned int thread_id;
     FILE* out;
     atomic<bool> moduleCacheDirty;
+    vector<unw_word_t> traceBuffer;
 };
 
 void ThreadRegistry::setModuleCacheDirty()
index a1a7757..cb01174 100644 (file)
@@ -21,6 +21,8 @@
 #include <fstream>
 #include <sstream>
 #include <unordered_map>
+#include <vector>
+#include <memory>
 
 #include "libbacktrace/backtrace.h"
 
@@ -35,7 +37,10 @@ void printUsage(ostream& out)
 
 struct Module
 {
-    void init()
+    Module(string _fileName, uintptr_t baseAddress, bool isExe)
+        : fileName(move(_fileName))
+        , baseAddress(baseAddress)
+        , isExe(isExe)
     {
         backtraceState = backtrace_create_state(fileName.c_str(), /* we are single threaded, so: not thread safe */ false,
                                                 [] (void *data, const char *msg, int errnum) {
@@ -75,16 +80,38 @@ struct Module
         cerr << "Module backtrace error (code " << errnum << "): " << msg << endl;
     }
 
-    backtrace_state* backtraceState;
+    backtrace_state* backtraceState = nullptr;
     string fileName;
     uintptr_t baseAddress;
     bool isExe;
 };
 
+struct InstructionPointer // in-memory form of one parsed "i" record
+{
+    shared_ptr<Module> module; // module looked up by the record's module id; stays null when that id is not in data.modules
+    uintptr_t offset; // address value read in hex from the "i" record; has no default initializer
+};
+
+struct Trace // per-trace aggregate filled from "t", "+" and "-" records
+{
+    vector<InstructionPointer> backtrace; // instruction pointers in the order their ids appear on the "t" line
+    size_t allocations = 0; // incremented once per "+" record that references this trace
+    size_t leaked = 0; // increased by "+" sizes, decreased by "-" sizes (clamped at 0); non-zero values are printed at the end of main
+};
+
 struct AccumulatedTraceData
 {
-    /// TODO: vector?
-    unordered_map<unsigned int, Module> modules;
+    AccumulatedTraceData() // reserves space in all three lookup tables up front
+    {
+        modules.reserve(64);
+        instructions.reserve(65536);
+        traces.reserve(16384);
+    }
+
+    /// TODO: vectors?
+    unordered_map<unsigned int, shared_ptr<Module>> modules; // id from an "m" record -> module, shared with the InstructionPointers that refer to it
+    unordered_map<unsigned int, InstructionPointer> instructions; // id from an "i" record -> instruction pointer
+    unordered_map<unsigned int, Trace> traces; // id from a "t" record -> backtrace plus allocation counters
 };
 
 }
@@ -109,46 +136,118 @@ int main(int argc, char** argv)
 
         string line;
         line.reserve(1024);
+        stringstream lineIn(ios_base::in);
         while (in.good()) {
             getline(in, line);
-            stringstream lineIn(line, ios_base::in);
-            char mode;
+            if (line.empty()) {
+                continue;
+            }
+            lineIn.str(line);
+            lineIn.clear();
+            char mode = 0;
             lineIn >> mode;
             if (mode == 'm') {
-                Module module;
-                module.backtraceState = nullptr;
-                unsigned int id;
+                unsigned int id = 0;
+                lineIn >> id;
+                string fileName;
+                lineIn >> fileName;
+                lineIn << hex;
+                uintptr_t baseAddress = 0;
+                lineIn >> baseAddress;
+                bool isExe = false;
+                lineIn << dec;
+                lineIn >> isExe;
+                if (lineIn.bad()) {
+                    cerr << "failed to parse line: " << line << endl;
+                    continue;
+                }
+                data.modules[id] = make_shared<Module>(fileName, baseAddress, isExe);
+            } else if (mode == 'i') {
+                InstructionPointer ip;
+                unsigned int id = 0;
                 lineIn >> id;
-                lineIn >> module.fileName;
+                unsigned int moduleId = 0;
+                lineIn >> moduleId;
                 lineIn << hex;
-                lineIn >> module.baseAddress;
+                lineIn >> ip.offset;
                 lineIn << dec;
-                lineIn >> module.isExe;
-                module.init();
-                data.modules[id] = module;
+                if (lineIn.bad()) {
+                    cerr << "failed to parse line: " << line << endl;
+                    continue;
+                }
+                auto module = data.modules.find(moduleId);
+                if (module != data.modules.end()) {
+                    ip.module = module->second;
+                }
+                data.instructions[id] = ip;
+            } else if (mode == 't') {
+                Trace trace;
+                unsigned int id = 0;
+                lineIn >> id;
+                if (lineIn.bad()) {
+                    cerr << "failed to parse line: " << line << endl;
+                    continue;
+                }
+                while (lineIn.good()) {
+                    unsigned int ipId = 0;
+                    lineIn >> ipId;
+                    auto ip = data.instructions.find(ipId);
+                    if (ip != data.instructions.end()) {
+                        trace.backtrace.push_back(ip->second);
+                    } else {
+                        cerr << "failed to find instruction " << ipId << endl;
+                    }
+                }
+                data.traces[id] = trace;
             } else if (mode == '+') {
                 size_t size = 0;
                 lineIn >> size;
-                lineIn << hex;
-                void* ptr = nullptr;
-                lineIn >> ptr;
-                cout << "GOGOGO " << size << ' ' << ptr << '\n';
-                while (lineIn.good()) {
-                    unsigned int moduleId = 0;
-                    lineIn >> moduleId;
-                    if (!moduleId) {
-                        break;
+                unsigned int traceId = 0;
+                lineIn >> traceId;
+                if (lineIn.bad()) {
+                    cerr << "failed to parse line: " << line << endl;
+                    continue;
+                }
+                auto trace = data.traces.find(traceId);
+                if (trace != data.traces.end()) {
+                    trace->second.leaked += size;
+                    trace->second.allocations++;
+                } else {
+                    cerr << "failed to find trace " << traceId << endl;
+                }
+            } else if (mode == '-') {
+                /// TODO
+                size_t size = 0;
+                lineIn >> size;
+                unsigned int traceId = 0;
+                lineIn >> traceId;
+                if (lineIn.bad()) {
+                    cerr << "failed to parse line: " << line << endl;
+                    continue;
+                }
+                auto trace = data.traces.find(traceId);
+                if (trace != data.traces.end()) {
+                    if (trace->second.leaked >= size) {
+                        trace->second.leaked -= size;
+                    } else {
+                        cerr << "inconsistent allocation info, underflowed allocations of " << traceId << endl;
+                        trace->second.leaked = 0;
                     }
-                    uintptr_t offset = 0;
-                    lineIn << hex;
-                    lineIn >> offset;
-                    lineIn << dec;
-                    auto module = data.modules[moduleId];
-                    cout << moduleId << '\t' << offset << '\t' << module.resolveAddress(offset) << ' ' << module.fileName << '\n';
+                } else {
+                    cerr << "failed to find trace " << traceId << endl;
                 }
+            } else {
+                cerr << "failed to parse line: " << line << endl;
             }
         }
     }
 
+    for (const auto& trace : data.traces) {
+        if (!trace.second.leaked) {
+            continue;
+        }
+        cout << trace.second.leaked << " leaked in: " << trace.first << " allocations: " << trace.second.allocations << endl;
+    }
+
     return 0;
 }