Optimize: Reduce memory cost for tracking active allocations.

author Milian Wolff <mail@milianw.de>

Mon, 7 Dec 2015 22:10:58 +0000 (23:10 +0100)

committer Milian Wolff <mail@milianw.de>

Mon, 7 Dec 2015 22:12:30 +0000 (23:12 +0100)
author Milian Wolff <mail@milianw.de>
Mon, 7 Dec 2015 22:10:58 +0000 (23:10 +0100)
committer Milian Wolff <mail@milianw.de>
Mon, 7 Dec 2015 22:12:30 +0000 (23:12 +0100)
diff --git a/accumulatedtracedata.cpp b/accumulatedtracedata.cpp

index 3440409..c734d93 100644 (file)
--- a/accumulatedtracedata.cpp
+++ b/accumulatedtracedata.cpp
@@ -55,7 +55,7 @@ AccumulatedTraceData::AccumulatedTraceData()
      traces.reserve(65536);
      strings.reserve(4096);
      allocations.reserve(16384);
-    activeAllocations.reserve(65536);
+    activeSmallAllocations.reserve(65536);
      stopIndices.reserve(4);
      opNewIpIndices.reserve(16);
  }
@@ -214,7 +214,13 @@ bool AccumulatedTraceData::read(istream& in)
                  continue;
              }
  
-            activeAllocations[ptr] = {traceId, size};
+            if (size <= std::numeric_limits<uint32_t>::max()) {
+                // save memory by storing this allocation in the list of small allocations
+                activeSmallAllocations[ptr] = {traceId, static_cast<uint32_t>(size)};
+            } else {
+                // these rare allocations consume more memory to track, but that's fine
+                activeBigAllocations[ptr] = {traceId, size};
+            }
  
              auto& allocation = findAllocation(traceId);
              allocation.leaked += size;
@@ -239,15 +245,11 @@ bool AccumulatedTraceData::read(istream& in)
                  cerr << "failed to parse line: " << reader.line() << endl;
                  continue;
              }
-            auto ip = activeAllocations.find(ptr);
-            if (ip == activeAllocations.end()) {
-                if (!fromAttached) {
-                    cerr << "unknown pointer in line: " << reader.line() << endl;
-                }
+            const auto info = takeActiveAllocation(ptr);
+            if (!info.traceIndex) {
+                // happens when we attached to a running application
                  continue;
              }
-            const auto info = ip->second;
-            activeAllocations.erase(ip);
  
              auto& allocation = findAllocation(info.traceIndex);
              if (!allocation.allocations || allocation.leaked < info.size) {
@@ -283,7 +285,8 @@ bool AccumulatedTraceData::read(istream& in)
      }
  
      /// these are leaks, but we now have the same data in \c allocations as well
-    activeAllocations.clear();
+    activeSmallAllocations.clear();
+    activeBigAllocations.clear();
  
      if (!reparsing) {
          totalTime = timeStamp + 1;
@@ -340,3 +343,22 @@ bool AccumulatedTraceData::isStopIndex(const StringIndex index) const
  {
      return find(stopIndices.begin(), stopIndices.end(), index) != stopIndices.end();
  }
+
+/// Remove \c ptr from the active allocations and return its info, or an empty info when it is unknown.
+BigAllocationInfo AccumulatedTraceData::takeActiveAllocation(uint64_t ptr)
+{
+    const auto smallIt = activeSmallAllocations.find(ptr);
+    if (smallIt != activeSmallAllocations.end()) {
+        // widen the compact entry to the common return type before dropping it
+        const BigAllocationInfo taken = {smallIt->second.traceIndex, smallIt->second.size};
+        activeSmallAllocations.erase(smallIt);
+        return taken;
+    }
+    const auto bigIt = activeBigAllocations.find(ptr); // rarely reached
+    if (bigIt == activeBigAllocations.end()) {
+        return {}; // unknown pointer, happens esp. when we runtime-attached
+    }
+    const BigAllocationInfo taken = bigIt->second;
+    activeBigAllocations.erase(bigIt);
+    return taken;
+}
diff --git a/accumulatedtracedata.h b/accumulatedtracedata.h

index 5490a54..975d02c 100644 (file)
--- a/accumulatedtracedata.h
+++ b/accumulatedtracedata.h
@@ -125,9 +125,25 @@ struct MergedAllocation : public AllocationData
  };
  
  /**
- * Information for a single call to an allocation function
+ * Information for a single call to an allocation function for small allocations.
+ *
+ * The split between small and big allocations is done to save memory. Most of
+ * the time apps will only do small allocations, and tons of them. With this
+ * split we can reduce the memory footprint of the active allocation tracker
+ * below by a factor of 2. This is especially notable for apps that do tons
+ * of small allocations and don't free them. A notable example for such an
+ * application is heaptrack_print/heaptrack_gui itself!
+ */
+struct SmallAllocationInfo
+{
+    TraceIndex traceIndex; // trace that performed the allocation, later passed to findAllocation()
+    uint32_t size; // allocation size; only sizes up to the uint32_t maximum are stored here
+};
+
+/**
+ * Information for a single call to an allocation function for big allocations.
   */
-struct AllocationInfo
+struct BigAllocationInfo
  {
      TraceIndex traceIndex;
      uint64_t size;
@@ -174,9 +190,12 @@ struct AccumulatedTraceData
  
      bool isStopIndex(const StringIndex index) const;
  
+    BigAllocationInfo takeActiveAllocation(uint64_t ptr);
+
      // indices of functions that should stop the backtrace, e.g. main or static initialization
      std::vector<StringIndex> stopIndices;
-    std::unordered_map<uint64_t, AllocationInfo> activeAllocations;
+    std::unordered_map<uint64_t, SmallAllocationInfo> activeSmallAllocations; // key: full 64bit pointer
+    std::unordered_map<uint64_t, BigAllocationInfo> activeBigAllocations;
      std::vector<InstructionPointer> instructionPointers;
      std::vector<TraceNode> traces;
      std::vector<std::string> strings;
author	Milian Wolff <mail@milianw.de>
	Mon, 7 Dec 2015 22:10:58 +0000 (23:10 +0100)
committer	Milian Wolff <mail@milianw.de>
	Mon, 7 Dec 2015 22:12:30 +0000 (23:12 +0100)
accumulatedtracedata.cpp		patch \| blob \| history
accumulatedtracedata.h		patch \| blob \| history