# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""This module classifies NativeHeap objects filtering their allocations.

The only filter currently available is 'stacktrace', which works as follows:

{'name': 'rule-1', 'stacktrace': 'foo' }
{'name': 'rule-2', 'stacktrace': ['foo', r'bar\s+baz']}
{'name': 'rule-3', 'source_path': 'sk.*allocator'}
{'name': 'rule-4', 'source_path': 'sk', 'stacktrace': 'SkAllocator'}

rule-1 will match any allocation that has 'foo' in one of its stack frames.
rule-2 will match any allocation that has a stack frame matching 'foo'
followed by a stack frame matching 'bar baz'. Note that order matters, so
rule-2 will not match a stacktrace like ['bar baz', 'foo'].
rule-3 will match any allocation in which at least one of the source paths in
its stack frames matches the regex sk.*allocator.
rule-4 will match any allocation which satisfies both the conditions.
TODO(primiano): introduce more filters after the first prototype with UI, for
instance, filter by library file name or by allocation size.
"""
import collections
import posixpath
import re

from memory_inspector.classification import results
from memory_inspector.classification import rules
from memory_inspector.core import exceptions
from memory_inspector.core import native_heap
37 _RESULT_KEYS = ['bytes_allocated', 'bytes_resident']
def LoadRules(content):
  """Loads and parses a native-heap rule tree from a content (string).

  Args:
    content: a string containing the rule tree (a Python literal, see the
        module docstring for the expected format).

  Returns:
    An instance of |rules.Rule|, which nodes are |_NHeapRule| instances.
  """
  return rules.Load(content, _NHeapRule)
def Classify(nativeheap, rule_tree):
  """Creates aggregated results of native heaps using the provided rules.

  Args:
    nativeheap: the heap dump being processed (a |NativeHeap| instance).
    rule_tree: the user-defined rules that define the filtering categories.

  Returns:
    An instance of |AggreatedResults|.
  """
  assert(isinstance(nativeheap, native_heap.NativeHeap))
  assert(isinstance(rule_tree, rules.Rule))

  res = results.AggreatedResults(rule_tree, _RESULT_KEYS)
  for allocation in nativeheap.allocations:
    res.AddToMatchingNodes(allocation,
                           [allocation.size, allocation.resident_size])
  # BUG fix: the aggregated results were built but never returned.
  return res
def InferHeuristicRulesFromHeap(nheap, max_depth=3, threshold=0.02):
  """Infers the rules tree from a symbolized heap snapshot.

  In lack of a specific set of rules, this method can be invoked to infer a
  meaningful rule tree starting from a heap snapshot. It will build a compact
  radix tree from the source paths of the stack traces, which height is at most
  |max_depth|, selecting only those nodes which contribute for at least
  |threshold| (1.0 = 100%) w.r.t. the total allocation of the heap snapshot.

  Args:
    nheap: the symbolized heap snapshot (a |NativeHeap| instance).
    max_depth: maximum height of the generated rule tree.
    threshold: minimum fraction of the total allocated bytes that a source
        path must account for to get its own rule node.

  Returns:
    An instance of |rules.Rule| (the root of the inferred rule tree).
  """
  assert(isinstance(nheap, native_heap.NativeHeap))

  def RadixTreeInsert(node, path):
    """Inserts a string (path) into a radix tree (a python recursive dict).

    e.g.: [/a/b/c, /a/b/d, /z/h] -> {'/a/b/': {'c': {}, 'd': {}}, '/z/h': {}}
    """

    def GetCommonPrefix(args):
      """Returns the common prefix between two paths (no partial paths).

      e.g.: /tmp/bar, /tmp/baz will return /tmp/ (and not /tmp/ba as the dumb
      posixpath.commonprefix implementation would do)
      """
      parts = posixpath.commonprefix(args).rpartition(posixpath.sep)[0]
      return parts + posixpath.sep if parts else ''

    for node_path in node.iterkeys():
      pfx = GetCommonPrefix([node_path, path])
      if not pfx:  # No common prefix with this child, try the next one.
        continue
      if len(pfx) < len(node_path):  # Partial match: split the existing child.
        node[pfx] = {node_path[len(pfx):] : node[node_path]}
        del node[node_path]
      if len(path) > len(pfx):  # Recurse to insert the unmatched suffix.
        RadixTreeInsert(node[pfx], path[len(pfx):])
      return  # |path| has been fully merged into this child: we are done.
    node[path] = {}  # No common prefix, create new child in current node.

  # Given an allocation of size N and its stack trace, heuristically determines
  # the source directory to be blamed for the N bytes.
  # The blamed_dir is the one which appears more times in the top 8 stack frames
  # (excluding the first 2, which usually are just the (m|c)alloc call sites).
  # At the end, this will generate a *leaderboard* (|blamed_dirs|) which
  # associates, to each source path directory, the number of bytes allocated.
  total_allocated = 0
  blamed_dirs = collections.Counter()  # '/s/path' : bytes_from_this_path (int)

  for alloc in nheap.allocations:
    dir_histogram = collections.Counter()
    for frame in alloc.stack_trace.frames[2:10]:
      # Compute a histogram (for each allocation) of the top source dirs.
      if not frame.symbol or not frame.symbol.source_info:
        continue
      src_file = frame.symbol.source_info[0].source_file_path
      src_dir = posixpath.dirname(src_file.replace('\\', '/')) + '/'
      dir_histogram.update([src_dir])
    if not dir_histogram:
      continue
    # Add the blamed dir to the leaderboard.
    blamed_dir = dir_histogram.most_common()[0][0]
    blamed_dirs.update({blamed_dir : alloc.size})
    total_allocated += alloc.size

  # Select only the top paths from the leaderboard which contribute for more
  # than |threshold| and make a radix tree out of them. The guard on
  # |total_allocated| avoids a ZeroDivisionError when all the blamed
  # allocations have zero size (or no allocation could be blamed at all).
  radix_tree = {}
  if total_allocated:
    for blamed_dir, alloc_size in blamed_dirs.most_common():
      if (1.0 * alloc_size / total_allocated) < threshold:
        break  # most_common() is sorted: the rest is below threshold too.
      RadixTreeInsert(radix_tree, blamed_dir)

  # The final step consists in generating a rule tree from the radix tree. This
  # is a pretty straightforward tree-clone operation, they have the same shape.
  def GenRulesFromRadixTree(radix_tree_node, max_depth, parent_path=''):
    """Recursively clones the radix tree into a list of rule dicts."""
    if max_depth <= 0:  # Clip the rule tree to the requested height.
      return []
    children = []
    for node_path, node_children in radix_tree_node.iteritems():
      child_rule = {
          'name': node_path[-16:],
          'source_path': '^' + re.escape(parent_path + node_path),
          'children': GenRulesFromRadixTree(
              node_children, max_depth - 1, parent_path + node_path)}
      children += [child_rule]
    return children

  rules_tree = GenRulesFromRadixTree(radix_tree, max_depth)
  return LoadRules(str(rules_tree))
158 class _NHeapRule(rules.Rule):
159 def __init__(self, name, filters):
160 super(_NHeapRule, self).__init__(name)
162 # The 'stacktrace' filter can be either a string (simple case, one regex) or
163 # a list of strings (complex case, see doc in the header of this file).
164 stacktrace_regexs = filters.get('stacktrace', [])
165 if isinstance(stacktrace_regexs, basestring):
166 stacktrace_regexs = [stacktrace_regexs]
167 self._stacktrace_regexs = []
168 for regex in stacktrace_regexs:
170 self._stacktrace_regexs.append(re.compile(regex))
171 except re.error, descr:
172 raise exceptions.MemoryInspectorException(
173 'Stacktrace regex error "%s" : %s' % (regex, descr))
175 # The 'source_path' regex, instead, simply matches the source file path.
176 self._path_regex = None
177 path_regex = filters.get('source_path')
180 self._path_regex = re.compile(path_regex)
181 except re.error, descr:
182 raise exceptions.MemoryInspectorException(
183 'Path regex error "%s" : %s' % (path_regex, descr))
185 def Match(self, allocation):
186 # Match the source file path, if the 'source_path' filter is specified.
189 for frame in allocation.stack_trace.frames:
190 if frame.symbol and frame.symbol.source_info:
191 if self._path_regex.search(
192 frame.symbol.source_info[0].source_file_path):
198 # Match the stack traces symbols, if the 'stacktrace' filter is specified.
199 if not self._stacktrace_regexs:
202 cur_regex = self._stacktrace_regexs[0]
203 for frame in allocation.stack_trace.frames:
204 if frame.symbol and cur_regex.search(frame.symbol.name):
205 # The current regex has been matched.
206 if cur_regex_idx == len(self._stacktrace_regexs) - 1:
207 return True # All the provided regexs have been matched, we're happy.
209 cur_regex = self._stacktrace_regexs[cur_regex_idx]
211 return False # Not all the provided regexs have been matched.