1 # Copyright 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
5 """This module classifies NativeHeap objects filtering their allocations.
7 The only filter currently available is 'stacktrace', which works as follows:
9 {'name': 'rule-1', 'stacktrace': 'foo' }
10 {'name': 'rule-2', 'stacktrace': ['foo', r'bar\s+baz']}
11 {'name': 'rule-3', 'source_path': 'sk.*allocator'}
12 {'name': 'rule-3', 'source_path': 'sk', 'stacktrace': 'SkAllocator'}
15 rule-1 will match any allocation that has 'foo' in one of its stack frames.
16 rule-2 will match any allocation that has a stack frame matching 'foo' AND a
17 followed by a stack frame matching 'bar baz'. Note that order matters, so rule-2
18 will not match a stacktrace like ['bar baz', 'foo'].
19 rule-3 will match any allocation in which at least one of the source paths in
20 its stack frames matches the regex sk.*allocator.
21 rule-4 will match any allocation which satisfies both the conditions.
23 TODO(primiano): introduce more filters after the first prototype with UI, for
24 instance, filter by library file name or by allocation size.
import collections
import posixpath
import re

from memory_inspector.classification import results
from memory_inspector.classification import rules
from memory_inspector.core import exceptions
from memory_inspector.core import native_heap
37 _RESULT_KEYS = ['bytes_allocated']
def LoadRules(content):
  """Loads and parses a native-heap rule tree from a content (string).

  Args:
    content: a string containing a python-style dict tree of rules (see the
        module docstring for the supported filters).

  Returns:
    An instance of |rules.Rule|, which nodes are |_NHeapRule| instances.
  """
  return rules.Load(content, _NHeapRule)
def Classify(nativeheap, rule_tree):
  """Creates aggregated results of native heaps using the provided rules.

  Args:
    nativeheap: the heap dump being processed (a |NativeHeap| instance).
    rule_tree: the user-defined rules that define the filtering categories.

  Returns:
    An instance of |AggreatedResults|.
  """
  assert(isinstance(nativeheap, native_heap.NativeHeap))
  assert(isinstance(rule_tree, rules.Rule))

  res = results.AggreatedResults(rule_tree, _RESULT_KEYS)
  for allocation in nativeheap.allocations:
    res.AddToMatchingNodes(allocation, [allocation.total_size])
  # BUG FIX: the aggregated results were computed but never returned, so every
  # caller received None. Return them.
  return res
def InferHeuristicRulesFromHeap(nheap, max_depth=3, threshold=0.02):
  """Infers the rules tree from a symbolized heap snapshot.

  In lack of a specific set of rules, this method can be invoked to infer a
  meaningful rule tree starting from a heap snapshot. It will build a compact
  radix tree from the source paths of the stack traces, which height is at most
  |max_depth|, selecting only those nodes which contribute for at least
  |threshold| (1.0 = 100%) w.r.t. the total allocation of the heap snapshot.

  Args:
    nheap: the symbolized heap snapshot (a |NativeHeap| instance).
    max_depth: maximum height of the generated rule tree.
    threshold: minimum fraction (0..1) of the total allocated bytes that a
        source directory must account for to deserve its own rule.

  Returns:
    The root of the inferred rule tree (a |rules.Rule| instance).
  """
  assert(isinstance(nheap, native_heap.NativeHeap))

  def RadixTreeInsert(node, path):
    """Inserts a string (path) into a radix tree (a python recursive dict).

    e.g.: [/a/b/c, /a/b/d, /z/h] -> {'/a/b/': {'c': {}, 'd': {}}, '/z/h': {}}
    """

    def GetCommonPrefix(args):
      """Returns the common prefix between two paths (no partial paths).

      e.g.: /tmp/bar, /tmp/baz will return /tmp/ (and not /tmp/ba as the dumb
      posixpath.commonprefix implementation would do)
      """
      parts = posixpath.commonprefix(args).rpartition(posixpath.sep)[0]
      return parts + posixpath.sep if parts else ''

    # Iterate over a snapshot of the keys: the loop body mutates |node|.
    for node_path in list(node):
      pfx = GetCommonPrefix([node_path, path])
      if not pfx:
        continue  # No common prefix with this child, try the next one.
      if len(pfx) < len(node_path):
        # Partial match: split the existing child into prefix + remainder.
        node[pfx] = {node_path[len(pfx):]: node[node_path]}
        del node[node_path]
      if len(path) > len(pfx):
        # The new path is longer than the prefix: recurse into the subtree.
        RadixTreeInsert(node[pfx], path[len(pfx):])
      return
    node[path] = {}  # No common prefix, create new child in current node.

  # Given an allocation of size N and its stack trace, heuristically determines
  # the source directory to be blamed for the N bytes.
  # The blamed_dir is the one which appears more times in the top 8 stack frames
  # (excluding the first 2, which usually are just the (m|c)alloc call sites).
  # At the end, this will generate a *leaderboard* (|blamed_dirs|) which
  # associates, to each source path directory, the number of bytes allocated.
  total_allocated = 0
  blamed_dirs = collections.Counter()  # '/s/path' : bytes_from_this_path (int)

  for alloc in nheap.allocations:
    dir_histogram = collections.Counter()
    for frame in alloc.stack_trace.frames[2:10]:
      # Compute a histogram (for each allocation) of the top source dirs.
      if not frame.symbol or not frame.symbol.source_info:
        continue
      src_file = frame.symbol.source_info[0].source_file_path
      src_dir = posixpath.dirname(src_file.replace('\\', '/')) + '/'
      dir_histogram.update([src_dir])
    if not dir_histogram:
      continue  # No symbolized frame at all: nobody to blame.
    # Add the blamed dir to the leaderboard.
    blamed_dir = dir_histogram.most_common()[0][0]
    blamed_dirs.update({blamed_dir: alloc.total_size})
    total_allocated += alloc.total_size

  # Select only the top paths from the leaderboard which contribute for more
  # than |threshold| and make a radix tree out of them.
  radix_tree = {}
  if total_allocated:  # Guard against empty heaps (avoid division by zero).
    for blamed_dir, alloc_size in blamed_dirs.most_common():
      if (1.0 * alloc_size / total_allocated) < threshold:
        break  # most_common() is sorted, no further entry can pass the bar.
      RadixTreeInsert(radix_tree, blamed_dir)

  # The final step consists in generating a rule tree from the radix tree. This
  # is a pretty straightforward tree-clone operation, they have the same shape.
  def GenRulesFromRadixTree(radix_tree_node, max_depth, parent_path=''):
    """Recursively clones the radix tree into a list of rule dicts."""
    children = []
    if max_depth > 0:
      for node_path, node_children in radix_tree_node.items():
        child_rule = {
            'name': node_path[-16:],
            'source_path': '^' + re.escape(parent_path + node_path),
            'children': GenRulesFromRadixTree(
                node_children, max_depth - 1, parent_path + node_path)}
        children += [child_rule]
    return children

  rules_tree = GenRulesFromRadixTree(radix_tree, max_depth)
  return LoadRules(str(rules_tree))
class _NHeapRule(rules.Rule):
  """A rule that matches allocations by stack-frame symbols and source paths.

  Supported keys in |filters|:
    'stacktrace': a regex string (one regex) or a list of regex strings that
        must match the symbol names of the stack frames, in order.
    'source_path': a regex string matched against the source file paths of
        the stack frames.
  When both filters are present, an allocation must satisfy both to match.
  """

  def __init__(self, name, filters):
    super(_NHeapRule, self).__init__(name)

    # The 'stacktrace' filter can be either a string (simple case, one regex)
    # or a list of strings (complex case, see doc in the header of this file).
    stacktrace_regexs = filters.get('stacktrace', [])
    if isinstance(stacktrace_regexs, basestring):
      stacktrace_regexs = [stacktrace_regexs]
    self._stacktrace_regexs = []
    for regex in stacktrace_regexs:
      try:
        self._stacktrace_regexs.append(re.compile(regex))
      except re.error as descr:
        raise exceptions.MemoryInspectorException(
            'Stacktrace regex error "%s" : %s' % (regex, descr))

    # The 'source_path' regex, instead, simply matches the source file path.
    self._path_regex = None
    path_regex = filters.get('source_path')
    if path_regex:
      try:
        self._path_regex = re.compile(path_regex)
      except re.error as descr:
        raise exceptions.MemoryInspectorException(
            'Path regex error "%s" : %s' % (path_regex, descr))

  def Match(self, allocation):
    """Returns True if |allocation| satisfies all the filters of this rule."""
    # Match the source file path, if the 'source_path' filter is specified.
    if self._path_regex:
      path_matches = False
      for frame in allocation.stack_trace.frames:
        if frame.symbol and frame.symbol.source_info:
          if self._path_regex.search(
              frame.symbol.source_info[0].source_file_path):
            path_matches = True
            break
      if not path_matches:
        return False

    # Match the stack traces symbols, if the 'stacktrace' filter is specified.
    if not self._stacktrace_regexs:
      return True
    cur_regex_idx = 0
    cur_regex = self._stacktrace_regexs[0]
    for frame in allocation.stack_trace.frames:
      if frame.symbol and cur_regex.search(frame.symbol.name):
        # The current regex has been matched, advance to the next one.
        if cur_regex_idx == len(self._stacktrace_regexs) - 1:
          return True  # All the provided regexs have been matched, we're happy.
        cur_regex_idx += 1
        cur_regex = self._stacktrace_regexs[cur_regex_idx]
    return False  # Not all the provided regexs have been matched.