# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import copy
import datetime
import logging
import os
import re
import time

from lib.bucket import BUCKET_ID
from lib.exceptions import EmptyDumpException, InvalidDumpException
from lib.exceptions import ObsoleteDumpVersionException, ParsingException
from lib.pageframe import PageFrame
from lib.range_dict import ExclusiveRangeDict
from lib.symbol import procfs

LOGGER = logging.getLogger('dmprof')

# Heap Profile Dump versions

# DUMP_DEEP_[1-4] are obsolete.
# DUMP_DEEP_2+ distinguish mmap regions from malloc chunks.
# DUMP_DEEP_3+ don't include allocation functions in their stack dumps.
# DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*".
# DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1.
# DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3.
DUMP_DEEP_1 = 'DUMP_DEEP_1'
DUMP_DEEP_2 = 'DUMP_DEEP_2'
DUMP_DEEP_3 = 'DUMP_DEEP_3'
DUMP_DEEP_4 = 'DUMP_DEEP_4'

DUMP_DEEP_OBSOLETE = (DUMP_DEEP_1, DUMP_DEEP_2, DUMP_DEEP_3, DUMP_DEEP_4)

# DUMP_DEEP_5 doesn't separate sections for malloc and mmap.
# malloc and mmap are identified in bucket files.
# DUMP_DEEP_5 should be processed by POLICY_DEEP_4.
DUMP_DEEP_5 = 'DUMP_DEEP_5'

# DUMP_DEEP_6 adds a mmap list to DUMP_DEEP_5.
DUMP_DEEP_6 = 'DUMP_DEEP_6'
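
# For orientation: a current (DUMP_DEEP_6) dump is expected to start with a
# header line like the illustrative example below, which is what
# _parse_version() looks for.
#
#   heap profile: DUMP_DEEP_6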


class Dump(object):
  """Represents a heap profile dump."""

  _PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$')
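
  # _PATH_PATTERN splits a dump file name into a prefix, a pid and a dump
  # count.  For a hypothetical path 'chrome.12345.0002.heap', group(2) is
  # '12345' (the pid) and group(3) is '0002' (the dump count).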

  _HOOK_PATTERN = re.compile(
      r'^ ([ \(])([a-f0-9]+)([ \)])-([ \(])([a-f0-9]+)([ \)])\s+'
      r'(hooked|unhooked)\s+(.+)$', re.IGNORECASE)

  _HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
                               '(?P<RESERVED>[0-9]+) @ (?P<BUCKETID>[0-9]+)')
  _UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
                                 '(?P<RESERVED>[0-9]+)')

  _OLD_HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) @ (?P<BUCKETID>[0-9]+)')
  _OLD_UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) (?P<COMMITTED>[0-9]+)')
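
  # Illustrative MMAP_LIST lines (hypothetical, not taken from a real dump)
  # of the kind the patterns above are meant to match:
  #
  #    (7f0000000000)-(7f0000021000) hooked mmap 135168 / 139264 @ 42
  #     7f0000000000 - 7f0000021000  unhooked anonymous 8192 / 8192
  #
  # _HOOK_PATTERN captures the address range and the hooked/unhooked keyword;
  # the trailing text is then parsed by _HOOKED_PATTERN or _UNHOOKED_PATTERN
  # (COMMITTED / RESERVED sizes, plus a bucket ID for hooked regions).  The
  # _OLD_* patterns cover the earlier format of that trailing text.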

  _TIME_PATTERN_FORMAT = re.compile(
      r'^Time: ([0-9]+/[0-9]+/[0-9]+ [0-9]+:[0-9]+:[0-9]+)(\.[0-9]+)?')
  _TIME_PATTERN_SECONDS = re.compile(r'^Time: ([0-9]+)$')
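
  # Illustrative 'Time:' meta lines (hypothetical values):
  #   Time: 2013/05/07 12:34:56.123   -> matched by _TIME_PATTERN_FORMAT
  #   Time: 1367900096                -> matched by _TIME_PATTERN_SECONDS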

  def __init__(self, path, modified_time):
    self._path = path
    matched = self._PATH_PATTERN.match(path)
    self._pid = int(matched.group(2))
    self._count = int(matched.group(3))
    self._time = modified_time
    self._map = {}
    self._procmaps = ExclusiveRangeDict(ProcMapsEntryAttribute)
    self._stacktrace_lines = []
    self._global_stats = {}  # used only in apply_policy

    self._run_id = ''
    self._pagesize = 4096  # default; overridden by a 'PageSize:' meta line
    self._pageframe_length = 0
    self._pageframe_encoding = ''
    self._has_pagecount = False

  def iter_map(self):
    for region in sorted(self._map.iteritems()):
      yield region[0], region[1]

  def iter_procmaps(self):
    for begin, end, attr in self._procmaps.iter_range():
      yield begin, end, attr

  def iter_stacktrace(self):
    for line in self._stacktrace_lines:
      yield line

  def global_stat(self, name):
    return self._global_stats[name]

  def pagesize(self):
    return self._pagesize

  def pageframe_length(self):
    return self._pageframe_length

  def pageframe_encoding(self):
    return self._pageframe_encoding

  def has_pagecount(self):
    return self._has_pagecount

  @staticmethod
  def load(path, log_header='Loading a heap profile dump: '):
    """Loads a heap profile dump.

    Args:
        path: A file path string to load.
        log_header: A preceding string for log messages.

    Returns:
        A loaded Dump object.

    Raises:
        ParsingException for invalid heap profile dumps.
    """
    dump = Dump(path, os.stat(path).st_mtime)
    with open(path, 'r') as f:
      dump.load_file(f, log_header)
    return dump
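
  # Typical usage (illustrative; the file name is hypothetical but follows the
  # <prefix>.<pid>.<count>.heap form that _PATH_PATTERN expects):
  #
  #   dump = Dump.load('chrome.12345.0002.heap')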

  def load_file(self, f, log_header):
    self._lines = [line for line in f
                   if line and not line.startswith('#')]

    try:
      self._version, ln = self._parse_version()
      self._parse_meta_information()
      if self._version == DUMP_DEEP_6:
        self._parse_mmap_list()
      self._parse_global_stats()
      self._extract_stacktrace_lines(ln)
    except EmptyDumpException:
      LOGGER.info('%s%s ...ignored an empty dump.' % (log_header, self._path))
    except ParsingException as e:
      LOGGER.error('%s%s ...error %s' % (log_header, self._path, e))
      raise
    else:
      LOGGER.info('%s%s (version:%s)' % (log_header, self._path, self._version))

  def _parse_version(self):
    """Parses a version string in self._lines.

    Returns:
        A pair of (a string representing a version of the stacktrace dump,
        and an integer indicating a line number next to the version string).

    Raises:
        ParsingException for invalid dump versions.
    """
    # Skip until an identifiable line.
    headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')
    if not self._lines:
      raise EmptyDumpException('Empty heap dump file.')
    (ln, found) = skip_while(
        0, len(self._lines),
        lambda n: not self._lines[n].startswith(headers))
    if not found:
      raise InvalidDumpException('No version header.')

    # Identify a version.
    if self._lines[ln].startswith('heap profile: '):
      version = self._lines[ln][13:].strip()
      if version in (DUMP_DEEP_5, DUMP_DEEP_6):
        (ln, _) = skip_while(
            ln, len(self._lines),
            lambda n: self._lines[n] != 'STACKTRACES:\n')
      elif version in DUMP_DEEP_OBSOLETE:
        raise ObsoleteDumpVersionException(version)
      else:
        raise InvalidDumpException('Invalid version: %s' % version)
    elif self._lines[ln] == 'STACKTRACES:\n':
      raise ObsoleteDumpVersionException(DUMP_DEEP_1)
    elif self._lines[ln] == 'MMAP_STACKTRACES:\n':
      raise ObsoleteDumpVersionException(DUMP_DEEP_2)

    return (version, ln)

  def _parse_global_stats(self):
    """Parses lines in self._lines as global stats."""
    (ln, _) = skip_while(
        0, len(self._lines),
        lambda n: self._lines[n] != 'GLOBAL_STATS:\n')

    global_stat_names = [
        'total', 'absent', 'file-exec', 'file-nonexec', 'anonymous', 'stack',
        'other', 'nonprofiled-absent', 'nonprofiled-anonymous',
        'nonprofiled-file-exec', 'nonprofiled-file-nonexec',
        'nonprofiled-stack', 'nonprofiled-other',
        'profiled-mmap', 'profiled-malloc']

    for prefix in global_stat_names:
      (ln, _) = skip_while(
          ln, len(self._lines),
          lambda n: self._lines[n].split()[0] != prefix)
      words = self._lines[ln].split()
      self._global_stats[prefix + '_virtual'] = int(words[-2])
      self._global_stats[prefix + '_committed'] = int(words[-1])
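
  # For illustration, a GLOBAL_STATS line of the form (hypothetical values):
  #
  #   total     2469396480    350676992
  #
  # is recorded as _global_stats['total_virtual'] = 2469396480 and
  # _global_stats['total_committed'] = 350676992, since the last two columns
  # are taken as the virtual and committed sizes.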

  def _parse_meta_information(self):
    """Parses lines in self._lines for meta information."""
    (ln, found) = skip_while(
        0, len(self._lines),
        lambda n: self._lines[n] != 'META:\n')
    if not found:
      return
    ln += 1

    while True:
      if self._lines[ln].startswith('Time:'):
        matched_seconds = self._TIME_PATTERN_SECONDS.match(self._lines[ln])
        matched_format = self._TIME_PATTERN_FORMAT.match(self._lines[ln])
        if matched_format:
          self._time = time.mktime(datetime.datetime.strptime(
              matched_format.group(1), '%Y/%m/%d %H:%M:%S').timetuple())
          if matched_format.group(2):
            self._time += float(matched_format.group(2)[1:]) / 1000.0
        elif matched_seconds:
          self._time = float(matched_seconds.group(1))
      elif self._lines[ln].startswith('Reason:'):
        pass  # Nothing to do for 'Reason:'
      elif self._lines[ln].startswith('PageSize: '):
        self._pagesize = int(self._lines[ln][10:])
      elif self._lines[ln].startswith('CommandLine:'):
        pass  # Nothing to do for 'CommandLine:'
      elif (self._lines[ln].startswith('PageFrame: ') or
            self._lines[ln].startswith('PFN: ')):
        if self._lines[ln].startswith('PageFrame: '):
          words = self._lines[ln][11:].split(',')
        else:
          words = self._lines[ln][5:].split(',')
        for word in words:
          if word == '24':
            self._pageframe_length = 24
          elif word == 'Base64':
            self._pageframe_encoding = 'base64'
          elif word == 'PageCount':
            self._has_pagecount = True
      elif self._lines[ln].startswith('RunID: '):
        self._run_id = self._lines[ln][7:].strip()
      elif (self._lines[ln].startswith('MMAP_LIST:') or
            self._lines[ln].startswith('GLOBAL_STATS:')):
        # Skip until "MMAP_LIST:" or "GLOBAL_STATS" is found.
        break
      ln += 1

  def _parse_mmap_list(self):
    """Parses lines in self._lines as a mmap list."""
    (ln, found) = skip_while(
        0, len(self._lines),
        lambda n: self._lines[n] != 'MMAP_LIST:\n')
    if not found:
      return

    ln += 1
    self._map = {}
    current_vma = {}
    pageframe_list = []
    while True:
      entry = procfs.ProcMaps.parse_line(self._lines[ln])
      if entry:
        current_vma = {}
        for _, _, attr in self._procmaps.iter_range(entry.begin, entry.end):
          for key, value in entry.as_dict().iteritems():
            attr[key] = value
            current_vma[key] = value
        ln += 1
        continue

      if self._lines[ln].startswith(' PF: '):
        for pageframe in self._lines[ln][5:].split():
          pageframe_list.append(PageFrame.parse(pageframe, self._pagesize))
        ln += 1
        continue

      matched = self._HOOK_PATTERN.match(self._lines[ln])
      if not matched:
        break
      # 2: starting address
      # 5: ending address
      # 7: hooked or unhooked
      # 8: additional information
      if matched.group(7) == 'hooked':
        submatched = self._HOOKED_PATTERN.match(matched.group(8))
        if not submatched:
          submatched = self._OLD_HOOKED_PATTERN.match(matched.group(8))
      elif matched.group(7) == 'unhooked':
        submatched = self._UNHOOKED_PATTERN.match(matched.group(8))
        if not submatched:
          submatched = self._OLD_UNHOOKED_PATTERN.match(matched.group(8))
      else:
        assert matched.group(7) in ['hooked', 'unhooked']

      submatched_dict = submatched.groupdict()
      region_info = { 'vma': current_vma }
      if submatched_dict.get('TYPE'):
        region_info['type'] = submatched_dict['TYPE'].strip()
      if submatched_dict.get('COMMITTED'):
        region_info['committed'] = int(submatched_dict['COMMITTED'])
      if submatched_dict.get('RESERVED'):
        region_info['reserved'] = int(submatched_dict['RESERVED'])
      if submatched_dict.get('BUCKETID'):
        region_info['bucket_id'] = int(submatched_dict['BUCKETID'])

      if matched.group(1) == '(':
        start = current_vma['begin']
      else:
        start = int(matched.group(2), 16)
      if matched.group(4) == '(':
        end = current_vma['end']
      else:
        end = int(matched.group(5), 16)

      if pageframe_list and pageframe_list[0].start_truncated:
        pageframe_list[0].set_size(
            pageframe_list[0].size - start % self._pagesize)
      if pageframe_list and pageframe_list[-1].end_truncated:
        pageframe_list[-1].set_size(
            pageframe_list[-1].size - (self._pagesize - end % self._pagesize))
      region_info['pageframe'] = pageframe_list
      pageframe_list = []

      self._map[(start, end)] = (matched.group(7), region_info)
      ln += 1
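
  # After parsing, self._map maps (start, end) address pairs to a
  # ('hooked'|'unhooked', region_info) tuple, where region_info carries the
  # committed/reserved sizes, the bucket ID (for hooked regions), the
  # enclosing VMA attributes and the associated page frames.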

  def _extract_stacktrace_lines(self, line_number):
    """Extracts the position of stacktrace lines.

    Valid stacktrace lines are stored into self._stacktrace_lines.

    Args:
        line_number: A line number to start parsing in lines.

    Raises:
        ParsingException for invalid dump versions.
    """
    if self._version in (DUMP_DEEP_5, DUMP_DEEP_6):
      (line_number, _) = skip_while(
          line_number, len(self._lines),
          lambda n: not self._lines[n].split()[0].isdigit())
      stacktrace_start = line_number
      (line_number, _) = skip_while(
          line_number, len(self._lines),
          lambda n: self._check_stacktrace_line(self._lines[n]))
      self._stacktrace_lines = self._lines[stacktrace_start:line_number]

    elif self._version in DUMP_DEEP_OBSOLETE:
      raise ObsoleteDumpVersionException(self._version)

    else:
      raise InvalidDumpException('Invalid version: %s' % self._version)

  @staticmethod
  def _check_stacktrace_line(stacktrace_line):
    """Checks if a given stacktrace_line is valid as stacktrace.

    Args:
        stacktrace_line: A string to be checked.

    Returns:
        True if the given stacktrace_line is valid.
    """
    words = stacktrace_line.split()
    if len(words) < BUCKET_ID + 1:
      return False
    if words[BUCKET_ID - 1] != '@':
      return False
    return True


class DumpList(object):
  """Represents a sequence of heap profile dumps.

  Individual dumps are loaded into memory lazily as the sequence is accessed,
  either while being iterated through or randomly accessed. Loaded dumps are
  not cached, meaning a newly loaded Dump object is returned every time an
  element in the list is accessed.
  """

  def __init__(self, dump_path_list):
    self._dump_path_list = dump_path_list

  @staticmethod
  def load(path_list):
    return DumpList(path_list)

  def __len__(self):
    return len(self._dump_path_list)

  def __iter__(self):
    for dump in self._dump_path_list:
      yield Dump.load(dump)

  def __getitem__(self, index):
    return Dump.load(self._dump_path_list[index])
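
# Illustrative use of DumpList (the paths are hypothetical): every access
# re-reads the file, so keep a reference to a Dump if it is needed twice.
#
#   dumps = DumpList.load(['chrome.12345.0001.heap', 'chrome.12345.0002.heap'])
#   for dump in dumps:
#     process(dump)  # 'process' stands for the caller's own handling code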


class ProcMapsEntryAttribute(ExclusiveRangeDict.RangeAttribute):
  """Represents an entry of /proc/maps in range_dict.ExclusiveRangeDict."""
  # Dummy /proc/maps values (assumed field order of procfs.ProcMapsEntry).
  _DUMMY_ENTRY = procfs.ProcMapsEntry(
      0,     # begin
      0,     # end
      '-',   # readable
      '-',   # writable
      '-',   # executable
      '-',   # private
      0,     # offset
      '00',  # major
      '00',  # minor
      0,     # inode
      ''     # name
      )

  def __init__(self):
    super(ProcMapsEntryAttribute, self).__init__()
    self._entry = self._DUMMY_ENTRY.as_dict()

  def __str__(self):
    return str(self._entry)

  def __repr__(self):
    return 'ProcMapsEntryAttribute' + str(self._entry)

  def __getitem__(self, key):
    return self._entry[key]

  def __setitem__(self, key, value):
    if key not in self._entry:
      raise KeyError(key)
    self._entry[key] = value

  def copy(self):
    new_entry = ProcMapsEntryAttribute()
    for key, value in self._entry.iteritems():
      new_entry[key] = copy.deepcopy(value)
    return new_entry


def skip_while(index, max_index, skipping_condition):
  """Increments |index| until |skipping_condition|(|index|) is False.

  Returns:
      A pair of an integer indicating a line number after skipped, and a
      boolean value which is True if found a line which skipping_condition
      is False for.
  """
  while skipping_condition(index):
    index += 1
    if index >= max_index:
      return index, False
  return index, True
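
# Illustrative use of skip_while: starting from index 0, advance past lines
# until one equals 'GLOBAL_STATS:\n' (the list and condition are examples,
# not code from this module):
#
#   (ln, found) = skip_while(
#       0, len(lines), lambda n: lines[n] != 'GLOBAL_STATS:\n')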