# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import copy
import datetime
import logging
import os
import re
import time

from lib.bucket import BUCKET_ID
from lib.exceptions import EmptyDumpException, InvalidDumpException
from lib.exceptions import ObsoleteDumpVersionException, ParsingException
from lib.pageframe import PageFrame
from lib.range_dict import ExclusiveRangeDict
from lib.symbol import procfs

LOGGER = logging.getLogger('dmprof')

# Heap Profile Dump versions

# DUMP_DEEP_[1-4] are obsolete.
# DUMP_DEEP_2+ distinguish mmap regions from malloc chunks.
# DUMP_DEEP_3+ don't include allocation functions in their stack dumps.
# DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*".
# DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1.
# DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3.
DUMP_DEEP_1 = 'DUMP_DEEP_1'
DUMP_DEEP_2 = 'DUMP_DEEP_2'
DUMP_DEEP_3 = 'DUMP_DEEP_3'
DUMP_DEEP_4 = 'DUMP_DEEP_4'

DUMP_DEEP_OBSOLETE = (DUMP_DEEP_1, DUMP_DEEP_2, DUMP_DEEP_3, DUMP_DEEP_4)

# DUMP_DEEP_5 doesn't separate sections for malloc and mmap.
# malloc and mmap are identified in bucket files.
# DUMP_DEEP_5 should be processed by POLICY_DEEP_4.
DUMP_DEEP_5 = 'DUMP_DEEP_5'

# DUMP_DEEP_6 adds a mmap list to DUMP_DEEP_5.
DUMP_DEEP_6 = 'DUMP_DEEP_6'
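
# For orientation: a current (DUMP_DEEP_6) dump is expected to start with a
# header line like the illustrative example below, which is what
# _parse_version() looks for.
#
#   heap profile: DUMP_DEEP_6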


class Dump(object):
  """Represents a heap profile dump."""

  _PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$')
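
  # _PATH_PATTERN splits a dump file name into a prefix, a pid and a dump
  # count.  For a hypothetical path 'chrome.12345.0002.heap', group(2) is
  # '12345' (the pid) and group(3) is '0002' (the dump count).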

  _HOOK_PATTERN = re.compile(
      r'^ ([ \(])([a-f0-9]+)([ \)])-([ \(])([a-f0-9]+)([ \)])\s+'
      r'(hooked|unhooked)\s+(.+)$', re.IGNORECASE)

  _HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
                               '(?P<RESERVED>[0-9]+) @ (?P<BUCKETID>[0-9]+)')
  _UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
                                 '(?P<RESERVED>[0-9]+)')

  _OLD_HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) @ (?P<BUCKETID>[0-9]+)')
  _OLD_UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) (?P<COMMITTED>[0-9]+)')
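
  # Illustrative MMAP_LIST lines (hypothetical, not taken from a real dump)
  # of the kind the patterns above are meant to match:
  #
  #    (7f0000000000)-(7f0000021000) hooked mmap 135168 / 139264 @ 42
  #     7f0000000000 - 7f0000021000  unhooked anonymous 8192 / 8192
  #
  # _HOOK_PATTERN captures the address range and the hooked/unhooked keyword;
  # the trailing text is then parsed by _HOOKED_PATTERN or _UNHOOKED_PATTERN
  # (COMMITTED / RESERVED sizes, plus a bucket ID for hooked regions).  The
  # _OLD_* patterns cover the earlier format of that trailing text.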

  _TIME_PATTERN_FORMAT = re.compile(
      r'^Time: ([0-9]+/[0-9]+/[0-9]+ [0-9]+:[0-9]+:[0-9]+)(\.[0-9]+)?')
  _TIME_PATTERN_SECONDS = re.compile(r'^Time: ([0-9]+)$')
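
  # Illustrative 'Time:' meta lines (hypothetical values):
  #   Time: 2013/05/07 12:34:56.123   -> matched by _TIME_PATTERN_FORMAT
  #   Time: 1367900096                -> matched by _TIME_PATTERN_SECONDS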

  def __init__(self, path, modified_time):
    self._path = path
    matched = self._PATH_PATTERN.match(path)
    self._pid = int(matched.group(2))
    self._count = int(matched.group(3))
    self._time = modified_time
    self._map = {}
    self._procmaps = ExclusiveRangeDict(ProcMapsEntryAttribute)
    self._stacktrace_lines = []
    self._global_stats = {}  # used only in apply_policy

    self._run_id = ''
    self._pagesize = 4096  # default; overridden by a 'PageSize:' meta line
    self._pageframe_length = 0
    self._pageframe_encoding = ''
    self._has_pagecount = False

  def iter_map(self):
    for region in sorted(self._map.iteritems()):
      yield region[0], region[1]

  def iter_procmaps(self):
    for begin, end, attr in self._procmaps.iter_range():
      yield begin, end, attr

  def iter_stacktrace(self):
    for line in self._stacktrace_lines:
      yield line

  def global_stat(self, name):
    return self._global_stats[name]

  def pagesize(self):
    return self._pagesize

  def pageframe_length(self):
    return self._pageframe_length

  def pageframe_encoding(self):
    return self._pageframe_encoding

  def has_pagecount(self):
    return self._has_pagecount

  @staticmethod
  def load(path, log_header='Loading a heap profile dump: '):
    """Loads a heap profile dump.

    Args:
        path: A file path string to load.
        log_header: A preceding string for log messages.

    Returns:
        A loaded Dump object.

    Raises:
        ParsingException for invalid heap profile dumps.
    """
    dump = Dump(path, os.stat(path).st_mtime)
    with open(path, 'r') as f:
      dump.load_file(f, log_header)
    return dump
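
  # Typical usage (illustrative; the file name is hypothetical but follows the
  # <prefix>.<pid>.<count>.heap form that _PATH_PATTERN expects):
  #
  #   dump = Dump.load('chrome.12345.0002.heap')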

  def load_file(self, f, log_header):
    self._lines = [line for line in f
                   if line and not line.startswith('#')]

    try:
      self._version, ln = self._parse_version()
      self._parse_meta_information()
      if self._version == DUMP_DEEP_6:
        self._parse_mmap_list()
      self._parse_global_stats()
      self._extract_stacktrace_lines(ln)
    except EmptyDumpException:
      LOGGER.info('%s%s ...ignored an empty dump.' % (log_header, self._path))
    except ParsingException as e:
      LOGGER.error('%s%s ...error %s' % (log_header, self._path, e))
      raise
    else:
      LOGGER.info('%s%s (version:%s)' % (log_header, self._path, self._version))

  def _parse_version(self):
    """Parses a version string in self._lines.

    Returns:
        A pair of (a string representing a version of the stacktrace dump,
        and an integer indicating a line number next to the version string).

    Raises:
        ParsingException for invalid dump versions.
    """
    # Skip until an identifiable line.
    headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')
    if not self._lines:
      raise EmptyDumpException('Empty heap dump file.')
    (ln, found) = skip_while(
        0, len(self._lines),
        lambda n: not self._lines[n].startswith(headers))
    if not found:
      raise InvalidDumpException('No version header.')

    # Identify a version.
    if self._lines[ln].startswith('heap profile: '):
      version = self._lines[ln][13:].strip()
      if version in (DUMP_DEEP_5, DUMP_DEEP_6):
        (ln, _) = skip_while(
            ln, len(self._lines),
            lambda n: self._lines[n] != 'STACKTRACES:\n')
      elif version in DUMP_DEEP_OBSOLETE:
        raise ObsoleteDumpVersionException(version)
      else:
        raise InvalidDumpException('Invalid version: %s' % version)
    elif self._lines[ln] == 'STACKTRACES:\n':
      raise ObsoleteDumpVersionException(DUMP_DEEP_1)
    elif self._lines[ln] == 'MMAP_STACKTRACES:\n':
      raise ObsoleteDumpVersionException(DUMP_DEEP_2)

    return (version, ln)

  def _parse_global_stats(self):
    """Parses lines in self._lines as global stats."""
    (ln, _) = skip_while(
        0, len(self._lines),
        lambda n: self._lines[n] != 'GLOBAL_STATS:\n')

    global_stat_names = [
        'total', 'absent', 'file-exec', 'file-nonexec', 'anonymous', 'stack',
        'other', 'nonprofiled-absent', 'nonprofiled-anonymous',
        'nonprofiled-file-exec', 'nonprofiled-file-nonexec',
        'nonprofiled-stack', 'nonprofiled-other',
        'profiled-mmap', 'profiled-malloc']

    for prefix in global_stat_names:
      (ln, _) = skip_while(
          ln, len(self._lines),
          lambda n: self._lines[n].split()[0] != prefix)
      words = self._lines[ln].split()
      self._global_stats[prefix + '_virtual'] = int(words[-2])
      self._global_stats[prefix + '_committed'] = int(words[-1])
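
  # For illustration, a GLOBAL_STATS line of the form (hypothetical values):
  #
  #   total     2469396480    350676992
  #
  # is recorded as _global_stats['total_virtual'] = 2469396480 and
  # _global_stats['total_committed'] = 350676992, since the last two columns
  # are taken as the virtual and committed sizes.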

  def _parse_meta_information(self):
    """Parses lines in self._lines for meta information."""
    (ln, found) = skip_while(
        0, len(self._lines),
        lambda n: self._lines[n] != 'META:\n')
    if not found:
      return
    ln += 1

    while True:
      if self._lines[ln].startswith('Time:'):
        matched_seconds = self._TIME_PATTERN_SECONDS.match(self._lines[ln])
        matched_format = self._TIME_PATTERN_FORMAT.match(self._lines[ln])
        if matched_format:
          self._time = time.mktime(datetime.datetime.strptime(
              matched_format.group(1), '%Y/%m/%d %H:%M:%S').timetuple())
          if matched_format.group(2):
            self._time += float(matched_format.group(2)[1:]) / 1000.0
        elif matched_seconds:
          self._time = float(matched_seconds.group(1))
      elif self._lines[ln].startswith('Reason:'):
        pass  # Nothing to do for 'Reason:'
      elif self._lines[ln].startswith('PageSize: '):
        self._pagesize = int(self._lines[ln][10:])
      elif self._lines[ln].startswith('CommandLine:'):
        pass  # Nothing to do for 'CommandLine:'
      elif (self._lines[ln].startswith('PageFrame: ') or
            self._lines[ln].startswith('PFN: ')):
        if self._lines[ln].startswith('PageFrame: '):
          words = self._lines[ln][11:].split(',')
        else:
          words = self._lines[ln][5:].split(',')
        for word in words:
          if word == '24':
            self._pageframe_length = 24
          elif word == 'Base64':
            self._pageframe_encoding = 'base64'
          elif word == 'PageCount':
            self._has_pagecount = True
      elif self._lines[ln].startswith('RunID: '):
        self._run_id = self._lines[ln][7:].strip()
      elif (self._lines[ln].startswith('MMAP_LIST:') or
            self._lines[ln].startswith('GLOBAL_STATS:')):
        # Skip until "MMAP_LIST:" or "GLOBAL_STATS" is found.
        break
      ln += 1

  def _parse_mmap_list(self):
    """Parses lines in self._lines as a mmap list."""
    (ln, found) = skip_while(
        0, len(self._lines),
        lambda n: self._lines[n] != 'MMAP_LIST:\n')
    if not found:
      return

    ln += 1
    self._map = {}
    current_vma = {}
    pageframe_list = []
    while True:
      entry = procfs.ProcMaps.parse_line(self._lines[ln])
      if entry:
        current_vma = {}
        for _, _, attr in self._procmaps.iter_range(entry.begin, entry.end):
          for key, value in entry.as_dict().iteritems():
            attr[key] = value
            current_vma[key] = value
        ln += 1
        continue

      if self._lines[ln].startswith(' PF: '):
        for pageframe in self._lines[ln][5:].split():
          pageframe_list.append(PageFrame.parse(pageframe, self._pagesize))
        ln += 1
        continue

      matched = self._HOOK_PATTERN.match(self._lines[ln])
      if not matched:
        break
      # 2: starting address
      # 5: ending address
      # 7: hooked or unhooked
      # 8: additional information
      if matched.group(7) == 'hooked':
        submatched = self._HOOKED_PATTERN.match(matched.group(8))
        if not submatched:
          submatched = self._OLD_HOOKED_PATTERN.match(matched.group(8))
      elif matched.group(7) == 'unhooked':
        submatched = self._UNHOOKED_PATTERN.match(matched.group(8))
        if not submatched:
          submatched = self._OLD_UNHOOKED_PATTERN.match(matched.group(8))
      else:
        assert matched.group(7) in ['hooked', 'unhooked']

      submatched_dict = submatched.groupdict()
      region_info = { 'vma': current_vma }
      if submatched_dict.get('TYPE'):
        region_info['type'] = submatched_dict['TYPE'].strip()
      if submatched_dict.get('COMMITTED'):
        region_info['committed'] = int(submatched_dict['COMMITTED'])
      if submatched_dict.get('RESERVED'):
        region_info['reserved'] = int(submatched_dict['RESERVED'])
      if submatched_dict.get('BUCKETID'):
        region_info['bucket_id'] = int(submatched_dict['BUCKETID'])

      if matched.group(1) == '(':
        start = current_vma['begin']
      else:
        start = int(matched.group(2), 16)
      if matched.group(4) == '(':
        end = current_vma['end']
      else:
        end = int(matched.group(5), 16)

      if pageframe_list and pageframe_list[0].start_truncated:
        pageframe_list[0].set_size(
            pageframe_list[0].size - start % self._pagesize)
      if pageframe_list and pageframe_list[-1].end_truncated:
        pageframe_list[-1].set_size(
            pageframe_list[-1].size - (self._pagesize - end % self._pagesize))
      region_info['pageframe'] = pageframe_list
      pageframe_list = []

      self._map[(start, end)] = (matched.group(7), region_info)
      ln += 1
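
  # After parsing, self._map maps (start, end) address pairs to a
  # ('hooked'|'unhooked', region_info) tuple, where region_info carries the
  # committed/reserved sizes, the bucket ID (for hooked regions), the
  # enclosing VMA attributes and the associated page frames.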

  def _extract_stacktrace_lines(self, line_number):
    """Extracts the position of stacktrace lines.

    Valid stacktrace lines are stored into self._stacktrace_lines.

    Args:
        line_number: A line number to start parsing in lines.

    Raises:
        ParsingException for invalid dump versions.
    """
    if self._version in (DUMP_DEEP_5, DUMP_DEEP_6):
      (line_number, _) = skip_while(
          line_number, len(self._lines),
          lambda n: not self._lines[n].split()[0].isdigit())
      stacktrace_start = line_number
      (line_number, _) = skip_while(
          line_number, len(self._lines),
          lambda n: self._check_stacktrace_line(self._lines[n]))
      self._stacktrace_lines = self._lines[stacktrace_start:line_number]

    elif self._version in DUMP_DEEP_OBSOLETE:
      raise ObsoleteDumpVersionException(self._version)

    else:
      raise InvalidDumpException('Invalid version: %s' % self._version)

  @staticmethod
  def _check_stacktrace_line(stacktrace_line):
    """Checks if a given stacktrace_line is valid as stacktrace.

    Args:
        stacktrace_line: A string to be checked.

    Returns:
        True if the given stacktrace_line is valid.
    """
    words = stacktrace_line.split()
    if len(words) < BUCKET_ID + 1:
      return False
    if words[BUCKET_ID - 1] != '@':
      return False
    return True


class DumpList(object):
  """Represents a sequence of heap profile dumps.

  Individual dumps are loaded into memory lazily as the sequence is accessed,
  either while being iterated through or randomly accessed. Loaded dumps are
  not cached, meaning a newly loaded Dump object is returned every time an
  element in the list is accessed.
  """

  def __init__(self, dump_path_list):
    self._dump_path_list = dump_path_list

  @staticmethod
  def load(path_list):
    return DumpList(path_list)

  def __len__(self):
    return len(self._dump_path_list)

  def __iter__(self):
    for dump in self._dump_path_list:
      yield Dump.load(dump)

  def __getitem__(self, index):
    return Dump.load(self._dump_path_list[index])
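
# Illustrative use of DumpList (the paths are hypothetical): every access
# re-reads the file, so keep a reference to a Dump if it is needed twice.
#
#   dumps = DumpList.load(['chrome.12345.0001.heap', 'chrome.12345.0002.heap'])
#   for dump in dumps:
#     process(dump)  # 'process' stands for the caller's own handling code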


class ProcMapsEntryAttribute(ExclusiveRangeDict.RangeAttribute):
  """Represents an entry of /proc/maps in range_dict.ExclusiveRangeDict."""
  # Dummy /proc/maps values (assumed field order of procfs.ProcMapsEntry).
  _DUMMY_ENTRY = procfs.ProcMapsEntry(
      0,     # begin
      0,     # end
      '-',   # readable
      '-',   # writable
      '-',   # executable
      '-',   # private
      0,     # offset
      '00',  # major
      '00',  # minor
      0,     # inode
      ''     # name
      )

  def __init__(self):
    super(ProcMapsEntryAttribute, self).__init__()
    self._entry = self._DUMMY_ENTRY.as_dict()

  def __str__(self):
    return str(self._entry)

  def __repr__(self):
    return 'ProcMapsEntryAttribute' + str(self._entry)

  def __getitem__(self, key):
    return self._entry[key]

  def __setitem__(self, key, value):
    if key not in self._entry:
      raise KeyError(key)
    self._entry[key] = value

  def copy(self):
    new_entry = ProcMapsEntryAttribute()
    for key, value in self._entry.iteritems():
      new_entry[key] = copy.deepcopy(value)
    return new_entry


def skip_while(index, max_index, skipping_condition):
  """Increments |index| until |skipping_condition|(|index|) is False.

  Returns:
      A pair of an integer indicating a line number after skipped, and a
      boolean value which is True if found a line which skipping_condition
      is False for.
  """
  while skipping_condition(index):
    index += 1
    if index >= max_index:
      return index, False
  return index, True
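
# Illustrative use of skip_while: starting from index 0, advance past lines
# until one equals 'GLOBAL_STATS:\n' (the list and condition are examples,
# not code from this module):
#
#   (ln, found) = skip_while(
#       0, len(lines), lambda n: lines[n] != 'GLOBAL_STATS:\n')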