# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import datetime
import json
import logging
import sys

from lib.bucket import BUCKET_ID, COMMITTED
from lib.pageframe import PFNCounts
from lib.policy import PolicySet
from lib.subcommand import SubCommand


LOGGER = logging.getLogger('dmprof')


class PolicyCommands(SubCommand):
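  """Base class for the policy-applying subcommands: 'csv', 'json' and 'list'.

  It loads dump files and a policy set, applies each policy to every dump,
  and reports the aggregated size of each component defined by the policy.
  """
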
  def __init__(self, command):
    super(PolicyCommands, self).__init__(
        'Usage: %%prog %s [-p POLICY] <first-dump> [shared-first-dumps...]' %
        command)
    self._parser.add_option('-p', '--policy', type='string', dest='policy',
                            help='profile with POLICY', metavar='POLICY')
    self._parser.add_option('--alternative-dirs', dest='alternative_dirs',
                            metavar='/path/on/target@/path/on/host[:...]',
                            help='Read files in /path/on/host/ instead of '
                                 'files in /path/on/target/.')
    self._parser.add_option('--timestamp', dest='timestamp',
                            action='store_true',
                            help='Use an absolute timestamp for the "second" '
                                 'component.')
    self._timestamp = False

  def _set_up(self, sys_argv):
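    """Parses the command line and loads dumps, policies and page frame data.

    Args:
        sys_argv: A list of command-line arguments.

    Returns:
        A tuple of (policy_set, dumps, pfn_counts_dict, bucket_set).
    """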
    options, args = self._parse_args(sys_argv, 1)
    dump_path = args[1]
    shared_first_dump_paths = args[2:]
    alternative_dirs_dict = {}
    if options.alternative_dirs:
      for alternative_dir_pair in options.alternative_dirs.split(':'):
        target_path, host_path = alternative_dir_pair.split('@', 1)
        alternative_dirs_dict[target_path] = host_path
    (bucket_set, dumps) = SubCommand.load_basic_files(
        dump_path, True, alternative_dirs=alternative_dirs_dict)

    self._timestamp = options.timestamp

    pfn_counts_dict = {}
    for shared_first_dump_path in shared_first_dump_paths:
      shared_dumps = SubCommand._find_all_dumps(shared_first_dump_path)
      for shared_dump in shared_dumps:
        pfn_counts = PFNCounts.load(shared_dump)
        if pfn_counts.pid not in pfn_counts_dict:
          pfn_counts_dict[pfn_counts.pid] = []
        pfn_counts_dict[pfn_counts.pid].append(pfn_counts)

    policy_set = PolicySet.load(SubCommand._parse_policy_list(options.policy))
    return policy_set, dumps, pfn_counts_dict, bucket_set

  def _apply_policy(self, dump, pfn_counts_dict, policy, bucket_set,
                    first_dump_time):
62     """Aggregates the total memory size of each component.
63
64     Iterate through all stacktraces and attribute them to one of the components
65     based on the policy.  It is important to apply policy in right order.
66
67     Args:
68         dump: A Dump object.
69         pfn_counts_dict: A dict mapping a pid to a list of PFNCounts.
70         policy: A Policy object.
71         bucket_set: A BucketSet object.
72         first_dump_time: An integer representing time when the first dump is
73             dumped.
74
75     Returns:
76         A dict mapping components and their corresponding sizes.
77     """
    LOGGER.info('  %s' % dump.path)
    all_pfn_dict = {}
    if pfn_counts_dict:
      LOGGER.info('    shared with...')
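      # For each shared process, pick the PFNCounts snapshot closest in time
      # to this dump: the last one taken before the dump, or one taken less
      # than 3 seconds after it.  A pre-dump snapshot taken while 'Exiting'
      # means the process was already gone, and disqualifies that process.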
      for pid, pfnset_list in pfn_counts_dict.iteritems():
        closest_pfnset_index = None
        closest_pfnset_difference = 1024.0
        for index, pfnset in enumerate(pfnset_list):
          time_difference = pfnset.time - dump.time
          if time_difference >= 3.0:
            break
          elif ((time_difference < 0.0 and pfnset.reason != 'Exiting') or
                (0.0 <= time_difference and time_difference < 3.0)):
            closest_pfnset_index = index
            closest_pfnset_difference = time_difference
          elif time_difference < 0.0 and pfnset.reason == 'Exiting':
            closest_pfnset_index = None
            break
        if closest_pfnset_index is not None:
          for pfn, count in pfnset_list[closest_pfnset_index].iter_pfn:
            all_pfn_dict[pfn] = all_pfn_dict.get(pfn, 0) + count
          LOGGER.info('      %s (time difference = %f)' %
                      (pfnset_list[closest_pfnset_index].path,
                       closest_pfnset_difference))
        else:
          LOGGER.info('      (no match with pid:%d)' % pid)

    sizes = dict((c, 0) for c in policy.components)

    PolicyCommands._accumulate_malloc(dump, policy, bucket_set, sizes)
    verify_global_stats = PolicyCommands._accumulate_maps(
        dump, all_pfn_dict, policy, bucket_set, sizes)

    # TODO(dmikurube): Remove the verifying code when GLOBAL_STATS is removed.
    # http://crbug.com/245603.
    for verify_key, verify_value in verify_global_stats.iteritems():
      dump_value = dump.global_stat('%s_committed' % verify_key)
      if dump_value != verify_value:
        LOGGER.warn('%25s: %12d != %d (%d)' % (
            verify_key, dump_value, verify_value, dump_value - verify_value))

    sizes['mmap-no-log'] = (
        dump.global_stat('profiled-mmap_committed') -
        sizes['mmap-total-log'])
    sizes['mmap-total-record'] = dump.global_stat('profiled-mmap_committed')
    sizes['mmap-total-record-vm'] = dump.global_stat('profiled-mmap_virtual')

    sizes['tc-no-log'] = (
        dump.global_stat('profiled-malloc_committed') -
        sizes['tc-total-log'])
    sizes['tc-total-record'] = dump.global_stat('profiled-malloc_committed')
    sizes['tc-unused'] = (
        sizes['mmap-tcmalloc'] -
        dump.global_stat('profiled-malloc_committed'))
    if sizes['tc-unused'] < 0:
      LOGGER.warn('    Assuming tc-unused=0 as it is negative: %d (bytes)' %
                  sizes['tc-unused'])
      sizes['tc-unused'] = 0
    sizes['tc-total'] = sizes['mmap-tcmalloc']

    # TODO(dmikurube): global_stat will be deprecated.
    # See http://crbug.com/245603.
    for key, value in {
        'total': 'total_committed',
        'filemapped': 'file_committed',
        'absent': 'absent_committed',
        'file-exec': 'file-exec_committed',
        'file-nonexec': 'file-nonexec_committed',
        'anonymous': 'anonymous_committed',
        'stack': 'stack_committed',
        'other': 'other_committed',
        'unhooked-absent': 'nonprofiled-absent_committed',
        'total-vm': 'total_virtual',
        'filemapped-vm': 'file_virtual',
        'anonymous-vm': 'anonymous_virtual',
        'other-vm': 'other_virtual' }.iteritems():
      if key in sizes:
        sizes[key] = dump.global_stat(value)

    if 'mustbezero' in sizes:
      removed_list = (
          'profiled-mmap_committed',
          'nonprofiled-absent_committed',
          'nonprofiled-anonymous_committed',
          'nonprofiled-file-exec_committed',
          'nonprofiled-file-nonexec_committed',
          'nonprofiled-stack_committed',
          'nonprofiled-other_committed')
      sizes['mustbezero'] = (
          dump.global_stat('total_committed') -
          sum(dump.global_stat(removed) for removed in removed_list))
    if 'total-exclude-profiler' in sizes:
      sizes['total-exclude-profiler'] = (
          dump.global_stat('total_committed') -
          (sizes['mmap-profiler'] + sizes['mmap-type-profiler']))
    if 'hour' in sizes:
      sizes['hour'] = (dump.time - first_dump_time) / 60.0 / 60.0
    if 'minute' in sizes:
      sizes['minute'] = (dump.time - first_dump_time) / 60.0
    if 'second' in sizes:
      if self._timestamp:
        sizes['second'] = datetime.datetime.fromtimestamp(dump.time).isoformat()
      else:
        sizes['second'] = dump.time - first_dump_time

    return sizes

  @staticmethod
  def _accumulate_malloc(dump, policy, bucket_set, sizes):
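    """Accumulates sizes of malloc'ed buckets in |dump| into |sizes|.

    Each stacktrace line is matched to a component by |policy|, and its
    committed size is added to that component and to the 'tc-total-log' or
    'other-total-log' subtotal.
    """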
    for line in dump.iter_stacktrace:
      words = line.split()
      bucket = bucket_set.get(int(words[BUCKET_ID]))
      if not bucket or bucket.allocator_type == 'malloc':
        component_match = policy.find_malloc(bucket)
      elif bucket.allocator_type == 'mmap':
        continue
      else:
        assert False
      sizes[component_match] += int(words[COMMITTED])

      assert not component_match.startswith('mmap-')
      if component_match.startswith('tc-'):
        sizes['tc-total-log'] += int(words[COMMITTED])
      else:
        sizes['other-total-log'] += int(words[COMMITTED])

  @staticmethod
  def _accumulate_maps(dump, pfn_dict, policy, bucket_set, sizes):
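    """Accumulates sizes of memory mappings in |dump| into |sizes|.

    Returns:
        A dict of committed sizes per mapping category.  It is used only to
        verify the deprecated GLOBAL_STATS (http://crbug.com/245603).
    """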
    # TODO(dmikurube): Remove the dict when GLOBAL_STATS is removed.
    # http://crbug.com/245603.
    global_stats = {
        'total': 0,
        'file-exec': 0,
        'file-nonexec': 0,
        'anonymous': 0,
        'stack': 0,
        'other': 0,
        'nonprofiled-file-exec': 0,
        'nonprofiled-file-nonexec': 0,
        'nonprofiled-anonymous': 0,
        'nonprofiled-stack': 0,
        'nonprofiled-other': 0,
        'profiled-mmap': 0,
        }

    for _, value in dump.iter_map:
      # TODO(dmikurube): Remove the subtotal code when GLOBAL_STATS is removed.
      # It's temporary verification code for the transition described in
      # http://crbug.com/245603.
      committed = value[1].get('committed', 0)
      global_stats['total'] += committed
      key = 'other'
      name = value[1]['vma']['name']
      if name.startswith('/'):
        if value[1]['vma']['executable'] == 'x':
          key = 'file-exec'
        else:
          key = 'file-nonexec'
      elif name == '[stack]':
        key = 'stack'
      elif name == '':
        key = 'anonymous'
      global_stats[key] += committed
      if value[0] == 'unhooked':
        global_stats['nonprofiled-' + key] += committed
      if value[0] == 'hooked':
        global_stats['profiled-mmap'] += committed

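      # Attribute the region to a component.  With page frame data, sizes
      # are accumulated page by page; otherwise the region's whole committed
      # size is attributed at once.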
      if value[0] == 'unhooked':
        if pfn_dict and dump.pageframe_length:
          for pageframe in value[1]['pageframe']:
            component_match = policy.find_unhooked(value, pageframe, pfn_dict)
            sizes[component_match] += pageframe.size
        else:
          component_match = policy.find_unhooked(value)
          sizes[component_match] += int(value[1]['committed'])
      elif value[0] == 'hooked':
        if pfn_dict and dump.pageframe_length:
          for pageframe in value[1]['pageframe']:
            component_match, _ = policy.find_mmap(
                value, bucket_set, pageframe, pfn_dict)
            sizes[component_match] += pageframe.size
            assert not component_match.startswith('tc-')
            if component_match.startswith('mmap-'):
              sizes['mmap-total-log'] += pageframe.size
            else:
              sizes['other-total-log'] += pageframe.size
        else:
          component_match, _ = policy.find_mmap(value, bucket_set)
          sizes[component_match] += int(value[1]['committed'])
          if component_match.startswith('mmap-'):
            sizes['mmap-total-log'] += int(value[1]['committed'])
          else:
            sizes['other-total-log'] += int(value[1]['committed'])
      else:
        LOGGER.error('Unrecognized mapping status: %s' % value[0])

    return global_stats


class CSVCommand(PolicyCommands):
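  """Outputs the aggregated sizes as CSV tables, one table per policy."""
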
  def __init__(self):
    super(CSVCommand, self).__init__('csv')

  def do(self, sys_argv):
    policy_set, dumps, pfn_counts_dict, bucket_set = self._set_up(sys_argv)
    return self._output(
        policy_set, dumps, pfn_counts_dict, bucket_set, sys.stdout)

  def _output(self, policy_set, dumps, pfn_counts_dict, bucket_set, out):
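    """Writes a header row of component names and one row per dump to |out|.

    Sizes are reported in megabytes; time components ('hour', 'minute' and
    'second') are written as-is.  Rows are comma-padded to the widest policy.
    """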
    max_components = 0
    for label in policy_set:
      max_components = max(max_components, len(policy_set[label].components))

    for label in sorted(policy_set):
      components = policy_set[label].components
      if len(policy_set) > 1:
        out.write('%s%s\n' % (label, ',' * (max_components - 1)))
      out.write('%s%s\n' % (
          ','.join(components), ',' * (max_components - len(components))))

      LOGGER.info('Applying policy %s to...' % label)
      for index, dump in enumerate(dumps):
        if index == 0:
          first_dump_time = dump.time
        component_sizes = self._apply_policy(
            dump, pfn_counts_dict, policy_set[label], bucket_set,
            first_dump_time)
        s = []
        for c in components:
          if c in ('hour', 'minute', 'second'):
            if isinstance(component_sizes[c], str):
              s.append('%s' % component_sizes[c])
            else:
              s.append('%05.5f' % (component_sizes[c]))
          else:
            s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0))
        out.write('%s%s\n' % (
            ','.join(s), ',' * (max_components - len(components))))

      bucket_set.clear_component_cache()

    return 0


class JSONCommand(PolicyCommands):
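  """Outputs the aggregated sizes as a JSON document (format JSON_DEEP_2)."""
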
  def __init__(self):
    super(JSONCommand, self).__init__('json')

  def do(self, sys_argv):
    policy_set, dumps, pfn_counts_dict, bucket_set = self._set_up(sys_argv)
    return self._output(
        policy_set, dumps, pfn_counts_dict, bucket_set, sys.stdout)

  def _output(self, policy_set, dumps, pfn_counts_dict, bucket_set, out):
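    """Writes a JSON document to |out| with, per policy, the component
    legends and one snapshot (sizes plus dump path and time) per dump.
    """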
    json_base = {
      'version': 'JSON_DEEP_2',
      'policies': {},
    }

    for label in sorted(policy_set):
      json_base['policies'][label] = {
        'legends': policy_set[label].components,
        'snapshots': [],
      }

      LOGGER.info('Applying policy %s to...' % label)
      for index, dump in enumerate(dumps):
        if index == 0:
          first_dump_time = dump.time
        component_sizes = self._apply_policy(
            dump, pfn_counts_dict, policy_set[label], bucket_set,
            first_dump_time)
        component_sizes['dump_path'] = dump.path
        component_sizes['dump_time'] = datetime.datetime.fromtimestamp(
            dump.time).strftime('%Y-%m-%d %H:%M:%S')
        json_base['policies'][label]['snapshots'].append(component_sizes)

      bucket_set.clear_component_cache()

    json.dump(json_base, out, indent=2, sort_keys=True)

    return 0


class ListCommand(PolicyCommands):
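  """Prints the aggregated sizes of each dump as human-readable text."""
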
  def __init__(self):
    super(ListCommand, self).__init__('list')

  def do(self, sys_argv):
    policy_set, dumps, pfn_counts_dict, bucket_set = self._set_up(sys_argv)
    return self._output(
        policy_set, dumps, pfn_counts_dict, bucket_set, sys.stdout)

  def _output(self, policy_set, dumps, pfn_counts_dict, bucket_set, out):
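    """Writes one '<component> <size>' line per component for every policy
    and dump to |out|.  Sizes are in bytes.
    """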
    for label in sorted(policy_set):
      LOGGER.info('Applying policy %s to...' % label)
      for dump in dumps:
        component_sizes = self._apply_policy(
            dump, pfn_counts_dict, policy_set[label], bucket_set, dump.time)
        out.write('%s for %s:\n' % (label, dump.path))
        for c in policy_set[label].components:
          if c in ['hour', 'minute', 'second']:
            out.write('%40s %12.3f\n' % (c, component_sizes[c]))
          else:
            out.write('%40s %12d\n' % (c, component_sizes[c]))

      bucket_set.clear_component_cache()

    return 0