src/third_party/chromite/contrib/cros_tree_map

   1 #!/usr/bin/python
   2
   3 # Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
   4 # Use of this source code is governed by a BSD-style license that can be
   5 # found in the LICENSE file.
   6
   7 # TODO(petkov): Integrate this utility into the build system in a more
   8 # consistent way -- e.g., create an ebuild that pulls the utility from a
   9 # mirrored upstream repo with a patch or upstream the patch.
  10
  11 import optparse
  12 import os
  13 import re
  14 import sys
  15 import json
  16
  17 def format_bytes(bytes):
  18     """Pretty-print a number of bytes."""
  19     if bytes > 1e6:
  20         bytes = bytes / 1.0e6
  21         return '%.1fm' % bytes
  22     if bytes > 1e3:
  23         bytes = bytes / 1.0e3
  24         return '%.1fk' % bytes
  25     return str(bytes)
  26
  27
  28 def symbol_type_to_human(type):
  29     """Convert a symbol type as printed by nm into a human-readable name."""
  30     return {
  31         'b': 'bss',
  32         'd': 'data',
  33         'r': 'read-only data',
  34         't': 'code',
  35         'w': 'weak symbol',
  36         'v': 'weak symbol'
  37         }[type]
  38
  39
  40 def parse_du(input):
  41     """Parse du output.
  42
  43     Argument: an iterable over lines of 'du -B 1' output.'
  44
  45     Yields: (size, path)
  46     """
  47
  48     # Match lines with |size| |path|
  49     line_re = re.compile(r'^([0-9]+)\s+(.*)$')
  50     for line in input:
  51         line = line.rstrip()
  52         match = line_re.match(line)
  53         if match:
  54             size, path = match.groups()[0:2]
  55             size = int(size)
  56             yield size, path
  57
  58
  59 def parse_nm(input):
  60     """Parse nm output.
  61
  62     Argument: an iterable over lines of nm output.
  63
  64     Yields: (symbol name, symbol type, symbol size, source file path).
  65     Path may be None if nm couldn't figure out the source file.
  66     """
  67
  68     # Match lines with size + symbol + optional filename.
  69     sym_re = re.compile(r'^[0-9a-f]+ ([0-9a-f]+) (.) ([^\t]+)(?:\t(.*):\d+)?$')
  70
  71     # Match lines with addr but no size.
  72     addr_re = re.compile(r'^[0-9a-f]+ (.) ([^\t]+)(?:\t.*)?$')
  73     # Match lines that don't have an address at all -- typically external symbols.
  74     noaddr_re = re.compile(r'^ + (.) (.*)$')
  75
  76     for line in input:
  77         line = line.rstrip()
  78         match = sym_re.match(line)
  79         if match:
  80             size, type, sym = match.groups()[0:3]
  81             size = int(size, 16)
  82             type = type.lower()
  83             if type == 'v':
  84                 type = 'w'  # just call them all weak
  85             if type == 'b':
  86                 continue  # skip all BSS for now
  87             path = match.group(4)
  88             yield sym, type, size, path
  89             continue
  90         match = addr_re.match(line)
  91         if match:
  92             type, sym = match.groups()[0:2]
  93             # No size == we don't care.
  94             continue
  95         match = noaddr_re.match(line)
  96         if match:
  97             type, sym = match.groups()
  98             if type in ('U', 'w'):
  99                 # external or weak symbol
 100                 continue
 101
 102         print >>sys.stderr, 'unparsed:', repr(line)
 103
 104
 105 def treeify_du(dulines, strip_prefix=None):
 106     dirs = {}
 107     for size, path in dulines:
 108         if strip_prefix and path.startswith(strip_prefix):
 109             path = path[len(strip_prefix):]
 110         elif path.startswith('/'):
 111             path = path[1:]
 112         parts = path.split('/')
 113         key = parts.pop()
 114         tree = dirs
 115         for part in parts:
 116             if part not in tree:
 117                 tree[part] = {}
 118             tree = tree[part]
 119         if key not in tree:
 120             tree[key] = size
 121         else:
 122             # du reports the total for each directory (which may include files
 123             # contained in the directory itself).
 124             tree[key][None] = size
 125     return dirs
 126
 127
 128 def filter_syms(types, symbols):
 129     for sym, type, size, path in symbols:
 130         if type in types:
 131             yield sym, type, size, path
 132
 133
 134 def treeify_syms(symbols, strip_prefix=None):
 135     dirs = {}
 136     for sym, type, size, path in symbols:
 137         if path:
 138             path = os.path.normpath(path)
 139             if strip_prefix and path.startswith(strip_prefix):
 140                 path = path[len(strip_prefix):]
 141             elif path.startswith('/usr/include'):
 142                 path = path.replace('/usr/include', 'usrinclude')
 143             elif path.startswith('/'):
 144                 path = path[1:]
 145
 146         parts = None
 147         # TODO: make segmenting by namespace work.
 148         if False and '::' in sym:
 149             if sym.startswith('vtable for '):
 150                 sym = sym[len('vtable for '):]
 151                 parts = sym.split('::')
 152                 parts.append('[vtable]')
 153             else:
 154                 parts = sym.split('::')
 155             parts[0] = '::' + parts[0]
 156         elif path and '/' in path:
 157             parts = path.split('/')
 158
 159         if parts:
 160             key = parts.pop()
 161             tree = dirs
 162             try:
 163                 for part in parts:
 164                     assert part != '', path
 165                     if part not in tree:
 166                         tree[part] = {}
 167                     tree = tree[part]
 168                 tree[key] = tree.get(key, 0) + size
 169             except:
 170                 print >>sys.stderr, sym, parts, key
 171                 raise
 172         else:
 173             key = 'symbols without paths'
 174             if key not in dirs:
 175                 dirs[key] = {}
 176             tree = dirs[key]
 177             subkey = 'misc'
 178             if (sym.endswith('::__FUNCTION__') or
 179                 sym.endswith('::__PRETTY_FUNCTION__')):
 180                 subkey = '__FUNCTION__'
 181             elif sym.startswith('CSWTCH.'):
 182                 subkey = 'CSWTCH'
 183             elif '::' in sym:
 184                 subkey = sym[0:sym.find('::') + 2]
 185             else:
 186                 print >>sys.stderr, 'unbucketed (no path?):', sym, type, size, path
 187             tree[subkey] = tree.get(subkey, 0) + size
 188     return dirs
 189
 190
 191 def jsonify_tree(tree, name):
 192     children = []
 193     total = 0
 194     subtree_total = None
 195
 196     for key, val in tree.iteritems():
 197         if key is None:
 198             subtree_total = val
 199             continue
 200         if isinstance(val, dict):
 201             subtree = jsonify_tree(val, key)
 202             total += subtree['data']['$area']
 203             children.append(subtree)
 204         else:
 205             total += val
 206             children.append({
 207                     'name': key + ' ' + format_bytes(val),
 208                     'data': { '$area': val }
 209                     })
 210
 211     # Process du sub-tree totals by creating a '.' child with appropriate area.
 212     if subtree_total:
 213         dot_total = subtree_total - total
 214         if dot_total > 0:
 215             children.append({'name': '. ' + format_bytes(dot_total),
 216                              'data': { '$area': dot_total }})
 217             total = subtree_total
 218
 219     children.sort(key=lambda child: -child['data']['$area'])
 220
 221     return {
 222         'name': name + ' ' + format_bytes(total),
 223         'data': {
 224             '$area': total,
 225             },
 226         'children': children,
 227         }
 228
 229
 230 def dump_du(dufile, strip_prefix):
 231     dirs = treeify_du(parse_du(dufile), strip_prefix)
 232     print 'var kTree = ' + json.dumps(jsonify_tree(dirs, '/'), indent=2)
 233
 234
 235 def dump_nm(nmfile, strip_prefix):
 236     dirs = treeify_syms(parse_nm(nmfile), strip_prefix)
 237     print 'var kTree = ' + json.dumps(jsonify_tree(dirs, '/'), indent=2)
 238
 239
 240 def parse_objdump(input):
 241     """Parse objdump -h output."""
 242     sec_re = re.compile(r'^\d+ (\S+) +([0-9a-z]+)')
 243     sections = []
 244     debug_sections = []
 245
 246     for line in input:
 247         line = line.strip()
 248         match = sec_re.match(line)
 249         if match:
 250             name, size = match.groups()
 251             if name.startswith('.'):
 252                 name = name[1:]
 253             if name.startswith('debug_'):
 254                 name = name[len('debug_'):]
 255                 debug_sections.append((name, int(size, 16)))
 256             else:
 257                 sections.append((name, int(size, 16)))
 258             continue
 259     return sections, debug_sections
 260
 261
 262 def jsonify_sections(name, sections):
 263     children = []
 264     total = 0
 265     for section, size in sections:
 266         children.append({
 267                 'name': section + ' ' + format_bytes(size),
 268                 'data': { '$area': size }
 269                 })
 270         total += size
 271
 272     children.sort(key=lambda child: -child['data']['$area'])
 273
 274     return {
 275         'name': name + ' ' + format_bytes(total),
 276         'data': { '$area': total },
 277         'children': children
 278         }
 279
 280
 281 def dump_sections():
 282     sections, debug_sections = parse_objdump(open('objdump.out'))
 283     sections = jsonify_sections('sections', sections)
 284     debug_sections = jsonify_sections('debug', debug_sections)
 285     print 'var kTree = ' + json.dumps({
 286             'name': 'top',
 287             'data': { '$area': sections['data']['$area'] +
 288                                debug_sections['data']['$area'] },
 289             'children': [ debug_sections, sections ]})
 290
 291
 292 usage="""%prog [options] MODE
 293
 294 Modes are:
 295   du: output 'du' json suitable for a treemap
 296   syms: output symbols json suitable for a treemap
 297   dump: print symbols sorted by size (pipe to head for best output)
 298   sections: output binary sections json suitable for a treemap
 299
 300 du output passsed to --du-output should be from running a command
 301 like the following:
 302   du -B 1 /path/to/root > du.out
 303
 304 nm output passed to --nm-output should from running a command
 305 like the following (note, can take a long time -- 30 minutes):
 306   nm -C -S -l /path/to/binary > nm.out
 307
 308 objdump output passed to --objdump-output should be from a command
 309 like:
 310   objdump -h /path/to/binary > objdump.out"""
 311 parser = optparse.OptionParser(usage=usage)
 312 parser.add_option('--du-output', action='store', dest='dupath',
 313                   metavar='PATH', default='du.out',
 314                   help='path to nm output [default=nm.out]')
 315 parser.add_option('--nm-output', action='store', dest='nmpath',
 316                   metavar='PATH', default='nm.out',
 317                   help='path to nm output [default=nm.out]')
 318 parser.add_option('--objdump-output', action='store', dest='objdump',
 319                   metavar='PATH', default='objdump.out',
 320                   help='path to objdump output [default=objdump.out]')
 321 parser.add_option('--strip-prefix', metavar='PATH', action='store',
 322                   help='strip PATH prefix from paths; e.g. /path/to/src/root')
 323 parser.add_option('--filter', action='store',
 324                   help='include only symbols/files matching FILTER')
 325 opts, args = parser.parse_args()
 326
 327 if len(args) != 1:
 328     parser.print_usage()
 329     sys.exit(1)
 330
 331 mode = args[0]
 332 if mode == 'du':
 333     dufile = open(opts.dupath, 'r')
 334     dump_du(dufile, strip_prefix=opts.strip_prefix)
 335 elif mode == 'syms':
 336     nmfile = open(opts.nmpath, 'r')
 337     dump_nm(nmfile, strip_prefix=opts.strip_prefix)
 338 elif mode == 'sections':
 339     dump_sections()
 340 elif mode == 'dump':
 341     nmfile = open(opts.nmpath, 'r')
 342     syms = list(parse_nm(nmfile))
 343     # a list of (sym, type, size, path); sort by size.
 344     syms.sort(key=lambda x: -x[2])
 345     total = 0
 346     for sym, type, size, path in syms:
 347         if type in ('b', 'w'):
 348             continue  # skip bss and weak symbols
 349         if path is None:
 350             path = ''
 351         if opts.filter and not (opts.filter in sym or opts.filter in path):
 352             continue
 353         print '%6s %s (%s) %s' % (format_bytes(size), sym,
 354                                   symbol_type_to_human(type), path)
 355         total += size
 356     print '%6s %s' % (format_bytes(total), 'total'),
 357 else:
 358     print 'unknown mode'
 359     parser.print_usage()