3 # Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
4 # Use of this source code is governed by a BSD-style license that can be
5 # found in the LICENSE file.
7 # TODO(petkov): Integrate this utility into the build system in a more
8 # consistent way -- e.g., create an ebuild that pulls the utility from a
9 # mirrored upstream repo with a patch or upstream the patch.
17 def format_bytes(bytes):
18 """Pretty-print a number of bytes."""
21 return '%.1fm' % bytes
24 return '%.1fk' % bytes
28 def symbol_type_to_human(type):
29 """Convert a symbol type as printed by nm into a human-readable name."""
33 'r': 'read-only data',
43 Argument: an iterable over lines of 'du -B 1' output.
48 # Match lines with |size| |path|
49 line_re = re.compile(r'^([0-9]+)\s+(.*)$')
52 match = line_re.match(line)
54 size, path = match.groups()[0:2]
62 Argument: an iterable over lines of nm output.
64 Yields: (symbol name, symbol type, symbol size, source file path).
65 Path may be None if nm couldn't figure out the source file.
68 # Match lines with size + symbol + optional filename.
69 sym_re = re.compile(r'^[0-9a-f]+ ([0-9a-f]+) (.) ([^\t]+)(?:\t(.*):\d+)?$')
71 # Match lines with addr but no size.
72 addr_re = re.compile(r'^[0-9a-f]+ (.) ([^\t]+)(?:\t.*)?$')
73 # Match lines that don't have an address at all -- typically external symbols.
74 noaddr_re = re.compile(r'^ + (.) (.*)$')
78 match = sym_re.match(line)
80 size, type, sym = match.groups()[0:3]
84 type = 'w' # just call them all weak
86 continue # skip all BSS for now
88 yield sym, type, size, path
90 match = addr_re.match(line)
92 type, sym = match.groups()[0:2]
93 # No size == we don't care.
95 match = noaddr_re.match(line)
97 type, sym = match.groups()
98 if type in ('U', 'w'):
99 # external or weak symbol
102 print >>sys.stderr, 'unparsed:', repr(line)
105 def treeify_du(dulines, strip_prefix=None):
107 for size, path in dulines:
108 if strip_prefix and path.startswith(strip_prefix):
109 path = path[len(strip_prefix):]
110 elif path.startswith('/'):
112 parts = path.split('/')
122 # du reports the total for each directory (which may include files
123 # contained in the directory itself).
124 tree[key][None] = size
128 def filter_syms(types, symbols):
129 for sym, type, size, path in symbols:
131 yield sym, type, size, path
134 def treeify_syms(symbols, strip_prefix=None):
136 for sym, type, size, path in symbols:
138 path = os.path.normpath(path)
139 if strip_prefix and path.startswith(strip_prefix):
140 path = path[len(strip_prefix):]
141 elif path.startswith('/usr/include'):
142 path = path.replace('/usr/include', 'usrinclude')
143 elif path.startswith('/'):
147 # TODO: make segmenting by namespace work.
148 if False and '::' in sym:
149 if sym.startswith('vtable for '):
150 sym = sym[len('vtable for '):]
151 parts = sym.split('::')
152 parts.append('[vtable]')
154 parts = sym.split('::')
155 parts[0] = '::' + parts[0]
156 elif path and '/' in path:
157 parts = path.split('/')
164 assert part != '', path
168 tree[key] = tree.get(key, 0) + size
170 print >>sys.stderr, sym, parts, key
173 key = 'symbols without paths'
178 if (sym.endswith('::__FUNCTION__') or
179 sym.endswith('::__PRETTY_FUNCTION__')):
180 subkey = '__FUNCTION__'
181 elif sym.startswith('CSWTCH.'):
184 subkey = sym[0:sym.find('::') + 2]
186 print >>sys.stderr, 'unbucketed (no path?):', sym, type, size, path
187 tree[subkey] = tree.get(subkey, 0) + size
191 def jsonify_tree(tree, name):
196 for key, val in tree.iteritems():
200 if isinstance(val, dict):
201 subtree = jsonify_tree(val, key)
202 total += subtree['data']['$area']
203 children.append(subtree)
207 'name': key + ' ' + format_bytes(val),
208 'data': { '$area': val }
211 # Process du sub-tree totals by creating a '.' child with appropriate area.
213 dot_total = subtree_total - total
215 children.append({'name': '. ' + format_bytes(dot_total),
216 'data': { '$area': dot_total }})
217 total = subtree_total
219 children.sort(key=lambda child: -child['data']['$area'])
222 'name': name + ' ' + format_bytes(total),
226 'children': children,
def dump_du(dufile, strip_prefix):
    """Print the tree built from |dufile| as a 'var kTree = ...' assignment.

    The lines of |dufile| go through parse_du and treeify_du (which receives
    |strip_prefix|); the resulting tree is converted by jsonify_tree with
    '/' as the root name and written to stdout as indented JSON.
    """
    du_entries = parse_du(dufile)
    du_tree = treeify_du(du_entries, strip_prefix)
    treemap = jsonify_tree(du_tree, '/')
    # A single parenthesized expression prints the same text whether print
    # is a statement or a function.
    print('var kTree = ' + json.dumps(treemap, indent=2))
def dump_nm(nmfile, strip_prefix):
    """Print the tree built from |nmfile| as a 'var kTree = ...' assignment.

    The lines of |nmfile| go through parse_nm and treeify_syms (which
    receives |strip_prefix|); the resulting tree is converted by
    jsonify_tree with '/' as the root name and written to stdout as
    indented JSON.
    """
    symbols = parse_nm(nmfile)
    symbol_tree = treeify_syms(symbols, strip_prefix)
    treemap = jsonify_tree(symbol_tree, '/')
    # A single parenthesized expression prints the same text whether print
    # is a statement or a function.
    print('var kTree = ' + json.dumps(treemap, indent=2))
240 def parse_objdump(input):
241 """Parse objdump -h output."""
242 sec_re = re.compile(r'^\d+ (\S+) +([0-9a-z]+)')
248 match = sec_re.match(line)
250 name, size = match.groups()
251 if name.startswith('.'):
253 if name.startswith('debug_'):
254 name = name[len('debug_'):]
255 debug_sections.append((name, int(size, 16)))
257 sections.append((name, int(size, 16)))
259 return sections, debug_sections
262 def jsonify_sections(name, sections):
265 for section, size in sections:
267 'name': section + ' ' + format_bytes(size),
268 'data': { '$area': size }
272 children.sort(key=lambda child: -child['data']['$area'])
275 'name': name + ' ' + format_bytes(total),
276 'data': { '$area': total },
282 sections, debug_sections = parse_objdump(open('objdump.out'))
283 sections = jsonify_sections('sections', sections)
284 debug_sections = jsonify_sections('debug', debug_sections)
285 print 'var kTree = ' + json.dumps({
287 'data': { '$area': sections['data']['$area'] +
288 debug_sections['data']['$area'] },
289 'children': [ debug_sections, sections ]})
292 usage="""%prog [options] MODE
295 du: output 'du' json suitable for a treemap
296 syms: output symbols json suitable for a treemap
297 dump: print symbols sorted by size (pipe to head for best output)
298 sections: output binary sections json suitable for a treemap
300 du output passed to --du-output should be from running a command
302 du -B 1 /path/to/root > du.out
304 nm output passed to --nm-output should be from running a command
305 like the following (note, can take a long time -- 30 minutes):
306 nm -C -S -l /path/to/binary > nm.out
308 objdump output passed to --objdump-output should be from a command
310 objdump -h /path/to/binary > objdump.out"""
# Command-line options: one input-file path per kind of tool output, an
# optional path prefix to strip, and an optional substring filter.  The
# single positional argument (MODE) is left in |args|.
parser = optparse.OptionParser(usage=usage)
# The help text here previously described nm output (copy-paste from the
# --nm-output option below); it now matches the option and its default.
parser.add_option('--du-output', action='store', dest='dupath',
                  metavar='PATH', default='du.out',
                  help='path to du output [default=du.out]')
parser.add_option('--nm-output', action='store', dest='nmpath',
                  metavar='PATH', default='nm.out',
                  help='path to nm output [default=nm.out]')
parser.add_option('--objdump-output', action='store', dest='objdump',
                  metavar='PATH', default='objdump.out',
                  help='path to objdump output [default=objdump.out]')
parser.add_option('--strip-prefix', metavar='PATH', action='store',
                  help='strip PATH prefix from paths; e.g. /path/to/src/root')
parser.add_option('--filter', action='store',
                  help='include only symbols/files matching FILTER')
opts, args = parser.parse_args()
333 dufile = open(opts.dupath, 'r')
334 dump_du(dufile, strip_prefix=opts.strip_prefix)
336 nmfile = open(opts.nmpath, 'r')
337 dump_nm(nmfile, strip_prefix=opts.strip_prefix)
338 elif mode == 'sections':
341 nmfile = open(opts.nmpath, 'r')
342 syms = list(parse_nm(nmfile))
343 # a list of (sym, type, size, path); sort by size.
344 syms.sort(key=lambda x: -x[2])
346 for sym, type, size, path in syms:
347 if type in ('b', 'w'):
348 continue # skip bss and weak symbols
351 if opts.filter and not (opts.filter in sym or opts.filter in path):
353 print '%6s %s (%s) %s' % (format_bytes(size), sym,
354 symbol_type_to_human(type), path)
356 print '%6s %s' % (format_bytes(total), 'total'),