# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Generate a spatial analysis against an arbitrary library.

To use, build the 'binary_size_tool' target. Then run this tool, passing
in the location of the library to be analyzed along with any other options
you desire.
"""

import collections
import json
import logging
import multiprocessing
import optparse
import os
import re
import shutil
import struct
import subprocess
import sys
import tempfile
import time

import binary_size_utils

# This path change is not beautiful. Temporary (I hope) measure until
# the chromium project has figured out a proper way to organize the
# library of python tools. http://crbug.com/375725
elf_symbolizer_path = os.path.abspath(os.path.join(
    os.path.dirname(__file__)))
sys.path.append(elf_symbolizer_path)
import symbols.elf_symbolizer as elf_symbolizer  # pylint: disable=F0401

# Node dictionary keys. These are output in the JSON read by the webapp,
# so keep them short to save file size.
# Note: If these change, the webapp must also change.
NODE_CHILDREN_KEY = 'children'
NODE_SYMBOL_TYPE_KEY = 't'
NODE_SYMBOL_SIZE_KEY = 'value'
NODE_MAX_DEPTH_KEY = 'maxDepth'
NODE_LAST_PATH_ELEMENT_KEY = 'lastPathElement'

# The display name of the bucket where we put symbols without path.
NAME_NO_PATH_BUCKET = '(No Path)'

# Try to keep data buckets smaller than this to avoid killing the
# graphing lib.
BIG_BUCKET_LIMIT = 3000

# TODO(andrewhayden): Only used for legacy reports. Delete.
def FormatBytes(byte_count):
  """Pretty-print a number of bytes."""
  if byte_count > 1e6:
    byte_count = byte_count / 1.0e6
    return '%.1fm' % byte_count
  if byte_count > 1e3:
    byte_count = byte_count / 1.0e3
    return '%.1fk' % byte_count
  return str(byte_count)
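
# Illustrative outputs, assuming the thresholds above:
#   FormatBytes(2499000) -> '2.5m'
#   FormatBytes(4096)    -> '4.1k'
#   FormatBytes(512)     -> '512'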

# TODO(andrewhayden): Only used for legacy reports. Delete.
def SymbolTypeToHuman(symbol_type):
  """Convert a symbol type as printed by nm into a human-readable name."""
  return {'t': 'code',
          'r': 'read-only data',
          'd': 'data',
          'b': 'bss',
          'w': 'weak symbol',
          'v': 'weak symbol'}[symbol_type]

def _MkChild(node, name):
  child = node[NODE_CHILDREN_KEY].get(name)
  if child is None:
    child = {NODE_NAME_KEY: name,
             NODE_CHILDREN_KEY: {}}
    node[NODE_CHILDREN_KEY][name] = child
  return child

def SplitNoPathBucket(node):
  """NAME_NO_PATH_BUCKET can be too large for the graphing lib to
  handle. Split it into sub-buckets in that case."""
  root_children = node[NODE_CHILDREN_KEY]
  if NAME_NO_PATH_BUCKET in root_children:
    no_path_bucket = root_children[NAME_NO_PATH_BUCKET]
    old_children = no_path_bucket[NODE_CHILDREN_KEY]
    count = 0
    for symbol_type, symbol_bucket in old_children.iteritems():
      count += len(symbol_bucket[NODE_CHILDREN_KEY])
    if count > BIG_BUCKET_LIMIT:
      new_children = {}
      no_path_bucket[NODE_CHILDREN_KEY] = new_children
      current_bucket = None
      index = 0
      for symbol_type, symbol_bucket in old_children.iteritems():
        for symbol_name, value in symbol_bucket[NODE_CHILDREN_KEY].iteritems():
          if index % BIG_BUCKET_LIMIT == 0:
            group_no = (index / BIG_BUCKET_LIMIT) + 1
            current_bucket = _MkChild(no_path_bucket,
                                      '%s subgroup %d' % (NAME_NO_PATH_BUCKET,
                                                          group_no))
            assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'p'
            node[NODE_TYPE_KEY] = 'p'  # p for path
          symbol_size = value[NODE_SYMBOL_SIZE_KEY]
          AddSymbolIntoFileNode(current_bucket, symbol_type,
                                symbol_name, symbol_size)
          index += 1
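
# For example (illustrative): with BIG_BUCKET_LIMIT = 3000, a (No Path)
# bucket holding 7,500 symbols is rewritten into three children named
# '(No Path) subgroup 1' through '(No Path) subgroup 3'.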

def MakeChildrenDictsIntoLists(node):
  largest_list_len = 0
  if NODE_CHILDREN_KEY in node:
    largest_list_len = len(node[NODE_CHILDREN_KEY])
    child_list = []
    for child in node[NODE_CHILDREN_KEY].itervalues():
      child_largest_list_len = MakeChildrenDictsIntoLists(child)
      if child_largest_list_len > largest_list_len:
        largest_list_len = child_largest_list_len
      child_list.append(child)
    node[NODE_CHILDREN_KEY] = child_list
  return largest_list_len

def AddSymbolIntoFileNode(node, symbol_type, symbol_name, symbol_size):
  """Puts symbol into the file path node |node|.
  Returns the number of levels added to the tree (always 2)."""

  # 'node' is the file node and first step is to find its symbol-type bucket.
  node[NODE_LAST_PATH_ELEMENT_KEY] = True
  node = _MkChild(node, symbol_type)
  assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'b'
  node[NODE_SYMBOL_TYPE_KEY] = symbol_type
  node[NODE_TYPE_KEY] = 'b'  # b for bucket

  # 'node' is now the symbol-type bucket. Make the child entry.
  node = _MkChild(node, symbol_name)
  if NODE_CHILDREN_KEY in node:
    if node[NODE_CHILDREN_KEY]:
      logging.warning('A container node is being used as a symbol for %s.',
                      symbol_name)
    # This is going to be used as a leaf so no use for child list.
    del node[NODE_CHILDREN_KEY]
  node[NODE_SYMBOL_SIZE_KEY] = symbol_size
  node[NODE_SYMBOL_TYPE_KEY] = symbol_type
  node[NODE_TYPE_KEY] = 's'  # s for symbol

  return 2  # Depth of the added subtree.
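
# For example (illustrative): adding a symbol 'Foo::Bar()' of type 't' to the
# node for 'foo.cc' creates (or reuses) a 't' bucket child under 'foo.cc' and
# a 'Foo::Bar()' leaf under that bucket, hence the constant depth of 2.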

def MakeCompactTree(symbols, symbol_path_origin_dir):
  result = {NODE_NAME_KEY: '/',
            NODE_CHILDREN_KEY: {},
            NODE_MAX_DEPTH_KEY: 0}
  seen_symbol_with_path = False
  cwd = os.path.abspath(os.getcwd())
  for symbol_name, symbol_type, symbol_size, file_path in symbols:
    if 'vtable for ' in symbol_name:
      symbol_type = '@'  # hack to categorize these separately
    # Take path like '/foo/bar/baz', convert to ['foo', 'bar', 'baz']
    if file_path and file_path != "??":
      file_path = os.path.abspath(os.path.join(symbol_path_origin_dir,
                                               file_path))
      # Let the output structure be relative to $CWD if inside $CWD,
      # otherwise relative to the disk root. This is to avoid
      # unnecessary click-through levels in the output.
      if file_path.startswith(cwd + os.sep):
        file_path = file_path[len(cwd):]
        if file_path.startswith('/'):
          file_path = file_path[1:]
      seen_symbol_with_path = True
    else:
      file_path = NAME_NO_PATH_BUCKET

    path_parts = file_path.split('/')

    # Find the pre-existing node in the tree, creating it if necessary.
    node = result
    depth = 0
    while len(path_parts) > 0:
      path_part = path_parts.pop(0)
      if len(path_part) == 0:
        continue
      depth += 1
      node = _MkChild(node, path_part)
      assert not NODE_TYPE_KEY in node or node[NODE_TYPE_KEY] == 'p'
      node[NODE_TYPE_KEY] = 'p'  # p for path

    depth += AddSymbolIntoFileNode(node, symbol_type, symbol_name, symbol_size)
    result[NODE_MAX_DEPTH_KEY] = max(result[NODE_MAX_DEPTH_KEY], depth)

  if not seen_symbol_with_path:
    logging.warning('Symbols lack paths. Data will not be structured.')

  # The (no path) bucket can be extremely large if we failed to get
  # path information. Split it into subgroups if needed.
  SplitNoPathBucket(result)

  largest_list_len = MakeChildrenDictsIntoLists(result)

  if largest_list_len > BIG_BUCKET_LIMIT:
    logging.warning('There are sections with %d nodes. '
                    'Results might be unusable.', largest_list_len)

  return result
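
# For example (illustrative): with the current directory at /src, a symbol
# whose resolved path is /src/base/foo.cc is filed under the nodes
# 'base' -> 'foo.cc', while one at /out/gen/bar.cc (outside $CWD) is filed
# under 'out' -> 'gen' -> 'bar.cc' relative to the disk root.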

# TODO(andrewhayden): Only used for legacy reports. Delete.
def TreeifySymbols(symbols):
  """Convert symbols into a path-based tree, calculating size information
  along the way.

  The result is a dictionary that contains two kinds of nodes:
  1. Leaf nodes, representing source code locations (e.g., c++ files)
     These nodes have the following dictionary entries:
       sizes: a dictionary whose keys are categories (such as code, data,
              vtable, etc.) and whose values are the size, in bytes, of
              those categories;
       size: the total size, in bytes, of all the entries in the sizes dict
  2. Non-leaf nodes, representing directories
     These nodes have the following dictionary entries:
       children: a dictionary whose keys are names (path entries; either
                 directory or file names) and whose values are other nodes;
       size: the total size, in bytes, of all the leaf nodes that are
             contained within the children dict (recursively expanded)

  The result object is itself a dictionary that represents the common ancestor
  of all child nodes, e.g. a path to which all other nodes beneath it are
  relative. The 'size' attribute of this dict yields the sum of the size of all
  leaf nodes within the data structure.
  """
  dirs = {'children': {}, 'size': 0}
  for sym, symbol_type, size, path in symbols:
    dirs['size'] += size
    if path:
      path = os.path.normpath(path)
      if path.startswith('/'):
        path = path[1:]

    parts = None
    if path:
      parts = path.split('/')

    if parts:
      file_key = parts.pop()
      tree = dirs
      try:
        # Traverse the tree to the parent of the file node, creating as needed
        for part in parts:
          if part not in tree['children']:
            tree['children'][part] = {'children': {}, 'size': 0}
          tree = tree['children'][part]
          tree['size'] += size

        # Get (creating if necessary) the node for the file
        # This node doesn't have a 'children' attribute
        if file_key not in tree['children']:
          tree['children'][file_key] = {'sizes': collections.defaultdict(int),
                                        'size': 0}
        tree = tree['children'][file_key]
        tree['size'] += size

        # Accumulate size into a bucket within the file
        symbol_type = symbol_type.lower()
        if 'vtable for ' in sym:
          tree['sizes']['[vtable]'] += size
        elif 'r' == symbol_type:
          tree['sizes']['[rodata]'] += size
        elif 'd' == symbol_type:
          tree['sizes']['[data]'] += size
        elif 'b' == symbol_type:
          tree['sizes']['[bss]'] += size
        elif 't' == symbol_type:
          # 'text' in binary parlance means 'code'.
          tree['sizes']['[code]'] += size
        elif 'w' == symbol_type:
          tree['sizes']['[weak]'] += size
        else:
          tree['sizes']['[other]'] += size
      except:
        print >> sys.stderr, sym, parts, file_key
        raise
    else:
      key = 'symbols without paths'
      if key not in dirs['children']:
        dirs['children'][key] = {'sizes': collections.defaultdict(int),
                                 'size': 0}
      tree = dirs['children'][key]
      tree['size'] += size

      if (sym.endswith('::__FUNCTION__') or
          sym.endswith('::__PRETTY_FUNCTION__')):
        subkey = '__FUNCTION__'
      elif sym.startswith('CSWTCH.'):
        subkey = 'CSWTCH'
      else:
        subkey = sym[0:sym.find('::') + 2]
      tree['sizes'][subkey] = tree['sizes'].get(subkey, 0) + size

  return dirs
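
# Illustrative shape of one leaf (file) node produced above, values made up:
#   {'sizes': {'[code]': 4096, '[rodata]': 512, '[vtable]': 128}, 'size': 4736}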

# TODO(andrewhayden): Only used for legacy reports. Delete.
def JsonifyTree(tree, name):
  """Convert TreeifySymbols output to a JSON treemap.

  The format is very similar, with the notable exceptions being
  lists of children instead of maps and some different attribute names."""
  children = []
  css_class_map = {
      '[vtable]': 'vtable',
      '[rodata]': 'read-only_data',
      '[weak]': 'weak_symbol'
  }
  if 'children' in tree:
    # Non-leaf node. Recurse.
    for child_name, child in tree['children'].iteritems():
      children.append(JsonifyTree(child, child_name))
  else:
    # Leaf node; dump per-file stats as entries in the treemap
    for kind, size in tree['sizes'].iteritems():
      child_json = {'name': kind + ' (' + FormatBytes(size) + ')',
                    'data': {'$area': size}}
      css_class = css_class_map.get(kind)
      if css_class is not None:
        child_json['data']['$symbol'] = css_class
      children.append(child_json)
  # Sort children by size, largest to smallest.
  children.sort(key=lambda child: -child['data']['$area'])

  # For leaf nodes, the 'size' attribute is the size of the leaf;
  # Non-leaf nodes don't really have a size, but their 'size' attribute is
  # the sum of the sizes of all their children.
  return {'name': name + ' (' + FormatBytes(tree['size']) + ')',
          'data': {'$area': tree['size']},
          'children': children}
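
# Illustrative output for a leaf file node (sizes made up, other buckets
# omitted):
#   {'name': 'foo.cc (12.3k)', 'data': {'$area': 12345},
#    'children': [{'name': '[vtable] (10.0k)',
#                  'data': {'$area': 10000, '$symbol': 'vtable'}}]}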

def DumpCompactTree(symbols, symbol_path_origin_dir, outfile):
  tree_root = MakeCompactTree(symbols, symbol_path_origin_dir)
  with open(outfile, 'w') as out:
    out.write('var tree_data=')
    # Use separators without whitespace to get a smaller file.
    json.dump(tree_root, out, separators=(',', ':'))
  print('Writing %d bytes of JSON.' % os.path.getsize(outfile))
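
# The emitted file is a single JavaScript statement, roughly (illustrative):
#   var tree_data={"children":[...],"maxDepth":4,...}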

# TODO(andrewhayden): Only used for legacy reports. Delete.
def DumpTreemap(symbols, outfile):
  dirs = TreeifySymbols(symbols)
  with open(outfile, 'w') as out:
    out.write('var kTree = ' + json.dumps(JsonifyTree(dirs, '/')))

# TODO(andrewhayden): Only used for legacy reports. Delete.
def DumpLargestSymbols(symbols, outfile, n):
  # a list of (sym, symbol_type, size, path); sort by size.
  symbols = sorted(symbols, key=lambda x: -x[2])
  dumped = 0
  with open(outfile, 'w') as out:
    out.write('var largestSymbols = [\n')
    for sym, symbol_type, size, path in symbols:
      if symbol_type in ('b', 'w'):
        continue  # skip bss and weak symbols
      entry = {'size': FormatBytes(size),
               'symbol': sym,
               'type': SymbolTypeToHuman(symbol_type),
               'location': path}
      out.write(json.dumps(entry))
      out.write(',\n')
      dumped += 1
      if dumped >= n:
        break
    out.write(']\n')

def MakeSourceMap(symbols):
  sources = {}
  for _sym, _symbol_type, size, path in symbols:
    key = os.path.normpath(path)
    if key not in sources:
      sources[key] = {'path': path, 'symbol_count': 0, 'size': 0}
    record = sources[key]
    record['size'] += size
    record['symbol_count'] += 1
  return sources

# TODO(andrewhayden): Only used for legacy reports. Delete.
def DumpLargestSources(symbols, outfile, n):
  source_map = MakeSourceMap(symbols)
  sources = sorted(source_map.values(), key=lambda x: -x['size'])
  with open(outfile, 'w') as out:
    out.write('var largestSources = [\n')
    for record in sources[:n]:
      entry = {'size': FormatBytes(record['size']),
               'symbol_count': str(record['symbol_count']),
               'location': record['path']}
      out.write(json.dumps(entry))
      out.write(',\n')
    out.write(']\n')

# TODO(andrewhayden): Only used for legacy reports. Delete.
def DumpLargestVTables(symbols, outfile, n):
  vtables = []
  for symbol, _type, size, path in symbols:
    if 'vtable for ' in symbol:
      vtables.append({'symbol': symbol, 'path': path, 'size': size})
  vtables = sorted(vtables, key=lambda x: -x['size'])
  with open(outfile, 'w') as out:
    out.write('var largestVTables = [\n')
    for record in vtables[:n]:
      entry = {'size': FormatBytes(record['size']),
               'symbol': record['symbol'],
               'location': record['path']}
      out.write(json.dumps(entry))
      out.write(',\n')
    out.write(']\n')

# Regex for parsing "nm" output. A sample line looks like this:
# 0167b39c 00000018 t ACCESS_DESCRIPTION_free /path/file.c:95
#
# The fields are: address, size, type, name, source location.
# Regular expression explained (see also: https://xkcd.com/208 ):
# ([0-9a-f]{8,})   The address
# [\s]+            Whitespace separator
# ([0-9a-f]{8,})   The size. From here on out it's all optional.
# [\s]+            Whitespace separator
# (\S?)            The symbol type, which is any non-whitespace char
# [\s*]            Whitespace separator
# ([^\t]*)         Symbol name, any non-tab character (spaces ok!)
# [\t]?            Tab separator
# (.*)             The location (filename[:linenum|?][ (discriminator n)])
sNmPattern = re.compile(
    r'([0-9a-f]{8,})[\s]+([0-9a-f]{8,})[\s]*(\S?)[\s*]([^\t]*)[\t]?(.*)')
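
# Applied to the sample line above (assuming the name and location are
# separated by a tab in real nm output), the captured groups are roughly:
#   ('0167b39c', '00000018', 't', 'ACCESS_DESCRIPTION_free', '/path/file.c:95')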

class Progress():
  def __init__(self):
    self.count = 0
    self.skip_count = 0
    self.collisions = 0
    self.time_last_output = time.time()
    self.count_last_output = 0
    self.disambiguations = 0
    self.was_ambiguous = 0

def RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs,
                     disambiguate, src_path):
  nm_output = RunNm(library, nm_binary)
  nm_output_lines = nm_output.splitlines()
  nm_output_lines_len = len(nm_output_lines)
  address_symbol = {}
  progress = Progress()

  def map_address_symbol(symbol, addr):
    progress.count += 1
    if addr in address_symbol:
      # 'Collision between %s and %s.' % (str(symbol.name),
      #                                   str(address_symbol[addr].name))
      progress.collisions += 1
    else:
      if symbol.disambiguated:
        progress.disambiguations += 1
      if symbol.was_ambiguous:
        progress.was_ambiguous += 1
      address_symbol[addr] = symbol
    progress_output()

  def progress_output():
    progress_chunk = 100
    if progress.count % progress_chunk == 0:
      time_now = time.time()
      time_spent = time_now - progress.time_last_output
      if time_spent > 1.0:
        # Only output at most once per second.
        progress.time_last_output = time_now
        chunk_size = progress.count - progress.count_last_output
        progress.count_last_output = progress.count
        speed = chunk_size / time_spent
        progress_percent = (100.0 * (progress.count + progress.skip_count) /
                            nm_output_lines_len)
        disambiguation_percent = 0
        if progress.disambiguations != 0:
          disambiguation_percent = (100.0 * progress.disambiguations /
                                    progress.was_ambiguous)
        sys.stdout.write('\r%.1f%%: Looked up %d symbols (%d collisions, '
                         '%d disambiguations where %.1f%% succeeded)'
                         '- %.1f lookups/s.' %
                         (progress_percent, progress.count, progress.collisions,
                          progress.disambiguations, disambiguation_percent,
                          speed))

  # In case disambiguation was disabled, we remove the source path (which upon
  # being set signals the symbolizer to enable disambiguation).
  if not disambiguate:
    src_path = None
  symbolizer = elf_symbolizer.ELFSymbolizer(library, addr2line_binary,
                                            map_address_symbol,
                                            max_concurrent_jobs=jobs,
                                            source_root_path=src_path)
  user_interrupted = False
  try:
    for line in nm_output_lines:
      match = sNmPattern.match(line)
      if match:
        location = match.group(5)
        if not location:
          addr = int(match.group(1), 16)
          size = int(match.group(2), 16)
          if addr in address_symbol:  # Already looked up, shortcut
            map_address_symbol(address_symbol[addr], addr)
            continue
          elif size == 0:
            # Save time by not looking up empty symbols (do they even exist?)
            print('Empty symbol: ' + line)
          else:
            symbolizer.SymbolizeAsync(addr, addr)
            continue

      progress.skip_count += 1
  except KeyboardInterrupt:
    user_interrupted = True
    print('Interrupting - killing subprocesses. Please wait.')

  try:
    symbolizer.Join()
  except KeyboardInterrupt:
    # Don't want to abort here since we will be finished in a few seconds.
    user_interrupted = True
    print('Patience you must have my young padawan.')

  if user_interrupted:
    print('Skipping the rest of the file mapping. '
          'Output will not be fully classified.')

  symbol_path_origin_dir = os.path.dirname(os.path.abspath(library))

  with open(outfile, 'w') as out:
    for line in nm_output_lines:
      match = sNmPattern.match(line)
      if match:
        location = match.group(5)
        if not location:
          addr = int(match.group(1), 16)
          symbol = address_symbol.get(addr)
          if symbol is not None:
            path = '??'
            if symbol.source_path is not None:
              path = os.path.abspath(os.path.join(symbol_path_origin_dir,
                                                  symbol.source_path))
            line_number = 0
            if symbol.source_line is not None:
              line_number = symbol.source_line
            out.write('%s\t%s:%d\n' % (line, path, line_number))
            continue

      out.write('%s\n' % line)

  print('%d symbols in the results.' % len(address_symbol))

def RunNm(binary, nm_binary):
  cmd = [nm_binary, '-C', '--print-size', '--size-sort', '--reverse-sort',
         binary]
  nm_process = subprocess.Popen(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
  (process_output, err_output) = nm_process.communicate()

  if nm_process.returncode != 0:
    if err_output:
      raise Exception(err_output)
    else:
      raise Exception(process_output)

  return process_output
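
# Roughly equivalent to running, for example:
#   nm -C --print-size --size-sort --reverse-sort /path/to/libchrome.so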

def GetNmSymbols(nm_infile, outfile, library, jobs, verbose,
                 addr2line_binary, nm_binary, disambiguate, src_path):
  if nm_infile is None:
    if outfile is None:
      outfile = tempfile.NamedTemporaryFile(delete=False).name

    if verbose:
      print 'Running parallel addr2line, dumping symbols to ' + outfile
    RunElfSymbolizer(outfile, library, addr2line_binary, nm_binary, jobs,
                     disambiguate, src_path)

    nm_infile = outfile

  elif verbose:
    print 'Using nm input from ' + nm_infile

  with open(nm_infile, 'r') as infile:
    return list(binary_size_utils.ParseNm(infile))

PAK_RESOURCE_ID_TO_STRING = { "inited": False }


def LoadPakIdsFromResourceFile(filename):
  """Given a file name, loads everything that looks like a resource id
  into PAK_RESOURCE_ID_TO_STRING."""
  with open(filename) as resource_header:
    for line in resource_header:
      if line.startswith("#define "):
        line_data = line.split()
        if len(line_data) == 3:
          try:
            resource_number = int(line_data[2])
          except ValueError:
            continue
          resource_name = line_data[1]
          PAK_RESOURCE_ID_TO_STRING[resource_number] = resource_name

def GetReadablePakResourceName(pak_file, resource_id):
  """Pak resources have a numeric identifier, which is not helpful when
  trying to locate where the footprint comes from. This does its best to
  map the number to a usable string."""
  if not PAK_RESOURCE_ID_TO_STRING['inited']:
    # Try to find resource header files generated by grit when
    # building the pak file. We'll look for files named *resources.h
    # and lines of the type:
    #    #define MY_RESOURCE_JS 1234
    PAK_RESOURCE_ID_TO_STRING['inited'] = True
    gen_dir = os.path.join(os.path.dirname(pak_file), 'gen')
    if os.path.isdir(gen_dir):
      for dirname, _dirs, files in os.walk(gen_dir):
        for filename in files:
          if filename.endswith('resources.h'):
            LoadPakIdsFromResourceFile(os.path.join(dirname, filename))
  return PAK_RESOURCE_ID_TO_STRING.get(resource_id,
                                       'Pak Resource %d' % resource_id)

def AddPakData(symbols, pak_file):
  """Adds pseudo-symbols from a pak file."""
  pak_file = os.path.abspath(pak_file)
  with open(pak_file, 'rb') as pak:
    data = pak.read()

  PAK_FILE_VERSION = 4
  HEADER_LENGTH = 2 * 4 + 1  # Two uint32s (file version, number of entries)
                             # and one uint8 (encoding of text resources).
  INDEX_ENTRY_SIZE = 2 + 4  # Each entry is a uint16 and a uint32.
  version, num_entries, _encoding = struct.unpack('<IIB', data[:HEADER_LENGTH])
  assert version == PAK_FILE_VERSION, ('Unsupported pak file '
                                       'version (%d) in %s. Only '
                                       'support version %d' %
                                       (version, pak_file, PAK_FILE_VERSION))

  # Read the index and data.
  data = data[HEADER_LENGTH:]
  for _ in range(num_entries):
    resource_id, offset = struct.unpack('<HI', data[:INDEX_ENTRY_SIZE])
    data = data[INDEX_ENTRY_SIZE:]
    _next_id, next_offset = struct.unpack('<HI', data[:INDEX_ENTRY_SIZE])
    resource_size = next_offset - offset

    symbol_name = GetReadablePakResourceName(pak_file, resource_id)
    symbol_path = pak_file
    symbol_type = 'd'  # Data. Approximation.
    symbol_size = resource_size
    symbols.append((symbol_name, symbol_type, symbol_size, symbol_path))
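
# Each pak entry becomes a tuple shaped like the nm-derived symbols, e.g.
# (illustrative): ('IDR_SOMETHING_HTML', 'd', 2048, '/abs/path/resources.pak')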

def _find_in_system_path(binary):
  """Locate the full path to binary in the system path or return None
  if not found."""
  system_path = os.environ["PATH"].split(os.pathsep)
  for path in system_path:
    binary_path = os.path.join(path, binary)
    if os.path.isfile(binary_path):
      return binary_path
  return None
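
# For example, _find_in_system_path('nm') might return '/usr/bin/nm', or None
# when nm is not installed.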

def CheckDebugFormatSupport(library, addr2line_binary):
  """Kills the program if debug data is in an unsupported format.

  There are two common versions of the DWARF debug formats and
  since we are right now transitioning from DWARF2 to newer formats,
  it's possible to have a mix of tools that are not compatible. Detect
  that and abort rather than produce meaningless output."""
  tool_output = subprocess.check_output([addr2line_binary, '--version'])
  version_re = re.compile(r'^GNU [^ ]+ .* (\d+)\.(\d+).*?$', re.M)
  parsed_output = version_re.match(tool_output)
  major = int(parsed_output.group(1))
  minor = int(parsed_output.group(2))
  supports_dwarf4 = major > 2 or major == 2 and minor > 22
  if supports_dwarf4:
    return

  print('Checking version of debug information in %s.' % library)
  debug_info = subprocess.check_output(['readelf', '--debug-dump=info',
                                        '--dwarf-depth=1', library])
  dwarf_version_re = re.compile(r'^\s+Version:\s+(\d+)$', re.M)
  parsed_dwarf_format_output = dwarf_version_re.search(debug_info)
  version = int(parsed_dwarf_format_output.group(1))
  if version > 2:
    print('The supplied tools only support DWARF2 debug data but the binary\n' +
          'uses DWARF%d. Update the tools or compile the binary\n' % version +
          'with -gdwarf-2.')
    sys.exit(1)

def main():
  usage = """%prog [options]

  Runs a spatial analysis on a given library, looking up the source locations
  of its symbols and calculating how much space each directory, source file,
  and so on is taking. The result is a report that can be used to pinpoint
  sources of large portions of the binary, etc.

  Under normal circumstances, you only need to pass two arguments, thusly:

      %prog --library /path/to/library --destdir /path/to/output

  In this mode, the program will dump the symbols from the specified library
  and map those symbols back to source locations, producing a web-based
  report in the specified output directory.

  Other options are available via '--help'.
  """
  parser = optparse.OptionParser(usage=usage)
  parser.add_option('--nm-in', metavar='PATH',
                    help='if specified, use nm input from <path> instead of '
                    'generating it. Note that source locations should be '
                    'present in the file; i.e., no addr2line symbol lookups '
                    'will be performed when this option is specified. '
                    'Mutually exclusive with --library.')
  parser.add_option('--destdir', metavar='PATH',
                    help='write output to the specified directory. An HTML '
                    'report is generated here along with supporting files; '
                    'any existing report will be overwritten.')
  parser.add_option('--library', metavar='PATH',
                    help='if specified, process symbols in the library at '
                    'the specified path. Mutually exclusive with --nm-in.')
  parser.add_option('--pak', metavar='PATH',
                    help='if specified, includes the contents of the '
                    'specified *.pak file in the output.')
  parser.add_option('--nm-binary',
                    help='use the specified nm binary to analyze library. '
                    'This is to be used when the nm in the path is not for '
                    'the right architecture or of the right version.')
  parser.add_option('--addr2line-binary',
                    help='use the specified addr2line binary to analyze '
                    'library. This is to be used when the addr2line in '
                    'the path is not for the right architecture or '
                    'of the right version.')
  parser.add_option('--jobs', type='int',
                    help='number of jobs to use for the parallel '
                    'addr2line processing pool; defaults to 1. More '
                    'jobs greatly improve throughput but eat RAM like '
                    'popcorn, and take several gigabytes each. Start low '
                    'and ramp this number up until your machine begins to '
                    'struggle with RAM. '
                    'This argument is only valid when using --library.')
  parser.add_option('-v', dest='verbose', action='store_true',
                    help='be verbose, printing lots of status information.')
  parser.add_option('--nm-out', metavar='PATH',
                    help='keep the nm output file, and store it at the '
                    'specified path. This is useful if you want to see the '
                    'fully processed nm output after the symbols have been '
                    'mapped to source locations. By default, a tempfile is '
                    'used and is deleted when the program terminates. '
                    'This argument is only valid when using --library.')
  parser.add_option('--legacy', action='store_true',
                    help='emit legacy binary size report instead of modern')
  parser.add_option('--disable-disambiguation', action='store_true',
                    help='disables the disambiguation process altogether, '
                    'NOTE: this may, depending on your toolchain, produce '
                    'output with some symbols at the top layer if addr2line '
                    'could not get the entire source path.')
  parser.add_option('--source-path', default='./',
                    help='the path to the source code of the output binary, '
                    'default set to current directory. Used in the '
                    'disambiguation process.')
  opts, _args = parser.parse_args()

  if ((not opts.library) and (not opts.nm_in)) or (opts.library and opts.nm_in):
    parser.error('exactly one of --library or --nm-in is required')
  if opts.nm_in:
    if opts.jobs:
      print >> sys.stderr, ('WARNING: --jobs has no effect '
                            'when used with --nm-in')
  if not opts.destdir:
    parser.error('--destdir is a required argument')
  if not opts.jobs:
    # Use the number of processors but cap between 2 and 4 since raw
    # CPU power isn't the limiting factor. It's I/O limited, memory
    # bus limited and available-memory-limited. Too many processes and
    # the computer will run out of memory and it will be slow.
    opts.jobs = max(2, min(4, multiprocessing.cpu_count()))

  if opts.addr2line_binary:
    assert os.path.isfile(opts.addr2line_binary)
    addr2line_binary = opts.addr2line_binary
  else:
    addr2line_binary = _find_in_system_path('addr2line')
    assert addr2line_binary, 'Unable to find addr2line in the path. '\
        'Use --addr2line-binary to specify location.'
  if opts.nm_binary:
    assert os.path.isfile(opts.nm_binary)
    nm_binary = opts.nm_binary
  else:
    nm_binary = _find_in_system_path('nm')
    assert nm_binary, 'Unable to find nm in the path. Use --nm-binary '\
        'to specify location.'

  if opts.pak:
    assert os.path.isfile(opts.pak), 'Could not find %s' % opts.pak

  print('addr2line: %s' % addr2line_binary)
  print('nm: %s' % nm_binary)

  if opts.library:
    CheckDebugFormatSupport(opts.library, addr2line_binary)

  symbols = GetNmSymbols(opts.nm_in, opts.nm_out, opts.library,
                         opts.jobs, opts.verbose is True,
                         addr2line_binary, nm_binary,
                         opts.disable_disambiguation is None,
                         opts.source_path)
  if opts.pak:
    AddPakData(symbols, opts.pak)

  if not os.path.exists(opts.destdir):
    os.makedirs(opts.destdir, 0755)

  if opts.legacy:  # legacy report
    DumpTreemap(symbols, os.path.join(opts.destdir, 'treemap-dump.js'))
    DumpLargestSymbols(symbols,
                       os.path.join(opts.destdir, 'largest-symbols.js'), 100)
    DumpLargestSources(symbols,
                       os.path.join(opts.destdir, 'largest-sources.js'), 100)
    DumpLargestVTables(symbols,
                       os.path.join(opts.destdir, 'largest-vtables.js'), 100)
    treemap_out = os.path.join(opts.destdir, 'webtreemap')
    if not os.path.exists(treemap_out):
      os.makedirs(treemap_out, 0755)
    treemap_src = os.path.join('third_party', 'webtreemap', 'src')
    shutil.copy(os.path.join(treemap_src, 'COPYING'), treemap_out)
    shutil.copy(os.path.join(treemap_src, 'webtreemap.js'), treemap_out)
    shutil.copy(os.path.join(treemap_src, 'webtreemap.css'), treemap_out)
    shutil.copy(os.path.join('tools', 'binary_size', 'legacy_template',
                             'index.html'), opts.destdir)
  else:  # modern report
    if opts.library:
      symbol_path_origin_dir = os.path.dirname(os.path.abspath(opts.library))
    else:
      # Just a guess. Hopefully all paths in the input file are absolute.
      symbol_path_origin_dir = os.path.abspath(os.getcwd())
    data_js_file_name = os.path.join(opts.destdir, 'data.js')
    DumpCompactTree(symbols, symbol_path_origin_dir, data_js_file_name)
    d3_out = os.path.join(opts.destdir, 'd3')
    if not os.path.exists(d3_out):
      os.makedirs(d3_out, 0755)
    d3_src = os.path.join(os.path.dirname(__file__),
                          'third_party', 'd3', 'src')
    template_src = os.path.join(os.path.dirname(__file__),
                                'template')
    shutil.copy(os.path.join(d3_src, 'LICENSE'), d3_out)
    shutil.copy(os.path.join(d3_src, 'd3.js'), d3_out)
    shutil.copy(os.path.join(template_src, 'index.html'), opts.destdir)
    shutil.copy(os.path.join(template_src, 'D3SymbolTreeMap.js'), opts.destdir)

  print 'Report saved to ' + opts.destdir + '/index.html'


if __name__ == '__main__':
  main()