3 # Copyright 2012 the V8 project authors. All rights reserved.
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are
8 # * Redistributions of source code must retain the above copyright
9 # notice, this list of conditions and the following disclaimer.
10 # * Redistributions in binary form must reproduce the above
11 # copyright notice, this list of conditions and the following
12 # disclaimer in the documentation and/or other materials provided
13 # with the distribution.
14 # * Neither the name of Google Inc. nor the names of its
15 # contributors may be used to endorse or promote products derived
16 # from this software without specific prior written permission.
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# NOTE(review): this file is a line-numbered, partially-sampled dump of V8's
# tools/ll_prof.py (Python 2). Each line keeps its original line number and
# many lines are missing, so the code is not runnable as-is.
# USAGE is the optparse help text; the closing triple quote (original line
# ~78) is missing from this view, so the literal is unterminated here. No
# comments may be inserted below this point in this span without changing
# the string's value.
24 USAGE="""usage: %prog [OPTION]...
45 Analyses V8 and perf logs to produce profiles.
47 Perf logs can be collected using a command like:
48 $ perf record -R -e cycles -c 10000 -f -i ./d8 bench.js --ll-prof
49 # -R: collect all data
50 # -e cycles: use cpu-cycles event (run "perf list" for details)
51 # -c 10000: write a sample after each 10000 events
52 # -f: force output file overwrite
53 # -i: limit profiling to our process and the kernel
54 # --ll-prof shell flag enables the right V8 logs
55 This will produce a binary trace file (perf.data) that %prog can analyse.
58 The kernel has an internal maximum for events per second, it is 100K by
59 default. That's not enough for "-c 10000". Set it to some higher value:
60 $ echo 10000000 | sudo tee /proc/sys/kernel/perf_event_max_sample_rate
61 You can also make the warning about kernel address maps go away:
62 $ echo 0 | sudo tee /proc/sys/kernel/kptr_restrict
64 We have a convenience script that handles all of the above for you:
65 $ tools/run-llprof.sh ./d8 bench.js
68 # Print flat profile with annotated disassembly for the 10 top
69 # symbols. Use default log names and include the snapshot log.
70 $ %prog --snapshot --disasm-top=10
72 # Print flat profile with annotated disassembly for all used symbols.
73 # Use default log names and include kernel symbols into analysis.
74 $ %prog --disasm-all --kernel
76 # Print flat profile. Use custom log names.
77 $ %prog --log=foo.log --snapshot-log=snap-foo.log --trace=foo.data --snapshot
82 JS_SNAPSHOT_ORIGIN = "js-snapshot"
# NOTE(review): interior of `class Code` — the class header (original lines
# ~85-92, including the OPTIMIZED/FULL_CODEGEN/V8INTERNAL/UNKNOWN codetype
# constants and JS_ORIGIN) is missing from this sampled dump, as are several
# interior lines (e.g. `self.name = name`, `else:` branches, `return`s).
# Code represents one generated code object: its name(s), [start, end)
# address range, origin (JS log / snapshot / library file), and tick counts.
93 def __init__(self, name, start_address, end_address, origin, origin_offset):
97 self.other_names = None
98 self.start_address = start_address
99 self.end_address = end_address
101 self.origin_offset = origin_offset
103 self.self_ticks_map = None
104 self.callee_ticks = None
# Classify by the V8 log name prefix: "LazyCompile:*" marks optimized code,
# plain "LazyCompile:" full-codegen, "v8::internal::" V8 runtime C++.
105 if name.startswith("LazyCompile:*"):
106 self.codetype = Code.OPTIMIZED
107 elif name.startswith("LazyCompile:"):
108 self.codetype = Code.FULL_CODEGEN
109 elif name.startswith("v8::internal::"):
110 self.codetype = Code.V8INTERNAL
112 self.codetype = Code.UNKNOWN
# AddName: record an alias when the same code object is logged under
# several function names (missing line 118 is presumably an early return).
114 def AddName(self, name):
115 assert self.name != name
116 if self.other_names is None:
117 self.other_names = [name]
119 if not name in self.other_names:
120 self.other_names.append(name)
# FullName (def at missing line ~122): primary name plus sorted aliases.
123 if self.other_names is None:
125 self.other_names.sort()
126 return "%s (aka %s)" % (self.name, ", ".join(self.other_names))
# IsUsed-style predicate (header missing): executed iff it has self ticks
# or recorded callees.
129 return self.self_ticks > 0 or self.callee_ticks is not None
# Tick(pc) body (header missing): bump the per-offset self-tick counter.
133 if self.self_ticks_map is None:
134 self.self_ticks_map = collections.defaultdict(lambda: 0)
135 offset = pc - self.start_address
136 self.self_ticks_map[offset] += 1
# Count one call from this code object into `callee` (lazy defaultdict).
138 def CalleeTick(self, callee):
139 if self.callee_ticks is None:
140 self.callee_ticks = collections.defaultdict(lambda: 0)
141 self.callee_ticks[callee] += 1
# Print disassembly of this object with per-instruction tick percentages.
143 def PrintAnnotated(self, arch, options):
144 if self.self_ticks_map is None:
147 ticks_map = self.self_ticks_map.items()
148 # Convert the ticks map to offsets and counts arrays so that later
149 # we can do binary search in the offsets array.
150 ticks_map.sort(key=lambda t: t[0])
151 ticks_offsets = [t[0] for t in ticks_map]
152 ticks_counts = [t[1] for t in ticks_map]
153 # Get a list of disassembled lines and their addresses.
154 lines = self._GetDisasmLines(arch, options)
157 # Print annotated lines.
158 address = lines[0][0]
160 for i in xrange(len(lines)):
161 start_offset = lines[i][0] - address
162 if i == len(lines) - 1:
163 end_offset = self.end_address - self.start_address
165 end_offset = lines[i + 1][0] - address
166 # Ticks (reported pc values) are not always precise, i.e. not
167 # necessarily point at instruction starts. So we have to search
168 # for ticks that touch the current instruction line.
169 j = bisect.bisect_left(ticks_offsets, end_offset)
171 for offset, cnt in reversed(zip(ticks_offsets[:j], ticks_counts[:j])):
172 if offset < start_offset:
# Convert the raw count to a percentage of this object's self ticks.
176 count = 100.0 * count / self.self_ticks
178 print "%15.2f %x: %s" % (count, lines[i][0], lines[i][1])
180 print "%s %x: %s" % (" " * 15, lines[i][0], lines[i][1])
182 assert total_count == self.self_ticks, \
183 "Lost ticks (%d != %d) in %s" % (total_count, self.self_ticks, self)
# __str__ body (header missing): "<name> [0xstart, 0xend) size: N origin: O".
186 return "%s [0x%x, 0x%x) size: %d origin: %s" % (
190 self.end_address - self.start_address,
# Pick the file holding the machine code: the V8 .ll log for JS code,
# otherwise the library file named by `origin`, then delegate to disasm.
193 def _GetDisasmLines(self, arch, options):
194 if self.origin == JS_ORIGIN or self.origin == JS_SNAPSHOT_ORIGIN:
196 filename = options.log + ".ll"
199 filename = self.origin
200 return disasm.GetDisasmLines(filename,
202 self.end_address - self.start_address,
# NOTE(review): sampled dump — several lines are missing (MASK/SIZE
# constants, @staticmethod decorators, PageId header, Add/Find headers).
207 class CodePage(object):
208 """Group of adjacent code objects."""
210 SHIFT = 20 # 1M pages
# Mask a pc down to the base address of its 1M page.
215 def PageAddress(address):
216 return address & CodePage.MASK
# PageId body (header at missing line ~218-219): page index of an address.
220 return address >> CodePage.SHIFT
# Inverse of PageId: base address of the page with the given id.
223 def PageAddressFromId(id):
224 return id << CodePage.SHIFT
226 def __init__(self, address):
227 self.address = address
228 self.code_objects = []
# Add body (header at missing line ~230).
231 self.code_objects.append(code)
233 def Remove(self, code):
234 self.code_objects.remove(code)
# Find(pc) body (header missing): linear scan; on a hit the match is
# swapped to the front as a move-to-front cache for locality of samples.
237 code_objects = self.code_objects
238 for i, code in enumerate(code_objects):
239 if code.start_address <= pc < code.end_address:
240 code_objects[0], code_objects[i] = code, code_objects[0]
# __iter__ body (header missing).
245 return self.code_objects.__iter__()
# NOTE(review): sampled dump — __init__ header, `self.pages = {}`,
# page.Add/Remove calls and loop increments are among the missing lines.
248 class CodeMap(object):
249 """Code object map."""
# Sentinels chosen so the first Add() always tightens both bounds.
253 self.min_address = 1 << 64
254 self.max_address = -1
# Register `code` on every page its [start, end) range touches;
# max_pages >= 0 caps the number of pages (used for kernel symbols).
256 def Add(self, code, max_pages=-1):
257 page_id = CodePage.PageId(code.start_address)
258 limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
260 while page_id < limit_id:
261 if max_pages >= 0 and pages > max_pages:
262 print >>sys.stderr, \
263 "Warning: page limit (%d) reached for %s [%s]" % (
264 max_pages, code.name, code.origin)
266 if page_id in self.pages:
267 page = self.pages[page_id]
269 page = CodePage(CodePage.PageAddressFromId(page_id))
270 self.pages[page_id] = page
274 self.min_address = min(self.min_address, code.start_address)
275 self.max_address = max(self.max_address, code.end_address)
# Remove `code` from every page in its range (mirror of Add).
277 def Remove(self, code):
278 page_id = CodePage.PageId(code.start_address)
279 limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
281 while page_id < limit_id:
282 if page_id not in self.pages:
285 page = self.pages[page_id]
# AllCode generator body (header missing): yield each code object once —
# only from the page that contains its start address, to dedupe spans.
292 for page in self.pages.itervalues():
294 if CodePage.PageAddress(code.start_address) == page.address:
# UsedCode / Print bodies (headers missing) both iterate AllCode().
298 for code in self.AllCode():
303 for code in self.AllCode():
# Find(pc) body (header missing): range check, then page lookup.
307 if pc < self.min_address or pc >= self.max_address:
309 page_id = CodePage.PageId(pc)
310 if page_id not in self.pages:
312 return self.pages[page_id].Find(pc)
# NOTE(review): sampled dump — `self.arch = arch` (original line 319) is
# missing. Plain value holder for the architecture and log header size.
315 class CodeInfo(object):
316 """Generic info about generated code objects."""
318 def __init__(self, arch, header_size):
320 self.header_size = header_size
# NOTE(review): sampled dump — the try/finally around the file, the
# `for line in log:` loop header, and the `if match:` guard are missing.
323 class SnapshotLogReader(object):
324 """V8 snapshot log reader."""
# Matches lines like: snapshot-code-name,<position>,"<name>"
326 _SNAPSHOT_CODE_NAME_RE = re.compile(
327 r"snapshot-code-name,(\d+),\"(.*)\"")
329 def __init__(self, log_name):
330 self.log_name = log_name
# Parse the snapshot log into {serialization position -> code name}.
332 def ReadNameMap(self):
333 log = open(self.log_name, "r")
335 snapshot_pos_to_name = {}
337 match = SnapshotLogReader._SNAPSHOT_CODE_NAME_RE.match(line)
339 pos = int(match.group(1))
340 name = match.group(2)
341 snapshot_pos_to_name[pos] = name
344 return snapshot_pos_to_name
# NOTE(review): sampled dump — missing lines include _CODE_MOVE_TAG ("M",
# original line 359), `self.log_pos = 0`, per-tag `self.log_pos += 1`,
# `continue` statements, JS_ORIGIN assignment, and the Dispose header.
# The reader mmaps the binary .ll log and replays tagged records into a
# CodeMap. "conficting"/"Conficting" typos are upstream; they live in an
# identifier and a runtime string, so a doc-only edit must not touch them.
347 class LogReader(object):
348 """V8 low-level (binary) log reader."""
# Pointer width per architecture — determines the struct field types below.
350 _ARCH_TO_POINTER_TYPE_MAP = {
351 "ia32": ctypes.c_uint32,
352 "arm": ctypes.c_uint32,
353 "mips": ctypes.c_uint32,
354 "x64": ctypes.c_uint64,
355 "arm64": ctypes.c_uint64
# One-byte record tags (code-move tag "M" at missing line 359).
358 _CODE_CREATE_TAG = "C"
360 _CODE_DELETE_TAG = "D"
361 _SNAPSHOT_POSITION_TAG = "P"
362 _CODE_MOVING_GC_TAG = "G"
364 def __init__(self, log_name, code_map, snapshot_pos_to_name):
365 self.log_file = open(log_name, "r")
366 self.log = mmap.mmap(self.log_file.fileno(), 0, mmap.MAP_PRIVATE)
368 self.code_map = code_map
369 self.snapshot_pos_to_name = snapshot_pos_to_name
370 self.address_to_snapshot_name = {}
# The log starts with a NUL-terminated architecture name.
372 self.arch = self.log[:self.log.find("\0")]
373 self.log_pos += len(self.arch) + 1
374 assert self.arch in LogReader._ARCH_TO_POINTER_TYPE_MAP, \
375 "Unsupported architecture %s" % self.arch
376 pointer_type = LogReader._ARCH_TO_POINTER_TYPE_MAP[self.arch]
# ctypes views of the fixed-size record bodies that follow each tag byte.
378 self.code_create_struct = LogReader._DefineStruct([
379 ("name_size", ctypes.c_int32),
380 ("code_address", pointer_type),
381 ("code_size", ctypes.c_int32)])
383 self.code_move_struct = LogReader._DefineStruct([
384 ("from_address", pointer_type),
385 ("to_address", pointer_type)])
387 self.code_delete_struct = LogReader._DefineStruct([
388 ("address", pointer_type)])
390 self.snapshot_position_struct = LogReader._DefineStruct([
391 ("address", pointer_type),
392 ("position", ctypes.c_int32)])
# Replay log records until a code-moving-GC tag (or EOF); called again
# each time the fake GC mmap shows up in the perf trace.
394 def ReadUpToGC(self):
395 while self.log_pos < self.log.size():
396 tag = self.log[self.log_pos]
399 if tag == LogReader._CODE_MOVING_GC_TAG:
400 self.address_to_snapshot_name.clear()
403 if tag == LogReader._CODE_CREATE_TAG:
404 event = self.code_create_struct.from_buffer(self.log, self.log_pos)
405 self.log_pos += ctypes.sizeof(event)
406 start_address = event.code_address
407 end_address = start_address + event.code_size
# Prefer the snapshot-provided name when this address was pre-registered.
408 if start_address in self.address_to_snapshot_name:
409 name = self.address_to_snapshot_name[start_address]
410 origin = JS_SNAPSHOT_ORIGIN
412 name = self.log[self.log_pos:self.log_pos + event.name_size]
414 self.log_pos += event.name_size
# The raw machine code is stored inline; remember its offset for disasm.
415 origin_offset = self.log_pos
416 self.log_pos += event.code_size
417 code = Code(name, start_address, end_address, origin, origin_offset)
418 conficting_code = self.code_map.Find(start_address)
420 if not (conficting_code.start_address == code.start_address and
421 conficting_code.end_address == code.end_address):
422 self.code_map.Remove(conficting_code)
424 LogReader._HandleCodeConflict(conficting_code, code)
425 # TODO(vitalyr): this warning is too noisy because of our
426 # attempts to reconstruct code log from the snapshot.
427 # print >>sys.stderr, \
428 # "Warning: Skipping duplicate code log entry %s" % code
430 self.code_map.Add(code)
433 if tag == LogReader._CODE_MOVE_TAG:
434 event = self.code_move_struct.from_buffer(self.log, self.log_pos)
435 self.log_pos += ctypes.sizeof(event)
436 old_start_address = event.from_address
437 new_start_address = event.to_address
438 if old_start_address == new_start_address:
439 # Skip useless code move entries.
441 code = self.code_map.Find(old_start_address)
443 print >>sys.stderr, "Warning: Not found %x" % old_start_address
445 assert code.start_address == old_start_address, \
446 "Inexact move address %x for %s" % (old_start_address, code)
# Re-key the object in the map: remove, shift the range, re-add.
447 self.code_map.Remove(code)
448 size = code.end_address - code.start_address
449 code.start_address = new_start_address
450 code.end_address = new_start_address + size
451 self.code_map.Add(code)
454 if tag == LogReader._CODE_DELETE_TAG:
455 event = self.code_delete_struct.from_buffer(self.log, self.log_pos)
456 self.log_pos += ctypes.sizeof(event)
457 old_start_address = event.address
458 code = self.code_map.Find(old_start_address)
460 print >>sys.stderr, "Warning: Not found %x" % old_start_address
462 assert code.start_address == old_start_address, \
463 "Inexact delete address %x for %s" % (old_start_address, code)
464 self.code_map.Remove(code)
# Snapshot-position records map an address to a serialization position so
# a later code-create at that address can pick up its snapshot name.
467 if tag == LogReader._SNAPSHOT_POSITION_TAG:
468 event = self.snapshot_position_struct.from_buffer(self.log,
470 self.log_pos += ctypes.sizeof(event)
471 start_address = event.address
472 snapshot_pos = event.position
473 if snapshot_pos in self.snapshot_pos_to_name:
474 self.address_to_snapshot_name[start_address] = \
475 self.snapshot_pos_to_name[snapshot_pos]
478 assert False, "Unknown tag %s" % tag
# Dispose body (header missing): close the mmap'd log file.
482 self.log_file.close()
# Build an anonymous packed ctypes.Structure from (name, type) pairs
# (body lines `_fields_ = fields` / `return Struct` are missing here).
485 def _DefineStruct(fields):
486 class Struct(ctypes.Structure):
# Two create records for the same [start, end) range: merge their names.
491 def _HandleCodeConflict(old_code, new_code):
492 assert (old_code.start_address == new_code.start_address and
493 old_code.end_address == new_code.end_address), \
494 "Conficting code log entries %s and %s" % (old_code, new_code)
495 if old_code.name == new_code.name:
497 # Code object may be shared by a few functions. Collect the full
499 old_code.AddName(new_code.name)
# NOTE(review): sampled dump — the CTYPE_MAP opening (with the "u8" entry),
# the __str__ header inside TraceItem, and @staticmethod lines are missing.
502 class Descriptor(object):
503 """Descriptor of a structure in the binary trace log."""
# Maps the short field-format strings used below to ctypes types.
506 "u16": ctypes.c_uint16,
507 "u32": ctypes.c_uint32,
508 "u64": ctypes.c_uint64
# Build a ctypes.Structure subclass from (name, format) pairs and keep it
# as self.ctype for zero-copy reads out of the mmap'd trace.
511 def __init__(self, fields):
512 class TraceItem(ctypes.Structure):
513 _fields_ = Descriptor.CtypesFields(fields)
516 return ", ".join("%s: %s" % (field, self.__getattribute__(field))
517 for field, _ in TraceItem._fields_)
519 self.ctype = TraceItem
# Overlay the structure on the trace buffer at `offset` (no copy).
521 def Read(self, trace, offset):
522 return self.ctype.from_buffer(trace, offset)
525 def CtypesFields(fields):
526 return [(field, Descriptor.CTYPE_MAP[format]) for (field, format) in fields]
# NOTE(review): sampled dump — several descriptor fields are missing (e.g.
# the header "magic" field, most of PERF_EVENT_ATTR/HEADER/MMAP bodies,
# closing brackets, and PERF_RECORD_MMAP = 1).
529 # Please see http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=tree;f=tools/perf
530 # for the gory details.
533 # Reference: struct perf_file_header in kernel/tools/perf/util/header.h
534 TRACE_HEADER_DESC = Descriptor([
537 ("attr_size", "u64"),
538 ("attrs_offset", "u64"),
539 ("attrs_size", "u64"),
540 ("data_offset", "u64"),
541 ("data_size", "u64"),
542 ("event_types_offset", "u64"),
543 ("event_types_size", "u64")
547 # Reference: /usr/include/linux/perf_event.h
548 PERF_EVENT_ATTR_DESC = Descriptor([
552 ("sample_period_or_freq", "u64"),
553 ("sample_type", "u64"),
554 ("read_format", "u64"),
556 ("wakeup_events_or_watermark", "u32"),
563 # Reference: /usr/include/linux/perf_event.h
564 PERF_EVENT_HEADER_DESC = Descriptor([
571 # Reference: kernel/events/core.c
572 PERF_MMAP_EVENT_BODY_DESC = Descriptor([
581 # perf_event_attr.sample_type bits control the set of
582 # perf_sample_event fields.
583 PERF_SAMPLE_IP = 1 << 0
584 PERF_SAMPLE_TID = 1 << 1
585 PERF_SAMPLE_TIME = 1 << 2
586 PERF_SAMPLE_ADDR = 1 << 3
587 PERF_SAMPLE_READ = 1 << 4
588 PERF_SAMPLE_CALLCHAIN = 1 << 5
589 PERF_SAMPLE_ID = 1 << 6
590 PERF_SAMPLE_CPU = 1 << 7
591 PERF_SAMPLE_PERIOD = 1 << 8
592 PERF_SAMPLE_STREAM_ID = 1 << 9
593 PERF_SAMPLE_RAW = 1 << 10
596 # Reference: /usr/include/perf_event.h, the comment for PERF_RECORD_SAMPLE.
# Each (field, format, bit) entry is included in a sample body only when
# its bit is set in perf_event_attr.sample_type (see _SampleEventBodyDesc).
597 PERF_SAMPLE_EVENT_BODY_FIELDS = [
598 ("ip", "u64", PERF_SAMPLE_IP),
599 ("pid", "u32", PERF_SAMPLE_TID),
600 ("tid", "u32", PERF_SAMPLE_TID),
601 ("time", "u64", PERF_SAMPLE_TIME),
602 ("addr", "u64", PERF_SAMPLE_ADDR),
603 ("id", "u64", PERF_SAMPLE_ID),
604 ("stream_id", "u64", PERF_SAMPLE_STREAM_ID),
605 ("cpu", "u32", PERF_SAMPLE_CPU),
606 ("res", "u32", PERF_SAMPLE_CPU),
607 ("period", "u64", PERF_SAMPLE_PERIOD),
608 # Don't want to handle read format that comes after the period and
609 # before the callchain and has variable size.
610 ("nr", "u64", PERF_SAMPLE_CALLCHAIN)
611 # Raw data follows the callchain and is ignored.
615 PERF_SAMPLE_EVENT_IP_FORMAT = "u64"
619 PERF_RECORD_SAMPLE = 9
# NOTE(review): sampled dump — missing lines include `offset = self.offset`
# in ReadEventHeader (so `return header, offset` at original 656 references
# a name assigned on a missing line), the `return mmap_info` / `return
# sample` lines, `sample.ips = []` / append, and the Dispose header.
622 class TraceReader(object):
623 """Perf (linux-2.6/tools/perf) trace file reader."""
# "PERFFILE" interpreted as a little-endian u64.
625 _TRACE_HEADER_MAGIC = 4993446653023372624
627 def __init__(self, trace_name):
628 self.trace_file = open(trace_name, "r")
629 self.trace = mmap.mmap(self.trace_file.fileno(), 0, mmap.MAP_PRIVATE)
630 self.trace_header = TRACE_HEADER_DESC.Read(self.trace, 0)
631 if self.trace_header.magic != TraceReader._TRACE_HEADER_MAGIC:
632 print >>sys.stderr, "Warning: unsupported trace header magic"
633 self.offset = self.trace_header.data_offset
634 self.limit = self.trace_header.data_offset + self.trace_header.data_size
635 assert self.limit <= self.trace.size(), \
636 "Trace data limit exceeds trace file size"
637 self.header_size = ctypes.sizeof(PERF_EVENT_HEADER_DESC.ctype)
638 assert self.trace_header.attrs_size != 0, \
639 "No perf event attributes found in the trace"
# The first attr's sample_type decides the sample body layout and whether
# call chains are present.
640 perf_event_attr = PERF_EVENT_ATTR_DESC.Read(self.trace,
641 self.trace_header.attrs_offset)
642 self.sample_event_body_desc = self._SampleEventBodyDesc(
643 perf_event_attr.sample_type)
644 self.callchain_supported = \
645 (perf_event_attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0
646 if self.callchain_supported:
647 self.ip_struct = Descriptor.CTYPE_MAP[PERF_SAMPLE_EVENT_IP_FORMAT]
648 self.ip_size = ctypes.sizeof(self.ip_struct)
# Advance past one event; returns (header, offset-of-that-event).
650 def ReadEventHeader(self):
651 if self.offset >= self.limit:
654 header = PERF_EVENT_HEADER_DESC.Read(self.trace, self.offset)
655 self.offset += header.size
656 return header, offset
658 def ReadMmap(self, header, offset):
659 mmap_info = PERF_MMAP_EVENT_BODY_DESC.Read(self.trace,
660 offset + self.header_size)
661 # Read null-terminated filename.
662 filename = self.trace[offset + self.header_size + ctypes.sizeof(mmap_info):
663 offset + header.size]
664 mmap_info.filename = HOST_ROOT + filename[:filename.find(chr(0))]
# Decode one PERF_RECORD_SAMPLE; appends callchain ips when supported.
667 def ReadSample(self, header, offset):
668 sample = self.sample_event_body_desc.Read(self.trace,
669 offset + self.header_size)
670 if not self.callchain_supported:
673 offset += self.header_size + ctypes.sizeof(sample)
674 for _ in xrange(sample.nr):
676 self.ip_struct.from_buffer(self.trace, offset).value)
677 offset += self.ip_size
# Dispose body (header missing): close the mmap'd trace file.
682 self.trace_file.close()
# Build the per-trace sample-body Descriptor from the sample_type bits.
684 def _SampleEventBodyDesc(self, sample_type):
685 assert (sample_type & PERF_SAMPLE_READ) == 0, \
686 "Can't hande read format in samples"
687 fields = [(field, format)
688 for (field, format, bit) in PERF_SAMPLE_EVENT_BODY_FIELDS
689 if (bit & sample_type) != 0]
690 return Descriptor(fields)
# NOTE(review): sampled dump — the PERF_KERNEL_ALLSYMS_RE pattern string
# (original line 703) is missing.
# Matches the first line of an objdump -h section header row.
693 OBJDUMP_SECTION_HEADER_RE = re.compile(
694 r"^\s*\d+\s(\.\S+)\s+[a-f0-9]")
# Matches one objdump -t/-T symbol-table line: addr, flags, section, size, name.
695 OBJDUMP_SYMBOL_LINE_RE = re.compile(
696 r"^([a-f0-9]+)\s(.{7})\s(\S+)\s+([a-f0-9]+)\s+(?:\.hidden\s+)?(.*)$")
697 OBJDUMP_DYNAMIC_SYMBOLS_START_RE = re.compile(
698 r"^DYNAMIC SYMBOL TABLE")
# mmaps of ld.so.cache are not real libraries; skip them.
699 OBJDUMP_SKIP_RE = re.compile(
700 r"^.*ld\.so\.cache$")
701 KERNEL_ALLSYMS_FILE = "/proc/kallsyms"
702 PERF_KERNEL_ALLSYMS_RE = re.compile(
# Matches text-segment (t/T) symbols in /proc/kallsyms: addr, name.
704 KERNEL_ALLSYMS_LINE_RE = re.compile(
705 r"^([a-f0-9]+)\s(?:t|T)\s(\S+)$")
# NOTE(review): sampled dump — missing lines include the class docstring,
# __init__ (self.infos/self.names), several returns/`continue`s, the
# `dynamic = False` / `after_section = None` initializers, the Tick header,
# and the kallsyms loop's final code_map.Add.
708 class LibraryRepo(object):
# Cheap probe: does `filename` have a .dynsym section (so -T is valid)?
715 def HasDynamicSymbols(self, filename):
716 if filename.endswith(".ko"): return False
# NOTE(review): command built via shell string interpolation — fine for a
# local dev tool, but filenames with shell metacharacters would break it.
717 process = subprocess.Popen(
718 "%s -h %s" % (OBJDUMP_BIN, filename),
719 shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
720 pipe = process.stdout
723 match = OBJDUMP_SECTION_HEADER_RE.match(line)
724 if match and match.group(1) == 'dynsym': return True
727 assert process.wait() == 0, "Failed to objdump -h %s" % filename
# Parse one mmap'd library with objdump and add its code symbols to the map.
731 def Load(self, mmap_info, code_map, options):
732 # Skip kernel mmaps when requested using the fact that their tid
734 if mmap_info.tid == 0 and not options.kernel:
736 if OBJDUMP_SKIP_RE.match(mmap_info.filename):
738 if PERF_KERNEL_ALLSYMS_RE.match(mmap_info.filename):
739 return self._LoadKernelSymbols(code_map)
740 self.infos.append(mmap_info)
742 mmap_info.unique_name = self._UniqueMmapName(mmap_info)
743 if not os.path.exists(mmap_info.filename):
745 # Request section headers (-h), symbols (-t), and dynamic symbols
747 # Unfortunately, section headers span two lines, so we have to
748 # keep the just seen section name (from the first line in each
749 # section header) in the after_section variable.
750 if self.HasDynamicSymbols(mmap_info.filename):
751 dynamic_symbols = "-T"
754 process = subprocess.Popen(
755 "%s -h -t %s -C %s" % (OBJDUMP_BIN, dynamic_symbols, mmap_info.filename),
756 shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
757 pipe = process.stdout
759 code_sections = set()
760 reloc_sections = set()
# Second line of a section header: flags such as CODE/RELOC for the
# section name remembered in after_section.
765 if line.find("CODE") != -1:
766 code_sections.add(after_section)
767 if line.find("RELOC") != -1:
768 reloc_sections.add(after_section)
772 match = OBJDUMP_SECTION_HEADER_RE.match(line)
774 after_section = match.group(1)
777 if OBJDUMP_DYNAMIC_SYMBOLS_START_RE.match(line):
781 match = OBJDUMP_SYMBOL_LINE_RE.match(line)
783 start_address = int(match.group(1), 16)
784 origin_offset = start_address
785 flags = match.group(2)
786 section = match.group(3)
787 if section in code_sections:
# Dynamic symbols (and symbols in RELOC'd sections) are file-relative:
# rebase them onto the mmap load address.
788 if dynamic or section in reloc_sections:
789 start_address += mmap_info.addr
790 size = int(match.group(4), 16)
791 name = match.group(5)
792 origin = mmap_info.filename
793 code_map.Add(Code(name, start_address, start_address + size,
794 origin, origin_offset))
797 assert process.wait() == 0, "Failed to objdump %s" % mmap_info.filename
# Tick(pc) body (header missing): move-to-front scan over mmap infos,
# crediting the containing library.
800 for i, mmap_info in enumerate(self.infos):
801 if mmap_info.addr <= pc < (mmap_info.addr + mmap_info.len):
803 self.infos[0], self.infos[i] = mmap_info, self.infos[0]
# Disambiguate repeated basenames by appending a counter.
807 def _UniqueMmapName(self, mmap_info):
808 name = mmap_info.filename
810 while name in self.names:
811 name = "%s-%d" % (mmap_info.filename, index)
# Read /proc/kallsyms; each symbol's end is the next symbol's start, so
# the previous Code is finalized one line late (page cap 16 in Add).
816 def _LoadKernelSymbols(self, code_map):
817 if not os.path.exists(KERNEL_ALLSYMS_FILE):
818 print >>sys.stderr, "Warning: %s not found" % KERNEL_ALLSYMS_FILE
820 kallsyms = open(KERNEL_ALLSYMS_FILE, "r")
822 for line in kallsyms:
823 match = KERNEL_ALLSYMS_LINE_RE.match(line)
825 start_address = int(match.group(1), 16)
826 end_address = start_address
827 name = match.group(2)
829 code.end_address = start_address
830 code_map.Add(code, 16)
831 code = Code(name, start_address, end_address, "kernel", 0)
# NOTE(review): sampled dump — separator/`print` lines between the two
# report sections (original 845) are missing.
# Print per-symbol then per-library tick counts, sorted by ticks descending;
# disassembly is shown for the top options.disasm_top symbols (or all).
835 def PrintReport(code_map, library_repo, arch, ticks, options):
836 print "Ticks per symbol:"
837 used_code = [code for code in code_map.UsedCode()]
838 used_code.sort(key=lambda x: x.self_ticks, reverse=True)
839 for i, code in enumerate(used_code):
840 code_ticks = code.self_ticks
841 print "%10d %5.1f%% %s [%s]" % (code_ticks, 100. * code_ticks / ticks,
842 code.FullName(), code.origin)
843 if options.disasm_all or i < options.disasm_top:
844 code.PrintAnnotated(arch, options)
846 print "Ticks per library:"
847 mmap_infos = [m for m in library_repo.infos if m.ticks > 0]
848 mmap_infos.sort(key=lambda m: m.ticks, reverse=True)
849 for mmap_info in mmap_infos:
850 mmap_ticks = mmap_info.ticks
851 print "%10d %5.1f%% %s" % (mmap_ticks, 100. * mmap_ticks / ticks,
852 mmap_info.unique_name)
# NOTE(review): sampled dump — the "digraph" prolog/epilog prints and the
# `continue` after the tick threshold are missing.
# Emit a Graphviz call graph of used code objects with >= 10 self ticks;
# edge labels are callee tick counts.
855 def PrintDot(code_map, options):
857 for code in code_map.UsedCode():
858 if code.self_ticks < 10:
860 print "n%d [shape=box,label=\"%s\"];" % (code.id, code.name)
861 if code.callee_ticks:
862 for callee, ticks in code.callee_ticks.iteritems():
863 print "n%d -> n%d [label=\"%d\"];" % (code.id, callee.id, ticks)
# NOTE(review): sampled dump of the driver script — missing lines include
# most option defaults/actions, counter initializations (ticks,
# missed_ticks, events, mmap_time, sample_time), `start = time.time()`
# markers, the `while True:` event loop header and its break, and the
# CodeMap construction. The flow is: parse options, read snapshot + .ll
# logs, then walk perf trace events, attributing samples to code objects.
867 if __name__ == "__main__":
868 parser = optparse.OptionParser(USAGE)
869 parser.add_option("--snapshot-log",
870 default="obj/release/snapshot.log",
871 help="V8 snapshot log file name [default: %default]")
872 parser.add_option("--log",
874 help="V8 log file name [default: %default]")
875 parser.add_option("--snapshot",
878 help="process V8 snapshot log [default: %default]")
879 parser.add_option("--trace",
881 help="perf trace file name [default: %default]")
882 parser.add_option("--kernel",
885 help="process kernel entries [default: %default]")
886 parser.add_option("--disasm-top",
889 help=("number of top symbols to disassemble and annotate "
890 "[default: %default]"))
891 parser.add_option("--disasm-all",
894 help=("disassemble and annotate all used symbols "
895 "[default: %default]"))
896 parser.add_option("--dot",
899 help="produce dot output (WIP) [default: %default]")
900 parser.add_option("--quiet", "-q",
903 help="no auxiliary messages [default: %default]")
904 parser.add_option("--gc-fake-mmap",
905 default="/tmp/__v8_gc__",
906 help="gc fake mmap file [default: %default]")
907 parser.add_option("--objdump",
908 default="/usr/bin/objdump",
909 help="objdump tool to use [default: %default]")
910 parser.add_option("--host-root",
912 help="Path to the host root [default: %default]")
913 options, args = parser.parse_args()
915 if not options.quiet:
917 print "V8 logs: %s, %s, %s.ll" % (options.snapshot_log,
921 print "V8 log: %s, %s.ll (no snapshot)" % (options.log, options.log)
922 print "Perf trace file: %s" % options.trace
# Module-level globals consumed by TraceReader.ReadMmap and the event loop.
924 V8_GC_FAKE_MMAP = options.gc_fake_mmap
925 HOST_ROOT = options.host_root
926 if os.path.exists(options.objdump):
927 disasm.OBJDUMP_BIN = options.objdump
928 OBJDUMP_BIN = options.objdump
930 print "Cannot find %s, falling back to default objdump" % options.objdump
# Tick counters (others, e.g. ticks/missed_ticks/events, are on missing lines).
936 really_missed_ticks = 0
939 v8_internal_ticks = 0
943 # Process the snapshot log to fill the snapshot name map.
944 snapshot_name_map = {}
946 snapshot_log_reader = SnapshotLogReader(log_name=options.snapshot_log)
947 snapshot_name_map = snapshot_log_reader.ReadNameMap()
949 # Initialize the log reader.
951 log_reader = LogReader(log_name=options.log + ".ll",
953 snapshot_pos_to_name=snapshot_name_map)
954 if not options.quiet:
955 print "Generated code architecture: %s" % log_reader.arch
959 # Process the code and trace logs.
960 library_repo = LibraryRepo()
961 log_reader.ReadUpToGC()
962 trace_reader = TraceReader(options.trace)
964 header, offset = trace_reader.ReadEventHeader()
968 if header.type == PERF_RECORD_MMAP:
970 mmap_info = trace_reader.ReadMmap(header, offset)
# The fake GC mmap is a sentinel: replay more of the V8 log up to the
# next code-moving GC instead of loading a library.
971 if mmap_info.filename == HOST_ROOT + V8_GC_FAKE_MMAP:
972 log_reader.ReadUpToGC()
974 library_repo.Load(mmap_info, code_map, options)
975 mmap_time += time.time() - start
976 elif header.type == PERF_RECORD_SAMPLE:
979 sample = trace_reader.ReadSample(header, offset)
980 code = code_map.Find(sample.ip)
983 if code.codetype == Code.OPTIMIZED:
985 elif code.codetype == Code.FULL_CODEGEN:
987 elif code.codetype == Code.V8INTERNAL:
988 v8_internal_ticks += 1
# A tick in neither the code map nor any library is truly unaccounted.
991 if not library_repo.Tick(sample.ip) and not code:
992 really_missed_ticks += 1
993 if trace_reader.callchain_supported:
994 for ip in sample.ips:
995 caller_code = code_map.Find(ip)
998 caller_code.CalleeTick(code)
1000 sample_time += time.time() - start
1003 PrintDot(code_map, options)
1005 PrintReport(code_map, library_repo, log_reader.arch, ticks, options)
1007 if not options.quiet:
1008 def PrintTicks(number, total, description):
1009 print("%10d %5.1f%% ticks in %s" %
1010 (number, 100.0*number/total, description))
1013 print "%10d total trace events" % events
1014 print "%10d total ticks" % ticks
1015 print "%10d ticks not in symbols" % missed_ticks
1016 unaccounted = "unaccounted ticks"
1017 if really_missed_ticks > 0:
1018 unaccounted += " (probably in the kernel, try --kernel)"
1019 PrintTicks(really_missed_ticks, ticks, unaccounted)
1020 PrintTicks(optimized_ticks, ticks, "ticks in optimized code")
1021 PrintTicks(generated_ticks, ticks, "ticks in other lazily compiled code")
1022 PrintTicks(v8_internal_ticks, ticks, "ticks in v8::internal::*")
1023 print "%10d total symbols" % len([c for c in code_map.AllCode()])
1024 print "%10d used symbols" % len([c for c in code_map.UsedCode()])
1025 print "%9.2fs library processing time" % mmap_time
1026 print "%9.2fs tick processing time" % sample_time
1028 log_reader.Dispose()
1029 trace_reader.Dispose()