deps/v8/tools/ll_prof.py

   1 #!/usr/bin/env python
   2 #
   3 # Copyright 2012 the V8 project authors. All rights reserved.
   4 # Redistribution and use in source and binary forms, with or without
   5 # modification, are permitted provided that the following conditions are
   6 # met:
   7 #
   8 #     * Redistributions of source code must retain the above copyright
   9 #       notice, this list of conditions and the following disclaimer.
  10 #     * Redistributions in binary form must reproduce the above
  11 #       copyright notice, this list of conditions and the following
  12 #       disclaimer in the documentation and/or other materials provided
  13 #       with the distribution.
  14 #     * Neither the name of Google Inc. nor the names of its
  15 #       contributors may be used to endorse or promote products derived
  16 #       from this software without specific prior written permission.
  17 #
  18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30 import bisect
  31 import collections
  32 import ctypes
  33 import disasm
  34 import mmap
  35 import optparse
  36 import os
  37 import re
  38 import subprocess
  39 import sys
  40 import time
  41
  42
  43 USAGE="""usage: %prog [OPTION]...
  44
  45 Analyses V8 and perf logs to produce profiles.
  46
  47 Perf logs can be collected using a command like:
  48   $ perf record -R -e cycles -c 10000 -f -i ./d8 bench.js --ll-prof
  49   # -R: collect all data
  50   # -e cycles: use cpu-cycles event (run "perf list" for details)
  51   # -c 10000: write a sample after each 10000 events
  52   # -f: force output file overwrite
  53   # -i: limit profiling to our process and the kernel
  54   # --ll-prof shell flag enables the right V8 logs
  55 This will produce a binary trace file (perf.data) that %prog can analyse.
  56
  57 IMPORTANT:
  58   The kernel has an internal maximum for events per second, it is 100K by
  59   default. That's not enough for "-c 10000". Set it to some higher value:
  60   $ echo 10000000 | sudo tee /proc/sys/kernel/perf_event_max_sample_rate
  61   You can also make the warning about kernel address maps go away:
  62   $ echo 0 | sudo tee /proc/sys/kernel/kptr_restrict
  63
  64 We have a convenience script that handles all of the above for you:
  65   $ tools/run-llprof.sh ./d8 bench.js
  66
  67 Examples:
  68   # Print flat profile with annotated disassembly for the 10 top
  69   # symbols. Use default log names and include the snapshot log.
  70   $ %prog --snapshot --disasm-top=10
  71
  72   # Print flat profile with annotated disassembly for all used symbols.
  73   # Use default log names and include kernel symbols into analysis.
  74   $ %prog --disasm-all --kernel
  75
  76   # Print flat profile. Use custom log names.
  77   $ %prog --log=foo.log --snapshot-log=snap-foo.log --trace=foo.data --snapshot
  78 """
  79
  80
  81 JS_ORIGIN = "js"
  82 JS_SNAPSHOT_ORIGIN = "js-snapshot"
  83
  84 class Code(object):
  85   """Code object."""
  86
  87   _id = 0
  88   UNKNOWN = 0
  89   V8INTERNAL = 1
  90   FULL_CODEGEN = 2
  91   OPTIMIZED = 3
  92
  93   def __init__(self, name, start_address, end_address, origin, origin_offset):
  94     self.id = Code._id
  95     Code._id += 1
  96     self.name = name
  97     self.other_names = None
  98     self.start_address = start_address
  99     self.end_address = end_address
 100     self.origin = origin
 101     self.origin_offset = origin_offset
 102     self.self_ticks = 0
 103     self.self_ticks_map = None
 104     self.callee_ticks = None
 105     if name.startswith("LazyCompile:*"):
 106       self.codetype = Code.OPTIMIZED
 107     elif name.startswith("LazyCompile:"):
 108       self.codetype = Code.FULL_CODEGEN
 109     elif name.startswith("v8::internal::"):
 110       self.codetype = Code.V8INTERNAL
 111     else:
 112       self.codetype = Code.UNKNOWN
 113
 114   def AddName(self, name):
 115     assert self.name != name
 116     if self.other_names is None:
 117       self.other_names = [name]
 118       return
 119     if not name in self.other_names:
 120       self.other_names.append(name)
 121
 122   def FullName(self):
 123     if self.other_names is None:
 124       return self.name
 125     self.other_names.sort()
 126     return "%s (aka %s)" % (self.name, ", ".join(self.other_names))
 127
 128   def IsUsed(self):
 129     return self.self_ticks > 0 or self.callee_ticks is not None
 130
 131   def Tick(self, pc):
 132     self.self_ticks += 1
 133     if self.self_ticks_map is None:
 134       self.self_ticks_map = collections.defaultdict(lambda: 0)
 135     offset = pc - self.start_address
 136     self.self_ticks_map[offset] += 1
 137
 138   def CalleeTick(self, callee):
 139     if self.callee_ticks is None:
 140       self.callee_ticks = collections.defaultdict(lambda: 0)
 141     self.callee_ticks[callee] += 1
 142
 143   def PrintAnnotated(self, arch, options):
 144     if self.self_ticks_map is None:
 145       ticks_map = []
 146     else:
 147       ticks_map = self.self_ticks_map.items()
 148     # Convert the ticks map to offsets and counts arrays so that later
 149     # we can do binary search in the offsets array.
 150     ticks_map.sort(key=lambda t: t[0])
 151     ticks_offsets = [t[0] for t in ticks_map]
 152     ticks_counts = [t[1] for t in ticks_map]
 153     # Get a list of disassembled lines and their addresses.
 154     lines = self._GetDisasmLines(arch, options)
 155     if len(lines) == 0:
 156       return
 157     # Print annotated lines.
 158     address = lines[0][0]
 159     total_count = 0
 160     for i in xrange(len(lines)):
 161       start_offset = lines[i][0] - address
 162       if i == len(lines) - 1:
 163         end_offset = self.end_address - self.start_address
 164       else:
 165         end_offset = lines[i + 1][0] - address
 166       # Ticks (reported pc values) are not always precise, i.e. not
 167       # necessarily point at instruction starts. So we have to search
 168       # for ticks that touch the current instruction line.
 169       j = bisect.bisect_left(ticks_offsets, end_offset)
 170       count = 0
 171       for offset, cnt in reversed(zip(ticks_offsets[:j], ticks_counts[:j])):
 172         if offset < start_offset:
 173           break
 174         count += cnt
 175       total_count += count
 176       count = 100.0 * count / self.self_ticks
 177       if count >= 0.01:
 178         print "%15.2f %x: %s" % (count, lines[i][0], lines[i][1])
 179       else:
 180         print "%s %x: %s" % (" " * 15, lines[i][0], lines[i][1])
 181     print
 182     assert total_count == self.self_ticks, \
 183         "Lost ticks (%d != %d) in %s" % (total_count, self.self_ticks, self)
 184
 185   def __str__(self):
 186     return "%s [0x%x, 0x%x) size: %d origin: %s" % (
 187       self.name,
 188       self.start_address,
 189       self.end_address,
 190       self.end_address - self.start_address,
 191       self.origin)
 192
 193   def _GetDisasmLines(self, arch, options):
 194     if self.origin == JS_ORIGIN or self.origin == JS_SNAPSHOT_ORIGIN:
 195       inplace = False
 196       filename = options.log + ".ll"
 197     else:
 198       inplace = True
 199       filename = self.origin
 200     return disasm.GetDisasmLines(filename,
 201                                  self.origin_offset,
 202                                  self.end_address - self.start_address,
 203                                  arch,
 204                                  inplace)
 205
 206
 207 class CodePage(object):
 208   """Group of adjacent code objects."""
 209
 210   SHIFT = 20  # 1M pages
 211   SIZE = (1 << SHIFT)
 212   MASK = ~(SIZE - 1)
 213
 214   @staticmethod
 215   def PageAddress(address):
 216     return address & CodePage.MASK
 217
 218   @staticmethod
 219   def PageId(address):
 220     return address >> CodePage.SHIFT
 221
 222   @staticmethod
 223   def PageAddressFromId(id):
 224     return id << CodePage.SHIFT
 225
 226   def __init__(self, address):
 227     self.address = address
 228     self.code_objects = []
 229
 230   def Add(self, code):
 231     self.code_objects.append(code)
 232
 233   def Remove(self, code):
 234     self.code_objects.remove(code)
 235
 236   def Find(self, pc):
 237     code_objects = self.code_objects
 238     for i, code in enumerate(code_objects):
 239       if code.start_address <= pc < code.end_address:
 240         code_objects[0], code_objects[i] = code, code_objects[0]
 241         return code
 242     return None
 243
 244   def __iter__(self):
 245     return self.code_objects.__iter__()
 246
 247
 248 class CodeMap(object):
 249   """Code object map."""
 250
 251   def __init__(self):
 252     self.pages = {}
 253     self.min_address = 1 << 64
 254     self.max_address = -1
 255
 256   def Add(self, code, max_pages=-1):
 257     page_id = CodePage.PageId(code.start_address)
 258     limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
 259     pages = 0
 260     while page_id < limit_id:
 261       if max_pages >= 0 and pages > max_pages:
 262         print >>sys.stderr, \
 263             "Warning: page limit (%d) reached for %s [%s]" % (
 264             max_pages, code.name, code.origin)
 265         break
 266       if page_id in self.pages:
 267         page = self.pages[page_id]
 268       else:
 269         page = CodePage(CodePage.PageAddressFromId(page_id))
 270         self.pages[page_id] = page
 271       page.Add(code)
 272       page_id += 1
 273       pages += 1
 274     self.min_address = min(self.min_address, code.start_address)
 275     self.max_address = max(self.max_address, code.end_address)
 276
 277   def Remove(self, code):
 278     page_id = CodePage.PageId(code.start_address)
 279     limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
 280     removed = False
 281     while page_id < limit_id:
 282       if page_id not in self.pages:
 283         page_id += 1
 284         continue
 285       page = self.pages[page_id]
 286       page.Remove(code)
 287       removed = True
 288       page_id += 1
 289     return removed
 290
 291   def AllCode(self):
 292     for page in self.pages.itervalues():
 293       for code in page:
 294         if CodePage.PageAddress(code.start_address) == page.address:
 295           yield code
 296
 297   def UsedCode(self):
 298     for code in self.AllCode():
 299       if code.IsUsed():
 300         yield code
 301
 302   def Print(self):
 303     for code in self.AllCode():
 304       print code
 305
 306   def Find(self, pc):
 307     if pc < self.min_address or pc >= self.max_address:
 308       return None
 309     page_id = CodePage.PageId(pc)
 310     if page_id not in self.pages:
 311       return None
 312     return self.pages[page_id].Find(pc)
 313
 314
 315 class CodeInfo(object):
 316   """Generic info about generated code objects."""
 317
 318   def __init__(self, arch, header_size):
 319     self.arch = arch
 320     self.header_size = header_size
 321
 322
 323 class SnapshotLogReader(object):
 324   """V8 snapshot log reader."""
 325
 326   _SNAPSHOT_CODE_NAME_RE = re.compile(
 327     r"snapshot-code-name,(\d+),\"(.*)\"")
 328
 329   def __init__(self, log_name):
 330     self.log_name = log_name
 331
 332   def ReadNameMap(self):
 333     log = open(self.log_name, "r")
 334     try:
 335       snapshot_pos_to_name = {}
 336       for line in log:
 337         match = SnapshotLogReader._SNAPSHOT_CODE_NAME_RE.match(line)
 338         if match:
 339           pos = int(match.group(1))
 340           name = match.group(2)
 341           snapshot_pos_to_name[pos] = name
 342     finally:
 343       log.close()
 344     return snapshot_pos_to_name
 345
 346
 347 class LogReader(object):
 348   """V8 low-level (binary) log reader."""
 349
 350   _ARCH_TO_POINTER_TYPE_MAP = {
 351     "ia32": ctypes.c_uint32,
 352     "arm": ctypes.c_uint32,
 353     "mips": ctypes.c_uint32,
 354     "x64": ctypes.c_uint64,
 355     "arm64": ctypes.c_uint64
 356   }
 357
 358   _CODE_CREATE_TAG = "C"
 359   _CODE_MOVE_TAG = "M"
 360   _CODE_DELETE_TAG = "D"
 361   _SNAPSHOT_POSITION_TAG = "P"
 362   _CODE_MOVING_GC_TAG = "G"
 363
 364   def __init__(self, log_name, code_map, snapshot_pos_to_name):
 365     self.log_file = open(log_name, "r")
 366     self.log = mmap.mmap(self.log_file.fileno(), 0, mmap.MAP_PRIVATE)
 367     self.log_pos = 0
 368     self.code_map = code_map
 369     self.snapshot_pos_to_name = snapshot_pos_to_name
 370     self.address_to_snapshot_name = {}
 371
 372     self.arch = self.log[:self.log.find("\0")]
 373     self.log_pos += len(self.arch) + 1
 374     assert self.arch in LogReader._ARCH_TO_POINTER_TYPE_MAP, \
 375         "Unsupported architecture %s" % self.arch
 376     pointer_type = LogReader._ARCH_TO_POINTER_TYPE_MAP[self.arch]
 377
 378     self.code_create_struct = LogReader._DefineStruct([
 379         ("name_size", ctypes.c_int32),
 380         ("code_address", pointer_type),
 381         ("code_size", ctypes.c_int32)])
 382
 383     self.code_move_struct = LogReader._DefineStruct([
 384         ("from_address", pointer_type),
 385         ("to_address", pointer_type)])
 386
 387     self.code_delete_struct = LogReader._DefineStruct([
 388         ("address", pointer_type)])
 389
 390     self.snapshot_position_struct = LogReader._DefineStruct([
 391         ("address", pointer_type),
 392         ("position", ctypes.c_int32)])
 393
 394   def ReadUpToGC(self):
 395     while self.log_pos < self.log.size():
 396       tag = self.log[self.log_pos]
 397       self.log_pos += 1
 398
 399       if tag == LogReader._CODE_MOVING_GC_TAG:
 400         self.address_to_snapshot_name.clear()
 401         return
 402
 403       if tag == LogReader._CODE_CREATE_TAG:
 404         event = self.code_create_struct.from_buffer(self.log, self.log_pos)
 405         self.log_pos += ctypes.sizeof(event)
 406         start_address = event.code_address
 407         end_address = start_address + event.code_size
 408         if start_address in self.address_to_snapshot_name:
 409           name = self.address_to_snapshot_name[start_address]
 410           origin = JS_SNAPSHOT_ORIGIN
 411         else:
 412           name = self.log[self.log_pos:self.log_pos + event.name_size]
 413           origin = JS_ORIGIN
 414         self.log_pos += event.name_size
 415         origin_offset = self.log_pos
 416         self.log_pos += event.code_size
 417         code = Code(name, start_address, end_address, origin, origin_offset)
 418         conficting_code = self.code_map.Find(start_address)
 419         if conficting_code:
 420           if not (conficting_code.start_address == code.start_address and
 421             conficting_code.end_address == code.end_address):
 422             self.code_map.Remove(conficting_code)
 423           else:
 424             LogReader._HandleCodeConflict(conficting_code, code)
 425             # TODO(vitalyr): this warning is too noisy because of our
 426             # attempts to reconstruct code log from the snapshot.
 427             # print >>sys.stderr, \
 428             #     "Warning: Skipping duplicate code log entry %s" % code
 429             continue
 430         self.code_map.Add(code)
 431         continue
 432
 433       if tag == LogReader._CODE_MOVE_TAG:
 434         event = self.code_move_struct.from_buffer(self.log, self.log_pos)
 435         self.log_pos += ctypes.sizeof(event)
 436         old_start_address = event.from_address
 437         new_start_address = event.to_address
 438         if old_start_address == new_start_address:
 439           # Skip useless code move entries.
 440           continue
 441         code = self.code_map.Find(old_start_address)
 442         if not code:
 443           print >>sys.stderr, "Warning: Not found %x" % old_start_address
 444           continue
 445         assert code.start_address == old_start_address, \
 446             "Inexact move address %x for %s" % (old_start_address, code)
 447         self.code_map.Remove(code)
 448         size = code.end_address - code.start_address
 449         code.start_address = new_start_address
 450         code.end_address = new_start_address + size
 451         self.code_map.Add(code)
 452         continue
 453
 454       if tag == LogReader._CODE_DELETE_TAG:
 455         event = self.code_delete_struct.from_buffer(self.log, self.log_pos)
 456         self.log_pos += ctypes.sizeof(event)
 457         old_start_address = event.address
 458         code = self.code_map.Find(old_start_address)
 459         if not code:
 460           print >>sys.stderr, "Warning: Not found %x" % old_start_address
 461           continue
 462         assert code.start_address == old_start_address, \
 463             "Inexact delete address %x for %s" % (old_start_address, code)
 464         self.code_map.Remove(code)
 465         continue
 466
 467       if tag == LogReader._SNAPSHOT_POSITION_TAG:
 468         event = self.snapshot_position_struct.from_buffer(self.log,
 469                                                           self.log_pos)
 470         self.log_pos += ctypes.sizeof(event)
 471         start_address = event.address
 472         snapshot_pos = event.position
 473         if snapshot_pos in self.snapshot_pos_to_name:
 474           self.address_to_snapshot_name[start_address] = \
 475               self.snapshot_pos_to_name[snapshot_pos]
 476         continue
 477
 478       assert False, "Unknown tag %s" % tag
 479
 480   def Dispose(self):
 481     self.log.close()
 482     self.log_file.close()
 483
 484   @staticmethod
 485   def _DefineStruct(fields):
 486     class Struct(ctypes.Structure):
 487       _fields_ = fields
 488     return Struct
 489
 490   @staticmethod
 491   def _HandleCodeConflict(old_code, new_code):
 492     assert (old_code.start_address == new_code.start_address and
 493             old_code.end_address == new_code.end_address), \
 494         "Conficting code log entries %s and %s" % (old_code, new_code)
 495     if old_code.name == new_code.name:
 496       return
 497     # Code object may be shared by a few functions. Collect the full
 498     # set of names.
 499     old_code.AddName(new_code.name)
 500
 501
 502 class Descriptor(object):
 503   """Descriptor of a structure in the binary trace log."""
 504
 505   CTYPE_MAP = {
 506     "u16": ctypes.c_uint16,
 507     "u32": ctypes.c_uint32,
 508     "u64": ctypes.c_uint64
 509   }
 510
 511   def __init__(self, fields):
 512     class TraceItem(ctypes.Structure):
 513       _fields_ = Descriptor.CtypesFields(fields)
 514
 515       def __str__(self):
 516         return ", ".join("%s: %s" % (field, self.__getattribute__(field))
 517                          for field, _ in TraceItem._fields_)
 518
 519     self.ctype = TraceItem
 520
 521   def Read(self, trace, offset):
 522     return self.ctype.from_buffer(trace, offset)
 523
 524   @staticmethod
 525   def CtypesFields(fields):
 526     return [(field, Descriptor.CTYPE_MAP[format]) for (field, format) in fields]
 527
 528
 529 # Please see http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=tree;f=tools/perf
 530 # for the gory details.
 531
 532
 533 # Reference: struct perf_file_header in kernel/tools/perf/util/header.h
 534 TRACE_HEADER_DESC = Descriptor([
 535   ("magic", "u64"),
 536   ("size", "u64"),
 537   ("attr_size", "u64"),
 538   ("attrs_offset", "u64"),
 539   ("attrs_size", "u64"),
 540   ("data_offset", "u64"),
 541   ("data_size", "u64"),
 542   ("event_types_offset", "u64"),
 543   ("event_types_size", "u64")
 544 ])
 545
 546
 547 # Reference: /usr/include/linux/perf_event.h
 548 PERF_EVENT_ATTR_DESC = Descriptor([
 549   ("type", "u32"),
 550   ("size", "u32"),
 551   ("config", "u64"),
 552   ("sample_period_or_freq", "u64"),
 553   ("sample_type", "u64"),
 554   ("read_format", "u64"),
 555   ("flags", "u64"),
 556   ("wakeup_events_or_watermark", "u32"),
 557   ("bp_type", "u32"),
 558   ("bp_addr", "u64"),
 559   ("bp_len", "u64")
 560 ])
 561
 562
 563 # Reference: /usr/include/linux/perf_event.h
 564 PERF_EVENT_HEADER_DESC = Descriptor([
 565   ("type", "u32"),
 566   ("misc", "u16"),
 567   ("size", "u16")
 568 ])
 569
 570
 571 # Reference: kernel/events/core.c
 572 PERF_MMAP_EVENT_BODY_DESC = Descriptor([
 573   ("pid", "u32"),
 574   ("tid", "u32"),
 575   ("addr", "u64"),
 576   ("len", "u64"),
 577   ("pgoff", "u64")
 578 ])
 579
 580
 581 # perf_event_attr.sample_type bits control the set of
 582 # perf_sample_event fields.
 583 PERF_SAMPLE_IP = 1 << 0
 584 PERF_SAMPLE_TID = 1 << 1
 585 PERF_SAMPLE_TIME = 1 << 2
 586 PERF_SAMPLE_ADDR = 1 << 3
 587 PERF_SAMPLE_READ = 1 << 4
 588 PERF_SAMPLE_CALLCHAIN = 1 << 5
 589 PERF_SAMPLE_ID = 1 << 6
 590 PERF_SAMPLE_CPU = 1 << 7
 591 PERF_SAMPLE_PERIOD = 1 << 8
 592 PERF_SAMPLE_STREAM_ID = 1 << 9
 593 PERF_SAMPLE_RAW = 1 << 10
 594
 595
 596 # Reference: /usr/include/perf_event.h, the comment for PERF_RECORD_SAMPLE.
 597 PERF_SAMPLE_EVENT_BODY_FIELDS = [
 598   ("ip", "u64", PERF_SAMPLE_IP),
 599   ("pid", "u32", PERF_SAMPLE_TID),
 600   ("tid", "u32", PERF_SAMPLE_TID),
 601   ("time", "u64", PERF_SAMPLE_TIME),
 602   ("addr", "u64", PERF_SAMPLE_ADDR),
 603   ("id", "u64", PERF_SAMPLE_ID),
 604   ("stream_id", "u64", PERF_SAMPLE_STREAM_ID),
 605   ("cpu", "u32", PERF_SAMPLE_CPU),
 606   ("res", "u32", PERF_SAMPLE_CPU),
 607   ("period", "u64", PERF_SAMPLE_PERIOD),
 608   # Don't want to handle read format that comes after the period and
 609   # before the callchain and has variable size.
 610   ("nr", "u64", PERF_SAMPLE_CALLCHAIN)
 611   # Raw data follows the callchain and is ignored.
 612 ]
 613
 614
 615 PERF_SAMPLE_EVENT_IP_FORMAT = "u64"
 616
 617
 618 PERF_RECORD_MMAP = 1
 619 PERF_RECORD_SAMPLE = 9
 620
 621
 622 class TraceReader(object):
 623   """Perf (linux-2.6/tools/perf) trace file reader."""
 624
 625   _TRACE_HEADER_MAGIC = 4993446653023372624
 626
 627   def __init__(self, trace_name):
 628     self.trace_file = open(trace_name, "r")
 629     self.trace = mmap.mmap(self.trace_file.fileno(), 0, mmap.MAP_PRIVATE)
 630     self.trace_header = TRACE_HEADER_DESC.Read(self.trace, 0)
 631     if self.trace_header.magic != TraceReader._TRACE_HEADER_MAGIC:
 632       print >>sys.stderr, "Warning: unsupported trace header magic"
 633     self.offset = self.trace_header.data_offset
 634     self.limit = self.trace_header.data_offset + self.trace_header.data_size
 635     assert self.limit <= self.trace.size(), \
 636         "Trace data limit exceeds trace file size"
 637     self.header_size = ctypes.sizeof(PERF_EVENT_HEADER_DESC.ctype)
 638     assert self.trace_header.attrs_size != 0, \
 639         "No perf event attributes found in the trace"
 640     perf_event_attr = PERF_EVENT_ATTR_DESC.Read(self.trace,
 641                                                 self.trace_header.attrs_offset)
 642     self.sample_event_body_desc = self._SampleEventBodyDesc(
 643         perf_event_attr.sample_type)
 644     self.callchain_supported = \
 645         (perf_event_attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0
 646     if self.callchain_supported:
 647       self.ip_struct = Descriptor.CTYPE_MAP[PERF_SAMPLE_EVENT_IP_FORMAT]
 648       self.ip_size = ctypes.sizeof(self.ip_struct)
 649
 650   def ReadEventHeader(self):
 651     if self.offset >= self.limit:
 652       return None, 0
 653     offset = self.offset
 654     header = PERF_EVENT_HEADER_DESC.Read(self.trace, self.offset)
 655     self.offset += header.size
 656     return header, offset
 657
 658   def ReadMmap(self, header, offset):
 659     mmap_info = PERF_MMAP_EVENT_BODY_DESC.Read(self.trace,
 660                                                offset + self.header_size)
 661     # Read null-terminated filename.
 662     filename = self.trace[offset + self.header_size + ctypes.sizeof(mmap_info):
 663                           offset + header.size]
 664     mmap_info.filename = HOST_ROOT + filename[:filename.find(chr(0))]
 665     return mmap_info
 666
 667   def ReadSample(self, header, offset):
 668     sample = self.sample_event_body_desc.Read(self.trace,
 669                                               offset + self.header_size)
 670     if not self.callchain_supported:
 671       return sample
 672     sample.ips = []
 673     offset += self.header_size + ctypes.sizeof(sample)
 674     for _ in xrange(sample.nr):
 675       sample.ips.append(
 676         self.ip_struct.from_buffer(self.trace, offset).value)
 677       offset += self.ip_size
 678     return sample
 679
 680   def Dispose(self):
 681     self.trace.close()
 682     self.trace_file.close()
 683
 684   def _SampleEventBodyDesc(self, sample_type):
 685     assert (sample_type & PERF_SAMPLE_READ) == 0, \
 686            "Can't hande read format in samples"
 687     fields = [(field, format)
 688               for (field, format, bit) in PERF_SAMPLE_EVENT_BODY_FIELDS
 689               if (bit & sample_type) != 0]
 690     return Descriptor(fields)
 691
 692
 693 OBJDUMP_SECTION_HEADER_RE = re.compile(
 694   r"^\s*\d+\s(\.\S+)\s+[a-f0-9]")
 695 OBJDUMP_SYMBOL_LINE_RE = re.compile(
 696   r"^([a-f0-9]+)\s(.{7})\s(\S+)\s+([a-f0-9]+)\s+(?:\.hidden\s+)?(.*)$")
 697 OBJDUMP_DYNAMIC_SYMBOLS_START_RE = re.compile(
 698   r"^DYNAMIC SYMBOL TABLE")
 699 OBJDUMP_SKIP_RE = re.compile(
 700   r"^.*ld\.so\.cache$")
 701 KERNEL_ALLSYMS_FILE = "/proc/kallsyms"
 702 PERF_KERNEL_ALLSYMS_RE = re.compile(
 703   r".*kallsyms.*")
 704 KERNEL_ALLSYMS_LINE_RE = re.compile(
 705   r"^([a-f0-9]+)\s(?:t|T)\s(\S+)$")
 706
 707
 708 class LibraryRepo(object):
 709   def __init__(self):
 710     self.infos = []
 711     self.names = set()
 712     self.ticks = {}
 713
 714   def Load(self, mmap_info, code_map, options):
 715     # Skip kernel mmaps when requested using the fact that their tid
 716     # is 0.
 717     if mmap_info.tid == 0 and not options.kernel:
 718       return True
 719     if OBJDUMP_SKIP_RE.match(mmap_info.filename):
 720       return True
 721     if PERF_KERNEL_ALLSYMS_RE.match(mmap_info.filename):
 722       return self._LoadKernelSymbols(code_map)
 723     self.infos.append(mmap_info)
 724     mmap_info.ticks = 0
 725     mmap_info.unique_name = self._UniqueMmapName(mmap_info)
 726     if not os.path.exists(mmap_info.filename):
 727       return True
 728     # Request section headers (-h), symbols (-t), and dynamic symbols
 729     # (-T) from objdump.
 730     # Unfortunately, section headers span two lines, so we have to
 731     # keep the just seen section name (from the first line in each
 732     # section header) in the after_section variable.
 733     if mmap_info.filename.endswith(".ko"):
 734       dynamic_symbols = ""
 735     else:
 736       dynamic_symbols = "-T"
 737     process = subprocess.Popen(
 738       "%s -h -t %s -C %s" % (OBJDUMP_BIN, dynamic_symbols, mmap_info.filename),
 739       shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
 740     pipe = process.stdout
 741     after_section = None
 742     code_sections = set()
 743     reloc_sections = set()
 744     dynamic = False
 745     try:
 746       for line in pipe:
 747         if after_section:
 748           if line.find("CODE") != -1:
 749             code_sections.add(after_section)
 750           if line.find("RELOC") != -1:
 751             reloc_sections.add(after_section)
 752           after_section = None
 753           continue
 754
 755         match = OBJDUMP_SECTION_HEADER_RE.match(line)
 756         if match:
 757           after_section = match.group(1)
 758           continue
 759
 760         if OBJDUMP_DYNAMIC_SYMBOLS_START_RE.match(line):
 761           dynamic = True
 762           continue
 763
 764         match = OBJDUMP_SYMBOL_LINE_RE.match(line)
 765         if match:
 766           start_address = int(match.group(1), 16)
 767           origin_offset = start_address
 768           flags = match.group(2)
 769           section = match.group(3)
 770           if section in code_sections:
 771             if dynamic or section in reloc_sections:
 772               start_address += mmap_info.addr
 773             size = int(match.group(4), 16)
 774             name = match.group(5)
 775             origin = mmap_info.filename
 776             code_map.Add(Code(name, start_address, start_address + size,
 777                               origin, origin_offset))
 778     finally:
 779       pipe.close()
 780     assert process.wait() == 0, "Failed to objdump %s" % mmap_info.filename
 781
 782   def Tick(self, pc):
 783     for i, mmap_info in enumerate(self.infos):
 784       if mmap_info.addr <= pc < (mmap_info.addr + mmap_info.len):
 785         mmap_info.ticks += 1
 786         self.infos[0], self.infos[i] = mmap_info, self.infos[0]
 787         return True
 788     return False
 789
 790   def _UniqueMmapName(self, mmap_info):
 791     name = mmap_info.filename
 792     index = 1
 793     while name in self.names:
 794       name = "%s-%d" % (mmap_info.filename, index)
 795       index += 1
 796     self.names.add(name)
 797     return name
 798
 799   def _LoadKernelSymbols(self, code_map):
 800     if not os.path.exists(KERNEL_ALLSYMS_FILE):
 801       print >>sys.stderr, "Warning: %s not found" % KERNEL_ALLSYMS_FILE
 802       return False
 803     kallsyms = open(KERNEL_ALLSYMS_FILE, "r")
 804     code = None
 805     for line in kallsyms:
 806       match = KERNEL_ALLSYMS_LINE_RE.match(line)
 807       if match:
 808         start_address = int(match.group(1), 16)
 809         end_address = start_address
 810         name = match.group(2)
 811         if code:
 812           code.end_address = start_address
 813           code_map.Add(code, 16)
 814         code = Code(name, start_address, end_address, "kernel", 0)
 815     return True
 816
 817
 818 def PrintReport(code_map, library_repo, arch, ticks, options):
 819   print "Ticks per symbol:"
 820   used_code = [code for code in code_map.UsedCode()]
 821   used_code.sort(key=lambda x: x.self_ticks, reverse=True)
 822   for i, code in enumerate(used_code):
 823     code_ticks = code.self_ticks
 824     print "%10d %5.1f%% %s [%s]" % (code_ticks, 100. * code_ticks / ticks,
 825                                     code.FullName(), code.origin)
 826     if options.disasm_all or i < options.disasm_top:
 827       code.PrintAnnotated(arch, options)
 828   print
 829   print "Ticks per library:"
 830   mmap_infos = [m for m in library_repo.infos if m.ticks > 0]
 831   mmap_infos.sort(key=lambda m: m.ticks, reverse=True)
 832   for mmap_info in mmap_infos:
 833     mmap_ticks = mmap_info.ticks
 834     print "%10d %5.1f%% %s" % (mmap_ticks, 100. * mmap_ticks / ticks,
 835                                mmap_info.unique_name)
 836
 837
 838 def PrintDot(code_map, options):
 839   print "digraph G {"
 840   for code in code_map.UsedCode():
 841     if code.self_ticks < 10:
 842       continue
 843     print "n%d [shape=box,label=\"%s\"];" % (code.id, code.name)
 844     if code.callee_ticks:
 845       for callee, ticks in code.callee_ticks.iteritems():
 846         print "n%d -> n%d [label=\"%d\"];" % (code.id, callee.id, ticks)
 847   print "}"
 848
 849
 850 if __name__ == "__main__":
 851   parser = optparse.OptionParser(USAGE)
 852   parser.add_option("--snapshot-log",
 853                     default="obj/release/snapshot.log",
 854                     help="V8 snapshot log file name [default: %default]")
 855   parser.add_option("--log",
 856                     default="v8.log",
 857                     help="V8 log file name [default: %default]")
 858   parser.add_option("--snapshot",
 859                     default=False,
 860                     action="store_true",
 861                     help="process V8 snapshot log [default: %default]")
 862   parser.add_option("--trace",
 863                     default="perf.data",
 864                     help="perf trace file name [default: %default]")
 865   parser.add_option("--kernel",
 866                     default=False,
 867                     action="store_true",
 868                     help="process kernel entries [default: %default]")
 869   parser.add_option("--disasm-top",
 870                     default=0,
 871                     type="int",
 872                     help=("number of top symbols to disassemble and annotate "
 873                           "[default: %default]"))
 874   parser.add_option("--disasm-all",
 875                     default=False,
 876                     action="store_true",
 877                     help=("disassemble and annotate all used symbols "
 878                           "[default: %default]"))
 879   parser.add_option("--dot",
 880                     default=False,
 881                     action="store_true",
 882                     help="produce dot output (WIP) [default: %default]")
 883   parser.add_option("--quiet", "-q",
 884                     default=False,
 885                     action="store_true",
 886                     help="no auxiliary messages [default: %default]")
 887   parser.add_option("--gc-fake-mmap",
 888                     default="/tmp/__v8_gc__",
 889                     help="gc fake mmap file [default: %default]")
 890   parser.add_option("--objdump",
 891                     default="/usr/bin/objdump",
 892                     help="objdump tool to use [default: %default]")
 893   parser.add_option("--host-root",
 894                     default="",
 895                     help="Path to the host root [default: %default]")
 896   options, args = parser.parse_args()
 897
 898   if not options.quiet:
 899     if options.snapshot:
 900       print "V8 logs: %s, %s, %s.ll" % (options.snapshot_log,
 901                                         options.log,
 902                                         options.log)
 903     else:
 904       print "V8 log: %s, %s.ll (no snapshot)" % (options.log, options.log)
 905     print "Perf trace file: %s" % options.trace
 906
 907   V8_GC_FAKE_MMAP = options.gc_fake_mmap
 908   HOST_ROOT = options.host_root
 909   if os.path.exists(options.objdump):
 910     disasm.OBJDUMP_BIN = options.objdump
 911     OBJDUMP_BIN = options.objdump
 912   else:
 913     print "Cannot find %s, falling back to default objdump" % options.objdump
 914
 915   # Stats.
 916   events = 0
 917   ticks = 0
 918   missed_ticks = 0
 919   really_missed_ticks = 0
 920   optimized_ticks = 0
 921   generated_ticks = 0
 922   v8_internal_ticks = 0
 923   mmap_time = 0
 924   sample_time = 0
 925
 926   # Process the snapshot log to fill the snapshot name map.
 927   snapshot_name_map = {}
 928   if options.snapshot:
 929     snapshot_log_reader = SnapshotLogReader(log_name=options.snapshot_log)
 930     snapshot_name_map = snapshot_log_reader.ReadNameMap()
 931
 932   # Initialize the log reader.
 933   code_map = CodeMap()
 934   log_reader = LogReader(log_name=options.log + ".ll",
 935                          code_map=code_map,
 936                          snapshot_pos_to_name=snapshot_name_map)
 937   if not options.quiet:
 938     print "Generated code architecture: %s" % log_reader.arch
 939     print
 940     sys.stdout.flush()
 941
 942   # Process the code and trace logs.
 943   library_repo = LibraryRepo()
 944   log_reader.ReadUpToGC()
 945   trace_reader = TraceReader(options.trace)
 946   while True:
 947     header, offset = trace_reader.ReadEventHeader()
 948     if not header:
 949       break
 950     events += 1
 951     if header.type == PERF_RECORD_MMAP:
 952       start = time.time()
 953       mmap_info = trace_reader.ReadMmap(header, offset)
 954       if mmap_info.filename == HOST_ROOT + V8_GC_FAKE_MMAP:
 955         log_reader.ReadUpToGC()
 956       else:
 957         library_repo.Load(mmap_info, code_map, options)
 958       mmap_time += time.time() - start
 959     elif header.type == PERF_RECORD_SAMPLE:
 960       ticks += 1
 961       start = time.time()
 962       sample = trace_reader.ReadSample(header, offset)
 963       code = code_map.Find(sample.ip)
 964       if code:
 965         code.Tick(sample.ip)
 966         if code.codetype == Code.OPTIMIZED:
 967           optimized_ticks += 1
 968         elif code.codetype == Code.FULL_CODEGEN:
 969           generated_ticks += 1
 970         elif code.codetype == Code.V8INTERNAL:
 971           v8_internal_ticks += 1
 972       else:
 973         missed_ticks += 1
 974       if not library_repo.Tick(sample.ip) and not code:
 975         really_missed_ticks += 1
 976       if trace_reader.callchain_supported:
 977         for ip in sample.ips:
 978           caller_code = code_map.Find(ip)
 979           if caller_code:
 980             if code:
 981               caller_code.CalleeTick(code)
 982             code = caller_code
 983       sample_time += time.time() - start
 984
 985   if options.dot:
 986     PrintDot(code_map, options)
 987   else:
 988     PrintReport(code_map, library_repo, log_reader.arch, ticks, options)
 989
 990     if not options.quiet:
 991       def PrintTicks(number, total, description):
 992         print("%10d %5.1f%% ticks in %s" %
 993               (number, 100.0*number/total, description))
 994       print
 995       print "Stats:"
 996       print "%10d total trace events" % events
 997       print "%10d total ticks" % ticks
 998       print "%10d ticks not in symbols" % missed_ticks
 999       unaccounted = "unaccounted ticks"
1000       if really_missed_ticks > 0:
1001         unaccounted += " (probably in the kernel, try --kernel)"
1002       PrintTicks(really_missed_ticks, ticks, unaccounted)
1003       PrintTicks(optimized_ticks, ticks, "ticks in optimized code")
1004       PrintTicks(generated_ticks, ticks, "ticks in other lazily compiled code")
1005       PrintTicks(v8_internal_ticks, ticks, "ticks in v8::internal::*")
1006       print "%10d total symbols" % len([c for c in code_map.AllCode()])
1007       print "%10d used symbols" % len([c for c in code_map.UsedCode()])
1008       print "%9.2fs library processing time" % mmap_time
1009       print "%9.2fs tick processing time" % sample_time
1010
1011   log_reader.Dispose()
1012   trace_reader.Dispose()