src/v8/tools/ll_prof.py

   1 #!/usr/bin/env python
   2 #
   3 # Copyright 2012 the V8 project authors. All rights reserved.
   4 # Redistribution and use in source and binary forms, with or without
   5 # modification, are permitted provided that the following conditions are
   6 # met:
   7 #
   8 #     * Redistributions of source code must retain the above copyright
   9 #       notice, this list of conditions and the following disclaimer.
  10 #     * Redistributions in binary form must reproduce the above
  11 #       copyright notice, this list of conditions and the following
  12 #       disclaimer in the documentation and/or other materials provided
  13 #       with the distribution.
  14 #     * Neither the name of Google Inc. nor the names of its
  15 #       contributors may be used to endorse or promote products derived
  16 #       from this software without specific prior written permission.
  17 #
  18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30 import bisect
  31 import collections
  32 import ctypes
  33 import disasm
  34 import mmap
  35 import optparse
  36 import os
  37 import re
  38 import subprocess
  39 import sys
  40 import time
  41
  42
  43 USAGE="""usage: %prog [OPTION]...
  44
  45 Analyses V8 and perf logs to produce profiles.
  46
  47 Perf logs can be collected using a command like:
  48   $ perf record -R -e cycles -c 10000 -f -i ./d8 bench.js --ll-prof
  49   # -R: collect all data
  50   # -e cycles: use cpu-cycles event (run "perf list" for details)
  51   # -c 10000: write a sample after each 10000 events
  52   # -f: force output file overwrite
  53   # -i: limit profiling to our process and the kernel
  54   # --ll-prof shell flag enables the right V8 logs
  55 This will produce a binary trace file (perf.data) that %prog can analyse.
  56
  57 IMPORTANT:
  58   The kernel has an internal maximum for events per second, it is 100K by
  59   default. That's not enough for "-c 10000". Set it to some higher value:
  60   $ echo 10000000 | sudo tee /proc/sys/kernel/perf_event_max_sample_rate
  61   You can also make the warning about kernel address maps go away:
  62   $ echo 0 | sudo tee /proc/sys/kernel/kptr_restrict
  63
  64 We have a convenience script that handles all of the above for you:
  65   $ tools/run-llprof.sh ./d8 bench.js
  66
  67 Examples:
  68   # Print flat profile with annotated disassembly for the 10 top
  69   # symbols. Use default log names and include the snapshot log.
  70   $ %prog --snapshot --disasm-top=10
  71
  72   # Print flat profile with annotated disassembly for all used symbols.
  73   # Use default log names and include kernel symbols into analysis.
  74   $ %prog --disasm-all --kernel
  75
  76   # Print flat profile. Use custom log names.
  77   $ %prog --log=foo.log --snapshot-log=snap-foo.log --trace=foo.data --snapshot
  78 """
  79
  80
  81 JS_ORIGIN = "js"
  82 JS_SNAPSHOT_ORIGIN = "js-snapshot"
  83
  84 class Code(object):
  85   """Code object."""
  86
  87   _id = 0
  88   UNKNOWN = 0
  89   V8INTERNAL = 1
  90   FULL_CODEGEN = 2
  91   OPTIMIZED = 3
  92
  93   def __init__(self, name, start_address, end_address, origin, origin_offset):
  94     self.id = Code._id
  95     Code._id += 1
  96     self.name = name
  97     self.other_names = None
  98     self.start_address = start_address
  99     self.end_address = end_address
 100     self.origin = origin
 101     self.origin_offset = origin_offset
 102     self.self_ticks = 0
 103     self.self_ticks_map = None
 104     self.callee_ticks = None
 105     if name.startswith("LazyCompile:*"):
 106       self.codetype = Code.OPTIMIZED
 107     elif name.startswith("LazyCompile:"):
 108       self.codetype = Code.FULL_CODEGEN
 109     elif name.startswith("v8::internal::"):
 110       self.codetype = Code.V8INTERNAL
 111     else:
 112       self.codetype = Code.UNKNOWN
 113
 114   def AddName(self, name):
 115     assert self.name != name
 116     if self.other_names is None:
 117       self.other_names = [name]
 118       return
 119     if not name in self.other_names:
 120       self.other_names.append(name)
 121
 122   def FullName(self):
 123     if self.other_names is None:
 124       return self.name
 125     self.other_names.sort()
 126     return "%s (aka %s)" % (self.name, ", ".join(self.other_names))
 127
 128   def IsUsed(self):
 129     return self.self_ticks > 0 or self.callee_ticks is not None
 130
 131   def Tick(self, pc):
 132     self.self_ticks += 1
 133     if self.self_ticks_map is None:
 134       self.self_ticks_map = collections.defaultdict(lambda: 0)
 135     offset = pc - self.start_address
 136     self.self_ticks_map[offset] += 1
 137
 138   def CalleeTick(self, callee):
 139     if self.callee_ticks is None:
 140       self.callee_ticks = collections.defaultdict(lambda: 0)
 141     self.callee_ticks[callee] += 1
 142
 143   def PrintAnnotated(self, arch, options):
 144     if self.self_ticks_map is None:
 145       ticks_map = []
 146     else:
 147       ticks_map = self.self_ticks_map.items()
 148     # Convert the ticks map to offsets and counts arrays so that later
 149     # we can do binary search in the offsets array.
 150     ticks_map.sort(key=lambda t: t[0])
 151     ticks_offsets = [t[0] for t in ticks_map]
 152     ticks_counts = [t[1] for t in ticks_map]
 153     # Get a list of disassembled lines and their addresses.
 154     lines = self._GetDisasmLines(arch, options)
 155     if len(lines) == 0:
 156       return
 157     # Print annotated lines.
 158     address = lines[0][0]
 159     total_count = 0
 160     for i in xrange(len(lines)):
 161       start_offset = lines[i][0] - address
 162       if i == len(lines) - 1:
 163         end_offset = self.end_address - self.start_address
 164       else:
 165         end_offset = lines[i + 1][0] - address
 166       # Ticks (reported pc values) are not always precise, i.e. not
 167       # necessarily point at instruction starts. So we have to search
 168       # for ticks that touch the current instruction line.
 169       j = bisect.bisect_left(ticks_offsets, end_offset)
 170       count = 0
 171       for offset, cnt in reversed(zip(ticks_offsets[:j], ticks_counts[:j])):
 172         if offset < start_offset:
 173           break
 174         count += cnt
 175       total_count += count
 176       count = 100.0 * count / self.self_ticks
 177       if count >= 0.01:
 178         print "%15.2f %x: %s" % (count, lines[i][0], lines[i][1])
 179       else:
 180         print "%s %x: %s" % (" " * 15, lines[i][0], lines[i][1])
 181     print
 182     assert total_count == self.self_ticks, \
 183         "Lost ticks (%d != %d) in %s" % (total_count, self.self_ticks, self)
 184
 185   def __str__(self):
 186     return "%s [0x%x, 0x%x) size: %d origin: %s" % (
 187       self.name,
 188       self.start_address,
 189       self.end_address,
 190       self.end_address - self.start_address,
 191       self.origin)
 192
 193   def _GetDisasmLines(self, arch, options):
 194     if self.origin == JS_ORIGIN or self.origin == JS_SNAPSHOT_ORIGIN:
 195       inplace = False
 196       filename = options.log + ".ll"
 197     else:
 198       inplace = True
 199       filename = self.origin
 200     return disasm.GetDisasmLines(filename,
 201                                  self.origin_offset,
 202                                  self.end_address - self.start_address,
 203                                  arch,
 204                                  inplace)
 205
 206
 207 class CodePage(object):
 208   """Group of adjacent code objects."""
 209
 210   SHIFT = 20  # 1M pages
 211   SIZE = (1 << SHIFT)
 212   MASK = ~(SIZE - 1)
 213
 214   @staticmethod
 215   def PageAddress(address):
 216     return address & CodePage.MASK
 217
 218   @staticmethod
 219   def PageId(address):
 220     return address >> CodePage.SHIFT
 221
 222   @staticmethod
 223   def PageAddressFromId(id):
 224     return id << CodePage.SHIFT
 225
 226   def __init__(self, address):
 227     self.address = address
 228     self.code_objects = []
 229
 230   def Add(self, code):
 231     self.code_objects.append(code)
 232
 233   def Remove(self, code):
 234     self.code_objects.remove(code)
 235
 236   def Find(self, pc):
 237     code_objects = self.code_objects
 238     for i, code in enumerate(code_objects):
 239       if code.start_address <= pc < code.end_address:
 240         code_objects[0], code_objects[i] = code, code_objects[0]
 241         return code
 242     return None
 243
 244   def __iter__(self):
 245     return self.code_objects.__iter__()
 246
 247
 248 class CodeMap(object):
 249   """Code object map."""
 250
 251   def __init__(self):
 252     self.pages = {}
 253     self.min_address = 1 << 64
 254     self.max_address = -1
 255
 256   def Add(self, code, max_pages=-1):
 257     page_id = CodePage.PageId(code.start_address)
 258     limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
 259     pages = 0
 260     while page_id < limit_id:
 261       if max_pages >= 0 and pages > max_pages:
 262         print >>sys.stderr, \
 263             "Warning: page limit (%d) reached for %s [%s]" % (
 264             max_pages, code.name, code.origin)
 265         break
 266       if page_id in self.pages:
 267         page = self.pages[page_id]
 268       else:
 269         page = CodePage(CodePage.PageAddressFromId(page_id))
 270         self.pages[page_id] = page
 271       page.Add(code)
 272       page_id += 1
 273       pages += 1
 274     self.min_address = min(self.min_address, code.start_address)
 275     self.max_address = max(self.max_address, code.end_address)
 276
 277   def Remove(self, code):
 278     page_id = CodePage.PageId(code.start_address)
 279     limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
 280     removed = False
 281     while page_id < limit_id:
 282       if page_id not in self.pages:
 283         page_id += 1
 284         continue
 285       page = self.pages[page_id]
 286       page.Remove(code)
 287       removed = True
 288       page_id += 1
 289     return removed
 290
 291   def AllCode(self):
 292     for page in self.pages.itervalues():
 293       for code in page:
 294         if CodePage.PageAddress(code.start_address) == page.address:
 295           yield code
 296
 297   def UsedCode(self):
 298     for code in self.AllCode():
 299       if code.IsUsed():
 300         yield code
 301
 302   def Print(self):
 303     for code in self.AllCode():
 304       print code
 305
 306   def Find(self, pc):
 307     if pc < self.min_address or pc >= self.max_address:
 308       return None
 309     page_id = CodePage.PageId(pc)
 310     if page_id not in self.pages:
 311       return None
 312     return self.pages[page_id].Find(pc)
 313
 314
 315 class CodeInfo(object):
 316   """Generic info about generated code objects."""
 317
 318   def __init__(self, arch, header_size):
 319     self.arch = arch
 320     self.header_size = header_size
 321
 322
 323 class SnapshotLogReader(object):
 324   """V8 snapshot log reader."""
 325
 326   _SNAPSHOT_CODE_NAME_RE = re.compile(
 327     r"snapshot-code-name,(\d+),\"(.*)\"")
 328
 329   def __init__(self, log_name):
 330     self.log_name = log_name
 331
 332   def ReadNameMap(self):
 333     log = open(self.log_name, "r")
 334     try:
 335       snapshot_pos_to_name = {}
 336       for line in log:
 337         match = SnapshotLogReader._SNAPSHOT_CODE_NAME_RE.match(line)
 338         if match:
 339           pos = int(match.group(1))
 340           name = match.group(2)
 341           snapshot_pos_to_name[pos] = name
 342     finally:
 343       log.close()
 344     return snapshot_pos_to_name
 345
 346
 347 class LogReader(object):
 348   """V8 low-level (binary) log reader."""
 349
 350   _ARCH_TO_POINTER_TYPE_MAP = {
 351     "ia32": ctypes.c_uint32,
 352     "arm": ctypes.c_uint32,
 353     "mips": ctypes.c_uint32,
 354     "x64": ctypes.c_uint64
 355   }
 356
 357   _CODE_CREATE_TAG = "C"
 358   _CODE_MOVE_TAG = "M"
 359   _CODE_DELETE_TAG = "D"
 360   _SNAPSHOT_POSITION_TAG = "P"
 361   _CODE_MOVING_GC_TAG = "G"
 362
 363   def __init__(self, log_name, code_map, snapshot_pos_to_name):
 364     self.log_file = open(log_name, "r")
 365     self.log = mmap.mmap(self.log_file.fileno(), 0, mmap.MAP_PRIVATE)
 366     self.log_pos = 0
 367     self.code_map = code_map
 368     self.snapshot_pos_to_name = snapshot_pos_to_name
 369     self.address_to_snapshot_name = {}
 370
 371     self.arch = self.log[:self.log.find("\0")]
 372     self.log_pos += len(self.arch) + 1
 373     assert self.arch in LogReader._ARCH_TO_POINTER_TYPE_MAP, \
 374         "Unsupported architecture %s" % self.arch
 375     pointer_type = LogReader._ARCH_TO_POINTER_TYPE_MAP[self.arch]
 376
 377     self.code_create_struct = LogReader._DefineStruct([
 378         ("name_size", ctypes.c_int32),
 379         ("code_address", pointer_type),
 380         ("code_size", ctypes.c_int32)])
 381
 382     self.code_move_struct = LogReader._DefineStruct([
 383         ("from_address", pointer_type),
 384         ("to_address", pointer_type)])
 385
 386     self.code_delete_struct = LogReader._DefineStruct([
 387         ("address", pointer_type)])
 388
 389     self.snapshot_position_struct = LogReader._DefineStruct([
 390         ("address", pointer_type),
 391         ("position", ctypes.c_int32)])
 392
 393   def ReadUpToGC(self):
 394     while self.log_pos < self.log.size():
 395       tag = self.log[self.log_pos]
 396       self.log_pos += 1
 397
 398       if tag == LogReader._CODE_MOVING_GC_TAG:
 399         self.address_to_snapshot_name.clear()
 400         return
 401
 402       if tag == LogReader._CODE_CREATE_TAG:
 403         event = self.code_create_struct.from_buffer(self.log, self.log_pos)
 404         self.log_pos += ctypes.sizeof(event)
 405         start_address = event.code_address
 406         end_address = start_address + event.code_size
 407         if start_address in self.address_to_snapshot_name:
 408           name = self.address_to_snapshot_name[start_address]
 409           origin = JS_SNAPSHOT_ORIGIN
 410         else:
 411           name = self.log[self.log_pos:self.log_pos + event.name_size]
 412           origin = JS_ORIGIN
 413         self.log_pos += event.name_size
 414         origin_offset = self.log_pos
 415         self.log_pos += event.code_size
 416         code = Code(name, start_address, end_address, origin, origin_offset)
 417         conficting_code = self.code_map.Find(start_address)
 418         if conficting_code:
 419           if not (conficting_code.start_address == code.start_address and
 420             conficting_code.end_address == code.end_address):
 421             self.code_map.Remove(conficting_code)
 422           else:
 423             LogReader._HandleCodeConflict(conficting_code, code)
 424             # TODO(vitalyr): this warning is too noisy because of our
 425             # attempts to reconstruct code log from the snapshot.
 426             # print >>sys.stderr, \
 427             #     "Warning: Skipping duplicate code log entry %s" % code
 428             continue
 429         self.code_map.Add(code)
 430         continue
 431
 432       if tag == LogReader._CODE_MOVE_TAG:
 433         event = self.code_move_struct.from_buffer(self.log, self.log_pos)
 434         self.log_pos += ctypes.sizeof(event)
 435         old_start_address = event.from_address
 436         new_start_address = event.to_address
 437         if old_start_address == new_start_address:
 438           # Skip useless code move entries.
 439           continue
 440         code = self.code_map.Find(old_start_address)
 441         if not code:
 442           print >>sys.stderr, "Warning: Not found %x" % old_start_address
 443           continue
 444         assert code.start_address == old_start_address, \
 445             "Inexact move address %x for %s" % (old_start_address, code)
 446         self.code_map.Remove(code)
 447         size = code.end_address - code.start_address
 448         code.start_address = new_start_address
 449         code.end_address = new_start_address + size
 450         self.code_map.Add(code)
 451         continue
 452
 453       if tag == LogReader._CODE_DELETE_TAG:
 454         event = self.code_delete_struct.from_buffer(self.log, self.log_pos)
 455         self.log_pos += ctypes.sizeof(event)
 456         old_start_address = event.address
 457         code = self.code_map.Find(old_start_address)
 458         if not code:
 459           print >>sys.stderr, "Warning: Not found %x" % old_start_address
 460           continue
 461         assert code.start_address == old_start_address, \
 462             "Inexact delete address %x for %s" % (old_start_address, code)
 463         self.code_map.Remove(code)
 464         continue
 465
 466       if tag == LogReader._SNAPSHOT_POSITION_TAG:
 467         event = self.snapshot_position_struct.from_buffer(self.log,
 468                                                           self.log_pos)
 469         self.log_pos += ctypes.sizeof(event)
 470         start_address = event.address
 471         snapshot_pos = event.position
 472         if snapshot_pos in self.snapshot_pos_to_name:
 473           self.address_to_snapshot_name[start_address] = \
 474               self.snapshot_pos_to_name[snapshot_pos]
 475         continue
 476
 477       assert False, "Unknown tag %s" % tag
 478
 479   def Dispose(self):
 480     self.log.close()
 481     self.log_file.close()
 482
 483   @staticmethod
 484   def _DefineStruct(fields):
 485     class Struct(ctypes.Structure):
 486       _fields_ = fields
 487     return Struct
 488
 489   @staticmethod
 490   def _HandleCodeConflict(old_code, new_code):
 491     assert (old_code.start_address == new_code.start_address and
 492             old_code.end_address == new_code.end_address), \
 493         "Conficting code log entries %s and %s" % (old_code, new_code)
 494     if old_code.name == new_code.name:
 495       return
 496     # Code object may be shared by a few functions. Collect the full
 497     # set of names.
 498     old_code.AddName(new_code.name)
 499
 500
 501 class Descriptor(object):
 502   """Descriptor of a structure in the binary trace log."""
 503
 504   CTYPE_MAP = {
 505     "u16": ctypes.c_uint16,
 506     "u32": ctypes.c_uint32,
 507     "u64": ctypes.c_uint64
 508   }
 509
 510   def __init__(self, fields):
 511     class TraceItem(ctypes.Structure):
 512       _fields_ = Descriptor.CtypesFields(fields)
 513
 514       def __str__(self):
 515         return ", ".join("%s: %s" % (field, self.__getattribute__(field))
 516                          for field, _ in TraceItem._fields_)
 517
 518     self.ctype = TraceItem
 519
 520   def Read(self, trace, offset):
 521     return self.ctype.from_buffer(trace, offset)
 522
 523   @staticmethod
 524   def CtypesFields(fields):
 525     return [(field, Descriptor.CTYPE_MAP[format]) for (field, format) in fields]
 526
 527
 528 # Please see http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=tree;f=tools/perf
 529 # for the gory details.
 530
 531
 532 # Reference: struct perf_file_header in kernel/tools/perf/util/header.h
 533 TRACE_HEADER_DESC = Descriptor([
 534   ("magic", "u64"),
 535   ("size", "u64"),
 536   ("attr_size", "u64"),
 537   ("attrs_offset", "u64"),
 538   ("attrs_size", "u64"),
 539   ("data_offset", "u64"),
 540   ("data_size", "u64"),
 541   ("event_types_offset", "u64"),
 542   ("event_types_size", "u64")
 543 ])
 544
 545
 546 # Reference: /usr/include/linux/perf_event.h
 547 PERF_EVENT_ATTR_DESC = Descriptor([
 548   ("type", "u32"),
 549   ("size", "u32"),
 550   ("config", "u64"),
 551   ("sample_period_or_freq", "u64"),
 552   ("sample_type", "u64"),
 553   ("read_format", "u64"),
 554   ("flags", "u64"),
 555   ("wakeup_events_or_watermark", "u32"),
 556   ("bp_type", "u32"),
 557   ("bp_addr", "u64"),
 558   ("bp_len", "u64")
 559 ])
 560
 561
 562 # Reference: /usr/include/linux/perf_event.h
 563 PERF_EVENT_HEADER_DESC = Descriptor([
 564   ("type", "u32"),
 565   ("misc", "u16"),
 566   ("size", "u16")
 567 ])
 568
 569
 570 # Reference: kernel/events/core.c
 571 PERF_MMAP_EVENT_BODY_DESC = Descriptor([
 572   ("pid", "u32"),
 573   ("tid", "u32"),
 574   ("addr", "u64"),
 575   ("len", "u64"),
 576   ("pgoff", "u64")
 577 ])
 578
 579
 580 # perf_event_attr.sample_type bits control the set of
 581 # perf_sample_event fields.
 582 PERF_SAMPLE_IP = 1 << 0
 583 PERF_SAMPLE_TID = 1 << 1
 584 PERF_SAMPLE_TIME = 1 << 2
 585 PERF_SAMPLE_ADDR = 1 << 3
 586 PERF_SAMPLE_READ = 1 << 4
 587 PERF_SAMPLE_CALLCHAIN = 1 << 5
 588 PERF_SAMPLE_ID = 1 << 6
 589 PERF_SAMPLE_CPU = 1 << 7
 590 PERF_SAMPLE_PERIOD = 1 << 8
 591 PERF_SAMPLE_STREAM_ID = 1 << 9
 592 PERF_SAMPLE_RAW = 1 << 10
 593
 594
 595 # Reference: /usr/include/perf_event.h, the comment for PERF_RECORD_SAMPLE.
 596 PERF_SAMPLE_EVENT_BODY_FIELDS = [
 597   ("ip", "u64", PERF_SAMPLE_IP),
 598   ("pid", "u32", PERF_SAMPLE_TID),
 599   ("tid", "u32", PERF_SAMPLE_TID),
 600   ("time", "u64", PERF_SAMPLE_TIME),
 601   ("addr", "u64", PERF_SAMPLE_ADDR),
 602   ("id", "u64", PERF_SAMPLE_ID),
 603   ("stream_id", "u64", PERF_SAMPLE_STREAM_ID),
 604   ("cpu", "u32", PERF_SAMPLE_CPU),
 605   ("res", "u32", PERF_SAMPLE_CPU),
 606   ("period", "u64", PERF_SAMPLE_PERIOD),
 607   # Don't want to handle read format that comes after the period and
 608   # before the callchain and has variable size.
 609   ("nr", "u64", PERF_SAMPLE_CALLCHAIN)
 610   # Raw data follows the callchain and is ignored.
 611 ]
 612
 613
 614 PERF_SAMPLE_EVENT_IP_FORMAT = "u64"
 615
 616
 617 PERF_RECORD_MMAP = 1
 618 PERF_RECORD_SAMPLE = 9
 619
 620
 621 class TraceReader(object):
 622   """Perf (linux-2.6/tools/perf) trace file reader."""
 623
 624   _TRACE_HEADER_MAGIC = 4993446653023372624
 625
 626   def __init__(self, trace_name):
 627     self.trace_file = open(trace_name, "r")
 628     self.trace = mmap.mmap(self.trace_file.fileno(), 0, mmap.MAP_PRIVATE)
 629     self.trace_header = TRACE_HEADER_DESC.Read(self.trace, 0)
 630     if self.trace_header.magic != TraceReader._TRACE_HEADER_MAGIC:
 631       print >>sys.stderr, "Warning: unsupported trace header magic"
 632     self.offset = self.trace_header.data_offset
 633     self.limit = self.trace_header.data_offset + self.trace_header.data_size
 634     assert self.limit <= self.trace.size(), \
 635         "Trace data limit exceeds trace file size"
 636     self.header_size = ctypes.sizeof(PERF_EVENT_HEADER_DESC.ctype)
 637     assert self.trace_header.attrs_size != 0, \
 638         "No perf event attributes found in the trace"
 639     perf_event_attr = PERF_EVENT_ATTR_DESC.Read(self.trace,
 640                                                 self.trace_header.attrs_offset)
 641     self.sample_event_body_desc = self._SampleEventBodyDesc(
 642         perf_event_attr.sample_type)
 643     self.callchain_supported = \
 644         (perf_event_attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0
 645     if self.callchain_supported:
 646       self.ip_struct = Descriptor.CTYPE_MAP[PERF_SAMPLE_EVENT_IP_FORMAT]
 647       self.ip_size = ctypes.sizeof(self.ip_struct)
 648
 649   def ReadEventHeader(self):
 650     if self.offset >= self.limit:
 651       return None, 0
 652     offset = self.offset
 653     header = PERF_EVENT_HEADER_DESC.Read(self.trace, self.offset)
 654     self.offset += header.size
 655     return header, offset
 656
 657   def ReadMmap(self, header, offset):
 658     mmap_info = PERF_MMAP_EVENT_BODY_DESC.Read(self.trace,
 659                                                offset + self.header_size)
 660     # Read null-terminated filename.
 661     filename = self.trace[offset + self.header_size + ctypes.sizeof(mmap_info):
 662                           offset + header.size]
 663     mmap_info.filename = HOST_ROOT + filename[:filename.find(chr(0))]
 664     return mmap_info
 665
 666   def ReadSample(self, header, offset):
 667     sample = self.sample_event_body_desc.Read(self.trace,
 668                                               offset + self.header_size)
 669     if not self.callchain_supported:
 670       return sample
 671     sample.ips = []
 672     offset += self.header_size + ctypes.sizeof(sample)
 673     for _ in xrange(sample.nr):
 674       sample.ips.append(
 675         self.ip_struct.from_buffer(self.trace, offset).value)
 676       offset += self.ip_size
 677     return sample
 678
 679   def Dispose(self):
 680     self.trace.close()
 681     self.trace_file.close()
 682
 683   def _SampleEventBodyDesc(self, sample_type):
 684     assert (sample_type & PERF_SAMPLE_READ) == 0, \
 685            "Can't hande read format in samples"
 686     fields = [(field, format)
 687               for (field, format, bit) in PERF_SAMPLE_EVENT_BODY_FIELDS
 688               if (bit & sample_type) != 0]
 689     return Descriptor(fields)
 690
 691
 692 OBJDUMP_SECTION_HEADER_RE = re.compile(
 693   r"^\s*\d+\s(\.\S+)\s+[a-f0-9]")
 694 OBJDUMP_SYMBOL_LINE_RE = re.compile(
 695   r"^([a-f0-9]+)\s(.{7})\s(\S+)\s+([a-f0-9]+)\s+(?:\.hidden\s+)?(.*)$")
 696 OBJDUMP_DYNAMIC_SYMBOLS_START_RE = re.compile(
 697   r"^DYNAMIC SYMBOL TABLE")
 698 OBJDUMP_SKIP_RE = re.compile(
 699   r"^.*ld\.so\.cache$")
 700 KERNEL_ALLSYMS_FILE = "/proc/kallsyms"
 701 PERF_KERNEL_ALLSYMS_RE = re.compile(
 702   r".*kallsyms.*")
 703 KERNEL_ALLSYMS_LINE_RE = re.compile(
 704   r"^([a-f0-9]+)\s(?:t|T)\s(\S+)$")
 705
 706
 707 class LibraryRepo(object):
 708   def __init__(self):
 709     self.infos = []
 710     self.names = set()
 711     self.ticks = {}
 712
 713   def Load(self, mmap_info, code_map, options):
 714     # Skip kernel mmaps when requested using the fact that their tid
 715     # is 0.
 716     if mmap_info.tid == 0 and not options.kernel:
 717       return True
 718     if OBJDUMP_SKIP_RE.match(mmap_info.filename):
 719       return True
 720     if PERF_KERNEL_ALLSYMS_RE.match(mmap_info.filename):
 721       return self._LoadKernelSymbols(code_map)
 722     self.infos.append(mmap_info)
 723     mmap_info.ticks = 0
 724     mmap_info.unique_name = self._UniqueMmapName(mmap_info)
 725     if not os.path.exists(mmap_info.filename):
 726       return True
 727     # Request section headers (-h), symbols (-t), and dynamic symbols
 728     # (-T) from objdump.
 729     # Unfortunately, section headers span two lines, so we have to
 730     # keep the just seen section name (from the first line in each
 731     # section header) in the after_section variable.
 732     if mmap_info.filename.endswith(".ko"):
 733       dynamic_symbols = ""
 734     else:
 735       dynamic_symbols = "-T"
 736     process = subprocess.Popen(
 737       "%s -h -t %s -C %s" % (OBJDUMP_BIN, dynamic_symbols, mmap_info.filename),
 738       shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
 739     pipe = process.stdout
 740     after_section = None
 741     code_sections = set()
 742     reloc_sections = set()
 743     dynamic = False
 744     try:
 745       for line in pipe:
 746         if after_section:
 747           if line.find("CODE") != -1:
 748             code_sections.add(after_section)
 749           if line.find("RELOC") != -1:
 750             reloc_sections.add(after_section)
 751           after_section = None
 752           continue
 753
 754         match = OBJDUMP_SECTION_HEADER_RE.match(line)
 755         if match:
 756           after_section = match.group(1)
 757           continue
 758
 759         if OBJDUMP_DYNAMIC_SYMBOLS_START_RE.match(line):
 760           dynamic = True
 761           continue
 762
 763         match = OBJDUMP_SYMBOL_LINE_RE.match(line)
 764         if match:
 765           start_address = int(match.group(1), 16)
 766           origin_offset = start_address
 767           flags = match.group(2)
 768           section = match.group(3)
 769           if section in code_sections:
 770             if dynamic or section in reloc_sections:
 771               start_address += mmap_info.addr
 772             size = int(match.group(4), 16)
 773             name = match.group(5)
 774             origin = mmap_info.filename
 775             code_map.Add(Code(name, start_address, start_address + size,
 776                               origin, origin_offset))
 777     finally:
 778       pipe.close()
 779     assert process.wait() == 0, "Failed to objdump %s" % mmap_info.filename
 780
 781   def Tick(self, pc):
 782     for i, mmap_info in enumerate(self.infos):
 783       if mmap_info.addr <= pc < (mmap_info.addr + mmap_info.len):
 784         mmap_info.ticks += 1
 785         self.infos[0], self.infos[i] = mmap_info, self.infos[0]
 786         return True
 787     return False
 788
 789   def _UniqueMmapName(self, mmap_info):
 790     name = mmap_info.filename
 791     index = 1
 792     while name in self.names:
 793       name = "%s-%d" % (mmap_info.filename, index)
 794       index += 1
 795     self.names.add(name)
 796     return name
 797
 798   def _LoadKernelSymbols(self, code_map):
 799     if not os.path.exists(KERNEL_ALLSYMS_FILE):
 800       print >>sys.stderr, "Warning: %s not found" % KERNEL_ALLSYMS_FILE
 801       return False
 802     kallsyms = open(KERNEL_ALLSYMS_FILE, "r")
 803     code = None
 804     for line in kallsyms:
 805       match = KERNEL_ALLSYMS_LINE_RE.match(line)
 806       if match:
 807         start_address = int(match.group(1), 16)
 808         end_address = start_address
 809         name = match.group(2)
 810         if code:
 811           code.end_address = start_address
 812           code_map.Add(code, 16)
 813         code = Code(name, start_address, end_address, "kernel", 0)
 814     return True
 815
 816
 817 def PrintReport(code_map, library_repo, arch, ticks, options):
 818   print "Ticks per symbol:"
 819   used_code = [code for code in code_map.UsedCode()]
 820   used_code.sort(key=lambda x: x.self_ticks, reverse=True)
 821   for i, code in enumerate(used_code):
 822     code_ticks = code.self_ticks
 823     print "%10d %5.1f%% %s [%s]" % (code_ticks, 100. * code_ticks / ticks,
 824                                     code.FullName(), code.origin)
 825     if options.disasm_all or i < options.disasm_top:
 826       code.PrintAnnotated(arch, options)
 827   print
 828   print "Ticks per library:"
 829   mmap_infos = [m for m in library_repo.infos if m.ticks > 0]
 830   mmap_infos.sort(key=lambda m: m.ticks, reverse=True)
 831   for mmap_info in mmap_infos:
 832     mmap_ticks = mmap_info.ticks
 833     print "%10d %5.1f%% %s" % (mmap_ticks, 100. * mmap_ticks / ticks,
 834                                mmap_info.unique_name)
 835
 836
 837 def PrintDot(code_map, options):
 838   print "digraph G {"
 839   for code in code_map.UsedCode():
 840     if code.self_ticks < 10:
 841       continue
 842     print "n%d [shape=box,label=\"%s\"];" % (code.id, code.name)
 843     if code.callee_ticks:
 844       for callee, ticks in code.callee_ticks.iteritems():
 845         print "n%d -> n%d [label=\"%d\"];" % (code.id, callee.id, ticks)
 846   print "}"
 847
 848
 849 if __name__ == "__main__":
 850   parser = optparse.OptionParser(USAGE)
 851   parser.add_option("--snapshot-log",
 852                     default="obj/release/snapshot.log",
 853                     help="V8 snapshot log file name [default: %default]")
 854   parser.add_option("--log",
 855                     default="v8.log",
 856                     help="V8 log file name [default: %default]")
 857   parser.add_option("--snapshot",
 858                     default=False,
 859                     action="store_true",
 860                     help="process V8 snapshot log [default: %default]")
 861   parser.add_option("--trace",
 862                     default="perf.data",
 863                     help="perf trace file name [default: %default]")
 864   parser.add_option("--kernel",
 865                     default=False,
 866                     action="store_true",
 867                     help="process kernel entries [default: %default]")
 868   parser.add_option("--disasm-top",
 869                     default=0,
 870                     type="int",
 871                     help=("number of top symbols to disassemble and annotate "
 872                           "[default: %default]"))
 873   parser.add_option("--disasm-all",
 874                     default=False,
 875                     action="store_true",
 876                     help=("disassemble and annotate all used symbols "
 877                           "[default: %default]"))
 878   parser.add_option("--dot",
 879                     default=False,
 880                     action="store_true",
 881                     help="produce dot output (WIP) [default: %default]")
 882   parser.add_option("--quiet", "-q",
 883                     default=False,
 884                     action="store_true",
 885                     help="no auxiliary messages [default: %default]")
 886   parser.add_option("--gc-fake-mmap",
 887                     default="/tmp/__v8_gc__",
 888                     help="gc fake mmap file [default: %default]")
 889   parser.add_option("--objdump",
 890                     default="/usr/bin/objdump",
 891                     help="objdump tool to use [default: %default]")
 892   parser.add_option("--host-root",
 893                     default="",
 894                     help="Path to the host root [default: %default]")
 895   options, args = parser.parse_args()
 896
 897   if not options.quiet:
 898     if options.snapshot:
 899       print "V8 logs: %s, %s, %s.ll" % (options.snapshot_log,
 900                                         options.log,
 901                                         options.log)
 902     else:
 903       print "V8 log: %s, %s.ll (no snapshot)" % (options.log, options.log)
 904     print "Perf trace file: %s" % options.trace
 905
 906   V8_GC_FAKE_MMAP = options.gc_fake_mmap
 907   HOST_ROOT = options.host_root
 908   if os.path.exists(options.objdump):
 909     disasm.OBJDUMP_BIN = options.objdump
 910     OBJDUMP_BIN = options.objdump
 911   else:
 912     print "Cannot find %s, falling back to default objdump" % options.objdump
 913
 914   # Stats.
 915   events = 0
 916   ticks = 0
 917   missed_ticks = 0
 918   really_missed_ticks = 0
 919   optimized_ticks = 0
 920   generated_ticks = 0
 921   v8_internal_ticks = 0
 922   mmap_time = 0
 923   sample_time = 0
 924
 925   # Process the snapshot log to fill the snapshot name map.
 926   snapshot_name_map = {}
 927   if options.snapshot:
 928     snapshot_log_reader = SnapshotLogReader(log_name=options.snapshot_log)
 929     snapshot_name_map = snapshot_log_reader.ReadNameMap()
 930
 931   # Initialize the log reader.
 932   code_map = CodeMap()
 933   log_reader = LogReader(log_name=options.log + ".ll",
 934                          code_map=code_map,
 935                          snapshot_pos_to_name=snapshot_name_map)
 936   if not options.quiet:
 937     print "Generated code architecture: %s" % log_reader.arch
 938     print
 939     sys.stdout.flush()
 940
 941   # Process the code and trace logs.
 942   library_repo = LibraryRepo()
 943   log_reader.ReadUpToGC()
 944   trace_reader = TraceReader(options.trace)
 945   while True:
 946     header, offset = trace_reader.ReadEventHeader()
 947     if not header:
 948       break
 949     events += 1
 950     if header.type == PERF_RECORD_MMAP:
 951       start = time.time()
 952       mmap_info = trace_reader.ReadMmap(header, offset)
 953       if mmap_info.filename == HOST_ROOT + V8_GC_FAKE_MMAP:
 954         log_reader.ReadUpToGC()
 955       else:
 956         library_repo.Load(mmap_info, code_map, options)
 957       mmap_time += time.time() - start
 958     elif header.type == PERF_RECORD_SAMPLE:
 959       ticks += 1
 960       start = time.time()
 961       sample = trace_reader.ReadSample(header, offset)
 962       code = code_map.Find(sample.ip)
 963       if code:
 964         code.Tick(sample.ip)
 965         if code.codetype == Code.OPTIMIZED:
 966           optimized_ticks += 1
 967         elif code.codetype == Code.FULL_CODEGEN:
 968           generated_ticks += 1
 969         elif code.codetype == Code.V8INTERNAL:
 970           v8_internal_ticks += 1
 971       else:
 972         missed_ticks += 1
 973       if not library_repo.Tick(sample.ip) and not code:
 974         really_missed_ticks += 1
 975       if trace_reader.callchain_supported:
 976         for ip in sample.ips:
 977           caller_code = code_map.Find(ip)
 978           if caller_code:
 979             if code:
 980               caller_code.CalleeTick(code)
 981             code = caller_code
 982       sample_time += time.time() - start
 983
 984   if options.dot:
 985     PrintDot(code_map, options)
 986   else:
 987     PrintReport(code_map, library_repo, log_reader.arch, ticks, options)
 988
 989     if not options.quiet:
 990       def PrintTicks(number, total, description):
 991         print("%10d %5.1f%% ticks in %s" %
 992               (number, 100.0*number/total, description))
 993       print
 994       print "Stats:"
 995       print "%10d total trace events" % events
 996       print "%10d total ticks" % ticks
 997       print "%10d ticks not in symbols" % missed_ticks
 998       unaccounted = "unaccounted ticks"
 999       if really_missed_ticks > 0:
1000         unaccounted += " (probably in the kernel, try --kernel)"
1001       PrintTicks(really_missed_ticks, ticks, unaccounted)
1002       PrintTicks(optimized_ticks, ticks, "ticks in optimized code")
1003       PrintTicks(generated_ticks, ticks, "ticks in other lazily compiled code")
1004       PrintTicks(v8_internal_ticks, ticks, "ticks in v8::internal::*")
1005       print "%10d total symbols" % len([c for c in code_map.AllCode()])
1006       print "%10d used symbols" % len([c for c in code_map.UsedCode()])
1007       print "%9.2fs library processing time" % mmap_time
1008       print "%9.2fs tick processing time" % sample_time
1009
1010   log_reader.Dispose()
1011   trace_reader.Dispose()