deps/v8/tools/ll_prof.py

   1 #!/usr/bin/env python
   2 #
   3 # Copyright 2012 the V8 project authors. All rights reserved.
   4 # Redistribution and use in source and binary forms, with or without
   5 # modification, are permitted provided that the following conditions are
   6 # met:
   7 #
   8 #     * Redistributions of source code must retain the above copyright
   9 #       notice, this list of conditions and the following disclaimer.
  10 #     * Redistributions in binary form must reproduce the above
  11 #       copyright notice, this list of conditions and the following
  12 #       disclaimer in the documentation and/or other materials provided
  13 #       with the distribution.
  14 #     * Neither the name of Google Inc. nor the names of its
  15 #       contributors may be used to endorse or promote products derived
  16 #       from this software without specific prior written permission.
  17 #
  18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30 import bisect
  31 import collections
  32 import ctypes
  33 import disasm
  34 import mmap
  35 import optparse
  36 import os
  37 import re
  38 import subprocess
  39 import sys
  40 import time
  41
  42
  43 USAGE="""usage: %prog [OPTION]...
  44
  45 Analyses V8 and perf logs to produce profiles.
  46
  47 Perf logs can be collected using a command like:
  48   $ perf record -R -e cycles -c 10000 -f -i ./d8 bench.js --ll-prof
  49   # -R: collect all data
  50   # -e cycles: use cpu-cycles event (run "perf list" for details)
  51   # -c 10000: write a sample after each 10000 events
  52   # -f: force output file overwrite
  53   # -i: limit profiling to our process and the kernel
  54   # --ll-prof shell flag enables the right V8 logs
  55 This will produce a binary trace file (perf.data) that %prog can analyse.
  56
  57 IMPORTANT:
  58   The kernel has an internal maximum for events per second, it is 100K by
  59   default. That's not enough for "-c 10000". Set it to some higher value:
  60   $ echo 10000000 | sudo tee /proc/sys/kernel/perf_event_max_sample_rate
  61   You can also make the warning about kernel address maps go away:
  62   $ echo 0 | sudo tee /proc/sys/kernel/kptr_restrict
  63
  64 We have a convenience script that handles all of the above for you:
  65   $ tools/run-llprof.sh ./d8 bench.js
  66
  67 Examples:
  68   # Print flat profile with annotated disassembly for the 10 top
  69   # symbols. Use default log names and include the snapshot log.
  70   $ %prog --snapshot --disasm-top=10
  71
  72   # Print flat profile with annotated disassembly for all used symbols.
  73   # Use default log names and include kernel symbols into analysis.
  74   $ %prog --disasm-all --kernel
  75
  76   # Print flat profile. Use custom log names.
  77   $ %prog --log=foo.log --snapshot-log=snap-foo.log --trace=foo.data --snapshot
  78 """
  79
  80
  81 JS_ORIGIN = "js"
  82 JS_SNAPSHOT_ORIGIN = "js-snapshot"
  83
  84 class Code(object):
  85   """Code object."""
  86
  87   _id = 0
  88   UNKNOWN = 0
  89   V8INTERNAL = 1
  90   FULL_CODEGEN = 2
  91   OPTIMIZED = 3
  92
  93   def __init__(self, name, start_address, end_address, origin, origin_offset):
  94     self.id = Code._id
  95     Code._id += 1
  96     self.name = name
  97     self.other_names = None
  98     self.start_address = start_address
  99     self.end_address = end_address
 100     self.origin = origin
 101     self.origin_offset = origin_offset
 102     self.self_ticks = 0
 103     self.self_ticks_map = None
 104     self.callee_ticks = None
 105     if name.startswith("LazyCompile:*"):
 106       self.codetype = Code.OPTIMIZED
 107     elif name.startswith("LazyCompile:"):
 108       self.codetype = Code.FULL_CODEGEN
 109     elif name.startswith("v8::internal::"):
 110       self.codetype = Code.V8INTERNAL
 111     else:
 112       self.codetype = Code.UNKNOWN
 113
 114   def AddName(self, name):
 115     assert self.name != name
 116     if self.other_names is None:
 117       self.other_names = [name]
 118       return
 119     if not name in self.other_names:
 120       self.other_names.append(name)
 121
 122   def FullName(self):
 123     if self.other_names is None:
 124       return self.name
 125     self.other_names.sort()
 126     return "%s (aka %s)" % (self.name, ", ".join(self.other_names))
 127
 128   def IsUsed(self):
 129     return self.self_ticks > 0 or self.callee_ticks is not None
 130
 131   def Tick(self, pc):
 132     self.self_ticks += 1
 133     if self.self_ticks_map is None:
 134       self.self_ticks_map = collections.defaultdict(lambda: 0)
 135     offset = pc - self.start_address
 136     self.self_ticks_map[offset] += 1
 137
 138   def CalleeTick(self, callee):
 139     if self.callee_ticks is None:
 140       self.callee_ticks = collections.defaultdict(lambda: 0)
 141     self.callee_ticks[callee] += 1
 142
 143   def PrintAnnotated(self, arch, options):
 144     if self.self_ticks_map is None:
 145       ticks_map = []
 146     else:
 147       ticks_map = self.self_ticks_map.items()
 148     # Convert the ticks map to offsets and counts arrays so that later
 149     # we can do binary search in the offsets array.
 150     ticks_map.sort(key=lambda t: t[0])
 151     ticks_offsets = [t[0] for t in ticks_map]
 152     ticks_counts = [t[1] for t in ticks_map]
 153     # Get a list of disassembled lines and their addresses.
 154     lines = self._GetDisasmLines(arch, options)
 155     if len(lines) == 0:
 156       return
 157     # Print annotated lines.
 158     address = lines[0][0]
 159     total_count = 0
 160     for i in xrange(len(lines)):
 161       start_offset = lines[i][0] - address
 162       if i == len(lines) - 1:
 163         end_offset = self.end_address - self.start_address
 164       else:
 165         end_offset = lines[i + 1][0] - address
 166       # Ticks (reported pc values) are not always precise, i.e. not
 167       # necessarily point at instruction starts. So we have to search
 168       # for ticks that touch the current instruction line.
 169       j = bisect.bisect_left(ticks_offsets, end_offset)
 170       count = 0
 171       for offset, cnt in reversed(zip(ticks_offsets[:j], ticks_counts[:j])):
 172         if offset < start_offset:
 173           break
 174         count += cnt
 175       total_count += count
 176       count = 100.0 * count / self.self_ticks
 177       if count >= 0.01:
 178         print "%15.2f %x: %s" % (count, lines[i][0], lines[i][1])
 179       else:
 180         print "%s %x: %s" % (" " * 15, lines[i][0], lines[i][1])
 181     print
 182     assert total_count == self.self_ticks, \
 183         "Lost ticks (%d != %d) in %s" % (total_count, self.self_ticks, self)
 184
 185   def __str__(self):
 186     return "%s [0x%x, 0x%x) size: %d origin: %s" % (
 187       self.name,
 188       self.start_address,
 189       self.end_address,
 190       self.end_address - self.start_address,
 191       self.origin)
 192
 193   def _GetDisasmLines(self, arch, options):
 194     if self.origin == JS_ORIGIN or self.origin == JS_SNAPSHOT_ORIGIN:
 195       inplace = False
 196       filename = options.log + ".ll"
 197     else:
 198       inplace = True
 199       filename = self.origin
 200     return disasm.GetDisasmLines(filename,
 201                                  self.origin_offset,
 202                                  self.end_address - self.start_address,
 203                                  arch,
 204                                  inplace)
 205
 206
 207 class CodePage(object):
 208   """Group of adjacent code objects."""
 209
 210   SHIFT = 20  # 1M pages
 211   SIZE = (1 << SHIFT)
 212   MASK = ~(SIZE - 1)
 213
 214   @staticmethod
 215   def PageAddress(address):
 216     return address & CodePage.MASK
 217
 218   @staticmethod
 219   def PageId(address):
 220     return address >> CodePage.SHIFT
 221
 222   @staticmethod
 223   def PageAddressFromId(id):
 224     return id << CodePage.SHIFT
 225
 226   def __init__(self, address):
 227     self.address = address
 228     self.code_objects = []
 229
 230   def Add(self, code):
 231     self.code_objects.append(code)
 232
 233   def Remove(self, code):
 234     self.code_objects.remove(code)
 235
 236   def Find(self, pc):
 237     code_objects = self.code_objects
 238     for i, code in enumerate(code_objects):
 239       if code.start_address <= pc < code.end_address:
 240         code_objects[0], code_objects[i] = code, code_objects[0]
 241         return code
 242     return None
 243
 244   def __iter__(self):
 245     return self.code_objects.__iter__()
 246
 247
 248 class CodeMap(object):
 249   """Code object map."""
 250
 251   def __init__(self):
 252     self.pages = {}
 253     self.min_address = 1 << 64
 254     self.max_address = -1
 255
 256   def Add(self, code, max_pages=-1):
 257     page_id = CodePage.PageId(code.start_address)
 258     limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
 259     pages = 0
 260     while page_id < limit_id:
 261       if max_pages >= 0 and pages > max_pages:
 262         print >>sys.stderr, \
 263             "Warning: page limit (%d) reached for %s [%s]" % (
 264             max_pages, code.name, code.origin)
 265         break
 266       if page_id in self.pages:
 267         page = self.pages[page_id]
 268       else:
 269         page = CodePage(CodePage.PageAddressFromId(page_id))
 270         self.pages[page_id] = page
 271       page.Add(code)
 272       page_id += 1
 273       pages += 1
 274     self.min_address = min(self.min_address, code.start_address)
 275     self.max_address = max(self.max_address, code.end_address)
 276
 277   def Remove(self, code):
 278     page_id = CodePage.PageId(code.start_address)
 279     limit_id = CodePage.PageId(code.end_address + CodePage.SIZE - 1)
 280     removed = False
 281     while page_id < limit_id:
 282       if page_id not in self.pages:
 283         page_id += 1
 284         continue
 285       page = self.pages[page_id]
 286       page.Remove(code)
 287       removed = True
 288       page_id += 1
 289     return removed
 290
 291   def AllCode(self):
 292     for page in self.pages.itervalues():
 293       for code in page:
 294         if CodePage.PageAddress(code.start_address) == page.address:
 295           yield code
 296
 297   def UsedCode(self):
 298     for code in self.AllCode():
 299       if code.IsUsed():
 300         yield code
 301
 302   def Print(self):
 303     for code in self.AllCode():
 304       print code
 305
 306   def Find(self, pc):
 307     if pc < self.min_address or pc >= self.max_address:
 308       return None
 309     page_id = CodePage.PageId(pc)
 310     if page_id not in self.pages:
 311       return None
 312     return self.pages[page_id].Find(pc)
 313
 314
 315 class CodeInfo(object):
 316   """Generic info about generated code objects."""
 317
 318   def __init__(self, arch, header_size):
 319     self.arch = arch
 320     self.header_size = header_size
 321
 322
 323 class SnapshotLogReader(object):
 324   """V8 snapshot log reader."""
 325
 326   _SNAPSHOT_CODE_NAME_RE = re.compile(
 327     r"snapshot-code-name,(\d+),\"(.*)\"")
 328
 329   def __init__(self, log_name):
 330     self.log_name = log_name
 331
 332   def ReadNameMap(self):
 333     log = open(self.log_name, "r")
 334     try:
 335       snapshot_pos_to_name = {}
 336       for line in log:
 337         match = SnapshotLogReader._SNAPSHOT_CODE_NAME_RE.match(line)
 338         if match:
 339           pos = int(match.group(1))
 340           name = match.group(2)
 341           snapshot_pos_to_name[pos] = name
 342     finally:
 343       log.close()
 344     return snapshot_pos_to_name
 345
 346
 347 class LogReader(object):
 348   """V8 low-level (binary) log reader."""
 349
 350   _ARCH_TO_POINTER_TYPE_MAP = {
 351     "ia32": ctypes.c_uint32,
 352     "arm": ctypes.c_uint32,
 353     "mips": ctypes.c_uint32,
 354     "x64": ctypes.c_uint64,
 355     "arm64": ctypes.c_uint64
 356   }
 357
 358   _CODE_CREATE_TAG = "C"
 359   _CODE_MOVE_TAG = "M"
 360   _CODE_DELETE_TAG = "D"
 361   _SNAPSHOT_POSITION_TAG = "P"
 362   _CODE_MOVING_GC_TAG = "G"
 363
 364   def __init__(self, log_name, code_map, snapshot_pos_to_name):
 365     self.log_file = open(log_name, "r")
 366     self.log = mmap.mmap(self.log_file.fileno(), 0, mmap.MAP_PRIVATE)
 367     self.log_pos = 0
 368     self.code_map = code_map
 369     self.snapshot_pos_to_name = snapshot_pos_to_name
 370     self.address_to_snapshot_name = {}
 371
 372     self.arch = self.log[:self.log.find("\0")]
 373     self.log_pos += len(self.arch) + 1
 374     assert self.arch in LogReader._ARCH_TO_POINTER_TYPE_MAP, \
 375         "Unsupported architecture %s" % self.arch
 376     pointer_type = LogReader._ARCH_TO_POINTER_TYPE_MAP[self.arch]
 377
 378     self.code_create_struct = LogReader._DefineStruct([
 379         ("name_size", ctypes.c_int32),
 380         ("code_address", pointer_type),
 381         ("code_size", ctypes.c_int32)])
 382
 383     self.code_move_struct = LogReader._DefineStruct([
 384         ("from_address", pointer_type),
 385         ("to_address", pointer_type)])
 386
 387     self.code_delete_struct = LogReader._DefineStruct([
 388         ("address", pointer_type)])
 389
 390     self.snapshot_position_struct = LogReader._DefineStruct([
 391         ("address", pointer_type),
 392         ("position", ctypes.c_int32)])
 393
 394   def ReadUpToGC(self):
 395     while self.log_pos < self.log.size():
 396       tag = self.log[self.log_pos]
 397       self.log_pos += 1
 398
 399       if tag == LogReader._CODE_MOVING_GC_TAG:
 400         self.address_to_snapshot_name.clear()
 401         return
 402
 403       if tag == LogReader._CODE_CREATE_TAG:
 404         event = self.code_create_struct.from_buffer(self.log, self.log_pos)
 405         self.log_pos += ctypes.sizeof(event)
 406         start_address = event.code_address
 407         end_address = start_address + event.code_size
 408         if start_address in self.address_to_snapshot_name:
 409           name = self.address_to_snapshot_name[start_address]
 410           origin = JS_SNAPSHOT_ORIGIN
 411         else:
 412           name = self.log[self.log_pos:self.log_pos + event.name_size]
 413           origin = JS_ORIGIN
 414         self.log_pos += event.name_size
 415         origin_offset = self.log_pos
 416         self.log_pos += event.code_size
 417         code = Code(name, start_address, end_address, origin, origin_offset)
 418         conficting_code = self.code_map.Find(start_address)
 419         if conficting_code:
 420           if not (conficting_code.start_address == code.start_address and
 421             conficting_code.end_address == code.end_address):
 422             self.code_map.Remove(conficting_code)
 423           else:
 424             LogReader._HandleCodeConflict(conficting_code, code)
 425             # TODO(vitalyr): this warning is too noisy because of our
 426             # attempts to reconstruct code log from the snapshot.
 427             # print >>sys.stderr, \
 428             #     "Warning: Skipping duplicate code log entry %s" % code
 429             continue
 430         self.code_map.Add(code)
 431         continue
 432
 433       if tag == LogReader._CODE_MOVE_TAG:
 434         event = self.code_move_struct.from_buffer(self.log, self.log_pos)
 435         self.log_pos += ctypes.sizeof(event)
 436         old_start_address = event.from_address
 437         new_start_address = event.to_address
 438         if old_start_address == new_start_address:
 439           # Skip useless code move entries.
 440           continue
 441         code = self.code_map.Find(old_start_address)
 442         if not code:
 443           print >>sys.stderr, "Warning: Not found %x" % old_start_address
 444           continue
 445         assert code.start_address == old_start_address, \
 446             "Inexact move address %x for %s" % (old_start_address, code)
 447         self.code_map.Remove(code)
 448         size = code.end_address - code.start_address
 449         code.start_address = new_start_address
 450         code.end_address = new_start_address + size
 451         self.code_map.Add(code)
 452         continue
 453
 454       if tag == LogReader._CODE_DELETE_TAG:
 455         event = self.code_delete_struct.from_buffer(self.log, self.log_pos)
 456         self.log_pos += ctypes.sizeof(event)
 457         old_start_address = event.address
 458         code = self.code_map.Find(old_start_address)
 459         if not code:
 460           print >>sys.stderr, "Warning: Not found %x" % old_start_address
 461           continue
 462         assert code.start_address == old_start_address, \
 463             "Inexact delete address %x for %s" % (old_start_address, code)
 464         self.code_map.Remove(code)
 465         continue
 466
 467       if tag == LogReader._SNAPSHOT_POSITION_TAG:
 468         event = self.snapshot_position_struct.from_buffer(self.log,
 469                                                           self.log_pos)
 470         self.log_pos += ctypes.sizeof(event)
 471         start_address = event.address
 472         snapshot_pos = event.position
 473         if snapshot_pos in self.snapshot_pos_to_name:
 474           self.address_to_snapshot_name[start_address] = \
 475               self.snapshot_pos_to_name[snapshot_pos]
 476         continue
 477
 478       assert False, "Unknown tag %s" % tag
 479
 480   def Dispose(self):
 481     self.log.close()
 482     self.log_file.close()
 483
 484   @staticmethod
 485   def _DefineStruct(fields):
 486     class Struct(ctypes.Structure):
 487       _fields_ = fields
 488     return Struct
 489
 490   @staticmethod
 491   def _HandleCodeConflict(old_code, new_code):
 492     assert (old_code.start_address == new_code.start_address and
 493             old_code.end_address == new_code.end_address), \
 494         "Conficting code log entries %s and %s" % (old_code, new_code)
 495     if old_code.name == new_code.name:
 496       return
 497     # Code object may be shared by a few functions. Collect the full
 498     # set of names.
 499     old_code.AddName(new_code.name)
 500
 501
 502 class Descriptor(object):
 503   """Descriptor of a structure in the binary trace log."""
 504
 505   CTYPE_MAP = {
 506     "u16": ctypes.c_uint16,
 507     "u32": ctypes.c_uint32,
 508     "u64": ctypes.c_uint64
 509   }
 510
 511   def __init__(self, fields):
 512     class TraceItem(ctypes.Structure):
 513       _fields_ = Descriptor.CtypesFields(fields)
 514
 515       def __str__(self):
 516         return ", ".join("%s: %s" % (field, self.__getattribute__(field))
 517                          for field, _ in TraceItem._fields_)
 518
 519     self.ctype = TraceItem
 520
 521   def Read(self, trace, offset):
 522     return self.ctype.from_buffer(trace, offset)
 523
 524   @staticmethod
 525   def CtypesFields(fields):
 526     return [(field, Descriptor.CTYPE_MAP[format]) for (field, format) in fields]
 527
 528
 529 # Please see http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=tree;f=tools/perf
 530 # for the gory details.
 531
 532
 533 # Reference: struct perf_file_header in kernel/tools/perf/util/header.h
 534 TRACE_HEADER_DESC = Descriptor([
 535   ("magic", "u64"),
 536   ("size", "u64"),
 537   ("attr_size", "u64"),
 538   ("attrs_offset", "u64"),
 539   ("attrs_size", "u64"),
 540   ("data_offset", "u64"),
 541   ("data_size", "u64"),
 542   ("event_types_offset", "u64"),
 543   ("event_types_size", "u64")
 544 ])
 545
 546
 547 # Reference: /usr/include/linux/perf_event.h
 548 PERF_EVENT_ATTR_DESC = Descriptor([
 549   ("type", "u32"),
 550   ("size", "u32"),
 551   ("config", "u64"),
 552   ("sample_period_or_freq", "u64"),
 553   ("sample_type", "u64"),
 554   ("read_format", "u64"),
 555   ("flags", "u64"),
 556   ("wakeup_events_or_watermark", "u32"),
 557   ("bp_type", "u32"),
 558   ("bp_addr", "u64"),
 559   ("bp_len", "u64")
 560 ])
 561
 562
 563 # Reference: /usr/include/linux/perf_event.h
 564 PERF_EVENT_HEADER_DESC = Descriptor([
 565   ("type", "u32"),
 566   ("misc", "u16"),
 567   ("size", "u16")
 568 ])
 569
 570
 571 # Reference: kernel/events/core.c
 572 PERF_MMAP_EVENT_BODY_DESC = Descriptor([
 573   ("pid", "u32"),
 574   ("tid", "u32"),
 575   ("addr", "u64"),
 576   ("len", "u64"),
 577   ("pgoff", "u64")
 578 ])
 579
 580
 581 # perf_event_attr.sample_type bits control the set of
 582 # perf_sample_event fields.
 583 PERF_SAMPLE_IP = 1 << 0
 584 PERF_SAMPLE_TID = 1 << 1
 585 PERF_SAMPLE_TIME = 1 << 2
 586 PERF_SAMPLE_ADDR = 1 << 3
 587 PERF_SAMPLE_READ = 1 << 4
 588 PERF_SAMPLE_CALLCHAIN = 1 << 5
 589 PERF_SAMPLE_ID = 1 << 6
 590 PERF_SAMPLE_CPU = 1 << 7
 591 PERF_SAMPLE_PERIOD = 1 << 8
 592 PERF_SAMPLE_STREAM_ID = 1 << 9
 593 PERF_SAMPLE_RAW = 1 << 10
 594
 595
 596 # Reference: /usr/include/perf_event.h, the comment for PERF_RECORD_SAMPLE.
 597 PERF_SAMPLE_EVENT_BODY_FIELDS = [
 598   ("ip", "u64", PERF_SAMPLE_IP),
 599   ("pid", "u32", PERF_SAMPLE_TID),
 600   ("tid", "u32", PERF_SAMPLE_TID),
 601   ("time", "u64", PERF_SAMPLE_TIME),
 602   ("addr", "u64", PERF_SAMPLE_ADDR),
 603   ("id", "u64", PERF_SAMPLE_ID),
 604   ("stream_id", "u64", PERF_SAMPLE_STREAM_ID),
 605   ("cpu", "u32", PERF_SAMPLE_CPU),
 606   ("res", "u32", PERF_SAMPLE_CPU),
 607   ("period", "u64", PERF_SAMPLE_PERIOD),
 608   # Don't want to handle read format that comes after the period and
 609   # before the callchain and has variable size.
 610   ("nr", "u64", PERF_SAMPLE_CALLCHAIN)
 611   # Raw data follows the callchain and is ignored.
 612 ]
 613
 614
 615 PERF_SAMPLE_EVENT_IP_FORMAT = "u64"
 616
 617
 618 PERF_RECORD_MMAP = 1
 619 PERF_RECORD_SAMPLE = 9
 620
 621
 622 class TraceReader(object):
 623   """Perf (linux-2.6/tools/perf) trace file reader."""
 624
 625   _TRACE_HEADER_MAGIC = 4993446653023372624
 626
 627   def __init__(self, trace_name):
 628     self.trace_file = open(trace_name, "r")
 629     self.trace = mmap.mmap(self.trace_file.fileno(), 0, mmap.MAP_PRIVATE)
 630     self.trace_header = TRACE_HEADER_DESC.Read(self.trace, 0)
 631     if self.trace_header.magic != TraceReader._TRACE_HEADER_MAGIC:
 632       print >>sys.stderr, "Warning: unsupported trace header magic"
 633     self.offset = self.trace_header.data_offset
 634     self.limit = self.trace_header.data_offset + self.trace_header.data_size
 635     assert self.limit <= self.trace.size(), \
 636         "Trace data limit exceeds trace file size"
 637     self.header_size = ctypes.sizeof(PERF_EVENT_HEADER_DESC.ctype)
 638     assert self.trace_header.attrs_size != 0, \
 639         "No perf event attributes found in the trace"
 640     perf_event_attr = PERF_EVENT_ATTR_DESC.Read(self.trace,
 641                                                 self.trace_header.attrs_offset)
 642     self.sample_event_body_desc = self._SampleEventBodyDesc(
 643         perf_event_attr.sample_type)
 644     self.callchain_supported = \
 645         (perf_event_attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0
 646     if self.callchain_supported:
 647       self.ip_struct = Descriptor.CTYPE_MAP[PERF_SAMPLE_EVENT_IP_FORMAT]
 648       self.ip_size = ctypes.sizeof(self.ip_struct)
 649
 650   def ReadEventHeader(self):
 651     if self.offset >= self.limit:
 652       return None, 0
 653     offset = self.offset
 654     header = PERF_EVENT_HEADER_DESC.Read(self.trace, self.offset)
 655     self.offset += header.size
 656     return header, offset
 657
 658   def ReadMmap(self, header, offset):
 659     mmap_info = PERF_MMAP_EVENT_BODY_DESC.Read(self.trace,
 660                                                offset + self.header_size)
 661     # Read null-terminated filename.
 662     filename = self.trace[offset + self.header_size + ctypes.sizeof(mmap_info):
 663                           offset + header.size]
 664     mmap_info.filename = HOST_ROOT + filename[:filename.find(chr(0))]
 665     return mmap_info
 666
 667   def ReadSample(self, header, offset):
 668     sample = self.sample_event_body_desc.Read(self.trace,
 669                                               offset + self.header_size)
 670     if not self.callchain_supported:
 671       return sample
 672     sample.ips = []
 673     offset += self.header_size + ctypes.sizeof(sample)
 674     for _ in xrange(sample.nr):
 675       sample.ips.append(
 676         self.ip_struct.from_buffer(self.trace, offset).value)
 677       offset += self.ip_size
 678     return sample
 679
 680   def Dispose(self):
 681     self.trace.close()
 682     self.trace_file.close()
 683
 684   def _SampleEventBodyDesc(self, sample_type):
 685     assert (sample_type & PERF_SAMPLE_READ) == 0, \
 686            "Can't hande read format in samples"
 687     fields = [(field, format)
 688               for (field, format, bit) in PERF_SAMPLE_EVENT_BODY_FIELDS
 689               if (bit & sample_type) != 0]
 690     return Descriptor(fields)
 691
 692
 693 OBJDUMP_SECTION_HEADER_RE = re.compile(
 694   r"^\s*\d+\s(\.\S+)\s+[a-f0-9]")
 695 OBJDUMP_SYMBOL_LINE_RE = re.compile(
 696   r"^([a-f0-9]+)\s(.{7})\s(\S+)\s+([a-f0-9]+)\s+(?:\.hidden\s+)?(.*)$")
 697 OBJDUMP_DYNAMIC_SYMBOLS_START_RE = re.compile(
 698   r"^DYNAMIC SYMBOL TABLE")
 699 OBJDUMP_SKIP_RE = re.compile(
 700   r"^.*ld\.so\.cache$")
 701 KERNEL_ALLSYMS_FILE = "/proc/kallsyms"
 702 PERF_KERNEL_ALLSYMS_RE = re.compile(
 703   r".*kallsyms.*")
 704 KERNEL_ALLSYMS_LINE_RE = re.compile(
 705   r"^([a-f0-9]+)\s(?:t|T)\s(\S+)$")
 706
 707
 708 class LibraryRepo(object):
 709   def __init__(self):
 710     self.infos = []
 711     self.names = set()
 712     self.ticks = {}
 713
 714
 715   def HasDynamicSymbols(self, filename):
 716     if filename.endswith(".ko"): return False
 717     process = subprocess.Popen(
 718       "%s -h %s" % (OBJDUMP_BIN, filename),
 719       shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
 720     pipe = process.stdout
 721     try:
 722       for line in pipe:
 723         match = OBJDUMP_SECTION_HEADER_RE.match(line)
 724         if match and match.group(1) == 'dynsym': return True
 725     finally:
 726       pipe.close()
 727     assert process.wait() == 0, "Failed to objdump -h %s" % filename
 728     return False
 729
 730
 731   def Load(self, mmap_info, code_map, options):
 732     # Skip kernel mmaps when requested using the fact that their tid
 733     # is 0.
 734     if mmap_info.tid == 0 and not options.kernel:
 735       return True
 736     if OBJDUMP_SKIP_RE.match(mmap_info.filename):
 737       return True
 738     if PERF_KERNEL_ALLSYMS_RE.match(mmap_info.filename):
 739       return self._LoadKernelSymbols(code_map)
 740     self.infos.append(mmap_info)
 741     mmap_info.ticks = 0
 742     mmap_info.unique_name = self._UniqueMmapName(mmap_info)
 743     if not os.path.exists(mmap_info.filename):
 744       return True
 745     # Request section headers (-h), symbols (-t), and dynamic symbols
 746     # (-T) from objdump.
 747     # Unfortunately, section headers span two lines, so we have to
 748     # keep the just seen section name (from the first line in each
 749     # section header) in the after_section variable.
 750     if self.HasDynamicSymbols(mmap_info.filename):
 751       dynamic_symbols = "-T"
 752     else:
 753       dynamic_symbols = ""
 754     process = subprocess.Popen(
 755       "%s -h -t %s -C %s" % (OBJDUMP_BIN, dynamic_symbols, mmap_info.filename),
 756       shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
 757     pipe = process.stdout
 758     after_section = None
 759     code_sections = set()
 760     reloc_sections = set()
 761     dynamic = False
 762     try:
 763       for line in pipe:
 764         if after_section:
 765           if line.find("CODE") != -1:
 766             code_sections.add(after_section)
 767           if line.find("RELOC") != -1:
 768             reloc_sections.add(after_section)
 769           after_section = None
 770           continue
 771
 772         match = OBJDUMP_SECTION_HEADER_RE.match(line)
 773         if match:
 774           after_section = match.group(1)
 775           continue
 776
 777         if OBJDUMP_DYNAMIC_SYMBOLS_START_RE.match(line):
 778           dynamic = True
 779           continue
 780
 781         match = OBJDUMP_SYMBOL_LINE_RE.match(line)
 782         if match:
 783           start_address = int(match.group(1), 16)
 784           origin_offset = start_address
 785           flags = match.group(2)
 786           section = match.group(3)
 787           if section in code_sections:
 788             if dynamic or section in reloc_sections:
 789               start_address += mmap_info.addr
 790             size = int(match.group(4), 16)
 791             name = match.group(5)
 792             origin = mmap_info.filename
 793             code_map.Add(Code(name, start_address, start_address + size,
 794                               origin, origin_offset))
 795     finally:
 796       pipe.close()
 797     assert process.wait() == 0, "Failed to objdump %s" % mmap_info.filename
 798
 799   def Tick(self, pc):
 800     for i, mmap_info in enumerate(self.infos):
 801       if mmap_info.addr <= pc < (mmap_info.addr + mmap_info.len):
 802         mmap_info.ticks += 1
 803         self.infos[0], self.infos[i] = mmap_info, self.infos[0]
 804         return True
 805     return False
 806
 807   def _UniqueMmapName(self, mmap_info):
 808     name = mmap_info.filename
 809     index = 1
 810     while name in self.names:
 811       name = "%s-%d" % (mmap_info.filename, index)
 812       index += 1
 813     self.names.add(name)
 814     return name
 815
 816   def _LoadKernelSymbols(self, code_map):
 817     if not os.path.exists(KERNEL_ALLSYMS_FILE):
 818       print >>sys.stderr, "Warning: %s not found" % KERNEL_ALLSYMS_FILE
 819       return False
 820     kallsyms = open(KERNEL_ALLSYMS_FILE, "r")
 821     code = None
 822     for line in kallsyms:
 823       match = KERNEL_ALLSYMS_LINE_RE.match(line)
 824       if match:
 825         start_address = int(match.group(1), 16)
 826         end_address = start_address
 827         name = match.group(2)
 828         if code:
 829           code.end_address = start_address
 830           code_map.Add(code, 16)
 831         code = Code(name, start_address, end_address, "kernel", 0)
 832     return True
 833
 834
 835 def PrintReport(code_map, library_repo, arch, ticks, options):
 836   print "Ticks per symbol:"
 837   used_code = [code for code in code_map.UsedCode()]
 838   used_code.sort(key=lambda x: x.self_ticks, reverse=True)
 839   for i, code in enumerate(used_code):
 840     code_ticks = code.self_ticks
 841     print "%10d %5.1f%% %s [%s]" % (code_ticks, 100. * code_ticks / ticks,
 842                                     code.FullName(), code.origin)
 843     if options.disasm_all or i < options.disasm_top:
 844       code.PrintAnnotated(arch, options)
 845   print
 846   print "Ticks per library:"
 847   mmap_infos = [m for m in library_repo.infos if m.ticks > 0]
 848   mmap_infos.sort(key=lambda m: m.ticks, reverse=True)
 849   for mmap_info in mmap_infos:
 850     mmap_ticks = mmap_info.ticks
 851     print "%10d %5.1f%% %s" % (mmap_ticks, 100. * mmap_ticks / ticks,
 852                                mmap_info.unique_name)
 853
 854
 855 def PrintDot(code_map, options):
 856   print "digraph G {"
 857   for code in code_map.UsedCode():
 858     if code.self_ticks < 10:
 859       continue
 860     print "n%d [shape=box,label=\"%s\"];" % (code.id, code.name)
 861     if code.callee_ticks:
 862       for callee, ticks in code.callee_ticks.iteritems():
 863         print "n%d -> n%d [label=\"%d\"];" % (code.id, callee.id, ticks)
 864   print "}"
 865
 866
 867 if __name__ == "__main__":
 868   parser = optparse.OptionParser(USAGE)
 869   parser.add_option("--snapshot-log",
 870                     default="obj/release/snapshot.log",
 871                     help="V8 snapshot log file name [default: %default]")
 872   parser.add_option("--log",
 873                     default="v8.log",
 874                     help="V8 log file name [default: %default]")
 875   parser.add_option("--snapshot",
 876                     default=False,
 877                     action="store_true",
 878                     help="process V8 snapshot log [default: %default]")
 879   parser.add_option("--trace",
 880                     default="perf.data",
 881                     help="perf trace file name [default: %default]")
 882   parser.add_option("--kernel",
 883                     default=False,
 884                     action="store_true",
 885                     help="process kernel entries [default: %default]")
 886   parser.add_option("--disasm-top",
 887                     default=0,
 888                     type="int",
 889                     help=("number of top symbols to disassemble and annotate "
 890                           "[default: %default]"))
 891   parser.add_option("--disasm-all",
 892                     default=False,
 893                     action="store_true",
 894                     help=("disassemble and annotate all used symbols "
 895                           "[default: %default]"))
 896   parser.add_option("--dot",
 897                     default=False,
 898                     action="store_true",
 899                     help="produce dot output (WIP) [default: %default]")
 900   parser.add_option("--quiet", "-q",
 901                     default=False,
 902                     action="store_true",
 903                     help="no auxiliary messages [default: %default]")
 904   parser.add_option("--gc-fake-mmap",
 905                     default="/tmp/__v8_gc__",
 906                     help="gc fake mmap file [default: %default]")
 907   parser.add_option("--objdump",
 908                     default="/usr/bin/objdump",
 909                     help="objdump tool to use [default: %default]")
 910   parser.add_option("--host-root",
 911                     default="",
 912                     help="Path to the host root [default: %default]")
 913   options, args = parser.parse_args()
 914
 915   if not options.quiet:
 916     if options.snapshot:
 917       print "V8 logs: %s, %s, %s.ll" % (options.snapshot_log,
 918                                         options.log,
 919                                         options.log)
 920     else:
 921       print "V8 log: %s, %s.ll (no snapshot)" % (options.log, options.log)
 922     print "Perf trace file: %s" % options.trace
 923
 924   V8_GC_FAKE_MMAP = options.gc_fake_mmap
 925   HOST_ROOT = options.host_root
 926   if os.path.exists(options.objdump):
 927     disasm.OBJDUMP_BIN = options.objdump
 928     OBJDUMP_BIN = options.objdump
 929   else:
 930     print "Cannot find %s, falling back to default objdump" % options.objdump
 931
 932   # Stats.
 933   events = 0
 934   ticks = 0
 935   missed_ticks = 0
 936   really_missed_ticks = 0
 937   optimized_ticks = 0
 938   generated_ticks = 0
 939   v8_internal_ticks = 0
 940   mmap_time = 0
 941   sample_time = 0
 942
 943   # Process the snapshot log to fill the snapshot name map.
 944   snapshot_name_map = {}
 945   if options.snapshot:
 946     snapshot_log_reader = SnapshotLogReader(log_name=options.snapshot_log)
 947     snapshot_name_map = snapshot_log_reader.ReadNameMap()
 948
 949   # Initialize the log reader.
 950   code_map = CodeMap()
 951   log_reader = LogReader(log_name=options.log + ".ll",
 952                          code_map=code_map,
 953                          snapshot_pos_to_name=snapshot_name_map)
 954   if not options.quiet:
 955     print "Generated code architecture: %s" % log_reader.arch
 956     print
 957     sys.stdout.flush()
 958
 959   # Process the code and trace logs.
 960   library_repo = LibraryRepo()
 961   log_reader.ReadUpToGC()
 962   trace_reader = TraceReader(options.trace)
 963   while True:
 964     header, offset = trace_reader.ReadEventHeader()
 965     if not header:
 966       break
 967     events += 1
 968     if header.type == PERF_RECORD_MMAP:
 969       start = time.time()
 970       mmap_info = trace_reader.ReadMmap(header, offset)
 971       if mmap_info.filename == HOST_ROOT + V8_GC_FAKE_MMAP:
 972         log_reader.ReadUpToGC()
 973       else:
 974         library_repo.Load(mmap_info, code_map, options)
 975       mmap_time += time.time() - start
 976     elif header.type == PERF_RECORD_SAMPLE:
 977       ticks += 1
 978       start = time.time()
 979       sample = trace_reader.ReadSample(header, offset)
 980       code = code_map.Find(sample.ip)
 981       if code:
 982         code.Tick(sample.ip)
 983         if code.codetype == Code.OPTIMIZED:
 984           optimized_ticks += 1
 985         elif code.codetype == Code.FULL_CODEGEN:
 986           generated_ticks += 1
 987         elif code.codetype == Code.V8INTERNAL:
 988           v8_internal_ticks += 1
 989       else:
 990         missed_ticks += 1
 991       if not library_repo.Tick(sample.ip) and not code:
 992         really_missed_ticks += 1
 993       if trace_reader.callchain_supported:
 994         for ip in sample.ips:
 995           caller_code = code_map.Find(ip)
 996           if caller_code:
 997             if code:
 998               caller_code.CalleeTick(code)
 999             code = caller_code
1000       sample_time += time.time() - start
1001
1002   if options.dot:
1003     PrintDot(code_map, options)
1004   else:
1005     PrintReport(code_map, library_repo, log_reader.arch, ticks, options)
1006
1007     if not options.quiet:
1008       def PrintTicks(number, total, description):
1009         print("%10d %5.1f%% ticks in %s" %
1010               (number, 100.0*number/total, description))
1011       print
1012       print "Stats:"
1013       print "%10d total trace events" % events
1014       print "%10d total ticks" % ticks
1015       print "%10d ticks not in symbols" % missed_ticks
1016       unaccounted = "unaccounted ticks"
1017       if really_missed_ticks > 0:
1018         unaccounted += " (probably in the kernel, try --kernel)"
1019       PrintTicks(really_missed_ticks, ticks, unaccounted)
1020       PrintTicks(optimized_ticks, ticks, "ticks in optimized code")
1021       PrintTicks(generated_ticks, ticks, "ticks in other lazily compiled code")
1022       PrintTicks(v8_internal_ticks, ticks, "ticks in v8::internal::*")
1023       print "%10d total symbols" % len([c for c in code_map.AllCode()])
1024       print "%10d used symbols" % len([c for c in code_map.UsedCode()])
1025       print "%9.2fs library processing time" % mmap_time
1026       print "%9.2fs tick processing time" % sample_time
1027
1028   log_reader.Dispose()
1029   trace_reader.Dispose()