Added support in the profiler for creating 'regions' that cover part
authorchristian.plesner.hansen@gmail.com <christian.plesner.hansen@gmail.com@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Fri, 19 Dec 2008 13:12:43 +0000 (13:12 +0000)
committerchristian.plesner.hansen@gmail.com <christian.plesner.hansen@gmail.com@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Fri, 19 Dec 2008 13:12:43 +0000 (13:12 +0000)
of the generated code.  These can be used by the profiler to
categorize the ticks that occur within generated code and thereby show
more detailed information about where time is spent in generated code.
For instance, this is what the profiler displayed for a simple regexp
benchmark with irregexp-native before:

[JavaScript]:
   total  nonlib   name
   87.2%   87.2%   RegExp: (?:\w*\W+)*

This is what we can display now:

[JavaScript]:
   total  nonlib   name
   87.2%   87.2%   RegExp: (?:\w*\W+)*
                   -  53.0%  56.7% BranchOrBacktrack
                   -  14.9%  59.8% CheckCharacterLT
                   -  13.7%  20.4% CheckStackLimit
                   -   6.7%   6.7% SafeCall
                   -   2.7%   7.0% CheckCharacterGT
                   -   2.4%   2.4% SafeReturn
                   -   2.1%   2.1% LoadCurrentCharacter
                   -   1.8%   1.8% PushRegister
                   -   0.9%   0.9% PopRegister
                   -   0.9%   0.9% AdvanceRegister
                   -   0.3%   0.3% PopCurrentPosition
                   -   0.3%   0.3% CheckGreedyLoop
                   -   0.0%  20.4% PushBacktrack
                   -   0.0%  22.3% CheckCharacter
                   -   0.0%   2.4% IfRegisterLT

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@1010 ce2b1a6d-e550-0410-aec6-3dcde31c8c00

src/assembler-ia32.cc
src/assembler.h
src/globals.h
src/heap.cc
src/log.cc
src/log.h
src/macro-assembler-ia32.h
tools/tickprocessor.py

index c04fd57..f487c41 100644 (file)
@@ -344,6 +344,7 @@ void Assembler::GetCode(CodeDesc* desc) {
   desc->buffer_size = buffer_size_;
   desc->instr_size = pc_offset();
   desc->reloc_size = (buffer_ + buffer_size_) - reloc_info_writer.pos();
+  desc->origin = this;
 
   Counters::reloc_info_size.Increment(desc->reloc_size);
 }
index 971b950..7bd117c 100644 (file)
@@ -382,6 +382,22 @@ class RelocIterator: public Malloced {
 };
 
 
+// A stack-allocated code region logs a name for the code generated
+// while the region is in effect.  This information is used by the
+// profiler to categorize ticks within generated code.
+class CodeRegion BASE_EMBEDDED {
+ public:
+  inline CodeRegion(Assembler* assm, const char *name) : assm_(assm) {
+    LOG(BeginCodeRegionEvent(this, assm, name));
+  }
+  inline ~CodeRegion() {
+    LOG(EndCodeRegionEvent(this, assm_));
+  }
+ private:
+  Assembler* assm_;
+};
+
+
 //------------------------------------------------------------------------------
 // External function
 
index e174bf3..2b5f3ac 100644 (file)
@@ -145,6 +145,7 @@ class Assembler;
 class BreakableStatement;
 class Code;
 class CodeGenerator;
+class CodeRegion;
 class CodeStub;
 class Context;
 class Debug;
@@ -259,6 +260,7 @@ struct CodeDesc {
   int buffer_size;
   int instr_size;
   int reloc_size;
+  Assembler* origin;
 };
 
 
index a10e3f9..c4a860b 100644 (file)
@@ -1663,6 +1663,7 @@ Object* Heap::CreateCode(const CodeDesc& desc,
   // through the self_reference parameter.
   code->CopyFrom(desc);
   if (sinfo != NULL) sinfo->Serialize(code);  // write scope info
+  LOG(CodeAllocateEvent(code, desc.origin));
 
 #ifdef DEBUG
   code->Verify();
index 4fa398c..fbefec9 100644 (file)
@@ -32,6 +32,7 @@
 #include "log.h"
 #include "platform.h"
 #include "string-stream.h"
+#include "macro-assembler.h"
 
 namespace v8 { namespace internal {
 
@@ -556,6 +557,18 @@ void Logger::CodeCreateEvent(const char* tag, Code* code, int args_count) {
 }
 
 
+void Logger::CodeAllocateEvent(Code* code, Assembler* assem) {
+#ifdef ENABLE_LOGGING_AND_PROFILING
+  if (logfile_ == NULL || !FLAG_log_code) return;
+  ScopedLock sl(mutex_);
+
+  fprintf(logfile_, "code-allocate,0x%x,0x%x\n",
+          reinterpret_cast<unsigned int>(code->address()),
+          reinterpret_cast<unsigned int>(assem));
+#endif
+}
+
+
 void Logger::CodeMoveEvent(Address from, Address to) {
 #ifdef ENABLE_LOGGING_AND_PROFILING
   if (logfile_ == NULL || !FLAG_log_code) return;
@@ -576,6 +589,33 @@ void Logger::CodeDeleteEvent(Address from) {
 }
 
 
+void Logger::BeginCodeRegionEvent(CodeRegion* region,
+                                  Assembler* masm,
+                                  const char* name) {
+#ifdef ENABLE_LOGGING_AND_PROFILING
+  if (logfile_ == NULL || !FLAG_log_code) return;
+  ScopedLock sl(mutex_);
+  fprintf(logfile_, "begin-code-region,0x%x,0x%x,0x%x,%s\n",
+          reinterpret_cast<unsigned int>(region),
+          reinterpret_cast<unsigned int>(masm),
+          masm->pc_offset(),
+          name);
+#endif
+}
+
+
+void Logger::EndCodeRegionEvent(CodeRegion* region, Assembler* masm) {
+#ifdef ENABLE_LOGGING_AND_PROFILING
+  if (logfile_ == NULL || !FLAG_log_code) return;
+  ScopedLock sl(mutex_);
+  fprintf(logfile_, "end-code-region,0x%x,0x%x,0x%x\n",
+          reinterpret_cast<unsigned int>(region),
+          reinterpret_cast<unsigned int>(masm),
+          masm->pc_offset());
+#endif
+}
+
+
 void Logger::ResourceEvent(const char* name, const char* tag) {
 #ifdef ENABLE_LOGGING_AND_PROFILING
   if (logfile_ == NULL || !FLAG_log) return;
index 89d3e8a..6257173 100644 (file)
--- a/src/log.h
+++ b/src/log.h
@@ -160,10 +160,16 @@ class Logger {
   static void CodeCreateEvent(const char* tag, Code* code, const char* source);
   static void CodeCreateEvent(const char* tag, Code* code, String* name);
   static void CodeCreateEvent(const char* tag, Code* code, int args_count);
+  static void CodeAllocateEvent(Code* code, Assembler* assem);
   // Emits a code move event.
   static void CodeMoveEvent(Address from, Address to);
   // Emits a code delete event.
   static void CodeDeleteEvent(Address from);
+  // Emits region delimiters
+  static void BeginCodeRegionEvent(CodeRegion* region,
+                                   Assembler* masm,
+                                   const char* name);
+  static void EndCodeRegionEvent(CodeRegion* region, Assembler* masm);
 
   // ==== Events logged by --log-gc. ====
   // Heap sampling events: start, end, and individual types.
index 8bcc651..b8fb3b9 100644 (file)
@@ -305,6 +305,7 @@ class CodePatcher {
   MacroAssembler masm_;  // Macro assembler used to generate the code.
 };
 
+
 // -----------------------------------------------------------------------------
 // Static helper functions.
 
index e3df951..5ef6b5f 100644 (file)
@@ -35,9 +35,12 @@ class CodeEntry(object):
     self.tick_count = 0
     self.name = name
 
-  def IncrementTickCount(self):
+  def Tick(self, pc):
     self.tick_count += 1
 
+  def RegionTicks(self):
+    return None
+
   def SetStartAddress(self, start_addr):
     self.start_addr = start_addr
 
@@ -59,10 +62,45 @@ class SharedLibraryEntry(CodeEntry):
 
 class JSCodeEntry(CodeEntry):
 
-  def __init__(self, start_addr, name, type, size):
+  def __init__(self, start_addr, name, type, size, assembler):
     CodeEntry.__init__(self, start_addr, name)
     self.type = type
     self.size = size
+    self.assembler = assembler
+    self.region_ticks = None
+
+  def Tick(self, pc):
+    super(JSCodeEntry, self).Tick(pc)
+    if not pc is None:
+      offset = pc - self.start_addr
+      seen = []
+      narrowest = None
+      narrowest_width = None
+      for region in self.Regions():
+        if region.Contains(offset):
+          if (not region.name in seen):
+            seen.append(region.name)
+          if narrowest is None or region.Width() < narrowest.Width():
+            narrowest = region
+      if len(seen) == 0:
+        return
+      if self.region_ticks is None:
+        self.region_ticks = {}
+      for name in seen:
+        if not name in self.region_ticks:
+          self.region_ticks[name] = [0, 0]
+        self.region_ticks[name][0] += 1
+        if name == narrowest.name:
+          self.region_ticks[name][1] += 1
+
+  def RegionTicks(self):
+    return self.region_ticks
+
+  def Regions(self):
+    if self.assembler:
+      return self.assembler.regions
+    else:
+      return []
 
   def ToString(self):
     name = self.name
@@ -70,12 +108,39 @@ class JSCodeEntry(CodeEntry):
     return self.type + ': ' + name
 
 
+class CodeRegion(object):
+
+  def __init__(self, start_offset, name):
+    self.start_offset = start_offset
+    self.name = name
+    self.end_offset = None
+
+  def Contains(self, pc):
+    return (self.start_offset <= pc) and (pc <= self.end_offset)
+
+  def Width(self):
+    return self.end_offset - self.start_offset
+
+
+class Assembler(object):
+
+  def __init__(self):
+    # Mapping from region ids to open regions
+    self.pending_regions = {}
+    self.regions = []
+
+
 class TickProcessor(object):
 
   def __init__(self):
     self.log_file = ''
     self.deleted_code = []
     self.vm_extent = {}
+    # Map from assembler ids to the pending assembler objects
+    self.pending_assemblers = {}
+    # Map from code addresses the have been allocated but not yet officially
+    # created to their assemblers.
+    self.assemblers = {}
     self.js_entries = splaytree.SplayTree()
     self.cpp_entries = splaytree.SplayTree()
     self.total_number_of_ticks = 0
@@ -104,6 +169,12 @@ class TickProcessor(object):
         elif row[0] == 'shared-library':
           self.AddSharedLibraryEntry(row[1], int(row[2], 16), int(row[3], 16))
           self.ParseVMSymbols(row[1], int(row[2], 16), int(row[3], 16))
+        elif row[0] == 'begin-code-region':
+          self.ProcessBeginCodeRegion(int(row[1], 16), int(row[2], 16), int(row[3], 16), row[4])
+        elif row[0] == 'end-code-region':
+          self.ProcessEndCodeRegion(int(row[1], 16), int(row[2], 16), int(row[3], 16))
+        elif row[0] == 'code-allocate':
+          self.ProcessCodeAllocate(int(row[1], 16), int(row[2], 16))
     finally:
       logfile.close()
 
@@ -121,8 +192,17 @@ class TickProcessor(object):
   def ParseVMSymbols(self, filename, start, end):
     return
 
+  def ProcessCodeAllocate(self, addr, assem):
+    if assem in self.pending_assemblers:
+      assembler = self.pending_assemblers.pop(assem)
+      self.assemblers[addr] = assembler
+
   def ProcessCodeCreation(self, type, addr, size, name):
-    self.js_entries.Insert(addr, JSCodeEntry(addr, name, type, size))
+    if addr in self.assemblers:
+      assembler = self.assemblers.pop(addr)
+    else:
+      assembler = None
+    self.js_entries.Insert(addr, JSCodeEntry(addr, name, type, size, assembler))
 
   def ProcessCodeMove(self, from_addr, to_addr):
     try:
@@ -139,6 +219,18 @@ class TickProcessor(object):
     except 'KeyNotFound':
       print('Code delete event for unknown code: 0x%x' % from_addr)
 
+  def ProcessBeginCodeRegion(self, id, assm, start, name):
+    if not assm in self.pending_assemblers:
+      self.pending_assemblers[assm] = Assembler()
+    assembler = self.pending_assemblers[assm]
+    assembler.pending_regions[id] = CodeRegion(start, name)
+
+  def ProcessEndCodeRegion(self, id, assm, end):
+    assm = self.pending_assemblers[assm]
+    region = assm.pending_regions.pop(id)
+    region.end_offset = end
+    assm.regions.append(region)
+
   def IncludeTick(self, pc, sp, state):
     return (self.included_state is None) or (self.included_state == state)
 
@@ -152,12 +244,13 @@ class TickProcessor(object):
       entry = self.cpp_entries.FindGreatestsLessThan(pc).value
       if entry.IsSharedLibraryEntry():
         self.number_of_library_ticks += 1
-      entry.IncrementTickCount()
+      entry.Tick(None)
       return
     max = self.js_entries.FindMax()
     min = self.js_entries.FindMin()
     if max != None and pc < max.key and pc > min.key:
-      self.js_entries.FindGreatestsLessThan(pc).value.IncrementTickCount()
+      code_obj = self.js_entries.FindGreatestsLessThan(pc).value
+      code_obj.Tick(pc)
       return
     self.unaccounted_number_of_ticks += 1
 
@@ -201,6 +294,17 @@ class TickProcessor(object):
           'nonlib' : non_library_percentage,
           'name' : entry.ToString()
         })
+        region_ticks = entry.RegionTicks()
+        if not region_ticks is None:
+          items = region_ticks.items()
+          items.sort(key=lambda e: e[1][1], reverse=True)
+          for (name, ticks) in items:
+            print('                      flat   cum')
+            print('                     %(flat)5.1f%% %(accum)5.1f%% %(name)s' % {
+              'flat' : ticks[1] * 100.0 / entry.tick_count,
+              'accum' : ticks[0] * 100.0 / entry.tick_count,
+              'name': name
+            })
 
 if __name__ == '__main__':
   sys.exit('You probably want to run windows-tick-processor.py or linux-tick-processor.py.')