win32kprof: Store the profile data as an caller->callee hash table, instead of a...
authorJosé Fonseca <jrfonseca@tungstengraphics.com>
Tue, 22 Jul 2008 00:45:10 +0000 (09:45 +0900)
committerJosé Fonseca <jrfonseca@tungstengraphics.com>
Tue, 22 Jul 2008 00:45:33 +0000 (09:45 +0900)
bin/win32kprof.py
src/gallium/auxiliary/util/p_debug_prof.c

index 4ffa4cc..c36317d 100755 (executable)
@@ -117,15 +117,7 @@ class Reader:
         self.symbols = []
         self.symbol_cache = {}
         self.base_addr = None
-        self.last_stamp = 0
-        self.stamp_base = 0
     
-    def unwrap_stamp(self, stamp):
-        if stamp < self.last_stamp:
-            self.stamp_base += 1 << 32
-        self.last_stamp = stamp
-        return self.stamp_base + stamp
-
     def read_map(self, mapfile):
         # See http://msdn.microsoft.com/en-us/library/k7xkk3e2.aspx
         last_addr = 0
@@ -140,10 +132,6 @@ class Reader:
                 continue
             section, offset = section_offset.split(':')
             addr = int(offset, 16)
-            if last_addr == addr:
-                # TODO: handle collapsed functions
-                #assert last_name == name
-                continue
             self.symbols.append((addr, name))
             last_addr = addr
             last_name = name
@@ -170,12 +158,12 @@ class Reader:
                 e = i
                 continue
             if addr == end_addr:
-                return next_name
+                return next_name, addr - start_addr
             if addr > end_addr:
                 s = i
                 continue
             return name, addr - start_addr
-        return "0x%08x" % addr, 0
+        raise ValueError
 
     def lookup_symbol(self, name):
         for symbol_addr, symbol_name in self.symbols:
@@ -187,96 +175,76 @@ class Reader:
         profile = Profile()
 
         fp = file(data, "rb")
-        entry_format = "II"
+        entry_format = "IIII"
         entry_size = struct.calcsize(entry_format)
         caller = None
         caller_stack = []
-        last_stamp = 0
-        delta = 0
         while True:
             entry = fp.read(entry_size)
             if len(entry) < entry_size:
                 break
-            addr_exit, stamp = struct.unpack(entry_format, entry)
-            if addr_exit == 0 and stamp == 0:
-                break
-            addr = addr_exit & 0xfffffffe
-            exit = addr_exit & 0x00000001
+            caller_addr, callee_addr, samples_lo, samples_hi = struct.unpack(entry_format, entry)
+            if caller_addr == 0 and callee_addr == 0:
+                continue
 
             if self.base_addr is None:
-                ref_addr = self.lookup_symbol('___debug_profile_reference2@0')
+                ref_addr = self.lookup_symbol('___debug_profile_reference@0')
                 if ref_addr:
-                    self.base_addr = (addr - ref_addr) & ~(options.align - 1)
+                    self.base_addr = (caller_addr - ref_addr) & ~(options.align - 1)
                 else:
                     self.base_addr = 0
-                #print hex(self.base_addr)
-            rel_addr = addr - self.base_addr
-            #print hex(addr - self.base_addr)
-
-            symbol, offset = self.lookup_addr(rel_addr)
-            stamp = self.unwrap_stamp(stamp)
-            delta = stamp - last_stamp
-
-            if not exit:
-                if options.verbose >= 2:
-                    sys.stderr.write("%08x >> 0x%08x\n" % (stamp, addr))
-                if options.verbose:
-                    sys.stderr.write("%+8u >> %s+%u\n" % (delta, symbol, offset))
+                sys.stderr.write('Base addr: %08x\n' % self.base_addr)
+
+            samples = (samples_hi << 32) | samples_lo
+            
+            try:
+                caller_raddr = caller_addr - self.base_addr
+                caller_sym, caller_ofs = self.lookup_addr(caller_raddr)
+
+                try:
+                    caller = profile.functions[caller_sym]
+                except KeyError:
+                    caller_name = demangle(caller_sym)
+                    caller = Function(caller_sym, caller_name)
+                    profile.add_function(caller)
+                    caller[CALLS] = 0
+                    caller[SAMPLES] = 0
+            except ValueError:
+                caller = None
+
+            if not callee_addr:
+                if caller:
+                    caller[SAMPLES] += samples
             else:
-                if options.verbose >= 2:
-                    sys.stderr.write("%08x << 0x%08x\n" % (stamp, addr))
-                if options.verbose:
-                    sys.stderr.write("%+8u << %s+%u\n" % (delta, symbol, offset))
-
-            # Eliminate outliers
-            if exit and delta > 0x1000000:
-                # TODO: Use a statistic test instead of a threshold
-                sys.stderr.write("warning: ignoring excessive delta of +%u in function %s\n" % (delta, symbol))
-                delta = 0
-
-            # Remove overhead
-            # TODO: compute the overhead automatically
-            delta = max(0, delta - 84)
-
-            if caller is not None:
-                caller[SAMPLES] += delta
-
-            if not exit:
-                # Function call
+                callee_raddr = callee_addr - self.base_addr
+                callee_sym, callee_ofs = self.lookup_addr(callee_raddr)
+
                 try:
-                    callee = profile.functions[symbol]
+                    callee = profile.functions[callee_sym]
                 except KeyError:
-                    name = demangle(symbol)
-                    callee = Function(symbol, name)
+                    callee_name = demangle(callee_sym)
+                    callee = Function(callee_sym, callee_name)
                     profile.add_function(callee)
-                    callee[CALLS] = 1
+                    callee[CALLS] = samples
                     callee[SAMPLES] = 0
                 else:
-                    callee[CALLS] += 1
+                    callee[CALLS] += samples
 
                 if caller is not None:
                     try:
                         call = caller.calls[callee.id]
                     except KeyError:
                         call = Call(callee.id)
-                        call[CALLS] = 1
+                        call[CALLS] = samples
                         caller.add_call(call)
                     else:
-                        call[CALLS] += 1
-                    caller_stack.append(caller)
-
-                caller = callee
-
-            else:
-                # Function return
-                if caller is not None:
-                    assert caller.id == symbol
-                    try:
-                        caller = caller_stack.pop()
-                    except IndexError:
-                        caller = None
-
-            last_stamp = stamp
+                        call[CALLS] += samples
+            
+            if options.verbose:
+                if not callee_addr:
+                    sys.stderr.write('%s+%u: %u\n' % (caller_sym, caller_ofs, samples))
+                else:
+                    sys.stderr.write('%s+%u -> %s+%u: %u\n' % (caller_sym, caller_ofs, callee_sym, callee_ofs, samples))
 
         # compute derived data
         profile.validate()
index 69c914c..5f9772e 100644 (file)
 #include "util/u_string.h" 
 
 
-#define PROFILE_FILE_SIZE 4*1024*1024
+#define PROFILE_TABLE_SIZE (1024*1024)
 #define FILE_NAME_SIZE 256
 
-static WCHAR wFileName[FILE_NAME_SIZE] = L"\\??\\c:\\00000000.trace";
-static ULONG_PTR iFile = 0;
-static BYTE *pMap = NULL;
-static BYTE *pMapBegin = NULL;
-static BYTE *pMapEnd = NULL;
+struct debug_profile_entry
+{
+   uintptr_t caller;
+   uintptr_t callee;
+   uint64_t samples;
+};
 
+static unsigned long enabled = 0;
 
-void __declspec(naked) __cdecl 
-debug_profile_close(void)
-{
-   _asm {
-      push eax
-      push ebx
-      push ecx
-      push edx
-      push ebp
-      push edi
-      push esi
-    }
+static WCHAR wFileName[FILE_NAME_SIZE] = L"\\??\\c:\\00000000.prof";
+static ULONG_PTR iFile = 0;
 
-   if(iFile) {
-      EngUnmapFile(iFile);
-      /* Truncate the file */
-      pMap = EngMapFile(wFileName, pMap - pMapBegin, &iFile);
-      if(pMap)
-         EngUnmapFile(iFile);
-   }
-   iFile = 0;
-   pMapBegin = pMapEnd = pMap = NULL;
-   
-   _asm {
-      pop esi
-      pop edi
-      pop ebp
-      pop edx
-      pop ecx
-      pop ebx
-      pop eax
-      ret
-    }
-}
+static struct debug_profile_entry *table = NULL;
+static unsigned long free_table_entries = 0;
+static unsigned long max_table_entries = 0;
 
+uint64_t start_stamp = 0;
+uint64_t end_stamp = 0;
 
-void __declspec(naked) __cdecl 
-debug_profile_open(void)
-{
-   WCHAR *p;
-   
-   _asm {
-      push eax
-      push ebx
-      push ecx
-      push edx
-      push ebp
-      push edi
-      push esi
-    }
 
-   debug_profile_close();
+static void
+debug_profile_entry(uintptr_t caller, uintptr_t callee, uint64_t samples)
+{
+   unsigned hash = ( caller + callee ) & PROFILE_TABLE_SIZE - 1;
    
-   // increment starting from the less significant digit
-   p = &wFileName[14];
    while(1) {
-      if(*p == '9') {
-         *p-- = '0';
+      if(table[hash].caller == 0 && table[hash].callee == 0) {
+         table[hash].caller = caller;
+         table[hash].callee = callee;
+         table[hash].samples = samples;
+         --free_table_entries;
+         break;
       }
-      else {
-         *p += 1;
+      else if(table[hash].caller == caller && table[hash].callee == callee) {
+         table[hash].samples += samples;
          break;
       }
+      else {
+         ++hash;
+      }
    }
+}
 
-   pMap = EngMapFile(wFileName, PROFILE_FILE_SIZE, &iFile);
-   if(pMap) {
-      pMapBegin = pMap;
-      pMapEnd = pMap + PROFILE_FILE_SIZE;
-   }
-   
-   _asm {
-      pop esi
-      pop edi
-      pop ebp
-      pop edx
-      pop ecx
-      pop ebx
-      pop eax
-      ret
-    }
+
+static uintptr_t caller_stack[1024];
+static unsigned last_caller = 0;
+
+
+static int64_t delta(void) {
+   int64_t result = end_stamp - start_stamp;
+   if(result > UINT64_C(0xffffffff))
+      result = 0;
+   return result;
+}
+
+
+static void __cdecl 
+debug_profile_enter(uintptr_t callee)
+{
+   uintptr_t caller = last_caller ? caller_stack[last_caller - 1] : 0;
+                
+   if (caller)
+      debug_profile_entry(caller, 0, delta());
+   debug_profile_entry(caller, callee, 1);
+   caller_stack[last_caller++] = callee;
+}
+
+
+static void __cdecl
+debug_profile_exit(uintptr_t callee)
+{
+   debug_profile_entry(callee, 0, delta());
+   if(last_caller)
+      --last_caller;
 }
    
    
@@ -148,31 +134,49 @@ debug_profile_open(void)
 void __declspec(naked) __cdecl 
 _penter(void) {
    _asm {
-      push ebx
-retry:
-      mov ebx, [pMap]
-      test ebx, ebx
-      jz done
-      cmp ebx, [pMapEnd]
-      jne ready
-      call debug_profile_open
-      jmp retry
-ready:
       push eax
+      mov eax, [enabled]
+      test eax, eax
+      jz skip
+
       push edx
-      mov eax, [esp+12]
-      and eax, 0xfffffffe
-      mov [ebx], eax
-      add ebx, 4
+      
       rdtsc
-      mov [ebx], eax
-      add ebx, 4
-      mov [pMap], ebx
+      mov dword ptr [end_stamp], eax
+      mov dword ptr [end_stamp+4], edx
+
+      xor eax, eax
+      mov [enabled], eax
+
+      mov eax, [esp+8]
+
+      push ebx
+      push ecx
+      push ebp
+      push edi
+      push esi
+
+      push eax
+      call debug_profile_enter
+      add esp, 4
+
+      pop esi
+      pop edi
+      pop ebp
+      pop ecx
+      pop ebx
+
+      mov eax, 1
+      mov [enabled], eax 
+
+      rdtsc
+      mov dword ptr [start_stamp], eax
+      mov dword ptr [start_stamp+4], edx
+      
       pop edx
+skip:
       pop eax
-done:
-      pop ebx
-      ret  
+      ret
    }
 }
 
@@ -185,30 +189,48 @@ done:
 void __declspec(naked) __cdecl 
 _pexit(void) {
    _asm {
-      push ebx
-retry:
-      mov ebx, [pMap]
-      test ebx, ebx
-      jz done
-      cmp ebx, [pMapEnd]
-      jne ready
-      call debug_profile_open
-      jmp retry
-ready:
       push eax
+      mov eax, [enabled]
+      test eax, eax
+      jz skip
+
       push edx
-      mov eax, [esp+12]
-      or eax, 0x00000001
-      mov [ebx], eax
-      add ebx, 4
+      
       rdtsc
-      mov [ebx], eax
-      add ebx, 4
-      mov [pMap], ebx
+      mov dword ptr [end_stamp], eax
+      mov dword ptr [end_stamp+4], edx
+
+      xor eax, eax
+      mov [enabled], eax
+
+      mov eax, [esp+8]
+
+      push ebx
+      push ecx
+      push ebp
+      push edi
+      push esi
+
+      push eax
+      call debug_profile_exit
+      add esp, 4
+
+      pop esi
+      pop edi
+      pop ebp
+      pop ecx
+      pop ebx
+
+      mov eax, 1
+      mov [enabled], eax 
+
+      rdtsc
+      mov dword ptr [start_stamp], eax
+      mov dword ptr [start_stamp+4], edx
+      
       pop edx
+skip:
       pop eax
-done:
-      pop ebx
       ret
    }
 }
@@ -230,15 +252,53 @@ __debug_profile_reference(void) {
 void
 debug_profile_start(void)
 {
-   debug_profile_open();
-   
-   if(pMap) {
+   WCHAR *p;
+
+   // increment starting from the less significant digit
+   p = &wFileName[14];
+   while(1) {
+      if(*p == '9') {
+         *p-- = '0';
+      }
+      else {
+         *p += 1;
+         break;
+      }
+   }
+
+   table = EngMapFile(wFileName, 
+                      PROFILE_TABLE_SIZE*sizeof(struct debug_profile_entry), 
+                      &iFile);
+   if(table) {
       unsigned i;
+      
+      free_table_entries = max_table_entries = PROFILE_TABLE_SIZE;
+      memset(table, 0, PROFILE_TABLE_SIZE*sizeof(struct debug_profile_entry));
+      
+      table[0].caller = (uintptr_t)&__debug_profile_reference;
+      table[0].callee = 0;
+      table[0].samples = 0;
+      --free_table_entries;
+
+      _asm {
+         push edx
+         push eax
+      
+         rdtsc
+         mov dword ptr [start_stamp], eax
+         mov dword ptr [start_stamp+4], edx
+         
+         pop edx
+         pop eax
+      }
+
+      last_caller = 0;
+      
+      enabled = 1;
+
       for(i = 0; i < 8; ++i) {
          _asm {
-            call _penter
             call __debug_profile_reference
-            call _pexit
          }
       }
    }
@@ -248,7 +308,13 @@ debug_profile_start(void)
 void 
 debug_profile_stop(void)
 {
-   debug_profile_close();
+   enabled = 0;
+
+   if(iFile)
+      EngUnmapFile(iFile);
+   iFile = 0;
+   table = NULL;
+   free_table_entries = max_table_entries = 0;
 }
 
 #endif /* PROFILE */