[BOLT][Instrumentation] Don't share counters when using append-pid
author Denis Revunov <revunov.denis@huawei-partners.com>
Thu, 15 Jun 2023 15:43:04 +0000 (15:43 +0000)
committer Denis Revunov <rnovds@gmail.com>
Thu, 29 Jun 2023 22:03:52 +0000 (01:03 +0300)
The point of the append-pid option is to record separate profiles for
separate forks, which is impossible when the counters are shared by
every process. Sharing them makes every output file contain the sum of
all profiles; in addition, GlobalWriteProfileMutex, being located in
shared memory, prevents some processes from dumping their data at all.

Reviewed By: rafauler, Amir
Differential Revision: https://reviews.llvm.org/D153771

bolt/runtime/instr.cpp

index 997527a..fa733ed 100644 (file)
@@ -1541,6 +1541,9 @@ extern "C" void __bolt_instr_indirect_tailcall();
 
 /// Initialization code
 extern "C" void __attribute((force_align_arg_pointer)) __bolt_instr_setup() {
+  __bolt_ind_call_counter_func_pointer = __bolt_instr_indirect_call;
+  __bolt_ind_tailcall_counter_func_pointer = __bolt_instr_indirect_tailcall;
+
   const uint64_t CountersStart =
       reinterpret_cast<uint64_t>(&__bolt_instr_locations[0]);
   const uint64_t CountersEnd = alignTo(
@@ -1548,18 +1551,19 @@ extern "C" void __attribute((force_align_arg_pointer)) __bolt_instr_setup() {
       0x1000);
   DEBUG(reportNumber("replace mmap start: ", CountersStart, 16));
   DEBUG(reportNumber("replace mmap stop: ", CountersEnd, 16));
-  assert (CountersEnd > CountersStart, "no counters");
-  // Maps our counters to be shared instead of private, so we keep counting for
-  // forked processes
+  assert(CountersEnd > CountersStart, "no counters");
+
+  const bool Shared = !__bolt_instr_use_pid;
+  const uint64_t MapPrivateOrShared = Shared ? MAP_SHARED : MAP_PRIVATE;
+
   void *Ret =
       __mmap(CountersStart, CountersEnd - CountersStart, PROT_READ | PROT_WRITE,
-             MAP_ANONYMOUS | MAP_SHARED | MAP_FIXED, -1, 0);
+             MAP_ANONYMOUS | MapPrivateOrShared | MAP_FIXED, -1, 0);
   assert(Ret != MAP_FAILED, "__bolt_instr_setup: Failed to mmap counters!");
-  __bolt_ind_call_counter_func_pointer = __bolt_instr_indirect_call;
-  __bolt_ind_tailcall_counter_func_pointer = __bolt_instr_indirect_tailcall;
+
   // Conservatively reserve 100MiB shared pages
   GlobalAlloc.setMaxSize(0x6400000);
-  GlobalAlloc.setShared(true);
+  GlobalAlloc.setShared(Shared);
   GlobalWriteProfileMutex = new (GlobalAlloc, 0) Mutex();
   if (__bolt_instr_num_ind_calls > 0)
     GlobalIndCallCounters =