[OpenMP][NFCI] Pipe the IdentTy object through more new RT functions
author Johannes Doerfert <johannes@jdoerfert.de>
Thu, 27 Jan 2022 20:36:36 +0000 (15:36 -0500)
committer Shilei Tian <i@tianshilei.me>
Thu, 27 Jan 2022 20:36:55 +0000 (15:36 -0500)
IdentTy objects are useful for debugging and profiling so we want to
keep them around in more places, especially those that have a large
impact on performance, e.g., everything related to state.

Reviewed By: tianshilei1992

Differential Revision: https://reviews.llvm.org/D112494

openmp/libomptarget/DeviceRTL/include/State.h
openmp/libomptarget/DeviceRTL/src/Parallelism.cpp
openmp/libomptarget/DeviceRTL/src/State.cpp
openmp/libomptarget/DeviceRTL/src/Tasking.cpp

index c860bd1..3365b05 100644 (file)
@@ -40,56 +40,61 @@ enum ValueKind {
 };
 
 /// TODO
-void enterDataEnvironment();
+void enterDataEnvironment(IdentTy *Ident);
 
 /// TODO
 void exitDataEnvironment();
 
 /// TODO
 struct DateEnvironmentRAII {
-  DateEnvironmentRAII() { enterDataEnvironment(); }
+  DateEnvironmentRAII(IdentTy *Ident) { enterDataEnvironment(Ident); }
   ~DateEnvironmentRAII() { exitDataEnvironment(); }
 };
 
 /// TODO
 void resetStateForThread(uint32_t TId);
 
-uint32_t &lookup32(ValueKind VK, bool IsReadonly);
+uint32_t &lookup32(ValueKind VK, bool IsReadonly, IdentTy *Ident);
 void *&lookupPtr(ValueKind VK, bool IsReadonly);
 
 /// A class without actual state used to provide a nice interface to lookup and
 /// update ICV values we can declare in global scope.
 template <typename Ty, ValueKind Kind> struct Value {
   __attribute__((flatten, always_inline)) operator Ty() {
-    return lookup(/* IsReadonly */ true);
+    return lookup(/* IsReadonly */ true, /* IdentTy */ nullptr);
   }
 
   __attribute__((flatten, always_inline)) Value &operator=(const Ty &Other) {
-    set(Other);
+    set(Other, /* IdentTy */ nullptr);
     return *this;
   }
 
   __attribute__((flatten, always_inline)) Value &operator++() {
-    inc(1);
+    inc(1, /* IdentTy */ nullptr);
     return *this;
   }
 
   __attribute__((flatten, always_inline)) Value &operator--() {
-    inc(-1);
+    inc(-1, /* IdentTy */ nullptr);
     return *this;
   }
 
 private:
-  Ty &lookup(bool IsReadonly) {
-    Ty &t = lookup32(Kind, IsReadonly);
+  __attribute__((flatten, always_inline)) Ty &lookup(bool IsReadonly,
+                                                     IdentTy *Ident) {
+    Ty &t = lookup32(Kind, IsReadonly, Ident);
     return t;
   }
 
-  Ty &inc(int UpdateVal) {
-    return (lookup(/* IsReadonly */ false) += UpdateVal);
+  __attribute__((flatten, always_inline)) Ty &inc(int UpdateVal,
+                                                  IdentTy *Ident) {
+    return (lookup(/* IsReadonly */ false, Ident) += UpdateVal);
   }
 
-  Ty &set(Ty UpdateVal) { return (lookup(/* IsReadonly */ false) = UpdateVal); }
+  __attribute__((flatten, always_inline)) Ty &set(Ty UpdateVal,
+                                                  IdentTy *Ident) {
+    return (lookup(/* IsReadonly */ false, Ident) = UpdateVal);
+  }
 
   template <typename VTy, typename Ty2> friend struct ValueRAII;
 };
@@ -99,7 +104,7 @@ private:
 /// we can declare in global scope.
 template <typename Ty, ValueKind Kind> struct PtrValue {
   __attribute__((flatten, always_inline)) operator Ty() {
-    return lookup(/* IsReadonly */ true);
+    return lookup(/* IsReadonly */ true, /* IdentTy */ nullptr);
   }
 
   __attribute__((flatten, always_inline)) PtrValue &operator=(const Ty Other) {
@@ -108,17 +113,19 @@ template <typename Ty, ValueKind Kind> struct PtrValue {
   }
 
 private:
-  Ty &lookup(bool IsReadonly) { return lookupPtr(Kind, IsReadonly); }
+  Ty &lookup(bool IsReadonly, IdentTy *) { return lookupPtr(Kind, IsReadonly); }
 
-  Ty &set(Ty UpdateVal) { return (lookup(/* IsReadonly */ false) = UpdateVal); }
+  Ty &set(Ty UpdateVal) {
+    return (lookup(/* IsReadonly */ false, /* IdentTy */ nullptr) = UpdateVal);
+  }
 
   template <typename VTy, typename Ty2> friend struct ValueRAII;
 };
 
 template <typename VTy, typename Ty> struct ValueRAII {
-  ValueRAII(VTy &V, Ty NewValue, Ty OldValue, bool Active)
-      : Ptr(Active ? V.lookup(/* IsReadonly */ false) : Val), Val(OldValue),
-        Active(Active) {
+  ValueRAII(VTy &V, Ty NewValue, Ty OldValue, bool Active, IdentTy *Ident)
+      : Ptr(Active ? V.lookup(/* IsReadonly */ false, Ident) : Val),
+        Val(OldValue), Active(Active) {
     if (!Active)
       return;
     ASSERT(Ptr == OldValue && "ValueRAII initialization with wrong old value!");
index 610512a..4ce2493 100644 (file)
@@ -87,7 +87,7 @@ void __kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr,
   uint32_t TId = mapping::getThreadIdInBlock();
   // Handle the serialized case first, same for SPMD/non-SPMD.
   if (OMP_UNLIKELY(!if_expr || icv::Level)) {
-    state::enterDataEnvironment();
+    state::DateEnvironmentRAII DERAII(ident);
     ++icv::Level;
     invokeMicrotask(TId, 0, fn, args, nargs);
     state::exitDataEnvironment();
@@ -104,9 +104,10 @@ void __kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr,
       // last or the other updates will cause a thread specific state to be
       // created.
       state::ValueRAII ParallelTeamSizeRAII(state::ParallelTeamSize, NumThreads,
-                                            1u, TId == 0);
-      state::ValueRAII ActiveLevelRAII(icv::ActiveLevel, 1u, 0u, TId == 0);
-      state::ValueRAII LevelRAII(icv::Level, 1u, 0u, TId == 0);
+                                            1u, TId == 0, ident);
+      state::ValueRAII ActiveLevelRAII(icv::ActiveLevel, 1u, 0u, TId == 0,
+                                       ident);
+      state::ValueRAII LevelRAII(icv::Level, 1u, 0u, TId == 0, ident);
 
       // Synchronize all threads after the main thread (TId == 0) set up the
       // team state properly.
@@ -142,7 +143,7 @@ void __kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr,
 
   bool IsActiveParallelRegion = NumThreads > 1;
   if (!IsActiveParallelRegion) {
-    state::ValueRAII LevelRAII(icv::Level, 1u, 0u, true);
+    state::ValueRAII LevelRAII(icv::Level, 1u, 0u, true, ident);
     invokeMicrotask(TId, 0, fn, args, nargs);
     return;
   }
@@ -160,11 +161,11 @@ void __kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr,
     // last or the other updates will cause a thread specific state to be
     // created.
     state::ValueRAII ParallelTeamSizeRAII(state::ParallelTeamSize, NumThreads,
-                                          1u, true);
+                                          1u, true, ident);
     state::ValueRAII ParallelRegionFnRAII(state::ParallelRegionFn, wrapper_fn,
-                                          (void *)nullptr, true);
-    state::ValueRAII ActiveLevelRAII(icv::ActiveLevel, 1u, 0u, true);
-    state::ValueRAII LevelRAII(icv::Level, 1u, 0u, true);
+                                          (void *)nullptr, true, ident);
+    state::ValueRAII ActiveLevelRAII(icv::ActiveLevel, 1u, 0u, true, ident);
+    state::ValueRAII LevelRAII(icv::Level, 1u, 0u, true, ident);
 
     // Master signals work to activate workers.
     synchronize::threads();
index ee6295f..754c706 100644 (file)
@@ -281,7 +281,7 @@ __attribute__((loader_uninitialized))
 ThreadStateTy *ThreadStates[mapping::MaxThreadsPerTeam];
 #pragma omp allocate(ThreadStates) allocator(omp_pteam_mem_alloc)
 
-uint32_t &lookupForModify32Impl(uint32_t ICVStateTy::*Var) {
+uint32_t &lookupForModify32Impl(uint32_t ICVStateTy::*Var, IdentTy *Ident) {
   if (OMP_LIKELY(TeamState.ICVState.LevelVar == 0))
     return TeamState.ICVState.*Var;
   uint32_t TId = mapping::getThreadIdInBlock();
@@ -322,32 +322,32 @@ int returnValIfLevelIsActive(int Level, int Val, int DefaultVal,
 
 } // namespace
 
-uint32_t &state::lookup32(ValueKind Kind, bool IsReadonly) {
+uint32_t &state::lookup32(ValueKind Kind, bool IsReadonly, IdentTy *Ident) {
   switch (Kind) {
   case state::VK_NThreads:
     if (IsReadonly)
       return lookup32Impl(&ICVStateTy::NThreadsVar);
-    return lookupForModify32Impl(&ICVStateTy::NThreadsVar);
+    return lookupForModify32Impl(&ICVStateTy::NThreadsVar, Ident);
   case state::VK_Level:
     if (IsReadonly)
       return lookup32Impl(&ICVStateTy::LevelVar);
-    return lookupForModify32Impl(&ICVStateTy::LevelVar);
+    return lookupForModify32Impl(&ICVStateTy::LevelVar, Ident);
   case state::VK_ActiveLevel:
     if (IsReadonly)
       return lookup32Impl(&ICVStateTy::ActiveLevelVar);
-    return lookupForModify32Impl(&ICVStateTy::ActiveLevelVar);
+    return lookupForModify32Impl(&ICVStateTy::ActiveLevelVar, Ident);
   case state::VK_MaxActiveLevels:
     if (IsReadonly)
       return lookup32Impl(&ICVStateTy::MaxActiveLevelsVar);
-    return lookupForModify32Impl(&ICVStateTy::MaxActiveLevelsVar);
+    return lookupForModify32Impl(&ICVStateTy::MaxActiveLevelsVar, Ident);
   case state::VK_RunSched:
     if (IsReadonly)
       return lookup32Impl(&ICVStateTy::RunSchedVar);
-    return lookupForModify32Impl(&ICVStateTy::RunSchedVar);
+    return lookupForModify32Impl(&ICVStateTy::RunSchedVar, Ident);
   case state::VK_RunSchedChunk:
     if (IsReadonly)
       return lookup32Impl(&ICVStateTy::RunSchedChunkVar);
-    return lookupForModify32Impl(&ICVStateTy::RunSchedChunkVar);
+    return lookupForModify32Impl(&ICVStateTy::RunSchedChunkVar, Ident);
   case state::VK_ParallelTeamSize:
     return TeamState.ParallelTeamSize;
   default:
@@ -376,7 +376,7 @@ void state::init(bool IsSPMD) {
   ThreadStates[mapping::getThreadIdInBlock()] = nullptr;
 }
 
-void state::enterDataEnvironment() {
+void state::enterDataEnvironment(IdentTy *Ident) {
   unsigned TId = mapping::getThreadIdInBlock();
   ThreadStateTy *NewThreadState =
       static_cast<ThreadStateTy *>(__kmpc_alloc_shared(sizeof(ThreadStateTy)));
index 0416395..2c80e71 100644 (file)
@@ -49,7 +49,7 @@ int32_t __kmpc_omp_task_with_deps(IdentTy *Loc, uint32_t TId,
                                   TaskDescriptorTy *TaskDescriptor, int32_t,
                                   void *, int32_t, void *) {
   FunctionTracingRAII();
-  state::DateEnvironmentRAII DERAII;
+  state::DateEnvironmentRAII DERAII(Loc);
 
   TaskDescriptor->TaskFn(0, TaskDescriptor);
 
@@ -60,7 +60,7 @@ int32_t __kmpc_omp_task_with_deps(IdentTy *Loc, uint32_t TId,
 void __kmpc_omp_task_begin_if0(IdentTy *Loc, uint32_t TId,
                                TaskDescriptorTy *TaskDescriptor) {
   FunctionTracingRAII();
-  state::enterDataEnvironment();
+  state::enterDataEnvironment(Loc);
 }
 
 void __kmpc_omp_task_complete_if0(IdentTy *Loc, uint32_t TId,