From: Johannes Doerfert Date: Thu, 27 Jan 2022 20:36:36 +0000 (-0500) Subject: [OpenMP][NFCI] Pipe the IdentTy object through more new RT functions X-Git-Tag: upstream/15.0.7~18818 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=1e1215689656459ad41d4501d9204dcea4004a71;p=platform%2Fupstream%2Fllvm.git [OpenMP][NFCI] Pipe the IdentTy object through more new RT functions IdentTy objects are useful for debugging and profiling so we want to keep them around in more places, especially those that have a large impact on performance, e.g., everything related to state. Reviewed By: tianshilei1992 Differential Revision: https://reviews.llvm.org/D112494 --- diff --git a/openmp/libomptarget/DeviceRTL/include/State.h b/openmp/libomptarget/DeviceRTL/include/State.h index c860bd1..3365b05 100644 --- a/openmp/libomptarget/DeviceRTL/include/State.h +++ b/openmp/libomptarget/DeviceRTL/include/State.h @@ -40,56 +40,61 @@ enum ValueKind { }; /// TODO -void enterDataEnvironment(); +void enterDataEnvironment(IdentTy *Ident); /// TODO void exitDataEnvironment(); /// TODO struct DateEnvironmentRAII { - DateEnvironmentRAII() { enterDataEnvironment(); } + DateEnvironmentRAII(IdentTy *Ident) { enterDataEnvironment(Ident); } ~DateEnvironmentRAII() { exitDataEnvironment(); } }; /// TODO void resetStateForThread(uint32_t TId); -uint32_t &lookup32(ValueKind VK, bool IsReadonly); +uint32_t &lookup32(ValueKind VK, bool IsReadonly, IdentTy *Ident); void *&lookupPtr(ValueKind VK, bool IsReadonly); /// A class without actual state used to provide a nice interface to lookup and /// update ICV values we can declare in global scope. template struct Value { __attribute__((flatten, always_inline)) operator Ty() { - return lookup(/* IsReadonly */ true); + return lookup(/* IsReadonly */ true, /* IdentTy */ nullptr); } __attribute__((flatten, always_inline)) Value &operator=(const Ty &Other) { - set(Other); + set(Other, /* IdentTy */ nullptr); return *this; } __attribute__((flatten, always_inline)) Value &operator++() { - inc(1); + inc(1, /* IdentTy */ nullptr); return *this; } __attribute__((flatten, always_inline)) Value &operator--() { - inc(-1); + inc(-1, /* IdentTy */ nullptr); return *this; } private: - Ty &lookup(bool IsReadonly) { - Ty &t = lookup32(Kind, IsReadonly); + __attribute__((flatten, always_inline)) Ty &lookup(bool IsReadonly, + IdentTy *Ident) { + Ty &t = lookup32(Kind, IsReadonly, Ident); return t; } - Ty &inc(int UpdateVal) { - return (lookup(/* IsReadonly */ false) += UpdateVal); + __attribute__((flatten, always_inline)) Ty &inc(int UpdateVal, + IdentTy *Ident) { + return (lookup(/* IsReadonly */ false, Ident) += UpdateVal); } - Ty &set(Ty UpdateVal) { return (lookup(/* IsReadonly */ false) = UpdateVal); } + __attribute__((flatten, always_inline)) Ty &set(Ty UpdateVal, + IdentTy *Ident) { + return (lookup(/* IsReadonly */ false, Ident) = UpdateVal); + } template friend struct ValueRAII; }; @@ -99,7 +104,7 @@ private: /// we can declare in global scope. template struct PtrValue { __attribute__((flatten, always_inline)) operator Ty() { - return lookup(/* IsReadonly */ true); + return lookup(/* IsReadonly */ true, /* IdentTy */ nullptr); } __attribute__((flatten, always_inline)) PtrValue &operator=(const Ty Other) { @@ -108,17 +113,19 @@ template struct PtrValue { } private: - Ty &lookup(bool IsReadonly) { return lookupPtr(Kind, IsReadonly); } + Ty &lookup(bool IsReadonly, IdentTy *) { return lookupPtr(Kind, IsReadonly); } - Ty &set(Ty UpdateVal) { return (lookup(/* IsReadonly */ false) = UpdateVal); } + Ty &set(Ty UpdateVal) { + return (lookup(/* IsReadonly */ false, /* IdentTy */ nullptr) = UpdateVal); + } template friend struct ValueRAII; }; template struct ValueRAII { - ValueRAII(VTy &V, Ty NewValue, Ty OldValue, bool Active) - : Ptr(Active ? V.lookup(/* IsReadonly */ false) : Val), Val(OldValue), - Active(Active) { + ValueRAII(VTy &V, Ty NewValue, Ty OldValue, bool Active, IdentTy *Ident) + : Ptr(Active ? V.lookup(/* IsReadonly */ false, Ident) : Val), + Val(OldValue), Active(Active) { if (!Active) return; ASSERT(Ptr == OldValue && "ValueRAII initialization with wrong old value!"); diff --git a/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp b/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp index 610512a..4ce2493 100644 --- a/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp @@ -87,7 +87,7 @@ void __kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr, uint32_t TId = mapping::getThreadIdInBlock(); // Handle the serialized case first, same for SPMD/non-SPMD. if (OMP_UNLIKELY(!if_expr || icv::Level)) { - state::enterDataEnvironment(); + state::DateEnvironmentRAII DERAII(ident); ++icv::Level; invokeMicrotask(TId, 0, fn, args, nargs); state::exitDataEnvironment(); @@ -104,9 +104,10 @@ void __kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr, // last or the other updates will cause a thread specific state to be // created. state::ValueRAII ParallelTeamSizeRAII(state::ParallelTeamSize, NumThreads, - 1u, TId == 0); - state::ValueRAII ActiveLevelRAII(icv::ActiveLevel, 1u, 0u, TId == 0); - state::ValueRAII LevelRAII(icv::Level, 1u, 0u, TId == 0); + 1u, TId == 0, ident); + state::ValueRAII ActiveLevelRAII(icv::ActiveLevel, 1u, 0u, TId == 0, + ident); + state::ValueRAII LevelRAII(icv::Level, 1u, 0u, TId == 0, ident); // Synchronize all threads after the main thread (TId == 0) set up the // team state properly. @@ -142,7 +143,7 @@ void __kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr, bool IsActiveParallelRegion = NumThreads > 1; if (!IsActiveParallelRegion) { - state::ValueRAII LevelRAII(icv::Level, 1u, 0u, true); + state::ValueRAII LevelRAII(icv::Level, 1u, 0u, true, ident); invokeMicrotask(TId, 0, fn, args, nargs); return; } @@ -160,11 +161,11 @@ void __kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr, // last or the other updates will cause a thread specific state to be // created. state::ValueRAII ParallelTeamSizeRAII(state::ParallelTeamSize, NumThreads, - 1u, true); + 1u, true, ident); state::ValueRAII ParallelRegionFnRAII(state::ParallelRegionFn, wrapper_fn, - (void *)nullptr, true); - state::ValueRAII ActiveLevelRAII(icv::ActiveLevel, 1u, 0u, true); - state::ValueRAII LevelRAII(icv::Level, 1u, 0u, true); + (void *)nullptr, true, ident); + state::ValueRAII ActiveLevelRAII(icv::ActiveLevel, 1u, 0u, true, ident); + state::ValueRAII LevelRAII(icv::Level, 1u, 0u, true, ident); // Master signals work to activate workers. synchronize::threads(); diff --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp index ee6295f..754c706 100644 --- a/openmp/libomptarget/DeviceRTL/src/State.cpp +++ b/openmp/libomptarget/DeviceRTL/src/State.cpp @@ -281,7 +281,7 @@ __attribute__((loader_uninitialized)) ThreadStateTy *ThreadStates[mapping::MaxThreadsPerTeam]; #pragma omp allocate(ThreadStates) allocator(omp_pteam_mem_alloc) -uint32_t &lookupForModify32Impl(uint32_t ICVStateTy::*Var) { +uint32_t &lookupForModify32Impl(uint32_t ICVStateTy::*Var, IdentTy *Ident) { if (OMP_LIKELY(TeamState.ICVState.LevelVar == 0)) return TeamState.ICVState.*Var; uint32_t TId = mapping::getThreadIdInBlock(); @@ -322,32 +322,32 @@ int returnValIfLevelIsActive(int Level, int Val, int DefaultVal, } // namespace -uint32_t &state::lookup32(ValueKind Kind, bool IsReadonly) { +uint32_t &state::lookup32(ValueKind Kind, bool IsReadonly, IdentTy *Ident) { switch (Kind) { case state::VK_NThreads: if (IsReadonly) return lookup32Impl(&ICVStateTy::NThreadsVar); - return lookupForModify32Impl(&ICVStateTy::NThreadsVar); + return lookupForModify32Impl(&ICVStateTy::NThreadsVar, Ident); case state::VK_Level: if (IsReadonly) return lookup32Impl(&ICVStateTy::LevelVar); - return lookupForModify32Impl(&ICVStateTy::LevelVar); + return lookupForModify32Impl(&ICVStateTy::LevelVar, Ident); case state::VK_ActiveLevel: if (IsReadonly) return lookup32Impl(&ICVStateTy::ActiveLevelVar); - return lookupForModify32Impl(&ICVStateTy::ActiveLevelVar); + return lookupForModify32Impl(&ICVStateTy::ActiveLevelVar, Ident); case state::VK_MaxActiveLevels: if (IsReadonly) return lookup32Impl(&ICVStateTy::MaxActiveLevelsVar); - return lookupForModify32Impl(&ICVStateTy::MaxActiveLevelsVar); + return lookupForModify32Impl(&ICVStateTy::MaxActiveLevelsVar, Ident); case state::VK_RunSched: if (IsReadonly) return lookup32Impl(&ICVStateTy::RunSchedVar); - return lookupForModify32Impl(&ICVStateTy::RunSchedVar); + return lookupForModify32Impl(&ICVStateTy::RunSchedVar, Ident); case state::VK_RunSchedChunk: if (IsReadonly) return lookup32Impl(&ICVStateTy::RunSchedChunkVar); - return lookupForModify32Impl(&ICVStateTy::RunSchedChunkVar); + return lookupForModify32Impl(&ICVStateTy::RunSchedChunkVar, Ident); case state::VK_ParallelTeamSize: return TeamState.ParallelTeamSize; default: @@ -376,7 +376,7 @@ void state::init(bool IsSPMD) { ThreadStates[mapping::getThreadIdInBlock()] = nullptr; } -void state::enterDataEnvironment() { +void state::enterDataEnvironment(IdentTy *Ident) { unsigned TId = mapping::getThreadIdInBlock(); ThreadStateTy *NewThreadState = static_cast(__kmpc_alloc_shared(sizeof(ThreadStateTy))); diff --git a/openmp/libomptarget/DeviceRTL/src/Tasking.cpp b/openmp/libomptarget/DeviceRTL/src/Tasking.cpp index 0416395..2c80e71 100644 --- a/openmp/libomptarget/DeviceRTL/src/Tasking.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Tasking.cpp @@ -49,7 +49,7 @@ int32_t __kmpc_omp_task_with_deps(IdentTy *Loc, uint32_t TId, TaskDescriptorTy *TaskDescriptor, int32_t, void *, int32_t, void *) { FunctionTracingRAII(); - state::DateEnvironmentRAII DERAII; + state::DateEnvironmentRAII DERAII(Loc); TaskDescriptor->TaskFn(0, TaskDescriptor); @@ -60,7 +60,7 @@ int32_t __kmpc_omp_task_with_deps(IdentTy *Loc, uint32_t TId, void __kmpc_omp_task_begin_if0(IdentTy *Loc, uint32_t TId, TaskDescriptorTy *TaskDescriptor) { FunctionTracingRAII(); - state::enterDataEnvironment(); + state::enterDataEnvironment(Loc); } void __kmpc_omp_task_complete_if0(IdentTy *Loc, uint32_t TId,