#define OMPTARGET_STATE_H
#include "Debug.h"
+#include "Mapping.h"
#include "Types.h"
+#include "Utils.h"
#pragma omp begin declare target device_type(nohost)
namespace _OMP {
+namespace memory {
+
+/// Alloca \p Size bytes in shared memory, if possible, for \p Reason.
+///
+/// Note: See the restrictions on __kmpc_alloc_shared for proper usage.
+void *allocShared(uint64_t Size, const char *Reason);
+
+/// Free \p Ptr, allocated via allocShared, for \p Reason.
+///
+/// Note: See the restrictions on __kmpc_free_shared for proper usage.
+void freeShared(void *Ptr, uint64_t Bytes, const char *Reason);
+
+/// Alloca \p Size bytes in global memory, if possible, for \p Reason.
+void *allocGlobal(uint64_t Size, const char *Reason);
+
+/// Return a pointer to the dynamic shared memory buffer.
+void *getDynamicBuffer();
+
+/// Free \p Ptr, allocated via allocGlobal, for \p Reason.
+void freeGlobal(void *Ptr, const char *Reason);
+
+} // namespace memory
+
namespace state {
inline constexpr uint32_t SharedScratchpadSize = SHARED_SCRATCHPAD_SIZE;
+struct ICVStateTy {
+ uint32_t NThreadsVar;
+ uint32_t LevelVar;
+ uint32_t ActiveLevelVar;
+ uint32_t MaxActiveLevelsVar;
+ uint32_t RunSchedVar;
+ uint32_t RunSchedChunkVar;
+
+ bool operator==(const ICVStateTy &Other) const;
+
+ void assertEqual(const ICVStateTy &Other) const;
+};
+
+struct TeamStateTy {
+ void init(bool IsSPMD);
+
+ bool operator==(const TeamStateTy &) const;
+
+ void assertEqual(TeamStateTy &Other) const;
+
+ /// ICVs
+ ///
+ /// Preallocated storage for ICV values that are used if the threads have not
+ /// set a custom default. The latter is supported but unlikely and slow(er).
+ ///
+ ///{
+ ICVStateTy ICVState;
+ ///}
+
+ uint32_t ParallelTeamSize;
+ ParallelRegionFnTy ParallelRegionFnVar;
+};
+
+extern TeamStateTy TeamState;
+#pragma omp allocate(TeamState) allocator(omp_pteam_mem_alloc)
+
+struct ThreadStateTy {
+
+ /// ICVs have preallocated storage in the TeamStateTy which is used if a
+ /// thread has not set a custom value. The latter is supported but unlikely.
+ /// When it happens we will allocate dynamic memory to hold the values of all
+ /// ICVs. Thus, the first time an ICV is set by a thread we will allocate an
+ /// ICV struct to hold them all. This is slower than alternatives but allows
+ /// users to pay only for what they use.
+ ///
+ state::ICVStateTy ICVState;
+
+ ThreadStateTy *PreviousThreadState;
+
+ void init() {
+ ICVState = TeamState.ICVState;
+ PreviousThreadState = nullptr;
+ }
+
+ void init(ThreadStateTy *PreviousTS) {
+ ICVState = PreviousTS ? PreviousTS->ICVState : TeamState.ICVState;
+ PreviousThreadState = PreviousTS;
+ }
+};
+
+extern ThreadStateTy *ThreadStates[mapping::MaxThreadsPerTeam];
+#pragma omp allocate(ThreadStates) allocator(omp_pteam_mem_alloc)
+
/// Initialize the state machinery. Must be called by all threads.
void init(bool IsSPMD);
/// TODO
void resetStateForThread(uint32_t TId);
-uint32_t &lookup32(ValueKind VK, bool IsReadonly, IdentTy *Ident);
-void *&lookupPtr(ValueKind VK, bool IsReadonly);
+inline uint32_t &lookupForModify32Impl(uint32_t state::ICVStateTy::*Var,
+ IdentTy *Ident) {
+ if (OMP_LIKELY(!config::mayUseThreadStates() ||
+ TeamState.ICVState.LevelVar == 0))
+ return TeamState.ICVState.*Var;
+ uint32_t TId = mapping::getThreadIdInBlock();
+ if (OMP_UNLIKELY(!ThreadStates[TId])) {
+ ThreadStates[TId] = reinterpret_cast<ThreadStateTy *>(memory::allocGlobal(
+ sizeof(ThreadStateTy), "ICV modification outside data environment"));
+ ASSERT(ThreadStates[TId] != nullptr && "Nullptr returned by malloc!");
+ ThreadStates[TId]->init();
+ }
+ return ThreadStates[TId]->ICVState.*Var;
+}
+
+inline uint32_t &lookupImpl(uint32_t state::ICVStateTy::*Var) {
+ auto TId = mapping::getThreadIdInBlock();
+ if (OMP_UNLIKELY(config::mayUseThreadStates() && ThreadStates[TId]))
+ return ThreadStates[TId]->ICVState.*Var;
+ return TeamState.ICVState.*Var;
+}
+
+__attribute__((always_inline, flatten)) inline uint32_t &
+lookup32(ValueKind Kind, bool IsReadonly, IdentTy *Ident) {
+ switch (Kind) {
+ case state::VK_NThreads:
+ if (IsReadonly)
+ return lookupImpl(&ICVStateTy::NThreadsVar);
+ return lookupForModify32Impl(&ICVStateTy::NThreadsVar, Ident);
+ case state::VK_Level:
+ if (IsReadonly)
+ return lookupImpl(&ICVStateTy::LevelVar);
+ return lookupForModify32Impl(&ICVStateTy::LevelVar, Ident);
+ case state::VK_ActiveLevel:
+ if (IsReadonly)
+ return lookupImpl(&ICVStateTy::ActiveLevelVar);
+ return lookupForModify32Impl(&ICVStateTy::ActiveLevelVar, Ident);
+ case state::VK_MaxActiveLevels:
+ if (IsReadonly)
+ return lookupImpl(&ICVStateTy::MaxActiveLevelsVar);
+ return lookupForModify32Impl(&ICVStateTy::MaxActiveLevelsVar, Ident);
+ case state::VK_RunSched:
+ if (IsReadonly)
+ return lookupImpl(&ICVStateTy::RunSchedVar);
+ return lookupForModify32Impl(&ICVStateTy::RunSchedVar, Ident);
+ case state::VK_RunSchedChunk:
+ if (IsReadonly)
+ return lookupImpl(&ICVStateTy::RunSchedChunkVar);
+ return lookupForModify32Impl(&ICVStateTy::RunSchedChunkVar, Ident);
+ case state::VK_ParallelTeamSize:
+ return TeamState.ParallelTeamSize;
+ default:
+ break;
+ }
+ __builtin_unreachable();
+}
+
+__attribute__((always_inline, flatten)) inline void *&
+lookupPtr(ValueKind Kind, bool IsReadonly) {
+ switch (Kind) {
+ case state::VK_ParallelRegionFn:
+ return TeamState.ParallelRegionFnVar;
+ default:
+ break;
+ }
+ __builtin_unreachable();
+}
/// A class without actual state used to provide a nice interface to lookup and
/// update ICV values we can declare in global scope.
} // namespace icv
-namespace memory {
-
-/// Alloca \p Size bytes in shared memory, if possible, for \p Reason.
-///
-/// Note: See the restrictions on __kmpc_alloc_shared for proper usage.
-void *allocShared(uint64_t Size, const char *Reason);
-
-/// Free \p Ptr, alloated via allocShared, for \p Reason.
-///
-/// Note: See the restrictions on __kmpc_free_shared for proper usage.
-void freeShared(void *Ptr, uint64_t Bytes, const char *Reason);
-
-/// Alloca \p Size bytes in global memory, if possible, for \p Reason.
-void *allocGlobal(uint64_t Size, const char *Reason);
-
-/// Return a pointer to the dynamic shared memory buffer.
-void *getDynamicBuffer();
-
-/// Free \p Ptr, alloated via allocGlobal, for \p Reason.
-void freeGlobal(void *Ptr, const char *Reason);
-
-} // namespace memory
-
} // namespace _OMP
#pragma omp end declare target
#include "Configuration.h"
#include "Debug.h"
#include "Interface.h"
-#include "Mapping.h"
#include "Synchronization.h"
#include "Types.h"
-#include "Utils.h"
using namespace _OMP;
///}
-namespace {
-
-struct ICVStateTy {
- uint32_t NThreadsVar;
- uint32_t LevelVar;
- uint32_t ActiveLevelVar;
- uint32_t MaxActiveLevelsVar;
- uint32_t RunSchedVar;
- uint32_t RunSchedChunkVar;
-
- bool operator==(const ICVStateTy &Other) const;
-
- void assertEqual(const ICVStateTy &Other) const;
-};
-
-bool ICVStateTy::operator==(const ICVStateTy &Other) const {
+bool state::ICVStateTy::operator==(const ICVStateTy &Other) const {
return (NThreadsVar == Other.NThreadsVar) & (LevelVar == Other.LevelVar) &
(ActiveLevelVar == Other.ActiveLevelVar) &
(MaxActiveLevelsVar == Other.MaxActiveLevelsVar) &
(RunSchedChunkVar == Other.RunSchedChunkVar);
}
-void ICVStateTy::assertEqual(const ICVStateTy &Other) const {
+void state::ICVStateTy::assertEqual(const ICVStateTy &Other) const {
ASSERT(NThreadsVar == Other.NThreadsVar);
ASSERT(LevelVar == Other.LevelVar);
ASSERT(ActiveLevelVar == Other.ActiveLevelVar);
ASSERT(RunSchedChunkVar == Other.RunSchedChunkVar);
}
-struct TeamStateTy {
- /// TODO: provide a proper init function.
- void init(bool IsSPMD);
-
- bool operator==(const TeamStateTy &) const;
-
- void assertEqual(TeamStateTy &Other) const;
-
- /// ICVs
- ///
- /// Preallocated storage for ICV values that are used if the threads have not
- /// set a custom default. The latter is supported but unlikely and slow(er).
- ///
- ///{
- ICVStateTy ICVState;
- ///}
-
- uint32_t ParallelTeamSize;
- ParallelRegionFnTy ParallelRegionFnVar;
-};
-
-TeamStateTy SHARED(TeamState);
-
-void TeamStateTy::init(bool IsSPMD) {
+void state::TeamStateTy::init(bool IsSPMD) {
ICVState.NThreadsVar = mapping::getBlockSize(IsSPMD);
ICVState.LevelVar = 0;
ICVState.ActiveLevelVar = 0;
ParallelRegionFnVar = nullptr;
}
-bool TeamStateTy::operator==(const TeamStateTy &Other) const {
+bool state::TeamStateTy::operator==(const TeamStateTy &Other) const {
return (ICVState == Other.ICVState) &
(ParallelTeamSize == Other.ParallelTeamSize);
}
-void TeamStateTy::assertEqual(TeamStateTy &Other) const {
+void state::TeamStateTy::assertEqual(TeamStateTy &Other) const {
ICVState.assertEqual(Other.ICVState);
ASSERT(ParallelTeamSize == Other.ParallelTeamSize);
}
-struct ThreadStateTy {
-
- /// ICVs have preallocated storage in the TeamStateTy which is used if a
- /// thread has not set a custom value. The latter is supported but unlikely.
- /// When it happens we will allocate dynamic memory to hold the values of all
- /// ICVs. Thus, the first time an ICV is set by a thread we will allocate an
- /// ICV struct to hold them all. This is slower than alternatives but allows
- /// users to pay only for what they use.
- ///
- ICVStateTy ICVState;
-
- ThreadStateTy *PreviousThreadState;
-
- void init() {
- ICVState = TeamState.ICVState;
- PreviousThreadState = nullptr;
- }
+namespace {
- void init(ThreadStateTy *PreviousTS) {
- ICVState = PreviousTS ? PreviousTS->ICVState : TeamState.ICVState;
- PreviousThreadState = PreviousTS;
- }
-};
+state::TeamStateTy SHARED(TeamState);
__attribute__((loader_uninitialized))
-ThreadStateTy *ThreadStates[mapping::MaxThreadsPerTeam];
+state::ThreadStateTy *ThreadStates[mapping::MaxThreadsPerTeam];
#pragma omp allocate(ThreadStates) allocator(omp_pteam_mem_alloc)
-uint32_t &lookupForModify32Impl(uint32_t ICVStateTy::*Var, IdentTy *Ident) {
- if (OMP_LIKELY(!config::mayUseThreadStates() ||
- TeamState.ICVState.LevelVar == 0))
- return TeamState.ICVState.*Var;
- uint32_t TId = mapping::getThreadIdInBlock();
- if (OMP_UNLIKELY(!ThreadStates[TId])) {
- ThreadStates[TId] = reinterpret_cast<ThreadStateTy *>(memory::allocGlobal(
- sizeof(ThreadStateTy), "ICV modification outside data environment"));
- ASSERT(ThreadStates[TId] != nullptr && "Nullptr returned by malloc!");
- ThreadStates[TId]->init();
- }
- return ThreadStates[TId]->ICVState.*Var;
-}
-
-template <typename IntTy> IntTy &lookupImpl(IntTy ICVStateTy::*Var) {
- IntTy TId = mapping::getThreadIdInBlock();
- if (OMP_UNLIKELY(config::mayUseThreadStates() && ThreadStates[TId]))
- return ThreadStates[TId]->ICVState.*Var;
- return TeamState.ICVState.*Var;
-}
-
int returnValIfLevelIsActive(int Level, int Val, int DefaultVal,
int OutOfBoundsVal = -1) {
if (Level == 0)
} // namespace
-uint32_t &state::lookup32(ValueKind Kind, bool IsReadonly, IdentTy *Ident) {
- switch (Kind) {
- case state::VK_NThreads:
- if (IsReadonly)
- return lookupImpl<uint32_t>(&ICVStateTy::NThreadsVar);
- return lookupForModify32Impl(&ICVStateTy::NThreadsVar, Ident);
- case state::VK_Level:
- if (IsReadonly)
- return lookupImpl<uint32_t>(&ICVStateTy::LevelVar);
- return lookupForModify32Impl(&ICVStateTy::LevelVar, Ident);
- case state::VK_ActiveLevel:
- if (IsReadonly)
- return lookupImpl<uint32_t>(&ICVStateTy::ActiveLevelVar);
- return lookupForModify32Impl(&ICVStateTy::ActiveLevelVar, Ident);
- case state::VK_MaxActiveLevels:
- if (IsReadonly)
- return lookupImpl<uint32_t>(&ICVStateTy::MaxActiveLevelsVar);
- return lookupForModify32Impl(&ICVStateTy::MaxActiveLevelsVar, Ident);
- case state::VK_RunSched:
- if (IsReadonly)
- return lookupImpl<uint32_t>(&ICVStateTy::RunSchedVar);
- return lookupForModify32Impl(&ICVStateTy::RunSchedVar, Ident);
- case state::VK_RunSchedChunk:
- if (IsReadonly)
- return lookupImpl<uint32_t>(&ICVStateTy::RunSchedChunkVar);
- return lookupForModify32Impl(&ICVStateTy::RunSchedChunkVar, Ident);
- case state::VK_ParallelTeamSize:
- return TeamState.ParallelTeamSize;
- default:
- break;
- }
- __builtin_unreachable();
-}
-
-void *&state::lookupPtr(ValueKind Kind, bool IsReadonly) {
- switch (Kind) {
- case state::VK_ParallelRegionFn:
- return TeamState.ParallelRegionFnVar;
- default:
- break;
- }
- __builtin_unreachable();
-}
-
void state::init(bool IsSPMD) {
SharedMemorySmartStack.init(IsSPMD);
if (mapping::isInitialThreadInLevel0(IsSPMD)) {