device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
// Device-side vprintf; the NVPTX toolchain provides the definition at link
// time, so only the declaration appears here.
int32_t vprintf(const char *, void *);
namespace impl {
/// Forward the OpenMP printf payload straight to the native NVPTX vprintf.
/// The third (size) argument is unused on this target.
// NOTE(review): resolved from diff residue to the post-patch ("+") form,
// which drops `static` from the definition.
int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t) {
  return vprintf(Format, Arguments);
}
} // namespace impl
// We do not have a vprintf implementation for AMD GPU yet so we use a stub.
#pragma omp begin declare variant match(device = {arch(amdgcn)})
namespace impl {
/// AMDGPU stub: printing is not supported, so ignore both the format string
/// and the argument buffer and report failure with -1.
// NOTE(review): resolved from diff residue to the post-patch ("+") form,
// which drops `static` from the definition.
int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t) {
  return -1;
}
} // namespace impl
///{
#pragma omp begin declare variant match(device = {arch(amdgcn)})
-static const llvm::omp::GV &getGridValue() {
+const llvm::omp::GV &getGridValue() {
return llvm::omp::getAMDGPUGridValues<__AMDGCN_WAVEFRONT_SIZE>();
}
return __nvvm_read_ptx_sreg_ntid_x();
}
-static const llvm::omp::GV &getGridValue() {
- return llvm::omp::NVPTXGridValues;
-}
+const llvm::omp::GV &getGridValue() { return llvm::omp::NVPTXGridValues; }
LaneMaskTy activemask() {
unsigned int Mask;
break;
case 16:
GlobalArgs[15] = args[15];
- // FALLTHROUGH
+ [[fallthrough]];
case 15:
GlobalArgs[14] = args[14];
- // FALLTHROUGH
+ [[fallthrough]];
case 14:
GlobalArgs[13] = args[13];
- // FALLTHROUGH
+ [[fallthrough]];
case 13:
GlobalArgs[12] = args[12];
- // FALLTHROUGH
+ [[fallthrough]];
case 12:
GlobalArgs[11] = args[11];
- // FALLTHROUGH
+ [[fallthrough]];
case 11:
GlobalArgs[10] = args[10];
- // FALLTHROUGH
+ [[fallthrough]];
case 10:
GlobalArgs[9] = args[9];
- // FALLTHROUGH
+ [[fallthrough]];
case 9:
GlobalArgs[8] = args[8];
- // FALLTHROUGH
+ [[fallthrough]];
case 8:
GlobalArgs[7] = args[7];
- // FALLTHROUGH
+ [[fallthrough]];
case 7:
GlobalArgs[6] = args[6];
- // FALLTHROUGH
+ [[fallthrough]];
case 6:
GlobalArgs[5] = args[5];
- // FALLTHROUGH
+ [[fallthrough]];
case 5:
GlobalArgs[4] = args[4];
- // FALLTHROUGH
+ [[fallthrough]];
case 4:
GlobalArgs[3] = args[3];
- // FALLTHROUGH
+ [[fallthrough]];
case 3:
GlobalArgs[2] = args[2];
- // FALLTHROUGH
+ [[fallthrough]];
case 2:
GlobalArgs[1] = args[1];
- // FALLTHROUGH
+ [[fallthrough]];
case 1:
GlobalArgs[0] = args[0];
- // FALLTHROUGH
+ [[fallthrough]];
case 0:
break;
}
/// Return the smaller of two unsigned 32-bit values (ties return either,
/// which is the same value).
uint32_t kmpcMin(uint32_t x, uint32_t y) {
  if (y < x)
    return y;
  return x;
}
// File-local counters backing the team-reduction buffer window.
// NOTE(review): resolved from diff residue to the post-patch ("+") form,
// which drops `volatile`; the patch's "+" lines elsewhere also drop the
// matching `(uint32_t *)` casts at the atomic:: call sites.
static uint32_t IterCnt = 0;
static uint32_t Cnt = 0;
} // namespace
// to the number of slots in the buffer.
bool IsMaster = (ThreadId == 0);
while (IsMaster) {
- Bound = atomic::load((uint32_t *)&IterCnt, __ATOMIC_SEQ_CST);
+ Bound = atomic::load(&IterCnt, __ATOMIC_SEQ_CST);
if (TeamId < Bound + num_of_records)
break;
}
// Increment team counter.
// This counter is incremented by all teams in the current
// BUFFER_SIZE chunk.
- ChunkTeamCount =
- atomic::inc((uint32_t *)&Cnt, num_of_records - 1u, __ATOMIC_SEQ_CST);
+ ChunkTeamCount = atomic::inc(&Cnt, num_of_records - 1u, __ATOMIC_SEQ_CST);
}
// Synchronize
if (mapping::isSPMDMode())
if (IsMaster && ChunkTeamCount == num_of_records - 1) {
// Allow SIZE number of teams to proceed writing their
// intermediate results to the global buffer.
- atomic::add((uint32_t *)&IterCnt, uint32_t(num_of_records),
- __ATOMIC_SEQ_CST);
+ atomic::add(&IterCnt, uint32_t(num_of_records), __ATOMIC_SEQ_CST);
}
return 0;
return ThreadStates[TId]->ICVState.*Var;
}
-uint32_t &lookup32Impl(uint32_t ICVStateTy::*Var) {
- uint32_t TId = mapping::getThreadIdInBlock();
- if (OMP_UNLIKELY(config::mayUseThreadStates() && ThreadStates[TId]))
- return ThreadStates[TId]->ICVState.*Var;
- return TeamState.ICVState.*Var;
-}
-uint64_t &lookup64Impl(uint64_t ICVStateTy::*Var) {
- uint64_t TId = mapping::getThreadIdInBlock();
+template <typename IntTy> IntTy &lookupImpl(IntTy ICVStateTy::*Var) {
+ IntTy TId = mapping::getThreadIdInBlock();
if (OMP_UNLIKELY(config::mayUseThreadStates() && ThreadStates[TId]))
return ThreadStates[TId]->ICVState.*Var;
return TeamState.ICVState.*Var;
// Dispatch on the ICV kind: read-only access goes through the templated
// lookupImpl (resolved here from the patch's "+" lines, replacing the old
// lookup32Impl calls); mutating access goes through lookupForModify32Impl.
// NOTE(review): the enclosing function's header is elided by the diff's
// hunk boundaries and is not visible in this fragment.
switch (Kind) {
case state::VK_NThreads:
  if (IsReadonly)
    return lookupImpl<uint32_t>(&ICVStateTy::NThreadsVar);
  return lookupForModify32Impl(&ICVStateTy::NThreadsVar, Ident);
case state::VK_Level:
  if (IsReadonly)
    return lookupImpl<uint32_t>(&ICVStateTy::LevelVar);
  return lookupForModify32Impl(&ICVStateTy::LevelVar, Ident);
case state::VK_ActiveLevel:
  if (IsReadonly)
    return lookupImpl<uint32_t>(&ICVStateTy::ActiveLevelVar);
  return lookupForModify32Impl(&ICVStateTy::ActiveLevelVar, Ident);
case state::VK_MaxActiveLevels:
  if (IsReadonly)
    return lookupImpl<uint32_t>(&ICVStateTy::MaxActiveLevelsVar);
  return lookupForModify32Impl(&ICVStateTy::MaxActiveLevelsVar, Ident);
case state::VK_RunSched:
  if (IsReadonly)
    return lookupImpl<uint32_t>(&ICVStateTy::RunSchedVar);
  return lookupForModify32Impl(&ICVStateTy::RunSchedVar, Ident);
case state::VK_RunSchedChunk:
  if (IsReadonly)
    return lookupImpl<uint32_t>(&ICVStateTy::RunSchedChunkVar);
  return lookupForModify32Impl(&ICVStateTy::RunSchedChunkVar, Ident);
case state::VK_ParallelTeamSize:
  return TeamState.ParallelTeamSize;
numberOfActiveOMPThreads);
break;
}
+ [[fallthrough]];
} // note: if chunk <=0, use nochunk
case kmp_sched_static_balanced_chunk: {
if (chunk > 0) {
ub = oldUb;
break;
}
+ [[fallthrough]];
} // note: if chunk <=0, use nochunk
case kmp_sched_static_nochunk: {
ForStaticNoChunk(lastiter, lb, ub, stride, chunk, gtid,
ForStaticChunk(lastiter, lb, ub, stride, chunk, omp_get_team_num(),
omp_get_num_teams());
break;
- } // note: if chunk <=0, use nochunk
- }
+ }
+ [[fallthrough]];
+ } // note: if chunk <=0, use nochunk
case kmp_sched_distr_static_nochunk: {
ForStaticNoChunk(lastiter, lb, ub, stride, chunk, omp_get_team_num(),
omp_get_num_teams());
// NOTE(review): fragment of a warp-cooperative counter update; the
// enclosing function is elided by the diff's hunk boundaries.
uint32_t change = utils::popc(active);
// Lanes lower than this one in the active mask determine this lane's rank.
__kmpc_impl_lanemask_t lane_mask_lt = mapping::lanemaskLT();
unsigned int rank = utils::popc(active & lane_mask_lt);
// Resolved to the post-patch ("+") form: warp_res is zero-initialized since
// only rank 0 performs the atomic; presumably the result is then shared
// with the other lanes in the elided continuation — confirm there.
uint64_t warp_res = 0;
if (rank == 0) {
  warp_res = atomic::add(&Cnt, change, __ATOMIC_SEQ_CST);
}