#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/IPO.h"
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
- // Check if the edge into the successor block compares the __kmpc_target_init
- // result with -1. If we are in non-SPMD-mode that signals only the main
- // thread will execute the edge.
+ // Check if the edge into the successor block contains a condition that only
+ // lets the main thread execute it.
// Predicate over a branch edge. Returns true only when one of the compare
// patterns matched below is recognized; every other path returns false.
// NOTE(review): the definitions of `Cmp`, `C` and `IsSPMDModeCI`, and any use
// of `SuccessorBB`, are not visible in this excerpt -- confirm against the
// full source before relying on these comments.
auto IsInitialThreadOnly = [&](BranchInst *Edge, BasicBlock *SuccessorBB) {
// A missing or unconditional branch carries no condition to inspect.
if (!Edge || !Edge->isConditional())
return false;
// Bail out when `C` is null (presumably the constant operand of the compare
// -- TODO confirm where `C` is defined).
if (!C)
return false;
- // Match: -1 == __kmpc_target_init (for non-SPMD kernels only!)
+ // Match: -1 == __kmpc_target_init (for non-SPMD kernels only!)
if (C->isAllOnesValue()) {
// Operand 0 of the compare must be a call; it is then filtered through
// getCallIfRegularCall against the __kmpc_target_init runtime function
// entry (RFI). CB is null if either step fails.
auto *CB = dyn_cast<CallBase>(Cmp->getOperand(0));
CB = CB ? OpenMPOpt::getCallIfRegularCall(*CB, &RFI) : nullptr;
// True only when IsSPMDModeCI is non-null and its value is zero.
return IsSPMDModeCI && IsSPMDModeCI->isZero();
}
+ if (C->isZero()) {
+ // Match: 0 == llvm.nvvm.read.ptx.sreg.tid.x()
+ if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0)))
+ if (II->getIntrinsicID() == Intrinsic::nvvm_read_ptx_sreg_tid_x)
+ return true;
+
+ // Match: 0 == llvm.amdgcn.workitem.id.x()
+ if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0)))
+ if (II->getIntrinsicID() == Intrinsic::amdgcn_workitem_id_x)
+ return true;
+ }
+
// No recognized pattern matched.
return false;
};
%cmp = icmp eq i32 %call, -1
br i1 %cmp, label %if.then, label %if.else
if.then:
- call void @nvptx()
- call void @amdgcn()
br label %if.end
if.else:
br label %if.end
; REMARKS: remark: single_threaded_execution.c:1:0: Could not internalize function. Some optimizations may not be possible.
; REMARKS-NOT: remark: single_threaded_execution.c:1:0: Could not internalize function. Some optimizations may not be possible.
-; CHECK-DAG: [openmp-opt] Basic block @nvptx entry is executed by a single thread.
+; CHECK-NOT: [openmp-opt] Basic block @nvptx entry is executed by a single thread.
; CHECK-DAG: [openmp-opt] Basic block @nvptx if.then is executed by a single thread.
-; CHECK-DAG: [openmp-opt] Basic block @nvptx if.end is executed by a single thread.
+; CHECK-NOT: [openmp-opt] Basic block @nvptx if.end is executed by a single thread.
; Function Attrs: noinline
-define internal void @nvptx() {
+define void @nvptx() {
entry:
- br i1 true, label %if.then, label %if.end
+ %call = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ %cmp = icmp eq i32 %call, 0
+ br i1 %cmp, label %if.then, label %if.end
if.then:
call void @foo()
ret void
}
-; CHECK-DAG: [openmp-opt] Basic block @amdgcn entry is executed by a single thread.
+; CHECK-NOT: [openmp-opt] Basic block @amdgcn entry is executed by a single thread.
; CHECK-DAG: [openmp-opt] Basic block @amdgcn if.then is executed by a single thread.
-; CHECK-DAG: [openmp-opt] Basic block @amdgcn if.end is executed by a single thread.
+; CHECK-NOT: [openmp-opt] Basic block @amdgcn if.end is executed by a single thread.
; Function Attrs: noinline
-define internal void @amdgcn() {
+define void @amdgcn() {
entry:
- br i1 false, label %if.then, label %if.end
+ %call = call i32 @llvm.amdgcn.workitem.id.x()
+ %cmp = icmp eq i32 %call, 0
+ br i1 %cmp, label %if.then, label %if.end
if.then:
call void @foo()
declare void @__kmpc_kernel_prepare_parallel(i8*)
declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1)
+
declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1)
attributes #0 = { cold noinline }