From: Marek Olsak Date: Tue, 24 Oct 2017 10:26:59 +0000 (+0000) Subject: AMDGPU: Add llvm.amdgcn.wqm.vote intrinsic X-Git-Tag: llvmorg-6.0.0-rc1~5068 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=2114fc3bcba7f84f1a1d9e7704b5eac9657814a4;p=platform%2Fupstream%2Fllvm.git AMDGPU: Add llvm.amdgcn.wqm.vote intrinsic Reviewers: arsenm, nhaehnle Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye Differential Revision: https://reviews.llvm.org/D38543 llvm-svn: 316426 --- diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index f220347..4df10e8 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -747,6 +747,12 @@ def int_amdgcn_wqm : Intrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable] >; +// Return true if at least one thread within the pixel quad passes true into +// the function. +def int_amdgcn_wqm_vote : Intrinsic<[llvm_i1_ty], + [llvm_i1_ty], [IntrNoMem, IntrConvergent] +>; + // Copies the active channels of the source value to the destination value, // with the guarantee that the source value is computed as if the entire // program were executed in Whole Wavefront Mode, i.e. 
with all channels diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index 0efd948..02a95a4 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -139,7 +139,9 @@ let Defs = [SCC] in { [(set i64:$sdst, (not i64:$src0))] >; def S_WQM_B32 : SOP1_32 <"s_wqm_b32">; - def S_WQM_B64 : SOP1_64 <"s_wqm_b64">; + def S_WQM_B64 : SOP1_64 <"s_wqm_b64", + [(set i1:$sdst, (int_amdgcn_wqm_vote i1:$src0))] + >; } // End Defs = [SCC] diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 61f0329..b2282cf 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3532,6 +3532,13 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } + case Intrinsic::amdgcn_wqm_vote: { + // wqm_vote is identity when the argument is constant. + if (!isa<Constant>(II->getArgOperand(0))) + break; + + return replaceInstUsesWith(*II, II->getArgOperand(0)); + } case Intrinsic::stackrestore: { // If the save is right next to the restore, remove the restore. This can // happen when variable allocas are DCE'd. 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll new file mode 100644 index 0000000..1946e6a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll @@ -0,0 +1,52 @@ +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=CHECK %s + +;CHECK-LABEL: {{^}}ret: +;CHECK: v_cmp_eq_u32_e32 [[CMP:[^,]+]], v0, v1 +;CHECK: s_wqm_b64 [[WQM:[^,]+]], [[CMP]] +;CHECK: v_cndmask_b32_e64 v0, 0, 1.0, [[WQM]] +define amdgpu_ps float @ret(i32 %v0, i32 %v1) #1 { +main_body: + %c = icmp eq i32 %v0, %v1 + %w = call i1 @llvm.amdgcn.wqm.vote(i1 %c) + %r = select i1 %w, float 1.0, float 0.0 + ret float %r +} + +;CHECK-LABEL: {{^}}true: +;CHECK: s_wqm_b64 +define amdgpu_ps float @true() #1 { +main_body: + %w = call i1 @llvm.amdgcn.wqm.vote(i1 true) + %r = select i1 %w, float 1.0, float 0.0 + ret float %r +} + +;CHECK-LABEL: {{^}}false: +;CHECK: s_wqm_b64 +define amdgpu_ps float @false() #1 { +main_body: + %w = call i1 @llvm.amdgcn.wqm.vote(i1 false) + %r = select i1 %w, float 1.0, float 0.0 + ret float %r +} + +;CHECK-LABEL: {{^}}kill: +;CHECK: v_cmp_eq_u32_e32 [[CMP:[^,]+]], v0, v1 +;CHECK: s_wqm_b64 [[WQM:[^,]+]], [[CMP]] +;FIXME: This could just be: s_and_b64 exec, exec, [[WQM]] +;CHECK: v_cndmask_b32_e64 [[KILL:[^,]+]], -1.0, 1.0, [[WQM]] +;CHECK: v_cmpx_le_f32_e32 {{[^,]+}}, 0, [[KILL]] +;CHECK: s_endpgm +define amdgpu_ps void @kill(i32 %v0, i32 %v1) #1 { +main_body: + %c = icmp eq i32 %v0, %v1 + %w = call i1 @llvm.amdgcn.wqm.vote(i1 %c) + %r = select i1 %w, float 1.0, float -1.0 + call void @llvm.AMDGPU.kill(float %r) + ret void +} + +declare void @llvm.AMDGPU.kill(float) #1 +declare i1 @llvm.amdgcn.wqm.vote(i1) + +attributes #1 = { nounwind } diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll index 1901997..921f958 100644 --- 
a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll @@ -1537,4 +1537,37 @@ define i64 @fcmp_constant_to_rhs_olt(float %x) { ret i64 %result } +; -------------------------------------------------------------------- +; llvm.amdgcn.wqm.vote +; -------------------------------------------------------------------- + +declare i1 @llvm.amdgcn.wqm.vote(i1) + +; CHECK-LABEL: @wqm_vote_true( +; CHECK: ret float 1.000000e+00 +define float @wqm_vote_true() { +main_body: + %w = call i1 @llvm.amdgcn.wqm.vote(i1 true) + %r = select i1 %w, float 1.0, float 0.0 + ret float %r +} + +; CHECK-LABEL: @wqm_vote_false( +; CHECK: ret float 0.000000e+00 +define float @wqm_vote_false() { +main_body: + %w = call i1 @llvm.amdgcn.wqm.vote(i1 false) + %r = select i1 %w, float 1.0, float 0.0 + ret float %r +} + +; CHECK-LABEL: @wqm_vote_undef( +; CHECK: ret float 0.000000e+00 +define float @wqm_vote_undef() { +main_body: + %w = call i1 @llvm.amdgcn.wqm.vote(i1 undef) + %r = select i1 %w, float 1.0, float 0.0 + ret float %r +} + ; CHECK: attributes #5 = { convergent }