From: Jay Foad Date: Thu, 15 Jun 2023 16:45:17 +0000 (+0100) Subject: [AMDGPU] Stop replacing amdgcn.ballot(1) with amdgcn.s.getreg(exec) X-Git-Tag: upstream/17.0.6~4757 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=84313162bf79a4c16ad3edd2a2e6accad569a907;p=platform%2Fupstream%2Fllvm.git [AMDGPU] Stop replacing amdgcn.ballot(1) with amdgcn.s.getreg(exec) Rationale: - It does not enable any further IR simplifications. - It does not improve the generated code since the isel lowering of ballot also has special cases for 0 and 1. - getreg is "too powerful" since it can read from many different registers, so its intrinsic properties have to be set very conservatively. There is also a correctness problem that getreg can read from exec but it is currently not marked as convergent. Differential Revision: https://reviews.llvm.org/D153047 --- diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp index 18e1b69..b3bb61d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp @@ -908,25 +908,6 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { // amdgcn.ballot(i1 0) is zero. return IC.replaceInstUsesWith(II, Constant::getNullValue(II.getType())); } - - if (Src->isOne()) { - // amdgcn.ballot(i1 1) is exec. - const char *RegName = "exec"; - if (II.getType()->isIntegerTy(32)) - RegName = "exec_lo"; - else if (!II.getType()->isIntegerTy(64)) - break; - - Function *NewF = Intrinsic::getDeclaration( - II.getModule(), Intrinsic::read_register, II.getType()); - Metadata *MDArgs[] = {MDString::get(II.getContext(), RegName)}; - MDNode *MD = MDNode::get(II.getContext(), MDArgs); - Value *Args[] = {MetadataAsValue::get(II.getContext(), MD)}; - CallInst *NewCall = IC.Builder.CreateCall(NewF, Args); - NewCall->addFnAttr(Attribute::Convergent); - NewCall->takeName(&II); - return IC.replaceInstUsesWith(II, NewCall); - } } break; } diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll index a661961..3075dae 100644 --- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll @@ -2582,7 +2582,7 @@ define i64 @ballot_zero_64() { define i64 @ballot_one_64() { ; CHECK-LABEL: @ballot_one_64( -; CHECK-NEXT: [[B:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR17]] +; CHECK-NEXT: [[B:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) ; CHECK-NEXT: ret i64 [[B]] ; %b = call i64 @llvm.amdgcn.ballot.i64(i1 1) @@ -2608,7 +2608,7 @@ define i32 @ballot_zero_32() { define i32 @ballot_one_32() { ; CHECK-LABEL: @ballot_one_32( -; CHECK-NEXT: [[B:%.*]] = call i32 @llvm.read_register.i32(metadata [[META1:![0-9]+]]) #[[ATTR17]] +; CHECK-NEXT: [[B:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 true) ; CHECK-NEXT: ret i32 [[B]] ; %b = call i32 @llvm.amdgcn.ballot.i32(i1 1)