From 605f0a46dc9304b20b2f5fb6ae7cb81638b41a78 Mon Sep 17 00:00:00 2001
From: Nikita Popov
Date: Wed, 17 May 2023 15:04:42 +0200
Subject: [PATCH] [InstCombine] Use IRBuilder in
 evaluateInDifferentElementOrder()

This ensures that the new instructions get reprocessed in the same
iteration. This should be largely NFC, apart from worklist order
effects and naming changes, as seen in the test diff.
---
 .../InstCombine/InstCombineVectorOps.cpp         | 70 +++++++++++-----------
 .../amdgcn-demanded-vector-elts-inseltpoison.ll  | 10 ++--
 2 files changed, 40 insertions(+), 40 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 7ffa30b..86ba0be 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -1821,9 +1821,9 @@ static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
 
 /// Rebuild a new instruction just like 'I' but with the new operands given.
 /// In the event of type mismatch, the type of the operands is correct.
-static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps) {
-  // We don't want to use the IRBuilder here because we want the replacement
-  // instructions to appear next to 'I', not the builder's insertion point.
+static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps,
+                       IRBuilderBase &Builder) {
+  Builder.SetInsertPoint(I);
   switch (I->getOpcode()) {
     case Instruction::Add:
     case Instruction::FAdd:
@@ -1845,28 +1845,29 @@ static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps) {
     case Instruction::Xor: {
       BinaryOperator *BO = cast<BinaryOperator>(I);
       assert(NewOps.size() == 2 && "binary operator with #ops != 2");
-      BinaryOperator *New =
-          BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(),
-                                 NewOps[0], NewOps[1], "", BO);
-      if (isa<OverflowingBinaryOperator>(BO)) {
-        New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
-        New->setHasNoSignedWrap(BO->hasNoSignedWrap());
-      }
-      if (isa<PossiblyExactOperator>(BO)) {
-        New->setIsExact(BO->isExact());
+      Value *New = Builder.CreateBinOp(cast<BinaryOperator>(I)->getOpcode(),
+                                       NewOps[0], NewOps[1]);
+      if (auto *NewI = dyn_cast<Instruction>(New)) {
+        if (isa<OverflowingBinaryOperator>(BO)) {
+          NewI->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
+          NewI->setHasNoSignedWrap(BO->hasNoSignedWrap());
+        }
+        if (isa<PossiblyExactOperator>(BO)) {
+          NewI->setIsExact(BO->isExact());
+        }
+        if (isa<FPMathOperator>(BO))
+          NewI->copyFastMathFlags(I);
       }
-      if (isa<FPMathOperator>(BO))
-        New->copyFastMathFlags(I);
       return New;
     }
     case Instruction::ICmp:
       assert(NewOps.size() == 2 && "icmp with #ops != 2");
-      return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(),
-                          NewOps[0], NewOps[1]);
+      return Builder.CreateICmp(cast<ICmpInst>(I)->getPredicate(), NewOps[0],
+                                NewOps[1]);
     case Instruction::FCmp:
       assert(NewOps.size() == 2 && "fcmp with #ops != 2");
-      return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(),
-                          NewOps[0], NewOps[1]);
+      return Builder.CreateFCmp(cast<FCmpInst>(I)->getPredicate(), NewOps[0],
+                                NewOps[1]);
     case Instruction::Trunc:
     case Instruction::ZExt:
     case Instruction::SExt:
@@ -1882,27 +1883,26 @@ static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps) {
           I->getType()->getScalarType(),
           cast<VectorType>(NewOps[0]->getType())->getElementCount());
       assert(NewOps.size() == 1 && "cast with #ops != 1");
-      return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy,
-                              "", I);
+      return Builder.CreateCast(cast<CastInst>(I)->getOpcode(), NewOps[0],
+                                DestTy);
     }
     case Instruction::GetElementPtr: {
       Value *Ptr = NewOps[0];
       ArrayRef<Value*> Idx = NewOps.slice(1);
-      GetElementPtrInst *GEP = GetElementPtrInst::Create(
-          cast<GetElementPtrInst>(I)->getSourceElementType(), Ptr, Idx, "", I);
-      GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds());
-      return GEP;
+      return Builder.CreateGEP(cast<GetElementPtrInst>(I)->getSourceElementType(),
+                               Ptr, Idx, "",
+                               cast<GetElementPtrInst>(I)->isInBounds());
     }
   }
   llvm_unreachable("failed to rebuild vector instructions");
 }
 
-static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
+static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask,
+                                              IRBuilderBase &Builder) {
   // Mask.size() does not need to be equal to the number of vector elements.
 
   assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
   Type *EltTy = V->getType()->getScalarType();
-  Type *I32Ty = IntegerType::getInt32Ty(V->getContext());
 
   if (match(V, m_Undef()))
     return UndefValue::get(FixedVectorType::get(EltTy, Mask.size()));
@@ -1956,15 +1956,14 @@ static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
         // as well. E.g. GetElementPtr may have scalar operands even if the
         // return value is a vector, so we need to examine the operand type.
         if (I->getOperand(i)->getType()->isVectorTy())
-          V = evaluateInDifferentElementOrder(I->getOperand(i), Mask);
+          V = evaluateInDifferentElementOrder(I->getOperand(i), Mask, Builder);
         else
          V = I->getOperand(i);
         NewOps.push_back(V);
         NeedsRebuild |= (V != I->getOperand(i));
       }
-      if (NeedsRebuild) {
-        return buildNew(I, NewOps);
-      }
+      if (NeedsRebuild)
+        return buildNew(I, NewOps, Builder);
       return I;
     }
     case Instruction::InsertElement: {
@@ -1985,11 +1984,12 @@ static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
       // If element is not in Mask, no need to handle the operand 1 (element to
       // be inserted). Just evaluate values in operand 0 according to Mask.
       if (!Found)
-        return evaluateInDifferentElementOrder(I->getOperand(0), Mask);
+        return evaluateInDifferentElementOrder(I->getOperand(0), Mask, Builder);
 
-      Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask);
-      return InsertElementInst::Create(V, I->getOperand(1),
-                                       ConstantInt::get(I32Ty, Index), "", I);
+      Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask,
+                                                 Builder);
+      Builder.SetInsertPoint(I);
+      return Builder.CreateInsertElement(V, I->getOperand(1), Index);
     }
   }
   llvm_unreachable("failed to reorder elements of vector instruction!");
 }
@@ -2859,7 +2859,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
     return I;
 
   if (match(RHS, m_Undef()) && canEvaluateShuffled(LHS, Mask)) {
-    Value *V = evaluateInDifferentElementOrder(LHS, Mask);
+    Value *V = evaluateInDifferentElementOrder(LHS, Mask, Builder);
     return replaceInstUsesWith(SVI, V);
   }
 
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts-inseltpoison.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts-inseltpoison.ll
index 609dac5..11b8a44 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts-inseltpoison.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -passes=instcombine -mtriple=amdgcn-amd-amdhsa %s | FileCheck %s
+; RUN: opt -S -passes=instcombine -instcombine-infinite-loop-threshold=2 -mtriple=amdgcn-amd-amdhsa %s | FileCheck %s
 
 ; --------------------------------------------------------------------
 ; llvm.amdgcn.buffer.load
@@ -3029,8 +3029,8 @@ define amdgpu_ps float @extract_elt0_dmask_0111_image_sample_1d_v4f32_f32(float
 define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
 ; CHECK-LABEL: @extract_elt0_elt1_dmask_0001_image_sample_1d_v4f32_f32(
 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> undef, float [[DATA]], i64 0
-; CHECK-NEXT:    ret <2 x float> [[TMP1]]
+; CHECK-NEXT:    [[SHUF:%.*]] = insertelement <2 x float> undef, float [[DATA]], i64 0
+; CHECK-NEXT:    ret <2 x float> [[SHUF]]
 ;
   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1>
@@ -3070,8 +3070,8 @@ define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0101_image_sample_1d_v4f32
 define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 {
 ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0001_image_sample_1d_v4f32_f32(
 ; CHECK-NEXT:    [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <3 x float> undef, float [[DATA]], i64 0
-; CHECK-NEXT:    ret <3 x float> [[TMP1]]
+; CHECK-NEXT:    [[SHUF:%.*]] = insertelement <3 x float> undef, float [[DATA]], i64 0
+; CHECK-NEXT:    ret <3 x float> [[SHUF]]
 ;
   %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0)
   %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
-- 
2.7.4
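For readers unfamiliar with the pattern: the sketch below is a minimal, self-contained
illustration of the IRBuilder usage buildNew() switches to. SetInsertPoint() places the
rebuilt value at the position of the instruction being replaced, and inside InstCombine
the builder's callback inserter is what puts newly created instructions back on the
worklist, which is the "reprocessed in the same iteration" behaviour the commit message
refers to. This is not code from the patch; the function name, flags, and values are
invented purely for illustration.

// Editorial sketch (assumed example, not LLVM source): rebuild an instruction
// through an IRBuilder whose insertion point is the instruction being replaced.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);
  Type *I32 = Type::getInt32Ty(Ctx);
  Function *F = Function::Create(FunctionType::get(I32, {I32, I32}, false),
                                 Function::ExternalLinkage, "f", M);
  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);

  IRBuilder<> Builder(BB);
  // Original instruction we will "rebuild".
  Instruction *Old =
      cast<Instruction>(Builder.CreateAdd(F->getArg(0), F->getArg(1), "sum"));
  Builder.CreateRet(Old);

  // The pattern the patch adopts: point the builder at the old instruction so
  // the replacement is created right before it (and, inside InstCombine, is
  // queued for reprocessing by the builder's inserter callback).
  Builder.SetInsertPoint(Old);
  Value *New = Builder.CreateNUWAdd(F->getArg(0), F->getArg(1), "sum.nuw");
  Old->replaceAllUsesWith(New);
  Old->eraseFromParent();

  M.print(outs(), nullptr);  // prints the function with the rebuilt add
  return 0;
}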