From fee9078dcd77a067eaad1a5f59f9529faca4376b Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Fri, 23 Sep 2016 09:14:08 +0000 Subject: [PATCH] [InstCombine] Fix for PR29124: reduce insertelements to shufflevector If inserting more than one constant into a vector: define <4 x float> @foo(<4 x float> %x) { %ins1 = insertelement <4 x float> %x, float 1.0, i32 1 %ins2 = insertelement <4 x float> %ins1, float 2.0, i32 2 ret <4 x float> %ins2 } InstCombine could reduce that to a shufflevector: define <4 x float> @goo(<4 x float> %x) { %shuf = shufflevector <4 x float> %x, <4 x float> , <4 x i32> ret <4 x float> %shuf } Also, InstCombine tries to convert shuffle instruction to single insertelement, if one of the vectors is a constant vector and only a single element from this constant should be used in shuffle, i.e. shufflevector <4 x float> %v, <4 x float> , <4 x i32> -> insertelement <4 x float> %v, float 1.0, 1 Differential Revision: https://reviews.llvm.org/D24182 llvm-svn: 282237 --- .../InstCombine/InstCombineSimplifyDemanded.cpp | 55 ++++++++- .../InstCombine/InstCombineVectorOps.cpp | 133 ++++++++++++++------- .../Transforms/InstCombine/insert-const-shuf.ll | 9 ++ .../Transforms/InstCombine/vec_demanded_elts.ll | 4 +- .../InstCombine/vector_insertelt_shuffle.ll | 29 ++++- llvm/test/Transforms/InstCombine/x86-insertps.ll | 2 +- 6 files changed, 173 insertions(+), 59 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index f3268d2..7d89a5f 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -1037,17 +1037,21 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, } } - APInt UndefElts4(LHSVWidth, 0); + APInt LHSUndefElts(LHSVWidth, 0); TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded, - UndefElts4, Depth + 1); + LHSUndefElts, Depth + 1); if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; } - APInt UndefElts3(LHSVWidth, 0); + APInt RHSUndefElts(LHSVWidth, 0); TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded, - UndefElts3, Depth + 1); + RHSUndefElts, Depth + 1); if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; } bool NewUndefElts = false; + unsigned LHSIdx = -1u; + unsigned RHSIdx = -1u; + bool LHSUniform = true; + bool RHSUniform = true; for (unsigned i = 0; i < VWidth; i++) { unsigned MaskVal = Shuffle->getMaskValue(i); if (MaskVal == -1u) { @@ -1056,18 +1060,57 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, NewUndefElts = true; UndefElts.setBit(i); } else if (MaskVal < LHSVWidth) { - if (UndefElts4[MaskVal]) { + if (LHSUndefElts[MaskVal]) { NewUndefElts = true; UndefElts.setBit(i); + } else { + LHSIdx = LHSIdx == -1u ? MaskVal : LHSVWidth; + LHSUniform = LHSUniform && (MaskVal == i); } } else { - if (UndefElts3[MaskVal - LHSVWidth]) { + if (RHSUndefElts[MaskVal - LHSVWidth]) { NewUndefElts = true; UndefElts.setBit(i); + } else { + RHSIdx = RHSIdx == -1u ? MaskVal - LHSVWidth : LHSVWidth; + RHSUniform = RHSUniform && (MaskVal - LHSVWidth == i); } } } + // Try to transform shuffle with constant vector and single element from + // this constant vector to single insertelement instruction. + // shufflevector V, C, -> + // insertelement V, C[ci], ci-n + if (LHSVWidth == Shuffle->getType()->getNumElements()) { + Value *Op = nullptr; + Constant *Value = nullptr; + unsigned Idx = -1u; + + // Find constant vector wigth the single element in shuffle (LHS or RHS). + if (LHSIdx < LHSVWidth && RHSUniform) { + if (auto *CV = dyn_cast(Shuffle->getOperand(0))) { + Op = Shuffle->getOperand(1); + Value = CV->getOperand(LHSIdx); + Idx = LHSIdx; + } + } + if (RHSIdx < LHSVWidth && LHSUniform) { + if (auto *CV = dyn_cast(Shuffle->getOperand(1))) { + Op = Shuffle->getOperand(0); + Value = CV->getOperand(RHSIdx); + Idx = RHSIdx; + } + } + // Found constant vector with single element - convert to insertelement. + if (Op && Value) { + Instruction *New = InsertElementInst::Create( + Op, Value, ConstantInt::get(Type::getInt32Ty(I->getContext()), Idx), + Shuffle->getName()); + InsertNewInstWith(New, *Shuffle); + return New; + } + } if (NewUndefElts) { // Add additional discovered undefs. SmallVector Elts; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 59593dd..6c3aa3a 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -585,58 +585,103 @@ static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) { return true; } -/// insertelt (shufflevector X, CVec, Mask), C, CIndex --> -/// shufflevector X, CVec', Mask' +/// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex +/// --> shufflevector X, CVec', Mask' static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) { - // Bail out if the shuffle has more than one use. In that case, we'd be + auto *Inst = dyn_cast(InsElt.getOperand(0)); + // Bail out if the parent has more than one use. In that case, we'd be // replacing the insertelt with a shuffle, and that's not a clear win. - auto *Shuf = dyn_cast(InsElt.getOperand(0)); - if (!Shuf || !Shuf->hasOneUse()) + if (!Inst || !Inst->hasOneUse()) return nullptr; + if (auto *Shuf = dyn_cast(InsElt.getOperand(0))) { + // The shuffle must have a constant vector operand. The insertelt must have + // a constant scalar being inserted at a constant position in the vector. + Constant *ShufConstVec, *InsEltScalar; + uint64_t InsEltIndex; + if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) || + !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) || + !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex))) + return nullptr; - // The shuffle must have a constant vector operand. The insertelt must have a - // constant scalar being inserted at a constant position in the vector. - Constant *ShufConstVec, *InsEltScalar; - uint64_t InsEltIndex; - if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) || - !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) || - !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex))) - return nullptr; + // Adding an element to an arbitrary shuffle could be expensive, but a + // shuffle that selects elements from vectors without crossing lanes is + // assumed cheap. + // If we're just adding a constant into that shuffle, it will still be + // cheap. + if (!isShuffleEquivalentToSelect(*Shuf)) + return nullptr; - // Adding an element to an arbitrary shuffle could be expensive, but a shuffle - // that selects elements from vectors without crossing lanes is assumed cheap. - // If we're just adding a constant into that shuffle, it will still be cheap. - if (!isShuffleEquivalentToSelect(*Shuf)) - return nullptr; + // From the above 'select' check, we know that the mask has the same number + // of elements as the vector input operands. We also know that each constant + // input element is used in its lane and can not be used more than once by + // the shuffle. Therefore, replace the constant in the shuffle's constant + // vector with the insertelt constant. Replace the constant in the shuffle's + // mask vector with the insertelt index plus the length of the vector + // (because the constant vector operand of a shuffle is always the 2nd + // operand). + Constant *Mask = Shuf->getMask(); + unsigned NumElts = Mask->getType()->getVectorNumElements(); + SmallVector NewShufElts(NumElts); + SmallVector NewMaskElts(NumElts); + for (unsigned I = 0; I != NumElts; ++I) { + if (I == InsEltIndex) { + NewShufElts[I] = InsEltScalar; + Type *Int32Ty = Type::getInt32Ty(Shuf->getContext()); + NewMaskElts[I] = ConstantInt::get(Int32Ty, InsEltIndex + NumElts); + } else { + // Copy over the existing values. + NewShufElts[I] = ShufConstVec->getAggregateElement(I); + NewMaskElts[I] = Mask->getAggregateElement(I); + } + } - // From the above 'select' check, we know that the mask has the same number of - // elements as the vector input operands. We also know that each constant - // input element is used in its lane and can not be used more than once by the - // shuffle. Therefore, replace the constant in the shuffle's constant vector - // with the insertelt constant. Replace the constant in the shuffle's mask - // vector with the insertelt index plus the length of the vector (because the - // constant vector operand of a shuffle is always the 2nd operand). - Constant *Mask = Shuf->getMask(); - unsigned NumElts = Mask->getType()->getVectorNumElements(); - SmallVector NewShufElts(NumElts); - SmallVector NewMaskElts(NumElts); - for (unsigned i = 0; i != NumElts; ++i) { - if (i == InsEltIndex) { - NewShufElts[i] = InsEltScalar; - Type *Int32Ty = Type::getInt32Ty(Shuf->getContext()); - NewMaskElts[i] = ConstantInt::get(Int32Ty, InsEltIndex + NumElts); - } else { - // Copy over the existing values. - NewShufElts[i] = ShufConstVec->getAggregateElement(i); - NewMaskElts[i] = Mask->getAggregateElement(i); + // Create new operands for a shuffle that includes the constant of the + // original insertelt. The old shuffle will be dead now. + return new ShuffleVectorInst(Shuf->getOperand(0), + ConstantVector::get(NewShufElts), + ConstantVector::get(NewMaskElts)); + } else if (auto *IEI = dyn_cast(Inst)) { + // Transform sequences of insertelements ops with constant data/indexes into + // a single shuffle op. + unsigned NumElts = InsElt.getType()->getNumElements(); + + uint64_t InsertIdx[2]; + Constant *Val[2]; + if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) || + !match(InsElt.getOperand(1), m_Constant(Val[0])) || + !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) || + !match(IEI->getOperand(1), m_Constant(Val[1]))) + return nullptr; + SmallVector Values(NumElts); + SmallVector Mask(NumElts); + auto ValI = std::begin(Val); + // Generate new constant vector and mask. + // We have 2 values/masks from the insertelements instructions. Insert them + // into new value/mask vectors. + for (uint64_t I : InsertIdx) { + if (!Values[I]) { + assert(!Mask[I]); + Values[I] = *ValI; + Mask[I] = ConstantInt::get(Type::getInt32Ty(InsElt.getContext()), + NumElts + I); + } + ++ValI; } + // Remaining values are filled with 'undef' values. + for (unsigned I = 0; I < NumElts; ++I) { + if (!Values[I]) { + assert(!Mask[I]); + Values[I] = UndefValue::get(InsElt.getType()->getElementType()); + Mask[I] = ConstantInt::get(Type::getInt32Ty(InsElt.getContext()), I); + } + } + // Create new operands for a shuffle that includes the constant of the + // original insertelt. + return new ShuffleVectorInst(IEI->getOperand(0), + ConstantVector::get(Values), + ConstantVector::get(Mask)); } - - // Create new operands for a shuffle that includes the constant of the - // original insertelt. The old shuffle will be dead now. - Constant *NewShufVec = ConstantVector::get(NewShufElts); - Constant *NewMask = ConstantVector::get(NewMaskElts); - return new ShuffleVectorInst(Shuf->getOperand(0), NewShufVec, NewMask); + return nullptr; } Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { diff --git a/llvm/test/Transforms/InstCombine/insert-const-shuf.ll b/llvm/test/Transforms/InstCombine/insert-const-shuf.ll index 60f272f..3e301e3 100644 --- a/llvm/test/Transforms/InstCombine/insert-const-shuf.ll +++ b/llvm/test/Transforms/InstCombine/insert-const-shuf.ll @@ -26,6 +26,15 @@ define <4 x float> @twoInserts(<4 x float> %x) { ret <4 x float> %ins2 } +define <4 x i32> @shuffleRetain(<4 x i32> %base) { +; CHECK-LABEL: @shuffleRetain( +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> %base, <4 x i32> , <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[SHUF]] +; + %shuf = shufflevector <4 x i32> %base, <4 x i32> , <4 x i32> + ret <4 x i32> %shuf +} + ; TODO: Transform an arbitrary shuffle with constant into a shuffle that is equivalant to a vector select. define <4 x float> @disguisedSelect(<4 x float> %x) { diff --git a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll index 604806e..2ee30d7 100644 --- a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll +++ b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll @@ -236,8 +236,8 @@ define <4 x float> @test_select(float %f, float %g) { define <2 x i64> @PR24922(<2 x i64> %v) { ; CHECK-LABEL: @PR24922( -; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <2 x i64> %v, <2 x i64> , <2 x i32> -; CHECK-NEXT: ret <2 x i64> [[RESULT]] +; CHECK-NEXT: [[RESULT1:%.*]] = insertelement <2 x i64> %v, i64 0, i32 0 +; CHECK-NEXT: ret <2 x i64> [[RESULT1]] ; %result = select <2 x i1> bitcast (<4 x i32> to <2 x i64>), i64 0), i64 0), i1 true>, <2 x i64> %v, <2 x i64> zeroinitializer ret <2 x i64> %result diff --git a/llvm/test/Transforms/InstCombine/vector_insertelt_shuffle.ll b/llvm/test/Transforms/InstCombine/vector_insertelt_shuffle.ll index 9b45962..b3e6146 100644 --- a/llvm/test/Transforms/InstCombine/vector_insertelt_shuffle.ll +++ b/llvm/test/Transforms/InstCombine/vector_insertelt_shuffle.ll @@ -7,10 +7,9 @@ define<4 x float> @foo(<4 x float> %x) { ret<4 x float> %ins2 } -; FIXME: insertelements should fold to shuffle +; insertelements should fold to shuffle ; CHECK-LABEL: @foo -; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 1.000000e+00, i32 1 -; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 2.000000e+00, i32 2 +; CHECK-NEXT: shufflevector <4 x float> %{{.+}}, <4 x float> , <4 x i32> ; CHECK-NEXT: ret <4 x float> % define<4 x float> @bar(<4 x float> %x, float %a) { @@ -45,12 +44,11 @@ define<4 x float> @bazz(<4 x float> %x, i32 %a) { ret<4 x float> %ins6 } -; FIXME: insertelements should fold to shuffle +; insertelements should fold to shuffle ; CHECK-LABEL: @bazz ; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 1.000000e+00, i32 3 ; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 5.000000e+00, i32 % -; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 1.000000e+00, i32 1 -; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 2.000000e+00, i32 2 +; CHECK-NEXT: shufflevector <4 x float> %{{.+}}, <4 x float> , <4 x i32> ; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 7.000000e+00, i32 % ; CHECK-NEXT: ret <4 x float> % @@ -75,3 +73,22 @@ define<4 x float> @bazzzz(<4 x float> %x) { ; CHECK-NEXT: insertelement <4 x float> %{{.+}}, float 2.000000e+00, i32 2 ; CHECK-NEXT: ret <4 x float> % +define<4 x float> @bazzzzz() { + %ins1 = insertelement <4 x float> insertelement (<4 x float> , float 4.0, i32 3), float 5.0, i32 1 + %ins2 = insertelement<4 x float> %ins1, float 10.0, i32 2 + ret<4 x float> %ins2 +} + +; insertelements should fold to shuffle +; CHECK-LABEL: @bazzzzz +; CHECK-NEXT: ret <4 x float> + +define<4 x float> @bazzzzzz(<4 x float> %x, i32 %a) { + %ins1 = insertelement <4 x float> insertelement (<4 x float> shufflevector (<4 x float> undef, <4 x float> , <4 x i32> ), float 4.0, i32 3), float 5.0, i32 1 + ret<4 x float> %ins1 +} + +; insertelements should fold to shuffle +; CHECK-LABEL: @bazzzzz +; CHECK-NEXT: ret <4 x float> + diff --git a/llvm/test/Transforms/InstCombine/x86-insertps.ll b/llvm/test/Transforms/InstCombine/x86-insertps.ll index bb1a3f3..f55ea6f 100644 --- a/llvm/test/Transforms/InstCombine/x86-insertps.ll +++ b/llvm/test/Transforms/InstCombine/x86-insertps.ll @@ -74,7 +74,7 @@ define <4 x float> @insertps_0x1a_single_input(<4 x float> %v1) { define <4 x float> @insertps_0xc1(<4 x float> %v1, <4 x float> %v2) { ; CHECK-LABEL: @insertps_0xc1( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> , <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> %v1, float 0.000000e+00, i32 0 ; CHECK-NEXT: ret <4 x float> [[TMP1]] ; %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 193) -- 2.7.4