From 26c119a9c2f1d6866fe5996ef5a039b4fc3749ca Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sun, 30 Sep 2018 13:50:42 +0000 Subject: [PATCH] [InstCombine] allow lengthening of insertelement to eliminate shuffles As noted in post-commit comments for D52548, the limitation on increasing vector length can be applied by opcode. As a first step, this patch only allows insertelement to be widened because that has no logical downsides for IR and has little risk of pessimizing codegen. This may cause PR39132 to go into hiding during a full compile, but that bug is not fixed. llvm-svn: 343406 --- llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 10 ++++++++-- llvm/test/Transforms/InstCombine/vec_shuffle.ll | 9 ++++----- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index a87e323..c391034 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -918,6 +918,13 @@ static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask, case Instruction::FPTrunc: case Instruction::FPExt: case Instruction::GetElementPtr: { + // Bail out if we would create longer vector ops. We could allow creating + // longer vector ops, but that may result in more expensive codegen. We + // would also need to limit the transform to avoid undefined behavior for + // integer div/rem. 
+ Type *ITy = I->getType(); + if (ITy->isVectorTy() && Mask.size() > ITy->getVectorNumElements()) + return false; for (Value *Operand : I->operands()) { if (!canEvaluateShuffled(Operand, Mask, Depth - 1)) return false; @@ -1464,8 +1471,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { if (isRHSID) return replaceInstUsesWith(SVI, RHS); } - if (isa<UndefValue>(RHS) && !SVI.increasesLength() && - canEvaluateShuffled(LHS, Mask)) { + if (isa<UndefValue>(RHS) && canEvaluateShuffled(LHS, Mask)) { Value *V = evaluateInDifferentElementOrder(LHS, Mask); return replaceInstUsesWith(SVI, V); } diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll index b3f2ca5..7a5ff36 100644 --- a/llvm/test/Transforms/InstCombine/vec_shuffle.ll +++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll @@ -218,14 +218,13 @@ define <3 x i32> @div_wider(i32 %y, i32 %z) { ret <3 x i32> %ext } -; TODO: Increasing length of insertelements (no math ops) is a good canonicalization. +; Increasing length of insertelements (no math ops) is a good canonicalization. define <3 x i8> @fold_inselts_with_widening_shuffle(i8 %x, i8 %y) { ; CHECK-LABEL: @fold_inselts_with_widening_shuffle( -; CHECK-NEXT: [[INS0:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 0 -; CHECK-NEXT: [[INS1:%.*]] = insertelement <2 x i8> [[INS0]], i8 [[Y:%.*]], i32 1 -; CHECK-NEXT: [[WIDEN:%.*]] = shufflevector <2 x i8> [[INS1]], <2 x i8> undef, <3 x i32> <i32 0, i32 1, i32 undef> -; CHECK-NEXT: ret <3 x i8> [[WIDEN]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x i8> undef, i8 [[X:%.*]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <3 x i8> [[TMP1]], i8 [[Y:%.*]], i32 1 +; CHECK-NEXT: ret <3 x i8> [[TMP2]] ; %ins0 = insertelement <2 x i8> undef, i8 %x, i32 0 %ins1 = insertelement <2 x i8> %ins0, i8 %y, i32 1 -- 2.7.4