From 509a1e7a9b444bccc88b57685a0e664f61c37f0e Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 10 Jul 2018 15:12:31 +0000 Subject: [PATCH] [InstCombine] safely allow non-commutative binop identity constant folds This was originally intended with D48893, but as discussed there, we have to make the folds safe from producing extra poison. This should give the single binop folds the same capabilities as the existing folds for 2-binops+shuffle. LLVM binary opcode review: there are a total of 18 binops. There are 7 commutative binops (add, mul, and, or, xor, fadd, fmul) which we already fold. We're able to fold 6 more opcodes with this patch (shl, lshr, ashr, fdiv, udiv, sdiv). There are no folds for srem/urem/frem AFAIK. We don't bother with sub/fsub with constant operand 1 because those are canonicalized to add/fadd. 7 + 6 + 3 + 2 = 18. llvm-svn: 336684 --- .../InstCombine/InstCombineVectorOps.cpp | 19 +++++----- llvm/test/Transforms/InstCombine/shuffle_select.ll | 42 ++++++++-------------- 2 files changed, 25 insertions(+), 36 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index f317252..8fecd69 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -1197,17 +1197,12 @@ static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) { else return nullptr; - auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1); - Value *X = Op0IsBinop ? Op1 : Op0; - // TODO: Allow div/rem by accounting for potential UB due to undef elements. - if (BO->isIntDivRem()) - return nullptr; - // The identity constant for a binop leaves a variable operand unchanged. For // a vector, this is a splat of something like 0, -1, or 1. // If there's no identity constant for this binop, we're done. + auto *BO = cast<BinaryOperator>(Op0IsBinop ? 
Op0 : Op1); BinaryOperator::BinaryOps BOpcode = BO->getOpcode(); - Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType()); + Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true); if (!IdC) return nullptr; @@ -1219,14 +1214,22 @@ static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) { Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) : ConstantExpr::getShuffleVector(IdC, C, Mask); + bool MightCreatePoisonOrUB = + Mask->containsUndefElement() && + (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode)); + if (MightCreatePoisonOrUB) + NewC = getSafeVectorConstantForBinop(BOpcode, NewC, true); + // shuf (bop X, C), X, M --> bop X, C' // shuf X, (bop X, C), M --> bop X, C' + Value *X = Op0IsBinop ? Op1 : Op0; Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC); NewBO->copyIRFlags(BO); // An undef shuffle mask element may propagate as an undef constant element in // the new binop. That would produce poison where the original code might not. - if (Mask->containsUndefElement()) + // If we already made a safe constant, then there's no danger. 
+ if (Mask->containsUndefElement() && !MightCreatePoisonOrUB) NewBO->dropPoisonGeneratingFlags(); return NewBO; } diff --git a/llvm/test/Transforms/InstCombine/shuffle_select.ll b/llvm/test/Transforms/InstCombine/shuffle_select.ll index 0ccf8d1..57f6aee 100644 --- a/llvm/test/Transforms/InstCombine/shuffle_select.ll +++ b/llvm/test/Transforms/InstCombine/shuffle_select.ll @@ -77,8 +77,7 @@ define <4 x i32> @mul(<4 x i32> %v) { define <4 x i32> @shl(<4 x i32> %v) { ; CHECK-LABEL: @shl( -; CHECK-NEXT: [[B:%.*]] = shl <4 x i32> [[V:%.*]], -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> +; CHECK-NEXT: [[S:%.*]] = shl <4 x i32> [[V:%.*]], ; CHECK-NEXT: ret <4 x i32> [[S]] ; %b = shl <4 x i32> %v, @@ -88,8 +87,7 @@ define <4 x i32> @shl(<4 x i32> %v) { define <4 x i32> @shl_nsw(<4 x i32> %v) { ; CHECK-LABEL: @shl_nsw( -; CHECK-NEXT: [[B:%.*]] = shl nsw <4 x i32> [[V:%.*]], -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> +; CHECK-NEXT: [[S:%.*]] = shl nsw <4 x i32> [[V:%.*]], ; CHECK-NEXT: ret <4 x i32> [[S]] ; %b = shl nsw <4 x i32> %v, @@ -99,8 +97,7 @@ define <4 x i32> @shl_nsw(<4 x i32> %v) { define <4 x i32> @shl_undef_mask_elt(<4 x i32> %v) { ; CHECK-LABEL: @shl_undef_mask_elt( -; CHECK-NEXT: [[B:%.*]] = shl <4 x i32> [[V:%.*]], -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> +; CHECK-NEXT: [[S:%.*]] = shl <4 x i32> [[V:%.*]], ; CHECK-NEXT: ret <4 x i32> [[S]] ; %b = shl <4 x i32> %v, @@ -110,8 +107,7 @@ define <4 x i32> @shl_undef_mask_elt(<4 x i32> %v) { define <4 x i32> @shl_nuw_undef_mask_elt(<4 x i32> %v) { ; CHECK-LABEL: @shl_nuw_undef_mask_elt( -; CHECK-NEXT: [[B:%.*]] = shl nuw <4 x i32> [[V:%.*]], -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> +; CHECK-NEXT: [[S:%.*]] = shl nuw <4 x i32> [[V:%.*]], ; CHECK-NEXT: ret <4 x i32> [[S]] ; %b = shl nuw <4 x i32> %v, @@ -121,8 +117,7 @@ define <4 x i32> 
@shl_nuw_undef_mask_elt(<4 x i32> %v) { define <4 x i32> @lshr_constant_op0(<4 x i32> %v) { ; CHECK-LABEL: @lshr_constant_op0( -; CHECK-NEXT: [[B:%.*]] = lshr <4 x i32> [[V:%.*]], -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> [[B]], <4 x i32> +; CHECK-NEXT: [[S:%.*]] = lshr <4 x i32> [[V:%.*]], ; CHECK-NEXT: ret <4 x i32> [[S]] ; %b = lshr <4 x i32> %v, @@ -132,8 +127,7 @@ define <4 x i32> @lshr_constant_op0(<4 x i32> %v) { define <4 x i32> @lshr_exact_constant_op0(<4 x i32> %v) { ; CHECK-LABEL: @lshr_exact_constant_op0( -; CHECK-NEXT: [[B:%.*]] = lshr exact <4 x i32> [[V:%.*]], -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> [[B]], <4 x i32> +; CHECK-NEXT: [[S:%.*]] = lshr exact <4 x i32> [[V:%.*]], ; CHECK-NEXT: ret <4 x i32> [[S]] ; %b = lshr exact <4 x i32> %v, @@ -143,8 +137,7 @@ define <4 x i32> @lshr_exact_constant_op0(<4 x i32> %v) { define <4 x i32> @lshr_undef_mask_elt(<4 x i32> %v) { ; CHECK-LABEL: @lshr_undef_mask_elt( -; CHECK-NEXT: [[B:%.*]] = shl <4 x i32> [[V:%.*]], -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> +; CHECK-NEXT: [[S:%.*]] = shl <4 x i32> [[V:%.*]], ; CHECK-NEXT: ret <4 x i32> [[S]] ; %b = shl <4 x i32> %v, @@ -154,8 +147,7 @@ define <4 x i32> @lshr_undef_mask_elt(<4 x i32> %v) { define <4 x i32> @lshr_exact_undef_mask_elt(<4 x i32> %v) { ; CHECK-LABEL: @lshr_exact_undef_mask_elt( -; CHECK-NEXT: [[B:%.*]] = lshr exact <4 x i32> [[V:%.*]], -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> +; CHECK-NEXT: [[S:%.*]] = lshr exact <4 x i32> [[V:%.*]], ; CHECK-NEXT: ret <4 x i32> [[S]] ; %b = lshr exact <4 x i32> %v, @@ -178,8 +170,7 @@ define <4 x i32> @lshr_constant_op1(<4 x i32> %v) { define <3 x i32> @ashr(<3 x i32> %v) { ; CHECK-LABEL: @ashr( -; CHECK-NEXT: [[B:%.*]] = ashr <3 x i32> [[V:%.*]], -; CHECK-NEXT: [[S:%.*]] = shufflevector <3 x i32> [[B]], <3 x i32> [[V]], <3 x i32> +; CHECK-NEXT: [[S:%.*]] = ashr <3 x i32> 
[[V:%.*]], ; CHECK-NEXT: ret <3 x i32> [[S]] ; %b = ashr <3 x i32> %v, @@ -270,8 +261,7 @@ define <4 x i32> @udiv_exact_undef_mask_elt(<4 x i32> %v) { define <4 x i32> @sdiv(<4 x i32> %v) { ; CHECK-LABEL: @sdiv( -; CHECK-NEXT: [[B:%.*]] = sdiv <4 x i32> [[V:%.*]], -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> [[B]], <4 x i32> +; CHECK-NEXT: [[S:%.*]] = sdiv <4 x i32> [[V:%.*]], ; CHECK-NEXT: ret <4 x i32> [[S]] ; %b = sdiv <4 x i32> %v, @@ -281,8 +271,7 @@ define <4 x i32> @sdiv(<4 x i32> %v) { define <4 x i32> @sdiv_exact(<4 x i32> %v) { ; CHECK-LABEL: @sdiv_exact( -; CHECK-NEXT: [[B:%.*]] = sdiv exact <4 x i32> [[V:%.*]], -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> [[B]], <4 x i32> +; CHECK-NEXT: [[S:%.*]] = sdiv exact <4 x i32> [[V:%.*]], ; CHECK-NEXT: ret <4 x i32> [[S]] ; %b = sdiv exact <4 x i32> %v, @@ -294,8 +283,7 @@ define <4 x i32> @sdiv_exact(<4 x i32> %v) { define <4 x i32> @sdiv_undef_mask_elt(<4 x i32> %v) { ; CHECK-LABEL: @sdiv_undef_mask_elt( -; CHECK-NEXT: [[B:%.*]] = sdiv <4 x i32> [[V:%.*]], -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> [[B]], <4 x i32> +; CHECK-NEXT: [[S:%.*]] = sdiv <4 x i32> [[V:%.*]], ; CHECK-NEXT: ret <4 x i32> [[S]] ; %b = sdiv <4 x i32> %v, @@ -305,8 +293,7 @@ define <4 x i32> @sdiv_undef_mask_elt(<4 x i32> %v) { define <4 x i32> @sdiv_exact_undef_mask_elt(<4 x i32> %v) { ; CHECK-LABEL: @sdiv_exact_undef_mask_elt( -; CHECK-NEXT: [[B:%.*]] = sdiv exact <4 x i32> [[V:%.*]], -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> [[B]], <4 x i32> +; CHECK-NEXT: [[S:%.*]] = sdiv exact <4 x i32> [[V:%.*]], ; CHECK-NEXT: ret <4 x i32> [[S]] ; %b = sdiv exact <4 x i32> %v, @@ -395,8 +382,7 @@ define <4 x double> @fdiv_constant_op0(<4 x double> %v) { define <4 x double> @fdiv_constant_op1(<4 x double> %v) { ; CHECK-LABEL: @fdiv_constant_op1( -; CHECK-NEXT: [[B:%.*]] = fdiv reassoc <4 x double> [[V:%.*]], -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x 
double> [[V]], <4 x double> [[B]], <4 x i32> +; CHECK-NEXT: [[S:%.*]] = fdiv reassoc <4 x double> [[V:%.*]], ; CHECK-NEXT: ret <4 x double> [[S]] ; %b = fdiv reassoc <4 x double> %v, -- 2.7.4