From 509a1e7a9b444bccc88b57685a0e664f61c37f0e Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Tue, 10 Jul 2018 15:12:31 +0000
Subject: [PATCH] [InstCombine] safely allow non-commutative binop identity
 constant folds

This was originally intended with D48893, but as discussed there, we
have to make the folds safe from producing extra poison or undefined
behavior when the shuffle mask has undef elements. This should give the
single binop folds the same capabilities as the existing folds for
2-binops+shuffle.

LLVM binary opcode review: there are a total of 18 binops. There are 7
commutative binops (add, mul, and, or, xor, fadd, fmul) which we already
fold. We're able to fold 6 more opcodes with this patch (shl, lshr, ashr,
fdiv, udiv, sdiv). There are no folds for the 3 remainder opcodes
(srem, urem, frem) AFAIK. We don't bother with the remaining 2 opcodes
(sub, fsub) when the constant is operand 1 (the right-hand operand)
because those are canonicalized to add/fadd. 7 + 6 + 3 + 2 = 18.

llvm-svn: 336684
---
 .../InstCombine/InstCombineVectorOps.cpp           | 19 +++++-----
 llvm/test/Transforms/InstCombine/shuffle_select.ll | 42 ++++++++--------------
 2 files changed, 25 insertions(+), 36 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index f317252..8fecd69 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -1197,17 +1197,12 @@ static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) {
   else
     return nullptr;
 
-  auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1);
-  Value *X = Op0IsBinop ? Op1 : Op0;
-  // TODO: Allow div/rem by accounting for potential UB due to undef elements.
-  if (BO->isIntDivRem())
-    return nullptr;
-
   // The identity constant for a binop leaves a variable operand unchanged. For
   // a vector, this is a splat of something like 0, -1, or 1.
   // If there's no identity constant for this binop, we're done.
+  auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1);
   BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
-  Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType());
+  Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true);
   if (!IdC)
     return nullptr;
 
@@ -1219,14 +1214,22 @@ static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) {
   Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) :
                                 ConstantExpr::getShuffleVector(IdC, C, Mask);
 
+  bool MightCreatePoisonOrUB =
+      Mask->containsUndefElement() &&
+      (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode));
+  if (MightCreatePoisonOrUB)
+    NewC = getSafeVectorConstantForBinop(BOpcode, NewC, true);
+
   // shuf (bop X, C), X, M --> bop X, C'
   // shuf X, (bop X, C), M --> bop X, C'
+  Value *X = Op0IsBinop ? Op1 : Op0;
   Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC);
   NewBO->copyIRFlags(BO);
 
   // An undef shuffle mask element may propagate as an undef constant element in
   // the new binop. That would produce poison where the original code might not.
-  if (Mask->containsUndefElement())
+  // If we already made a safe constant, then there's no danger.
+  if (Mask->containsUndefElement() && !MightCreatePoisonOrUB)
     NewBO->dropPoisonGeneratingFlags();
   return NewBO;
 }
diff --git a/llvm/test/Transforms/InstCombine/shuffle_select.ll b/llvm/test/Transforms/InstCombine/shuffle_select.ll
index 0ccf8d1..57f6aee 100644
--- a/llvm/test/Transforms/InstCombine/shuffle_select.ll
+++ b/llvm/test/Transforms/InstCombine/shuffle_select.ll
@@ -77,8 +77,7 @@ define <4 x i32> @mul(<4 x i32> %v) {
 
 define <4 x i32> @shl(<4 x i32> %v) {
 ; CHECK-LABEL: @shl(
-; CHECK-NEXT:    [[B:%.*]] = shl <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 13, i32 14>
-; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 4, i32 1, i32 2, i32 7>
+; CHECK-NEXT:    [[S:%.*]] = shl <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 13, i32 0>
 ; CHECK-NEXT:    ret <4 x i32> [[S]]
 ;
   %b = shl <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
@@ -88,8 +87,7 @@ define <4 x i32> @shl(<4 x i32> %v) {
 
 define <4 x i32> @shl_nsw(<4 x i32> %v) {
 ; CHECK-LABEL: @shl_nsw(
-; CHECK-NEXT:    [[B:%.*]] = shl nsw <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 13, i32 14>
-; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 4, i32 1, i32 2, i32 7>
+; CHECK-NEXT:    [[S:%.*]] = shl nsw <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 13, i32 0>
 ; CHECK-NEXT:    ret <4 x i32> [[S]]
 ;
   %b = shl nsw <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
@@ -99,8 +97,7 @@ define <4 x i32> @shl_nsw(<4 x i32> %v) {
 
 define <4 x i32> @shl_undef_mask_elt(<4 x i32> %v) {
 ; CHECK-LABEL: @shl_undef_mask_elt(
-; CHECK-NEXT:    [[B:%.*]] = shl <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 13, i32 14>
-; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
+; CHECK-NEXT:    [[S:%.*]] = shl <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 13, i32 0>
 ; CHECK-NEXT:    ret <4 x i32> [[S]]
 ;
   %b = shl <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
@@ -110,8 +107,7 @@ define <4 x i32> @shl_undef_mask_elt(<4 x i32> %v) {
 
 define <4 x i32> @shl_nuw_undef_mask_elt(<4 x i32> %v) {
 ; CHECK-LABEL: @shl_nuw_undef_mask_elt(
-; CHECK-NEXT:    [[B:%.*]] = shl nuw <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 13, i32 14>
-; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
+; CHECK-NEXT:    [[S:%.*]] = shl nuw <4 x i32> [[V:%.*]], <i32 0, i32 0, i32 13, i32 0>
 ; CHECK-NEXT:    ret <4 x i32> [[S]]
 ;
   %b = shl nuw <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
@@ -121,8 +117,7 @@ define <4 x i32> @shl_nuw_undef_mask_elt(<4 x i32> %v) {
 
 define <4 x i32> @lshr_constant_op0(<4 x i32> %v) {
 ; CHECK-LABEL: @lshr_constant_op0(
-; CHECK-NEXT:    [[B:%.*]] = lshr <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 13, i32 14>
-; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> [[B]], <4 x i32> <i32 4, i32 5, i32 2, i32 7>
+; CHECK-NEXT:    [[S:%.*]] = lshr <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 0, i32 14>
 ; CHECK-NEXT:    ret <4 x i32> [[S]]
 ;
   %b = lshr <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
@@ -132,8 +127,7 @@ define <4 x i32> @lshr_constant_op0(<4 x i32> %v) {
 
 define <4 x i32> @lshr_exact_constant_op0(<4 x i32> %v) {
 ; CHECK-LABEL: @lshr_exact_constant_op0(
-; CHECK-NEXT:    [[B:%.*]] = lshr exact <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 13, i32 14>
-; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> [[B]], <4 x i32> <i32 4, i32 5, i32 2, i32 7>
+; CHECK-NEXT:    [[S:%.*]] = lshr exact <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 0, i32 14>
 ; CHECK-NEXT:    ret <4 x i32> [[S]]
 ;
   %b = lshr exact <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
@@ -143,8 +137,7 @@ define <4 x i32> @lshr_exact_constant_op0(<4 x i32> %v) {
 
 define <4 x i32> @lshr_undef_mask_elt(<4 x i32> %v) {
 ; CHECK-LABEL: @lshr_undef_mask_elt(
-; CHECK-NEXT:    [[B:%.*]] = shl <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 13, i32 14>
-; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
+; CHECK-NEXT:    [[S:%.*]] = shl <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 13, i32 0>
 ; CHECK-NEXT:    ret <4 x i32> [[S]]
 ;
   %b = shl <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
@@ -154,8 +147,7 @@ define <4 x i32> @lshr_undef_mask_elt(<4 x i32> %v) {
 
 define <4 x i32> @lshr_exact_undef_mask_elt(<4 x i32> %v) {
 ; CHECK-LABEL: @lshr_exact_undef_mask_elt(
-; CHECK-NEXT:    [[B:%.*]] = lshr exact <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 13, i32 14>
-; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
+; CHECK-NEXT:    [[S:%.*]] = lshr exact <4 x i32> [[V:%.*]], <i32 0, i32 0, i32 13, i32 0>
 ; CHECK-NEXT:    ret <4 x i32> [[S]]
 ;
   %b = lshr exact  <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
@@ -178,8 +170,7 @@ define <4 x i32> @lshr_constant_op1(<4 x i32> %v) {
 
 define <3 x i32> @ashr(<3 x i32> %v) {
 ; CHECK-LABEL: @ashr(
-; CHECK-NEXT:    [[B:%.*]] = ashr <3 x i32> [[V:%.*]], <i32 11, i32 12, i32 13>
-; CHECK-NEXT:    [[S:%.*]] = shufflevector <3 x i32> [[B]], <3 x i32> [[V]], <3 x i32> <i32 3, i32 1, i32 2>
+; CHECK-NEXT:    [[S:%.*]] = ashr <3 x i32> [[V:%.*]], <i32 0, i32 12, i32 13>
 ; CHECK-NEXT:    ret <3 x i32> [[S]]
 ;
   %b = ashr <3 x i32> %v, <i32 11, i32 12, i32 13>
@@ -270,8 +261,7 @@ define <4 x i32> @udiv_exact_undef_mask_elt(<4 x i32> %v) {
 
 define <4 x i32> @sdiv(<4 x i32> %v) {
 ; CHECK-LABEL: @sdiv(
-; CHECK-NEXT:    [[B:%.*]] = sdiv <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 13, i32 14>
-; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> [[B]], <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+; CHECK-NEXT:    [[S:%.*]] = sdiv <4 x i32> [[V:%.*]], <i32 11, i32 1, i32 13, i32 1>
 ; CHECK-NEXT:    ret <4 x i32> [[S]]
 ;
   %b = sdiv <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
@@ -281,8 +271,7 @@ define <4 x i32> @sdiv(<4 x i32> %v) {
 
 define <4 x i32> @sdiv_exact(<4 x i32> %v) {
 ; CHECK-LABEL: @sdiv_exact(
-; CHECK-NEXT:    [[B:%.*]] = sdiv exact <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 13, i32 14>
-; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> [[B]], <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+; CHECK-NEXT:    [[S:%.*]] = sdiv exact <4 x i32> [[V:%.*]], <i32 11, i32 1, i32 13, i32 1>
 ; CHECK-NEXT:    ret <4 x i32> [[S]]
 ;
   %b = sdiv exact <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
@@ -294,8 +283,7 @@ define <4 x i32> @sdiv_exact(<4 x i32> %v) {
 
 define <4 x i32> @sdiv_undef_mask_elt(<4 x i32> %v) {
 ; CHECK-LABEL: @sdiv_undef_mask_elt(
-; CHECK-NEXT:    [[B:%.*]] = sdiv <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 13, i32 14>
-; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> [[B]], <4 x i32> <i32 undef, i32 1, i32 6, i32 undef>
+; CHECK-NEXT:    [[S:%.*]] = sdiv <4 x i32> [[V:%.*]], <i32 1, i32 1, i32 13, i32 1>
 ; CHECK-NEXT:    ret <4 x i32> [[S]]
 ;
   %b = sdiv <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
@@ -305,8 +293,7 @@ define <4 x i32> @sdiv_undef_mask_elt(<4 x i32> %v) {
 
 define <4 x i32> @sdiv_exact_undef_mask_elt(<4 x i32> %v) {
 ; CHECK-LABEL: @sdiv_exact_undef_mask_elt(
-; CHECK-NEXT:    [[B:%.*]] = sdiv exact <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 13, i32 14>
-; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> [[B]], <4 x i32> <i32 undef, i32 1, i32 6, i32 undef>
+; CHECK-NEXT:    [[S:%.*]] = sdiv exact <4 x i32> [[V:%.*]], <i32 1, i32 1, i32 13, i32 1>
 ; CHECK-NEXT:    ret <4 x i32> [[S]]
 ;
   %b = sdiv exact <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
@@ -395,8 +382,7 @@ define <4 x double> @fdiv_constant_op0(<4 x double> %v) {
 
 define <4 x double> @fdiv_constant_op1(<4 x double> %v) {
 ; CHECK-LABEL: @fdiv_constant_op1(
-; CHECK-NEXT:    [[B:%.*]] = fdiv reassoc <4 x double> [[V:%.*]], <double 4.100000e+01, double 4.200000e+01, double 4.300000e+01, double 4.400000e+01>
-; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x double> [[V]], <4 x double> [[B]], <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
+; CHECK-NEXT:    [[S:%.*]] = fdiv reassoc <4 x double> [[V:%.*]], <double undef, double 1.000000e+00, double 4.300000e+01, double 4.400000e+01>
 ; CHECK-NEXT:    ret <4 x double> [[S]]
 ;
   %b = fdiv reassoc <4 x double> %v, <double 41.0, double 42.0, double 43.0, double 44.0>
-- 
2.7.4