From 79120e80b84c3f41cedef2c160a1bd63863ae301 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@gmail.com>
Date: Mon, 3 Apr 2017 17:22:23 +0000
Subject: [PATCH] Revert r299337 "[InstCombine] Remove redundant combine from
 visitAnd"

One of the tsan bots started failing at this commit. I don't see anything obviously wrong with the commit so trying this to see if it recovers.

Failing log: http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-autoconf/builds/6792

llvm-svn: 299366
---
 .../Transforms/InstCombine/InstCombineAndOrXor.cpp | 87 ++++++++++++++++++++++
 .../Transforms/InstCombine/InstCombineInternal.h   |  2 +
 2 files changed, 89 insertions(+)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index c7ecd9c..86a04c8a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -295,6 +295,77 @@ Value *InstCombiner::insertRangeTest(Value *V, const APInt &Lo, const APInt &Hi,
   return Builder->CreateICmp(Pred, VMinusLo, HiMinusLo);
 }
 
+/// Returns true iff Val consists of one contiguous run of 1s with any number
+/// of 0s on either side.  The 1s are allowed to wrap from LSB to MSB,
+/// so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs.  0x0F0F0000 is
+/// not, since all 1s are not contiguous.
+static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) {
+  const APInt& V = Val->getValue();
+  uint32_t BitWidth = Val->getType()->getBitWidth();
+  if (!V.isShiftedMask()) return false;
+
+  // look for the first zero bit after the run of ones
+  MB = BitWidth - ((V - 1) ^ V).countLeadingZeros();
+  // look for the first non-zero bit
+  ME = V.getActiveBits();
+  return true;
+}
+
+/// This is part of an expression (LHS +/- RHS) & Mask, where isSub determines
+/// whether the operator is a sub. If we can fold one of the following xforms:
+///
+/// ((A & N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == Mask
+/// ((A | N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
+/// ((A ^ N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
+///
+/// return (A +/- B).
+///
+Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
+                                        ConstantInt *Mask, bool isSub,
+                                        Instruction &I) {
+  Instruction *LHSI = dyn_cast<Instruction>(LHS);
+  if (!LHSI || LHSI->getNumOperands() != 2 ||
+      !isa<ConstantInt>(LHSI->getOperand(1))) return nullptr;
+
+  ConstantInt *N = cast<ConstantInt>(LHSI->getOperand(1));
+
+  switch (LHSI->getOpcode()) {
+  default: return nullptr;
+  case Instruction::And:
+    if (ConstantExpr::getAnd(N, Mask) == Mask) {
+      // If the AndRHS is a power of two minus one (0+1+), this is simple.
+      if ((Mask->getValue().countLeadingZeros() +
+           Mask->getValue().countPopulation()) ==
+          Mask->getValue().getBitWidth())
+        break;
+
+      // Otherwise, if Mask is 0+1+0+, and if B is known to have the low 0+
+      // part, we don't need any explicit masks to take them out of A.  If that
+      // is all N is, ignore it.
+      uint32_t MB = 0, ME = 0;
+      if (isRunOfOnes(Mask, MB, ME)) {  // begin/end bit of run, inclusive
+        uint32_t BitWidth = cast<IntegerType>(RHS->getType())->getBitWidth();
+        APInt Mask(APInt::getLowBitsSet(BitWidth, MB-1));
+        if (MaskedValueIsZero(RHS, Mask, 0, &I))
+          break;
+      }
+    }
+    return nullptr;
+  case Instruction::Or:
+  case Instruction::Xor:
+    // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0
+    if ((Mask->getValue().countLeadingZeros() +
+         Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth()
+        && ConstantExpr::getAnd(N, Mask)->isNullValue())
+      break;
+    return nullptr;
+  }
+
+  if (isSub)
+    return Builder->CreateSub(LHSI->getOperand(0), RHS, "fold");
+  return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold");
+}
+
 /// Classify (icmp eq (A & B), C) and (icmp ne (A & B), C) as matching patterns
 /// that can be simplified.
 /// One of A and B is considered the mask. The other is the value. This is
@@ -1249,7 +1320,23 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
 
         break;
       }
+      case Instruction::Add:
+        // ((A & N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == AndRHS.
+        // ((A | N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
+        // ((A ^ N) + B) & AndRHS -> (A + B) & AndRHS iff N&AndRHS == 0
+        if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, false, I))
+          return BinaryOperator::CreateAnd(V, AndRHS);
+        if (Value *V = FoldLogicalPlusAnd(Op0RHS, Op0LHS, AndRHS, false, I))
+          return BinaryOperator::CreateAnd(V, AndRHS);  // Add commutes
+        break;
+
       case Instruction::Sub:
+        // ((A & N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == AndRHS.
+        // ((A | N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0
+        // ((A ^ N) - B) & AndRHS -> (A - B) & AndRHS iff N&AndRHS == 0
+        if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, true, I))
+          return BinaryOperator::CreateAnd(V, AndRHS);
+
         // -x & 1 -> x & 1
         if (AndRHSMask == 1 && match(Op0LHS, m_Zero()))
           return BinaryOperator::CreateAnd(Op0RHS, AndRHS);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index ab15fd8..b5b48d2 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -653,6 +653,8 @@ private:
   Instruction *OptAndOp(BinaryOperator *Op, ConstantInt *OpRHS,
                         ConstantInt *AndRHS, BinaryOperator &TheAnd);
 
+  Value *FoldLogicalPlusAnd(Value *LHS, Value *RHS, ConstantInt *Mask,
+                            bool isSub, Instruction &I);
   Value *insertRangeTest(Value *V, const APInt &Lo, const APInt &Hi,
                          bool isSigned, bool Inside);
   Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI);
-- 
2.7.4