From cf65b9207b58e09e9a763ff4f1aed8687c086f37 Mon Sep 17 00:00:00 2001
From: Nikita Popov <nikita.ppv@gmail.com>
Date: Thu, 6 Dec 2018 23:50:32 +0000
Subject: [PATCH] [DemandedBits][BDCE] Support vectors of integers

DemandedBits and BDCE currently only support scalar integers. This
patch extends them to also handle vector integer operations. In this
case bits are not tracked for individual vector elements; instead, a
bit is demanded if it is demanded for any of the elements. This matches
the behavior of computeKnownBits in ValueTracking and
SimplifyDemandedBits in InstCombine.

The getDemandedBits() method can now only be called on instructions that
have integer or vector of integer type. Previously it could be called on
any sized instruction (even if it was not particularly useful). The size
of the return value is now always the scalar size in bits (while
previously it was the type size in bits).

Differential Revision: https://reviews.llvm.org/D55297

llvm-svn: 348549
---
 llvm/include/llvm/Analysis/DemandedBits.h  |   5 ++
 llvm/lib/Analysis/DemandedBits.cpp         |  66 +++++++++-----
 llvm/lib/Transforms/Scalar/BDCE.cpp        |  13 +--
 llvm/test/Analysis/DemandedBits/vectors.ll | 136 +++++++++++++++++++++++++++++
 llvm/test/Transforms/BDCE/vectors.ll       |  24 ++---
 5 files changed, 198 insertions(+), 46 deletions(-)
 create mode 100644 llvm/test/Analysis/DemandedBits/vectors.ll
diff --git a/llvm/include/llvm/Analysis/DemandedBits.h b/llvm/include/llvm/Analysis/DemandedBits.h
index d438460..477b253 100644
--- a/llvm/include/llvm/Analysis/DemandedBits.h
+++ b/llvm/include/llvm/Analysis/DemandedBits.h
@@ -44,6 +44,11 @@ public:
     F(F), AC(AC), DT(DT) {}
 
   /// Return the bits demanded from instruction I.
+  ///
+  /// The instruction must have integer or vector of integer type. For vector
+  /// instructions individual vector elements are not distinguished: A bit is
+  /// demanded if it is demanded for any of the vector elements. The size of
+  /// the return value corresponds to the scalar size in bits.
   APInt getDemandedBits(Instruction *I);
 
   /// Return true if, during analysis, I could not be reached.
diff --git a/llvm/lib/Analysis/DemandedBits.cpp b/llvm/lib/Analysis/DemandedBits.cpp
index 6bef771..ea5a509 100644
--- a/llvm/lib/Analysis/DemandedBits.cpp
+++ b/llvm/lib/Analysis/DemandedBits.cpp
@@ -39,6 +39,7 @@
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Use.h"
 #include "llvm/Pass.h"
@@ -50,6 +51,7 @@
 #include <cstdint>
 
 using namespace llvm;
+using namespace llvm::PatternMatch;
 
 #define DEBUG_TYPE "demanded-bits"
 
@@ -143,17 +145,17 @@ void DemandedBits::determineLiveOperandBits(
         }
         break;
       case Intrinsic::fshl:
-      case Intrinsic::fshr:
+      case Intrinsic::fshr: {
+        const APInt *SA;
         if (OperandNo == 2) {
           // Shift amount is modulo the bitwidth. For powers of two we have
           // SA % BW == SA & (BW - 1).
           if (isPowerOf2_32(BitWidth))
             AB = BitWidth - 1;
-        } else if (auto *SA = dyn_cast<ConstantInt>(II->getOperand(2))) {
-          // TODO: Support vectors.
+        } else if (match(II->getOperand(2), m_APInt(SA))) {
           // Normalize to funnel shift left. APInt shifts of BitWidth are well-
           // defined, so no need to special-case zero shifts here.
-          uint64_t ShiftAmt = SA->getValue().urem(BitWidth);
+          uint64_t ShiftAmt = SA->urem(BitWidth);
           if (II->getIntrinsicID() == Intrinsic::fshr)
             ShiftAmt = BitWidth - ShiftAmt;
 
@@ -164,6 +166,7 @@ void DemandedBits::determineLiveOperandBits(
         }
         break;
       }
+      }
     break;
   case Instruction::Add:
   case Instruction::Sub:
@@ -174,8 +177,9 @@ void DemandedBits::determineLiveOperandBits(
     AB = APInt::getLowBitsSet(BitWidth, AOut.getActiveBits());
     break;
   case Instruction::Shl:
-    if (OperandNo == 0)
-      if (auto *ShiftAmtC = dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+    if (OperandNo == 0) {
+      const APInt *ShiftAmtC;
+      if (match(UserI->getOperand(1), m_APInt(ShiftAmtC))) {
         uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1);
         AB = AOut.lshr(ShiftAmt);
 
@@ -187,10 +191,12 @@ void DemandedBits::determineLiveOperandBits(
         else if (S->hasNoUnsignedWrap())
           AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
       }
+    }
     break;
   case Instruction::LShr:
-    if (OperandNo == 0)
-      if (auto *ShiftAmtC = dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+    if (OperandNo == 0) {
+      const APInt *ShiftAmtC;
+      if (match(UserI->getOperand(1), m_APInt(ShiftAmtC))) {
         uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1);
         AB = AOut.shl(ShiftAmt);
 
@@ -199,10 +205,12 @@ void DemandedBits::determineLiveOperandBits(
         if (cast<LShrOperator>(UserI)->isExact())
           AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
       }
+    }
     break;
   case Instruction::AShr:
-    if (OperandNo == 0)
-      if (auto *ShiftAmtC = dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+    if (OperandNo == 0) {
+      const APInt *ShiftAmtC;
+      if (match(UserI->getOperand(1), m_APInt(ShiftAmtC))) {
         uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1);
         AB = AOut.shl(ShiftAmt);
         // Because the high input bit is replicated into the
@@ -217,6 +225,7 @@ void DemandedBits::determineLiveOperandBits(
         if (cast<AShrOperator>(UserI)->isExact())
           AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
       }
+    }
     break;
   case Instruction::And:
     AB = AOut;
@@ -274,6 +283,15 @@ void DemandedBits::determineLiveOperandBits(
     if (OperandNo != 0)
       AB = AOut;
     break;
+  case Instruction::ExtractElement:
+    if (OperandNo == 0)
+      AB = AOut;
+    break;
+  case Instruction::InsertElement:
+  case Instruction::ShuffleVector:
+    if (OperandNo == 0 || OperandNo == 1)
+      AB = AOut;
+    break;
   }
 }
 
@@ -309,8 +327,9 @@ void DemandedBits::performAnalysis() {
     // bits and add the instruction to the work list. For other instructions
     // add their operands to the work list (for integer values operands, mark
     // all bits as live).
-    if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
-      if (AliveBits.try_emplace(&I, IT->getBitWidth(), 0).second)
+    Type *T = I.getType();
+    if (T->isIntOrIntVectorTy()) {
+      if (AliveBits.try_emplace(&I, T->getScalarSizeInBits(), 0).second)
         Worklist.push_back(&I);
 
       continue;
@@ -319,8 +338,9 @@ void DemandedBits::performAnalysis() {
     // Non-integer-typed instructions...
     for (Use &OI : I.operands()) {
       if (Instruction *J = dyn_cast<Instruction>(OI)) {
-        if (IntegerType *IT = dyn_cast<IntegerType>(J->getType()))
-          AliveBits[J] = APInt::getAllOnesValue(IT->getBitWidth());
+        Type *T = J->getType();
+        if (T->isIntOrIntVectorTy())
+          AliveBits[J] = APInt::getAllOnesValue(T->getScalarSizeInBits());
         Worklist.push_back(J);
       }
     }
@@ -336,13 +356,13 @@ void DemandedBits::performAnalysis() {
 
     LLVM_DEBUG(dbgs() << "DemandedBits: Visiting: " << *UserI);
     APInt AOut;
-    if (UserI->getType()->isIntegerTy()) {
+    if (UserI->getType()->isIntOrIntVectorTy()) {
       AOut = AliveBits[UserI];
       LLVM_DEBUG(dbgs() << " Alive Out: " << AOut);
     }
     LLVM_DEBUG(dbgs() << "\n");
 
-    if (!UserI->getType()->isIntegerTy())
+    if (!UserI->getType()->isIntOrIntVectorTy())
       Visited.insert(UserI);
 
     KnownBits Known, Known2;
@@ -351,10 +371,11 @@ void DemandedBits::performAnalysis() {
     // operand is added to the work-list.
     for (Use &OI : UserI->operands()) {
       if (Instruction *I = dyn_cast<Instruction>(OI)) {
-        if (IntegerType *IT = dyn_cast<IntegerType>(I->getType())) {
-          unsigned BitWidth = IT->getBitWidth();
+        Type *T = I->getType();
+        if (T->isIntOrIntVectorTy()) {
+          unsigned BitWidth = T->getScalarSizeInBits();
           APInt AB = APInt::getAllOnesValue(BitWidth);
-          if (UserI->getType()->isIntegerTy() && !AOut &&
+          if (UserI->getType()->isIntOrIntVectorTy() && !AOut &&
               !isAlwaysLive(UserI)) {
             AB = APInt(BitWidth, 0);
           } else {
@@ -387,13 +408,14 @@ void DemandedBits::performAnalysis() {
 }
 
 APInt DemandedBits::getDemandedBits(Instruction *I) {
-  performAnalysis();
+  assert(I->getType()->isIntOrIntVectorTy() &&
+         "Not an integer or vector of integer instruction");
 
-  const DataLayout &DL = I->getModule()->getDataLayout();
+  performAnalysis();
   auto Found = AliveBits.find(I);
   if (Found != AliveBits.end())
     return Found->second;
-  return APInt::getAllOnesValue(DL.getTypeSizeInBits(I->getType()));
+  return APInt::getAllOnesValue(I->getType()->getScalarSizeInBits());
 }
 
 bool DemandedBits::isInstructionDead(Instruction *I) {
diff --git a/llvm/lib/Transforms/Scalar/BDCE.cpp b/llvm/lib/Transforms/Scalar/BDCE.cpp
index 3a8ef07..f63182e 100644
--- a/llvm/lib/Transforms/Scalar/BDCE.cpp
+++ b/llvm/lib/Transforms/Scalar/BDCE.cpp
@@ -38,7 +38,8 @@ STATISTIC(NumSimplified, "Number of instructions trivialized (dead bits)");
 /// instruction may need to be cleared of assumptions that can no longer be
 /// guaranteed correct.
 static void clearAssumptionsOfUsers(Instruction *I, DemandedBits &DB) {
-  assert(I->getType()->isIntegerTy() && "Trivializing a non-integer value?");
+  assert(I->getType()->isIntOrIntVectorTy() &&
+         "Trivializing a non-integer value?");
 
   // Initialize the worklist with eligible direct users.
   SmallVector<Instruction *, 16> WorkList;
@@ -46,13 +47,13 @@ static void clearAssumptionsOfUsers(Instruction *I, DemandedBits &DB) {
     // If all bits of a user are demanded, then we know that nothing below that
     // in the def-use chain needs to be changed.
     auto *J = dyn_cast<Instruction>(JU);
-    if (J && J->getType()->isSized() &&
+    if (J && J->getType()->isIntOrIntVectorTy() &&
         !DB.getDemandedBits(J).isAllOnesValue())
       WorkList.push_back(J);
 
-    // Note that we need to check for unsized types above before asking for
+    // Note that we need to check for non-int types above before asking for
     // demanded bits. Normally, the only way to reach an instruction with an
-    // unsized type is via an instruction that has side effects (or otherwise
+    // non-int type is via an instruction that has side effects (or otherwise
     // will demand its input bits). However, if we have a readnone function
     // that returns an unsized type (e.g., void), we must avoid asking for the
     // demanded bits of the function call's return value. A void-returning
@@ -78,7 +79,7 @@ static void clearAssumptionsOfUsers(Instruction *I, DemandedBits &DB) {
       // If all bits of a user are demanded, then we know that nothing below
       // that in the def-use chain needs to be changed.
       auto *K = dyn_cast<Instruction>(KU);
-      if (K && !Visited.count(K) && K->getType()->isSized() &&
+      if (K && !Visited.count(K) && K->getType()->isIntOrIntVectorTy() &&
           !DB.getDemandedBits(K).isAllOnesValue())
         WorkList.push_back(K);
     }
@@ -95,7 +96,7 @@ static bool bitTrackingDCE(Function &F, DemandedBits &DB) {
     if (I.mayHaveSideEffects() && I.use_empty())
       continue;
 
-    if (I.getType()->isIntegerTy() &&
+    if (I.getType()->isIntOrIntVectorTy() &&
         !DB.getDemandedBits(&I).getBoolValue()) {
       // For live instructions that have all dead bits, first make them dead by
       // replacing all uses with something else. Then, if they don't need to
diff --git a/llvm/test/Analysis/DemandedBits/vectors.ll b/llvm/test/Analysis/DemandedBits/vectors.ll
new file mode 100644
index 0000000..36cde05
--- /dev/null
+++ b/llvm/test/Analysis/DemandedBits/vectors.ll
@@ -0,0 +1,136 @@
+; RUN: opt -S -demanded-bits -analyze < %s | FileCheck %s
+; RUN: opt -S -disable-output -passes="print<demanded-bits>" < %s 2>&1 | FileCheck %s
+
+; CHECK-DAG: DemandedBits: 0xff00 for   %x = or <2 x i32> %a, zeroinitializer
+; CHECK-DAG: DemandedBits: 0xff00 for   %y = or <2 x i32> %b, zeroinitializer
+; CHECK-DAG: DemandedBits: 0xff00 for   %z = or <2 x i32> %x, %y
+; CHECK-DAG: DemandedBits: 0xff for   %u = lshr <2 x i32> %z, <i32 8, i32 8>
+; CHECK-DAG: DemandedBits: 0xff for   %r = trunc <2 x i32> %u to <2 x i8>
+define <2 x i8> @test_basic(<2 x i32> %a, <2 x i32> %b) {
+  %x = or <2 x i32> %a, zeroinitializer
+  %y = or <2 x i32> %b, zeroinitializer
+  %z = or <2 x i32> %x, %y
+  %u = lshr <2 x i32> %z, <i32 8, i32 8>
+  %r = trunc <2 x i32> %u to <2 x i8>
+  ret <2 x i8> %r
+}
+
+; Vector-specific instructions
+
+; CHECK-DAG: DemandedBits: 0xff for   %x = or <2 x i32> %a, zeroinitializer
+; CHECK-DAG: DemandedBits: 0xf0 for   %z = extractelement <2 x i32> %x, i32 1
+; CHECK-DAG: DemandedBits: 0xf for   %y = extractelement <2 x i32> %x, i32 0
+; CHECK-DAG: DemandedBits: 0xffffffff for   %u = and i32 %y, 15
+; CHECK-DAG: DemandedBits: 0xffffffff for   %v = and i32 %z, 240
+; CHECK-DAG: DemandedBits: 0xffffffff for   %r = or i32 %u, %v
+define i32 @test_extractelement(<2 x i32> %a) {
+  %x = or <2 x i32> %a, zeroinitializer
+  %y = extractelement <2 x i32> %x, i32 0
+  %z = extractelement <2 x i32> %x, i32 1
+  %u = and i32 %y, 15
+  %v = and i32 %z, 240
+  %r = or i32 %u, %v
+  ret i32 %r
+}
+
+; CHECK-DAG: DemandedBits: 0xff for   %x = or i32 %a, 0
+; CHECK-DAG: DemandedBits: 0xff for   %y = or i32 %b, 0
+; CHECK-DAG: DemandedBits: 0xff for   %z = insertelement <2 x i32> undef, i32 %x, i32 0
+; CHECK-DAG: DemandedBits: 0xff for   %u = insertelement <2 x i32> %z, i32 %y, i32 1
+; CHECK-DAG: DemandedBits: 0xffffffff for   %r = and <2 x i32> %u, <i32 255, i32 127>
+define <2 x i32> @test_insertelement(i32 %a, i32 %b) {
+  %x = or i32 %a, 0
+  %y = or i32 %b, 0
+  %z = insertelement <2 x i32> undef, i32 %x, i32 0
+  %u = insertelement <2 x i32> %z, i32 %y, i32 1
+  %r = and <2 x i32> %u, <i32 255, i32 127>
+  ret <2 x i32> %r
+}
+
+; CHECK-DAG: DemandedBits: 0xff for   %x = or <2 x i32> %a, zeroinitializer
+; CHECK-DAG: DemandedBits: 0xff for   %y = or <2 x i32> %b, zeroinitializer
+; CHECK-DAG: DemandedBits: 0xff for   %z = shufflevector <2 x i32> %x, <2 x i32> %y, <3 x i32> <i32 0, i32 3, i32 1>
+; CHECK-DAG: DemandedBits: 0xffffffff for   %r = and <3 x i32> %z, <i32 255, i32 127, i32 0>
+define <3 x i32> @test_shufflevector(<2 x i32> %a, <2 x i32> %b) {
+  %x = or <2 x i32> %a, zeroinitializer
+  %y = or <2 x i32> %b, zeroinitializer
+  %z = shufflevector <2 x i32> %x, <2 x i32> %y, <3 x i32> <i32 0, i32 3, i32 1>
+  %r = and <3 x i32> %z, <i32 255, i32 127, i32 0>
+  ret <3 x i32> %r
+}
+
+; Shifts with splat shift amounts
+
+; CHECK-DAG: DemandedBits: 0xf for   %x = or <2 x i32> %a, zeroinitializer
+; CHECK-DAG: DemandedBits: 0xf0 for   %y = shl <2 x i32> %x, <i32 4, i32 4>
+; CHECK-DAG: DemandedBits: 0xffffffff for   %r = and <2 x i32> %y, <i32 240, i32 240>
+define <2 x i32> @test_shl(<2 x i32> %a) {
+  %x = or <2 x i32> %a, zeroinitializer
+  %y = shl <2 x i32> %x, <i32 4, i32 4>
+  %r = and <2 x i32> %y, <i32 240, i32 240>
+  ret <2 x i32> %r
+}
+
+; CHECK-DAG: DemandedBits: 0xf00 for   %x = or <2 x i32> %a, zeroinitializer
+; CHECK-DAG: DemandedBits: 0xf0 for   %y = ashr <2 x i32> %x, <i32 4, i32 4>
+; CHECK-DAG: DemandedBits: 0xffffffff for   %r = and <2 x i32> %y, <i32 240, i32 240>
+define <2 x i32> @test_ashr(<2 x i32> %a) {
+  %x = or <2 x i32> %a, zeroinitializer
+  %y = ashr <2 x i32> %x, <i32 4, i32 4>
+  %r = and <2 x i32> %y, <i32 240, i32 240>
+  ret <2 x i32> %r
+}
+
+; CHECK-DAG: DemandedBits: 0xf00 for   %x = or <2 x i32> %a, zeroinitializer
+; CHECK-DAG: DemandedBits: 0xf0 for   %y = lshr <2 x i32> %x, <i32 4, i32 4>
+; CHECK-DAG: DemandedBits: 0xffffffff for   %r = and <2 x i32> %y, <i32 240, i32 240>
+define <2 x i32> @test_lshr(<2 x i32> %a) {
+  %x = or <2 x i32> %a, zeroinitializer
+  %y = lshr <2 x i32> %x, <i32 4, i32 4>
+  %r = and <2 x i32> %y, <i32 240, i32 240>
+  ret <2 x i32> %r
+}
+
+declare <2 x i32> @llvm.fshl.i32(<2 x i32>, <2 x i32>, <2 x i32>)
+declare <2 x i32> @llvm.fshr.i32(<2 x i32>, <2 x i32>, <2 x i32>)
+
+; CHECK-DAG: DemandedBits: 0xf for   %x = or <2 x i32> %a, zeroinitializer
+; CHECK-DAG: DemandedBits: 0xf0000000 for   %y = or <2 x i32> %b, zeroinitializer
+; CHECK-DAG: DemandedBits: 0xff for   %z = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 4, i32 4>)
+; CHECK-DAG: DemandedBits: 0xffffffff for   %r = and <2 x i32> %z, <i32 255, i32 255>
+define <2 x i32> @test_fshl(<2 x i32> %a, <2 x i32> %b) {
+  %x = or <2 x i32> %a, zeroinitializer
+  %y = or <2 x i32> %b, zeroinitializer
+  %z = call <2 x i32> @llvm.fshl.i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 4, i32 4>)
+  %r = and <2 x i32> %z, <i32 255, i32 255>
+  ret <2 x i32> %r
+}
+
+; CHECK-DAG: DemandedBits: 0xf for   %x = or <2 x i32> %a, zeroinitializer
+; CHECK-DAG: DemandedBits: 0xf0000000 for   %y = or <2 x i32> %b, zeroinitializer
+; CHECK-DAG: DemandedBits: 0xff for   %z = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 28, i32 28>)
+; CHECK-DAG: DemandedBits: 0xffffffff for   %r = and <2 x i32> %z, <i32 255, i32 255>
+define <2 x i32> @test_fshr(<2 x i32> %a, <2 x i32> %b) {
+  %x = or <2 x i32> %a, zeroinitializer
+  %y = or <2 x i32> %b, zeroinitializer
+  %z = call <2 x i32> @llvm.fshr.i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 28, i32 28>)
+  %r = and <2 x i32> %z, <i32 255, i32 255>
+  ret <2 x i32> %r
+}
+
+; FP / Int conversion. These have different input / output types.
+
+; CHECK-DAG: DemandedBits: 0xffffffff for   %x = or <2 x i32> %a, zeroinitializer
+define <2 x float> @test_uitofp(<2 x i32> %a) {
+  %x = or <2 x i32> %a, zeroinitializer
+  %r = uitofp <2 x i32> %x to <2 x float>
+  ret <2 x float> %r
+}
+
+; CHECK-DAG: DemandedBits: 0xffffffff for   %y = fptoui <2 x float> %x to <2 x i32>
+define <2 x i32> @test_fptoui(<2 x float> %a) {
+  %x = fadd <2 x float> %a, <float 1.0, float 1.0>
+  %y = fptoui <2 x float> %x to <2 x i32>
+  %r = and <2 x i32> %y, <i32 255, i32 255>
+  ret <2 x i32> %y
+}
diff --git a/llvm/test/Transforms/BDCE/vectors.ll b/llvm/test/Transforms/BDCE/vectors.ll
index d5ac150..fde22fd 100644
--- a/llvm/test/Transforms/BDCE/vectors.ll
+++ b/llvm/test/Transforms/BDCE/vectors.ll
@@ -7,12 +7,9 @@
 
 define <2 x i32> @test_basic(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: @test_basic(
-; CHECK-NEXT:    [[A2:%.*]] = add <2 x i32> [[A:%.*]], <i32 1, i32 1>
-; CHECK-NEXT:    [[A3:%.*]] = and <2 x i32> [[A2]], <i32 4, i32 4>
 ; CHECK-NEXT:    [[B2:%.*]] = add <2 x i32> [[B:%.*]], <i32 1, i32 1>
 ; CHECK-NEXT:    [[B3:%.*]] = and <2 x i32> [[B2]], <i32 8, i32 8>
-; CHECK-NEXT:    [[C:%.*]] = or <2 x i32> [[A3]], [[B3]]
-; CHECK-NEXT:    [[D:%.*]] = ashr <2 x i32> [[C]], <i32 3, i32 3>
+; CHECK-NEXT:    [[D:%.*]] = ashr <2 x i32> [[B3]], <i32 3, i32 3>
 ; CHECK-NEXT:    ret <2 x i32> [[D]]
 ;
 ; CHECK-IO-LABEL: @test_basic(
@@ -36,12 +33,9 @@ define <2 x i32> @test_basic(<2 x i32> %a, <2 x i32> %b) {
 ; Going vector -> scalar
 define i32 @test_extractelement(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: @test_extractelement(
-; CHECK-NEXT:    [[A2:%.*]] = add <2 x i32> [[A:%.*]], <i32 1, i32 1>
-; CHECK-NEXT:    [[A3:%.*]] = and <2 x i32> [[A2]], <i32 4, i32 4>
 ; CHECK-NEXT:    [[B2:%.*]] = add <2 x i32> [[B:%.*]], <i32 1, i32 1>
 ; CHECK-NEXT:    [[B3:%.*]] = and <2 x i32> [[B2]], <i32 8, i32 8>
-; CHECK-NEXT:    [[C:%.*]] = or <2 x i32> [[A3]], [[B3]]
-; CHECK-NEXT:    [[D:%.*]] = extractelement <2 x i32> [[C]], i32 0
+; CHECK-NEXT:    [[D:%.*]] = extractelement <2 x i32> [[B3]], i32 0
 ; CHECK-NEXT:    [[E:%.*]] = ashr i32 [[D]], 3
 ; CHECK-NEXT:    ret i32 [[E]]
 ;
@@ -68,14 +62,10 @@ define i32 @test_extractelement(<2 x i32> %a, <2 x i32> %b) {
 ; Going scalar -> vector
 define <2 x i32> @test_insertelement(i32 %a, i32 %b) {
 ; CHECK-LABEL: @test_insertelement(
-; CHECK-NEXT:    [[X:%.*]] = insertelement <2 x i32> undef, i32 [[A:%.*]], i32 0
-; CHECK-NEXT:    [[X2:%.*]] = insertelement <2 x i32> [[X]], i32 [[B:%.*]], i32 1
-; CHECK-NEXT:    [[X3:%.*]] = and <2 x i32> [[X2]], <i32 4, i32 4>
-; CHECK-NEXT:    [[Y:%.*]] = insertelement <2 x i32> undef, i32 [[B]], i32 0
-; CHECK-NEXT:    [[Y2:%.*]] = insertelement <2 x i32> [[Y]], i32 [[A]], i32 1
+; CHECK-NEXT:    [[Y:%.*]] = insertelement <2 x i32> undef, i32 [[B:%.*]], i32 0
+; CHECK-NEXT:    [[Y2:%.*]] = insertelement <2 x i32> [[Y]], i32 [[A:%.*]], i32 1
 ; CHECK-NEXT:    [[Y3:%.*]] = and <2 x i32> [[Y2]], <i32 8, i32 8>
-; CHECK-NEXT:    [[Z:%.*]] = or <2 x i32> [[X3]], [[Y3]]
-; CHECK-NEXT:    [[U:%.*]] = ashr <2 x i32> [[Z]], <i32 3, i32 3>
+; CHECK-NEXT:    [[U:%.*]] = ashr <2 x i32> [[Y3]], <i32 3, i32 3>
 ; CHECK-NEXT:    ret <2 x i32> [[U]]
 ;
 ; CHECK-IO-LABEL: @test_insertelement(
@@ -132,10 +122,8 @@ define <2 x i32> @test_conversion(<2 x i32> %a) {
 ; Assumption invalidation (adapted from invalidate-assumptions.ll)
 define <2 x i1> @test_assumption_invalidation(<2 x i1> %b, <2 x i8> %x) {
 ; CHECK-LABEL: @test_assumption_invalidation(
-; CHECK-NEXT:    [[SETBIT:%.*]] = or <2 x i8> [[X:%.*]], <i8 64, i8 64>
 ; CHECK-NEXT:    [[LITTLE_NUMBER:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i8>
-; CHECK-NEXT:    [[BIG_NUMBER:%.*]] = shl <2 x i8> [[SETBIT]], <i8 1, i8 1>
-; CHECK-NEXT:    [[SUB:%.*]] = sub nuw <2 x i8> [[BIG_NUMBER]], [[LITTLE_NUMBER]]
+; CHECK-NEXT:    [[SUB:%.*]] = sub <2 x i8> zeroinitializer, [[LITTLE_NUMBER]]
 ; CHECK-NEXT:    [[TRUNC:%.*]] = trunc <2 x i8> [[SUB]] to <2 x i1>
 ; CHECK-NEXT:    ret <2 x i1> [[TRUNC]]
 ;
-- 
2.7.4