From 7cdeac43e57274fdac01f61bf2365a9efaffa5e8 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Mon, 7 Oct 2019 20:53:27 +0000
Subject: [PATCH] [InstCombine] Fold conditional sign-extend of
 high-bit-extract into high-bit-extract-with-signext (PR42389)

This can come up in Bit Stream abstractions.

The pattern looks big/scary, but it can't be simplified any further.
It only is so simple because a number of my preparatory folds had
happened already (shift amount reassociation / shift amount
reassociation in bit test, sign bit test detection).

Highlights:
* There are two main flavors: https://rise4fun.com/Alive/zWi
  The difference is add vs. sub, and left-shift of -1 vs. 1
* Since we only change the shift opcode,
  we can preserve the exact-ness: https://rise4fun.com/Alive/4u4
* There can be truncation after high-bit-extraction:
  https://rise4fun.com/Alive/slHc1   (the main pattern i'm after!)
  Which means that we need to ignore zext of shift amounts and of NBits.
* The sign-extending magic can be extended itself (in add pattern
  via sext, in sub pattern via zext. not the other way around!)
  https://rise4fun.com/Alive/NhG
  (or those sext/zext can be sinked into `select`!)
  Which again means we should pay attention when matching NBits.
* We can have both truncation of extraction and widening of magic:
  https://rise4fun.com/Alive/XTw
  In other words, i don't believe we need to have any checks on
  bitwidths of any of these constructs.

This is worsened in general by the fact that we may have `sext` instead
of `zext` for shift amounts, and we don't yet canonicalize to `zext`,
although we should. I have not done anything about that here.

Also, we really should have something to weed out `sub` like these,
by folding them into `add` variant.

https://bugs.llvm.org/show_bug.cgi?id=42389

llvm-svn: 373964
---
 .../Transforms/InstCombine/InstCombineAddSub.cpp   | 110 +++++++++++++++++++++
 ...riable-length-signext-after-high-bit-extract.ll |  38 +++----
 2 files changed, 129 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 40cc188..5d306cd 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1097,6 +1097,106 @@ static Instruction *foldToUnsignedSaturatedAdd(BinaryOperator &I) {
   return nullptr;
 }
 
+static Instruction *
+canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(
+    BinaryOperator &I, InstCombiner::BuilderTy &Builder) {
+  assert((I.getOpcode() == Instruction::Add ||
+          I.getOpcode() == Instruction::Sub) &&
+         "Expecting add/sub instruction");
+
+  // We have a subtraction/addition between a (potentially truncated) *logical*
+  // right-shift of X and a "select".
+  Value *X, *Select;
+  Instruction *LowBitsToSkip, *Extract;
+  if (!match(&I, m_c_BinOp(m_TruncOrSelf(m_CombineAnd(
+                               m_LShr(m_Value(X), m_Instruction(LowBitsToSkip)),
+                               m_Instruction(Extract))),
+                           m_Value(Select))))
+    return nullptr;
+
+  // `add` is commutative; but for `sub`, "select" *must* be on RHS.
+  if (I.getOpcode() == Instruction::Sub && I.getOperand(1) != Select)
+    return nullptr;
+
+  Type *XTy = X->getType();
+  bool HadTrunc = I.getType() != XTy;
+
+  // If there was a truncation of extracted value, then we'll need to produce
+  // one extra instruction, so we need to ensure one instruction will go away.
+  if (HadTrunc && !match(&I, m_c_BinOp(m_OneUse(m_Value()), m_Value())))
+    return nullptr;
+
+  // Extraction should extract high NBits bits, with shift amount calculated as:
+  //   low bits to skip = shift bitwidth - high bits to extract
+  // The shift amount itself may be extended, and we need to look past zero-ext
+  // when matching NBits, that will matter for matching later.
+  Constant *C;
+  Value *NBits;
+  if (!match(
+          LowBitsToSkip,
+          m_ZExtOrSelf(m_Sub(m_Constant(C), m_ZExtOrSelf(m_Value(NBits))))) ||
+      !match(C, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_EQ,
+                                   APInt(C->getType()->getScalarSizeInBits(),
+                                         X->getType()->getScalarSizeInBits()))))
+    return nullptr;
+
+  // Sign-extending value can be sign-extended itself if we `add` it,
+  // or zero-extended if we `sub`tract it.
+  auto SkipExtInMagic = [&I](Value *&V) {
+    if (I.getOpcode() == Instruction::Add)
+      match(V, m_SExtOrSelf(m_Value(V)));
+    else
+      match(V, m_ZExtOrSelf(m_Value(V)));
+  };
+
+  // Now, finally validate the sign-extending magic.
+  // `select` itself may be appropriately extended, look past that.
+  SkipExtInMagic(Select);
+
+  ICmpInst::Predicate Pred;
+  const APInt *Thr;
+  Value *SignExtendingValue, *Zero;
+  bool ShouldSignext;
+  // It must be a select between two values we will later estabilish to be a
+  // sign-extending value and a zero constant. The condition guarding the
+  // sign-extension must be based on a sign bit of the same X we had in `lshr`.
+  if (!match(Select, m_Select(m_ICmp(Pred, m_Specific(X), m_APInt(Thr)),
+                              m_Value(SignExtendingValue), m_Value(Zero))) ||
+      !isSignBitCheck(Pred, *Thr, ShouldSignext))
+    return nullptr;
+
+  // icmp-select pair is commutative.
+  if (!ShouldSignext)
+    std::swap(SignExtendingValue, Zero);
+
+  // If we should not perform sign-extension then we must add/subtract zero.
+  if (!match(Zero, m_Zero()))
+    return nullptr;
+  // Otherwise, it should be some constant, left-shifted by the same NBits we
+  // had in `lshr`. Said left-shift can also be appropriately extended.
+  // Again, we must look past zero-ext when looking for NBits.
+  SkipExtInMagic(SignExtendingValue);
+  Constant *SignExtendingValueBaseConstant;
+  if (!match(SignExtendingValue,
+             m_Shl(m_Constant(SignExtendingValueBaseConstant),
+                   m_ZExtOrSelf(m_Specific(NBits)))))
+    return nullptr;
+  // If we `add`, then the constant should be all-ones, else it should be one.
+  if (I.getOpcode() == Instruction::Add
+          ? !match(SignExtendingValueBaseConstant, m_AllOnes())
+          : !match(SignExtendingValueBaseConstant, m_One()))
+    return nullptr;
+
+  auto *NewAShr = BinaryOperator::CreateAShr(X, LowBitsToSkip,
+                                             Extract->getName() + ".sext");
+  NewAShr->copyIRFlags(Extract); // Preserve `exact`-ness.
+  if (!HadTrunc)
+    return NewAShr;
+
+  Builder.Insert(NewAShr);
+  return TruncInst::CreateTruncOrBitCast(NewAShr, I.getType());
+}
+
 Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
   if (Value *V = SimplifyAddInst(I.getOperand(0), I.getOperand(1),
                                  I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
@@ -1302,6 +1402,11 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
   if (Instruction *V = canonicalizeLowbitMask(I, Builder))
     return V;
 
+  if (Instruction *V =
+          canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(
+              I, Builder))
+    return V;
+
   if (Instruction *SatAdd = foldToUnsignedSaturatedAdd(I))
     return SatAdd;
 
@@ -1900,6 +2005,11 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
     return SelectInst::Create(Cmp, Neg, A);
   }
 
+  if (Instruction *V =
+          canonicalizeCondSignextOfHighBitExtractToSignextHighBitExtract(
+              I, Builder))
+    return V;
+
   if (Instruction *Ext = narrowMathIfNoOverflow(I))
     return Ext;
 
diff --git a/llvm/test/Transforms/InstCombine/conditional-variable-length-signext-after-high-bit-extract.ll b/llvm/test/Transforms/InstCombine/conditional-variable-length-signext-after-high-bit-extract.ll
index 70877dd..0277556 100644
--- a/llvm/test/Transforms/InstCombine/conditional-variable-length-signext-after-high-bit-extract.ll
+++ b/llvm/test/Transforms/InstCombine/conditional-variable-length-signext-after-high-bit-extract.ll
@@ -26,7 +26,7 @@ define i32 @t0_notrunc_add(i32 %data, i32 %nbits) {
 ; CHECK-NEXT:    call void @use1(i1 [[SHOULD_SIGNEXT]])
 ; CHECK-NEXT:    call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]])
 ; CHECK-NEXT:    call void @use32(i32 [[MAGIC]])
-; CHECK-NEXT:    [[SIGNEXTENDED:%.*]] = add i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC]]
+; CHECK-NEXT:    [[SIGNEXTENDED:%.*]] = ashr i32 [[DATA]], [[LOW_BITS_TO_SKIP]]
 ; CHECK-NEXT:    ret i32 [[SIGNEXTENDED]]
 ;
   %low_bits_to_skip = sub i32 32, %nbits
@@ -57,7 +57,7 @@ define i32 @t1_notrunc_sub(i32 %data, i32 %nbits) {
 ; CHECK-NEXT:    call void @use1(i1 [[SHOULD_SIGNEXT]])
 ; CHECK-NEXT:    call void @use32(i32 [[HIGHER_BIT_AFTER_SIGNBIT]])
 ; CHECK-NEXT:    call void @use32(i32 [[MAGIC]])
-; CHECK-NEXT:    [[SIGNEXTENDED:%.*]] = sub i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC]]
+; CHECK-NEXT:    [[SIGNEXTENDED:%.*]] = ashr i32 [[DATA]], [[LOW_BITS_TO_SKIP]]
 ; CHECK-NEXT:    ret i32 [[SIGNEXTENDED]]
 ;
   %low_bits_to_skip = sub i32 32, %nbits
@@ -84,14 +84,14 @@ define i32 @t2_trunc_add(i64 %data, i32 %nbits) {
 ; CHECK-NEXT:    [[HIGH_BITS_EXTRACTED:%.*]] = trunc i64 [[HIGH_BITS_EXTRACTED_WIDE]] to i32
 ; CHECK-NEXT:    [[SHOULD_SIGNEXT:%.*]] = icmp slt i64 [[DATA]], 0
 ; CHECK-NEXT:    [[ALL_BITS_EXCEPT_LOW_NBITS:%.*]] = shl i32 -1, [[NBITS]]
-; CHECK-NEXT:    [[MAGIC:%.*]] = select i1 [[SHOULD_SIGNEXT]], i32 [[ALL_BITS_EXCEPT_LOW_NBITS]], i32 0
 ; CHECK-NEXT:    call void @use32(i32 [[LOW_BITS_TO_SKIP]])
 ; CHECK-NEXT:    call void @use64(i64 [[LOW_BITS_TO_SKIP_WIDE]])
 ; CHECK-NEXT:    call void @use64(i64 [[HIGH_BITS_EXTRACTED_WIDE]])
 ; CHECK-NEXT:    call void @use32(i32 [[HIGH_BITS_EXTRACTED]])
 ; CHECK-NEXT:    call void @use1(i1 [[SHOULD_SIGNEXT]])
 ; CHECK-NEXT:    call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]])
-; CHECK-NEXT:    [[SIGNEXTENDED:%.*]] = add i32 [[MAGIC]], [[HIGH_BITS_EXTRACTED]]
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr i64 [[DATA]], [[LOW_BITS_TO_SKIP_WIDE]]
+; CHECK-NEXT:    [[SIGNEXTENDED:%.*]] = trunc i64 [[TMP1]] to i32
 ; CHECK-NEXT:    ret i32 [[SIGNEXTENDED]]
 ;
   %low_bits_to_skip = sub i32 64, %nbits
@@ -121,14 +121,14 @@ define i32 @t3_trunc_sub(i64 %data, i32 %nbits) {
 ; CHECK-NEXT:    [[HIGH_BITS_EXTRACTED:%.*]] = trunc i64 [[HIGH_BITS_EXTRACTED_WIDE]] to i32
 ; CHECK-NEXT:    [[SHOULD_SIGNEXT:%.*]] = icmp slt i64 [[DATA]], 0
 ; CHECK-NEXT:    [[HIGHER_BIT_AFTER_SIGNBIT:%.*]] = shl i32 1, [[NBITS]]
-; CHECK-NEXT:    [[MAGIC:%.*]] = select i1 [[SHOULD_SIGNEXT]], i32 [[HIGHER_BIT_AFTER_SIGNBIT]], i32 0
 ; CHECK-NEXT:    call void @use32(i32 [[LOW_BITS_TO_SKIP]])
 ; CHECK-NEXT:    call void @use64(i64 [[LOW_BITS_TO_SKIP_WIDE]])
 ; CHECK-NEXT:    call void @use64(i64 [[HIGH_BITS_EXTRACTED_WIDE]])
 ; CHECK-NEXT:    call void @use32(i32 [[HIGH_BITS_EXTRACTED]])
 ; CHECK-NEXT:    call void @use1(i1 [[SHOULD_SIGNEXT]])
 ; CHECK-NEXT:    call void @use32(i32 [[HIGHER_BIT_AFTER_SIGNBIT]])
-; CHECK-NEXT:    [[SIGNEXTENDED:%.*]] = sub i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC]]
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr i64 [[DATA]], [[LOW_BITS_TO_SKIP_WIDE]]
+; CHECK-NEXT:    [[SIGNEXTENDED:%.*]] = trunc i64 [[TMP1]] to i32
 ; CHECK-NEXT:    ret i32 [[SIGNEXTENDED]]
 ;
   %low_bits_to_skip = sub i32 64, %nbits
@@ -164,7 +164,7 @@ define i32 @t4_commutativity0(i32 %data, i32 %nbits) {
 ; CHECK-NEXT:    call void @use1(i1 [[SHOULD_SIGNEXT]])
 ; CHECK-NEXT:    call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]])
 ; CHECK-NEXT:    call void @use32(i32 [[MAGIC]])
-; CHECK-NEXT:    [[SIGNEXTENDED:%.*]] = add i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC]]
+; CHECK-NEXT:    [[SIGNEXTENDED:%.*]] = ashr i32 [[DATA]], [[LOW_BITS_TO_SKIP]]
 ; CHECK-NEXT:    ret i32 [[SIGNEXTENDED]]
 ;
   %low_bits_to_skip = sub i32 32, %nbits
@@ -194,7 +194,7 @@ define i32 @t5_commutativity1(i32 %data, i32 %nbits) {
 ; CHECK-NEXT:    call void @use1(i1 [[SHOULD_SIGNEXT]])
 ; CHECK-NEXT:    call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]])
 ; CHECK-NEXT:    call void @use32(i32 [[MAGIC]])
-; CHECK-NEXT:    [[SIGNEXTENDED:%.*]] = add i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC]]
+; CHECK-NEXT:    [[SIGNEXTENDED:%.*]] = ashr i32 [[DATA]], [[LOW_BITS_TO_SKIP]]
 ; CHECK-NEXT:    ret i32 [[SIGNEXTENDED]]
 ;
   %low_bits_to_skip = sub i32 32, %nbits
@@ -224,7 +224,7 @@ define i32 @t6_commutativity2(i32 %data, i32 %nbits) {
 ; CHECK-NEXT:    call void @use1(i1 [[SHOULD_SIGNEXT]])
 ; CHECK-NEXT:    call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]])
 ; CHECK-NEXT:    call void @use32(i32 [[MAGIC]])
-; CHECK-NEXT:    [[SIGNEXTENDED:%.*]] = add i32 [[MAGIC]], [[HIGH_BITS_EXTRACTED]]
+; CHECK-NEXT:    [[SIGNEXTENDED:%.*]] = ashr i32 [[DATA]], [[LOW_BITS_TO_SKIP]]
 ; CHECK-NEXT:    ret i32 [[SIGNEXTENDED]]
 ;
   %low_bits_to_skip = sub i32 32, %nbits
@@ -253,14 +253,14 @@ define i32 @t7_trunc_extrause0(i64 %data, i32 %nbits) {
 ; CHECK-NEXT:    [[HIGH_BITS_EXTRACTED:%.*]] = trunc i64 [[HIGH_BITS_EXTRACTED_WIDE]] to i32
 ; CHECK-NEXT:    [[SHOULD_SIGNEXT:%.*]] = icmp slt i64 [[DATA]], 0
 ; CHECK-NEXT:    [[ALL_BITS_EXCEPT_LOW_NBITS:%.*]] = shl i32 -1, [[NBITS]]
-; CHECK-NEXT:    [[MAGIC:%.*]] = select i1 [[SHOULD_SIGNEXT]], i32 [[ALL_BITS_EXCEPT_LOW_NBITS]], i32 0
 ; CHECK-NEXT:    call void @use32(i32 [[LOW_BITS_TO_SKIP]])
 ; CHECK-NEXT:    call void @use64(i64 [[LOW_BITS_TO_SKIP_WIDE]])
 ; CHECK-NEXT:    call void @use64(i64 [[HIGH_BITS_EXTRACTED_WIDE]])
 ; CHECK-NEXT:    call void @use32(i32 [[HIGH_BITS_EXTRACTED]])
 ; CHECK-NEXT:    call void @use1(i1 [[SHOULD_SIGNEXT]])
 ; CHECK-NEXT:    call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]])
-; CHECK-NEXT:    [[SIGNEXTENDED:%.*]] = add i32 [[MAGIC]], [[HIGH_BITS_EXTRACTED]]
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr i64 [[DATA]], [[LOW_BITS_TO_SKIP_WIDE]]
+; CHECK-NEXT:    [[SIGNEXTENDED:%.*]] = trunc i64 [[TMP1]] to i32
 ; CHECK-NEXT:    ret i32 [[SIGNEXTENDED]]
 ;
   %low_bits_to_skip = sub i32 64, %nbits
@@ -286,7 +286,6 @@ define i32 @t8_trunc_extrause1(i64 %data, i32 %nbits) {
 ; CHECK-NEXT:    [[LOW_BITS_TO_SKIP:%.*]] = sub i32 64, [[NBITS:%.*]]
 ; CHECK-NEXT:    [[LOW_BITS_TO_SKIP_WIDE:%.*]] = zext i32 [[LOW_BITS_TO_SKIP]] to i64
 ; CHECK-NEXT:    [[HIGH_BITS_EXTRACTED_WIDE:%.*]] = lshr i64 [[DATA:%.*]], [[LOW_BITS_TO_SKIP_WIDE]]
-; CHECK-NEXT:    [[HIGH_BITS_EXTRACTED:%.*]] = trunc i64 [[HIGH_BITS_EXTRACTED_WIDE]] to i32
 ; CHECK-NEXT:    [[SHOULD_SIGNEXT:%.*]] = icmp slt i64 [[DATA]], 0
 ; CHECK-NEXT:    [[ALL_BITS_EXCEPT_LOW_NBITS:%.*]] = shl i32 -1, [[NBITS]]
 ; CHECK-NEXT:    [[MAGIC:%.*]] = select i1 [[SHOULD_SIGNEXT]], i32 [[ALL_BITS_EXCEPT_LOW_NBITS]], i32 0
@@ -296,7 +295,8 @@ define i32 @t8_trunc_extrause1(i64 %data, i32 %nbits) {
 ; CHECK-NEXT:    call void @use1(i1 [[SHOULD_SIGNEXT]])
 ; CHECK-NEXT:    call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]])
 ; CHECK-NEXT:    call void @use32(i32 [[MAGIC]])
-; CHECK-NEXT:    [[SIGNEXTENDED:%.*]] = add i32 [[MAGIC]], [[HIGH_BITS_EXTRACTED]]
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr i64 [[DATA]], [[LOW_BITS_TO_SKIP_WIDE]]
+; CHECK-NEXT:    [[SIGNEXTENDED:%.*]] = trunc i64 [[TMP1]] to i32
 ; CHECK-NEXT:    ret i32 [[SIGNEXTENDED]]
 ;
   %low_bits_to_skip = sub i32 64, %nbits
@@ -368,7 +368,7 @@ define i32 @t10_preserve_exact(i32 %data, i32 %nbits) {
 ; CHECK-NEXT:    call void @use1(i1 [[SHOULD_SIGNEXT]])
 ; CHECK-NEXT:    call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]])
 ; CHECK-NEXT:    call void @use32(i32 [[MAGIC]])
-; CHECK-NEXT:    [[SIGNEXTENDED:%.*]] = add i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC]]
+; CHECK-NEXT:    [[SIGNEXTENDED:%.*]] = ashr exact i32 [[DATA]], [[LOW_BITS_TO_SKIP]]
 ; CHECK-NEXT:    ret i32 [[SIGNEXTENDED]]
 ;
   %low_bits_to_skip = sub i32 32, %nbits
@@ -405,7 +405,7 @@ define i32 @t11_different_zext_of_shamt(i32 %data, i8 %nbits) {
 ; CHECK-NEXT:    call void @use32(i32 [[NBITS_32BIT]])
 ; CHECK-NEXT:    call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS]])
 ; CHECK-NEXT:    call void @use32(i32 [[MAGIC]])
-; CHECK-NEXT:    [[SIGNEXTENDED:%.*]] = add i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC]]
+; CHECK-NEXT:    [[SIGNEXTENDED:%.*]] = ashr i32 [[DATA]], [[LOW_BITS_TO_SKIP_32]]
 ; CHECK-NEXT:    ret i32 [[SIGNEXTENDED]]
 ;
   %nbits_16bit = zext i8 %nbits to i16
@@ -448,7 +448,7 @@ define i32 @t12_add_sext_of_magic(i32 %data, i8 %nbits) {
 ; CHECK-NEXT:    call void @use16(i16 [[ALL_BITS_EXCEPT_LOW_NBITS]])
 ; CHECK-NEXT:    call void @use16(i16 [[MAGIC]])
 ; CHECK-NEXT:    call void @use32(i32 [[MAGIC_WIDE]])
-; CHECK-NEXT:    [[SIGNEXTENDED:%.*]] = add i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC_WIDE]]
+; CHECK-NEXT:    [[SIGNEXTENDED:%.*]] = ashr i32 [[DATA]], [[LOW_BITS_TO_SKIP]]
 ; CHECK-NEXT:    ret i32 [[SIGNEXTENDED]]
 ;
   %nbits_32bit = zext i8 %nbits to i32
@@ -491,7 +491,7 @@ define i32 @t13_sub_zext_of_magic(i32 %data, i8 %nbits) {
 ; CHECK-NEXT:    call void @use16(i16 [[ALL_BITS_EXCEPT_LOW_NBITS]])
 ; CHECK-NEXT:    call void @use16(i16 [[MAGIC]])
 ; CHECK-NEXT:    call void @use32(i32 [[MAGIC_WIDE]])
-; CHECK-NEXT:    [[SIGNEXTENDED:%.*]] = sub i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC_WIDE]]
+; CHECK-NEXT:    [[SIGNEXTENDED:%.*]] = ashr i32 [[DATA]], [[LOW_BITS_TO_SKIP]]
 ; CHECK-NEXT:    ret i32 [[SIGNEXTENDED]]
 ;
   %nbits_32bit = zext i8 %nbits to i32
@@ -534,7 +534,7 @@ define i32 @t14_add_sext_of_shl(i32 %data, i8 %nbits) {
 ; CHECK-NEXT:    call void @use16(i16 [[ALL_BITS_EXCEPT_LOW_NBITS]])
 ; CHECK-NEXT:    call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS_WIDE]])
 ; CHECK-NEXT:    call void @use32(i32 [[MAGIC]])
-; CHECK-NEXT:    [[SIGNEXTENDED:%.*]] = add i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC]]
+; CHECK-NEXT:    [[SIGNEXTENDED:%.*]] = ashr i32 [[DATA]], [[LOW_BITS_TO_SKIP]]
 ; CHECK-NEXT:    ret i32 [[SIGNEXTENDED]]
 ;
   %nbits_32bit = zext i8 %nbits to i32
@@ -577,7 +577,7 @@ define i32 @t15_sub_zext_of_shl(i32 %data, i8 %nbits) {
 ; CHECK-NEXT:    call void @use16(i16 [[ALL_BITS_EXCEPT_LOW_NBITS]])
 ; CHECK-NEXT:    call void @use32(i32 [[ALL_BITS_EXCEPT_LOW_NBITS_WIDE]])
 ; CHECK-NEXT:    call void @use32(i32 [[MAGIC]])
-; CHECK-NEXT:    [[SIGNEXTENDED:%.*]] = sub i32 [[HIGH_BITS_EXTRACTED]], [[MAGIC]]
+; CHECK-NEXT:    [[SIGNEXTENDED:%.*]] = ashr i32 [[DATA]], [[LOW_BITS_TO_SKIP]]
 ; CHECK-NEXT:    ret i32 [[SIGNEXTENDED]]
 ;
   %nbits_32bit = zext i8 %nbits to i32
-- 
2.7.4