[InstCombine] Use uadd.sat and usub.sat for canonicalization

author Nikita Popov <nikita.ppv@gmail.com>

Wed, 27 Mar 2019 17:56:15 +0000 (17:56 +0000)

committer Nikita Popov <nikita.ppv@gmail.com>

Wed, 27 Mar 2019 17:56:15 +0000 (17:56 +0000)
author Nikita Popov <nikita.ppv@gmail.com>
Wed, 27 Mar 2019 17:56:15 +0000 (17:56 +0000)
committer Nikita Popov <nikita.ppv@gmail.com>
Wed, 27 Mar 2019 17:56:15 +0000 (17:56 +0000)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

index 582353e..3fb99bc 100644 (file)
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -622,11 +622,7 @@ static Value *foldSelectICmpAndOr(const ICmpInst *IC, Value *TrueVal,
    return Builder.CreateOr(V, Y);
  }
  
-/// Transform patterns such as: (a > b) ? a - b : 0
-/// into: ((a > b) ? a : b) - b)
-/// This produces a canonical max pattern that is more easily recognized by the
-/// backend and converted into saturated subtraction instructions if those
-/// exist.
+/// Transform patterns such as (a > b) ? a - b : 0 into usub.sat(a, b).
  /// There are 8 commuted/swapped variants of this pattern.
  /// TODO: Also support a - UMIN(a,b) patterns.
  static Value *canonicalizeSaturatedSubtract(const ICmpInst *ICI,
@@ -668,11 +664,12 @@ static Value *canonicalizeSaturatedSubtract(const ICmpInst *ICI,
    if (!TrueVal->hasOneUse())
      return nullptr;
  
-  // All checks passed, convert to canonical unsigned saturated subtraction
-  // form: sub(max()).
-  // (a > b) ? a - b : 0 -> ((a > b) ? a : b) - b)
-  Value *Max = Builder.CreateSelect(Builder.CreateICmp(Pred, A, B), A, B);
-  return IsNegative ? Builder.CreateSub(B, Max) : Builder.CreateSub(Max, B);
+  // (a > b) ? a - b : 0 -> usub.sat(a, b)
+  // (a > b) ? b - a : 0 -> -usub.sat(a, b)
+  Value *Result = Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, A, B);
+  if (IsNegative)
+    Result = Builder.CreateNeg(Result);
+  return Result;
  }
  
  static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
@@ -689,15 +686,16 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
    if (Pred == ICmpInst::ICMP_ULT &&
        match(TVal, m_Add(m_Value(X), m_APInt(C))) && X == Cmp0 &&
        match(FVal, m_AllOnes()) && match(Cmp1, m_APInt(CmpC)) && *CmpC == ~*C) {
-    // Commute compare predicate and select operands:
-    // (X u< ~C) ? (X + C) : -1 --> (X u> ~C) ? -1 : (X + C)
-    Value *NewCmp = Builder.CreateICmp(ICmpInst::ICMP_UGT, X, Cmp1);
-    return Builder.CreateSelect(NewCmp, FVal, TVal);
+    // (X u< ~C) ? (X + C) : -1 --> uadd.sat(X, C)
+    return Builder.CreateBinaryIntrinsic(
+        Intrinsic::uadd_sat, X, ConstantInt::get(X->getType(), *C));
    }
  
    // Match unsigned saturated add of 2 variables with an unnecessary 'not'.
    // There are 8 commuted variants.
-  // Canonicalize -1 (saturated result) to true value of the select.
+  // Canonicalize -1 (saturated result) to true value of the select. Just
+  // swapping the compare operands is legal, because the selected value is the
+  // same in case of equality, so we can interchange u< and u<=.
    if (match(FVal, m_AllOnes())) {
      std::swap(TVal, FVal);
      std::swap(Cmp0, Cmp1);
@@ -717,24 +715,19 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
    Value *Y;
    if (match(Cmp0, m_Not(m_Value(X))) &&
        match(FVal, m_c_Add(m_Specific(X), m_Value(Y))) && Y == Cmp1) {
-    // Change the comparison to use the sum (false value of the select). That is
-    // a canonical pattern match form for uadd.with.overflow and eliminates a
-    // use of the 'not' op:
-    // (~X u< Y) ? -1 : (X + Y) --> ((X + Y) u< Y) ? -1 : (X + Y)
-    // (~X u< Y) ? -1 : (Y + X) --> ((Y + X) u< Y) ? -1 : (Y + X)
-    Value *NewCmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, FVal, Y);
-    return Builder.CreateSelect(NewCmp, TVal, FVal);
+    // (~X u< Y) ? -1 : (X + Y) --> uadd.sat(X, Y)
+    // (~X u< Y) ? -1 : (Y + X) --> uadd.sat(X, Y)
+    return Builder.CreateBinaryIntrinsic(Intrinsic::uadd_sat, X, Y);
    }
    // The 'not' op may be included in the sum but not the compare.
    X = Cmp0;
    Y = Cmp1;
    if (match(FVal, m_c_Add(m_Not(m_Specific(X)), m_Specific(Y)))) {
-    // Change the comparison to use the sum (false value of the select). That is
-    // a canonical pattern match form for uadd.with.overflow:
-    // (X u< Y) ? -1 : (~X + Y) --> ((~X + Y) u< Y) ? -1 : (~X + Y)
-    // (X u< Y) ? -1 : (Y + ~X) --> ((Y + ~X) u< Y) ? -1 : (Y + ~X)
-    Value *NewCmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, FVal, Y);
-    return Builder.CreateSelect(NewCmp, TVal, FVal);
+    // (X u< Y) ? -1 : (~X + Y) --> uadd.sat(~X, Y)
+    // (X u< Y) ? -1 : (Y + ~X) --> uadd.sat(Y, ~X)
+    BinaryOperator *BO = cast<BinaryOperator>(FVal);
+    return Builder.CreateBinaryIntrinsic(
+        Intrinsic::uadd_sat, BO->getOperand(0), BO->getOperand(1));
    }
  
    return nullptr;
diff --git a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll

index 4d0e722..3ab5008 100644 (file)
--- a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
+++ b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
@@ -893,10 +893,8 @@ define <2 x i8> @test_vector_usub_add_nuw_no_ov_nonsplat3(<2 x i8> %a) {
  
  define i32 @uadd_sat(i32 %x, i32 %y) {
  ; CHECK-LABEL: @uadd_sat(
-; CHECK-NEXT:    [[A:%.*]] = add i32 [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 [[Y:%.*]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
  ;
    %notx = xor i32 %x, -1
    %a = add i32 %y, %x
@@ -908,10 +906,8 @@ define i32 @uadd_sat(i32 %x, i32 %y) {
  define i32 @uadd_sat_commute_add(i32 %xp, i32 %y) {
  ; CHECK-LABEL: @uadd_sat_commute_add(
  ; CHECK-NEXT:    [[X:%.*]] = urem i32 42, [[XP:%.*]]
-; CHECK-NEXT:    [[A:%.*]] = add i32 [[X]], [[Y:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X]], i32 [[Y:%.*]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
  ;
    %x = urem i32 42, %xp ; thwart complexity-based-canonicalization
    %notx = xor i32 %x, -1
@@ -924,10 +920,8 @@ define i32 @uadd_sat_commute_add(i32 %xp, i32 %y) {
  define i32 @uadd_sat_ugt(i32 %x, i32 %yp) {
  ; CHECK-LABEL: @uadd_sat_ugt(
  ; CHECK-NEXT:    [[Y:%.*]] = sdiv i32 [[YP:%.*]], 2442
-; CHECK-NEXT:    [[A:%.*]] = add i32 [[Y]], [[X:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 [[Y]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
  ;
    %y = sdiv i32 %yp, 2442 ; thwart complexity-based-canonicalization
    %notx = xor i32 %x, -1
@@ -941,10 +935,8 @@ define <2 x i32> @uadd_sat_ugt_commute_add(<2 x i32> %xp, <2 x i32> %yp) {
  ; CHECK-LABEL: @uadd_sat_ugt_commute_add(
  ; CHECK-NEXT:    [[Y:%.*]] = sdiv <2 x i32> [[YP:%.*]], <i32 2442, i32 4242>
  ; CHECK-NEXT:    [[X:%.*]] = srem <2 x i32> <i32 42, i32 43>, [[XP:%.*]]
-; CHECK-NEXT:    [[A:%.*]] = add <2 x i32> [[X]], [[Y]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i32> [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> <i32 -1, i32 -1>, <2 x i32> [[A]]
-; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[X]], <2 x i32> [[Y]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP1]]
  ;
    %y = sdiv <2 x i32> %yp, <i32 2442, i32 4242> ; thwart complexity-based-canonicalization
    %x = srem <2 x i32> <i32 42, i32 43>, %xp     ; thwart complexity-based-canonicalization
@@ -958,10 +950,8 @@ define <2 x i32> @uadd_sat_ugt_commute_add(<2 x i32> %xp, <2 x i32> %yp) {
  define i32 @uadd_sat_commute_select(i32 %x, i32 %yp) {
  ; CHECK-LABEL: @uadd_sat_commute_select(
  ; CHECK-NEXT:    [[Y:%.*]] = sdiv i32 [[YP:%.*]], 2442
-; CHECK-NEXT:    [[A:%.*]] = add i32 [[Y]], [[X:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 [[Y]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
  ;
    %y = sdiv i32 %yp, 2442 ; thwart complexity-based-canonicalization
    %notx = xor i32 %x, -1
@@ -975,10 +965,8 @@ define i32 @uadd_sat_commute_select_commute_add(i32 %xp, i32 %yp) {
  ; CHECK-LABEL: @uadd_sat_commute_select_commute_add(
  ; CHECK-NEXT:    [[X:%.*]] = urem i32 42, [[XP:%.*]]
  ; CHECK-NEXT:    [[Y:%.*]] = sdiv i32 [[YP:%.*]], 2442
-; CHECK-NEXT:    [[A:%.*]] = add nsw i32 [[X]], [[Y]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
  ;
    %x = urem i32 42, %xp ; thwart complexity-based-canonicalization
    %y = sdiv i32 %yp, 2442 ; thwart complexity-based-canonicalization
@@ -991,10 +979,8 @@ define i32 @uadd_sat_commute_select_commute_add(i32 %xp, i32 %yp) {
  
  define <2 x i32> @uadd_sat_commute_select_ugt(<2 x i32> %x, <2 x i32> %y) {
  ; CHECK-LABEL: @uadd_sat_commute_select_ugt(
-; CHECK-NEXT:    [[A:%.*]] = add <2 x i32> [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i32> [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> <i32 -1, i32 -1>, <2 x i32> [[A]]
-; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP1]]
  ;
    %notx = xor <2 x i32> %x, <i32 -1, i32 -1>
    %a = add <2 x i32> %y, %x
@@ -1006,10 +992,8 @@ define <2 x i32> @uadd_sat_commute_select_ugt(<2 x i32> %x, <2 x i32> %y) {
  define i32 @uadd_sat_commute_select_ugt_commute_add(i32 %xp, i32 %y) {
  ; CHECK-LABEL: @uadd_sat_commute_select_ugt_commute_add(
  ; CHECK-NEXT:    [[X:%.*]] = srem i32 42, [[XP:%.*]]
-; CHECK-NEXT:    [[A:%.*]] = add i32 [[X]], [[Y:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X]], i32 [[Y:%.*]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
  ;
    %x = srem i32 42, %xp   ; thwart complexity-based-canonicalization
    %notx = xor i32 %x, -1
@@ -1054,10 +1038,8 @@ define i32 @not_uadd_sat2(i32 %x, i32 %y) {
  define i32 @uadd_sat_not(i32 %x, i32 %y) {
  ; CHECK-LABEL: @uadd_sat_not(
  ; CHECK-NEXT:    [[NOTX:%.*]] = xor i32 [[X:%.*]], -1
-; CHECK-NEXT:    [[A:%.*]] = add i32 [[NOTX]], [[Y:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[NOTX]], i32 [[Y:%.*]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
  ;
    %notx = xor i32 %x, -1
    %a = add i32 %notx, %y
@@ -1071,10 +1053,8 @@ define i32 @uadd_sat_not_commute_add(i32 %xp, i32 %yp) {
  ; CHECK-NEXT:    [[X:%.*]] = srem i32 42, [[XP:%.*]]
  ; CHECK-NEXT:    [[Y:%.*]] = urem i32 42, [[YP:%.*]]
  ; CHECK-NEXT:    [[NOTX:%.*]] = xor i32 [[X]], -1
-; CHECK-NEXT:    [[A:%.*]] = add nsw i32 [[Y]], [[NOTX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[Y]], i32 [[NOTX]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
  ;
    %x = srem i32 42, %xp ; thwart complexity-based-canonicalization
    %y = urem i32 42, %yp ; thwart complexity-based-canonicalization
@@ -1088,10 +1068,8 @@ define i32 @uadd_sat_not_commute_add(i32 %xp, i32 %yp) {
  define i32 @uadd_sat_not_ugt(i32 %x, i32 %y) {
  ; CHECK-LABEL: @uadd_sat_not_ugt(
  ; CHECK-NEXT:    [[NOTX:%.*]] = xor i32 [[X:%.*]], -1
-; CHECK-NEXT:    [[A:%.*]] = add i32 [[NOTX]], [[Y:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[NOTX]], i32 [[Y:%.*]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
  ;
    %notx = xor i32 %x, -1
    %a = add i32 %notx, %y
@@ -1104,10 +1082,8 @@ define <2 x i32> @uadd_sat_not_ugt_commute_add(<2 x i32> %x, <2 x i32> %yp) {
  ; CHECK-LABEL: @uadd_sat_not_ugt_commute_add(
  ; CHECK-NEXT:    [[Y:%.*]] = sdiv <2 x i32> [[YP:%.*]], <i32 2442, i32 4242>
  ; CHECK-NEXT:    [[NOTX:%.*]] = xor <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
-; CHECK-NEXT:    [[A:%.*]] = add <2 x i32> [[Y]], [[NOTX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i32> [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> <i32 -1, i32 -1>, <2 x i32> [[A]]
-; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[Y]], <2 x i32> [[NOTX]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP1]]
  ;
    %y = sdiv <2 x i32> %yp, <i32 2442, i32 4242> ; thwart complexity-based-canonicalization
    %notx = xor <2 x i32> %x, <i32 -1, i32 -1>
@@ -1120,10 +1096,8 @@ define <2 x i32> @uadd_sat_not_ugt_commute_add(<2 x i32> %x, <2 x i32> %yp) {
  define i32 @uadd_sat_not_commute_select(i32 %x, i32 %y) {
  ; CHECK-LABEL: @uadd_sat_not_commute_select(
  ; CHECK-NEXT:    [[NOTX:%.*]] = xor i32 [[X:%.*]], -1
-; CHECK-NEXT:    [[A:%.*]] = add i32 [[NOTX]], [[Y:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[NOTX]], i32 [[Y:%.*]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
  ;
    %notx = xor i32 %x, -1
    %a = add i32 %notx, %y
@@ -1136,10 +1110,8 @@ define i32 @uadd_sat_not_commute_select_commute_add(i32 %x, i32 %yp) {
  ; CHECK-LABEL: @uadd_sat_not_commute_select_commute_add(
  ; CHECK-NEXT:    [[Y:%.*]] = sdiv i32 42, [[YP:%.*]]
  ; CHECK-NEXT:    [[NOTX:%.*]] = xor i32 [[X:%.*]], -1
-; CHECK-NEXT:    [[A:%.*]] = add i32 [[Y]], [[NOTX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[Y]], i32 [[NOTX]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
  ;
    %y = sdiv i32 42, %yp ; thwart complexity-based-canonicalization
    %notx = xor i32 %x, -1
@@ -1154,10 +1126,8 @@ define <2 x i32> @uadd_sat_not_commute_select_ugt(<2 x i32> %xp, <2 x i32> %yp)
  ; CHECK-NEXT:    [[X:%.*]] = urem <2 x i32> <i32 42, i32 -42>, [[XP:%.*]]
  ; CHECK-NEXT:    [[Y:%.*]] = srem <2 x i32> <i32 12, i32 412>, [[YP:%.*]]
  ; CHECK-NEXT:    [[NOTX:%.*]] = xor <2 x i32> [[X]], <i32 -1, i32 -1>
-; CHECK-NEXT:    [[A:%.*]] = add <2 x i32> [[Y]], [[NOTX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i32> [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> <i32 -1, i32 -1>, <2 x i32> [[A]]
-; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[Y]], <2 x i32> [[NOTX]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP1]]
  ;
    %x = urem <2 x i32> <i32 42, i32 -42>, %xp ; thwart complexity-based-canonicalization
    %y = srem <2 x i32> <i32 12, i32 412>, %yp ; thwart complexity-based-canonicalization
@@ -1171,10 +1141,8 @@ define <2 x i32> @uadd_sat_not_commute_select_ugt(<2 x i32> %xp, <2 x i32> %yp)
  define i32 @uadd_sat_not_commute_select_ugt_commute_add(i32 %x, i32 %y) {
  ; CHECK-LABEL: @uadd_sat_not_commute_select_ugt_commute_add(
  ; CHECK-NEXT:    [[NOTX:%.*]] = xor i32 [[X:%.*]], -1
-; CHECK-NEXT:    [[A:%.*]] = add i32 [[NOTX]], [[Y:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[NOTX]], i32 [[Y:%.*]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
  ;
    %notx = xor i32 %x, -1
    %a = add i32 %notx, %y
@@ -1198,10 +1166,8 @@ define i32 @uadd_sat_constant(i32 %x) {
  
  define i32 @uadd_sat_constant_commute(i32 %x) {
  ; CHECK-LABEL: @uadd_sat_constant_commute(
-; CHECK-NEXT:    [[A:%.*]] = add i32 [[X:%.*]], 42
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i32 [[X]], -43
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 42)
+; CHECK-NEXT:    ret i32 [[TMP1]]
  ;
    %a = add i32 %x, 42
    %c = icmp ult i32 %x, -43
@@ -1224,10 +1190,8 @@ define <4 x i32> @uadd_sat_constant_vec(<4 x i32> %x) {
  
  define <4 x i32> @uadd_sat_constant_vec_commute(<4 x i32> %x) {
  ; CHECK-LABEL: @uadd_sat_constant_vec_commute(
-; CHECK-NEXT:    [[A:%.*]] = add <4 x i32> [[X:%.*]], <i32 42, i32 42, i32 42, i32 42>
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <4 x i32> [[X]], <i32 -43, i32 -43, i32 -43, i32 -43>
-; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> [[A]]
-; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[X:%.*]], <4 x i32> <i32 42, i32 42, i32 42, i32 42>)
+; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
  ;
    %a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
    %c = icmp ult <4 x i32> %x, <i32 -43, i32 -43, i32 -43, i32 -43>
diff --git a/llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll b/llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll

index 5e79fda..44aa7de 100644 (file)
--- a/llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll
+++ b/llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll
@@ -1,21 +1,17 @@
  ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
  ; RUN: opt -instcombine -S < %s | FileCheck %s
  
-; Transforms for unsigned saturated subtraction idioms are tested here.
-; In all cases, we want to form a canonical min/max op (the compare and
-; select operands are the same), so that is recognized by the backend.
-; The backend recognition is tested in test/CodeGen/X86/psubus.ll.
+; Canonicalization of unsigned saturated subtraction idioms to
+; usub.sat() intrinsics is tested here.
  
  declare void @use(i64)
  
-; (a > b) ? a - b : 0 -> ((a > b) ? a : b) - b)
+; (a > b) ? a - b : 0 -> usub.sat(a, b)
  
  define i64 @max_sub_ugt(i64 %a, i64 %b) {
  ; CHECK-LABEL: @max_sub_ugt(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[A]], i64 [[B]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 [[TMP2]], [[B]]
-; CHECK-NEXT:    ret i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT:    ret i64 [[TMP1]]
  ;
    %cmp = icmp ugt i64 %a, %b
    %sub = sub i64 %a, %b
@@ -23,14 +19,12 @@ define i64 @max_sub_ugt(i64 %a, i64 %b) {
    ret i64 %sel
  }
  
-; (a >= b) ? a - b : 0 -> ((a >= b) ? a : b) - b)
+; (a >= b) ? a - b : 0 -> usub.sat(a, b)
  
  define i64 @max_sub_uge(i64 %a, i64 %b) {
  ; CHECK-LABEL: @max_sub_uge(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[B]], i64 [[A]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 [[TMP2]], [[B]]
-; CHECK-NEXT:    ret i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT:    ret i64 [[TMP1]]
  ;
    %cmp = icmp uge i64 %a, %b
    %sub = sub i64 %a, %b
@@ -39,14 +33,12 @@ define i64 @max_sub_uge(i64 %a, i64 %b) {
  }
  
  ; Again, with vectors:
-; (a > b) ? a - b : 0 -> ((a > b) ? a : b) - b)
+; (a > b) ? a - b : 0 -> usub.sat(a, b)
  
  define <4 x i32> @max_sub_ugt_vec(<4 x i32> %a, <4 x i32> %b) {
  ; CHECK-LABEL: @max_sub_ugt_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <4 x i32> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A]], <4 x i32> [[B]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub <4 x i32> [[TMP2]], [[B]]
-; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
+; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
  ;
    %cmp = icmp ugt <4 x i32> %a, %b
    %sub = sub <4 x i32> %a, %b
@@ -55,16 +47,14 @@ define <4 x i32> @max_sub_ugt_vec(<4 x i32> %a, <4 x i32> %b) {
  }
  
  ; Use extra ops to thwart icmp swapping canonicalization.
-; (b < a) ? a - b : 0 -> ((a > b) ? a : b) - b)
+; (b < a) ? a - b : 0 -> usub.sat(a, b)
  
  define i64 @max_sub_ult(i64 %a, i64 %b) {
  ; CHECK-LABEL: @max_sub_ult(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[B:%.*]], [[A:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[A]], i64 [[B]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 [[TMP2]], [[B]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]])
  ; CHECK-NEXT:    [[EXTRASUB:%.*]] = sub i64 [[B]], [[A]]
  ; CHECK-NEXT:    call void @use(i64 [[EXTRASUB]])
-; CHECK-NEXT:    ret i64 [[TMP3]]
+; CHECK-NEXT:    ret i64 [[TMP1]]
  ;
    %cmp = icmp ult i64 %b, %a
    %sub = sub i64 %a, %b
@@ -74,16 +64,14 @@ define i64 @max_sub_ult(i64 %a, i64 %b) {
    ret i64 %sel
  }
  
-; (b > a) ? 0 : a - b -> ((a > b) ? a : b) - b)
+; (b > a) ? 0 : a - b -> usub.sat(a, b)
  
  define i64 @max_sub_ugt_sel_swapped(i64 %a, i64 %b) {
  ; CHECK-LABEL: @max_sub_ugt_sel_swapped(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i64 [[B:%.*]], [[A:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[B]], i64 [[A]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 [[TMP2]], [[B]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]])
  ; CHECK-NEXT:    [[EXTRASUB:%.*]] = sub i64 [[B]], [[A]]
  ; CHECK-NEXT:    call void @use(i64 [[EXTRASUB]])
-; CHECK-NEXT:    ret i64 [[TMP3]]
+; CHECK-NEXT:    ret i64 [[TMP1]]
  ;
    %cmp = icmp ugt i64 %b, %a
    %sub = sub i64 %a, %b
@@ -93,14 +81,12 @@ define i64 @max_sub_ugt_sel_swapped(i64 %a, i64 %b) {
    ret i64 %sel
  }
  
-; (a < b) ? 0 : a - b -> ((a > b) ? a : b) - b)
+; (a < b) ? 0 : a - b -> usub.sat(a, b)
  
  define i64 @max_sub_ult_sel_swapped(i64 %a, i64 %b) {
  ; CHECK-LABEL: @max_sub_ult_sel_swapped(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[B]], i64 [[A]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 [[TMP2]], [[B]]
-; CHECK-NEXT:    ret i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT:    ret i64 [[TMP1]]
  ;
    %cmp = icmp ult i64 %a, %b
    %sub = sub i64 %a, %b
@@ -108,16 +94,15 @@ define i64 @max_sub_ult_sel_swapped(i64 %a, i64 %b) {
    ret i64 %sel
  }
  
-; ((a > b) ? b - a : 0) -> (b - ((a > b) ? a : b))
+; ((a > b) ? b - a : 0) -> -usub.sat(a, b)
  
  define i64 @neg_max_sub_ugt(i64 %a, i64 %b) {
  ; CHECK-LABEL: @neg_max_sub_ugt(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[A]], i64 [[B]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 [[B]], [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i64 0, [[TMP1]]
  ; CHECK-NEXT:    [[EXTRASUB:%.*]] = sub i64 [[A]], [[B]]
  ; CHECK-NEXT:    call void @use(i64 [[EXTRASUB]])
-; CHECK-NEXT:    ret i64 [[TMP3]]
+; CHECK-NEXT:    ret i64 [[TMP2]]
  ;
    %cmp = icmp ugt i64 %a, %b
    %sub = sub i64 %b, %a
@@ -127,14 +112,13 @@ define i64 @neg_max_sub_ugt(i64 %a, i64 %b) {
    ret i64 %sel
  }
  
-; ((b < a) ? b - a : 0) -> - ((a > b) ? a : b) - b)
+; ((b < a) ? b - a : 0) -> -usub.sat(a, b)
  
  define i64 @neg_max_sub_ult(i64 %a, i64 %b) {
  ; CHECK-LABEL: @neg_max_sub_ult(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[A]], i64 [[B]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 [[B]], [[TMP2]]
-; CHECK-NEXT:    ret i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i64 0, [[TMP1]]
+; CHECK-NEXT:    ret i64 [[TMP2]]
  ;
    %cmp = icmp ult i64 %b, %a
    %sub = sub i64 %b, %a
@@ -142,14 +126,13 @@ define i64 @neg_max_sub_ult(i64 %a, i64 %b) {
    ret i64 %sel
  }
  
-; ((b > a) ? 0 : b - a) -> - ((a > b) ? a : b) - b)
+; ((b > a) ? 0 : b - a) -> -usub.sat(a, b)
  
  define i64 @neg_max_sub_ugt_sel_swapped(i64 %a, i64 %b) {
  ; CHECK-LABEL: @neg_max_sub_ugt_sel_swapped(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[B]], i64 [[A]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 [[B]], [[TMP2]]
-; CHECK-NEXT:    ret i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i64 0, [[TMP1]]
+; CHECK-NEXT:    ret i64 [[TMP2]]
  ;
    %cmp = icmp ugt i64 %b, %a
    %sub = sub i64 %b, %a
@@ -157,16 +140,15 @@ define i64 @neg_max_sub_ugt_sel_swapped(i64 %a, i64 %b) {
    ret i64 %sel
  }
  
-; ((a < b) ? 0 : b - a) -> - ((a > b) ? a : b) - b)
+; ((a < b) ? 0 : b - a) -> -usub.sat(a, b)
  
  define i64 @neg_max_sub_ult_sel_swapped(i64 %a, i64 %b) {
  ; CHECK-LABEL: @neg_max_sub_ult_sel_swapped(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[B]], i64 [[A]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 [[B]], [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i64 0, [[TMP1]]
  ; CHECK-NEXT:    [[EXTRASUB:%.*]] = sub i64 [[A]], [[B]]
  ; CHECK-NEXT:    call void @use(i64 [[EXTRASUB]])
-; CHECK-NEXT:    ret i64 [[TMP3]]
+; CHECK-NEXT:    ret i64 [[TMP2]]
  ;
    %cmp = icmp ult i64 %a, %b
    %sub = sub i64 %b, %a
author	Nikita Popov <nikita.ppv@gmail.com>
	Wed, 27 Mar 2019 17:56:15 +0000 (17:56 +0000)
committer	Nikita Popov <nikita.ppv@gmail.com>
	Wed, 27 Mar 2019 17:56:15 +0000 (17:56 +0000)
llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp		patch | blob | history
llvm/test/Transforms/InstCombine/saturating-add-sub.ll		patch | blob | history
llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll		patch | blob | history