[InstCombine] Drop nuw flag when CtlzOp is a sub nuw (#91776)

author Yingwei Zheng <dtcxzyw2333@gmail.com>

Mon, 13 May 2024 06:27:59 +0000 (14:27 +0800)

committer Tom Stellard <tstellar@redhat.com>

Thu, 16 May 2024 18:42:50 +0000 (11:42 -0700)
author Yingwei Zheng <dtcxzyw2333@gmail.com>
Mon, 13 May 2024 06:27:59 +0000 (14:27 +0800)
committer Tom Stellard <tstellar@redhat.com>
Thu, 16 May 2024 18:42:50 +0000 (11:42 -0700)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

index 8cc7901cbac7fa576cac96af163634524e0f44d9..86a39cf2ee93fc056c8e2074d595e5bf092c5145 100644 (file)
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -3201,7 +3201,8 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) {
  // pattern.
  static bool isSafeToRemoveBitCeilSelect(ICmpInst::Predicate Pred, Value *Cond0,
                                          const APInt *Cond1, Value *CtlzOp,
-                                        unsigned BitWidth) {
+                                        unsigned BitWidth,
+                                        bool &ShouldDropNUW) {
    // The challenge in recognizing std::bit_ceil(X) is that the operand is used
    // for the CTLZ proper and select condition, each possibly with some
    // operation like add and sub.
@@ -3224,6 +3225,8 @@ static bool isSafeToRemoveBitCeilSelect(ICmpInst::Predicate Pred, Value *Cond0,
    ConstantRange CR = ConstantRange::makeExactICmpRegion(
        CmpInst::getInversePredicate(Pred), *Cond1);
  
+  ShouldDropNUW = false;
+
    // Match the operation that's used to compute CtlzOp from CommonAncestor.  If
    // CtlzOp == CommonAncestor, return true as no operation is needed.  If a
    // match is found, execute the operation on CR, update CR, and return true.
@@ -3237,6 +3240,7 @@ static bool isSafeToRemoveBitCeilSelect(ICmpInst::Predicate Pred, Value *Cond0,
        return true;
      }
      if (match(CtlzOp, m_Sub(m_APInt(C), m_Specific(CommonAncestor)))) {
+      ShouldDropNUW = true;
        CR = ConstantRange(*C).sub(CR);
        return true;
      }
@@ -3306,14 +3310,20 @@ static Instruction *foldBitCeil(SelectInst &SI, IRBuilderBase &Builder) {
      Pred = CmpInst::getInversePredicate(Pred);
    }
  
+  bool ShouldDropNUW;
+
    if (!match(FalseVal, m_One()) ||
        !match(TrueVal,
               m_OneUse(m_Shl(m_One(), m_OneUse(m_Sub(m_SpecificInt(BitWidth),
                                                      m_Value(Ctlz)))))) ||
        !match(Ctlz, m_Intrinsic<Intrinsic::ctlz>(m_Value(CtlzOp), m_Zero())) ||
-      !isSafeToRemoveBitCeilSelect(Pred, Cond0, Cond1, CtlzOp, BitWidth))
+      !isSafeToRemoveBitCeilSelect(Pred, Cond0, Cond1, CtlzOp, BitWidth,
+                                   ShouldDropNUW))
      return nullptr;
  
+  if (ShouldDropNUW)
+    cast<Instruction>(CtlzOp)->setHasNoUnsignedWrap(false);
+
    // Build 1 << (-CTLZ & (BitWidth-1)).  The negation likely corresponds to a
    // single hardware instruction as opposed to BitWidth - CTLZ, where BitWidth
    // is an integer constant.  Masking with BitWidth-1 comes free on some
diff --git a/llvm/test/Transforms/InstCombine/bit_ceil.ll b/llvm/test/Transforms/InstCombine/bit_ceil.ll

index 52e70c78ba54289f212a04509449c6f3400958f0..63a5ae012eeb681a367292a196a0a5ff9f02ee89 100644 (file)
--- a/llvm/test/Transforms/InstCombine/bit_ceil.ll
+++ b/llvm/test/Transforms/InstCombine/bit_ceil.ll
@@ -284,6 +284,42 @@ define <4 x i32> @bit_ceil_v4i32(<4 x i32> %x) {
    ret <4 x i32> %sel
  }
  
+define i32 @pr91691(i32 %0) {
+; CHECK-LABEL: @pr91691(
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 -2, [[TMP0:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP2]], i1 false)
+; CHECK-NEXT:    [[TMP4:%.*]] = sub nsw i32 0, [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[TMP4]], 31
+; CHECK-NEXT:    [[TMP6:%.*]] = shl nuw i32 1, [[TMP5]]
+; CHECK-NEXT:    ret i32 [[TMP6]]
+;
+  %2 = sub nuw i32 -2, %0
+  %3 = tail call i32 @llvm.ctlz.i32(i32 %2, i1 false)
+  %4 = sub i32 32, %3
+  %5 = shl i32 1, %4
+  %6 = icmp ult i32 %0, -2
+  %7 = select i1 %6, i32 %5, i32 1
+  ret i32 %7
+}
+
+define i32 @pr91691_keep_nsw(i32 %0) {
+; CHECK-LABEL: @pr91691_keep_nsw(
+; CHECK-NEXT:    [[TMP2:%.*]] = sub nsw i32 -2, [[TMP0:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP2]], i1 false)
+; CHECK-NEXT:    [[TMP4:%.*]] = sub nsw i32 0, [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[TMP4]], 31
+; CHECK-NEXT:    [[TMP6:%.*]] = shl nuw i32 1, [[TMP5]]
+; CHECK-NEXT:    ret i32 [[TMP6]]
+;
+  %2 = sub nsw i32 -2, %0
+  %3 = tail call i32 @llvm.ctlz.i32(i32 %2, i1 false)
+  %4 = sub i32 32, %3
+  %5 = shl i32 1, %4
+  %6 = icmp ult i32 %0, -2
+  %7 = select i1 %6, i32 %5, i32 1
+  ret i32 %7
+}
+
  declare i32 @llvm.ctlz.i32(i32, i1 immarg)
  declare i64 @llvm.ctlz.i64(i64, i1 immarg)
  declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1)
author	Yingwei Zheng <dtcxzyw2333@gmail.com>
	Mon, 13 May 2024 06:27:59 +0000 (14:27 +0800)
committer	Tom Stellard <tstellar@redhat.com>
	Thu, 16 May 2024 18:42:50 +0000 (11:42 -0700)
llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp		patch | blob | history
llvm/test/Transforms/InstCombine/bit_ceil.ll		patch | blob | history