[InstCombine] canonicalize 'icmp (trunc X), C' to 'icmp (X & Mask), C'

author Sanjay Patel <spatel@rotateright.com>

Thu, 30 Jun 2022 19:00:12 +0000 (15:00 -0400)

committer Sanjay Patel <spatel@rotateright.com>

Thu, 30 Jun 2022 19:51:39 +0000 (15:51 -0400)
author Sanjay Patel <spatel@rotateright.com>
Thu, 30 Jun 2022 19:00:12 +0000 (15:00 -0400)
committer Sanjay Patel <spatel@rotateright.com>
Thu, 30 Jun 2022 19:51:39 +0000 (15:51 -0400)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

index ebe1676..2e97df7 100644 (file)
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1534,7 +1534,7 @@ Instruction *InstCombinerImpl::foldICmpWithDominatingICmp(ICmpInst &Cmp) {
    return nullptr;
  }
  
-/// Fold icmp (trunc X, Y), C.
+/// Fold icmp (trunc X), C.
  Instruction *InstCombinerImpl::foldICmpTruncConstant(ICmpInst &Cmp,
                                                       TruncInst *Trunc,
                                                       const APInt &C) {
@@ -1551,6 +1551,14 @@ Instruction *InstCombinerImpl::foldICmpTruncConstant(ICmpInst &Cmp,
    unsigned DstBits = Trunc->getType()->getScalarSizeInBits(),
             SrcBits = X->getType()->getScalarSizeInBits();
    if (Cmp.isEquality() && Trunc->hasOneUse()) {
+    if (!X->getType()->isVectorTy() && shouldChangeType(DstBits, SrcBits)) {
+      Constant *Mask = ConstantInt::get(X->getType(),
+                                        APInt::getLowBitsSet(SrcBits, DstBits));
+      Value *And = Builder.CreateAnd(X, Mask);
+      Constant *WideC = ConstantInt::get(X->getType(), C.zext(SrcBits));
+      return new ICmpInst(Pred, And, WideC);
+    }
+
      // Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all
      // of the high bits truncated out of x are known.
      KnownBits Known = computeKnownBits(X, 0, &Cmp);
diff --git a/llvm/test/Transforms/InstCombine/icmp-gep.ll b/llvm/test/Transforms/InstCombine/icmp-gep.ll

index 54d3561..6230703 100644 (file)
--- a/llvm/test/Transforms/InstCombine/icmp-gep.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-gep.ll
@@ -214,8 +214,8 @@ define i1 @PR8882(i64 %i) {
  
  define i1 @test24_as1(i64 %i) {
  ; CHECK-LABEL: @test24_as1(
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[I:%.*]] to i16
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i16 [[TMP1]], 1000
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[I:%.*]], 65535
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[TMP1]], 1000
  ; CHECK-NEXT:    ret i1 [[CMP]]
  ;
    %p1 = getelementptr inbounds i32, i32 addrspace(1)* getelementptr inbounds ([1000 x i32], [1000 x i32] addrspace(1)* @X_as1, i64 0, i64 0), i64 %i
diff --git a/llvm/test/Transforms/InstCombine/icmp-trunc.ll b/llvm/test/Transforms/InstCombine/icmp-trunc.ll

index 5b38b94..d1c981e 100644 (file)
--- a/llvm/test/Transforms/InstCombine/icmp-trunc.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-trunc.ll
@@ -313,10 +313,15 @@ define i1 @sgt_n1_use(i32 %x) {
  }
  
  define i1 @trunc_eq_i32_i8(i32 %x) {
-; CHECK-LABEL: @trunc_eq_i32_i8(
-; CHECK-NEXT:    [[T:%.*]] = trunc i32 [[X:%.*]] to i8
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[T]], 42
-; CHECK-NEXT:    ret i1 [[R]]
+; DL64-LABEL: @trunc_eq_i32_i8(
+; DL64-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 255
+; DL64-NEXT:    [[R:%.*]] = icmp eq i32 [[TMP1]], 42
+; DL64-NEXT:    ret i1 [[R]]
+;
+; DL8-LABEL: @trunc_eq_i32_i8(
+; DL8-NEXT:    [[T:%.*]] = trunc i32 [[X:%.*]] to i8
+; DL8-NEXT:    [[R:%.*]] = icmp eq i8 [[T]], 42
+; DL8-NEXT:    ret i1 [[R]]
  ;
    %t = trunc i32 %x to i8
    %r = icmp eq i8 %t, 42
@@ -335,10 +340,15 @@ define <2 x i1> @trunc_eq_v2i32_v2i8(<2 x i32> %x) {
  }
  
  define i1 @trunc_ne_i64_i10(i64 %x) {
-; CHECK-LABEL: @trunc_ne_i64_i10(
-; CHECK-NEXT:    [[T:%.*]] = trunc i64 [[X:%.*]] to i10
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i10 [[T]], 42
-; CHECK-NEXT:    ret i1 [[R]]
+; DL64-LABEL: @trunc_ne_i64_i10(
+; DL64-NEXT:    [[TMP1:%.*]] = and i64 [[X:%.*]], 1023
+; DL64-NEXT:    [[R:%.*]] = icmp eq i64 [[TMP1]], 42
+; DL64-NEXT:    ret i1 [[R]]
+;
+; DL8-LABEL: @trunc_ne_i64_i10(
+; DL8-NEXT:    [[T:%.*]] = trunc i64 [[X:%.*]] to i10
+; DL8-NEXT:    [[R:%.*]] = icmp eq i10 [[T]], 42
+; DL8-NEXT:    ret i1 [[R]]
  ;
    %t = trunc i64 %x to i10
    %r = icmp eq i10 %t, 42
diff --git a/llvm/test/Transforms/InstCombine/load-cmp.ll b/llvm/test/Transforms/InstCombine/load-cmp.ll

index 5f584a5..757fe2e 100644 (file)
--- a/llvm/test/Transforms/InstCombine/load-cmp.ll
+++ b/llvm/test/Transforms/InstCombine/load-cmp.ll
@@ -284,8 +284,8 @@ define i1 @test10_struct_arr_i16(i16 %x) {
  
  define i1 @test10_struct_arr_i64(i64 %x) {
  ; CHECK-LABEL: @test10_struct_arr_i64(
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
-; CHECK-NEXT:    [[R:%.*]] = icmp ne i32 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[X:%.*]], 4294967295
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i64 [[TMP1]], 1
  ; CHECK-NEXT:    ret i1 [[R]]
  ;
    %p = getelementptr inbounds [4 x %Foo], [4 x %Foo]* @GStructArr, i64 0, i64 %x, i32 2
diff --git a/llvm/test/Transforms/InstCombine/memchr.ll b/llvm/test/Transforms/InstCombine/memchr.ll

index 0278631..429658d 100644 (file)
--- a/llvm/test/Transforms/InstCombine/memchr.ll
+++ b/llvm/test/Transforms/InstCombine/memchr.ll
@@ -172,8 +172,8 @@ define i1 @test13(i32 %C) {
  
  define i1 @test14(i32 %C) {
  ; CHECK-LABEL: @test14(
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[C:%.*]] to i8
-; CHECK-NEXT:    [[MEMCHR_CHAR0CMP:%.*]] = icmp eq i8 [[TMP1]], 31
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[C:%.*]], 255
+; CHECK-NEXT:    [[MEMCHR_CHAR0CMP:%.*]] = icmp eq i32 [[TMP1]], 31
  ; CHECK-NEXT:    ret i1 [[MEMCHR_CHAR0CMP]]
  ;
    %dst = call i8* @memchr(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @single, i64 0, i64 0), i32 %C, i32 1)
diff --git a/llvm/test/Transforms/LoopVectorize/X86/parallel-loops.ll b/llvm/test/Transforms/LoopVectorize/X86/parallel-loops.ll

index 9e0ab52..7d014e1 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/X86/parallel-loops.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/parallel-loops.ll
@@ -30,8 +30,8 @@ define void @loop(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
  ; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV_NEXT]]
  ; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4
  ; CHECK-NEXT:    store i32 [[TMP2]], i32* [[ARRAYIDX2]], align 4
-; CHECK-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 512
+; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[INDVARS_IV_NEXT]], 4294967295
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[TMP3]], 512
  ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
  ; CHECK:       for.end:
  ; CHECK-NEXT:    ret void
@@ -166,8 +166,8 @@ define void @mixed_metadata(i32* nocapture %a, i32* nocapture %b) nounwind uwtab
  ; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV_NEXT]]
  ; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4, !llvm.access.group !7
  ; CHECK-NEXT:    store i32 [[TMP2]], i32* [[ARRAYIDX2]], align 4, !llvm.access.group !7
-; CHECK-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 512
+; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[INDVARS_IV_NEXT]], 4294967295
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[TMP3]], 512
  ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
  ; CHECK:       for.end:
  ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll

index cc3d6b1..071ea8f 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
@@ -58,21 +58,21 @@ define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B)
  ; CHECK:       vector.body:
  ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
  ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_IND1:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
  ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
  ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
  ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
  ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
  ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VEC_IND1]])
+; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VEC_IND]])
  ; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[TMP4]], [[VEC_PHI]]
  ; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]])
  ; CHECK-NEXT:    [[TMP7:%.*]] = add i32 [[TMP6]], [[TMP5]]
-; CHECK-NEXT:    [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD3]])
+; CHECK-NEXT:    [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD1]])
  ; CHECK-NEXT:    [[TMP9]] = add i32 [[TMP8]], [[TMP7]]
  ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
  ; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
  ; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
  ; CHECK:       middle.block:
@@ -166,21 +166,21 @@ define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B)
  ; CHECK:       vector.body:
  ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
  ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 1, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_IND1:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
  ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
  ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
  ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
  ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
  ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[VEC_IND1]])
+; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[VEC_IND]])
  ; CHECK-NEXT:    [[TMP5:%.*]] = mul i32 [[TMP4]], [[VEC_PHI]]
  ; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD]])
  ; CHECK-NEXT:    [[TMP7:%.*]] = mul i32 [[TMP6]], [[TMP5]]
-; CHECK-NEXT:    [[TMP8:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD3]])
+; CHECK-NEXT:    [[TMP8:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD1]])
  ; CHECK-NEXT:    [[TMP9]] = mul i32 [[TMP8]], [[TMP7]]
  ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
  ; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
  ; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
  ; CHECK:       middle.block:
@@ -226,20 +226,20 @@ define i32 @reduction_mix(i32* noalias nocapture %A, i32* noalias nocapture %B)
  ; CHECK:       vector.body:
  ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
  ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_IND1:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
  ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
  ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
  ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
  ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
  ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = mul nsw <4 x i32> [[WIDE_LOAD3]], [[WIDE_LOAD]]
-; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VEC_IND1]])
+; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = mul nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]]
+; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VEC_IND]])
  ; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[TMP5]], [[VEC_PHI]]
  ; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
  ; CHECK-NEXT:    [[TMP8]] = add i32 [[TMP7]], [[TMP6]]
  ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
  ; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
  ; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
  ; CHECK:       middle.block:
@@ -898,8 +898,8 @@ define i32 @reduction_sum_multiuse(i32* noalias nocapture %A, i32* noalias nocap
  ; CHECK-NEXT:    [[L8:%.*]] = add i32 [[L7]], [[L3]]
  ; CHECK-NEXT:    [[L10]] = add i32 [[L8]], [[SUM_02]]
  ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 256
+; CHECK-NEXT:    [[TMP0:%.*]] = and i64 [[INDVARS_IV_NEXT]], 4294967295
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[TMP0]], 256
  ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[END:%.*]], label [[DOTLR_PH]]
  ; CHECK:       end:
  ; CHECK-NEXT:    ret i32 [[L10]]
@@ -939,21 +939,21 @@ define i32 @reduction_predicated(i32* noalias nocapture %A, i32* noalias nocaptu
  ; CHECK:       vector.body:
  ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
  ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_IND1:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
  ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
  ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
  ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
  ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
  ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VEC_IND1]])
+; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VEC_IND]])
  ; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[TMP4]], [[VEC_PHI]]
  ; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]])
  ; CHECK-NEXT:    [[TMP7:%.*]] = add i32 [[TMP6]], [[TMP5]]
-; CHECK-NEXT:    [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD3]])
+; CHECK-NEXT:    [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD1]])
  ; CHECK-NEXT:    [[TMP9]] = add i32 [[TMP8]], [[TMP7]]
  ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
  ; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
  ; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
  ; CHECK:       middle.block:
diff --git a/llvm/test/Transforms/PhaseOrdering/cmp-logic.ll b/llvm/test/Transforms/PhaseOrdering/cmp-logic.ll

index ee64afd..b1c07a3 100644 (file)
--- a/llvm/test/Transforms/PhaseOrdering/cmp-logic.ll
+++ b/llvm/test/Transforms/PhaseOrdering/cmp-logic.ll
@@ -124,18 +124,15 @@ define i32 @PR56119(i32 %e.coerce) {
  ; OZ-LABEL: @PR56119(
  ; OZ-NEXT:  entry:
  ; OZ-NEXT:    [[E_COERCE_FR:%.*]] = freeze i32 [[E_COERCE:%.*]]
-; OZ-NEXT:    [[REM_LHS_TRUNC:%.*]] = trunc i32 [[E_COERCE_FR]] to i8
-; OZ-NEXT:    [[DOTNOT:%.*]] = icmp ne i8 [[REM_LHS_TRUNC]], -1
-; OZ-NEXT:    [[E_COERCE_FR_OP:%.*]] = and i32 [[E_COERCE_FR]], 255
-; OZ-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[E_COERCE_FR_OP]], 7
-; OZ-NEXT:    [[CMP:%.*]] = and i1 [[DOTNOT]], [[CMP2]]
-; OZ-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; OZ-NEXT:    [[TMP0:%.*]] = and i32 [[E_COERCE_FR]], 255
+; OZ-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[TMP0]], 7
+; OZ-NEXT:    br i1 [[CMP2]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
  ; OZ:       if.then:
  ; OZ-NEXT:    tail call void (...) @foo()
  ; OZ-NEXT:    br label [[IF_END]]
  ; OZ:       if.end:
-; OZ-NEXT:    [[TMP0:%.*]] = load i32, ptr @c, align 4
-; OZ-NEXT:    ret i32 [[TMP0]]
+; OZ-NEXT:    [[TMP1:%.*]] = load i32, ptr @c, align 4
+; OZ-NEXT:    ret i32 [[TMP1]]
  ;
  entry:
    %e = alloca %struct.a, align 4
author	Sanjay Patel <spatel@rotateright.com>
	Thu, 30 Jun 2022 19:00:12 +0000 (15:00 -0400)
committer	Sanjay Patel <spatel@rotateright.com>
	Thu, 30 Jun 2022 19:51:39 +0000 (15:51 -0400)
llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp		patch | blob | history
llvm/test/Transforms/InstCombine/icmp-gep.ll		patch | blob | history
llvm/test/Transforms/InstCombine/icmp-trunc.ll		patch | blob | history
llvm/test/Transforms/InstCombine/load-cmp.ll		patch | blob | history
llvm/test/Transforms/InstCombine/memchr.ll		patch | blob | history
llvm/test/Transforms/LoopVectorize/X86/parallel-loops.ll		patch | blob | history
llvm/test/Transforms/LoopVectorize/reduction-inloop.ll		patch | blob | history
llvm/test/Transforms/PhaseOrdering/cmp-logic.ll		patch | blob | history