From fabaca10b86f77f7d2d34db91fa6b284da924395 Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans@chromium.org>
Date: Fri, 28 Jan 2022 12:10:26 +0100
Subject: [PATCH] Revert "[InstCombine] Fold and-reduce idiom"

It causes builds to fail with

llvm/include/llvm/Support/Casting.h:269:
typename llvm::cast_retty<X, Y*>::ret_type llvm::cast(Y*)
[with X = llvm::IntegerType; Y = const llvm::Type;
typename llvm::cast_retty<X, Y*>::ret_type = const llvm::IntegerType*]:
Assertion `isa<X>(Val) && "cast<Ty>() argument of incompatible type!"' failed.

See the code review for a link to a reproducer.

> This patch introduces folding of the and-reduce idiom and generates code
> that is easier to read and which is less costly in terms of icmp operations.
> The folding is
> ```
>   icmp eq (bitcast(icmp ne (lhs, rhs)), 0)
> ```
> into
> ```
>   icmp eq(bitcast(lhs), bitcast(rhs))
> ```
>
> See PR53419.
>
> Differential Revision: https://reviews.llvm.org/D118317
> Reviewed By: lebedev.ri, spatel

This reverts commit 8599bb0f26738ed88aae62aba57d82f7cf326cf9.

This also reverts the dependent change:
"[Test] Add 'ne' tests for and-reduce pattern folding"

This reverts commit a4aaa5995308ac2ba1bf180c9ce9c321cdb9f28a.
---
 .../Transforms/InstCombine/InstCombineCompares.cpp | 51 -------------------
 llvm/test/Transforms/InstCombine/icmp-vec.ll       |  6 +--
 .../InstCombine/reduction-and-sext-zext-i1.ll      | 58 +++-----------------
 .../InstCombine/reduction-or-sext-zext-i1.ll       | 57 +++----------------
 4 files changed, 18 insertions(+), 154 deletions(-)
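For reference, the transform being reverted rewrote IR of the following
shape. This is a minimal sketch reconstructed from the comments in the
removed foldReductionIdiom() below; the function names are illustrative,
not taken from the patch.

```
; Lowered form of @llvm.vector.reduce.and over an element-wise compare:
; returns true iff all eight lanes of %lhs and %rhs are equal.
define i1 @all_eq(<8 x i8> %lhs, <8 x i8> %rhs) {
  %vec_ne = icmp ne <8 x i8> %lhs, %rhs
  %scalar_ne = bitcast <8 x i1> %vec_ne to i8
  %all_eq = icmp eq i8 %scalar_ne, 0
  ret i1 %all_eq
}

; The reverted fold turned that into a single scalar compare of the raw
; vector bits: lane-wise equality of integer elements is bitwise equality,
; so one i64 icmp replaces a vector icmp plus a mask reduction.
define i1 @all_eq_folded(<8 x i8> %lhs, <8 x i8> %rhs) {
  %lhs.scalar = bitcast <8 x i8> %lhs to i64
  %rhs.scalar = bitcast <8 x i8> %rhs to i64
  %all_eq = icmp eq i64 %lhs.scalar, %rhs.scalar
  ret i1 %all_eq
}
```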
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index b9aff7c..fd58a44 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -5882,54 +5882,6 @@ static Instruction *foldICmpInvariantGroup(ICmpInst &I) {
   return nullptr;
 }
 
-/// This function folds patterns produced by lowering of reduce idioms, such as
-/// llvm.vector.reduce.and which are lowered into instruction chains. This code
-/// attempts to generate fewer number of scalar comparisons instead of vector
-/// comparisons when possible.
-static Instruction *foldReductionIdiom(ICmpInst &I,
-                                       InstCombiner::BuilderTy &Builder,
-                                       const DataLayout &DL) {
-  if (I.getType()->isVectorTy())
-    return nullptr;
-  ICmpInst::Predicate OuterPred, InnerPred;
-  Value *LHS, *RHS;
-
-  // Match lowering of @llvm.vector.reduce.and. Turn
-  ///   %vec_ne = icmp ne <8 x i8> %lhs, %rhs
-  ///   %scalar_ne = bitcast <8 x i1> %vec_ne to i8
-  ///   %all_eq = icmp eq i8 %scalar_ne, 0
-  ///
-  /// into
-  ///
-  ///   %lhs.scalar = bitcast <8 x i8> %lhs to i64
-  ///   %rhs.scalar = bitcast <8 x i8> %rhs to i64
-  ///   %all_eq = icmp eq i64 %lhs.scalar, %rhs.scalar
-  if (!match(&I, m_ICmp(OuterPred,
-                        m_OneUse(m_BitCast(m_OneUse(
-                            m_ICmp(InnerPred, m_Value(LHS), m_Value(RHS))))),
-                        m_Zero())))
-    return nullptr;
-  auto *LHSTy = dyn_cast<FixedVectorType>(LHS->getType());
-  if (!LHSTy)
-    return nullptr;
-  unsigned NumBits =
-      LHSTy->getNumElements() * LHSTy->getElementType()->getIntegerBitWidth();
-  // TODO: Relax this to "not wider than max legal integer type"?
-  if (!DL.isLegalInteger(NumBits))
-    return nullptr;
-
-  // TODO: Generalize to isEquality and support other patterns.
-  if (OuterPred == ICmpInst::ICMP_EQ && InnerPred == ICmpInst::ICMP_NE) {
-    auto *ScalarTy = Builder.getIntNTy(NumBits);
-    LHS = Builder.CreateBitCast(LHS, ScalarTy, LHS->getName() + ".scalar");
-    RHS = Builder.CreateBitCast(RHS, ScalarTy, RHS->getName() + ".scalar");
-    return ICmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_EQ, LHS, RHS,
-                            I.getName());
-  }
-
-  return nullptr;
-}
-
 Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
   bool Changed = false;
   const SimplifyQuery Q = SQ.getWithInstruction(&I);
@@ -6172,9 +6124,6 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
   if (Instruction *Res = foldICmpInvariantGroup(I))
     return Res;
 
-  if (Instruction *Res = foldReductionIdiom(I, Builder, DL))
-    return Res;
-
   return Changed ? &I : nullptr;
 }
 
diff --git a/llvm/test/Transforms/InstCombine/icmp-vec.ll b/llvm/test/Transforms/InstCombine/icmp-vec.ll
index 39067e6..8c90fd3 100644
--- a/llvm/test/Transforms/InstCombine/icmp-vec.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-vec.ll
@@ -404,9 +404,9 @@ define <vscale x 2 x i1> @icmp_logical_or_scalablevec(<vscale x 2 x i64> %x, <vs
 
 define i1 @eq_cast_eq-1(<2 x i4> %x, <2 x i4> %y) {
 ; CHECK-LABEL: @eq_cast_eq-1(
-; CHECK-NEXT:    [[X_SCALAR:%.*]] = bitcast <2 x i4> [[X:%.*]] to i8
-; CHECK-NEXT:    [[Y_SCALAR:%.*]] = bitcast <2 x i4> [[Y:%.*]] to i8
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[X_SCALAR]], [[Y_SCALAR]]
+; CHECK-NEXT:    [[IC:%.*]] = icmp ne <2 x i4> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i1> [[IC]] to i2
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i2 [[TMP1]], 0
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %ic = icmp eq <2 x i4> %x, %y
diff --git a/llvm/test/Transforms/InstCombine/reduction-and-sext-zext-i1.ll b/llvm/test/Transforms/InstCombine/reduction-and-sext-zext-i1.ll
index a38d26b..40af570 100644
--- a/llvm/test/Transforms/InstCombine/reduction-and-sext-zext-i1.ll
+++ b/llvm/test/Transforms/InstCombine/reduction-and-sext-zext-i1.ll
@@ -100,12 +100,14 @@ define i64 @reduce_and_zext_external_use(<8 x i1> %x) {
 define i1 @reduce_and_pointer_cast(i8* %arg, i8* %arg1) {
 ; CHECK-LABEL: @reduce_and_pointer_cast(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[ARG1:%.*]] to i64*
-; CHECK-NEXT:    [[LHS1:%.*]] = load i64, i64* [[TMP0]], align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[ARG:%.*]] to i64*
-; CHECK-NEXT:    [[RHS2:%.*]] = load i64, i64* [[TMP1]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[LHS1]], [[RHS2]]
-; CHECK-NEXT:    ret i1 [[TMP2]]
+; CHECK-NEXT:    [[PTR1:%.*]] = bitcast i8* [[ARG1:%.*]] to <8 x i8>*
+; CHECK-NEXT:    [[PTR2:%.*]] = bitcast i8* [[ARG:%.*]] to <8 x i8>*
+; CHECK-NEXT:    [[LHS:%.*]] = load <8 x i8>, <8 x i8>* [[PTR1]], align 8
+; CHECK-NEXT:    [[RHS:%.*]] = load <8 x i8>, <8 x i8>* [[PTR2]], align 8
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <8 x i8> [[LHS]], [[RHS]]
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i1> [[CMP]] to i8
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i8 [[TMP0]], 0
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
 bb:
   %ptr1 = bitcast i8* %arg1 to <8 x i8>*
@@ -139,50 +141,6 @@ bb:
   ret i1 %all_eq
 }
 
-define i1 @reduce_and_pointer_cast_ne(i8* %arg, i8* %arg1) {
-; CHECK-LABEL: @reduce_and_pointer_cast_ne(
-; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[ARG1:%.*]] to i64*
-; CHECK-NEXT:    [[LHS1:%.*]] = load i64, i64* [[TMP0]], align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[ARG:%.*]] to i64*
-; CHECK-NEXT:    [[RHS2:%.*]] = load i64, i64* [[TMP1]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i64 [[LHS1]], [[RHS2]]
-; CHECK-NEXT:    ret i1 [[TMP2]]
-;
-bb:
-  %ptr1 = bitcast i8* %arg1 to <8 x i8>*
-  %ptr2 = bitcast i8* %arg to <8 x i8>*
-  %lhs = load <8 x i8>, <8 x i8>* %ptr1
-  %rhs = load <8 x i8>, <8 x i8>* %ptr2
-  %cmp = icmp eq <8 x i8> %lhs, %rhs
-  %all_eq = call i1 @llvm.vector.reduce.and.v8i32(<8 x i1> %cmp)
-  %any_ne = xor i1 %all_eq, 1
-  ret i1 %any_ne
-}
-
-define i1 @reduce_and_pointer_cast_ne_wide(i8* %arg, i8* %arg1) {
-; CHECK-LABEL: @reduce_and_pointer_cast_ne_wide(
-; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[PTR1:%.*]] = bitcast i8* [[ARG1:%.*]] to <8 x i16>*
-; CHECK-NEXT:    [[PTR2:%.*]] = bitcast i8* [[ARG:%.*]] to <8 x i16>*
-; CHECK-NEXT:    [[LHS:%.*]] = load <8 x i16>, <8 x i16>* [[PTR1]], align 16
-; CHECK-NEXT:    [[RHS:%.*]] = load <8 x i16>, <8 x i16>* [[PTR2]], align 16
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <8 x i16> [[LHS]], [[RHS]]
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i1> [[CMP]] to i8
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i8 [[TMP0]], 0
-; CHECK-NEXT:    ret i1 [[TMP1]]
-;
-bb:
-  %ptr1 = bitcast i8* %arg1 to <8 x i16>*
-  %ptr2 = bitcast i8* %arg to <8 x i16>*
-  %lhs = load <8 x i16>, <8 x i16>* %ptr1
-  %rhs = load <8 x i16>, <8 x i16>* %ptr2
-  %cmp = icmp eq <8 x i16> %lhs, %rhs
-  %all_eq = call i1 @llvm.vector.reduce.and.v8i32(<8 x i1> %cmp)
-  %any_ne = xor i1 %all_eq, 1
-  ret i1 %any_ne
-}
-
 declare i1 @llvm.vector.reduce.and.v8i32(<8 x i1> %a)
 declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %a)
 declare i64 @llvm.vector.reduce.and.v8i64(<8 x i64> %a)
diff --git a/llvm/test/Transforms/InstCombine/reduction-or-sext-zext-i1.ll b/llvm/test/Transforms/InstCombine/reduction-or-sext-zext-i1.ll
index 35174d4..6f5b34e 100644
--- a/llvm/test/Transforms/InstCombine/reduction-or-sext-zext-i1.ll
+++ b/llvm/test/Transforms/InstCombine/reduction-or-sext-zext-i1.ll
@@ -100,11 +100,13 @@ define i64 @reduce_or_zext_external_use(<8 x i1> %x) {
 define i1 @reduce_or_pointer_cast(i8* %arg, i8* %arg1) {
 ; CHECK-LABEL: @reduce_or_pointer_cast(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[ARG1:%.*]] to i64*
-; CHECK-NEXT:    [[LHS1:%.*]] = load i64, i64* [[TMP0]], align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[ARG:%.*]] to i64*
-; CHECK-NEXT:    [[RHS2:%.*]] = load i64, i64* [[TMP1]], align 8
-; CHECK-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[LHS1]], [[RHS2]]
+; CHECK-NEXT:    [[PTR1:%.*]] = bitcast i8* [[ARG1:%.*]] to <8 x i8>*
+; CHECK-NEXT:    [[PTR2:%.*]] = bitcast i8* [[ARG:%.*]] to <8 x i8>*
+; CHECK-NEXT:    [[LHS:%.*]] = load <8 x i8>, <8 x i8>* [[PTR1]], align 8
+; CHECK-NEXT:    [[RHS:%.*]] = load <8 x i8>, <8 x i8>* [[PTR2]], align 8
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <8 x i8> [[LHS]], [[RHS]]
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i1> [[CMP]] to i8
+; CHECK-NEXT:    [[DOTNOT:%.*]] = icmp eq i8 [[TMP0]], 0
 ; CHECK-NEXT:    ret i1 [[DOTNOT]]
 ;
 bb:
@@ -141,51 +143,6 @@ bb:
   ret i1 %all_eq
 }
 
-
-define i1 @reduce_or_pointer_cast_ne(i8* %arg, i8* %arg1) {
-; CHECK-LABEL: @reduce_or_pointer_cast_ne(
-; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[PTR1:%.*]] = bitcast i8* [[ARG1:%.*]] to <8 x i8>*
-; CHECK-NEXT:    [[PTR2:%.*]] = bitcast i8* [[ARG:%.*]] to <8 x i8>*
-; CHECK-NEXT:    [[LHS:%.*]] = load <8 x i8>, <8 x i8>* [[PTR1]], align 8
-; CHECK-NEXT:    [[RHS:%.*]] = load <8 x i8>, <8 x i8>* [[PTR2]], align 8
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <8 x i8> [[LHS]], [[RHS]]
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i1> [[CMP]] to i8
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i8 [[TMP0]], 0
-; CHECK-NEXT:    ret i1 [[TMP1]]
-;
-bb:
-  %ptr1 = bitcast i8* %arg1 to <8 x i8>*
-  %ptr2 = bitcast i8* %arg to <8 x i8>*
-  %lhs = load <8 x i8>, <8 x i8>* %ptr1
-  %rhs = load <8 x i8>, <8 x i8>* %ptr2
-  %cmp = icmp ne <8 x i8> %lhs, %rhs
-  %any_ne = call i1 @llvm.vector.reduce.or.v8i32(<8 x i1> %cmp)
-  ret i1 %any_ne
-}
-
-define i1 @reduce_or_pointer_cast_ne_wide(i8* %arg, i8* %arg1) {
-; CHECK-LABEL: @reduce_or_pointer_cast_ne_wide(
-; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[PTR1:%.*]] = bitcast i8* [[ARG1:%.*]] to <8 x i16>*
-; CHECK-NEXT:    [[PTR2:%.*]] = bitcast i8* [[ARG:%.*]] to <8 x i16>*
-; CHECK-NEXT:    [[LHS:%.*]] = load <8 x i16>, <8 x i16>* [[PTR1]], align 16
-; CHECK-NEXT:    [[RHS:%.*]] = load <8 x i16>, <8 x i16>* [[PTR2]], align 16
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <8 x i16> [[LHS]], [[RHS]]
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i1> [[CMP]] to i8
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i8 [[TMP0]], 0
-; CHECK-NEXT:    ret i1 [[TMP1]]
-;
-bb:
-  %ptr1 = bitcast i8* %arg1 to <8 x i16>*
-  %ptr2 = bitcast i8* %arg to <8 x i16>*
-  %lhs = load <8 x i16>, <8 x i16>* %ptr1
-  %rhs = load <8 x i16>, <8 x i16>* %ptr2
-  %cmp = icmp ne <8 x i16> %lhs, %rhs
-  %any_ne = call i1 @llvm.vector.reduce.or.v8i32(<8 x i1> %cmp)
-  ret i1 %any_ne
-}
-
 declare i1 @llvm.vector.reduce.or.v8i32(<8 x i1> %a)
 declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %a)
 declare i64 @llvm.vector.reduce.or.v8i64(<8 x i64> %a)
-- 
2.7.4
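P.S. The actual reproducer is only linked from the code review, but the
assertion text (X = llvm::IntegerType, Y = const llvm::Type) points at
foldReductionIdiom() calling getIntegerBitWidth() on the vector element
type without checking that the elements are integers. The IR below is a
hypothetical sketch of an input that would plausibly trip it, not the
reproducer from the review: icmp also accepts vectors of pointers, so the
matched operand's element type need not be an IntegerType.

```
; Fixed vector with pointer elements: cast<IntegerType> inside
; getIntegerBitWidth() should assert on an assertions-enabled build of
; the reverted revision (hypothetical reproducer).
define i1 @all_eq_ptrs(<2 x i8*> %lhs, <2 x i8*> %rhs) {
  %vec_ne = icmp ne <2 x i8*> %lhs, %rhs
  %scalar_ne = bitcast <2 x i1> %vec_ne to i2
  %all_eq = icmp eq i2 %scalar_ne, 0
  ret i1 %all_eq
}
```

Feeding such a file to `opt -passes=instcombine -S` at the reverted
revision would be expected to reproduce the crash.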