From 25ee55c0baff316d3a7b1d7d2830a168af3fc46a Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 14 Jul 2021 09:02:31 -0400 Subject: [PATCH] [SLP] match logical and/or as reduction candidates This has been a work-in-progress for a long time...we finally have all of the pieces in place to handle vectorization of compare code as shown in: https://llvm.org/PR41312 To do this (see PhaseOrdering tests), we converted SimplifyCFG and InstCombine to the poison-safe (select) forms of the logic ops, so now we need to have SLP recognize those patterns and insert a freeze op to make a safe reduction: https://alive2.llvm.org/ce/z/NH54Ah We get the minimal patterns with this patch, but the PhaseOrdering tests show that we still need adjustments to get the ideal IR in some or all of the motivating cases. Differential Revision: https://reviews.llvm.org/D105730 --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 36 ++- .../PhaseOrdering/X86/vector-reductions-logical.ll | 281 +++++++++------------ .../SLPVectorizer/X86/reduction-logical.ll | 118 +++------ 3 files changed, 183 insertions(+), 252 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index c7f36b2..3dd3c1f 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -7295,11 +7295,22 @@ class HorizontalReduction { RecurrenceDescriptor::isMinMaxRecurrenceKind(getRdxKind(I)); } + // And/or are potentially poison-safe logical patterns like: + // select x, y, false + // select x, true, y + static bool isBoolLogicOp(Instruction *I) { + return match(I, m_LogicalAnd(m_Value(), m_Value())) || + match(I, m_LogicalOr(m_Value(), m_Value())); + } + /// Checks if instruction is associative and can be vectorized. 
static bool isVectorizable(RecurKind Kind, Instruction *I) { if (Kind == RecurKind::None) return false; - if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(Kind)) + + // Integer ops that map to select instructions or intrinsics are fine. + if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(Kind) || + isBoolLogicOp(I)) return true; if (Kind == RecurKind::FMax || Kind == RecurKind::FMin) { @@ -7312,6 +7323,16 @@ class HorizontalReduction { return I->isAssociative(); } + static Value *getRdxOperand(Instruction *I, unsigned Index) { + // Poison-safe 'or' takes the form: select X, true, Y + // To make that work with the normal operand processing, we skip the + // true value operand. + // TODO: Change the code and data structures to handle this without a hack. + if (getRdxKind(I) == RecurKind::Or && isa<SelectInst>(I) && Index == 1) + return I->getOperand(2); + return I->getOperand(Index); + } + /// Checks if the ParentStackElem.first should be marked as a reduction /// operation with an extra argument or as extra argument itself. void markExtraArg(std::pair<Instruction *, unsigned> &ParentStackElem, @@ -7420,9 +7441,11 @@ class HorizontalReduction { return RecurKind::Add; if (match(I, m_Mul(m_Value(), m_Value()))) return RecurKind::Mul; - if (match(I, m_And(m_Value(), m_Value()))) + if (match(I, m_And(m_Value(), m_Value())) || + match(I, m_LogicalAnd(m_Value(), m_Value()))) return RecurKind::And; - if (match(I, m_Or(m_Value(), m_Value()))) + if (match(I, m_Or(m_Value(), m_Value())) || + match(I, m_LogicalOr(m_Value(), m_Value()))) return RecurKind::Or; if (match(I, m_Xor(m_Value(), m_Value()))) return RecurKind::Xor; @@ -7664,7 +7687,7 @@ public: } // Visit operands. - Value *EdgeVal = TreeN->getOperand(EdgeToVisit); + Value *EdgeVal = getRdxOperand(TreeN, EdgeToVisit); auto *EdgeInst = dyn_cast<Instruction>(EdgeVal); if (!EdgeInst) { // Edge value is not a reduction instruction or a leaf instruction. 
@@ -7849,6 +7872,11 @@ public: else Builder.SetInsertPoint(RdxRootInst); + // To prevent poison from leaking across what used to be sequential, safe, + // scalar boolean logic operations, the reduction operand must be frozen. + if (isa<SelectInst>(RdxRootInst) && isBoolLogicOp(RdxRootInst)) + VectorizedRoot = Builder.CreateFreeze(VectorizedRoot); + Value *ReducedSubTree = emitReduction(VectorizedRoot, Builder, ReduxWidth, TTI); diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll index ea8a6c0..479ad43 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll @@ -7,31 +7,23 @@ target triple = "x86_64--" define float @test_merge_allof_v4sf(<4 x float> %t) { ; CHECK-LABEL: @test_merge_allof_v4sf( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[T:%.*]], i32 0 -; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[VECEXT]], 0.000000e+00 -; CHECK-NEXT: [[VECEXT2:%.*]] = extractelement <4 x float> [[T]], i32 1 -; CHECK-NEXT: [[CMP4:%.*]] = fcmp olt float [[VECEXT2]], 0.000000e+00 -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP]], i1 [[CMP4]], i1 false -; CHECK-NEXT: [[VECEXT7:%.*]] = extractelement <4 x float> [[T]], i32 2 -; CHECK-NEXT: [[CMP9:%.*]] = fcmp olt float [[VECEXT7]], 0.000000e+00 -; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[OR_COND]], i1 [[CMP9]], i1 false -; CHECK-NEXT: [[VECEXT12:%.*]] = extractelement <4 x float> [[T]], i32 3 -; CHECK-NEXT: [[CMP14:%.*]] = fcmp olt float [[VECEXT12]], 0.000000e+00 -; CHECK-NEXT: [[OR_COND2:%.*]] = select i1 [[OR_COND1]], i1 [[CMP14]], i1 false -; CHECK-NEXT: br i1 [[OR_COND2]], label [[COMMON_RET:%.*]], label [[LOR_LHS_FALSE:%.*]] +; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x float> [[T:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = fcmp olt <4 x float> [[T_FR]], zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i1> 
[[TMP0]] to i4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i4 [[TMP1]], -1 +; CHECK-NEXT: br i1 [[TMP2]], label [[COMMON_RET:%.*]], label [[LOR_LHS_FALSE:%.*]] ; CHECK: common.ret: ; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi float [ [[SPEC_SELECT:%.*]], [[LOR_LHS_FALSE]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] ; CHECK-NEXT: ret float [[COMMON_RET_OP]] ; CHECK: lor.lhs.false: -; CHECK-NEXT: [[CMP18:%.*]] = fcmp ogt float [[VECEXT]], 1.000000e+00 -; CHECK-NEXT: [[CMP23:%.*]] = fcmp ogt float [[VECEXT2]], 1.000000e+00 -; CHECK-NEXT: [[OR_COND3:%.*]] = select i1 [[CMP18]], i1 [[CMP23]], i1 false -; CHECK-NEXT: [[CMP28:%.*]] = fcmp ogt float [[VECEXT7]], 1.000000e+00 -; CHECK-NEXT: [[OR_COND4:%.*]] = select i1 [[OR_COND3]], i1 [[CMP28]], i1 false -; CHECK-NEXT: [[CMP33:%.*]] = fcmp ogt float [[VECEXT12]], 1.000000e+00 -; CHECK-NEXT: [[OR_COND5:%.*]] = select i1 [[OR_COND4]], i1 [[CMP33]], i1 false -; CHECK-NEXT: [[ADD:%.*]] = fadd float [[VECEXT]], [[VECEXT2]] -; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[OR_COND5]], float 0.000000e+00, float [[ADD]] +; CHECK-NEXT: [[T_FR6:%.*]] = freeze <4 x float> [[T]] +; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[T_FR6]], +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i4 [[TMP4]], -1 +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[T]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[SHIFT]], [[T]] +; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x float> [[TMP6]], i32 0 +; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[TMP5]], float 0.000000e+00, float [[ADD]] ; CHECK-NEXT: br label [[COMMON_RET]] ; entry: @@ -99,26 +91,23 @@ return: define float @test_merge_anyof_v4sf(<4 x float> %t) { ; CHECK-LABEL: @test_merge_anyof_v4sf( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[T:%.*]], i32 0 -; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[VECEXT]], 0.000000e+00 -; CHECK-NEXT: [[VECEXT2:%.*]] = extractelement <4 x float> [[T]], 
i32 1 -; CHECK-NEXT: [[CMP4:%.*]] = fcmp olt float [[VECEXT2]], 0.000000e+00 -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP]], i1 true, i1 [[CMP4]] -; CHECK-NEXT: [[VECEXT7:%.*]] = extractelement <4 x float> [[T]], i32 2 -; CHECK-NEXT: [[CMP9:%.*]] = fcmp olt float [[VECEXT7]], 0.000000e+00 -; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[OR_COND]], i1 true, i1 [[CMP9]] -; CHECK-NEXT: [[VECEXT12:%.*]] = extractelement <4 x float> [[T]], i32 3 -; CHECK-NEXT: [[CMP14:%.*]] = fcmp olt float [[VECEXT12]], 0.000000e+00 -; CHECK-NEXT: [[OR_COND2:%.*]] = select i1 [[OR_COND1]], i1 true, i1 [[CMP14]] -; CHECK-NEXT: [[CMP19:%.*]] = fcmp ogt float [[VECEXT]], 1.000000e+00 -; CHECK-NEXT: [[OR_COND3:%.*]] = select i1 [[OR_COND2]], i1 true, i1 [[CMP19]] -; CHECK-NEXT: [[CMP24:%.*]] = fcmp ogt float [[VECEXT2]], 1.000000e+00 -; CHECK-NEXT: [[OR_COND4:%.*]] = select i1 [[OR_COND3]], i1 true, i1 [[CMP24]] -; CHECK-NEXT: [[CMP29:%.*]] = fcmp ogt float [[VECEXT7]], 1.000000e+00 -; CHECK-NEXT: [[OR_COND5:%.*]] = select i1 [[OR_COND4]], i1 true, i1 [[CMP29]] -; CHECK-NEXT: [[CMP34:%.*]] = fcmp ogt float [[VECEXT12]], 1.000000e+00 -; CHECK-NEXT: [[OR_COND6:%.*]] = select i1 [[OR_COND5]], i1 true, i1 [[CMP34]] -; CHECK-NEXT: [[ADD:%.*]] = fadd float [[VECEXT]], [[VECEXT2]] +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[T:%.*]], i32 3 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[T]], i32 2 +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[T]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[T]], i32 0 +; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x float> [[T]] +; CHECK-NEXT: [[TMP4:%.*]] = fcmp olt <4 x float> [[T_FR]], zeroinitializer +; CHECK-NEXT: [[CMP19:%.*]] = fcmp ogt float [[TMP3]], 1.000000e+00 +; CHECK-NEXT: [[CMP24:%.*]] = fcmp ogt float [[TMP2]], 1.000000e+00 +; CHECK-NEXT: [[CMP29:%.*]] = fcmp ogt float [[TMP1]], 1.000000e+00 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i4 
[[TMP5]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP6]], [[CMP19]] +; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[CMP24]] +; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[CMP29]] +; CHECK-NEXT: [[CMP34:%.*]] = fcmp ogt float [[TMP0]], 1.000000e+00 +; CHECK-NEXT: [[OR_COND6:%.*]] = select i1 [[TMP9]], i1 true, i1 [[CMP34]] +; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP2]] ; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND6]], float 0.000000e+00, float [[ADD]] ; CHECK-NEXT: ret float [[RETVAL_0]] ; @@ -187,31 +176,23 @@ return: define float @test_separate_allof_v4sf(<4 x float> %t) { ; CHECK-LABEL: @test_separate_allof_v4sf( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[T:%.*]], i32 0 -; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[VECEXT]], 0.000000e+00 -; CHECK-NEXT: [[VECEXT2:%.*]] = extractelement <4 x float> [[T]], i32 1 -; CHECK-NEXT: [[CMP4:%.*]] = fcmp olt float [[VECEXT2]], 0.000000e+00 -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP]], i1 [[CMP4]], i1 false -; CHECK-NEXT: [[VECEXT7:%.*]] = extractelement <4 x float> [[T]], i32 2 -; CHECK-NEXT: [[CMP9:%.*]] = fcmp olt float [[VECEXT7]], 0.000000e+00 -; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[OR_COND]], i1 [[CMP9]], i1 false -; CHECK-NEXT: [[VECEXT12:%.*]] = extractelement <4 x float> [[T]], i32 3 -; CHECK-NEXT: [[CMP14:%.*]] = fcmp olt float [[VECEXT12]], 0.000000e+00 -; CHECK-NEXT: [[OR_COND2:%.*]] = select i1 [[OR_COND1]], i1 [[CMP14]], i1 false -; CHECK-NEXT: br i1 [[OR_COND2]], label [[COMMON_RET:%.*]], label [[IF_END:%.*]] +; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x float> [[T:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = fcmp olt <4 x float> [[T_FR]], zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i1> [[TMP0]] to i4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i4 [[TMP1]], -1 +; CHECK-NEXT: br i1 [[TMP2]], label [[COMMON_RET:%.*]], label [[IF_END:%.*]] ; CHECK: common.ret: ; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi float [ [[SPEC_SELECT:%.*]], [[IF_END]] ], [ 
0.000000e+00, [[ENTRY:%.*]] ] ; CHECK-NEXT: ret float [[COMMON_RET_OP]] ; CHECK: if.end: -; CHECK-NEXT: [[CMP18:%.*]] = fcmp ogt float [[VECEXT]], 1.000000e+00 -; CHECK-NEXT: [[CMP23:%.*]] = fcmp ogt float [[VECEXT2]], 1.000000e+00 -; CHECK-NEXT: [[OR_COND3:%.*]] = select i1 [[CMP18]], i1 [[CMP23]], i1 false -; CHECK-NEXT: [[CMP28:%.*]] = fcmp ogt float [[VECEXT7]], 1.000000e+00 -; CHECK-NEXT: [[OR_COND4:%.*]] = select i1 [[OR_COND3]], i1 [[CMP28]], i1 false -; CHECK-NEXT: [[CMP33:%.*]] = fcmp ogt float [[VECEXT12]], 1.000000e+00 -; CHECK-NEXT: [[OR_COND5:%.*]] = select i1 [[OR_COND4]], i1 [[CMP33]], i1 false -; CHECK-NEXT: [[ADD:%.*]] = fadd float [[VECEXT]], [[VECEXT2]] -; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[OR_COND5]], float 0.000000e+00, float [[ADD]] +; CHECK-NEXT: [[T_FR6:%.*]] = freeze <4 x float> [[T]] +; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[T_FR6]], +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i4 [[TMP4]], -1 +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[T]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[SHIFT]], [[T]] +; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x float> [[TMP6]], i32 0 +; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[TMP5]], float 0.000000e+00, float [[ADD]] ; CHECK-NEXT: br label [[COMMON_RET]] ; entry: @@ -282,26 +263,23 @@ return: define float @test_separate_anyof_v4sf(<4 x float> %t) { ; CHECK-LABEL: @test_separate_anyof_v4sf( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[T:%.*]], i32 0 -; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[VECEXT]], 0.000000e+00 -; CHECK-NEXT: [[VECEXT2:%.*]] = extractelement <4 x float> [[T]], i32 1 -; CHECK-NEXT: [[CMP4:%.*]] = fcmp olt float [[VECEXT2]], 0.000000e+00 -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP]], i1 true, i1 [[CMP4]] -; CHECK-NEXT: [[VECEXT7:%.*]] = extractelement <4 x float> [[T]], i32 2 -; CHECK-NEXT: [[CMP9:%.*]] = fcmp olt float 
[[VECEXT7]], 0.000000e+00 -; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[OR_COND]], i1 true, i1 [[CMP9]] -; CHECK-NEXT: [[VECEXT12:%.*]] = extractelement <4 x float> [[T]], i32 3 -; CHECK-NEXT: [[CMP14:%.*]] = fcmp olt float [[VECEXT12]], 0.000000e+00 -; CHECK-NEXT: [[OR_COND2:%.*]] = select i1 [[OR_COND1]], i1 true, i1 [[CMP14]] -; CHECK-NEXT: [[CMP18:%.*]] = fcmp ogt float [[VECEXT]], 1.000000e+00 -; CHECK-NEXT: [[OR_COND3:%.*]] = select i1 [[OR_COND2]], i1 true, i1 [[CMP18]] -; CHECK-NEXT: [[CMP23:%.*]] = fcmp ogt float [[VECEXT2]], 1.000000e+00 -; CHECK-NEXT: [[OR_COND4:%.*]] = select i1 [[OR_COND3]], i1 true, i1 [[CMP23]] -; CHECK-NEXT: [[CMP28:%.*]] = fcmp ogt float [[VECEXT7]], 1.000000e+00 -; CHECK-NEXT: [[OR_COND5:%.*]] = select i1 [[OR_COND4]], i1 true, i1 [[CMP28]] -; CHECK-NEXT: [[CMP33:%.*]] = fcmp ogt float [[VECEXT12]], 1.000000e+00 -; CHECK-NEXT: [[OR_COND6:%.*]] = select i1 [[OR_COND5]], i1 true, i1 [[CMP33]] -; CHECK-NEXT: [[ADD:%.*]] = fadd float [[VECEXT]], [[VECEXT2]] +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[T:%.*]], i32 3 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[T]], i32 2 +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[T]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[T]], i32 0 +; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x float> [[T]] +; CHECK-NEXT: [[TMP4:%.*]] = fcmp olt <4 x float> [[T_FR]], zeroinitializer +; CHECK-NEXT: [[CMP18:%.*]] = fcmp ogt float [[TMP3]], 1.000000e+00 +; CHECK-NEXT: [[CMP23:%.*]] = fcmp ogt float [[TMP2]], 1.000000e+00 +; CHECK-NEXT: [[CMP28:%.*]] = fcmp ogt float [[TMP1]], 1.000000e+00 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i4 [[TMP5]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP6]], [[CMP18]] +; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[CMP23]] +; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[CMP28]] +; CHECK-NEXT: [[CMP33:%.*]] = fcmp ogt float [[TMP0]], 1.000000e+00 +; CHECK-NEXT: 
[[OR_COND6:%.*]] = select i1 [[TMP9]], i1 true, i1 [[CMP33]] +; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP2]] ; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND6]], float 0.000000e+00, float [[ADD]] ; CHECK-NEXT: ret float [[RETVAL_0]] ; @@ -373,29 +351,21 @@ return: define float @test_merge_allof_v4si(<4 x i32> %t) { ; CHECK-LABEL: @test_merge_allof_v4si( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x i32> [[T:%.*]], i32 0 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[VECEXT]], 1 -; CHECK-NEXT: [[VECEXT1:%.*]] = extractelement <4 x i32> [[T]], i32 1 -; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[VECEXT1]], 1 -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP]], i1 [[CMP2]], i1 false -; CHECK-NEXT: [[VECEXT4:%.*]] = extractelement <4 x i32> [[T]], i32 2 -; CHECK-NEXT: [[CMP5:%.*]] = icmp slt i32 [[VECEXT4]], 1 -; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[OR_COND]], i1 [[CMP5]], i1 false -; CHECK-NEXT: [[VECEXT7:%.*]] = extractelement <4 x i32> [[T]], i32 3 -; CHECK-NEXT: [[CMP8:%.*]] = icmp slt i32 [[VECEXT7]], 1 -; CHECK-NEXT: [[OR_COND2:%.*]] = select i1 [[OR_COND1]], i1 [[CMP8]], i1 false -; CHECK-NEXT: br i1 [[OR_COND2]], label [[RETURN:%.*]], label [[LOR_LHS_FALSE:%.*]] +; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x i32> [[T:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> [[T_FR]], +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i1> [[TMP0]] to i4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i4 [[TMP1]], -1 +; CHECK-NEXT: br i1 [[TMP2]], label [[RETURN:%.*]], label [[LOR_LHS_FALSE:%.*]] ; CHECK: lor.lhs.false: -; CHECK-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[VECEXT]], 255 -; CHECK-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[VECEXT1]], 255 -; CHECK-NEXT: [[OR_COND3:%.*]] = select i1 [[CMP10]], i1 [[CMP13]], i1 false -; CHECK-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[VECEXT4]], 255 -; CHECK-NEXT: [[OR_COND4:%.*]] = select i1 [[OR_COND3]], i1 [[CMP16]], i1 false -; CHECK-NEXT: [[CMP19:%.*]] = icmp sgt i32 [[VECEXT7]], 255 -; CHECK-NEXT: [[OR_COND5:%.*]] = 
select i1 [[OR_COND4]], i1 [[CMP19]], i1 false -; CHECK-NEXT: br i1 [[OR_COND5]], label [[RETURN]], label [[IF_END:%.*]] +; CHECK-NEXT: [[T_FR6:%.*]] = freeze <4 x i32> [[T]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[T_FR6]], +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i4 [[TMP4]], -1 +; CHECK-NEXT: br i1 [[TMP5]], label [[RETURN]], label [[IF_END:%.*]] ; CHECK: if.end: -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[VECEXT]], [[VECEXT1]] +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[SHIFT]], [[T]] +; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0 ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[ADD]] to float ; CHECK-NEXT: br label [[RETURN]] ; CHECK: return: @@ -460,26 +430,23 @@ return: define float @test_merge_anyof_v4si(<4 x i32> %t) { ; CHECK-LABEL: @test_merge_anyof_v4si( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x i32> [[T:%.*]], i32 0 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[VECEXT]], 1 -; CHECK-NEXT: [[VECEXT1:%.*]] = extractelement <4 x i32> [[T]], i32 1 -; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[VECEXT1]], 1 -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP]], i1 true, i1 [[CMP2]] -; CHECK-NEXT: [[VECEXT4:%.*]] = extractelement <4 x i32> [[T]], i32 2 -; CHECK-NEXT: [[CMP5:%.*]] = icmp slt i32 [[VECEXT4]], 1 -; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[OR_COND]], i1 true, i1 [[CMP5]] -; CHECK-NEXT: [[VECEXT7:%.*]] = extractelement <4 x i32> [[T]], i32 3 -; CHECK-NEXT: [[CMP8:%.*]] = icmp slt i32 [[VECEXT7]], 1 -; CHECK-NEXT: [[OR_COND2:%.*]] = select i1 [[OR_COND1]], i1 true, i1 [[CMP8]] -; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[VECEXT]], 255 -; CHECK-NEXT: [[OR_COND3:%.*]] = select i1 [[OR_COND2]], i1 true, i1 [[CMP11]] -; CHECK-NEXT: [[CMP14:%.*]] = icmp sgt i32 [[VECEXT1]], 255 -; CHECK-NEXT: [[OR_COND4:%.*]] = select i1 [[OR_COND3]], i1 true, i1 
[[CMP14]] -; CHECK-NEXT: [[CMP17:%.*]] = icmp sgt i32 [[VECEXT4]], 255 -; CHECK-NEXT: [[OR_COND5:%.*]] = select i1 [[OR_COND4]], i1 true, i1 [[CMP17]] -; CHECK-NEXT: [[CMP20:%.*]] = icmp sgt i32 [[VECEXT7]], 255 -; CHECK-NEXT: [[OR_COND6:%.*]] = select i1 [[OR_COND5]], i1 true, i1 [[CMP20]] -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[VECEXT]], [[VECEXT1]] +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i32> [[T:%.*]], i32 3 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[T]], i32 2 +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[T]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[T]], i32 0 +; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x i32> [[T]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i32> [[T_FR]], +; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP3]], 255 +; CHECK-NEXT: [[CMP14:%.*]] = icmp sgt i32 [[TMP2]], 255 +; CHECK-NEXT: [[CMP17:%.*]] = icmp sgt i32 [[TMP1]], 255 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i4 [[TMP5]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP6]], [[CMP11]] +; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[CMP14]] +; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[CMP17]] +; CHECK-NEXT: [[CMP20:%.*]] = icmp sgt i32 [[TMP0]], 255 +; CHECK-NEXT: [[OR_COND6:%.*]] = select i1 [[TMP9]], i1 true, i1 [[CMP20]] +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], [[TMP2]] ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[ADD]] to float ; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND6]], float 0.000000e+00, float [[CONV]] ; CHECK-NEXT: ret float [[RETVAL_0]] @@ -542,31 +509,23 @@ return: define i32 @test_separate_allof_v4si(<4 x i32> %t) { ; CHECK-LABEL: @test_separate_allof_v4si( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x i32> [[T:%.*]], i32 0 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[VECEXT]], 1 -; CHECK-NEXT: [[VECEXT1:%.*]] = extractelement <4 x i32> [[T]], i32 1 -; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[VECEXT1]], 1 -; 
CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP]], i1 [[CMP2]], i1 false -; CHECK-NEXT: [[VECEXT4:%.*]] = extractelement <4 x i32> [[T]], i32 2 -; CHECK-NEXT: [[CMP5:%.*]] = icmp slt i32 [[VECEXT4]], 1 -; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[OR_COND]], i1 [[CMP5]], i1 false -; CHECK-NEXT: [[VECEXT7:%.*]] = extractelement <4 x i32> [[T]], i32 3 -; CHECK-NEXT: [[CMP8:%.*]] = icmp slt i32 [[VECEXT7]], 1 -; CHECK-NEXT: [[OR_COND2:%.*]] = select i1 [[OR_COND1]], i1 [[CMP8]], i1 false -; CHECK-NEXT: br i1 [[OR_COND2]], label [[COMMON_RET:%.*]], label [[IF_END:%.*]] +; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x i32> [[T:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> [[T_FR]], +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i1> [[TMP0]] to i4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i4 [[TMP1]], -1 +; CHECK-NEXT: br i1 [[TMP2]], label [[COMMON_RET:%.*]], label [[IF_END:%.*]] ; CHECK: common.ret: ; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ [[SPEC_SELECT:%.*]], [[IF_END]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: ret i32 [[COMMON_RET_OP]] ; CHECK: if.end: -; CHECK-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[VECEXT]], 255 -; CHECK-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[VECEXT1]], 255 -; CHECK-NEXT: [[OR_COND3:%.*]] = select i1 [[CMP10]], i1 [[CMP13]], i1 false -; CHECK-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[VECEXT4]], 255 -; CHECK-NEXT: [[OR_COND4:%.*]] = select i1 [[OR_COND3]], i1 [[CMP16]], i1 false -; CHECK-NEXT: [[CMP19:%.*]] = icmp sgt i32 [[VECEXT7]], 255 -; CHECK-NEXT: [[OR_COND5:%.*]] = select i1 [[OR_COND4]], i1 [[CMP19]], i1 false -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[VECEXT]], [[VECEXT1]] -; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[OR_COND5]], i32 0, i32 [[ADD]] +; CHECK-NEXT: [[T_FR6:%.*]] = freeze <4 x i32> [[T]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[T_FR6]], +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i4 [[TMP4]], -1 +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T]], <4 x i32> poison, <4 
x i32> +; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[SHIFT]], [[T]] +; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0 +; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[TMP5]], i32 0, i32 [[ADD]] ; CHECK-NEXT: br label [[COMMON_RET]] ; entry: @@ -629,31 +588,23 @@ return: define i32 @test_separate_anyof_v4si(<4 x i32> %t) { ; CHECK-LABEL: @test_separate_anyof_v4si( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x i32> [[T:%.*]], i32 0 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[VECEXT]], 1 -; CHECK-NEXT: [[VECEXT1:%.*]] = extractelement <4 x i32> [[T]], i32 1 -; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[VECEXT1]], 1 -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP]], i1 true, i1 [[CMP2]] -; CHECK-NEXT: [[VECEXT4:%.*]] = extractelement <4 x i32> [[T]], i32 2 -; CHECK-NEXT: [[CMP5:%.*]] = icmp slt i32 [[VECEXT4]], 1 -; CHECK-NEXT: [[OR_COND1:%.*]] = select i1 [[OR_COND]], i1 true, i1 [[CMP5]] -; CHECK-NEXT: [[VECEXT7:%.*]] = extractelement <4 x i32> [[T]], i32 3 -; CHECK-NEXT: [[CMP8:%.*]] = icmp slt i32 [[VECEXT7]], 1 -; CHECK-NEXT: [[OR_COND2:%.*]] = select i1 [[OR_COND1]], i1 true, i1 [[CMP8]] -; CHECK-NEXT: br i1 [[OR_COND2]], label [[COMMON_RET:%.*]], label [[IF_END:%.*]] +; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x i32> [[T:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> [[T_FR]], +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i1> [[TMP0]] to i4 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i4 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[DOTNOT]], label [[IF_END:%.*]], label [[COMMON_RET:%.*]] ; CHECK: common.ret: ; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ [[SPEC_SELECT:%.*]], [[IF_END]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: ret i32 [[COMMON_RET_OP]] ; CHECK: if.end: -; CHECK-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[VECEXT]], 255 -; CHECK-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[VECEXT1]], 255 -; CHECK-NEXT: [[OR_COND3:%.*]] = select i1 [[CMP10]], i1 true, i1 [[CMP13]] -; CHECK-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[VECEXT4]], 255 -; 
CHECK-NEXT: [[OR_COND4:%.*]] = select i1 [[OR_COND3]], i1 true, i1 [[CMP16]] -; CHECK-NEXT: [[CMP19:%.*]] = icmp sgt i32 [[VECEXT7]], 255 -; CHECK-NEXT: [[OR_COND5:%.*]] = select i1 [[OR_COND4]], i1 true, i1 [[CMP19]] -; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[VECEXT]], [[VECEXT1]] -; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[OR_COND5]], i32 0, i32 [[ADD]] +; CHECK-NEXT: [[T_FR6:%.*]] = freeze <4 x i32> [[T]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[T_FR6]], +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i1> [[TMP2]] to i4 +; CHECK-NEXT: [[DOTNOT7:%.*]] = icmp eq i4 [[TMP3]], 0 +; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw <4 x i32> [[SHIFT]], [[T]] +; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0 +; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[DOTNOT7]], i32 [[ADD]], i32 0 ; CHECK-NEXT: br label [[COMMON_RET]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll index 141ede2..312217d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll @@ -3,18 +3,10 @@ define i1 @logical_and_icmp(<4 x i32> %x) { ; CHECK-LABEL: @logical_and_icmp( -; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0 -; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1 -; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2 -; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3 -; CHECK-NEXT: [[C0:%.*]] = icmp slt i32 [[X0]], 0 -; CHECK-NEXT: [[C1:%.*]] = icmp slt i32 [[X1]], 0 -; CHECK-NEXT: [[C2:%.*]] = icmp slt i32 [[X2]], 0 -; CHECK-NEXT: [[C3:%.*]] = icmp slt i32 [[X3]], 0 -; CHECK-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false -; CHECK-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false -; CHECK-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[C3]], 
i1 false -; CHECK-NEXT: ret i1 [[S3]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]]) +; CHECK-NEXT: ret i1 [[TMP3]] ; %x0 = extractelement <4 x i32> %x, i32 0 %x1 = extractelement <4 x i32> %x, i32 1 @@ -32,22 +24,10 @@ define i1 @logical_and_icmp(<4 x i32> %x) { define i1 @logical_or_icmp(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @logical_or_icmp( -; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0 -; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1 -; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2 -; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3 -; CHECK-NEXT: [[Y0:%.*]] = extractelement <4 x i32> [[Y:%.*]], i32 0 -; CHECK-NEXT: [[Y1:%.*]] = extractelement <4 x i32> [[Y]], i32 1 -; CHECK-NEXT: [[Y2:%.*]] = extractelement <4 x i32> [[Y]], i32 2 -; CHECK-NEXT: [[Y3:%.*]] = extractelement <4 x i32> [[Y]], i32 3 -; CHECK-NEXT: [[C0:%.*]] = icmp slt i32 [[X0]], [[Y0]] -; CHECK-NEXT: [[C1:%.*]] = icmp slt i32 [[X1]], [[Y1]] -; CHECK-NEXT: [[C2:%.*]] = icmp slt i32 [[X2]], [[Y2]] -; CHECK-NEXT: [[C3:%.*]] = icmp slt i32 [[X3]], [[Y3]] -; CHECK-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 true, i1 [[C1]] -; CHECK-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 true, i1 [[C2]] -; CHECK-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 true, i1 [[C3]] -; CHECK-NEXT: ret i1 [[S3]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]]) +; CHECK-NEXT: ret i1 [[TMP3]] ; %x0 = extractelement <4 x i32> %x, i32 0 %x1 = extractelement <4 x i32> %x, i32 1 @@ -69,18 +49,10 @@ define i1 @logical_or_icmp(<4 x i32> %x, <4 x i32> %y) { define i1 @logical_and_fcmp(<4 x float> %x) { ; CHECK-LABEL: @logical_and_fcmp( -; 
CHECK-NEXT: [[X0:%.*]] = extractelement <4 x float> [[X:%.*]], i32 0 -; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x float> [[X]], i32 1 -; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x float> [[X]], i32 2 -; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x float> [[X]], i32 3 -; CHECK-NEXT: [[C0:%.*]] = fcmp olt float [[X0]], 0.000000e+00 -; CHECK-NEXT: [[C1:%.*]] = fcmp olt float [[X1]], 0.000000e+00 -; CHECK-NEXT: [[C2:%.*]] = fcmp olt float [[X2]], 0.000000e+00 -; CHECK-NEXT: [[C3:%.*]] = fcmp olt float [[X3]], 0.000000e+00 -; CHECK-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false -; CHECK-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false -; CHECK-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[C3]], i1 false -; CHECK-NEXT: ret i1 [[S3]] +; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt <4 x float> [[X:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]]) +; CHECK-NEXT: ret i1 [[TMP3]] ; %x0 = extractelement <4 x float> %x, i32 0 %x1 = extractelement <4 x float> %x, i32 1 @@ -98,18 +70,10 @@ define i1 @logical_and_fcmp(<4 x float> %x) { define i1 @logical_or_fcmp(<4 x float> %x) { ; CHECK-LABEL: @logical_or_fcmp( -; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x float> [[X:%.*]], i32 0 -; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x float> [[X]], i32 1 -; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x float> [[X]], i32 2 -; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x float> [[X]], i32 3 -; CHECK-NEXT: [[C0:%.*]] = fcmp olt float [[X0]], 0.000000e+00 -; CHECK-NEXT: [[C1:%.*]] = fcmp olt float [[X1]], 0.000000e+00 -; CHECK-NEXT: [[C2:%.*]] = fcmp olt float [[X2]], 0.000000e+00 -; CHECK-NEXT: [[C3:%.*]] = fcmp olt float [[X3]], 0.000000e+00 -; CHECK-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 true, i1 [[C1]] -; CHECK-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 true, i1 [[C2]] -; CHECK-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 true, i1 [[C3]] -; CHECK-NEXT: ret i1 [[S3]] +; 
CHECK-NEXT: [[TMP1:%.*]] = fcmp olt <4 x float> [[X:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]]) +; CHECK-NEXT: ret i1 [[TMP3]] ; %x0 = extractelement <4 x float> %x, i32 0 %x1 = extractelement <4 x float> %x, i32 1 @@ -156,18 +120,10 @@ define i1 @logical_and_icmp_diff_preds(<4 x i32> %x) { define i1 @logical_and_icmp_diff_const(<4 x i32> %x) { ; CHECK-LABEL: @logical_and_icmp_diff_const( -; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0 -; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1 -; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2 -; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3 -; CHECK-NEXT: [[C0:%.*]] = icmp sgt i32 [[X0]], 0 -; CHECK-NEXT: [[C1:%.*]] = icmp sgt i32 [[X1]], 1 -; CHECK-NEXT: [[C2:%.*]] = icmp sgt i32 [[X2]], 2 -; CHECK-NEXT: [[C3:%.*]] = icmp sgt i32 [[X3]], 3 -; CHECK-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false -; CHECK-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false -; CHECK-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[C3]], i1 false -; CHECK-NEXT: ret i1 [[S3]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], <i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]]) +; CHECK-NEXT: ret i1 [[TMP3]] ; %x0 = extractelement <4 x i32> %x, i32 0 %x1 = extractelement <4 x i32> %x, i32 1 @@ -214,25 +170,21 @@ define i1 @mixed_logical_icmp(<4 x i32> %x) { define i1 @logical_and_icmp_clamp(<4 x i32> %x) { ; CHECK-LABEL: @logical_and_icmp_clamp( -; CHECK-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0 -; CHECK-NEXT: [[X1:%.*]] = extractelement <4 x i32> [[X]], i32 1 -; CHECK-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2 -; CHECK-NEXT: [[X3:%.*]] = extractelement <4 x i32> [[X]], i32 3 -; CHECK-NEXT: [[C0:%.*]] = icmp slt i32 
[[X0]], 42 -; CHECK-NEXT: [[C1:%.*]] = icmp slt i32 [[X1]], 42 -; CHECK-NEXT: [[C2:%.*]] = icmp slt i32 [[X2]], 42 -; CHECK-NEXT: [[C3:%.*]] = icmp slt i32 [[X3]], 42 -; CHECK-NEXT: [[D0:%.*]] = icmp sgt i32 [[X0]], 17 -; CHECK-NEXT: [[D1:%.*]] = icmp sgt i32 [[X1]], 17 -; CHECK-NEXT: [[D2:%.*]] = icmp sgt i32 [[X2]], 17 -; CHECK-NEXT: [[D3:%.*]] = icmp sgt i32 [[X3]], 17 -; CHECK-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false -; CHECK-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false -; CHECK-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[C3]], i1 false -; CHECK-NEXT: [[S4:%.*]] = select i1 [[S3]], i1 [[D0]], i1 false -; CHECK-NEXT: [[S5:%.*]] = select i1 [[S4]], i1 [[D1]], i1 false -; CHECK-NEXT: [[S6:%.*]] = select i1 [[S5]], i1 [[D2]], i1 false -; CHECK-NEXT: [[S7:%.*]] = select i1 [[S6]], i1 [[D3]], i1 false +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 3 +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[X]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[X]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[X]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <4 x i32> [[X]], <i32 42, i32 42, i32 42, i32 42> +; CHECK-NEXT: [[D0:%.*]] = icmp sgt i32 [[TMP4]], 17 +; CHECK-NEXT: [[D1:%.*]] = icmp sgt i32 [[TMP3]], 17 +; CHECK-NEXT: [[D2:%.*]] = icmp sgt i32 [[TMP2]], 17 +; CHECK-NEXT: [[D3:%.*]] = icmp sgt i32 [[TMP1]], 17 +; CHECK-NEXT: [[TMP6:%.*]] = freeze <4 x i1> [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP6]]) +; CHECK-NEXT: [[TMP8:%.*]] = and i1 [[TMP7]], [[D0]] +; CHECK-NEXT: [[TMP9:%.*]] = and i1 [[TMP8]], [[D1]] +; CHECK-NEXT: [[TMP10:%.*]] = and i1 [[TMP9]], [[D2]] +; CHECK-NEXT: [[S7:%.*]] = select i1 [[TMP10]], i1 [[D3]], i1 false ; CHECK-NEXT: ret i1 [[S7]] ; %x0 = extractelement <4 x i32> %x, i32 0 -- 2.7.4