From b2f5164deb0e7bd27603022772dfba8814a389b1 Mon Sep 17 00:00:00 2001 From: zhongyunde Date: Mon, 14 Feb 2022 09:27:49 +0800 Subject: [PATCH] [IVDescriptors] Support FOR where we have multiple sink pointed Handles the case where Previous doesn't come before LastPrev incorrectly. Fix https://github.com/llvm/llvm-project/issues/53483 Reviewed By: fhahn Differential Revision: https://reviews.llvm.org/D118558 --- llvm/lib/Analysis/IVDescriptors.cpp | 18 +++++--- .../first-order-recurrence-complex.ll | 48 +++++++++++++++++++--- 2 files changed, 54 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp index 74b0d67..6399c75 100644 --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -917,12 +917,18 @@ bool RecurrenceDescriptor::isFirstOrderRecurrence( SinkCandidate->mayReadFromMemory() || SinkCandidate->isTerminator()) return false; - // Do not try to sink an instruction multiple times (if multiple operands - // are first order recurrences). - // TODO: We can support this case, by sinking the instruction after the - // 'deepest' previous instruction. - if (SinkAfter.find(SinkCandidate) != SinkAfter.end()) - return false; + // Try to sink an instruction after the 'deepest' previous instruction, + // which has multiple operands for first order recurrences. + auto It = SinkAfter.find(SinkCandidate); + if (It != SinkAfter.end()) { + auto LastPrev = It->second; + if (LastPrev->getParent() != Previous->getParent()) + return false; + + // If LastPrev comes after the current Previous, SinkCandidate already + // gets sunk past Previous and nothing left to do. + return Previous->comesBefore(LastPrev); + } // If we reach a PHI node that is not dominated by Previous, we reached a // header PHI. No need for sinking. 
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll index 14d2dc9..70b8c81 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll @@ -373,21 +373,57 @@ exit: ret void } -; TODO: We should be able to sink %tmp38 after %tmp60. +; Sink %tmp38 after %tmp60, which enables loop vectorization. define void @instruction_with_2_FOR_operands() { ; CHECK-LABEL: @instruction_with_2_FOR_operands( ; CHECK-NEXT: bb: +; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 undef, i64 0) +; CHECK-NEXT: [[TMP0:%.*]] = add nuw i64 [[SMAX]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x float> [ <float poison, float poison, float poison, float undef>, [[VECTOR_PH]] ], [ [[BROADCAST_SPLAT3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x float> [ <float poison, float poison, float poison, float undef>, [[VECTOR_PH]] ], [ [[BROADCAST_SPLAT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = load float, float* undef, align 4 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[VECTOR_RECUR1]], <4 x float> [[BROADCAST_SPLAT]], <4 x i32> <i32 3, i32 4, i32 5, i32 6> +; CHECK-NEXT: [[TMP4:%.*]] = load float, float* undef, align 4 +; CHECK-NEXT: 
[[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP4]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT3]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[VECTOR_RECUR]], <4 x float> [[BROADCAST_SPLAT3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6> +; CHECK-NEXT: [[TMP6:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP3]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x float> [[BROADCAST_SPLAT3]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x float> [[BROADCAST_SPLAT3]], i32 2 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT4:%.*]] = extractelement <4 x float> [[BROADCAST_SPLAT]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI5:%.*]] = extractelement <4 x float> [[BROADCAST_SPLAT]], i32 2 +; CHECK-NEXT: br i1 [[CMP_N]], label [[BB74:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[SCALAR_RECUR_INIT6:%.*]] = phi float [ undef, [[BB:%.*]] ], [ [[VECTOR_RECUR_EXTRACT4]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ undef, [[BB]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] ; CHECK-NEXT: br label [[BB13:%.*]] ; CHECK: bb13: -; CHECK-NEXT: [[TMP37:%.*]] = phi float [ [[TMP60:%.*]], [[BB13]] ], [ undef, [[BB:%.*]] ] -; CHECK-NEXT: [[TMP27:%.*]] = phi float [ [[TMP49:%.*]], [[BB13]] ], [ undef, [[BB]] ] -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BB13]] ], [ 0, [[BB]] ] -; CHECK-NEXT: [[TMP38:%.*]] = fmul fast float [[TMP37]], [[TMP27]] +; 
CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi float [ [[TMP60:%.*]], [[BB13]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[SCALAR_RECUR7:%.*]] = phi float [ [[TMP49:%.*]], [[BB13]] ], [ [[SCALAR_RECUR_INIT6]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BB13]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[TMP38:%.*]] = fmul fast float [[SCALAR_RECUR]], [[SCALAR_RECUR7]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[TMP49]] = load float, float* undef, align 4 ; CHECK-NEXT: [[TMP60]] = load float, float* undef, align 4 ; CHECK-NEXT: [[TMP12:%.*]] = icmp slt i64 [[INDVARS_IV]], undef -; CHECK-NEXT: br i1 [[TMP12]], label [[BB13]], label [[BB74:%.*]] +; CHECK-NEXT: br i1 [[TMP12]], label [[BB13]], label [[BB74]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: bb74: ; CHECK-NEXT: ret void ; -- 2.7.4