From bfefde22b670359eddd7843573b63dc4326f33c1 Mon Sep 17 00:00:00 2001 From: Congzhe Cao Date: Mon, 31 May 2021 16:11:04 -0400 Subject: [PATCH] [LoopInterhcange] Handle movement of reduction phis appropriately This patch fixes pr43326 and pr48212. Currently when we move reduction phis to the right place, loop interchange assumes the first phi in loop headers is an induction phi, skips the first phi and assumes the rest of phis are candidate reduction phis to move. However, it may not always be the case. This patch loops over all phis in loop headers and considers a phi node as a candidate reduction phi to move only when it is indeed a reduction phi across outer and inner loop. Reviewed By: Whitney Differential Revision: https://reviews.llvm.org/D102743 --- llvm/lib/Transforms/Scalar/LoopInterchange.cpp | 14 ++-- llvm/test/Transforms/LoopInterchange/pr43326.ll | 86 +++++++++++++++++++++++++ llvm/test/Transforms/LoopInterchange/pr48212.ll | 54 ++++++++++++++++ 3 files changed, 149 insertions(+), 5 deletions(-) create mode 100644 llvm/test/Transforms/LoopInterchange/pr43326.ll create mode 100644 llvm/test/Transforms/LoopInterchange/pr48212.ll diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp index c63bf16..745bc0e 100644 --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -1661,15 +1661,19 @@ bool LoopInterchangeTransform::adjustLoopBranches() { // replaced by Inners'. OuterLoopLatchSuccessor->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch); + auto &OuterInnerReductions = LIL.getOuterInnerReductions(); // Now update the reduction PHIs in the inner and outer loop headers. SmallVector InnerLoopPHIs, OuterLoopPHIs; - for (PHINode &PHI : drop_begin(InnerLoopHeader->phis())) + for (PHINode &PHI : InnerLoopHeader->phis()) { + if (OuterInnerReductions.find(&PHI) == OuterInnerReductions.end()) + continue; InnerLoopPHIs.push_back(cast(&PHI)); - for (PHINode &PHI : drop_begin(OuterLoopHeader->phis())) + } + for (PHINode &PHI : OuterLoopHeader->phis()) { + if (OuterInnerReductions.find(&PHI) == OuterInnerReductions.end()) + continue; OuterLoopPHIs.push_back(cast(&PHI)); - - auto &OuterInnerReductions = LIL.getOuterInnerReductions(); - (void)OuterInnerReductions; + } // Now move the remaining reduction PHIs from outer to inner loop header and // vice versa. The PHI nodes must be part of a reduction across the inner and diff --git a/llvm/test/Transforms/LoopInterchange/pr43326.ll b/llvm/test/Transforms/LoopInterchange/pr43326.ll new file mode 100644 index 0000000..68bcc4e7 --- /dev/null +++ b/llvm/test/Transforms/LoopInterchange/pr43326.ll @@ -0,0 +1,86 @@ +; RUN: opt < %s -basic-aa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \ +; RUN: -verify-dom-info -verify-loop-info -verify-loop-lcssa -stats 2>&1 +; RUN: FileCheck --input-file=%t --check-prefix=REMARKS %s + +@a = global i32 0 +@b = global i8 0 +@c = global i32 0 +@d = global i32 0 +@e = global [1 x [1 x i32]] zeroinitializer + +; REMARKS: --- !Passed +; REMARKS-NEXT: Pass: loop-interchange +; REMARKS-NEXT: Name: Interchanged +; REMARKS-NEXT: Function: pr43326 + +define void @pr43326() { +entry: + %0 = load i32, i32* @a + %tobool.not2 = icmp eq i32 %0, 0 + br i1 %tobool.not2, label %for.end14, label %for.body.lr.ph + +for.body.lr.ph: ; preds = %entry + %d.promoted = load i32, i32* @d + %a.promoted = load i32, i32* @a + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.inc12 + %inc1312 = phi i32 [ %a.promoted, %for.body.lr.ph ], [ %inc13, %for.inc12 ] + %xor.lcssa.lcssa11 = phi i32 [ %d.promoted, %for.body.lr.ph ], [ %xor.lcssa.lcssa, %for.inc12 ] + br label %for.body3 + +for.body3: ; preds = %for.body, %for.inc10 + %xor.lcssa9 = phi i32 [ %xor.lcssa.lcssa11, %for.body ], [ %xor.lcssa, %for.inc10 ] + %dec7 = phi i8 [ 0, %for.body ], [ %dec, %for.inc10 ] + %idxprom8 = sext i8 %dec7 to i64 + br label %for.body7 + +for.body7: ; preds = %for.body3, %for.inc + %xor5 = phi i32 [ %xor.lcssa9, %for.body3 ], [ %xor, %for.inc ] + %inc4 = phi i32 [ 0, %for.body3 ], [ %inc, %for.inc ] + %idxprom = sext i32 %inc4 to i64 + %arrayidx9 = getelementptr inbounds [1 x [1 x i32]], [1 x [1 x i32]]* @e, i64 0, i64 %idxprom, i64 %idxprom8 + %1 = load i32, i32* %arrayidx9 + %xor = xor i32 %xor5, %1 + br label %for.inc + +for.inc: ; preds = %for.body7 + %inc = add nsw i32 %inc4, 1 + %cmp5 = icmp slt i32 %inc, 1 + br i1 %cmp5, label %for.body7, label %for.end + +for.end: ; preds = %for.inc + %xor.lcssa = phi i32 [ %xor, %for.inc ] + %inc.lcssa = phi i32 [ %inc, %for.inc ] + br label %for.inc10 + +for.inc10: ; preds = %for.end + %dec = add i8 %dec7, -1 + %cmp = icmp sgt i8 %dec, -1 + br i1 %cmp, label %for.body3, label %for.end11 + +for.end11: ; preds = %for.inc10 + %xor.lcssa.lcssa = phi i32 [ %xor.lcssa, %for.inc10 ] + %dec.lcssa = phi i8 [ %dec, %for.inc10 ] + %inc.lcssa.lcssa = phi i32 [ %inc.lcssa, %for.inc10 ] + br label %for.inc12 + +for.inc12: ; preds = %for.end11 + %inc13 = add nsw i32 %inc1312, 1 + %tobool.not = icmp eq i32 %inc13, 0 + br i1 %tobool.not, label %for.cond.for.end14_crit_edge, label %for.body + +for.cond.for.end14_crit_edge: ; preds = %for.inc12 + %inc13.lcssa = phi i32 [ %inc13, %for.inc12 ] + %inc.lcssa.lcssa.lcssa = phi i32 [ %inc.lcssa.lcssa, %for.inc12 ] + %xor.lcssa.lcssa.lcssa = phi i32 [ %xor.lcssa.lcssa, %for.inc12 ] + %dec.lcssa.lcssa = phi i8 [ %dec.lcssa, %for.inc12 ] + store i8 %dec.lcssa.lcssa, i8* @b + store i32 %xor.lcssa.lcssa.lcssa, i32* @d + store i32 %inc.lcssa.lcssa.lcssa, i32* @c + store i32 %inc13.lcssa, i32* @a + br label %for.end14 + +for.end14: ; preds = %for.cond.for.end14_crit_edge, %entry + ret void +} diff --git a/llvm/test/Transforms/LoopInterchange/pr48212.ll b/llvm/test/Transforms/LoopInterchange/pr48212.ll new file mode 100644 index 0000000..b6894bc --- /dev/null +++ b/llvm/test/Transforms/LoopInterchange/pr48212.ll @@ -0,0 +1,54 @@ +; RUN: opt < %s -basic-aa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \ +; RUN: -verify-dom-info -verify-loop-info -verify-loop-lcssa 2>&1 +; RUN: FileCheck --input-file=%t --check-prefix=REMARKS %s + +; REMARKS: --- !Passed +; REMARKS-NEXT: Pass: loop-interchange +; REMARKS-NEXT: Name: Interchanged +; REMARKS-NEXT: Function: pr48212 + +define void @pr48212([5 x i32]* %filter) { +entry: + br label %L1 + +L1: ; preds = %entry + br label %for.body + +for.body: ; preds = %L1, %for.inc6 + %temp.04 = phi i32 [ undef, %L1 ], [ %temp.1.lcssa, %for.inc6 ] + %k1.03 = phi i32 [ 0, %L1 ], [ %inc7, %for.inc6 ] + br label %L2 + +L2: ; preds = %for.body + br label %for.body3 + +for.body3: ; preds = %L2, %for.inc + %temp.12 = phi i32 [ %temp.04, %L2 ], [ %add, %for.inc ] + %k2.01 = phi i32 [ 0, %L2 ], [ %inc, %for.inc ] + %idxprom = sext i32 %k2.01 to i64 + %arrayidx = getelementptr inbounds [5 x i32], [5 x i32]* %filter, i64 %idxprom + %idxprom4 = sext i32 %k1.03 to i64 + %arrayidx5 = getelementptr inbounds [5 x i32], [5 x i32]* %arrayidx, i64 0, i64 %idxprom4 + %0 = load i32, i32* %arrayidx5 + %add = add nsw i32 %temp.12, %0 + br label %for.inc + +for.inc: ; preds = %for.body3 + %inc = add nsw i32 %k2.01, 1 + %cmp2 = icmp slt i32 %inc, 3 + br i1 %cmp2, label %for.body3, label %for.end + +for.end: ; preds = %for.inc + %temp.1.lcssa = phi i32 [ %add, %for.inc ] + br label %for.inc6 + +for.inc6: ; preds = %for.end + %inc7 = add nsw i32 %k1.03, 1 + %cmp = icmp slt i32 %inc7, 5 + br i1 %cmp, label %for.body, label %for.end8 + +for.end8: ; preds = %for.inc6 + ret void +} + + -- 2.7.4