From c5ab1ab79786c415082d9342b37bd8b7611f4516 Mon Sep 17 00:00:00 2001 From: Hiroshi Inoue Date: Mon, 5 Feb 2018 12:25:29 +0000 Subject: [PATCH] [PowerPC] Check hot loop exit edge in PPCCTRLoops PPCCTRLoops transform loops using mtctr/bdnz instructions if loop trip count is known and big enough to compensate for the cost of mtctr. But if there is a loop exit edge which is known to be frequently taken (by builtin_expect or by PGO), we should not transform the loop to avoid the cost of mtctr instruction. Here is an example of a loop with hot exit edge: for (unsigned i = 0; i < TripCount; i++) { // do something if (__builtin_expect(check(), 1)) break; // do something } Differential Revision: https://reviews.llvm.org/D42637 llvm-svn: 324229 --- llvm/lib/Target/PowerPC/PPCCTRLoops.cpp | 21 +++ llvm/test/CodeGen/PowerPC/ctrloops-hot-exit.ll | 187 +++++++++++++++++++++++++ 2 files changed, 208 insertions(+) create mode 100644 llvm/test/CodeGen/PowerPC/ctrloops-hot-exit.ll diff --git a/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp b/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp index 96ad1c6..e853a3c 100644 --- a/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -528,6 +528,27 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) { SmallVector ExitingBlocks; L->getExitingBlocks(ExitingBlocks); + // If there is an exit edge known to be frequently taken, + // we should not transform this loop. + for (auto &BB : ExitingBlocks) { + Instruction *TI = BB->getTerminator(); + if (!TI) continue; + + if (BranchInst *BI = dyn_cast(TI)) { + uint64_t TrueWeight, FalseWeight; + if (!BI->isConditional() || + !BI->extractProfMetadata(TrueWeight, FalseWeight)) + continue; + + // If the exit path is more frequent than the loop path, + // we return here without further analysis for this loop. + bool TrueIsExit = !L->contains(BI->getSuccessor(0)); + if (( TrueIsExit && FalseWeight < TrueWeight) || + (!TrueIsExit && FalseWeight > TrueWeight)) + return MadeChange; + } + } + BasicBlock *CountedExitBlock = nullptr; const SCEV *ExitCount = nullptr; BranchInst *CountedExitBranch = nullptr; diff --git a/llvm/test/CodeGen/PowerPC/ctrloops-hot-exit.ll b/llvm/test/CodeGen/PowerPC/ctrloops-hot-exit.ll new file mode 100644 index 0000000..6bb4113 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/ctrloops-hot-exit.ll @@ -0,0 +1,187 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck %s + +; If there is an exit edge known to be frequently taken, +; we should not transform this loop. + +; A loop having a hot exit edge (exit in false branch) +define signext i64 @func() { +; CHECK: @func +; CHECK-NOT: mtctr +; CHECK-NOT: bdnz + +entry: + %a = alloca [1000 x i32], align 4 + %0 = bitcast [1000 x i32]* %a to i8* + br label %for.body + +for.body: + %i.013 = phi i64 [ 0, %entry ], [ %inc, %if.end ] + %b.012 = phi i64 [ 0, %entry ], [ %xor, %if.end ] + %arrayidx = getelementptr inbounds [1000 x i32], [1000 x i32]* %a, i64 0, i64 %i.013 + %1 = load i32, i32* %arrayidx, align 4 + %tobool = icmp eq i32 %1, 0 + br i1 %tobool, label %if.end, label %cleanup, !prof !1 + +if.end: + %xor = xor i64 %i.013, %b.012 + %inc = add nuw nsw i64 %i.013, 1 + %cmp = icmp ult i64 %inc, 1000 + br i1 %cmp, label %for.body, label %cleanup + +cleanup: + %res = phi i64 [ %b.012, %for.body ], [ %xor, %if.end ] + ret i64 %res +} + +; A loop having a cold exit edge (exit in false branch) +define signext i64 @func2() { +; CHECK: @func2 +; CHECK: mtctr +; CHECK: bdnz + +entry: + %a = alloca [1000 x i32], align 4 + %0 = bitcast [1000 x i32]* %a to i8* + br label %for.body + +for.body: + %i.013 = phi i64 [ 0, %entry ], [ %inc, %if.end ] + %b.012 = phi i64 [ 0, %entry ], [ %xor, %if.end ] + %arrayidx = getelementptr inbounds [1000 x i32], [1000 x i32]* %a, i64 0, i64 %i.013 + %1 = load i32, i32* %arrayidx, align 4 + %tobool = icmp eq i32 %1, 0 + br i1 %tobool, label %if.end, label %cleanup, !prof !2 + +if.end: + %xor = xor i64 %i.013, %b.012 + %inc = add nuw nsw i64 %i.013, 1 + %cmp = icmp ult i64 %inc, 1000 + br i1 %cmp, label %for.body, label %cleanup + +cleanup: + %res = phi i64 [ %b.012, %for.body ], [ %xor, %if.end ] + ret i64 %res +} + +; A loop having an exit edge without profile data (exit in false branch) +define signext i64 @func3() { +; CHECK: @func3 +; CHECK: mtctr +; CHECK: bdnz + +entry: + %a = alloca [1000 x i32], align 4 + %0 = bitcast [1000 x i32]* %a to i8* + br label %for.body + +for.body: + %i.013 = phi i64 [ 0, %entry ], [ %inc, %if.end ] + %b.012 = phi i64 [ 0, %entry ], [ %xor, %if.end ] + %arrayidx = getelementptr inbounds [1000 x i32], [1000 x i32]* %a, i64 0, i64 %i.013 + %1 = load i32, i32* %arrayidx, align 4 + %tobool = icmp eq i32 %1, 0 + br i1 %tobool, label %if.end, label %cleanup + +if.end: + %xor = xor i64 %i.013, %b.012 + %inc = add nuw nsw i64 %i.013, 1 + %cmp = icmp ult i64 %inc, 1000 + br i1 %cmp, label %for.body, label %cleanup + +cleanup: + %res = phi i64 [ %b.012, %for.body ], [ %xor, %if.end ] + ret i64 %res +} + +; A loop having a hot exit edge (exit in true branch) +define signext i64 @func4() { +; CHECK: @func4 +; CHECK-NOT: mtctr +; CHECK-NOT: bdnz + +entry: + %a = alloca [1000 x i32], align 4 + %0 = bitcast [1000 x i32]* %a to i8* + br label %for.body + +for.body: + %i.013 = phi i64 [ 0, %entry ], [ %inc, %if.end ] + %b.012 = phi i64 [ 0, %entry ], [ %xor, %if.end ] + %arrayidx = getelementptr inbounds [1000 x i32], [1000 x i32]* %a, i64 0, i64 %i.013 + %1 = load i32, i32* %arrayidx, align 4 + %tobool = icmp ne i32 %1, 0 + br i1 %tobool, label %cleanup, label %if.end, !prof !2 + +if.end: + %xor = xor i64 %i.013, %b.012 + %inc = add nuw nsw i64 %i.013, 1 + %cmp = icmp ult i64 %inc, 1000 + br i1 %cmp, label %for.body, label %cleanup + +cleanup: + %res = phi i64 [ %b.012, %for.body ], [ %xor, %if.end ] + ret i64 %res +} + +; A loop having a cold exit edge (exit in true branch) +define signext i64 @func5() { +; CHECK: @func5 +; CHECK: mtctr +; CHECK: bdnz + +entry: + %a = alloca [1000 x i32], align 4 + %0 = bitcast [1000 x i32]* %a to i8* + br label %for.body + +for.body: + %i.013 = phi i64 [ 0, %entry ], [ %inc, %if.end ] + %b.012 = phi i64 [ 0, %entry ], [ %xor, %if.end ] + %arrayidx = getelementptr inbounds [1000 x i32], [1000 x i32]* %a, i64 0, i64 %i.013 + %1 = load i32, i32* %arrayidx, align 4 + %tobool = icmp ne i32 %1, 0 + br i1 %tobool, label %cleanup, label %if.end, !prof !1 + +if.end: + %xor = xor i64 %i.013, %b.012 + %inc = add nuw nsw i64 %i.013, 1 + %cmp = icmp ult i64 %inc, 1000 + br i1 %cmp, label %for.body, label %cleanup + +cleanup: + %res = phi i64 [ %b.012, %for.body ], [ %xor, %if.end ] + ret i64 %res +} + +; A loop having an exit edge without profile data (exit in true branch) +define signext i64 @func6() { +; CHECK: @func6 +; CHECK: mtctr +; CHECK: bdnz + +entry: + %a = alloca [1000 x i32], align 4 + %0 = bitcast [1000 x i32]* %a to i8* + br label %for.body + +for.body: + %i.013 = phi i64 [ 0, %entry ], [ %inc, %if.end ] + %b.012 = phi i64 [ 0, %entry ], [ %xor, %if.end ] + %arrayidx = getelementptr inbounds [1000 x i32], [1000 x i32]* %a, i64 0, i64 %i.013 + %1 = load i32, i32* %arrayidx, align 4 + %tobool = icmp ne i32 %1, 0 + br i1 %tobool, label %cleanup, label %if.end + +if.end: + %xor = xor i64 %i.013, %b.012 + %inc = add nuw nsw i64 %i.013, 1 + %cmp = icmp ult i64 %inc, 1000 + br i1 %cmp, label %for.body, label %cleanup + +cleanup: + %res = phi i64 [ %b.012, %for.body ], [ %xor, %if.end ] + ret i64 %res +} + +!1 = !{!"branch_weights", i32 1, i32 2000} +!2 = !{!"branch_weights", i32 2000, i32 1} -- 2.7.4