From 82cee24e3d15798c6f7f5b9879723d7f758d8fa0 Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Mon, 16 Jan 2023 18:53:34 +0700 Subject: [PATCH] [JumpThreading] Preserve profile metadata during select unfolding, take 2 Jump threading can replace select and unconditional branch with conditional branch, but when doing so loses profile information. This destructive transform can eventually lead to a performance degradation due to folding of branches in shouldFoldCondBranchesToCommonDestination as branch probabilities are no longer known. The first version was reverted due to assert caused by i32 overflow, fixed in this version. Patch by Roman Paukner! Differential Revision: https://reviews.llvm.org/D138132 Reviewed By: mkazantsev --- llvm/lib/Transforms/Scalar/JumpThreading.cpp | 18 ++++++++++++++ llvm/test/Transforms/JumpThreading/select.ll | 37 ++++++++++++++++++++++------ 2 files changed, 47 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp index e9bf66c..eefbaec 100644 --- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -2785,8 +2785,26 @@ void JumpThreadingPass::unfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB, // Create a conditional branch and update PHI nodes. auto *BI = BranchInst::Create(NewBB, BB, SI->getCondition(), Pred); BI->applyMergedLocation(PredTerm->getDebugLoc(), SI->getDebugLoc()); + BI->copyMetadata(*SI, {LLVMContext::MD_prof}); SIUse->setIncomingValue(Idx, SI->getFalseValue()); SIUse->addIncoming(SI->getTrueValue(), NewBB); + // Set the block frequency of NewBB. + if (HasProfileData) { + uint64_t TrueWeight, FalseWeight; + if (extractBranchWeights(*SI, TrueWeight, FalseWeight) && + (TrueWeight + FalseWeight) != 0) { + SmallVector BP; + BP.emplace_back(BranchProbability::getBranchProbability( + TrueWeight, TrueWeight + FalseWeight)); + BP.emplace_back(BranchProbability::getBranchProbability( + FalseWeight, TrueWeight + FalseWeight)); + BPI->setEdgeProbability(Pred, BP); + } + + auto NewBBFreq = + BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, NewBB); + BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency()); + } // The select is now dead. SI->eraseFromParent(); diff --git a/llvm/test/Transforms/JumpThreading/select.ll b/llvm/test/Transforms/JumpThreading/select.ll index aedfab8..67025b2 100644 --- a/llvm/test/Transforms/JumpThreading/select.ll +++ b/llvm/test/Transforms/JumpThreading/select.ll @@ -1,5 +1,13 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes=jump-threading < %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals +; RUN: opt -S -passes=jump-threading -debug-only=branch-prob < %s 2>&1 | FileCheck %s +; REQUIRES: asserts + +; CHECK-LABEL: ---- Branch Probability Info : unfold1 ---- +; CHECK: set edge cond.false -> 0 successor probability to 0x20000000 / 0x80000000 = 25.00% +; CHECK: set edge cond.false -> 1 successor probability to 0x60000000 / 0x80000000 = 75.00% +; CHECK-LABEL: ---- Branch Probability Info : unfold2 ---- +; CHECK: set edge cond.false -> 0 successor probability to 0x20000000 / 0x80000000 = 25.00% +; CHECK: set edge cond.false -> 1 successor probability to 0x60000000 / 0x80000000 = 75.00% declare void @foo() declare void @bar() @@ -11,6 +19,9 @@ declare void @quux() ; Mostly theoretical since instruction combining simplifies all selects of ; booleans where at least one operand is true/false/undef. +;. +; CHECK: @[[ANCHOR:[a-zA-Z0-9_$"\\.-]+]] = constant [3 x ptr] [ptr blockaddress(@test_indirectbr, [[L1:%.*]]), ptr inttoptr (i32 1 to ptr), ptr blockaddress(@test_indirectbr, [[L3:%.*]])] +;. define void @test_br(i1 %cond, i1 %value) nounwind { ; CHECK-LABEL: @test_br( ; CHECK-NEXT: entry: @@ -265,7 +276,7 @@ L4: ret void } -define void @unfold1(double %x, double %y) nounwind { +define void @unfold1(double %x, double %y) nounwind !prof !1 { ; CHECK-LABEL: @unfold1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SUB:%.*]] = fsub double [[X:%.*]], [[Y:%.*]] @@ -274,7 +285,7 @@ define void @unfold1(double %x, double %y) nounwind { ; CHECK: cond.false: ; CHECK-NEXT: [[ADD:%.*]] = fadd double [[X]], [[Y]] ; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt double [[ADD]], 1.000000e+01 -; CHECK-NEXT: br i1 [[CMP1]], label [[COND_END4]], label [[IF_THEN:%.*]] +; CHECK-NEXT: br i1 [[CMP1]], label [[COND_END4]], label [[IF_THEN:%.*]], !prof [[PROF1:![0-9]+]] ; CHECK: cond.end4: ; CHECK-NEXT: [[COND5:%.*]] = phi double [ [[SUB]], [[ENTRY:%.*]] ], [ [[ADD]], [[COND_FALSE]] ] ; CHECK-NEXT: [[CMP6:%.*]] = fcmp oeq double [[COND5]], 0.000000e+00 @@ -293,7 +304,7 @@ entry: cond.false: ; preds = %entry %add = fadd double %x, %y %cmp1 = fcmp ogt double %add, 1.000000e+01 - %add. = select i1 %cmp1, double %add, double 0.000000e+00 + %add. = select i1 %cmp1, double %add, double 0.000000e+00, !prof !0 br label %cond.end4 cond.end4: ; preds = %entry, %cond.false @@ -311,7 +322,7 @@ if.end: ; preds = %if.then, %cond.end4 } -define void @unfold2(i32 %x, i32 %y) nounwind { +define void @unfold2(i32 %x, i32 %y) nounwind !prof !1 { ; CHECK-LABEL: @unfold2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[X:%.*]], [[Y:%.*]] @@ -320,7 +331,7 @@ define void @unfold2(i32 %x, i32 %y) nounwind { ; CHECK: cond.false: ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[X]], [[Y]] ; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[ADD]], 10 -; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[COND_END4:%.*]] +; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[COND_END4:%.*]], !prof [[PROF1]] ; CHECK: cond.end4: ; CHECK-NEXT: [[COND5:%.*]] = phi i32 [ [[ADD]], [[COND_FALSE]] ] ; CHECK-NEXT: [[CMP6:%.*]] = icmp eq i32 [[COND5]], 0 @@ -339,7 +350,7 @@ entry: cond.false: ; preds = %entry %add = add nsw i32 %x, %y %cmp1 = icmp sgt i32 %add, 10 - %add. = select i1 %cmp1, i32 0, i32 %add + %add. = select i1 %cmp1, i32 0, i32 %add, !prof !0 br label %cond.end4 cond.end4: ; preds = %entry, %cond.false @@ -652,3 +663,13 @@ if.end: %v1 = select i1 %v, i32 %s, i32 42 ret i32 %v1 } + +; branch_weights overflowing uint32_t +!0 = !{!"branch_weights", i64 1073741824, i64 3221225472} +!1 = !{!"function_entry_count", i64 1984} +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind } +;. +; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1984} +; CHECK: [[PROF1]] = !{!"branch_weights", i64 1073741824, i64 3221225472} +;. -- 2.7.4