#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
+#include <limits>
#include <cassert>
#include <string>
cl::desc("Name for the section containing cold functions "
"extracted by hot-cold splitting."));
+static cl::opt<int> MaxParametersForSplit( // Cap on inputs plus outputs/split phis of a split function.
+ "hotcoldsplit-max-params", cl::init(4), cl::Hidden, // Hidden option; defaults to 4.
+ cl::desc("Maximum number of parameters for a split function")); // A region over the cap gets the maximum int penalty.
+
namespace {
// Same as blockEndsInUnreachable in CodeGen/BranchFolding.cpp. Do not modify
// this function unless you modify the MBB version as well.
if (SplittingThreshold <= 0)
return Penalty;
- // The typical code size cost for materializing an argument for the outlined
- // call.
- LLVM_DEBUG(dbgs() << "Applying penalty for: " << NumInputs << " inputs\n");
- const int CostForArgMaterialization = TargetTransformInfo::TCC_Basic;
- Penalty += CostForArgMaterialization * NumInputs;
-
- // The typical code size cost for an output alloca, its associated store, and
- // its associated reload.
- LLVM_DEBUG(dbgs() << "Applying penalty for: " << NumOutputs << " outputs\n");
- const int CostForRegionOutput = 3 * TargetTransformInfo::TCC_Basic;
- Penalty += CostForRegionOutput * NumOutputs;
-
// Find the number of distinct exit blocks for the region. Use a conservative
// check to determine whether control returns from the region.
bool NoBlocksReturn = true;
}
}
+ // Count the number of phis in exit blocks with >= 2 incoming values from the
+ // outlining region. These phis are split (\ref severSplitPHINodesOfExits),
+ // and new outputs are created to supply the split phis. CodeExtractor can't
+ // report these new outputs until extraction begins, but it's important to
+ // factor the cost of the outputs into the cost calculation.
+ unsigned NumSplitExitPhis = 0;
+ for (BasicBlock *ExitBB : SuccsOutsideRegion) {
+ for (PHINode &PN : ExitBB->phis()) {
+ // Count incoming values from the outlining region; stop at the second.
+ int NumIncomingVals = 0;
+ for (unsigned i = 0; i < PN.getNumIncomingValues(); ++i)
+ if (find(Region, PN.getIncomingBlock(i)) != Region.end()) {
+ ++NumIncomingVals;
+ if (NumIncomingVals > 1) {
+ ++NumSplitExitPhis;
+ break;
+ }
+ }
+ }
+ }
+
+ // Apply a penalty for calling the split function. Factor in the cost of
+ // materializing all of the parameters.
+ int NumOutputsAndSplitPhis = NumOutputs + NumSplitExitPhis;
+ int NumParams = NumInputs + NumOutputsAndSplitPhis;
+ if (NumParams > MaxParametersForSplit) {
+ LLVM_DEBUG(dbgs() << NumInputs << " inputs and " << NumOutputsAndSplitPhis
+ << " outputs exceeds parameter limit ("
+ << MaxParametersForSplit << ")\n");
+ return std::numeric_limits<int>::max();
+ }
+ const int CostForArgMaterialization = 2 * TargetTransformInfo::TCC_Basic;
+ LLVM_DEBUG(dbgs() << "Applying penalty for: " << NumParams << " params\n");
+ Penalty += CostForArgMaterialization * NumParams;
+
+ // Apply the typical code size cost for an output alloca and its associated
+ // reload in the caller. Also penalize the associated store in the callee.
+ LLVM_DEBUG(dbgs() << "Applying penalty for: " << NumOutputsAndSplitPhis
+ << " outputs/split phis\n");
+ const int CostForRegionOutput = 3 * TargetTransformInfo::TCC_Basic;
+ Penalty += CostForRegionOutput * NumOutputsAndSplitPhis;
+
// Apply a `noreturn` bonus.
if (NoBlocksReturn) {
LLVM_DEBUG(dbgs() << "Applying bonus for: " << Region.size()
// Apply a penalty for having more than one successor outside of the region.
// This penalty accounts for the switch needed in the caller.
- if (!SuccsOutsideRegion.empty()) {
+ if (SuccsOutsideRegion.size() > 1) {
LLVM_DEBUG(dbgs() << "Applying penalty for: " << SuccsOutsideRegion.size()
<< " non-region successors\n");
Penalty += (SuccsOutsideRegion.size() - 1) * TargetTransformInfo::TCC_Basic;
; REQUIRES: asserts
-; RUN: opt -hotcoldsplit -debug-only=hotcoldsplit -S < %s -o /dev/null 2>&1 | FileCheck %s
+; RUN: opt -hotcoldsplit -debug-only=hotcoldsplit -hotcoldsplit-threshold=2 -hotcoldsplit-max-params=2 -S < %s -o /dev/null 2>&1 | FileCheck %s
declare void @sink(i32*, i32, i32) cold
br i1 undef, label %cold, label %exit
cold:
- ; CHECK: Applying penalty for: 2 inputs
+ ; CHECK: Applying penalty for splitting: 2
+ ; CHECK-NEXT: Applying penalty for: 2 params
+ ; CHECK-NEXT: Applying penalty for: 0 outputs/split phis
+ ; CHECK-NEXT: penalty = 6
call void @sink(i32* @g, i32 %arg, i32 %local)
ret void
exit:
ret void
}
+
+define void @bar(i32* %p1, i32 %p2, i32 %p3) {
+ br i1 undef, label %cold, label %exit
+
+cold:
+ ; CHECK: Applying penalty for splitting: 2
+ ; CHECK-NEXT: 3 inputs and 0 outputs exceeds parameter limit (2)
+ ; CHECK-NEXT: penalty = 2147483647
+ call void @sink(i32* %p1, i32 %p2, i32 %p3)
+ ret void
+
+exit:
+ ret void
+}
; REQUIRES: asserts
-; RUN: opt -hotcoldsplit -debug-only=hotcoldsplit -S < %s -o /dev/null 2>&1 | FileCheck %s
+; RUN: opt -hotcoldsplit -debug-only=hotcoldsplit -hotcoldsplit-threshold=2 -S < %s -o /dev/null 2>&1 | FileCheck %s
declare void @sink() cold
br i1 undef, label %cold1, label %exit
cold1:
- ; CHECK: Applying penalty for: 1 non-region successor
+ ; CHECK: Applying penalty for splitting: 2
+ ; CHECK-NEXT: Applying penalty for: 0 params
+ ; CHECK-NEXT: Applying penalty for: 0 outputs/split phis
+ ; CHECK-NEXT: penalty = 2
call void @sink()
br i1 undef, label %cold2, label %cold3
br i1 undef, label %cold1, label %exit1
cold1:
- ; CHECK: Applying penalty for: 2 non-region successors
+ ; CHECK: Applying penalty for splitting: 2
+ ; CHECK-NEXT: Applying penalty for: 0 params
+ ; CHECK-NEXT: Applying penalty for: 0 outputs/split phis
+ ; CHECK-NEXT: Applying penalty for: 2 non-region successors
+ ; CHECK-NEXT: penalty = 3
call void @sink()
br i1 undef, label %cold2, label %cold3