bool run();
+ static unsigned getBlockFreqMultiplier(); // funcspec-block-freq-multiplier value
+
InstCostVisitor getInstCostVisitorFor(Function *F) {
auto &BFI = (GetBFI)(*F);
auto &TTI = (GetTTI)(*F);
"Force function specialization for every call site with a constant "
"argument"));
+// Defaults to 8 (2^3) to model three levels of if-else nesting.
+static cl::opt<unsigned> BlockFreqMultiplier(
+ "funcspec-block-freq-multiplier", cl::init(8), cl::Hidden, cl::desc(
+ "Multiplier to scale block frequency of user instructions during "
+ "specialization bonus estimation")); // scales BlockFreq / EntryFreq weights
+
+static cl::opt<unsigned> MinEntryFreq( // Bypassed by ForceSpecialization.
+ "funcspec-min-entry-freq", cl::init(450), cl::Hidden, cl::desc(
+ "Do not specialize functions with entry block frequency lower than "
+ "this value"));
+
+static cl::opt<unsigned> MinScore( // Bypassed by ForceSpecialization.
+ "funcspec-min-score", cl::init(2), cl::Hidden, cl::desc(
+ "Do not specialize functions with score lower than this value "
+ "(the ratio of specialization bonus over specialization cost)"));
+
static cl::opt<unsigned> MaxClones(
"funcspec-max-clones", cl::init(3), cl::Hidden, cl::desc(
"The maximum number of clones allowed for a single function "
"funcspec-on-address", cl::init(false), cl::Hidden, cl::desc(
"Enable function specialization on the address of global values"));
-// Disabled by default as it can significantly increase compilation times.
-//
-// https://llvm-compile-time-tracker.com
-// https://github.com/nikic/llvm-compile-time-tracker
static cl::opt<bool> SpecializeLiteralConstant(
- "funcspec-for-literal-constant", cl::init(false), cl::Hidden, cl::desc(
+ "funcspec-for-literal-constant", cl::init(true), cl::Hidden, cl::desc(
"Enable specialization of functions that take a literal constant as an "
"argument"));
+unsigned FunctionSpecializer::getBlockFreqMultiplier() {
+ return BlockFreqMultiplier; // current funcspec-block-freq-multiplier value
+}
+
// Estimates the instruction cost of all the basic blocks in \p WorkList.
// The successors of such blocks are added to the list as long as they are
// executable and they have a unique predecessor. \p WorkList represents
while (!WorkList.empty()) {
BasicBlock *BB = WorkList.pop_back_val();
- uint64_t Weight = BFI.getBlockFreq(BB).getFrequency() /
+ uint64_t Weight = BlockFreqMultiplier *
+ BFI.getBlockFreq(BB).getFrequency() /
BFI.getEntryFreq();
if (!Weight)
continue;
KnownConstants.insert({User, C});
- uint64_t Weight = BFI.getBlockFreq(User->getParent()).getFrequency() /
+ uint64_t Weight = BlockFreqMultiplier *
+ BFI.getBlockFreq(User->getParent()).getFrequency() /
BFI.getEntryFreq();
if (!Weight)
return 0;
if (Args.empty())
return false;
+ bool HasCheckedEntryFreq = false;
for (User *U : F->users()) {
if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
continue;
if (S.Args.empty())
continue;
+ // Check the function entry frequency only once. We sink this code here to
+ // postpone running the Block Frequency Analysis until we know for sure
+ // there are Specialization candidates, otherwise we are adding unnecessary
+ // overhead.
+ if (!HasCheckedEntryFreq) {
+ // Reject cold functions (for some definition of 'cold').
+ uint64_t EntryFreq = (GetBFI)(*F).getEntryFreq();
+ if (!ForceSpecialization && EntryFreq < MinEntryFreq)
+ return false;
+
+ HasCheckedEntryFreq = true;
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Entry block frequency for "
+ << F->getName() << " = " << EntryFreq << "\n");
+ }
+
// Check if we have encountered the same specialisation already.
if (auto It = UniqueSpecs.find(S); It != UniqueSpecs.end()) {
// Existing specialisation. Add the call to the list to rewrite, unless
AllSpecs[Index].CallSites.push_back(&CS);
} else {
// Calculate the specialisation gain.
- Cost Score = 0 - SpecCost;
+ Cost Score = 0;
InstCostVisitor Visitor = getInstCostVisitorFor(F);
for (ArgInfo &A : S.Args)
Score += getSpecializationBonus(A.Formal, A.Actual, Visitor);
+ Score /= SpecCost;
// Discard unprofitable specialisations.
- if (!ForceSpecialization && Score <= 0)
+ if (!ForceSpecialization && Score < MinScore)
continue;
// Create a new specialisation entry.
-; RUN: opt -S --passes="default<O3>" < %s | FileCheck %s
+; RUN: opt -S --passes="default<O3>" -force-specialization < %s | FileCheck %s
define dso_local i32 @g0(i32 noundef %x) local_unnamed_addr {
entry:
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
; Test that function specialization doesn't crash due to a constant expression.
; Note that this test case shows that the function specialization pass
; transforms the function even if no specialization happens.
-; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
-
%struct = type { i8, i16, i32, i64, i64}
@Global = internal constant %struct {i8 0, i16 1, i32 2, i64 3, i64 4}
}
define internal i64 @zoo(i1 %flag) {
-; CHECK-LABEL: @zoo(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]]
-; CHECK: plus:
-; CHECK-NEXT: [[TMP0:%.*]] = call i64 @func2.2(ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 3))
-; CHECK-NEXT: br label [[MERGE:%.*]]
-; CHECK: minus:
-; CHECK-NEXT: [[TMP1:%.*]] = call i64 @func2.1(ptr getelementptr inbounds ([[STRUCT]], ptr @Global, i32 0, i32 4))
-; CHECK-NEXT: br label [[MERGE]]
-; CHECK: merge:
-; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ ptrtoint (ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 3) to i64), [[PLUS]] ], [ ptrtoint (ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 4) to i64), [[MINUS]] ]
-; CHECK-NEXT: ret i64 [[TMP2]]
-;
entry:
br i1 %flag, label %plus, label %minus
define i64 @main() {
; CHECK-LABEL: @main(
-; CHECK-NEXT: [[TMP1:%.*]] = call i64 @zoo(i1 false)
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @zoo(i1 true)
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP1]], [[TMP2]]
-; CHECK-NEXT: ret i64 [[TMP3]]
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @zoo.4(i1 false)
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @zoo.3(i1 true)
+; CHECK-NEXT: ret i64 add (i64 ptrtoint (ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 4) to i64), i64 ptrtoint (ptr getelementptr inbounds ([[STRUCT]], ptr @Global, i32 0, i32 3) to i64))
;
%1 = call i64 @zoo(i1 0)
%2 = call i64 @zoo(i1 1)
ret i64 %3
}
+; CHECK-LABEL: @func2.1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret i64 undef
+
+; CHECK-LABEL: @func2.2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret i64 undef
+
+; CHECK-LABEL: @zoo.3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[PLUS:%.*]]
+; CHECK: plus:
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @func2.2(ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 3))
+; CHECK-NEXT: br label [[MERGE:%.*]]
+; CHECK: merge:
+; CHECK-NEXT: ret i64 undef
+
+; CHECK-LABEL: @zoo.4(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[MINUS:%.*]]
+; CHECK: minus:
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 @func2.1(ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 4))
+; CHECK-NEXT: br label [[MERGE:%.*]]
+; CHECK: merge:
+; CHECK-NEXT: ret i64 undef
+
-; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=3 -S < %s | FileCheck %s
+; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
; Checks for callsites that have been annotated with MinSize. We only expect
; specialisation for the call that does not have the attribute:
-; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=3 -S < %s | FileCheck %s
-; RUN: opt -passes="ipsccp<no-func-spec>" -funcspec-min-function-size=3 -S < %s | FileCheck %s --check-prefix=NOFSPEC
+; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
+; RUN: opt -passes="ipsccp<no-func-spec>" -force-specialization -S < %s | FileCheck %s --check-prefix=NOFSPEC
define i64 @main(i64 %x, i1 %flag) {
;
+++ /dev/null
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -force-specialization -S < %s | FileCheck %s
-; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-max-iters=1 -force-specialization -S < %s | FileCheck %s
-; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-max-iters=0 -force-specialization -S < %s | FileCheck %s --check-prefix=DISABLED
-
-; DISABLED-NOT: @func.1(
-; DISABLED-NOT: @func.2(
-
-define internal i32 @func(ptr %0, i32 %1, ptr nocapture %2) {
- %4 = alloca i32, align 4
- store i32 %1, ptr %4, align 4
- %5 = load i32, ptr %4, align 4
- %6 = icmp slt i32 %5, 1
- br i1 %6, label %14, label %7
-
-7: ; preds = %3
- %8 = load i32, ptr %4, align 4
- %9 = sext i32 %8 to i64
- %10 = getelementptr inbounds i32, ptr %0, i64 %9
- call void %2(ptr %10)
- %11 = load i32, ptr %4, align 4
- %12 = add nsw i32 %11, -1
- %13 = call i32 @func(ptr %0, i32 %12, ptr %2)
- br label %14
-
-14: ; preds = %3, %7
- ret i32 0
-}
-
-define internal void @increment(ptr nocapture %0) {
- %2 = load i32, ptr %0, align 4
- %3 = add nsw i32 %2, 1
- store i32 %3, ptr %0, align 4
- ret void
-}
-
-define internal void @decrement(ptr nocapture %0) {
- %2 = load i32, ptr %0, align 4
- %3 = add nsw i32 %2, -1
- store i32 %3, ptr %0, align 4
- ret void
-}
-
-define i32 @main(ptr %0, i32 %1) {
-; CHECK: call void @func.2(ptr [[TMP0:%.*]], i32 [[TMP1:%.*]])
- %3 = call i32 @func(ptr %0, i32 %1, ptr nonnull @increment)
-; CHECK: call void @func.1(ptr [[TMP0]], i32 0)
- %4 = call i32 @func(ptr %0, i32 %3, ptr nonnull @decrement)
-; CHECK: ret i32 0
- ret i32 %4
-}
-
-; CHECK: @func.1(
-; CHECK: [[TMP3:%.*]] = alloca i32, align 4
-; CHECK: store i32 [[TMP1:%.*]], ptr [[TMP3]], align 4
-; CHECK: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-; CHECK: [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1
-; CHECK: br i1 [[TMP5]], label [[TMP13:%.*]], label [[TMP6:%.*]]
-; CHECK: 6:
-; CHECK: [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4
-; CHECK: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
-; CHECK: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP8]]
-; CHECK: call void @decrement(ptr [[TMP9]])
-; CHECK: [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4
-; CHECK: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1
-; CHECK: call void @func.1(ptr [[TMP0]], i32 [[TMP11]])
-; CHECK: br label [[TMP12:%.*]]
-; CHECK: 12:
-; CHECK: ret void
-;
-;
-; CHECK: @func.2(
-; CHECK: [[TMP3:%.*]] = alloca i32, align 4
-; CHECK: store i32 [[TMP1:%.*]], ptr [[TMP3]], align 4
-; CHECK: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-; CHECK: [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1
-; CHECK: br i1 [[TMP5]], label [[TMP13:%.*]], label [[TMP6:%.*]]
-; CHECK: 6:
-; CHECK: [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4
-; CHECK: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
-; CHECK: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP8]]
-; CHECK: call void @increment(ptr [[TMP9]])
-; CHECK: [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4
-; CHECK: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1
-; CHECK: call void @func.2(ptr [[TMP0]], i32 [[TMP11]])
-; CHECK: br label [[TMP12:%.*]]
-; CHECK: 12:
-; CHECK: ret void
-; RUN: opt -S --passes="ipsccp<func-spec>" < %s | FileCheck %s
+; RUN: opt -S --passes="ipsccp<func-spec>" -force-specialization < %s | FileCheck %s
define dso_local i32 @p0(i32 noundef %x) {
entry:
%add = add nsw i32 %x, 1
-; RUN: opt -S --passes="ipsccp<func-spec>" -funcspec-max-clones=1 < %s | FileCheck %s
+; RUN: opt -S --passes="ipsccp<func-spec>" -funcspec-max-clones=1 -force-specialization < %s | FileCheck %s
+
define internal i32 @f(i32 noundef %x, ptr nocapture noundef readonly %p, ptr nocapture noundef readonly %q) noinline {
entry:
%call = tail call i32 %p(i32 noundef %x)
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]]
; CHECK: plus:
-; CHECK-NEXT: [[CMP0:%.*]] = call i64 @compute.2(i64 [[X:%.*]], i64 [[Y:%.*]], ptr @plus, ptr @minus)
+; CHECK-NEXT: [[CMP0:%.*]] = call i64 @compute.2(i64 [[X:%.*]], i64 42, ptr @plus, ptr @minus)
; CHECK-NEXT: br label [[MERGE:%.*]]
; CHECK: minus:
-; CHECK-NEXT: [[CMP1:%.*]] = call i64 @compute.3(i64 [[X]], i64 [[Y]], ptr @minus, ptr @plus)
+; CHECK-NEXT: [[CMP1:%.*]] = call i64 @compute.3(i64 [[X]], i64 [[Y:%.*]], ptr @minus, ptr @plus)
; CHECK-NEXT: br label [[MERGE]]
; CHECK: merge:
; CHECK-NEXT: [[PH:%.*]] = phi i64 [ [[CMP0]], [[PLUS]] ], [ [[CMP1]], [[MINUS]] ]
br i1 %flag, label %plus, label %minus
plus:
- %cmp0 = call i64 @compute(i64 %x, i64 %y, ptr @plus, ptr @minus)
+ %cmp0 = call i64 @compute(i64 %x, i64 42, ptr @plus, ptr @minus)
br label %merge
minus:
; CHECK-LABEL: @compute.2
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CMP0:%.*]] = call i64 @plus(i64 [[X:%.*]], i64 [[Y:%.*]])
-; CHECK-NEXT: [[CMP1:%.*]] = call i64 @minus(i64 [[X]], i64 [[Y]])
-; CHECK-NEXT: [[CMP2:%.*]] = call i64 @compute.1(i64 [[X]], i64 [[Y]], ptr @plus, ptr @plus)
+; CHECK-NEXT: [[CMP0:%.*]] = call i64 @plus(i64 [[X:%.*]], i64 42)
+; CHECK-NEXT: [[CMP1:%.*]] = call i64 @minus(i64 [[X]], i64 42)
+; CHECK-NEXT: [[CMP2:%.*]] = call i64 @compute.1(i64 [[X]], i64 42, ptr @plus, ptr @plus)
; CHECK-LABEL: @compute.3
; CHECK-NEXT: entry:
; RUN: opt -S --passes="ipsccp<func-spec>" \
+; RUN: -funcspec-for-literal-constant=0 \
; RUN: -force-specialization < %s | FileCheck %s -check-prefix CHECK-NOLIT
; RUN: opt -S --passes="ipsccp<func-spec>" \
-; RUN: -funcspec-for-literal-constant \
+; RUN: -funcspec-for-literal-constant=1 \
; RUN: -force-specialization < %s | FileCheck %s -check-prefix CHECK-LIT
define i32 @f0(i32 noundef %x) {
--- /dev/null
+; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -force-specialization -S < %s | FileCheck %s --check-prefixes=COMMON,ITERS1
+; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-max-iters=1 -force-specialization -S < %s | FileCheck %s --check-prefixes=COMMON,ITERS1
+; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-max-iters=2 -force-specialization -S < %s | FileCheck %s --check-prefixes=COMMON,ITERS2
+; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-max-iters=0 -force-specialization -S < %s | FileCheck %s --check-prefix=DISABLED
+
+; DISABLED-NOT: @func.1(
+; DISABLED-NOT: @func.2(
+; DISABLED-NOT: @func.3(
+
+define internal i32 @func(ptr %0, i32 %1, ptr nocapture %2) { ; recursive helper: applies callback %2 to %0[%1] down to %0[1]
+ %4 = alloca i32, align 4
+ store i32 %1, ptr %4, align 4
+ %5 = load i32, ptr %4, align 4
+ %6 = icmp slt i32 %5, 1
+ br i1 %6, label %14, label %7 ; exit once the index is below 1
+
+7: ; preds = %3
+ %8 = load i32, ptr %4, align 4
+ %9 = sext i32 %8 to i64
+ %10 = getelementptr inbounds i32, ptr %0, i64 %9
+ call void %2(ptr %10) ; indirect call through the callback argument
+ %11 = load i32, ptr %4, align 4
+ %12 = add nsw i32 %11, -1
+ %13 = call i32 @func(ptr %0, i32 %12, ptr %2) ; recurse with index - 1, forwarding the same callback
+ br label %14
+
+14: ; preds = %3, %7
+ ret i32 0 ; the result is always 0
+}
+
+define internal void @increment(ptr nocapture %0) { ; adds 1 to the i32 stored at %0
+ %2 = load i32, ptr %0, align 4
+ %3 = add nsw i32 %2, 1
+ store i32 %3, ptr %0, align 4
+ ret void
+}
+
+define internal void @decrement(ptr nocapture %0) { ; subtracts 1 from the i32 stored at %0
+ %2 = load i32, ptr %0, align 4
+ %3 = add nsw i32 %2, -1
+ store i32 %3, ptr %0, align 4
+ ret void
+}
+
+define i32 @main(ptr %0, i32 %1) {
+; COMMON: define i32 @main(
+; COMMON-NEXT: call void @func.2(ptr [[TMP0:%.*]], i32 [[TMP1:%.*]])
+; COMMON-NEXT: call void @func.1(ptr [[TMP0]])
+; COMMON-NEXT: ret i32 0
+;
+ %3 = call i32 @func(ptr %0, i32 %1, ptr nonnull @increment) ; expected to be rewritten to the @increment clone (@func.2)
+ %4 = call i32 @func(ptr %0, i32 %3, ptr nonnull @decrement) ; expected to be rewritten to the @decrement clone (@func.1)
+ ret i32 %4
+}
+
+; COMMON: define internal void @func.1(
+; COMMON-NEXT: [[TMP2:%.*]] = alloca i32, align 4
+; COMMON-NEXT: store i32 0, ptr [[TMP2]], align 4
+; COMMON-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+; COMMON-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1
+; COMMON-NEXT: br i1 [[TMP4]], label [[TMP11:%.*]], label [[TMP5:%.*]]
+; COMMON: 5:
+; COMMON-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4
+; COMMON-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64
+; COMMON-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP7]]
+; COMMON-NEXT: call void @decrement(ptr [[TMP8]])
+; COMMON-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4
+; COMMON-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP9]], -1
+; ITERS1-NEXT: call void @func(ptr [[TMP0]], i32 [[TMP10]], ptr @decrement)
+; ITERS2-NEXT: call void @func.3(ptr [[TMP0]], i32 [[TMP10]])
+; COMMON-NEXT: br label [[TMP11:%.*]]
+; COMMON: 11:
+; COMMON-NEXT: ret void
+;
+; COMMON: define internal void @func.2(
+; COMMON-NEXT: [[TMP3:%.*]] = alloca i32, align 4
+; COMMON-NEXT: store i32 [[TMP1:%.*]], ptr [[TMP3]], align 4
+; COMMON-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
+; COMMON-NEXT: [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1
+; COMMON-NEXT: br i1 [[TMP5]], label [[TMP13:%.*]], label [[TMP6:%.*]]
+; COMMON: 6:
+; COMMON-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4
+; COMMON-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
+; COMMON-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP8]]
+; COMMON-NEXT: call void @increment(ptr [[TMP9]])
+; COMMON-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4
+; COMMON-NEXT: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1
+; COMMON-NEXT: call void @func.2(ptr [[TMP0]], i32 [[TMP11]])
+; COMMON-NEXT: br label [[TMP12:%.*]]
+; COMMON: 12:
+; COMMON-NEXT: ret void
+;
+; ITERS2: define internal void @func.3(
+; ITERS2-NEXT: [[TMP3:%.*]] = alloca i32, align 4
+; ITERS2-NEXT: store i32 [[TMP1:%.*]], ptr [[TMP3]], align 4
+; ITERS2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
+; ITERS2-NEXT: [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1
+; ITERS2-NEXT: br i1 [[TMP5]], label [[TMP13:%.*]], label [[TMP6:%.*]]
+; ITERS2: 6:
+; ITERS2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4
+; ITERS2-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
+; ITERS2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP8]]
+; ITERS2-NEXT: call void @decrement(ptr [[TMP9]])
+; ITERS2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4
+; ITERS2-NEXT: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1
+; ITERS2-NEXT: call void @func.3(ptr [[TMP0]], i32 [[TMP11]])
+; ITERS2-NEXT: br label [[TMP12:%.*]]
+; ITERS2: 12:
+; ITERS2-NEXT: ret void
+
-; RUN: opt -S --passes="ipsccp<func-spec>" < %s | FileCheck %s
+; RUN: opt -S --passes="ipsccp<func-spec>" -funcspec-min-entry-freq=1 < %s | FileCheck %s
define dso_local i32 @p0(i32 noundef %x) {
entry:
%add = add nsw i32 %x, 1
-; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=3 -S < %s | FileCheck %s
+; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
define i64 @main(i64 %x, i1 %flag) {
entry:
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=0 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=NONE
-; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=1 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=ONE
-; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=2 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=TWO
-; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=3 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=THREE
+; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=0 -force-specialization -S < %s | FileCheck %s --check-prefix=NONE
+; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=1 -force-specialization -S < %s | FileCheck %s --check-prefix=ONE
+; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=2 -force-specialization -S < %s | FileCheck %s --check-prefix=TWO
+; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=3 -force-specialization -S < %s | FileCheck %s --check-prefix=THREE
; Make sure that we iterate correctly after sorting the specializations:
-; FnSpecialization: Specializations for function compute
-; FnSpecialization: Gain = 608
-; FnSpecialization: FormalArg = binop1, ActualArg = power
-; FnSpecialization: FormalArg = binop2, ActualArg = mul
-; FnSpecialization: Gain = 982
-; FnSpecialization: FormalArg = binop1, ActualArg = plus
-; FnSpecialization: FormalArg = binop2, ActualArg = minus
-; FnSpecialization: Gain = 795
-; FnSpecialization: FormalArg = binop1, ActualArg = minus
-; FnSpecialization: FormalArg = binop2, ActualArg = power
+;
+; Score(@plus, @minus) > Score(42, @minus, @power) > Score(@power, @mul)
define i64 @main(i64 %x, i64 %y, i1 %flag) {
; NONE-LABEL: @main(
;
; THREE-LABEL: define internal i64 @compute.3(i64 %x, i64 %y, ptr %binop1, ptr %binop2) {
; THREE-NEXT: entry:
-; THREE-NEXT: [[TMP0:%.+]] = call i64 @minus(i64 %x, i64 %y)
-; THREE-NEXT: [[TMP1:%.+]] = call i64 @power(i64 %x, i64 %y)
+; THREE-NEXT: [[TMP0:%.+]] = call i64 @minus(i64 %x, i64 42)
+; THREE-NEXT: [[TMP1:%.+]] = call i64 @power(i64 %x, i64 42)
; THREE-NEXT: [[TMP2:%.+]] = add i64 [[TMP0]], [[TMP1]]
; THREE-NEXT: [[TMP3:%.+]] = sdiv i64 [[TMP2]], %x
-; THREE-NEXT: [[TMP4:%.+]] = sub i64 [[TMP3]], %y
+; THREE-NEXT: [[TMP4:%.+]] = sub i64 [[TMP3]], 42
; THREE-NEXT: [[TMP5:%.+]] = mul i64 [[TMP4]], 2
; THREE-NEXT: ret i64 [[TMP5]]
; THREE-NEXT: }
auto &TTI = FAM.getResult<TargetIRAnalysis>(*I.getFunction());
auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*I.getFunction());
- return BFI.getBlockFreq(I.getParent()).getFrequency() / BFI.getEntryFreq() *
+ uint64_t Weight = FunctionSpecializer::getBlockFreqMultiplier() *
+ BFI.getBlockFreq(I.getParent()).getFrequency() /
+ BFI.getEntryFreq();
+ return Weight *
TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
}
};