"speculatively executed instructions"));
static cl::opt<int>
-MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10),
- cl::desc("Max size of a block which is still considered "
- "small enough to thread through"));
+ MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
+ cl::init(10),
+ cl::desc("Max size of a block which is still considered "
+ "small enough to thread through"));
// Two is chosen to allow one negation and a logical combine.
static cl::opt<unsigned>
static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
int Size = 0;
- for (Instruction &I : BB->instructionsWithoutDebug()) {
- if (Size > MaxSmallBlockSize)
- return false; // Don't clone large BB's.
+ SmallPtrSet<const Value *, 32> EphValues;
+ auto IsEphemeral = [&](const Value *V) {
+ if (isa<AssumeInst>(V))
+ return true;
+ return isSafeToSpeculativelyExecute(V) &&
+ all_of(V->users(),
+ [&](const User *U) { return EphValues.count(U); });
+ };
+ // Walk the loop in reverse so that we can identify ephemeral values properly
+ // (values only feeding assumes).
+ for (Instruction &I : reverse(BB->instructionsWithoutDebug())) {
// Can't fold blocks that contain noduplicate or convergent calls.
if (CallInst *CI = dyn_cast<CallInst>(&I))
if (CI->cannotDuplicate() || CI->isConvergent())
return false;
+ // Ignore ephemeral values which are deleted during codegen.
+ if (IsEphemeral(&I))
+ EphValues.insert(&I);
// We will delete Phis while threading, so Phis should not be accounted in
- // block's size
- if (!isa<PHINode>(I))
- ++Size;
+ // block's size.
+ else if (!isa<PHINode>(I)) {
+ if (Size++ > MaxSmallBlockSize)
+ return false; // Don't clone large BB's.
+ }
// We can only support instructions that do not define values that are
// live outside of the current basic block.
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -simplifycfg-max-small-block-size=10 -S < %s | FileCheck %s
-; RUN: opt -passes=simplify-cfg -simplifycfg-max-small-block-size=10 -S < %s | FileCheck %s
+; RUN: opt -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -simplifycfg-max-small-block-size=6 -S < %s | FileCheck %s
+; RUN: opt -passes=simplify-cfg -simplifycfg-max-small-block-size=6 -S < %s | FileCheck %s
target datalayout = "e-p:64:64-p5:32:32-A5"
declare void @llvm.assume(i1)
+declare i1 @llvm.type.test(i8*, metadata) nounwind readnone
define void @test_01(i1 %c, i64* align 1 %ptr) local_unnamed_addr #0 {
; CHECK-LABEL: @test_01(
store volatile i64 3, i64* %ptr, align 8
ret void
}
+
+; Try the max block size for PRE again but with the bitcast/type test/assume
+; sequence used for whole program devirt.
+define void @test_04(i1 %c, i64* align 1 %ptr, [3 x i8*]* %vtable) local_unnamed_addr #0 {
+; CHECK-LABEL: @test_04(
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[TRUE2_CRITEDGE:%.*]], label [[FALSE1:%.*]]
+; CHECK: false1:
+; CHECK-NEXT:    store volatile i64 1, i64* [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[VTABLE:%.*]] = bitcast [3 x i8*]* %vtable to i8*
+; CHECK-NEXT:    [[P:%.*]] = call i1 @llvm.type.test(i8* [[VTABLE]], metadata !"foo")
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[P]])
+; CHECK-NEXT:    store volatile i64 0, i64* [[PTR]], align 8
+; CHECK-NEXT:    store volatile i64 -1, i64* [[PTR]], align 8
+; CHECK-NEXT:    store volatile i64 -1, i64* [[PTR]], align 8
+; CHECK-NEXT:    store volatile i64 -1, i64* [[PTR]], align 8
+; CHECK-NEXT:    store volatile i64 -1, i64* [[PTR]], align 8
+; CHECK-NEXT:    store volatile i64 -1, i64* [[PTR]], align 8
+; CHECK-NEXT:    store volatile i64 3, i64* [[PTR]], align 8
+; CHECK-NEXT:    ret void
+; CHECK: true2.critedge:
+; CHECK-NEXT:    [[VTABLE:%.*]] = bitcast [3 x i8*]* %vtable to i8*
+; CHECK-NEXT:    [[P:%.*]] = call i1 @llvm.type.test(i8* [[VTABLE]], metadata !"foo")
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[P]])
+; CHECK-NEXT:    store volatile i64 0, i64* [[PTR]], align 8
+; CHECK-NEXT:    store volatile i64 -1, i64* [[PTR]], align 8
+; CHECK-NEXT:    store volatile i64 -1, i64* [[PTR]], align 8
+; CHECK-NEXT:    store volatile i64 -1, i64* [[PTR]], align 8
+; CHECK-NEXT:    store volatile i64 -1, i64* [[PTR]], align 8
+; CHECK-NEXT:    store volatile i64 -1, i64* [[PTR]], align 8
+; CHECK-NEXT:    store volatile i64 2, i64* [[PTR]], align 8
+; CHECK-NEXT:    ret void
+;
+  br i1 %c, label %true1, label %false1
+
+; %true1 holds exactly 6 non-ephemeral stores -- right at the
+; -simplifycfg-max-small-block-size=6 limit used by the RUN lines -- plus the
+; bitcast/type.test/assume triple, which only feeds the assume and is dropped
+; during codegen.  Threading (cloning %true1 into its predecessors, as the
+; CHECK lines above expect) therefore succeeds only if ephemeral values are
+; excluded from the block-size count.
+true1:                                            ; preds = %false1, %0
+  %vtablei8 = bitcast [3 x i8*]* %vtable to i8*
+  %p = call i1 @llvm.type.test(i8* %vtablei8, metadata !"foo")
+  tail call void @llvm.assume(i1 %p)
+  store volatile i64 0, i64* %ptr, align 8
+  store volatile i64 -1, i64* %ptr, align 8
+  store volatile i64 -1, i64* %ptr, align 8
+  store volatile i64 -1, i64* %ptr, align 8
+  store volatile i64 -1, i64* %ptr, align 8
+  store volatile i64 -1, i64* %ptr, align 8
+  br i1 %c, label %true2, label %false2
+
+false1:                                           ; preds = %0
+  store volatile i64 1, i64* %ptr, align 4
+  br label %true1
+
+true2:                                            ; preds = %true1
+  store volatile i64 2, i64* %ptr, align 8
+  ret void
+
+false2:                                           ; preds = %true1
+  store volatile i64 3, i64* %ptr, align 8
+  ret void
+}