[SimplifyCFG] Ignore ephemeral values when counting insts for threading

author Teresa Johnson <tejohnson@google.com>

Wed, 28 Apr 2021 22:20:04 +0000 (15:20 -0700)

committer Teresa Johnson <tejohnson@google.com>

Mon, 10 May 2021 02:06:54 +0000 (19:06 -0700)
author Teresa Johnson <tejohnson@google.com>
Wed, 28 Apr 2021 22:20:04 +0000 (15:20 -0700)
committer Teresa Johnson <tejohnson@google.com>
Mon, 10 May 2021 02:06:54 +0000 (19:06 -0700)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp

index be108c0..7e96cc8 100644 (file)
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -151,9 +151,10 @@ static cl::opt<unsigned> MaxSpeculationDepth(
               "speculatively executed instructions"));
  
  static cl::opt<int>
-MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10),
-                  cl::desc("Max size of a block which is still considered "
-                           "small enough to thread through"));
+    MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
+                      cl::init(10),
+                      cl::desc("Max size of a block which is still considered "
+                               "small enough to thread through"));
  
  // Two is chosen to allow one negation and a logical combine.
  static cl::opt<unsigned>
@@ -2527,19 +2528,32 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
  static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
    int Size = 0;
  
-  for (Instruction &I : BB->instructionsWithoutDebug()) {
-    if (Size > MaxSmallBlockSize)
-      return false; // Don't clone large BB's.
+  SmallPtrSet<const Value *, 32> EphValues;
+  auto IsEphemeral = [&](const Value *V) {
+    if (isa<AssumeInst>(V))
+      return true;
+    return isSafeToSpeculativelyExecute(V) &&
+           all_of(V->users(),
+                  [&](const User *U) { return EphValues.count(U); });
+  };
  
+  // Walk the loop in reverse so that we can identify ephemeral values properly
+  // (values only feeding assumes).
+  for (Instruction &I : reverse(BB->instructionsWithoutDebug())) {
      // Can't fold blocks that contain noduplicate or convergent calls.
      if (CallInst *CI = dyn_cast<CallInst>(&I))
        if (CI->cannotDuplicate() || CI->isConvergent())
          return false;
  
+    // Ignore ephemeral values which are deleted during codegen.
+    if (IsEphemeral(&I))
+      EphValues.insert(&I);
      // We will delete Phis while threading, so Phis should not be accounted in
-    // block's size
-    if (!isa<PHINode>(I))
-      ++Size;
+    // block's size.
+    else if (!isa<PHINode>(I)) {
+      if (Size++ > MaxSmallBlockSize)
+        return false; // Don't clone large BB's.
+    }
  
      // We can only support instructions that do not define values that are
      // live outside of the current basic block.
diff --git a/llvm/test/Transforms/SimplifyCFG/unprofitable-pr.ll b/llvm/test/Transforms/SimplifyCFG/unprofitable-pr.ll

index 283922f..8801d57 100644 (file)
--- a/llvm/test/Transforms/SimplifyCFG/unprofitable-pr.ll
+++ b/llvm/test/Transforms/SimplifyCFG/unprofitable-pr.ll
@@ -1,10 +1,11 @@
  ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -simplifycfg-max-small-block-size=10 -S < %s | FileCheck %s
-; RUN: opt -passes=simplify-cfg -simplifycfg-max-small-block-size=10 -S < %s | FileCheck %s
+; RUN: opt -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -simplifycfg-max-small-block-size=6 -S < %s | FileCheck %s
+; RUN: opt -passes=simplify-cfg -simplifycfg-max-small-block-size=6 -S < %s | FileCheck %s
  
  target datalayout = "e-p:64:64-p5:32:32-A5"
  
  declare void @llvm.assume(i1)
+declare i1 @llvm.type.test(i8*, metadata) nounwind readnone
  
  define void @test_01(i1 %c, i64* align 1 %ptr) local_unnamed_addr #0 {
  ; CHECK-LABEL: @test_01(
@@ -165,3 +166,61 @@ false2:                                           ; preds = %true1
    store volatile i64 3, i64* %ptr, align 8
    ret void
  }
+
+; Try the max block size for PRE again but with the bitcast/type test/assume
+; sequence used for whole program devirt.
+define void @test_04(i1 %c, i64* align 1 %ptr, [3 x i8*]* %vtable) local_unnamed_addr #0 {
+; CHECK-LABEL: @test_04(
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[TRUE2_CRITEDGE:%.*]], label [[FALSE1:%.*]]
+; CHECK:       false1:
+; CHECK-NEXT:    store volatile i64 1, i64* [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[VTABLE:%.*]] = bitcast [3 x i8*]* %vtable to i8*
+; CHECK-NEXT:    [[P:%.*]] = call i1 @llvm.type.test(i8* [[VTABLE]], metadata !"foo")
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[P]])
+; CHECK-NEXT:    store volatile i64 0, i64* [[PTR]], align 8
+; CHECK-NEXT:    store volatile i64 -1, i64* [[PTR]], align 8
+; CHECK-NEXT:    store volatile i64 -1, i64* [[PTR]], align 8
+; CHECK-NEXT:    store volatile i64 -1, i64* [[PTR]], align 8
+; CHECK-NEXT:    store volatile i64 -1, i64* [[PTR]], align 8
+; CHECK-NEXT:    store volatile i64 -1, i64* [[PTR]], align 8
+; CHECK-NEXT:    store volatile i64 3, i64* [[PTR]], align 8
+; CHECK-NEXT:    ret void
+; CHECK:       true2.critedge:
+; CHECK-NEXT:    [[VTABLE:%.*]] = bitcast [3 x i8*]* %vtable to i8*
+; CHECK-NEXT:    [[P:%.*]] = call i1 @llvm.type.test(i8* [[VTABLE]], metadata !"foo")
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[P]])
+; CHECK-NEXT:    store volatile i64 0, i64* [[PTR]], align 8
+; CHECK-NEXT:    store volatile i64 -1, i64* [[PTR]], align 8
+; CHECK-NEXT:    store volatile i64 -1, i64* [[PTR]], align 8
+; CHECK-NEXT:    store volatile i64 -1, i64* [[PTR]], align 8
+; CHECK-NEXT:    store volatile i64 -1, i64* [[PTR]], align 8
+; CHECK-NEXT:    store volatile i64 -1, i64* [[PTR]], align 8
+; CHECK-NEXT:    store volatile i64 2, i64* [[PTR]], align 8
+; CHECK-NEXT:    ret void
+;
+  br i1 %c, label %true1, label %false1
+
+true1:                                            ; preds = %false1, %0
+  %vtablei8 = bitcast [3 x i8*]* %vtable to i8*
+  %p = call i1 @llvm.type.test(i8* %vtablei8, metadata !"foo")
+  tail call void @llvm.assume(i1 %p)
+  store volatile i64 0, i64* %ptr, align 8
+  store volatile i64 -1, i64* %ptr, align 8
+  store volatile i64 -1, i64* %ptr, align 8
+  store volatile i64 -1, i64* %ptr, align 8
+  store volatile i64 -1, i64* %ptr, align 8
+  store volatile i64 -1, i64* %ptr, align 8
+  br i1 %c, label %true2, label %false2
+
+false1:                                           ; preds = %0
+  store volatile i64 1, i64* %ptr, align 4
+  br label %true1
+
+true2:                                            ; preds = %true1
+  store volatile i64 2, i64* %ptr, align 8
+  ret void
+
+false2:                                           ; preds = %true1
+  store volatile i64 3, i64* %ptr, align 8
+  ret void
+}
author	Teresa Johnson <tejohnson@google.com>
	Wed, 28 Apr 2021 22:20:04 +0000 (15:20 -0700)
committer	Teresa Johnson <tejohnson@google.com>
	Mon, 10 May 2021 02:06:54 +0000 (19:06 -0700)
llvm/lib/Transforms/Utils/SimplifyCFG.cpp		patch \| blob \| history
llvm/test/Transforms/SimplifyCFG/unprofitable-pr.ll		patch \| blob \| history