From 39c1653b3dbb7d1c439a3e8cf31d1aa159a4afc5 Mon Sep 17 00:00:00 2001 From: Juneyoung Lee Date: Thu, 10 Sep 2020 15:49:04 +0900 Subject: [PATCH] [JumpThreading] Conditionally freeze its condition when unfolding select This patch fixes pr45956 (https://bugs.llvm.org/show_bug.cgi?id=45956 ). To minimize its impact to the quality of generated code, I suggest enabling this only for LTO as a start (it has two JumpThreading passes registered). This patch contains a flag that makes JumpThreading enable it. Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D84940 --- llvm/include/llvm/Transforms/Scalar.h | 8 +- .../include/llvm/Transforms/Scalar/JumpThreading.h | 3 +- llvm/lib/Transforms/Scalar/JumpThreading.cpp | 29 ++- .../JumpThreading/select-unfold-freeze.ll | 248 +++++++++++++++++++++ 4 files changed, 272 insertions(+), 16 deletions(-) create mode 100644 llvm/test/Transforms/JumpThreading/select-unfold-freeze.ll diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h index 242ffa0..5ab8a05 100644 --- a/llvm/include/llvm/Transforms/Scalar.h +++ b/llvm/include/llvm/Transforms/Scalar.h @@ -240,10 +240,12 @@ FunctionPass *createReassociatePass(); //===----------------------------------------------------------------------===// // // JumpThreading - Thread control through mult-pred/multi-succ blocks where some -// preds always go to some succ. Thresholds other than minus one override the -// internal BB duplication default threshold. +// preds always go to some succ. If FreezeSelectCond is true, unfold the +// condition of a select that unfolds to branch. Thresholds other than minus one +// override the internal BB duplication default threshold. // -FunctionPass *createJumpThreadingPass(int Threshold = -1); +FunctionPass *createJumpThreadingPass(bool FreezeSelectCond = false, + int Threshold = -1); //===----------------------------------------------------------------------===// // diff --git a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h index 327bf6d..b5b9074 100644 --- a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h +++ b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h @@ -91,9 +91,10 @@ class JumpThreadingPass : public PassInfoMixin { unsigned BBDupThreshold; unsigned DefaultBBDupThreshold; + bool InsertFreezeWhenUnfoldingSelect; public: - JumpThreadingPass(int T = -1); + JumpThreadingPass(bool InsertFreezeWhenUnfoldingSelect = false, int T = -1); // Glue for old PM. bool runImpl(Function &F, TargetLibraryInfo *TLI_, LazyValueInfo *LVI_, diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp index 311ca11..354afc7 100644 --- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -104,6 +104,11 @@ static cl::opt PrintLVIAfterJumpThreading( cl::desc("Print the LazyValueInfo cache after JumpThreading"), cl::init(false), cl::Hidden); +static cl::opt JumpThreadingFreezeSelectCond( + "jump-threading-freeze-select-cond", + cl::desc("Freeze the condition when unfolding select"), cl::init(false), + cl::Hidden); + static cl::opt ThreadAcrossLoopHeaders( "jump-threading-across-loop-headers", cl::desc("Allow JumpThreading to thread across loop headers, for testing"), @@ -133,7 +138,8 @@ namespace { public: static char ID; // Pass identification - JumpThreading(int T = -1) : FunctionPass(ID), Impl(T) { + JumpThreading(bool InsertFreezeWhenUnfoldingSelect = false, int T = -1) + : FunctionPass(ID), Impl(InsertFreezeWhenUnfoldingSelect, T) { initializeJumpThreadingPass(*PassRegistry::getPassRegistry()); } @@ -166,11 +172,12 @@ INITIALIZE_PASS_END(JumpThreading, "jump-threading", "Jump Threading", false, false) // Public interface to the Jump Threading pass -FunctionPass *llvm::createJumpThreadingPass(int Threshold) { - return new JumpThreading(Threshold); +FunctionPass *llvm::createJumpThreadingPass(bool InsertFr, int Threshold) { + return new JumpThreading(InsertFr, Threshold); } -JumpThreadingPass::JumpThreadingPass(int T) { +JumpThreadingPass::JumpThreadingPass(bool InsertFr, int T) { + InsertFreezeWhenUnfoldingSelect = JumpThreadingFreezeSelectCond | InsertFr; DefaultBBDupThreshold = (T == -1) ? BBDuplicateThreshold : unsigned(T); } @@ -2798,13 +2805,8 @@ bool JumpThreadingPass::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) { /// select is not jump-threaded, it will be folded again in the later /// optimizations. bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) { - // This transform can introduce a UB (a conditional branch that depends on a - // poison value) that was not present in the original program. See - // @TryToUnfoldSelectInCurrBB test in test/Transforms/JumpThreading/select.ll. + // This transform would reduce the quality of msan diagnostics. // Disable this transform under MemorySanitizer. - // FIXME: either delete it or replace with a valid transform. This issue is - // not limited to MemorySanitizer (but has only been observed as an MSan false - // positive in practice so far). if (BB->getParent()->hasFnAttribute(Attribute::SanitizeMemory)) return false; @@ -2852,8 +2854,11 @@ bool JumpThreadingPass::TryToUnfoldSelectInCurrBB(BasicBlock *BB) { if (!SI) continue; // Expand the select. - Instruction *Term = - SplitBlockAndInsertIfThen(SI->getCondition(), SI, false); + Value *Cond = SI->getCondition(); + if (InsertFreezeWhenUnfoldingSelect && + !isGuaranteedNotToBeUndefOrPoison(Cond, SI, &DTU->getDomTree())) + Cond = new FreezeInst(Cond, "cond.fr", SI); + Instruction *Term = SplitBlockAndInsertIfThen(Cond, SI, false); BasicBlock *SplitBB = SI->getParent(); BasicBlock *NewBB = Term->getParent(); PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI); diff --git a/llvm/test/Transforms/JumpThreading/select-unfold-freeze.ll b/llvm/test/Transforms/JumpThreading/select-unfold-freeze.ll new file mode 100644 index 0000000..12288fc --- /dev/null +++ b/llvm/test/Transforms/JumpThreading/select-unfold-freeze.ll @@ -0,0 +1,248 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -jump-threading-freeze-select-cond -jump-threading < %s | FileCheck %s + +declare void @foo() +declare void @bar() +declare void @baz() +declare void @quux() + + +define void @test_switch_cmp(i1 %cond, i32 %val, i8 %value) nounwind { +; CHECK-LABEL: @test_switch_cmp( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[COND:%.*]], label [[L0:%.*]], label [[L0_THREAD:%.*]] +; CHECK: L0: +; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i32 [ [[VAL:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[VAL_PHI]], 0 +; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[CMP]] +; CHECK-NEXT: br i1 [[COND_FR]], label [[L1:%.*]], label [[TMP0:%.*]] +; CHECK: 0: +; CHECK-NEXT: [[TMP1:%.*]] = phi i8 [ [[VALUE:%.*]], [[L0]] ] +; CHECK-NEXT: switch i8 [[TMP1]], label [[L3:%.*]] [ +; CHECK-NEXT: i8 1, label [[L1]] +; CHECK-NEXT: i8 2, label [[L2:%.*]] +; CHECK-NEXT: ] +; CHECK: L1: +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: ret void +; CHECK: L2: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: ret void +; CHECK: L3: +; CHECK-NEXT: call void @baz() +; CHECK-NEXT: ret void +; CHECK: L0.thread: +; CHECK-NEXT: call void @quux() +; CHECK-NEXT: br label [[L1]] +; +entry: + br i1 %cond, label %L0, label %L4 +L0: + %val.phi = phi i32 [%val, %entry], [-1, %L4] + %cmp = icmp slt i32 %val.phi, 0 + %expr = select i1 %cmp, i8 1, i8 %value + switch i8 %expr, label %L3 [i8 1, label %L1 i8 2, label %L2] + +L1: + call void @foo() + ret void +L2: + call void @bar() + ret void +L3: + call void @baz() + ret void +L4: + call void @quux() + br label %L0 +} + +define i32 @unfold3(i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z, i32 %j) nounwind { +; CHECK-LABEL: @unfold3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[J:%.*]], 2 +; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[U:%.*]], [[V:%.*]] +; CHECK-NEXT: br i1 [[CMP_I]], label [[DOTEXIT_THREAD4:%.*]], label [[COND_FALSE_I:%.*]] +; CHECK: cond.false.i: +; CHECK-NEXT: [[CMP4_I:%.*]] = icmp sgt i32 [[U]], [[V]] +; CHECK-NEXT: br i1 [[CMP4_I]], label [[DOTEXIT_THREAD:%.*]], label [[COND_FALSE_6_I:%.*]] +; CHECK: cond.false.6.i: +; CHECK-NEXT: [[CMP8_I:%.*]] = icmp slt i32 [[W:%.*]], [[X:%.*]] +; CHECK-NEXT: br i1 [[CMP8_I]], label [[DOTEXIT_THREAD4]], label [[COND_FALSE_10_I:%.*]] +; CHECK: cond.false.10.i: +; CHECK-NEXT: [[CMP13_I:%.*]] = icmp sgt i32 [[W]], [[X]] +; CHECK-NEXT: br i1 [[CMP13_I]], label [[DOTEXIT_THREAD]], label [[DOTEXIT:%.*]] +; CHECK: .exit: +; CHECK-NEXT: [[PHITMP:%.*]] = icmp sge i32 [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[PHITMP]] +; CHECK-NEXT: br i1 [[COND_FR]], label [[DOTEXIT_THREAD]], label [[DOTEXIT_THREAD4]] +; CHECK: .exit.thread: +; CHECK-NEXT: br label [[DOTEXIT_THREAD4]] +; CHECK: .exit.thread4: +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[J]], [[DOTEXIT_THREAD]] ], [ [[ADD3]], [[DOTEXIT]] ], [ [[ADD3]], [[ENTRY:%.*]] ], [ [[ADD3]], [[COND_FALSE_6_I]] ] +; CHECK-NEXT: ret i32 [[TMP0]] +; +entry: + %add3 = add nsw i32 %j, 2 + %cmp.i = icmp slt i32 %u, %v + br i1 %cmp.i, label %.exit, label %cond.false.i + +cond.false.i: ; preds = %entry + %cmp4.i = icmp sgt i32 %u, %v + br i1 %cmp4.i, label %.exit, label %cond.false.6.i + +cond.false.6.i: ; preds = %cond.false.i + %cmp8.i = icmp slt i32 %w, %x + br i1 %cmp8.i, label %.exit, label %cond.false.10.i + +cond.false.10.i: ; preds = %cond.false.6.i + %cmp13.i = icmp sgt i32 %w, %x + br i1 %cmp13.i, label %.exit, label %cond.false.15.i + +cond.false.15.i: ; preds = %cond.false.10.i + %phitmp = icmp sge i32 %y, %z + br label %.exit + +.exit: ; preds = %entry, %cond.false.i, %cond.false.6.i, %cond.false.10.i, %cond.false.15.i + %cond23.i = phi i1 [ false, %entry ], [ true, %cond.false.i ], [ false, %cond.false.6.i ], [ %phitmp, %cond.false.15.i ], [ true, %cond.false.10.i ] + %j.add3 = select i1 %cond23.i, i32 %j, i32 %add3 + ret i32 %j.add3 +} + +define i32 @unfold4(i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z, i32 %j) nounwind { +; CHECK-LABEL: @unfold4( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[J:%.*]], 2 +; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[U:%.*]], [[V:%.*]] +; CHECK-NEXT: br i1 [[CMP_I]], label [[DOTEXIT_THREAD:%.*]], label [[COND_FALSE_I:%.*]] +; CHECK: cond.false.i: +; CHECK-NEXT: [[CMP4_I:%.*]] = icmp sgt i32 [[U]], [[V]] +; CHECK-NEXT: br i1 [[CMP4_I]], label [[DOTEXIT_THREAD5:%.*]], label [[COND_FALSE_6_I:%.*]] +; CHECK: cond.false.6.i: +; CHECK-NEXT: [[CMP8_I:%.*]] = icmp slt i32 [[W:%.*]], [[X:%.*]] +; CHECK-NEXT: br i1 [[CMP8_I]], label [[DOTEXIT_THREAD]], label [[COND_FALSE_10_I:%.*]] +; CHECK: cond.false.10.i: +; CHECK-NEXT: [[CMP13_I:%.*]] = icmp sgt i32 [[W]], [[X]] +; CHECK-NEXT: br i1 [[CMP13_I]], label [[DOTEXIT_THREAD5]], label [[DOTEXIT:%.*]] +; CHECK: .exit: +; CHECK-NEXT: [[CMP19_I:%.*]] = icmp sge i32 [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP19_I]] to i32 +; CHECK-NEXT: [[LNOT_I18:%.*]] = icmp eq i32 [[CONV]], 1 +; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[LNOT_I18]] +; CHECK-NEXT: br i1 [[COND_FR]], label [[DOTEXIT_THREAD]], label [[DOTEXIT_THREAD5]] +; CHECK: .exit.thread: +; CHECK-NEXT: br label [[DOTEXIT_THREAD5]] +; CHECK: .exit.thread5: +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[J]], [[DOTEXIT_THREAD]] ], [ [[ADD3]], [[DOTEXIT]] ], [ [[ADD3]], [[COND_FALSE_I]] ], [ [[ADD3]], [[COND_FALSE_10_I]] ] +; CHECK-NEXT: ret i32 [[TMP0]] +; +entry: + %add3 = add nsw i32 %j, 2 + %cmp.i = icmp slt i32 %u, %v + br i1 %cmp.i, label %.exit, label %cond.false.i + +cond.false.i: ; preds = %entry + %cmp4.i = icmp sgt i32 %u, %v + br i1 %cmp4.i, label %.exit, label %cond.false.6.i + +cond.false.6.i: ; preds = %cond.false.i + %cmp8.i = icmp slt i32 %w, %x + br i1 %cmp8.i, label %.exit, label %cond.false.10.i + +cond.false.10.i: ; preds = %cond.false.6.i + %cmp13.i = icmp sgt i32 %w, %x + br i1 %cmp13.i, label %.exit, label %cond.false.15.i + +cond.false.15.i: ; preds = %cond.false.10.i + %cmp19.i = icmp sge i32 %y, %z + %conv = zext i1 %cmp19.i to i32 + br label %.exit + +.exit: ; preds = %entry, %cond.false.i, %cond.false.6.i, %cond.false.10.i, %cond.false.15.i + %cond23.i = phi i32 [ 1, %entry ], [ 0, %cond.false.i ], [ 1, %cond.false.6.i ], [ %conv, %cond.false.15.i ], [ 0, %cond.false.10.i ] + %lnot.i18 = icmp eq i32 %cond23.i, 1 + %j.add3 = select i1 %lnot.i18, i32 %j, i32 %add3 + ret i32 %j.add3 +} + +; TODO: cond23_i should be constant-folded. +define i32 @unfold5(i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z, i32 %j) nounwind { +; CHECK-LABEL: @unfold5( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[J:%.*]], 2 +; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[U:%.*]], [[V:%.*]] +; CHECK-NEXT: br i1 [[CMP_I]], label [[DOTEXIT_THREAD:%.*]], label [[COND_FALSE_I:%.*]] +; CHECK: cond.false.i: +; CHECK-NEXT: [[CMP4_I:%.*]] = icmp sgt i32 [[U]], [[V]] +; CHECK-NEXT: br i1 [[CMP4_I]], label [[DOTEXIT_THREAD]], label [[COND_FALSE_6_I:%.*]] +; CHECK: cond.false.6.i: +; CHECK-NEXT: [[CMP8_I:%.*]] = icmp slt i32 [[W:%.*]], [[X:%.*]] +; CHECK-NEXT: br i1 [[CMP8_I]], label [[DOTEXIT_THREAD]], label [[COND_FALSE_10_I:%.*]] +; CHECK: cond.false.10.i: +; CHECK-NEXT: [[CMP13_I:%.*]] = icmp sgt i32 [[W]], [[X]] +; CHECK-NEXT: br i1 [[CMP13_I]], label [[TMP0:%.*]], label [[COND_FALSE_15_I:%.*]] +; CHECK: cond.false.15.i: +; CHECK-NEXT: [[CMP19_I:%.*]] = icmp sge i32 [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP19_I]] to i32 +; CHECK-NEXT: br label [[DOTEXIT_THREAD]] +; CHECK: 0: +; CHECK-NEXT: [[COND23_I:%.*]] = phi i32 [ 7, [[COND_FALSE_10_I]] ] +; CHECK-NEXT: [[LNOT_I18:%.*]] = icmp sgt i32 [[COND23_I]], 5 +; CHECK-NEXT: br label [[DOTEXIT_THREAD]] +; CHECK: .exit.thread: +; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[J]], [[TMP0]] ], [ [[CONV]], [[COND_FALSE_15_I]] ], [ 1, [[COND_FALSE_6_I]] ], [ 3, [[COND_FALSE_I]] ], [ 2, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32 [[TMP1]] +; +entry: + %add3 = add nsw i32 %j, 2 + %cmp.i = icmp slt i32 %u, %v + br i1 %cmp.i, label %.exit, label %cond.false.i + +cond.false.i: ; preds = %entry + %cmp4.i = icmp sgt i32 %u, %v + br i1 %cmp4.i, label %.exit, label %cond.false.6.i + +cond.false.6.i: ; preds = %cond.false.i + %cmp8.i = icmp slt i32 %w, %x + br i1 %cmp8.i, label %.exit, label %cond.false.10.i + +cond.false.10.i: ; preds = %cond.false.6.i + %cmp13.i = icmp sgt i32 %w, %x + br i1 %cmp13.i, label %.exit, label %cond.false.15.i + +cond.false.15.i: ; preds = %cond.false.10.i + %cmp19.i = icmp sge i32 %y, %z + %conv = zext i1 %cmp19.i to i32 + br label %.exit + +.exit: ; preds = %entry, %cond.false.i, %cond.false.6.i, %cond.false.10.i, %cond.false.15.i + %cond23.i = phi i32 [ 2, %entry ], [ 3, %cond.false.i ], [ 1, %cond.false.6.i ], [ %conv, %cond.false.15.i ], [ 7, %cond.false.10.i ] + %lnot.i18 = icmp sgt i32 %cond23.i, 5 + %j.add3 = select i1 %lnot.i18, i32 %j, i32 %cond23.i + ret i32 %j.add3 +} + +define i32 @TryToUnfoldSelectInCurrBB(i1 %b, i1 %ui, i32 %s, i1 %x) { +; CHECK-LABEL: @TryToUnfoldSelectInCurrBB( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[B:%.*]], label [[IF_END_THREAD:%.*]], label [[IF_END:%.*]] +; CHECK: if.end: +; CHECK-NEXT: [[COND_FR:%.*]] = freeze i1 [[X:%.*]] +; CHECK-NEXT: br i1 [[COND_FR]], label [[TMP0:%.*]], label [[IF_END_THREAD]] +; CHECK: 0: +; CHECK-NEXT: br label [[IF_END_THREAD]] +; CHECK: if.end.thread: +; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[S:%.*]], [[TMP0]] ], [ 42, [[IF_END]] ], [ 42, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32 [[TMP1]] +; +entry: + br i1 %b, label %if.end, label %if.else + +if.else: + br label %if.end + +if.end: + %v = phi i1 [ %x, %if.else ], [ false, %entry ] + %v1 = select i1 %v, i32 %s, i32 42 + ret i32 %v1 +} -- 2.7.4