From 4604695d7c20e72b551a1a5224f3de877cb41bd3 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 24 Sep 2021 16:14:53 +0200 Subject: [PATCH] Revert "[JumpThreading] Ignore free instructions" It caused compiler crashes, see comment on the code review for repro. > This is basically D108837 but for jump threading. Free instructions > should be ignored for the threading decision. JumpThreading already > skips some free instructions (like pointer bitcasts), but does not > skip various free intrinsics -- in fact, it currently gives them a > fairly large cost of 2. > > Differential Revision: https://reviews.llvm.org/D110290 This reverts commit 1e3c6fc7cb9d2ee6a5328881f95d6643afeadbff. --- .../include/llvm/Transforms/Scalar/JumpThreading.h | 8 ++- llvm/lib/Transforms/Scalar/JumpThreading.cpp | 61 ++++++++++++---------- .../Transforms/JumpThreading/free_instructions.ll | 24 ++++----- .../inlining-alignment-assumptions.ll | 12 ++--- 4 files changed, 53 insertions(+), 52 deletions(-) diff --git a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h index 0ac7d7c..816ea10 100644 --- a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h +++ b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h @@ -44,7 +44,6 @@ class PHINode; class SelectInst; class SwitchInst; class TargetLibraryInfo; -class TargetTransformInfo; class Value; /// A private "module" namespace for types and utilities used by @@ -79,7 +78,6 @@ enum ConstantPreference { WantInteger, WantBlockAddress }; /// revectored to the false side of the second if. class JumpThreadingPass : public PassInfoMixin { TargetLibraryInfo *TLI; - TargetTransformInfo *TTI; LazyValueInfo *LVI; AAResults *AA; DomTreeUpdater *DTU; @@ -101,9 +99,9 @@ public: JumpThreadingPass(bool InsertFreezeWhenUnfoldingSelect = false, int T = -1); // Glue for old PM. - bool runImpl(Function &F, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, - LazyValueInfo *LVI, AAResults *AA, DomTreeUpdater *DTU, - bool HasProfileData, std::unique_ptr BFI, + bool runImpl(Function &F, TargetLibraryInfo *TLI, LazyValueInfo *LVI, + AAResults *AA, DomTreeUpdater *DTU, bool HasProfileData, + std::unique_ptr BFI, std::unique_ptr BPI); PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp index fe9a721..688902e 100644 --- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -331,7 +331,7 @@ bool JumpThreading::runOnFunction(Function &F) { BFI.reset(new BlockFrequencyInfo(F, *BPI, LI)); } - bool Changed = Impl.runImpl(F, TLI, TTI, LVI, AA, &DTU, F.hasProfileData(), + bool Changed = Impl.runImpl(F, TLI, LVI, AA, &DTU, F.hasProfileData(), std::move(BFI), std::move(BPI)); if (PrintLVIAfterJumpThreading) { dbgs() << "LVI for function '" << F.getName() << "':\n"; @@ -360,7 +360,7 @@ PreservedAnalyses JumpThreadingPass::run(Function &F, BFI.reset(new BlockFrequencyInfo(F, *BPI, LI)); } - bool Changed = runImpl(F, &TLI, &TTI, &LVI, &AA, &DTU, F.hasProfileData(), + bool Changed = runImpl(F, &TLI, &LVI, &AA, &DTU, F.hasProfileData(), std::move(BFI), std::move(BPI)); if (PrintLVIAfterJumpThreading) { @@ -377,14 +377,12 @@ PreservedAnalyses JumpThreadingPass::run(Function &F, } bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_, - TargetTransformInfo *TTI_, LazyValueInfo *LVI_, - AliasAnalysis *AA_, DomTreeUpdater *DTU_, - bool HasProfileData_, + LazyValueInfo *LVI_, AliasAnalysis *AA_, + DomTreeUpdater *DTU_, bool HasProfileData_, std::unique_ptr BFI_, std::unique_ptr BPI_) { LLVM_DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n"); TLI = TLI_; - TTI = TTI_; LVI = LVI_; AA = AA_; DTU = DTU_; @@ -516,8 +514,7 @@ static void replaceFoldableUses(Instruction *Cond, Value *ToVal) { /// Return the cost of duplicating a piece of this block from first non-phi /// and before StopAt instruction to thread across it. Stop scanning the block /// when exceeding the threshold. If duplication is impossible, returns ~0U. -static unsigned getJumpThreadDuplicationCost(const TargetTransformInfo *TTI, - BasicBlock *BB, +static unsigned getJumpThreadDuplicationCost(BasicBlock *BB, Instruction *StopAt, unsigned Threshold) { assert(StopAt->getParent() == BB && "Not an instruction from proper BB?"); @@ -553,21 +550,26 @@ static unsigned getJumpThreadDuplicationCost(const TargetTransformInfo *TTI, if (Size > Threshold) return Size; + // Debugger intrinsics don't incur code size. + if (isa(I)) continue; + + // Pseudo-probes don't incur code size. + if (isa(I)) + continue; + + // If this is a pointer->pointer bitcast, it is free. + if (isa(I) && I->getType()->isPointerTy()) + continue; + + // Freeze instruction is free, too. + if (isa(I)) + continue; + // Bail out if this instruction gives back a token type, it is not possible // to duplicate it if it is used outside this BB. if (I->getType()->isTokenTy() && I->isUsedOutsideOfBlock(BB)) return ~0U; - // Blocks with NoDuplicate are modelled as having infinite cost, so they - // are never duplicated. - if (const CallInst *CI = dyn_cast(I)) - if (CI->cannotDuplicate() || CI->isConvergent()) - return ~0U; - - if (TTI->getUserCost(&*I, TargetTransformInfo::TCK_SizeAndLatency) - == TargetTransformInfo::TCC_Free) - continue; - // All other instructions count for at least one unit. ++Size; @@ -576,7 +578,11 @@ static unsigned getJumpThreadDuplicationCost(const TargetTransformInfo *TTI, // as having cost of 2 total, and if they are a vector intrinsic, we model // them as having cost 1. if (const CallInst *CI = dyn_cast(I)) { - if (!isa(CI)) + if (CI->cannotDuplicate() || CI->isConvergent()) + // Blocks with NoDuplicate are modelled as having infinite cost, so they + // are never duplicated. + return ~0U; + else if (!isa(CI)) Size += 3; else if (!CI->getType()->isVectorTy()) Size += 1; @@ -2228,10 +2234,10 @@ bool JumpThreadingPass::maybethreadThroughTwoBasicBlocks(BasicBlock *BB, } // Compute the cost of duplicating BB and PredBB. - unsigned BBCost = getJumpThreadDuplicationCost( - TTI, BB, BB->getTerminator(), BBDupThreshold); + unsigned BBCost = + getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold); unsigned PredBBCost = getJumpThreadDuplicationCost( - TTI, PredBB, PredBB->getTerminator(), BBDupThreshold); + PredBB, PredBB->getTerminator(), BBDupThreshold); // Give up if costs are too high. We need to check BBCost and PredBBCost // individually before checking their sum because getJumpThreadDuplicationCost @@ -2339,8 +2345,8 @@ bool JumpThreadingPass::tryThreadEdge( return false; } - unsigned JumpThreadCost = getJumpThreadDuplicationCost( - TTI, BB, BB->getTerminator(), BBDupThreshold); + unsigned JumpThreadCost = + getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold); if (JumpThreadCost > BBDupThreshold) { LLVM_DEBUG(dbgs() << " Not threading BB '" << BB->getName() << "' - Cost is too high: " << JumpThreadCost << "\n"); @@ -2608,8 +2614,8 @@ bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred( return false; } - unsigned DuplicationCost = getJumpThreadDuplicationCost( - TTI, BB, BB->getTerminator(), BBDupThreshold); + unsigned DuplicationCost = + getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold); if (DuplicationCost > BBDupThreshold) { LLVM_DEBUG(dbgs() << " Not duplicating BB '" << BB->getName() << "' - Cost is too high: " << DuplicationCost << "\n"); @@ -3025,8 +3031,7 @@ bool JumpThreadingPass::threadGuard(BasicBlock *BB, IntrinsicInst *Guard, ValueToValueMapTy UnguardedMapping, GuardedMapping; Instruction *AfterGuard = Guard->getNextNode(); - unsigned Cost = - getJumpThreadDuplicationCost(TTI, BB, AfterGuard, BBDupThreshold); + unsigned Cost = getJumpThreadDuplicationCost(BB, AfterGuard, BBDupThreshold); if (Cost > BBDupThreshold) return false; // Duplicate all instructions before the guard and the guard itself to the diff --git a/llvm/test/Transforms/JumpThreading/free_instructions.ll b/llvm/test/Transforms/JumpThreading/free_instructions.ll index 76392af..f768ec9 100644 --- a/llvm/test/Transforms/JumpThreading/free_instructions.ll +++ b/llvm/test/Transforms/JumpThreading/free_instructions.ll @@ -5,28 +5,26 @@ ; the jump threading threshold, as everything else are free instructions. define i32 @free_instructions(i1 %c, i32* %p) { ; CHECK-LABEL: @free_instructions( -; CHECK-NEXT: br i1 [[C:%.*]], label [[IF2:%.*]], label [[ELSE2:%.*]] -; CHECK: if2: +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[ELSE:%.*]] +; CHECK: if: ; CHECK-NEXT: store i32 -1, i32* [[P:%.*]], align 4 +; CHECK-NEXT: br label [[JOIN:%.*]] +; CHECK: else: +; CHECK-NEXT: store i32 -2, i32* [[P]], align 4 +; CHECK-NEXT: br label [[JOIN]] +; CHECK: join: ; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META0:![0-9]+]]) ; CHECK-NEXT: store i32 1, i32* [[P]], align 4, !noalias !0 ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[P]], i64 32) ] ; CHECK-NEXT: store i32 2, i32* [[P]], align 4 -; CHECK-NEXT: [[P21:%.*]] = bitcast i32* [[P]] to i8* -; CHECK-NEXT: [[P32:%.*]] = call i8* @llvm.launder.invariant.group.p0i8(i8* [[P21]]) -; CHECK-NEXT: [[P43:%.*]] = bitcast i8* [[P32]] to i32* -; CHECK-NEXT: store i32 3, i32* [[P43]], align 4, !invariant.group !3 -; CHECK-NEXT: ret i32 0 -; CHECK: else2: -; CHECK-NEXT: store i32 -2, i32* [[P]], align 4 -; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META4:![0-9]+]]) -; CHECK-NEXT: store i32 1, i32* [[P]], align 4, !noalias !4 -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[P]], i64 32) ] -; CHECK-NEXT: store i32 2, i32* [[P]], align 4 ; CHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P]] to i8* ; CHECK-NEXT: [[P3:%.*]] = call i8* @llvm.launder.invariant.group.p0i8(i8* [[P2]]) ; CHECK-NEXT: [[P4:%.*]] = bitcast i8* [[P3]] to i32* ; CHECK-NEXT: store i32 3, i32* [[P4]], align 4, !invariant.group !3 +; CHECK-NEXT: br i1 [[C]], label [[IF2:%.*]], label [[ELSE2:%.*]] +; CHECK: if2: +; CHECK-NEXT: ret i32 0 +; CHECK: else2: ; CHECK-NEXT: ret i32 1 ; br i1 %c, label %if, label %else diff --git a/llvm/test/Transforms/PhaseOrdering/inlining-alignment-assumptions.ll b/llvm/test/Transforms/PhaseOrdering/inlining-alignment-assumptions.ll index f764a59..57014e8 100644 --- a/llvm/test/Transforms/PhaseOrdering/inlining-alignment-assumptions.ll +++ b/llvm/test/Transforms/PhaseOrdering/inlining-alignment-assumptions.ll @@ -32,10 +32,13 @@ define void @caller1(i1 %c, i64* align 1 %ptr) { ; ASSUMPTIONS-OFF-NEXT: br label [[COMMON_RET]] ; ; ASSUMPTIONS-ON-LABEL: @caller1( -; ASSUMPTIONS-ON-NEXT: br i1 [[C:%.*]], label [[COMMON_RET:%.*]], label [[FALSE2:%.*]] +; ASSUMPTIONS-ON-NEXT: br i1 [[C:%.*]], label [[COMMON_RET:%.*]], label [[FALSE1:%.*]] +; ASSUMPTIONS-ON: false1: +; ASSUMPTIONS-ON-NEXT: store volatile i64 1, i64* [[PTR:%.*]], align 4 +; ASSUMPTIONS-ON-NEXT: br label [[COMMON_RET]] ; ASSUMPTIONS-ON: common.ret: -; ASSUMPTIONS-ON-NEXT: [[DOTSINK:%.*]] = phi i64 [ 3, [[FALSE2]] ], [ 2, [[TMP0:%.*]] ] -; ASSUMPTIONS-ON-NEXT: call void @llvm.assume(i1 true) [ "align"(i64* [[PTR:%.*]], i64 8) ] +; ASSUMPTIONS-ON-NEXT: [[DOTSINK:%.*]] = phi i64 [ 3, [[FALSE1]] ], [ 2, [[TMP0:%.*]] ] +; ASSUMPTIONS-ON-NEXT: call void @llvm.assume(i1 true) [ "align"(i64* [[PTR]], i64 8) ] ; ASSUMPTIONS-ON-NEXT: store volatile i64 0, i64* [[PTR]], align 8 ; ASSUMPTIONS-ON-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 ; ASSUMPTIONS-ON-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 @@ -44,9 +47,6 @@ define void @caller1(i1 %c, i64* align 1 %ptr) { ; ASSUMPTIONS-ON-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 ; ASSUMPTIONS-ON-NEXT: store volatile i64 [[DOTSINK]], i64* [[PTR]], align 8 ; ASSUMPTIONS-ON-NEXT: ret void -; ASSUMPTIONS-ON: false2: -; ASSUMPTIONS-ON-NEXT: store volatile i64 1, i64* [[PTR]], align 4 -; ASSUMPTIONS-ON-NEXT: br label [[COMMON_RET]] ; br i1 %c, label %true1, label %false1 -- 2.7.4