From c6b88cb9184fc6c17bb3d9c2c9568646dc46638d Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 16 Jun 2022 12:13:37 +0200 Subject: [PATCH] [InstCombine] Push freeze through recurrence phi We really want to push freezes through recurrence phis, so that we freeze only the start value, rather than the IV value on every iteration. foldOpIntoPhi() already handles this for the case where the transfer function doesn't produce poison, e.g. %iv.next = add %iv, 1. However, this does not work if nowrap flags are present, e.g. the very common %iv.next = add nuw %iv, 1 case. This patch adds a fold that pushes freeze instructions to the start value by checking whether all backedge values will be non-poison after poison generating flags have been dropped. This allows pushing freezes out of loops in most cases. I suspect that this also obsoletes the CanonicalizeFreezeInLoops pass, and we can probably drop it. Fixes https://github.com/llvm/llvm-project/issues/56048. Differential Revision: https://reviews.llvm.org/D127960 --- .../Transforms/InstCombine/InstCombineInternal.h | 1 + .../InstCombine/InstructionCombining.cpp | 70 ++++++++++++++++++++++ llvm/test/Transforms/InstCombine/freeze.ll | 25 ++++---- 3 files changed, 83 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index dcfc32f..271154b 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -173,6 +173,7 @@ public: Instruction *visitVAEndInst(VAEndInst &I); Value *pushFreezeToPreventPoisonFromPropagating(FreezeInst &FI); bool freezeOtherUses(FreezeInst &FI); + Instruction *foldFreezeIntoRecurrence(FreezeInst &I, PHINode *PN); Instruction *visitFreeze(FreezeInst &I); /// Specify what to return for unhandled instructions. diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 59b9b8f..59491a8 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -3773,6 +3773,74 @@ InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) { return OrigOp; } +Instruction *InstCombinerImpl::foldFreezeIntoRecurrence(FreezeInst &FI, + PHINode *PN) { + // Detect whether this is a recurrence with a start value and some number of + // backedge values. We'll check whether we can push the freeze through the + // backedge values (possibly dropping poison flags along the way) until we + // reach the phi again. In that case, we can move the freeze to the start + // value. + Use *StartU = nullptr; + SmallVector Worklist; + for (Use &U : PN->incoming_values()) { + if (DT.dominates(PN->getParent(), PN->getIncomingBlock(U))) { + // Add backedge value to worklist. + Worklist.push_back(U.get()); + continue; + } + + // Don't bother handling multiple start values. + if (StartU) + return nullptr; + StartU = &U; + } + + if (!StartU || Worklist.empty()) + return nullptr; // Not a recurrence. + + Value *StartV = StartU->get(); + BasicBlock *StartBB = PN->getIncomingBlock(*StartU); + bool StartNeedsFreeze = !isGuaranteedNotToBeUndefOrPoison(StartV); + // We can't insert freeze if the the start value is the result of the + // terminator (e.g. an invoke). + if (StartNeedsFreeze && StartBB->getTerminator() == StartV) + return nullptr; + + SmallPtrSet Visited; + SmallVector DropFlags; + while (!Worklist.empty()) { + Value *V = Worklist.pop_back_val(); + if (!Visited.insert(V).second) + continue; + + if (Visited.size() > 32) + return nullptr; // Limit the total number of values we inspect. + + // Assume that PN is non-poison, because it will be after the transform. + if (V == PN || isGuaranteedNotToBeUndefOrPoison(V)) + continue; + + Instruction *I = dyn_cast(V); + if (!I || canCreateUndefOrPoison(cast(I), + /*ConsiderFlags*/ false)) + return nullptr; + + DropFlags.push_back(I); + append_range(Worklist, I->operands()); + } + + for (Instruction *I : DropFlags) + I->dropPoisonGeneratingFlags(); + + if (StartNeedsFreeze) { + Builder.SetInsertPoint(StartBB->getTerminator()); + Value *FrozenStartV = Builder.CreateFreeze(StartV, + StartV->getName() + ".fr"); + replaceUse(*StartU, FrozenStartV); + } + return replaceInstUsesWith(FI, PN); +} + bool InstCombinerImpl::freezeOtherUses(FreezeInst &FI) { Value *Op = FI.getOperand(0); @@ -3826,6 +3894,8 @@ Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) { if (auto *PN = dyn_cast(Op0)) { if (Instruction *NV = foldOpIntoPhi(I, PN)) return NV; + if (Instruction *NV = foldFreezeIntoRecurrence(I, PN)) + return NV; } if (Value *NI = pushFreezeToPreventPoisonFromPropagating(I)) diff --git a/llvm/test/Transforms/InstCombine/freeze.ll b/llvm/test/Transforms/InstCombine/freeze.ll index 2460703..314a3c6 100644 --- a/llvm/test/Transforms/InstCombine/freeze.ll +++ b/llvm/test/Transforms/InstCombine/freeze.ll @@ -743,11 +743,11 @@ exit: ; preds = %loop define void @fold_phi_drop_flags(i32 %init, i32 %n) { ; CHECK-LABEL: @fold_phi_drop_flags( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[INIT_FR:%.*]] = freeze i32 [[INIT:%.*]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[INIT:%.*]], [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[I_FR:%.*]] = freeze i32 [[I]] -; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I_FR]], 1 +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[INIT_FR]], [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[I_NEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: @@ -824,12 +824,12 @@ exit: ; preds = %loop define void @fold_phi_multiple_insts(i32 %init, i32 %n) { ; CHECK-LABEL: @fold_phi_multiple_insts( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[INIT_FR:%.*]] = freeze i32 [[INIT:%.*]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[INIT:%.*]], [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[I_FR:%.*]] = freeze i32 [[I]] -; CHECK-NEXT: [[I_SQ:%.*]] = mul nuw nsw i32 [[I_FR]], [[I_FR]] -; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I_SQ]], 1 +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[INIT_FR]], [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[I_SQ:%.*]] = mul i32 [[I]], [[I]] +; CHECK-NEXT: [[I_NEXT]] = add i32 [[I_SQ]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[I_NEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: @@ -853,15 +853,15 @@ exit: ; preds = %loop define void @fold_phi_multiple_back_edges(i32 %init, i32 %n) { ; CHECK-LABEL: @fold_phi_multiple_back_edges( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[INIT_FR:%.*]] = freeze i32 [[INIT:%.*]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[INIT:%.*]], [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ], [ [[I_NEXT2:%.*]], [[LOOP_LATCH2:%.*]] ] -; CHECK-NEXT: [[I_FR:%.*]] = freeze i32 [[I]] -; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I_FR]], 1 +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[INIT_FR]], [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ], [ [[I_NEXT2:%.*]], [[LOOP_LATCH2:%.*]] ] +; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[I_NEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[LOOP_LATCH2]] ; CHECK: loop.latch2: -; CHECK-NEXT: [[I_NEXT2]] = add nuw nsw i32 [[I_FR]], 2 +; CHECK-NEXT: [[I_NEXT2]] = add i32 [[I]], 2 ; CHECK-NEXT: br i1 false, label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -961,8 +961,7 @@ define void @fold_phi_invoke_noundef_start_value(i32 %n) personality i8* undef { ; CHECK-NEXT: to label [[LOOP:%.*]] unwind label [[UNWIND:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[INIT]], [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[I_FR:%.*]] = freeze i32 [[I]] -; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I_FR]], 1 +; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[I_NEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: unwind: -- 2.7.4