From 3d9beefc7d713ad8462d92427ccd17b9532ce904 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Sun, 15 Aug 2021 19:01:44 +0300 Subject: [PATCH] Reland [SimplifyCFG] performBranchToCommonDestFolding(): form block-closed SSA form before cloning instructions (PR51125) ... with test change this time. LLVM IR SSA form is "implicit" in `@pr51125`. While is a valid LLVM IR, and does not require any PHI nodes, that completely breaks the further logic in `CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses()` that updates the live-out uses of the bonus instructions. What i believe we need to do, is to first make the SSA form explicit, by inserting tautological PHI nodes, and rewriting the offending uses. ``` $ /builddirs/llvm-project/build-Clang12/bin/opt -load /repositories/alive2/build-Clang-release/tv/tv.so -load-pass-plugin /repositories/alive2/build-Clang-release/tv/tv.so -tv -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -bonus-inst-threshold=10 -tv -o /dev/null /tmp/test.ll ---------------------------------------- @global_pr51125 = global 4 bytes, align 4 define i32 @pr51125() { %entry: br label %L %L: %ld = load i32, * @global_pr51125, align 4 %iszero = icmp eq i32 %ld, 0 br i1 %iszero, label %exit, label %L2 %L2: store i32 4294967295, * @global_pr51125, align 4 %cmp = icmp eq i32 %ld, 4294967295 br i1 %cmp, label %L, label %exit %exit: %r = phi i32 [ %ld, %L2 ], [ %ld, %L ] ret i32 %r } => @global_pr51125 = global 4 bytes, align 4 define i32 @pr51125() { %entry: %ld.old = load i32, * @global_pr51125, align 4 %iszero.old = icmp eq i32 %ld.old, 0 br i1 %iszero.old, label %exit, label %L2 %L2: %ld2 = phi i32 [ %ld.old, %entry ], [ %ld, %L2 ] store i32 4294967295, * @global_pr51125, align 4 %cmp = icmp ne i32 %ld2, 4294967295 %ld = load i32, * @global_pr51125, align 4 %iszero = icmp eq i32 %ld, 0 %or.cond = select i1 %cmp, i1 1, i1 %iszero br i1 %or.cond, label %exit, label %L2 %exit: %ld1 = phi i32 [ poison, %L2 ], [ %ld.old, %entry ] %r = phi i32 [ %ld2, %L2 ], [ %ld.old, %entry ] ret i32 %r } Transformation seems to be correct! ``` Fixes https://bugs.llvm.org/show_bug.cgi?id=51125 Reviewed By: nikic Differential Revision: https://reviews.llvm.org/D106317 --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 75 +++++++++++++++++++--- .../SimplifyCFG/fold-branch-to-common-dest.ll | 18 +++--- 2 files changed, 75 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 847fdd7..68a0388 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1095,17 +1095,24 @@ static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses( // Update (liveout) uses of bonus instructions, // now that the bonus instruction has been cloned into predecessor. - SSAUpdater SSAUpdate; - SSAUpdate.Initialize(BonusInst.getType(), - (NewBonusInst->getName() + ".merge").str()); - SSAUpdate.AddAvailableValue(BB, &BonusInst); - SSAUpdate.AddAvailableValue(PredBlock, NewBonusInst); + // Note that we expect to be in a block-closed SSA form for this to work! for (Use &U : make_early_inc_range(BonusInst.uses())) { auto *UI = cast(U.getUser()); - if (UI->getParent() != PredBlock) - SSAUpdate.RewriteUseAfterInsertions(U); - else // Use is in the same block as, and comes before, NewBonusInst. - SSAUpdate.RewriteUse(U); + auto *PN = dyn_cast(UI); + if (!PN) { + assert(UI->getParent() == BB && BonusInst.comesBefore(UI) && + "If the user is not a PHI node, then it should be in the same " + "block as, and come after, the original bonus instruction."); + continue; // Keep using the original bonus instruction. + } + // Is this the block-closed SSA form PHI node? + if (PN->getIncomingBlock(U) == BB) + continue; // Great, keep using the original bonus instruction. + // The only other alternative is an "use" when coming from + // the predecessor block - here we should refer to the cloned bonus instr. + assert(PN->getIncomingBlock(U) == PredBlock && + "Not in block-closed SSA form?"); + U.set(NewBonusInst); } } } @@ -3032,6 +3039,56 @@ static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); + // We want to duplicate all the bonus instructions in this block, + // and rewrite their uses, but in some cases with self-loops, + // the naive use rewrite approach won't work (will result in miscompilations). + // To avoid this problem, let's form block-closed SSA form. + for (Instruction &BonusInst : + reverse(iterator_range(*BB))) { + auto IsBCSSAUse = [BB, &BonusInst](Use &U) { + auto *UI = cast(U.getUser()); + if (auto *PN = dyn_cast(UI)) + return PN->getIncomingBlock(U) == BB; + return UI->getParent() == BB && BonusInst.comesBefore(UI); + }; + + // Does this instruction require rewriting of uses? + if (all_of(BonusInst.uses(), IsBCSSAUse)) + continue; + + SSAUpdater SSAUpdate; + Type *Ty = BonusInst.getType(); + SmallVector BCSSAPHIs; + SSAUpdate.Initialize(Ty, BonusInst.getName()); + + // Into each successor block of BB, insert a PHI node, that receives + // the BonusInst when coming from it's basic block, or poison otherwise. + for (BasicBlock *Succ : successors(BB)) { + // The block may have the same successor multiple times. Do it only once. + if (SSAUpdate.HasValueForBlock(Succ)) + continue; + BCSSAPHIs.emplace_back(PHINode::Create( + Ty, 0, BonusInst.getName() + ".bcssa", &Succ->front())); + PHINode *PN = BCSSAPHIs.back(); + for (BasicBlock *PredOfSucc : predecessors(Succ)) + PN->addIncoming(PredOfSucc == BB ? (Value *)&BonusInst + : PoisonValue::get(Ty), + PredOfSucc); + SSAUpdate.AddAvailableValue(Succ, PN); + } + + // And rewrite all uses that break block-closed SSA form. + for (Use &U : make_early_inc_range(BonusInst.uses())) + if (!IsBCSSAUse(U)) + SSAUpdate.RewriteUseAfterInsertions(U); + + // We might not have ended up needing PHI's in all of the succ blocks, + // drop the ones that are certainly unused, but don't bother otherwise. + for (PHINode *PN : BCSSAPHIs) + if (PN->use_empty()) + PN->eraseFromParent(); + } + IRBuilder<> Builder(PBI); // The builder is used to create instructions to eliminate the branch in BB. // If BB's terminator has !annotation metadata, add it to the new diff --git a/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll b/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll index 2ff0418..d948b61 100644 --- a/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll +++ b/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll @@ -834,7 +834,7 @@ define void @pr48450() { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[COUNTDOWN:%.*]] = phi i8 [ 8, [[ENTRY:%.*]] ], [ [[DEC_MERGE:%.*]], [[FOR_BODYTHREAD_PRE_SPLIT:%.*]] ] +; CHECK-NEXT: [[COUNTDOWN:%.*]] = phi i8 [ 8, [[ENTRY:%.*]] ], [ [[DEC_BCSSA1:%.*]], [[FOR_BODYTHREAD_PRE_SPLIT:%.*]] ] ; CHECK-NEXT: [[C:%.*]] = call i1 @gen1() ; CHECK-NEXT: br i1 [[C]], label [[FOR_INC:%.*]], label [[IF_THEN:%.*]] ; CHECK: for.inc: @@ -849,7 +849,7 @@ define void @pr48450() { ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[C2_NOT]], i1 true, i1 [[CMP_NOT]] ; CHECK-NEXT: br i1 [[OR_COND]], label [[IF_END_LOOPEXIT]], label [[FOR_BODYTHREAD_PRE_SPLIT]] ; CHECK: for.bodythread-pre-split: -; CHECK-NEXT: [[DEC_MERGE]] = phi i8 [ [[DEC]], [[IF_THEN]] ], [ [[DEC_OLD]], [[FOR_INC]] ] +; CHECK-NEXT: [[DEC_BCSSA1]] = phi i8 [ [[DEC_OLD]], [[FOR_INC]] ], [ [[DEC]], [[IF_THEN]] ] ; CHECK-NEXT: call void @sideeffect0() ; CHECK-NEXT: br label [[FOR_BODY]] ; CHECK: if.end.loopexit: @@ -885,7 +885,7 @@ define void @pr48450_2(i1 %enable_loopback) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[COUNTDOWN:%.*]] = phi i8 [ 8, [[ENTRY:%.*]] ], [ [[DEC_MERGE:%.*]], [[FOR_BODYTHREAD_PRE_SPLIT:%.*]] ] +; CHECK-NEXT: [[COUNTDOWN:%.*]] = phi i8 [ 8, [[ENTRY:%.*]] ], [ [[DEC_BCSSA1:%.*]], [[FOR_BODYTHREAD_PRE_SPLIT:%.*]] ] ; CHECK-NEXT: [[C:%.*]] = call i1 @gen1() ; CHECK-NEXT: br i1 [[C]], label [[FOR_INC:%.*]], label [[IF_THEN:%.*]] ; CHECK: for.inc: @@ -900,7 +900,7 @@ define void @pr48450_2(i1 %enable_loopback) { ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[C2_NOT]], i1 true, i1 [[CMP_NOT]] ; CHECK-NEXT: br i1 [[OR_COND]], label [[IF_END_LOOPEXIT]], label [[FOR_BODYTHREAD_PRE_SPLIT]] ; CHECK: for.bodythread-pre-split: -; CHECK-NEXT: [[DEC_MERGE]] = phi i8 [ [[DEC_OLD]], [[FOR_INC]] ], [ [[DEC_MERGE]], [[FOR_BODYTHREAD_PRE_SPLIT_LOOPBACK:%.*]] ], [ [[DEC]], [[IF_THEN]] ] +; CHECK-NEXT: [[DEC_BCSSA1]] = phi i8 [ poison, [[FOR_BODYTHREAD_PRE_SPLIT_LOOPBACK:%.*]] ], [ [[DEC_OLD]], [[FOR_INC]] ], [ [[DEC]], [[IF_THEN]] ] ; CHECK-NEXT: [[SHOULD_LOOPBACK:%.*]] = phi i1 [ true, [[FOR_INC]] ], [ false, [[FOR_BODYTHREAD_PRE_SPLIT_LOOPBACK]] ], [ true, [[IF_THEN]] ] ; CHECK-NEXT: [[DO_LOOPBACK:%.*]] = and i1 [[SHOULD_LOOPBACK]], [[ENABLE_LOOPBACK:%.*]] ; CHECK-NEXT: call void @sideeffect0() @@ -1005,8 +1005,8 @@ define void @pr49510() { ; CHECK-NEXT: [[TOBOOL_OLD:%.*]] = icmp ne i16 [[DOTOLD]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_OLD]], label [[LAND_RHS:%.*]], label [[FOR_END:%.*]] ; CHECK: land.rhs: -; CHECK-NEXT: [[DOTMERGE:%.*]] = phi i16 [ [[TMP0:%.*]], [[LAND_RHS]] ], [ [[DOTOLD]], [[ENTRY:%.*]] ] -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[DOTMERGE]], 0 +; CHECK-NEXT: [[DOTBCSSA:%.*]] = phi i16 [ [[DOTOLD]], [[ENTRY:%.*]] ], [ [[TMP0:%.*]], [[LAND_RHS]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[DOTBCSSA]], 0 ; CHECK-NEXT: [[TMP0]] = load i16, i16* @global_pr49510, align 1 ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i16 [[TMP0]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP]], i1 [[TOBOOL]], i1 false @@ -1043,15 +1043,15 @@ define i32 @pr51125() { ; CHECK-NEXT: [[ISZERO_OLD:%.*]] = icmp eq i32 [[LD_OLD]], 0 ; CHECK-NEXT: br i1 [[ISZERO_OLD]], label [[EXIT:%.*]], label [[L2:%.*]] ; CHECK: L2: -; CHECK-NEXT: [[LD_MERGE:%.*]] = phi i32 [ [[LD:%.*]], [[L2]] ], [ [[LD_OLD]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[LD_BCSSA1:%.*]] = phi i32 [ [[LD_OLD]], [[ENTRY:%.*]] ], [ [[LD:%.*]], [[L2]] ] ; CHECK-NEXT: store i32 -1, i32* @global_pr51125, align 4 -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[LD_MERGE]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[LD_BCSSA1]], -1 ; CHECK-NEXT: [[LD]] = load i32, i32* @global_pr51125, align 4 ; CHECK-NEXT: [[ISZERO:%.*]] = icmp eq i32 [[LD]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP]], i1 true, i1 [[ISZERO]] ; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT]], label [[L2]] ; CHECK: exit: -; CHECK-NEXT: [[R:%.*]] = phi i32 [ [[LD]], [[L2]] ], [ [[LD_OLD]], [[ENTRY]] ] +; CHECK-NEXT: [[R:%.*]] = phi i32 [ [[LD_BCSSA1]], [[L2]] ], [ [[LD_OLD]], [[ENTRY]] ] ; CHECK-NEXT: ret i32 [[R]] ; entry: -- 2.7.4