From 10280dac1dad5f182ef8a2bde0ee543b246b1599 Mon Sep 17 00:00:00 2001 From: Evgeniy Stepanov Date: Wed, 25 Jun 2014 07:54:58 +0000 Subject: [PATCH] [LICM] Don't create more than one copy of an instruction per loop exit block when sinking. Fixes exponential compilation complexity in PR19835, caused by LICM::sink not handling the following pattern well: f = op g e = op f, g d = op e c = op d, e b = op c a = op b, c When an instruction with N uses is sunk, each of its operands gets N new uses (all of them - phi nodes). In the example above, if a had 1 use, c would have 2, e would have 4, and g would have 8. llvm-svn: 211673 --- llvm/lib/Transforms/Scalar/LICM.cpp | 58 ++++++++++++++++++------------- llvm/test/Transforms/LICM/extra-copies.ll | 29 ++++++++++++++++ 2 files changed, 63 insertions(+), 24 deletions(-) create mode 100644 llvm/test/Transforms/LICM/extra-copies.ll diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index 0a8d16f..c16ffdb 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -550,6 +550,9 @@ void LICM::sink(Instruction &I) { SmallPtrSet ExitBlockSet(ExitBlocks.begin(), ExitBlocks.end()); #endif + // Clones of this instruction. Don't create more than one per exit block! + SmallDenseMap SunkCopies; + // If this instruction is only used outside of the loop, then all users are // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of // the instruction. @@ -561,30 +564,37 @@ void LICM::sink(Instruction &I) { assert(ExitBlockSet.count(ExitBlock) && "The LCSSA PHI is not in an exit block!"); - Instruction *New = I.clone(); - ExitBlock->getInstList().insert(ExitBlock->getFirstInsertionPt(), New); - if (!I.getName().empty()) - New->setName(I.getName() + ".le"); - - // Build LCSSA PHI nodes for any in-loop operands. Note that this is - // particularly cheap because we can rip off the PHI node that we're - // replacing for the number and blocks of the predecessors. - // OPT: If this shows up in a profile, we can instead finish sinking all - // invariant instructions, and then walk their operands to re-establish - // LCSSA. That will eliminate creating PHI nodes just to nuke them when - // sinking bottom-up. - for (User::op_iterator OI = New->op_begin(), OE = New->op_end(); OI != OE; - ++OI) - if (Instruction *OInst = dyn_cast(*OI)) - if (Loop *OLoop = LI->getLoopFor(OInst->getParent())) - if (!OLoop->contains(PN)) { - PHINode *OpPN = PHINode::Create( - OInst->getType(), PN->getNumIncomingValues(), - OInst->getName() + ".lcssa", ExitBlock->begin()); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - OpPN->addIncoming(OInst, PN->getIncomingBlock(i)); - *OI = OpPN; - } + Instruction *New; + auto It = SunkCopies.find(ExitBlock); + if (It != SunkCopies.end()) { + New = It->second; + } else { + New = I.clone(); + SunkCopies[ExitBlock] = New; + ExitBlock->getInstList().insert(ExitBlock->getFirstInsertionPt(), New); + if (!I.getName().empty()) + New->setName(I.getName() + ".le"); + + // Build LCSSA PHI nodes for any in-loop operands. Note that this is + // particularly cheap because we can rip off the PHI node that we're + // replacing for the number and blocks of the predecessors. + // OPT: If this shows up in a profile, we can instead finish sinking all + // invariant instructions, and then walk their operands to re-establish + // LCSSA. That will eliminate creating PHI nodes just to nuke them when + // sinking bottom-up. + for (User::op_iterator OI = New->op_begin(), OE = New->op_end(); OI != OE; + ++OI) + if (Instruction *OInst = dyn_cast(*OI)) + if (Loop *OLoop = LI->getLoopFor(OInst->getParent())) + if (!OLoop->contains(PN)) { + PHINode *OpPN = PHINode::Create( + OInst->getType(), PN->getNumIncomingValues(), + OInst->getName() + ".lcssa", ExitBlock->begin()); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + OpPN->addIncoming(OInst, PN->getIncomingBlock(i)); + *OI = OpPN; + } + } PN->replaceAllUsesWith(New); PN->eraseFromParent(); diff --git a/llvm/test/Transforms/LICM/extra-copies.ll b/llvm/test/Transforms/LICM/extra-copies.ll new file mode 100644 index 0000000..ef52f9f --- /dev/null +++ b/llvm/test/Transforms/LICM/extra-copies.ll @@ -0,0 +1,29 @@ +; RUN: opt < %s -licm -S | FileCheck %s +; PR19835 +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @f(i32 %x) { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %storemerge4 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %mul = mul nsw i32 %x, %x + %add2 = add nsw i32 %mul, %x + %mul3 = add nsw i32 %add2, %mul + %inc = add nsw i32 %storemerge4, 1 + %cmp = icmp slt i32 %inc, 100 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + %a9.0.lcssa = phi i32 [ %mul3, %for.body ] + ret i32 %a9.0.lcssa +} + +; Test that there is exactly one copy of mul nsw i32 %x, %x in the exit block. +; CHECK: define i32 @f(i32 [[X:%.*]]) +; CHECK: for.end: +; CHECK-NOT: mul nsw i32 [[X]], [[X]] +; CHECK: mul nsw i32 [[X]], [[X]] +; CHECK-NOT: mul nsw i32 [[X]], [[X]] -- 2.7.4