From a998735def5f04459fa6cbeaaffbb42f1a1dec5c Mon Sep 17 00:00:00 2001 From: Kevin Qin Date: Mon, 9 Mar 2015 06:14:07 +0000 Subject: [PATCH] Run LICM pass after loop unrolling pass. Runtime unrolling will introduce a runtime check in the loop prologue. If the unrolled loop is an inner loop, then the prologue will be inside the outer loop. The LICM pass can help to hoist the runtime check out of the outer loop if the checked value is loop invariant. llvm-svn: 231630 --- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp | 8 ++++- llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp | 9 ++--- llvm/test/Transforms/LoopUnroll/runtime-loop4.ll | 43 ++++++++++++++++++++++++ 3 files changed, 53 insertions(+), 7 deletions(-) create mode 100644 llvm/test/Transforms/LoopUnroll/runtime-loop4.ll diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 21fa34d..7ff55a7 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -364,8 +364,14 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createCFGSimplificationPass()); MPM.add(createInstructionCombiningPass()); - if (!DisableUnrollLoops) + if (!DisableUnrollLoops) { MPM.add(createLoopUnrollPass()); // Unroll small loops + // Runtime unrollng will introduce runtime check in loop prologue. If the + // unrolled loop is a inner loop, then the prologue will be inside the + // outer loop. LICM pass can help to promote the runtime check out if the + // checked value is loop invariant. + MPM.add(createLICMPass()); + } // After vectorization and unrolling, assume intrinsics may tell us more // about pointer alignments. 
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 91b688c..9e2d8c9 100644 --- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -142,7 +142,7 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog, BasicBlock *InsertTop, BasicBlock *InsertBot, std::vector &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, - LoopInfo *LI) { + LoopInfo *LI, LPPassManager *LPM) { BasicBlock *Preheader = L->getLoopPreheader(); BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); @@ -153,10 +153,7 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog, Loop *ParentLoop = L->getParentLoop(); if (!UnrollProlog) { NewLoop = new Loop(); - if (ParentLoop) - ParentLoop->addChildLoop(NewLoop); - else - LI->addTopLevelLoop(NewLoop); + LPM->insertLoop(NewLoop, ParentLoop); } // For each block in the original loop, create a new copy, @@ -390,7 +387,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, // the loop, otherwise we create a cloned loop to execute the extra // iterations. This function adds the appropriate CFG connections. CloneLoopBlocks(L, ModVal, UnrollPrologue, PH, PEnd, NewBlocks, LoopBlocks, - VMap, LI); + VMap, LI, LPM); // Insert the cloned blocks into function just before the original loop F->getBasicBlockList().splice(PEnd, F->getBasicBlockList(), NewBlocks[0], diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop4.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop4.ll new file mode 100644 index 0000000..9be0ffd --- /dev/null +++ b/llvm/test/Transforms/LoopUnroll/runtime-loop4.ll @@ -0,0 +1,43 @@ +; RUN: opt < %s -S -O2 -unroll-runtime=true | FileCheck %s + +; Check runtime unrolling prologue can be promoted by LICM pass. 
+ +; CHECK: entry: +; CHECK: %xtraiter +; CHECK: %lcmp.mod +; CHECK: loop1: +; CHECK: br i1 %lcmp.mod +; CHECK: loop2.prol: + +define void @unroll(i32 %iter, i32* %addr1, i32* %addr2) nounwind { +entry: + br label %loop1 + +loop1: + %iv1 = phi i32 [ 0, %entry ], [ %inc1, %loop1.latch ] + %offset1 = getelementptr i32, i32* %addr1, i32 %iv1 + store i32 %iv1, i32* %offset1, align 4 + br label %loop2.header + +loop2.header: + br label %loop2 + +loop2: + %iv2 = phi i32 [ 0, %loop2.header ], [ %inc2, %loop2 ] + %offset2 = getelementptr i32, i32* %addr2, i32 %iv2 + store i32 %iv2, i32* %offset2, align 4 + %inc2 = add i32 %iv2, 1 + %exitcnd2 = icmp uge i32 %inc2, %iter + br i1 %exitcnd2, label %exit2, label %loop2 + +exit2: + br label %loop1.latch + +loop1.latch: + %inc1 = add i32 %iv1, 1 + %exitcnd1 = icmp uge i32 %inc1, 1024 + br i1 %exitcnd1, label %exit, label %loop1 + +exit: + ret void +} -- 2.7.4