From 13c4ab89ba18ff614b1ab294cddeaf6303ac24eb Mon Sep 17 00:00:00 2001 From: Erik Eckstein Date: Wed, 14 Jan 2015 11:24:47 +0000 Subject: [PATCH] reapply: SLPVectorizer: Cache results from memory alias checking. This speeds up the dependency calculations for blocks with many load/store/call instructions. Beside the improved runtime, there is no functional change. Compared to the original commit, this re-applied commit contains a bug fix which ensures that there are no incorrect collisions in the alias cache. llvm-svn: 225977 --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 92 +++++++++++++++++++------ 1 file changed, 71 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 24d02b0..4834782 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" @@ -388,6 +389,15 @@ static bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst, } } +/// \returns the AA location that is being access by the instruction. +static AliasAnalysis::Location getLocation(Instruction *I, AliasAnalysis *AA) { + if (StoreInst *SI = dyn_cast(I)) + return AA->getLocation(SI); + if (LoadInst *LI = dyn_cast(I)) + return AA->getLocation(LI); + return AliasAnalysis::Location(); +} + /// Bottom Up SLP Vectorizer. class BoUpSLP { public: @@ -555,6 +565,52 @@ private: }; typedef SmallVector UserList; + /// Checks if two instructions may access the same memory. + /// + /// \p Loc1 is the location of \p Inst1. It is passed explicitly because it + /// is invariant in the calling loop. + bool isAliased(const AliasAnalysis::Location &Loc1, Instruction *Inst1, + Instruction *Inst2) { + + // First check if the result is already in the cache. + AliasCacheKey key = std::make_pair(Inst1, Inst2); + Optional &result = AliasCache[key]; + if (result.hasValue()) { + return result.getValue(); + } + AliasAnalysis::Location Loc2 = getLocation(Inst2, AA); + bool aliased = true; + if (Loc1.Ptr && Loc2.Ptr) { + // Do the alias check. + aliased = AA->alias(Loc1, Loc2); + } + // Store the result in the cache. + result = aliased; + return aliased; + } + + typedef std::pair AliasCacheKey; + + /// Cache for alias results. + /// TODO: consider moving this to the AliasAnalysis itself. + DenseMap> AliasCache; + + /// Removes an instruction from its block and eventually deletes it. + /// It's like Instruction::eraseFromParent() except that the actual deletion + /// is delayed until BoUpSLP is destructed. + /// This is required to ensure that there are no incorrect collisions in the + /// AliasCache, which can happen if a new instruction is allocated at the + /// same address as a previously deleted instruction. + void eraseInstruction(Instruction *I) { + I->removeFromParent(); + I->dropAllReferences(); + DeletedInstructions.push_back(std::unique_ptr(I)); + } + + /// Temporary store for deleted instructions. Instructions will be deleted + /// eventually when the BoUpSLP is destructed. + SmallVector, 8> DeletedInstructions; + /// A list of values that need to extracted out of the tree. /// This list holds pairs of (Internal Scalar : External User). UserList ExternalUses; @@ -791,7 +847,7 @@ private: /// Checks if a bundle of instructions can be scheduled, i.e. has no /// cyclic dependencies. This is only a dry-run, no instructions are /// actually moved at this stage. - bool tryScheduleBundle(ArrayRef VL, AliasAnalysis *AA); + bool tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP); /// Un-bundles a group of instructions. void cancelScheduling(ArrayRef VL); @@ -808,7 +864,7 @@ private: /// Updates the dependency information of a bundle and of all instructions/ /// bundles which depend on the original bundle. void calculateDependencies(ScheduleData *SD, bool InsertInReadyList, - AliasAnalysis *AA); + BoUpSLP *SLP); /// Sets all instruction in the scheduling region to un-scheduled. void resetSchedule(); @@ -1069,7 +1125,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth) { } BlockScheduling &BS = *BSRef.get(); - if (!BS.tryScheduleBundle(VL, AA)) { + if (!BS.tryScheduleBundle(VL, this)) { DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n"); BS.cancelScheduling(VL); newTreeEntry(VL, false); @@ -2360,7 +2416,7 @@ Value *BoUpSLP::vectorizeTree() { Scalar->replaceAllUsesWith(Undef); } DEBUG(dbgs() << "SLP: \tErasing scalar:" << *Scalar << ".\n"); - cast(Scalar)->eraseFromParent(); + eraseInstruction(cast(Scalar)); } } @@ -2442,7 +2498,7 @@ void BoUpSLP::optimizeGatherSequence() { if (In->isIdenticalTo(*v) && DT->dominates((*v)->getParent(), In->getParent())) { In->replaceAllUsesWith(*v); - In->eraseFromParent(); + eraseInstruction(In); In = nullptr; break; } @@ -2460,7 +2516,7 @@ void BoUpSLP::optimizeGatherSequence() { // Groups the instructions to a bundle (which is then a single scheduling entity) // and schedules instructions until the bundle gets ready. bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, - AliasAnalysis *AA) { + BoUpSLP *SLP) { if (isa(VL[0])) return true; @@ -2517,7 +2573,7 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, DEBUG(dbgs() << "SLP: try schedule bundle " << *Bundle << " in block " << BB->getName() << "\n"); - calculateDependencies(Bundle, true, AA); + calculateDependencies(Bundle, true, SLP); // Now try to schedule the new bundle. As soon as the bundle is "ready" it // means that there are no cyclic dependencies and we can schedule it. @@ -2648,18 +2704,9 @@ void BoUpSLP::BlockScheduling::initScheduleData(Instruction *FromI, } } -/// \returns the AA location that is being access by the instruction. -static AliasAnalysis::Location getLocation(Instruction *I, AliasAnalysis *AA) { - if (StoreInst *SI = dyn_cast(I)) - return AA->getLocation(SI); - if (LoadInst *LI = dyn_cast(I)) - return AA->getLocation(LI); - return AliasAnalysis::Location(); -} - void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD, bool InsertInReadyList, - AliasAnalysis *AA) { + BoUpSLP *SLP) { assert(SD->isSchedulingEntity()); SmallVector WorkList; @@ -2704,14 +2751,14 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD, // Handle the memory dependencies. ScheduleData *DepDest = BundleMember->NextLoadStore; if (DepDest) { - AliasAnalysis::Location SrcLoc = getLocation(BundleMember->Inst, AA); + Instruction *SrcInst = BundleMember->Inst; + AliasAnalysis::Location SrcLoc = getLocation(SrcInst, SLP->AA); bool SrcMayWrite = BundleMember->Inst->mayWriteToMemory(); while (DepDest) { assert(isInSchedulingRegion(DepDest)); if (SrcMayWrite || DepDest->Inst->mayWriteToMemory()) { - AliasAnalysis::Location DstLoc = getLocation(DepDest->Inst, AA); - if (!SrcLoc.Ptr || !DstLoc.Ptr || AA->alias(SrcLoc, DstLoc)) { + if (SLP->isAliased(SrcLoc, SrcInst, DepDest->Inst)) { DepDest->MemoryDependencies.push_back(BundleMember); BundleMember->Dependencies++; ScheduleData *DestBundle = DepDest->FirstInBundle; @@ -2779,7 +2826,7 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) { "scheduler and vectorizer have different opinion on what is a bundle"); SD->FirstInBundle->SchedulingPriority = Idx++; if (SD->isSchedulingEntity()) { - BS->calculateDependencies(SD, false, AA); + BS->calculateDependencies(SD, false, this); NumToSchedule++; } } @@ -2872,6 +2919,9 @@ struct SLPVectorizer : public FunctionPass { // store instructions. BoUpSLP R(&F, SE, DL, TTI, TLI, AA, LI, DT, AC); + // A general note: the vectorizer must use BoUpSLP::eraseInstruction() to + // delete instructions. + // Scan the blocks in the function in post order. for (po_iterator it = po_begin(&F.getEntryBlock()), e = po_end(&F.getEntryBlock()); it != e; ++it) { -- 2.7.4