From cd2deacac6dc8c65288ce293f3e62067f94e2bf2 Mon Sep 17 00:00:00 2001 From: Daniel Berlin Date: Thu, 20 Oct 2016 20:13:45 +0000 Subject: [PATCH] [MSSA] Avoid unnecessary use walks when calling getClobberingMemoryAccess Summary: This allows us to mark when uses have been optimized. This lets us avoid rewalking (IE when people call getClobberingAccess on everything), and also enables us to later relax the requirement of use optimization during updates with less cost. Reviewers: george.burgess.iv Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D25172 llvm-svn: 284771 --- llvm/include/llvm/Transforms/Utils/MemorySSA.h | 29 +++++++++++++---- llvm/lib/Transforms/Utils/MemorySSA.cpp | 43 ++++++++++++++++++++++---- llvm/unittests/Transforms/Utils/MemorySSA.cpp | 42 ++++++++++++++++--------- 3 files changed, 88 insertions(+), 26 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/MemorySSA.h b/llvm/include/llvm/Transforms/Utils/MemorySSA.h index 9ee44a0..d7c6b26 100644 --- a/llvm/include/llvm/Transforms/Utils/MemorySSA.h +++ b/llvm/include/llvm/Transforms/Utils/MemorySSA.h @@ -110,6 +110,11 @@ class Instruction; class MemoryAccess; class LLVMContext; class raw_ostream; +enum { + // Used to signify what the default invalid ID is for MemoryAccess's + // getID() + INVALID_MEMORYACCESS_ID = 0 +}; template class memoryaccess_def_iterator_base; using memoryaccess_def_iterator = memoryaccess_def_iterator_base; @@ -157,7 +162,8 @@ protected: friend class MemoryDef; friend class MemoryPhi; - /// \brief Used internally to give IDs to MemoryAccesses for printing + /// \brief Used for debugging and tracking things about MemoryAccesses. + /// Guaranteed unique among MemoryAccesses, no guarantees otherwise. virtual unsigned getID() const = 0; MemoryAccess(LLVMContext &C, unsigned Vty, BasicBlock *BB, @@ -235,7 +241,7 @@ public: void *operator new(size_t s) { return User::operator new(s, 1); } MemoryUse(LLVMContext &C, MemoryAccess *DMA, Instruction *MI, BasicBlock *BB) - : MemoryUseOrDef(C, DMA, MemoryUseVal, MI, BB) {} + : MemoryUseOrDef(C, DMA, MemoryUseVal, MI, BB), OptimizedID(0) {} static inline bool classof(const MemoryUse *) { return true; } static inline bool classof(const Value *MA) { @@ -243,6 +249,18 @@ public: } void print(raw_ostream &OS) const override; + void setDefiningAccess(MemoryAccess *DMA, bool Optimized = false) { + if (Optimized) + OptimizedID = DMA->getID(); + MemoryUseOrDef::setDefiningAccess(DMA); + } + bool isOptimized() const { + return getDefiningAccess() && OptimizedID == getDefiningAccess()->getID(); + } + /// \brief Reset the ID of what this MemoryUse was optimized to, causing it to + /// be rewalked by the walker if necessary. + /// This really should only be called by tests. + void resetOptimized() { OptimizedID = INVALID_MEMORYACCESS_ID; } protected: friend class MemorySSA; @@ -250,6 +268,9 @@ protected: unsigned getID() const override { llvm_unreachable("MemoryUses do not have IDs"); } + +private: + unsigned int OptimizedID; }; template <> @@ -288,8 +309,6 @@ public: protected: friend class MemorySSA; - // For debugging only. This gets used to give memory accesses pretty numbers - // when printing them out unsigned getID() const override { return ID; } private: @@ -446,8 +465,6 @@ protected: User::allocHungoffUses(N, /* IsPhi */ true); } - /// For debugging only. This gets used to give memory accesses pretty numbers - /// when printing them out unsigned getID() const final { return ID; } private: diff --git a/llvm/lib/Transforms/Utils/MemorySSA.cpp b/llvm/lib/Transforms/Utils/MemorySSA.cpp index b02a3f5..5c3ca75 100644 --- a/llvm/lib/Transforms/Utils/MemorySSA.cpp +++ b/llvm/lib/Transforms/Utils/MemorySSA.cpp @@ -1229,7 +1229,7 @@ void MemorySSA::markUnreachableAsLiveOnEntry(BasicBlock *BB) { MemorySSA::MemorySSA(Function &Func, AliasAnalysis *AA, DominatorTree *DT) : AA(AA), DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr), - NextID(0) { + NextID(INVALID_MEMORYACCESS_ID) { buildMemorySSA(); } @@ -1332,7 +1332,7 @@ void MemorySSA::OptimizeUses::optimizeUsesInBlock( } if (isUseTriviallyOptimizableToLiveOnEntry(*AA, MU->getMemoryInst())) { - MU->setDefiningAccess(MSSA->getLiveOnEntryDef()); + MU->setDefiningAccess(MSSA->getLiveOnEntryDef(), true); continue; } @@ -1428,13 +1428,13 @@ void MemorySSA::OptimizeUses::optimizeUsesInBlock( // At the end of this loop, UpperBound is either a clobber, or lower bound // PHI walking may cause it to be < LowerBound, and in fact, < LastKill. if (FoundClobberResult || UpperBound < LocInfo.LastKill) { - MU->setDefiningAccess(VersionStack[UpperBound]); + MU->setDefiningAccess(VersionStack[UpperBound], true); // We were last killed now by where we got to LocInfo.LastKill = UpperBound; } else { // Otherwise, we checked all the new ones, and now we know we can get to // LastKill. - MU->setDefiningAccess(VersionStack[LocInfo.LastKill]); + MU->setDefiningAccess(VersionStack[LocInfo.LastKill], true); } LocInfo.LowerBound = VersionStack.size() - 1; LocInfo.LowerBoundBlock = BB; @@ -1754,8 +1754,27 @@ void MemorySSA::removeMemoryAccess(MemoryAccess *MA) { } // Re-point the uses at our defining access - if (!MA->use_empty()) - MA->replaceAllUsesWith(NewDefTarget); + if (!MA->use_empty()) { + // Reset optimized on users of this store, and reset the uses. + // A few notes: + // 1. This is a slightly modified version of RAUW to avoid walking the + // uses twice here. + // 2. If we wanted to be complete, we would have to reset the optimized + // flags on users of phi nodes if doing the below makes a phi node have all + // the same arguments. Instead, we prefer users to removeMemoryAccess those + // phi nodes, because doing it here would be N^3. + if (MA->hasValueHandle()) + ValueHandleBase::ValueIsRAUWd(MA, NewDefTarget); + // Note: We assume MemorySSA is not used in metadata since it's not really + // part of the IR. + + while (!MA->use_empty()) { + Use &U = *MA->use_begin(); + if (MemoryUse *MU = dyn_cast(U.getUser())) + MU->resetOptimized(); + U.set(NewDefTarget); + } + } // The call below to erase will destroy MA, so we can't change the order we // are doing things here @@ -2115,6 +2134,7 @@ void MemorySSA::CachingWalker::invalidateInfo(MemoryAccess *MA) { if (MemoryUse *MU = dyn_cast(MA)) { UpwardsMemoryQuery Q(MU->getMemoryInst(), MU); Cache.remove(MU, Q.StartingLoc, Q.IsCall); + MU->resetOptimized(); } else { // If it is not a use, the best we can do right now is destroy the cache. Cache.clear(); @@ -2188,6 +2208,13 @@ MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA) { if (!StartingAccess) return MA; + // If this is an already optimized use or def, return the optimized result. + // Note: Currently, we do not store the optimized def result because we'd need + // a separate field, since we can't use it as the defining access. + if (MemoryUse *MU = dyn_cast(StartingAccess)) + if (MU->isOptimized()) + return MU->getDefiningAccess(); + const Instruction *I = StartingAccess->getMemoryInst(); UpwardsMemoryQuery Q(I, StartingAccess); // We can't sanely do anything with a fences, they conservatively @@ -2202,6 +2229,8 @@ MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA) { if (isUseTriviallyOptimizableToLiveOnEntry(*MSSA->AA, I)) { MemoryAccess *LiveOnEntry = MSSA->getLiveOnEntryDef(); Cache.insert(StartingAccess, LiveOnEntry, Q.StartingLoc, Q.IsCall); + if (MemoryUse *MU = dyn_cast(StartingAccess)) + MU->setDefiningAccess(LiveOnEntry, true); return LiveOnEntry; } @@ -2218,6 +2247,8 @@ MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA) { DEBUG(dbgs() << *DefiningAccess << "\n"); DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is "); DEBUG(dbgs() << *Result << "\n"); + if (MemoryUse *MU = dyn_cast(StartingAccess)) + MU->setDefiningAccess(Result, true); return Result; } diff --git a/llvm/unittests/Transforms/Utils/MemorySSA.cpp b/llvm/unittests/Transforms/Utils/MemorySSA.cpp index b6c5576..d5d2e80 100644 --- a/llvm/unittests/Transforms/Utils/MemorySSA.cpp +++ b/llvm/unittests/Transforms/Utils/MemorySSA.cpp @@ -225,9 +225,15 @@ TEST_F(MemorySSATest, RemoveMemoryAccess) { // but we should now get live on entry for the clobbering definition of the // load, since it will walk past the phi node since every argument is the // same. + // XXX: This currently requires either removing the phi or resetting optimized + // on the load + + EXPECT_FALSE( + MSSA.isLiveOnEntryDef(Walker->getClobberingMemoryAccess(LoadInst))); + // If we reset optimized, we get live on entry. + LoadAccess->resetOptimized(); EXPECT_TRUE( MSSA.isLiveOnEntryDef(Walker->getClobberingMemoryAccess(LoadInst))); - // The phi should now be a two entry phi with two live on entry defs. for (const auto &Op : DefiningAccess->operands()) { MemoryAccess *Operand = cast(&*Op); @@ -450,24 +456,32 @@ TEST_F(MemorySSATest, WalkerInvariantLoadOpt) { EXPECT_EQ(LoadClobber, MSSA.getLiveOnEntryDef()); } -// At one point, we were building MSSA with 0 AA passes. This ensures that we -// actually use BasicAA. -TEST_F(MemorySSATest, AAIsPresentAtBuildTime) { +// Test loads get reoptimized properly by the walker. +TEST_F(MemorySSATest, WalkerReopt) { F = Function::Create(FunctionType::get(B.getVoidTy(), {}, false), GlobalValue::ExternalLinkage, "F", &M); B.SetInsertPoint(BasicBlock::Create(C, "", F)); - Type *Int8 = Type::getInt8Ty(C); - Constant *One = ConstantInt::get(Int8, 1); - Value *AllocaA = B.CreateAlloca(Int8); - Instruction *StoreA = B.CreateStore(One, AllocaA); - - Value *AllocaB = B.CreateAlloca(Int8); - B.CreateStore(One, AllocaB); - Instruction *LoadA = B.CreateLoad(AllocaA); + Value *AllocaA = B.CreateAlloca(Int8, ConstantInt::get(Int8, 1), "A"); + Instruction *SIA = B.CreateStore(ConstantInt::get(Int8, 0), AllocaA); + Value *AllocaB = B.CreateAlloca(Int8, ConstantInt::get(Int8, 1), "B"); + Instruction *SIB = B.CreateStore(ConstantInt::get(Int8, 0), AllocaB); + Instruction *LIA = B.CreateLoad(AllocaA); setupAnalyses(); MemorySSA &MSSA = *Analyses->MSSA; - auto *MU = cast(MSSA.getMemoryAccess(LoadA)); - EXPECT_EQ(MU->getDefiningAccess(), MSSA.getMemoryAccess(StoreA)); + MemorySSAWalker *Walker = Analyses->Walker; + + MemoryAccess *LoadClobber = Walker->getClobberingMemoryAccess(LIA); + MemoryUse *LoadAccess = cast(MSSA.getMemoryAccess(LIA)); + EXPECT_EQ(LoadClobber, MSSA.getMemoryAccess(SIA)); + EXPECT_TRUE(MSSA.isLiveOnEntryDef(Walker->getClobberingMemoryAccess(SIA))); + MSSA.removeMemoryAccess(LoadAccess); + + // Create the load memory access pointing to an unoptimized place. + MemoryUse *NewLoadAccess = cast(MSSA.createMemoryAccessInBB( + LIA, MSSA.getMemoryAccess(SIB), LIA->getParent(), MemorySSA::End)); + // This should it cause it to be optimized + EXPECT_EQ(Walker->getClobberingMemoryAccess(NewLoadAccess), LoadClobber); + EXPECT_EQ(NewLoadAccess->getDefiningAccess(), LoadClobber); } -- 2.7.4