From e2f66ceed9a241d53158f077c9e887958290135f Mon Sep 17 00:00:00 2001
From: Chandler Carruth
Date: Mon, 22 Dec 2014 22:46:00 +0000
Subject: [PATCH] [SROA] Lift the logic for traversing the alloca slices one
 partition at a time into a partition iterator and a Partition class.

There is a lot of knock-on simplification that this enables, largely
stemming from having a Partition object to refer to in lots of helpers.
I've only done a minimal amount of that because enough stuff is changing
as-is in this commit.

This shouldn't change any observable behavior. I've worked hard to
preserve the *exact* traversal semantics which were originally present
even though some of them make no sense. I'll be changing some of this in
subsequent commits now that the logic is carefully factored into a
reusable place.

The primary motivation for this change is to break the rewriting into
phases in order to support more intelligent rewriting. For example, I'm
planning to change how split loads and stores are rewritten to remove
the significant overuse of integer bit packing in the resulting code and
allow more effective secondary splitting of aggregates. For any of this
to work, they have to share the exact traversal logic.

llvm-svn: 224742
---
 llvm/lib/Transforms/Scalar/SROA.cpp | 460 ++++++++++++++++++++++++------------
 1 file changed, 303 insertions(+), 157 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 3777227..8fc9a94 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -237,6 +237,273 @@ public:
   const_iterator end() const { return Slices.end(); }
   /// @}
 
+  // Forward declare an iterator to befriend it.
+  class partition_iterator;
+
+  /// \brief A partition of the slices.
+  ///
+  /// An ephemeral representation for a range of slices which can be viewed as
+  /// a partition of the alloca. This range represents a span of the alloca's
+  /// memory which cannot be split, and provides access to all of the slices
+  /// overlapping some part of the partition.
+  ///
+  /// Objects of this type are produced by traversing the alloca's slices, but
+  /// are only ephemeral and not persistent.
+  class Partition {
+  private:
+    friend class AllocaSlices;
+    friend class AllocaSlices::partition_iterator;
+
+    /// \brief The beginning and ending offsets of the alloca for this
+    /// partition.
+    uint64_t BeginOffset, EndOffset;
+
+    /// \brief The start and end iterators of this partition.
+    iterator SI, SJ;
+
+    /// \brief A collection of split slices.
+    SmallVector<Slice *, 4> SplitSlices;
+
+    /// \brief Raw constructor builds an empty partition starting and ending at
+    /// the given iterator.
+    Partition(iterator SI) : SI(SI), SJ(SI) {}
+
+  public:
+    /// \brief The start offset of this partition.
+    ///
+    /// All of the contained slices start at or after this offset.
+    uint64_t beginOffset() const { return BeginOffset; }
+
+    /// \brief The end offset of this partition.
+    ///
+    /// All of the contained slices end at or before this offset.
+    uint64_t endOffset() const { return EndOffset; }
+
+    /// \brief The size of the partition.
+    ///
+    /// Note that this can never be zero.
+    uint64_t size() const {
+      assert(BeginOffset < EndOffset && "Partitions must span some bytes!");
+      return EndOffset - BeginOffset;
+    }
+
+    /// \brief Test whether this partition contains no slices, and merely spans
+    /// a region occupied by split slices.
+    bool empty() const { return SI == SJ; }
+
+    /// \name Iterate contained slices.
+    /// All of these slices are fully contained in the partition. They may be
+    /// splittable or unsplittable.
+    /// @{
+    iterator begin() const { return SI; }
+    iterator end() const { return SJ; }
+    /// @}
+
+    /// \brief Get the sequence of split slices.
+    ArrayRef<Slice *> splitSlices() const { return SplitSlices; }
+  };
+
+  /// \brief An iterator over partitions of the alloca's slices.
+  ///
+  /// This iterator implements the core algorithm for partitioning the alloca's
+  /// slices. It is a forward iterator as we don't support backtracking for
+  /// efficiency reasons, and re-use a single storage area to maintain the
+  /// current set of split slices.
+  ///
+  /// It is templated on the slice iterator type to use so that it can operate
+  /// with either const or non-const slice iterators.
+  class partition_iterator
+      : public iterator_facade_base<partition_iterator,
+                                    std::forward_iterator_tag, Partition> {
+    friend class AllocaSlices;
+
+    /// \brief Most of the state for walking the partitions is held in a class
+    /// with a nice interface for examining them.
+    Partition P;
+
+    /// \brief We need to keep the end of the slices to know when to stop.
+    AllocaSlices::iterator SE;
+
+    /// \brief We also need to keep track of the maximum split end offset seen.
+    /// FIXME: Do we really?
+    uint64_t MaxSplitSliceEndOffset;
+
+    /// \brief Sets the partition to be empty at the given iterator, and sets
+    /// the end iterator.
+    partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE)
+        : P(SI), SE(SE), MaxSplitSliceEndOffset(0) {
+      // If not already at the end, advance our state to form the initial
+      // partition.
+      if (SI != SE)
+        advance();
+    }
+
+    /// \brief Advance the iterator to the next partition.
+    ///
+    /// Requires that the iterator not be at the end of the slices.
+    void advance() {
+      assert((P.SI != SE || !P.SplitSlices.empty()) &&
+             "Cannot advance past the end of the slices!");
+
+      // Clear out any split uses which have ended.
+      if (!P.SplitSlices.empty()) {
+        if (P.EndOffset >= MaxSplitSliceEndOffset) {
+          // If we've finished all splits, this is easy.
+          P.SplitSlices.clear();
+          MaxSplitSliceEndOffset = 0;
+        } else {
+          // Remove the uses which have ended in the prior partition. This
+          // cannot change the max split slice end because we just checked that
+          // the prior partition ended prior to that max.
+          P.SplitSlices.erase(
+              std::remove_if(
+                  P.SplitSlices.begin(), P.SplitSlices.end(),
+                  [&](Slice *S) { return S->endOffset() <= P.EndOffset; }),
+              P.SplitSlices.end());
+          assert(std::any_of(P.SplitSlices.begin(), P.SplitSlices.end(),
+                             [&](Slice *S) {
+                               return S->endOffset() == MaxSplitSliceEndOffset;
+                             }) &&
+                 "Could not find the current max split slice offset!");
+          assert(std::all_of(P.SplitSlices.begin(), P.SplitSlices.end(),
+                             [&](Slice *S) {
+                               return S->endOffset() <= MaxSplitSliceEndOffset;
+                             }) &&
+                 "Max split slice end offset is not actually the max!");
+        }
+      }
+
+      // If P.SI is already at the end, then we've cleared the split tail and
+      // now have an end iterator.
+      if (P.SI == SE) {
+        assert(P.SplitSlices.empty() && "Failed to clear the split slices!");
+        return;
+      }
+
+      // If we had a non-empty partition previously, set up the state for
+      // subsequent partitions.
+      if (P.SI != P.SJ) {
+        // Accumulate all the splittable slices which started in the old
+        // partition into the split list.
+        for (Slice &S : P)
+          if (S.isSplittable() && S.endOffset() > P.EndOffset) {
+            P.SplitSlices.push_back(&S);
+            MaxSplitSliceEndOffset =
+                std::max(S.endOffset(), MaxSplitSliceEndOffset);
+          }
+
+        // Start from the end of the previous partition.
+        P.SI = P.SJ;
+
+        // If P.SI is now at the end, we at most have a tail of split slices.
+        if (P.SI == SE) {
+          P.BeginOffset = P.EndOffset;
+          P.EndOffset = MaxSplitSliceEndOffset;
+          return;
+        }
+
+        // If we have split slices and the next slice is after a gap and is
+        // not splittable, immediately form an empty partition for the split
+        // slices up until the next slice begins.
+        if (!P.SplitSlices.empty() && P.SI->beginOffset() != P.EndOffset &&
+            !P.SI->isSplittable()) {
+          P.BeginOffset = P.EndOffset;
+          P.EndOffset = P.SI->beginOffset();
+          return;
+        }
+      }
+
+      // OK, we need to consume new slices. Set the end offset based on the
+      // current slice, and step SJ past it. The beginning offset of the
+      // partition is the beginning offset of the next slice unless we have
+      // pre-existing split slices that are continuing, in which case we begin
+      // at the prior end offset.
+      P.BeginOffset = P.SplitSlices.empty() ? P.SI->beginOffset() : P.EndOffset;
+      P.EndOffset = P.SI->endOffset();
+      ++P.SJ;
+
+      // There are two strategies to form a partition based on whether the
+      // partition starts with an unsplittable slice or a splittable slice.
+      if (!P.SI->isSplittable()) {
+        // When we're forming an unsplittable region, it must always start at
+        // the first slice and will extend through its end.
+        assert(P.BeginOffset == P.SI->beginOffset());
+
+        // Form a partition including all of the overlapping slices with this
+        // unsplittable slice.
+        while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
+          if (!P.SJ->isSplittable())
+            P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
+          ++P.SJ;
+        }
+
+        // We have a partition across a set of overlapping unsplittable
+        // partitions.
+        return;
+      }
+
+      // If we're starting with a splittable slice, then we need to form
+      // a synthetic partition spanning it and any other overlapping splittable
+      // slices.
+      assert(P.SI->isSplittable() && "Forming a splittable partition!");
+
+      // Collect all of the overlapping splittable slices.
+      while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
+             P.SJ->isSplittable()) {
+        P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
+        ++P.SJ;
+      }
+
+      // Back up P.EndOffset if we ended the span early when encountering an
+      // unsplittable slice. This synthesizes the early end offset of
+      // a partition spanning only splittable slices.
+      if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
+        assert(!P.SJ->isSplittable());
+        P.EndOffset = P.SJ->beginOffset();
+      }
+    }
+
+  public:
+    bool operator==(const partition_iterator &RHS) const {
+      assert(SE == RHS.SE &&
+             "End iterators don't match between compared partition iterators!");
+
+      // The observed positions of partitions are marked by the P.SI iterator
+      // and the emptiness of the split slices. The latter is only relevant
+      // when P.SI == SE, as the end iterator will additionally have an empty
+      // split slices list, but the prior may have the same P.SI and a tail of
+      // split slices.
+      if (P.SI == RHS.P.SI &&
+          P.SplitSlices.empty() == RHS.P.SplitSlices.empty()) {
+        assert(P.SJ == RHS.P.SJ &&
+               "Same set of slices formed two different sized partitions!");
+        assert(P.SplitSlices.size() == RHS.P.SplitSlices.size() &&
+               "Same slice position with differently sized non-empty split "
+               "slices sets!");
+        return true;
+      }
+      return false;
+    }
+
+    partition_iterator &operator++() {
+      advance();
+      return *this;
+    }
+
+    Partition &operator*() { return P; }
+  };
+
+  /// \brief A forward range over the partitions of the alloca's slices.
+  ///
+  /// This accesses an iterator range over the partitions of the alloca's
+  /// slices. It computes these partitions on the fly based on the overlapping
+  /// offsets of the slices and the ability to split them. It will visit
+  /// "empty" partitions to cover regions of the alloca only accessed via
+  /// split slices.
+  iterator_range<partition_iterator> partitions() {
+    return make_range(partition_iterator(begin(), end()),
+                      partition_iterator(end(), end()));
+  }
+
   /// \brief Access the dead users for this alloca.
   ArrayRef<Instruction *> getDeadUsers() const { return DeadUsers; }
 
@@ -974,9 +1241,7 @@ private:
   friend class AllocaSliceRewriter;
 
   bool rewritePartition(AllocaInst &AI, AllocaSlices &AS,
-                        AllocaSlices::iterator B, AllocaSlices::iterator E,
-                        int64_t BeginOffset, int64_t EndOffset,
-                        ArrayRef<AllocaSlices::iterator> SplitUses);
+                        AllocaSlices::Partition &P);
   bool splitAlloca(AllocaInst &AI, AllocaSlices &AS);
   bool runOnAlloca(AllocaInst &AI);
   void clobberUse(Use &U);
@@ -3171,39 +3436,36 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset,
 /// at enabling promotion and if it was successful queues the alloca to be
 /// promoted.
 bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
-                            AllocaSlices::iterator B, AllocaSlices::iterator E,
-                            int64_t BeginOffset, int64_t EndOffset,
-                            ArrayRef<AllocaSlices::iterator> SplitUses) {
-  assert(BeginOffset < EndOffset);
-  uint64_t SliceSize = EndOffset - BeginOffset;
-
+                            AllocaSlices::Partition &P) {
   // Try to compute a friendly type for this partition of the alloca. This
   // won't always succeed, in which case we fall back to a legal integer type
   // or an i8 array of an appropriate size.
   Type *SliceTy = nullptr;
-  if (Type *CommonUseTy = findCommonType(B, E, EndOffset))
-    if (DL->getTypeAllocSize(CommonUseTy) >= SliceSize)
+  if (Type *CommonUseTy = findCommonType(P.begin(), P.end(), P.endOffset()))
+    if (DL->getTypeAllocSize(CommonUseTy) >= P.size())
       SliceTy = CommonUseTy;
   if (!SliceTy)
     if (Type *TypePartitionTy = getTypePartition(*DL, AI.getAllocatedType(),
-                                                 BeginOffset, SliceSize))
+                                                 P.beginOffset(), P.size()))
       SliceTy = TypePartitionTy;
   if ((!SliceTy || (SliceTy->isArrayTy() &&
                     SliceTy->getArrayElementType()->isIntegerTy())) &&
-      DL->isLegalInteger(SliceSize * 8))
-    SliceTy = Type::getIntNTy(*C, SliceSize * 8);
+      DL->isLegalInteger(P.size() * 8))
+    SliceTy = Type::getIntNTy(*C, P.size() * 8);
   if (!SliceTy)
-    SliceTy = ArrayType::get(Type::getInt8Ty(*C), SliceSize);
-  assert(DL->getTypeAllocSize(SliceTy) >= SliceSize);
+    SliceTy = ArrayType::get(Type::getInt8Ty(*C), P.size());
+  assert(DL->getTypeAllocSize(SliceTy) >= P.size());
 
   bool IsIntegerPromotable = isIntegerWideningViable(
-      *DL, SliceTy, BeginOffset, AllocaSlices::const_range(B, E), SplitUses);
+      *DL, SliceTy, P.beginOffset(),
+      AllocaSlices::const_range(P.begin(), P.end()), P.splitSlices());
 
   VectorType *VecTy =
       IsIntegerPromotable
          ? nullptr
-          : isVectorPromotionViable(*DL, BeginOffset, EndOffset,
-                                    AllocaSlices::const_range(B, E), SplitUses);
+          : isVectorPromotionViable(
+                *DL, P.beginOffset(), P.endOffset(),
+                AllocaSlices::const_range(P.begin(), P.end()), P.splitSlices());
   if (VecTy)
     SliceTy = VecTy;
 
@@ -3213,7 +3475,8 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
   // perform phi and select speculation.
   AllocaInst *NewAI;
   if (SliceTy == AI.getAllocatedType()) {
-    assert(BeginOffset == 0 && "Non-zero begin offset but same alloca type");
+    assert(P.beginOffset() == 0 &&
+           "Non-zero begin offset but same alloca type");
     NewAI = &AI;
     // FIXME: We should be able to bail at this point with "nothing changed".
     // FIXME: We might want to defer PHI speculation until after here.
@@ -3225,14 +3488,14 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
       // type.
       Alignment = DL->getABITypeAlignment(AI.getAllocatedType());
     }
-    Alignment = MinAlign(Alignment, BeginOffset);
+    Alignment = MinAlign(Alignment, P.beginOffset());
     // If we will get at least this much alignment from the type alone, leave
     // the alloca's alignment unconstrained.
     if (Alignment <= DL->getABITypeAlignment(SliceTy))
       Alignment = 0;
-    NewAI =
-        new AllocaInst(SliceTy, nullptr, Alignment,
-                       AI.getName() + ".sroa." + Twine(B - AS.begin()), &AI);
+    NewAI = new AllocaInst(
+        SliceTy, nullptr, Alignment,
+        AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()), &AI);
     ++NumNewAllocas;
 
     // Migrate debug information from the old alloca to the new alloca
@@ -3247,9 +3510,9 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
       // with a piece. It would be even better to just compare against the size
      // of the type described in the debug info, but then we would need to
      // build an expensive DIRefMap.
-      if (SliceSize < DL->getTypeAllocSize(AI.getAllocatedType()) ||
+      if (P.size() < DL->getTypeAllocSize(AI.getAllocatedType()) ||
          DIExpression(DbgDecl->getExpression()).isVariablePiece())
-        Piece = DIB.createPieceExpression(BeginOffset, SliceSize);
+        Piece = DIB.createPieceExpression(P.beginOffset(), P.size());
       Instruction *NewDDI = DIB.insertDeclare(NewAI, Var, Piece, &AI);
       NewDDI->setDebugLoc(DbgDecl->getDebugLoc());
       DbgDeclares.insert(std::make_pair(NewAI, cast<DbgDeclareInst>(NewDDI)));
@@ -3258,8 +3521,8 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
   }
 
   DEBUG(dbgs() << "Rewriting alloca partition "
-               << "[" << BeginOffset << "," << EndOffset << ") to: " << *NewAI
-               << "\n");
+               << "[" << P.beginOffset() << "," << P.endOffset()
+               << ") to: " << *NewAI << "\n");
 
   // Track the high watermark on the worklist as it is only relevant for
   // promoted allocas. We will reset it to this point if the alloca is not in
@@ -3269,20 +3532,20 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
   SmallPtrSet<PHINode *, 8> PHIUsers;
   SmallPtrSet<SelectInst *, 8> SelectUsers;
 
-  AllocaSliceRewriter Rewriter(*DL, AS, *this, AI, *NewAI, BeginOffset,
-                               EndOffset, IsIntegerPromotable, VecTy, PHIUsers,
-                               SelectUsers);
+  AllocaSliceRewriter Rewriter(*DL, AS, *this, AI, *NewAI, P.beginOffset(),
+                               P.endOffset(), IsIntegerPromotable, VecTy,
+                               PHIUsers, SelectUsers);
   bool Promotable = true;
-  for (auto &SplitUse : SplitUses) {
+  for (Slice *S : P.splitSlices()) {
     DEBUG(dbgs() << " rewriting split ");
-    DEBUG(AS.printSlice(dbgs(), SplitUse, ""));
-    Promotable &= Rewriter.visit(SplitUse);
+    DEBUG(AS.printSlice(dbgs(), S, ""));
+    Promotable &= Rewriter.visit(S);
     ++NumUses;
   }
-  for (AllocaSlices::iterator I = B; I != E; ++I) {
+  for (Slice &S : P) {
     DEBUG(dbgs() << " rewriting ");
-    DEBUG(AS.printSlice(dbgs(), I, ""));
-    Promotable &= Rewriter.visit(I);
+    DEBUG(AS.printSlice(dbgs(), &S, ""));
+    Promotable &= Rewriter.visit(&S);
     ++NumUses;
   }
 
@@ -3340,32 +3603,6 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
   return true;
 }
 
-static void
-removeFinishedSplitUses(SmallVectorImpl<AllocaSlices::iterator> &SplitUses,
-                        uint64_t &MaxSplitUseEndOffset, uint64_t Offset) {
-  if (Offset >= MaxSplitUseEndOffset) {
-    SplitUses.clear();
-    MaxSplitUseEndOffset = 0;
-    return;
-  }
-
-  size_t SplitUsesOldSize = SplitUses.size();
-  SplitUses.erase(std::remove_if(
-                      SplitUses.begin(), SplitUses.end(),
-                      [Offset](const AllocaSlices::iterator &I) {
-                        return I->endOffset() <= Offset;
-                      }),
-                  SplitUses.end());
-  if (SplitUsesOldSize == SplitUses.size())
-    return;
-
-  // Recompute the max. While this is linear, so is remove_if.
-  MaxSplitUseEndOffset = 0;
-  for (AllocaSlices::iterator SplitUse : SplitUses)
-    MaxSplitUseEndOffset =
-        std::max(SplitUse->endOffset(), MaxSplitUseEndOffset);
-}
-
 /// \brief Walks the slices of an alloca and form partitions based on them,
 /// rewriting each of their uses.
 bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
@@ -3374,102 +3611,11 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
   unsigned NumPartitions = 0;
   bool Changed = false;
 
-  SmallVector<AllocaSlices::iterator, 4> SplitUses;
-  uint64_t MaxSplitUseEndOffset = 0;
-
-  uint64_t BeginOffset = AS.begin()->beginOffset();
-
-  for (AllocaSlices::iterator SI = AS.begin(), SJ = std::next(SI),
-                              SE = AS.end();
-       SI != SE; SI = SJ) {
-    uint64_t MaxEndOffset = SI->endOffset();
-
-    if (!SI->isSplittable()) {
-      // When we're forming an unsplittable region, it must always start at the
-      // first slice and will extend through its end.
-      assert(BeginOffset == SI->beginOffset());
-
-      // Form a partition including all of the overlapping slices with this
-      // unsplittable slice.
-      while (SJ != SE && SJ->beginOffset() < MaxEndOffset) {
-        if (!SJ->isSplittable())
-          MaxEndOffset = std::max(MaxEndOffset, SJ->endOffset());
-        ++SJ;
-      }
-    } else {
-      assert(SI->isSplittable()); // Established above.
-
-      // Collect all of the overlapping splittable slices.
-      while (SJ != SE && SJ->beginOffset() < MaxEndOffset &&
-             SJ->isSplittable()) {
-        MaxEndOffset = std::max(MaxEndOffset, SJ->endOffset());
-        ++SJ;
-      }
-
-      // Back up MaxEndOffset and SJ if we ended the span early when
-      // encountering an unsplittable slice.
-      if (SJ != SE && SJ->beginOffset() < MaxEndOffset) {
-        assert(!SJ->isSplittable());
-        MaxEndOffset = SJ->beginOffset();
-      }
-    }
-
-    // Check if we have managed to move the end offset forward yet. If so,
-    // we'll have to rewrite uses and erase old split uses.
-    if (BeginOffset < MaxEndOffset) {
-      // Rewrite a sequence of overlapping slices.
-      Changed |= rewritePartition(AI, AS, SI, SJ, BeginOffset, MaxEndOffset,
-                                  SplitUses);
-      ++NumPartitions;
-
-      removeFinishedSplitUses(SplitUses, MaxSplitUseEndOffset, MaxEndOffset);
-    }
-
-    // Accumulate all the splittable slices from the [SI,SJ) region which
-    // overlap going forward.
-    for (AllocaSlices::iterator SK = SI; SK != SJ; ++SK)
-      if (SK->isSplittable() && SK->endOffset() > MaxEndOffset) {
-        SplitUses.push_back(SK);
-        MaxSplitUseEndOffset = std::max(SK->endOffset(), MaxSplitUseEndOffset);
-      }
-
-    // If we're already at the end and we have no split uses, we're done.
-    if (SJ == SE && SplitUses.empty())
-      break;
-
-    // If we have no split uses or no gap in offsets, we're ready to move to
-    // the next slice.
-    if (SplitUses.empty() || (SJ != SE && MaxEndOffset == SJ->beginOffset())) {
-      BeginOffset = SJ->beginOffset();
-      continue;
-    }
-    // Even if we have split slices, if the next slice is splittable and the
-    // split slices reach it, we can simply set up the beginning offset of the
-    // next iteration to bridge between them.
-    if (SJ != SE && SJ->isSplittable() &&
-        MaxSplitUseEndOffset > SJ->beginOffset()) {
-      BeginOffset = MaxEndOffset;
-      continue;
-    }
-
-    // Otherwise, we have a tail of split slices. Rewrite them with an empty
-    // range of slices.
-    uint64_t PostSplitEndOffset =
-        SJ == SE ? MaxSplitUseEndOffset : SJ->beginOffset();
-
-    Changed |= rewritePartition(AI, AS, SJ, SJ, MaxEndOffset,
-                                PostSplitEndOffset, SplitUses);
+  // Rewrite each partition.
+  for (auto &P : AS.partitions()) {
+    Changed |= rewritePartition(AI, AS, P);
     ++NumPartitions;
-
-    if (SJ == SE)
-      break; // Skip the rest, we don't need to do any cleanup.
-
-    removeFinishedSplitUses(SplitUses, MaxSplitUseEndOffset,
-                            PostSplitEndOffset);
-
-    // Now just reset the begin offset for the next iteration.
-    BeginOffset = SJ->beginOffset();
   }
 
   NumAllocaPartitions += NumPartitions;
-- 
2.7.4
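
Usage sketch (not part of the patch): assuming the LLVM-internal types defined
above in SROA.cpp (AllocaSlices, AllocaSlices::Partition, Slice) are in scope,
the new partitions() range is consumed roughly the way the updated
splitAlloca() consumes it. The helper name walkPartitions below is
hypothetical and only illustrates the iteration pattern.

  // Hypothetical helper: visit every partition of an already-built
  // AllocaSlices. Mirrors the new loop in splitAlloca(); these are internal
  // SROA.cpp types, not a public LLVM API.
  static void walkPartitions(AllocaSlices &AS) {
    for (auto &P : AS.partitions()) {
      // Slices fully contained in [P.beginOffset(), P.endOffset()).
      for (Slice &S : P)
        (void)S; // rewrite or inspect the contained slice here
      // Split slices that start in an earlier partition but overlap this one.
      for (Slice *S : P.splitSlices())
        (void)S;
    }
  }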