From 90c4a3ae20c1c264e28ee21638f7d7c27c9cb576 Mon Sep 17 00:00:00 2001
From: Chandler Carruth
Date: Fri, 5 Oct 2012 01:29:06 +0000
Subject: [PATCH] Lift the speculation visitor above all the helpers that are
 targeted at the rewrite visitor to make the fact that the speculation is
 completely independent a bit more clear.

I promise that this is just a cut/paste of the one visitor and adding the
anonymous namespace wrappings. The diff may look completely preposterous;
it does in git for some reason.

llvm-svn: 165284
---
 llvm/lib/Transforms/Scalar/SROA.cpp | 1218 ++++++++++++++++++-----------------
 1 file changed, 610 insertions(+), 608 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 54a08e6..eb16e8e 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -1368,715 +1368,717 @@ INITIALIZE_PASS_DEPENDENCY(DominatorTree)
 INITIALIZE_PASS_END(SROA, "sroa", "Scalar Replacement Of Aggregates",
                     false, false)

-/// \brief Accumulate the constant offsets in a GEP into a single APInt offset.
-///
-/// If the provided GEP is all-constant, the total byte offset formed by the
-/// GEP is computed and Offset is set to it. If the GEP has any non-constant
-/// operands, the function returns false and the value of Offset is unmodified.
-static bool accumulateGEPOffsets(const TargetData &TD, GEPOperator &GEP,
-                                 APInt &Offset) {
-  APInt GEPOffset(Offset.getBitWidth(), 0);
-  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
-       GTI != GTE; ++GTI) {
-    ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
-    if (!OpC)
-      return false;
-    if (OpC->isZero()) continue;
+namespace {
+/// \brief Visitor to speculate PHIs and Selects where possible.
+class PHIOrSelectSpeculator : public InstVisitor<PHIOrSelectSpeculator> {
+  // Befriend the base class so it can delegate to private visit methods.
+  friend class llvm::InstVisitor<PHIOrSelectSpeculator>;

-    // Handle a struct index, which adds its field offset to the pointer.
-    if (StructType *STy = dyn_cast<StructType>(*GTI)) {
-      unsigned ElementIdx = OpC->getZExtValue();
-      const StructLayout *SL = TD.getStructLayout(STy);
-      GEPOffset += APInt(Offset.getBitWidth(),
-                         SL->getElementOffset(ElementIdx));
-      continue;
-    }
+  const TargetData &TD;
+  AllocaPartitioning &P;
+  SROA &Pass;

-    APInt TypeSize(Offset.getBitWidth(),
-                   TD.getTypeAllocSize(GTI.getIndexedType()));
-    if (VectorType *VTy = dyn_cast<VectorType>(*GTI)) {
-      assert((VTy->getScalarSizeInBits() % 8) == 0 &&
-             "vector element size is not a multiple of 8, cannot GEP over it");
-      TypeSize = VTy->getScalarSizeInBits() / 8;
-    }
+public:
+  PHIOrSelectSpeculator(const TargetData &TD, AllocaPartitioning &P, SROA &Pass)
+    : TD(TD), P(P), Pass(Pass) {}

-    GEPOffset += OpC->getValue().sextOrTrunc(Offset.getBitWidth()) * TypeSize;
+  /// \brief Visit the users of an alloca partition and rewrite them.
+  void visitUsers(AllocaPartitioning::const_iterator PI) {
+    // Note that we need to use an index here as the underlying vector of uses
+    // may be grown during speculation. However, we never need to re-visit the
+    // new uses, and so we can use the initial size bound.
+    for (unsigned Idx = 0, Size = P.use_size(PI); Idx != Size; ++Idx) {
+      const AllocaPartitioning::PartitionUse &PU = P.getUse(PI, Idx);
+      if (!PU.U)
+        continue; // Skip dead use.
+
+      visit(cast<Instruction>(PU.U->getUser()));
+    }
   }
-  Offset = GEPOffset;
-  return true;
-}
-
-/// \brief Build a GEP out of a base pointer and indices.
-/// -/// This will return the BasePtr if that is valid, or build a new GEP -/// instruction using the IRBuilder if GEP-ing is needed. -static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr, - SmallVectorImpl &Indices, - const Twine &Prefix) { - if (Indices.empty()) - return BasePtr; +private: + // By default, skip this instruction. + void visitInstruction(Instruction &I) {} - // A single zero index is a no-op, so check for this and avoid building a GEP - // in that case. - if (Indices.size() == 1 && cast(Indices.back())->isZero()) - return BasePtr; + /// PHI instructions that use an alloca and are subsequently loaded can be + /// rewritten to load both input pointers in the pred blocks and then PHI the + /// results, allowing the load of the alloca to be promoted. + /// From this: + /// %P2 = phi [i32* %Alloca, i32* %Other] + /// %V = load i32* %P2 + /// to: + /// %V1 = load i32* %Alloca -> will be mem2reg'd + /// ... + /// %V2 = load i32* %Other + /// ... + /// %V = phi [i32 %V1, i32 %V2] + /// + /// We can do this to a select if its only uses are loads and if the operands + /// to the select can be loaded unconditionally. + /// + /// FIXME: This should be hoisted into a generic utility, likely in + /// Transforms/Util/Local.h + bool isSafePHIToSpeculate(PHINode &PN, SmallVectorImpl &Loads) { + // For now, we can only do this promotion if the load is in the same block + // as the PHI, and if there are no stores between the phi and load. + // TODO: Allow recursive phi users. + // TODO: Allow stores. + BasicBlock *BB = PN.getParent(); + unsigned MaxAlign = 0; + for (Value::use_iterator UI = PN.use_begin(), UE = PN.use_end(); + UI != UE; ++UI) { + LoadInst *LI = dyn_cast(*UI); + if (LI == 0 || !LI->isSimple()) return false; - return IRB.CreateInBoundsGEP(BasePtr, Indices, Prefix + ".idx"); -} + // For now we only allow loads in the same block as the PHI. This is + // a common case that happens when instcombine merges two loads through + // a PHI. + if (LI->getParent() != BB) return false; -/// \brief Get a natural GEP off of the BasePtr walking through Ty toward -/// TargetTy without changing the offset of the pointer. -/// -/// This routine assumes we've already established a properly offset GEP with -/// Indices, and arrived at the Ty type. The goal is to continue to GEP with -/// zero-indices down through type layers until we find one the same as -/// TargetTy. If we can't find one with the same type, we at least try to use -/// one with the same size. If none of that works, we just produce the GEP as -/// indicated by Indices to have the correct offset. -static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const TargetData &TD, - Value *BasePtr, Type *Ty, Type *TargetTy, - SmallVectorImpl &Indices, - const Twine &Prefix) { - if (Ty == TargetTy) - return buildGEP(IRB, BasePtr, Indices, Prefix); + // Ensure that there are no instructions between the PHI and the load that + // could store. + for (BasicBlock::iterator BBI = &PN; &*BBI != LI; ++BBI) + if (BBI->mayWriteToMemory()) + return false; - // See if we can descend into a struct and locate a field with the correct - // type. 
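// An illustrative (hypothetical) instance of this descent: with
// Ty = { { float, i32 } } and TargetTy = float, the loop below appends one
// zero index per struct layer, so buildGEP would emit something like
//   getelementptr inbounds { { float, i32 } }* %p, i64 0, i32 0, i32 0
// (the leading i64 0 coming from the caller-established Indices), which
// points at the inner float without changing the byte offset.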
- unsigned NumLayers = 0; - Type *ElementTy = Ty; - do { - if (ElementTy->isPointerTy()) - break; - if (SequentialType *SeqTy = dyn_cast(ElementTy)) { - ElementTy = SeqTy->getElementType(); - Indices.push_back(IRB.getInt(APInt(TD.getPointerSizeInBits(), 0))); - } else if (StructType *STy = dyn_cast(ElementTy)) { - ElementTy = *STy->element_begin(); - Indices.push_back(IRB.getInt32(0)); - } else { - break; + MaxAlign = std::max(MaxAlign, LI->getAlignment()); + Loads.push_back(LI); } - ++NumLayers; - } while (ElementTy != TargetTy); - if (ElementTy != TargetTy) - Indices.erase(Indices.end() - NumLayers, Indices.end()); - return buildGEP(IRB, BasePtr, Indices, Prefix); -} + // We can only transform this if it is safe to push the loads into the + // predecessor blocks. The only thing to watch out for is that we can't put + // a possibly trapping load in the predecessor if it is a critical edge. + for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; + ++Idx) { + TerminatorInst *TI = PN.getIncomingBlock(Idx)->getTerminator(); + Value *InVal = PN.getIncomingValue(Idx); -/// \brief Recursively compute indices for a natural GEP. -/// -/// This is the recursive step for getNaturalGEPWithOffset that walks down the -/// element types adding appropriate indices for the GEP. -static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const TargetData &TD, - Value *Ptr, Type *Ty, APInt &Offset, - Type *TargetTy, - SmallVectorImpl &Indices, - const Twine &Prefix) { - if (Offset == 0) - return getNaturalGEPWithType(IRB, TD, Ptr, Ty, TargetTy, Indices, Prefix); + // If the value is produced by the terminator of the predecessor (an + // invoke) or it has side-effects, there is no valid place to put a load + // in the predecessor. + if (TI == InVal || TI->mayHaveSideEffects()) + return false; - // We can't recurse through pointer types. - if (Ty->isPointerTy()) - return 0; + // If the predecessor has a single successor, then the edge isn't + // critical. + if (TI->getNumSuccessors() == 1) + continue; - // We try to analyze GEPs over vectors here, but note that these GEPs are - // extremely poorly defined currently. The long-term goal is to remove GEPing - // over a vector from the IR completely. - if (VectorType *VecTy = dyn_cast(Ty)) { - unsigned ElementSizeInBits = VecTy->getScalarSizeInBits(); - if (ElementSizeInBits % 8) - return 0; // GEPs over non-multiple of 8 size vector elements are invalid. - APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8); - APInt NumSkippedElements = Offset.udiv(ElementSize); - if (NumSkippedElements.ugt(VecTy->getNumElements())) - return 0; - Offset -= NumSkippedElements * ElementSize; - Indices.push_back(IRB.getInt(NumSkippedElements)); - return getNaturalGEPRecursively(IRB, TD, Ptr, VecTy->getElementType(), - Offset, TargetTy, Indices, Prefix); - } + // If this pointer is always safe to load, or if we can prove that there + // is already a load in the block, then we can move the load to the pred + // block. 
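// Two hypothetical incoming values, to make these checks concrete: a pointer
// produced by the predecessor's own terminator leaves no room for a
// speculated load,
//   pred:
//     %in = invoke i32* @f() to label %merge unwind label %lpad
//   merge:
//     %phi = phi i32* [ %in, %pred ], [ %a, %entry ]  ; TI == InVal -> reject
// whereas an alloca %a is trivially dereferenceable and is accepted by the
// test below even across a critical edge.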
+ if (InVal->isDereferenceablePointer() || + isSafeToLoadUnconditionally(InVal, TI, MaxAlign, &TD)) + continue; - if (ArrayType *ArrTy = dyn_cast(Ty)) { - Type *ElementTy = ArrTy->getElementType(); - APInt ElementSize(Offset.getBitWidth(), TD.getTypeAllocSize(ElementTy)); - APInt NumSkippedElements = Offset.udiv(ElementSize); - if (NumSkippedElements.ugt(ArrTy->getNumElements())) - return 0; + return false; + } - Offset -= NumSkippedElements * ElementSize; - Indices.push_back(IRB.getInt(NumSkippedElements)); - return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy, - Indices, Prefix); + return true; } - StructType *STy = dyn_cast(Ty); - if (!STy) - return 0; + void visitPHINode(PHINode &PN) { + DEBUG(dbgs() << " original: " << PN << "\n"); - const StructLayout *SL = TD.getStructLayout(STy); - uint64_t StructOffset = Offset.getZExtValue(); - if (StructOffset >= SL->getSizeInBytes()) - return 0; - unsigned Index = SL->getElementContainingOffset(StructOffset); - Offset -= APInt(Offset.getBitWidth(), SL->getElementOffset(Index)); - Type *ElementTy = STy->getElementType(Index); - if (Offset.uge(TD.getTypeAllocSize(ElementTy))) - return 0; // The offset points into alignment padding. + SmallVector Loads; + if (!isSafePHIToSpeculate(PN, Loads)) + return; - Indices.push_back(IRB.getInt32(Index)); - return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy, - Indices, Prefix); -} + assert(!Loads.empty()); -/// \brief Get a natural GEP from a base pointer to a particular offset and -/// resulting in a particular type. -/// -/// The goal is to produce a "natural" looking GEP that works with the existing -/// composite types to arrive at the appropriate offset and element type for -/// a pointer. TargetTy is the element type the returned GEP should point-to if -/// possible. We recurse by decreasing Offset, adding the appropriate index to -/// Indices, and setting Ty to the result subtype. -/// -/// If no natural GEP can be constructed, this function returns null. -static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const TargetData &TD, - Value *Ptr, APInt Offset, Type *TargetTy, - SmallVectorImpl &Indices, - const Twine &Prefix) { - PointerType *Ty = cast(Ptr->getType()); + Type *LoadTy = cast(PN.getType())->getElementType(); + IRBuilder<> PHIBuilder(&PN); + PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(), + PN.getName() + ".sroa.speculated"); - // Don't consider any GEPs through an i8* as natural unless the TargetTy is - // an i8. - if (Ty == IRB.getInt8PtrTy() && TargetTy->isIntegerTy(8)) - return 0; - - Type *ElementTy = Ty->getElementType(); - if (!ElementTy->isSized()) - return 0; // We can't GEP through an unsized element. - APInt ElementSize(Offset.getBitWidth(), TD.getTypeAllocSize(ElementTy)); - if (ElementSize == 0) - return 0; // Zero-length arrays can't help us build a natural GEP. - APInt NumSkippedElements = Offset.udiv(ElementSize); - - Offset -= NumSkippedElements * ElementSize; - Indices.push_back(IRB.getInt(NumSkippedElements)); - return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy, - Indices, Prefix); -} + // Get the TBAA tag and alignment to use from one of the loads. It doesn't + // matter which one we get and if any differ, it doesn't matter. 
+ LoadInst *SomeLoad = cast(Loads.back()); + MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa); + unsigned Align = SomeLoad->getAlignment(); -/// \brief Compute an adjusted pointer from Ptr by Offset bytes where the -/// resulting pointer has PointerTy. -/// -/// This tries very hard to compute a "natural" GEP which arrives at the offset -/// and produces the pointer type desired. Where it cannot, it will try to use -/// the natural GEP to arrive at the offset and bitcast to the type. Where that -/// fails, it will try to use an existing i8* and GEP to the byte offset and -/// bitcast to the type. -/// -/// The strategy for finding the more natural GEPs is to peel off layers of the -/// pointer, walking back through bit casts and GEPs, searching for a base -/// pointer from which we can compute a natural GEP with the desired -/// properities. The algorithm tries to fold as many constant indices into -/// a single GEP as possible, thus making each GEP more independent of the -/// surrounding code. -static Value *getAdjustedPtr(IRBuilder<> &IRB, const TargetData &TD, - Value *Ptr, APInt Offset, Type *PointerTy, - const Twine &Prefix) { - // Even though we don't look through PHI nodes, we could be called on an - // instruction in an unreachable block, which may be on a cycle. - SmallPtrSet Visited; - Visited.insert(Ptr); - SmallVector Indices; + // Rewrite all loads of the PN to use the new PHI. + do { + LoadInst *LI = Loads.pop_back_val(); + LI->replaceAllUsesWith(NewPN); + Pass.DeadInsts.push_back(LI); + } while (!Loads.empty()); - // We may end up computing an offset pointer that has the wrong type. If we - // never are able to compute one directly that has the correct type, we'll - // fall back to it, so keep it around here. - Value *OffsetPtr = 0; + // Inject loads into all of the pred blocks. + for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) { + BasicBlock *Pred = PN.getIncomingBlock(Idx); + TerminatorInst *TI = Pred->getTerminator(); + Use *InUse = &PN.getOperandUse(PN.getOperandNumForIncomingValue(Idx)); + Value *InVal = PN.getIncomingValue(Idx); + IRBuilder<> PredBuilder(TI); - // Remember any i8 pointer we come across to re-use if we need to do a raw - // byte offset. - Value *Int8Ptr = 0; - APInt Int8PtrOffset(Offset.getBitWidth(), 0); + LoadInst *Load + = PredBuilder.CreateLoad(InVal, (PN.getName() + ".sroa.speculate.load." + + Pred->getName())); + ++NumLoadsSpeculated; + Load->setAlignment(Align); + if (TBAATag) + Load->setMetadata(LLVMContext::MD_tbaa, TBAATag); + NewPN->addIncoming(Load, Pred); - Type *TargetTy = PointerTy->getPointerElementType(); + Instruction *Ptr = dyn_cast(InVal); + if (!Ptr) + // No uses to rewrite. + continue; - do { - // First fold any existing GEPs into the offset. - while (GEPOperator *GEP = dyn_cast(Ptr)) { - APInt GEPOffset(Offset.getBitWidth(), 0); - if (!accumulateGEPOffsets(TD, *GEP, GEPOffset)) - break; - Offset += GEPOffset; - Ptr = GEP->getPointerOperand(); - if (!Visited.insert(Ptr)) - break; - } + // Try to lookup and rewrite any partition uses corresponding to this phi + // input. + AllocaPartitioning::iterator PI + = P.findPartitionForPHIOrSelectOperand(InUse); + if (PI == P.end()) + continue; - // See if we can perform a natural GEP here. - Indices.clear(); - if (Value *P = getNaturalGEPWithOffset(IRB, TD, Ptr, Offset, TargetTy, - Indices, Prefix)) { - if (P->getType() == PointerTy) { - // Zap any offset pointer that we ended up computing in previous rounds. 
- if (OffsetPtr && OffsetPtr->use_empty()) - if (Instruction *I = dyn_cast(OffsetPtr)) - I->eraseFromParent(); - return P; - } - if (!OffsetPtr) { - OffsetPtr = P; - } + // Replace the Use in the PartitionUse for this operand with the Use + // inside the load. + AllocaPartitioning::use_iterator UI + = P.findPartitionUseForPHIOrSelectOperand(InUse); + assert(isa(*UI->U->getUser())); + UI->U = &Load->getOperandUse(Load->getPointerOperandIndex()); } + DEBUG(dbgs() << " speculated to: " << *NewPN << "\n"); + } - // Stash this pointer if we've found an i8*. - if (Ptr->getType()->isIntegerTy(8)) { - Int8Ptr = Ptr; - Int8PtrOffset = Offset; - } + /// Select instructions that use an alloca and are subsequently loaded can be + /// rewritten to load both input pointers and then select between the result, + /// allowing the load of the alloca to be promoted. + /// From this: + /// %P2 = select i1 %cond, i32* %Alloca, i32* %Other + /// %V = load i32* %P2 + /// to: + /// %V1 = load i32* %Alloca -> will be mem2reg'd + /// %V2 = load i32* %Other + /// %V = select i1 %cond, i32 %V1, i32 %V2 + /// + /// We can do this to a select if its only uses are loads and if the operand + /// to the select can be loaded unconditionally. + bool isSafeSelectToSpeculate(SelectInst &SI, + SmallVectorImpl &Loads) { + Value *TValue = SI.getTrueValue(); + Value *FValue = SI.getFalseValue(); + bool TDerefable = TValue->isDereferenceablePointer(); + bool FDerefable = FValue->isDereferenceablePointer(); - // Peel off a layer of the pointer and update the offset appropriately. - if (Operator::getOpcode(Ptr) == Instruction::BitCast) { - Ptr = cast(Ptr)->getOperand(0); - } else if (GlobalAlias *GA = dyn_cast(Ptr)) { - if (GA->mayBeOverridden()) - break; - Ptr = GA->getAliasee(); - } else { - break; - } - assert(Ptr->getType()->isPointerTy() && "Unexpected operand type!"); - } while (Visited.insert(Ptr)); + for (Value::use_iterator UI = SI.use_begin(), UE = SI.use_end(); + UI != UE; ++UI) { + LoadInst *LI = dyn_cast(*UI); + if (LI == 0 || !LI->isSimple()) return false; - if (!OffsetPtr) { - if (!Int8Ptr) { - Int8Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(), - Prefix + ".raw_cast"); - Int8PtrOffset = Offset; + // Both operands to the select need to be dereferencable, either + // absolutely (e.g. allocas) or at this point because we can see other + // accesses to it. + if (!TDerefable && !isSafeToLoadUnconditionally(TValue, LI, + LI->getAlignment(), &TD)) + return false; + if (!FDerefable && !isSafeToLoadUnconditionally(FValue, LI, + LI->getAlignment(), &TD)) + return false; + Loads.push_back(LI); } - OffsetPtr = Int8PtrOffset == 0 ? Int8Ptr : - IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset), - Prefix + ".raw_idx"); + return true; } - Ptr = OffsetPtr; - // On the off chance we were targeting i8*, guard the bitcast here. - if (Ptr->getType() != PointerTy) - Ptr = IRB.CreateBitCast(Ptr, PointerTy, Prefix + ".cast"); + void visitSelectInst(SelectInst &SI) { + DEBUG(dbgs() << " original: " << SI << "\n"); + IRBuilder<> IRB(&SI); - return Ptr; -} + // If the select isn't safe to speculate, just use simple logic to emit it. + SmallVector Loads; + if (!isSafeSelectToSpeculate(SI, Loads)) + return; -/// \brief Test whether the given alloca partition can be promoted to a vector. -/// -/// This is a quick test to check whether we can rewrite a particular alloca -/// partition (and its newly formed alloca) into a vector alloca with only -/// whole-vector loads and stores such that it could be promoted to a vector -/// SSA value. 
We only can ensure this for a limited set of operations, and we -/// don't want to do the rewrites unless we are confident that the result will -/// be promotable, so we have an early test here. -static bool isVectorPromotionViable(const TargetData &TD, - Type *AllocaTy, - AllocaPartitioning &P, - uint64_t PartitionBeginOffset, - uint64_t PartitionEndOffset, - AllocaPartitioning::const_use_iterator I, - AllocaPartitioning::const_use_iterator E) { - VectorType *Ty = dyn_cast(AllocaTy); - if (!Ty) - return false; + Use *Ops[2] = { &SI.getOperandUse(1), &SI.getOperandUse(2) }; + AllocaPartitioning::iterator PIs[2]; + AllocaPartitioning::PartitionUse PUs[2]; + for (unsigned i = 0, e = 2; i != e; ++i) { + PIs[i] = P.findPartitionForPHIOrSelectOperand(Ops[i]); + if (PIs[i] != P.end()) { + // If the pointer is within the partitioning, remove the select from + // its uses. We'll add in the new loads below. + AllocaPartitioning::use_iterator UI + = P.findPartitionUseForPHIOrSelectOperand(Ops[i]); + PUs[i] = *UI; + // Clear out the use here so that the offsets into the use list remain + // stable but this use is ignored when rewriting. + UI->U = 0; + } + } - uint64_t VecSize = TD.getTypeSizeInBits(Ty); - uint64_t ElementSize = Ty->getScalarSizeInBits(); + Value *TV = SI.getTrueValue(); + Value *FV = SI.getFalseValue(); + // Replace the loads of the select with a select of two loads. + while (!Loads.empty()) { + LoadInst *LI = Loads.pop_back_val(); - // While the definition of LLVM vectors is bitpacked, we don't support sizes - // that aren't byte sized. - if (ElementSize % 8) - return false; - assert((VecSize % 8) == 0 && "vector size not a multiple of element size?"); - VecSize /= 8; - ElementSize /= 8; + IRB.SetInsertPoint(LI); + LoadInst *TL = + IRB.CreateLoad(TV, LI->getName() + ".sroa.speculate.load.true"); + LoadInst *FL = + IRB.CreateLoad(FV, LI->getName() + ".sroa.speculate.load.false"); + NumLoadsSpeculated += 2; - for (; I != E; ++I) { - if (!I->U) - continue; // Skip dead use. + // Transfer alignment and TBAA info if present. + TL->setAlignment(LI->getAlignment()); + FL->setAlignment(LI->getAlignment()); + if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) { + TL->setMetadata(LLVMContext::MD_tbaa, Tag); + FL->setMetadata(LLVMContext::MD_tbaa, Tag); + } - uint64_t BeginOffset = I->BeginOffset - PartitionBeginOffset; - uint64_t BeginIndex = BeginOffset / ElementSize; - if (BeginIndex * ElementSize != BeginOffset || - BeginIndex >= Ty->getNumElements()) - return false; - uint64_t EndOffset = I->EndOffset - PartitionBeginOffset; - uint64_t EndIndex = EndOffset / ElementSize; - if (EndIndex * ElementSize != EndOffset || - EndIndex > Ty->getNumElements()) - return false; - - // FIXME: We should build shuffle vector instructions to handle - // non-element-sized accesses. 
- if ((EndOffset - BeginOffset) != ElementSize && - (EndOffset - BeginOffset) != VecSize) - return false; + Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL, + LI->getName() + ".sroa.speculated"); - if (MemIntrinsic *MI = dyn_cast(I->U->getUser())) { - if (MI->isVolatile()) - return false; - if (MemTransferInst *MTI = dyn_cast(I->U->getUser())) { - const AllocaPartitioning::MemTransferOffsets &MTO - = P.getMemTransferOffsets(*MTI); - if (!MTO.IsSplittable) - return false; + LoadInst *Loads[2] = { TL, FL }; + for (unsigned i = 0, e = 2; i != e; ++i) { + if (PIs[i] != P.end()) { + Use *LoadUse = &Loads[i]->getOperandUse(0); + assert(PUs[i].U->get() == LoadUse->get()); + PUs[i].U = LoadUse; + P.use_push_back(PIs[i], PUs[i]); + } } - } else if (I->U->get()->getType()->getPointerElementType()->isStructTy()) { - // Disable vector promotion when there are loads or stores of an FCA. - return false; - } else if (!isa(I->U->getUser()) && - !isa(I->U->getUser())) { - return false; + + DEBUG(dbgs() << " speculated to: " << *V << "\n"); + LI->replaceAllUsesWith(V); + Pass.DeadInsts.push_back(LI); } } - return true; +}; } -/// \brief Test whether the given alloca partition can be promoted to an int. +/// \brief Accumulate the constant offsets in a GEP into a single APInt offset. /// -/// This is a quick test to check whether we can rewrite a particular alloca -/// partition (and its newly formed alloca) into an integer alloca suitable for -/// promotion to an SSA value. We only can ensure this for a limited set of -/// operations, and we don't want to do the rewrites unless we are confident -/// that the result will be promotable, so we have an early test here. -static bool isIntegerPromotionViable(const TargetData &TD, - Type *AllocaTy, - uint64_t AllocBeginOffset, - AllocaPartitioning &P, - AllocaPartitioning::const_use_iterator I, - AllocaPartitioning::const_use_iterator E) { - IntegerType *Ty = dyn_cast(AllocaTy); - if (!Ty || 8*TD.getTypeStoreSize(Ty) != Ty->getBitWidth()) - return false; - - // Check the uses to ensure the uses are (likely) promoteable integer uses. - // Also ensure that the alloca has a covering load or store. We don't want - // promote because of some other unsplittable entry (which we may make - // splittable later) and lose the ability to promote each element access. - bool WholeAllocaOp = false; - for (; I != E; ++I) { - if (!I->U) - continue; // Skip dead use. - - // We can't reasonably handle cases where the load or store extends past - // the end of the aloca's type and into its padding. - if ((I->EndOffset - AllocBeginOffset) > TD.getTypeStoreSize(Ty)) +/// If the provided GEP is all-constant, the total byte offset formed by the +/// GEP is computed and Offset is set to it. If the GEP has any non-constant +/// operands, the function returns false and the value of Offset is unmodified. 
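// A worked example (hypothetical type; assuming the usual layout where
// { i32, [2 x i16] } occupies 8 bytes with the array field at offset 4):
//   getelementptr inbounds { i32, [2 x i16] }* %p, i64 1, i32 1, i64 1
// accumulates 8 (one whole struct) + 4 (field 1) + 2 (one i16), i.e. a byte
// offset of 14; any non-ConstantInt operand instead makes the function below
// bail and return false.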
+static bool accumulateGEPOffsets(const TargetData &TD, GEPOperator &GEP, + APInt &Offset) { + APInt GEPOffset(Offset.getBitWidth(), 0); + for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); + GTI != GTE; ++GTI) { + ConstantInt *OpC = dyn_cast(GTI.getOperand()); + if (!OpC) return false; + if (OpC->isZero()) continue; - if (LoadInst *LI = dyn_cast(I->U->getUser())) { - if (LI->isVolatile() || !LI->getType()->isIntegerTy()) - return false; - if (LI->getType() == Ty) - WholeAllocaOp = true; - } else if (StoreInst *SI = dyn_cast(I->U->getUser())) { - if (SI->isVolatile() || !SI->getValueOperand()->getType()->isIntegerTy()) - return false; - if (SI->getValueOperand()->getType() == Ty) - WholeAllocaOp = true; - } else if (MemIntrinsic *MI = dyn_cast(I->U->getUser())) { - if (MI->isVolatile()) - return false; - if (MemTransferInst *MTI = dyn_cast(I->U->getUser())) { - const AllocaPartitioning::MemTransferOffsets &MTO - = P.getMemTransferOffsets(*MTI); - if (!MTO.IsSplittable) - return false; - } - } else { - return false; + // Handle a struct index, which adds its field offset to the pointer. + if (StructType *STy = dyn_cast(*GTI)) { + unsigned ElementIdx = OpC->getZExtValue(); + const StructLayout *SL = TD.getStructLayout(STy); + GEPOffset += APInt(Offset.getBitWidth(), + SL->getElementOffset(ElementIdx)); + continue; } + + APInt TypeSize(Offset.getBitWidth(), + TD.getTypeAllocSize(GTI.getIndexedType())); + if (VectorType *VTy = dyn_cast(*GTI)) { + assert((VTy->getScalarSizeInBits() % 8) == 0 && + "vector element size is not a multiple of 8, cannot GEP over it"); + TypeSize = VTy->getScalarSizeInBits() / 8; + } + + GEPOffset += OpC->getValue().sextOrTrunc(Offset.getBitWidth()) * TypeSize; } - return WholeAllocaOp; + Offset = GEPOffset; + return true; } -namespace { -/// \brief Visitor to speculate PHIs and Selects where possible. -class PHIOrSelectSpeculator : public InstVisitor { - // Befriend the base class so it can delegate to private visit methods. - friend class llvm::InstVisitor; +/// \brief Build a GEP out of a base pointer and indices. +/// +/// This will return the BasePtr if that is valid, or build a new GEP +/// instruction using the IRBuilder if GEP-ing is needed. +static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr, + SmallVectorImpl &Indices, + const Twine &Prefix) { + if (Indices.empty()) + return BasePtr; - const TargetData &TD; - AllocaPartitioning &P; - SROA &Pass; + // A single zero index is a no-op, so check for this and avoid building a GEP + // in that case. + if (Indices.size() == 1 && cast(Indices.back())->isZero()) + return BasePtr; -public: - PHIOrSelectSpeculator(const TargetData &TD, AllocaPartitioning &P, SROA &Pass) - : TD(TD), P(P), Pass(Pass) {} + return IRB.CreateInBoundsGEP(BasePtr, Indices, Prefix + ".idx"); +} - /// \brief Visit the users of an alloca partition and rewrite them. - void visitUsers(AllocaPartitioning::const_iterator PI) { - // Note that we need to use an index here as the underlying vector of uses - // may be grown during speculation. However, we never need to re-visit the - // new uses, and so we can use the initial size bound. - for (unsigned Idx = 0, Size = P.use_size(PI); Idx != Size; ++Idx) { - const AllocaPartitioning::PartitionUse &PU = P.getUse(PI, Idx); - if (!PU.U) - continue; // Skip dead use. +/// \brief Get a natural GEP off of the BasePtr walking through Ty toward +/// TargetTy without changing the offset of the pointer. 
+/// +/// This routine assumes we've already established a properly offset GEP with +/// Indices, and arrived at the Ty type. The goal is to continue to GEP with +/// zero-indices down through type layers until we find one the same as +/// TargetTy. If we can't find one with the same type, we at least try to use +/// one with the same size. If none of that works, we just produce the GEP as +/// indicated by Indices to have the correct offset. +static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const TargetData &TD, + Value *BasePtr, Type *Ty, Type *TargetTy, + SmallVectorImpl &Indices, + const Twine &Prefix) { + if (Ty == TargetTy) + return buildGEP(IRB, BasePtr, Indices, Prefix); - visit(cast(PU.U->getUser())); + // See if we can descend into a struct and locate a field with the correct + // type. + unsigned NumLayers = 0; + Type *ElementTy = Ty; + do { + if (ElementTy->isPointerTy()) + break; + if (SequentialType *SeqTy = dyn_cast(ElementTy)) { + ElementTy = SeqTy->getElementType(); + Indices.push_back(IRB.getInt(APInt(TD.getPointerSizeInBits(), 0))); + } else if (StructType *STy = dyn_cast(ElementTy)) { + ElementTy = *STy->element_begin(); + Indices.push_back(IRB.getInt32(0)); + } else { + break; } - } + ++NumLayers; + } while (ElementTy != TargetTy); + if (ElementTy != TargetTy) + Indices.erase(Indices.end() - NumLayers, Indices.end()); -private: - // By default, skip this instruction. - void visitInstruction(Instruction &I) {} + return buildGEP(IRB, BasePtr, Indices, Prefix); +} - /// PHI instructions that use an alloca and are subsequently loaded can be - /// rewritten to load both input pointers in the pred blocks and then PHI the - /// results, allowing the load of the alloca to be promoted. - /// From this: - /// %P2 = phi [i32* %Alloca, i32* %Other] - /// %V = load i32* %P2 - /// to: - /// %V1 = load i32* %Alloca -> will be mem2reg'd - /// ... - /// %V2 = load i32* %Other - /// ... - /// %V = phi [i32 %V1, i32 %V2] - /// - /// We can do this to a select if its only uses are loads and if the operands - /// to the select can be loaded unconditionally. - /// - /// FIXME: This should be hoisted into a generic utility, likely in - /// Transforms/Util/Local.h - bool isSafePHIToSpeculate(PHINode &PN, SmallVectorImpl &Loads) { - // For now, we can only do this promotion if the load is in the same block - // as the PHI, and if there are no stores between the phi and load. - // TODO: Allow recursive phi users. - // TODO: Allow stores. - BasicBlock *BB = PN.getParent(); - unsigned MaxAlign = 0; - for (Value::use_iterator UI = PN.use_begin(), UE = PN.use_end(); - UI != UE; ++UI) { - LoadInst *LI = dyn_cast(*UI); - if (LI == 0 || !LI->isSimple()) return false; +/// \brief Recursively compute indices for a natural GEP. +/// +/// This is the recursive step for getNaturalGEPWithOffset that walks down the +/// element types adding appropriate indices for the GEP. +static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const TargetData &TD, + Value *Ptr, Type *Ty, APInt &Offset, + Type *TargetTy, + SmallVectorImpl &Indices, + const Twine &Prefix) { + if (Offset == 0) + return getNaturalGEPWithType(IRB, TD, Ptr, Ty, TargetTy, Indices, Prefix); - // For now we only allow loads in the same block as the PHI. This is - // a common case that happens when instcombine merges two loads through - // a PHI. - if (LI->getParent() != BB) return false; + // We can't recurse through pointer types. 
+ if (Ty->isPointerTy()) + return 0; - // Ensure that there are no instructions between the PHI and the load that - // could store. - for (BasicBlock::iterator BBI = &PN; &*BBI != LI; ++BBI) - if (BBI->mayWriteToMemory()) - return false; + // We try to analyze GEPs over vectors here, but note that these GEPs are + // extremely poorly defined currently. The long-term goal is to remove GEPing + // over a vector from the IR completely. + if (VectorType *VecTy = dyn_cast(Ty)) { + unsigned ElementSizeInBits = VecTy->getScalarSizeInBits(); + if (ElementSizeInBits % 8) + return 0; // GEPs over non-multiple of 8 size vector elements are invalid. + APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8); + APInt NumSkippedElements = Offset.udiv(ElementSize); + if (NumSkippedElements.ugt(VecTy->getNumElements())) + return 0; + Offset -= NumSkippedElements * ElementSize; + Indices.push_back(IRB.getInt(NumSkippedElements)); + return getNaturalGEPRecursively(IRB, TD, Ptr, VecTy->getElementType(), + Offset, TargetTy, Indices, Prefix); + } - MaxAlign = std::max(MaxAlign, LI->getAlignment()); - Loads.push_back(LI); - } + if (ArrayType *ArrTy = dyn_cast(Ty)) { + Type *ElementTy = ArrTy->getElementType(); + APInt ElementSize(Offset.getBitWidth(), TD.getTypeAllocSize(ElementTy)); + APInt NumSkippedElements = Offset.udiv(ElementSize); + if (NumSkippedElements.ugt(ArrTy->getNumElements())) + return 0; - // We can only transform this if it is safe to push the loads into the - // predecessor blocks. The only thing to watch out for is that we can't put - // a possibly trapping load in the predecessor if it is a critical edge. - for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; - ++Idx) { - TerminatorInst *TI = PN.getIncomingBlock(Idx)->getTerminator(); - Value *InVal = PN.getIncomingValue(Idx); + Offset -= NumSkippedElements * ElementSize; + Indices.push_back(IRB.getInt(NumSkippedElements)); + return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy, + Indices, Prefix); + } - // If the value is produced by the terminator of the predecessor (an - // invoke) or it has side-effects, there is no valid place to put a load - // in the predecessor. - if (TI == InVal || TI->mayHaveSideEffects()) - return false; + StructType *STy = dyn_cast(Ty); + if (!STy) + return 0; - // If the predecessor has a single successor, then the edge isn't - // critical. - if (TI->getNumSuccessors() == 1) - continue; + const StructLayout *SL = TD.getStructLayout(STy); + uint64_t StructOffset = Offset.getZExtValue(); + if (StructOffset >= SL->getSizeInBytes()) + return 0; + unsigned Index = SL->getElementContainingOffset(StructOffset); + Offset -= APInt(Offset.getBitWidth(), SL->getElementOffset(Index)); + Type *ElementTy = STy->getElementType(Index); + if (Offset.uge(TD.getTypeAllocSize(ElementTy))) + return 0; // The offset points into alignment padding. - // If this pointer is always safe to load, or if we can prove that there - // is already a load in the block, then we can move the load to the pred - // block. - if (InVal->isDereferenceablePointer() || - isSafeToLoadUnconditionally(InVal, TI, MaxAlign, &TD)) - continue; + Indices.push_back(IRB.getInt32(Index)); + return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy, + Indices, Prefix); +} - return false; - } +/// \brief Get a natural GEP from a base pointer to a particular offset and +/// resulting in a particular type. 
+/// +/// The goal is to produce a "natural" looking GEP that works with the existing +/// composite types to arrive at the appropriate offset and element type for +/// a pointer. TargetTy is the element type the returned GEP should point-to if +/// possible. We recurse by decreasing Offset, adding the appropriate index to +/// Indices, and setting Ty to the result subtype. +/// +/// If no natural GEP can be constructed, this function returns null. +static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const TargetData &TD, + Value *Ptr, APInt Offset, Type *TargetTy, + SmallVectorImpl &Indices, + const Twine &Prefix) { + PointerType *Ty = cast(Ptr->getType()); - return true; - } + // Don't consider any GEPs through an i8* as natural unless the TargetTy is + // an i8. + if (Ty == IRB.getInt8PtrTy() && TargetTy->isIntegerTy(8)) + return 0; - void visitPHINode(PHINode &PN) { - DEBUG(dbgs() << " original: " << PN << "\n"); + Type *ElementTy = Ty->getElementType(); + if (!ElementTy->isSized()) + return 0; // We can't GEP through an unsized element. + APInt ElementSize(Offset.getBitWidth(), TD.getTypeAllocSize(ElementTy)); + if (ElementSize == 0) + return 0; // Zero-length arrays can't help us build a natural GEP. + APInt NumSkippedElements = Offset.udiv(ElementSize); - SmallVector Loads; - if (!isSafePHIToSpeculate(PN, Loads)) - return; + Offset -= NumSkippedElements * ElementSize; + Indices.push_back(IRB.getInt(NumSkippedElements)); + return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy, + Indices, Prefix); +} - assert(!Loads.empty()); +/// \brief Compute an adjusted pointer from Ptr by Offset bytes where the +/// resulting pointer has PointerTy. +/// +/// This tries very hard to compute a "natural" GEP which arrives at the offset +/// and produces the pointer type desired. Where it cannot, it will try to use +/// the natural GEP to arrive at the offset and bitcast to the type. Where that +/// fails, it will try to use an existing i8* and GEP to the byte offset and +/// bitcast to the type. +/// +/// The strategy for finding the more natural GEPs is to peel off layers of the +/// pointer, walking back through bit casts and GEPs, searching for a base +/// pointer from which we can compute a natural GEP with the desired +/// properities. The algorithm tries to fold as many constant indices into +/// a single GEP as possible, thus making each GEP more independent of the +/// surrounding code. +static Value *getAdjustedPtr(IRBuilder<> &IRB, const TargetData &TD, + Value *Ptr, APInt Offset, Type *PointerTy, + const Twine &Prefix) { + // Even though we don't look through PHI nodes, we could be called on an + // instruction in an unreachable block, which may be on a cycle. + SmallPtrSet Visited; + Visited.insert(Ptr); + SmallVector Indices; - Type *LoadTy = cast(PN.getType())->getElementType(); - IRBuilder<> PHIBuilder(&PN); - PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(), - PN.getName() + ".sroa.speculated"); + // We may end up computing an offset pointer that has the wrong type. If we + // never are able to compute one directly that has the correct type, we'll + // fall back to it, so keep it around here. + Value *OffsetPtr = 0; - // Get the TBAA tag and alignment to use from one of the loads. It doesn't - // matter which one we get and if any differ, it doesn't matter. 
- LoadInst *SomeLoad = cast(Loads.back()); - MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa); - unsigned Align = SomeLoad->getAlignment(); + // Remember any i8 pointer we come across to re-use if we need to do a raw + // byte offset. + Value *Int8Ptr = 0; + APInt Int8PtrOffset(Offset.getBitWidth(), 0); - // Rewrite all loads of the PN to use the new PHI. - do { - LoadInst *LI = Loads.pop_back_val(); - LI->replaceAllUsesWith(NewPN); - Pass.DeadInsts.push_back(LI); - } while (!Loads.empty()); + Type *TargetTy = PointerTy->getPointerElementType(); - // Inject loads into all of the pred blocks. - for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) { - BasicBlock *Pred = PN.getIncomingBlock(Idx); - TerminatorInst *TI = Pred->getTerminator(); - Use *InUse = &PN.getOperandUse(PN.getOperandNumForIncomingValue(Idx)); - Value *InVal = PN.getIncomingValue(Idx); - IRBuilder<> PredBuilder(TI); + do { + // First fold any existing GEPs into the offset. + while (GEPOperator *GEP = dyn_cast(Ptr)) { + APInt GEPOffset(Offset.getBitWidth(), 0); + if (!accumulateGEPOffsets(TD, *GEP, GEPOffset)) + break; + Offset += GEPOffset; + Ptr = GEP->getPointerOperand(); + if (!Visited.insert(Ptr)) + break; + } - LoadInst *Load - = PredBuilder.CreateLoad(InVal, (PN.getName() + ".sroa.speculate.load." + - Pred->getName())); - ++NumLoadsSpeculated; - Load->setAlignment(Align); - if (TBAATag) - Load->setMetadata(LLVMContext::MD_tbaa, TBAATag); - NewPN->addIncoming(Load, Pred); + // See if we can perform a natural GEP here. + Indices.clear(); + if (Value *P = getNaturalGEPWithOffset(IRB, TD, Ptr, Offset, TargetTy, + Indices, Prefix)) { + if (P->getType() == PointerTy) { + // Zap any offset pointer that we ended up computing in previous rounds. + if (OffsetPtr && OffsetPtr->use_empty()) + if (Instruction *I = dyn_cast(OffsetPtr)) + I->eraseFromParent(); + return P; + } + if (!OffsetPtr) { + OffsetPtr = P; + } + } - Instruction *Ptr = dyn_cast(InVal); - if (!Ptr) - // No uses to rewrite. - continue; + // Stash this pointer if we've found an i8*. + if (Ptr->getType()->isIntegerTy(8)) { + Int8Ptr = Ptr; + Int8PtrOffset = Offset; + } - // Try to lookup and rewrite any partition uses corresponding to this phi - // input. - AllocaPartitioning::iterator PI - = P.findPartitionForPHIOrSelectOperand(InUse); - if (PI == P.end()) - continue; + // Peel off a layer of the pointer and update the offset appropriately. + if (Operator::getOpcode(Ptr) == Instruction::BitCast) { + Ptr = cast(Ptr)->getOperand(0); + } else if (GlobalAlias *GA = dyn_cast(Ptr)) { + if (GA->mayBeOverridden()) + break; + Ptr = GA->getAliasee(); + } else { + break; + } + assert(Ptr->getType()->isPointerTy() && "Unexpected operand type!"); + } while (Visited.insert(Ptr)); - // Replace the Use in the PartitionUse for this operand with the Use - // inside the load. - AllocaPartitioning::use_iterator UI - = P.findPartitionUseForPHIOrSelectOperand(InUse); - assert(isa(*UI->U->getUser())); - UI->U = &Load->getOperandUse(Load->getPointerOperandIndex()); + if (!OffsetPtr) { + if (!Int8Ptr) { + Int8Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(), + Prefix + ".raw_cast"); + Int8PtrOffset = Offset; } - DEBUG(dbgs() << " speculated to: " << *NewPN << "\n"); + + OffsetPtr = Int8PtrOffset == 0 ? 
Int8Ptr : + IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset), + Prefix + ".raw_idx"); } + Ptr = OffsetPtr; - /// Select instructions that use an alloca and are subsequently loaded can be - /// rewritten to load both input pointers and then select between the result, - /// allowing the load of the alloca to be promoted. - /// From this: - /// %P2 = select i1 %cond, i32* %Alloca, i32* %Other - /// %V = load i32* %P2 - /// to: - /// %V1 = load i32* %Alloca -> will be mem2reg'd - /// %V2 = load i32* %Other - /// %V = select i1 %cond, i32 %V1, i32 %V2 - /// - /// We can do this to a select if its only uses are loads and if the operand - /// to the select can be loaded unconditionally. - bool isSafeSelectToSpeculate(SelectInst &SI, - SmallVectorImpl &Loads) { - Value *TValue = SI.getTrueValue(); - Value *FValue = SI.getFalseValue(); - bool TDerefable = TValue->isDereferenceablePointer(); - bool FDerefable = FValue->isDereferenceablePointer(); + // On the off chance we were targeting i8*, guard the bitcast here. + if (Ptr->getType() != PointerTy) + Ptr = IRB.CreateBitCast(Ptr, PointerTy, Prefix + ".cast"); - for (Value::use_iterator UI = SI.use_begin(), UE = SI.use_end(); - UI != UE; ++UI) { - LoadInst *LI = dyn_cast(*UI); - if (LI == 0 || !LI->isSimple()) return false; + return Ptr; +} - // Both operands to the select need to be dereferencable, either - // absolutely (e.g. allocas) or at this point because we can see other - // accesses to it. - if (!TDerefable && !isSafeToLoadUnconditionally(TValue, LI, - LI->getAlignment(), &TD)) - return false; - if (!FDerefable && !isSafeToLoadUnconditionally(FValue, LI, - LI->getAlignment(), &TD)) - return false; - Loads.push_back(LI); - } +/// \brief Test whether the given alloca partition can be promoted to a vector. +/// +/// This is a quick test to check whether we can rewrite a particular alloca +/// partition (and its newly formed alloca) into a vector alloca with only +/// whole-vector loads and stores such that it could be promoted to a vector +/// SSA value. We only can ensure this for a limited set of operations, and we +/// don't want to do the rewrites unless we are confident that the result will +/// be promotable, so we have an early test here. +static bool isVectorPromotionViable(const TargetData &TD, + Type *AllocaTy, + AllocaPartitioning &P, + uint64_t PartitionBeginOffset, + uint64_t PartitionEndOffset, + AllocaPartitioning::const_use_iterator I, + AllocaPartitioning::const_use_iterator E) { + VectorType *Ty = dyn_cast(AllocaTy); + if (!Ty) + return false; - return true; - } + uint64_t VecSize = TD.getTypeSizeInBits(Ty); + uint64_t ElementSize = Ty->getScalarSizeInBits(); - void visitSelectInst(SelectInst &SI) { - DEBUG(dbgs() << " original: " << SI << "\n"); - IRBuilder<> IRB(&SI); + // While the definition of LLVM vectors is bitpacked, we don't support sizes + // that aren't byte sized. + if (ElementSize % 8) + return false; + assert((VecSize % 8) == 0 && "vector size not a multiple of element size?"); + VecSize /= 8; + ElementSize /= 8; - // If the select isn't safe to speculate, just use simple logic to emit it. - SmallVector Loads; - if (!isSafeSelectToSpeculate(SI, Loads)) - return; + for (; I != E; ++I) { + if (!I->U) + continue; // Skip dead use. 
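// For intuition, a hypothetical partition over an alloca of type <4 x i32>
// (VecSize = 16 bytes, ElementSize = 4 bytes) survives the index checks that
// follow only when each use spans exactly one element or the whole vector:
//   offsets [4,8)  -> element 1            (allowed)
//   offsets [0,16) -> whole-vector access  (allowed)
//   offsets [2,6)  -> straddles elements   (rejected, since
//                     BeginIndex * ElementSize != BeginOffset)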
- Use *Ops[2] = { &SI.getOperandUse(1), &SI.getOperandUse(2) }; - AllocaPartitioning::iterator PIs[2]; - AllocaPartitioning::PartitionUse PUs[2]; - for (unsigned i = 0, e = 2; i != e; ++i) { - PIs[i] = P.findPartitionForPHIOrSelectOperand(Ops[i]); - if (PIs[i] != P.end()) { - // If the pointer is within the partitioning, remove the select from - // its uses. We'll add in the new loads below. - AllocaPartitioning::use_iterator UI - = P.findPartitionUseForPHIOrSelectOperand(Ops[i]); - PUs[i] = *UI; - // Clear out the use here so that the offsets into the use list remain - // stable but this use is ignored when rewriting. - UI->U = 0; + uint64_t BeginOffset = I->BeginOffset - PartitionBeginOffset; + uint64_t BeginIndex = BeginOffset / ElementSize; + if (BeginIndex * ElementSize != BeginOffset || + BeginIndex >= Ty->getNumElements()) + return false; + uint64_t EndOffset = I->EndOffset - PartitionBeginOffset; + uint64_t EndIndex = EndOffset / ElementSize; + if (EndIndex * ElementSize != EndOffset || + EndIndex > Ty->getNumElements()) + return false; + + // FIXME: We should build shuffle vector instructions to handle + // non-element-sized accesses. + if ((EndOffset - BeginOffset) != ElementSize && + (EndOffset - BeginOffset) != VecSize) + return false; + + if (MemIntrinsic *MI = dyn_cast(I->U->getUser())) { + if (MI->isVolatile()) + return false; + if (MemTransferInst *MTI = dyn_cast(I->U->getUser())) { + const AllocaPartitioning::MemTransferOffsets &MTO + = P.getMemTransferOffsets(*MTI); + if (!MTO.IsSplittable) + return false; } + } else if (I->U->get()->getType()->getPointerElementType()->isStructTy()) { + // Disable vector promotion when there are loads or stores of an FCA. + return false; + } else if (!isa(I->U->getUser()) && + !isa(I->U->getUser())) { + return false; } + } + return true; +} - Value *TV = SI.getTrueValue(); - Value *FV = SI.getFalseValue(); - // Replace the loads of the select with a select of two loads. - while (!Loads.empty()) { - LoadInst *LI = Loads.pop_back_val(); - - IRB.SetInsertPoint(LI); - LoadInst *TL = - IRB.CreateLoad(TV, LI->getName() + ".sroa.speculate.load.true"); - LoadInst *FL = - IRB.CreateLoad(FV, LI->getName() + ".sroa.speculate.load.false"); - NumLoadsSpeculated += 2; +/// \brief Test whether the given alloca partition can be promoted to an int. +/// +/// This is a quick test to check whether we can rewrite a particular alloca +/// partition (and its newly formed alloca) into an integer alloca suitable for +/// promotion to an SSA value. We only can ensure this for a limited set of +/// operations, and we don't want to do the rewrites unless we are confident +/// that the result will be promotable, so we have an early test here. +static bool isIntegerPromotionViable(const TargetData &TD, + Type *AllocaTy, + uint64_t AllocBeginOffset, + AllocaPartitioning &P, + AllocaPartitioning::const_use_iterator I, + AllocaPartitioning::const_use_iterator E) { + IntegerType *Ty = dyn_cast(AllocaTy); + if (!Ty || 8*TD.getTypeStoreSize(Ty) != Ty->getBitWidth()) + return false; - // Transfer alignment and TBAA info if present. - TL->setAlignment(LI->getAlignment()); - FL->setAlignment(LI->getAlignment()); - if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) { - TL->setMetadata(LLVMContext::MD_tbaa, Tag); - FL->setMetadata(LLVMContext::MD_tbaa, Tag); - } + // Check the uses to ensure the uses are (likely) promoteable integer uses. + // Also ensure that the alloca has a covering load or store. 
We don't want
+  // to promote because of some other unsplittable entry (which we may make
+  // splittable later) and lose the ability to promote each element access.
+  bool WholeAllocaOp = false;
+  for (; I != E; ++I) {
+    if (!I->U)
+      continue; // Skip dead use.
+
+    // We can't reasonably handle cases where the load or store extends past
+    // the end of the alloca's type and into its padding.
+    if ((I->EndOffset - AllocBeginOffset) > TD.getTypeStoreSize(Ty))
+      return false;
+
+    if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {
+      if (LI->isVolatile() || !LI->getType()->isIntegerTy())
+        return false;
+      if (LI->getType() == Ty)
+        WholeAllocaOp = true;
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {
+      if (SI->isVolatile() || !SI->getValueOperand()->getType()->isIntegerTy())
+        return false;
+      if (SI->getValueOperand()->getType() == Ty)
+        WholeAllocaOp = true;
+    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
+      if (MI->isVolatile())
+        return false;
+      if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) {
+        const AllocaPartitioning::MemTransferOffsets &MTO
+          = P.getMemTransferOffsets(*MTI);
+        if (!MTO.IsSplittable)
+          return false;
+      }
+    } else {
+      return false;
+    }
+  }
+  return WholeAllocaOp;
+}

+namespace {
 /// \brief Visitor to rewrite instructions using a partition of an alloca to
 /// use a new alloca.
 ///
-- 
2.7.4
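A closing illustration of isIntegerPromotionViable (hypothetical IR): an
alloca of type i64 (store size 8) whose only uses are

    %v = load i64* %a            ; whole-alloca integer load -> WholeAllocaOp
    store i32 %x, i32* %a.sub    ; 4 bytes inside [0,8)      -> allowed

makes the test return true, while any volatile access, a non-integer load or
store, or an unsplittable memory transfer over the partition makes it return
false.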