/// \returns the optimization remark emitter stored in ORE.
OptimizationRemarkEmitter *getORE() {
  return ORE;
}
- /// This structure holds any data we need about the edges being traversed
- /// during buildTree_rec(). We keep track of:
- /// (i) the user TreeEntry index, and
- /// (ii) the index of the edge.
- struct EdgeInfo {
- EdgeInfo() = default;
- /// The index of the user TreeEntry in VectorizableTree.
- int Idx = -1;
- /// The operand index of the use.
- unsigned EdgeIdx = UINT_MAX;
-
- /// Debug print.
- void dump(raw_ostream &OS) const {
- OS << "{User:" << Idx << " EdgeIdx:" << EdgeIdx << "}";
- }
- };
-
private:
struct TreeEntry;
int getEntryCost(TreeEntry *E);
/// This is the recursive part of buildTree.
- void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth, EdgeInfo EI);
+ void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth, int);
/// \returns true if the ExtractElement/ExtractValue instructions in \p VL can
/// be vectorized to use the original vector (or aggregate "bitcast" to a
/// The TreeEntry index containing the user of this entry. We can actually
/// have multiple users so the data structure is not truly a tree.
- SmallVector<EdgeInfo, 1> UserTreeIndices;
-
- private:
- /// The operands of each instruction in each lane Operands[op_index][lane].
- /// Note: This helps avoid the replication of the code that performs the
- /// reordering of operands during buildTree_rec() and vectorizeTree().
- SmallVector<ValueList, 2> Operands;
-
- public:
- /// Set this bundle's \p OpIdx'th operand to \p OpVL.
- void setOperand(unsigned OpIdx, ArrayRef<Value *> OpVL,
- ArrayRef<unsigned> ReuseShuffleIndices) {
- if (Operands.size() < OpIdx + 1)
- Operands.resize(OpIdx + 1);
- assert(Operands[OpIdx].size() == 0 && "Already resized?");
- Operands[OpIdx].resize(Scalars.size());
- for (unsigned Lane = 0, E = Scalars.size(); Lane != E; ++Lane)
- Operands[OpIdx][Lane] = (!ReuseShuffleIndices.empty())
- ? OpVL[ReuseShuffleIndices[Lane]]
- : OpVL[Lane];
- }
-
- /// If there is a user TreeEntry, then set its operand.
- void trySetUserTEOperand(const EdgeInfo &UserTreeIdx,
- ArrayRef<Value *> OpVL,
- ArrayRef<unsigned> ReuseShuffleIndices) {
- if (UserTreeIdx.Idx >= 0) {
- auto &VectorizableTree = Container;
- VectorizableTree[UserTreeIdx.Idx].setOperand(UserTreeIdx.EdgeIdx, OpVL,
- ReuseShuffleIndices);
- }
- }
-
- /// \returns the \p OpIdx operand of this TreeEntry.
- ValueList &getOperand(unsigned OpIdx) {
- assert(OpIdx < Operands.size() && "Off bounds");
- return Operands[OpIdx];
- }
-
- /// \return the single \p OpIdx operand.
- Value *getSingleOperand(unsigned OpIdx) const {
- assert(OpIdx < Operands.size() && "Off bounds");
- assert(!Operands[OpIdx].empty() && "No operand availabe");
- return Operands[OpIdx][0];
- }
-
-#ifndef NDEBUG
- /// Debug printer.
- LLVM_DUMP_METHOD void dump() const {
- for (unsigned OpI = 0, OpE = Operands.size(); OpI != OpE; ++OpI) {
- dbgs() << "Operand " << OpI << ":\n";
- for (const Value *V : Operands[OpI])
- dbgs().indent(2) << *V << "\n";
- }
- dbgs() << "Scalars: \n";
- for (Value *V : Scalars)
- dbgs().indent(2) << *V << "\n";
- dbgs() << "NeedToGather: " << NeedToGather << "\n";
- dbgs() << "VectorizedValue: ";
- if (VectorizedValue)
- dbgs() << *VectorizedValue;
- else
- dbgs() << "NULL";
- dbgs() << "\n";
- dbgs() << "ReuseShuffleIndices: ";
- if (ReuseShuffleIndices.empty())
- dbgs() << "Emtpy";
- else
- for (unsigned Idx : ReuseShuffleIndices)
- dbgs() << Idx << ", ";
- dbgs() << "\n";
- dbgs() << "ReorderIndices: ";
- for (unsigned Idx : ReorderIndices)
- dbgs() << Idx << ", ";
- dbgs() << "\n";
- dbgs() << "UserTreeIndices: ";
- for (const auto &EInfo : UserTreeIndices)
- dbgs() << EInfo << ", ";
- dbgs() << "\n";
- }
-#endif
+ SmallVector<int, 1> UserTreeIndices;
};
/// Create a new VectorizableTree entry.
- void newTreeEntry(ArrayRef<Value *> VL, bool Vectorized,
- EdgeInfo &UserTreeIdx,
+ void newTreeEntry(ArrayRef<Value *> VL, bool Vectorized, int &UserTreeIdx,
ArrayRef<unsigned> ReuseShuffleIndices = None,
ArrayRef<unsigned> ReorderIndices = None) {
VectorizableTree.emplace_back(VectorizableTree);
MustGather.insert(VL.begin(), VL.end());
}
- if (UserTreeIdx.Idx >= 0)
+ if (UserTreeIdx >= 0)
Last->UserTreeIndices.push_back(UserTreeIdx);
-
- Last->trySetUserTEOperand(UserTreeIdx, VL, ReuseShuffleIndices);
-
- UserTreeIdx.Idx = idx;
+ UserTreeIdx = idx;
}
/// -- Vectorization State --
/// Holds all of the tree entries.
std::vector<TreeEntry> VectorizableTree;
-#ifndef NDEBUG
- /// Debug printer.
- LLVM_DUMP_METHOD void dumpVectorizableTree() const {
- for (unsigned Id = 0, IdE = VectorizableTree.size(); Id != IdE; ++Id) {
- dbgs() << Id << ".\n";
- VectorizableTree[Id].dump();
- dbgs() << "\n";
- }
- }
-#endif
-
TreeEntry *getTreeEntry(Value *V) {
auto I = ScalarToTreeEntry.find(V);
if (I != ScalarToTreeEntry.end())
SD.dump(os);
return os;
}
- friend inline raw_ostream &operator<<(raw_ostream &OS,
- const BoUpSLP::EdgeInfo &EI) {
- EI.dump(OS);
- return OS;
- }
#endif
friend struct GraphTraits<BoUpSLP *>;
/// Add the VectorizableTree to the index iterator to be able to return
/// TreeEntry pointers.
///
/// The adaptor wraps an iterator over a small vector of tree indices and
/// carries a reference to the vector of tree entries, so that dereferencing
/// yields a pointer to the indexed entry rather than the raw index.
struct ChildIteratorType
- : public iterator_adaptor_base<
- ChildIteratorType, SmallVector<BoUpSLP::EdgeInfo, 1>::iterator> {
+ : public iterator_adaptor_base<ChildIteratorType,
+ SmallVector<int, 1>::iterator> {
/// The tree entries that the wrapped indices are looked up in.
std::vector<TreeEntry> &VectorizableTree;
/// Construct from the wrapped index iterator \p W and the entry vector
/// \p VT that its indices refer to. \p VT is held by reference.
- ChildIteratorType(SmallVector<BoUpSLP::EdgeInfo, 1>::iterator W,
+ ChildIteratorType(SmallVector<int, 1>::iterator W,
std::vector<TreeEntry> &VT)
: ChildIteratorType::iterator_adaptor_base(W), VectorizableTree(VT) {}
/// \returns the address of the tree entry selected by the index at the
/// current iterator position.
- NodeRef operator*() { return &VectorizableTree[I->Idx]; }
+ NodeRef operator*() { return &VectorizableTree[*I]; }
};
/// \returns the address of element 0 of \p R's VectorizableTree, which is
/// used as the entry node of the graph.
static NodeRef getEntryNode(BoUpSLP &R) {
  auto &Entries = R.VectorizableTree;
  return &Entries[0];
}
UserIgnoreList = UserIgnoreLst;
if (!allSameType(Roots))
return;
- buildTree_rec(Roots, 0, EdgeInfo());
+ buildTree_rec(Roots, 0, -1);
// Collect the values that we need to extract from the tree.
for (TreeEntry &EIdx : VectorizableTree) {
}
void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
- EdgeInfo UserTreeIdx) {
+ int UserTreeIdx) {
assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
InstructionsState S = getSameOpcode(VL);
E->UserTreeIndices.push_back(UserTreeIdx);
LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.OpValue
<< ".\n");
- E->trySetUserTEOperand(UserTreeIdx, VL, None);
return;
}
Operands.push_back(cast<PHINode>(j)->getIncomingValueForBlock(
PH->getIncomingBlock(i)));
- UserTreeIdx.EdgeIdx = i;
buildTree_rec(Operands, Depth + 1, UserTreeIdx);
}
return;
++NumOpsWantToKeepOriginalOrder;
newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx,
ReuseShuffleIndicies);
- // This is a special case, as it does not gather, but at the same time
- // we are not extending buildTree_rec() towards the operands.
- ValueList Op0;
- Op0.assign(VL.size(), VL0->getOperand(0));
- VectorizableTree.back().setOperand(0, Op0, ReuseShuffleIndicies);
return;
}
if (!CurrentOrder.empty()) {
++StoredCurrentOrderAndNum->getSecond();
newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx, ReuseShuffleIndicies,
StoredCurrentOrderAndNum->getFirst());
- // This is a special case, as it does not gather, but at the same time
- // we are not extending buildTree_rec() towards the operands.
- ValueList Op0;
- Op0.assign(VL.size(), VL0->getOperand(0));
- VectorizableTree.back().setOperand(0, Op0, ReuseShuffleIndicies);
return;
}
LLVM_DEBUG(dbgs() << "SLP: Gather extract sequence.\n");
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(i));
- UserTreeIdx.EdgeIdx = i;
buildTree_rec(Operands, Depth + 1, UserTreeIdx);
}
return;
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(i));
- UserTreeIdx.EdgeIdx = i;
buildTree_rec(Operands, Depth + 1, UserTreeIdx);
}
return;
if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
ValueList Left, Right;
reorderInputsAccordingToOpcode(S.getOpcode(), VL, Left, Right);
- UserTreeIdx.EdgeIdx = 0;
buildTree_rec(Left, Depth + 1, UserTreeIdx);
- UserTreeIdx.EdgeIdx = 1;
buildTree_rec(Right, Depth + 1, UserTreeIdx);
return;
}
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(i));
- UserTreeIdx.EdgeIdx = i;
buildTree_rec(Operands, Depth + 1, UserTreeIdx);
}
return;
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(i));
- UserTreeIdx.EdgeIdx = i;
buildTree_rec(Operands, Depth + 1, UserTreeIdx);
}
return;
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(0));
- UserTreeIdx.EdgeIdx = 0;
buildTree_rec(Operands, Depth + 1, UserTreeIdx);
return;
}
CallInst *CI2 = dyn_cast<CallInst>(j);
Operands.push_back(CI2->getArgOperand(i));
}
- UserTreeIdx.EdgeIdx = i;
buildTree_rec(Operands, Depth + 1, UserTreeIdx);
}
return;
if (isa<BinaryOperator>(VL0)) {
ValueList Left, Right;
reorderAltShuffleOperands(S, VL, Left, Right);
- UserTreeIdx.EdgeIdx = 0;
buildTree_rec(Left, Depth + 1, UserTreeIdx);
- UserTreeIdx.EdgeIdx = 1;
buildTree_rec(Right, Depth + 1, UserTreeIdx);
return;
}
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(i));
- UserTreeIdx.EdgeIdx = i;
buildTree_rec(Operands, Depth + 1, UserTreeIdx);
}
return;
continue;
}
+ // Prepare the operand vector.
+ for (Value *V : E->Scalars)
+ Operands.push_back(cast<PHINode>(V)->getIncomingValueForBlock(IBB));
+
Builder.SetInsertPoint(IBB->getTerminator());
Builder.SetCurrentDebugLocation(PH->getDebugLoc());
- Value *Vec = vectorizeTree(E->getOperand(i));
+ Value *Vec = vectorizeTree(Operands);
NewPhi->addIncoming(Vec, IBB);
}
case Instruction::ExtractElement: {
if (!E->NeedToGather) {
- Value *V = E->getSingleOperand(0);
+ Value *V = VL0->getOperand(0);
if (!E->ReorderIndices.empty()) {
OrdersType Mask;
inversePermutation(E->ReorderIndices, Mask);
}
case Instruction::ExtractValue: {
if (!E->NeedToGather) {
- LoadInst *LI = cast<LoadInst>(E->getSingleOperand(0));
+ LoadInst *LI = cast<LoadInst>(VL0->getOperand(0));
Builder.SetInsertPoint(LI);
PointerType *PtrTy = PointerType::get(VecTy, LI->getPointerAddressSpace());
Value *Ptr = Builder.CreateBitCast(LI->getOperand(0), PtrTy);
case Instruction::Trunc:
case Instruction::FPTrunc:
case Instruction::BitCast: {
+ ValueList INVL;
+ for (Value *V : E->Scalars)
+ INVL.push_back(cast<Instruction>(V)->getOperand(0));
+
setInsertPointAfterBundle(E->Scalars, S);
- Value *InVec = vectorizeTree(E->getOperand(0));
+ Value *InVec = vectorizeTree(INVL);
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
}
case Instruction::FCmp:
case Instruction::ICmp: {
+ ValueList LHSV, RHSV;
+ for (Value *V : E->Scalars) {
+ LHSV.push_back(cast<Instruction>(V)->getOperand(0));
+ RHSV.push_back(cast<Instruction>(V)->getOperand(1));
+ }
+
setInsertPointAfterBundle(E->Scalars, S);
- Value *L = vectorizeTree(E->getOperand(0));
- Value *R = vectorizeTree(E->getOperand(1));
+ Value *L = vectorizeTree(LHSV);
+ Value *R = vectorizeTree(RHSV);
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return V;
}
case Instruction::Select: {
+ ValueList TrueVec, FalseVec, CondVec;
+ for (Value *V : E->Scalars) {
+ CondVec.push_back(cast<Instruction>(V)->getOperand(0));
+ TrueVec.push_back(cast<Instruction>(V)->getOperand(1));
+ FalseVec.push_back(cast<Instruction>(V)->getOperand(2));
+ }
+
setInsertPointAfterBundle(E->Scalars, S);
- Value *Cond = vectorizeTree(E->getOperand(0));
- Value *True = vectorizeTree(E->getOperand(1));
- Value *False = vectorizeTree(E->getOperand(2));
+ Value *Cond = vectorizeTree(CondVec);
+ Value *True = vectorizeTree(TrueVec);
+ Value *False = vectorizeTree(FalseVec);
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
+ ValueList LHSVL, RHSVL;
+ if (isa<BinaryOperator>(VL0) && VL0->isCommutative())
+ reorderInputsAccordingToOpcode(S.getOpcode(), E->Scalars, LHSVL,
+ RHSVL);
+ else
+ for (Value *V : E->Scalars) {
+ auto *I = cast<Instruction>(V);
+ LHSVL.push_back(I->getOperand(0));
+ RHSVL.push_back(I->getOperand(1));
+ }
+
setInsertPointAfterBundle(E->Scalars, S);
- Value *LHS = vectorizeTree(E->getOperand(0));
- Value *RHS = vectorizeTree(E->getOperand(1));
+ Value *LHS = vectorizeTree(LHSVL);
+ Value *RHS = vectorizeTree(RHSVL);
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
unsigned Alignment = SI->getAlignment();
unsigned AS = SI->getPointerAddressSpace();
+ ValueList ScalarStoreValues;
+ for (Value *V : E->Scalars)
+ ScalarStoreValues.push_back(cast<StoreInst>(V)->getValueOperand());
+
setInsertPointAfterBundle(E->Scalars, S);
- Value *VecValue = vectorizeTree(E->getOperand(0));
+ Value *VecValue = vectorizeTree(ScalarStoreValues);
Value *ScalarPtr = SI->getPointerOperand();
Value *VecPtr = Builder.CreateBitCast(ScalarPtr, VecTy->getPointerTo(AS));
StoreInst *ST = Builder.CreateStore(VecValue, VecPtr);
case Instruction::GetElementPtr: {
setInsertPointAfterBundle(E->Scalars, S);
- Value *Op0 = vectorizeTree(E->getOperand(0));
+ ValueList Op0VL;
+ for (Value *V : E->Scalars)
+ Op0VL.push_back(cast<GetElementPtrInst>(V)->getOperand(0));
+
+ Value *Op0 = vectorizeTree(Op0VL);
std::vector<Value *> OpVecs;
for (int j = 1, e = cast<GetElementPtrInst>(VL0)->getNumOperands(); j < e;
++j) {
- Value *OpVec = vectorizeTree(E->getOperand(j));
+ ValueList OpVL;
+ for (Value *V : E->Scalars)
+ OpVL.push_back(cast<GetElementPtrInst>(V)->getOperand(j));
+
+ Value *OpVec = vectorizeTree(OpVL);
OpVecs.push_back(OpVec);
}
OpVecs.push_back(CEI->getArgOperand(j));
continue;
}
+ for (Value *V : E->Scalars) {
+ CallInst *CEI = cast<CallInst>(V);
+ OpVL.push_back(CEI->getArgOperand(j));
+ }
- Value *OpVec = vectorizeTree(E->getOperand(j));
+ Value *OpVec = vectorizeTree(OpVL);
LLVM_DEBUG(dbgs() << "SLP: OpVec[" << j << "]: " << *OpVec << "\n");
OpVecs.push_back(OpVec);
}
return V;
}
case Instruction::ShuffleVector: {
+ ValueList LHSVL, RHSVL;
assert(S.isAltShuffle() &&
((Instruction::isBinaryOp(S.getOpcode()) &&
Instruction::isBinaryOp(S.getAltOpcode())) ||
Value *LHS, *RHS;
if (Instruction::isBinaryOp(S.getOpcode())) {
+ reorderAltShuffleOperands(S, E->Scalars, LHSVL, RHSVL);
setInsertPointAfterBundle(E->Scalars, S);
- LHS = vectorizeTree(E->getOperand(0));
- RHS = vectorizeTree(E->getOperand(1));
+ LHS = vectorizeTree(LHSVL);
+ RHS = vectorizeTree(RHSVL);
} else {
+ ValueList INVL;
+ for (Value *V : E->Scalars)
+ INVL.push_back(cast<Instruction>(V)->getOperand(0));
setInsertPointAfterBundle(E->Scalars, S);
- LHS = vectorizeTree(E->getOperand(0));
+ LHS = vectorizeTree(INVL);
}
if (E->VectorizedValue) {