return vmuxp(Bytes, L, R, Results);
}
+namespace {
+ struct Deleter : public SelectionDAG::DAGNodeDeletedListener {
+ template <typename T>
+ Deleter(SelectionDAG &D, T &C)
+ : SelectionDAG::DAGNodeDeletedListener(D, [&C] (SDNode *N, SDNode *E) {
+ C.erase(N);
+ }) {}
+ };
+
+ template <typename T>
+ struct NullifyingVector : public T {
+ DenseMap<SDNode*, SDNode**> Refs;
+ NullifyingVector(T &&V) : T(V) {
+ for (unsigned i = 0, e = T::size(); i != e; ++i) {
+ SDNode *&N = T::operator[](i);
+ Refs[N] = &N;
+ }
+ }
+ void erase(SDNode *N) {
+ auto F = Refs.find(N);
+ if (F != Refs.end())
+ *F->second = nullptr;
+ }
+ };
+}
+
bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl,
MVT ResTy, SDValue Va, SDValue Vb,
SDNode *N) {
bool HavePairs = (2*HwLen == VecLen);
MVT SingleTy = getSingleVT(MVT::i8);
+ // The prior attempts to handle this shuffle may have left a bunch of
+ // dead nodes in the DAG (such as constants). These nodes will be added
+ // at the end of DAG's node list, which at that point had already been
+ // sorted topologically. In the main selection loop, the node list is
+ // traversed backwards from the root node, which means that any new
+ // nodes (from the end of the list) will not be visited.
+ // Scalarization will replace the shuffle node with the scalarized
+ // expression, and if that expression reused any if the leftoever (dead)
+ // nodes, these nodes would not be selected (since the "local" selection
+ // only visits nodes that are not in AllNodes).
+ // To avoid this issue, remove all dead nodes from the DAG now.
+ DAG.RemoveDeadNodes();
+ DenseSet<SDNode*> AllNodes;
+ for (SDNode &S : DAG.allnodes())
+ AllNodes.insert(&S);
+
+ Deleter DUA(DAG, AllNodes);
+
SmallVector<SDValue,128> Ops;
LLVMContext &Ctx = *DAG.getContext();
MVT LegalTy = Lower.getTypeToTransformTo(Ctx, ElemTy).getSimpleVT();
assert(!N->use_empty());
ISel.ReplaceNode(N, LV.getNode());
- DAG.RemoveDeadNodes();
- std::deque<SDNode*> SubNodes;
- SubNodes.push_back(LV.getNode());
+ if (AllNodes.count(LV.getNode())) {
+ DAG.RemoveDeadNodes();
+ return true;
+ }
+
+ // The lowered build-vector node will now need to be selected. It needs
+ // to be done here because this node and its submodes are not included
+ // in the main selection loop.
+ // Implement essentially the same topological ordering algorithm as is
+ // used in SelectionDAGISel.
+
+ SetVector<SDNode*> SubNodes, TmpQ;
+ std::map<SDNode*,unsigned> NumOps;
+
+ SubNodes.insert(LV.getNode());
for (unsigned I = 0; I != SubNodes.size(); ++I) {
- for (SDValue Op : SubNodes[I]->ops())
- SubNodes.push_back(Op.getNode());
+ unsigned OpN = 0;
+ SDNode *S = SubNodes[I];
+ for (SDValue Op : S->ops()) {
+ if (AllNodes.count(Op.getNode()))
+ continue;
+ SubNodes.insert(Op.getNode());
+ ++OpN;
+ }
+ NumOps.insert({S, OpN});
+ if (OpN == 0)
+ TmpQ.insert(S);
}
- while (!SubNodes.empty()) {
- SDNode *S = SubNodes.front();
- SubNodes.pop_front();
- if (S->use_empty())
- continue;
- // This isn't great, but users need to be selected before any nodes that
- // they use. (The reason is to match larger patterns, and avoid nodes that
- // cannot be matched on their own, e.g. ValueType, TokenFactor, etc.).
- bool PendingUser = llvm::any_of(S->uses(), [&SubNodes](const SDNode *U) {
- return llvm::any_of(SubNodes, [U](const SDNode *T) {
- return T == U;
- });
- });
- if (PendingUser)
- SubNodes.push_back(S);
- else
- ISel.Select(S);
+
+ for (unsigned I = 0; I != TmpQ.size(); ++I) {
+ SDNode *S = TmpQ[I];
+ for (SDNode *U : S->uses()) {
+ if (!SubNodes.count(U))
+ continue;
+ auto F = NumOps.find(U);
+ assert(F != NumOps.end());
+ assert(F->second > 0);
+ if (!--F->second)
+ TmpQ.insert(F->first);
+ }
}
+ assert(SubNodes.size() == TmpQ.size());
+ NullifyingVector<decltype(TmpQ)::vector_type> Queue(TmpQ.takeVector());
+
+ Deleter DUQ(DAG, Queue);
+ for (SDNode *S : reverse(Queue))
+ if (S != nullptr)
+ ISel.Select(S);
DAG.RemoveDeadNodes();
return true;