Replace the LSRA stack with a hash table.
author: Pat Gavlin <pagavlin@microsoft.com>
Tue, 26 Jul 2016 13:27:06 +0000 (06:27 -0700)
committer: Pat Gavlin <pagavlin@microsoft.com>
Mon, 1 Aug 2016 22:14:01 +0000 (15:14 -0700)
LSRA currently uses a stack to find the `LocationInfo` for each register consumed
by a node. The changes in this stack's contents given a particular node are
governed by three factors:
- The number of registers the node consumes (`gtLsraInfo.srcCount`)
- The number of registers the node produces (`gtLsraInfo.dstCount`)
- Whether or not the node produces an unused value (`gtLsraInfo.isLocalDefUse`)

In all cases, `gtLsraInfo.srcCount` values are popped off of the stack in the
order in which they were pushed (i.e. in FIFO rather than LIFO order). If the
node produces a value that will be used, `gtLsraInfo.dstCount` values are
then pushed onto the stack. If the node produces an unused value, nothing is
pushed onto the stack.

Naively, it would appear that the number of registers a node consumes would be
at least the count of the node's non-leaf operands (to put it differently, one
might assume that any non-leaf operator that produces a value would define at
least one register). However, contained nodes complicate the situation: because
a contained node's execution is subsumed by its user, the contained node's
sources become sources for its user and the contained node does not define any
registers. As a result, both the number of registers consumed and the number of
registers produced by a contained node are 0. Thus, contained nodes do not
update the stack, and the node's parent (if it is not also contained) will
pop the values produced by the contained node's operands. Logically speaking,
it is as if a contained node defines the transitive closure of the registers
defined by its own non-contained operands.

The use of the stack relies on the property that even in linear order the
JIT's IR is still tree ordered. That is to say, given an operator and its
operands, any nodes that execute between any two operands do not produce
SDSU temps that are consumed after the second operand. IR with such a
shape would unbalance the stack.

The planned move to the LIR design given in dotnet/coreclr#6366 removes the tree order
constraint in order to simplify understanding and manipulating the IR in the
backend. Because LIR may not be tree ordered, LSRA may no longer use a stack
to find the `LocationInfo` for a node's operands. This change replaces the
stack with a map from nodes to lists of `LocationInfo` values, each of
which describes a register that is logically defined (if not physically
defined) by that node. Only contained nodes logically define registers that
they do not physically define: contained nodes map to the list of
`LocationInfo` values logically defined by their operands. All non-contained
nodes map to the list of `LocationInfo` values that they physically define.
Non-contained nodes that do not define any registers are not inserted into
the map.

Commit migrated from https://github.com/dotnet/coreclr/commit/41a1d0a422c8c444479809ab9acd3854d8bf66cc

src/coreclr/src/jit/gentree.cpp
src/coreclr/src/jit/gentree.h
src/coreclr/src/jit/lower.cpp
src/coreclr/src/jit/lsra.cpp
src/coreclr/src/jit/lsra.h
src/coreclr/src/jit/nodeinfo.h
src/coreclr/src/jit/smallhash.h [new file with mode: 0644]
src/coreclr/src/jit/utils.h

index 8eafd68..afb2fad 100644 (file)
@@ -7817,6 +7817,504 @@ GenTreePtr GenTree::GetChild(unsigned childNum)
     }
 }
 
+GenTreeOperandIterator::GenTreeOperandIterator()
+    : m_node(nullptr)
+    , m_operand(nullptr)
+    , m_argList(nullptr)
+    , m_multiRegArg(nullptr)
+    , m_expandMultiRegArgs(false)
+    , m_state(-1)
+{
+}
+
+GenTreeOperandIterator::GenTreeOperandIterator(GenTree* node, bool expandMultiRegArgs)
+    : m_node(node)
+    , m_operand(nullptr)
+    , m_argList(nullptr)
+    , m_multiRegArg(nullptr)
+    , m_expandMultiRegArgs(expandMultiRegArgs)
+    , m_state(0)
+{
+    assert(m_node != nullptr);
+
+    // Advance to the first operand.
+    ++(*this);
+}
+
+//------------------------------------------------------------------------
+// GenTreeOperandIterator::GetNextOperand:
+//    Gets the next operand of a node with a fixed number of operands.
+//    This covers all nodes besides GT_CALL, GT_PHI, and GT_SIMD. For the
+//    node types handled by this method, the `m_state` field indicates the
+//    index of the next operand to produce.
+//
+// Returns:
+//    The node's next operand or nullptr if all operands have been
+//    produced.
+GenTree* GenTreeOperandIterator::GetNextOperand() const
+{
+    switch (m_node->OperGet())
+    {
+    case GT_CMPXCHG:
+        switch (m_state)
+        {
+        case 0:
+            return m_node->AsCmpXchg()->gtOpLocation;
+        case 1:
+            return m_node->AsCmpXchg()->gtOpValue;
+        case 2:
+            return m_node->AsCmpXchg()->gtOpComparand;
+        default:
+            return nullptr;
+        }
+    case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+    case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+        switch (m_state)
+        {
+        case 0:
+            return m_node->AsBoundsChk()->gtArrLen;
+        case 1:
+            return m_node->AsBoundsChk()->gtIndex;
+        default:
+            return nullptr;
+        }
+
+    case GT_FIELD:
+        if (m_state == 0)
+        {
+            return m_node->AsField()->gtFldObj;
+        }
+        return nullptr;
+
+    case GT_STMT:
+        if (m_state == 0)
+        {
+            return m_node->AsStmt()->gtStmtExpr;
+        }
+        return nullptr;
+
+    case GT_ARR_ELEM:
+        if (m_state == 0)
+        {
+            return m_node->AsArrElem()->gtArrObj;
+        }
+        else if (m_state <= m_node->AsArrElem()->gtArrRank)
+        {
+            return m_node->AsArrElem()->gtArrInds[m_state-1];
+        }
+        return nullptr;
+
+    case GT_ARR_OFFSET:
+        switch (m_state)
+        {
+        case 0:
+            return m_node->AsArrOffs()->gtOffset;
+        case 1:
+            return m_node->AsArrOffs()->gtIndex;
+        case 2:
+            return m_node->AsArrOffs()->gtArrObj;
+        default:
+            return nullptr;
+        }
+
+    // Call, phi, and SIMD nodes are handled by MoveNext{Call,Phi,SIMD}Operand, respectively.
+    case GT_CALL:
+    case GT_PHI:
+#ifdef FEATURE_SIMD
+    case GT_SIMD:
+#endif
+        break;
+
+    case GT_INITBLK:
+    case GT_COPYBLK:
+    case GT_COPYOBJ:
+        {
+            GenTreeBlkOp* blkOp = m_node->AsBlkOp();
+
+            bool blkOpReversed = (blkOp->gtFlags & GTF_REVERSE_OPS) != 0;
+            bool srcDstReversed = (blkOp->gtOp1->gtFlags & GTF_REVERSE_OPS) != 0;
+
+            if (!blkOpReversed)
+            {
+                switch (m_state)
+                {
+                case 0:
+                    return !srcDstReversed ? blkOp->gtOp1->AsArgList()->gtOp1 : blkOp->gtOp1->AsArgList()->gtOp2;
+                case 1:
+                    return !srcDstReversed ? blkOp->gtOp1->AsArgList()->gtOp2 : blkOp->gtOp1->AsArgList()->gtOp1;
+                case 2:
+                    return blkOp->gtOp2;
+                default:
+                    return nullptr;
+                }
+            }
+            else
+            {
+                switch (m_state)
+                {
+                case 0:
+                    return blkOp->gtOp2;
+                case 1:
+                    return !srcDstReversed ? blkOp->gtOp1->AsArgList()->gtOp1 : blkOp->gtOp1->AsArgList()->gtOp2;
+                case 2:
+                    return !srcDstReversed ? blkOp->gtOp1->AsArgList()->gtOp2 : blkOp->gtOp1->AsArgList()->gtOp1;
+                default:
+                    return nullptr;
+                }
+            }
+        }
+        break;
+
+    case GT_LEA:
+        {
+            GenTreeAddrMode* lea = m_node->AsAddrMode();
+
+            bool hasOp1 = lea->gtOp1 != nullptr;
+            if (!hasOp1)
+            {
+                return m_state == 0 ? lea->gtOp2 : nullptr;
+            }
+
+            bool operandsReversed = (lea->gtFlags & GTF_REVERSE_OPS) != 0;
+            switch (m_state)
+            {
+                case 0:
+                    return !operandsReversed ? lea->gtOp1 : lea->gtOp2;
+                case 1:
+                    return !operandsReversed ? lea->gtOp2 : lea->gtOp1;
+                default:
+                    return nullptr;
+            }
+        }
+        break;
+
+    default:
+        if (m_node->OperIsConst() || m_node->OperIsLeaf())
+        {
+            return nullptr;
+        }
+        else if (m_node->OperIsUnary())
+        {
+            return m_state == 0 ? m_node->AsUnOp()->gtOp1 : nullptr;
+        }
+        else if (m_node->OperIsBinary())
+        {
+            bool operandsReversed = (m_node->gtFlags & GTF_REVERSE_OPS) != 0;
+            switch (m_state)
+            {
+                case 0:
+                    return !operandsReversed ? m_node->AsOp()->gtOp1 : m_node->AsOp()->gtOp2;
+                case 1:
+                    return !operandsReversed ? m_node->AsOp()->gtOp2 : m_node->AsOp()->gtOp1;
+                default:
+                    return nullptr;
+            }
+        }
+    }
+
+    unreached();
+}
+
+//------------------------------------------------------------------------
+// GenTreeOperandIterator::MoveToNextCallOperand:
+//    Moves to the next operand of a call node. Unlike the simple nodes
+//    handled by `GetNextOperand`, call nodes have a variable number of
+//    operands stored in cons lists. This method expands the cons lists
+//    into the operands stored within.
+//
+void GenTreeOperandIterator::MoveToNextCallOperand()
+{
+    GenTreeCall* call = m_node->AsCall();
+
+    for (;;)
+    {
+        switch (m_state)
+        {
+            case 0:
+                m_state = 1;
+                m_argList = call->gtCallArgs;
+
+                if (call->gtCallObjp != nullptr)
+                {
+                    m_operand = call->gtCallObjp;
+                    return;
+                }
+                break;
+
+            case 1:
+            case 3:
+                if (m_argList == nullptr)
+                {
+                    m_state += 2;
+
+                    if (m_state == 3)
+                    {
+                        m_argList = call->gtCallLateArgs;
+                    }
+                }
+                else
+                {
+                    GenTreeArgList* argNode = m_argList->AsArgList();
+                    if (m_expandMultiRegArgs && argNode->gtOp1->OperGet() == GT_LIST)
+                    {
+                        m_state += 1;
+                        m_multiRegArg = argNode->gtOp1;
+                    }
+                    else
+                    {
+                        m_operand = argNode->gtOp1;
+                        m_argList = argNode->Rest();
+                        return;
+                    }
+                }
+                break;
+
+            case 2:
+            case 4:
+                if (m_multiRegArg == nullptr)
+                {
+                    m_state -= 1;
+                    m_argList = m_argList->AsArgList()->Rest();
+                }
+                else
+                {
+                    GenTreeArgList* regNode = m_multiRegArg->AsArgList();
+                    m_operand = regNode->gtOp1;
+                    m_multiRegArg = regNode->Rest();
+                    return;
+                }
+                break;
+
+
+            case 5:
+                m_state = call->gtCallType == CT_INDIRECT ? 6 : 8;
+
+                if (call->gtControlExpr != nullptr)
+                {
+                    m_operand = call->gtControlExpr;
+                    return;
+                }
+                break;
+
+            case 6:
+                assert(call->gtCallType == CT_INDIRECT);
+
+                m_state = 7;
+
+                if (call->gtCallCookie != nullptr)
+                {
+                    m_operand = call->gtCallCookie;
+                    return;
+                }
+                break;
+
+            case 7:
+                assert(call->gtCallType == CT_INDIRECT);
+
+                m_state = 8;
+                if (call->gtCallAddr != nullptr)
+                {
+                    m_operand = call->gtCallAddr;
+                    return;
+                }
+                break;
+
+            default:
+                m_node = nullptr;
+                m_operand = nullptr;
+                m_argList = nullptr;
+                m_state = -1;
+                return;
+        }
+    }
+}
+
+//------------------------------------------------------------------------
+// GenTreeOperandIterator::MoveToNextPhiOperand:
+//    Moves to the next operand of a phi node. Unlike the simple nodes
+//    handled by `GetNextOperand`, phi nodes have a variable number of
+//    operands stored in a cons list. This method expands the cons list
+//    into the operands stored within.
+//
+void GenTreeOperandIterator::MoveToNextPhiOperand()
+{
+    GenTreeUnOp* phi = m_node->AsUnOp();
+
+    for (;;)
+    {
+        switch (m_state)
+        {
+            case 0:
+                m_state = 1;
+                m_argList = phi->gtOp1;
+                break;
+
+            case 1:
+                if (m_argList == nullptr)
+                {
+                    m_state = 2;
+                }
+                else
+                {
+                    GenTreeArgList* argNode = m_argList->AsArgList();
+                    m_operand = argNode->gtOp1;
+                    m_argList = argNode->Rest();
+                    return;
+                }
+                break;
+
+            default:
+                m_node = nullptr;
+                m_operand = nullptr;
+                m_argList = nullptr;
+                m_state = -1;
+                return;
+        }
+    }
+}
+
+#ifdef FEATURE_SIMD
+//------------------------------------------------------------------------
+// GenTreeOperandIterator::MoveToNextSIMDOperand:
+//    Moves to the next operand of a SIMD node. Most SIMD nodes have a
+//    fixed number of operands and are handled accordingly.
+//    `SIMDIntrinsicInitN` nodes, however, have a variable number of
+//    operands stored in a cons list. This method expands the cons list
+//    into the operands stored within.
+//
+void GenTreeOperandIterator::MoveToNextSIMDOperand()
+{
+    GenTreeSIMD* simd = m_node->AsSIMD();
+
+    if (simd->gtSIMDIntrinsicID != SIMDIntrinsicInitN)
+    {
+        bool operandsReversed = (simd->gtFlags & GTF_REVERSE_OPS) != 0;
+        switch (m_state)
+        {
+            case 0:
+                m_operand = !operandsReversed ? simd->gtOp1 : simd->gtOp2;
+                break;
+            case 1:
+                m_operand = !operandsReversed ? simd->gtOp2 : simd->gtOp1;
+                break;
+            default:
+                m_operand = nullptr;
+                break;
+        }
+
+        if (m_operand != nullptr)
+        {
+            m_state++;
+        }
+        else
+        {
+            m_node = nullptr;
+            m_state = -1;
+        }
+
+        return;
+    }
+
+    for (;;)
+    {
+        switch (m_state)
+        {
+            case 0:
+                m_state = 1;
+                m_argList = simd->gtOp1;
+                break;
+
+            case 1:
+                if (m_argList == nullptr)
+                {
+                    m_state = 2;
+                }
+                else
+                {
+                    GenTreeArgList* argNode = m_argList->AsArgList();
+                    m_operand = argNode->gtOp1;
+                    m_argList = argNode->Rest();
+                    return;
+                }
+                break;
+
+            default:
+                m_node = nullptr;
+                m_operand = nullptr;
+                m_argList = nullptr;
+                m_state = -1;
+                return;
+        }
+    }
+}
+#endif
+
+//------------------------------------------------------------------------
+// GenTreeOperandIterator::operator++:
+//    Advances the iterator to the next operand.
+//
+GenTreeOperandIterator& GenTreeOperandIterator::operator++()
+{
+    if (m_state == -1)
+    {
+        // If we've reached the terminal state, do nothing.
+        assert(m_node == nullptr);
+        assert(m_operand == nullptr);
+        assert(m_argList == nullptr);
+    }
+    else
+    {
+        // Otherwise, move to the next operand in the node.
+        genTreeOps op = m_node->OperGet();
+        if (op == GT_CALL)
+        {
+            MoveToNextCallOperand();
+        }
+        else if (op == GT_PHI)
+        {
+            MoveToNextPhiOperand();
+        }
+#ifdef FEATURE_SIMD
+        else if (op == GT_SIMD)
+        {
+            MoveToNextSIMDOperand();
+        }
+#endif
+        else
+        {
+            m_operand = GetNextOperand();
+            if (m_operand != nullptr)
+            {
+                m_state++;
+            }
+            else
+            {
+                m_node = nullptr;
+                m_state = -1;
+            }
+        }
+    }
+
+    return *this;
+}
+
+GenTreeOperandIterator GenTree::OperandsBegin(bool expandMultiRegArgs)
+{
+    return GenTreeOperandIterator(this, expandMultiRegArgs);
+}
+
+GenTreeOperandIterator GenTree::OperandsEnd()
+{
+    return GenTreeOperandIterator();
+}
+
+IteratorPair<GenTreeOperandIterator> GenTree::Operands(bool expandMultiRegArgs)
+{
+    return MakeIteratorPair(OperandsBegin(expandMultiRegArgs), OperandsEnd());
+}
+
 #ifdef DEBUG
 
 /* static */ int GenTree::gtDispFlags(unsigned flags, unsigned debugFlags)
index ef98214..e16aecc 100644 (file)
@@ -229,7 +229,7 @@ public:
     }
 };
 
-
+class GenTreeOperandIterator;
 
 /*****************************************************************************/
 
@@ -1661,6 +1661,16 @@ public:
     // Requires "childNum < NumChildren()".  Returns the "n"th child of "this."
     GenTreePtr GetChild(unsigned childNum);
 
+    // Returns an iterator that will produce each operand of this node. Differs from the sequence
+    // of nodes produced by a loop over `GetChild` in its handling of call, phi, and block op
+    // nodes. If `expandMultiRegArgs` is true, any multi-reg args passed to a call will
+    // be expanded from their GT_LIST node into that node's contents.
+    GenTreeOperandIterator OperandsBegin(bool expandMultiRegArgs = false);
+    GenTreeOperandIterator OperandsEnd();
+
+    // Returns a range that will produce the operands of this node in use order.
+    IteratorPair<GenTreeOperandIterator> Operands(bool expandMultiRegArgs = false);
+
     // The maximum possible # of children of any node.
     static const int MAX_CHILDREN = 6;
 
@@ -1712,6 +1722,72 @@ public:
                    DEBUGARG(bool largeNode = false));
 };
 
+//------------------------------------------------------------------------
+// GenTreeOperandIterator: an iterator that will produce each operand of a
+//                         GenTree node in the order in which they are
+//                         used. Note that the operands of a node may not
+//                         correspond exactly to the nodes on the other
+//                         ends of its use edges: in particular, GT_LIST
+//                         nodes are expanded into their component parts
+//                         (with the optional exception of multi-reg
+//                         arguments). This differs from the behavior of
+//                         GenTree::GetChild(), which does not expand
+//                         lists.
+//
+// Note: valid values of this type may be obtained by calling
+// `GenTree::OperandsBegin` and `GenTree::OperandsEnd`.
+class GenTreeOperandIterator
+{
+    friend GenTreeOperandIterator GenTree::OperandsBegin(bool expandMultiRegArgs);
+    friend GenTreeOperandIterator GenTree::OperandsEnd();
+
+    GenTree* m_node;
+    GenTree* m_operand;
+    GenTree* m_argList;
+    GenTree* m_multiRegArg;
+    bool m_expandMultiRegArgs;
+    int m_state;
+
+    GenTreeOperandIterator(GenTree* node, bool expandMultiRegArgs);
+
+    GenTree* GetNextOperand() const;
+    void MoveToNextCallOperand();
+    void MoveToNextPhiOperand();
+#ifdef FEATURE_SIMD
+    void MoveToNextSIMDOperand();
+#endif
+
+public:
+    GenTreeOperandIterator();
+
+    inline GenTree*& operator*()
+    {
+        return m_operand;
+    }
+
+    inline GenTree** operator->()
+    {
+        return &m_operand;
+    }
+
+    inline bool operator==(const GenTreeOperandIterator& other) const
+    {
+        if (m_state == -1 || other.m_state == -1)
+        {
+            return m_state == other.m_state;
+        }
+
+        return (m_node == other.m_node) && (m_operand == other.m_operand) && (m_argList == other.m_argList) && (m_state == other.m_state);
+    }
+
+    inline bool operator!=(const GenTreeOperandIterator& other) const
+    {
+        return !(operator==(other));
+    }
+
+    GenTreeOperandIterator& operator++();
+};
+
 
 /*****************************************************************************/
 // In the current design, we never instantiate GenTreeUnOp: it exists only to be
index f644b93..a0621e3 100644 (file)
@@ -3921,30 +3921,8 @@ void Lowering::DoPhase()
 #ifdef DEBUG
                 node->gtSeqNum = currentLoc;
 #endif
-                TreeNodeInfo* info = &node->gtLsraInfo;
-                info->internalIntCount = 0;
-                info->internalFloatCount = 0;
-                info->isLocalDefUse = false;
-                info->isHelperCallWithKills = false;
-                info->isLsraAdded = false;
-
-                // if there is a reg indicated on the tree node, use that for dstCandidates
-                // the exception is the NOP, which sometimes show up around late args.
-                // TODO-Cleanup: get rid of those NOPs.
-                if (node->gtRegNum == REG_NA
-                    || node->gtOper == GT_NOP)
-                {
-                    info->setDstCandidates(m_lsra, m_lsra->allRegs(node->TypeGet()));
-                }
-                else
-                {
-                    info->setDstCandidates(m_lsra, genRegMask(node->gtRegNum));
-                }
 
-                info->setSrcCandidates(m_lsra, info->getDstCandidates(m_lsra));
-                info->setInternalCandidates(m_lsra, m_lsra->allRegs(TYP_INT));
-                info->isInitialized = true;
-                info->loc = currentLoc;
+                node->gtLsraInfo.Initialize(m_lsra, node, currentLoc);
                 node->gtClearReg(comp);
                 currentLoc += 2;
             }
index 266d68e..8213a05 100644 (file)
@@ -2868,6 +2868,234 @@ fixedCandidateMask(var_types type, regMaskTP candidates)
     return RBM_NONE;
 }
 
+//------------------------------------------------------------------------
+// LocationInfoListNode: used to store a single `LocationInfo` value for a
+//                       node during `buildIntervals`.
+//
+// This is the node type for `LocationInfoList` below.
+//
+class LocationInfoListNode final : public LocationInfo
+{
+    friend class LocationInfoList;
+    friend class LocationInfoListNodePool;
+
+    LocationInfoListNode* m_next; // The next node in the list
+
+public:
+    LocationInfoListNode(LsraLocation l, Interval* i, GenTree* t, unsigned regIdx = 0)
+        : LocationInfo(l, i, t, regIdx)
+    {
+    }
+
+    //------------------------------------------------------------------------
+    // LocationInfoListNode::Next: Returns the next node in the list.
+    LocationInfoListNode* Next() const
+    {
+        return m_next;
+    }
+};
+
+//------------------------------------------------------------------------
+// LocationInfoList: used to store a list of `LocationInfo` values for a
+//                   node during `buildIntervals`.
+//
+// Given an IR node that either directly defines N registers or that is a
+// contained node with uses that define a total of N registers, that node
+// will map to N `LocationInfo` values. These values are stored as a
+// linked list of `LocationInfoListNode` values.
+//
+class LocationInfoList final
+{
+    friend class LocationInfoListNodePool;
+
+    LocationInfoListNode* m_head; // The head of the list
+    LocationInfoListNode* m_tail; // The tail of the list
+
+public:
+    LocationInfoList()
+        : m_head(nullptr)
+        , m_tail(nullptr)
+    {
+    }
+
+    LocationInfoList(LocationInfoListNode* node)
+        : m_head(node)
+        , m_tail(node)
+    {
+        assert(m_head->m_next == nullptr);
+    }
+
+    //------------------------------------------------------------------------
+    // LocationInfoList::IsEmpty: Returns true if the list is empty.
+    //
+    bool IsEmpty() const
+    {
+        return m_head == nullptr;
+    }
+
+    //------------------------------------------------------------------------
+    // LocationInfoList::Begin: Returns the first node in the list.
+    //
+    LocationInfoListNode* Begin() const
+    {
+        return m_head;
+    }
+
+    //------------------------------------------------------------------------
+    // LocationInfoList::End: Returns the position after the last node in the
+    //                        list. The returned value is suitable for use as
+    //                        a sentinel for iteration.
+    //
+    LocationInfoListNode* End() const
+    {
+        return nullptr;
+    }
+
+    //------------------------------------------------------------------------
+    // LocationInfoList::Append: Appends a node to the list.
+    //
+    // Arguments:
+    //    node - The node to append. Must not be part of an existing list.
+    //
+    void Append(LocationInfoListNode* node)
+    {
+        assert(node->m_next == nullptr);
+
+        if (m_tail == nullptr)
+        {
+            assert(m_head == nullptr);
+            m_head = node;
+        }
+        else
+        {
+            m_tail->m_next = node;
+        }
+
+        m_tail = node;
+    }
+
+    //------------------------------------------------------------------------
+    // LocationInfoList::Append: Appends another list to this list.
+    //
+    // Arguments:
+    //    other - The list to append.
+    //
+    void Append(LocationInfoList other)
+    {
+        if (m_tail == nullptr)
+        {
+            assert(m_head == nullptr);
+            m_head = other.m_head;
+        }
+        else
+        {
+            m_tail->m_next = other.m_head;
+        }
+
+        m_tail = other.m_tail;
+    }
+};
+
+//------------------------------------------------------------------------
+// LocationInfoListNodePool: manages a pool of `LocationInfoListNode`
+//                           values to decrease overall memory usage
+//                           during `buildIntervals`.
+//
+// `buildIntervals` involves creating a list of location info values per
+// node that either directly produces a set of registers or that is a
+// contained node with register-producing sources. However, these lists
+// are short-lived: they are destroyed once the use of the corresponding
+// node is processed. As such, there is typically only a small number of
+// `LocationInfoListNode` values in use at any given time. Pooling these
+// values avoids otherwise frequent allocations.
+class LocationInfoListNodePool final
+{
+    LocationInfoListNode* m_freeList;
+    Compiler* m_compiler;
+
+public:
+    //------------------------------------------------------------------------
+    // LocationInfoListNodePool::LocationInfoListNodePool:
+    //    Creates a pool of `LocationInfoListNode` values.
+    //
+    // Arguments:
+    //    compiler    - The compiler context.
+    //    preallocate - The number of nodes to preallocate.
+    //
+    LocationInfoListNodePool(Compiler* compiler, unsigned preallocate = 0)
+        : m_compiler(compiler)
+    {
+        if (preallocate > 0)
+        {
+            size_t preallocateSize = sizeof(LocationInfoListNode) * preallocate;
+            auto* preallocatedNodes = reinterpret_cast<LocationInfoListNode*>(compiler->compGetMem(preallocateSize));
+
+            LocationInfoListNode* head = preallocatedNodes;
+            head->m_next = nullptr;
+
+            for (unsigned i = 1; i < preallocate; i++)
+            {
+                LocationInfoListNode* node = &preallocatedNodes[i];
+                node->m_next = head;
+                head = node;
+            }
+
+            m_freeList = head;
+        }
+    }
+
+    //------------------------------------------------------------------------
+    // LocationInfoListNodePool::GetNode: Fetches an unused node from the
+    //                                    pool.
+    //
+    // Arguments:
+    //    l      - The `LsraLocation` for the `LocationInfo` value.
+    //    i      - The interval for the `LocationInfo` value.
+    //    t      - The IR node for the `LocationInfo` value
+    //    regIdx - The register index for the `LocationInfo` value.
+    //
+    // Returns:
+    //    A pooled or newly-allocated `LocationInfoListNode`, depending on the
+    //    contents of the pool.
+    LocationInfoListNode* GetNode(LsraLocation l, Interval* i, GenTree* t, unsigned regIdx = 0)
+    {
+        LocationInfoListNode* head = m_freeList;
+        if (head == nullptr)
+        {
+            head = reinterpret_cast<LocationInfoListNode*>(m_compiler->compGetMem(sizeof(LocationInfoListNode)));
+        }
+        else
+        {
+            m_freeList = head->m_next;
+        }
+
+        head->loc = l;
+        head->interval = i;
+        head->treeNode = t;
+        head->multiRegIdx = regIdx;
+        head->m_next = nullptr;
+
+        return head;
+    }
+
+    //------------------------------------------------------------------------
+    // LocationInfoListNodePool::ReturnNodes: Returns a list of nodes to the
+    //                                        pool.
+    //
+    // Arguments:
+    //    list - The list to return.
+    //
+    void ReturnNodes(LocationInfoList& list)
+    {
+        assert(list.m_head != nullptr);
+        assert(list.m_tail != nullptr);
+
+        LocationInfoListNode* head = m_freeList;
+        list.m_tail->m_next = head;
+        m_freeList = list.m_head;
+    }
+};
+
 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
 VARSET_VALRET_TP
 LinearScan::buildUpperVectorSaveRefPositions(GenTree *tree,
@@ -2928,10 +3156,98 @@ LinearScan::buildUpperVectorRestoreRefPositions(GenTree *tree,
 }
 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
 
+#ifdef DEBUG
+//------------------------------------------------------------------------
+// ComputeOperandDstCount: computes the number of registers defined by a
+//                         node.
+//
+// For most nodes, this is simple:
+// - Nodes that do not produce values (e.g. stores and other void-typed
+//   nodes) and nodes that immediately use the registers they define
+//   produce no registers
+// - Nodes that are marked as defining N registers define N registers.
+//
+// For contained nodes, however, things are more complicated: for purposes
+// of bookkeeping, a contained node is treated as producing the transitive
+// closure of the registers produced by its sources.
+//
+// Arguments:
+//    operand - The operand for which to compute a register count.
+//
+// Returns:
+//    The number of registers defined by `operand`.
+//
+static int ComputeOperandDstCount(GenTree* operand)
+{
+    TreeNodeInfo& operandInfo = operand->gtLsraInfo;
+
+    if (operandInfo.isLocalDefUse)
+    {
+        // Operands that define an unused value do not produce any registers.
+        return 0;
+    }
+    else if (operandInfo.dstCount != 0)
+    {
+        // Operands that have a specified number of destination registers consume all of their operands
+        // and therefore produce exactly that number of registers.
+        return operandInfo.dstCount;
+    }
+    else if (operandInfo.srcCount != 0)
+    {
+        // If an operand has no destination registers but does have source registers, it must be a store.
+        assert(operand->OperIsStore() || operand->OperIsBlkOp() || operand->OperIsPutArgStk());
+        return 0;
+    }
+    else if (operand->OperIsStore() || operand->TypeGet() == TYP_VOID)
+    {
+        // Stores and void-typed operands may be encountered when processing call nodes, which contain
+        // pointers to argument setup stores.
+        return 0;
+    }
+    else
+    {
+        // If a non-void-typed operand is not an unused value and does not have source registers, that
+        // argument is contained within its parent and produces `sum(operand_dst_count)` registers:
+        // recurse over its operands and sum the registers each one defines.
+        int dstCount = 0;
+        for (GenTree* op : operand->Operands(true))
+        {
+            dstCount += ComputeOperandDstCount(op);
+        }
+
+        return dstCount;
+    }
+}
+
+//------------------------------------------------------------------------
+// ComputeAvailableSrcCount: computes the number of registers available as
+//                           sources for a node.
+//
+// This is simply the sum of the number of registers produced by each
+// operand to the node.
+//
+// Arguments:
+//    node - The node for which to compute a source count.
+//
+// Returns:
+//    The number of registers available as sources for `node`.
+//
+static int ComputeAvailableSrcCount(GenTree* node)
+{
+    int numSources = 0;
+    for (GenTree* operand : node->Operands(true))
+    {
+        numSources += ComputeOperandDstCount(operand);
+    }
+
+    return numSources;
+}
+#endif
+
 void 
 LinearScan::buildRefPositionsForNode(GenTree *tree,
                                      BasicBlock *block, 
-                                     ArrayStack<LocationInfo> *stack,
+                                     LocationInfoListNodePool& listNodePool,
+                                     HashTableBase<GenTree*, LocationInfoList>& operandToLocationInfoMap,
                                      LsraLocation currentLoc)
 {
 #ifdef _TARGET_ARM_
@@ -2958,12 +3274,23 @@ LinearScan::buildRefPositionsForNode(GenTree *tree,
 #ifdef DEBUG
     if (VERBOSE)
     {
-        JITDUMP("at start of tree, stack is : [ ");
-        for (int i=0; i<stack->Height(); i++)
+        JITDUMP("at start of tree, map contains: { ");
+        bool first = true;
+        for (auto kvp : operandToLocationInfoMap)
         {
-            JITDUMP("%d.%s ", stack->Index(i).loc, GenTree::NodeName(stack->Index(i).treeNode->OperGet()));
+            GenTree* node = kvp.Key();
+            LocationInfoList defList = kvp.Value();
+
+            JITDUMP("%sN%03u. %s -> (", first ? "" : "; ", node->gtSeqNum, GenTree::NodeName(node->OperGet()));
+            for (LocationInfoListNode* def = defList.Begin(), *end = defList.End(); def != end; def = def->Next())
+            {
+                JITDUMP("%s%d.N%03u", def == defList.Begin() ? "" : ", ", def->loc, def->treeNode->gtSeqNum);
+            }
+            JITDUMP(")");
+
+            first = false;
         }
-        JITDUMP("]\n");
+        JITDUMP(" }\n");
     }
 #endif // DEBUG
 
@@ -2972,8 +3299,12 @@ LinearScan::buildRefPositionsForNode(GenTree *tree,
     int consume = info.srcCount;
     int produce = info.dstCount;
 
+    assert(((consume == 0) && (produce == 0)) || (ComputeAvailableSrcCount(tree) == consume));
+
     if (isCandidateLocalRef(tree) && !tree->OperIsLocalStore())
     {
+        assert(consume == 0);
+
         // We handle tracked variables differently from non-tracked ones.  If it is tracked,
         // we simply add a use or def of the tracked variable.  Otherwise, for a use we need
         // to actually add the appropriate references for loading or storing the variable.
@@ -3007,14 +3338,18 @@ LinearScan::buildRefPositionsForNode(GenTree *tree,
         {
             if (produce != 0)
             {
-                stack->Push(LocationInfo(currentLoc, interval, tree));
+                LocationInfoList list(listNodePool.GetNode(currentLoc, interval, tree));
+                bool added = operandToLocationInfoMap.AddOrUpdate(tree, list);
+                assert(added);
+
+                tree->gtLsraInfo.definesAnyRegisters = true;
             }
 
             return;
         }
         else
         {
-            JITDUMP("    Not pushed on stack\n");
+            JITDUMP("    Not added to map\n");
             regMaskTP candidates = getUseCandidates(tree);
 
             if (fixedAssignment != RBM_NONE)
@@ -3046,11 +3381,11 @@ LinearScan::buildRefPositionsForNode(GenTree *tree,
 
     GenTree * defNode = tree;
 
-    // noPush means the node creates a def but for purposes of stack
-    // management do not push it because data is not flowing up the
+    // noAdd means the node creates a def but for purposes of map
+    // management do not add it because data is not flowing up the
     // tree but over (as in ASG nodes)
 
-    bool noPush = info.isLocalDefUse;
+    bool noAdd = info.isLocalDefUse;
     RefPosition * prevPos = nullptr;
 
     bool isSpecialPutArg = false;
@@ -3067,19 +3402,30 @@ LinearScan::buildRefPositionsForNode(GenTree *tree,
             if (produce == 0)
             {
                 produce = 1;
-                noPush = true;
+                noAdd = true;
             }
 
             assert(consume <= MAX_RET_REG_COUNT);
             if (consume == 1)
             {
-                Interval * srcInterval = stack->TopRef().interval;
+                // Get the location info for the register defined by the first operand.
+                LocationInfoList operandDefs;
+                bool found = operandToLocationInfoMap.TryGetValue(*(tree->OperandsBegin(true)), &operandDefs);
+                assert(found);
+
+                // Since we only expect to consume one register, we should only have a single register to
+                // consume.
+                assert(operandDefs.Begin()->Next() == operandDefs.End());
+
+                LocationInfo& operandInfo = *static_cast<LocationInfo*>(operandDefs.Begin());
+
+                Interval * srcInterval = operandInfo.interval;
                 if (srcInterval->relatedInterval == nullptr)
                 {
                     // Preference the source to the dest, unless this is a non-last-use localVar.
                     // Note that the last-use info is not correct, but it is a better approximation than preferencing
                     // the source to the dest, if the source's lifetime extends beyond the dest.
-                    if (!srcInterval->isLocalVar || (stack->TopRef().treeNode->gtFlags & GTF_VAR_DEATH) != 0)
+                    if (!srcInterval->isLocalVar || (operandInfo.treeNode->gtFlags & GTF_VAR_DEATH) != 0)
                     {
                         srcInterval->assignRelatedInterval(varDefInterval);
                     }
@@ -3109,7 +3455,7 @@ LinearScan::buildRefPositionsForNode(GenTree *tree,
             }
         }
     }
-    else if (noPush && produce == 0)
+    else if (noAdd && produce == 0)
     {
         // This is the case for dead nodes that occur after
         // tree rationalization
@@ -3149,11 +3495,6 @@ LinearScan::buildRefPositionsForNode(GenTree *tree,
     }
 #endif // DEBUG
 
-
-    // The RefPositions need to be constructed in execution order, which is the order they are pushed.
-    // So in order to pop them in execution order we need to reverse the stack.
-    stack->ReverseTop(consume);
-
     Interval *prefSrcInterval = nullptr;
 
     // If this is a binary operator that will be encoded with 2 operand fields
@@ -3166,7 +3507,14 @@ LinearScan::buildRefPositionsForNode(GenTree *tree,
         // we don't want the def of the copy to kill the lclVar register, if it is assigned the same register
         // (which is actually what we hope will happen).
         JITDUMP("Setting putarg_reg as a pass-through of a non-last use lclVar\n");
-        Interval * srcInterval = stack->TopRef().interval;
+
+        // Get the register information for the first operand of the node.
+        LocationInfoList operandDefs;
+        bool found = operandToLocationInfoMap.TryGetValue(*(tree->OperandsBegin(true)), &operandDefs);
+        assert(found);
+
+        // Preference the destination to the interval of the first register defined by the first operand.
+        Interval * srcInterval = operandDefs.Begin()->interval;
         assert(srcInterval->isLocalVar);
         prefSrcInterval = srcInterval;
         isSpecialPutArg = true;
@@ -3179,9 +3527,52 @@ LinearScan::buildRefPositionsForNode(GenTree *tree,
     int internalCount = buildInternalRegisterDefsForNode(tree, currentLoc, internalRefs);
 
     // pop all ref'd tree temps
-    for (int useIndex=0; useIndex < consume; useIndex++)
+    GenTreeOperandIterator iterator = tree->OperandsBegin(true);
+
+    // `operandDefs` holds the list of `LocationInfo` values for the registers defined by the current
+    // operand. `operandDefsIterator` points to the current `LocationInfo` value in `operandDefs`.
+    LocationInfoList operandDefs;
+    LocationInfoListNode* operandDefsIterator = operandDefs.End();
+    for (int useIndex = 0; useIndex < consume; useIndex++)
     {
-        LocationInfo locInfo = stack->Pop();
+        // If we've consumed all of the registers defined by the current operand, advance to the next
+        // operand that defines any registers.
+        if (operandDefsIterator == operandDefs.End())
+        {
+            // Skip operands that do not define any registers, whether directly or indirectly.
+            GenTree* operand;
+            do
+            {
+                assert(iterator != tree->OperandsEnd());
+                operand = *iterator;
+
+                ++iterator;
+            } while (!operand->gtLsraInfo.definesAnyRegisters);
+
+            // If we have already processed a previous operand, return its `LocationInfo` list to the
+            // pool.
+            if (useIndex > 0)
+            {
+                assert(!operandDefs.IsEmpty());
+                listNodePool.ReturnNodes(operandDefs);
+            }
+
+            // Remove the list of registers defined by the current operand from the map. Note that this
+            // is only correct because tree nodes are singly-used: if this property ever changes (e.g.
+            // if tree nodes are eventually allowed to be multiply-used), then the removal is only
+            // correct at the last use.
+            bool removed = operandToLocationInfoMap.TryRemove(operand, &operandDefs);
+            assert(removed);
+
+            // Move the operand def iterator to the `LocationInfo` for the first register defined by the
+            // current operand.
+            operandDefsIterator = operandDefs.Begin();
+            assert(operandDefsIterator != operandDefs.End());
+        }
+
+        LocationInfo& locInfo = *static_cast<LocationInfo*>(operandDefsIterator);
+        operandDefsIterator = operandDefsIterator->Next();
+
         JITDUMP("t%u ", locInfo.loc);
 
         // for interstitial tree temps, a use is always last and end;
@@ -3275,7 +3666,12 @@ LinearScan::buildRefPositionsForNode(GenTree *tree,
         }
     }
     JITDUMP("\n");
-    
+
+    if (!operandDefs.IsEmpty())
+    {
+        listNodePool.ReturnNodes(operandDefs);
+    }
+
     buildInternalRegisterUsesForNode(tree, currentLoc, internalRefs, internalCount);
 
     RegisterType registerType = getDefType(tree);
@@ -3323,6 +3719,7 @@ LinearScan::buildRefPositionsForNode(GenTree *tree,
     }
 
     // push defs
+    LocationInfoList locationInfoList;
     LsraLocation defLocation = currentLoc + 1;
     for (int i=0; i < produce; i++)
     {        
@@ -3374,9 +3771,9 @@ LinearScan::buildRefPositionsForNode(GenTree *tree,
         
         // for assignments, we want to create a refposition for the def
         // but not push it
-        if (!noPush)
+        if (!noAdd)
         {
-            stack->Push(LocationInfo(defLocation, interval, tree, (unsigned) i));
+            locationInfoList.Append(listNodePool.GetNode(defLocation, interval, tree, (unsigned) i));
         }
 
         RefPosition* pos = newRefPosition(interval, defLocation, defRefType, defNode, currCandidates, (unsigned)i);
@@ -3393,6 +3790,33 @@ LinearScan::buildRefPositionsForNode(GenTree *tree,
 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
     buildUpperVectorRestoreRefPositions(tree, currentLoc, liveLargeVectors);
 #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+
+    bool isContainedNode = !noAdd && consume == 0 && produce == 0 && tree->TypeGet() != TYP_VOID && !tree->OperIsStore();
+    if (isContainedNode)
+    {
+        // Contained nodes map to the concatenated lists of their operands.
+        for (GenTree* op : tree->Operands(true))
+        {
+            if (!op->gtLsraInfo.definesAnyRegisters)
+            {
+                assert(ComputeOperandDstCount(op) == 0);
+                continue;
+            }
+
+            LocationInfoList operandList;
+            bool removed = operandToLocationInfoMap.TryRemove(op, &operandList);
+            assert(removed);
+
+            locationInfoList.Append(operandList);
+        }
+    }
+
+    if (!locationInfoList.IsEmpty())
+    {
+        bool added = operandToLocationInfoMap.AddOrUpdate(tree, locationInfoList);
+        assert(added);
+        tree->gtLsraInfo.definesAnyRegisters = true;
+    }
 }
 
 // make an interval for each physical register
@@ -3823,7 +4247,8 @@ LinearScan::buildIntervals()
         intRegState->rsCalleeRegArgMaskLiveIn |= RBM_SECRET_STUB_PARAM;
     }
 
-    ArrayStack<LocationInfo> stack(compiler);
+    LocationInfoListNodePool listNodePool(compiler, 8);
+    SmallHashTable<GenTree*, LocationInfoList, 32> operandToLocationInfoMap(compiler);
 
     BasicBlock* predBlock = nullptr;
     BasicBlock* prevBlock = nullptr;
@@ -3941,7 +4366,7 @@ LinearScan::buildIntervals()
                 assert (treeNode->gtLsraInfo.loc >= currentLoc);
                 currentLoc = treeNode->gtLsraInfo.loc;
                 dstCount = treeNode->gtLsraInfo.dstCount;       
-                buildRefPositionsForNode(treeNode, block, &stack, currentLoc);
+                buildRefPositionsForNode(treeNode, block, listNodePool, operandToLocationInfoMap, currentLoc);
 #ifdef DEBUG
                 if (currentLoc > maxNodeLocation)
                 {
@@ -3949,13 +4374,27 @@ LinearScan::buildIntervals()
                 }
 #endif // DEBUG
             }
-            // At this point the stack should be empty, unless:
-            // 1) we've got a node that produces a result that's ignored, in which
-            //    case the stack height should match the dstCount.
-            // 2) we've got a comma node
-            JITDUMP("stack height after tree processed was %d\n", stack.Height());
-            assert(stmtExpr->OperGet() == GT_COMMA || stack.Height() == dstCount);
-            stack.Reset();
+
+#ifdef DEBUG
+            // At this point the map should be empty, unless: we have a node that
+            // produces a result that's ignored, in which case the map should contain
+            // one element that maps to dstCount locations.
+            JITDUMP("map size after tree processed was %d\n", operandToLocationInfoMap.Count());
+
+            int locCount = 0;
+            for (auto kvp : operandToLocationInfoMap)
+            {
+                LocationInfoList defList = kvp.Value();
+                for (LocationInfoListNode* def = defList.Begin(), *end = defList.End(); def != end; def = def->Next())
+                {
+                    locCount++;
+                }
+            }
+
+            assert(locCount == dstCount);
+#endif
+
+            operandToLocationInfoMap.Clear();
         }
         // Increment the LsraLocation at this point, so that the dummy RefPositions
         // will not have the same LsraLocation as any "real" RefPosition.
@@ -9130,6 +9569,42 @@ LinearScan::resolveEdge(BasicBlock*      fromBlock,
     }
 }
 
+//------------------------------------------------------------------------
+// TreeNodeInfo::Initialize: (re)initializes this TreeNodeInfo value for a
+//                           given node at a given location.
+//
+// Arguments:
+//    lsra     - The LinearScan context, used to compute the register masks.
+//    node     - The IR node that this TreeNodeInfo describes.
+//    location - The LsraLocation to record for the node.
+//
+void TreeNodeInfo::Initialize(LinearScan* lsra, GenTree* node, LsraLocation location)
+{
+    regMaskTP dstCandidates;
+
+    // if there is a reg indicated on the tree node, use that for dstCandidates
+    // the exception is the NOP, which sometimes show up around late args.
+    // TODO-Cleanup: get rid of those NOPs.
+    if (node->gtRegNum == REG_NA || node->gtOper == GT_NOP)
+    {
+        dstCandidates = lsra->allRegs(node->TypeGet());
+    }
+    else
+    {
+        dstCandidates = genRegMask(node->gtRegNum);
+    }
+
+    // Reset counters and flags to their default (empty/false) state.
+    internalIntCount = 0;
+    internalFloatCount = 0;
+    isLocalDefUse = false;
+    isHelperCallWithKills = false;
+    isLsraAdded = false;
+    definesAnyRegisters = false;
+
+    setDstCandidates(lsra, dstCandidates);
+    srcCandsIndex = dstCandsIndex; // source candidates start out identical to the destination candidates
+
+    setInternalCandidates(lsra, lsra->allRegs(TYP_INT));
+
+    loc = location;
+#ifdef DEBUG
+    isInitialized = true; // mark as initialized so that subsequent IsValid() checks pass
+#endif
+
+    assert(IsValid(lsra));
+}
+
 regMaskTP TreeNodeInfo::getSrcCandidates(LinearScan *lsra)
 {
     return lsra->GetRegMaskForIndex(srcCandsIndex);
index 9ce2bd7..7987cfc 100644 (file)
@@ -7,6 +7,7 @@
 #define _LSRA_H_
 
 #include "arraylist.h"
+#include "smallhash.h"
 #include "nodeinfo.h"
 
 // Minor and forward-reference types
@@ -300,11 +301,15 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 // to the next RefPosition in code order
 // THIS IS THE OPTION CURRENTLY BEING PURSUED
 
+class LocationInfoList;
+class LocationInfoListNodePool;
+
 class LinearScan : public LinearScanInterface
 {
     friend class RefPosition;
     friend class Interval;
     friend class Lowering;
+    friend class TreeNodeInfo;
 
 public:
 
@@ -607,7 +612,8 @@ private:
     void            resolveConflictingDefAndUse(Interval* interval, RefPosition* defRefPosition);
 
     void            buildRefPositionsForNode(GenTree *tree, BasicBlock *block,
-                                             ArrayStack<LocationInfo> *stack,
+                                             LocationInfoListNodePool& listNodePool,
+                                             HashTableBase<GenTree*, LocationInfoList>& operandToLocationInfoMap,
                                              LsraLocation loc);
 
 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
index 8fc48f9..872e9d2 100644 (file)
@@ -25,13 +25,16 @@ public:
         dstCandsIndex         = 0;
         internalCandsIndex    = 0;
         isLocalDefUse         = false;
-        isInitialized         = false;
         isHelperCallWithKills = false;
         isLsraAdded           = false;
         isDelayFree           = false;
         hasDelayFreeSrc       = false;
         isTgtPref             = false;
         regOptional           = false;
+        definesAnyRegisters    = false;
+#ifdef DEBUG
+        isInitialized         = false;
+#endif
     }
 
     // dst
@@ -104,8 +107,6 @@ public:
     // Examples include stack arguments to a call (they are immediately stored), lhs of comma
     // nodes, or top-level nodes that are non-void.
     unsigned char isLocalDefUse:1;
-    // isInitialized is set when the tree node is handled.
-    unsigned char isInitialized:1;
     // isHelperCallWithKills is set when this is a helper call that kills more than just its in/out regs.
     unsigned char isHelperCallWithKills:1;
     // Is this node added by LSRA, e.g. as a resolution or copy/reload move.
@@ -122,12 +123,22 @@ public:
     unsigned char isTgtPref:1;
     // Whether a spilled second src can be treated as a contained operand
     unsigned char regOptional:1;
+    // Whether or not a node defines any registers, whether directly (for nodes where dstCount is non-zero)
+    // or indirectly (for contained nodes, which propagate the transitive closure of the registers
+    // defined by their inputs). Used during buildRefPositionsForNode in order to avoid unnecessary work.
+    unsigned char definesAnyRegisters:1;
+
+#ifdef DEBUG
+    // isInitialized is set when the tree node is handled.
+    unsigned char isInitialized:1;
+#endif
 
 public:
+    // Initializes the TreeNodeInfo value with the given values.
+    void Initialize(LinearScan* lsra, GenTree* node, LsraLocation location);
 
 #ifdef DEBUG
     void dump(LinearScan *lsra);
-#endif // DEBUG
 
     // This method checks to see whether the information has been initialized,
     // and is in a consistent state
@@ -136,6 +147,7 @@ public:
         return (isInitialized &&
                 ((getSrcCandidates(lsra)|getInternalCandidates(lsra)|getDstCandidates(lsra)) & ~(RBM_ALLFLOAT|RBM_ALLINT)) == 0);
     }
+#endif // DEBUG
 };
 
 #endif // _NODEINFO_H_
diff --git a/src/coreclr/src/jit/smallhash.h b/src/coreclr/src/jit/smallhash.h
new file mode 100644 (file)
index 0000000..e2ccf20
--- /dev/null
@@ -0,0 +1,596 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef _SMALLHASHTABLE_H_
+#define _SMALLHASHTABLE_H_
+
+//------------------------------------------------------------------------
+// HashTableInfo: a concept that provides equality and hashing methods for
+//                a particular key type. Used by HashTableBase and its
+//                subclasses.
+//
+// Specializations are expected to provide static methods with the
+// signatures sketched in the body below.
+template<typename TKey>
+struct HashTableInfo
+{
+    // static bool Equals(const TKey& x, const TKey& y);
+    // static unsigned GetHashCode(const TKey& key);
+};
+
+//------------------------------------------------------------------------
+// HashTableInfo<TKey*>: specialized version of HashTableInfo for pointer-
+//                       typed keys.
+template<typename TKey>
+struct HashTableInfo<TKey*>
+{
+    // Pointer keys are compared by identity.
+    static bool Equals(const TKey* x, const TKey* y)
+    {
+        return x == y;
+    }
+
+    static unsigned GetHashCode(const TKey* key)
+    {
+        // Shift off bits that are not likely to be significant: due to
+        // alignment, the low log2(alignof(TKey)) bits of a `TKey*` are
+        // expected to be zero.
+        size_t keyval = reinterpret_cast<size_t>(key) >> ConstLog2<__alignof(TKey)>::value;
+
+        // Truncate and return the result
+        return static_cast<unsigned>(keyval);
+    }
+};
+
+//------------------------------------------------------------------------
+// HashTableBase: base type for HashTable and SmallHashTable. This class
+//                provides the vast majority of the implementation. The
+//                subclasses differ in the storage they use at the time of
+//                construction: HashTable allocates the initial bucket
+//                array on the heap; SmallHashTable contains a small inline
+//                array.
+//
+// This implementation is based on the ideas presented in Herlihy, Shavit,
+// and Tzafrir '08 (http://mcg.cs.tau.ac.il/papers/disc2008-hopscotch.pdf),
+// though it does not currently implement the hopscotch algorithm.
+//
+// The approach taken is intended to perform well in both space and speed.
+// This approach is a hybrid of separate chaining and open addressing with
+// linear probing: collisions are resolved using a bucket chain, but that
+// chain is stored in the bucket array itself.
+//
+// Resolving collisions using a bucket chain avoids the primary clustering
+// issue common in linearly-probed open addressed hash tables, while using
+// buckets as chain nodes avoids the allocation traffic typical of chained
+// tables. Applying the hopscotch algorithm in the aforementioned paper
+// could further improve performance by optimizing access patterns for
+// better cache usage.
+//
+// Template parameters:
+//    TKey     - The type of the table's keys.
+//    TValue   - The type of the table's values.
+//    TKeyInfo - A type that conforms to the HashTableInfo<TKey> concept.
+template<typename TKey, typename TValue, typename TKeyInfo = HashTableInfo<TKey>>
+class HashTableBase
+{
+    friend class KeyValuePair;
+    friend class Iterator;
+
+    enum : unsigned
+    {
+        InitialNumBuckets = 8
+    };
+
+protected:
+    //------------------------------------------------------------------------
+    // HashTableBase::Bucket: provides storage for the key-value pairs that
+    //                        make up the contents of the table.
+    //
+    // The "home" bucket for a particular key is the bucket indexed by the
+    // key's hash code modulo the size of the bucket array (the "home index").
+    //
+    // The home bucket is always considered to be part of the chain that it
+    // roots, even if it is also part of the chain rooted at a different
+    // bucket. `m_firstOffset` indicates the offset of the first non-home
+    // bucket in the home bucket's chain. If the `m_firstOffset` of a bucket
+    // is 0, the chain rooted at that bucket is empty.
+    //
+    // The index of the next bucket in a chain is calculated by adding the
+    // value in `m_nextOffset` to the index of the current bucket. If
+    // `m_nextOffset` is 0, the current bucket is the end of its chain. Each
+    // bucket in a chain must be occupied (i.e. `m_isFull` will be true).
+    struct Bucket
+    {
+        bool m_isFull;          // True if the bucket is occupied; false otherwise.
+
+        unsigned m_firstOffset; // The offset to the first node in the chain for this bucket index.
+        unsigned m_nextOffset;  // The offset to the next node in the chain for this bucket index.
+
+        unsigned m_hash;        // The hash code for the element stored in this bucket.
+        TKey m_key;             // The key for the element stored in this bucket.
+        TValue m_value;         // The value for the element stored in this bucket.
+    };
+
+private:
+    Compiler* m_compiler;      // The compiler context to use for allocations.
+    Bucket* m_buckets;         // The bucket array.
+    unsigned m_numBuckets;     // The number of buckets in the bucket array.
+    unsigned m_numFullBuckets; // The number of occupied buckets.
+
+    //------------------------------------------------------------------------
+    // HashTableBase::Insert: inserts a key-value pair into a bucket array.
+    //
+    // Arguments:
+    //    buckets    - The bucket array in which to insert the key-value pair.
+    //    numBuckets - The number of buckets in the bucket array. Must be a
+    //                 power of 2 (so that `numBuckets - 1` is a valid index
+    //                 mask).
+    //    hash       - The hash code of the key to insert.
+    //    key        - The key to insert.
+    //    value      - The value to insert.
+    //
+    // Returns:
+    //    True if the key-value pair was successfully inserted; false
+    //    otherwise.
+    static bool Insert(Bucket* buckets, unsigned numBuckets, unsigned hash, const TKey& key, const TValue& value)
+    {
+        const unsigned mask = numBuckets - 1;
+        unsigned homeIndex = hash & mask;
+
+        Bucket* home = &buckets[homeIndex];
+        if (!home->m_isFull)
+        {
+            // The home bucket is empty; use it.
+            //
+            // Note that the next offset does not need to be updated: whether or not it is non-zero,
+            // it is already correct, since we're inserting at the head of the list.
+            home->m_isFull = true;
+            home->m_firstOffset = 0;
+            home->m_hash = hash;
+            home->m_key = key;
+            home->m_value = value;
+            return true;
+        }
+
+        // If the home bucket is full, probe to find the next empty bucket.
+        // While probing, simultaneously walk the home bucket's chain so that
+        // the empty bucket, once found, can be linked into the chain at the
+        // correct position.
+        unsigned precedingIndexInChain = homeIndex;
+        unsigned nextIndexInChain = (homeIndex + home->m_firstOffset) & mask;
+        for (unsigned j = 1; j < numBuckets; j++)
+        {
+            unsigned bucketIndex = (homeIndex + j) & mask;
+            Bucket* bucket = &buckets[bucketIndex];
+            if (bucketIndex == nextIndexInChain)
+            {
+                // This bucket is the next link in the chain; advance the
+                // chain cursors past it.
+                assert(bucket->m_isFull);
+                precedingIndexInChain = bucketIndex;
+                nextIndexInChain = (bucketIndex + bucket->m_nextOffset) & mask;
+            }
+            else if (!bucket->m_isFull)
+            {
+                // Found an empty bucket; occupy it and splice it into the
+                // chain between `precedingIndexInChain` and `nextIndexInChain`.
+                bucket->m_isFull = true;
+                if (precedingIndexInChain == nextIndexInChain)
+                {
+                    // The new bucket is the new end of the chain.
+                    bucket->m_nextOffset = 0;
+                }
+                else
+                {
+                    // Link the new bucket to the next bucket in the chain.
+                    assert(((nextIndexInChain - bucketIndex) & mask) > 0);
+                    bucket->m_nextOffset = (nextIndexInChain - bucketIndex) & mask;
+                }
+
+                // Link the preceding bucket to the new bucket. The home bucket
+                // stores its link in `m_firstOffset`; all other buckets use
+                // `m_nextOffset`.
+                unsigned offset = (bucketIndex - precedingIndexInChain) & mask;
+                if (precedingIndexInChain == homeIndex)
+                {
+                    buckets[precedingIndexInChain].m_firstOffset = offset;
+                }
+                else
+                {
+                    buckets[precedingIndexInChain].m_nextOffset = offset;
+                }
+
+                bucket->m_hash = hash;
+                bucket->m_key = key;
+                bucket->m_value = value;
+                return true;
+            }
+        }
+
+        // No more free buckets.
+        return false;
+    }
+
+    //------------------------------------------------------------------------
+    // HashTableBase::TryGetBucket: attempts to get the bucket that holds a
+    //                              particular key.
+    //
+    // Arguments:
+    //    hash           - The hash code of the key to find.
+    //    key            - The key to find.
+    //    precedingIndex - An output parameter that will hold the index of the
+    //                     preceding bucket in the chain for the key. May be
+    //                     equal to `bucketIndex` if the key is stored in its
+    //                     home bucket.
+    //    bucketIndex    - An output parameter that will hold the index of the
+    //                     bucket that stores the key.
+    //
+    // Returns:
+    //    True if the key was successfully found; false otherwise.
+    bool TryGetBucket(unsigned hash, const TKey& key, unsigned* precedingIndex, unsigned* bucketIndex) const
+    {
+        // An empty table cannot contain any keys.
+        if (m_numBuckets == 0)
+        {
+            return false;
+        }
+
+        const unsigned mask = m_numBuckets - 1;
+        unsigned index = hash & mask;
+
+        // Check the home bucket first. Comparing the stored hash before
+        // calling `TKeyInfo::Equals` is a cheap filter that rejects most
+        // non-matching keys.
+        Bucket* bucket = &m_buckets[index];
+        if (bucket->m_isFull && bucket->m_hash == hash && TKeyInfo::Equals(bucket->m_key, key))
+        {
+            *precedingIndex = index;
+            *bucketIndex = index;
+            return true;
+        }
+
+        // Walk the chain rooted at the home bucket. An offset of 0 marks the
+        // end of the chain.
+        for (unsigned offset = bucket->m_firstOffset; offset != 0; offset = bucket->m_nextOffset)
+        {
+            unsigned precedingIndexInChain = index;
+
+            index = (index + offset) & mask;
+            bucket = &m_buckets[index];
+
+            // Every bucket in a chain must be occupied.
+            assert(bucket->m_isFull);
+            if (bucket->m_hash == hash && TKeyInfo::Equals(bucket->m_key, key))
+            {
+                *precedingIndex = precedingIndexInChain;
+                *bucketIndex = index;
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    //------------------------------------------------------------------------
+    // HashTableBase::Resize: allocates a new bucket array twice the size of
+    //                        the current array and copies the key-value pairs
+    //                        from the current bucket array into the new array.
+    void Resize()
+    {
+        Bucket* currentBuckets = m_buckets;
+
+        unsigned newNumBuckets = m_numBuckets == 0 ? InitialNumBuckets : m_numBuckets * 2;
+        size_t allocSize = sizeof(Bucket) * newNumBuckets;
+        assert((sizeof(Bucket) * m_numBuckets) < allocSize);
+
+        auto* newBuckets = reinterpret_cast<Bucket*>(m_compiler->compGetMem(allocSize));
+        memset(newBuckets, 0, allocSize);
+
+        for (unsigned currentIndex = 0; currentIndex < m_numBuckets; currentIndex++)
+        {
+            Bucket* currentBucket = &currentBuckets[currentIndex];
+            if (!currentBucket->m_isFull)
+            {
+                continue;
+            }
+
+            bool inserted = Insert(newBuckets, newNumBuckets, currentBucket->m_hash, currentBucket->m_key, currentBucket->m_value);
+            (assert(inserted), (void)inserted);
+        }
+
+        m_numBuckets = newNumBuckets;
+        m_buckets = newBuckets;
+    }
+
+protected:
+    //------------------------------------------------------------------------
+    // HashTableBase: constructs a table over a caller-provided bucket array.
+    //
+    // Arguments:
+    //    compiler   - The compiler instance used for any subsequent bucket
+    //                 allocations (see Resize); must not be null.
+    //    buckets    - The initial bucket storage; may be null only when
+    //                 numBuckets is 0.
+    //    numBuckets - The number of buckets in `buckets`. Must be a power of
+    //                 2, since the index math masks with (numBuckets - 1).
+    HashTableBase(Compiler* compiler, Bucket* buckets, unsigned numBuckets)
+        : m_compiler(compiler)
+        , m_buckets(buckets)
+        , m_numBuckets(numBuckets)
+        , m_numFullBuckets(0)
+    {
+        assert(compiler != nullptr);
+
+        if (numBuckets > 0)
+        {
+            assert((numBuckets & (numBuckets - 1)) == 0); // Size must be a power of 2
+            assert(m_buckets != nullptr);
+
+            // Start with every bucket empty and every chain offset zeroed.
+            memset(m_buckets, 0, sizeof(Bucket) * numBuckets);
+        }
+    }
+
+public:
+#ifdef DEBUG
+    class Iterator;
+
+    // A debug-only view over a single occupied bucket, exposing its key and
+    // value by reference. Produced by dereferencing an Iterator.
+    class KeyValuePair final
+    {
+        friend class HashTableBase<TKey, TValue, TKeyInfo>::Iterator;
+
+        // The bucket whose contents this pair exposes. Null only for a
+        // default-constructed pair (returned when dereferencing an iterator
+        // positioned at the end of the table).
+        Bucket* m_bucket;
+
+        // Private: only Iterator may bind a pair to a bucket.
+        KeyValuePair(Bucket* bucket)
+            : m_bucket(bucket)
+        {
+            assert(m_bucket != nullptr);
+        }
+
+    public:
+        KeyValuePair()
+            : m_bucket(nullptr)
+        {
+        }
+
+        // Returns a mutable reference to the key stored in the bound bucket.
+        inline TKey& Key()
+        {
+            return m_bucket->m_key;
+        }
+
+        // Returns a mutable reference to the value stored in the bound bucket.
+        inline TValue& Value()
+        {
+            return m_bucket->m_value;
+        }
+    };
+
+    // NOTE: HashTableBase only provides iterators in debug builds because the order in which
+    // the iterator type produces values is undefined (e.g. it is not related to the order in
+    // which key-value pairs were inserted).
+    class Iterator final
+    {
+        friend class HashTableBase<TKey, TValue, TKeyInfo>;
+
+        // The bucket array being walked; null for a default-constructed iterator.
+        Bucket* m_buckets;
+        // Total number of buckets in the array.
+        unsigned m_numBuckets;
+        // Index of the current occupied bucket, or m_numBuckets once exhausted.
+        unsigned m_index;
+
+        // Private: positioned iterators are created only by the table itself
+        // (see begin()/end()).
+        Iterator(Bucket* buckets, unsigned numBuckets, unsigned index)
+            : m_buckets(buckets)
+            , m_numBuckets(numBuckets)
+            , m_index(index)
+        {
+            assert((buckets != nullptr) || (numBuckets == 0));
+            assert(index <= numBuckets);
+
+            // Advance to the first occupied bucket
+            while (m_index != m_numBuckets && !m_buckets[m_index].m_isFull)
+            {
+                m_index++;
+            }
+        }
+
+    public:
+        Iterator()
+            : m_buckets(nullptr)
+            , m_numBuckets(0)
+            , m_index(0)
+        {
+        }
+
+        // Returns a pair bound to the current bucket, or a default (unbound)
+        // pair if the iterator is at or past the end.
+        KeyValuePair operator*() const
+        {
+            if (m_index >= m_numBuckets)
+            {
+                return KeyValuePair();
+            }
+
+            Bucket* bucket = &m_buckets[m_index];
+            assert(bucket->m_isFull);
+            return KeyValuePair(bucket);
+        }
+
+        // NOTE(review): this returns `KeyValuePair` by value, but C++'s
+        // drill-down rule for an overloaded `operator->` requires the returned
+        // type to be a pointer or to itself overload `operator->`; KeyValuePair
+        // does neither, so `it->Key()` will not compile. Callers must use
+        // (*it).Key() — confirm whether this operator is ever actually used.
+        KeyValuePair operator->() const
+        {
+            return this->operator*();
+        }
+
+        // Iterators compare equal when they refer to the same bucket array and
+        // the same position. (m_numBuckets is not compared; it is implied by
+        // the array.)
+        bool operator==(const Iterator& other) const
+        {
+            return (m_buckets == other.m_buckets) && (m_index == other.m_index);
+        }
+
+        bool operator!=(const Iterator& other) const
+        {
+            return (m_buckets != other.m_buckets) || (m_index != other.m_index);
+        }
+
+        // Pre-increment: advances to the next occupied bucket, or to the end.
+        Iterator& operator++()
+        {
+            do
+            {
+                m_index++;
+            } while (m_index != m_numBuckets && !m_buckets[m_index].m_isFull);
+
+            return *this;
+        }
+    };
+
+    // Returns an iterator positioned at the first occupied bucket (the
+    // Iterator constructor skips empty buckets), or end() if the table is empty.
+    Iterator begin() const
+    {
+        return Iterator(m_buckets, m_numBuckets, 0);
+    }
+
+    // Returns the past-the-end iterator.
+    Iterator end() const
+    {
+        return Iterator(m_buckets, m_numBuckets, m_numBuckets);
+    }
+#endif // DEBUG
+
+    // Returns the number of key-value pairs currently stored in the table.
+    unsigned Count() const
+    {
+        return m_numFullBuckets;
+    }
+
+    //------------------------------------------------------------------------
+    // HashTableBase::Clear: removes every key-value pair from the table while
+    //                       retaining the current bucket storage.
+    void Clear()
+    {
+        if (m_numBuckets == 0)
+        {
+            // No storage has been allocated yet; the table is already empty.
+            return;
+        }
+
+        // Zeroing the buckets clears m_isFull and all chain offsets at once.
+        memset(m_buckets, 0, sizeof(Bucket) * m_numBuckets);
+        m_numFullBuckets = 0;
+    }
+
+    //------------------------------------------------------------------------
+    // HashTableBase::AddOrUpdate: adds a key-value pair to the hash table if
+    //                             the key does not already exist in the
+    //                             table, or updates the value if the key
+    //                             already exists.
+    //
+    // Arguments:
+    //    key   - The key for which to add or update a value.
+    //    value - The value.
+    //
+    // Returns:
+    //    True if the value was added; false if it was updated.
+    bool AddOrUpdate(const TKey& key, const TValue& value)
+    {
+        unsigned hash = TKeyInfo::GetHashCode(key);
+
+        // If the key is already present, just overwrite its value.
+        unsigned unused, index;
+        if (TryGetBucket(hash, key, &unused, &index))
+        {
+            m_buckets[index].m_value = value;
+            return false;
+        }
+
+        // If the load factor is at or above 0.8 (fullBuckets / buckets >= 4/5),
+        // resize the table before inserting. This also covers the initial
+        // zero-capacity table, since 0 >= 0 triggers the first allocation.
+        if ((m_numFullBuckets * 5) >= (m_numBuckets * 4))
+        {
+            Resize();
+        }
+
+        bool inserted = Insert(m_buckets, m_numBuckets, hash, key, value);
+        // Insertion is expected to succeed because the table was just grown if
+        // it was nearly full. The comma expression keeps `inserted` referenced
+        // in release builds, where assert() compiles away.
+        (assert(inserted), (void)inserted);
+
+        m_numFullBuckets++;
+
+        return true;
+    }
+
+    //------------------------------------------------------------------------
+    // HashTableBase::TryRemove: removes a key from the hash table and returns
+    //                           its value if the key exists in the table.
+    //
+    // Arguments:
+    //    key   - The key to remove from the table.
+    //    value - An output parameter that will hold the value for the removed
+    //            key.
+    //
+    // Returns:
+    //    True if the key was removed from the table; false otherwise.
+    bool TryRemove(const TKey& key, TValue* value)
+    {
+        unsigned hash = TKeyInfo::GetHashCode(key);
+
+        unsigned precedingIndexInChain, bucketIndex;
+        if (!TryGetBucket(hash, key, &precedingIndexInChain, &bucketIndex))
+        {
+            return false;
+        }
+
+        // Mark the bucket empty. If the removed key was in its home bucket
+        // (precedingIndexInChain == bucketIndex), the bucket's m_firstOffset is
+        // deliberately left intact: an empty home bucket may still root the
+        // collision chain for other keys, and TryGetBucket follows that chain
+        // without requiring the home bucket to be full.
+        Bucket* bucket = &m_buckets[bucketIndex];
+        bucket->m_isFull = false;
+
+        if (precedingIndexInChain != bucketIndex)
+        {
+            // The removed bucket was a link in a collision chain: splice it out
+            // by pointing its predecessor at its successor. Chain offsets are
+            // relative distances (mod the table size), so the successor's
+            // offset must be recomputed relative to the predecessor.
+            const unsigned mask = m_numBuckets - 1;
+            unsigned homeIndex = hash & mask;
+
+            unsigned nextOffset;
+            if (bucket->m_nextOffset == 0)
+            {
+                // The removed bucket terminated the chain; now the predecessor does.
+                nextOffset = 0;
+            }
+            else
+            {
+                unsigned nextIndexInChain = (bucketIndex + bucket->m_nextOffset) & mask;
+                nextOffset = (nextIndexInChain - precedingIndexInChain) & mask;
+            }
+
+            // The home bucket stores its link in m_firstOffset; every other
+            // bucket stores its link in m_nextOffset.
+            if (precedingIndexInChain == homeIndex)
+            {
+                m_buckets[precedingIndexInChain].m_firstOffset = nextOffset;
+            }
+            else
+            {
+                m_buckets[precedingIndexInChain].m_nextOffset = nextOffset;
+            }
+        }
+
+        m_numFullBuckets--;
+
+        *value = bucket->m_value;
+        return true;
+    }
+
+    //------------------------------------------------------------------------
+    // HashTableBase::TryGetValue: retrieves the value for a key if the key
+    //                             exists in the table.
+    //
+    // Arguments:
+    //    key   - The key to find from the table.
+    //    value - An output parameter that will hold the value for the key.
+    //
+    // Returns:
+    //    True if the key was found in the table; false otherwise.
+    bool TryGetValue(const TKey& key, TValue* value) const
+    {
+        unsigned unused, index;
+        if (!TryGetBucket(TKeyInfo::GetHashCode(key), key, &unused, &index))
+        {
+            return false;
+        }
+
+        *value = m_buckets[index].m_value;
+        return true;
+    }
+};
+
+//------------------------------------------------------------------------
+// HashTable: a simple subclass of `HashTableBase` that always uses heap
+//            storage for its bucket array.
+template<typename TKey, typename TValue, typename TKeyInfo = HashTableInfo<TKey>>
+class HashTable final : public HashTableBase<TKey, TValue, TKeyInfo>
+{
+    typedef HashTableBase<TKey, TValue, TKeyInfo> TBase;
+
+    // Maps the requested size onto a power of 2, as the base table requires.
+    // NOTE(review): `1 << genLog2(x)` is the identity only when x is already a
+    // power of 2; if genLog2 truncates for other inputs, this rounds *down*
+    // rather than up — confirm genLog2's contract for non-power-of-2 values.
+    static unsigned RoundUp(unsigned initialSize)
+    {
+        return 1 << genLog2(initialSize);
+    }
+
+public:
+    // Constructs an empty table; bucket storage is allocated lazily on the
+    // first insertion (AddOrUpdate resizes a zero-capacity table).
+    HashTable(Compiler* compiler)
+        : TBase(compiler, nullptr, 0)
+    {
+    }
+
+    // Constructs a table whose initial bucket array (of RoundUp(initialSize)
+    // buckets) is allocated immediately from the compiler heap.
+    HashTable(Compiler* compiler, unsigned initialSize)
+        : TBase(compiler,
+            reinterpret_cast<typename TBase::Bucket*>(compiler->compGetMem(RoundUp(initialSize) * sizeof(typename TBase::Bucket))),
+            RoundUp(initialSize))
+    {
+    }
+};
+
+//------------------------------------------------------------------------
+// SmallHashTable: an alternative to `HashTable` that stores the initial
+//                 bucket array inline. Most useful for situations where
+//                 the number of key-value pairs that will be stored in
+//                 the map at any given time falls below a certain
+//                 threshold. Switches to heap storage once the initial
+//                 inline storage is exhausted.
+template<typename TKey, typename TValue, unsigned NumInlineBuckets = 8, typename TKeyInfo = HashTableInfo<TKey>>
+class SmallHashTable final : public HashTableBase<TKey, TValue, TKeyInfo>
+{
+    typedef HashTableBase<TKey, TValue, TKeyInfo> TBase;
+
+    enum : unsigned
+    {
+        // ConstLog2 computes floor(log2(N)), so a NumInlineBuckets that is not
+        // a power of 2 rounds the inline capacity *down* to the next lower
+        // power of 2 (e.g. 10 -> 8); the base table requires a power-of-2
+        // bucket count.
+        RoundedNumInlineBuckets = 1 << ConstLog2<NumInlineBuckets>::value
+    };
+
+    // Inline bucket storage, used until growth (Resize allocates replacement
+    // storage from the compiler heap).
+    typename TBase::Bucket m_inlineBuckets[RoundedNumInlineBuckets];
+
+public:
+    SmallHashTable(Compiler* compiler)
+        : TBase(compiler, m_inlineBuckets, RoundedNumInlineBuckets)
+    {
+    }
+};
+
+#endif // _SMALLHASHTABLE_H_
index 7d77e76..f9fb391 100644 (file)
@@ -39,6 +39,48 @@ inline bool isPow2(T i)
     return (i > 0 && ((i-1)&i) == 0);
 }
 
+// Adapter that packages a [begin, end) iterator pair so that it can be
+// consumed directly by a C++11 range-based for loop.
+template<typename TIterator>
+class IteratorPair
+{
+    TIterator m_beginIterator; // First iterator of the range.
+    TIterator m_endIterator;   // Past-the-end iterator of the range.
+
+public:
+    IteratorPair(TIterator begin, TIterator end)
+        : m_beginIterator(begin)
+        , m_endIterator(end)
+    {
+    }
+
+    // Returns the start of the range.
+    inline TIterator begin()
+    {
+        return m_beginIterator;
+    }
+
+    // Returns the end of the range.
+    inline TIterator end()
+    {
+        return m_endIterator;
+    }
+};
+
+// Convenience factory that deduces the iterator type for an IteratorPair.
+template<typename TIterator>
+inline IteratorPair<TIterator> MakeIteratorPair(TIterator begin, TIterator end)
+{
+    return IteratorPair<TIterator>(begin, end);
+}
+
+// Recursive template definition to calculate the base-2 logarithm
+// of a constant value. Computes floor(log2(val)): e.g. ConstLog2<8>::value
+// is 3 and ConstLog2<7>::value is 2. ConstLog2<0>::value is defined as 0
+// (via the <0, acc> specialization) even though log2(0) is mathematically
+// undefined.
+template <unsigned val, unsigned acc = 0>
+struct ConstLog2 { enum { value = ConstLog2<val / 2, acc + 1>::value }; };
+
+template <unsigned acc>
+struct ConstLog2<0, acc> { enum { value = acc }; };
+
+template <unsigned acc>
+struct ConstLog2<1, acc> { enum { value = acc }; };
 
 inline const char* dspBool(bool b)
 {