Improvements for object stack allocation.
author    Eugene Rozenfeld <erozen@microsoft.com>
Sat, 5 Jan 2019 05:27:20 +0000 (21:27 -0800)
committer Eugene Rozenfeld <erozen@microsoft.com>
Tue, 15 Jan 2019 01:33:21 +0000 (17:33 -0800)
This change enables object stack allocation for more cases.

1. Objects with GC fields can now be stack-allocated (a minimal sketch follows this list).
2. Object stack allocation is enabled for x86.
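
For illustration only, here is a minimal C# sketch (hypothetical types, not part of this change) of the kind of allocation that becomes eligible once the CORINFO_FLG_CONTAINS_GC_PTR restriction is removed; actual eligibility still depends on escape analysis and on the JitObjectStackAllocation switch:

    sealed class WithGCField
    {
        public string name;   // GC field: previously blocked stack allocation via CORINFO_FLG_CONTAINS_GC_PTR
        public int    value;
        public WithGCField(int v, string n) { value = v; name = n; }
    }

    static class EligibilitySketch
    {
        static int Sum(int a)
        {
            // 'w' never escapes this method, so the JIT may now place it on the stack
            // even though it contains a GC reference field.
            WithGCField w = new WithGCField(a, "local");
            return w.value + w.name.Length;
        }
    }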

ObjectAllocator updates the types of trees containing references
to possibly-stack-allocated objects to TYP_BYREF or TYP_I_IMPL as appropriate.
That allows us to remove the hacks in gcencode.cpp and refine how these pointers are reported to the GC:
a pointer is not reported at all when we can prove that it always points to a stack-allocated object or is null (typed as TYP_I_IMPL);
it is reported as an interior pointer when it may point to either a stack-allocated or a heap-allocated object (typed as TYP_BYREF);
it is reported as a normal pointer when it always points to a heap-allocated object (typed as TYP_REF).
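
As a hedged sketch of the three categories (hypothetical code; the JIT types in the comments are the ones ObjectAllocator would assign, assuming the non-escaping allocation really is stack-allocated):

    using System;

    class Payload { public object o; }

    static class ReportingSketch
    {
        static readonly Payload s_heap = new Payload();   // always heap-allocated

        static int Categories(bool cond)
        {
            Payload a = new Payload();   // never escapes: retyped to TYP_I_IMPL, not reported to the GC
            Payload b = s_heap;          // always heap:   stays TYP_REF, reported as a normal pointer
            Payload c = cond ? a : b;    // either one:    retyped to TYP_BYREF, reported as an interior pointer
            GC.Collect();                // a collection here relies on the reporting above being correct
            return (c.o == null) ? 1 : 0;
        }
    }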

ObjectAllocator also adds flags to indirections:
GTF_IND_TGTANYWHERE when the indirection's target may be on the heap or on the stack
(so writes through it use a checked write barrier),
or the new GTF_IND_TGT_NOT_HEAP when the target is known to be null or stack memory
(so writes through it need no barrier at all).
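
Continuing the sketch above (another method on the hypothetical ReportingSketch class, reusing Payload and s_heap), the stores below show how those flags translate into write-barrier choices; this mirrors the pattern exercised by the new TestMixOfReportingAndWriteBarriers test:

    static int BarrierKinds(bool cond, object payload)
    {
        Payload stackObj = new Payload();            // assumed non-escaping, so stack-allocated
        Payload heapObj  = s_heap;                   // always heap-allocated
        Payload either   = cond ? stackObj : heapObj;

        stackObj.o = payload;   // indir gets GTF_IND_TGT_NOT_HEAP -> no write barrier
        heapObj.o  = payload;   // plain TYP_REF target            -> normal write barrier
        either.o   = payload;   // indir gets GTF_IND_TGTANYWHERE  -> checked write barrier
        return (either.o == null) ? 1 : 0;
    }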

src/jit/compiler.cpp
src/jit/gcencode.cpp
src/jit/gcinfo.cpp
src/jit/gentree.cpp
src/jit/gentree.h
src/jit/jitgcinfo.h
src/jit/morph.cpp
src/jit/objectalloc.cpp
src/jit/objectalloc.h
tests/issues.targets
tests/src/JIT/opt/ObjectStackAllocation/ObjectStackAllocationTests.cs

diff --git a/src/jit/compiler.cpp b/src/jit/compiler.cpp
index 4041290..58197f9 100644
@@ -9269,6 +9269,10 @@ int cTreeFlagsIR(Compiler* comp, GenTree* tree)
                 {
                     chars += printf("[IND_TGTANYWHERE]");
                 }
+                if (tree->gtFlags & GTF_IND_TGT_NOT_HEAP)
+                {
+                    chars += printf("[IND_TGT_NOT_HEAP]");
+                }
                 if (tree->gtFlags & GTF_IND_TLS_REF)
                 {
                     chars += printf("[IND_TLS_REF]");
diff --git a/src/jit/gcencode.cpp b/src/jit/gcencode.cpp
index af37304..3d3cd88 100644
@@ -4245,7 +4245,6 @@ void GCInfo::gcMakeRegPtrTable(
                 // Or in byref_OFFSET_FLAG for 'byref' pointer tracking
                 flags = (GcSlotFlags)(flags | GC_SLOT_INTERIOR);
             }
-            gcUpdateFlagForStackAllocatedObjects(flags);
 
             if (varDsc->lvPinned)
             {
@@ -4345,7 +4344,6 @@ void GCInfo::gcMakeRegPtrTable(
                 {
                     flags = (GcSlotFlags)(flags | GC_SLOT_INTERIOR);
                 }
-                gcUpdateFlagForStackAllocatedObjects(flags);
 
                 GcStackSlotBase stackSlotBase = GC_SP_REL;
                 if (compiler->isFramePointerUsed())
@@ -4730,7 +4728,6 @@ void GCInfo::gcInfoRecordGCRegStateChange(GcInfoEncoder* gcInfoEncoder,
         {
             regFlags = (GcSlotFlags)(regFlags | GC_SLOT_INTERIOR);
         }
-        gcUpdateFlagForStackAllocatedObjects(regFlags);
 
         RegSlotIdKey rskey(regNum, regFlags);
         GcSlotId     regSlotId;
@@ -4821,7 +4818,6 @@ void GCInfo::gcMakeVarPtrTable(GcInfoEncoder* gcInfoEncoder, MakeRegPtrMode mode
         {
             flags = (GcSlotFlags)(flags | GC_SLOT_INTERIOR);
         }
-        gcUpdateFlagForStackAllocatedObjects(flags);
 
         if ((lowBits & pinned_OFFSET_FLAG) != 0)
         {
@@ -4925,30 +4921,6 @@ void GCInfo::gcInfoRecordGCStackArgsDead(GcInfoEncoder* gcInfoEncoder,
     }
 }
 
-//------------------------------------------------------------------------
-// gcUpdateFlagForStackAllocatedObjects: Update the flags to handle a possibly stack-allocated object.
-//                                       allocation.
-// Arguments:
-//    flags - flags to update
-//
-//
-// Notes:
-//    TODO-ObjectStackAllocation: This is a temporary conservative implementation.
-//    Currently variables pointing to heap and/or stack allocated objects have type TYP_REF so we
-//    conservatively report them as INTERIOR.
-//    Ideally we should have the following types for object pointers:
-//        1. TYP_I_IMPL for variables always pointing to stack-allocated objects (not reporting to GC)
-//        2. TYP_REF for variables always pointing to heap-allocated objects (reporting as normal objects to GC)
-//        3. TYP_BYREF for variables that may point to the stack or to the heap (reporting as interior objects to GC)
-
-void GCInfo::gcUpdateFlagForStackAllocatedObjects(GcSlotFlags& flags)
-{
-    if ((compiler->optMethodFlags & OMF_HAS_OBJSTACKALLOC) != 0)
-    {
-        flags = (GcSlotFlags)(flags | GC_SLOT_INTERIOR);
-    }
-}
-
 #undef GCENCODER_WITH_LOGGING
 
 #endif // !JIT32_GCENCODER
diff --git a/src/jit/gcinfo.cpp b/src/jit/gcinfo.cpp
index ed4c468..14ca8a8 100644
@@ -267,6 +267,12 @@ GCInfo::WriteBarrierForm GCInfo::gcIsWriteBarrierCandidate(GenTree* tgt, GenTree
                 // This case occurs for Span<T>.
                 return WBF_NoBarrier;
             }
+            if (tgt->gtFlags & GTF_IND_TGT_NOT_HEAP)
+            {
+                // This indirection is not to the heap.
+                // This case occurs for stack-allocated objects.
+                return WBF_NoBarrier;
+            }
             return gcWriteBarrierFormFromTargetAddress(tgt->gtOp.gtOp1);
 
         case GT_LEA:
diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp
index d983111..4610579 100644
@@ -9337,6 +9337,12 @@ void Compiler::gtDispNode(GenTree* tree, IndentStack* indentStack, __in __in_z _
                         --msgLength;
                         break;
                     }
+                    if (tree->gtFlags & GTF_IND_TGT_NOT_HEAP)
+                    {
+                        printf("s");
+                        --msgLength;
+                        break;
+                    }
                     if (tree->gtFlags & GTF_IND_INVARIANT)
                     {
                         printf("#");
@@ -14439,6 +14445,11 @@ GenTree* Compiler::gtNewTempAssign(
         {
             ok = true;
         }
+        // 3) TYP_BYREF = TYP_REF when object stack allocation is enabled
+        else if (JitConfig.JitObjectStackAllocation() && (dstTyp == TYP_BYREF) && (valTyp == TYP_REF))
+        {
+            ok = true;
+        }
 
         if (!ok)
         {
diff --git a/src/jit/gentree.h b/src/jit/gentree.h
index 6a1fd4a..e639d54 100644
@@ -798,6 +798,7 @@ public:
 #define GTF_INX_REFARR_LAYOUT       0x20000000 // GT_INDEX
 #define GTF_INX_STRING_LAYOUT       0x40000000 // GT_INDEX -- this uses the special string array layout
 
+#define GTF_IND_TGT_NOT_HEAP        0x80000000 // GT_IND   -- the target is not on the heap
 #define GTF_IND_VOLATILE            0x40000000 // GT_IND   -- the load or store must use volatile sematics (this is a nop on X86)
 #define GTF_IND_NONFAULTING         0x20000000 // Operations for which OperIsIndir() is true  -- An indir that cannot fault.
                                                // Same as GTF_ARRLEN_NONFAULTING.
@@ -818,7 +819,7 @@ public:
 
 #define GTF_IND_FLAGS \
     (GTF_IND_VOLATILE | GTF_IND_TGTANYWHERE | GTF_IND_NONFAULTING | GTF_IND_TLS_REF |          \
-     GTF_IND_UNALIGNED | GTF_IND_INVARIANT | GTF_IND_ARR_INDEX)
+     GTF_IND_UNALIGNED | GTF_IND_INVARIANT | GTF_IND_ARR_INDEX | GTF_IND_TGT_NOT_HEAP)
 
 #define GTF_CLS_VAR_VOLATILE        0x40000000 // GT_FIELD/GT_CLS_VAR -- same as GTF_IND_VOLATILE
 #define GTF_CLS_VAR_INITCLASS       0x20000000 // GT_FIELD/GT_CLS_VAR -- same as GTF_FLD_INITCLASS
@@ -1807,8 +1808,11 @@ public:
         while (node->gtOper == GT_COMMA)
         {
             node = node->gtGetOp2();
-            assert(node->gtType == oldType);
-            node->gtType = newType;
+            if (node->gtType != newType)
+            {
+                assert(node->gtType == oldType);
+                node->gtType = newType;
+            }
         }
     }
 
diff --git a/src/jit/jitgcinfo.h b/src/jit/jitgcinfo.h
index 1d643e9..193e77c 100644
@@ -231,9 +231,6 @@ public:
                                      regPtrDsc*     genStackPtrFirst,
                                      regPtrDsc*     genStackPtrLast);
 
-    // Update the flags for a stack allocated object
-    void gcUpdateFlagForStackAllocatedObjects(GcSlotFlags& flags);
-
 #endif
 
 #if MEASURE_PTRTAB_SIZE
diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp
index 1cf4f37..5f675dd 100644
@@ -16857,14 +16857,10 @@ void Compiler::fgMorph()
     // local variable allocation on the stack.
     ObjectAllocator objectAllocator(this); // PHASE_ALLOCATE_OBJECTS
 
-// TODO-ObjectStackAllocation: Enable the optimization for architectures using
-// JIT32_GCENCODER (i.e., x86).
-#ifndef JIT32_GCENCODER
     if (JitConfig.JitObjectStackAllocation() && opts.OptimizationEnabled())
     {
         objectAllocator.EnableObjectStackAllocation();
     }
-#endif // JIT32_GCENCODER
 
     objectAllocator.Run();
 
diff --git a/src/jit/objectalloc.cpp b/src/jit/objectalloc.cpp
index 7b9fac8..1a8f779 100644
@@ -49,6 +49,7 @@ void ObjectAllocator::DoPhase()
 
     if (didStackAllocate)
     {
+        ComputeStackObjectPointers(&m_bitVecTraits);
         RewriteUses();
     }
 }
@@ -66,18 +67,29 @@ void ObjectAllocator::MarkLclVarAsEscaping(unsigned int lclNum)
 }
 
 //------------------------------------------------------------------------------
-// IsLclVarEscaping : Check if the local variable has been marked as escaping.
+// MarkLclVarAsPossiblyStackPointing : Mark local variable as possibly pointing
+//                                     to a stack-allocated object.
 //
 //
 // Arguments:
-//    lclNum  - Local variable number
+//    lclNum  - Possibly stack-object-pointing local variable number
+
+void ObjectAllocator::MarkLclVarAsPossiblyStackPointing(unsigned int lclNum)
+{
+    BitVecOps::AddElemD(&m_bitVecTraits, m_PossiblyStackPointingPointers, lclNum);
+}
+
+//------------------------------------------------------------------------------
+// MarkLclVarAsDefinitelyStackPointing : Mark local variable as definitely pointing
+//                                       to a stack-allocated object.
 //
-// Return Value:
-//    True if the local variable has been marked as escaping; false otherwise
+//
+// Arguments:
+//    lclNum  - Definitely stack-object-pointing local variable number
 
-bool ObjectAllocator::IsLclVarEscaping(unsigned int lclNum)
+void ObjectAllocator::MarkLclVarAsDefinitelyStackPointing(unsigned int lclNum)
 {
-    return BitVecOps::IsMember(&m_bitVecTraits, m_EscapingPointers, lclNum);
+    BitVecOps::AddElemD(&m_bitVecTraits, m_DefinitelyStackPointingPointers, lclNum);
 }
 
 //------------------------------------------------------------------------------
@@ -163,7 +175,7 @@ void ObjectAllocator::MarkEscapingVarsAndBuildConnGraph()
 
                 if (m_allocator->CanLclVarEscapeViaParentStack(&m_ancestors, lclNum))
                 {
-                    if (!m_allocator->IsLclVarEscaping(lclNum))
+                    if (!m_allocator->CanLclVarEscape(lclNum))
                     {
                         JITDUMP("V%02u first escapes via [%06u]\n", lclNum, m_compiler->dspTreeID(tree));
                     }
@@ -178,7 +190,7 @@ void ObjectAllocator::MarkEscapingVarsAndBuildConnGraph()
     {
         var_types type = comp->lvaTable[lclNum].TypeGet();
 
-        if (type == TYP_REF || type == TYP_I_IMPL || type == TYP_BYREF)
+        if (type == TYP_REF || genActualType(type) == TYP_I_IMPL || type == TYP_BYREF)
         {
             m_ConnGraphAdjacencyMatrix[lclNum] = BitVecOps::MakeEmpty(&m_bitVecTraits);
 
@@ -230,18 +242,78 @@ void ObjectAllocator::ComputeEscapingNodes(BitVecTraits* bitVecTraits, BitVec& e
 
         while (iterator.NextElem(&lclNum))
         {
-            doOneMoreIteration = true;
-
-            // newEscapingNodes         = adjacentNodes[lclNum]
-            BitVecOps::Assign(bitVecTraits, newEscapingNodes, m_ConnGraphAdjacencyMatrix[lclNum]);
-            // newEscapingNodes         = newEscapingNodes \ escapingNodes
-            BitVecOps::DiffD(bitVecTraits, newEscapingNodes, escapingNodes);
-            // escapingNodesToProcess   = escapingNodesToProcess U newEscapingNodes
-            BitVecOps::UnionD(bitVecTraits, escapingNodesToProcess, newEscapingNodes);
-            // escapingNodes = escapingNodes U newEscapingNodes
-            BitVecOps::UnionD(bitVecTraits, escapingNodes, newEscapingNodes);
-            // escapingNodesToProcess   = escapingNodesToProcess \ { lclNum }
-            BitVecOps::RemoveElemD(bitVecTraits, escapingNodesToProcess, lclNum);
+            if (m_ConnGraphAdjacencyMatrix[lclNum] != nullptr)
+            {
+                doOneMoreIteration = true;
+
+                // newEscapingNodes         = adjacentNodes[lclNum]
+                BitVecOps::Assign(bitVecTraits, newEscapingNodes, m_ConnGraphAdjacencyMatrix[lclNum]);
+                // newEscapingNodes         = newEscapingNodes \ escapingNodes
+                BitVecOps::DiffD(bitVecTraits, newEscapingNodes, escapingNodes);
+                // escapingNodesToProcess   = escapingNodesToProcess U newEscapingNodes
+                BitVecOps::UnionD(bitVecTraits, escapingNodesToProcess, newEscapingNodes);
+                // escapingNodes = escapingNodes U newEscapingNodes
+                BitVecOps::UnionD(bitVecTraits, escapingNodes, newEscapingNodes);
+                // escapingNodesToProcess   = escapingNodesToProcess \ { lclNum }
+                BitVecOps::RemoveElemD(bitVecTraits, escapingNodesToProcess, lclNum);
+            }
+        }
+    }
+}
+
+//------------------------------------------------------------------------------
+// ComputeStackObjectPointers : Given an initial set of possibly stack-pointing nodes,
+//                              and an initial set of definitely stack-pointing nodes,
+//                              update both sets by computing the nodes reachable
+//                              from those sets in the reverse connection graph.
+//
+// Arguments:
+//    bitVecTraits                    - Bit vector traits
+
+void ObjectAllocator::ComputeStackObjectPointers(BitVecTraits* bitVecTraits)
+{
+    bool changed = true;
+
+    while (changed)
+    {
+        changed = false;
+        for (unsigned int lclNum = 0; lclNum < comp->lvaCount; ++lclNum)
+        {
+            LclVarDsc* lclVarDsc = comp->lvaTable + lclNum;
+            var_types  type      = lclVarDsc->TypeGet();
+
+            if (type == TYP_REF || type == TYP_I_IMPL || type == TYP_BYREF)
+            {
+                if (!MayLclVarPointToStack(lclNum) &&
+                    !BitVecOps::IsEmptyIntersection(bitVecTraits, m_PossiblyStackPointingPointers,
+                                                    m_ConnGraphAdjacencyMatrix[lclNum]))
+                {
+                    // We discovered a new pointer that may point to the stack.
+                    MarkLclVarAsPossiblyStackPointing(lclNum);
+
+                    // Check if this pointer always points to the stack.
+                    if (lclVarDsc->lvSingleDef == 1)
+                    {
+                        // Check if we know what is assigned to this pointer.
+                        unsigned bitCount = BitVecOps::Count(bitVecTraits, m_ConnGraphAdjacencyMatrix[lclNum]);
+                        assert(bitCount <= 1);
+                        if (bitCount == 1)
+                        {
+                            BitVecOps::Iter iter(bitVecTraits, m_ConnGraphAdjacencyMatrix[lclNum]);
+                            unsigned        rhsLclNum = 0;
+                            iter.NextElem(&rhsLclNum);
+
+                            if (DoesLclVarPointToStack(rhsLclNum))
+                            {
+                                // The only assignment to lclNum local is definitely-stack-pointing
+                                // rhsLclNum local so lclNum local is also definitely-stack-pointing.
+                                MarkLclVarAsDefinitelyStackPointing(lclNum);
+                            }
+                        }
+                    }
+                    changed = true;
+                }
+            }
         }
     }
 }
@@ -258,7 +330,9 @@ void ObjectAllocator::ComputeEscapingNodes(BitVecTraits* bitVecTraits, BitVec& e
 
 bool ObjectAllocator::MorphAllocObjNodes()
 {
-    bool didStackAllocate = false;
+    bool didStackAllocate             = false;
+    m_PossiblyStackPointingPointers   = BitVecOps::MakeEmpty(&m_bitVecTraits);
+    m_DefinitelyStackPointingPointers = BitVecOps::MakeEmpty(&m_bitVecTraits);
 
     BasicBlock* block;
 
@@ -321,6 +395,10 @@ bool ObjectAllocator::MorphAllocObjNodes()
 
                     const unsigned int stackLclNum = MorphAllocObjNodeIntoStackAlloc(asAllocObj, block, stmt);
                     m_HeapLocalToStackLocalMap.AddOrUpdate(lclNum, stackLclNum);
+                    // We keep the set of possibly-stack-pointing pointers as a superset of the set of
+                    // definitely-stack-pointing pointers. All definitely-stack-pointing pointers are in both sets.
+                    MarkLclVarAsDefinitelyStackPointing(lclNum);
+                    MarkLclVarAsPossiblyStackPointing(lclNum);
                     stmt->gtStmtExpr->gtBashToNOP();
                     comp->optMethodFlags |= OMF_HAS_OBJSTACKALLOC;
                     didStackAllocate = true;
@@ -623,6 +701,119 @@ bool ObjectAllocator::CanLclVarEscapeViaParentStack(ArrayStack<GenTree*>* parent
     return canLclVarEscapeViaParentStack;
 }
 
+//------------------------------------------------------------------------
+// UpdateAncestorTypes: Update types of some ancestor nodes of a possibly-stack-pointing
+//                      tree from TYP_REF to TYP_BYREF or TYP_I_IMPL.
+//
+// Arguments:
+//    tree            - Possibly-stack-pointing tree
+//    parentStack     - Parent stack of the possibly-stack-pointing tree
+//    newType         - New type of the possibly-stack-pointing tree
+//
+// Notes:
+//                      If newType is TYP_I_IMPL, the tree is definitely pointing to the stack (or is null);
+//                      if newType is TYP_BYREF, the tree may point to the stack.
+//                      In addition to updating types this method may set GTF_IND_TGTANYWHERE
+//                      or GTF_IND_TGT_NOT_HEAP on ancestor indirections to help codegen
+//                      with write barrier selection.
+
+void ObjectAllocator::UpdateAncestorTypes(GenTree* tree, ArrayStack<GenTree*>* parentStack, var_types newType)
+{
+    assert(newType == TYP_BYREF || newType == TYP_I_IMPL);
+    assert(parentStack != nullptr);
+    int parentIndex = 1;
+
+    bool keepChecking = true;
+
+    while (keepChecking && (parentStack->Height() > parentIndex))
+    {
+        GenTree* parent = parentStack->Index(parentIndex);
+        keepChecking    = false;
+
+        switch (parent->OperGet())
+        {
+            case GT_ASG:
+            {
+                GenTree* op2 = parent->AsOp()->gtGetOp2();
+
+                if ((op2 == tree) && (parent->TypeGet() == TYP_REF))
+                {
+                    assert(parent->AsOp()->gtGetOp1()->OperGet() == GT_LCL_VAR);
+                    parent->ChangeType(newType);
+                }
+
+                break;
+            }
+
+            case GT_EQ:
+            case GT_NE:
+                break;
+
+            case GT_COMMA:
+                if (parent->AsOp()->gtGetOp1() == parentStack->Index(parentIndex - 1))
+                {
+                    // Left child of GT_COMMA, it will be discarded
+                    break;
+                }
+                __fallthrough;
+            case GT_COLON:
+            case GT_QMARK:
+            case GT_ADD:
+                if (parent->TypeGet() == TYP_REF)
+                {
+                    parent->ChangeType(newType);
+                }
+                ++parentIndex;
+                keepChecking = true;
+                break;
+
+            case GT_FIELD:
+            case GT_IND:
+            {
+                if (newType == TYP_BYREF)
+                {
+                    // This ensures that a checked write barrier is used when writing
+                    // to this field/indirection (it can be inside a stack-allocated object).
+                    parent->gtFlags |= GTF_IND_TGTANYWHERE;
+                }
+                else
+                {
+                    // This indicates that a write barrier is not needed when writing
+                    // to this field/indirection since the address is not pointing to the heap.
+                    // It's either null or points to inside a stack-allocated object.
+                    parent->gtFlags |= GTF_IND_TGT_NOT_HEAP;
+                }
+                int grandParentIndex = parentIndex + 1;
+
+                if (parentStack->Height() > grandParentIndex)
+                {
+                    GenTree* grandParent = parentStack->Index(grandParentIndex);
+                    if (grandParent->OperGet() == GT_ADDR)
+                    {
+                        if (grandParent->TypeGet() == TYP_REF)
+                        {
+                            grandParent->ChangeType(newType);
+                        }
+                        parentIndex += 2;
+                        keepChecking = true;
+                    }
+                }
+                break;
+            }
+
+            default:
+                unreached();
+        }
+
+        if (keepChecking)
+        {
+            tree = parentStack->Index(parentIndex - 1);
+        }
+    }
+
+    return;
+}
+
 #ifdef DEBUG
 //------------------------------------------------------------------------
 // AssertWhenAllocObjFoundVisitor: Look for a GT_ALLOCOBJ node and assert
@@ -662,6 +853,7 @@ void ObjectAllocator::RewriteUses()
         {
             DoPreOrder    = true,
             DoLclVarsOnly = true,
+            ComputeStack  = true,
         };
 
         RewriteUsesVisitor(ObjectAllocator* allocator)
@@ -677,12 +869,35 @@ void ObjectAllocator::RewriteUses()
 
             const unsigned int lclNum    = tree->AsLclVarCommon()->gtLclNum;
             unsigned int       newLclNum = BAD_VAR_NUM;
+            LclVarDsc*         lclVarDsc = m_compiler->lvaTable + lclNum;
 
-            if (m_allocator->m_HeapLocalToStackLocalMap.TryGetValue(lclNum, &newLclNum))
+            if ((lclNum < BitVecTraits::GetSize(&m_allocator->m_bitVecTraits)) &&
+                m_allocator->MayLclVarPointToStack(lclNum))
             {
-                GenTree* newTree =
-                    m_compiler->gtNewOperNode(GT_ADDR, TYP_I_IMPL, m_compiler->gtNewLclvNode(newLclNum, TYP_STRUCT));
-                *use = newTree;
+                var_types newType;
+                if (m_allocator->m_HeapLocalToStackLocalMap.TryGetValue(lclNum, &newLclNum))
+                {
+                    newType = TYP_I_IMPL;
+                    tree =
+                        m_compiler->gtNewOperNode(GT_ADDR, newType, m_compiler->gtNewLclvNode(newLclNum, TYP_STRUCT));
+                    *use = tree;
+                }
+                else
+                {
+                    newType = m_allocator->DoesLclVarPointToStack(lclNum) ? TYP_I_IMPL : TYP_BYREF;
+                    if (tree->TypeGet() == TYP_REF)
+                    {
+                        tree->ChangeType(newType);
+                    }
+                }
+
+                if (lclVarDsc->lvType != newType)
+                {
+                    JITDUMP("changing the type of V%02u from %s to %s\n", lclNum, varTypeName(lclVarDsc->lvType),
+                            varTypeName(newType));
+                    lclVarDsc->lvType = newType;
+                }
+                m_allocator->UpdateAncestorTypes(tree, &m_ancestors, newType);
             }
 
             return Compiler::fgWalkResult::WALK_CONTINUE;
diff --git a/src/jit/objectalloc.h b/src/jit/objectalloc.h
index 7be82eb..b114c2b 100644
@@ -26,10 +26,14 @@ class ObjectAllocator final : public Phase
 
     //===============================================================================
     // Data members
-    bool                m_IsObjectStackAllocationEnabled;
-    bool                m_AnalysisDone;
-    BitVecTraits        m_bitVecTraits;
-    BitVec              m_EscapingPointers;
+    bool         m_IsObjectStackAllocationEnabled;
+    bool         m_AnalysisDone;
+    BitVecTraits m_bitVecTraits;
+    BitVec       m_EscapingPointers;
+    // We keep the set of possibly-stack-pointing pointers as a superset of the set of
+    // definitely-stack-pointing pointers. All definitely-stack-pointing pointers are in both sets.
+    BitVec              m_PossiblyStackPointingPointers;
+    BitVec              m_DefinitelyStackPointingPointers;
     LocalToLocalMap     m_HeapLocalToStackLocalMap;
     BitSetShortLongRep* m_ConnGraphAdjacencyMatrix;
 
@@ -46,18 +50,23 @@ protected:
 private:
     bool CanAllocateLclVarOnStack(unsigned int lclNum, CORINFO_CLASS_HANDLE clsHnd);
     bool CanLclVarEscape(unsigned int lclNum);
+    void MarkLclVarAsPossiblyStackPointing(unsigned int lclNum);
+    void MarkLclVarAsDefinitelyStackPointing(unsigned int lclNum);
+    bool MayLclVarPointToStack(unsigned int lclNum);
+    bool DoesLclVarPointToStack(unsigned int lclNum);
     void DoAnalysis();
     void MarkLclVarAsEscaping(unsigned int lclNum);
-    bool IsLclVarEscaping(unsigned int lclNum);
     void MarkEscapingVarsAndBuildConnGraph();
     void AddConnGraphEdge(unsigned int sourceLclNum, unsigned int targetLclNum);
     void ComputeEscapingNodes(BitVecTraits* bitVecTraits, BitVec& escapingNodes);
+    void ComputeStackObjectPointers(BitVecTraits* bitVecTraits);
     bool     MorphAllocObjNodes();
     void     RewriteUses();
     GenTree* MorphAllocObjNodeIntoHelperCall(GenTreeAllocObj* allocObj);
     unsigned int MorphAllocObjNodeIntoStackAlloc(GenTreeAllocObj* allocObj, BasicBlock* block, GenTreeStmt* stmt);
     struct BuildConnGraphVisitorCallbackData;
     bool CanLclVarEscapeViaParentStack(ArrayStack<GenTree*>* parentStack, unsigned int lclNum);
+    void UpdateAncestorTypes(GenTree* tree, ArrayStack<GenTree*>* parentStack, var_types newType);
 #ifdef DEBUG
     static Compiler::fgWalkResult AssertWhenAllocObjFoundVisitor(GenTree** pTree, Compiler::fgWalkData* data);
 #endif // DEBUG
@@ -75,9 +84,11 @@ inline ObjectAllocator::ObjectAllocator(Compiler* comp)
 {
     // Disable checks since this phase runs before fgComputePreds phase.
     // Checks are not expected to pass before fgComputePreds.
-    doChecks                   = false;
-    m_EscapingPointers         = BitVecOps::UninitVal();
-    m_ConnGraphAdjacencyMatrix = nullptr;
+    doChecks                          = false;
+    m_EscapingPointers                = BitVecOps::UninitVal();
+    m_PossiblyStackPointingPointers   = BitVecOps::UninitVal();
+    m_DefinitelyStackPointingPointers = BitVecOps::UninitVal();
+    m_ConnGraphAdjacencyMatrix        = nullptr;
 }
 
 //------------------------------------------------------------------------
@@ -119,12 +130,6 @@ inline bool ObjectAllocator::CanAllocateLclVarOnStack(unsigned int lclNum, CORIN
 
     DWORD classAttribs = comp->info.compCompHnd->getClassAttribs(clsHnd);
 
-    if ((classAttribs & CORINFO_FLG_CONTAINS_GC_PTR) != 0)
-    {
-        // TODO-ObjectStackAllocation: enable stack allocation of objects with gc fields
-        return false;
-    }
-
     if ((classAttribs & CORINFO_FLG_VALUECLASS) != 0)
     {
         // TODO-ObjectStackAllocation: enable stack allocation of boxed structs
@@ -153,10 +158,42 @@ inline bool ObjectAllocator::CanAllocateLclVarOnStack(unsigned int lclNum, CORIN
 
 inline bool ObjectAllocator::CanLclVarEscape(unsigned int lclNum)
 {
-    assert(m_AnalysisDone);
     return BitVecOps::IsMember(&m_bitVecTraits, m_EscapingPointers, lclNum);
 }
 
+//------------------------------------------------------------------------
+// MayLclVarPointToStack:          Returns true iff local variable may
+//                                 point to a stack-allocated object
+//
+// Arguments:
+//    lclNum   - Local variable number
+//
+// Return Value:
+//    Returns true iff local variable may point to a stack-allocated object
+
+inline bool ObjectAllocator::MayLclVarPointToStack(unsigned int lclNum)
+{
+    assert(m_AnalysisDone);
+    return BitVecOps::IsMember(&m_bitVecTraits, m_PossiblyStackPointingPointers, lclNum);
+}
+
+//------------------------------------------------------------------------
+// DoesLclVarPointToStack:         Returns true iff local variable definitely
+//                                 points to a stack-allocated object (or is null)
+//
+// Arguments:
+//    lclNum   - Local variable number
+//
+// Return Value:
+//    Returns true iff local variable definitely points to a stack-allocated object
+//    (or is null)
+
+inline bool ObjectAllocator::DoesLclVarPointToStack(unsigned int lclNum)
+{
+    assert(m_AnalysisDone);
+    return BitVecOps::IsMember(&m_bitVecTraits, m_DefinitelyStackPointingPointers, lclNum);
+}
+
 //===============================================================================
 
 #endif // OBJECTALLOC_H
diff --git a/tests/issues.targets b/tests/issues.targets
index fd1d965..b89a010 100644
         </ExcludeList>
     </ItemGroup>
 
-    <!-- x86 specific excludes -->
-    <ItemGroup Condition="'$(XunitTestBinBase)' != '' and '$(BuildArch)' == 'x86'">
-      <ExcludeList Include="$(XunitTestBinBase)/JIT/opt/ObjectStackAllocation/ObjectStackAllocationTests/*">
-        <Issue>Object stack allocation is not supported on x86 yet</Issue>
-      </ExcludeList>
-    </ItemGroup>
-
     <!-- Windows arm32 specific excludes -->
     <ItemGroup Condition="'$(XunitTestBinBase)' != '' and ('$(BuildArch)' == 'arm' or '$(AltJitArch)' == 'arm') and '$(TargetsWindows)' == 'true'">
         <ExcludeList Include="$(XunitTestBinBase)/Interop/COM/NETClients/Primitives/NETClientPrimitives/*">
diff --git a/tests/src/JIT/opt/ObjectStackAllocation/ObjectStackAllocationTests.cs b/tests/src/JIT/opt/ObjectStackAllocation/ObjectStackAllocationTests.cs
index 9e66524..7c524ba 100644
@@ -33,7 +33,7 @@ namespace ObjectStackAllocation
         }
     }
 
-    class SimpleClassWithGCField : SimpleClassA
+    sealed class SimpleClassWithGCField : SimpleClassA
     {
         public object o;
 
@@ -80,6 +80,13 @@ namespace ObjectStackAllocation
     {
         static volatile int f1 = 5;
         static volatile int f2 = 7;
+        static SimpleClassA classA;
+        static SimpleClassWithGCField classWithGCField;
+        static string str0;
+        static string str1;
+        static string str2;
+        static string str3;
+        static string str4;
 
         delegate int Test();
 
@@ -89,12 +96,24 @@ namespace ObjectStackAllocation
         {
             AllocationKind expectedAllocationKind = AllocationKind.Stack;
             if (GCStressEnabled()) {
+                Console.WriteLine("GCStress is enabled");
                 expectedAllocationKind = AllocationKind.Undefined;
             }
             else if (!SPCOptimizationsEnabled()) {
+                Console.WriteLine("System.Private.CoreLib.dll optimizations are disabled");
                 expectedAllocationKind = AllocationKind.Heap;
             }
 
+            classA = new SimpleClassA(f1, f2);
+
+            classWithGCField = new SimpleClassWithGCField(f1, f2, null);
+
+            str0 = "str_zero";
+            str1 = "str_one";
+            str2 = "str_two";
+            str3 = "str_three";
+            str4 = "str_four";
+
             CallTestAndVerifyAllocation(AllocateSimpleClassAndAddFields, 12, expectedAllocationKind);
 
             CallTestAndVerifyAllocation(AllocateSimpleClassesAndEQCompareThem, 0, expectedAllocationKind);
@@ -107,23 +126,25 @@ namespace ObjectStackAllocation
 
             CallTestAndVerifyAllocation(AllocateClassWithNestedStructAndAddFields, 24, expectedAllocationKind);
 
+            CallTestAndVerifyAllocation(AllocateSimpleClassAndCheckTypeNoHelper, 1, expectedAllocationKind);
+
+            CallTestAndVerifyAllocation(AllocateSimpleClassWithGCFieldAndAddFields, 12, expectedAllocationKind);
+
+            CallTestAndVerifyAllocation(AllocateSimpleClassAndAssignRefToAField, 12, expectedAllocationKind);
+
+            CallTestAndVerifyAllocation(TestMixOfReportingAndWriteBarriers, 34, expectedAllocationKind);
+
             // The remaining tests currently never allocate on the stack
             if (expectedAllocationKind == AllocationKind.Stack) {
                 expectedAllocationKind = AllocationKind.Heap;
             }
 
             // This test calls CORINFO_HELP_ISINSTANCEOFCLASS
-            CallTestAndVerifyAllocation(AllocateSimpleClassAndCheckType, 1, expectedAllocationKind);
+            CallTestAndVerifyAllocation(AllocateSimpleClassAndCheckTypeHelper, 1, expectedAllocationKind);
 
             // This test calls CORINFO_HELP_CHKCASTCLASS_SPECIAL
             CallTestAndVerifyAllocation(AllocateSimpleClassAndCast, 7, expectedAllocationKind);
 
-            // Stack allocation of classes with GC fields is currently disabled
-            CallTestAndVerifyAllocation(AllocateSimpleClassWithGCFieldAndAddFields, 12, expectedAllocationKind);
-
-            // Assigning class ref to a field of another object currently always disables stack allocation
-            CallTestAndVerifyAllocation(AllocateSimpleClassAndAssignRefToAField, 12, expectedAllocationKind);
-
             // Stack allocation of boxed structs is currently disabled
             CallTestAndVerifyAllocation(BoxSimpleStructAndAddFields, 12, expectedAllocationKind);
 
@@ -183,6 +204,7 @@ namespace ObjectStackAllocation
         {
             SimpleClassA a1 = new SimpleClassA(f1, f2);
             SimpleClassA a2 = (f1 == 0) ? a1 : new SimpleClassA(f2, f1);
+            GC.Collect();
             return (a1 == a2) ? 1 : 0;
         }
 
@@ -191,20 +213,31 @@ namespace ObjectStackAllocation
         {
             SimpleClassA a1 = new SimpleClassA(f1, f2);
             SimpleClassA a2 = (f1 == 0) ? a1 : new SimpleClassA(f2, f1);
+            GC.Collect();
             return (a1 != a2) ? 1 : 0;
         }
 
         [MethodImpl(MethodImplOptions.NoInlining)]
-        static int AllocateSimpleClassAndCheckType()
+        static int AllocateSimpleClassAndCheckTypeNoHelper()
+        {
+            object o = (f1 == 0) ? (object)new SimpleClassB(f1, f2) : (object)new SimpleClassA(f1, f2);
+            GC.Collect();
+            return (o is SimpleClassB) ? 0 : 1;
+        }
+
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        static int AllocateSimpleClassAndCheckTypeHelper()
         {
             object o = (f1 == 0) ? (object)new SimpleClassB(f1, f2) : (object)new SimpleClassA(f1, f2);
-            return (o is SimpleClassB) || !(o is SimpleClassA) ? 0 : 1;
+            GC.Collect();
+            return !(o is SimpleClassA) ? 0 : 1;
         }
 
         [MethodImpl(MethodImplOptions.NoInlining)]
         static int AllocateSimpleClassAndCast()
         {
             object o = (f1 == 0) ? (object)new SimpleClassB(f1, f2) : (object)new SimpleClassA(f2, f1);
+            GC.Collect();
             return ((SimpleClassA)o).f1;
         }
 
@@ -212,6 +245,7 @@ namespace ObjectStackAllocation
         static int AllocateSimpleClassAndGetField()
         {
             SimpleClassA a = new SimpleClassA(f1, f2);
+            GC.Collect();
             ref int f = ref a.f2;
             return f;
         }
@@ -220,6 +254,7 @@ namespace ObjectStackAllocation
         static int AllocateClassWithNestedStructAndGetField()
         {
             ClassWithNestedStruct c = new ClassWithNestedStruct(f1, f2);
+            GC.Collect();
             ref int f = ref c.ns.s.f1;
             return f;
         }
@@ -228,6 +263,7 @@ namespace ObjectStackAllocation
         static int AllocateClassWithNestedStructAndAddFields()
         {
             ClassWithNestedStruct c = new ClassWithNestedStruct(f1, f2);
+            GC.Collect();
             return c.ns.f1 + c.ns.f2 + c.ns.s.f1 + c.ns.s.f2;
         }
 
@@ -235,14 +271,15 @@ namespace ObjectStackAllocation
         static int AllocateSimpleClassWithGCFieldAndAddFields()
         {
             SimpleClassWithGCField c = new SimpleClassWithGCField(f1, f2, null);
+            GC.Collect();
             return c.f1 + c.f2;
         }
 
         static int AllocateSimpleClassAndAssignRefToAField()
         {
             SimpleClassWithGCField c = new SimpleClassWithGCField(f1, f2, null);
-            SimpleClassA a = new SimpleClassA(f1, f2);
-            c.o = a;
+            GC.Collect();
+            c.o = classA;
             return c.f1 + c.f2;
         }
 
@@ -253,7 +290,38 @@ namespace ObjectStackAllocation
             str.f1 = f1;
             str.f2 = f2;
             object boxedSimpleStruct = (object)str;
+            GC.Collect();
             return ((SimpleStruct)boxedSimpleStruct).f1 + ((SimpleStruct)boxedSimpleStruct).f2;
         }
+
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        static int TestMixOfReportingAndWriteBarriers()
+        {
+            // c1 doesn't escape and is allocated on the stack
+            SimpleClassWithGCField c1 = new SimpleClassWithGCField(f1, f2, str0);
+
+            // c2 always points to a heap-allocated object
+            SimpleClassWithGCField c2 = classWithGCField;
+
+            // c3 and c4 may point to a heap-allocated object or to a stack-allocated object
+            SimpleClassWithGCField c3 = (f1 == 0) ? c1 : c2;
+            SimpleClassWithGCField c4 = (f2 == 0) ? c2 : c1;
+
+            // c1 doesn't have to be reported to GC (but can be conservatively reported as an interior pointer)
+            // c1.o should be reported to GC as a normal pointer (but can be conservatively reported as an interior pointer)
+            // c2 should be reported to GC as a normal pointer (but can be conservatively reported as an interior pointer)
+            // c3 and c4 must be reported as interior pointers
+            GC.Collect();
+
+            // This assignment doesn't need a write barrier but may conservatively use a checked barrier
+            c1.o = str1;
+            // This assignment should optimally use a normal write barrier but may conservatively use a checked barrier
+            c2.o = str2;
+            // These assignments require a checked write barrier
+            c3.o = str3;
+            c4.o = str4;
+
+            return c1.o.ToString().Length + c2.o.ToString().Length + c3.o.ToString().Length + c4.o.ToString().Length;
+        }
     }
 }