Transform `STRUCT`-typed uses of primitives in local morph (#78131)
authorSingleAccretion <62474226+SingleAccretion@users.noreply.github.com>
Wed, 30 Nov 2022 12:23:56 +0000 (15:23 +0300)
committerGitHub <noreply@github.com>
Wed, 30 Nov 2022 12:23:56 +0000 (13:23 +0100)
* Add fwd-sub workaround

* Fold STRUCT-based access to primitives in local morph

* Solve a tiny regression

* Adapt args morhing

* Adapt block morphing

* Adapt return morphing

* Delete dead code

src/coreclr/jit/compiler.h
src/coreclr/jit/flowgraph.cpp
src/coreclr/jit/forwardsub.cpp
src/coreclr/jit/lclmorph.cpp
src/coreclr/jit/morph.cpp
src/coreclr/jit/morphblock.cpp

index b2003ee..4ea3e4a 100644 (file)
@@ -5712,7 +5712,7 @@ private:
     GenTree* fgMorphExpandStackArgForVarArgs(GenTreeLclVarCommon* lclNode);
 #endif // TARGET_X86
     GenTree* fgMorphExpandImplicitByRefArg(GenTreeLclVarCommon* lclNode);
-    GenTree* fgMorphLocalVar(GenTree* tree, bool forceRemorph);
+    GenTree* fgMorphLocalVar(GenTree* tree);
 
 public:
     bool fgAddrCouldBeNull(GenTree* addr);
index 4fc2b9b..e5bcee3 100644 (file)
@@ -944,6 +944,8 @@ bool Compiler::fgAddrCouldBeNull(GenTree* addr)
         case GT_CNS_STR:
         case GT_ADDR:
         case GT_FIELD_ADDR:
+        case GT_LCL_VAR_ADDR:
+        case GT_LCL_FLD_ADDR:
         case GT_CLS_VAR_ADDR:
             // A GT_ADDR node, by itself, never requires null checking.  The expression whose address is being
             // taken is either a local or static variable, whose address is necessarily non-null, or else it is
index 0642df2..d498c22 100644 (file)
@@ -641,6 +641,15 @@ bool Compiler::fgForwardSubStatement(Statement* stmt)
         return false;
     }
 
+    // A "CanBeReplacedWithItsField" SDSU can serve as a sort of "BITCAST<primitive>(struct)"
+    // device, forwarding it risks forcing things to memory.
+    //
+    if (fwdSubNode->IsCall() && varDsc->CanBeReplacedWithItsField(this))
+    {
+        JITDUMP(" fwd sub local is 'CanBeReplacedWithItsField'\n");
+        return false;
+    }
+
     // There are implicit assumptions downstream on where/how multi-reg ops
     // can appear.
     //
index 4a9961f..f2f5c44 100644 (file)
@@ -992,7 +992,13 @@ private:
                 indir->AsLclFld()->SetLayout(indirLayout);
                 lclNode = indir->AsLclVarCommon();
 
-                m_compiler->lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::LocalField));
+                if (!indir->TypeIs(TYP_STRUCT))
+                {
+                    // The general invariant in the compiler is that whoever creates a LCL_FLD node after local morph
+                    // must mark the associated local DNER. We break this invariant here, for STRUCT fields, to allow
+                    // global morph to transform these into enregisterable LCL_VARs, applying DNER otherwise.
+                    m_compiler->lvaSetVarDoNotEnregister(val.LclNum() DEBUGARG(DoNotEnregisterReason::LocalField));
+                }
                 break;
 
             default:
@@ -1097,14 +1103,6 @@ private:
             return IndirTransform::LclFld;
         }
 
-        if (varDsc->TypeGet() != TYP_STRUCT)
-        {
-            // TODO-ADDR: STRUCT uses of primitives require more work: "fgMorphOneAsgBlockOp"
-            // and init block morphing need to be updated to recognize them. Alternatively,
-            // we could consider moving some of their functionality here.
-            return IndirTransform::None;
-        }
-
         ClassLayout* indirLayout = nullptr;
 
         if (indir->OperIs(GT_FIELD))
@@ -1122,8 +1120,10 @@ private:
 
         *pStructLayout = indirLayout;
 
-        // We're only processing TYP_STRUCT uses and variables now.
-        assert(indir->TypeIs(TYP_STRUCT) && (varDsc->GetLayout() != nullptr));
+        if (varDsc->TypeGet() != TYP_STRUCT)
+        {
+            return IndirTransform::LclFld;
+        }
 
         if ((val.Offset() == 0) && ClassLayout::AreCompatible(indirLayout, varDsc->GetLayout()))
         {
index a670100..ba3aebd 100644 (file)
@@ -3293,126 +3293,86 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
                     }
 #endif // defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64)
                 }
-                else
+                else if (argObj->TypeGet() != structBaseType)
                 {
                     // We have a struct argument that fits into a register, and it is either a power of 2,
                     // or a local.
                     // Change our argument, as needed, into a value of the appropriate type.
                     assert((structBaseType != TYP_STRUCT) && (genTypeSize(structBaseType) >= originalSize));
 
-                    if (argObj->OperIs(GT_OBJ))
+                    if (argObj->OperIsIndir())
                     {
-                        argObj->ChangeOper(GT_IND);
-
-                        // Now see if we can fold *(&X) into X
-                        if (argObj->AsOp()->gtOp1->gtOper == GT_ADDR)
-                        {
-                            GenTree* temp = argObj->AsOp()->gtOp1->AsOp()->gtOp1;
-
-                            // Keep the DONT_CSE flag in sync
-                            // (as the addr always marks it for its op1)
-                            temp->gtFlags &= ~GTF_DONT_CSE;
-                            temp->gtFlags |= (argObj->gtFlags & GTF_DONT_CSE);
-                            DEBUG_DESTROY_NODE(argObj->AsOp()->gtOp1); // GT_ADDR
-                            DEBUG_DESTROY_NODE(argObj);                // GT_IND
-
-                            argObj      = temp;
-                            *parentArgx = temp;
-                            argx        = temp;
-                        }
+                        assert(argObj->AsIndir()->Size() == genTypeSize(structBaseType));
+                        argObj->ChangeType(structBaseType);
+                        argObj->SetOper(GT_IND);
                     }
-                    if (argObj->gtOper == GT_LCL_VAR)
+                    else if (argObj->OperIsLocalRead())
                     {
-                        unsigned   lclNum = argObj->AsLclVarCommon()->GetLclNum();
-                        LclVarDsc* varDsc = lvaGetDesc(lclNum);
+                        unsigned   lclNum    = argObj->AsLclVarCommon()->GetLclNum();
+                        LclVarDsc* varDsc    = lvaGetDesc(lclNum);
+                        unsigned   lclOffset = argObj->AsLclVarCommon()->GetLclOffs();
+                        unsigned   argLclNum = BAD_VAR_NUM;
+                        LclVarDsc* argVarDsc = nullptr;
 
                         if (varDsc->lvPromoted)
                         {
-                            if (varDsc->lvFieldCnt == 1)
-                            {
-                                // get the first and only promoted field
-                                LclVarDsc* fieldVarDsc = lvaGetDesc(varDsc->lvFieldLclStart);
-                                if (genTypeSize(fieldVarDsc->TypeGet()) >= originalSize)
-                                {
-                                    // we will use the first and only promoted field
-                                    argObj->AsLclVarCommon()->SetLclNum(varDsc->lvFieldLclStart);
+                            argLclNum = lvaGetFieldLocal(varDsc, lclOffset);
+                        }
+                        else if (lclOffset == 0)
+                        {
+                            argLclNum = lclNum;
+                        }
 
-                                    if (varTypeIsEnregisterable(fieldVarDsc->TypeGet()) &&
-                                        (genTypeSize(fieldVarDsc->TypeGet()) == originalSize))
-                                    {
-                                        // Just use the existing field's type
-                                        argObj->gtType = fieldVarDsc->TypeGet();
-                                    }
-                                    else
-                                    {
-                                        // Can't use the existing field's type, so use GT_LCL_FLD to swizzle
-                                        // to a new type
-                                        lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::SwizzleArg));
-                                        argObj->ChangeOper(GT_LCL_FLD);
-                                        argObj->gtType = structBaseType;
-                                    }
-                                    assert(varTypeIsEnregisterable(argObj->TypeGet()));
-                                    assert(!makeOutArgCopy);
-                                }
-                                else
-                                {
-                                    // use GT_LCL_FLD to swizzle the single field struct to a new type
-                                    lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::SwizzleArg));
-                                    argObj->ChangeOper(GT_LCL_FLD);
-                                    argObj->gtType = structBaseType;
-                                }
-                            }
-                            else
+                        // See if this local goes into the right register file.
+                        // TODO-CQ: we could use a bitcast here, if it does not.
+                        if (argLclNum != BAD_VAR_NUM)
+                        {
+                            argVarDsc = lvaGetDesc(argLclNum);
+                            if ((genTypeSize(argVarDsc) != originalSize) ||
+                                (varTypeUsesFloatReg(argVarDsc) != varTypeUsesFloatReg(structBaseType)))
                             {
-                                // The struct fits into a single register, but it has been promoted into its
-                                // constituent fields, and so we have to re-assemble it
-                                makeOutArgCopy = true;
+                                argLclNum = BAD_VAR_NUM;
                             }
                         }
-                        else if (genTypeSize(varDsc) != genTypeSize(structBaseType))
+
+                        if (argLclNum != BAD_VAR_NUM)
                         {
-                            // Not a promoted struct, so just swizzle the type by using GT_LCL_FLD
-                            lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::SwizzleArg));
-                            argObj->ChangeOper(GT_LCL_FLD);
-                            argObj->gtType = structBaseType;
+                            argObj->ChangeType(argVarDsc->TypeGet());
+                            argObj->SetOper(GT_LCL_VAR);
+                            argObj->AsLclVar()->SetLclNum(argLclNum);
                         }
-                        else if (varTypeUsesFloatReg(varDsc) != varTypeUsesFloatReg(structBaseType))
+                        else if (varDsc->lvPromoted)
                         {
-                            // Here we can see int <-> float, long <-> double, long <-> simd8 mismatches, due
-                            // to the "OBJ(ADDR(LCL))" => "LCL" folding above. The latter case is handled in
-                            // lowering, others we will handle here via swizzling.
-                            CLANG_FORMAT_COMMENT_ANCHOR;
+                            // Preserve independent promotion of "argObj" by decomposing the copy.
+                            // TODO-CQ: condition this on the promotion actually being independent.
+                            makeOutArgCopy = true;
+                        }
 #ifdef TARGET_AMD64
-                            if (varDsc->TypeGet() != TYP_SIMD8)
-#endif // TARGET_AMD64
+                        else if (!argObj->OperIs(GT_LCL_VAR) || !argObj->TypeIs(TYP_SIMD8)) // Handled by lowering.
+#else  // !TARGET_ARM64
+                        else
+#endif // !TARGET_ARM64
+                        {
+                            // TODO-CQ: perform this transformation in lowering instead of here and
+                            // avoid marking enregisterable structs DNER.
+                            argObj->ChangeType(structBaseType);
+                            if (argObj->OperIs(GT_LCL_VAR))
                             {
-                                lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::SwizzleArg));
-                                argObj->ChangeOper(GT_LCL_FLD);
-                                argObj->gtType = structBaseType;
+                                argObj->SetOper(GT_LCL_FLD);
                             }
+                            lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::SwizzleArg));
                         }
                     }
-                    else if (argObj->OperIs(GT_LCL_FLD, GT_IND))
-                    {
-                        // We can just change the type on the node
-                        argObj->gtType = structBaseType;
-                    }
-                    else
-                    {
-#ifdef FEATURE_SIMD
-                        // We leave the SIMD8 <-> LONG (Windows x64) case to lowering. For SIMD8 <-> DOUBLE (Unix x64),
-                        // we do not need to do anything as both types already use floating-point registers.
-                        assert((argObj->TypeIs(TYP_SIMD8) &&
-                                ((structBaseType == TYP_LONG) || (structBaseType == TYP_DOUBLE))) ||
-                               argObj->TypeIs(structBaseType));
-#else  // !FEATURE_SIMD
-                        unreached();
-#endif // !FEATURE_SIMD
-                    }
 
-                    assert(varTypeIsEnregisterable(argObj->TypeGet()) ||
+                    assert(varTypeIsEnregisterable(argObj) ||
                            (makeOutArgCopy && varTypeIsEnregisterable(structBaseType)));
                 }
+                else if (argObj->OperIs(GT_LCL_VAR) && lvaGetDesc(argObj->AsLclVar())->lvPromoted)
+                {
+                    // Set DNER to block independent promotion.
+                    lvaSetVarDoNotEnregister(argObj->AsLclVar()->GetLclNum()
+                                                 DEBUGARG(DoNotEnregisterReason::IsStructArg));
+                }
             }
         }
 
@@ -3477,33 +3437,15 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
 #if defined(TARGET_X86)
         if (isStructArg)
         {
-            GenTreeLclVar* lcl = nullptr;
-
-            // TODO-ADDR: always perform "OBJ(ADDR(LCL)) => LCL" transformation in local morph and delete this code.
-            if (argx->OperGet() == GT_OBJ)
+            if (argx->OperIs(GT_LCL_VAR) &&
+                (lvaGetPromotionType(argx->AsLclVar()->GetLclNum()) == PROMOTION_TYPE_INDEPENDENT))
             {
-                if (argx->gtGetOp1()->OperIs(GT_ADDR) && argx->gtGetOp1()->gtGetOp1()->OperIs(GT_LCL_VAR))
-                {
-                    lcl = argx->gtGetOp1()->gtGetOp1()->AsLclVar();
-                }
+                argx = fgMorphLclArgToFieldlist(argx->AsLclVar());
+                arg.SetEarlyNode(argx);
             }
-            else if (argx->OperGet() == GT_LCL_VAR)
+            else if (argx->OperIs(GT_LCL_FLD))
             {
-                lcl = argx->AsLclVar();
-            }
-            if ((lcl != nullptr) && (lvaGetPromotionType(lcl->GetLclNum()) == PROMOTION_TYPE_INDEPENDENT))
-            {
-                if (argx->OperIs(GT_LCL_VAR) ||
-                    ClassLayout::AreCompatible(argx->AsObj()->GetLayout(), lvaGetDesc(lcl)->GetLayout()))
-                {
-                    argx = fgMorphLclArgToFieldlist(lcl);
-                    arg.SetEarlyNode(argx);
-                }
-                else
-                {
-                    // Set DNER to block independent promotion.
-                    lvaSetVarDoNotEnregister(lcl->GetLclNum() DEBUGARG(DoNotEnregisterReason::IsStructArg));
-                }
+                lvaSetVarDoNotEnregister(argx->AsLclFld()->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField));
             }
         }
 #endif // TARGET_X86
@@ -3660,36 +3602,24 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg)
     if (arg->AbiInfo.GetRegNum() == REG_STK)
 #endif
     {
-        GenTreeLclVar* lcl       = nullptr;
-        GenTree*       actualArg = argNode->gtEffectiveVal();
-
-        // TODO-ADDR: always perform "OBJ(ADDR(LCL)) => LCL" transformation in local morph and delete this code.
-        if (actualArg->OperGet() == GT_OBJ)
-        {
-            if (actualArg->gtGetOp1()->OperIs(GT_ADDR) && actualArg->gtGetOp1()->gtGetOp1()->OperIs(GT_LCL_VAR))
-            {
-                lcl = actualArg->gtGetOp1()->gtGetOp1()->AsLclVar();
-            }
-        }
-        else if (actualArg->OperGet() == GT_LCL_VAR)
-        {
-            lcl = actualArg->AsLclVar();
-        }
-        if ((lcl != nullptr) && (lvaGetPromotionType(lcl->GetLclNum()) == PROMOTION_TYPE_INDEPENDENT))
+        if (argNode->OperIs(GT_LCL_VAR) &&
+            (lvaGetPromotionType(argNode->AsLclVar()->GetLclNum()) == PROMOTION_TYPE_INDEPENDENT))
         {
             // TODO-Arm-CQ: support decomposing "large" promoted structs into field lists.
-            if (!arg->AbiInfo.IsSplit() &&
-                (argNode->OperIs(GT_LCL_VAR) ||
-                 ClassLayout::AreCompatible(argNode->AsObj()->GetLayout(), lvaGetDesc(lcl)->GetLayout())))
+            if (!arg->AbiInfo.IsSplit())
             {
-                argNode = fgMorphLclArgToFieldlist(lcl);
+                argNode = fgMorphLclArgToFieldlist(argNode->AsLclVar());
             }
             else
             {
                 // Set DNER to block independent promotion.
-                lvaSetVarDoNotEnregister(lcl->GetLclNum() DEBUGARG(DoNotEnregisterReason::IsStructArg));
+                lvaSetVarDoNotEnregister(argNode->AsLclVar()->GetLclNum() DEBUGARG(DoNotEnregisterReason::IsStructArg));
             }
         }
+        else if (argNode->OperIs(GT_LCL_FLD))
+        {
+            lvaSetVarDoNotEnregister(argNode->AsLclFld()->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField));
+        }
 
         return argNode;
     }
@@ -3907,7 +3837,6 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg)
                 newArg->AddField(this, lclFld, offset, lclFld->TypeGet());
             }
 
-            // Set DNER to block independent promotion.
             lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::LocalField));
         }
         else
@@ -3916,16 +3845,8 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg)
 
             GenTree*  baseAddr = argNode->AsIndir()->Addr();
             var_types addrType = baseAddr->TypeGet();
+            newArg             = new (this, GT_FIELD_LIST) GenTreeFieldList();
 
-            // TODO-ADDR: make sure all such OBJs are transformed into TYP_STRUCT LCL_FLDs and delete this condition.
-            GenTreeLclVarCommon* lclSrcNode = baseAddr->IsLocalAddrExpr();
-            if (lclSrcNode != nullptr)
-            {
-                // Set DNER to block independent promotion.
-                lvaSetVarDoNotEnregister(lclSrcNode->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField));
-            }
-
-            newArg = new (this, GT_FIELD_LIST) GenTreeFieldList();
             for (unsigned inx = 0; inx < elemCount; inx++)
             {
                 unsigned offset  = elems[inx].Offset;
@@ -4678,7 +4599,7 @@ GenTree* Compiler::fgMorphLocal(GenTreeLclVarCommon* lclNode)
 
     if (lclNode->OperIs(GT_LCL_VAR))
     {
-        return fgMorphLocalVar(lclNode, /* forceRemorph */ false);
+        return fgMorphLocalVar(lclNode);
     }
 
     if (lvaGetDesc(lclNode)->IsAddressExposed())
@@ -4857,7 +4778,7 @@ GenTree* Compiler::fgMorphExpandImplicitByRefArg(GenTreeLclVarCommon* lclNode)
  *  Transform the given GT_LCL_VAR tree for code generation.
  */
 
-GenTree* Compiler::fgMorphLocalVar(GenTree* tree, bool forceRemorph)
+GenTree* Compiler::fgMorphLocalVar(GenTree* tree)
 {
     assert(tree->OperIs(GT_LCL_VAR));
 
@@ -4869,7 +4790,7 @@ GenTree* Compiler::fgMorphLocalVar(GenTree* tree, bool forceRemorph)
     }
 
     // If not during the global morphing phase bail.
-    if (!fgGlobalMorph && !forceRemorph)
+    if (!fgGlobalMorph)
     {
         return tree;
     }
@@ -8700,26 +8621,23 @@ GenTree* Compiler::fgMorphOneAsgBlockOp(GenTree* tree)
         return nullptr;
     }
 
-    var_types asgType = TYP_UNDEF;
-    GenTree*  asg     = tree;
-    GenTree*  dest    = asg->gtGetOp1();
-    GenTree*  src     = asg->gtGetOp2();
+    var_types  asgType    = TYP_UNDEF;
+    GenTree*   asg        = tree;
+    GenTree*   dest       = asg->gtGetOp1();
+    GenTree*   src        = asg->gtGetOp2();
+    LclVarDsc* destVarDsc = nullptr;
     assert((src == src->gtEffectiveVal()) && (dest == dest->gtEffectiveVal()));
 
-    GenTree* destLclVarTree = nullptr;
-    if (dest->OperIsBlk() && impIsAddressInLocal(dest->AsBlk()->Addr(), &destLclVarTree))
+    if (dest->OperIs(GT_LCL_FLD))
     {
-        unsigned   destLclNum = destLclVarTree->AsLclVar()->GetLclNum();
-        LclVarDsc* destVarDsc = lvaGetDesc(destLclNum);
-        asgType               = destVarDsc->TypeGet();
+        destVarDsc = lvaGetDesc(dest->AsLclFld());
+        asgType    = destVarDsc->TypeGet();
 
         // We will use the dest local directly.
-        if (!varTypeIsIntegralOrI(asgType) || (dest->AsBlk()->Size() != genTypeSize(asgType)))
+        if (!varTypeIsIntegralOrI(asgType) || (dest->AsLclFld()->GetSize() != genTypeSize(asgType)))
         {
             return nullptr;
         }
-
-        dest = destLclVarTree;
     }
     else
     {
@@ -8732,14 +8650,12 @@ GenTree* Compiler::fgMorphOneAsgBlockOp(GenTree* tree)
         return nullptr;
     }
 
-    GenTree* srcLclVarTree = nullptr;
-    if (src->OperIsIndir() && impIsAddressInLocal(src->AsIndir()->Addr(), &srcLclVarTree) &&
-        srcLclVarTree->TypeIs(asgType))
+    if (src->OperIs(GT_LCL_FLD) && (lvaGetDesc(src->AsLclFld())->TypeGet() == asgType))
     {
-        assert(srcLclVarTree->OperIs(GT_LCL_VAR));
-        src = srcLclVarTree;
+        src->SetOper(GT_LCL_VAR);
+        src->ChangeType(asgType);
     }
-    if (src->OperIs(GT_LCL_VAR) && lvaGetDesc(src->AsLclVar())->lvPromoted)
+    else if (src->OperIs(GT_LCL_VAR) && lvaGetDesc(src->AsLclVar())->lvPromoted)
     {
         // Leave handling these to block morphing.
         return nullptr;
@@ -8750,7 +8666,8 @@ GenTree* Compiler::fgMorphOneAsgBlockOp(GenTree* tree)
     // we would have labeled it GTF_VAR_USEASG, because the block operation wouldn't
     // have done that normalization.  If we're now making it into an assignment,
     // the NormalizeOnStore will work, and it can be a full def.
-    assert(dest->OperIs(GT_LCL_VAR));
+    dest->SetOper(GT_LCL_VAR);
+    dest->ChangeType(destVarDsc->lvNormalizeOnLoad() ? asgType : genActualType(asgType));
     dest->gtFlags &= ~GTF_VAR_USEASG;
 
     // Retype the RHS.
@@ -9446,7 +9363,7 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optA
         case GT_RETURN:
             if (!tree->TypeIs(TYP_VOID))
             {
-                if (op1->OperIs(GT_OBJ, GT_BLK, GT_IND))
+                if (op1->OperIs(GT_LCL_FLD))
                 {
                     op1 = fgMorphRetInd(tree->AsUnOp());
                 }
@@ -9922,8 +9839,6 @@ DONE_MORPHING_CHILDREN:
      * Perform the required oper-specific postorder morphing
      */
 
-    GenTree* temp;
-
     switch (oper)
     {
         case GT_ASG:
@@ -10271,125 +10186,27 @@ DONE_MORPHING_CHILDREN:
                 break;
             }
 
-            // Can not remove a GT_IND if it is currently a CSE candidate.
-            if (gtIsActiveCSE_Candidate(tree))
+            if (op1->IsIconHandle(GTF_ICON_OBJ_HDL))
             {
-                break;
+                tree->gtFlags |= (GTF_IND_INVARIANT | GTF_IND_NONFAULTING | GTF_IND_NONNULL);
             }
 
-            bool foldAndReturnTemp = false;
-            temp                   = nullptr;
-
-            // Don't remove a volatile GT_IND, even if the address points to a local variable.
-            //
-            if (!tree->AsIndir()->IsVolatile())
-            {
-                if (op1->IsIconHandle(GTF_ICON_OBJ_HDL))
-                {
-                    tree->gtFlags |= (GTF_IND_INVARIANT | GTF_IND_NONFAULTING | GTF_IND_NONNULL);
-                }
-
-                /* Try to Fold *(&X) into X */
-                if (op1->gtOper == GT_ADDR)
-                {
-                    if (gtIsActiveCSE_Candidate(op1))
-                    {
-                        break;
-                    }
-
-                    temp = op1->AsOp()->gtOp1; // X
-
-                    if ((typ == temp->TypeGet()) && (typ != TYP_STRUCT))
-                    {
-                        foldAndReturnTemp = true;
-                    }
-                }
-                else
-                {
 #ifdef TARGET_ARM
-                    GenTree* effOp1 = op1->gtEffectiveVal(true);
-                    // Check for a misalignment floating point indirection.
-                    if (effOp1->OperIs(GT_ADD) && varTypeIsFloating(typ))
-                    {
-                        GenTree* addOp2 = effOp1->gtGetOp2();
-                        if (addOp2->IsCnsIntOrI())
-                        {
-                            ssize_t offset = addOp2->AsIntCon()->gtIconVal;
-                            if ((offset % emitTypeSize(TYP_FLOAT)) != 0)
-                            {
-                                tree->gtFlags |= GTF_IND_UNALIGNED;
-                            }
-                        }
-                    }
-#endif // TARGET_ARM
-                }
-            }
-
-            // At this point we may have a lclVar or lclFld of some mismatched type. In this case, we will change
-            // the lclVar or lclFld into a lclFld of the appropriate type if doing so is legal. The only cases in
-            // which this transformation is illegal is when we have a STRUCT indirection, as we do not have the
-            // necessary layout information, or if the load would extend beyond the local.
-            if ((temp != nullptr) && !foldAndReturnTemp)
+            GenTree* effOp1 = op1->gtEffectiveVal(true);
+            // Check for a misalignment floating point indirection.
+            if (effOp1->OperIs(GT_ADD) && varTypeIsFloating(typ))
             {
-                assert(temp->OperIs(GT_LCL_VAR, GT_LCL_FLD));
-
-                unsigned lclNum  = temp->AsLclVarCommon()->GetLclNum();
-                unsigned lclOffs = temp->AsLclVarCommon()->GetLclOffs();
-
-                // Make sure we do not enregister this lclVar.
-                lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::LocalField));
-
-                if ((typ != TYP_STRUCT) && ((lclOffs + genTypeSize(typ)) <= lvaLclExactSize(lclNum)))
+                GenTree* addOp2 = effOp1->gtGetOp2();
+                if (addOp2->IsCnsIntOrI())
                 {
-                    // We will change the type of the node to match the original GT_IND type.
-                    //
-                    temp->gtType = typ;
-
-                    if (temp->OperIs(GT_LCL_VAR))
+                    ssize_t offset = addOp2->AsIntCon()->gtIconVal;
+                    if ((offset % emitTypeSize(TYP_FLOAT)) != 0)
                     {
-                        temp->ChangeOper(GT_LCL_FLD);
+                        tree->gtFlags |= GTF_IND_UNALIGNED;
                     }
-
-                    foldAndReturnTemp = true;
-                }
-            }
-
-            if (foldAndReturnTemp)
-            {
-                assert(temp != nullptr);
-                assert(temp->TypeGet() == typ);
-                assert(op1->OperIs(GT_ADDR));
-
-                // Copy the value of GTF_DONT_CSE from the original tree to `temp`: it can be set for
-                // 'temp' because a GT_ADDR always marks it for its operand.
-                temp->gtFlags &= ~GTF_DONT_CSE;
-                temp->gtFlags |= (tree->gtFlags & GTF_DONT_CSE);
-                temp->SetVNsFromNode(tree);
-
-                DEBUG_DESTROY_NODE(op1);  // GT_ADDR
-                DEBUG_DESTROY_NODE(tree); // GT_IND
-
-                // If the result of the fold is a local var, we may need to perform further adjustments e.g. for
-                // normalization.
-                if (temp->OperIs(GT_LCL_VAR))
-                {
-#ifdef DEBUG
-                    // We clear this flag on `temp` because `fgMorphLocalVar` may assert that this bit is clear
-                    // and the node in question must have this bit set (as it has already been morphed).
-                    temp->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
-#endif // DEBUG
-                    const bool forceRemorph = true;
-                    temp                    = fgMorphLocalVar(temp, forceRemorph);
-#ifdef DEBUG
-                    // We then set this flag on `temp` because `fgMorhpLocalVar` may not set it itself, and the
-                    // caller of `fgMorphSmpOp` may assert that this flag is set on `temp` once this function
-                    // returns.
-                    temp->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
-#endif // DEBUG
                 }
-
-                return temp;
             }
+#endif // TARGET_ARM
 
             // Only do this optimization when we are in the global optimizer. Doing this after value numbering
             // could result in an invalid value number for the newly generated GT_IND node.
@@ -10453,9 +10270,8 @@ DONE_MORPHING_CHILDREN:
                 commaNode->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT);
                 return tree;
             }
-
-            break;
         }
+        break;
 
         case GT_ADDR:
             // Can not remove a GT_ADDR if it is currently a CSE candidate.
@@ -12054,10 +11870,10 @@ GenTree* Compiler::fgPropagateCommaThrow(GenTree* parent, GenTreeOp* commaThrow,
 }
 
 //----------------------------------------------------------------------------------------------
-// fgMorphRetInd: Try to get rid of extra IND(ADDR()) pairs in a return tree.
+// fgMorphRetInd: Try to get rid of extra local indirections in a return tree.
 //
 // Arguments:
-//    node - The return node that uses an indirection.
+//    node - The return node that uses an local field.
 //
 // Return Value:
 //    the original op1 of the ret if there was no optimization or an optimized new op1.
@@ -12065,61 +11881,45 @@ GenTree* Compiler::fgPropagateCommaThrow(GenTree* parent, GenTreeOp* commaThrow,
 GenTree* Compiler::fgMorphRetInd(GenTreeUnOp* ret)
 {
     assert(ret->OperIs(GT_RETURN));
-    assert(ret->gtGetOp1()->OperIs(GT_IND, GT_BLK, GT_OBJ));
-    GenTreeIndir* ind  = ret->gtGetOp1()->AsIndir();
-    GenTree*      addr = ind->Addr();
+    assert(ret->gtGetOp1()->OperIs(GT_LCL_FLD));
+    GenTreeLclFld* lclFld = ret->gtGetOp1()->AsLclFld();
+    unsigned       lclNum = lclFld->GetLclNum();
 
-    if (addr->OperIs(GT_ADDR) && addr->gtGetOp1()->OperIs(GT_LCL_VAR))
+    if (fgGlobalMorph && varTypeIsStruct(lclFld) && !lvaIsImplicitByRefLocal(lclNum))
     {
-        // If `return` retypes LCL_VAR as a smaller struct it should not set `doNotEnregister` on that
-        // LclVar.
-        // Example: in `Vector128:AsVector2` we have RETURN SIMD8(OBJ SIMD8(ADDR byref(LCL_VAR SIMD16))).
-        GenTreeLclVar* lclVar = addr->gtGetOp1()->AsLclVar();
-
-        if (!lvaIsImplicitByRefLocal(lclVar->GetLclNum()))
-        {
-            assert(!gtIsActiveCSE_Candidate(addr) && !gtIsActiveCSE_Candidate(ind));
-
-            LclVarDsc* varDsc     = lvaGetDesc(lclVar);
-            unsigned   indSize    = ind->Size();
-            unsigned   lclVarSize = lvaLclExactSize(lclVar->GetLclNum());
+        LclVarDsc* varDsc     = lvaGetDesc(lclNum);
+        unsigned   indSize    = lclFld->GetSize();
+        unsigned   lclVarSize = lvaLclExactSize(lclNum);
 
-            // TODO: change conditions in `canFold` to `indSize <= lclVarSize`, but currently do not support `BITCAST
-            // int<-SIMD16` etc.
-            assert((indSize <= lclVarSize) || varDsc->lvDoNotEnregister);
+        // TODO: change conditions in `canFold` to `indSize <= lclVarSize`, but currently do not support `BITCAST
+        // int<-SIMD16` etc. Note this will also require the offset of the field to be zero.
+        assert(indSize <= lclVarSize);
 
 #if defined(TARGET_64BIT)
-            bool canFold = (indSize == lclVarSize);
+        bool canFold = (indSize == lclVarSize);
 #else // !TARGET_64BIT
-            // TODO: improve 32 bit targets handling for LONG returns if necessary, nowadays we do not support `BITCAST
-            // long<->double` there.
-            bool canFold = (indSize == lclVarSize) && (lclVarSize <= REGSIZE_BYTES);
+        // TODO: improve 32 bit targets handling for LONG returns if necessary, nowadays we do not support `BITCAST
+        // long<->double` there.
+        bool canFold = (indSize == lclVarSize) && (lclVarSize <= REGSIZE_BYTES);
 #endif
-            // TODO: support `genReturnBB != nullptr`, it requires #11413 to avoid `Incompatible types for
-            // gtNewTempAssign`.
-            if (canFold && (genReturnBB == nullptr))
-            {
-                // Fold (TYPE1)*(&(TYPE2)x) even if types do not match, lowering will handle it.
-                // Getting rid of this IND(ADDR()) pair allows to keep lclVar as not address taken
-                // and enregister it.
-                DEBUG_DESTROY_NODE(ind);
-                DEBUG_DESTROY_NODE(addr);
-                ret->gtOp1 = lclVar;
-                // We use GTF_DONT_CSE as an "is under GT_ADDR" check. We can
-                // get rid of it now since the GT_RETURN node should never have
-                // its address taken.
-                assert((ret->gtFlags & GTF_DONT_CSE) == 0);
-                lclVar->gtFlags &= ~GTF_DONT_CSE;
-                return lclVar;
-            }
-            else if (!varDsc->lvDoNotEnregister)
-            {
-                lvaSetVarDoNotEnregister(lclVar->GetLclNum() DEBUGARG(DoNotEnregisterReason::BlockOpRet));
-            }
+
+        // TODO: support `genReturnBB != nullptr`, it requires #11413 to avoid `Incompatible types for
+        // gtNewTempAssign`.
+        if (canFold && (genReturnBB == nullptr))
+        {
+            // Fold even if types do not match, lowering will handle it. This allows the local
+            // to remain DNER-free and be enregistered.
+            assert(lclFld->GetLclOffs() == 0);
+            lclFld->ChangeType(varDsc->TypeGet());
+            lclFld->SetOper(GT_LCL_VAR);
+        }
+        else if (!varDsc->lvDoNotEnregister)
+        {
+            lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::BlockOpRet));
         }
     }
 
-    return ind;
+    return lclFld;
 }
 
 #ifdef _PREFAST_
index 5bffde1..e4fff99 100644 (file)
@@ -135,7 +135,7 @@ GenTree* MorphInitBlockHelper::Morph()
     if (m_transformationDecision == BlockTransformation::Undefined)
     {
         GenTree* oneAsgTree = nullptr;
-        if (!m_initBlock && (m_dst != m_dstLclNode))
+        if (!m_initBlock)
         {
             oneAsgTree = m_comp->fgMorphOneAsgBlockOp(m_asg);
         }
@@ -176,10 +176,6 @@ GenTree* MorphInitBlockHelper::Morph()
 // PrepareDst: Transform the asg destination to an appropriate form and initialize member fields
 //    with information about it.
 //
-// Notes:
-//    When assertion propagation is enabled this method kills assertions about the dst local,
-//    so the correctness depends on `IsLocalAddrExpr` recognizing all patterns.
-//
 void MorphInitBlockHelper::PrepareDst()
 {
     GenTree* origDst = m_asg->gtGetOp1();
@@ -202,69 +198,34 @@ void MorphInitBlockHelper::PrepareDst()
     {
         m_dstLclNode   = m_dst->AsLclVarCommon();
         m_dstLclOffset = m_dstLclNode->GetLclOffs();
+        m_dstLclNum    = m_dstLclNode->GetLclNum();
+        m_dstVarDsc    = m_comp->lvaGetDesc(m_dstLclNum);
+
+        assert((m_dstVarDsc->TypeGet() != TYP_STRUCT) ||
+               (m_dstVarDsc->GetLayout()->GetSize() == m_dstVarDsc->lvExactSize));
 
-        if (m_dst->TypeIs(TYP_STRUCT))
+        // Kill everything about m_dstLclNum (and its field locals)
+        if (m_comp->optLocalAssertionProp && (m_comp->optAssertionCount > 0))
         {
-            m_blockLayout = m_dstLclNode->GetLayout(m_comp);
+            m_comp->fgKillDependentAssertions(m_dstLclNum DEBUGARG(m_asg));
         }
     }
     else
     {
         assert(m_dst == m_dst->gtEffectiveVal() && "the commas were skipped in MorphBlock");
         assert(m_dst->OperIs(GT_IND, GT_BLK, GT_OBJ) && (!m_dst->OperIs(GT_IND) || !m_dst->TypeIs(TYP_STRUCT)));
-
-        GenTree* dstAddr = m_dst->AsIndir()->Addr();
-        noway_assert(dstAddr->TypeIs(TYP_BYREF, TYP_I_IMPL));
-
-        ssize_t dstLclOffset = 0;
-        if (dstAddr->DefinesLocalAddr(&m_dstLclNode, &dstLclOffset))
-        {
-            // Treat out-of-bounds access to locals opaquely to simplify downstream logic.
-            unsigned dstLclSize = m_comp->lvaLclExactSize(m_dstLclNode->GetLclNum());
-
-            if (!FitsIn<unsigned>(dstLclOffset) ||
-                ((static_cast<uint64_t>(dstLclOffset) + m_dst->AsIndir()->Size()) > dstLclSize))
-            {
-                assert(m_comp->lvaGetDesc(m_dstLclNode)->IsAddressExposed());
-                m_dstLclNode = nullptr;
-            }
-            else
-            {
-                m_dstLclOffset = static_cast<unsigned>(dstLclOffset);
-            }
-        }
-
-        if (m_dst->TypeIs(TYP_STRUCT))
-        {
-            m_blockLayout = m_dst->AsBlk()->GetLayout();
-        }
     }
 
     if (m_dst->TypeIs(TYP_STRUCT))
     {
-        m_blockSize = m_blockLayout->GetSize();
+        m_blockLayout = m_dst->GetLayout(m_comp);
+        m_blockSize   = m_blockLayout->GetSize();
     }
     else
     {
-        assert(m_blockLayout == nullptr);
         m_blockSize = genTypeSize(m_dst);
     }
 
-    if (m_dstLclNode != nullptr)
-    {
-        m_dstLclNum = m_dstLclNode->GetLclNum();
-        m_dstVarDsc = m_comp->lvaGetDesc(m_dstLclNum);
-
-        assert((m_dstVarDsc->TypeGet() != TYP_STRUCT) ||
-               (m_dstVarDsc->GetLayout()->GetSize() == m_dstVarDsc->lvExactSize));
-
-        // Kill everything about m_dstLclNum (and its field locals)
-        if (m_comp->optLocalAssertionProp && (m_comp->optAssertionCount > 0))
-        {
-            m_comp->fgKillDependentAssertions(m_dstLclNum DEBUGARG(m_asg));
-        }
-    }
-
 #if defined(DEBUG)
     if (m_comp->verbose)
     {
@@ -362,13 +323,9 @@ void MorphInitBlockHelper::MorphStructCases()
 
         if (m_dstVarDsc != nullptr)
         {
-            if (m_dst != m_dstLclNode)
+            if (m_dst->OperIs(GT_LCL_FLD))
             {
-                // If we access the dst as a whole but not directly, for example, with OBJ(ADDR(LCL_VAR))
-                // then set doNotEnreg.
-                // TODO-1stClassStructs: remove it when we can represent narowing struct cast
-                // without taking address of the lcl.
-                m_comp->lvaSetVarDoNotEnregister(m_dstLclNum DEBUGARG(DoNotEnregisterReason::CastTakesAddr));
+                m_comp->lvaSetVarDoNotEnregister(m_dstLclNum DEBUGARG(DoNotEnregisterReason::LocalField));
             }
             else if (m_dstVarDsc->lvPromoted)
             {
@@ -792,31 +749,8 @@ void MorphCopyBlockHelper::PrepareSrc()
     {
         m_srcLclNode   = m_src->AsLclVarCommon();
         m_srcLclOffset = m_srcLclNode->GetLclOffs();
-    }
-    else if (m_src->OperIsIndir())
-    {
-        ssize_t srcLclOffset = 0;
-        if (m_src->AsIndir()->Addr()->DefinesLocalAddr(&m_srcLclNode, &srcLclOffset))
-        {
-            // Treat out-of-bounds access to locals opaquely to simplify downstream logic.
-            unsigned srcLclSize = m_comp->lvaLclExactSize(m_srcLclNode->GetLclNum());
-
-            if (!FitsIn<unsigned>(srcLclOffset) || ((static_cast<uint64_t>(srcLclOffset) + m_blockSize) > srcLclSize))
-            {
-                assert(m_comp->lvaGetDesc(m_srcLclNode)->IsAddressExposed());
-                m_srcLclNode = nullptr;
-            }
-            else
-            {
-                m_srcLclOffset = static_cast<unsigned>(srcLclOffset);
-            }
-        }
-    }
-
-    if (m_srcLclNode != nullptr)
-    {
-        m_srcLclNum = m_srcLclNode->GetLclNum();
-        m_srcVarDsc = m_comp->lvaGetDesc(m_srcLclNum);
+        m_srcLclNum    = m_srcLclNode->GetLclNum();
+        m_srcVarDsc    = m_comp->lvaGetDesc(m_srcLclNum);
     }
 
     // Verify that the types on the LHS and RHS match and morph away "IND<struct>" nodes.
@@ -1131,10 +1065,9 @@ void MorphCopyBlockHelper::MorphStructCases()
     //
     if (!m_dstDoFldAsg && (m_dstVarDsc != nullptr) && !m_dstSingleLclVarAsg)
     {
-        if (m_dst != m_dstLclNode)
+        if (m_dst->OperIs(GT_LCL_FLD))
         {
-            // Mark it as DoNotEnregister.
-            m_comp->lvaSetVarDoNotEnregister(m_dstLclNum DEBUGARG(DoNotEnregisterReason::CastTakesAddr));
+            m_comp->lvaSetVarDoNotEnregister(m_dstLclNum DEBUGARG(DoNotEnregisterReason::LocalField));
         }
         else if (m_dstVarDsc->lvPromoted)
         {
@@ -1145,9 +1078,9 @@ void MorphCopyBlockHelper::MorphStructCases()
 
     if (!m_srcDoFldAsg && (m_srcVarDsc != nullptr) && !m_srcSingleLclVarAsg)
     {
-        if (m_src != m_srcLclNode)
+        if (m_src->OperIs(GT_LCL_FLD))
         {
-            m_comp->lvaSetVarDoNotEnregister(m_srcLclNum DEBUGARG(DoNotEnregisterReason::CastTakesAddr));
+            m_comp->lvaSetVarDoNotEnregister(m_srcLclNum DEBUGARG(DoNotEnregisterReason::LocalField));
         }
         else if (m_srcVarDsc->lvPromoted)
         {