Don't require BLK nodes for SIMD
author Carol Eidt <carol.eidt@microsoft.com>
Sat, 17 Nov 2018 01:48:46 +0000 (17:48 -0800)
committer Carol Eidt <carol.eidt@microsoft.com>
Tue, 11 Dec 2018 00:28:42 +0000 (16:28 -0800)
Eliminate most cases where an OBJ or BLK node is required for SIMD
values. The exception is the case where a value produced by an intrinsic
(SIMD or HWIntrinsic) is used as an argument but the argument is of a
different SIMD type (e.g. a different baseType).

Commit migrated from https://github.com/dotnet/coreclr/commit/d57aca4b2f64942724bbfc0e40db6a125848df47

src/coreclr/src/jit/assertionprop.cpp
src/coreclr/src/jit/compiler.h
src/coreclr/src/jit/flowgraph.cpp
src/coreclr/src/jit/gentree.cpp
src/coreclr/src/jit/importer.cpp
src/coreclr/src/jit/lsraxarch.cpp
src/coreclr/src/jit/morph.cpp

index 91892b4..2968820 100644 (file)
@@ -2646,12 +2646,15 @@ GenTree* Compiler::optConstantAssertionProp(AssertionDsc* curAssertion,
 #ifdef FEATURE_SIMD
                 if (varTypeIsSIMD(tree))
                 {
-                    var_types simdType = tree->TypeGet();
-                    tree->ChangeOperConst(GT_CNS_DBL);
-                    GenTree* initVal = tree;
-                    initVal->gtType  = TYP_FLOAT;
-                    newTree =
-                        gtNewSIMDNode(simdType, initVal, nullptr, SIMDIntrinsicInit, TYP_FLOAT, genTypeSize(simdType));
+                    LclVarDsc* varDsc   = lvaGetDesc(lclNum);
+                    var_types  simdType = tree->TypeGet();
+                    assert(varDsc->TypeGet() == simdType);
+                    var_types baseType = varDsc->lvBaseType;
+                    newTree            = gtGetSIMDZero(simdType, baseType, varDsc->lvVerTypeInfo.GetClassHandle());
+                    if (newTree == nullptr)
+                    {
+                        return nullptr;
+                    }
                 }
                 else
 #endif // FEATURE_SIMD
index ae23da1..0186720 100644 (file)
@@ -7619,6 +7619,9 @@ private:
 
     SIMDHandlesCache* m_simdHandleCache;
 
+    // Get an appropriate "zero" for the given type and class handle.
+    GenTree* gtGetSIMDZero(var_types simdType, var_types baseType, CORINFO_CLASS_HANDLE simdHandle);
+
     // Get the handle for a SIMD type.
     CORINFO_CLASS_HANDLE gtGetStructHandleForSIMD(var_types simdType, var_types simdBaseType)
     {
index b3f8994..64b9bfb 100644 (file)
@@ -23274,7 +23274,7 @@ GenTree* Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo)
                     const var_types argType = lclVarInfo[argNum].lclTypeInfo;
 
                     // Create the temp assignment for this argument
-                    CORINFO_CLASS_HANDLE structHnd = DUMMY_INIT(0);
+                    CORINFO_CLASS_HANDLE structHnd = NO_CLASS_HANDLE;
 
                     if (varTypeIsStruct(argType))
                     {
index 6d36b0b..d2297db 100644 (file)
@@ -4388,6 +4388,15 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
 
 DONE:
 
+#ifdef FEATURE_HW_INTRINSICS
+    if ((oper == GT_HWIntrinsic) && (tree->gtGetOp1() == nullptr))
+    {
+        // We can have nullary HWIntrinsic nodes, and we must have non-zero cost.
+        costEx = 1;
+        costSz = 1;
+    }
+#endif // FEATURE_HW_INTRINSICS
+
     // Some path through this function must have set the costs.
     assert(costEx != -1);
     assert(costSz != -1);
@@ -14468,14 +14477,17 @@ GenTree* Compiler::gtNewTempAssign(
     CORINFO_CLASS_HANDLE structHnd = gtGetStructHandleIfPresent(val);
     if (varTypeIsStruct(valTyp) && ((structHnd != NO_CLASS_HANDLE) || (varTypeIsSIMD(valTyp))))
     {
-        // The GT_OBJ may be be a child of a GT_COMMA.
+        // The struct value may be a child of a GT_COMMA.
         GenTree* valx = val->gtEffectiveVal(/*commaOnly*/ true);
 
-        if (valx->gtOper == GT_OBJ)
+        if (structHnd != NO_CLASS_HANDLE)
         {
-            assert(structHnd != nullptr);
             lvaSetStruct(tmp, structHnd, false);
         }
+        else
+        {
+            assert(valx->gtOper != GT_OBJ);
+        }
         dest->gtFlags |= GTF_DONT_CSE;
         valx->gtFlags |= GTF_DONT_CSE;
         asg = impAssignStruct(dest, val, structHnd, (unsigned)CHECK_SPILL_NONE, pAfterStmt, ilOffset, block);
@@ -16179,6 +16191,232 @@ bool Compiler::gtIsStaticFieldPtrToBoxedStruct(var_types fieldNodeType, CORINFO_
     return fieldTyp != TYP_REF;
 }
 
+#ifdef FEATURE_SIMD
+//------------------------------------------------------------------------
+// gtGetSIMDZero: Get a zero value of the appropriate SIMD type.
+//
+// Arguments:
+//    simdType  - The SIMD type
+//    baseType  - The base type we need
+//    simdHandle - The handle for the SIMD type
+//
+// Return Value:
+//    A node generating the appropriate Zero, if we are able to discern it,
+//    otherwise null (note that this shouldn't happen, but callers should
+//    be tolerant of this case).
+
+GenTree* Compiler::gtGetSIMDZero(var_types simdType, var_types baseType, CORINFO_CLASS_HANDLE simdHandle)
+{
+    bool found    = false;
+    bool isHWSIMD = true;
+    noway_assert(m_simdHandleCache != nullptr);
+
+    // First, determine whether this is Vector<T>.
+    if (simdType == getSIMDVectorType())
+    {
+        switch (baseType)
+        {
+            case TYP_FLOAT:
+                found = (simdHandle == m_simdHandleCache->SIMDFloatHandle);
+                break;
+            case TYP_DOUBLE:
+                found = (simdHandle == m_simdHandleCache->SIMDDoubleHandle);
+                break;
+            case TYP_INT:
+                found = (simdHandle == m_simdHandleCache->SIMDIntHandle);
+                break;
+            case TYP_USHORT:
+                found = (simdHandle == m_simdHandleCache->SIMDUShortHandle);
+                break;
+            case TYP_UBYTE:
+                found = (simdHandle == m_simdHandleCache->SIMDUByteHandle);
+                break;
+            case TYP_SHORT:
+                found = (simdHandle == m_simdHandleCache->SIMDShortHandle);
+                break;
+            case TYP_BYTE:
+                found = (simdHandle == m_simdHandleCache->SIMDByteHandle);
+                break;
+            case TYP_LONG:
+                found = (simdHandle == m_simdHandleCache->SIMDLongHandle);
+                break;
+            case TYP_UINT:
+                found = (simdHandle == m_simdHandleCache->SIMDUIntHandle);
+                break;
+            case TYP_ULONG:
+                found = (simdHandle == m_simdHandleCache->SIMDULongHandle);
+                break;
+            default:
+                break;
+        }
+        if (found)
+        {
+            isHWSIMD = false;
+        }
+    }
+
+    if (!found)
+    {
+        // We must still have isHWSIMD set to true, and the only non-HW types left are the fixed types.
+        switch (simdType)
+        {
+            case TYP_SIMD8:
+                switch (baseType)
+                {
+                    case TYP_FLOAT:
+                        if (simdHandle == m_simdHandleCache->SIMDVector2Handle)
+                        {
+                            isHWSIMD = false;
+                        }
+#if defined(_TARGET_ARM64_)
+                        else
+                        {
+                            assert(simdHandle == m_simdHandleCache->Vector64FloatHandle);
+                        }
+                        break;
+                    case TYP_INT:
+                        assert(simdHandle == m_simdHandleCache->Vector64IntHandle);
+                        break;
+                    case TYP_USHORT:
+                        assert(simdHandle == m_simdHandleCache->Vector64UShortHandle);
+                        break;
+                    case TYP_UBYTE:
+                        assert(simdHandle == m_simdHandleCache->Vector64UByteHandle);
+                        break;
+                    case TYP_SHORT:
+                        assert(simdHandle == m_simdHandleCache->Vector64ShortHandle);
+                        break;
+                    case TYP_BYTE:
+                        assert(simdHandle == m_simdHandleCache->Vector64ByteHandle);
+                        break;
+                    case TYP_UINT:
+                        assert(simdHandle == m_simdHandleCache->Vector64UIntHandle);
+                        break;
+#endif // defined(_TARGET_ARM64_)
+                    default:
+                        break;
+                }
+                break;
+
+            case TYP_SIMD12:
+                assert((baseType == TYP_FLOAT) && (simdHandle == m_simdHandleCache->SIMDVector3Handle));
+                isHWSIMD = false;
+                break;
+
+            case TYP_SIMD16:
+                switch (baseType)
+                {
+                    case TYP_FLOAT:
+                        if (simdHandle == m_simdHandleCache->SIMDVector4Handle)
+                        {
+                            isHWSIMD = false;
+                        }
+                        else
+                        {
+                            assert(simdHandle == m_simdHandleCache->Vector128FloatHandle);
+                        }
+                        break;
+                    case TYP_DOUBLE:
+                        assert(simdHandle == m_simdHandleCache->Vector128DoubleHandle);
+                        break;
+                    case TYP_INT:
+                        assert(simdHandle == m_simdHandleCache->Vector128IntHandle);
+                        break;
+                    case TYP_USHORT:
+                        assert(simdHandle == m_simdHandleCache->Vector128UShortHandle);
+                        break;
+                    case TYP_UBYTE:
+                        assert(simdHandle == m_simdHandleCache->Vector128UByteHandle);
+                        break;
+                    case TYP_SHORT:
+                        assert(simdHandle == m_simdHandleCache->Vector128ShortHandle);
+                        break;
+                    case TYP_BYTE:
+                        assert(simdHandle == m_simdHandleCache->Vector128ByteHandle);
+                        break;
+                    case TYP_LONG:
+                        assert(simdHandle == m_simdHandleCache->Vector128LongHandle);
+                        break;
+                    case TYP_UINT:
+                        assert(simdHandle == m_simdHandleCache->Vector128UIntHandle);
+                        break;
+                    case TYP_ULONG:
+                        assert(simdHandle == m_simdHandleCache->Vector128ULongHandle);
+                        break;
+                    default:
+                        break;
+                }
+                break;
+
+#ifdef _TARGET_XARCH_
+            case TYP_SIMD32:
+                switch (baseType)
+                {
+                    case TYP_FLOAT:
+                        assert(simdHandle == m_simdHandleCache->Vector256FloatHandle);
+                        break;
+                    case TYP_DOUBLE:
+                        assert(simdHandle == m_simdHandleCache->Vector256DoubleHandle);
+                        break;
+                    case TYP_INT:
+                        assert(simdHandle == m_simdHandleCache->Vector256IntHandle);
+                        break;
+                    case TYP_USHORT:
+                        assert(simdHandle == m_simdHandleCache->Vector256UShortHandle);
+                        break;
+                    case TYP_UBYTE:
+                        assert(simdHandle == m_simdHandleCache->Vector256UByteHandle);
+                        break;
+                    case TYP_SHORT:
+                        assert(simdHandle == m_simdHandleCache->Vector256ShortHandle);
+                        break;
+                    case TYP_BYTE:
+                        assert(simdHandle == m_simdHandleCache->Vector256ByteHandle);
+                        break;
+                    case TYP_LONG:
+                        assert(simdHandle == m_simdHandleCache->Vector256LongHandle);
+                        break;
+                    case TYP_UINT:
+                        assert(simdHandle == m_simdHandleCache->Vector256UIntHandle);
+                        break;
+                    case TYP_ULONG:
+                        assert(simdHandle == m_simdHandleCache->Vector256ULongHandle);
+                        break;
+                    default:
+                        break;
+                }
+                break;
+#endif // _TARGET_XARCH_
+            default:
+                break;
+        }
+    }
+
+    unsigned size = genTypeSize(simdType);
+    if (isHWSIMD)
+    {
+#ifdef _TARGET_XARCH_
+        switch (simdType)
+        {
+            case TYP_SIMD16:
+                return gtNewSimdHWIntrinsicNode(simdType, NI_Base_Vector128_Zero, baseType, size);
+            case TYP_SIMD32:
+                return gtNewSimdHWIntrinsicNode(simdType, NI_Base_Vector256_Zero, baseType, size);
+            default:
+                break;
+        }
+#endif // _TARGET_XARCH_
+        JITDUMP("Coudn't find the matching HW intrinsic SIMD type for %s<%s> in gtGetSIMDZero\n", varTypeName(simdType),
+                varTypeName(baseType));
+    }
+    else
+    {
+        return gtNewSIMDVectorZero(simdType, baseType, size);
+    }
+    return nullptr;
+}
+#endif // FEATURE_SIMD
+
 CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleIfPresent(GenTree* tree)
 {
     CORINFO_CLASS_HANDLE structHnd = NO_CLASS_HANDLE;
index 1b101d0..c62660e 100644 (file)
@@ -762,7 +762,8 @@ void Compiler::impAssignTempGen(unsigned             tmpNum,
 {
     GenTree* asg;
 
-    if (varTypeIsStruct(val))
+    assert(val->TypeGet() != TYP_STRUCT || structType != NO_CLASS_HANDLE);
+    if (varTypeIsStruct(val) && (structType != NO_CLASS_HANDLE))
     {
         assert(tmpNum < lvaCount);
         assert(structType != NO_CLASS_HANDLE);
@@ -851,6 +852,20 @@ GenTreeArgList* Compiler::impPopList(unsigned count, CORINFO_SIG_INFO* sig, GenT
             // Morph trees that aren't already OBJs or MKREFANY to be OBJs
             assert(ti.IsType(TI_STRUCT));
             structType = ti.GetClassHandleForValueClass();
+
+            bool forceNormalization = false;
+            if (varTypeIsSIMD(temp))
+            {
+                // We need to ensure that fgMorphArgs will use the correct struct handle to ensure proper
+                // ABI handling of this argument.
+                // Note that this can happen, for example, if we have a SIMD intrinsic that returns a SIMD type
+                // with a different baseType than we've seen.
+                // TODO-Cleanup: Consider whether we can eliminate all of these cases.
+                if (gtGetStructHandleIfPresent(temp) != structType)
+                {
+                    forceNormalization = true;
+                }
+            }
 #ifdef DEBUG
             if (verbose)
             {
@@ -858,7 +873,7 @@ GenTreeArgList* Compiler::impPopList(unsigned count, CORINFO_SIG_INFO* sig, GenT
                 gtDispTree(temp);
             }
 #endif
-            temp = impNormStructVal(temp, structType, (unsigned)CHECK_SPILL_ALL);
+            temp = impNormStructVal(temp, structType, (unsigned)CHECK_SPILL_ALL, forceNormalization);
 #ifdef DEBUG
             if (verbose)
             {
@@ -1014,11 +1029,11 @@ GenTreeArgList* Compiler::impPopRevList(unsigned count, CORINFO_SIG_INFO* sig, u
 }
 
 //------------------------------------------------------------------------
-// impAssignStruct: Assign (copy) the structure from 'src' to 'dest'.
+// impAssignStruct: Create a struct assignment
 //
 // Arguments:
-//    dest         - destination of the assignment
-//    src          - source of the assignment
+//    dest         - the destination of the assignment
+//    src          - the value to be assigned
 //    structHnd    - handle representing the struct type
 //    curLevel     - stack level for which a spill may be being done
 //    pAfterStmt   - statement to insert any additional statements after
@@ -1049,7 +1064,8 @@ GenTree* Compiler::impAssignStruct(GenTree*             dest,
 
     while (dest->gtOper == GT_COMMA)
     {
-        assert(varTypeIsStruct(dest->gtOp.gtOp2)); // Second thing is the struct
+        // Second thing is the struct.
+        assert(varTypeIsStruct(dest->gtOp.gtOp2));
 
         // Append all the op1 of GT_COMMA trees before we evaluate op2 of the GT_COMMA tree.
         if (pAfterStmt)
@@ -1068,10 +1084,10 @@ GenTree* Compiler::impAssignStruct(GenTree*             dest,
     assert(dest->gtOper == GT_LCL_VAR || dest->gtOper == GT_RETURN || dest->gtOper == GT_FIELD ||
            dest->gtOper == GT_IND || dest->gtOper == GT_OBJ || dest->gtOper == GT_INDEX);
 
+    // Return a NOP if this is a self-assignment.
     if (dest->OperGet() == GT_LCL_VAR && src->OperGet() == GT_LCL_VAR &&
         src->gtLclVarCommon.gtLclNum == dest->gtLclVarCommon.gtLclNum)
     {
-        // Make this a NOP
         return gtNewNothingNode();
     }
 
@@ -1127,24 +1143,12 @@ GenTree* Compiler::impAssignStructPtr(GenTree*             destAddr,
         ilOffset = impCurStmtOffs;
     }
 
-#if defined(UNIX_AMD64_ABI)
-    assert(varTypeIsStruct(src) || (src->gtOper == GT_ADDR && src->TypeGet() == TYP_BYREF));
-    // TODO-ARM-BUG: Does ARM need this?
-    // TODO-ARM64-BUG: Does ARM64 need this?
-    assert(src->gtOper == GT_LCL_VAR || src->gtOper == GT_FIELD || src->gtOper == GT_IND || src->gtOper == GT_OBJ ||
-           src->gtOper == GT_CALL || src->gtOper == GT_MKREFANY || src->gtOper == GT_RET_EXPR ||
-           src->gtOper == GT_COMMA || src->gtOper == GT_ADDR ||
-           (src->TypeGet() != TYP_STRUCT &&
-            (GenTree::OperIsSIMD(src->gtOper) || src->OperIsSimdHWIntrinsic() || src->gtOper == GT_LCL_FLD)));
-#else  // !defined(UNIX_AMD64_ABI)
-    assert(varTypeIsStruct(src));
-
     assert(src->gtOper == GT_LCL_VAR || src->gtOper == GT_FIELD || src->gtOper == GT_IND || src->gtOper == GT_OBJ ||
            src->gtOper == GT_CALL || src->gtOper == GT_MKREFANY || src->gtOper == GT_RET_EXPR ||
            src->gtOper == GT_COMMA ||
            (src->TypeGet() != TYP_STRUCT &&
             (GenTree::OperIsSIMD(src->gtOper) || src->OperIsSimdHWIntrinsic() || src->gtOper == GT_LCL_FLD)));
-#endif // !defined(UNIX_AMD64_ABI)
+
     if (destAddr->OperGet() == GT_ADDR)
     {
         GenTree* destNode = destAddr->gtGetOp1();
@@ -1556,9 +1560,22 @@ var_types Compiler::impNormStructType(CORINFO_CLASS_HANDLE structHnd,
     return structType;
 }
 
-//****************************************************************************
-//  Given TYP_STRUCT value 'structVal', make sure it is 'canonical', that is
-//  it is either an OBJ or a MKREFANY node, or a node (e.g. GT_INDEX) that will be morphed.
+//------------------------------------------------------------------------
+//  Compiler::impNormStructVal: Normalize a struct value
+//
+//  Arguments:
+//     structVal          - the node we are going to normalize
+//     structHnd          - the class handle for the node
+//     curLevel           - the current stack level
+//     forceNormalization - Force the creation of an OBJ node (default is false).
+//
+// Notes:
+//     Given struct value 'structVal', make sure it is 'canonical', that is
+//     it is either:
+//     - a known struct type (non-TYP_STRUCT, e.g. TYP_SIMD8)
+//     - an OBJ or a MKREFANY node, or
+//     - a node (e.g. GT_INDEX) that will be morphed.
+//    If the node is a CALL or RET_EXPR, a copy will be made to a new temp.
 //
 GenTree* Compiler::impNormStructVal(GenTree*             structVal,
                                     CORINFO_CLASS_HANDLE structHnd,
@@ -1707,8 +1724,7 @@ GenTree* Compiler::impNormStructVal(GenTree*             structVal,
             noway_assert(!"Unexpected node in impNormStructVal()");
             break;
     }
-    structVal->gtType  = structType;
-    GenTree* structObj = structVal;
+    structVal->gtType = structType;
 
     if (!alreadyNormalized || forceNormalization)
     {
@@ -1721,13 +1737,12 @@ GenTree* Compiler::impNormStructVal(GenTree*             structVal,
             // The structVal is now the temp itself
 
             structLcl = gtNewLclvNode(tmpNum, structType)->AsLclVarCommon();
-            // TODO-1stClassStructs: Avoid always wrapping in GT_OBJ.
-            structObj = gtNewObjNode(structHnd, gtNewOperNode(GT_ADDR, TYP_BYREF, structLcl));
+            structVal = structLcl;
         }
-        else if (varTypeIsStruct(structType) && !structVal->OperIsBlk())
+        if ((forceNormalization || (structType == TYP_STRUCT)) && !structVal->OperIsBlk())
         {
             // Wrap it in a GT_OBJ
-            structObj = gtNewObjNode(structHnd, gtNewOperNode(GT_ADDR, TYP_BYREF, structVal));
+            structVal = gtNewObjNode(structHnd, gtNewOperNode(GT_ADDR, TYP_BYREF, structVal));
         }
     }
 
@@ -1737,15 +1752,15 @@ GenTree* Compiler::impNormStructVal(GenTree*             structVal,
         // so we don't set GTF_EXCEPT here.
         if (!lvaIsImplicitByRefLocal(structLcl->gtLclNum))
         {
-            structObj->gtFlags &= ~GTF_GLOB_REF;
+            structVal->gtFlags &= ~GTF_GLOB_REF;
         }
     }
-    else
+    else if (structVal->OperIsBlk())
     {
         // In general a OBJ is an indirection and could raise an exception.
-        structObj->gtFlags |= GTF_EXCEPT;
+        structVal->gtFlags |= GTF_EXCEPT;
     }
-    return (structObj);
+    return structVal;
 }
 
 /******************************************************************************/
index 774334c..c925bec 100644 (file)
@@ -1894,8 +1894,9 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
             // Mark op1 as contained if it is either zero or int constant of all 1's,
             // or a float constant with 16 or 32 byte simdType (AVX case)
             //
-            // Should never see small int base type vectors except for zero initialization.
-            assert(!varTypeIsSmallInt(simdTree->gtSIMDBaseType) || op1->IsIntegralConst(0));
+            // Note that for small int base types, the initVal has been constructed so that
+            // we can use the full int value.
+            CLANG_FORMAT_COMMENT_ANCHOR;
 
 #if !defined(_TARGET_64BIT_)
             if (op1->OperGet() == GT_LONG)
index 164bf89..cc7cdc1 100644 (file)
@@ -2384,29 +2384,8 @@ void fgArgInfo::EvalArgsToTemps()
 
             if (varTypeIsStruct(defArg))
             {
-                // Need a temp to walk any GT_COMMA nodes when searching for the clsHnd
-                GenTree* defArgTmp = defArg;
-
-                // The GT_OBJ may be be a child of a GT_COMMA.
-                while (defArgTmp->gtOper == GT_COMMA)
-                {
-                    defArgTmp = defArgTmp->gtOp.gtOp2;
-                }
-                assert(varTypeIsStruct(defArgTmp));
-
-                // We handle two opcodes: GT_MKREFANY and GT_OBJ.
-                if (defArgTmp->gtOper == GT_MKREFANY)
-                {
-                    clsHnd = compiler->impGetRefAnyClass();
-                }
-                else if (defArgTmp->gtOper == GT_OBJ)
-                {
-                    clsHnd = defArgTmp->AsObj()->gtClass;
-                }
-                else
-                {
-                    BADCODE("Unhandled struct argument tree in fgMorphArgs");
-                }
+                clsHnd = compiler->gtGetStructHandleIfPresent(defArg);
+                noway_assert(clsHnd != NO_CLASS_HANDLE);
             }
 
 #endif // !(defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI))
@@ -3187,24 +3166,34 @@ void Compiler::fgInitArgInfo(GenTreeCall* call)
         CORINFO_CLASS_HANDLE objClass = NO_CLASS_HANDLE;
         if (isStructArg)
         {
-            // TODO-1stClassStructs: An OBJ node should not be required for lclVars.
-            if (!actualArg->OperIs(GT_OBJ, GT_MKREFANY))
-            {
-                BADCODE("illegal argument tree in fgInitArgInfo");
-            }
-            if (actualArg->OperIs(GT_OBJ))
+            objClass = gtGetStructHandle(argx);
+            if (argx->TypeGet() == TYP_STRUCT)
             {
-                structSize = actualArg->AsObj()->gtBlkSize;
-                objClass   = actualArg->AsObj()->gtClass;
-                assert(structSize == info.compCompHnd->getClassSize(objClass));
+                // For TYP_STRUCT arguments we must have an OBJ, LCL_VAR or MKREFANY
+                switch (actualArg->OperGet())
+                {
+                    case GT_OBJ:
+                        // Get the size off the OBJ node.
+                        structSize = actualArg->AsObj()->gtBlkSize;
+                        assert(structSize == info.compCompHnd->getClassSize(objClass));
+                        break;
+                    case GT_LCL_VAR:
+                        structSize = lvaGetDesc(actualArg->AsLclVarCommon())->lvExactSize;
+                        break;
+                    case GT_MKREFANY:
+                        structSize = info.compCompHnd->getClassSize(objClass);
+                        break;
+                    default:
+                        BADCODE("illegal argument tree in fgInitArgInfo");
+                        break;
+                }
             }
             else
             {
-                objClass   = impGetRefAnyClass();
-                structSize = info.compCompHnd->getClassSize(objClass);
+                structSize = genTypeSize(argx);
+                assert(structSize == info.compCompHnd->getClassSize(objClass));
             }
         }
-
 #if defined(_TARGET_AMD64_)
 #ifdef UNIX_AMD64_ABI
         if (!isStructArg)
@@ -3265,55 +3254,52 @@ void Compiler::fgInitArgInfo(GenTreeCall* call)
 #endif // _TARGET_XXX_
         if (isStructArg)
         {
-            if (!argx->OperIs(GT_MKREFANY))
-            {
-                // We have a GT_OBJ with a struct type, but the GT_OBJ may be be a child of a GT_COMMA
-                GenTree* argObj = argx->gtEffectiveVal(true /*commaOnly*/);
+            // We have an argument with a struct type, but it may be a child of a GT_COMMA
+            GenTree* argObj = argx->gtEffectiveVal(true /*commaOnly*/);
 
-                assert(args->OperIsList());
-                assert(argx == args->Current());
+            assert(args->OperIsList());
+            assert(argx == args->Current());
 
-                unsigned originalSize = structSize;
-                originalSize          = (originalSize == 0 ? TARGET_POINTER_SIZE : originalSize);
-                unsigned roundupSize  = (unsigned)roundUp(originalSize, TARGET_POINTER_SIZE);
+            unsigned originalSize = structSize;
+            originalSize          = (originalSize == 0 ? TARGET_POINTER_SIZE : originalSize);
+            unsigned roundupSize  = (unsigned)roundUp(originalSize, TARGET_POINTER_SIZE);
 
-                structSize = originalSize;
+            structSize = originalSize;
 
-                structPassingKind howToPassStruct;
+            structPassingKind howToPassStruct;
 
-                structBaseType = getArgTypeForStruct(objClass, &howToPassStruct, callIsVararg, originalSize);
+            structBaseType = getArgTypeForStruct(objClass, &howToPassStruct, callIsVararg, originalSize);
 
-                bool passedInRegisters = false;
-                passStructByRef        = (howToPassStruct == SPK_ByReference);
+            bool passedInRegisters = false;
+            passStructByRef        = (howToPassStruct == SPK_ByReference);
 
-                if (howToPassStruct == SPK_PrimitiveType)
-                {
+            if (howToPassStruct == SPK_PrimitiveType)
+            {
 // For ARM64 or AMD64/UX we can pass non-power-of-2 structs in a register.
 // For ARM or AMD64/Windows only power-of-2 structs are passed in registers.
 #if !defined(_TARGET_ARM64_) && !defined(UNIX_AMD64_ABI)
-                    if (!isPow2(originalSize))
+                if (!isPow2(originalSize))
 #endif //  !_TARGET_ARM64_ && !UNIX_AMD64_ABI
-                    {
-                        passedInRegisters = true;
-                    }
+                {
+                    passedInRegisters = true;
+                }
 #ifdef _TARGET_ARM_
-                    // TODO-CQ: getArgTypeForStruct should *not* return TYP_DOUBLE for a double struct,
-                    // or for a struct of two floats. This causes the struct to be address-taken.
-                    if (structBaseType == TYP_DOUBLE)
-                    {
-                        size = 2;
-                    }
-                    else
-#endif // _TARGET_ARM_
-                    {
-                        size = 1;
-                    }
+                // TODO-CQ: getArgTypeForStruct should *not* return TYP_DOUBLE for a double struct,
+                // or for a struct of two floats. This causes the struct to be address-taken.
+                if (structBaseType == TYP_DOUBLE)
+                {
+                    size = 2;
                 }
-                else if (passStructByRef)
+                else
+#endif // _TARGET_ARM_
                 {
                     size = 1;
                 }
             }
+            else if (passStructByRef)
+            {
+                size = 1;
+            }
         }
 
         // The 'size' value has now must have been set. (the original value of zero is an invalid value)
@@ -3779,7 +3765,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
 
         unsigned             argAlign     = argEntry->alignment;
         unsigned             size         = argEntry->getSize();
-        CORINFO_CLASS_HANDLE copyBlkClass = nullptr;
+        CORINFO_CLASS_HANDLE copyBlkClass = NO_CLASS_HANDLE;
 
         if (argAlign == 2)
         {
@@ -3817,94 +3803,123 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
         // was a struct and the struct classification.
         bool isStructArg = argEntry->isStruct;
 
-        if (isStructArg)
+        GenTree* argObj = argx->gtEffectiveVal(true /*commaOnly*/);
+        if (isStructArg && varTypeIsStruct(argObj) && !argObj->OperIs(GT_ASG, GT_MKREFANY, GT_FIELD_LIST, GT_ARGPLACE))
         {
-            GenTree* argObj = argx->gtEffectiveVal(true /*commaOnly*/);
-            if (argObj->OperIs(GT_OBJ))
+            CORINFO_CLASS_HANDLE objClass = gtGetStructHandle(argObj);
+            unsigned             originalSize;
+            if (argObj->TypeGet() == TYP_STRUCT)
+            {
+                if (argObj->OperIs(GT_OBJ))
+                {
+                    // Get the size off the OBJ node.
+                    originalSize = argObj->AsObj()->gtBlkSize;
+                    assert(originalSize == info.compCompHnd->getClassSize(objClass));
+                }
+                else
+                {
+                    // We have a BADCODE assert for this in fgInitArgInfo.
+                    assert(argObj->OperIs(GT_LCL_VAR));
+                    originalSize = lvaGetDesc(argObj->AsLclVarCommon())->lvExactSize;
+                }
+            }
+            else
             {
-                CORINFO_CLASS_HANDLE objClass       = argObj->AsObj()->gtClass;
-                unsigned             originalSize   = argObj->AsObj()->gtBlkSize;
-                unsigned             roundupSize    = (unsigned)roundUp(originalSize, TARGET_POINTER_SIZE);
-                var_types            structBaseType = argEntry->argType;
+                originalSize = genTypeSize(argx);
+                assert(originalSize == info.compCompHnd->getClassSize(objClass));
+            }
+            unsigned  roundupSize    = (unsigned)roundUp(originalSize, TARGET_POINTER_SIZE);
+            var_types structBaseType = argEntry->argType;
 
+#ifndef _TARGET_X86_
+            // First, handle the case where the argument is passed by reference.
+            if (argEntry->passedByRef)
+            {
+                assert(size == 1);
+                copyBlkClass = objClass;
+#ifdef UNIX_AMD64_ABI
+                assert(!"Structs are not passed by reference on x64/ux");
+#endif // UNIX_AMD64_ABI
+            }
+            else
+            {
+                // This is passed by value.
+                // Check to see if we can transform this into load of a primitive type.
                 // 'size' must be the number of pointer sized items
-                assert(argEntry->passedByRef || (size == roundupSize / TARGET_POINTER_SIZE));
+                assert(size == roundupSize / TARGET_POINTER_SIZE);
 
                 structSize           = originalSize;
                 unsigned passingSize = originalSize;
 
-#ifndef _TARGET_X86_
                 // Check to see if we can transform this struct load (GT_OBJ) into a GT_IND of the appropriate size.
-                // That is the else clause of the if statement below.
                 // When it can do this is platform-dependent:
                 // - In general, it can be done for power of 2 structs that fit in a single register.
                 // - For ARM and ARM64 it must also be a non-HFA struct, or have a single field.
                 // - This is irrelevant for X86, since structs are always passed by value on the stack.
 
-                GenTree** parentOfArgObj    = parentArgx;
-                GenTree*  lclVar            = fgIsIndirOfAddrOfLocal(argObj);
-                bool      canTransformToInd = false;
+                GenTree** parentOfArgObj = parentArgx;
+                GenTree*  lclVar         = fgIsIndirOfAddrOfLocal(argObj);
+                bool      canTransform   = false;
 
-                // TODO-1stClassStructs: We should be able to transform to a GT_IND for an enregisterable struct
-                // (e.g. SIMD), not just scalars.
-                if (!varTypeIsStruct(structBaseType))
+                if (structBaseType != TYP_STRUCT)
                 {
                     if (isPow2(passingSize))
                     {
-                        canTransformToInd = true;
+                        canTransform = true;
                     }
 
 #if defined(_TARGET_ARM64_) || defined(UNIX_AMD64_ABI)
                     // For ARM64 or AMD64/UX we can pass non-power-of-2 structs in a register, but we can
-                    // only transform to an indirection in that case if we are loading from a local.
+                    // only transform in that case if the arg is a local.
                     // TODO-CQ: This transformation should be applicable in general, not just for the ARM64
                     // or UNIX_AMD64_ABI cases where they will be passed in registers.
                     else
                     {
-                        canTransformToInd = (lclVar != nullptr);
-                        passingSize       = genTypeSize(structBaseType);
+                        canTransform = (lclVar != nullptr);
+                        passingSize  = genTypeSize(structBaseType);
                     }
 #endif //  _TARGET_ARM64_ || UNIX_AMD64_ABI
                 }
 
-                if (!canTransformToInd)
+                if (!canTransform)
                 {
 #if defined(_TARGET_AMD64_)
 #ifndef UNIX_AMD64_ABI
-                    // On Windows structs are always copied and passed by reference unless they are
+                    // On Windows structs are always copied and passed by reference (handled above) unless they are
                     // passed by value in a single register.
-                    assert((size == 1) && argEntry->passedByRef);
+                    assert(size == 1);
                     copyBlkClass = objClass;
 #else  // UNIX_AMD64_ABI
                     // On Unix, structs are always passed by value.
                     // We only need a copy if we have one of the following:
                     // - We have a lclVar that has been promoted and is passed in registers.
-                    // - The sizes don't match.
-                    // - We have a vector intrinsic.
-                    // TODO-Amd64-Unix-CQ: The first and last case could and should be handled without copies.
-
-                    copyBlkClass = NO_CLASS_HANDLE;
-
-                    // TODO-Amd64-Unix-CQ: This should use the condition below, which captures whether it is actually
-                    // being passed in registers (not just that the struct is eligible if there are enough regs left).
-                    // Also, this way we don't need to keep the structDesc in the argEntry if it's not actually passed
-                    // in registers.
-                    // if (argEntry->isPassedInRegisters())
-                    if (argEntry->structDesc.passedInRegisters)
+                    // - The sizes don't match for a non-lclVar argument.
+                    // - We have a known struct type (e.g. SIMD) that requires multiple registers.
+                    // TODO-Amd64-Unix-CQ: The first case could and should be handled without copies.
+                    // TODO-Amd64-Unix-Throughput: We don't need to keep the structDesc in the argEntry if it's not
+                    // actually passed in registers.
+                    if (argEntry->isPassedInRegisters())
                     {
-                        if ((lclVar != nullptr) &&
-                            (lvaGetPromotionType(lclVar->gtLclVarCommon.gtLclNum) == PROMOTION_TYPE_INDEPENDENT))
+                        assert(argEntry->structDesc.passedInRegisters);
+                        if (lclVar != nullptr)
                         {
-                            copyBlkClass = objClass;
+                            if (lvaGetPromotionType(lclVar->gtLclVarCommon.gtLclNum) == PROMOTION_TYPE_INDEPENDENT)
+                            {
+                                copyBlkClass = objClass;
+                            }
                         }
-                        else if (passingSize != structSize)
+                        else if (argObj->OperIs(GT_OBJ))
                         {
-                            copyBlkClass = objClass;
+                            if (passingSize != structSize)
+                            {
+                                copyBlkClass = objClass;
+                            }
                         }
                         else
                         {
-                            GenTree* addr = argObj->gtGetOp1();
-                            if (addr->OperIs(GT_ADDR) && addr->gtGetOp1()->OperIs(GT_SIMD, GT_HWIntrinsic))
+                            // This should only be the case of a value directly producing a known struct type.
+                            assert(argObj->TypeGet() != TYP_STRUCT);
+                            if (argEntry->numRegs > 1)
                             {
                                 copyBlkClass = objClass;
                             }
@@ -3912,27 +3927,19 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
                     }
 #endif // UNIX_AMD64_ABI
 #elif defined(_TARGET_ARM64_)
-                    if (argEntry->passedByRef)
-                    {
-                        // This must be copied to a temp and passed by address.
-                        assert(size == 1);
-                        copyBlkClass = objClass;
-                    }
-                    else if ((passingSize != structSize) && (lclVar == nullptr))
+                    if ((passingSize != structSize) && (lclVar == nullptr))
                     {
                         copyBlkClass = objClass;
                     }
 #endif
 
 #ifdef _TARGET_ARM_
-                    if (lclVar != nullptr)
+                    // TODO-1stClassStructs: Unify these conditions across targets.
+                    if (((lclVar != nullptr) &&
+                         (lvaGetPromotionType(lclVar->gtLclVarCommon.gtLclNum) == PROMOTION_TYPE_INDEPENDENT)) ||
+                        ((argObj->OperIs(GT_OBJ)) && (passingSize != structSize)))
                     {
-                        LclVarDsc* varDsc = &lvaTable[lclVar->gtLclVarCommon.gtLclNum];
-                        if (varDsc->lvPromoted)
-                        {
-                            assert(argObj->OperGet() == GT_OBJ);
-                            copyBlkClass = objClass;
-                        }
+                        copyBlkClass = objClass;
                     }
 
                     if (structSize < TARGET_POINTER_SIZE)
@@ -3955,27 +3962,31 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
 #endif
 
                     assert((structBaseType != TYP_STRUCT) && (genTypeSize(structBaseType) >= originalSize));
-                    argObj->ChangeOper(GT_IND);
 
-                    // Now see if we can fold *(&X) into X
-                    if (argObj->gtOp.gtOp1->gtOper == GT_ADDR)
+                    if (argObj->OperIs(GT_OBJ))
                     {
-                        GenTree* temp = argObj->gtOp.gtOp1->gtOp.gtOp1;
+                        argObj->ChangeOper(GT_IND);
 
-                        // Keep the DONT_CSE flag in sync
-                        // (as the addr always marks it for its op1)
-                        temp->gtFlags &= ~GTF_DONT_CSE;
-                        temp->gtFlags |= (argObj->gtFlags & GTF_DONT_CSE);
-                        DEBUG_DESTROY_NODE(argObj->gtOp.gtOp1); // GT_ADDR
-                        DEBUG_DESTROY_NODE(argObj);             // GT_IND
+                        // Now see if we can fold *(&X) into X
+                        if (argObj->gtOp.gtOp1->gtOper == GT_ADDR)
+                        {
+                            GenTree* temp = argObj->gtOp.gtOp1->gtOp.gtOp1;
 
-                        argObj          = temp;
-                        *parentOfArgObj = temp;
+                            // Keep the DONT_CSE flag in sync
+                            // (as the addr always marks it for its op1)
+                            temp->gtFlags &= ~GTF_DONT_CSE;
+                            temp->gtFlags |= (argObj->gtFlags & GTF_DONT_CSE);
+                            DEBUG_DESTROY_NODE(argObj->gtOp.gtOp1); // GT_ADDR
+                            DEBUG_DESTROY_NODE(argObj);             // GT_IND
 
-                        // If the OBJ had been the top level node, we've now changed argx.
-                        if (parentOfArgObj == parentArgx)
-                        {
-                            argx = temp;
+                            argObj          = temp;
+                            *parentOfArgObj = temp;
+
+                            // If the OBJ had been the top level node, we've now changed argx.
+                            if (parentOfArgObj == parentArgx)
+                            {
+                                argx = temp;
+                            }
                         }
                     }
                     if (argObj->gtOper == GT_LCL_VAR)
@@ -4043,7 +4054,6 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
                     size = 1;
                 }
 
-#endif // not _TARGET_X86_
 #ifndef UNIX_AMD64_ABI
                 // We still have a struct unless we converted the GT_OBJ into a GT_IND above...
                 if (varTypeIsStruct(structBaseType) && !argEntry->passedByRef)
@@ -4075,8 +4085,9 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
                         size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items
                     }
                 }
-#endif // UNIX_AMD64_ABI
+#endif // !UNIX_AMD64_ABI
             }
+#endif // !_TARGET_X86_
         }
 
         if (argEntry->isPassedInRegisters())
@@ -4446,12 +4457,8 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry
                 lcl = actualArg->gtGetOp1()->gtGetOp1()->AsLclVarCommon();
             }
         }
-        else
+        else if (actualArg->OperGet() == GT_LCL_VAR)
         {
-            assert(actualArg->OperGet() == GT_LCL_VAR);
-
-            // We need to construct a `GT_OBJ` node for the argument,
-            // so we need to get the address of the lclVar.
             lcl = actualArg->AsLclVarCommon();
         }
         if (lcl != nullptr)
@@ -4480,7 +4487,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry
 #if FEATURE_MULTIREG_ARGS
     // Examine 'arg' and setup argValue objClass and structSize
     //
-    CORINFO_CLASS_HANDLE objClass   = NO_CLASS_HANDLE;
+    CORINFO_CLASS_HANDLE objClass   = gtGetStructHandleIfPresent(arg);
     GenTree*             argValue   = arg; // normally argValue will be arg, but see right below
     unsigned             structSize = 0;
 
@@ -4488,7 +4495,8 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry
     {
         GenTreeObj* argObj = arg->AsObj();
         objClass           = argObj->gtClass;
-        structSize         = info.compCompHnd->getClassSize(objClass);
+        structSize         = argObj->Size();
+        assert(structSize == info.compCompHnd->getClassSize(objClass));
 
         // If we have a GT_OBJ of a GT_ADDR then we set argValue to the child node of the GT_ADDR.
         GenTree* op1 = argObj->gtOp1;
@@ -4511,10 +4519,15 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry
         assert(varNum < lvaCount);
         LclVarDsc* varDsc = &lvaTable[varNum];
 
-        objClass   = lvaGetStruct(varNum);
         structSize = varDsc->lvExactSize;
+        assert(structSize == info.compCompHnd->getClassSize(objClass));
     }
-    noway_assert(objClass != nullptr);
+    else
+    {
+        objClass   = gtGetStructHandleIfPresent(arg);
+        structSize = info.compCompHnd->getClassSize(objClass);
+    }
+    noway_assert(objClass != NO_CLASS_HANDLE);
 
     var_types hfaType                 = TYP_UNDEF;
     var_types elemType                = TYP_UNDEF;
@@ -6664,6 +6677,7 @@ void Compiler::fgMorphCallInline(GenTreeCall* call, InlineResult* inlineResult)
             // Detach the GT_CALL tree from the original statement by
             // hanging a "nothing" node to it. Later the "nothing" node will be removed
             // and the original GT_CALL tree will be picked up by the GT_RET_EXPR node.
+
             noway_assert(fgMorphStmt->gtStmtExpr == call);
             fgMorphStmt->gtStmtExpr = gtNewNothingNode();
         }
@@ -7015,10 +7029,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee)
         if (varTypeIsStruct(argx))
         {
             // Actual arg may be a child of a GT_COMMA. Skip over comma opers.
-            while (argx->gtOper == GT_COMMA)
-            {
-                argx = argx->gtOp.gtOp2;
-            }
+            argx = argx->gtEffectiveVal(true /*commaOnly*/);
 
             // Get the size of the struct and see if it is register passable.
             CORINFO_CLASS_HANDLE objClass = nullptr;
@@ -8885,14 +8896,14 @@ GenTree* Compiler::fgMorphOneAsgBlockOp(GenTree* tree)
     noway_assert(tree->OperIsBlkOp());
     var_types asgType = tree->TypeGet();
 
-    GenTree*   asg         = tree;
-    GenTree*   dest        = asg->gtGetOp1();
-    GenTree*   src         = asg->gtGetOp2();
-    unsigned   destVarNum  = BAD_VAR_NUM;
-    LclVarDsc* destVarDsc  = nullptr;
-    GenTree*   lclVarTree  = nullptr;
-    bool       isCopyBlock = asg->OperIsCopyBlkOp();
-    bool       isInitBlock = !isCopyBlock;
+    GenTree*   asg            = tree;
+    GenTree*   dest           = asg->gtGetOp1();
+    GenTree*   src            = asg->gtGetOp2();
+    unsigned   destVarNum     = BAD_VAR_NUM;
+    LclVarDsc* destVarDsc     = nullptr;
+    GenTree*   destLclVarTree = nullptr;
+    bool       isCopyBlock    = asg->OperIsCopyBlkOp();
+    bool       isInitBlock    = !isCopyBlock;
 
     unsigned             size;
     CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE;
@@ -8917,9 +8928,9 @@ GenTree* Compiler::fgMorphOneAsgBlockOp(GenTree* tree)
     {
         GenTreeBlk* lhsBlk = dest->gtEffectiveVal()->AsBlk();
         size               = lhsBlk->Size();
-        if (impIsAddressInLocal(lhsBlk->Addr(), &lclVarTree))
+        if (impIsAddressInLocal(lhsBlk->Addr(), &destLclVarTree))
         {
-            destVarNum = lclVarTree->AsLclVarCommon()->gtLclNum;
+            destVarNum = destLclVarTree->AsLclVarCommon()->gtLclNum;
             destVarDsc = &(lvaTable[destVarNum]);
         }
         if (lhsBlk->OperGet() == GT_OBJ)
@@ -8936,9 +8947,9 @@ GenTree* Compiler::fgMorphOneAsgBlockOp(GenTree* tree)
             return tree;
         }
         noway_assert(dest->OperIsLocal());
-        lclVarTree = dest;
-        destVarNum = lclVarTree->AsLclVarCommon()->gtLclNum;
-        destVarDsc = &(lvaTable[destVarNum]);
+        destLclVarTree = dest;
+        destVarNum     = destLclVarTree->AsLclVarCommon()->gtLclNum;
+        destVarDsc     = &(lvaTable[destVarNum]);
         if (isCopyBlock)
         {
             clsHnd = destVarDsc->lvVerTypeInfo.GetClassHandle();
@@ -8960,47 +8971,76 @@ GenTree* Compiler::fgMorphOneAsgBlockOp(GenTree* tree)
     //       [dest] [src]
     //
 
-    if (size == REGSIZE_BYTES)
+    if (asgType == TYP_STRUCT)
     {
-        if (clsHnd == NO_CLASS_HANDLE)
+        if (size == REGSIZE_BYTES)
         {
-            // A register-sized cpblk can be treated as an integer asignment.
-            asgType = TYP_I_IMPL;
+            if (clsHnd == NO_CLASS_HANDLE)
+            {
+                // A register-sized cpblk can be treated as an integer assignment.
+                asgType = TYP_I_IMPL;
+            }
+            else
+            {
+                BYTE gcPtr;
+                info.compCompHnd->getClassGClayout(clsHnd, &gcPtr);
+                asgType = getJitGCType(gcPtr);
+            }
         }
         else
         {
-            BYTE gcPtr;
-            info.compCompHnd->getClassGClayout(clsHnd, &gcPtr);
-            asgType = getJitGCType(gcPtr);
+            switch (size)
+            {
+                case 1:
+                    asgType = TYP_BYTE;
+                    break;
+                case 2:
+                    asgType = TYP_SHORT;
+                    break;
+
+#ifdef _TARGET_64BIT_
+                case 4:
+                    asgType = TYP_INT;
+                    break;
+#endif // _TARGET_64BIT_
+            }
         }
     }
-    else
+
+    if ((destVarDsc != nullptr) && varTypeIsStruct(destLclVarTree) && destVarDsc->lvPromoted)
     {
-        switch (size)
-        {
-            case 1:
-                asgType = TYP_BYTE;
-                break;
-            case 2:
-                asgType = TYP_SHORT;
-                break;
+        // Let fgMorphCopyBlock handle it.
+        return nullptr;
+    }
 
-#ifdef _TARGET_64BIT_
-            case 4:
-                asgType = TYP_INT;
-                break;
-#endif // _TARGET_64BIT_
+    GenTree*   srcLclVarTree = nullptr;
+    LclVarDsc* srcVarDsc     = nullptr;
+    if (isCopyBlock)
+    {
+        if (src->OperGet() == GT_LCL_VAR)
+        {
+            srcLclVarTree = src;
+            srcVarDsc     = &(lvaTable[src->AsLclVarCommon()->gtLclNum]);
+        }
+        else if (src->OperIsIndir() && impIsAddressInLocal(src->gtOp.gtOp1, &srcLclVarTree))
+        {
+            srcVarDsc = &(lvaTable[srcLclVarTree->AsLclVarCommon()->gtLclNum]);
+        }
+        if ((srcVarDsc != nullptr) && varTypeIsStruct(srcLclVarTree) && srcVarDsc->lvPromoted)
+        {
+            // Let fgMorphCopyBlock handle it.
+            return nullptr;
         }
     }
 
-    // TODO-1stClassStructs: Change this to asgType != TYP_STRUCT.
-    if (!varTypeIsStruct(asgType))
+    if (asgType != TYP_STRUCT)
     {
+        noway_assert((size <= REGSIZE_BYTES) || varTypeIsSIMD(asgType));
+
         // For initBlk, a non constant source is not going to allow us to fiddle
         // with the bits to create a single assigment.
-        noway_assert(size <= REGSIZE_BYTES);
-
-        if (isInitBlock && !src->IsConstInitVal())
+        // Nor do we (for now) support transforming an InitBlock of SIMD type.
+        if (isInitBlock && (!src->IsConstInitVal() || varTypeIsSIMD(asgType)))
         {
             return nullptr;
         }
@@ -9026,15 +9066,15 @@ GenTree* Compiler::fgMorphOneAsgBlockOp(GenTree* tree)
             // holes, whose contents could be meaningful in unsafe code.  If we decide that's a valid
             // concern, then we could compromise, and say that address-exposed + fields do not completely cover the
             // memory of the struct prevent field-wise assignments.  Same situation exists for the "src" decision.
-            if (varTypeIsStruct(lclVarTree) && (destVarDsc->lvPromoted || destVarDsc->lvIsSIMDType()))
+            if (varTypeIsStruct(destLclVarTree) && (destVarDsc->lvPromoted || destVarDsc->lvIsSIMDType()))
             {
                 // Let fgMorphInitBlock handle it.  (Since we'll need to do field-var-wise assignments.)
                 return nullptr;
             }
-            else if (!varTypeIsFloating(lclVarTree->TypeGet()) && (size == genTypeSize(destVarDsc)))
+            else if (!varTypeIsFloating(destLclVarTree->TypeGet()) && (size == genTypeSize(destVarDsc)))
             {
                 // Use the dest local var directly, as well as its type.
-                dest    = lclVarTree;
+                dest    = destLclVarTree;
                 asgType = destVarDsc->lvType;
 
                 // If the block operation had been a write to a local var of a small int type,
@@ -9054,13 +9094,13 @@ GenTree* Compiler::fgMorphOneAsgBlockOp(GenTree* tree)
                 lvaSetVarDoNotEnregister(destVarNum DEBUGARG(DNER_LocalField));
 
                 // Mark the local var tree as a definition point of the local.
-                lclVarTree->gtFlags |= GTF_VAR_DEF;
+                destLclVarTree->gtFlags |= GTF_VAR_DEF;
                 if (size < destVarDsc->lvExactSize)
                 { // If it's not a full-width assignment....
-                    lclVarTree->gtFlags |= GTF_VAR_USEASG;
+                    destLclVarTree->gtFlags |= GTF_VAR_USEASG;
                 }
 
-                if (dest == lclVarTree)
+                if (dest == destLclVarTree)
                 {
                     dest = gtNewIndir(asgType, gtNewOperNode(GT_ADDR, TYP_BYREF, dest));
                 }
@@ -9106,41 +9146,28 @@ GenTree* Compiler::fgMorphOneAsgBlockOp(GenTree* tree)
             tree->gtFlags |= (dest->gtFlags & GTF_EXCEPT);
         }
 
-        LclVarDsc* srcVarDsc = nullptr;
         if (isCopyBlock)
         {
-            if (src->OperGet() == GT_LCL_VAR)
-            {
-                lclVarTree = src;
-                srcVarDsc  = &(lvaTable[src->AsLclVarCommon()->gtLclNum]);
-            }
-            else if (src->OperIsIndir() && impIsAddressInLocal(src->gtOp.gtOp1, &lclVarTree))
-            {
-                srcVarDsc = &(lvaTable[lclVarTree->AsLclVarCommon()->gtLclNum]);
-            }
             if (srcVarDsc != nullptr)
             {
-                if (varTypeIsStruct(lclVarTree) && (srcVarDsc->lvPromoted || srcVarDsc->lvIsSIMDType()))
-                {
-                    // Let fgMorphCopyBlock handle it.
-                    return nullptr;
-                }
-                else if (!varTypeIsFloating(lclVarTree->TypeGet()) &&
-                         size == genTypeSize(genActualType(lclVarTree->TypeGet())))
+                // Handled above.
+                assert(!varTypeIsStruct(srcLclVarTree) || !srcVarDsc->lvPromoted);
+                if (!varTypeIsFloating(srcLclVarTree->TypeGet()) &&
+                    size == genTypeSize(genActualType(srcLclVarTree->TypeGet())))
                 {
                     // Use the src local var directly.
-                    src = lclVarTree;
+                    src = srcLclVarTree;
                 }
                 else
                 {
                     // The source argument of the copyblk can potentially be accessed only through indir(addr(lclVar))
                     // or indir(lclVarAddr) in rational form and liveness won't account for these uses. That said,
                     // we have to mark this local as address exposed so we don't delete it as a dead store later on.
-                    unsigned lclVarNum                = lclVarTree->gtLclVarCommon.gtLclNum;
+                    unsigned lclVarNum                = srcLclVarTree->gtLclVarCommon.gtLclNum;
                     lvaTable[lclVarNum].lvAddrExposed = true;
                     lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_AddrExposed));
                     GenTree* srcAddr;
-                    if (src == lclVarTree)
+                    if (src == srcLclVarTree)
                     {
                         srcAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, src);
                         src     = gtNewOperNode(GT_IND, asgType, srcAddr);
@@ -9764,7 +9791,7 @@ GenTree* Compiler::fgMorphBlockOperand(GenTree* tree, var_types asgType, unsigne
 {
     GenTree* effectiveVal = tree->gtEffectiveVal();
 
-    if (!varTypeIsStruct(asgType))
+    if (asgType != TYP_STRUCT)
     {
         if (effectiveVal->OperIsIndir())
         {