Unify struct arg handling (#18358)
author Carol Eidt <carol.eidt@microsoft.com>
Tue, 12 Jun 2018 15:38:56 +0000 (08:38 -0700)
committer GitHub <noreply@github.com>
Tue, 12 Jun 2018 15:38:56 +0000 (08:38 -0700)
* Unify struct arg handling

Eliminate unnecessary struct copies, especially on Linux, and reduce code duplication.
Across all targets, use GT_FIELD_LIST to pass promoted structs on stack, and avoid
requiring a copy and/or marking `lvDoNotEnregister` for those cases.

Unify the specification of multi-reg args:
- numRegs now indicates the actual number of reg args (not the size in pointer-size units)
- regNums contains all the arg register numbers

15 files changed:
src/jit/codegenarmarch.cpp
src/jit/codegencommon.cpp
src/jit/codegenlinear.cpp
src/jit/codegenlinear.h
src/jit/codegenxarch.cpp
src/jit/compiler.cpp
src/jit/compiler.h
src/jit/gentree.cpp
src/jit/gentree.h
src/jit/lclvars.cpp
src/jit/lower.cpp
src/jit/lsraarmarch.cpp
src/jit/lsrabuild.cpp
src/jit/lsraxarch.cpp
src/jit/morph.cpp

index 17bb104..60f1121 100644 (file)
@@ -649,42 +649,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
 
         if (source->OperGet() == GT_FIELD_LIST)
         {
-            // Deal with the multi register passed struct args.
-            GenTreeFieldList* fieldListPtr = source->AsFieldList();
-
-#ifdef _TARGET_ARM64_
-            // Arm64 ABI does not include argument splitting between registers and stack
-            assert(fieldListPtr);
-            assert(fieldListPtr->gtFieldOffset == 0);
-#endif // _TARGET_ARM64_
-
-            // Evaluate each of the GT_FIELD_LIST items into their register
-            // and store their register into the outgoing argument area
-            for (; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest())
-            {
-                GenTree* nextArgNode = fieldListPtr->gtOp.gtOp1;
-                genConsumeReg(nextArgNode);
-
-                regNumber reg  = nextArgNode->gtRegNum;
-                var_types type = nextArgNode->TypeGet();
-                emitAttr  attr = emitTypeSize(type);
-
-#ifdef _TARGET_ARM64_
-                // Emit store instructions to store the registers produced by the GT_FIELD_LIST into the outgoing
-                // argument area
-                emit->emitIns_S_R(ins_Store(type), attr, reg, varNumOut, argOffsetOut + fieldListPtr->gtFieldOffset);
-
-                // We can't write beyound the outgoing area area
-                assert((argOffsetOut + fieldListPtr->gtFieldOffset + EA_SIZE_IN_BYTES(attr)) <= argOffsetMax);
-#else
-                // TODO-ARM-Bug?  The following code will pack copied structs
-                // Emit store instructions to store the registers produced by the GT_FIELD_LIST into the outgoing
-                // argument area
-                emit->emitIns_S_R(ins_Store(type), attr, reg, varNumOut, argOffsetOut);
-                argOffsetOut += EA_SIZE_IN_BYTES(attr);
-                assert(argOffsetOut <= argOffsetMax); // We can't write beyound the outgoing area area
-#endif // _TARGET_ARM64_
-            }
+            genPutArgStkFieldList(treeNode, varNumOut);
         }
         else // We must have a GT_OBJ or a GT_LCL_VAR
         {
index a449756..622c6a0 100644 (file)
@@ -10453,46 +10453,32 @@ instruction CodeGen::genMapShiftInsToShiftByConstantIns(instruction ins, int shi
 //    On x64 Windows the caller always creates slots (homing space) in its frame for the
 //    first 4 arguments of a callee (register passed args). So, the the variable number
 //    (lclNum) for the first argument with a stack slot is always 0.
-//    For System V systems or armarch, there is no such calling convention requirement, and the code needs to find
-//    the first stack passed argument from the caller. This is done by iterating over
+//    For System V systems or armarch, there is no such calling convention requirement, and the code
+//    needs to find the first stack passed argument from the caller. This is done by iterating over
 //    all the lvParam variables and finding the first with lvArgReg equals to REG_STK.
 //
 unsigned CodeGen::getFirstArgWithStackSlot()
 {
 #if defined(UNIX_AMD64_ABI) || defined(_TARGET_ARMARCH_)
     unsigned baseVarNum = 0;
-#if defined(FEATURE_UNIX_AMR64_STRUCT_PASSING)
-    baseVarNum = compiler->lvaFirstStackIncomingArgNum;
-
-    if (compiler->lvaFirstStackIncomingArgNum != BAD_VAR_NUM)
+    // Iterate over all the lvParam variables in the Lcl var table until we find the first one
+    // that's passed on the stack.
+    LclVarDsc* varDsc = nullptr;
+    for (unsigned i = 0; i < compiler->info.compArgsCount; i++)
     {
-        baseVarNum = compiler->lvaFirstStackIncomingArgNum;
-    }
-    else
-#endif // FEATURE_UNIX_ARM64_STRUCT_PASSING
-    {
-        // Iterate over all the local variables in the Lcl var table.
-        // They contain all the implicit arguments - thisPtr, retBuf,
-        // generic context, PInvoke cookie, var arg cookie,no-standard args, etc.
-        LclVarDsc* varDsc = nullptr;
-        for (unsigned i = 0; i < compiler->info.compArgsCount; i++)
-        {
-            varDsc = &(compiler->lvaTable[i]);
+        varDsc = &(compiler->lvaTable[i]);
 
-            // We are iterating over the arguments only.
-            assert(varDsc->lvIsParam);
+        // We should have found a stack parameter (and broken out of this loop) before
+        // we find any non-parameters.
+        assert(varDsc->lvIsParam);
 
-            if (varDsc->lvArgReg == REG_STK)
-            {
-                baseVarNum = i;
-#if defined(FEATURE_UNIX_AMR64_STRUCT_PASSING)
-                compiler->lvaFirstStackIncomingArgNum = baseVarNum;
-#endif // FEATURE_UNIX_ARM64_STRUCT_PASSING
-                break;
-            }
+        if (varDsc->lvArgReg == REG_STK)
+        {
+            baseVarNum = i;
+            break;
         }
-        assert(varDsc != nullptr);
     }
+    assert(varDsc != nullptr);
 
     return baseVarNum;
 #elif defined(_TARGET_AMD64_)
index 8d4cd55..8e1efa9 100644 (file)
@@ -1468,6 +1468,45 @@ void CodeGen::genConsumeArgSplitStruct(GenTreePutArgSplit* putArgNode)
 #endif
 
 //------------------------------------------------------------------------
+// genPutArgStkFieldList: Generate code for a putArgStk whose source is a GT_FIELD_LIST
+//
+// Arguments:
+//    putArgStk    - The putArgStk node
+//    outArgVarNum - The lclVar num for the argument
+//
+// Notes:
+//    The x86 version of this is in codegenxarch.cpp, and doesn't take an
+//    outArgVarNum, as it pushes its args onto the stack.
+//
+#ifndef _TARGET_X86_
+void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk, unsigned outArgVarNum)
+{
+    assert(putArgStk->gtOp1->OperIs(GT_FIELD_LIST));
+
+    // Evaluate each of the GT_FIELD_LIST items into their register
+    // and store their register into the outgoing argument area.
+    unsigned argOffset = putArgStk->getArgOffset();
+    for (GenTreeFieldList* fieldListPtr = putArgStk->gtOp1->AsFieldList(); fieldListPtr != nullptr;
+         fieldListPtr                   = fieldListPtr->Rest())
+    {
+        GenTree* nextArgNode = fieldListPtr->gtOp.gtOp1;
+        genConsumeReg(nextArgNode);
+
+        regNumber reg  = nextArgNode->gtRegNum;
+        var_types type = nextArgNode->TypeGet();
+        emitAttr  attr = emitTypeSize(type);
+
+        // Emit store instructions to store the registers produced by the GT_FIELD_LIST into the outgoing
+        // argument area
+        getEmitter()->emitIns_S_R(ins_Store(type), attr, reg, outArgVarNum, argOffset);
+        argOffset += EA_SIZE_IN_BYTES(attr);
+        // We can't write beyond the arg area
+        assert(argOffset <= compiler->lvaLclSize(outArgVarNum));
+    }
+}
+#endif // !_TARGET_X86_
+
+//------------------------------------------------------------------------
 // genSetBlockSize: Ensure that the block size is in the given register
 //
 // Arguments:
index 5a8df9c..140bc98 100644 (file)
@@ -273,6 +273,10 @@ void AddNestedAlignment(unsigned adjustment)
 
 #endif
 
+#ifndef _TARGET_X86_
+void genPutArgStkFieldList(GenTreePutArgStk* putArgStk, unsigned outArgVarNum);
+#endif // !_TARGET_X86_
+
 #ifdef FEATURE_PUT_STRUCT_ARG_STK
 #ifdef _TARGET_X86_
 bool genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk);
index b248743..a6fd7cc 100644 (file)
@@ -5012,11 +5012,7 @@ void CodeGen::genCallInstruction(GenTreeCall* call)
                 genConsumeReg(putArgRegNode);
 
                 // Validate the putArgRegNode has the right type.
-                assert(putArgRegNode->TypeGet() ==
-                       compiler->GetTypeFromClassificationAndSizes(curArgTabEntry->structDesc
-                                                                       .eightByteClassifications[iterationNum],
-                                                                   curArgTabEntry->structDesc
-                                                                       .eightByteSizes[iterationNum]));
+                assert(varTypeIsFloating(putArgRegNode->TypeGet()) == genIsValidFloatReg(argReg));
                 if (putArgRegNode->gtRegNum != argReg)
                 {
                     inst_RV_RV(ins_Move_Extend(putArgRegNode->TypeGet(), false), argReg, putArgRegNode->gtRegNum);
@@ -7915,6 +7911,11 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk)
             m_stkArgVarNum = BAD_VAR_NUM;
             return;
         }
+        else if (data->OperIs(GT_FIELD_LIST))
+        {
+            genPutArgStkFieldList(putArgStk, baseVarNum);
+            return;
+        }
 #endif // UNIX_AMD64_ABI
 
         noway_assert(targetType != TYP_STRUCT);
index a75ad6c..be1067d 100644 (file)
@@ -605,12 +605,12 @@ var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS
             useType = TYP_SHORT;
             break;
 
-#ifndef _TARGET_XARCH_
+#if !defined(_TARGET_XARCH_) || defined(UNIX_AMD64_ABI)
         case 3:
             useType = TYP_INT;
             break;
 
-#endif // _TARGET_XARCH_
+#endif // !_TARGET_XARCH_ || UNIX_AMD64_ABI
 
 #ifdef _TARGET_64BIT_
         case 4:
@@ -625,14 +625,14 @@ var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS
             }
             break;
 
-#ifndef _TARGET_XARCH_
+#if !defined(_TARGET_XARCH_) || defined(UNIX_AMD64_ABI)
         case 5:
         case 6:
         case 7:
             useType = TYP_I_IMPL;
             break;
 
-#endif // _TARGET_XARCH_
+#endif // !_TARGET_XARCH_ || UNIX_AMD64_ABI
 #endif // _TARGET_64BIT_
 
         case TARGET_POINTER_SIZE:
@@ -767,6 +767,9 @@ var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd,
     }
     assert(structSize > 0);
 
+// Determine if we can pass the struct as a primitive type.
+// Note that on x86 we never pass structs as primitive types (unless the VM unwraps them for us).
+#ifndef _TARGET_X86_
 #ifdef UNIX_AMD64_ABI
 
     // An 8-byte struct may need to be passed in a floating point register
@@ -775,32 +778,33 @@ var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd,
     SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
     eeGetSystemVAmd64PassStructInRegisterDescriptor(clsHnd, &structDesc);
 
-    // If we have one eightByteCount then we can set 'useType' based on that
-    if (structDesc.eightByteCount == 1)
+    if (structDesc.passedInRegisters && (structDesc.eightByteCount != 1))
     {
-        // Set 'useType' to the type of the first eightbyte item
+        // We can't pass this as a primitive type.
+    }
+    else if (structDesc.eightByteClassifications[0] == SystemVClassificationTypeSSE)
+    {
+        // If this is passed as a floating type, use that.
+        // Otherwise, we'll use the general case - we don't want to use the "EightByteType"
+        // directly, because it returns `TYP_INT` for any integral type <= 4 bytes, and
+        // we need to preserve small types.
         useType = GetEightByteType(structDesc, 0);
     }
+    else
+#endif // UNIX_AMD64_ABI
 
-#elif defined(_TARGET_X86_)
-
-    // On x86 we never pass structs as primitive types (unless the VM unwraps them for us)
-    useType = TYP_UNKNOWN;
-
-#else // all other targets
-
-    // The largest primitive type is 8 bytes (TYP_DOUBLE)
-    // so we can skip calling getPrimitiveTypeForStruct when we
-    // have a struct that is larger than that.
-    //
-    if (structSize <= sizeof(double))
+        // The largest primitive type is 8 bytes (TYP_DOUBLE)
+        // so we can skip calling getPrimitiveTypeForStruct when we
+        // have a struct that is larger than that.
+        //
+        if (structSize <= sizeof(double))
     {
         // We set the "primitive" useType based upon the structSize
         // and also examine the clsHnd to see if it is an HFA of count one
         useType = getPrimitiveTypeForStruct(structSize, clsHnd);
     }
 
-#endif // all other targets
+#endif // !_TARGET_X86_
 
     // Did we change this struct type into a simple "primitive" type?
     //
@@ -834,7 +838,7 @@ var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd,
 #ifdef UNIX_AMD64_ABI
 
                 // The case of (structDesc.eightByteCount == 1) should have already been handled
-                if (structDesc.eightByteCount > 1)
+                if ((structDesc.eightByteCount > 1) || !structDesc.passedInRegisters)
                 {
                     // setup wbPassType and useType indicate that this is passed by value in multiple registers
                     //  (when all of the parameters registers are used, then the stack will be used)
index c702aad..28771e9 100644 (file)
@@ -132,7 +132,7 @@ unsigned ReinterpretHexAsDecimal(unsigned);
 #if defined(_TARGET_XARCH_)
 const unsigned TEMP_MAX_SIZE = YMM_REGSIZE_BYTES;
 #elif defined(_TARGET_ARM64_)
-const unsigned TEMP_MAX_SIZE = FP_REGSIZE_BYTES;
+const unsigned       TEMP_MAX_SIZE = FP_REGSIZE_BYTES;
 #endif // defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_)
 #else  // !FEATURE_SIMD
 const unsigned TEMP_MAX_SIZE = sizeof(double);
@@ -1133,25 +1133,19 @@ struct FuncInfoDsc
 
 struct fgArgTabEntry
 {
-
-#if defined(UNIX_AMD64_ABI)
-    fgArgTabEntry()
-    {
-        otherRegNum = REG_NA;
-        isStruct    = false; // is this a struct arg
-    }
-#endif // defined(UNIX_AMD64_ABI)
-
     GenTree* node;   // Initially points at the Op1 field of 'parent', but if the argument is replaced with an GT_ASG or
-                     // placeholder
-                     //  it will point at the actual argument in the gtCallLateArgs list.
+                     // placeholder it will point at the actual argument in the gtCallLateArgs list.
     GenTree* parent; // Points at the GT_LIST node in the gtCallArgs for this argument
 
     unsigned argNum; // The original argument number, also specifies the required argument evaluation order from the IL
 
-    regNumber regNum; // The (first) register to use when passing this argument, set to REG_STK for arguments passed on
-                      // the stack
-    unsigned numRegs; // Count of number of registers that this argument uses
+private:
+    regNumberSmall regNums[MAX_ARG_REG_COUNT]; // The registers to use when passing this argument, set to REG_STK for
+                                               // arguments passed on the stack
+public:
+    unsigned numRegs; // The number of registers that this argument uses.
+                      // Note that on ARM, if we have a double hfa, this reflects the number
+                      // of DOUBLE registers.
 
     // A slot is a pointer sized region in the OutArg area.
     unsigned slotNum;  // When an argument is passed in the OutArg area this is the slot number in the OutArg area
@@ -1161,37 +1155,123 @@ struct fgArgTabEntry
     unsigned lateArgInx; // index into gtCallLateArgs list
     unsigned tmpNum;     // the LclVar number if we had to force evaluation of this arg
 
-    bool isSplit : 1;       // True when this argument is split between the registers and OutArg area
     bool needTmp : 1;       // True when we force this argument's evaluation into a temp LclVar
     bool needPlace : 1;     // True when we must replace this argument with a placeholder node
     bool isTmp : 1;         // True when we setup a temp LclVar for this argument due to size issues with the struct
     bool processed : 1;     // True when we have decided the evaluation order for this argument in the gtCallLateArgs
-    bool isHfaRegArg : 1;   // True when the argument is passed as a HFA in FP registers.
     bool isBackFilled : 1;  // True when the argument fills a register slot skipped due to alignment requirements of
                             // previous arguments.
     bool isNonStandard : 1; // True if it is an arg that is passed in a reg other than a standard arg reg, or is forced
                             // to be on the stack despite its arg list position.
+    bool isStruct : 1;      // True if this is a struct arg
+#ifdef _TARGET_ARM_
+    bool _isSplit : 1; // True when this argument is split between the registers and OutArg area
+#endif
+#ifdef FEATURE_HFA
+    bool _isHfaRegArg : 1; // True when the argument is passed as a HFA in FP registers.
+    bool _isDoubleHfa : 1; // True when the argument is passed as an HFA, with an element type of DOUBLE.
+#endif
+    __declspec(property(get = getRegNum)) regNumber regNum;
+    regNumber getRegNum()
+    {
+        return (regNumber)regNums[0];
+    }
+    __declspec(property(get = getOtherRegNum)) regNumber otherRegNum;
+    regNumber getOtherRegNum()
+    {
+        return (regNumber)regNums[1];
+    }
 
 #if defined(UNIX_AMD64_ABI)
-    bool isStruct : 1; // True if this is a struct arg
-
-    regNumber otherRegNum; // The (second) register to use when passing this argument.
-
     SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
-#elif !defined(_TARGET_64BIT_)
-    __declspec(property(get = getIsStruct)) bool isStruct;
-    bool getIsStruct()
+#endif
+
+    void setRegNum(unsigned int i, regNumber regNum)
     {
-        return varTypeIsStruct(node);
+        assert(i < MAX_ARG_REG_COUNT);
+        regNums[i] = (regNumberSmall)regNum;
+    }
+    regNumber getRegNum(unsigned int i)
+    {
+        assert(i < MAX_ARG_REG_COUNT);
+        return (regNumber)regNums[i];
     }
-#endif // !_TARGET_64BIT_
 
+    __declspec(property(get = getIsSplit, put = setIsSplit)) bool isSplit;
+    bool getIsSplit()
+    {
+#ifdef _TARGET_ARM_
+        return _isSplit;
+#else
+        return false;
+#endif
+    }
+    void setIsSplit(bool value)
+    {
 #ifdef _TARGET_ARM_
-    void SetIsHfaRegArg(bool hfaRegArg)
+        _isSplit = value;
+#endif
+    }
+
+    __declspec(property(get = getIsHfaRegArg)) bool isHfaRegArg;
+    bool getIsHfaRegArg()
+    {
+#ifdef FEATURE_HFA
+        return _isHfaRegArg;
+#else
+        return false;
+#endif
+    }
+
+    __declspec(property(get = getHfaType)) var_types hfaType;
+    var_types getHfaType()
+    {
+#ifdef FEATURE_HFA
+        return _isHfaRegArg ? (_isDoubleHfa ? TYP_DOUBLE : TYP_FLOAT) : TYP_UNDEF;
+#else
+        return TYP_UNDEF;
+#endif
+    }
+
+    void setHfaType(var_types type, unsigned hfaSlots)
     {
-        isHfaRegArg = hfaRegArg;
+#ifdef FEATURE_HFA
+        if (type != TYP_UNDEF)
+        {
+            unsigned numHfaRegs = hfaSlots;
+// We originally set numRegs according to the size of the struct, but if the size of the
+// hfaType is not the same as the pointer size, we need to correct it.
+// Note that hfaSlots is the number of registers we will use. For ARM, that is twice
+// the number of "double registers".
+#ifdef _TARGET_ARM_
+            if (type == TYP_DOUBLE)
+            {
+                // Must be an even number of registers.
+                assert((numRegs & 1) == 0);
+                numHfaRegs = hfaSlots / 2;
+            }
+            else
+#endif // _TARGET_ARM_
+            {
+                numHfaRegs = hfaSlots;
+            }
+            if (isHfaRegArg)
+            {
+                // This should already be set correctly.
+                assert(hfaType == type);
+                assert(numRegs == numHfaRegs);
+            }
+            else
+            {
+                _isDoubleHfa = (type == TYP_DOUBLE);
+                _isHfaRegArg = true;
+                numRegs      = numHfaRegs;
+            }
+        }
+#endif // FEATURE_HFA
     }
 
+#ifdef _TARGET_ARM_
     void SetIsBackFilled(bool backFilled)
     {
         isBackFilled = backFilled;
@@ -1202,12 +1282,6 @@ struct fgArgTabEntry
         return isBackFilled;
     }
 #else  // !_TARGET_ARM_
-    // To make the callers easier, we allow these calls (and the isHfaRegArg and isBackFilled data members) for all
-    // platforms.
-    void SetIsHfaRegArg(bool hfaRegArg)
-    {
-    }
-
     void SetIsBackFilled(bool backFilled)
     {
     }
@@ -1218,6 +1292,73 @@ struct fgArgTabEntry
     }
 #endif // !_TARGET_ARM_
 
+    bool isPassedInRegisters()
+    {
+        return !isSplit && (numRegs != 0);
+    }
+
+    bool isSingleRegOrSlot()
+    {
+        return !isSplit && ((numRegs == 1) || (numSlots == 1));
+    }
+
+    void SetMultiRegNums()
+    {
+#if FEATURE_MULTIREG_ARGS
+        if (numRegs == 1)
+        {
+            return;
+        }
+
+        regNumber argReg = getRegNum(0);
+#ifdef _TARGET_ARM_
+        unsigned int regSize = (hfaType == TYP_DOUBLE) ? 2 : 1;
+#else
+        unsigned int regSize       = 1;
+#endif
+        for (unsigned int regIndex = 1; regIndex < numRegs; regIndex++)
+        {
+            argReg = (regNumber)(argReg + regSize);
+            setRegNum(regIndex, argReg);
+        }
+#endif
+    }
+
+    // Check that the value of 'isStruct' is consistent.
+    // A struct arg must be one of the following:
+    // - A node of struct type,
+    // - A GT_FIELD_LIST, or
+    // - A node of a scalar type, passed in a single register or slot
+    //   (or two slots in the case of a struct passed on the stack as TYP_DOUBLE).
+    //
+    void checkIsStruct()
+    {
+        if (isStruct)
+        {
+            if (!varTypeIsStruct(node) && !node->OperIs(GT_FIELD_LIST))
+            {
+                // This is the case where we are passing a struct as a primitive type.
+                // On most targets, this is always a single register or slot.
+                // However, on ARM this could be two slots if it is TYP_DOUBLE.
+                bool isPassedAsPrimitiveType = ((numRegs == 1) || ((numRegs == 0) && (numSlots == 1)));
+#ifdef _TARGET_ARM_
+                if (!isPassedAsPrimitiveType)
+                {
+                    if (node->TypeGet() == TYP_DOUBLE && numRegs == 0 && (numSlots == 2))
+                    {
+                        isPassedAsPrimitiveType = true;
+                    }
+                }
+#endif // _TARGET_ARM_
+                assert(isPassedAsPrimitiveType);
+            }
+        }
+        else
+        {
+            assert(!varTypeIsStruct(node));
+        }
+    }
+
 #ifdef DEBUG
     void Dump();
 #endif
@@ -1264,8 +1405,13 @@ public:
     fgArgInfo(Compiler* comp, GenTreeCall* call, unsigned argCount);
     fgArgInfo(GenTreeCall* newCall, GenTreeCall* oldCall);
 
-    fgArgTabEntry* AddRegArg(
-        unsigned argNum, GenTree* node, GenTree* parent, regNumber regNum, unsigned numRegs, unsigned alignment);
+    fgArgTabEntry* AddRegArg(unsigned  argNum,
+                             GenTree*  node,
+                             GenTree*  parent,
+                             regNumber regNum,
+                             unsigned  numRegs,
+                             unsigned  alignment,
+                             bool      isStruct);
 
 #ifdef UNIX_AMD64_ABI
     fgArgTabEntry* AddRegArg(unsigned                                                         argNum,
@@ -1275,15 +1421,12 @@ public:
                              unsigned                                                         numRegs,
                              unsigned                                                         alignment,
                              const bool                                                       isStruct,
-                             const regNumber                                                  otherRegNum   = REG_NA,
+                             const regNumber                                                  otherRegNum,
                              const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr = nullptr);
 #endif // UNIX_AMD64_ABI
 
-    fgArgTabEntry* AddStkArg(unsigned argNum,
-                             GenTree* node,
-                             GenTree* parent,
-                             unsigned numSlots,
-                             unsigned alignment UNIX_AMD64_ABI_ONLY_ARG(const bool isStruct));
+    fgArgTabEntry* AddStkArg(
+        unsigned argNum, GenTree* node, GenTree* parent, unsigned numSlots, unsigned alignment, bool isStruct);
 
     void           RemorphReset();
     fgArgTabEntry* RemorphRegArg(
@@ -2360,11 +2503,6 @@ public:
     unsigned short lvaTrackedCount;       // actual # of locals being tracked
     unsigned lvaTrackedCountInSizeTUnits; // min # of size_t's sufficient to hold a bit for all the locals being tracked
 
-#ifdef UNIX_AMD64_ABI
-    // Only for AMD64 System V cache the first caller stack homed argument.
-    unsigned lvaFirstStackIncomingArgNum; // First argument with stack slot in the caller.
-#endif                                    // !UNIX_AMD64_ABI
-
 #ifdef DEBUG
     VARSET_TP lvaTrackedVars; // set of tracked variables
 #endif
@@ -4411,7 +4549,7 @@ public:
 
     bool fgCastNeeded(GenTree* tree, var_types toType);
     GenTree* fgDoNormalizeOnStore(GenTree* tree);
-    GenTree* fgMakeTmpArgNode(unsigned tmpVarNum UNIX_AMD64_ABI_ONLY_ARG(const bool passedInRegisters));
+    GenTree* fgMakeTmpArgNode(fgArgTabEntry* curArgTabEntry);
 
     // The following check for loops that don't execute calls
     bool fgLoopCallMarked;
@@ -4739,8 +4877,7 @@ private:
     void fgMakeOutgoingStructArgCopy(GenTreeCall*         call,
                                      GenTree*             args,
                                      unsigned             argIndex,
-                                     CORINFO_CLASS_HANDLE copyBlkClass UNIX_AMD64_ABI_ONLY_ARG(
-                                         const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* structDescPtr));
+                                     CORINFO_CLASS_HANDLE copyBlkClass);
 
     void fgFixupStructReturn(GenTree* call);
     GenTree* fgMorphLocalVar(GenTree* tree, bool forceRemorph);
@@ -9276,7 +9413,6 @@ public:
                              unsigned __int8*     offset0,
                              unsigned __int8*     offset1);
 
-    void fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgument);
 #endif // defined(UNIX_AMD64_ABI)
 
     void fgMorphMultiregStructArgs(GenTreeCall* call);
index 88a02f3..1bc28e7 100644 (file)
@@ -11622,28 +11622,11 @@ void Compiler::gtGetLateArgMsg(
 #if FEATURE_MULTIREG_ARGS
             if (curArgTabEntry->numRegs >= 2)
             {
-                regNumber otherRegNum;
-#if defined(UNIX_AMD64_ABI)
-                assert(curArgTabEntry->numRegs == 2);
-                otherRegNum = curArgTabEntry->otherRegNum;
-#else
-                otherRegNum = (regNumber)(((unsigned)curArgTabEntry->regNum) + curArgTabEntry->numRegs - 1);
-#endif // UNIX_AMD64_ABI
-
-                if (listCount == -1)
-                {
-                    char seperator = (curArgTabEntry->numRegs == 2) ? ',' : '-';
-
-                    sprintf_s(bufp, bufLength, "arg%d %s%c%s%c", curArgTabEntry->argNum, compRegVarName(argReg),
-                              seperator, compRegVarName(otherRegNum), 0);
-                }
-                else // listCount is 0,1,2 or 3
-                {
-                    assert(listCount <= MAX_ARG_REG_COUNT);
-                    regNumber curReg = (listCount == 1) ? otherRegNum : (regNumber)((unsigned)(argReg) + listCount);
-                    sprintf_s(bufp, bufLength, "arg%d m%d %s%c", curArgTabEntry->argNum, listCount,
-                              compRegVarName(curReg), 0);
-                }
+                // listCount could be -1 but it is signed, so this comparison is OK.
+                assert(listCount <= MAX_ARG_REG_COUNT);
+                char separator = (curArgTabEntry->numRegs == 2) ? ',' : '-';
+                sprintf_s(bufp, bufLength, "arg%d %s%c%s%c", curArgTabEntry->argNum, compRegVarName(argReg), separator,
+                          compRegVarName(curArgTabEntry->getRegNum(curArgTabEntry->numRegs - 1)), 0);
             }
             else
 #endif
index e41819e..009646d 100644 (file)
@@ -5801,29 +5801,6 @@ inline bool GenTree::IsValidCallArgument()
 
 #else // FEATURE_MULTIREG_ARGS or FEATURE_PUT_STRUCT_ARG_STK
 
-#ifdef UNIX_AMD64_ABI
-        // For UNIX ABI we currently only allow a GT_FIELD_LIST of GT_LCL_FLDs nodes
-        GenTree* gtListPtr = this;
-        while (gtListPtr != nullptr)
-        {
-            // ToDo: fix UNIX_AMD64 so that we do not generate this kind of a List
-            //  Note the list as currently created is malformed, as the last entry is a nullptr
-            if (gtListPtr->Current() == nullptr)
-            {
-                break;
-            }
-
-            // Only a list of GT_LCL_FLDs is allowed
-            if (gtListPtr->Current()->OperGet() != GT_LCL_FLD)
-            {
-                return false;
-            }
-            gtListPtr = gtListPtr->MoveNext();
-        }
-#endif // UNIX_AMD64_ABI
-
-        // Note that for non-UNIX ABI the GT_FIELD_LIST may contain any node
-        //
         // We allow this GT_FIELD_LIST as an argument
         return true;
 
index 1a18700..4deec99 100644 (file)
@@ -79,9 +79,6 @@ void Compiler::lvaInit()
     lvaSIMDInitTempVarNum = BAD_VAR_NUM;
 #endif // FEATURE_SIMD
     lvaCurEpoch = 0;
-#ifdef UNIX_AMD64_ABI
-    lvaFirstStackIncomingArgNum = BAD_VAR_NUM;
-#endif // UNIX_AMD64_ABI
 }
 
 /*****************************************************************************/
@@ -853,8 +850,6 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo)
                 printf("Arg #%u    passed in register(s) ", varDscInfo->varNum);
                 bool isFloat = false;
 #if defined(UNIX_AMD64_ABI)
-                // In case of one eightbyte struct the type is already normalized earlier.
-                // The varTypeIsFloating(argType) is good for this case.
                 if (varTypeIsStruct(argType) && (structDesc.eightByteCount >= 1))
                 {
                     isFloat = varTypeIsFloating(firstEightByteType);
@@ -895,6 +890,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo)
                 else
 #endif // defined(UNIX_AMD64_ABI)
                 {
+                    isFloat            = varTypeIsFloating(argType);
                     unsigned regArgNum = genMapRegNumToRegArgNum(varDsc->lvArgReg, argType);
 
                     for (unsigned ix = 0; ix < cSlots; ix++, regArgNum++)
index 6671aa6..f45dadc 100644 (file)
@@ -329,6 +329,16 @@ GenTree* Lowering::LowerNode(GenTree* node)
             break;
 #endif
 
+#ifndef _TARGET_ARM_
+        // TODO-ARM-CQ: We should contain this as long as the offset fits.
+        case GT_OBJ:
+            if (node->AsObj()->Addr()->OperIsLocalAddr())
+            {
+                node->AsObj()->Addr()->SetContained();
+            }
+            break;
+#endif // !_TARGET_ARM_
+
         default:
             break;
     }
@@ -1007,18 +1017,7 @@ GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, fgArgTabEntry* inf
     bool     updateArgTable = true;
 
     bool isOnStack = true;
-#ifdef UNIX_AMD64_ABI
-    if (varTypeIsStruct(type))
-    {
-        isOnStack = !info->structDesc.passedInRegisters;
-    }
-    else
-    {
-        isOnStack = info->regNum == REG_STK;
-    }
-#else  // !UNIX_AMD64_ABI
-    isOnStack = info->regNum == REG_STK;
-#endif // !UNIX_AMD64_ABI
+    isOnStack      = info->regNum == REG_STK;
 
 #ifdef _TARGET_ARMARCH_
     // Mark contained when we pass struct
@@ -1098,134 +1097,17 @@ GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, fgArgTabEntry* inf
     {
         if (!isOnStack)
         {
-#if defined(UNIX_AMD64_ABI)
-            if (info->isStruct)
-            {
-                // The following code makes sure a register passed struct arg is moved to
-                // the register before the call is made.
-                // There are two cases (comments added in the code below.)
-                // 1. The struct is of size one eightbyte:
-                //    In this case a new tree is created that is GT_PUTARG_REG
-                //    with a op1 the original argument.
-                // 2. The struct is contained in 2 eightbytes:
-                //    in this case the arg comes as a GT_FIELD_LIST of two GT_LCL_FLDs
-                //     - the two eightbytes of the struct.
-                //    The code creates a GT_PUTARG_REG node for each GT_LCL_FLD in the GT_FIELD_LIST
-                //    and splices it in the list with the corresponding original GT_LCL_FLD tree as op1.
-
-                assert(info->structDesc.eightByteCount != 0);
-
-                if (info->structDesc.eightByteCount == 1)
-                {
-                    // clang-format off
-                    // Case 1 above: Create a GT_PUTARG_REG node with op1 of the original tree.
-                    //
-                    // Here the IR for this operation:
-                    // lowering call :
-                    //     N001(3, 2)[000017] ------ - N---- / --*  &lclVar   byref  V00 loc0
-                    //     N003(6, 5)[000052] * --XG------ - / --*  indir     int
-                    //     N004(3, 2)[000046] ------ - N---- + --*  &lclVar   byref  V02 tmp0
-                    //     (13, 11)[000070] -- - XG-- - R-- - arg0 in out + 00 / --*  storeIndir int
-                    //     N009(3, 4)[000054] ------ - N----arg0 in rdi + --*  lclFld    int    V02 tmp0[+0](last use)
-                    //     N011(33, 21)[000018] --CXG------ - *call      void   Test.Foo.test1
-                    //
-                    // args :
-                    //     lowering arg : (13, 11)[000070] -- - XG-- - R-- - *storeIndir int
-                    //
-                    // late :
-                    //    lowering arg : N009(3, 4)[000054] ------ - N----             *  lclFld    int    V02 tmp0[+0](last use)
-                    //    new node is : (3, 4)[000071] ------------             *  putarg_reg int    RV
-                    //
-                    // after :
-                    //    N001(3, 2)[000017] ------ - N---- / --*  &lclVar   byref  V00 loc0
-                    //    N003(6, 5)[000052] * --XG------ - / --*  indir     int
-                    //    N004(3, 2)[000046] ------ - N---- + --*  &lclVar   byref  V02 tmp0
-                    //    (13, 11)[000070] -- - XG-- - R-- - arg0 in out + 00 / --*  storeIndir int
-                    //    N009(3, 4)[000054] ------ - N---- | / --*  lclFld    int    V02 tmp0[+0](last use)
-                    //    (3, 4)[000071] ------------arg0 in rdi + --*  putarg_reg int    RV
-                    //    N011(33, 21)[000018] --CXG------ - *call      void   Test.Foo.test1
-                    //
-                    // clang-format on
-
-                    putArg = comp->gtNewPutArgReg(type, arg, info->regNum);
-                }
-                else if (info->structDesc.eightByteCount == 2)
-                {
-                    // clang-format off
-                    // Case 2 above: Convert the LCL_FLDs to PUTARG_REG
-                    //
-                    // lowering call :
-                    //     N001(3, 2)  [000025] ------ - N----Source / --*  &lclVar   byref  V01 loc1
-                    //     N003(3, 2)  [000056] ------ - N----Destination + --*  &lclVar   byref  V03 tmp1
-                    //     N006(1, 1)  [000058] ------------ + --*  const     int    16
-                    //     N007(12, 12)[000059] - A--G---- - L - arg0 SETUP / --*  copyBlk   void
-                    //     N009(3, 4)  [000061] ------ - N----arg0 in rdi + --*  lclFld    long   V03 tmp1[+0]
-                    //     N010(3, 4)  [000063] ------------arg0 in rsi + --*  lclFld    long   V03 tmp1[+8](last use)
-                    //     N014(40, 31)[000026] --CXG------ - *call      void   Test.Foo.test2
-                    //
-                    // args :
-                    //     lowering arg : N007(12, 12)[000059] - A--G---- - L - *copyBlk   void
-                    //
-                    // late :
-                    //     lowering arg : N012(11, 13)[000065] ------------             *  <list>    struct
-                    //
-                    // after :
-                    //     N001(3, 2)[000025] ------ - N----Source / --*  &lclVar   byref  V01 loc1
-                    //     N003(3, 2)[000056] ------ - N----Destination + --*  &lclVar   byref  V03 tmp1
-                    //     N006(1, 1)[000058] ------------ + --*  const     int    16
-                    //     N007(12, 12)[000059] - A--G---- - L - arg0 SETUP / --*  copyBlk   void
-                    //     N009(3, 4)[000061] ------ - N---- | / --*  lclFld    long   V03 tmp1[+0]
-                    //     (3, 4)[000072] ------------arg0 in rdi + --*  putarg_reg long
-                    //     N010(3, 4)[000063] ------------ | / --*  lclFld    long   V03 tmp1[+8](last use)
-                    //     (3, 4)[000073] ------------arg0 in rsi + --*  putarg_reg long
-                    //     N014(40, 31)[000026] --CXG------ - *call      void   Test.Foo.test2
-                    //
-                    // clang-format on
-
-                    assert(arg->OperGet() == GT_FIELD_LIST);
-
-                    GenTreeFieldList* fieldListPtr = arg->AsFieldList();
-                    assert(fieldListPtr->IsFieldListHead());
-
-                    for (unsigned ctr = 0; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest(), ctr++)
-                    {
-                        // Create a new GT_PUTARG_REG node with op1 the original GT_LCL_FLD.
-                        GenTree* newOper = comp->gtNewPutArgReg(
-                            comp->GetTypeFromClassificationAndSizes(info->structDesc.eightByteClassifications[ctr],
-                                                                    info->structDesc.eightByteSizes[ctr]),
-                            fieldListPtr->gtOp.gtOp1, (ctr == 0) ? info->regNum : info->otherRegNum);
-
-                        // Splice in the new GT_PUTARG_REG node in the GT_FIELD_LIST
-                        ReplaceArgWithPutArgOrBitcast(&fieldListPtr->gtOp.gtOp1, newOper);
-
-                        // Initialize all the gtRegNum's since the list won't be traversed in an LIR traversal.
-                        fieldListPtr->gtRegNum = REG_NA;
-                    }
-
-                    // Just return arg. The GT_FIELD_LIST is not replaced.
-                    // Nothing more to do.
-                    return arg;
-                }
-                else
-                {
-                    assert(false && "Illegal count of eightbytes for the CLR type system"); // No more than 2 eightbytes
-                                                                                            // for the CLR.
-                }
-            }
-            else
-#else // not defined(UNIX_AMD64_ABI)
 #if FEATURE_MULTIREG_ARGS
             if ((info->numRegs > 1) && (arg->OperGet() == GT_FIELD_LIST))
             {
                 assert(arg->OperGet() == GT_FIELD_LIST);
 
-                GenTreeFieldList* fieldListPtr = arg->AsFieldList();
-                assert(fieldListPtr->IsFieldListHead());
-
-                // There could be up to 2-4 PUTARG_REGs in the list (3 or 4 can only occur for HFAs)
-                regNumber argReg = info->regNum;
-                for (unsigned ctr = 0; fieldListPtr != nullptr; fieldListPtr = fieldListPtr->Rest(), ctr++)
+                assert(arg->AsFieldList()->IsFieldListHead());
+                unsigned int regIndex = 0;
+                for (GenTreeFieldList* fieldListPtr = arg->AsFieldList(); fieldListPtr != nullptr;
+                     fieldListPtr                   = fieldListPtr->Rest())
                 {
+                    regNumber argReg = info->getRegNum(regIndex);
                     GenTree*  curOp  = fieldListPtr->gtOp.gtOp1;
                     var_types curTyp = curOp->TypeGet();
 
@@ -1234,17 +1116,8 @@ GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, fgArgTabEntry* inf
 
                     // Splice in the new GT_PUTARG_REG node in the GT_FIELD_LIST
                     ReplaceArgWithPutArgOrBitcast(&fieldListPtr->gtOp.gtOp1, newOper);
+                    regIndex++;
 
-                    // Update argReg for the next putarg_reg (if any)
-                    argReg = genRegArgNext(argReg);
-
-#if defined(_TARGET_ARM_)
-                    // A double register is modelled as an even-numbered single one
-                    if (fieldListPtr->Current()->TypeGet() == TYP_DOUBLE)
-                    {
-                        argReg = genRegArgNext(argReg);
-                    }
-#endif // _TARGET_ARM_
                     // Initialize all the gtRegNum's since the list won't be traversed in an LIR traversal.
                     fieldListPtr->gtRegNum = REG_NA;
                 }
@@ -1255,7 +1128,6 @@ GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, fgArgTabEntry* inf
             }
             else
 #endif // FEATURE_MULTIREG_ARGS
-#endif // not defined(UNIX_AMD64_ABI)
             {
                 putArg = comp->gtNewPutArgReg(type, arg, info->regNum);
             }
@@ -1270,7 +1142,7 @@ GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, fgArgTabEntry* inf
             // a result. So the type of its operand must be the correct type to push on the stack.
             // For a FIELD_LIST, this will be the type of the field (not the type of the arg),
             // but otherwise it is generally the type of the operand.
-            PUT_STRUCT_ARG_STK_ONLY(assert(info->isStruct == varTypeIsStruct(type)));
+            info->checkIsStruct();
             if ((arg->OperGet() != GT_FIELD_LIST))
             {
 #if defined(FEATURE_SIMD) && defined(FEATURE_PUT_STRUCT_ARG_STK)
@@ -1300,13 +1172,13 @@ GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, fgArgTabEntry* inf
             // pair copying using XMM registers or rep mov instructions.
             if (info->isStruct)
             {
-                // We use GT_OBJ for non-SIMD struct arguments. However, for
-                // SIMD arguments the GT_OBJ has already been transformed.
-                if (arg->gtOper != GT_OBJ)
+                // We use GT_OBJ only for non-lclVar, non-SIMD, non-FIELD_LIST struct arguments.
+                if (arg->OperIsLocal())
                 {
-                    assert(varTypeIsSIMD(arg));
+                    // This must have a type with a known size (SIMD or has been morphed to a primitive type).
+                    assert(arg->TypeGet() != TYP_STRUCT);
                 }
-                else
+                else if (arg->OperIs(GT_OBJ))
                 {
                     unsigned numRefs  = 0;
                     BYTE*    gcLayout = new (comp, CMK_Codegen) BYTE[info->numSlots];
@@ -1351,6 +1223,10 @@ GenTree* Lowering::NewPutArg(GenTreeCall* call, GenTree* arg, fgArgTabEntry* inf
                     }
 #endif // _TARGET_X86_
                 }
+                else if (!arg->OperIs(GT_FIELD_LIST))
+                {
+                    assert(varTypeIsSIMD(arg) || (info->numSlots == 1));
+                }
             }
 #endif // FEATURE_PUT_STRUCT_ARG_STK
         }
index 0afe0e2..e0886eb 100644 (file)
@@ -434,8 +434,7 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode)
                     // We will generate all of the code for the GT_PUTARG_STK and its child node
                     // as one contained operation
                     //
-                    BuildUse(objChild);
-                    srcCount = 1;
+                    srcCount = BuildOperandUses(objChild);
                 }
             }
             else
index ffacd7a..71f2f5c 100644 (file)
@@ -3067,16 +3067,40 @@ int LinearScan::BuildPutArgReg(GenTreeUnOp* node)
     assert(node->OperIsPutArgReg());
     regNumber argReg = node->gtRegNum;
     assert(argReg != REG_NA);
-    bool isSpecialPutArg = false;
-    int  srcCount        = 1;
+    bool     isSpecialPutArg = false;
+    int      srcCount        = 1;
+    GenTree* op1             = node->gtGetOp1();
 
-    // Set the register requirements for the node.
-    regMaskTP argMask = genRegMask(argReg);
+    // First, handle the GT_OBJ case, which loads into the arg register
+    // (so we don't set the use to prefer that register for the source address).
+    if (op1->OperIs(GT_OBJ))
+    {
+        GenTreeObj* obj  = op1->AsObj();
+        GenTree*    addr = obj->Addr();
+        unsigned    size = obj->gtBlkSize;
+        assert(size <= TARGET_POINTER_SIZE);
+        if (addr->OperIsLocalAddr())
+        {
+            // We don't need a source register.
+            assert(addr->isContained());
+            srcCount = 0;
+        }
+        else if (!isPow2(size))
+        {
+            // We'll need an internal register to do the odd-size load.
+            // This can only happen with integer registers.
+            assert(genIsValidIntReg(argReg));
+            buildInternalIntRegisterDefForNode(node);
+            BuildUse(addr);
+            buildInternalRegisterUses();
+        }
+        return srcCount;
+    }
 
     // To avoid redundant moves, have the argument operand computed in the
     // register in which the argument is passed to the call.
-    GenTree*     op1 = node->gtOp1;
-    RefPosition* use = BuildUse(op1, argMask);
+    regMaskTP    argMask = genRegMask(argReg);
+    RefPosition* use     = BuildUse(op1, argMask);
 
     if (supportsSpecialPutArg() && isCandidateLocalRef(op1) && ((op1->gtFlags & GTF_VAR_DEATH) == 0))
     {
index aa4640b..5383efa 100644 (file)
@@ -1460,7 +1460,6 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk)
     {
         assert(putArgStk->gtOp1->isContained());
 
-#ifdef _TARGET_X86_
         RefPosition* simdTemp   = nullptr;
         RefPosition* intTemp    = nullptr;
         unsigned     prevOffset = putArgStk->getArgSize();
@@ -1471,7 +1470,10 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk)
             GenTree* const  fieldNode   = current->Current();
             const var_types fieldType   = fieldNode->TypeGet();
             const unsigned  fieldOffset = current->gtFieldOffset;
+
+#ifdef _TARGET_X86_
             assert(fieldType != TYP_LONG);
+#endif // _TARGET_X86_
 
 #if defined(FEATURE_SIMD)
             // Note that we need to check the GT_FIELD_LIST type, not 'fieldType'. This is because the
@@ -1483,6 +1485,7 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk)
             }
 #endif // defined(FEATURE_SIMD)
 
+#ifdef _TARGET_X86_
             if (putArgStk->gtPutArgStkKind == GenTreePutArgStk::Kind::Push)
             {
                 // We can treat as a slot any field that is stored at a slot boundary, where the previous
@@ -1501,6 +1504,7 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk)
                     intTemp->registerAssignment &= allByteRegs();
                 }
             }
+#endif // _TARGET_X86_
 
             if (varTypeIsGC(fieldType))
             {
@@ -1508,6 +1512,7 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk)
             }
             prevOffset = fieldOffset;
         }
+
         for (GenTreeFieldList* current = putArgStk->gtOp1->AsFieldList(); current != nullptr; current = current->Rest())
         {
             GenTree* const fieldNode = current->Current();
@@ -1520,7 +1525,6 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk)
         buildInternalRegisterUses();
 
         return srcCount;
-#endif // _TARGET_X86_
     }
 
     GenTree*  src  = putArgStk->gtOp1;
index c4d6bee..bc5b6f9 100644 (file)
@@ -832,19 +832,10 @@ void fgArgTabEntry::Dump()
     if (regNum != REG_STK)
     {
         printf(", %u reg%s:", numRegs, numRegs == 1 ? "" : "s");
-        printf(" %s", getRegName(regNum));
-#if defined(UNIX_AMD64_ABI)
-        if (numRegs > 1)
-        {
-            printf(" %s", getRegName(otherRegNum));
-        }
-#else  // !UNIX_AMD64_ABI
-        // Note that for all other targets, we rely on the fact that arg regs are sequential.
-        for (unsigned i = 1; i < numRegs; i++)
+        for (unsigned i = 0; i < numRegs; i++)
         {
-            printf(" %s", getRegName((regNumber)(regNum + i)));
+            printf(" %s", getRegName(regNums[i]));
         }
-#endif // !UNIX_AMD64_ABI
     }
     if (numSlots > 0)
     {
@@ -887,6 +878,10 @@ void fgArgTabEntry::Dump()
     {
         printf(", isNonStandard");
     }
+    if (isStruct)
+    {
+        printf(", isStruct");
+    }
     printf("]\n");
 }
 #endif
@@ -1126,29 +1121,42 @@ void fgArgInfo::AddArg(fgArgTabEntry* curArgTabEntry)
     argCount++;
 }
 
-fgArgTabEntry* fgArgInfo::AddRegArg(
-    unsigned argNum, GenTree* node, GenTree* parent, regNumber regNum, unsigned numRegs, unsigned alignment)
+fgArgTabEntry* fgArgInfo::AddRegArg(unsigned  argNum,
+                                    GenTree*  node,
+                                    GenTree*  parent,
+                                    regNumber regNum,
+                                    unsigned  numRegs,
+                                    unsigned  alignment,
+                                    bool      isStruct)
 {
     fgArgTabEntry* curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;
 
-    curArgTabEntry->argNum        = argNum;
-    curArgTabEntry->node          = node;
-    curArgTabEntry->parent        = parent;
-    curArgTabEntry->regNum        = regNum;
-    curArgTabEntry->slotNum       = 0;
-    curArgTabEntry->numRegs       = numRegs;
-    curArgTabEntry->numSlots      = 0;
-    curArgTabEntry->alignment     = alignment;
-    curArgTabEntry->lateArgInx    = (unsigned)-1;
-    curArgTabEntry->tmpNum        = (unsigned)-1;
-    curArgTabEntry->isSplit       = false;
-    curArgTabEntry->isTmp         = false;
-    curArgTabEntry->needTmp       = false;
-    curArgTabEntry->needPlace     = false;
-    curArgTabEntry->processed     = false;
-    curArgTabEntry->isHfaRegArg   = false;
+    // Any additional register numbers are set by the caller.
+    // This is primarily because on ARM we don't yet know if it
+    // will be split or if it is a double HFA, so the number of registers
+    // may actually be less.
+    curArgTabEntry->setRegNum(0, regNum);
+
+    curArgTabEntry->argNum     = argNum;
+    curArgTabEntry->node       = node;
+    curArgTabEntry->parent     = parent;
+    curArgTabEntry->slotNum    = 0;
+    curArgTabEntry->numRegs    = numRegs;
+    curArgTabEntry->numSlots   = 0;
+    curArgTabEntry->alignment  = alignment;
+    curArgTabEntry->lateArgInx = (unsigned)-1;
+    curArgTabEntry->tmpNum     = (unsigned)-1;
+    curArgTabEntry->isSplit    = false;
+    curArgTabEntry->isTmp      = false;
+    curArgTabEntry->needTmp    = false;
+    curArgTabEntry->needPlace  = false;
+    curArgTabEntry->processed  = false;
+#ifdef FEATURE_HFA
+    curArgTabEntry->_isHfaRegArg = false;
+#endif
     curArgTabEntry->isBackFilled  = false;
     curArgTabEntry->isNonStandard = false;
+    curArgTabEntry->isStruct      = isStruct;
 
     hasRegArgs = true;
     AddArg(curArgTabEntry);
@@ -1166,16 +1174,18 @@ fgArgTabEntry* fgArgInfo::AddRegArg(unsigned
                                     const regNumber                                                  otherRegNum,
                                     const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr)
 {
-    fgArgTabEntry* curArgTabEntry = AddRegArg(argNum, node, parent, regNum, numRegs, alignment);
+    fgArgTabEntry* curArgTabEntry = AddRegArg(argNum, node, parent, regNum, numRegs, alignment, isStruct);
     assert(curArgTabEntry != nullptr);
 
-    // The node of the ArgTabEntry could change after remorphing - it could be rewritten to a cpyblk or a
-    // PlaceHolder node (in case of needed late argument, for example.)
-    // This requires using of an extra flag. At creation time the state is right, so
-    // and this assert enforces that.
-    assert((varTypeIsStruct(node) && isStruct) || (!varTypeIsStruct(node) && !isStruct));
-    curArgTabEntry->otherRegNum = otherRegNum; // Second reg for the struct
-    curArgTabEntry->isStruct    = isStruct;    // is this a struct arg
+    curArgTabEntry->isStruct = isStruct; // is this a struct arg
+
+    curArgTabEntry->checkIsStruct();
+    assert(numRegs <= 2);
+    if (numRegs == 2)
+    {
+        curArgTabEntry->setRegNum(1, otherRegNum);
+    }
+    curArgTabEntry->isStruct = isStruct; // is this a struct arg
 
     if (isStruct && structDescPtr != nullptr)
     {
@@ -1186,43 +1196,34 @@ fgArgTabEntry* fgArgInfo::AddRegArg(unsigned
 }
 #endif // defined(UNIX_AMD64_ABI)
 
-fgArgTabEntry* fgArgInfo::AddStkArg(unsigned argNum,
-                                    GenTree* node,
-                                    GenTree* parent,
-                                    unsigned numSlots,
-                                    unsigned alignment UNIX_AMD64_ABI_ONLY_ARG(const bool isStruct))
+fgArgTabEntry* fgArgInfo::AddStkArg(
+    unsigned argNum, GenTree* node, GenTree* parent, unsigned numSlots, unsigned alignment, bool isStruct)
 {
     fgArgTabEntry* curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;
 
     nextSlotNum = (unsigned)roundUp(nextSlotNum, alignment);
 
-#if defined(UNIX_AMD64_ABI)
-    // The node of the ArgTabEntry could change after remorphing - it could be rewritten to a cpyblk or a
-    // PlaceHolder node (in case of needed late argument, for example.)
-    // This reqires using of an extra flag. At creation time the state is right, so
-    // and this assert enforces that.
-    assert((varTypeIsStruct(node) && isStruct) || (!varTypeIsStruct(node) && !isStruct));
-    curArgTabEntry->isStruct = isStruct; // is this a struct arg
-#endif                                   // defined(UNIX_AMD64_ABI)
-
-    curArgTabEntry->argNum        = argNum;
-    curArgTabEntry->node          = node;
-    curArgTabEntry->parent        = parent;
-    curArgTabEntry->regNum        = REG_STK;
-    curArgTabEntry->slotNum       = nextSlotNum;
-    curArgTabEntry->numRegs       = 0;
-    curArgTabEntry->numSlots      = numSlots;
-    curArgTabEntry->alignment     = alignment;
-    curArgTabEntry->lateArgInx    = (unsigned)-1;
-    curArgTabEntry->tmpNum        = (unsigned)-1;
-    curArgTabEntry->isSplit       = false;
-    curArgTabEntry->isTmp         = false;
-    curArgTabEntry->needTmp       = false;
-    curArgTabEntry->needPlace     = false;
-    curArgTabEntry->processed     = false;
-    curArgTabEntry->isHfaRegArg   = false;
+    curArgTabEntry->setRegNum(0, REG_STK);
+    curArgTabEntry->argNum     = argNum;
+    curArgTabEntry->node       = node;
+    curArgTabEntry->parent     = parent;
+    curArgTabEntry->slotNum    = nextSlotNum;
+    curArgTabEntry->numRegs    = 0;
+    curArgTabEntry->numSlots   = numSlots;
+    curArgTabEntry->alignment  = alignment;
+    curArgTabEntry->lateArgInx = (unsigned)-1;
+    curArgTabEntry->tmpNum     = (unsigned)-1;
+    curArgTabEntry->isSplit    = false;
+    curArgTabEntry->isTmp      = false;
+    curArgTabEntry->needTmp    = false;
+    curArgTabEntry->needPlace  = false;
+    curArgTabEntry->processed  = false;
+#ifdef FEATURE_HFA
+    curArgTabEntry->_isHfaRegArg = false;
+#endif
     curArgTabEntry->isBackFilled  = false;
     curArgTabEntry->isNonStandard = false;
+    curArgTabEntry->isStruct      = isStruct;
 
     hasStackArgs = true;
     AddArg(curArgTabEntry);
@@ -2098,9 +2099,10 @@ void fgArgInfo::Dump(Compiler* compiler)
 // Return Value:
 //    the newly created temp var tree.
 
-GenTree* Compiler::fgMakeTmpArgNode(unsigned tmpVarNum UNIX_AMD64_ABI_ONLY_ARG(const bool passedInRegisters))
+GenTree* Compiler::fgMakeTmpArgNode(fgArgTabEntry* curArgTabEntry)
 {
-    LclVarDsc* varDsc = &lvaTable[tmpVarNum];
+    unsigned   tmpVarNum = curArgTabEntry->tmpNum;
+    LclVarDsc* varDsc    = &lvaTable[tmpVarNum];
     assert(varDsc->lvIsTemp);
     var_types type = varDsc->TypeGet();
 
@@ -2113,43 +2115,53 @@ GenTree* Compiler::fgMakeTmpArgNode(unsigned tmpVarNum UNIX_AMD64_ABI_ONLY_ARG(c
 
 #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) || defined(_TARGET_ARM_)
 
-#ifdef UNIX_AMD64_ABI
-
-        arg->gtFlags |= GTF_DONT_CSE;
-
-#else  // !UNIX_AMD64_ABI
-        // Can this type be passed in a single register?
+        // Can this type be passed as a primitive type?
         // If so, the following call will return the corresponding primitive type.
-        // Otherwise, it will return TYP_UNKNOWN and we will pass by reference.
-
-        bool                 passedInRegisters = false;
-        CORINFO_CLASS_HANDLE clsHnd            = varDsc->lvVerTypeInfo.GetClassHandle();
-        var_types            structBaseType    = getPrimitiveTypeForStruct(lvaLclExactSize(tmpVarNum), clsHnd);
+        // Otherwise, it will return TYP_UNKNOWN and we will pass it as a struct type.
 
-        if (structBaseType != TYP_UNKNOWN)
+        bool passedAsPrimitive = false;
+        if (curArgTabEntry->isSingleRegOrSlot())
         {
-            passedInRegisters = true;
-            type              = structBaseType;
+            CORINFO_CLASS_HANDLE clsHnd         = varDsc->lvVerTypeInfo.GetClassHandle();
+            var_types            structBaseType = getPrimitiveTypeForStruct(lvaLclExactSize(tmpVarNum), clsHnd);
+
+            if (structBaseType != TYP_UNKNOWN)
+            {
+                passedAsPrimitive = true;
+#if defined(UNIX_AMD64_ABI)
+                // TODO-Cleanup: This is inelegant, but eventually we'll track this in the fgArgTabEntry,
+                // and otherwise we'd have to either modify getPrimitiveTypeForStruct() to take
+                // a structDesc or call eeGetSystemVAmd64PassStructInRegisterDescriptor yet again.
+                //
+                if (genIsValidFloatReg(curArgTabEntry->regNum))
+                {
+                    if (structBaseType == TYP_INT)
+                    {
+                        structBaseType = TYP_FLOAT;
+                    }
+                    else
+                    {
+                        assert(structBaseType == TYP_LONG);
+                        structBaseType = TYP_DOUBLE;
+                    }
+                }
+#endif
+                type = structBaseType;
+            }
         }
-#endif // !UNIX_AMD64_ABI
 
         // If it is passed in registers, don't get the address of the var. Make it a
         // field instead. It will be loaded in registers with putarg_reg tree in lower.
-        if (passedInRegisters)
+        if (passedAsPrimitive)
         {
             arg->ChangeOper(GT_LCL_FLD);
             arg->gtType = type;
         }
         else
         {
-#ifdef UNIX_AMD64_ABI
-            // TODO-Cleanup: Fix this - we should never have an address that is TYP_STRUCT.
-            var_types addrType = type;
-#else
             var_types addrType = TYP_BYREF;
-#endif
-            arg      = gtNewOperNode(GT_ADDR, addrType, arg);
-            addrNode = arg;
+            arg                = gtNewOperNode(GT_ADDR, addrType, arg);
+            addrNode           = arg;
 
 #if FEATURE_MULTIREG_ARGS
 #ifdef _TARGET_ARM64_
@@ -2170,11 +2182,11 @@ GenTree* Compiler::fgMakeTmpArgNode(unsigned tmpVarNum UNIX_AMD64_ABI_ONLY_ARG(c
                 // values can be pessimizing, so enabling this may require some additional tuning).
                 arg->gtFlags |= GTF_DONT_CSE;
             }
-#elif defined(_TARGET_ARM_)
+#else
             // Always create an Obj of the temp to use it as a call argument.
             arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg);
             arg->gtFlags |= GTF_DONT_CSE;
-#endif // _TARGET_ARM_
+#endif // !_TARGET_ARM64_
 #endif // FEATURE_MULTIREG_ARGS
         }
 
@@ -2241,9 +2253,7 @@ void fgArgInfo::EvalArgsToTemps()
             if (curArgTabEntry->isTmp == true)
             {
                 // Create a copy of the temp to go into the late argument list
-                tmpVarNum = curArgTabEntry->tmpNum;
-                defArg    = compiler->fgMakeTmpArgNode(
-                    tmpVarNum UNIX_AMD64_ABI_ONLY_ARG(argTable[curInx]->structDesc.passedInRegisters));
+                defArg = compiler->fgMakeTmpArgNode(curArgTabEntry);
 
                 // mark the original node as a late argument
                 argx->gtFlags |= GTF_LATE_ARG;
@@ -2751,13 +2761,6 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
     }
 #endif // _TARGET_UNIX_
 
-#ifdef UNIX_AMD64_ABI
-    // If fgMakeOutgoingStructArgCopy is called and copies are generated, hasStackArgCopy is set
-    // to make sure to call EvalArgsToTemp. fgMakeOutgoingStructArgCopy just marks the argument
-    // to need a temp variable, and EvalArgsToTemp actually creates the temp variable node.
-    bool hasStackArgCopy = false;
-#endif
-
     // Data structure for keeping track of non-standard args. Non-standard args are those that are not passed
     // following the normal calling convention or in the normal argument registers. We either mark existing
     // arguments as non-standard (such as the x8 return buffer register on ARM64), or we manually insert the
@@ -3132,12 +3135,8 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
             assert(varTypeIsGC(call->gtCallObjp->gtType) || (call->gtCallObjp->gtType == TYP_I_IMPL));
 
             /* this is a register argument - put it in the table */
-            call->fgArgInfo->AddRegArg(argIndex, argx, nullptr, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1
-#ifdef UNIX_AMD64_ABI
-                                       ,
-                                       false, REG_STK, nullptr
-#endif // UNIX_AMD64_ABI
-                                       );
+            call->fgArgInfo->AddRegArg(argIndex, argx, nullptr, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1,
+                                       false UNIX_AMD64_ABI_ONLY_ARG(REG_STK) UNIX_AMD64_ABI_ONLY_ARG(nullptr));
         }
         // this can't be a struct.
         assert(argx->gtType != TYP_STRUCT);
@@ -3244,22 +3243,13 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
     SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
 #endif // UNIX_AMD64_ABI
 
-    bool hasStructArgument = false; // @TODO-ARM64-UNIX: Remove this bool during a future refactoring
-    // hasMultiregStructArgs is true if there are any structs that are eligible for passing
-    // in registers; this is true even if it is not actually passed in registers (i.e. because
-    // previous arguments have used up available argument registers).
+    // Note that this name is a bit of a misnomer - it indicates that there are struct args
+    // that occupy more than a single slot that are passed by value (not necessarily in regs).
     bool hasMultiregStructArgs = false;
     for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2, argIndex++)
     {
         GenTree** parentArgx = &args->gtOp.gtOp1;
 
-#if FEATURE_MULTIREG_ARGS
-        if (!hasStructArgument)
-        {
-            hasStructArgument = varTypeIsStruct(args->gtOp.gtOp1);
-        }
-#endif // FEATURE_MULTIREG_ARGS
-
         // Record the index of any nonStandard arg that we may be processing here, as we are
         // about to call fgMorphTree on it and fgMorphTree may replace it with a new tree.
         GenTree* orig_argx         = *parentArgx;
@@ -3286,25 +3276,8 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
             argx->gtType = TYP_I_IMPL;
         }
 
-        bool     passUsingFloatRegs;
-        unsigned argAlign = 1;
-        // Setup any HFA information about 'argx'
-        var_types hfaType  = GetHfaType(argx);
-        bool      isHfaArg = varTypeIsFloating(hfaType);
-        unsigned  hfaSlots = 0;
-
-        if (isHfaArg)
-        {
-            hfaSlots = GetHfaCount(argx);
-
-            // If we have a HFA struct it's possible we transition from a method that originally
-            // only had integer types to now start having FP types.  We have to communicate this
-            // through this flag since LSRA later on will use this flag to determine whether
-            // or not to track the FP register set.
-            //
-            compFloatingPointUsed = true;
-        }
-
+        bool                 passUsingFloatRegs;
+        unsigned             argAlign      = 1;
         unsigned             size          = 0;
         CORINFO_CLASS_HANDLE copyBlkClass  = nullptr;
         bool                 isRegArg      = false;
@@ -3318,8 +3291,40 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
             argEntry = gtArgEntryByArgNum(call, argIndex);
         }
 
-#ifdef _TARGET_ARM_
+        // Setup any HFA information about 'argx'
+        var_types hfaType  = TYP_UNDEF;
+        bool      isHfaArg = false;
+        unsigned  hfaSlots = 0;
+
+#ifdef FEATURE_HFA
+        if (reMorphing)
+        {
+            isHfaArg = argEntry->isHfaRegArg;
+            hfaType  = argEntry->hfaType;
+            hfaSlots = argEntry->numRegs;
+        }
+        else
+        {
+            hfaType = GetHfaType(argx);
+            if (varTypeIsFloating(hfaType))
+            {
+                isHfaArg = true;
+                hfaSlots = GetHfaCount(argx);
+            }
+        }
 
+        if (isHfaArg)
+        {
+            // If we have a HFA struct it's possible we transition from a method that originally
+            // only had integer types to now start having FP types.  We have to communicate this
+            // through this flag since LSRA later on will use this flag to determine whether
+            // or not to track the FP register set.
+            //
+            compFloatingPointUsed = true;
+        }
+#endif // FEATURE_HFA
+
+#ifdef _TARGET_ARM_
         bool passUsingIntRegs;
         if (reMorphing)
         {
@@ -3410,28 +3415,23 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
         var_types structBaseType   = TYP_STRUCT;
         unsigned  structSize       = 0;
 
-        bool isStructArg = varTypeIsStruct(argx);
+        bool isStructArg;
 
         if (reMorphing)
         {
-#if defined(UNIX_AMD64_ABI)
-            // Get the struct description for the already completed struct argument.
-            fgArgTabEntry* fgEntryPtr = gtArgEntryByNode(call, argx);
-            assert(fgEntryPtr != nullptr);
-
-            // As described in few other places, this can happen when the argx was morphed
-            // into an arg setup node - COPYBLK. The COPYBLK has always a type of void.
+            assert(argEntry != nullptr);
+            // Struct arguments may be morphed into a node that is not a struct type.
             // In such case the fgArgTabEntry keeps track of whether the original node (before morphing)
             // was a struct and the struct classification.
-            isStructArg = fgEntryPtr->isStruct;
+            isStructArg = argEntry->isStruct;
 
+#if defined(UNIX_AMD64_ABI)
             if (isStructArg)
             {
-                structDesc.CopyFrom(fgEntryPtr->structDesc);
+                structDesc.CopyFrom(argEntry->structDesc);
             }
 #endif // defined(UNIX_AMD64_ABI)
 
-            assert(argEntry != nullptr);
             if (argEntry->IsBackFilled())
             {
                 isRegArg         = true;
@@ -3451,6 +3451,13 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
                 isRegArg = true;
                 assert(argEntry->numRegs > 0);
                 size = argEntry->numRegs + argEntry->numSlots;
+#ifdef _TARGET_ARM_
+                if (argEntry->isHfaRegArg && (hfaType == TYP_DOUBLE))
+                {
+                    assert(!argEntry->isSplit);
+                    size <<= 1;
+                }
+#endif // _TARGET_ARM_
             }
 
             // This size has now been computed
@@ -3465,6 +3472,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
             // TARGET_POINTER_SIZE stack slots, or the sum of these if the argument is split between the registers and
             // the stack.
             //
+            isStructArg = varTypeIsStruct(argx);
             if (argx->IsArgPlaceHolderNode() || (!isStructArg))
             {
 #if defined(_TARGET_AMD64_)
@@ -3485,7 +3493,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
                     }
                 }
 #else // !UNIX_AMD64_ABI
-                size           = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot'
+                size       = 1; // On AMD64 Windows, all primitives fit in a single (64-bit) 'slot'
 #endif // UNIX_AMD64_ABI
 #elif defined(_TARGET_ARM64_)
                 if (isStructArg)
@@ -3569,7 +3577,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
                         size = 1;
                     }
 #else
-                    size                 = 2;
+                    size = 2;
 #endif
                 }
                 else // We must have a GT_OBJ with a struct type, but the GT_OBJ may be be a child of a GT_COMMA
@@ -3607,125 +3615,75 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
                     structPassingKind howToPassStruct;
                     structBaseType = getArgTypeForStruct(objClass, &howToPassStruct, originalSize);
 
-#ifdef _TARGET_ARM64_
+#if defined(_TARGET_ARM64_) || defined(UNIX_AMD64_ABI)
+                    // For ARM64 or AMD64/UX we can pass non-power-of-2 structs in a register.
                     if ((howToPassStruct == SPK_PrimitiveType) && // Passed in a single register
                         !isPow2(originalSize))                    // size is 3,5,6 or 7 bytes
                     {
-                        if (argObj->gtObj.gtOp1->IsVarAddr()) // Is the source a LclVar?
-                        {
-                            // For ARM64 we pass structs that are 3,5,6,7 bytes in size
-                            // we can read 4 or 8 bytes from the LclVar to pass this arg
-                            originalSize = genTypeSize(structBaseType);
-                        }
+                        originalSize = genTypeSize(structBaseType);
                     }
-#endif //  _TARGET_ARM64_
+#endif //  _TARGET_ARM64_ || UNIX_AMD64_ABI
 
-#ifdef UNIX_AMD64_ABI
-                    // On System V OS-es a struct is never passed by reference.
-                    // It is either passed by value on the stack or in registers.
-                    bool passStructInRegisters = false;
-#else  // !UNIX_AMD64_ABI
                     bool passStructByRef = false;
-#endif // !UNIX_AMD64_ABI
 
-                    // The following if-then-else needs to be carefully refactored.
-                    // Basically the else portion wants to turn a struct load (a GT_OBJ)
-                    // into a GT_IND of the appropriate size.
-                    // It can do this with structs sizes that are 1, 2, 4, or 8 bytes.
-                    // It can't do this when UNIX_AMD64_ABI is defined  (Why?)
-                    // TODO-Cleanup: Remove the #ifndef UNIX_AMD64_ABI below.
-                    // It also can't do this if we have a HFA arg,
-                    // unless we have a 1-elem HFA in which case we want to do the optimization.
+                    // Check to see if we can transform this struct load (GT_OBJ) into a GT_IND of the appropriate size.
+                    // That transformation is performed in the else clause of the if statement below.
+                    // Whether it can be done is platform-dependent:
+                    // - In general, it can be done for power of 2 structs that fit in a single register
+                    //   (or, for ARM64 and AMD64/UX, lclVars that are less than pointer size, see above).
+                    // - For ARM and ARM64 it must also be a non-HFA struct, or have a single field.
+                    // - This is irrelevant for X86, since structs are always passed by value on the stack.
+                    // Note that 'howToPassStruct' captures all but the power-of-2 requirement.
                     CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifndef _TARGET_X86_
-#ifndef UNIX_AMD64_ABI
                     // Check for struct argument with size 1, 2, 4 or 8 bytes
                     // As we can optimize these by turning them into a GT_IND of the correct type
                     //
                     // Check for cases that we cannot optimize:
-                    CLANG_FORMAT_COMMENT_ANCHOR;
-#ifdef _TARGET_ARM_
-                    if (((originalSize > TARGET_POINTER_SIZE) &&  // it is struct that is larger than a pointer
-                         howToPassStruct != SPK_PrimitiveType) || // it is struct that is not one double HFA
-                        !isPow2(originalSize) ||                  // it is not a power of two (1, 2, 4 or 8)
-                        (isHfaArg && (howToPassStruct != SPK_PrimitiveType))) // it is a one element HFA struct
-#else                                                                         // !_TARGET_ARM_
-                    if ((originalSize > TARGET_POINTER_SIZE) || // it is struct that is larger than a pointer
-                        !isPow2(originalSize) ||                // it is not a power of two (1, 2, 4 or 8)
-                        (isHfaArg && (hfaSlots != 1)))          // it is a one element HFA struct
-#endif                                                                        // !_TARGET_ARM_
-#endif                                                                        // UNIX_AMD64_ABI
+                    bool canTransformToInd = (howToPassStruct == SPK_PrimitiveType) && isPow2(originalSize);
+                    if (!canTransformToInd)
                     {
+                        GenTree* lclVar = fgIsIndirOfAddrOfLocal(argObj);
                         // Normalize 'size' to the number of pointer sized items
                         // 'size' is the number of register slots that we will use to pass the argument
                         size = roundupSize / TARGET_POINTER_SIZE;
 #if defined(_TARGET_AMD64_)
 #ifndef UNIX_AMD64_ABI
+                        // On Windows structs are always copied and passed by reference unless they are
+                        // passed by value in a single register.
                         size            = 1; // This must be copied to a temp and passed by address
                         passStructByRef = true;
                         copyBlkClass    = objClass;
-#else // UNIX_AMD64_ABI
-                        if (!structDesc.passedInRegisters)
+#else  // UNIX_AMD64_ABI
+                        // On Unix, structs are always passed by value.
+                        // We only need a copy if we have one of the following:
+                        // - We have a lclVar that has been promoted and is passed in registers.
+                        // - The sizes don't match.
+                        // - We have a vector intrinsic.
+                        // TODO-Amd64-Unix-CQ: The first and last case could and should be handled without copies.
+
+                        copyBlkClass = NO_CLASS_HANDLE;
+                        if (structDesc.passedInRegisters)
                         {
-                            GenTree* lclVar     = fgIsIndirOfAddrOfLocal(argObj);
-                            bool     needCpyBlk = false;
-                            if (lclVar != nullptr)
+                            if ((lclVar != nullptr) &&
+                                (lvaGetPromotionType(lclVar->gtLclVarCommon.gtLclNum) == PROMOTION_TYPE_INDEPENDENT))
                             {
-                                // If the struct is promoted to registers, it has to be materialized
-                                // on stack. We may want to support promoted structures in
-                                // codegening pugarg_stk instead of creating a copy here.
-                                LclVarDsc* varDsc = &lvaTable[lclVar->gtLclVarCommon.gtLclNum];
-                                needCpyBlk        = varDsc->lvPromoted;
-                            }
-                            else
-                            {
-                                // If simd16 comes from vector<t>, eeGetSystemVAmd64PassStructInRegisterDescriptor
-                                // sets structDesc.passedInRegisters to be false.
-                                //
-                                // GT_ADDR(GT_SIMD) is not a rationalized IR form and is not handled
-                                // by rationalizer. For now we will let SIMD struct arg to be copied to
-                                // a local. As part of cpblk rewrite, rationalizer will handle GT_ADDR(GT_SIMD)
-                                //
-                                // +--*  obj       simd16
-                                // |  \--*  addr      byref
-                                // |     |  /--*  lclVar    simd16 V05 loc4
-                                // |     \--*  simd      simd16 int -
-                                // |        \--*  lclVar    simd16 V08 tmp1
-                                //
-                                // TODO-Amd64-Unix: The rationalizer can be updated to handle this pattern,
-                                // so that we don't need to generate a copy here.
-                                GenTree* addr = argObj->gtOp.gtOp1;
-                                if (addr->OperGet() == GT_ADDR)
-                                {
-                                    GenTree* addrChild = addr->gtOp.gtOp1;
-                                    if (addrChild->OperIsSIMDorSimdHWintrinsic())
-                                    {
-                                        needCpyBlk = true;
-                                    }
-                                }
+                                copyBlkClass = objClass;
                             }
-                            passStructInRegisters = false;
-                            if (needCpyBlk)
+                            else if (originalSize != structSize)
                             {
                                 copyBlkClass = objClass;
                             }
                             else
                             {
-                                copyBlkClass = NO_CLASS_HANDLE;
+                                GenTree* addr = argObj->gtGetOp1();
+                                if (addr->OperIs(GT_ADDR) && addr->gtGetOp1()->OperIs(GT_SIMD, GT_HWIntrinsic))
+                                {
+                                    copyBlkClass = objClass;
+                                }
                             }
                         }
-                        else
-                        {
-                            // The objClass is used to materialize the struct on stack.
-                            // For SystemV, the code below generates copies for struct arguments classified
-                            // as register argument.
-                            // TODO-Amd64-Unix: We don't always need copies for this case. Struct arguments
-                            // can be passed on registers or can be copied directly to outgoing area.
-                            passStructInRegisters = true;
-                            copyBlkClass          = objClass;
-                        }
-
 #endif // UNIX_AMD64_ABI
 #elif defined(_TARGET_ARM64_)
                         if ((size > 2) && !isHfaArg)
@@ -3734,13 +3692,16 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
                             passStructByRef = true;
                             copyBlkClass    = objClass;
                         }
+                        else if ((originalSize != structSize) && (lclVar == nullptr))
+                        {
+                            copyBlkClass = objClass;
+                        }
 #endif
 
 #ifdef _TARGET_ARM_
                         // If we're passing a promoted struct local var,
                         // we may need to skip some registers due to alignment; record those.
-                        GenTree* lclVar = fgIsIndirOfAddrOfLocal(argObj);
-                        if (lclVar != NULL)
+                        if (lclVar != nullptr)
                         {
                             LclVarDsc* varDsc = &lvaTable[lclVar->gtLclVarCommon.gtLclNum];
                             if (varDsc->lvPromoted)
@@ -3760,9 +3721,6 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
                         }
 #endif // _TARGET_ARM_
                     }
-#ifndef UNIX_AMD64_ABI
-                    // TODO-Amd64-Unix: Since the else part below is disabled for UNIX_AMD64, copies are always
-                    // generated for struct 1, 2, 4, or 8.
                     else // We have a struct argument with size 1, 2, 4 or 8 bytes
                     {
                         // change our GT_OBJ into a GT_IND of the correct type.
@@ -3770,23 +3728,8 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
                         // size.
 
                         assert(howToPassStruct == SPK_PrimitiveType);
-
-                        // ToDo: remove this block as getArgTypeForStruct properly handles turning one element HFAs into
-                        // primitives
-                        if (isHfaArg)
-                        {
-#ifdef _TARGET_ARM_
-                            // If we reach here with an HFA arg it has to be a one element HFA
-                            // If HFA type is double and it has one element, hfaSlot is 2
-                            assert(hfaSlots == 1 || (hfaSlots == 2 && hfaType == TYP_DOUBLE));
-#else
-                            // If we reach here with an HFA arg it has to be a one element HFA
-                            assert(hfaSlots == 1);
-#endif
-                            structBaseType = hfaType; // change the indirection type to a floating point type
-                        }
-
                         noway_assert(structBaseType != TYP_UNKNOWN);
+                        assert(originalSize == genTypeSize(structBaseType));
 
                         argObj->ChangeOper(GT_IND);
 
@@ -3876,7 +3819,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
                             argObj->gtType = structBaseType;
                         }
                         assert(varTypeCanReg(argObj->TypeGet()) ||
-                               ((copyBlkClass != NO_CLASS_HANDLE) && varTypeIsIntegral(structBaseType)));
+                               ((copyBlkClass != NO_CLASS_HANDLE) && varTypeCanReg(structBaseType)));
 
                         size = 1;
 #ifdef _TARGET_ARM_
@@ -3886,17 +3829,11 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
                         }
 #endif
                     }
-#endif // UNIX_AMD64_ABI
 
 #endif // not _TARGET_X86_
+#ifndef UNIX_AMD64_ABI
                     // We still have a struct unless we converted the GT_OBJ into a GT_IND above...
-                    if (varTypeIsStruct(structBaseType) &&
-#if defined(UNIX_AMD64_ABI)
-                        !passStructInRegisters
-#else  // !defined(UNIX_AMD64_ABI)
-                        !passStructByRef
-#endif // !defined(UNIX_AMD64_ABI)
-                        )
+                    if (varTypeIsStruct(structBaseType) && !passStructByRef)
                     {
                         if (isHfaArg && passUsingFloatRegs)
                         {
@@ -3925,6 +3862,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
                             size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items
                         }
                     }
+#endif // UNIX_AMD64_ABI
                 }
 
 #if defined(_TARGET_64BIT_)
@@ -4192,15 +4130,19 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
             else
             {
                 // This is a register argument - put it in the table
-                newArgEntry = call->fgArgInfo->AddRegArg(argIndex, argx, args, nextRegNum, size, argAlign
-#if defined(UNIX_AMD64_ABI)
-                                                         ,
-                                                         isStructArg, nextOtherRegNum, &structDesc
-#endif // defined(UNIX_AMD64_ABI)
-                                                         );
+                newArgEntry = call->fgArgInfo->AddRegArg(argIndex, argx, args, nextRegNum, size, argAlign,
+                                                         isStructArg UNIX_AMD64_ABI_ONLY_ARG(nextOtherRegNum)
+                                                             UNIX_AMD64_ABI_ONLY_ARG(&structDesc));
+
+#ifdef FEATURE_HFA
+                if (!passUsingFloatRegs)
+                {
+                    // Note on ARM and ARM64 Windows, an HFA is passed in int regs for varargs
+                    hfaType = TYP_UNDEF;
+                }
+                newArgEntry->setHfaType(hfaType, hfaSlots);
+#endif // FEATURE_HFA
 
-                newArgEntry->SetIsHfaRegArg(passUsingFloatRegs &&
-                                            isHfaArg); // Note on Arm32 a HFA is passed in int regs for varargs
                 newArgEntry->SetIsBackFilled(isBackFilled);
                 newArgEntry->isNonStandard = isNonStandard;
             }
@@ -4217,12 +4159,28 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
 #if defined(UNIX_AMD64_ABI)
                 if (isStructArg)
                 {
+                    // For this case, we've already set the regNums in the argTabEntry
                     intArgRegNum += structIntRegs;
                     fltArgRegNum += structFloatRegs;
                 }
                 else
 #endif // defined(UNIX_AMD64_ABI)
                 {
+#ifdef _TARGET_ARM_
+                    // Check for a split (partially enregistered) struct
+                    if (!passUsingFloatRegs && (intArgRegNum + size) > MAX_REG_ARG)
+                    {
+                        // This indicates a partial enregistration of a struct type
+                        assert((isStructArg) || argx->OperIsFieldList() || argx->OperIsCopyBlkOp() ||
+                               (argx->gtOper == GT_COMMA && (args->gtFlags & GTF_ASG)));
+                        unsigned numRegsPartial = MAX_REG_ARG - intArgRegNum;
+                        assert((unsigned char)numRegsPartial == numRegsPartial);
+                        call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
+                        fgPtrArgCntCur += size - numRegsPartial;
+                    }
+#endif // _TARGET_ARM_
+
+                    newArgEntry->SetMultiRegNums();
                     if (passUsingFloatRegs)
                     {
                         fltArgRegNum += size;
@@ -4232,8 +4190,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
                         // we skip the corresponding floating point register argument
                         intArgRegNum = min(intArgRegNum + size, MAX_REG_ARG);
 #endif // WINDOWS_AMD64_ABI
-                        // There is no partial struct using float registers
-                        // on all supported architectures
+                        // No supported architecture supports partial structs using float registers.
                         assert(fltArgRegNum <= MAX_FLOAT_REG_ARG);
                     }
                     else
@@ -4241,22 +4198,9 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
                         // Increment intArgRegNum by 'size' registers
                         intArgRegNum += size;
 
-#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
+#ifdef WINDOWS_AMD64_ABI
                         fltArgRegNum = min(fltArgRegNum + size, MAX_FLOAT_REG_ARG);
-#endif // _TARGET_AMD64_
-#ifdef _TARGET_ARM_
-                        if (intArgRegNum > MAX_REG_ARG)
-                        {
-                            // This indicates a partial enregistration of a struct type
-                            assert((isStructArg) || argx->OperIsFieldList() || argx->OperIsCopyBlkOp() ||
-                                   (argx->gtOper == GT_COMMA && (args->gtFlags & GTF_ASG)));
-                            unsigned numRegsPartial = size - (intArgRegNum - MAX_REG_ARG);
-                            assert((unsigned char)numRegsPartial == numRegsPartial);
-                            call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
-                            intArgRegNum = MAX_REG_ARG;
-                            fgPtrArgCntCur += size - numRegsPartial;
-                        }
-#endif // _TARGET_ARM_
+#endif // WINDOWS_AMD64_ABI
                     }
                 }
             }
@@ -4275,18 +4219,14 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
             else
             {
                 // This is a stack argument - put it in the table
-                call->fgArgInfo->AddStkArg(argIndex, argx, args, size, argAlign UNIX_AMD64_ABI_ONLY_ARG(isStructArg));
+                call->fgArgInfo->AddStkArg(argIndex, argx, args, size, argAlign, isStructArg);
             }
         }
 
         if (copyBlkClass != NO_CLASS_HANDLE)
         {
             noway_assert(!reMorphing);
-            fgMakeOutgoingStructArgCopy(call, args, argIndex, copyBlkClass UNIX_AMD64_ABI_ONLY_ARG(&structDesc));
-
-#ifdef UNIX_AMD64_ABI
-            hasStackArgCopy = true;
-#endif
+            fgMakeOutgoingStructArgCopy(call, args, argIndex, copyBlkClass);
         }
 
         if (argx->gtOper == GT_MKREFANY)
@@ -4480,11 +4420,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
     // For UNIX_AMD64, the condition without hasStackArgCopy cannot catch
     // all cases of fgMakeOutgoingStructArgCopy() being called. hasStackArgCopy
     // is added to make sure to call EvalArgsToTemp.
-    if (!reMorphing && (call->fgArgInfo->HasRegArgs()
-#ifdef UNIX_AMD64_ABI
-                        || hasStackArgCopy
-#endif // UNIX_AMD64_ABI
-                        ))
+    if (!reMorphing && (call->fgArgInfo->HasRegArgs()))
     {
         // This is the first time that we morph this call AND it has register arguments.
         // Follow into the code below and do the 'defer or eval to temp' analysis.
@@ -4500,22 +4436,11 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
         }
     }
 
-#ifdef UNIX_AMD64_ABI
-
-    // Rewrite the struct args to be passed by value on stack or in registers.
-    fgMorphSystemVStructArgs(call, hasStructArgument);
-
-#else // !UNIX_AMD64_ABI
-
-    // In the future we can migrate UNIX_AMD64 to use this
-    // method instead of fgMorphSystemVStructArgs
     if (hasMultiregStructArgs)
     {
         fgMorphMultiregStructArgs(call);
     }
 
-#endif // UNIX_AMD64_ABI
-
 #ifdef DEBUG
     if (verbose)
     {
@@ -4528,189 +4453,6 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
 #pragma warning(pop)
 #endif
 
-#ifdef UNIX_AMD64_ABI
-// fgMorphSystemVStructArgs:
-//   Rewrite the struct args to be passed by value on stack or in registers.
-//
-// args:
-//   call: The call whose arguments need to be morphed.
-//   hasStructArgument: Whether this call has struct arguments.
-//
-void Compiler::fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgument)
-{
-    unsigned flagsSummary = 0;
-
-    if (hasStructArgument)
-    {
-        fgArgInfo* allArgInfo = call->fgArgInfo;
-
-        for (GenTree* args = call->gtCallArgs; args != nullptr; args = args->gtOp.gtOp2)
-        {
-            // For late arguments the arg tree that is overridden is in the gtCallLateArgs list.
-            // For such late args the gtCallArgList contains the setup arg node (evaluating the arg.)
-            // The tree from the gtCallLateArgs list is passed to the callee. The fgArgEntry node contains the mapping
-            // between the nodes in both lists. If the arg is not a late arg, the fgArgEntry->node points to itself,
-            // otherwise points to the list in the late args list.
-            bool           isLateArg  = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
-            fgArgTabEntry* fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
-            assert(fgEntryPtr != nullptr);
-            GenTree* argx     = fgEntryPtr->node;
-            GenTree* lateList = nullptr;
-            GenTree* lateNode = nullptr;
-
-            if (isLateArg)
-            {
-                for (GenTree* list = call->gtCallLateArgs; list; list = list->MoveNext())
-                {
-                    assert(list->OperIsList());
-
-                    GenTree* argNode = list->Current();
-                    if (argx == argNode)
-                    {
-                        lateList = list;
-                        lateNode = argNode;
-                        break;
-                    }
-                }
-                assert(lateList != nullptr && lateNode != nullptr);
-            }
-            GenTree* arg            = argx;
-            bool     argListCreated = false;
-
-            var_types type = arg->TypeGet();
-
-            if (varTypeIsStruct(type))
-            {
-                var_types originalType = type;
-                // If we have already processed the arg...
-                if (arg->OperGet() == GT_FIELD_LIST && varTypeIsStruct(arg))
-                {
-                    continue;
-                }
-
-                // If already OBJ it is set properly already.
-                if (arg->OperGet() == GT_OBJ)
-                {
-                    assert(!fgEntryPtr->structDesc.passedInRegisters);
-                    continue;
-                }
-
-                assert(arg->OperGet() == GT_LCL_VAR || arg->OperGet() == GT_LCL_FLD ||
-                       (arg->OperGet() == GT_ADDR &&
-                        (arg->gtOp.gtOp1->OperGet() == GT_LCL_FLD || arg->gtOp.gtOp1->OperGet() == GT_LCL_VAR)));
-
-                GenTreeLclVarCommon* lclCommon =
-                    arg->OperGet() == GT_ADDR ? arg->gtOp.gtOp1->AsLclVarCommon() : arg->AsLclVarCommon();
-                if (fgEntryPtr->structDesc.passedInRegisters)
-                {
-                    if (fgEntryPtr->structDesc.eightByteCount == 1)
-                    {
-                        // Change the type and below the code will change the LclVar to a LCL_FLD
-                        type = GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[0],
-                                                                 fgEntryPtr->structDesc.eightByteSizes[0]);
-                    }
-                    else if (fgEntryPtr->structDesc.eightByteCount == 2)
-                    {
-                        // Create LCL_FLD for each eightbyte.
-                        argListCreated = true;
-
-                        // First eightbyte.
-                        arg->AsLclFld()->gtFieldSeq = FieldSeqStore::NotAField();
-                        arg->gtType =
-                            GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[0],
-                                                              fgEntryPtr->structDesc.eightByteSizes[0]);
-                        GenTreeFieldList* fieldList =
-                            new (this, GT_FIELD_LIST) GenTreeFieldList(arg, 0, originalType, nullptr);
-                        fieldList->gtType = originalType; // Preserve the type. It is a special case.
-                        arg               = fieldList;
-
-                        // Second eightbyte.
-                        GenTreeLclFld* newLclField = new (this, GT_LCL_FLD)
-                            GenTreeLclFld(GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc
-                                                                                .eightByteClassifications[1],
-                                                                            fgEntryPtr->structDesc.eightByteSizes[1]),
-                                          lclCommon->gtLclNum, fgEntryPtr->structDesc.eightByteOffsets[1]);
-
-                        fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(newLclField, 0, originalType, fieldList);
-                        fieldList->gtType       = originalType; // Preserve the type. It is a special case.
-                        newLclField->gtFieldSeq = FieldSeqStore::NotAField();
-                    }
-                    else
-                    {
-                        assert(false && "More than two eightbytes detected for CLR."); // No more than two eightbytes
-                                                                                       // for the CLR.
-                    }
-                }
-
-                // If we didn't change the type of the struct, it means
-                // its classification doesn't support to be passed directly through a
-                // register, so we need to pass a pointer to the destination where
-                // where we copied the struct to.
-                if (!argListCreated)
-                {
-                    if (fgEntryPtr->structDesc.passedInRegisters)
-                    {
-                        arg->gtType = type;
-                    }
-                    else
-                    {
-                        // Make sure this is an addr node.
-                        if (arg->OperGet() != GT_ADDR && arg->OperGet() != GT_LCL_VAR_ADDR)
-                        {
-                            arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg);
-                        }
-
-                        assert(arg->OperGet() == GT_ADDR || arg->OperGet() == GT_LCL_VAR_ADDR);
-
-                        // Create an Obj of the temp to use it as a call argument.
-                        arg = gtNewObjNode(lvaGetStruct(lclCommon->gtLclNum), arg);
-                    }
-                }
-            }
-
-            if (argx != arg)
-            {
-                bool           isLateArg  = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
-                fgArgTabEntry* fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
-                assert(fgEntryPtr != nullptr);
-                GenTree* argx     = fgEntryPtr->node;
-                GenTree* lateList = nullptr;
-                GenTree* lateNode = nullptr;
-                if (isLateArg)
-                {
-                    for (GenTree* list = call->gtCallLateArgs; list; list = list->MoveNext())
-                    {
-                        assert(list->OperIsList());
-
-                        GenTree* argNode = list->Current();
-                        if (argx == argNode)
-                        {
-                            lateList = list;
-                            lateNode = argNode;
-                            break;
-                        }
-                    }
-                    assert(lateList != nullptr && lateNode != nullptr);
-                }
-
-                fgEntryPtr->node = arg;
-                if (isLateArg)
-                {
-                    lateList->gtOp.gtOp1 = arg;
-                }
-                else
-                {
-                    args->gtOp.gtOp1 = arg;
-                }
-            }
-        }
-    }
-
-    // Update the flags
-    call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
-}
-#endif // UNIX_AMD64_ABI
-
 //-----------------------------------------------------------------------------
 // fgMorphMultiregStructArgs:  Locate the TYP_STRUCT arguments and
 //                             call fgMorphMultiregStructArg on each of them.
@@ -4730,19 +4472,11 @@ void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call)
     unsigned   flagsSummary   = 0;
     fgArgInfo* allArgInfo     = call->fgArgInfo;
 
-    // Currently ARM64/ARM is using this method to morph the MultiReg struct args
-    //  in the future AMD64_UNIX will also use this method
-    CLANG_FORMAT_COMMENT_ANCHOR;
-
 #ifdef _TARGET_X86_
     assert(!"Logic error: no MultiregStructArgs for X86");
 #endif
-#ifdef _TARGET_AMD64_
-#if defined(UNIX_AMD64_ABI)
-    NYI_AMD64("fgMorphMultiregStructArgs (UNIX ABI)");
-#else  // WINDOWS_AMD64_ABI
+#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
     assert(!"Logic error: no MultiregStructArgs for Windows X64 ABI");
-#endif // !UNIX_AMD64_ABI
 #endif
 
     for (GenTree* args = call->gtCallArgs; args != nullptr; args = args->gtOp.gtOp2)
@@ -4839,7 +4573,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry
 {
     assert(varTypeIsStruct(arg->TypeGet()));
 
-#ifndef _TARGET_ARMARCH_
+#if !defined(_TARGET_ARMARCH_) && !defined(UNIX_AMD64_ABI)
     NYI("fgMorphMultiregStructArg requires implementation for this target");
 #endif
 
@@ -4852,7 +4586,6 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry
     {
         GenTreeLclVarCommon* lcl = nullptr;
 
-        // If already OBJ it is set properly already.
         if (arg->OperGet() == GT_OBJ)
         {
             if (arg->gtGetOp1()->OperIs(GT_ADDR) && arg->gtGetOp1()->gtGetOp1()->OperIs(GT_LCL_VAR))
@@ -4874,8 +4607,9 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry
             {
                 arg = fgMorphLclArgToFieldlist(lcl);
             }
-            else
+            else if (arg->TypeGet() == TYP_STRUCT)
             {
+                // If this is a non-register struct, it must be referenced from memory.
                 if (!arg->OperIs(GT_OBJ))
                 {
                     // Create an Obj of the temp to use it as a call argument.
@@ -4910,7 +4644,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry
             GenTree* underlyingTree = op1->gtOp.gtOp1;
 
             // Only update to the same type.
-            if ((underlyingTree->TypeGet() == argValue->TypeGet()) &&
+            if (underlyingTree->OperIs(GT_LCL_VAR) && (underlyingTree->TypeGet() == argValue->TypeGet()) &&
                 (objClass == gtGetStructHandleIfPresent(underlyingTree)))
             {
                 argValue = underlyingTree;
@@ -4949,28 +4683,27 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry
     }
     else
     {
-#ifdef _TARGET_ARM64_
-        assert(structSize <= 2 * TARGET_POINTER_SIZE);
-#elif defined(_TARGET_ARM_)
-        assert(structSize <= 4 * TARGET_POINTER_SIZE);
-#endif
-
-#ifdef _TARGET_ARM64_
-        BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE};
-        info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]);
-        elemCount = 2;
-        type[0]   = getJitGCType(gcPtrs[0]);
-        type[1]   = getJitGCType(gcPtrs[1]);
-#elif defined(_TARGET_ARM_)
-        BYTE gcPtrs[4] = {TYPE_GC_NONE, TYPE_GC_NONE, TYPE_GC_NONE, TYPE_GC_NONE};
-        elemCount      = (unsigned)roundUp(structSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE;
+        assert(structSize <= MAX_ARG_REG_COUNT * TARGET_POINTER_SIZE);
+        BYTE gcPtrs[MAX_ARG_REG_COUNT];
+        elemCount = (unsigned)roundUp(structSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE;
         info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]);
+
         for (unsigned inx = 0; inx < elemCount; inx++)
         {
-            type[inx] = getJitGCType(gcPtrs[inx]);
+#ifdef UNIX_AMD64_ABI
+            if (gcPtrs[inx] == TYPE_GC_NONE)
+            {
+                type[inx] = GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[inx],
+                                                              fgEntryPtr->structDesc.eightByteSizes[inx]);
+            }
+            else
+#endif // UNIX_AMD64_ABI
+            {
+                type[inx] = getJitGCType(gcPtrs[inx]);
+            }
         }
-#endif // _TARGET_ARM_
 
+#ifndef UNIX_AMD64_ABI
         if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR))
         {
             elemSize = TARGET_POINTER_SIZE;
@@ -5000,18 +4733,20 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry
                     case 2:
                         type[lastElem] = TYP_SHORT;
                         break;
-#ifdef _TARGET_ARM64_
+#if defined(_TARGET_ARM64_) || defined(UNIX_AMD64_ABI)
                     case 4:
                         type[lastElem] = TYP_INT;
                         break;
-#endif // _TARGET_ARM64_
+#endif // (_TARGET_ARM64_) || (UNIX_AMD64_ABI)
                     default:
                         noway_assert(!"NYI: odd sized struct in fgMorphMultiregStructArg");
                         break;
                 }
             }
         }
+#endif // !UNIX_AMD64_ABI
     }
+
     // We should still have a TYP_STRUCT
     assert(varTypeIsStruct(argValue->TypeGet()));
 
@@ -5041,6 +4776,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry
         }
 #endif // DEBUG
 
+#ifndef UNIX_AMD64_ABI
         // This local variable must match the layout of the 'objClass' type exactly
         if (varDsc->lvIsHfa())
         {
@@ -5057,7 +4793,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry
         }
         else
         {
-#ifdef _TARGET_ARM64_
+#if defined(_TARGET_ARM64_)
             // We must have a 16-byte struct (non-HFA)
             noway_assert(elemCount == 2);
 #elif defined(_TARGET_ARM_)
@@ -5083,8 +4819,9 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry
                 }
             }
         }
+#endif // !UNIX_AMD64_ABI
 
-#ifdef _TARGET_ARM64_
+#if defined(_TARGET_ARM64_) || defined(UNIX_AMD64_ABI)
         // Is this LclVar a promoted struct with exactly 2 fields?
         // TODO-ARM64-CQ: Support struct promoted HFA types here
         if (varDsc->lvPromoted && (varDsc->lvFieldCnt == 2) && !varDsc->lvIsHfa())
@@ -5217,7 +4954,7 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry
             LclVarDsc* varDsc = &lvaTable[varNum];
 
             unsigned baseOffset = (argValue->OperGet() == GT_LCL_FLD) ? argValue->gtLclFld.gtLclOffs : 0;
-            unsigned lastOffset = baseOffset + (elemCount * elemSize);
+            unsigned lastOffset = baseOffset + structSize;
 
             // The allocated size of our LocalVar must be at least as big as lastOffset
             assert(varDsc->lvSize() >= lastOffset);
@@ -5226,13 +4963,18 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry
             {
                 // alignment of the baseOffset is required
                 noway_assert((baseOffset % TARGET_POINTER_SIZE) == 0);
+#ifndef UNIX_AMD64_ABI
                 noway_assert(elemSize == TARGET_POINTER_SIZE);
+#endif
                 unsigned    baseIndex = baseOffset / TARGET_POINTER_SIZE;
                 const BYTE* gcPtrs    = varDsc->lvGcLayout; // Get the GC layout for the local variable
                 for (unsigned inx = 0; (inx < elemCount); inx++)
                 {
                     // The GC information must match what we setup using 'objClass'
-                    noway_assert(type[inx] == getJitGCType(gcPtrs[baseIndex + inx]));
+                    if ((gcPtrs[baseIndex + inx] != TYPE_GC_NONE) || varTypeGCtype(type[inx]))
+                    {
+                        noway_assert(type[inx] == getJitGCType(gcPtrs[baseIndex + inx]));
+                    }
                 }
             }
             else //  this varDsc contains no GC pointers
@@ -5409,19 +5151,19 @@ GenTreeFieldList* Compiler::fgMorphLclArgToFieldlist(GenTreeLclVarCommon* lcl)
 
 // Make a copy of a struct variable if necessary, to pass to a callee.
 // Returns nothing; when no copy is needed, 'args' and its fgArgTabEntry are simply pointed at the local.
-void Compiler::fgMakeOutgoingStructArgCopy(
-    GenTreeCall*         call,
-    GenTree*             args,
-    unsigned             argIndex,
-    CORINFO_CLASS_HANDLE copyBlkClass
-        UNIX_AMD64_ABI_ONLY_ARG(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr))
+void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall*         call,
+                                           GenTree*             args,
+                                           unsigned             argIndex,
+                                           CORINFO_CLASS_HANDLE copyBlkClass)
 {
     GenTree* argx = args->Current();
     noway_assert(argx->gtOper != GT_MKREFANY);
+    fgArgTabEntry*       fp  = Compiler::gtArgEntryByNode(call, argx);
+    GenTreeLclVarCommon* lcl = nullptr;
+
     // See if we need to insert a copy at all
     // Case 1: don't need a copy if it is the last use of a local.  We can't determine that all of the time
     // but if there is only one use and no loops, the use must be last.
-    GenTreeLclVarCommon* lcl = nullptr;
     if (argx->OperIsLocal())
     {
         lcl = argx->AsLclVarCommon();
@@ -5442,10 +5184,9 @@ void Compiler::fgMakeOutgoingStructArgCopy(
             // struct parameters if they are passed as arguments to a tail call.
             if (!call->IsTailCallViaHelper() && (varDsc->lvRefCnt == 1) && !fgMightHaveLoop())
             {
-                varDsc->lvRefCnt  = 0;
-                args->gtOp.gtOp1  = lcl;
-                fgArgTabEntry* fp = Compiler::gtArgEntryByNode(call, argx);
-                fp->node          = lcl;
+                varDsc->lvRefCnt = 0;
+                args->gtOp.gtOp1 = lcl;
+                fp->node         = lcl;
 
                 JITDUMP("did not have to make outgoing copy for V%2d", varNum);
                 return;
@@ -5532,8 +5273,9 @@ void Compiler::fgMakeOutgoingStructArgCopy(
 #else // FEATURE_FIXED_OUT_ARGS
 
     // Structs are always on the stack, and thus never need temps
-    // so we have to put the copy and temp all into one expression
-    GenTree* arg = fgMakeTmpArgNode(tmp UNIX_AMD64_ABI_ONLY_ARG(structDescPtr->passedInRegisters));
+    // so we have to put the copy and temp all into one expression.
+    fp->tmpNum   = tmp;
+    GenTree* arg = fgMakeTmpArgNode(fp);
 
     // Change the expression to "(tmp=val),tmp"
     arg = gtNewOperNode(GT_COMMA, arg->TypeGet(), copyBlk, arg);
@@ -7452,6 +7194,10 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee)
     // that cannot be passed in a register. Note that we don't need to count
     // non-standard and secret params passed in registers (e.g. R10, R11) since
     // these won't contribute to out-going arg size.
+    // For each struct arg, hasMultiByteStackArgs is set to true if the arg cannot be passed in registers.
+    // In that case we break out of the loop and do not fast tail call. This is an implementation
+    // limitation where only the callee is checked for non-enregisterable structs.
+    // It is tracked with https://github.com/dotnet/coreclr/issues/12644.
     bool   hasMultiByteStackArgs = false;
     bool   hasTwoSlotSizedStruct = false;
     bool   hasHfaArg             = false;
@@ -7486,14 +7232,10 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee)
             {
 #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
 
-                // hasMultiByteStackArgs will determine if the struct can be passed
-                // in registers. If it cannot we will break the loop and not
-                // fastTailCall. This is an implementation limitation
-                // where the callee only is checked for non enregisterable structs.
-                // It is tracked with https://github.com/dotnet/coreclr/issues/12644.
-                unsigned typeSize     = 0;
-                hasMultiByteStackArgs = hasMultiByteStackArgs ||
-                                        !VarTypeIsMultiByteAndCanEnreg(argx->TypeGet(), objClass, &typeSize, false);
+                unsigned typeSize = 0;
+                // We should have already broken out of the loop if we've set hasMultiByteStackArgs to true.
+                assert(!hasMultiByteStackArgs);
+                hasMultiByteStackArgs = !VarTypeIsMultiByteAndCanEnreg(argx->TypeGet(), objClass, &typeSize, false);
 
 #if defined(UNIX_AMD64_ABI)
                 SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
@@ -7528,6 +7270,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee)
                 else
                 {
                     calleeStackSize += roundUp(typeSize, TARGET_POINTER_SIZE);
+                    hasMultiByteStackArgs = true;
                 }
 
 #elif defined(_TARGET_ARM64_) // ARM64