[Arm64] SIMD genFnPrologCalleeRegArgs
authorSteve MacLean <sdmaclea.qdt@qualcommdatacenter.com>
Fri, 3 Nov 2017 17:08:48 +0000 (13:08 -0400)
committerSteve MacLean <sdmaclea.qdt@qualcommdatacenter.com>
Tue, 7 Nov 2017 17:03:09 +0000 (12:03 -0500)
src/jit/codegencommon.cpp

index ac8c667..89f7b91 100644 (file)
@@ -4574,7 +4574,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
                 {
                     destRegNum = varDsc->lvRegNum;
                 }
-#if FEATURE_MULTIREG_ARGS && defined(FEATURE_SIMD) && defined(_TARGET_AMD64_)
+#if FEATURE_MULTIREG_ARGS && defined(FEATURE_SIMD) && defined(_TARGET_64BIT_)
                 else
                 {
                     assert(regArgTab[argNum].slot == 2);
@@ -5190,7 +5190,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
                 destRegNum  = REG_NEXT(varDsc->lvRegNum);
             }
 #endif // !_TARGET_64BIT_
-#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
+#if (defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) || defined(_TARGET_ARM64_)) && defined(FEATURE_SIMD)
             else
             {
                 assert(regArgTab[argNum].slot == 2);
@@ -5201,7 +5201,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
                 noway_assert(regNum != destRegNum);
                 continue;
             }
-#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
+#endif // (defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) || defined(_TARGET_ARM64_)) && defined(FEATURE_SIMD)
             noway_assert(destRegNum != REG_NA);
             if (destRegNum != regNum)
             {
@@ -5228,6 +5228,17 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
 
                 emitAttr size = emitActualTypeSize(destMemType);
 
+#if defined(_TARGET_ARM64_)
+                if (varTypeIsSIMD(varDsc) && argNum < (argMax - 1) && regArgTab[argNum + 1].slot == 2)
+                {
+                    // For a SIMD type that is passed in two integer registers,
+                    // Limit the copy below to the first 8 bytes from the first integer register.
+                    // Handle the remaining 8 bytes from the second slot in the code further below
+                    assert(EA_SIZE(size) >= 8);
+                    size = EA_8BYTE;
+                }
+#endif
+
                 getEmitter()->emitIns_R_R(ins_Copy(destMemType), size, destRegNum, regNum);
 
                 psiMoveToReg(varNum);
@@ -5261,6 +5272,23 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
                 destRegNum = regNum;
             }
 #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
+#if defined(_TARGET_ARM64_) && defined(FEATURE_SIMD)
+            if (varTypeIsSIMD(varDsc) && argNum < (argMax - 1) && regArgTab[argNum + 1].slot == 2)
+            {
+                // For a SIMD type that is passed in two integer registers,
+                // Code above copies the first integer argument register into the lower 8 bytes
+                // of the target register. Here we must handle the second 8 bytes of the slot pair by
+                // inserting the second integer register into the upper 8 bytes of the target
+                // SIMD floating point register.
+                argRegCount          = 2;
+                int       nextArgNum = argNum + 1;
+                regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].getRegType(compiler));
+                noway_assert(regArgTab[nextArgNum].varNum == varNum);
+                noway_assert(genIsValidIntReg(nextRegNum));
+                noway_assert(genIsValidFloatReg(destRegNum));
+                getEmitter()->emitIns_R_R_I(INS_mov, EA_8BYTE, destRegNum, nextRegNum, 1);
+            }
+#endif // defined(_TARGET_ARM64_) && defined(FEATURE_SIMD)
             // Mark the rest of the argument registers corresponding to this multi-reg type as
             // being processed and no longer live.
             for (int regSlot = 1; regSlot < argRegCount; regSlot++)