Mark simd type vars as lvRegStruct accurately.
authorsivarv <sivarv@microsoft.com>
Fri, 6 May 2016 21:36:58 +0000 (14:36 -0700)
committersivarv <sivarv@microsoft.com>
Sat, 7 May 2016 00:33:40 +0000 (17:33 -0700)
Commit migrated from https://github.com/dotnet/coreclr/commit/60d4c09a23dbc0e1ef5ccc348a05f6545bb6472d

src/coreclr/src/jit/codegenxarch.cpp
src/coreclr/src/jit/lsra.cpp
src/coreclr/src/jit/morph.cpp
src/coreclr/tests/src/JIT/SIMD/VectorReturn.cs

index 15a1742..c8c10a3 100644 (file)
@@ -2839,15 +2839,26 @@ CodeGen::genMultiRegCallStoreToLocal(GenTreePtr treeNode)
         {
             // targeReg[63:0] = targetReg[63:0]
             // targetReg[127:64] = reg1[127:64]
-            inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, reg1, 0x10);
+            inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, reg1, 0x00);
         }
         else 
         {
             assert(targetReg == reg1);
 
-            // targeReg[63:0] = targetReg[127:64]
+            // We need two shuffles to achieve this
+            // First:
+            // targeReg[63:0] = targetReg[63:0]
             // targetReg[127:64] = reg0[63:0]
-            inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, reg0, 0x01);
+            //
+            // Second:
+            // targeReg[63:0] = targetReg[127:64]
+            // targetReg[127:64] = targetReg[63:0]
+            //
+            // Essentially copy low 8-bytes from reg0 to high 8-bytes of targetReg
+            // and next swap low and high 8-bytes of targetReg to have them
+            // rearranged in the right order.
+            inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, reg0, 0x00);
+            inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, targetReg, 0x01);
         }
     }
     else
index 127419b..091b4c6 100644 (file)
@@ -3060,7 +3060,7 @@ LinearScan::buildRefPositionsForNode(GenTree *tree,
                 noPush = true;
             }
 
-            assert(consume <= 1);
+            assert(consume <= MAX_RET_REG_COUNT);
             if (consume == 1)
             {
                 Interval * srcInterval = stack->TopRef().interval;
index a1f5710..590217a 100644 (file)
@@ -15564,43 +15564,54 @@ void                Compiler::fgPromoteStructs()
     //
 
     lvaStructPromotionInfo structPromotionInfo;
+    bool tooManyLocals = false;
 
     for (unsigned lclNum = 0;
-        lclNum < startLvaCount;
-        lclNum++)
+         lclNum < startLvaCount;
+         lclNum++)
     {
+        // Whether this var got promoted
+        bool promotedVar = false;
         LclVarDsc*  varDsc = &lvaTable[lclNum];
 
+#ifdef FEATURE_SIMD
+        if (varDsc->lvSIMDType && varDsc->lvUsedInSIMDIntrinsic)
+        {
+            // If we have marked this as lvUsedInSIMDIntrinsic, then we do not want to promote
+            // its fields.  Instead, we will attempt to enregister the entire struct.
+            varDsc->lvRegStruct = true;
+        }
+        else
+#endif //FEATURE_SIMD
         // Don't promote if we have reached the tracking limit.
         if (lvaHaveManyLocals())
         {
-            JITDUMP("Stopped promoting struct fields, due to too many locals.\n");
-            break;
+            // Print the message first time when we detected this condition
+            if (!tooManyLocals)
+            {
+                JITDUMP("Stopped promoting struct fields, due to too many locals.\n");
+            }
+            tooManyLocals = true;
         }
 #if !FEATURE_MULTIREG_STRUCT_PROMOTE
-        if (varDsc->lvIsMultiRegArgOrRet)
+        else if (varDsc->lvIsMultiRegArgOrRet)
         {
             JITDUMP("Skipping V%02u: marked lvIsMultiRegArgOrRet.\n", lclNum);
-            continue;
         }
 #endif // !FEATURE_MULTIREG_STRUCT_PROMOTE
-
-#ifdef FEATURE_SIMD
-        if (varDsc->lvSIMDType && varDsc->lvUsedInSIMDIntrinsic)
-        {
-            // If we have marked this as lvUsedInSIMDIntrinsic, then we do not want to promote
-            // its fields.  Instead, we will attempt to enregister the entire struct.
-            // Note, however, that if the code below does not decide to promote this struct,
-            // we will still set lvRegStruct if its fields have not been accessed.
-            varDsc->lvRegStruct = true;
-        }
-        else
-#endif // FEATURE_SIMD
-        if (varTypeIsStruct(varDsc))
+        else if (varTypeIsStruct(varDsc))
         {
             lvaCanPromoteStructVar(lclNum, &structPromotionInfo);
-            if (structPromotionInfo.canPromote)
+            bool canPromote = structPromotionInfo.canPromote;            
+
+            // We start off with shouldPromote same as canPromote.
+            // Based on further profitablity checks done below, shouldPromote
+            // could be set to false.
+            bool shouldPromote = canPromote;
+
+            if (canPromote)
             {
+                
                 // We *can* promote; *should* we promote?
                 // We should only do so if promotion has potential savings.  One source of savings
                 // is if a field of the struct is accessed, since this access will be turned into
@@ -15614,7 +15625,7 @@ void                Compiler::fgPromoteStructs()
                 {
                     JITDUMP("Not promoting promotable struct local V%02u: #fields = %d, fieldAccessed = %d.\n",
                         lclNum, structPromotionInfo.fieldCnt, varDsc->lvFieldAccessed);
-                    continue;
+                    shouldPromote = false;
                 }
 #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
                 // TODO-PERF - Only do this when the LclVar is used in an argument context
@@ -15625,29 +15636,29 @@ void                Compiler::fgPromoteStructs()
                 // Promoting it can cause us to shuffle it back and forth between the int and 
                 //  the float regs when it is used as a argument, which is very expensive for XARCH
                 //
-                if (structPromotionInfo.fieldCnt==1
-                    && varTypeIsFloating(structPromotionInfo.fields[0].fldType))
+                else if ((structPromotionInfo.fieldCnt == 1) &&
+                          varTypeIsFloating(structPromotionInfo.fields[0].fldType))
                 {
                     JITDUMP("Not promoting promotable struct local V%02u: #fields = %d because it is a struct with single float field.\n",
                             lclNum, structPromotionInfo.fieldCnt);
-                    continue;
+                    shouldPromote = false;
                 }
 #endif // _TARGET_AMD64_ || _TARGET_ARM64_
+
 #if !FEATURE_MULTIREG_STRUCT_PROMOTE
 #if defined(_TARGET_ARM64_)
                 //
                 // For now we currently don't promote structs that could be passed in registers
                 //
-                if (varDsc->lvIsMultiregStruct())
+                else if (varDsc->lvIsMultiregStruct())
                 {
                     JITDUMP("Not promoting promotable struct local V%02u (size==%d): ",
                             lclNum, lvaLclExactSize(lclNum));
-                    continue;
+                    shouldPromote = false;
                 }
 #endif // _TARGET_ARM64_
 #endif // !FEATURE_MULTIREG_STRUCT_PROMOTE
-
-                if (varDsc->lvIsParam)
+                else if (varDsc->lvIsParam)
                 {
 #if FEATURE_MULTIREG_STRUCT_PROMOTE                   
                     if (varDsc->lvIsMultiregStruct() &&         // Is this a variable holding a value that is passed in multiple registers?
@@ -15655,8 +15666,9 @@ void                Compiler::fgPromoteStructs()
                     {
                         JITDUMP("Not promoting multireg struct local V%02u, because lvIsParam is true and #fields != 2\n",
                                 lclNum);
-                        continue;
+                        shouldPromote = false;
                     }
+                    else
 #endif  // !FEATURE_MULTIREG_STRUCT_PROMOTE
 
                     // TODO-PERF - Implement struct promotion for incoming multireg structs
@@ -15666,7 +15678,7 @@ void                Compiler::fgPromoteStructs()
                     {
                         JITDUMP("Not promoting promotable struct local V%02u, because lvIsParam is true and #fields = %d.\n",
                                 lclNum, structPromotionInfo.fieldCnt);
-                        continue;
+                        shouldPromote = false;
                     }
                 }
 
@@ -15685,9 +15697,14 @@ void                Compiler::fgPromoteStructs()
                 if (atoi(getenv("structpromovarnumlo")) <= structPromoVarNum && structPromoVarNum <= atoi(getenv("structpromovarnumhi")))
 #endif // 0
 
+                if (shouldPromote)
                 {
+                    assert(canPromote);
+
                     // Promote the this struct local var.
                     lvaPromoteStructVar(lclNum, &structPromotionInfo);
+                    promotedVar = true;
+
 #ifdef _TARGET_ARM_
                     if (structPromotionInfo.requiresScratchVar)
                     {
@@ -15703,15 +15720,17 @@ void                Compiler::fgPromoteStructs()
 #endif // _TARGET_ARM_
                 }
             }
+        }
+
 #ifdef FEATURE_SIMD
-            else if (varDsc->lvSIMDType && !varDsc->lvFieldAccessed)
-            {
-                // Even if we have not used this in a SIMD intrinsic, if it is not being promoted,
-                // we will treat it as a reg struct.
-                varDsc->lvRegStruct = true;
-            }
-#endif // FEATURE_SIMD
+        if (!promotedVar && varDsc->lvSIMDType && !varDsc->lvFieldAccessed)
+        {
+            // Even if we have not used this in a SIMD intrinsic, if it is not being promoted,
+            // we will treat it as a reg struct.
+            varDsc->lvRegStruct = true;
         }
+#endif // FEATURE_SIMD
+
     }
 }
 
index 97fe80c..d1785af 100644 (file)
@@ -57,6 +57,79 @@ internal partial class VectorTest
         return F1(u);
     }
 
+    [MethodImplAttribute(MethodImplOptions.NoInlining)]
+    public static Vector<T> VectorOne<T>() where T: struct
+    {
+        return Vector<T>.One;
+    }
+
+    [MethodImplAttribute(MethodImplOptions.NoInlining)]
+    public static Vector<T> VectorPlusOne<T>(Vector<T> v1) where T : struct
+    {
+        Vector<T> v2 = VectorOne<T>();
+        return v1 + v2;
+    }
+
+    public static int VectorTReturnTest()
+    {
+        Vector<float> v1 = new Vector<float>(2.0f);
+        Vector<float> result1 = VectorPlusOne<float>(v1);
+        for (int i=0; i < Vector<float>.Count; ++i)
+        {
+            if (!CheckValue<float>(result1[i], 3.0f))
+            {
+                Console.WriteLine("Expected result is " + 3.0f);
+                Console.WriteLine("Instead got " + result1[i]);
+                Console.WriteLine("FAILED");
+                return Fail;
+            }
+        }
+
+        Vector<int> v2 = new Vector<int>(5);
+        Vector<int> result2 = VectorPlusOne<int>(v2);
+        for (int i = 0; i < Vector<int>.Count; ++i)
+        {
+            if (!CheckValue<int>(result2[i], 6))
+            {
+                Console.WriteLine("Expected result is " + 6);
+                Console.WriteLine("Instead got " + result2[i]);
+                Console.WriteLine("FAILED");
+                return Fail;
+            }
+        }
+
+        return Pass;
+    }
+
+    [MethodImplAttribute(MethodImplOptions.NoInlining)]
+    public static Vector3 GetVector3One()
+    {
+        return new Vector3(1.0f);
+    }
+
+    [MethodImplAttribute(MethodImplOptions.NoInlining)]
+    public static Vector3 GetVector3PlusOne(Vector3 v1)
+    {
+        Vector3 v2 = GetVector3One();
+        return v1 + v2;
+    }
+
+    public static int Vector3ReturnTest()
+    {
+        Vector3 v1 = new Vector3(3.0f, 4.0f, 5.0f);
+        Vector3 result = GetVector3PlusOne(v1);
+
+        if (!CheckValue<float>(result.X, 4.0f) ||
+            !CheckValue<float>(result.Y, 5.0f) ||
+            !CheckValue<float>(result.Z, 6.0f))
+        {
+            Console.WriteLine("Vector3ReturnTest did not return expected value");
+            return Fail;
+        }
+
+        return Pass;
+    }
+
     public static int Main()
     {
         init();
@@ -69,6 +142,19 @@ internal partial class VectorTest
             Console.WriteLine("FAILED");
             return Fail;
         }
+
+        if (VectorTReturnTest() != Pass)
+        {
+            Console.WriteLine("FAILED");
+            return Fail;
+        }
+
+        if (Vector3ReturnTest() != Pass)
+        {
+            Console.WriteLine("FAILED");
+            return Fail;
+        }
+
         Console.WriteLine("PASSED");
         return Pass;
     }