Enable AVX SIMD support for Unix systems.
author Carol Eidt <carol.eidt@microsoft.com>
Wed, 20 Jan 2016 00:58:00 +0000 (16:58 -0800)
committer Carol Eidt <carol.eidt@microsoft.com>
Wed, 20 Jan 2016 05:01:05 +0000 (21:01 -0800)
The existing code was assuming that there would be callee-save
registers for SIMD, but with the Unix ABI there are none.

Fix #983

src/jit/CMakeLists.txt
src/jit/lsra.cpp
src/jit/lsra.h
src/jit/target.h

index e9d5d393c1a3986d7e74687ec0d9c24037660ecd..b7870c72c8df64acd5053aa518b70b5fe186fdff 100644 (file)
@@ -8,9 +8,7 @@ include_directories("../inc")
 
 if (CLR_CMAKE_PLATFORM_ARCH_AMD64)
   add_definitions(-DFEATURE_SIMD) 
-if (WIN32) 
   add_definitions(-DFEATURE_AVX_SUPPORT) 
-endif (WIN32)
 endif (CLR_CMAKE_PLATFORM_ARCH_AMD64)
 
 set( JIT_SOURCES
index eb8b27c213f75b0deab66ced00a5f0c0dd1924c0..a2cbcaf368c12aa32902b9eed1f66dcf30e0861a 100644 (file)
@@ -1744,9 +1744,10 @@ void LinearScan::identifyCandidates()
     // fp callee save registers will be needed, such as loops or many fp vars.
     // We keep two sets of vars, since we collect some of the information to determine which set to
     // use as we iterate over the vars.
-    // When we are generating AVX code, we maintain an additional set of LargeVectorType vars, and
-    // there is a separate threshold defined for those.  It is assumed that if we encounter these
-    // that we should consider this a "high use" scenario, so we don't maintain two sets of these vars.
+    // When we are generating AVX code on non-Unix (FEATURE_PARTIAL_SIMD_CALLEE_SAVE), we maintain an
+    // additional set of LargeVectorType vars, and there is a separate threshold defined for those.
+    // It is assumed that if we encounter these, that we should consider this a "high use" scenario,
+    // so we don't maintain two sets of these vars.
     // This is defined as thresholdLargeVectorRefCntWtd, as we are likely to use the same mechanism
     // for vectors on Arm64, though the actual value may differ.
 
@@ -1755,12 +1756,12 @@ void LinearScan::identifyCandidates()
     unsigned int floatVarCount = 0;
     unsigned int thresholdFPRefCntWtd = 4 * BB_UNITY_WEIGHT;
     unsigned int maybeFPRefCntWtd = 2 * BB_UNITY_WEIGHT;
-#ifdef FEATURE_SIMD
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
     VarSetOps::AssignNoCopy(compiler, largeVectorVars, VarSetOps::MakeEmpty(compiler));
     VarSetOps::AssignNoCopy(compiler, largeVectorCalleeSaveCandidateVars, VarSetOps::MakeEmpty(compiler));
     unsigned int largeVectorVarCount = 0;
     unsigned int thresholdLargeVectorRefCntWtd = 4 * BB_UNITY_WEIGHT;
-#endif // FEATURE_SIMD
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
 
     for (lclNum = 0, varDsc = compiler->lvaTable;
          lclNum < compiler->lvaCount;
@@ -1913,8 +1914,8 @@ void LinearScan::identifyCandidates()
         // We maintain two sets of FP vars - those that meet the first threshold of weighted ref Count,
         // and those that meet the second (see the definitions of thresholdFPRefCntWtd and maybeFPRefCntWtd
         // above).
-        // Additionally, when we are generating AVX code, we keep a separate set of the LargeVectorType vars.
-#ifdef FEATURE_SIMD
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+        // Additionally, when we are generating AVX on non-UNIX amd64, we keep a separate set of the LargeVectorType vars.
         if (varDsc->lvType == LargeVectorType)
         {
             largeVectorVarCount++;
@@ -1926,7 +1927,7 @@ void LinearScan::identifyCandidates()
             }
         }
         else
-#endif // FEATURE_SIMD
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
         if (regType(newInt->registerType) == FloatRegisterType)
         {
             floatVarCount++;
@@ -2597,7 +2598,7 @@ LinearScan::buildKillPositionsForNode(GenTree*     tree,
             {
                 unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
                 LclVarDsc *varDsc = compiler->lvaTable + varNum;
-#ifdef FEATURE_SIMD
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
                 if (varDsc->lvType == LargeVectorType)
                 {
                     if (!VarSetOps::IsMember(compiler, largeVectorCalleeSaveCandidateVars, varIndex))
@@ -2606,7 +2607,7 @@ LinearScan::buildKillPositionsForNode(GenTree*     tree,
                     }
                 }
                 else
-#endif // FEATURE_SIMD
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
                 if (varTypeIsFloating(varDsc) && !VarSetOps::IsMember(compiler, fpCalleeSaveCandidateVars, varIndex))
                 {
                     continue;
@@ -2747,7 +2748,7 @@ fixedCandidateMask(var_types type, regMaskTP candidates)
     return RBM_NONE;
 }
 
-#ifdef FEATURE_SIMD
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
 VARSET_VALRET_TP
 LinearScan::buildUpperVectorSaveRefPositions(GenTree *tree,
                                              LsraLocation currentLoc)
@@ -2805,7 +2806,7 @@ LinearScan::buildUpperVectorRestoreRefPositions(GenTree *tree,
         }
     }
 }
-#endif // FEATURE_SIMD
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
 
 void 
 LinearScan::buildRefPositionsForNode(GenTree *tree,
@@ -3160,20 +3161,23 @@ LinearScan::buildRefPositionsForNode(GenTree *tree,
     assert(!varTypeIsMultiReg(tree->TypeGet()));
 #endif // _TARGET_xxx_
 
-#ifdef FEATURE_SIMD
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
     VARSET_TP       VARSET_INIT_NOCOPY(liveLargeVectors, VarSetOps::UninitVal());
-#endif // FEATURE_SIMD
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
 
     // push defs
     if (produce == 0)
     {
         buildKillPositionsForNode(tree, currentLoc + 1);
 
-#ifdef FEATURE_SIMD
-        // Build RefPositions for saving any live large vectors.
-        // This must be done after the kills, so that we know which large vectors are still live.
-        VarSetOps::AssignNoCopy(compiler, liveLargeVectors, buildUpperVectorSaveRefPositions(tree, currentLoc));
-#endif // FEATURE_SIMD
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+        if (RBM_FLT_CALLEE_SAVED != RBM_NONE)
+        {
+            // Build RefPositions for saving any live large vectors.
+            // This must be done after the kills, so that we know which large vectors are still live.
+            VarSetOps::AssignNoCopy(compiler, liveLargeVectors, buildUpperVectorSaveRefPositions(tree, currentLoc));
+        }
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
     }
 
     for (int i=0; i < produce; i++)
@@ -3186,11 +3190,11 @@ LinearScan::buildRefPositionsForNode(GenTree *tree,
         {
             generatedKills = buildKillPositionsForNode(tree, lastDefLocation);
 
-#ifdef FEATURE_SIMD
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
             // Build RefPositions for saving any live large vectors.
             // This must be done after the kills, so that we know which large vectors are still live.
             VarSetOps::AssignNoCopy(compiler, liveLargeVectors, buildUpperVectorSaveRefPositions(tree, currentLoc));
-#endif // FEATURE_SIMD
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
         }
         regMaskTP currCandidates = candidates;
 
@@ -3245,9 +3249,9 @@ LinearScan::buildRefPositionsForNode(GenTree *tree,
         interval->updateRegisterPreferences(currCandidates);
         interval->updateRegisterPreferences(useCandidates);
     }
-#ifdef FEATURE_SIMD
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
     buildUpperVectorRestoreRefPositions(tree, currentLoc, liveLargeVectors);
-#endif // FEATURE_SIMD
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
 }
 
 // make an interval for each physical register
@@ -6842,7 +6846,7 @@ LinearScan::insertCopyOrReload(GenTreePtr tree, RefPosition* refPosition)
     tree->InsertAfterSelf(newNode);
 }
 
-#ifdef FEATURE_SIMD
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
 //------------------------------------------------------------------------ 
 // insertUpperVectorSaveAndReload: Insert code to save and restore the upper half of a vector that lives
 //                                 in a callee-save register at the point of a kill (the upper half is
@@ -6923,7 +6927,7 @@ LinearScan::insertUpperVectorSaveAndReload(GenTreePtr tree, RefPosition* refPosi
     compiler->fgInsertTreeAfterAsEmbedded(simdNode, tree, stmt->AsStmt(), block);
 
 }
-#endif // FEATURE_SIMD
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
 
 //------------------------------------------------------------------------
 // initMaxSpill: Initializes the LinearScan members used to track the max number
@@ -7036,13 +7040,13 @@ LinearScan::updateMaxSpill(RefPosition* refPosition)
             // to know what they are here.
             RefType refType = refPosition->refType;
             var_types typ;
-#ifdef FEATURE_SIMD
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
             if ((refType == RefTypeUpperVectorSaveDef) || (refType == RefTypeUpperVectorSaveUse))
             {
                 typ = LargeVectorSaveType;
             }
             else
-#endif // !FEATURE_SIMD
+#endif // !FEATURE_PARTIAL_SIMD_CALLEE_SAVE
             {
                 GenTreePtr treeNode = refPosition->treeNode;
                 if (treeNode == nullptr)
@@ -7268,7 +7272,7 @@ LinearScan::resolveRegisters()
             updateMaxSpill(currentRefPosition);
             GenTree *treeNode = currentRefPosition->treeNode;
 
-#ifdef FEATURE_SIMD
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
             if (currentRefPosition->refType == RefTypeUpperVectorSaveDef)
             {
                 // The treeNode must be a call, and this must be a RefPosition for a LargeVectorType LocalVar.
@@ -7283,7 +7287,7 @@ LinearScan::resolveRegisters()
             {
                 continue;
             }
-#endif // FEATURE_SIMD
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
 
             // Most uses won't actually need to be recorded (they're on the def).
             // In those cases, treeNode will be nullptr.
index 69acddc2025c46fa6420e0e73eec1a4680761535..67243e80024557fbec9c1fb06a2dfd3745a5ab77 100644 (file)
@@ -360,12 +360,12 @@ public:
     // than the one it was spilled from
     void            insertCopyOrReload(GenTreePtr tree, RefPosition* refPosition);
 
-#ifdef FEATURE_SIMD
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
     // Insert code to save and restore the upper half of a vector that lives
     // in a callee-save register at the point of a call (the upper half is
     // not preserved).
     void            insertUpperVectorSaveAndReload(GenTreePtr tree, RefPosition* refPosition, BasicBlock* block);
-#endif // FEATURE_SIMD
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
 
     // resolve along one block-block edge
     enum ResolveType { ResolveSplit, ResolveJoin, ResolveCritical, ResolveSharedCritical, ResolveTypeCount };
@@ -569,10 +569,10 @@ private:
                                              ArrayStack<LocationInfo> *stack,
                                              LsraLocation loc);
 
-#ifdef FEATURE_SIMD
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
     VARSET_VALRET_TP buildUpperVectorSaveRefPositions(GenTree *tree, LsraLocation currentLoc);
     void             buildUpperVectorRestoreRefPositions(GenTree *tree, LsraLocation currentLoc, VARSET_VALARG_TP liveLargeVectors);
-#endif //FEATURE_SIMD
+#endif //FEATURE_PARTIAL_SIMD_CALLEE_SAVE
 
 #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
     // For AMD64 on SystemV machines. This method 
@@ -990,7 +990,7 @@ private:
     VARSET_TP           currentLiveVars;
     // Set of floating point variables to consider for callee-save registers.
     VARSET_TP           fpCalleeSaveCandidateVars;
-#ifdef FEATURE_SIMD
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
 #if defined(_TARGET_AMD64_)
     static const var_types     LargeVectorType = TYP_SIMD32;
     static const var_types     LargeVectorSaveType = TYP_SIMD16;
@@ -1005,7 +1005,7 @@ private:
     VARSET_TP           largeVectorVars;
     // Set of large vector (TYP_SIMD32 on AVX) variables to consider for callee-save registers.
     VARSET_TP           largeVectorCalleeSaveCandidateVars;
-#endif // FEATURE_SIMD
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
 };
 
 /*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
@@ -1262,9 +1262,9 @@ public:
     bool            RequiresRegister()
     {
         return (refType == RefTypeDef || refType == RefTypeUse
-#ifdef FEATURE_SIMD
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
                || refType == RefTypeUpperVectorSaveDef || refType == RefTypeUpperVectorSaveUse
-#endif // FEATURE_SIMD
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
                );
     }
 
index 19672e105ebcce419f4bb3bd275686c92a887aa8..7f53845fa41398a7bbf5984ed71df5cd32114d0f 100644 (file)
@@ -681,6 +681,11 @@ typedef unsigned short          regPairNoSmall; // arm: need 12 bits
 
 #ifdef FEATURE_SIMD
   #define ALIGN_SIMD_TYPES         1       // whether SIMD type locals are to be aligned
+#if defined(UNIX_AMD64_ABI) || !defined(FEATURE_AVX_SUPPORT)
+  #define FEATURE_PARTIAL_SIMD_CALLEE_SAVE 0 // Whether SIMD registers are partially saved at calls
+#else // !UNIX_AMD64_ABI && FEATURE_AVX_SUPPORT
+  #define FEATURE_PARTIAL_SIMD_CALLEE_SAVE 1 // Whether SIMD registers are partially saved at calls
+#endif // !UNIX_AMD64_ABI
 #endif
   #define FEATURE_WRITE_BARRIER    1       // Generate the WriteBarrier calls for GC (currently not the x86-style register-customized barriers)
   #define FEATURE_FIXED_OUT_ARGS   1       // Preallocate the outgoing arg area in the prolog