[ARM64] SIMD Partial Callee Save SIMD12
authorSteve MacLean <sdmaclea.qdt@qualcommdatacenter.com>
Wed, 8 Nov 2017 18:47:38 +0000 (13:47 -0500)
committerSteve MacLean <sdmaclea.qdt@qualcommdatacenter.com>
Fri, 10 Nov 2017 15:17:59 +0000 (10:17 -0500)
src/jit/codegenarm64.cpp
src/jit/lsra.cpp
src/jit/lsra.h

index f507ceb..1b41150 100644 (file)
@@ -4695,7 +4695,8 @@ void CodeGen::genSIMDIntrinsicUpperSave(GenTreeSIMD* simdNode)
     assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicUpperSave);
 
     GenTree* op1 = simdNode->gtGetOp1();
-    assert(op1->IsLocal() && op1->TypeGet() == TYP_SIMD16);
+    assert(op1->IsLocal());
+    assert(emitTypeSize(op1->TypeGet()) == 16);
     regNumber targetReg = simdNode->gtRegNum;
     regNumber op1Reg    = genConsumeReg(op1);
     assert(op1Reg != REG_NA);
@@ -4731,7 +4732,8 @@ void CodeGen::genSIMDIntrinsicUpperRestore(GenTreeSIMD* simdNode)
     assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicUpperRestore);
 
     GenTree* op1 = simdNode->gtGetOp1();
-    assert(op1->IsLocal() && op1->TypeGet() == TYP_SIMD16);
+    assert(op1->IsLocal());
+    assert(emitTypeSize(op1->TypeGet()) == 16);
     regNumber srcReg    = simdNode->gtRegNum;
     regNumber lclVarReg = genConsumeReg(op1);
     unsigned  varNum    = op1->AsLclVarCommon()->gtLclNum;
index 382eb51..c865ceb 100644 (file)
@@ -2332,7 +2332,7 @@ void LinearScan::identifyCandidates()
 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
             // Additionally, when we are generating AVX on non-UNIX amd64, we keep a separate set of the LargeVectorType
             // vars.
-            if (varDsc->lvType == LargeVectorType)
+            if (varTypeNeedsPartialCalleeSave(varDsc->lvType))
             {
                 largeVectorVarCount++;
                 VarSetOps::AddElemD(compiler, largeVectorVars, varDsc->lvVarIndex);
@@ -3044,7 +3044,7 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLo
                 unsigned   varNum = compiler->lvaTrackedToVarNum[varIndex];
                 LclVarDsc* varDsc = compiler->lvaTable + varNum;
 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
-                if (varDsc->lvType == LargeVectorType)
+                if (varTypeNeedsPartialCalleeSave(varDsc->lvType))
                 {
                     if (!VarSetOps::IsMember(compiler, largeVectorCalleeSaveCandidateVars, varIndex))
                     {
@@ -3478,7 +3478,7 @@ LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation current
             while (iter.NextElem(&varIndex))
             {
                 Interval* varInterval    = getIntervalForLocalVar(varIndex);
-                Interval* tempInterval   = newInterval(LargeVectorType);
+                Interval* tempInterval   = newInterval(varInterval->registerType);
                 tempInterval->isInternal = true;
                 RefPosition* pos =
                     newRefPosition(tempInterval, currentLoc, RefTypeUpperVectorSaveDef, tree, RBM_FLT_CALLEE_SAVED);
@@ -8334,6 +8334,8 @@ void LinearScan::allocateRegisters()
                 if (refType == RefTypeUpperVectorSaveDef)
                 {
                     // TODO-CQ: Determine whether copying to two integer callee-save registers would be profitable.
+                    // TODO-ARM64-CQ: Determine whether copying to one integer callee-save registers would be
+                    // profitable.
 
                     // SaveDef position occurs after the Use of args and at the same location as Kill/Def
                     // positions of a call node.  But SaveDef position cannot use any of the arg regs as
@@ -9013,7 +9015,7 @@ void LinearScan::insertUpperVectorSaveAndReload(GenTreePtr tree, RefPosition* re
     Interval* lclVarInterval = refPosition->getInterval()->relatedInterval;
     assert(lclVarInterval->isLocalVar == true);
     LclVarDsc* varDsc = compiler->lvaTable + lclVarInterval->varNum;
-    assert(varDsc->lvType == LargeVectorType);
+    assert(varTypeNeedsPartialCalleeSave(varDsc->lvType));
     regNumber lclVarReg = lclVarInterval->physReg;
     if (lclVarReg == REG_NA)
     {
@@ -9029,14 +9031,14 @@ void LinearScan::insertUpperVectorSaveAndReload(GenTreePtr tree, RefPosition* re
 
     // First, insert the save before the call.
 
-    GenTreePtr saveLcl                = compiler->gtNewLclvNode(lclVarInterval->varNum, LargeVectorType);
+    GenTreePtr saveLcl                = compiler->gtNewLclvNode(lclVarInterval->varNum, varDsc->lvType);
     saveLcl->gtLsraInfo.isLsraAdded   = true;
     saveLcl->gtRegNum                 = lclVarReg;
     saveLcl->gtLsraInfo.isLocalDefUse = false;
 
     GenTreeSIMD* simdNode =
         new (compiler, GT_SIMD) GenTreeSIMD(LargeVectorSaveType, saveLcl, nullptr, SIMDIntrinsicUpperSave,
-                                            varDsc->lvBaseType, genTypeSize(LargeVectorType));
+                                            varDsc->lvBaseType, genTypeSize(varDsc->lvType));
     simdNode->gtLsraInfo.isLsraAdded = true;
     simdNode->gtRegNum               = spillReg;
     if (spillToMem)
@@ -9048,13 +9050,13 @@ void LinearScan::insertUpperVectorSaveAndReload(GenTreePtr tree, RefPosition* re
 
     // Now insert the restore after the call.
 
-    GenTreePtr restoreLcl                = compiler->gtNewLclvNode(lclVarInterval->varNum, LargeVectorType);
+    GenTreePtr restoreLcl                = compiler->gtNewLclvNode(lclVarInterval->varNum, varDsc->lvType);
     restoreLcl->gtLsraInfo.isLsraAdded   = true;
     restoreLcl->gtRegNum                 = lclVarReg;
     restoreLcl->gtLsraInfo.isLocalDefUse = false;
 
-    simdNode = new (compiler, GT_SIMD)
-        GenTreeSIMD(LargeVectorType, restoreLcl, nullptr, SIMDIntrinsicUpperRestore, varDsc->lvBaseType, 32);
+    simdNode = new (compiler, GT_SIMD) GenTreeSIMD(varDsc->lvType, restoreLcl, nullptr, SIMDIntrinsicUpperRestore,
+                                                   varDsc->lvBaseType, genTypeSize(varDsc->lvType));
     simdNode->gtLsraInfo.isLsraAdded = true;
     simdNode->gtRegNum               = spillReg;
     if (spillToMem)
index 00ce7ee..fb58364 100644 (file)
@@ -1219,11 +1219,19 @@ private:
     VARSET_TP fpCalleeSaveCandidateVars;
 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
 #if defined(_TARGET_AMD64_)
-    static const var_types LargeVectorType     = TYP_SIMD32;
+    static bool varTypeNeedsPartialCalleeSave(var_types type)
+    {
+        return (emitTypeSize(type) == 32);
+    }
     static const var_types LargeVectorSaveType = TYP_SIMD16;
 #elif defined(_TARGET_ARM64_)
-    static const var_types LargeVectorType      = TYP_SIMD16;
-    static const var_types LargeVectorSaveType  = TYP_DOUBLE;
+    static bool varTypeNeedsPartialCalleeSave(var_types type)
+    {
+        // ARM64 ABI FP Callee save registers only require Callee to save lower 8 Bytes
+        // For SIMD types longer then 8 bytes Caller is responsible for saving and restoring Upper bytes.
+        return (emitTypeSize(type) == 16);
+    }
+    static const var_types LargeVectorSaveType = TYP_DOUBLE;
 #else // !defined(_TARGET_AMD64_) && !defined(_TARGET_ARM64_)
 #error("Unknown target architecture for FEATURE_SIMD")
 #endif // !defined(_TARGET_AMD64_) && !defined(_TARGET_ARM64_)