From dfbb40a8ab7d551eb0e8411ffa4a057ad32aa9ce Mon Sep 17 00:00:00 2001 From: Steve MacLean Date: Wed, 8 Nov 2017 13:47:38 -0500 Subject: [PATCH] [ARM64] SIMD Partial Callee Save SIMD12 --- src/jit/codegenarm64.cpp | 6 ++++-- src/jit/lsra.cpp | 20 +++++++++++--------- src/jit/lsra.h | 14 +++++++++++--- 3 files changed, 26 insertions(+), 14 deletions(-) diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp index f507ceb..1b41150 100644 --- a/src/jit/codegenarm64.cpp +++ b/src/jit/codegenarm64.cpp @@ -4695,7 +4695,8 @@ void CodeGen::genSIMDIntrinsicUpperSave(GenTreeSIMD* simdNode) assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicUpperSave); GenTree* op1 = simdNode->gtGetOp1(); - assert(op1->IsLocal() && op1->TypeGet() == TYP_SIMD16); + assert(op1->IsLocal()); + assert(emitTypeSize(op1->TypeGet()) == 16); regNumber targetReg = simdNode->gtRegNum; regNumber op1Reg = genConsumeReg(op1); assert(op1Reg != REG_NA); @@ -4731,7 +4732,8 @@ void CodeGen::genSIMDIntrinsicUpperRestore(GenTreeSIMD* simdNode) assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicUpperRestore); GenTree* op1 = simdNode->gtGetOp1(); - assert(op1->IsLocal() && op1->TypeGet() == TYP_SIMD16); + assert(op1->IsLocal()); + assert(emitTypeSize(op1->TypeGet()) == 16); regNumber srcReg = simdNode->gtRegNum; regNumber lclVarReg = genConsumeReg(op1); unsigned varNum = op1->AsLclVarCommon()->gtLclNum; diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp index 382eb51..c865ceb 100644 --- a/src/jit/lsra.cpp +++ b/src/jit/lsra.cpp @@ -2332,7 +2332,7 @@ void LinearScan::identifyCandidates() #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE // Additionally, when we are generating AVX on non-UNIX amd64, we keep a separate set of the LargeVectorType // vars. - if (varDsc->lvType == LargeVectorType) + if (varTypeNeedsPartialCalleeSave(varDsc->lvType)) { largeVectorVarCount++; VarSetOps::AddElemD(compiler, largeVectorVars, varDsc->lvVarIndex); @@ -3044,7 +3044,7 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLo unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; LclVarDsc* varDsc = compiler->lvaTable + varNum; #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE - if (varDsc->lvType == LargeVectorType) + if (varTypeNeedsPartialCalleeSave(varDsc->lvType)) { if (!VarSetOps::IsMember(compiler, largeVectorCalleeSaveCandidateVars, varIndex)) { @@ -3478,7 +3478,7 @@ LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation current while (iter.NextElem(&varIndex)) { Interval* varInterval = getIntervalForLocalVar(varIndex); - Interval* tempInterval = newInterval(LargeVectorType); + Interval* tempInterval = newInterval(varInterval->registerType); tempInterval->isInternal = true; RefPosition* pos = newRefPosition(tempInterval, currentLoc, RefTypeUpperVectorSaveDef, tree, RBM_FLT_CALLEE_SAVED); @@ -8334,6 +8334,8 @@ void LinearScan::allocateRegisters() if (refType == RefTypeUpperVectorSaveDef) { // TODO-CQ: Determine whether copying to two integer callee-save registers would be profitable. + // TODO-ARM64-CQ: Determine whether copying to one integer callee-save registers would be + // profitable. // SaveDef position occurs after the Use of args and at the same location as Kill/Def // positions of a call node. But SaveDef position cannot use any of the arg regs as @@ -9013,7 +9015,7 @@ void LinearScan::insertUpperVectorSaveAndReload(GenTreePtr tree, RefPosition* re Interval* lclVarInterval = refPosition->getInterval()->relatedInterval; assert(lclVarInterval->isLocalVar == true); LclVarDsc* varDsc = compiler->lvaTable + lclVarInterval->varNum; - assert(varDsc->lvType == LargeVectorType); + assert(varTypeNeedsPartialCalleeSave(varDsc->lvType)); regNumber lclVarReg = lclVarInterval->physReg; if (lclVarReg == REG_NA) { @@ -9029,14 +9031,14 @@ void LinearScan::insertUpperVectorSaveAndReload(GenTreePtr tree, RefPosition* re // First, insert the save before the call. - GenTreePtr saveLcl = compiler->gtNewLclvNode(lclVarInterval->varNum, LargeVectorType); + GenTreePtr saveLcl = compiler->gtNewLclvNode(lclVarInterval->varNum, varDsc->lvType); saveLcl->gtLsraInfo.isLsraAdded = true; saveLcl->gtRegNum = lclVarReg; saveLcl->gtLsraInfo.isLocalDefUse = false; GenTreeSIMD* simdNode = new (compiler, GT_SIMD) GenTreeSIMD(LargeVectorSaveType, saveLcl, nullptr, SIMDIntrinsicUpperSave, - varDsc->lvBaseType, genTypeSize(LargeVectorType)); + varDsc->lvBaseType, genTypeSize(varDsc->lvType)); simdNode->gtLsraInfo.isLsraAdded = true; simdNode->gtRegNum = spillReg; if (spillToMem) @@ -9048,13 +9050,13 @@ void LinearScan::insertUpperVectorSaveAndReload(GenTreePtr tree, RefPosition* re // Now insert the restore after the call. - GenTreePtr restoreLcl = compiler->gtNewLclvNode(lclVarInterval->varNum, LargeVectorType); + GenTreePtr restoreLcl = compiler->gtNewLclvNode(lclVarInterval->varNum, varDsc->lvType); restoreLcl->gtLsraInfo.isLsraAdded = true; restoreLcl->gtRegNum = lclVarReg; restoreLcl->gtLsraInfo.isLocalDefUse = false; - simdNode = new (compiler, GT_SIMD) - GenTreeSIMD(LargeVectorType, restoreLcl, nullptr, SIMDIntrinsicUpperRestore, varDsc->lvBaseType, 32); + simdNode = new (compiler, GT_SIMD) GenTreeSIMD(varDsc->lvType, restoreLcl, nullptr, SIMDIntrinsicUpperRestore, + varDsc->lvBaseType, genTypeSize(varDsc->lvType)); simdNode->gtLsraInfo.isLsraAdded = true; simdNode->gtRegNum = spillReg; if (spillToMem) diff --git a/src/jit/lsra.h b/src/jit/lsra.h index 00ce7ee..fb58364 100644 --- a/src/jit/lsra.h +++ b/src/jit/lsra.h @@ -1219,11 +1219,19 @@ private: VARSET_TP fpCalleeSaveCandidateVars; #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE #if defined(_TARGET_AMD64_) - static const var_types LargeVectorType = TYP_SIMD32; + static bool varTypeNeedsPartialCalleeSave(var_types type) + { + return (emitTypeSize(type) == 32); + } static const var_types LargeVectorSaveType = TYP_SIMD16; #elif defined(_TARGET_ARM64_) - static const var_types LargeVectorType = TYP_SIMD16; - static const var_types LargeVectorSaveType = TYP_DOUBLE; + static bool varTypeNeedsPartialCalleeSave(var_types type) + { + // ARM64 ABI FP Callee save registers only require Callee to save lower 8 Bytes + // For SIMD types longer then 8 bytes Caller is responsible for saving and restoring Upper bytes. + return (emitTypeSize(type) == 16); + } + static const var_types LargeVectorSaveType = TYP_DOUBLE; #else // !defined(_TARGET_AMD64_) && !defined(_TARGET_ARM64_) #error("Unknown target architecture for FEATURE_SIMD") #endif // !defined(_TARGET_AMD64_) && !defined(_TARGET_ARM64_) -- 2.7.4