From 65a27aa018ee2b49a7c2f939a024992e8f23a18d Mon Sep 17 00:00:00 2001 From: Carol Eidt Date: Wed, 7 Oct 2020 10:00:14 -0700 Subject: [PATCH] Allow enregistering more structs args (#39326) * Allow enregistering more structs args Allow HFAs and other structs with matching fields and registers. Contributes to #37924 --- src/coreclr/src/jit/codegencommon.cpp | 93 +++++++++++++++++----- src/coreclr/src/jit/lclvars.cpp | 144 ++++++++++++++++++++++------------ src/coreclr/src/jit/lsrabuild.cpp | 23 ++++-- src/coreclr/src/jit/morph.cpp | 13 +++ 4 files changed, 197 insertions(+), 76 deletions(-) diff --git a/src/coreclr/src/jit/codegencommon.cpp b/src/coreclr/src/jit/codegencommon.cpp index cadfd3e..22a050a 100644 --- a/src/coreclr/src/jit/codegencommon.cpp +++ b/src/coreclr/src/jit/codegencommon.cpp @@ -3375,8 +3375,6 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere if (promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) { - noway_assert(parentVarDsc->lvFieldCnt == 1); // We only handle one field here - // For register arguments that are independent promoted structs we put the promoted field varNum in the // regArgTab[] if (varDsc->lvPromoted) @@ -3730,11 +3728,33 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere regNumber regNum = genMapRegArgNumToRegNum(argNum, regType); regNumber destRegNum = REG_NA; - if (regArgTab[argNum].slot == 1) + if (varTypeIsStruct(varDsc) && + (compiler->lvaGetPromotionType(varDsc) == Compiler::PROMOTION_TYPE_INDEPENDENT)) + { + assert(regArgTab[argNum].slot <= varDsc->lvFieldCnt); + LclVarDsc* fieldVarDsc = compiler->lvaGetDesc(varDsc->lvFieldLclStart + regArgTab[argNum].slot - 1); + destRegNum = fieldVarDsc->GetRegNum(); + } + else if (regArgTab[argNum].slot == 1) { destRegNum = varDsc->GetRegNum(); } -#if FEATURE_MULTIREG_ARGS && defined(FEATURE_SIMD) && defined(TARGET_64BIT) +#if defined(TARGET_ARM64) && defined(FEATURE_SIMD) + else if (varDsc->lvIsHfa()) + { + // This must be a SIMD type that's fully enregistered, but is passed as an HFA. + // Each field will be inserted into the same destination register. + assert(varTypeIsSIMD(varDsc) && + !compiler->isOpaqueSIMDType(varDsc->lvVerTypeInfo.GetClassHandle())); + assert(regArgTab[argNum].slot <= (int)varDsc->lvHfaSlots()); + assert(argNum > 0); + assert(regArgTab[argNum - 1].varNum == varNum); + regArgMaskLive &= ~genRegMask(regNum); + regArgTab[argNum].circular = false; + change = true; + continue; + } +#elif defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD) else { assert(regArgTab[argNum].slot == 2); @@ -3747,7 +3767,8 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere change = true; continue; } -#elif !defined(TARGET_64BIT) +#endif // defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD) +#if !defined(TARGET_64BIT) else if (regArgTab[argNum].slot == 2 && genActualType(varDsc->TypeGet()) == TYP_LONG) { destRegNum = varDsc->GetOtherReg(); @@ -4447,23 +4468,55 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere destRegNum = regNum; } #endif // defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD) -#if defined(TARGET_ARM64) && defined(FEATURE_SIMD) - if (varTypeIsSIMD(varDsc) && argNum < (argMax - 1) && regArgTab[argNum + 1].slot == 2) +#ifdef TARGET_ARMARCH + if (varDsc->lvIsHfa()) { - // For a SIMD type that is passed in two integer registers, - // Code above copies the first integer argument register into the lower 8 bytes - // of the target register. Here we must handle the second 8 bytes of the slot pair by - // inserting the second integer register into the upper 8 bytes of the target - // SIMD floating point register. - argRegCount = 2; - int nextArgNum = argNum + 1; - regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].getRegType(compiler)); - noway_assert(regArgTab[nextArgNum].varNum == varNum); - noway_assert(genIsValidIntReg(nextRegNum)); - noway_assert(genIsValidFloatReg(destRegNum)); - GetEmitter()->emitIns_R_R_I(INS_mov, EA_8BYTE, destRegNum, nextRegNum, 1); - } + // This includes both fixed-size SIMD types that are independently promoted, as well + // as other HFA structs. + argRegCount = varDsc->lvHfaSlots(); + if (argNum < (argMax - argRegCount + 1)) + { + if (compiler->lvaGetPromotionType(varDsc) == Compiler::PROMOTION_TYPE_INDEPENDENT) + { + // For an HFA type that is passed in multiple registers and promoted, we copy each field to its + // destination register. + for (int i = 0; i < argRegCount; i++) + { + int nextArgNum = argNum + i; + LclVarDsc* fieldVarDsc = compiler->lvaGetDesc(varDsc->lvFieldLclStart + i); + regNumber nextRegNum = + genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].getRegType(compiler)); + destRegNum = fieldVarDsc->GetRegNum(); + noway_assert(regArgTab[nextArgNum].varNum == varNum); + noway_assert(genIsValidFloatReg(nextRegNum)); + noway_assert(genIsValidFloatReg(destRegNum)); + GetEmitter()->emitIns_R_R(INS_mov, EA_8BYTE, destRegNum, nextRegNum); + } + } +#if defined(TARGET_ARM64) && defined(FEATURE_SIMD) + else + { + // For a SIMD type that is passed in multiple registers but enregistered as a vector, + // the code above copies the first argument register into the lower 4 or 8 bytes + // of the target register. Here we must handle the subsequent fields by + // inserting them into the upper bytes of the target SIMD floating point register. + argRegCount = varDsc->lvHfaSlots(); + for (int i = 1; i < argRegCount; i++) + { + int nextArgNum = argNum + i; + regArgElem* nextArgElem = ®ArgTab[nextArgNum]; + var_types nextArgType = nextArgElem->getRegType(compiler); + regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, nextArgType); + noway_assert(nextArgElem->varNum == varNum); + noway_assert(genIsValidFloatReg(nextRegNum)); + noway_assert(genIsValidFloatReg(destRegNum)); + GetEmitter()->emitIns_R_R_I_I(INS_mov, EA_4BYTE, destRegNum, nextRegNum, i, 0); + } + } #endif // defined(TARGET_ARM64) && defined(FEATURE_SIMD) + } + } +#endif // TARGET_ARMARCH // Mark the rest of the argument registers corresponding to this multi-reg type as // being processed and no longer live. diff --git a/src/coreclr/src/jit/lclvars.cpp b/src/coreclr/src/jit/lclvars.cpp index 101f31c..f75f7c6 100644 --- a/src/coreclr/src/jit/lclvars.cpp +++ b/src/coreclr/src/jit/lclvars.cpp @@ -1027,7 +1027,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo) #else // !UNIX_AMD64_ABI compArgSize += argSize; #endif // !UNIX_AMD64_ABI - if (info.compIsVarArgs || (isHfaArg && varDsc->lvHfaSlots() != 1) || isSoftFPPreSpill) + if (info.compIsVarArgs || isSoftFPPreSpill) { #if defined(TARGET_X86) varDsc->SetStackOffset(compArgSize); @@ -1832,17 +1832,6 @@ bool Compiler::StructPromotionHelper::CanPromoteStructVar(unsigned lclNum) return false; } - // Explicitly check for HFA reg args and reject them for promotion here. - // Promoting HFA args will fire an assert in lvaAssignFrameOffsets - // when the HFA reg arg is struct promoted. - // - // TODO-PERF - Allow struct promotion for HFA register arguments - if (varDsc->lvIsHfaRegArg()) - { - JITDUMP(" struct promotion of V%02u is disabled because lvIsHfaRegArg()\n", lclNum); - return false; - } - if (!compiler->lvaEnregMultiRegVars && varDsc->lvIsMultiRegArgOrRet()) { JITDUMP(" struct promotion of V%02u is disabled because lvIsMultiRegArgOrRet()\n", lclNum); @@ -1862,18 +1851,55 @@ bool Compiler::StructPromotionHelper::CanPromoteStructVar(unsigned lclNum) bool canPromote = CanPromoteStructType(typeHnd); if (canPromote && varDsc->lvIsMultiRegArgOrRet()) { - if (structPromotionInfo.fieldCnt > MAX_MULTIREG_COUNT) + unsigned fieldCnt = structPromotionInfo.fieldCnt; + if (fieldCnt > MAX_MULTIREG_COUNT) { canPromote = false; } -#ifdef UNIX_AMD64_ABI +#if defined(FEATURE_SIMD) && defined(TARGET_ARMARCH) + else if (fieldCnt > 1) + { + // If we have a register-passed struct with mixed non-opaque SIMD types (i.e. with defined fields) + // and non-SIMD types, we don't currently handle that case in the prolog, so we can't promote. + for (unsigned i = 0; canPromote && (i < fieldCnt); i++) + { + if (varTypeIsStruct(structPromotionInfo.fields[i].fldType) && + !compiler->isOpaqueSIMDType(structPromotionInfo.fields[i].fldTypeHnd)) + { + canPromote = false; + } + } + } +#elif defined(UNIX_AMD64_ABI) else { SortStructFields(); - if ((structPromotionInfo.fieldCnt == 2) && (structPromotionInfo.fields[1].fldOffset != TARGET_POINTER_SIZE)) + // Only promote if the field types match the registers. + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; + compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc); + unsigned regCount = structDesc.eightByteCount; + if (structPromotionInfo.fieldCnt != regCount) { canPromote = false; } + else + { + for (unsigned i = 0; canPromote && (i < regCount); i++) + { + lvaStructFieldInfo* fieldInfo = &(structPromotionInfo.fields[i]); + var_types fieldType = fieldInfo->fldType; + // We don't currently support passing SIMD types in registers. + if (varTypeIsSIMD(fieldType)) + { + canPromote = false; + } + else if (varTypeUsesFloatReg(fieldType) != + (structDesc.eightByteClassifications[i] == SystemVClassificationTypeSSE)) + { + canPromote = false; + } + } + } } #endif // UNIX_AMD64_ABI } @@ -1948,7 +1974,7 @@ bool Compiler::StructPromotionHelper::ShouldPromoteStructVar(unsigned lclNum) shouldPromote = false; } #endif // TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM - else if (varDsc->lvIsParam && !compiler->lvaIsImplicitByRefLocal(lclNum)) + else if (varDsc->lvIsParam && !compiler->lvaIsImplicitByRefLocal(lclNum) && !varDsc->lvIsHfa()) { #if FEATURE_MULTIREG_STRUCT_PROMOTE // Is this a variable holding a value with exactly two fields passed in @@ -2250,25 +2276,49 @@ void Compiler::StructPromotionHelper::PromoteStructVar(unsigned lclNum) // Reset the implicitByRef flag. fieldVarDsc->lvIsImplicitByRef = 0; +#endif + +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_ARM) + // Do we have a parameter that can be enregistered? // if (varDsc->lvIsRegArg) { fieldVarDsc->lvIsRegArg = true; - fieldVarDsc->SetArgReg(varDsc->GetArgReg()); -#if FEATURE_MULTIREG_ARGS && defined(FEATURE_SIMD) - if (varTypeIsSIMD(fieldVarDsc) && !compiler->lvaIsImplicitByRefLocal(lclNum)) + regNumber parentArgReg = varDsc->GetArgReg(); +#if FEATURE_MULTIREG_ARGS + if (!compiler->lvaIsImplicitByRefLocal(lclNum)) { - // This field is a SIMD type, and will be considered to be passed in multiple registers - // if the parent struct was. Note that this code relies on the fact that if there is - // a SIMD field of an enregisterable struct, it is the only field. - // We will assert that, in case future changes are made to the ABI. - assert(varDsc->lvFieldCnt == 1); - fieldVarDsc->SetOtherArgReg(varDsc->GetOtherArgReg()); +#ifdef UNIX_AMD64_ABI + if (varTypeIsSIMD(fieldVarDsc) && (varDsc->lvFieldCnt == 1)) + { + // This SIMD typed field may be passed in multiple registers. + fieldVarDsc->SetArgReg(parentArgReg); + fieldVarDsc->SetOtherArgReg(varDsc->GetOtherArgReg()); + } + else +#endif // UNIX_AMD64_ABI + { + // TODO: Need to determine if/how to handle split args. + // TODO: Assert that regs are always sequential. + unsigned regIncrement = fieldVarDsc->lvFldOrdinal; +#ifdef TARGET_ARM + if (varDsc->lvIsHfa() && (varDsc->GetHfaType() == TYP_DOUBLE)) + { + regIncrement = (fieldVarDsc->lvFldOrdinal * 2); + } +#endif // TARGET_ARM + regNumber fieldRegNum = (regNumber)(parentArgReg + regIncrement); + fieldVarDsc->SetArgReg(fieldRegNum); + } } + else #endif // FEATURE_MULTIREG_ARGS && defined(FEATURE_SIMD) + { + fieldVarDsc->SetArgReg(parentArgReg); + } } -#endif +#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_ARM) #ifdef FEATURE_SIMD if (varTypeIsSIMD(pFieldInfo->fldType)) @@ -5142,10 +5192,11 @@ bool Compiler::lvaIsPreSpilled(unsigned lclNum, regMaskTP preSpillMask) #endif // TARGET_ARM //------------------------------------------------------------------------ -// UpdateLifeFieldVar: Update live sets for only the given field of a multi-reg LclVar node. +// lvaUpdateArgWithInitialReg: Set the initial register of a local variable +// to the one assigned by the register allocator. // // Arguments: -// lclNode - the GT_LCL_VAR node. +// varDsc - the local variable descriptor // void Compiler::lvaUpdateArgWithInitialReg(LclVarDsc* varDsc) { @@ -5174,15 +5225,14 @@ void Compiler::lvaUpdateArgsWithInitialReg() if (varDsc->lvPromotedStruct()) { - noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here - - unsigned fieldVarNum = varDsc->lvFieldLclStart; - varDsc = lvaTable + fieldVarNum; + for (unsigned fieldVarNum = varDsc->lvFieldLclStart; + fieldVarNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++fieldVarNum) + { + LclVarDsc* fieldVarDsc = lvaGetDesc(fieldVarNum); + lvaUpdateArgWithInitialReg(fieldVarDsc); + } } - - noway_assert(varDsc->lvIsParam); - - if (varDsc->lvIsRegCandidate()) + else { lvaUpdateArgWithInitialReg(varDsc); } @@ -5464,12 +5514,6 @@ int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, offset += fieldVarDsc->lvFldOffset; } } - // For an independent promoted struct field we also assign the parent struct stack offset - else if (varDsc->lvIsStructField) - { - noway_assert(varDsc->lvParentLcl < lvaCount); - lvaTable[varDsc->lvParentLcl].SetStackOffset(varDsc->GetStackOffset()); - } if (Target::g_tgtArgOrder == Target::ARG_ORDER_R2L && !varDsc->lvIsRegArg) { @@ -6403,23 +6447,24 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() // Reserve the stack space for this variable stkOffs = lvaAllocLocalAndSetVirtualOffset(lclNum, lvaLclSize(lclNum), stkOffs); -#ifdef TARGET_ARM64 +#ifdef TARGET_ARMARCH // If we have an incoming register argument that has a struct promoted field // then we need to copy the lvStkOff (the stack home) from the reg arg to the field lclvar // if (varDsc->lvIsRegArg && varDsc->lvPromotedStruct()) { - noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here - - unsigned fieldVarNum = varDsc->lvFieldLclStart; - lvaTable[fieldVarNum].SetStackOffset(varDsc->GetStackOffset()); + unsigned firstFieldNum = varDsc->lvFieldLclStart; + for (unsigned i = 0; i < varDsc->lvFieldCnt; i++) + { + LclVarDsc* fieldVarDsc = lvaGetDesc(firstFieldNum + i); + fieldVarDsc->SetStackOffset(varDsc->GetStackOffset() + fieldVarDsc->lvFldOffset); + } } -#endif // TARGET_ARM64 #ifdef TARGET_ARM // If we have an incoming register argument that has a promoted long // then we need to copy the lvStkOff (the stack home) from the reg arg to the field lclvar // - if (varDsc->lvIsRegArg && varDsc->lvPromoted) + else if (varDsc->lvIsRegArg && varDsc->lvPromoted) { assert(varTypeIsLong(varDsc) && (varDsc->lvFieldCnt == 2)); @@ -6428,6 +6473,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() lvaTable[fieldVarNum + 1].SetStackOffset(varDsc->GetStackOffset() + 4); } #endif // TARGET_ARM +#endif // TARGET_ARM64 } } diff --git a/src/coreclr/src/jit/lsrabuild.cpp b/src/coreclr/src/jit/lsrabuild.cpp index 146a956..7b581f2 100644 --- a/src/coreclr/src/jit/lsrabuild.cpp +++ b/src/coreclr/src/jit/lsrabuild.cpp @@ -2180,15 +2180,24 @@ void LinearScan::buildIntervals() if (argDsc->lvPromotedStruct()) { - noway_assert(argDsc->lvFieldCnt == 1); // We only handle one field here - - unsigned fieldVarNum = argDsc->lvFieldLclStart; - argDsc = &(compiler->lvaTable[fieldVarNum]); + for (unsigned fieldVarNum = argDsc->lvFieldLclStart; + fieldVarNum < argDsc->lvFieldLclStart + argDsc->lvFieldCnt; ++fieldVarNum) + { + LclVarDsc* fieldVarDsc = compiler->lvaGetDesc(fieldVarNum); + noway_assert(fieldVarDsc->lvIsParam); + if (!fieldVarDsc->lvTracked && fieldVarDsc->lvIsRegArg) + { + updateRegStateForArg(fieldVarDsc); + } + } } - noway_assert(argDsc->lvIsParam); - if (!argDsc->lvTracked && argDsc->lvIsRegArg) + else { - updateRegStateForArg(argDsc); + noway_assert(argDsc->lvIsParam); + if (!argDsc->lvTracked && argDsc->lvIsRegArg) + { + updateRegStateForArg(argDsc); + } } } diff --git a/src/coreclr/src/jit/morph.cpp b/src/coreclr/src/jit/morph.cpp index d4d97fc..08b7bf03 100644 --- a/src/coreclr/src/jit/morph.cpp +++ b/src/coreclr/src/jit/morph.cpp @@ -4214,6 +4214,10 @@ void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call) { structSize = argx->AsObj()->GetLayout()->GetSize(); } + else if (varTypeIsSIMD(argx)) + { + structSize = genTypeSize(argx); + } else { assert(argx->OperIs(GT_LCL_VAR)); @@ -10509,6 +10513,10 @@ GenTree* Compiler::fgMorphCopyBlock(GenTree* tree) JITDUMP(" not morphing a multireg call return\n"); return tree; } + else if (dest->IsMultiRegLclVar() && !src->IsMultiRegNode()) + { + dest->AsLclVar()->ClearMultiReg(); + } #endif // FEATURE_MULTIREG_RET if (src->IsCall() && !compDoOldStructRetyping()) @@ -17583,6 +17591,11 @@ void Compiler::fgPromoteStructs() lvaStructPromotionInfo structPromotionInfo; bool tooManyLocalsReported = false; + // Clear the structPromotionHelper, since it is used during inlining, at which point it + // may be conservative about looking up SIMD info. + // We don't want to preserve those conservative decisions for the actual struct promotion. + structPromotionHelper->Clear(); + for (unsigned lclNum = 0; lclNum < startLvaCount; lclNum++) { // Whether this var got promoted -- 2.7.4