From ebe11caae43041522d990bf9a5c0b00cc909f72d Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Wed, 12 Apr 2023 12:13:28 -0700 Subject: [PATCH] Allow promotion of structs with single float fields (#84627) * Allow promotion of structs with single float fields * Ensure x86 works for storing small types * Apply formatting patch --- src/coreclr/jit/codegencommon.cpp | 97 +++++++++++++++------------------------ src/coreclr/jit/lclvars.cpp | 33 ++----------- 2 files changed, 42 insertions(+), 88 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index bbb324a..0133175 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -2917,50 +2917,18 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere // struct regArgElem { - unsigned varNum; // index into compiler->lvaTable[] for this register argument -#if defined(UNIX_AMD64_ABI) - var_types type; // the Jit type of this regArgTab entry -#endif // defined(UNIX_AMD64_ABI) - unsigned trashBy; // index into this regArgTab[] table of the register that will be copied to this register. - // That is, for regArgTab[x].trashBy = y, argument register number 'y' will be copied to - // argument register number 'x'. Only used when circular = true. - char slot; // 0 means the register is not used for a register argument - // 1 means the first part of a register argument - // 2, 3 or 4 means the second,third or fourth part of a multireg argument - bool stackArg; // true if the argument gets homed to the stack - bool writeThru; // true if the argument gets homed to both stack and register - bool processed; // true after we've processed the argument (and it is in its final location) - bool circular; // true if this register participates in a circular dependency loop. - -#ifdef UNIX_AMD64_ABI - - // For UNIX AMD64 struct passing, the type of the register argument slot can differ from - // the type of the lclVar in ways that are not ascertainable from lvType. - // So, for that case we retain the type of the register in the regArgTab. - - var_types getRegType(Compiler* compiler) - { - return type; // UNIX_AMD64 implementation - } - -#else // !UNIX_AMD64_ABI - - // In other cases, we simply use the type of the lclVar to determine the type of the register. - var_types getRegType(Compiler* compiler) - { - const LclVarDsc* varDsc = compiler->lvaGetDesc(varNum); - // Check if this is an HFA register arg and return the HFA type - if (varDsc->lvIsHfaRegArg()) - { - // Cannot have hfa types on windows arm targets - // in vararg methods. - assert(!TargetOS::IsWindows || !compiler->info.compIsVarArgs); - return varDsc->GetHfaType(); - } - return compiler->mangleVarArgsType(varDsc->lvType); - } - -#endif // !UNIX_AMD64_ABI + unsigned varNum; // index into compiler->lvaTable[] for this register argument + var_types type; // the Jit type of this regArgTab entry + unsigned trashBy; // index into this regArgTab[] table of the register that will be copied to this register. + // That is, for regArgTab[x].trashBy = y, argument register number 'y' will be copied to + // argument register number 'x'. Only used when circular = true. + char slot; // 0 means the register is not used for a register argument + // 1 means the first part of a register argument + // 2, 3 or 4 means the second,third or fourth part of a multireg argument + bool stackArg; // true if the argument gets homed to the stack + bool writeThru; // true if the argument gets homed to both stack and register + bool processed; // true after we've processed the argument (and it is in its final location) + bool circular; // true if this register participates in a circular dependency loop. } regArgTab[max(MAX_REG_ARG + 1, MAX_FLOAT_REG_ARG)] = {}; unsigned varNum; @@ -3034,12 +3002,26 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere if (!varTypeIsStruct(regType)) #endif // defined(UNIX_AMD64_ABI) { - // A struct might be passed partially in XMM register for System V calls. - // So a single arg might use both register files. - if (emitter::isFloatReg(varDsc->GetArgReg()) != doingFloat) + bool isFloatReg = emitter::isFloatReg(varDsc->GetArgReg()); + + if (isFloatReg != doingFloat) { + // A struct might be passed partially in XMM register for System V calls. + // So a single arg might use both register files. continue; } + else if (isFloatReg != varTypeUsesFloatArgReg(regType)) + { + if (regType == TYP_FLOAT) + { + regType = TYP_INT; + } + else + { + assert(regType == TYP_DOUBLE); + regType = TYP_LONG; + } + } } int slots = 0; @@ -3187,15 +3169,13 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere regArgTab[regArgNum + i].varNum = varNum; regArgTab[regArgNum + i].slot = static_cast(i + 1); -#if defined(UNIX_AMD64_ABI) regArgTab[regArgNum + i].type = regType; // Set the register type. -#endif // defined(UNIX_AMD64_ABI) } } for (int i = 0; i < slots; i++) { - regType = regArgTab[regArgNum + i].getRegType(compiler); + regType = regArgTab[regArgNum + i].type; regNumber regNum = genMapRegArgNumToRegNum(regArgNum + i, regType); #if !defined(UNIX_AMD64_ABI) @@ -3335,7 +3315,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere noway_assert(varDsc->lvIsInReg()); noway_assert(!regArgTab[argNum].stackArg); - var_types regType = regArgTab[argNum].getRegType(compiler); + var_types regType = regArgTab[argNum].type; regNumber regNum = genMapRegArgNumToRegNum(argNum, regType); regNumber destRegNum = REG_NA; @@ -3534,7 +3514,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere } else // Not a struct type { - storeType = compiler->mangleVarArgsType(genActualType(varDsc->TypeGet())); + storeType = genActualType(regArgTab[argNum].type); } size = emitActualTypeSize(storeType); #ifdef TARGET_X86 @@ -3861,7 +3841,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere varNum = regArgTab[argNum].varNum; varDsc = compiler->lvaGetDesc(varNum); - const var_types regType = regArgTab[argNum].getRegType(compiler); + const var_types regType = regArgTab[argNum].type; const regNumber regNum = genMapRegArgNumToRegNum(argNum, regType); const var_types varRegType = varDsc->GetRegisterType(); @@ -4025,7 +4005,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere { argRegCount = 2; int nextArgNum = argNum + 1; - regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].getRegType(compiler)); + regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].type); noway_assert(regArgTab[nextArgNum].varNum == varNum); // Emit a shufpd with a 0 immediate, which preserves the 0th element of the dest reg // and moves the 0th element of the src reg into the 1st element of the dest reg. @@ -4051,9 +4031,8 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere { int nextArgNum = argNum + i; LclVarDsc* fieldVarDsc = compiler->lvaGetDesc(varDsc->lvFieldLclStart + i); - regNumber nextRegNum = - genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].getRegType(compiler)); - destRegNum = fieldVarDsc->GetRegNum(); + regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].type); + destRegNum = fieldVarDsc->GetRegNum(); noway_assert(regArgTab[nextArgNum].varNum == varNum); noway_assert(genIsValidFloatReg(nextRegNum)); noway_assert(genIsValidFloatReg(destRegNum)); @@ -4072,7 +4051,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere { int nextArgNum = argNum + i; regArgElem* nextArgElem = ®ArgTab[nextArgNum]; - var_types nextArgType = nextArgElem->getRegType(compiler); + var_types nextArgType = nextArgElem->type; regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, nextArgType); noway_assert(nextArgElem->varNum == varNum); noway_assert(genIsValidFloatReg(nextRegNum)); @@ -4092,7 +4071,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere int nextArgNum = argNum + regSlot; assert(!regArgTab[nextArgNum].processed); regArgTab[nextArgNum].processed = true; - regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].getRegType(compiler)); + regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].type); regArgMaskLive &= ~genRegMask(nextRegNum); } #endif // FEATURE_MULTIREG_ARGS diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index eb856cf..ebaa816 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -2119,24 +2119,6 @@ bool Compiler::StructPromotionHelper::ShouldPromoteStructVar(unsigned lclNum) JITDUMP("Not promoting multi-reg returned struct local V%02u with holes.\n", lclNum); shouldPromote = false; } -#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) || \ - defined(TARGET_RISCV64) - // TODO-PERF - Only do this when the LclVar is used in an argument context - // TODO-ARM64 - HFA support should also eliminate the need for this. - // TODO-ARM32 - HFA support should also eliminate the need for this. - // TODO-LSRA - Currently doesn't support the passing of floating point LCL_VARS in the integer registers - // - // For now we currently don't promote structs with a single float field - // Promoting it can cause us to shuffle it back and forth between the int and - // the float regs when it is used as a argument, which is very expensive for XARCH - // - else if ((structPromotionInfo.fieldCnt == 1) && varTypeIsFloating(structPromotionInfo.fields[0].fldType)) - { - JITDUMP("Not promoting promotable struct local V%02u: #fields = %d because it is a struct with " - "single float field.\n", - lclNum, structPromotionInfo.fieldCnt); - shouldPromote = false; - } #if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) else if ((structPromotionInfo.fieldCnt == 2) && (varTypeIsFloating(structPromotionInfo.fields[0].fldType) || varTypeIsFloating(structPromotionInfo.fields[1].fldType))) @@ -2147,8 +2129,7 @@ bool Compiler::StructPromotionHelper::ShouldPromoteStructVar(unsigned lclNum) lclNum, structPromotionInfo.fieldCnt); shouldPromote = false; } -#endif -#endif // TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM || TARGET_LOONGARCH64 || TARGET_RISCV64 +#endif // TARGET_LOONGARCH64 || TARGET_RISCV64 else if (varDsc->lvIsParam && !compiler->lvaIsImplicitByRefLocal(lclNum) && !varDsc->lvIsHfa()) { #if FEATURE_MULTIREG_STRUCT_PROMOTE @@ -2315,20 +2296,14 @@ bool Compiler::StructPromotionHelper::TryPromoteStructField(lvaStructFieldInfo& var_types fieldVarType = JITtype2varType(fieldCorType); unsigned fieldSize = genTypeSize(fieldVarType); - // Do not promote if the field is not a primitive type, is floating-point, - // or is not properly aligned. - // - // TODO-PERF: Structs containing a single floating-point field on Amd64 - // need to be passed in integer registers. Right now LSRA doesn't support - // passing of floating-point LCL_VARS in integer registers. Enabling promotion - // of such structs results in an assert in lsra right now. + // Do not promote if the field is not a primitive type or is not properly aligned. // // TODO-CQ: Right now we only promote an actual SIMD typed field, which would cause // a nested SIMD type to fail promotion. - if (fieldSize == 0 || fieldSize > TARGET_POINTER_SIZE || varTypeIsFloating(fieldVarType)) + if (fieldSize == 0 || fieldSize > TARGET_POINTER_SIZE) { JITDUMP("Promotion blocked: struct contains struct field with one field," - " but that field has invalid size or type.\n"); + " but that field has invalid size.\n"); return false; } -- 2.7.4