From: jashook Date: Mon, 5 Jun 2017 21:48:51 +0000 (-0700) Subject: [Unix x64|Arm64] Correct canfastTailCall decisions X-Git-Tag: accepted/tizen/base/20180629.140029~670^2~281^2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=ee95d7c5f552dcfc1b69f8ac2567c4afda40695e;p=platform%2Fupstream%2Fcoreclr.git [Unix x64|Arm64] Correct canfastTailCall decisions This will change how the fastTailCall decision is made for x64 unix and arm64. Before this change the decision was based on the amount of incoming and outgoing caller arguments like on Windows. This was incorrect on Unix x64 and Arm64 because one argument does not translate to one register or one stack slot use. Before this change structs on Arm64 and Amd64 Unix could pessimize when we could fastTailCall if they were enregisterable and took more than one register. This change also fixes several cases when determining to fastTailCall. It fixes #12479 and will cause a no-fastTailCall decision for case #12468. In addition this change adds several regression cases for #12479 and #12468. It includes more logging for fastTailCall decisions, including a new COMPlus variable named COMPlus_JitReportFastTailCallDecisions, which can be toggled with COMPlus_JitReportFastTailCallDecisions=1. --- diff --git a/src/jit/compiler.h b/src/jit/compiler.h index 2c6f9b2..64e5c4f 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -722,6 +722,8 @@ public: return (unsigned)(roundUp(lvExactSize, TARGET_POINTER_SIZE)); } + const size_t lvArgStackSize() const; + unsigned lvSlotNum; // original slot # (if remapped) typeInfo lvVerTypeInfo; // type info needed for verification @@ -8285,7 +8287,14 @@ public: var_types compRetNativeType; // Normalized return type as per target arch ABI unsigned compILargsCount; // Number of arguments (incl. implicit but not hidden) unsigned compArgsCount; // Number of arguments (incl. 
implicit and hidden) - unsigned compRetBuffArg; // position of hidden return param var (0, 1) (BAD_VAR_NUM means not present); + +#if FEATURE_FASTTAILCALL + unsigned compArgRegCount; // Number of incoming integer argument registers used for incoming arguments + unsigned compFloatArgRegCount; // Number of incoming floating argument registers used for incoming arguments + size_t compArgStackSize; // Incoming argument stack size in bytes +#endif // FEATURE_FASTTAILCALL + + unsigned compRetBuffArg; // position of hidden return param var (0, 1) (BAD_VAR_NUM means not present); int compTypeCtxtArg; // position of hidden param for type context for generic code (CORINFO_CALLCONV_PARAMTYPE) unsigned compThisArg; // position of implicit this pointer param (not to be confused with lvaArg0Var) unsigned compILlocalsCount; // Number of vars : args + locals (incl. implicit but not hidden) diff --git a/src/jit/jitconfigvalues.h b/src/jit/jitconfigvalues.h index 2db5dfe..cb50365 100644 --- a/src/jit/jitconfigvalues.h +++ b/src/jit/jitconfigvalues.h @@ -94,6 +94,7 @@ CONFIG_INTEGER(JitNoRegLoc, W("JitNoRegLoc"), 0) CONFIG_INTEGER(JitNoStructPromotion, W("JitNoStructPromotion"), 0) // Disables struct promotion in Jit32 CONFIG_INTEGER(JitNoUnroll, W("JitNoUnroll"), 0) CONFIG_INTEGER(JitOrder, W("JitOrder"), 0) +CONFIG_INTEGER(JitReportFastTailCallDecisions, W("JitReportFastTailCallDecisions"), 0) CONFIG_INTEGER(JitPInvokeCheckEnabled, W("JITPInvokeCheckEnabled"), 0) CONFIG_INTEGER(JitPInvokeEnabled, W("JITPInvokeEnabled"), 1) CONFIG_INTEGER(JitPrintInlinedMethods, W("JitPrintInlinedMethods"), 0) diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp index fe8aaac..b469487 100644 --- a/src/jit/lclvars.cpp +++ b/src/jit/lclvars.cpp @@ -235,6 +235,60 @@ void Compiler::lvaInitTypeRef() lvaInitArgs(&varDscInfo); +#if FEATURE_FASTTAILCALL + + //------------------------------------------------------------------------- + // Calculate the argument register usage. 
+ // + // This will later be used for fastTailCall determination + //------------------------------------------------------------------------- + + unsigned argRegCount = 0; + unsigned floatingRegCount = 0; + size_t stackSize = 0; + + auto incrementRegCount = [&floatingRegCount, &argRegCount](LclVarDsc* varDsc) { + if (varDsc->lvIsHfa()) + { + floatingRegCount += varDsc->lvHfaSlots(); + } + else + { + varDsc->IsFloatRegType() ? ++floatingRegCount : ++argRegCount; + } + }; + + unsigned argNum; + LclVarDsc* curDsc; + + for (curDsc = lvaTable, argNum = 0; argNum < varDscInfo.varNum; argNum++, curDsc++) + { + if (curDsc->lvIsRegArg) + { + incrementRegCount(curDsc); +#if FEATURE_MULTIREG_ARGS + if (curDsc->lvOtherArgReg != REG_NA) + { + incrementRegCount(curDsc); + } +#endif // FEATURE_MULTIREG_ARGS + } + else + { + stackSize += curDsc->lvArgStackSize(); + } + } + + //------------------------------------------------------------------------- + // Save the register usage information and stack size. 
+ //------------------------------------------------------------------------- + + info.compArgRegCount = argRegCount; + info.compFloatArgRegCount = floatingRegCount; + info.compArgStackSize = stackSize; + +#endif // FEATURE_FASTTAILCALL + //------------------------------------------------------------------------- // Finally the local variables //------------------------------------------------------------------------- @@ -247,15 +301,16 @@ void Compiler::lvaInitTypeRef() i++, varNum++, varDsc++, localsSig = info.compCompHnd->getArgNext(localsSig)) { CORINFO_CLASS_HANDLE typeHnd; - CorInfoTypeWithMod corInfoType = + CorInfoTypeWithMod corInfoTypeWithMod = info.compCompHnd->getArgType(&info.compMethodInfo->locals, localsSig, &typeHnd); + CorInfoType corInfoType = strip(corInfoTypeWithMod); - lvaInitVarDsc(varDsc, varNum, strip(corInfoType), typeHnd, localsSig, &info.compMethodInfo->locals); + lvaInitVarDsc(varDsc, varNum, corInfoType, typeHnd, localsSig, &info.compMethodInfo->locals); - varDsc->lvPinned = ((corInfoType & CORINFO_TYPE_MOD_PINNED) != 0); + varDsc->lvPinned = ((corInfoTypeWithMod & CORINFO_TYPE_MOD_PINNED) != 0); varDsc->lvOnFrame = true; // The final home for this local variable might be our local stack frame - if (strip(corInfoType) == CORINFO_TYPE_CLASS) + if (corInfoType == CORINFO_TYPE_CLASS) { CORINFO_CLASS_HANDLE clsHnd = info.compCompHnd->getArgClass(&info.compMethodInfo->locals, localsSig); lvaSetClass(varNum, clsHnd); @@ -1253,6 +1308,10 @@ void Compiler::lvaInitVarDsc(LclVarDsc* varDsc, #ifdef DEBUG varDsc->lvStkOffs = BAD_STK_OFFS; #endif + +#if FEATURE_MULTIREG_ARGS + varDsc->lvOtherArgReg = REG_NA; +#endif // FEATURE_MULTIREG_ARGS } /***************************************************************************** @@ -3450,6 +3509,48 @@ void LclVarDsc::lvaDisqualifyVar() } #endif // ASSERTION_PROP +/********************************************************************************** +* Get stack size of the varDsc. 
+*/ +const size_t LclVarDsc::lvArgStackSize() const +{ + // Make sure this will have a stack size + assert(!this->lvIsRegArg); + + size_t stackSize = 0; + if (varTypeIsStruct(this)) + { +#if defined(WINDOWS_AMD64_ABI) + // Structs are either passed by reference or can be passed by value using one pointer + stackSize = TARGET_POINTER_SIZE; +#elif defined(_TARGET_ARM64_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + // lvSize performs a roundup. + stackSize = this->lvSize(); + +#if defined(_TARGET_ARM64_) + if ((stackSize > TARGET_POINTER_SIZE * 2) && (!this->lvIsHfa())) + { + // If the size is greater than 16 bytes then it will + // be passed by reference. + stackSize = TARGET_POINTER_SIZE; + } +#endif // defined(_TARGET_ARM64_) + +#else // !_TARGET_ARM64_ !WINDOWS_AMD64_ABI !FEATURE_UNIX_AMD64_STRUCT_PASSING + + NYI("Unsupported target."); + unreached(); + +#endif // !_TARGET_ARM64_ !WINDOWS_AMD64_ABI !FEATURE_UNIX_AMD64_STRUCT_PASSING + } + else + { + stackSize = TARGET_POINTER_SIZE; + } + + return stackSize; +} + #ifndef LEGACY_BACKEND /********************************************************************************** * Get type of a variable when passed as an argument. diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index 2dd18a2..947ac71 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -7001,15 +7001,127 @@ void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result) #endif } -/***************************************************************************** - * - * Performs checks to see if this tail call can be optimized as epilog+jmp. - */ +//------------------------------------------------------------------------ +// fgCanFastTailCall: Check to see if this tail call can be optimized as epilog+jmp. 
+// +// Arguments: +// callee - The callee to check +// +// Return Value: +// Returns true or false based on whether the callee can be fastTailCalled +// +// Notes: +// This function is target specific and each target will make the fastTailCall +// decision differently. See the notes below. +// +// +// Windows Amd64: +// A fast tail call can be made whenever the number of callee arguments +// is larger than or equal to the number of caller arguments, or we have four +// or fewer callee arguments. This is because, on Windows AMD64, each +// argument uses exactly one register or one 8-byte stack slot. Thus, we only +// need to count arguments, and not be concerned with the size of each +// incoming or outgoing argument. +// +// Can fast tail call examples (amd64 Windows): +// +// -- Callee will have all register arguments -- +// caller(int, int, int, int) +// callee(int, int, float, int) +// +// -- Callee requires stack space that is equal to the caller -- +// caller(struct, struct, struct, struct, struct, struct) +// callee(int, int, int, int, int, int) +// +// -- Callee requires stack space that is less than the caller -- +// caller(struct, double, struct, float, struct, struct) +// callee(int, int, int, int, int) +// +// -- Callee will have all register arguments -- +// caller(int) +// callee(int, int, int, int) +// +// Cannot fast tail call examples (amd64 Windows): +// +// -- Callee requires stack space that is larger than the caller -- +// caller(struct, double, struct, float, struct, struct) +// callee(int, int, int, int, int, double, double, double) +// +// Unix Amd64 && Arm64: +// A fastTailCall decision can be made whenever the callee's stack space is +// less than or equal to the caller's stack space. There are many permutations +// of when the caller and callee have different stack sizes if there are +// structs being passed to either the caller or callee. 
+// +// Exceptions: +// 1) If the callee has structs which cannot be enregistered it will be +// reported as cannot fast tail call. This is an implementation limitation +// where the callee only is checked for non enregisterable structs. This is +// tracked with https://github.com/dotnet/coreclr/issues/12644. +// +// 2) If the caller or callee has stack arguments and the callee has more +// arguments than the caller it will be reported as cannot fast tail call. +// This is due to a bug in LowerFastTailCall which assumes that +// nCalleeArgs <= nCallerArgs, which is always true on Windows Amd64. This +// is tracked with https://github.com/dotnet/coreclr/issues/12468. +// +// 3) If the callee has a 9 to 16 byte struct argument and the callee has +// stack arguments, the decision will be to not fast tail call. This is +// because before fgMorphArgs is done, the struct is unknown whether it +// will be placed on the stack or enregistered. Therefore, the conservative +// decision of do not fast tail call is taken. This limitation should be +// removed if/when fgMorphArgs no longer depends on fgCanFastTailCall. +// +// 4) Arm64 Only, if there are HFA arguments and the callee has stack +// arguments, the decision will be reported as cannot fast tail call. +// This is because before fgMorphArgs is done, the struct is unknown whether it +// will be placed on the stack or enregistered. Therefore, the conservative +// decision of do not fast tail call is taken. 
+// +// Can fast tail call examples (amd64 Unix): +// +// -- Callee will have all register arguments -- +// caller(int, int, int, int) +// callee(int, int, float, int) +// +// -- Callee requires stack space that is equal to the caller -- +// caller({ int, int }, { int, int }, { int }, { int }, { int }, { int }) -- 6 int register arguments, 16 byte stack +// space +// callee(int, int, int, int, int, int, int, int) -- 6 int register arguments, 16 byte stack space +// +// -- Callee requires stack space that is less than the caller -- +// caller({ int, int }, int, { int, int }, int, { int, int }, { int, int }) 6 int register arguments, 32 byte stack +// space +// callee(int, int, int, int, int, int, { int, int } ) // 6 int register arguments, 16 byte stack space +// +// -- Callee will have all register arguments -- +// caller(int) +// callee(int, int, int, int) +// +// Cannot fast tail call examples (amd64 Unix): +// +// -- Callee requires stack space that is larger than the caller -- +// caller(float, float, float, float, float, float, float, float) -- 8 float register arguments +// callee(int, int, int, int, int, int, int, int) -- 6 int register arguments, 16 byte stack space +// +// -- Callee has structs which cannot be enregistered (Implementation Limitation) -- +// caller(float, float, float, float, float, float, float, float, { double, double, double }) -- 8 float register +// arguments, 24 byte stack space +// callee({ double, double, double }) -- 24 bytes stack space +// +// -- Callee requires stack space and has a struct argument >8 bytes and <16 bytes (Implementation Limitation) -- +// caller(int, int, int, int, int, int, { double, double, double }) -- 6 int register arguments, 24 byte stack space +// callee(int, int, int, int, int, int, { int, int }) -- 6 int registers, 16 byte stack space +// +// -- Caller requires stack space and nCalleeArgs > nCallerArgs (Bug) -- +// caller({ double, double, double, double, double, double }) // 48 byte stack +// 
callee(int, int) -- 2 int registers + bool Compiler::fgCanFastTailCall(GenTreeCall* callee) { #if FEATURE_FASTTAILCALL - // Reached here means that return types of caller and callee are tail call compatible. - // In case of structs that can be returned in a register, compRetNativeType is set to the actual return type. + // To reach here means that the return types of the caller and callee are tail call compatible. + // In the case of structs that can be returned in a register, compRetNativeType is set to the actual return type. // // In an implicit tail call case callSig may not be available but it is guaranteed to be available // for explicit tail call cases. The reason implicit tail case callSig may not be available is that @@ -7026,6 +7138,42 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) } #endif + auto reportFastTailCallDecision = [this, callee](const char* msg, size_t callerStackSize, size_t calleeStackSize) { +#if DEBUG + if ((JitConfig.JitReportFastTailCallDecisions()) == 1) + { + if (callee->gtCallType != CT_INDIRECT) + { + const char* methodName; + + methodName = eeGetMethodFullName(callee->gtCallMethHnd); + + printf("[Fast tailcall decision]: Caller: %s\n[Fast tailcall decision]: Callee: %s -- Decision: ", + info.compFullName, methodName); + } + else + { + printf("[Fast tailcall decision]: Caller: %s\n[Fast tailcall decision]: Callee: IndirectCall -- " + "Decision: ", + info.compFullName); + } + + if (callerStackSize != -1) + { + printf("%s (CallerStackSize: %d, CalleeStackSize: %d)\n\n", msg, callerStackSize, calleeStackSize); + } + else + { + printf("%s\n\n", msg); + } + } + else + { + JITDUMP("[Fast tailcall decision]: %s\n", msg); + } +#endif // DEBUG + }; + // Note on vararg methods: // If the caller is vararg method, we don't know the number of arguments passed by caller's caller. 
// But we can be sure that in-coming arg area of vararg caller would be sufficient to hold its @@ -7033,27 +7181,31 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) // out-going area required for callee is bounded by caller's fixed argument space. // // Note that callee being a vararg method is not a problem since we can account the params being passed. - - // Count of caller args including implicit and hidden (i.e. thisPtr, RetBuf, GenericContext, VarargCookie) unsigned nCallerArgs = info.compArgsCount; + size_t callerArgRegCount = info.compArgRegCount; + size_t callerFloatArgRegCount = info.compFloatArgRegCount; + // Count the callee args including implicit and hidden. // Note that GenericContext and VarargCookie are added by importer while // importing the call to gtCallArgs list along with explicit user args. - unsigned nCalleeArgs = 0; + size_t calleeArgRegCount = 0; + size_t calleeFloatArgRegCount = 0; + if (callee->gtCallObjp) // thisPtr { - nCalleeArgs++; + ++calleeArgRegCount; } if (callee->HasRetBufArg()) // RetBuf { - nCalleeArgs++; + ++calleeArgRegCount; // If callee has RetBuf param, caller too must have it. // Otherwise go the slow route. if (info.compRetBuffArg == BAD_VAR_NUM) { + reportFastTailCallDecision("Callee has RetBuf but caller does not.", 0, 0); return false; } } @@ -7062,11 +7214,14 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) // that cannot be passed in a register. Note that we don't need to count // non-standard and secret params passed in registers (e.g. R10, R11) since // these won't contribute to out-going arg size. - bool hasMultiByteArgs = false; - for (GenTreePtr args = callee->gtCallArgs; (args != nullptr) && !hasMultiByteArgs; args = args->gtOp.gtOp2) - { - nCalleeArgs++; - + bool hasMultiByteStackArgs = false; + bool hasTwoSlotSizedStruct = false; + bool hasHfaArg = false; + size_t nCalleeArgs = calleeArgRegCount; // Keep track of how many args we have. 
+ size_t calleeStackSize = 0; + for (GenTreePtr args = callee->gtCallArgs; (args != nullptr); args = args->gtOp.gtOp2) + { + ++nCalleeArgs; assert(args->OperIsList()); GenTreePtr argx = args->gtOp.gtOp1; @@ -7093,24 +7248,85 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) { #if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) - unsigned typeSize = 0; - hasMultiByteArgs = !VarTypeIsMultiByteAndCanEnreg(argx->TypeGet(), objClass, &typeSize, false); + // hasMultiByteStackArgs will determine if the struct can be passed + // in registers. If it cannot we will break the loop and not + // fastTailCall. This is an implementation limitation + // where the callee only is checked for non enregisterable structs. + // It is tracked with https://github.com/dotnet/coreclr/issues/12644. + unsigned typeSize = 0; + hasMultiByteStackArgs = hasMultiByteStackArgs || + !VarTypeIsMultiByteAndCanEnreg(argx->TypeGet(), objClass, &typeSize, false); + +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; + + assert(objClass != nullptr); + eeGetSystemVAmd64PassStructInRegisterDescriptor(objClass, &structDesc); + + if (structDesc.passedInRegisters) + { + if (structDesc.eightByteCount == 2) + { + hasTwoSlotSizedStruct = true; + } + + for (unsigned int i = 0; i < structDesc.eightByteCount; i++) + { + if (structDesc.IsIntegralSlot(i)) + { + ++calleeArgRegCount; + } + else if (structDesc.IsSseSlot(i)) + { + ++calleeFloatArgRegCount; + } + else + { + assert(false && "Invalid eightbyte classification type."); + break; + } + } + } + else + { + calleeStackSize += roundUp(typeSize, TARGET_POINTER_SIZE); + } + +#elif defined(_TARGET_ARM64_) // ARM64 + var_types hfaType = GetHfaType(argx); + bool isHfaArg = varTypeIsFloating(hfaType); + size_t size = 1; -#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) || defined(_TARGET_ARM64_) - // On System V/arm64 the args could be a 2 eightbyte struct that is passed in two registers. 
- // Account for the second eightbyte in the nCalleeArgs. - // https://github.com/dotnet/coreclr/issues/2666 - // TODO-CQ-Amd64-Unix/arm64: Structs of size between 9 to 16 bytes are conservatively estimated - // as two args, since they need two registers whereas nCallerArgs is - // counting such an arg as one. This would mean we will not be optimizing - // certain calls though technically possible. + if (isHfaArg) + { + hasHfaArg = true; - if (typeSize > TARGET_POINTER_SIZE) + calleeFloatArgRegCount += GetHfaCount(argx); + } + else { - unsigned extraArgRegsToAdd = (typeSize / TARGET_POINTER_SIZE); - nCalleeArgs += extraArgRegsToAdd; + // Structs are either passed in 1 or 2 (64-bit) slots + size_t roundupSize = roundUp(typeSize, TARGET_POINTER_SIZE); + size = roundupSize / TARGET_POINTER_SIZE; + + if (size > 2) + { + size = 1; + } + + else if (size == 2) + { + hasTwoSlotSizedStruct = true; + } + + calleeArgRegCount += size; } -#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING || _TARGET_ARM64_ + +#elif defined(WINDOWS_AMD64_ABI) + + ++calleeArgRegCount; + +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING #else assert(!"Target platform ABI rules regarding passing struct type args in registers"); @@ -7119,31 +7335,142 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) } else { - hasMultiByteArgs = true; + hasMultiByteStackArgs = true; } } + else + { + varTypeIsFloating(argx) ? ++calleeFloatArgRegCount : ++calleeArgRegCount; + } + + // We can break early on multiByte cases. + if (hasMultiByteStackArgs) + { + break; + } + } + + const unsigned maxRegArgs = MAX_REG_ARG; + +// If we reached here means that callee has only those argument types which can be passed in +// a register and if passed on stack will occupy exactly one stack slot in out-going arg area. +// If we are passing args on stack for the callee and it has more args passed on stack than +// the caller, then fast tail call cannot be performed. 
+// +// Note that the GC'ness of on stack args need not match since the arg setup area is marked +// as non-interruptible for fast tail calls. + +#ifdef WINDOWS_AMD64_ABI + assert(calleeStackSize == 0); + size_t calleeStackSlots = ((calleeArgRegCount + calleeFloatArgRegCount) > maxRegArgs) + ? (calleeArgRegCount + calleeFloatArgRegCount) - maxRegArgs + : 0; + calleeStackSize = calleeStackSlots * TARGET_POINTER_SIZE; + size_t callerStackSize = info.compArgStackSize; + + bool hasStackArgs = false; + + if (callerStackSize > 0 || calleeStackSize > 0) + { + hasStackArgs = true; } - // Go the slow route, if it has multi-byte params - if (hasMultiByteArgs) + // Go the slow route, if it has multi-byte params. This is an implementation + // limitation; see https://github.com/dotnet/coreclr/issues/12644. + if (hasMultiByteStackArgs) { + reportFastTailCallDecision("Will not fastTailCall hasMultiByteStackArgs", callerStackSize, calleeStackSize); return false; } - // If we reached here means that callee has only those argument types which can be passed in - // a register and if passed on stack will occupy exactly one stack slot in out-going arg area. - // If we are passing args on stack for callee and it has more args passed on stack than - // caller, then fast tail call cannot be performed. + // x64 Windows: If we have more callee registers used than MAX_REG_ARG, then + // make sure the callee's incoming arguments is less than the caller's + if (hasStackArgs && (nCalleeArgs > nCallerArgs)) + { + reportFastTailCallDecision("Will not fastTailCall hasStackArgs && (nCalleeArgs > nCallerArgs)", callerStackSize, + calleeStackSize); + return false; + } + +#elif (defined(_TARGET_AMD64_) && defined(UNIX_AMD64_ABI)) || defined(_TARGET_ARM64_) + + // For *nix Amd64 and Arm64 check to see if all arguments for the callee + // and caller are passing in registers. 
If not, ensure that the outgoing argument stack size + // requirement for the callee is less than or equal to the caller's entire stack frame usage. // - // Note that the GC'ness of on stack args need not match since the arg setup area is marked - // as non-interruptible for fast tail calls. - if ((nCalleeArgs > MAX_REG_ARG) && (nCallerArgs < nCalleeArgs)) + // Also, in the case that we have to pass arguments on the stack make sure + // that we are not dealing with structs that are >8 bytes. + + bool hasStackArgs = false; + size_t maxFloatRegArgs = MAX_FLOAT_REG_ARG; + + size_t calleeIntStackArgCount = calleeArgRegCount > maxRegArgs ? calleeArgRegCount - maxRegArgs : 0; + size_t calleeFloatStackArgCount = + calleeFloatArgRegCount > maxFloatRegArgs ? calleeFloatArgRegCount - maxFloatRegArgs : 0; + + size_t calleeStackArgCount = calleeIntStackArgCount + calleeFloatStackArgCount; + size_t callerStackSize = info.compArgStackSize; + calleeStackSize += calleeStackArgCount * TARGET_POINTER_SIZE; + + if (callerStackSize > 0 || calleeStackSize > 0) { + hasStackArgs = true; + } + + // Go the slow route, if it has multi-byte params. This is an implementation + // limitation see https://github.com/dotnet/coreclr/issues/12644. + if (hasMultiByteStackArgs) + { + reportFastTailCallDecision("Will not fastTailCall hasMultiByteStackArgs", callerStackSize, calleeStackSize); + return false; + } + + // Callee has a >8 and <=16 byte struct and arguments that has to go on the stack. Do not fastTailCall. + if (calleeStackSize > 0 && hasTwoSlotSizedStruct) + { + reportFastTailCallDecision("Will not fastTailCall calleeStackSize > 0 && hasTwoSlotSizedStruct", + callerStackSize, calleeStackSize); + return false; + } + + // Callee has an HFA struct and arguments that has to go on the stack. Do not fastTailCall. 
+ if (calleeStackSize > 0 && hasHfaArg) + { + reportFastTailCallDecision("Will not fastTailCall calleeStackSize > 0 && hasHfaArg", callerStackSize, + calleeStackSize); + return false; + } + + // TODO-AMD64-Unix + // TODO-ARM64 + // + // LowerFastTailCall currently assumes nCalleeArgs <= nCallerArgs. This is + // not true in many cases on x64 linux, remove this pessimization when + // LowerFastTailCall is fixed. See https://github.com/dotnet/coreclr/issues/12468 + // for more information. + if (hasStackArgs && (nCalleeArgs > nCallerArgs)) + { + reportFastTailCallDecision("Will not fastTailCall hasStackArgs && (nCalleeArgs > nCallerArgs)", callerStackSize, + calleeStackSize); + return false; + } + + if (calleeStackSize > callerStackSize) + { + reportFastTailCallDecision("Will not fastTailCall calleeStackSize > callerStackSize", callerStackSize, + calleeStackSize); return false; } - return true; #else + + NYI("fastTailCall not supported on this Architecture."); + +#endif // WINDOWS_AMD64_ABI + + reportFastTailCallDecision("Will fastTailCall", callerStackSize, calleeStackSize); + return true; +#else // FEATURE_FASTTAILCALL return false; #endif } diff --git a/tests/arm/Tests.lst b/tests/arm/Tests.lst index a4dd8a7..7fae7aa 100644 --- a/tests/arm/Tests.lst +++ b/tests/arm/Tests.lst @@ -12292,6 +12292,38 @@ MaxAllowedDurationSeconds=600 Categories=EXPECTED_FAIL;EXCLUDED;ILLEGAL_IL_TAILCALL_POP_RET HostStyle=0 +[FastTailCallCandidates.cmd_11631] +RelativePath=JIT\opt\FastTailcall\FastTailCallCandidates\FastTailCallCandidates.cmd +WorkingDir=JIT\opt\FastTailcall\FastTailCallCandidates +Expected=0 +MaxAllowedDurationSeconds=600 +Categories=EXPECTED_PASS +HostStyle=0 + +[GitHubIssue12479.cmd_11632] +RelativePath=JIT\opt\FastTailcall\GitHubIssue12479\GitHubIssue12479.cmd +WorkingDir=JIT\opt\FastTailcall\GitHubIssue12479 +Expected=0 +MaxAllowedDurationSeconds=600 +Categories=EXPECTED_PASS +HostStyle=0 + +[StackFixup.cmd_11632] 
+RelativePath=JIT\opt\FastTailcall\StackFixup\StackFixup.cmd +WorkingDir=JIT\opt\FastTailcall\StackFixup +Expected=0 +MaxAllowedDurationSeconds=600 +Categories=EXPECTED_PASS +HostStyle=0 + +[StructPassingSimple.cmd_11632] +RelativePath=JIT\opt\FastTailcall\StructPassingSimple\StructPassingSimple.cmd +WorkingDir=JIT\opt\FastTailcall\StructPassingSimple +Expected=0 +MaxAllowedDurationSeconds=600 +Categories=EXPECTED_PASS +HostStyle=0 + [FPArea.cmd_1537] RelativePath=JIT\CodeGenBringUpTests\FPArea\FPArea.cmd WorkingDir=JIT\CodeGenBringUpTests\FPArea @@ -21124,14 +21156,6 @@ MaxAllowedDurationSeconds=600 Categories=EXPECTED_PASS HostStyle=0 -[FastTailCallStackFixup.cmd_2641] -RelativePath=JIT\opt\Tailcall\FastTailCallStackFixup\FastTailCallStackFixup.cmd -WorkingDir=JIT\opt\Tailcall\FastTailCallStackFixup -Expected=0 -MaxAllowedDurationSeconds=600 -Categories=EXPECTED_PASS -HostStyle=0 - [hfa_sd1E_d.cmd_2642] RelativePath=JIT\jit64\hfa\main\testE\hfa_sd1E_d\hfa_sd1E_d.cmd WorkingDir=JIT\jit64\hfa\main\testE\hfa_sd1E_d diff --git a/tests/arm64/Tests.lst b/tests/arm64/Tests.lst index 6effab8..1cfffc3 100644 --- a/tests/arm64/Tests.lst +++ b/tests/arm64/Tests.lst @@ -78308,6 +78308,38 @@ MaxAllowedDurationSeconds=600 Categories=EXPECTED_FAIL;EXCLUDED;ILLEGAL_IL_TAILCALL_POP_RET HostStyle=0 +[FastTailCallCandidates.cmd_11631] +RelativePath=JIT\opt\FastTailcall\FastTailCallCandidates\FastTailCallCandidates.cmd +WorkingDir=JIT\opt\FastTailcall\FastTailCallCandidates +Expected=0 +MaxAllowedDurationSeconds=600 +Categories=EXPECTED_PASS +HostStyle=0 + +[GitHubIssue12479.cmd_11632] +RelativePath=JIT\opt\FastTailcall\GitHubIssue12479\GitHubIssue12479.cmd +WorkingDir=JIT\opt\FastTailcall\GitHubIssue12479 +Expected=0 +MaxAllowedDurationSeconds=600 +Categories=EXPECTED_PASS +HostStyle=0 + +[StackFixup.cmd_11632] +RelativePath=JIT\opt\FastTailcall\StackFixup\StackFixup.cmd +WorkingDir=JIT\opt\FastTailcall\StackFixup +Expected=0 +MaxAllowedDurationSeconds=600 +Categories=EXPECTED_PASS 
+HostStyle=0 + +[StructPassingSimple.cmd_11632] +RelativePath=JIT\opt\FastTailcall\StructPassingSimple\StructPassingSimple.cmd +WorkingDir=JIT\opt\FastTailcall\StructPassingSimple +Expected=0 +MaxAllowedDurationSeconds=600 +Categories=EXPECTED_PASS +HostStyle=0 + [Generated611.cmd_10093] RelativePath=Loader\classloader\TypeGeneratorTests\TypeGeneratorTest611\Generated611\Generated611.cmd WorkingDir=Loader\classloader\TypeGeneratorTests\TypeGeneratorTest611\Generated611 diff --git a/tests/src/JIT/opt/FastTailCall/FastTailCallCandidates.cs b/tests/src/JIT/opt/FastTailCall/FastTailCallCandidates.cs new file mode 100644 index 0000000..5bed5fd --- /dev/null +++ b/tests/src/JIT/opt/FastTailCall/FastTailCallCandidates.cs @@ -0,0 +1,1055 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +//////////////////////////////////////////////////////////////////////////////// +// Types +//////////////////////////////////////////////////////////////////////////////// + +public class FastTailCallCandidates +{ + //////////////////////////////////////////////////////////////////////////// + // Globals + //////////////////////////////////////////////////////////////////////////// + + public static int s_ret_value = 100; + + //////////////////////////////////////////////////////////////////////////// + // Helpers + //////////////////////////////////////////////////////////////////////////// + + /// + /// Check the return value of the test and set s_ret_value if incorrect + /// + public static void CheckOutput(int code) + { + // If there has been a previous failure then do not reset the first + // failure this will be the return value. 
+ if (s_ret_value != 100) + { + return; + } + + if (code != 100) + { + s_ret_value = code; + } + } + + /// + /// Run each individual test case + /// + /// + /// + /// If you add any new test case scenarios please use reuse code and follow + /// the pattern below. Please increment the return value so it + /// is easy to determine in the future which scenario is failing. + /// + public static int Tester(int a) + { + CheckOutput(SimpleTestCase()); + CheckOutput(IntegerArgs(10, 11, 12, 13, 14, 15)); + CheckOutput(FloatArgs(10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f)); + CheckOutput(IntAndFloatArgs(10, 11, 12, 13, 14, 15, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f)); + CheckOutput(CallerGithubIssue12468(1, 2, 3, 4, 5, 6, 7, 8, new StructSizeSixteenNotExplicit(1, 2))); + CheckOutput(DoNotFastTailCallSimple(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14)); + CheckOutput(StackBasedCaller(16, new StructSizeTwentyFour(1, 2, 3))); + CheckOutput(CallerSimpleHFACase(new HFASize32(1.0, 2.0, 3.0, 4.0), 1.0, 2.0, 3.0, 4.0)); + CheckOutput(CallerHFACaseWithStack(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, new HFASize32(1.0, 2.0, 3.0, 4.0))); + CheckOutput(CallerHFACaseCalleeOnly(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0)); + CheckOutput(CallerHFaCaseCalleeStackArgs(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0)); + + return s_ret_value; + + } + + //////////////////////////////////////////////////////////////////////////// + // Simple fast tail call case + //////////////////////////////////////////////////////////////////////////// + + /// + /// Simple fast tail call case. + /// + /// + /// + /// This is mostly supposed to be a smoke test. It can also be seen as a + /// constant + /// + /// Return 100 is a pass. 
+ /// + /// + public static int SimpleTestCase(int retValue = 10) + { + retValue += 1; + + if (retValue == 100) + { + return retValue; + } + else + { + // Self-recursive call in tail position; the recursion stops once + // retValue has been incremented to 100. + return SimpleTestCase(retValue); + } + } + + //////////////////////////////////////////////////////////////////////////// + // Integer args + //////////////////////////////////////////////////////////////////////////// + + /// + /// Simple fast tail call case that includes integer args + /// + /// + /// + /// Return 100 is a pass. + /// Return 101 is a failure. + /// + /// + public static int IntegerArgs(int arg1, + int arg2, + int arg3, + int arg4, + int arg5, + int arg6, + int retValue = 10) + { + retValue += 1; + + if (retValue == 100) + { + // After the last recursive call the arguments must still hold + // the values 10..15 that Tester passed in. + if (arg1 != 10 || + arg2 != 11 || + arg3 != 12 || + arg4 != 13 || + arg5 != 14 || + arg6 != 15) + { + return 101; + } + + return retValue; + } + else + { + // Forward every argument unchanged to the recursive call. + return IntegerArgs(arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + retValue); + } + } + + //////////////////////////////////////////////////////////////////////////// + // Float args + //////////////////////////////////////////////////////////////////////////// + + /// + /// Simple fast tail call case that includes floating point args + /// + /// + /// + /// Return 100 is a pass. + /// Return 102 is a failure.
+ /// + /// + public static int FloatArgs(float arg1, + float arg2, + float arg3, + float arg4, + float arg5, + float arg6, + int retValue = 10) + { + retValue += 1; + + if (retValue == 100) + { + // After the last recursive call the arguments must still hold + // the values 10.0f..15.0f that Tester passed in. + if (arg1 != 10.0f || + arg2 != 11.0f || + arg3 != 12.0f || + arg4 != 13.0f || + arg5 != 14.0f || + arg6 != 15.0f) + { + return 102; + } + + return retValue; + } + else + { + // Forward every argument unchanged to the recursive call. + return FloatArgs(arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + retValue); + } + } + + //////////////////////////////////////////////////////////////////////////// + // Integer and Float args + //////////////////////////////////////////////////////////////////////////// + + /// + /// Simple fast tail call case that includes integer and floating point args + /// + /// + /// + /// Return 100 is a pass. + /// Return 103 is a failure. + /// + /// + public static int IntAndFloatArgs(int argi1, + int argi2, + int argi3, + int argi4, + int argi5, + int argi6, + float argf1, + float argf2, + float argf3, + float argf4, + float argf5, + float argf6, + int retValue = 10) + { + retValue += 1; + + if (retValue == 100) + { + // After the last recursive call both the integer and the float + // arguments must still hold the values that Tester passed in. + if (argi1 != 10 || + argi2 != 11 || + argi3 != 12 || + argi4 != 13 || + argi5 != 14 || + argi6 != 15 || + argf1 != 10.0f || + argf2 != 11.0f || + argf3 != 12.0f || + argf4 != 13.0f || + argf5 != 14.0f || + argf6 != 15.0f) + { + return 103; + } + + return retValue; + } + else + { + // Forward every argument unchanged to the recursive call. + return IntAndFloatArgs(argi1, + argi2, + argi3, + argi4, + argi5, + argi6, + argf1, + argf2, + argf3, + argf4, + argf5, + argf6, + retValue); + } + } + + /// + /// Decision not to tail call.
See DoNotFastTailCallSimple for more info + /// + public static int DoNotFastTailCallHelper(int one, + int two, + int three, + int four, + int five, + int six, + int seven, + int eight, + int nine, + int ten, + int eleven, + int twelve, + int thirteen, + int fourteen) + { + // Chain of dependent updates: with (one, two, three, four) equal to + // (1, 2, 3, 4) the three blocks below leave four == 10, which is the + // value checked right after them. + if (one == 1) + { + two = one + two; + } + + if (two == 3) + { + three = two + three; + } + + if (three == 6) + { + four = four + three; + } + + if (four != 10) + { + return 104; + } + + // The remaining arguments must arrive with their ordinal value. + if (five != 5) + { + return 104; + } + + if (six != 6) + { + return 104; + } + + if (seven != 7) + { + return 104; + } + + if (eight != 8) + { + return 104; + } + + if (nine != 9) + { + return 104; + } + + if (ten != 10) + { + return 104; + } + + if (eleven != 11) + { + return 104; + } + + if (twelve != 12) + { + return 104; + } + + if (thirteen != 13) + { + return 104; + } + + if (fourteen != 14) + { + return 104; + } + + return 100; + } + + /// + /// Decision not to tail call. + /// + /// + /// + /// The callee has 6 int register arguments on x64 linux. + /// With 8 * 8 (64) bytes stack size + /// + /// Return 100 is a pass. + /// Return 104 is a failure.
+ /// + /// + public static int DoNotFastTailCallSimple(float one, + float two, + float three, + float four, + float five, + float six, + float seven, + float eight, + int first, + int second, + int third, + int fourth, + int fifth, + int sixth) + { + // The two branches differ only in the order of the first two + // arguments handed to the helper. + if (one % 2 == 0) + { + return DoNotFastTailCallHelper((int) two, + (int) one, + (int) three, + (int) four, + (int) five, + (int) six, + (int) seven, + (int) eight, + first, + second, + third, + fourth, + fifth, + sixth); // Cannot fast tail call + } + else + { + return DoNotFastTailCallHelper((int) one, + (int) two, + (int) three, + (int) four, + (int) five, + (int) six, + (int) seven, + (int) eight, + first, + second, + third, + fourth, + fifth, + sixth); // Cannot fast tail call + } + } + + //////////////////////////////////////////////////////////////////////////// + // HFAs + //////////////////////////////////////////////////////////////////////////// + + // Struct made up of two double fields. + public struct HFASize16 + { + public double a; + public double b; + + public HFASize16(double a, double b) + { + this.a = a; + this.b = b; + } + } + + // Struct made up of three double fields. + public struct HFASize24 + { + public double a; + public double b; + public double c; + + public HFASize24(double a, double b, double c) + { + this.a = a; + this.b = b; + this.c = c; + } + } + + // Struct made up of four double fields. + public struct HFASize32 + { + public double a; + public double b; + public double c; + public double d; + + public HFASize32(double a, double b, double c, double d) + { + this.a = a; + this.b = b; + this.c = c; + this.d = d; + } + } + + /// + /// Possible to fast tail call only on arm64. See CallerSimpleHFACase for + /// more information.
+ /// + public static int CalleeSimpleHFACase(double one, + double two, + double three, + double four, + double five, + double six, + double seven, + double eight) + { + // Count the even values of i in [0, one); only the first argument + // influences the result. + int count = 0; + for (double i = 0; i < one; ++i) + { + if (i % 2 == 0) + { + ++count; + } + } + + if (count == 1) + { + return 100; + } + + else + { + return 107; + } + } + + /// + /// Possible to fast tail call only on arm64 + /// + /// + /// + /// This test case is really only interesting on arm64 + /// + /// Arm64: + /// caller has 8 register arguments and no stack space + /// callee has 8 register arguments and no stack space + /// + /// x64 Linux: + /// caller has 4 register arguments and 64 bytes of stack space + /// callee has 8 register arguments + /// + /// Arm64 and linux x64 can both fast tail call + /// + /// NOTE(review): this conclusion does not match the summary above, + /// which says arm64 only - confirm which one is intended. + /// + /// Return 100 is a pass. + /// Return 107 is a failure. + /// + /// + public static int CallerSimpleHFACase(HFASize32 s1, + double one, + double two, + double three, + double four) + { + // The branches pass one and two in swapped positions; s1 is not + // forwarded to the callee. + if (one % 2 == 0) + { + double a = one * 100; + double b = one + 1100; + return CalleeSimpleHFACase(one, + two, + three, + four, + a, + b, + one, + two); + } + else + { + double b = one + 1599; + double a = one + 16; + return CalleeSimpleHFACase(two, + one, + three, + four, + a, + b, + two, + one); + } + } + + /// + /// Possible to fast tail call only on arm64. See CallerHFACaseWithStack + /// for more information.
+ /// + public static int CalleeHFAStackSpace(double one, + double two, + double three, + double four, + double five, + double six, + double seven, + double eight, + double nine, + double ten) + { + // Count the even values of i in [0, one); only the first argument + // influences the result. + int count = 0; + for (double i = 0; i < one; ++i) + { + if (i % 2 == 0) + { + ++count; + } + } + + if (count == 1) + { + return 100; + } + + else + { + return 108; + } + } + + /// + /// Possible to fast tail call only on arm64 + /// + /// + /// + /// This test case is really only interesting on arm64 + /// + /// Arm64: + /// caller has 8 register arguments and 32 bytes of stack space + /// callee has 8 register arguments and 16 bytes of stack space + /// + /// x64 linux: + /// caller has 8 register arguments and 32 bytes of stack space + /// callee has 8 register arguments and 16 bytes of stack space + /// + /// Arm64 can fast tail call while x64 linux will not. + /// Note that this is due to a bug in LowerFastTailCall that assumes + /// nCallerArgs <= nCalleeArgs + /// + /// Return 100 is a pass. + /// Return 108 is a failure. + /// + /// + public static int CallerHFACaseWithStack(double one, + double two, + double three, + double four, + double five, + double six, + double seven, + double eight, + HFASize32 s1) + { + // The branches pass five and six in swapped positions; s1 is not + // forwarded to the callee. + if (one % 2 == 0) + { + double a = one * 100; + double b = one + 1100; + return CalleeHFAStackSpace(one, + two, + three, + four, + a, + b, + five, + six, + seven, + eight); + } + else + { + double b = one + 1599; + double a = one + 16; + return CalleeHFAStackSpace(one, + two, + three, + four, + a, + b, + six, + five, + seven, + eight); + } + } + + /// + /// Possible to fast tail call only on arm64. See CallerHFACaseCalleeOnly + /// for more information.
+ /// + public static int CalleeWithHFA(double one, + double two, + double three, + double four, + HFASize32 s1) + { + // Count the even values of i in [0, one); only the first argument + // influences the result. + int count = 0; + for (double i = 0; i < one; ++i) + { + if (i % 2 == 0) + { + ++count; + } + } + + if (count == 1) + { + return 100; + } + + else + { + return 109; + } + } + + /// + /// Possible to fast tail call only on arm64 + /// + /// + /// + /// This test case is really only interesting on arm64 + /// + /// Arm64: + /// caller has 8 register arguments + /// callee has 8 register arguments + /// + /// x64 Linux: + /// caller has 8 register arguments + /// callee has 4 register arguments and 32 bytes of stack space + /// + /// Arm64 can fast tail call while x64 linux cannot + /// + /// Return 100 is a pass. + /// Return 109 is a failure. + /// + /// + public static int CallerHFACaseCalleeOnly(double one, + double two, + double three, + double four, + double five, + double six, + double seven, + double eight) + { + // The branches pass a and b in swapped positions; the struct argument + // is built the same way in both. + if (one % 2 == 0) + { + double a = one * 100; + double b = one + 1100; + return CalleeWithHFA(one, + a, + b, + four, + new HFASize32(a, b, five, six)); + } + else + { + double b = one + 1599; + double a = one + 16; + return CalleeWithHFA(one, + b, + a, + four, + new HFASize32(a, b, five, six)); + } + } + + /// + /// Possible to fast tail call on all targets. See + /// CallerHFaCaseCalleeStackArgs for info. + /// + /// + public static int CalleeWithStackHFA(double one, + double two, + double three, + double four, + double five, + double six, + double seven, + double eight, + HFASize16 s1) + { + // Count the even values of i in [0, one); only the first argument + // influences the result. + int count = 0; + for (double i = 0; i < one; ++i) + { + if (i % 2 == 0) + { + ++count; + } + } + + if (count == 1) + { + return 100; + } + + else + { + return 110; + } + } + + /// + /// Possible to fast tail call on all targets + /// + /// + /// + /// This test case is really only interesting on arm64 and Linux x64 + /// because the decision to fast tail call will be reported as false.
+ /// + /// On arm64 this is because callee has stack args and has an hfa arg. + /// While on x64 Linux this is because the callee has stack args and has + /// a special 16 byte struct. + /// + /// Arm64: + /// caller has 8 register arguments and 16 bytes of stack space + /// callee has 8 register arguments and 16 bytes of stack space + /// + /// x64 Linux: + /// caller has 8 register arguments and 16 bytes of stack space + /// callee has 8 register arguments and 16 bytes of stack space + /// + /// Arm64 can fast tail call while x64 linux cannot. Note that this is + /// due to an implementation limitation. fgCanFastTailCall relies on + /// fgMorphArgs, but fgMorphArgs relies on fgCanFastTailCall. Therefore, + /// fgCanFastTailCall will not fast tail call if there is a 16 byte + /// struct and stack usage. + /// + /// Return 100 is a pass. + /// Return 110 is a failure. + /// + /// + public static int CallerHFaCaseCalleeStackArgs(double one, + double two, + double three, + double four, + double five, + double six, + double seven, + double eight, + double nine, + double ten) + { + // Both branches forward the arguments in the same order; only the + // values computed for a and b differ. + if (one % 2 == 0) + { + double a = one * 100; + double b = one + 1100; + return CalleeWithStackHFA(one, + a, + b, + four, + five, + six, + seven, + eight, + new HFASize16(a, b)); + } + else + { + double b = one + 1599; + double a = one + 16; + return CalleeWithStackHFA(one, + a, + b, + four, + five, + six, + seven, + eight, + new HFASize16(a, b)); + } + } + + //////////////////////////////////////////////////////////////////////////// + // Stack Based args.
+ //////////////////////////////////////////////////////////////////////////// + + // Struct with a single long field and no explicit layout attribute. + public struct StructSizeEightNotExplicit + { + public long a; + + public StructSizeEightNotExplicit(long a) + { + this.a = a; + } + } + + // Struct with two long fields and no explicit layout attribute. + public struct StructSizeSixteenNotExplicit + { + public long a; + public long b; + + public StructSizeSixteenNotExplicit(long a, long b) + { + this.a = a; + this.b = b; + } + + } + + /// + /// Possible to fast tail call. See CallerGithubIssue12468 for more info. + /// + public static int CalleeGithubIssue12468(int one, + int two, + int three, + int four, + int five, + int six, + int seven, + int eight, + StructSizeEightNotExplicit s1, + StructSizeEightNotExplicit s2) + { + // Count the multiples of 10 in [0, s1.a); only s1 influences the + // result. + int count = 0; + for (int i = 0; i < s1.a; ++i) + { + if (i % 10 == 0) + { + ++count; + } + } + + if (count == 160) + { + return 100; + } + + else + { + return 106; + } + } + + /// + /// Possible to fast tail call + /// + /// + /// + /// Caller has 6 register arguments and 1 stack argument (size 16) + /// Callee has 6 register arguments and 2 stack arguments (size 16) + /// + /// It is possible to fast tail call but will not due to a bug in + /// LowerFastTailCall which assumes nCallerArgs <= nCalleeArgs + /// + /// + /// Return 100 is a pass. + /// Return 106 is a failure.
+ /// + /// + public static int CallerGithubIssue12468(int one, + int two, + int three, + int four, + int five, + int six, + int seven, + int eight, + StructSizeSixteenNotExplicit s1) + { + // The branches swap the first two integer arguments and the order of + // the two struct arguments; s1 is not forwarded to the callee. + if (one % 2 == 0) + { + long a = one * 100; + long b = one + 1100; + return CalleeGithubIssue12468(two, + one, + three, + four, + five, + six, + seven, + eight, + new StructSizeEightNotExplicit(a), + new StructSizeEightNotExplicit(b)); + } + else + { + long b = one + 1599; + long a = one + 16; + return CalleeGithubIssue12468(one, + two, + three, + four, + five, + six, + seven, + eight, + new StructSizeEightNotExplicit(b), + new StructSizeEightNotExplicit(a)); + } + } + + // Explicit layout with four ints at offsets 0, 8, 16 and 24. Size is the + // 32 bytes promised by the type name; it must not be smaller than the + // extent covered by the fields. + [StructLayout(LayoutKind.Explicit, Size=32, CharSet=CharSet.Ansi)] + public struct StructSizeThirtyTwo + { + [FieldOffset(0)] public int a; + [FieldOffset(8)] public int b; + [FieldOffset(16)] public int c; + [FieldOffset(24)] public int d; + + public StructSizeThirtyTwo(int a, int b, int c, int d) + { + this.a = a; + this.b = b; + this.c = c; + this.d = d; + } + } + + // Explicit layout with three ints at offsets 0, 8 and 16. Size is the + // 24 bytes promised by the type name; it must not be smaller than the + // extent covered by the fields. + [StructLayout(LayoutKind.Explicit, Size=24, CharSet=CharSet.Ansi)] + public struct StructSizeTwentyFour + { + [FieldOffset(0)] public int a; + [FieldOffset(8)] public int b; + [FieldOffset(16)] public int c; + + public StructSizeTwentyFour(int a, int b, int c) + { + this.a = a; + this.b = b; + this.c = c; + } + } + + /// + /// Decision to fast tail call. See StackBasedCaller for more + /// information. + /// + public static int StackBasedCallee(int a, int b, StructSizeThirtyTwo sstt) + { + // Count the multiples of 10 in [0, sstt.a); only the struct argument + // influences the result. + int count = 0; + for (int i = 0; i < sstt.a; ++i) + { + if (i % 10 == 0) + { + ++count; + } + } + + if (count == 160) + { + return 100; + } + + else + { + return 105; + } + } + + /// + /// Decision to fast tail call + /// + /// + /// + /// On x64 linux this will not fast tail call.
+ /// + /// The caller has one stack argument of size 24 + /// The callee has one stack argument of size 32 + /// + /// On Arm64 this will fast tail call + /// + /// Both caller and callee have two register args. + /// + /// Return 100 is a pass. + /// Return 105 is a failure. + /// + /// + public static int StackBasedCaller(int i, StructSizeTwentyFour sstf) + { + // The branches swap a and b, both as plain arguments and inside the + // struct; sstf is not forwarded to the callee. + if (i % 2 == 0) + { + int a = i * 100; + int b = i + 1100; + return StackBasedCallee(a, b, new StructSizeThirtyTwo(a, b, b, a)); + } + else + { + int b = i + 829; + int a = i + 16; + return StackBasedCallee(b, a, new StructSizeThirtyTwo(b, a, a, b)); + } + } + + // Entry point: the process exit code is the value returned by Tester. + public static int Main() + { + return Tester(1); + } +} \ No newline at end of file diff --git a/tests/src/JIT/opt/Tailcall/FastTailCallStackFixup.csproj b/tests/src/JIT/opt/FastTailCall/FastTailCallCandidates.csproj similarity index 97% rename from tests/src/JIT/opt/Tailcall/FastTailCallStackFixup.csproj rename to tests/src/JIT/opt/FastTailCall/FastTailCallCandidates.csproj index b065595..e7e45d8 100644 --- a/tests/src/JIT/opt/Tailcall/FastTailCallStackFixup.csproj +++ b/tests/src/JIT/opt/FastTailCall/FastTailCallCandidates.csproj @@ -34,7 +34,7 @@ $(DefineConstants);CORECLR - + diff --git a/tests/src/JIT/opt/FastTailCall/GitHubIssue12479.cs b/tests/src/JIT/opt/FastTailCall/GitHubIssue12479.cs new file mode 100644 index 0000000..0887e9a --- /dev/null +++ b/tests/src/JIT/opt/FastTailCall/GitHubIssue12479.cs @@ -0,0 +1,94 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; + +public class GitHubIssue12479 +{ + public static int callee(int one, + int two, + int three, + int four, + int five, + int six, + int seven, + int eight) + { + int count = 0; + + // Make sure this is not inlined. The loop counts the multiples of 4 in [0, one).
+ for (int i = 0; i < one; ++i) + { + if (i % 4 == 0) ++count; + } + + return count; + } + + // Linux (x64): Eight incoming arguments, all passed in registers + // + // nCallerArgs: 8, stackSize: 0 bytes + public static int caller(float one, + float two, + float three, + float four, + float five, + float six, + float seven, + float eight) + { + if (one % 2 == 0) + { + // Eight outgoing arguments, six passed in registers, two on the stack. + // + // nCalleeArgs: 8, stackSize: 8 bytes + // NOTE(review): two stack arguments at 8 bytes each would be 16 bytes - confirm. + // + // This is a fast tail call candidate that should not be fast tail called + // because the callee's stack size will be larger than the caller's + return callee((int) two, + (int) one, + (int) eight, + (int) five, + (int) four, + (int) seven, + (int) six, + (int) three); + } + else + { + // Eight outgoing arguments, six passed in registers, two on the stack. + // + // nCalleeArgs: 8, stackSize: 8 bytes + // NOTE(review): two stack arguments at 8 bytes each would be 16 bytes - confirm. + // + // This is a fast tail call candidate that should not be fast tail called + // because the callee's stack size will be larger than the caller's + return callee((int) one, + (int) two, + (int) three, + (int) four, + (int) five, + (int) six, + (int) seven, + (int) eight); + } + + + } + + public static int Main() + { + // We have 8 floating args on unix.
+ int a = caller(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); + + // one is odd, so caller takes its else branch and callee counts the + // multiples of 4 in [0, 1), which is 1. + if (a == 1) + { + return 100; + } + + else + { + return 101; + } + } +} \ No newline at end of file diff --git a/tests/src/JIT/opt/FastTailCall/GitHubIssue12479.csproj b/tests/src/JIT/opt/FastTailCall/GitHubIssue12479.csproj new file mode 100644 index 0000000..b1369e6 --- /dev/null +++ b/tests/src/JIT/opt/FastTailCall/GitHubIssue12479.csproj @@ -0,0 +1,45 @@ + + + + + Debug + AnyCPU + $(MSBuildProjectName) + 2.0 + {95DFC527-4DC1-495E-97D7-E94EE1F7140D} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + true + + + + + + + + + False + + + + + None + True + True + True + True + True + True + $(DefineConstants);CORECLR + + + + + + + + + + + diff --git a/tests/src/JIT/opt/Tailcall/FastTailCallStackFixup.cs b/tests/src/JIT/opt/FastTailCall/StackFixup.cs similarity index 89% rename from tests/src/JIT/opt/Tailcall/FastTailCallStackFixup.cs rename to tests/src/JIT/opt/FastTailCall/StackFixup.cs index f4caf37..eaf0950 100644 --- a/tests/src/JIT/opt/Tailcall/FastTailCallStackFixup.cs +++ b/tests/src/JIT/opt/FastTailCall/StackFixup.cs @@ -1,3 +1,6 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information.
using System; public struct A diff --git a/tests/src/JIT/opt/FastTailCall/StackFixup.csproj b/tests/src/JIT/opt/FastTailCall/StackFixup.csproj new file mode 100644 index 0000000..f7e3e97 --- /dev/null +++ b/tests/src/JIT/opt/FastTailCall/StackFixup.csproj @@ -0,0 +1,45 @@ + + + + + Debug + AnyCPU + $(MSBuildProjectName) + 2.0 + {95DFC527-4DC1-495E-97D7-E94EE1F7140D} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + true + + + + + + + + + False + + + + + None + True + True + True + True + True + True + $(DefineConstants);CORECLR + + + + + + + + + + + diff --git a/tests/src/JIT/opt/FastTailCall/StructPassingSimple.cs b/tests/src/JIT/opt/FastTailCall/StructPassingSimple.cs new file mode 100644 index 0000000..13ca0cb --- /dev/null +++ b/tests/src/JIT/opt/FastTailCall/StructPassingSimple.cs @@ -0,0 +1,38 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+using System; + +// Struct with 10 bytes of field data (two ints and a short). +// NOTE(review): padding may make the actual size larger than 10 - confirm. +public struct A +{ + public int a; + public int b; + public short c; +} + +class TailCallStructPassingSimple +{ + // Simple tail call candidate that would be ignored on Arm64 and amd64 Unix + // due to https://github.com/dotnet/coreclr/issues/2666 + // + // Calls itself in tail position, decrementing count on every call, and + // returns 100 once count has reached 0. + public static int ImplicitTailCallTenByteStruct(A a, int count=1000) + { + if (count-- == 0) + { + return 100; + } + + return ImplicitTailCallTenByteStruct(a, count); + } + + // Entry point: the exit code is the value returned by the recursive call. + public static int Main() + { + A temp = new A(); + temp.a = 50; + temp.b = 500; + temp.c = 62; + + int ret = ImplicitTailCallTenByteStruct(temp); + return ret; + } +} \ No newline at end of file diff --git a/tests/src/JIT/opt/FastTailCall/StructPassingSimple.csproj b/tests/src/JIT/opt/FastTailCall/StructPassingSimple.csproj new file mode 100644 index 0000000..6ca1a88 --- /dev/null +++ b/tests/src/JIT/opt/FastTailCall/StructPassingSimple.csproj @@ -0,0 +1,45 @@ + + + + + Debug + AnyCPU + $(MSBuildProjectName) + 2.0 + {95DFC527-4DC1-495E-97D7-E94EE1F7140D} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + true + + + + + + + + + False + + + + + None + True + True + True + True + True + True + $(DefineConstants);CORECLR + + + + + + + + + + +