From: Andy Ayers
Date: Wed, 11 Mar 2020 20:16:23 +0000 (-0700)
Subject: JIT: enable tail calls and copy omission for implicit byref structs (#33004)
X-Git-Tag: submit/tizen/20210909.063632~9220
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=514db28b87f5053720db1c3a0e5b82495278515e;p=platform%2Fupstream%2Fdotnet%2Fruntime.git

JIT: enable tail calls and copy omission for implicit byref structs (#33004)

Handle cases where a caller passes an implicit byref struct argument to a
callee in tail position. There is no need to create a local copy or to
block tail calling. To prove this is safe, we must show that skipping the
copy does not introduce aliasing; we do a simplistic and limited form of
alias analysis to rule that out.

This pattern comes up increasingly often as we write more layered code
operating on spans and similar structs.
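For illustration, this is the kind of layered, span-based code the change targets.
A minimal C# sketch, not part of the change; names are hypothetical. On x64 and
arm64 the span struct is passed as an implicit byref, and with this change the
JIT can forward the caller's byref to the callee in tail position instead of
copying the struct and giving up on the tail call:

    using System;

    public static class SpanLayers
    {
        // Callee: consumes the span it receives.
        static int Sum(ReadOnlySpan<int> data)
        {
            int total = 0;
            for (int i = 0; i < data.Length; i++) total += data[i];
            return total;
        }

        // Caller: the call is in tail position and 'data' has no other uses,
        // so no defensive copy is needed and a fast tail call is possible.
        static int SumLayer(ReadOnlySpan<int> data) => Sum(data);

        public static void Main() => Console.WriteLine(SumLayer(new[] { 1, 2, 3 }));
    }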
---

diff --git a/src/coreclr/src/jit/gentree.cpp b/src/coreclr/src/jit/gentree.cpp
index 5243e5a..53efb3e 100644
--- a/src/coreclr/src/jit/gentree.cpp
+++ b/src/coreclr/src/jit/gentree.cpp
@@ -16220,6 +16220,55 @@ bool GenTree::IsLocalAddrExpr(Compiler* comp, GenTreeLclVarCommon** pLclVarTree,
 }
 
 //------------------------------------------------------------------------
+// IsImplicitByrefParameterValue: determine if this tree is the entire
+//    value of a local implicit byref parameter
+//
+// Arguments:
+//    compiler -- compiler instance
+//
+// Return Value:
+//    GenTreeLclVar node for the local, or nullptr.
+//
+GenTreeLclVar* GenTree::IsImplicitByrefParameterValue(Compiler* compiler)
+{
+#if defined(TARGET_AMD64) || defined(TARGET_ARM64)
+
+    GenTreeLclVar* lcl = nullptr;
+
+    if (OperIs(GT_LCL_VAR))
+    {
+        lcl = AsLclVar();
+    }
+    else if (OperIs(GT_OBJ))
+    {
+        GenTree* addr = AsIndir()->Addr();
+
+        if (addr->OperIs(GT_LCL_VAR))
+        {
+            lcl = addr->AsLclVar();
+        }
+        else if (addr->OperIs(GT_ADDR))
+        {
+            GenTree* base = addr->AsOp()->gtOp1;
+
+            if (base->OperIs(GT_LCL_VAR))
+            {
+                lcl = base->AsLclVar();
+            }
+        }
+    }
+
+    if ((lcl != nullptr) && compiler->lvaIsImplicitByRefLocal(lcl->GetLclNum()))
+    {
+        return lcl;
+    }
+
+#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64)
+
+    return nullptr;
+}
+
+//------------------------------------------------------------------------
 // IsLclVarUpdateTree: Determine whether this is an assignment tree of the
 //                     form Vn = Vn 'oper' 'otherTree' where Vn is a lclVar
 //
diff --git a/src/coreclr/src/jit/gentree.h b/src/coreclr/src/jit/gentree.h
index 3acf4ac..ad1e1e1 100644
--- a/src/coreclr/src/jit/gentree.h
+++ b/src/coreclr/src/jit/gentree.h
@@ -1840,6 +1840,10 @@ public:
     // yields an address into a local
     GenTreeLclVarCommon* IsLocalAddrExpr();
 
+    // Determine if this tree represents the value of an entire implicit byref parameter,
+    // and if so return the tree for the parameter.
+    GenTreeLclVar* IsImplicitByrefParameterValue(Compiler* compiler);
+
     // Determine if this is a LclVarCommon node and return some additional info about it in the
     // two out parameters.
     bool IsLocalExpr(Compiler* comp, GenTreeLclVarCommon** pLclVarTree, FieldSeqNode** pFldSeq);
diff --git a/src/coreclr/src/jit/lclmorph.cpp b/src/coreclr/src/jit/lclmorph.cpp
index 546a5d4..176f199 100644
--- a/src/coreclr/src/jit/lclmorph.cpp
+++ b/src/coreclr/src/jit/lclmorph.cpp
@@ -1097,10 +1097,67 @@ private:
         {
             return;
         }
+
         LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum);
 
-        JITDUMP("LocalAddressVisitor incrementing ref count from %d to %d for V%02d\n", varDsc->lvRefCnt(RCS_EARLY),
-                varDsc->lvRefCnt(RCS_EARLY) + 1, lclNum);
+        JITDUMP("LocalAddressVisitor incrementing ref count from %d to %d for implicit by-ref V%02d\n",
+                varDsc->lvRefCnt(RCS_EARLY), varDsc->lvRefCnt(RCS_EARLY) + 1, lclNum);
         varDsc->incLvRefCnt(1, RCS_EARLY);
+
+        // See if this struct is an argument to a call. This information is recorded
+        // via the weighted early ref count for the local, and feeds the undo promotion
+        // heuristic.
+        //
+        // It can be approximate, so the pattern match below need not be exhaustive.
+        // But the pattern should at least subset the implicit byref cases that are
+        // handled in fgCanFastTailCall and fgMakeOutgoingStructArgCopy.
+        //
+        // CALL(OBJ(ADDR(LCL_VAR...)))
+        bool isArgToCall   = false;
+        bool keepSearching = true;
+        for (int i = 0; i < m_ancestors.Height() && keepSearching; i++)
+        {
+            GenTree* node = m_ancestors.Top(i);
+            switch (i)
+            {
+                case 0:
+                {
+                    keepSearching = node->OperIs(GT_LCL_VAR);
+                }
+                break;
+
+                case 1:
+                {
+                    keepSearching = node->OperIs(GT_ADDR);
+                }
+                break;
+
+                case 2:
+                {
+                    keepSearching = node->OperIs(GT_OBJ);
+                }
+                break;
+
+                case 3:
+                {
+                    keepSearching = false;
+                    isArgToCall   = node->IsCall();
+                }
+                break;
+                default:
+                {
+                    keepSearching = false;
+                }
+                break;
+            }
+        }
+
+        if (isArgToCall)
+        {
+            JITDUMP("LocalAddressVisitor incrementing weighted ref count from %d to %d"
+                    " for implicit by-ref V%02d arg passed to call\n",
+                    varDsc->lvRefCntWtd(RCS_EARLY), varDsc->lvRefCntWtd(RCS_EARLY) + 1, lclNum);
+            varDsc->incLvRefCntWtd(1, RCS_EARLY);
+        }
     }
 };
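At the source level, the ancestor shape the visitor matches, CALL(OBJ(ADDR(LCL_VAR))),
is simply a struct parameter passed whole as a call argument. A hedged C# sketch with
hypothetical names; 'Big' is assumed large enough to be passed as an implicit byref on
x64/arm64:

    using System;

    public struct Big { public long A, B, C; }

    public static class CallArgShape
    {
        static long Use(Big b) => b.A + b.B + b.C;

        static long Forward(Big b)
        {
            // 'b' appears here only as a whole-struct call argument; in the JIT's IR
            // this is the CALL(OBJ(ADDR(LCL_VAR b))) shape matched above, so both the
            // early ref count and the weighted (call-appearance) count are bumped.
            // Many call appearances and few field uses steer the undo-promotion
            // heuristic toward keeping 'b' as a pointer, enabling copy omission.
            return Use(b);
        }

        public static void Main() => Console.WriteLine(Forward(new Big { A = 1, B = 2, C = 3 }));
    }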
diff --git a/src/coreclr/src/jit/morph.cpp b/src/coreclr/src/jit/morph.cpp
index d97b095..7303259 100644
--- a/src/coreclr/src/jit/morph.cpp
+++ b/src/coreclr/src/jit/morph.cpp
@@ -4796,46 +4796,52 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call,
 
     // We don't need a copy if this is the last use of an implicit by-ref local.
     //
-    // We can't determine that all of the time, but if there is only
-    // one use and the method has no loops, then this use must be the last.
     if (opts.OptimizationEnabled())
     {
-        GenTreeLclVarCommon* lcl = nullptr;
-
-        if (argx->OperIsLocal())
-        {
-            lcl = argx->AsLclVarCommon();
-        }
-        else if ((argx->OperGet() == GT_OBJ) && argx->AsIndir()->Addr()->OperIsLocal())
-        {
-            lcl = argx->AsObj()->Addr()->AsLclVarCommon();
-        }
+        GenTreeLclVar* const lcl = argx->IsImplicitByrefParameterValue(this);
 
         if (lcl != nullptr)
         {
-            unsigned varNum = lcl->AsLclVarCommon()->GetLclNum();
-            if (lvaIsImplicitByRefLocal(varNum))
-            {
-                LclVarDsc* varDsc = &lvaTable[varNum];
-                // JIT_TailCall helper has an implicit assumption that all tail call arguments live
-                // on the caller's frame. If an argument lives on the caller's caller's frame, it may get
-                // overwritten if that frame is reused for the tail call. Therefore, we should always copy
-                // struct parameters if they are passed as arguments to a tail call.
-                if (!call->IsTailCallViaHelper() && (varDsc->lvRefCnt(RCS_EARLY) == 1) && !fgMightHaveLoop())
-                {
-                    assert(!call->IsTailCall());
+            const unsigned       varNum           = lcl->GetLclNum();
+            LclVarDsc* const     varDsc           = lvaGetDesc(varNum);
+            const unsigned short totalAppearances = varDsc->lvRefCnt(RCS_EARLY);
 
-                    varDsc->setLvRefCnt(0, RCS_EARLY);
-                    args->SetNode(lcl);
-                    assert(argEntry->GetNode() == lcl);
+            // We don't have liveness, so we rely on other indications of last use.
+            //
+            // We handle these cases:
+            //
+            // * (must not copy) If the call is a tail call, the use is a last use.
+            //   We must skip the copy if we have a fast tail call.
+            //
+            // * (must copy) However the current slow tail call helper always copies
+            //   the tail call args from the current frame, so we must copy
+            //   if the tail call is a slow tail call.
+            //
+            // * (may not copy) if the call is noreturn, the use is a last use.
+            //   We also check for just one reference here as we are not doing
+            //   alias analysis of the call's parameters, or checking if the call
+            //   site is not within some try region.
+            //
+            // * (may not copy) if there is exactly one use of the local in the method,
+            //   and the call is not in a loop, this is a last use.
+            //
+            const bool isTailCallLastUse = call->IsTailCall();
+            const bool isCallLastUse     = (totalAppearances == 1) && !fgMightHaveLoop();
+            const bool isNoReturnLastUse = (totalAppearances == 1) && call->IsNoReturn();
+            if (!call->IsTailCallViaHelper() && (isTailCallLastUse || isCallLastUse || isNoReturnLastUse))
+            {
+                varDsc->setLvRefCnt(0, RCS_EARLY);
+                args->SetNode(lcl);
+                assert(argEntry->GetNode() == lcl);
 
-                    JITDUMP("did not have to make outgoing copy for V%2d", varNum);
-                    return;
-                }
+                JITDUMP("did not need to make outgoing copy for last use of implicit byref V%2d\n", varNum);
+                return;
             }
         }
     }
 
+    JITDUMP("making an outgoing copy for struct arg\n");
+
     if (fgOutgoingArgTemps == nullptr)
     {
         fgOutgoingArgTemps = hashBv::Create(this);
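The last-use cases above, seen from C#. A hedged sketch with hypothetical names;
'Big' is assumed to be passed as an implicit byref, and the outcomes assume
optimization is enabled:

    using System;

    public struct Big { public long A, B, C, D; }

    public static class LastUses
    {
        static long Consume(Big b) => b.A + b.D;

        // Tail position: a fast tail call is definitionally the last use of 'b',
        // so no defensive copy is needed before passing it on.
        static long TailUse(Big b) => Consume(b);

        // Single appearance in a method with no loops: this call must be the last
        // use, so the copy can be omitted even though the call is not a tail call.
        static long SingleUse(Big b)
        {
            long r = Consume(b);
            return r + 1;
        }

        // Called in a loop: the single textual appearance is not a last use,
        // so a copy is still required.
        static long LoopUse(Big b)
        {
            long r = 0;
            for (int i = 0; i < 3; i++) r += Consume(b);
            return r;
        }

        public static void Main()
        {
            var b = new Big { A = 1, D = 2 };
            Console.WriteLine(TailUse(b) + SingleUse(b) + LoopUse(b));
        }
    }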
@@ -6557,7 +6563,7 @@ void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result)
 //
 //    Windows Amd64:
 //        A fast tail call can be made whenever the number of callee arguments
-//        is larger than or equal to the number of caller arguments, or we have four
+//        is less than or equal to the number of caller arguments, or we have four
 //        or fewer callee arguments. This is because, on Windows AMD64, each
 //        argument uses exactly one register or one 8-byte stack slot. Thus, we only
 //        need to count arguments, and not be concerned with the size of each
@@ -6683,9 +6689,9 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason)
     fgInitArgInfo(callee);
     fgArgInfo* argInfo = callee->fgArgInfo;
 
-    bool   hasByrefParameter  = false;
-    size_t calleeArgStackSize = 0;
-    size_t callerArgStackSize = info.compArgStackSize;
+    bool   hasMustCopyByrefParameter = false;
+    size_t calleeArgStackSize        = 0;
+    size_t callerArgStackSize        = info.compArgStackSize;
 
     for (unsigned index = 0; index < argInfo->ArgCount(); ++index)
     {
@@ -6695,12 +6701,222 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason)
 
         if (arg->isStruct)
         {
-            // Byref struct arguments are not allowed to fast tail call as the information
-            // of the caller's stack is lost when the callee is compiled.
             if (arg->passedByRef)
             {
-                hasByrefParameter = true;
-                break;
+                // Generally a byref arg will block tail calling, as we have to
+                // make a local copy of the struct for the callee.
+                hasMustCopyByrefParameter = true;
+
+                // If we're optimizing, we may be able to pass our caller's byref to our callee,
+                // and so still be able to tail call.
+                if (opts.OptimizationEnabled())
+                {
+                    // First, see if this arg is an implicit byref param.
+                    GenTreeLclVar* const lcl = arg->GetNode()->IsImplicitByrefParameterValue(this);
+
+                    if (lcl != nullptr)
+                    {
+                        // Yes, the arg is an implicit byref param.
+                        const unsigned   lclNum = lcl->GetLclNum();
+                        LclVarDsc* const varDsc = lvaGetDesc(lcl);
+
+                        // The param must not be promoted; if we've promoted, then the arg will be
+                        // a local struct assembled from the promoted fields.
+                        if (varDsc->lvPromoted)
+                        {
+                            JITDUMP("Arg [%06u] is promoted implicit byref V%02u, so no tail call\n",
+                                    dspTreeID(arg->GetNode()), lclNum);
+                        }
+                        else
+                        {
+                            JITDUMP("Arg [%06u] is unpromoted implicit byref V%02u, seeing if we can still tail call\n",
+                                    dspTreeID(arg->GetNode()), lclNum);
+
+                            // We have to worry about introducing aliases if we bypass copying
+                            // the struct at the call. We'll do some limited analysis to see if we
+                            // can rule this out.
+                            const unsigned argLimit = 6;
+
+                            // If this is the only appearance of the byref in the method, then
+                            // aliasing is not possible.
+                            //
+                            // If no other call arg refers to this byref, and no other arg is
+                            // a pointer which could refer to this byref, we can optimize.
+                            //
+                            // We only check this for calls with small numbers of arguments,
+                            // as the analysis cost will be quadratic.
+                            //
+                            if (varDsc->lvRefCnt(RCS_EARLY) == 1)
+                            {
+                                JITDUMP("... yes, arg is the only appearance of V%02u\n", lclNum);
+                                hasMustCopyByrefParameter = false;
+                            }
+                            else if (argInfo->ArgCount() <= argLimit)
+                            {
+                                GenTree* interferingArg = nullptr;
+                                for (unsigned index2 = 0; index2 < argInfo->ArgCount(); ++index2)
+                                {
+                                    if (index2 == index)
+                                    {
+                                        continue;
+                                    }
+
+                                    fgArgTabEntry* const arg2 = argInfo->GetArgEntry(index2, false);
+                                    JITDUMP("... checking other arg [%06u]...\n", dspTreeID(arg2->GetNode()));
+                                    DISPTREE(arg2->GetNode());
+
+                                    // Do we pass 'lcl' more than once to the callee?
+                                    if (arg2->isStruct && arg2->passedByRef)
+                                    {
+                                        GenTreeLclVarCommon* const lcl2 =
+                                            arg2->GetNode()->IsImplicitByrefParameterValue(this);
+
+                                        if ((lcl2 != nullptr) && (lclNum == lcl2->GetLclNum()))
+                                        {
+                                            // not copying would introduce aliased implicit byref structs
+                                            // in the callee ... we can't optimize.
+                                            interferingArg = arg2->GetNode();
+                                            break;
+                                        }
+                                        else
+                                        {
+                                            // Note: guard the dump, as lcl2 is nullptr when this other
+                                            // struct arg is not an implicit byref parameter value.
+                                            if (lcl2 != nullptr)
+                                            {
+                                                JITDUMP("... arg refers to different implicit byref V%02u\n",
+                                                        lcl2->GetLclNum());
+                                            }
+                                            continue;
+                                        }
+                                    }
+
+                                    // Do we pass a byref pointer which might point within 'lcl'?
+                                    //
+                                    // We can assume the 'lcl' is unaliased on entry to the
+                                    // method, so the only way we can have an aliasing byref pointer at
+                                    // the call is if 'lcl' is address taken/exposed in the method.
+                                    //
+                                    // Note even though 'lcl' is not promoted, we are in the middle
+                                    // of the promote->rewrite->undo->(morph)->demote cycle, and so
+                                    // might see references to promoted fields of 'lcl' that haven't yet
+                                    // been demoted (see fgMarkDemotedImplicitByRefArgs).
+                                    //
+                                    // So, we also need to scan all 'lcl's fields, if any, to see if they
+                                    // are exposed.
+                                    //
+                                    // When looking for aliases from other args, we check for both TYP_BYREF
+                                    // and TYP_I_IMPL typed args here.
+                                    // Conceptually anything that points into an implicit byref parameter
+                                    // should be TYP_BYREF, as these parameters could refer to boxed heap
+                                    // locations (say if the method is invoked by reflection) but there are
+                                    // some stack-only structs (like typed references) where the
+                                    // importer/runtime code uses TYP_I_IMPL, and fgInitArgInfo will
+                                    // transiently retype all simple address-of implicit parameter args as
+                                    // TYP_I_IMPL.
+                                    //
+                                    if ((arg2->argType == TYP_BYREF) || (arg2->argType == TYP_I_IMPL))
+                                    {
+                                        JITDUMP("...arg is a byref, must run an alias check\n");
+                                        bool checkExposure = true;
+                                        bool hasExposure   = false;
+
+                                        // See if there is any way arg could refer to a parameter struct.
+                                        GenTree* arg2Node = arg2->GetNode();
+                                        if (arg2Node->OperIs(GT_LCL_VAR))
+                                        {
+                                            GenTreeLclVarCommon* arg2LclNode = arg2Node->AsLclVarCommon();
+                                            assert(arg2LclNode->GetLclNum() != lclNum);
+                                            LclVarDsc* arg2Dsc = lvaGetDesc(arg2LclNode);
+
+                                            // Other params can't alias implicit byref params
+                                            if (arg2Dsc->lvIsParam)
+                                            {
+                                                checkExposure = false;
+                                            }
+                                        }
+                                        // Because we're checking TYP_I_IMPL above, at least
+                                        // screen out obvious things that can't cause aliases.
+                                        else if (arg2Node->IsIntegralConst())
+                                        {
+                                            checkExposure = false;
+                                        }
+
+                                        if (checkExposure)
+                                        {
+                                            JITDUMP(
+                                                "... not sure where byref arg points, checking if V%02u is exposed\n",
+                                                lclNum);
+                                            // arg2 might alias arg, see if we've exposed
+                                            // arg somewhere in the method.
+                                            if (varDsc->lvHasLdAddrOp || varDsc->lvAddrExposed)
+                                            {
+                                                // Struct as a whole is exposed, can't optimize
+                                                JITDUMP("... V%02u is exposed\n", lclNum);
+                                                hasExposure = true;
+                                            }
+                                            else if (varDsc->lvFieldLclStart != 0)
+                                            {
+                                                // This is the promoted/undone struct case.
+                                                //
+                                                // The field start is actually the local number of the promoted local,
+                                                // use it to enumerate the fields.
+                                                const unsigned   promotedLcl    = varDsc->lvFieldLclStart;
+                                                LclVarDsc* const promotedVarDsc = lvaGetDesc(promotedLcl);
+                                                JITDUMP("...promoted-unpromoted case -- also checking exposure of "
+                                                        "fields of V%02u\n",
+                                                        promotedLcl);
+
+                                                for (unsigned fieldIndex = 0; fieldIndex < promotedVarDsc->lvFieldCnt;
+                                                     fieldIndex++)
+                                                {
+                                                    LclVarDsc* fieldDsc =
+                                                        lvaGetDesc(promotedVarDsc->lvFieldLclStart + fieldIndex);
+
+                                                    if (fieldDsc->lvHasLdAddrOp || fieldDsc->lvAddrExposed)
+                                                    {
+                                                        // Promoted and not yet demoted field is exposed, can't optimize
+                                                        JITDUMP("... field V%02u is exposed\n",
+                                                                promotedVarDsc->lvFieldLclStart + fieldIndex);
+                                                        hasExposure = true;
+                                                        break;
+                                                    }
+                                                }
+                                            }
+                                        }
+
+                                        if (hasExposure)
+                                        {
+                                            interferingArg = arg2->GetNode();
+                                            break;
+                                        }
+                                    }
+                                    else
+                                    {
+                                        JITDUMP("...arg is not a byref or implicit byref (%s)\n",
+                                                varTypeName(arg2->GetNode()->TypeGet()));
+                                    }
+                                }
+
+                                if (interferingArg != nullptr)
+                                {
+                                    JITDUMP("... no, arg [%06u] may alias with V%02u\n", dspTreeID(interferingArg),
+                                            lclNum);
+                                }
+                                else
+                                {
+                                    JITDUMP("... yes, no other arg in call can alias V%02u\n", lclNum);
+                                    hasMustCopyByrefParameter = false;
+                                }
+                            }
+                            else
+                            {
+                                JITDUMP(" ... no, call has %u > %u args, alias analysis deemed too costly\n",
+                                        argInfo->ArgCount(), argLimit);
+                            }
+                        }
+                    }
+                }
+
+                if (hasMustCopyByrefParameter)
+                {
+                    // This arg blocks the tail call. No reason to keep scanning the remaining args.
+                    break;
+                }
             }
         }
     }
@@ -6755,17 +6971,18 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason)
 #endif // DEBUG
     };
 
-// Note on vararg methods:
-// If the caller is vararg method, we don't know the number of arguments passed by caller's caller.
-// But we can be sure that in-coming arg area of vararg caller would be sufficient to hold its
-// fixed args. Therefore, we can allow a vararg method to fast tail call other methods as long as
-// out-going area required for callee is bounded by caller's fixed argument space.
-//
-// Note that callee being a vararg method is not a problem since we can account the params being passed.
-//
-// We will currently decide to not fast tail call on Windows armarch if the caller or callee is a vararg
-// method. This is due to the ABI differences for native vararg methods for these platforms. There is
-// work required to shuffle arguments to the correct locations.
+    // Note on vararg methods:
+    // If the caller is vararg method, we don't know the number of arguments passed by caller's caller.
+    // But we can be sure that in-coming arg area of vararg caller would be sufficient to hold its
+    // fixed args. Therefore, we can allow a vararg method to fast tail call other methods as long as
+    // out-going area required for callee is bounded by caller's fixed argument space.
+    //
+    // Note that callee being a vararg method is not a problem since we can account the params being passed.
+    //
+    // We will currently decide to not fast tail call on Windows armarch if the caller or callee is a vararg
+    // method. This is due to the ABI differences for native vararg methods for these platforms. There is
+    // work required to shuffle arguments to the correct locations.
+    CLANG_FORMAT_COMMENT_ANCHOR;
 
 #if (defined(TARGET_WINDOWS) && defined(TARGET_ARM)) || (defined(TARGET_WINDOWS) && defined(TARGET_ARM64))
     if (info.compIsVarArgs || callee->IsVarargs())
@@ -6789,7 +7006,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason)
     // For Windows some struct parameters are copied on the local frame
     // and then passed by reference. We cannot fast tail call in these situations
     // as we need to keep our frame around.
-    if (hasByrefParameter)
+    if (hasMustCopyByrefParameter)
     {
         reportFastTailCallDecision("Callee has a byref parameter");
         return false;
@@ -6843,13 +7060,18 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call)
     // It cannot be an inline candidate
     assert(!call->IsInlineCandidate());
 
-    auto failTailCall = [&](const char* reason) {
+    auto failTailCall = [&](const char* reason, unsigned lclNum = BAD_VAR_NUM) {
 #ifdef DEBUG
         if (verbose)
         {
             printf("\nRejecting tail call in morph for call ");
             printTreeID(call);
-            printf(": %s\n", reason);
+            printf(": %s", reason);
+            if (lclNum != BAD_VAR_NUM)
+            {
+                printf(" V%02u", lclNum);
+            }
+            printf("\n");
         }
 #endif
@@ -6953,9 +7175,9 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call)
         //
         if (call->IsImplicitTailCall())
         {
-            if (varDsc->lvHasLdAddrOp)
+            if (varDsc->lvHasLdAddrOp && !lvaIsImplicitByRefLocal(varNum))
             {
-                failTailCall("Local address taken");
+                failTailCall("Local address taken", varNum);
                 return nullptr;
             }
             if (varDsc->lvAddrExposed)
@@ -6978,20 +7200,20 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call)
             }
             else
             {
-                failTailCall("Local address taken");
+                failTailCall("Local address taken", varNum);
                 return nullptr;
             }
         }
         if (varDsc->lvPromoted && varDsc->lvIsParam && !lvaIsImplicitByRefLocal(varNum))
         {
-            failTailCall("Has Struct Promoted Param");
+            failTailCall("Has Struct Promoted Param", varNum);
             return nullptr;
         }
         if (varDsc->lvPinned)
         {
             // A tail call removes the method from the stack, which means the pinning
             // goes away for the callee.  We can't allow that.
-            failTailCall("Has Pinned Vars");
+            failTailCall("Has Pinned Vars", varNum);
             return nullptr;
         }
     }
@@ -16835,8 +17057,23 @@ void Compiler::fgRetypeImplicitByRefArgs()
             // through the pointer parameter, the same as we'd do for this
             // parameter if it weren't promoted at all (otherwise the initialization
             // of the new temp would just be a needless memcpy at method entry).
-            bool undoPromotion = (lvaGetPromotionType(newVarDsc) == PROMOTION_TYPE_DEPENDENT) ||
-                                 (varDsc->lvRefCnt(RCS_EARLY) <= varDsc->lvFieldCnt);
+            //
+            // Otherwise, see how many appearances there are. We keep two early ref counts: total
+            // number of references to the struct or some field, and how many of these are
+            // arguments to calls. We undo promotion unless we see enough non-call uses.
+            //
+            const unsigned totalAppearances = varDsc->lvRefCnt(RCS_EARLY);
+            const unsigned callAppearances  = varDsc->lvRefCntWtd(RCS_EARLY);
+            assert(totalAppearances >= callAppearances);
+            const unsigned nonCallAppearances = totalAppearances - callAppearances;
+
+            bool undoPromotion = ((lvaGetPromotionType(newVarDsc) == PROMOTION_TYPE_DEPENDENT) ||
+                                  (nonCallAppearances <= varDsc->lvFieldCnt));
+
+            JITDUMP("%s promotion of implicit by-ref V%02u: %s total: %u non-call: %u fields: %u\n",
+                    undoPromotion ? "Undoing" : "Keeping", lclNum,
+                    (lvaGetPromotionType(newVarDsc) == PROMOTION_TYPE_DEPENDENT) ? "dependent;" : "",
+                    totalAppearances, nonCallAppearances, varDsc->lvFieldCnt);
 
             if (!undoPromotion)
             {
@@ -16870,7 +17107,7 @@ void Compiler::fgRetypeImplicitByRefArgs()
                 {
                     // Set the new parent.
                     fieldVarDsc->lvParentLcl = newLclNum;
-                    // Clear the ref count field; it is used to communicate the nubmer of references
+                    // Clear the ref count field; it is used to communicate the number of references
                     // to the implicit byref parameter when morphing calls that pass the implicit byref
                     // out as an outgoing argument value, but that doesn't pertain to this field local
                     // which is now a field of a non-arg local.
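To connect the two counts above, here is a hedged C# sketch of how the balance of
call appearances versus field appearances steers the undo-promotion heuristic.
Names are hypothetical, and the exact outcomes depend on the thresholds in the
code above:

    using System;

    public struct Big { public long A, B, C, D; }

    public static class PromotionBalance
    {
        static long Take(Big b) => b.A;

        // Only a call appearance: nonCallAppearances is 0, so promotion is undone,
        // 'b' stays a pointer, and the copy can be omitted / the call tail called.
        static long PassThrough(Big b) => Take(b);

        // Many field appearances: nonCallAppearances (8) exceeds the field count (4),
        // so promotion is kept; passing 'b' on then requires assembling a struct
        // from the promoted fields, which blocks the tail call.
        static long FieldHeavy(Big b)
        {
            long sum = b.A * b.A + b.B * b.B + b.C * b.C + b.D * b.D;
            return sum + Take(b);
        }

        public static void Main()
        {
            var b = new Big { A = 1, B = 2, C = 3, D = 4 };
            Console.WriteLine(PassThrough(b) + FieldHeavy(b));
        }
    }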
- failTailCall("Has Pinned Vars"); + failTailCall("Has Pinned Vars", varNum); return nullptr; } } @@ -16835,8 +17057,23 @@ void Compiler::fgRetypeImplicitByRefArgs() // through the pointer parameter, the same as we'd do for this // parameter if it weren't promoted at all (otherwise the initialization // of the new temp would just be a needless memcpy at method entry). - bool undoPromotion = (lvaGetPromotionType(newVarDsc) == PROMOTION_TYPE_DEPENDENT) || - (varDsc->lvRefCnt(RCS_EARLY) <= varDsc->lvFieldCnt); + // + // Otherwise, see how many appearances there are. We keep two early ref counts: total + // number of references to the struct or some field, and how many of these are + // arguments to calls. We undo promotion unless we see enough non-call uses. + // + const unsigned totalAppearances = varDsc->lvRefCnt(RCS_EARLY); + const unsigned callAppearances = varDsc->lvRefCntWtd(RCS_EARLY); + assert(totalAppearances >= callAppearances); + const unsigned nonCallAppearances = totalAppearances - callAppearances; + + bool undoPromotion = ((lvaGetPromotionType(newVarDsc) == PROMOTION_TYPE_DEPENDENT) || + (nonCallAppearances <= varDsc->lvFieldCnt)); + + JITDUMP("%s promotion of implicit by-ref V%02u: %s total: %u non-call: %u fields: %u\n", + undoPromotion ? "Undoing" : "Keeping", lclNum, + (lvaGetPromotionType(newVarDsc) == PROMOTION_TYPE_DEPENDENT) ? "dependent;" : "", + totalAppearances, nonCallAppearances, varDsc->lvFieldCnt); if (!undoPromotion) { @@ -16870,7 +17107,7 @@ void Compiler::fgRetypeImplicitByRefArgs() { // Set the new parent. fieldVarDsc->lvParentLcl = newLclNum; - // Clear the ref count field; it is used to communicate the nubmer of references + // Clear the ref count field; it is used to communicate the number of references // to the implicit byref parameter when morphing calls that pass the implicit byref // out as an outgoing argument value, but that doesn't pertain to this field local // which is now a field of a non-arg local. diff --git a/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCalls.cs b/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCalls.cs new file mode 100644 index 0000000..161b38e --- /dev/null +++ b/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCalls.cs @@ -0,0 +1,237 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Runtime.CompilerServices; + +// Tail calls with implicit byref parameters as arguments. +// +// Generally, we can tail call provided we don't copy the +// argument to the local frame. 
+public class ImplicitByrefTailCalls
+{
+    public static void Z() { }
+    public static bool Z(bool b) => b;
+
+    [MethodImpl(MethodImplOptions.NoOptimization)]
+    public static int ZZ(Span<int> x)
+    {
+        int result = 0;
+        for (int i = 0; i < x.Length; i++)
+        {
+            result += x[i] * x[i] + i;
+        }
+        return result;
+    }
+
+    [MethodImpl(MethodImplOptions.NoOptimization)]
+    public static int ZZZ(Span<int> x, int a, int b, int c, int d, int e, int f)
+    {
+        int result = 0;
+        for (int i = 0; i < x.Length; i++)
+        {
+            result += x[i] * x[i] + i;
+        }
+        return result;
+    }
+
+    // Various methods that should fast tail call
+
+    public static int A(Span<int> x)
+    {
+        Z(); Z(); return ZZ(x);
+    }
+
+    public static int B(Span<int> x)
+    {
+        Z(); Z(); return A(x);
+    }
+
+    public static int C(Span<int> x, bool p)
+    {
+        Z(); Z();
+        if (Z(p))
+        {
+            return A(x);
+        }
+        else
+        {
+            return B(x);
+        }
+    }
+
+    public static int D(Span<int> x, bool p)
+    {
+        Z(); Z();
+        return Z(p) ? A(x) : B(x);
+    }
+
+    public static int E(Span<int> x, Span<int> y, bool p)
+    {
+        Z(); Z(); Z(); Z();
+        if (Z(p))
+        {
+            return A(x);
+        }
+        else
+        {
+            return B(y);
+        }
+    }
+
+    public static int F(Span<int> x, Span<int> y, bool p)
+    {
+        Z(); Z(); Z(); Z();
+        return Z(p) ? ZZ(x) : ZZ(y);
+    }
+
+    // Methods with tail call sites and other uses
+
+    public static int G(Span<int> x, bool p)
+    {
+        Z(); Z();
+        if (Z(p))
+        {
+            return ZZ(x);
+        }
+        else
+        {
+            return x.Length;
+        }
+    }
+
+    // Here there are enough actual uses to
+    // justify promotion, hence we can't tail
+    // call, as "undoing" promotion at the end
+    // entails making a local copy.
+    //
+    // We could handle this by writing the updated
+    // struct values back to the original byref
+    // before tail calling....
+    public static int H(Span<int> x, int y)
+    {
+        Z(); Z();
+        if (y < x.Length)
+        {
+            int result = 0;
+            for (int i = 0; i < y; i++)
+            {
+                result += x[i];
+            }
+            return result;
+        }
+
+        return ZZ(x);
+    }
+
+    // Here the call to ZZZ would need to be a slow tail call,
+    // so we won't tail call at all. But the only
+    // reference to x is at the call, and it's not in
+    // a loop, so we can still avoid making a copy.
+    public static int S(Span<int> x)
+    {
+        Z(); Z();
+        return ZZZ(x, 1, 2, 3, 4, 5, 6);
+    }
+
+    // Must copy at the normal call site
+    // but can avoid it at the tail call site. However
+    // we're currently blocked because we reuse
+    // the temp used to pass the argument and
+    // it is exposed in the first call.
+    public static int T(Span<int> x, ref int q)
+    {
+        Z(); Z();
+        q = ZZ(x);
+        return ZZ(x);
+    }
+
+    // Here ZZZ is called in a loop, so despite
+    // the call argument x being the only reference to
+    // x in the method, we must copy x.
+    // We can't consider it as a last use.
+    public static int L(Span<int> x)
+    {
+        Z(); Z();
+        int result = 0;
+        int limit = 10;
+        for (int i = 0; i < limit; i++)
+        {
+            result += ZZZ(x, 1, 2, 3, 4, 5, 6);
+        }
+
+        return result / limit;
+    }
+
+    static void MustThrow(Span<int> x)
+    {
+        x[0]++;
+        throw new Exception();
+    }
+
+    // Because MustThrow must throw
+    // and the argument is the only
+    // mention of x, we do not need to copy x.
+    public static int M(Span<int> x)
+    {
+        Z(); Z(); Z(); Z();
+        if (p)
+        {
+            MustThrow(x);
+        }
+        return 10000;
+    }
+
+    // Although MustThrow must throw,
+    // the argument x is not the only
+    // mention of x, so we must copy x.
+    public static int N(Span<int> x)
+    {
+        Z(); Z(); Z(); Z();
+        try
+        {
+            if (p)
+            {
+                MustThrow(x);
+            }
+        }
+        catch (Exception)
+        {
+        }
+
+        return 10000 + x[0];
+    }
+
+    static bool p;
+
+    public static int Main()
+    {
+        int[] a = new int[100];
+        a[45] = 55;
+        a[55] = 45;
+        p = false;
+
+        Span<int> s = new Span<int>(a);
+        int q = 0;
+
+        int ra = A(s);
+        int rb = B(s);
+        int rc = C(s, p);
+        int rd = D(s, p);
+        int re = E(s, s, p);
+        int rf = F(s, s, p);
+        int rg = G(s, p);
+        int rh = H(s, 46);
+        int rs = S(s);
+        int rt = T(s, ref q);
+        int rl = L(s);
+        int rm = M(s);
+        int rn = N(s);
+
+        Console.WriteLine($"{ra},{rb},{rc},{rd},{re},{rf},{rg},{rh},{rs},{rt},{rl},{rm},{rn}");
+
+        return ra + rb + rc + rd + re + rf + rg + rh + rs + rt + rl + rm + rn - 110055;
+    }
+}
diff --git a/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCalls.csproj b/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCalls.csproj
new file mode 100644
index 0000000..bf6f589
--- /dev/null
+++ b/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCalls.csproj
@@ -0,0 +1,13 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+  </PropertyGroup>
+  <PropertyGroup>
+    <DebugType>None</DebugType>
+    <AllowUnsafeBlocks>True</AllowUnsafeBlocks>
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+</Project>
diff --git a/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCallsAliasing.cs b/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCallsAliasing.cs
new file mode 100644
index 0000000..bc9b53a
--- /dev/null
+++ b/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCallsAliasing.cs
@@ -0,0 +1,268 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Runtime.CompilerServices;
+
+public struct S
+{
+    public long s_x;
+    public long s_y;
+}
+
+public struct R
+{
+    public S r_s;
+    public S r_t;
+}
+
+// Tail calls with implicit byref parameters as arguments.
+//
+// We need to ensure that we don't introduce aliased
+// implicit byref parameters by optimizing away copies.
+
+public class ImplicitByrefTailCalls
+{
+    // Helper method to make callees unattractive for inlining.
+    public static void Z() { }
+
+    // Will return different answers if x and y refer to the same struct.
+    [MethodImpl(MethodImplOptions.NoOptimization)]
+    public static long Alias(S x, S y)
+    {
+        y.s_x++;
+        long result = 0;
+        for (int i = 0; i < 100; i++)
+        {
+            x.s_x++;
+            result += x.s_x + x.s_y;
+        }
+        return result;
+    }
+
+    // Will return different answers if y refers to some part of x.
+    [MethodImpl(MethodImplOptions.NoOptimization)]
+    public static long Alias2(S x, ref long y)
+    {
+        y++;
+        long result = 0;
+        for (int i = 0; i < 100; i++)
+        {
+            x.s_x++;
+            result += x.s_x + x.s_y;
+        }
+        return result;
+    }
+
+    // Will return different answers if x and y refer to the same struct.
+    [MethodImpl(MethodImplOptions.NoOptimization)]
+    public static long Alias3(int a, int b, int c, int d, int e, int f, S x, S y)
+    {
+        y.s_x++;
+        long result = 0;
+        for (int i = 0; i < 100; i++)
+        {
+            x.s_x++;
+            result += x.s_x + x.s_y;
+        }
+        return result;
+    }
+
+    // Will return different answers if y refers to some part of x.
+    [MethodImpl(MethodImplOptions.NoOptimization)]
+    public static long Alias4(ref long y, int b, int c, int d, int e, int f, S x)
+    {
+        y++;
+        long result = 0;
+        for (int i = 0; i < 100; i++)
+        {
+            x.s_x++;
+            result += x.s_x + x.s_y;
+        }
+        return result;
+    }
+
+    // Will return different answers if x and r.r_s refer to the same struct.
+    [MethodImpl(MethodImplOptions.NoOptimization)]
+    public static long Alias5(S x, R r)
+    {
+        r.r_s.s_x++;
+        long result = 0;
+        for (int i = 0; i < 100; i++)
+        {
+            x.s_x++;
+            result += x.s_x + x.s_y;
+        }
+        return result;
+    }
+
+    // Will return different answers if ss and x refer to the same struct.
+    [MethodImpl(MethodImplOptions.NoOptimization)]
+    public static long Alias6(Span<S> ss, S x)
+    {
+        ss[0].s_x++;
+        long result = 0;
+        for (int i = 0; i < 100; i++)
+        {
+            x.s_x++;
+            result += x.s_x + x.s_y;
+        }
+        return result;
+    }
+
+    // Will return different answers if x and y refer to the same struct.
+    [MethodImpl(MethodImplOptions.NoOptimization)]
+    public static long Alias7(S x, ref S y)
+    {
+        y.s_x++;
+        long result = 0;
+        for (int i = 0; i < 100; i++)
+        {
+            x.s_x++;
+            result += x.s_x + x.s_y;
+        }
+        return result;
+    }
+
+    // A must copy params locally when calling Alias,
+    // and so can't tail call.
+    public static long A(S x)
+    {
+        Z(); Z(); Z(); Z();
+        return Alias(x, x);
+    }
+
+    // B must copy params locally when calling Alias2,
+    // and so can't tail call.
+    public static long B(S x)
+    {
+        Z(); Z(); Z(); Z();
+        return Alias2(x, ref x.s_y);
+    }
+
+    // C must copy params locally when calling Alias2,
+    // and so can't tail call. Here the problematic
+    // tree is not part of the call.
+    public static long C(S x)
+    {
+        ref long z = ref x.s_y;
+        Z(); Z(); Z(); Z();
+        return Alias2(x, ref z);
+    }
+
+    // D should not be able to tail call, as doing so
+    // means we are not making local copies of x, and
+    // so would introduce aliasing.
+    public static long D(int a, int b, int c, int d, int e, int f, S x, S y)
+    {
+        Z(); Z(); Z(); Z();
+        Z(); Z(); Z(); Z();
+        Z(); Z(); Z(); Z();
+        return Alias3(1, 2, 3, 4, 5, 6, x, x);
+    }
+
+    // E should be able to tail call.
+    public static long E(int a, int b, int c, int d, int e, int f, S x, S y)
+    {
+        Z(); Z(); Z(); Z();
+        Z(); Z(); Z(); Z();
+        Z(); Z(); Z(); Z();
+        return Alias3(1, 2, 3, 4, 5, 6, x, y);
+    }
+
+    // F should be able to tail call.
+    public static long F(int a, int b, int c, int d, int e, int f, S x, S y)
+    {
+        Z(); Z(); Z(); Z();
+        Z(); Z(); Z(); Z();
+        Z(); Z(); Z(); Z();
+        return Alias3(1, 2, 3, 4, 5, 6, y, x);
+    }
+
+    // G should be able to tail call, but we
+    // might not want to pay the cost to prove it safe.
+    public static long G(int a, int b, int c, int d, int e, int f, S x, S y)
+    {
+        Z(); Z(); Z(); Z();
+        Z(); Z(); Z(); Z();
+        Z(); Z(); Z(); Z();
+
+        if (a != 0)
+        {
+            return Alias4(ref x.s_x, 2, 3, 4, 5, 6, y);
+        }
+        else
+        {
+            return Alias4(ref y.s_x, 2, 3, 4, 5, 6, x);
+        }
+    }
+
+    // H must copy params locally when calling Alias,
+    // and so can't tail call.
+    public static long H(R r)
+    {
+        Z(); Z(); Z(); Z();
+        return Alias(r.r_s, r.r_s);
+    }
+
+    // I must copy params locally when calling Alias5,
+    // and so can't tail call.
+    public static long I(R r)
+    {
+        Z(); Z(); Z(); Z();
+        return Alias5(r.r_s, r);
+    }
+
+    // J can tail call, but we might not recognize this.
+    public static long J(R r)
+    {
+        Z(); Z(); Z(); Z();
+        return Alias(r.r_s, r.r_t);
+    }
+
+    // K cannot tail call.
+    public static unsafe long K(S s)
+    {
+        Z(); Z(); Z(); Z();
+        Span<S> ss = new Span<S>((void*)&s, 1);
+        return Alias6(ss, s);
+    }
+
+    // L cannot tail call, as the first arg to
+    // Alias7 must be copied locally.
+    public static long L(S s)
+    {
+        Z(); Z(); Z(); Z();
+        return Alias7(s, ref s);
+    }
+
+    public static int Main()
+    {
+        S s = new S();
+        s.s_x = 1;
+        s.s_y = 2;
+        R r = new R();
+        r.r_s = s;
+        r.r_t = s;
+
+        long ra = A(s);
+        long rb = B(s);
+        long rc = C(s);
+        long rd = D(0, 0, 0, 0, 0, 0, s, s);
+        long re = E(0, 0, 0, 0, 0, 0, s, s);
+        long rf = F(0, 0, 0, 0, 0, 0, s, s);
+        long rg = G(0, 0, 0, 0, 0, 0, s, s);
+        long rh = H(r);
+        long ri = I(r);
+        long rj = J(r);
+        long rk = K(s);
+        long rl = L(s);
+
+        Console.WriteLine($"{ra},{rb},{rc},{rd},{re},{rf},{rg},{rh},{ri},{rj},{rk},{rl}");
+
+        return
+            (ra == 5350) && (rb == 5350) && (rc == 5350) && (rd == 5350) &&
+            (re == 5350) && (rf == 5350) && (rg == 5350) && (rh == 5350) &&
+            (ri == 5350) && (rj == 5350) && (rk == 5350) && (rl == 5350) ? 100 : -1;
+    }
+}
diff --git a/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCallsAliasing.csproj b/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCallsAliasing.csproj
new file mode 100644
index 0000000..bf6f589
--- /dev/null
+++ b/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCallsAliasing.csproj
@@ -0,0 +1,13 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+  </PropertyGroup>
+  <PropertyGroup>
+    <DebugType>None</DebugType>
+    <AllowUnsafeBlocks>True</AllowUnsafeBlocks>
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+</Project>