}
//------------------------------------------------------------------------
+// IsImplicitByrefParameterValue: determine if this tree is the entire
+// value of a local implicit byref parameter
+//
+// Arguments:
+// compiler -- compiler instance
+//
+// Return Value:
+// GenTreeLclVar node for the local if this tree is the entire value of an
+// implicit byref parameter; otherwise, nullptr.
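+//
+// Notes:
+// Matches the shapes LCL_VAR, OBJ(LCL_VAR), and OBJ(ADDR(LCL_VAR)), and
+// returns the LCL_VAR only if lvaIsImplicitByRefLocal reports that the
+// local is an implicit byref parameter.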
+//
+GenTreeLclVar* GenTree::IsImplicitByrefParameterValue(Compiler* compiler)
+{
+#if defined(TARGET_AMD64) || defined(TARGET_ARM64)
+
+ GenTreeLclVar* lcl = nullptr;
+
+ if (OperIs(GT_LCL_VAR))
+ {
+ lcl = AsLclVar();
+ }
+ else if (OperIs(GT_OBJ))
+ {
+ GenTree* addr = AsIndir()->Addr();
+
+ if (addr->OperIs(GT_LCL_VAR))
+ {
+ lcl = addr->AsLclVar();
+ }
+ else if (addr->OperIs(GT_ADDR))
+ {
+ GenTree* base = addr->AsOp()->gtOp1;
+
+ if (base->OperIs(GT_LCL_VAR))
+ {
+ lcl = base->AsLclVar();
+ }
+ }
+ }
+
+ if ((lcl != nullptr) && compiler->lvaIsImplicitByRefLocal(lcl->GetLclNum()))
+ {
+ return lcl;
+ }
+
+#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64)
+
+ return nullptr;
+}
+
+//------------------------------------------------------------------------
// IsLclVarUpdateTree: Determine whether this is an assignment tree of the
// form Vn = Vn 'oper' 'otherTree' where Vn is a lclVar
//
// yields an address into a local
GenTreeLclVarCommon* IsLocalAddrExpr();
+ // Determine if this tree represents the entire value of an implicit byref parameter,
+ // and if so return the tree for the parameter.
+ GenTreeLclVar* IsImplicitByrefParameterValue(Compiler* compiler);
+
// Determine if this is a LclVarCommon node and return some additional info about it in the
// two out parameters.
bool IsLocalExpr(Compiler* comp, GenTreeLclVarCommon** pLclVarTree, FieldSeqNode** pFldSeq);
{
return;
}
+
LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum);
- JITDUMP("LocalAddressVisitor incrementing ref count from %d to %d for V%02d\n", varDsc->lvRefCnt(RCS_EARLY),
- varDsc->lvRefCnt(RCS_EARLY) + 1, lclNum);
+ JITDUMP("LocalAddressVisitor incrementing ref count from %d to %d for implict by-ref V%02d\n",
+ varDsc->lvRefCnt(RCS_EARLY), varDsc->lvRefCnt(RCS_EARLY) + 1, lclNum);
varDsc->incLvRefCnt(1, RCS_EARLY);
+
+ // See if this struct is an argument to a call. This information is recorded
+ // via the weighted early ref count for the local, and feeds the undo promotion
+ // heuristic.
+ //
+ // The check can be approximate, so the pattern match below need not be exhaustive.
+ // But the patterns it does match should be a subset of the implicit byref cases
+ // that are handled in fgCanFastTailCall and fgMakeOutgoingStructArgCopy.
+ //
+ // CALL(OBJ(ADDR(LCL_VAR...)))
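+ //
+ // Here m_ancestors holds the path to the node being visited: Top(0) is
+ // the LCL_VAR itself, Top(1) its ADDR, Top(2) the wrapping OBJ, and
+ // Top(3) the call, which is what the switch below checks for.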
+ bool isArgToCall = false;
+ bool keepSearching = true;
+ for (int i = 0; i < m_ancestors.Height() && keepSearching; i++)
+ {
+ GenTree* node = m_ancestors.Top(i);
+ switch (i)
+ {
+ case 0:
+ {
+ keepSearching = node->OperIs(GT_LCL_VAR);
+ }
+ break;
+
+ case 1:
+ {
+ keepSearching = node->OperIs(GT_ADDR);
+ }
+ break;
+
+ case 2:
+ {
+ keepSearching = node->OperIs(GT_OBJ);
+ }
+ break;
+
+ case 3:
+ {
+ keepSearching = false;
+ isArgToCall = node->IsCall();
+ }
+ break;
+ default:
+ {
+ keepSearching = false;
+ }
+ break;
+ }
+ }
+
+ if (isArgToCall)
+ {
+ JITDUMP("LocalAddressVisitor incrementing weighted ref count from %d to %d"
+ " for implict by-ref V%02d arg passed to call\n",
+ varDsc->lvRefCntWtd(RCS_EARLY), varDsc->lvRefCntWtd(RCS_EARLY) + 1, lclNum);
+ varDsc->incLvRefCntWtd(1, RCS_EARLY);
+ }
}
};
//
// We don't need a copy if this is the last use of an implicit by-ref local.
//
- // We can't determine that all of the time, but if there is only
- // one use and the method has no loops, then this use must be the last.
if (opts.OptimizationEnabled())
{
- GenTreeLclVarCommon* lcl = nullptr;
-
- if (argx->OperIsLocal())
- {
- lcl = argx->AsLclVarCommon();
- }
- else if ((argx->OperGet() == GT_OBJ) && argx->AsIndir()->Addr()->OperIsLocal())
- {
- lcl = argx->AsObj()->Addr()->AsLclVarCommon();
- }
+ GenTreeLclVar* const lcl = argx->IsImplicitByrefParameterValue(this);
if (lcl != nullptr)
{
- unsigned varNum = lcl->AsLclVarCommon()->GetLclNum();
- if (lvaIsImplicitByRefLocal(varNum))
- {
- LclVarDsc* varDsc = &lvaTable[varNum];
- // JIT_TailCall helper has an implicit assumption that all tail call arguments live
- // on the caller's frame. If an argument lives on the caller caller's frame, it may get
- // overwritten if that frame is reused for the tail call. Therefore, we should always copy
- // struct parameters if they are passed as arguments to a tail call.
- if (!call->IsTailCallViaHelper() && (varDsc->lvRefCnt(RCS_EARLY) == 1) && !fgMightHaveLoop())
- {
- assert(!call->IsTailCall());
+ const unsigned varNum = lcl->GetLclNum();
+ LclVarDsc* const varDsc = lvaGetDesc(varNum);
+ const unsigned short totalAppearances = varDsc->lvRefCnt(RCS_EARLY);
- varDsc->setLvRefCnt(0, RCS_EARLY);
- args->SetNode(lcl);
- assert(argEntry->GetNode() == lcl);
+ // We don't have liveness so we rely on other indications of last use.
+ //
+ // We handle these cases:
+ //
+ // * (must not copy) If the call is a tail call, the use is a last use.
+ // We must skip the copy if we have a fast tail call.
+ //
+ // * (must copy) However the current slow tail call helper always copies
+ // the tail call args from the current frame, so we must copy
+ // if the tail call is a slow tail call.
+ //
+ // * (may not copy) If the call is noreturn, the use is a last use.
+ // We also require just one reference here, as we are not doing
+ // alias analysis of the call's parameters, nor checking whether the
+ // call site lies within some try region.
+ //
+ // * (may not copy) If there is exactly one use of the local in the method,
+ // and the call is not in a loop, this use is a last use.
+ //
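+ // For example (see the companion tests), on Windows x64 "return ZZ(x)"
+ // passes Span<int> x as an implicit byref; x appears exactly once and
+ // the call is a tail call, so we can skip the copy as a last use.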
+ const bool isTailCallLastUse = call->IsTailCall();
+ const bool isCallLastUse = (totalAppearances == 1) && !fgMightHaveLoop();
+ const bool isNoReturnLastUse = (totalAppearances == 1) && call->IsNoReturn();
+ if (!call->IsTailCallViaHelper() && (isTailCallLastUse || isCallLastUse || isNoReturnLastUse))
+ {
+ varDsc->setLvRefCnt(0, RCS_EARLY);
+ args->SetNode(lcl);
+ assert(argEntry->GetNode() == lcl);
- JITDUMP("did not have to make outgoing copy for V%2d", varNum);
- return;
- }
+ JITDUMP("did not need to make outgoing copy for last use of implicit byref V%2d\n", varNum);
+ return;
}
}
}
+ JITDUMP("making an outgoing copy for struct arg\n");
+
if (fgOutgoingArgTemps == nullptr)
{
fgOutgoingArgTemps = hashBv::Create(this);
//
// Windows Amd64:
// A fast tail call can be made whenever the number of callee arguments
-// is larger than or equal to the number of caller arguments, or we have four
+// is less than or equal to the number of caller arguments, or we have four
// or fewer callee arguments. This is because, on Windows AMD64, each
// argument uses exactly one register or one 8-byte stack slot. Thus, we only
// need to count arguments, and not be concerned with the size of each
fgInitArgInfo(callee);
fgArgInfo* argInfo = callee->fgArgInfo;
- bool hasByrefParameter = false;
- size_t calleeArgStackSize = 0;
- size_t callerArgStackSize = info.compArgStackSize;
+ bool hasMustCopyByrefParameter = false;
+ size_t calleeArgStackSize = 0;
+ size_t callerArgStackSize = info.compArgStackSize;
for (unsigned index = 0; index < argInfo->ArgCount(); ++index)
{
if (arg->isStruct)
{
- // Byref struct arguments are not allowed to fast tail call as the information
- // of the caller's stack is lost when the callee is compiled.
if (arg->passedByRef)
{
- hasByrefParameter = true;
- break;
+ // Generally a byref arg will block tail calling, as we have to
+ // make a local copy of the struct for the callee.
+ hasMustCopyByrefParameter = true;
+
+ // If we're optimizing, we may be able to pass our caller's byref to our callee,
+ // and so still be able to tail call.
+ if (opts.OptimizationEnabled())
+ {
+ // First, see if this arg is an implicit byref param.
+ GenTreeLclVar* const lcl = arg->GetNode()->IsImplicitByrefParameterValue(this);
+
+ if (lcl != nullptr)
+ {
+ // Yes, the arg is an implicit byref param.
+ const unsigned lclNum = lcl->GetLclNum();
+ LclVarDsc* const varDsc = lvaGetDesc(lcl);
+
+ // The param must not be promoted; if we've promoted, then the arg will be
+ // a local struct assembled from the promoted fields.
+ if (varDsc->lvPromoted)
+ {
+ JITDUMP("Arg [%06u] is promoted implicit byref V%02u, so no tail call\n",
+ dspTreeID(arg->GetNode()), lclNum);
+ }
+ else
+ {
+ JITDUMP("Arg [%06u] is unpromoted implicit byref V%02u, seeing if we can still tail call\n",
+ dspTreeID(arg->GetNode()), lclNum);
+
+ // We have to worry about introducing aliases if we bypass copying
+ // the struct at the call. We'll do some limited analysis to see if we
+ // can rule this out.
+ const unsigned argLimit = 6;
+
+ // If this is the only appearance of the byref in the method, then
+ // aliasing is not possible.
+ //
+ // If no other call arg refers to this byref, and no other arg is
+ // a pointer which could refer to this byref, we can optimize.
+ //
+ // We only check this for calls with small numbers of arguments,
+ // as the analysis cost will be quadratic.
+ //
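+ // For instance (see the companion alias tests), a call such as
+ // Alias(x, x) passes the same implicit byref param twice; skipping
+ // both copies would let the callee observe writes to one parameter
+ // through the other.
+ //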
+ if (varDsc->lvRefCnt(RCS_EARLY) == 1)
+ {
+ JITDUMP("... yes, arg is the only appearance of V%02u\n", lclNum);
+ hasMustCopyByrefParameter = false;
+ }
+ else if (argInfo->ArgCount() <= argLimit)
+ {
+ GenTree* interferingArg = nullptr;
+ for (unsigned index2 = 0; index2 < argInfo->ArgCount(); ++index2)
+ {
+ if (index2 == index)
+ {
+ continue;
+ }
+
+ fgArgTabEntry* const arg2 = argInfo->GetArgEntry(index2, false);
+ JITDUMP("... checking other arg [%06u]...\n", dspTreeID(arg2->GetNode()));
+ DISPTREE(arg2->GetNode());
+
+ // Do we pass 'lcl' more than once to the callee?
+ if (arg2->isStruct && arg2->passedByRef)
+ {
+ GenTreeLclVarCommon* const lcl2 =
+ arg2->GetNode()->IsImplicitByrefParameterValue(this);
+
+ if ((lcl2 != nullptr) && (lclNum == lcl2->GetLclNum()))
+ {
+ // not copying would introduce aliased implicit byref structs
+ // in the callee ... we can't optimize.
+ interferingArg = arg2->GetNode();
+ break;
+ }
+ else
+ {
+ // lcl2 may be nullptr here -- a struct arg passed by ref that
+ // is not an implicit byref param value -- so only dump when we
+ // found a different implicit byref.
+ if (lcl2 != nullptr)
+ {
+ JITDUMP("... arg refers to different implicit byref V%02u\n",
+ lcl2->GetLclNum());
+ }
+ continue;
+ }
+ }
+
+ // Do we pass a byref pointer which might point within 'lcl'?
+ //
+ // We can assume the 'lcl' is unaliased on entry to the
+ // method, so the only way we can have an aliasing byref pointer at
+ // the call is if 'lcl' is address taken/exposed in the method.
+ //
+ // Note even though 'lcl' is not promoted, we are in the middle
+ // of the promote->rewrite->undo->(morph)->demote cycle, and so
+ // might see references to promoted fields of 'lcl' that haven't yet
+ // been demoted (see fgMarkDemotedImplicitByRefArgs).
+ //
+ // So, we also need to scan all 'lcl's fields, if any, to see if they
+ // are exposed.
+ //
+ // When looking for aliases from other args, we check for both TYP_BYREF
+ // and TYP_I_IMPL typed args here. Conceptually anything that points into
+ // an implicit byref parameter should be TYP_BYREF, as these parameters could
+ // refer to boxed heap locations (say if the method is invoked by reflection)
+ // but there are some stack only structs (like typed references) where
+ // the importer/runtime code uses TYP_I_IMPL, and fgInitArgInfo will
+ // transiently retype all simple address-of implicit parameter args as
+ // TYP_I_IMPL.
+ //
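+ // For instance (see the companion alias tests), in
+ // Alias2(x, ref x.s_y) the ref arg is a TYP_BYREF pointing into x;
+ // taking the field's address marks x as address-taken, so the
+ // exposure check below catches this case.
+ //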
+ if ((arg2->argType == TYP_BYREF) || (arg2->argType == TYP_I_IMPL))
+ {
+ JITDUMP("...arg is a byref, must run an alias check\n");
+ bool checkExposure = true;
+ bool hasExposure = false;
+
+ // See if there is any way arg could refer to a parameter struct.
+ GenTree* arg2Node = arg2->GetNode();
+ if (arg2Node->OperIs(GT_LCL_VAR))
+ {
+ GenTreeLclVarCommon* arg2LclNode = arg2Node->AsLclVarCommon();
+ assert(arg2LclNode->GetLclNum() != lclNum);
+ LclVarDsc* arg2Dsc = lvaGetDesc(arg2LclNode);
+
+ // Other params can't alias implicit byref params
+ if (arg2Dsc->lvIsParam)
+ {
+ checkExposure = false;
+ }
+ }
+ // Because we're checking TYP_I_IMPL above, at least
+ // screen out obvious things that can't cause aliases.
+ else if (arg2Node->IsIntegralConst())
+ {
+ checkExposure = false;
+ }
+
+ if (checkExposure)
+ {
+ JITDUMP(
+ "... not sure where byref arg points, checking if V%02u is exposed\n",
+ lclNum);
+ // arg2 might alias arg, see if we've exposed
+ // arg somewhere in the method.
+ if (varDsc->lvHasLdAddrOp || varDsc->lvAddrExposed)
+ {
+ // Struct as a whole is exposed, can't optimize
+ JITDUMP("... V%02u is exposed\n", lclNum);
+ hasExposure = true;
+ }
+ else if (varDsc->lvFieldLclStart != 0)
+ {
+ // This is the promoted/undone struct case.
+ //
+ // The field start is actually the local number of the promoted local,
+ // use it to enumerate the fields.
+ const unsigned promotedLcl = varDsc->lvFieldLclStart;
+ LclVarDsc* const promotedVarDsc = lvaGetDesc(promotedLcl);
+ JITDUMP("...promoted-unpromoted case -- also checking exposure of "
+ "fields of V%02u\n",
+ promotedLcl);
+
+ for (unsigned fieldIndex = 0; fieldIndex < promotedVarDsc->lvFieldCnt;
+ fieldIndex++)
+ {
+ LclVarDsc* fieldDsc =
+ lvaGetDesc(promotedVarDsc->lvFieldLclStart + fieldIndex);
+
+ if (fieldDsc->lvHasLdAddrOp || fieldDsc->lvAddrExposed)
+ {
+ // Promoted and not yet demoted field is exposed, can't optimize
+ JITDUMP("... field V%02u is exposed\n",
+ promotedVarDsc->lvFieldLclStart + fieldIndex);
+ hasExposure = true;
+ break;
+ }
+ }
+ }
+ }
+
+ if (hasExposure)
+ {
+ interferingArg = arg2->GetNode();
+ break;
+ }
+ }
+ else
+ {
+ JITDUMP("...arg is not a byref or implicit byref (%s)\n",
+ varTypeName(arg2->GetNode()->TypeGet()));
+ }
+ }
+
+ if (interferingArg != nullptr)
+ {
+ JITDUMP("... no, arg [%06u] may alias with V%02u\n", dspTreeID(interferingArg),
+ lclNum);
+ }
+ else
+ {
+ JITDUMP("... yes, no other arg in call can alias V%02u\n", lclNum);
+ hasMustCopyByrefParameter = false;
+ }
+ }
+ else
+ {
+ JITDUMP(" ... no, call has %u > %u args, alias analysis deemed too costly\n",
+ argInfo->ArgCount(), argLimit);
+ }
+ }
+ }
+ }
+
+ if (hasMustCopyByrefParameter)
+ {
+ // This arg blocks the tail call. No reason to keep scanning the remaining args.
+ break;
+ }
}
}
}
#endif // DEBUG
};
-// Note on vararg methods:
-// If the caller is vararg method, we don't know the number of arguments passed by caller's caller.
-// But we can be sure that in-coming arg area of vararg caller would be sufficient to hold its
-// fixed args. Therefore, we can allow a vararg method to fast tail call other methods as long as
-// out-going area required for callee is bounded by caller's fixed argument space.
-//
-// Note that callee being a vararg method is not a problem since we can account the params being passed.
-//
-// We will currently decide to not fast tail call on Windows armarch if the caller or callee is a vararg
-// method. This is due to the ABI differences for native vararg methods for these platforms. There is
-// work required to shuffle arguments to the correct locations.
+ // Note on vararg methods:
+ // If the caller is a vararg method, we don't know the number of arguments passed by the caller's caller.
+ // But we can be sure that the incoming arg area of the vararg caller is sufficient to hold its
+ // fixed args. Therefore, we can allow a vararg method to fast tail call other methods as long as
+ // the outgoing area required for the callee is bounded by the caller's fixed argument space.
+ //
+ // Note that the callee being a vararg method is not a problem since we can account for the params being passed.
+ //
+ // We will currently decide to not fast tail call on Windows armarch if the caller or callee is a vararg
+ // method. This is due to the ABI differences for native vararg methods for these platforms. There is
+ // work required to shuffle arguments to the correct locations.
+ CLANG_FORMAT_COMMENT_ANCHOR;
#if (defined(TARGET_WINDOWS) && defined(TARGET_ARM)) || (defined(TARGET_WINDOWS) && defined(TARGET_ARM64))
if (info.compIsVarArgs || callee->IsVarargs())
// For Windows some struct parameters are copied on the local frame
// and then passed by reference. We cannot fast tail call in these situation
// as we need to keep our frame around.
- if (hasByrefParameter)
+ if (hasMustCopyByrefParameter)
{
reportFastTailCallDecision("Callee has a byref parameter");
return false;
// It cannot be an inline candidate
assert(!call->IsInlineCandidate());
- auto failTailCall = [&](const char* reason) {
+ auto failTailCall = [&](const char* reason, unsigned lclNum = BAD_VAR_NUM) {
#ifdef DEBUG
if (verbose)
{
printf("\nRejecting tail call in morph for call ");
printTreeID(call);
- printf(": %s\n", reason);
+ printf(": %s", reason);
+ if (lclNum != BAD_VAR_NUM)
+ {
+ printf(" V%02u", lclNum);
+ }
+ printf("\n");
}
#endif
//
if (call->IsImplicitTailCall())
{
- if (varDsc->lvHasLdAddrOp)
+ if (varDsc->lvHasLdAddrOp && !lvaIsImplicitByRefLocal(varNum))
{
- failTailCall("Local address taken");
+ failTailCall("Local address taken", varNum);
return nullptr;
}
if (varDsc->lvAddrExposed)
}
else
{
- failTailCall("Local address taken");
+ failTailCall("Local address taken", varNum);
return nullptr;
}
}
if (varDsc->lvPromoted && varDsc->lvIsParam && !lvaIsImplicitByRefLocal(varNum))
{
- failTailCall("Has Struct Promoted Param");
+ failTailCall("Has Struct Promoted Param", varNum);
return nullptr;
}
if (varDsc->lvPinned)
{
// A tail call removes the method from the stack, which means the pinning
// goes away for the callee. We can't allow that.
- failTailCall("Has Pinned Vars");
+ failTailCall("Has Pinned Vars", varNum);
return nullptr;
}
}
// through the pointer parameter, the same as we'd do for this
// parameter if it weren't promoted at all (otherwise the initialization
// of the new temp would just be a needless memcpy at method entry).
- bool undoPromotion = (lvaGetPromotionType(newVarDsc) == PROMOTION_TYPE_DEPENDENT) ||
- (varDsc->lvRefCnt(RCS_EARLY) <= varDsc->lvFieldCnt);
+ //
+ // Otherwise, see how many appearances there are. We keep two early ref counts: total
+ // number of references to the struct or some field, and how many of these are
+ // arguments to calls. We undo promotion unless we see enough non-call uses.
+ //
+ const unsigned totalAppearances = varDsc->lvRefCnt(RCS_EARLY);
+ const unsigned callAppearances = varDsc->lvRefCntWtd(RCS_EARLY);
+ assert(totalAppearances >= callAppearances);
+ const unsigned nonCallAppearances = totalAppearances - callAppearances;
+
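+ // For example, a two-field struct with three total appearances, two
+ // of which are call args, has one non-call appearance; since 1 <= 2,
+ // we would undo promotion.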
+ bool undoPromotion = ((lvaGetPromotionType(newVarDsc) == PROMOTION_TYPE_DEPENDENT) ||
+ (nonCallAppearances <= varDsc->lvFieldCnt));
+
+ JITDUMP("%s promotion of implicit by-ref V%02u: %s total: %u non-call: %u fields: %u\n",
+ undoPromotion ? "Undoing" : "Keeping", lclNum,
+ (lvaGetPromotionType(newVarDsc) == PROMOTION_TYPE_DEPENDENT) ? "dependent;" : "",
+ totalAppearances, nonCallAppearances, varDsc->lvFieldCnt);
if (!undoPromotion)
{
{
// Set the new parent.
fieldVarDsc->lvParentLcl = newLclNum;
- // Clear the ref count field; it is used to communicate the nubmer of references
+ // Clear the ref count field; it is used to communicate the number of references
// to the implicit byref parameter when morphing calls that pass the implicit byref
// out as an outgoing argument value, but that doesn't pertain to this field local
// which is now a field of a non-arg local.
--- /dev/null
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Runtime.CompilerServices;
+
+// Tail calls with implicit byref parameters as arguments.
+//
+// Generally, we can tail call provided we don't copy the
+// argument to the local frame.
+
+public class ImplicitByrefTailCalls
+{
+ public static void Z() { }
+ public static bool Z(bool b) => b;
+
+ [MethodImpl(MethodImplOptions.NoOptimization)]
+ public static int ZZ(Span<int> x)
+ {
+ int result = 0;
+ for (int i = 0; i < x.Length; i++)
+ {
+ result += x[i] * x[i] + i;
+ }
+ return result;
+ }
+
+ [MethodImpl(MethodImplOptions.NoOptimization)]
+ public static int ZZZ(Span<int> x, int a, int b, int c, int d, int e, int f)
+ {
+ int result = 0;
+ for (int i = 0; i < x.Length; i++)
+ {
+ result += x[i] * x[i] + i;
+ }
+ return result;
+ }
+
+ // Various methods that should fast tail call
+
+ public static int A(Span<int> x)
+ {
+ Z(); Z(); return ZZ(x);
+ }
+
+ public static int B(Span<int> x)
+ {
+ Z(); Z(); return A(x);
+ }
+
+ public static int C(Span<int> x, bool p)
+ {
+ Z(); Z();
+ if (Z(p))
+ {
+ return A(x);
+ }
+ else
+ {
+ return B(x);
+ }
+ }
+
+ public static int D(Span<int> x, bool p)
+ {
+ Z(); Z();
+ return Z(p) ? A(x) : B(x);
+ }
+
+ public static int E(Span<int> x, Span<int> y, bool p)
+ {
+ Z(); Z(); Z(); Z();
+ if (Z(p))
+ {
+ return A(x);
+ }
+ else
+ {
+ return B(y);
+ }
+ }
+
+ public static int F(Span<int> x, Span<int> y, bool p)
+ {
+ Z(); Z(); Z(); Z();
+ return Z(p) ? ZZ(x) : ZZ(y);
+ }
+
+ // Methods with tail call sites and other uses
+
+ public static int G(Span<int> x, bool p)
+ {
+ Z(); Z();
+ if (Z(p))
+ {
+ return ZZ(x);
+ }
+ else
+ {
+ return x.Length;
+ }
+ }
+
+ // Here there are enough actual uses to
+ // justify promotion, hence we can't tail
+ // call, as "undoing" promotion at the end
+ // entails making a local copy.
+ //
+ // We could handle this by writing the updated
+ // struct values back to the original byref
+ // before tail calling....
+ public static int H(Span<int> x, int y)
+ {
+ Z(); Z();
+ if (y < x.Length)
+ {
+ int result = 0;
+ for (int i = 0; i < y; i++)
+ {
+ result += x[i];
+ }
+ return result;
+ }
+
+ return ZZ(x);
+ }
+
+ // Here the call to ZZZ would need to be a slow tail call,
+ // so we won't tail call at all. But the only
+ // reference to x is at the call, and it's not in
+ // a loop, so we can still avoid making a copy.
+ public static int S(Span<int> x)
+ {
+ Z(); Z();
+ return ZZZ(x, 1, 2, 3, 4, 5, 6);
+ }
+
+ // Must copy at the normal call site
+ // but can avoid at tail call site. However
+ // we're currently blocked because we reuse
+ // the temp used to pass the argument and
+ // it is exposed in the first call.
+ public static int T(Span<int> x, ref int q)
+ {
+ Z(); Z();
+ q = ZZ(x);
+ return ZZ(x);
+ }
+
+ // Here ZZZ is called in a loop, so despite
+ // the call argument x being the only reference to
+ // x in the method, we must copy x.
+ // We can't consider it a last use.
+ public static int L(Span<int> x)
+ {
+ Z(); Z();
+ int result = 0;
+ int limit = 10;
+ for (int i = 0; i < limit; i++)
+ {
+ result += ZZZ(x, 1, 2, 3, 4, 5, 6);
+ }
+
+ return result / limit;
+ }
+
+ static void MustThrow(Span<int> x)
+ {
+ x[0]++;
+ throw new Exception();
+ }
+
+ // Because MustThrow must throw
+ // and the argument is the only
+ // mention of x, we do not need to copy x.
+ public static int M(Span<int> x)
+ {
+ Z(); Z(); Z(); Z();
+ if (p)
+ {
+ MustThrow(x);
+ }
+ return 10000;
+ }
+
+ // Although MustThrow must throw,
+ // the argument x is not the only
+ // mention of x, so we must copy x.
+ public static int N(Span<int> x)
+ {
+ Z(); Z(); Z(); Z();
+ try
+ {
+ if (p)
+ {
+ MustThrow(x);
+ }
+ }
+ catch (Exception)
+ {
+ }
+
+ return 10000 + x[0];
+ }
+
+ static bool p;
+
+ public static int Main()
+ {
+ int[] a = new int[100];
+ a[45] = 55;
+ a[55] = 45;
+ p = false;
+
+ Span<int> s = new Span<int>(a);
+ int q = 0;
+
+ int ra = A(s);
+ int rb = B(s);
+ int rc = C(s, p);
+ int rd = D(s, p);
+ int re = E(s, s, p);
+ int rf = F(s, s, p);
+ int rg = G(s, p);
+ int rh = H(s, 46);
+ int rs = S(s);
+ int rt = T(s, ref q);
+ int rl = L(s);
+ int rm = M(s);
+ int rn = N(s);
+
+ Console.WriteLine($"{ra},{rb},{rc},{rd},{re},{rf},{rg},{rh},{rs},{rt},{rl},{rm},{rn}");
+
+ return ra + rb + rc + rd + re + rf + rg + rh + rs + rt + rl + rm + rn - 110055;
+ }
+}
--- /dev/null
+<Project Sdk="Microsoft.NET.Sdk">
+ <PropertyGroup>
+ <OutputType>Exe</OutputType>
+ </PropertyGroup>
+ <PropertyGroup>
+ <DebugType>None</DebugType>
+ <Optimize>True</Optimize>
+ <AllowUnsafeBlocks>True</AllowUnsafeBlocks>
+ </PropertyGroup>
+ <ItemGroup>
+ <Compile Include="$(MSBuildProjectName).cs" />
+ </ItemGroup>
+</Project>
--- /dev/null
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Runtime.CompilerServices;
+
+public struct S
+{
+ public long s_x;
+ public long s_y;
+}
+
+public struct R
+{
+ public S r_s;
+ public S r_t;
+}
+
+// Tail calls with implicit byref parameters as arguments.
+//
+// We need to ensure that we don't introduce aliased
+// implicit byref parameters by optimizing away copies.
+
+public class ImplicitByrefTailCalls
+{
+ // Helper method to make callees unattractive for inlining.
+ public static void Z() { }
+
+ // Will return different answers if x and y refer to the same struct.
+ [MethodImpl(MethodImplOptions.NoOptimization)]
+ public static long Alias(S x, S y)
+ {
+ y.s_x++;
+ long result = 0;
+ for (int i = 0; i < 100; i++)
+ {
+ x.s_x++;
+ result += x.s_x + x.s_y;
+ }
+ return result;
+ }
+
+ // Will return different answers if y refers to some part of x.
+ [MethodImpl(MethodImplOptions.NoOptimization)]
+ public static long Alias2(S x, ref long y)
+ {
+ y++;
+ long result = 0;
+ for (int i = 0; i < 100; i++)
+ {
+ x.s_x++;
+ result += x.s_x + x.s_y;
+ }
+ return result;
+ }
+
+ // Will return different answers if x and y refer to the same struct.
+ [MethodImpl(MethodImplOptions.NoOptimization)]
+ public static long Alias3(int a, int b, int c, int d, int e, int f, S x, S y)
+ {
+ y.s_x++;
+ long result = 0;
+ for (int i = 0; i < 100; i++)
+ {
+ x.s_x++;
+ result += x.s_x + x.s_y;
+ }
+ return result;
+ }
+
+ // Will return different answers if y refers to some part of x
+ [MethodImpl(MethodImplOptions.NoOptimization)]
+ public static long Alias4(ref long y, int b, int c, int d, int e, int f, S x)
+ {
+ y++;
+ long result = 0;
+ for (int i = 0; i < 100; i++)
+ {
+ x.s_x++;
+ result += x.s_x + x.s_y;
+ }
+ return result;
+ }
+
+ // Will return different answers if x and r.s refer to the same struct.
+ [MethodImpl(MethodImplOptions.NoOptimization)]
+ public static long Alias5(S x, R r)
+ {
+ r.r_s.s_x++;
+ long result = 0;
+ for (int i = 0; i < 100; i++)
+ {
+ x.s_x++;
+ result += x.s_x + x.s_y;
+ }
+ return result;
+ }
+
+ // Will return different answers if ss and x refer to the same struct
+ [MethodImpl(MethodImplOptions.NoOptimization)]
+ public static long Alias6(Span<S> ss, S x)
+ {
+ ss[0].s_x++;
+ long result = 0;
+ for (int i = 0; i < 100; i++)
+ {
+ x.s_x++;
+ result += x.s_x + x.s_y;
+ }
+ return result;
+ }
+
+ // Will return different answers if x and y refer to the same struct.
+ [MethodImpl(MethodImplOptions.NoOptimization)]
+ public static long Alias7(S x, ref S y)
+ {
+ y.s_x++;
+ long result = 0;
+ for (int i = 0; i < 100; i++)
+ {
+ x.s_x++;
+ result += x.s_x + x.s_y;
+ }
+ return result;
+ }
+
+ // A must copy params locally when calling Alias
+ // and so can't tail call
+ public static long A(S x)
+ {
+ Z(); Z(); Z(); Z();
+ return Alias(x, x);
+ }
+
+ // B must copy params locally when calling Alias2
+ // and so can't tail call
+ public static long B(S x)
+ {
+ Z(); Z(); Z(); Z();
+ return Alias2(x, ref x.s_y);
+ }
+
+ // C must copy params locally when calling Alias2
+ // and so can't tail call. Here the problematic
+ // tree is not part of the call.
+ public static long C(S x)
+ {
+ ref long z = ref x.s_y;
+ Z(); Z(); Z(); Z();
+ return Alias2(x, ref z);
+ }
+
+ // D should not be able to tail call, as doing so
+ // means we are not making local copies of x, and
+ // so would introduce aliasing.
+ public static long D(int a, int b, int c, int d, int e, int f, S x, S y)
+ {
+ Z(); Z(); Z(); Z();
+ Z(); Z(); Z(); Z();
+ Z(); Z(); Z(); Z();
+ return Alias3(1, 2, 3, 4, 5, 6, x, x);
+ }
+
+ // E should be able to tail call
+ public static long E(int a, int b, int c, int d, int e, int f, S x, S y)
+ {
+ Z(); Z(); Z(); Z();
+ Z(); Z(); Z(); Z();
+ Z(); Z(); Z(); Z();
+ return Alias3(1, 2, 3, 4, 5, 6, x, y);
+ }
+
+ // F should be able to tail call
+ public static long F(int a, int b, int c, int d, int e, int f, S x, S y)
+ {
+ Z(); Z(); Z(); Z();
+ Z(); Z(); Z(); Z();
+ Z(); Z(); Z(); Z();
+ return Alias3(1, 2, 3, 4, 5, 6, y, x);
+ }
+
+ // G should be able to tail call, but we
+ // might not want to pay the cost to prove it safe.
+ public static long G(int a, int b, int c, int d, int e, int f, S x, S y)
+ {
+ Z(); Z(); Z(); Z();
+ Z(); Z(); Z(); Z();
+ Z(); Z(); Z(); Z();
+
+ if (a != 0)
+ {
+ return Alias4(ref x.s_x, 2, 3, 4, 5, 6, y);
+ }
+ else
+ {
+ return Alias4(ref y.s_x, 2, 3, 4, 5, 6, x);
+ }
+ }
+
+ // H must copy params locally when calling Alias
+ // and so can't tail call
+ public static long H(R r)
+ {
+ Z(); Z(); Z(); Z();
+ return Alias(r.r_s, r.r_s);
+ }
+
+ // I must copy params locally when calling Alias5
+ // and so can't tail call
+ public static long I(R r)
+ {
+ Z(); Z(); Z(); Z();
+ return Alias5(r.r_s, r);
+ }
+
+ // J can tail call, but we might not recognize this
+ public static long J(R r)
+ {
+ Z(); Z(); Z(); Z();
+ return Alias(r.r_s, r.r_t);
+ }
+
+ // K cannot tail call
+ public static unsafe long K(S s)
+ {
+ Z(); Z(); Z(); Z();
+ Span<S> ss = new Span<S>((void*)&s, 1);
+ return Alias6(ss, s);
+ }
+
+ // L cannot tail call as the first arg to
+ // Alias7 must be copied locally
+ public static long L(S s)
+ {
+ Z(); Z(); Z(); Z();
+ return Alias7(s, ref s);
+ }
+
+ public static int Main()
+ {
+ S s = new S();
+ s.s_x = 1;
+ s.s_y = 2;
+ R r = new R();
+ r.r_s = s;
+ r.r_t = s;
+ long ra = A(s);
+ long rb = B(s);
+ long rc = C(s);
+ long rd = D(0, 0, 0, 0, 0, 0, s, s);
+ long re = E(0, 0, 0, 0, 0, 0, s, s);
+ long rf = F(0, 0, 0, 0, 0, 0, s, s);
+ long rg = G(0, 0, 0, 0, 0, 0, s, s);
+ long rh = H(r);
+ long ri = I(r);
+ long rj = J(r);
+ long rk = K(s);
+ long rl = L(s);
+
+ Console.WriteLine($"{ra},{rb},{rc},{rd},{re},{rf},{rg},{rh},{ri},{rj},{rk},{rl}");
+
+ return
+ (ra == 5350) && (rb == 5350) && (rc == 5350) && (rd == 5350) &&
+ (re == 5350) && (rf == 5350) && (rg == 5350) && (rh == 5350) &&
+ (ri == 5350) && (rj == 5350) && (rk == 5350) && (rl == 5350) ? 100 : -1;
+ }
+}
--- /dev/null
+<Project Sdk="Microsoft.NET.Sdk">
+ <PropertyGroup>
+ <OutputType>Exe</OutputType>
+ </PropertyGroup>
+ <PropertyGroup>
+ <DebugType>None</DebugType>
+ <Optimize>True</Optimize>
+ <AllowUnsafeBlocks>True</AllowUnsafeBlocks>
+ </PropertyGroup>
+ <ItemGroup>
+ <Compile Include="$(MSBuildProjectName).cs" />
+ </ItemGroup>
+</Project>