* Added GTF_CALL_M_EXPANDED_EARLY to flag virtual calls that should be expanded early during fgMorph
Added COMPlus_JitExpandCallsEarly variable to enable virtual calls to be expanded early on a per method basis
Set opts.compExpandCallsEarly to true when we are optimizing and have COMPlus_JitExpandCallsEarly enabled
Update gtSetEvalOrder to also include the call->gtControlExpr
Update morph to call fgExpandVirtualVtableCallTarget when we are expanding early
Update lower to not call LowerVirtualVtableCall when we have already expanded it early
Modify CheckTreeId to print the duplicated gtTreeID before it asserts.
All tests are passing when using COMPLUS_JitExpandCallsEarly=*
Expand the Virtual Call target after we morph the args
Fix an inadvertent change in the GT_CALL weights
* Changed the default for Virtual calls to be expanded early in Morph
Use COMPlus_JitExpandCallsEarly=0 to disable and use old behavior
* Code Review feedback
Added a comment stating that the isRelative code path is never executed
* Fixes for propagating gtControlExpr->gtFlags
* Fix a few code size regressions when we perform a tail call
* Tailcall lower fix
* Code Review changes
* Fixes for the TAILCALL_HELPER path for x86
* Address the Arm/Linux failure
}
}
+#if TARGET_ARM
+ // A single JitStress=1 Linux ARM32 test fails when we expand virtual calls early
+ // JIT\HardwareIntrinsics\General\Vector128_1\Vector128_1_ro
+ //
+ opts.compExpandCallsEarly = (JitConfig.JitExpandCallsEarly() == 2);
+#else
+ opts.compExpandCallsEarly = (JitConfig.JitExpandCallsEarly() != 0);
+#endif
+
fgCanRelocateEHRegions = true;
}
#ifdef TARGET_UNIX
info.compMatchedVM = info.compMatchedVM && (eeInfo->osType == CORINFO_UNIX);
#else
- info.compMatchedVM = info.compMatchedVM && (eeInfo->osType == CORINFO_WINNT);
+ info.compMatchedVM = info.compMatchedVM && (eeInfo->osType == CORINFO_WINNT);
#endif
// If we are not compiling for a matched VM, then we are getting JIT flags that don't match our target
noway_assert(!"GetArgEntry: argNum not found");
return nullptr;
}
+ void SetNeedsTemps()
+ {
+ needsTemps = true;
+ }
// Get the node for the arg at position argIndex.
// Caller must ensure that this index is a valid arg index.
Statement* paramAssignmentInsertionPoint);
static int fgEstimateCallStackSize(GenTreeCall* call);
GenTree* fgMorphCall(GenTreeCall* call);
+ GenTree* fgExpandVirtualVtableCallTarget(GenTreeCall* call);
void fgMorphCallInline(GenTreeCall* call, InlineResult* result);
void fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result);
#if DEBUG
bool dspGCtbls; // Display the GC tables
#endif
+ bool compExpandCallsEarly; // True if we should expand virtual call targets early for this method
+
// Default numbers used to perform loop alignment. All the numbers are choosen
// based on experimenting with various benchmarks.
chkFlags |= (call->gtCallAddr->gtFlags & GTF_SIDE_EFFECT);
}
+ if ((call->gtControlExpr != nullptr) && call->IsExpandedEarly() && call->IsVirtualVtable())
+ {
+ fgDebugCheckFlags(call->gtControlExpr);
+ chkFlags |= (call->gtControlExpr->gtFlags & GTF_SIDE_EFFECT);
+ }
+
if (call->IsUnmanaged() && (call->gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL))
{
if (call->gtCallArgs->GetNode()->OperGet() == GT_NOP)
}
//------------------------------------------------------------------------
- // CheckTreeId: Check that this tree was not visit before and memorize it as visited.
+ // CheckTreeId: Check that this tree was not visited before and memorize it as visited.
//
// Arguments:
// gtTreeID - identificator of GenTree.
//
+ // Note:
+ // This method causes an assert failure when we find a duplicated node in our tree
+ //
void CheckTreeId(unsigned gtTreeID)
{
- assert(!BitVecOps::IsMember(&nodesVecTraits, uniqueNodes, gtTreeID));
- BitVecOps::AddElemD(&nodesVecTraits, uniqueNodes, gtTreeID);
+ if (BitVecOps::IsMember(&nodesVecTraits, uniqueNodes, gtTreeID))
+ {
+ if (comp->verbose)
+ {
+ printf("Duplicate gtTreeID was found: %d\n", gtTreeID);
+ }
+ assert(!"Duplicate gtTreeID was found");
+ }
+ else
+ {
+ BitVecOps::AddElemD(&nodesVecTraits, uniqueNodes, gtTreeID);
+ }
}
private:
costEx = 5;
costSz = 2;
+ GenTreeCall* call;
+ call = tree->AsCall();
+
/* Evaluate the 'this' argument, if present */
if (tree->AsCall()->gtCallThisArg != nullptr)
/* Evaluate the arguments, right to left */
- if (tree->AsCall()->gtCallArgs != nullptr)
+ if (call->gtCallArgs != nullptr)
{
const bool lateArgs = false;
- lvl2 = gtSetCallArgsOrder(tree->AsCall()->Args(), lateArgs, &costEx, &costSz);
+ lvl2 = gtSetCallArgsOrder(call->Args(), lateArgs, &costEx, &costSz);
if (level < lvl2)
{
level = lvl2;
* This is a "hidden" list and its only purpose is to
* extend the life of temps until we make the call */
- if (tree->AsCall()->gtCallLateArgs != nullptr)
+ if (call->gtCallLateArgs != nullptr)
{
const bool lateArgs = true;
- lvl2 = gtSetCallArgsOrder(tree->AsCall()->LateArgs(), lateArgs, &costEx, &costSz);
+ lvl2 = gtSetCallArgsOrder(call->LateArgs(), lateArgs, &costEx, &costSz);
if (level < lvl2)
{
level = lvl2;
}
}
- if (tree->AsCall()->gtCallType == CT_INDIRECT)
+ if (call->gtCallType == CT_INDIRECT)
{
// pinvoke-calli cookie is a constant, or constant indirection
- assert(tree->AsCall()->gtCallCookie == nullptr || tree->AsCall()->gtCallCookie->gtOper == GT_CNS_INT ||
- tree->AsCall()->gtCallCookie->gtOper == GT_IND);
+ assert(call->gtCallCookie == nullptr || call->gtCallCookie->gtOper == GT_CNS_INT ||
+ call->gtCallCookie->gtOper == GT_IND);
- GenTree* indirect = tree->AsCall()->gtCallAddr;
+ GenTree* indirect = call->gtCallAddr;
lvl2 = gtSetEvalOrder(indirect);
if (level < lvl2)
}
else
{
+ if (call->IsVirtual())
+ {
+ GenTree* controlExpr = call->gtControlExpr;
+ if (controlExpr != nullptr)
+ {
+ lvl2 = gtSetEvalOrder(controlExpr);
+ if (level < lvl2)
+ {
+ level = lvl2;
+ }
+ costEx += controlExpr->GetCostEx();
+ costSz += controlExpr->GetCostSz();
+ }
+ }
#ifdef TARGET_ARM
- if (tree->AsCall()->IsVirtualStub())
+ if (call->IsVirtualStub())
{
// We generate movw/movt/ldr
costEx += (1 + IND_COST_EX);
costSz += 8;
- if (tree->AsCall()->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT)
+ if (call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT)
{
// Must use R12 for the ldr target -- REG_JUMP_THUNK_PARAM
costSz += 2;
}
costSz += 2;
#endif
+
#ifdef TARGET_XARCH
costSz += 3;
#endif
level += 1;
/* Virtual calls are a bit more expensive */
- if (tree->AsCall()->IsVirtual())
+ if (call->IsVirtual())
{
costEx += 2 * IND_COST_EX;
costSz += 2;
copy->gtCallType = tree->gtCallType;
copy->gtReturnType = tree->gtReturnType;
- copy->gtControlExpr = tree->gtControlExpr;
+ copy->gtControlExpr = gtCloneExpr(tree->gtControlExpr, addFlags, deepVarNum, deepVarVal);
/* Copy the union */
if (tree->gtCallType == CT_INDIRECT)
}
if (tree->AsCall()->IsVirtualVtable())
{
- gtfType = " ind";
+ gtfType = " vt-ind";
}
else if (tree->AsCall()->IsVirtualStub())
{
#define GTF_CALL_M_SUPPRESS_GC_TRANSITION 0x00800000 // GT_CALL -- suppress the GC transition (i.e. during a pinvoke) but a separate GC safe point is required.
#define GTF_CALL_M_EXP_RUNTIME_LOOKUP 0x01000000 // GT_CALL -- this call needs to be tranformed into CFG for the dynamic dictionary expansion feature.
#define GTF_CALL_M_STRESS_TAILCALL 0x02000000 // GT_CALL -- the call is NOT "tail" prefixed but GTF_CALL_M_EXPLICIT_TAILCALL was added because of tail call stress mode
+#define GTF_CALL_M_EXPANDED_EARLY 0x04000000 // GT_CALL -- the Virtual Call target address is expanded and placed in gtControlExpr in Morph rather than in Lower
// clang-format on
return (gtCallMoreFlags & GTF_CALL_M_EXP_RUNTIME_LOOKUP) != 0;
}
+ void SetExpandedEarly()
+ {
+ gtCallMoreFlags |= GTF_CALL_M_EXPANDED_EARLY;
+ }
+
+ void ClearExpandedEarly()
+ {
+ gtCallMoreFlags &= ~GTF_CALL_M_EXPANDED_EARLY;
+ }
+
+ bool IsExpandedEarly() const
+ {
+ return (gtCallMoreFlags & GTF_CALL_M_EXPANDED_EARLY) != 0;
+ }
+
unsigned gtCallMoreFlags; // in addition to gtFlags
unsigned char gtCallType : 3; // value from the gtCallTypes enumeration
assert(!(clsFlags & CORINFO_FLG_VALUECLASS));
call = gtNewCallNode(CT_USER_FUNC, callInfo->hMethod, callRetTyp, nullptr, ilOffset);
call->gtFlags |= GTF_CALL_VIRT_VTABLE;
+
+ // Should we expand virtual call targets early for this method?
+ //
+ if (opts.compExpandCallsEarly)
+ {
+ // Mark this method to expand the virtual call target early in fgMorphCall
+ call->AsCall()->SetExpandedEarly();
+ }
break;
}
CONFIG_INTEGER(JitClassProfiling, W("JitClassProfiling"), 0)
CONFIG_INTEGER(JitEdgeProfiling, W("JitEdgeProfiling"), 0)
+// Control when Virtual Calls are expanded
+CONFIG_INTEGER(JitExpandCallsEarly, W("JitExpandCallsEarly"), 1) // Expand Call targets early (in the global morph
+ // phase)
+
#if defined(DEBUG)
// JitFunctionFile: Name of a file that contains a list of functions. If the currently compiled function is in the
// file, certain other JIT config variables will be active. If the currently compiled function is not in the file,
LowerArgsForCall(call);
// note that everything generated from this point on runs AFTER the outgoing args are placed
- GenTree* controlExpr = nullptr;
+ GenTree* controlExpr = nullptr;
+ bool callWasExpandedEarly = false;
// for x86, this is where we record ESP for checking later to make sure stack is balanced
break;
case GTF_CALL_VIRT_VTABLE:
- // stub dispatching is off or this is not a virtual call (could be a tailcall)
- controlExpr = LowerVirtualVtableCall(call);
+ assert(call->IsVirtualVtable());
+ if (!call->IsExpandedEarly())
+ {
+ assert(call->gtControlExpr == nullptr);
+ controlExpr = LowerVirtualVtableCall(call);
+ }
+ else
+ {
+ callWasExpandedEarly = true;
+ controlExpr = call->gtControlExpr;
+ }
break;
case GTF_CALL_NONVIRT:
controlExpr = LowerTailCallViaJitHelper(call, controlExpr);
}
- if (controlExpr != nullptr)
+ // Check if we need to thread a newly created controlExpr into the LIR
+ //
+ if ((controlExpr != nullptr) && !callWasExpandedEarly)
{
LIR::Range controlExprRange = LIR::SeqTree(comp, controlExpr);
// This is a register argument - possibly update it in the table.
call->fgArgInfo->UpdateRegArg(thisArgEntry, argx, reMorphing);
flagsSummary |= argx->gtFlags;
+
+ if (!reMorphing && call->IsExpandedEarly() && call->IsVirtualVtable())
+ {
+ if (!argx->OperIsLocal())
+ {
+ thisArgEntry->needTmp = true;
+ call->fgArgInfo->SetNeedsTemps();
+ }
+ }
assert(argIndex == 0);
argIndex++;
DEBUG_ARG_SLOTS_ONLY(argSlots++;)
info.compCompHnd->reportTailCallDecision(nullptr,
(call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr,
call->IsTailPrefixedCall(), tailCallResult, nullptr);
+
+ // Are we currently planning to expand the gtControlExpr as an early virtual call target?
+ //
+ if (call->IsExpandedEarly() && call->IsVirtualVtable())
+ {
+ // It isn't always profitable to expand a virtual call early
+ //
+ // We always expand the TAILCALL_HELPER type late.
+ // And we expand late when we have an optimized tail call
+ // and the this pointer needs to be evaluated into a temp.
+ //
+ if (tailCallResult == TAILCALL_HELPER)
+ {
+ // We will always expand this late in lower instead.
+ // (see LowerTailCallViaJitHelper as it needs some work
+ // for us to be able to expand this earlier in morph)
+ //
+ call->ClearExpandedEarly();
+ }
+ else if ((tailCallResult == TAILCALL_OPTIMIZED) &&
+ ((call->gtCallThisArg->GetNode()->gtFlags & GTF_SIDE_EFFECT) != 0))
+ {
+ // We generate better code when we expand this late in lower instead.
+ //
+ call->ClearExpandedEarly();
+ }
+ }
+
// Now actually morph the call.
compTailCallUsed = true;
// This will prevent inlining this call.
call = fgMorphArgs(call);
noway_assert(call->gtOper == GT_CALL);
+ // Should we expand this virtual method call target early here?
+ //
+ if (call->IsExpandedEarly() && call->IsVirtualVtable())
+ {
+ // We only expand the Vtable Call target once in the global morph phase
+ if (fgGlobalMorph)
+ {
+ assert(call->gtControlExpr == nullptr); // We only call this method and assign gtControlExpr once
+ call->gtControlExpr = fgExpandVirtualVtableCallTarget(call);
+ }
+ // We always have to morph or re-morph the control expr
+ //
+ call->gtControlExpr = fgMorphTree(call->gtControlExpr);
+
+ // Propagate any gtFlags into the call
+ call->gtFlags |= call->gtControlExpr->gtFlags;
+ }
+
// Morph stelem.ref helper call to store a null value, into a store into an array without the helper.
// This needs to be done after the arguments are morphed to ensure constant propagation has already taken place.
if (opts.OptimizationEnabled() && (call->gtCallType == CT_HELPER) &&
/*****************************************************************************
*
+ * Expand and return the call target address for a VirtualCall
+ * The code here should match that generated by LowerVirtualVtableCall
+ */
+
+GenTree* Compiler::fgExpandVirtualVtableCallTarget(GenTreeCall* call)
+{
+ GenTree* result;
+
+ JITDUMP("Expanding virtual call target for %d.%s:\n", call->gtTreeID, GenTree::OpName(call->gtOper));
+
+ noway_assert(call->gtCallType == CT_USER_FUNC);
+
+ // get a reference to the thisPtr being passed
+ fgArgTabEntry* thisArgTabEntry = gtArgEntryByArgNum(call, 0);
+ GenTree* thisPtr = thisArgTabEntry->GetNode();
+
+ // fgMorphArgs must enforce this invariant by creating a temp
+ //
+ assert(thisPtr->OperIsLocal());
+
+ // Make a copy of the thisPtr by cloning
+ //
+ thisPtr = gtClone(thisPtr, true);
+
+ noway_assert(thisPtr != nullptr);
+
+ // Get hold of the vtable offset
+ unsigned vtabOffsOfIndirection;
+ unsigned vtabOffsAfterIndirection;
+ bool isRelative;
+ info.compCompHnd->getMethodVTableOffset(call->gtCallMethHnd, &vtabOffsOfIndirection, &vtabOffsAfterIndirection,
+ &isRelative);
+
+ // Dereference the this pointer to obtain the method table, it is called vtab below
+ GenTree* vtab;
+ assert(VPTR_OFFS == 0); // We have to add this value to the thisPtr to get the methodTable
+ vtab = gtNewOperNode(GT_IND, TYP_I_IMPL, thisPtr);
+ vtab->gtFlags |= GTF_IND_INVARIANT;
+
+ // Get the appropriate vtable chunk
+ if (vtabOffsOfIndirection != CORINFO_VIRTUALCALL_NO_CHUNK)
+ {
+ // Note this isRelative code path is currently never executed
+ // as the VM doesn't ever return: isRelative == true
+ //
+ if (isRelative)
+ {
+ // MethodTable offset is a relative pointer.
+ //
+ // Additional temporary variable is used to store virtual table pointer.
+ // Address of method is obtained by the next computations:
+ //
+ // Save relative offset to tmp (vtab is virtual table pointer, vtabOffsOfIndirection is offset of
+ // vtable-1st-level-indirection):
+ // tmp = vtab
+ //
+ // Save address of method to result (vtabOffsAfterIndirection is offset of vtable-2nd-level-indirection):
+ // result = [tmp + vtabOffsOfIndirection + vtabOffsAfterIndirection + [tmp + vtabOffsOfIndirection]]
+ //
+ //
+ // When isRelative is true we need to setup two temporary variables
+ // var1 = vtab
+ // var2 = var1 + vtabOffsOfIndirection + vtabOffsAfterIndirection + [var1 + vtabOffsOfIndirection]
+ // result = [var2] + var2
+ //
+ unsigned varNum1 = lvaGrabTemp(true DEBUGARG("var1 - vtab"));
+ unsigned varNum2 = lvaGrabTemp(true DEBUGARG("var2 - relative"));
+ GenTree* asgVar1 = gtNewTempAssign(varNum1, vtab); // var1 = vtab
+
+ // [tmp + vtabOffsOfIndirection]
+ GenTree* tmpTree1 = gtNewOperNode(GT_ADD, TYP_I_IMPL, gtNewLclvNode(varNum1, TYP_I_IMPL),
+ gtNewIconNode(vtabOffsOfIndirection, TYP_INT));
+ tmpTree1 = gtNewOperNode(GT_IND, TYP_I_IMPL, tmpTree1, false);
+ tmpTree1->gtFlags |= GTF_IND_NONFAULTING;
+ tmpTree1->gtFlags |= GTF_IND_INVARIANT;
+
+ // var1 + vtabOffsOfIndirection + vtabOffsAfterIndirection
+ GenTree* tmpTree2 = gtNewOperNode(GT_ADD, TYP_I_IMPL, gtNewLclvNode(varNum1, TYP_I_IMPL),
+ gtNewIconNode(vtabOffsOfIndirection + vtabOffsAfterIndirection, TYP_INT));
+
+ // var1 + vtabOffsOfIndirection + vtabOffsAfterIndirection + [var1 + vtabOffsOfIndirection]
+ tmpTree2 = gtNewOperNode(GT_ADD, TYP_I_IMPL, tmpTree2, tmpTree1);
+ GenTree* asgVar2 = gtNewTempAssign(varNum2, tmpTree2); // var2 = <expression>
+
+ // This last indirection is not invariant, but is non-faulting
+ result = gtNewOperNode(GT_IND, TYP_I_IMPL, gtNewLclvNode(varNum2, TYP_I_IMPL), false); // [var2]
+ result->gtFlags |= GTF_IND_NONFAULTING;
+
+ result = gtNewOperNode(GT_ADD, TYP_I_IMPL, result, gtNewLclvNode(varNum2, TYP_I_IMPL)); // [var2] + var2
+
+ // Now stitch together the two assignment and the calculation of result into a single tree
+ GenTree* commaTree = gtNewOperNode(GT_COMMA, TYP_I_IMPL, asgVar2, result);
+ result = gtNewOperNode(GT_COMMA, TYP_I_IMPL, asgVar1, commaTree);
+ }
+ else
+ {
+ // result = [vtab + vtabOffsOfIndirection]
+ result = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtab, gtNewIconNode(vtabOffsOfIndirection, TYP_INT));
+ result = gtNewOperNode(GT_IND, TYP_I_IMPL, result, false);
+ result->gtFlags |= GTF_IND_NONFAULTING;
+ result->gtFlags |= GTF_IND_INVARIANT;
+ }
+ }
+ else
+ {
+ result = vtab;
+ assert(!isRelative);
+ }
+
+ if (!isRelative)
+ {
+ // Load the function address
+ // result = [result + vtabOffsAfterIndirection]
+ result = gtNewOperNode(GT_ADD, TYP_I_IMPL, result, gtNewIconNode(vtabOffsAfterIndirection, TYP_INT));
+ // This last indirection is not invariant, but is non-faulting
+ result = gtNewOperNode(GT_IND, TYP_I_IMPL, result, false);
+ result->gtFlags |= GTF_IND_NONFAULTING;
+ }
+
+ return result;
+}
+
+/*****************************************************************************
+ *
* Transform the given GTK_CONST tree for code generation.
*/