Indirect VSD calls on x86 require not only that the address of the
VSD indirection cell is passed to the stub in EAX, but also that the
call instruction is
a) preceded by a 3-byte NOP, and
b) exactly `call [eax]`.
On x64, these types of calls only require that the indirection cell
address is passed in R11 (i.e. they do not require the generation of
a specific call instruction encoding). The RyuJIT IR is therefore
able to represent such calls succinctly as something like:
    t72 = lclVar ref V04 loc1 u:3 (last use) $240
    /--* t72 ref
    t295 = * putarg_reg ref
    t202 = lclVar long V09 tmp4 u:4 $382
    /--* t202 long
    t296 = * putarg_reg long
    t106 = lclVar long V09 tmp4 u:4 (last use) $382
    /--* t106 long
    t297 = * indir long
    /--* t295 ref this in rcx
    +--* t296 long arg1 in r11
    +--* t297 long calli tgt
    t107 = * call ind stub ref $24a
In this form, the address of the indirection cell is in the lclVar
`tmp4`, which is then used by both a `putarg_reg` to move the
argument into R11 and by the indirection that generates the call
target. Because there are a relatively large number of registers on
x64, this works out nicely: the address of the indirection cell is
frequently allocated to R11, few extraneous copies are required,
and the code generator produces `call [r11]` for the call instruction.
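For illustration, the emitted x64 sequence is then typically just the
following (the source register shown is illustrative; it depends on
the allocation of `tmp4`):

    mov  r11, rsi            ; elided when tmp4 is already in r11
    call qword ptr [r11]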
Unfortunately, the situation is not so straightforward on x86: not
only must the code generator both pass the address of the indirection
cell in EAX and produce the specific call form mentioned earlier, but
there are also far fewer available registers. As a result, the
address of the indirection cell is rarely allocated to EAX, and
(barring an implicit understanding in the code generator that a
previous putarg_reg has placed the address of the indirection cell
into EAX) a redundant `mov eax, ...` is required before the call.
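For example, if the indirection cell address were allocated to ESI
(an illustrative choice), the generated code would look something
like:

    mov  eax, esi            ; redundant copy required by the pattern
    nop                      ; 3-byte NOP form
    call dword ptr [eax]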
Ideally, we would be able to store the address of the indirection cell
to a local with a very short lifetime and pre-allocate that local to
EAX, but the IR does not have that capability, and adding it now
seems to be prohibitively expensive. Instead, this change omits the
`putarg_reg` used to put the indirection cell address into the
required register on other platforms and simply uses the `calli tgt`
operand to the call to represent both the non-standard argument and
the call target:
    t40 = lclVar ref V04 loc1 u:3 $1c0
    /--* t40 ref
    t280 = * putarg_reg ref
    t70 = lclVar int V06 loc3 u:4 (last use) $2c1
    /--* t70 int
    t281 = * indir int
    /--* t280 ref this in ecx
    +--* t281 int calli tgt
    t71 = * call ind stub ref $1c6
Lowering then marks the indirection as contained and sets the
destination candidates for its operand to EAX.
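Assuming the allocator honors those candidates and places the operand
in EAX (the common case), the generated code should collapse to the
required pattern with no extra copy:

    nop                      ; 3-byte NOP form
    call dword ptr [eax]     ; EAX holds the indirection cell address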
Commit migrated from https://github.com/dotnet/coreclr/commit/f7d8a4a7dbd9ce5b2981d7c9632ac108b2dcef93
if (target != nullptr)
{
+#ifdef _TARGET_X86_
+ if (((call->gtFlags & GTF_CALL_VIRT_KIND_MASK) == GTF_CALL_VIRT_STUB) && (call->gtCallType == CT_INDIRECT))
+ {
+ // On x86, we need to generate a very specific pattern for indirect VSD calls:
+ //
+ // 3-byte nop
+ // call dword ptr [eax]
+ //
+ // Where EAX is also used as an argument to the stub dispatch helper. Make
+ // sure that the call target address is computed into EAX in this case.
+
+ assert(target->isContainedIndir());
+
+ // Disable random NOP emission so that nothing lands between the 3-byte NOP and the call
+ getEmitter()->emitDisableRandomNops();
+
+ GenTreeIndir* indir = target->AsIndir();
+ assert(indir->Addr() == indir->Base());
+ assert(indir->HasBase());
+ assert(!indir->HasIndex());
+ assert(indir->Scale() == 1);
+ assert(indir->Offset() == 0);
+
+ GenTree* base = indir->Base();
+ genConsumeReg(base);
+
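+ // Lowering set EAX as the destination candidate for this node, but the
+ // allocator is not guaranteed to honor it; copy the address if needed.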
+ if (base->gtRegNum != REG_EAX)
+ {
+ inst_RV_RV(INS_mov, REG_EAX, base->gtRegNum, TYP_I_IMPL);
+ }
+
+ getEmitter()->emitIns_Nop(3);
+
+ getEmitter()->emitIns_Call(emitter::EmitCallType(emitter::EC_INDIR_ARD), methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo) nullptr, argSizeForEmitter, retSize
+ MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
+ gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
+ ilOffset, REG_EAX, REG_NA, indir->Scale(), indir->Offset());
+
+ // Re-enable random NOP emission
+ getEmitter()->emitEnableRandomNops();
+ }
+ else
+#endif
+
if (target->isContainedIndir())
{
if (target->AsIndir()->HasBase() && target->AsIndir()->Base()->isContainedIntOrIImmed())
}
#endif
- // TODO-Cleanup: Disable emitting random NOPs
-
// This is code to set up an indirect call to a stub address computed
// via dictionary lookup.
if (call->gtCallType == CT_INDIRECT)
{
- NYI_X86("Virtual Stub dispatched call lowering via dictionary lookup");
-
// The importer decided we needed a stub call via a computed
// stub dispatch address, i.e. an address which came from a dictionary lookup.
// - The dictionary lookup produces an indirected address, suitable for call
assert(ctrlExpr == nullptr);
assert(call->gtCallAddr != nullptr);
ctrlExpr = call->gtCallAddr;
+
+#ifdef _TARGET_X86_
+ // Fast tail calls aren't currently supported on x86, but if they ever are, the code
+ // below that handles indirect VSD calls will need to be fixed.
+ assert(!call->IsFastTailCall() || ((call->gtFlags & GTF_CALL_VIRT_KIND_MASK) != GTF_CALL_VIRT_STUB));
+#endif // _TARGET_X86_
}
// set reg requirements on call target represented as control sequence.
// computed into a register.
if (!call->IsFastTailCall())
{
+#ifdef _TARGET_X86_
+ // On x86, we need to generate a very specific pattern for indirect VSD calls:
+ //
+ // 3-byte nop
+ // call dword ptr [eax]
+ //
+ // Where EAX is also used as an argument to the stub dispatch helper. Make
+ // sure that the call target address is computed into EAX in this case.
+ if (((call->gtFlags & GTF_CALL_VIRT_KIND_MASK) == GTF_CALL_VIRT_STUB) && (call->gtCallType == CT_INDIRECT))
+ {
+ assert(ctrlExpr->isIndir());
+
+ ctrlExpr->gtGetOp1()->gtLsraInfo.setDstCandidates(l, RBM_EAX);
+ MakeSrcContained(call, ctrlExpr);
+ }
+ else
+#endif // _TARGET_X86_
if (ctrlExpr->isIndir())
{
MakeSrcContained(call, ctrlExpr);
// *********** END NOTE *********
CLANG_FORMAT_COMMENT_ANCHOR;
-#if !defined(LEGACY_BACKEND) && defined(_TARGET_X86_)
+#if !defined(LEGACY_BACKEND)
+#if defined(_TARGET_X86_)
// The x86 CORINFO_HELP_INIT_PINVOKE_FRAME helper has a custom calling convention. Set the argument registers
// correctly here.
if (call->IsHelperCall(this, CORINFO_HELP_INIT_PINVOKE_FRAME))
assert(arg2 != nullptr);
nonStandardArgs.Add(arg2, REG_LNGARG_HI);
}
-#endif // !defined(LEGACY_BACKEND) && defined(_TARGET_X86_)
-
-#if !defined(LEGACY_BACKEND) && !defined(_TARGET_X86_)
+#else // defined(_TARGET_X86_)
// TODO-X86-CQ: Currently RyuJIT/x86 passes args on the stack, so this is not needed.
// If/when we change that, the following code needs to be changed to correctly support the (TBD) managed calling
// convention for x86/SSE.
nonStandardArgs.Add(arg, REG_VIRTUAL_STUB_PARAM);
}
- else if (call->gtCallType == CT_INDIRECT && call->gtCallCookie)
+ else
+#endif // !defined(_TARGET_X86_)
+ if (call->gtCallType == CT_INDIRECT && call->gtCallCookie)
{
assert(!call->IsUnmanaged());
- // put cookie into R11
GenTree* arg = call->gtCallCookie;
noway_assert(arg != nullptr);
call->gtCallCookie = nullptr;
call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
numArgs++;
+ // x86 passes the cookie on the stack.
+#if !defined(_TARGET_X86_)
+ // put cookie into R11
nonStandardArgs.Add(arg, REG_PINVOKE_COOKIE_PARAM);
+#endif
- // put destination into R10
+ // put destination into R10/EAX
arg = gtClone(call->gtCallAddr, true);
call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
numArgs++;
call->gtCallType = CT_HELPER;
call->gtCallMethHnd = eeFindHelper(CORINFO_HELP_PINVOKE_CALLI);
}
-#endif // !defined(LEGACY_BACKEND) && !defined(_TARGET_X86_)
+#endif // !defined(LEGACY_BACKEND)
// Allocate the fgArgInfo for the call node;
//
#define RBM_CALLEE_TRASH_NOGC RBM_EDX
#endif // NOGC_WRITE_BARRIERS
+ // GenericPInvokeCalliHelper unmanaged target parameter
+ #define REG_PINVOKE_TARGET_PARAM REG_EAX
+ #define RBM_PINVOKE_TARGET_PARAM RBM_EAX
+
// IL stub's secret parameter (CORJIT_FLG_PUBLISH_SECRET_PARAM)
#define REG_SECRET_STUB_PARAM REG_EAX
#define RBM_SECRET_STUB_PARAM RBM_EAX