// x8 through x15 are scratch registers on ARM64.
IntReg x8 = IntReg(8);
IntReg x9 = IntReg(9);
+
+#elif defined(HOST_RISCV64)
+#else
#error unsupported platform
#endif
}
argState.AddArg(vaSigCookieIndex);
}
-#if defined(HOST_ARM) || defined(HOST_AMD64) || defined(HOST_ARM64)
+#if defined(HOST_ARM) || defined(HOST_AMD64) || defined(HOST_ARM64) || defined(HOST_RISCV64)
// The generics context comes before the args on these platforms. It would be better to factor
// this out as a call, to avoid large swaths of duplicate code.
if (hasGenericsContextArg)
{
    argPerm[genericsContextArgIndex] = physArgIndex; physArgIndex++;
    argState.AddArg(genericsContextArgIndex);
}
-#endif // HOST_ARM || HOST_AMD64 || HOST_ARM64
+#endif // HOST_ARM || HOST_AMD64 || HOST_ARM64 || HOST_RISCV64
CORINFO_ARG_LIST_HANDLE argPtr = info->args.args;
// Some arguments have been passed in registers, some in memory. We must generate code that
sl.X86EmitPopReg(kEBP);
sl.X86EmitReturn(static_cast<WORD>(argState.callerArgStackSlots * sizeof(void*)));
#elif defined(UNIX_AMD64_ABI)
- bool hasTowRetSlots = info->args.retType == CORINFO_TYPE_VALUECLASS &&
+ bool hasTwoRetSlots = info->args.retType == CORINFO_TYPE_VALUECLASS &&
getClassSize(info->args.retTypeClass) == 16;
int fixedTwoSlotSize = 16;
sl.X86EmitRegLoad(ARGUMENT_kREG1, reinterpret_cast<UINT_PTR>(interpMethInfo));
sl.X86EmitCall(sl.NewExternalCodeLabel(interpretMethodFunc), 0);
- if (hasTowRetSlots) {
+ if (hasTwoRetSlots) {
sl.X86EmitEspOffset(0x8b, kRAX, 0);
sl.X86EmitEspOffset(0x8b, kRDX, 8);
}
#elif defined(HOST_LOONGARCH64)
assert(!"unimplemented on LOONGARCH yet");
#elif defined(HOST_RISCV64)
- assert(!"unimplemented on RISCV64 yet");
+ bool hasTwoRetSlots = info->args.retType == CORINFO_TYPE_VALUECLASS &&
+ getClassSize(info->args.retTypeClass) == 16;
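+ // Per the RISC-V calling convention, a 16-byte value class is returned in a0/a1. The prolog
+ // below reserves two extra stack slots for it; the interpreter writes the result there and
+ // the stub loads the two slots back into registers after the call.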
+
+ UINT stackFrameSize = argState.numFPRegArgSlots;
+
+ sl.EmitProlog(argState.numRegArgs, argState.numFPRegArgSlots, hasTwoRetSlots ? 2 * sizeof(void*) : 0);
+
+#if INTERP_ILSTUBS
+ if (pMD->IsILStub())
+ {
+ // Third argument is stubcontext, in t2 (METHODDESC_REGISTER).
+ sl.EmitMovReg(IntReg(12), IntReg(7));
+ }
+ else
+#endif
+ {
+ // For a non-ILStub method, pass NULL as the third StubContext argument.
+ sl.EmitMovConstant(IntReg(12), 0);
+ }
+ // Second arg is a pointer to the base of the IL args array -- i.e., the current stack value.
+ sl.EmitAddImm(IntReg(11), RegSp, sl.GetSavedRegArgsOffset());
+
+ // First arg is the pointer to the interpMethodInfo structure
+ sl.EmitMovConstant(IntReg(10), reinterpret_cast<UINT64>(interpMethInfo));
+
+ sl.EmitCallLabel(sl.NewExternalCodeLabel((LPVOID)interpretMethodFunc), FALSE, FALSE);
+ if (hasTwoRetSlots)
+ {
+ // TODO: also handle return values that use float registers; for now both slots are
+ // loaded into the int return registers a0/a1.
+ sl.EmitLoad(IntReg(10), RegSp, 0);
+ sl.EmitLoad(IntReg(11), RegSp, sizeof(void*));
+ }
+
+ sl.EmitEpilog();
#else
#error unsupported platform
#endif
// The fixed two-slot return buffer address
memcpy(m_ilArgs-16, OpStackGet<void*>(0), sz);
}
+#elif defined(TARGET_RISCV64)
+ // Is it a struct contained in two slots?
+ else if (m_methInfo->m_returnType == CORINFO_TYPE_VALUECLASS
+ && sz == 16)
+ {
+ // The fixed two-slot return buffer address
+ memcpy(m_ilArgs-32, OpStackGet<void*>(0), sz);
+ }
#endif
else if (CorInfoTypeIsFloatingPoint(m_methInfo->m_returnType) &&
CorInfoTypeIsFloatingPoint(retValIt.ToCorInfoType()))
HFAReturnArgSlots = (HFAReturnArgSlots + sizeof(ARG_SLOT) - 1) / sizeof(ARG_SLOT);
}
}
-#elif defined(UNIX_AMD64_ABI)
+#elif defined(UNIX_AMD64_ABI) || defined(TARGET_RISCV64)
unsigned HasTwoSlotBuf = sigInfo.retType == CORINFO_TYPE_VALUECLASS &&
getClassSize(sigInfo.retTypeClass) == 16;
#endif
// This is the argument slot that will be used to hold the return value.
// On UNIX_AMD64_ABI and RISCV64, the return type may need two ARG_SLOTs.
ARG_SLOT retVals[2] = {0, 0};
-#if !defined(HOST_ARM) && !defined(UNIX_AMD64_ABI)
+#if !defined(HOST_ARM) && !defined(UNIX_AMD64_ABI) && !defined(TARGET_RISCV64)
_ASSERTE (NUMBER_RETURNVALUE_SLOTS == 1);
#endif
bool b = CycleTimer::GetThreadCyclesS(&startCycles); _ASSERTE(b);
#endif // INTERP_ILCYCLE_PROFILE
-#if defined(UNIX_AMD64_ABI)
+#if defined(UNIX_AMD64_ABI) || defined(TARGET_RISCV64)
mdcs.CallTargetWorker(args, retVals, HasTwoSlotBuf ? 16 : 8);
#else
mdcs.CallTargetWorker(args, retVals, 8);
{
OpStackSet<INT64>(m_curStackHt, GetSmallStructValue(&smallStructRetVal, retTypeSz));
}
-#if defined(UNIX_AMD64_ABI)
+#if defined(UNIX_AMD64_ABI) || defined(TARGET_RISCV64)
else if (HasTwoSlotBuf)
{
void* dst = LargeStructOperandStackPush(16);
void EmitComputedInstantiatingMethodStub(MethodDesc* pSharedMD, struct ShuffleEntry *pShuffleEntryArray, void* extraArg);
#endif // FEATURE_SHARE_GENERIC_CODE
-private:
void EmitMovConstant(IntReg target, UINT64 constant);
void EmitJumpRegister(IntReg regTarget);
void EmitMovReg(IntReg dest, IntReg source);
void EmitLoad(FloatReg dest, IntReg srcAddr, int offset = 0);
void EmitStore(IntReg src, IntReg destAddr, int offset = 0);
void EmitStore(FloatReg src, IntReg destAddr, int offset = 0);
+
+ void EmitProlog(unsigned short cIntRegArgs, unsigned short cFpRegArgs, unsigned short cbStackSpace = 0);
+ void EmitEpilog();
};
extern "C" void SinglecastDelegateInvokeStub();
Emit32(0x00000067 | (regTarget << 15));
}
+void StubLinkerCPU::EmitProlog(unsigned short cIntRegArgs, unsigned short cFpRegArgs, unsigned short cbStackSpace)
+{
+ _ASSERTE(!m_fProlog);
+
+ unsigned short numberOfEntriesOnStack = 2 + cIntRegArgs + cFpRegArgs; // 2 for fp, ra
+
+ // The stack must be 16-byte aligned. Compute the required padding before saving anything.
+ unsigned short totalPaddedFrameSize = static_cast<unsigned short>(ALIGN_UP(cbStackSpace + numberOfEntriesOnStack * sizeof(void*), 2 * sizeof(void*)));
+ // Fold the padding into the local stack space.
+ cbStackSpace = totalPaddedFrameSize - numberOfEntriesOnStack * sizeof(void*);
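+ // Worked example (hypothetical counts): with 4 int args, 2 FP args, and cbStackSpace = 12,
+ // numberOfEntriesOnStack = 2 + 4 + 2 = 8 (64 bytes), so totalPaddedFrameSize =
+ // ALIGN_UP(12 + 64, 16) = 80 and the adjusted cbStackSpace becomes 80 - 64 = 16.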
+
+ // Record the parameters of this prolog so that we can generate a matching epilog and unwind info.
+ DescribeProlog(cIntRegArgs, cFpRegArgs, cbStackSpace);
+
+ // N.B. Although "sub sp" has a 4KB range, we limit frames to 504 bytes so that the prolog
+ // stays efficiently expressible in unwind codes: 504 is the largest offset in the typical
+ // unwind-info encodings we use, so a larger allocation would require adjusting SP in
+ // multiple strides, complicating prolog, epilog, and unwind-info generation alike.
+ _ASSERTE((totalPaddedFrameSize <= 504) && "NYI: RISCV64 StubLinker prologs with frame sizes larger than 504 bytes");
+ if (totalPaddedFrameSize > 504)
+ COMPlusThrow(kNotSupportedException);
+
+ // Here is what the stack looks like (drawn with low addresses at the top, so the stack grows up in this diagram):
+ // [Low Address]
+ // +------------+
+ // SP -> | | <-+
+ // : : | Stack Frame, (i.e outgoing arguments) including padding
+ // | | <-+
+ // +------------+
+ // | FP |
+ // +------------+
+ // | RA |
+ // +------------+
+ // | F10 | <-+
+ // +------------+ |
+ // : : | Fp Args
+ // +------------+ |
+ // | F17 | <-+
+ // +------------+
+ // | X10 | <-+
+ // +------------+ |
+ // : : | Int Args
+ // +------------+ |
+ // | X17 | <-+
+ // +------------+
+ // Old SP -> |[Stack Args]|
+ // [High Address]
+
+ // Regarding the order of operations in the prolog and epilog:
+ // If the prolog and the epilog mirror each other, they can share the same unwind codes,
+ // which simplifies emitting them and saves a few bytes of unwind data. For that, the
+ // epilog must be the exact reverse of the prolog. However, restoring the argument registers
+ // would be completely unnecessary, and saving argument registers cannot be expressed by the
+ // unwind-code encodings anyway. So we save the argument registers at the very end of the
+ // prolog, skip restoring them in the epilog, and also skip reporting them to the OS.
+ //
+ // Another thing we can save is resetting the frame pointer. That is unnecessary as long as
+ // SP is not modified between the prolog and the epilog (i.e., no alloca/localloc), and in
+ // that case we need not report setting up the FP either.
+
+ // 1. Relocate SP
+ EmitSubImm(RegSp, RegSp, totalPaddedFrameSize);
+
+ unsigned cbOffset = 2 * sizeof(void*) + cbStackSpace; // 2 is for fp, ra
+
+ // 2. Store FP/RA
+ EmitStore(RegFp, RegSp, cbStackSpace);
+ EmitStore(RegRa, RegSp, cbStackSpace + sizeof(void*));
+
+ // 3. Set the frame pointer
+ EmitMovReg(RegFp, RegSp);
+
+ // 4. Store floating point argument registers
+ _ASSERTE(cFpRegArgs <= 8);
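+ // FloatReg(i + 10) maps to f10..f17 (fa0..fa7), the RISC-V FP argument registers.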
+ for (unsigned short i = 0; i < cFpRegArgs; i++)
+ EmitStore(FloatReg(i + 10), RegSp, cbOffset + i * sizeof(void*));
+
+ // 5. Store int argument registers
+ cbOffset += cFpRegArgs * sizeof(void*);
+ _ASSERTE(cIntRegArgs <= 8);
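+ // IntReg(i + 10) maps to x10..x17 (a0..a7), the RISC-V integer argument registers.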
+ for (unsigned short i = 0 ; i < cIntRegArgs; i++)
+ EmitStore(IntReg(i + 10), RegSp, cbOffset + i * sizeof(void*));
+}
+
+void StubLinkerCPU::EmitEpilog()
+{
+ _ASSERTE(m_fProlog);
+
+ // 5. Restore int argument registers
+ // nop: We don't need to. They are scratch registers
+
+ // 4. Restore floating point argument registers
+ // nop: We don't need to. They are scratch registers
+
+ // 3. Restore the SP from FP
+ // N.B. We're assuming that StubLinker stubs don't use alloca, hence this is a nop.
+
+ // 2. Restore FP/RA
+ EmitLoad(RegFp, RegSp, m_cbStackSpace);
+ EmitLoad(RegRa, RegSp, m_cbStackSpace + sizeof(void*));
+
+ // 1. Restore SP
+ EmitAddImm(RegSp, RegSp, GetStackFrameSize());
+
+ // jalr x0, 0(ra)
+ EmitJumpRegister(RegRa);
+}
+
// Instruction types as per RISC-V Spec, Chapter 24 RV32/64G Instruction Set Listings
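// For example, "addi a0, sp, 16" is the I-type instruction
// ITypeInstr(0x13 /*OP-IMM*/, 0x0 /*ADDI*/, 10 /*a0*/, 2 /*sp*/, 16).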
static unsigned ITypeInstr(unsigned opcode, unsigned funct3, unsigned rd, unsigned rs1, int imm12)
{
m_cbStackFrame = 0;
m_fPushArgRegs = FALSE;
#endif
+#ifdef TARGET_RISCV64
+ m_fProlog = FALSE;
+ m_cIntRegArgs = 0;
+ m_cFpRegArgs = 0;
+ m_cbStackSpace = 0;
+#endif
#ifdef STUBLINKER_GENERATES_UNWIND_INFO
#ifdef _DEBUG
m_pUnwindInfoCheckLabel = NULL;
return m_cbStackSpace + (2 + m_cCalleeSavedRegs + m_cIntRegArgs + m_cVecRegArgs)*sizeof(void*);
}
-#endif // ifdef TARGET_ARM, elif defined(TARGET_ARM64)
+#elif defined(TARGET_RISCV64)
+void StubLinker::DescribeProlog(UINT cIntRegArgs, UINT cFpRegArgs, UINT cbStackSpace)
+{
+ m_fProlog = TRUE;
+ m_cIntRegArgs = cIntRegArgs;
+ m_cFpRegArgs = cFpRegArgs;
+ m_cbStackSpace = cbStackSpace;
+}
+
+UINT StubLinker::GetSavedRegArgsOffset()
+{
+ _ASSERTE(m_fProlog);
+ // This is the offset from SP
+ // We're assuming that the stublinker will push the arg registers to the bottom of the stack frame
+ return m_cbStackSpace + 2 * sizeof(void*); // 2 is for FP and RA
+}
+
+UINT StubLinker::GetStackFrameSize()
+{
+ _ASSERTE(m_fProlog);
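+ // For instance (hypothetical counts): 4 int args, 2 FP args, and 16 bytes of local stack
+ // space give (2 + 4 + 2) * 8 + 16 = 80 bytes, matching totalPaddedFrameSize in EmitProlog.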
+ return m_cbStackSpace + (2 + m_cIntRegArgs + m_cFpRegArgs) * sizeof(void*);
+}
+
+#endif // ifdef TARGET_ARM, elif defined(TARGET_ARM64), elif defined(TARGET_RISCV64)
#endif // #ifndef DACCESS_COMPILE
void DescribeProlog(UINT cIntRegArgs, UINT cVecRegArgs, UINT cCalleeSavedRegs, UINT cbStackFrame);
UINT GetSavedRegArgsOffset();
UINT GetStackFrameSize();
+#elif defined(TARGET_RISCV64)
+ void DescribeProlog(UINT cIntRegArgs, UINT cFpRegArgs, UINT cbStackSpace);
+ UINT GetSavedRegArgsOffset();
+ UINT GetStackFrameSize();
#endif
//===========================================================================
UINT m_cbStackSpace; // Additional stack space for return buffer and stack alignment
#endif // TARGET_ARM64
+#ifdef TARGET_RISCV64
+protected:
+ BOOL m_fProlog; // True if DescribeProlog has been called
+ UINT m_cIntRegArgs; // Count of int register arguments (x10 - x17)
+ UINT m_cFpRegArgs; // Count of FP register arguments (f10 - f17)
+ UINT m_cbStackSpace; // Additional stack space for return buffer and stack alignment
+#endif // TARGET_RISCV64
+
#ifdef STUBLINKER_GENERATES_UNWIND_INFO
#ifdef _DEBUG