1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
6 // NOTE on Frame Size C_ASSERT usage in this file
7 // If a frame size changes, the stubs that depend on it must be revisited for correctness.
8 // Revisit the stub logic and then update the hard-coded constants so that the C_ASSERT fires again
9 // the next time someone changes the frame size. You are expected to keep these constants
10 // up to date so that a frame-size change triggers a compile-time error whenever the stub code is not updated to match.
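//
// A minimal sketch of the pattern described above (both names here are hypothetical;
// the real asserts and constants live next to the stubs that depend on them):
//
//   C_ASSERT(sizeof(SomeTransitionFrame) == SOME_TRANSITION_FRAME_SIZE);
//
// If the frame layout drifts from the hand-maintained constant, the build breaks and the
// stub emission code must be re-verified before the constant is updated.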
22 #include "dllimport.h"
24 #include "comdelegate.h"
26 #include "jitinterface.h"
28 #include "dbginterface.h"
29 #include "eeprofinterfaces.h"
32 #include "asmconstants.h"
33 #endif // _TARGET_X86_
35 #include "stublink.inl"
37 #ifdef FEATURE_COMINTEROP
38 #include "comtoclrcall.h"
39 #include "runtimecallablewrapper.h"
41 #include "olevariant.h"
42 #include "notifyexternals.h"
43 #endif // FEATURE_COMINTEROP
49 #if defined(_DEBUG) && defined(STUBLINKER_GENERATES_UNWIND_INFO)
54 #ifndef DACCESS_COMPILE
56 extern "C" VOID __cdecl StubRareEnable(Thread *pThread);
57 #ifdef FEATURE_COMINTEROP
58 extern "C" HRESULT __cdecl StubRareDisableHR(Thread *pThread);
59 #endif // FEATURE_COMINTEROP
60 extern "C" VOID __cdecl StubRareDisableTHROW(Thread *pThread, Frame *pFrame);
62 #ifndef FEATURE_ARRAYSTUB_AS_IL
63 extern "C" VOID __cdecl ArrayOpStubNullException(void);
64 extern "C" VOID __cdecl ArrayOpStubRangeException(void);
65 extern "C" VOID __cdecl ArrayOpStubTypeMismatchException(void);
67 #if defined(_TARGET_AMD64_)
68 #define EXCEPTION_HELPERS(base) \
69 extern "C" VOID __cdecl base##_RSIRDI_ScratchArea(void); \
70 extern "C" VOID __cdecl base##_ScratchArea(void); \
71 extern "C" VOID __cdecl base##_RSIRDI(void); \
72 extern "C" VOID __cdecl base(void)
73 EXCEPTION_HELPERS(ArrayOpStubNullException);
74 EXCEPTION_HELPERS(ArrayOpStubRangeException);
75 EXCEPTION_HELPERS(ArrayOpStubTypeMismatchException);
76 #undef EXCEPTION_HELPERS
77 #endif // _TARGET_AMD64_
78 #endif // !FEATURE_ARRAYSTUB_AS_IL
80 #if defined(_TARGET_AMD64_)
82 extern "C" VOID __cdecl DebugCheckStubUnwindInfo();
84 #endif // _TARGET_AMD64_
86 // Presumably this code knows what it is doing with TLS. If we are hiding these
87 // services from normal code, reveal them here.
92 #ifdef FEATURE_COMINTEROP
93 Thread* __stdcall CreateThreadBlockReturnHr(ComMethodFrame *pFrame);
100 BOOL IsPreservedReg (X86Reg reg)
102 UINT16 PreservedRegMask =
111 return PreservedRegMask & (1 << reg);
114 #endif // _TARGET_AMD64_
116 #ifdef _TARGET_AMD64_
117 //-----------------------------------------------------------------------
118 // InstructionFormat for near Jump and short Jump
119 //-----------------------------------------------------------------------
121 //X64EmitTailcallWithRSPAdjust
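// Note: the jump emission is split into two InstructionFormats. X64NearJumpSetup below
// emits only the "materialize the target" half (nothing for rel8/rel32-reachable targets,
// or "mov rax, <target>" for 64-bit targets), while X64NearJumpExecute emits the matching
// "jmp rel8/rel32" or "jmp rax" half. This lets X86EmitTailcallWithESPAdjust place the
// stack adjustment between the two halves without disturbing the loaded target.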
122 class X64NearJumpSetup : public InstructionFormat
125 X64NearJumpSetup() : InstructionFormat( InstructionFormat::k8|InstructionFormat::k32
126 | InstructionFormat::k64Small | InstructionFormat::k64
129 LIMITED_METHOD_CONTRACT;
132 virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode)
134 LIMITED_METHOD_CONTRACT
150 _ASSERTE(!"unexpected refsize");
156 virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT variationCode, BYTE *pDataBuffer)
158 LIMITED_METHOD_CONTRACT
161 // do nothing, X64NearJump will take care of this
163 else if (k32 == refsize)
165 // do nothing, X64NearJump will take care of this
167 else if (k64Small == refsize)
169 UINT64 TargetAddress = (INT64)pOutBuffer + fixedUpReference + GetSizeOfInstruction(refsize, variationCode);
170 _ASSERTE(FitsInU4(TargetAddress));
172 // mov eax, imm32 ; zero-extended
173 pOutBuffer[0] = 0xB8;
174 *((UINT32*)&pOutBuffer[1]) = (UINT32)TargetAddress;
176 else if (k64 == refsize)
179 pOutBuffer[0] = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
180 pOutBuffer[1] = 0xB8;
181 *((UINT64*)&pOutBuffer[2]) = (UINT64)(((INT64)pOutBuffer) + fixedUpReference + GetSizeOfInstruction(refsize, variationCode));
185 _ASSERTE(!"unreached");
189 virtual BOOL CanReach(UINT refsize, UINT variationCode, BOOL fExternal, INT_PTR offset)
191 STATIC_CONTRACT_NOTHROW;
192 STATIC_CONTRACT_GC_NOTRIGGER;
193 STATIC_CONTRACT_FORBID_FAULT;
200 case InstructionFormat::k8:
201 // For external, we don't have enough info to predict
205 case InstructionFormat::k32:
206 return sizeof(PVOID) <= sizeof(UINT32);
208 case InstructionFormat::k64Small:
209 return FitsInI4(offset);
211 case InstructionFormat::k64:
212 // intentional fallthru
213 case InstructionFormat::kAllowAlways:
225 case InstructionFormat::k8:
226 return FitsInI1(offset);
228 case InstructionFormat::k32:
229 return FitsInI4(offset);
231 case InstructionFormat::k64Small:
232 // EmitInstruction emits a non-relative jmp for
233 // k64Small. We don't have enough info to predict the
234 // target address. (Even if we did, this would only
235 // handle the set of unsigned offsets with bit 31 set
236 // and no higher bits set, too uncommon/hard to test.)
239 case InstructionFormat::k64:
240 // intentional fallthru
241 case InstructionFormat::kAllowAlways:
251 class X64NearJumpExecute : public InstructionFormat
254 X64NearJumpExecute() : InstructionFormat( InstructionFormat::k8|InstructionFormat::k32
255 | InstructionFormat::k64Small | InstructionFormat::k64
258 LIMITED_METHOD_CONTRACT;
261 virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode)
263 LIMITED_METHOD_CONTRACT
279 _ASSERTE(!"unexpected refsize");
285 virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT variationCode, BYTE *pDataBuffer)
287 LIMITED_METHOD_CONTRACT
290 pOutBuffer[0] = 0xeb;
291 *((__int8*)(pOutBuffer+1)) = (__int8)fixedUpReference;
293 else if (k32 == refsize)
295 pOutBuffer[0] = 0xe9;
296 *((__int32*)(pOutBuffer+1)) = (__int32)fixedUpReference;
298 else if (k64Small == refsize)
301 pOutBuffer[0] = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
302 pOutBuffer[1] = 0xFF;
303 pOutBuffer[2] = 0xE0;
305 else if (k64 == refsize)
308 pOutBuffer[0] = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
309 pOutBuffer[1] = 0xFF;
310 pOutBuffer[2] = 0xE0;
314 _ASSERTE(!"unreached");
318 virtual BOOL CanReach(UINT refsize, UINT variationCode, BOOL fExternal, INT_PTR offset)
320 STATIC_CONTRACT_NOTHROW;
321 STATIC_CONTRACT_GC_NOTRIGGER;
322 STATIC_CONTRACT_FORBID_FAULT;
329 case InstructionFormat::k8:
330 // For external, we don't have enough info to predict
334 case InstructionFormat::k32:
335 return sizeof(PVOID) <= sizeof(UINT32);
337 case InstructionFormat::k64Small:
338 return FitsInI4(offset);
340 case InstructionFormat::k64:
341 // intentional fallthru
342 case InstructionFormat::kAllowAlways:
354 case InstructionFormat::k8:
355 return FitsInI1(offset);
357 case InstructionFormat::k32:
358 return FitsInI4(offset);
360 case InstructionFormat::k64Small:
361 // EmitInstruction emits a non-relative jmp for
362 // k64Small. We don't have enough info to predict the
363 // target address. (Even if we did, this would only
364 // handle the set of unsigned offsets with bit 31 set
365 // and no higher bits set, too uncommon/hard to test.)
368 case InstructionFormat::k64:
369 // intentional fallthru
370 case InstructionFormat::kAllowAlways:
382 //-----------------------------------------------------------------------
383 // InstructionFormat for near Jump and short Jump
384 //-----------------------------------------------------------------------
385 class X86NearJump : public InstructionFormat
388 X86NearJump() : InstructionFormat( InstructionFormat::k8|InstructionFormat::k32
389 #ifdef _TARGET_AMD64_
390 | InstructionFormat::k64Small | InstructionFormat::k64
391 #endif // _TARGET_AMD64_
394 LIMITED_METHOD_CONTRACT;
397 virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode)
399 LIMITED_METHOD_CONTRACT
407 #ifdef _TARGET_AMD64_
413 #endif // _TARGET_AMD64_
415 _ASSERTE(!"unexpected refsize");
421 virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT variationCode, BYTE *pDataBuffer)
423 LIMITED_METHOD_CONTRACT
426 pOutBuffer[0] = 0xeb;
427 *((__int8*)(pOutBuffer+1)) = (__int8)fixedUpReference;
429 else if (k32 == refsize)
431 pOutBuffer[0] = 0xe9;
432 *((__int32*)(pOutBuffer+1)) = (__int32)fixedUpReference;
434 #ifdef _TARGET_AMD64_
435 else if (k64Small == refsize)
437 UINT64 TargetAddress = (INT64)pOutBuffer + fixedUpReference + GetSizeOfInstruction(refsize, variationCode);
438 _ASSERTE(FitsInU4(TargetAddress));
440 // mov eax, imm32 ; zero-extended
441 pOutBuffer[0] = 0xB8;
442 *((UINT32*)&pOutBuffer[1]) = (UINT32)TargetAddress;
445 pOutBuffer[5] = 0xFF;
446 pOutBuffer[6] = 0xE0;
448 else if (k64 == refsize)
451 pOutBuffer[0] = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
452 pOutBuffer[1] = 0xB8;
453 *((UINT64*)&pOutBuffer[2]) = (UINT64)(((INT64)pOutBuffer) + fixedUpReference + GetSizeOfInstruction(refsize, variationCode));
456 pOutBuffer[10] = 0xFF;
457 pOutBuffer[11] = 0xE0;
459 #endif // _TARGET_AMD64_
462 _ASSERTE(!"unreached");
466 virtual BOOL CanReach(UINT refsize, UINT variationCode, BOOL fExternal, INT_PTR offset)
468 STATIC_CONTRACT_NOTHROW;
469 STATIC_CONTRACT_GC_NOTRIGGER;
470 STATIC_CONTRACT_FORBID_FAULT;
477 case InstructionFormat::k8:
478 // For external, we don't have enough info to predict
482 case InstructionFormat::k32:
483 return sizeof(PVOID) <= sizeof(UINT32);
485 #ifdef _TARGET_AMD64_
486 case InstructionFormat::k64Small:
487 return FitsInI4(offset);
489 case InstructionFormat::k64:
490 // intentional fallthru
492 case InstructionFormat::kAllowAlways:
504 case InstructionFormat::k8:
505 return FitsInI1(offset);
507 case InstructionFormat::k32:
508 #ifdef _TARGET_AMD64_
509 return FitsInI4(offset);
514 #ifdef _TARGET_AMD64_
515 case InstructionFormat::k64Small:
516 // EmitInstruction emits a non-relative jmp for
517 // k64Small. We don't have enough info to predict the
518 // target address. (Even if we did, this would only
519 // handle the set of unsigned offsets with bit 31 set
520 // and no higher bits set, too uncommon/hard to test.)
523 case InstructionFormat::k64:
524 // intentional fallthru
526 case InstructionFormat::kAllowAlways:
537 //-----------------------------------------------------------------------
538 // InstructionFormat for conditional jump. Set the variationCode
539 // to members of X86CondCode.
540 //-----------------------------------------------------------------------
541 class X86CondJump : public InstructionFormat
544 X86CondJump(UINT allowedSizes) : InstructionFormat(allowedSizes)
546 LIMITED_METHOD_CONTRACT;
549 virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode)
551 LIMITED_METHOD_CONTRACT
552 return (refsize == k8 ? 2 : 6);
555 virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT variationCode, BYTE *pDataBuffer)
557 LIMITED_METHOD_CONTRACT
560 pOutBuffer[0] = static_cast<BYTE>(0x70 | variationCode);
561 *((__int8*)(pOutBuffer+1)) = (__int8)fixedUpReference;
565 pOutBuffer[0] = 0x0f;
566 pOutBuffer[1] = static_cast<BYTE>(0x80 | variationCode);
567 *((__int32*)(pOutBuffer+2)) = (__int32)fixedUpReference;
573 //-----------------------------------------------------------------------
574 // InstructionFormat for near call.
575 //-----------------------------------------------------------------------
576 class X86Call : public InstructionFormat
580 : InstructionFormat( InstructionFormat::k32
581 #ifdef _TARGET_AMD64_
582 | InstructionFormat::k64Small | InstructionFormat::k64
583 #endif // _TARGET_AMD64_
586 LIMITED_METHOD_CONTRACT;
589 virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode)
591 LIMITED_METHOD_CONTRACT;
598 #ifdef _TARGET_AMD64_
604 #endif // _TARGET_AMD64_
607 _ASSERTE(!"unexpected refsize");
612 virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT variationCode, BYTE *pDataBuffer)
614 LIMITED_METHOD_CONTRACT
619 pOutBuffer[0] = 0xE8;
620 *((__int32*)(1+pOutBuffer)) = (__int32)fixedUpReference;
623 #ifdef _TARGET_AMD64_
625 UINT64 TargetAddress;
627 TargetAddress = (INT64)pOutBuffer + fixedUpReference + GetSizeOfInstruction(refsize, variationCode);
628 _ASSERTE(FitsInU4(TargetAddress));
630 // mov eax,<fixedUpReference> ; zero-extends
631 pOutBuffer[0] = 0xB8;
632 *((UINT32*)&pOutBuffer[1]) = (UINT32)TargetAddress;
635 pOutBuffer[5] = 0xff;
636 pOutBuffer[6] = 0xd0;
640 // mov rax,<fixedUpReference>
641 pOutBuffer[0] = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
642 pOutBuffer[1] = 0xB8;
643 *((UINT64*)&pOutBuffer[2]) = (UINT64)(((INT64)pOutBuffer) + fixedUpReference + GetSizeOfInstruction(refsize, variationCode));
646 pOutBuffer[10] = 0xff;
647 pOutBuffer[11] = 0xd0;
649 #endif // _TARGET_AMD64_
652 _ASSERTE(!"unreached");
657     // For x86, the default CanReach implementation will suffice.  It only needs
        // to be overridden for AMD64, where a call target may lie outside rel32 range.
659 #ifdef _TARGET_AMD64_
660 virtual BOOL CanReach(UINT refsize, UINT variationCode, BOOL fExternal, INT_PTR offset)
666 case InstructionFormat::k32:
667 // For external, we don't have enough info to predict
671 case InstructionFormat::k64Small:
672 return FitsInI4(offset);
674 case InstructionFormat::k64:
675 // intentional fallthru
676 case InstructionFormat::kAllowAlways:
688 case InstructionFormat::k32:
689 return FitsInI4(offset);
691 case InstructionFormat::k64Small:
692 // EmitInstruction emits a non-relative jmp for
693 // k64Small. We don't have enough info to predict the
694 // target address. (Even if we did, this would only
695 // handle the set of unsigned offsets with bit 31 set
696 // and no higher bits set, too uncommon/hard to test.)
699 case InstructionFormat::k64:
700 // intentional fallthru
701 case InstructionFormat::kAllowAlways:
709 #endif // _TARGET_AMD64_
713 //-----------------------------------------------------------------------
714 // InstructionFormat for push imm32.
715 //-----------------------------------------------------------------------
716 class X86PushImm32 : public InstructionFormat
719 X86PushImm32(UINT allowedSizes) : InstructionFormat(allowedSizes)
721 LIMITED_METHOD_CONTRACT;
724 virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode)
726 LIMITED_METHOD_CONTRACT;
731 virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT variationCode, BYTE *pDataBuffer)
733 LIMITED_METHOD_CONTRACT;
735 pOutBuffer[0] = 0x68;
736 // only support absolute pushimm32 of the label address. The fixedUpReference is
737 // the offset to the label from the current point, so add to get address
738 *((__int32*)(1+pOutBuffer)) = (__int32)(fixedUpReference);
742 #if defined(_TARGET_AMD64_)
743 //-----------------------------------------------------------------------
744 // InstructionFormat for lea reg, [RIP relative].
745 //-----------------------------------------------------------------------
746 class X64LeaRIP : public InstructionFormat
749 X64LeaRIP() : InstructionFormat(InstructionFormat::k64Small)
751 LIMITED_METHOD_CONTRACT;
754 virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode)
756 LIMITED_METHOD_CONTRACT;
761 virtual BOOL CanReach(UINT refsize, UINT variationCode, BOOL fExternal, INT_PTR offset)
767 case InstructionFormat::k64Small:
768 // For external, we don't have enough info to predict
772 case InstructionFormat::k64:
773 // intentional fallthru
774 case InstructionFormat::kAllowAlways:
786 case InstructionFormat::k64Small:
787 return FitsInI4(offset);
789 case InstructionFormat::k64:
790 // intentional fallthru
791 case InstructionFormat::kAllowAlways:
801 virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT variationCode, BYTE *pDataBuffer)
803 LIMITED_METHOD_CONTRACT;
805 X86Reg reg = (X86Reg)variationCode;
806 BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
810 rex |= REX_MODRM_REG_EXT;
811 reg = X86RegFromAMD64Reg(reg);
815 pOutBuffer[1] = 0x8D;
816 pOutBuffer[2] = 0x05 | (reg << 3);
817         // RIP-relative lea: fixedUpReference is the displacement from the end of
818         // this instruction to the label, which is exactly what the rel32 field needs.
819 *((__int32*)(3+pOutBuffer)) = (__int32)(fixedUpReference);
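        // For example (illustrative): with RCX (register code 1) as the destination, the
        // bytes emitted are 48 8D 0D xx xx xx xx, i.e. "lea rcx, [rip + disp32]".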
823 #endif // _TARGET_AMD64_
825 #if defined(_TARGET_AMD64_)
826 static BYTE gX64NearJumpSetup[sizeof(X64NearJumpSetup)];
827 static BYTE gX64NearJumpExecute[sizeof(X64NearJumpExecute)];
828 static BYTE gX64LeaRIP[sizeof(X64LeaRIP)];
831 static BYTE gX86NearJump[sizeof(X86NearJump)];
832 static BYTE gX86CondJump[sizeof(X86CondJump)];
833 static BYTE gX86Call[sizeof(X86Call)];
834 static BYTE gX86PushImm32[sizeof(X86PushImm32)];
836 /* static */ void StubLinkerCPU::Init()
842 INJECT_FAULT(COMPlusThrowOM(););
845 new (gX86NearJump) X86NearJump();
846 new (gX86CondJump) X86CondJump( InstructionFormat::k8|InstructionFormat::k32);
847 new (gX86Call) X86Call();
848 new (gX86PushImm32) X86PushImm32(InstructionFormat::k32);
850 #if defined(_TARGET_AMD64_)
851 new (gX64NearJumpSetup) X64NearJumpSetup();
852 new (gX64NearJumpExecute) X64NearJumpExecute();
853 new (gX64LeaRIP) X64LeaRIP();
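    // The InstructionFormat singletons above are constructed with placement new into
    // static byte buffers, presumably so that they live for the duration of all stub
    // generation without relying on static-constructor ordering or heap allocation here.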
857 //---------------------------------------------------------------
859 // mov destReg, srcReg
860 //---------------------------------------------------------------
861 VOID StubLinkerCPU::X86EmitMovRegReg(X86Reg destReg, X86Reg srcReg)
863 STANDARD_VM_CONTRACT;
865 #ifdef _TARGET_AMD64_
866 BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
870 rex |= REX_MODRM_RM_EXT;
871 destReg = X86RegFromAMD64Reg(destReg);
875 rex |= REX_MODRM_REG_EXT;
876 srcReg = X86RegFromAMD64Reg(srcReg);
882 Emit8(static_cast<UINT8>(0xC0 | (srcReg << 3) | destReg));
885 //---------------------------------------------------------------
887 VOID StubLinkerCPU::X86EmitMovSPReg(X86Reg srcReg)
889 STANDARD_VM_CONTRACT;
890 const X86Reg kESP = (X86Reg)4;
891 X86EmitMovRegReg(kESP, srcReg);
894 VOID StubLinkerCPU::X86EmitMovRegSP(X86Reg destReg)
896 STANDARD_VM_CONTRACT;
897 const X86Reg kESP = (X86Reg)4;
898 X86EmitMovRegReg(destReg, kESP);
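    // For example (illustrative): X86EmitMovRegSP(kEBP) yields the classic frame-pointer
    // setup "mov ebp, esp" (89 E5 on x86; with the REX.W prefix, 48 89 E5, i.e.
    // "mov rbp, rsp", on AMD64), given the ModRM layout used by X86EmitMovRegReg above.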
902 //---------------------------------------------------------------
905 //---------------------------------------------------------------
906 VOID StubLinkerCPU::X86EmitPushReg(X86Reg reg)
908 STANDARD_VM_CONTRACT;
910 #ifdef STUBLINKER_GENERATES_UNWIND_INFO
911 X86Reg origReg = reg;
914 #ifdef _TARGET_AMD64_
917 Emit8(REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT | REX_OPCODE_REG_EXT);
918 reg = X86RegFromAMD64Reg(reg);
921 Emit8(static_cast<UINT8>(0x50 + reg));
923 #ifdef STUBLINKER_GENERATES_UNWIND_INFO
924 if (IsPreservedReg(origReg))
926 UnwindPushedReg(origReg);
936 //---------------------------------------------------------------
939 //---------------------------------------------------------------
940 VOID StubLinkerCPU::X86EmitPopReg(X86Reg reg)
942 STANDARD_VM_CONTRACT;
944 #ifdef _TARGET_AMD64_
947 Emit8(REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT | REX_OPCODE_REG_EXT);
948 reg = X86RegFromAMD64Reg(reg);
950 #endif // _TARGET_AMD64_
952 Emit8(static_cast<UINT8>(0x58 + reg));
956 //---------------------------------------------------------------
959 //---------------------------------------------------------------
960 VOID StubLinkerCPU::X86EmitPushImm32(UINT32 value)
962 STANDARD_VM_CONTRACT;
970 //---------------------------------------------------------------
973 //---------------------------------------------------------------
974 VOID StubLinkerCPU::X86EmitPushImm32(CodeLabel &target)
976 STANDARD_VM_CONTRACT;
978 EmitLabelRef(&target, reinterpret_cast<X86PushImm32&>(gX86PushImm32), 0);
982 //---------------------------------------------------------------
985 //---------------------------------------------------------------
986 VOID StubLinkerCPU::X86EmitPushImm8(BYTE value)
988 STANDARD_VM_CONTRACT;
996 //---------------------------------------------------------------
999 //---------------------------------------------------------------
1000 VOID StubLinkerCPU::X86EmitPushImmPtr(LPVOID value WIN64_ARG(X86Reg tmpReg /*=kR10*/))
1002 STANDARD_VM_CONTRACT;
1004 #ifdef _TARGET_AMD64_
1005 X86EmitRegLoad(tmpReg, (UINT_PTR) value);
1006 X86EmitPushReg(tmpReg);
1008 X86EmitPushImm32((UINT_PTR) value);
1012 //---------------------------------------------------------------
1014 // XOR <reg32>,<reg32>
1015 //---------------------------------------------------------------
1016 VOID StubLinkerCPU::X86EmitZeroOutReg(X86Reg reg)
1018 STANDARD_VM_CONTRACT;
1020 #ifdef _TARGET_AMD64_
1021 // 32-bit results are zero-extended, so we only need the REX byte if
1022 // it's an extended register.
1025 Emit8(REX_PREFIX_BASE | REX_MODRM_REG_EXT | REX_MODRM_RM_EXT);
1026 reg = X86RegFromAMD64Reg(reg);
1030 Emit8(static_cast<UINT8>(0xc0 | (reg << 3) | reg));
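    // For example: X86EmitZeroOutReg(kEAX) emits "xor eax, eax"; per the note above, the
    // 32-bit form also clears the upper 32 bits of RAX on AMD64, so no REX.W is needed.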
1033 //---------------------------------------------------------------
1036 //---------------------------------------------------------------
1037 VOID StubLinkerCPU::X86EmitJumpReg(X86Reg reg)
1046 Emit8(static_cast<BYTE>(0xe0) | static_cast<BYTE>(reg));
1049 //---------------------------------------------------------------
1051 // CMP <reg32>,imm32
1052 //---------------------------------------------------------------
1053 VOID StubLinkerCPU::X86EmitCmpRegImm32(X86Reg reg, INT32 imm32)
1058 PRECONDITION((int) reg < NumX86Regs);
1062 #ifdef _TARGET_AMD64_
1063 BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
1067 rex |= REX_OPCODE_REG_EXT;
1068 reg = X86RegFromAMD64Reg(reg);
1073 if (FitsInI1(imm32)) {
1075 Emit8(static_cast<UINT8>(0xF8 | reg));
1079 Emit8(static_cast<UINT8>(0xF8 | reg));
1084 #ifdef _TARGET_AMD64_
1085 //---------------------------------------------------------------
1087 // CMP [reg+offs], imm32
1089 //---------------------------------------------------------------
1090 VOID StubLinkerCPU:: X86EmitCmpRegIndexImm32(X86Reg reg, INT32 offs, INT32 imm32)
1092 STANDARD_VM_CONTRACT;
1094 BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
1098 rex |= REX_OPCODE_REG_EXT;
1099 reg = X86RegFromAMD64Reg(reg);
1103 X64EmitCmp32RegIndexImm32(reg, offs, imm32);
1106 VOID StubLinkerCPU:: X64EmitCmp32RegIndexImm32(X86Reg reg, INT32 offs, INT32 imm32)
1107 #else // _TARGET_AMD64_
1108 VOID StubLinkerCPU:: X86EmitCmpRegIndexImm32(X86Reg reg, INT32 offs, INT32 imm32)
1109 #endif // _TARGET_AMD64_
1114 PRECONDITION((int) reg < NumX86Regs);
1119     // The binary encoding of "cmp [mem], imm32" is:
1120     //    1000-00sw  mod-111-r/m     (the ModRM reg field, /7, selects CMP)
1123 unsigned wBit = (FitsInI1(imm32) ? 0 : 1);
1124 Emit8(static_cast<UINT8>(0x80 | wBit));
1129 else if (FitsInI1(offs))
1134 Emit8(static_cast<UINT8>((modBits << 6) | 0x38 | reg));
1144 if (FitsInI1(imm32))
1150 //---------------------------------------------------------------
1152 #if defined(_TARGET_AMD64_)
1153 // mov rax, <target>
1160 //---------------------------------------------------------------
1161 VOID StubLinkerCPU::X86EmitTailcallWithESPAdjust(CodeLabel *pTarget, INT32 imm32)
1163 STANDARD_VM_CONTRACT;
1165 #if defined(_TARGET_AMD64_)
1166 EmitLabelRef(pTarget, reinterpret_cast<X64NearJumpSetup&>(gX64NearJumpSetup), 0);
1167 X86EmitAddEsp(imm32);
1168 EmitLabelRef(pTarget, reinterpret_cast<X64NearJumpExecute&>(gX64NearJumpExecute), 0);
1170 X86EmitAddEsp(imm32);
1171 X86EmitNearJump(pTarget);
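    // Summary of the sequence emitted above:
    //   AMD64:  [mov rax, <target>]   add rsp, imm32   jmp <target> / jmp rax
    //           (the mov is only emitted when the target is not rel8/rel32-reachable)
    //   x86:    add esp, imm32        jmp <target>
    // so the stack adjustment lands after the target is materialized but before the jump.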
1175 //---------------------------------------------------------------
1177 #if defined(_TARGET_AMD64_)
1178 // mov rax, <target>
1185 //---------------------------------------------------------------
1186 VOID StubLinkerCPU::X86EmitTailcallWithSinglePop(CodeLabel *pTarget, X86Reg reg)
1188 STANDARD_VM_CONTRACT;
1190 #if defined(_TARGET_AMD64_)
1191 EmitLabelRef(pTarget, reinterpret_cast<X64NearJumpSetup&>(gX64NearJumpSetup), 0);
1193 EmitLabelRef(pTarget, reinterpret_cast<X64NearJumpExecute&>(gX64NearJumpExecute), 0);
1196 X86EmitNearJump(pTarget);
1200 //---------------------------------------------------------------
1204 //---------------------------------------------------------------
1205 VOID StubLinkerCPU::X86EmitNearJump(CodeLabel *target)
1207 STANDARD_VM_CONTRACT;
1208 EmitLabelRef(target, reinterpret_cast<X86NearJump&>(gX86NearJump), 0);
1212 //---------------------------------------------------------------
1216 //---------------------------------------------------------------
1217 VOID StubLinkerCPU::X86EmitCondJump(CodeLabel *target, X86CondCode::cc condcode)
1219 STANDARD_VM_CONTRACT;
1220 EmitLabelRef(target, reinterpret_cast<X86CondJump&>(gX86CondJump), condcode);
1224 //---------------------------------------------------------------
1227 //---------------------------------------------------------------
1228 VOID StubLinkerCPU::X86EmitCall(CodeLabel *target, int iArgBytes)
1230 STANDARD_VM_CONTRACT;
1232 EmitLabelRef(target, reinterpret_cast<X86Call&>(gX86Call), 0);
1234     INDEBUG(Emit8(0x90));   // Emit a nop after the call in debug so that
1235                             // we know that this is a call that can directly call managed code.
1237 #ifndef _TARGET_AMD64_
1239 #endif // !_TARGET_AMD64_
1243 //---------------------------------------------------------------
1246 //---------------------------------------------------------------
1247 VOID StubLinkerCPU::X86EmitReturn(WORD wArgBytes)
1252 #if defined(_TARGET_AMD64_) || defined(UNIX_X86_ABI)
1253 PRECONDITION(wArgBytes == 0);
1270 #ifdef _TARGET_AMD64_
1271 //---------------------------------------------------------------
1275 //---------------------------------------------------------------
1276 VOID StubLinkerCPU::X86EmitLeaRIP(CodeLabel *target, X86Reg reg)
1278 STANDARD_VM_CONTRACT;
1279 EmitLabelRef(target, reinterpret_cast<X64LeaRIP&>(gX64LeaRIP), reg);
1281 #endif // _TARGET_AMD64_
1285 VOID StubLinkerCPU::X86EmitPushRegs(unsigned regSet)
1287 STANDARD_VM_CONTRACT;
1289 for (X86Reg r = kEAX; r <= NumX86Regs; r = (X86Reg)(r+1))
1290 if (regSet & (1U<<r))
1297 VOID StubLinkerCPU::X86EmitPopRegs(unsigned regSet)
1299 STANDARD_VM_CONTRACT;
1301 for (X86Reg r = NumX86Regs; r >= kEAX; r = (X86Reg)(r-1))
1302 if (regSet & (1U<<r))
1307 //---------------------------------------------------------------
1309 // mov <dstreg>, [<srcreg> + <ofs>]
1310 //---------------------------------------------------------------
1311 VOID StubLinkerCPU::X86EmitIndexRegLoad(X86Reg dstreg,
1315 STANDARD_VM_CONTRACT;
1316 X86EmitOffsetModRM(0x8b, dstreg, srcreg, ofs);
1320 //---------------------------------------------------------------
1322 // mov [<dstreg> + <ofs>],<srcreg>
1324 // Note: If you intend to use this to perform 64-bit moves to an RSP-based
1325 //       offset, then this method may not work. Consider using
1326 //       X86EmitIndexRegStoreRSP instead.
1327 //---------------------------------------------------------------
1328 VOID StubLinkerCPU::X86EmitIndexRegStore(X86Reg dstreg,
1332 STANDARD_VM_CONTRACT;
1334 if (dstreg != kESP_Unsafe)
1335 X86EmitOffsetModRM(0x89, srcreg, dstreg, ofs);
1337 X86EmitOp(0x89, srcreg, (X86Reg)kESP_Unsafe, ofs);
1340 #if defined(_TARGET_AMD64_)
1341 //---------------------------------------------------------------
1343 // mov [RSP + <ofs>],<srcreg>
1345 // It marks the instruction as 64-bit so that the processor
1346 // performs an 8-byte data move to an RSP-based stack location.
1347 //---------------------------------------------------------------
1348 VOID StubLinkerCPU::X86EmitIndexRegStoreRSP(__int32 ofs,
1351 STANDARD_VM_CONTRACT;
1353 X86EmitOp(0x89, srcreg, (X86Reg)kESP_Unsafe, ofs, (X86Reg)0, 0, k64BitOp);
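    // For example (illustrative): storing RCX at [rsp+20h] emits 48 89 4C 24 20,
    // i.e. "mov qword ptr [rsp+20h], rcx"; the REX.W prefix (48) forces the 8-byte store.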
1356 //---------------------------------------------------------------
1358 // mov [R12 + <ofs>],<srcreg>
1360 // It marks the instruction as 64-bit so that the processor
1361 // performs an 8-byte data move to an R12-based stack location.
1362 //---------------------------------------------------------------
1363 VOID StubLinkerCPU::X86EmitIndexRegStoreR12(__int32 ofs,
1366 STANDARD_VM_CONTRACT;
1368 X86EmitOp(0x89, srcreg, (X86Reg)kR12, ofs, (X86Reg)0, 0, k64BitOp);
1370 #endif // defined(_TARGET_AMD64_)
1372 //---------------------------------------------------------------
1374 // push dword ptr [<srcreg> + <ofs>]
1375 //---------------------------------------------------------------
1376 VOID StubLinkerCPU::X86EmitIndexPush(X86Reg srcreg, __int32 ofs)
1378 STANDARD_VM_CONTRACT;
1380 if(srcreg != kESP_Unsafe)
1381 X86EmitOffsetModRM(0xff, (X86Reg)0x6, srcreg, ofs);
1383 X86EmitOp(0xff,(X86Reg)0x6, srcreg, ofs);
1385 Push(sizeof(void*));
1388 //---------------------------------------------------------------
1390 // push dword ptr [<baseReg> + <indexReg>*<scale> + <ofs>]
1391 //---------------------------------------------------------------
1392 VOID StubLinkerCPU::X86EmitBaseIndexPush(
1398 STANDARD_VM_CONTRACT;
1400 X86EmitOffsetModRmSIB(0xff, (X86Reg)0x6, baseReg, indexReg, scale, ofs);
1401 Push(sizeof(void*));
1404 //---------------------------------------------------------------
1406 // push dword ptr [ESP + <ofs>]
1407 //---------------------------------------------------------------
1408 VOID StubLinkerCPU::X86EmitSPIndexPush(__int32 ofs)
1410 STANDARD_VM_CONTRACT;
1412 __int8 ofs8 = (__int8) ofs;
1413 if (ofs == (__int32) ofs8)
1415 // The offset can be expressed in a byte (can use the byte
1416 // form of the push esp instruction)
1418 BYTE code[] = {0xff, 0x74, 0x24, ofs8};
1419 EmitBytes(code, sizeof(code));
1423 // The offset requires 4 bytes (need to use the long form
1424 // of the push esp instruction)
1426 BYTE code[] = {0xff, 0xb4, 0x24, 0x0, 0x0, 0x0, 0x0};
1427 *(__int32 *)(&code[3]) = ofs;
1428 EmitBytes(code, sizeof(code));
1431 Push(sizeof(void*));
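    // For example: X86EmitSPIndexPush(8) emits FF 74 24 08, i.e. "push dword ptr [esp+8]"
    // (on AMD64 the same bytes push the 8-byte value at [rsp+8], since push defaults to 64-bit).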
1435 //---------------------------------------------------------------
1437 // pop dword ptr [<srcreg> + <ofs>]
1438 //---------------------------------------------------------------
1439 VOID StubLinkerCPU::X86EmitIndexPop(X86Reg srcreg, __int32 ofs)
1441 STANDARD_VM_CONTRACT;
1443 if(srcreg != kESP_Unsafe)
1444 X86EmitOffsetModRM(0x8f, (X86Reg)0x0, srcreg, ofs);
1446 X86EmitOp(0x8f,(X86Reg)0x0, srcreg, ofs);
1451 //---------------------------------------------------------------
1453 //  lea <dstreg>, [<srcreg> + <ofs>]
1454 //---------------------------------------------------------------
1455 VOID StubLinkerCPU::X86EmitIndexLea(X86Reg dstreg, X86Reg srcreg, __int32 ofs)
1460 PRECONDITION((int) dstreg < NumX86Regs);
1461 PRECONDITION((int) srcreg < NumX86Regs);
1465 X86EmitOffsetModRM(0x8d, dstreg, srcreg, ofs);
1468 #if defined(_TARGET_AMD64_)
1469 VOID StubLinkerCPU::X86EmitIndexLeaRSP(X86Reg dstreg, X86Reg srcreg, __int32 ofs)
1471 STANDARD_VM_CONTRACT;
1473 X86EmitOp(0x8d, dstreg, (X86Reg)kESP_Unsafe, ofs, (X86Reg)0, 0, k64BitOp);
1475 #endif // defined(_TARGET_AMD64_)
1477 //---------------------------------------------------------------
1480 //---------------------------------------------------------------
1481 VOID StubLinkerCPU::X86EmitSubEsp(INT32 imm32)
1483 STANDARD_VM_CONTRACT;
1485 if (imm32 < 0x1000-100)
1487 // As long as the esp size is less than 1 page plus a small
1488 // safety fudge factor, we can just bump esp.
1489 X86EmitSubEspWorker(imm32);
1493 // Otherwise, must touch at least one byte for each page.
1494 while (imm32 >= 0x1000)
1497 X86EmitSubEspWorker(0x1000-4);
1498 X86EmitPushReg(kEAX);
1504 X86EmitSubEspWorker(imm32);
1508 // If the remainder is large, touch the last byte - again,
1509 // as a fudge factor.
1510 X86EmitSubEspWorker(imm32-4);
1511 X86EmitPushReg(kEAX);
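        // For example: X86EmitSubEsp(0x3000) emits three "sub esp, 0xFFC / push eax" pairs,
        // touching each newly committed page in order so the guard page can grow the stack.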
1517 //---------------------------------------------------------------
1520 //---------------------------------------------------------------
1521 VOID StubLinkerCPU::X86EmitSubEspWorker(INT32 imm32)
1527 // On Win32, stacks must be faulted in one page at a time.
1528 PRECONDITION(imm32 < 0x1000);
1538 X86_64BitOperands();
1540 if (FitsInI1(imm32))
1556 //---------------------------------------------------------------
1559 //---------------------------------------------------------------
1560 VOID StubLinkerCPU::X86EmitAddEsp(INT32 imm32)
1562 STANDARD_VM_CONTRACT;
1570 X86_64BitOperands();
1572 if (FitsInI1(imm32))
1586 VOID StubLinkerCPU::X86EmitAddReg(X86Reg reg, INT32 imm32)
1591 PRECONDITION((int) reg < NumX86Regs);
1598 #ifdef _TARGET_AMD64_
1599 BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
1603 rex |= REX_OPCODE_REG_EXT;
1604 reg = X86RegFromAMD64Reg(reg);
1609 if (FitsInI1(imm32)) {
1611 Emit8(static_cast<UINT8>(0xC0 | reg));
1612 Emit8(static_cast<UINT8>(imm32));
1615 Emit8(static_cast<UINT8>(0xC0 | reg));
1620 //---------------------------------------------------------------
1621 // Emits: add destReg, srcReg
1622 //---------------------------------------------------------------
1624 VOID StubLinkerCPU::X86EmitAddRegReg(X86Reg destReg, X86Reg srcReg)
1626 STANDARD_VM_CONTRACT;
1628 X86EmitR2ROp(0x01, srcReg, destReg);
1634 VOID StubLinkerCPU::X86EmitSubReg(X86Reg reg, INT32 imm32)
1639 PRECONDITION((int) reg < NumX86Regs);
1643 #ifdef _TARGET_AMD64_
1644 BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
1648 rex |= REX_OPCODE_REG_EXT;
1649 reg = X86RegFromAMD64Reg(reg);
1654 if (FitsInI1(imm32)) {
1656 Emit8(static_cast<UINT8>(0xE8 | reg));
1657 Emit8(static_cast<UINT8>(imm32));
1660 Emit8(static_cast<UINT8>(0xE8 | reg));
1665 //---------------------------------------------------------------
1666 // Emits: sub destReg, srcReg
1667 //---------------------------------------------------------------
1669 VOID StubLinkerCPU::X86EmitSubRegReg(X86Reg destReg, X86Reg srcReg)
1671 STANDARD_VM_CONTRACT;
1673 X86EmitR2ROp(0x29, srcReg, destReg);
1676 #if defined(_TARGET_AMD64_)
1678 //---------------------------------------------------------------
1679 // movaps destXmmreg, srcXmmReg
1680 //---------------------------------------------------------------
1681 VOID StubLinkerCPU::X64EmitMovXmmXmm(X86Reg destXmmreg, X86Reg srcXmmReg)
1683 STANDARD_VM_CONTRACT;
1684     // There are several instructions that could be used to move XMM registers; MOVAPS is
1685     // what the C++ compiler uses, so use it here too.
1686 X86EmitR2ROp(X86_INSTR_MOVAPS_R_RM, destXmmreg, srcXmmReg, k32BitOp);
1689 //---------------------------------------------------------------
1690 // movdqa XmmN, [baseReg + offset]
1691 //---------------------------------------------------------------
1692 VOID StubLinkerCPU::X64EmitMovdqaFromMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs)
1694 STANDARD_VM_CONTRACT;
1695 X64EmitMovXmmWorker(0x66, 0x6F, Xmmreg, baseReg, ofs);
1698 //---------------------------------------------------------------
1699 // movdqa [baseReg + offset], XmmN
1700 //---------------------------------------------------------------
1701 VOID StubLinkerCPU::X64EmitMovdqaToMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs)
1703 STANDARD_VM_CONTRACT;
1704 X64EmitMovXmmWorker(0x66, 0x7F, Xmmreg, baseReg, ofs);
1707 //---------------------------------------------------------------
1708 // movsd XmmN, [baseReg + offset]
1709 //---------------------------------------------------------------
1710 VOID StubLinkerCPU::X64EmitMovSDFromMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs)
1712 STANDARD_VM_CONTRACT;
1713 X64EmitMovXmmWorker(0xF2, 0x10, Xmmreg, baseReg, ofs);
1716 //---------------------------------------------------------------
1717 // movsd [baseReg + offset], XmmN
1718 //---------------------------------------------------------------
1719 VOID StubLinkerCPU::X64EmitMovSDToMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs)
1721 STANDARD_VM_CONTRACT;
1722 X64EmitMovXmmWorker(0xF2, 0x11, Xmmreg, baseReg, ofs);
1725 //---------------------------------------------------------------
1726 // movss XmmN, [baseReg + offset]
1727 //---------------------------------------------------------------
1728 VOID StubLinkerCPU::X64EmitMovSSFromMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs)
1730 STANDARD_VM_CONTRACT;
1731 X64EmitMovXmmWorker(0xF3, 0x10, Xmmreg, baseReg, ofs);
1734 //---------------------------------------------------------------
1735 // movss [baseReg + offset], XmmN
1736 //---------------------------------------------------------------
1737 VOID StubLinkerCPU::X64EmitMovSSToMem(X86Reg Xmmreg, X86Reg baseReg, __int32 ofs)
1739 STANDARD_VM_CONTRACT;
1740 X64EmitMovXmmWorker(0xF3, 0x11, Xmmreg, baseReg, ofs);
1743 //---------------------------------------------------------------
1744 // Helper method for emitting of XMM from/to memory moves
1745 //---------------------------------------------------------------
1746 VOID StubLinkerCPU::X64EmitMovXmmWorker(BYTE prefix, BYTE opcode, X86Reg Xmmreg, X86Reg baseReg, __int32 ofs)
1748 STANDARD_VM_CONTRACT;
1750 BYTE codeBuffer[10];
1751 unsigned int nBytes = 0;
1753     // Set up the legacy prefix (66 for movdqa, F2 for movsd, F3 for movss)
1754 codeBuffer[nBytes++] = prefix;
1756     // By default, assume we don't have to emit the REX byte.
1757 bool fEmitRex = false;
1759 BYTE rex = REX_PREFIX_BASE;
1763 rex |= REX_MODRM_RM_EXT;
1764 baseReg = X86RegFromAMD64Reg(baseReg);
1767 if (Xmmreg >= kXMM8)
1769 rex |= REX_MODRM_REG_EXT;
1770 Xmmreg = X86RegFromAMD64Reg(Xmmreg);
1774 if (fEmitRex == true)
1776 codeBuffer[nBytes++] = rex;
1779 // Next, specify the two byte opcode - first byte is always 0x0F.
1780 codeBuffer[nBytes++] = 0x0F;
1781 codeBuffer[nBytes++] = opcode;
1783 BYTE modrm = static_cast<BYTE>((Xmmreg << 3) | baseReg);
1784 bool fOffsetFitsInSignedByte = FitsInI1(ofs)?true:false;
1786 if (fOffsetFitsInSignedByte)
1787 codeBuffer[nBytes++] = 0x40|modrm;
1789 codeBuffer[nBytes++] = 0x80|modrm;
1791 // If we are dealing with RSP or R12 as the baseReg, we need to emit the SIB byte.
1792 if ((baseReg == (X86Reg)4 /*kRSP*/) || (baseReg == kR12))
1794 codeBuffer[nBytes++] = 0x24;
1797 // Finally, specify the offset
1798 if (fOffsetFitsInSignedByte)
1800 codeBuffer[nBytes++] = (BYTE)ofs;
1804 *((__int32*)(codeBuffer+nBytes)) = ofs;
1808 _ASSERTE(nBytes <= _countof(codeBuffer));
1810 // Lastly, emit the encoded bytes
1811 EmitBytes(codeBuffer, nBytes);
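    // For example (illustrative): a movsd load of XMM0 from [rsi+18h] goes through this
    // worker as F2 0F 10 46 18, i.e. "movsd xmm0, qword ptr [rsi+18h]" (no REX byte is
    // needed because neither register is an extended one).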
1814 #endif // defined(_TARGET_AMD64_)
1816 //---------------------------------------------------------------
1817 // Emits a MOD/RM for accessing a dword at [<indexreg> + ofs32]
1818 //---------------------------------------------------------------
1819 VOID StubLinkerCPU::X86EmitOffsetModRM(BYTE opcode, X86Reg opcodereg, X86Reg indexreg, __int32 ofs)
1821 STANDARD_VM_CONTRACT;
1824 BYTE* code = codeBuffer;
1826 #ifdef _TARGET_AMD64_
1829 // code points to base X86 instruction,
1830 // codeBuffer points to full AMD64 instruction
1832 BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
1834 if (indexreg >= kR8)
1836 rex |= REX_MODRM_RM_EXT;
1837 indexreg = X86RegFromAMD64Reg(indexreg);
1839 if (opcodereg >= kR8)
1841 rex |= REX_MODRM_REG_EXT;
1842 opcodereg = X86RegFromAMD64Reg(opcodereg);
1850 BYTE modrm = static_cast<BYTE>((opcodereg << 3) | indexreg);
1851 if (ofs == 0 && indexreg != kEBP)
1855 EmitBytes(codeBuffer, nBytes);
1857 else if (FitsInI1(ofs))
1859 code[1] = 0x40|modrm;
1860 code[2] = (BYTE)ofs;
1862 EmitBytes(codeBuffer, nBytes);
1866 code[1] = 0x80|modrm;
1867 *((__int32*)(2+code)) = ofs;
1869 EmitBytes(codeBuffer, nBytes);
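    // For example: X86EmitIndexRegLoad(kEAX, kECX, 8) routes through here and emits
    // 8B 41 08 ("mov eax, [ecx+8]") on x86; on AMD64 the REX.W prefix is prepended,
    // giving 48 8B 41 08 ("mov rax, [rcx+8]").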
1873 //---------------------------------------------------------------
1874 // Emits a MOD/RM for accessing a dword at [<baseReg> + <indexReg>*<scale> + ofs32]
1875 //---------------------------------------------------------------
1876 VOID StubLinkerCPU::X86EmitOffsetModRmSIB(BYTE opcode, X86Reg opcodeOrReg, X86Reg baseReg, X86Reg indexReg, __int32 scale, __int32 ofs)
1881 PRECONDITION(scale == 1 || scale == 2 || scale == 4 || scale == 8);
1882 PRECONDITION(indexReg != kESP_Unsafe);
1887 BYTE* code = codeBuffer;
1890 #ifdef _TARGET_AMD64_
1899 case 1: scaleEnc = 0; break;
1900 case 2: scaleEnc = 1; break;
1901 case 4: scaleEnc = 2; break;
1902 case 8: scaleEnc = 3; break;
1903 default: _ASSERTE(!"Unexpected");
1906 BYTE sib = static_cast<BYTE>((scaleEnc << 6) | (indexReg << 3) | baseReg);
1910 code[1] = static_cast<BYTE>(0x44 | (opcodeOrReg << 3));
1912 code[3] = (BYTE)ofs;
1914 EmitBytes(codeBuffer, nBytes);
1918 code[1] = static_cast<BYTE>(0x84 | (opcodeOrReg << 3));
1920 *(__int32*)(&code[3]) = ofs;
1922 EmitBytes(codeBuffer, nBytes);
1928 VOID StubLinkerCPU::X86EmitRegLoad(X86Reg reg, UINT_PTR imm)
1930 STANDARD_VM_CONTRACT;
1934 X86EmitZeroOutReg(reg);
1938 UINT cbimm = sizeof(void*);
1940 #ifdef _TARGET_AMD64_
1941 // amd64 zero-extends all 32-bit operations. If the immediate will fit in
1942 // 32 bits, use the smaller encoding.
1944 if (reg >= kR8 || !FitsInU4(imm))
1946 BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
1949 rex |= REX_MODRM_RM_EXT;
1950 reg = X86RegFromAMD64Reg(reg);
1956         // amd64 is little-endian, so reading 4 bytes from &imm below correctly picks up the low 32 bits of the immediate.
1958 cbimm = sizeof(UINT32);
1960 #endif // _TARGET_AMD64_
1961 Emit8(0xB8 | (BYTE)reg);
1962 EmitBytes((BYTE*)&imm, cbimm);
1966 //---------------------------------------------------------------
1967 // Emits the most efficient form of the operation:
1969 // opcode altreg, [basereg + scaledreg*scale + ofs]
1973 // opcode [basereg + scaledreg*scale + ofs], altreg
1975 // (the opcode determines which comes first.)
1980 //    scale must be 0, 1, 2, 4 or 8.
1981 //    If scale == 0, scaledreg is ignored.
1982 //    basereg and altreg may be equal to 4 (ESP), but scaledreg cannot be.
1983 //    For some opcodes, "altreg" may actually select an operation
1984 //    rather than a second register argument.
1985 //    If basereg is EBP, scale must be 0.
1987 //---------------------------------------------------------------
1988 VOID StubLinkerCPU::X86EmitOp(WORD opcode,
1992 X86Reg scaledreg /*=0*/,
1994 AMD64_ARG(X86OperandSize OperandSize /*= k32BitOp*/))
2000 // All 2-byte opcodes start with 0x0f.
2001 PRECONDITION(!(opcode >> 8) || (opcode & 0xff) == 0x0f);
2003 PRECONDITION(scale == 0 || scale == 1 || scale == 2 || scale == 4 || scale == 8);
2004 PRECONDITION(scaledreg != (X86Reg)4);
2005 PRECONDITION(!(basereg == kEBP && scale != 0));
2007 PRECONDITION( ((UINT)basereg) < NumX86Regs );
2008 PRECONDITION( ((UINT)scaledreg) < NumX86Regs );
2009 PRECONDITION( ((UINT)altreg) < NumX86Regs );
2013 #ifdef _TARGET_AMD64_
2014 if ( k64BitOp == OperandSize
2017 || scaledreg >= kR8)
2019 BYTE rex = REX_PREFIX_BASE;
2021 if (k64BitOp == OperandSize)
2022 rex |= REX_OPERAND_SIZE_64BIT;
2026 rex |= REX_MODRM_REG_EXT;
2027 altreg = X86RegFromAMD64Reg(altreg);
2032 // basereg might be in the modrm or sib fields. This will be
2033 // decided below, but the encodings are the same either way.
2034 _ASSERTE(REX_SIB_BASE_EXT == REX_MODRM_RM_EXT);
2035 rex |= REX_SIB_BASE_EXT;
2036 basereg = X86RegFromAMD64Reg(basereg);
2039 if (scaledreg >= kR8)
2041 rex |= REX_SIB_INDEX_EXT;
2042 scaledreg = X86RegFromAMD64Reg(scaledreg);
2047 #endif // _TARGET_AMD64_
2049 BYTE modrmbyte = static_cast<BYTE>(altreg << 3);
2050 BOOL fNeedSIB = FALSE;
2053 BYTE scaleselect= 0;
2055 if (ofs == 0 && basereg != kEBP)
2057 ofssize = 0; // Don't change this constant!
2059 else if (FitsInI1(ofs))
2061 ofssize = 1; // Don't change this constant!
2065 ofssize = 2; // Don't change this constant!
2070 case 1: scaleselect = 0; break;
2071 case 2: scaleselect = 1; break;
2072 case 4: scaleselect = 2; break;
2073 case 8: scaleselect = 3; break;
2076 if (scale == 0 && basereg != (X86Reg)4 /*ESP*/)
2079 modrmbyte |= basereg | (ofssize << 6);
2081 else if (scale == 0)
2084 _ASSERTE(basereg == (X86Reg)4);
2088 modrmbyte |= 4 | (ofssize << 6);
2093 //[basereg + scaledreg*scale + ofs]
2095 modrmbyte |= 0004 | (ofssize << 6);
2097 SIBbyte = static_cast<BYTE>((scaleselect << 6) | (scaledreg << 3) | basereg);
2101 //Some sanity checks:
2102 _ASSERTE(!(fNeedSIB && basereg == kEBP)); // EBP not valid as a SIB base register.
2103 _ASSERTE(!( (!fNeedSIB) && basereg == (X86Reg)4 )) ; // ESP addressing requires SIB byte
2105 Emit8((BYTE)opcode);
2118 case 1: Emit8( (__int8)ofs ); break;
2119 case 2: Emit32( ofs ); break;
2120 default: _ASSERTE(!"Can't get here.");
2127 // opcode altreg, modrmreg
2131 // opcode modrmreg, altreg
2133 // (the opcode determines which one comes first)
2135 // For single-operand opcodes, "altreg" actually selects
2136 // an operation rather than a register.
2138 VOID StubLinkerCPU::X86EmitR2ROp (WORD opcode,
2141 AMD64_ARG(X86OperandSize OperandSize /*= k64BitOp*/)
2148 // All 2-byte opcodes start with 0x0f.
2149 PRECONDITION(!(opcode >> 8) || (opcode & 0xff) == 0x0f);
2151 PRECONDITION( ((UINT)altreg) < NumX86Regs );
2152 PRECONDITION( ((UINT)modrmreg) < NumX86Regs );
2156 #ifdef _TARGET_AMD64_
2159 if (modrmreg >= kR8)
2161 rex |= REX_MODRM_RM_EXT;
2162 modrmreg = X86RegFromAMD64Reg(modrmreg);
2167 rex |= REX_MODRM_REG_EXT;
2168 altreg = X86RegFromAMD64Reg(altreg);
2171 if (k64BitOp == OperandSize)
2172 rex |= REX_OPERAND_SIZE_64BIT;
2175 Emit8(REX_PREFIX_BASE | rex);
2176 #endif // _TARGET_AMD64_
2178 Emit8((BYTE)opcode);
2183 Emit8(static_cast<UINT8>(0300 | (altreg << 3) | modrmreg));
2187 //---------------------------------------------------------------
2189 // op altreg, [esp+ofs]
2190 //---------------------------------------------------------------
2191 VOID StubLinkerCPU::X86EmitEspOffset(BYTE opcode,
2194 AMD64_ARG(X86OperandSize OperandSize /*= k64BitOp*/)
2197 STANDARD_VM_CONTRACT;
2200 BYTE *code = codeBuffer;
2203 #ifdef _TARGET_AMD64_
2206 if (k64BitOp == OperandSize)
2207 rex |= REX_OPERAND_SIZE_64BIT;
2211 rex |= REX_MODRM_REG_EXT;
2212 altreg = X86RegFromAMD64Reg(altreg);
2217 *code = (REX_PREFIX_BASE | rex);
2222 #endif // _TARGET_AMD64_
2228 BYTE modrm = static_cast<BYTE>((altreg << 3) | 004);
2233 EmitBytes(codeBuffer, 3 + nBytes);
2235 else if (FitsInI1(ofs))
2237 code[1] = 0x40|modrm;
2239 code[3] = (BYTE)ofs;
2240 EmitBytes(codeBuffer, 4 + nBytes);
2244 code[1] = 0x80|modrm;
2246 *((__int32*)(3+code)) = ofs;
2247 EmitBytes(codeBuffer, 7 + nBytes);
2252 //---------------------------------------------------------------
2254 VOID StubLinkerCPU::X86EmitPushEBPframe()
2256 STANDARD_VM_CONTRACT;
2259 X86EmitPushReg(kEBP);
2261 X86EmitMovRegSP(kEBP);
2265 //---------------------------------------------------------------
2267 // mov <reg32>,0xcccccccc
2268 //---------------------------------------------------------------
2269 VOID StubLinkerCPU::X86EmitDebugTrashReg(X86Reg reg)
2271 STANDARD_VM_CONTRACT;
2273 #ifdef _TARGET_AMD64_
2274 BYTE rex = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
2278 rex |= REX_OPCODE_REG_EXT;
2279 reg = X86RegFromAMD64Reg(reg);
2283 Emit64(0xcccccccccccccccc);
2285 Emit8(static_cast<UINT8>(0xb8 | reg));
2292 // Get X86Reg indexes of argument registers based on offset into ArgumentRegister
2293 X86Reg GetX86ArgumentRegisterFromOffset(size_t ofs)
2303 #define ARGUMENT_REGISTER(reg) if (ofs == offsetof(ArgumentRegisters, reg)) RETURN k##reg ;
2304 ENUM_ARGUMENT_REGISTERS();
2305 #undef ARGUMENT_REGISTER
2307 _ASSERTE(0);//Can't get here.
2312 #ifdef _TARGET_AMD64_
2313 static const X86Reg c_argRegs[] = {
2314 #define ARGUMENT_REGISTER(regname) k##regname,
2315 ENUM_ARGUMENT_REGISTERS()
2316 #undef ARGUMENT_REGISTER
2321 #ifndef CROSSGEN_COMPILE
2323 #if defined(_DEBUG) && !defined(FEATURE_PAL)
2324 void StubLinkerCPU::EmitJITHelperLoggingThunk(PCODE pJitHelper, LPVOID helperFuncCount)
2326 STANDARD_VM_CONTRACT;
2328 VMHELPCOUNTDEF* pHelperFuncCount = (VMHELPCOUNTDEF*)helperFuncCount;
2331 mov rcx, &(pHelperFuncCount->count)
2334 #ifdef _TARGET_AMD64_
2335 mov rax, <pJitHelper>
2343 // mov rcx, &(pHelperFuncCount->count)
2344 X86EmitPushReg(kECX);
2345 X86EmitRegLoad(kECX, (UINT_PTR)(&(pHelperFuncCount->count)));
2348 BYTE lock_inc_RCX[] = { 0xf0, 0xff, 0x01 };
2349 EmitBytes(lock_inc_RCX, sizeof(lock_inc_RCX));
2351 #if defined(_TARGET_AMD64_)
2352 // mov rax, <pJitHelper>
2359 X86EmitTailcallWithSinglePop(NewExternalCodeLabel(pJitHelper), kECX);
2361 #endif // _DEBUG && !FEATURE_PAL
2363 VOID StubLinkerCPU::X86EmitCurrentThreadFetch(X86Reg dstreg, unsigned preservedRegSet)
2369 // It doesn't make sense to have the destination register be preserved
2370 PRECONDITION((preservedRegSet & (1 << dstreg)) == 0);
2371 AMD64_ONLY(PRECONDITION(dstreg < 8)); // code below doesn't support high registers
2377 X86EmitPushRegs(preservedRegSet & ((1 << kEAX) | (1 << kEDX) | (1 << kECX)));
2380 X86EmitCall(NewExternalCodeLabel((LPVOID)GetThread), sizeof(void*));
2383 X86EmitMovRegReg(dstreg, kEAX);
2385 X86EmitPopRegs(preservedRegSet & ((1 << kEAX) | (1 << kEDX) | (1 << kECX)));
2388 // Trash caller saved regs that we were not told to preserve, and that aren't the dstreg.
2389 preservedRegSet |= 1 << dstreg;
2390 if (!(preservedRegSet & (1 << kEAX)))
2391 X86EmitDebugTrashReg(kEAX);
2392 if (!(preservedRegSet & (1 << kEDX)))
2393 X86EmitDebugTrashReg(kEDX);
2394 if (!(preservedRegSet & (1 << kECX)))
2395 X86EmitDebugTrashReg(kECX);
2398 #else // FEATURE_PAL
2400 #ifdef _TARGET_AMD64_
2401 BYTE code[] = { 0x65,0x48,0x8b,0x04,0x25 }; // mov dstreg, qword ptr gs:[IMM32]
2402 static const int regByteIndex = 3;
2403 #elif defined(_TARGET_X86_)
2404 BYTE code[] = { 0x64,0x8b,0x05 }; // mov dstreg, dword ptr fs:[IMM32]
2405 static const int regByteIndex = 2;
2407 code[regByteIndex] |= (dstreg << 3);
2409 EmitBytes(code, sizeof(code));
2410 Emit32(offsetof(TEB, ThreadLocalStoragePointer));
2412 X86EmitIndexRegLoad(dstreg, dstreg, sizeof(void *) * (g_TlsIndex & 0xFFFF));
2414 X86EmitIndexRegLoad(dstreg, dstreg, (g_TlsIndex & 0x7FFF0000) >> 16);
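    // Rough shape of the emitted sequence on AMD64 with dstreg == rax (illustrative):
    //   mov rax, gs:[TEB.ThreadLocalStoragePointer]   ; 0x58 on x64
    //   mov rax, [rax + slot*8]                        ; TLS slot from the low bits of g_TlsIndex
    //   mov rax, [rax + secondaryOffset]               ; only when the high bits of g_TlsIndex are set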
2416 #endif // FEATURE_PAL
2419 #if defined(_TARGET_X86_)
2421 #if defined(PROFILING_SUPPORTED) && !defined(FEATURE_STUBS_AS_IL)
2422 VOID StubLinkerCPU::EmitProfilerComCallProlog(TADDR pFrameVptr, X86Reg regFrame)
2424 STANDARD_VM_CONTRACT;
2426 if (pFrameVptr == UMThkCallFrame::GetMethodFrameVPtr())
2428 // Load the methoddesc into ECX (UMThkCallFrame->m_pvDatum->m_pMD)
2429 X86EmitIndexRegLoad(kECX, regFrame, UMThkCallFrame::GetOffsetOfDatum());
2430 X86EmitIndexRegLoad(kECX, kECX, UMEntryThunk::GetOffsetOfMethodDesc());
2432 // Push arguments and notify profiler
2433 X86EmitPushImm32(COR_PRF_TRANSITION_CALL); // Reason
2434 X86EmitPushReg(kECX); // MethodDesc*
2435 X86EmitCall(NewExternalCodeLabel((LPVOID) ProfilerUnmanagedToManagedTransitionMD), 2*sizeof(void*));
2438 #ifdef FEATURE_COMINTEROP
2439 else if (pFrameVptr == ComMethodFrame::GetMethodFrameVPtr())
2441 // Load the methoddesc into ECX (Frame->m_pvDatum->m_pMD)
2442 X86EmitIndexRegLoad(kECX, regFrame, ComMethodFrame::GetOffsetOfDatum());
2443 X86EmitIndexRegLoad(kECX, kECX, ComCallMethodDesc::GetOffsetOfMethodDesc());
2445 // Push arguments and notify profiler
2446 X86EmitPushImm32(COR_PRF_TRANSITION_CALL); // Reason
2447 X86EmitPushReg(kECX); // MethodDesc*
2448 X86EmitCall(NewExternalCodeLabel((LPVOID) ProfilerUnmanagedToManagedTransitionMD), 2*sizeof(void*));
2450 #endif // FEATURE_COMINTEROP
2452 // Unrecognized frame vtbl
2455 _ASSERTE(!"Unrecognized vtble passed to EmitComMethodStubProlog with profiling turned on.");
2460 VOID StubLinkerCPU::EmitProfilerComCallEpilog(TADDR pFrameVptr, X86Reg regFrame)
2465 #ifdef FEATURE_COMINTEROP
2466 PRECONDITION(pFrameVptr == UMThkCallFrame::GetMethodFrameVPtr() || pFrameVptr == ComMethodFrame::GetMethodFrameVPtr());
2468 PRECONDITION(pFrameVptr == UMThkCallFrame::GetMethodFrameVPtr());
2469 #endif // FEATURE_COMINTEROP
2473 if (pFrameVptr == UMThkCallFrame::GetMethodFrameVPtr())
2475 // Load the methoddesc into ECX (UMThkCallFrame->m_pvDatum->m_pMD)
2476 X86EmitIndexRegLoad(kECX, regFrame, UMThkCallFrame::GetOffsetOfDatum());
2477 X86EmitIndexRegLoad(kECX, kECX, UMEntryThunk::GetOffsetOfMethodDesc());
2479 // Push arguments and notify profiler
2480 X86EmitPushImm32(COR_PRF_TRANSITION_RETURN); // Reason
2481 X86EmitPushReg(kECX); // MethodDesc*
2482 X86EmitCall(NewExternalCodeLabel((LPVOID) ProfilerManagedToUnmanagedTransitionMD), 2*sizeof(void*));
2485 #ifdef FEATURE_COMINTEROP
2486 else if (pFrameVptr == ComMethodFrame::GetMethodFrameVPtr())
2488 // Load the methoddesc into ECX (Frame->m_pvDatum->m_pMD)
2489 X86EmitIndexRegLoad(kECX, regFrame, ComMethodFrame::GetOffsetOfDatum());
2490 X86EmitIndexRegLoad(kECX, kECX, ComCallMethodDesc::GetOffsetOfMethodDesc());
2492 // Push arguments and notify profiler
2493 X86EmitPushImm32(COR_PRF_TRANSITION_RETURN); // Reason
2494 X86EmitPushReg(kECX); // MethodDesc*
2495 X86EmitCall(NewExternalCodeLabel((LPVOID) ProfilerManagedToUnmanagedTransitionMD), 2*sizeof(void*));
2497 #endif // FEATURE_COMINTEROP
2499 // Unrecognized frame vtbl
2502 _ASSERTE(!"Unrecognized vtble passed to EmitComMethodStubEpilog with profiling turned on.");
2505 #endif // PROFILING_SUPPORTED && !FEATURE_STUBS_AS_IL
2508 #ifndef FEATURE_STUBS_AS_IL
2509 //========================================================================
2510 // Prolog for entering managed code from COM
2511 // pushes the appropriate frame ptr
2512 // sets up a thread and returns a label that needs to be emitted by the caller
2514 // ESI will hold the pointer to the ComMethodFrame or UMThkCallFrame
2515 // EBX will hold the result of GetThread()
2516 // EDI will hold the previous Frame ptr
2518 void StubLinkerCPU::EmitComMethodStubProlog(TADDR pFrameVptr,
2519 CodeLabel** rgRareLabels,
2520 CodeLabel** rgRejoinLabels,
2521 BOOL bShouldProfile)
2527 PRECONDITION(rgRareLabels != NULL);
2528 PRECONDITION(rgRareLabels[0] != NULL && rgRareLabels[1] != NULL && rgRareLabels[2] != NULL);
2529 PRECONDITION(rgRejoinLabels != NULL);
2530 PRECONDITION(rgRejoinLabels[0] != NULL && rgRejoinLabels[1] != NULL && rgRejoinLabels[2] != NULL);
2534 // push ebp ;; save callee-saved register
2535 // push ebx ;; save callee-saved register
2536 // push esi ;; save callee-saved register
2537 // push edi ;; save callee-saved register
2538 X86EmitPushEBPframe();
2540 X86EmitPushReg(kEBX);
2541 X86EmitPushReg(kESI);
2542 X86EmitPushReg(kEDI);
2545 X86EmitPushReg(kEAX);
2547 // push edx ;leave room for m_next (edx is an arbitrary choice)
2548 X86EmitPushReg(kEDX);
2550 // push IMM32 ; push Frame vptr
2551 X86EmitPushImmPtr((LPVOID) pFrameVptr);
2553 X86EmitPushImmPtr((LPVOID)GetProcessGSCookie());
2555 // lea esi, [esp+4] ;; set ESI -> new frame
2556 X86EmitEspOffset(0x8d, kESI, 4); // lea ESI, [ESP+4]
2558 if (pFrameVptr == UMThkCallFrame::GetMethodFrameVPtr())
2560 // Preserve argument registers for thiscall/fastcall
2561 X86EmitPushReg(kECX);
2562 X86EmitPushReg(kEDX);
2565 // Emit Setup thread
2566 EmitSetup(rgRareLabels[0]); // rareLabel for rare setup
2567 EmitLabel(rgRejoinLabels[0]); // rejoin label for rare setup
2569 #ifdef PROFILING_SUPPORTED
2570 // If profiling is active, emit code to notify profiler of transition
2571     // Must do this before preemptive GC is disabled, so there is no problem if the profiler blocks.
2573 if (CORProfilerTrackTransitions() && bShouldProfile)
2575 EmitProfilerComCallProlog(pFrameVptr, /*Frame*/ kESI);
2577 #endif // PROFILING_SUPPORTED
2579 //-----------------------------------------------------------------------
2580 // Generate the inline part of disabling preemptive GC. It is critical
2581 // that this part happen before we link in the frame. That's because
2582 // we won't be able to unlink the frame from preemptive mode. And during
2583 // shutdown, we cannot switch to cooperative mode under some circumstances
2584 //-----------------------------------------------------------------------
2585 EmitDisable(rgRareLabels[1], /*fCallIn=*/TRUE, kEBX); // rare disable gc
2586 EmitLabel(rgRejoinLabels[1]); // rejoin for rare disable gc
2588 // If we take an SO after installing the new frame but before getting the exception
2589 // handlers in place, we will have a corrupt frame stack. So probe-by-touch first for
2590 // sufficient stack space to erect the handler. Because we know we will be touching
2591     // that stack right away when we install the handler, this probe-by-touch will not incur
2592 // unnecessary cache misses. And this allows us to do the probe with one instruction.
2594 // Note that for Win64, the personality routine will handle unlinking the frame, so
2595 // we don't need to probe in the Win64 stubs. The exception is ComToCLRWorker
2596     // where we don't set up a personality routine. However, we push the frame inside
2597     // that function and it is probe-protected with an entry-point probe first, so we are covered.
2600     // We push two registers to set up the EH handler and none to set up the frame,
2601 // so probe for double that to give ourselves a small margin for error.
2602 // mov eax, [esp+n] ;; probe for sufficient stack to setup EH
2603 X86EmitEspOffset(0x8B, kEAX, -0x20);
2604 // mov edi,[ebx + Thread.GetFrame()] ;; get previous frame
2605 X86EmitIndexRegLoad(kEDI, kEBX, Thread::GetOffsetOfCurrentFrame());
2607 // mov [esi + Frame.m_next], edi
2608 X86EmitIndexRegStore(kESI, Frame::GetOffsetOfNextLink(), kEDI);
2610 // mov [ebx + Thread.GetFrame()], esi
2611 X86EmitIndexRegStore(kEBX, Thread::GetOffsetOfCurrentFrame(), kESI);
2613 if (pFrameVptr == UMThkCallFrame::GetMethodFrameVPtr())
2615 // push UnmanagedToManagedExceptHandler
2616 X86EmitPushImmPtr((LPVOID)UMThunkPrestubHandler);
2619 static const BYTE codeSEH1[] = { 0x64, 0xA1, 0x0, 0x0, 0x0, 0x0};
2620 EmitBytes(codeSEH1, sizeof(codeSEH1));
2623 X86EmitPushReg(kEAX);
2625 // mov dword ptr fs:[0], esp
2626 static const BYTE codeSEH2[] = { 0x64, 0x89, 0x25, 0x0, 0x0, 0x0, 0x0};
2627 EmitBytes(codeSEH2, sizeof(codeSEH2));
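// Illustrative only: the pushes above, together with the "mov dword ptr fs:[0], esp"
// just emitted, build an EXCEPTION_REGISTRATION_RECORD directly on the stack and
// link it at the head of the SEH chain:
//
//   [esp + 0]  Next     ; previous fs:[0] value (pushed from eax)
//   [esp + 4]  Handler  ; UMThunkPrestubHandler (pushed above)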
2631 if (Frame::ShouldLogTransitions())
2633 // call LogTransition
2634 X86EmitPushReg(kESI);
2635 X86EmitCall(NewExternalCodeLabel((LPVOID) Frame::LogTransition), sizeof(void*));
2640 //========================================================================
2641 // Epilog for stubs that enter managed code from COM
2643 // At this point of the stub, the state should be as follows:
2644 // ESI holds the ComMethodFrame or UMThkCallFrame ptr
2645 // EBX holds the result of GetThread()
2646 // EDI holds the previous Frame ptr
2648 void StubLinkerCPU::EmitComMethodStubEpilog(TADDR pFrameVptr,
2649 CodeLabel** rgRareLabels,
2650 CodeLabel** rgRejoinLabels,
2651 BOOL bShouldProfile)
2657 PRECONDITION(rgRareLabels != NULL);
2658 PRECONDITION(rgRareLabels[0] != NULL && rgRareLabels[1] != NULL && rgRareLabels[2] != NULL);
2659 PRECONDITION(rgRejoinLabels != NULL);
2660 PRECONDITION(rgRejoinLabels[0] != NULL && rgRejoinLabels[1] != NULL && rgRejoinLabels[2] != NULL);
2664 EmitCheckGSCookie(kESI, UnmanagedToManagedFrame::GetOffsetOfGSCookie());
2666 if (pFrameVptr == UMThkCallFrame::GetMethodFrameVPtr())
2668 // if we are using exceptions, unlink the SEH
2669 // mov ecx,[esp] ;;pointer to the next exception record
2670 X86EmitEspOffset(0x8b, kECX, 0);
2672 // mov dword ptr fs:[0], ecx
2673 static const BYTE codeSEH[] = { 0x64, 0x89, 0x0D, 0x0, 0x0, 0x0, 0x0 };
2674 EmitBytes(codeSEH, sizeof(codeSEH));
2676 X86EmitAddEsp(sizeof(EXCEPTION_REGISTRATION_RECORD));
2679 // mov [ebx + Thread.GetFrame()], edi ;; restore previous frame
2680 X86EmitIndexRegStore(kEBX, Thread::GetOffsetOfCurrentFrame(), kEDI);
2682 //-----------------------------------------------------------------------
2683 // Generate the inline part of enabling preemptive GC
2684 //-----------------------------------------------------------------------
2685 EmitEnable(rgRareLabels[2]); // rare gc
2686 EmitLabel(rgRejoinLabels[2]); // rejoin for rare gc
2688 if (pFrameVptr == UMThkCallFrame::GetMethodFrameVPtr())
2690 // Restore argument registers for thiscall/fastcall
2691 X86EmitPopReg(kEDX);
2692 X86EmitPopReg(kECX);
2695 // add esp, popstack
2696 X86EmitAddEsp(sizeof(GSCookie) + UnmanagedToManagedFrame::GetOffsetOfCalleeSavedRegisters());
2698 // pop edi ; restore callee-saved registers
2702 X86EmitPopReg(kEDI);
2703 X86EmitPopReg(kESI);
2704 X86EmitPopReg(kEBX);
2705 X86EmitPopReg(kEBP);
2707 // jmp eax //reexecute!
2708 X86EmitR2ROp(0xff, (X86Reg)4, kEAX);
2711 // This will never be executed. It is just to help out stack-walking logic
2712 // which disassembles the epilog to unwind the stack. A "ret" instruction
2713 // indicates that no more code needs to be disassembled, if the stack-walker
2714 // keeps on going past the previous "jmp eax".
2717 //-----------------------------------------------------------------------
2718 // The out-of-line portion of enabling preemptive GC - rarely executed
2719 //-----------------------------------------------------------------------
2720 EmitLabel(rgRareLabels[2]); // label for rare enable gc
2721 EmitRareEnable(rgRejoinLabels[2]); // emit rare enable gc
2723 //-----------------------------------------------------------------------
2724 // The out-of-line portion of disabling preemptive GC - rarely executed
2725 //-----------------------------------------------------------------------
2726 EmitLabel(rgRareLabels[1]); // label for rare disable gc
2727 EmitRareDisable(rgRejoinLabels[1]); // emit rare disable gc
2729 //-----------------------------------------------------------------------
2730 // The out-of-line portion of setup thread - rarely executed
2731 //-----------------------------------------------------------------------
2732 EmitLabel(rgRareLabels[0]); // label for rare setup thread
2733 EmitRareSetup(rgRejoinLabels[0], /*fThrow*/ TRUE); // emit rare setup thread
2735 #endif // !FEATURE_STUBS_AS_IL
2737 //---------------------------------------------------------------
2738 // Emit code to set up and store the current Thread pointer in ebx.
2739 // TRASHES eax,ecx&edx.
2740 // RESULTS ebx = current Thread
2741 //---------------------------------------------------------------
2742 VOID StubLinkerCPU::EmitSetup(CodeLabel *pForwardRef)
2744 STANDARD_VM_CONTRACT;
2746 X86EmitCurrentThreadFetch(kEBX, 0);
2749 static const BYTE b[] = { 0x83, 0xFB, 0x0};
2751 EmitBytes(b, sizeof(b));
2754 X86EmitCondJump(pForwardRef, X86CondCode::kJZ);
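// Illustrative sketch of the sequence emitted above (encodings approximate):
//   mov  ebx, <current Thread*>     ; inline TLS fetch
//   cmp  ebx, 0                     ; 83 FB 00
//   jz   <rare setup label>         ; no Thread yet -> out-of-line EmitRareSetup path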
2757 X86EmitDebugTrashReg(kECX);
2758 X86EmitDebugTrashReg(kEDX);
2762 VOID StubLinkerCPU::EmitRareSetup(CodeLabel *pRejoinPoint, BOOL fThrow)
2764 STANDARD_VM_CONTRACT;
2766 #ifndef FEATURE_COMINTEROP
2768 #else // !FEATURE_COMINTEROP
2771 X86EmitPushReg(kESI);
2772 X86EmitCall(NewExternalCodeLabel((LPVOID) CreateThreadBlockReturnHr), sizeof(void*));
2775 #endif // !FEATURE_COMINTEROP
2777 X86EmitCall(NewExternalCodeLabel((LPVOID) CreateThreadBlockThrow), 0);
2782 X86EmitNearJump(pRejoinPoint);
2785 //========================================================================
2786 #endif // _TARGET_X86_
2787 //========================================================================
2788 #if defined(FEATURE_COMINTEROP) && defined(_TARGET_X86_)
2789 //========================================================================
2790 // Shared epilog for stubs that enter managed code from COM
2792 // On entry, ESI points to the Frame
2793 // ESP points to below FramedMethodFrame::m_vc5Frame
2794 // EBX holds GetThread()
2795 // EDI holds the previous Frame
2797 void StubLinkerCPU::EmitSharedComMethodStubEpilog(TADDR pFrameVptr,
2798 CodeLabel** rgRareLabels,
2799 CodeLabel** rgRejoinLabels,
2800 unsigned offsetRetThunk,
2801 BOOL bShouldProfile)
2807 PRECONDITION(rgRareLabels != NULL);
2808 PRECONDITION(rgRareLabels[0] != NULL && rgRareLabels[1] != NULL && rgRareLabels[2] != NULL);
2809 PRECONDITION(rgRejoinLabels != NULL);
2810 PRECONDITION(rgRejoinLabels[0] != NULL && rgRejoinLabels[1] != NULL && rgRejoinLabels[2] != NULL);
2814 CodeLabel *NoEntryLabel;
2815 NoEntryLabel = NewCodeLabel();
2817 EmitCheckGSCookie(kESI, UnmanagedToManagedFrame::GetOffsetOfGSCookie());
2819 // mov [ebx + Thread.GetFrame()], edi ;; restore previous frame
2820 X86EmitIndexRegStore(kEBX, Thread::GetOffsetOfCurrentFrame(), kEDI);
2822 //-----------------------------------------------------------------------
2823 // Generate the inline part of enabling preemptive GC
2824 //-----------------------------------------------------------------------
2825 EmitLabel(NoEntryLabel); // need to enable preemptive mode even when the disable fails, as the rare disable path will return in coop mode
2827 EmitEnable(rgRareLabels[2]); // rare enable gc
2828 EmitLabel(rgRejoinLabels[2]); // rejoin for rare enable gc
2830 #ifdef PROFILING_SUPPORTED
2831 // If profiling is active, emit code to notify profiler of transition
2832 if (CORProfilerTrackTransitions() && bShouldProfile)
2834 // Save return value
2835 X86EmitPushReg(kEAX);
2836 X86EmitPushReg(kEDX);
2838 EmitProfilerComCallEpilog(pFrameVptr, kESI);
2840 // Restore return value
2841 X86EmitPopReg(kEDX);
2842 X86EmitPopReg(kEAX);
2844 #endif // PROFILING_SUPPORTED
2846 X86EmitAddEsp(sizeof(GSCookie) + UnmanagedToManagedFrame::GetOffsetOfDatum());
2849 X86EmitPopReg(kECX); // pop the MethodDesc*
2851 // pop edi ; restore callee-saved registers
2855 X86EmitPopReg(kEDI);
2856 X86EmitPopReg(kESI);
2857 X86EmitPopReg(kEBX);
2858 X86EmitPopReg(kEBP);
2860 // add ecx, offsetRetThunk
2861 X86EmitAddReg(kECX, offsetRetThunk);
2864 // This will jump to the "ret cbStackArgs" instruction in COMMETHOD_PREPAD.
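// jmp ecx      ; illustrative decode of the two bytes below (0xff 0xe1)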
2865 static const BYTE bjmpecx[] = { 0xff, 0xe1 };
2866 EmitBytes(bjmpecx, sizeof(bjmpecx));
2869 // This will never be executed. It is just to help out stack-walking logic
2870 // which disassembles the epilog to unwind the stack. A "ret" instruction
2871 // indicates that no more code needs to be disassembled, if the stack-walker
2872 // keeps on going past the previous "jmp ecx".
2875 //-----------------------------------------------------------------------
2876 // The out-of-line portion of enabling preemptive GC - rarely executed
2877 //-----------------------------------------------------------------------
2878 EmitLabel(rgRareLabels[2]); // label for rare enable gc
2879 EmitRareEnable(rgRejoinLabels[2]); // emit rare enable gc
2881 //-----------------------------------------------------------------------
2882 // The out-of-line portion of disabling preemptive GC - rarely executed
2883 //-----------------------------------------------------------------------
2884 EmitLabel(rgRareLabels[1]); // label for rare disable gc
2885 EmitRareDisableHRESULT(rgRejoinLabels[1], NoEntryLabel);
2887 //-----------------------------------------------------------------------
2888 // The out-of-line portion of setup thread - rarely executed
2889 //-----------------------------------------------------------------------
2890 EmitLabel(rgRareLabels[0]); // label for rare setup thread
2891 EmitRareSetup(rgRejoinLabels[0],/*fThrow*/ FALSE); // emit rare setup thread
2894 //========================================================================
2895 #endif // defined(FEATURE_COMINTEROP) && defined(_TARGET_X86_)
2897 #ifndef FEATURE_STUBS_AS_IL
2898 /*==============================================================================
2899 Pushes a TransitionFrame on the stack
2900 If you make any changes to the prolog instruction sequence, be sure
2901 to update UpdateRegdisplay, too!! This service should only be called from
2902 within the runtime. It should not be called for any unmanaged -> managed call-ins.
2904 At the end of the generated prolog stub code:
2905 pFrame is in ESI/RSI.
2906 the previous pFrame is in EDI/RDI
2907 The current Thread* is in EBX/RBX.
2908 For x86, ESP points to TransitionFrame
2909 For amd64, ESP points to the space reserved for the outgoing argument registers
2912 VOID StubLinkerCPU::EmitMethodStubProlog(TADDR pFrameVptr, int transitionBlockOffset)
2914 STANDARD_VM_CONTRACT;
2916 #ifdef _TARGET_AMD64_
2917 X86EmitPushReg(kR15); // CalleeSavedRegisters
2918 X86EmitPushReg(kR14);
2919 X86EmitPushReg(kR13);
2920 X86EmitPushReg(kR12);
2921 X86EmitPushReg(kRBP);
2922 X86EmitPushReg(kRBX);
2923 X86EmitPushReg(kRSI);
2924 X86EmitPushReg(kRDI);
2927 X86EmitPushReg(SCRATCH_REGISTER_X86REG);
2929 // push edx ;leave room for m_next (edx is an arbitrary choice)
2930 X86EmitPushReg(kEDX);
2933 X86EmitPushImmPtr((LPVOID) pFrameVptr);
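// mov rsi, rsp   ; (descriptive comment, assumed decode of the R2ROp below) rsi now points at the Frame vptr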
2936 X86EmitR2ROp(0x8b, kRSI, (X86Reg)4 /*kESP*/);
2937 UnwindSetFramePointer(kRSI);
2939 // Save ArgumentRegisters
2940 #define ARGUMENT_REGISTER(regname) X86EmitRegSave(k##regname, SecureDelegateFrame::GetOffsetOfTransitionBlock() + \
2941 sizeof(TransitionBlock) + offsetof(ArgumentRegisters, regname));
2942 ENUM_ARGUMENT_REGISTERS();
2943 #undef ARGUMENT_REGISTER
2945 _ASSERTE(((Frame*)&pFrameVptr)->GetGSCookiePtr() == PTR_GSCookie(PBYTE(&pFrameVptr) - sizeof(GSCookie)));
2946 X86EmitPushImmPtr((LPVOID)GetProcessGSCookie());
2948 // sub rsp, 4*sizeof(void*) ;; allocate callee scratch area and ensure rsp is 16-byte-aligned
2949 const INT32 padding = sizeof(ArgumentRegisters) + ((sizeof(FramedMethodFrame) % (2 * sizeof(LPVOID))) ? 0 : sizeof(LPVOID));
2950 X86EmitSubEsp(padding);
2951 #endif // _TARGET_AMD64_
2954 // push ebp ;; save callee-saved register
2956 // push ebx ;; save callee-saved register
2957 // push esi ;; save callee-saved register
2958 // push edi ;; save callee-saved register
2959 X86EmitPushEBPframe();
2961 X86EmitPushReg(kEBX);
2962 X86EmitPushReg(kESI);
2963 X86EmitPushReg(kEDI);
2965 // Push & initialize ArgumentRegisters
2966 #define ARGUMENT_REGISTER(regname) X86EmitPushReg(k##regname);
2967 ENUM_ARGUMENT_REGISTERS();
2968 #undef ARGUMENT_REGISTER
2971 X86EmitPushReg(kEAX);
2973 // push edx ;leave room for m_next (edx is an arbitrary choice)
2974 X86EmitPushReg(kEDX);
2977 X86EmitPushImmPtr((LPVOID) pFrameVptr);
2980 X86EmitMovRegSP(kESI);
2982 X86EmitPushImmPtr((LPVOID)GetProcessGSCookie());
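// Approximate picture of the x86 frame built by the pushes above, from higher to
// lower addresses (illustrative only; the authoritative layout is defined by the
// Frame classes and TransitionBlock):
//   saved ebp, ebx, esi, edi      ; callee-saved registers
//   argument registers            ; ENUM_ARGUMENT_REGISTERS pushes
//   eax                           ; scratch slot
//   edx                           ; placeholder for Frame.m_next
//   Frame vptr                    ; esi points here
//   GS cookie                     ; <- esp after the push above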
2983 #endif // _TARGET_X86_
2985 // ebx <-- GetThread()
2986 X86EmitCurrentThreadFetch(kEBX, 0);
2990 // call ObjectRefFlush
2991 #ifdef _TARGET_AMD64_
2994 X86EmitR2ROp(0x8b, kECX, kEBX); // arg in reg
2996 #else // !_TARGET_AMD64_
2997 X86EmitPushReg(kEBX); // arg on stack
2998 #endif // _TARGET_AMD64_
3001 X86EmitCall(NewExternalCodeLabel((LPVOID) Thread::ObjectRefFlush), sizeof(void*));
3005 // mov edi,[ebx + Thread.GetFrame()] ;; get previous frame
3006 X86EmitIndexRegLoad(kEDI, kEBX, Thread::GetOffsetOfCurrentFrame());
3008 // mov [esi + Frame.m_next], edi
3009 X86EmitIndexRegStore(kESI, Frame::GetOffsetOfNextLink(), kEDI);
3011 // mov [ebx + Thread.GetFrame()], esi
3012 X86EmitIndexRegStore(kEBX, Thread::GetOffsetOfCurrentFrame(), kESI);
3016 if (Frame::ShouldLogTransitions())
3018 // call LogTransition
3019 #ifdef _TARGET_AMD64_
3022 X86EmitR2ROp(0x8b, kECX, kESI); // arg in reg
3024 #else // !_TARGET_AMD64_
3025 X86EmitPushReg(kESI); // arg on stack
3026 #endif // _TARGET_AMD64_
3028 X86EmitCall(NewExternalCodeLabel((LPVOID) Frame::LogTransition), sizeof(void*));
3030 #ifdef _TARGET_AMD64_
3031 // Reload parameter registers
3032 // mov r, [esp+offs]
3033 #define ARGUMENT_REGISTER(regname) X86EmitEspOffset(0x8b, k##regname, sizeof(ArgumentRegisters) + \
3034 sizeof(TransitionFrame) + offsetof(ArgumentRegisters, regname));
3035 ENUM_ARGUMENT_REGISTERS();
3036 #undef ARGUMENT_REGISTER
3038 #endif // _TARGET_AMD64_
3044 #ifdef _TARGET_AMD64_
3045 // OK for the debugger to examine the new frame now
3046 // (Note that if it's not OK yet for some stub, another patch label
3047 // can be emitted later which will override this one.)
3050 // For x86, the patch label can be specified only after the GSCookie is pushed
3051 // Otherwise the debugger will see a Frame without a valid GSCookie
3055 /*==============================================================================
3056 EmitMethodStubEpilog generates the part of the stub that pops off the Frame and returns to the caller.
3059 numArgBytes - number of argument bytes the stub pops on return
3060 (not used on targets where the caller deallocates the argument space)
3062 At this point of the stub:
3063 pFrame is in ESI/RSI.
3064 the previous pFrame is in EDI/RDI
3065 The current Thread* is in EBX/RBX.
3066 For x86, ESP points to the FramedMethodFrame::NegInfo
3069 VOID StubLinkerCPU::EmitMethodStubEpilog(WORD numArgBytes, int transitionBlockOffset)
3071 STANDARD_VM_CONTRACT;
3073 // mov [ebx + Thread.GetFrame()], edi ;; restore previous frame
3074 X86EmitIndexRegStore(kEBX, Thread::GetOffsetOfCurrentFrame(), kEDI);
3078 X86EmitAddEsp(sizeof(GSCookie) + transitionBlockOffset + TransitionBlock::GetOffsetOfCalleeSavedRegisters());
3080 #elif defined(_TARGET_AMD64_)
3081 // lea rsp, [rsi + <offset of preserved registers>]
3082 X86EmitOffsetModRM(0x8d, (X86Reg)4 /*kRSP*/, kRSI, transitionBlockOffset + TransitionBlock::GetOffsetOfCalleeSavedRegisters());
3083 #endif // _TARGET_AMD64_
3085 // pop edi ; restore callee-saved registers
3089 X86EmitPopReg(kEDI);
3090 X86EmitPopReg(kESI);
3091 X86EmitPopReg(kEBX);
3092 X86EmitPopReg(kEBP);
3094 #ifdef _TARGET_AMD64_
3095 X86EmitPopReg(kR12);
3096 X86EmitPopReg(kR13);
3097 X86EmitPopReg(kR14);
3098 X86EmitPopReg(kR15);
3101 #if defined(_TARGET_AMD64_) || defined(UNIX_X86_ABI)
3102 // Caller deallocates argument space. (Bypasses ASSERT in
3107 X86EmitReturn(numArgBytes);
3111 // On entry, ESI should be pointing to the Frame
3113 VOID StubLinkerCPU::EmitCheckGSCookie(X86Reg frameReg, int gsCookieOffset)
3115 STANDARD_VM_CONTRACT;
3118 // cmp dword ptr[frameReg-gsCookieOffset], gsCookie
3120 X86EmitCmpRegIndexImm32(frameReg, gsCookieOffset, GetProcessGSCookie());
3122 X64EmitCmp32RegIndexImm32(frameReg, gsCookieOffset, (INT32)GetProcessGSCookie());
3125 CodeLabel * pLabel = NewCodeLabel();
3126 X86EmitCondJump(pLabel, X86CondCode::kJE);
3128 X86EmitCall(NewExternalCodeLabel((LPVOID) JIT_FailFast), 0);
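// Illustrative sketch of the check emitted above (x86 and AMD64 differ only in encoding):
//   cmp  dword ptr [frameReg + gsCookieOffset], <process GS cookie>
//   je   ok
//   call JIT_FailFast            ; cookie was overwritten - fail fast
// ok: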
3133 #endif // !FEATURE_STUBS_AS_IL
3136 // This method unboxes the THIS pointer and then jumps to pUnboxMD
3137 // If it's shared code for a method in a generic value class, then also extract the vtable pointer
3138 // and pass it as an extra argument. Thus this stub generator really covers both
3139 // - Unboxing, non-instantiating stubs
3140 // - Unboxing, method-table-instantiating stubs
3141 VOID StubLinkerCPU::EmitUnboxMethodStub(MethodDesc* pUnboxMD)
3146 PRECONDITION(!pUnboxMD->IsStatic());
3150 #ifdef FEATURE_STUBS_AS_IL
3151 _ASSERTE(!pUnboxMD->RequiresInstMethodTableArg());
3153 if (pUnboxMD->RequiresInstMethodTableArg())
3155 EmitInstantiatingMethodStub(pUnboxMD, NULL);
3161 // unboxing a value class simply means adding sizeof(void*) to the THIS pointer
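// Illustrative layout of a boxed value type instance (assumes the standard object
// header: a MethodTable* followed immediately by the value's fields):
//   [this + 0]              MethodTable*
//   [this + sizeof(void*)]  first field of the value type  <- unboxed "this"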
3163 #ifdef _TARGET_AMD64_
3164 X86EmitAddReg(THIS_kREG, sizeof(void*));
3166 // Use direct call if possible
3167 if (pUnboxMD->HasStableEntryPoint())
3169 X86EmitRegLoad(kRAX, pUnboxMD->GetStableEntryPoint());// MOV RAX, DWORD
3173 X86EmitRegLoad(kRAX, (UINT_PTR)pUnboxMD->GetAddrOfSlot()); // MOV RAX, DWORD
3175 X86EmitIndexRegLoad(kRAX, kRAX); // MOV RAX, [RAX]
3178 Emit16(X86_INSTR_JMP_EAX); // JMP EAX
3179 #else // _TARGET_AMD64_
3180 X86EmitAddReg(THIS_kREG, sizeof(void*));
3182 // Use direct call if possible
3183 if (pUnboxMD->HasStableEntryPoint())
3185 X86EmitNearJump(NewExternalCodeLabel((LPVOID) pUnboxMD->GetStableEntryPoint()));
3191 Emit32((DWORD)(size_t)pUnboxMD->GetAddrOfSlot());
3193 #endif //_TARGET_AMD64_
3197 #if defined(FEATURE_SHARE_GENERIC_CODE) && !defined(FEATURE_STUBS_AS_IL)
3198 // The stub generated by this method passes an extra dictionary argument before jumping to
3199 // shared-instantiation generic code.
3202 // * An InstantiatedMethodDesc for a generic method whose code is shared across instantiations.
3203 // In this case, the extra argument is the InstantiatedMethodDesc for the instantiation-specific stub itself.
3204 // or * A MethodDesc for a static method in a generic class whose code is shared across instantiations.
3205 // In this case, the extra argument is the MethodTable pointer of the instantiated type.
3206 // or * A MethodDesc for an unboxing stub. In this case, the extra argument is null.
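// Hypothetical example for illustration only: invoking shared code for a static
// method on SomeGenericClass<T> instantiated at SomeGenericClass<string> passes the
// MethodTable* of SomeGenericClass<string> as the hidden argument, while an unboxing
// stub (extra == NULL) instead loads the MethodTable from the boxed "this" below.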
3207 VOID StubLinkerCPU::EmitInstantiatingMethodStub(MethodDesc* pMD, void* extra)
3212 PRECONDITION(pMD->RequiresInstArg());
3217 ArgIterator argit(&msig);
3219 #ifdef _TARGET_AMD64_
3220 int paramTypeArgOffset = argit.GetParamTypeArgOffset();
3221 int paramTypeArgIndex = TransitionBlock::GetArgumentIndexFromOffset(paramTypeArgOffset);
3223 CorElementType argTypes[5];
3225 int firstRealArg = paramTypeArgIndex + 1;
3226 int argNum = firstRealArg;
3229 // Compute types of the 4 register args and first stack arg
3232 CorElementType sigType;
3233 while ((sigType = msig.NextArgNormalized()) != ELEMENT_TYPE_END)
3235 argTypes[argNum++] = sigType;
3241 BOOL fUseInstantiatingMethodStubWorker = FALSE;
3246 // We will need to go through an assembly helper.
3248 fUseInstantiatingMethodStubWorker = TRUE;
3250 // Allocate space for frame before pushing the arguments for the assembly helper
3251 X86EmitSubEsp((INT32)(AlignUp(sizeof(void *) /* extra stack param */ + sizeof(GSCookie) + sizeof(StubHelperFrame), 16) - sizeof(void *) /* return address */));
3254 // Store the extra stack argument for the helper.
3256 CorElementType argType = argTypes[--argNum];
3259 case ELEMENT_TYPE_R4:
3260 // movss dword ptr [rsp], xmm?
3261 X64EmitMovSSToMem(kXMM3, (X86Reg)4 /*kRSP*/);
3263 case ELEMENT_TYPE_R8:
3264 // movsd qword ptr [rsp], xmm?
3265 X64EmitMovSDToMem(kXMM3, (X86Reg)4 /*kRSP*/);
3268 X86EmitIndexRegStoreRSP(0, kR9);
3274 // Shuffle the register arguments
3276 while (argNum > firstRealArg)
3278 CorElementType argType = argTypes[--argNum];
3282 case ELEMENT_TYPE_R4:
3283 case ELEMENT_TYPE_R8:
3285 X64EmitMovXmmXmm((X86Reg)argNum, (X86Reg)(argNum - 1));
3289 X86EmitMovRegReg(c_argRegs[argNum], c_argRegs[argNum-1]);
3295 // Setup the hidden instantiation argument
3299 X86EmitRegLoad(c_argRegs[paramTypeArgIndex], (UINT_PTR)extra);
3303 X86EmitIndexRegLoad(c_argRegs[paramTypeArgIndex], THIS_kREG);
3305 X86EmitAddReg(THIS_kREG, sizeof(void*));
3308 // Use direct call if possible
3309 if (pMD->HasStableEntryPoint())
3311 X86EmitRegLoad(kRAX, pMD->GetStableEntryPoint());// MOV RAX, DWORD
3315 X86EmitRegLoad(kRAX, (UINT_PTR)pMD->GetAddrOfSlot()); // MOV RAX, DWORD
3317 X86EmitIndexRegLoad(kRAX, kRAX); // MOV RAX, [RAX]
3320 if (fUseInstantiatingMethodStubWorker)
3322 X86EmitPushReg(kRAX);
3324 UINT cbStack = argit.SizeOfArgStack();
3325 _ASSERTE(cbStack > 0);
3327 X86EmitPushImm32((AlignUp(cbStack, 16) / sizeof(void*)) - 1); // -1 for extra stack arg
3329 X86EmitRegLoad(kRAX, GetEEFuncEntryPoint(InstantiatingMethodStubWorker));// MOV RAX, DWORD
3333 _ASSERTE(argit.SizeOfArgStack() == 0);
3336 Emit16(X86_INSTR_JMP_EAX);
3339 int paramTypeArgOffset = argit.GetParamTypeArgOffset();
3341 // It's on the stack
3342 if (TransitionBlock::IsStackArgumentOffset(paramTypeArgOffset))
3344 // Pop return address into AX
3345 X86EmitPopReg(kEAX);
3349 // Push extra dictionary argument
3350 X86EmitPushImmPtr(extra);
3354 // Push the vtable pointer from "this"
3355 X86EmitIndexPush(THIS_kREG, 0);
3358 // Put return address back
3359 X86EmitPushReg(kEAX);
3361 // It's in a register
3364 X86Reg paramReg = GetX86ArgumentRegisterFromOffset(paramTypeArgOffset - TransitionBlock::GetOffsetOfArgumentRegisters());
3368 X86EmitRegLoad(paramReg, (UINT_PTR)extra);
3372 // Just extract the vtable pointer from "this"
3373 X86EmitIndexRegLoad(paramReg, THIS_kREG);
3379 // Unboxing stub case.
3380 X86EmitAddReg(THIS_kREG, sizeof(void*));
3383 // Use direct call if possible
3384 if (pMD->HasStableEntryPoint())
3386 X86EmitNearJump(NewExternalCodeLabel((LPVOID) pMD->GetStableEntryPoint()));
3392 Emit32((DWORD)(size_t)pMD->GetAddrOfSlot());
3396 #endif // FEATURE_SHARE_GENERIC_CODE && FEATURE_STUBS_AS_IL
3399 #if defined(_DEBUG) && defined(STUBLINKER_GENERATES_UNWIND_INFO)
3401 typedef BOOL GetModuleInformationProc(
3404 LPMODULEINFO lpmodinfo,
3408 GetModuleInformationProc *g_pfnGetModuleInformation = NULL;
3410 extern "C" VOID __cdecl DebugCheckStubUnwindInfoWorker (CONTEXT *pStubContext)
3412 BEGIN_ENTRYPOINT_VOIDRET;
3414 LOG((LF_STUBS, LL_INFO1000000, "checking stub unwind info:\n"));
3417 // Make a copy of the CONTEXT. RtlVirtualUnwind will modify this copy.
3418 // DebugCheckStubUnwindInfo will need to restore registers from the
3419 // original CONTEXT.
3421 CONTEXT ctx = *pStubContext;
3422 ctx.ContextFlags = (CONTEXT_CONTROL | CONTEXT_INTEGER);
3425 // Find the upper bound of the stack and address range of KERNEL32. This
3426 // is where we expect the unwind to stop.
3428 void *pvStackTop = GetThread()->GetCachedStackBase();
3430 if (!g_pfnGetModuleInformation)
3432 HMODULE hmodPSAPI = WszGetModuleHandle(W("PSAPI.DLL"));
3436 hmodPSAPI = WszLoadLibrary(W("PSAPI.DLL"));
3439 _ASSERTE(!"unable to load PSAPI.DLL");
3444 g_pfnGetModuleInformation = (GetModuleInformationProc*)GetProcAddress(hmodPSAPI, "GetModuleInformation");
3445 if (!g_pfnGetModuleInformation)
3447 _ASSERTE(!"can't find PSAPI!GetModuleInformation");
3451 // Intentionally leak hmodPSAPI. We don't want to
3452 // LoadLibrary/FreeLibrary every time, this is slow + produces lots of
3453 // debugger spew. This is just debugging code after all...
3456 HMODULE hmodKERNEL32 = WszGetModuleHandle(W("KERNEL32"));
3457 _ASSERTE(hmodKERNEL32);
3459 MODULEINFO modinfoKERNEL32;
3460 if (!g_pfnGetModuleInformation(GetCurrentProcess(), hmodKERNEL32, &modinfoKERNEL32, sizeof(modinfoKERNEL32)))
3462 _ASSERTE(!"unable to get bounds of KERNEL32");
3467 // Unwind until IP is 0, sp is at the stack top, and callee IP is in kernel32.
3472 ULONG64 ControlPc = (ULONG64)GetIP(&ctx);
3474 LOG((LF_STUBS, LL_INFO1000000, "pc %p, sp %p\n", ControlPc, GetSP(&ctx)));
3477 T_RUNTIME_FUNCTION *pFunctionEntry = RtlLookupFunctionEntry(
3484 ULONG64 EstablisherFrame;
3496 ULONG64 NewControlPc = (ULONG64)GetIP(&ctx);
3498 LOG((LF_STUBS, LL_INFO1000000, "function %p, image %p, new pc %p, new sp %p\n", pFunctionEntry, ImageBase, NewControlPc, GetSP(&ctx)));
3502 if (dac_cast<PTR_BYTE>(GetSP(&ctx)) < (BYTE*)pvStackTop - 0x100)
3504 _ASSERTE(!"SP did not end up at top of stack");
3508 if (!( ControlPc > (ULONG64)modinfoKERNEL32.lpBaseOfDll
3509 && ControlPc < (ULONG64)modinfoKERNEL32.lpBaseOfDll + modinfoKERNEL32.SizeOfImage))
3511 _ASSERTE(!"PC did not end up in KERNEL32");
3520 // Nested functions that do not use any stack space or nonvolatile
3521 // registers are not required to have unwind info (ex.
3522 // USER32!ZwUserCreateWindowEx).
3523 ctx.Rip = *(ULONG64*)(ctx.Rsp);
3524 ctx.Rsp += sizeof(ULONG64);
3529 END_ENTRYPOINT_VOIDRET;
3534 VOID StubLinkerCPU::EmitUnwindInfoCheckWorker (CodeLabel *pCheckLabel)
3536 STANDARD_VM_CONTRACT;
3537 X86EmitCall(pCheckLabel, 0);
3541 VOID StubLinkerCPU::EmitUnwindInfoCheckSubfunction()
3543 STANDARD_VM_CONTRACT;
3545 #ifdef _TARGET_AMD64_
3546 // X86EmitCall will generate "mov rax, target/jmp rax", so we have to save
3547 // rax on the stack. DO NOT use X86EmitPushReg. That will induce infinite
3548 // recursion, since the push may require more unwind info. This "push rax"
3549 // will be accounted for by DebugCheckStubUnwindInfo's unwind info
3550 // (considered part of its locals), so there doesn't have to be unwind info for it.
3555 X86EmitNearJump(NewExternalCodeLabel(DebugCheckStubUnwindInfo));
3558 #endif // defined(_DEBUG) && defined(STUBLINKER_GENERATES_UNWIND_INFO)
3563 //-----------------------------------------------------------------------
3564 // Generates the inline portion of the code to enable preemptive GC. Hopefully,
3565 // the inline code is all that will execute most of the time. If this code
3566 // path is entered at certain times, however, it will need to jump out to
3567 // a separate out-of-line path which is more expensive. The "pForwardRef"
3568 // label indicates the start of the out-of-line path.
3573 // all registers except ecx.
3575 //-----------------------------------------------------------------------
3576 VOID StubLinkerCPU::EmitEnable(CodeLabel *pForwardRef)
3582 PRECONDITION(4 == sizeof( ((Thread*)0)->m_State ));
3583 PRECONDITION(4 == sizeof( ((Thread*)0)->m_fPreemptiveGCDisabled ));
3587 // mov byte ptr [ebx + Thread.m_fPreemptiveGCDisabled], 0
3588 X86EmitOffsetModRM(0xc6, (X86Reg)0, kEBX, Thread::GetOffsetOfGCFlag());
3591 _ASSERTE(FitsInI1(Thread::TS_CatchAtSafePoint));
3593 // test byte ptr [ebx + Thread.m_State], TS_CatchAtSafePoint
3594 X86EmitOffsetModRM(0xf6, (X86Reg)0, kEBX, Thread::GetOffsetOfState());
3595 Emit8(Thread::TS_CatchAtSafePoint);
3598 X86EmitCondJump(pForwardRef, X86CondCode::kJNZ);
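// Compact sketch of the enable path emitted above (illustrative; pseudo-labels only):
//        mov  byte ptr [ebx + m_fPreemptiveGCDisabled], 0
//        test byte ptr [ebx + m_State], TS_CatchAtSafePoint
//        jnz  RareEnable          ; out-of-line: call StubRareEnable, then rejoin
//   Rejoin: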
3601 X86EmitDebugTrashReg(kECX);
3607 //-----------------------------------------------------------------------
3608 // Generates the out-of-line portion of the code to enable preemptive GC.
3609 // After the work is done, the code jumps back to the "pRejoinPoint"
3610 // which should be emitted right after the inline part is generated.
3615 // all registers except ecx.
3617 //-----------------------------------------------------------------------
3618 VOID StubLinkerCPU::EmitRareEnable(CodeLabel *pRejoinPoint)
3620 STANDARD_VM_CONTRACT;
3622 X86EmitCall(NewExternalCodeLabel((LPVOID) StubRareEnable), 0);
3624 X86EmitDebugTrashReg(kECX);
3628 X86EmitNearJump(pRejoinPoint);
3634 //-----------------------------------------------------------------------
3635 // Generates the inline portion of the code to disable preemptive GC. Hopefully,
3636 // the inline code is all that will execute most of the time. If this code
3637 // path is entered at certain times, however, it will need to jump out to
3638 // a separate out-of-line path which is more expensive. The "pForwardRef"
3639 // label indicates the start of the out-of-line path.
3644 // all registers except ecx.
3646 //-----------------------------------------------------------------------
3647 VOID StubLinkerCPU::EmitDisable(CodeLabel *pForwardRef, BOOL fCallIn, X86Reg ThreadReg)
3653 PRECONDITION(4 == sizeof( ((Thread*)0)->m_fPreemptiveGCDisabled ));
3654 PRECONDITION(4 == sizeof(g_TrapReturningThreads));
3658 #if defined(FEATURE_COMINTEROP) && defined(MDA_SUPPORTED)
3659 // If we are checking whether the current thread already holds the loader lock, vector
3660 // such cases to the rare disable pathway, where we can check again.
3661 if (fCallIn && (NULL != MDA_GET_ASSISTANT(Reentrancy)))
3663 CodeLabel *pNotReentrantLabel = NewCodeLabel();
3665 // test byte ptr [ebx + Thread.m_fPreemptiveGCDisabled],1
3666 X86EmitOffsetModRM(0xf6, (X86Reg)0, ThreadReg, Thread::GetOffsetOfGCFlag());
3670 X86EmitCondJump(pNotReentrantLabel, X86CondCode::kJZ);
3672 X86EmitPushReg(kEAX);
3673 X86EmitPushReg(kEDX);
3674 X86EmitPushReg(kECX);
3676 X86EmitCall(NewExternalCodeLabel((LPVOID) HasIllegalReentrancy), 0);
3678 // If the probe fires, we go ahead and allow the call anyway. At this point, there could be
3679 // GC heap corruptions. So the probe detects the illegal case, but doesn't prevent it.
3681 X86EmitPopReg(kECX);
3682 X86EmitPopReg(kEDX);
3683 X86EmitPopReg(kEAX);
3685 EmitLabel(pNotReentrantLabel);
3689 // mov byte ptr [ebx + Thread.m_fPreemptiveGCDisabled], 1
3690 X86EmitOffsetModRM(0xc6, (X86Reg)0, ThreadReg, Thread::GetOffsetOfGCFlag());
3693 // cmp dword ptr g_TrapReturningThreads, 0
3695 EmitPtr((void *)&g_TrapReturningThreads);
3699 X86EmitCondJump(pForwardRef, X86CondCode::kJNZ);
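// Compact sketch of the inline disable path emitted above (illustrative; pseudo-labels only):
//        mov  byte ptr [ThreadReg + m_fPreemptiveGCDisabled], 1
//        cmp  dword ptr [g_TrapReturningThreads], 0
//        jnz  RareDisable         ; a GC or similar trap is pending - take the slow path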
3701 #if defined(FEATURE_COMINTEROP) && !defined(FEATURE_CORESYSTEM)
3702 // If we are checking whether the current thread holds the loader lock, vector
3703 // such cases to the rare disable pathway, where we can check again.
3704 if (fCallIn && ShouldCheckLoaderLock())
3706 X86EmitPushReg(kEAX);
3707 X86EmitPushReg(kEDX);
3709 if (ThreadReg == kECX)
3710 X86EmitPushReg(kECX);
3712 // BOOL AuxUlibIsDLLSynchronizationHeld(BOOL *IsHeld)
3714 // So we need to be sure that the return value and the passed BOOL are both TRUE.
3715 // If either is FALSE, then the call failed or the lock is not held. Either way, the
3716 // probe should not fire.
3718 X86EmitPushReg(kEDX); // BOOL temp
3719 Emit8(0x54); // push ESP because arg is &temp
3720 X86EmitCall(NewExternalCodeLabel((LPVOID) AuxUlibIsDLLSynchronizationHeld), 0);
3722 // callee has popped.
3723 X86EmitPopReg(kEDX); // recover temp
3725 CodeLabel *pPopLabel = NewCodeLabel();
3727 Emit16(0xc085); // test eax, eax
3728 X86EmitCondJump(pPopLabel, X86CondCode::kJZ);
3730 Emit16(0xd285); // test edx, edx
3732 EmitLabel(pPopLabel); // retain the conditional flags across the pops
3734 if (ThreadReg == kECX)
3735 X86EmitPopReg(kECX);
3737 X86EmitPopReg(kEDX);
3738 X86EmitPopReg(kEAX);
3740 X86EmitCondJump(pForwardRef, X86CondCode::kJNZ);
3745 if (ThreadReg != kECX)
3746 X86EmitDebugTrashReg(kECX);
3752 //-----------------------------------------------------------------------
3753 // Generates the out-of-line portion of the code to disable preemptive GC.
3754 // After the work is done, the code jumps back to the "pRejoinPoint"
3755 // which should be emitted right after the inline part is generated. However,
3756 // if we cannot execute managed code at this time, an exception is thrown
3757 // which cannot be caught by managed code.
3762 // all registers except ecx, eax.
3764 //-----------------------------------------------------------------------
3765 VOID StubLinkerCPU::EmitRareDisable(CodeLabel *pRejoinPoint)
3767 STANDARD_VM_CONTRACT;
3769 X86EmitCall(NewExternalCodeLabel((LPVOID) StubRareDisableTHROW), 0);
3772 X86EmitDebugTrashReg(kECX);
3774 X86EmitNearJump(pRejoinPoint);
3777 #ifdef FEATURE_COMINTEROP
3778 //-----------------------------------------------------------------------
3779 // Generates the out-of-line portion of the code to disable preemptive GC.
3780 // After the work is done, the code normally jumps back to the "pRejoinPoint"
3781 // which should be emitted right after the inline part is generated. However,
3782 // if we cannot execute managed code at this time, an HRESULT is returned
3783 // via the ExitPoint.
3788 // all registers except ecx, eax.
3790 //-----------------------------------------------------------------------
3791 VOID StubLinkerCPU::EmitRareDisableHRESULT(CodeLabel *pRejoinPoint, CodeLabel *pExitPoint)
3793 STANDARD_VM_CONTRACT;
3795 X86EmitCall(NewExternalCodeLabel((LPVOID) StubRareDisableHR), 0);
3798 X86EmitDebugTrashReg(kECX);
3801 // test eax, eax ;; test the result of StubRareDisableHR
3805 X86EmitCondJump(pRejoinPoint, X86CondCode::kJZ);
3807 X86EmitNearJump(pExitPoint);
3809 #endif // FEATURE_COMINTEROP
3811 #endif // _TARGET_X86_
3813 #endif // CROSSGEN_COMPILE
3816 VOID StubLinkerCPU::EmitShuffleThunk(ShuffleEntry *pShuffleEntryArray)
3818 STANDARD_VM_CONTRACT;
3820 #ifdef _TARGET_AMD64_
3822 // mov SCRATCHREG,rsp
3823 X86_64BitOperands();
3825 Emit8(0304 | (SCRATCH_REGISTER_X86REG << 3));
3827 // save the real target in r11, will jump to it later. r10 is used below.
3828 // Windows: mov r11, rcx
3829 // Unix: mov r11, rdi
3830 X86EmitMovRegReg(kR11, THIS_kREG);
3832 #ifdef UNIX_AMD64_ABI
3833 for (ShuffleEntry* pEntry = pShuffleEntryArray; pEntry->srcofs != ShuffleEntry::SENTINEL; pEntry++)
3835 if (pEntry->srcofs & ShuffleEntry::REGMASK)
3837 // If source is present in register then destination must also be a register
3838 _ASSERTE(pEntry->dstofs & ShuffleEntry::REGMASK);
3839 // Both the srcofs and dstofs must be of the same kind of registers - float or general purpose.
3840 _ASSERTE((pEntry->dstofs & ShuffleEntry::FPREGMASK) == (pEntry->srcofs & ShuffleEntry::FPREGMASK));
3842 int dstRegIndex = pEntry->dstofs & ShuffleEntry::OFSREGMASK;
3843 int srcRegIndex = pEntry->srcofs & ShuffleEntry::OFSREGMASK;
3845 if (pEntry->srcofs & ShuffleEntry::FPREGMASK)
3847 // movdqa dstReg, srcReg
3848 X64EmitMovXmmXmm((X86Reg)(kXMM0 + dstRegIndex), (X86Reg)(kXMM0 + srcRegIndex));
3852 // mov dstReg, srcReg
3853 X86EmitMovRegReg(c_argRegs[dstRegIndex], c_argRegs[srcRegIndex]);
3856 else if (pEntry->dstofs & ShuffleEntry::REGMASK)
3858 // source must be on the stack
3859 _ASSERTE(!(pEntry->srcofs & ShuffleEntry::REGMASK));
3861 int dstRegIndex = pEntry->dstofs & ShuffleEntry::OFSREGMASK;
3862 int srcOffset = (pEntry->srcofs + 1) * sizeof(void*);
3864 if (pEntry->dstofs & ShuffleEntry::FPREGMASK)
3866 if (pEntry->dstofs & ShuffleEntry::FPSINGLEMASK)
3868 // movss dstReg, [rax + src]
3869 X64EmitMovSSFromMem((X86Reg)(kXMM0 + dstRegIndex), SCRATCH_REGISTER_X86REG, srcOffset);
3873 // movsd dstReg, [rax + src]
3874 X64EmitMovSDFromMem((X86Reg)(kXMM0 + dstRegIndex), SCRATCH_REGISTER_X86REG, srcOffset);
3879 // mov dstreg, [rax + src]
3880 X86EmitIndexRegLoad(c_argRegs[dstRegIndex], SCRATCH_REGISTER_X86REG, srcOffset);
3885 // source must be on the stack
3886 _ASSERTE(!(pEntry->srcofs & ShuffleEntry::REGMASK));
3888 // dest must be on the stack
3889 _ASSERTE(!(pEntry->dstofs & ShuffleEntry::REGMASK));
3891 // mov r10, [rax + src]
3892 X86EmitIndexRegLoad (kR10, SCRATCH_REGISTER_X86REG, (pEntry->srcofs + 1) * sizeof(void*));
3894 // mov [rax + dst], r10
3895 X86EmitIndexRegStore (SCRATCH_REGISTER_X86REG, (pEntry->dstofs + 1) * sizeof(void*), kR10);
3898 #else // UNIX_AMD64_ABI
3901 if (pShuffleEntryArray->argtype == ELEMENT_TYPE_END)
3903 // Special handling of open instance methods with return buffer. Move "this"
3904 // by two slots, and leave the "retbufptr" between the two slots intact.
3907 X86EmitMovRegReg(kRCX, kR8);
3910 pShuffleEntryArray++;
3912 // Skip this entry and leave retbufptr intact
3916 // Now shuffle the args by one position:
3917 // steps 1-3 : reg args (rcx, rdx, r8)
3918 // step 4 : stack->reg arg (r9)
3919 // step >4 : stack args
3922 pShuffleEntryArray->srcofs != ShuffleEntry::SENTINEL;
3923 step++, pShuffleEntryArray++)
3930 switch (pShuffleEntryArray->argtype)
3932 case ELEMENT_TYPE_R4:
3933 case ELEMENT_TYPE_R8:
3935 X64EmitMovXmmXmm((X86Reg)(step - 1), (X86Reg)(step));
3938 // mov argRegs[step-1], argRegs[step]
3939 X86EmitMovRegReg(c_argRegs[step-1], c_argRegs[step]);
3946 switch (pShuffleEntryArray->argtype)
3948 case ELEMENT_TYPE_R4:
3949 X64EmitMovSSFromMem(kXMM3, kRAX, 0x28);
3952 case ELEMENT_TYPE_R8:
3953 X64EmitMovSDFromMem(kXMM3, kRAX, 0x28);
3957 // mov r9, [rax + 28h]
3958 X86EmitIndexRegLoad (kR9, SCRATCH_REGISTER_X86REG, 5*sizeof(void*));
3964 // mov r10, [rax + (step+1)*sizeof(void*)]
3965 X86EmitIndexRegLoad (kR10, SCRATCH_REGISTER_X86REG, (step+1)*sizeof(void*));
3967 // mov [rax + step*sizeof(void*)], r10
3968 X86EmitIndexRegStore (SCRATCH_REGISTER_X86REG, step*sizeof(void*), kR10);
3971 #endif // UNIX_AMD64_ABI
3973 // mov r10, [r11 + Delegate._methodptraux]
3974 X86EmitIndexRegLoad(kR10, kR11, DelegateObject::GetOffsetOfMethodPtrAux());
3975 // add r11, DelegateObject::GetOffsetOfMethodPtrAux() - load the indirection cell into r11
3976 X86EmitAddReg(kR11, DelegateObject::GetOffsetOfMethodPtrAux());
3977 // Now jump to real target
3979 X86EmitR2ROp(0xff, (X86Reg)4, kR10);
3981 #else // _TARGET_AMD64_
3984 BOOL haveMemMemMove = FALSE;
3986 ShuffleEntry *pWalk = NULL;
3987 for (pWalk = pShuffleEntryArray; pWalk->srcofs != ShuffleEntry::SENTINEL; pWalk++)
3989 if (!(pWalk->dstofs & ShuffleEntry::REGMASK) &&
3990 !(pWalk->srcofs & ShuffleEntry::REGMASK) &&
3991 pWalk->srcofs != pWalk->dstofs)
3993 haveMemMemMove = TRUE;
3994 espadjust = sizeof(void*);
4002 X86EmitPushReg(THIS_kREG);
4008 Emit8(0300 | SCRATCH_REGISTER_X86REG << 3 | THIS_kREG);
4011 UINT16 emptySpot = 0x4 | ShuffleEntry::REGMASK;
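// Sketch of the algorithm below (descriptive only): emptySpot starts as the "this"
// argument register slot, which the push/mov above has just freed. Each pass of the
// outer loop finds the entry whose destination is the current empty slot, moves that
// entry's source into it, and the vacated source becomes the next empty slot, until
// no remaining entry targets the hole.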
4015 for (pWalk = pShuffleEntryArray; pWalk->srcofs != ShuffleEntry::SENTINEL; pWalk++)
4016 if (pWalk->dstofs == emptySpot)
4019 if (pWalk->srcofs == ShuffleEntry::SENTINEL)
4022 if ((pWalk->dstofs & ShuffleEntry::REGMASK))
4024 if (pWalk->srcofs & ShuffleEntry::REGMASK)
4026 // mov <dstReg>,<srcReg>
4028 Emit8(static_cast<UINT8>(0300 |
4029 (GetX86ArgumentRegisterFromOffset( pWalk->dstofs & ShuffleEntry::OFSMASK ) << 3) |
4030 (GetX86ArgumentRegisterFromOffset( pWalk->srcofs & ShuffleEntry::OFSMASK ))));
4034 X86EmitEspOffset(0x8b, GetX86ArgumentRegisterFromOffset( pWalk->dstofs & ShuffleEntry::OFSMASK ), pWalk->srcofs+espadjust);
4039 // if the destination is not a register, the source shouldn't be either.
4040 _ASSERTE(!(pWalk->srcofs & ShuffleEntry::REGMASK));
4041 if (pWalk->srcofs != pWalk->dstofs)
4043 X86EmitEspOffset(0x8b, kEAX, pWalk->srcofs+espadjust);
4044 X86EmitEspOffset(0x89, kEAX, pWalk->dstofs+espadjust);
4047 emptySpot = pWalk->srcofs;
4050 // Capture the stacksizedelta while we're at the end of the list.
4051 _ASSERTE(pWalk->srcofs == ShuffleEntry::SENTINEL);
4054 X86EmitPopReg(SCRATCH_REGISTER_X86REG);
4057 _ASSERTE(pWalk->stacksizedelta == 0);
4060 if (pWalk->stacksizedelta)
4061 X86EmitAddEsp(pWalk->stacksizedelta);
4063 // Now jump to real target
4065 // we need to jump indirectly so that, for virtual delegates, eax contains a pointer to the indirection cell
4066 X86EmitAddReg(SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfMethodPtrAux());
4067 static const BYTE bjmpeax[] = { 0xff, 0x20 };
4068 EmitBytes(bjmpeax, sizeof(bjmpeax));
4070 #endif // _TARGET_AMD64_
4074 #if !defined(CROSSGEN_COMPILE) && !defined(FEATURE_STUBS_AS_IL)
4076 //===========================================================================
4077 // Computes hash code for MulticastDelegate.Invoke()
4078 UINT_PTR StubLinkerCPU::HashMulticastInvoke(MetaSig* pSig)
4087 ArgIterator argit(pSig);
4089 UINT numStackBytes = argit.SizeOfArgStack();
4091 if (numStackBytes > 0x7FFF)
4092 COMPlusThrow(kNotSupportedException, W("NotSupported_TooManyArgs"));
4094 #ifdef _TARGET_AMD64_
4095 // Generate a hash key as follows:
4096 // UINT Arg0Type:2; // R4 (1), R8 (2), other (3)
4097 // UINT Arg1Type:2; // R4 (1), R8 (2), other (3)
4098 // UINT Arg2Type:2; // R4 (1), R8 (2), other (3)
4099 // UINT Arg3Type:2; // R4 (1), R8 (2), other (3)
4100 // UINT NumArgs:24; // number of stack argument slots
4101 // (This should cover all the prestub variations)
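// Worked example (illustrative only): an instance signature with a "this" pointer,
// then an R8, then an integer arg, all in registers and with no stack args, hashes
// to (3 << 0) | (2 << 2) | (3 << 4) = 0x3B, with the stack-slot count (0 here)
// occupying bits 8 and up.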
4103 _ASSERTE(!(numStackBytes & 7));
4104 UINT hash = (numStackBytes / sizeof(void*)) << 8;
4108 // NextArg() doesn't take into account the "this" pointer.
4109 // That's why we have to special case it here.
4110 if (argit.HasThis())
4112 hash |= 3 << (2*argNum);
4116 if (argit.HasRetBuffArg())
4118 hash |= 3 << (2*argNum);
4122 for (; argNum < 4; argNum++)
4124 switch (pSig->NextArgNormalized())
4126 case ELEMENT_TYPE_END:
4129 case ELEMENT_TYPE_R4:
4130 hash |= 1 << (2*argNum);
4132 case ELEMENT_TYPE_R8:
4133 hash |= 2 << (2*argNum);
4136 hash |= 3 << (2*argNum);
4141 #else // _TARGET_AMD64_
4143 // check if the function is returning a float, in which case the stub has to take
4144 // care of popping the floating point stack except for the last invocation
4146 _ASSERTE(!(numStackBytes & 3));
4148 UINT hash = numStackBytes;
4150 if (CorTypeInfo::IsFloat(pSig->GetReturnType()))
4154 #endif // _TARGET_AMD64_
4160 //===========================================================================
4161 // Emits code for MulticastDelegate.Invoke()
4162 VOID StubLinkerCPU::EmitDelegateInvoke()
4164 STANDARD_VM_CONTRACT;
4166 CodeLabel *pNullLabel = NewCodeLabel();
4168 // test THISREG, THISREG
4169 X86EmitR2ROp(0x85, THIS_kREG, THIS_kREG);
4172 X86EmitCondJump(pNullLabel, X86CondCode::kJZ);
4174 // mov SCRATCHREG, [THISREG + Delegate.FP] ; Save target stub in register
4175 X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, THIS_kREG, DelegateObject::GetOffsetOfMethodPtr());
4177 // mov THISREG, [THISREG + Delegate.OR] ; replace "this" pointer
4178 X86EmitIndexRegLoad(THIS_kREG, THIS_kREG, DelegateObject::GetOffsetOfTarget());
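// jmp SCRATCHREG   ; illustrative decode of the Emit16 below (0xff, 0xe0|reg) - tail-jump to the saved target stub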
4181 Emit16(0xe0ff | (SCRATCH_REGISTER_X86REG<<8));
4184 EmitLabel(pNullLabel);
4186 // mov ECX, CORINFO_NullReferenceException
4188 Emit32(CORINFO_NullReferenceException);
4190 X86EmitCall(NewExternalCodeLabel(GetEEFuncEntryPoint(JIT_InternalThrowFromHelper)), 0);
4194 #endif // _TARGET_X86_
4196 VOID StubLinkerCPU::EmitMulticastInvoke(UINT_PTR hash)
4198 STANDARD_VM_CONTRACT;
4200 int thisRegOffset = MulticastFrame::GetOffsetOfTransitionBlock() +
4201 TransitionBlock::GetOffsetOfArgumentRegisters() + offsetof(ArgumentRegisters, THIS_REG);
4203 // push the methoddesc on the stack
4204 // mov eax, [ecx + offsetof(_methodPtrAux)]
4205 X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, THIS_kREG, DelegateObject::GetOffsetOfMethodPtrAux());
4207 // Push a MulticastFrame on the stack.
4208 EmitMethodStubProlog(MulticastFrame::GetMethodFrameVPtr(), MulticastFrame::GetOffsetOfTransitionBlock());
4211 // Frame is ready to be inspected by debugger for patch location
4213 #else // _TARGET_AMD64_
4215 // Save register arguments in their home locations.
4216 // Non-FP registers are already saved by EmitMethodStubProlog.
4217 // (Assumes Sig.NextArg() does not enum RetBuffArg or "this".)
4220 __int32 argOfs = MulticastFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs();
4221 CorElementType argTypes[4];
4222 CorElementType argType;
4225 argOfs += sizeof(void*);
4226 argTypes[argNum] = ELEMENT_TYPE_I8;
4231 argType = ELEMENT_TYPE_END;
4233 switch ((hash >> (2 * argNum)) & 3)
4236 argType = ELEMENT_TYPE_END;
4239 argType = ELEMENT_TYPE_R4;
4241 // movss dword ptr [rsp + argOfs], xmm?
4242 X64EmitMovSSToMem((X86Reg)argNum, kRSI, argOfs);
4245 argType = ELEMENT_TYPE_R8;
4247 // movsd qword ptr [rsp + argOfs], xmm?
4248 X64EmitMovSDToMem((X86Reg)argNum, kRSI, argOfs);
4251 argType = ELEMENT_TYPE_I;
4255 argOfs += sizeof(void*);
4256 argTypes[argNum] = argType;
4259 while (argNum < 4 && ELEMENT_TYPE_END != argType);
4261 _ASSERTE(4 == argNum || ELEMENT_TYPE_END == argTypes[argNum-1]);
4263 #endif // _TARGET_AMD64_
4265 // TODO: on AMD64, pick different regs for locals so we don't need the pushes
4267 // push edi ;; Save EDI (want to use it as loop index)
4268 X86EmitPushReg(kEDI);
4270 // xor edi,edi ;; Loop counter: EDI=0,1,2...
4271 X86EmitZeroOutReg(kEDI);
4273 CodeLabel *pLoopLabel = NewCodeLabel();
4274 CodeLabel *pEndLoopLabel = NewCodeLabel();
4276 EmitLabel(pLoopLabel);
4279 // EDI == iteration counter
4281 // mov ecx, [esi + this] ;; get delegate
4282 X86EmitIndexRegLoad(THIS_kREG, kESI, thisRegOffset);
4284 // cmp edi,[ecx]._invocationCount
4285 X86EmitOp(0x3b, kEDI, THIS_kREG, DelegateObject::GetOffsetOfInvocationCount());
4288 X86EmitCondJump(pEndLoopLabel, X86CondCode::kJZ);
4290 #ifdef _TARGET_AMD64_
4292 INT32 numStackBytes = (INT32)((hash >> 8) * sizeof(void *));
4294 INT32 stackUsed, numStackArgs, ofs;
4296 // Push any stack args, plus an extra location
4297 // for rsp alignment if needed
4299 numStackArgs = numStackBytes / sizeof(void*);
4301 // 1 push above, so stack is currently misaligned
4302 const unsigned STACK_ALIGN_ADJUST = 8;
4306 // sub rsp, 28h ;; 4 reg arg home locs + rsp alignment
4307 stackUsed = 0x20 + STACK_ALIGN_ADJUST;
4308 X86EmitSubEsp(stackUsed);
4312 stackUsed = numStackArgs * sizeof(void*);
4314 // If the stack is misaligned, then an odd number of arguments
4315 // will naturally align the stack.
4316 if ( ((numStackArgs & 1) == 0)
4317 != (STACK_ALIGN_ADJUST == 0))
4319 X86EmitPushReg(kRAX);
4320 stackUsed += sizeof(void*);
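// Worked example (illustrative): STACK_ALIGN_ADJUST == 8 means rsp is currently
// 8 mod 16 because of the single push above, so an odd number of pushed stack args
// restores 16-byte alignment by itself; with an even count the extra "push rax"
// above supplies the missing slot.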
4323 ofs = MulticastFrame::GetOffsetOfTransitionBlock() +
4324 TransitionBlock::GetOffsetOfArgs() + sizeof(ArgumentRegisters) + numStackBytes;
4326 while (numStackArgs--)
4328 ofs -= sizeof(void*);
4330 // push [rsi + ofs] ;; Push stack args
4331 X86EmitIndexPush(kESI, ofs);
4334 // sub rsp, 20h ;; Create 4 reg arg home locations
4335 X86EmitSubEsp(0x20);
4341 argNum = 0, argOfs = MulticastFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs();
4342 argNum < 4 && argTypes[argNum] != ELEMENT_TYPE_END;
4343 argNum++, argOfs += sizeof(void*)
4346 switch (argTypes[argNum])
4348 case ELEMENT_TYPE_R4:
4349 // movss xmm?, dword ptr [rsi + argOfs]
4350 X64EmitMovSSFromMem((X86Reg)argNum, kRSI, argOfs);
4352 case ELEMENT_TYPE_R8:
4353 // movsd xmm?, qword ptr [rsi + argOfs]
4354 X64EmitMovSDFromMem((X86Reg)argNum, kRSI, argOfs);
4357 if (c_argRegs[argNum] != THIS_kREG)
4359 // mov r*, [rsi + dstOfs]
4360 X86EmitIndexRegLoad(c_argRegs[argNum], kESI,argOfs);
4366 // mov SCRATCHREG, [rcx+Delegate._invocationList] ;;fetch invocation list
4367 X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, THIS_kREG, DelegateObject::GetOffsetOfInvocationList());
4369 // mov SCRATCHREG, [SCRATCHREG+m_Array+rdi*8] ;; index into invocation list
4370 X86EmitOp(0x8b, kEAX, SCRATCH_REGISTER_X86REG, static_cast<int>(PtrArray::GetDataOffset()), kEDI, sizeof(void*), k64BitOp);
4372 // mov THISREG, [SCRATCHREG+Delegate.object] ;;replace "this" pointer
4373 X86EmitIndexRegLoad(THIS_kREG, SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfTarget());
4375 // call [SCRATCHREG+Delegate.target] ;; call current subscriber
4376 X86EmitOffsetModRM(0xff, (X86Reg)2, SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfMethodPtr());
4378 // add rsp, stackUsed ;; Clean up stack
4379 X86EmitAddEsp(stackUsed);
4384 #else // _TARGET_AMD64_
4386 UINT16 numStackBytes = static_cast<UINT16>(hash & ~3);
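// On x86 the hash is the 4-byte-aligned stack arg size with bit 1 set when the
// delegate returns a float/double (see HashMulticastInvoke above), so masking with
// ~3 recovers the raw stack byte count; the FP flag is re-checked via (hash & 2)
// further down.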
4388 // ..repush & reenregister args..
4389 INT32 ofs = numStackBytes + MulticastFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs();
4390 while (ofs != MulticastFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs())
4392 ofs -= sizeof(void*);
4393 X86EmitIndexPush(kESI, ofs);
4396 #define ARGUMENT_REGISTER(regname) if (k##regname != THIS_kREG) { X86EmitIndexRegLoad(k##regname, kESI, \
4397 offsetof(ArgumentRegisters, regname) + MulticastFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgumentRegisters()); }
4399 ENUM_ARGUMENT_REGISTERS_BACKWARD();
4401 #undef ARGUMENT_REGISTER
4403 // mov SCRATCHREG, [ecx+Delegate._invocationList] ;;fetch invocation list
4404 X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, THIS_kREG, DelegateObject::GetOffsetOfInvocationList());
4406 // mov SCRATCHREG, [SCRATCHREG+m_Array+edi*4] ;; index into invocation list
4407 X86EmitOp(0x8b, kEAX, SCRATCH_REGISTER_X86REG, PtrArray::GetDataOffset(), kEDI, sizeof(void*));
4409 // mov THISREG, [SCRATCHREG+Delegate.object] ;;replace "this" pointer
4410 X86EmitIndexRegLoad(THIS_kREG, SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfTarget());
4412 // call [SCRATCHREG+Delegate.target] ;; call current subscriber
4413 X86EmitOffsetModRM(0xff, (X86Reg)2, SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfMethodPtr());
4414 INDEBUG(Emit8(0x90)); // Emit a nop after the call in debug so that
4415 // we know that this is a call that can directly call managed code
4421 if (hash & 2) // CorTypeInfo::IsFloat(pSig->GetReturnType())
4423 // if the return value is a float/double check if we just did the last call - if not,
4424 // emit the pop of the float stack
4426 // mov SCRATCHREG, [esi + this] ;; get delegate
4427 X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, kESI, thisRegOffset);
4429 // cmp edi,[SCRATCHREG]._invocationCount
4430 X86EmitOffsetModRM(0x3b, kEDI, SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfInvocationCount());
4432 CodeLabel *pNoFloatStackPopLabel = NewCodeLabel();
4434 // je NOFLOATSTACKPOP
4435 X86EmitCondJump(pNoFloatStackPopLabel, X86CondCode::kJZ);
4440 // NoFloatStackPopLabel:
4441 EmitLabel(pNoFloatStackPopLabel);
4444 #endif // _TARGET_AMD64_
4446 // The debugger may need to stop here, so grab the offset of this code.
4450 X86EmitNearJump(pLoopLabel);
4453 EmitLabel(pEndLoopLabel);
4455 // pop edi ;; Restore edi
4456 X86EmitPopReg(kEDI);
4458 EmitCheckGSCookie(kESI, MulticastFrame::GetOffsetOfGSCookie());
4461 EmitMethodStubEpilog(numStackBytes, MulticastFrame::GetOffsetOfTransitionBlock());
4464 VOID StubLinkerCPU::EmitSecureDelegateInvoke(UINT_PTR hash)
4466 STANDARD_VM_CONTRACT;
4468 int thisRegOffset = SecureDelegateFrame::GetOffsetOfTransitionBlock() +
4469 TransitionBlock::GetOffsetOfArgumentRegisters() + offsetof(ArgumentRegisters, THIS_REG);
4471 // push the methoddesc on the stack
4472 // mov eax, [ecx + offsetof(_invocationCount)]
4473 X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, THIS_kREG, DelegateObject::GetOffsetOfInvocationCount());
4475 // Push a SecureDelegateFrame on the stack.
4476 EmitMethodStubProlog(SecureDelegateFrame::GetMethodFrameVPtr(), SecureDelegateFrame::GetOffsetOfTransitionBlock());
4479 // Frame is ready to be inspected by debugger for patch location
4481 #else // _TARGET_AMD64_
4483 // Save register arguments in their home locations.
4484 // Non-FP registers are already saved by EmitMethodStubProlog.
4485 // (Assumes Sig.NextArg() does not enum RetBuffArg or "this".)
4488 __int32 argOfs = SecureDelegateFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs();
4489 CorElementType argTypes[4];
4490 CorElementType argType;
4493 argOfs += sizeof(void*);
4494 argTypes[argNum] = ELEMENT_TYPE_I8;
4499 argType = ELEMENT_TYPE_END;
4501 switch ((hash >> (2 * argNum)) & 3)
4504 argType = ELEMENT_TYPE_END;
4507 argType = ELEMENT_TYPE_R4;
4509 // movss dword ptr [rsp + argOfs], xmm?
4510 X64EmitMovSSToMem((X86Reg)argNum, kRSI, argOfs);
4513 argType = ELEMENT_TYPE_R8;
4515 // movsd qword ptr [rsp + argOfs], xmm?
4516 X64EmitMovSDToMem((X86Reg)argNum, kRSI, argOfs);
4519 argType = ELEMENT_TYPE_I;
4523 argOfs += sizeof(void*);
4524 argTypes[argNum] = argType;
4527 while (argNum < 4 && ELEMENT_TYPE_END != argType);
4529 _ASSERTE(4 == argNum || ELEMENT_TYPE_END == argTypes[argNum-1]);
4531 #endif // _TARGET_AMD64_
4533 // mov ecx, [esi + this] ;; get delegate
4534 X86EmitIndexRegLoad(THIS_kREG, kESI, thisRegOffset);
4536 #ifdef _TARGET_AMD64_
4538 INT32 numStackBytes = (INT32)((hash >> 8) * sizeof(void *));
4540 INT32 stackUsed, numStackArgs, ofs;
4542 // Push any stack args, plus an extra location
4543 // for rsp alignment if needed
4545 numStackArgs = numStackBytes / sizeof(void*);
4547 // 1 push above, so stack is currently misaligned
4548 const unsigned STACK_ALIGN_ADJUST = 0;
4552 // sub rsp, 28h ;; 4 reg arg home locs + rsp alignment
4553 stackUsed = 0x20 + STACK_ALIGN_ADJUST;
4554 X86EmitSubEsp(stackUsed);
4558 stackUsed = numStackArgs * sizeof(void*);
4560 // If the stack is misaligned, then an odd number of arguments
4561 // will naturally align the stack.
4562 if ( ((numStackArgs & 1) == 0)
4563 != (STACK_ALIGN_ADJUST == 0))
4565 X86EmitPushReg(kRAX);
4566 stackUsed += sizeof(void*);
4569 ofs = SecureDelegateFrame::GetOffsetOfTransitionBlock() +
4570 TransitionBlock::GetOffsetOfArgs() + sizeof(ArgumentRegisters) + numStackBytes;
4572 while (numStackArgs--)
4574 ofs -= sizeof(void*);
4576 // push [rsi + ofs] ;; Push stack args
4577 X86EmitIndexPush(kESI, ofs);
4580 // sub rsp, 20h ;; Create 4 reg arg home locations
4581 X86EmitSubEsp(0x20);
4589 argNum = 0, argOfs = SecureDelegateFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs();
4590 argNum < 4 && argTypes[argNum] != ELEMENT_TYPE_END;
4591 argNum++, argOfs += sizeof(void*)
4594 switch (argTypes[argNum])
4596 case ELEMENT_TYPE_R4:
4597 // movss xmm?, dword ptr [rsi + argOfs]
4598 X64EmitMovSSFromMem((X86Reg)argNum, kRSI, argOfs);
4600 case ELEMENT_TYPE_R8:
4601 // movsd xmm?, qword ptr [rsi + argOfs]
4602 X64EmitMovSDFromMem((X86Reg)argNum, kRSI, argOfs);
4605 if (c_argRegs[argNum] != THIS_kREG)
4607 // mov r*, [rsi + dstOfs]
4608 X86EmitIndexRegLoad(c_argRegs[argNum], kESI,argOfs);
4614 // mov SCRATCHREG, [rcx+Delegate._invocationList] ;;fetch the inner delegate
4615 X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, THIS_kREG, DelegateObject::GetOffsetOfInvocationList());
4617 // mov THISREG, [SCRATCHREG+Delegate.object] ;;replace "this" pointer
4618 X86EmitIndexRegLoad(c_argRegs[thisArgNum], SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfTarget());
4620 // call [SCRATCHREG+Delegate.target] ;; call current subscriber
4621 X86EmitOffsetModRM(0xff, (X86Reg)2, SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfMethodPtr());
4623 // add rsp, stackUsed ;; Clean up stack
4624 X86EmitAddEsp(stackUsed);
4626 #else // _TARGET_AMD64_
4628 UINT16 numStackBytes = static_cast<UINT16>(hash & ~3);
4630 // ..repush & reenregister args..
4631 INT32 ofs = numStackBytes + SecureDelegateFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs();
4632 while (ofs != SecureDelegateFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs())
4634 ofs -= sizeof(void*);
4635 X86EmitIndexPush(kESI, ofs);
4638 #define ARGUMENT_REGISTER(regname) if (k##regname != THIS_kREG) { X86EmitIndexRegLoad(k##regname, kESI, \
4639 offsetof(ArgumentRegisters, regname) + SecureDelegateFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgumentRegisters()); }
4641 ENUM_ARGUMENT_REGISTERS_BACKWARD();
4643 #undef ARGUMENT_REGISTER
4645 // mov SCRATCHREG, [ecx+Delegate._invocationList] ;;fetch the inner delegate
4646 X86EmitIndexRegLoad(SCRATCH_REGISTER_X86REG, THIS_kREG, DelegateObject::GetOffsetOfInvocationList());
4648 // mov THISREG, [SCRATCHREG+Delegate.object] ;;replace "this" pointer
4649 X86EmitIndexRegLoad(THIS_kREG, SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfTarget());
4651 // call [SCRATCHREG+Delegate.target] ;; call current subscriber
4652 X86EmitOffsetModRM(0xff, (X86Reg)2, SCRATCH_REGISTER_X86REG, DelegateObject::GetOffsetOfMethodPtr());
4653 INDEBUG(Emit8(0x90)); // Emit a nop after the call in debug so that
4654 // we know that this is a call that can directly call managed code
4657 #endif // _TARGET_AMD64_
4659 // The debugger may need to stop here, so grab the offset of this code.
4662 EmitCheckGSCookie(kESI, SecureDelegateFrame::GetOffsetOfGSCookie());
4665 EmitMethodStubEpilog(numStackBytes, SecureDelegateFrame::GetOffsetOfTransitionBlock());
4667 #endif // !CROSSGEN_COMPILE && !FEATURE_STUBS_AS_IL
4669 #if !defined(CROSSGEN_COMPILE) && !defined(FEATURE_ARRAYSTUB_AS_IL)
4671 // Little helper to generate code to move nbytes bytes of non-ref memory (memory containing no object references)
4673 void generate_noref_copy (unsigned nbytes, StubLinkerCPU* sl)
4679 INJECT_FAULT(COMPlusThrowOM(););
4683 // If the size is pointer-aligned, we'll use movsd
4684 if (IS_ALIGNED(nbytes, sizeof(void*)))
4686 // If there are less than 4 pointers to copy, "unroll" the "rep movsd"
4687 if (nbytes <= 3*sizeof(void*))
4692 sl->X86_64BitOperands();
4695 nbytes -= sizeof(void*);
4700 // mov ECX, size / 4
4701 sl->Emit8(0xb8+kECX);
4702 sl->Emit32(nbytes / sizeof(void*));
4706 sl->X86_64BitOperands();
4713 sl->Emit8(0xb8+kECX);
4722 X86Reg LoadArrayOpArg (
4725 X86Reg kRegIfFromMem,
4727 AMD64_ARG(StubLinkerCPU::X86OperandSize OperandSize = StubLinkerCPU::k64BitOp)
4730 STANDARD_VM_CONTRACT;
4732 if (!TransitionBlock::IsStackArgumentOffset(idxloc))
4733 return GetX86ArgumentRegisterFromOffset(idxloc - TransitionBlock::GetOffsetOfArgumentRegisters());
4735 psl->X86EmitEspOffset(0x8b, kRegIfFromMem, idxloc + ofsadjust AMD64_ARG(OperandSize));
4736 return kRegIfFromMem;
4739 VOID StubLinkerCPU::EmitArrayOpStubThrow(unsigned exConst, unsigned cbRetArg)
4741 STANDARD_VM_CONTRACT;
4743 //ArrayOpStub*Exception
4744 X86EmitPopReg(kESI);
4745 X86EmitPopReg(kEDI);
4747 //mov CORINFO_NullReferenceException_ASM, %ecx
4750 //InternalExceptionWorker
4752 X86EmitPopReg(kEDX);
4753 // add pArrayOpScript->m_cbretpop, %esp (was add %eax, %esp)
4757 X86EmitPushReg(kEDX);
4758 X86EmitNearJump(NewExternalCodeLabel((PVOID)JIT_InternalThrow));
4761 //===========================================================================
4762 // Emits code to do an array operation.
4764 #pragma warning(push)
4765 #pragma warning(disable:21000) // Suppress PREFast warning about overly large function
4767 VOID StubLinkerCPU::EmitArrayOpStub(const ArrayOpScript* pArrayOpScript)
4769 STANDARD_VM_CONTRACT;
// This is the offset to the parameters/what's already pushed on the stack: the return address.
4773 const INT locsize = sizeof(void*);
4775 // ArrayOpScript's stack offsets are built using ArgIterator, which
4776 // assumes a TransitionBlock has been pushed, which is not the case
4777 // here. rsp + ofsadjust should point at the first argument. Any further
4778 // stack modifications below need to adjust ofsadjust appropriately.
4779 // baseofsadjust needs to be the stack adjustment at the entry point -
4780 // this is used further below to compute how much stack space was used.
4782 INT ofsadjust = locsize - (INT)sizeof(TransitionBlock);
// Register usage                                  (x86)               (AMD64)
//   managed array                                 THIS_kREG (ecx)     THIS_kREG (rcx)
//   index 1/value                                 <stack>             r8
//   index 2/value                                 <stack>             r9
//   expected element type for LOADADDR            eax                 rax  rdx
// Working registers:
//   total (accumulates unscaled offset)           edi                 r10
//   factor (accumulates the slice factor)         esi                 r11
4796 X86Reg kArrayRefReg = THIS_kREG;
4797 #ifdef _TARGET_AMD64_
4798 const X86Reg kArrayMTReg = kR10;
4799 const X86Reg kTotalReg = kR10;
4800 const X86Reg kFactorReg = kR11;
4802 const X86Reg kArrayMTReg = kESI;
4803 const X86Reg kTotalReg = kEDI;
4804 const X86Reg kFactorReg = kESI;
4807 #ifdef _TARGET_AMD64_
4808 // Simplifying assumption for fNeedPrologue.
4809 _ASSERTE(!pArrayOpScript->m_gcDesc || (pArrayOpScript->m_flags & ArrayOpScript::NEEDSWRITEBARRIER));
4810 // Simplifying assumption for saving rsi and rdi.
4811 _ASSERTE(!(pArrayOpScript->m_flags & ArrayOpScript::HASRETVALBUFFER) || ArgIterator::IsArgPassedByRef(pArrayOpScript->m_elemsize));
4813 // Cases where we need to make calls
4814 BOOL fNeedScratchArea = ( (pArrayOpScript->m_flags & (ArrayOpScript::NEEDSTYPECHECK | ArrayOpScript::NEEDSWRITEBARRIER))
4815 && ( pArrayOpScript->m_op == ArrayOpScript::STORE
4816 || ( pArrayOpScript->m_op == ArrayOpScript::LOAD
4817 && (pArrayOpScript->m_flags & ArrayOpScript::HASRETVALBUFFER))));
4819 // Cases where we need to copy large values
4820 BOOL fNeedRSIRDI = ( ArgIterator::IsArgPassedByRef(pArrayOpScript->m_elemsize)
4821 && ArrayOpScript::LOADADDR != pArrayOpScript->m_op);
4823 BOOL fNeedPrologue = ( fNeedScratchArea
4829 CodeLabel *Epilog = NewCodeLabel();
4830 CodeLabel *Inner_nullexception = NewCodeLabel();
4831 CodeLabel *Inner_rangeexception = NewCodeLabel();
4832 CodeLabel *Inner_typeMismatchexception = NULL;
4835 // Set up the stack frame.
4849 // value, if rank > 2
4851 // + 0x48 more indices
4856 // + 0x20 return address
4857 // + 0x18 scratch area (callee's r9)
4858 // + 0x10 scratch area (callee's r8)
4859 // + 8 scratch area (callee's rdx)
4860 // rsp -> scratch area (callee's rcx)
4862 // If the element type is a value class w/ object references, then rsi
4863 // and rdi will also be saved above the scratch area:
4868 // + 0x18 scratch area (callee's r9)
4869 // + 0x10 scratch area (callee's r8)
4870 // + 8 scratch area (callee's rdx)
4871 // rsp -> scratch area (callee's rcx)
4873 // And if no call or movsb is necessary, then the scratch area sits
4874 // directly under the MethodDesc*.
4876 BOOL fSavedESI = FALSE;
4877 BOOL fSavedEDI = FALSE;
4879 #ifdef _TARGET_AMD64_
4882 // Save argument registers if we'll be making a call before using
4883 // them. Note that in this case the element value will always be an
4884 // object type, and never be in an xmm register.
4886 if ( (pArrayOpScript->m_flags & ArrayOpScript::NEEDSTYPECHECK)
4887 && ArrayOpScript::STORE == pArrayOpScript->m_op)
4889 // mov [rsp+0x08], rcx
4890 X86EmitEspOffset(0x89, kRCX, 0x08);
4891 X86EmitEspOffset(0x89, kRDX, 0x10);
4892 X86EmitEspOffset(0x89, kR8, 0x18);
4894 if (pArrayOpScript->m_rank >= 2)
4895 X86EmitEspOffset(0x89, kR9, 0x20);
4900 X86EmitPushReg(kRSI);
4901 X86EmitPushReg(kRDI);
4903 fSavedESI = fSavedEDI = TRUE;
4908 if (fNeedScratchArea)
4910 // Callee scratch area (0x8 for aligned esp)
4911 X86EmitSubEsp(sizeof(ArgumentRegisters) + 0x8);
4912 ofsadjust += sizeof(ArgumentRegisters) + 0x8;
4916 // Preserve the callee-saved registers
4917 // NOTE: if you change the sequence of these pushes, you must also update:
4918 // ArrayOpStubNullException
4919 // ArrayOpStubRangeException
4920 // ArrayOpStubTypeMismatchException
4921 _ASSERTE( kTotalReg == kEDI);
4922 X86EmitPushReg(kTotalReg);
4923 _ASSERTE( kFactorReg == kESI);
4924 X86EmitPushReg(kFactorReg);
4926 fSavedESI = fSavedEDI = TRUE;
4928 ofsadjust += 2*sizeof(void*);
4932 X86EmitR2ROp(0x85, kArrayRefReg, kArrayRefReg); // TEST ECX, ECX
4933 X86EmitCondJump(Inner_nullexception, X86CondCode::kJZ); // jz Inner_nullexception
4935 // Do Type Check if needed
4936 if (pArrayOpScript->m_flags & ArrayOpScript::NEEDSTYPECHECK)
4938 if (pArrayOpScript->m_op == ArrayOpScript::STORE)
4940 // Get the value to be stored.
4941 kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, kEAX, ofsadjust);
4943 X86EmitR2ROp(0x85, kValueReg, kValueReg); // TEST kValueReg, kValueReg
4944 CodeLabel *CheckPassed = NewCodeLabel();
4945 X86EmitCondJump(CheckPassed, X86CondCode::kJZ); // storing NULL is OK
4947 // mov EAX, element type ; possibly trashes kValueReg
4948 X86EmitOp(0x8b, kArrayMTReg, kArrayRefReg, 0 AMD64_ARG(k64BitOp)); // mov ESI/R10, [kArrayRefReg]
4950 X86EmitOp(0x8b, kEAX, kValueReg, 0 AMD64_ARG(k64BitOp)); // mov EAX, [kValueReg] ; possibly trashes kValueReg
4951 // cmp EAX, [ESI/R10+m_ElementType]
4953 X86EmitOp(0x3b, kEAX, kArrayMTReg, MethodTable::GetOffsetOfArrayElementTypeHandle() AMD64_ARG(k64BitOp));
4954 X86EmitCondJump(CheckPassed, X86CondCode::kJZ); // Exact match is OK
4956 X86EmitRegLoad(kEAX, (UINT_PTR)g_pObjectClass); // mov EAX, g_pObjectMethodTable
4957 // cmp EAX, [ESI/R10+m_ElementType]
4959 X86EmitOp(0x3b, kEAX, kArrayMTReg, MethodTable::GetOffsetOfArrayElementTypeHandle() AMD64_ARG(k64BitOp));
4960 X86EmitCondJump(CheckPassed, X86CondCode::kJZ); // Assigning to array of object is OK
4962 // Try to call the fast helper first ( ObjIsInstanceOfNoGC ).
4963 // If that fails we will fall back to calling the slow helper ( ArrayStoreCheck ) that erects a frame.
4964 // See also JitInterfaceX86::JIT_Stelem_Ref
4966 #ifdef _TARGET_AMD64_
4967 // RCX contains pointer to object to check (Object*)
4968 // RDX contains array type handle
4970 // mov RCX, [rsp+offsetToObject] ; RCX = Object*
4971 X86EmitEspOffset(0x8b, kRCX, ofsadjust + pArrayOpScript->m_fValLoc);
4973 // get Array TypeHandle
4974 // mov RDX, [RSP+offsetOfTypeHandle]
4976 X86EmitEspOffset(0x8b, kRDX, ofsadjust
4977 + TransitionBlock::GetOffsetOfArgumentRegisters()
4978 + FIELD_OFFSET(ArgumentRegisters, THIS_REG));
4980 // mov RDX, [kArrayMTReg+offsetof(MethodTable, m_ElementType)]
4981 X86EmitIndexRegLoad(kRDX, kArrayMTReg, MethodTable::GetOffsetOfArrayElementTypeHandle());
4984 X86EmitPushReg(kEDX); // Save EDX
4985 X86EmitPushReg(kECX); // Pass array object
4987 X86EmitIndexPush(kArrayMTReg, MethodTable::GetOffsetOfArrayElementTypeHandle()); // push [kArrayMTReg + m_ElementType] ; Array element type handle
4989 // get address of value to store
4990 _ASSERTE(TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fValLoc)); // on x86, value will never get a register
4991 X86EmitSPIndexPush(pArrayOpScript->m_fValLoc + ofsadjust + 3*sizeof(void*)); // push [ESP+offset] ; the object pointer
4996 // emit a call to the fast helper
// A side effect of this is that we generate a "jnz Epilog" that is not needed on the
// fast path. Emitting it there is harmless, however, and it keeps the cleanup code
// much simpler: there is only one place where the stack is cleaned up and restored
// to its original state.
5002 X86EmitCall(NewExternalCodeLabel((LPVOID)ObjIsInstanceOfNoGC), 0);
5003 X86EmitCmpRegImm32( kEAX, TypeHandle::CanCast); // CMP EAX, CanCast ; if ObjIsInstanceOfNoGC returns CanCast, we will go the fast path
5004 CodeLabel * Cleanup = NewCodeLabel();
5005 X86EmitCondJump(Cleanup, X86CondCode::kJZ);
5007 #ifdef _TARGET_AMD64_
5008 // get address of value to store
5009 // lea rcx, [rsp+offs]
5010 X86EmitEspOffset(0x8d, kRCX, ofsadjust + pArrayOpScript->m_fValLoc);
5012 // get address of 'this'/rcx
5013 // lea rdx, [rsp+offs]
5014 X86EmitEspOffset(0x8d, kRDX, ofsadjust
5015 + TransitionBlock::GetOffsetOfArgumentRegisters()
5016 + FIELD_OFFSET(ArgumentRegisters, THIS_REG));
// The stack is already set up correctly for the slow helper.
5020 _ASSERTE(TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fValLoc)); // on x86, value will never get a register
5021 X86EmitEspOffset(0x8d, kECX, pArrayOpScript->m_fValLoc + ofsadjust + 2*sizeof(void*)); // lea ECX, [ESP+offset]
5023 // get address of 'this'
5024 X86EmitEspOffset(0x8d, kEDX, 0); // lea EDX, [ESP] ; (address of ECX)
5028 AMD64_ONLY(_ASSERTE(fNeedScratchArea));
5029 X86EmitCall(NewExternalCodeLabel((LPVOID)ArrayStoreCheck), 0);
5032 #ifdef _TARGET_AMD64_
5033 X86EmitEspOffset(0x8b, kRCX, 0x00 + ofsadjust + TransitionBlock::GetOffsetOfArgumentRegisters());
5034 X86EmitEspOffset(0x8b, kRDX, 0x08 + ofsadjust + TransitionBlock::GetOffsetOfArgumentRegisters());
5035 X86EmitEspOffset(0x8b, kR8, 0x10 + ofsadjust + TransitionBlock::GetOffsetOfArgumentRegisters());
5037 if (pArrayOpScript->m_rank >= 2)
5038 X86EmitEspOffset(0x8b, kR9, 0x18 + ofsadjust + TransitionBlock::GetOffsetOfArgumentRegisters());
5040 X86EmitPopReg(kECX); // restore regs
5041 X86EmitPopReg(kEDX);
5044 X86EmitR2ROp(0x3B, kEAX, kEAX); // CMP EAX, EAX
X86EmitCondJump(Epilog, X86CondCode::kJNZ); // This branch is never taken, but the epilog walker relies on it
5048 EmitLabel(CheckPassed);
5052 _ASSERTE(pArrayOpScript->m_op == ArrayOpScript::LOADADDR);
5054 // Load up the hidden type parameter into 'typeReg'
5055 X86Reg typeReg = LoadArrayOpArg(pArrayOpScript->m_typeParamOffs, this, kEAX, ofsadjust);
// 'typeReg' holds the typeHandle for the ARRAY. This must be an ArrayTypeDesc*, so
// mask off the low two bits to get the TypeDesc*
5059 X86EmitR2ROp(0x83, (X86Reg)4, typeReg); // AND typeReg, 0xFFFFFFFC
// If 'typeReg' is NULL then we're executing the readonly ::Address and no type check is needed.
5064 CodeLabel *Inner_passedTypeCheck = NewCodeLabel();
5066 X86EmitCondJump(Inner_passedTypeCheck, X86CondCode::kJZ);
// Get the parameter of the parameterized type
5069 // mov typeReg, [typeReg.m_Arg]
5070 X86EmitOp(0x8b, typeReg, typeReg, offsetof(ParamTypeDesc, m_Arg) AMD64_ARG(k64BitOp));
5072 // Compare this against the element type of the array.
5073 // mov ESI/R10, [kArrayRefReg]
5074 X86EmitOp(0x8b, kArrayMTReg, kArrayRefReg, 0 AMD64_ARG(k64BitOp));
5075 // cmp typeReg, [ESI/R10+m_ElementType];
5076 X86EmitOp(0x3b, typeReg, kArrayMTReg, MethodTable::GetOffsetOfArrayElementTypeHandle() AMD64_ARG(k64BitOp));
5078 // Throw error if not equal
5079 Inner_typeMismatchexception = NewCodeLabel();
5080 X86EmitCondJump(Inner_typeMismatchexception, X86CondCode::kJNZ);
5081 EmitLabel(Inner_passedTypeCheck);
5085 CodeLabel* DoneCheckLabel = 0;
5086 if (pArrayOpScript->m_rank == 1 && pArrayOpScript->m_fHasLowerBounds)
5088 DoneCheckLabel = NewCodeLabel();
5089 CodeLabel* NotSZArrayLabel = NewCodeLabel();
// For rank-1 arrays we might actually have two different layouts, depending on
// whether we are ELEMENT_TYPE_ARRAY or ELEMENT_TYPE_SZARRAY.
5094 // mov EAX, [ARRAY] // EAX holds the method table
5095 X86_64BitOperands();
5096 X86EmitOp(0x8b, kEAX, kArrayRefReg);
5098 // test [EAX + m_dwFlags], enum_flag_Category_IfArrayThenSzArray
5099 X86_64BitOperands();
5100 X86EmitOffsetModRM(0xf7, (X86Reg)0, kEAX, MethodTable::GetOffsetOfFlags());
5101 Emit32(MethodTable::GetIfArrayThenSzArrayFlag());
5103 // jz NotSZArrayLabel
5104 X86EmitCondJump(NotSZArrayLabel, X86CondCode::kJZ);
5106 //Load the passed-in index into the scratch register.
5107 const ArrayOpIndexSpec *pai = pArrayOpScript->GetArrayOpIndexSpecs();
5108 X86Reg idxReg = LoadArrayOpArg(pai->m_idxloc, this, SCRATCH_REGISTER_X86REG, ofsadjust);
5110 // cmp idxReg, [kArrayRefReg + LENGTH]
5111 X86EmitOp(0x3b, idxReg, kArrayRefReg, ArrayBase::GetOffsetOfNumComponents());
5113 // jae Inner_rangeexception
5114 X86EmitCondJump(Inner_rangeexception, X86CondCode::kJAE);
// <TODO> if we cared about the efficiency of this, this move could be optimized</TODO>
5117 X86EmitR2ROp(0x8b, kTotalReg, idxReg AMD64_ARG(k32BitOp));
// sub ARRAY, 8 ; 8 accounts for the lower bound and dim count in the ARRAY
5120 X86EmitSubReg(kArrayRefReg, 8); // adjust this pointer so that indexing works out for SZARRAY
5122 X86EmitNearJump(DoneCheckLabel);
5123 EmitLabel(NotSZArrayLabel);
5126 // For each index, range-check and mix into accumulated total.
5127 UINT idx = pArrayOpScript->m_rank;
5128 BOOL firstTime = TRUE;
5131 const ArrayOpIndexSpec *pai = pArrayOpScript->GetArrayOpIndexSpecs() + idx;
5133 //Load the passed-in index into the scratch register.
5134 X86Reg srcreg = LoadArrayOpArg(pai->m_idxloc, this, SCRATCH_REGISTER_X86REG, ofsadjust AMD64_ARG(k32BitOp));
5135 if (SCRATCH_REGISTER_X86REG != srcreg)
5136 X86EmitR2ROp(0x8b, SCRATCH_REGISTER_X86REG, srcreg AMD64_ARG(k32BitOp));
5138 // sub SCRATCH, dword ptr [kArrayRefReg + LOWERBOUND]
5139 if (pArrayOpScript->m_fHasLowerBounds)
5141 X86EmitOp(0x2b, SCRATCH_REGISTER_X86REG, kArrayRefReg, pai->m_lboundofs);
5144 // cmp SCRATCH, dword ptr [kArrayRefReg + LENGTH]
5145 X86EmitOp(0x3b, SCRATCH_REGISTER_X86REG, kArrayRefReg, pai->m_lengthofs);
5147 // jae Inner_rangeexception
5148 X86EmitCondJump(Inner_rangeexception, X86CondCode::kJAE);
5151 // SCRATCH == idx - LOWERBOUND
5153 // imul SCRATCH, FACTOR
5156 //Can skip the first time since FACTOR==1
5157 X86EmitR2ROp(0xaf0f, SCRATCH_REGISTER_X86REG, kFactorReg AMD64_ARG(k32BitOp));
// The first time through we must zero-init TOTAL. Since zero-initing and
// then adding is equivalent to a "mov", emit a "mov".
5166 // mov TOTAL, SCRATCH
5167 X86EmitR2ROp(0x8b, kTotalReg, SCRATCH_REGISTER_X86REG AMD64_ARG(k32BitOp));
5171 // add TOTAL, SCRATCH
5172 X86EmitR2ROp(0x03, kTotalReg, SCRATCH_REGISTER_X86REG AMD64_ARG(k32BitOp));
5175 // FACTOR *= [kArrayRefReg + LENGTH]
5178 // No need to update FACTOR on the last iteration
5179 // since we won't use it again
// The first time through, FACTOR would have to be initialized to 1 first,
// so the "imul" becomes a "mov".
5185 // mov FACTOR, [kArrayRefReg + LENGTH]
5186 X86EmitOp(0x8b, kFactorReg, kArrayRefReg, pai->m_lengthofs);
5190 // imul FACTOR, [kArrayRefReg + LENGTH]
5191 X86EmitOp(0xaf0f, kFactorReg, kArrayRefReg, pai->m_lengthofs);
5198 if (DoneCheckLabel != 0)
5199 EmitLabel(DoneCheckLabel);
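// As a sanity check on the index loop above, here is a minimal C++ sketch (not emitted,
// not compiled) of the same computation for a hypothetical rank-2 array. The innermost
// dimension is processed first with FACTOR == 1, and FACTOR then picks up that
// dimension's length.
#if 0
    // hypothetical names: lb0/lb1 lower bounds, len1 length of the last dimension,
    // idx0/idx1 the incoming indices
    UINT32 total  = idx1 - lb1;          // first iteration:  mov TOTAL, SCRATCH
    UINT32 factor = len1;                //                    mov FACTOR, [ARRAY + LENGTH]
    total += (idx0 - lb0) * factor;      // second iteration: imul SCRATCH, FACTOR; add TOTAL, SCRATCH
    // 'total' is the unscaled element index; it is scaled by the element size below.
#endif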
5201 // Pass these values to X86EmitArrayOp() to generate the element address.
5202 X86Reg elemBaseReg = kArrayRefReg;
5203 X86Reg elemScaledReg = kTotalReg;
5204 UINT32 elemSize = pArrayOpScript->m_elemsize;
5205 UINT32 elemOfs = pArrayOpScript->m_ofsoffirst;
5207 if (!(elemSize == 1 || elemSize == 2 || elemSize == 4 || elemSize == 8))
// No way to express this as a SIB byte. Fold the scale into TOTAL instead.
5216 X86EmitR2ROp(0xc1, (X86Reg)4, kTotalReg AMD64_ARG(k32BitOp));
5222 X86EmitR2ROp(0xc1, (X86Reg)4, kTotalReg AMD64_ARG(k32BitOp));
5228 X86EmitR2ROp(0xc1, (X86Reg)4, kTotalReg AMD64_ARG(k32BitOp));
5233 // imul TOTAL, elemScale
5234 X86EmitR2ROp(0x69, kTotalReg, kTotalReg AMD64_ARG(k32BitOp));
5241 _ASSERTE(FitsInU1(elemSize));
5242 BYTE elemScale = static_cast<BYTE>(elemSize);
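// Example: for elemSize == 12 the "default" case above emits "imul TOTAL, TOTAL, 12" and
// the SIB scale used below degenerates to 1; for elemSize == 16 a single "shl TOTAL, 4"
// does the same job, since 16 is also not expressible as a SIB scale.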
5244 // Now, do the operation:
5246 switch (pArrayOpScript->m_op)
5248 case ArrayOpScript::LOADADDR:
5249 // lea eax, ELEMADDR
5250 X86EmitOp(0x8d, kEAX, elemBaseReg, elemOfs, elemScaledReg, elemScale AMD64_ARG(k64BitOp));
5253 case ArrayOpScript::LOAD:
5254 if (pArrayOpScript->m_flags & ArrayOpScript::HASRETVALBUFFER)
5256 // Ensure that these registers have been saved!
5257 _ASSERTE(fSavedESI && fSavedEDI);
5260 X86EmitOp(0x8d, kESI, elemBaseReg, elemOfs, elemScaledReg, elemScale AMD64_ARG(k64BitOp));
5262 _ASSERTE(!TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fRetBufLoc));
5263 // mov edi, retbufptr
5264 X86EmitR2ROp(0x8b, kEDI, GetX86ArgumentRegisterFromOffset(pArrayOpScript->m_fRetBufLoc - TransitionBlock::GetOffsetOfArgumentRegisters()));
5268 size_t size = pArrayOpScript->m_elemsize;
5270 if(pArrayOpScript->m_gcDesc)
5272 CGCDescSeries* cur = pArrayOpScript->m_gcDesc->GetHighestSeries();
5273 if ((cur->startoffset-elemOfs) > 0)
5274 generate_noref_copy ((unsigned) (cur->startoffset - elemOfs), this);
5275 total += cur->startoffset - elemOfs;
5277 SSIZE_T cnt = (SSIZE_T) pArrayOpScript->m_gcDesc->GetNumSeries();
5278 // special array encoding
5281 for (SSIZE_T __i = 0; __i > cnt; __i--)
5283 HALF_SIZE_T skip = cur->val_serie[__i].skip;
5284 HALF_SIZE_T nptrs = cur->val_serie[__i].nptrs;
5285 total += nptrs*sizeof (DWORD*);
5288 AMD64_ONLY(_ASSERTE(fNeedScratchArea));
5290 X86EmitCall(NewExternalCodeLabel((LPVOID) JIT_ByRefWriteBarrier), 0);
5294 //check if we are at the end of the series
5295 if (__i == (cnt + 1))
5296 skip = skip - (HALF_SIZE_T)(cur->startoffset - elemOfs);
5298 generate_noref_copy (skip, this);
5303 _ASSERTE (size == total);
5307 // no ref anywhere, just copy the bytes.
5309 generate_noref_copy ((unsigned)size, this);
5315 switch (pArrayOpScript->m_elemsize)
5318 // mov[zs]x eax, byte ptr ELEMADDR
5319 X86EmitOp(pArrayOpScript->m_signed ? 0xbe0f : 0xb60f, kEAX, elemBaseReg, elemOfs, elemScaledReg, elemScale);
5323 // mov[zs]x eax, word ptr ELEMADDR
5324 X86EmitOp(pArrayOpScript->m_signed ? 0xbf0f : 0xb70f, kEAX, elemBaseReg, elemOfs, elemScaledReg, elemScale);
5328 if (pArrayOpScript->m_flags & ArrayOpScript::ISFPUTYPE)
5330 #ifdef _TARGET_AMD64_
5331 // movss xmm0, dword ptr ELEMADDR
5333 X86EmitOp(0x100f, (X86Reg)0, elemBaseReg, elemOfs, elemScaledReg, elemScale);
5334 #else // !_TARGET_AMD64_
5335 // fld dword ptr ELEMADDR
5336 X86EmitOp(0xd9, (X86Reg)0, elemBaseReg, elemOfs, elemScaledReg, elemScale);
5337 #endif // !_TARGET_AMD64_
5341 // mov eax, ELEMADDR
5342 X86EmitOp(0x8b, kEAX, elemBaseReg, elemOfs, elemScaledReg, elemScale);
5347 if (pArrayOpScript->m_flags & ArrayOpScript::ISFPUTYPE)
5349 #ifdef _TARGET_AMD64_
5350 // movsd xmm0, qword ptr ELEMADDR
5352 X86EmitOp(0x100f, (X86Reg)0, elemBaseReg, elemOfs, elemScaledReg, elemScale);
5353 #else // !_TARGET_AMD64_
5354 // fld qword ptr ELEMADDR
5355 X86EmitOp(0xdd, (X86Reg)0, elemBaseReg, elemOfs, elemScaledReg, elemScale);
5356 #endif // !_TARGET_AMD64_
5360 // mov eax, ELEMADDR
5361 X86EmitOp(0x8b, kEAX, elemBaseReg, elemOfs, elemScaledReg, elemScale AMD64_ARG(k64BitOp));
5363 // mov edx, ELEMADDR + 4
5364 X86EmitOp(0x8b, kEDX, elemBaseReg, elemOfs + 4, elemScaledReg, elemScale);
5376 case ArrayOpScript::STORE:
5378 switch (pArrayOpScript->m_elemsize)
5381 // mov SCRATCH, [esp + valoffset]
5382 kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, SCRATCH_REGISTER_X86REG, ofsadjust);
5383 // mov byte ptr ELEMADDR, SCRATCH.b
5384 X86EmitOp(0x88, kValueReg, elemBaseReg, elemOfs, elemScaledReg, elemScale);
5387 // mov SCRATCH, [esp + valoffset]
5388 kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, SCRATCH_REGISTER_X86REG, ofsadjust);
5389 // mov word ptr ELEMADDR, SCRATCH.w
5391 X86EmitOp(0x89, kValueReg, elemBaseReg, elemOfs, elemScaledReg, elemScale);
5394 #ifndef _TARGET_AMD64_
5395 if (pArrayOpScript->m_flags & ArrayOpScript::NEEDSWRITEBARRIER)
5397 // mov SCRATCH, [esp + valoffset]
5398 kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, SCRATCH_REGISTER_X86REG, ofsadjust);
5400 _ASSERTE(SCRATCH_REGISTER_X86REG == kEAX); // value to store is already in EAX where we want it.
5401 // lea edx, ELEMADDR
5402 X86EmitOp(0x8d, kEDX, elemBaseReg, elemOfs, elemScaledReg, elemScale);
5404 // call JIT_Writeable_Thunks_Buf.WriteBarrierReg[0] (== EAX)
5405 X86EmitCall(NewExternalCodeLabel((LPVOID) &JIT_WriteBarrierEAX), 0);
5408 #else // _TARGET_AMD64_
5409 if (pArrayOpScript->m_flags & ArrayOpScript::ISFPUTYPE)
5411 if (!TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fValLoc))
5413 kValueReg = (X86Reg)TransitionBlock::GetArgumentIndexFromOffset(pArrayOpScript->m_fValLoc);
5417 kValueReg = (X86Reg)0; // xmm0
5419 // movss xmm0, dword ptr [rsp+??]
5421 X86EmitOp(0x100f, kValueReg, (X86Reg)4 /*rsp*/, ofsadjust + pArrayOpScript->m_fValLoc);
5424 // movss dword ptr ELEMADDR, xmm?
5426 X86EmitOp(0x110f, kValueReg, elemBaseReg, elemOfs, elemScaledReg, elemScale);
5429 #endif // _TARGET_AMD64_
5431 // mov SCRATCH, [esp + valoffset]
5432 kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, SCRATCH_REGISTER_X86REG, ofsadjust AMD64_ARG(k32BitOp));
5434 // mov ELEMADDR, SCRATCH
5435 X86EmitOp(0x89, kValueReg, elemBaseReg, elemOfs, elemScaledReg, elemScale);
5441 if (!(pArrayOpScript->m_flags & ArrayOpScript::NEEDSWRITEBARRIER))
5443 #ifdef _TARGET_AMD64_
5444 if (pArrayOpScript->m_flags & ArrayOpScript::ISFPUTYPE)
5446 if (!TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fValLoc))
5448 kValueReg = (X86Reg)TransitionBlock::GetArgumentIndexFromOffset(pArrayOpScript->m_fValLoc);
5452 kValueReg = (X86Reg)0; // xmm0
5454 // movsd xmm0, qword ptr [rsp+??]
5456 X86EmitOp(0x100f, kValueReg, (X86Reg)4 /*rsp*/, ofsadjust + pArrayOpScript->m_fValLoc);
5459 // movsd qword ptr ELEMADDR, xmm?
5461 X86EmitOp(0x110f, kValueReg, elemBaseReg, elemOfs, elemScaledReg, elemScale);
5465 // mov SCRATCH, [esp + valoffset]
5466 kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, SCRATCH_REGISTER_X86REG, ofsadjust);
5468 // mov ELEMADDR, SCRATCH
5469 X86EmitOp(0x89, kValueReg, elemBaseReg, elemOfs, elemScaledReg, elemScale, k64BitOp);
5471 #else // !_TARGET_AMD64_
5472 _ASSERTE(TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fValLoc)); // on x86, value will never get a register: so too lazy to implement that case
5473 // mov SCRATCH, [esp + valoffset]
5474 X86EmitEspOffset(0x8b, SCRATCH_REGISTER_X86REG, pArrayOpScript->m_fValLoc + ofsadjust);
5475 // mov ELEMADDR, SCRATCH
5476 X86EmitOp(0x89, SCRATCH_REGISTER_X86REG, elemBaseReg, elemOfs, elemScaledReg, elemScale);
5478 _ASSERTE(TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fValLoc)); // on x86, value will never get a register: so too lazy to implement that case
5479 // mov SCRATCH, [esp + valoffset + 4]
5480 X86EmitEspOffset(0x8b, SCRATCH_REGISTER_X86REG, pArrayOpScript->m_fValLoc + ofsadjust + 4);
5481 // mov ELEMADDR+4, SCRATCH
5482 X86EmitOp(0x89, SCRATCH_REGISTER_X86REG, elemBaseReg, elemOfs+4, elemScaledReg, elemScale);
5483 #endif // !_TARGET_AMD64_
5486 #ifdef _TARGET_AMD64_
5489 _ASSERTE(SCRATCH_REGISTER_X86REG == kEAX); // value to store is already in EAX where we want it.
5490 // lea rcx, ELEMADDR
5491 X86EmitOp(0x8d, kRCX, elemBaseReg, elemOfs, elemScaledReg, elemScale, k64BitOp);
5493 // mov rdx, [rsp + valoffset]
5494 kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, kRDX, ofsadjust);
5495 _ASSERT(kRCX != kValueReg);
5496 if (kRDX != kValueReg)
5497 X86EmitR2ROp(0x8b, kRDX, kValueReg);
5499 _ASSERTE(fNeedScratchArea);
5500 X86EmitCall(NewExternalCodeLabel((PVOID)JIT_WriteBarrier), 0);
5503 #endif // _TARGET_AMD64_
5504 // FALL THROUGH (on x86)
5506 // Ensure that these registers have been saved!
5507 _ASSERTE(fSavedESI && fSavedEDI);
5509 #ifdef _TARGET_AMD64_
5510 // mov rsi, [rsp + valoffset]
5511 kValueReg = LoadArrayOpArg(pArrayOpScript->m_fValLoc, this, kRSI, ofsadjust);
5512 if (kRSI != kValueReg)
5513 X86EmitR2ROp(0x8b, kRSI, kValueReg);
5514 #else // !_TARGET_AMD64_
5515 _ASSERTE(TransitionBlock::IsStackArgumentOffset(pArrayOpScript->m_fValLoc));
5516 // lea esi, [esp + valoffset]
5517 X86EmitEspOffset(0x8d, kESI, pArrayOpScript->m_fValLoc + ofsadjust);
5518 #endif // !_TARGET_AMD64_
5520 // lea edi, ELEMADDR
5521 X86EmitOp(0x8d, kEDI, elemBaseReg, elemOfs, elemScaledReg, elemScale AMD64_ARG(k64BitOp));
5522 goto COPY_VALUE_CLASS;
5532 #ifdef _TARGET_AMD64_
5535 if (fNeedScratchArea)
5537 // Throw away scratch area
5538 X86EmitAddEsp(sizeof(ArgumentRegisters) + 0x8);
5542 X86EmitPopReg(kRDI);
5545 X86EmitPopReg(kRSI);
5549 #else // !_TARGET_AMD64_
5550 // Restore the callee-saved registers
5551 X86EmitPopReg(kFactorReg);
5552 X86EmitPopReg(kTotalReg);
5554 #ifndef UNIX_X86_ABI
5556 X86EmitReturn(pArrayOpScript->m_cbretpop);
5560 #endif // !_TARGET_AMD64_
5562 // Exception points must clean up the stack for all those extra args.
5563 // kFactorReg and kTotalReg will be popped by the jump targets.
5565 void *pvExceptionThrowFn;
5567 #if defined(_TARGET_AMD64_)
5568 #define ARRAYOP_EXCEPTION_HELPERS(base) { (PVOID)base, (PVOID)base##_RSIRDI, (PVOID)base##_ScratchArea, (PVOID)base##_RSIRDI_ScratchArea }
5569 static void *rgNullExceptionHelpers[] = ARRAYOP_EXCEPTION_HELPERS(ArrayOpStubNullException);
5570 static void *rgRangeExceptionHelpers[] = ARRAYOP_EXCEPTION_HELPERS(ArrayOpStubRangeException);
5571 static void *rgTypeMismatchExceptionHelpers[] = ARRAYOP_EXCEPTION_HELPERS(ArrayOpStubTypeMismatchException);
5572 #undef ARRAYOP_EXCEPTION_HELPERS
5574 UINT iExceptionHelper = (fNeedRSIRDI ? 1 : 0) + (fNeedScratchArea ? 2 : 0);
5575 #endif // defined(_TARGET_AMD64_)
5577 EmitLabel(Inner_nullexception);
5579 #ifndef _TARGET_AMD64_
5580 pvExceptionThrowFn = (LPVOID)ArrayOpStubNullException;
5582 Emit8(0xb8); // mov EAX, <stack cleanup>
5583 Emit32(pArrayOpScript->m_cbretpop);
5584 #else //_TARGET_AMD64_
5585 pvExceptionThrowFn = rgNullExceptionHelpers[iExceptionHelper];
5586 #endif //!_TARGET_AMD64_
5587 X86EmitNearJump(NewExternalCodeLabel(pvExceptionThrowFn));
5589 EmitLabel(Inner_rangeexception);
5590 #ifndef _TARGET_AMD64_
5591 pvExceptionThrowFn = (LPVOID)ArrayOpStubRangeException;
5592 Emit8(0xb8); // mov EAX, <stack cleanup>
5593 Emit32(pArrayOpScript->m_cbretpop);
5594 #else //_TARGET_AMD64_
5595 pvExceptionThrowFn = rgRangeExceptionHelpers[iExceptionHelper];
5596 #endif //!_TARGET_AMD64_
5597 X86EmitNearJump(NewExternalCodeLabel(pvExceptionThrowFn));
5599 if (Inner_typeMismatchexception != NULL)
5601 EmitLabel(Inner_typeMismatchexception);
5602 #ifndef _TARGET_AMD64_
5603 pvExceptionThrowFn = (LPVOID)ArrayOpStubTypeMismatchException;
5604 Emit8(0xb8); // mov EAX, <stack cleanup>
5605 Emit32(pArrayOpScript->m_cbretpop);
5606 #else //_TARGET_AMD64_
5607 pvExceptionThrowFn = rgTypeMismatchExceptionHelpers[iExceptionHelper];
5608 #endif //!_TARGET_AMD64_
5609 X86EmitNearJump(NewExternalCodeLabel(pvExceptionThrowFn));
5613 #pragma warning(pop)
5616 #endif // !CROSSGEN_COMPILE && !FEATURE_ARRAYSTUB_AS_IL
5618 #if !defined(CROSSGEN_COMPILE) && !defined(FEATURE_STUBS_AS_IL)
5619 //===========================================================================
5620 // Emits code to break into debugger
5621 VOID StubLinkerCPU::EmitDebugBreak()
5623 STANDARD_VM_CONTRACT;
5629 #if defined(FEATURE_COMINTEROP) && defined(_TARGET_X86_)
5632 #pragma warning(push)
5633 #pragma warning (disable : 4740) // There is inline asm code in this function, which disables
5634 // global optimizations.
5635 #pragma warning (disable : 4731)
5637 Thread* __stdcall CreateThreadBlockReturnHr(ComMethodFrame *pFrame)
5640 WRAPPER_NO_CONTRACT;
5642 Thread *pThread = NULL;
5646 // This means that a thread is FIRST coming in from outside the EE.
5647 BEGIN_ENTRYPOINT_THROWS;
5648 pThread = SetupThreadNoThrow(&hr);
5649 END_ENTRYPOINT_THROWS;
5651 if (pThread == NULL) {
5652 // Unwind stack, and return hr
5653 // NOTE: assumes __stdcall
5654 // Note that this code does not handle the rare COM signatures that do not return HRESULT
5655 // compute the callee pop stack bytes
5656 UINT numArgStackBytes = pFrame->GetNumCallerStackBytes();
5657 unsigned frameSize = sizeof(Frame) + sizeof(LPVOID);
5658 LPBYTE iEsp = ((LPBYTE)pFrame) + ComMethodFrame::GetOffsetOfCalleeSavedRegisters();
mov eax, hr
mov edx, numArgStackBytes
//*****************************************
// reset the stack pointer
// none of the locals above can be used in the asm below
// if we whack the stack pointer
mov esp, iEsp
// pop callee saved registers
pop edi
pop esi
pop ebx
pop ebp
pop ecx ; //return address
// pop the callee cleanup stack args
add esp, edx ;// callee cleanup of args
5676 jmp ecx; // jump to the address to continue execution
5678 // We will never get here. This "ret" is just so that code-disassembling
5679 // profilers know to stop disassembling any further
5686 #if defined(_MSC_VER)
5687 #pragma warning(pop)
5690 #endif // FEATURE_COMINTEROP && _TARGET_X86_
5692 #endif // !CROSSGEN_COMPILE && !FEATURE_STUBS_AS_IL
5694 #endif // !DACCESS_COMPILE
5697 #ifdef _TARGET_AMD64_
5700 // TailCallFrame Object Scanning
5702 // This handles scanning/promotion of GC objects that were
// protected by the TailCallHelper routine. Note that the set of objects
// being protected is somewhat dynamic and depends upon the signature of
// the method being tail-called (see CreateTailCallCopyArgsThunk below).
5708 void TailCallFrame::GcScanRoots(promote_func *fn, ScanContext* sc)
5710 WRAPPER_NO_CONTRACT;
5712 if (m_pGCLayout != NULL)
5714 struct FrameOffsetDecoder {
5720 PTR_SBYTE pbOffsets;
DWORD ReadNumber() {
    signed char i;
    DWORD offset = 0;
    while ((i = *pbOffsets++) >= 0)
        offset = (offset << 7) | i;
    offset = (offset << 7) | (i & 0x7F);
    return offset;
}
5734 FrameOffsetDecoder(PTR_GSCookie _base, TADDR offsets)
5735 : prevOffset(dac_cast<TADDR>(_base)), rangeEnd(~0LL), atEnd(FALSE), pbOffsets(dac_cast<PTR_SBYTE>(offsets)) { maybeInterior = FALSE;}
bool MoveNext() {
    LIMITED_METHOD_CONTRACT;
    if (rangeEnd < prevOffset)
    {
        prevOffset -= sizeof(void*);
        return true;
    }
    if (atEnd) return false;
5746 DWORD offset = ReadNumber();
5747 atEnd = (offset & 1);
5748 BOOL range = (offset & 2);
5749 maybeInterior = (offset & 0x80000000);
5751 offset &= 0x7FFFFFFC;
5756 offset += sizeof(void*);
5757 _ASSERTE(prevOffset > offset);
5758 prevOffset -= offset;
5763 _ASSERTE(!maybeInterior);
5764 DWORD offsetEnd = ReadNumber();
5765 atEnd = (offsetEnd & 1);
5766 offsetEnd = (offsetEnd & ~1) << 1;
5767 // range encoding starts with a range of 3 (2 is better to encode as
5768 // 2 offsets), so 0 == 2 (the last offset in the range)
5769 offsetEnd += sizeof(void*) * 2;
5770 rangeEnd = prevOffset - offsetEnd;
5776 BOOL MaybeInterior() const { return maybeInterior; }
5778 PTR_PTR_Object Current() const { return PTR_PTR_Object(prevOffset); }
5780 } decoder(GetGSCookiePtr(), m_pGCLayout);
5782 while (decoder.MoveNext())
5784 PTR_PTR_Object ppRef = decoder.Current();
5786 LOG((LF_GC, INFO3, "Tail Call Frame Promoting" FMT_ADDR "to",
5787 DBG_ADDR(OBJECTREF_TO_UNCHECKED_OBJECTREF(*ppRef)) ));
5788 if (decoder.MaybeInterior())
5789 PromoteCarefully(fn, ppRef, sc, GC_CALL_INTERIOR|CHECK_APP_DOMAIN);
5791 (*fn)(ppRef, sc, 0);
5792 LOG((LF_GC, INFO3, FMT_ADDR "\n", DBG_ADDR(OBJECTREF_TO_UNCHECKED_OBJECTREF(*ppRef)) ));
5797 #ifndef DACCESS_COMPILE
5798 static void EncodeOneGCOffset(CPUSTUBLINKER *pSl, ULONG delta, BOOL maybeInterior, BOOL range, BOOL last)
5802 THROWS; // From the stublinker
// Everything should be pointer-aligned, but we use the high bit to mean "maybe interior",
// bit 0 to denote the end of the list, and bit 1 to denote a range.
5811 _ASSERTE((delta % sizeof(void*)) == 0);
5814 // For 64-bit, we have 3 bits of alignment, so we allow larger frames
5815 // by shifting and gaining a free high-bit.
5816 ULONG encodedDelta = delta >> 1;
5818 // For 32-bit, we just limit our frame size to <2GB. (I know, such a bummer!)
5819 ULONG encodedDelta = delta;
5821 _ASSERTE((encodedDelta & 0x80000003) == 0);
5831 else if (maybeInterior)
5834 encodedDelta |= 0x80000000;
5839 bytes[--index] = (BYTE)((encodedDelta & 0x7F) | 0x80);
5841 while (encodedDelta > 0)
5843 bytes[--index] = (BYTE)(encodedDelta & 0x7F);
5846 pSl->EmitBytes(&bytes[index], 5 - index);
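// Worked example (illustrative only, 64-bit): a pointer-aligned delta of 0x288 with no
// flag bits set is shifted down to encodedDelta == 0x144 and emitted as base-128 digits,
// most significant first, with 0x80 marking the terminating byte:
#if 0
    ULONG encodedDelta = 0x288 >> 1;               // 0x144, no last/range/interior bits
    BYTE  emitted[2]   = { 0x02, 0xC4 };           // 0x144 >> 7, then (0x144 & 0x7F) | 0x80
    // FrameOffsetDecoder::ReadNumber reverses this by accumulating 7-bit digits until it
    // reads a byte with the high bit set: (0x02 << 7) | (0xC4 & 0x7F) == 0x144
#endif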
5849 static void EncodeGCOffsets(CPUSTUBLINKER *pSl, /* const */ ULONGARRAY & gcOffsets)
5859 _ASSERTE(gcOffsets.Count() > 0);
5861 ULONG prevOffset = 0;
5865 ULONG offset = gcOffsets[i];
5866 // Everything should be pointer aligned
5867 // but we use the 0-bit to mean maybeInterior, for byrefs.
5868 _ASSERTE(((offset % sizeof(void*)) == 0) || ((offset % sizeof(void*)) == 1));
5869 BOOL maybeInterior = (offset & 1);
5872 // Encode just deltas because they're smaller (and the list should be sorted)
5873 _ASSERTE(offset >= (prevOffset + sizeof(void*)));
5874 ULONG delta = offset - (prevOffset + sizeof(void*));
5875 if (!maybeInterior && gcOffsets.Count() > i + 2)
5877 // Check for a potential range.
5878 // Only do it if we have 3 or more pointers in a row
5879 ULONG rangeOffset = offset;
5882 ULONG nextOffset = gcOffsets[j];
5883 // interior pointers can't be in ranges
5886 // ranges must be saturated
5887 if (nextOffset != (rangeOffset + sizeof(void*)))
5890 rangeOffset = nextOffset;
5891 } while(j < gcOffsets.Count());
5895 EncodeOneGCOffset(pSl, delta, FALSE, TRUE, last);
5897 _ASSERTE(rangeOffset >= (offset + (sizeof(void*) * 2)));
5898 delta = rangeOffset - (offset + (sizeof(void*) * 2));
5899 offset = rangeOffset;
5902 last = (++i == gcOffsets.Count());
5905 EncodeOneGCOffset(pSl, delta, maybeInterior, FALSE, last);
5907 prevOffset = offset;
5911 static void AppendGCLayout(ULONGARRAY &gcLayout, size_t baseOffset, BOOL fIsTypedRef, TypeHandle VMClsHnd)
5913 STANDARD_VM_CONTRACT;
5915 _ASSERTE((baseOffset % 16) == 0);
5916 _ASSERTE(FitsInU4(baseOffset));
5920 *gcLayout.AppendThrowing() = (ULONG)(baseOffset | 1); // "| 1" to mark it as an interior pointer
5922 else if (!VMClsHnd.IsNativeValueType())
5924 MethodTable* pMT = VMClsHnd.GetMethodTable();
5926 _ASSERTE(pMT->IsValueType());
5928 if (pMT->IsByRefLike())
5930 FindByRefPointerOffsetsInByRefLikeObject(
5933 [&](size_t pointerOffset)
5935 *gcLayout.AppendThrowing() = (ULONG)(pointerOffset | 1); // "| 1" to mark it as an interior pointer
5939 // walk the GC descriptors, reporting the correct offsets
5940 if (pMT->ContainsPointers())
5942 // size of instance when unboxed must be adjusted for the syncblock
5943 // index and the VTable pointer.
5944 DWORD size = pMT->GetBaseSize();
5946 // we don't include this term in our 'ppstop' calculation below.
5947 _ASSERTE(pMT->GetComponentSize() == 0);
5949 CGCDesc* map = CGCDesc::GetCGCDescFromMT(pMT);
5950 CGCDescSeries* cur = map->GetLowestSeries();
5951 CGCDescSeries* last = map->GetHighestSeries();
5953 _ASSERTE(cur <= last);
5956 // offset to embedded references in this series must be
5957 // adjusted by the VTable pointer, when in the unboxed state.
5958 size_t adjustOffset = cur->GetSeriesOffset() - sizeof(void *);
5960 _ASSERTE(baseOffset >= adjustOffset);
5961 size_t start = baseOffset - adjustOffset;
5962 size_t stop = start - (cur->GetSeriesSize() + size);
5963 for (size_t off = stop + sizeof(void*); off <= start; off += sizeof(void*))
5965 _ASSERTE(gcLayout.Count() == 0 || off > gcLayout[gcLayout.Count() - 1]);
5966 _ASSERTE(FitsInU4(off));
5967 *gcLayout.AppendThrowing() = (ULONG)off;
5971 } while (cur <= last);
5976 Stub * StubLinkerCPU::CreateTailCallCopyArgsThunk(CORINFO_SIG_INFO * pSig,
5977 CorInfoHelperTailCallSpecialHandling flags)
5979 STANDARD_VM_CONTRACT;
5982 CPUSTUBLINKER* pSl = &sl;
5984 // Generates a function that looks like this:
5985 // size_t CopyArguments(va_list args, (RCX)
5986 // CONTEXT *pCtx, (RDX)
5987 // DWORD64 *pvStack, (R8)
5988 // size_t cbStack) (R9)
5990 // if (pCtx != NULL) {
5991 // foreach (arg in args) {
5992 // copy into pCtx or pvStack
5995 // return <size of stack needed>;
5999 CodeLabel *pNullLabel = pSl->NewCodeLabel();
6002 pSl->X86EmitR2ROp(0x85, kRDX, kRDX);
6005 pSl->X86EmitCondJump(pNullLabel, X86CondCode::kJZ);
6008 UINT totalArgs = pSig->totalILArgs() + ((pSig->isVarArg() || pSig->hasTypeArg()) ? 1 : 0);
6009 bool fR10Loaded = false;
6011 static const UINT rgcbArgRegCtxtOffsets[4] = { offsetof(CONTEXT, Rcx), offsetof(CONTEXT, Rdx),
6012 offsetof(CONTEXT, R8), offsetof(CONTEXT, R9) };
6013 static const UINT rgcbFpArgRegCtxtOffsets[4] = { offsetof(CONTEXT, Xmm0.Low), offsetof(CONTEXT, Xmm1.Low),
6014 offsetof(CONTEXT, Xmm2.Low), offsetof(CONTEXT, Xmm3.Low) };
6016 ULONGARRAY gcLayout;
6018 // On input to the function R9 contains the size of the buffer
6019 // The first time this macro runs, R10 is loaded with the 'top' of the Frame
6020 // and R9 is changed to point to the 'top' of the copy buffer.
6021 // Then both R9 and R10 are decremented by the size of the struct we're copying
6022 // So R10 is the value to put in the argument slot, and R9 is where the data
6023 // should be copied to (or zeroed out in the case of the return buffer).
6024 #define LOAD_STRUCT_OFFSET_IF_NEEDED(cbSize) \
6026 _ASSERTE(cbSize > 0); \
6027 _ASSERTE(FitsInI4(cbSize)); \
6028 __int32 offset = (__int32)cbSize; \
6029 if (!fR10Loaded) { \
6030 /* mov r10, [rdx + offset of RSP] */ \
6031 pSl->X86EmitIndexRegLoad(kR10, kRDX, offsetof(CONTEXT, Rsp)); \
6032 /* add an extra 8 because RSP is pointing at the return address */ \
6035 pSl->X86EmitAddRegReg(kR10, kR9); \
6037 pSl->X86EmitAddRegReg(kR9, kR8); \
6038 fR10Loaded = true; \
6040 /* sub r10, offset */ \
6041 pSl->X86EmitSubReg(kR10, offset); \
6042 /* sub r9, cbSize */ \
6043 pSl->X86EmitSubReg(kR9, cbSize); \
6047 if (flags & CORINFO_TAILCALL_STUB_DISPATCH_ARG) {
6048 // This is set for stub dispatch
6049 // The JIT placed an extra argument in the list that needs to
6050 // get shoved into R11, and not counted.
6051 // pCtx->R11 = va_arg(args, DWORD64);
6054 pSl->X86EmitIndexRegLoad(kRAX, kRCX, 0);
6056 pSl->X86EmitAddReg(kRCX, 8);
6057 // mov [rdx + offset of R11], rax
6058 pSl->X86EmitIndexRegStore(kRDX, offsetof(CONTEXT, R11), kRAX);
6061 ULONG cbStructOffset = 0;
6063 // First comes the 'this' pointer
6064 if (pSig->hasThis()) {
6066 pSl->X86EmitIndexRegLoad(kRAX, kRCX, 0);
6068 pSl->X86EmitAddReg(kRCX, 8);
6069 // mov [rdx + offset of RCX/RDX], rax
6070 pSl->X86EmitIndexRegStore(kRDX, rgcbArgRegCtxtOffsets[nArgSlot++], kRAX);
6073 // Next the return buffer
6075 TypeHandle th(pSig->retTypeClass);
6076 if ((pSig->retType == CORINFO_TYPE_REFANY) || (pSig->retType == CORINFO_TYPE_VALUECLASS)) {
6077 cbArg = th.GetSize();
6080 if (ArgIterator::IsArgPassedByRef(cbArg)) {
6083 // We always reserve space for the return buffer, and we always zero it out,
6084 // so the GC won't complain, but if it's already pointing above the frame,
6085 // then we need to pass it in (so it will get passed out).
6086 // Otherwise we assume the caller is returning void, so we just pass in
6087 // dummy space to be overwritten.
6088 UINT cbUsed = (cbArg + 0xF) & ~0xF;
6089 LOAD_STRUCT_OFFSET_IF_NEEDED(cbUsed);
6090 // now emit a 'memset(r9, 0, cbUsed)'
6093 pSl->X86EmitR2ROp(X86_INSTR_XORPS, kXMM0, kXMM0);
6094 if (cbUsed <= 4 * 16) {
6095 // movaps [r9], xmm0
6096 pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM0, kR9, 0);
6098 // movaps [r9 + 16], xmm0
6099 pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM0, kR9, 16);
6101 // movaps [r9 + 32], xmm0
6102 pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM0, kR9, 32);
6104 // movaps [r9 + 48], xmm0
6105 pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM0, kR9, 48);
6111 // a loop (one double-quadword at a time)
6112 pSl->X86EmitZeroOutReg(kR11);
6114 CodeLabel *pLoopLabel = pSl->NewCodeLabel();
6115 pSl->EmitLabel(pLoopLabel);
6116 // movaps [r9 + r11], xmm0
6117 pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM0, kR9, 0, kR11, 1);
6119 pSl->X86EmitAddReg(kR11, 16);
6121 pSl->X86EmitCmpRegImm32(kR11, cbUsed);
6123 pSl->X86EmitCondJump(pLoopLabel, X86CondCode::kJL);
6126 cbStructOffset += cbUsed;
6127 AppendGCLayout(gcLayout, cbStructOffset, pSig->retType == CORINFO_TYPE_REFANY, th);
6130 pSl->X86EmitIndexRegLoad(kRAX, kRCX, 0);
6132 pSl->X86EmitAddReg(kRCX, 8);
6133 // cmp rax, [rdx + offset of R12]
6134 pSl->X86EmitOffsetModRM(0x3B, kRAX, kRDX, offsetof(CONTEXT, R12));
6136 CodeLabel *pSkipLabel = pSl->NewCodeLabel();
6138 pSl->X86EmitCondJump(pSkipLabel, X86CondCode::kJNB);
6140 // Also check the lower bound of the stack in case the return buffer is on the GC heap
6141 // and the GC heap is below the stack
6143 pSl->X86EmitR2ROp(0x3B, kRAX, (X86Reg)4 /*kRSP*/);
6145 pSl->X86EmitCondJump(pSkipLabel, X86CondCode::kJB);
6147 pSl->X86EmitMovRegReg(kRAX, kR10);
6149 pSl->EmitLabel(pSkipLabel);
6150 // mov [rdx + offset of RCX], rax
6151 pSl->X86EmitIndexRegStore(kRDX, rgcbArgRegCtxtOffsets[nArgSlot++], kRAX);
6154 // VarArgs Cookie *or* Generics Instantiation Parameter
6155 if (pSig->hasTypeArg() || pSig->isVarArg()) {
6157 pSl->X86EmitIndexRegLoad(kRAX, kRCX, 0);
6159 pSl->X86EmitAddReg(kRCX, 8);
6160 // mov [rdx + offset of RCX/RDX], rax
6161 pSl->X86EmitIndexRegStore(kRDX, rgcbArgRegCtxtOffsets[nArgSlot++], kRAX);
6164 _ASSERTE(nArgSlot <= 4);
6166 // Now for *all* the 'real' arguments
6167 SigPointer ptr((PCCOR_SIGNATURE)pSig->args);
6168 Module * module = GetModule(pSig->scope);
6169 Instantiation classInst((TypeHandle*)pSig->sigInst.classInst, pSig->sigInst.classInstCount);
6170 Instantiation methodInst((TypeHandle*)pSig->sigInst.methInst, pSig->sigInst.methInstCount);
6171 SigTypeContext typeCtxt(classInst, methodInst);
6173 for( ;nArgSlot < totalArgs; ptr.SkipExactlyOne()) {
6174 CorElementType et = ptr.PeekElemTypeNormalized(module, &typeCtxt);
6175 if (et == ELEMENT_TYPE_SENTINEL)
6179 pSl->X86EmitIndexRegLoad(kRAX, kRCX, 0);
6181 pSl->X86EmitAddReg(kRCX, 8);
6183 case ELEMENT_TYPE_INTERNAL:
6185 _ASSERTE(!"Shouldn't see ELEMENT_TYPE_INTERNAL");
6187 case ELEMENT_TYPE_TYPEDBYREF:
6188 case ELEMENT_TYPE_VALUETYPE:
6189 th = ptr.GetTypeHandleThrowing(module, &typeCtxt, ClassLoader::LoadTypes, CLASS_LOAD_UNRESTOREDTYPEKEY);
6190 _ASSERTE(!th.IsNull());
6191 g_IBCLogger.LogEEClassAndMethodTableAccess(th.GetMethodTable());
6192 cbArg = (UINT)th.GetSize();
6193 if (ArgIterator::IsArgPassedByRef(cbArg)) {
6194 UINT cbUsed = (cbArg + 0xF) & ~0xF;
6195 LOAD_STRUCT_OFFSET_IF_NEEDED(cbUsed);
6196 // rax has the source pointer
6197 // r9 has the intermediate copy location
6198 // r10 has the final destination
6200 pSl->X86EmitIndexRegStore(kRDX, rgcbArgRegCtxtOffsets[nArgSlot++], kR10);
6203 pSl->X86EmitIndexRegStore(kR8, 8 * nArgSlot++, kR10);
6205 // now emit a 'memcpy(rax, r9, cbUsed)'
6206 // These structs are supposed to be 16-byte aligned, but
6207 // Reflection puts them on the GC heap, which is only 8-byte
6208 // aligned. It also means we have to be careful about not
6209 // copying too much (because we might cross a page boundary)
6210 UINT cbUsed16 = (cbArg + 7) & ~0xF;
6211 _ASSERTE((cbUsed16 == cbUsed) || ((cbUsed16 + 16) == cbUsed));
6214 // Unrolled version (6 x 16 bytes in parallel)
6216 while (offset < cbUsed16) {
6217 // movups xmm0, [rax + offset]
6218 pSl->X86EmitOp(X86_INSTR_MOVUPS_R_RM, kXMM0, kRAX, offset);
6219 if (offset + 16 < cbUsed16) {
6220 // movups xmm1, [rax + offset + 16]
6221 pSl->X86EmitOp(X86_INSTR_MOVUPS_R_RM, kXMM1, kRAX, offset + 16);
6222 if (offset + 32 < cbUsed16) {
6223 // movups xmm2, [rax + offset + 32]
6224 pSl->X86EmitOp(X86_INSTR_MOVUPS_R_RM, kXMM2, kRAX, offset + 32);
6225 if (offset + 48 < cbUsed16) {
6226 // movups xmm3, [rax + offset + 48]
6227 pSl->X86EmitOp(X86_INSTR_MOVUPS_R_RM, kXMM3, kRAX, offset + 48);
6228 if (offset + 64 < cbUsed16) {
6229 // movups xmm4, [rax + offset + 64]
6230 pSl->X86EmitOp(X86_INSTR_MOVUPS_R_RM, kXMM4, kRAX, offset + 64);
6231 if (offset + 80 < cbUsed16) {
6232 // movups xmm5, [rax + offset + 80]
6233 pSl->X86EmitOp(X86_INSTR_MOVUPS_R_RM, kXMM5, kRAX, offset + 80);
6239 // movaps [r9 + offset], xmm0
6240 pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM0, kR9, offset);
6242 if (offset < cbUsed16) {
6243 // movaps [r9 + 16], xmm1
6244 pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM1, kR9, offset);
6246 if (offset < cbUsed16) {
6247 // movaps [r9 + 32], xmm2
6248 pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM2, kR9, offset);
6250 if (offset < cbUsed16) {
6251 // movaps [r9 + 48], xmm3
6252 pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM3, kR9, offset);
6254 if (offset < cbUsed16) {
6255 // movaps [r9 + 64], xmm4
6256 pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM4, kR9, offset);
6258 if (offset < cbUsed16) {
6259 // movaps [r9 + 80], xmm5
6260 pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM5, kR9, offset);
6268 // Copy the last 8 bytes if needed
6269 if (cbUsed > cbUsed16) {
6270 _ASSERTE(cbUsed16 < cbArg);
6271 // movlps xmm0, [rax + offset]
6272 pSl->X86EmitOp(X86_INSTR_MOVLPS_R_RM, kXMM0, kRAX, offset);
6273 // movlps [r9 + offset], xmm0
6274 pSl->X86EmitOp(X86_INSTR_MOVLPS_RM_R, kXMM0, kR9, offset);
6278 // a loop (one double-quadword at a time)
6279 pSl->X86EmitZeroOutReg(kR11);
6281 CodeLabel *pLoopLabel = pSl->NewCodeLabel();
6282 pSl->EmitLabel(pLoopLabel);
6283 // movups xmm0, [rax + r11]
6284 pSl->X86EmitOp(X86_INSTR_MOVUPS_R_RM, kXMM0, kRAX, 0, kR11, 1);
6285 // movaps [r9 + r11], xmm0
6286 pSl->X86EmitOp(X86_INSTR_MOVAPS_RM_R, kXMM0, kR9, 0, kR11, 1);
6288 pSl->X86EmitAddReg(kR11, 16);
6289 // cmp r11, cbUsed16
6290 pSl->X86EmitCmpRegImm32(kR11, cbUsed16);
6292 pSl->X86EmitCondJump(pLoopLabel, X86CondCode::kJL);
6293 if (cbArg > cbUsed16) {
6294 _ASSERTE(cbUsed16 + 8 >= cbArg);
6295 // movlps xmm0, [rax + r11]
6296 pSl->X86EmitOp(X86_INSTR_MOVLPS_R_RM, kXMM0, kRAX, 0, kR11, 1);
6297 // movlps [r9 + r11], xmm0
6298 pSl->X86EmitOp(X86_INSTR_MOVLPS_RM_R, kXMM0, kR9, 0, kR11, 1);
6301 cbStructOffset += cbUsed;
6302 AppendGCLayout(gcLayout, cbStructOffset, et == ELEMENT_TYPE_TYPEDBYREF, th);
6307 // Explicit Fall-Through for non-IsArgPassedByRef
6312 pSl->X86EmitIndexRegStore(kRDX, rgcbArgRegCtxtOffsets[nArgSlot], kRAX);
6313 if ((et == ELEMENT_TYPE_R4) || (et == ELEMENT_TYPE_R8)) {
6314 pSl->X86EmitIndexRegStore(kRDX, rgcbFpArgRegCtxtOffsets[nArgSlot], kRAX);
6318 pSl->X86EmitIndexRegStore(kR8, 8 * nArgSlot, kRAX);
6325 #undef LOAD_STRUCT_OFFSET_IF_NEEDED
// Keep our 4 shadow slots and an even number of slots (to stay 16-byte aligned)
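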
6330 else if (nArgSlot & 1)
6333 _ASSERTE((cbStructOffset % 16) == 0);
6336 pSl->X86EmitZeroOutReg(kRAX);
6338 pSl->X86EmitReturn(0);
6341 pSl->EmitLabel(pNullLabel);
6343 CodeLabel *pGCLayoutLabel = NULL;
6344 if (gcLayout.Count() == 0) {
6346 pSl->X86EmitZeroOutReg(kRAX);
6349 // lea rax, [rip + offset to gclayout]
6350 pGCLayoutLabel = pSl->NewCodeLabel();
6351 pSl->X86EmitLeaRIP(pGCLayoutLabel, kRAX);
6354 pSl->X86EmitIndexRegStore(kR9, 0, kRAX);
6355 // mov rax, cbStackNeeded
6356 pSl->X86EmitRegLoad(kRAX, cbStructOffset + nArgSlot * 8);
6358 pSl->X86EmitReturn(0);
6360 if (gcLayout.Count() > 0) {
6362 pSl->EmitLabel(pGCLayoutLabel);
6363 EncodeGCOffsets(pSl, gcLayout);
6368 #endif // DACCESS_COMPILE
6370 #endif // _TARGET_AMD64_
6373 #ifdef HAS_FIXUP_PRECODE
6375 #ifdef HAS_FIXUP_PRECODE_CHUNKS
6376 TADDR FixupPrecode::GetMethodDesc()
6378 LIMITED_METHOD_CONTRACT;
6381 // This lookup is also manually inlined in PrecodeFixupThunk assembly code
6382 TADDR base = *PTR_TADDR(GetBase());
6385 return base + (m_MethodDescChunkIndex * MethodDesc::ALIGNMENT);
6389 #ifdef FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS
6390 PCODE FixupPrecode::GetDynamicMethodEntryJumpStub()
6392 _ASSERTE(((PTR_MethodDesc)GetMethodDesc())->IsLCGMethod());
6394 // m_PrecodeChunkIndex has a value inverted to the order of precodes in memory (the precode at the lowest address has the
6395 // highest index, and the precode at the highest address has the lowest index). To map a precode to its jump stub by memory
6396 // order, invert the precode index to get the jump stub index.
6397 UINT32 count = ((PTR_MethodDesc)GetMethodDesc())->GetMethodDescChunk()->GetCount();
6398 _ASSERTE(m_PrecodeChunkIndex < count);
6399 SIZE_T jumpStubIndex = count - 1 - m_PrecodeChunkIndex;
6401 return GetBase() + sizeof(PTR_MethodDesc) + jumpStubIndex * BACK_TO_BACK_JUMP_ALLOCATE_SIZE;
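// Example: in a chunk whose MethodDescChunk::GetCount() is 3, the precode with
// m_PrecodeChunkIndex == 2 (the one at the lowest address) uses jump stub index 0,
// while the precode with m_PrecodeChunkIndex == 0 (highest address) uses index 2.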
6403 #endif // FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS
6405 #ifdef DACCESS_COMPILE
6406 void FixupPrecode::EnumMemoryRegions(CLRDataEnumMemoryFlags flags)
6409 DacEnumMemoryRegion(dac_cast<TADDR>(this), sizeof(FixupPrecode));
6411 DacEnumMemoryRegion(GetBase(), sizeof(TADDR));
6413 #endif // DACCESS_COMPILE
6415 #endif // HAS_FIXUP_PRECODE
6417 #ifndef DACCESS_COMPILE
6419 BOOL rel32SetInterlocked(/*PINT32*/ PVOID pRel32, TADDR target, TADDR expected, MethodDesc* pMD)
6423 THROWS; // Creating a JumpStub could throw OutOfMemory
6428 BYTE* callAddrAdj = (BYTE*)pRel32 + 4;
6429 INT32 expectedRel32 = static_cast<INT32>((BYTE*)expected - callAddrAdj);
6431 INT32 targetRel32 = rel32UsingJumpStub((INT32*)pRel32, target, pMD);
6433 _ASSERTE(IS_ALIGNED(pRel32, sizeof(INT32)));
6434 return FastInterlockCompareExchange((LONG*)pRel32, (LONG)targetRel32, (LONG)expectedRel32) == (LONG)expectedRel32;
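// Note on the arithmetic above: an x86/AMD64 rel32 displacement is relative to the end of
// the 4-byte field, so for a rel32 slot at address P targeting T the encoding is
// T - (P + 4). For example (illustrative numbers only), P == 0x1001 and T == 0x2000 give
// rel32 == 0x2000 - 0x1005 == 0xFFB.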
6437 void StubPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator /* = NULL */,
6438 BYTE type /* = StubPrecode::Type */, TADDR target /* = NULL */)
6440 WRAPPER_NO_CONTRACT;
6442 IN_WIN64(m_movR10 = X86_INSTR_MOV_R10_IMM64); // mov r10, pMethodDesc
6443 IN_WIN32(m_movEAX = X86_INSTR_MOV_EAX_IMM32); // mov eax, pMethodDesc
6444 m_pMethodDesc = (TADDR)pMD;
6445 IN_WIN32(m_mov_rm_r = X86_INSTR_MOV_RM_R); // mov reg,reg
6447 m_jmp = X86_INSTR_JMP_REL32; // jmp rel32
6449 if (pLoaderAllocator != NULL)
// Use pMD == NULL in all precode initialization methods to allocate the initial jump stub
// in a non-dynamic heap that has the same lifetime as the precode itself.
6454 target = GetPreStubEntryPoint();
6455 m_rel32 = rel32UsingJumpStub(&m_rel32, target, NULL /* pMD */, pLoaderAllocator);
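// The resulting precode is, as a sketch (AMD64 flavor):
//
//      mov  r10, <pMethodDesc>
//      jmp  <target>            ; possibly via a jump stub if the target is out of rel32 range
//
// On x86 the MethodDesc goes into eax instead (see the IN_WIN32 fields above).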
6459 #ifdef HAS_NDIRECT_IMPORT_PRECODE
6461 void NDirectImportPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator)
6463 WRAPPER_NO_CONTRACT;
6464 StubPrecode::Init(pMD, pLoaderAllocator, NDirectImportPrecode::Type, GetEEFuncEntryPoint(NDirectImportThunk));
6467 #endif // HAS_NDIRECT_IMPORT_PRECODE
6470 #ifdef HAS_REMOTING_PRECODE
6472 void RemotingPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator /* = NULL */)
6474 WRAPPER_NO_CONTRACT;
6476 IN_WIN64(m_movR10 = X86_INSTR_MOV_R10_IMM64); // mov r10, pMethodDesc
6477 IN_WIN32(m_movEAX = X86_INSTR_MOV_EAX_IMM32); // mov eax, pMethodDesc
6478 m_pMethodDesc = (TADDR)pMD;
6479 m_type = PRECODE_REMOTING; // nop
6480 m_call = X86_INSTR_CALL_REL32;
6481 m_jmp = X86_INSTR_JMP_REL32; // jmp rel32
6483 if (pLoaderAllocator != NULL)
6485 m_callRel32 = rel32UsingJumpStub(&m_callRel32,
6486 GetEEFuncEntryPoint(PrecodeRemotingThunk), NULL /* pMD */, pLoaderAllocator);
6487 m_rel32 = rel32UsingJumpStub(&m_rel32,
6488 GetPreStubEntryPoint(), NULL /* pMD */, pLoaderAllocator);
6492 #endif // HAS_REMOTING_PRECODE
6495 #ifdef HAS_FIXUP_PRECODE
6496 void FixupPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator, int iMethodDescChunkIndex /*=0*/, int iPrecodeChunkIndex /*=0*/)
6498 WRAPPER_NO_CONTRACT;
6500 m_op = X86_INSTR_CALL_REL32; // call PrecodeFixupThunk
6501 m_type = FixupPrecode::TypePrestub;
6503 // Initialize chunk indices only if they are not initialized yet. This is necessary to make MethodDesc::Reset work.
6504 if (m_PrecodeChunkIndex == 0)
6506 _ASSERTE(FitsInU1(iPrecodeChunkIndex));
6507 m_PrecodeChunkIndex = static_cast<BYTE>(iPrecodeChunkIndex);
6510 if (iMethodDescChunkIndex != -1)
6512 if (m_MethodDescChunkIndex == 0)
6514 _ASSERTE(FitsInU1(iMethodDescChunkIndex));
6515 m_MethodDescChunkIndex = static_cast<BYTE>(iMethodDescChunkIndex);
6518 if (*(void**)GetBase() == NULL)
6519 *(void**)GetBase() = (BYTE*)pMD - (iMethodDescChunkIndex * MethodDesc::ALIGNMENT);
6522 _ASSERTE(GetMethodDesc() == (TADDR)pMD);
6524 PCODE target = (PCODE)GetEEFuncEntryPoint(PrecodeFixupThunk);
6525 #ifdef FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS
6526 if (pMD->IsLCGMethod())
6528 m_rel32 = rel32UsingPreallocatedJumpStub(&m_rel32, target, GetDynamicMethodEntryJumpStub());
6531 #endif // FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS
6532 if (pLoaderAllocator != NULL)
6534 m_rel32 = rel32UsingJumpStub(&m_rel32, target, NULL /* pMD */, pLoaderAllocator);
6538 BOOL FixupPrecode::SetTargetInterlocked(TADDR target, TADDR expected)
6542 THROWS; // Creating a JumpStub could throw OutOfMemory
6547 INT64 oldValue = *(INT64*)this;
6548 BYTE* pOldValue = (BYTE*)&oldValue;
6550 MethodDesc * pMD = (MethodDesc*)GetMethodDesc();
6551 g_IBCLogger.LogMethodPrecodeWriteAccess(pMD);
6553 INT64 newValue = oldValue;
6554 BYTE* pNewValue = (BYTE*)&newValue;
6556 if (pOldValue[OFFSETOF_PRECODE_TYPE_CALL_OR_JMP] == FixupPrecode::TypePrestub)
6558 pNewValue[OFFSETOF_PRECODE_TYPE_CALL_OR_JMP] = FixupPrecode::Type;
6560 pOldValue[offsetof(FixupPrecode, m_op)] = X86_INSTR_CALL_REL32;
6561 pNewValue[offsetof(FixupPrecode, m_op)] = X86_INSTR_JMP_REL32;
6563 else if (pOldValue[OFFSETOF_PRECODE_TYPE_CALL_OR_JMP] == FixupPrecode::Type)
6565 #ifdef FEATURE_CODE_VERSIONING
6566 // No change needed, jmp is already in place
6568 // Setting the target more than once is unexpected
6574 // Pre-existing code doesn't conform to the expectations for a FixupPrecode
6578 *(INT32*)(&pNewValue[offsetof(FixupPrecode, m_rel32)]) =
6579 #ifdef FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS
6580 pMD->IsLCGMethod() ?
6581 rel32UsingPreallocatedJumpStub(&m_rel32, target, GetDynamicMethodEntryJumpStub()) :
6582 #endif // FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS
6583 rel32UsingJumpStub(&m_rel32, target, pMD);
6585 _ASSERTE(IS_ALIGNED(this, sizeof(INT64)));
6586 EnsureWritableExecutablePages(this, sizeof(INT64));
6587 return FastInterlockCompareExchangeLong((INT64*) this, newValue, oldValue) == oldValue;
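// Note: the precode is 8-byte aligned (asserted above), so the type byte, the opcode and
// the rel32 all live in a single INT64. That is what lets the transition from
// "call PrecodeFixupThunk" to "jmp <target>" be published with one interlocked 64-bit
// compare-exchange.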
6590 #ifdef FEATURE_NATIVE_IMAGE_GENERATION
6591 // Partial initialization. Used to save regrouped chunks.
6592 void FixupPrecode::InitForSave(int iPrecodeChunkIndex)
6594 m_op = X86_INSTR_CALL_REL32; // call PrecodeFixupThunk
6595 m_type = FixupPrecode::TypePrestub;
6597 _ASSERTE(FitsInU1(iPrecodeChunkIndex));
6598 m_PrecodeChunkIndex = static_cast<BYTE>(iPrecodeChunkIndex);
6600 // The rest is initialized in code:FixupPrecode::Fixup
6603 void FixupPrecode::Fixup(DataImage *image, MethodDesc * pMD)
6605 STANDARD_VM_CONTRACT;
6607 // Note that GetMethodDesc() does not return the correct value because of
6608 // regrouping of MethodDescs into hot and cold blocks. That's why the caller
6609 // has to supply the actual MethodDesc
6611 SSIZE_T mdChunkOffset;
6612 ZapNode * pMDChunkNode = image->GetNodeForStructure(pMD, &mdChunkOffset);
6613 ZapNode * pHelperThunk = image->GetHelperThunk(CORINFO_HELP_EE_PRECODE_FIXUP);
6615 image->FixupFieldToNode(this, offsetof(FixupPrecode, m_rel32),
6616 pHelperThunk, 0, IMAGE_REL_BASED_REL32);
6618 // Set the actual chunk index
6619 FixupPrecode * pNewPrecode = (FixupPrecode *)image->GetImagePointer(this);
6621 size_t mdOffset = mdChunkOffset - sizeof(MethodDescChunk);
6622 size_t chunkIndex = mdOffset / MethodDesc::ALIGNMENT;
6623 _ASSERTE(FitsInU1(chunkIndex));
6624 pNewPrecode->m_MethodDescChunkIndex = (BYTE) chunkIndex;
6626 // Fixup the base of MethodDescChunk
6627 if (m_PrecodeChunkIndex == 0)
6629 image->FixupFieldToNode(this, (BYTE *)GetBase() - (BYTE *)this,
6630 pMDChunkNode, sizeof(MethodDescChunk));
6633 #endif // FEATURE_NATIVE_IMAGE_GENERATION
6635 #endif // HAS_FIXUP_PRECODE
6637 #endif // !DACCESS_COMPILE
6640 #ifdef HAS_THISPTR_RETBUF_PRECODE
6642 // rel32 jmp target that points back to the jump (infinite loop).
6643 // Used to mark uninitialized ThisPtrRetBufPrecode target
6644 #define REL32_JMP_SELF (-5)
6646 #ifndef DACCESS_COMPILE
6647 void ThisPtrRetBufPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator)
6649 WRAPPER_NO_CONTRACT;
6651 IN_WIN64(m_nop1 = X86_INSTR_NOP;) // nop
6652 #ifdef UNIX_AMD64_ABI
6654 m_movScratchArg0 = 0xC78B; // mov rax,rdi
6656 m_movArg0Arg1 = 0xFE8B; // mov rdi,rsi
6658 m_movArg1Scratch = 0xF08B; // mov rsi,rax
6660 IN_WIN64(m_prefix1 = 0x48;)
6661 m_movScratchArg0 = 0xC889; // mov r/eax,r/ecx
6662 IN_WIN64(m_prefix2 = 0x48;)
6663 m_movArg0Arg1 = 0xD189; // mov r/ecx,r/edx
6664 IN_WIN64(m_prefix3 = 0x48;)
6665 m_movArg1Scratch = 0xC289; // mov r/edx,r/eax
6667 m_nop2 = X86_INSTR_NOP; // nop
6668 m_jmp = X86_INSTR_JMP_REL32; // jmp rel32
6669 m_pMethodDesc = (TADDR)pMD;
6671 // This precode is never patched lazily - avoid unnecessary jump stub allocation
6672 m_rel32 = REL32_JMP_SELF;
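// Net effect of the three moves above, as a sketch (Windows AMD64 register names):
//
//      mov rax, rcx      ; save the incoming first argument
//      mov rcx, rdx      ; second argument becomes the first
//      mov rdx, rax      ; saved first argument becomes the second
//      jmp <target>
//
// i.e. the precode swaps the 'this' pointer and the return buffer argument before
// jumping to the real target.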
6675 BOOL ThisPtrRetBufPrecode::SetTargetInterlocked(TADDR target, TADDR expected)
// This precode is never patched lazily - the interlocked semantics are not required.
6685 _ASSERTE(m_rel32 == REL32_JMP_SELF);
6687 // Use pMD == NULL to allocate the jump stub in non-dynamic heap that has the same lifetime as the precode itself
6688 m_rel32 = rel32UsingJumpStub(&m_rel32, target, NULL /* pMD */, ((MethodDesc *)GetMethodDesc())->GetLoaderAllocatorForCode());
6692 #endif // !DACCESS_COMPILE
6694 PCODE ThisPtrRetBufPrecode::GetTarget()
6696 LIMITED_METHOD_DAC_CONTRACT;
6698 // This precode is never patched lazily - pretend that the uninitialized m_rel32 points to prestub
6699 if (m_rel32 == REL32_JMP_SELF)
6700 return GetPreStubEntryPoint();
6702 return rel32Decode(PTR_HOST_MEMBER_TADDR(ThisPtrRetBufPrecode, this, m_rel32));
6705 #endif // HAS_THISPTR_RETBUF_PRECODE