From 8b8a3fca1ace915d13c221d0fb3a794da2eef823 Mon Sep 17 00:00:00 2001 From: Egor Chesakov Date: Mon, 4 Jan 2021 17:07:09 -0800 Subject: [PATCH] Update SKIP_ALLOC_FRAME to match 3.1, 5.0 and 6.0 instruction sequences in function prolog (#45184) * Add regression tests for https://github.com/dotnet/runtime/issues/45090 * Add instruction opcodes in src/coreclr/vm/eetwain.cpp * Add SKIP_LEA_EAX_ESP in src/coreclr/vm/eetwain.cpp * Add SKIP_HELPER_CALL in src/coreclr/vm/eetwain.cpp * Update SKIP_ALLOC_FRAME in src/coreclr/vm/eetwain.cpp * Use constant (0x1000) rather than calling GetOsPageSize() in SKIP_ALLOC_FRAME in src/coreclr/vm/eetwain.cpp * Draft of the changes that are back-compatible with earlier versions of R2R code in src/coreclr/vm/eetwain.cpp * Update src/coreclr/vm/eetwain.cpp and include missing SKIP_ARITH_REG for 3.1 stack probing loop * Update test and include more scenarios * Update R2R version to 5.1 in src/coreclr/inc/readytorun.h src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs --- src/coreclr/inc/readytorun.h | 4 +- .../tools/Common/Internal/Runtime/ModuleHeaders.cs | 4 +- src/coreclr/vm/eetwain.cpp | 194 ++++++++++++-- .../JitBlue/Runtime_45090/Runtime_45090.cs | 286 +++++++++++++++++++++ .../JitBlue/Runtime_45090/Runtime_45090.csproj | 15 ++ 5 files changed, 472 insertions(+), 31 deletions(-) create mode 100644 src/tests/JIT/Regression/JitBlue/Runtime_45090/Runtime_45090.cs create mode 100644 src/tests/JIT/Regression/JitBlue/Runtime_45090/Runtime_45090.csproj diff --git a/src/coreclr/inc/readytorun.h b/src/coreclr/inc/readytorun.h index 5122aa5..4b68acb 100644 --- a/src/coreclr/inc/readytorun.h +++ b/src/coreclr/inc/readytorun.h @@ -15,8 +15,8 @@ #define READYTORUN_SIGNATURE 0x00525452 // 'RTR' // Keep these in sync with src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs -#define READYTORUN_MAJOR_VERSION 0x0004 -#define READYTORUN_MINOR_VERSION 0x0002 +#define READYTORUN_MAJOR_VERSION 0x0005 +#define READYTORUN_MINOR_VERSION 0x0001 #define MINIMUM_READYTORUN_MAJOR_VERSION 0x003 diff --git a/src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs b/src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs index e003ff6..cdd5bac 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs @@ -14,8 +14,8 @@ namespace Internal.Runtime { public const uint Signature = 0x00525452; // 'RTR' - public const ushort CurrentMajorVersion = 4; - public const ushort CurrentMinorVersion = 2; + public const ushort CurrentMajorVersion = 5; + public const ushort CurrentMinorVersion = 1; } #pragma warning disable 0169 diff --git a/src/coreclr/vm/eetwain.cpp b/src/coreclr/vm/eetwain.cpp index 88ba0a6..a7e5e01 100644 --- a/src/coreclr/vm/eetwain.cpp +++ b/src/coreclr/vm/eetwain.cpp @@ -20,7 +20,6 @@ #include "argdestination.h" -#define X86_INSTR_W_TEST_ESP 0x4485 // test [esp+N], eax #define X86_INSTR_TEST_ESP_SIB 0x24 #define X86_INSTR_PUSH_0 0x6A // push 00, entire instruction is 0x6A00 #define X86_INSTR_PUSH_IMM 0x68 // push NNNN, @@ -36,15 +35,21 @@ #define X86_INSTR_NOP5_5 0x90 // 5th byte of 5-byte nop #define X86_INSTR_INT3 0xCC // int3 #define X86_INSTR_HLT 0xF4 // hlt +#define X86_INSTR_PUSH_EAX 0x50 // push eax #define X86_INSTR_PUSH_EBP 0x55 // push ebp #define X86_INSTR_W_MOV_EBP_ESP 0xEC8B // mov ebp, esp #define X86_INSTR_POP_ECX 0x59 // pop ecx #define X86_INSTR_RET 0xC2 // ret imm16 #define X86_INSTR_RETN 0xC3 // ret +#define X86_INSTR_XOR 0x33 // xor +#define X86_INSTR_w_TEST_ESP_EAX 0x0485 // test [esp], eax +#define X86_INSTR_w_TEST_ESP_DWORD_OFFSET_EAX 0x8485 // test [esp-dwOffset], eax #define X86_INSTR_w_LEA_ESP_EBP_BYTE_OFFSET 0x658d // lea esp, [ebp-bOffset] #define X86_INSTR_w_LEA_ESP_EBP_DWORD_OFFSET 0xa58d // lea esp, [ebp-dwOffset] -#define X86_INSTR_JMP_NEAR_REL32 0xE9 // near jmp rel32 -#define X86_INSTR_w_JMP_FAR_IND_IMM 0x25FF // far jmp [addr32] +#define X86_INSTR_w_LEA_EAX_ESP_BYTE_OFFSET 0x448d // lea eax, [esp-bOffset] +#define X86_INSTR_w_LEA_EAX_ESP_DWORD_OFFSET 0x848d // lea eax, [esp-dwOffset] +#define X86_INSTR_JMP_NEAR_REL32 0xE9 // near jmp rel32 +#define X86_INSTR_w_JMP_FAR_IND_IMM 0x25FF // far jmp [addr32] #ifndef USE_GC_INFO_DECODER @@ -3063,6 +3068,51 @@ inline unsigned SKIP_LEA_ESP_EBP(int val, PTR_CBYTE base, unsigned offset) return(offset + delta); } +inline unsigned SKIP_LEA_EAX_ESP(int val, PTR_CBYTE base, unsigned offset) +{ + LIMITED_METHOD_DAC_CONTRACT; + +#ifdef _DEBUG + WORD wOpcode = *(PTR_WORD)(base + offset); + if (CheckInstrWord(wOpcode, X86_INSTR_w_LEA_EAX_ESP_BYTE_OFFSET)) + { + _ASSERTE(val == *(PTR_SBYTE)(base + offset + 3)); + _ASSERTE(CAN_COMPRESS(val)); + } + else + { + _ASSERTE(CheckInstrWord(wOpcode, X86_INSTR_w_LEA_EAX_ESP_DWORD_OFFSET)); + _ASSERTE(val == *(PTR_INT32)(base + offset + 3)); + _ASSERTE(!CAN_COMPRESS(val)); + } +#endif + + unsigned delta = 3 + (CAN_COMPRESS(-val) ? 1 : 4); + return(offset + delta); +} + +inline unsigned SKIP_HELPER_CALL(PTR_CBYTE base, unsigned offset) +{ + LIMITED_METHOD_DAC_CONTRACT; + + unsigned delta; + + if (CheckInstrByte(base[offset], X86_INSTR_CALL_REL32)) + { + delta = 5; + } + else + { +#ifdef _DEBUG + WORD wOpcode = *(PTR_WORD)(base+offset); + _ASSERTE(CheckInstrWord(wOpcode, X86_INSTR_W_CALL_IND_IMM)); +#endif + delta = 6; + } + + return(offset+delta); +} + unsigned SKIP_ALLOC_FRAME(int size, PTR_CBYTE base, unsigned offset) { CONTRACTL { @@ -3075,48 +3125,138 @@ unsigned SKIP_ALLOC_FRAME(int size, PTR_CBYTE base, unsigned offset) if (size == sizeof(void*)) { - // We do "push eax" instead of "sub esp,4" - return (SKIP_PUSH_REG(base, offset)); + // JIT emits "push eax" instead of "sub esp,4" + return SKIP_PUSH_REG(base, offset); } - if (size >= (int)GetOsPageSize()) + const int STACK_PROBE_PAGE_SIZE_BYTES = 4096; + const int STACK_PROBE_BOUNDARY_THRESHOLD_BYTES = 1024; + + int lastProbedLocToFinalSp = size; + + if (size < STACK_PROBE_PAGE_SIZE_BYTES) + { + // sub esp, size + offset = SKIP_ARITH_REG(size, base, offset); + } + else { - if (size < int(3 * GetOsPageSize())) + WORD wOpcode = *(PTR_WORD)(base + offset); + + if (CheckInstrWord(wOpcode, X86_INSTR_w_TEST_ESP_DWORD_OFFSET_EAX)) { - // add 7 bytes for one or two TEST EAX, [ESP+GetOsPageSize()] - offset += (size / GetOsPageSize()) * 7; + // In .NET 5.0 and earlier for frames that have size smaller than 0x3000 bytes + // JIT emits one or two 'test eax, [esp-dwOffset]' instructions before adjusting the stack pointer. + _ASSERTE(size < 0x3000); + + // test eax, [esp-0x1000] + offset += 7; + lastProbedLocToFinalSp -= 0x1000; + + if (size >= 0x2000) + { +#ifdef _DEBUG + wOpcode = *(PTR_WORD)(base + offset); + _ASSERTE(CheckInstrWord(wOpcode, X86_INSTR_w_TEST_ESP_DWORD_OFFSET_EAX)); +#endif + //test eax, [esp-0x2000] + offset += 7; + lastProbedLocToFinalSp -= 0x1000; + } + + // sub esp, size + offset = SKIP_ARITH_REG(size, base, offset); } else { - // xor eax, eax 2 - // [nop] 0-3 - // loop: - // test [esp + eax], eax 3 - // sub eax, 0x1000 5 - // cmp EAX, -size 5 - // jge loop 2 - offset += 2; - - // NGEN images that support rejit may have extra nops we need to skip over - while (offset < 5) + bool pushedStubParam = false; + + if (CheckInstrByte(base[offset], X86_INSTR_PUSH_EAX)) + { + // push eax + offset = SKIP_PUSH_REG(base, offset); + pushedStubParam = true; + } + + if (CheckInstrByte(base[offset], X86_INSTR_XOR)) + { + // In .NET Core 3.1 and earlier for frames that have size greater than or equal to 0x3000 bytes + // JIT emits the following loop. + _ASSERTE(size >= 0x3000); + + offset += 2; + // xor eax, eax 2 + // [nop] 0-3 + // loop: + // test [esp + eax], eax 3 + // sub eax, 0x1000 5 + // cmp eax, -size 5 + // jge loop 2 + + // R2R images that support ReJIT may have extra nops we need to skip over. + while (offset < 5) + { + if (CheckInstrByte(base[offset], X86_INSTR_NOP)) + { + offset++; + } + else + { + break; + } + } + + offset += 15; + + if (pushedStubParam) + { + // pop eax + offset = SKIP_POP_REG(base, offset); + } + + // sub esp, size + return SKIP_ARITH_REG(size, base, offset); + } + else { - if (CheckInstrByte(base[offset], X86_INSTR_NOP)) + // In .NET 5.0 and later JIT emits a call to JIT_StackProbe helper. + + if (pushedStubParam) { - offset++; + // lea eax, [esp-size+4] + offset = SKIP_LEA_EAX_ESP(-size + 4, base, offset); + // call JIT_StackProbe + offset = SKIP_HELPER_CALL(base, offset); + // pop eax + offset = SKIP_POP_REG(base, offset); + // sub esp, size + return SKIP_ARITH_REG(size, base, offset); } else { - break; + // lea eax, [esp-size] + offset = SKIP_LEA_EAX_ESP(-size, base, offset); + // call JIT_StackProbe + offset = SKIP_HELPER_CALL(base, offset); + // mov esp, eax + return SKIP_MOV_REG_REG(base, offset); } } - offset += 15; } } - // sub ESP, size - return (SKIP_ARITH_REG(size, base, offset)); -} + if (lastProbedLocToFinalSp + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > STACK_PROBE_PAGE_SIZE_BYTES) + { +#ifdef _DEBUG + WORD wOpcode = *(PTR_WORD)(base + offset); + _ASSERTE(CheckInstrWord(wOpcode, X86_INSTR_w_TEST_ESP_EAX)); +#endif + // test [esp], eax + offset += 3; + } + return offset; +} #endif // !USE_GC_INFO_DECODER diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_45090/Runtime_45090.cs b/src/tests/JIT/Regression/JitBlue/Runtime_45090/Runtime_45090.cs new file mode 100644 index 0000000..b9469ec --- /dev/null +++ b/src/tests/JIT/Regression/JitBlue/Runtime_45090/Runtime_45090.cs @@ -0,0 +1,286 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Runtime.CompilerServices; +using System.Threading; +using System.Threading.Tasks; + +#pragma warning disable CS0649 + +// This test is to validate that SKIP_ALLOC_FRAME() procedure used by UnwindEspFrameProlog() in eetwain.cpp +// functions correctly on win-x86. The platform doesn't have unwind information as other platforms and hence unwinder either uses ebp chain of the stack frames +// or, in a case of an esp-based frame, unwinds the frame "manually" by decoding instructions in a function prolog. +// +// In order to validate that the unwinding procedure works correctly with such frames: +// +// 1) We need to construct a method with esp-based frame. +// That means that such method cannot call other methods (i.e. it must be a leaf method). +// The method also should not be inlined into its caller. +// However, we cannot simply decorate the method with NoInlining attribute, since the method in that case would have JIT_FLAG_FRAMED set by VM and will have ebp-based frame. +// Therefore, in order to prevent such method from inlining it can be declared as virtual. +// +// 2) There are a number of different options for the JIT to allocate a frame of a method. Such frame may or may not require stack probing. +// All of these options need to be tested. +// This include testing that runtime is back compatible and can run (and unwind) R2R code generated by earlier versions of the JIT. + +namespace Runtime_45090 +{ + struct _4 + { + public byte _0; + public byte _1; + public byte _2; + public byte _3; + } + + struct _10 + { + public long _0; + public long _8; + } + + struct _100 + { + public _10 _0; + public _10 _1; + public _10 _2; + public _10 _3; + public _10 _4; + public _10 _5; + public _10 _6; + public _10 _7; + public _10 _8; + public _10 _9; + public _10 _a; + public _10 _b; + public _10 _c; + public _10 _d; + public _10 _e; + public _10 _f; + } + + struct _e00 + { + public _100 _0; + public _100 _1; + public _100 _2; + public _100 _3; + public _100 _4; + public _100 _5; + public _100 _6; + public _100 _7; + public _100 _8; + public _100 _9; + public _100 _a; + public _100 _b; + public _100 _c; + public _100 _d; + } + + struct _1000 + { + public _100 _0; + public _100 _1; + public _100 _2; + public _100 _3; + public _100 _4; + public _100 _5; + public _100 _6; + public _100 _7; + public _100 _8; + public _100 _9; + public _100 _a; + public _100 _b; + public _100 _c; + public _100 _d; + public _100 _e; + public _100 _f; + } + + struct _1e00 + { + public _1000 _0; + public _e00 _1; + } + + struct _2000 + { + public _1000 _0; + public _1000 _1; + } + + struct _2e00 + { + public _1000 _0; + public _1000 _1; + public _e00 _2; + } + + struct _3000 + { + public _1000 _0; + public _1000 _1; + public _1000 _2; + } + + abstract class AllocFrame + { + public abstract int VirtMethodEspBasedFrame(); + } + + class PushReg : AllocFrame + { + // Frame size is 4 bytes + // push eax + public unsafe override int VirtMethodEspBasedFrame() + { + _4 tmp = new _4(); + *(int*)(&tmp) = 45090; + return (int)tmp._1; + } + } + + class SubSp : AllocFrame + { + // Frame size is such that it doesn't require probing + // sub esp, #frameSize + public override int VirtMethodEspBasedFrame() + { + _100 tmp = new _100(); + return (int)tmp._0._0; + } + } + + class ProbeAfterSubSp : AllocFrame + { + // Frame size is smaller than 0x1000 bytes, but needs probing **after** allocation + // sub esp, #frameSize + // test eax, [esp] + public override int VirtMethodEspBasedFrame() + { + _e00 tmp = new _e00(); + return (int)tmp._0._0._0; + } + } + + class ProbeBeforeSubSp1 : AllocFrame + { + // Frame size is in range [0x1000, 0x2000) bytes + // Since 6.0 + // lea eax, [esp-#frameSize] + // call JIT_StackProbe + // mov esp, eax + // Before 6.0 + // test [esp-0x1000], eax + // sub esp, #frameSize + public override int VirtMethodEspBasedFrame() + { + _1000 tmp = new _1000(); + return (int)tmp._0._0._0; + } + } + + class ProbeBeforeSubSp2 : AllocFrame + { + // Frame size is in range [0x1000, 0x2000) bytes and needs additional probing **after** allocation + // Since 6.0 + // lea eax, [esp-#frameSize] + // call JIT_StackProbe + // mov esp, eax + // Before 6.0 + // test [esp-0x1000], eax + // sub esp, #frameSize + // test [esp], eax + public override int VirtMethodEspBasedFrame() + { + _1e00 tmp = new _1e00(); + return (int)tmp._0._0._0._0; + } + } + + class ProbeBeforeSubSp3 : AllocFrame + { + // Frame size is in range [0x2000, 0x3000) bytes + // Since 6.0 + // lea eax, [esp-#frameSize] + // call JIT_StackProbe + // mov esp, eax + // Before 6.0 + // test [esp-0x1000], eax + // test [esp-0x2000], eax + // sub esp, #frameSize + public override int VirtMethodEspBasedFrame() + { + _2000 tmp = new _2000(); + return (int)tmp._0._0._0._0; + } + } + + class ProbeBeforeSubSp4 : AllocFrame + { + // Frame size is in range [0x2000, 0x3000) bytes and needs additional probing **after** allocation + // + // Since 6.0 + // lea eax, [esp-#frameSize] + // call JIT_StackProbe + // mov esp, eax + // Before 6.0 + // test [esp-0x1000], eax + // test [esp-0x2000], eax + // sub esp, #frameSize + // test [esp], eax + public override int VirtMethodEspBasedFrame() + { + _2e00 tmp = new _2e00(); + return (int)tmp._0._0._0._0; + } + } + + class ProbeBeforeSubSp5 : AllocFrame + { + // Frame size is in range [0x3000, 0x3000) bytes + // + // Since 5.0 + // lea eax, [esp-#frameSize] + // call JIT_StackProbe + // mov esp, eax + // Before 5.0 + // xor eax, eax + // [nop] + // loop: + // test [esp + eax], eax + // sub eax, 0x1000 + // cmp eax, -#frameSize + // jge loop + // sub esp, #frameSize + public override int VirtMethodEspBasedFrame() + { + _3000 tmp = new _3000(); + return (int)tmp._0._0._0._0; + } + } + + class Program + { + [MethodImpl(MethodImplOptions.NoInlining)] + static void TestSkipAllocFrame(AllocFrame scenario) + { + scenario.VirtMethodEspBasedFrame(); + } + + static int Main(string[] args) + { + TestSkipAllocFrame(new PushReg()); + TestSkipAllocFrame(new SubSp()); + TestSkipAllocFrame(new ProbeAfterSubSp()); + TestSkipAllocFrame(new ProbeBeforeSubSp1()); + TestSkipAllocFrame(new ProbeBeforeSubSp2()); + TestSkipAllocFrame(new ProbeBeforeSubSp3()); + TestSkipAllocFrame(new ProbeBeforeSubSp4()); + TestSkipAllocFrame(new ProbeBeforeSubSp5()); + + return 100; + } + } +} diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_45090/Runtime_45090.csproj b/src/tests/JIT/Regression/JitBlue/Runtime_45090/Runtime_45090.csproj new file mode 100644 index 0000000..1253dec --- /dev/null +++ b/src/tests/JIT/Regression/JitBlue/Runtime_45090/Runtime_45090.csproj @@ -0,0 +1,15 @@ + + + Exe + None + True + True + + + + + + -- 2.7.4