Update SKIP_ALLOC_FRAME to match 3.1, 5.0 and 6.0 instruction sequences in function...
authorEgor Chesakov <Egor.Chesakov@microsoft.com>
Tue, 5 Jan 2021 01:07:09 +0000 (17:07 -0800)
committerGitHub <noreply@github.com>
Tue, 5 Jan 2021 01:07:09 +0000 (17:07 -0800)
* Add regression tests for https://github.com/dotnet/runtime/issues/45090

* Add instruction opcodes in src/coreclr/vm/eetwain.cpp

* Add SKIP_LEA_EAX_ESP in src/coreclr/vm/eetwain.cpp

* Add SKIP_HELPER_CALL in src/coreclr/vm/eetwain.cpp

* Update SKIP_ALLOC_FRAME in src/coreclr/vm/eetwain.cpp

* Use constant (0x1000) rather than calling GetOsPageSize() in SKIP_ALLOC_FRAME in src/coreclr/vm/eetwain.cpp

* Draft of the changes that are back-compatible with earlier versions of R2R code in src/coreclr/vm/eetwain.cpp

* Update src/coreclr/vm/eetwain.cpp and include missing SKIP_ARITH_REG for 3.1 stack probing loop

* Update test and include more scenarios

* Update R2R version to 5.1 in src/coreclr/inc/readytorun.h src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs

src/coreclr/inc/readytorun.h
src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs
src/coreclr/vm/eetwain.cpp
src/tests/JIT/Regression/JitBlue/Runtime_45090/Runtime_45090.cs [new file with mode: 0644]
src/tests/JIT/Regression/JitBlue/Runtime_45090/Runtime_45090.csproj [new file with mode: 0644]

index 5122aa5..4b68acb 100644 (file)
@@ -15,8 +15,8 @@
 #define READYTORUN_SIGNATURE 0x00525452 // 'RTR'
 
 // Keep these in sync with src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs
-#define READYTORUN_MAJOR_VERSION 0x0004
-#define READYTORUN_MINOR_VERSION 0x0002
+#define READYTORUN_MAJOR_VERSION 0x0005
+#define READYTORUN_MINOR_VERSION 0x0001
 
 #define MINIMUM_READYTORUN_MAJOR_VERSION 0x003
 
index e003ff6..cdd5bac 100644 (file)
@@ -14,8 +14,8 @@ namespace Internal.Runtime
     {
         public const uint Signature = 0x00525452; // 'RTR'
 
-        public const ushort CurrentMajorVersion = 4;
-        public const ushort CurrentMinorVersion = 2;
+        public const ushort CurrentMajorVersion = 5;
+        public const ushort CurrentMinorVersion = 1;
     }
 
 #pragma warning disable 0169
index 88ba0a6..a7e5e01 100644 (file)
@@ -20,7 +20,6 @@
 
 #include "argdestination.h"
 
-#define X86_INSTR_W_TEST_ESP            0x4485  // test [esp+N], eax
 #define X86_INSTR_TEST_ESP_SIB          0x24
 #define X86_INSTR_PUSH_0                0x6A    // push 00, entire instruction is 0x6A00
 #define X86_INSTR_PUSH_IMM              0x68    // push NNNN,
 #define X86_INSTR_NOP5_5                0x90    // 5th byte of 5-byte nop
 #define X86_INSTR_INT3                  0xCC    // int3
 #define X86_INSTR_HLT                   0xF4    // hlt
+#define X86_INSTR_PUSH_EAX              0x50    // push eax
 #define X86_INSTR_PUSH_EBP              0x55    // push ebp
 #define X86_INSTR_W_MOV_EBP_ESP         0xEC8B  // mov ebp, esp
 #define X86_INSTR_POP_ECX               0x59    // pop ecx
 #define X86_INSTR_RET                   0xC2    // ret imm16
 #define X86_INSTR_RETN                  0xC3    // ret
+#define X86_INSTR_XOR                   0x33    // xor
+#define X86_INSTR_w_TEST_ESP_EAX        0x0485  // test [esp], eax
+#define X86_INSTR_w_TEST_ESP_DWORD_OFFSET_EAX   0x8485      // test [esp-dwOffset], eax
 #define X86_INSTR_w_LEA_ESP_EBP_BYTE_OFFSET     0x658d      // lea esp, [ebp-bOffset]
 #define X86_INSTR_w_LEA_ESP_EBP_DWORD_OFFSET    0xa58d      // lea esp, [ebp-dwOffset]
-#define X86_INSTR_JMP_NEAR_REL32     0xE9        // near jmp rel32
-#define X86_INSTR_w_JMP_FAR_IND_IMM     0x25FF        // far jmp [addr32]
+#define X86_INSTR_w_LEA_EAX_ESP_BYTE_OFFSET     0x448d      // lea eax, [esp-bOffset]
+#define X86_INSTR_w_LEA_EAX_ESP_DWORD_OFFSET    0x848d      // lea eax, [esp-dwOffset]
+#define X86_INSTR_JMP_NEAR_REL32        0xE9    // near jmp rel32
+#define X86_INSTR_w_JMP_FAR_IND_IMM     0x25FF  // far jmp [addr32]
 
 #ifndef USE_GC_INFO_DECODER
 
@@ -3063,6 +3068,51 @@ inline unsigned SKIP_LEA_ESP_EBP(int val, PTR_CBYTE base, unsigned offset)
     return(offset + delta);
 }
 
+inline unsigned SKIP_LEA_EAX_ESP(int val, PTR_CBYTE base, unsigned offset)
+{
+    LIMITED_METHOD_DAC_CONTRACT;
+
+#ifdef _DEBUG
+    WORD wOpcode = *(PTR_WORD)(base + offset);
+    if (CheckInstrWord(wOpcode, X86_INSTR_w_LEA_EAX_ESP_BYTE_OFFSET))
+    {
+        _ASSERTE(val == *(PTR_SBYTE)(base + offset + 3));
+        _ASSERTE(CAN_COMPRESS(val));
+    }
+    else
+    {
+        _ASSERTE(CheckInstrWord(wOpcode, X86_INSTR_w_LEA_EAX_ESP_DWORD_OFFSET));
+        _ASSERTE(val == *(PTR_INT32)(base + offset + 3));
+        _ASSERTE(!CAN_COMPRESS(val));
+    }
+#endif
+
+    unsigned delta = 3 + (CAN_COMPRESS(-val) ? 1 : 4);
+    return(offset + delta);
+}
+
+inline unsigned SKIP_HELPER_CALL(PTR_CBYTE base, unsigned offset)
+{
+    LIMITED_METHOD_DAC_CONTRACT;
+
+    unsigned delta;
+
+    if (CheckInstrByte(base[offset], X86_INSTR_CALL_REL32))
+    {
+        delta = 5;
+    }
+    else
+    {
+#ifdef _DEBUG
+        WORD wOpcode = *(PTR_WORD)(base+offset);
+        _ASSERTE(CheckInstrWord(wOpcode, X86_INSTR_W_CALL_IND_IMM));
+#endif
+        delta = 6;
+    }
+
+    return(offset+delta);
+}
+
 unsigned SKIP_ALLOC_FRAME(int size, PTR_CBYTE base, unsigned offset)
 {
     CONTRACTL {
@@ -3075,48 +3125,138 @@ unsigned SKIP_ALLOC_FRAME(int size, PTR_CBYTE base, unsigned offset)
 
     if (size == sizeof(void*))
     {
-        // We do "push eax" instead of "sub esp,4"
-        return (SKIP_PUSH_REG(base, offset));
+        // JIT emits "push eax" instead of "sub esp,4"
+        return SKIP_PUSH_REG(base, offset);
     }
 
-    if (size >= (int)GetOsPageSize())
+    const int STACK_PROBE_PAGE_SIZE_BYTES = 4096;
+    const int STACK_PROBE_BOUNDARY_THRESHOLD_BYTES = 1024;
+
+    int lastProbedLocToFinalSp = size;
+
+    if (size < STACK_PROBE_PAGE_SIZE_BYTES)
+    {
+        // sub esp, size
+        offset = SKIP_ARITH_REG(size, base, offset);
+    }
+    else
     {
-        if (size < int(3 * GetOsPageSize()))
+        WORD wOpcode = *(PTR_WORD)(base + offset);
+
+        if (CheckInstrWord(wOpcode, X86_INSTR_w_TEST_ESP_DWORD_OFFSET_EAX))
         {
-            // add 7 bytes for one or two TEST EAX, [ESP+GetOsPageSize()]
-            offset += (size / GetOsPageSize()) * 7;
+            // In .NET 5.0 and earlier for frames that have size smaller than 0x3000 bytes
+            // JIT emits one or two 'test eax, [esp-dwOffset]' instructions before adjusting the stack pointer.
+            _ASSERTE(size < 0x3000);
+
+            // test eax, [esp-0x1000]
+            offset += 7;
+            lastProbedLocToFinalSp -= 0x1000;
+
+            if (size >= 0x2000)
+            {
+#ifdef _DEBUG
+                wOpcode = *(PTR_WORD)(base + offset);
+                _ASSERTE(CheckInstrWord(wOpcode, X86_INSTR_w_TEST_ESP_DWORD_OFFSET_EAX));
+#endif
+                //test eax, [esp-0x2000]
+                offset += 7;
+                lastProbedLocToFinalSp -= 0x1000;
+            }
+
+            // sub esp, size
+            offset = SKIP_ARITH_REG(size, base, offset);
         }
         else
         {
-            //      xor eax, eax                2
-            //      [nop]                       0-3
-            // loop:
-            //      test [esp + eax], eax       3
-            //      sub eax, 0x1000             5
-            //      cmp EAX, -size              5
-            //      jge loop                    2
-            offset += 2;
-
-            // NGEN images that support rejit may have extra nops we need to skip over
-            while (offset < 5)
+            bool pushedStubParam = false;
+
+            if (CheckInstrByte(base[offset], X86_INSTR_PUSH_EAX))
+            {
+                // push eax
+                offset = SKIP_PUSH_REG(base, offset);
+                pushedStubParam = true;
+            }
+
+            if (CheckInstrByte(base[offset], X86_INSTR_XOR))
+            {
+                // In .NET Core 3.1 and earlier for frames that have size greater than or equal to 0x3000 bytes
+                // JIT emits the following loop.
+                _ASSERTE(size >= 0x3000);
+
+                offset += 2;
+                //      xor eax, eax                2
+                //      [nop]                       0-3
+                // loop:
+                //      test [esp + eax], eax       3
+                //      sub eax, 0x1000             5
+                //      cmp eax, -size              5
+                //      jge loop                    2
+
+                // R2R images that support ReJIT may have extra nops we need to skip over.
+                while (offset < 5)
+                {
+                    if (CheckInstrByte(base[offset], X86_INSTR_NOP))
+                    {
+                        offset++;
+                    }
+                    else
+                    {
+                        break;
+                    }
+                }
+
+                offset += 15;
+
+                if (pushedStubParam)
+                {
+                    // pop eax
+                    offset = SKIP_POP_REG(base, offset);
+                }
+
+                // sub esp, size
+                return SKIP_ARITH_REG(size, base, offset);
+            }
+            else
             {
-                if (CheckInstrByte(base[offset], X86_INSTR_NOP))
+                // In .NET 5.0 and later JIT emits a call to JIT_StackProbe helper.
+
+                if (pushedStubParam)
                 {
-                    offset++;
+                    // lea eax, [esp-size+4]
+                    offset = SKIP_LEA_EAX_ESP(-size + 4, base, offset);
+                    // call JIT_StackProbe
+                    offset = SKIP_HELPER_CALL(base, offset);
+                    // pop eax
+                    offset = SKIP_POP_REG(base, offset);
+                    // sub esp, size
+                    return SKIP_ARITH_REG(size, base, offset);
                 }
                 else
                 {
-                    break;
+                    // lea eax, [esp-size]
+                    offset = SKIP_LEA_EAX_ESP(-size, base, offset);
+                    // call JIT_StackProbe
+                    offset = SKIP_HELPER_CALL(base, offset);
+                    // mov esp, eax
+                    return SKIP_MOV_REG_REG(base, offset);
                 }
             }
-            offset += 15;
         }
     }
 
-    // sub ESP, size
-    return (SKIP_ARITH_REG(size, base, offset));
-}
+    if (lastProbedLocToFinalSp + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > STACK_PROBE_PAGE_SIZE_BYTES)
+    {
+#ifdef _DEBUG
+        WORD wOpcode = *(PTR_WORD)(base + offset);
+        _ASSERTE(CheckInstrWord(wOpcode, X86_INSTR_w_TEST_ESP_EAX));
+#endif
+        // test [esp], eax
+        offset += 3;
+    }
 
+    return offset;
+}
 
 #endif // !USE_GC_INFO_DECODER
 
diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_45090/Runtime_45090.cs b/src/tests/JIT/Regression/JitBlue/Runtime_45090/Runtime_45090.cs
new file mode 100644 (file)
index 0000000..b9469ec
--- /dev/null
@@ -0,0 +1,286 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Threading;
+using System.Threading.Tasks;
+
+#pragma warning disable CS0649
+
+// This test is to validate that SKIP_ALLOC_FRAME() procedure used by UnwindEspFrameProlog() in eetwain.cpp
+// functions correctly on win-x86. The platform doesn't have unwind information as other platforms and hence unwinder either uses ebp chain of the stack frames
+// or, in a case of an esp-based frame, unwinds the frame "manually" by decoding instructions in a function prolog.
+//
+// In order to validate that the unwinding procedure works correctly with such frames:
+//
+// 1) We need to construct a method with esp-based frame.
+// That means that such method cannot call other methods (i.e. it must be a leaf method).
+// The method also should not be inlined into its caller.
+// However, we cannot simply decorate the method with NoInlining attribute, since the method in that case would have JIT_FLAG_FRAMED set by VM and will have ebp-based frame.
+// Therefore, in order to prevent such method from inlining it can be declared as virtual.
+//
+// 2) There are a number of different options for the JIT to allocate a frame of a method. Such frame may or may not require stack probing.
+// All of these options need to be tested.
+// This include testing that runtime is back compatible and can run (and unwind) R2R code generated by earlier versions of the JIT.
+
+namespace Runtime_45090
+{
+    struct _4
+    {
+        public byte _0;
+        public byte _1;
+        public byte _2;
+        public byte _3;
+    }
+
+    struct _10
+    {
+        public long _0;
+        public long _8;
+    }
+
+    struct _100
+    {
+        public _10 _0;
+        public _10 _1;
+        public _10 _2;
+        public _10 _3;
+        public _10 _4;
+        public _10 _5;
+        public _10 _6;
+        public _10 _7;
+        public _10 _8;
+        public _10 _9;
+        public _10 _a;
+        public _10 _b;
+        public _10 _c;
+        public _10 _d;
+        public _10 _e;
+        public _10 _f;
+    }
+
+    struct _e00
+    {
+        public _100 _0;
+        public _100 _1;
+        public _100 _2;
+        public _100 _3;
+        public _100 _4;
+        public _100 _5;
+        public _100 _6;
+        public _100 _7;
+        public _100 _8;
+        public _100 _9;
+        public _100 _a;
+        public _100 _b;
+        public _100 _c;
+        public _100 _d;
+    }
+
+    struct _1000
+    {
+        public _100 _0;
+        public _100 _1;
+        public _100 _2;
+        public _100 _3;
+        public _100 _4;
+        public _100 _5;
+        public _100 _6;
+        public _100 _7;
+        public _100 _8;
+        public _100 _9;
+        public _100 _a;
+        public _100 _b;
+        public _100 _c;
+        public _100 _d;
+        public _100 _e;
+        public _100 _f;
+    }
+
+    struct _1e00
+    {
+        public _1000 _0;
+        public _e00  _1;
+    }
+
+    struct _2000
+    {
+        public _1000 _0;
+        public _1000 _1;
+    }
+
+    struct _2e00
+    {
+        public _1000 _0;
+        public _1000 _1;
+        public _e00  _2;
+    }
+
+    struct _3000
+    {
+        public _1000 _0;
+        public _1000 _1;
+        public _1000 _2;
+    }
+
+    abstract class AllocFrame
+    {
+        public abstract int VirtMethodEspBasedFrame();
+    }
+
+    class PushReg : AllocFrame
+    {
+        // Frame size is 4 bytes
+        //   push eax
+        public unsafe override int VirtMethodEspBasedFrame()
+        {
+            _4 tmp = new _4();
+            *(int*)(&tmp) = 45090;
+            return (int)tmp._1;
+        }
+    }
+
+    class SubSp : AllocFrame
+    {
+        // Frame size is such that it doesn't require probing
+        //     sub esp, #frameSize
+        public override int VirtMethodEspBasedFrame()
+        {
+            _100 tmp = new _100();
+            return (int)tmp._0._0;
+        }
+    }
+
+    class ProbeAfterSubSp : AllocFrame
+    {
+        // Frame size is smaller than 0x1000 bytes, but needs probing **after** allocation
+        //     sub esp, #frameSize
+        //     test eax, [esp]
+        public override int VirtMethodEspBasedFrame()
+        {
+            _e00 tmp = new _e00();
+            return (int)tmp._0._0._0;
+        }
+    }
+
+    class ProbeBeforeSubSp1 : AllocFrame
+    {
+        // Frame size is in range [0x1000, 0x2000) bytes
+        // Since 6.0
+        //     lea eax, [esp-#frameSize]
+        //     call JIT_StackProbe
+        //     mov esp, eax
+        // Before 6.0
+        //     test [esp-0x1000], eax
+        //     sub esp, #frameSize
+        public override int VirtMethodEspBasedFrame()
+        {
+            _1000 tmp = new _1000();
+            return (int)tmp._0._0._0;
+        }
+    }
+
+    class ProbeBeforeSubSp2 : AllocFrame
+    {
+        // Frame size is in range [0x1000, 0x2000) bytes and needs additional probing **after** allocation
+        // Since 6.0
+        //     lea eax, [esp-#frameSize]
+        //     call JIT_StackProbe
+        //     mov esp, eax
+        // Before 6.0
+        //     test [esp-0x1000], eax
+        //     sub esp, #frameSize
+        //     test [esp], eax
+        public override int VirtMethodEspBasedFrame()
+        {
+            _1e00 tmp = new _1e00();
+            return (int)tmp._0._0._0._0;
+        }
+    }
+
+    class ProbeBeforeSubSp3 : AllocFrame
+    {
+        // Frame size is in range [0x2000, 0x3000) bytes
+        // Since 6.0
+        //     lea eax, [esp-#frameSize]
+        //     call JIT_StackProbe
+        //     mov esp, eax
+        // Before 6.0
+        //     test [esp-0x1000], eax
+        //     test [esp-0x2000], eax
+        //     sub esp, #frameSize
+        public override int VirtMethodEspBasedFrame()
+        {
+            _2000 tmp = new _2000();
+            return (int)tmp._0._0._0._0;
+        }
+    }
+
+    class ProbeBeforeSubSp4 : AllocFrame
+    {
+        // Frame size is in range [0x2000, 0x3000) bytes and needs additional probing **after** allocation
+        //
+        // Since 6.0
+        //     lea eax, [esp-#frameSize]
+        //     call JIT_StackProbe
+        //     mov esp, eax
+        // Before 6.0
+        //     test [esp-0x1000], eax
+        //     test [esp-0x2000], eax
+        //     sub esp, #frameSize
+        //     test [esp], eax
+        public override int VirtMethodEspBasedFrame()
+        {
+            _2e00 tmp = new _2e00();
+            return (int)tmp._0._0._0._0;
+        }
+    }
+
+    class ProbeBeforeSubSp5 : AllocFrame
+    {
+        // Frame size is in range [0x3000, 0x3000) bytes
+        //
+        // Since 5.0
+        //     lea eax, [esp-#frameSize]
+        //     call JIT_StackProbe
+        //     mov esp, eax
+        // Before 5.0
+        //     xor eax, eax
+        //     [nop]
+        //   loop:
+        //     test [esp + eax], eax
+        //     sub eax, 0x1000
+        //     cmp eax, -#frameSize
+        //     jge loop
+        //     sub esp, #frameSize
+        public override int VirtMethodEspBasedFrame()
+        {
+            _3000 tmp = new _3000();
+            return (int)tmp._0._0._0._0;
+        }
+    }
+
+    class Program
+    {
+        [MethodImpl(MethodImplOptions.NoInlining)]
+        static void TestSkipAllocFrame(AllocFrame scenario)
+        {
+            scenario.VirtMethodEspBasedFrame();
+        }
+
+        static int Main(string[] args)
+        {
+            TestSkipAllocFrame(new PushReg());
+            TestSkipAllocFrame(new SubSp());
+            TestSkipAllocFrame(new ProbeAfterSubSp());
+            TestSkipAllocFrame(new ProbeBeforeSubSp1());
+            TestSkipAllocFrame(new ProbeBeforeSubSp2());
+            TestSkipAllocFrame(new ProbeBeforeSubSp3());
+            TestSkipAllocFrame(new ProbeBeforeSubSp4());
+            TestSkipAllocFrame(new ProbeBeforeSubSp5());
+
+            return 100;
+        }
+    }
+}
diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_45090/Runtime_45090.csproj b/src/tests/JIT/Regression/JitBlue/Runtime_45090/Runtime_45090.csproj
new file mode 100644 (file)
index 0000000..1253dec
--- /dev/null
@@ -0,0 +1,15 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <DebugType>None</DebugType>
+    <Optimize>True</Optimize>
+    <AllowUnsafeBlocks>True</AllowUnsafeBlocks>
+    <CLRTestBatchPreCommands><![CDATA[
+$(CLRTestBatchPreCommands)
+set COMPlus_GCStress=0xC
+]]></CLRTestBatchPreCommands>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+</Project>