From: Bruce Forstall Date: Fri, 12 Apr 2019 23:54:55 +0000 (-0700) Subject: Fix x86 stack probing (#23881) X-Git-Tag: accepted/tizen/unified/20190813.215958~46^2~101 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=d5865236e7898b730de28a7a6f034e975bb7282e;p=platform%2Fupstream%2Fcoreclr.git Fix x86 stack probing (#23881) * Fix x86 stack probing On x86, structs are passed by value on the stack. We copy structs to the stack in various ways, but one way is to first subtract the size of the struct and then use a "rep movsb" instruction. If the struct we are passing is sufficiently large, this can cause us to miss the stack guard page. So, introduce stack probes for these struct copies. It turns out the stack pointer after prolog probing can be sitting near the very end of the guard page (one `STACK_ALIGN` slot before the end, which allows a "call" instruction which pushes its return address to touch the guard page with the return address push). We don't want to probe with every argument push, though. So change the prolog probing to insert an "extra" touch at the final SP location if the previous touch was "too far" away, leaving at least some buffer zone for un-probed SP adjustments. I chose this to be the size of the largest SIMD register, which also can get copied to the argument stack with a "SUB;MOV" sequence. Added several test case variations showing different large stack probe situations. 
Fixes #23796 * Increase the argument size probe buffer * Formatting --- diff --git a/src/jit/codegen.h b/src/jit/codegen.h index d490057..cde0fad 100644 --- a/src/jit/codegen.h +++ b/src/jit/codegen.h @@ -1211,6 +1211,14 @@ protected: void genReturn(GenTree* treeNode); +#if defined(_TARGET_XARCH_) + void genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, bool hideSpChangeFromEmitter, regNumber regTmp); + void genStackPointerConstantAdjustmentLoopWithProbe(ssize_t spDelta, + bool hideSpChangeFromEmitter, + regNumber regTmp); + void genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta, regNumber regTmp); +#endif // defined(_TARGET_XARCH_) + void genLclHeap(GenTree* tree); bool genIsRegCandidateLocal(GenTree* tree) diff --git a/src/jit/codegenlinear.cpp b/src/jit/codegenlinear.cpp index 9661d43..c2e1c74 100644 --- a/src/jit/codegenlinear.cpp +++ b/src/jit/codegenlinear.cpp @@ -1646,7 +1646,7 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk, unsigned outArg unsigned thisFieldOffset = argOffset + fieldListPtr->gtFieldOffset; getEmitter()->emitIns_S_R(ins_Store(type), attr, reg, outArgVarNum, thisFieldOffset); - // We can't write beyound the arg area + // We can't write beyond the arg area assert((thisFieldOffset + EA_SIZE_IN_BYTES(attr)) <= compiler->lvaLclSize(outArgVarNum)); } } diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp index 014011b..97d04e7 100644 --- a/src/jit/codegenxarch.cpp +++ b/src/jit/codegenxarch.cpp @@ -2171,7 +2171,8 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni return; } - const target_size_t pageSize = compiler->eeGetPageSize(); + const target_size_t pageSize = compiler->eeGetPageSize(); + target_size_t lastTouchDelta = 0; // What offset from the final SP was the last probe? 
if (frameSize == REGSIZE_BYTES) { @@ -2182,19 +2183,25 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni { // Frame size is (0x0008..0x1000) inst_RV_IV(INS_sub, REG_SPBASE, frameSize, EA_PTRSIZE); + lastTouchDelta = frameSize; } else if (frameSize < compiler->getVeryLargeFrameSize()) { + lastTouchDelta = frameSize; + // Frame size is (0x1000..0x3000) getEmitter()->emitIns_AR_R(INS_test, EA_PTRSIZE, REG_EAX, REG_SPBASE, -(int)pageSize); + lastTouchDelta -= pageSize; if (frameSize >= 0x2000) { getEmitter()->emitIns_AR_R(INS_test, EA_PTRSIZE, REG_EAX, REG_SPBASE, -2 * (int)pageSize); + lastTouchDelta -= pageSize; } inst_RV_IV(INS_sub, REG_SPBASE, frameSize, EA_PTRSIZE); + assert(lastTouchDelta == frameSize % pageSize); } else { @@ -2230,6 +2237,7 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni // The encoding differs based on the architecture and what register is // used (namely, using RAX has a smaller encoding). // + // xor eax,eax // loop: // For x86 // test [esp + eax], eax 3 @@ -2264,7 +2272,11 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni // Branch backwards to start of loop inst_IV(INS_jge, bytesForBackwardJump); + + lastTouchDelta = frameSize % pageSize; + #else // _TARGET_UNIX_ + // Code size for each instruction. We need this because the // backward branch is hard-coded with the number of bytes to branch. // The encoding differs based on the architecture and what register is @@ -2317,6 +2329,9 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni inst_IV(INS_jge, bytesForBackwardJump); // Branch backwards to start of loop getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, initReg, frameSize); // restore stack pointer + + lastTouchDelta = 0; // The loop code above actually over-probes: it always probes beyond the final SP we need. 
+ #endif // _TARGET_UNIX_ *pInitRegZeroed = false; // The initReg does not contain zero @@ -2332,7 +2347,18 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni inst_RV_IV(INS_sub, REG_SPBASE, frameSize, EA_PTRSIZE); } + if (lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > pageSize) + { + // We haven't probed almost a complete page. If the next action on the stack might subtract from SP + // first, before touching the current SP, then we do one more probe at the very bottom. This can + // happen on x86, for example, when we copy an argument to the stack using a "SUB ESP; REP MOV" + // strategy. + + getEmitter()->emitIns_AR_R(INS_test, EA_PTRSIZE, REG_EAX, REG_SPBASE, 0); + } + compiler->unwindAllocStack(frameSize); + #ifdef USING_SCOPE_INFO if (!doubleAlignOrFramePointerUsed()) { @@ -2342,6 +2368,172 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni } //------------------------------------------------------------------------ +// genStackPointerConstantAdjustmentWithProbe: add a specified constant value to the stack pointer, +// and probe the stack as appropriate. Should only be called as a helper for +// genStackPointerConstantAdjustmentLoopWithProbe. +// +// Arguments: +// spDelta - the value to add to SP. Must be negative or zero. If zero, the probe happens, +// but the stack pointer doesn't move. +// hideSpChangeFromEmitter - if true, hide the SP adjustment from the emitter. This only applies to x86, +// and requires that `regTmp` be valid. +// regTmp - an available temporary register. Will be trashed. Only used on x86. +// Must be REG_NA on non-x86 platforms. +// +// Return Value: +// None. 
+// +void CodeGen::genStackPointerConstantAdjustmentWithProbe(ssize_t spDelta, + bool hideSpChangeFromEmitter, + regNumber regTmp) +{ + assert(spDelta < 0); + assert((target_size_t)(-spDelta) <= compiler->eeGetPageSize()); + + getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0); + + if (hideSpChangeFromEmitter) + { + // For x86, some cases don't want to use "sub ESP" because we don't want the emitter to track the adjustment + // to ESP. So do the work in the count register. + // TODO-CQ: manipulate ESP directly, to share code, reduce #ifdefs, and improve CQ. This would require + // creating a way to temporarily turn off the emitter's tracking of ESP, maybe marking instrDescs as "don't + // track". + assert(regTmp != REG_NA); + inst_RV_RV(INS_mov, regTmp, REG_SPBASE, TYP_I_IMPL); + inst_RV_IV(INS_sub, regTmp, -spDelta, EA_PTRSIZE); + inst_RV_RV(INS_mov, REG_SPBASE, regTmp, TYP_I_IMPL); + } + else + { + assert(regTmp == REG_NA); + inst_RV_IV(INS_sub, REG_SPBASE, -spDelta, EA_PTRSIZE); + } +} + +//------------------------------------------------------------------------ +// genStackPointerConstantAdjustmentLoopWithProbe: Add a specified constant value to the stack pointer, +// and probe the stack as appropriate. Generates one probe per page, up to the total amount required. +// This will generate a sequence of probes in-line. It is required for the case where we need to expose +// (not hide) the stack level adjustment. We can't use the dynamic loop in that case, because the total +// stack adjustment would not be visible to the emitter. It would be possible to use this version for +// multiple hidden constant stack level adjustments but we don't do that currently (we use the loop +// version in genStackPointerDynamicAdjustmentWithProbe instead). +// +// Arguments: +// spDelta - the value to add to SP. Must be negative. +// hideSpChangeFromEmitter - if true, hide the SP adjustment from the emitter. 
This only applies to x86, +// and requires that `regTmp` be valid. +// regTmp - an available temporary register. Will be trashed. Only used on x86. +// Must be REG_NA on non-x86 platforms. +// +// Return Value: +// None. +// +void CodeGen::genStackPointerConstantAdjustmentLoopWithProbe(ssize_t spDelta, + bool hideSpChangeFromEmitter, + regNumber regTmp) +{ + assert(spDelta < 0); + + const target_size_t pageSize = compiler->eeGetPageSize(); + + ssize_t spRemainingDelta = spDelta; + do + { + ssize_t spOneDelta = -(ssize_t)min((target_size_t)-spRemainingDelta, pageSize); + genStackPointerConstantAdjustmentWithProbe(spOneDelta, hideSpChangeFromEmitter, regTmp); + spRemainingDelta -= spOneDelta; + } while (spRemainingDelta < 0); + + // What offset from the final SP was the last probe? This depends on the fact that + // genStackPointerConstantAdjustmentWithProbe() probes first, then does "SUB SP". + target_size_t lastTouchDelta = (target_size_t)(-spDelta) % pageSize; + if ((lastTouchDelta == 0) || (lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > pageSize)) + { + // We haven't probed almost a complete page. If lastTouchDelta==0, then spDelta was an exact + // multiple of pageSize, which means we last probed exactly one page back. Otherwise, we probed + // the page, but very far from the end. If the next action on the stack might subtract from SP + // first, before touching the current SP, then we do one more probe at the very bottom. This can + // happen on x86, for example, when we copy an argument to the stack using a "SUB ESP; REP MOV" + // strategy. + + getEmitter()->emitIns_AR_R(INS_test, EA_PTRSIZE, REG_EAX, REG_SPBASE, 0); + } +} + +//------------------------------------------------------------------------ +// genStackPointerDynamicAdjustmentWithProbe: add a register value to the stack pointer, +// and probe the stack as appropriate. +// +// Note that for x86, we hide the ESP adjustment from the emitter. 
To do that, currently, +// requires a temporary register and extra code. +// +// Arguments: +// regSpDelta - the register value to add to SP. The value in this register must be negative. +// This register might be trashed. +// regTmp - an available temporary register. Will be trashed. Only used on x86. +// Must be REG_NA on non-x86 platforms. +// +// Return Value: +// None. +// +void CodeGen::genStackPointerDynamicAdjustmentWithProbe(regNumber regSpDelta, regNumber regTmp) +{ + assert(regSpDelta != REG_NA); + assert(regTmp != REG_NA); + + // Tickle the pages to ensure that ESP is always valid and is + // in sync with the "stack guard page". Note that in the worst + // case ESP is on the last byte of the guard page. Thus you must + // touch ESP-0 first not ESP-0x1000. + // + // Another subtlety is that you don't want ESP to be exactly on the + // boundary of the guard page because PUSH is predecrement, thus + // call setup would not touch the guard page but just beyond it. + // + // Note that we go through a few hoops so that ESP never points to + // illegal pages at any time during the tickling process + // + // add regSpDelta, ESP // reg now holds ultimate ESP + // jb loop // result is smaller than original ESP (no wrap around) + // xor regSpDelta, regSpDelta // Overflow, pick lowest possible number + // loop: + // test ESP, [ESP+0] // tickle the page + // mov regTmp, ESP + // sub regTmp, eeGetPageSize() + // mov ESP, regTmp + // cmp ESP, regSpDelta + // jae loop + // mov ESP, regSpDelta + + BasicBlock* loop = genCreateTempLabel(); + + inst_RV_RV(INS_add, regSpDelta, REG_SPBASE, TYP_I_IMPL); + inst_JMP(EJ_jb, loop); + + instGen_Set_Reg_To_Zero(EA_PTRSIZE, regSpDelta); + + genDefineTempLabel(loop); + + // Tickle the decremented value. Note that it must be done BEFORE the update of ESP since ESP might already + // be on the guard page. It is OK to leave the final value of ESP on the guard page. 
+ getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0); + + // Subtract a page from ESP. This is a trick to avoid the emitter trying to track the + // decrement of the ESP - we do the subtraction in another reg instead of adjusting ESP directly. + inst_RV_RV(INS_mov, regTmp, REG_SPBASE, TYP_I_IMPL); + inst_RV_IV(INS_sub, regTmp, compiler->eeGetPageSize(), EA_PTRSIZE); + inst_RV_RV(INS_mov, REG_SPBASE, regTmp, TYP_I_IMPL); + + inst_RV_RV(INS_cmp, REG_SPBASE, regSpDelta, TYP_I_IMPL); + inst_JMP(EJ_jae, loop); + + // Move the final value to ESP + inst_RV_RV(INS_mov, REG_SPBASE, regSpDelta); +} + +//------------------------------------------------------------------------ // genLclHeap: Generate code for localloc. // // Arguments: @@ -2380,8 +2572,7 @@ void CodeGen::genLclHeap(GenTree* tree) noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes noway_assert(genStackLevel == 0); // Can't have anything on the stack - unsigned stackAdjustment = 0; - BasicBlock* loop = nullptr; + unsigned stackAdjustment = 0; // compute the amount of memory to allocate to properly STACK_ALIGN. size_t amount = 0; @@ -2503,9 +2694,11 @@ void CodeGen::genLclHeap(GenTree* tree) !compiler->info.compInitMem && (amount < compiler->eeGetPageSize()); // must be < not <= #ifdef _TARGET_X86_ - bool needRegCntRegister = true; + bool needRegCntRegister = true; + bool hideSpChangeFromEmitter = true; #else // !_TARGET_X86_ - bool needRegCntRegister = !doNoInitLessThanOnePageAlloc; + bool needRegCntRegister = !doNoInitLessThanOnePageAlloc; + bool hideSpChangeFromEmitter = false; #endif // !_TARGET_X86_ if (needRegCntRegister) @@ -2529,23 +2722,9 @@ void CodeGen::genLclHeap(GenTree* tree) // Since the size is less than a page, simply adjust ESP. // ESP might already be in the guard page, so we must touch it BEFORE // the alloc, not after. 
- CLANG_FORMAT_COMMENT_ANCHOR; - -#ifdef _TARGET_X86_ - // For x86, we don't want to use "sub ESP" because we don't want the emitter to track the adjustment - // to ESP. So do the work in the count register. - // TODO-CQ: manipulate ESP directly, to share code, reduce #ifdefs, and improve CQ. This would require - // creating a way to temporarily turn off the emitter's tracking of ESP, maybe marking instrDescs as "don't - // track". - inst_RV_RV(INS_mov, regCnt, REG_SPBASE, TYP_I_IMPL); - getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0); - inst_RV_IV(INS_sub, regCnt, amount, EA_PTRSIZE); - inst_RV_RV(INS_mov, REG_SPBASE, regCnt, TYP_I_IMPL); -#else // !_TARGET_X86_ - getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0); - inst_RV_IV(INS_sub, REG_SPBASE, amount, EA_PTRSIZE); -#endif // !_TARGET_X86_ + assert(amount < compiler->eeGetPageSize()); // must be < not <= + genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)amount, hideSpChangeFromEmitter, regCnt); goto ALLOC_DONE; } @@ -2561,7 +2740,6 @@ void CodeGen::genLclHeap(GenTree* tree) genSetRegToIcon(regCnt, amount, ((int)amount == amount) ? TYP_INT : TYP_LONG); } - loop = genCreateTempLabel(); if (compiler->info.compInitMem) { // At this point 'regCnt' is set to the number of loop iterations for this loop, if each @@ -2572,6 +2750,7 @@ void CodeGen::genLclHeap(GenTree* tree) assert(genIsValidIntReg(regCnt)); // Loop: + BasicBlock* loop = genCreateTempLabel(); genDefineTempLabel(loop); static_assert_no_msg((STACK_ALIGN % REGSIZE_BYTES) == 0); @@ -2590,62 +2769,12 @@ void CodeGen::genLclHeap(GenTree* tree) else { // At this point 'regCnt' is set to the total number of bytes to localloc. - // - // We don't need to zero out the allocated memory. However, we do have - // to tickle the pages to ensure that ESP is always valid and is - // in sync with the "stack guard page". Note that in the worst - // case ESP is on the last byte of the guard page. 
Thus you must - // touch ESP+0 first not ESP+x01000. - // - // Another subtlety is that you don't want ESP to be exactly on the - // boundary of the guard page because PUSH is predecrement, thus - // call setup would not touch the guard page but just beyond it - // - // Note that we go through a few hoops so that ESP never points to - // illegal pages at any time during the tickling process - // - // neg REGCNT - // add REGCNT, ESP // reg now holds ultimate ESP - // jb loop // result is smaller than orignial ESP (no wrap around) - // xor REGCNT, REGCNT, // Overflow, pick lowest possible number - // loop: - // test ESP, [ESP+0] // tickle the page - // mov REGTMP, ESP - // sub REGTMP, eeGetPageSize() - // mov ESP, REGTMP - // cmp ESP, REGCNT - // jae loop - // - // mov ESP, REG - // end: - inst_RV(INS_NEG, regCnt, TYP_I_IMPL); - inst_RV_RV(INS_add, regCnt, REG_SPBASE, TYP_I_IMPL); - inst_JMP(EJ_jb, loop); - - instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt); - - genDefineTempLabel(loop); - - // Tickle the decremented value, and move back to ESP, - // note that it has to be done BEFORE the update of ESP since - // ESP might already be on the guard page. It is OK to leave - // the final value of ESP on the guard page - getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0); + // Negate this value before calling the function to adjust the stack (which + // adds to ESP). - // This is a harmless trick to avoid the emitter trying to track the - // decrement of the ESP - we do the subtraction in another reg instead - // of adjusting ESP directly. 
+ inst_RV(INS_NEG, regCnt, TYP_I_IMPL); regNumber regTmp = tree->GetSingleTempReg(); - - inst_RV_RV(INS_mov, regTmp, REG_SPBASE, TYP_I_IMPL); - inst_RV_IV(INS_sub, regTmp, compiler->eeGetPageSize(), EA_PTRSIZE); - inst_RV_RV(INS_mov, REG_SPBASE, regTmp, TYP_I_IMPL); - - inst_RV_RV(INS_cmp, REG_SPBASE, regCnt, TYP_I_IMPL); - inst_JMP(EJ_jae, loop); - - // Move the final value to ESP - inst_RV_RV(INS_mov, REG_SPBASE, regCnt); + genStackPointerDynamicAdjustmentWithProbe(regCnt, regTmp); } ALLOC_DONE: @@ -7508,7 +7637,27 @@ bool CodeGen::genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk) else { m_pushStkArg = false; - inst_RV_IV(INS_sub, REG_SPBASE, argSize, EA_PTRSIZE); + + // If argSize is large, we need to probe the stack like we do in the prolog (genAllocLclFrame) + // or for localloc (genLclHeap), to ensure we touch the stack pages sequentially, and don't miss + // the stack guard pages. The prolog probes, but we don't know at this point how much higher + // the last probed stack pointer value is. We default a threshold. Any size below this threshold + // we are guaranteed the stack has been probed. Above this threshold, we don't know. The threshold + // should be high enough to cover all common cases. Increasing the threshold means adding a few + // more "lowest address of stack" probes in the prolog. Since this is relatively rare, add it to + // stress modes. 
+ + if ((argSize >= ARG_STACK_PROBE_THRESHOLD_BYTES) || + compiler->compStressCompile(Compiler::STRESS_GENERIC_VARN, 5)) + { + genStackPointerConstantAdjustmentLoopWithProbe(-(ssize_t)argSize, /* hideSpChangeFromEmitter */ false, + REG_NA); + } + else + { + inst_RV_IV(INS_sub, REG_SPBASE, argSize, EA_PTRSIZE); + } + AddStackLevel(argSize); return true; } diff --git a/src/jit/target.h b/src/jit/target.h index 0dd37e7..97df447 100644 --- a/src/jit/target.h +++ b/src/jit/target.h @@ -488,6 +488,15 @@ typedef unsigned char regNumberSmall; #define INS_stosp INS_stosd #define INS_r_stosp INS_r_stosd + // Any stack pointer adjustment larger than this (in bytes) when setting up outgoing call arguments + // requires a stack probe. Set it large enough so all normal stack arguments don't get a probe. + #define ARG_STACK_PROBE_THRESHOLD_BYTES 1024 + + // The number of bytes from the end of the last probed page that must also be probed, to allow for some + // small SP adjustments without probes. If zero, then the stack pointer can point to the last byte/word + // on the stack guard page, and must be touched before any further "SUB SP". + #define STACK_PROBE_BOUNDARY_THRESHOLD_BYTES ARG_STACK_PROBE_THRESHOLD_BYTES + #elif defined(_TARGET_AMD64_) // TODO-AMD64-CQ: Fine tune the following xxBlk threshold values: @@ -889,6 +898,9 @@ typedef unsigned char regNumberSmall; #define INS_stosp INS_stosq #define INS_r_stosp INS_r_stosq + // AMD64 uses FEATURE_FIXED_OUT_ARGS so this can be zero. + #define STACK_PROBE_BOUNDARY_THRESHOLD_BYTES 0 + #elif defined(_TARGET_ARM_) // TODO-ARM-CQ: Use shift for division by power of 2 diff --git a/tests/src/JIT/Methodical/largeframes/skip3/skippage3.cs b/tests/src/JIT/Methodical/largeframes/skip3/skippage3.cs new file mode 100644 index 0000000..0b3b745 --- /dev/null +++ b/tests/src/JIT/Methodical/largeframes/skip3/skippage3.cs @@ -0,0 +1,88 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +// Passing a very large struct by value on the stack, on arm32 and x86, +// can cause it to be copied from a temp to the outgoing space without +// probing the stack. + +using System; +using System.Runtime.InteropServices; +using System.Runtime.CompilerServices; + +namespace BigFrames +{ + + [StructLayout(LayoutKind.Explicit)] + public struct LargeStruct + { + [FieldOffset(0)] + public int i1; + [FieldOffset(65512)] + public int i2; + } + + public class Test + { + public static int iret = 1; + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static void TestWrite(int i1, int i2, int i3, int i4, LargeStruct s) + { + Console.Write("Enter TestWrite: "); + Console.WriteLine(i1 + i2 + i3 + i4 + s.i2); + iret = 100; + } + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static void Test1() + { + Console.WriteLine("Enter Test1"); + LargeStruct s = new LargeStruct(); + s.i2 = 5; + TestWrite(1, 2, 3, 4, s); // 4 int reg args, then struct stack arg + } + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static void Escape(ref LargeStruct s) + { + } + + // A lot of time the stack when we are called has a bunch of committed pages + // before the guard page. So eat up a bunch of stack before doing our test, + // where we want to be near the guard page. + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static void EatStackThenTest1(int level = 0) + { + LargeStruct s = new LargeStruct(); + s.i2 = level; + Escape(ref s); + + if (level < 20) + { + EatStackThenTest1(level + 1); + } + else + { + Test1(); + } + } + + public static int Main() + { + Test1(); // force JIT of this + + EatStackThenTest1(); // If that didn't fail, eat stack then try again. 
+ + if (iret == 100) + { + Console.WriteLine("TEST PASSED"); + } + else + { + Console.WriteLine("TEST FAILED"); + } + return iret; + } + } +} diff --git a/tests/src/JIT/Methodical/largeframes/skip3/skippage3.csproj b/tests/src/JIT/Methodical/largeframes/skip3/skippage3.csproj new file mode 100644 index 0000000..1b7f680 --- /dev/null +++ b/tests/src/JIT/Methodical/largeframes/skip3/skippage3.csproj @@ -0,0 +1,35 @@ + + + + + Debug + AnyCPU + $(MSBuildProjectName) + 2.0 + {43F24741-6FD9-4593-92FA-D3252B540A92} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + 1 + + + + + + + False + + + + PdbOnly + True + + + + + + + + + + diff --git a/tests/src/JIT/Methodical/largeframes/skip4/skippage4.cs b/tests/src/JIT/Methodical/largeframes/skip4/skippage4.cs new file mode 100644 index 0000000..a3c0651 --- /dev/null +++ b/tests/src/JIT/Methodical/largeframes/skip4/skippage4.cs @@ -0,0 +1,61 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +// Passing a very large struct by value on the stack, on arm32 and x86, +// can cause it to be copied from a temp to the outgoing space without +// probing the stack. 
+ +using System; +using System.Runtime.InteropServices; +using System.Runtime.CompilerServices; + +namespace BigFrames +{ + + [StructLayout(LayoutKind.Explicit)] + public struct LargeStructWithRef + { + [FieldOffset(0)] + public int i1; + [FieldOffset(65500)] + public Object o1; + } + + public class Test + { + public static int iret = 1; + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static void TestWrite(LargeStructWithRef s) + { + Console.Write("Enter TestWrite: "); + Console.WriteLine(s.o1.GetHashCode()); + iret = 100; + } + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static void Test1() + { + Console.WriteLine("Enter Test1"); + LargeStructWithRef s = new LargeStructWithRef(); + s.o1 = new Object(); + TestWrite(s); + } + + public static int Main() + { + Test1(); + + if (iret == 100) + { + Console.WriteLine("TEST PASSED"); + } + else + { + Console.WriteLine("TEST FAILED"); + } + return iret; + } + } +} diff --git a/tests/src/JIT/Methodical/largeframes/skip4/skippage4.csproj b/tests/src/JIT/Methodical/largeframes/skip4/skippage4.csproj new file mode 100644 index 0000000..1482b3e --- /dev/null +++ b/tests/src/JIT/Methodical/largeframes/skip4/skippage4.csproj @@ -0,0 +1,35 @@ + + + + + Debug + AnyCPU + $(MSBuildProjectName) + 2.0 + {43F24741-6FD9-4593-92FA-D3252B540A92} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + 1 + + + + + + + False + + + + PdbOnly + True + + + + + + + + + + diff --git a/tests/src/JIT/Methodical/largeframes/skip4/skippage4_save.cs b/tests/src/JIT/Methodical/largeframes/skip4/skippage4_save.cs new file mode 100644 index 0000000..95fe0ed --- /dev/null +++ b/tests/src/JIT/Methodical/largeframes/skip4/skippage4_save.cs @@ -0,0 +1,62 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +// Passing a very large struct by value on the stack, on arm32 and x86, +// can cause it to be copied from a temp to the outgoing space without +// probing the stack. + +using System; +using System.Runtime.InteropServices; +using System.Runtime.CompilerServices; + +namespace BigFrames +{ + + [StructLayout(LayoutKind.Explicit)] + public struct Struct65500ref + { + [FieldOffset(0)] + public int i1; + [FieldOffset(65496)] + public Object o1; + } + + public class Test + { + public static int iret = 1; + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static void TestWrite(int i1, int i2, int i3, int i4, Struct65500ref s) + { + Console.Write("Enter TestWrite: "); + Console.WriteLine(i1 + i2 + i3 + i4 + s.o1.GetHashCode()); + iret = 100; + // Test1(); // recurse + } + + [MethodImplAttribute(MethodImplOptions.NoInlining)] + public static void Test1() + { + Console.WriteLine("Enter Test1"); + Struct65500ref s = new Struct65500ref(); + s.o1 = new Object(); + TestWrite(1, 2, 3, 4, s); // 4 int reg args, then struct stack arg + } + + public static int Main() + { + Test1(); + + if (iret == 100) + { + Console.WriteLine("TEST PASSED"); + } + else + { + Console.WriteLine("TEST FAILED"); + } + return iret; + } + } +} diff --git a/tests/src/JIT/Methodical/largeframes/skip5/skippage5.cs b/tests/src/JIT/Methodical/largeframes/skip5/skippage5.cs new file mode 100644 index 0000000..4d10adb --- /dev/null +++ b/tests/src/JIT/Methodical/largeframes/skip5/skippage5.cs @@ -0,0 +1,36 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +class Program +{ + [StructLayout(LayoutKind.Sequential)] + unsafe struct S + { + fixed byte x[65500]; + } + + class C + { + public S s; + } + + static int Main() => Test(new C()); + + [MethodImpl(MethodImplOptions.NoInlining)] + static void Call(int r0, int r1, int r2, int r3, int r4, int r5, int r6, S s) + { + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static int Test(C c) + { + Call(0, 1, 2, 3, 4, 5, 42, c.s); + Console.WriteLine("TEST PASSED"); + return 100; // If we don't crash, we pass + } +} diff --git a/tests/src/JIT/Methodical/largeframes/skip5/skippage5.csproj b/tests/src/JIT/Methodical/largeframes/skip5/skippage5.csproj new file mode 100644 index 0000000..3e46785 --- /dev/null +++ b/tests/src/JIT/Methodical/largeframes/skip5/skippage5.csproj @@ -0,0 +1,36 @@ + + + + + Debug + AnyCPU + $(MSBuildProjectName) + 2.0 + {43F24741-6FD9-4593-92FA-D3252B540A92} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + 1 + true + + + + + + + False + + + + PdbOnly + True + + + + + + + + + +