From: Bruce Forstall Date: Tue, 21 Mar 2017 22:27:07 +0000 (-0700) Subject: Fix Linux/x86 call alignment calculation and insertion (dotnet/coreclr#10266) X-Git-Tag: submit/tizen/20210909.063632~11030^2~7623 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=03e0c746aa86adbf2ccf40d3ab3630dd4de55c1e;p=platform%2Fupstream%2Fdotnet%2Fruntime.git Fix Linux/x86 call alignment calculation and insertion (dotnet/coreclr#10266) * Fix Linux/x86 call alignment calculation and insertion The existing system of calculating per-call alignment adjustments did not consider the possibility of nested calls. On x86, some arguments for a call might be pushed on the stack before a nested call's arguments start being pushed. Thus, a call can't consider only its own arguments when determining the stack level. Instead, use the existing genStackLevel variable, updated dynamically during code generation, to determine the baseline stack alignment when a call first needs to push something on the stack, or if it has no stack arguments, whether it needs to align the stack before a call. One wrinkle here is that the fgAddCodeRef() function for adding throw blocks for array bounds checks, and other similar helpers, need to know the stack level on entry, and this stack level is computed during argument morphing. There is a tough phase ordering problem here. So, we bail out and force an EBP frame if there are any such throw helpers in the function. When using an EBP frame, the helper block doesn't need to know the stack level -- it is only needed for ESP frames, needed for unwinding. Note that this bail out already existed if the same helper needed multiple different stack levels on entry. We could do slightly better without too much work by not bailing out for top-level calls with no stack arguments. * Track max alignment added for nested calls during codegen This information is needed by Linux/x86 for an assert about maximum emitter stack depth. * Make max stack align tracking available in release as well The assert these are used in is a noway_assert. * Formatting Commit migrated from https://github.com/dotnet/coreclr/commit/68012bf209f37ab5b5b11a02b203f2949d1ef8dc --- diff --git a/src/coreclr/src/jit/codegencommon.cpp b/src/coreclr/src/jit/codegencommon.cpp index e6482ff..06da545 100644 --- a/src/coreclr/src/jit/codegencommon.cpp +++ b/src/coreclr/src/jit/codegencommon.cpp @@ -107,6 +107,11 @@ CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler) m_stkArgVarNum = BAD_VAR_NUM; #endif +#if defined(UNIX_X86_ABI) + curNestedAlignment = 0; + maxNestedAlignment = 0; +#endif + regTracker.rsTrackInit(compiler, ®Set); gcInfo.regSet = ®Set; m_cgEmitter = new (compiler->getAllocator()) emitter(); @@ -3201,7 +3206,7 @@ void CodeGen::genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode) genTypeStSz(TYP_LONG) + // longs/doubles may be transferred via stack, etc (compiler->compTailCallUsed ? 4 : 0); // CORINFO_HELP_TAILCALL args #if defined(UNIX_X86_ABI) - maxAllowedStackDepth += genTypeStSz(TYP_INT) * 3; // stack align for x86 - allow up to 3 INT's for padding + maxAllowedStackDepth += maxNestedAlignment; #endif noway_assert(getEmitter()->emitMaxStackDepth <= maxAllowedStackDepth); } diff --git a/src/coreclr/src/jit/codegenlinear.h b/src/coreclr/src/jit/codegenlinear.h index 154d484..9fa5bdb 100644 --- a/src/coreclr/src/jit/codegenlinear.h +++ b/src/coreclr/src/jit/codegenlinear.h @@ -174,6 +174,44 @@ void genCodeForCpBlkRepMovs(GenTreeBlk* cpBlkNode); void genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode); +void genAlignStackBeforeCall(GenTreePutArgStk* putArgStk); +void genAlignStackBeforeCall(GenTreeCall* call); +void genRemoveAlignmentAfterCall(GenTreeCall* call); + +#if defined(UNIX_X86_ABI) + +unsigned curNestedAlignment; // Keep track of alignment adjustment required during codegen. +unsigned maxNestedAlignment; // The maximum amount of alignment adjustment required. + +void SubtractNestedAlignment(unsigned adjustment) +{ + assert(curNestedAlignment >= adjustment); + unsigned newNestedAlignment = curNestedAlignment - adjustment; + if (curNestedAlignment != newNestedAlignment) + { + JITDUMP("Adjusting stack nested alignment from %d to %d\n", curNestedAlignment, newNestedAlignment); + } + curNestedAlignment = newNestedAlignment; +} + +void AddNestedAlignment(unsigned adjustment) +{ + unsigned newNestedAlignment = curNestedAlignment + adjustment; + if (curNestedAlignment != newNestedAlignment) + { + JITDUMP("Adjusting stack nested alignment from %d to %d\n", curNestedAlignment, newNestedAlignment); + } + curNestedAlignment = newNestedAlignment; + + if (curNestedAlignment > maxNestedAlignment) + { + JITDUMP("Max stack nested alignment changed from %d to %d\n", maxNestedAlignment, curNestedAlignment); + maxNestedAlignment = curNestedAlignment; + } +} + +#endif + #ifdef FEATURE_PUT_STRUCT_ARG_STK #ifdef _TARGET_X86_ bool genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk); diff --git a/src/coreclr/src/jit/codegenxarch.cpp b/src/coreclr/src/jit/codegenxarch.cpp index 9d33671..2549095 100644 --- a/src/coreclr/src/jit/codegenxarch.cpp +++ b/src/coreclr/src/jit/codegenxarch.cpp @@ -4752,6 +4752,8 @@ bool CodeGen::genEmitOptimizedGCWriteBarrier(GCInfo::WriteBarrierForm writeBarri // Produce code for a GT_CALL node void CodeGen::genCallInstruction(GenTreeCall* call) { + genAlignStackBeforeCall(call); + gtCallTypes callType = (gtCallTypes)call->gtCallType; IL_OFFSETX ilOffset = BAD_IL_OFFSET; @@ -5173,14 +5175,7 @@ void CodeGen::genCallInstruction(GenTreeCall* call) // clang-format on } -#if defined(UNIX_X86_ABI) - // Put back the stack pointer if there was any padding for stack alignment - unsigned padStackAlign = call->fgArgInfo->GetPadStackAlign(); - if (padStackAlign != 0) - { - inst_RV_IV(INS_add, REG_SPBASE, padStackAlign * TARGET_POINTER_SIZE, EA_PTRSIZE); - } -#endif // UNIX_X86_ABI + genRemoveAlignmentAfterCall(call); // if it was a pinvoke we may have needed to get the address of a label if (genPendingCallLabel) @@ -7502,7 +7497,80 @@ unsigned CodeGen::getBaseVarForPutArgStk(GenTreePtr treeNode) return baseVarNum; } +//--------------------------------------------------------------------- +// genAlignStackBeforeCall: Align the stack if necessary before a call. +// +// Arguments: +// putArgStk - the putArgStk node. +// +void CodeGen::genAlignStackBeforeCall(GenTreePutArgStk* putArgStk) +{ +#if defined(UNIX_X86_ABI) + + genAlignStackBeforeCall(putArgStk->gtCall); + +#endif // UNIX_X86_ABI +} + +//--------------------------------------------------------------------- +// genAlignStackBeforeCall: Align the stack if necessary before a call. +// +// Arguments: +// call - the call node. +// +void CodeGen::genAlignStackBeforeCall(GenTreeCall* call) +{ +#if defined(UNIX_X86_ABI) + + // Have we aligned the stack yet? + if (!call->fgArgInfo->IsStkAlignmentDone()) + { + // We haven't done any stack alignment yet for this call. We might need to create + // an alignment adjustment, even if this function itself doesn't have any stack args. + // This can happen if this function call is part of a nested call sequence, and the outer + // call has already pushed some arguments. + + unsigned stkLevel = genStackLevel + call->fgArgInfo->GetStkSizeBytes(); + call->fgArgInfo->ComputeStackAlignment(stkLevel); + + unsigned padStkAlign = call->fgArgInfo->GetStkAlign(); + if (padStkAlign != 0) + { + // Now generate the alignment + inst_RV_IV(INS_sub, REG_SPBASE, padStkAlign, EA_PTRSIZE); + AddStackLevel(padStkAlign); + AddNestedAlignment(padStkAlign); + } + + call->fgArgInfo->SetStkAlignmentDone(); + } + +#endif // UNIX_X86_ABI +} + +//--------------------------------------------------------------------- +// genRemoveAlignmentAfterCall: After a call, remove the alignment +// added before the call, if any. +// +// Arguments: +// call - the call node. +// +void CodeGen::genRemoveAlignmentAfterCall(GenTreeCall* call) +{ +#if defined(UNIX_X86_ABI) + // Put back the stack pointer if there was any padding for stack alignment + unsigned padStkAlign = call->fgArgInfo->GetStkAlign(); + if (padStkAlign != 0) + { + inst_RV_IV(INS_add, REG_SPBASE, padStkAlign, EA_PTRSIZE); + SubtractStackLevel(padStkAlign); + SubtractNestedAlignment(padStkAlign); + } +#endif // UNIX_X86_ABI +} + #ifdef _TARGET_X86_ + //--------------------------------------------------------------------- // genAdjustStackForPutArgStk: // adjust the stack pointer for a putArgStk node if necessary. @@ -7754,14 +7822,14 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk) } else { -#if defined(_TARGET_X86_) && defined(FEATURE_SIMD) +#if defined(FEATURE_SIMD) if (fieldType == TYP_SIMD12) { assert(genIsValidFloatReg(simdTmpReg)); genStoreSIMD12ToStack(argReg, simdTmpReg); } else -#endif // defined(_TARGET_X86_) && defined(FEATURE_SIMD) +#endif // defined(FEATURE_SIMD) { genStoreRegToStackArg(fieldType, argReg, fieldOffset - currentOffset); } @@ -7799,15 +7867,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk) #ifdef _TARGET_X86_ -#if defined(UNIX_X86_ABI) - // For each call, first stack argument has the padding for alignment - // if this value is not zero, use it to adjust the ESP - unsigned argPadding = putArgStk->getArgPadding(); - if (argPadding != 0) - { - inst_RV_IV(INS_sub, REG_SPBASE, argPadding * TARGET_POINTER_SIZE, EA_PTRSIZE); - } -#endif + genAlignStackBeforeCall(putArgStk); if (varTypeIsStruct(targetType)) { diff --git a/src/coreclr/src/jit/compiler.h b/src/coreclr/src/jit/compiler.h index 810c404..3099c44 100644 --- a/src/coreclr/src/jit/compiler.h +++ b/src/coreclr/src/jit/compiler.h @@ -1194,11 +1194,6 @@ struct fgArgTabEntry unsigned alignment; // 1 or 2 (slots/registers) unsigned lateArgInx; // index into gtCallLateArgs list unsigned tmpNum; // the LclVar number if we had to force evaluation of this arg -#if defined(UNIX_X86_ABI) - unsigned padStkAlign; // Count of number of padding slots for stack alignment. For each Call, only the first - // argument may have a value to emit "sub esp, n" to adjust the stack before pushing - // the argument. -#endif bool isSplit : 1; // True when this argument is split between the registers and OutArg area bool needTmp : 1; // True when we force this argument's evaluation into a temp LclVar @@ -1276,9 +1271,14 @@ class fgArgInfo unsigned argCount; // Updatable arg count value unsigned nextSlotNum; // Updatable slot count value unsigned stkLevel; // Stack depth when we make this call (for x86) + #if defined(UNIX_X86_ABI) - unsigned padStkAlign; // Count of number of padding slots for stack alignment. This value is used to turn back - // stack pointer before it was adjusted after each Call + bool alignmentDone; // Updateable flag, set to 'true' after we've done any required alignment. + unsigned stkSizeBytes; // Size of stack used by this call, in bytes. Calculated during fgMorphArgs(). + unsigned padStkAlign; // Stack alignment in bytes required before arguments are pushed for this call. + // Computed dynamically during codegen, based on stkSizeBytes and the current + // stack level (genStackLevel) when the first stack adjustment is made for + // this call. #endif #if FEATURE_FIXED_OUT_ARGS @@ -1333,10 +1333,6 @@ public: void ArgsComplete(); -#if defined(UNIX_X86_ABI) - void ArgsAlignPadding(); -#endif - void SortArgs(); void EvalArgsToTemps(); @@ -1356,12 +1352,6 @@ public: { return nextSlotNum; } -#if defined(UNIX_X86_ABI) - unsigned GetPadStackAlign() - { - return padStkAlign; - } -#endif bool HasRegArgs() { return hasRegArgs; @@ -1384,6 +1374,40 @@ public: outArgSize = newVal; } #endif // FEATURE_FIXED_OUT_ARGS + + void ComputeStackAlignment(unsigned curStackLevelInBytes) + { +#if defined(UNIX_X86_ABI) + padStkAlign = AlignmentPad(curStackLevelInBytes, STACK_ALIGN); +#endif // defined(UNIX_X86_ABI) + } + + void SetStkSizeBytes(unsigned newStkSizeBytes) + { +#if defined(UNIX_X86_ABI) + stkSizeBytes = newStkSizeBytes; +#endif // defined(UNIX_X86_ABI) + } + +#if defined(UNIX_X86_ABI) + unsigned GetStkAlign() + { + return padStkAlign; + } + unsigned GetStkSizeBytes() const + { + return stkSizeBytes; + } + bool IsStkAlignmentDone() const + { + return alignmentDone; + } + void SetStkAlignmentDone() + { + alignmentDone = true; + } +#endif // defined(UNIX_X86_ABI) + // Get the late arg for arg at position argIndex. Caller must ensure this position has a late arg. GenTreePtr GetLateArg(unsigned argIndex); }; diff --git a/src/coreclr/src/jit/flowgraph.cpp b/src/coreclr/src/jit/flowgraph.cpp index 5d4bc2b..dbaff9a 100644 --- a/src/coreclr/src/jit/flowgraph.cpp +++ b/src/coreclr/src/jit/flowgraph.cpp @@ -17583,10 +17583,28 @@ BasicBlock* Compiler::fgAddCodeRef(BasicBlock* srcBlk, unsigned refData, Special // this restriction could be removed with more careful code // generation for BBJ_THROW (i.e. range check failed). // + // For Linux/x86, we possibly need to insert stack alignment adjustment + // before the first stack argument pushed for every call. But we + // don't know what the stack alignment adjustment will be when + // we morph a tree that calls fgAddCodeRef(), so the stack depth + // number will be incorrect. For now, simply force all functions with + // these helpers to have EBP frames. It might be possible to make + // this less conservative. E.g., for top-level (not nested) calls + // without stack args, the stack pointer hasn't changed and stack + // depth will be known to be zero. Or, figure out a way to update + // or generate all required helpers after all stack alignment + // has been added, and the stack level at each call to fgAddCodeRef() + // is known, or can be recalculated. + CLANG_FORMAT_COMMENT_ANCHOR; + +#if defined(UNIX_X86_ABI) + codeGen->setFrameRequired(true); +#else // !defined(UNIX_X86_ABI) if (add->acdStkLvl != stkDepth) { codeGen->setFrameRequired(true); } +#endif // !defined(UNIX_X86_ABI) #endif // _TARGET_X86_ return add->acdDstBlk; diff --git a/src/coreclr/src/jit/gentree.h b/src/coreclr/src/jit/gentree.h index d910ca4..2261140 100644 --- a/src/coreclr/src/jit/gentree.h +++ b/src/coreclr/src/jit/gentree.h @@ -4699,7 +4699,7 @@ struct GenTreePutArgStk : public GenTreeUnOp , gtNumberReferenceSlots(0) , gtGcPtrs(nullptr) #endif // FEATURE_PUT_STRUCT_ARG_STK -#ifdef DEBUG +#if defined(DEBUG) || defined(UNIX_X86_ABI) , gtCall(callNode) #endif { @@ -4795,7 +4795,7 @@ struct GenTreePutArgStk : public GenTreeUnOp #endif // FEATURE_PUT_STRUCT_ARG_STK -#if defined(DEBUG) +#if defined(DEBUG) || defined(UNIX_X86_ABI) GenTreeCall* gtCall; // the call node to which this argument belongs #endif diff --git a/src/coreclr/src/jit/lower.cpp b/src/coreclr/src/jit/lower.cpp index 05a087f..a6dd563 100644 --- a/src/coreclr/src/jit/lower.cpp +++ b/src/coreclr/src/jit/lower.cpp @@ -946,11 +946,6 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP GenTreePutArgStk(GT_PUTARG_STK, type, arg, info->slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(info->numSlots), call->IsFastTailCall(), call); -#if defined(UNIX_X86_ABI) - assert((info->padStkAlign > 0 && info->numSlots > 0) || (info->padStkAlign == 0)); - putArg->AsPutArgStk()->setArgPadding(info->padStkAlign); -#endif - #ifdef FEATURE_PUT_STRUCT_ARG_STK // If the ArgTabEntry indicates that this arg is a struct // get and store the number of slots that are references. diff --git a/src/coreclr/src/jit/morph.cpp b/src/coreclr/src/jit/morph.cpp index bc80f9c..f9cfbf5 100644 --- a/src/coreclr/src/jit/morph.cpp +++ b/src/coreclr/src/jit/morph.cpp @@ -858,7 +858,9 @@ fgArgInfo::fgArgInfo(Compiler* comp, GenTreeCall* call, unsigned numArgs) nextSlotNum = INIT_ARG_STACK_SLOT; stkLevel = 0; #if defined(UNIX_X86_ABI) - padStkAlign = 0; + alignmentDone = false; + stkSizeBytes = 0; + padStkAlign = 0; #endif #if FEATURE_FIXED_OUT_ARGS outArgSize = 0; @@ -902,7 +904,9 @@ fgArgInfo::fgArgInfo(GenTreeCall* newCall, GenTreeCall* oldCall) nextSlotNum = INIT_ARG_STACK_SLOT; stkLevel = oldArgInfo->stkLevel; #if defined(UNIX_X86_ABI) - padStkAlign = oldArgInfo->padStkAlign; + alignmentDone = oldArgInfo->alignmentDone; + stkSizeBytes = oldArgInfo->stkSizeBytes; + padStkAlign = oldArgInfo->padStkAlign; #endif #if FEATURE_FIXED_OUT_ARGS outArgSize = oldArgInfo->outArgSize; @@ -1086,19 +1090,16 @@ fgArgTabEntryPtr fgArgInfo::AddRegArg( { fgArgTabEntryPtr curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry; - curArgTabEntry->argNum = argNum; - curArgTabEntry->node = node; - curArgTabEntry->parent = parent; - curArgTabEntry->regNum = regNum; - curArgTabEntry->slotNum = 0; - curArgTabEntry->numRegs = numRegs; - curArgTabEntry->numSlots = 0; - curArgTabEntry->alignment = alignment; - curArgTabEntry->lateArgInx = (unsigned)-1; - curArgTabEntry->tmpNum = (unsigned)-1; -#if defined(UNIX_X86_ABI) - curArgTabEntry->padStkAlign = 0; -#endif + curArgTabEntry->argNum = argNum; + curArgTabEntry->node = node; + curArgTabEntry->parent = parent; + curArgTabEntry->regNum = regNum; + curArgTabEntry->slotNum = 0; + curArgTabEntry->numRegs = numRegs; + curArgTabEntry->numSlots = 0; + curArgTabEntry->alignment = alignment; + curArgTabEntry->lateArgInx = (unsigned)-1; + curArgTabEntry->tmpNum = (unsigned)-1; curArgTabEntry->isSplit = false; curArgTabEntry->isTmp = false; curArgTabEntry->needTmp = false; @@ -1164,19 +1165,16 @@ fgArgTabEntryPtr fgArgInfo::AddStkArg(unsigned argNum, curArgTabEntry->isStruct = isStruct; // is this a struct arg #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) - curArgTabEntry->argNum = argNum; - curArgTabEntry->node = node; - curArgTabEntry->parent = parent; - curArgTabEntry->regNum = REG_STK; - curArgTabEntry->slotNum = nextSlotNum; - curArgTabEntry->numRegs = 0; - curArgTabEntry->numSlots = numSlots; - curArgTabEntry->alignment = alignment; - curArgTabEntry->lateArgInx = (unsigned)-1; - curArgTabEntry->tmpNum = (unsigned)-1; -#if defined(UNIX_X86_ABI) - curArgTabEntry->padStkAlign = 0; -#endif + curArgTabEntry->argNum = argNum; + curArgTabEntry->node = node; + curArgTabEntry->parent = parent; + curArgTabEntry->regNum = REG_STK; + curArgTabEntry->slotNum = nextSlotNum; + curArgTabEntry->numRegs = 0; + curArgTabEntry->numSlots = numSlots; + curArgTabEntry->alignment = alignment; + curArgTabEntry->lateArgInx = (unsigned)-1; + curArgTabEntry->tmpNum = (unsigned)-1; curArgTabEntry->isSplit = false; curArgTabEntry->isTmp = false; curArgTabEntry->needTmp = false; @@ -1702,52 +1700,6 @@ void fgArgInfo::ArgsComplete() argsComplete = true; } -#if defined(UNIX_X86_ABI) -// Get the stack alignment value for a Call holding this object -// -// NOTE: This function will calculate number of padding slots, to align the -// stack before pushing arguments to the stack. Padding value is stored in -// the first argument in fgArgTabEntry structure padStkAlign member so that -// code (sub esp, n) can be emitted before generating argument push in -// fgArgTabEntry node. As of result stack will be aligned right before -// making a "Call". After the Call, stack is re-adjusted to the value it -// was with fgArgInfo->padStkAlign value as we cann't use the one in fgArgTabEntry. -// -void fgArgInfo::ArgsAlignPadding() -{ - // To get the padding amount, sum up all the slots and get the remainder for padding - unsigned curInx; - unsigned numSlots = 0; - fgArgTabEntryPtr firstArgTabEntry = nullptr; - - for (curInx = 0; curInx < argCount; curInx++) - { - fgArgTabEntryPtr curArgTabEntry = argTable[curInx]; - if (curArgTabEntry->numSlots > 0) - { - // The argument may be REG_STK or constant or register that goes to stack - assert(nextSlotNum >= curArgTabEntry->slotNum); - - numSlots += curArgTabEntry->numSlots; - if (firstArgTabEntry == nullptr) - { - // First argument will be used to hold the padding amount - firstArgTabEntry = curArgTabEntry; - } - } - } - - if (firstArgTabEntry != nullptr) - { - const int numSlotsAligned = STACK_ALIGN / TARGET_POINTER_SIZE; - // Set stack align pad for the first argument - firstArgTabEntry->padStkAlign = AlignmentPad(numSlots, numSlotsAligned); - // Set also for fgArgInfo that will be used to reset stack pointer after the Call - this->padStkAlign = firstArgTabEntry->padStkAlign; - } -} -#endif // UNIX_X86_ABI - void fgArgInfo::SortArgs() { assert(argsComplete == true); @@ -4287,10 +4239,6 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) { call->fgArgInfo->ArgsComplete(); -#if defined(UNIX_X86_ABI) - call->fgArgInfo->ArgsAlignPadding(); -#endif // UNIX_X86_ABI - #ifdef LEGACY_BACKEND call->gtCallRegUsedMask = genIntAllRegArgMask(intArgRegNum); #if defined(_TARGET_ARM_) @@ -4332,6 +4280,9 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) fgPtrArgCntMax = fgPtrArgCntCur; } + assert(fgPtrArgCntCur >= genPtrArgCntSav); + call->fgArgInfo->SetStkSizeBytes((fgPtrArgCntCur - genPtrArgCntSav) * TARGET_POINTER_SIZE); + /* The call will pop all the arguments we pushed */ fgPtrArgCntCur = genPtrArgCntSav;