From a6a70630fb54b8ea215c9a469fb30938ac9335d4 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 26 Jan 2021 12:08:05 -0800 Subject: [PATCH] Calculate loopSize based on post align adjusted size (#47350) * Calculate loopSize based on post align adjusted size * Review comments Remove `paddingNeeded` as we can get that value from `idCodeSie()`. --- src/coreclr/jit/emit.cpp | 77 +++++++++++++++++++++++++++++++++++-------- src/coreclr/jit/emit.h | 17 +++++----- src/coreclr/jit/emitxarch.cpp | 9 +++-- 3 files changed, 77 insertions(+), 26 deletions(-) diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index b421116..ad044c9 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -4651,13 +4651,16 @@ bool emitter::emitEndsWithAlignInstr() // such that it doesn't exceed the maxLoopSize. // // Arguments: -// igLoopHeader - The header IG of a loop -// maxLoopSize - Maximum loop size. If the loop is bigger than this value, we will just -// return this value. +// igLoopHeader - The header IG of a loop +// maxLoopSize - Maximum loop size. If the loop is bigger than this value, we will just +// return this value. +// isAlignAdjusted - Determine if adjustments are done to the align instructions or not. +// During generating code, it is 'false' (because we haven't adjusted the size yet). +// During outputting code, it is 'true'. // // Returns: size of a loop in bytes. // -unsigned emitter::getLoopSize(insGroup* igLoopHeader, unsigned maxLoopSize) +unsigned emitter::getLoopSize(insGroup* igLoopHeader, unsigned maxLoopSize DEBUG_ARG(bool isAlignAdjusted)) { unsigned loopSize = 0; @@ -4670,9 +4673,47 @@ unsigned emitter::getLoopSize(insGroup* igLoopHeader, unsigned maxLoopSize) // of the current loop and should have backedge to current loop header. assert(igInLoop->igLoopBackEdge == igLoopHeader); - // In such cases, the current loop size should exclude the align instruction size reserved for - // next loop. - loopSize -= emitComp->opts.compJitAlignPaddingLimit; +#ifdef DEBUG + if (isAlignAdjusted) + { + // If this IG is already align adjusted, get the adjusted padding already calculated. + instrDescAlign* alignInstr = emitAlignList; + bool foundAlignInstr = false; + + // Find the alignInstr for igInLoop IG. + for (; alignInstr != nullptr; alignInstr = alignInstr->idaNext) + { + if (alignInstr->idaIG->igNum == igInLoop->igNum) + { + foundAlignInstr = true; + break; + } + } + assert(foundAlignInstr); + + unsigned adjustedPadding = 0; + if (emitComp->opts.compJitAlignLoopAdaptive) + { + adjustedPadding = alignInstr->idCodeSize(); + } + else + { + instrDescAlign *alignInstrToAdj = alignInstr, *prevAlignInstr = nullptr; + for (; alignInstrToAdj != nullptr && alignInstrToAdj->idaIG == alignInstr->idaIG; + alignInstrToAdj = alignInstrToAdj->idaNext) + { + adjustedPadding += alignInstrToAdj->idCodeSize(); + } + } + + loopSize -= adjustedPadding; + } + else +#endif + { + // The current loop size should exclude the align instruction size reserved for next loop. + loopSize -= emitComp->opts.compJitAlignPaddingLimit; + } } if ((igInLoop->igLoopBackEdge == igLoopHeader) || (loopSize > maxLoopSize)) { @@ -4843,8 +4884,10 @@ void emitter::emitLoopAlignAdjustments() alignInstr = prevAlignInstr; } - JITDUMP("Adjusted alignment of G_M%03u_IG%02u from %02d to %02d\n", emitComp->compMethodID, alignIG->igNum, + JITDUMP("Adjusted alignment of G_M%03u_IG%02u from %02d to %02d.\n", emitComp->compMethodID, alignIG->igNum, estimatedPaddingNeeded, actualPaddingNeeded); + JITDUMP("Adjusted size of G_M%03u_IG%02u from %04d to %04d.\n", emitComp->compMethodID, alignIG->igNum, + (alignIG->igSize + diff), alignIG->igSize); } // Adjust the offset of all IGs starting from next IG until we reach the IG having the next @@ -4853,6 +4896,8 @@ void emitter::emitLoopAlignAdjustments() insGroup* adjOffUptoIG = alignInstr->idaNext != nullptr ? alignInstr->idaNext->idaIG : emitIGlast; while ((adjOffIG != nullptr) && (adjOffIG->igNum <= adjOffUptoIG->igNum)) { + JITDUMP("Adjusted offset of G_M%03u_IG%02u from %04X to %04X\n", emitComp->compMethodID, adjOffIG->igNum, + adjOffIG->igOffs, (adjOffIG->igOffs - alignBytesRemoved)); adjOffIG->igOffs -= alignBytesRemoved; adjOffIG = adjOffIG->igNext; } @@ -4874,6 +4919,13 @@ void emitter::emitLoopAlignAdjustments() // emitCalculatePaddingForLoopAlignment: Calculate the padding to insert at the // end of 'ig' so the loop that starts after 'ig' is aligned. // +// Arguments: +// ig - The IG having 'align' instruction in the end. +// offset - The offset at which the IG that follows 'ig' starts. +// isAlignAdjusted - Determine if adjustments are done to the align instructions or not. +// During generating code, it is 'false' (because we haven't adjusted the size yet). +// During outputting code, it is 'true'. +// // Returns: Padding amount. // 0 means no padding is needed, either because loop is already aligned or it // is too expensive to align loop and hence it will not be aligned. @@ -4896,8 +4948,7 @@ void emitter::emitLoopAlignAdjustments() // 3b. If the loop already fits in minimum alignmentBoundary blocks, then return 0. // already best aligned // 3c. return paddingNeeded. // -unsigned emitter::emitCalculatePaddingForLoopAlignment(insGroup* ig, - size_t offset DEBUG_ARG(bool displayAlignmentDetails)) +unsigned emitter::emitCalculatePaddingForLoopAlignment(insGroup* ig, size_t offset DEBUG_ARG(bool isAlignAdjusted)) { assert(ig->isLoopAlign()); unsigned alignmentBoundary = emitComp->opts.compJitAlignLoopBoundary; @@ -4924,7 +4975,7 @@ unsigned emitter::emitCalculatePaddingForLoopAlignment(insGroup* ig, maxLoopSize = emitComp->opts.compJitAlignLoopMaxCodeSize; } - unsigned loopSize = getLoopSize(ig->igNext, maxLoopSize); + unsigned loopSize = getLoopSize(ig->igNext, maxLoopSize DEBUG_ARG(isAlignAdjusted)); // No padding if loop is big if (loopSize > maxLoopSize) @@ -5020,8 +5071,8 @@ unsigned emitter::emitCalculatePaddingForLoopAlignment(insGroup* ig, } } - JITDUMP(";; Calculated padding to add %d bytes to align at %dB boundary that starts at 0x%x.'\n", paddingToAdd, - alignmentBoundary, offset); + JITDUMP(";; Calculated padding to add %d bytes to align G_M%03u_IG%02u at %dB boundary.\n", paddingToAdd, + emitComp->compMethodID, ig->igNext->igNum, alignmentBoundary); // Either no padding is added because it is too expensive or the offset gets aligned // to the alignment boundary diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index a6000e2..50311c5 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -1759,18 +1759,19 @@ private: void emitJumpDistBind(); // Bind all the local jumps in method #if FEATURE_LOOP_ALIGN - instrDescAlign* emitCurIGAlignList; // list of align instructions in current IG - unsigned emitLastInnerLoopStartIgNum; // Start IG of last inner loop - unsigned emitLastInnerLoopEndIgNum; // End IG of last inner loop - unsigned emitLastAlignedIgNum; // last IG that has align instruction - instrDescAlign* emitAlignList; // list of local align instructions in method - instrDescAlign* emitAlignLast; // last align instruction in method - unsigned getLoopSize(insGroup* igLoopHeader, unsigned maxLoopSize); // Get the smallest loop size + instrDescAlign* emitCurIGAlignList; // list of align instructions in current IG + unsigned emitLastInnerLoopStartIgNum; // Start IG of last inner loop + unsigned emitLastInnerLoopEndIgNum; // End IG of last inner loop + unsigned emitLastAlignedIgNum; // last IG that has align instruction + instrDescAlign* emitAlignList; // list of local align instructions in method + instrDescAlign* emitAlignLast; // last align instruction in method + unsigned getLoopSize(insGroup* igLoopHeader, + unsigned maxLoopSize DEBUG_ARG(bool isAlignAdjusted)); // Get the smallest loop size void emitLoopAlignment(); bool emitEndsWithAlignInstr(); // Validate if newLabel is appropriate void emitSetLoopBackEdge(BasicBlock* loopTopBlock); void emitLoopAlignAdjustments(); // Predict if loop alignment is needed and make appropriate adjustments - unsigned emitCalculatePaddingForLoopAlignment(insGroup* ig, size_t offset DEBUG_ARG(bool displayAlignmentDetails)); + unsigned emitCalculatePaddingForLoopAlignment(insGroup* ig, size_t offset DEBUG_ARG(bool isAlignAdjusted)); #endif void emitCheckFuncletBranch(instrDesc* jmp, insGroup* jmpIG); // Check for illegal branches between funclets diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index ec9c091..0e78dac 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -2673,12 +2673,12 @@ void emitter::emitLoopAlign(unsigned short paddingBytes) paddingBytes = min(paddingBytes, MAX_ENCODED_SIZE); // We may need to skip up to 15 bytes of code instrDescAlign* id = emitNewInstrAlign(); id->idCodeSize(paddingBytes); - emitCurIGsize += paddingBytes; - id->idaIG = emitCurIG; /* Append this instruction to this IG's alignment list */ - id->idaNext = emitCurIGAlignList; + id->idaNext = emitCurIGAlignList; + + emitCurIGsize += paddingBytes; emitCurIGAlignList = id; } @@ -9430,8 +9430,7 @@ BYTE* emitter::emitOutputAlign(insGroup* ig, instrDesc* id, BYTE* dst) assert(0 <= paddingToAdd && paddingToAdd < emitComp->opts.compJitAlignLoopBoundary); #ifdef DEBUG - bool displayAlignmentDetails = (emitComp->opts.disAsm /*&& emitComp->opts.disAddr*/) || emitComp->verbose; - unsigned paddingNeeded = emitCalculatePaddingForLoopAlignment(ig, (size_t)dst, displayAlignmentDetails); + unsigned paddingNeeded = emitCalculatePaddingForLoopAlignment(ig, (size_t)dst, true); // For non-adaptive, padding size is spread in multiple instructions, so don't bother checking if (emitComp->opts.compJitAlignLoopAdaptive) -- 2.7.4