From: Tanner Gooding Date: Sun, 13 Jun 2021 05:25:47 +0000 (-0700) Subject: Port IsRedundantMov to xarch (#54075) X-Git-Tag: submit/tizen/20210909.063632~801 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=feae07921934805dc84d8d1c830b22008246b869;p=platform%2Fupstream%2Fdotnet%2Fruntime.git Port IsRedundantMov to xarch (#54075) * Port IsRedundantMov to xarch * Applying formatting patch * Responding to PR feedback --- diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 96b1283..6578232 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -4243,6 +4243,20 @@ void emitter::emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fld // Arguments: // ins -- The instruction being checked // +// Return Value: +// true if the instruction is a qualifying move instruction; otherwise, false +// +// Remarks: +// This methods covers most kinds of two operand move instructions that copy a +// value between two registers. It does not cover all move-like instructions +// and so doesn't currently cover things like movsb/movsw/movsd/movsq or cmovcc +// and doesn't currently cover cases where a value is read/written from memory. +// +// The reason it doesn't cover all instructions was namely to limit the scope +// of the initial change to that which was impactful to move elision so that +// it could be centrally managed and optimized. It may be beneficial to support +// the other move instructions in the future but that may require more extensive +// changes to ensure relevant codegen/emit paths flow and check things correctly. bool emitter::IsMovInstruction(instruction ins) { switch (ins) @@ -4253,7 +4267,6 @@ bool emitter::IsMovInstruction(instruction ins) case INS_movd: case INS_movdqa: case INS_movdqu: - case INS_movsd: case INS_movsdsse2: case INS_movss: case INS_movsx: @@ -4279,6 +4292,183 @@ bool emitter::IsMovInstruction(instruction ins) } } +//---------------------------------------------------------------------------------------- +// IsRedundantMov: +// Check if the current `mov` instruction is redundant and can be omitted. +// A `mov` is redundant in following 3 cases: +// +// 1. Move to same register on TARGET_AMD64 +// (Except 4-byte movement like "mov eax, eax" which zeros out upper bits of eax register) +// +// mov rax, rax +// +// 2. Move that is identical to last instruction emitted. +// +// mov rax, rbx # <-- last instruction +// mov rax, rbx # <-- current instruction can be omitted. +// +// 3. Opposite Move as that of last instruction emitted. +// +// mov rax, rbx # <-- last instruction +// mov rbx, rax # <-- current instruction can be omitted. +// +// Arguments: +// ins - The current instruction +// fmt - The current format +// size - Operand size of current instruction +// dst - The current destination +// src - The current source +// canIgnoreSideEffects - The move can be skipped as it doesn't represent special semantics +// +// Return Value: +// true if the move instruction is redundant; otherwise, false. + +bool emitter::IsRedundantMov( + instruction ins, insFormat fmt, emitAttr size, regNumber dst, regNumber src, bool canIgnoreSideEffects) +{ + assert(IsMovInstruction(ins)); + + if (canIgnoreSideEffects && (dst == src)) + { + // These elisions used to be explicit even when optimizations were disabled + + // Some instructions have a side effect and shouldn't be skipped + // however existing codepaths were skipping these instructions in + // certain scenarios and so we skip them as well for back-compat + // when canIgnoreSideEffects is true (see below for which have a + // side effect). + // + // Long term, these paths should be audited and should likely be + // replaced with copies rather than extensions. + return true; + } + + if (!emitComp->opts.OptimizationEnabled()) + { + // The remaining move elisions should only happen if optimizations are enabled + return false; + } + + // TODO-XArch-CQ: There are places where the fact that an instruction zero-extends + // is not an important detail, such as when "regular" floating-point code is generated + // + // This differs from cases like HWIntrinsics that deal with the entire vector and so + // they need to be "aware" that a given move impacts the upper-bits. + // + // Ideally we can detect this difference, likely via canIgnoreSideEffects, and allow + // the below optimizations for those scenarios as well. + + // Track whether the instruction has a zero/sign-extension or clearing of the upper-bits as a side-effect + bool hasSideEffect = false; + + switch (ins) + { + case INS_mov: + { + // non EA_PTRSIZE moves may zero-extend the source + hasSideEffect = (size != EA_PTRSIZE); + break; + } + + case INS_movapd: + case INS_movaps: + case INS_movdqa: + case INS_movdqu: + case INS_movupd: + case INS_movups: + { + // non EA_32BYTE moves clear the upper bits under VEX encoding + hasSideEffect = UseVEXEncoding() && (size != EA_32BYTE); + break; + } + + case INS_movd: + { + // Clears the upper bits + hasSideEffect = true; + break; + } + + case INS_movsdsse2: + case INS_movss: + { + // Clears the upper bits under VEX encoding + hasSideEffect = UseVEXEncoding(); + break; + } + + case INS_movsx: + case INS_movzx: + { + // Sign/Zero-extends the source + hasSideEffect = true; + break; + } + +#if defined(TARGET_AMD64) + case INS_movq: + { + // Clears the upper bits + hasSideEffect = true; + break; + } + + case INS_movsxd: + { + // Sign-extends the source + hasSideEffect = true; + break; + } +#endif // TARGET_AMD64 + + default: + { + unreached(); + } + } + + // Check if we are already in the correct register and don't have a side effect + if ((dst == src) && !hasSideEffect) + { + JITDUMP("\n -- suppressing mov because src and dst is same register and the mov has no side-effects.\n"); + return true; + } + + bool isFirstInstrInBlock = (emitCurIGinsCnt == 0) && ((emitCurIG->igFlags & IGF_EXTEND) == 0); + + // TODO-XArch-CQ: Certain instructions, such as movaps vs movups, are equivalent in + // functionality even if their actual identifier differs and we should optimize these + + if (isFirstInstrInBlock || // Don't optimize if instruction is the first instruction in IG. + (emitLastIns == nullptr) || // or if a last instruction doesn't exist + (emitLastIns->idIns() != ins) || // or if the instruction is different from the last instruction + (emitLastIns->idOpSize() != size) || // or if the operand size is different from the last instruction + (emitLastIns->idInsFmt() != fmt)) // or if the format is different from the last instruction + { + return false; + } + + regNumber lastDst = emitLastIns->idReg1(); + regNumber lastSrc = emitLastIns->idReg2(); + + // Check if we did same move in last instruction, side effects don't matter since they already happened + if ((lastDst == dst) && (lastSrc == src)) + { + JITDUMP("\n -- suppressing mov because last instruction already moved from src to dst register.\n"); + return true; + } + + // Check if we did a switched mov in the last instruction and don't have a side effect + if ((lastDst == src) && (lastSrc == dst) && !hasSideEffect) + { + JITDUMP("\n -- suppressing mov because last instruction already moved from dst to src register and the mov has " + "no side-effects.\n"); + return true; + } + + return false; +} + //------------------------------------------------------------------------ // emitIns_Mov: Emits a move instruction // @@ -4309,7 +4499,6 @@ void emitter::emitIns_Mov(instruction ins, emitAttr attr, regNumber dstReg, regN case INS_movaps: case INS_movdqa: case INS_movdqu: - case INS_movsd: case INS_movsdsse2: case INS_movss: case INS_movupd: @@ -4351,67 +4540,14 @@ void emitter::emitIns_Mov(instruction ins, emitAttr attr, regNumber dstReg, regN assert(size <= EA_32BYTE); noway_assert(emitVerifyEncodable(ins, size, dstReg, srcReg)); - if (canSkip && (dstReg == srcReg)) - { - switch (ins) - { - case INS_mov: - { - // These instructions have no side effect and can be skipped - return; - } - - case INS_movapd: - case INS_movaps: - case INS_movdqa: - case INS_movdqu: - case INS_movupd: - case INS_movups: - { - // These instructions have no side effect and can be skipped - return; - } - - case INS_movd: - case INS_movsd: - case INS_movsdsse2: - case INS_movss: - case INS_movsx: - case INS_movzx: - { - // These instructions have a side effect and shouldn't be skipped - // however existing codepaths were skipping these instructions in - // certain scenarios and so we skip them as well for back-compat. - // - // Long term, these paths should be audited and should likely be - // replaced with copies rather than extensions. - return; - } - -#if defined(TARGET_AMD64) - case INS_movq: - case INS_movsxd: - { - // These instructions have a side effect and shouldn't be skipped - // however existing codepaths were skipping these instructions in - // certain scenarios and so we skip them as well for back-compat. - // - // Long term, these paths should be audited and should likely be - // replaced with copies rather than extensions. - return; - } -#endif // TARGET_AMD64 - - default: - { - unreached(); - } - } - } - UNATIVE_OFFSET sz = emitInsSizeRR(ins, dstReg, srcReg, attr); insFormat fmt = emitInsModeFormat(ins, IF_RRD_RRD); + if (IsRedundantMov(ins, fmt, attr, dstReg, srcReg, canSkip)) + { + return; + } + instrDesc* id = emitNewInstrSmall(attr); id->idIns(ins); id->idInsFmt(fmt); diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index cda1571..8260445 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -95,6 +95,8 @@ code_t AddRexPrefix(instruction ins, code_t code); bool EncodedBySSE38orSSE3A(instruction ins); bool Is4ByteSSEInstruction(instruction ins); static bool IsMovInstruction(instruction ins); +bool IsRedundantMov( + instruction ins, insFormat fmt, emitAttr size, regNumber dst, regNumber src, bool canIgnoreSideEffects); bool AreUpper32BitsZero(regNumber reg);