From 58e82be63f411b76cd391b2c3f4862686d95590f Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Wed, 14 Dec 2022 11:23:41 -0800 Subject: [PATCH] This fixes emitGetVexPrefixSize to support detecting 2-byte prefix support (#79478) * Remove the dead emitOutputRexOrVexPrefixIfNeeded and rename emitOutputSimdPrefixIfNeeded to emitOutputRexOrSimdPrefixIfNeeded * Remove the dead emitGetAdjustedSize and rename emitGetAdjustedSizeEvexAware to emitGetAdjustedSize * Simplify the emitGetEvexPrefixSize and emitGetVexPrefixSize methods * Create helper emitExtractVexPrefix and emitExtractEvexPrefix functions * Update emitGetVexPrefixSize and emitGetEvexPrefixSize to take an instrDesc * Update emitGetPrefixSize to take the instrDesc so the VEX prefix size can be computed correctly * Update emitGetVexPrefixSize to support returning 2 or 3 * Ensure IF_RWR_CNS is handled in emitGetVexPrefixSize * Don't try to estimate the 2-byte VEX prefix when optimizations are disabled * Ensure we don't negatively impact estimated alignment sizes for debug code * Just check MinOpts not OptimizationsDisabled * Mark some of the modified methods as const to indicate they don't mutate the emitter * Resolve the throughput issue by not getting optSize untill necessary * Switch back to doing accurate vex prefix size estimation always * Small cleanup to reduce regression --- src/coreclr/jit/emitxarch.cpp | 1167 +++++++++++++++++++---------------------- src/coreclr/jit/emitxarch.h | 23 +- 2 files changed, 562 insertions(+), 628 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 19a7635..6da2ef5 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -747,7 +747,7 @@ static bool IsDstSrcImmAvxInstruction(instruction ins) // Note that this should be true for any of the instructions in instrsXArch.h // that use the SSE38 or SSE3A macro but returns false if the VEX encoding is // in use, since that encoding does not require an additional byte. -bool emitter::Is4ByteSSEInstruction(instruction ins) +bool emitter::Is4ByteSSEInstruction(instruction ins) const { return !UseVEXEncoding() && EncodedBySSE38orSSE3A(ins); } @@ -1280,501 +1280,354 @@ bool isPrefix(BYTE b) } //------------------------------------------------------------------------ -// emitOutputSimdPrefixIfNeeded: Outputs EVEX prefix (in case of AVX512 instructions), -// VEX prefix (in case of AVX instructions) and REX.R/X/W/B otherwise. +// emitExtractEvexPrefix: Extract the EVEX prefix // // Arguments: -// ins -- processor instruction to check. -// dst -- buffer to write prefix to. -// code -- opcode bits. -// attr -- operand size +// [In] ins -- processor instruction to check. +// [In, Out] code -- opcode bits which will no longer contain the evex prefix on return // // Return Value: -// Size of prefix. +// The extracted EVEX prefix. // -unsigned emitter::emitOutputSimdPrefixIfNeeded(instruction ins, BYTE* dst, code_t& code) +emitter::code_t emitter::emitExtractEvexPrefix(instruction ins, code_t& code) const { - // TODO-XArch-AVX512: Remove redundant code and collapse into single pathway for EVEX and VEX if possible. - if (hasEvexPrefix(code)) - { - // Only AVX512 instructions should have an EVEX prefix - assert(IsEvexEncodedInstruction(ins)); + assert(IsEvexEncodedInstruction(ins)); - code_t evexPrefix = (code >> 32) & 0xFFFFFFFF; - code &= 0x00000000FFFFFFFFLL; + code_t evexPrefix = (code >> 32) & 0xFFFFFFFF; + code &= 0x00000000FFFFFFFFLL; - WORD leadingBytes = 0; - BYTE check = (code >> 24) & 0xFF; - if (check != 0) + WORD leadingBytes = 0; + BYTE check = (code >> 24) & 0xFF; + + if (check != 0) + { + // check for a prefix in the 11 position + BYTE sizePrefix = (code >> 16) & 0xFF; + + if ((sizePrefix != 0) && isPrefix(sizePrefix)) { - // check for a prefix in the 11 position - BYTE sizePrefix = (code >> 16) & 0xFF; - if ((sizePrefix != 0) && isPrefix(sizePrefix)) + // 'pp' bits in byte 1 of EVEX prefix allows us to encode SIMD size prefixes as two bits + // + // 00 - None (0F - packed float) + // 01 - 66 (66 0F - packed double) + // 10 - F3 (F3 0F - scalar float + // 11 - F2 (F2 0F - scalar double) + switch (sizePrefix) { - // 'pp' bits in byte 1 of EVEX prefix allows us to encode SIMD size prefixes as two bits - // - // 00 - None (0F - packed float) - // 01 - 66 (66 0F - packed double) - // 10 - F3 (F3 0F - scalar float - // 11 - F2 (F2 0F - scalar double) - switch (sizePrefix) + case 0x66: { - case 0x66: - // None of the existing BMI instructions should be EVEX encoded. - assert(!IsBMIInstruction(ins)); - evexPrefix |= (0x01 << 8); - break; - case 0xF3: - evexPrefix |= (0x02 << 8); - break; - case 0xF2: - evexPrefix |= (0x03 << 8); - break; - default: - assert(!"unrecognized SIMD size prefix"); - unreached(); + // None of the existing BMI instructions should be EVEX encoded. + assert(!IsBMIInstruction(ins)); + evexPrefix |= (0x01 << 8); + break; } - // Now the byte in the 22 position must be an escape byte 0F - leadingBytes = check; - assert(leadingBytes == 0x0F); + case 0xF3: + { + evexPrefix |= (0x02 << 8); + break; + } - // Get rid of both sizePrefix and escape byte - code &= 0x0000FFFFLL; + case 0xF2: + { + evexPrefix |= (0x03 << 8); + break; + } - // Check the byte in the 33 position to see if it is 3A or 38. - // In such a case escape bytes must be 0x0F3A or 0x0F38 - check = code & 0xFF; - if (check == 0x3A || check == 0x38) + default: { - leadingBytes = (leadingBytes << 8) | check; - code &= 0x0000FF00LL; + assert(!"unrecognized SIMD size prefix"); + unreached(); } } + + // Now the byte in the 22 position must be an escape byte 0F + leadingBytes = check; + assert(leadingBytes == 0x0F); + + // Get rid of both sizePrefix and escape byte + code &= 0x0000FFFFLL; + + // Check the byte in the 33 position to see if it is 3A or 38. + // In such a case escape bytes must be 0x0F3A or 0x0F38 + check = code & 0xFF; + + if ((check == 0x3A) || (check == 0x38)) + { + leadingBytes = (leadingBytes << 8) | check; + code &= 0x0000FF00LL; + } } - else + } + else + { + // 2-byte opcode with the bytes ordered as 0x0011RM22 + // the byte in position 11 must be an escape byte. + leadingBytes = (code >> 16) & 0xFF; + assert(leadingBytes == 0x0F || leadingBytes == 0x00); + code &= 0xFFFF; + } + + // If there is an escape byte it must be 0x0F or 0x0F3A or 0x0F38 + // mm bits in byte 0 of EVEX prefix allows us to encode these + // implied leading bytes. They are identical to low two bits of VEX.mmmmm + + switch (leadingBytes) + { + case 0x00: { - // 2-byte opcode with the bytes ordered as 0x0011RM22 - // the byte in position 11 must be an escape byte. - leadingBytes = (code >> 16) & 0xFF; - assert(leadingBytes == 0x0F || leadingBytes == 0x00); - code &= 0xFFFF; + // there is no leading byte + break; } - // If there is an escape byte it must be 0x0F or 0x0F3A or 0x0F38 - // mm bits in byte 0 of EVEX prefix allows us to encode these - // implied leading bytes. They are identical to low two bits of VEX.mmmmm + case 0x0F: + { + evexPrefix |= (0x01 << 16); + break; + } - switch (leadingBytes) + case 0x0F38: { - case 0x00: - // there is no leading byte - break; - case 0x0F: - evexPrefix |= (0x01 << 16); - break; - case 0x0F38: - evexPrefix |= (0x02 << 16); - break; - case 0x0F3A: - evexPrefix |= (0x03 << 16); - break; - default: - assert(!"encountered unknown leading bytes"); - unreached(); + evexPrefix |= (0x02 << 16); + break; } - // At this point - // EVEX.2211RM33 got transformed as EVEX.0000RM33 - // EVEX.0011RM22 got transformed as EVEX.0000RM22 - // - // Now output EVEX prefix leaving the 4-byte opcode - // EVEX prefix is always 4 bytes + case 0x0F3A: + { + evexPrefix |= (0x03 << 16); + break; + } - emitOutputByte(dst, ((evexPrefix >> 24) & 0xFF)); - emitOutputByte(dst + 1, ((evexPrefix >> 16) & 0xFF)); - emitOutputByte(dst + 2, (evexPrefix >> 8) & 0xFF); - emitOutputByte(dst + 3, evexPrefix & 0xFF); - return 4; + default: + { + assert(!"encountered unknown leading bytes"); + unreached(); + } } - else if (hasVexPrefix(code)) + + // At this point + // EVEX.2211RM33 got transformed as EVEX.0000RM33 + // EVEX.0011RM22 got transformed as EVEX.0000RM22 + // + // Now output EVEX prefix leaving the 4-byte opcode + // EVEX prefix is always 4 bytes + + return evexPrefix; +} + +//------------------------------------------------------------------------ +// emitExtractVexPrefix: Extract the VEX prefix +// +// Arguments: +// [In] ins -- processor instruction to check. +// [In, Out] code -- opcode bits which will no longer contain the vex prefix on return +// +// Return Value: +// The extracted VEX prefix. +// +emitter::code_t emitter::emitExtractVexPrefix(instruction ins, code_t& code) const +{ + assert(IsVexEncodedInstruction(ins)); + + code_t vexPrefix = (code >> 32) & 0x00FFFFFF; + code &= 0x00000000FFFFFFFFLL; + + WORD leadingBytes = 0; + BYTE check = (code >> 24) & 0xFF; + + if (check != 0) { - // Only AVX instructions should have a VEX prefix - assert(UseVEXEncoding() && IsVexEncodedInstruction(ins)); - code_t vexPrefix = (code >> 32) & 0x00FFFFFF; - code &= 0x00000000FFFFFFFFLL; + // 3-byte opcode: with the bytes ordered as 0x2211RM33 or + // 4-byte opcode: with the bytes ordered as 0x22114433 - WORD leadingBytes = 0; - BYTE check = (code >> 24) & 0xFF; - if (check != 0) + // check for a prefix in the 11 position + BYTE sizePrefix = (code >> 16) & 0xFF; + + if ((sizePrefix != 0) && isPrefix(sizePrefix)) { - // 3-byte opcode: with the bytes ordered as 0x2211RM33 or - // 4-byte opcode: with the bytes ordered as 0x22114433 - // check for a prefix in the 11 position - BYTE sizePrefix = (code >> 16) & 0xFF; - if ((sizePrefix != 0) && isPrefix(sizePrefix)) + // 'pp' bits in byte2 of VEX prefix allows us to encode SIMD size prefixes as two bits + // + // 00 - None (0F - packed float) + // 01 - 66 (66 0F - packed double) + // 10 - F3 (F3 0F - scalar float + // 11 - F2 (F2 0F - scalar double) + switch (sizePrefix) { - // 'pp' bits in byte2 of VEX prefix allows us to encode SIMD size prefixes as two bits - // - // 00 - None (0F - packed float) - // 01 - 66 (66 0F - packed double) - // 10 - F3 (F3 0F - scalar float - // 11 - F2 (F2 0F - scalar double) - switch (sizePrefix) + case 0x66: { - case 0x66: - if (IsBMIInstruction(ins)) + if (IsBMIInstruction(ins)) + { + switch (ins) { - switch (ins) - { - case INS_rorx: - case INS_pdep: - case INS_mulx: + case INS_rorx: + case INS_pdep: + case INS_mulx: // TODO: Unblock when enabled for x86 #ifdef TARGET_AMD64 - case INS_shrx: + case INS_shrx: #endif - { - vexPrefix |= 0x03; - break; - } + { + vexPrefix |= 0x03; + break; + } - case INS_pext: + case INS_pext: // TODO: Unblock when enabled for x86 #ifdef TARGET_AMD64 - case INS_sarx: + case INS_sarx: #endif - { - vexPrefix |= 0x02; - break; - } + { + vexPrefix |= 0x02; + break; + } // TODO: Unblock when enabled for x86 #ifdef TARGET_AMD64 - case INS_shlx: - { - vexPrefix |= 0x01; - break; - } + case INS_shlx: + { + vexPrefix |= 0x01; + break; + } #endif - default: - { - vexPrefix |= 0x00; - break; - } + default: + { + vexPrefix |= 0x00; + break; } } - else - { - vexPrefix |= 0x01; - } - break; - case 0xF3: - vexPrefix |= 0x02; - break; - case 0xF2: - vexPrefix |= 0x03; - break; - default: - assert(!"unrecognized SIMD size prefix"); - unreached(); + } + else + { + vexPrefix |= 0x01; + } + break; } - // Now the byte in the 22 position must be an escape byte 0F - leadingBytes = check; - assert(leadingBytes == 0x0F); + case 0xF3: + { + vexPrefix |= 0x02; + break; + } - // Get rid of both sizePrefix and escape byte - code &= 0x0000FFFFLL; + case 0xF2: + { + vexPrefix |= 0x03; + break; + } - // Check the byte in the 33 position to see if it is 3A or 38. - // In such a case escape bytes must be 0x0F3A or 0x0F38 - check = code & 0xFF; - if (check == 0x3A || check == 0x38) + default: { - leadingBytes = (leadingBytes << 8) | check; - code &= 0x0000FF00LL; + assert(!"unrecognized SIMD size prefix"); + unreached(); } } - } - else - { - // 2-byte opcode with the bytes ordered as 0x0011RM22 - // the byte in position 11 must be an escape byte. - leadingBytes = (code >> 16) & 0xFF; - assert(leadingBytes == 0x0F || leadingBytes == 0x00); - code &= 0xFFFF; - } - // If there is an escape byte it must be 0x0F or 0x0F3A or 0x0F38 - // m-mmmmm bits in byte 1 of VEX prefix allows us to encode these - // implied leading bytes. 0x0F is supported by both the 2-byte and - // 3-byte encoding. While 0x0F3A and 0x0F38 are only supported by - // the 3-byte version. + // Now the byte in the 22 position must be an escape byte 0F + leadingBytes = check; + assert(leadingBytes == 0x0F); - switch (leadingBytes) - { - case 0x00: - // there is no leading byte - break; - case 0x0F: - vexPrefix |= 0x0100; - break; - case 0x0F38: - vexPrefix |= 0x0200; - break; - case 0x0F3A: - vexPrefix |= 0x0300; - break; - default: - assert(!"encountered unknown leading bytes"); - unreached(); - } + // Get rid of both sizePrefix and escape byte + code &= 0x0000FFFFLL; - // At this point - // VEX.2211RM33 got transformed as VEX.0000RM33 - // VEX.0011RM22 got transformed as VEX.0000RM22 - // - // Now output VEX prefix leaving the 4-byte opcode + // Check the byte in the 33 position to see if it is 3A or 38. + // In such a case escape bytes must be 0x0F3A or 0x0F38 + check = code & 0xFF; - // The 2-byte VEX encoding, requires that the X and B-bits are set (these - // bits are inverted from the REX values so set means off), the W-bit is - // not set (this bit is not inverted), and that the m-mmmm bits are 0-0001 - // (the 2-byte VEX encoding only supports the 0x0F leading byte). When these - // conditions are met, we can change byte-0 from 0xC4 to 0xC5 and then - // byte-1 is the logical-or of bit 7 from byte-1 and bits 0-6 from byte 2 - // from the 3-byte VEX encoding. - // - // Given the above, the check can be reduced to a simple mask and comparison. - // * 0xFFFF7F80 is a mask that ignores any bits whose value we don't care about: - // * R can be set or unset (0x7F ignores bit 7) - // * vvvv can be any value (0x80 ignores bits 3-6) - // * L can be set or unset (0x80 ignores bit 2) - // * pp can be any value (0x80 ignores bits 0-1) - // * 0x00C46100 is a value that signifies the requirements listed above were met: - // * We must be a three-byte VEX opcode (0x00C4) - // * X and B must be set (0x61 validates bits 5-6) - // * m-mmmm must be 0-00001 (0x61 validates bits 0-4) - // * W must be unset (0x00 validates bit 7) - if ((vexPrefix & 0xFFFF7F80) == 0x00C46100) - { - // Encoding optimization calculation is not done while estimating the instruction - // size and thus over-predict instruction size by 1 byte. - // If there are IGs that will be aligned, do not optimize encoding so the - // estimated alignment sizes are accurate. - if (emitCurIG->igNum > emitLastAlignedIgNum) + if ((check == 0x3A) || (check == 0x38)) { - emitOutputByte(dst, 0xC5); - emitOutputByte(dst + 1, ((vexPrefix >> 8) & 0x80) | (vexPrefix & 0x7F)); - return 2; + leadingBytes = (leadingBytes << 8) | check; + code &= 0x0000FF00LL; } } - - emitOutputByte(dst, ((vexPrefix >> 16) & 0xFF)); - emitOutputByte(dst + 1, ((vexPrefix >> 8) & 0xFF)); - emitOutputByte(dst + 2, vexPrefix & 0xFF); - return 3; + } + else + { + // 2-byte opcode with the bytes ordered as 0x0011RM22 + // the byte in position 11 must be an escape byte. + leadingBytes = (code >> 16) & 0xFF; + assert(leadingBytes == 0x0F || leadingBytes == 0x00); + code &= 0xFFFF; } -#ifdef TARGET_AMD64 - if (code > 0x00FFFFFFFFLL) + // If there is an escape byte it must be 0x0F or 0x0F3A or 0x0F38 + // m-mmmmm bits in byte 1 of VEX prefix allows us to encode these + // implied leading bytes. 0x0F is supported by both the 2-byte and + // 3-byte encoding. While 0x0F3A and 0x0F38 are only supported by + // the 3-byte version. + + switch (leadingBytes) { - BYTE prefix = (code >> 32) & 0xFF; - noway_assert(prefix >= 0x40 && prefix <= 0x4F); - code &= 0x00000000FFFFFFFFLL; + case 0x00: + { + // there is no leading byte + break; + } - // TODO-AMD64-Cleanup: when we remove the prefixes (just the SSE opcodes right now) - // we can remove this code as well + case 0x0F: + { + vexPrefix |= 0x0100; + break; + } - // The REX prefix is required to come after all other prefixes. - // Some of our 'opcodes' actually include some prefixes, if that - // is the case, shift them over and place the REX prefix after - // the other prefixes, and emit any prefix that got moved out. - BYTE check = (code >> 24) & 0xFF; - if (check == 0) + case 0x0F38: { - // 3-byte opcode: with the bytes ordered as 0x00113322 - // check for a prefix in the 11 position - check = (code >> 16) & 0xFF; - if (check != 0 && isPrefix(check)) - { - // Swap the rex prefix and whatever this prefix is - code = (((DWORD)prefix << 16) | (code & 0x0000FFFFLL)); - // and then emit the other prefix - return emitOutputByte(dst, check); - } + vexPrefix |= 0x0200; + break; } - else + + case 0x0F3A: { - // 4-byte opcode with the bytes ordered as 0x22114433 - // first check for a prefix in the 11 position - BYTE check2 = (code >> 16) & 0xFF; - if (isPrefix(check2)) - { - assert(!isPrefix(check)); // We currently don't use this, so it is untested - if (isPrefix(check)) - { - // 3 prefixes were rex = rr, check = c1, check2 = c2 encoded as 0xrrc1c2XXXX - // Change to c2rrc1XXXX, and emit check2 now - code = (((code_t)prefix << 24) | ((code_t)check << 16) | (code & 0x0000FFFFLL)); - } - else - { - // 2 prefixes were rex = rr, check2 = c2 encoded as 0xrrXXc2XXXX, (check is part of the opcode) - // Change to c2XXrrXXXX, and emit check2 now - code = (((code_t)check << 24) | ((code_t)prefix << 16) | (code & 0x0000FFFFLL)); - } - return emitOutputByte(dst, check2); - } + vexPrefix |= 0x0300; + break; } - return emitOutputByte(dst, prefix); + default: + { + assert(!"encountered unknown leading bytes"); + unreached(); + } } -#endif // TARGET_AMD64 - return 0; + // At this point + // VEX.2211RM33 got transformed as VEX.0000RM33 + // VEX.0011RM22 got transformed as VEX.0000RM22 + // + // Now output VEX prefix leaving the 4-byte opcode + + return vexPrefix; } -// Outputs VEX prefix (in case of AVX instructions) and REX.R/X/W/B otherwise. -unsigned emitter::emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, code_t& code) +//------------------------------------------------------------------------ +// emitOutputRexOrSimdPrefixIfNeeded: Outputs EVEX prefix (in case of AVX512 instructions), +// VEX prefix (in case of AVX instructions) and REX.R/X/W/B otherwise. +// +// Arguments: +// ins -- processor instruction to check. +// dst -- buffer to write prefix to. +// code -- opcode bits. +// attr -- operand size +// +// Return Value: +// Size of prefix. +// +unsigned emitter::emitOutputRexOrSimdPrefixIfNeeded(instruction ins, BYTE* dst, code_t& code) { - if (hasVexPrefix(code)) + // TODO-XArch-AVX512: Remove redundant code and collapse into single pathway for EVEX and VEX if possible. + if (hasEvexPrefix(code)) { - // Only AVX instructions should have a VEX prefix - assert(UseVEXEncoding() && IsVexEncodedInstruction(ins)); - code_t vexPrefix = (code >> 32) & 0x00FFFFFF; - code &= 0x00000000FFFFFFFFLL; - - WORD leadingBytes = 0; - BYTE check = (code >> 24) & 0xFF; - if (check != 0) - { - // 3-byte opcode: with the bytes ordered as 0x2211RM33 or - // 4-byte opcode: with the bytes ordered as 0x22114433 - // check for a prefix in the 11 position - BYTE sizePrefix = (code >> 16) & 0xFF; - if ((sizePrefix != 0) && isPrefix(sizePrefix)) - { - // 'pp' bits in byte2 of VEX prefix allows us to encode SIMD size prefixes as two bits - // - // 00 - None (0F - packed float) - // 01 - 66 (66 0F - packed double) - // 10 - F3 (F3 0F - scalar float - // 11 - F2 (F2 0F - scalar double) - switch (sizePrefix) - { - case 0x66: - if (IsBMIInstruction(ins)) - { - switch (ins) - { - case INS_rorx: - case INS_pdep: - case INS_mulx: -// TODO: Unblock when enabled for x86 -#ifdef TARGET_AMD64 - case INS_shrx: -#endif - { - vexPrefix |= 0x03; - break; - } - - case INS_pext: -// TODO: Unblock when enabled for x86 -#ifdef TARGET_AMD64 - case INS_sarx: -#endif - { - vexPrefix |= 0x02; - break; - } -// TODO: Unblock when enabled for x86 -#ifdef TARGET_AMD64 - case INS_shlx: - { - vexPrefix |= 0x01; - break; - } -#endif - default: - { - vexPrefix |= 0x00; - break; - } - } - } - else - { - vexPrefix |= 0x01; - } - break; - case 0xF3: - vexPrefix |= 0x02; - break; - case 0xF2: - vexPrefix |= 0x03; - break; - default: - assert(!"unrecognized SIMD size prefix"); - unreached(); - } - - // Now the byte in the 22 position must be an escape byte 0F - leadingBytes = check; - assert(leadingBytes == 0x0F); - - // Get rid of both sizePrefix and escape byte - code &= 0x0000FFFFLL; + code_t evexPrefix = emitExtractEvexPrefix(ins, code); + assert(evexPrefix != 0); - // Check the byte in the 33 position to see if it is 3A or 38. - // In such a case escape bytes must be 0x0F3A or 0x0F38 - check = code & 0xFF; - if (check == 0x3A || check == 0x38) - { - leadingBytes = (leadingBytes << 8) | check; - code &= 0x0000FF00LL; - } - } - } - else - { - // 2-byte opcode with the bytes ordered as 0x0011RM22 - // the byte in position 11 must be an escape byte. - leadingBytes = (code >> 16) & 0xFF; - assert(leadingBytes == 0x0F || leadingBytes == 0x00); - code &= 0xFFFF; - } - - // If there is an escape byte it must be 0x0F or 0x0F3A or 0x0F38 - // m-mmmmm bits in byte 1 of VEX prefix allows us to encode these - // implied leading bytes. 0x0F is supported by both the 2-byte and - // 3-byte encoding. While 0x0F3A and 0x0F38 are only supported by - // the 3-byte version. - - switch (leadingBytes) - { - case 0x00: - // there is no leading byte - break; - case 0x0F: - vexPrefix |= 0x0100; - break; - case 0x0F38: - vexPrefix |= 0x0200; - break; - case 0x0F3A: - vexPrefix |= 0x0300; - break; - default: - assert(!"encountered unknown leading bytes"); - unreached(); - } + emitOutputByte(dst, ((evexPrefix >> 24) & 0xFF)); + emitOutputByte(dst + 1, ((evexPrefix >> 16) & 0xFF)); + emitOutputByte(dst + 2, (evexPrefix >> 8) & 0xFF); + emitOutputByte(dst + 3, evexPrefix & 0xFF); - // At this point - // VEX.2211RM33 got transformed as VEX.0000RM33 - // VEX.0011RM22 got transformed as VEX.0000RM22 - // - // Now output VEX prefix leaving the 4-byte opcode + return 4; + } + else if (hasVexPrefix(code)) + { + code_t vexPrefix = emitExtractVexPrefix(ins, code); + assert(vexPrefix != 0); // The 2-byte VEX encoding, requires that the X and B-bits are set (these // bits are inverted from the REX values so set means off), the W-bit is @@ -1795,23 +1648,19 @@ unsigned emitter::emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, c // * X and B must be set (0x61 validates bits 5-6) // * m-mmmm must be 0-00001 (0x61 validates bits 0-4) // * W must be unset (0x00 validates bit 7) + if ((vexPrefix & 0xFFFF7F80) == 0x00C46100) { - // Encoding optimization calculation is not done while estimating the instruction - // size and thus over-predict instruction size by 1 byte. - // If there are IGs that will be aligned, do not optimize encoding so the - // estimated alignment sizes are accurate. - if (emitCurIG->igNum > emitLastAlignedIgNum) - { - emitOutputByte(dst, 0xC5); - emitOutputByte(dst + 1, ((vexPrefix >> 8) & 0x80) | (vexPrefix & 0x7F)); - return 2; - } + emitOutputByte(dst, 0xC5); + emitOutputByte(dst + 1, ((vexPrefix >> 8) & 0x80) | (vexPrefix & 0x7F)); + + return 2; } emitOutputByte(dst, ((vexPrefix >> 16) & 0xFF)); emitOutputByte(dst + 1, ((vexPrefix >> 8) & 0xFF)); emitOutputByte(dst + 2, vexPrefix & 0xFF); + return 3; } @@ -1917,54 +1766,35 @@ unsigned emitter::emitGetRexPrefixSize(instruction ins) } //------------------------------------------------------------------------ -// emitGetEvexPrefixSize: Gets Size of Evex prefix in bytes -// TODO-XArch-AVX512: Once evex encoding is supported fully, this will take the place of emitGetAdjustedSize() +// emitGetEvexPrefixSize: Gets Size of EVEX prefix in bytes // // Arguments: -// ins -- The instruction +// ins -- The instruction descriptor // // Returns: // Prefix size in bytes. -// -unsigned emitter::emitGetEvexPrefixSize(instruction ins) -{ - if (IsEvexEncodedInstruction(ins)) - { - return 4; - } - - // If not AVX512, then we don't need to encode prefix. - return 0; -} - -// Size of vex prefix in bytes -unsigned emitter::emitGetVexPrefixSize(instruction ins, emitAttr attr) +// +unsigned emitter::emitGetEvexPrefixSize(instrDesc* id) const { - if (IsVexEncodedInstruction(ins)) - { - return 3; - } - - // If not AVX, then we don't need to encode vex prefix. - return 0; + assert(IsEvexEncodedInstruction(id->idIns())); + return 4; } //------------------------------------------------------------------------ -// emitGetAdjustedSizeEvexAware: Determines any size adjustment needed for a given instruction based on the current +// emitGetAdjustedSize: Determines any size adjustment needed for a given instruction based on the current // configuration. -// TODO-XArch-AVX512: Once evex encoding is supported fully, this will take the place of emitGetAdjustedSize() // // Arguments: -// ins -- The instruction being emitted -// attr -- The emit attribute +// id -- The instruction descriptor being emitted // code -- The current opcode and any known prefixes // // Returns: // Updated size. // -unsigned emitter::emitGetAdjustedSizeEvexAware(instruction ins, emitAttr attr, code_t code) +unsigned emitter::emitGetAdjustedSize(instrDesc* id, code_t code) const { - unsigned adjustedSize = 0; + instruction ins = id->idIns(); + unsigned adjustedSize = 0; // TODO-XArch-AVX512: Remove redundant code and possiblly collapse EVEX and VEX into a single pathway // IsEvexEncodedInstruction(ins) is `true` for AVX/SSE instructions also which needs to be VEX encoded unless @@ -1986,7 +1816,7 @@ unsigned emitter::emitGetAdjustedSizeEvexAware(instruction ins, emitAttr attr, c // = opcodeSize + (evexPrefixSize - ExtrabytesSize) // = opcodeSize + evexPrefixAdjustedSize - unsigned evexPrefixAdjustedSize = emitGetEvexPrefixSize(ins); + unsigned evexPrefixAdjustedSize = emitGetEvexPrefixSize(id); assert(evexPrefixAdjustedSize == 4); // In this case, opcode will contains escape prefix at least one byte, @@ -2033,8 +1863,8 @@ unsigned emitter::emitGetAdjustedSizeEvexAware(instruction ins, emitAttr attr, c // = opcodeSize + (vexPrefixSize - ExtrabytesSize) // = opcodeSize + vexPrefixAdjustedSize - unsigned simdPrefixAdjustedSize = emitGetVexPrefixSize(ins, attr); - assert(simdPrefixAdjustedSize == 3); + unsigned simdPrefixAdjustedSize = emitGetVexPrefixSize(id); + assert((simdPrefixAdjustedSize == 2) || (simdPrefixAdjustedSize == 3)); // In this case, opcode will contains escape prefix at least one byte, // vexPrefixAdjustedSize should be minus one. @@ -2077,87 +1907,7 @@ unsigned emitter::emitGetAdjustedSizeEvexAware(instruction ins, emitAttr attr, c adjustedSize++; } - if ((attr == EA_2BYTE) && (ins != INS_movzx) && (ins != INS_movsx)) - { - // Most 16-bit operand instructions will need a 0x66 prefix. - adjustedSize++; - } - } - - return adjustedSize; -} - -//------------------------------------------------------------------------ -// emitGetAdjustedSize: Determines any size adjustment needed for a given instruction based on the current -// configuration. -// -// Arguments: -// ins -- The instruction being emitted -// attr -- The emit attribute -// code -- The current opcode and any known prefixes -unsigned emitter::emitGetAdjustedSize(instruction ins, emitAttr attr, code_t code) -{ - unsigned adjustedSize = 0; - - if (IsVexEncodedInstruction(ins)) - { - // VEX prefix encodes some bytes of the opcode and as a result, overall size of the instruction reduces. - // Therefore, to estimate the size adding VEX prefix size and size of instruction opcode bytes will always - // overstimate. - // Instead this routine will adjust the size of VEX prefix based on the number of bytes of opcode it encodes so - // that - // instruction size estimate will be accurate. - // Basically this will decrease the vexPrefixSize, so that opcodeSize + vexPrefixAdjustedSize will be the right - // size. - // - // rightOpcodeSize + vexPrefixSize - // = (opcodeSize - ExtrabytesSize) + vexPrefixSize - // = opcodeSize + (vexPrefixSize - ExtrabytesSize) - // = opcodeSize + vexPrefixAdjustedSize - - unsigned vexPrefixAdjustedSize = emitGetVexPrefixSize(ins, attr); - assert(vexPrefixAdjustedSize == 3); - - // In this case, opcode will contains escape prefix at least one byte, - // vexPrefixAdjustedSize should be minus one. - vexPrefixAdjustedSize -= 1; - - // Get the fourth byte in Opcode. - // If this byte is non-zero, then we should check whether the opcode contains SIMD prefix or not. - BYTE check = (code >> 24) & 0xFF; - if (check != 0) - { - // 3-byte opcode: with the bytes ordered as 0x2211RM33 or - // 4-byte opcode: with the bytes ordered as 0x22114433 - // Simd prefix is at the first byte. - BYTE sizePrefix = (code >> 16) & 0xFF; - if (sizePrefix != 0 && isPrefix(sizePrefix)) - { - vexPrefixAdjustedSize -= 1; - } - - // If the opcode size is 4 bytes, then the second escape prefix is at fourth byte in opcode. - // But in this case the opcode has not counted R\M part. - // opcodeSize + VexPrefixAdjustedSize - ExtraEscapePrefixSize + ModR\MSize - //=opcodeSize + VexPrefixAdjustedSize -1 + 1 - //=opcodeSize + VexPrefixAdjustedSize - // So although we may have second byte escape prefix, we won't decrease vexPrefixAdjustedSize. - } - - adjustedSize = vexPrefixAdjustedSize; - } - else if (Is4ByteSSEInstruction(ins)) - { - // The 4-Byte SSE instructions require one additional byte to hold the ModRM byte - adjustedSize++; - } - else - { - if (ins == INS_crc32) - { - // Adjust code size for CRC32 that has 4-byte opcode but does not use SSE38 or EES3A encoding. - adjustedSize++; - } + emitAttr attr = id->idOpSize(); if ((attr == EA_2BYTE) && (ins != INS_movzx) && (ins != INS_movsx)) { @@ -2174,19 +1924,20 @@ unsigned emitter::emitGetAdjustedSize(instruction ins, emitAttr attr, code_t cod // emitGetPrefixSize: Get size of rex or vex prefix emitted in code // // Arguments: +// id -- The instruction descriptor for which to get its prefix size // code -- The current opcode and any known prefixes // includeRexPrefixSize -- If Rex Prefix size should be included or not // -unsigned emitter::emitGetPrefixSize(code_t code, bool includeRexPrefixSize) +unsigned emitter::emitGetPrefixSize(instrDesc* id, code_t code, bool includeRexPrefixSize) { if (hasEvexPrefix(code)) { - return 4; + return emitGetEvexPrefixSize(id); } if (hasVexPrefix(code)) { - return 3; + return emitGetVexPrefixSize(id); } if (includeRexPrefixSize && hasRexPrefix(code)) @@ -2569,6 +2320,160 @@ inline bool hasCodeMR(instruction ins) return ((insCodesMR[ins] != BAD_CODE)); } +//------------------------------------------------------------------------ +// emitGetVexPrefixSize: Gets size of VEX prefix in bytes +// +// Arguments: +// id -- The instruction descriptor +// +// Returns: +// Prefix size in bytes. +// +unsigned emitter::emitGetVexPrefixSize(instrDesc* id) const +{ + instruction ins = id->idIns(); + assert(IsVexEncodedInstruction(ins)); + + if (EncodedBySSE38orSSE3A(ins)) + { + // When the prefix is 0x0F38 or 0x0F3A, we must use the 3-byte encoding + return 3; + } + + switch (ins) + { + case INS_crc32: +#if defined(TARGET_AMD64) + case INS_sarx: + case INS_shrx: +#endif // TARGET_AMD64 + { + // When the prefix is 0x0F38 or 0x0F3A, we must use the 3-byte encoding + // These are special cases where the pp-bit is 0xF2 or 0xF3 and not 0x66 + return 3; + } + + default: + { + break; + } + } + + emitAttr size = id->idOpSize(); + + if (TakesRexWPrefix(ins, size)) + { + // When the REX.W bit is present, we must use the 3-byte encoding + return 3; + } + + regNumber regFor012Bits = REG_NA; + regNumber regForSibBits = REG_NA; + + switch (id->idInsFmt()) + { + case IF_ARD: + case IF_AWR_RRD: + case IF_RRD_ARD: + case IF_RRW_ARD: + case IF_RRW_ARD_CNS: + case IF_RWR_ARD: + case IF_RWR_ARD_CNS: + case IF_RWR_RRD_ARD: + case IF_RWR_RRD_ARD_CNS: + { + regFor012Bits = id->idAddr()->iiaAddrMode.amBaseReg; + regForSibBits = id->idAddr()->iiaAddrMode.amIndxReg; + break; + } + + case IF_MRD: + case IF_MWR_RRD: + case IF_RRD_MRD: + case IF_RRD_SRD: + case IF_RRW_MRD: + case IF_RRW_MRD_CNS: + case IF_RRW_SRD: + case IF_RRW_SRD_CNS: + case IF_RWR_MRD: + case IF_RWR_MRD_CNS: + case IF_RWR_RRD_MRD: + case IF_RWR_RRD_MRD_CNS: + case IF_RWR_RRD_SRD: + case IF_RWR_RRD_SRD_CNS: + case IF_RWR_SRD: + case IF_RWR_SRD_CNS: + case IF_SRD: + case IF_SWR_RRD: + { + // Nothing is encoded in a way to prevent the 2-byte encoding + break; + } + + case IF_RRD_CNS: + case IF_RRW_CNS: + case IF_RWR_CNS: + { + regFor012Bits = id->idReg1(); + break; + } + + case IF_RRD_RRD: + case IF_RRW_RRD: + case IF_RWR_RRD: + { + regFor012Bits = id->idReg2(); + + if ((ins == INS_movd) && isFloatReg(regFor012Bits)) + { + regFor012Bits = id->idReg1(); + } + break; + } + + case IF_RRW_RRW_CNS: + { + if (hasCodeMR(ins)) + { + regFor012Bits = id->idReg1(); + } + else + { + regFor012Bits = id->idReg2(); + } + break; + } + + case IF_RWR_RRD_RRD: + case IF_RWR_RRD_RRD_CNS: + case IF_RWR_RRD_RRD_RRD: + { + regFor012Bits = id->idReg3(); + break; + } + + default: + { + assert(!"Unhandled insFmt for emitGetVexPrefixSize"); + return 3; + } + } + + if ((regForSibBits != REG_NA) && IsExtendedReg(regForSibBits)) + { + // When the REX.X bit is present, we must use the 3-byte encoding + return 3; + } + + if ((regFor012Bits != REG_NA) && IsExtendedReg(regFor012Bits)) + { + // When the REX.B bit is present, we must use the 3-byte encoding + return 3; + } + + return 2; +} + /***************************************************************************** * * Returns the "[r/m], reg" or "[r/m]" encoding of the given CPU instruction. @@ -2633,7 +2538,7 @@ inline insTupleType insTupleTypeInfo(instruction ins) } // Return true if the instruction uses the SSE38 or SSE3A macro in instrsXArch.h. -bool emitter::EncodedBySSE38orSSE3A(instruction ins) +bool emitter::EncodedBySSE38orSSE3A(instruction ins) const { const size_t SSE38 = 0x0F660038; const size_t SSE3A = 0x0F66003A; @@ -3057,14 +2962,15 @@ bool emitter::emitVerifyEncodable(instruction ins, emitAttr size, regNumber reg1 // emitInsSize: Estimate the size (in bytes of generated code) of the given instruction. // // Arguments: +// id -- The instruction descriptor for which to estimate its size // code -- The current opcode and any known prefixes // includeRexPrefixSize -- If Rex Prefix size should be included or not // -inline UNATIVE_OFFSET emitter::emitInsSize(code_t code, bool includeRexPrefixSize) +inline UNATIVE_OFFSET emitter::emitInsSize(instrDesc* id, code_t code, bool includeRexPrefixSize) { UNATIVE_OFFSET size = (code & 0xFF000000) ? 4 : (code & 0x00FF0000) ? 3 : 2; #ifdef TARGET_AMD64 - size += emitGetPrefixSize(code, includeRexPrefixSize); + size += emitGetPrefixSize(id, code, includeRexPrefixSize); #endif return size; } @@ -3082,7 +2988,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeRR(instrDesc* id, code_t code) instruction ins = id->idIns(); emitAttr attr = id->idOpSize(); - UNATIVE_OFFSET sz = emitGetAdjustedSizeEvexAware(ins, attr, code); + UNATIVE_OFFSET sz = emitGetAdjustedSize(id, code); bool includeRexPrefixSize = true; // REX prefix @@ -3093,7 +2999,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeRR(instrDesc* id, code_t code) includeRexPrefixSize = !IsVexEncodedInstruction(ins); } - sz += emitInsSize(code, includeRexPrefixSize); + sz += emitInsSize(id, code, includeRexPrefixSize); return sz; } @@ -3141,21 +3047,35 @@ inline UNATIVE_OFFSET emitter::emitInsSizeRR(instrDesc* id, code_t code, int val return valSize + emitInsSizeRR(id, code); } -inline UNATIVE_OFFSET emitter::emitInsSizeRR(instruction ins, regNumber reg1, regNumber reg2, emitAttr attr) +//------------------------------------------------------------------------ +// emitInsSizeRR: Determines the code size for an instruction encoding that does not have any addressing modes +// +// Arguments: +// id -- The instruction descriptor being emitted +// +// TODO-Cleanup: This method should really be merged with emitInsSizeRR(instrDesc*, code_t). However, it has +// existed for a long time and its not trivial to determine why it exists as a separate method. +// +inline UNATIVE_OFFSET emitter::emitInsSizeRR(instrDesc* id) { - emitAttr size = EA_SIZE(attr); + instruction ins = id->idIns(); // If Byte 4 (which is 0xFF00) is zero, that's where the RM encoding goes. // Otherwise, it will be placed after the 4 byte encoding, making the total 5 bytes. // This would probably be better expressed as a different format or something? code_t code = insCodeRM(ins); - UNATIVE_OFFSET sz = emitGetAdjustedSizeEvexAware(ins, size, insCodeRM(ins)); + UNATIVE_OFFSET sz = emitGetAdjustedSize(id, code); bool includeRexPrefixSize = true; // REX prefix if (!hasRexPrefix(code)) { + regNumber reg1 = id->idReg1(); + regNumber reg2 = id->idReg2(); + emitAttr attr = id->idOpSize(); + emitAttr size = EA_SIZE(attr); + if ((TakesRexWPrefix(ins, size) && ((ins != INS_xor) || (reg1 != reg2))) || IsExtendedReg(reg1, attr) || IsExtendedReg(reg2, attr)) { @@ -3166,11 +3086,11 @@ inline UNATIVE_OFFSET emitter::emitInsSizeRR(instruction ins, regNumber reg1, re if ((code & 0xFF00) != 0) { - sz += IsAvx512OrPriorInstruction(ins) ? emitInsSize(code, includeRexPrefixSize) : 5; + sz += IsAvx512OrPriorInstruction(ins) ? emitInsSize(id, code, includeRexPrefixSize) : 5; } else { - sz += emitInsSize(insEncodeRMreg(ins, code), includeRexPrefixSize); + sz += emitInsSize(id, insEncodeRMreg(ins, code), includeRexPrefixSize); } return sz; @@ -3190,7 +3110,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeRR(instruction ins, regNumber reg1, re // inline UNATIVE_OFFSET emitter::emitInsSizeSVCalcDisp(instrDesc* id, code_t code, int var, int dsp) { - UNATIVE_OFFSET size = emitInsSize(code, /* includeRexPrefixSize */ true); + UNATIVE_OFFSET size = emitInsSize(id, code, /* includeRexPrefixSize */ true); UNATIVE_OFFSET offs; bool offsIsUpperBound = true; bool EBPbased = true; @@ -3372,7 +3292,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeSV(instrDesc* id, code_t code, int var assert(id->idIns() != INS_invalid); instruction ins = id->idIns(); emitAttr attrSize = id->idOpSize(); - UNATIVE_OFFSET prefix = emitGetAdjustedSizeEvexAware(ins, attrSize, code); + UNATIVE_OFFSET prefix = emitGetAdjustedSize(id, code); // REX prefix if (TakesRexWPrefix(ins, attrSize) || IsExtendedReg(id->idReg1(), attrSize) || @@ -3390,7 +3310,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeSV(instrDesc* id, code_t code, int var instruction ins = id->idIns(); emitAttr attrSize = id->idOpSize(); UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(attrSize); - UNATIVE_OFFSET prefix = emitGetAdjustedSizeEvexAware(ins, attrSize, code); + UNATIVE_OFFSET prefix = emitGetAdjustedSize(id, code); bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test); #ifdef TARGET_AMD64 @@ -3523,7 +3443,7 @@ UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code) size = 2; } - size += emitGetAdjustedSizeEvexAware(ins, attrSize, code); + size += emitGetAdjustedSize(id, code); if (hasRexPrefix(code)) { @@ -3741,7 +3661,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, code_t code) // can be reached via RIP-relative addressing. UNATIVE_OFFSET size = sizeof(INT32); - size += emitGetAdjustedSizeEvexAware(ins, attrSize, code); + size += emitGetAdjustedSize(id, code); bool includeRexPrefixSize = true; @@ -3753,7 +3673,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, code_t code) includeRexPrefixSize = false; } - return size + emitInsSize(code, includeRexPrefixSize); + return size + emitInsSize(id, code, includeRexPrefixSize); } inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, code_t code, int val) @@ -3983,14 +3903,15 @@ void emitter::emitIns(instruction ins, emitAttr attr) insFormat fmt = IF_NONE; - sz += emitGetAdjustedSizeEvexAware(ins, attr, code); + id->idIns(ins); + id->idInsFmt(fmt); + + sz += emitGetAdjustedSize(id, code); if (TakesRexWPrefix(ins, attr)) { sz += emitGetRexPrefixSize(ins); } - id->idIns(ins); - id->idInsFmt(fmt); id->idCodeSize(sz); dispIns(id); @@ -5125,7 +5046,7 @@ void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg) id->idReg1(reg); // Vex bytes - sz += emitGetAdjustedSizeEvexAware(ins, attr, insEncodeMRreg(ins, reg, attr, insCodeMR(ins))); + sz += emitGetAdjustedSize(id, insEncodeMRreg(ins, reg, attr, insCodeMR(ins))); // REX byte if (IsExtendedReg(reg, attr) || TakesRexWPrefix(ins, attr)) @@ -5173,6 +5094,11 @@ void emitter::emitIns_R_I(instruction ins, // (it is always encoded in a byte). Let's not complicate things until this is needed. assert(ins != INS_bt); + // TODO-Cleanup: This is used to track whether a call to emitInsSize is necessary. The call + // has existed for quite some time, historically, but it is likely problematic in practice. + // However, it is non-trivial to prove it is safe to remove at this time. + bool isSimdInsAndValInByte = false; + // Figure out the size of the instruction switch (ins) { @@ -5219,16 +5145,8 @@ void emitter::emitIns_R_I(instruction ins, { if (IsAvx512OrPriorInstruction(ins)) { - bool includeRexPrefixSize = true; - // Do not get the RexSize() but just decide if it will be included down further and if yes, - // do not include it again. - if (IsExtendedReg(reg, attr) || TakesRexWPrefix(ins, size) || instrIsExtendedReg3opImul(ins)) - { - includeRexPrefixSize = false; - } - - sz = emitInsSize(insCodeMI(ins), includeRexPrefixSize); - sz += 1; + sz = 1; + isSimdInsAndValInByte = true; } else if (size == EA_1BYTE && reg == REG_EAX && !instrIs3opImul(ins)) { @@ -5267,7 +5185,31 @@ void emitter::emitIns_R_I(instruction ins, break; } - sz += emitGetAdjustedSizeEvexAware(ins, attr, insCodeMI(ins)); + id = emitNewInstrSC(attr, val); + id->idIns(ins); + id->idInsFmt(fmt); + id->idReg1(reg); + +#ifdef DEBUG + id->idDebugOnlyInfo()->idFlags = gtFlags; + id->idDebugOnlyInfo()->idMemCookie = targetHandle; +#endif + + if (isSimdInsAndValInByte) + { + bool includeRexPrefixSize = true; + + // Do not get the RexSize() but just decide if it will be included down further and if yes, + // do not include it again. + if (IsExtendedReg(reg, attr) || TakesRexWPrefix(ins, size) || instrIsExtendedReg3opImul(ins)) + { + includeRexPrefixSize = false; + } + + sz += emitInsSize(id, insCodeMI(ins), includeRexPrefixSize); + } + + sz += emitGetAdjustedSize(id, insCodeMI(ins)); // Do we need a REX prefix for AMD64? We need one if we are using any extended register (REX.R), or if we have a // 64-bit sized operand (REX.W). Note that IMUL in our encoding is special, with a "built-in", implicit, target @@ -5277,17 +5219,8 @@ void emitter::emitIns_R_I(instruction ins, sz += emitGetRexPrefixSize(ins); } - id = emitNewInstrSC(attr, val); - id->idIns(ins); - id->idInsFmt(fmt); - id->idReg1(reg); id->idCodeSize(sz); -#ifdef DEBUG - id->idDebugOnlyInfo()->idFlags = gtFlags; - id->idDebugOnlyInfo()->idMemCookie = targetHandle; -#endif - dispIns(id); emitCurIGsize += sz; @@ -5859,13 +5792,13 @@ void emitter::emitIns_Mov(instruction ins, emitAttr attr, regNumber dstReg, regN return; } - UNATIVE_OFFSET sz = emitInsSizeRR(ins, dstReg, srcReg, attr); - instrDesc* id = emitNewInstrSmall(attr); id->idIns(ins); id->idInsFmt(fmt); id->idReg1(dstReg); id->idReg2(srcReg); + + UNATIVE_OFFSET sz = emitInsSizeRR(id); id->idCodeSize(sz); dispIns(id); @@ -5890,8 +5823,6 @@ void emitter::emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNum assert(size <= EA_32BYTE); noway_assert(emitVerifyEncodable(ins, size, reg1, reg2)); - UNATIVE_OFFSET sz = emitInsSizeRR(ins, reg1, reg2, attr); - /* Special case: "XCHG" uses a different format */ insFormat fmt = (ins == INS_xchg) ? IF_RRW_RRW : emitInsModeFormat(ins, IF_RRD_RRD); @@ -5900,6 +5831,8 @@ void emitter::emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNum id->idInsFmt(fmt); id->idReg1(reg1); id->idReg2(reg2); + + UNATIVE_OFFSET sz = emitInsSizeRR(id); id->idCodeSize(sz); dispIns(id); @@ -11336,7 +11269,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) if (id->idIsCallRegPtr()) { code = insEncodeMRreg(ins, reg, EA_PTRSIZE, code); - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); dst += emitOutputWord(dst, code); goto DONE; } @@ -11362,7 +11295,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) } // And emit the REX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); #endif // TARGET_AMD64 @@ -11524,7 +11457,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) } unsigned regcode = insEncodeReg345(ins, reg345, size, &code); - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); if (UseSimdEncoding() && (ins != INS_crc32)) { @@ -11552,7 +11485,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) } // Output the REX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); // Output the highest word of the opcode // We need to check again as in case of AVX instructions leading opcode bytes are stripped off @@ -11569,7 +11502,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) assert(ins != INS_bt); // Output the REX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); // Output the highest byte of the opcode if (code & 0x00FF0000) @@ -11637,7 +11570,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) } // Output the REX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); // Get the displacement value dsp = emitGetInsAmdAny(id); @@ -12334,7 +12267,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) } unsigned regcode = insEncodeReg345(ins, reg345, size, &code); - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); if (UseSimdEncoding() && (ins != INS_crc32)) { @@ -12362,7 +12295,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) } // Output the REX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); // Output the highest word of the opcode // We need to check again because in case of AVX instructions the leading @@ -12379,7 +12312,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) assert(ins != INS_bt); // Output the REX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); // Output the highest byte of the opcode. // We need to check again because in case of AVX instructions the leading @@ -12451,7 +12384,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) } // Output the REX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); // Figure out the variable's frame position int varNum = id->idAddr()->iiaLclVar.lvaVarNum(); @@ -12834,7 +12767,7 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) } unsigned regcode = insEncodeReg345(ins, reg345, size, &code); - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); if (UseVEXEncoding() && (ins != INS_crc32)) { @@ -12864,7 +12797,7 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) } // Output the REX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); // Output the highest word of the opcode. // Check again since AVX instructions encode leading opcode bytes as part of VEX prefix. @@ -12877,7 +12810,7 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) else if (code & 0x00FF0000) { // Output the REX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); // Check again as VEX prefix would have encoded leading opcode byte if (code & 0x00FF0000) @@ -12934,7 +12867,7 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) } // Output the REX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); if (code) { @@ -13218,7 +13151,7 @@ BYTE* emitter::emitOutputR(BYTE* dst, instrDesc* id) unsigned regcode = insEncodeReg012(ins, reg, size, &code); // Output the REX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); dst += emitOutputWord(dst, code | (regcode << 8)); } @@ -13245,7 +13178,7 @@ BYTE* emitter::emitOutputR(BYTE* dst, instrDesc* id) assert(!TakesRexWPrefix(ins, size)); // Output the REX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); dst += emitOutputByte(dst, code); break; @@ -13270,7 +13203,7 @@ BYTE* emitter::emitOutputR(BYTE* dst, instrDesc* id) unsigned regcode = insEncodeReg012(ins, reg, size, &code); // Output the REX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); dst += emitOutputWord(dst, code | (regcode << 8)); break; @@ -13299,7 +13232,7 @@ BYTE* emitter::emitOutputR(BYTE* dst, instrDesc* id) code = insEncodeMRreg(ins, reg, EA_1BYTE, insCodeMR(ins)); // Output the REX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); // We expect this to always be a 'big' opcode assert(code & 0x00FF0000); @@ -13344,7 +13277,7 @@ BYTE* emitter::emitOutputR(BYTE* dst, instrDesc* id) } // Output the REX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); dst += emitOutputWord(dst, code); break; @@ -13580,7 +13513,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) } // Output the REX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); if (code & 0xFF000000) { @@ -13834,7 +13767,7 @@ BYTE* emitter::emitOutputRRR(BYTE* dst, instrDesc* id) code = insEncodeReg3456(ins, src1, size, code); // Output the REX/VEX/EVEX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); // Is this a 'big' opcode? if (code & 0xFF000000) @@ -13938,7 +13871,7 @@ BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id) unsigned regcode = (insEncodeReg345(ins, regOpcode, size, &code) | insEncodeReg012(ins, reg, size, &code)) << 8; // Output the REX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); if (code & 0xFF000000) { @@ -13974,7 +13907,7 @@ BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id) code = AddRexWPrefix(ins, code); } - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); dst += emitOutputByte(dst, code); if (size == EA_4BYTE) @@ -14103,7 +14036,7 @@ BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id) } // Output the REX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); // Does the value fit in a sign-extended byte? // Important! Only set the 's' bit when we have a size larger than EA_1BYTE. @@ -14305,7 +14238,7 @@ BYTE* emitter::emitOutputIV(BYTE* dst, instrDesc* id) if (TakesRexWPrefix(ins, size) || (codeEvexMigrationCheck(code) && IsWEvexOpcodeExtension(ins))) { code = AddRexWPrefix(ins, code); - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); } dst += emitOutputByte(dst, code); @@ -14931,7 +14864,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) { code = AddRexWPrefix(ins, code); } - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); #endif // Is this a 'big' opcode? if (code & 0xFF000000) @@ -15224,7 +15157,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutputByte(dst, 0x66); } - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); dst += emitOutputWord(dst, code); dst += emitOutputByte(dst, emitGetInsSC(id)); sz = emitSizeOfInsDsc(id); @@ -15330,7 +15263,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) regcode = (insEncodeReg345(ins, rReg, size, &code) | insEncodeReg012(ins, mReg, size, &code)); // Output the REX prefix - dst += emitOutputSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); if (code & 0xFF000000) { diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index 13f79bc..dd4eec4 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -38,13 +38,13 @@ struct CnsVal bool cnsReloc; }; -UNATIVE_OFFSET emitInsSize(code_t code, bool includeRexPrefixSize); +UNATIVE_OFFSET emitInsSize(instrDesc* id, code_t code, bool includeRexPrefixSize); UNATIVE_OFFSET emitInsSizeSVCalcDisp(instrDesc* id, code_t code, int var, int dsp); UNATIVE_OFFSET emitInsSizeSV(instrDesc* id, code_t code, int var, int dsp); UNATIVE_OFFSET emitInsSizeSV(instrDesc* id, code_t code, int var, int dsp, int val); UNATIVE_OFFSET emitInsSizeRR(instrDesc* id, code_t code); UNATIVE_OFFSET emitInsSizeRR(instrDesc* id, code_t code, int val); -UNATIVE_OFFSET emitInsSizeRR(instruction ins, regNumber reg1, regNumber reg2, emitAttr attr); +UNATIVE_OFFSET emitInsSizeRR(instrDesc* id); UNATIVE_OFFSET emitInsSizeAM(instrDesc* id, code_t code); UNATIVE_OFFSET emitInsSizeAM(instrDesc* id, code_t code, int val); UNATIVE_OFFSET emitInsSizeCV(instrDesc* id, code_t code); @@ -65,14 +65,15 @@ BYTE* emitOutputRRR(BYTE* dst, instrDesc* id); BYTE* emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* id); -unsigned emitOutputSimdPrefixIfNeeded(instruction ins, BYTE* dst, code_t& code); -unsigned emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, code_t& code); +unsigned emitOutputRexOrSimdPrefixIfNeeded(instruction ins, BYTE* dst, code_t& code); unsigned emitGetRexPrefixSize(instruction ins); -unsigned emitGetVexPrefixSize(instruction ins, emitAttr attr); -unsigned emitGetEvexPrefixSize(instruction ins); -unsigned emitGetPrefixSize(code_t code, bool includeRexPrefixSize); -unsigned emitGetAdjustedSize(instruction ins, emitAttr attr, code_t code); -unsigned emitGetAdjustedSizeEvexAware(instruction ins, emitAttr attr, code_t code); +unsigned emitGetVexPrefixSize(instrDesc* id) const; +unsigned emitGetEvexPrefixSize(instrDesc* id) const; +unsigned emitGetPrefixSize(instrDesc* id, code_t code, bool includeRexPrefixSize); +unsigned emitGetAdjustedSize(instrDesc* id, code_t code) const; + +code_t emitExtractVexPrefix(instruction ins, code_t& code) const; +code_t emitExtractEvexPrefix(instruction ins, code_t& code) const; unsigned insEncodeReg012(instruction ins, regNumber reg, emitAttr size, code_t* code); unsigned insEncodeReg345(instruction ins, regNumber reg, emitAttr size, code_t* code); @@ -110,8 +111,8 @@ code_t AddRexXPrefix(instruction ins, code_t code); code_t AddRexBPrefix(instruction ins, code_t code); code_t AddRexPrefix(instruction ins, code_t code); -bool EncodedBySSE38orSSE3A(instruction ins); -bool Is4ByteSSEInstruction(instruction ins); +bool EncodedBySSE38orSSE3A(instruction ins) const; +bool Is4ByteSSEInstruction(instruction ins) const; static bool IsMovInstruction(instruction ins); bool HasSideEffect(instruction ins, emitAttr size); bool IsRedundantMov( -- 2.7.4