From 06a4ac3806848a4aa823f9e6c672e55566391d5a Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 10 Dec 2018 17:41:21 -0800 Subject: [PATCH] Adding support for the 2-byte VEX encoding to the emitter (dotnet/coreclr#21453) * Adding support for the 2-byte VEX encoding to the emitter * Relocating and expanding the comment which explains the two-byte VEX encoding check Commit migrated from https://github.com/dotnet/coreclr/commit/9fe157072d6551781200a0ce3e3321fd3dafcc0d --- src/coreclr/src/jit/emitxarch.cpp | 63 +++++++++++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 16 deletions(-) diff --git a/src/coreclr/src/jit/emitxarch.cpp b/src/coreclr/src/jit/emitxarch.cpp index 366124c..eb02571 100644 --- a/src/coreclr/src/jit/emitxarch.cpp +++ b/src/coreclr/src/jit/emitxarch.cpp @@ -245,6 +245,7 @@ bool emitter::TakesVexPrefix(instruction ins) // Add base VEX prefix without setting W, R, X, or B bits // L bit will be set based on emitter attr. // +// 2-byte VEX prefix = C5 // 3-byte VEX prefix = C4 // - R, X, B, W - bits to express corresponding REX prefixes // - m-mmmmm (5-bit) @@ -262,26 +263,31 @@ bool emitter::TakesVexPrefix(instruction ins) // 01 - 66 (66 0F - packed double) // 10 - F3 (F3 0F - scalar float // 11 - F2 (F2 0F - scalar double) -// -// TODO-AMD64-CQ: for simplicity of implementation this routine always adds 3-byte VEX -// prefix. Based on 'attr' param we could add 2-byte VEX prefix in case of scalar -// and AVX-128 bit operations. #define DEFAULT_3BYTE_VEX_PREFIX 0xC4E07800000000ULL #define DEFAULT_3BYTE_VEX_PREFIX_MASK 0xFFFFFF00000000ULL #define LBIT_IN_3BYTE_VEX_PREFIX 0x00000400000000ULL emitter::code_t emitter::AddVexPrefix(instruction ins, code_t code, emitAttr attr) { + // The 2-byte VEX encoding is preferred when possible, but actually emitting + // it depends on a number of factors that we may not know until much later. 
+ // + // In order to handle this "easily", we just carry the 3-byte encoding all + // the way through and "fix-up" the encoding when the VEX prefix is actually + // emitted, by simply checking that all the requirements were met. + // Only AVX instructions require VEX prefix assert(IsAVXInstruction(ins)); - // Shouldn't have already added Vex prefix + // Shouldn't have already added VEX prefix assert(!hasVexPrefix(code)); - // Set L bit to 1 in case of instructions that operate on 256-bits. assert((code & DEFAULT_3BYTE_VEX_PREFIX_MASK) == 0); + code |= DEFAULT_3BYTE_VEX_PREFIX; + if (attr == EA_32BYTE) { + // Set L bit to 1 in case of instructions that operate on 256-bits. code |= LBIT_IN_3BYTE_VEX_PREFIX; } @@ -485,9 +491,9 @@ emitter::code_t emitter::AddRexWPrefix(instruction ins, code_t code) { if (UseVEXEncoding() && IsAVXInstruction(ins)) { - // W-bit is available only in 3-byte VEX prefix that starts with byte C4. if (TakesVexPrefix(ins)) { + // W-bit is available only in 3-byte VEX prefix that starts with byte C4. assert(hasVexPrefix(code)); // W-bit is the only bit that is added in non bit-inverted form. @@ -508,9 +514,9 @@ emitter::code_t emitter::AddRexRPrefix(instruction ins, code_t code) { if (UseVEXEncoding() && IsAVXInstruction(ins)) { - // Right now support 3-byte VEX prefix if (TakesVexPrefix(ins)) { + // R-bit is supported by both 2-byte and 3-byte VEX prefix assert(hasVexPrefix(code)); // R-bit is added in bit-inverted form. @@ -525,9 +531,9 @@ emitter::code_t emitter::AddRexXPrefix(instruction ins, code_t code) { if (UseVEXEncoding() && IsAVXInstruction(ins)) { - // Right now support 3-byte VEX prefix if (TakesVexPrefix(ins)) { + // X-bit is available only in 3-byte VEX prefix that starts with byte C4. assert(hasVexPrefix(code)); // X-bit is added in bit-inverted form. 
@@ -542,9 +548,9 @@ emitter::code_t emitter::AddRexBPrefix(instruction ins, code_t code) { if (UseVEXEncoding() && IsAVXInstruction(ins)) { - // Right now support 3-byte VEX prefix if (TakesVexPrefix(ins)) { + // B-bit is available only in 3-byte VEX prefix that starts with byte C4. assert(hasVexPrefix(code)); // B-bit is added in bit-inverted form. @@ -677,7 +683,10 @@ unsigned emitter::emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, c // If there is an escape byte it must be 0x0F or 0x0F3A or 0x0F38 // m-mmmmm bits in byte 1 of VEX prefix allows us to encode these - // implied leading bytes + // implied leading bytes. 0x0F is supported by both the 2-byte and + // 3-byte encoding. While 0x0F3A and 0x0F38 are only supported by + // the 3-byte version. + switch (leadingBytes) { case 0x00: @@ -702,6 +711,33 @@ unsigned emitter::emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, c // VEX.0011RM22 got transformed as VEX.0000RM22 // // Now output VEX prefix leaving the 4-byte opcode + + // The 2-byte VEX encoding requires that the X and B-bits are set (these + // bits are inverted from the REX values so set means off), the W-bit is + // not set (this bit is not inverted), and that the m-mmmm bits are 0-0001 + // (the 2-byte VEX encoding only supports the 0x0F leading byte). When these + // conditions are met, we can change byte-0 from 0xC4 to 0xC5 and then + // byte-1 is the bitwise-or of bit 7 from byte-1 and bits 0-6 from byte 2 + // from the 3-byte VEX encoding. + // + // Given the above, the check can be reduced to a simple mask and comparison. 
+ // * 0xFFFF7F80 is a mask that ignores any bits whose value we don't care about: + // * R can be set or unset (0x7F ignores bit 7) + // * vvvv can be any value (0x80 ignores bits 3-6) + // * L can be set or unset (0x80 ignores bit 2) + // * pp can be any value (0x80 ignores bits 0-1) + // * 0x00C46100 is a value that signifies the requirements listed above were met: + // * We must be a three-byte VEX opcode (0x00C4) + // * X and B must be set (0x61 validates bits 5-6) + // * m-mmmm must be 0-0001 (0x61 validates bits 0-4) + // * W must be unset (0x00 validates bit 7) + if ((vexPrefix & 0xFFFF7F80) == 0x00C46100) + { + emitOutputByte(dst, 0xC5); + emitOutputByte(dst + 1, ((vexPrefix >> 8) & 0x80) | (vexPrefix & 0x7F)); + return 2; + } + emitOutputByte(dst, ((vexPrefix >> 16) & 0xFF)); emitOutputByte(dst + 1, ((vexPrefix >> 8) & 0xFF)); emitOutputByte(dst + 2, vexPrefix & 0xFF); @@ -812,9 +848,6 @@ unsigned emitter::emitGetRexPrefixSize(instruction ins) // Size of vex prefix in bytes unsigned emitter::emitGetVexPrefixSize(instruction ins, emitAttr attr) { - // TODO-XArch-CQ: right now we default to 3-byte VEX prefix. There is a - // scope for size win by using 2-byte vex prefix for some of the - // scalar, avx-128 and most common avx-256 instructions. if (IsAVXInstruction(ins)) { return 3; @@ -839,8 +872,6 @@ unsigned emitter::emitGetVexPrefixAdjustedSize(instruction ins, emitAttr attr, c if (IsAVXInstruction(ins)) { unsigned vexPrefixAdjustedSize = emitGetVexPrefixSize(ins, attr); - // Currently vex prefix size is hard coded as 3 bytes, - // In future we should support 2 bytes vex prefix. assert(vexPrefixAdjustedSize == 3); // In this case, opcode will contains escape prefix at least one byte, -- 2.7.4