From 51d747ce7961ef63c01f48047134f5ccc6aecf17 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 14 Dec 2018 13:41:07 -0800 Subject: [PATCH] Simplifying the emitter handling of 4-byte encoded SSE instructions (dotnet/coreclr#21528) Commit migrated from https://github.com/dotnet/coreclr/commit/813bd6ec05c8ce81f3ec7b59f21966ebc9420dca --- src/coreclr/src/jit/compiler.cpp | 8 -- src/coreclr/src/jit/emit.h | 1 - src/coreclr/src/jit/emitxarch.cpp | 152 +++++++++++++++++--------------------- src/coreclr/src/jit/emitxarch.h | 12 +-- src/coreclr/src/jit/instrsxarch.h | 6 +- 5 files changed, 71 insertions(+), 108 deletions(-) diff --git a/src/coreclr/src/jit/compiler.cpp b/src/coreclr/src/jit/compiler.cpp index 2e7f8ae..43acab3 100644 --- a/src/coreclr/src/jit/compiler.cpp +++ b/src/coreclr/src/jit/compiler.cpp @@ -2489,14 +2489,6 @@ void Compiler::compSetProcessor() codeGen->getEmitter()->SetContainsAVX(false); codeGen->getEmitter()->SetContains256bitAVX(false); } - else if (compSupports(InstructionSet_SSSE3) || compSupports(InstructionSet_AES) || - compSupports(InstructionSet_PCLMULQDQ)) - { - // Emitter::UseSSE4 controls whether we support the 4-byte encoding for certain - // instructions. We need to check if either is supported independently, since - // it is currently possible to enable/disable them separately. 
- codeGen->getEmitter()->SetUseSSE4(true); - } } #endif #if defined(_TARGET_ARM64_) diff --git a/src/coreclr/src/jit/emit.h b/src/coreclr/src/jit/emit.h index 770de41..dbfb178 100644 --- a/src/coreclr/src/jit/emit.h +++ b/src/coreclr/src/jit/emit.h @@ -427,7 +427,6 @@ public: #endif // DEBUG #ifdef _TARGET_XARCH_ - SetUseSSE4(false); SetUseVEXEncoding(false); #endif // _TARGET_XARCH_ } diff --git a/src/coreclr/src/jit/emitxarch.cpp b/src/coreclr/src/jit/emitxarch.cpp index eb02571..9cd6a3f 100644 --- a/src/coreclr/src/jit/emitxarch.cpp +++ b/src/coreclr/src/jit/emitxarch.cpp @@ -25,19 +25,14 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "emit.h" #include "codegen.h" -bool IsSSE2Instruction(instruction ins) +bool IsSSEInstruction(instruction ins) { - return (ins >= INS_FIRST_SSE2_INSTRUCTION) && (ins <= INS_LAST_SSE2_INSTRUCTION); -} - -bool IsSSE4Instruction(instruction ins) -{ - return (ins >= INS_FIRST_SSE4_INSTRUCTION) && (ins <= INS_LAST_SSE4_INSTRUCTION); + return (ins >= INS_FIRST_SSE_INSTRUCTION) && (ins <= INS_LAST_SSE_INSTRUCTION); } bool IsSSEOrAVXInstruction(instruction ins) { - return (ins >= INS_FIRST_SSE2_INSTRUCTION) && (ins <= INS_LAST_AVX_INSTRUCTION); + return (ins >= INS_FIRST_SSE_INSTRUCTION) && (ins <= INS_LAST_AVX_INSTRUCTION); } bool IsAVXOnlyInstruction(instruction ins) @@ -189,32 +184,17 @@ static bool IsDstSrcImmAvxInstruction(instruction ins) #endif // FEATURE_HW_INTRINSICS // ------------------------------------------------------------------- -// Is4ByteSSE4Instruction: Returns true if the SSE4 instruction -// is a 4-byte opcode. +// Is4ByteSSEInstruction: Returns true if the SSE instruction is a 4-byte opcode. // // Arguments: // ins - instruction // // Note that this should be true for any of the instructions in instrsXArch.h -// that use the SSE38 or SSE3A macro. 
-bool emitter::Is4ByteSSE4Instruction(instruction ins) +// that use the SSE38 or SSE3A macro but returns false if the VEX encoding is +// in use, since that encoding does not require an additional byte. +bool emitter::Is4ByteSSEInstruction(instruction ins) { - return UseSSE4() && IsSSE4Instruction(ins) && EncodedBySSE38orSSE3A(ins); -} - -// ------------------------------------------------------------------------------ -// Is4ByteSSE4OrAVXInstruction: Returns true if the SSE4 or AVX instruction is a 4-byte opcode. -// -// Arguments: -// ins - instructions -// -// Note that this should be true for any of the instructions in instrsXArch.h -// that use the SSE38 or SSE3A macro. -bool emitter::Is4ByteSSE4OrAVXInstruction(instruction ins) -{ - return ((UseVEXEncoding() && (IsSSE4Instruction(ins) || IsAVXOnlyInstruction(ins))) || - (UseSSE4() && IsSSE4Instruction(ins))) && - EncodedBySSE38orSSE3A(ins); + return !UseVEXEncoding() && EncodedBySSE38orSSE3A(ins); } // Returns true if this instruction requires a VEX prefix @@ -1306,6 +1286,11 @@ bool emitter::EncodedBySSE38orSSE3A(instruction ins) size_t insCode = 0; + if (!IsSSEOrAVXInstruction(ins)) + { + return false; + } + if (hasCodeRM(ins)) { insCode = insCodeRM(ins); @@ -3858,9 +3843,9 @@ void emitter::emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNum UNATIVE_OFFSET sz = emitInsSizeRR(ins, reg1, reg2, attr); - if (Is4ByteSSE4Instruction(ins)) + if (Is4ByteSSEInstruction(ins)) { - // The 4-Byte SSE4 instructions require one additional byte + // The 4-Byte SSE instructions require one additional byte sz += 1; } @@ -3885,14 +3870,14 @@ void emitter::emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNum void emitter::emitIns_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int ival) { - // SSE2 version requires 5 bytes and SSE4/AVX version 6 bytes + // SSE2 version requires 5 bytes and some SSE/AVX version 6 bytes UNATIVE_OFFSET sz = 4; if (IsSSEOrAVXInstruction(ins)) { // 
AVX: 3 byte VEX prefix + 1 byte opcode + 1 byte ModR/M + 1 byte immediate - // SSE4: 4 byte opcode + 1 byte ModR/M + 1 byte immediate - // SSE2: 3 byte opcode + 1 byte ModR/M + 1 byte immediate - sz = (UseVEXEncoding() || UseSSE4()) ? 6 : 5; + // SSE (SSE38/SSE3A-encoded): 4 byte opcode + 1 byte ModR/M + 1 byte immediate + // SSE (all other encodings): 3 byte opcode + 1 byte ModR/M + 1 byte immediate + sz = (UseVEXEncoding() || Is4ByteSSEInstruction(ins)) ? 6 : 5; } #ifdef _TARGET_AMD64_ @@ -3911,7 +3896,6 @@ void emitter::emitIns_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regN if ((ins == INS_pextrq || ins == INS_pinsrq) && !UseVEXEncoding()) { - assert(UseSSE4()); sz += 1; } @@ -4012,9 +3996,9 @@ void emitter::emitIns_R_A_I(instruction ins, emitAttr attr, regNumber reg1, GenT UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival); - if (Is4ByteSSE4Instruction(ins)) + if (Is4ByteSSEInstruction(ins)) { - // The 4-Byte SSE4 instructions require two additional bytes + // The 4-Byte SSE instructions require two additional bytes sz += 2; } @@ -4040,9 +4024,9 @@ void emitter::emitIns_R_AR_I(instruction ins, emitAttr attr, regNumber reg1, reg UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival); - if (Is4ByteSSE4Instruction(ins)) + if (Is4ByteSSEInstruction(ins)) { - // The 4-Byte SSE4 instructions require two additional bytes + // The 4-Byte SSE instructions require two additional bytes sz += 2; } @@ -4073,9 +4057,9 @@ void emitter::emitIns_R_C_I( UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins), ival); - if (Is4ByteSSE4Instruction(ins)) + if (Is4ByteSSEInstruction(ins)) { - // The 4-Byte SSE4 instructions require two additional bytes + // The 4-Byte SSE instructions require two additional bytes sz += 2; } @@ -4103,9 +4087,9 @@ void emitter::emitIns_R_S_I(instruction ins, emitAttr attr, regNumber reg1, int UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeRM(ins), varx, offs, ival); - if (Is4ByteSSE4Instruction(ins)) + if (Is4ByteSSEInstruction(ins)) { - // The 4-Byte SSE4 instructions 
require two additional bytes + // The 4-Byte SSE instructions require two additional bytes sz += 2; } @@ -5117,9 +5101,9 @@ void emitter::emitIns_R_AR(instruction ins, emitAttr attr, regNumber ireg, regNu sz = emitInsSizeAM(id, insCodeRM(ins)); - if (Is4ByteSSE4Instruction(ins)) + if (Is4ByteSSEInstruction(ins)) { - // The 4-Byte SSE4 instructions require two additional bytes + // The 4-Byte SSE instructions require two additional bytes sz += 2; } @@ -9369,7 +9353,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) } // Special case emitting AVX instructions - if (Is4ByteSSE4OrAVXInstruction(ins) || (ins == INS_crc32)) + if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32)) { if ((ins == INS_crc32) && (size > EA_1BYTE)) { @@ -9454,7 +9438,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) // Use the large version if this is not a byte. This trick will not // work in case of SSE2 and AVX instructions. - if ((size != EA_1BYTE) && (ins != INS_imul) && !IsSSE2Instruction(ins) && !IsAVXInstruction(ins)) + if ((size != EA_1BYTE) && (ins != INS_imul) && !IsSSEInstruction(ins) && !IsAVXInstruction(ins)) { code++; } @@ -9467,7 +9451,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) code += 4; } } - else if (!IsSSE2Instruction(ins) && !IsAVXInstruction(ins)) + else if (!IsSSEInstruction(ins) && !IsAVXInstruction(ins)) { /* Is the operand size larger than a byte? 
*/ @@ -9541,7 +9525,7 @@ GOT_DSP: // The address is of the form "[disp]" // On x86 - disp is relative to zero // On Amd64 - disp is relative to RIP - if (Is4ByteSSE4OrAVXInstruction(ins) || (ins == INS_crc32)) + if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32)) { dst += emitOutputByte(dst, code | 0x05); } @@ -9597,7 +9581,7 @@ GOT_DSP: else { #ifdef _TARGET_X86_ - if (Is4ByteSSE4OrAVXInstruction(ins) || (ins == INS_crc32)) + if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32)) { dst += emitOutputByte(dst, code | 0x05); } @@ -9614,7 +9598,7 @@ GOT_DSP: noway_assert((int)dsp == dsp); // This requires, specifying a SIB byte after ModRM byte. - if (Is4ByteSSE4OrAVXInstruction(ins)) + if (EncodedBySSE38orSSE3A(ins)) { dst += emitOutputByte(dst, code | 0x04); } @@ -9631,7 +9615,7 @@ GOT_DSP: case REG_EBP: { - if (Is4ByteSSE4OrAVXInstruction(ins) || (ins == INS_crc32)) + if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32)) { // Does the offset fit in a byte? if (dspInByte) @@ -9674,7 +9658,7 @@ GOT_DSP: case REG_ESP: { - if (Is4ByteSSE4OrAVXInstruction(ins) || (ins == INS_crc32)) + if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32)) { // Is the offset 0 or does it at least fit in a byte? 
if (dspIsZero) @@ -9729,7 +9713,7 @@ GOT_DSP: default: { - if (Is4ByteSSE4OrAVXInstruction(ins) || (ins == INS_crc32)) + if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32)) { // Put the register in the opcode code |= insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr); @@ -9811,7 +9795,7 @@ GOT_DSP: regByte = insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr) | insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr) | insSSval(mul); - if (Is4ByteSSE4OrAVXInstruction(ins) || (ins == INS_crc32)) + if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32)) { // Emit [ebp + {2/4/8} * rgz] as [ebp + {2/4/8} * rgx + 0] if (dspIsZero && reg != REG_EBP) @@ -9878,7 +9862,7 @@ GOT_DSP: regByte = insEncodeReg012(ins, REG_EBP, EA_PTRSIZE, nullptr) | insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr) | insSSval(mul); - if (Is4ByteSSE4OrAVXInstruction(ins) || (ins == INS_crc32)) + if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32)) { dst += emitOutputByte(dst, code | 0x04); } @@ -9907,7 +9891,7 @@ GOT_DSP: // The address is "[reg+rgx+dsp]" regByte = insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr) | insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr); - if (Is4ByteSSE4OrAVXInstruction(ins) || (ins == INS_crc32)) + if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32)) { if (dspIsZero && reg != REG_EBP) { @@ -10139,7 +10123,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) } // Special case emitting AVX instructions - if (Is4ByteSSE4OrAVXInstruction(ins) || (ins == INS_crc32)) + if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32)) { if ((ins == INS_crc32) && (size > EA_1BYTE)) { @@ -10216,7 +10200,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) } // Use the large version if this is not a byte - if ((size != EA_1BYTE) && (ins != INS_imul) && (!insIsCMOV(ins)) && !IsSSE2Instruction(ins) && + if ((size != EA_1BYTE) && (ins != INS_imul) && (!insIsCMOV(ins)) && !IsSSEInstruction(ins) && !IsAVXInstruction(ins)) { code |= 0x1; @@ -10231,7 
+10215,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) code += 4; } } - else if (!IsSSE2Instruction(ins) && !IsAVXInstruction(ins)) + else if (!IsSSEInstruction(ins) && !IsAVXInstruction(ins)) { // Is the operand size larger than a byte? switch (size) @@ -10294,7 +10278,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) if (EBPbased) { // EBP-based variable: does the offset fit in a byte? - if (Is4ByteSSE4OrAVXInstruction(ins) || (ins == INS_crc32)) + if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32)) { if (dspInByte) { @@ -10333,7 +10317,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) dspIsZero = (dsp == 0); // Does the offset fit in a byte? - if (Is4ByteSSE4OrAVXInstruction(ins) || (ins == INS_crc32)) + if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32)) { if (dspInByte) { @@ -10604,7 +10588,7 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) #endif //_TARGET_X86_ // Special case emitting AVX instructions - if (Is4ByteSSE4OrAVXInstruction(ins) || (ins == INS_crc32)) + if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32)) { if ((ins == INS_crc32) && (size > EA_1BYTE)) { @@ -10979,7 +10963,7 @@ BYTE* emitter::emitOutputR(BYTE* dst, instrDesc* id) emitAttr size = id->idOpSize(); // We would to update GC info correctly - assert(!IsSSE2Instruction(ins)); + assert(!IsSSEInstruction(ins)); assert(!IsAVXInstruction(ins)); // Get the 'base' opcode @@ -11353,7 +11337,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) dst += emitOutputWord(dst, code >> 16); code &= 0x0000FFFF; - if (Is4ByteSSE4Instruction(ins)) + if (Is4ByteSSEInstruction(ins)) { // Output 3rd byte of the opcode dst += emitOutputByte(dst, code); @@ -11373,8 +11357,8 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) } else if ((code & 0xFF) == 0x00) { - // This case happens for SSE4/AVX instructions only - assert(IsAVXInstruction(ins) || 
IsSSE4Instruction(ins)); + // This case happens for some SSE/AVX instructions only + assert(IsAVXInstruction(ins) || Is4ByteSSEInstruction(ins)); dst += emitOutputByte(dst, (code >> 8) & 0xFF); dst += emitOutputByte(dst, (0xC0 | regCode)); @@ -11993,7 +11977,7 @@ BYTE* emitter::emitOutputIV(BYTE* dst, instrDesc* id) bool valInByte = ((signed char)val == val); // We would to update GC info correctly - assert(!IsSSE2Instruction(ins)); + assert(!IsSSEInstruction(ins)); assert(!IsAVXInstruction(ins)); #ifdef _TARGET_AMD64_ @@ -12090,8 +12074,8 @@ BYTE* emitter::emitOutputLJ(BYTE* dst, instrDesc* i) bool jmp; bool relAddr = true; // does the instruction use relative-addressing? - // SSE2 doesnt make any sense here - assert(!IsSSE2Instruction(ins)); + // SSE/AVX doesnt make any sense here + assert(!IsSSEInstruction(ins)); assert(!IsAVXInstruction(ins)); size_t ssz; @@ -12872,7 +12856,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutputWord(dst, code >> 16); code &= 0x0000FFFF; - if (Is4ByteSSE4Instruction(ins)) + if (Is4ByteSSEInstruction(ins)) { // Output 3rd byte of the opcode dst += emitOutputByte(dst, code); @@ -12892,8 +12876,8 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) } else if ((code & 0xFF) == 0x00) { - // This case happens for SSE4/AVX instructions only - assert(IsAVXInstruction(ins) || IsSSE4Instruction(ins)); + // This case happens for some SSE/AVX instructions only + assert(IsAVXInstruction(ins) || Is4ByteSSEInstruction(ins)); dst += emitOutputByte(dst, (code >> 8) & 0xFF); dst += emitOutputByte(dst, (0xC0 | regcode)); @@ -12969,7 +12953,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) code = insCodeRM(ins); // Special case 4-byte AVX instructions - if (Is4ByteSSE4OrAVXInstruction(ins)) + if (EncodedBySSE38orSSE3A(ins)) { dst = emitOutputAM(dst, id, code, &cnsVal); } @@ -12998,7 +12982,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** 
dp) case IF_RWR_RRD_ARD: { code = insCodeRM(ins); - if (Is4ByteSSE4OrAVXInstruction(ins) || (ins == INS_crc32)) + if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32)) { dst = emitOutputAM(dst, id, code); } @@ -13026,7 +13010,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) { emitGetInsAmdCns(id, &cnsVal); code = insCodeRM(ins); - if (Is4ByteSSE4OrAVXInstruction(ins)) + if (EncodedBySSE38orSSE3A(ins)) { dst = emitOutputAM(dst, id, code, &cnsVal); } @@ -13127,7 +13111,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) code = insCodeRM(ins); // Special case 4-byte AVX instructions - if (Is4ByteSSE4OrAVXInstruction(ins)) + if (EncodedBySSE38orSSE3A(ins)) { dst = emitOutputSV(dst, id, code, &cnsVal); } @@ -13162,7 +13146,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) // 4-byte AVX instructions are special cased inside emitOutputSV // since they do not have space to encode ModRM byte. - if (Is4ByteSSE4OrAVXInstruction(ins) || (ins == INS_crc32)) + if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32)) { dst = emitOutputSV(dst, id, code); } @@ -13196,7 +13180,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) // 4-byte AVX instructions are special cased inside emitOutputSV // since they do not have space to encode ModRM byte. - if (Is4ByteSSE4OrAVXInstruction(ins)) + if (EncodedBySSE38orSSE3A(ins)) { dst = emitOutputSV(dst, id, code); } @@ -13222,7 +13206,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) // 4-byte AVX instructions are special cased inside emitOutputSV // since they do not have space to encode ModRM byte. 
- if (Is4ByteSSE4OrAVXInstruction(ins)) + if (EncodedBySSE38orSSE3A(ins)) { dst = emitOutputSV(dst, id, code, &cnsVal); } @@ -13281,7 +13265,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) code = insCodeRM(ins); // Special case 4-byte AVX instructions - if (Is4ByteSSE4OrAVXInstruction(ins)) + if (EncodedBySSE38orSSE3A(ins)) { dst = emitOutputCV(dst, id, code, &cnsVal); } @@ -13326,7 +13310,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) code = insCodeRM(ins); // Special case 4-byte AVX instructions - if (Is4ByteSSE4OrAVXInstruction(ins) || (ins == INS_crc32)) + if (EncodedBySSE38orSSE3A(ins) || (ins == INS_crc32)) { dst = emitOutputCV(dst, id, code); } @@ -13359,7 +13343,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) code); // encode source operand reg in 'vvvv' bits in 1's complement form // Special case 4-byte AVX instructions - if (Is4ByteSSE4OrAVXInstruction(ins)) + if (EncodedBySSE38orSSE3A(ins)) { dst = emitOutputCV(dst, id, code); } @@ -13385,7 +13369,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) code); // encode source operand reg in 'vvvv' bits in 1's complement form // Special case 4-byte AVX instructions - if (Is4ByteSSE4OrAVXInstruction(ins)) + if (EncodedBySSE38orSSE3A(ins)) { dst = emitOutputCV(dst, id, code, &cnsVal); } diff --git a/src/coreclr/src/jit/emitxarch.h b/src/coreclr/src/jit/emitxarch.h index a3e25de..8d9cafa 100644 --- a/src/coreclr/src/jit/emitxarch.h +++ b/src/coreclr/src/jit/emitxarch.h @@ -91,18 +91,8 @@ code_t AddRexXPrefix(instruction ins, code_t code); code_t AddRexBPrefix(instruction ins, code_t code); code_t AddRexPrefix(instruction ins, code_t code); -bool useSSE4Encodings; -bool UseSSE4() -{ - return useSSE4Encodings; -} -void SetUseSSE4(bool value) -{ - useSSE4Encodings = value; -} bool EncodedBySSE38orSSE3A(instruction ins); -bool Is4ByteSSE4Instruction(instruction ins); -bool 
Is4ByteSSE4OrAVXInstruction(instruction ins); +bool Is4ByteSSEInstruction(instruction ins); bool hasRexPrefix(code_t code) { diff --git a/src/coreclr/src/jit/instrsxarch.h b/src/coreclr/src/jit/instrsxarch.h index db02fc2..e6d1def 100644 --- a/src/coreclr/src/jit/instrsxarch.h +++ b/src/coreclr/src/jit/instrsxarch.h @@ -177,7 +177,7 @@ INSTMUL(imul_15, "imul", IUM_RD, BAD_CODE, 0x4400003868, #define VEX3INT(c1,c2) PACK4(c1, 0xc5, 0x02, c2) #define VEX3FLT(c1,c2) PACK4(c1, 0xc5, 0x02, c2) -INST3(FIRST_SSE2_INSTRUCTION, "FIRST_SSE2_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None) +INST3(FIRST_SSE_INSTRUCTION, "FIRST_SSE_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None) // These are the SSE instructions used on x86 INST3(mov_i2xmm, "movd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6E), INS_FLAGS_None) // Move int reg to a xmm reg. reg1=xmm reg, reg2=int reg INST3(mov_xmm2i, "movd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7E), INS_FLAGS_None) // Move xmm reg to an int reg. 
reg1=xmm reg, reg2=int reg @@ -385,9 +385,7 @@ INST3(unpcklpd, "unpcklpd", IUM_WR, BAD_CODE, BAD_CODE, INST3(packssdw, "packssdw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6B), INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) int to short with saturation INST3(packsswb, "packsswb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x63), INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) short to byte with saturation INST3(packuswb, "packuswb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x67), INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) short to unsigned byte with saturation -INST3(LAST_SSE2_INSTRUCTION, "LAST_SSE2_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None) -INST3(FIRST_SSE4_INSTRUCTION, "FIRST_SSE4_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None) // id nm um mr mi rm flags INST3(dpps, "dpps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x40), INS_Flags_IsDstDstSrcAVXInstruction) // Packed dot product of two float vector regs INST3(dppd, "dppd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x41), INS_Flags_IsDstDstSrcAVXInstruction) // Packed dot product of two double vector regs @@ -470,7 +468,7 @@ INST3(aesenc, "aesenc", IUM_WR, BAD_CODE, BAD_CODE, INST3(aesenclast, "aesenclast", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDD), INS_Flags_IsDstDstSrcAVXInstruction) // Perform last round of an AES encryption flow INST3(aesimc, "aesimc", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDB), INS_FLAGS_None) // Perform the AES InvMixColumn Transformation INST3(aeskeygenassist, "aeskeygenassist", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xDF), INS_FLAGS_None) // AES Round Key Generation Assist -INST3(LAST_SSE4_INSTRUCTION, "LAST_SSE4_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None) +INST3(LAST_SSE_INSTRUCTION, "LAST_SSE_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None) INST3(FIRST_AVX_INSTRUCTION, "FIRST_AVX_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None) // AVX only instructions -- 2.7.4