From 6778d3fb46d67b127b26485b01914a8d76313554 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Tue, 6 Feb 2018 20:38:24 -0800 Subject: [PATCH] Updating the emitter to more generally handle 4-Byte SSE4 instructions. --- src/jit/emitxarch.cpp | 272 +++++++++++++++++++++++++++++++------------------- src/jit/emitxarch.h | 6 +- 2 files changed, 173 insertions(+), 105 deletions(-) diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp index b9f318f..f2e56bb 100644 --- a/src/jit/emitxarch.cpp +++ b/src/jit/emitxarch.cpp @@ -227,19 +227,6 @@ bool emitter::IsDstSrcSrcAVXInstruction(instruction ins) return false; } } - -// ------------------------------------------------------------------------------ -// Is4ByteAVXInstruction: Returns true if the AVX instruction is a 4-byte opcode. -// -// Arguments: -// ins - instructions -// -// Note that this should be true for any of the instructions in instrsXArch.h -// that use the SSE38 or SSE3A macro. -bool emitter::Is4ByteAVXInstruction(instruction ins) -{ - return UseVEXEncoding() && (IsSSE4Instruction(ins) || IsAVXOnlyInstruction(ins)) && EncodedBySSE38orSSE3A(ins); -} #endif // !LEGACY_BACKEND // ------------------------------------------------------------------- @@ -258,7 +245,27 @@ bool emitter::Is4ByteSSE4Instruction(instruction ins) return false; #else return UseSSE4() && IsSSE4Instruction(ins) && EncodedBySSE38orSSE3A(ins); -#endif +#endif // LEGACY_BACKEND +} + +// ------------------------------------------------------------------------------ +// Is4ByteSSE4OrAVXInstruction: Returns true if the SSE4 or AVX instruction is a 4-byte opcode. +// +// Arguments: +// ins - instructions +// +// Note that this should be true for any of the instructions in instrsXArch.h +// that use the SSE38 or SSE3A macro. +bool emitter::Is4ByteSSE4OrAVXInstruction(instruction ins) +{ +#ifdef LEGACY_BACKEND + // On legacy backend SSE4 and AVX are not enabled. + return false; +#else + return ((UseVEXEncoding() && (IsSSE4Instruction(ins) || IsAVXOnlyInstruction(ins))) || + (UseSSE4() && IsSSE4Instruction(ins))) && + EncodedBySSE38orSSE3A(ins); +#endif // LEGACY_BACKEND } #ifndef LEGACY_BACKEND @@ -4036,6 +4043,13 @@ void emitter::emitIns_R_A_I(instruction ins, emitAttr attr, regNumber reg1, GenT // Plus one for the 1-byte immediate (ival) UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1; + + if (Is4ByteSSE4Instruction(ins)) + { + // The 4-Byte SSE4 instructions require two additional bytes + sz += 2; + } + id->idCodeSize(sz); dispIns(id); @@ -4058,6 +4072,13 @@ void emitter::emitIns_R_AR_I(instruction ins, emitAttr attr, regNumber reg1, reg // Plus one for the 1-byte immediate (ival) UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1; + + if (Is4ByteSSE4Instruction(ins)) + { + // The 4-Byte SSE4 instructions require two additional bytes + sz += 2; + } + id->idCodeSize(sz); dispIns(id); @@ -4079,6 +4100,12 @@ void emitter::emitIns_R_C_I( instrDesc* id = emitNewInstrCnsDsp(attr, ival, offs); UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1; + if (Is4ByteSSE4Instruction(ins)) + { + // The 4-Byte SSE4 instructions require two additional bytes + sz += 2; + } + id->idIns(ins); id->idInsFmt(IF_RRW_MRD_CNS); id->idReg1(reg1); @@ -4098,6 +4125,12 @@ void emitter::emitIns_R_S_I(instruction ins, emitAttr attr, regNumber reg1, int UNATIVE_OFFSET sz = emitInsSizeSV(insCodeRM(ins), varx, offs) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1; + if (Is4ByteSSE4Instruction(ins)) + { + // The 4-Byte SSE4 instructions require two additional bytes + sz += 2; + } + id->idIns(ins); id->idInsFmt(IF_RRW_SRD_CNS); id->idReg1(reg1); @@ -8429,21 +8462,24 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) code = AddVexPrefixIfNeededAndNotPresent(ins, code, size); // For this format, moves do not support a third operand, so we only need to handle the binary ops. - if (IsDstDstSrcAVXInstruction(ins) && !Is4ByteAVXInstruction(ins)) + if (TakesVexPrefix(ins)) { - regNumber src1 = id->idReg2(); + if (IsDstDstSrcAVXInstruction(ins)) + { + regNumber src1 = id->idReg2(); + + if ((id->idInsFmt() != IF_RWR_RRD_ARD) && (id->idInsFmt() != IF_RWR_RRD_ARD_CNS)) + { + src1 = id->idReg1(); + } - if ((id->idInsFmt() != IF_RWR_RRD_ARD) && (id->idInsFmt() != IF_RWR_RRD_ARD_CNS)) + // encode source operand reg in 'vvvv' bits in 1's compliement form + code = insEncodeReg3456(ins, src1, size, code); + } + else if (IsDstSrcSrcAVXInstruction(ins)) { - src1 = id->idReg1(); + code = insEncodeReg3456(ins, id->idReg2(), size, code); } - - // encode source operand reg in 'vvvv' bits in 1's compliement form - code = insEncodeReg3456(ins, src1, size, code); - } - else if (IsDstSrcSrcAVXInstruction(ins)) - { - code = insEncodeReg3456(ins, id->idReg2(), size, code); } // Emit the REX prefix if required @@ -8467,14 +8503,24 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) } // Special case emitting AVX instructions - if (Is4ByteAVXInstruction(ins)) + if (Is4ByteSSE4OrAVXInstruction(ins)) { unsigned regcode = insEncodeReg345(ins, id->idReg1(), size, &code); dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); - // Emit last opcode byte - assert((code & 0xFF) == 0); - dst += emitOutputByte(dst, (code >> 8) & 0xFF); + if (UseVEXEncoding()) + { + // Emit last opcode byte + // TODO-XArch-CQ: Right now support 4-byte opcode instructions only + assert((code & 0xFF) == 0); + dst += emitOutputByte(dst, (code >> 8) & 0xFF); + } + else + { + dst += emitOutputWord(dst, code >> 16); + dst += emitOutputWord(dst, code & 0xFFFF); + } + code = regcode; } // Is this a 'big' opcode? @@ -8600,7 +8646,7 @@ GOT_DSP: // The address is of the form "[disp]" // On x86 - disp is relative to zero // On Amd64 - disp is relative to RIP - if (Is4ByteAVXInstruction(ins)) + if (Is4ByteSSE4OrAVXInstruction(ins)) { dst += emitOutputByte(dst, code | 0x05); } @@ -8656,7 +8702,7 @@ GOT_DSP: else { #ifdef _TARGET_X86_ - if (Is4ByteAVXInstruction(ins)) + if (Is4ByteSSE4OrAVXInstruction(ins)) { dst += emitOutputByte(dst, code | 0x05); } @@ -8673,7 +8719,7 @@ GOT_DSP: noway_assert((int)dsp == dsp); // This requires, specifying a SIB byte after ModRM byte. - if (Is4ByteAVXInstruction(ins)) + if (Is4ByteSSE4OrAVXInstruction(ins)) { dst += emitOutputByte(dst, code | 0x04); } @@ -8688,7 +8734,7 @@ GOT_DSP: break; case REG_EBP: - if (Is4ByteAVXInstruction(ins)) + if (Is4ByteSSE4OrAVXInstruction(ins)) { // Does the offset fit in a byte? if (dspInByte) @@ -8742,7 +8788,7 @@ GOT_DSP: (ins == INS_or)); #endif // LEGACY_BACKEND - if (Is4ByteAVXInstruction(ins)) + if (Is4ByteSSE4OrAVXInstruction(ins)) { // Is the offset 0 or does it at least fit in a byte? if (dspIsZero) @@ -8795,7 +8841,7 @@ GOT_DSP: break; default: - if (Is4ByteAVXInstruction(ins)) + if (Is4ByteSSE4OrAVXInstruction(ins)) { // Put the register in the opcode code |= insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr); @@ -8876,7 +8922,7 @@ GOT_DSP: regByte = insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr) | insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr) | insSSval(mul); - if (Is4ByteAVXInstruction(ins)) + if (Is4ByteSSE4OrAVXInstruction(ins)) { // Emit [ebp + {2/4/8} * rgz] as [ebp + {2/4/8} * rgx + 0] if (dspIsZero && reg != REG_EBP) @@ -8943,7 +8989,7 @@ GOT_DSP: regByte = insEncodeReg012(ins, REG_EBP, EA_PTRSIZE, nullptr) | insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr) | insSSval(mul); - if (Is4ByteAVXInstruction(ins)) + if (Is4ByteSSE4OrAVXInstruction(ins)) { dst += emitOutputByte(dst, code | 0x04); } @@ -8972,7 +9018,7 @@ GOT_DSP: // The address is "[reg+rgx+dsp]" regByte = insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr) | insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr); - if (Is4ByteAVXInstruction(ins)) + if (Is4ByteSSE4OrAVXInstruction(ins)) { if (dspIsZero && reg != REG_EBP) { @@ -9198,14 +9244,24 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) } // Special case emitting AVX instructions - if (Is4ByteAVXInstruction(ins)) + if (Is4ByteSSE4OrAVXInstruction(ins)) { unsigned regcode = insEncodeReg345(ins, id->idReg1(), size, &code); dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); - // Emit last opcode byte - assert((code & 0xFF) == 0); - dst += emitOutputByte(dst, (code >> 8) & 0xFF); + if (UseVEXEncoding()) + { + // Emit last opcode byte + // TODO-XArch-CQ: Right now support 4-byte opcode instructions only + assert((code & 0xFF) == 0); + dst += emitOutputByte(dst, (code >> 8) & 0xFF); + } + else + { + dst += emitOutputWord(dst, code >> 16); + dst += emitOutputWord(dst, code & 0xFFFF); + } + code = regcode; } // Is this a 'big' opcode? @@ -9319,7 +9375,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) if (EBPbased) { // EBP-based variable: does the offset fit in a byte? - if (Is4ByteAVXInstruction(ins)) + if (Is4ByteSSE4OrAVXInstruction(ins)) { if (dspInByte) { @@ -9358,7 +9414,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) dspIsZero = (dsp == 0); // Does the offset fit in a byte? - if (Is4ByteAVXInstruction(ins)) + if (Is4ByteSSE4OrAVXInstruction(ins)) { if (dspInByte) { @@ -9628,19 +9684,27 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) #endif //_TARGET_X86_ // Special case emitting AVX instructions - if (Is4ByteAVXInstruction(ins)) + if (Is4ByteSSE4OrAVXInstruction(ins)) { unsigned regcode = insEncodeReg345(ins, id->idReg1(), size, &code); dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); - // Emit last opcode byte - // TODO-XArch-CQ: Right now support 4-byte opcode instructions only - assert((code & 0xFF) == 0); - dst += emitOutputByte(dst, (code >> 8) & 0xFF); - code = 0; + if (UseVEXEncoding()) + { + // Emit last opcode byte + // TODO-XArch-CQ: Right now support 4-byte opcode instructions only + assert((code & 0xFF) == 0); + dst += emitOutputByte(dst, (code >> 8) & 0xFF); + } + else + { + dst += emitOutputWord(dst, code >> 16); + dst += emitOutputWord(dst, code & 0xFFFF); + } // Emit Mod,R/M byte dst += emitOutputByte(dst, regcode | 0x05); + code = 0; } // Is this a 'big' opcode? else if (code & 0xFF000000) @@ -10280,28 +10344,30 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) unsigned regCode = insEncodeReg345(ins, reg1, size, &code); regCode |= insEncodeReg012(ins, reg2, size, &code); - // In case of AVX instructions that take 3 operands, we generally want to encode reg1 - // as first source. In this case, reg1 is both a source and a destination. - // The exception is the "merge" 3-operand case, where we have a move instruction, such - // as movss, and we want to merge the source with itself. - // - // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For - // now we use the single source as source1 and source2. - if (IsDstDstSrcAVXInstruction(ins)) + if (TakesVexPrefix(ins)) { - // encode source/dest operand reg in 'vvvv' bits in 1's complement form - code = insEncodeReg3456(ins, reg1, size, code); - } - else if (IsDstSrcSrcAVXInstruction(ins)) - { - // encode source operand reg in 'vvvv' bits in 1's complement form - code = insEncodeReg3456(ins, reg2, size, code); + // In case of AVX instructions that take 3 operands, we generally want to encode reg1 + // as first source. In this case, reg1 is both a source and a destination. + // The exception is the "merge" 3-operand case, where we have a move instruction, such + // as movss, and we want to merge the source with itself. + // + // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For + // now we use the single source as source1 and source2. + if (IsDstDstSrcAVXInstruction(ins)) + { + // encode source/dest operand reg in 'vvvv' bits in 1's complement form + code = insEncodeReg3456(ins, reg1, size, code); + } + else if (IsDstSrcSrcAVXInstruction(ins)) + { + // encode source operand reg in 'vvvv' bits in 1's complement form + code = insEncodeReg3456(ins, reg2, size, code); + } } // Output the REX prefix dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); - // Is this a 'big' opcode? if (code & 0xFF000000) { // Output the highest word of the opcode @@ -10332,7 +10398,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) assert(IsAVXInstruction(ins) || IsSSE4Instruction(ins)); if ((code & 0xFF00) == 0xC000) { - dst += emitOutputByte(dst, (0xC0 | regCode)); + dst += emitOutputWord(dst, code | (regCode << 8)); } else { @@ -11859,51 +11925,57 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) } } - regcode = (insEncodeReg345(ins, rReg, size, &code) | insEncodeReg012(ins, mReg, size, &code)) << 8; + regcode = (insEncodeReg345(ins, rReg, size, &code) | insEncodeReg012(ins, mReg, size, &code)); // Output the REX prefix dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); - if (Is4ByteAVXInstruction(ins)) + if (code & 0xFF000000) { - assert((code & 0xFF) == 0); - if ((code & 0xFF00) == 0xC000) - { - dst += emitOutputWord(dst, code | regcode); - } - else + // Output the highest word of the opcode + dst += emitOutputWord(dst, code >> 16); + code &= 0x0000FFFF; + + if (Is4ByteSSE4Instruction(ins)) { - dst += emitOutputByte(dst, (code >> 8) & 0xFF); - dst += emitOutputByte(dst, 0xC0 | (regcode >> 8)); + // Output 3rd byte of the opcode + dst += emitOutputByte(dst, code); + code &= 0xFF00; } } - else if (code & 0xFF000000) + else if (code & 0x00FF0000) { - dst += emitOutputWord(dst, code >> 16); + dst += emitOutputByte(dst, code >> 16); code &= 0x0000FFFF; + } - if (Is4ByteSSE4Instruction(ins)) + // If byte 4 is 0xC0, then it contains the Mod/RM encoding for a 3-byte + // encoding. Otherwise, this is an instruction with a 4-byte encoding, + // and the Mod/RM encoding needs to go in the 5th byte. + // TODO-XArch-CQ: Currently, this will only support registers in the 5th byte. + // We probably need a different mechanism to identify the 4-byte encodings. + if ((code & 0xFF) == 0x00) + { + // This case happens for SSE4/AVX instructions only + assert(IsAVXInstruction(ins) || IsSSE4Instruction(ins)); + if ((code & 0xFF00) == 0xC000) { - dst += emitOutputWord(dst, code); - dst += emitOutputByte(dst, 0xC0 | (regcode >> 8)); + dst += emitOutputWord(dst, code | (regcode << 8)); } else { - assert((code & 0xFF00) == 0xC000); - dst += emitOutputWord(dst, code | regcode); + dst += emitOutputByte(dst, (code >> 8) & 0xFF); + dst += emitOutputByte(dst, (0xC0 | regcode)); } } - else if (code & 0x00FF0000) + else if ((code & 0xFF00) == 0xC000) { - dst += emitOutputByte(dst, code >> 16); - code &= 0x0000FFFF; - assert((code & 0xFF00) == 0xC000); - dst += emitOutputWord(dst, code | regcode); + dst += emitOutputWord(dst, code | (regcode << 8)); } else { - assert((code & 0xFF00) == 0xC000); - dst += emitOutputWord(dst, code | regcode); + dst += emitOutputWord(dst, code); + dst += emitOutputByte(dst, (0xC0 | regcode)); } dst += emitOutputByte(dst, emitGetInsSC(id)); @@ -11983,7 +12055,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) code = insCodeRM(ins); // Special case 4-byte AVX instructions - if (Is4ByteAVXInstruction(ins)) + if (Is4ByteSSE4OrAVXInstruction(ins)) { dst = emitOutputAM(dst, id, code, &cnsVal); } @@ -12109,7 +12181,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) code = insCodeRM(ins); // Special case 4-byte AVX instructions - if (Is4ByteAVXInstruction(ins)) + if (Is4ByteSSE4OrAVXInstruction(ins)) { dst = emitOutputSV(dst, id, code, &cnsVal); } @@ -12143,7 +12215,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) // 4-byte AVX instructions are special cased inside emitOutputSV // since they do not have space to encode ModRM byte. - if (Is4ByteAVXInstruction(ins)) + if (Is4ByteSSE4OrAVXInstruction(ins)) { dst = emitOutputSV(dst, id, code); } @@ -12171,7 +12243,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) // 4-byte AVX instructions are special cased inside emitOutputSV // since they do not have space to encode ModRM byte. - if (Is4ByteAVXInstruction(ins)) + if (Is4ByteSSE4OrAVXInstruction(ins)) { dst = emitOutputSV(dst, id, code); } @@ -12198,7 +12270,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) // 4-byte AVX instructions are special cased inside emitOutputSV // since they do not have space to encode ModRM byte. - if (Is4ByteAVXInstruction(ins)) + if (Is4ByteSSE4OrAVXInstruction(ins)) { dst = emitOutputSV(dst, id, code, &cnsVal); } @@ -12272,7 +12344,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) code = insCodeRM(ins); // Special case 4-byte AVX instructions - if (Is4ByteAVXInstruction(ins)) + if (Is4ByteSSE4OrAVXInstruction(ins)) { dst = emitOutputCV(dst, id, code, &cnsVal); } @@ -12304,7 +12376,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) case IF_RRW_MRD: code = insCodeRM(ins); // Special case 4-byte AVX instructions - if (Is4ByteAVXInstruction(ins)) + if (Is4ByteSSE4OrAVXInstruction(ins)) { dst = emitOutputCV(dst, id, code); } @@ -12332,7 +12404,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) code = insCodeRM(ins); // Special case 4-byte AVX instructions - if (Is4ByteAVXInstruction(ins)) + if (Is4ByteSSE4OrAVXInstruction(ins)) { dst = emitOutputCV(dst, id, code); } @@ -12359,7 +12431,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) code = insCodeRM(ins); // Special case 4-byte AVX instructions - if (Is4ByteAVXInstruction(ins)) + if (Is4ByteSSE4OrAVXInstruction(ins)) { dst = emitOutputCV(dst, id, code, &cnsVal); } diff --git a/src/jit/emitxarch.h b/src/jit/emitxarch.h index 2fb1dd1..6acd835 100644 --- a/src/jit/emitxarch.h +++ b/src/jit/emitxarch.h @@ -105,6 +105,7 @@ void SetUseSSE4(bool value) } bool EncodedBySSE38orSSE3A(instruction ins); bool Is4ByteSSE4Instruction(instruction ins); +bool Is4ByteSSE4OrAVXInstruction(instruction ins); bool hasRexPrefix(code_t code) { @@ -183,7 +184,6 @@ bool IsThreeOperandAVXInstruction(instruction ins) { return (IsDstDstSrcAVXInstruction(ins) || IsDstSrcSrcAVXInstruction(ins)); } -bool Is4ByteAVXInstruction(instruction ins); bool isAvxBlendv(instruction ins) { return ins == INS_vblendvps || ins == INS_vblendvpd || ins == INS_vpblendvb; @@ -234,10 +234,6 @@ bool IsThreeOperandAVXInstruction(instruction ins) { return false; } -bool Is4ByteAVXInstruction(instruction ins) -{ - return false; -} bool isAvxBlendv(instruction ins) { return false; -- 2.7.4