From 1b028906970c1ef5ecb53b8a2a26f2c9a9376ee1 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 17 May 2019 21:28:38 -0700 Subject: [PATCH] Ensure we don't underestimate the code size for 4 byte SSE instruction. (dotnet/coreclr#24555) * Ensure the code size estimate for emitIns_R_S is correct for 4 byte SSE instruction. * Centralizing the Is4ByteSSEInstruction size adjustment handling * Removing unnecessary calls to emitGetVexPrefixAdjustedSize * Ensure all registers are checked against IsExtendedReg * Ensure that the ival size is correct for SSE/AVX instructions * Applying formatting patch * Ensure all cases for emitIns_R_R_I are covered * Fixing a inst_RV_RV_IV call to ensure ival fits in a byte * Centralize some more checks into emitGetAdjustedSize * Applying formatting patch Commit migrated from https://github.com/dotnet/coreclr/commit/a3e43d62917bb542e1adda0a06684061ef77067f --- src/coreclr/src/jit/codegenxarch.cpp | 4 +- src/coreclr/src/jit/emitxarch.cpp | 508 ++++++++++++++++--------------- src/coreclr/src/jit/emitxarch.h | 21 +- src/coreclr/src/jit/simdcodegenxarch.cpp | 70 +++-- 4 files changed, 307 insertions(+), 296 deletions(-) diff --git a/src/coreclr/src/jit/codegenxarch.cpp b/src/coreclr/src/jit/codegenxarch.cpp index 2361fd8..0297979 100644 --- a/src/coreclr/src/jit/codegenxarch.cpp +++ b/src/coreclr/src/jit/codegenxarch.cpp @@ -6985,7 +6985,7 @@ void CodeGen::genCkfinite(GenTree* treeNode) { inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType); } - inst_RV_RV_IV(INS_shufps, EA_16BYTE, targetReg, targetReg, 0xb1); + inst_RV_RV_IV(INS_shufps, EA_16BYTE, targetReg, targetReg, (int8_t)0xb1); copyToTmpSrcReg = targetReg; } else @@ -7014,7 +7014,7 @@ void CodeGen::genCkfinite(GenTree* treeNode) else if (targetType == TYP_DOUBLE) { // We need to re-shuffle the targetReg to get the correct result. 
- inst_RV_RV_IV(INS_shufps, EA_16BYTE, targetReg, targetReg, 0xb1); + inst_RV_RV_IV(INS_shufps, EA_16BYTE, targetReg, targetReg, (int8_t)0xb1); } #endif // !_TARGET_64BIT_ diff --git a/src/coreclr/src/jit/emitxarch.cpp b/src/coreclr/src/jit/emitxarch.cpp index f7b3c5b..8850c8e 100644 --- a/src/coreclr/src/jit/emitxarch.cpp +++ b/src/coreclr/src/jit/emitxarch.cpp @@ -910,20 +910,34 @@ unsigned emitter::emitGetVexPrefixSize(instruction ins, emitAttr attr) return 0; } -// VEX prefix encodes some bytes of the opcode and as a result, overall size of the instruction reduces. -// Therefore, to estimate the size adding VEX prefix size and size of instruction opcode bytes will always overstimate. -// Instead this routine will adjust the size of VEX prefix based on the number of bytes of opcode it encodes so that -// instruction size estimate will be accurate. -// Basically this function will decrease the vexPrefixSize, -// so that opcodeSize + vexPrefixAdjustedSize will be the right size. -// rightOpcodeSize + vexPrefixSize -//=(opcodeSize - ExtrabytesSize) + vexPrefixSize -//=opcodeSize + (vexPrefixSize - ExtrabytesSize) -//=opcodeSize + vexPrefixAdjustedSize -unsigned emitter::emitGetVexPrefixAdjustedSize(instruction ins, emitAttr attr, code_t code) +//------------------------------------------------------------------------ +// emitGetAdjustedSize: Determines any size adjustment needed for a given instruction based on the current +// configuration. +// +// Arguments: +// ins -- The instruction being emitted +// attr -- The emit attribute +// code -- The current opcode and any known prefixes +unsigned emitter::emitGetAdjustedSize(instruction ins, emitAttr attr, code_t code) { + unsigned adjustedSize = 0; + if (IsAVXInstruction(ins)) { + // VEX prefix encodes some bytes of the opcode and as a result, overall size of the instruction reduces. + // Therefore, to estimate the size adding VEX prefix size and size of instruction opcode bytes will always + // overstimate. 
+ // Instead this routine will adjust the size of VEX prefix based on the number of bytes of opcode it encodes so + // that + // instruction size estimate will be accurate. + // Basically this will decrease the vexPrefixSize, so that opcodeSize + vexPrefixAdjustedSize will be the right + // size. + // + // rightOpcodeSize + vexPrefixSize + // = (opcodeSize - ExtrabytesSize) + vexPrefixSize + // = opcodeSize + (vexPrefixSize - ExtrabytesSize) + // = opcodeSize + vexPrefixAdjustedSize + unsigned vexPrefixAdjustedSize = emitGetVexPrefixSize(ins, attr); assert(vexPrefixAdjustedSize == 3); @@ -953,9 +967,29 @@ unsigned emitter::emitGetVexPrefixAdjustedSize(instruction ins, emitAttr attr, c // So although we may have second byte escape prefix, we won't decrease vexPrefixAdjustedSize. } - return vexPrefixAdjustedSize; + adjustedSize = vexPrefixAdjustedSize; } - return 0; + else if (Is4ByteSSEInstruction(ins)) + { + // The 4-Byte SSE instructions require one additional byte to hold the ModRM byte + adjustedSize++; + } + else + { + if (ins == INS_crc32) + { + // Adjust code size for CRC32 that has 4-byte opcode but does not use SSE38 or SSE3A encoding. + adjustedSize++; + } + + if ((attr == EA_2BYTE) && (ins != INS_movzx) && (ins != INS_movsx)) + { + // Most 16-bit operand instructions will need a 0x66 prefix. 
+ adjustedSize++; + } + } + + return adjustedSize; } // Get size of rex or vex prefix emitted in code @@ -1740,9 +1774,74 @@ inline UNATIVE_OFFSET emitter::emitInsSize(code_t code) return size; } -inline UNATIVE_OFFSET emitter::emitInsSizeRM(instruction ins) +//------------------------------------------------------------------------ +// emitInsSizeRR: Determines the code size for an instruction encoding that does not have any addressing modes +// +// Arguments: +// ins -- The instruction being emitted +// code -- The current opcode and any known prefixes +inline UNATIVE_OFFSET emitter::emitInsSizeRR(instrDesc* id, code_t code) +{ + assert(id->idIns() != INS_invalid); + + instruction ins = id->idIns(); + emitAttr attr = id->idOpSize(); + + UNATIVE_OFFSET sz = emitInsSize(code); + + sz += emitGetAdjustedSize(ins, attr, code); + + // REX prefix + if (TakesRexWPrefix(ins, attr) || IsExtendedReg(id->idReg1(), attr) || IsExtendedReg(id->idReg2(), attr) || + (!id->idIsSmallDsc() && (IsExtendedReg(id->idReg3(), attr) || IsExtendedReg(id->idReg4(), attr)))) + { + sz += emitGetRexPrefixSize(ins); + } + + return sz; +} + +//------------------------------------------------------------------------ +// emitInsSizeRR: Determines the code size for an instruction encoding that does not have any addressing modes and +// includes an immediate value +// +// Arguments: +// ins -- The instruction being emitted +// code -- The current opcode and any known prefixes +// val -- The immediate value to encode +inline UNATIVE_OFFSET emitter::emitInsSizeRR(instrDesc* id, code_t code, int val) { - return emitInsSize(insCodeRM(ins)); + instruction ins = id->idIns(); + UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(id->idOpSize()); + bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test); + +#ifdef _TARGET_AMD64_ + // mov reg, imm64 is the only opcode which takes a full 8 byte immediate + // all other opcodes take a sign-extended 4-byte immediate + noway_assert(valSize <= 
sizeof(INT32) || !id->idIsCnsReloc()); +#endif // _TARGET_AMD64_ + + if (valSize > sizeof(INT32)) + { + valSize = sizeof(INT32); + } + + if (id->idIsCnsReloc()) + { + valInByte = false; // relocs can't be placed in a byte + assert(valSize == sizeof(INT32)); + } + + if (valInByte) + { + valSize = sizeof(char); + } + else + { + assert(!IsSSEOrAVXInstruction(ins)); + } + + return valSize + emitInsSizeRR(id, code); } inline UNATIVE_OFFSET emitter::emitInsSizeRR(instruction ins, regNumber reg1, regNumber reg2, emitAttr attr) @@ -1765,14 +1864,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeRR(instruction ins, regNumber reg1, re sz = emitInsSize(insEncodeRMreg(ins, code)); } - // Most 16-bit operand instructions will need a prefix - if (size == EA_2BYTE && ins != INS_movsx && ins != INS_movzx) - { - sz += 1; - } - - // VEX prefix - sz += emitGetVexPrefixAdjustedSize(ins, size, insCodeRM(ins)); + sz += emitGetAdjustedSize(ins, size, insCodeRM(ins)); // REX prefix if (!hasRexPrefix(code)) @@ -1969,18 +2061,28 @@ inline UNATIVE_OFFSET emitter::emitInsSizeSV(code_t code, int var, int dsp) inline UNATIVE_OFFSET emitter::emitInsSizeSV(instrDesc* id, code_t code, int var, int dsp) { + assert(id->idIns() != INS_invalid); instruction ins = id->idIns(); emitAttr attrSize = id->idOpSize(); - UNATIVE_OFFSET prefix = emitGetVexPrefixAdjustedSize(ins, attrSize, code); + UNATIVE_OFFSET prefix = emitGetAdjustedSize(ins, attrSize, code); + + // REX prefix + if (TakesRexWPrefix(ins, attrSize) || IsExtendedReg(id->idReg1(), attrSize) || + IsExtendedReg(id->idReg2(), attrSize)) + { + prefix += emitGetRexPrefixSize(ins); + } + return prefix + emitInsSizeSV(code, var, dsp); } inline UNATIVE_OFFSET emitter::emitInsSizeSV(instrDesc* id, code_t code, int var, int dsp, int val) { + assert(id->idIns() != INS_invalid); instruction ins = id->idIns(); emitAttr attrSize = id->idOpSize(); UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(attrSize); - UNATIVE_OFFSET prefix = emitGetVexPrefixAdjustedSize(ins, 
attrSize, code); + UNATIVE_OFFSET prefix = emitGetAdjustedSize(ins, attrSize, code); bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test); #ifdef _TARGET_AMD64_ @@ -2004,12 +2106,16 @@ inline UNATIVE_OFFSET emitter::emitInsSizeSV(instrDesc* id, code_t code, int var { valSize = sizeof(char); } + else + { + assert(!IsSSEOrAVXInstruction(ins)); + } - // 16-bit operand instructions need a prefix. - // This referes to 66h size prefix override - if (id->idOpSize() == EA_2BYTE) + // 64-bit operand instructions will need a REX.W prefix + if (TakesRexWPrefix(ins, attrSize) || IsExtendedReg(id->idReg1(), attrSize) || + IsExtendedReg(id->idReg2(), attrSize)) { - prefix += 1; + prefix += emitGetRexPrefixSize(ins); } return prefix + valSize + emitInsSizeSV(code, var, dsp); @@ -2037,8 +2143,9 @@ static bool baseRegisterRequiresDisplacement(regNumber base) UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code) { - emitAttr attrSize = id->idOpSize(); + assert(id->idIns() != INS_invalid); instruction ins = id->idIns(); + emitAttr attrSize = id->idOpSize(); /* The displacement field is in an unusual place for calls */ ssize_t dsp = (ins == INS_call) ? emitGetInsCIdisp(id) : emitGetInsAmdAny(id); bool dspInByte = ((signed char)dsp == (ssize_t)dsp); @@ -2099,17 +2206,9 @@ UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code) else { size = 2; - - // Most 16-bit operands will require a size prefix. - // This refers to 66h size prefix override. 
- - if (attrSize == EA_2BYTE) - { - size++; - } } - size += emitGetVexPrefixAdjustedSize(ins, attrSize, code); + size += emitGetAdjustedSize(ins, attrSize, code); if (hasRexPrefix(code)) { @@ -2122,14 +2221,12 @@ UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code) size += emitGetRexPrefixSize(ins); } else if (IsExtendedReg(reg, EA_PTRSIZE) || IsExtendedReg(rgx, EA_PTRSIZE) || - ((ins != INS_call) && IsExtendedReg(id->idReg1(), attrSize))) + ((ins != INS_call) && (IsExtendedReg(id->idReg1(), attrSize) || IsExtendedReg(id->idReg2(), attrSize)))) { // Should have a REX byte size += emitGetRexPrefixSize(ins); } - size += emitAdjustSizeCrc32(ins, attrSize); - if (rgx == REG_NA) { /* The address is of the form "[reg+disp]" */ @@ -2260,6 +2357,7 @@ UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code) inline UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code, int val) { + assert(id->idIns() != INS_invalid); instruction ins = id->idIns(); UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(id->idOpSize()); bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test); @@ -2290,12 +2388,17 @@ inline UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code, int val { valSize = sizeof(char); } + else + { + assert(!IsSSEOrAVXInstruction(ins)); + } return valSize + emitInsSizeAM(id, code); } inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, code_t code) { + assert(id->idIns() != INS_invalid); instruction ins = id->idIns(); emitAttr attrSize = id->idOpSize(); @@ -2304,15 +2407,13 @@ inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, code_t code) // so we should only hit this path for statics that are RIP-relative UNATIVE_OFFSET size = sizeof(INT32); - size += emitGetVexPrefixAdjustedSize(ins, attrSize, code); - size += emitAdjustSizeCrc32(ins, attrSize); + size += emitGetAdjustedSize(ins, attrSize, code); - // Most 16-bit operand instructions will need a prefix. 
- // This refers to 66h size prefix override. - - if (attrSize == EA_2BYTE && ins != INS_movzx && ins != INS_movsx) + // 64-bit operand instructions will need a REX.W prefix + if (TakesRexWPrefix(ins, attrSize) || IsExtendedReg(id->idReg1(), attrSize) || + IsExtendedReg(id->idReg2(), attrSize)) { - size++; + size += emitGetRexPrefixSize(ins); } return size + emitInsSize(code); @@ -2340,6 +2441,10 @@ inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, code_t code, int val { valSize = sizeof(char); } + else + { + assert(!IsSSEOrAVXInstruction(ins)); + } return valSize + emitInsSizeCV(id, code); } @@ -2553,7 +2658,7 @@ void emitter::emitIns(instruction ins, emitAttr attr) insFormat fmt = IF_NONE; - sz += emitGetVexPrefixAdjustedSize(ins, attr, code); + sz += emitGetAdjustedSize(ins, attr, code); if (TakesRexWPrefix(ins, attr)) { sz += emitGetRexPrefixSize(ins); @@ -2849,12 +2954,6 @@ void emitter::emitInsLoadInd(instruction ins, emitAttr attr, regNumber dstReg, G id->idReg1(dstReg); emitHandleMemOp(mem, id, IF_RWR_ARD, ins); UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)); - if (Is4ByteSSEInstruction(ins)) - { - // The 4-Byte SSE instructions require an additional byte. - sz += 1; - } - id->idCodeSize(sz); dispIns(id); emitCurIGsize += sz; @@ -3571,15 +3670,8 @@ void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg) id->idInsFmt(fmt); id->idReg1(reg); - // 16-bit operand instructions will need a prefix. - // This refers to 66h size prefix override. 
- if (size == EA_2BYTE) - { - sz += 1; - } - // Vex bytes - sz += emitGetVexPrefixAdjustedSize(ins, attr, insEncodeMRreg(ins, reg, attr, insCodeMR(ins))); + sz += emitGetAdjustedSize(ins, attr, insEncodeMRreg(ins, reg, attr, insCodeMR(ins))); // REX byte if (IsExtendedReg(reg, attr) || TakesRexWPrefix(ins, attr)) @@ -3670,7 +3762,8 @@ void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t { if (IsSSEOrAVXInstruction(ins)) { - sz = 5; + sz = emitInsSize(insCodeMI(ins)); + sz += 1; } else if (size == EA_1BYTE && reg == REG_EAX && !instrIs3opImul(ins)) { @@ -3683,6 +3776,8 @@ void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t } else { + assert(!IsSSEOrAVXInstruction(ins)); + if (reg == REG_EAX && !instrIs3opImul(ins)) { sz = 1; @@ -3707,8 +3802,7 @@ void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t break; } - // Vex prefix size - sz += emitGetVexPrefixSize(ins, attr); + sz += emitGetAdjustedSize(ins, attr, insCodeMI(ins)); // Do we need a REX prefix for AMD64? We need one if we are using any extended register (REX.R), or if we have a // 64-bit sized operand (REX.W). 
Note that IMUL in our encoding is special, with a "built-in", implicit, target @@ -3723,12 +3817,6 @@ void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t id->idInsFmt(fmt); id->idReg1(reg); - // 16-bit operand instructions will need a prefix - if (size == EA_2BYTE) - { - sz += 1; - } - id->idCodeSize(sz); dispIns(id); @@ -3867,9 +3955,6 @@ void emitter::emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fld sz = emitInsSizeCV(id, insCodeMR(ins)); } - // Vex prefix size - sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins)); - if (TakesRexWPrefix(ins, attr)) { // REX.W prefix @@ -3911,12 +3996,6 @@ void emitter::emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNum UNATIVE_OFFSET sz = emitInsSizeRR(ins, reg1, reg2, attr); - if (Is4ByteSSEInstruction(ins)) - { - // The 4-Byte SSE instructions require one additional byte - sz += 1; - } - /* Special case: "XCHG" uses a different format */ insFormat fmt = (ins == INS_xchg) ? IF_RRW_RRW : emitInsModeFormat(ins, IF_RRD_RRD); @@ -3938,16 +4017,6 @@ void emitter::emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNum void emitter::emitIns_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int ival) { - // SSE2 version requires 5 bytes and some SSE/AVX version 6 bytes - UNATIVE_OFFSET sz = 4; - if (IsSSEOrAVXInstruction(ins)) - { - // AVX: 3 byte VEX prefix + 1 byte opcode + 1 byte ModR/M + 1 byte immediate - // SSE: 4 byte opcode + 1 byte ModR/M + 1 byte immediate - // SSE: 3 byte opcode + 1 byte ModR/M + 1 byte immediate - sz = (UseVEXEncoding() || Is4ByteSSEInstruction(ins)) ? 
6 : 5; - } - #ifdef _TARGET_AMD64_ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate // all other opcodes take a sign-extended 4-byte immediate @@ -3956,21 +4025,44 @@ void emitter::emitIns_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regN instrDesc* id = emitNewInstrSC(attr, ival); - // REX prefix - if (IsExtendedReg(reg1, attr) || IsExtendedReg(reg2, attr)) - { - sz += emitGetRexPrefixSize(ins); - } - - if ((ins == INS_pextrq || ins == INS_pinsrq) && !UseVEXEncoding()) - { - sz += 1; - } - id->idIns(ins); id->idInsFmt(IF_RRW_RRW_CNS); id->idReg1(reg1); id->idReg2(reg2); + + code_t code = 0; + + switch (ins) + { + case INS_pextrb: + case INS_pextrd: + case INS_pextrq: + case INS_pextrw_sse41: + case INS_extractps: + case INS_vextractf128: + case INS_vextracti128: + case INS_shld: + case INS_shrd: + { + code = insCodeMR(ins); + break; + } + + case INS_psrldq: + case INS_pslldq: + { + code = insCodeMI(ins); + break; + } + + default: + { + code = insCodeRM(ins); + break; + } + } + + UNATIVE_OFFSET sz = emitInsSizeRR(id, code, ival); id->idCodeSize(sz); dispIns(id); @@ -4043,12 +4135,6 @@ void emitter::emitIns_R_A(instruction ins, emitAttr attr, regNumber reg1, GenTre emitHandleMemOp(indir, id, IF_RRW_ARD, ins); UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)); - if (Is4ByteSSEInstruction(ins)) - { - // The 4-Byte SSE instructions require an additional byte. 
- sz += 1; - } - id->idCodeSize(sz); dispIns(id); @@ -4069,13 +4155,6 @@ void emitter::emitIns_R_A_I(instruction ins, emitAttr attr, regNumber reg1, GenT emitHandleMemOp(indir, id, IF_RRW_ARD_CNS, ins); UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival); - - if (Is4ByteSSEInstruction(ins)) - { - // The 4-Byte SSE instructions require two additional bytes - sz += 2; - } - id->idCodeSize(sz); dispIns(id); @@ -4097,13 +4176,6 @@ void emitter::emitIns_R_AR_I(instruction ins, emitAttr attr, regNumber reg1, reg id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival); - - if (Is4ByteSSEInstruction(ins)) - { - // The 4-Byte SSE instructions require an additional byte. - sz += 1; - } - id->idCodeSize(sz); dispIns(id); @@ -4130,13 +4202,6 @@ void emitter::emitIns_R_C_I( id->idAddr()->iiaFieldHnd = fldHnd; UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins), ival); - - if (Is4ByteSSEInstruction(ins)) - { - // The 4-Byte SSE instructions require two additional bytes - sz += 2; - } - id->idCodeSize(sz); dispIns(id); @@ -4160,13 +4225,6 @@ void emitter::emitIns_R_S_I(instruction ins, emitAttr attr, regNumber reg1, int #endif UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeRM(ins), varx, offs, ival); - - if (Is4ByteSSEInstruction(ins)) - { - // The 4-Byte SSE instructions require two additional bytes - sz += 2; - } - id->idCodeSize(sz); dispIns(id); @@ -4320,10 +4378,6 @@ void emitter::emitIns_R_R_R(instruction ins, emitAttr attr, regNumber targetReg, { assert(IsSSEOrAVXInstruction(ins)); assert(IsThreeOperandAVXInstruction(ins)); - // Currently vex prefix only use three bytes mode. 
- // size = vex + opcode + ModR/M = 3 + 1 + 1 = 5 - // TODO-XArch-CQ: We should create function which can calculate all kinds of AVX instructions size in future - UNATIVE_OFFSET sz = 5; instrDesc* id = emitNewInstr(attr); id->idIns(ins); @@ -4332,7 +4386,9 @@ void emitter::emitIns_R_R_R(instruction ins, emitAttr attr, regNumber targetReg, id->idReg2(reg1); id->idReg3(reg2); + UNATIVE_OFFSET sz = emitInsSizeRR(id, insCodeRM(ins)); id->idCodeSize(sz); + dispIns(id); emitCurIGsize += sz; } @@ -4450,10 +4506,6 @@ void emitter::emitIns_R_R_R_I( { assert(IsSSEOrAVXInstruction(ins)); assert(IsThreeOperandAVXInstruction(ins)); - // Currently vex prefix only use three bytes mode. - // size = vex + opcode + ModR/M + 1-byte-cns = 3 + 1 + 1 + 1 = 6 - // TODO-XArch-CQ: We should create function which can calculate all kinds of AVX instructions size in future - UNATIVE_OFFSET sz = 6; instrDesc* id = emitNewInstrCns(attr, ival); id->idIns(ins); @@ -4462,7 +4514,39 @@ void emitter::emitIns_R_R_R_I( id->idReg2(reg1); id->idReg3(reg2); + code_t code = 0; + + switch (ins) + { + case INS_pextrb: + case INS_pextrd: + case INS_pextrq: + case INS_pextrw_sse41: + case INS_extractps: + case INS_vextractf128: + case INS_vextracti128: + { + code = insCodeMR(ins); + break; + } + + case INS_psrldq: + case INS_pslldq: + { + code = insCodeMI(ins); + break; + } + + default: + { + code = insCodeRM(ins); + break; + } + } + + UNATIVE_OFFSET sz = emitInsSizeRR(id, code, ival); id->idCodeSize(sz); + dispIns(id); emitCurIGsize += sz; } @@ -4683,10 +4767,6 @@ void emitter::emitIns_R_R_R_R( { assert(isAvxBlendv(ins)); assert(UseVEXEncoding()); - // Currently vex prefix only use three bytes mode. 
- // size = vex + opcode + ModR/M + 1-byte-cns(Reg) = 3 + 1 + 1 + 1 = 6 - // TODO-XArch-CQ: We should create function which can calculate all kinds of AVX instructions size in future - UNATIVE_OFFSET sz = 6; int ival = encodeXmmRegAsIval(reg3); instrDesc* id = emitNewInstrCns(attr, ival); @@ -4698,7 +4778,9 @@ void emitter::emitIns_R_R_R_R( id->idReg3(reg2); id->idReg4(reg3); + UNATIVE_OFFSET sz = emitInsSizeRR(id, insCodeRM(ins), ival); id->idCodeSize(sz); + dispIns(id); emitCurIGsize += sz; } @@ -4729,6 +4811,7 @@ void emitter::emitIns_R_C(instruction ins, emitAttr attr, regNumber reg, CORINFO id = emitNewInstrDsp(EA_1BYTE, offs); id->idIns(ins); id->idInsFmt(IF_RWR_MRD_OFF); + id->idReg1(reg); assert(ins == INS_mov && reg == REG_EAX); @@ -4742,6 +4825,7 @@ void emitter::emitIns_R_C(instruction ins, emitAttr attr, regNumber reg, CORINFO id = emitNewInstrDsp(attr, offs); id->idIns(ins); id->idInsFmt(fmt); + id->idReg1(reg); #ifdef _TARGET_X86_ // Special case: "mov eax, [addr]" is smaller. 
@@ -4767,16 +4851,6 @@ void emitter::emitIns_R_C(instruction ins, emitAttr attr, regNumber reg, CORINFO } } - // VEX prefix - sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)); - - // REX prefix - if (TakesRexWPrefix(ins, attr) || IsExtendedReg(reg, attr)) - { - sz += emitGetRexPrefixSize(ins); - } - - id->idReg1(reg); id->idCodeSize(sz); id->idAddr()->iiaFieldHnd = fldHnd; @@ -4814,6 +4888,7 @@ void emitter::emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE f id->idIns(ins); id->idInsFmt(fmt); + id->idReg1(reg); UNATIVE_OFFSET sz; @@ -4825,8 +4900,15 @@ void emitter::emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE f if (ins == INS_mov && reg == REG_EAX) { sz = 1 + TARGET_POINTER_SIZE; + if (size == EA_2BYTE) sz += 1; + + // REX prefix + if (TakesRexWPrefix(ins, attr) || IsExtendedReg(reg, attr)) + { + sz += emitGetRexPrefixSize(ins); + } } else #endif //_TARGET_X86_ @@ -4840,16 +4922,6 @@ void emitter::emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE f sz += 1; } - // VEX prefix - sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins)); - - // REX prefix - if (TakesRexWPrefix(ins, attr) || IsExtendedReg(reg, attr)) - { - sz += emitGetRexPrefixSize(ins); - } - - id->idReg1(reg); id->idCodeSize(sz); id->idAddr()->iiaFieldHnd = fldHnd; @@ -4895,20 +4967,11 @@ void emitter::emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE f instrDesc* id = emitNewInstrCnsDsp(attr, val, offs); id->idIns(ins); id->idInsFmt(fmt); + id->idAddr()->iiaFieldHnd = fldHnd; code_t code = insCodeMI(ins); UNATIVE_OFFSET sz = emitInsSizeCV(id, code, val); - // Vex prefix - sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMI(ins)); - - // REX prefix, if not already included in "code" - if (TakesRexWPrefix(ins, attr) && !hasRexPrefix(code)) - { - sz += emitGetRexPrefixSize(ins); - } - - id->idAddr()->iiaFieldHnd = fldHnd; id->idCodeSize(sz); dispIns(id); @@ -5174,13 +5237,6 @@ void emitter::emitIns_R_AR(instruction 
ins, emitAttr attr, regNumber ireg, regNu assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly sz = emitInsSizeAM(id, insCodeRM(ins)); - - if (Is4ByteSSEInstruction(ins)) - { - // The 4-Byte SSE instructions require an additional byte. - sz += 1; - } - id->idCodeSize(sz); dispIns(id); @@ -6522,28 +6578,15 @@ void emitter::emitIns_SIMD_R_R_S_R( void emitter::emitIns_S(instruction ins, emitAttr attr, int varx, int offs) { + UNATIVE_OFFSET sz; instrDesc* id = emitNewInstr(attr); - UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeMR(ins), varx, offs); insFormat fmt = emitInsModeFormat(ins, IF_SRD); - // 16-bit operand instructions will need a prefix - if (EA_SIZE(attr) == EA_2BYTE) - { - sz += 1; - } - - // VEX prefix - sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins)); - - // 64-bit operand instructions will need a REX.W prefix - if (TakesRexWPrefix(ins, attr)) - { - sz += emitGetRexPrefixSize(ins); - } - id->idIns(ins); id->idInsFmt(fmt); id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); + + sz = emitInsSizeSV(id, insCodeMR(ins), varx, offs); id->idCodeSize(sz); #ifdef DEBUG @@ -6557,35 +6600,24 @@ void emitter::emitIns_S(instruction ins, emitAttr attr, int varx, int offs) void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs) { + UNATIVE_OFFSET sz; instrDesc* id = emitNewInstr(attr); - UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeMR(ins), varx, offs); insFormat fmt = emitInsModeFormat(ins, IF_SRD_RRD); + id->idIns(ins); + id->idInsFmt(fmt); + id->idReg1(ireg); + id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); + + sz = emitInsSizeSV(id, insCodeMR(ins), varx, offs); + #ifdef _TARGET_X86_ if (attr == EA_1BYTE) { assert(isByteReg(ireg)); } #endif - // 16-bit operand instructions will need a prefix - if (EA_SIZE(attr) == EA_2BYTE) - { - sz++; - } - - // VEX prefix - sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins)); - - // 64-bit operand instructions will need a REX.W prefix - if 
(TakesRexWPrefix(ins, attr) || IsExtendedReg(ireg, attr)) - { - sz += emitGetRexPrefixSize(ins); - } - id->idIns(ins); - id->idInsFmt(fmt); - id->idReg1(ireg); - id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); id->idCodeSize(sz); #ifdef DEBUG id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs; @@ -6599,31 +6631,16 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int va emitAttr size = EA_SIZE(attr); noway_assert(emitVerifyEncodable(ins, size, ireg)); + UNATIVE_OFFSET sz; instrDesc* id = emitNewInstr(attr); - UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeRM(ins), varx, offs); insFormat fmt = emitInsModeFormat(ins, IF_RRD_SRD); - // Most 16-bit operand instructions need a prefix - if (size == EA_2BYTE && ins != INS_movsx && ins != INS_movzx) - { - sz++; - } - - // VEX prefix - sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)); - - // 64-bit operand instructions will need a REX.W prefix - if (TakesRexWPrefix(ins, attr) || IsExtendedReg(ireg, attr)) - { - sz += emitGetRexPrefixSize(ins); - } - - sz += emitAdjustSizeCrc32(ins, attr); - id->idIns(ins); id->idInsFmt(fmt); id->idReg1(ireg); id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); + + sz = emitInsSizeSV(id, insCodeRM(ins), varx, offs); id->idCodeSize(sz); #ifdef DEBUG id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs; @@ -6664,18 +6681,9 @@ void emitter::emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, in instrDesc* id = emitNewInstrCns(attr, val); id->idIns(ins); id->idInsFmt(fmt); - UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeMI(ins), varx, offs, val); - - // VEX prefix - sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMI(ins)); - - // 64-bit operand instructions will need a REX.W prefix - if (TakesRexWPrefix(ins, attr)) - { - sz += emitGetRexPrefixSize(ins); - } - id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); + + UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeMI(ins), varx, offs, val); id->idCodeSize(sz); #ifdef DEBUG 
id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs; diff --git a/src/coreclr/src/jit/emitxarch.h b/src/coreclr/src/jit/emitxarch.h index 5141448..52736ba 100644 --- a/src/coreclr/src/jit/emitxarch.h +++ b/src/coreclr/src/jit/emitxarch.h @@ -40,10 +40,11 @@ struct CnsVal }; UNATIVE_OFFSET emitInsSize(code_t code); -UNATIVE_OFFSET emitInsSizeRM(instruction ins); UNATIVE_OFFSET emitInsSizeSV(code_t code, int var, int dsp); UNATIVE_OFFSET emitInsSizeSV(instrDesc* id, code_t code, int var, int dsp); UNATIVE_OFFSET emitInsSizeSV(instrDesc* id, code_t code, int var, int dsp, int val); +UNATIVE_OFFSET emitInsSizeRR(instrDesc* id, code_t code); +UNATIVE_OFFSET emitInsSizeRR(instrDesc* id, code_t code, int val); UNATIVE_OFFSET emitInsSizeRR(instruction ins, regNumber reg1, regNumber reg2, emitAttr attr); UNATIVE_OFFSET emitInsSizeAM(instrDesc* id, code_t code); UNATIVE_OFFSET emitInsSizeAM(instrDesc* id, code_t code, int val); @@ -67,7 +68,7 @@ unsigned emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, code_t& co unsigned emitGetRexPrefixSize(instruction ins); unsigned emitGetVexPrefixSize(instruction ins, emitAttr attr); unsigned emitGetPrefixSize(code_t code); -unsigned emitGetVexPrefixAdjustedSize(instruction ins, emitAttr attr, code_t code); +unsigned emitGetAdjustedSize(instruction ins, emitAttr attr, code_t code); unsigned insEncodeReg012(instruction ins, regNumber reg, emitAttr size, code_t* code); unsigned insEncodeReg345(instruction ins, regNumber reg, emitAttr size, code_t* code); @@ -96,22 +97,6 @@ bool Is4ByteSSEInstruction(instruction ins); bool AreUpper32BitsZero(regNumber reg); -// Adjust code size for CRC32 that has 4-byte opcode -// but does not use SSE38 or EES3A encoding. 
-UNATIVE_OFFSET emitAdjustSizeCrc32(instruction ins, emitAttr attr) -{ - UNATIVE_OFFSET szDelta = 0; - if (ins == INS_crc32) - { - szDelta += 1; - if (attr == EA_2BYTE) - { - szDelta += 1; - } - } - return szDelta; -} - bool hasRexPrefix(code_t code) { #ifdef _TARGET_AMD64_ diff --git a/src/coreclr/src/jit/simdcodegenxarch.cpp b/src/coreclr/src/jit/simdcodegenxarch.cpp index 307612d..a60404a 100644 --- a/src/coreclr/src/jit/simdcodegenxarch.cpp +++ b/src/coreclr/src/jit/simdcodegenxarch.cpp @@ -693,7 +693,8 @@ void CodeGen::genSIMDScalarMove( // to zero all but the lower bits. unsigned int insertpsImm = (INSERTPS_TARGET_SELECT(0) | INSERTPS_ZERO(1) | INSERTPS_ZERO(2) | INSERTPS_ZERO(3)); - inst_RV_RV_IV(INS_insertps, EA_16BYTE, targetReg, srcReg, insertpsImm); + assert((insertpsImm >= 0) && (insertpsImm <= 255)); + inst_RV_RV_IV(INS_insertps, EA_16BYTE, targetReg, srcReg, (int8_t)insertpsImm); } else { @@ -930,7 +931,8 @@ void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode) } ins = getOpForSIMDIntrinsic(SIMDIntrinsicShuffleSSE2, baseType); - getEmitter()->emitIns_R_R_I(ins, emitActualTypeSize(targetType), targetReg, targetReg, shuffleControl); + assert((shuffleControl >= 0) && (shuffleControl <= 255)); + getEmitter()->emitIns_R_R_I(ins, emitActualTypeSize(targetType), targetReg, targetReg, (int8_t)shuffleControl); } genProduceReg(simdNode); @@ -1002,7 +1004,8 @@ void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode) regNumber operandReg = operandRegs[initCount - i - 1]; if (offset != 0) { - getEmitter()->emitIns_R_I(insLeftShift, EA_16BYTE, vectorReg, baseTypeSize); + assert((baseTypeSize >= 0) && (baseTypeSize <= 255)); + getEmitter()->emitIns_R_I(insLeftShift, EA_16BYTE, vectorReg, (int8_t)baseTypeSize); } genSIMDScalarMove(targetType, baseType, vectorReg, operandReg, SMT_PreserveUpper); @@ -1369,7 +1372,7 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode) // get the absolute value of src and put it into tmpReg2 and targetReg 
inst_RV_RV(INS_movdqu, tmpReg2, op1Reg, baseType, emitActualTypeSize(simdType)); - getEmitter()->emitIns_R_R_I(INS_pshufd, emitActualTypeSize(simdType), tmpReg, op1Reg, SHUFFLE_WWYY); + getEmitter()->emitIns_R_R_I(INS_pshufd, emitActualTypeSize(simdType), tmpReg, op1Reg, (int8_t)SHUFFLE_WWYY); getEmitter()->emitIns_R_I(INS_psrad, emitActualTypeSize(simdType), tmpReg, 32); inst_RV_RV(INS_pxor, tmpReg2, tmpReg, baseType, emitActualTypeSize(simdType)); inst_RV_RV(INS_psubq, tmpReg2, tmpReg, baseType, emitActualTypeSize(simdType)); @@ -1558,7 +1561,8 @@ void CodeGen::genSIMDIntrinsicWiden(GenTreeSIMD* simdNode) { ival = 0xe8; } - getEmitter()->emitIns_R_R_I(INS_vpermq, emitSize, targetReg, op1Reg, ival); + assert((ival >= 0) && (ival <= 255)); + getEmitter()->emitIns_R_R_I(INS_vpermq, emitSize, targetReg, op1Reg, (int8_t)ival); } else if (targetReg != op1Reg) { @@ -1615,7 +1619,7 @@ void CodeGen::genSIMDIntrinsicNarrow(GenTreeSIMD* simdNode) } else { - inst_RV_RV_IV(INS_shufps, EA_16BYTE, targetReg, tmpReg, SHUFFLE_YXYX); + inst_RV_RV_IV(INS_shufps, EA_16BYTE, targetReg, tmpReg, (int8_t)SHUFFLE_YXYX); } } else if (varTypeIsLong(baseType)) @@ -1639,8 +1643,8 @@ void CodeGen::genSIMDIntrinsicNarrow(GenTreeSIMD* simdNode) getEmitter()->emitIns_R_R_I(INS_vinserti128, EA_32BYTE, tmpReg, tmpReg2, 0x01); inst_RV_RV(ins_Copy(simdType), tmpReg2, op1Reg, simdType, emitSize); getEmitter()->emitIns_R_R_I(INS_vinserti128, EA_32BYTE, tmpReg2, op2Reg, 0x01); - getEmitter()->emitIns_R_R_I(INS_pshufd, emitSize, tmpReg, tmpReg, SHUFFLE_XXZX); - getEmitter()->emitIns_R_R_I(INS_pshufd, emitSize, targetReg, tmpReg2, SHUFFLE_XXZX); + getEmitter()->emitIns_R_R_I(INS_pshufd, emitSize, tmpReg, tmpReg, (int8_t)SHUFFLE_XXZX); + getEmitter()->emitIns_R_R_I(INS_pshufd, emitSize, targetReg, tmpReg2, (int8_t)SHUFFLE_XXZX); inst_RV_RV_RV(INS_punpcklqdq, targetReg, targetReg, tmpReg, emitSize); } else @@ -1656,9 +1660,9 @@ void CodeGen::genSIMDIntrinsicNarrow(GenTreeSIMD* simdNode) instruction 
shiftRightIns = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, TYP_SIMD16); emitAttr emitSize = emitTypeSize(simdType); - getEmitter()->emitIns_R_R_I(INS_pshufd, emitSize, targetReg, op1Reg, SHUFFLE_ZXXX); + getEmitter()->emitIns_R_R_I(INS_pshufd, emitSize, targetReg, op1Reg, (int8_t)SHUFFLE_ZXXX); getEmitter()->emitIns_R_I(shiftRightIns, emitSize, targetReg, 8); - getEmitter()->emitIns_R_R_I(INS_pshufd, emitSize, tmpReg, op2Reg, SHUFFLE_XXZX); + getEmitter()->emitIns_R_R_I(INS_pshufd, emitSize, tmpReg, op2Reg, (int8_t)SHUFFLE_XXZX); getEmitter()->emitIns_R_I(shiftLeftIns, emitSize, tmpReg, 8); inst_RV_RV(INS_por, targetReg, tmpReg, simdType); } @@ -1682,6 +1686,8 @@ void CodeGen::genSIMDIntrinsicNarrow(GenTreeSIMD* simdNode) instruction shiftLeftIns = getOpForSIMDIntrinsic(SIMDIntrinsicShiftLeftInternal, baseType); instruction shiftRightIns = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, baseType); + assert((shiftCount >= 0) && (shiftCount <= 127)); + if (level == SIMD_AVX2_Supported) { regNumber tmpReg = simdNode->ExtractTempReg(RBM_ALLFLOAT); @@ -1850,7 +1856,7 @@ void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode) // Extract first and third double word results from tmpReg // tmpReg = shuffle(0,0,2,0) of tmpReg - getEmitter()->emitIns_R_R_I(INS_pshufd, emitActualTypeSize(targetType), tmpReg, tmpReg, SHUFFLE_XXZX); + getEmitter()->emitIns_R_R_I(INS_pshufd, emitActualTypeSize(targetType), tmpReg, tmpReg, (int8_t)SHUFFLE_XXZX); // targetReg[63:0] = op1[0] * op2[0] // targetReg[127:64] = op1[2] * op2[2] @@ -1859,7 +1865,8 @@ void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode) // Extract first and third double word results from targetReg // targetReg = shuffle(0,0,2,0) of targetReg - getEmitter()->emitIns_R_R_I(INS_pshufd, emitActualTypeSize(targetType), targetReg, targetReg, SHUFFLE_XXZX); + getEmitter()->emitIns_R_R_I(INS_pshufd, emitActualTypeSize(targetType), targetReg, targetReg, + (int8_t)SHUFFLE_XXZX); // pack the results into a 
single vector inst_RV_RV(INS_punpckldq, targetReg, tmpReg, targetType, emitActualTypeSize(targetType)); @@ -1898,7 +1905,7 @@ void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode) { // These are 16 byte operations, so we subtract from 16 bytes, not the vector register length. unsigned shiftCount = 16 - simdNode->gtSIMDSize; - assert(shiftCount != 0); + assert((shiftCount > 0) && (shiftCount <= 16)); instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftLeftInternal, TYP_SIMD16); getEmitter()->emitIns_R_I(ins, EA_16BYTE, targetReg, shiftCount); ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, TYP_SIMD16); @@ -1975,7 +1982,8 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) if (varTypeIsFloating(baseType)) { - getEmitter()->emitIns_R_R_I(ins, emitActualTypeSize(targetType), targetReg, otherReg, ival); + assert((ival >= 0) && (ival <= 255)); + getEmitter()->emitIns_R_R_I(ins, emitActualTypeSize(targetType), targetReg, otherReg, (int8_t)ival); } else { @@ -2003,7 +2011,8 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) inst_RV_RV(ins_Copy(targetType), targetReg, op1Reg, targetType, emitActualTypeSize(targetType)); } - getEmitter()->emitIns_R_R_I(ins, emitActualTypeSize(targetType), targetReg, op2Reg, ival); + assert((ival >= 0) && (ival <= 255)); + getEmitter()->emitIns_R_R_I(ins, emitActualTypeSize(targetType), targetReg, op2Reg, (int8_t)ival); } break; @@ -2061,7 +2070,8 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) if (varTypeIsFloating(baseType)) { - getEmitter()->emitIns_R_R_I(ins, emitActualTypeSize(simdType), tmpReg1, otherReg, ival); + assert((ival >= 0) && (ival <= 255)); + getEmitter()->emitIns_R_R_I(ins, emitActualTypeSize(simdType), tmpReg1, otherReg, (int8_t)ival); } else { @@ -2219,9 +2229,9 @@ void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode) // inst_RV_RV(INS_mulps, targetReg, op2Reg); inst_RV_RV(INS_movaps, tmpReg1, targetReg); - inst_RV_RV_IV(INS_shufps, EA_16BYTE, tmpReg1, 
tmpReg1, SHUFFLE_ZXXY); + inst_RV_RV_IV(INS_shufps, EA_16BYTE, tmpReg1, tmpReg1, (int8_t)SHUFFLE_ZXXY); inst_RV_RV(INS_addps, targetReg, tmpReg1); - inst_RV_RV_IV(INS_shufps, EA_16BYTE, tmpReg1, tmpReg1, SHUFFLE_XXWW); + inst_RV_RV_IV(INS_shufps, EA_16BYTE, tmpReg1, tmpReg1, (int8_t)SHUFFLE_XXWW); inst_RV_RV(INS_addps, targetReg, tmpReg1); } else if (baseType == TYP_FLOAT) @@ -2240,10 +2250,10 @@ void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode) // inst_RV_RV(INS_mulps, targetReg, op2Reg); inst_RV_RV(INS_movaps, tmpReg1, targetReg); - inst_RV_RV_IV(INS_shufps, EA_16BYTE, tmpReg1, tmpReg1, SHUFFLE_ZWXY); + inst_RV_RV_IV(INS_shufps, EA_16BYTE, tmpReg1, tmpReg1, (int8_t)SHUFFLE_ZWXY); inst_RV_RV(INS_addps, targetReg, tmpReg1); inst_RV_RV(INS_movaps, tmpReg1, targetReg); - inst_RV_RV_IV(INS_shufps, EA_16BYTE, tmpReg1, tmpReg1, SHUFFLE_XYZW); + inst_RV_RV_IV(INS_shufps, EA_16BYTE, tmpReg1, tmpReg1, (int8_t)SHUFFLE_XYZW); inst_RV_RV(INS_addps, targetReg, tmpReg1); } else @@ -2290,7 +2300,8 @@ void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode) // dpps computes the dot product of the upper & lower halves of the 32-byte register. // Notice that if this is a TYP_SIMD16 or smaller on AVX, then we don't need a tmpReg. unsigned mask = ((simdNode->gtFlags & GTF_SIMD12_OP) != 0) ? 0x71 : 0xf1; - inst_RV_RV_IV(INS_dpps, emitSize, targetReg, op2Reg, mask); + assert((mask >= 0) && (mask <= 255)); + inst_RV_RV_IV(INS_dpps, emitSize, targetReg, op2Reg, (int8_t)mask); // dpps computes the dot product of the upper & lower halves of the 32-byte register. // Notice that if this is a TYP_SIMD16 or smaller on AVX, then we don't need a tmpReg. // If this is TYP_SIMD32, we need to combine the lower & upper results. 
@@ -2578,6 +2589,7 @@ void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode) if (byteShiftCnt != 0) { instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, TYP_SIMD16); + assert((byteShiftCnt > 0) && (byteShiftCnt < 32)); getEmitter()->emitIns_R_I(ins, emitActualTypeSize(simdType), targetReg, byteShiftCnt); } } @@ -2604,6 +2616,7 @@ void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode) index -= 8; } + assert((index >= 0) && (index <= 8)); getEmitter()->emitIns_R_R_I(INS_pextrw, emitTypeSize(TYP_INT), targetReg, srcReg, index); bool ZeroOrSignExtnReqd = true; @@ -2647,6 +2660,7 @@ void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode) inst_RV_RV(ins_Copy(simdType), tmpReg, srcReg, simdType, emitActualTypeSize(simdType)); } + assert((byteShiftCnt > 0) && (byteShiftCnt <= 32)); ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, TYP_SIMD16); getEmitter()->emitIns_R_I(ins, emitActualTypeSize(simdType), tmpReg, byteShiftCnt); } @@ -2743,6 +2757,8 @@ void CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode) // (Note that for mov_xmm2i, the int register is always in the reg2 position. inst_RV_RV(ins, op2Reg, tmpReg, baseType); + assert((index >= 0) && (index <= 15)); + // First insert the lower 16-bits of tmpReg in targetReg at 2*index position // since every float has two 16-bit words. 
getEmitter()->emitIns_R_R_I(INS_pinsrw, emitTypeSize(TYP_INT), targetReg, tmpReg, 2 * index); @@ -2754,7 +2770,8 @@ void CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode) else { unsigned int insertpsImm = (INSERTPS_SOURCE_SELECT(0) | INSERTPS_TARGET_SELECT(index)); - inst_RV_RV_IV(INS_insertps, EA_16BYTE, targetReg, op2Reg, insertpsImm); + assert((insertpsImm >= 0) && (insertpsImm <= 255)); + inst_RV_RV_IV(INS_insertps, EA_16BYTE, targetReg, op2Reg, (int8_t)insertpsImm); } genProduceReg(simdNode); @@ -2778,7 +2795,7 @@ void CodeGen::genSIMDIntrinsicShuffleSSE2(GenTreeSIMD* simdNode) GenTree* op2 = simdNode->gtGetOp2(); assert(op2->isContained()); assert(op2->IsCnsIntOrI()); - int shuffleControl = (int)op2->AsIntConCommon()->IconValue(); + ssize_t shuffleControl = op2->AsIntConCommon()->IconValue(); var_types baseType = simdNode->gtSIMDBaseType; var_types targetType = simdNode->TypeGet(); regNumber targetReg = simdNode->gtRegNum; @@ -2791,7 +2808,8 @@ void CodeGen::genSIMDIntrinsicShuffleSSE2(GenTreeSIMD* simdNode) } instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType); - getEmitter()->emitIns_R_R_I(ins, emitTypeSize(baseType), targetReg, targetReg, shuffleControl); + assert((shuffleControl >= 0) && (shuffleControl <= 255)); + getEmitter()->emitIns_R_R_I(ins, emitTypeSize(baseType), targetReg, targetReg, (int8_t)shuffleControl); genProduceReg(simdNode); } @@ -2871,7 +2889,7 @@ void CodeGen::genLoadIndTypeSIMD12(GenTree* treeNode) getEmitter()->emitIns_R_AR(ins_Load(TYP_DOUBLE), EA_8BYTE, targetReg, operandReg, 0); // combine upper 4 bytes and lower 8 bytes in targetReg - getEmitter()->emitIns_R_R_I(INS_shufps, emitActualTypeSize(TYP_SIMD16), targetReg, tmpReg, SHUFFLE_YXYX); + getEmitter()->emitIns_R_R_I(INS_shufps, emitActualTypeSize(TYP_SIMD16), targetReg, tmpReg, (int8_t)SHUFFLE_YXYX); genProduceReg(treeNode); } @@ -2953,7 +2971,7 @@ void CodeGen::genLoadLclTypeSIMD12(GenTree* treeNode) 
getEmitter()->emitIns_R_S(ins_Move_Extend(TYP_DOUBLE, false), EA_8BYTE, targetReg, varNum, offs); // combine upper 4 bytes and lower 8 bytes in targetReg - getEmitter()->emitIns_R_R_I(INS_shufps, emitActualTypeSize(TYP_SIMD16), targetReg, tmpReg, SHUFFLE_YXYX); + getEmitter()->emitIns_R_R_I(INS_shufps, emitActualTypeSize(TYP_SIMD16), targetReg, tmpReg, (int8_t)SHUFFLE_YXYX); genProduceReg(treeNode); } -- 2.7.4