From dbc58e6eb1bf380d9d1456f6783d62a7a5cb78db Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 11 Jun 2018 19:58:34 -0700 Subject: [PATCH] Adding containment support to more x86 HWIntrinsics (dotnet/coreclr#18349) * Adding containment support for the Sse42.Crc32 intrinsic * Adding containment support for the x86 Compare*OrderedScalar and Compare*UnorderedScalar HWIntrinsics * Adding containment support to several intrinsics that were marked NoContainment Commit migrated from https://github.com/dotnet/coreclr/commit/7005f768866f3e0a2e7df3b67884bb5a98c92ef2 --- src/coreclr/src/jit/emitxarch.cpp | 111 +++++++++++--- src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp | 187 ++++++++++++++---------- src/coreclr/src/jit/hwintrinsiclistxarch.h | 80 +++++----- src/coreclr/src/jit/lowerxarch.cpp | 34 ++++- 4 files changed, 272 insertions(+), 140 deletions(-) diff --git a/src/coreclr/src/jit/emitxarch.cpp b/src/coreclr/src/jit/emitxarch.cpp index b3011fc..1211256 100644 --- a/src/coreclr/src/jit/emitxarch.cpp +++ b/src/coreclr/src/jit/emitxarch.cpp @@ -6512,6 +6512,11 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int va sz += emitGetRexPrefixSize(ins); } + if (ins == INS_crc32) + { + sz += 1; + } + id->idIns(ins); id->idInsFmt(fmt); id->idReg1(ireg); @@ -8368,6 +8373,12 @@ void emitter::emitDispIns( { printf("%s, %s", emitRegName(id->idReg1(), EA_PTRSIZE), sstr); } + else if ((ins == INS_crc32) && (attr != EA_8BYTE)) + { + // The idReg1 is always 4 bytes, but the size of idReg2 can vary. + // This logic ensures that we print `crc32 eax, bx` instead of `crc32 ax, bx` + printf("%s, %s", emitRegName(id->idReg1(), EA_4BYTE), emitRegName(id->idReg2(), attr)); + } else { printf("%s, %s", emitRegName(id->idReg1(), attr), sstr); @@ -8583,6 +8594,12 @@ void emitter::emitDispIns( { printf("%s, %s", emitRegName(id->idReg1(), EA_PTRSIZE), sstr); } + else if ((ins == INS_crc32) && (attr != EA_8BYTE)) + { + // The idReg1 is always 4 bytes, but the size of idReg2 can vary. + // This logic ensures that we print `crc32 eax, bx` instead of `crc32 ax, bx` + printf("%s, %s", emitRegName(id->idReg1(), EA_4BYTE), emitRegName(id->idReg2(), attr)); + } else { printf("%s, %s", emitRegName(id->idReg1(), attr), sstr); @@ -8696,6 +8713,8 @@ void emitter::emitDispIns( #ifdef FEATURE_HW_INTRINSICS else if (ins == INS_crc32 && attr != EA_8BYTE) { + // The idReg1 is always 4 bytes, but the size of idReg2 can vary. + // This logic ensures that we print `crc32 eax, bx` instead of `crc32 ax, bx` printf("%s, %s", emitRegName(id->idReg1(), EA_4BYTE), emitRegName(id->idReg2(), attr)); } #endif // FEATURE_HW_INTRINSICS @@ -8780,6 +8799,12 @@ void emitter::emitDispIns( attr = EA_PTRSIZE; } #endif + else if ((ins == INS_crc32) && (attr != EA_8BYTE)) + { + // The idReg1 is always 4 bytes, but the size of idReg2 can vary. + // This logic ensures that we print `crc32 eax, bx` instead of `crc32 ax, bx` + printf("%s, %s", emitRegName(id->idReg1(), EA_4BYTE), emitRegName(id->idReg2(), attr)); + } printf("%s, %s", emitRegName(id->idReg1(), attr), sstr); offs = emitGetInsDsp(id); emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC); @@ -9319,12 +9344,22 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) } // Special case emitting AVX instructions - if (Is4ByteSSE4OrAVXInstruction(ins)) + if (Is4ByteSSE4OrAVXInstruction(ins) || (ins == INS_crc32)) { + if ((ins == INS_crc32) && (size > EA_1BYTE)) + { + code |= 0x0100; + + if (size == EA_2BYTE) + { + dst += emitOutputByte(dst, 0x66); + } + } + unsigned regcode = insEncodeReg345(ins, id->idReg1(), size, &code); dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); - if (UseVEXEncoding()) + if (UseVEXEncoding() && (ins != INS_crc32)) { // Emit last opcode byte // TODO-XArch-CQ: Right now support 4-byte opcode instructions only @@ -9450,6 +9485,7 @@ GOT_DSP: switch (reg) { case REG_NA: + { if (id->idIsDspReloc()) { INT32 addlDelta = 0; @@ -9457,7 +9493,7 @@ GOT_DSP: // The address is of the form "[disp]" // On x86 - disp is relative to zero // On Amd64 - disp is relative to RIP - if (Is4ByteSSE4OrAVXInstruction(ins)) + if (Is4ByteSSE4OrAVXInstruction(ins) || (ins == INS_crc32)) { dst += emitOutputByte(dst, code | 0x05); } @@ -9513,7 +9549,7 @@ GOT_DSP: else { #ifdef _TARGET_X86_ - if (Is4ByteSSE4OrAVXInstruction(ins)) + if (Is4ByteSSE4OrAVXInstruction(ins) || (ins == INS_crc32)) { dst += emitOutputByte(dst, code | 0x05); } @@ -9543,9 +9579,11 @@ GOT_DSP: dst += emitOutputLong(dst, dsp); } break; + } case REG_EBP: - if (Is4ByteSSE4OrAVXInstruction(ins)) + { + if (Is4ByteSSE4OrAVXInstruction(ins) || (ins == INS_crc32)) { // Does the offset fit in a byte? if (dspInByte) @@ -9584,9 +9622,11 @@ GOT_DSP: } } break; + } case REG_ESP: - if (Is4ByteSSE4OrAVXInstruction(ins)) + { + if (Is4ByteSSE4OrAVXInstruction(ins) || (ins == INS_crc32)) { // Is the offset 0 or does it at least fit in a byte? if (dspIsZero) @@ -9637,9 +9677,11 @@ GOT_DSP: } } break; + } default: - if (Is4ByteSSE4OrAVXInstruction(ins)) + { + if (Is4ByteSSE4OrAVXInstruction(ins) || (ins == INS_crc32)) { // Put the register in the opcode code |= insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr); @@ -9701,6 +9743,7 @@ GOT_DSP: } break; + } } } else @@ -9720,7 +9763,7 @@ GOT_DSP: regByte = insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr) | insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr) | insSSval(mul); - if (Is4ByteSSE4OrAVXInstruction(ins)) + if (Is4ByteSSE4OrAVXInstruction(ins) || (ins == INS_crc32)) { // Emit [ebp + {2/4/8} * rgz] as [ebp + {2/4/8} * rgx + 0] if (dspIsZero && reg != REG_EBP) @@ -9787,7 +9830,7 @@ GOT_DSP: regByte = insEncodeReg012(ins, REG_EBP, EA_PTRSIZE, nullptr) | insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr) | insSSval(mul); - if (Is4ByteSSE4OrAVXInstruction(ins)) + if (Is4ByteSSE4OrAVXInstruction(ins) || (ins == INS_crc32)) { dst += emitOutputByte(dst, code | 0x04); } @@ -9816,7 +9859,7 @@ GOT_DSP: // The address is "[reg+rgx+dsp]" regByte = insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr) | insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr); - if (Is4ByteSSE4OrAVXInstruction(ins)) + if (Is4ByteSSE4OrAVXInstruction(ins) || (ins == INS_crc32)) { if (dspIsZero && reg != REG_EBP) { @@ -10043,12 +10086,22 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) } // Special case emitting AVX instructions - if (Is4ByteSSE4OrAVXInstruction(ins)) + if (Is4ByteSSE4OrAVXInstruction(ins) || (ins == INS_crc32)) { + if ((ins == INS_crc32) && (size > EA_1BYTE)) + { + code |= 0x0100; + + if (size == EA_2BYTE) + { + dst += emitOutputByte(dst, 0x66); + } + } + unsigned regcode = insEncodeReg345(ins, id->idReg1(), size, &code); dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); - if (UseVEXEncoding()) + if (UseVEXEncoding() && (ins != INS_crc32)) { // Emit last opcode byte // TODO-XArch-CQ: Right now support 4-byte opcode instructions only @@ -10174,7 +10227,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) if (EBPbased) { // EBP-based variable: does the offset fit in a byte? - if (Is4ByteSSE4OrAVXInstruction(ins)) + if (Is4ByteSSE4OrAVXInstruction(ins) || (ins == INS_crc32)) { if (dspInByte) { @@ -10213,7 +10266,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) dspIsZero = (dsp == 0); // Does the offset fit in a byte? - if (Is4ByteSSE4OrAVXInstruction(ins)) + if (Is4ByteSSE4OrAVXInstruction(ins) || (ins == INS_crc32)) { if (dspInByte) { @@ -10483,12 +10536,22 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) #endif //_TARGET_X86_ // Special case emitting AVX instructions - if (Is4ByteSSE4OrAVXInstruction(ins)) + if (Is4ByteSSE4OrAVXInstruction(ins) || (ins == INS_crc32)) { + if ((ins == INS_crc32) && (size > EA_1BYTE)) + { + code |= 0x0100; + + if (size == EA_2BYTE) + { + dst += emitOutputByte(dst, 0x66); + } + } + unsigned regcode = insEncodeReg345(ins, id->idReg1(), size, &code); dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code); - if (UseVEXEncoding()) + if (UseVEXEncoding() && (ins != INS_crc32)) { // Emit last opcode byte // TODO-XArch-CQ: Right now support 4-byte opcode instructions only @@ -12813,8 +12876,9 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) case IF_RWR_ARD: case IF_RRW_ARD: case IF_RWR_RRD_ARD: + { code = insCodeRM(ins); - if (Is4ByteSSE4OrAVXInstruction(ins)) + if (Is4ByteSSE4OrAVXInstruction(ins) || (ins == INS_crc32)) { dst = emitOutputAM(dst, id, code); } @@ -12826,6 +12890,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) } sz = emitSizeOfInsDsc(id); break; + } case IF_RWR_RRD_ARD_CNS: case IF_RWR_RRD_ARD_RRD: @@ -12954,11 +13019,12 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) case IF_RRD_SRD: case IF_RWR_SRD: case IF_RRW_SRD: + { code = insCodeRM(ins); // 4-byte AVX instructions are special cased inside emitOutputSV // since they do not have space to encode ModRM byte. - if (Is4ByteSSE4OrAVXInstruction(ins)) + if (Is4ByteSSE4OrAVXInstruction(ins) || (ins == INS_crc32)) { dst = emitOutputSV(dst, id, code); } @@ -12975,7 +13041,10 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8); dst = emitOutputSV(dst, id, code | regcode); } + + sz = emitSizeOfInsDsc(id); break; + } case IF_RWR_RRD_SRD: { @@ -13115,9 +13184,11 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) case IF_RRD_MRD: case IF_RWR_MRD: case IF_RRW_MRD: + { code = insCodeRM(ins); + // Special case 4-byte AVX instructions - if (Is4ByteSSE4OrAVXInstruction(ins)) + if (Is4ByteSSE4OrAVXInstruction(ins) || (ins == INS_crc32)) { dst = emitOutputCV(dst, id, code); } @@ -13134,8 +13205,10 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8); dst = emitOutputCV(dst, id, code | regcode | 0x0500); } + sz = emitSizeOfInsDsc(id); break; + } case IF_RWR_RRD_MRD: { diff --git a/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp index 1468f03..0107273 100644 --- a/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp @@ -25,6 +25,33 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "gcinfoencoder.h" //------------------------------------------------------------------------ +// assertIsContainableHWIntrinsicOp: Asserts that op is containable by node +// +// Arguments: +// lowering - The lowering phase from the compiler +// node - The HWIntrinsic node that has the contained node +// op - The op that is contained +// +static void assertIsContainableHWIntrinsicOp(Lowering* lowering, GenTreeHWIntrinsic* node, GenTree* op) +{ +#if DEBUG + // The Lowering::IsContainableHWIntrinsicOp call is not quite right, since it follows pre-register allocation + // logic. However, this check is still important due to the various containment rules that SIMD intrinsics follow. + // + // We use isContainable to track the special HWIntrinsic node containment rules (for things like LoadAligned and + // LoadUnaligned) and we use the supportsRegOptional check to support general-purpose loads (both from stack + // spillage + // and for isUsedFromMemory contained nodes, in the case where the register allocator decided to not allocate a + // register + // in the first place). + + bool supportsRegOptional = false; + bool isContainable = lowering->IsContainableHWIntrinsicOp(node, op, &supportsRegOptional); + assert(isContainable || supportsRegOptional); +#endif // DEBUG +} + +//------------------------------------------------------------------------ // genIsTableDrivenHWIntrinsic: // // Arguments: @@ -342,21 +369,34 @@ void CodeGen::genHWIntrinsic_R_RM(GenTreeHWIntrinsic* node, instruction ins, emi var_types targetType = node->TypeGet(); regNumber targetReg = node->gtRegNum; GenTree* op1 = node->gtGetOp1(); + GenTree* op2 = node->gtGetOp2(); emitter* emit = getEmitter(); + if (op2 != nullptr) + { + // The Compare*OrderedScalar and Compare*UnorderedScalar intrinsics come down this + // code path. They are all MultiIns, as the return value comes from the flags and + // we have two operands instead. + + assert(HWIntrinsicInfo::GeneratesMultipleIns(node->gtHWIntrinsicId)); + assert(targetReg != REG_NA); + + targetReg = op1->gtRegNum; + op1 = op2; + op2 = nullptr; + } + else + { + assert(!node->OperIsCommutative()); + } + assert(targetReg != REG_NA); - assert(node->gtGetOp2() == nullptr); - assert(!node->OperIsCommutative()); + assert(op2 == nullptr); if (op1->isContained() || op1->isUsedFromSpillTemp()) { assert(HWIntrinsicInfo::SupportsContainment(node->gtHWIntrinsicId)); - -#if DEBUG - bool supportsRegOptional = false; - bool isContainable = compiler->m_pLowering->IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional); - assert(isContainable || (supportsRegOptional && op1->IsRegOptional())); -#endif // DEBUG + assertIsContainableHWIntrinsicOp(compiler->m_pLowering, node, op1); TempDsc* tmpDsc = nullptr; unsigned varNum = BAD_VAR_NUM; @@ -480,12 +520,7 @@ void CodeGen::genHWIntrinsic_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, i if (op1->isContained() || op1->isUsedFromSpillTemp()) { assert(HWIntrinsicInfo::SupportsContainment(node->gtHWIntrinsicId)); - -#if DEBUG - bool supportsRegOptional = false; - bool isContainable = compiler->m_pLowering->IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional); - assert(isContainable || (supportsRegOptional && op1->IsRegOptional())); -#endif // DEBUG + assertIsContainableHWIntrinsicOp(compiler->m_pLowering, node, op1); TempDsc* tmpDsc = nullptr; unsigned varNum = BAD_VAR_NUM; @@ -609,12 +644,7 @@ void CodeGen::genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins) if (op2->isContained() || op2->isUsedFromSpillTemp()) { assert(HWIntrinsicInfo::SupportsContainment(node->gtHWIntrinsicId)); - -#if DEBUG - bool supportsRegOptional = false; - bool isContainable = compiler->m_pLowering->IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional); - assert(isContainable || (supportsRegOptional && op2->IsRegOptional())); -#endif // DEBUG + assertIsContainableHWIntrinsicOp(compiler->m_pLowering, node, op2); TempDsc* tmpDsc = nullptr; unsigned varNum = BAD_VAR_NUM; @@ -770,12 +800,7 @@ void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, if (op2->isContained() || op2->isUsedFromSpillTemp()) { assert(HWIntrinsicInfo::SupportsContainment(node->gtHWIntrinsicId)); - -#if DEBUG - bool supportsRegOptional = false; - bool isContainable = compiler->m_pLowering->IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional); - assert(isContainable || (supportsRegOptional && op2->IsRegOptional())); -#endif // DEBUG + assertIsContainableHWIntrinsicOp(compiler->m_pLowering, node, op2); TempDsc* tmpDsc = nullptr; unsigned varNum = BAD_VAR_NUM; @@ -928,6 +953,9 @@ void CodeGen::genHWIntrinsic_R_R_RM_R(GenTreeHWIntrinsic* node, instruction ins) if (op2->isContained() || op2->isUsedFromSpillTemp()) { + assert(HWIntrinsicInfo::SupportsContainment(node->gtHWIntrinsicId)); + assertIsContainableHWIntrinsicOp(compiler->m_pLowering, node, op2); + TempDsc* tmpDsc = nullptr; unsigned varNum = BAD_VAR_NUM; unsigned offset = (unsigned)-1; @@ -1245,14 +1273,13 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node) case NI_SSE_CompareEqualUnorderedScalar: { assert(baseType == TYP_FLOAT); - op2Reg = op2->gtRegNum; regNumber tmpReg = node->GetSingleTempReg(); instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, node->gtSIMDBaseType); // Ensure we aren't overwriting targetReg assert(tmpReg != targetReg); - emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg); + genHWIntrinsic_R_RM(node, ins, emitTypeSize(TYP_SIMD16)); emit->emitIns_R(INS_setpo, EA_1BYTE, targetReg); emit->emitIns_R(INS_sete, EA_1BYTE, tmpReg); emit->emitIns_R_R(INS_and, EA_1BYTE, tmpReg, targetReg); @@ -1264,10 +1291,9 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node) case NI_SSE_CompareGreaterThanUnorderedScalar: { assert(baseType == TYP_FLOAT); - op2Reg = op2->gtRegNum; - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, node->gtSIMDBaseType); - emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg); + + genHWIntrinsic_R_RM(node, ins, emitTypeSize(TYP_SIMD16)); emit->emitIns_R(INS_seta, EA_1BYTE, targetReg); emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg); break; @@ -1277,10 +1303,9 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node) case NI_SSE_CompareGreaterThanOrEqualUnorderedScalar: { assert(baseType == TYP_FLOAT); - op2Reg = op2->gtRegNum; - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, node->gtSIMDBaseType); - emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg); + + genHWIntrinsic_R_RM(node, ins, emitTypeSize(TYP_SIMD16)); emit->emitIns_R(INS_setae, EA_1BYTE, targetReg); emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg); break; @@ -1290,10 +1315,9 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node) case NI_SSE_CompareLessThanUnorderedScalar: { assert(baseType == TYP_FLOAT); - op2Reg = op2->gtRegNum; - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, node->gtSIMDBaseType); - emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op2Reg, op1Reg); + + genHWIntrinsic_R_RM(node, ins, emitTypeSize(TYP_SIMD16)); emit->emitIns_R(INS_seta, EA_1BYTE, targetReg); emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg); break; @@ -1303,10 +1327,9 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node) case NI_SSE_CompareLessThanOrEqualUnorderedScalar: { assert(baseType == TYP_FLOAT); - op2Reg = op2->gtRegNum; - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, node->gtSIMDBaseType); - emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op2Reg, op1Reg); + + genHWIntrinsic_R_RM(node, ins, emitTypeSize(TYP_SIMD16)); emit->emitIns_R(INS_setae, EA_1BYTE, targetReg); emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg); break; @@ -1316,14 +1339,13 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node) case NI_SSE_CompareNotEqualUnorderedScalar: { assert(baseType == TYP_FLOAT); - op2Reg = op2->gtRegNum; regNumber tmpReg = node->GetSingleTempReg(); instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, node->gtSIMDBaseType); // Ensure we aren't overwriting targetReg assert(tmpReg != targetReg); - emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg); + genHWIntrinsic_R_RM(node, ins, emitTypeSize(TYP_SIMD16)); emit->emitIns_R(INS_setpe, EA_1BYTE, targetReg); emit->emitIns_R(INS_setne, EA_1BYTE, tmpReg); emit->emitIns_R_R(INS_or, EA_1BYTE, tmpReg, targetReg); @@ -1461,14 +1483,13 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node) case NI_SSE2_CompareEqualUnorderedScalar: { assert(baseType == TYP_DOUBLE); - op2Reg = op2->gtRegNum; regNumber tmpReg = node->GetSingleTempReg(); instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); // Ensure we aren't overwriting targetReg assert(tmpReg != targetReg); - emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg); + genHWIntrinsic_R_RM(node, ins, emitTypeSize(TYP_SIMD16)); emit->emitIns_R(INS_setpo, EA_1BYTE, targetReg); emit->emitIns_R(INS_sete, EA_1BYTE, tmpReg); emit->emitIns_R_R(INS_and, EA_1BYTE, tmpReg, targetReg); @@ -1480,10 +1501,9 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node) case NI_SSE2_CompareGreaterThanUnorderedScalar: { assert(baseType == TYP_DOUBLE); - op2Reg = op2->gtRegNum; instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); - emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg); + genHWIntrinsic_R_RM(node, ins, emitTypeSize(TYP_SIMD16)); emit->emitIns_R(INS_seta, EA_1BYTE, targetReg); emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg); break; @@ -1493,10 +1513,9 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node) case NI_SSE2_CompareGreaterThanOrEqualUnorderedScalar: { assert(baseType == TYP_DOUBLE); - op2Reg = op2->gtRegNum; instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); - emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg); + genHWIntrinsic_R_RM(node, ins, emitTypeSize(TYP_SIMD16)); emit->emitIns_R(INS_setae, EA_1BYTE, targetReg); emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg); break; @@ -1506,10 +1525,9 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node) case NI_SSE2_CompareLessThanUnorderedScalar: { assert(baseType == TYP_DOUBLE); - op2Reg = op2->gtRegNum; instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); - emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op2Reg, op1Reg); + genHWIntrinsic_R_RM(node, ins, emitTypeSize(TYP_SIMD16)); emit->emitIns_R(INS_seta, EA_1BYTE, targetReg); emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg); break; @@ -1519,10 +1537,9 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node) case NI_SSE2_CompareLessThanOrEqualUnorderedScalar: { assert(baseType == TYP_DOUBLE); - op2Reg = op2->gtRegNum; instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); - emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op2Reg, op1Reg); + genHWIntrinsic_R_RM(node, ins, emitTypeSize(TYP_SIMD16)); emit->emitIns_R(INS_setae, EA_1BYTE, targetReg); emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg); break; @@ -1532,14 +1549,13 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node) case NI_SSE2_CompareNotEqualUnorderedScalar: { assert(baseType == TYP_DOUBLE); - op2Reg = op2->gtRegNum; instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); regNumber tmpReg = node->GetSingleTempReg(); // Ensure we aren't overwriting targetReg assert(tmpReg != targetReg); - emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), op1Reg, op2Reg); + genHWIntrinsic_R_RM(node, ins, emitTypeSize(TYP_SIMD16)); emit->emitIns_R(INS_setpe, EA_1BYTE, targetReg); emit->emitIns_R(INS_setne, EA_1BYTE, tmpReg); emit->emitIns_R_R(INS_or, EA_1BYTE, tmpReg, targetReg); @@ -1720,9 +1736,9 @@ void CodeGen::genSSE41Intrinsic(GenTreeHWIntrinsic* node) regNumber tmpReg = node->GetSingleTempReg(); assert(HWIntrinsicInfo::lookupIns(intrinsicId, node->gtSIMDBaseType) == INS_ptest); emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, emitTypeSize(TYP_SIMD16), tmpReg, tmpReg, tmpReg); - emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg); emit->emitIns_R_R(INS_ptest, emitTypeSize(TYP_SIMD16), op1Reg, tmpReg); emit->emitIns_R(INS_setb, EA_1BYTE, targetReg); + emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg); break; } @@ -1730,18 +1746,18 @@ void CodeGen::genSSE41Intrinsic(GenTreeHWIntrinsic* node) case NI_SSE41_TestZ: { assert(HWIntrinsicInfo::lookupIns(intrinsicId, node->gtSIMDBaseType) == INS_ptest); - emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg); - emit->emitIns_R_R(INS_ptest, emitTypeSize(TYP_SIMD16), op1Reg, op2->gtRegNum); + genHWIntrinsic_R_RM(node, INS_ptest, emitTypeSize(TYP_SIMD16)); emit->emitIns_R(INS_sete, EA_1BYTE, targetReg); + emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg); break; } case NI_SSE41_TestC: { assert(HWIntrinsicInfo::lookupIns(intrinsicId, node->gtSIMDBaseType) == INS_ptest); - emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg); - emit->emitIns_R_R(INS_ptest, emitTypeSize(TYP_SIMD16), op1Reg, op2->gtRegNum); + genHWIntrinsic_R_RM(node, INS_ptest, emitTypeSize(TYP_SIMD16)); emit->emitIns_R(INS_setb, EA_1BYTE, targetReg); + emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg); break; } @@ -1749,9 +1765,9 @@ void CodeGen::genSSE41Intrinsic(GenTreeHWIntrinsic* node) case NI_SSE41_TestNotZAndNotC: { assert(HWIntrinsicInfo::lookupIns(intrinsicId, node->gtSIMDBaseType) == INS_ptest); - emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg); - emit->emitIns_R_R(INS_ptest, emitTypeSize(TYP_SIMD16), op1Reg, op2->gtRegNum); + genHWIntrinsic_R_RM(node, INS_ptest, emitTypeSize(TYP_SIMD16)); emit->emitIns_R(INS_seta, EA_1BYTE, targetReg); + emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg); break; } @@ -1812,43 +1828,58 @@ void CodeGen::genSSE41Intrinsic(GenTreeHWIntrinsic* node) void CodeGen::genSSE42Intrinsic(GenTreeHWIntrinsic* node) { NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; + regNumber targetReg = node->gtRegNum; GenTree* op1 = node->gtGetOp1(); GenTree* op2 = node->gtGetOp2(); - regNumber targetReg = node->gtRegNum; - assert(targetReg != REG_NA); - var_types targetType = node->TypeGet(); - var_types baseType = node->gtSIMDBaseType; + var_types baseType = node->gtSIMDBaseType; + var_types targetType = node->TypeGet(); + emitter* emit = getEmitter(); regNumber op1Reg = op1->gtRegNum; - regNumber op2Reg = op2->gtRegNum; genConsumeOperands(node); + assert(targetReg != REG_NA); + assert(op1Reg != REG_NA); + assert(op2 != nullptr); + assert(!node->OperIsCommutative()); + switch (intrinsicId) { case NI_SSE42_Crc32: + { if (op1Reg != targetReg) { - assert(op2Reg != targetReg); - inst_RV_RV(INS_mov, targetReg, op1Reg, targetType, emitTypeSize(targetType)); + assert(op2->gtRegNum != targetReg); + emit->emitIns_R_R(INS_mov, emitTypeSize(targetType), targetReg, op1Reg); } - if (baseType == TYP_UBYTE || baseType == TYP_USHORT) // baseType is the type of the second argument + // This makes the genHWIntrinsic_R_RM code much simpler, as we don't need an + // overload that explicitly takes the operands. + node->gtOp1 = op2; + node->gtOp2 = nullptr; + + if ((baseType == TYP_UBYTE) || (baseType == TYP_USHORT)) // baseType is the type of the second argument { assert(targetType == TYP_INT); - inst_RV_RV(INS_crc32, targetReg, op2Reg, baseType, emitTypeSize(baseType)); + genHWIntrinsic_R_RM(node, INS_crc32, emitTypeSize(baseType)); } else { assert(op1->TypeGet() == op2->TypeGet()); - assert(targetType == TYP_INT || targetType == TYP_LONG); - inst_RV_RV(INS_crc32, targetReg, op2Reg, targetType, emitTypeSize(targetType)); + assert((targetType == TYP_INT) || (targetType == TYP_LONG)); + genHWIntrinsic_R_RM(node, INS_crc32, emitTypeSize(targetType)); } break; + } + default: + { unreached(); break; + } } + genProduceReg(node); } @@ -1985,25 +2016,25 @@ void CodeGen::genAvxOrAvx2Intrinsic(GenTreeHWIntrinsic* node) case NI_AVX_TestC: { - emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg); - emit->emitIns_R_R(ins, attr, op1->gtRegNum, op2->gtRegNum); + genHWIntrinsic_R_RM(node, ins, attr); emit->emitIns_R(INS_setb, EA_1BYTE, targetReg); + emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg); break; } case NI_AVX_TestNotZAndNotC: { - emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg); - emit->emitIns_R_R(ins, attr, op1->gtRegNum, op2->gtRegNum); + genHWIntrinsic_R_RM(node, ins, attr); emit->emitIns_R(INS_seta, EA_1BYTE, targetReg); + emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg); break; } case NI_AVX_TestZ: { - emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg); - emit->emitIns_R_R(ins, attr, op1->gtRegNum, op2->gtRegNum); + genHWIntrinsic_R_RM(node, ins, attr); emit->emitIns_R(INS_sete, EA_1BYTE, targetReg); + emit->emitIns_R_R(INS_movzx, EA_1BYTE, targetReg, targetReg); break; } diff --git a/src/coreclr/src/jit/hwintrinsiclistxarch.h b/src/coreclr/src/jit/hwintrinsiclistxarch.h index 5c91e2e..3f2f08b 100644 --- a/src/coreclr/src/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/src/jit/hwintrinsiclistxarch.h @@ -34,29 +34,29 @@ HARDWARE_INTRINSIC(SSE_AddScalar, "AddScalar", HARDWARE_INTRINSIC(SSE_And, "And", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(SSE_AndNot, "AndNot", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andnps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE_CompareEqual, "CompareEqual", SSE, 0, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(SSE_CompareEqualOrderedScalar, "CompareEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE_CompareEqualOrderedScalar, "CompareEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE_CompareEqualScalar, "CompareEqualScalar", SSE, 0, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE_CompareEqualUnorderedScalar, "CompareEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE_CompareEqualUnorderedScalar, "CompareEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE_CompareGreaterThan, "CompareGreaterThan", SSE, 6, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrderedScalar, "CompareGreaterThanOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrderedScalar, "CompareGreaterThanOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE_CompareGreaterThanScalar, "CompareGreaterThanScalar", SSE, 6, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE_CompareGreaterThanUnorderedScalar, "CompareGreaterThanUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE_CompareGreaterThanUnorderedScalar, "CompareGreaterThanUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqual, "CompareGreaterThanOrEqual", SSE, 5, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqualOrderedScalar, "CompareGreaterThanOrEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqualOrderedScalar, "CompareGreaterThanOrEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqualScalar, "CompareGreaterThanOrEqualScalar", SSE, 5, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqualUnorderedScalar, "CompareGreaterThanOrEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE_CompareGreaterThanOrEqualUnorderedScalar, "CompareGreaterThanOrEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE_CompareLessThan, "CompareLessThan", SSE, 1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(SSE_CompareLessThanOrderedScalar, "CompareLessThanOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE_CompareLessThanOrderedScalar, "CompareLessThanOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE_CompareLessThanScalar, "CompareLessThanScalar", SSE, 1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE_CompareLessThanUnorderedScalar, "CompareLessThanUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE_CompareLessThanUnorderedScalar, "CompareLessThanUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqual, "CompareLessThanOrEqual", SSE, 2, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqualOrderedScalar, "CompareLessThanOrEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqualOrderedScalar, "CompareLessThanOrEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqualScalar, "CompareLessThanOrEqualScalar", SSE, 2, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqualUnorderedScalar, "CompareLessThanOrEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE_CompareLessThanOrEqualUnorderedScalar, "CompareLessThanOrEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE_CompareNotEqual, "CompareNotEqual", SSE, 4, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(SSE_CompareNotEqualOrderedScalar, "CompareNotEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE_CompareNotEqualOrderedScalar, "CompareNotEqualOrderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE_CompareNotEqualScalar, "CompareNotEqualScalar", SSE, 4, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE_CompareNotEqualUnorderedScalar, "CompareNotEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE_CompareNotEqualUnorderedScalar, "CompareNotEqualUnorderedScalar", SSE, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE_CompareNotGreaterThan, "CompareNotGreaterThan", SSE, 2, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE_CompareNotGreaterThanScalar, "CompareNotGreaterThanScalar", SSE, 2, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE_CompareNotGreaterThanOrEqual, "CompareNotGreaterThanOrEqual", SSE, 1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) @@ -133,29 +133,29 @@ HARDWARE_INTRINSIC(SSE2_And, "And", HARDWARE_INTRINSIC(SSE2_AndNot, "AndNot", SSE2, -1, 16, 2, {INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_invalid, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE2_Average, "Average", SSE2, -1, 16, 2, {INS_invalid, INS_pavgb, INS_invalid, INS_pavgw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(SSE2_CompareEqual, "CompareEqual", SSE2, 0, 16, 2, {INS_pcmpeqb, INS_pcmpeqb, INS_pcmpeqw, INS_pcmpeqw, INS_pcmpeqd, INS_pcmpeqd, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(SSE2_CompareEqualOrderedScalar, "CompareEqualOrderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE2_CompareEqualOrderedScalar, "CompareEqualOrderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_CompareEqualScalar, "CompareEqualScalar", SSE2, 0, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE2_CompareEqualUnorderedScalar, "CompareEqualUnorderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE2_CompareEqualUnorderedScalar, "CompareEqualUnorderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_CompareGreaterThan, "CompareGreaterThan", SSE2, 6, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrderedScalar, "CompareGreaterThanOrderedScalar", SSE2, -1, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrderedScalar, "CompareGreaterThanOrderedScalar", SSE2, -1, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_CompareGreaterThanScalar, "CompareGreaterThanScalar", SSE2, 6, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE2_CompareGreaterThanUnorderedScalar, "CompareGreaterThanUnorderedScalar", SSE2, -1, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE2_CompareGreaterThanUnorderedScalar, "CompareGreaterThanUnorderedScalar", SSE2, -1, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrEqual, "CompareGreaterThanOrEqual", SSE2, 5, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrEqualOrderedScalar, "CompareGreaterThanOrEqualOrderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrEqualOrderedScalar, "CompareGreaterThanOrEqualOrderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrEqualScalar, "CompareGreaterThanOrEqualScalar", SSE2, 5, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrEqualUnorderedScalar, "CompareGreaterThanOrEqualUnorderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE2_CompareGreaterThanOrEqualUnorderedScalar, "CompareGreaterThanOrEqualUnorderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_CompareLessThan, "CompareLessThan", SSE2, 1, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_Special, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(SSE2_CompareLessThanOrderedScalar, "CompareLessThanOrderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE2_CompareLessThanOrderedScalar, "CompareLessThanOrderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_CompareLessThanScalar, "CompareLessThanScalar", SSE2, 1, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE2_CompareLessThanUnorderedScalar, "CompareLessThanUnorderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE2_CompareLessThanUnorderedScalar, "CompareLessThanUnorderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_CompareLessThanOrEqual, "CompareLessThanOrEqual", SSE2, 2, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(SSE2_CompareLessThanOrEqualOrderedScalar, "CompareLessThanOrEqualOrderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE2_CompareLessThanOrEqualOrderedScalar, "CompareLessThanOrEqualOrderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_CompareLessThanOrEqualScalar, "CompareLessThanOrEqualScalar", SSE2, 2, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE2_CompareLessThanOrEqualUnorderedScalar, "CompareLessThanOrEqualUnorderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE2_CompareLessThanOrEqualUnorderedScalar, "CompareLessThanOrEqualUnorderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_CompareNotEqual, "CompareNotEqual", SSE2, 4, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(SSE2_CompareNotEqualOrderedScalar, "CompareNotEqualOrderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE2_CompareNotEqualOrderedScalar, "CompareNotEqualOrderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_CompareNotEqualScalar, "CompareNotEqualScalar", SSE2, 4, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE2_CompareNotEqualUnorderedScalar, "CompareNotEqualUnorderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE2_CompareNotEqualUnorderedScalar, "CompareNotEqualUnorderedScalar", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2_CompareNotGreaterThan, "CompareNotGreaterThan", SSE2, 2, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE2_CompareNotGreaterThanScalar, "CompareNotGreaterThanScalar", SSE2, 2, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE2_CompareNotGreaterThanOrEqual, "CompareNotGreaterThanOrEqual", SSE2, 1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) @@ -249,9 +249,9 @@ HARDWARE_INTRINSIC(SSE3_HorizontalAdd, "HorizontalA HARDWARE_INTRINSIC(SSE3_HorizontalSubtract, "HorizontalSubtract", SSE3, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_hsubps, INS_hsubpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE3_LoadAndDuplicateToVector128, "LoadAndDuplicateToVector128", SSE3, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_lddqu, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movddup}, HW_Category_MemoryLoad, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE3_LoadDquVector128, "LoadDquVector128", SSE3, -1, 16, 1, {INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE3_MoveAndDuplicate, "MoveAndDuplicate", SSE3, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movddup}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE3_MoveHighAndDuplicate, "MoveHighAndDuplicate", SSE3, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movshdup, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE3_MoveLowAndDuplicate, "MoveLowAndDuplicate", SSE3, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsldup, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE3_MoveAndDuplicate, "MoveAndDuplicate", SSE3, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movddup}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE3_MoveHighAndDuplicate, "MoveHighAndDuplicate", SSE3, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movshdup, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE3_MoveLowAndDuplicate, "MoveLowAndDuplicate", SSE3, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsldup, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Intrinsic ID Function name ISA ival SIMD size NumArg instructions Category Flags @@ -281,9 +281,9 @@ HARDWARE_INTRINSIC(SSE41_BlendVariable, "BlendVariab HARDWARE_INTRINSIC(SSE41_Ceiling, "Ceiling", SSE41, 10, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE41_CeilingScalar, "CeilingScalar", SSE41, 10, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE41_CompareEqual, "CompareEqual", SSE41, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pcmpeqq, INS_pcmpeqq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(SSE41_ConvertToVector128Int16, "ConvertToVector128Int16", SSE41, -1, 16, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE41_ConvertToVector128Int32, "ConvertToVector128Int32", SSE41, -1, 16, 1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE41_ConvertToVector128Int64, "ConvertToVector128Int64", SSE41, -1, 16, 1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE41_ConvertToVector128Int16, "ConvertToVector128Int16", SSE41, -1, 16, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE41_ConvertToVector128Int32, "ConvertToVector128Int32", SSE41, -1, 16, 1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE41_ConvertToVector128Int64, "ConvertToVector128Int64", SSE41, -1, 16, 1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE41_DotProduct, "DotProduct", SSE41, -1, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_dpps, INS_dppd}, HW_Category_IMM, HW_Flag_FullRangeIMM) HARDWARE_INTRINSIC(SSE41_Extract, "Extract", SSE41, -1, 16, 2, {INS_pextrb, INS_pextrb, INS_invalid, INS_invalid, INS_pextrd, INS_pextrd, INS_pextrq, INS_pextrq, INS_extractps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MultiIns|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE41_Floor, "Floor", SSE41, 9, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) @@ -308,11 +308,11 @@ HARDWARE_INTRINSIC(SSE41_RoundToPositiveInfinityScalar, "RoundToPosi HARDWARE_INTRINSIC(SSE41_RoundToZero, "RoundToZero", SSE41, 11, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE41_RoundToZeroScalar, "RoundToZeroScalar", SSE41, 11, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE41_TestAllOnes, "TestAllOnes", SSE41, -1, 16, 1, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(SSE41_TestAllZeros, "TestAllZeros", SSE41, -1, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(SSE41_TestC, "TestC", SSE41, -1, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(SSE41_TestMixOnesZeros, "TestMixOnesZeros", SSE41, -1, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(SSE41_TestNotZAndNotC, "TestNotZAndNotC", SSE41, -1, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(SSE41_TestZ, "TestZ", SSE41, -1, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(SSE41_TestAllZeros, "TestAllZeros", SSE41, -1, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(SSE41_TestC, "TestC", SSE41, -1, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(SSE41_TestMixOnesZeros, "TestMixOnesZeros", SSE41, -1, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(SSE41_TestNotZAndNotC, "TestNotZAndNotC", SSE41, -1, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(SSE41_TestZ, "TestZ", SSE41, -1, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Intrinsic ID Function name ISA ival SIMD size NumArg instructions Category Flags @@ -392,9 +392,9 @@ HARDWARE_INTRINSIC(AVX_Store, "Store", HARDWARE_INTRINSIC(AVX_StoreAligned, "StoreAligned", AVX, -1, 32, 2, {INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movaps, INS_movapd}, HW_Category_MemoryStore, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(AVX_StoreAlignedNonTemporal, "StoreAlignedNonTemporal", AVX, -1, 32, 2, {INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntps, INS_movntpd}, HW_Category_MemoryStore, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(AVX_Subtract, "Subtract", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subps, INS_subpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX_TestC, "TestC", AVX, -1, 0, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, HW_Category_SimpleSIMD, HW_Flag_OneTypeGeneric|HW_Flag_UnfixedSIMDSize|HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX_TestNotZAndNotC, "TestNotZAndNotC", AVX, -1, 0, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, HW_Category_SimpleSIMD, HW_Flag_OneTypeGeneric|HW_Flag_UnfixedSIMDSize|HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX_TestZ, "TestZ", AVX, -1, 0, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, HW_Category_SimpleSIMD, HW_Flag_OneTypeGeneric|HW_Flag_UnfixedSIMDSize|HW_Flag_MultiIns|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX_TestC, "TestC", AVX, -1, 0, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, HW_Category_SimpleSIMD, HW_Flag_OneTypeGeneric|HW_Flag_UnfixedSIMDSize|HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX_TestNotZAndNotC, "TestNotZAndNotC", AVX, -1, 0, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, HW_Category_SimpleSIMD, HW_Flag_OneTypeGeneric|HW_Flag_UnfixedSIMDSize|HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX_TestZ, "TestZ", AVX, -1, 0, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, HW_Category_SimpleSIMD, HW_Flag_OneTypeGeneric|HW_Flag_UnfixedSIMDSize|HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(AVX_UnpackHigh, "UnpackHigh", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_unpckhps, INS_unpckhpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX_UnpackLow, "UnpackLow", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_unpcklps, INS_unpcklpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX_Xor, "Xor", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_xorps, INS_xorpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) @@ -438,11 +438,11 @@ HARDWARE_INTRINSIC(AVX2_Or, "Or", HARDWARE_INTRINSIC(AVX2_Permute2x128, "Permute2x128", AVX2, -1, 32, 3, {INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_OneTypeGeneric|HW_Flag_FullRangeIMM) HARDWARE_INTRINSIC(AVX2_ShiftLeftLogical, "ShiftLeftLogical", AVX2, -1, 32, 2, {INS_invalid, INS_invalid, INS_psllw, INS_psllw, INS_pslld, INS_pslld, INS_psllq, INS_psllq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) HARDWARE_INTRINSIC(AVX2_ShiftLeftLogical128BitLane, "ShiftLeftLogical128BitLane", AVX2, -1, 32, 2, {INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX2_ShiftLeftLogicalVariable, "ShiftLeftLogicalVariable", AVX2, -1, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsllvd, INS_vpsllvd, INS_vpsllvq, INS_vpsllvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_UnfixedSIMDSize|HW_Flag_NoContainment) +HARDWARE_INTRINSIC(AVX2_ShiftLeftLogicalVariable, "ShiftLeftLogicalVariable", AVX2, -1, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsllvd, INS_vpsllvd, INS_vpsllvq, INS_vpsllvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_UnfixedSIMDSize) HARDWARE_INTRINSIC(AVX2_ShiftRightArithmetic, "ShiftRightArithmetic", AVX2, -1, 32, 2, {INS_invalid, INS_invalid, INS_psraw, INS_invalid, INS_psrad, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) HARDWARE_INTRINSIC(AVX2_ShiftRightLogical, "ShiftRightLogical", AVX2, -1, 32, 2, {INS_invalid, INS_invalid, INS_psrlw, INS_psrlw, INS_psrld, INS_psrld, INS_psrlq, INS_psrlq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) HARDWARE_INTRINSIC(AVX2_ShiftRightLogical128BitLane, "ShiftRightLogical128BitLane", AVX2, -1, 32, 2, {INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX2_ShiftRightLogicalVariable, "ShiftRightLogicalVariable", AVX2, -1, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsrlvd, INS_vpsrlvd, INS_vpsrlvq, INS_vpsrlvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_UnfixedSIMDSize|HW_Flag_NoContainment) +HARDWARE_INTRINSIC(AVX2_ShiftRightLogicalVariable, "ShiftRightLogicalVariable", AVX2, -1, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsrlvd, INS_vpsrlvd, INS_vpsrlvq, INS_vpsrlvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_UnfixedSIMDSize) HARDWARE_INTRINSIC(AVX2_Subtract, "Subtract", AVX2, -1, 32, 2, {INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_psubd, INS_psubd, INS_psubq, INS_psubq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX2_SubtractSaturate, "SubtractSaturate", AVX2, -1, 32, 2, {INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX2_UnpackHigh, "UnpackHigh", AVX2, -1, 32, 2, {INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) diff --git a/src/coreclr/src/jit/lowerxarch.cpp b/src/coreclr/src/jit/lowerxarch.cpp index fff2ec3..a0a4dfe 100644 --- a/src/coreclr/src/jit/lowerxarch.cpp +++ b/src/coreclr/src/jit/lowerxarch.cpp @@ -2612,7 +2612,37 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { case HW_Category_SimpleSIMD: case HW_Category_SIMDScalar: + case HW_Category_Scalar: { + if (HWIntrinsicInfo::GeneratesMultipleIns(intrinsicId)) + { + switch (intrinsicId) + { + case NI_SSE_CompareLessThanOrderedScalar: + case NI_SSE_CompareLessThanUnorderedScalar: + case NI_SSE_CompareLessThanOrEqualOrderedScalar: + case NI_SSE_CompareLessThanOrEqualUnorderedScalar: + case NI_SSE2_CompareLessThanOrderedScalar: + case NI_SSE2_CompareLessThanUnorderedScalar: + case NI_SSE2_CompareLessThanOrEqualOrderedScalar: + case NI_SSE2_CompareLessThanOrEqualUnorderedScalar: + { + // We need to swap the operands for CompareLessThanOrEqual + node->gtOp1 = op2; + node->gtOp2 = op1; + op2 = op1; + break; + } + + default: + { + // TODO-XArch-CQ: The Compare*OrderedScalar and Compare*UnorderedScalar methods + // are commutative if you also inverse the intrinsic. + break; + } + } + } + bool supportsRegOptional = false; if (IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional)) @@ -2745,9 +2775,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) default: { - // TODO-XArch-CQ: Assert that this is unreached after we have ensured the relevant node types are - // handled. - // https://github.com/dotnet/coreclr/issues/16497 + unreached(); break; } } -- 2.7.4