From eaf9aefffcda0d588147c2a244182d4c34c83a0b Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Thu, 28 Dec 2017 17:55:50 -0800 Subject: [PATCH] Adding support for the SSE compare eq, gt, ge, lt, le, ne, ord, and unord scalar intrinsics --- src/jit/emitxarch.cpp | 2 ++ src/jit/hwintrinsiccodegenxarch.cpp | 52 +++++++++++++++++++++++++++++++++++++ src/jit/hwintrinsicxarch.cpp | 12 +++++++++ src/jit/instrsxarch.h | 2 ++ 4 files changed, 68 insertions(+) diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp index e2fb1da..f380abb 100644 --- a/src/jit/emitxarch.cpp +++ b/src/jit/emitxarch.cpp @@ -87,6 +87,8 @@ bool emitter::IsDstDstSrcAVXInstruction(instruction ins) case INS_andps: case INS_cmppd: case INS_cmpps: + case INS_cmpsd: + case INS_cmpss: case INS_cvtsi2sd: case INS_cvtsi2ss: case INS_divpd: diff --git a/src/jit/hwintrinsiccodegenxarch.cpp b/src/jit/hwintrinsiccodegenxarch.cpp index 4fcb00b..76f2706 100644 --- a/src/jit/hwintrinsiccodegenxarch.cpp +++ b/src/jit/hwintrinsiccodegenxarch.cpp @@ -246,6 +246,12 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node) emit->emitIns_SIMD_R_R_R_I(INS_cmpps, targetReg, op1Reg, op2Reg, 0, TYP_SIMD16); break; + case NI_SSE_CompareEqualScalar: + assert(baseType == TYP_FLOAT); + op2Reg = op2->gtRegNum; + emit->emitIns_SIMD_R_R_R_I(INS_cmpss, targetReg, op1Reg, op2Reg, 0, TYP_SIMD16); + break; + case NI_SSE_CompareGreaterThan: case NI_SSE_CompareNotLessThanOrEqual: assert(baseType == TYP_FLOAT); @@ -253,6 +259,13 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node) emit->emitIns_SIMD_R_R_R_I(INS_cmpps, targetReg, op1Reg, op2Reg, 6, TYP_SIMD16); break; + case NI_SSE_CompareGreaterThanScalar: + case NI_SSE_CompareNotLessThanOrEqualScalar: + assert(baseType == TYP_FLOAT); + op2Reg = op2->gtRegNum; + emit->emitIns_SIMD_R_R_R_I(INS_cmpss, targetReg, op1Reg, op2Reg, 6, TYP_SIMD16); + break; + case NI_SSE_CompareGreaterThanOrEqual: case NI_SSE_CompareNotLessThan: assert(baseType == TYP_FLOAT); @@ -260,6 +273,13 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node) emit->emitIns_SIMD_R_R_R_I(INS_cmpps, targetReg, op1Reg, op2Reg, 5, TYP_SIMD16); break; + case NI_SSE_CompareGreaterThanOrEqualScalar: + case NI_SSE_CompareNotLessThanScalar: + assert(baseType == TYP_FLOAT); + op2Reg = op2->gtRegNum; + emit->emitIns_SIMD_R_R_R_I(INS_cmpss, targetReg, op1Reg, op2Reg, 5, TYP_SIMD16); + break; + case NI_SSE_CompareLessThan: case NI_SSE_CompareNotGreaterThanOrEqual: assert(baseType == TYP_FLOAT); @@ -267,6 +287,13 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node) emit->emitIns_SIMD_R_R_R_I(INS_cmpps, targetReg, op1Reg, op2Reg, 1, TYP_SIMD16); break; + case NI_SSE_CompareLessThanScalar: + case NI_SSE_CompareNotGreaterThanOrEqualScalar: + assert(baseType == TYP_FLOAT); + op2Reg = op2->gtRegNum; + emit->emitIns_SIMD_R_R_R_I(INS_cmpss, targetReg, op1Reg, op2Reg, 1, TYP_SIMD16); + break; + case NI_SSE_CompareLessThanOrEqual: case NI_SSE_CompareNotGreaterThan: assert(baseType == TYP_FLOAT); @@ -274,24 +301,49 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node) emit->emitIns_SIMD_R_R_R_I(INS_cmpps, targetReg, op1Reg, op2Reg, 2, TYP_SIMD16); break; + case NI_SSE_CompareLessThanOrEqualScalar: + case NI_SSE_CompareNotGreaterThanScalar: + assert(baseType == TYP_FLOAT); + op2Reg = op2->gtRegNum; + emit->emitIns_SIMD_R_R_R_I(INS_cmpss, targetReg, op1Reg, op2Reg, 2, TYP_SIMD16); + break; + case NI_SSE_CompareNotEqual: assert(baseType == TYP_FLOAT); op2Reg = op2->gtRegNum; emit->emitIns_SIMD_R_R_R_I(INS_cmpps, targetReg, op1Reg, op2Reg, 4, TYP_SIMD16); break; + case NI_SSE_CompareNotEqualScalar: + assert(baseType == TYP_FLOAT); + op2Reg = op2->gtRegNum; + emit->emitIns_SIMD_R_R_R_I(INS_cmpss, targetReg, op1Reg, op2Reg, 4, TYP_SIMD16); + break; + case NI_SSE_CompareOrdered: assert(baseType == TYP_FLOAT); op2Reg = op2->gtRegNum; emit->emitIns_SIMD_R_R_R_I(INS_cmpps, targetReg, op1Reg, op2Reg, 7, TYP_SIMD16); break; + case NI_SSE_CompareOrderedScalar: + assert(baseType == TYP_FLOAT); + op2Reg = op2->gtRegNum; + emit->emitIns_SIMD_R_R_R_I(INS_cmpss, targetReg, op1Reg, op2Reg, 7, TYP_SIMD16); + break; + case NI_SSE_CompareUnordered: assert(baseType == TYP_FLOAT); op2Reg = op2->gtRegNum; emit->emitIns_SIMD_R_R_R_I(INS_cmpps, targetReg, op1Reg, op2Reg, 3, TYP_SIMD16); break; + case NI_SSE_CompareUnorderedScalar: + assert(baseType == TYP_FLOAT); + op2Reg = op2->gtRegNum; + emit->emitIns_SIMD_R_R_R_I(INS_cmpss, targetReg, op1Reg, op2Reg, 3, TYP_SIMD16); + break; + case NI_SSE_Divide: assert(baseType == TYP_FLOAT); op2Reg = op2->gtRegNum; diff --git a/src/jit/hwintrinsicxarch.cpp b/src/jit/hwintrinsicxarch.cpp index 7959956..df0ca9c 100644 --- a/src/jit/hwintrinsicxarch.cpp +++ b/src/jit/hwintrinsicxarch.cpp @@ -525,17 +525,29 @@ GenTree* Compiler::impSSEIntrinsic(NamedIntrinsic intrinsic, case NI_SSE_And: case NI_SSE_AndNot: case NI_SSE_CompareEqual: + case NI_SSE_CompareEqualScalar: case NI_SSE_CompareGreaterThan: + case NI_SSE_CompareGreaterThanScalar: case NI_SSE_CompareGreaterThanOrEqual: + case NI_SSE_CompareGreaterThanOrEqualScalar: case NI_SSE_CompareLessThan: + case NI_SSE_CompareLessThanScalar: case NI_SSE_CompareLessThanOrEqual: + case NI_SSE_CompareLessThanOrEqualScalar: case NI_SSE_CompareNotEqual: + case NI_SSE_CompareNotEqualScalar: case NI_SSE_CompareNotGreaterThan: + case NI_SSE_CompareNotGreaterThanScalar: case NI_SSE_CompareNotGreaterThanOrEqual: + case NI_SSE_CompareNotGreaterThanOrEqualScalar: case NI_SSE_CompareNotLessThan: + case NI_SSE_CompareNotLessThanScalar: case NI_SSE_CompareNotLessThanOrEqual: + case NI_SSE_CompareNotLessThanOrEqualScalar: case NI_SSE_CompareOrdered: + case NI_SSE_CompareOrderedScalar: case NI_SSE_CompareUnordered: + case NI_SSE_CompareUnorderedScalar: case NI_SSE_Divide: case NI_SSE_DivideScalar: case NI_SSE_Max: diff --git a/src/jit/instrsxarch.h b/src/jit/instrsxarch.h index ee5f5c0..d9de25b 100644 --- a/src/jit/instrsxarch.h +++ b/src/jit/instrsxarch.h @@ -289,6 +289,8 @@ INST3( ucomisd, "ucomisd", 0, IUM_RD, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x2E // Note that these instructions not only compare but also overwrite the first source. INST3( cmpps, "cmpps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0xC2)) // compare packed singles INST3( cmppd, "cmppd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xC2)) // compare packed doubles +INST3( cmpss, "cmpss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0xC2)) // compare scalar singles +INST3( cmpsd, "cmpsd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0xC2)) // compare scalar doubles //SSE2 packed integer operations INST3( paddb, "paddb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xFC)) // Add packed byte integers -- 2.7.4