Adding support for the SSE MoveMask intrinsic
author: Tanner Gooding <tagoo@outlook.com>
Fri, 12 Jan 2018 05:27:23 +0000 (21:27 -0800)
committer: Tanner Gooding <tagoo@outlook.com>
Wed, 17 Jan 2018 00:04:56 +0000 (16:04 -0800)
src/jit/hwintrinsiccodegenxarch.cpp
src/jit/hwintrinsicxarch.cpp
src/jit/instrsxarch.h

index 0c223be..e2c86b9 100644 (file)
@@ -548,6 +548,11 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node)
             emit->emitIns_SIMD_R_R_R(INS_movlhps, targetReg, op1Reg, op2Reg, TYP_SIMD16);
             break;
 
+        case NI_SSE_MoveMask:
+            assert(baseType == TYP_FLOAT);
+            emit->emitIns_SIMD_R_R(INS_movmskps, targetReg, op1Reg, TYP_INT);
+            break;
+
         case NI_SSE_MoveScalar:
             assert(baseType == TYP_FLOAT);
             op2Reg = op2->gtRegNum;
index f42470a..fcf0f44 100644 (file)
@@ -616,6 +616,14 @@ GenTree* Compiler::impSSEIntrinsic(NamedIntrinsic        intrinsic,
             break;
         }
 
+        case NI_SSE_MoveMask:
+            assert(sig->numArgs == 1);
+            assert(JITtype2varType(sig->retType) == TYP_INT);
+            assert(getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args)) == TYP_FLOAT);
+            op1     = impSIMDPopStack(TYP_SIMD16);
+            retNode = gtNewSimdHWIntrinsicNode(TYP_INT, op1, intrinsic, TYP_FLOAT, 16);
+            break;
+
         case NI_SSE_StaticCast:
         {
             assert(sig->numArgs == 1);
index e91e9c1..afb84a5 100644 (file)
@@ -206,6 +206,7 @@ INST3( movupd,      "movupd"      , 0, IUM_WR, 0, 0, PCKDBL(0x11), BAD_CODE, PCK
 INST3( movups,      "movups"      , 0, IUM_WR, 0, 0, PCKFLT(0x11), BAD_CODE, PCKFLT(0x10))
 INST3( movhlps,     "movhlps"     , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, PCKFLT(0x12))
 INST3( movlhps,     "movlhps"     , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, PCKFLT(0x16))
+INST3( movmskps,    "movmskps"    , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, PCKFLT(0x50))
 INST3( unpckhps,    "unpckhps"    , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, PCKFLT(0x15))
 INST3( unpcklps,    "unpcklps"    , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, PCKFLT(0x14))