Adding support for the SSE Store, StoreAligned, StoreAlignedNonTemporal, StoreHigh, StoreLow, and StoreScalar intrinsics
author Tanner Gooding <tagoo@outlook.com>
Sun, 28 Jan 2018 19:37:12 +0000 (11:37 -0800)
committer Tanner Gooding <tagoo@outlook.com>
Sun, 28 Jan 2018 23:45:40 +0000 (15:45 -0800)
src/jit/flowgraph.cpp
src/jit/gentree.h
src/jit/hwintrinsiccodegenxarch.cpp
src/jit/hwintrinsiclistxarch.h
src/jit/hwintrinsicxarch.cpp
src/jit/instrsxarch.h
src/jit/liveness.cpp

index 4a5e032..beddbad 100644 (file)
@@ -10028,7 +10028,7 @@ bool Compiler::fgCheckRemoveStmt(BasicBlock* block, GenTreePtr node)
     GenTreePtr tree = stmt->gtStmtExpr;
     genTreeOps oper = tree->OperGet();
 
-    if (OperIsControlFlow(oper) || oper == GT_NO_OP)
+    if (OperIsControlFlow(oper) || GenTree::OperIsHWIntrinsic(oper) || oper == GT_NO_OP)
     {
         return false;
     }
index b765321..6fee2ad 100644 (file)
@@ -1083,7 +1083,7 @@ public:
         {
             // These are the only operators which can produce either VOID or non-VOID results.
             assert(OperIs(GT_NOP, GT_CALL, GT_LOCKADD, GT_FIELD_LIST, GT_COMMA) || OperIsCompare() || OperIsLong() ||
-                   OperIsSIMD());
+                   OperIsSIMD() || OperIsHWIntrinsic());
             return false;
         }
 
@@ -1593,6 +1593,20 @@ public:
         return OperIsSIMD(gtOper);
     }
 
+    static bool OperIsHWIntrinsic(genTreeOps gtOper) // Reports whether gtOper is the hardware-intrinsic oper.
+    {
+#ifdef FEATURE_HW_INTRINSICS
+        return gtOper == GT_HWIntrinsic; // GT_HWIntrinsic is the only oper this check accepts.
+#else
+        return false; // Without FEATURE_HW_INTRINSICS the check is false for every oper.
+#endif // FEATURE_HW_INTRINSICS
+    }
+
+    bool OperIsHWIntrinsic() const // Instance form: applies the static check to this node's own gtOper.
+    {
+        return OperIsHWIntrinsic(gtOper);
+    }
+
 #ifdef FEATURE_HW_INTRINSICS
     inline bool OperIsSimdHWIntrinsic() const;
 #else
index 69b3cf5..138d4dd 100644 (file)
@@ -37,8 +37,8 @@ static bool genIsTableDrivenHWIntrinsic(HWIntrinsicCategory category, HWIntrinsi
 {
     // TODO - make more categories to the table-driven framework
     // HW_Category_Helper and HW_Flag_MultiIns usually need manual codegen
-    const bool tableDrivenCategory =
-        category == HW_Category_SimpleSIMD || category == HW_Category_MemoryLoad || category == HW_Category_SIMDScalar;
+    const bool tableDrivenCategory = category == HW_Category_SimpleSIMD || category == HW_Category_MemoryLoad ||
+                                     category == HW_Category_MemoryStore || category == HW_Category_SIMDScalar;
     const bool tableDrivenFlag = (flags & HW_Flag_MultiIns) == 0;
     return tableDrivenCategory && tableDrivenFlag;
 }
@@ -94,7 +94,11 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
 
             case 2:
                 genConsumeOperands(node);
-                if (ival != -1)
+                if (category == HW_Category_MemoryStore)
+                {
+                    emit->emitIns_AR_R(ins, emitTypeSize(TYP_SIMD16), op2->gtRegNum, op1->gtRegNum, 0);
+                }
+                else if (ival != -1)
                 {
                     genHWIntrinsic_R_R_RM_I(node, ins);
                 }
index ec9c9d7..cb5edc0 100644 (file)
@@ -104,6 +104,12 @@ HARDWARE_INTRINSIC(SSE_Shuffle,                                      "Shuffle",
 HARDWARE_INTRINSIC(SSE_Sqrt,                                         "Sqrt",                                             SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_sqrtps,    INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE_SqrtScalar,                                   "SqrtScalar",                                       SSE,        -1,           16,           1,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_sqrtss,    INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE_StaticCast,                                   "StaticCast",                                       SSE,        -1,           16,           1,           {INS_movaps,    INS_movaps,    INS_movaps,    INS_movaps,    INS_movaps,    INS_movaps,    INS_movaps,    INS_movaps,    INS_movaps,    INS_movaps},            HW_Category_Helper,                            HW_Flag_TwoTypeGeneric)
+HARDWARE_INTRINSIC(SSE_Store,                                        "Store",                                            SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movups,    INS_invalid},           HW_Category_MemoryStore,                       HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_StoreAligned,                                 "StoreAligned",                                     SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movaps,    INS_invalid},           HW_Category_MemoryStore,                       HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_StoreAlignedNonTemporal,                      "StoreAlignedNonTemporal",                          SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movntps,   INS_invalid},           HW_Category_MemoryStore,                       HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_StoreHigh,                                    "StoreHigh",                                        SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movhps,    INS_invalid},           HW_Category_MemoryStore,                       HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_StoreLow,                                     "StoreLow",                                         SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movlps,    INS_invalid},           HW_Category_MemoryStore,                       HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE_StoreScalar,                                  "StoreScalar",                                      SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movss,     INS_invalid},           HW_Category_MemoryStore,                       HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE_Subtract,                                     "Subtract",                                         SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_subps,     INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE_SubtractScalar,                               "SubtractScalar",                                   SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_subss,     INS_invalid},           HW_Category_SIMDScalar,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE_UnpackHigh,                                   "UnpackHigh",                                       SSE,        -1,           16,           2,           {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_unpckhps,  INS_invalid},           HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
index 80091f1..3f057ad 100644 (file)
@@ -531,7 +531,17 @@ GenTree* Compiler::impX86HWIntrinsic(NamedIntrinsic        intrinsic,
     {
         if (!varTypeIsSIMD(retType))
         {
-            baseType = getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args));
+            if (retType != TYP_VOID)
+            {
+                baseType = getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args));
+            }
+            else
+            {
+                assert(category == HW_Category_MemoryStore);
+                baseType =
+                    getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, info.compCompHnd->getArgNext(sig->args)));
+            }
+
             assert(baseType != TYP_UNKNOWN);
         }
 
index 1b85332..c2ad1f9 100644 (file)
@@ -193,6 +193,7 @@ INST3( xorps,       "xorps"       , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, PCK
 INST3( cvttsd2si,   "cvttsd2si"   , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, SSEDBL(0x2C)) // cvt with trunc scalar double to signed DWORDs
 
 #ifndef LEGACY_BACKEND
+INST3( movntps,     "movntps"     , 0, IUM_WR, 0, 0, PCKFLT(0x2B), BAD_CODE, BAD_CODE)
 INST3( movdqu,      "movdqu"      , 0, IUM_WR, 0, 0, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F))
 INST3( movdqa,      "movdqa"      , 0, IUM_WR, 0, 0, PCKDBL(0x7F), BAD_CODE, PCKDBL(0x6F))
 INST3( movlpd,      "movlpd"      , 0, IUM_WR, 0, 0, PCKDBL(0x13), BAD_CODE, PCKDBL(0x12))
index 7f268a5..dd48c5b 100644 (file)
@@ -2251,6 +2251,9 @@ void Compiler::fgComputeLifeLIR(VARSET_TP& life, BasicBlock* block, VARSET_VALAR
             case GT_RETURNTRAP:
             case GT_PUTARG_STK:
             case GT_IL_OFFSET:
+#ifdef FEATURE_HW_INTRINSICS
+            case GT_HWIntrinsic:
+#endif // FEATURE_HW_INTRINSICS
                 // Never remove these nodes, as they are always side-effecting.
                 //
                 // NOTE: the only side-effect of some of these nodes (GT_CMP, GT_SUB_HI) is a write to the flags