Implement Sse2 memory fence instructions
authorJacek Blaszczynski <biosciencenow@outlook.com>
Wed, 7 Feb 2018 22:28:38 +0000 (23:28 +0100)
committerTanner Gooding <tagoo@outlook.com>
Thu, 8 Feb 2018 20:44:20 +0000 (12:44 -0800)
src/jit/emitxarch.cpp
src/jit/hwintrinsiccodegenxarch.cpp
src/jit/hwintrinsiclistxarch.h
src/jit/hwintrinsicxarch.cpp
src/jit/instrsxarch.h

index 55c5036..b9f318f 100644 (file)
@@ -267,11 +267,20 @@ bool emitter::Is4ByteSSE4Instruction(instruction ins)
 bool emitter::TakesVexPrefix(instruction ins)
 {
     // special case vzeroupper as it requires 2-byte VEX prefix
-    // special case sfence and the prefetch instructions as they never take a VEX prefix
-    if ((ins == INS_vzeroupper) || (ins == INS_sfence) || (ins == INS_prefetcht0) || (ins == INS_prefetcht1) ||
-        (ins == INS_prefetcht2) || (ins == INS_prefetchnta))
+    // special case (l|m|s)fence and the prefetch instructions as they never take a VEX prefix
+    switch (ins)
     {
-        return false;
+        case INS_lfence:
+        case INS_mfence:
+        case INS_prefetchnta:
+        case INS_prefetcht0:
+        case INS_prefetcht1:
+        case INS_prefetcht2:
+        case INS_sfence:
+        case INS_vzeroupper:
+            return false;
+        default:
+            break;
     }
 
     return IsAVXInstruction(ins);
@@ -2474,7 +2483,7 @@ void emitter::emitIns(instruction ins)
                            ins == INS_sahf || ins == INS_stosb || ins == INS_stosd || ins == INS_stosp
 #ifndef LEGACY_BACKEND
                            // These instructions take zero operands
-                           || ins == INS_vzeroupper || ins == INS_sfence
+                           || ins == INS_vzeroupper || ins == INS_lfence || ins == INS_mfence || ins == INS_sfence
 #endif
                            );
 
index e21def9..597a72b 100644 (file)
@@ -744,7 +744,7 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
     regNumber      targetReg   = node->gtRegNum;
     var_types      targetType  = node->TypeGet();
     var_types      baseType    = node->gtSIMDBaseType;
-    instruction    ins         = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
+    instruction    ins         = INS_invalid;
     regNumber      op1Reg      = REG_NA;
     regNumber      op2Reg      = REG_NA;
     emitter*       emit        = getEmitter();
@@ -765,6 +765,7 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
             assert(op2 != nullptr);
             assert(baseType == TYP_DOUBLE);
 
+            ins    = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
             op2Reg = op2->gtRegNum;
             ival   = Compiler::ivalOfHWIntrinsic(intrinsicID);
             emit->emitIns_SIMD_R_R_R_I(ins, emitTypeSize(TYP_SIMD16), targetReg, op1Reg, op2Reg, ival);
@@ -772,11 +773,29 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
             break;
         }
 
+        case NI_SSE2_LoadFence:
+        {
+            assert(baseType == TYP_VOID);
+            assert(op1 == nullptr);
+            assert(op2 == nullptr);
+            emit->emitIns(INS_lfence);
+            break;
+        }
+        case NI_SSE2_MemoryFence:
+        {
+            assert(baseType == TYP_VOID);
+            assert(op1 == nullptr);
+            assert(op2 == nullptr);
+            emit->emitIns(INS_mfence);
+            break;
+        }
+
         case NI_SSE2_MoveMask:
         {
             assert(op2 == nullptr);
             assert(baseType == TYP_BYTE || baseType == TYP_UBYTE || baseType == TYP_DOUBLE);
 
+            ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
             emit->emitIns_R_R(ins, emitTypeSize(TYP_INT), targetReg, op1Reg);
             break;
         }
@@ -788,6 +807,7 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
             assert(op1 == nullptr);
             assert(op2 == nullptr);
 
+            ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
             emit->emitIns_SIMD_R_R_R(ins, emitTypeSize(TYP_SIMD16), targetReg, targetReg, targetReg);
             break;
         }
index 05f00d2..7ffbcef 100644 (file)
@@ -150,9 +150,11 @@ HARDWARE_INTRINSIC(SSE2_ConvertToVector128Int32WithTruncation,       "ConvertToV
 HARDWARE_INTRINSIC(SSE2_ConvertToVector128Single,                    "ConvertToVector128Single",                         SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtdq2ps,  INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_cvtpd2ps},          HW_Category_SimpleSIMD,                        HW_Flag_BaseTypeFromArg)
 HARDWARE_INTRINSIC(SSE2_Divide,                                      "Divide",                                           SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_divpd},             HW_Category_SimpleSIMD,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE2_LoadAlignedVector128,                        "LoadAlignedVector128",                             SSE2,       -1,           16,          1,            {INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_movdqa,    INS_invalid,   INS_movapd},            HW_Category_MemoryLoad,                        HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE2_LoadFence,                                   "LoadFence",                                        SSE2,       -1,            0,          0,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_Special,                           HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE2_LoadScalarVector128,                         "LoadScalarVector128",                              SSE2,       -1,           16,          1,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movd,      INS_movd,      INS_movq,      INS_movq,      INS_invalid,   INS_movsdsse2},         HW_Category_MemoryLoad,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE2_LoadVector128,                               "LoadVector128",                                    SSE2,       -1,           16,          1,            {INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_movdqu,    INS_invalid,   INS_movupd},            HW_Category_MemoryLoad,                        HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE2_Max,                                         "Max",                                              SSE2,       -1,           16,          2,            {INS_invalid,   INS_pmaxub,    INS_pmaxsw,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_maxpd},             HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE2_MemoryFence,                                 "MemoryFence",                                      SSE2,       -1,            0,          0,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid},           HW_Category_Special,                           HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE2_Min,                                         "Min",                                              SSE2,       -1,           16,          2,            {INS_invalid,   INS_pminub,    INS_pminsw,    INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_minpd},             HW_Category_SimpleSIMD,                        HW_Flag_Commutative)
 HARDWARE_INTRINSIC(SSE2_MoveMask,                                    "MoveMask",                                         SSE2,       -1,           16,          1,            {INS_pmovmskb,  INS_pmovmskb,  INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_movmskpd},          HW_Category_Special,                           HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE2_Multiply,                                    "Multiply",                                         SSE2,       -1,           16,          2,            {INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_invalid,   INS_pmuludq,   INS_invalid,   INS_invalid,   INS_invalid,   INS_mulpd},             HW_Category_SimpleSIMD,                        HW_Flag_Commutative|HW_Flag_BaseTypeFromArg)
index 64101e9..0fc2251 100644 (file)
@@ -736,6 +736,8 @@ GenTree* Compiler::impSSE2Intrinsic(NamedIntrinsic        intrinsic,
     var_types baseType = TYP_UNKNOWN;
     var_types retType  = TYP_UNKNOWN;
 
+    assert((simdSize == 16) || (simdSize == 0));
+
     switch (intrinsic)
     {
         case NI_SSE2_CompareLessThan:
@@ -754,6 +756,17 @@ GenTree* Compiler::impSSE2Intrinsic(NamedIntrinsic        intrinsic,
             }
             break;
 
+        case NI_SSE2_LoadFence:
+        case NI_SSE2_MemoryFence:
+        {
+            assert(sig->numArgs == 0);
+            assert(JITtype2varType(sig->retType) == TYP_VOID);
+            assert(simdSize == 0);
+
+            retNode = gtNewSimdHWIntrinsicNode(TYP_VOID, intrinsic, TYP_VOID, simdSize);
+            break;
+        }
+
         case NI_SSE2_MoveMask:
             assert(sig->numArgs == 1);
             retType = JITtype2varType(sig->retType);
index eedfd63..49640d5 100644 (file)
@@ -220,6 +220,8 @@ INST3( shufpd,      "shufpd"      , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, PCK
 
 INST3( punpckhdq,   "punpckhdq"   , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, PCKDBL(0x6A))
 
+INST3( lfence,      "lfence"      , 0, IUM_RD, 0, 0, 0x000FE8AE,   BAD_CODE, BAD_CODE)
+INST3( mfence,      "mfence"      , 0, IUM_RD, 0, 0, 0x000FF0AE,   BAD_CODE, BAD_CODE)
 INST3( prefetchnta, "prefetchnta" , 0, IUM_RD, 0, 0, 0x000F0018,   BAD_CODE, BAD_CODE)
 INST3( prefetcht0,  "prefetcht0"  , 0, IUM_RD, 0, 0, 0x000F0818,   BAD_CODE, BAD_CODE)
 INST3( prefetcht1,  "prefetcht1"  , 0, IUM_RD, 0, 0, 0x000F1018,   BAD_CODE, BAD_CODE)