Implementing the Avx2 AlignRight, Blend, and ConvertTo* hwintrinsics.
authorTanner Gooding <tagoo@outlook.com>
Thu, 12 Jul 2018 14:40:30 +0000 (07:40 -0700)
committerTanner Gooding <tagoo@outlook.com>
Mon, 16 Jul 2018 22:13:57 +0000 (15:13 -0700)
Commit migrated from https://github.com/dotnet/coreclr/commit/71a9d8bbb0d80bb9b998c9df2ab2170027b796e3

src/coreclr/src/jit/emitxarch.cpp
src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp
src/coreclr/src/jit/hwintrinsiclistxarch.h
src/coreclr/src/jit/instrsxarch.h
src/coreclr/src/jit/lowerxarch.cpp
src/coreclr/src/jit/lsraxarch.cpp

index bdbf335..17aae89 100644 (file)
@@ -360,6 +360,7 @@ bool emitter::IsDstDstSrcAVXInstruction(instruction ins)
         case INS_vinserti128:
         case INS_vmaskmovps:
         case INS_vmaskmovpd:
+        case INS_vpblendd:
         case INS_vperm2i128:
         case INS_vperm2f128:
         case INS_vpermilpsvar:
index 7e5a981..dafb45a 100644 (file)
@@ -1887,11 +1887,33 @@ void CodeGen::genAvxOrAvx2Intrinsic(GenTreeHWIntrinsic* node)
 
     if ((op1 != nullptr) && !op1->OperIsList())
     {
+        op1Reg = op1->gtRegNum;
         genConsumeOperands(node);
     }
 
     switch (intrinsicId)
     {
+        case NI_AVX2_ConvertToDouble:
+        {
+            assert(op2 == nullptr);
+            if (op1Reg != targetReg)
+            {
+                instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
+                emit->emitIns_R_R(ins, emitTypeSize(targetType), targetReg, op1Reg);
+            }
+            break;
+        }
+
+        case NI_AVX2_ConvertToInt32:
+        case NI_AVX2_ConvertToUInt32:
+        {
+            assert(op2 == nullptr);
+            assert((baseType == TYP_INT) || (baseType == TYP_UINT));
+            instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
+            emit->emitIns_R_R(ins, emitActualTypeSize(baseType), op1Reg, targetReg);
+            break;
+        }
+
         case NI_AVX_SetZeroVector256:
         {
             assert(op1 == nullptr);
@@ -1913,7 +1935,6 @@ void CodeGen::genAvxOrAvx2Intrinsic(GenTreeHWIntrinsic* node)
         {
             assert(op1 != nullptr);
             assert(op2 == nullptr);
-            op1Reg = op1->gtRegNum;
             if (varTypeIsIntegral(baseType))
             {
                 // If the argument is a integer, it needs to be moved into a XMM register
@@ -1977,7 +1998,6 @@ void CodeGen::genAvxOrAvx2Intrinsic(GenTreeHWIntrinsic* node)
             // to ensure that Bits MAXVL-1:128 are zeroed.
 
             assert(op2 == nullptr);
-            regNumber op1Reg = op1->gtRegNum;
             emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD16), targetReg, op1Reg);
             break;
         }
@@ -1985,8 +2005,6 @@ void CodeGen::genAvxOrAvx2Intrinsic(GenTreeHWIntrinsic* node)
         case NI_AVX_GetLowerHalf:
         {
             assert(op2 == nullptr);
-            regNumber op1Reg = op1->gtRegNum;
-
             if (op1Reg != targetReg)
             {
                 emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD32), targetReg, op1Reg);
@@ -2027,7 +2045,6 @@ void CodeGen::genAvxOrAvx2Intrinsic(GenTreeHWIntrinsic* node)
             if (numArgs == 2)
             {
                 assert(intrinsicId == NI_AVX_ExtractVector128 || NI_AVX_ExtractVector128);
-                op1Reg = op1->gtRegNum;
                 op2Reg = op2->gtRegNum;
                 lastOp = op2;
             }
index 5c76d89..bfb9cec 100644 (file)
@@ -408,9 +408,11 @@ HARDWARE_INTRINSIC(AVX2_IsSupported,                                "get_IsSuppo
 HARDWARE_INTRINSIC(AVX2_Abs,                                        "Abs",                                          AVX2,         -1,              32,           1,     {INS_pabsb,             INS_pabsb,          INS_pabsw,          INS_pabsw,          INS_pabsd,          INS_pabsd,          INS_paddq,          INS_paddq,          INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(AVX2_Add,                                        "Add",                                          AVX2,         -1,              32,           2,     {INS_paddb,             INS_paddb,          INS_paddw,          INS_paddw,          INS_paddd,          INS_paddd,          INS_paddq,          INS_paddq,          INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AVX2_AddSaturate,                                "AddSaturate",                                  AVX2,         -1,              32,           2,     {INS_paddsb,            INS_paddusb,        INS_paddsw,         INS_paddusw,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AVX2_AlignRight,                                 "AlignRight",                                   AVX2,         -1,              32,           3,     {INS_palignr,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(AVX2_And,                                        "And",                                          AVX2,         -1,              32,           2,     {INS_pand,              INS_pand,           INS_pand,           INS_pand,           INS_pand,           INS_pand,           INS_pand,           INS_pand,           INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AVX2_AndNot,                                     "AndNot",                                       AVX2,         -1,              32,           2,     {INS_pandn,             INS_pandn,          INS_pandn,          INS_pandn,          INS_pandn,          INS_pandn,          INS_pandn,          INS_pandn,          INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX2_Average,                                    "Average",                                      AVX2,         -1,              32,           2,     {INS_invalid,           INS_pavgb,          INS_invalid,        INS_pavgw,          INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
+HARDWARE_INTRINSIC(AVX2_Blend,                                      "Blend",                                        AVX2,         -1,               0,           3,     {INS_invalid,           INS_invalid,        INS_pblendw,        INS_pblendw,        INS_vpblendd,       INS_vpblendd,       INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_UnfixedSIMDSize|HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(AVX2_BlendVariable,                              "BlendVariable",                                AVX2,         -1,              32,           3,     {INS_vpblendvb,         INS_vpblendvb,      INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX2_BroadcastScalarToVector128,                 "BroadcastScalarToVector128",                   AVX2,         -1,              16,           1,     {INS_vpbroadcastb,      INS_vpbroadcastb,   INS_vpbroadcastw,   INS_vpbroadcastw,   INS_vpbroadcastd,   INS_vpbroadcastd,   INS_vpbroadcastq,   INS_vpbroadcastq,   INS_vbroadcastss,   INS_movddup},           HW_Category_SIMDScalar,             HW_Flag_OneTypeGeneric)
 HARDWARE_INTRINSIC(AVX2_BroadcastScalarToVector256,                 "BroadcastScalarToVector256",                   AVX2,         -1,              32,           1,     {INS_vpbroadcastb,      INS_vpbroadcastb,   INS_vpbroadcastw,   INS_vpbroadcastw,   INS_vpbroadcastd,   INS_vpbroadcastd,   INS_vpbroadcastq,   INS_vpbroadcastq,   INS_vbroadcastss,   INS_vbroadcastsd},      HW_Category_SIMDScalar,             HW_Flag_OneTypeGeneric)
@@ -418,6 +420,9 @@ HARDWARE_INTRINSIC(AVX2_BroadcastVector128ToVector256,              "BroadcastVe
 HARDWARE_INTRINSIC(AVX2_CompareEqual,                               "CompareEqual",                                 AVX2,         -1,              32,           2,     {INS_pcmpeqb,           INS_pcmpeqb,        INS_pcmpeqw,        INS_pcmpeqw,        INS_pcmpeqd,        INS_pcmpeqd,        INS_pcmpeqq,        INS_pcmpeqq,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AVX2_CompareGreaterThan,                         "CompareGreaterThan",                           AVX2,         -1,              32,           2,     {INS_pcmpgtb,           INS_invalid,        INS_pcmpgtw,        INS_invalid,        INS_pcmpgtd,        INS_invalid,        INS_pcmpgtq,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX2_ExtractVector128,                           "ExtractVector128",                             AVX2,         -1,              32,          -1,     {INS_vextracti128,      INS_vextracti128,   INS_vextracti128,   INS_vextracti128,   INS_vextracti128,   INS_vextracti128,   INS_vextracti128,   INS_vextracti128,   INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_FullRangeIMM)
+HARDWARE_INTRINSIC(AVX2_ConvertToDouble,                            "ConvertToDouble",                              AVX2,         -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_movsdsse2},         HW_Category_Helper,                 HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX2_ConvertToInt32,                             "ConvertToInt32",                               AVX2,         -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_mov_xmm2i,      INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX2_ConvertToUInt32,                            "ConvertToUInt32",                              AVX2,         -1,              32,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_mov_xmm2i,      INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(AVX2_ConvertToVector256Int16,                    "ConvertToVector256Int16",                      AVX2,         -1,              32,           1,     {INS_pmovsxbw,          INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(AVX2_ConvertToVector256UInt16,                   "ConvertToVector256UInt16",                     AVX2,         -1,              32,           1,     {INS_invalid,           INS_pmovzxbw,       INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(AVX2_ConvertToVector256Int32,                    "ConvertToVector256Int32",                      AVX2,         -1,              32,           1,     {INS_pmovsxbd,          INS_invalid,        INS_pmovsxwd,       INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg)
index 995cb4c..87fd76a 100644 (file)
@@ -480,6 +480,7 @@ INST3( vinserti128,  "inserti128"  , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, SS
 INST3( vzeroupper,   "zeroupper"   , 0, IUM_WR, 0, 0, 0xC577F8,     BAD_CODE, BAD_CODE)      // Zero upper 128-bits of all YMM regs (includes 2-byte fixed VEX prefix)
 INST3( vperm2i128,   "perm2i128"   , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, SSE3A(0x46))   // Permute 128-bit halves of input register
 INST3( vpermq,       "permq"       , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, SSE3A(0x00))   // Permute 64-bit of input register
+INST3( vpblendd,     "pblendd"     , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, SSE3A(0x02))   // Blend Packed DWORDs
 INST3( vblendvps,    "blendvps"    , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, SSE3A(0x4A))   // Variable Blend Packed Singles
 INST3( vblendvpd,    "blendvpd"    , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, SSE3A(0x4B))   // Variable Blend Packed Doubles
 INST3( vpblendvb,    "pblendvb"    , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, SSE3A(0x4C))   // Variable Blend Packed Bytes
index 4787e45..938ddb5 100644 (file)
@@ -2386,6 +2386,7 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, Ge
                 case NI_AVX_InsertVector128:
                 case NI_AVX_Permute:
                 case NI_AVX_Permute2x128:
+                case NI_AVX2_Blend:
                 case NI_AVX2_InsertVector128:
                 case NI_AVX2_Permute2x128:
                 case NI_AVX2_ShiftLeftLogical:
@@ -2629,6 +2630,8 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
                     case NI_SSE2_ConvertToInt64:
                     case NI_SSE2_ConvertToUInt32:
                     case NI_SSE2_ConvertToUInt64:
+                    case NI_AVX2_ConvertToInt32:
+                    case NI_AVX2_ConvertToUInt32:
                     {
                         if (varTypeIsIntegral(baseType))
                         {
@@ -2958,6 +2961,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
                         case NI_AVX_Insert:
                         case NI_AVX_Permute2x128:
                         case NI_AVX_Shuffle:
+                        case NI_AVX2_Blend:
                         case NI_AVX2_Permute2x128:
                         {
                             if (IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional))
index 089e80f..f5c4a03 100644 (file)
@@ -2387,6 +2387,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
             case NI_SSE2_ConvertToDouble:
             case NI_AVX_ExtendToVector256:
             case NI_AVX_GetLowerHalf:
+            case NI_AVX2_ConvertToDouble:
             {
                 assert(numArgs == 1);
                 assert(!isRMW);