Fix codegen for StoreNonTemporal (#23511)
author Carol Eidt <carol.eidt@microsoft.com>
Thu, 28 Mar 2019 20:45:32 +0000 (13:45 -0700)
committer GitHub <noreply@github.com>
Thu, 28 Mar 2019 20:45:32 +0000 (13:45 -0700)
* Fix codegen for StoreNonTemporal

Also, add some asserts and mark some intrinsics as not supporting containment.

Fix #23509

src/jit/hwintrinsiccodegenxarch.cpp
src/jit/hwintrinsiclistxarch.h

index aa30f76..d21d294 100644 (file)
@@ -272,6 +272,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                 }
                 else if (category == HW_Category_MemoryStore)
                 {
+                    // The Mask instructions do not currently support containment of the address.
+                    assert(!op2->isContained());
                     if (intrinsicId == NI_AVX_MaskStore || intrinsicId == NI_AVX2_MaskStore)
                     {
                         emit->emitIns_AR_R_R(ins, simdSize, op2Reg, op3Reg, op1Reg, 0);
@@ -1462,6 +1464,8 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node)
             assert(baseType == TYP_UBYTE);
             assert(op2 == nullptr);
 
+            // These do not support containment.
+            assert(!op1->isContained());
             instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, node->gtSIMDBaseType);
             op1Reg          = op1->gtRegNum;
             emit->emitIns_AR(ins, emitTypeSize(baseType), op1Reg, 0);
@@ -1680,10 +1684,9 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
             assert(op1 != nullptr);
             assert(op2 != nullptr);
 
-            op2Reg          = op2->gtRegNum;
-            instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
-            op1Reg          = op1->gtRegNum;
-            emit->emitIns_AR_R(ins, emitTypeSize(baseType), op2Reg, op1Reg, 0);
+            instruction     ins   = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
+            GenTreeStoreInd store = storeIndirForm(node->TypeGet(), op1, op2);
+            emit->emitInsStoreInd(ins, emitTypeSize(baseType), &store);
             break;
         }
 
@@ -2128,6 +2131,8 @@ void CodeGen::genBMI1OrBMI2Intrinsic(GenTreeHWIntrinsic* node)
         case NI_BMI2_MultiplyNoFlags:
         case NI_BMI2_X64_MultiplyNoFlags:
         {
+            // These do not support containment
+            assert(!op2->isContained());
             int numArgs = HWIntrinsicInfo::lookupNumArgs(node);
             assert(numArgs == 2 || numArgs == 3);
 
@@ -2153,6 +2158,7 @@ void CodeGen::genBMI1OrBMI2Intrinsic(GenTreeHWIntrinsic* node)
                 argList                 = argList->Rest();
                 GenTree* op3            = argList->Current();
                 op3Reg                  = op3->gtRegNum;
+                assert(!op3->isContained());
                 assert(op3Reg != op1Reg);
                 assert(op3Reg != targetReg);
                 assert(op3Reg != REG_EDX);
@@ -2172,7 +2178,7 @@ void CodeGen::genBMI1OrBMI2Intrinsic(GenTreeHWIntrinsic* node)
             // generate code for MULX
             genHWIntrinsic_R_R_RM(node, ins, attr, targetReg, lowReg, op2);
 
-            // If requires the lower half result, store in the memory opinted by op3
+            // If requires the lower half result, store in the memory pointed to by op3
             if (numArgs == 3)
             {
                 emit->emitIns_AR_R(INS_mov, attr, lowReg, op3Reg, 0);
index de177f3..58e3fbb 100644 (file)
@@ -497,7 +497,7 @@ HARDWARE_INTRINSIC(AVX2_HorizontalSubtractSaturate,                 "HorizontalS
 HARDWARE_INTRINSIC(AVX2_InsertVector128,                            "InsertVector128",                              AVX2,         -1,              32,           3,     {INS_vinserti128,       INS_vinserti128,    INS_vinserti128,    INS_vinserti128,    INS_vinserti128,    INS_vinserti128,    INS_vinserti128,    INS_vinserti128,    INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
 HARDWARE_INTRINSIC(AVX2_LoadAlignedVector256NonTemporal,            "LoadAlignedVector256NonTemporal",              AVX2,         -1,              32,           1,     {INS_movntdqa,          INS_movntdqa,       INS_movntdqa,       INS_movntdqa,       INS_movntdqa,       INS_movntdqa,       INS_movntdqa,       INS_movntdqa,       INS_invalid,        INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX2_MaskLoad,                                   "MaskLoad",                                     AVX2,         -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_vpmaskmovd,     INS_vpmaskmovd,     INS_vpmaskmovq,     INS_vpmaskmovq,     INS_invalid,        INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_UnfixedSIMDSize)
-HARDWARE_INTRINSIC(AVX2_MaskStore,                                  "MaskStore",                                    AVX2,         -1,               0,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_vpmaskmovd,     INS_vpmaskmovd,     INS_vpmaskmovq,     INS_vpmaskmovq,     INS_invalid,        INS_invalid},           HW_Category_MemoryStore,            HW_Flag_UnfixedSIMDSize|HW_Flag_BaseTypeFromSecondArg)
+HARDWARE_INTRINSIC(AVX2_MaskStore,                                  "MaskStore",                                    AVX2,         -1,               0,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_vpmaskmovd,     INS_vpmaskmovd,     INS_vpmaskmovq,     INS_vpmaskmovq,     INS_invalid,        INS_invalid},           HW_Category_MemoryStore,            HW_Flag_NoContainment|HW_Flag_UnfixedSIMDSize|HW_Flag_BaseTypeFromSecondArg)
 HARDWARE_INTRINSIC(AVX2_Max,                                        "Max",                                          AVX2,         -1,              32,           2,     {INS_pmaxsb,            INS_pmaxub,         INS_pmaxsw,         INS_pmaxuw,         INS_pmaxsd,         INS_pmaxud,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AVX2_Min,                                        "Min",                                          AVX2,         -1,              32,           2,     {INS_pminsb,            INS_pminub,         INS_pminsw,         INS_pminuw,         INS_pminsd,         INS_pminud,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AVX2_MoveMask,                                   "MoveMask",                                     AVX2,         -1,              32,           1,     {INS_pmovmskb,          INS_pmovmskb,       INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
@@ -580,7 +580,7 @@ HARDWARE_INTRINSIC(BMI2_IsSupported,                                "get_IsSuppo
 HARDWARE_INTRINSIC(BMI2_ParallelBitDeposit,                         "ParallelBitDeposit",                           BMI2,         -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_pdep,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(BMI2_ParallelBitExtract,                         "ParallelBitExtract",                           BMI2,         -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_pext,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(BMI2_ZeroHighBits,                               "ZeroHighBits",                                 BMI2,         -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_bzhi,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(BMI2_MultiplyNoFlags,                            "MultiplyNoFlags",                              BMI2,         -1,               0,          -1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_mulx,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_MaybeMemoryStore|HW_Flag_MultiIns|HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(BMI2_MultiplyNoFlags,                            "MultiplyNoFlags",                              BMI2,         -1,               0,          -1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_mulx,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoContainment|HW_Flag_MaybeMemoryStore|HW_Flag_MultiIns|HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
 
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
@@ -591,7 +591,7 @@ HARDWARE_INTRINSIC(BMI2_X64_IsSupported,                            "get_IsSuppo
 HARDWARE_INTRINSIC(BMI2_X64_ParallelBitDeposit,                     "ParallelBitDeposit",                           BMI2_X64,     -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_pdep,           INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(BMI2_X64_ParallelBitExtract,                     "ParallelBitExtract",                           BMI2_X64,     -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_pext,           INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(BMI2_X64_ZeroHighBits,                           "ZeroHighBits",                                 BMI2_X64,     -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_bzhi,           INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(BMI2_X64_MultiplyNoFlags,                        "MultiplyNoFlags",                              BMI2_X64,     -1,               0,          -1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_mulx,           INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_MaybeMemoryStore|HW_Flag_MultiIns|HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(BMI2_X64_MultiplyNoFlags,                        "MultiplyNoFlags",                              BMI2_X64,     -1,               0,          -1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_mulx,           INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoContainment|HW_Flag_MaybeMemoryStore|HW_Flag_MultiIns|HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
 
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags