Do not emit Avx2 instruction `vpcmpeqd ymmReg, ymmReg, ymmReg` for `Vector256<T>...
author Egor Chesakov <Egor.Chesakov@microsoft.com>
Fri, 19 Feb 2021 02:27:07 +0000 (18:27 -0800)
committer GitHub <noreply@github.com>
Fri, 19 Feb 2021 02:27:07 +0000 (18:27 -0800)
Emit `vcmptrueps ymmReg, ymmReg, ymmReg` instead

src/coreclr/jit/hwintrinsiccodegenxarch.cpp
src/coreclr/jit/hwintrinsiclistxarch.h

index a805128..49858f9 100644 (file)
@@ -1252,22 +1252,31 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node)
         }
 
         case NI_Vector128_get_AllBitsSet:
-        case NI_Vector256_get_AllBitsSet:
-        {
             assert(op1 == nullptr);
             if (varTypeIsFloating(baseType) && compiler->compOpportunisticallyDependsOn(InstructionSet_AVX))
             {
-                // The immediate 8 means Equal (unordered, non-signaling)
-                // This is not available without VEX prefix.
-                emit->emitIns_SIMD_R_R_R_I(ins, attr, targetReg, targetReg, targetReg, 8);
+                // The following corresponds to vcmptrueps pseudo-op and not available without VEX prefix.
+                emit->emitIns_SIMD_R_R_R_I(ins, attr, targetReg, targetReg, targetReg, 15);
             }
             else
             {
-                assert(varTypeIsIntegral(baseType) || !compiler->compIsaSupportedDebugOnly(InstructionSet_AVX));
                 emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, attr, targetReg, targetReg, targetReg);
             }
             break;
-        }
+
+        case NI_Vector256_get_AllBitsSet:
+            assert(op1 == nullptr);
+            if (varTypeIsIntegral(baseType) && compiler->compOpportunisticallyDependsOn(InstructionSet_AVX2))
+            {
+                emit->emitIns_SIMD_R_R_R(ins, attr, targetReg, targetReg, targetReg);
+            }
+            else
+            {
+                assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX));
+                // The following corresponds to vcmptrueps pseudo-op.
+                emit->emitIns_SIMD_R_R_R_I(INS_cmpps, attr, targetReg, targetReg, targetReg, 15);
+            }
+            break;
 
         default:
         {
index a9c4696..e5aec99 100644 (file)
@@ -45,8 +45,7 @@ HARDWARE_INTRINSIC(Vector128,       AsVector128,
 HARDWARE_INTRINSIC(Vector128,       Create,                                     16,            -1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector128,       CreateScalarUnsafe,                         16,             1,      {INS_movd,              INS_movd,               INS_movd,               INS_movd,               INS_movd,               INS_movd,               INS_movd,               INS_movd,               INS_movss,              INS_movsdsse2},         HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Vector128,       Dot,                                        16,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_NoCodeGen)
-// The instruction generated for float/double depends on which ISAs are supported
-HARDWARE_INTRINSIC(Vector128,       get_AllBitsSet,                             16,             0,      {INS_pcmpeqd,           INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_cmpps,              INS_cmppd},             HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector128,       get_AllBitsSet,                             16,             0,      {INS_pcmpeqd,           INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_cmpps,              INS_cmpps},             HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Vector128,       get_Count,                                  16,             0,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Vector128,       get_Zero,                                   16,             0,      {INS_xorps,             INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps},             HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Vector128,       GetElement,                                 16,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
@@ -75,8 +74,7 @@ HARDWARE_INTRINSIC(Vector256,       AsUInt32,
 HARDWARE_INTRINSIC(Vector256,       AsUInt64,                                   32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Vector256,       AsVector,                                   32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Vector256,       AsVector256,                                32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-// The instruction generated for float/double depends on which ISAs are supported
-HARDWARE_INTRINSIC(Vector256,       get_AllBitsSet,                             32,             0,      {INS_pcmpeqd,           INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_cmpps,              INS_cmppd},             HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector256,       get_AllBitsSet,                             32,             0,      {INS_pcmpeqd,           INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_cmpps,              INS_cmpps},             HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Vector256,       get_Count,                                  32,             0,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Vector256,       get_Zero,                                   32,             0,      {INS_xorps,             INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps},             HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Vector256,       Create,                                     32,            -1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)