[JIT] More ARM64 comparison instruction optimizations with Vector.Zero (#64783)
author Will Smith <lol.tihan@gmail.com>
Thu, 17 Feb 2022 02:32:20 +0000 (18:32 -0800)
committer GitHub <noreply@github.com>
Thu, 17 Feb 2022 02:32:20 +0000 (18:32 -0800)
* Normalizing instructions with an implicit vector zero as the second operand

* Checking number of operands before looking at operands

* Remove assert

* Check commutative flag

* Fixed commutative check

* Handling more HW intrinsics

* Finishing up

* Finishing up

* Formatting

* numOperands = 1

* Feedback

* Added HW_Flag_SupportsContainmentZero

* Added extra assert

* Removing flag and simplifying codegen for containment with zeros

src/coreclr/jit/emitarm64.cpp
src/coreclr/jit/hwintrinsic.h
src/coreclr/jit/hwintrinsiccodegenarm64.cpp
src/coreclr/jit/hwintrinsiclistarm64.h
src/coreclr/jit/instrsarm64.h
src/coreclr/jit/lowerarmarch.cpp
src/tests/JIT/Regression/JitBlue/Runtime_33972/Runtime_33972.cs

index 2345ba4..7aa1c89 100644 (file)
@@ -12964,7 +12964,7 @@ void emitter::emitDispIns(
                 emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
                 emitDispVectorReg(id->idReg2(), id->idInsOpt(), false);
             }
-            if (ins == INS_fcmeq)
+            if (ins == INS_fcmeq || ins == INS_fcmge || ins == INS_fcmgt || ins == INS_fcmle || ins == INS_fcmlt)
             {
                 printf(", ");
                 emitDispImm(0, false);
@@ -12988,7 +12988,7 @@ void emitter::emitDispIns(
                 emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
                 emitDispVectorReg(id->idReg2(), id->idInsOpt(), false);
             }
-            if (ins == INS_cmeq)
+            if (ins == INS_cmeq || ins == INS_cmge || ins == INS_cmgt || ins == INS_cmle || ins == INS_cmlt)
             {
                 printf(", ");
                 emitDispImm(0, false);
@@ -13129,7 +13129,8 @@ void emitter::emitDispIns(
                 emitDispReg(id->idReg1(), size, true);
                 emitDispReg(id->idReg2(), size, false);
             }
-            if (fmt == IF_DV_2L && ins == INS_cmeq)
+            if (fmt == IF_DV_2L &&
+                (ins == INS_cmeq || ins == INS_cmge || ins == INS_cmgt || ins == INS_cmle || ins == INS_cmlt))
             {
                 printf(", ");
                 emitDispImm(0, false);
index 9d2d1a0..53c3920 100644 (file)
@@ -159,6 +159,7 @@ enum HWIntrinsicFlag : unsigned int
 
     // The intrinsic supports some sort of containment analysis
     HW_Flag_SupportsContainment = 0x2000
+
 #else
 #error Unsupported platform
 #endif
index a2819f5..f9bddea 100644 (file)
@@ -371,7 +371,14 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                     break;
 
                 case 2:
-                    if (isRMW)
+                    // This handles optimizations for instructions that have
+                    // an implicit 'zero' vector of what would be the second operand.
+                    if (HWIntrinsicInfo::SupportsContainment(intrin.id) && intrin.op2->isContained() &&
+                        intrin.op2->IsVectorZero())
+                    {
+                        GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op1Reg, opt);
+                    }
+                    else if (isRMW)
                     {
                         assert(targetReg != op2Reg);
 
@@ -499,29 +506,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                 GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt);
                 break;
 
-            case NI_AdvSimd_CompareEqual:
-            case NI_AdvSimd_Arm64_CompareEqual:
-            case NI_AdvSimd_Arm64_CompareEqualScalar:
-                if (intrin.op1->isContained())
-                {
-                    assert(HWIntrinsicInfo::SupportsContainment(intrin.id));
-                    assert(intrin.op1->IsVectorZero());
-
-                    GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op2Reg, opt);
-                }
-                else if (intrin.op2->isContained())
-                {
-                    assert(HWIntrinsicInfo::SupportsContainment(intrin.id));
-                    assert(intrin.op2->IsVectorZero());
-
-                    GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op1Reg, opt);
-                }
-                else
-                {
-                    GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt);
-                }
-                break;
-
             case NI_AdvSimd_AbsoluteCompareLessThan:
             case NI_AdvSimd_AbsoluteCompareLessThanOrEqual:
             case NI_AdvSimd_CompareLessThan:
index 2a1e989..b5960c9 100644 (file)
@@ -240,9 +240,9 @@ HARDWARE_INTRINSIC(AdvSimd,       BitwiseClear,
 HARDWARE_INTRINSIC(AdvSimd,       BitwiseSelect,                                                    -1,      3,     {INS_bsl,            INS_bsl,            INS_bsl,            INS_bsl,            INS_bsl,            INS_bsl,            INS_bsl,            INS_bsl,            INS_bsl,            INS_bsl},         HW_Category_SIMD,                  HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(AdvSimd,       Ceiling,                                                          -1,      1,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_frintp,         INS_invalid},     HW_Category_SIMD,                  HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AdvSimd,       CeilingScalar,                                                     8,      1,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_frintp,         INS_frintp},      HW_Category_SIMD,                  HW_Flag_SIMDScalar)
-HARDWARE_INTRINSIC(AdvSimd,       CompareEqual,                                                     -1,      2,     {INS_cmeq,           INS_cmeq,           INS_cmeq,           INS_cmeq,           INS_cmeq,           INS_cmeq,           INS_invalid,        INS_invalid,        INS_fcmeq,          INS_invalid},     HW_Category_SIMD,                  HW_Flag_Commutative|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment)
-HARDWARE_INTRINSIC(AdvSimd,       CompareGreaterThan,                                               -1,      2,     {INS_cmgt,           INS_cmhi,           INS_cmgt,           INS_cmhi,           INS_cmgt,           INS_cmhi,           INS_invalid,        INS_invalid,        INS_fcmgt,          INS_invalid},     HW_Category_SIMD,                  HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AdvSimd,       CompareGreaterThanOrEqual,                                        -1,      2,     {INS_cmge,           INS_cmhs,           INS_cmge,           INS_cmhs,           INS_cmge,           INS_cmhs,           INS_invalid,        INS_invalid,        INS_fcmge,          INS_invalid},     HW_Category_SIMD,                  HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(AdvSimd,       CompareEqual,                                                     -1,      2,     {INS_cmeq,           INS_cmeq,           INS_cmeq,           INS_cmeq,           INS_cmeq,           INS_cmeq,           INS_invalid,        INS_invalid,        INS_fcmeq,          INS_invalid},     HW_Category_SIMD,                  HW_Flag_Commutative|HW_Flag_SupportsContainment)
+HARDWARE_INTRINSIC(AdvSimd,       CompareGreaterThan,                                               -1,      2,     {INS_cmgt,           INS_cmhi,           INS_cmgt,           INS_cmhi,           INS_cmgt,           INS_cmhi,           INS_invalid,        INS_invalid,        INS_fcmgt,          INS_invalid},     HW_Category_SIMD,                  HW_Flag_SupportsContainment)
+HARDWARE_INTRINSIC(AdvSimd,       CompareGreaterThanOrEqual,                                        -1,      2,     {INS_cmge,           INS_cmhs,           INS_cmge,           INS_cmhs,           INS_cmge,           INS_cmhs,           INS_invalid,        INS_invalid,        INS_fcmge,          INS_invalid},     HW_Category_SIMD,                  HW_Flag_SupportsContainment)
 HARDWARE_INTRINSIC(AdvSimd,       CompareLessThan,                                                  -1,      2,     {INS_cmgt,           INS_cmhi,           INS_cmgt,           INS_cmhi,           INS_cmgt,           INS_cmhi,           INS_invalid,        INS_invalid,        INS_fcmgt,          INS_invalid},     HW_Category_SIMD,                  HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(AdvSimd,       CompareLessThanOrEqual,                                           -1,      2,     {INS_cmge,           INS_cmhs,           INS_cmge,           INS_cmhs,           INS_cmge,           INS_cmhs,           INS_invalid,        INS_invalid,        INS_fcmge,          INS_invalid},     HW_Category_SIMD,                  HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(AdvSimd,       CompareTest,                                                      -1,      2,     {INS_cmtst,          INS_cmtst,          INS_cmtst,          INS_cmtst,          INS_cmtst,          INS_cmtst,          INS_invalid,        INS_invalid,        INS_cmtst,          INS_invalid},     HW_Category_SIMD,                  HW_Flag_Commutative)
@@ -492,12 +492,12 @@ HARDWARE_INTRINSIC(AdvSimd_Arm64, AddPairwiseScalar,
 HARDWARE_INTRINSIC(AdvSimd_Arm64, AddSaturate,                                                      -1,      2,     {INS_suqadd,         INS_usqadd,         INS_suqadd,         INS_usqadd,         INS_suqadd,         INS_usqadd,         INS_suqadd,         INS_usqadd,         INS_invalid,        INS_invalid},     HW_Category_SIMD,                  HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasRMWSemantics)
 HARDWARE_INTRINSIC(AdvSimd_Arm64, AddSaturateScalar,                                                 8,      2,     {INS_sqadd,          INS_uqadd,          INS_sqadd,          INS_uqadd,          INS_sqadd,          INS_uqadd,          INS_suqadd,         INS_usqadd,         INS_invalid,        INS_invalid},     HW_Category_SIMD,                  HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(AdvSimd_Arm64, Ceiling,                                                          16,      1,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_frintp},      HW_Category_SIMD,                  HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareEqual,                                                     16,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmeq,           INS_cmeq,           INS_invalid,        INS_fcmeq},       HW_Category_SIMD,                  HW_Flag_Commutative|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment)
-HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareEqualScalar,                                                8,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmeq,           INS_cmeq,           INS_fcmeq,          INS_fcmeq},       HW_Category_SIMD,                  HW_Flag_Commutative|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment)
-HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThan,                                               16,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmgt,           INS_cmhi,           INS_invalid,        INS_fcmgt},       HW_Category_SIMD,                  HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThanOrEqual,                                        16,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmge,           INS_cmhs,           INS_invalid,        INS_fcmge},       HW_Category_SIMD,                  HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThanOrEqualScalar,                                   8,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmge,           INS_cmhs,           INS_fcmge,          INS_fcmge},       HW_Category_SIMD,                  HW_Flag_SIMDScalar)
-HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThanScalar,                                          8,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmgt,           INS_cmhi,           INS_fcmgt,          INS_fcmgt},       HW_Category_SIMD,                  HW_Flag_SIMDScalar)
+HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareEqual,                                                     16,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmeq,           INS_cmeq,           INS_invalid,        INS_fcmeq},       HW_Category_SIMD,                  HW_Flag_Commutative|HW_Flag_SupportsContainment)
+HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareEqualScalar,                                                8,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmeq,           INS_cmeq,           INS_fcmeq,          INS_fcmeq},       HW_Category_SIMD,                  HW_Flag_Commutative|HW_Flag_SIMDScalar|HW_Flag_SupportsContainment)
+HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThan,                                               16,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmgt,           INS_cmhi,           INS_invalid,        INS_fcmgt},       HW_Category_SIMD,                  HW_Flag_SupportsContainment)
+HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThanOrEqual,                                        16,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmge,           INS_cmhs,           INS_invalid,        INS_fcmge},       HW_Category_SIMD,                  HW_Flag_SupportsContainment)
+HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThanOrEqualScalar,                                   8,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmge,           INS_cmhs,           INS_fcmge,          INS_fcmge},       HW_Category_SIMD,                  HW_Flag_SIMDScalar|HW_Flag_SupportsContainment)
+HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThanScalar,                                          8,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmgt,           INS_cmhi,           INS_fcmgt,          INS_fcmgt},       HW_Category_SIMD,                  HW_Flag_SIMDScalar|HW_Flag_SupportsContainment)
 HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareLessThan,                                                  16,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmgt,           INS_cmhi,           INS_invalid,        INS_fcmgt},       HW_Category_SIMD,                  HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareLessThanOrEqual,                                           16,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmge,           INS_cmhs,           INS_invalid,        INS_fcmge},       HW_Category_SIMD,                  HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareLessThanOrEqualScalar,                                      8,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_cmge,           INS_cmhs,           INS_fcmge,          INS_fcmge},       HW_Category_SIMD,                  HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen)
index d4e7ef5..294dcf0 100644 (file)
@@ -318,33 +318,33 @@ INST4(cmeq,        "cmeq",         0,      IF_EN4H,   0x7EE08C00,  0x2E208C00,
 INST4(cmge,        "cmge",         0,      IF_EN4H,   0x5EE03C00,  0x0E203C00,  0x7E208800,  0x2E208800)
                                    //  cmge    Vd,Vn,Vm             DV_3E  01011110111mmmmm 001111nnnnnddddd   5EE0 3C00   Vd,Vn,Vm   (scalar)
                                    //  cmge    Vd,Vn,Vm             DV_3A  0Q001110XX1mmmmm 001111nnnnnddddd   0E20 3C00   Vd,Vn,Vm   (vector)
-                                   //  cmge    Vd,Vn                DV_2L  01111110XX100000 100010nnnnnddddd   5E20 8800   Vd,Vn      (scalar)
-                                   //  cmge    Vd,Vn                DV_2M  0Q101110XX100000 100010nnnnnddddd   2E20 8800   Vd,Vn      (vector)
+                                   //  cmge    Vd,Vn,#0             DV_2L  01111110XX100000 100010nnnnnddddd   7E20 8800   Vd,Vn,#0   (scalar - with zero)
+                                   //  cmge    Vd,Vn,#0             DV_2M  0Q101110XX100000 100010nnnnnddddd   2E20 8800   Vd,Vn,#0   (vector - with zero)
 
 INST4(cmgt,        "cmgt",         0,      IF_EN4H,   0x5EE03400,  0x0E203400,  0x5E208800,  0x0E208800)
                                    //  cmgt    Vd,Vn,Vm             DV_3E  01011110111mmmmm 001101nnnnnddddd   5EE0 3400   Vd,Vn,Vm   (scalar)
                                    //  cmgt    Vd,Vn,Vm             DV_3A  0Q001110XX1mmmmm 001101nnnnnddddd   0E20 3400   Vd,Vn,Vm   (vector)
-                                   //  cmgt    Vd,Vn                DV_2L  01011110XX100000 100010nnnnnddddd   5E20 8800   Vd,Vn      (scalar)
-                                   //  cmgt    Vd,Vn                DV_2M  0Q001110XX100000 101110nnnnnddddd   0E20 8800   Vd,Vn      (vector)
+                                   //  cmgt    Vd,Vn,#0             DV_2L  01011110XX100000 100010nnnnnddddd   5E20 8800   Vd,Vn,#0   (scalar - with zero)
+                                   //  cmgt    Vd,Vn,#0             DV_2M  0Q001110XX100000 100010nnnnnddddd   0E20 8800   Vd,Vn,#0   (vector - with zero)
 
 //    enum         name            info               DV_3D        DV_3B        DV_2G        DV_2A
 INST4(fcmeq,       "fcmeq",        0,      IF_EN4I,   0x5E20E400,  0x0E20E400,  0x5EA0D800,  0x0EA0D800)
-                                   //  fcmeq   Vd,Vn,Vm             DV_3D  010111100X1mmmmm 111001nnnnnddddd   5E20 E400   Vd Vn Vm   (scalar)
+                                   //  fcmeq   Vd,Vn,Vm             DV_3D  010111100X1mmmmm 111001nnnnnddddd   5E20 E400   Vd,Vn,Vm   (scalar)
                                    //  fcmeq   Vd,Vn,Vm             DV_3B  0Q0011100X1mmmmm 111001nnnnnddddd   0E20 E400   Vd,Vn,Vm   (vector)
-                                   //  fcmeq   Vd,Vn,#0             DV_2G  010111101X100000 110110nnnnnddddd   5EA0 D800   Vd Vn,#0   (scalar - with zero)
-                                   //  fcmeq   Vd,Vn,#0             DV_2A  0Q0011101X100000 110110nnnnnddddd   0EA0 D800   Vd Vn,#0   (vector - with zero)
+                                   //  fcmeq   Vd,Vn,#0             DV_2G  010111101X100000 110110nnnnnddddd   5EA0 D800   Vd,Vn,#0   (scalar - with zero)
+                                   //  fcmeq   Vd,Vn,#0             DV_2A  0Q0011101X100000 110110nnnnnddddd   0EA0 D800   Vd,Vn,#0   (vector - with zero)
 
 INST4(fcmge,       "fcmge",        0,      IF_EN4I,   0x7E20E400,  0x2E20E400,  0x7EA0C800,  0x2EA0C800)
-                                   //  fcmge   Vd,Vn,Vm             DV_3D  011111100X1mmmmm 111001nnnnnddddd   7E20 E400   Vd Vn Vm   (scalar)
+                                   //  fcmge   Vd,Vn,Vm             DV_3D  011111100X1mmmmm 111001nnnnnddddd   7E20 E400   Vd,Vn,Vm   (scalar)
                                    //  fcmge   Vd,Vn,Vm             DV_3B  0Q1011100X1mmmmm 111001nnnnnddddd   2E20 E400   Vd,Vn,Vm   (vector)
-                                   //  fcmge   Vd,Vn                DV_2G  011111101X100000 110010nnnnnddddd   7EA0 E800   Vd Vn      (scalar)
-                                   //  fcmge   Vd,Vn                DV_2A  0Q1011101X100000 110010nnnnnddddd   2EA0 C800   Vd Vn      (vector)
+                                   //  fcmge   Vd,Vn,#0             DV_2G  011111101X100000 110010nnnnnddddd   7EA0 C800   Vd,Vn,#0   (scalar - with zero)
+                                   //  fcmge   Vd,Vn,#0             DV_2A  0Q1011101X100000 110010nnnnnddddd   2EA0 C800   Vd,Vn,#0   (vector - with zero)
 
 INST4(fcmgt,       "fcmgt",        0,      IF_EN4I,   0x7EA0E400,  0x2EA0E400,  0x5EA0C800,  0x0EA0C800)
-                                   //  fcmgt   Vd,Vn,Vm             DV_3D  011111101X1mmmmm 111001nnnnnddddd   7EA0 E400   Vd Vn Vm   (scalar)
+                                   //  fcmgt   Vd,Vn,Vm             DV_3D  011111101X1mmmmm 111001nnnnnddddd   7EA0 E400   Vd,Vn,Vm   (scalar)
                                    //  fcmgt   Vd,Vn,Vm             DV_3B  0Q1011101X1mmmmm 111001nnnnnddddd   2EA0 E400   Vd,Vn,Vm   (vector)
-                                   //  fcmgt   Vd,Vn                DV_2G  010111101X100000 110010nnnnnddddd   5EA0 E800   Vd Vn      (scalar)
-                                   //  fcmgt   Vd,Vn                DV_2A  0Q0011101X100000 110010nnnnnddddd   0EA0 C800   Vd Vn      (vector)
+                                   //  fcmgt   Vd,Vn,#0             DV_2G  010111101X100000 110010nnnnnddddd   5EA0 C800   Vd,Vn,#0   (scalar - with zero)
+                                   //  fcmgt   Vd,Vn,#0             DV_2A  0Q0011101X100000 110010nnnnnddddd   0EA0 C800   Vd,Vn,#0   (vector - with zero)
 
 //    enum         name            info               DV_2N        DV_2O        DV_3E        DV_3A
 INST4(sqshl,       "sqshl",        0,      IF_EN4J,   0x5F007400,  0x0F007400,  0x5E204C00,  0x0E204C00)
@@ -707,12 +707,12 @@ INST2(fabs,        "fabs",         0,      IF_EN2J,   0x0EA0F800,  0x1E20C000)
                                    //  fabs    Vd,Vn                DV_2G  000111100X100000 110000nnnnnddddd   1E20 C000   Vd,Vn    (scalar)
 
 INST2(fcmle,       "fcmle",        0,      IF_EN2J,   0x2EA0D800,  0x7EA0D800)
-                                   //  fcmle   Vd,Vn                DV_2A  0Q1011101X100000 111110nnnnnddddd   2EA0 D800   Vd,Vn    (vector)
-                                   //  fcmle   Vd,Vn                DV_2G  011111101X100000 110110nnnnnddddd   7EA0 D800   Vd,Vn    (scalar)
+                                   //  fcmle   Vd,Vn,#0             DV_2A  0Q1011101X100000 110110nnnnnddddd   2EA0 D800   Vd,Vn,#0 (vector - with zero)
+                                   //  fcmle   Vd,Vn,#0             DV_2G  011111101X100000 110110nnnnnddddd   7EA0 D800   Vd,Vn,#0 (scalar - with zero)
 
 INST2(fcmlt,       "fcmlt",        0,      IF_EN2J,   0x0EA0E800,  0x5EA0E800)
-                                   //  fcmlt   Vd,Vn                DV_2A  0Q0011101X100000 111110nnnnnddddd   0EA0 E800   Vd,Vn    (vector)
-                                   //  fcmlt   Vd,Vn                DV_2G  010111101X100000 111010nnnnnddddd   5EA0 E800   Vd,Vn    (scalar)
+                                   //  fcmlt   Vd,Vn,#0             DV_2A  0Q0011101X100000 111010nnnnnddddd   0EA0 E800   Vd,Vn,#0 (vector - with zero)
+                                   //  fcmlt   Vd,Vn,#0             DV_2G  010111101X100000 111010nnnnnddddd   5EA0 E800   Vd,Vn,#0 (scalar - with zero)
 
 INST2(fcvtxn,      "fcvtxn",       NRW,    IF_EN2J,   0x2E616800,  0x7E616800)
                                    //  fcvtxn  Vd,Vn                DV_2A  0010111001100001 011010nnnnnddddd   2E61 6800   Vd,Vn    (vector)
@@ -768,12 +768,12 @@ INST2(abs,         "abs",          0,      IF_EN2K,   0x0E20B800,  0x5E20B800)
                                    //  abs     Vd,Vn                DV_2L  01011110XX100000 101110nnnnnddddd   5E20 B800   Vd,Vn    (scalar)
 
 INST2(cmle,        "cmle",         0,      IF_EN2K,   0x2E209800,  0x7E209800)
-                                   //  cmle    Vd,Vn                DV_2M  0Q101110XX100000 100110nnnnnddddd   2E20 9800   Vd,Vn    (vector)
-                                   //  cmle    Vd,Vn                DV_2L  01111110XX100000 100110nnnnnddddd   7E20 9800   Vd,Vn    (scalar)
+                                   //  cmle    Vd,Vn,#0             DV_2M  0Q101110XX100000 100110nnnnnddddd   2E20 9800   Vd,Vn,#0 (vector - with zero)
+                                   //  cmle    Vd,Vn,#0             DV_2L  01111110XX100000 100110nnnnnddddd   7E20 9800   Vd,Vn,#0 (scalar - with zero)
 
 INST2(cmlt,        "cmlt",         0,      IF_EN2K,   0x0E20A800,  0x5E20A800)
-                                   //  cmlt    Vd,Vn                DV_2M  0Q101110XX100000 101010nnnnnddddd   0E20 A800   Vd,Vn    (vector)
-                                   //  cmlt    Vd,Vn                DV_2L  01011110XX100000 101010nnnnnddddd   5E20 A800   Vd,Vn    (scalar)
+                                   //  cmlt    Vd,Vn,#0             DV_2M  0Q001110XX100000 101010nnnnnddddd   0E20 A800   Vd,Vn,#0 (vector - with zero)
+                                   //  cmlt    Vd,Vn,#0             DV_2L  01011110XX100000 101010nnnnnddddd   5E20 A800   Vd,Vn,#0 (scalar - with zero)
 
 INST2(sqabs,       "sqabs",        0,      IF_EN2K,   0x0E207800,  0x5E207800)
                                    //  sqabs   Vd,Vn                DV_2M  0Q001110XX100000 011110nnnnnddddd   0E20 7800   Vd,Vn    (vector)
index 41b80fb..433046e 100644 (file)
@@ -1970,7 +1970,15 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
             {
                 if (intrin.op1->IsVectorZero())
                 {
-                    MakeSrcContained(node, intrin.op1);
+                    GenTree* op1 = intrin.op1;
+                    GenTree* op2 = intrin.op2;
+
+                    assert(HWIntrinsicInfo::IsCommutative(intrin.id));
+                    MakeSrcContained(node, op1);
+
+                    // Swap the operands here to make the containment checks in codegen simpler
+                    node->Op(1) = op2;
+                    node->Op(2) = op1;
                 }
                 else if (intrin.op2->IsVectorZero())
                 {
@@ -1979,6 +1987,24 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
                 break;
             }
 
+            case NI_AdvSimd_CompareGreaterThan:
+            case NI_AdvSimd_CompareGreaterThanOrEqual:
+            case NI_AdvSimd_Arm64_CompareGreaterThan:
+            case NI_AdvSimd_Arm64_CompareGreaterThanOrEqual:
+            case NI_AdvSimd_Arm64_CompareGreaterThanScalar:
+            case NI_AdvSimd_Arm64_CompareGreaterThanOrEqualScalar:
+            {
+                // Containment is not supported for unsigned base types as the corresponding instructions:
+                //    - cmhi
+                //    - cmhs
+                // require both operands; they do not have a 'with zero'.
+                if (intrin.op2->IsVectorZero() && !varTypeIsUnsigned(intrin.baseType))
+                {
+                    MakeSrcContained(node, intrin.op2);
+                }
+                break;
+            }
+
             case NI_Vector64_CreateScalarUnsafe:
             case NI_Vector128_CreateScalarUnsafe:
             case NI_AdvSimd_DuplicateToVector64:
index 7535cd3..6e99557 100644 (file)
@@ -264,6 +264,106 @@ class Program
         return AdvSimd.Arm64.CompareEqualScalar(Vector64<long>.Zero, right);
     }
 
+    // CompareGreaterThan
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static Vector64<byte> AdvSimd_CompareGreaterThan_Vector64_Byte_Zero(Vector64<byte> left)
+    {
+        return AdvSimd.CompareGreaterThan(left, Vector64<byte>.Zero);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static Vector64<float> AdvSimd_CompareGreaterThan_Vector64_Single_Zero(Vector64<float> left)
+    {
+        return AdvSimd.CompareGreaterThan(left, Vector64<float>.Zero);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static Vector128<byte> AdvSimd_CompareGreaterThan_Vector128_Byte_Zero(Vector128<byte> left)
+    {
+        return AdvSimd.CompareGreaterThan(left, Vector128<byte>.Zero);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static Vector128<float> AdvSimd_CompareGreaterThan_Vector128_Single_Zero(Vector128<float> left)
+    {
+        return AdvSimd.CompareGreaterThan(left, Vector128<float>.Zero);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static Vector128<double> AdvSimd_Arm64_CompareGreaterThan_Vector128_Double_Zero(Vector128<double> left)
+    {
+        return AdvSimd.Arm64.CompareGreaterThan(left, Vector128<double>.Zero);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static Vector128<long> AdvSimd_Arm64_CompareGreaterThan_Vector128_Int64_Zero(Vector128<long> left)
+    {
+        return AdvSimd.Arm64.CompareGreaterThan(left, Vector128<long>.Zero);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static Vector64<double> AdvSimd_Arm64_CompareGreaterThanScalar_Vector64_Double_Zero(Vector64<double> left)
+    {
+        return AdvSimd.Arm64.CompareGreaterThanScalar(left, Vector64<double>.Zero);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static Vector64<long> AdvSimd_Arm64_CompareGreaterThanScalar_Vector64_Int64_Zero(Vector64<long> left)
+    {
+        return AdvSimd.Arm64.CompareGreaterThanScalar(left, Vector64<long>.Zero);
+    }
+
+    // CompareGreaterThanOrEqual
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static Vector64<byte> AdvSimd_CompareGreaterThanOrEqual_Vector64_Byte_Zero(Vector64<byte> left)
+    {
+        return AdvSimd.CompareGreaterThanOrEqual(left, Vector64<byte>.Zero);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static Vector64<float> AdvSimd_CompareGreaterThanOrEqual_Vector64_Single_Zero(Vector64<float> left)
+    {
+        return AdvSimd.CompareGreaterThanOrEqual(left, Vector64<float>.Zero);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static Vector128<byte> AdvSimd_CompareGreaterThanOrEqual_Vector128_Byte_Zero(Vector128<byte> left)
+    {
+        return AdvSimd.CompareGreaterThanOrEqual(left, Vector128<byte>.Zero);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static Vector128<float> AdvSimd_CompareGreaterThanOrEqual_Vector128_Single_Zero(Vector128<float> left)
+    {
+        return AdvSimd.CompareGreaterThanOrEqual(left, Vector128<float>.Zero);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static Vector128<double> AdvSimd_Arm64_CompareGreaterThanOrEqual_Vector128_Double_Zero(Vector128<double> left)
+    {
+        return AdvSimd.Arm64.CompareGreaterThanOrEqual(left, Vector128<double>.Zero);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static Vector128<long> AdvSimd_Arm64_CompareGreaterThanOrEqual_Vector128_Int64_Zero(Vector128<long> left)
+    {
+        return AdvSimd.Arm64.CompareGreaterThanOrEqual(left, Vector128<long>.Zero);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static Vector64<double> AdvSimd_Arm64_CompareGreaterThanOrEqualScalar_Vector64_Double_Zero(Vector64<double> left)
+    {
+        return AdvSimd.Arm64.CompareGreaterThanOrEqualScalar(left, Vector64<double>.Zero);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static Vector64<long> AdvSimd_Arm64_CompareGreaterThanOrEqualScalar_Vector64_Int64_Zero(Vector64<long> left)
+    {
+        return AdvSimd.Arm64.CompareGreaterThanOrEqualScalar(left, Vector64<long>.Zero);
+    }
+
     // Validation
 
     unsafe static bool ValidateResult_Vector64<T>(Vector64<T> result, T expectedElementValue) where T : unmanaged
@@ -376,6 +476,110 @@ class Program
 
         // End CompareEqual Tests
 
+        // Begin CompareGreaterThan Tests
+
+        if (!ValidateResult_Vector64<byte>(AdvSimd_CompareGreaterThan_Vector64_Byte_Zero(Vector64.Create((byte)1)), Byte.MaxValue))
+            result = -1;
+
+        if (!ValidateResult_Vector64<float>(AdvSimd_CompareGreaterThan_Vector64_Single_Zero(Vector64.Create(1.0f)), Single.NaN))
+            result = -1;
+
+        if (!ValidateResult_Vector128<byte>(AdvSimd_CompareGreaterThan_Vector128_Byte_Zero(Vector128.Create((byte)1)), Byte.MaxValue))
+            result = -1;
+
+        if (!ValidateResult_Vector128<float>(AdvSimd_CompareGreaterThan_Vector128_Single_Zero(Vector128.Create(1.0f)), Single.NaN))
+            result = -1;
+
+        if (!ValidateResult_Vector128<double>(AdvSimd_Arm64_CompareGreaterThan_Vector128_Double_Zero(Vector128.Create(1.0)), Double.NaN))
+            result = -1;
+
+        if (!ValidateResult_Vector128<long>(AdvSimd_Arm64_CompareGreaterThan_Vector128_Int64_Zero(Vector128.Create(1L)), -1))
+            result = -1;
+
+        if (!ValidateResult_Vector64<double>(AdvSimd_Arm64_CompareGreaterThanScalar_Vector64_Double_Zero(Vector64.Create(1.0)), Double.NaN))
+            result = -1;
+
+        if (!ValidateResult_Vector64<long>(AdvSimd_Arm64_CompareGreaterThanScalar_Vector64_Int64_Zero(Vector64.Create(1L)), -1))
+            result = -1;
+
+        if (ValidateResult_Vector64<byte>(AdvSimd_CompareGreaterThan_Vector64_Byte_Zero(Vector64<byte>.Zero), Byte.MaxValue))
+            result = -1;
+
+        if (ValidateResult_Vector64<float>(AdvSimd_CompareGreaterThan_Vector64_Single_Zero(Vector64<float>.Zero), Single.NaN))
+            result = -1;
+
+        if (ValidateResult_Vector128<byte>(AdvSimd_CompareGreaterThan_Vector128_Byte_Zero(Vector128<byte>.Zero), Byte.MaxValue))
+            result = -1;
+
+        if (ValidateResult_Vector128<float>(AdvSimd_CompareGreaterThan_Vector128_Single_Zero(Vector128<float>.Zero), Single.NaN))
+            result = -1;
+
+        if (ValidateResult_Vector128<double>(AdvSimd_Arm64_CompareGreaterThan_Vector128_Double_Zero(Vector128<double>.Zero), Double.NaN))
+            result = -1;
+
+        if (ValidateResult_Vector128<long>(AdvSimd_Arm64_CompareGreaterThan_Vector128_Int64_Zero(Vector128<long>.Zero), -1))
+            result = -1;
+
+        if (ValidateResult_Vector64<double>(AdvSimd_Arm64_CompareGreaterThanScalar_Vector64_Double_Zero(Vector64<double>.Zero), Double.NaN))
+            result = -1;
+
+        if (ValidateResult_Vector64<long>(AdvSimd_Arm64_CompareGreaterThanScalar_Vector64_Int64_Zero(Vector64<long>.Zero), -1))
+            result = -1;
+
+        // End CompareGreaterThan Tests
+
+        // Begin CompareGreaterThanOrEqual Tests
+
+        if (!ValidateResult_Vector64<byte>(AdvSimd_CompareGreaterThanOrEqual_Vector64_Byte_Zero(Vector64.Create((byte)1)), Byte.MaxValue))
+            result = -1;
+
+        if (!ValidateResult_Vector64<float>(AdvSimd_CompareGreaterThanOrEqual_Vector64_Single_Zero(Vector64.Create(1.0f)), Single.NaN))
+            result = -1;
+
+        if (!ValidateResult_Vector128<byte>(AdvSimd_CompareGreaterThanOrEqual_Vector128_Byte_Zero(Vector128.Create((byte)1)), Byte.MaxValue))
+            result = -1;
+
+        if (!ValidateResult_Vector128<float>(AdvSimd_CompareGreaterThanOrEqual_Vector128_Single_Zero(Vector128.Create(1.0f)), Single.NaN))
+            result = -1;
+
+        if (!ValidateResult_Vector128<double>(AdvSimd_Arm64_CompareGreaterThanOrEqual_Vector128_Double_Zero(Vector128.Create(1.0)), Double.NaN))
+            result = -1;
+
+        if (!ValidateResult_Vector128<long>(AdvSimd_Arm64_CompareGreaterThanOrEqual_Vector128_Int64_Zero(Vector128.Create(1L)), -1))
+            result = -1;
+
+        if (!ValidateResult_Vector64<double>(AdvSimd_Arm64_CompareGreaterThanOrEqualScalar_Vector64_Double_Zero(Vector64.Create(1.0)), Double.NaN))
+            result = -1;
+
+        if (!ValidateResult_Vector64<long>(AdvSimd_Arm64_CompareGreaterThanOrEqualScalar_Vector64_Int64_Zero(Vector64.Create(1L)), -1))
+            result = -1;
+
+        if (!ValidateResult_Vector64<byte>(AdvSimd_CompareGreaterThanOrEqual_Vector64_Byte_Zero(Vector64<byte>.Zero), Byte.MaxValue))
+            result = -1;
+
+        if (!ValidateResult_Vector64<float>(AdvSimd_CompareGreaterThanOrEqual_Vector64_Single_Zero(Vector64<float>.Zero), Single.NaN))
+            result = -1;
+
+        if (!ValidateResult_Vector128<byte>(AdvSimd_CompareGreaterThanOrEqual_Vector128_Byte_Zero(Vector128<byte>.Zero), Byte.MaxValue))
+            result = -1;
+
+        if (!ValidateResult_Vector128<float>(AdvSimd_CompareGreaterThanOrEqual_Vector128_Single_Zero(Vector128<float>.Zero), Single.NaN))
+            result = -1;
+
+        if (!ValidateResult_Vector128<double>(AdvSimd_Arm64_CompareGreaterThanOrEqual_Vector128_Double_Zero(Vector128<double>.Zero), Double.NaN))
+            result = -1;
+
+        if (!ValidateResult_Vector128<long>(AdvSimd_Arm64_CompareGreaterThanOrEqual_Vector128_Int64_Zero(Vector128<long>.Zero), -1))
+            result = -1;
+
+        if (!ValidateResult_Vector64<double>(AdvSimd_Arm64_CompareGreaterThanOrEqualScalar_Vector64_Double_Zero(Vector64<double>.Zero), Double.NaN))
+            result = -1;
+
+        if (!ValidateResult_Vector64<long>(AdvSimd_Arm64_CompareGreaterThanOrEqualScalar_Vector64_Int64_Zero(Vector64<long>.Zero), -1))
+            result = -1;
+
+        // End CompareGreaterThanOrEqual Tests
+
         return result;
     }