From: Will Smith Date: Thu, 17 Feb 2022 02:32:20 +0000 (-0800) Subject: [JIT] More ARM64 comparison instruction optimizations with Vector.Zero (#64783) X-Git-Tag: accepted/tizen/unified/riscv/20231226.055536~10779 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=c3f57277b178e11d3a2b495bcff5888b61db1f5c;p=platform%2Fupstream%2Fdotnet%2Fruntime.git [JIT] More ARM64 comparison instruction optimizations with Vector.Zero (#64783) * Normalizing instructions with an implicit vector zero as the second operand * Checking number of operands before looking at operands * Remove assert * Check commutative flag * Fixed commutative check * Handling more HW intrinsics * Finishing up * Finishing up * Formatting * numOperands = 1 * Feedback * Added HW_Flag_SupportsContainmentZero * Added extra assert * Removing flag and simplifying codegen for containment with zeros --- diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 2345ba4..7aa1c89 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -12964,7 +12964,7 @@ void emitter::emitDispIns( emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); emitDispVectorReg(id->idReg2(), id->idInsOpt(), false); } - if (ins == INS_fcmeq) + if (ins == INS_fcmeq || ins == INS_fcmge || ins == INS_fcmgt || ins == INS_fcmle || ins == INS_fcmlt) { printf(", "); emitDispImm(0, false); @@ -12988,7 +12988,7 @@ void emitter::emitDispIns( emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); emitDispVectorReg(id->idReg2(), id->idInsOpt(), false); } - if (ins == INS_cmeq) + if (ins == INS_cmeq || ins == INS_cmge || ins == INS_cmgt || ins == INS_cmle || ins == INS_cmlt) { printf(", "); emitDispImm(0, false); @@ -13129,7 +13129,8 @@ void emitter::emitDispIns( emitDispReg(id->idReg1(), size, true); emitDispReg(id->idReg2(), size, false); } - if (fmt == IF_DV_2L && ins == INS_cmeq) + if (fmt == IF_DV_2L && + (ins == INS_cmeq || ins == INS_cmge || ins == INS_cmgt || ins == INS_cmle || ins == 
INS_cmlt)) { printf(", "); emitDispImm(0, false); diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h index 9d2d1a0..53c3920 100644 --- a/src/coreclr/jit/hwintrinsic.h +++ b/src/coreclr/jit/hwintrinsic.h @@ -159,6 +159,7 @@ enum HWIntrinsicFlag : unsigned int // The intrinsic supports some sort of containment analysis HW_Flag_SupportsContainment = 0x2000 + #else #error Unsupported platform #endif diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index a2819f5..f9bddea 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -371,7 +371,14 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) break; case 2: - if (isRMW) + // This handles optimizations for instructions that have + // an implicit 'zero' vector of what would be the second operand. + if (HWIntrinsicInfo::SupportsContainment(intrin.id) && intrin.op2->isContained() && + intrin.op2->IsVectorZero()) + { + GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op1Reg, opt); + } + else if (isRMW) { assert(targetReg != op2Reg); @@ -499,29 +506,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt); break; - case NI_AdvSimd_CompareEqual: - case NI_AdvSimd_Arm64_CompareEqual: - case NI_AdvSimd_Arm64_CompareEqualScalar: - if (intrin.op1->isContained()) - { - assert(HWIntrinsicInfo::SupportsContainment(intrin.id)); - assert(intrin.op1->IsVectorZero()); - - GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op2Reg, opt); - } - else if (intrin.op2->isContained()) - { - assert(HWIntrinsicInfo::SupportsContainment(intrin.id)); - assert(intrin.op2->IsVectorZero()); - - GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op1Reg, opt); - } - else - { - GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt); - } - break; - case NI_AdvSimd_AbsoluteCompareLessThan: case 
NI_AdvSimd_AbsoluteCompareLessThanOrEqual: case NI_AdvSimd_CompareLessThan: diff --git a/src/coreclr/jit/hwintrinsiclistarm64.h b/src/coreclr/jit/hwintrinsiclistarm64.h index 2a1e989..b5960c9 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64.h +++ b/src/coreclr/jit/hwintrinsiclistarm64.h @@ -240,9 +240,9 @@ HARDWARE_INTRINSIC(AdvSimd, BitwiseClear, HARDWARE_INTRINSIC(AdvSimd, BitwiseSelect, -1, 3, {INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd, Ceiling, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintp, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AdvSimd, CeilingScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintp, INS_frintp}, HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, CompareEqual, -1, 2, {INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_invalid, INS_invalid, INS_fcmeq, INS_invalid}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment) -HARDWARE_INTRINSIC(AdvSimd, CompareGreaterThan, -1, 2, {INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_invalid, INS_invalid, INS_fcmgt, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, CompareGreaterThanOrEqual, -1, 2, {INS_cmge, INS_cmhs, INS_cmge, INS_cmhs, INS_cmge, INS_cmhs, INS_invalid, INS_invalid, INS_fcmge, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd, CompareEqual, -1, 2, {INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_invalid, INS_invalid, INS_fcmeq, INS_invalid}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SupportsContainment) +HARDWARE_INTRINSIC(AdvSimd, CompareGreaterThan, -1, 2, {INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_invalid, INS_invalid, 
INS_fcmgt, INS_invalid}, HW_Category_SIMD, HW_Flag_SupportsContainment) +HARDWARE_INTRINSIC(AdvSimd, CompareGreaterThanOrEqual, -1, 2, {INS_cmge, INS_cmhs, INS_cmge, INS_cmhs, INS_cmge, INS_cmhs, INS_invalid, INS_invalid, INS_fcmge, INS_invalid}, HW_Category_SIMD, HW_Flag_SupportsContainment) HARDWARE_INTRINSIC(AdvSimd, CompareLessThan, -1, 2, {INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_invalid, INS_invalid, INS_fcmgt, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd, CompareLessThanOrEqual, -1, 2, {INS_cmge, INS_cmhs, INS_cmge, INS_cmhs, INS_cmge, INS_cmhs, INS_invalid, INS_invalid, INS_fcmge, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd, CompareTest, -1, 2, {INS_cmtst, INS_cmtst, INS_cmtst, INS_cmtst, INS_cmtst, INS_cmtst, INS_invalid, INS_invalid, INS_cmtst, INS_invalid}, HW_Category_SIMD, HW_Flag_Commutative) @@ -492,12 +492,12 @@ HARDWARE_INTRINSIC(AdvSimd_Arm64, AddPairwiseScalar, HARDWARE_INTRINSIC(AdvSimd_Arm64, AddSaturate, -1, 2, {INS_suqadd, INS_usqadd, INS_suqadd, INS_usqadd, INS_suqadd, INS_usqadd, INS_suqadd, INS_usqadd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(AdvSimd_Arm64, AddSaturateScalar, 8, 2, {INS_sqadd, INS_uqadd, INS_sqadd, INS_uqadd, INS_sqadd, INS_uqadd, INS_suqadd, INS_usqadd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd_Arm64, Ceiling, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintp}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmeq, INS_cmeq, INS_invalid, INS_fcmeq}, HW_Category_SIMD, 
HW_Flag_Commutative|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment) -HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareEqualScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmeq, INS_cmeq, INS_fcmeq, INS_fcmeq}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment) -HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmgt, INS_cmhi, INS_invalid, INS_fcmgt}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmge, INS_cmhs, INS_invalid, INS_fcmge}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThanOrEqualScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmge, INS_cmhs, INS_fcmge, INS_fcmge}, HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThanScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmgt, INS_cmhi, INS_fcmgt, INS_fcmgt}, HW_Category_SIMD, HW_Flag_SIMDScalar) +HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmeq, INS_cmeq, INS_invalid, INS_fcmeq}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SupportsContainment) +HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareEqualScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmeq, INS_cmeq, INS_fcmeq, INS_fcmeq}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SIMDScalar|HW_Flag_SupportsContainment) +HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmgt, INS_cmhi, INS_invalid, INS_fcmgt}, HW_Category_SIMD, 
HW_Flag_SupportsContainment) +HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmge, INS_cmhs, INS_invalid, INS_fcmge}, HW_Category_SIMD, HW_Flag_SupportsContainment) +HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThanOrEqualScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmge, INS_cmhs, INS_fcmge, INS_fcmge}, HW_Category_SIMD, HW_Flag_SIMDScalar|HW_Flag_SupportsContainment) +HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThanScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmgt, INS_cmhi, INS_fcmgt, INS_fcmgt}, HW_Category_SIMD, HW_Flag_SIMDScalar|HW_Flag_SupportsContainment) HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmgt, INS_cmhi, INS_invalid, INS_fcmgt}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmge, INS_cmhs, INS_invalid, INS_fcmge}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareLessThanOrEqualScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmge, INS_cmhs, INS_fcmge, INS_fcmge}, HW_Category_SIMD, HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) diff --git a/src/coreclr/jit/instrsarm64.h b/src/coreclr/jit/instrsarm64.h index d4e7ef5..294dcf0 100644 --- a/src/coreclr/jit/instrsarm64.h +++ b/src/coreclr/jit/instrsarm64.h @@ -318,33 +318,33 @@ INST4(cmeq, "cmeq", 0, IF_EN4H, 0x7EE08C00, 0x2E208C00, INST4(cmge, "cmge", 0, IF_EN4H, 0x5EE03C00, 0x0E203C00, 0x7E208800, 0x2E208800) // cmge Vd,Vn,Vm DV_3E 01011110111mmmmm 001111nnnnnddddd 5EE0 3C00 Vd,Vn,Vm (scalar) // cmge Vd,Vn,Vm DV_3A 0Q001110XX1mmmmm 001111nnnnnddddd 0E20 3C00 Vd,Vn,Vm (vector) 
- // cmge Vd,Vn DV_2L 01111110XX100000 100010nnnnnddddd 5E20 8800 Vd,Vn (scalar) - // cmge Vd,Vn DV_2M 0Q101110XX100000 100010nnnnnddddd 2E20 8800 Vd,Vn (vector) + // cmge Vd,Vn,#0 DV_2L 01111110XX100000 100010nnnnnddddd 7E20 8800 Vd,Vn,#0 (scalar - with zero) + // cmge Vd,Vn,#0 DV_2M 0Q101110XX100000 100010nnnnnddddd 2E20 8800 Vd,Vn,#0 (vector - with zero) INST4(cmgt, "cmgt", 0, IF_EN4H, 0x5EE03400, 0x0E203400, 0x5E208800, 0x0E208800) // cmgt Vd,Vn,Vm DV_3E 01011110111mmmmm 001101nnnnnddddd 5EE0 3400 Vd,Vn,Vm (scalar) // cmgt Vd,Vn,Vm DV_3A 0Q001110XX1mmmmm 001101nnnnnddddd 0E20 3400 Vd,Vn,Vm (vector) - // cmgt Vd,Vn DV_2L 01011110XX100000 100010nnnnnddddd 5E20 8800 Vd,Vn (scalar) - // cmgt Vd,Vn DV_2M 0Q001110XX100000 101110nnnnnddddd 0E20 8800 Vd,Vn (vector) + // cmgt Vd,Vn,#0 DV_2L 01011110XX100000 100010nnnnnddddd 5E20 8800 Vd,Vn,#0 (scalar - with zero) + // cmgt Vd,Vn,#0 DV_2M 0Q001110XX100000 100010nnnnnddddd 0E20 8800 Vd,Vn,#0 (vector - with zero) // enum name info DV_3D DV_3B DV_2G DV_2A INST4(fcmeq, "fcmeq", 0, IF_EN4I, 0x5E20E400, 0x0E20E400, 0x5EA0D800, 0x0EA0D800) - // fcmeq Vd,Vn,Vm DV_3D 010111100X1mmmmm 111001nnnnnddddd 5E20 E400 Vd Vn Vm (scalar) + // fcmeq Vd,Vn,Vm DV_3D 010111100X1mmmmm 111001nnnnnddddd 5E20 E400 Vd,Vn,Vm (scalar) // fcmeq Vd,Vn,Vm DV_3B 0Q0011100X1mmmmm 111001nnnnnddddd 0E20 E400 Vd,Vn,Vm (vector) - // fcmeq Vd,Vn,#0 DV_2G 010111101X100000 110110nnnnnddddd 5EA0 D800 Vd Vn,#0 (scalar - with zero) - // fcmeq Vd,Vn,#0 DV_2A 0Q0011101X100000 110110nnnnnddddd 0EA0 D800 Vd Vn,#0 (vector - with zero) + // fcmeq Vd,Vn,#0 DV_2G 010111101X100000 110110nnnnnddddd 5EA0 D800 Vd,Vn,#0 (scalar - with zero) + // fcmeq Vd,Vn,#0 DV_2A 0Q0011101X100000 110110nnnnnddddd 0EA0 D800 Vd,Vn,#0 (vector - with zero) INST4(fcmge, "fcmge", 0, IF_EN4I, 0x7E20E400, 0x2E20E400, 0x7EA0C800, 0x2EA0C800) - // fcmge Vd,Vn,Vm DV_3D 011111100X1mmmmm 111001nnnnnddddd 7E20 E400 Vd Vn Vm (scalar) + // fcmge Vd,Vn,Vm DV_3D 011111100X1mmmmm 111001nnnnnddddd 7E20 E400 
Vd,Vn,Vm (scalar) // fcmge Vd,Vn,Vm DV_3B 0Q1011100X1mmmmm 111001nnnnnddddd 2E20 E400 Vd,Vn,Vm (vector) - // fcmge Vd,Vn DV_2G 011111101X100000 110010nnnnnddddd 7EA0 E800 Vd Vn (scalar) - // fcmge Vd,Vn DV_2A 0Q1011101X100000 110010nnnnnddddd 2EA0 C800 Vd Vn (vector) + // fcmge Vd,Vn,#0 DV_2G 011111101X100000 110010nnnnnddddd 7EA0 C800 Vd,Vn,#0 (scalar - with zero) + // fcmge Vd,Vn,#0 DV_2A 0Q1011101X100000 110010nnnnnddddd 2EA0 C800 Vd,Vn,#0 (vector - with zero) INST4(fcmgt, "fcmgt", 0, IF_EN4I, 0x7EA0E400, 0x2EA0E400, 0x5EA0C800, 0x0EA0C800) - // fcmgt Vd,Vn,Vm DV_3D 011111101X1mmmmm 111001nnnnnddddd 7EA0 E400 Vd Vn Vm (scalar) + // fcmgt Vd,Vn,Vm DV_3D 011111101X1mmmmm 111001nnnnnddddd 7EA0 E400 Vd,Vn,Vm (scalar) // fcmgt Vd,Vn,Vm DV_3B 0Q1011101X1mmmmm 111001nnnnnddddd 2EA0 E400 Vd,Vn,Vm (vector) - // fcmgt Vd,Vn DV_2G 010111101X100000 110010nnnnnddddd 5EA0 E800 Vd Vn (scalar) - // fcmgt Vd,Vn DV_2A 0Q0011101X100000 110010nnnnnddddd 0EA0 C800 Vd Vn (vector) + // fcmgt Vd,Vn,#0 DV_2G 010111101X100000 110010nnnnnddddd 5EA0 C800 Vd,Vn,#0 (scalar - with zero) + // fcmgt Vd,Vn,#0 DV_2A 0Q0011101X100000 110010nnnnnddddd 0EA0 C800 Vd,Vn,#0 (vector - with zero) // enum name info DV_2N DV_2O DV_3E DV_3A INST4(sqshl, "sqshl", 0, IF_EN4J, 0x5F007400, 0x0F007400, 0x5E204C00, 0x0E204C00) @@ -707,12 +707,12 @@ INST2(fabs, "fabs", 0, IF_EN2J, 0x0EA0F800, 0x1E20C000) // fabs Vd,Vn DV_2G 000111100X100000 110000nnnnnddddd 1E20 C000 Vd,Vn (scalar) INST2(fcmle, "fcmle", 0, IF_EN2J, 0x2EA0D800, 0x7EA0D800) - // fcmle Vd,Vn DV_2A 0Q1011101X100000 111110nnnnnddddd 2EA0 D800 Vd,Vn (vector) - // fcmle Vd,Vn DV_2G 011111101X100000 110110nnnnnddddd 7EA0 D800 Vd,Vn (scalar) + // fcmle Vd,Vn,#0 DV_2A 0Q1011101X100000 110110nnnnnddddd 2EA0 D800 Vd,Vn,#0 (vector - with zero) + // fcmle Vd,Vn,#0 DV_2G 011111101X100000 110110nnnnnddddd 7EA0 D800 Vd,Vn,#0 (scalar - with zero) INST2(fcmlt, "fcmlt", 0, IF_EN2J, 0x0EA0E800, 0x5EA0E800) - // fcmlt Vd,Vn DV_2A 0Q0011101X100000 111110nnnnnddddd 0EA0 
E800 Vd,Vn (vector) - // fcmlt Vd,Vn DV_2G 010111101X100000 111010nnnnnddddd 5EA0 E800 Vd,Vn (scalar) + // fcmlt Vd,Vn,#0 DV_2A 0Q0011101X100000 111010nnnnnddddd 0EA0 E800 Vd,Vn,#0 (vector - with zero) + // fcmlt Vd,Vn,#0 DV_2G 010111101X100000 111010nnnnnddddd 5EA0 E800 Vd,Vn,#0 (scalar - with zero) INST2(fcvtxn, "fcvtxn", NRW, IF_EN2J, 0x2E616800, 0x7E616800) // fcvtxn Vd,Vn DV_2A 0010111001100001 011010nnnnnddddd 2E61 6800 Vd,Vn (vector) @@ -768,12 +768,12 @@ INST2(abs, "abs", 0, IF_EN2K, 0x0E20B800, 0x5E20B800) // abs Vd,Vn DV_2L 01011110XX100000 101110nnnnnddddd 5E20 B800 Vd,Vn (scalar) INST2(cmle, "cmle", 0, IF_EN2K, 0x2E209800, 0x7E209800) - // cmle Vd,Vn DV_2M 0Q101110XX100000 100110nnnnnddddd 2E20 9800 Vd,Vn (vector) - // cmle Vd,Vn DV_2L 01111110XX100000 100110nnnnnddddd 7E20 9800 Vd,Vn (scalar) + // cmle Vd,Vn,#0 DV_2M 0Q101110XX100000 100110nnnnnddddd 2E20 9800 Vd,Vn,#0 (vector - with zero) + // cmle Vd,Vn,#0 DV_2L 01111110XX100000 100110nnnnnddddd 7E20 9800 Vd,Vn,#0 (scalar - with zero) INST2(cmlt, "cmlt", 0, IF_EN2K, 0x0E20A800, 0x5E20A800) - // cmlt Vd,Vn DV_2M 0Q101110XX100000 101010nnnnnddddd 0E20 A800 Vd,Vn (vector) - // cmlt Vd,Vn DV_2L 01011110XX100000 101010nnnnnddddd 5E20 A800 Vd,Vn (scalar) + // cmlt Vd,Vn,#0 DV_2M 0Q001110XX100000 101010nnnnnddddd 0E20 A800 Vd,Vn,#0 (vector - with zero) + // cmlt Vd,Vn,#0 DV_2L 01011110XX100000 101010nnnnnddddd 5E20 A800 Vd,Vn,#0 (scalar - with zero) INST2(sqabs, "sqabs", 0, IF_EN2K, 0x0E207800, 0x5E207800) // sqabs Vd,Vn DV_2M 0Q001110XX100000 011110nnnnnddddd 0E20 7800 Vd,Vn (vector) diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 41b80fb..433046e 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -1970,7 +1970,15 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { if (intrin.op1->IsVectorZero()) { - MakeSrcContained(node, intrin.op1); + GenTree* op1 = intrin.op1; + GenTree* op2 = intrin.op2; + + 
assert(HWIntrinsicInfo::IsCommutative(intrin.id)); + MakeSrcContained(node, op1); + + // Swap the operands here to make the containment checks in codegen simpler + node->Op(1) = op2; + node->Op(2) = op1; } else if (intrin.op2->IsVectorZero()) { @@ -1979,6 +1987,24 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) break; } + case NI_AdvSimd_CompareGreaterThan: + case NI_AdvSimd_CompareGreaterThanOrEqual: + case NI_AdvSimd_Arm64_CompareGreaterThan: + case NI_AdvSimd_Arm64_CompareGreaterThanOrEqual: + case NI_AdvSimd_Arm64_CompareGreaterThanScalar: + case NI_AdvSimd_Arm64_CompareGreaterThanOrEqualScalar: + { + // Containment is not supported for unsigned base types as the corresponding instructions: + // - cmhi + // - cmhs + // require both operands; they do not have a 'with zero'. + if (intrin.op2->IsVectorZero() && !varTypeIsUnsigned(intrin.baseType)) + { + MakeSrcContained(node, intrin.op2); + } + break; + } + case NI_Vector64_CreateScalarUnsafe: case NI_Vector128_CreateScalarUnsafe: case NI_AdvSimd_DuplicateToVector64: diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_33972/Runtime_33972.cs b/src/tests/JIT/Regression/JitBlue/Runtime_33972/Runtime_33972.cs index 7535cd3..6e99557 100644 --- a/src/tests/JIT/Regression/JitBlue/Runtime_33972/Runtime_33972.cs +++ b/src/tests/JIT/Regression/JitBlue/Runtime_33972/Runtime_33972.cs @@ -264,6 +264,106 @@ class Program return AdvSimd.Arm64.CompareEqualScalar(Vector64.Zero, right); } + // CompareGreaterThan + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector64 AdvSimd_CompareGreaterThan_Vector64_Byte_Zero(Vector64 left) + { + return AdvSimd.CompareGreaterThan(left, Vector64.Zero); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector64 AdvSimd_CompareGreaterThan_Vector64_Single_Zero(Vector64 left) + { + return AdvSimd.CompareGreaterThan(left, Vector64.Zero); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector128 
AdvSimd_CompareGreaterThan_Vector128_Byte_Zero(Vector128 left) + { + return AdvSimd.CompareGreaterThan(left, Vector128.Zero); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector128 AdvSimd_CompareGreaterThan_Vector128_Single_Zero(Vector128 left) + { + return AdvSimd.CompareGreaterThan(left, Vector128.Zero); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector128 AdvSimd_Arm64_CompareGreaterThan_Vector128_Double_Zero(Vector128 left) + { + return AdvSimd.Arm64.CompareGreaterThan(left, Vector128.Zero); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector128 AdvSimd_Arm64_CompareGreaterThan_Vector128_Int64_Zero(Vector128 left) + { + return AdvSimd.Arm64.CompareGreaterThan(left, Vector128.Zero); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector64 AdvSimd_Arm64_CompareGreaterThanScalar_Vector64_Double_Zero(Vector64 left) + { + return AdvSimd.Arm64.CompareGreaterThanScalar(left, Vector64.Zero); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector64 AdvSimd_Arm64_CompareGreaterThanScalar_Vector64_Int64_Zero(Vector64 left) + { + return AdvSimd.Arm64.CompareGreaterThanScalar(left, Vector64.Zero); + } + + // CompareGreaterThanOrEqual + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector64 AdvSimd_CompareGreaterThanOrEqual_Vector64_Byte_Zero(Vector64 left) + { + return AdvSimd.CompareGreaterThanOrEqual(left, Vector64.Zero); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector64 AdvSimd_CompareGreaterThanOrEqual_Vector64_Single_Zero(Vector64 left) + { + return AdvSimd.CompareGreaterThanOrEqual(left, Vector64.Zero); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector128 AdvSimd_CompareGreaterThanOrEqual_Vector128_Byte_Zero(Vector128 left) + { + return AdvSimd.CompareGreaterThanOrEqual(left, Vector128.Zero); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector128 AdvSimd_CompareGreaterThanOrEqual_Vector128_Single_Zero(Vector128 left) + { + return 
AdvSimd.CompareGreaterThanOrEqual(left, Vector128.Zero); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector128 AdvSimd_Arm64_CompareGreaterThanOrEqual_Vector128_Double_Zero(Vector128 left) + { + return AdvSimd.Arm64.CompareGreaterThanOrEqual(left, Vector128.Zero); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector128 AdvSimd_Arm64_CompareGreaterThanOrEqual_Vector128_Int64_Zero(Vector128 left) + { + return AdvSimd.Arm64.CompareGreaterThanOrEqual(left, Vector128.Zero); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector64 AdvSimd_Arm64_CompareGreaterThanOrEqualScalar_Vector64_Double_Zero(Vector64 left) + { + return AdvSimd.Arm64.CompareGreaterThanOrEqualScalar(left, Vector64.Zero); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector64 AdvSimd_Arm64_CompareGreaterThanOrEqualScalar_Vector64_Int64_Zero(Vector64 left) + { + return AdvSimd.Arm64.CompareGreaterThanOrEqualScalar(left, Vector64.Zero); + } + // Validation unsafe static bool ValidateResult_Vector64(Vector64 result, T expectedElementValue) where T : unmanaged @@ -376,6 +476,110 @@ class Program // End CompareEqual Tests + // Begin CompareGreaterThan Tests + + if (!ValidateResult_Vector64(AdvSimd_CompareGreaterThan_Vector64_Byte_Zero(Vector64.Create((byte)1)), Byte.MaxValue)) + result = -1; + + if (!ValidateResult_Vector64(AdvSimd_CompareGreaterThan_Vector64_Single_Zero(Vector64.Create(1.0f)), Single.NaN)) + result = -1; + + if (!ValidateResult_Vector128(AdvSimd_CompareGreaterThan_Vector128_Byte_Zero(Vector128.Create((byte)1)), Byte.MaxValue)) + result = -1; + + if (!ValidateResult_Vector128(AdvSimd_CompareGreaterThan_Vector128_Single_Zero(Vector128.Create(1.0f)), Single.NaN)) + result = -1; + + if (!ValidateResult_Vector128(AdvSimd_Arm64_CompareGreaterThan_Vector128_Double_Zero(Vector128.Create(1.0)), Double.NaN)) + result = -1; + + if (!ValidateResult_Vector128(AdvSimd_Arm64_CompareGreaterThan_Vector128_Int64_Zero(Vector128.Create(1L)), -1)) + 
result = -1; + + if (!ValidateResult_Vector64(AdvSimd_Arm64_CompareGreaterThanScalar_Vector64_Double_Zero(Vector64.Create(1.0)), Double.NaN)) + result = -1; + + if (!ValidateResult_Vector64(AdvSimd_Arm64_CompareGreaterThanScalar_Vector64_Int64_Zero(Vector64.Create(1L)), -1)) + result = -1; + + if (ValidateResult_Vector64(AdvSimd_CompareGreaterThan_Vector64_Byte_Zero(Vector64.Zero), Byte.MaxValue)) + result = -1; + + if (ValidateResult_Vector64(AdvSimd_CompareGreaterThan_Vector64_Single_Zero(Vector64.Zero), Single.NaN)) + result = -1; + + if (ValidateResult_Vector128(AdvSimd_CompareGreaterThan_Vector128_Byte_Zero(Vector128.Zero), Byte.MaxValue)) + result = -1; + + if (ValidateResult_Vector128(AdvSimd_CompareGreaterThan_Vector128_Single_Zero(Vector128.Zero), Single.NaN)) + result = -1; + + if (ValidateResult_Vector128(AdvSimd_Arm64_CompareGreaterThan_Vector128_Double_Zero(Vector128.Zero), Double.NaN)) + result = -1; + + if (ValidateResult_Vector128(AdvSimd_Arm64_CompareGreaterThan_Vector128_Int64_Zero(Vector128.Zero), -1)) + result = -1; + + if (ValidateResult_Vector64(AdvSimd_Arm64_CompareGreaterThanScalar_Vector64_Double_Zero(Vector64.Zero), Double.NaN)) + result = -1; + + if (ValidateResult_Vector64(AdvSimd_Arm64_CompareGreaterThanScalar_Vector64_Int64_Zero(Vector64.Zero), -1)) + result = -1; + + // End CompareGreaterThan Tests + + // Begin CompareGreaterThanOrEqual Tests + + if (!ValidateResult_Vector64(AdvSimd_CompareGreaterThanOrEqual_Vector64_Byte_Zero(Vector64.Create((byte)1)), Byte.MaxValue)) + result = -1; + + if (!ValidateResult_Vector64(AdvSimd_CompareGreaterThanOrEqual_Vector64_Single_Zero(Vector64.Create(1.0f)), Single.NaN)) + result = -1; + + if (!ValidateResult_Vector128(AdvSimd_CompareGreaterThanOrEqual_Vector128_Byte_Zero(Vector128.Create((byte)1)), Byte.MaxValue)) + result = -1; + + if (!ValidateResult_Vector128(AdvSimd_CompareGreaterThanOrEqual_Vector128_Single_Zero(Vector128.Create(1.0f)), Single.NaN)) + result = -1; + + if 
(!ValidateResult_Vector128(AdvSimd_Arm64_CompareGreaterThanOrEqual_Vector128_Double_Zero(Vector128.Create(1.0)), Double.NaN)) + result = -1; + + if (!ValidateResult_Vector128(AdvSimd_Arm64_CompareGreaterThanOrEqual_Vector128_Int64_Zero(Vector128.Create(1L)), -1)) + result = -1; + + if (!ValidateResult_Vector64(AdvSimd_Arm64_CompareGreaterThanOrEqualScalar_Vector64_Double_Zero(Vector64.Create(1.0)), Double.NaN)) + result = -1; + + if (!ValidateResult_Vector64(AdvSimd_Arm64_CompareGreaterThanOrEqualScalar_Vector64_Int64_Zero(Vector64.Create(1L)), -1)) + result = -1; + + if (!ValidateResult_Vector64(AdvSimd_CompareGreaterThanOrEqual_Vector64_Byte_Zero(Vector64.Zero), Byte.MaxValue)) + result = -1; + + if (!ValidateResult_Vector64(AdvSimd_CompareGreaterThanOrEqual_Vector64_Single_Zero(Vector64.Zero), Single.NaN)) + result = -1; + + if (!ValidateResult_Vector128(AdvSimd_CompareGreaterThanOrEqual_Vector128_Byte_Zero(Vector128.Zero), Byte.MaxValue)) + result = -1; + + if (!ValidateResult_Vector128(AdvSimd_CompareGreaterThanOrEqual_Vector128_Single_Zero(Vector128.Zero), Single.NaN)) + result = -1; + + if (!ValidateResult_Vector128(AdvSimd_Arm64_CompareGreaterThanOrEqual_Vector128_Double_Zero(Vector128.Zero), Double.NaN)) + result = -1; + + if (!ValidateResult_Vector128(AdvSimd_Arm64_CompareGreaterThanOrEqual_Vector128_Int64_Zero(Vector128.Zero), -1)) + result = -1; + + if (!ValidateResult_Vector64(AdvSimd_Arm64_CompareGreaterThanOrEqualScalar_Vector64_Double_Zero(Vector64.Zero), Double.NaN)) + result = -1; + + if (!ValidateResult_Vector64(AdvSimd_Arm64_CompareGreaterThanOrEqualScalar_Vector64_Int64_Zero(Vector64.Zero), -1)) + result = -1; + + // End CompareGreaterThanOrEqual Tests + return result; }