case ARMISD::VCNEZ: return "ARMISD::VCNEZ";
case ARMISD::VCGE: return "ARMISD::VCGE";
case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
+ case ARMISD::VCLE: return "ARMISD::VCLE";
case ARMISD::VCLEZ: return "ARMISD::VCLEZ";
case ARMISD::VCGEU: return "ARMISD::VCGEU";
case ARMISD::VCGT: return "ARMISD::VCGT";
case ARMISD::VCGTZ: return "ARMISD::VCGTZ";
+ case ARMISD::VCLT: return "ARMISD::VCLT";
case ARMISD::VCLTZ: return "ARMISD::VCLTZ";
case ARMISD::VCGTU: return "ARMISD::VCGTU";
case ARMISD::VTST: return "ARMISD::VTST";
return SDValue();
}
+// Combine (or (vcmp A), (vcmp B)) on MVE i1 predicate vectors into
+// (xor (and (inverted vcmp A), (inverted vcmp B)), all-ones), i.e. apply
+// De Morgan's law.  The AND of two predicates maps onto chained predicated
+// compares (VPT/VCMPT blocks), whereas the OR does not; the trailing XOR
+// with all-ones undoes the inversion.  Returns the rewritten value, or
+// SDValue() when either operand is not an invertable compare node.
+static SDValue PerformORCombine_i1(SDNode *N,
+                                   TargetLowering::DAGCombinerInfo &DCI,
+                                   const ARMSubtarget *Subtarget) {
+  // Try to invert "or A, B" -> "and ~A, ~B", as the "and" is easier to chain
+  // together with predicates
+  // Pairs of mutually-inverse compare opcodes.  The unsigned compares
+  // (VCGEU/VCGTU) are deliberately absent: no inverse is listed for them, so
+  // an OR involving one of them is left untouched.
+  struct Codes {
+    unsigned Opcode;
+    unsigned Opposite;
+  } InvertCodes[] = {
+    {ARMISD::VCEQ, ARMISD::VCNE},
+    {ARMISD::VCEQZ, ARMISD::VCNEZ},
+    {ARMISD::VCGE, ARMISD::VCLT},
+    {ARMISD::VCGEZ, ARMISD::VCLTZ},
+    {ARMISD::VCGT, ARMISD::VCLE},
+    {ARMISD::VCGTZ, ARMISD::VCLEZ},
+  };
+
+  EVT VT = N->getValueType(0);
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  // Look up the inverse opcode for each operand; the table is scanned in both
+  // directions so each pair covers both orientations.  0 means "no inverse".
+  unsigned Opposite0 = 0;
+  unsigned Opposite1 = 0;
+  for (auto Code : InvertCodes) {
+    if (N0->getOpcode() == Code.Opcode)
+      Opposite0 = Code.Opposite;
+    if (N0->getOpcode() == Code.Opposite)
+      Opposite0 = Code.Opcode;
+    if (N1->getOpcode() == Code.Opcode)
+      Opposite1 = Code.Opposite;
+    if (N1->getOpcode() == Code.Opposite)
+      Opposite1 = Code.Opcode;
+  }
+
+  // Bail out unless *both* operands are invertable compares.
+  // NOTE(review): there is no hasOneUse() check on N0/N1, so a compare with
+  // other users is duplicated in inverted form rather than replaced — confirm
+  // this is the intended trade-off.
+  if (!Opposite0 || !Opposite1)
+    return SDValue();
+
+  // Rebuild each compare with its original operands but the inverted
+  // condition opcode.
+  SmallVector<SDValue, 4> Ops0;
+  for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i)
+    Ops0.push_back(N0->getOperand(i));
+  SmallVector<SDValue, 4> Ops1;
+  for (unsigned i = 0, e = N1->getNumOperands(); i != e; ++i)
+    Ops1.push_back(N1->getOperand(i));
+
+  SDValue NewN0 = DCI.DAG.getNode(Opposite0, SDLoc(N0), VT, Ops0);
+  SDValue NewN1 = DCI.DAG.getNode(Opposite1, SDLoc(N1), VT, Ops1);
+  // ~A & ~B, then invert the combined predicate to recover A | B.
+  SDValue And = DCI.DAG.getNode(ISD::AND, SDLoc(N), VT, NewN0, NewN1);
+  return DCI.DAG.getNode(ISD::XOR, SDLoc(N), VT, And,
+                         DCI.DAG.getAllOnesConstant(SDLoc(N), VT));
+}
+
/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
static SDValue PerformORCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
}
}
+ if (Subtarget->hasMVEIntegerOps() &&
+ (VT == MVT::v4i1 || VT == MVT::v8i1 || VT == MVT::v16i1))
+ return PerformORCombine_i1(N, DCI, Subtarget);
+
// Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
// reasonable.
if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
VCNEZ, // Vector compare not equal to zero (MVE)
VCGE, // Vector compare greater than or equal.
VCGEZ, // Vector compare greater than or equal to zero.
+ VCLE, // Vector compare less than or equal.
VCLEZ, // Vector compare less than or equal to zero.
VCGEU, // Vector compare unsigned greater than or equal.
VCGT, // Vector compare greater than.
VCGTZ, // Vector compare greater than zero.
+ VCLT, // Vector compare less than.
VCLTZ, // Vector compare less than zero.
VCGTU, // Vector compare unsigned greater than.
VTST, // Vector test bits.
def ARMvcnez : SDNode<"ARMISD::VCNEZ", SDTARMVCMPZ>;
def ARMvcge : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
def ARMvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
+// Less-than-or-equal compare node, added so DAG combines can invert VCGT
+// (see PerformORCombine_i1's InvertCodes table).  No unsigned form exists.
+def ARMvcle : SDNode<"ARMISD::VCLE", SDTARMVCMP>;
def ARMvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
def ARMvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
def ARMvcgt : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
def ARMvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
+// Less-than compare node, the inverse of VCGE for the same combine.
+def ARMvclt : SDNode<"ARMISD::VCLT", SDTARMVCMP>;
def ARMvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
def ARMvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
+// Integer MVE compare patterns.  The lists are reordered so entries appear in
+// ascending order of the trailing integer operand (eq=0, ne=1, ge=10, lt=11,
+// gt=12, le=13) — presumably the encoded condition value; confirm against the
+// unpred_vcmp_z/unpred_vcmp_r multiclass definitions.
let Predicates = [HasMVEInt] in {
  defm MVE_VCEQZ : unpred_vcmp_z<ARMvceqz, "i", 0>;
  defm MVE_VCNEZ : unpred_vcmp_z<ARMvcnez, "i", 1>;
-  defm MVE_VCLEZ : unpred_vcmp_z<ARMvclez, "s", 13>;
-  defm MVE_VCGTZ : unpred_vcmp_z<ARMvcgtz, "s", 12>;
-  defm MVE_VCLTZ : unpred_vcmp_z<ARMvcltz, "s", 11>;
  defm MVE_VCGEZ : unpred_vcmp_z<ARMvcgez, "s", 10>;
+  defm MVE_VCLTZ : unpred_vcmp_z<ARMvcltz, "s", 11>;
+  defm MVE_VCGTZ : unpred_vcmp_z<ARMvcgtz, "s", 12>;
+  defm MVE_VCLEZ : unpred_vcmp_z<ARMvclez, "s", 13>;
  defm MVE_VCEQ : unpred_vcmp_r<ARMvceq, "i", 0>;
  defm MVE_VCNE : unpred_vcmp_r<ARMvcne, "i", 1>;
-  defm MVE_VCGT : unpred_vcmp_r<ARMvcgt, "s", 12>;
  defm MVE_VCGE : unpred_vcmp_r<ARMvcge, "s", 10>;
+  defm MVE_VCLT : unpred_vcmp_r<ARMvclt, "s", 11>;
+  defm MVE_VCGT : unpred_vcmp_r<ARMvcgt, "s", 12>;
+  defm MVE_VCLE : unpred_vcmp_r<ARMvcle, "s", 13>;
  defm MVE_VCGTU : unpred_vcmp_r<ARMvcgtu, "u", 8>;
  defm MVE_VCGEU : unpred_vcmp_r<ARMvcgeu, "u", 2>;
}
+// Floating-point MVE compare patterns, with new VCLT/VCLE selections added
+// and the existing entries reordered by the trailing condition value
+// (lt=11, gt=12, le=13) to match the integer block above.
let Predicates = [HasMVEFloat] in {
  defm MVE_VFCEQZ : unpred_vcmpf_z<ARMvceqz, 0>;
  defm MVE_VFCNEZ : unpred_vcmpf_z<ARMvcnez, 1>;
-  defm MVE_VFCLEZ : unpred_vcmpf_z<ARMvclez, 13>;
-  defm MVE_VFCGTZ : unpred_vcmpf_z<ARMvcgtz, 12>;
-  defm MVE_VFCLTZ : unpred_vcmpf_z<ARMvcltz, 11>;
  defm MVE_VFCGEZ : unpred_vcmpf_z<ARMvcgez, 10>;
+  defm MVE_VFCLTZ : unpred_vcmpf_z<ARMvcltz, 11>;
+  defm MVE_VFCGTZ : unpred_vcmpf_z<ARMvcgtz, 12>;
+  defm MVE_VFCLEZ : unpred_vcmpf_z<ARMvclez, 13>;
-  defm MVE_VFCGT : unpred_vcmpf_r<ARMvcgt, 12>;
  defm MVE_VFCGE : unpred_vcmpf_r<ARMvcge, 10>;
+  defm MVE_VFCLT : unpred_vcmpf_r<ARMvclt, 11>;
+  defm MVE_VFCGT : unpred_vcmpf_r<ARMvcgt, 12>;
+  defm MVE_VFCLE : unpred_vcmpf_r<ARMvcle, 13>;
  defm MVE_VFCEQ : unpred_vcmpf_r<ARMvceq, 0>;
  defm MVE_VFCNE : unpred_vcmpf_r<ARMvcne, 1>;
}
define arm_aapcs_vfpcc <4 x i32> @cmpeqz_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: cmpeqz_v4i1:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcmp.i32 eq, q1, zr
+; CHECK-NEXT: vcmp.i32 ne, q0, zr
+; CHECK-NEXT: movw r1, #65535
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.i32 ne, q1, zr
; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
+; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
define arm_aapcs_vfpcc <4 x i32> @cmpnez_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: cmpnez_v4i1:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcmp.i32 ne, q1, zr
+; CHECK-NEXT: vcmp.i32 ne, q0, zr
+; CHECK-NEXT: movw r1, #65535
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.i32 eq, q1, zr
; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
+; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
define arm_aapcs_vfpcc <4 x i32> @cmpsltz_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: cmpsltz_v4i1:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcmp.s32 lt, q1, zr
+; CHECK-NEXT: vcmp.i32 ne, q0, zr
+; CHECK-NEXT: movw r1, #65535
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.s32 ge, q1, zr
; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
+; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
define arm_aapcs_vfpcc <4 x i32> @cmpsgtz_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: cmpsgtz_v4i1:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcmp.s32 gt, q1, zr
+; CHECK-NEXT: vcmp.i32 ne, q0, zr
+; CHECK-NEXT: movw r1, #65535
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.s32 le, q1, zr
; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
+; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
define arm_aapcs_vfpcc <4 x i32> @cmpslez_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: cmpslez_v4i1:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcmp.s32 le, q1, zr
+; CHECK-NEXT: vcmp.i32 ne, q0, zr
+; CHECK-NEXT: movw r1, #65535
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.s32 gt, q1, zr
; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
+; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
define arm_aapcs_vfpcc <4 x i32> @cmpsgez_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: cmpsgez_v4i1:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcmp.s32 ge, q1, zr
+; CHECK-NEXT: vcmp.i32 ne, q0, zr
+; CHECK-NEXT: movw r1, #65535
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.s32 lt, q1, zr
; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
+; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
define arm_aapcs_vfpcc <4 x i32> @cmpugtz_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: cmpugtz_v4i1:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcmp.i32 ne, q1, zr
+; CHECK-NEXT: vcmp.i32 ne, q0, zr
+; CHECK-NEXT: movw r1, #65535
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.i32 eq, q1, zr
; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
+; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
define arm_aapcs_vfpcc <4 x i32> @cmpeq_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: cmpeq_v4i1:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcmp.i32 eq, q1, q2
+; CHECK-NEXT: vcmp.i32 ne, q0, zr
+; CHECK-NEXT: movw r1, #65535
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.i32 ne, q1, q2
; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
+; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
define arm_aapcs_vfpcc <4 x i32> @cmpne_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: cmpne_v4i1:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcmp.i32 ne, q1, q2
+; CHECK-NEXT: vcmp.i32 ne, q0, zr
+; CHECK-NEXT: movw r1, #65535
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.i32 eq, q1, q2
; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
+; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
define arm_aapcs_vfpcc <4 x i32> @cmpslt_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: cmpslt_v4i1:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcmp.s32 gt, q2, q1
+; CHECK-NEXT: vcmp.i32 ne, q0, zr
+; CHECK-NEXT: movw r1, #65535
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.s32 le, q2, q1
; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
+; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
define arm_aapcs_vfpcc <4 x i32> @cmpsgt_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: cmpsgt_v4i1:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcmp.s32 gt, q1, q2
+; CHECK-NEXT: vcmp.i32 ne, q0, zr
+; CHECK-NEXT: movw r1, #65535
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.s32 le, q1, q2
; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
+; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
define arm_aapcs_vfpcc <4 x i32> @cmpsle_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: cmpsle_v4i1:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcmp.s32 ge, q2, q1
+; CHECK-NEXT: vcmp.i32 ne, q0, zr
+; CHECK-NEXT: movw r1, #65535
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.s32 lt, q2, q1
; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
+; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
define arm_aapcs_vfpcc <4 x i32> @cmpsge_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: cmpsge_v4i1:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcmp.s32 ge, q1, q2
+; CHECK-NEXT: vcmp.i32 ne, q0, zr
+; CHECK-NEXT: movw r1, #65535
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.s32 lt, q1, q2
; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
+; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
define arm_aapcs_vfpcc <8 x i16> @cmpeqz_v8i1(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: cmpeqz_v8i1:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcmp.i16 eq, q1, zr
+; CHECK-NEXT: vcmp.i16 ne, q0, zr
+; CHECK-NEXT: movw r1, #65535
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.i16 ne, q1, zr
; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i16 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
+; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
define arm_aapcs_vfpcc <8 x i16> @cmpeq_v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: cmpeq_v8i1:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcmp.i16 eq, q1, q2
+; CHECK-NEXT: vcmp.i16 ne, q0, zr
+; CHECK-NEXT: movw r1, #65535
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.i16 ne, q1, q2
; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i16 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
+; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
define arm_aapcs_vfpcc <16 x i8> @cmpeqz_v16i1(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: cmpeqz_v16i1:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcmp.i8 eq, q1, zr
+; CHECK-NEXT: vcmp.i8 ne, q0, zr
+; CHECK-NEXT: movw r1, #65535
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.i8 ne, q1, zr
; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i8 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
+; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
define arm_aapcs_vfpcc <16 x i8> @cmpeq_v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: cmpeq_v16i1:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcmp.i8 eq, q1, q2
+; CHECK-NEXT: vcmp.i8 ne, q0, zr
+; CHECK-NEXT: movw r1, #65535
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.i8 ne, q1, q2
; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i8 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
+; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: vcmp_one_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, q1
+; CHECK-MVEFP-NEXT: vcmp.f32 le, q1, q0
+; CHECK-MVEFP-NEXT: movw r1, #65535
+; CHECK-MVEFP-NEXT: vpst
+; CHECK-MVEFP-NEXT: vcmpt.f32 le, q0, q1
; CHECK-MVEFP-NEXT: vmrs r0, p0
-; CHECK-MVEFP-NEXT: vcmp.f32 gt, q1, q0
-; CHECK-MVEFP-NEXT: vmrs r1, p0
-; CHECK-MVEFP-NEXT: orrs r0, r1
+; CHECK-MVEFP-NEXT: eors r0, r1
; CHECK-MVEFP-NEXT: vmsr p0, r0
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: vcmp_ueq_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, q1
-; CHECK-MVEFP-NEXT: vmrs r0, p0
-; CHECK-MVEFP-NEXT: vcmp.f32 gt, q1, q0
-; CHECK-MVEFP-NEXT: vmrs r1, p0
-; CHECK-MVEFP-NEXT: orrs r0, r1
-; CHECK-MVEFP-NEXT: movw r1, #65535
-; CHECK-MVEFP-NEXT: eors r0, r1
-; CHECK-MVEFP-NEXT: vmsr p0, r0
+; CHECK-MVEFP-NEXT: vcmp.f32 le, q1, q0
+; CHECK-MVEFP-NEXT: vpst
+; CHECK-MVEFP-NEXT: vcmpt.f32 le, q0, q1
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: bx lr
entry:
;
; CHECK-MVEFP-LABEL: vcmp_ord_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, q1
+; CHECK-MVEFP-NEXT: vcmp.f32 le, q1, q0
+; CHECK-MVEFP-NEXT: movw r1, #65535
+; CHECK-MVEFP-NEXT: vpst
+; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q0, q1
; CHECK-MVEFP-NEXT: vmrs r0, p0
-; CHECK-MVEFP-NEXT: vcmp.f32 gt, q1, q0
-; CHECK-MVEFP-NEXT: vmrs r1, p0
-; CHECK-MVEFP-NEXT: orrs r0, r1
+; CHECK-MVEFP-NEXT: eors r0, r1
; CHECK-MVEFP-NEXT: vmsr p0, r0
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: vcmp_uno_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, q1
-; CHECK-MVEFP-NEXT: vmrs r0, p0
-; CHECK-MVEFP-NEXT: vcmp.f32 gt, q1, q0
-; CHECK-MVEFP-NEXT: vmrs r1, p0
-; CHECK-MVEFP-NEXT: orrs r0, r1
-; CHECK-MVEFP-NEXT: movw r1, #65535
-; CHECK-MVEFP-NEXT: eors r0, r1
-; CHECK-MVEFP-NEXT: vmsr p0, r0
+; CHECK-MVEFP-NEXT: vcmp.f32 le, q1, q0
+; CHECK-MVEFP-NEXT: vpst
+; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q0, q1
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: bx lr
entry:
;
; CHECK-MVEFP-LABEL: vcmp_one_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, q1
+; CHECK-MVEFP-NEXT: vcmp.f16 le, q1, q0
+; CHECK-MVEFP-NEXT: movw r1, #65535
+; CHECK-MVEFP-NEXT: vpst
+; CHECK-MVEFP-NEXT: vcmpt.f16 le, q0, q1
; CHECK-MVEFP-NEXT: vmrs r0, p0
-; CHECK-MVEFP-NEXT: vcmp.f16 gt, q1, q0
-; CHECK-MVEFP-NEXT: vmrs r1, p0
-; CHECK-MVEFP-NEXT: orrs r0, r1
+; CHECK-MVEFP-NEXT: eors r0, r1
; CHECK-MVEFP-NEXT: vmsr p0, r0
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: vcmp_ueq_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, q1
-; CHECK-MVEFP-NEXT: vmrs r0, p0
-; CHECK-MVEFP-NEXT: vcmp.f16 gt, q1, q0
-; CHECK-MVEFP-NEXT: vmrs r1, p0
-; CHECK-MVEFP-NEXT: orrs r0, r1
-; CHECK-MVEFP-NEXT: movw r1, #65535
-; CHECK-MVEFP-NEXT: eors r0, r1
-; CHECK-MVEFP-NEXT: vmsr p0, r0
+; CHECK-MVEFP-NEXT: vcmp.f16 le, q1, q0
+; CHECK-MVEFP-NEXT: vpst
+; CHECK-MVEFP-NEXT: vcmpt.f16 le, q0, q1
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: bx lr
entry:
;
; CHECK-MVEFP-LABEL: vcmp_ord_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, q1
+; CHECK-MVEFP-NEXT: vcmp.f16 le, q1, q0
+; CHECK-MVEFP-NEXT: movw r1, #65535
+; CHECK-MVEFP-NEXT: vpst
+; CHECK-MVEFP-NEXT: vcmpt.f16 lt, q0, q1
; CHECK-MVEFP-NEXT: vmrs r0, p0
-; CHECK-MVEFP-NEXT: vcmp.f16 gt, q1, q0
-; CHECK-MVEFP-NEXT: vmrs r1, p0
-; CHECK-MVEFP-NEXT: orrs r0, r1
+; CHECK-MVEFP-NEXT: eors r0, r1
; CHECK-MVEFP-NEXT: vmsr p0, r0
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: vcmp_uno_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, q1
-; CHECK-MVEFP-NEXT: vmrs r0, p0
-; CHECK-MVEFP-NEXT: vcmp.f16 gt, q1, q0
-; CHECK-MVEFP-NEXT: vmrs r1, p0
-; CHECK-MVEFP-NEXT: orrs r0, r1
-; CHECK-MVEFP-NEXT: movw r1, #65535
-; CHECK-MVEFP-NEXT: eors r0, r1
-; CHECK-MVEFP-NEXT: vmsr p0, r0
+; CHECK-MVEFP-NEXT: vcmp.f16 le, q1, q0
+; CHECK-MVEFP-NEXT: vpst
+; CHECK-MVEFP-NEXT: vcmpt.f16 lt, q0, q1
; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
; CHECK-MVEFP-NEXT: bx lr
entry:
; CHECK-MVEFP-LABEL: vcmp_one_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, q3
+; CHECK-MVEFP-NEXT: movw r1, #65535
+; CHECK-MVEFP-NEXT: vcmp.f32 le, q3, q0
+; CHECK-MVEFP-NEXT: vpst
+; CHECK-MVEFP-NEXT: vcmpt.f32 le, q0, q3
; CHECK-MVEFP-NEXT: vmrs r0, p0
-; CHECK-MVEFP-NEXT: vcmp.f32 gt, q3, q0
-; CHECK-MVEFP-NEXT: vmrs r1, p0
-; CHECK-MVEFP-NEXT: orrs r0, r1
+; CHECK-MVEFP-NEXT: eors r0, r1
; CHECK-MVEFP-NEXT: vmsr p0, r0
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
; CHECK-MVEFP-NEXT: bx lr
; CHECK-MVEFP-LABEL: vcmp_ueq_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, q3
-; CHECK-MVEFP-NEXT: vmrs r0, p0
-; CHECK-MVEFP-NEXT: vcmp.f32 gt, q3, q0
-; CHECK-MVEFP-NEXT: vmrs r1, p0
-; CHECK-MVEFP-NEXT: orrs r0, r1
-; CHECK-MVEFP-NEXT: movw r1, #65535
-; CHECK-MVEFP-NEXT: eors r0, r1
-; CHECK-MVEFP-NEXT: vmsr p0, r0
+; CHECK-MVEFP-NEXT: vcmp.f32 le, q3, q0
+; CHECK-MVEFP-NEXT: vpst
+; CHECK-MVEFP-NEXT: vcmpt.f32 le, q0, q3
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
; CHECK-MVEFP-LABEL: vcmp_ord_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, q3
+; CHECK-MVEFP-NEXT: movw r1, #65535
+; CHECK-MVEFP-NEXT: vcmp.f32 le, q3, q0
+; CHECK-MVEFP-NEXT: vpst
+; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q0, q3
; CHECK-MVEFP-NEXT: vmrs r0, p0
-; CHECK-MVEFP-NEXT: vcmp.f32 gt, q3, q0
-; CHECK-MVEFP-NEXT: vmrs r1, p0
-; CHECK-MVEFP-NEXT: orrs r0, r1
+; CHECK-MVEFP-NEXT: eors r0, r1
; CHECK-MVEFP-NEXT: vmsr p0, r0
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
; CHECK-MVEFP-NEXT: bx lr
; CHECK-MVEFP-LABEL: vcmp_uno_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, q3
-; CHECK-MVEFP-NEXT: vmrs r0, p0
-; CHECK-MVEFP-NEXT: vcmp.f32 gt, q3, q0
-; CHECK-MVEFP-NEXT: vmrs r1, p0
-; CHECK-MVEFP-NEXT: orrs r0, r1
-; CHECK-MVEFP-NEXT: movw r1, #65535
-; CHECK-MVEFP-NEXT: eors r0, r1
-; CHECK-MVEFP-NEXT: vmsr p0, r0
+; CHECK-MVEFP-NEXT: vcmp.f32 le, q3, q0
+; CHECK-MVEFP-NEXT: vpst
+; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q0, q3
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
; CHECK-MVEFP-LABEL: vcmp_one_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, q3
+; CHECK-MVEFP-NEXT: movw r1, #65535
+; CHECK-MVEFP-NEXT: vcmp.f16 le, q3, q0
+; CHECK-MVEFP-NEXT: vpst
+; CHECK-MVEFP-NEXT: vcmpt.f16 le, q0, q3
; CHECK-MVEFP-NEXT: vmrs r0, p0
-; CHECK-MVEFP-NEXT: vcmp.f16 gt, q3, q0
-; CHECK-MVEFP-NEXT: vmrs r1, p0
-; CHECK-MVEFP-NEXT: orrs r0, r1
+; CHECK-MVEFP-NEXT: eors r0, r1
; CHECK-MVEFP-NEXT: vmsr p0, r0
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
; CHECK-MVEFP-NEXT: bx lr
; CHECK-MVEFP-LABEL: vcmp_ueq_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, q3
-; CHECK-MVEFP-NEXT: vmrs r0, p0
-; CHECK-MVEFP-NEXT: vcmp.f16 gt, q3, q0
-; CHECK-MVEFP-NEXT: vmrs r1, p0
-; CHECK-MVEFP-NEXT: orrs r0, r1
-; CHECK-MVEFP-NEXT: movw r1, #65535
-; CHECK-MVEFP-NEXT: eors r0, r1
-; CHECK-MVEFP-NEXT: vmsr p0, r0
+; CHECK-MVEFP-NEXT: vcmp.f16 le, q3, q0
+; CHECK-MVEFP-NEXT: vpst
+; CHECK-MVEFP-NEXT: vcmpt.f16 le, q0, q3
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
; CHECK-MVEFP-LABEL: vcmp_ord_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, q3
+; CHECK-MVEFP-NEXT: movw r1, #65535
+; CHECK-MVEFP-NEXT: vcmp.f16 le, q3, q0
+; CHECK-MVEFP-NEXT: vpst
+; CHECK-MVEFP-NEXT: vcmpt.f16 lt, q0, q3
; CHECK-MVEFP-NEXT: vmrs r0, p0
-; CHECK-MVEFP-NEXT: vcmp.f16 gt, q3, q0
-; CHECK-MVEFP-NEXT: vmrs r1, p0
-; CHECK-MVEFP-NEXT: orrs r0, r1
+; CHECK-MVEFP-NEXT: eors r0, r1
; CHECK-MVEFP-NEXT: vmsr p0, r0
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
; CHECK-MVEFP-NEXT: bx lr
; CHECK-MVEFP-LABEL: vcmp_uno_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, q3
-; CHECK-MVEFP-NEXT: vmrs r0, p0
-; CHECK-MVEFP-NEXT: vcmp.f16 gt, q3, q0
-; CHECK-MVEFP-NEXT: vmrs r1, p0
-; CHECK-MVEFP-NEXT: orrs r0, r1
-; CHECK-MVEFP-NEXT: movw r1, #65535
-; CHECK-MVEFP-NEXT: eors r0, r1
-; CHECK-MVEFP-NEXT: vmsr p0, r0
+; CHECK-MVEFP-NEXT: vcmp.f16 le, q3, q0
+; CHECK-MVEFP-NEXT: vpst
+; CHECK-MVEFP-NEXT: vcmpt.f16 lt, q0, q3
; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
; CHECK-MVEFP-NEXT: bx lr
entry: