setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::XOR);
+ if (Subtarget->hasMVEIntegerOps())
+ setTargetDAGCombine(ISD::VSELECT);
+
if (Subtarget->hasV6Ops())
setTargetDAGCombine(ISD::SRL);
if (Subtarget->isThumb1Only())
return SDValue();
}
+// Fold vselect(not(cond), lhs, rhs) -> vselect(cond, rhs, lhs), so the VPNOT
+// feeding a VPSEL can be dropped by swapping the select operands (MVE only).
+static SDValue PerformVSELECTCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ // Transforms vselect(not(cond), lhs, rhs) into vselect(cond, rhs, lhs).
+ //
+ // We need to re-implement this optimization here as the implementation in the
+ // Target-Independent DAGCombiner does not handle the kind of constant we make
+ // (it calls isConstOrConstSplat with AllowTruncation set to false - and for
+ // good reason, allowing truncation there would break other targets).
+ //
+ // Currently, this is only done for MVE, as it's the only target that benefits
+ // from this transformation (e.g. VPNOT+VPSEL becomes a single VPSEL).
+ if (!Subtarget->hasMVEIntegerOps())
+ return SDValue();
+
+ // The logical NOT of a boolean vector is lowered as XOR with an all-ones
+ // splat, so the condition must be an XOR node for this fold to apply.
+ if (N->getOperand(0).getOpcode() != ISD::XOR)
+ return SDValue();
+ SDValue XOR = N->getOperand(0);
+
+ // Check if the XOR's RHS is either a 1, or a BUILD_VECTOR of 1s.
+ // It is important to check with truncation allowed as the BUILD_VECTORs we
+ // generate in those situations will truncate their operands.
+ ConstantSDNode *Const =
+ isConstOrConstSplat(XOR->getOperand(1), /*AllowUndefs*/ false,
+ /*AllowTruncation*/ true);
+ if (!Const || !Const->isOne())
+ return SDValue();
+
+ // Rewrite into vselect(cond, rhs, lhs).
+ // Cond is the un-negated condition (the XOR's LHS); swapping the select's
+ // true/false operands compensates for dropping the NOT.
+ SDValue Cond = XOR->getOperand(0);
+ SDValue LHS = N->getOperand(1);
+ SDValue RHS = N->getOperand(2);
+ EVT Type = N->getValueType(0);
+ return DCI.DAG.getNode(ISD::VSELECT, SDLoc(N), Type, Cond, RHS, LHS);
+}
+
static SDValue PerformABSCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
DAGCombinerInfo &DCI) const {
switch (N->getOpcode()) {
default: break;
+ case ISD::VSELECT: return PerformVSELECTCombine(N, DCI, Subtarget);
case ISD::ABS: return PerformABSCombine(N, DCI, Subtarget);
case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.i32 ne, q1, zr
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <4 x i32> %a, zeroinitializer
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.i32 eq, q1, zr
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <4 x i32> %a, zeroinitializer
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.s32 ge, q1, zr
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <4 x i32> %a, zeroinitializer
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.s32 le, q1, zr
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <4 x i32> %a, zeroinitializer
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.s32 gt, q1, zr
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <4 x i32> %a, zeroinitializer
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.s32 lt, q1, zr
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <4 x i32> %a, zeroinitializer
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.i32 eq, q1, zr
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <4 x i32> %a, zeroinitializer
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.i32 ne, q1, q2
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <4 x i32> %a, zeroinitializer
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.i32 eq, q1, q2
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <4 x i32> %a, zeroinitializer
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.s32 le, q2, q1
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <4 x i32> %a, zeroinitializer
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.s32 le, q1, q2
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <4 x i32> %a, zeroinitializer
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.s32 lt, q2, q1
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <4 x i32> %a, zeroinitializer
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i32 ne, q0, zr
; CHECK-NEXT: vcmpt.s32 lt, q1, q2
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <4 x i32> %a, zeroinitializer
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i16 ne, q0, zr
; CHECK-NEXT: vcmpt.i16 ne, q1, zr
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <8 x i16> %a, zeroinitializer
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i16 ne, q0, zr
; CHECK-NEXT: vcmpt.i16 ne, q1, q2
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <8 x i16> %a, zeroinitializer
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i8 ne, q0, zr
; CHECK-NEXT: vcmpt.i8 ne, q1, zr
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <16 x i8> %a, zeroinitializer
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vpt.i8 ne, q0, zr
; CHECK-NEXT: vcmpt.i8 ne, q1, q2
-; CHECK-NEXT: vpnot
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c1 = icmp eq <16 x i8> %a, zeroinitializer
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vpt.f32 le, q1, q0
; CHECK-MVEFP-NEXT: vcmpt.f32 le, q0, q1
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp one <4 x float> %src, %src2
; CHECK-MVEFP-LABEL: vcmp_ugt_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f32 ge, q1, q0
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ugt <4 x float> %src, %src2
; CHECK-MVEFP-LABEL: vcmp_uge_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f32 gt, q1, q0
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp uge <4 x float> %src, %src2
; CHECK-MVEFP-LABEL: vcmp_ult_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, q1
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ult <4 x float> %src, %src2
; CHECK-MVEFP-LABEL: vcmp_ule_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, q1
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ule <4 x float> %src, %src2
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vpt.f32 le, q1, q0
; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q0, q1
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ord <4 x float> %src, %src2
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vpt.f16 le, q1, q0
; CHECK-MVEFP-NEXT: vcmpt.f16 le, q0, q1
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp one <8 x half> %src, %src2
; CHECK-MVEFP-LABEL: vcmp_ugt_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f16 ge, q1, q0
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ugt <8 x half> %src, %src2
; CHECK-MVEFP-LABEL: vcmp_uge_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f16 gt, q1, q0
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp uge <8 x half> %src, %src2
; CHECK-MVEFP-LABEL: vcmp_ult_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, q1
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ult <8 x half> %src, %src2
; CHECK-MVEFP-LABEL: vcmp_ule_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, q1
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ule <8 x half> %src, %src2
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vpt.f16 le, q1, q0
; CHECK-MVEFP-NEXT: vcmpt.f16 lt, q0, q1
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ord <8 x half> %src, %src2
; CHECK-MVEFP-NEXT: vmov r0, s4
; CHECK-MVEFP-NEXT: vpt.f32 ge, q0, r0
; CHECK-MVEFP-NEXT: vcmpt.f32 le, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%i = insertelement <4 x float> undef, float %src2, i32 0
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vmov r0, s4
; CHECK-MVEFP-NEXT: vcmp.f32 le, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%i = insertelement <4 x float> undef, float %src2, i32 0
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vmov r0, s4
; CHECK-MVEFP-NEXT: vcmp.f32 lt, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%i = insertelement <4 x float> undef, float %src2, i32 0
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vmov r0, s4
; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%i = insertelement <4 x float> undef, float %src2, i32 0
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vmov r0, s4
; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%i = insertelement <4 x float> undef, float %src2, i32 0
; CHECK-MVEFP-NEXT: vmov r0, s4
; CHECK-MVEFP-NEXT: vpt.f32 ge, q0, r0
; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%i = insertelement <4 x float> undef, float %src2, i32 0
; CHECK-MVEFP-NEXT: ldrh r0, [r0]
; CHECK-MVEFP-NEXT: vpt.f16 ge, q0, r0
; CHECK-MVEFP-NEXT: vcmpt.f16 le, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
; CHECK-MVEFP-NEXT: bx lr
entry:
%src2 = load half, half* %src2p
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: ldrh r0, [r0]
; CHECK-MVEFP-NEXT: vcmp.f16 le, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
; CHECK-MVEFP-NEXT: bx lr
entry:
%src2 = load half, half* %src2p
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: ldrh r0, [r0]
; CHECK-MVEFP-NEXT: vcmp.f16 lt, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
; CHECK-MVEFP-NEXT: bx lr
entry:
%src2 = load half, half* %src2p
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: ldrh r0, [r0]
; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
; CHECK-MVEFP-NEXT: bx lr
entry:
%src2 = load half, half* %src2p
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: ldrh r0, [r0]
; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
; CHECK-MVEFP-NEXT: bx lr
entry:
%src2 = load half, half* %src2p
; CHECK-MVEFP-NEXT: ldrh r0, [r0]
; CHECK-MVEFP-NEXT: vpt.f16 ge, q0, r0
; CHECK-MVEFP-NEXT: vcmpt.f16 lt, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
; CHECK-MVEFP-NEXT: bx lr
entry:
%src2 = load half, half* %src2p
; CHECK-MVEFP-NEXT: vmov r0, s4
; CHECK-MVEFP-NEXT: vpt.f32 le, q0, r0
; CHECK-MVEFP-NEXT: vcmpt.f32 ge, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%i = insertelement <4 x float> undef, float %src2, i32 0
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vmov r0, s4
; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%i = insertelement <4 x float> undef, float %src2, i32 0
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vmov r0, s4
; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%i = insertelement <4 x float> undef, float %src2, i32 0
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vmov r0, s4
; CHECK-MVEFP-NEXT: vcmp.f32 le, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%i = insertelement <4 x float> undef, float %src2, i32 0
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vmov r0, s4
; CHECK-MVEFP-NEXT: vcmp.f32 lt, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%i = insertelement <4 x float> undef, float %src2, i32 0
; CHECK-MVEFP-NEXT: vmov r0, s4
; CHECK-MVEFP-NEXT: vpt.f32 le, q0, r0
; CHECK-MVEFP-NEXT: vcmpt.f32 gt, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
; CHECK-MVEFP-NEXT: bx lr
entry:
%i = insertelement <4 x float> undef, float %src2, i32 0
; CHECK-MVEFP-NEXT: ldrh r0, [r0]
; CHECK-MVEFP-NEXT: vpt.f16 le, q0, r0
; CHECK-MVEFP-NEXT: vcmpt.f16 ge, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
; CHECK-MVEFP-NEXT: bx lr
entry:
%src2 = load half, half* %src2p
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: ldrh r0, [r0]
; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
; CHECK-MVEFP-NEXT: bx lr
entry:
%src2 = load half, half* %src2p
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: ldrh r0, [r0]
; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
; CHECK-MVEFP-NEXT: bx lr
entry:
%src2 = load half, half* %src2p
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: ldrh r0, [r0]
; CHECK-MVEFP-NEXT: vcmp.f16 le, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
; CHECK-MVEFP-NEXT: bx lr
entry:
%src2 = load half, half* %src2p
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: ldrh r0, [r0]
; CHECK-MVEFP-NEXT: vcmp.f16 lt, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
; CHECK-MVEFP-NEXT: bx lr
entry:
%src2 = load half, half* %src2p
; CHECK-MVEFP-NEXT: ldrh r0, [r0]
; CHECK-MVEFP-NEXT: vpt.f16 le, q0, r0
; CHECK-MVEFP-NEXT: vcmpt.f16 gt, q0, r0
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
; CHECK-MVEFP-NEXT: bx lr
entry:
%src2 = load half, half* %src2p
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vpt.f32 ge, q0, zr
; CHECK-MVEFP-NEXT: vcmpt.f32 le, q0, zr
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp one <4 x float> %src, zeroinitializer
; CHECK-MVEFP-LABEL: vcmp_ugt_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f32 le, q0, zr
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ugt <4 x float> %src, zeroinitializer
; CHECK-MVEFP-LABEL: vcmp_uge_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f32 lt, q0, zr
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp uge <4 x float> %src, zeroinitializer
; CHECK-MVEFP-LABEL: vcmp_ult_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, zr
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ult <4 x float> %src, zeroinitializer
; CHECK-MVEFP-LABEL: vcmp_ule_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, zr
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ule <4 x float> %src, zeroinitializer
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vpt.f32 ge, q0, zr
; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q0, zr
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ord <4 x float> %src, zeroinitializer
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vpt.f16 ge, q0, zr
; CHECK-MVEFP-NEXT: vcmpt.f16 le, q0, zr
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp one <8 x half> %src, zeroinitializer
; CHECK-MVEFP-LABEL: vcmp_ugt_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f16 le, q0, zr
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ugt <8 x half> %src, zeroinitializer
; CHECK-MVEFP-LABEL: vcmp_uge_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f16 lt, q0, zr
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp uge <8 x half> %src, zeroinitializer
; CHECK-MVEFP-LABEL: vcmp_ult_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, zr
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ult <8 x half> %src, zeroinitializer
; CHECK-MVEFP-LABEL: vcmp_ule_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, zr
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ule <8 x half> %src, zeroinitializer
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vpt.f16 ge, q0, zr
; CHECK-MVEFP-NEXT: vcmpt.f16 lt, q0, zr
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ord <8 x half> %src, zeroinitializer
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vpt.f32 le, q0, zr
; CHECK-MVEFP-NEXT: vcmpt.f32 ge, q0, zr
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp one <4 x float> zeroinitializer, %src
; CHECK-MVEFP-LABEL: vcmp_r_ugt_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, zr
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ugt <4 x float> zeroinitializer, %src
; CHECK-MVEFP-LABEL: vcmp_r_uge_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, zr
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp uge <4 x float> zeroinitializer, %src
; CHECK-MVEFP-LABEL: vcmp_r_ult_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f32 le, q0, zr
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ult <4 x float> zeroinitializer, %src
; CHECK-MVEFP-LABEL: vcmp_r_ule_v4f32:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f32 lt, q0, zr
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ule <4 x float> zeroinitializer, %src
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vpt.f32 le, q0, zr
; CHECK-MVEFP-NEXT: vcmpt.f32 gt, q0, zr
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ord <4 x float> zeroinitializer, %src
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vpt.f16 le, q0, zr
; CHECK-MVEFP-NEXT: vcmpt.f16 ge, q0, zr
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp one <8 x half> zeroinitializer, %src
; CHECK-MVEFP-LABEL: vcmp_r_ugt_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, zr
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ugt <8 x half> zeroinitializer, %src
; CHECK-MVEFP-LABEL: vcmp_r_uge_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, zr
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp uge <8 x half> zeroinitializer, %src
; CHECK-MVEFP-LABEL: vcmp_r_ult_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f16 le, q0, zr
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ult <8 x half> zeroinitializer, %src
; CHECK-MVEFP-LABEL: vcmp_r_ule_v8f16:
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vcmp.f16 lt, q0, zr
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ule <8 x half> zeroinitializer, %src
; CHECK-MVEFP: @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT: vpt.f16 le, q0, zr
; CHECK-MVEFP-NEXT: vcmpt.f16 gt, q0, zr
-; CHECK-MVEFP-NEXT: vpnot
-; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
; CHECK-MVEFP-NEXT: bx lr
entry:
%c = fcmp ord <8 x half> zeroinitializer, %src