setOperationAction(ISD::UMIN, VT, Legal);
setOperationAction(ISD::UMAX, VT, Legal);
setOperationAction(ISD::ABS, VT, Legal);
+ setOperationAction(ISD::SETCC, VT, Custom);
// No native support for these.
setOperationAction(ISD::UDIV, VT, Expand);
setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
+
+ // MVE predicate vector types, kept in the VCCR predicate register class.
+ const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1};
+ for (auto VT : pTypes) {
+ addRegisterClass(VT, &ARM::VCCRRegClass);
+ }
}
ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
case ARMISD::VCEQ: return "ARMISD::VCEQ";
case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
+ case ARMISD::VCNE: return "ARMISD::VCNE";
+ case ARMISD::VCNEZ: return "ARMISD::VCNEZ";
case ARMISD::VCGE: return "ARMISD::VCGE";
case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
case ARMISD::VCLEZ: return "ARMISD::VCLEZ";
EVT VT) const {
if (!VT.isVector())
return getPointerTy(DL);
+
+ // MVE has a predicate register; compares on the 128-bit MVE vector types
+ // produce a vector of i1 with one lane per input lane.
+ if (Subtarget->hasMVEIntegerOps() &&
+ (VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8))
+ return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
return VT.changeVectorElementTypeToInteger();
}
return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
}
-static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
SDValue TmpOp0, TmpOp1;
bool Invert = false;
bool Swap = false;
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
- EVT CmpVT = Op0.getValueType().changeVectorElementTypeToInteger();
EVT VT = Op.getValueType();
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
SDLoc dl(Op);
+ EVT CmpVT;
+ if (ST->hasNEON())
+ CmpVT = Op0.getValueType().changeVectorElementTypeToInteger();
+ else {
+ assert(ST->hasMVEIntegerOps() &&
+ "No hardware support for integer vector comparison!");
+
+ if (Op.getValueType().getVectorElementType() != MVT::i1)
+ return SDValue();
+
+ CmpVT = VT;
+ }
+
if (Op0.getValueType().getVectorElementType() == MVT::i64 &&
(SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) {
// Special-case integer 64-bit equality comparisons. They aren't legal,
// Integer comparisons.
switch (SetCCOpcode) {
default: llvm_unreachable("Illegal integer comparison");
- case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH;
+ case ISD::SETNE:
+ if (ST->hasMVEIntegerOps()) {
+ Opc = ARMISD::VCNE; break;
+ } else {
+ Invert = true; LLVM_FALLTHROUGH;
+ }
case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETGT: Opc = ARMISD::VCGT; break;
}
// Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
- if (Opc == ARMISD::VCEQ) {
+ if (ST->hasNEON() && Opc == ARMISD::VCEQ) {
SDValue AndOp;
if (ISD::isBuildVectorAllZeros(Op1.getNode()))
AndOp = Op0;
SDValue Result;
if (SingleOp.getNode()) {
switch (Opc) {
+ case ARMISD::VCNE:
+ assert(ST->hasMVEIntegerOps() && "Unexpected DAG node");
+ Result = DAG.getNode(ARMISD::VCNEZ, dl, CmpVT, SingleOp); break;
case ARMISD::VCEQ:
Result = DAG.getNode(ARMISD::VCEQZ, dl, CmpVT, SingleOp); break;
case ARMISD::VCGE:
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget);
- case ISD::SETCC: return LowerVSETCC(Op, DAG);
+ case ISD::SETCC: return LowerVSETCC(Op, DAG, Subtarget);
case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG);
case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
if (!Subtarget->hasMVEIntegerOps())
return false;
+
+ // For the MVE predicate vector types, report the access as supported and
+ // fast.
+ if ((Ty == MVT::v16i1 || Ty == MVT::v8i1 || Ty == MVT::v4i1)) {
+ if (Fast)
+ *Fast = true;
+ return true;
+ }
+
if (Ty != MVT::v16i8 && Ty != MVT::v8i16 && Ty != MVT::v8f16 &&
Ty != MVT::v4i32 && Ty != MVT::v4f32 && Ty != MVT::v2i64 &&
Ty != MVT::v2f64 &&
VCEQ, // Vector compare equal.
VCEQZ, // Vector compare equal to zero.
+ VCNE, // Vector compare not equal (MVE)
+ VCNEZ, // Vector compare not equal to zero (MVE)
VCGE, // Vector compare greater than or equal.
VCGEZ, // Vector compare greater than or equal to zero.
VCLEZ, // Vector compare less than or equal to zero.
def ARMvshls : SDNode<"ARMISD::VSHLs", SDTARMVSH>;
def ARMvshlu : SDNode<"ARMISD::VSHLu", SDTARMVSH>;
+// Vector-compare DAG node definitions shared by the NEON and MVE patterns.
+// SDTARMVCMP compares two vectors of the same type and yields an integer
+// vector result; SDTARMVCMPZ is the one-operand compare-against-zero form
+// (unconstrained profile).
+def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
+def SDTARMVCMPZ : SDTypeProfile<1, 1, []>;
+
+def ARMvceq : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
+def ARMvceqz : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
+def ARMvcne : SDNode<"ARMISD::VCNE", SDTARMVCMP>;
+def ARMvcnez : SDNode<"ARMISD::VCNEZ", SDTARMVCMPZ>;
+def ARMvcge : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
+def ARMvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
+def ARMvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
+def ARMvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
+def ARMvcgt : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
+def ARMvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
+def ARMvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
+def ARMvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
+
def ARMWLS : SDNode<"ARMISD::WLS", SDT_ARMLoLoop, [SDNPHasChain]>;
def ARMLE : SDNode<"ARMISD::LE", SDT_ARMLoLoop, [SDNPHasChain]>;
def ARMLoopDec : SDNode<"ARMISD::LOOP_DEC", SDTIntBinOp, [SDNPHasChain]>;
+
//===----------------------------------------------------------------------===//
// ARM Flag Definitions.
def MVE_VCMPs16r : MVE_VCMPqrs<"s16", 0b01>;
def MVE_VCMPs32r : MVE_VCMPqrs<"s32", 0b10>;
+// Selection patterns for an unpredicated MVE compare against zero: one Pat
+// per MVE integer vector width, selecting the register-operand VCMP form
+// with ZR as the second operand. `suffix` picks the size/signedness mnemonic
+// ("i"/"s"/"u") and `fc` is the immediate condition-field operand.
+multiclass unpred_vcmp_z<SDPatternOperator opnode, string suffix, int fc> {
+ def i8 : Pat<(v16i1 (opnode (v16i8 MQPR:$v1))),
+ (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc))>;
+ def i16 : Pat<(v8i1 (opnode (v8i16 MQPR:$v1))),
+ (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc))>;
+ def i32 : Pat<(v4i1 (opnode (v4i32 MQPR:$v1))),
+ (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc))>;
+}
+
+// Selection patterns for an unpredicated MVE vector-vector compare: one Pat
+// per MVE integer vector width, selecting the two-register VCMP form.
+// `suffix` picks the size/signedness mnemonic ("i"/"s"/"u") and `fc` is the
+// immediate condition-field operand.
+multiclass unpred_vcmp_r<SDPatternOperator opnode, string suffix, int fc> {
+ def i8 : Pat<(v16i1 (opnode (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))),
+ (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc))>;
+ def i16 : Pat<(v8i1 (opnode (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))),
+ (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc))>;
+ def i32 : Pat<(v4i1 (opnode (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))),
+ (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc))>;
+}
+
+// Instantiate the unpredicated compare patterns for each ARMISD compare
+// node. The integer `fc` argument is the VCMP condition-field value for
+// that comparison (presumably matching the MVE VCMP encoding of eq/ne/
+// ge/lt/gt/le/cs/hi -- confirm against the instruction definitions).
+let Predicates = [HasMVEInt] in {
+ // Compares against zero (ZR as the second operand).
+ defm MVE_VCEQZ : unpred_vcmp_z<ARMvceqz, "i", 0>;
+ defm MVE_VCNEZ : unpred_vcmp_z<ARMvcnez, "i", 1>;
+ defm MVE_VCLEZ : unpred_vcmp_z<ARMvclez, "s", 13>;
+ defm MVE_VCGTZ : unpred_vcmp_z<ARMvcgtz, "s", 12>;
+ defm MVE_VCLTZ : unpred_vcmp_z<ARMvcltz, "s", 11>;
+ defm MVE_VCGEZ : unpred_vcmp_z<ARMvcgez, "s", 10>;
+
+ // Vector-vector compares; signed ("s") and unsigned ("u") orderings use
+ // distinct condition-field values.
+ defm MVE_VCEQ : unpred_vcmp_r<ARMvceq, "i", 0>;
+ defm MVE_VCNE : unpred_vcmp_r<ARMvcne, "i", 1>;
+ defm MVE_VCGT : unpred_vcmp_r<ARMvcgt, "s", 12>;
+ defm MVE_VCGE : unpred_vcmp_r<ARMvcge, "s", 10>;
+ defm MVE_VCGTU : unpred_vcmp_r<ARMvcgtu, "u", 8>;
+ defm MVE_VCGEU : unpred_vcmp_r<ARMvcgeu, "u", 2>;
+}
+
// end of MVE compares
// start of MVE_qDest_qSrc
def : MVEInstAlias<"vpsel${vp}." # suffix # "\t$Qd, $Qn, $Qm",
(MVE_VPSEL MQPR:$Qd, MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
+// Select a vselect on a VCCR predicate operand into VPSEL: lanes are taken
+// from $v1 or $v2 under control of $pred, mirroring vselect's operand order
+// (first value operand chosen for set predicate lanes).
+let Predicates = [HasMVEInt] in {
+ def : Pat<(v16i8 (vselect (v16i1 VCCR:$pred), (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))),
+ (v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>;
+ def : Pat<(v8i16 (vselect (v8i1 VCCR:$pred), (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))),
+ (v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>;
+ def : Pat<(v4i32 (vselect (v4i1 VCCR:$pred), (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))),
+ (v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>;
+}
+
def MVE_VPNOT : MVE_p<(outs), (ins), NoItinerary,
"vpnot", "", "", vpred_n, "", []> {
let Inst{31-0} = 0b11111110001100010000111101001101;
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//
-def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
-def SDTARMVCMPZ : SDTypeProfile<1, 1, []>;
-
-def NEONvceq : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
-def NEONvceqz : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>;
-def NEONvcge : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
-def NEONvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
-def NEONvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
-def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
-def NEONvcgt : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
-def NEONvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
-def NEONvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
-def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVCMP>;
// Types for vector shift by immediates. The "SHX" version is for long and
// VCEQ : Vector Compare Equal
defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
- IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>;
+ IIC_VSUBi4Q, "vceq", "i", ARMvceq, 1>;
def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
- NEONvceq, 1>;
+ ARMvceq, 1>;
def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
- NEONvceq, 1>;
+ ARMvceq, 1>;
def VCEQhd : N3VD<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16,
- NEONvceq, 1>,
+ ARMvceq, 1>,
Requires<[HasNEON, HasFullFP16]>;
def VCEQhq : N3VQ<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16,
- NEONvceq, 1>,
+ ARMvceq, 1>,
Requires<[HasNEON, HasFullFP16]>;
let TwoOperandAliasConstraint = "$Vm = $Vd" in
defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
- "$Vd, $Vm, #0", NEONvceqz>;
+ "$Vd, $Vm, #0", ARMvceqz>;
// VCGE : Vector Compare Greater Than or Equal
defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
- IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>;
+ IIC_VSUBi4Q, "vcge", "s", ARMvcge, 0>;
defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
- IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>;
+ IIC_VSUBi4Q, "vcge", "u", ARMvcgeu, 0>;
def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
- NEONvcge, 0>;
+ ARMvcge, 0>;
def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
- NEONvcge, 0>;
+ ARMvcge, 0>;
def VCGEhd : N3VD<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16,
- NEONvcge, 0>,
+ ARMvcge, 0>,
Requires<[HasNEON, HasFullFP16]>;
def VCGEhq : N3VQ<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16,
- NEONvcge, 0>,
+ ARMvcge, 0>,
Requires<[HasNEON, HasFullFP16]>;
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
- "$Vd, $Vm, #0", NEONvcgez>;
+ "$Vd, $Vm, #0", ARMvcgez>;
defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
- "$Vd, $Vm, #0", NEONvclez>;
+ "$Vd, $Vm, #0", ARMvclez>;
}
// VCGT : Vector Compare Greater Than
defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
- IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>;
+ IIC_VSUBi4Q, "vcgt", "s", ARMvcgt, 0>;
defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
- IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>;
+ IIC_VSUBi4Q, "vcgt", "u", ARMvcgtu, 0>;
def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
- NEONvcgt, 0>;
+ ARMvcgt, 0>;
def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
- NEONvcgt, 0>;
+ ARMvcgt, 0>;
def VCGThd : N3VD<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16,
- NEONvcgt, 0>,
+ ARMvcgt, 0>,
Requires<[HasNEON, HasFullFP16]>;
def VCGThq : N3VQ<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16,
- NEONvcgt, 0>,
+ ARMvcgt, 0>,
Requires<[HasNEON, HasFullFP16]>;
let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
- "$Vd, $Vm, #0", NEONvcgtz>;
+ "$Vd, $Vm, #0", ARMvcgtz>;
defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
- "$Vd, $Vm, #0", NEONvcltz>;
+ "$Vd, $Vm, #0", ARMvcltz>;
}
// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_eq_v4i32(<4 x i32> %src, <4 x i32> %srcb, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_eq_v4i32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.i32 eq, q0, q1
+; CHECK-NEXT: vpsel q0, q2, q3
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp eq <4 x i32> %src, %srcb
+ %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %s
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_ne_v4i32(<4 x i32> %src, <4 x i32> %srcb, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_ne_v4i32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.i32 ne, q0, q1
+; CHECK-NEXT: vpsel q0, q2, q3
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp ne <4 x i32> %src, %srcb
+ %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %s
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_sgt_v4i32(<4 x i32> %src, <4 x i32> %srcb, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_sgt_v4i32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.s32 gt, q0, q1
+; CHECK-NEXT: vpsel q0, q2, q3
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp sgt <4 x i32> %src, %srcb
+ %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %s
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_sge_v4i32(<4 x i32> %src, <4 x i32> %srcb, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_sge_v4i32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.s32 ge, q0, q1
+; CHECK-NEXT: vpsel q0, q2, q3
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp sge <4 x i32> %src, %srcb
+ %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %s
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_slt_v4i32(<4 x i32> %src, <4 x i32> %srcb, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_slt_v4i32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.s32 gt, q1, q0
+; CHECK-NEXT: vpsel q0, q2, q3
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp slt <4 x i32> %src, %srcb
+ %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %s
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_sle_v4i32(<4 x i32> %src, <4 x i32> %srcb, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_sle_v4i32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.s32 ge, q1, q0
+; CHECK-NEXT: vpsel q0, q2, q3
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp sle <4 x i32> %src, %srcb
+ %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %s
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_ugt_v4i32(<4 x i32> %src, <4 x i32> %srcb, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_ugt_v4i32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.u32 hi, q0, q1
+; CHECK-NEXT: vpsel q0, q2, q3
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp ugt <4 x i32> %src, %srcb
+ %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %s
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_uge_v4i32(<4 x i32> %src, <4 x i32> %srcb, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_uge_v4i32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.u32 cs, q0, q1
+; CHECK-NEXT: vpsel q0, q2, q3
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp uge <4 x i32> %src, %srcb
+ %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %s
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_ult_v4i32(<4 x i32> %src, <4 x i32> %srcb, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_ult_v4i32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.u32 hi, q1, q0
+; CHECK-NEXT: vpsel q0, q2, q3
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp ult <4 x i32> %src, %srcb
+ %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %s
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_ule_v4i32(<4 x i32> %src, <4 x i32> %srcb, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_ule_v4i32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.u32 cs, q1, q0
+; CHECK-NEXT: vpsel q0, q2, q3
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp ule <4 x i32> %src, %srcb
+ %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %s
+}
+
+
+define arm_aapcs_vfpcc <8 x i16> @vcmp_eq_v8i16(<8 x i16> %src, <8 x i16> %srcb, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_eq_v8i16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.i16 eq, q0, q1
+; CHECK-NEXT: vpsel q0, q2, q3
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp eq <8 x i16> %src, %srcb
+ %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+ ret <8 x i16> %s
+}
+
+define arm_aapcs_vfpcc <8 x i16> @vcmp_ne_v8i16(<8 x i16> %src, <8 x i16> %srcb, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_ne_v8i16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.i16 ne, q0, q1
+; CHECK-NEXT: vpsel q0, q2, q3
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp ne <8 x i16> %src, %srcb
+ %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+ ret <8 x i16> %s
+}
+
+define arm_aapcs_vfpcc <8 x i16> @vcmp_sgt_v8i16(<8 x i16> %src, <8 x i16> %srcb, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_sgt_v8i16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.s16 gt, q0, q1
+; CHECK-NEXT: vpsel q0, q2, q3
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp sgt <8 x i16> %src, %srcb
+ %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+ ret <8 x i16> %s
+}
+
+define arm_aapcs_vfpcc <8 x i16> @vcmp_sge_v8i16(<8 x i16> %src, <8 x i16> %srcb, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_sge_v8i16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.s16 ge, q0, q1
+; CHECK-NEXT: vpsel q0, q2, q3
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp sge <8 x i16> %src, %srcb
+ %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+ ret <8 x i16> %s
+}
+
+define arm_aapcs_vfpcc <8 x i16> @vcmp_slt_v8i16(<8 x i16> %src, <8 x i16> %srcb, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_slt_v8i16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.s16 gt, q1, q0
+; CHECK-NEXT: vpsel q0, q2, q3
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp slt <8 x i16> %src, %srcb
+ %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+ ret <8 x i16> %s
+}
+
+define arm_aapcs_vfpcc <8 x i16> @vcmp_sle_v8i16(<8 x i16> %src, <8 x i16> %srcb, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_sle_v8i16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.s16 ge, q1, q0
+; CHECK-NEXT: vpsel q0, q2, q3
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp sle <8 x i16> %src, %srcb
+ %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+ ret <8 x i16> %s
+}
+
+define arm_aapcs_vfpcc <8 x i16> @vcmp_ugt_v8i16(<8 x i16> %src, <8 x i16> %srcb, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_ugt_v8i16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.u16 hi, q0, q1
+; CHECK-NEXT: vpsel q0, q2, q3
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp ugt <8 x i16> %src, %srcb
+ %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+ ret <8 x i16> %s
+}
+
+define arm_aapcs_vfpcc <8 x i16> @vcmp_uge_v8i16(<8 x i16> %src, <8 x i16> %srcb, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_uge_v8i16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.u16 cs, q0, q1
+; CHECK-NEXT: vpsel q0, q2, q3
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp uge <8 x i16> %src, %srcb
+ %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+ ret <8 x i16> %s
+}
+
+define arm_aapcs_vfpcc <8 x i16> @vcmp_ult_v8i16(<8 x i16> %src, <8 x i16> %srcb, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_ult_v8i16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.u16 hi, q1, q0
+; CHECK-NEXT: vpsel q0, q2, q3
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp ult <8 x i16> %src, %srcb
+ %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+ ret <8 x i16> %s
+}
+
+define arm_aapcs_vfpcc <8 x i16> @vcmp_ule_v8i16(<8 x i16> %src, <8 x i16> %srcb, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_ule_v8i16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.u16 cs, q1, q0
+; CHECK-NEXT: vpsel q0, q2, q3
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp ule <8 x i16> %src, %srcb
+ %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+ ret <8 x i16> %s
+}
+
+
+define arm_aapcs_vfpcc <16 x i8> @vcmp_eq_v16i8(<16 x i8> %src, <16 x i8> %srcb, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_eq_v16i8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.i8 eq, q0, q1
+; CHECK-NEXT: vpsel q0, q2, q3
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp eq <16 x i8> %src, %srcb
+ %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %s
+}
+
+define arm_aapcs_vfpcc <16 x i8> @vcmp_ne_v16i8(<16 x i8> %src, <16 x i8> %srcb, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_ne_v16i8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.i8 ne, q0, q1
+; CHECK-NEXT: vpsel q0, q2, q3
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp ne <16 x i8> %src, %srcb
+ %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %s
+}
+
+define arm_aapcs_vfpcc <16 x i8> @vcmp_sgt_v16i8(<16 x i8> %src, <16 x i8> %srcb, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_sgt_v16i8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.s8 gt, q0, q1
+; CHECK-NEXT: vpsel q0, q2, q3
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp sgt <16 x i8> %src, %srcb
+ %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %s
+}
+
+define arm_aapcs_vfpcc <16 x i8> @vcmp_sge_v16i8(<16 x i8> %src, <16 x i8> %srcb, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_sge_v16i8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.s8 ge, q0, q1
+; CHECK-NEXT: vpsel q0, q2, q3
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp sge <16 x i8> %src, %srcb
+ %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %s
+}
+
+define arm_aapcs_vfpcc <16 x i8> @vcmp_slt_v16i8(<16 x i8> %src, <16 x i8> %srcb, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_slt_v16i8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.s8 gt, q1, q0
+; CHECK-NEXT: vpsel q0, q2, q3
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp slt <16 x i8> %src, %srcb
+ %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %s
+}
+
+define arm_aapcs_vfpcc <16 x i8> @vcmp_sle_v16i8(<16 x i8> %src, <16 x i8> %srcb, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_sle_v16i8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.s8 ge, q1, q0
+; CHECK-NEXT: vpsel q0, q2, q3
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp sle <16 x i8> %src, %srcb
+ %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %s
+}
+
+define arm_aapcs_vfpcc <16 x i8> @vcmp_ugt_v16i8(<16 x i8> %src, <16 x i8> %srcb, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_ugt_v16i8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.u8 hi, q0, q1
+; CHECK-NEXT: vpsel q0, q2, q3
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp ugt <16 x i8> %src, %srcb
+ %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %s
+}
+
+define arm_aapcs_vfpcc <16 x i8> @vcmp_uge_v16i8(<16 x i8> %src, <16 x i8> %srcb, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_uge_v16i8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.u8 cs, q0, q1
+; CHECK-NEXT: vpsel q0, q2, q3
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp uge <16 x i8> %src, %srcb
+ %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %s
+}
+
+define arm_aapcs_vfpcc <16 x i8> @vcmp_ult_v16i8(<16 x i8> %src, <16 x i8> %srcb, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_ult_v16i8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.u8 hi, q1, q0
+; CHECK-NEXT: vpsel q0, q2, q3
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp ult <16 x i8> %src, %srcb
+ %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %s
+}
+
+define arm_aapcs_vfpcc <16 x i8> @vcmp_ule_v16i8(<16 x i8> %src, <16 x i8> %srcb, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_ule_v16i8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.u8 cs, q1, q0
+; CHECK-NEXT: vpsel q0, q2, q3
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp ule <16 x i8> %src, %srcb
+ %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+ ret <16 x i8> %s
+}
+
+
+define arm_aapcs_vfpcc <2 x i64> @vcmp_eq_v2i64(<2 x i64> %src, <2 x i64> %srcb, <2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: vcmp_eq_v2i64:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov r0, s5
+; CHECK-NEXT: vmov r1, s1
+; CHECK-NEXT: vmov r2, s0
+; CHECK-NEXT: eors r0, r1
+; CHECK-NEXT: vmov r1, s4
+; CHECK-NEXT: eors r1, r2
+; CHECK-NEXT: vmov r2, s2
+; CHECK-NEXT: orrs r0, r1
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: clz r0, r0
+; CHECK-NEXT: lsrs r0, r0, #5
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: vmov.32 q4[0], r0
+; CHECK-NEXT: vmov.32 q4[1], r0
+; CHECK-NEXT: vmov r0, s7
+; CHECK-NEXT: eors r0, r1
+; CHECK-NEXT: vmov r1, s6
+; CHECK-NEXT: eors r1, r2
+; CHECK-NEXT: orrs r0, r1
+; CHECK-NEXT: clz r0, r0
+; CHECK-NEXT: lsrs r0, r0, #5
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: vmov.32 q4[2], r0
+; CHECK-NEXT: vmov.32 q4[3], r0
+; CHECK-NEXT: vbic q0, q3, q4
+; CHECK-NEXT: vand q1, q2, q4
+; CHECK-NEXT: vorr q0, q1, q0
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp eq <2 x i64> %src, %srcb
+ %s = select <2 x i1> %c, <2 x i64> %a, <2 x i64> %b
+ ret <2 x i64> %s
+}
+
+define arm_aapcs_vfpcc <2 x i32> @vcmp_eq_v2i32(<2 x i64> %src, <2 x i64> %srcb, <2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: vcmp_eq_v2i32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .vsave {d8, d9}
+; CHECK-NEXT: vpush {d8, d9}
+; CHECK-NEXT: vmov r0, s5
+; CHECK-NEXT: vmov r1, s1
+; CHECK-NEXT: vmov r2, s0
+; CHECK-NEXT: eors r0, r1
+; CHECK-NEXT: vmov r1, s4
+; CHECK-NEXT: eors r1, r2
+; CHECK-NEXT: vmov r2, s2
+; CHECK-NEXT: orrs r0, r1
+; CHECK-NEXT: vmov r1, s3
+; CHECK-NEXT: clz r0, r0
+; CHECK-NEXT: lsrs r0, r0, #5
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: vmov.32 q4[0], r0
+; CHECK-NEXT: vmov.32 q4[1], r0
+; CHECK-NEXT: vmov r0, s7
+; CHECK-NEXT: eors r0, r1
+; CHECK-NEXT: vmov r1, s6
+; CHECK-NEXT: eors r1, r2
+; CHECK-NEXT: orrs r0, r1
+; CHECK-NEXT: clz r0, r0
+; CHECK-NEXT: lsrs r0, r0, #5
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r0, #-1
+; CHECK-NEXT: vmov.32 q4[2], r0
+; CHECK-NEXT: vmov.32 q4[3], r0
+; CHECK-NEXT: vbic q0, q3, q4
+; CHECK-NEXT: vand q1, q2, q4
+; CHECK-NEXT: vorr q0, q1, q0
+; CHECK-NEXT: vpop {d8, d9}
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp eq <2 x i64> %src, %srcb
+ %s = select <2 x i1> %c, <2 x i32> %a, <2 x i32> %b
+ ret <2 x i32> %s
+}
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_eqz_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_eqz_v4i32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.i32 eq, q0, zr
+; CHECK-NEXT: vpsel q0, q1, q2
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp eq <4 x i32> %src, zeroinitializer
+ %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %s
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_nez_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_nez_v4i32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.i32 ne, q0, zr
+; CHECK-NEXT: vpsel q0, q1, q2
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp ne <4 x i32> %src, zeroinitializer
+ %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %s
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_sgtz_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_sgtz_v4i32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.s32 gt, q0, zr
+; CHECK-NEXT: vpsel q0, q1, q2
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp sgt <4 x i32> %src, zeroinitializer
+ %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %s
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_sgez_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_sgez_v4i32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.s32 ge, q0, zr
+; CHECK-NEXT: vpsel q0, q1, q2
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp sge <4 x i32> %src, zeroinitializer
+ %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %s
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_sltz_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_sltz_v4i32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.s32 lt, q0, zr
+; CHECK-NEXT: vpsel q0, q1, q2
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp slt <4 x i32> %src, zeroinitializer
+ %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %s
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_slez_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_slez_v4i32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.s32 le, q0, zr
+; CHECK-NEXT: vpsel q0, q1, q2
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp sle <4 x i32> %src, zeroinitializer
+ %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %s
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_ugtz_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_ugtz_v4i32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.i32 ne, q0, zr
+; CHECK-NEXT: vpsel q0, q1, q2
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp ugt <4 x i32> %src, zeroinitializer
+ %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %s
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_ugez_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_ugez_v4i32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp uge <4 x i32> %src, zeroinitializer
+ %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %s
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_ultz_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_ultz_v4i32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov q0, q2
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp ult <4 x i32> %src, zeroinitializer
+ %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %s
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vcmp_ulez_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: vcmp_ulez_v4i32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov.i32 q3, #0x0
+; CHECK-NEXT: vcmp.u32 cs, q3, q0
+; CHECK-NEXT: vpsel q0, q1, q2
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp ule <4 x i32> %src, zeroinitializer
+ %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %b
+ ret <4 x i32> %s
+}
+
+
+define arm_aapcs_vfpcc <8 x i16> @vcmp_eqz_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_eqz_v8i16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.i16 eq, q0, zr
+; CHECK-NEXT: vpsel q0, q1, q2
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp eq <8 x i16> %src, zeroinitializer
+ %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+ ret <8 x i16> %s
+}
+
+define arm_aapcs_vfpcc <8 x i16> @vcmp_nez_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_nez_v8i16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.i16 ne, q0, zr
+; CHECK-NEXT: vpsel q0, q1, q2
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp ne <8 x i16> %src, zeroinitializer
+ %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+ ret <8 x i16> %s
+}
+
+define arm_aapcs_vfpcc <8 x i16> @vcmp_sgtz_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_sgtz_v8i16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.s16 gt, q0, zr
+; CHECK-NEXT: vpsel q0, q1, q2
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp sgt <8 x i16> %src, zeroinitializer
+ %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+ ret <8 x i16> %s
+}
+
+define arm_aapcs_vfpcc <8 x i16> @vcmp_sgez_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_sgez_v8i16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmp.s16 ge, q0, zr
+; CHECK-NEXT: vpsel q0, q1, q2
+; CHECK-NEXT: bx lr
+entry:
+ %c = icmp sge <8 x i16> %src, zeroinitializer
+ %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+ ret <8 x i16> %s
+}
+
+; Signed lt against zero has a direct zr-operand vcmp encoding.
+define arm_aapcs_vfpcc <8 x i16> @vcmp_sltz_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_sltz_v8i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.s16 lt, q0, zr
+; CHECK-NEXT:    vpsel q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp slt <8 x i16> %src, zeroinitializer
+  %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+  ret <8 x i16> %s
+}
+
+; Signed le against zero has a direct zr-operand vcmp encoding.
+define arm_aapcs_vfpcc <8 x i16> @vcmp_slez_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_slez_v8i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.s16 le, q0, zr
+; CHECK-NEXT:    vpsel q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp sle <8 x i16> %src, zeroinitializer
+  %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+  ret <8 x i16> %s
+}
+
+; ugt-with-zero is equivalent to ne-with-zero, and the fold happens: we
+; get a plain vcmp.i16 ne against zr.
+define arm_aapcs_vfpcc <8 x i16> @vcmp_ugtz_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_ugtz_v8i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.i16 ne, q0, zr
+; CHECK-NEXT:    vpsel q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp ugt <8 x i16> %src, zeroinitializer
+  %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+  ret <8 x i16> %s
+}
+
+; uge-with-zero is always true, so the whole select folds to %a (vmov only).
+define arm_aapcs_vfpcc <8 x i16> @vcmp_ugez_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_ugez_v8i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp uge <8 x i16> %src, zeroinitializer
+  %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+  ret <8 x i16> %s
+}
+
+; ult-with-zero is always false, so the whole select folds to %b (vmov only).
+define arm_aapcs_vfpcc <8 x i16> @vcmp_ultz_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_ultz_v8i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov q0, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp ult <8 x i16> %src, zeroinitializer
+  %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+  ret <8 x i16> %s
+}
+
+; ule-with-zero could fold to eq-zr; current codegen materializes a zero
+; vector and uses a reversed unsigned cs (>=) compare instead.
+define arm_aapcs_vfpcc <8 x i16> @vcmp_ulez_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: vcmp_ulez_v8i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov.i32 q3, #0x0
+; CHECK-NEXT:    vcmp.u16 cs, q3, q0
+; CHECK-NEXT:    vpsel q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp ule <8 x i16> %src, zeroinitializer
+  %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
+  ret <8 x i16> %s
+}
+
+
+; eq against zero uses the zr-operand form of vcmp (no zero vector needed).
+define arm_aapcs_vfpcc <16 x i8> @vcmp_eqz_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_eqz_v16i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.i8 eq, q0, zr
+; CHECK-NEXT:    vpsel q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp eq <16 x i8> %src, zeroinitializer
+  %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+  ret <16 x i8> %s
+}
+
+; ne against zero maps directly to the zr-operand vcmp form.
+define arm_aapcs_vfpcc <16 x i8> @vcmp_nez_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_nez_v16i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.i8 ne, q0, zr
+; CHECK-NEXT:    vpsel q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp ne <16 x i8> %src, zeroinitializer
+  %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+  ret <16 x i8> %s
+}
+
+; Signed gt against zero has a direct zr-operand vcmp encoding.
+define arm_aapcs_vfpcc <16 x i8> @vcmp_sgtz_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_sgtz_v16i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.s8 gt, q0, zr
+; CHECK-NEXT:    vpsel q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp sgt <16 x i8> %src, zeroinitializer
+  %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+  ret <16 x i8> %s
+}
+
+; Signed ge against zero has a direct zr-operand vcmp encoding.
+define arm_aapcs_vfpcc <16 x i8> @vcmp_sgez_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_sgez_v16i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.s8 ge, q0, zr
+; CHECK-NEXT:    vpsel q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp sge <16 x i8> %src, zeroinitializer
+  %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+  ret <16 x i8> %s
+}
+
+; Signed lt against zero has a direct zr-operand vcmp encoding.
+define arm_aapcs_vfpcc <16 x i8> @vcmp_sltz_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_sltz_v16i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.s8 lt, q0, zr
+; CHECK-NEXT:    vpsel q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp slt <16 x i8> %src, zeroinitializer
+  %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+  ret <16 x i8> %s
+}
+
+; Signed le against zero has a direct zr-operand vcmp encoding.
+define arm_aapcs_vfpcc <16 x i8> @vcmp_slez_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_slez_v16i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.s8 le, q0, zr
+; CHECK-NEXT:    vpsel q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp sle <16 x i8> %src, zeroinitializer
+  %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+  ret <16 x i8> %s
+}
+
+; ugt-with-zero folds to ne-with-zero (vcmp.i8 ne against zr).
+define arm_aapcs_vfpcc <16 x i8> @vcmp_ugtz_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_ugtz_v16i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vcmp.i8 ne, q0, zr
+; CHECK-NEXT:    vpsel q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp ugt <16 x i8> %src, zeroinitializer
+  %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+  ret <16 x i8> %s
+}
+
+; uge-with-zero is always true, so the whole select folds to %a (vmov only).
+define arm_aapcs_vfpcc <16 x i8> @vcmp_ugez_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_ugez_v16i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp uge <16 x i8> %src, zeroinitializer
+  %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+  ret <16 x i8> %s
+}
+
+; ult-with-zero is always false, so the whole select folds to %b (vmov only).
+define arm_aapcs_vfpcc <16 x i8> @vcmp_ultz_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_ultz_v16i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov q0, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp ult <16 x i8> %src, zeroinitializer
+  %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+  ret <16 x i8> %s
+}
+
+; ule-with-zero could fold to eq-zr; current codegen materializes a zero
+; vector and uses a reversed unsigned cs (>=) compare instead.
+define arm_aapcs_vfpcc <16 x i8> @vcmp_ulez_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: vcmp_ulez_v16i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov.i32 q3, #0x0
+; CHECK-NEXT:    vcmp.u8 cs, q3, q0
+; CHECK-NEXT:    vpsel q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp ule <16 x i8> %src, zeroinitializer
+  %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %b
+  ret <16 x i8> %s
+}
+
+
+; There is no MVE vcmp with 64-bit elements, so the i64 zero-test is
+; scalarized through GPRs: each lane's two halves are orr'ed together and
+; clz+lsr #5 turns "was zero" into 1, which is then widened to an all-ones
+; lane mask; the select is expanded to vbic/vand/vorr on that mask.
+define arm_aapcs_vfpcc <2 x i64> @vcmp_eqz_v2i64(<2 x i64> %src, <2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: vcmp_eqz_v2i64:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov r0, s1
+; CHECK-NEXT:    vmov r1, s0
+; CHECK-NEXT:    orrs r0, r1
+; CHECK-NEXT:    vmov r1, s2
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    lsrs r0, r0, #5
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    vmov.32 q3[0], r0
+; CHECK-NEXT:    vmov.32 q3[1], r0
+; CHECK-NEXT:    vmov r0, s3
+; CHECK-NEXT:    orrs r0, r1
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    lsrs r0, r0, #5
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    vmov.32 q3[2], r0
+; CHECK-NEXT:    vmov.32 q3[3], r0
+; CHECK-NEXT:    vbic q0, q2, q3
+; CHECK-NEXT:    vand q1, q1, q3
+; CHECK-NEXT:    vorr q0, q1, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp eq <2 x i64> %src, zeroinitializer
+  %s = select <2 x i1> %c, <2 x i64> %a, <2 x i64> %b
+  ret <2 x i64> %s
+}
+
+; NOTE(review): despite the v2i32 name, %src is <2 x i64> — presumably
+; intentional, to drive a <2 x i32> select from an i64 zero-compare;
+; confirm against the other v2i64 test above. Codegen is the same
+; scalarized GPR zero-test (orr + clz/lsr) plus vbic/vand/vorr select.
+define arm_aapcs_vfpcc <2 x i32> @vcmp_eqz_v2i32(<2 x i64> %src, <2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: vcmp_eqz_v2i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmov r0, s1
+; CHECK-NEXT:    vmov r1, s0
+; CHECK-NEXT:    orrs r0, r1
+; CHECK-NEXT:    vmov r1, s2
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    lsrs r0, r0, #5
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    vmov.32 q3[0], r0
+; CHECK-NEXT:    vmov.32 q3[1], r0
+; CHECK-NEXT:    vmov r0, s3
+; CHECK-NEXT:    orrs r0, r1
+; CHECK-NEXT:    clz r0, r0
+; CHECK-NEXT:    lsrs r0, r0, #5
+; CHECK-NEXT:    it ne
+; CHECK-NEXT:    movne.w r0, #-1
+; CHECK-NEXT:    vmov.32 q3[2], r0
+; CHECK-NEXT:    vmov.32 q3[3], r0
+; CHECK-NEXT:    vbic q0, q2, q3
+; CHECK-NEXT:    vand q1, q1, q3
+; CHECK-NEXT:    vorr q0, q1, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %c = icmp eq <2 x i64> %src, zeroinitializer
+  %s = select <2 x i1> %c, <2 x i32> %a, <2 x i32> %b
+  ret <2 x i32> %s
+}
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O3 -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s
+
+; A <16 x i1> mask loaded from memory lands directly in the predicate
+; register (vldr p0) and feeds vpsel — no mask-vector expansion.
+define arm_aapcs_vfpcc <16 x i8> @vpsel_i8(<16 x i1> *%mask, <16 x i8> %src1, <16 x i8> %src2) {
+; CHECK-LABEL: vpsel_i8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldr p0, [r0]
+; CHECK-NEXT:    vpsel q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = load <16 x i1>, <16 x i1>* %mask, align 4
+  %1 = select <16 x i1> %0, <16 x i8> %src1, <16 x i8> %src2
+  ret <16 x i8> %1
+}
+
+; An <8 x i1> mask loaded from memory lands directly in p0 and feeds vpsel.
+define arm_aapcs_vfpcc <8 x i16> @vpsel_i16(<8 x i1> *%mask, <8 x i16> %src1, <8 x i16> %src2) {
+; CHECK-LABEL: vpsel_i16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldr p0, [r0]
+; CHECK-NEXT:    vpsel q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = load <8 x i1>, <8 x i1>* %mask, align 4
+  %1 = select <8 x i1> %0, <8 x i16> %src1, <8 x i16> %src2
+  ret <8 x i16> %1
+}
+
+; A <4 x i1> mask loaded from memory lands directly in p0 and feeds vpsel.
+define arm_aapcs_vfpcc <4 x i32> @vpsel_i32(<4 x i1> *%mask, <4 x i32> %src1, <4 x i32> %src2) {
+; CHECK-LABEL: vpsel_i32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldr p0, [r0]
+; CHECK-NEXT:    vpsel q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = load <4 x i1>, <4 x i1>* %mask, align 4
+  %1 = select <4 x i1> %0, <4 x i32> %src1, <4 x i32> %src2
+  ret <4 x i32> %1
+}
+
+; An icmp feeding a select stays in the predicate register: the and/cmp
+; produce the condition via vcmp.i32 eq ..., zr and vpsel consumes it
+; directly, with no sign-extended i32 mask vector in between.
+define arm_aapcs_vfpcc <4 x i32> @foo(<4 x i32> %vec.ind) {
+; CHECK-LABEL: foo:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vmov.i32 q2, #0x1
+; CHECK-NEXT:    vmov.i32 q1, #0x0
+; CHECK-NEXT:    vand q2, q0, q2
+; CHECK-NEXT:    vcmp.i32 eq, q2, zr
+; CHECK-NEXT:    vpsel q0, q0, q1
+; CHECK-NEXT:    bx lr
+  %tmp = and <4 x i32> %vec.ind, <i32 1, i32 1, i32 1, i32 1>
+  %tmp1 = icmp eq <4 x i32> %tmp, zeroinitializer
+  %tmp2 = select <4 x i1> %tmp1, <4 x i32> %vec.ind, <4 x i32> zeroinitializer
+  ret <4 x i32> %tmp2
+}