setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
+
// Expand all extending loads to types larger than this, and truncating
// stores from types larger than this.
for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
setOperationAction(ISD::BITCAST, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
+
// Operations below differ between mask vectors and other vectors.
if (VT.getVectorElementType() == MVT::i1) {
setOperationAction(ISD::AND, VT, Custom);
// Custom-lower reduction operations to set up the corresponding custom
// nodes' operands.
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
- setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
- setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
- setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
case ISD::VECREDUCE_SMAX:
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_SMIN:
+ return lowerVECREDUCE(Op, DAG);
case ISD::VECREDUCE_AND:
case ISD::VECREDUCE_OR:
case ISD::VECREDUCE_XOR:
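+ // i1 mask reductions take a different path: they are lowered via a
+ // population count (vpopc.m) of the mask rather than a reduction
+ // instruction.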
+ if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
+ return lowerVectorMaskVECREDUCE(Op, DAG);
return lowerVECREDUCE(Op, DAG);
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_SEQ_FADD:
}
}
+SDValue RISCVTargetLowering::lowerVectorMaskVECREDUCE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ SDValue Vec = Op.getOperand(0);
+ MVT VecVT = Vec.getSimpleValueType();
+ assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
+ Op.getOpcode() == ISD::VECREDUCE_OR ||
+ Op.getOpcode() == ISD::VECREDUCE_XOR) &&
+ "Unexpected reduction lowering");
+
+ MVT XLenVT = Subtarget.getXLenVT();
+ assert(Op.getValueType() == XLenVT &&
+ "Expected reduction output to be legalized to XLenVT");
+
+ MVT ContainerVT = VecVT;
+ if (VecVT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(VecVT);
+ Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
+ }
+
+ SDValue Mask, VL;
+ std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
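+ // Mask here is the all-ones mask and VL covers the whole (container)
+ // vector, so the population count below sees every lane.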
+ SDValue Zero = DAG.getConstant(0, DL, XLenVT);
+
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("Unhandled reduction");
+ case ISD::VECREDUCE_AND:
+ // vpopc ~x == 0
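+ // All lanes are set iff the complement has no set lanes. VMXOR_VL with
+ // the all-ones mask computes the complement (the vmnot idiom).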
+ Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, Mask, VL);
+ Vec = DAG.getNode(RISCVISD::VPOPC_VL, DL, XLenVT, Vec, Mask, VL);
+ return DAG.getSetCC(DL, XLenVT, Vec, Zero, ISD::SETEQ);
+ case ISD::VECREDUCE_OR:
+ // vpopc x != 0
+ Vec = DAG.getNode(RISCVISD::VPOPC_VL, DL, XLenVT, Vec, Mask, VL);
+ return DAG.getSetCC(DL, XLenVT, Vec, Zero, ISD::SETNE);
+ case ISD::VECREDUCE_XOR: {
+ // ((vpopc x) & 1) != 0
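+ // The XOR-reduction of i1 lanes is the parity of the number of set lanes.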
+ SDValue One = DAG.getConstant(1, DL, XLenVT);
+ Vec = DAG.getNode(RISCVISD::VPOPC_VL, DL, XLenVT, Vec, Mask, VL);
+ Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
+ return DAG.getSetCC(DL, XLenVT, Vec, Zero, ISD::SETNE);
+ }
+ }
+}
+
SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
NODE_NAME_CASE(VRGATHEREI16_VV_VL)
NODE_NAME_CASE(VSEXT_VL)
NODE_NAME_CASE(VZEXT_VL)
+ NODE_NAME_CASE(VPOPC_VL)
NODE_NAME_CASE(VLE_VL)
NODE_NAME_CASE(VSE_VL)
NODE_NAME_CASE(READ_CSR)
// Vector sign/zero extend with additional mask & VL operands.
VSEXT_VL,
VZEXT_VL,
+ // vpopc.m with additional mask and VL operands.
+ VPOPC_VL,
// Reads value of CSR.
// The first operand is a chain pointer. The second specifies address of the
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVectorMaskVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFPVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
def riscv_vmnot_vl : PatFrag<(ops node:$rs, node:$vl),
(riscv_vmxor_vl node:$rs, true_mask, node:$vl)>;
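// Result 0 is the XLenVT count of set mask bits; operand 1 is the mask vector
// being counted, operand 2 is the (i1) mask, and operand 3 is the VL operand.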
+def riscv_vpopc_vl : SDNode<"RISCVISD::VPOPC_VL",
+ SDTypeProfile<1, 3, [SDTCisVT<0, XLenVT>,
+ SDTCisVec<1>, SDTCisInt<1>,
+ SDTCVecEltisVT<2, i1>,
+ SDTCisSameNumEltsAs<1, 2>,
+ SDTCisVT<3, XLenVT>]>>;
+
def SDT_RISCVVEXTEND_VL : SDTypeProfile<1, 3, [SDTCisVec<0>,
SDTCisSameNumEltsAs<0, 1>,
SDTCisSameNumEltsAs<1, 2>,
} // Predicates = [HasStdExtV, HasStdExtF]
-// 16.1 Vector Mask-Register Logical Instructions
let Predicates = [HasStdExtV] in {
foreach mti = AllMasks in {
+ // 16.1 Vector Mask-Register Logical Instructions
def : Pat<(mti.Mask (riscv_vmset_vl (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMSET_M_" # mti.BX) GPR:$vl, mti.SEW)>;
def : Pat<(mti.Mask (riscv_vmclr_vl (XLenVT (VLOp GPR:$vl)))),
def : Pat<(mti.Mask (riscv_vmnot_vl VR:$rs, (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMNAND_MM_" # mti.LMul.MX)
VR:$rs, VR:$rs, GPR:$vl, mti.SEW)>;
+
+ // 16.2 Vector Mask Population Count vpopc
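+ // Only the unmasked form is matched for now: the pattern requires the
+ // all-ones mask (true_mask) and selects the unmasked pseudo.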
+ def : Pat<(XLenVT (riscv_vpopc_vl (mti.Mask VR:$rs2), (mti.Mask true_mask),
+ (XLenVT (VLOp GPR:$vl)))),
+ (!cast<Instruction>("PseudoVPOPC_M_" # mti.BX)
+ VR:$rs2, GPR:$vl, mti.SEW)>;
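+
+ // For example, (riscv_vpopc_vl x, true_mask, vl) feeding a setne-with-zero
+ // (a vecreduce_or) selects to roughly:
+ //   vpopc.m a0, v0
+ //   snez    a0, a0
+ // (before any sign-extension of the i1 result).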
}
} // Predicates = [HasStdExtV]
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
+; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
+
+declare i1 @llvm.vector.reduce.or.v1i1(<1 x i1>)
+
+define signext i1 @vreduce_or_v1i1(<1 x i1> %v) {
+; CHECK-LABEL: vreduce_or_v1i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vmv.x.s a0, v25
+; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.xor.v1i1(<1 x i1>)
+
+define signext i1 @vreduce_xor_v1i1(<1 x i1> %v) {
+; CHECK-LABEL: vreduce_xor_v1i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vmv.x.s a0, v25
+; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.and.v1i1(<1 x i1>)
+
+define signext i1 @vreduce_and_v1i1(<1 x i1> %v) {
+; CHECK-LABEL: vreduce_and_v1i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vmv.x.s a0, v25
+; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.or.v2i1(<2 x i1>)
+
+define signext i1 @vreduce_or_v2i1(<2 x i1> %v) {
+; CHECK-LABEL: vreduce_or_v2i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; CHECK-NEXT: vpopc.m a0, v0
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.xor.v2i1(<2 x i1>)
+
+define signext i1 @vreduce_xor_v2i1(<2 x i1> %v) {
+; CHECK-LABEL: vreduce_xor_v2i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; CHECK-NEXT: vpopc.m a0, v0
+; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.and.v2i1(<2 x i1>)
+
+define signext i1 @vreduce_and_v2i1(<2 x i1> %v) {
+; CHECK-LABEL: vreduce_and_v2i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; CHECK-NEXT: vmnand.mm v25, v0, v0
+; CHECK-NEXT: vpopc.m a0, v25
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.or.v4i1(<4 x i1>)
+
+define signext i1 @vreduce_or_v4i1(<4 x i1> %v) {
+; CHECK-LABEL: vreduce_or_v4i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu
+; CHECK-NEXT: vpopc.m a0, v0
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.xor.v4i1(<4 x i1>)
+
+define signext i1 @vreduce_xor_v4i1(<4 x i1> %v) {
+; CHECK-LABEL: vreduce_xor_v4i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu
+; CHECK-NEXT: vpopc.m a0, v0
+; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.and.v4i1(<4 x i1>)
+
+define signext i1 @vreduce_and_v4i1(<4 x i1> %v) {
+; CHECK-LABEL: vreduce_and_v4i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu
+; CHECK-NEXT: vmnand.mm v25, v0, v0
+; CHECK-NEXT: vpopc.m a0, v25
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.or.v8i1(<8 x i1>)
+
+define signext i1 @vreduce_or_v8i1(<8 x i1> %v) {
+; CHECK-LABEL: vreduce_or_v8i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vpopc.m a0, v0
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.xor.v8i1(<8 x i1>)
+
+define signext i1 @vreduce_xor_v8i1(<8 x i1> %v) {
+; CHECK-LABEL: vreduce_xor_v8i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vpopc.m a0, v0
+; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.and.v8i1(<8 x i1>)
+
+define signext i1 @vreduce_and_v8i1(<8 x i1> %v) {
+; CHECK-LABEL: vreduce_and_v8i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmnand.mm v25, v0, v0
+; CHECK-NEXT: vpopc.m a0, v25
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.or.v16i1(<16 x i1>)
+
+define signext i1 @vreduce_or_v16i1(<16 x i1> %v) {
+; CHECK-LABEL: vreduce_or_v16i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a0, 16, e8,m1,ta,mu
+; CHECK-NEXT: vpopc.m a0, v0
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.xor.v16i1(<16 x i1>)
+
+define signext i1 @vreduce_xor_v16i1(<16 x i1> %v) {
+; CHECK-LABEL: vreduce_xor_v16i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a0, 16, e8,m1,ta,mu
+; CHECK-NEXT: vpopc.m a0, v0
+; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.and.v16i1(<16 x i1>)
+
+define signext i1 @vreduce_and_v16i1(<16 x i1> %v) {
+; CHECK-LABEL: vreduce_and_v16i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a0, 16, e8,m1,ta,mu
+; CHECK-NEXT: vmnand.mm v25, v0, v0
+; CHECK-NEXT: vpopc.m a0, v25
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.or.v32i1(<32 x i1>)
+
+define signext i1 @vreduce_or_v32i1(<32 x i1> %v) {
+; LMULMAX1-LABEL: vreduce_or_v32i1:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a0, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT: vmor.mm v25, v0, v8
+; LMULMAX1-NEXT: vpopc.m a0, v25
+; LMULMAX1-NEXT: snez a0, a0
+; LMULMAX1-NEXT: neg a0, a0
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX8-LABEL: vreduce_or_v32i1:
+; LMULMAX8: # %bb.0:
+; LMULMAX8-NEXT: addi a0, zero, 32
+; LMULMAX8-NEXT: vsetvli a0, a0, e8,m2,ta,mu
+; LMULMAX8-NEXT: vpopc.m a0, v0
+; LMULMAX8-NEXT: snez a0, a0
+; LMULMAX8-NEXT: neg a0, a0
+; LMULMAX8-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.xor.v32i1(<32 x i1>)
+
+define signext i1 @vreduce_xor_v32i1(<32 x i1> %v) {
+; LMULMAX1-LABEL: vreduce_xor_v32i1:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a0, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT: vmxor.mm v25, v0, v8
+; LMULMAX1-NEXT: vpopc.m a0, v25
+; LMULMAX1-NEXT: andi a0, a0, 1
+; LMULMAX1-NEXT: neg a0, a0
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX8-LABEL: vreduce_xor_v32i1:
+; LMULMAX8: # %bb.0:
+; LMULMAX8-NEXT: addi a0, zero, 32
+; LMULMAX8-NEXT: vsetvli a0, a0, e8,m2,ta,mu
+; LMULMAX8-NEXT: vpopc.m a0, v0
+; LMULMAX8-NEXT: andi a0, a0, 1
+; LMULMAX8-NEXT: neg a0, a0
+; LMULMAX8-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.and.v32i1(<32 x i1>)
+
+define signext i1 @vreduce_and_v32i1(<32 x i1> %v) {
+; LMULMAX1-LABEL: vreduce_and_v32i1:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a0, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT: vmnand.mm v25, v0, v8
+; LMULMAX1-NEXT: vpopc.m a0, v25
+; LMULMAX1-NEXT: seqz a0, a0
+; LMULMAX1-NEXT: neg a0, a0
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX8-LABEL: vreduce_and_v32i1:
+; LMULMAX8: # %bb.0:
+; LMULMAX8-NEXT: addi a0, zero, 32
+; LMULMAX8-NEXT: vsetvli a0, a0, e8,m2,ta,mu
+; LMULMAX8-NEXT: vmnand.mm v25, v0, v0
+; LMULMAX8-NEXT: vpopc.m a0, v25
+; LMULMAX8-NEXT: seqz a0, a0
+; LMULMAX8-NEXT: neg a0, a0
+; LMULMAX8-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.or.v64i1(<64 x i1>)
+
+define signext i1 @vreduce_or_v64i1(<64 x i1> %v) {
+; LMULMAX1-LABEL: vreduce_or_v64i1:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a0, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT: vmor.mm v25, v8, v10
+; LMULMAX1-NEXT: vmor.mm v26, v0, v9
+; LMULMAX1-NEXT: vmor.mm v25, v26, v25
+; LMULMAX1-NEXT: vpopc.m a0, v25
+; LMULMAX1-NEXT: snez a0, a0
+; LMULMAX1-NEXT: neg a0, a0
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX8-LABEL: vreduce_or_v64i1:
+; LMULMAX8: # %bb.0:
+; LMULMAX8-NEXT: addi a0, zero, 64
+; LMULMAX8-NEXT: vsetvli a0, a0, e8,m4,ta,mu
+; LMULMAX8-NEXT: vpopc.m a0, v0
+; LMULMAX8-NEXT: snez a0, a0
+; LMULMAX8-NEXT: neg a0, a0
+; LMULMAX8-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.xor.v64i1(<64 x i1>)
+
+define signext i1 @vreduce_xor_v64i1(<64 x i1> %v) {
+; LMULMAX1-LABEL: vreduce_xor_v64i1:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a0, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT: vmxor.mm v25, v8, v10
+; LMULMAX1-NEXT: vmxor.mm v26, v0, v9
+; LMULMAX1-NEXT: vmxor.mm v25, v26, v25
+; LMULMAX1-NEXT: vpopc.m a0, v25
+; LMULMAX1-NEXT: andi a0, a0, 1
+; LMULMAX1-NEXT: neg a0, a0
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX8-LABEL: vreduce_xor_v64i1:
+; LMULMAX8: # %bb.0:
+; LMULMAX8-NEXT: addi a0, zero, 64
+; LMULMAX8-NEXT: vsetvli a0, a0, e8,m4,ta,mu
+; LMULMAX8-NEXT: vpopc.m a0, v0
+; LMULMAX8-NEXT: andi a0, a0, 1
+; LMULMAX8-NEXT: neg a0, a0
+; LMULMAX8-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.and.v64i1(<64 x i1>)
+
+define signext i1 @vreduce_and_v64i1(<64 x i1> %v) {
+; LMULMAX1-LABEL: vreduce_and_v64i1:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a0, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT: vmand.mm v25, v8, v10
+; LMULMAX1-NEXT: vmand.mm v26, v0, v9
+; LMULMAX1-NEXT: vmnand.mm v25, v26, v25
+; LMULMAX1-NEXT: vpopc.m a0, v25
+; LMULMAX1-NEXT: seqz a0, a0
+; LMULMAX1-NEXT: neg a0, a0
+; LMULMAX1-NEXT: ret
+;
+; LMULMAX8-LABEL: vreduce_and_v64i1:
+; LMULMAX8: # %bb.0:
+; LMULMAX8-NEXT: addi a0, zero, 64
+; LMULMAX8-NEXT: vsetvli a0, a0, e8,m4,ta,mu
+; LMULMAX8-NEXT: vmnand.mm v25, v0, v0
+; LMULMAX8-NEXT: vpopc.m a0, v25
+; LMULMAX8-NEXT: seqz a0, a0
+; LMULMAX8-NEXT: neg a0, a0
+; LMULMAX8-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> %v)
+ ret i1 %red
+}
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -verify-machineinstrs < %s | FileCheck %s
+
+declare i1 @llvm.vector.reduce.or.nxv1i1(<vscale x 1 x i1>)
+
+define signext i1 @vreduce_or_nxv1i1(<vscale x 1 x i1> %v) {
+; CHECK-LABEL: vreduce_or_nxv1i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu
+; CHECK-NEXT: vpopc.m a0, v0
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.or.nxv1i1(<vscale x 1 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.xor.nxv1i1(<vscale x 1 x i1>)
+
+define signext i1 @vreduce_xor_nxv1i1(<vscale x 1 x i1> %v) {
+; CHECK-LABEL: vreduce_xor_nxv1i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu
+; CHECK-NEXT: vpopc.m a0, v0
+; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.xor.nxv1i1(<vscale x 1 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.and.nxv1i1(<vscale x 1 x i1>)
+
+define signext i1 @vreduce_and_nxv1i1(<vscale x 1 x i1> %v) {
+; CHECK-LABEL: vreduce_and_nxv1i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8,mf8,ta,mu
+; CHECK-NEXT: vmnand.mm v25, v0, v0
+; CHECK-NEXT: vpopc.m a0, v25
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.and.nxv1i1(<vscale x 1 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.or.nxv2i1(<vscale x 2 x i1>)
+
+define signext i1 @vreduce_or_nxv2i1(<vscale x 2 x i1> %v) {
+; CHECK-LABEL: vreduce_or_nxv2i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu
+; CHECK-NEXT: vpopc.m a0, v0
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.or.nxv2i1(<vscale x 2 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.xor.nxv2i1(<vscale x 2 x i1>)
+
+define signext i1 @vreduce_xor_nxv2i1(<vscale x 2 x i1> %v) {
+; CHECK-LABEL: vreduce_xor_nxv2i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu
+; CHECK-NEXT: vpopc.m a0, v0
+; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.xor.nxv2i1(<vscale x 2 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.and.nxv2i1(<vscale x 2 x i1>)
+
+define signext i1 @vreduce_and_nxv2i1(<vscale x 2 x i1> %v) {
+; CHECK-LABEL: vreduce_and_nxv2i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8,mf4,ta,mu
+; CHECK-NEXT: vmnand.mm v25, v0, v0
+; CHECK-NEXT: vpopc.m a0, v25
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.and.nxv2i1(<vscale x 2 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1>)
+
+define signext i1 @vreduce_or_nxv4i1(<vscale x 4 x i1> %v) {
+; CHECK-LABEL: vreduce_or_nxv4i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu
+; CHECK-NEXT: vpopc.m a0, v0
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.xor.nxv4i1(<vscale x 4 x i1>)
+
+define signext i1 @vreduce_xor_nxv4i1(<vscale x 4 x i1> %v) {
+; CHECK-LABEL: vreduce_xor_nxv4i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu
+; CHECK-NEXT: vpopc.m a0, v0
+; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.xor.nxv4i1(<vscale x 4 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.and.nxv4i1(<vscale x 4 x i1>)
+
+define signext i1 @vreduce_and_nxv4i1(<vscale x 4 x i1> %v) {
+; CHECK-LABEL: vreduce_and_nxv4i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8,mf2,ta,mu
+; CHECK-NEXT: vmnand.mm v25, v0, v0
+; CHECK-NEXT: vpopc.m a0, v25
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.and.nxv4i1(<vscale x 4 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.or.nxv8i1(<vscale x 8 x i1>)
+
+define signext i1 @vreduce_or_nxv8i1(<vscale x 8 x i1> %v) {
+; CHECK-LABEL: vreduce_or_nxv8i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT: vpopc.m a0, v0
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.or.nxv8i1(<vscale x 8 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.xor.nxv8i1(<vscale x 8 x i1>)
+
+define signext i1 @vreduce_xor_nxv8i1(<vscale x 8 x i1> %v) {
+; CHECK-LABEL: vreduce_xor_nxv8i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT: vpopc.m a0, v0
+; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.xor.nxv8i1(<vscale x 8 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.and.nxv8i1(<vscale x 8 x i1>)
+
+define signext i1 @vreduce_and_nxv8i1(<vscale x 8 x i1> %v) {
+; CHECK-LABEL: vreduce_and_nxv8i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT: vmnand.mm v25, v0, v0
+; CHECK-NEXT: vpopc.m a0, v25
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.and.nxv8i1(<vscale x 8 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1>)
+
+define signext i1 @vreduce_or_nxv16i1(<vscale x 16 x i1> %v) {
+; CHECK-LABEL: vreduce_or_nxv16i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8,m2,ta,mu
+; CHECK-NEXT: vpopc.m a0, v0
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.xor.nxv16i1(<vscale x 16 x i1>)
+
+define signext i1 @vreduce_xor_nxv16i1(<vscale x 16 x i1> %v) {
+; CHECK-LABEL: vreduce_xor_nxv16i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8,m2,ta,mu
+; CHECK-NEXT: vpopc.m a0, v0
+; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.xor.nxv16i1(<vscale x 16 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.and.nxv16i1(<vscale x 16 x i1>)
+
+define signext i1 @vreduce_and_nxv16i1(<vscale x 16 x i1> %v) {
+; CHECK-LABEL: vreduce_and_nxv16i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8,m2,ta,mu
+; CHECK-NEXT: vmnand.mm v25, v0, v0
+; CHECK-NEXT: vpopc.m a0, v25
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.and.nxv16i1(<vscale x 16 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.or.nxv32i1(<vscale x 32 x i1>)
+
+define signext i1 @vreduce_or_nxv32i1(<vscale x 32 x i1> %v) {
+; CHECK-LABEL: vreduce_or_nxv32i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8,m4,ta,mu
+; CHECK-NEXT: vpopc.m a0, v0
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.or.nxv32i1(<vscale x 32 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.xor.nxv32i1(<vscale x 32 x i1>)
+
+define signext i1 @vreduce_xor_nxv32i1(<vscale x 32 x i1> %v) {
+; CHECK-LABEL: vreduce_xor_nxv32i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8,m4,ta,mu
+; CHECK-NEXT: vpopc.m a0, v0
+; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.xor.nxv32i1(<vscale x 32 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.and.nxv32i1(<vscale x 32 x i1>)
+
+define signext i1 @vreduce_and_nxv32i1(<vscale x 32 x i1> %v) {
+; CHECK-LABEL: vreduce_and_nxv32i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8,m4,ta,mu
+; CHECK-NEXT: vmnand.mm v25, v0, v0
+; CHECK-NEXT: vpopc.m a0, v25
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.and.nxv32i1(<vscale x 32 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.or.nxv64i1(<vscale x 64 x i1>)
+
+define signext i1 @vreduce_or_nxv64i1(<vscale x 64 x i1> %v) {
+; CHECK-LABEL: vreduce_or_nxv64i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8,m8,ta,mu
+; CHECK-NEXT: vpopc.m a0, v0
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.or.nxv64i1(<vscale x 64 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.xor.nxv64i1(<vscale x 64 x i1>)
+
+define signext i1 @vreduce_xor_nxv64i1(<vscale x 64 x i1> %v) {
+; CHECK-LABEL: vreduce_xor_nxv64i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8,m8,ta,mu
+; CHECK-NEXT: vpopc.m a0, v0
+; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.xor.nxv64i1(<vscale x 64 x i1> %v)
+ ret i1 %red
+}
+
+declare i1 @llvm.vector.reduce.and.nxv64i1(<vscale x 64 x i1>)
+
+define signext i1 @vreduce_and_nxv64i1(<vscale x 64 x i1> %v) {
+; CHECK-LABEL: vreduce_and_nxv64i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e8,m8,ta,mu
+; CHECK-NEXT: vmnand.mm v25, v0, v0
+; CHECK-NEXT: vpopc.m a0, v25
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: neg a0, a0
+; CHECK-NEXT: ret
+ %red = call i1 @llvm.vector.reduce.and.nxv64i1(<vscale x 64 x i1> %v)
+ ret i1 %red
+}