// Mask VTs are custom-expanded into a series of standard nodes
setOperationAction(ISD::TRUNCATE, VT, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
}
for (MVT VT : IntVecVTs) {
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
+ setOperationAction(ISD::SETCC, VT, Custom);
+
+ setOperationAction(ISD::TRUNCATE, VT, Custom);
+
// Operations below are different for between masks and other vectors.
if (VT.getVectorElementType() == MVT::i1) {
setOperationAction(ISD::AND, VT, Custom);
setOperationAction(ISD::OR, VT, Custom);
setOperationAction(ISD::XOR, VT, Custom);
- setOperationAction(ISD::SETCC, VT, Custom);
continue;
}
setOperationAction(ISD::VSELECT, VT, Custom);
- setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::ANY_EXTEND, VT, Custom);
setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
"Unexpected type for vector mask lowering");
SDValue Src = Op.getOperand(0);
- EVT VecVT = Src.getValueType();
-
- // Be careful not to introduce illegal scalar types at this stage, and be
- // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is
- // illegal and must be expanded. Since we know that the constants are
- // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
- bool IsRV32E64 =
- !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;
+ MVT VecVT = Src.getSimpleValueType();
+
+ // If this is a fixed vector, we need to convert it to a scalable vector.
+ MVT ContainerVT = VecVT;
+ if (VecVT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
+ Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
+ }
+
SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
- if (!IsRV32E64) {
- SplatOne = DAG.getSplatVector(VecVT, DL, SplatOne);
- SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
- } else {
- SplatOne = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatOne);
- SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
+ SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatOne);
+ SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero);
+
+ if (VecVT.isScalableVector()) {
+ SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne);
+ return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE);
}
- SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne);
+ SDValue Mask, VL;
+ std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
- return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE);
+ MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
+ SDValue Trunc =
+ DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, Mask, VL);
+ Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, Trunc, SplatZero,
+ DAG.getCondCode(ISD::SETNE), Mask, VL);
+ return convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
}
SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
unsigned OrigIdx = Op.getConstantOperandVal(1);
const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
+ // We don't have the ability to slide mask vectors down indexed by their i1
+ // elements; the smallest we can do is i8. Often we are able to bitcast to
+ // equivalent i8 vectors. Note that when extracting a fixed-length vector
+ // from a scalable one, we may not have enough scalable elements to safely
+ // divide by 8: v8i1 = extract nxv1i1 is valid.
+ if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
+ if (VecVT.getVectorMinNumElements() >= 8 &&
+ SubVecVT.getVectorMinNumElements() >= 8) {
+ assert(OrigIdx % 8 == 0 && "Invalid index");
+ assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
+ SubVecVT.getVectorMinNumElements() % 8 == 0 &&
+ "Unexpected mask vector lowering");
+ OrigIdx /= 8;
+ SubVecVT =
+ MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
+ SubVecVT.isScalableVector());
+ VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
+ VecVT.isScalableVector());
+ Vec = DAG.getBitcast(VecVT, Vec);
+ } else {
+ // We can't slide this mask vector down when indexing by its i1 elements.
+ // This poses a problem when we wish to extract a scalable vector which
+ // can't be re-expressed as a larger type. Just choose the slow path:
+ // extend to a larger element type, then truncate back down to i1.
+ // TODO: We could probably improve this when extracting certain fixed-
+ // length vectors from fixed-length ones, by extracting as i8 and shifting
+ // the correct element right to reach the desired subvector?
+ MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
+ MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
+ Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
+ Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
+ Op.getOperand(1));
+ SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
+ return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
+ }
+ }
+
// If the subvector vector is a fixed-length type, we cannot use subregister
// manipulation to simplify the codegen; we don't know which register of a
// LMUL group contains the specific subvector as we only know the minimum
// Now the vector is in the right position, extract our final subvector. This
// should resolve to a COPY.
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
- DAG.getConstant(0, DL, XLenVT));
+ Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
+ DAG.getConstant(0, DL, XLenVT));
+
+ // We might have bitcast from a mask type: cast back to the original type if
+ // required.
+ return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
}
SDValue
ret void
}
+define void @extract_v8i1_v64i1_0(<64 x i1>* %x, <8 x i1>* %y) {
+; LMULMAX2-LABEL: extract_v8i1_v64i1_0:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: addi a2, zero, 32
+; LMULMAX2-NEXT: vsetvli a2, a2, e8,m2,ta,mu
+; LMULMAX2-NEXT: vle1.v v25, (a0)
+; LMULMAX2-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; LMULMAX2-NEXT: vse1.v v25, (a1)
+; LMULMAX2-NEXT: ret
+;
+; LMULMAX1-LABEL: extract_v8i1_v64i1_0:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a2, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT: vle1.v v25, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; LMULMAX1-NEXT: vse1.v v25, (a1)
+; LMULMAX1-NEXT: ret
+ %a = load <64 x i1>, <64 x i1>* %x
+ %c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.v64i1(<64 x i1> %a, i64 0)
+ store <8 x i1> %c, <8 x i1>* %y
+ ret void
+}
+
+define void @extract_v8i1_v64i1_8(<64 x i1>* %x, <8 x i1>* %y) {
+; LMULMAX2-LABEL: extract_v8i1_v64i1_8:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: addi a2, zero, 32
+; LMULMAX2-NEXT: vsetvli a2, a2, e8,m2,ta,mu
+; LMULMAX2-NEXT: vle1.v v25, (a0)
+; LMULMAX2-NEXT: vsetivli a0, 1, e8,m1,ta,mu
+; LMULMAX2-NEXT: vslidedown.vi v25, v25, 1
+; LMULMAX2-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; LMULMAX2-NEXT: vse1.v v25, (a1)
+; LMULMAX2-NEXT: ret
+;
+; LMULMAX1-LABEL: extract_v8i1_v64i1_8:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a2, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT: vle1.v v25, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 1, e8,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v25, 1
+; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; LMULMAX1-NEXT: vse1.v v25, (a1)
+; LMULMAX1-NEXT: ret
+ %a = load <64 x i1>, <64 x i1>* %x
+ %c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.v64i1(<64 x i1> %a, i64 8)
+ store <8 x i1> %c, <8 x i1>* %y
+ ret void
+}
+
+define void @extract_v8i1_v64i1_48(<64 x i1>* %x, <8 x i1>* %y) {
+; LMULMAX2-LABEL: extract_v8i1_v64i1_48:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: addi a0, a0, 4
+; LMULMAX2-NEXT: addi a2, zero, 32
+; LMULMAX2-NEXT: vsetvli a2, a2, e8,m2,ta,mu
+; LMULMAX2-NEXT: vle1.v v25, (a0)
+; LMULMAX2-NEXT: vsetivli a0, 1, e8,m1,ta,mu
+; LMULMAX2-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX2-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; LMULMAX2-NEXT: vse1.v v25, (a1)
+; LMULMAX2-NEXT: ret
+;
+; LMULMAX1-LABEL: extract_v8i1_v64i1_48:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: addi a0, a0, 6
+; LMULMAX1-NEXT: vsetivli a2, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT: vle1.v v25, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; LMULMAX1-NEXT: vse1.v v25, (a1)
+; LMULMAX1-NEXT: ret
+ %a = load <64 x i1>, <64 x i1>* %x
+ %c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.v64i1(<64 x i1> %a, i64 48)
+ store <8 x i1> %c, <8 x i1>* %y
+ ret void
+}
+
+define void @extract_v8i1_nxv2i1_0(<vscale x 2 x i1> %x, <8 x i1>* %y) {
+; CHECK-LABEL: extract_v8i1_nxv2i1_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT: vse1.v v0, (a0)
+; CHECK-NEXT: ret
+ %c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv2i1(<vscale x 2 x i1> %x, i64 0)
+ store <8 x i1> %c, <8 x i1>* %y
+ ret void
+}
+
+define void @extract_v8i1_nxv2i1_2(<vscale x 2 x i1> %x, <8 x i1>* %y) {
+; CHECK-LABEL: extract_v8i1_nxv2i1_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a1, 8, e8,mf4,ta,mu
+; CHECK-NEXT: vslidedown.vi v25, v25, 2
+; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v26, v25, 0
+; CHECK-NEXT: vse1.v v26, (a0)
+; CHECK-NEXT: ret
+ %c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv2i1(<vscale x 2 x i1> %x, i64 2)
+ store <8 x i1> %c, <8 x i1>* %y
+ ret void
+}
+
+define void @extract_v8i1_nxv64i1_0(<vscale x 64 x i1> %x, <8 x i1>* %y) {
+; CHECK-LABEL: extract_v8i1_nxv64i1_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT: vse1.v v0, (a0)
+; CHECK-NEXT: ret
+ %c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv64i1(<vscale x 64 x i1> %x, i64 0)
+ store <8 x i1> %c, <8 x i1>* %y
+ ret void
+}
+
+define void @extract_v8i1_nxv64i1_8(<vscale x 64 x i1> %x, <8 x i1>* %y) {
+; CHECK-LABEL: extract_v8i1_nxv64i1_8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 1, e8,m1,ta,mu
+; CHECK-NEXT: vslidedown.vi v25, v0, 1
+; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT: vse1.v v25, (a0)
+; CHECK-NEXT: ret
+ %c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv64i1(<vscale x 64 x i1> %x, i64 8)
+ store <8 x i1> %c, <8 x i1>* %y
+ ret void
+}
+
+define void @extract_v8i1_nxv64i1_48(<vscale x 64 x i1> %x, <8 x i1>* %y) {
+; CHECK-LABEL: extract_v8i1_nxv64i1_48:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 1, e8,m1,ta,mu
+; CHECK-NEXT: vslidedown.vi v25, v0, 6
+; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT: vse1.v v25, (a0)
+; CHECK-NEXT: ret
+ %c = call <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv64i1(<vscale x 64 x i1> %x, i64 48)
+ store <8 x i1> %c, <8 x i1>* %y
+ ret void
+}
+
+
+define void @extract_v2i1_v64i1_0(<64 x i1>* %x, <2 x i1>* %y) {
+; LMULMAX2-LABEL: extract_v2i1_v64i1_0:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: addi a2, zero, 32
+; LMULMAX2-NEXT: vsetvli a2, a2, e8,m2,ta,mu
+; LMULMAX2-NEXT: vle1.v v25, (a0)
+; LMULMAX2-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; LMULMAX2-NEXT: vse1.v v25, (a1)
+; LMULMAX2-NEXT: ret
+;
+; LMULMAX1-LABEL: extract_v2i1_v64i1_0:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a2, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT: vle1.v v25, (a0)
+; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; LMULMAX1-NEXT: vse1.v v25, (a1)
+; LMULMAX1-NEXT: ret
+ %a = load <64 x i1>, <64 x i1>* %x
+ %c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.v64i1(<64 x i1> %a, i64 0)
+ store <2 x i1> %c, <2 x i1>* %y
+ ret void
+}
+
+define void @extract_v2i1_v64i1_2(<64 x i1>* %x, <2 x i1>* %y) {
+; LMULMAX2-LABEL: extract_v2i1_v64i1_2:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: addi a2, zero, 32
+; LMULMAX2-NEXT: vsetvli a2, a2, e8,m2,ta,mu
+; LMULMAX2-NEXT: vle1.v v0, (a0)
+; LMULMAX2-NEXT: vmv.v.i v26, 0
+; LMULMAX2-NEXT: vmerge.vim v26, v26, 1, v0
+; LMULMAX2-NEXT: vsetivli a0, 2, e8,m2,ta,mu
+; LMULMAX2-NEXT: vslidedown.vi v26, v26, 2
+; LMULMAX2-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; LMULMAX2-NEXT: vmsne.vi v25, v26, 0
+; LMULMAX2-NEXT: vse1.v v25, (a1)
+; LMULMAX2-NEXT: ret
+;
+; LMULMAX1-LABEL: extract_v2i1_v64i1_2:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: vsetivli a2, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT: vle1.v v0, (a0)
+; LMULMAX1-NEXT: vmv.v.i v25, 0
+; LMULMAX1-NEXT: vmerge.vim v25, v25, 1, v0
+; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
+; LMULMAX1-NEXT: vmsne.vi v26, v25, 0
+; LMULMAX1-NEXT: vse1.v v26, (a1)
+; LMULMAX1-NEXT: ret
+ %a = load <64 x i1>, <64 x i1>* %x
+ %c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.v64i1(<64 x i1> %a, i64 2)
+ store <2 x i1> %c, <2 x i1>* %y
+ ret void
+}
+
+define void @extract_v2i1_v64i1_42(<64 x i1>* %x, <2 x i1>* %y) {
+; LMULMAX2-LABEL: extract_v2i1_v64i1_42:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: addi a0, a0, 4
+; LMULMAX2-NEXT: addi a2, zero, 32
+; LMULMAX2-NEXT: vsetvli a2, a2, e8,m2,ta,mu
+; LMULMAX2-NEXT: vle1.v v0, (a0)
+; LMULMAX2-NEXT: vmv.v.i v26, 0
+; LMULMAX2-NEXT: vmerge.vim v26, v26, 1, v0
+; LMULMAX2-NEXT: vsetivli a0, 2, e8,m2,ta,mu
+; LMULMAX2-NEXT: vslidedown.vi v26, v26, 10
+; LMULMAX2-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; LMULMAX2-NEXT: vmsne.vi v25, v26, 0
+; LMULMAX2-NEXT: vse1.v v25, (a1)
+; LMULMAX2-NEXT: ret
+;
+; LMULMAX1-LABEL: extract_v2i1_v64i1_42:
+; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: addi a0, a0, 4
+; LMULMAX1-NEXT: vsetivli a2, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT: vle1.v v0, (a0)
+; LMULMAX1-NEXT: vmv.v.i v25, 0
+; LMULMAX1-NEXT: vmerge.vim v25, v25, 1, v0
+; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; LMULMAX1-NEXT: vslidedown.vi v25, v25, 10
+; LMULMAX1-NEXT: vmsne.vi v26, v25, 0
+; LMULMAX1-NEXT: vse1.v v26, (a1)
+; LMULMAX1-NEXT: ret
+ %a = load <64 x i1>, <64 x i1>* %x
+ %c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.v64i1(<64 x i1> %a, i64 42)
+ store <2 x i1> %c, <2 x i1>* %y
+ ret void
+}
+
+define void @extract_v2i1_nxv2i1_0(<vscale x 2 x i1> %x, <2 x i1>* %y) {
+; CHECK-LABEL: extract_v2i1_nxv2i1_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu
+; CHECK-NEXT: vse1.v v0, (a0)
+; CHECK-NEXT: ret
+ %c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv2i1(<vscale x 2 x i1> %x, i64 0)
+ store <2 x i1> %c, <2 x i1>* %y
+ ret void
+}
+
+define void @extract_v2i1_nxv2i1_2(<vscale x 2 x i1> %x, <2 x i1>* %y) {
+; CHECK-LABEL: extract_v2i1_nxv2i1_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8,mf4,ta,mu
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a1, 2, e8,mf4,ta,mu
+; CHECK-NEXT: vslidedown.vi v25, v25, 2
+; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v26, v25, 0
+; CHECK-NEXT: vse1.v v26, (a0)
+; CHECK-NEXT: ret
+ %c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv2i1(<vscale x 2 x i1> %x, i64 2)
+ store <2 x i1> %c, <2 x i1>* %y
+ ret void
+}
+
+define void @extract_v2i1_nxv64i1_0(<vscale x 64 x i1> %x, <2 x i1>* %y) {
+; CHECK-LABEL: extract_v2i1_nxv64i1_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu
+; CHECK-NEXT: vse1.v v0, (a0)
+; CHECK-NEXT: ret
+ %c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv64i1(<vscale x 64 x i1> %x, i64 0)
+ store <2 x i1> %c, <2 x i1>* %y
+ ret void
+}
+
+define void @extract_v2i1_nxv64i1_2(<vscale x 64 x i1> %x, <2 x i1>* %y) {
+; CHECK-LABEL: extract_v2i1_nxv64i1_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8,m8,ta,mu
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT: vsetivli a1, 2, e8,m8,ta,mu
+; CHECK-NEXT: vslidedown.vi v8, v8, 2
+; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v8, 0
+; CHECK-NEXT: vse1.v v25, (a0)
+; CHECK-NEXT: ret
+ %c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv64i1(<vscale x 64 x i1> %x, i64 2)
+ store <2 x i1> %c, <2 x i1>* %y
+ ret void
+}
+
+define void @extract_v2i1_nxv64i1_42(<vscale x 64 x i1> %x, <2 x i1>* %y) {
+; CHECK-LABEL: extract_v2i1_nxv64i1_42:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8,m8,ta,mu
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT: addi a1, zero, 42
+; CHECK-NEXT: vsetivli a2, 2, e8,m8,ta,mu
+; CHECK-NEXT: vslidedown.vx v8, v8, a1
+; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v8, 0
+; CHECK-NEXT: vse1.v v25, (a0)
+; CHECK-NEXT: ret
+ %c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv64i1(<vscale x 64 x i1> %x, i64 42)
+ store <2 x i1> %c, <2 x i1>* %y
+ ret void
+}
+
+declare <2 x i1> @llvm.experimental.vector.extract.v2i1.v64i1(<64 x i1> %vec, i64 %idx)
+declare <8 x i1> @llvm.experimental.vector.extract.v8i1.v64i1(<64 x i1> %vec, i64 %idx)
+
+declare <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv2i1(<vscale x 2 x i1> %vec, i64 %idx)
+declare <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv2i1(<vscale x 2 x i1> %vec, i64 %idx)
+
+declare <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv64i1(<vscale x 64 x i1> %vec, i64 %idx)
+declare <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv64i1(<vscale x 64 x i1> %vec, i64 %idx)
+
declare <2 x i8> @llvm.experimental.vector.extract.v2i8.v8i8(<8 x i8> %vec, i64 %idx)
declare <2 x i32> @llvm.experimental.vector.extract.v2i32.v8i32(<8 x i32> %vec, i64 %idx)