return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal,
Vec, VL);
}
+ case Intrinsic::riscv_vslide1up:
+ case Intrinsic::riscv_vslide1down:
+ case Intrinsic::riscv_vslide1up_mask:
+ case Intrinsic::riscv_vslide1down_mask: {
+ // We need to special case these when the scalar is larger than XLen.
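+ // The intrinsic's operands are (ID, vector, scalar, VL) when unmasked and
+ // (ID, maskedoff, vector, scalar, mask, VL) when masked.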
+ unsigned NumOps = Op.getNumOperands();
+ bool IsMasked = NumOps == 6;
+ unsigned OpOffset = IsMasked ? 1 : 0;
+ SDValue Scalar = Op.getOperand(2 + OpOffset);
+ if (Scalar.getValueType().bitsLE(XLenVT))
+ break;
+
+ // The instruction sign extends its XLen scalar operand to SEW, so a
+ // constant that sign extends from 32 bits can use the generic lowering
+ // below.
+ if (auto *CVal = dyn_cast<ConstantSDNode>(Scalar))
+ if (isInt<32>(CVal->getSExtValue()))
+ break;
+
+ MVT VT = Op.getSimpleValueType();
+ assert(VT.getVectorElementType() == MVT::i64 &&
+ Scalar.getValueType() == MVT::i64 && "Unexpected VTs");
+
+ // Convert the vector source to the equivalent nxvXi32 vector.
+ MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
+ SDValue Vec = DAG.getBitcast(I32VT, Op.getOperand(1 + OpOffset));
+
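+ // Split the i64 scalar into its low and high 32-bit halves.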
+ SDValue ScalarLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
+ DAG.getConstant(0, DL, XLenVT));
+ SDValue ScalarHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
+ DAG.getConstant(1, DL, XLenVT));
+
+ // Double the VL since we halved SEW.
+ SDValue VL = Op.getOperand(NumOps - 1);
+ SDValue I32VL =
+ DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
+
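+ // The slides are always done with an all-ones mask; for the masked
+ // intrinsic forms the real mask is applied afterwards with a vselect.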
+ MVT I32MaskVT = MVT::getVectorVT(MVT::i1, I32VT.getVectorElementCount());
+ SDValue I32Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, I32MaskVT, VL);
+
+ // Slide the two 32-bit halves of the scalar in using a pair of SEW=32
+ // slide1up/slide1down instructions.
+ if (IntNo == Intrinsic::riscv_vslide1up ||
+ IntNo == Intrinsic::riscv_vslide1up_mask) {
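+ // vslide1up inserts at element 0, so slide the high half in first; after
+ // the second slide the low half lands in the even (low) i32 element.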
+ Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Vec, ScalarHi,
+ I32Mask, I32VL);
+ Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Vec, ScalarLo,
+ I32Mask, I32VL);
+ } else {
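+ // vslide1down inserts at the last element, so slide the low half in
+ // first; the second slide leaves the high half in the odd i32 element.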
+ Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Vec, ScalarLo,
+ I32Mask, I32VL);
+ Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Vec, ScalarHi,
+ I32Mask, I32VL);
+ }
+
+ // Convert back to nxvXi64.
+ Vec = DAG.getBitcast(VT, Vec);
+
+ if (!IsMasked)
+ return Vec;
+
+ // Apply the mask after the operation by merging with the maskedoff operand.
+ SDValue Mask = Op.getOperand(NumOps - 2);
+ SDValue MaskedOff = Op.getOperand(1);
+ return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff, VL);
+ }
}
return lowerVectorIntrinsicSplats(Op, DAG, Subtarget);
NODE_NAME_CASE(VSLIDEUP_VL)
NODE_NAME_CASE(VSLIDE1UP_VL)
NODE_NAME_CASE(VSLIDEDOWN_VL)
+ NODE_NAME_CASE(VSLIDE1DOWN_VL)
NODE_NAME_CASE(VID_VL)
NODE_NAME_CASE(VFNCVT_ROD_VL)
NODE_NAME_CASE(VECREDUCE_ADD_VL)
// and the fifth the VL.
VSLIDEUP_VL,
VSLIDEDOWN_VL,
- // Matches the semantics of vslide1up. The first operand is the source
- // vector, the second is the XLenVT scalar value. The third and fourth
+ // Matches the semantics of vslide1up/vslide1down. The first operand is the
+ // source vector, the second is the XLenVT scalar value. The third and fourth
// operands are the mask and VL operands.
VSLIDE1UP_VL,
+ VSLIDE1DOWN_VL,
// Matches the semantics of the vid.v instruction, with a mask and VL
// operand.
VID_VL,
def riscv_slideup_vl : SDNode<"RISCVISD::VSLIDEUP_VL", SDTRVVSlide, []>;
def riscv_slide1up_vl : SDNode<"RISCVISD::VSLIDE1UP_VL", SDTRVVSlide1, []>;
def riscv_slidedown_vl : SDNode<"RISCVISD::VSLIDEDOWN_VL", SDTRVVSlide, []>;
+def riscv_slide1down_vl : SDNode<"RISCVISD::VSLIDE1DOWN_VL", SDTRVVSlide1, []>;
let Predicates = [HasStdExtV] in {
(XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVSLIDE1UP_VX_"#vti.LMul.MX)
vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.SEW)>;
+ def : Pat<(vti.Vector (riscv_slide1down_vl (vti.Vector vti.RegClass:$rs1),
+ GPR:$rs2, (vti.Mask true_mask),
+ (XLenVT (VLOp GPR:$vl)))),
+ (!cast<Instruction>("PseudoVSLIDE1DOWN_VX_"#vti.LMul.MX)
+ vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.SEW)>;
}
foreach vti = !listconcat(AllIntegerVectors, AllFloatVectors) in {
ret <vscale x 16 x i32> %a
}
+
+declare <vscale x 1 x i64> @llvm.riscv.vslide1down.nxv1i64.i64(
+ <vscale x 1 x i64>,
+ i64,
+ i32);
+
+define <vscale x 1 x i64> @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT: vslide1down.vx v25, v8, a0
+; CHECK-NEXT: vslide1down.vx v8, v25, a1
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i64> @llvm.riscv.vslide1down.nxv1i64.i64(
+ <vscale x 1 x i64> %0,
+ i64 %1,
+ i32 %2)
+
+ ret <vscale x 1 x i64> %a
+}
+
+declare <vscale x 1 x i64> @llvm.riscv.vslide1down.mask.nxv1i64.i64(
+ <vscale x 1 x i64>,
+ <vscale x 1 x i64>,
+ i64,
+ <vscale x 1 x i1>,
+ i32);
+
+define <vscale x 1 x i64> @intrinsic_vslide1down_mask_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv1i64_nxv1i64_i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: vsetvli a3, a3, e32,m1,ta,mu
+; CHECK-NEXT: vslide1down.vx v25, v9, a0
+; CHECK-NEXT: vslide1down.vx v25, v25, a1
+; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT: vmerge.vvm v8, v8, v25, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i64> @llvm.riscv.vslide1down.mask.nxv1i64.i64(
+ <vscale x 1 x i64> %0,
+ <vscale x 1 x i64> %1,
+ i64 %2,
+ <vscale x 1 x i1> %3,
+ i32 %4)
+
+ ret <vscale x 1 x i64> %a
+}
+
+declare <vscale x 2 x i64> @llvm.riscv.vslide1down.nxv2i64.i64(
+ <vscale x 2 x i64>,
+ i64,
+ i32);
+
+define <vscale x 2 x i64> @intrinsic_vslide1down_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vslide1down_vx_nxv2i64_nxv2i64_i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: vsetvli a2, a2, e32,m2,ta,mu
+; CHECK-NEXT: vslide1down.vx v26, v8, a0
+; CHECK-NEXT: vslide1down.vx v8, v26, a1
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i64> @llvm.riscv.vslide1down.nxv2i64.i64(
+ <vscale x 2 x i64> %0,
+ i64 %1,
+ i32 %2)
+
+ ret <vscale x 2 x i64> %a
+}
+
+declare <vscale x 2 x i64> @llvm.riscv.vslide1down.mask.nxv2i64.i64(
+ <vscale x 2 x i64>,
+ <vscale x 2 x i64>,
+ i64,
+ <vscale x 2 x i1>,
+ i32);
+
+define <vscale x 2 x i64> @intrinsic_vslide1down_mask_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv2i64_nxv2i64_i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: vsetvli a3, a3, e32,m2,ta,mu
+; CHECK-NEXT: vslide1down.vx v26, v10, a0
+; CHECK-NEXT: vslide1down.vx v26, v26, a1
+; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT: vmerge.vvm v8, v8, v26, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i64> @llvm.riscv.vslide1down.mask.nxv2i64.i64(
+ <vscale x 2 x i64> %0,
+ <vscale x 2 x i64> %1,
+ i64 %2,
+ <vscale x 2 x i1> %3,
+ i32 %4)
+
+ ret <vscale x 2 x i64> %a
+}
+
+declare <vscale x 4 x i64> @llvm.riscv.vslide1down.nxv4i64.i64(
+ <vscale x 4 x i64>,
+ i64,
+ i32);
+
+define <vscale x 4 x i64> @intrinsic_vslide1down_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vslide1down_vx_nxv4i64_nxv4i64_i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: vsetvli a2, a2, e32,m4,ta,mu
+; CHECK-NEXT: vslide1down.vx v28, v8, a0
+; CHECK-NEXT: vslide1down.vx v8, v28, a1
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i64> @llvm.riscv.vslide1down.nxv4i64.i64(
+ <vscale x 4 x i64> %0,
+ i64 %1,
+ i32 %2)
+
+ ret <vscale x 4 x i64> %a
+}
+
+declare <vscale x 4 x i64> @llvm.riscv.vslide1down.mask.nxv4i64.i64(
+ <vscale x 4 x i64>,
+ <vscale x 4 x i64>,
+ i64,
+ <vscale x 4 x i1>,
+ i32);
+
+define <vscale x 4 x i64> @intrinsic_vslide1down_mask_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv4i64_nxv4i64_i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: vsetvli a3, a3, e32,m4,ta,mu
+; CHECK-NEXT: vslide1down.vx v28, v12, a0
+; CHECK-NEXT: vslide1down.vx v28, v28, a1
+; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT: vmerge.vvm v8, v8, v28, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i64> @llvm.riscv.vslide1down.mask.nxv4i64.i64(
+ <vscale x 4 x i64> %0,
+ <vscale x 4 x i64> %1,
+ i64 %2,
+ <vscale x 4 x i1> %3,
+ i32 %4)
+
+ ret <vscale x 4 x i64> %a
+}
+
+declare <vscale x 8 x i64> @llvm.riscv.vslide1down.nxv8i64.i64(
+ <vscale x 8 x i64>,
+ i64,
+ i32);
+
+define <vscale x 8 x i64> @intrinsic_vslide1down_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, i64 %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vslide1down_vx_nxv8i64_nxv8i64_i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: vsetvli a2, a2, e32,m8,ta,mu
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vslide1down.vx v8, v8, a1
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i64> @llvm.riscv.vslide1down.nxv8i64.i64(
+ <vscale x 8 x i64> %0,
+ i64 %1,
+ i32 %2)
+
+ ret <vscale x 8 x i64> %a
+}
+
+declare <vscale x 8 x i64> @llvm.riscv.vslide1down.mask.nxv8i64.i64(
+ <vscale x 8 x i64>,
+ <vscale x 8 x i64>,
+ i64,
+ <vscale x 8 x i1>,
+ i32);
+
+define <vscale x 8 x i64> @intrinsic_vslide1down_mask_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, <vscale x 8 x i64> %1, i64 %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv8i64_nxv8i64_i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: vsetvli a3, a3, e32,m8,ta,mu
+; CHECK-NEXT: vslide1down.vx v16, v16, a0
+; CHECK-NEXT: vslide1down.vx v16, v16, a1
+; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i64> @llvm.riscv.vslide1down.mask.nxv8i64.i64(
+ <vscale x 8 x i64> %0,
+ <vscale x 8 x i64> %1,
+ i64 %2,
+ <vscale x 8 x i1> %3,
+ i32 %4)
+
+ ret <vscale x 8 x i64> %a
+}
ret <vscale x 16 x i32> %a
}
+
+declare <vscale x 1 x i64> @llvm.riscv.vslide1up.nxv1i64.i64(
+ <vscale x 1 x i64>,
+ i64,
+ i32);
+
+define <vscale x 1 x i64> @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT: vslide1up.vx v25, v8, a1
+; CHECK-NEXT: vslide1up.vx v8, v25, a0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i64> @llvm.riscv.vslide1up.nxv1i64.i64(
+ <vscale x 1 x i64> %0,
+ i64 %1,
+ i32 %2)
+
+ ret <vscale x 1 x i64> %a
+}
+
+declare <vscale x 1 x i64> @llvm.riscv.vslide1up.mask.nxv1i64.i64(
+ <vscale x 1 x i64>,
+ <vscale x 1 x i64>,
+ i64,
+ <vscale x 1 x i1>,
+ i32);
+
+define <vscale x 1 x i64> @intrinsic_vslide1up_mask_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+; CHECK-LABEL: intrinsic_vslide1up_mask_vx_nxv1i64_nxv1i64_i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: vsetvli a3, a3, e32,m1,ta,mu
+; CHECK-NEXT: vslide1up.vx v25, v9, a1
+; CHECK-NEXT: vslide1up.vx v26, v25, a0
+; CHECK-NEXT: vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT: vmerge.vvm v8, v8, v26, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 1 x i64> @llvm.riscv.vslide1up.mask.nxv1i64.i64(
+ <vscale x 1 x i64> %0,
+ <vscale x 1 x i64> %1,
+ i64 %2,
+ <vscale x 1 x i1> %3,
+ i32 %4)
+
+ ret <vscale x 1 x i64> %a
+}
+
+declare <vscale x 2 x i64> @llvm.riscv.vslide1up.nxv2i64.i64(
+ <vscale x 2 x i64>,
+ i64,
+ i32);
+
+define <vscale x 2 x i64> @intrinsic_vslide1up_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vslide1up_vx_nxv2i64_nxv2i64_i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: vsetvli a2, a2, e32,m2,ta,mu
+; CHECK-NEXT: vslide1up.vx v26, v8, a1
+; CHECK-NEXT: vslide1up.vx v8, v26, a0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i64> @llvm.riscv.vslide1up.nxv2i64.i64(
+ <vscale x 2 x i64> %0,
+ i64 %1,
+ i32 %2)
+
+ ret <vscale x 2 x i64> %a
+}
+
+declare <vscale x 2 x i64> @llvm.riscv.vslide1up.mask.nxv2i64.i64(
+ <vscale x 2 x i64>,
+ <vscale x 2 x i64>,
+ i64,
+ <vscale x 2 x i1>,
+ i32);
+
+define <vscale x 2 x i64> @intrinsic_vslide1up_mask_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+; CHECK-LABEL: intrinsic_vslide1up_mask_vx_nxv2i64_nxv2i64_i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: vsetvli a3, a3, e32,m2,ta,mu
+; CHECK-NEXT: vslide1up.vx v26, v10, a1
+; CHECK-NEXT: vslide1up.vx v28, v26, a0
+; CHECK-NEXT: vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT: vmerge.vvm v8, v8, v28, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 2 x i64> @llvm.riscv.vslide1up.mask.nxv2i64.i64(
+ <vscale x 2 x i64> %0,
+ <vscale x 2 x i64> %1,
+ i64 %2,
+ <vscale x 2 x i1> %3,
+ i32 %4)
+
+ ret <vscale x 2 x i64> %a
+}
+
+declare <vscale x 4 x i64> @llvm.riscv.vslide1up.nxv4i64.i64(
+ <vscale x 4 x i64>,
+ i64,
+ i32);
+
+define <vscale x 4 x i64> @intrinsic_vslide1up_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vslide1up_vx_nxv4i64_nxv4i64_i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: vsetvli a2, a2, e32,m4,ta,mu
+; CHECK-NEXT: vslide1up.vx v28, v8, a1
+; CHECK-NEXT: vslide1up.vx v8, v28, a0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i64> @llvm.riscv.vslide1up.nxv4i64.i64(
+ <vscale x 4 x i64> %0,
+ i64 %1,
+ i32 %2)
+
+ ret <vscale x 4 x i64> %a
+}
+
+declare <vscale x 4 x i64> @llvm.riscv.vslide1up.mask.nxv4i64.i64(
+ <vscale x 4 x i64>,
+ <vscale x 4 x i64>,
+ i64,
+ <vscale x 4 x i1>,
+ i32);
+
+define <vscale x 4 x i64> @intrinsic_vslide1up_mask_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+; CHECK-LABEL: intrinsic_vslide1up_mask_vx_nxv4i64_nxv4i64_i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: vsetvli a3, a3, e32,m4,ta,mu
+; CHECK-NEXT: vslide1up.vx v28, v12, a1
+; CHECK-NEXT: vslide1up.vx v12, v28, a0
+; CHECK-NEXT: vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 4 x i64> @llvm.riscv.vslide1up.mask.nxv4i64.i64(
+ <vscale x 4 x i64> %0,
+ <vscale x 4 x i64> %1,
+ i64 %2,
+ <vscale x 4 x i1> %3,
+ i32 %4)
+
+ ret <vscale x 4 x i64> %a
+}
+
+declare <vscale x 8 x i64> @llvm.riscv.vslide1up.nxv8i64.i64(
+ <vscale x 8 x i64>,
+ i64,
+ i32);
+
+define <vscale x 8 x i64> @intrinsic_vslide1up_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, i64 %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vslide1up_vx_nxv8i64_nxv8i64_i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: vsetvli a2, a2, e32,m8,ta,mu
+; CHECK-NEXT: vslide1up.vx v16, v8, a1
+; CHECK-NEXT: vslide1up.vx v8, v16, a0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i64> @llvm.riscv.vslide1up.nxv8i64.i64(
+ <vscale x 8 x i64> %0,
+ i64 %1,
+ i32 %2)
+
+ ret <vscale x 8 x i64> %a
+}
+
+declare <vscale x 8 x i64> @llvm.riscv.vslide1up.mask.nxv8i64.i64(
+ <vscale x 8 x i64>,
+ <vscale x 8 x i64>,
+ i64,
+ <vscale x 8 x i1>,
+ i32);
+
+define <vscale x 8 x i64> @intrinsic_vslide1up_mask_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, <vscale x 8 x i64> %1, i64 %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+; CHECK-LABEL: intrinsic_vslide1up_mask_vx_nxv8i64_nxv8i64_i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: vsetvli a3, a3, e32,m8,ta,mu
+; CHECK-NEXT: vslide1up.vx v24, v16, a1
+; CHECK-NEXT: vslide1up.vx v16, v24, a0
+; CHECK-NEXT: vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT: jalr zero, 0(ra)
+entry:
+ %a = call <vscale x 8 x i64> @llvm.riscv.vslide1up.mask.nxv8i64.i64(
+ <vscale x 8 x i64> %0,
+ <vscale x 8 x i64> %1,
+ i64 %2,
+ <vscale x 8 x i1> %3,
+ i32 %4)
+
+ ret <vscale x 8 x i64> %a
+}