return SDValue();
}
-// Called by type legalization to handle splat of i64 on RV32.
-// FIXME: We can optimize this when the type has sign or zero bits in one
-// of the halves.
-static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Scalar,
- SDValue VL, SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget) {
- SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
- DAG.getConstant(0, DL, MVT::i32));
- SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
- DAG.getConstant(1, DL, MVT::i32));
-
- // Fall back to a stack store and stride x0 vector load.
+// Use a stack slot to splat the two i32 values in Lo/Hi into the desired
+// nxvXi64 vector VT.
+static SDValue splatPartsI64ThroughStack(const SDLoc &DL, MVT VT, SDValue Lo,
+ SDValue Hi, SDValue VL,
+ SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
+ Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
+ "Unexpected VTs!");
MachineFunction &MF = DAG.getMachineFunction();
RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
MPI, Align(8), MachineMemOperand::MOLoad);
}
+// Called by type legalization to handle splat of i64 on RV32.
+// FIXME: We can optimize this when the type has sign or zero bits in one
+// of the halves.
+static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Scalar,
+ SDValue VL, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
+ DAG.getConstant(0, DL, MVT::i32));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
+ DAG.getConstant(1, DL, MVT::i32));
+
+ // Fall back to a stack store and stride x0 vector load.
+ return splatPartsI64ThroughStack(DL, VT, Lo, Hi, VL, DAG, Subtarget);
+}
+
// This function lowers a splat of a scalar operand Splat with the vector
// length VL. It ensures the final sequence is type legal, which is useful when
// lowering a splat after type legalization.
SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
- EVT VecVT = Op.getValueType();
+ MVT VecVT = Op.getSimpleValueType();
assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
"Unexpected SPLAT_VECTOR_PARTS lowering");
Hi.getConstantOperandVal(1) == 31)
return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
- // Else, on RV32 we lower an i64-element SPLAT_VECTOR thus, being careful not
- // to accidentally sign-extend the 32-bit halves to the e64 SEW:
- // vmv.v.x vX, hi
- // vsll.vx vX, vX, /*32*/
- // vmv.v.x vY, lo
- // vsll.vx vY, vY, /*32*/
- // vsrl.vx vY, vY, /*32*/
- // vor.vv vX, vX, vY
- SDValue ThirtyTwoV = DAG.getConstant(32, DL, VecVT);
-
- Lo = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
- Lo = DAG.getNode(ISD::SHL, DL, VecVT, Lo, ThirtyTwoV);
- Lo = DAG.getNode(ISD::SRL, DL, VecVT, Lo, ThirtyTwoV);
-
- if (isNullConstant(Hi))
- return Lo;
-
- Hi = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Hi);
- Hi = DAG.getNode(ISD::SHL, DL, VecVT, Hi, ThirtyTwoV);
-
- return DAG.getNode(ISD::OR, DL, VecVT, Lo, Hi);
+ // Fall back to a stack store and stride x0 vector load. Use X0 as VL.
+ return splatPartsI64ThroughStack(
+ DL, VecVT, Lo, Hi, DAG.getRegister(RISCV::X0, MVT::i64), DAG, Subtarget);
}
// Custom-lower extensions from mask vectors by using a vselect either with 1
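The RV32 test updates below all follow the same pattern: the old in-register splat built from vmv.v.x, vsll.vx, vsrl.vx and vor.vv disappears, and the two 32-bit halves of the i64 are instead stored to a naturally aligned stack temporary and broadcast with a zero-stride vlse64.v. As a rough sketch of the shape of the emitted code (register choices are illustrative; the low half is assumed to arrive in a0 and the high half in a1, and the surrounding vsetvli/CFI bookkeeping is omitted):

    addi     sp, sp, -16       # carve out a stack slot
    sw       a0, 8(sp)         # store the low 32 bits of the value
    sw       a1, 12(sp)        # store the high 32 bits just above it
    addi     a0, sp, 8         # address of the in-memory i64
    vlse64.v v8, (a0), zero    # stride-0 load: every element reads the same 8 bytes
    addi     sp, sp, 16        # release the slot

Because the stride register is x0, every element of the result is loaded from the same address, so one pair of scalar stores materializes the splat for any LMUL.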
define i64 @vreduce_smin_v2i64(<2 x i64>* %x) {
; RV32-LABEL: vreduce_smin_v2i64:
; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
; RV32-NEXT: vle64.v v25, (a0)
-; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
-; RV32-NEXT: vmv.v.i v26, -1
-; RV32-NEXT: addi a1, zero, 32
-; RV32-NEXT: vsll.vx v26, v26, a1
-; RV32-NEXT: vsrl.vx v26, v26, a1
+; RV32-NEXT: addi a0, zero, -1
+; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: lui a0, 524288
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vmv.v.x v27, a0
-; RV32-NEXT: vsll.vx v27, v27, a1
-; RV32-NEXT: vor.vv v26, v26, v27
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v26, (a0), zero
; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; RV32-NEXT: vredmin.vs v25, v25, v26
; RV32-NEXT: vmv.x.s a0, v25
+; RV32-NEXT: addi a1, zero, 32
; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu
; RV32-NEXT: vsrl.vx v25, v25, a1
; RV32-NEXT: vmv.x.s a1, v25
+; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vreduce_smin_v2i64:
define i64 @vreduce_smin_v4i64(<4 x i64>* %x) {
; RV32-LABEL: vreduce_smin_v4i64:
; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu
; RV32-NEXT: vle64.v v26, (a0)
-; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
-; RV32-NEXT: vmv.v.i v25, -1
-; RV32-NEXT: addi a1, zero, 32
-; RV32-NEXT: vsll.vx v25, v25, a1
-; RV32-NEXT: vsrl.vx v25, v25, a1
+; RV32-NEXT: addi a0, zero, -1
+; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: lui a0, 524288
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vmv.v.x v28, a0
-; RV32-NEXT: vsll.vx v28, v28, a1
-; RV32-NEXT: vor.vv v25, v25, v28
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v25, (a0), zero
; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu
; RV32-NEXT: vredmin.vs v25, v26, v25
; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; RV32-NEXT: vmv.x.s a0, v25
+; RV32-NEXT: addi a1, zero, 32
; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu
; RV32-NEXT: vsrl.vx v25, v25, a1
; RV32-NEXT: vmv.x.s a1, v25
+; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vreduce_smin_v4i64:
define i64 @vreduce_smin_v8i64(<8 x i64>* %x) {
; RV32-LABEL: vreduce_smin_v8i64:
; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
; RV32-NEXT: vle64.v v28, (a0)
-; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
-; RV32-NEXT: vmv.v.i v25, -1
-; RV32-NEXT: addi a1, zero, 32
-; RV32-NEXT: vsll.vx v25, v25, a1
-; RV32-NEXT: vsrl.vx v25, v25, a1
+; RV32-NEXT: addi a0, zero, -1
+; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: lui a0, 524288
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vmv.v.x v26, a0
-; RV32-NEXT: vsll.vx v26, v26, a1
-; RV32-NEXT: vor.vv v25, v25, v26
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v25, (a0), zero
; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu
; RV32-NEXT: vredmin.vs v25, v28, v25
; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; RV32-NEXT: vmv.x.s a0, v25
+; RV32-NEXT: addi a1, zero, 32
; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu
; RV32-NEXT: vsrl.vx v25, v25, a1
; RV32-NEXT: vmv.x.s a1, v25
+; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vreduce_smin_v8i64:
define i64 @vreduce_smin_v16i64(<16 x i64>* %x) {
; RV32-LABEL: vreduce_smin_v16i64:
; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: vsetivli a1, 16, e64,m8,ta,mu
; RV32-NEXT: vle64.v v8, (a0)
-; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
-; RV32-NEXT: vmv.v.i v25, -1
-; RV32-NEXT: addi a1, zero, 32
-; RV32-NEXT: vsll.vx v25, v25, a1
-; RV32-NEXT: vsrl.vx v25, v25, a1
+; RV32-NEXT: addi a0, zero, -1
+; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: lui a0, 524288
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vmv.v.x v26, a0
-; RV32-NEXT: vsll.vx v26, v26, a1
-; RV32-NEXT: vor.vv v25, v25, v26
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v25, (a0), zero
; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu
; RV32-NEXT: vredmin.vs v25, v8, v25
; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; RV32-NEXT: vmv.x.s a0, v25
+; RV32-NEXT: addi a1, zero, 32
; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu
; RV32-NEXT: vsrl.vx v25, v25, a1
; RV32-NEXT: vmv.x.s a1, v25
+; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vreduce_smin_v16i64:
define i64 @vreduce_smin_v32i64(<32 x i64>* %x) {
; RV32-LABEL: vreduce_smin_v32i64:
; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: vsetivli a1, 16, e64,m8,ta,mu
; RV32-NEXT: vle64.v v8, (a0)
; RV32-NEXT: addi a0, a0, 128
; RV32-NEXT: vle64.v v16, (a0)
-; RV32-NEXT: vmin.vv v8, v8, v16
-; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
-; RV32-NEXT: vmv.v.i v25, -1
-; RV32-NEXT: addi a1, zero, 32
-; RV32-NEXT: vsll.vx v25, v25, a1
-; RV32-NEXT: vsrl.vx v25, v25, a1
+; RV32-NEXT: addi a0, zero, -1
+; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: lui a0, 524288
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vmv.v.x v26, a0
-; RV32-NEXT: vsll.vx v26, v26, a1
-; RV32-NEXT: vor.vv v25, v25, v26
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: vmin.vv v8, v8, v16
+; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v25, (a0), zero
; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu
; RV32-NEXT: vredmin.vs v25, v8, v25
; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; RV32-NEXT: vmv.x.s a0, v25
+; RV32-NEXT: addi a1, zero, 32
; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu
; RV32-NEXT: vsrl.vx v25, v25, a1
; RV32-NEXT: vmv.x.s a1, v25
+; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vreduce_smin_v32i64:
define i64 @vreduce_smin_v64i64(<64 x i64>* %x) nounwind {
; RV32-LABEL: vreduce_smin_v64i64:
; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: vsetivli a1, 16, e64,m8,ta,mu
; RV32-NEXT: vle64.v v8, (a0)
; RV32-NEXT: addi a1, a0, 256
-; RV32-NEXT: addi a2, a0, 384
-; RV32-NEXT: vle64.v v16, (a2)
+; RV32-NEXT: vle64.v v16, (a1)
+; RV32-NEXT: addi a1, a0, 384
+; RV32-NEXT: vle64.v v24, (a1)
; RV32-NEXT: addi a0, a0, 128
-; RV32-NEXT: vle64.v v24, (a0)
-; RV32-NEXT: vle64.v v0, (a1)
-; RV32-NEXT: vmin.vv v16, v24, v16
-; RV32-NEXT: vmin.vv v8, v8, v0
-; RV32-NEXT: vmin.vv v8, v8, v16
-; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
-; RV32-NEXT: vmv.v.i v25, -1
-; RV32-NEXT: addi a1, zero, 32
-; RV32-NEXT: vsll.vx v25, v25, a1
-; RV32-NEXT: vsrl.vx v25, v25, a1
+; RV32-NEXT: vle64.v v0, (a0)
+; RV32-NEXT: addi a0, zero, -1
+; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: lui a0, 524288
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vmv.v.x v26, a0
-; RV32-NEXT: vsll.vx v26, v26, a1
-; RV32-NEXT: vor.vv v25, v25, v26
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: vmin.vv v24, v0, v24
+; RV32-NEXT: vmin.vv v8, v8, v16
+; RV32-NEXT: vmin.vv v8, v8, v24
+; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v25, (a0), zero
; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu
; RV32-NEXT: vredmin.vs v25, v8, v25
; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; RV32-NEXT: vmv.x.s a0, v25
+; RV32-NEXT: addi a1, zero, 32
; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu
; RV32-NEXT: vsrl.vx v25, v25, a1
; RV32-NEXT: vmv.x.s a1, v25
+; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vreduce_smin_v64i64:
define i64 @vreduce_smax_v2i64(<2 x i64>* %x) {
; RV32-LABEL: vreduce_smax_v2i64:
; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: vsetivli a1, 2, e64,m1,ta,mu
; RV32-NEXT: vle64.v v25, (a0)
; RV32-NEXT: lui a0, 524288
-; RV32-NEXT: vsetvli a1, zero, e64,m1,ta,mu
-; RV32-NEXT: vmv.v.x v26, a0
-; RV32-NEXT: addi a1, zero, 32
-; RV32-NEXT: vsll.vx v26, v26, a1
-; RV32-NEXT: vmv.v.i v27, 0
-; RV32-NEXT: vsll.vx v27, v27, a1
-; RV32-NEXT: vsrl.vx v27, v27, a1
-; RV32-NEXT: vor.vv v26, v27, v26
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: sw zero, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v26, (a0), zero
; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu
; RV32-NEXT: vredmax.vs v25, v25, v26
; RV32-NEXT: vmv.x.s a0, v25
+; RV32-NEXT: addi a1, zero, 32
; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu
; RV32-NEXT: vsrl.vx v25, v25, a1
; RV32-NEXT: vmv.x.s a1, v25
+; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vreduce_smax_v2i64:
define i64 @vreduce_smax_v4i64(<4 x i64>* %x) {
; RV32-LABEL: vreduce_smax_v4i64:
; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: vsetivli a1, 4, e64,m2,ta,mu
; RV32-NEXT: vle64.v v26, (a0)
; RV32-NEXT: lui a0, 524288
-; RV32-NEXT: vsetvli a1, zero, e64,m1,ta,mu
-; RV32-NEXT: vmv.v.x v25, a0
-; RV32-NEXT: addi a1, zero, 32
-; RV32-NEXT: vsll.vx v25, v25, a1
-; RV32-NEXT: vmv.v.i v28, 0
-; RV32-NEXT: vsll.vx v28, v28, a1
-; RV32-NEXT: vsrl.vx v28, v28, a1
-; RV32-NEXT: vor.vv v25, v28, v25
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: sw zero, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v25, (a0), zero
; RV32-NEXT: vsetivli a0, 4, e64,m2,ta,mu
; RV32-NEXT: vredmax.vs v25, v26, v25
; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; RV32-NEXT: vmv.x.s a0, v25
+; RV32-NEXT: addi a1, zero, 32
; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu
; RV32-NEXT: vsrl.vx v25, v25, a1
; RV32-NEXT: vmv.x.s a1, v25
+; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vreduce_smax_v4i64:
define i64 @vreduce_smax_v8i64(<8 x i64>* %x) {
; RV32-LABEL: vreduce_smax_v8i64:
; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: vsetivli a1, 8, e64,m4,ta,mu
; RV32-NEXT: vle64.v v28, (a0)
; RV32-NEXT: lui a0, 524288
-; RV32-NEXT: vsetvli a1, zero, e64,m1,ta,mu
-; RV32-NEXT: vmv.v.x v25, a0
-; RV32-NEXT: addi a1, zero, 32
-; RV32-NEXT: vsll.vx v25, v25, a1
-; RV32-NEXT: vmv.v.i v26, 0
-; RV32-NEXT: vsll.vx v26, v26, a1
-; RV32-NEXT: vsrl.vx v26, v26, a1
-; RV32-NEXT: vor.vv v25, v26, v25
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: sw zero, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v25, (a0), zero
; RV32-NEXT: vsetivli a0, 8, e64,m4,ta,mu
; RV32-NEXT: vredmax.vs v25, v28, v25
; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; RV32-NEXT: vmv.x.s a0, v25
+; RV32-NEXT: addi a1, zero, 32
; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu
; RV32-NEXT: vsrl.vx v25, v25, a1
; RV32-NEXT: vmv.x.s a1, v25
+; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vreduce_smax_v8i64:
define i64 @vreduce_smax_v16i64(<16 x i64>* %x) {
; RV32-LABEL: vreduce_smax_v16i64:
; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: vsetivli a1, 16, e64,m8,ta,mu
; RV32-NEXT: vle64.v v8, (a0)
; RV32-NEXT: lui a0, 524288
-; RV32-NEXT: vsetvli a1, zero, e64,m1,ta,mu
-; RV32-NEXT: vmv.v.x v25, a0
-; RV32-NEXT: addi a1, zero, 32
-; RV32-NEXT: vsll.vx v25, v25, a1
-; RV32-NEXT: vmv.v.i v26, 0
-; RV32-NEXT: vsll.vx v26, v26, a1
-; RV32-NEXT: vsrl.vx v26, v26, a1
-; RV32-NEXT: vor.vv v25, v26, v25
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: sw zero, 8(sp)
+; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v25, (a0), zero
; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu
; RV32-NEXT: vredmax.vs v25, v8, v25
; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; RV32-NEXT: vmv.x.s a0, v25
+; RV32-NEXT: addi a1, zero, 32
; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu
; RV32-NEXT: vsrl.vx v25, v25, a1
; RV32-NEXT: vmv.x.s a1, v25
+; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vreduce_smax_v16i64:
define i64 @vreduce_smax_v32i64(<32 x i64>* %x) {
; RV32-LABEL: vreduce_smax_v32i64:
; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: vsetivli a1, 16, e64,m8,ta,mu
; RV32-NEXT: vle64.v v8, (a0)
; RV32-NEXT: addi a0, a0, 128
; RV32-NEXT: vle64.v v16, (a0)
-; RV32-NEXT: vmax.vv v8, v8, v16
; RV32-NEXT: lui a0, 524288
-; RV32-NEXT: vsetvli a1, zero, e64,m1,ta,mu
-; RV32-NEXT: vmv.v.x v25, a0
-; RV32-NEXT: addi a1, zero, 32
-; RV32-NEXT: vsll.vx v25, v25, a1
-; RV32-NEXT: vmv.v.i v26, 0
-; RV32-NEXT: vsll.vx v26, v26, a1
-; RV32-NEXT: vsrl.vx v26, v26, a1
-; RV32-NEXT: vor.vv v25, v26, v25
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: sw zero, 8(sp)
+; RV32-NEXT: vmax.vv v8, v8, v16
+; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v25, (a0), zero
; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu
; RV32-NEXT: vredmax.vs v25, v8, v25
; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; RV32-NEXT: vmv.x.s a0, v25
+; RV32-NEXT: addi a1, zero, 32
; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu
; RV32-NEXT: vsrl.vx v25, v25, a1
; RV32-NEXT: vmv.x.s a1, v25
+; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vreduce_smax_v32i64:
define i64 @vreduce_smax_v64i64(<64 x i64>* %x) nounwind {
; RV32-LABEL: vreduce_smax_v64i64:
; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: vsetivli a1, 16, e64,m8,ta,mu
; RV32-NEXT: vle64.v v8, (a0)
; RV32-NEXT: addi a1, a0, 256
-; RV32-NEXT: addi a2, a0, 384
-; RV32-NEXT: vle64.v v16, (a2)
+; RV32-NEXT: vle64.v v16, (a1)
+; RV32-NEXT: addi a1, a0, 384
+; RV32-NEXT: vle64.v v24, (a1)
; RV32-NEXT: addi a0, a0, 128
-; RV32-NEXT: vle64.v v24, (a0)
-; RV32-NEXT: vle64.v v0, (a1)
-; RV32-NEXT: vmax.vv v16, v24, v16
-; RV32-NEXT: vmax.vv v8, v8, v0
-; RV32-NEXT: vmax.vv v8, v8, v16
+; RV32-NEXT: vle64.v v0, (a0)
; RV32-NEXT: lui a0, 524288
-; RV32-NEXT: vsetvli a1, zero, e64,m1,ta,mu
-; RV32-NEXT: vmv.v.x v25, a0
-; RV32-NEXT: addi a1, zero, 32
-; RV32-NEXT: vsll.vx v25, v25, a1
-; RV32-NEXT: vmv.v.i v26, 0
-; RV32-NEXT: vsll.vx v26, v26, a1
-; RV32-NEXT: vsrl.vx v26, v26, a1
-; RV32-NEXT: vor.vv v25, v26, v25
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: sw zero, 8(sp)
+; RV32-NEXT: vmax.vv v24, v0, v24
+; RV32-NEXT: vmax.vv v8, v8, v16
+; RV32-NEXT: vmax.vv v8, v8, v24
+; RV32-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v25, (a0), zero
; RV32-NEXT: vsetivli a0, 16, e64,m8,ta,mu
; RV32-NEXT: vredmax.vs v25, v8, v25
; RV32-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; RV32-NEXT: vmv.x.s a0, v25
+; RV32-NEXT: addi a1, zero, 32
; RV32-NEXT: vsetivli a2, 1, e64,m1,ta,mu
; RV32-NEXT: vsrl.vx v25, v25, a1
; RV32-NEXT: vmv.x.s a1, v25
+; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vreduce_smax_v64i64:
define <vscale x 8 x i1> @icmp_eq_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: icmp_eq_vx_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vmseq.vv v0, v8, v16
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 8 x i1> @icmp_eq_xv_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: icmp_eq_xv_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vmseq.vv v0, v16, v8
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 8 x i1> @icmp_ne_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: icmp_ne_vx_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vmsne.vv v0, v8, v16
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 8 x i1> @icmp_ne_xv_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: icmp_ne_xv_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vmsne.vv v0, v16, v8
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 8 x i1> @icmp_ugt_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: icmp_ugt_vx_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vmsltu.vv v0, v16, v8
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 8 x i1> @icmp_ugt_xv_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: icmp_ugt_xv_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vmsltu.vv v0, v8, v16
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 8 x i1> @icmp_uge_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: icmp_uge_vx_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vmsleu.vv v0, v16, v8
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 8 x i1> @icmp_uge_xv_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: icmp_uge_xv_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vmsleu.vv v0, v8, v16
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 8 x i1> @icmp_ult_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: icmp_ult_vx_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vmsltu.vv v0, v8, v16
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 8 x i1> @icmp_ult_xv_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: icmp_ult_xv_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vmsltu.vv v0, v16, v8
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 8 x i1> @icmp_ule_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: icmp_ule_vx_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vmsleu.vv v0, v8, v16
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 8 x i1> @icmp_ule_xv_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: icmp_ule_xv_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vmsleu.vv v0, v16, v8
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 8 x i1> @icmp_sgt_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: icmp_sgt_vx_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vmslt.vv v0, v16, v8
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 8 x i1> @icmp_sgt_xv_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: icmp_sgt_xv_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vmslt.vv v0, v8, v16
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 8 x i1> @icmp_sge_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: icmp_sge_vx_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vmsle.vv v0, v16, v8
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 8 x i1> @icmp_sge_xv_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: icmp_sge_xv_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vmsle.vv v0, v8, v16
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 8 x i1> @icmp_slt_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: icmp_slt_vx_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vmslt.vv v0, v8, v16
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 8 x i1> @icmp_slt_xv_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: icmp_slt_xv_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vmslt.vv v0, v16, v8
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 8 x i1> @icmp_sle_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: icmp_sle_vx_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vmsle.vv v0, v8, v16
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 8 x i1> @icmp_sle_xv_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: icmp_sle_xv_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vmsle.vv v0, v16, v8
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 1 x i64> @vadd_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
; CHECK-LABEL: vadd_vx_nxv1i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vsrl.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vadd.vv v8, v8, v25
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
define <vscale x 2 x i64> @vadd_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
; CHECK-LABEL: vadd_vx_nxv2i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vsrl.vx v28, v28, a1
-; CHECK-NEXT: vor.vv v26, v28, v26
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vadd.vv v8, v8, v26
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
define <vscale x 4 x i64> @vadd_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
; CHECK-LABEL: vadd_vx_nxv4i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsll.vx v12, v12, a1
-; CHECK-NEXT: vsrl.vx v12, v12, a1
-; CHECK-NEXT: vor.vv v28, v12, v28
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vadd.vv v8, v8, v28
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
define <vscale x 8 x i64> @vadd_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: vadd_vx_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vadd.vv v8, v8, v16
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 1 x i64> @vand_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
; CHECK-LABEL: vand_vx_nxv1i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vsrl.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vand.vv v8, v8, v25
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
define <vscale x 2 x i64> @vand_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
; CHECK-LABEL: vand_vx_nxv2i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vsrl.vx v28, v28, a1
-; CHECK-NEXT: vor.vv v26, v28, v26
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vand.vv v8, v8, v26
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
define <vscale x 4 x i64> @vand_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
; CHECK-LABEL: vand_vx_nxv4i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsll.vx v12, v12, a1
-; CHECK-NEXT: vsrl.vx v12, v12, a1
-; CHECK-NEXT: vor.vv v28, v12, v28
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vand.vv v8, v8, v28
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
define <vscale x 8 x i64> @vand_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: vand_vx_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vand.vv v8, v8, v16
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 1 x i64> @vdiv_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
; CHECK-LABEL: vdiv_vx_nxv1i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vsrl.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vdiv.vv v8, v8, v25
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
define <vscale x 1 x i64> @vdiv_vi_nxv1i64_0(<vscale x 1 x i64> %va) {
; CHECK-LABEL: vdiv_vi_nxv1i64_0:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: lui a0, 748983
; CHECK-NEXT: addi a0, a0, -586
-; CHECK-NEXT: vsetvli a1, zero, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a0
-; CHECK-NEXT: addi a0, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a0
-; CHECK-NEXT: lui a1, 898779
-; CHECK-NEXT: addi a1, a1, 1755
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: vsll.vx v26, v26, a0
-; CHECK-NEXT: vsrl.vx v26, v26, a0
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: sw a0, 12(sp)
+; CHECK-NEXT: lui a0, 898779
+; CHECK-NEXT: addi a0, a0, 1755
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vmulh.vv v25, v8, v25
; CHECK-NEXT: addi a0, zero, 63
; CHECK-NEXT: vsrl.vx v26, v25, a0
; CHECK-NEXT: vsra.vi v25, v25, 1
; CHECK-NEXT: vadd.vv v8, v25, v26
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i64> undef, i64 -7, i32 0
%splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
define <vscale x 2 x i64> @vdiv_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
; CHECK-LABEL: vdiv_vx_nxv2i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vsrl.vx v28, v28, a1
-; CHECK-NEXT: vor.vv v26, v28, v26
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vdiv.vv v8, v8, v26
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
define <vscale x 2 x i64> @vdiv_vi_nxv2i64_0(<vscale x 2 x i64> %va) {
; CHECK-LABEL: vdiv_vi_nxv2i64_0:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: lui a0, 748983
; CHECK-NEXT: addi a0, a0, -586
-; CHECK-NEXT: vsetvli a1, zero, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: addi a0, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a0
-; CHECK-NEXT: lui a1, 898779
-; CHECK-NEXT: addi a1, a1, 1755
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: vsll.vx v28, v28, a0
-; CHECK-NEXT: vsrl.vx v28, v28, a0
-; CHECK-NEXT: vor.vv v26, v28, v26
+; CHECK-NEXT: sw a0, 12(sp)
+; CHECK-NEXT: lui a0, 898779
+; CHECK-NEXT: addi a0, a0, 1755
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vmulh.vv v26, v8, v26
; CHECK-NEXT: addi a0, zero, 63
; CHECK-NEXT: vsrl.vx v28, v26, a0
; CHECK-NEXT: vsra.vi v26, v26, 1
; CHECK-NEXT: vadd.vv v8, v26, v28
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i64> undef, i64 -7, i32 0
%splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
define <vscale x 4 x i64> @vdiv_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
; CHECK-LABEL: vdiv_vx_nxv4i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsll.vx v12, v12, a1
-; CHECK-NEXT: vsrl.vx v12, v12, a1
-; CHECK-NEXT: vor.vv v28, v12, v28
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vdiv.vv v8, v8, v28
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
define <vscale x 4 x i64> @vdiv_vi_nxv4i64_0(<vscale x 4 x i64> %va) {
; CHECK-LABEL: vdiv_vi_nxv4i64_0:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: lui a0, 748983
; CHECK-NEXT: addi a0, a0, -586
-; CHECK-NEXT: vsetvli a1, zero, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: addi a0, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a0
-; CHECK-NEXT: lui a1, 898779
-; CHECK-NEXT: addi a1, a1, 1755
-; CHECK-NEXT: vmv.v.x v12, a1
-; CHECK-NEXT: vsll.vx v12, v12, a0
-; CHECK-NEXT: vsrl.vx v12, v12, a0
-; CHECK-NEXT: vor.vv v28, v12, v28
+; CHECK-NEXT: sw a0, 12(sp)
+; CHECK-NEXT: lui a0, 898779
+; CHECK-NEXT: addi a0, a0, 1755
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vmulh.vv v28, v8, v28
; CHECK-NEXT: addi a0, zero, 63
; CHECK-NEXT: vsrl.vx v8, v28, a0
; CHECK-NEXT: vsra.vi v28, v28, 1
; CHECK-NEXT: vadd.vv v8, v28, v8
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i64> undef, i64 -7, i32 0
%splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
define <vscale x 8 x i64> @vdiv_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: vdiv_vx_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vdiv.vv v8, v8, v16
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 8 x i64> @vdiv_vi_nxv8i64_0(<vscale x 8 x i64> %va) {
; CHECK-LABEL: vdiv_vi_nxv8i64_0:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: lui a0, 748983
; CHECK-NEXT: addi a0, a0, -586
-; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a0
-; CHECK-NEXT: addi a0, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a0
-; CHECK-NEXT: lui a1, 898779
-; CHECK-NEXT: addi a1, a1, 1755
-; CHECK-NEXT: vmv.v.x v24, a1
-; CHECK-NEXT: vsll.vx v24, v24, a0
-; CHECK-NEXT: vsrl.vx v24, v24, a0
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: sw a0, 12(sp)
+; CHECK-NEXT: lui a0, 898779
+; CHECK-NEXT: addi a0, a0, 1755
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vmulh.vv v8, v8, v16
; CHECK-NEXT: addi a0, zero, 63
; CHECK-NEXT: vsrl.vx v16, v8, a0
; CHECK-NEXT: vsra.vi v8, v8, 1
; CHECK-NEXT: vadd.vv v8, v8, v16
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 -7, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 1 x i64> @vdivu_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
; CHECK-LABEL: vdivu_vx_nxv1i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vsrl.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vdivu.vv v8, v8, v25
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
define <vscale x 1 x i64> @vdivu_vi_nxv1i64_0(<vscale x 1 x i64> %va) {
; CHECK-LABEL: vdivu_vi_nxv1i64_0:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.i v25, -7
; CHECK-NEXT: lui a0, 131072
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: addi a0, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a0
-; CHECK-NEXT: vmv.v.i v27, 1
-; CHECK-NEXT: vsll.vx v27, v27, a0
-; CHECK-NEXT: vsrl.vx v27, v27, a0
-; CHECK-NEXT: vor.vv v26, v27, v26
-; CHECK-NEXT: vmulhu.vv v26, v8, v26
+; CHECK-NEXT: sw a0, 12(sp)
+; CHECK-NEXT: addi a0, zero, 1
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
+; CHECK-NEXT: vmv.v.i v26, -7
+; CHECK-NEXT: vmulhu.vv v25, v8, v25
; CHECK-NEXT: addi a0, zero, 61
-; CHECK-NEXT: vsrl.vx v26, v26, a0
-; CHECK-NEXT: vmseq.vi v0, v25, 1
-; CHECK-NEXT: vmerge.vvm v8, v26, v8, v0
+; CHECK-NEXT: vsrl.vx v25, v25, a0
+; CHECK-NEXT: vmseq.vi v0, v26, 1
+; CHECK-NEXT: vmerge.vvm v8, v25, v8, v0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i64> undef, i64 -7, i32 0
%splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
define <vscale x 2 x i64> @vdivu_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
; CHECK-LABEL: vdivu_vx_nxv2i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vsrl.vx v28, v28, a1
-; CHECK-NEXT: vor.vv v26, v28, v26
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vdivu.vv v8, v8, v26
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
define <vscale x 2 x i64> @vdivu_vi_nxv2i64_0(<vscale x 2 x i64> %va) {
; CHECK-LABEL: vdivu_vi_nxv2i64_0:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.i v26, -7
; CHECK-NEXT: lui a0, 131072
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: addi a0, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a0
-; CHECK-NEXT: vmv.v.i v30, 1
-; CHECK-NEXT: vsll.vx v30, v30, a0
-; CHECK-NEXT: vsrl.vx v30, v30, a0
-; CHECK-NEXT: vor.vv v28, v30, v28
-; CHECK-NEXT: vmulhu.vv v28, v8, v28
+; CHECK-NEXT: sw a0, 12(sp)
+; CHECK-NEXT: addi a0, zero, 1
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
+; CHECK-NEXT: vmv.v.i v28, -7
+; CHECK-NEXT: vmulhu.vv v26, v8, v26
; CHECK-NEXT: addi a0, zero, 61
-; CHECK-NEXT: vsrl.vx v28, v28, a0
-; CHECK-NEXT: vmseq.vi v0, v26, 1
-; CHECK-NEXT: vmerge.vvm v8, v28, v8, v0
+; CHECK-NEXT: vsrl.vx v26, v26, a0
+; CHECK-NEXT: vmseq.vi v0, v28, 1
+; CHECK-NEXT: vmerge.vvm v8, v26, v8, v0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i64> undef, i64 -7, i32 0
%splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
define <vscale x 4 x i64> @vdivu_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
; CHECK-LABEL: vdivu_vx_nxv4i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsll.vx v12, v12, a1
-; CHECK-NEXT: vsrl.vx v12, v12, a1
-; CHECK-NEXT: vor.vv v28, v12, v28
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vdivu.vv v8, v8, v28
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
define <vscale x 4 x i64> @vdivu_vi_nxv4i64_0(<vscale x 4 x i64> %va) {
; CHECK-LABEL: vdivu_vi_nxv4i64_0:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.i v28, -7
; CHECK-NEXT: lui a0, 131072
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: addi a0, zero, 32
-; CHECK-NEXT: vsll.vx v12, v12, a0
-; CHECK-NEXT: vmv.v.i v16, 1
-; CHECK-NEXT: vsll.vx v16, v16, a0
-; CHECK-NEXT: vsrl.vx v16, v16, a0
-; CHECK-NEXT: vor.vv v12, v16, v12
-; CHECK-NEXT: vmulhu.vv v12, v8, v12
+; CHECK-NEXT: sw a0, 12(sp)
+; CHECK-NEXT: addi a0, zero, 1
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
+; CHECK-NEXT: vmv.v.i v12, -7
+; CHECK-NEXT: vmulhu.vv v28, v8, v28
; CHECK-NEXT: addi a0, zero, 61
-; CHECK-NEXT: vsrl.vx v12, v12, a0
-; CHECK-NEXT: vmseq.vi v0, v28, 1
-; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
+; CHECK-NEXT: vsrl.vx v28, v28, a0
+; CHECK-NEXT: vmseq.vi v0, v12, 1
+; CHECK-NEXT: vmerge.vvm v8, v28, v8, v0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i64> undef, i64 -7, i32 0
%splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
define <vscale x 8 x i64> @vdivu_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: vdivu_vx_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vdivu.vv v8, v8, v16
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 8 x i64> @vdivu_vi_nxv8i64_0(<vscale x 8 x i64> %va) {
; CHECK-LABEL: vdivu_vi_nxv8i64_0:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.i v16, -7
; CHECK-NEXT: lui a0, 131072
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: addi a0, zero, 32
-; CHECK-NEXT: vsll.vx v24, v24, a0
-; CHECK-NEXT: vmv.v.i v0, 1
-; CHECK-NEXT: vsll.vx v0, v0, a0
-; CHECK-NEXT: vsrl.vx v0, v0, a0
-; CHECK-NEXT: vor.vv v24, v0, v24
-; CHECK-NEXT: vmulhu.vv v24, v8, v24
+; CHECK-NEXT: sw a0, 12(sp)
+; CHECK-NEXT: addi a0, zero, 1
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
+; CHECK-NEXT: vmv.v.i v24, -7
+; CHECK-NEXT: vmulhu.vv v16, v8, v16
; CHECK-NEXT: addi a0, zero, 61
-; CHECK-NEXT: vsrl.vx v24, v24, a0
-; CHECK-NEXT: vmseq.vi v0, v16, 1
-; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0
+; CHECK-NEXT: vsrl.vx v16, v16, a0
+; CHECK-NEXT: vmseq.vi v0, v24, 1
+; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 -7, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 1 x i64> @vmax_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
; CHECK-LABEL: vmax_vx_nxv1i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vsrl.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vmax.vv v8, v8, v25
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
define <vscale x 2 x i64> @vmax_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
; CHECK-LABEL: vmax_vx_nxv2i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vsrl.vx v28, v28, a1
-; CHECK-NEXT: vor.vv v26, v28, v26
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vmax.vv v8, v8, v26
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
define <vscale x 4 x i64> @vmax_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
; CHECK-LABEL: vmax_vx_nxv4i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsll.vx v12, v12, a1
-; CHECK-NEXT: vsrl.vx v12, v12, a1
-; CHECK-NEXT: vor.vv v28, v12, v28
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vmax.vv v8, v8, v28
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
define <vscale x 8 x i64> @vmax_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: vmax_vx_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vmax.vv v8, v8, v16
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 1 x i64> @vmaxu_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
; CHECK-LABEL: vmaxu_vx_nxv1i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vsrl.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vmaxu.vv v8, v8, v25
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
define <vscale x 2 x i64> @vmaxu_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
; CHECK-LABEL: vmaxu_vx_nxv2i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vsrl.vx v28, v28, a1
-; CHECK-NEXT: vor.vv v26, v28, v26
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vmaxu.vv v8, v8, v26
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
define <vscale x 4 x i64> @vmaxu_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
; CHECK-LABEL: vmaxu_vx_nxv4i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsll.vx v12, v12, a1
-; CHECK-NEXT: vsrl.vx v12, v12, a1
-; CHECK-NEXT: vor.vv v28, v12, v28
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vmaxu.vv v8, v8, v28
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
define <vscale x 8 x i64> @vmaxu_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: vmaxu_vx_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vmaxu.vv v8, v8, v16
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 1 x i64> @vmin_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
; CHECK-LABEL: vmin_vx_nxv1i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vsrl.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vmin.vv v8, v8, v25
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
define <vscale x 2 x i64> @vmin_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
; CHECK-LABEL: vmin_vx_nxv2i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vsrl.vx v28, v28, a1
-; CHECK-NEXT: vor.vv v26, v28, v26
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vmin.vv v8, v8, v26
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
define <vscale x 4 x i64> @vmin_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
; CHECK-LABEL: vmin_vx_nxv4i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsll.vx v12, v12, a1
-; CHECK-NEXT: vsrl.vx v12, v12, a1
-; CHECK-NEXT: vor.vv v28, v12, v28
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vmin.vv v8, v8, v28
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
define <vscale x 8 x i64> @vmin_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: vmin_vx_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vmin.vv v8, v8, v16
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 1 x i64> @vminu_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
; CHECK-LABEL: vminu_vx_nxv1i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vsrl.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vminu.vv v8, v8, v25
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
define <vscale x 2 x i64> @vminu_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
; CHECK-LABEL: vminu_vx_nxv2i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vsrl.vx v28, v28, a1
-; CHECK-NEXT: vor.vv v26, v28, v26
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vminu.vv v8, v8, v26
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
define <vscale x 4 x i64> @vminu_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
; CHECK-LABEL: vminu_vx_nxv4i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsll.vx v12, v12, a1
-; CHECK-NEXT: vsrl.vx v12, v12, a1
-; CHECK-NEXT: vor.vv v28, v12, v28
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vminu.vv v8, v8, v28
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
define <vscale x 8 x i64> @vminu_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: vminu_vx_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vminu.vv v8, v8, v16
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 1 x i64> @vmul_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
; CHECK-LABEL: vmul_vx_nxv1i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vsrl.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vmul.vv v8, v8, v25
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
define <vscale x 2 x i64> @vmul_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
; CHECK-LABEL: vmul_vx_nxv2i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vsrl.vx v28, v28, a1
-; CHECK-NEXT: vor.vv v26, v28, v26
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vmul.vv v8, v8, v26
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
define <vscale x 4 x i64> @vmul_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
; CHECK-LABEL: vmul_vx_nxv4i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsll.vx v12, v12, a1
-; CHECK-NEXT: vsrl.vx v12, v12, a1
-; CHECK-NEXT: vor.vv v28, v12, v28
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vmul.vv v8, v8, v28
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
define <vscale x 8 x i64> @vmul_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: vmul_vx_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vmul.vv v8, v8, v16
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 1 x i64> @vor_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
; CHECK-LABEL: vor_vx_nxv1i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vsrl.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vor.vv v8, v8, v25
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
define <vscale x 2 x i64> @vor_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
; CHECK-LABEL: vor_vx_nxv2i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vsrl.vx v28, v28, a1
-; CHECK-NEXT: vor.vv v26, v28, v26
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vor.vv v8, v8, v26
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
define <vscale x 4 x i64> @vor_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
; CHECK-LABEL: vor_vx_nxv4i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsll.vx v12, v12, a1
-; CHECK-NEXT: vsrl.vx v12, v12, a1
-; CHECK-NEXT: vor.vv v28, v12, v28
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vor.vv v8, v8, v28
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
define <vscale x 8 x i64> @vor_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: vor_vx_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define i64 @vreduce_smax_nxv1i64(<vscale x 1 x i64> %v) {
; CHECK-LABEL: vreduce_smax_nxv1i64:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: lui a0, 524288
-; CHECK-NEXT: vsetvli a1, zero, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a0
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vmv.v.i v26, 0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vsrl.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: sw a0, 12(sp)
+; CHECK-NEXT: sw zero, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vredmax.vs v25, v8, v25
; CHECK-NEXT: vmv.x.s a0, v25
+; CHECK-NEXT: addi a1, zero, 32
; CHECK-NEXT: vsetivli a2, 1, e64,m1,ta,mu
; CHECK-NEXT: vsrl.vx v25, v25, a1
; CHECK-NEXT: vmv.x.s a1, v25
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%red = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> %v)
ret i64 %red
define i64 @vreduce_smin_nxv1i64(<vscale x 1 x i64> %v) {
; CHECK-LABEL: vreduce_smin_nxv1i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.i v25, -1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vsrl.vx v25, v25, a1
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: addi a0, zero, -1
+; CHECK-NEXT: sw a0, 8(sp)
; CHECK-NEXT: lui a0, 524288
; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v25, v26
+; CHECK-NEXT: sw a0, 12(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vredmin.vs v25, v8, v25
; CHECK-NEXT: vmv.x.s a0, v25
+; CHECK-NEXT: addi a1, zero, 32
; CHECK-NEXT: vsetivli a2, 1, e64,m1,ta,mu
; CHECK-NEXT: vsrl.vx v25, v25, a1
; CHECK-NEXT: vmv.x.s a1, v25
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%red = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> %v)
ret i64 %red
define i64 @vreduce_smax_nxv2i64(<vscale x 2 x i64> %v) {
; CHECK-LABEL: vreduce_smax_nxv2i64:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: lui a0, 524288
-; CHECK-NEXT: vsetvli a1, zero, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a0
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vmv.v.i v26, 0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vsrl.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: sw a0, 12(sp)
+; CHECK-NEXT: sw zero, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
; CHECK-NEXT: vredmax.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
+; CHECK-NEXT: addi a1, zero, 32
; CHECK-NEXT: vsetivli a2, 1, e64,m1,ta,mu
; CHECK-NEXT: vsrl.vx v25, v25, a1
; CHECK-NEXT: vmv.x.s a1, v25
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%red = call i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64> %v)
ret i64 %red
define i64 @vreduce_smin_nxv2i64(<vscale x 2 x i64> %v) {
; CHECK-LABEL: vreduce_smin_nxv2i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.i v25, -1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vsrl.vx v25, v25, a1
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: addi a0, zero, -1
+; CHECK-NEXT: sw a0, 8(sp)
; CHECK-NEXT: lui a0, 524288
; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v25, v26
+; CHECK-NEXT: sw a0, 12(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
; CHECK-NEXT: vredmin.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
+; CHECK-NEXT: addi a1, zero, 32
; CHECK-NEXT: vsetivli a2, 1, e64,m1,ta,mu
; CHECK-NEXT: vsrl.vx v25, v25, a1
; CHECK-NEXT: vmv.x.s a1, v25
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%red = call i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64> %v)
ret i64 %red
define i64 @vreduce_smax_nxv4i64(<vscale x 4 x i64> %v) {
; CHECK-LABEL: vreduce_smax_nxv4i64:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: lui a0, 524288
-; CHECK-NEXT: vsetvli a1, zero, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a0
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vmv.v.i v26, 0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vsrl.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: sw a0, 12(sp)
+; CHECK-NEXT: sw zero, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
; CHECK-NEXT: vredmax.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
+; CHECK-NEXT: addi a1, zero, 32
; CHECK-NEXT: vsetivli a2, 1, e64,m1,ta,mu
; CHECK-NEXT: vsrl.vx v25, v25, a1
; CHECK-NEXT: vmv.x.s a1, v25
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%red = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v)
ret i64 %red
define i64 @vreduce_smin_nxv4i64(<vscale x 4 x i64> %v) {
; CHECK-LABEL: vreduce_smin_nxv4i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.i v25, -1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vsrl.vx v25, v25, a1
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: addi a0, zero, -1
+; CHECK-NEXT: sw a0, 8(sp)
; CHECK-NEXT: lui a0, 524288
; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v25, v26
+; CHECK-NEXT: sw a0, 12(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
; CHECK-NEXT: vredmin.vs v25, v8, v25
; CHECK-NEXT: vsetvli zero, zero, e64,m1,ta,mu
; CHECK-NEXT: vmv.x.s a0, v25
+; CHECK-NEXT: addi a1, zero, 32
; CHECK-NEXT: vsetivli a2, 1, e64,m1,ta,mu
; CHECK-NEXT: vsrl.vx v25, v25, a1
; CHECK-NEXT: vmv.x.s a1, v25
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%red = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v)
ret i64 %red
define <vscale x 1 x i64> @vrem_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
; CHECK-LABEL: vrem_vx_nxv1i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vsrl.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vrem.vv v8, v8, v25
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
define <vscale x 1 x i64> @vrem_vi_nxv1i64_0(<vscale x 1 x i64> %va) {
; CHECK-LABEL: vrem_vi_nxv1i64_0:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: lui a0, 748983
; CHECK-NEXT: addi a0, a0, -586
-; CHECK-NEXT: vsetvli a1, zero, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a0
-; CHECK-NEXT: addi a0, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a0
-; CHECK-NEXT: lui a1, 898779
-; CHECK-NEXT: addi a1, a1, 1755
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: vsll.vx v26, v26, a0
-; CHECK-NEXT: vsrl.vx v26, v26, a0
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: sw a0, 12(sp)
+; CHECK-NEXT: lui a0, 898779
+; CHECK-NEXT: addi a0, a0, 1755
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vmulh.vv v25, v8, v25
; CHECK-NEXT: addi a0, zero, 63
; CHECK-NEXT: vsrl.vx v26, v25, a0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v25, v25, a0
; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i64> undef, i64 -7, i32 0
%splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
define <vscale x 2 x i64> @vrem_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
; CHECK-LABEL: vrem_vx_nxv2i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vsrl.vx v28, v28, a1
-; CHECK-NEXT: vor.vv v26, v28, v26
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vrem.vv v8, v8, v26
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
define <vscale x 2 x i64> @vrem_vi_nxv2i64_0(<vscale x 2 x i64> %va) {
; CHECK-LABEL: vrem_vi_nxv2i64_0:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: lui a0, 748983
; CHECK-NEXT: addi a0, a0, -586
-; CHECK-NEXT: vsetvli a1, zero, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: addi a0, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a0
-; CHECK-NEXT: lui a1, 898779
-; CHECK-NEXT: addi a1, a1, 1755
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: vsll.vx v28, v28, a0
-; CHECK-NEXT: vsrl.vx v28, v28, a0
-; CHECK-NEXT: vor.vv v26, v28, v26
+; CHECK-NEXT: sw a0, 12(sp)
+; CHECK-NEXT: lui a0, 898779
+; CHECK-NEXT: addi a0, a0, 1755
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vmulh.vv v26, v8, v26
; CHECK-NEXT: addi a0, zero, 63
; CHECK-NEXT: vsrl.vx v28, v26, a0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v26, v26, a0
; CHECK-NEXT: vsub.vv v8, v8, v26
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i64> undef, i64 -7, i32 0
%splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
define <vscale x 4 x i64> @vrem_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
; CHECK-LABEL: vrem_vx_nxv4i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsll.vx v12, v12, a1
-; CHECK-NEXT: vsrl.vx v12, v12, a1
-; CHECK-NEXT: vor.vv v28, v12, v28
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vrem.vv v8, v8, v28
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
define <vscale x 4 x i64> @vrem_vi_nxv4i64_0(<vscale x 4 x i64> %va) {
; CHECK-LABEL: vrem_vi_nxv4i64_0:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: lui a0, 748983
; CHECK-NEXT: addi a0, a0, -586
-; CHECK-NEXT: vsetvli a1, zero, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: addi a0, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a0
-; CHECK-NEXT: lui a1, 898779
-; CHECK-NEXT: addi a1, a1, 1755
-; CHECK-NEXT: vmv.v.x v12, a1
-; CHECK-NEXT: vsll.vx v12, v12, a0
-; CHECK-NEXT: vsrl.vx v12, v12, a0
-; CHECK-NEXT: vor.vv v28, v12, v28
+; CHECK-NEXT: sw a0, 12(sp)
+; CHECK-NEXT: lui a0, 898779
+; CHECK-NEXT: addi a0, a0, 1755
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vmulh.vv v28, v8, v28
; CHECK-NEXT: addi a0, zero, 63
; CHECK-NEXT: vsrl.vx v12, v28, a0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v28, v28, a0
; CHECK-NEXT: vsub.vv v8, v8, v28
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i64> undef, i64 -7, i32 0
%splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
define <vscale x 8 x i64> @vrem_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: vrem_vx_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vrem.vv v8, v8, v16
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 8 x i64> @vrem_vi_nxv8i64_0(<vscale x 8 x i64> %va) {
; CHECK-LABEL: vrem_vi_nxv8i64_0:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: lui a0, 748983
; CHECK-NEXT: addi a0, a0, -586
-; CHECK-NEXT: vsetvli a1, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a0
-; CHECK-NEXT: addi a0, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a0
-; CHECK-NEXT: lui a1, 898779
-; CHECK-NEXT: addi a1, a1, 1755
-; CHECK-NEXT: vmv.v.x v24, a1
-; CHECK-NEXT: vsll.vx v24, v24, a0
-; CHECK-NEXT: vsrl.vx v24, v24, a0
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: sw a0, 12(sp)
+; CHECK-NEXT: lui a0, 898779
+; CHECK-NEXT: addi a0, a0, 1755
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vmulh.vv v16, v8, v16
; CHECK-NEXT: addi a0, zero, 63
; CHECK-NEXT: vsrl.vx v24, v16, a0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v16, v16, a0
; CHECK-NEXT: vsub.vv v8, v8, v16
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 -7, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 1 x i64> @vremu_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
; CHECK-LABEL: vremu_vx_nxv1i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vsrl.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vremu.vv v8, v8, v25
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
define <vscale x 1 x i64> @vremu_vi_nxv1i64_0(<vscale x 1 x i64> %va) {
; CHECK-LABEL: vremu_vi_nxv1i64_0:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.i v25, -7
; CHECK-NEXT: lui a0, 131072
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: addi a0, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a0
-; CHECK-NEXT: vmv.v.i v27, 1
-; CHECK-NEXT: vsll.vx v27, v27, a0
-; CHECK-NEXT: vsrl.vx v27, v27, a0
-; CHECK-NEXT: vor.vv v26, v27, v26
-; CHECK-NEXT: vmulhu.vv v26, v8, v26
+; CHECK-NEXT: sw a0, 12(sp)
+; CHECK-NEXT: addi a0, zero, 1
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
+; CHECK-NEXT: vmv.v.i v26, -7
+; CHECK-NEXT: vmulhu.vv v25, v8, v25
; CHECK-NEXT: addi a0, zero, 61
-; CHECK-NEXT: vsrl.vx v26, v26, a0
-; CHECK-NEXT: vmseq.vi v0, v25, 1
-; CHECK-NEXT: vmerge.vvm v25, v26, v8, v0
+; CHECK-NEXT: vsrl.vx v25, v25, a0
+; CHECK-NEXT: vmseq.vi v0, v26, 1
+; CHECK-NEXT: vmerge.vvm v25, v25, v8, v0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v25, v25, a0
; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i64> undef, i64 -7, i32 0
%splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
define <vscale x 2 x i64> @vremu_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
; CHECK-LABEL: vremu_vx_nxv2i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vsrl.vx v28, v28, a1
-; CHECK-NEXT: vor.vv v26, v28, v26
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vremu.vv v8, v8, v26
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
define <vscale x 2 x i64> @vremu_vi_nxv2i64_0(<vscale x 2 x i64> %va) {
; CHECK-LABEL: vremu_vi_nxv2i64_0:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.i v26, -7
; CHECK-NEXT: lui a0, 131072
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: addi a0, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a0
-; CHECK-NEXT: vmv.v.i v30, 1
-; CHECK-NEXT: vsll.vx v30, v30, a0
-; CHECK-NEXT: vsrl.vx v30, v30, a0
-; CHECK-NEXT: vor.vv v28, v30, v28
-; CHECK-NEXT: vmulhu.vv v28, v8, v28
+; CHECK-NEXT: sw a0, 12(sp)
+; CHECK-NEXT: addi a0, zero, 1
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
+; CHECK-NEXT: vmv.v.i v28, -7
+; CHECK-NEXT: vmulhu.vv v26, v8, v26
; CHECK-NEXT: addi a0, zero, 61
-; CHECK-NEXT: vsrl.vx v28, v28, a0
-; CHECK-NEXT: vmseq.vi v0, v26, 1
-; CHECK-NEXT: vmerge.vvm v26, v28, v8, v0
+; CHECK-NEXT: vsrl.vx v26, v26, a0
+; CHECK-NEXT: vmseq.vi v0, v28, 1
+; CHECK-NEXT: vmerge.vvm v26, v26, v8, v0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v26, v26, a0
; CHECK-NEXT: vsub.vv v8, v8, v26
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i64> undef, i64 -7, i32 0
%splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
define <vscale x 4 x i64> @vremu_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
; CHECK-LABEL: vremu_vx_nxv4i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsll.vx v12, v12, a1
-; CHECK-NEXT: vsrl.vx v12, v12, a1
-; CHECK-NEXT: vor.vv v28, v12, v28
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vremu.vv v8, v8, v28
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
define <vscale x 4 x i64> @vremu_vi_nxv4i64_0(<vscale x 4 x i64> %va) {
; CHECK-LABEL: vremu_vi_nxv4i64_0:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.i v28, -7
; CHECK-NEXT: lui a0, 131072
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: addi a0, zero, 32
-; CHECK-NEXT: vsll.vx v12, v12, a0
-; CHECK-NEXT: vmv.v.i v16, 1
-; CHECK-NEXT: vsll.vx v16, v16, a0
-; CHECK-NEXT: vsrl.vx v16, v16, a0
-; CHECK-NEXT: vor.vv v12, v16, v12
-; CHECK-NEXT: vmulhu.vv v12, v8, v12
+; CHECK-NEXT: sw a0, 12(sp)
+; CHECK-NEXT: addi a0, zero, 1
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
+; CHECK-NEXT: vmv.v.i v12, -7
+; CHECK-NEXT: vmulhu.vv v28, v8, v28
; CHECK-NEXT: addi a0, zero, 61
-; CHECK-NEXT: vsrl.vx v12, v12, a0
-; CHECK-NEXT: vmseq.vi v0, v28, 1
-; CHECK-NEXT: vmerge.vvm v28, v12, v8, v0
+; CHECK-NEXT: vsrl.vx v28, v28, a0
+; CHECK-NEXT: vmseq.vi v0, v12, 1
+; CHECK-NEXT: vmerge.vvm v28, v28, v8, v0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v28, v28, a0
; CHECK-NEXT: vsub.vv v8, v8, v28
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i64> undef, i64 -7, i32 0
%splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
define <vscale x 8 x i64> @vremu_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: vremu_vx_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vremu.vv v8, v8, v16
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 8 x i64> @vremu_vi_nxv8i64_0(<vscale x 8 x i64> %va) {
; CHECK-LABEL: vremu_vi_nxv8i64_0:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.i v16, -7
; CHECK-NEXT: lui a0, 131072
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: addi a0, zero, 32
-; CHECK-NEXT: vsll.vx v24, v24, a0
-; CHECK-NEXT: vmv.v.i v0, 1
-; CHECK-NEXT: vsll.vx v0, v0, a0
-; CHECK-NEXT: vsrl.vx v0, v0, a0
-; CHECK-NEXT: vor.vv v24, v0, v24
-; CHECK-NEXT: vmulhu.vv v24, v8, v24
+; CHECK-NEXT: sw a0, 12(sp)
+; CHECK-NEXT: addi a0, zero, 1
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
+; CHECK-NEXT: vmv.v.i v24, -7
+; CHECK-NEXT: vmulhu.vv v16, v8, v16
; CHECK-NEXT: addi a0, zero, 61
-; CHECK-NEXT: vsrl.vx v24, v24, a0
-; CHECK-NEXT: vmseq.vi v0, v16, 1
-; CHECK-NEXT: vmerge.vvm v16, v24, v8, v0
+; CHECK-NEXT: vsrl.vx v16, v16, a0
+; CHECK-NEXT: vmseq.vi v0, v24, 1
+; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0
; CHECK-NEXT: addi a0, zero, -7
; CHECK-NEXT: vmul.vx v16, v16, a0
; CHECK-NEXT: vsub.vv v8, v8, v16
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 -7, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 1 x i64> @vrsub_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
; CHECK-LABEL: vrsub_vx_nxv1i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vsrl.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vsub.vv v8, v25, v8
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
define <vscale x 2 x i64> @vrsub_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
; CHECK-LABEL: vrsub_vx_nxv2i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vsrl.vx v28, v28, a1
-; CHECK-NEXT: vor.vv v26, v28, v26
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vsub.vv v8, v26, v8
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
define <vscale x 4 x i64> @vrsub_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
; CHECK-LABEL: vrsub_vx_nxv4i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsll.vx v12, v12, a1
-; CHECK-NEXT: vsrl.vx v12, v12, a1
-; CHECK-NEXT: vor.vv v28, v12, v28
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vsub.vv v8, v28, v8
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
define <vscale x 8 x i64> @vrsub_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: vrsub_vx_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vsub.vv v8, v16, v8
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 1 x i64> @vmerge_xv_nxv1i64(<vscale x 1 x i64> %va, i64 %b, <vscale x 1 x i1> %cond) {
; CHECK-LABEL: vmerge_xv_nxv1i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vsrl.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vmerge.vvm v8, v8, v25, v0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
define <vscale x 2 x i64> @vmerge_xv_nxv2i64(<vscale x 2 x i64> %va, i64 %b, <vscale x 2 x i1> %cond) {
; CHECK-LABEL: vmerge_xv_nxv2i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vsrl.vx v28, v28, a1
-; CHECK-NEXT: vor.vv v26, v28, v26
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vmerge.vvm v8, v8, v26, v0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
define <vscale x 4 x i64> @vmerge_xv_nxv4i64(<vscale x 4 x i64> %va, i64 %b, <vscale x 4 x i1> %cond) {
; CHECK-LABEL: vmerge_xv_nxv4i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsll.vx v12, v12, a1
-; CHECK-NEXT: vsrl.vx v12, v12, a1
-; CHECK-NEXT: vor.vv v28, v12, v28
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vmerge.vvm v8, v8, v28, v0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
define <vscale x 8 x i64> @vmerge_xv_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vscale x 8 x i1> %cond) {
; CHECK-LABEL: vmerge_xv_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 1 x i64> @vshl_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
; CHECK-LABEL: vshl_vx_nxv1i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vsrl.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vsll.vv v8, v8, v25
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
define <vscale x 2 x i64> @vshl_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
; CHECK-LABEL: vshl_vx_nxv2i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vsrl.vx v28, v28, a1
-; CHECK-NEXT: vor.vv v26, v28, v26
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vsll.vv v8, v8, v26
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
define <vscale x 4 x i64> @vshl_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
; CHECK-LABEL: vshl_vx_nxv4i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsll.vx v12, v12, a1
-; CHECK-NEXT: vsrl.vx v12, v12, a1
-; CHECK-NEXT: vor.vv v28, v12, v28
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vsll.vv v8, v8, v28
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
define <vscale x 8 x i64> @vshl_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: vshl_vx_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vsll.vv v8, v8, v16
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 8 x i64> @vsplat_nxv8i64_4() {
; RV32V-LABEL: vsplat_nxv8i64_4:
; RV32V: # %bb.0:
+; RV32V-NEXT: addi sp, sp, -16
+; RV32V-NEXT: .cfi_def_cfa_offset 16
+; RV32V-NEXT: sw zero, 12(sp)
; RV32V-NEXT: lui a0, 1028096
; RV32V-NEXT: addi a0, a0, -1281
-; RV32V-NEXT: vsetvli a1, zero, e64,m8,ta,mu
-; RV32V-NEXT: vmv.v.x v8, a0
-; RV32V-NEXT: addi a0, zero, 32
-; RV32V-NEXT: vsll.vx v8, v8, a0
-; RV32V-NEXT: vsrl.vx v8, v8, a0
+; RV32V-NEXT: sw a0, 8(sp)
+; RV32V-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; RV32V-NEXT: addi a0, sp, 8
+; RV32V-NEXT: vlse64.v v8, (a0), zero
+; RV32V-NEXT: addi sp, sp, 16
; RV32V-NEXT: ret
;
; RV64V-LABEL: vsplat_nxv8i64_4:
define <vscale x 8 x i64> @vsplat_nxv8i64_5(i64 %a) {
; RV32V-LABEL: vsplat_nxv8i64_5:
; RV32V: # %bb.0:
-; RV32V-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; RV32V-NEXT: vmv.v.x v8, a1
-; RV32V-NEXT: addi a1, zero, 32
-; RV32V-NEXT: vsll.vx v8, v8, a1
-; RV32V-NEXT: vmv.v.x v16, a0
-; RV32V-NEXT: vsll.vx v16, v16, a1
-; RV32V-NEXT: vsrl.vx v16, v16, a1
-; RV32V-NEXT: vor.vv v8, v16, v8
+; RV32V-NEXT: addi sp, sp, -16
+; RV32V-NEXT: .cfi_def_cfa_offset 16
+; RV32V-NEXT: sw a1, 12(sp)
+; RV32V-NEXT: sw a0, 8(sp)
+; RV32V-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; RV32V-NEXT: addi a0, sp, 8
+; RV32V-NEXT: vlse64.v v8, (a0), zero
+; RV32V-NEXT: addi sp, sp, 16
; RV32V-NEXT: ret
;
; RV64V-LABEL: vsplat_nxv8i64_5:
define <vscale x 8 x i64> @vadd_vx_nxv8i64_10(<vscale x 8 x i64> %v) {
; RV32V-LABEL: vadd_vx_nxv8i64_10:
; RV32V: # %bb.0:
+; RV32V-NEXT: addi sp, sp, -16
+; RV32V-NEXT: .cfi_def_cfa_offset 16
+; RV32V-NEXT: sw zero, 12(sp)
; RV32V-NEXT: lui a0, 1028096
; RV32V-NEXT: addi a0, a0, -1281
-; RV32V-NEXT: vsetvli a1, zero, e64,m8,ta,mu
-; RV32V-NEXT: vmv.v.x v16, a0
-; RV32V-NEXT: addi a0, zero, 32
-; RV32V-NEXT: vsll.vx v16, v16, a0
-; RV32V-NEXT: vsrl.vx v16, v16, a0
+; RV32V-NEXT: sw a0, 8(sp)
+; RV32V-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; RV32V-NEXT: addi a0, sp, 8
+; RV32V-NEXT: vlse64.v v16, (a0), zero
; RV32V-NEXT: vadd.vv v8, v8, v16
+; RV32V-NEXT: addi sp, sp, 16
; RV32V-NEXT: ret
;
; RV64V-LABEL: vadd_vx_nxv8i64_10:
define <vscale x 8 x i64> @vadd_vx_nxv8i64_11(<vscale x 8 x i64> %v) {
; RV32V-LABEL: vadd_vx_nxv8i64_11:
; RV32V: # %bb.0:
+; RV32V-NEXT: addi sp, sp, -16
+; RV32V-NEXT: .cfi_def_cfa_offset 16
+; RV32V-NEXT: addi a0, zero, 1
+; RV32V-NEXT: sw a0, 12(sp)
+; RV32V-NEXT: lui a0, 1028096
+; RV32V-NEXT: addi a0, a0, -1281
+; RV32V-NEXT: sw a0, 8(sp)
; RV32V-NEXT: vsetvli a0, zero, e64,m8,ta,mu
-; RV32V-NEXT: vmv.v.i v16, 1
-; RV32V-NEXT: addi a0, zero, 32
-; RV32V-NEXT: vsll.vx v16, v16, a0
-; RV32V-NEXT: lui a1, 1028096
-; RV32V-NEXT: addi a1, a1, -1281
-; RV32V-NEXT: vmv.v.x v24, a1
-; RV32V-NEXT: vsll.vx v24, v24, a0
-; RV32V-NEXT: vsrl.vx v24, v24, a0
-; RV32V-NEXT: vor.vv v16, v24, v16
+; RV32V-NEXT: addi a0, sp, 8
+; RV32V-NEXT: vlse64.v v16, (a0), zero
; RV32V-NEXT: vadd.vv v8, v8, v16
+; RV32V-NEXT: addi sp, sp, 16
; RV32V-NEXT: ret
;
; RV64V-LABEL: vadd_vx_nxv8i64_11:
define <vscale x 8 x i64> @vadd_vx_nxv8i64_12(<vscale x 8 x i64> %v, i64 %a) {
; RV32V-LABEL: vadd_vx_nxv8i64_12:
; RV32V: # %bb.0:
-; RV32V-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; RV32V-NEXT: vmv.v.x v16, a1
-; RV32V-NEXT: addi a1, zero, 32
-; RV32V-NEXT: vsll.vx v16, v16, a1
-; RV32V-NEXT: vmv.v.x v24, a0
-; RV32V-NEXT: vsll.vx v24, v24, a1
-; RV32V-NEXT: vsrl.vx v24, v24, a1
-; RV32V-NEXT: vor.vv v16, v24, v16
+; RV32V-NEXT: addi sp, sp, -16
+; RV32V-NEXT: .cfi_def_cfa_offset 16
+; RV32V-NEXT: sw a1, 12(sp)
+; RV32V-NEXT: sw a0, 8(sp)
+; RV32V-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; RV32V-NEXT: addi a0, sp, 8
+; RV32V-NEXT: vlse64.v v16, (a0), zero
; RV32V-NEXT: vadd.vv v8, v8, v16
+; RV32V-NEXT: addi sp, sp, 16
; RV32V-NEXT: ret
;
; RV64V-LABEL: vadd_vx_nxv8i64_12:
define <vscale x 8 x i64> @vsplat_nxv8i64_14(i32 %a) {
; RV32V-LABEL: vsplat_nxv8i64_14:
; RV32V: # %bb.0:
-; RV32V-NEXT: vsetvli a1, zero, e64,m8,ta,mu
-; RV32V-NEXT: vmv.v.x v8, a0
-; RV32V-NEXT: addi a0, zero, 32
-; RV32V-NEXT: vsll.vx v8, v8, a0
-; RV32V-NEXT: vsrl.vx v8, v8, a0
+; RV32V-NEXT: addi sp, sp, -16
+; RV32V-NEXT: .cfi_def_cfa_offset 16
+; RV32V-NEXT: sw zero, 12(sp)
+; RV32V-NEXT: sw a0, 8(sp)
+; RV32V-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; RV32V-NEXT: addi a0, sp, 8
+; RV32V-NEXT: vlse64.v v8, (a0), zero
+; RV32V-NEXT: addi sp, sp, 16
; RV32V-NEXT: ret
;
; RV64V-LABEL: vsplat_nxv8i64_14:
define <vscale x 1 x i64> @vsra_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
; CHECK-LABEL: vsra_vx_nxv1i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vsrl.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vsra.vv v8, v8, v25
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
define <vscale x 2 x i64> @vsra_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
; CHECK-LABEL: vsra_vx_nxv2i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vsrl.vx v28, v28, a1
-; CHECK-NEXT: vor.vv v26, v28, v26
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vsra.vv v8, v8, v26
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
define <vscale x 4 x i64> @vsra_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
; CHECK-LABEL: vsra_vx_nxv4i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsll.vx v12, v12, a1
-; CHECK-NEXT: vsrl.vx v12, v12, a1
-; CHECK-NEXT: vor.vv v28, v12, v28
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vsra.vv v8, v8, v28
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
define <vscale x 8 x i64> @vsra_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: vsra_vx_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vsra.vv v8, v8, v16
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 1 x i64> @vsrl_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
; CHECK-LABEL: vsrl_vx_nxv1i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vsrl.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vsrl.vv v8, v8, v25
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
define <vscale x 2 x i64> @vsrl_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
; CHECK-LABEL: vsrl_vx_nxv2i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vsrl.vx v28, v28, a1
-; CHECK-NEXT: vor.vv v26, v28, v26
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vsrl.vv v8, v8, v26
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
define <vscale x 4 x i64> @vsrl_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
; CHECK-LABEL: vsrl_vx_nxv4i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsll.vx v12, v12, a1
-; CHECK-NEXT: vsrl.vx v12, v12, a1
-; CHECK-NEXT: vor.vv v28, v12, v28
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vsrl.vv v8, v8, v28
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
define <vscale x 8 x i64> @vsrl_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: vsrl_vx_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vsrl.vv v8, v8, v16
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 1 x i64> @vsub_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
; CHECK-LABEL: vsub_vx_nxv1i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vsrl.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vsub.vv v8, v8, v25
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
define <vscale x 2 x i64> @vsub_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
; CHECK-LABEL: vsub_vx_nxv2i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vsrl.vx v28, v28, a1
-; CHECK-NEXT: vor.vv v26, v28, v26
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vsub.vv v8, v8, v26
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
define <vscale x 4 x i64> @vsub_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
; CHECK-LABEL: vsub_vx_nxv4i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsll.vx v12, v12, a1
-; CHECK-NEXT: vsrl.vx v12, v12, a1
-; CHECK-NEXT: vor.vv v28, v12, v28
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vsub.vv v8, v8, v28
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
define <vscale x 8 x i64> @vsub_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: vsub_vx_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vsub.vv v8, v8, v16
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
define <vscale x 1 x i64> @vxor_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
; CHECK-LABEL: vxor_vx_nxv1i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m1,ta,mu
-; CHECK-NEXT: vmv.v.x v25, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v25, v25, a1
-; CHECK-NEXT: vmv.v.x v26, a0
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vsrl.vx v26, v26, a1
-; CHECK-NEXT: vor.vv v25, v26, v25
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v25, (a0), zero
; CHECK-NEXT: vxor.vv v8, v8, v25
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
define <vscale x 2 x i64> @vxor_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
; CHECK-LABEL: vxor_vx_nxv2i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m2,ta,mu
-; CHECK-NEXT: vmv.v.x v26, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v26, v26, a1
-; CHECK-NEXT: vmv.v.x v28, a0
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vsrl.vx v28, v28, a1
-; CHECK-NEXT: vor.vv v26, v28, v26
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v26, (a0), zero
; CHECK-NEXT: vxor.vv v8, v8, v26
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
define <vscale x 4 x i64> @vxor_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
; CHECK-LABEL: vxor_vx_nxv4i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m4,ta,mu
-; CHECK-NEXT: vmv.v.x v28, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v28, v28, a1
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsll.vx v12, v12, a1
-; CHECK-NEXT: vsrl.vx v12, v12, a1
-; CHECK-NEXT: vor.vv v28, v12, v28
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v28, (a0), zero
; CHECK-NEXT: vxor.vv v8, v8, v28
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
define <vscale x 8 x i64> @vxor_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: vxor_vx_nxv8i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
-; CHECK-NEXT: vmv.v.x v16, a1
-; CHECK-NEXT: addi a1, zero, 32
-; CHECK-NEXT: vsll.vx v16, v16, a1
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vsll.vx v24, v24, a1
-; CHECK-NEXT: vsrl.vx v24, v24, a1
-; CHECK-NEXT: vor.vv v16, v24, v16
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sw a1, 12(sp)
+; CHECK-NEXT: sw a0, 8(sp)
+; CHECK-NEXT: vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: vlse64.v v16, (a0), zero
; CHECK-NEXT: vxor.vv v8, v8, v16
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> undef, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
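All of the updated check blocks above share one shape: the low and high GPR halves of the i64 splat value are stored to an 8-byte stack slot and then broadcast with a zero-stride vlse64.v, replacing the previous vmv.v.x/vsll.vx/vsrl.vx/vor.vv sequence. As a hedged, minimal reproducer (not part of the patch; the function name splat_add and the exact -mattr spelling are assumptions), IR of the following form exercises this lowering when compiled for riscv32 with the vector extension enabled, e.g. llc -mtriple=riscv32 -mattr=+experimental-v:

; Hypothetical reproducer: splat an i64 scalar on RV32 and add it to a
; scalable vector operand.
define <vscale x 1 x i64> @splat_add(<vscale x 1 x i64> %va, i64 %b) {
  ; Build the splat via the usual insertelement + shufflevector idiom,
  ; matching the pattern used by the tests above.
  %head = insertelement <vscale x 1 x i64> undef, i64 %b, i32 0
  %splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
  ; With this patch the splat is expected to lower to sw/sw of the two
  ; halves followed by "vlse64.v v<n>, (a0), zero".
  %vc = add <vscale x 1 x i64> %splat, %va
  ret <vscale x 1 x i64> %vc
}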