From 46dee4a3a3dfb372a0eaa0b4490c80be2f421f29 Mon Sep 17 00:00:00 2001
From: Philip Reames
Date: Tue, 3 Jan 2023 14:34:28 -0800
Subject: [PATCH] [RISCV][InsertVSETVLI] Split out demanded property for zero/non-zero of VL

The scalar move instructions (vmv.s.x, and vfmv.s.f) depend solely on
whether the VL is 0 or non-zero. By tracking the fact we only demand the
zeroness and not the whole VL value, we can allow changing VL over a
scalar move. This helps to eliminate vsetvli toggles.

Differential Revision: https://reviews.llvm.org/D140157
---
 llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp      | 67 ++++++++++++++++------
 .../RISCV/rvv/fixed-vectors-int-buildvec.ll       |  3 +-
 .../RISCV/rvv/fixed-vectors-int-shuffles.ll       |  9 +--
 .../RISCV/rvv/fixed-vectors-mask-buildvec.ll      | 12 ++--
 .../RISCV/rvv/fixed-vectors-masked-gather.ll      |  6 +-
 .../RISCV/rvv/fixed-vectors-reduction-fp.ll       |  3 +-
 .../RISCV/rvv/fixed-vectors-reduction-int.ll      | 12 ++--
 llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll | 27 +++------
 .../CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll    | 48 ++++++----------
 llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll    | 57 ++++++------------
 10 files changed, 110 insertions(+), 134 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index bc36d06..361bcc4 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -126,7 +126,11 @@ static bool isMaskRegOp(const MachineInstr &MI) {
 /// Which subfields of VL or VTYPE have values we need to preserve?
 struct DemandedFields {
-  bool VL = false;
+  // Some unknown property of VL is used. If demanded, must preserve entire
+  // value.
+  bool VLAny = false;
+  // Only zero vs non-zero is used. If demanded, can change non-zero values.
+  bool VLZeroness = false;
   bool SEW = false;
   bool LMUL = false;
   bool SEWLMULRatio = false;
@@ -138,6 +142,11 @@ struct DemandedFields {
     return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
   }
 
+  // Return true if any property of VL was used
+  bool usedVL() {
+    return VLAny || VLZeroness;
+  }
+
   // Mark all VTYPE subfields and properties as demanded
   void demandVTYPE() {
     SEW = true;
@@ -146,6 +155,13 @@ struct DemandedFields {
     TailPolicy = true;
     MaskPolicy = true;
   }
+
+  // Mark all VL properties as demanded
+  void demandVL() {
+    VLAny = true;
+    VLZeroness = true;
+  }
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   /// Support for debugging, callable in GDB: V->dump()
   LLVM_DUMP_METHOD void dump() const {
@@ -156,7 +172,8 @@ struct DemandedFields {
   /// Implement operator<<.
   void print(raw_ostream &OS) const {
     OS << "{";
-    OS << "VL=" << VL << ", ";
+    OS << "VLAny=" << VLAny << ", ";
+    OS << "VLZeroness=" << VLZeroness << ", ";
     OS << "SEW=" << SEW << ", ";
     OS << "LMUL=" << LMUL << ", ";
     OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
@@ -219,7 +236,7 @@ static DemandedFields getDemanded(const MachineInstr &MI) {
   DemandedFields Res;
   // Start conservative if registers are used
   if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VL))
-    Res.VL = true;
+    Res.demandVL();
   if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VTYPE))
     Res.demandVTYPE();
   // Start conservative on the unlowered form too
@@ -227,7 +244,7 @@
   if (RISCVII::hasSEWOp(TSFlags)) {
     Res.demandVTYPE();
     if (RISCVII::hasVLOp(TSFlags))
-      Res.VL = true;
+      Res.demandVL();
 
     // Behavior is independent of mask policy.
     if (!RISCVII::usesMaskPolicy(TSFlags))
@@ -264,6 +281,7 @@ static DemandedFields getDemanded(const MachineInstr &MI) {
   if (isScalarMoveInstr(MI)) {
     Res.LMUL = false;
     Res.SEWLMULRatio = false;
+    Res.VLAny = false;
   }
 
   return Res;
@@ -436,8 +454,12 @@ public:
 
       if (SEW == Require.SEW)
        return true;
 
-    if (Used.VL && !hasSameAVL(Require))
+    if (Used.VLAny && !hasSameAVL(Require))
+      return false;
+
+    if (Used.VLZeroness && !hasEquallyZeroAVL(Require))
       return false;
+
     return areCompatibleVTYPEs(encodeVTYPE(), Require.encodeVTYPE(), Used);
   }
@@ -787,12 +809,10 @@ bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
 
   DemandedFields Used = getDemanded(MI);
 
-  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
-  if (isScalarMoveInstr(MI) && CurInfo.hasEquallyZeroAVL(Require)) {
-    Used.VL = false;
-    // Additionally, if writing to an implicit_def operand, we don't need to
-    // preserve any other bits and are thus compatible with any larger etype,
-    // and can disregard policy bits. Warning: It's tempting to try doing
+  if (isScalarMoveInstr(MI)) {
+    // For vmv.s.x and vfmv.s.f, if writing to an implicit_def operand, we don't
+    // need to preserve any other bits and are thus compatible with any larger
+    // etype, and can disregard policy bits. Warning: It's tempting to try doing
     // this for any tail agnostic operation, but we can't as TA requires
     // tail lanes to either be the original value or -1. We are writing
     // unknown bits to the lanes here.
@@ -1204,7 +1224,8 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
 }
 
 static void doUnion(DemandedFields &A, DemandedFields B) {
-  A.VL |= B.VL;
+  A.VLAny |= B.VLAny;
+  A.VLZeroness |= B.VLZeroness;
   A.SEW |= B.SEW;
   A.LMUL |= B.LMUL;
   A.SEWLMULRatio |= B.SEWLMULRatio;
@@ -1212,6 +1233,13 @@ static void doUnion(DemandedFields &A, DemandedFields B) {
   A.MaskPolicy |= B.MaskPolicy;
 }
 
+static bool isNonZeroAVL(const MachineOperand &MO) {
+  if (MO.isReg())
+    return RISCV::X0 == MO.getReg();
+  assert(MO.isImm());
+  return 0 != MO.getImm();
+}
+
 // Return true if we can mutate PrevMI to match MI without changing any the
 // fields which would be observed.
 static bool canMutatePriorConfig(const MachineInstr &PrevMI,
@@ -1221,13 +1249,20 @@
   // demanded, or b) we can't rewrite the former to be the later for
   // implementation reasons.
   if (!isVLPreservingConfig(MI)) {
-    if (Used.VL)
+    if (Used.VLAny)
       return false;
 
     // TODO: Requires more care in the mutation...
     if (isVLPreservingConfig(PrevMI))
      return false;
 
+    // We don't bother to handle the equally zero case here as it's largely
+    // uninteresting.
+    if (Used.VLZeroness &&
+        (!isNonZeroAVL(MI.getOperand(1)) ||
+         !isNonZeroAVL(PrevMI.getOperand(1))))
+      return false;
+
     // TODO: Track whether the register is defined between
     // PrevMI and MI.
     if (MI.getOperand(1).isReg() &&
@@ -1255,7 +1290,7 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
   // We can have arbitrary code in successors, so VL and VTYPE
   // must be considered demanded.
   DemandedFields Used;
-  Used.VL = true;
+  Used.demandVL();
   Used.demandVTYPE();
   SmallVector<MachineInstr*> ToDelete;
   for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
@@ -1268,10 +1303,10 @@
     Register VRegDef = MI.getOperand(0).getReg();
     if (VRegDef != RISCV::X0 &&
         !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef)))
-      Used.VL = true;
+      Used.demandVL();
 
     if (NextMI) {
-      if (!Used.VL && !Used.usedVTYPE()) {
+      if (!Used.usedVL() && !Used.usedVTYPE()) {
         ToDelete.push_back(&MI);
         // Leave NextMI unchanged
         continue;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index 767dbc0..fb753f5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -467,9 +467,8 @@ define void @buildvec_seq_v9i8(ptr %x) {
 ; RV32-NEXT: li a1, 3
 ; RV32-NEXT: sb a1, 8(a0)
 ; RV32-NEXT: li a1, 73
-; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; RV32-NEXT: vmv.s.x v0, a1
 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vmv.s.x v0, a1
 ; RV32-NEXT: vmv.v.i v9, 2
 ; RV32-NEXT: li a1, 36
 ; RV32-NEXT: vmv.s.x v8, a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
index 8e0167c..3616128 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -201,9 +201,8 @@ define <8 x i64> @vrgather_shuffle_vv_v8i64(<8 x i64> %x, <8 x i64> %y) {
 ; RV64-LABEL: vrgather_shuffle_vv_v8i64:
 ; RV64: # %bb.0:
 ; RV64-NEXT: li a0, 5
-; RV64-NEXT: vsetivli zero, 8, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v16, a0
 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
+; RV64-NEXT: vmv.s.x v16, a0
 ; RV64-NEXT: vmv.v.i v20, 2
 ; RV64-NEXT: lui a0, %hi(.LCPI11_0)
 ; RV64-NEXT: addi a0, a0, %lo(.LCPI11_0)
@@ -386,9 +385,8 @@ define <8 x i8> @splat_ve2_we0(<8 x i8> %v, <8 x i8> %w) {
 ; CHECK-LABEL: splat_ve2_we0:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: li a0, 66
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.s.x v0, a0
 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vmv.s.x v0, a0
 ; CHECK-NEXT: vrgather.vi v10, v8, 2
 ; CHECK-NEXT: vrgather.vi v10, v9, 0, v0.t
 ; CHECK-NEXT: vmv1r.v v8, v10
@@ -420,9 +418,8 @@ define <8 x i8> @splat_ve2_we0_ins_i0we4(<8 x i8> %v, <8 x i8> %w) {
 ; CHECK-LABEL: splat_ve2_we0_ins_i0we4:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: li a0, 67
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.s.x v0, a0
 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v0, a0
 ; CHECK-NEXT: vrgather.vi v10, v8, 2
 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT: vmv.v.i v8, 4
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
index a812c0a..6b7f581 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
@@ -166,9 +166,8 @@ define <4 x i1> @buildvec_mask_nonconst_v4i1(i1 %x, i1 %y) {
 ; CHECK-LABEL: buildvec_mask_nonconst_v4i1:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: li a2, 3
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.s.x v0, a2
 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vmv.s.x v0, a2
 ; CHECK-NEXT: vmv.v.x v8, a1
 ; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
 ; CHECK-NEXT: vand.vi v8, v8, 1
@@ -178,9 +177,8 @@ define <4 x i1> @buildvec_mask_nonconst_v4i1(i1 %x, i1 %y) {
 ; ZVE32F-LABEL: buildvec_mask_nonconst_v4i1:
 ; ZVE32F: # %bb.0:
 ; ZVE32F-NEXT: li a2, 3
-; ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; ZVE32F-NEXT: vmv.s.x v0, a2
 ; ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; ZVE32F-NEXT: vmv.s.x v0, a2
 ; ZVE32F-NEXT: vmv.v.x v8, a1
 ; ZVE32F-NEXT: vmerge.vxm v8, v8, a0, v0
 ; ZVE32F-NEXT: vand.vi v8, v8, 1
@@ -295,9 +293,8 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1(i1 %x, i1 %y) {
 ; CHECK-LABEL: buildvec_mask_nonconst_v8i1:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: li a2, 19
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.s.x v0, a2
 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vmv.s.x v0, a2
 ; CHECK-NEXT: vmv.v.x v8, a1
 ; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
 ; CHECK-NEXT: vand.vi v8, v8, 1
@@ -307,9 +304,8 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1(i1 %x, i1 %y) {
 ; ZVE32F-LABEL: buildvec_mask_nonconst_v8i1:
 ; ZVE32F: # %bb.0:
 ; ZVE32F-NEXT: li a2, 19
-; ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
-; ZVE32F-NEXT: vmv.s.x v0, a2
 ; ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; ZVE32F-NEXT: vmv.s.x v0, a2
 ; ZVE32F-NEXT: vmv.v.x v8, a1
 ; ZVE32F-NEXT: vmerge.vxm v8, v8, a0, v0
 ; ZVE32F-NEXT: vand.vi v8, v8, 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 11b059a..be781a9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -2530,9 +2530,8 @@ define <8 x i32> @mgather_v8i32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i32> %passthr
 ; RV64ZVE32F-NEXT: .LBB34_16: # %cond.load19
 ; RV64ZVE32F-NEXT: ld a0, 56(a0)
 ; RV64ZVE32F-NEXT: lw a0, 0(a0)
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v10, a0
 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vmv.s.x v10, a0
 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 7
 ; RV64ZVE32F-NEXT: ret
   %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
@@ -8445,9 +8444,8 @@ define <8 x float> @mgather_v8f32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x float> %pas
 ; RV64ZVE32F-NEXT: .LBB73_16: # %cond.load19
 ; RV64ZVE32F-NEXT: ld a0, 56(a0)
 ; RV64ZVE32F-NEXT: flw ft0, 0(a0)
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v10, ft0
 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vfmv.s.f v10, ft0
 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 7
 ; RV64ZVE32F-NEXT: ret
   %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
index 0515d61..d6e4d87 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
@@ -1110,9 +1110,8 @@ define double @vreduce_fwadd_v32f64(ptr %x, double %s) {
 ; CHECK-NEXT: vslidedown.vi v16, v8, 16
 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT: vfwadd.vv v24, v8, v16
-; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v8, fa0
 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT: vfmv.s.f v8, fa0
 ; CHECK-NEXT: vfredusum.vs v8, v24, v8
 ; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
index 4a0b023..45e91c5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
@@ -1435,9 +1435,8 @@ define i64 @vwreduce_add_v32i64(ptr %x) {
 ; RV32-NEXT: vslidedown.vi v16, v8, 16
 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
 ; RV32-NEXT: vwadd.vv v24, v8, v16
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vmv.s.x v8, zero
 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vmv.s.x v8, zero
 ; RV32-NEXT: vredsum.vs v8, v24, v8
 ; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
@@ -1454,9 +1453,8 @@ define i64 @vwreduce_add_v32i64(ptr %x) {
 ; RV64-NEXT: vslidedown.vi v16, v8, 16
 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
 ; RV64-NEXT: vwadd.vv v24, v8, v16
-; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v8, zero
 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vmv.s.x v8, zero
 ; RV64-NEXT: vredsum.vs v8, v24, v8
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
@@ -1476,9 +1474,8 @@ define i64 @vwreduce_uadd_v32i64(ptr %x) {
 ; RV32-NEXT: vslidedown.vi v16, v8, 16
 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
 ; RV32-NEXT: vwaddu.vv v24, v8, v16
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vmv.s.x v8, zero
 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT: vmv.s.x v8, zero
 ; RV32-NEXT: vredsum.vs v8, v24, v8
 ; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
@@ -1495,9 +1492,8 @@ define i64 @vwreduce_uadd_v32i64(ptr %x) {
 ; RV64-NEXT: vslidedown.vi v16, v8, 16
 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
 ; RV64-NEXT: vwaddu.vv v24, v8, v16
-; RV64-NEXT: vsetivli zero, 16, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v8, zero
 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT: vmv.s.x v8, zero
 ; RV64-NEXT: vredsum.vs v8, v24, v8
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll b/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll
index 0141d00..2326db8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll
@@ -4,9 +4,8 @@
 define i64 @reduce_add(i64 %x, <4 x i64> %v) {
 ; CHECK-LABEL: reduce_add:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, a0
 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vmv.s.x v10, a0
 ; CHECK-NEXT: vredsum.vs v8, v8, v10
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -34,9 +33,8 @@ entry:
 define i64 @reduce_and(i64 %x, <4 x i64> %v) {
 ; CHECK-LABEL: reduce_and:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, a0
 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vmv.s.x v10, a0
 ; CHECK-NEXT: vredand.vs v8, v8, v10
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -64,9 +62,8 @@ entry:
 define i64 @reduce_or(i64 %x, <4 x i64> %v) {
 ; CHECK-LABEL: reduce_or:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, a0
 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vmv.s.x v10, a0
 ; CHECK-NEXT: vredor.vs v8, v8, v10
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -94,9 +91,8 @@ entry:
 define i64 @reduce_xor(i64 %x, <4 x i64> %v) {
 ; CHECK-LABEL: reduce_xor:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, a0
 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vmv.s.x v10, a0
 ; CHECK-NEXT: vredxor.vs v8, v8, v10
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -109,9 +105,8 @@ entry:
 define i64 @reduce_xor2(<4 x i64> %v) {
 ; CHECK-LABEL: reduce_xor2:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, zero
 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vmv.s.x v10, zero
 ; CHECK-NEXT: vredxor.vs v8, v8, v10
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: andi a0, a0, 8
@@ -125,9 +120,8 @@ entry:
 define i64 @reduce_umax(i64 %x, <4 x i64> %v) {
 ; CHECK-LABEL: reduce_umax:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, a0
 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vmv.s.x v10, a0
 ; CHECK-NEXT: vredmaxu.vs v8, v8, v10
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -155,9 +149,8 @@ entry:
 define i64 @reduce_umin(i64 %x, <4 x i64> %v) {
 ; CHECK-LABEL: reduce_umin:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, a0
 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vmv.s.x v10, a0
 ; CHECK-NEXT: vredminu.vs v8, v8, v10
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -185,9 +178,8 @@ entry:
 define i64 @reduce_smax(i64 %x, <4 x i64> %v) {
 ; CHECK-LABEL: reduce_smax:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, a0
 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vmv.s.x v10, a0
 ; CHECK-NEXT: vredmax.vs v8, v8, v10
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -215,9 +207,8 @@ entry:
 define i64 @reduce_smin(i64 %x, <4 x i64> %v) {
 ; CHECK-LABEL: reduce_smin:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, a0
 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vmv.s.x v10, a0
 ; CHECK-NEXT: vredmin.vs v8, v8, v10
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
index 9722980..aafa5de 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
@@ -199,9 +199,8 @@ declare float @llvm.vector.reduce.fadd.nxv4f32(float, <vscale x 4 x float>)
 define float @vreduce_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
 ; CHECK-LABEL: vreduce_fadd_nxv4f32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v10, fa0
 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, fa0
 ; CHECK-NEXT: vfredusum.vs v8, v8, v10
 ; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret
@@ -212,9 +211,8 @@ define float @vreduce_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
 define float @vreduce_ord_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
 ; CHECK-LABEL: vreduce_ord_fadd_nxv4f32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v10, fa0
 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, fa0
 ; CHECK-NEXT: vfredosum.vs v8, v8, v10
 ; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret
@@ -313,9 +311,8 @@ declare double @llvm.vector.reduce.fadd.nxv2f64(double, <vscale x 2 x double>)
 define double @vreduce_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
 ; CHECK-LABEL: vreduce_fadd_nxv2f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v10, fa0
 ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, fa0
 ; CHECK-NEXT: vfredusum.vs v8, v8, v10
 ; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret
@@ -326,9 +323,8 @@ define double @vreduce_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
 define double @vreduce_ord_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
 ; CHECK-LABEL: vreduce_ord_fadd_nxv2f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v10, fa0
 ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, fa0
 ; CHECK-NEXT: vfredosum.vs v8, v8, v10
 ; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret
@@ -371,9 +367,8 @@ declare double @llvm.vector.reduce.fadd.nxv4f64(double, <vscale x 4 x double>)
 define double @vreduce_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
 ; CHECK-LABEL: vreduce_fadd_nxv4f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v12, fa0
 ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT: vfmv.s.f v12, fa0
 ; CHECK-NEXT: vfredusum.vs v8, v8, v12
 ; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret
@@ -384,9 +379,8 @@ define double @vreduce_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
 define double @vreduce_ord_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
 ; CHECK-LABEL: vreduce_ord_fadd_nxv4f64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v12, fa0
 ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT: vfmv.s.f v12, fa0
 ; CHECK-NEXT: vfredosum.vs v8, v8, v12
 ; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret
@@ -584,9 +578,8 @@ define float @vreduce_fmin_nxv4f32(<vscale x 4 x float> %v) {
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: lui a0, %hi(.LCPI40_0)
 ; CHECK-NEXT: flw ft0, %lo(.LCPI40_0)(a0)
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v10, ft0
 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, ft0
 ; CHECK-NEXT: vfredmin.vs v8, v8, v10
 ; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret
@@ -662,9 +655,8 @@ define double @vreduce_fmin_nxv2f64(<vscale x 2 x double> %v) {
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: lui a0, %hi(.LCPI45_0)
 ; CHECK-NEXT: fld ft0, %lo(.LCPI45_0)(a0)
-; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v10, ft0
 ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, ft0
 ; CHECK-NEXT: vfredmin.vs v8, v8, v10
 ; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret
@@ -679,9 +671,8 @@ define double @vreduce_fmin_nxv4f64(<vscale x 4 x double> %v) {
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: lui a0, %hi(.LCPI46_0)
 ; CHECK-NEXT: fld ft0, %lo(.LCPI46_0)(a0)
-; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v12, ft0
 ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT: vfmv.s.f v12, ft0
 ; CHECK-NEXT: vfredmin.vs v8, v8, v12
 ; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret
@@ -866,9 +857,8 @@ define float @vreduce_fmax_nxv4f32(<vscale x 4 x float> %v) {
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: lui a0, %hi(.LCPI58_0)
 ; CHECK-NEXT: flw ft0, %lo(.LCPI58_0)(a0)
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v10, ft0
 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, ft0
 ; CHECK-NEXT: vfredmax.vs v8, v8, v10
 ; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret
@@ -944,9 +934,8 @@ define double @vreduce_fmax_nxv2f64(<vscale x 2 x double> %v) {
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: lui a0, %hi(.LCPI63_0)
 ; CHECK-NEXT: fld ft0, %lo(.LCPI63_0)(a0)
-; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v10, ft0
 ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, ft0
 ; CHECK-NEXT: vfredmax.vs v8, v8, v10
 ; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret
@@ -961,9 +950,8 @@ define double @vreduce_fmax_nxv4f64(<vscale x 4 x double> %v) {
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: lui a0, %hi(.LCPI64_0)
 ; CHECK-NEXT: fld ft0, %lo(.LCPI64_0)(a0)
-; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v12, ft0
 ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT: vfmv.s.f v12, ft0
 ; CHECK-NEXT: vfredmax.vs v8, v8, v12
 ; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret
@@ -1040,9 +1028,8 @@ define half @vreduce_ord_fadd_nxv6f16(<vscale x 6 x half> %v, half %s) {
 ; CHECK-NEXT: vfmv.v.f v10, ft0
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
 ; CHECK-NEXT: vslideup.vx v9, v10, a0
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v10, fa0
 ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, fa0
 ; CHECK-NEXT: vfredosum.vs v8, v8, v10
 ; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret
@@ -1068,9 +1055,8 @@ define half @vreduce_ord_fadd_nxv10f16(<vscale x 10 x half> %v, half %s) {
 ; CHECK-NEXT: vslideup.vi v11, v12, 0
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
 ; CHECK-NEXT: vslideup.vx v11, v12, a0
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v12, fa0
 ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfmv.s.f v12, fa0
 ; CHECK-NEXT: vfredosum.vs v8, v8, v12
 ; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret
@@ -1132,9 +1118,8 @@ define half @vreduce_fadd_nxv6f16(<vscale x 6 x half> %v, half %s) {
 ; CHECK-NEXT: vfmv.v.f v10, ft0
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
 ; CHECK-NEXT: vslideup.vx v9, v10, a0
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v10, fa0
 ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfmv.s.f v10, fa0
 ; CHECK-NEXT: vfredusum.vs v8, v8, v10
 ; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret
@@ -1160,9 +1145,8 @@ define half @vreduce_fmin_nxv10f16(<vscale x 10 x half> %v) {
 ; CHECK-NEXT: vslideup.vi v11, v12, 0
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, ma
 ; CHECK-NEXT: vslideup.vx v11, v12, a0
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfmv.s.f v12, ft0
 ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfmv.s.f v12, ft0
 ; CHECK-NEXT: vfredmin.vs v8, v8, v12
 ; CHECK-NEXT: vfmv.f.s fa0, v8
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll
index 7941ab5..ac7b927 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll
@@ -1126,9 +1126,8 @@ declare i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32>)
 define signext i32 @vreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: vreduce_add_nxv4i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, zero
 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmv.s.x v10, zero
 ; CHECK-NEXT: vredsum.vs v8, v8, v10
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -1171,9 +1170,8 @@ declare i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32>)
 define signext i32 @vreduce_umax_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: vreduce_umax_nxv4i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, zero
 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmv.s.x v10, zero
 ; CHECK-NEXT: vredmaxu.vs v8, v8, v10
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -1187,9 +1185,8 @@ define signext i32 @vreduce_smax_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: vreduce_smax_nxv4i32:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: lui a0, 524288
-; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
 ; CHECK-NEXT: vmv.s.x v10, a0
-; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT: vredmax.vs v8, v8, v10
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -1219,9 +1216,8 @@ define signext i32 @vreduce_smin_nxv4i32(<vscale x 4 x i32> %v) {
 ; RV32: # %bb.0:
 ; RV32-NEXT: lui a0, 524288
 ; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma
 ; RV32-NEXT: vmv.s.x v10, a0
-; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma
 ; RV32-NEXT: vredmin.vs v8, v8, v10
 ; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: ret
@@ -1230,9 +1226,8 @@ define signext i32 @vreduce_smin_nxv4i32(<vscale x 4 x i32> %v) {
 ; RV64: # %bb.0:
 ; RV64-NEXT: lui a0, 524288
 ; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma
 ; RV64-NEXT: vmv.s.x v10, a0
-; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma
 ; RV64-NEXT: vredmin.vs v8, v8, v10
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
@@ -1260,9 +1255,8 @@ declare i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32>)
 define signext i32 @vreduce_or_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: vreduce_or_nxv4i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, zero
 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmv.s.x v10, zero
 ; CHECK-NEXT: vredor.vs v8, v8, v10
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -1275,9 +1269,8 @@ declare i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32>)
 define signext i32 @vreduce_xor_nxv4i32(<vscale x 4 x i32> %v) {
 ; CHECK-LABEL: vreduce_xor_nxv4i32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v10, zero
 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmv.s.x v10, zero
 ; CHECK-NEXT: vredxor.vs v8, v8, v10
 ; CHECK-NEXT: vmv.x.s a0, v8
 ; CHECK-NEXT: ret
@@ -1597,9 +1590,8 @@ define i64 @vreduce_add_nxv2i64(<vscale x 2 x i64> %v) {
 ;
 ; RV64-LABEL: vreduce_add_nxv2i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v10, zero
 ; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; RV64-NEXT: vmv.s.x v10, zero
 ; RV64-NEXT: vredsum.vs v8, v8, v10
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
@@ -1681,9 +1673,8 @@ define i64 @vreduce_umax_nxv2i64(<vscale x 2 x i64> %v) {
 ;
 ; RV64-LABEL: vreduce_umax_nxv2i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v10, zero
 ; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; RV64-NEXT: vmv.s.x v10, zero
 ; RV64-NEXT: vredmaxu.vs v8, v8, v10
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
@@ -1718,9 +1709,8 @@ define i64 @vreduce_smax_nxv2i64(<vscale x 2 x i64> %v) {
 ; RV64: # %bb.0:
 ; RV64-NEXT: li a0, -1
 ; RV64-NEXT: slli a0, a0, 63
-; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, ma
 ; RV64-NEXT: vmv.s.x v10, a0
-; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
 ; RV64-NEXT: vredmax.vs v8, v8, v10
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
@@ -1785,9 +1775,8 @@ define i64 @vreduce_smin_nxv2i64(<vscale x 2 x i64> %v) {
 ; RV64: # %bb.0:
 ; RV64-NEXT: li a0, -1
 ; RV64-NEXT: srli a0, a0, 1
-; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e64, m2, ta, ma
 ; RV64-NEXT: vmv.s.x v10, a0
-; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
 ; RV64-NEXT: vredmin.vs v8, v8, v10
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
@@ -1841,9 +1830,8 @@ define i64 @vreduce_or_nxv2i64(<vscale x 2 x i64> %v) {
 ;
 ; RV64-LABEL: vreduce_or_nxv2i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v10, zero
 ; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; RV64-NEXT: vmv.s.x v10, zero
 ; RV64-NEXT: vredor.vs v8, v8, v10
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
@@ -1869,9 +1857,8 @@ define i64 @vreduce_xor_nxv2i64(<vscale x 2 x i64> %v) {
 ;
 ; RV64-LABEL: vreduce_xor_nxv2i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v10, zero
 ; RV64-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; RV64-NEXT: vmv.s.x v10, zero
 ; RV64-NEXT: vredxor.vs v8, v8, v10
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
@@ -1897,9 +1884,8 @@ define i64 @vreduce_add_nxv4i64(<vscale x 4 x i64> %v) {
 ;
 ; RV64-LABEL: vreduce_add_nxv4i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v12, zero
 ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV64-NEXT: vmv.s.x v12, zero
 ; RV64-NEXT: vredsum.vs v8, v8, v12
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
@@ -1981,9 +1967,8 @@ define i64 @vreduce_umax_nxv4i64(<vscale x 4 x i64> %v) {
 ;
 ; RV64-LABEL: vreduce_umax_nxv4i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v12, zero
 ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV64-NEXT: vmv.s.x v12, zero
 ; RV64-NEXT: vredmaxu.vs v8, v8, v12
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
@@ -2018,9 +2003,8 @@ define i64 @vreduce_smax_nxv4i64(<vscale x 4 x i64> %v) {
 ; RV64: # %bb.0:
 ; RV64-NEXT: li a0, -1
 ; RV64-NEXT: slli a0, a0, 63
-; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e64, m4, ta, ma
 ; RV64-NEXT: vmv.s.x v12, a0
-; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
 ; RV64-NEXT: vredmax.vs v8, v8, v12
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
@@ -2085,9 +2069,8 @@ define i64 @vreduce_smin_nxv4i64(<vscale x 4 x i64> %v) {
 ; RV64: # %bb.0:
 ; RV64-NEXT: li a0, -1
 ; RV64-NEXT: srli a0, a0, 1
-; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
+; RV64-NEXT: vsetvli a1, zero, e64, m4, ta, ma
 ; RV64-NEXT: vmv.s.x v12, a0
-; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
 ; RV64-NEXT: vredmin.vs v8, v8, v12
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
@@ -2141,9 +2124,8 @@ define i64 @vreduce_or_nxv4i64(<vscale x 4 x i64> %v) {
 ;
 ; RV64-LABEL: vreduce_or_nxv4i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v12, zero
 ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV64-NEXT: vmv.s.x v12, zero
 ; RV64-NEXT: vredor.vs v8, v8, v12
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
@@ -2169,9 +2151,8 @@ define i64 @vreduce_xor_nxv4i64(<vscale x 4 x i64> %v) {
 ;
 ; RV64-LABEL: vreduce_xor_nxv4i64:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; RV64-NEXT: vmv.s.x v12, zero
 ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; RV64-NEXT: vmv.s.x v12, zero
 ; RV64-NEXT: vredxor.vs v8, v8, v12
 ; RV64-NEXT: vmv.x.s a0, v8
 ; RV64-NEXT: ret
-- 
2.7.4
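
Reviewer note, not part of the patch: the standalone C++ sketch below models the idea the commit message describes. Splitting the demanded VL property into an exact-value bit (VLAny) and a zero/non-zero bit (VLZeroness) lets a scalar move reuse a prior vsetvli whose AVL differs but is equally non-zero. All types, field names, and values here are illustrative stand-ins, not the pass's real data structures.

```cpp
// Toy model: when does a prior vsetvli state satisfy an instruction that
// only demands some properties of VL/VTYPE?
#include <cstdio>

struct DemandedFields {
  bool VLAny = false;      // the exact VL value is observed
  bool VLZeroness = false; // only VL == 0 vs. VL != 0 is observed
  bool VTYPE = false;      // collapsed stand-in for SEW/LMUL/ratio/policies
};

struct VSetVLIState {
  unsigned AVL;   // immediate AVL, for simplicity
  unsigned VTYPE; // encoded vtype, opaque here
};

static bool hasEquallyZeroAVL(const VSetVLIState &A, const VSetVLIState &B) {
  return (A.AVL == 0) == (B.AVL == 0);
}

// Mirror of the compatibility check: each demanded property must match.
static bool isCompatible(const VSetVLIState &Prev, const VSetVLIState &Req,
                         const DemandedFields &Used) {
  if (Used.VLAny && Prev.AVL != Req.AVL)
    return false;
  if (Used.VLZeroness && !hasEquallyZeroAVL(Prev, Req))
    return false;
  if (Used.VTYPE && Prev.VTYPE != Req.VTYPE)
    return false;
  return true;
}

int main() {
  VSetVLIState Prev{/*AVL=*/8, /*VTYPE=*/0x10}; // state left by an earlier vsetivli
  VSetVLIState Req{/*AVL=*/4, /*VTYPE=*/0x10};  // what the next op nominally wants

  DemandedFields ScalarMove;   // vmv.s.x / vfmv.s.f: only zeroness of VL matters
  ScalarMove.VLZeroness = true;
  ScalarMove.VTYPE = true;

  DemandedFields Ordinary;     // a normal vector op observes the exact VL
  Ordinary.VLAny = true;
  Ordinary.VTYPE = true;

  printf("scalar move reuses prior vsetvli: %d\n",
         isCompatible(Prev, Req, ScalarMove)); // 1: no toggle needed
  printf("ordinary op reuses prior vsetvli: %d\n",
         isCompatible(Prev, Req, Ordinary));   // 0: AVLs differ
  return 0;
}
```

This is exactly the shape of the wins in the updated tests: the vsetivli/vsetvli that used to precede each vmv.s.x or vfmv.s.f disappears because the following (non-zero AVL) configuration already satisfies the scalar move.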
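A second sketch, under the same caveats, for the guard the post-pass needs before folding two vsetvlis when only VL zeroness is demanded in between: both AVLs must be provably non-zero. In the real pass an x0 AVL operand selects the VLMAX form, which is never zero, while any other register is an unknown value; the helper below mirrors that with hypothetical types and is not the pass's actual API.

```cpp
// Toy model of the "equally non-zero" precondition for rewriting the
// earlier vsetvli's AVL to match the later one.
#include <cstdio>

struct AVLOperand {
  bool IsReg; // true: register AVL, false: immediate AVL
  bool IsX0;  // meaningful only when IsReg; x0 stands for the VLMAX form
  int Imm;    // meaningful only when !IsReg
};

static bool isNonZeroAVL(const AVLOperand &MO) {
  if (MO.IsReg)
    return MO.IsX0; // x0 => VLMAX => non-zero; any other register is unknown
  return MO.Imm != 0;
}

// Folding is only safe when both configurations are known non-zero, so the
// zeroness observed by the intervening scalar move cannot change.
static bool canMutateWithZeronessOnly(const AVLOperand &Prev,
                                      const AVLOperand &Next) {
  return isNonZeroAVL(Prev) && isNonZeroAVL(Next);
}

int main() {
  AVLOperand Imm1{false, false, 1}; // e.g. vsetivli zero, 1, ...
  AVLOperand Imm8{false, false, 8}; // e.g. vsetivli zero, 8, ...
  AVLOperand RegA{true, false, 0};  // e.g. vsetvli zero, a0, ... (unknown value)

  printf("fold imm 1 into imm 8: %d\n", canMutateWithZeronessOnly(Imm1, Imm8)); // 1
  printf("fold reg a0 into imm 8: %d\n", canMutateWithZeronessOnly(RegA, Imm8)); // 0
  return 0;
}
```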