Noticed this while looking at some SLP output. If we have an extractelement, we're probably using a slidedown into a destination with no contents (i.e. an IMPLICIT_DEF). Given this, we can allow the slide to use a larger VL and clobber the tail elements of the destination vector. Doing this lets us avoid vsetvli toggles in many fixed-length vector examples.
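
For example (taken from one of the updated extractelement tests below), extracting element 7 of a loaded <16 x i8> previously required a VL toggle between the load and the slide:

  vsetivli zero, 16, e8, m1, ta, ma
  vle8.v v8, (a0)
  vsetivli zero, 1, e8, m1, ta, ma
  vslidedown.vi v8, v8, 7
  vmv.x.s a0, v8

Since the slidedown's destination is an IMPLICIT_DEF, it can run under the load's VL=16, e8, m1 state and clobber only the (undefined) tail, so the second vsetivli is removed.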
Differential Revision: https://reviews.llvm.org/D148834
}
}
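+/// Return true if MI is a vector slide pseudo (vslideup/vslidedown, .vx or
+/// .vi form).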
+static bool isVSlideInstr(const MachineInstr &MI) {
+ switch (getRVVMCOpcode(MI.getOpcode())) {
+ default:
+ return false;
+ case RISCV::VSLIDEDOWN_VX:
+ case RISCV::VSLIDEDOWN_VI:
+ case RISCV::VSLIDEUP_VX:
+ case RISCV::VSLIDEUP_VI:
+ return true;
+ }
+}
+
/// Get the EEW for a load or store instruction. Return std::nullopt if MI is
/// not a load or store which ignores SEW.
static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
.addImm(Info.encodeVTYPE());
}
+static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) {
+ auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL);
+ return Fractional || LMul == 1;
+}
+
/// Return true if a VSETVLI is required to transition from CurInfo to Require
/// before MI.
bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
}
}
+ // A slidedown/slideup with a VL of 1 whose destination is an IMPLICIT_DEF
+ // can use any VL/SEW combination which writes at least the first element.
+ // Notes:
+ // * VL=1 is special only because we have existing support for zero vs
+ // non-zero VL. We could generalize this if we had a VL > C predicate.
+ // * The LMUL1 restriction is for machines whose latency may depend on VL.
+ // * As above, this is only legal for IMPLICIT_DEF, not TA.
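+ // For example, a vslidedown.vi with VL=1 which immediately follows a
+ // VL=16, e8, m1 load can reuse the load's VL/VTYPE: only the first element
+ // of the result is read, and the remaining lanes of the IMPLICIT_DEF
+ // destination may be freely clobbered.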
+ if (isVSlideInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
+ isLMUL1OrSmaller(CurInfo.getVLMUL())) {
+ auto *VRegDef = MRI->getVRegDef(MI.getOperand(1).getReg());
+ if (VRegDef && VRegDef->isImplicitDef() &&
+ CurInfo.getSEW() >= Require.getSEW()) {
+ Used.VLAny = false;
+ Used.VLZeroness = true;
+ Used.SEW = false;
+ Used.LMUL = false;
+ Used.SEWLMULRatio = false;
+ Used.TailPolicy = false;
+ }
+ }
+
if (CurInfo.isCompatible(Used, Require))
return false;
; CHECK-NEXT: vmseq.vi v0, v8, 0
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT: vslidedown.vx v8, v8, a1
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vmseq.vi v0, v8, 0
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vx v8, v8, a1
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vmseq.vi v0, v8, 0
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vx v8, v8, a1
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vmseq.vi v0, v8, 0
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v8, v8, a1
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v8, 1
; CHECK-NEXT: vmv.x.s a0, v8
-; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vfirst.m a1, v0
; CHECK-NEXT: seqz a1, a1
; CHECK-NEXT: vmv.v.x v8, a1
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v9, v8, 1
; CHECK-NEXT: addi a0, sp, 14
+; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; CHECK-NEXT: vse8.v v9, (a0)
; CHECK-NEXT: vslidedown.vi v9, v8, 2
; CHECK-NEXT: addi a0, sp, 13
; LMULMAX1-RV64-NEXT: vmax.vv v8, v8, v9
; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
; LMULMAX1-RV64-NEXT: addi a0, a0, 8
+; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
%a = load <6 x i16>, ptr %x
; CHECK-NEXT: vmseq.vi v0, v8, 0
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT: vslidedown.vx v8, v8, a1
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vmseq.vi v0, v8, 0
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vx v8, v8, a1
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-NEXT: vlm.v v8, (a0)
-; LMULMAX1-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; LMULMAX1-NEXT: vslidedown.vi v8, v8, 1
; LMULMAX1-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; LMULMAX1-NEXT: vsm.v v8, (a1)
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v8, 7
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v8, 7
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v8, 2
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v8, 7
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v8, 2
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v8, v8, a1
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v8, v8, a1
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vadd.vv v8, v8, v8
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v8, v8, a1
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: ret
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vle64.v v8, (a0)
; RV32-NEXT: vadd.vv v8, v8, v8
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vslidedown.vx v8, v8, a1
; RV32-NEXT: vmv.x.s a0, v8
; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v8, v8, a1
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: ret
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
; RV64-NEXT: vadd.vv v8, v8, v8
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vslidedown.vx v8, v8, a1
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vfadd.vv v8, v8, v8
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v8, v8, a1
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vfadd.vv v8, v8, v8
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v8, v8, a1
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vfadd.vv v8, v8, v8
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v8, v8, a1
; CHECK-NEXT: vfmv.f.s fa0, v8
; CHECK-NEXT: ret
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v8, 7
+; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vse8.v v8, (a1)
; CHECK-NEXT: ret
%a = load <16 x i8>, ptr %x
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v8, 7
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vse16.v v8, (a1)
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %x
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v8, 2
+; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vse32.v v8, (a1)
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %x
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vle64.v v8, (a0)
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vslidedown.vi v8, v8, 1
; RV32-NEXT: li a0, 32
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vsrl.vx v9, v8, a0
; RV32-NEXT: vmv.x.s a0, v9
; RV32-NEXT: vmv.x.s a2, v8
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 1
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vse64.v v8, (a1)
; RV64-NEXT: ret
%a = load <2 x i64>, ptr %x
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v8, 1
+; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vse64.v v8, (a1)
; CHECK-NEXT: ret
%a = load <2 x double>, ptr %x
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vadd.vi v8, v8, 13
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 2
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vrsub.vi v8, v8, 13
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 2
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
; RV32NOM-NEXT: li a0, 13
; RV32NOM-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32NOM-NEXT: vmul.vx v8, v8, a0
-; RV32NOM-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32NOM-NEXT: vslidedown.vi v8, v8, 2
; RV32NOM-NEXT: vmv.x.s a0, v8
; RV32NOM-NEXT: ret
; RV64-NEXT: li a0, 13
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vmul.vx v8, v8, a0
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 2
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
; RV32NOM-NEXT: vsra.vv v9, v8, v11
; RV32NOM-NEXT: vsrl.vi v8, v8, 31
; RV32NOM-NEXT: vadd.vv v8, v9, v8
-; RV32NOM-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32NOM-NEXT: vslidedown.vi v8, v8, 2
; RV32NOM-NEXT: vmv.x.s a0, v8
; RV32NOM-NEXT: ret
; RV64-NEXT: vsra.vv v8, v8, v11
; RV64-NEXT: vsrl.vi v9, v8, 31
; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 2
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
; RV32NOM-NEXT: lui a0, 322639
; RV32NOM-NEXT: addi a0, a0, -945
; RV32NOM-NEXT: vmulhu.vx v8, v8, a0
-; RV32NOM-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32NOM-NEXT: vslidedown.vi v8, v8, 2
; RV32NOM-NEXT: vmv.x.s a0, v8
; RV32NOM-NEXT: srli a0, a0, 2
; RV64-NEXT: addiw a0, a0, -945
; RV64-NEXT: vmulhu.vx v8, v8, a0
; RV64-NEXT: vsrl.vi v8, v8, 2
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 2
; RV64-NEXT: vmv.x.s a0, v8
; RV64-NEXT: ret
; LMULMAX1-RV64-NEXT: vfadd.vv v8, v8, v9
; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
; LMULMAX1-RV64-NEXT: addi a0, a0, 8
+; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
; LMULMAX1-RV64-NEXT: vfsub.vv v8, v8, v9
; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
; LMULMAX1-RV64-NEXT: addi a0, a0, 8
+; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
; LMULMAX1-RV64-NEXT: vfmul.vv v8, v8, v9
; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
; LMULMAX1-RV64-NEXT: addi a0, a0, 8
+; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
; LMULMAX1-RV64-NEXT: vfdiv.vv v8, v8, v9
; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
; LMULMAX1-RV64-NEXT: addi a0, a0, 8
+; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
; LMULMAX1-RV64-NEXT: vfneg.v v8, v8
; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
; LMULMAX1-RV64-NEXT: addi a0, a0, 8
+; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
; LMULMAX1-RV64-NEXT: vfabs.v v8, v8
; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
; LMULMAX1-RV64-NEXT: addi a0, a0, 8
+; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
; LMULMAX1-RV64-NEXT: vfsgnj.vv v8, v8, v9
; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
; LMULMAX1-RV64-NEXT: addi a0, a0, 8
+; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
; LMULMAX1-RV64-NEXT: vfsgnj.vf v8, v8, fa0
; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
; LMULMAX1-RV64-NEXT: addi a0, a0, 8
+; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
; LMULMAX1-RV64-NEXT: vfsgnjn.vv v8, v8, v9
; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
; LMULMAX1-RV64-NEXT: addi a0, a0, 8
+; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
; LMULMAX1-RV32-NEXT: vle16.v v9, (a0)
; LMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v8
; LMULMAX1-RV32-NEXT: vfsgnjn.vv v8, v9, v10
-; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2
; LMULMAX1-RV32-NEXT: addi a1, a0, 4
+; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; LMULMAX1-RV32-NEXT: vse16.v v9, (a1)
; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: vle32.v v9, (a1)
; LMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9
; LMULMAX1-RV64-NEXT: vfsgnjn.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; LMULMAX1-RV64-NEXT: vslidedown.vi v9, v8, 2
; LMULMAX1-RV64-NEXT: addi a1, a0, 4
+; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; LMULMAX1-RV64-NEXT: vse16.v v9, (a1)
; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: vfsqrt.v v8, v8
; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
; LMULMAX1-RV64-NEXT: addi a0, a0, 8
+; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
; LMULMAX1-RV64-NEXT: vfmacc.vv v10, v8, v9
; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse64.v v10, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v10, 2
; LMULMAX1-RV64-NEXT: addi a0, a0, 8
+; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
; LMULMAX1-RV64-NEXT: vfmsac.vv v10, v8, v9
; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse64.v v10, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v10, 2
; LMULMAX1-RV64-NEXT: addi a0, a0, 8
+; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
; LMULMAX1-RV64-NEXT: vfadd.vf v8, v8, fa0
; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
; LMULMAX1-RV64-NEXT: addi a0, a0, 8
+; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
; LMULMAX1-RV64-NEXT: vfadd.vf v8, v8, fa0
; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
; LMULMAX1-RV64-NEXT: addi a0, a0, 8
+; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
; LMULMAX1-RV64-NEXT: vfsub.vf v8, v8, fa0
; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
; LMULMAX1-RV64-NEXT: addi a0, a0, 8
+; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
; LMULMAX1-RV64-NEXT: vfrsub.vf v8, v8, fa0
; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
; LMULMAX1-RV64-NEXT: addi a0, a0, 8
+; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
; LMULMAX1-RV64-NEXT: vfmul.vf v8, v8, fa0
; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
; LMULMAX1-RV64-NEXT: addi a0, a0, 8
+; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
; LMULMAX1-RV64-NEXT: vfmul.vf v8, v8, fa0
; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
; LMULMAX1-RV64-NEXT: addi a0, a0, 8
+; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
; LMULMAX1-RV64-NEXT: vfdiv.vf v8, v8, fa0
; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
; LMULMAX1-RV64-NEXT: addi a0, a0, 8
+; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
; LMULMAX1-RV64-NEXT: vfrdiv.vf v8, v8, fa0
; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
; LMULMAX1-RV64-NEXT: addi a0, a0, 8
+; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
; LMULMAX1-RV64-NEXT: vfmacc.vf v9, fa0, v8
; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse64.v v9, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2
; LMULMAX1-RV64-NEXT: addi a0, a0, 8
+; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
; LMULMAX1-RV64-NEXT: vfmacc.vf v9, fa0, v8
; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse64.v v9, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2
; LMULMAX1-RV64-NEXT: addi a0, a0, 8
+; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
; LMULMAX1-RV64-NEXT: vfmsac.vf v9, fa0, v8
; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse64.v v9, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2
; LMULMAX1-RV64-NEXT: addi a0, a0, 8
+; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
; LMULMAX1-RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
; LMULMAX1-RV64-NEXT: addi a0, a0, 8
+; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
; LMULMAX1-RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
; LMULMAX1-RV64-NEXT: addi a0, a0, 8
+; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
; LMULMAX1-RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
; LMULMAX1-RV64-NEXT: addi a0, a0, 8
+; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
; LMULMAX1-RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
; LMULMAX1-RV64-NEXT: addi a0, a0, 8
+; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
; LMULMAX1-RV64-NEXT: vfmacc.vv v10, v8, v9
; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse64.v v10, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v10, 2
; LMULMAX1-RV64-NEXT: addi a0, a0, 8
+; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
; LMULMAX1-RV64-NEXT: vfmsac.vv v10, v8, v9
; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse64.v v10, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v10, 2
; LMULMAX1-RV64-NEXT: addi a0, a0, 8
+; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
; RV32-NEXT: fmin.d fa5, fa5, fa3
; RV32-NEXT: fcvt.w.d a2, fa5, rtz
; RV32-NEXT: and a0, a0, a2
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vslidedown.vi v8, v8, 1
; RV32-NEXT: vfmv.f.s fa5, v8
; RV32-NEXT: feq.d a2, fa5, fa5
; RV32-NEXT: fmin.d fa5, fa5, fa3
; RV32-NEXT: fcvt.w.d a3, fa5, rtz
; RV32-NEXT: and a2, a2, a3
-; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
; RV32-NEXT: vmv.v.x v8, a2
; RV32-NEXT: vsetvli zero, zero, e8, mf8, tu, ma
; RV32-NEXT: vmv.s.x v8, a0
; RV64-NEXT: fmin.d fa5, fa5, fa3
; RV64-NEXT: fcvt.l.d a2, fa5, rtz
; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 1
; RV64-NEXT: vfmv.f.s fa5, v8
; RV64-NEXT: feq.d a2, fa5, fa5
; RV64-NEXT: fmin.d fa5, fa5, fa3
; RV64-NEXT: fcvt.l.d a3, fa5, rtz
; RV64-NEXT: and a2, a2, a3
-; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
; RV64-NEXT: vmv.v.x v8, a2
; RV64-NEXT: vsetvli zero, zero, e8, mf8, tu, ma
; RV64-NEXT: vmv.s.x v8, a0
; RV32-NEXT: fmax.d fa4, fa4, fa3
; RV32-NEXT: fmin.d fa4, fa4, fa5
; RV32-NEXT: fcvt.wu.d a0, fa4, rtz
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT: vslidedown.vi v8, v8, 1
; RV32-NEXT: vfmv.f.s fa4, v8
; RV32-NEXT: fmax.d fa4, fa4, fa3
; RV32-NEXT: fmin.d fa5, fa4, fa5
; RV32-NEXT: fcvt.wu.d a2, fa5, rtz
-; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
; RV32-NEXT: vmv.v.x v8, a2
; RV32-NEXT: vsetvli zero, zero, e8, mf8, tu, ma
; RV32-NEXT: vmv.s.x v8, a0
; RV64-NEXT: fmax.d fa4, fa4, fa3
; RV64-NEXT: fmin.d fa4, fa4, fa5
; RV64-NEXT: fcvt.lu.d a0, fa4, rtz
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 1
; RV64-NEXT: vfmv.f.s fa4, v8
; RV64-NEXT: fmax.d fa4, fa4, fa3
; RV64-NEXT: fmin.d fa5, fa4, fa5
; RV64-NEXT: fcvt.lu.d a2, fa5, rtz
-; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
; RV64-NEXT: vmv.v.x v8, a2
; RV64-NEXT: vsetvli zero, zero, e8, mf8, tu, ma
; RV64-NEXT: vmv.s.x v8, a0
; LMULMAX8RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX8RV32-NEXT: vle32.v v8, (a0)
; LMULMAX8RV32-NEXT: vfcvt.rtz.x.f.v v8, v8
-; LMULMAX8RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX8RV32-NEXT: vslidedown.vi v9, v8, 2
; LMULMAX8RV32-NEXT: addi a0, a1, 8
+; LMULMAX8RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX8RV32-NEXT: vse32.v v9, (a0)
; LMULMAX8RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; LMULMAX8RV32-NEXT: vse32.v v8, (a1)
; LMULMAX8RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX8RV64-NEXT: vle32.v v8, (a0)
; LMULMAX8RV64-NEXT: vfcvt.rtz.x.f.v v8, v8
-; LMULMAX8RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX8RV64-NEXT: vslidedown.vi v9, v8, 2
; LMULMAX8RV64-NEXT: addi a0, a1, 8
+; LMULMAX8RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX8RV64-NEXT: vse32.v v9, (a0)
; LMULMAX8RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX8RV64-NEXT: vse64.v v8, (a1)
; LMULMAX1RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1RV32-NEXT: vle32.v v8, (a0)
; LMULMAX1RV32-NEXT: vfcvt.rtz.x.f.v v8, v8
-; LMULMAX1RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1RV32-NEXT: vslidedown.vi v9, v8, 2
; LMULMAX1RV32-NEXT: addi a0, a1, 8
+; LMULMAX1RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1RV32-NEXT: vse32.v v9, (a0)
; LMULMAX1RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; LMULMAX1RV32-NEXT: vse32.v v8, (a1)
; LMULMAX1RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1RV64-NEXT: vle32.v v8, (a0)
; LMULMAX1RV64-NEXT: vfcvt.rtz.x.f.v v8, v8
-; LMULMAX1RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1RV64-NEXT: vslidedown.vi v9, v8, 2
; LMULMAX1RV64-NEXT: addi a0, a1, 8
+; LMULMAX1RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1RV64-NEXT: vse32.v v9, (a0)
; LMULMAX1RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX1RV64-NEXT: vse64.v v8, (a1)
; LMULMAX8RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX8RV32-NEXT: vle32.v v8, (a0)
; LMULMAX8RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8
-; LMULMAX8RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX8RV32-NEXT: vslidedown.vi v9, v8, 2
; LMULMAX8RV32-NEXT: addi a0, a1, 8
+; LMULMAX8RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX8RV32-NEXT: vse32.v v9, (a0)
; LMULMAX8RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; LMULMAX8RV32-NEXT: vse32.v v8, (a1)
; LMULMAX8RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX8RV64-NEXT: vle32.v v8, (a0)
; LMULMAX8RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8
-; LMULMAX8RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX8RV64-NEXT: vslidedown.vi v9, v8, 2
; LMULMAX8RV64-NEXT: addi a0, a1, 8
+; LMULMAX8RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX8RV64-NEXT: vse32.v v9, (a0)
; LMULMAX8RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX8RV64-NEXT: vse64.v v8, (a1)
; LMULMAX1RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1RV32-NEXT: vle32.v v8, (a0)
; LMULMAX1RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8
-; LMULMAX1RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1RV32-NEXT: vslidedown.vi v9, v8, 2
; LMULMAX1RV32-NEXT: addi a0, a1, 8
+; LMULMAX1RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1RV32-NEXT: vse32.v v9, (a0)
; LMULMAX1RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; LMULMAX1RV32-NEXT: vse32.v v8, (a1)
; LMULMAX1RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1RV64-NEXT: vle32.v v8, (a0)
; LMULMAX1RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8
-; LMULMAX1RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1RV64-NEXT: vslidedown.vi v9, v8, 2
; LMULMAX1RV64-NEXT: addi a0, a1, 8
+; LMULMAX1RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1RV64-NEXT: vse32.v v9, (a0)
; LMULMAX1RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX1RV64-NEXT: vse64.v v8, (a1)
; LMULMAX8RV32: # %bb.0:
; LMULMAX8RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; LMULMAX8RV32-NEXT: vfncvt.rtz.x.f.w v9, v8
-; LMULMAX8RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; LMULMAX8RV32-NEXT: vslidedown.vi v8, v9, 2
; LMULMAX8RV32-NEXT: vmv.x.s a1, v8
; LMULMAX8RV32-NEXT: slli a2, a1, 17
; LMULMAX8RV64-NEXT: lui a2, 8
; LMULMAX8RV64-NEXT: addiw a2, a2, -1
; LMULMAX8RV64-NEXT: and a1, a1, a2
-; LMULMAX8RV64-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; LMULMAX8RV64-NEXT: vslidedown.vi v8, v9, 1
; LMULMAX8RV64-NEXT: vmv.x.s a3, v8
; LMULMAX8RV64-NEXT: and a2, a3, a2
; LMULMAX1RV32: # %bb.0:
; LMULMAX1RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; LMULMAX1RV32-NEXT: vfncvt.rtz.x.f.w v9, v8
-; LMULMAX1RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; LMULMAX1RV32-NEXT: vslidedown.vi v8, v9, 2
; LMULMAX1RV32-NEXT: vmv.x.s a1, v8
; LMULMAX1RV32-NEXT: slli a2, a1, 17
; LMULMAX1RV64-NEXT: lui a2, 8
; LMULMAX1RV64-NEXT: addiw a2, a2, -1
; LMULMAX1RV64-NEXT: and a1, a1, a2
-; LMULMAX1RV64-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; LMULMAX1RV64-NEXT: vslidedown.vi v8, v9, 1
; LMULMAX1RV64-NEXT: vmv.x.s a3, v8
; LMULMAX1RV64-NEXT: and a2, a3, a2
; LMULMAX8RV32: # %bb.0:
; LMULMAX8RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; LMULMAX8RV32-NEXT: vfncvt.rtz.x.f.w v9, v8
-; LMULMAX8RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; LMULMAX8RV32-NEXT: vslidedown.vi v8, v9, 2
; LMULMAX8RV32-NEXT: vmv.x.s a1, v8
; LMULMAX8RV32-NEXT: slli a2, a1, 17
; LMULMAX8RV64-NEXT: lui a2, 16
; LMULMAX8RV64-NEXT: addiw a2, a2, -1
; LMULMAX8RV64-NEXT: and a1, a1, a2
-; LMULMAX8RV64-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; LMULMAX8RV64-NEXT: vslidedown.vi v8, v9, 1
; LMULMAX8RV64-NEXT: vmv.x.s a3, v8
; LMULMAX8RV64-NEXT: and a2, a3, a2
; LMULMAX1RV32: # %bb.0:
; LMULMAX1RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; LMULMAX1RV32-NEXT: vfncvt.rtz.x.f.w v9, v8
-; LMULMAX1RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; LMULMAX1RV32-NEXT: vslidedown.vi v8, v9, 2
; LMULMAX1RV32-NEXT: vmv.x.s a1, v8
; LMULMAX1RV32-NEXT: slli a2, a1, 17
; LMULMAX1RV64-NEXT: lui a2, 16
; LMULMAX1RV64-NEXT: addiw a2, a2, -1
; LMULMAX1RV64-NEXT: and a1, a1, a2
-; LMULMAX1RV64-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; LMULMAX1RV64-NEXT: vslidedown.vi v8, v9, 1
; LMULMAX1RV64-NEXT: vmv.x.s a3, v8
; LMULMAX1RV64-NEXT: and a2, a3, a2
; LMULMAX8RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX8RV32-NEXT: vle32.v v8, (a0)
; LMULMAX8RV32-NEXT: vfcvt.f.x.v v8, v8
-; LMULMAX8RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX8RV32-NEXT: vslidedown.vi v9, v8, 2
; LMULMAX8RV32-NEXT: addi a0, a1, 8
+; LMULMAX8RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX8RV32-NEXT: vse32.v v9, (a0)
; LMULMAX8RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; LMULMAX8RV32-NEXT: vse32.v v8, (a1)
; LMULMAX8RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX8RV64-NEXT: vle32.v v8, (a0)
; LMULMAX8RV64-NEXT: vfcvt.f.x.v v8, v8
-; LMULMAX8RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX8RV64-NEXT: vslidedown.vi v9, v8, 2
; LMULMAX8RV64-NEXT: addi a0, a1, 8
+; LMULMAX8RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX8RV64-NEXT: vse32.v v9, (a0)
; LMULMAX8RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX8RV64-NEXT: vse64.v v8, (a1)
; LMULMAX1RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1RV32-NEXT: vle32.v v8, (a0)
; LMULMAX1RV32-NEXT: vfcvt.f.x.v v8, v8
-; LMULMAX1RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1RV32-NEXT: vslidedown.vi v9, v8, 2
; LMULMAX1RV32-NEXT: addi a0, a1, 8
+; LMULMAX1RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1RV32-NEXT: vse32.v v9, (a0)
; LMULMAX1RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; LMULMAX1RV32-NEXT: vse32.v v8, (a1)
; LMULMAX1RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1RV64-NEXT: vle32.v v8, (a0)
; LMULMAX1RV64-NEXT: vfcvt.f.x.v v8, v8
-; LMULMAX1RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1RV64-NEXT: vslidedown.vi v9, v8, 2
; LMULMAX1RV64-NEXT: addi a0, a1, 8
+; LMULMAX1RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1RV64-NEXT: vse32.v v9, (a0)
; LMULMAX1RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX1RV64-NEXT: vse64.v v8, (a1)
; LMULMAX8RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX8RV32-NEXT: vle32.v v8, (a0)
; LMULMAX8RV32-NEXT: vfcvt.f.xu.v v8, v8
-; LMULMAX8RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX8RV32-NEXT: vslidedown.vi v9, v8, 2
; LMULMAX8RV32-NEXT: addi a0, a1, 8
+; LMULMAX8RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX8RV32-NEXT: vse32.v v9, (a0)
; LMULMAX8RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; LMULMAX8RV32-NEXT: vse32.v v8, (a1)
; LMULMAX8RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX8RV64-NEXT: vle32.v v8, (a0)
; LMULMAX8RV64-NEXT: vfcvt.f.xu.v v8, v8
-; LMULMAX8RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX8RV64-NEXT: vslidedown.vi v9, v8, 2
; LMULMAX8RV64-NEXT: addi a0, a1, 8
+; LMULMAX8RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX8RV64-NEXT: vse32.v v9, (a0)
; LMULMAX8RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX8RV64-NEXT: vse64.v v8, (a1)
; LMULMAX1RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1RV32-NEXT: vle32.v v8, (a0)
; LMULMAX1RV32-NEXT: vfcvt.f.xu.v v8, v8
-; LMULMAX1RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1RV32-NEXT: vslidedown.vi v9, v8, 2
; LMULMAX1RV32-NEXT: addi a0, a1, 8
+; LMULMAX1RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1RV32-NEXT: vse32.v v9, (a0)
; LMULMAX1RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; LMULMAX1RV32-NEXT: vse32.v v8, (a1)
; LMULMAX1RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1RV64-NEXT: vle32.v v8, (a0)
; LMULMAX1RV64-NEXT: vfcvt.f.xu.v v8, v8
-; LMULMAX1RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1RV64-NEXT: vslidedown.vi v9, v8, 2
; LMULMAX1RV64-NEXT: addi a0, a1, 8
+; LMULMAX1RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; LMULMAX1RV64-NEXT: vse32.v v9, (a0)
; LMULMAX1RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX1RV64-NEXT: vse64.v v8, (a1)
; RV64-NEXT: vadd.vv v8, v8, v9
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 2
; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
%a = load <6 x i16>, ptr %x
; RV64-NEXT: vsub.vv v8, v8, v9
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 2
; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
%a = load <6 x i16>, ptr %x
; RV64-NEXT: vmul.vv v8, v8, v9
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 2
; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
%a = load <6 x i16>, ptr %x
; RV64-NEXT: vand.vv v8, v8, v9
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 2
; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
%a = load <6 x i16>, ptr %x
; RV64-NEXT: vor.vv v8, v8, v9
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 2
; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
%a = load <6 x i16>, ptr %x
; RV64-NEXT: vxor.vv v8, v8, v9
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 2
; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
%a = load <6 x i16>, ptr %x
; RV64-NEXT: vsrl.vv v8, v8, v9
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 2
; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
%a = load <6 x i16>, ptr %x
; RV64-NEXT: vsra.vv v8, v8, v9
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 2
; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
%a = load <6 x i16>, ptr %x
; RV64-NEXT: vsll.vv v8, v8, v9
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 2
; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
%a = load <6 x i16>, ptr %x
; RV64-NEXT: vslideup.vi v8, v10, 4
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 2
; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
%a = load <6 x i16>, ptr %x
; RV64-NEXT: vslideup.vi v8, v10, 4
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 2
; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
%a = load <6 x i16>, ptr %x
; RV64-NEXT: vslideup.vi v8, v10, 4
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 2
; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
%a = load <6 x i16>, ptr %x
; RV64-NEXT: vslideup.vi v8, v10, 4
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 2
; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
%a = load <6 x i16>, ptr %x
; RV64-NEXT: vslideup.vi v9, v8, 4
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vse64.v v9, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v9, 2
; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
%a = load <6 x i16>, ptr %x
; RV64-NEXT: vslideup.vi v8, v9, 4
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 2
; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
%a = load <6 x i16>, ptr %x
; RV64-NEXT: vmin.vv v8, v8, v9
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 2
; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
%a = load <6 x i16>, ptr %x
; RV64-NEXT: vmin.vx v8, v8, a1
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 2
; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
%a = load <6 x i16>, ptr %x
; RV64-NEXT: vmin.vx v8, v8, a1
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 2
; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
%a = load <6 x i16>, ptr %x
; RV64-NEXT: vmax.vv v8, v8, v9
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 2
; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
%a = load <6 x i16>, ptr %x
; RV64-NEXT: vmax.vx v8, v8, a1
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 2
; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
%a = load <6 x i16>, ptr %x
; RV64-NEXT: vmax.vx v8, v8, a1
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 2
; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
%a = load <6 x i16>, ptr %x
; RV64-NEXT: vminu.vv v8, v8, v9
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 2
; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
%a = load <6 x i16>, ptr %x
; RV64-NEXT: vminu.vx v8, v8, a1
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 2
; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
%a = load <6 x i16>, ptr %x
; RV64-NEXT: vminu.vx v8, v8, a1
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 2
; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
%a = load <6 x i16>, ptr %x
; RV64-NEXT: vmaxu.vv v8, v8, v9
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 2
; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
%a = load <6 x i16>, ptr %x
; RV64-NEXT: vmaxu.vx v8, v8, a1
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 2
; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
%a = load <6 x i16>, ptr %x
; RV64-NEXT: vmaxu.vx v8, v8, a1
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 2
; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
%a = load <6 x i16>, ptr %x
; RV32-NEXT: or a1, a3, a1
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT: vmv.s.x v8, a1
-; RV32-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV32-NEXT: vslidedown.vi v9, v8, 1
; RV32-NEXT: vslidedown.vi v10, v8, 2
; RV32-NEXT: vslidedown.vi v11, v8, 3
; RV32-NEXT: lb a0, 4(a0)
; RV32-NEXT: addi a1, sp, 8
+; RV32-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV32-NEXT: vse8.v v8, (a1)
; RV32-NEXT: addi a2, sp, 11
; RV32-NEXT: vse8.v v11, (a2)
; RV64-NEXT: or a1, a3, a1
; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT: vmv.s.x v8, a1
-; RV64-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64-NEXT: vslidedown.vi v9, v8, 1
; RV64-NEXT: vslidedown.vi v10, v8, 2
; RV64-NEXT: vslidedown.vi v11, v8, 3
; RV64-NEXT: lb a0, 4(a0)
; RV64-NEXT: addi a1, sp, 8
+; RV64-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64-NEXT: vse8.v v8, (a1)
; RV64-NEXT: addi a2, sp, 11
; RV64-NEXT: vse8.v v11, (a2)
; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma
; RV64-NEXT: vmv.s.x v8, a2
; RV64-NEXT: sd a2, 0(a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 2
; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
%x = load <6 x half>, ptr %p
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
-; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: srai a2, a1, 31
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
-; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: andi a1, a1, 255
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB12_13: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB12_8
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
-; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: srai a2, a1, 31
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
-; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: lui a2, 16
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB23_13: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB23_8
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB24_13: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB24_8
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB25_13: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB25_8
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB26_13: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB26_8
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
-; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: srai a1, a1, 31
; RV32ZVE32F-NEXT: vmv.x.s a2, v9
; RV32ZVE32F-NEXT: srai a2, a2, 31
+; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vse32.v v9, (a0)
; RV32ZVE32F-NEXT: addi a3, a0, 8
; RV32ZVE32F-NEXT: vse32.v v8, (a3)
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
-; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV32ZVE32F-NEXT: sw zero, 12(a0)
; RV32ZVE32F-NEXT: sw zero, 4(a0)
+; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vse32.v v9, (a0)
; RV32ZVE32F-NEXT: addi a0, a0, 8
; RV32ZVE32F-NEXT: vse32.v v8, (a0)
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB35_13: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB36_13: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB37_13: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB38_13: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB39_13: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB40_13: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: lw a3, 0(a3)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a3
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
; RV64ZVE32F-NEXT: beqz a7, .LBB48_12
; RV64ZVE32F-NEXT: # %bb.9: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v9
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: andi t0, a6, 16
; RV64ZVE32F-NEXT: beqz t0, .LBB48_10
; RV64ZVE32F-NEXT: .LBB48_13: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s t0, v8
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: andi t1, a6, 32
; RV64ZVE32F-NEXT: beqz t1, .LBB48_11
; RV64ZVE32F-NEXT: .LBB48_14: # %cond.load13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v9
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: andi a6, a6, -128
; RV64ZVE32F-NEXT: beqz a6, .LBB48_17
; RV64ZVE32F-NEXT: .LBB48_19: # %cond.load19
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
; RV64ZVE32F-NEXT: beqz a7, .LBB49_12
; RV64ZVE32F-NEXT: # %bb.9: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v9
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: andi t0, a6, 16
; RV64ZVE32F-NEXT: beqz t0, .LBB49_10
; RV64ZVE32F-NEXT: .LBB49_13: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s t0, v8
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: andi t1, a6, 32
; RV64ZVE32F-NEXT: beqz t1, .LBB49_11
; RV64ZVE32F-NEXT: .LBB49_14: # %cond.load13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v9
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: andi a6, a6, -128
; RV64ZVE32F-NEXT: beqz a6, .LBB49_17
; RV64ZVE32F-NEXT: .LBB49_19: # %cond.load19
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
; RV64ZVE32F-NEXT: beqz a7, .LBB50_12
; RV64ZVE32F-NEXT: # %bb.9: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v9
; RV64ZVE32F-NEXT: andi a7, a7, 255
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: beqz t0, .LBB50_10
; RV64ZVE32F-NEXT: .LBB50_13: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s t0, v8
; RV64ZVE32F-NEXT: andi t0, t0, 255
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: beqz t1, .LBB50_11
; RV64ZVE32F-NEXT: .LBB50_14: # %cond.load13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v9
; RV64ZVE32F-NEXT: andi t1, t1, 255
; RV64ZVE32F-NEXT: andi a5, a5, -128
; RV64ZVE32F-NEXT: beqz a5, .LBB50_17
; RV64ZVE32F-NEXT: .LBB50_19: # %cond.load19
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-LABEL: mgather_baseidx_v8i16_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a6, v0
-; RV64ZVE32F-NEXT: andi a3, a6, 1
+; RV64ZVE32F-NEXT: vmv.x.s a5, v0
+; RV64ZVE32F-NEXT: andi a3, a5, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB51_3
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: ld a3, 0(a3)
-; RV64ZVE32F-NEXT: andi a4, a6, 2
+; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: bnez a4, .LBB51_4
; RV64ZVE32F-NEXT: .LBB51_2:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
; RV64ZVE32F-NEXT: j .LBB51_5
; RV64ZVE32F-NEXT: .LBB51_3:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
-; RV64ZVE32F-NEXT: andi a4, a6, 2
+; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: beqz a4, .LBB51_2
; RV64ZVE32F-NEXT: .LBB51_4: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: ld a4, 0(a4)
; RV64ZVE32F-NEXT: .LBB51_5: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: andi a5, a6, 4
+; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
-; RV64ZVE32F-NEXT: beqz a5, .LBB51_7
+; RV64ZVE32F-NEXT: beqz a6, .LBB51_7
; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a5, v9
-; RV64ZVE32F-NEXT: slli a5, a5, 3
-; RV64ZVE32F-NEXT: add a5, a1, a5
-; RV64ZVE32F-NEXT: ld a5, 0(a5)
+; RV64ZVE32F-NEXT: vmv.x.s a6, v9
+; RV64ZVE32F-NEXT: slli a6, a6, 3
+; RV64ZVE32F-NEXT: add a6, a1, a6
+; RV64ZVE32F-NEXT: ld a6, 0(a6)
; RV64ZVE32F-NEXT: j .LBB51_8
; RV64ZVE32F-NEXT: .LBB51_7:
-; RV64ZVE32F-NEXT: ld a5, 16(a2)
+; RV64ZVE32F-NEXT: ld a6, 16(a2)
; RV64ZVE32F-NEXT: .LBB51_8: # %else5
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: andi a7, a6, 8
+; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
; RV64ZVE32F-NEXT: beqz a7, .LBB51_12
; RV64ZVE32F-NEXT: # %bb.9: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v9
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
-; RV64ZVE32F-NEXT: andi t0, a6, 16
+; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: bnez t0, .LBB51_13
; RV64ZVE32F-NEXT: .LBB51_10:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
-; RV64ZVE32F-NEXT: andi t1, a6, 32
+; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: bnez t1, .LBB51_14
; RV64ZVE32F-NEXT: .LBB51_11:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
; RV64ZVE32F-NEXT: j .LBB51_15
; RV64ZVE32F-NEXT: .LBB51_12:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
-; RV64ZVE32F-NEXT: andi t0, a6, 16
+; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: beqz t0, .LBB51_10
; RV64ZVE32F-NEXT: .LBB51_13: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s t0, v8
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
-; RV64ZVE32F-NEXT: andi t1, a6, 32
+; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: beqz t1, .LBB51_11
; RV64ZVE32F-NEXT: .LBB51_14: # %cond.load13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v9
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: ld t1, 0(t1)
; RV64ZVE32F-NEXT: .LBB51_15: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: andi t2, a6, 64
+; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: beqz t2, .LBB51_18
; RV64ZVE32F-NEXT: # %bb.16: # %cond.load16
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
-; RV64ZVE32F-NEXT: andi a6, a6, -128
-; RV64ZVE32F-NEXT: bnez a6, .LBB51_19
+; RV64ZVE32F-NEXT: andi a5, a5, -128
+; RV64ZVE32F-NEXT: bnez a5, .LBB51_19
; RV64ZVE32F-NEXT: .LBB51_17:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
; RV64ZVE32F-NEXT: j .LBB51_20
; RV64ZVE32F-NEXT: .LBB51_18:
; RV64ZVE32F-NEXT: ld t2, 48(a2)
-; RV64ZVE32F-NEXT: andi a6, a6, -128
-; RV64ZVE32F-NEXT: beqz a6, .LBB51_17
+; RV64ZVE32F-NEXT: andi a5, a5, -128
+; RV64ZVE32F-NEXT: beqz a5, .LBB51_17
; RV64ZVE32F-NEXT: .LBB51_19: # %cond.load19
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: .LBB51_20: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
-; RV64ZVE32F-NEXT: sd a5, 16(a0)
+; RV64ZVE32F-NEXT: sd a6, 16(a0)
; RV64ZVE32F-NEXT: sd a7, 24(a0)
; RV64ZVE32F-NEXT: sd t0, 32(a0)
; RV64ZVE32F-NEXT: sd t1, 40(a0)
; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a6, v0
-; RV64ZVE32F-NEXT: andi a3, a6, 1
+; RV64ZVE32F-NEXT: vmv.x.s a5, v0
+; RV64ZVE32F-NEXT: andi a3, a5, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB52_3
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: ld a3, 0(a3)
-; RV64ZVE32F-NEXT: andi a4, a6, 2
+; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: bnez a4, .LBB52_4
; RV64ZVE32F-NEXT: .LBB52_2:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
; RV64ZVE32F-NEXT: j .LBB52_5
; RV64ZVE32F-NEXT: .LBB52_3:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
-; RV64ZVE32F-NEXT: andi a4, a6, 2
+; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: beqz a4, .LBB52_2
; RV64ZVE32F-NEXT: .LBB52_4: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: ld a4, 0(a4)
; RV64ZVE32F-NEXT: .LBB52_5: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: andi a5, a6, 4
+; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2
-; RV64ZVE32F-NEXT: beqz a5, .LBB52_7
+; RV64ZVE32F-NEXT: beqz a6, .LBB52_7
; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
-; RV64ZVE32F-NEXT: vmv.x.s a5, v9
-; RV64ZVE32F-NEXT: slli a5, a5, 3
-; RV64ZVE32F-NEXT: add a5, a1, a5
-; RV64ZVE32F-NEXT: ld a5, 0(a5)
+; RV64ZVE32F-NEXT: vmv.x.s a6, v9
+; RV64ZVE32F-NEXT: slli a6, a6, 3
+; RV64ZVE32F-NEXT: add a6, a1, a6
+; RV64ZVE32F-NEXT: ld a6, 0(a6)
; RV64ZVE32F-NEXT: j .LBB52_8
; RV64ZVE32F-NEXT: .LBB52_7:
-; RV64ZVE32F-NEXT: ld a5, 16(a2)
+; RV64ZVE32F-NEXT: ld a6, 16(a2)
; RV64ZVE32F-NEXT: .LBB52_8: # %else5
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: andi a7, a6, 8
+; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
; RV64ZVE32F-NEXT: beqz a7, .LBB52_12
; RV64ZVE32F-NEXT: # %bb.9: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v9
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
-; RV64ZVE32F-NEXT: andi t0, a6, 16
+; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: bnez t0, .LBB52_13
; RV64ZVE32F-NEXT: .LBB52_10:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
-; RV64ZVE32F-NEXT: andi t1, a6, 32
+; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: bnez t1, .LBB52_14
; RV64ZVE32F-NEXT: .LBB52_11:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
; RV64ZVE32F-NEXT: j .LBB52_15
; RV64ZVE32F-NEXT: .LBB52_12:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
-; RV64ZVE32F-NEXT: andi t0, a6, 16
+; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: beqz t0, .LBB52_10
; RV64ZVE32F-NEXT: .LBB52_13: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s t0, v8
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
-; RV64ZVE32F-NEXT: andi t1, a6, 32
+; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: beqz t1, .LBB52_11
; RV64ZVE32F-NEXT: .LBB52_14: # %cond.load13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v9
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: ld t1, 0(t1)
; RV64ZVE32F-NEXT: .LBB52_15: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: andi t2, a6, 64
+; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: beqz t2, .LBB52_18
; RV64ZVE32F-NEXT: # %bb.16: # %cond.load16
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
-; RV64ZVE32F-NEXT: andi a6, a6, -128
-; RV64ZVE32F-NEXT: bnez a6, .LBB52_19
+; RV64ZVE32F-NEXT: andi a5, a5, -128
+; RV64ZVE32F-NEXT: bnez a5, .LBB52_19
; RV64ZVE32F-NEXT: .LBB52_17:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
; RV64ZVE32F-NEXT: j .LBB52_20
; RV64ZVE32F-NEXT: .LBB52_18:
; RV64ZVE32F-NEXT: ld t2, 48(a2)
-; RV64ZVE32F-NEXT: andi a6, a6, -128
-; RV64ZVE32F-NEXT: beqz a6, .LBB52_17
+; RV64ZVE32F-NEXT: andi a5, a5, -128
+; RV64ZVE32F-NEXT: beqz a5, .LBB52_17
; RV64ZVE32F-NEXT: .LBB52_19: # %cond.load19
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: .LBB52_20: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
-; RV64ZVE32F-NEXT: sd a5, 16(a0)
+; RV64ZVE32F-NEXT: sd a6, 16(a0)
; RV64ZVE32F-NEXT: sd a7, 24(a0)
; RV64ZVE32F-NEXT: sd t0, 32(a0)
; RV64ZVE32F-NEXT: sd t1, 40(a0)
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4
; RV64ZVE32F-NEXT: beqz t0, .LBB53_12
; RV64ZVE32F-NEXT: # %bb.9: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s t0, v9
; RV64ZVE32F-NEXT: and t0, t0, a5
; RV64ZVE32F-NEXT: andi t1, a6, 16
; RV64ZVE32F-NEXT: beqz t1, .LBB53_10
; RV64ZVE32F-NEXT: .LBB53_13: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s t1, v8
; RV64ZVE32F-NEXT: and t1, t1, a5
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: andi t2, a6, 32
; RV64ZVE32F-NEXT: beqz t2, .LBB53_11
; RV64ZVE32F-NEXT: .LBB53_14: # %cond.load13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s t2, v9
; RV64ZVE32F-NEXT: and t2, t2, a5
; RV64ZVE32F-NEXT: andi a6, a6, -128
; RV64ZVE32F-NEXT: beqz a6, .LBB53_17
; RV64ZVE32F-NEXT: .LBB53_19: # %cond.load19
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: and a2, a2, a5
; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: beqz a7, .LBB54_7
; RV64ZVE32F-NEXT: .LBB54_11: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v8
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: beqz t0, .LBB54_8
; RV64ZVE32F-NEXT: .LBB54_12: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s t0, v10
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: beqz t1, .LBB54_9
; RV64ZVE32F-NEXT: .LBB54_13: # %cond.load13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v8
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
; RV64ZVE32F-NEXT: .LBB54_14: # %else14
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: beqz t2, .LBB54_17
; RV64ZVE32F-NEXT: andi a5, a5, -128
; RV64ZVE32F-NEXT: beqz a5, .LBB54_16
; RV64ZVE32F-NEXT: .LBB54_18: # %cond.load19
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: beqz a7, .LBB55_7
; RV64ZVE32F-NEXT: .LBB55_11: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v8
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: beqz t0, .LBB55_8
; RV64ZVE32F-NEXT: .LBB55_12: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s t0, v10
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: beqz t1, .LBB55_9
; RV64ZVE32F-NEXT: .LBB55_13: # %cond.load13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v8
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
; RV64ZVE32F-NEXT: .LBB55_14: # %else14
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: beqz t2, .LBB55_17
; RV64ZVE32F-NEXT: andi a5, a5, -128
; RV64ZVE32F-NEXT: beqz a5, .LBB55_16
; RV64ZVE32F-NEXT: .LBB55_18: # %cond.load19
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: beqz a7, .LBB56_7
; RV64ZVE32F-NEXT: .LBB56_11: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v8
; RV64ZVE32F-NEXT: slli a7, a7, 32
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: beqz t0, .LBB56_8
; RV64ZVE32F-NEXT: .LBB56_12: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s t0, v10
; RV64ZVE32F-NEXT: slli t0, t0, 32
; RV64ZVE32F-NEXT: srli t0, t0, 29
; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: beqz t1, .LBB56_9
; RV64ZVE32F-NEXT: .LBB56_13: # %cond.load13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v8
; RV64ZVE32F-NEXT: slli t1, t1, 32
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
; RV64ZVE32F-NEXT: .LBB56_14: # %else14
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: beqz t2, .LBB56_17
; RV64ZVE32F-NEXT: andi a5, a5, -128
; RV64ZVE32F-NEXT: beqz a5, .LBB56_16
; RV64ZVE32F-NEXT: .LBB56_18: # %cond.load19
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 32
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB64_13: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB64_8
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB65_13: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB65_8
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB66_13: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB66_8
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB67_13: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB67_8
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB74_13: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB75_13: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB76_13: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB77_13: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB78_13: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB79_13: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: flw fa5, 0(a3)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB87_10
; RV64ZVE32F-NEXT: .LBB87_9: # %cond.load13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: beqz a2, .LBB87_14
; RV64ZVE32F-NEXT: # %bb.13: # %cond.load19
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB87_15: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: beqz a3, .LBB87_8
; RV64ZVE32F-NEXT: .LBB87_16: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB88_10
; RV64ZVE32F-NEXT: .LBB88_9: # %cond.load13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: beqz a2, .LBB88_14
; RV64ZVE32F-NEXT: # %bb.13: # %cond.load19
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB88_15: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: beqz a3, .LBB88_8
; RV64ZVE32F-NEXT: .LBB88_16: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB89_10
; RV64ZVE32F-NEXT: .LBB89_9: # %cond.load13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: andi a3, a3, 255
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: beqz a2, .LBB89_14
; RV64ZVE32F-NEXT: # %bb.13: # %cond.load19
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB89_15: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: andi a3, a3, 255
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: beqz a3, .LBB89_8
; RV64ZVE32F-NEXT: .LBB89_16: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: andi a3, a3, 255
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB90_10
; RV64ZVE32F-NEXT: .LBB90_9: # %cond.load13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: beqz a2, .LBB90_14
; RV64ZVE32F-NEXT: # %bb.13: # %cond.load19
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB90_15: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: beqz a3, .LBB90_8
; RV64ZVE32F-NEXT: .LBB90_16: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB91_10
; RV64ZVE32F-NEXT: .LBB91_9: # %cond.load13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: beqz a2, .LBB91_14
; RV64ZVE32F-NEXT: # %bb.13: # %cond.load19
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB91_15: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: beqz a3, .LBB91_8
; RV64ZVE32F-NEXT: .LBB91_16: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: andi a4, a3, 32
; RV64ZVE32F-NEXT: beqz a4, .LBB92_10
; RV64ZVE32F-NEXT: .LBB92_9: # %cond.load13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v9
; RV64ZVE32F-NEXT: and a4, a4, a2
; RV64ZVE32F-NEXT: andi a3, a3, -128
; RV64ZVE32F-NEXT: beqz a3, .LBB92_14
; RV64ZVE32F-NEXT: # %bb.13: # %cond.load19
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: and a2, a3, a2
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB92_15: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v9
; RV64ZVE32F-NEXT: and a4, a4, a2
; RV64ZVE32F-NEXT: andi a4, a3, 16
; RV64ZVE32F-NEXT: beqz a4, .LBB92_8
; RV64ZVE32F-NEXT: .LBB92_16: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a4, v8
; RV64ZVE32F-NEXT: and a4, a4, a2
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB93_9
; RV64ZVE32F-NEXT: .LBB93_8: # %cond.load13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa5, 0(a3)
; RV64ZVE32F-NEXT: .LBB93_9: # %else14
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: andi a3, a2, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB93_11
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: beqz a2, .LBB93_13
; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB93_6
; RV64ZVE32F-NEXT: .LBB93_15: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: beqz a3, .LBB93_7
; RV64ZVE32F-NEXT: .LBB93_16: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB94_9
; RV64ZVE32F-NEXT: .LBB94_8: # %cond.load13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa5, 0(a3)
; RV64ZVE32F-NEXT: .LBB94_9: # %else14
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: andi a3, a2, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB94_11
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: beqz a2, .LBB94_13
; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB94_6
; RV64ZVE32F-NEXT: .LBB94_15: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: beqz a3, .LBB94_7
; RV64ZVE32F-NEXT: .LBB94_16: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB95_9
; RV64ZVE32F-NEXT: .LBB95_8: # %cond.load13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 32
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa5, 0(a3)
; RV64ZVE32F-NEXT: .LBB95_9: # %else14
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: andi a3, a2, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB95_11
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: beqz a2, .LBB95_13
; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 32
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB95_6
; RV64ZVE32F-NEXT: .LBB95_15: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 32
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: beqz a3, .LBB95_7
; RV64ZVE32F-NEXT: .LBB95_16: # %cond.load10
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: slli a3, a3, 32
; RV64ZVE32F-NEXT: srli a3, a3, 29
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB97_26: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: bnez a2, .LBB97_14
; RV64ZVE32F-NEXT: j .LBB97_15
; RV64ZVE32F-NEXT: .LBB97_31: # %cond.load31
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 4
; RV64ZVE32F-NEXT: bgez a2, .LBB98_19
; RV64ZVE32F-NEXT: # %bb.18: # %cond.load31
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB98_50: # %cond.load7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: bltz a2, .LBB98_27
; RV64ZVE32F-NEXT: j .LBB98_28
; RV64ZVE32F-NEXT: .LBB98_58: # %cond.load55
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: bltz a2, .LBB98_38
; RV64ZVE32F-NEXT: j .LBB98_39
; RV64ZVE32F-NEXT: .LBB98_63: # %cond.load79
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
; RV64ZVE32F-NEXT: .LBB9_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
; RV64ZVE32F-NEXT: .LBB9_6: # %else4
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
; RV64ZVE32F-NEXT: .LBB9_10: # %else10
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: .LBB9_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB9_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB9_8
; RV64ZVE32F-NEXT: .LBB9_15: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB9_12
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-NEXT: add a0, a0, a1
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vse8.v v8, (a0)
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i8, ptr %base, <8 x i8> %idxs
; RV64ZVE32F-NEXT: .LBB18_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB18_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: .LBB19_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB19_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: .LBB20_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB20_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vse16.v v10, (a2)
; RV64ZVE32F-NEXT: .LBB21_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vse16.v v11, (a2)
; RV64ZVE32F-NEXT: .LBB21_6: # %else4
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vse16.v v10, (a2)
; RV64ZVE32F-NEXT: .LBB21_10: # %else10
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: .LBB21_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB21_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vse16.v v10, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB21_8
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vse16.v v10, (a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB21_12
; RV64ZVE32F-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-NEXT: slli a1, a1, 1
; RV64ZVE32F-NEXT: add a0, a0, a1
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vse16.v v8, (a0)
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %idxs
; RV64ZVE32F-NEXT: .LBB29_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB29_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: .LBB30_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB30_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: .LBB31_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB31_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: .LBB32_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB32_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: .LBB33_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB33_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: .LBB34_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB34_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v11
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB35_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
; RV64ZVE32F-NEXT: .LBB35_9: # %else10
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vse32.v v14, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB35_6
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB35_7
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB35_11
; RV64ZVE32F-NEXT: vmv.x.s a1, v10
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB42_10
; RV64ZVE32F-NEXT: .LBB42_9: # %cond.store9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: .LBB42_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB42_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB42_8
; RV64ZVE32F-NEXT: .LBB42_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB42_12
; RV64ZVE32F-NEXT: .LBB42_16: # %cond.store13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB43_10
; RV64ZVE32F-NEXT: .LBB43_9: # %cond.store9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: .LBB43_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB43_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB43_8
; RV64ZVE32F-NEXT: .LBB43_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB43_12
; RV64ZVE32F-NEXT: .LBB43_16: # %cond.store13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB44_10
; RV64ZVE32F-NEXT: .LBB44_9: # %cond.store9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: andi a0, a0, 255
; RV64ZVE32F-NEXT: .LBB44_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB44_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: andi a0, a0, 255
; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB44_8
; RV64ZVE32F-NEXT: .LBB44_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: andi a0, a0, 255
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB44_12
; RV64ZVE32F-NEXT: .LBB44_16: # %cond.store13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: andi a0, a0, 255
; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB45_10
; RV64ZVE32F-NEXT: .LBB45_9: # %cond.store9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: .LBB45_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB45_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB45_8
; RV64ZVE32F-NEXT: .LBB45_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB45_12
; RV64ZVE32F-NEXT: .LBB45_16: # %cond.store13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB46_10
; RV64ZVE32F-NEXT: .LBB46_9: # %cond.store9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: .LBB46_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB46_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB46_8
; RV64ZVE32F-NEXT: .LBB46_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB46_12
; RV64ZVE32F-NEXT: .LBB46_16: # %cond.store13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: andi a0, a5, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB47_10
; RV64ZVE32F-NEXT: .LBB47_9: # %cond.store9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: and a0, a0, a4
; RV64ZVE32F-NEXT: .LBB47_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB47_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v9
; RV64ZVE32F-NEXT: and a0, a0, a4
; RV64ZVE32F-NEXT: andi a0, a5, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB47_8
; RV64ZVE32F-NEXT: .LBB47_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: and a0, a0, a4
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: andi a0, a5, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB47_12
; RV64ZVE32F-NEXT: .LBB47_16: # %cond.store13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: and a0, a0, a4
; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB48_9
; RV64ZVE32F-NEXT: .LBB48_8: # %cond.store9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a5, 0(a0)
; RV64ZVE32F-NEXT: .LBB48_9: # %else10
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: andi a0, a4, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB48_15
; RV64ZVE32F-NEXT: andi a0, a4, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB48_6
; RV64ZVE32F-NEXT: .LBB48_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB48_7
; RV64ZVE32F-NEXT: .LBB48_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v10
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB48_11
; RV64ZVE32F-NEXT: .LBB48_16: # %cond.store13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB49_9
; RV64ZVE32F-NEXT: .LBB49_8: # %cond.store9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a5, 0(a0)
; RV64ZVE32F-NEXT: .LBB49_9: # %else10
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: andi a0, a4, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB49_15
; RV64ZVE32F-NEXT: andi a0, a4, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB49_6
; RV64ZVE32F-NEXT: .LBB49_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB49_7
; RV64ZVE32F-NEXT: .LBB49_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v10
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB49_11
; RV64ZVE32F-NEXT: .LBB49_16: # %cond.store13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 3
; RV64ZVE32F-NEXT: andi a0, a4, 32
; RV64ZVE32F-NEXT: beqz a0, .LBB50_9
; RV64ZVE32F-NEXT: .LBB50_8: # %cond.store9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 32
; RV64ZVE32F-NEXT: add a0, a1, a0
; RV64ZVE32F-NEXT: sd a5, 0(a0)
; RV64ZVE32F-NEXT: .LBB50_9: # %else10
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: andi a0, a4, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a0, .LBB50_15
; RV64ZVE32F-NEXT: andi a0, a4, 8
; RV64ZVE32F-NEXT: beqz a0, .LBB50_6
; RV64ZVE32F-NEXT: .LBB50_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 32
; RV64ZVE32F-NEXT: andi a0, a4, 16
; RV64ZVE32F-NEXT: beqz a0, .LBB50_7
; RV64ZVE32F-NEXT: .LBB50_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v10
; RV64ZVE32F-NEXT: slli a0, a0, 32
; RV64ZVE32F-NEXT: srli a0, a0, 29
; RV64ZVE32F-NEXT: andi a0, a4, -128
; RV64ZVE32F-NEXT: beqz a0, .LBB50_11
; RV64ZVE32F-NEXT: .LBB50_16: # %cond.store13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 32
; RV64ZVE32F-NEXT: .LBB58_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB58_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: .LBB59_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB59_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: .LBB60_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB60_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vse16.v v10, (a2)
; RV64ZVE32F-NEXT: .LBB61_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vse16.v v11, (a2)
; RV64ZVE32F-NEXT: .LBB61_6: # %else4
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vse16.v v10, (a2)
; RV64ZVE32F-NEXT: .LBB61_10: # %else10
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: .LBB61_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB61_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vse16.v v10, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB61_8
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 6
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vse16.v v10, (a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB61_12
; RV64ZVE32F-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-NEXT: slli a1, a1, 1
; RV64ZVE32F-NEXT: add a0, a0, a1
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vse16.v v8, (a0)
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %idxs
; RV64ZVE32F-NEXT: .LBB68_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB68_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: .LBB69_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB69_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: .LBB70_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB70_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: .LBB71_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB71_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: .LBB72_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB72_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: .LBB73_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB73_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v11
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB74_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 5
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
; RV64ZVE32F-NEXT: .LBB74_9: # %else10
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vse32.v v14, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB74_6
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vse32.v v10, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB74_7
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vse32.v v12, (a2)
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB74_11
; RV64ZVE32F-NEXT: vmv.x.s a1, v10
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vse32.v v8, (a0)
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %idxs
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB81_10
; RV64ZVE32F-NEXT: .LBB81_9: # %cond.store9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: .LBB81_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB81_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB81_8
; RV64ZVE32F-NEXT: .LBB81_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB81_12
; RV64ZVE32F-NEXT: .LBB81_16: # %cond.store13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 3
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB82_10
; RV64ZVE32F-NEXT: .LBB82_9: # %cond.store9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: .LBB82_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB82_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB82_8
; RV64ZVE32F-NEXT: .LBB82_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB82_12
; RV64ZVE32F-NEXT: .LBB82_16: # %cond.store13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 3
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB83_10
; RV64ZVE32F-NEXT: .LBB83_9: # %cond.store9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: .LBB83_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB83_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB83_8
; RV64ZVE32F-NEXT: .LBB83_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB83_12
; RV64ZVE32F-NEXT: .LBB83_16: # %cond.store13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: andi a1, a1, 255
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB84_10
; RV64ZVE32F-NEXT: .LBB84_9: # %cond.store9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: .LBB84_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB84_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB84_8
; RV64ZVE32F-NEXT: .LBB84_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB84_12
; RV64ZVE32F-NEXT: .LBB84_16: # %cond.store13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 3
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB85_10
; RV64ZVE32F-NEXT: .LBB85_9: # %cond.store9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: .LBB85_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB85_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB85_8
; RV64ZVE32F-NEXT: .LBB85_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB85_12
; RV64ZVE32F-NEXT: .LBB85_16: # %cond.store13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 3
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB86_10
; RV64ZVE32F-NEXT: .LBB86_9: # %cond.store9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: .LBB86_12: # %else14
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB86_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: beqz a3, .LBB86_8
; RV64ZVE32F-NEXT: .LBB86_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: beqz a2, .LBB86_12
; RV64ZVE32F-NEXT: .LBB86_16: # %cond.store13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: and a1, a2, a1
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB87_9
; RV64ZVE32F-NEXT: .LBB87_8: # %cond.store9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB87_9: # %else10
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB87_15
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB87_6
; RV64ZVE32F-NEXT: .LBB87_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB87_7
; RV64ZVE32F-NEXT: .LBB87_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB87_11
; RV64ZVE32F-NEXT: .LBB87_16: # %cond.store13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 3
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB88_9
; RV64ZVE32F-NEXT: .LBB88_8: # %cond.store9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB88_9: # %else10
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB88_15
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB88_6
; RV64ZVE32F-NEXT: .LBB88_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB88_7
; RV64ZVE32F-NEXT: .LBB88_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB88_11
; RV64ZVE32F-NEXT: .LBB88_16: # %cond.store13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 3
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB89_9
; RV64ZVE32F-NEXT: .LBB89_8: # %cond.store9
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 32
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
; RV64ZVE32F-NEXT: .LBB89_9: # %else10
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB89_15
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB89_6
; RV64ZVE32F-NEXT: .LBB89_13: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 32
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB89_7
; RV64ZVE32F-NEXT: .LBB89_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 32
; RV64ZVE32F-NEXT: srli a2, a2, 29
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB89_11
; RV64ZVE32F-NEXT: .LBB89_16: # %cond.store13
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 32
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
; RV64ZVE32F-NEXT: .LBB91_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
; RV64ZVE32F-NEXT: .LBB91_6: # %else4
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 5
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
; RV64ZVE32F-NEXT: .LBB91_10: # %else10
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 9
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
; RV64ZVE32F-NEXT: .LBB91_15: # %else18
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: # %bb.16: # %cond.store19
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 10
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
; RV64ZVE32F-NEXT: .LBB91_17: # %else20
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 13
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
; RV64ZVE32F-NEXT: .LBB91_21: # %else26
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: # %bb.22: # %cond.store27
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 14
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
; RV64ZVE32F-NEXT: .LBB91_23: # %else28
; RV64ZVE32F-NEXT: lui a2, 1048568
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-NEXT: add a0, a0, a1
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 15
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v8, (a0)
; RV64ZVE32F-NEXT: .LBB91_25: # %else30
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB91_26: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 3
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB91_8
; RV64ZVE32F-NEXT: .LBB91_28: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 6
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 128
; RV64ZVE32F-NEXT: beqz a2, .LBB91_12
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 7
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 256
; RV64ZVE32F-NEXT: beqz a2, .LBB91_13
; RV64ZVE32F-NEXT: bnez a2, .LBB91_14
; RV64ZVE32F-NEXT: j .LBB91_15
; RV64ZVE32F-NEXT: .LBB91_31: # %cond.store21
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 11
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 51
; RV64ZVE32F-NEXT: bgez a2, .LBB91_19
; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB92_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: # %bb.5: # %cond.store3
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: .LBB92_6: # %else4
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v13, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 5
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: .LBB92_10: # %else10
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 9
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: .LBB92_15: # %else18
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: # %bb.16: # %cond.store19
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 10
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: .LBB92_17: # %else20
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 4
; RV64ZVE32F-NEXT: bgez a2, .LBB92_19
; RV64ZVE32F-NEXT: # %bb.18: # %cond.store21
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 11
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: .LBB92_19: # %else22
; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 13
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: .LBB92_23: # %else26
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 17
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB92_28: # %else34
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: # %bb.29: # %cond.store35
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 18
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: .LBB92_30: # %else36
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 21
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB92_34: # %else42
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 25
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB92_39: # %else50
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: # %bb.40: # %cond.store51
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 26
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB92_41: # %else52
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 29
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB92_45: # %else58
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: # %bb.46: # %cond.store59
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 30
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: .LBB92_47: # %else60
; RV64ZVE32F-NEXT: lui a2, 524288
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v10
; RV64ZVE32F-NEXT: add a0, a0, a1
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 31
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vse8.v v8, (a0)
; RV64ZVE32F-NEXT: .LBB92_49: # %else62
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB92_50: # %cond.store5
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 3
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB92_8
; RV64ZVE32F-NEXT: .LBB92_52: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 6
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 128
; RV64ZVE32F-NEXT: beqz a2, .LBB92_12
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 7
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 256
; RV64ZVE32F-NEXT: beqz a2, .LBB92_13
; RV64ZVE32F-NEXT: .LBB92_55: # %cond.store27
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 14
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 48
; RV64ZVE32F-NEXT: bgez a2, .LBB92_25
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 15
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 47
; RV64ZVE32F-NEXT: bgez a2, .LBB92_26
; RV64ZVE32F-NEXT: bltz a2, .LBB92_27
; RV64ZVE32F-NEXT: j .LBB92_28
; RV64ZVE32F-NEXT: .LBB92_58: # %cond.store37
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 19
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 43
; RV64ZVE32F-NEXT: bgez a2, .LBB92_32
; RV64ZVE32F-NEXT: .LBB92_60: # %cond.store43
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 22
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 40
; RV64ZVE32F-NEXT: bgez a2, .LBB92_36
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 23
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 39
; RV64ZVE32F-NEXT: bgez a2, .LBB92_37
; RV64ZVE32F-NEXT: bltz a2, .LBB92_38
; RV64ZVE32F-NEXT: j .LBB92_39
; RV64ZVE32F-NEXT: .LBB92_63: # %cond.store53
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 27
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
; RV64ZVE32F-NEXT: slli a2, a1, 35
; RV64ZVE32F-NEXT: bgez a2, .LBB92_43
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v8, 2
; CHECK-NEXT: addi a0, a0, 4
+; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
store <6 x i8> %v, ptr %p
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 2
; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
store <12 x i8> %v, ptr %p
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 2
; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
store <6 x i16> %v, ptr %p
; RV64-NEXT: vmv.s.x v8, a2
; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 2
; RV64-NEXT: addi a0, a0, 8
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vse32.v v8, (a0)
; RV64-NEXT: ret
store <6 x half> %v, ptr %p
; CHECK-V-NEXT: vmslt.vx v0, v8, a1
; CHECK-V-NEXT: vmv.v.i v8, 0
; CHECK-V-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-V-NEXT: vslidedown.vi v8, v8, 1
; CHECK-V-NEXT: vmv.x.s a0, v8
; CHECK-V-NEXT: ret
; CHECK-NEXT: vslidedown.vi v10, v8, 1
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vslideup.vi v10, v8, 1
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v9, 1
-; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vslideup.vi v8, v9, 1
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vslideup.vi v8, v10, 2
; CHECK-NEXT: vslidedown.vi v10, v8, 1
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vslideup.vi v10, v8, 1
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v9, 1
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vslideup.vi v8, v9, 1
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vslideup.vi v8, v10, 2
; CHECK-NEXT: vslidedown.vi v10, v8, 1
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vslideup.vi v10, v8, 1
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v9, 1
-; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vslideup.vi v8, v9, 1
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vi v8, v10, 2
; CHECK-NEXT: vslidedown.vi v10, v8, 1
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vslideup.vi v10, v8, 1
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v9, 1
-; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vslideup.vi v8, v9, 1
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vslideup.vi v8, v10, 2
; CHECK-NEXT: vslidedown.vi v10, v8, 1
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vslideup.vi v10, v8, 1
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v9, 1
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vslideup.vi v8, v9, 1
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vslideup.vi v8, v10, 2
; CHECK-NEXT: vslidedown.vi v10, v8, 1
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vslideup.vi v10, v8, 1
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v9, 1
-; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vslideup.vi v8, v9, 1
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vi v8, v10, 2
; CHECK-NEXT: vslidedown.vi v10, v8, 1
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vslideup.vi v10, v8, 1
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v9, 1
-; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vslideup.vi v8, v9, 1
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vslideup.vi v8, v10, 2
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 3
; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT: vslidedown.vx v10, v10, a0
-; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vim v8, v9, 1, v0
; CHECK-NEXT: vslideup.vi v10, v8, 1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vx v10, v10, a0
-; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vim v8, v9, 1, v0
; CHECK-NEXT: vslideup.vi v10, v8, 1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 1
; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vx v10, v10, a0
-; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vim v8, v9, 1, v0
; CHECK-NEXT: vslideup.vi v10, v8, 1
; CHECK-NEXT: vmerge.vim v10, v9, 1, v0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT: vslidedown.vx v10, v10, a0
-; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: vmerge.vim v8, v9, 1, v0
; CHECK-NEXT: vslideup.vi v10, v8, 1
; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vx v8, v8, a0
; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: andi a0, a0, 1
-; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; CHECK-NEXT: vmv.v.x v8, a0
; CHECK-NEXT: vmsne.vi v0, v8, 0
; CHECK-NEXT: ret
; RV32MV-NEXT: vand.vx v8, v8, a1
; RV32MV-NEXT: vmsltu.vv v0, v12, v8
; RV32MV-NEXT: vmerge.vim v8, v10, -1, v0
-; RV32MV-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV32MV-NEXT: vslidedown.vi v9, v8, 2
; RV32MV-NEXT: vmv.x.s a1, v9
; RV32MV-NEXT: slli a2, a1, 21
; RV64MV-NEXT: vmerge.vim v8, v10, -1, v0
; RV64MV-NEXT: vmv.x.s a1, v8
; RV64MV-NEXT: andi a1, a1, 2047
-; RV64MV-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64MV-NEXT: vslidedown.vi v9, v8, 1
; RV64MV-NEXT: vmv.x.s a2, v9
; RV64MV-NEXT: andi a2, a2, 2047